summaryrefslogtreecommitdiffstats
path: root/tools/perf/util
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-11 08:27:49 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-11 08:27:49 +0000
commitace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
treeb2d64bc10158fdd5497876388cd68142ca374ed3 /tools/perf/util
parentInitial commit. (diff)
downloadlinux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz
linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip
Adding upstream version 6.6.15.upstream/6.6.15
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tools/perf/util')
-rw-r--r--tools/perf/util/Build384
-rwxr-xr-xtools/perf/util/PERF-VERSION-GEN52
-rw-r--r--tools/perf/util/addr_location.c44
-rw-r--r--tools/perf/util/addr_location.h31
-rw-r--r--tools/perf/util/affinity.c85
-rw-r--r--tools/perf/util/affinity.h17
-rw-r--r--tools/perf/util/amd-sample-raw.c341
-rw-r--r--tools/perf/util/annotate.c3423
-rw-r--r--tools/perf/util/annotate.h434
-rw-r--r--tools/perf/util/archinsn.h12
-rw-r--r--tools/perf/util/arm-spe-decoder/Build1
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.c267
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.h114
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c546
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h159
-rw-r--r--tools/perf/util/arm-spe.c1379
-rw-r--r--tools/perf/util/arm-spe.h31
-rw-r--r--tools/perf/util/arm64-frame-pointer-unwind-support.c63
-rw-r--r--tools/perf/util/arm64-frame-pointer-unwind-support.h12
-rw-r--r--tools/perf/util/auxtrace.c2871
-rw-r--r--tools/perf/util/auxtrace.h906
-rw-r--r--tools/perf/util/block-info.c498
-rw-r--r--tools/perf/util/block-info.h86
-rw-r--r--tools/perf/util/block-range.c327
-rw-r--r--tools/perf/util/block-range.h76
-rw-r--r--tools/perf/util/bpf-event.c585
-rw-r--r--tools/perf/util/bpf-event.h60
-rw-r--r--tools/perf/util/bpf-filter.c207
-rw-r--r--tools/perf/util/bpf-filter.h49
-rw-r--r--tools/perf/util/bpf-filter.l159
-rw-r--r--tools/perf/util/bpf-filter.y80
-rw-r--r--tools/perf/util/bpf-prologue.h37
-rw-r--r--tools/perf/util/bpf-utils.c260
-rw-r--r--tools/perf/util/bpf-utils.h76
-rw-r--r--tools/perf/util/bpf_counter.c819
-rw-r--r--tools/perf/util/bpf_counter.h137
-rw-r--r--tools/perf/util/bpf_counter_cgroup.c295
-rw-r--r--tools/perf/util/bpf_ftrace.c154
-rw-r--r--tools/perf/util/bpf_kwork.c349
-rw-r--r--tools/perf/util/bpf_lock_contention.c375
-rw-r--r--tools/perf/util/bpf_map.c72
-rw-r--r--tools/perf/util/bpf_map.h23
-rw-r--r--tools/perf/util/bpf_off_cpu.c393
-rw-r--r--tools/perf/util/bpf_skel/.gitignore4
-rw-r--r--tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c422
-rw-r--r--tools/perf/util/bpf_skel/bench_uprobe.bpf.c23
-rw-r--r--tools/perf/util/bpf_skel/bperf_cgroup.bpf.c227
-rw-r--r--tools/perf/util/bpf_skel/bperf_follower.bpf.c78
-rw-r--r--tools/perf/util/bpf_skel/bperf_leader.bpf.c55
-rw-r--r--tools/perf/util/bpf_skel/bperf_u.h14
-rw-r--r--tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c92
-rw-r--r--tools/perf/util/bpf_skel/func_latency.bpf.c116
-rw-r--r--tools/perf/util/bpf_skel/kwork_trace.bpf.c383
-rw-r--r--tools/perf/util/bpf_skel/lock_contention.bpf.c453
-rw-r--r--tools/perf/util/bpf_skel/lock_data.h49
-rw-r--r--tools/perf/util/bpf_skel/off_cpu.bpf.c283
-rw-r--r--tools/perf/util/bpf_skel/sample-filter.h27
-rw-r--r--tools/perf/util/bpf_skel/sample_filter.bpf.c196
-rw-r--r--tools/perf/util/bpf_skel/vmlinux/.gitignore1
-rw-r--r--tools/perf/util/bpf_skel/vmlinux/vmlinux.h184
-rw-r--r--tools/perf/util/branch.c229
-rw-r--r--tools/perf/util/branch.h94
-rw-r--r--tools/perf/util/build-id.c1022
-rw-r--r--tools/perf/util/build-id.h88
-rw-r--r--tools/perf/util/cache.h31
-rw-r--r--tools/perf/util/cacheline.c25
-rw-r--r--tools/perf/util/cacheline.h36
-rw-r--r--tools/perf/util/call-path.c114
-rw-r--r--tools/perf/util/call-path.h68
-rw-r--r--tools/perf/util/callchain.c1783
-rw-r--r--tools/perf/util/callchain.h314
-rw-r--r--tools/perf/util/cap.c29
-rw-r--r--tools/perf/util/cap.h36
-rw-r--r--tools/perf/util/cgroup.c589
-rw-r--r--tools/perf/util/cgroup.h54
-rw-r--r--tools/perf/util/clockid.c119
-rw-r--r--tools/perf/util/clockid.h11
-rw-r--r--tools/perf/util/cloexec.c92
-rw-r--r--tools/perf/util/cloexec.h7
-rw-r--r--tools/perf/util/color.c184
-rw-r--r--tools/perf/util/color.h49
-rw-r--r--tools/perf/util/color_config.c48
-rw-r--r--tools/perf/util/comm.c153
-rw-r--r--tools/perf/util/comm.h28
-rw-r--r--tools/perf/util/compress.h68
-rw-r--r--tools/perf/util/config.c941
-rw-r--r--tools/perf/util/config.h79
-rw-r--r--tools/perf/util/copyfile.c146
-rw-r--r--tools/perf/util/copyfile.h16
-rw-r--r--tools/perf/util/counts.c71
-rw-r--r--tools/perf/util/counts.h45
-rw-r--r--tools/perf/util/cpu-set-sched.h50
-rw-r--r--tools/perf/util/cpumap.c717
-rw-r--r--tools/perf/util/cpumap.h151
-rw-r--r--tools/perf/util/cputopo.c506
-rw-r--r--tools/perf/util/cputopo.h78
-rw-r--r--tools/perf/util/cs-etm-base.c193
-rw-r--r--tools/perf/util/cs-etm-decoder/Build1
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c837
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.h111
-rw-r--r--tools/perf/util/cs-etm.c3417
-rw-r--r--tools/perf/util/cs-etm.h277
-rw-r--r--tools/perf/util/data-convert-bt.c1702
-rw-r--r--tools/perf/util/data-convert-json.c422
-rw-r--r--tools/perf/util/data-convert.h21
-rw-r--r--tools/perf/util/data.c559
-rw-r--r--tools/perf/util/data.h107
-rw-r--r--tools/perf/util/db-export.c615
-rw-r--r--tools/perf/util/db-export.h110
-rw-r--r--tools/perf/util/debug.c313
-rw-r--r--tools/perf/util/debug.h88
-rw-r--r--tools/perf/util/demangle-cxx.cpp49
-rw-r--r--tools/perf/util/demangle-cxx.h16
-rw-r--r--tools/perf/util/demangle-java.c203
-rw-r--r--tools/perf/util/demangle-java.h11
-rw-r--r--tools/perf/util/demangle-ocaml.c68
-rw-r--r--tools/perf/util/demangle-ocaml.h7
-rw-r--r--tools/perf/util/demangle-rust.c269
-rw-r--r--tools/perf/util/demangle-rust.h8
-rw-r--r--tools/perf/util/dlfilter.c660
-rw-r--r--tools/perf/util/dlfilter.h99
-rw-r--r--tools/perf/util/dso.c1528
-rw-r--r--tools/perf/util/dso.h410
-rw-r--r--tools/perf/util/dsos.c333
-rw-r--r--tools/perf/util/dsos.h40
-rw-r--r--tools/perf/util/dump-insn.c23
-rw-r--r--tools/perf/util/dump-insn.h25
-rw-r--r--tools/perf/util/dwarf-aux.c1427
-rw-r--r--tools/perf/util/dwarf-aux.h133
-rw-r--r--tools/perf/util/dwarf-regs.c70
-rw-r--r--tools/perf/util/env.c594
-rw-r--r--tools/perf/util/env.h181
-rw-r--r--tools/perf/util/event.c829
-rw-r--r--tools/perf/util/event.h404
-rw-r--r--tools/perf/util/events_stats.h58
-rw-r--r--tools/perf/util/evlist.c2502
-rw-r--r--tools/perf/util/evlist.h445
-rw-r--r--tools/perf/util/evsel.c3171
-rw-r--r--tools/perf/util/evsel.h558
-rw-r--r--tools/perf/util/evsel_config.h61
-rw-r--r--tools/perf/util/evsel_fprintf.c251
-rw-r--r--tools/perf/util/evsel_fprintf.h50
-rw-r--r--tools/perf/util/evswitch.c61
-rw-r--r--tools/perf/util/evswitch.h31
-rw-r--r--tools/perf/util/expr.c522
-rw-r--r--tools/perf/util/expr.h60
-rw-r--r--tools/perf/util/expr.l140
-rw-r--r--tools/perf/util/expr.y379
-rw-r--r--tools/perf/util/find-map.c30
-rw-r--r--tools/perf/util/fncache.c63
-rw-r--r--tools/perf/util/fncache.h7
-rw-r--r--tools/perf/util/ftrace.h81
-rw-r--r--tools/perf/util/genelf.c529
-rw-r--r--tools/perf/util/genelf.h86
-rw-r--r--tools/perf/util/genelf_debug.c625
-rwxr-xr-xtools/perf/util/generate-cmdlist.sh70
-rw-r--r--tools/perf/util/get_current_dir_name.c18
-rw-r--r--tools/perf/util/get_current_dir_name.h8
-rw-r--r--tools/perf/util/hashmap.c240
-rw-r--r--tools/perf/util/hashmap.h208
-rw-r--r--tools/perf/util/header.c4523
-rw-r--r--tools/perf/util/header.h194
-rw-r--r--tools/perf/util/help-unknown-cmd.c121
-rw-r--r--tools/perf/util/help-unknown-cmd.h0
-rw-r--r--tools/perf/util/hisi-ptt-decoder/Build1
-rw-r--r--tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.c164
-rw-r--r--tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.h31
-rw-r--r--tools/perf/util/hisi-ptt.c193
-rw-r--r--tools/perf/util/hisi-ptt.h19
-rw-r--r--tools/perf/util/hist.c2927
-rw-r--r--tools/perf/util/hist.h600
-rw-r--r--tools/perf/util/include/asm/asm-offsets.h2
-rw-r--r--tools/perf/util/include/asm/cpufeature.h10
-rw-r--r--tools/perf/util/include/asm/dwarf2.h14
-rw-r--r--tools/perf/util/include/asm/swab.h1
-rw-r--r--tools/perf/util/include/asm/system.h1
-rw-r--r--tools/perf/util/include/asm/uaccess.h15
-rw-r--r--tools/perf/util/include/dwarf-regs.h23
-rw-r--r--tools/perf/util/include/linux/linkage.h131
-rw-r--r--tools/perf/util/intel-bts.c939
-rw-r--r--tools/perf/util/intel-bts.h34
-rw-r--r--tools/perf/util/intel-pt-decoder/Build22
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c4513
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.h318
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c311
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h58
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-log.c267
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-log.h76
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c800
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h92
-rw-r--r--tools/perf/util/intel-pt.c4496
-rw-r--r--tools/perf/util/intel-pt.h47
-rw-r--r--tools/perf/util/intlist.c150
-rw-r--r--tools/perf/util/intlist.h78
-rw-r--r--tools/perf/util/iostat.c54
-rw-r--r--tools/perf/util/iostat.h47
-rw-r--r--tools/perf/util/jit.h12
-rw-r--r--tools/perf/util/jitdump.c888
-rw-r--r--tools/perf/util/jitdump.h140
-rw-r--r--tools/perf/util/kvm-stat.h191
-rw-r--r--tools/perf/util/kwork.h257
-rw-r--r--tools/perf/util/levenshtein.c87
-rw-r--r--tools/perf/util/levenshtein.h9
-rw-r--r--tools/perf/util/libunwind/arm64.c40
-rw-r--r--tools/perf/util/libunwind/x86_32.c41
-rw-r--r--tools/perf/util/lock-contention.h174
-rw-r--r--tools/perf/util/lzma.c122
-rw-r--r--tools/perf/util/machine.c3453
-rw-r--r--tools/perf/util/machine.h316
-rw-r--r--tools/perf/util/map.c647
-rw-r--r--tools/perf/util/map.h306
-rw-r--r--tools/perf/util/map_symbol.h25
-rw-r--r--tools/perf/util/maps.c477
-rw-r--r--tools/perf/util/maps.h136
-rw-r--r--tools/perf/util/mem-events.c707
-rw-r--r--tools/perf/util/mem-events.h97
-rw-r--r--tools/perf/util/mem2node.c139
-rw-r--r--tools/perf/util/mem2node.h20
-rw-r--r--tools/perf/util/memswap.c25
-rw-r--r--tools/perf/util/memswap.h15
-rw-r--r--tools/perf/util/metricgroup.c1864
-rw-r--r--tools/perf/util/metricgroup.h95
-rw-r--r--tools/perf/util/mmap.c374
-rw-r--r--tools/perf/util/mmap.h68
-rw-r--r--tools/perf/util/mutex.c119
-rw-r--r--tools/perf/util/mutex.h108
-rw-r--r--tools/perf/util/namespaces.c376
-rw-r--r--tools/perf/util/namespaces.h83
-rw-r--r--tools/perf/util/off_cpu.h38
-rw-r--r--tools/perf/util/ordered-events.c417
-rw-r--r--tools/perf/util/ordered-events.h84
-rw-r--r--tools/perf/util/parse-branch-options.c114
-rw-r--r--tools/perf/util/parse-branch-options.h7
-rw-r--r--tools/perf/util/parse-events.c2694
-rw-r--r--tools/perf/util/parse-events.h268
-rw-r--r--tools/perf/util/parse-events.l353
-rw-r--r--tools/perf/util/parse-events.y870
-rw-r--r--tools/perf/util/parse-regs-options.c99
-rw-r--r--tools/perf/util/parse-regs-options.h7
-rw-r--r--tools/perf/util/parse-sublevel-options.c70
-rw-r--r--tools/perf/util/parse-sublevel-options.h11
-rw-r--r--tools/perf/util/path.c81
-rw-r--r--tools/perf/util/path.h17
-rw-r--r--tools/perf/util/perf-hooks-list.h3
-rw-r--r--tools/perf/util/perf-hooks.c90
-rw-r--r--tools/perf/util/perf-hooks.h40
-rw-r--r--tools/perf/util/perf-regs-arch/Build9
-rw-r--r--tools/perf/util/perf-regs-arch/perf_regs_aarch64.c96
-rw-r--r--tools/perf/util/perf-regs-arch/perf_regs_arm.c60
-rw-r--r--tools/perf/util/perf-regs-arch/perf_regs_csky.c100
-rw-r--r--tools/perf/util/perf-regs-arch/perf_regs_loongarch.c91
-rw-r--r--tools/perf/util/perf-regs-arch/perf_regs_mips.c87
-rw-r--r--tools/perf/util/perf-regs-arch/perf_regs_powerpc.c145
-rw-r--r--tools/perf/util/perf-regs-arch/perf_regs_riscv.c92
-rw-r--r--tools/perf/util/perf-regs-arch/perf_regs_s390.c96
-rw-r--r--tools/perf/util/perf-regs-arch/perf_regs_x86.c98
-rw-r--r--tools/perf/util/perf_api_probe.c197
-rw-r--r--tools/perf/util/perf_api_probe.h17
-rw-r--r--tools/perf/util/perf_event_attr_fprintf.c327
-rw-r--r--tools/perf/util/perf_regs.c129
-rw-r--r--tools/perf/util/perf_regs.h100
-rw-r--r--tools/perf/util/pfm.c309
-rw-r--r--tools/perf/util/pfm.h38
-rw-r--r--tools/perf/util/pmu.c2068
-rw-r--r--tools/perf/util/pmu.h269
-rw-r--r--tools/perf/util/pmu.l48
-rw-r--r--tools/perf/util/pmu.y97
-rw-r--r--tools/perf/util/pmus.c594
-rw-r--r--tools/perf/util/pmus.h27
-rw-r--r--tools/perf/util/print-events.c452
-rw-r--r--tools/perf/util/print-events.h43
-rw-r--r--tools/perf/util/print_binary.c58
-rw-r--r--tools/perf/util/print_binary.h37
-rw-r--r--tools/perf/util/probe-event.c3817
-rw-r--r--tools/perf/util/probe-event.h201
-rw-r--r--tools/perf/util/probe-file.c1200
-rw-r--r--tools/perf/util/probe-file.h83
-rw-r--r--tools/perf/util/probe-finder.c2109
-rw-r--r--tools/perf/util/probe-finder.h126
-rw-r--r--tools/perf/util/pstack.c85
-rw-r--r--tools/perf/util/pstack.h16
-rw-r--r--tools/perf/util/python-ext-sources51
-rw-r--r--tools/perf/util/python.c1513
-rw-r--r--tools/perf/util/rb_resort.h151
-rw-r--r--tools/perf/util/rblist.c140
-rw-r--r--tools/perf/util/rblist.h50
-rw-r--r--tools/perf/util/record.c291
-rw-r--r--tools/perf/util/record.h94
-rw-r--r--tools/perf/util/rlimit.c29
-rw-r--r--tools/perf/util/rlimit.h6
-rw-r--r--tools/perf/util/rwsem.c33
-rw-r--r--tools/perf/util/rwsem.h19
-rw-r--r--tools/perf/util/s390-cpumcf-kernel.h63
-rw-r--r--tools/perf/util/s390-cpumsf-kernel.h71
-rw-r--r--tools/perf/util/s390-cpumsf.c1177
-rw-r--r--tools/perf/util/s390-cpumsf.h21
-rw-r--r--tools/perf/util/s390-sample-raw.c238
-rw-r--r--tools/perf/util/sample-raw.c26
-rw-r--r--tools/perf/util/sample-raw.h15
-rw-r--r--tools/perf/util/sample.h133
-rw-r--r--tools/perf/util/scripting-engines/Build9
-rw-r--r--tools/perf/util/scripting-engines/trace-event-perl.c769
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c2218
-rw-r--r--tools/perf/util/session.c2892
-rw-r--r--tools/perf/util/session.h164
-rw-r--r--tools/perf/util/setns.c10
-rw-r--r--tools/perf/util/setup.py114
-rw-r--r--tools/perf/util/sharded_mutex.c33
-rw-r--r--tools/perf/util/sharded_mutex.h29
-rw-r--r--tools/perf/util/sideband_evlist.c149
-rw-r--r--tools/perf/util/smt.c36
-rw-r--r--tools/perf/util/smt.h17
-rw-r--r--tools/perf/util/sort.c3772
-rw-r--r--tools/perf/util/sort.h335
-rw-r--r--tools/perf/util/spark.c34
-rw-r--r--tools/perf/util/spark.h8
-rw-r--r--tools/perf/util/srccode.c171
-rw-r--r--tools/perf/util/srccode.h20
-rw-r--r--tools/perf/util/srcline.c1059
-rw-r--r--tools/perf/util/srcline.h58
-rw-r--r--tools/perf/util/stat-display.c1597
-rw-r--r--tools/perf/util/stat-shadow.c716
-rw-r--r--tools/perf/util/stat.c801
-rw-r--r--tools/perf/util/stat.h228
-rw-r--r--tools/perf/util/strbuf.c170
-rw-r--r--tools/perf/util/strbuf.h95
-rw-r--r--tools/perf/util/stream.c342
-rw-r--r--tools/perf/util/stream.h41
-rw-r--r--tools/perf/util/strfilter.c312
-rw-r--r--tools/perf/util/strfilter.h84
-rw-r--r--tools/perf/util/string.c303
-rw-r--r--tools/perf/util/string2.h43
-rw-r--r--tools/perf/util/strlist.c208
-rw-r--r--tools/perf/util/strlist.h90
-rw-r--r--tools/perf/util/svghelper.c809
-rw-r--r--tools/perf/util/svghelper.h39
-rw-r--r--tools/perf/util/symbol-elf.c2915
-rw-r--r--tools/perf/util/symbol-minimal.c392
-rw-r--r--tools/perf/util/symbol.c2825
-rw-r--r--tools/perf/util/symbol.h286
-rw-r--r--tools/perf/util/symbol_conf.h84
-rw-r--r--tools/perf/util/symbol_fprintf.c73
-rw-r--r--tools/perf/util/symsrc.h47
-rw-r--r--tools/perf/util/synthetic-events.c2416
-rw-r--r--tools/perf/util/synthetic-events.h125
-rw-r--r--tools/perf/util/syscalltbl.c184
-rw-r--r--tools/perf/util/syscalltbl.h23
-rw-r--r--tools/perf/util/target.c184
-rw-r--r--tools/perf/util/target.h106
-rw-r--r--tools/perf/util/term.c40
-rw-r--r--tools/perf/util/term.h11
-rw-r--r--tools/perf/util/thread-stack.c1239
-rw-r--r--tools/perf/util/thread-stack.h110
-rw-r--r--tools/perf/util/thread.c497
-rw-r--r--tools/perf/util/thread.h342
-rw-r--r--tools/perf/util/thread_map.c430
-rw-r--r--tools/perf/util/thread_map.h29
-rw-r--r--tools/perf/util/time-utils.c559
-rw-r--r--tools/perf/util/time-utils.h51
-rw-r--r--tools/perf/util/tool.h91
-rw-r--r--tools/perf/util/top.c120
-rw-r--r--tools/perf/util/top.h66
-rw-r--r--tools/perf/util/topdown.c8
-rw-r--r--tools/perf/util/topdown.h11
-rw-r--r--tools/perf/util/trace-event-info.c702
-rw-r--r--tools/perf/util/trace-event-parse.c216
-rw-r--r--tools/perf/util/trace-event-read.c480
-rw-r--r--tools/perf/util/trace-event-scripting.c193
-rw-r--r--tools/perf/util/trace-event.c118
-rw-r--r--tools/perf/util/trace-event.h165
-rw-r--r--tools/perf/util/tracepoint.c64
-rw-r--r--tools/perf/util/tracepoint.h25
-rw-r--r--tools/perf/util/trigger.h95
-rw-r--r--tools/perf/util/tsc.c142
-rw-r--r--tools/perf/util/tsc.h32
-rw-r--r--tools/perf/util/units.c76
-rw-r--r--tools/perf/util/units.h19
-rw-r--r--tools/perf/util/unwind-libdw.c325
-rw-r--r--tools/perf/util/unwind-libdw.h27
-rw-r--r--tools/perf/util/unwind-libunwind-local.c812
-rw-r--r--tools/perf/util/unwind-libunwind.c97
-rw-r--r--tools/perf/util/unwind.h82
-rw-r--r--tools/perf/util/usage.c34
-rw-r--r--tools/perf/util/util.c554
-rw-r--r--tools/perf/util/util.h124
-rw-r--r--tools/perf/util/values.c305
-rw-r--r--tools/perf/util/values.h28
-rw-r--r--tools/perf/util/vdso.c364
-rw-r--r--tools/perf/util/vdso.h30
-rw-r--r--tools/perf/util/zlib.c97
-rw-r--r--tools/perf/util/zstd.c111
391 files changed, 145125 insertions, 0 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
new file mode 100644
index 000000000..6d657c992
--- /dev/null
+++ b/tools/perf/util/Build
@@ -0,0 +1,384 @@
+include $(srctree)/tools/scripts/Makefile.include
+include $(srctree)/tools/scripts/utilities.mak
+
+perf-y += arm64-frame-pointer-unwind-support.o
+perf-y += addr_location.o
+perf-y += annotate.o
+perf-y += block-info.o
+perf-y += block-range.o
+perf-y += build-id.o
+perf-y += cacheline.o
+perf-y += config.o
+perf-y += copyfile.o
+perf-y += ctype.o
+perf-y += db-export.o
+perf-y += env.o
+perf-y += event.o
+perf-y += evlist.o
+perf-y += sideband_evlist.o
+perf-y += evsel.o
+perf-y += evsel_fprintf.o
+perf-y += perf_event_attr_fprintf.o
+perf-y += evswitch.o
+perf-y += find_bit.o
+perf-y += get_current_dir_name.o
+perf-y += levenshtein.o
+perf-y += mmap.o
+perf-y += memswap.o
+perf-y += parse-events.o
+perf-y += print-events.o
+perf-y += tracepoint.o
+perf-y += perf_regs.o
+perf-y += perf-regs-arch/
+perf-y += path.o
+perf-y += print_binary.o
+perf-y += rlimit.o
+perf-y += argv_split.o
+perf-y += rbtree.o
+perf-y += libstring.o
+perf-y += bitmap.o
+perf-y += hweight.o
+perf-y += smt.o
+perf-y += strbuf.o
+perf-y += string.o
+perf-y += strlist.o
+perf-y += strfilter.o
+perf-y += top.o
+perf-y += usage.o
+perf-y += dso.o
+perf-y += dsos.o
+perf-y += symbol.o
+perf-y += symbol_fprintf.o
+perf-y += color.o
+perf-y += color_config.o
+perf-y += metricgroup.o
+perf-y += header.o
+perf-y += callchain.o
+perf-y += values.o
+perf-y += debug.o
+perf-y += fncache.o
+perf-y += machine.o
+perf-y += map.o
+perf-y += maps.o
+perf-y += pstack.o
+perf-y += session.o
+perf-y += sample-raw.o
+perf-y += s390-sample-raw.o
+perf-y += amd-sample-raw.o
+perf-$(CONFIG_TRACE) += syscalltbl.o
+perf-y += ordered-events.o
+perf-y += namespaces.o
+perf-y += comm.o
+perf-y += thread.o
+perf-y += thread_map.o
+perf-y += parse-events-flex.o
+perf-y += parse-events-bison.o
+perf-y += pmu.o
+perf-y += pmus.o
+perf-y += pmu-flex.o
+perf-y += pmu-bison.o
+perf-y += svghelper.o
+perf-$(CONFIG_LIBTRACEEVENT) += trace-event-info.o
+perf-y += trace-event-scripting.o
+perf-$(CONFIG_LIBTRACEEVENT) += trace-event.o
+perf-$(CONFIG_LIBTRACEEVENT) += trace-event-parse.o
+perf-$(CONFIG_LIBTRACEEVENT) += trace-event-read.o
+perf-y += sort.o
+perf-y += hist.o
+perf-y += util.o
+perf-y += cpumap.o
+perf-y += affinity.o
+perf-y += cputopo.o
+perf-y += cgroup.o
+perf-y += target.o
+perf-y += rblist.o
+perf-y += intlist.o
+perf-y += vdso.o
+perf-y += counts.o
+perf-y += stat.o
+perf-y += stat-shadow.o
+perf-y += stat-display.o
+perf-y += perf_api_probe.o
+perf-y += record.o
+perf-y += srcline.o
+perf-y += srccode.o
+perf-y += synthetic-events.o
+perf-y += data.o
+perf-y += tsc.o
+perf-y += cloexec.o
+perf-y += call-path.o
+perf-y += rwsem.o
+perf-y += thread-stack.o
+perf-y += spark.o
+perf-y += topdown.o
+perf-y += iostat.o
+perf-y += stream.o
+perf-$(CONFIG_AUXTRACE) += auxtrace.o
+perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
+perf-$(CONFIG_AUXTRACE) += intel-pt.o
+perf-$(CONFIG_AUXTRACE) += intel-bts.o
+perf-$(CONFIG_AUXTRACE) += arm-spe.o
+perf-$(CONFIG_AUXTRACE) += arm-spe-decoder/
+perf-$(CONFIG_AUXTRACE) += hisi-ptt.o
+perf-$(CONFIG_AUXTRACE) += hisi-ptt-decoder/
+perf-$(CONFIG_AUXTRACE) += s390-cpumsf.o
+
+ifdef CONFIG_LIBOPENCSD
+perf-$(CONFIG_AUXTRACE) += cs-etm.o
+perf-$(CONFIG_AUXTRACE) += cs-etm-decoder/
+endif
+perf-$(CONFIG_AUXTRACE) += cs-etm-base.o
+
+perf-y += parse-branch-options.o
+perf-y += dump-insn.o
+perf-y += parse-regs-options.o
+perf-y += parse-sublevel-options.o
+perf-y += term.o
+perf-y += help-unknown-cmd.o
+perf-y += dlfilter.o
+perf-y += mem-events.o
+perf-y += vsprintf.o
+perf-y += units.o
+perf-y += time-utils.o
+perf-y += expr-flex.o
+perf-y += expr-bison.o
+perf-y += expr.o
+perf-y += branch.o
+perf-y += mem2node.o
+perf-y += clockid.o
+perf-y += list_sort.o
+perf-y += mutex.o
+perf-y += sharded_mutex.o
+
+perf-$(CONFIG_LIBBPF) += bpf_map.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf-filter.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-flex.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-bison.o
+
+ifeq ($(CONFIG_LIBTRACEEVENT),y)
+ perf-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o
+endif
+
+ifeq ($(CONFIG_LIBTRACEEVENT),y)
+ perf-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork.o
+endif
+
+perf-$(CONFIG_LIBELF) += symbol-elf.o
+perf-$(CONFIG_LIBELF) += probe-file.o
+perf-$(CONFIG_LIBELF) += probe-event.o
+
+ifdef CONFIG_LIBBPF_DYNAMIC
+ hashmap := 1
+endif
+ifndef CONFIG_LIBBPF
+ hashmap := 1
+endif
+
+ifdef hashmap
+perf-y += hashmap.o
+endif
+
+ifndef CONFIG_LIBELF
+perf-y += symbol-minimal.o
+endif
+
+ifndef CONFIG_SETNS
+perf-y += setns.o
+endif
+
+perf-$(CONFIG_DWARF) += probe-finder.o
+perf-$(CONFIG_DWARF) += dwarf-aux.o
+perf-$(CONFIG_DWARF) += dwarf-regs.o
+
+perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o
+perf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
+perf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o
+perf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o
+
+ifeq ($(CONFIG_LIBTRACEEVENT),y)
+ perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
+endif
+
+perf-y += data-convert-json.o
+
+perf-y += scripting-engines/
+
+perf-$(CONFIG_ZLIB) += zlib.o
+perf-$(CONFIG_LZMA) += lzma.o
+perf-$(CONFIG_ZSTD) += zstd.o
+
+perf-$(CONFIG_LIBCAP) += cap.o
+
+perf-$(CONFIG_CXX_DEMANGLE) += demangle-cxx.o
+perf-y += demangle-ocaml.o
+perf-y += demangle-java.o
+perf-y += demangle-rust.o
+
+ifdef CONFIG_JITDUMP
+perf-$(CONFIG_LIBELF) += jitdump.o
+perf-$(CONFIG_LIBELF) += genelf.o
+perf-$(CONFIG_DWARF) += genelf_debug.o
+endif
+
+perf-y += perf-hooks.o
+
+perf-$(CONFIG_LIBBPF) += bpf-event.o
+perf-$(CONFIG_LIBBPF) += bpf-utils.o
+
+perf-$(CONFIG_LIBPFM4) += pfm.o
+
+CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+
+# avoid compiler warnings in 32-bit mode
+CFLAGS_genelf_debug.o += -Wno-packed
+
+$(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-flex.h: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/parse-events-flex.c \
+ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) $<
+
+$(OUTPUT)util/parse-events-bison.c $(OUTPUT)util/parse-events-bison.h: util/parse-events.y
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) $(BISON_FALLBACK_FLAGS) \
+ -o $(OUTPUT)util/parse-events-bison.c -p parse_events_
+
+$(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-flex.h: util/expr.l $(OUTPUT)util/expr-bison.c
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/expr-flex.c \
+ --header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) $<
+
+$(OUTPUT)util/expr-bison.c $(OUTPUT)util/expr-bison.h: util/expr.y
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \
+ -o $(OUTPUT)util/expr-bison.c -p expr_
+
+$(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-flex.h: util/pmu.l $(OUTPUT)util/pmu-bison.c
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/pmu-flex.c \
+ --header-file=$(OUTPUT)util/pmu-flex.h $(PARSER_DEBUG_FLEX) $<
+
+$(OUTPUT)util/pmu-bison.c $(OUTPUT)util/pmu-bison.h: util/pmu.y
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \
+ -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_
+
+$(OUTPUT)util/bpf-filter-flex.c $(OUTPUT)util/bpf-filter-flex.h: util/bpf-filter.l $(OUTPUT)util/bpf-filter-bison.c
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/bpf-filter-flex.c \
+ --header-file=$(OUTPUT)util/bpf-filter-flex.h $(PARSER_DEBUG_FLEX) $<
+
+$(OUTPUT)util/bpf-filter-bison.c $(OUTPUT)util/bpf-filter-bison.h: util/bpf-filter.y
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \
+ -o $(OUTPUT)util/bpf-filter-bison.c -p perf_bpf_filter_
+
+FLEX_VERSION := $(shell $(FLEX) --version | cut -d' ' -f2)
+
+FLEX_GE_260 := $(call version-ge3,$(FLEX_VERSION),2.6.0)
+ifeq ($(FLEX_GE_260),1)
+ flex_flags := -Wno-redundant-decls -Wno-switch-default -Wno-unused-function -Wno-misleading-indentation
+
+ # Some newer clang and gcc version complain about this
+ # util/parse-events-bison.c:1317:9: error: variable 'parse_events_nerrs' set but not used [-Werror,-Wunused-but-set-variable]
+ # int yynerrs = 0;
+
+ flex_flags += -Wno-unused-but-set-variable
+
+ FLEX_LT_262 := $(call version-lt3,$(FLEX_VERSION),2.6.2)
+ ifeq ($(FLEX_LT_262),1)
+ flex_flags += -Wno-sign-compare
+ endif
+else
+ flex_flags := -w
+endif
+
+# Some newer clang and gcc version complain about this
+# util/parse-events-bison.c:1317:9: error: variable 'parse_events_nerrs' set but not used [-Werror,-Wunused-but-set-variable]
+# int yynerrs = 0;
+
+bison_flags := -DYYENABLE_NLS=0 -Wno-unused-but-set-variable
+
+# Old clangs don't grok -Wno-unused-but-set-variable, remove it
+ifeq ($(CC_NO_CLANG), 0)
+ CLANG_VERSION := $(shell $(CLANG) --version | head -1 | sed 's/.*clang version \([[:digit:]]\+.[[:digit:]]\+.[[:digit:]]\+\).*/\1/g')
+ ifeq ($(call version-lt3,$(CLANG_VERSION),13.0.0),1)
+ bison_flags := $(subst -Wno-unused-but-set-variable,,$(bison_flags))
+ flex_flags := $(subst -Wno-unused-but-set-variable,,$(flex_flags))
+ endif
+endif
+
+BISON_GE_382 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\).\([0-9]\+\)/\1\2\3/g') \>\= 382)
+ifeq ($(BISON_GE_382),1)
+ bison_flags += -Wno-switch-enum
+else
+ bison_flags += -w
+endif
+
+BISON_LT_381 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\).\([0-9]\+\)/\1\2\3/g') \< 381)
+ifeq ($(BISON_LT_381),1)
+ bison_flags += -DYYNOMEM=YYABORT
+endif
+
+CFLAGS_parse-events-flex.o += $(flex_flags) -Wno-unused-label
+CFLAGS_pmu-flex.o += $(flex_flags)
+CFLAGS_expr-flex.o += $(flex_flags)
+CFLAGS_bpf-filter-flex.o += $(flex_flags)
+
+CFLAGS_parse-events-bison.o += $(bison_flags)
+CFLAGS_pmu-bison.o += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags)
+CFLAGS_expr-bison.o += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags)
+CFLAGS_bpf-filter-bison.o += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags)
+
+$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
+$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
+$(OUTPUT)util/expr.o: $(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-bison.c
+$(OUTPUT)util/bpf-filter.o: $(OUTPUT)util/bpf-filter-flex.c $(OUTPUT)util/bpf-filter-bison.c
+
+CFLAGS_bitmap.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_find_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE
+CFLAGS_arm-spe.o += -I$(srctree)/tools/arch/arm64/include/
+
+$(OUTPUT)util/argv_split.o: ../lib/argv_split.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/bitmap.o: ../lib/bitmap.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/ctype.o: ../lib/ctype.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/find_bit.o: ../lib/find_bit.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/rbtree.o: ../lib/rbtree.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/libstring.o: ../lib/string.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/vsprintf.o: ../lib/vsprintf.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/list_sort.o: ../lib/list_sort.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN
new file mode 100755
index 000000000..d7dc7c285
--- /dev/null
+++ b/tools/perf/util/PERF-VERSION-GEN
@@ -0,0 +1,52 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+if [ $# -eq 1 ] ; then
+ OUTPUT=$1
+fi
+
+GVF=${OUTPUT}PERF-VERSION-FILE
+
+LF='
+'
+
+#
+# Use version from kernel Makefile unless not in a git repository and
+# PERF-VERSION-FILE exists
+#
+CID=
+TAG=
+if test -d ../../.git -o -f ../../.git
+then
+ TAG=$(MAKEFLAGS= make -sC ../.. kernelversion)
+ CID=$(git log -1 --abbrev=12 --pretty=format:"%h" --no-show-signature 2>/dev/null) && CID="-g$CID"
+elif test -f ../../PERF-VERSION-FILE
+then
+ TAG=$(cut -d' ' -f3 ../../PERF-VERSION-FILE | sed -e 's/\"//g')
+fi
+if test -z "$TAG"
+then
+ TAG=$(MAKEFLAGS= make -sC ../.. kernelversion)
+fi
+
+VN="$TAG$CID"
+if test -n "$CID"
+then
+ # format version string, strip trailing zero of sublevel:
+ VN=$(echo "$VN" | sed -e 's/-/./g;s/\([0-9]*[.][0-9]*\)[.]0/\1/')
+fi
+
+VN=$(expr "$VN" : v*'\(.*\)')
+
+if test -r $GVF
+then
+ VC=$(sed -e 's/^#define PERF_VERSION "\(.*\)"/\1/' <$GVF)
+else
+ VC=unset
+fi
+test "$VN" = "$VC" || {
+ echo >&2 " PERF_VERSION = $VN"
+ echo "#define PERF_VERSION \"$VN\"" >$GVF
+}
+
+
diff --git a/tools/perf/util/addr_location.c b/tools/perf/util/addr_location.c
new file mode 100644
index 000000000..51825ef8c
--- /dev/null
+++ b/tools/perf/util/addr_location.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "addr_location.h"
+#include "map.h"
+#include "maps.h"
+#include "thread.h"
+
+void addr_location__init(struct addr_location *al)
+{
+ al->thread = NULL;
+ al->maps = NULL;
+ al->map = NULL;
+ al->sym = NULL;
+ al->srcline = NULL;
+ al->addr = 0;
+ al->level = 0;
+ al->filtered = 0;
+ al->cpumode = 0;
+ al->cpu = 0;
+ al->socket = 0;
+}
+
+/*
+ * The preprocess_sample method will return with reference counts for the
+ * in it, when done using (and perhaps getting ref counts if needing to
+ * keep a pointer to one of those entries) it must be paired with
+ * addr_location__put(), so that the refcounts can be decremented.
+ */
+void addr_location__exit(struct addr_location *al)
+{
+ map__zput(al->map);
+ thread__zput(al->thread);
+ maps__zput(al->maps);
+}
+
+void addr_location__copy(struct addr_location *dst, struct addr_location *src)
+{
+ thread__put(dst->thread);
+ maps__put(dst->maps);
+ map__put(dst->map);
+ *dst = *src;
+ dst->thread = thread__get(src->thread);
+ dst->maps = maps__get(src->maps);
+ dst->map = map__get(src->map);
+}
diff --git a/tools/perf/util/addr_location.h b/tools/perf/util/addr_location.h
new file mode 100644
index 000000000..d8ac0428d
--- /dev/null
+++ b/tools/perf/util/addr_location.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_ADDR_LOCATION
+#define __PERF_ADDR_LOCATION 1
+
+#include <linux/types.h>
+
+struct thread;
+struct maps;
+struct map;
+struct symbol;
+
+struct addr_location {
+ struct thread *thread;
+ struct maps *maps;
+ struct map *map;
+ struct symbol *sym;
+ const char *srcline;
+ u64 addr;
+ char level;
+ u8 filtered;
+ u8 cpumode;
+ s32 cpu;
+ s32 socket;
+};
+
+void addr_location__init(struct addr_location *al);
+void addr_location__exit(struct addr_location *al);
+
+void addr_location__copy(struct addr_location *dst, struct addr_location *src);
+
+#endif /* __PERF_ADDR_LOCATION */
diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c
new file mode 100644
index 000000000..38dc4524b
--- /dev/null
+++ b/tools/perf/util/affinity.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Manage affinity to optimize IPIs inside the kernel perf API. */
+#define _GNU_SOURCE 1
+#include <sched.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include <linux/zalloc.h>
+#include "perf.h"
+#include "cpumap.h"
+#include "affinity.h"
+
+static int get_cpu_set_size(void)
+{
+ int sz = cpu__max_cpu().cpu + 8 - 1;
+ /*
+ * sched_getaffinity doesn't like masks smaller than the kernel.
+ * Hopefully that's big enough.
+ */
+ if (sz < 4096)
+ sz = 4096;
+ return sz / 8;
+}
+
+int affinity__setup(struct affinity *a)
+{
+ int cpu_set_size = get_cpu_set_size();
+
+ a->orig_cpus = bitmap_zalloc(cpu_set_size * 8);
+ if (!a->orig_cpus)
+ return -1;
+ sched_getaffinity(0, cpu_set_size, (cpu_set_t *)a->orig_cpus);
+ a->sched_cpus = bitmap_zalloc(cpu_set_size * 8);
+ if (!a->sched_cpus) {
+ zfree(&a->orig_cpus);
+ return -1;
+ }
+ bitmap_zero((unsigned long *)a->sched_cpus, cpu_set_size);
+ a->changed = false;
+ return 0;
+}
+
+/*
+ * perf_event_open does an IPI internally to the target CPU.
+ * It is more efficient to change perf's affinity to the target
+ * CPU and then set up all events on that CPU, so we amortize
+ * CPU communication.
+ */
+void affinity__set(struct affinity *a, int cpu)
+{
+ int cpu_set_size = get_cpu_set_size();
+
+ /*
+ * Return:
+ * - if cpu is -1
+ * - restrict out of bound access to sched_cpus
+ */
+ if (cpu == -1 || ((cpu >= (cpu_set_size * 8))))
+ return;
+
+ a->changed = true;
+ __set_bit(cpu, a->sched_cpus);
+ /*
+ * We ignore errors because affinity is just an optimization.
+ * This could happen for example with isolated CPUs or cpusets.
+ * In this case the IPIs inside the kernel's perf API still work.
+ */
+ sched_setaffinity(0, cpu_set_size, (cpu_set_t *)a->sched_cpus);
+ __clear_bit(cpu, a->sched_cpus);
+}
+
+static void __affinity__cleanup(struct affinity *a)
+{
+ int cpu_set_size = get_cpu_set_size();
+
+ if (a->changed)
+ sched_setaffinity(0, cpu_set_size, (cpu_set_t *)a->orig_cpus);
+ zfree(&a->sched_cpus);
+ zfree(&a->orig_cpus);
+}
+
+void affinity__cleanup(struct affinity *a)
+{
+ if (a != NULL)
+ __affinity__cleanup(a);
+}
diff --git a/tools/perf/util/affinity.h b/tools/perf/util/affinity.h
new file mode 100644
index 000000000..0ad6a18ef
--- /dev/null
+++ b/tools/perf/util/affinity.h
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef PERF_AFFINITY_H
+#define PERF_AFFINITY_H 1
+
+#include <stdbool.h>
+
+struct affinity {
+ unsigned long *orig_cpus;
+ unsigned long *sched_cpus;
+ bool changed;
+};
+
+void affinity__cleanup(struct affinity *a);
+void affinity__set(struct affinity *a, int cpu);
+int affinity__setup(struct affinity *a);
+
+#endif // PERF_AFFINITY_H
diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c
new file mode 100644
index 000000000..9d0ce88e9
--- /dev/null
+++ b/tools/perf/util/amd-sample-raw.c
@@ -0,0 +1,341 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD specific. Provide textual annotation for IBS raw sample data.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <linux/string.h>
+#include "../../arch/x86/include/asm/amd-ibs.h"
+
+#include "debug.h"
+#include "session.h"
+#include "evlist.h"
+#include "sample-raw.h"
+#include "util/sample.h"
+
+static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type;
+static bool zen4_ibs_extensions;
+
+static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg)
+{
+ const char * const ic_miss_strs[] = {
+ " IcMiss 0",
+ " IcMiss 1",
+ };
+ const char * const l1tlb_pgsz_strs[] = {
+ " L1TlbPgSz 4KB",
+ " L1TlbPgSz 2MB",
+ " L1TlbPgSz 1GB",
+ " L1TlbPgSz RESERVED"
+ };
+ const char * const l1tlb_pgsz_strs_erratum1347[] = {
+ " L1TlbPgSz 4KB",
+ " L1TlbPgSz 16KB",
+ " L1TlbPgSz 2MB",
+ " L1TlbPgSz 1GB"
+ };
+ const char *ic_miss_str = NULL;
+ const char *l1tlb_pgsz_str = NULL;
+ char l3_miss_str[sizeof(" L3MissOnly _ FetchOcMiss _ FetchL3Miss _")] = "";
+
+ if (cpu_family == 0x19 && cpu_model < 0x10) {
+ /*
+ * Erratum #1238 workaround is to ignore MSRC001_1030[IbsIcMiss]
+ * Erratum #1347 workaround is to use table provided in erratum
+ */
+ if (reg.phy_addr_valid)
+ l1tlb_pgsz_str = l1tlb_pgsz_strs_erratum1347[reg.l1tlb_pgsz];
+ } else {
+ if (reg.phy_addr_valid)
+ l1tlb_pgsz_str = l1tlb_pgsz_strs[reg.l1tlb_pgsz];
+ ic_miss_str = ic_miss_strs[reg.ic_miss];
+ }
+
+ if (zen4_ibs_extensions) {
+ snprintf(l3_miss_str, sizeof(l3_miss_str),
+ " L3MissOnly %d FetchOcMiss %d FetchL3Miss %d",
+ reg.l3_miss_only, reg.fetch_oc_miss, reg.fetch_l3_miss);
+ }
+
+ printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d Lat %5d En %d Val %d Comp %d%s "
+ "PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s%s\n",
+ reg.val, reg.fetch_maxcnt << 4, reg.fetch_cnt << 4, reg.fetch_lat,
+ reg.fetch_en, reg.fetch_val, reg.fetch_comp, ic_miss_str ? : "",
+ reg.phy_addr_valid, l1tlb_pgsz_str ? : "", reg.l1tlb_miss, reg.l2tlb_miss,
+ reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : "",
+ l3_miss_str);
+}
+
+static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg)
+{
+ printf("ic_ibs_ext_ctl:\t%016llx IbsItlbRefillLat %3d\n", reg.val, reg.itlb_refill_lat);
+}
+
+static void pr_ibs_op_ctl(union ibs_op_ctl reg)
+{
+ char l3_miss_only[sizeof(" L3MissOnly _")] = "";
+
+ if (zen4_ibs_extensions)
+ snprintf(l3_miss_only, sizeof(l3_miss_only), " L3MissOnly %d", reg.l3_miss_only);
+
+ printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d\n",
+ reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, l3_miss_only,
+ reg.op_en, reg.op_val, reg.cnt_ctl,
+ reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt);
+}
+
+static void pr_ibs_op_data(union ibs_op_data reg)
+{
+ printf("ibs_op_data:\t%016llx CompToRetCtr %5d TagToRetCtr %5d%s%s%s BrnRet %d "
+ " RipInvalid %d BrnFuse %d Microcode %d\n",
+ reg.val, reg.comp_to_ret_ctr, reg.tag_to_ret_ctr,
+ reg.op_brn_ret ? (reg.op_return ? " OpReturn 1" : " OpReturn 0") : "",
+ reg.op_brn_ret ? (reg.op_brn_taken ? " OpBrnTaken 1" : " OpBrnTaken 0") : "",
+ reg.op_brn_ret ? (reg.op_brn_misp ? " OpBrnMisp 1" : " OpBrnMisp 0") : "",
+ reg.op_brn_ret, reg.op_rip_invalid, reg.op_brn_fuse, reg.op_microcode);
+}
+
+static void pr_ibs_op_data2_extended(union ibs_op_data2 reg)
+{
+ static const char * const data_src_str[] = {
+ "",
+ " DataSrc 1=Local L3 or other L1/L2 in CCX",
+ " DataSrc 2=Another CCX cache in the same NUMA node",
+ " DataSrc 3=DRAM",
+ " DataSrc 4=(reserved)",
+ " DataSrc 5=Another CCX cache in a different NUMA node",
+ " DataSrc 6=Long-latency DIMM",
+ " DataSrc 7=MMIO/Config/PCI/APIC",
+ " DataSrc 8=Extension Memory",
+ " DataSrc 9=(reserved)",
+ " DataSrc 10=(reserved)",
+ " DataSrc 11=(reserved)",
+ " DataSrc 12=Coherent Memory of a different processor type",
+ /* 13 to 31 are reserved. Avoid printing them. */
+ };
+ int data_src = (reg.data_src_hi << 3) | reg.data_src_lo;
+
+ printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val,
+ (data_src == 1 || data_src == 2 || data_src == 5) ?
+ (reg.cache_hit_st ? "CacheHitSt 1=O-State " : "CacheHitSt 0=M-state ") : "",
+ reg.rmt_node,
+ data_src < (int)ARRAY_SIZE(data_src_str) ? data_src_str[data_src] : "");
+}
+
+static void pr_ibs_op_data2_default(union ibs_op_data2 reg)
+{
+ static const char * const data_src_str[] = {
+ "",
+ " DataSrc 1=(reserved)",
+ " DataSrc 2=Local node cache",
+ " DataSrc 3=DRAM",
+ " DataSrc 4=Remote node cache",
+ " DataSrc 5=(reserved)",
+ " DataSrc 6=(reserved)",
+ " DataSrc 7=Other"
+ };
+
+ printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val,
+ reg.data_src_lo == 2 ? (reg.cache_hit_st ? "CacheHitSt 1=O-State "
+ : "CacheHitSt 0=M-state ") : "",
+ reg.rmt_node, data_src_str[reg.data_src_lo]);
+}
+
+static void pr_ibs_op_data2(union ibs_op_data2 reg)
+{
+ if (zen4_ibs_extensions)
+ return pr_ibs_op_data2_extended(reg);
+ pr_ibs_op_data2_default(reg);
+}
+
+static void pr_ibs_op_data3(union ibs_op_data3 reg)
+{
+ char l2_miss_str[sizeof(" L2Miss _")] = "";
+ char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = "";
+ char op_dc_miss_open_mem_reqs_str[sizeof(" OpDcMissOpenMemReqs __")] = "";
+
+ /*
+ * Erratum #1293
+ * Ignore L2Miss and OpDcMissOpenMemReqs (and opdata2) if DcMissNoMabAlloc or SwPf set
+ */
+ if (!(cpu_family == 0x19 && cpu_model < 0x10 && (reg.dc_miss_no_mab_alloc || reg.sw_pf))) {
+ snprintf(l2_miss_str, sizeof(l2_miss_str), " L2Miss %d", reg.l2_miss);
+ snprintf(op_dc_miss_open_mem_reqs_str, sizeof(op_dc_miss_open_mem_reqs_str),
+ " OpDcMissOpenMemReqs %2d", reg.op_dc_miss_open_mem_reqs);
+ }
+
+ if (reg.op_mem_width)
+ snprintf(op_mem_width_str, sizeof(op_mem_width_str),
+ " OpMemWidth %2d bytes", 1 << (reg.op_mem_width - 1));
+
+ printf("ibs_op_data3:\t%016llx LdOp %d StOp %d DcL1TlbMiss %d DcL2TlbMiss %d "
+ "DcL1TlbHit2M %d DcL1TlbHit1G %d DcL2TlbHit2M %d DcMiss %d DcMisAcc %d "
+ "DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d DcLinAddrValid %d "
+ "DcPhyAddrValid %d DcL2TlbHit1G %d%s SwPf %d%s%s DcMissLat %5d TlbRefillLat %5d\n",
+ reg.val, reg.ld_op, reg.st_op, reg.dc_l1tlb_miss, reg.dc_l2tlb_miss,
+ reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g, reg.dc_l2tlb_hit_2m, reg.dc_miss,
+ reg.dc_mis_acc, reg.dc_wc_mem_acc, reg.dc_uc_mem_acc, reg.dc_locked_op,
+ reg.dc_miss_no_mab_alloc, reg.dc_lin_addr_valid, reg.dc_phy_addr_valid,
+ reg.dc_l2_tlb_hit_1g, l2_miss_str, reg.sw_pf, op_mem_width_str,
+ op_dc_miss_open_mem_reqs_str, reg.dc_miss_lat, reg.tlb_refill_lat);
+}
+
+/*
+ * IBS Op/Execution MSRs always saved, in order, are:
+ * IBS_OP_CTL, IBS_OP_RIP, IBS_OP_DATA, IBS_OP_DATA2,
+ * IBS_OP_DATA3, IBS_DC_LINADDR, IBS_DC_PHYSADDR, BP_IBSTGT_RIP
+ */
+static void amd_dump_ibs_op(struct perf_sample *sample)
+{
+ struct perf_ibs_data *data = sample->raw_data;
+ union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data;
+ __u64 *rip = (__u64 *)op_ctl + 1;
+ union ibs_op_data *op_data = (union ibs_op_data *)(rip + 1);
+ union ibs_op_data3 *op_data3 = (union ibs_op_data3 *)(rip + 3);
+
+ pr_ibs_op_ctl(*op_ctl);
+ if (!op_data->op_rip_invalid)
+ printf("IbsOpRip:\t%016llx\n", *rip);
+ pr_ibs_op_data(*op_data);
+ /*
+ * Erratum #1293: ignore op_data2 if DcMissNoMabAlloc or SwPf are set
+ */
+ if (!(cpu_family == 0x19 && cpu_model < 0x10 &&
+ (op_data3->dc_miss_no_mab_alloc || op_data3->sw_pf)))
+ pr_ibs_op_data2(*(union ibs_op_data2 *)(rip + 2));
+ pr_ibs_op_data3(*op_data3);
+ if (op_data3->dc_lin_addr_valid)
+ printf("IbsDCLinAd:\t%016llx\n", *(rip + 4));
+ if (op_data3->dc_phy_addr_valid)
+ printf("IbsDCPhysAd:\t%016llx\n", *(rip + 5));
+ if (op_data->op_brn_ret && *(rip + 6))
+ printf("IbsBrTarget:\t%016llx\n", *(rip + 6));
+}
+
+/*
+ * IBS Fetch MSRs always saved, in order, are:
+ * IBS_FETCH_CTL, IBS_FETCH_LINADDR, IBS_FETCH_PHYSADDR, IC_IBS_EXTD_CTL
+ */
+static void amd_dump_ibs_fetch(struct perf_sample *sample)
+{
+ struct perf_ibs_data *data = sample->raw_data;
+ union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data;
+ __u64 *addr = (__u64 *)fetch_ctl + 1;
+ union ic_ibs_extd_ctl *extd_ctl = (union ic_ibs_extd_ctl *)addr + 2;
+
+ pr_ibs_fetch_ctl(*fetch_ctl);
+ printf("IbsFetchLinAd:\t%016llx\n", *addr++);
+ if (fetch_ctl->phy_addr_valid)
+ printf("IbsFetchPhysAd:\t%016llx\n", *addr);
+ pr_ic_ibs_extd_ctl(*extd_ctl);
+}
+
+/*
+ * Test for enable and valid bits in captured control MSRs.
+ */
+static bool is_valid_ibs_fetch_sample(struct perf_sample *sample)
+{
+ struct perf_ibs_data *data = sample->raw_data;
+ union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data;
+
+ if (fetch_ctl->fetch_en && fetch_ctl->fetch_val)
+ return true;
+
+ return false;
+}
+
+static bool is_valid_ibs_op_sample(struct perf_sample *sample)
+{
+ struct perf_ibs_data *data = sample->raw_data;
+ union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data;
+
+ if (op_ctl->op_en && op_ctl->op_val)
+ return true;
+
+ return false;
+}
+
+/* AMD vendor specific raw sample function. Check for PERF_RECORD_SAMPLE events
+ * and if the event was triggered by IBS, display its raw data with decoded text.
+ * The function is only invoked when the dump flag -D is set.
+ */
+void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event,
+ struct perf_sample *sample)
+{
+ struct evsel *evsel;
+
+ if (event->header.type != PERF_RECORD_SAMPLE || !sample->raw_size)
+ return;
+
+ evsel = evlist__event2evsel(evlist, event);
+ if (!evsel)
+ return;
+
+ if (evsel->core.attr.type == ibs_fetch_type) {
+ if (!is_valid_ibs_fetch_sample(sample)) {
+ pr_debug("Invalid raw IBS Fetch MSR data encountered\n");
+ return;
+ }
+ amd_dump_ibs_fetch(sample);
+ } else if (evsel->core.attr.type == ibs_op_type) {
+ if (!is_valid_ibs_op_sample(sample)) {
+ pr_debug("Invalid raw IBS Op MSR data encountered\n");
+ return;
+ }
+ amd_dump_ibs_op(sample);
+ }
+}
+
+static void parse_cpuid(struct perf_env *env)
+{
+ const char *cpuid;
+ int ret;
+
+ cpuid = perf_env__cpuid(env);
+ /*
+ * cpuid = "AuthenticAMD,family,model,stepping"
+ */
+ ret = sscanf(cpuid, "%*[^,],%u,%u", &cpu_family, &cpu_model);
+ if (ret != 2)
+ pr_debug("problem parsing cpuid\n");
+}
+
+/*
+ * Find and assign the type number used for ibs_op or ibs_fetch samples.
+ * Device names can be large - we are only interested in the first 9 characters,
+ * to match "ibs_fetch".
+ */
+bool evlist__has_amd_ibs(struct evlist *evlist)
+{
+ struct perf_env *env = evlist->env;
+ int ret, nr_pmu_mappings = perf_env__nr_pmu_mappings(env);
+ const char *pmu_mapping = perf_env__pmu_mappings(env);
+ char name[sizeof("ibs_fetch")];
+ u32 type;
+
+ while (nr_pmu_mappings--) {
+ ret = sscanf(pmu_mapping, "%u:%9s", &type, name);
+ if (ret == 2) {
+ if (strstarts(name, "ibs_op"))
+ ibs_op_type = type;
+ else if (strstarts(name, "ibs_fetch"))
+ ibs_fetch_type = type;
+ }
+ pmu_mapping += strlen(pmu_mapping) + 1 /* '\0' */;
+ }
+
+ if (perf_env__find_pmu_cap(env, "ibs_op", "zen4_ibs_extensions"))
+ zen4_ibs_extensions = 1;
+
+ if (ibs_fetch_type || ibs_op_type) {
+ if (!cpu_family)
+ parse_cpuid(env);
+ return true;
+ }
+
+ return false;
+}
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
new file mode 100644
index 000000000..82956adf9
--- /dev/null
+++ b/tools/perf/util/annotate.c
@@ -0,0 +1,3423 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from builtin-annotate.c, see those files for further
+ * copyright notes.
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <libgen.h>
+#include <stdlib.h>
+#include "util.h" // hex_width()
+#include "ui/ui.h"
+#include "sort.h"
+#include "build-id.h"
+#include "color.h"
+#include "config.h"
+#include "dso.h"
+#include "env.h"
+#include "map.h"
+#include "maps.h"
+#include "symbol.h"
+#include "srcline.h"
+#include "units.h"
+#include "debug.h"
+#include "annotate.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "bpf-event.h"
+#include "bpf-utils.h"
+#include "block-range.h"
+#include "string2.h"
+#include "util/event.h"
+#include "util/sharded_mutex.h"
+#include "arch/common.h"
+#include "namespaces.h"
+#include <regex.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <subcmd/parse-options.h>
+#include <subcmd/run-command.h>
+
+/* FIXME: For the HE_COLORSET */
+#include "ui/browser.h"
+
+/*
+ * FIXME: Using the same values as slang.h,
+ * but that header may not be available everywhere
+ */
+#define LARROW_CHAR ((unsigned char)',')
+#define RARROW_CHAR ((unsigned char)'+')
+#define DARROW_CHAR ((unsigned char)'.')
+#define UARROW_CHAR ((unsigned char)'-')
+
+#include <linux/ctype.h>
+
+static regex_t file_lineno;
+
+static struct ins_ops *ins__find(struct arch *arch, const char *name);
+static void ins__sort(struct arch *arch);
+static int disasm_line__parse(char *line, const char **namep, char **rawp);
+static int call__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name);
+static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name);
+
+struct arch {
+ const char *name;
+ struct ins *instructions;
+ size_t nr_instructions;
+ size_t nr_instructions_allocated;
+ struct ins_ops *(*associate_instruction_ops)(struct arch *arch, const char *name);
+ bool sorted_instructions;
+ bool initialized;
+ const char *insn_suffix;
+ void *priv;
+ unsigned int model;
+ unsigned int family;
+ int (*init)(struct arch *arch, char *cpuid);
+ bool (*ins_is_fused)(struct arch *arch, const char *ins1,
+ const char *ins2);
+ struct {
+ char comment_char;
+ char skip_functions_char;
+ } objdump;
+};
+
+static struct ins_ops call_ops;
+static struct ins_ops dec_ops;
+static struct ins_ops jump_ops;
+static struct ins_ops mov_ops;
+static struct ins_ops nop_ops;
+static struct ins_ops lock_ops;
+static struct ins_ops ret_ops;
+
+static int arch__grow_instructions(struct arch *arch)
+{
+ struct ins *new_instructions;
+ size_t new_nr_allocated;
+
+ if (arch->nr_instructions_allocated == 0 && arch->instructions)
+ goto grow_from_non_allocated_table;
+
+ new_nr_allocated = arch->nr_instructions_allocated + 128;
+ new_instructions = realloc(arch->instructions, new_nr_allocated * sizeof(struct ins));
+ if (new_instructions == NULL)
+ return -1;
+
+out_update_instructions:
+ arch->instructions = new_instructions;
+ arch->nr_instructions_allocated = new_nr_allocated;
+ return 0;
+
+grow_from_non_allocated_table:
+ new_nr_allocated = arch->nr_instructions + 128;
+ new_instructions = calloc(new_nr_allocated, sizeof(struct ins));
+ if (new_instructions == NULL)
+ return -1;
+
+ memcpy(new_instructions, arch->instructions, arch->nr_instructions);
+ goto out_update_instructions;
+}
+
+static int arch__associate_ins_ops(struct arch* arch, const char *name, struct ins_ops *ops)
+{
+ struct ins *ins;
+
+ if (arch->nr_instructions == arch->nr_instructions_allocated &&
+ arch__grow_instructions(arch))
+ return -1;
+
+ ins = &arch->instructions[arch->nr_instructions];
+ ins->name = strdup(name);
+ if (!ins->name)
+ return -1;
+
+ ins->ops = ops;
+ arch->nr_instructions++;
+
+ ins__sort(arch);
+ return 0;
+}
+
+#include "arch/arc/annotate/instructions.c"
+#include "arch/arm/annotate/instructions.c"
+#include "arch/arm64/annotate/instructions.c"
+#include "arch/csky/annotate/instructions.c"
+#include "arch/loongarch/annotate/instructions.c"
+#include "arch/mips/annotate/instructions.c"
+#include "arch/x86/annotate/instructions.c"
+#include "arch/powerpc/annotate/instructions.c"
+#include "arch/riscv64/annotate/instructions.c"
+#include "arch/s390/annotate/instructions.c"
+#include "arch/sparc/annotate/instructions.c"
+
+static struct arch architectures[] = {
+ {
+ .name = "arc",
+ .init = arc__annotate_init,
+ },
+ {
+ .name = "arm",
+ .init = arm__annotate_init,
+ },
+ {
+ .name = "arm64",
+ .init = arm64__annotate_init,
+ },
+ {
+ .name = "csky",
+ .init = csky__annotate_init,
+ },
+ {
+ .name = "mips",
+ .init = mips__annotate_init,
+ .objdump = {
+ .comment_char = '#',
+ },
+ },
+ {
+ .name = "x86",
+ .init = x86__annotate_init,
+ .instructions = x86__instructions,
+ .nr_instructions = ARRAY_SIZE(x86__instructions),
+ .insn_suffix = "bwlq",
+ .objdump = {
+ .comment_char = '#',
+ },
+ },
+ {
+ .name = "powerpc",
+ .init = powerpc__annotate_init,
+ },
+ {
+ .name = "riscv64",
+ .init = riscv64__annotate_init,
+ },
+ {
+ .name = "s390",
+ .init = s390__annotate_init,
+ .objdump = {
+ .comment_char = '#',
+ },
+ },
+ {
+ .name = "sparc",
+ .init = sparc__annotate_init,
+ .objdump = {
+ .comment_char = '#',
+ },
+ },
+ {
+ .name = "loongarch",
+ .init = loongarch__annotate_init,
+ .objdump = {
+ .comment_char = '#',
+ },
+ },
+};
+
+static void ins__delete(struct ins_operands *ops)
+{
+ if (ops == NULL)
+ return;
+ zfree(&ops->source.raw);
+ zfree(&ops->source.name);
+ zfree(&ops->target.raw);
+ zfree(&ops->target.name);
+}
+
+static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name)
+{
+ return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->raw);
+}
+
+int ins__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name)
+{
+ if (ins->ops->scnprintf)
+ return ins->ops->scnprintf(ins, bf, size, ops, max_ins_name);
+
+ return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
+}
+
+bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2)
+{
+ if (!arch || !arch->ins_is_fused)
+ return false;
+
+ return arch->ins_is_fused(arch, ins1, ins2);
+}
+
+static int call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+{
+ char *endptr, *tok, *name;
+ struct map *map = ms->map;
+ struct addr_map_symbol target = {
+ .ms = { .map = map, },
+ };
+
+ ops->target.addr = strtoull(ops->raw, &endptr, 16);
+
+ name = strchr(endptr, '<');
+ if (name == NULL)
+ goto indirect_call;
+
+ name++;
+
+ if (arch->objdump.skip_functions_char &&
+ strchr(name, arch->objdump.skip_functions_char))
+ return -1;
+
+ tok = strchr(name, '>');
+ if (tok == NULL)
+ return -1;
+
+ *tok = '\0';
+ ops->target.name = strdup(name);
+ *tok = '>';
+
+ if (ops->target.name == NULL)
+ return -1;
+find_target:
+ target.addr = map__objdump_2mem(map, ops->target.addr);
+
+ if (maps__find_ams(ms->maps, &target) == 0 &&
+ map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr)
+ ops->target.sym = target.ms.sym;
+
+ return 0;
+
+indirect_call:
+ tok = strchr(endptr, '*');
+ if (tok != NULL) {
+ endptr++;
+
+ /* Indirect call can use a non-rip register and offset: callq *0x8(%rbx).
+ * Do not parse such instruction. */
+ if (strstr(endptr, "(%r") == NULL)
+ ops->target.addr = strtoull(endptr, NULL, 16);
+ }
+ goto find_target;
+}
+
+static int call__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name)
+{
+ if (ops->target.sym)
+ return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name);
+
+ if (ops->target.addr == 0)
+ return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
+
+ if (ops->target.name)
+ return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.name);
+
+ return scnprintf(bf, size, "%-*s *%" PRIx64, max_ins_name, ins->name, ops->target.addr);
+}
+
+static struct ins_ops call_ops = {
+ .parse = call__parse,
+ .scnprintf = call__scnprintf,
+};
+
+bool ins__is_call(const struct ins *ins)
+{
+ return ins->ops == &call_ops || ins->ops == &s390_call_ops || ins->ops == &loongarch_call_ops;
+}
+
+/*
+ * Prevents from matching commas in the comment section, e.g.:
+ * ffff200008446e70: b.cs ffff2000084470f4 <generic_exec_single+0x314> // b.hs, b.nlast
+ *
+ * and skip comma as part of function arguments, e.g.:
+ * 1d8b4ac <linemap_lookup(line_maps const*, unsigned int)+0xcc>
+ */
+static inline const char *validate_comma(const char *c, struct ins_operands *ops)
+{
+ if (ops->raw_comment && c > ops->raw_comment)
+ return NULL;
+
+ if (ops->raw_func_start && c > ops->raw_func_start)
+ return NULL;
+
+ return c;
+}
+
+static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+{
+ struct map *map = ms->map;
+ struct symbol *sym = ms->sym;
+ struct addr_map_symbol target = {
+ .ms = { .map = map, },
+ };
+ const char *c = strchr(ops->raw, ',');
+ u64 start, end;
+
+ ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char);
+ ops->raw_func_start = strchr(ops->raw, '<');
+
+ c = validate_comma(c, ops);
+
+ /*
+ * Examples of lines to parse for the _cpp_lex_token@@Base
+ * function:
+ *
+ * 1159e6c: jne 115aa32 <_cpp_lex_token@@Base+0xf92>
+ * 1159e8b: jne c469be <cpp_named_operator2name@@Base+0xa72>
+ *
+ * The first is a jump to an offset inside the same function,
+ * the second is to another function, i.e. that 0xa72 is an
+ * offset in the cpp_named_operator2name@@base function.
+ */
+ /*
+ * skip over possible up to 2 operands to get to address, e.g.:
+ * tbnz w0, #26, ffff0000083cd190 <security_file_permission+0xd0>
+ */
+ if (c++ != NULL) {
+ ops->target.addr = strtoull(c, NULL, 16);
+ if (!ops->target.addr) {
+ c = strchr(c, ',');
+ c = validate_comma(c, ops);
+ if (c++ != NULL)
+ ops->target.addr = strtoull(c, NULL, 16);
+ }
+ } else {
+ ops->target.addr = strtoull(ops->raw, NULL, 16);
+ }
+
+ target.addr = map__objdump_2mem(map, ops->target.addr);
+ start = map__unmap_ip(map, sym->start);
+ end = map__unmap_ip(map, sym->end);
+
+ ops->target.outside = target.addr < start || target.addr > end;
+
+ /*
+ * FIXME: things like this in _cpp_lex_token (gcc's cc1 program):
+
+ cpp_named_operator2name@@Base+0xa72
+
+ * Point to a place that is after the cpp_named_operator2name
+ * boundaries, i.e. in the ELF symbol table for cc1
+ * cpp_named_operator2name is marked as being 32-bytes long, but it in
+ * fact is much larger than that, so we seem to need a symbols__find()
+ * routine that looks for >= current->start and < next_symbol->start,
+ * possibly just for C++ objects?
+ *
+ * For now lets just make some progress by marking jumps to outside the
+ * current function as call like.
+ *
+ * Actual navigation will come next, with further understanding of how
+ * the symbol searching and disassembly should be done.
+ */
+ if (maps__find_ams(ms->maps, &target) == 0 &&
+ map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr)
+ ops->target.sym = target.ms.sym;
+
+ if (!ops->target.outside) {
+ ops->target.offset = target.addr - start;
+ ops->target.offset_avail = true;
+ } else {
+ ops->target.offset_avail = false;
+ }
+
+ return 0;
+}
+
+static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name)
+{
+ const char *c;
+
+ if (!ops->target.addr || ops->target.offset < 0)
+ return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
+
+ if (ops->target.outside && ops->target.sym != NULL)
+ return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name);
+
+ c = strchr(ops->raw, ',');
+ c = validate_comma(c, ops);
+
+ if (c != NULL) {
+ const char *c2 = strchr(c + 1, ',');
+
+ c2 = validate_comma(c2, ops);
+ /* check for 3-op insn */
+ if (c2 != NULL)
+ c = c2;
+ c++;
+
+ /* mirror arch objdump's space-after-comma style */
+ if (*c == ' ')
+ c++;
+ }
+
+ return scnprintf(bf, size, "%-*s %.*s%" PRIx64, max_ins_name,
+ ins->name, c ? c - ops->raw : 0, ops->raw,
+ ops->target.offset);
+}
+
+static struct ins_ops jump_ops = {
+ .parse = jump__parse,
+ .scnprintf = jump__scnprintf,
+};
+
+bool ins__is_jump(const struct ins *ins)
+{
+ return ins->ops == &jump_ops || ins->ops == &loongarch_jump_ops;
+}
+
+static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep)
+{
+ char *endptr, *name, *t;
+
+ if (strstr(raw, "(%rip)") == NULL)
+ return 0;
+
+ *addrp = strtoull(comment, &endptr, 16);
+ if (endptr == comment)
+ return 0;
+ name = strchr(endptr, '<');
+ if (name == NULL)
+ return -1;
+
+ name++;
+
+ t = strchr(name, '>');
+ if (t == NULL)
+ return 0;
+
+ *t = '\0';
+ *namep = strdup(name);
+ *t = '>';
+
+ return 0;
+}
+
+static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+{
+ ops->locked.ops = zalloc(sizeof(*ops->locked.ops));
+ if (ops->locked.ops == NULL)
+ return 0;
+
+ if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw) < 0)
+ goto out_free_ops;
+
+ ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name);
+
+ if (ops->locked.ins.ops == NULL)
+ goto out_free_ops;
+
+ if (ops->locked.ins.ops->parse &&
+ ops->locked.ins.ops->parse(arch, ops->locked.ops, ms) < 0)
+ goto out_free_ops;
+
+ return 0;
+
+out_free_ops:
+ zfree(&ops->locked.ops);
+ return 0;
+}
+
+static int lock__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name)
+{
+ int printed;
+
+ if (ops->locked.ins.ops == NULL)
+ return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
+
+ printed = scnprintf(bf, size, "%-*s ", max_ins_name, ins->name);
+ return printed + ins__scnprintf(&ops->locked.ins, bf + printed,
+ size - printed, ops->locked.ops, max_ins_name);
+}
+
+static void lock__delete(struct ins_operands *ops)
+{
+ struct ins *ins = &ops->locked.ins;
+
+ if (ins->ops && ins->ops->free)
+ ins->ops->free(ops->locked.ops);
+ else
+ ins__delete(ops->locked.ops);
+
+ zfree(&ops->locked.ops);
+ zfree(&ops->target.raw);
+ zfree(&ops->target.name);
+}
+
+static struct ins_ops lock_ops = {
+ .free = lock__delete,
+ .parse = lock__parse,
+ .scnprintf = lock__scnprintf,
+};
+
+static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused)
+{
+ char *s = strchr(ops->raw, ','), *target, *comment, prev;
+
+ if (s == NULL)
+ return -1;
+
+ *s = '\0';
+
+ /*
+ * x86 SIB addressing has something like 0x8(%rax, %rcx, 1)
+ * then it needs to have the closing parenthesis.
+ */
+ if (strchr(ops->raw, '(')) {
+ *s = ',';
+ s = strchr(ops->raw, ')');
+ if (s == NULL || s[1] != ',')
+ return -1;
+ *++s = '\0';
+ }
+
+ ops->source.raw = strdup(ops->raw);
+ *s = ',';
+
+ if (ops->source.raw == NULL)
+ return -1;
+
+ target = skip_spaces(++s);
+ comment = strchr(s, arch->objdump.comment_char);
+
+ if (comment != NULL)
+ s = comment - 1;
+ else
+ s = strchr(s, '\0') - 1;
+
+ while (s > target && isspace(s[0]))
+ --s;
+ s++;
+ prev = *s;
+ *s = '\0';
+
+ ops->target.raw = strdup(target);
+ *s = prev;
+
+ if (ops->target.raw == NULL)
+ goto out_free_source;
+
+ if (comment == NULL)
+ return 0;
+
+ comment = skip_spaces(comment);
+ comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name);
+ comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);
+
+ return 0;
+
+out_free_source:
+ zfree(&ops->source.raw);
+ return -1;
+}
+
+static int mov__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name)
+{
+ return scnprintf(bf, size, "%-*s %s,%s", max_ins_name, ins->name,
+ ops->source.name ?: ops->source.raw,
+ ops->target.name ?: ops->target.raw);
+}
+
+static struct ins_ops mov_ops = {
+ .parse = mov__parse,
+ .scnprintf = mov__scnprintf,
+};
+
+static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms __maybe_unused)
+{
+ char *target, *comment, *s, prev;
+
+ target = s = ops->raw;
+
+ while (s[0] != '\0' && !isspace(s[0]))
+ ++s;
+ prev = *s;
+ *s = '\0';
+
+ ops->target.raw = strdup(target);
+ *s = prev;
+
+ if (ops->target.raw == NULL)
+ return -1;
+
+ comment = strchr(s, arch->objdump.comment_char);
+ if (comment == NULL)
+ return 0;
+
+ comment = skip_spaces(comment);
+ comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);
+
+ return 0;
+}
+
+static int dec__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name)
+{
+ return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name,
+ ops->target.name ?: ops->target.raw);
+}
+
+static struct ins_ops dec_ops = {
+ .parse = dec__parse,
+ .scnprintf = dec__scnprintf,
+};
+
+static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size,
+ struct ins_operands *ops __maybe_unused, int max_ins_name)
+{
+ return scnprintf(bf, size, "%-*s", max_ins_name, "nop");
+}
+
+static struct ins_ops nop_ops = {
+ .scnprintf = nop__scnprintf,
+};
+
+static struct ins_ops ret_ops = {
+ .scnprintf = ins__raw_scnprintf,
+};
+
+bool ins__is_ret(const struct ins *ins)
+{
+ return ins->ops == &ret_ops;
+}
+
+bool ins__is_lock(const struct ins *ins)
+{
+ return ins->ops == &lock_ops;
+}
+
+static int ins__key_cmp(const void *name, const void *insp)
+{
+ const struct ins *ins = insp;
+
+ return strcmp(name, ins->name);
+}
+
+static int ins__cmp(const void *a, const void *b)
+{
+ const struct ins *ia = a;
+ const struct ins *ib = b;
+
+ return strcmp(ia->name, ib->name);
+}
+
+static void ins__sort(struct arch *arch)
+{
+ const int nmemb = arch->nr_instructions;
+
+ qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp);
+}
+
+static struct ins_ops *__ins__find(struct arch *arch, const char *name)
+{
+ struct ins *ins;
+ const int nmemb = arch->nr_instructions;
+
+ if (!arch->sorted_instructions) {
+ ins__sort(arch);
+ arch->sorted_instructions = true;
+ }
+
+ ins = bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp);
+ if (ins)
+ return ins->ops;
+
+ if (arch->insn_suffix) {
+ char tmp[32];
+ char suffix;
+ size_t len = strlen(name);
+
+ if (len == 0 || len >= sizeof(tmp))
+ return NULL;
+
+ suffix = name[len - 1];
+ if (strchr(arch->insn_suffix, suffix) == NULL)
+ return NULL;
+
+ strcpy(tmp, name);
+ tmp[len - 1] = '\0'; /* remove the suffix and check again */
+
+ ins = bsearch(tmp, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp);
+ }
+ return ins ? ins->ops : NULL;
+}
+
+static struct ins_ops *ins__find(struct arch *arch, const char *name)
+{
+ struct ins_ops *ops = __ins__find(arch, name);
+
+ if (!ops && arch->associate_instruction_ops)
+ ops = arch->associate_instruction_ops(arch, name);
+
+ return ops;
+}
+
+static int arch__key_cmp(const void *name, const void *archp)
+{
+ const struct arch *arch = archp;
+
+ return strcmp(name, arch->name);
+}
+
+static int arch__cmp(const void *a, const void *b)
+{
+ const struct arch *aa = a;
+ const struct arch *ab = b;
+
+ return strcmp(aa->name, ab->name);
+}
+
+static void arch__sort(void)
+{
+ const int nmemb = ARRAY_SIZE(architectures);
+
+ qsort(architectures, nmemb, sizeof(struct arch), arch__cmp);
+}
+
+static struct arch *arch__find(const char *name)
+{
+ const int nmemb = ARRAY_SIZE(architectures);
+ static bool sorted;
+
+ if (!sorted) {
+ arch__sort();
+ sorted = true;
+ }
+
+ return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp);
+}
+
+static struct annotated_source *annotated_source__new(void)
+{
+ struct annotated_source *src = zalloc(sizeof(*src));
+
+ if (src != NULL)
+ INIT_LIST_HEAD(&src->source);
+
+ return src;
+}
+
+static __maybe_unused void annotated_source__delete(struct annotated_source *src)
+{
+ if (src == NULL)
+ return;
+ zfree(&src->histograms);
+ zfree(&src->cycles_hist);
+ free(src);
+}
+
+static int annotated_source__alloc_histograms(struct annotated_source *src,
+ size_t size, int nr_hists)
+{
+ size_t sizeof_sym_hist;
+
+ /*
+ * Add buffer of one element for zero length symbol.
+ * When sample is taken from first instruction of
+ * zero length symbol, perf still resolves it and
+ * shows symbol name in perf report and allows to
+ * annotate it.
+ */
+ if (size == 0)
+ size = 1;
+
+ /* Check for overflow when calculating sizeof_sym_hist */
+ if (size > (SIZE_MAX - sizeof(struct sym_hist)) / sizeof(struct sym_hist_entry))
+ return -1;
+
+ sizeof_sym_hist = (sizeof(struct sym_hist) + size * sizeof(struct sym_hist_entry));
+
+ /* Check for overflow in zalloc argument */
+ if (sizeof_sym_hist > SIZE_MAX / nr_hists)
+ return -1;
+
+ src->sizeof_sym_hist = sizeof_sym_hist;
+ src->nr_histograms = nr_hists;
+ src->histograms = calloc(nr_hists, sizeof_sym_hist) ;
+ return src->histograms ? 0 : -1;
+}
+
+/* The cycles histogram is lazily allocated. */
+static int symbol__alloc_hist_cycles(struct symbol *sym)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ const size_t size = symbol__size(sym);
+
+ notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist));
+ if (notes->src->cycles_hist == NULL)
+ return -1;
+ return 0;
+}
+
+void symbol__annotate_zero_histograms(struct symbol *sym)
+{
+ struct annotation *notes = symbol__annotation(sym);
+
+ annotation__lock(notes);
+ if (notes->src != NULL) {
+ memset(notes->src->histograms, 0,
+ notes->src->nr_histograms * notes->src->sizeof_sym_hist);
+ if (notes->src->cycles_hist)
+ memset(notes->src->cycles_hist, 0,
+ symbol__size(sym) * sizeof(struct cyc_hist));
+ }
+ annotation__unlock(notes);
+}
+
+static int __symbol__account_cycles(struct cyc_hist *ch,
+ u64 start,
+ unsigned offset, unsigned cycles,
+ unsigned have_start)
+{
+ /*
+ * For now we can only account one basic block per
+ * final jump. But multiple could be overlapping.
+ * Always account the longest one. So when
+ * a shorter one has been already seen throw it away.
+ *
+ * We separately always account the full cycles.
+ */
+ ch[offset].num_aggr++;
+ ch[offset].cycles_aggr += cycles;
+
+ if (cycles > ch[offset].cycles_max)
+ ch[offset].cycles_max = cycles;
+
+ if (ch[offset].cycles_min) {
+ if (cycles && cycles < ch[offset].cycles_min)
+ ch[offset].cycles_min = cycles;
+ } else
+ ch[offset].cycles_min = cycles;
+
+ if (!have_start && ch[offset].have_start)
+ return 0;
+ if (ch[offset].num) {
+ if (have_start && (!ch[offset].have_start ||
+ ch[offset].start > start)) {
+ ch[offset].have_start = 0;
+ ch[offset].cycles = 0;
+ ch[offset].num = 0;
+ if (ch[offset].reset < 0xffff)
+ ch[offset].reset++;
+ } else if (have_start &&
+ ch[offset].start < start)
+ return 0;
+ }
+
+ if (ch[offset].num < NUM_SPARKS)
+ ch[offset].cycles_spark[ch[offset].num] = cycles;
+
+ ch[offset].have_start = have_start;
+ ch[offset].start = start;
+ ch[offset].cycles += cycles;
+ ch[offset].num++;
+ return 0;
+}
+
+static int __symbol__inc_addr_samples(struct map_symbol *ms,
+ struct annotated_source *src, int evidx, u64 addr,
+ struct perf_sample *sample)
+{
+ struct symbol *sym = ms->sym;
+ unsigned offset;
+ struct sym_hist *h;
+
+ pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map__unmap_ip(ms->map, addr));
+
+ if ((addr < sym->start || addr >= sym->end) &&
+ (addr != sym->end || sym->start != sym->end)) {
+ pr_debug("%s(%d): ERANGE! sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 "\n",
+ __func__, __LINE__, sym->name, sym->start, addr, sym->end);
+ return -ERANGE;
+ }
+
+ offset = addr - sym->start;
+ h = annotated_source__histogram(src, evidx);
+ if (h == NULL) {
+ pr_debug("%s(%d): ENOMEM! sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 ", func: %d\n",
+ __func__, __LINE__, sym->name, sym->start, addr, sym->end, sym->type == STT_FUNC);
+ return -ENOMEM;
+ }
+ h->nr_samples++;
+ h->addr[offset].nr_samples++;
+ h->period += sample->period;
+ h->addr[offset].period += sample->period;
+
+ pr_debug3("%#" PRIx64 " %s: period++ [addr: %#" PRIx64 ", %#" PRIx64
+ ", evidx=%d] => nr_samples: %" PRIu64 ", period: %" PRIu64 "\n",
+ sym->start, sym->name, addr, addr - sym->start, evidx,
+ h->addr[offset].nr_samples, h->addr[offset].period);
+ return 0;
+}
+
+static struct cyc_hist *symbol__cycles_hist(struct symbol *sym)
+{
+ struct annotation *notes = symbol__annotation(sym);
+
+ if (notes->src == NULL) {
+ notes->src = annotated_source__new();
+ if (notes->src == NULL)
+ return NULL;
+ goto alloc_cycles_hist;
+ }
+
+ if (!notes->src->cycles_hist) {
+alloc_cycles_hist:
+ symbol__alloc_hist_cycles(sym);
+ }
+
+ return notes->src->cycles_hist;
+}
+
+struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists)
+{
+ struct annotation *notes = symbol__annotation(sym);
+
+ if (notes->src == NULL) {
+ notes->src = annotated_source__new();
+ if (notes->src == NULL)
+ return NULL;
+ goto alloc_histograms;
+ }
+
+ if (notes->src->histograms == NULL) {
+alloc_histograms:
+ annotated_source__alloc_histograms(notes->src, symbol__size(sym),
+ nr_hists);
+ }
+
+ return notes->src;
+}
+
+static int symbol__inc_addr_samples(struct map_symbol *ms,
+ struct evsel *evsel, u64 addr,
+ struct perf_sample *sample)
+{
+ struct symbol *sym = ms->sym;
+ struct annotated_source *src;
+
+ if (sym == NULL)
+ return 0;
+ src = symbol__hists(sym, evsel->evlist->core.nr_entries);
+ return src ? __symbol__inc_addr_samples(ms, src, evsel->core.idx, addr, sample) : 0;
+}
+
+static int symbol__account_cycles(u64 addr, u64 start,
+ struct symbol *sym, unsigned cycles)
+{
+ struct cyc_hist *cycles_hist;
+ unsigned offset;
+
+ if (sym == NULL)
+ return 0;
+ cycles_hist = symbol__cycles_hist(sym);
+ if (cycles_hist == NULL)
+ return -ENOMEM;
+ if (addr < sym->start || addr >= sym->end)
+ return -ERANGE;
+
+ if (start) {
+ if (start < sym->start || start >= sym->end)
+ return -ERANGE;
+ if (start >= addr)
+ start = 0;
+ }
+ offset = addr - sym->start;
+ return __symbol__account_cycles(cycles_hist,
+ start ? start - sym->start : 0,
+ offset, cycles,
+ !!start);
+}
+
+int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
+ struct addr_map_symbol *start,
+ unsigned cycles)
+{
+ u64 saddr = 0;
+ int err;
+
+ if (!cycles)
+ return 0;
+
+ /*
+ * Only set start when IPC can be computed. We can only
+ * compute it when the basic block is completely in a single
+ * function.
+ * Special case the case when the jump is elsewhere, but
+ * it starts on the function start.
+ */
+ if (start &&
+ (start->ms.sym == ams->ms.sym ||
+ (ams->ms.sym &&
+ start->addr == ams->ms.sym->start + map__start(ams->ms.map))))
+ saddr = start->al_addr;
+ if (saddr == 0)
+ pr_debug2("BB with bad start: addr %"PRIx64" start %"PRIx64" sym %"PRIx64" saddr %"PRIx64"\n",
+ ams->addr,
+ start ? start->addr : 0,
+ ams->ms.sym ? ams->ms.sym->start + map__start(ams->ms.map) : 0,
+ saddr);
+ err = symbol__account_cycles(ams->al_addr, saddr, ams->ms.sym, cycles);
+ if (err)
+ pr_debug2("account_cycles failed %d\n", err);
+ return err;
+}
+
+static unsigned annotation__count_insn(struct annotation *notes, u64 start, u64 end)
+{
+ unsigned n_insn = 0;
+ u64 offset;
+
+ for (offset = start; offset <= end; offset++) {
+ if (notes->offsets[offset])
+ n_insn++;
+ }
+ return n_insn;
+}
+
+static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 end, struct cyc_hist *ch)
+{
+ unsigned n_insn;
+ unsigned int cover_insn = 0;
+ u64 offset;
+
+ n_insn = annotation__count_insn(notes, start, end);
+ if (n_insn && ch->num && ch->cycles) {
+ float ipc = n_insn / ((double)ch->cycles / (double)ch->num);
+
+ /* Hide data when there are too many overlaps. */
+ if (ch->reset >= 0x7fff)
+ return;
+
+ for (offset = start; offset <= end; offset++) {
+ struct annotation_line *al = notes->offsets[offset];
+
+ if (al && al->ipc == 0.0) {
+ al->ipc = ipc;
+ cover_insn++;
+ }
+ }
+
+ if (cover_insn) {
+ notes->hit_cycles += ch->cycles;
+ notes->hit_insn += n_insn * ch->num;
+ notes->cover_insn += cover_insn;
+ }
+ }
+}
+
+void annotation__compute_ipc(struct annotation *notes, size_t size)
+{
+ s64 offset;
+
+ if (!notes->src || !notes->src->cycles_hist)
+ return;
+
+ notes->total_insn = annotation__count_insn(notes, 0, size - 1);
+ notes->hit_cycles = 0;
+ notes->hit_insn = 0;
+ notes->cover_insn = 0;
+
+ annotation__lock(notes);
+ for (offset = size - 1; offset >= 0; --offset) {
+ struct cyc_hist *ch;
+
+ ch = &notes->src->cycles_hist[offset];
+ if (ch && ch->cycles) {
+ struct annotation_line *al;
+
+ if (ch->have_start)
+ annotation__count_and_fill(notes, ch->start, offset, ch);
+ al = notes->offsets[offset];
+ if (al && ch->num_aggr) {
+ al->cycles = ch->cycles_aggr / ch->num_aggr;
+ al->cycles_max = ch->cycles_max;
+ al->cycles_min = ch->cycles_min;
+ }
+ notes->have_cycles = true;
+ }
+ }
+ annotation__unlock(notes);
+}
+
+int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample,
+ struct evsel *evsel)
+{
+ return symbol__inc_addr_samples(&ams->ms, evsel, ams->al_addr, sample);
+}
+
+int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample,
+ struct evsel *evsel, u64 ip)
+{
+ return symbol__inc_addr_samples(&he->ms, evsel, ip, sample);
+}
+
+static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map_symbol *ms)
+{
+ dl->ins.ops = ins__find(arch, dl->ins.name);
+
+ if (!dl->ins.ops)
+ return;
+
+ if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms) < 0)
+ dl->ins.ops = NULL;
+}
+
+static int disasm_line__parse(char *line, const char **namep, char **rawp)
+{
+ char tmp, *name = skip_spaces(line);
+
+ if (name[0] == '\0')
+ return -1;
+
+ *rawp = name + 1;
+
+ while ((*rawp)[0] != '\0' && !isspace((*rawp)[0]))
+ ++*rawp;
+
+ tmp = (*rawp)[0];
+ (*rawp)[0] = '\0';
+ *namep = strdup(name);
+
+ if (*namep == NULL)
+ goto out;
+
+ (*rawp)[0] = tmp;
+ *rawp = strim(*rawp);
+
+ return 0;
+
+out:
+ return -1;
+}
+
+struct annotate_args {
+ struct arch *arch;
+ struct map_symbol ms;
+ struct evsel *evsel;
+ struct annotation_options *options;
+ s64 offset;
+ char *line;
+ int line_nr;
+ char *fileloc;
+};
+
+static void annotation_line__init(struct annotation_line *al,
+ struct annotate_args *args,
+ int nr)
+{
+ al->offset = args->offset;
+ al->line = strdup(args->line);
+ al->line_nr = args->line_nr;
+ al->fileloc = args->fileloc;
+ al->data_nr = nr;
+}
+
+static void annotation_line__exit(struct annotation_line *al)
+{
+ zfree_srcline(&al->path);
+ zfree(&al->line);
+}
+
+static size_t disasm_line_size(int nr)
+{
+ struct annotation_line *al;
+
+ return (sizeof(struct disasm_line) + (sizeof(al->data[0]) * nr));
+}
+
+/*
+ * Allocating the disasm annotation line data with
+ * following structure:
+ *
+ * -------------------------------------------
+ * struct disasm_line | struct annotation_line
+ * -------------------------------------------
+ *
+ * We have 'struct annotation_line' member as last member
+ * of 'struct disasm_line' to have an easy access.
+ */
+static struct disasm_line *disasm_line__new(struct annotate_args *args)
+{
+ struct disasm_line *dl = NULL;
+ int nr = 1;
+
+ if (evsel__is_group_event(args->evsel))
+ nr = args->evsel->core.nr_members;
+
+ dl = zalloc(disasm_line_size(nr));
+ if (!dl)
+ return NULL;
+
+ annotation_line__init(&dl->al, args, nr);
+ if (dl->al.line == NULL)
+ goto out_delete;
+
+ if (args->offset != -1) {
+ if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
+ goto out_free_line;
+
+ disasm_line__init_ins(dl, args->arch, &args->ms);
+ }
+
+ return dl;
+
+out_free_line:
+ zfree(&dl->al.line);
+out_delete:
+ free(dl);
+ return NULL;
+}
+
+void disasm_line__free(struct disasm_line *dl)
+{
+ if (dl->ins.ops && dl->ins.ops->free)
+ dl->ins.ops->free(&dl->ops);
+ else
+ ins__delete(&dl->ops);
+ zfree(&dl->ins.name);
+ annotation_line__exit(&dl->al);
+ free(dl);
+}
+
+int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name)
+{
+ if (raw || !dl->ins.ops)
+ return scnprintf(bf, size, "%-*s %s", max_ins_name, dl->ins.name, dl->ops.raw);
+
+ return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name);
+}
+
+void annotation__exit(struct annotation *notes)
+{
+ annotated_source__delete(notes->src);
+}
+
+static struct sharded_mutex *sharded_mutex;
+
+static void annotation__init_sharded_mutex(void)
+{
+ /* As many mutexes as there are CPUs. */
+ sharded_mutex = sharded_mutex__new(cpu__max_present_cpu().cpu);
+}
+
+static size_t annotation__hash(const struct annotation *notes)
+{
+ return (size_t)notes;
+}
+
+static struct mutex *annotation__get_mutex(const struct annotation *notes)
+{
+ static pthread_once_t once = PTHREAD_ONCE_INIT;
+
+ pthread_once(&once, annotation__init_sharded_mutex);
+ if (!sharded_mutex)
+ return NULL;
+
+ return sharded_mutex__get_mutex(sharded_mutex, annotation__hash(notes));
+}
+
+void annotation__lock(struct annotation *notes)
+ NO_THREAD_SAFETY_ANALYSIS
+{
+ struct mutex *mutex = annotation__get_mutex(notes);
+
+ if (mutex)
+ mutex_lock(mutex);
+}
+
+void annotation__unlock(struct annotation *notes)
+ NO_THREAD_SAFETY_ANALYSIS
+{
+ struct mutex *mutex = annotation__get_mutex(notes);
+
+ if (mutex)
+ mutex_unlock(mutex);
+}
+
+bool annotation__trylock(struct annotation *notes)
+{
+ struct mutex *mutex = annotation__get_mutex(notes);
+
+ if (!mutex)
+ return false;
+
+ return mutex_trylock(mutex);
+}
+
+
+static void annotation_line__add(struct annotation_line *al, struct list_head *head)
+{
+ list_add_tail(&al->node, head);
+}
+
+struct annotation_line *
+annotation_line__next(struct annotation_line *pos, struct list_head *head)
+{
+ list_for_each_entry_continue(pos, head, node)
+ if (pos->offset >= 0)
+ return pos;
+
+ return NULL;
+}
+
+static const char *annotate__address_color(struct block_range *br)
+{
+ double cov = block_range__coverage(br);
+
+ if (cov >= 0) {
+ /* mark red for >75% coverage */
+ if (cov > 0.75)
+ return PERF_COLOR_RED;
+
+ /* mark dull for <1% coverage */
+ if (cov < 0.01)
+ return PERF_COLOR_NORMAL;
+ }
+
+ return PERF_COLOR_MAGENTA;
+}
+
+static const char *annotate__asm_color(struct block_range *br)
+{
+ double cov = block_range__coverage(br);
+
+ if (cov >= 0) {
+ /* mark dull for <1% coverage */
+ if (cov < 0.01)
+ return PERF_COLOR_NORMAL;
+ }
+
+ return PERF_COLOR_BLUE;
+}
+
+static void annotate__branch_printf(struct block_range *br, u64 addr)
+{
+ bool emit_comment = true;
+
+ if (!br)
+ return;
+
+#if 1
+ if (br->is_target && br->start == addr) {
+ struct block_range *branch = br;
+ double p;
+
+ /*
+ * Find matching branch to our target.
+ */
+ while (!branch->is_branch)
+ branch = block_range__next(branch);
+
+ p = 100 *(double)br->entry / branch->coverage;
+
+ if (p > 0.1) {
+ if (emit_comment) {
+ emit_comment = false;
+ printf("\t#");
+ }
+
+ /*
+ * The percentage of coverage joined at this target in relation
+ * to the next branch.
+ */
+ printf(" +%.2f%%", p);
+ }
+ }
+#endif
+ if (br->is_branch && br->end == addr) {
+ double p = 100*(double)br->taken / br->coverage;
+
+ if (p > 0.1) {
+ if (emit_comment) {
+ emit_comment = false;
+ printf("\t#");
+ }
+
+ /*
+ * The percentage of coverage leaving at this branch, and
+ * its prediction ratio.
+ */
+ printf(" -%.2f%% (p:%.2f%%)", p, 100*(double)br->pred / br->taken);
+ }
+ }
+}
+
+static int disasm_line__print(struct disasm_line *dl, u64 start, int addr_fmt_width)
+{
+ s64 offset = dl->al.offset;
+ const u64 addr = start + offset;
+ struct block_range *br;
+
+ br = block_range__find(addr);
+ color_fprintf(stdout, annotate__address_color(br), " %*" PRIx64 ":", addr_fmt_width, addr);
+ color_fprintf(stdout, annotate__asm_color(br), "%s", dl->al.line);
+ annotate__branch_printf(br, addr);
+ return 0;
+}
+
+static int
+annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start,
+ struct evsel *evsel, u64 len, int min_pcnt, int printed,
+ int max_lines, struct annotation_line *queue, int addr_fmt_width,
+ int percent_type)
+{
+ struct disasm_line *dl = container_of(al, struct disasm_line, al);
+ static const char *prev_line;
+
+ if (al->offset != -1) {
+ double max_percent = 0.0;
+ int i, nr_percent = 1;
+ const char *color;
+ struct annotation *notes = symbol__annotation(sym);
+
+ for (i = 0; i < al->data_nr; i++) {
+ double percent;
+
+ percent = annotation_data__percent(&al->data[i],
+ percent_type);
+
+ if (percent > max_percent)
+ max_percent = percent;
+ }
+
+ if (al->data_nr > nr_percent)
+ nr_percent = al->data_nr;
+
+ if (max_percent < min_pcnt)
+ return -1;
+
+ if (max_lines && printed >= max_lines)
+ return 1;
+
+ if (queue != NULL) {
+ list_for_each_entry_from(queue, &notes->src->source, node) {
+ if (queue == al)
+ break;
+ annotation_line__print(queue, sym, start, evsel, len,
+ 0, 0, 1, NULL, addr_fmt_width,
+ percent_type);
+ }
+ }
+
+ color = get_percent_color(max_percent);
+
+ for (i = 0; i < nr_percent; i++) {
+ struct annotation_data *data = &al->data[i];
+ double percent;
+
+ percent = annotation_data__percent(data, percent_type);
+ color = get_percent_color(percent);
+
+ if (symbol_conf.show_total_period)
+ color_fprintf(stdout, color, " %11" PRIu64,
+ data->he.period);
+ else if (symbol_conf.show_nr_samples)
+ color_fprintf(stdout, color, " %7" PRIu64,
+ data->he.nr_samples);
+ else
+ color_fprintf(stdout, color, " %7.2f", percent);
+ }
+
+ printf(" : ");
+
+ disasm_line__print(dl, start, addr_fmt_width);
+
+ /*
+ * Also color the filename and line if needed, with
+ * the same color than the percentage. Don't print it
+ * twice for close colored addr with the same filename:line
+ */
+ if (al->path) {
+ if (!prev_line || strcmp(prev_line, al->path)) {
+ color_fprintf(stdout, color, " // %s", al->path);
+ prev_line = al->path;
+ }
+ }
+
+ printf("\n");
+ } else if (max_lines && printed >= max_lines)
+ return 1;
+ else {
+ int width = symbol_conf.show_total_period ? 12 : 8;
+
+ if (queue)
+ return -1;
+
+ if (evsel__is_group_event(evsel))
+ width *= evsel->core.nr_members;
+
+ if (!*al->line)
+ printf(" %*s:\n", width, " ");
+ else
+ printf(" %*s: %-*d %s\n", width, " ", addr_fmt_width, al->line_nr, al->line);
+ }
+
+ return 0;
+}
+
+/*
+ * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw)
+ * which looks like following
+ *
+ * 0000000000415500 <_init>:
+ * 415500: sub $0x8,%rsp
+ * 415504: mov 0x2f5ad5(%rip),%rax # 70afe0 <_DYNAMIC+0x2f8>
+ * 41550b: test %rax,%rax
+ * 41550e: je 415515 <_init+0x15>
+ * 415510: callq 416e70 <__gmon_start__@plt>
+ * 415515: add $0x8,%rsp
+ * 415519: retq
+ *
+ * it will be parsed and saved into struct disasm_line as
+ * <offset> <name> <ops.raw>
+ *
+ * The offset will be a relative offset from the start of the symbol and -1
+ * means that it's not a disassembly line so should be treated differently.
+ * The ops.raw part will be parsed further according to type of the instruction.
+ */
+static int symbol__parse_objdump_line(struct symbol *sym,
+ struct annotate_args *args,
+ char *parsed_line, int *line_nr, char **fileloc)
+{
+ struct map *map = args->ms.map;
+ struct annotation *notes = symbol__annotation(sym);
+ struct disasm_line *dl;
+ char *tmp;
+ s64 line_ip, offset = -1;
+ regmatch_t match[2];
+
+ /* /filename:linenr ? Save line number and ignore. */
+ if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) {
+ *line_nr = atoi(parsed_line + match[1].rm_so);
+ free(*fileloc);
+ *fileloc = strdup(parsed_line);
+ return 0;
+ }
+
+ /* Process hex address followed by ':'. */
+ line_ip = strtoull(parsed_line, &tmp, 16);
+ if (parsed_line != tmp && tmp[0] == ':' && tmp[1] != '\0') {
+ u64 start = map__rip_2objdump(map, sym->start),
+ end = map__rip_2objdump(map, sym->end);
+
+ offset = line_ip - start;
+ if ((u64)line_ip < start || (u64)line_ip >= end)
+ offset = -1;
+ else
+ parsed_line = tmp + 1;
+ }
+
+ args->offset = offset;
+ args->line = parsed_line;
+ args->line_nr = *line_nr;
+ args->fileloc = *fileloc;
+ args->ms.sym = sym;
+
+ dl = disasm_line__new(args);
+ (*line_nr)++;
+
+ if (dl == NULL)
+ return -1;
+
+ if (!disasm_line__has_local_offset(dl)) {
+ dl->ops.target.offset = dl->ops.target.addr -
+ map__rip_2objdump(map, sym->start);
+ dl->ops.target.offset_avail = true;
+ }
+
+ /* kcore has no symbols, so add the call target symbol */
+ if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.sym) {
+ struct addr_map_symbol target = {
+ .addr = dl->ops.target.addr,
+ .ms = { .map = map, },
+ };
+
+ if (!maps__find_ams(args->ms.maps, &target) &&
+ target.ms.sym->start == target.al_addr)
+ dl->ops.target.sym = target.ms.sym;
+ }
+
+ annotation_line__add(&dl->al, &notes->src->source);
+ return 0;
+}
+
+static __attribute__((constructor)) void symbol__init_regexpr(void)
+{
+ regcomp(&file_lineno, "^/[^:]+:([0-9]+)", REG_EXTENDED);
+}
+
+static void delete_last_nop(struct symbol *sym)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct list_head *list = &notes->src->source;
+ struct disasm_line *dl;
+
+ while (!list_empty(list)) {
+ dl = list_entry(list->prev, struct disasm_line, al.node);
+
+ if (dl->ins.ops) {
+ if (dl->ins.ops != &nop_ops)
+ return;
+ } else {
+ if (!strstr(dl->al.line, " nop ") &&
+ !strstr(dl->al.line, " nopl ") &&
+ !strstr(dl->al.line, " nopw "))
+ return;
+ }
+
+ list_del_init(&dl->al.node);
+ disasm_line__free(dl);
+ }
+}
+
+int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen)
+{
+ struct dso *dso = map__dso(ms->map);
+
+ BUG_ON(buflen == 0);
+
+ if (errnum >= 0) {
+ str_error_r(errnum, buf, buflen);
+ return 0;
+ }
+
+ switch (errnum) {
+ case SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX: {
+ char bf[SBUILD_ID_SIZE + 15] = " with build id ";
+ char *build_id_msg = NULL;
+
+ if (dso->has_build_id) {
+ build_id__sprintf(&dso->bid, bf + 15);
+ build_id_msg = bf;
+ }
+ scnprintf(buf, buflen,
+ "No vmlinux file%s\nwas found in the path.\n\n"
+ "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n"
+ "Please use:\n\n"
+ " perf buildid-cache -vu vmlinux\n\n"
+ "or:\n\n"
+ " --vmlinux vmlinux\n", build_id_msg ?: "");
+ }
+ break;
+ case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF:
+ scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation");
+ break;
+ case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP:
+ scnprintf(buf, buflen, "Problems with arch specific instruction name regular expressions.");
+ break;
+ case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING:
+ scnprintf(buf, buflen, "Problems while parsing the CPUID in the arch specific initialization.");
+ break;
+ case SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE:
+ scnprintf(buf, buflen, "Invalid BPF file: %s.", dso->long_name);
+ break;
+ case SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF:
+ scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with -g or use pahole -J.",
+ dso->long_name);
+ break;
+ default:
+ scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum);
+ break;
+ }
+
+ return 0;
+}
+
+static int dso__disassemble_filename(struct dso *dso, char *filename, size_t filename_size)
+{
+ char linkname[PATH_MAX];
+ char *build_id_filename;
+ char *build_id_path = NULL;
+ char *pos;
+ int len;
+
+ if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
+ !dso__is_kcore(dso))
+ return SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX;
+
+ build_id_filename = dso__build_id_filename(dso, NULL, 0, false);
+ if (build_id_filename) {
+ __symbol__join_symfs(filename, filename_size, build_id_filename);
+ free(build_id_filename);
+ } else {
+ if (dso->has_build_id)
+ return ENOMEM;
+ goto fallback;
+ }
+
+ build_id_path = strdup(filename);
+ if (!build_id_path)
+ return ENOMEM;
+
+ /*
+ * old style build-id cache has name of XX/XXXXXXX.. while
+ * new style has XX/XXXXXXX../{elf,kallsyms,vdso}.
+ * extract the build-id part of dirname in the new style only.
+ */
+ pos = strrchr(build_id_path, '/');
+ if (pos && strlen(pos) < SBUILD_ID_SIZE - 2)
+ dirname(build_id_path);
+
+ if (dso__is_kcore(dso))
+ goto fallback;
+
+ len = readlink(build_id_path, linkname, sizeof(linkname) - 1);
+ if (len < 0)
+ goto fallback;
+
+ linkname[len] = '\0';
+ if (strstr(linkname, DSO__NAME_KALLSYMS) ||
+ access(filename, R_OK)) {
+fallback:
+ /*
+ * If we don't have build-ids or the build-id file isn't in the
+ * cache, or is just a kallsyms file, well, lets hope that this
+ * DSO is the same as when 'perf record' ran.
+ */
+ if (dso->kernel && dso->long_name[0] == '/')
+ snprintf(filename, filename_size, "%s", dso->long_name);
+ else
+ __symbol__join_symfs(filename, filename_size, dso->long_name);
+
+ mutex_lock(&dso->lock);
+ if (access(filename, R_OK) && errno == ENOENT && dso->nsinfo) {
+ char *new_name = dso__filename_with_chroot(dso, filename);
+ if (new_name) {
+ strlcpy(filename, new_name, filename_size);
+ free(new_name);
+ }
+ }
+ mutex_unlock(&dso->lock);
+ }
+
+ free(build_id_path);
+ return 0;
+}
+
+#if defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
+#define PACKAGE "perf"
+#include <bfd.h>
+#include <dis-asm.h>
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#include <linux/btf.h>
+#include <tools/dis-asm-compat.h>
+
+static int symbol__disassemble_bpf(struct symbol *sym,
+ struct annotate_args *args)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct annotation_options *opts = args->options;
+ struct bpf_prog_linfo *prog_linfo = NULL;
+ struct bpf_prog_info_node *info_node;
+ int len = sym->end - sym->start;
+ disassembler_ftype disassemble;
+ struct map *map = args->ms.map;
+ struct perf_bpil *info_linear;
+ struct disassemble_info info;
+ struct dso *dso = map__dso(map);
+ int pc = 0, count, sub_id;
+ struct btf *btf = NULL;
+ char tpath[PATH_MAX];
+ size_t buf_size;
+ int nr_skip = 0;
+ char *buf;
+ bfd *bfdf;
+ int ret;
+ FILE *s;
+
+ if (dso->binary_type != DSO_BINARY_TYPE__BPF_PROG_INFO)
+ return SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE;
+
+ pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func__,
+ sym->name, sym->start, sym->end - sym->start);
+
+ memset(tpath, 0, sizeof(tpath));
+ perf_exe(tpath, sizeof(tpath));
+
+ bfdf = bfd_openr(tpath, NULL);
+ if (bfdf == NULL)
+ abort();
+
+ if (!bfd_check_format(bfdf, bfd_object))
+ abort();
+
+ s = open_memstream(&buf, &buf_size);
+ if (!s) {
+ ret = errno;
+ goto out;
+ }
+ init_disassemble_info_compat(&info, s,
+ (fprintf_ftype) fprintf,
+ fprintf_styled);
+ info.arch = bfd_get_arch(bfdf);
+ info.mach = bfd_get_mach(bfdf);
+
+ info_node = perf_env__find_bpf_prog_info(dso->bpf_prog.env,
+ dso->bpf_prog.id);
+ if (!info_node) {
+ ret = SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF;
+ goto out;
+ }
+ info_linear = info_node->info_linear;
+ sub_id = dso->bpf_prog.sub_id;
+
+ info.buffer = (void *)(uintptr_t)(info_linear->info.jited_prog_insns);
+ info.buffer_length = info_linear->info.jited_prog_len;
+
+ if (info_linear->info.nr_line_info)
+ prog_linfo = bpf_prog_linfo__new(&info_linear->info);
+
+ if (info_linear->info.btf_id) {
+ struct btf_node *node;
+
+ node = perf_env__find_btf(dso->bpf_prog.env,
+ info_linear->info.btf_id);
+ if (node)
+ btf = btf__new((__u8 *)(node->data),
+ node->data_size);
+ }
+
+ disassemble_init_for_target(&info);
+
+#ifdef DISASM_FOUR_ARGS_SIGNATURE
+ disassemble = disassembler(info.arch,
+ bfd_big_endian(bfdf),
+ info.mach,
+ bfdf);
+#else
+ disassemble = disassembler(bfdf);
+#endif
+ if (disassemble == NULL)
+ abort();
+
+ fflush(s);
+ do {
+ const struct bpf_line_info *linfo = NULL;
+ struct disasm_line *dl;
+ size_t prev_buf_size;
+ const char *srcline;
+ u64 addr;
+
+ addr = pc + ((u64 *)(uintptr_t)(info_linear->info.jited_ksyms))[sub_id];
+ count = disassemble(pc, &info);
+
+ if (prog_linfo)
+ linfo = bpf_prog_linfo__lfind_addr_func(prog_linfo,
+ addr, sub_id,
+ nr_skip);
+
+ if (linfo && btf) {
+ srcline = btf__name_by_offset(btf, linfo->line_off);
+ nr_skip++;
+ } else
+ srcline = NULL;
+
+ fprintf(s, "\n");
+ prev_buf_size = buf_size;
+ fflush(s);
+
+ if (!opts->hide_src_code && srcline) {
+ args->offset = -1;
+ args->line = strdup(srcline);
+ args->line_nr = 0;
+ args->fileloc = NULL;
+ args->ms.sym = sym;
+ dl = disasm_line__new(args);
+ if (dl) {
+ annotation_line__add(&dl->al,
+ &notes->src->source);
+ }
+ }
+
+ args->offset = pc;
+ args->line = buf + prev_buf_size;
+ args->line_nr = 0;
+ args->fileloc = NULL;
+ args->ms.sym = sym;
+ dl = disasm_line__new(args);
+ if (dl)
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ pc += count;
+ } while (count > 0 && pc < len);
+
+ ret = 0;
+out:
+ free(prog_linfo);
+ btf__free(btf);
+ fclose(s);
+ bfd_close(bfdf);
+ return ret;
+}
+#else // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
+static int symbol__disassemble_bpf(struct symbol *sym __maybe_unused,
+ struct annotate_args *args __maybe_unused)
+{
+ return SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF;
+}
+#endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
+
+static int
+symbol__disassemble_bpf_image(struct symbol *sym,
+ struct annotate_args *args)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct disasm_line *dl;
+
+ args->offset = -1;
+ args->line = strdup("to be implemented");
+ args->line_nr = 0;
+ args->fileloc = NULL;
+ dl = disasm_line__new(args);
+ if (dl)
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ zfree(&args->line);
+ return 0;
+}
+
+/*
+ * Possibly create a new version of line with tabs expanded. Returns the
+ * existing or new line, storage is updated if a new line is allocated. If
+ * allocation fails then NULL is returned.
+ */
+static char *expand_tabs(char *line, char **storage, size_t *storage_len)
+{
+ size_t i, src, dst, len, new_storage_len, num_tabs;
+ char *new_line;
+ size_t line_len = strlen(line);
+
+ for (num_tabs = 0, i = 0; i < line_len; i++)
+ if (line[i] == '\t')
+ num_tabs++;
+
+ if (num_tabs == 0)
+ return line;
+
+ /*
+ * Space for the line and '\0', less the leading and trailing
+ * spaces. Each tab may introduce 7 additional spaces.
+ */
+ new_storage_len = line_len + 1 + (num_tabs * 7);
+
+ new_line = malloc(new_storage_len);
+ if (new_line == NULL) {
+ pr_err("Failure allocating memory for tab expansion\n");
+ return NULL;
+ }
+
+ /*
+ * Copy regions starting at src and expand tabs. If there are two
+ * adjacent tabs then 'src == i', the memcpy is of size 0 and the spaces
+ * are inserted.
+ */
+ for (i = 0, src = 0, dst = 0; i < line_len && num_tabs; i++) {
+ if (line[i] == '\t') {
+ len = i - src;
+ memcpy(&new_line[dst], &line[src], len);
+ dst += len;
+ new_line[dst++] = ' ';
+ while (dst % 8 != 0)
+ new_line[dst++] = ' ';
+ src = i + 1;
+ num_tabs--;
+ }
+ }
+
+ /* Expand the last region. */
+ len = line_len - src;
+ memcpy(&new_line[dst], &line[src], len);
+ dst += len;
+ new_line[dst] = '\0';
+
+ free(*storage);
+ *storage = new_line;
+ *storage_len = new_storage_len;
+ return new_line;
+
+}
+
+static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
+{
+ struct annotation_options *opts = args->options;
+ struct map *map = args->ms.map;
+ struct dso *dso = map__dso(map);
+ char *command;
+ FILE *file;
+ char symfs_filename[PATH_MAX];
+ struct kcore_extract kce;
+ bool delete_extract = false;
+ bool decomp = false;
+ int lineno = 0;
+ char *fileloc = NULL;
+ int nline;
+ char *line;
+ size_t line_len;
+ const char *objdump_argv[] = {
+ "/bin/sh",
+ "-c",
+ NULL, /* Will be the objdump command to run. */
+ "--",
+ NULL, /* Will be the symfs path. */
+ NULL,
+ };
+ struct child_process objdump_process;
+ int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
+
+ if (err)
+ return err;
+
+ pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
+ symfs_filename, sym->name, map__unmap_ip(map, sym->start),
+ map__unmap_ip(map, sym->end));
+
+ pr_debug("annotating [%p] %30s : [%p] %30s\n",
+ dso, dso->long_name, sym, sym->name);
+
+ if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) {
+ return symbol__disassemble_bpf(sym, args);
+ } else if (dso->binary_type == DSO_BINARY_TYPE__BPF_IMAGE) {
+ return symbol__disassemble_bpf_image(sym, args);
+ } else if (dso__is_kcore(dso)) {
+ kce.kcore_filename = symfs_filename;
+ kce.addr = map__rip_2objdump(map, sym->start);
+ kce.offs = sym->start;
+ kce.len = sym->end - sym->start;
+ if (!kcore_extract__create(&kce)) {
+ delete_extract = true;
+ strlcpy(symfs_filename, kce.extract_filename,
+ sizeof(symfs_filename));
+ }
+ } else if (dso__needs_decompress(dso)) {
+ char tmp[KMOD_DECOMP_LEN];
+
+ if (dso__decompress_kmodule_path(dso, symfs_filename,
+ tmp, sizeof(tmp)) < 0)
+ return -1;
+
+ decomp = true;
+ strcpy(symfs_filename, tmp);
+ }
+
+ err = asprintf(&command,
+ "%s %s%s --start-address=0x%016" PRIx64
+ " --stop-address=0x%016" PRIx64
+ " -l -d %s %s %s %c%s%c %s%s -C \"$1\"",
+ opts->objdump_path ?: "objdump",
+ opts->disassembler_style ? "-M " : "",
+ opts->disassembler_style ?: "",
+ map__rip_2objdump(map, sym->start),
+ map__rip_2objdump(map, sym->end),
+ opts->show_asm_raw ? "" : "--no-show-raw-insn",
+ opts->annotate_src ? "-S" : "",
+ opts->prefix ? "--prefix " : "",
+ opts->prefix ? '"' : ' ',
+ opts->prefix ?: "",
+ opts->prefix ? '"' : ' ',
+ opts->prefix_strip ? "--prefix-strip=" : "",
+ opts->prefix_strip ?: "");
+
+ if (err < 0) {
+ pr_err("Failure allocating memory for the command to run\n");
+ goto out_remove_tmp;
+ }
+
+ pr_debug("Executing: %s\n", command);
+
+ objdump_argv[2] = command;
+ objdump_argv[4] = symfs_filename;
+
+ /* Create a pipe to read from for stdout */
+ memset(&objdump_process, 0, sizeof(objdump_process));
+ objdump_process.argv = objdump_argv;
+ objdump_process.out = -1;
+ objdump_process.err = -1;
+ objdump_process.no_stderr = 1;
+ if (start_command(&objdump_process)) {
+ pr_err("Failure starting to run %s\n", command);
+ err = -1;
+ goto out_free_command;
+ }
+
+ file = fdopen(objdump_process.out, "r");
+ if (!file) {
+ pr_err("Failure creating FILE stream for %s\n", command);
+ /*
+ * If we were using debug info should retry with
+ * original binary.
+ */
+ err = -1;
+ goto out_close_stdout;
+ }
+
+ /* Storage for getline. */
+ line = NULL;
+ line_len = 0;
+
+ nline = 0;
+ while (!feof(file)) {
+ const char *match;
+ char *expanded_line;
+
+ if (getline(&line, &line_len, file) < 0 || !line)
+ break;
+
+ /* Skip lines containing "filename:" */
+ match = strstr(line, symfs_filename);
+ if (match && match[strlen(symfs_filename)] == ':')
+ continue;
+
+ expanded_line = strim(line);
+ expanded_line = expand_tabs(expanded_line, &line, &line_len);
+ if (!expanded_line)
+ break;
+
+ /*
+ * The source code line number (lineno) needs to be kept in
+ * across calls to symbol__parse_objdump_line(), so that it
+ * can associate it with the instructions till the next one.
+ * See disasm_line__new() and struct disasm_line::line_nr.
+ */
+ if (symbol__parse_objdump_line(sym, args, expanded_line,
+ &lineno, &fileloc) < 0)
+ break;
+ nline++;
+ }
+ free(line);
+ free(fileloc);
+
+ err = finish_command(&objdump_process);
+ if (err)
+ pr_err("Error running %s\n", command);
+
+ if (nline == 0) {
+ err = -1;
+ pr_err("No output from %s\n", command);
+ }
+
+ /*
+ * kallsyms does not have symbol sizes so there may a nop at the end.
+ * Remove it.
+ */
+ if (dso__is_kcore(dso))
+ delete_last_nop(sym);
+
+ fclose(file);
+
+out_close_stdout:
+ close(objdump_process.out);
+
+out_free_command:
+ free(command);
+
+out_remove_tmp:
+ if (decomp)
+ unlink(symfs_filename);
+
+ if (delete_extract)
+ kcore_extract__delete(&kce);
+
+ return err;
+}
+
+static void calc_percent(struct sym_hist *sym_hist,
+ struct hists *hists,
+ struct annotation_data *data,
+ s64 offset, s64 end)
+{
+ unsigned int hits = 0;
+ u64 period = 0;
+
+ while (offset < end) {
+ hits += sym_hist->addr[offset].nr_samples;
+ period += sym_hist->addr[offset].period;
+ ++offset;
+ }
+
+ if (sym_hist->nr_samples) {
+ data->he.period = period;
+ data->he.nr_samples = hits;
+ data->percent[PERCENT_HITS_LOCAL] = 100.0 * hits / sym_hist->nr_samples;
+ }
+
+ if (hists->stats.nr_non_filtered_samples)
+ data->percent[PERCENT_HITS_GLOBAL] = 100.0 * hits / hists->stats.nr_non_filtered_samples;
+
+ if (sym_hist->period)
+ data->percent[PERCENT_PERIOD_LOCAL] = 100.0 * period / sym_hist->period;
+
+ if (hists->stats.total_period)
+ data->percent[PERCENT_PERIOD_GLOBAL] = 100.0 * period / hists->stats.total_period;
+}
+
+static void annotation__calc_percent(struct annotation *notes,
+ struct evsel *leader, s64 len)
+{
+ struct annotation_line *al, *next;
+ struct evsel *evsel;
+
+ list_for_each_entry(al, &notes->src->source, node) {
+ s64 end;
+ int i = 0;
+
+ if (al->offset == -1)
+ continue;
+
+ next = annotation_line__next(al, &notes->src->source);
+ end = next ? next->offset : len;
+
+ for_each_group_evsel(evsel, leader) {
+ struct hists *hists = evsel__hists(evsel);
+ struct annotation_data *data;
+ struct sym_hist *sym_hist;
+
+ BUG_ON(i >= al->data_nr);
+
+ sym_hist = annotation__histogram(notes, evsel->core.idx);
+ data = &al->data[i++];
+
+ calc_percent(sym_hist, hists, data, al->offset, end);
+ }
+ }
+}
+
+void symbol__calc_percent(struct symbol *sym, struct evsel *evsel)
+{
+ struct annotation *notes = symbol__annotation(sym);
+
+ annotation__calc_percent(notes, evsel, symbol__size(sym));
+}
+
+int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
+ struct annotation_options *options, struct arch **parch)
+{
+ struct symbol *sym = ms->sym;
+ struct annotation *notes = symbol__annotation(sym);
+ struct annotate_args args = {
+ .evsel = evsel,
+ .options = options,
+ };
+ struct perf_env *env = evsel__env(evsel);
+ const char *arch_name = perf_env__arch(env);
+ struct arch *arch;
+ int err;
+
+ if (!arch_name)
+ return errno;
+
+ args.arch = arch = arch__find(arch_name);
+ if (arch == NULL) {
+ pr_err("%s: unsupported arch %s\n", __func__, arch_name);
+ return ENOTSUP;
+ }
+
+ if (parch)
+ *parch = arch;
+
+ if (arch->init) {
+ err = arch->init(arch, env ? env->cpuid : NULL);
+ if (err) {
+ pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name);
+ return err;
+ }
+ }
+
+ args.ms = *ms;
+ if (notes->options && notes->options->full_addr)
+ notes->start = map__objdump_2mem(ms->map, ms->sym->start);
+ else
+ notes->start = map__rip_2objdump(ms->map, ms->sym->start);
+
+ return symbol__disassemble(sym, &args);
+}
+
+static void insert_source_line(struct rb_root *root, struct annotation_line *al,
+ struct annotation_options *opts)
+{
+ struct annotation_line *iter;
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ int i, ret;
+
+ while (*p != NULL) {
+ parent = *p;
+ iter = rb_entry(parent, struct annotation_line, rb_node);
+
+ ret = strcmp(iter->path, al->path);
+ if (ret == 0) {
+ for (i = 0; i < al->data_nr; i++) {
+ iter->data[i].percent_sum += annotation_data__percent(&al->data[i],
+ opts->percent_type);
+ }
+ return;
+ }
+
+ if (ret < 0)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ for (i = 0; i < al->data_nr; i++) {
+ al->data[i].percent_sum = annotation_data__percent(&al->data[i],
+ opts->percent_type);
+ }
+
+ rb_link_node(&al->rb_node, parent, p);
+ rb_insert_color(&al->rb_node, root);
+}
+
+static int cmp_source_line(struct annotation_line *a, struct annotation_line *b)
+{
+ int i;
+
+ for (i = 0; i < a->data_nr; i++) {
+ if (a->data[i].percent_sum == b->data[i].percent_sum)
+ continue;
+ return a->data[i].percent_sum > b->data[i].percent_sum;
+ }
+
+ return 0;
+}
+
+static void __resort_source_line(struct rb_root *root, struct annotation_line *al)
+{
+ struct annotation_line *iter;
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+
+ while (*p != NULL) {
+ parent = *p;
+ iter = rb_entry(parent, struct annotation_line, rb_node);
+
+ if (cmp_source_line(al, iter))
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ rb_link_node(&al->rb_node, parent, p);
+ rb_insert_color(&al->rb_node, root);
+}
+
+static void resort_source_line(struct rb_root *dest_root, struct rb_root *src_root)
+{
+ struct annotation_line *al;
+ struct rb_node *node;
+
+ node = rb_first(src_root);
+ while (node) {
+ struct rb_node *next;
+
+ al = rb_entry(node, struct annotation_line, rb_node);
+ next = rb_next(node);
+ rb_erase(node, src_root);
+
+ __resort_source_line(dest_root, al);
+ node = next;
+ }
+}
+
+static void print_summary(struct rb_root *root, const char *filename)
+{
+ struct annotation_line *al;
+ struct rb_node *node;
+
+ printf("\nSorted summary for file %s\n", filename);
+ printf("----------------------------------------------\n\n");
+
+ if (RB_EMPTY_ROOT(root)) {
+ printf(" Nothing higher than %1.1f%%\n", MIN_GREEN);
+ return;
+ }
+
+ node = rb_first(root);
+ while (node) {
+ double percent, percent_max = 0.0;
+ const char *color;
+ char *path;
+ int i;
+
+ al = rb_entry(node, struct annotation_line, rb_node);
+ for (i = 0; i < al->data_nr; i++) {
+ percent = al->data[i].percent_sum;
+ color = get_percent_color(percent);
+ color_fprintf(stdout, color, " %7.2f", percent);
+
+ if (percent > percent_max)
+ percent_max = percent;
+ }
+
+ path = al->path;
+ color = get_percent_color(percent_max);
+ color_fprintf(stdout, color, " %s\n", path);
+
+ node = rb_next(node);
+ }
+}
+
+static void symbol__annotate_hits(struct symbol *sym, struct evsel *evsel)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct sym_hist *h = annotation__histogram(notes, evsel->core.idx);
+ u64 len = symbol__size(sym), offset;
+
+ for (offset = 0; offset < len; ++offset)
+ if (h->addr[offset].nr_samples != 0)
+ printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2,
+ sym->start + offset, h->addr[offset].nr_samples);
+ printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->nr_samples", h->nr_samples);
+}
+
+static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start)
+{
+ char bf[32];
+ struct annotation_line *line;
+
+ list_for_each_entry_reverse(line, lines, node) {
+ if (line->offset != -1)
+ return scnprintf(bf, sizeof(bf), "%" PRIx64, start + line->offset);
+ }
+
+ return 0;
+}
+
+int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel,
+ struct annotation_options *opts)
+{
+ struct map *map = ms->map;
+ struct symbol *sym = ms->sym;
+ struct dso *dso = map__dso(map);
+ char *filename;
+ const char *d_filename;
+ const char *evsel_name = evsel__name(evsel);
+ struct annotation *notes = symbol__annotation(sym);
+ struct sym_hist *h = annotation__histogram(notes, evsel->core.idx);
+ struct annotation_line *pos, *queue = NULL;
+ u64 start = map__rip_2objdump(map, sym->start);
+ int printed = 2, queue_len = 0, addr_fmt_width;
+ int more = 0;
+ bool context = opts->context;
+ u64 len;
+ int width = symbol_conf.show_total_period ? 12 : 8;
+ int graph_dotted_len;
+ char buf[512];
+
+ filename = strdup(dso->long_name);
+ if (!filename)
+ return -ENOMEM;
+
+ if (opts->full_path)
+ d_filename = filename;
+ else
+ d_filename = basename(filename);
+
+ len = symbol__size(sym);
+
+ if (evsel__is_group_event(evsel)) {
+ width *= evsel->core.nr_members;
+ evsel__group_desc(evsel, buf, sizeof(buf));
+ evsel_name = buf;
+ }
+
+ graph_dotted_len = printf(" %-*.*s| Source code & Disassembly of %s for %s (%" PRIu64 " samples, "
+ "percent: %s)\n",
+ width, width, symbol_conf.show_total_period ? "Period" :
+ symbol_conf.show_nr_samples ? "Samples" : "Percent",
+ d_filename, evsel_name, h->nr_samples,
+ percent_type_str(opts->percent_type));
+
+ printf("%-*.*s----\n",
+ graph_dotted_len, graph_dotted_len, graph_dotted_line);
+
+ if (verbose > 0)
+ symbol__annotate_hits(sym, evsel);
+
+ addr_fmt_width = annotated_source__addr_fmt_width(&notes->src->source, start);
+
+ list_for_each_entry(pos, &notes->src->source, node) {
+ int err;
+
+ if (context && queue == NULL) {
+ queue = pos;
+ queue_len = 0;
+ }
+
+ err = annotation_line__print(pos, sym, start, evsel, len,
+ opts->min_pcnt, printed, opts->max_lines,
+ queue, addr_fmt_width, opts->percent_type);
+
+ switch (err) {
+ case 0:
+ ++printed;
+ if (context) {
+ printed += queue_len;
+ queue = NULL;
+ queue_len = 0;
+ }
+ break;
+ case 1:
+ /* filtered by max_lines */
+ ++more;
+ break;
+ case -1:
+ default:
+ /*
+ * Filtered by min_pcnt or non IP lines when
+ * context != 0
+ */
+ if (!context)
+ break;
+ if (queue_len == context)
+ queue = list_entry(queue->node.next, typeof(*queue), node);
+ else
+ ++queue_len;
+ break;
+ }
+ }
+
+ free(filename);
+
+ return more;
+}
+
+static void FILE__set_percent_color(void *fp __maybe_unused,
+ double percent __maybe_unused,
+ bool current __maybe_unused)
+{
+}
+
+static int FILE__set_jumps_percent_color(void *fp __maybe_unused,
+ int nr __maybe_unused, bool current __maybe_unused)
+{
+ return 0;
+}
+
+static int FILE__set_color(void *fp __maybe_unused, int color __maybe_unused)
+{
+ return 0;
+}
+
+static void FILE__printf(void *fp, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ vfprintf(fp, fmt, args);
+ va_end(args);
+}
+
+static void FILE__write_graph(void *fp, int graph)
+{
+ const char *s;
+ switch (graph) {
+
+ case DARROW_CHAR: s = "↓"; break;
+ case UARROW_CHAR: s = "↑"; break;
+ case LARROW_CHAR: s = "←"; break;
+ case RARROW_CHAR: s = "→"; break;
+ default: s = "?"; break;
+ }
+
+ fputs(s, fp);
+}
+
+static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp,
+ struct annotation_options *opts)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct annotation_write_ops wops = {
+ .first_line = true,
+ .obj = fp,
+ .set_color = FILE__set_color,
+ .set_percent_color = FILE__set_percent_color,
+ .set_jumps_percent_color = FILE__set_jumps_percent_color,
+ .printf = FILE__printf,
+ .write_graph = FILE__write_graph,
+ };
+ struct annotation_line *al;
+
+ list_for_each_entry(al, &notes->src->source, node) {
+ if (annotation_line__filter(al, notes))
+ continue;
+ annotation_line__write(al, notes, &wops, opts);
+ fputc('\n', fp);
+ wops.first_line = false;
+ }
+
+ return 0;
+}
+
+int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel,
+ struct annotation_options *opts)
+{
+ const char *ev_name = evsel__name(evsel);
+ char buf[1024];
+ char *filename;
+ int err = -1;
+ FILE *fp;
+
+ if (asprintf(&filename, "%s.annotation", ms->sym->name) < 0)
+ return -1;
+
+ fp = fopen(filename, "w");
+ if (fp == NULL)
+ goto out_free_filename;
+
+ if (evsel__is_group_event(evsel)) {
+ evsel__group_desc(evsel, buf, sizeof(buf));
+ ev_name = buf;
+ }
+
+ fprintf(fp, "%s() %s\nEvent: %s\n\n",
+ ms->sym->name, map__dso(ms->map)->long_name, ev_name);
+ symbol__annotate_fprintf2(ms->sym, fp, opts);
+
+ fclose(fp);
+ err = 0;
+out_free_filename:
+ free(filename);
+ return err;
+}
+
+void symbol__annotate_zero_histogram(struct symbol *sym, int evidx)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct sym_hist *h = annotation__histogram(notes, evidx);
+
+ memset(h, 0, notes->src->sizeof_sym_hist);
+}
+
+void symbol__annotate_decay_histogram(struct symbol *sym, int evidx)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct sym_hist *h = annotation__histogram(notes, evidx);
+ int len = symbol__size(sym), offset;
+
+ h->nr_samples = 0;
+ for (offset = 0; offset < len; ++offset) {
+ h->addr[offset].nr_samples = h->addr[offset].nr_samples * 7 / 8;
+ h->nr_samples += h->addr[offset].nr_samples;
+ }
+}
+
+void annotated_source__purge(struct annotated_source *as)
+{
+ struct annotation_line *al, *n;
+
+ list_for_each_entry_safe(al, n, &as->source, node) {
+ list_del_init(&al->node);
+ disasm_line__free(disasm_line(al));
+ }
+}
+
+static size_t disasm_line__fprintf(struct disasm_line *dl, FILE *fp)
+{
+ size_t printed;
+
+ if (dl->al.offset == -1)
+ return fprintf(fp, "%s\n", dl->al.line);
+
+ printed = fprintf(fp, "%#" PRIx64 " %s", dl->al.offset, dl->ins.name);
+
+ if (dl->ops.raw[0] != '\0') {
+ printed += fprintf(fp, "%.*s %s\n", 6 - (int)printed, " ",
+ dl->ops.raw);
+ }
+
+ return printed + fprintf(fp, "\n");
+}
+
+size_t disasm__fprintf(struct list_head *head, FILE *fp)
+{
+ struct disasm_line *pos;
+ size_t printed = 0;
+
+ list_for_each_entry(pos, head, al.node)
+ printed += disasm_line__fprintf(pos, fp);
+
+ return printed;
+}
+
+bool disasm_line__is_valid_local_jump(struct disasm_line *dl, struct symbol *sym)
+{
+ if (!dl || !dl->ins.ops || !ins__is_jump(&dl->ins) ||
+ !disasm_line__has_local_offset(dl) || dl->ops.target.offset < 0 ||
+ dl->ops.target.offset >= (s64)symbol__size(sym))
+ return false;
+
+ return true;
+}
+
+void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym)
+{
+ u64 offset, size = symbol__size(sym);
+
+ /* PLT symbols contain external offsets */
+ if (strstr(sym->name, "@plt"))
+ return;
+
+ for (offset = 0; offset < size; ++offset) {
+ struct annotation_line *al = notes->offsets[offset];
+ struct disasm_line *dl;
+
+ dl = disasm_line(al);
+
+ if (!disasm_line__is_valid_local_jump(dl, sym))
+ continue;
+
+ al = notes->offsets[dl->ops.target.offset];
+
+ /*
+ * FIXME: Oops, no jump target? Buggy disassembler? Or do we
+ * have to adjust to the previous offset?
+ */
+ if (al == NULL)
+ continue;
+
+ if (++al->jump_sources > notes->max_jump_sources)
+ notes->max_jump_sources = al->jump_sources;
+ }
+}
+
+void annotation__set_offsets(struct annotation *notes, s64 size)
+{
+ struct annotation_line *al;
+
+ notes->max_line_len = 0;
+ notes->nr_entries = 0;
+ notes->nr_asm_entries = 0;
+
+ list_for_each_entry(al, &notes->src->source, node) {
+ size_t line_len = strlen(al->line);
+
+ if (notes->max_line_len < line_len)
+ notes->max_line_len = line_len;
+ al->idx = notes->nr_entries++;
+ if (al->offset != -1) {
+ al->idx_asm = notes->nr_asm_entries++;
+ /*
+ * FIXME: short term bandaid to cope with assembly
+ * routines that comes with labels in the same column
+ * as the address in objdump, sigh.
+ *
+ * E.g. copy_user_generic_unrolled
+ */
+ if (al->offset < size)
+ notes->offsets[al->offset] = al;
+ } else
+ al->idx_asm = -1;
+ }
+}
+
+static inline int width_jumps(int n)
+{
+ if (n >= 100)
+ return 5;
+ if (n / 10)
+ return 2;
+ return 1;
+}
+
+static int annotation__max_ins_name(struct annotation *notes)
+{
+ int max_name = 0, len;
+ struct annotation_line *al;
+
+ list_for_each_entry(al, &notes->src->source, node) {
+ if (al->offset == -1)
+ continue;
+
+ len = strlen(disasm_line(al)->ins.name);
+ if (max_name < len)
+ max_name = len;
+ }
+
+ return max_name;
+}
+
+void annotation__init_column_widths(struct annotation *notes, struct symbol *sym)
+{
+ notes->widths.addr = notes->widths.target =
+ notes->widths.min_addr = hex_width(symbol__size(sym));
+ notes->widths.max_addr = hex_width(sym->end);
+ notes->widths.jumps = width_jumps(notes->max_jump_sources);
+ notes->widths.max_ins_name = annotation__max_ins_name(notes);
+}
+
+void annotation__update_column_widths(struct annotation *notes)
+{
+ if (notes->options->use_offset)
+ notes->widths.target = notes->widths.min_addr;
+ else if (notes->options->full_addr)
+ notes->widths.target = BITS_PER_LONG / 4;
+ else
+ notes->widths.target = notes->widths.max_addr;
+
+ notes->widths.addr = notes->widths.target;
+
+ if (notes->options->show_nr_jumps)
+ notes->widths.addr += notes->widths.jumps + 1;
+}
+
+void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms)
+{
+ notes->options->full_addr = !notes->options->full_addr;
+
+ if (notes->options->full_addr)
+ notes->start = map__objdump_2mem(ms->map, ms->sym->start);
+ else
+ notes->start = map__rip_2objdump(ms->map, ms->sym->start);
+
+ annotation__update_column_widths(notes);
+}
+
+static void annotation__calc_lines(struct annotation *notes, struct map *map,
+ struct rb_root *root,
+ struct annotation_options *opts)
+{
+ struct annotation_line *al;
+ struct rb_root tmp_root = RB_ROOT;
+
+ list_for_each_entry(al, &notes->src->source, node) {
+ double percent_max = 0.0;
+ int i;
+
+ for (i = 0; i < al->data_nr; i++) {
+ double percent;
+
+ percent = annotation_data__percent(&al->data[i],
+ opts->percent_type);
+
+ if (percent > percent_max)
+ percent_max = percent;
+ }
+
+ if (percent_max <= 0.5)
+ continue;
+
+ al->path = get_srcline(map__dso(map), notes->start + al->offset, NULL,
+ false, true, notes->start + al->offset);
+ insert_source_line(&tmp_root, al, opts);
+ }
+
+ resort_source_line(root, &tmp_root);
+}
+
+static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root,
+ struct annotation_options *opts)
+{
+ struct annotation *notes = symbol__annotation(ms->sym);
+
+ annotation__calc_lines(notes, ms->map, root, opts);
+}
+
+int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel,
+ struct annotation_options *opts)
+{
+ struct dso *dso = map__dso(ms->map);
+ struct symbol *sym = ms->sym;
+ struct rb_root source_line = RB_ROOT;
+ struct hists *hists = evsel__hists(evsel);
+ char buf[1024];
+ int err;
+
+ err = symbol__annotate2(ms, evsel, opts, NULL);
+ if (err) {
+ char msg[BUFSIZ];
+
+ dso->annotate_warned = true;
+ symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
+ ui__error("Couldn't annotate %s:\n%s", sym->name, msg);
+ return -1;
+ }
+
+ if (opts->print_lines) {
+ srcline_full_filename = opts->full_path;
+ symbol__calc_lines(ms, &source_line, opts);
+ print_summary(&source_line, dso->long_name);
+ }
+
+ hists__scnprintf_title(hists, buf, sizeof(buf));
+ fprintf(stdout, "%s, [percent: %s]\n%s() %s\n",
+ buf, percent_type_str(opts->percent_type), sym->name, dso->long_name);
+ symbol__annotate_fprintf2(sym, stdout, opts);
+
+ annotated_source__purge(symbol__annotation(sym)->src);
+
+ return 0;
+}
+
+int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel,
+ struct annotation_options *opts)
+{
+ struct dso *dso = map__dso(ms->map);
+ struct symbol *sym = ms->sym;
+ struct rb_root source_line = RB_ROOT;
+ int err;
+
+ err = symbol__annotate(ms, evsel, opts, NULL);
+ if (err) {
+ char msg[BUFSIZ];
+
+ dso->annotate_warned = true;
+ symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
+ ui__error("Couldn't annotate %s:\n%s", sym->name, msg);
+ return -1;
+ }
+
+ symbol__calc_percent(sym, evsel);
+
+ if (opts->print_lines) {
+ srcline_full_filename = opts->full_path;
+ symbol__calc_lines(ms, &source_line, opts);
+ print_summary(&source_line, dso->long_name);
+ }
+
+ symbol__annotate_printf(ms, evsel, opts);
+
+ annotated_source__purge(symbol__annotation(sym)->src);
+
+ return 0;
+}
+
+bool ui__has_annotation(void)
+{
+ return use_browser == 1 && perf_hpp_list.sym;
+}
+
+
+static double annotation_line__max_percent(struct annotation_line *al,
+ struct annotation *notes,
+ unsigned int percent_type)
+{
+ double percent_max = 0.0;
+ int i;
+
+ for (i = 0; i < notes->nr_events; i++) {
+ double percent;
+
+ percent = annotation_data__percent(&al->data[i],
+ percent_type);
+
+ if (percent > percent_max)
+ percent_max = percent;
+ }
+
+ return percent_max;
+}
+
+static void disasm_line__write(struct disasm_line *dl, struct annotation *notes,
+ void *obj, char *bf, size_t size,
+ void (*obj__printf)(void *obj, const char *fmt, ...),
+ void (*obj__write_graph)(void *obj, int graph))
+{
+ if (dl->ins.ops && dl->ins.ops->scnprintf) {
+ if (ins__is_jump(&dl->ins)) {
+ bool fwd;
+
+ if (dl->ops.target.outside)
+ goto call_like;
+ fwd = dl->ops.target.offset > dl->al.offset;
+ obj__write_graph(obj, fwd ? DARROW_CHAR : UARROW_CHAR);
+ obj__printf(obj, " ");
+ } else if (ins__is_call(&dl->ins)) {
+call_like:
+ obj__write_graph(obj, RARROW_CHAR);
+ obj__printf(obj, " ");
+ } else if (ins__is_ret(&dl->ins)) {
+ obj__write_graph(obj, LARROW_CHAR);
+ obj__printf(obj, " ");
+ } else {
+ obj__printf(obj, " ");
+ }
+ } else {
+ obj__printf(obj, " ");
+ }
+
+ disasm_line__scnprintf(dl, bf, size, !notes->options->use_offset, notes->widths.max_ins_name);
+}
+
+static void ipc_coverage_string(char *bf, int size, struct annotation *notes)
+{
+ double ipc = 0.0, coverage = 0.0;
+
+ if (notes->hit_cycles)
+ ipc = notes->hit_insn / ((double)notes->hit_cycles);
+
+ if (notes->total_insn) {
+ coverage = notes->cover_insn * 100.0 /
+ ((double)notes->total_insn);
+ }
+
+ scnprintf(bf, size, "(Average IPC: %.2f, IPC Coverage: %.1f%%)",
+ ipc, coverage);
+}
+
+static void __annotation_line__write(struct annotation_line *al, struct annotation *notes,
+ bool first_line, bool current_entry, bool change_color, int width,
+ void *obj, unsigned int percent_type,
+ int (*obj__set_color)(void *obj, int color),
+ void (*obj__set_percent_color)(void *obj, double percent, bool current),
+ int (*obj__set_jumps_percent_color)(void *obj, int nr, bool current),
+ void (*obj__printf)(void *obj, const char *fmt, ...),
+ void (*obj__write_graph)(void *obj, int graph))
+
+{
+ double percent_max = annotation_line__max_percent(al, notes, percent_type);
+ int pcnt_width = annotation__pcnt_width(notes),
+ cycles_width = annotation__cycles_width(notes);
+ bool show_title = false;
+ char bf[256];
+ int printed;
+
+ if (first_line && (al->offset == -1 || percent_max == 0.0)) {
+ if (notes->have_cycles) {
+ if (al->ipc == 0.0 && al->cycles == 0)
+ show_title = true;
+ } else
+ show_title = true;
+ }
+
+ if (al->offset != -1 && percent_max != 0.0) {
+ int i;
+
+ for (i = 0; i < notes->nr_events; i++) {
+ double percent;
+
+ percent = annotation_data__percent(&al->data[i], percent_type);
+
+ obj__set_percent_color(obj, percent, current_entry);
+ if (symbol_conf.show_total_period) {
+ obj__printf(obj, "%11" PRIu64 " ", al->data[i].he.period);
+ } else if (symbol_conf.show_nr_samples) {
+ obj__printf(obj, "%6" PRIu64 " ",
+ al->data[i].he.nr_samples);
+ } else {
+ obj__printf(obj, "%6.2f ", percent);
+ }
+ }
+ } else {
+ obj__set_percent_color(obj, 0, current_entry);
+
+ if (!show_title)
+ obj__printf(obj, "%-*s", pcnt_width, " ");
+ else {
+ obj__printf(obj, "%-*s", pcnt_width,
+ symbol_conf.show_total_period ? "Period" :
+ symbol_conf.show_nr_samples ? "Samples" : "Percent");
+ }
+ }
+
+ if (notes->have_cycles) {
+ if (al->ipc)
+ obj__printf(obj, "%*.2f ", ANNOTATION__IPC_WIDTH - 1, al->ipc);
+ else if (!show_title)
+ obj__printf(obj, "%*s", ANNOTATION__IPC_WIDTH, " ");
+ else
+ obj__printf(obj, "%*s ", ANNOTATION__IPC_WIDTH - 1, "IPC");
+
+ if (!notes->options->show_minmax_cycle) {
+ if (al->cycles)
+ obj__printf(obj, "%*" PRIu64 " ",
+ ANNOTATION__CYCLES_WIDTH - 1, al->cycles);
+ else if (!show_title)
+ obj__printf(obj, "%*s",
+ ANNOTATION__CYCLES_WIDTH, " ");
+ else
+ obj__printf(obj, "%*s ",
+ ANNOTATION__CYCLES_WIDTH - 1,
+ "Cycle");
+ } else {
+ if (al->cycles) {
+ char str[32];
+
+ scnprintf(str, sizeof(str),
+ "%" PRIu64 "(%" PRIu64 "/%" PRIu64 ")",
+ al->cycles, al->cycles_min,
+ al->cycles_max);
+
+ obj__printf(obj, "%*s ",
+ ANNOTATION__MINMAX_CYCLES_WIDTH - 1,
+ str);
+ } else if (!show_title)
+ obj__printf(obj, "%*s",
+ ANNOTATION__MINMAX_CYCLES_WIDTH,
+ " ");
+ else
+ obj__printf(obj, "%*s ",
+ ANNOTATION__MINMAX_CYCLES_WIDTH - 1,
+ "Cycle(min/max)");
+ }
+
+ if (show_title && !*al->line) {
+ ipc_coverage_string(bf, sizeof(bf), notes);
+ obj__printf(obj, "%*s", ANNOTATION__AVG_IPC_WIDTH, bf);
+ }
+ }
+
+ obj__printf(obj, " ");
+
+ if (!*al->line)
+ obj__printf(obj, "%-*s", width - pcnt_width - cycles_width, " ");
+ else if (al->offset == -1) {
+ if (al->line_nr && notes->options->show_linenr)
+ printed = scnprintf(bf, sizeof(bf), "%-*d ", notes->widths.addr + 1, al->line_nr);
+ else
+ printed = scnprintf(bf, sizeof(bf), "%-*s ", notes->widths.addr, " ");
+ obj__printf(obj, bf);
+ obj__printf(obj, "%-*s", width - printed - pcnt_width - cycles_width + 1, al->line);
+ } else {
+ u64 addr = al->offset;
+ int color = -1;
+
+ if (!notes->options->use_offset)
+ addr += notes->start;
+
+ if (!notes->options->use_offset) {
+ printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr);
+ } else {
+ if (al->jump_sources &&
+ notes->options->offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) {
+ if (notes->options->show_nr_jumps) {
+ int prev;
+ printed = scnprintf(bf, sizeof(bf), "%*d ",
+ notes->widths.jumps,
+ al->jump_sources);
+ prev = obj__set_jumps_percent_color(obj, al->jump_sources,
+ current_entry);
+ obj__printf(obj, bf);
+ obj__set_color(obj, prev);
+ }
+print_addr:
+ printed = scnprintf(bf, sizeof(bf), "%*" PRIx64 ": ",
+ notes->widths.target, addr);
+ } else if (ins__is_call(&disasm_line(al)->ins) &&
+ notes->options->offset_level >= ANNOTATION__OFFSET_CALL) {
+ goto print_addr;
+ } else if (notes->options->offset_level == ANNOTATION__MAX_OFFSET_LEVEL) {
+ goto print_addr;
+ } else {
+ printed = scnprintf(bf, sizeof(bf), "%-*s ",
+ notes->widths.addr, " ");
+ }
+ }
+
+ if (change_color)
+ color = obj__set_color(obj, HE_COLORSET_ADDR);
+ obj__printf(obj, bf);
+ if (change_color)
+ obj__set_color(obj, color);
+
+ disasm_line__write(disasm_line(al), notes, obj, bf, sizeof(bf), obj__printf, obj__write_graph);
+
+ obj__printf(obj, "%-*s", width - pcnt_width - cycles_width - 3 - printed, bf);
+ }
+
+}
+
+void annotation_line__write(struct annotation_line *al, struct annotation *notes,
+ struct annotation_write_ops *wops,
+ struct annotation_options *opts)
+{
+ __annotation_line__write(al, notes, wops->first_line, wops->current_entry,
+ wops->change_color, wops->width, wops->obj,
+ opts->percent_type,
+ wops->set_color, wops->set_percent_color,
+ wops->set_jumps_percent_color, wops->printf,
+ wops->write_graph);
+}
+
+int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
+ struct annotation_options *options, struct arch **parch)
+{
+ struct symbol *sym = ms->sym;
+ struct annotation *notes = symbol__annotation(sym);
+ size_t size = symbol__size(sym);
+ int nr_pcnt = 1, err;
+
+ notes->offsets = zalloc(size * sizeof(struct annotation_line *));
+ if (notes->offsets == NULL)
+ return ENOMEM;
+
+ if (evsel__is_group_event(evsel))
+ nr_pcnt = evsel->core.nr_members;
+
+ err = symbol__annotate(ms, evsel, options, parch);
+ if (err)
+ goto out_free_offsets;
+
+ notes->options = options;
+
+ symbol__calc_percent(sym, evsel);
+
+ annotation__set_offsets(notes, size);
+ annotation__mark_jump_targets(notes, sym);
+ annotation__compute_ipc(notes, size);
+ annotation__init_column_widths(notes, sym);
+ notes->nr_events = nr_pcnt;
+
+ annotation__update_column_widths(notes);
+ sym->annotate2 = 1;
+
+ return 0;
+
+out_free_offsets:
+ zfree(&notes->offsets);
+ return err;
+}
+
+static int annotation__config(const char *var, const char *value, void *data)
+{
+ struct annotation_options *opt = data;
+
+ if (!strstarts(var, "annotate."))
+ return 0;
+
+ if (!strcmp(var, "annotate.offset_level")) {
+ perf_config_u8(&opt->offset_level, "offset_level", value);
+
+ if (opt->offset_level > ANNOTATION__MAX_OFFSET_LEVEL)
+ opt->offset_level = ANNOTATION__MAX_OFFSET_LEVEL;
+ else if (opt->offset_level < ANNOTATION__MIN_OFFSET_LEVEL)
+ opt->offset_level = ANNOTATION__MIN_OFFSET_LEVEL;
+ } else if (!strcmp(var, "annotate.hide_src_code")) {
+ opt->hide_src_code = perf_config_bool("hide_src_code", value);
+ } else if (!strcmp(var, "annotate.jump_arrows")) {
+ opt->jump_arrows = perf_config_bool("jump_arrows", value);
+ } else if (!strcmp(var, "annotate.show_linenr")) {
+ opt->show_linenr = perf_config_bool("show_linenr", value);
+ } else if (!strcmp(var, "annotate.show_nr_jumps")) {
+ opt->show_nr_jumps = perf_config_bool("show_nr_jumps", value);
+ } else if (!strcmp(var, "annotate.show_nr_samples")) {
+ symbol_conf.show_nr_samples = perf_config_bool("show_nr_samples",
+ value);
+ } else if (!strcmp(var, "annotate.show_total_period")) {
+ symbol_conf.show_total_period = perf_config_bool("show_total_period",
+ value);
+ } else if (!strcmp(var, "annotate.use_offset")) {
+ opt->use_offset = perf_config_bool("use_offset", value);
+ } else if (!strcmp(var, "annotate.disassembler_style")) {
+ opt->disassembler_style = strdup(value);
+ if (!opt->disassembler_style) {
+ pr_err("Not enough memory for annotate.disassembler_style\n");
+ return -1;
+ }
+ } else if (!strcmp(var, "annotate.objdump")) {
+ opt->objdump_path = strdup(value);
+ if (!opt->objdump_path) {
+ pr_err("Not enough memory for annotate.objdump\n");
+ return -1;
+ }
+ } else if (!strcmp(var, "annotate.addr2line")) {
+ symbol_conf.addr2line_path = strdup(value);
+ if (!symbol_conf.addr2line_path) {
+ pr_err("Not enough memory for annotate.addr2line\n");
+ return -1;
+ }
+ } else if (!strcmp(var, "annotate.demangle")) {
+ symbol_conf.demangle = perf_config_bool("demangle", value);
+ } else if (!strcmp(var, "annotate.demangle_kernel")) {
+ symbol_conf.demangle_kernel = perf_config_bool("demangle_kernel", value);
+ } else {
+ pr_debug("%s variable unknown, ignoring...", var);
+ }
+
+ return 0;
+}
+
+void annotation_options__init(struct annotation_options *opt)
+{
+ memset(opt, 0, sizeof(*opt));
+
+ /* Default values. */
+ opt->use_offset = true;
+ opt->jump_arrows = true;
+ opt->annotate_src = true;
+ opt->offset_level = ANNOTATION__OFFSET_JUMP_TARGETS;
+ opt->percent_type = PERCENT_PERIOD_LOCAL;
+}
+
+
+void annotation_options__exit(struct annotation_options *opt)
+{
+ zfree(&opt->disassembler_style);
+ zfree(&opt->objdump_path);
+}
+
+void annotation_config__init(struct annotation_options *opt)
+{
+ perf_config(annotation__config, opt);
+}
+
+static unsigned int parse_percent_type(char *str1, char *str2)
+{
+ unsigned int type = (unsigned int) -1;
+
+ if (!strcmp("period", str1)) {
+ if (!strcmp("local", str2))
+ type = PERCENT_PERIOD_LOCAL;
+ else if (!strcmp("global", str2))
+ type = PERCENT_PERIOD_GLOBAL;
+ }
+
+ if (!strcmp("hits", str1)) {
+ if (!strcmp("local", str2))
+ type = PERCENT_HITS_LOCAL;
+ else if (!strcmp("global", str2))
+ type = PERCENT_HITS_GLOBAL;
+ }
+
+ return type;
+}
+
+int annotate_parse_percent_type(const struct option *opt, const char *_str,
+ int unset __maybe_unused)
+{
+ struct annotation_options *opts = opt->value;
+ unsigned int type;
+ char *str1, *str2;
+ int err = -1;
+
+ str1 = strdup(_str);
+ if (!str1)
+ return -ENOMEM;
+
+ str2 = strchr(str1, '-');
+ if (!str2)
+ goto out;
+
+ *str2++ = 0;
+
+ type = parse_percent_type(str1, str2);
+ if (type == (unsigned int) -1)
+ type = parse_percent_type(str2, str1);
+ if (type != (unsigned int) -1) {
+ opts->percent_type = type;
+ err = 0;
+ }
+
+out:
+ free(str1);
+ return err;
+}
+
+int annotate_check_args(struct annotation_options *args)
+{
+ if (args->prefix_strip && !args->prefix) {
+ pr_err("--prefix-strip requires --prefix\n");
+ return -1;
+ }
+ return 0;
+}
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
new file mode 100644
index 000000000..962780559
--- /dev/null
+++ b/tools/perf/util/annotate.h
@@ -0,0 +1,434 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_ANNOTATE_H
+#define __PERF_ANNOTATE_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <asm/bug.h>
+#include "symbol_conf.h"
+#include "mutex.h"
+#include "spark.h"
+
+struct hist_browser_timer;
+struct hist_entry;
+struct ins_ops;
+struct map;
+struct map_symbol;
+struct addr_map_symbol;
+struct option;
+struct perf_sample;
+struct evsel;
+struct symbol;
+
+struct ins {
+ const char *name;
+ struct ins_ops *ops;
+};
+
+struct ins_operands {
+ char *raw;
+ char *raw_comment;
+ char *raw_func_start;
+ struct {
+ char *raw;
+ char *name;
+ struct symbol *sym;
+ u64 addr;
+ s64 offset;
+ bool offset_avail;
+ bool outside;
+ } target;
+ union {
+ struct {
+ char *raw;
+ char *name;
+ u64 addr;
+ } source;
+ struct {
+ struct ins ins;
+ struct ins_operands *ops;
+ } locked;
+ };
+};
+
+struct arch;
+
+struct ins_ops {
+ void (*free)(struct ins_operands *ops);
+ int (*parse)(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms);
+ int (*scnprintf)(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name);
+};
+
+bool ins__is_jump(const struct ins *ins);
+bool ins__is_call(const struct ins *ins);
+bool ins__is_ret(const struct ins *ins);
+bool ins__is_lock(const struct ins *ins);
+int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops, int max_ins_name);
+bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2);
+
+#define ANNOTATION__IPC_WIDTH 6
+#define ANNOTATION__CYCLES_WIDTH 6
+#define ANNOTATION__MINMAX_CYCLES_WIDTH 19
+#define ANNOTATION__AVG_IPC_WIDTH 36
+#define ANNOTATION_DUMMY_LEN 256
+
+struct annotation_options {
+ bool hide_src_code,
+ use_offset,
+ jump_arrows,
+ print_lines,
+ full_path,
+ show_linenr,
+ show_fileloc,
+ show_nr_jumps,
+ show_minmax_cycle,
+ show_asm_raw,
+ annotate_src,
+ full_addr;
+ u8 offset_level;
+ int min_pcnt;
+ int max_lines;
+ int context;
+ char *objdump_path;
+ char *disassembler_style;
+ const char *prefix;
+ const char *prefix_strip;
+ unsigned int percent_type;
+};
+
+enum {
+ ANNOTATION__OFFSET_JUMP_TARGETS = 1,
+ ANNOTATION__OFFSET_CALL,
+ ANNOTATION__MAX_OFFSET_LEVEL,
+};
+
+#define ANNOTATION__MIN_OFFSET_LEVEL ANNOTATION__OFFSET_JUMP_TARGETS
+
+struct annotation;
+
+struct sym_hist_entry {
+ u64 nr_samples;
+ u64 period;
+};
+
+enum {
+ PERCENT_HITS_LOCAL,
+ PERCENT_HITS_GLOBAL,
+ PERCENT_PERIOD_LOCAL,
+ PERCENT_PERIOD_GLOBAL,
+ PERCENT_MAX,
+};
+
+struct annotation_data {
+ double percent[PERCENT_MAX];
+ double percent_sum;
+ struct sym_hist_entry he;
+};
+
+struct annotation_line {
+ struct list_head node;
+ struct rb_node rb_node;
+ s64 offset;
+ char *line;
+ int line_nr;
+ char *fileloc;
+ int jump_sources;
+ float ipc;
+ u64 cycles;
+ u64 cycles_max;
+ u64 cycles_min;
+ char *path;
+ u32 idx;
+ int idx_asm;
+ int data_nr;
+ struct annotation_data data[];
+};
+
+struct disasm_line {
+ struct ins ins;
+ struct ins_operands ops;
+
+ /* This needs to be at the end. */
+ struct annotation_line al;
+};
+
+static inline double annotation_data__percent(struct annotation_data *data,
+ unsigned int which)
+{
+ return which < PERCENT_MAX ? data->percent[which] : -1;
+}
+
+static inline const char *percent_type_str(unsigned int type)
+{
+ static const char *str[PERCENT_MAX] = {
+ "local hits",
+ "global hits",
+ "local period",
+ "global period",
+ };
+
+ if (WARN_ON(type >= PERCENT_MAX))
+ return "N/A";
+
+ return str[type];
+}
+
+static inline struct disasm_line *disasm_line(struct annotation_line *al)
+{
+ return al ? container_of(al, struct disasm_line, al) : NULL;
+}
+
+/*
+ * Is this offset in the same function as the line it is used?
+ * asm functions jump to other functions, for instance.
+ */
+static inline bool disasm_line__has_local_offset(const struct disasm_line *dl)
+{
+ return dl->ops.target.offset_avail && !dl->ops.target.outside;
+}
+
+/*
+ * Can we draw an arrow from the jump to its target, for instance? I.e.
+ * is the jump and its target in the same function?
+ */
+bool disasm_line__is_valid_local_jump(struct disasm_line *dl, struct symbol *sym);
+
+void disasm_line__free(struct disasm_line *dl);
+struct annotation_line *
+annotation_line__next(struct annotation_line *pos, struct list_head *head);
+
+struct annotation_write_ops {
+ bool first_line, current_entry, change_color;
+ int width;
+ void *obj;
+ int (*set_color)(void *obj, int color);
+ void (*set_percent_color)(void *obj, double percent, bool current);
+ int (*set_jumps_percent_color)(void *obj, int nr, bool current);
+ void (*printf)(void *obj, const char *fmt, ...);
+ void (*write_graph)(void *obj, int graph);
+};
+
+void annotation_line__write(struct annotation_line *al, struct annotation *notes,
+ struct annotation_write_ops *ops,
+ struct annotation_options *opts);
+
+int __annotation__scnprintf_samples_period(struct annotation *notes,
+ char *bf, size_t size,
+ struct evsel *evsel,
+ bool show_freq);
+
+int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name);
+size_t disasm__fprintf(struct list_head *head, FILE *fp);
+void symbol__calc_percent(struct symbol *sym, struct evsel *evsel);
+
+struct sym_hist {
+ u64 nr_samples;
+ u64 period;
+ struct sym_hist_entry addr[];
+};
+
+struct cyc_hist {
+ u64 start;
+ u64 cycles;
+ u64 cycles_aggr;
+ u64 cycles_max;
+ u64 cycles_min;
+ s64 cycles_spark[NUM_SPARKS];
+ u32 num;
+ u32 num_aggr;
+ u8 have_start;
+ /* 1 byte padding */
+ u16 reset;
+};
+
+/** struct annotated_source - symbols with hits have this attached as in sannotation
+ *
+ * @histograms: Array of addr hit histograms per event being monitored
+ * nr_histograms: This may not be the same as evsel->evlist->core.nr_entries if
+ * we have more than a group in a evlist, where we will want
+ * to see each group separately, that is why symbol__annotate2()
+ * sets src->nr_histograms to evsel->nr_members.
+ * @lines: If 'print_lines' is specified, per source code line percentages
+ * @source: source parsed from a disassembler like objdump -dS
+ * @cyc_hist: Average cycles per basic block
+ *
+ * lines is allocated, percentages calculated and all sorted by percentage
+ * when the annotation is about to be presented, so the percentages are for
+ * one of the entries in the histogram array, i.e. for the event/counter being
+ * presented. It is deallocated right after symbol__{tui,tty,etc}_annotate
+ * returns.
+ */
+struct annotated_source {
+ struct list_head source;
+ int nr_histograms;
+ size_t sizeof_sym_hist;
+ struct cyc_hist *cycles_hist;
+ struct sym_hist *histograms;
+};
+
+struct LOCKABLE annotation {
+ u64 max_coverage;
+ u64 start;
+ u64 hit_cycles;
+ u64 hit_insn;
+ unsigned int total_insn;
+ unsigned int cover_insn;
+ struct annotation_options *options;
+ struct annotation_line **offsets;
+ int nr_events;
+ int max_jump_sources;
+ int nr_entries;
+ int nr_asm_entries;
+ u16 max_line_len;
+ struct {
+ u8 addr;
+ u8 jumps;
+ u8 target;
+ u8 min_addr;
+ u8 max_addr;
+ u8 max_ins_name;
+ } widths;
+ bool have_cycles;
+ struct annotated_source *src;
+};
+
+static inline void annotation__init(struct annotation *notes __maybe_unused)
+{
+}
+void annotation__exit(struct annotation *notes);
+
+void annotation__lock(struct annotation *notes) EXCLUSIVE_LOCK_FUNCTION(*notes);
+void annotation__unlock(struct annotation *notes) UNLOCK_FUNCTION(*notes);
+bool annotation__trylock(struct annotation *notes) EXCLUSIVE_TRYLOCK_FUNCTION(true, *notes);
+
+static inline int annotation__cycles_width(struct annotation *notes)
+{
+ if (notes->have_cycles && notes->options->show_minmax_cycle)
+ return ANNOTATION__IPC_WIDTH + ANNOTATION__MINMAX_CYCLES_WIDTH;
+
+ return notes->have_cycles ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0;
+}
+
+static inline int annotation__pcnt_width(struct annotation *notes)
+{
+ return (symbol_conf.show_total_period ? 12 : 7) * notes->nr_events;
+}
+
+static inline bool annotation_line__filter(struct annotation_line *al, struct annotation *notes)
+{
+ return notes->options->hide_src_code && al->offset == -1;
+}
+
+void annotation__set_offsets(struct annotation *notes, s64 size);
+void annotation__compute_ipc(struct annotation *notes, size_t size);
+void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym);
+void annotation__update_column_widths(struct annotation *notes);
+void annotation__init_column_widths(struct annotation *notes, struct symbol *sym);
+void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms);
+
+static inline struct sym_hist *annotated_source__histogram(struct annotated_source *src, int idx)
+{
+ return ((void *)src->histograms) + (src->sizeof_sym_hist * idx);
+}
+
+static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx)
+{
+ return annotated_source__histogram(notes->src, idx);
+}
+
+static inline struct annotation *symbol__annotation(struct symbol *sym)
+{
+ return (void *)sym - symbol_conf.priv_size;
+}
+
+int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample,
+ struct evsel *evsel);
+
+int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
+ struct addr_map_symbol *start,
+ unsigned cycles);
+
+int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample,
+ struct evsel *evsel, u64 addr);
+
+struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists);
+void symbol__annotate_zero_histograms(struct symbol *sym);
+
+int symbol__annotate(struct map_symbol *ms,
+ struct evsel *evsel,
+ struct annotation_options *options,
+ struct arch **parch);
+int symbol__annotate2(struct map_symbol *ms,
+ struct evsel *evsel,
+ struct annotation_options *options,
+ struct arch **parch);
+
+enum symbol_disassemble_errno {
+ SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0,
+
+ /*
+ * Choose an arbitrary negative big number not to clash with standard
+ * errno since SUS requires the errno has distinct positive values.
+ * See 'Issue 6' in the link below.
+ *
+ * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
+ */
+ __SYMBOL_ANNOTATE_ERRNO__START = -10000,
+
+ SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX = __SYMBOL_ANNOTATE_ERRNO__START,
+ SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF,
+ SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING,
+ SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP,
+ SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE,
+ SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF,
+
+ __SYMBOL_ANNOTATE_ERRNO__END,
+};
+
+int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen);
+
+int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel,
+ struct annotation_options *options);
+void symbol__annotate_zero_histogram(struct symbol *sym, int evidx);
+void symbol__annotate_decay_histogram(struct symbol *sym, int evidx);
+void annotated_source__purge(struct annotated_source *as);
+
+int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel,
+ struct annotation_options *opts);
+
+bool ui__has_annotation(void);
+
+int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, struct annotation_options *opts);
+
+int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, struct annotation_options *opts);
+
+#ifdef HAVE_SLANG_SUPPORT
+int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
+ struct hist_browser_timer *hbt,
+ struct annotation_options *opts);
+#else
+static inline int symbol__tui_annotate(struct map_symbol *ms __maybe_unused,
+ struct evsel *evsel __maybe_unused,
+ struct hist_browser_timer *hbt __maybe_unused,
+ struct annotation_options *opts __maybe_unused)
+{
+ return 0;
+}
+#endif
+
+void annotation_options__init(struct annotation_options *opt);
+void annotation_options__exit(struct annotation_options *opt);
+
+void annotation_config__init(struct annotation_options *opt);
+
+int annotate_parse_percent_type(const struct option *opt, const char *_str,
+ int unset);
+
+int annotate_check_args(struct annotation_options *args);
+
+#endif /* __PERF_ANNOTATE_H */
diff --git a/tools/perf/util/archinsn.h b/tools/perf/util/archinsn.h
new file mode 100644
index 000000000..448cbb6b8
--- /dev/null
+++ b/tools/perf/util/archinsn.h
@@ -0,0 +1,12 @@
+#ifndef INSN_H
+#define INSN_H 1
+
+struct perf_sample;
+struct machine;
+struct thread;
+
+void arch_fetch_insn(struct perf_sample *sample,
+ struct thread *thread,
+ struct machine *machine);
+
+#endif
diff --git a/tools/perf/util/arm-spe-decoder/Build b/tools/perf/util/arm-spe-decoder/Build
new file mode 100644
index 000000000..f8dae13fc
--- /dev/null
+++ b/tools/perf/util/arm-spe-decoder/Build
@@ -0,0 +1 @@
+perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o arm-spe-decoder.o
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
new file mode 100644
index 000000000..ba807071d
--- /dev/null
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * arm_spe_decoder.c: ARM SPE support
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <errno.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <linux/zalloc.h>
+
+#include "../auxtrace.h"
+#include "../debug.h"
+#include "../util.h"
+
+#include "arm-spe-decoder.h"
+
+static u64 arm_spe_calc_ip(int index, u64 payload)
+{
+ u64 ns, el, val;
+
+ /* Instruction virtual address or Branch target address */
+ if (index == SPE_ADDR_PKT_HDR_INDEX_INS ||
+ index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) {
+ ns = SPE_ADDR_PKT_GET_NS(payload);
+ el = SPE_ADDR_PKT_GET_EL(payload);
+
+ /* Clean highest byte */
+ payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
+
+ /* Fill highest byte for EL1 or EL2 (VHE) mode */
+ if (ns && (el == SPE_ADDR_PKT_EL1 || el == SPE_ADDR_PKT_EL2))
+ payload |= 0xffULL << SPE_ADDR_PKT_ADDR_BYTE7_SHIFT;
+
+ /* Data access virtual address */
+ } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT) {
+
+ /* Clean tags */
+ payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
+
+ /*
+ * Armv8 ARM (ARM DDI 0487F.c), chapter "D10.2.1 Address packet"
+ * defines the data virtual address payload format, the top byte
+ * (bits [63:56]) is assigned as top-byte tag; so we only can
+ * retrieve address value from bits [55:0].
+ *
+ * According to Documentation/arch/arm64/memory.rst, if detects the
+ * specific pattern in bits [55:52] of payload which falls in
+ * the kernel space, should fixup the top byte and this allows
+ * perf tool to parse DSO symbol for data address correctly.
+ *
+ * For this reason, if detects the bits [55:52] is 0xf, will
+ * fill 0xff into the top byte.
+ */
+ val = SPE_ADDR_PKT_ADDR_GET_BYTE_6(payload);
+ if ((val & 0xf0ULL) == 0xf0ULL)
+ payload |= 0xffULL << SPE_ADDR_PKT_ADDR_BYTE7_SHIFT;
+
+ /* Data access physical address */
+ } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) {
+ /* Clean highest byte */
+ payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
+ } else {
+ static u32 seen_idx = 0;
+ if (!(seen_idx & BIT(index))) {
+ seen_idx |= BIT(index);
+ pr_warning("ignoring unsupported address packet index: 0x%x\n", index);
+ }
+ }
+
+ return payload;
+}
+
+struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params)
+{
+ struct arm_spe_decoder *decoder;
+
+ if (!params->get_trace)
+ return NULL;
+
+ decoder = zalloc(sizeof(struct arm_spe_decoder));
+ if (!decoder)
+ return NULL;
+
+ decoder->get_trace = params->get_trace;
+ decoder->data = params->data;
+
+ return decoder;
+}
+
+void arm_spe_decoder_free(struct arm_spe_decoder *decoder)
+{
+ free(decoder);
+}
+
+static int arm_spe_get_data(struct arm_spe_decoder *decoder)
+{
+ struct arm_spe_buffer buffer = { .buf = 0, };
+ int ret;
+
+ pr_debug("Getting more data\n");
+ ret = decoder->get_trace(&buffer, decoder->data);
+ if (ret < 0)
+ return ret;
+
+ decoder->buf = buffer.buf;
+ decoder->len = buffer.len;
+
+ if (!decoder->len)
+ pr_debug("No more data\n");
+
+ return decoder->len;
+}
+
+static int arm_spe_get_next_packet(struct arm_spe_decoder *decoder)
+{
+ int ret;
+
+ do {
+ if (!decoder->len) {
+ ret = arm_spe_get_data(decoder);
+
+ /* Failed to read out trace data */
+ if (ret <= 0)
+ return ret;
+ }
+
+ ret = arm_spe_get_packet(decoder->buf, decoder->len,
+ &decoder->packet);
+ if (ret <= 0) {
+ /* Move forward for 1 byte */
+ decoder->buf += 1;
+ decoder->len -= 1;
+ return -EBADMSG;
+ }
+
+ decoder->buf += ret;
+ decoder->len -= ret;
+ } while (decoder->packet.type == ARM_SPE_PAD);
+
+ return 1;
+}
+
+static int arm_spe_read_record(struct arm_spe_decoder *decoder)
+{
+ int err;
+ int idx;
+ u64 payload, ip;
+
+ memset(&decoder->record, 0x0, sizeof(decoder->record));
+ decoder->record.context_id = (u64)-1;
+
+ while (1) {
+ err = arm_spe_get_next_packet(decoder);
+ if (err <= 0)
+ return err;
+
+ idx = decoder->packet.index;
+ payload = decoder->packet.payload;
+
+ switch (decoder->packet.type) {
+ case ARM_SPE_TIMESTAMP:
+ decoder->record.timestamp = payload;
+ return 1;
+ case ARM_SPE_END:
+ return 1;
+ case ARM_SPE_ADDRESS:
+ ip = arm_spe_calc_ip(idx, payload);
+ if (idx == SPE_ADDR_PKT_HDR_INDEX_INS)
+ decoder->record.from_ip = ip;
+ else if (idx == SPE_ADDR_PKT_HDR_INDEX_BRANCH)
+ decoder->record.to_ip = ip;
+ else if (idx == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT)
+ decoder->record.virt_addr = ip;
+ else if (idx == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS)
+ decoder->record.phys_addr = ip;
+ break;
+ case ARM_SPE_COUNTER:
+ if (idx == SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT)
+ decoder->record.latency = payload;
+ break;
+ case ARM_SPE_CONTEXT:
+ decoder->record.context_id = payload;
+ break;
+ case ARM_SPE_OP_TYPE:
+ switch (idx) {
+ case SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC:
+ decoder->record.op |= ARM_SPE_OP_LDST;
+ if (payload & SPE_OP_PKT_ST)
+ decoder->record.op |= ARM_SPE_OP_ST;
+ else
+ decoder->record.op |= ARM_SPE_OP_LD;
+ if (SPE_OP_PKT_IS_LDST_SVE(payload))
+ decoder->record.op |= ARM_SPE_OP_SVE_LDST;
+ break;
+ case SPE_OP_PKT_HDR_CLASS_OTHER:
+ decoder->record.op |= ARM_SPE_OP_OTHER;
+ if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload))
+ decoder->record.op |= ARM_SPE_OP_SVE_OTHER;
+ break;
+ case SPE_OP_PKT_HDR_CLASS_BR_ERET:
+ decoder->record.op |= ARM_SPE_OP_BRANCH_ERET;
+ break;
+ default:
+ pr_err("Get packet error!\n");
+ return -1;
+ }
+ break;
+ case ARM_SPE_EVENTS:
+ if (payload & BIT(EV_L1D_REFILL))
+ decoder->record.type |= ARM_SPE_L1D_MISS;
+
+ if (payload & BIT(EV_L1D_ACCESS))
+ decoder->record.type |= ARM_SPE_L1D_ACCESS;
+
+ if (payload & BIT(EV_TLB_WALK))
+ decoder->record.type |= ARM_SPE_TLB_MISS;
+
+ if (payload & BIT(EV_TLB_ACCESS))
+ decoder->record.type |= ARM_SPE_TLB_ACCESS;
+
+ if (payload & BIT(EV_LLC_MISS))
+ decoder->record.type |= ARM_SPE_LLC_MISS;
+
+ if (payload & BIT(EV_LLC_ACCESS))
+ decoder->record.type |= ARM_SPE_LLC_ACCESS;
+
+ if (payload & BIT(EV_REMOTE_ACCESS))
+ decoder->record.type |= ARM_SPE_REMOTE_ACCESS;
+
+ if (payload & BIT(EV_MISPRED))
+ decoder->record.type |= ARM_SPE_BRANCH_MISS;
+
+ if (payload & BIT(EV_PARTIAL_PREDICATE))
+ decoder->record.type |= ARM_SPE_SVE_PARTIAL_PRED;
+
+ if (payload & BIT(EV_EMPTY_PREDICATE))
+ decoder->record.type |= ARM_SPE_SVE_EMPTY_PRED;
+
+ break;
+ case ARM_SPE_DATA_SOURCE:
+ decoder->record.source = payload;
+ break;
+ case ARM_SPE_BAD:
+ break;
+ case ARM_SPE_PAD:
+ break;
+ default:
+ pr_err("Get packet error!\n");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int arm_spe_decode(struct arm_spe_decoder *decoder)
+{
+ return arm_spe_read_record(decoder);
+}
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
new file mode 100644
index 000000000..1443c2854
--- /dev/null
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * arm_spe_decoder.h: Arm Statistical Profiling Extensions support
+ * Copyright (c) 2019-2020, Arm Ltd.
+ */
+
+#ifndef INCLUDE__ARM_SPE_DECODER_H__
+#define INCLUDE__ARM_SPE_DECODER_H__
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "arm-spe-pkt-decoder.h"
+
+enum arm_spe_sample_type {
+ ARM_SPE_L1D_ACCESS = 1 << 0,
+ ARM_SPE_L1D_MISS = 1 << 1,
+ ARM_SPE_LLC_ACCESS = 1 << 2,
+ ARM_SPE_LLC_MISS = 1 << 3,
+ ARM_SPE_TLB_ACCESS = 1 << 4,
+ ARM_SPE_TLB_MISS = 1 << 5,
+ ARM_SPE_BRANCH_MISS = 1 << 6,
+ ARM_SPE_REMOTE_ACCESS = 1 << 7,
+ ARM_SPE_SVE_PARTIAL_PRED = 1 << 8,
+ ARM_SPE_SVE_EMPTY_PRED = 1 << 9,
+};
+
+enum arm_spe_op_type {
+ /* First level operation type */
+ ARM_SPE_OP_OTHER = 1 << 0,
+ ARM_SPE_OP_LDST = 1 << 1,
+ ARM_SPE_OP_BRANCH_ERET = 1 << 2,
+
+ /* Second level operation type for OTHER */
+ ARM_SPE_OP_SVE_OTHER = 1 << 16,
+ ARM_SPE_OP_SVE_FP = 1 << 17,
+ ARM_SPE_OP_SVE_PRED_OTHER = 1 << 18,
+
+ /* Second level operation type for LDST */
+ ARM_SPE_OP_LD = 1 << 16,
+ ARM_SPE_OP_ST = 1 << 17,
+ ARM_SPE_OP_ATOMIC = 1 << 18,
+ ARM_SPE_OP_EXCL = 1 << 19,
+ ARM_SPE_OP_AR = 1 << 20,
+ ARM_SPE_OP_SIMD_FP = 1 << 21,
+ ARM_SPE_OP_GP_REG = 1 << 22,
+ ARM_SPE_OP_UNSPEC_REG = 1 << 23,
+ ARM_SPE_OP_NV_SYSREG = 1 << 24,
+ ARM_SPE_OP_SVE_LDST = 1 << 25,
+ ARM_SPE_OP_SVE_PRED_LDST = 1 << 26,
+ ARM_SPE_OP_SVE_SG = 1 << 27,
+
+ /* Second level operation type for BRANCH_ERET */
+ ARM_SPE_OP_BR_COND = 1 << 16,
+ ARM_SPE_OP_BR_INDIRECT = 1 << 17,
+};
+
+enum arm_spe_neoverse_data_source {
+ ARM_SPE_NV_L1D = 0x0,
+ ARM_SPE_NV_L2 = 0x8,
+ ARM_SPE_NV_PEER_CORE = 0x9,
+ ARM_SPE_NV_LOCAL_CLUSTER = 0xa,
+ ARM_SPE_NV_SYS_CACHE = 0xb,
+ ARM_SPE_NV_PEER_CLUSTER = 0xc,
+ ARM_SPE_NV_REMOTE = 0xd,
+ ARM_SPE_NV_DRAM = 0xe,
+};
+
+struct arm_spe_record {
+ enum arm_spe_sample_type type;
+ int err;
+ u32 op;
+ u32 latency;
+ u64 from_ip;
+ u64 to_ip;
+ u64 timestamp;
+ u64 virt_addr;
+ u64 phys_addr;
+ u64 context_id;
+ u16 source;
+};
+
+struct arm_spe_insn;
+
+struct arm_spe_buffer {
+ const unsigned char *buf;
+ size_t len;
+ u64 offset;
+ u64 trace_nr;
+};
+
+struct arm_spe_params {
+ int (*get_trace)(struct arm_spe_buffer *buffer, void *data);
+ void *data;
+};
+
+struct arm_spe_decoder {
+ int (*get_trace)(struct arm_spe_buffer *buffer, void *data);
+ void *data;
+ struct arm_spe_record record;
+
+ const unsigned char *buf;
+ size_t len;
+
+ struct arm_spe_pkt packet;
+};
+
+struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params);
+void arm_spe_decoder_free(struct arm_spe_decoder *decoder);
+
+int arm_spe_decode(struct arm_spe_decoder *decoder);
+
+#endif
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
new file mode 100644
index 000000000..a454c6737
--- /dev/null
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -0,0 +1,546 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Arm Statistical Profiling Extensions (SPE) support
+ * Copyright (c) 2017-2018, Arm Ltd.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <endian.h>
+#include <byteswap.h>
+#include <linux/bitops.h>
+#include <stdarg.h>
+
+#include "arm-spe-pkt-decoder.h"
+
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define le16_to_cpu bswap_16
+#define le32_to_cpu bswap_32
+#define le64_to_cpu bswap_64
+#define memcpy_le64(d, s, n) do { \
+ memcpy((d), (s), (n)); \
+ *(d) = le64_to_cpu(*(d)); \
+} while (0)
+#else
+#define le16_to_cpu
+#define le32_to_cpu
+#define le64_to_cpu
+#define memcpy_le64 memcpy
+#endif
+
+static const char * const arm_spe_packet_name[] = {
+ [ARM_SPE_PAD] = "PAD",
+ [ARM_SPE_END] = "END",
+ [ARM_SPE_TIMESTAMP] = "TS",
+ [ARM_SPE_ADDRESS] = "ADDR",
+ [ARM_SPE_COUNTER] = "LAT",
+ [ARM_SPE_CONTEXT] = "CONTEXT",
+ [ARM_SPE_OP_TYPE] = "OP-TYPE",
+ [ARM_SPE_EVENTS] = "EVENTS",
+ [ARM_SPE_DATA_SOURCE] = "DATA-SOURCE",
+};
+
+const char *arm_spe_pkt_name(enum arm_spe_pkt_type type)
+{
+ return arm_spe_packet_name[type];
+}
+
+/*
+ * Extracts the field "sz" from header bits and converts to bytes:
+ * 00 : byte (1)
+ * 01 : halfword (2)
+ * 10 : word (4)
+ * 11 : doubleword (8)
+ */
+static unsigned int arm_spe_payload_len(unsigned char hdr)
+{
+ return 1U << ((hdr & GENMASK_ULL(5, 4)) >> 4);
+}
+
+static int arm_spe_get_payload(const unsigned char *buf, size_t len,
+ unsigned char ext_hdr,
+ struct arm_spe_pkt *packet)
+{
+ size_t payload_len = arm_spe_payload_len(buf[ext_hdr]);
+
+ if (len < 1 + ext_hdr + payload_len)
+ return ARM_SPE_NEED_MORE_BYTES;
+
+ buf += 1 + ext_hdr;
+
+ switch (payload_len) {
+ case 1: packet->payload = *(uint8_t *)buf; break;
+ case 2: packet->payload = le16_to_cpu(*(uint16_t *)buf); break;
+ case 4: packet->payload = le32_to_cpu(*(uint32_t *)buf); break;
+ case 8: packet->payload = le64_to_cpu(*(uint64_t *)buf); break;
+ default: return ARM_SPE_BAD_PACKET;
+ }
+
+ return 1 + ext_hdr + payload_len;
+}
+
+static int arm_spe_get_pad(struct arm_spe_pkt *packet)
+{
+ packet->type = ARM_SPE_PAD;
+ return 1;
+}
+
+static int arm_spe_get_alignment(const unsigned char *buf, size_t len,
+ struct arm_spe_pkt *packet)
+{
+ unsigned int alignment = 1 << ((buf[0] & 0xf) + 1);
+
+ if (len < alignment)
+ return ARM_SPE_NEED_MORE_BYTES;
+
+ packet->type = ARM_SPE_PAD;
+ return alignment - (((uintptr_t)buf) & (alignment - 1));
+}
+
+static int arm_spe_get_end(struct arm_spe_pkt *packet)
+{
+ packet->type = ARM_SPE_END;
+ return 1;
+}
+
+static int arm_spe_get_timestamp(const unsigned char *buf, size_t len,
+ struct arm_spe_pkt *packet)
+{
+ packet->type = ARM_SPE_TIMESTAMP;
+ return arm_spe_get_payload(buf, len, 0, packet);
+}
+
+static int arm_spe_get_events(const unsigned char *buf, size_t len,
+ struct arm_spe_pkt *packet)
+{
+ packet->type = ARM_SPE_EVENTS;
+
+ /* we use index to identify Events with a less number of
+ * comparisons in arm_spe_pkt_desc(): E.g., the LLC-ACCESS,
+ * LLC-REFILL, and REMOTE-ACCESS events are identified if
+ * index > 1.
+ */
+ packet->index = arm_spe_payload_len(buf[0]);
+
+ return arm_spe_get_payload(buf, len, 0, packet);
+}
+
+static int arm_spe_get_data_source(const unsigned char *buf, size_t len,
+ struct arm_spe_pkt *packet)
+{
+ packet->type = ARM_SPE_DATA_SOURCE;
+ return arm_spe_get_payload(buf, len, 0, packet);
+}
+
+static int arm_spe_get_context(const unsigned char *buf, size_t len,
+ struct arm_spe_pkt *packet)
+{
+ packet->type = ARM_SPE_CONTEXT;
+ packet->index = SPE_CTX_PKT_HDR_INDEX(buf[0]);
+ return arm_spe_get_payload(buf, len, 0, packet);
+}
+
+static int arm_spe_get_op_type(const unsigned char *buf, size_t len,
+ struct arm_spe_pkt *packet)
+{
+ packet->type = ARM_SPE_OP_TYPE;
+ packet->index = SPE_OP_PKT_HDR_CLASS(buf[0]);
+ return arm_spe_get_payload(buf, len, 0, packet);
+}
+
+static int arm_spe_get_counter(const unsigned char *buf, size_t len,
+ const unsigned char ext_hdr, struct arm_spe_pkt *packet)
+{
+ packet->type = ARM_SPE_COUNTER;
+
+ if (ext_hdr)
+ packet->index = SPE_HDR_EXTENDED_INDEX(buf[0], buf[1]);
+ else
+ packet->index = SPE_HDR_SHORT_INDEX(buf[0]);
+
+ return arm_spe_get_payload(buf, len, ext_hdr, packet);
+}
+
+static int arm_spe_get_addr(const unsigned char *buf, size_t len,
+ const unsigned char ext_hdr, struct arm_spe_pkt *packet)
+{
+ packet->type = ARM_SPE_ADDRESS;
+
+ if (ext_hdr)
+ packet->index = SPE_HDR_EXTENDED_INDEX(buf[0], buf[1]);
+ else
+ packet->index = SPE_HDR_SHORT_INDEX(buf[0]);
+
+ return arm_spe_get_payload(buf, len, ext_hdr, packet);
+}
+
+static int arm_spe_do_get_packet(const unsigned char *buf, size_t len,
+ struct arm_spe_pkt *packet)
+{
+ unsigned int hdr;
+ unsigned char ext_hdr = 0;
+
+ memset(packet, 0, sizeof(struct arm_spe_pkt));
+
+ if (!len)
+ return ARM_SPE_NEED_MORE_BYTES;
+
+ hdr = buf[0];
+
+ if (hdr == SPE_HEADER0_PAD)
+ return arm_spe_get_pad(packet);
+
+ if (hdr == SPE_HEADER0_END) /* no timestamp at end of record */
+ return arm_spe_get_end(packet);
+
+ if (hdr == SPE_HEADER0_TIMESTAMP)
+ return arm_spe_get_timestamp(buf, len, packet);
+
+ if ((hdr & SPE_HEADER0_MASK1) == SPE_HEADER0_EVENTS)
+ return arm_spe_get_events(buf, len, packet);
+
+ if ((hdr & SPE_HEADER0_MASK1) == SPE_HEADER0_SOURCE)
+ return arm_spe_get_data_source(buf, len, packet);
+
+ if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_CONTEXT)
+ return arm_spe_get_context(buf, len, packet);
+
+ if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_OP_TYPE)
+ return arm_spe_get_op_type(buf, len, packet);
+
+ if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_EXTENDED) {
+ /* 16-bit extended format header */
+ if (len == 1)
+ return ARM_SPE_BAD_PACKET;
+
+ ext_hdr = 1;
+ hdr = buf[1];
+ if (hdr == SPE_HEADER1_ALIGNMENT)
+ return arm_spe_get_alignment(buf, len, packet);
+ }
+
+ /*
+ * The short format header's byte 0 or the extended format header's
+ * byte 1 has been assigned to 'hdr', which uses the same encoding for
+ * address packet and counter packet, so don't need to distinguish if
+ * it's short format or extended format and handle in once.
+ */
+ if ((hdr & SPE_HEADER0_MASK3) == SPE_HEADER0_ADDRESS)
+ return arm_spe_get_addr(buf, len, ext_hdr, packet);
+
+ if ((hdr & SPE_HEADER0_MASK3) == SPE_HEADER0_COUNTER)
+ return arm_spe_get_counter(buf, len, ext_hdr, packet);
+
+ return ARM_SPE_BAD_PACKET;
+}
+
+int arm_spe_get_packet(const unsigned char *buf, size_t len,
+ struct arm_spe_pkt *packet)
+{
+ int ret;
+
+ ret = arm_spe_do_get_packet(buf, len, packet);
+ /* put multiple consecutive PADs on the same line, up to
+ * the fixed-width output format of 16 bytes per line.
+ */
+ if (ret > 0 && packet->type == ARM_SPE_PAD) {
+ while (ret < 16 && len > (size_t)ret && !buf[ret])
+ ret += 1;
+ }
+ return ret;
+}
+
+static int arm_spe_pkt_out_string(int *err, char **buf_p, size_t *blen,
+ const char *fmt, ...)
+{
+ va_list ap;
+ int ret;
+
+ /* Bail out if any error occurred */
+ if (err && *err)
+ return *err;
+
+ va_start(ap, fmt);
+ ret = vsnprintf(*buf_p, *blen, fmt, ap);
+ va_end(ap);
+
+ if (ret < 0) {
+ if (err && !*err)
+ *err = ret;
+
+ /*
+ * A return value of *blen or more means that the output was
+ * truncated and the buffer is overrun.
+ */
+ } else if ((size_t)ret >= *blen) {
+ (*buf_p)[*blen - 1] = '\0';
+
+ /*
+ * Set *err to 'ret' to avoid overflow if tries to
+ * fill this buffer sequentially.
+ */
+ if (err && !*err)
+ *err = ret;
+ } else {
+ *buf_p += ret;
+ *blen -= ret;
+ }
+
+ return ret;
+}
+
+static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet,
+ char *buf, size_t buf_len)
+{
+ u64 payload = packet->payload;
+ int err = 0;
+
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, "EV");
+
+ if (payload & BIT(EV_EXCEPTION_GEN))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCEPTION-GEN");
+ if (payload & BIT(EV_RETIRED))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " RETIRED");
+ if (payload & BIT(EV_L1D_ACCESS))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-ACCESS");
+ if (payload & BIT(EV_L1D_REFILL))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-REFILL");
+ if (payload & BIT(EV_TLB_ACCESS))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-ACCESS");
+ if (payload & BIT(EV_TLB_WALK))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-REFILL");
+ if (payload & BIT(EV_NOT_TAKEN))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " NOT-TAKEN");
+ if (payload & BIT(EV_MISPRED))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " MISPRED");
+ if (payload & BIT(EV_LLC_ACCESS))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-ACCESS");
+ if (payload & BIT(EV_LLC_MISS))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL");
+ if (payload & BIT(EV_REMOTE_ACCESS))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS");
+ if (payload & BIT(EV_ALIGNMENT))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " ALIGNMENT");
+ if (payload & BIT(EV_PARTIAL_PREDICATE))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-PARTIAL-PRED");
+ if (payload & BIT(EV_EMPTY_PREDICATE))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-EMPTY-PRED");
+
+ return err;
+}
+
+static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
+ char *buf, size_t buf_len)
+{
+ u64 payload = packet->payload;
+ int err = 0;
+
+ switch (packet->index) {
+ case SPE_OP_PKT_HDR_CLASS_OTHER:
+ if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload)) {
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, "SVE-OTHER");
+
+ /* SVE effective vector length */
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " EVLEN %d",
+ SPE_OP_PKG_SVE_EVL(payload));
+
+ if (payload & SPE_OP_PKT_SVE_FP)
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP");
+ if (payload & SPE_OP_PKT_SVE_PRED)
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED");
+ } else {
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, "OTHER");
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " %s",
+ payload & SPE_OP_PKT_COND ?
+ "COND-SELECT" : "INSN-OTHER");
+ }
+ break;
+ case SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC:
+ arm_spe_pkt_out_string(&err, &buf, &buf_len,
+ payload & 0x1 ? "ST" : "LD");
+
+ if (SPE_OP_PKT_IS_LDST_ATOMIC(payload)) {
+ if (payload & SPE_OP_PKT_AT)
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " AT");
+ if (payload & SPE_OP_PKT_EXCL)
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCL");
+ if (payload & SPE_OP_PKT_AR)
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR");
+ }
+
+ switch (SPE_OP_PKT_LDST_SUBCLASS_GET(payload)) {
+ case SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP:
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP");
+ break;
+ case SPE_OP_PKT_LDST_SUBCLASS_GP_REG:
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " GP-REG");
+ break;
+ case SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG:
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " UNSPEC-REG");
+ break;
+ case SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG:
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " NV-SYSREG");
+ break;
+ case SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG:
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " MTE-TAG");
+ break;
+ case SPE_OP_PKT_LDST_SUBCLASS_MEMCPY:
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " MEMCPY");
+ break;
+ case SPE_OP_PKT_LDST_SUBCLASS_MEMSET:
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " MEMSET");
+ break;
+ default:
+ break;
+ }
+
+ if (SPE_OP_PKT_IS_LDST_SVE(payload)) {
+ /* SVE effective vector length */
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " EVLEN %d",
+ SPE_OP_PKG_SVE_EVL(payload));
+
+ if (payload & SPE_OP_PKT_SVE_PRED)
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED");
+ if (payload & SPE_OP_PKT_SVE_SG)
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " SG");
+ }
+ break;
+ case SPE_OP_PKT_HDR_CLASS_BR_ERET:
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, "B");
+
+ if (payload & SPE_OP_PKT_COND)
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " COND");
+
+ if (SPE_OP_PKT_IS_INDIRECT_BRANCH(payload))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " IND");
+
+ break;
+ default:
+ /* Unknown index */
+ err = -1;
+ break;
+ }
+
+ return err;
+}
+
+static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet,
+ char *buf, size_t buf_len)
+{
+ int ns, el, idx = packet->index;
+ int ch, pat;
+ u64 payload = packet->payload;
+ int err = 0;
+ static const char *idx_name[] = {"PC", "TGT", "VA", "PA", "PBT"};
+
+ switch (idx) {
+ case SPE_ADDR_PKT_HDR_INDEX_INS:
+ case SPE_ADDR_PKT_HDR_INDEX_BRANCH:
+ case SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH:
+ ns = !!SPE_ADDR_PKT_GET_NS(payload);
+ el = SPE_ADDR_PKT_GET_EL(payload);
+ payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
+ arm_spe_pkt_out_string(&err, &buf, &buf_len,
+ "%s 0x%llx el%d ns=%d",
+ idx_name[idx], payload, el, ns);
+ break;
+ case SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT:
+ arm_spe_pkt_out_string(&err, &buf, &buf_len,
+ "VA 0x%llx", payload);
+ break;
+ case SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS:
+ ns = !!SPE_ADDR_PKT_GET_NS(payload);
+ ch = !!SPE_ADDR_PKT_GET_CH(payload);
+ pat = SPE_ADDR_PKT_GET_PAT(payload);
+ payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
+ arm_spe_pkt_out_string(&err, &buf, &buf_len,
+ "PA 0x%llx ns=%d ch=%d pat=%x",
+ payload, ns, ch, pat);
+ break;
+ default:
+ /* Unknown index */
+ err = -1;
+ break;
+ }
+
+ return err;
+}
+
+static int arm_spe_pkt_desc_counter(const struct arm_spe_pkt *packet,
+ char *buf, size_t buf_len)
+{
+ u64 payload = packet->payload;
+ const char *name = arm_spe_pkt_name(packet->type);
+ int err = 0;
+
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, "%s %d ", name,
+ (unsigned short)payload);
+
+ switch (packet->index) {
+ case SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT:
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, "TOT");
+ break;
+ case SPE_CNT_PKT_HDR_INDEX_ISSUE_LAT:
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, "ISSUE");
+ break;
+ case SPE_CNT_PKT_HDR_INDEX_TRANS_LAT:
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, "XLAT");
+ break;
+ default:
+ break;
+ }
+
+ return err;
+}
+
+int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
+ size_t buf_len)
+{
+ int idx = packet->index;
+ unsigned long long payload = packet->payload;
+ const char *name = arm_spe_pkt_name(packet->type);
+ char *buf_orig = buf;
+ size_t blen = buf_len;
+ int err = 0;
+
+ switch (packet->type) {
+ case ARM_SPE_BAD:
+ case ARM_SPE_PAD:
+ case ARM_SPE_END:
+ arm_spe_pkt_out_string(&err, &buf, &blen, "%s", name);
+ break;
+ case ARM_SPE_EVENTS:
+ err = arm_spe_pkt_desc_event(packet, buf, buf_len);
+ break;
+ case ARM_SPE_OP_TYPE:
+ err = arm_spe_pkt_desc_op_type(packet, buf, buf_len);
+ break;
+ case ARM_SPE_DATA_SOURCE:
+ case ARM_SPE_TIMESTAMP:
+ arm_spe_pkt_out_string(&err, &buf, &blen, "%s %lld", name, payload);
+ break;
+ case ARM_SPE_ADDRESS:
+ err = arm_spe_pkt_desc_addr(packet, buf, buf_len);
+ break;
+ case ARM_SPE_CONTEXT:
+ arm_spe_pkt_out_string(&err, &buf, &blen, "%s 0x%lx el%d",
+ name, (unsigned long)payload, idx + 1);
+ break;
+ case ARM_SPE_COUNTER:
+ err = arm_spe_pkt_desc_counter(packet, buf, buf_len);
+ break;
+ default:
+ /* Unknown packet type */
+ err = -1;
+ break;
+ }
+
+ /* Output raw data if detect any error */
+ if (err) {
+ err = 0;
+ arm_spe_pkt_out_string(&err, &buf_orig, &buf_len, "%s 0x%llx (%d)",
+ name, payload, packet->index);
+ }
+
+ return err;
+}
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
new file mode 100644
index 000000000..464a912b2
--- /dev/null
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Arm Statistical Profiling Extensions (SPE) support
+ * Copyright (c) 2017-2018, Arm Ltd.
+ */
+
+#ifndef INCLUDE__ARM_SPE_PKT_DECODER_H__
+#define INCLUDE__ARM_SPE_PKT_DECODER_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define ARM_SPE_PKT_DESC_MAX 256
+
+#define ARM_SPE_NEED_MORE_BYTES -1
+#define ARM_SPE_BAD_PACKET -2
+
+#define ARM_SPE_PKT_MAX_SZ 16
+
+enum arm_spe_pkt_type {
+ ARM_SPE_BAD,
+ ARM_SPE_PAD,
+ ARM_SPE_END,
+ ARM_SPE_TIMESTAMP,
+ ARM_SPE_ADDRESS,
+ ARM_SPE_COUNTER,
+ ARM_SPE_CONTEXT,
+ ARM_SPE_OP_TYPE,
+ ARM_SPE_EVENTS,
+ ARM_SPE_DATA_SOURCE,
+};
+
+struct arm_spe_pkt {
+ enum arm_spe_pkt_type type;
+ unsigned char index;
+ uint64_t payload;
+};
+
+/* Short header (HEADER0) and extended header (HEADER1) */
+#define SPE_HEADER0_PAD 0x0
+#define SPE_HEADER0_END 0x1
+#define SPE_HEADER0_TIMESTAMP 0x71
+/* Mask for event & data source */
+#define SPE_HEADER0_MASK1 (GENMASK_ULL(7, 6) | GENMASK_ULL(3, 0))
+#define SPE_HEADER0_EVENTS 0x42
+#define SPE_HEADER0_SOURCE 0x43
+/* Mask for context & operation */
+#define SPE_HEADER0_MASK2 GENMASK_ULL(7, 2)
+#define SPE_HEADER0_CONTEXT 0x64
+#define SPE_HEADER0_OP_TYPE 0x48
+/* Mask for extended format */
+#define SPE_HEADER0_EXTENDED 0x20
+/* Mask for address & counter */
+#define SPE_HEADER0_MASK3 GENMASK_ULL(7, 3)
+#define SPE_HEADER0_ADDRESS 0xb0
+#define SPE_HEADER0_COUNTER 0x98
+#define SPE_HEADER1_ALIGNMENT 0x0
+
+#define SPE_HDR_SHORT_INDEX(h) ((h) & GENMASK_ULL(2, 0))
+#define SPE_HDR_EXTENDED_INDEX(h0, h1) (((h0) & GENMASK_ULL(1, 0)) << 3 | \
+ SPE_HDR_SHORT_INDEX(h1))
+
+/* Address packet header */
+#define SPE_ADDR_PKT_HDR_INDEX_INS 0x0
+#define SPE_ADDR_PKT_HDR_INDEX_BRANCH 0x1
+#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT 0x2
+#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS 0x3
+#define SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH 0x4
+
+/* Address packet payload */
+#define SPE_ADDR_PKT_ADDR_BYTE7_SHIFT 56
+#define SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(v) ((v) & GENMASK_ULL(55, 0))
+#define SPE_ADDR_PKT_ADDR_GET_BYTE_6(v) (((v) & GENMASK_ULL(55, 48)) >> 48)
+
+#define SPE_ADDR_PKT_GET_NS(v) (((v) & BIT_ULL(63)) >> 63)
+#define SPE_ADDR_PKT_GET_EL(v) (((v) & GENMASK_ULL(62, 61)) >> 61)
+#define SPE_ADDR_PKT_GET_CH(v) (((v) & BIT_ULL(62)) >> 62)
+#define SPE_ADDR_PKT_GET_PAT(v) (((v) & GENMASK_ULL(59, 56)) >> 56)
+
+#define SPE_ADDR_PKT_EL0 0
+#define SPE_ADDR_PKT_EL1 1
+#define SPE_ADDR_PKT_EL2 2
+#define SPE_ADDR_PKT_EL3 3
+
+/* Context packet header */
+#define SPE_CTX_PKT_HDR_INDEX(h) ((h) & GENMASK_ULL(1, 0))
+
+/* Counter packet header */
+#define SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT 0x0
+#define SPE_CNT_PKT_HDR_INDEX_ISSUE_LAT 0x1
+#define SPE_CNT_PKT_HDR_INDEX_TRANS_LAT 0x2
+
+/* Event packet payload */
+enum arm_spe_events {
+ EV_EXCEPTION_GEN = 0,
+ EV_RETIRED = 1,
+ EV_L1D_ACCESS = 2,
+ EV_L1D_REFILL = 3,
+ EV_TLB_ACCESS = 4,
+ EV_TLB_WALK = 5,
+ EV_NOT_TAKEN = 6,
+ EV_MISPRED = 7,
+ EV_LLC_ACCESS = 8,
+ EV_LLC_MISS = 9,
+ EV_REMOTE_ACCESS = 10,
+ EV_ALIGNMENT = 11,
+ EV_PARTIAL_PREDICATE = 17,
+ EV_EMPTY_PREDICATE = 18,
+};
+
+/* Operation packet header */
+#define SPE_OP_PKT_HDR_CLASS(h) ((h) & GENMASK_ULL(1, 0))
+#define SPE_OP_PKT_HDR_CLASS_OTHER 0x0
+#define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC 0x1
+#define SPE_OP_PKT_HDR_CLASS_BR_ERET 0x2
+
+#define SPE_OP_PKT_IS_OTHER_SVE_OP(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8)
+
+#define SPE_OP_PKT_COND BIT(0)
+
+#define SPE_OP_PKT_LDST_SUBCLASS_GET(v) ((v) & GENMASK_ULL(7, 1))
+#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG 0x0
+#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP 0x4
+#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG 0x10
+#define SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG 0x30
+#define SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG 0x14
+#define SPE_OP_PKT_LDST_SUBCLASS_MEMCPY 0x20
+#define SPE_OP_PKT_LDST_SUBCLASS_MEMSET 0x25
+
+#define SPE_OP_PKT_IS_LDST_ATOMIC(v) (((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2)
+
+#define SPE_OP_PKT_AR BIT(4)
+#define SPE_OP_PKT_EXCL BIT(3)
+#define SPE_OP_PKT_AT BIT(2)
+#define SPE_OP_PKT_ST BIT(0)
+
+#define SPE_OP_PKT_IS_LDST_SVE(v) (((v) & (BIT(3) | BIT(1))) == 0x8)
+
+#define SPE_OP_PKT_SVE_SG BIT(7)
+/*
+ * SVE effective vector length (EVL) is stored in byte 0 bits [6:4];
+ * the length is rounded up to a power of two and use 32 as one step,
+ * so EVL calculation is:
+ *
+ * 32 * (2 ^ bits [6:4]) = 32 << (bits [6:4])
+ */
+#define SPE_OP_PKG_SVE_EVL(v) (32 << (((v) & GENMASK_ULL(6, 4)) >> 4))
+#define SPE_OP_PKT_SVE_PRED BIT(2)
+#define SPE_OP_PKT_SVE_FP BIT(1)
+
+#define SPE_OP_PKT_IS_INDIRECT_BRANCH(v) (((v) & GENMASK_ULL(7, 1)) == 0x2)
+
+const char *arm_spe_pkt_name(enum arm_spe_pkt_type);
+
+int arm_spe_get_packet(const unsigned char *buf, size_t len,
+ struct arm_spe_pkt *packet);
+
+int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, size_t len);
+#endif
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
new file mode 100644
index 000000000..afbd5869f
--- /dev/null
+++ b/tools/perf/util/arm-spe.c
@@ -0,0 +1,1379 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Arm Statistical Profiling Extensions (SPE) support
+ * Copyright (c) 2017-2018, Arm Ltd.
+ */
+
+#include <byteswap.h>
+#include <endian.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/types.h>
+#include <linux/zalloc.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "auxtrace.h"
+#include "color.h"
+#include "debug.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "machine.h"
+#include "session.h"
+#include "symbol.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "tsc.h"
+#include "tool.h"
+#include "util/synthetic-events.h"
+
+#include "arm-spe.h"
+#include "arm-spe-decoder/arm-spe-decoder.h"
+#include "arm-spe-decoder/arm-spe-pkt-decoder.h"
+
+#include "../../arch/arm64/include/asm/cputype.h"
+#define MAX_TIMESTAMP (~0ULL)
+
+struct arm_spe {
+ struct auxtrace auxtrace;
+ struct auxtrace_queues queues;
+ struct auxtrace_heap heap;
+ struct itrace_synth_opts synth_opts;
+ u32 auxtrace_type;
+ struct perf_session *session;
+ struct machine *machine;
+ u32 pmu_type;
+ u64 midr;
+
+ struct perf_tsc_conversion tc;
+
+ u8 timeless_decoding;
+ u8 data_queued;
+
+ u64 sample_type;
+ u8 sample_flc;
+ u8 sample_llc;
+ u8 sample_tlb;
+ u8 sample_branch;
+ u8 sample_remote_access;
+ u8 sample_memory;
+ u8 sample_instructions;
+ u64 instructions_sample_period;
+
+ u64 l1d_miss_id;
+ u64 l1d_access_id;
+ u64 llc_miss_id;
+ u64 llc_access_id;
+ u64 tlb_miss_id;
+ u64 tlb_access_id;
+ u64 branch_miss_id;
+ u64 remote_access_id;
+ u64 memory_id;
+ u64 instructions_id;
+
+ u64 kernel_start;
+
+ unsigned long num_events;
+ u8 use_ctx_pkt_for_pid;
+};
+
+struct arm_spe_queue {
+ struct arm_spe *spe;
+ unsigned int queue_nr;
+ struct auxtrace_buffer *buffer;
+ struct auxtrace_buffer *old_buffer;
+ union perf_event *event_buf;
+ bool on_heap;
+ bool done;
+ pid_t pid;
+ pid_t tid;
+ int cpu;
+ struct arm_spe_decoder *decoder;
+ u64 time;
+ u64 timestamp;
+ struct thread *thread;
+ u64 period_instructions;
+};
+
+static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
+ unsigned char *buf, size_t len)
+{
+ struct arm_spe_pkt packet;
+ size_t pos = 0;
+ int ret, pkt_len, i;
+ char desc[ARM_SPE_PKT_DESC_MAX];
+ const char *color = PERF_COLOR_BLUE;
+
+ color_fprintf(stdout, color,
+ ". ... ARM SPE data: size %#zx bytes\n",
+ len);
+
+ while (len) {
+ ret = arm_spe_get_packet(buf, len, &packet);
+ if (ret > 0)
+ pkt_len = ret;
+ else
+ pkt_len = 1;
+ printf(".");
+ color_fprintf(stdout, color, " %08x: ", pos);
+ for (i = 0; i < pkt_len; i++)
+ color_fprintf(stdout, color, " %02x", buf[i]);
+ for (; i < 16; i++)
+ color_fprintf(stdout, color, " ");
+ if (ret > 0) {
+ ret = arm_spe_pkt_desc(&packet, desc,
+ ARM_SPE_PKT_DESC_MAX);
+ if (!ret)
+ color_fprintf(stdout, color, " %s\n", desc);
+ } else {
+ color_fprintf(stdout, color, " Bad packet!\n");
+ }
+ pos += pkt_len;
+ buf += pkt_len;
+ len -= pkt_len;
+ }
+}
+
+static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
+ size_t len)
+{
+ printf(".\n");
+ arm_spe_dump(spe, buf, len);
+}
+
+static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
+{
+ struct arm_spe_queue *speq = data;
+ struct auxtrace_buffer *buffer = speq->buffer;
+ struct auxtrace_buffer *old_buffer = speq->old_buffer;
+ struct auxtrace_queue *queue;
+
+ queue = &speq->spe->queues.queue_array[speq->queue_nr];
+
+ buffer = auxtrace_buffer__next(queue, buffer);
+ /* If no more data, drop the previous auxtrace_buffer and return */
+ if (!buffer) {
+ if (old_buffer)
+ auxtrace_buffer__drop_data(old_buffer);
+ b->len = 0;
+ return 0;
+ }
+
+ speq->buffer = buffer;
+
+ /* If the aux_buffer doesn't have data associated, try to load it */
+ if (!buffer->data) {
+ /* get the file desc associated with the perf data file */
+ int fd = perf_data__fd(speq->spe->session->data);
+
+ buffer->data = auxtrace_buffer__get_data(buffer, fd);
+ if (!buffer->data)
+ return -ENOMEM;
+ }
+
+ b->len = buffer->size;
+ b->buf = buffer->data;
+
+ if (b->len) {
+ if (old_buffer)
+ auxtrace_buffer__drop_data(old_buffer);
+ speq->old_buffer = buffer;
+ } else {
+ auxtrace_buffer__drop_data(buffer);
+ return arm_spe_get_trace(b, data);
+ }
+
+ return 0;
+}
+
+static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
+ unsigned int queue_nr)
+{
+ struct arm_spe_params params = { .get_trace = 0, };
+ struct arm_spe_queue *speq;
+
+ speq = zalloc(sizeof(*speq));
+ if (!speq)
+ return NULL;
+
+ speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
+ if (!speq->event_buf)
+ goto out_free;
+
+ speq->spe = spe;
+ speq->queue_nr = queue_nr;
+ speq->pid = -1;
+ speq->tid = -1;
+ speq->cpu = -1;
+ speq->period_instructions = 0;
+
+ /* params set */
+ params.get_trace = arm_spe_get_trace;
+ params.data = speq;
+
+ /* create new decoder */
+ speq->decoder = arm_spe_decoder_new(&params);
+ if (!speq->decoder)
+ goto out_free;
+
+ return speq;
+
+out_free:
+ zfree(&speq->event_buf);
+ free(speq);
+
+ return NULL;
+}
+
+static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
+{
+ return ip >= spe->kernel_start ?
+ PERF_RECORD_MISC_KERNEL :
+ PERF_RECORD_MISC_USER;
+}
+
+static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
+ struct auxtrace_queue *queue)
+{
+ struct arm_spe_queue *speq = queue->priv;
+ pid_t tid;
+
+ tid = machine__get_current_tid(spe->machine, speq->cpu);
+ if (tid != -1) {
+ speq->tid = tid;
+ thread__zput(speq->thread);
+ } else
+ speq->tid = queue->tid;
+
+ if ((!speq->thread) && (speq->tid != -1)) {
+ speq->thread = machine__find_thread(spe->machine, -1,
+ speq->tid);
+ }
+
+ if (speq->thread) {
+ speq->pid = thread__pid(speq->thread);
+ if (queue->cpu == -1)
+ speq->cpu = thread__cpu(speq->thread);
+ }
+}
+
+static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
+{
+ struct arm_spe *spe = speq->spe;
+ int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);
+
+ if (err)
+ return err;
+
+ arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);
+
+ return 0;
+}
+
+static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record)
+{
+ struct simd_flags simd_flags = {};
+
+ if ((record->op & ARM_SPE_OP_LDST) && (record->op & ARM_SPE_OP_SVE_LDST))
+ simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
+
+ if ((record->op & ARM_SPE_OP_OTHER) && (record->op & ARM_SPE_OP_SVE_OTHER))
+ simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
+
+ if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
+ simd_flags.pred |= SIMD_OP_FLAGS_PRED_PARTIAL;
+
+ if (record->type & ARM_SPE_SVE_EMPTY_PRED)
+ simd_flags.pred |= SIMD_OP_FLAGS_PRED_EMPTY;
+
+ return simd_flags;
+}
+
+static void arm_spe_prep_sample(struct arm_spe *spe,
+ struct arm_spe_queue *speq,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ struct arm_spe_record *record = &speq->decoder->record;
+
+ if (!spe->timeless_decoding)
+ sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);
+
+ sample->ip = record->from_ip;
+ sample->cpumode = arm_spe_cpumode(spe, sample->ip);
+ sample->pid = speq->pid;
+ sample->tid = speq->tid;
+ sample->period = 1;
+ sample->cpu = speq->cpu;
+ sample->simd_flags = arm_spe__synth_simd_flags(record);
+
+ event->sample.header.type = PERF_RECORD_SAMPLE;
+ event->sample.header.misc = sample->cpumode;
+ event->sample.header.size = sizeof(struct perf_event_header);
+}
+
+static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
+{
+ event->header.size = perf_event__sample_event_size(sample, type, 0);
+ return perf_event__synthesize_sample(event, type, 0, sample);
+}
+
+static inline int
+arm_spe_deliver_synth_event(struct arm_spe *spe,
+ struct arm_spe_queue *speq __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ int ret;
+
+ if (spe->synth_opts.inject) {
+ ret = arm_spe__inject_event(event, sample, spe->sample_type);
+ if (ret)
+ return ret;
+ }
+
+ ret = perf_session__deliver_synth_event(spe->session, event, sample);
+ if (ret)
+ pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
+
+ return ret;
+}
+
+static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
+ u64 spe_events_id, u64 data_src)
+{
+ struct arm_spe *spe = speq->spe;
+ struct arm_spe_record *record = &speq->decoder->record;
+ union perf_event *event = speq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+
+ arm_spe_prep_sample(spe, speq, event, &sample);
+
+ sample.id = spe_events_id;
+ sample.stream_id = spe_events_id;
+ sample.addr = record->virt_addr;
+ sample.phys_addr = record->phys_addr;
+ sample.data_src = data_src;
+ sample.weight = record->latency;
+
+ return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+}
+
+static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
+ u64 spe_events_id)
+{
+ struct arm_spe *spe = speq->spe;
+ struct arm_spe_record *record = &speq->decoder->record;
+ union perf_event *event = speq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+
+ arm_spe_prep_sample(spe, speq, event, &sample);
+
+ sample.id = spe_events_id;
+ sample.stream_id = spe_events_id;
+ sample.addr = record->to_ip;
+ sample.weight = record->latency;
+
+ return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+}
+
+static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
+ u64 spe_events_id, u64 data_src)
+{
+ struct arm_spe *spe = speq->spe;
+ struct arm_spe_record *record = &speq->decoder->record;
+ union perf_event *event = speq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+
+ /*
+ * Handles perf instruction sampling period.
+ */
+ speq->period_instructions++;
+ if (speq->period_instructions < spe->instructions_sample_period)
+ return 0;
+ speq->period_instructions = 0;
+
+ arm_spe_prep_sample(spe, speq, event, &sample);
+
+ sample.id = spe_events_id;
+ sample.stream_id = spe_events_id;
+ sample.addr = record->virt_addr;
+ sample.phys_addr = record->phys_addr;
+ sample.data_src = data_src;
+ sample.period = spe->instructions_sample_period;
+ sample.weight = record->latency;
+
+ return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+}
+
+static const struct midr_range neoverse_spe[] = {
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
+ {},
+};
+
+static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record,
+ union perf_mem_data_src *data_src)
+{
+ /*
+ * Even though four levels of cache hierarchy are possible, no known
+ * production Neoverse systems currently include more than three levels
+ * so for the time being we assume three exist. If a production system
+ * is built with four the this function would have to be changed to
+ * detect the number of levels for reporting.
+ */
+
+ /*
+ * We have no data on the hit level or data source for stores in the
+ * Neoverse SPE records.
+ */
+ if (record->op & ARM_SPE_OP_ST) {
+ data_src->mem_lvl = PERF_MEM_LVL_NA;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NA;
+ return;
+ }
+
+ switch (record->source) {
+ case ARM_SPE_NV_L1D:
+ data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+ break;
+ case ARM_SPE_NV_L2:
+ data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+ break;
+ case ARM_SPE_NV_PEER_CORE:
+ data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+ data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+ break;
+ /*
+ * We don't know if this is L1, L2 but we do know it was a cache-2-cache
+ * transfer, so set SNOOPX_PEER
+ */
+ case ARM_SPE_NV_LOCAL_CLUSTER:
+ case ARM_SPE_NV_PEER_CLUSTER:
+ data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+ data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+ break;
+ /*
+ * System cache is assumed to be L3
+ */
+ case ARM_SPE_NV_SYS_CACHE:
+ data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+ data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
+ break;
+ /*
+ * We don't know what level it hit in, except it came from the other
+ * socket
+ */
+ case ARM_SPE_NV_REMOTE:
+ data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
+ data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+ data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+ break;
+ case ARM_SPE_NV_DRAM:
+ data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+ break;
+ default:
+ break;
+ }
+}
+
+static void arm_spe__synth_data_source_generic(const struct arm_spe_record *record,
+ union perf_mem_data_src *data_src)
+{
+ if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
+ data_src->mem_lvl = PERF_MEM_LVL_L3;
+
+ if (record->type & ARM_SPE_LLC_MISS)
+ data_src->mem_lvl |= PERF_MEM_LVL_MISS;
+ else
+ data_src->mem_lvl |= PERF_MEM_LVL_HIT;
+ } else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
+ data_src->mem_lvl = PERF_MEM_LVL_L1;
+
+ if (record->type & ARM_SPE_L1D_MISS)
+ data_src->mem_lvl |= PERF_MEM_LVL_MISS;
+ else
+ data_src->mem_lvl |= PERF_MEM_LVL_HIT;
+ }
+
+ if (record->type & ARM_SPE_REMOTE_ACCESS)
+ data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
+}
+
+static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
+{
+ union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
+ bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
+
+ if (record->op & ARM_SPE_OP_LD)
+ data_src.mem_op = PERF_MEM_OP_LOAD;
+ else if (record->op & ARM_SPE_OP_ST)
+ data_src.mem_op = PERF_MEM_OP_STORE;
+ else
+ return 0;
+
+ if (is_neoverse)
+ arm_spe__synth_data_source_neoverse(record, &data_src);
+ else
+ arm_spe__synth_data_source_generic(record, &data_src);
+
+ if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
+ data_src.mem_dtlb = PERF_MEM_TLB_WK;
+
+ if (record->type & ARM_SPE_TLB_MISS)
+ data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
+ else
+ data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
+ }
+
+ return data_src.val;
+}
+
+static int arm_spe_sample(struct arm_spe_queue *speq)
+{
+ const struct arm_spe_record *record = &speq->decoder->record;
+ struct arm_spe *spe = speq->spe;
+ u64 data_src;
+ int err;
+
+ data_src = arm_spe__synth_data_source(record, spe->midr);
+
+ if (spe->sample_flc) {
+ if (record->type & ARM_SPE_L1D_MISS) {
+ err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
+ data_src);
+ if (err)
+ return err;
+ }
+
+ if (record->type & ARM_SPE_L1D_ACCESS) {
+ err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
+ data_src);
+ if (err)
+ return err;
+ }
+ }
+
+ if (spe->sample_llc) {
+ if (record->type & ARM_SPE_LLC_MISS) {
+ err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
+ data_src);
+ if (err)
+ return err;
+ }
+
+ if (record->type & ARM_SPE_LLC_ACCESS) {
+ err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
+ data_src);
+ if (err)
+ return err;
+ }
+ }
+
+ if (spe->sample_tlb) {
+ if (record->type & ARM_SPE_TLB_MISS) {
+ err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
+ data_src);
+ if (err)
+ return err;
+ }
+
+ if (record->type & ARM_SPE_TLB_ACCESS) {
+ err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
+ data_src);
+ if (err)
+ return err;
+ }
+ }
+
+ if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
+ err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
+ if (err)
+ return err;
+ }
+
+ if (spe->sample_remote_access &&
+ (record->type & ARM_SPE_REMOTE_ACCESS)) {
+ err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
+ data_src);
+ if (err)
+ return err;
+ }
+
+ /*
+ * When data_src is zero it means the record is not a memory operation,
+ * skip to synthesize memory sample for this case.
+ */
+ if (spe->sample_memory && data_src) {
+ err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
+ if (err)
+ return err;
+ }
+
+ if (spe->sample_instructions) {
+ err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
+{
+ struct arm_spe *spe = speq->spe;
+ struct arm_spe_record *record;
+ int ret;
+
+ if (!spe->kernel_start)
+ spe->kernel_start = machine__kernel_start(spe->machine);
+
+ while (1) {
+ /*
+ * The usual logic is firstly to decode the packets, and then
+ * based the record to synthesize sample; but here the flow is
+ * reversed: it calls arm_spe_sample() for synthesizing samples
+ * prior to arm_spe_decode().
+ *
+ * Two reasons for this code logic:
+ * 1. Firstly, when setup queue in arm_spe__setup_queue(), it
+ * has decoded trace data and generated a record, but the record
+ * is left to generate sample until run to here, so it's correct
+ * to synthesize sample for the left record.
+ * 2. After decoding trace data, it needs to compare the record
+ * timestamp with the coming perf event, if the record timestamp
+ * is later than the perf event, it needs bail out and pushs the
+ * record into auxtrace heap, thus the record can be deferred to
+ * synthesize sample until run to here at the next time; so this
+ * can correlate samples between Arm SPE trace data and other
+ * perf events with correct time ordering.
+ */
+
+ /*
+ * Update pid/tid info.
+ */
+ record = &speq->decoder->record;
+ if (!spe->timeless_decoding && record->context_id != (u64)-1) {
+ ret = arm_spe_set_tid(speq, record->context_id);
+ if (ret)
+ return ret;
+
+ spe->use_ctx_pkt_for_pid = true;
+ }
+
+ ret = arm_spe_sample(speq);
+ if (ret)
+ return ret;
+
+ ret = arm_spe_decode(speq->decoder);
+ if (!ret) {
+ pr_debug("No data or all data has been processed.\n");
+ return 1;
+ }
+
+ /*
+ * Error is detected when decode SPE trace data, continue to
+ * the next trace data and find out more records.
+ */
+ if (ret < 0)
+ continue;
+
+ record = &speq->decoder->record;
+
+ /* Update timestamp for the last record */
+ if (record->timestamp > speq->timestamp)
+ speq->timestamp = record->timestamp;
+
+ /*
+ * If the timestamp of the queue is later than timestamp of the
+ * coming perf event, bail out so can allow the perf event to
+ * be processed ahead.
+ */
+ if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
+ *timestamp = speq->timestamp;
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+static int arm_spe__setup_queue(struct arm_spe *spe,
+ struct auxtrace_queue *queue,
+ unsigned int queue_nr)
+{
+ struct arm_spe_queue *speq = queue->priv;
+ struct arm_spe_record *record;
+
+ if (list_empty(&queue->head) || speq)
+ return 0;
+
+ speq = arm_spe__alloc_queue(spe, queue_nr);
+
+ if (!speq)
+ return -ENOMEM;
+
+ queue->priv = speq;
+
+ if (queue->cpu != -1)
+ speq->cpu = queue->cpu;
+
+ if (!speq->on_heap) {
+ int ret;
+
+ if (spe->timeless_decoding)
+ return 0;
+
+retry:
+ ret = arm_spe_decode(speq->decoder);
+
+ if (!ret)
+ return 0;
+
+ if (ret < 0)
+ goto retry;
+
+ record = &speq->decoder->record;
+
+ speq->timestamp = record->timestamp;
+ ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
+ if (ret)
+ return ret;
+ speq->on_heap = true;
+ }
+
+ return 0;
+}
+
+static int arm_spe__setup_queues(struct arm_spe *spe)
+{
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < spe->queues.nr_queues; i++) {
+ ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int arm_spe__update_queues(struct arm_spe *spe)
+{
+ if (spe->queues.new_data) {
+ spe->queues.new_data = false;
+ return arm_spe__setup_queues(spe);
+ }
+
+ return 0;
+}
+
+static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
+{
+ struct evsel *evsel;
+ struct evlist *evlist = spe->session->evlist;
+ bool timeless_decoding = true;
+
+ /*
+ * Circle through the list of event and complain if we find one
+ * with the time bit set.
+ */
+ evlist__for_each_entry(evlist, evsel) {
+ if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
+ timeless_decoding = false;
+ }
+
+ return timeless_decoding;
+}
+
+static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
+{
+ unsigned int queue_nr;
+ u64 ts;
+ int ret;
+
+ while (1) {
+ struct auxtrace_queue *queue;
+ struct arm_spe_queue *speq;
+
+ if (!spe->heap.heap_cnt)
+ return 0;
+
+ if (spe->heap.heap_array[0].ordinal >= timestamp)
+ return 0;
+
+ queue_nr = spe->heap.heap_array[0].queue_nr;
+ queue = &spe->queues.queue_array[queue_nr];
+ speq = queue->priv;
+
+ auxtrace_heap__pop(&spe->heap);
+
+ if (spe->heap.heap_cnt) {
+ ts = spe->heap.heap_array[0].ordinal + 1;
+ if (ts > timestamp)
+ ts = timestamp;
+ } else {
+ ts = timestamp;
+ }
+
+ /*
+ * A previous context-switch event has set pid/tid in the machine's context, so
+ * here we need to update the pid/tid in the thread and SPE queue.
+ */
+ if (!spe->use_ctx_pkt_for_pid)
+ arm_spe_set_pid_tid_cpu(spe, queue);
+
+ ret = arm_spe_run_decoder(speq, &ts);
+ if (ret < 0) {
+ auxtrace_heap__add(&spe->heap, queue_nr, ts);
+ return ret;
+ }
+
+ if (!ret) {
+ ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
+ if (ret < 0)
+ return ret;
+ } else {
+ speq->on_heap = false;
+ }
+ }
+
+ return 0;
+}
+
+static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
+ u64 time_)
+{
+ struct auxtrace_queues *queues = &spe->queues;
+ unsigned int i;
+ u64 ts = 0;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ struct auxtrace_queue *queue = &spe->queues.queue_array[i];
+ struct arm_spe_queue *speq = queue->priv;
+
+ if (speq && (tid == -1 || speq->tid == tid)) {
+ speq->time = time_;
+ arm_spe_set_pid_tid_cpu(spe, queue);
+ arm_spe_run_decoder(speq, &ts);
+ }
+ }
+ return 0;
+}
+
+static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
+ struct perf_sample *sample)
+{
+ pid_t pid, tid;
+ int cpu;
+
+ if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
+ return 0;
+
+ pid = event->context_switch.next_prev_pid;
+ tid = event->context_switch.next_prev_tid;
+ cpu = sample->cpu;
+
+ if (tid == -1)
+ pr_warning("context_switch event has no tid\n");
+
+ return machine__set_current_tid(spe->machine, cpu, pid, tid);
+}
+
+static int arm_spe_process_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_tool *tool)
+{
+ int err = 0;
+ u64 timestamp;
+ struct arm_spe *spe = container_of(session->auxtrace,
+ struct arm_spe, auxtrace);
+
+ if (dump_trace)
+ return 0;
+
+ if (!tool->ordered_events) {
+ pr_err("SPE trace requires ordered events\n");
+ return -EINVAL;
+ }
+
+ if (sample->time && (sample->time != (u64) -1))
+ timestamp = perf_time_to_tsc(sample->time, &spe->tc);
+ else
+ timestamp = 0;
+
+ if (timestamp || spe->timeless_decoding) {
+ err = arm_spe__update_queues(spe);
+ if (err)
+ return err;
+ }
+
+ if (spe->timeless_decoding) {
+ if (event->header.type == PERF_RECORD_EXIT) {
+ err = arm_spe_process_timeless_queues(spe,
+ event->fork.tid,
+ sample->time);
+ }
+ } else if (timestamp) {
+ err = arm_spe_process_queues(spe, timestamp);
+ if (err)
+ return err;
+
+ if (!spe->use_ctx_pkt_for_pid &&
+ (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
+ event->header.type == PERF_RECORD_SWITCH))
+ err = arm_spe_context_switch(spe, event, sample);
+ }
+
+ return err;
+}
+
+static int arm_spe_process_auxtrace_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_tool *tool __maybe_unused)
+{
+ struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
+ auxtrace);
+
+ if (!spe->data_queued) {
+ struct auxtrace_buffer *buffer;
+ off_t data_offset;
+ int fd = perf_data__fd(session->data);
+ int err;
+
+ if (perf_data__is_pipe(session->data)) {
+ data_offset = 0;
+ } else {
+ data_offset = lseek(fd, 0, SEEK_CUR);
+ if (data_offset == -1)
+ return -errno;
+ }
+
+ err = auxtrace_queues__add_event(&spe->queues, session, event,
+ data_offset, &buffer);
+ if (err)
+ return err;
+
+ /* Dump here now we have copied a piped trace out of the pipe */
+ if (dump_trace) {
+ if (auxtrace_buffer__get_data(buffer, fd)) {
+ arm_spe_dump_event(spe, buffer->data,
+ buffer->size);
+ auxtrace_buffer__put_data(buffer);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int arm_spe_flush(struct perf_session *session __maybe_unused,
+ struct perf_tool *tool __maybe_unused)
+{
+ struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
+ auxtrace);
+ int ret;
+
+ if (dump_trace)
+ return 0;
+
+ if (!tool->ordered_events)
+ return -EINVAL;
+
+ ret = arm_spe__update_queues(spe);
+ if (ret < 0)
+ return ret;
+
+ if (spe->timeless_decoding)
+ return arm_spe_process_timeless_queues(spe, -1,
+ MAX_TIMESTAMP - 1);
+
+ ret = arm_spe_process_queues(spe, MAX_TIMESTAMP);
+ if (ret)
+ return ret;
+
+ if (!spe->use_ctx_pkt_for_pid)
+ ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
+ "Matching of TIDs to SPE events could be inaccurate.\n");
+
+ return 0;
+}
+
+static void arm_spe_free_queue(void *priv)
+{
+ struct arm_spe_queue *speq = priv;
+
+ if (!speq)
+ return;
+ thread__zput(speq->thread);
+ arm_spe_decoder_free(speq->decoder);
+ zfree(&speq->event_buf);
+ free(speq);
+}
+
+static void arm_spe_free_events(struct perf_session *session)
+{
+ struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
+ auxtrace);
+ struct auxtrace_queues *queues = &spe->queues;
+ unsigned int i;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ arm_spe_free_queue(queues->queue_array[i].priv);
+ queues->queue_array[i].priv = NULL;
+ }
+ auxtrace_queues__free(queues);
+}
+
+static void arm_spe_free(struct perf_session *session)
+{
+ struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
+ auxtrace);
+
+ auxtrace_heap__free(&spe->heap);
+ arm_spe_free_events(session);
+ session->auxtrace = NULL;
+ free(spe);
+}
+
+static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
+
+ return evsel->core.attr.type == spe->pmu_type;
+}
+
+static const char * const arm_spe_info_fmts[] = {
+ [ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n",
+};
+
+static void arm_spe_print_info(__u64 *arr)
+{
+ if (!dump_trace)
+ return;
+
+ fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
+}
+
+struct arm_spe_synth {
+ struct perf_tool dummy_tool;
+ struct perf_session *session;
+};
+
+static int arm_spe_event_synth(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct arm_spe_synth *arm_spe_synth =
+ container_of(tool, struct arm_spe_synth, dummy_tool);
+
+ return perf_session__deliver_synth_event(arm_spe_synth->session,
+ event, NULL);
+}
+
+static int arm_spe_synth_event(struct perf_session *session,
+ struct perf_event_attr *attr, u64 id)
+{
+ struct arm_spe_synth arm_spe_synth;
+
+ memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
+ arm_spe_synth.session = session;
+
+ return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
+ &id, arm_spe_event_synth);
+}
+
+static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
+ const char *name)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.id && evsel->core.id[0] == id) {
+ if (evsel->name)
+ zfree(&evsel->name);
+ evsel->name = strdup(name);
+ break;
+ }
+ }
+}
+
+static int
+arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
+{
+ struct evlist *evlist = session->evlist;
+ struct evsel *evsel;
+ struct perf_event_attr attr;
+ bool found = false;
+ u64 id;
+ int err;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.type == spe->pmu_type) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ pr_debug("No selected events with SPE trace data\n");
+ return 0;
+ }
+
+ memset(&attr, 0, sizeof(struct perf_event_attr));
+ attr.size = sizeof(struct perf_event_attr);
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.sample_type = evsel->core.attr.sample_type &
+ (PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
+ attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+ PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
+ PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
+ if (spe->timeless_decoding)
+ attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
+ else
+ attr.sample_type |= PERF_SAMPLE_TIME;
+
+ spe->sample_type = attr.sample_type;
+
+ attr.exclude_user = evsel->core.attr.exclude_user;
+ attr.exclude_kernel = evsel->core.attr.exclude_kernel;
+ attr.exclude_hv = evsel->core.attr.exclude_hv;
+ attr.exclude_host = evsel->core.attr.exclude_host;
+ attr.exclude_guest = evsel->core.attr.exclude_guest;
+ attr.sample_id_all = evsel->core.attr.sample_id_all;
+ attr.read_format = evsel->core.attr.read_format;
+
+ /* create new id val to be a fixed offset from evsel id */
+ id = evsel->core.id[0] + 1000000000;
+
+ if (!id)
+ id = 1;
+
+ if (spe->synth_opts.flc) {
+ spe->sample_flc = true;
+
+ /* Level 1 data cache miss */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->l1d_miss_id = id;
+ arm_spe_set_event_name(evlist, id, "l1d-miss");
+ id += 1;
+
+ /* Level 1 data cache access */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->l1d_access_id = id;
+ arm_spe_set_event_name(evlist, id, "l1d-access");
+ id += 1;
+ }
+
+ if (spe->synth_opts.llc) {
+ spe->sample_llc = true;
+
+ /* Last level cache miss */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->llc_miss_id = id;
+ arm_spe_set_event_name(evlist, id, "llc-miss");
+ id += 1;
+
+ /* Last level cache access */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->llc_access_id = id;
+ arm_spe_set_event_name(evlist, id, "llc-access");
+ id += 1;
+ }
+
+ if (spe->synth_opts.tlb) {
+ spe->sample_tlb = true;
+
+ /* TLB miss */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->tlb_miss_id = id;
+ arm_spe_set_event_name(evlist, id, "tlb-miss");
+ id += 1;
+
+ /* TLB access */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->tlb_access_id = id;
+ arm_spe_set_event_name(evlist, id, "tlb-access");
+ id += 1;
+ }
+
+ if (spe->synth_opts.branches) {
+ spe->sample_branch = true;
+
+ /* Branch miss */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->branch_miss_id = id;
+ arm_spe_set_event_name(evlist, id, "branch-miss");
+ id += 1;
+ }
+
+ if (spe->synth_opts.remote_access) {
+ spe->sample_remote_access = true;
+
+ /* Remote access */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->remote_access_id = id;
+ arm_spe_set_event_name(evlist, id, "remote-access");
+ id += 1;
+ }
+
+ if (spe->synth_opts.mem) {
+ spe->sample_memory = true;
+
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->memory_id = id;
+ arm_spe_set_event_name(evlist, id, "memory");
+ id += 1;
+ }
+
+ if (spe->synth_opts.instructions) {
+ if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
+ pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n");
+ goto synth_instructions_out;
+ }
+ if (spe->synth_opts.period > 1)
+ pr_warning("Arm SPE has a hardware-based sample period.\n"
+ "Additional instruction events will be discarded by --itrace\n");
+
+ spe->sample_instructions = true;
+ attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+ attr.sample_period = spe->synth_opts.period;
+ spe->instructions_sample_period = attr.sample_period;
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->instructions_id = id;
+ arm_spe_set_event_name(evlist, id, "instructions");
+ }
+synth_instructions_out:
+
+ return 0;
+}
+
+int arm_spe_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session)
+{
+ struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
+ size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
+ struct perf_record_time_conv *tc = &session->time_conv;
+ const char *cpuid = perf_env__cpuid(session->evlist->env);
+ u64 midr = strtol(cpuid, NULL, 16);
+ struct arm_spe *spe;
+ int err;
+
+ if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
+ min_sz)
+ return -EINVAL;
+
+ spe = zalloc(sizeof(struct arm_spe));
+ if (!spe)
+ return -ENOMEM;
+
+ err = auxtrace_queues__init(&spe->queues);
+ if (err)
+ goto err_free;
+
+ spe->session = session;
+ spe->machine = &session->machines.host; /* No kvm support */
+ spe->auxtrace_type = auxtrace_info->type;
+ spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
+ spe->midr = midr;
+
+ spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
+
+ /*
+ * The synthesized event PERF_RECORD_TIME_CONV has been handled ahead
+ * and the parameters for hardware clock are stored in the session
+ * context. Passes these parameters to the struct perf_tsc_conversion
+ * in "spe->tc", which is used for later conversion between clock
+ * counter and timestamp.
+ *
+ * For backward compatibility, copies the fields starting from
+ * "time_cycles" only if they are contained in the event.
+ */
+ spe->tc.time_shift = tc->time_shift;
+ spe->tc.time_mult = tc->time_mult;
+ spe->tc.time_zero = tc->time_zero;
+
+ if (event_contains(*tc, time_cycles)) {
+ spe->tc.time_cycles = tc->time_cycles;
+ spe->tc.time_mask = tc->time_mask;
+ spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
+ spe->tc.cap_user_time_short = tc->cap_user_time_short;
+ }
+
+ spe->auxtrace.process_event = arm_spe_process_event;
+ spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
+ spe->auxtrace.flush_events = arm_spe_flush;
+ spe->auxtrace.free_events = arm_spe_free_events;
+ spe->auxtrace.free = arm_spe_free;
+ spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
+ session->auxtrace = &spe->auxtrace;
+
+ arm_spe_print_info(&auxtrace_info->priv[0]);
+
+ if (dump_trace)
+ return 0;
+
+ if (session->itrace_synth_opts && session->itrace_synth_opts->set)
+ spe->synth_opts = *session->itrace_synth_opts;
+ else
+ itrace_synth_opts__set_default(&spe->synth_opts, false);
+
+ err = arm_spe_synth_events(spe, session);
+ if (err)
+ goto err_free_queues;
+
+ err = auxtrace_queues__process_index(&spe->queues, session);
+ if (err)
+ goto err_free_queues;
+
+ if (spe->queues.populated)
+ spe->data_queued = true;
+
+ return 0;
+
+err_free_queues:
+ auxtrace_queues__free(&spe->queues);
+ session->auxtrace = NULL;
+err_free:
+ free(spe);
+ return err;
+}
diff --git a/tools/perf/util/arm-spe.h b/tools/perf/util/arm-spe.h
new file mode 100644
index 000000000..98d323578
--- /dev/null
+++ b/tools/perf/util/arm-spe.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Arm Statistical Profiling Extensions (SPE) support
+ * Copyright (c) 2017-2018, Arm Ltd.
+ */
+
+#ifndef INCLUDE__PERF_ARM_SPE_H__
+#define INCLUDE__PERF_ARM_SPE_H__
+
+#define ARM_SPE_PMU_NAME "arm_spe_"
+
+enum {
+ ARM_SPE_PMU_TYPE,
+ ARM_SPE_PER_CPU_MMAPS,
+ ARM_SPE_AUXTRACE_PRIV_MAX,
+};
+
+#define ARM_SPE_AUXTRACE_PRIV_SIZE (ARM_SPE_AUXTRACE_PRIV_MAX * sizeof(u64))
+
+union perf_event;
+struct perf_session;
+struct perf_pmu;
+
+struct auxtrace_record *arm_spe_recording_init(int *err,
+ struct perf_pmu *arm_spe_pmu);
+
+int arm_spe_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session);
+
+struct perf_event_attr *arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu);
+#endif
diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c
new file mode 100644
index 000000000..4940be4a0
--- /dev/null
+++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "arm64-frame-pointer-unwind-support.h"
+#include "callchain.h"
+#include "event.h"
+#include "perf_regs.h" // SMPL_REG_MASK
+#include "unwind.h"
+
+#define perf_event_arm_regs perf_event_arm64_regs
+#include "../../arch/arm64/include/uapi/asm/perf_regs.h"
+#undef perf_event_arm_regs
+
+struct entries {
+ u64 stack[2];
+ size_t length;
+};
+
+static bool get_leaf_frame_caller_enabled(struct perf_sample *sample)
+{
+ return callchain_param.record_mode == CALLCHAIN_FP && sample->user_regs.regs
+ && sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_LR);
+}
+
+static int add_entry(struct unwind_entry *entry, void *arg)
+{
+ struct entries *entries = arg;
+
+ entries->stack[entries->length++] = entry->ip;
+ return 0;
+}
+
+u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int usr_idx)
+{
+ int ret;
+ struct entries entries = {};
+ struct regs_dump old_regs = sample->user_regs;
+
+ if (!get_leaf_frame_caller_enabled(sample))
+ return 0;
+
+ /*
+ * If PC and SP are not recorded, get the value of PC from the stack
+ * and set its mask. SP is not used when doing the unwinding but it
+ * still needs to be set to prevent failures.
+ */
+
+ if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_PC))) {
+ sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_PC);
+ sample->user_regs.cache_regs[PERF_REG_ARM64_PC] = sample->callchain->ips[usr_idx+1];
+ }
+
+ if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_SP))) {
+ sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_SP);
+ sample->user_regs.cache_regs[PERF_REG_ARM64_SP] = 0;
+ }
+
+ ret = unwind__get_entries(add_entry, &entries, thread, sample, 2, true);
+ sample->user_regs = old_regs;
+
+ if (ret || entries.length != 2)
+ return ret;
+
+ return callchain_param.order == ORDER_CALLER ? entries.stack[0] : entries.stack[1];
+}
diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.h b/tools/perf/util/arm64-frame-pointer-unwind-support.h
new file mode 100644
index 000000000..42d3a4549
--- /dev/null
+++ b/tools/perf/util/arm64-frame-pointer-unwind-support.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
+#define __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
+
+#include <linux/types.h>
+
+struct perf_sample;
+struct thread;
+
+u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int user_idx);
+
+#endif /* __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H */
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
new file mode 100644
index 000000000..a0368202a
--- /dev/null
+++ b/tools/perf/util/auxtrace.c
@@ -0,0 +1,2871 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * auxtrace.c: AUX area trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ */
+
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <stdbool.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+#include <linux/string.h>
+#include <linux/time64.h>
+
+#include <sys/param.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <linux/list.h>
+#include <linux/zalloc.h>
+
+#include "config.h"
+#include "evlist.h"
+#include "dso.h"
+#include "map.h"
+#include "pmu.h"
+#include "evsel.h"
+#include "evsel_config.h"
+#include "symbol.h"
+#include "util/perf_api_probe.h"
+#include "util/synthetic-events.h"
+#include "thread_map.h"
+#include "asm/bug.h"
+#include "auxtrace.h"
+
+#include <linux/hash.h>
+
+#include "event.h"
+#include "record.h"
+#include "session.h"
+#include "debug.h"
+#include <subcmd/parse-options.h>
+
+#include "cs-etm.h"
+#include "intel-pt.h"
+#include "intel-bts.h"
+#include "arm-spe.h"
+#include "hisi-ptt.h"
+#include "s390-cpumsf.h"
+#include "util/mmap.h"
+
+#include <linux/ctype.h>
+#include "symbol/kallsyms.h"
+#include <internal/lib.h>
+#include "util/sample.h"
+
+/*
+ * Make a group from 'leader' to 'last', requiring that the events were not
+ * already grouped to a different leader.
+ */
+static int evlist__regroup(struct evlist *evlist, struct evsel *leader, struct evsel *last)
+{
+ struct evsel *evsel;
+ bool grp;
+
+ if (!evsel__is_group_leader(leader))
+ return -EINVAL;
+
+ grp = false;
+ evlist__for_each_entry(evlist, evsel) {
+ if (grp) {
+ if (!(evsel__leader(evsel) == leader ||
+ (evsel__leader(evsel) == evsel &&
+ evsel->core.nr_members <= 1)))
+ return -EINVAL;
+ } else if (evsel == leader) {
+ grp = true;
+ }
+ if (evsel == last)
+ break;
+ }
+
+ grp = false;
+ evlist__for_each_entry(evlist, evsel) {
+ if (grp) {
+ if (!evsel__has_leader(evsel, leader)) {
+ evsel__set_leader(evsel, leader);
+ if (leader->core.nr_members < 1)
+ leader->core.nr_members = 1;
+ leader->core.nr_members += 1;
+ }
+ } else if (evsel == leader) {
+ grp = true;
+ }
+ if (evsel == last)
+ break;
+ }
+
+ return 0;
+}
+
+static bool auxtrace__dont_decode(struct perf_session *session)
+{
+ return !session->itrace_synth_opts ||
+ session->itrace_synth_opts->dont_decode;
+}
+
+int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
+ struct auxtrace_mmap_params *mp,
+ void *userpg, int fd)
+{
+ struct perf_event_mmap_page *pc = userpg;
+
+ WARN_ONCE(mm->base, "Uninitialized auxtrace_mmap\n");
+
+ mm->userpg = userpg;
+ mm->mask = mp->mask;
+ mm->len = mp->len;
+ mm->prev = 0;
+ mm->idx = mp->idx;
+ mm->tid = mp->tid;
+ mm->cpu = mp->cpu.cpu;
+
+ if (!mp->len || !mp->mmap_needed) {
+ mm->base = NULL;
+ return 0;
+ }
+
+ pc->aux_offset = mp->offset;
+ pc->aux_size = mp->len;
+
+ mm->base = mmap(NULL, mp->len, mp->prot, MAP_SHARED, fd, mp->offset);
+ if (mm->base == MAP_FAILED) {
+ pr_debug2("failed to mmap AUX area\n");
+ mm->base = NULL;
+ return -1;
+ }
+
+ return 0;
+}
+
+void auxtrace_mmap__munmap(struct auxtrace_mmap *mm)
+{
+ if (mm->base) {
+ munmap(mm->base, mm->len);
+ mm->base = NULL;
+ }
+}
+
+void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
+ off_t auxtrace_offset,
+ unsigned int auxtrace_pages,
+ bool auxtrace_overwrite)
+{
+ if (auxtrace_pages) {
+ mp->offset = auxtrace_offset;
+ mp->len = auxtrace_pages * (size_t)page_size;
+ mp->mask = is_power_of_2(mp->len) ? mp->len - 1 : 0;
+ mp->prot = PROT_READ | (auxtrace_overwrite ? 0 : PROT_WRITE);
+ pr_debug2("AUX area mmap length %zu\n", mp->len);
+ } else {
+ mp->len = 0;
+ }
+}
+
+void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
+ struct evlist *evlist,
+ struct evsel *evsel, int idx)
+{
+ bool per_cpu = !perf_cpu_map__empty(evlist->core.user_requested_cpus);
+
+ mp->mmap_needed = evsel->needs_auxtrace_mmap;
+
+ if (!mp->mmap_needed)
+ return;
+
+ mp->idx = idx;
+
+ if (per_cpu) {
+ mp->cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx);
+ if (evlist->core.threads)
+ mp->tid = perf_thread_map__pid(evlist->core.threads, 0);
+ else
+ mp->tid = -1;
+ } else {
+ mp->cpu.cpu = -1;
+ mp->tid = perf_thread_map__pid(evlist->core.threads, idx);
+ }
+}
+
+#define AUXTRACE_INIT_NR_QUEUES 32
+
+static struct auxtrace_queue *auxtrace_alloc_queue_array(unsigned int nr_queues)
+{
+ struct auxtrace_queue *queue_array;
+ unsigned int max_nr_queues, i;
+
+ max_nr_queues = UINT_MAX / sizeof(struct auxtrace_queue);
+ if (nr_queues > max_nr_queues)
+ return NULL;
+
+ queue_array = calloc(nr_queues, sizeof(struct auxtrace_queue));
+ if (!queue_array)
+ return NULL;
+
+ for (i = 0; i < nr_queues; i++) {
+ INIT_LIST_HEAD(&queue_array[i].head);
+ queue_array[i].priv = NULL;
+ }
+
+ return queue_array;
+}
+
+int auxtrace_queues__init(struct auxtrace_queues *queues)
+{
+ queues->nr_queues = AUXTRACE_INIT_NR_QUEUES;
+ queues->queue_array = auxtrace_alloc_queue_array(queues->nr_queues);
+ if (!queues->queue_array)
+ return -ENOMEM;
+ return 0;
+}
+
+static int auxtrace_queues__grow(struct auxtrace_queues *queues,
+ unsigned int new_nr_queues)
+{
+ unsigned int nr_queues = queues->nr_queues;
+ struct auxtrace_queue *queue_array;
+ unsigned int i;
+
+ if (!nr_queues)
+ nr_queues = AUXTRACE_INIT_NR_QUEUES;
+
+ while (nr_queues && nr_queues < new_nr_queues)
+ nr_queues <<= 1;
+
+ if (nr_queues < queues->nr_queues || nr_queues < new_nr_queues)
+ return -EINVAL;
+
+ queue_array = auxtrace_alloc_queue_array(nr_queues);
+ if (!queue_array)
+ return -ENOMEM;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ list_splice_tail(&queues->queue_array[i].head,
+ &queue_array[i].head);
+ queue_array[i].tid = queues->queue_array[i].tid;
+ queue_array[i].cpu = queues->queue_array[i].cpu;
+ queue_array[i].set = queues->queue_array[i].set;
+ queue_array[i].priv = queues->queue_array[i].priv;
+ }
+
+ queues->nr_queues = nr_queues;
+ queues->queue_array = queue_array;
+
+ return 0;
+}
+
+static void *auxtrace_copy_data(u64 size, struct perf_session *session)
+{
+ int fd = perf_data__fd(session->data);
+ void *p;
+ ssize_t ret;
+
+ if (size > SSIZE_MAX)
+ return NULL;
+
+ p = malloc(size);
+ if (!p)
+ return NULL;
+
+ ret = readn(fd, p, size);
+ if (ret != (ssize_t)size) {
+ free(p);
+ return NULL;
+ }
+
+ return p;
+}
+
+static int auxtrace_queues__queue_buffer(struct auxtrace_queues *queues,
+ unsigned int idx,
+ struct auxtrace_buffer *buffer)
+{
+ struct auxtrace_queue *queue;
+ int err;
+
+ if (idx >= queues->nr_queues) {
+ err = auxtrace_queues__grow(queues, idx + 1);
+ if (err)
+ return err;
+ }
+
+ queue = &queues->queue_array[idx];
+
+ if (!queue->set) {
+ queue->set = true;
+ queue->tid = buffer->tid;
+ queue->cpu = buffer->cpu.cpu;
+ }
+
+ buffer->buffer_nr = queues->next_buffer_nr++;
+
+ list_add_tail(&buffer->list, &queue->head);
+
+ queues->new_data = true;
+ queues->populated = true;
+
+ return 0;
+}
+
+/* Limit buffers to 32MiB on 32-bit */
+#define BUFFER_LIMIT_FOR_32_BIT (32 * 1024 * 1024)
+
+static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues,
+ unsigned int idx,
+ struct auxtrace_buffer *buffer)
+{
+ u64 sz = buffer->size;
+ bool consecutive = false;
+ struct auxtrace_buffer *b;
+ int err;
+
+ while (sz > BUFFER_LIMIT_FOR_32_BIT) {
+ b = memdup(buffer, sizeof(struct auxtrace_buffer));
+ if (!b)
+ return -ENOMEM;
+ b->size = BUFFER_LIMIT_FOR_32_BIT;
+ b->consecutive = consecutive;
+ err = auxtrace_queues__queue_buffer(queues, idx, b);
+ if (err) {
+ auxtrace_buffer__free(b);
+ return err;
+ }
+ buffer->data_offset += BUFFER_LIMIT_FOR_32_BIT;
+ sz -= BUFFER_LIMIT_FOR_32_BIT;
+ consecutive = true;
+ }
+
+ buffer->size = sz;
+ buffer->consecutive = consecutive;
+
+ return 0;
+}
+
+static bool filter_cpu(struct perf_session *session, struct perf_cpu cpu)
+{
+ unsigned long *cpu_bitmap = session->itrace_synth_opts->cpu_bitmap;
+
+ return cpu_bitmap && cpu.cpu != -1 && !test_bit(cpu.cpu, cpu_bitmap);
+}
+
+static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues,
+ struct perf_session *session,
+ unsigned int idx,
+ struct auxtrace_buffer *buffer,
+ struct auxtrace_buffer **buffer_ptr)
+{
+ int err = -ENOMEM;
+
+ if (filter_cpu(session, buffer->cpu))
+ return 0;
+
+ buffer = memdup(buffer, sizeof(*buffer));
+ if (!buffer)
+ return -ENOMEM;
+
+ if (session->one_mmap) {
+ buffer->data = buffer->data_offset - session->one_mmap_offset +
+ session->one_mmap_addr;
+ } else if (perf_data__is_pipe(session->data)) {
+ buffer->data = auxtrace_copy_data(buffer->size, session);
+ if (!buffer->data)
+ goto out_free;
+ buffer->data_needs_freeing = true;
+ } else if (BITS_PER_LONG == 32 &&
+ buffer->size > BUFFER_LIMIT_FOR_32_BIT) {
+ err = auxtrace_queues__split_buffer(queues, idx, buffer);
+ if (err)
+ goto out_free;
+ }
+
+ err = auxtrace_queues__queue_buffer(queues, idx, buffer);
+ if (err)
+ goto out_free;
+
+ /* FIXME: Doesn't work for split buffer */
+ if (buffer_ptr)
+ *buffer_ptr = buffer;
+
+ return 0;
+
+out_free:
+ auxtrace_buffer__free(buffer);
+ return err;
+}
+
+int auxtrace_queues__add_event(struct auxtrace_queues *queues,
+ struct perf_session *session,
+ union perf_event *event, off_t data_offset,
+ struct auxtrace_buffer **buffer_ptr)
+{
+ struct auxtrace_buffer buffer = {
+ .pid = -1,
+ .tid = event->auxtrace.tid,
+ .cpu = { event->auxtrace.cpu },
+ .data_offset = data_offset,
+ .offset = event->auxtrace.offset,
+ .reference = event->auxtrace.reference,
+ .size = event->auxtrace.size,
+ };
+ unsigned int idx = event->auxtrace.idx;
+
+ return auxtrace_queues__add_buffer(queues, session, idx, &buffer,
+ buffer_ptr);
+}
+
+static int auxtrace_queues__add_indexed_event(struct auxtrace_queues *queues,
+ struct perf_session *session,
+ off_t file_offset, size_t sz)
+{
+ union perf_event *event;
+ int err;
+ char buf[PERF_SAMPLE_MAX_SIZE];
+
+ err = perf_session__peek_event(session, file_offset, buf,
+ PERF_SAMPLE_MAX_SIZE, &event, NULL);
+ if (err)
+ return err;
+
+ if (event->header.type == PERF_RECORD_AUXTRACE) {
+ if (event->header.size < sizeof(struct perf_record_auxtrace) ||
+ event->header.size != sz) {
+ err = -EINVAL;
+ goto out;
+ }
+ file_offset += event->header.size;
+ err = auxtrace_queues__add_event(queues, session, event,
+ file_offset, NULL);
+ }
+out:
+ return err;
+}
+
+void auxtrace_queues__free(struct auxtrace_queues *queues)
+{
+ unsigned int i;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ while (!list_empty(&queues->queue_array[i].head)) {
+ struct auxtrace_buffer *buffer;
+
+ buffer = list_entry(queues->queue_array[i].head.next,
+ struct auxtrace_buffer, list);
+ list_del_init(&buffer->list);
+ auxtrace_buffer__free(buffer);
+ }
+ }
+
+ zfree(&queues->queue_array);
+ queues->nr_queues = 0;
+}
+
+static void auxtrace_heapify(struct auxtrace_heap_item *heap_array,
+ unsigned int pos, unsigned int queue_nr,
+ u64 ordinal)
+{
+ unsigned int parent;
+
+ while (pos) {
+ parent = (pos - 1) >> 1;
+ if (heap_array[parent].ordinal <= ordinal)
+ break;
+ heap_array[pos] = heap_array[parent];
+ pos = parent;
+ }
+ heap_array[pos].queue_nr = queue_nr;
+ heap_array[pos].ordinal = ordinal;
+}
+
+int auxtrace_heap__add(struct auxtrace_heap *heap, unsigned int queue_nr,
+ u64 ordinal)
+{
+ struct auxtrace_heap_item *heap_array;
+
+ if (queue_nr >= heap->heap_sz) {
+ unsigned int heap_sz = AUXTRACE_INIT_NR_QUEUES;
+
+ while (heap_sz <= queue_nr)
+ heap_sz <<= 1;
+ heap_array = realloc(heap->heap_array,
+ heap_sz * sizeof(struct auxtrace_heap_item));
+ if (!heap_array)
+ return -ENOMEM;
+ heap->heap_array = heap_array;
+ heap->heap_sz = heap_sz;
+ }
+
+ auxtrace_heapify(heap->heap_array, heap->heap_cnt++, queue_nr, ordinal);
+
+ return 0;
+}
+
+void auxtrace_heap__free(struct auxtrace_heap *heap)
+{
+ zfree(&heap->heap_array);
+ heap->heap_cnt = 0;
+ heap->heap_sz = 0;
+}
+
+void auxtrace_heap__pop(struct auxtrace_heap *heap)
+{
+ unsigned int pos, last, heap_cnt = heap->heap_cnt;
+ struct auxtrace_heap_item *heap_array;
+
+ if (!heap_cnt)
+ return;
+
+ heap->heap_cnt -= 1;
+
+ heap_array = heap->heap_array;
+
+ pos = 0;
+ while (1) {
+ unsigned int left, right;
+
+ left = (pos << 1) + 1;
+ if (left >= heap_cnt)
+ break;
+ right = left + 1;
+ if (right >= heap_cnt) {
+ heap_array[pos] = heap_array[left];
+ return;
+ }
+ if (heap_array[left].ordinal < heap_array[right].ordinal) {
+ heap_array[pos] = heap_array[left];
+ pos = left;
+ } else {
+ heap_array[pos] = heap_array[right];
+ pos = right;
+ }
+ }
+
+ last = heap_cnt - 1;
+ auxtrace_heapify(heap_array, pos, heap_array[last].queue_nr,
+ heap_array[last].ordinal);
+}
+
+size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr,
+ struct evlist *evlist)
+{
+ if (itr)
+ return itr->info_priv_size(itr, evlist);
+ return 0;
+}
+
+static int auxtrace_not_supported(void)
+{
+ pr_err("AUX area tracing is not supported on this architecture\n");
+ return -EINVAL;
+}
+
+int auxtrace_record__info_fill(struct auxtrace_record *itr,
+ struct perf_session *session,
+ struct perf_record_auxtrace_info *auxtrace_info,
+ size_t priv_size)
+{
+ if (itr)
+ return itr->info_fill(itr, session, auxtrace_info, priv_size);
+ return auxtrace_not_supported();
+}
+
+void auxtrace_record__free(struct auxtrace_record *itr)
+{
+ if (itr)
+ itr->free(itr);
+}
+
+int auxtrace_record__snapshot_start(struct auxtrace_record *itr)
+{
+ if (itr && itr->snapshot_start)
+ return itr->snapshot_start(itr);
+ return 0;
+}
+
+int auxtrace_record__snapshot_finish(struct auxtrace_record *itr, bool on_exit)
+{
+ if (!on_exit && itr && itr->snapshot_finish)
+ return itr->snapshot_finish(itr);
+ return 0;
+}
+
+int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx,
+ struct auxtrace_mmap *mm,
+ unsigned char *data, u64 *head, u64 *old)
+{
+ if (itr && itr->find_snapshot)
+ return itr->find_snapshot(itr, idx, mm, data, head, old);
+ return 0;
+}
+
+int auxtrace_record__options(struct auxtrace_record *itr,
+ struct evlist *evlist,
+ struct record_opts *opts)
+{
+ if (itr) {
+ itr->evlist = evlist;
+ return itr->recording_options(itr, evlist, opts);
+ }
+ return 0;
+}
+
+u64 auxtrace_record__reference(struct auxtrace_record *itr)
+{
+ if (itr)
+ return itr->reference(itr);
+ return 0;
+}
+
+int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
+ struct record_opts *opts, const char *str)
+{
+ if (!str)
+ return 0;
+
+ /* PMU-agnostic options */
+ switch (*str) {
+ case 'e':
+ opts->auxtrace_snapshot_on_exit = true;
+ str++;
+ break;
+ default:
+ break;
+ }
+
+ if (itr && itr->parse_snapshot_options)
+ return itr->parse_snapshot_options(itr, opts, str);
+
+ pr_err("No AUX area tracing to snapshot\n");
+ return -EINVAL;
+}
+
+static int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx)
+{
+ bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.user_requested_cpus);
+
+ if (per_cpu_mmaps) {
+ struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx);
+ int cpu_map_idx = perf_cpu_map__idx(evsel->core.cpus, evlist_cpu);
+
+ if (cpu_map_idx == -1)
+ return -EINVAL;
+ return perf_evsel__enable_cpu(&evsel->core, cpu_map_idx);
+ }
+
+ return perf_evsel__enable_thread(&evsel->core, idx);
+}
+
+int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx)
+{
+ struct evsel *evsel;
+
+ if (!itr->evlist || !itr->pmu)
+ return -EINVAL;
+
+ evlist__for_each_entry(itr->evlist, evsel) {
+ if (evsel->core.attr.type == itr->pmu->type) {
+ if (evsel->disabled)
+ return 0;
+ return evlist__enable_event_idx(itr->evlist, evsel, idx);
+ }
+ }
+ return -EINVAL;
+}
+
+/*
+ * Event record size is 16-bit which results in a maximum size of about 64KiB.
+ * Allow about 4KiB for the rest of the sample record, to give a maximum
+ * AUX area sample size of 60KiB.
+ */
+#define MAX_AUX_SAMPLE_SIZE (60 * 1024)
+
+/* Arbitrary default size if no other default provided */
+#define DEFAULT_AUX_SAMPLE_SIZE (4 * 1024)
+
+static int auxtrace_validate_aux_sample_size(struct evlist *evlist,
+ struct record_opts *opts)
+{
+ struct evsel *evsel;
+ bool has_aux_leader = false;
+ u32 sz;
+
+ evlist__for_each_entry(evlist, evsel) {
+ sz = evsel->core.attr.aux_sample_size;
+ if (evsel__is_group_leader(evsel)) {
+ has_aux_leader = evsel__is_aux_event(evsel);
+ if (sz) {
+ if (has_aux_leader)
+ pr_err("Cannot add AUX area sampling to an AUX area event\n");
+ else
+ pr_err("Cannot add AUX area sampling to a group leader\n");
+ return -EINVAL;
+ }
+ }
+ if (sz > MAX_AUX_SAMPLE_SIZE) {
+ pr_err("AUX area sample size %u too big, max. %d\n",
+ sz, MAX_AUX_SAMPLE_SIZE);
+ return -EINVAL;
+ }
+ if (sz) {
+ if (!has_aux_leader) {
+ pr_err("Cannot add AUX area sampling because group leader is not an AUX area event\n");
+ return -EINVAL;
+ }
+ evsel__set_sample_bit(evsel, AUX);
+ opts->auxtrace_sample_mode = true;
+ } else {
+ evsel__reset_sample_bit(evsel, AUX);
+ }
+ }
+
+ if (!opts->auxtrace_sample_mode) {
+ pr_err("AUX area sampling requires an AUX area event group leader plus other events to which to add samples\n");
+ return -EINVAL;
+ }
+
+ if (!perf_can_aux_sample()) {
+ pr_err("AUX area sampling is not supported by kernel\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int auxtrace_parse_sample_options(struct auxtrace_record *itr,
+ struct evlist *evlist,
+ struct record_opts *opts, const char *str)
+{
+ struct evsel_config_term *term;
+ struct evsel *aux_evsel;
+ bool has_aux_sample_size = false;
+ bool has_aux_leader = false;
+ struct evsel *evsel;
+ char *endptr;
+ unsigned long sz;
+
+ if (!str)
+ goto no_opt;
+
+ if (!itr) {
+ pr_err("No AUX area event to sample\n");
+ return -EINVAL;
+ }
+
+ sz = strtoul(str, &endptr, 0);
+ if (*endptr || sz > UINT_MAX) {
+ pr_err("Bad AUX area sampling option: '%s'\n", str);
+ return -EINVAL;
+ }
+
+ if (!sz)
+ sz = itr->default_aux_sample_size;
+
+ if (!sz)
+ sz = DEFAULT_AUX_SAMPLE_SIZE;
+
+ /* Set aux_sample_size based on --aux-sample option */
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel__is_group_leader(evsel)) {
+ has_aux_leader = evsel__is_aux_event(evsel);
+ } else if (has_aux_leader) {
+ evsel->core.attr.aux_sample_size = sz;
+ }
+ }
+no_opt:
+ aux_evsel = NULL;
+ /* Override with aux_sample_size from config term */
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel__is_aux_event(evsel))
+ aux_evsel = evsel;
+ term = evsel__get_config_term(evsel, AUX_SAMPLE_SIZE);
+ if (term) {
+ has_aux_sample_size = true;
+ evsel->core.attr.aux_sample_size = term->val.aux_sample_size;
+ /* If possible, group with the AUX event */
+ if (aux_evsel && evsel->core.attr.aux_sample_size)
+ evlist__regroup(evlist, aux_evsel, evsel);
+ }
+ }
+
+ if (!str && !has_aux_sample_size)
+ return 0;
+
+ if (!itr) {
+ pr_err("No AUX area event to sample\n");
+ return -EINVAL;
+ }
+
+ return auxtrace_validate_aux_sample_size(evlist, opts);
+}
+
+void auxtrace_regroup_aux_output(struct evlist *evlist)
+{
+ struct evsel *evsel, *aux_evsel = NULL;
+ struct evsel_config_term *term;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel__is_aux_event(evsel))
+ aux_evsel = evsel;
+ term = evsel__get_config_term(evsel, AUX_OUTPUT);
+ /* If possible, group with the AUX event */
+ if (term && aux_evsel)
+ evlist__regroup(evlist, aux_evsel, evsel);
+ }
+}
+
+struct auxtrace_record *__weak
+auxtrace_record__init(struct evlist *evlist __maybe_unused, int *err)
+{
+ *err = 0;
+ return NULL;
+}
+
+static int auxtrace_index__alloc(struct list_head *head)
+{
+ struct auxtrace_index *auxtrace_index;
+
+ auxtrace_index = malloc(sizeof(struct auxtrace_index));
+ if (!auxtrace_index)
+ return -ENOMEM;
+
+ auxtrace_index->nr = 0;
+ INIT_LIST_HEAD(&auxtrace_index->list);
+
+ list_add_tail(&auxtrace_index->list, head);
+
+ return 0;
+}
+
+void auxtrace_index__free(struct list_head *head)
+{
+ struct auxtrace_index *auxtrace_index, *n;
+
+ list_for_each_entry_safe(auxtrace_index, n, head, list) {
+ list_del_init(&auxtrace_index->list);
+ free(auxtrace_index);
+ }
+}
+
+static struct auxtrace_index *auxtrace_index__last(struct list_head *head)
+{
+ struct auxtrace_index *auxtrace_index;
+ int err;
+
+ if (list_empty(head)) {
+ err = auxtrace_index__alloc(head);
+ if (err)
+ return NULL;
+ }
+
+ auxtrace_index = list_entry(head->prev, struct auxtrace_index, list);
+
+ if (auxtrace_index->nr >= PERF_AUXTRACE_INDEX_ENTRY_COUNT) {
+ err = auxtrace_index__alloc(head);
+ if (err)
+ return NULL;
+ auxtrace_index = list_entry(head->prev, struct auxtrace_index,
+ list);
+ }
+
+ return auxtrace_index;
+}
+
+int auxtrace_index__auxtrace_event(struct list_head *head,
+ union perf_event *event, off_t file_offset)
+{
+ struct auxtrace_index *auxtrace_index;
+ size_t nr;
+
+ auxtrace_index = auxtrace_index__last(head);
+ if (!auxtrace_index)
+ return -ENOMEM;
+
+ nr = auxtrace_index->nr;
+ auxtrace_index->entries[nr].file_offset = file_offset;
+ auxtrace_index->entries[nr].sz = event->header.size;
+ auxtrace_index->nr += 1;
+
+ return 0;
+}
+
+static int auxtrace_index__do_write(int fd,
+ struct auxtrace_index *auxtrace_index)
+{
+ struct auxtrace_index_entry ent;
+ size_t i;
+
+ for (i = 0; i < auxtrace_index->nr; i++) {
+ ent.file_offset = auxtrace_index->entries[i].file_offset;
+ ent.sz = auxtrace_index->entries[i].sz;
+ if (writen(fd, &ent, sizeof(ent)) != sizeof(ent))
+ return -errno;
+ }
+ return 0;
+}
+
+int auxtrace_index__write(int fd, struct list_head *head)
+{
+ struct auxtrace_index *auxtrace_index;
+ u64 total = 0;
+ int err;
+
+ list_for_each_entry(auxtrace_index, head, list)
+ total += auxtrace_index->nr;
+
+ if (writen(fd, &total, sizeof(total)) != sizeof(total))
+ return -errno;
+
+ list_for_each_entry(auxtrace_index, head, list) {
+ err = auxtrace_index__do_write(fd, auxtrace_index);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int auxtrace_index__process_entry(int fd, struct list_head *head,
+ bool needs_swap)
+{
+ struct auxtrace_index *auxtrace_index;
+ struct auxtrace_index_entry ent;
+ size_t nr;
+
+ if (readn(fd, &ent, sizeof(ent)) != sizeof(ent))
+ return -1;
+
+ auxtrace_index = auxtrace_index__last(head);
+ if (!auxtrace_index)
+ return -1;
+
+ nr = auxtrace_index->nr;
+ if (needs_swap) {
+ auxtrace_index->entries[nr].file_offset =
+ bswap_64(ent.file_offset);
+ auxtrace_index->entries[nr].sz = bswap_64(ent.sz);
+ } else {
+ auxtrace_index->entries[nr].file_offset = ent.file_offset;
+ auxtrace_index->entries[nr].sz = ent.sz;
+ }
+
+ auxtrace_index->nr = nr + 1;
+
+ return 0;
+}
+
+int auxtrace_index__process(int fd, u64 size, struct perf_session *session,
+ bool needs_swap)
+{
+ struct list_head *head = &session->auxtrace_index;
+ u64 nr;
+
+ if (readn(fd, &nr, sizeof(u64)) != sizeof(u64))
+ return -1;
+
+ if (needs_swap)
+ nr = bswap_64(nr);
+
+ if (sizeof(u64) + nr * sizeof(struct auxtrace_index_entry) > size)
+ return -1;
+
+ while (nr--) {
+ int err;
+
+ err = auxtrace_index__process_entry(fd, head, needs_swap);
+ if (err)
+ return -1;
+ }
+
+ return 0;
+}
+
+static int auxtrace_queues__process_index_entry(struct auxtrace_queues *queues,
+ struct perf_session *session,
+ struct auxtrace_index_entry *ent)
+{
+ return auxtrace_queues__add_indexed_event(queues, session,
+ ent->file_offset, ent->sz);
+}
+
+int auxtrace_queues__process_index(struct auxtrace_queues *queues,
+ struct perf_session *session)
+{
+ struct auxtrace_index *auxtrace_index;
+ struct auxtrace_index_entry *ent;
+ size_t i;
+ int err;
+
+ if (auxtrace__dont_decode(session))
+ return 0;
+
+ list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
+ for (i = 0; i < auxtrace_index->nr; i++) {
+ ent = &auxtrace_index->entries[i];
+ err = auxtrace_queues__process_index_entry(queues,
+ session,
+ ent);
+ if (err)
+ return err;
+ }
+ }
+ return 0;
+}
+
+struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue,
+ struct auxtrace_buffer *buffer)
+{
+ if (buffer) {
+ if (list_is_last(&buffer->list, &queue->head))
+ return NULL;
+ return list_entry(buffer->list.next, struct auxtrace_buffer,
+ list);
+ } else {
+ if (list_empty(&queue->head))
+ return NULL;
+ return list_entry(queue->head.next, struct auxtrace_buffer,
+ list);
+ }
+}
+
+struct auxtrace_queue *auxtrace_queues__sample_queue(struct auxtrace_queues *queues,
+ struct perf_sample *sample,
+ struct perf_session *session)
+{
+ struct perf_sample_id *sid;
+ unsigned int idx;
+ u64 id;
+
+ id = sample->id;
+ if (!id)
+ return NULL;
+
+ sid = evlist__id2sid(session->evlist, id);
+ if (!sid)
+ return NULL;
+
+ idx = sid->idx;
+
+ if (idx >= queues->nr_queues)
+ return NULL;
+
+ return &queues->queue_array[idx];
+}
+
+int auxtrace_queues__add_sample(struct auxtrace_queues *queues,
+ struct perf_session *session,
+ struct perf_sample *sample, u64 data_offset,
+ u64 reference)
+{
+ struct auxtrace_buffer buffer = {
+ .pid = -1,
+ .data_offset = data_offset,
+ .reference = reference,
+ .size = sample->aux_sample.size,
+ };
+ struct perf_sample_id *sid;
+ u64 id = sample->id;
+ unsigned int idx;
+
+ if (!id)
+ return -EINVAL;
+
+ sid = evlist__id2sid(session->evlist, id);
+ if (!sid)
+ return -ENOENT;
+
+ idx = sid->idx;
+ buffer.tid = sid->tid;
+ buffer.cpu = sid->cpu;
+
+ return auxtrace_queues__add_buffer(queues, session, idx, &buffer, NULL);
+}
+
+struct queue_data {
+ bool samples;
+ bool events;
+};
+
+static int auxtrace_queue_data_cb(struct perf_session *session,
+ union perf_event *event, u64 offset,
+ void *data)
+{
+ struct queue_data *qd = data;
+ struct perf_sample sample;
+ int err;
+
+ if (qd->events && event->header.type == PERF_RECORD_AUXTRACE) {
+ if (event->header.size < sizeof(struct perf_record_auxtrace))
+ return -EINVAL;
+ offset += event->header.size;
+ return session->auxtrace->queue_data(session, NULL, event,
+ offset);
+ }
+
+ if (!qd->samples || event->header.type != PERF_RECORD_SAMPLE)
+ return 0;
+
+ err = evlist__parse_sample(session->evlist, event, &sample);
+ if (err)
+ return err;
+
+ if (!sample.aux_sample.size)
+ return 0;
+
+ offset += sample.aux_sample.data - (void *)event;
+
+ return session->auxtrace->queue_data(session, &sample, NULL, offset);
+}
+
+int auxtrace_queue_data(struct perf_session *session, bool samples, bool events)
+{
+ struct queue_data qd = {
+ .samples = samples,
+ .events = events,
+ };
+
+ if (auxtrace__dont_decode(session))
+ return 0;
+
+ if (perf_data__is_pipe(session->data))
+ return 0;
+
+ if (!session->auxtrace || !session->auxtrace->queue_data)
+ return -EINVAL;
+
+ return perf_session__peek_events(session, session->header.data_offset,
+ session->header.data_size,
+ auxtrace_queue_data_cb, &qd);
+}
+
+void *auxtrace_buffer__get_data_rw(struct auxtrace_buffer *buffer, int fd, bool rw)
+{
+ int prot = rw ? PROT_READ | PROT_WRITE : PROT_READ;
+ size_t adj = buffer->data_offset & (page_size - 1);
+ size_t size = buffer->size + adj;
+ off_t file_offset = buffer->data_offset - adj;
+ void *addr;
+
+ if (buffer->data)
+ return buffer->data;
+
+ addr = mmap(NULL, size, prot, MAP_SHARED, fd, file_offset);
+ if (addr == MAP_FAILED)
+ return NULL;
+
+ buffer->mmap_addr = addr;
+ buffer->mmap_size = size;
+
+ buffer->data = addr + adj;
+
+ return buffer->data;
+}
+
+void auxtrace_buffer__put_data(struct auxtrace_buffer *buffer)
+{
+ if (!buffer->data || !buffer->mmap_addr)
+ return;
+ munmap(buffer->mmap_addr, buffer->mmap_size);
+ buffer->mmap_addr = NULL;
+ buffer->mmap_size = 0;
+ buffer->data = NULL;
+ buffer->use_data = NULL;
+}
+
+void auxtrace_buffer__drop_data(struct auxtrace_buffer *buffer)
+{
+ auxtrace_buffer__put_data(buffer);
+ if (buffer->data_needs_freeing) {
+ buffer->data_needs_freeing = false;
+ zfree(&buffer->data);
+ buffer->use_data = NULL;
+ buffer->size = 0;
+ }
+}
+
+void auxtrace_buffer__free(struct auxtrace_buffer *buffer)
+{
+ auxtrace_buffer__drop_data(buffer);
+ free(buffer);
+}
+
+void auxtrace_synth_guest_error(struct perf_record_auxtrace_error *auxtrace_error, int type,
+ int code, int cpu, pid_t pid, pid_t tid, u64 ip,
+ const char *msg, u64 timestamp,
+ pid_t machine_pid, int vcpu)
+{
+ size_t size;
+
+ memset(auxtrace_error, 0, sizeof(struct perf_record_auxtrace_error));
+
+ auxtrace_error->header.type = PERF_RECORD_AUXTRACE_ERROR;
+ auxtrace_error->type = type;
+ auxtrace_error->code = code;
+ auxtrace_error->cpu = cpu;
+ auxtrace_error->pid = pid;
+ auxtrace_error->tid = tid;
+ auxtrace_error->fmt = 1;
+ auxtrace_error->ip = ip;
+ auxtrace_error->time = timestamp;
+ strlcpy(auxtrace_error->msg, msg, MAX_AUXTRACE_ERROR_MSG);
+ if (machine_pid) {
+ auxtrace_error->fmt = 2;
+ auxtrace_error->machine_pid = machine_pid;
+ auxtrace_error->vcpu = vcpu;
+ size = sizeof(*auxtrace_error);
+ } else {
+ size = (void *)auxtrace_error->msg - (void *)auxtrace_error +
+ strlen(auxtrace_error->msg) + 1;
+ }
+ auxtrace_error->header.size = PERF_ALIGN(size, sizeof(u64));
+}
+
+void auxtrace_synth_error(struct perf_record_auxtrace_error *auxtrace_error, int type,
+ int code, int cpu, pid_t pid, pid_t tid, u64 ip,
+ const char *msg, u64 timestamp)
+{
+ auxtrace_synth_guest_error(auxtrace_error, type, code, cpu, pid, tid,
+ ip, msg, timestamp, 0, -1);
+}
+
+int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
+ struct perf_tool *tool,
+ struct perf_session *session,
+ perf_event__handler_t process)
+{
+ union perf_event *ev;
+ size_t priv_size;
+ int err;
+
+ pr_debug2("Synthesizing auxtrace information\n");
+ priv_size = auxtrace_record__info_priv_size(itr, session->evlist);
+ ev = zalloc(sizeof(struct perf_record_auxtrace_info) + priv_size);
+ if (!ev)
+ return -ENOMEM;
+
+ ev->auxtrace_info.header.type = PERF_RECORD_AUXTRACE_INFO;
+ ev->auxtrace_info.header.size = sizeof(struct perf_record_auxtrace_info) +
+ priv_size;
+ err = auxtrace_record__info_fill(itr, session, &ev->auxtrace_info,
+ priv_size);
+ if (err)
+ goto out_free;
+
+ err = process(tool, ev, NULL, NULL);
+out_free:
+ free(ev);
+ return err;
+}
+
+static void unleader_evsel(struct evlist *evlist, struct evsel *leader)
+{
+ struct evsel *new_leader = NULL;
+ struct evsel *evsel;
+
+ /* Find new leader for the group */
+ evlist__for_each_entry(evlist, evsel) {
+ if (!evsel__has_leader(evsel, leader) || evsel == leader)
+ continue;
+ if (!new_leader)
+ new_leader = evsel;
+ evsel__set_leader(evsel, new_leader);
+ }
+
+ /* Update group information */
+ if (new_leader) {
+ zfree(&new_leader->group_name);
+ new_leader->group_name = leader->group_name;
+ leader->group_name = NULL;
+
+ new_leader->core.nr_members = leader->core.nr_members - 1;
+ leader->core.nr_members = 1;
+ }
+}
+
+static void unleader_auxtrace(struct perf_session *session)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (auxtrace__evsel_is_auxtrace(session, evsel) &&
+ evsel__is_group_leader(evsel)) {
+ unleader_evsel(session->evlist, evsel);
+ }
+ }
+}
+
+int perf_event__process_auxtrace_info(struct perf_session *session,
+ union perf_event *event)
+{
+ enum auxtrace_type type = event->auxtrace_info.type;
+ int err;
+
+ if (dump_trace)
+ fprintf(stdout, " type: %u\n", type);
+
+ switch (type) {
+ case PERF_AUXTRACE_INTEL_PT:
+ err = intel_pt_process_auxtrace_info(event, session);
+ break;
+ case PERF_AUXTRACE_INTEL_BTS:
+ err = intel_bts_process_auxtrace_info(event, session);
+ break;
+ case PERF_AUXTRACE_ARM_SPE:
+ err = arm_spe_process_auxtrace_info(event, session);
+ break;
+ case PERF_AUXTRACE_CS_ETM:
+ err = cs_etm__process_auxtrace_info(event, session);
+ break;
+ case PERF_AUXTRACE_S390_CPUMSF:
+ err = s390_cpumsf_process_auxtrace_info(event, session);
+ break;
+ case PERF_AUXTRACE_HISI_PTT:
+ err = hisi_ptt_process_auxtrace_info(event, session);
+ break;
+ case PERF_AUXTRACE_UNKNOWN:
+ default:
+ return -EINVAL;
+ }
+
+ if (err)
+ return err;
+
+ unleader_auxtrace(session);
+
+ return 0;
+}
+
+s64 perf_event__process_auxtrace(struct perf_session *session,
+ union perf_event *event)
+{
+ s64 err;
+
+ if (dump_trace)
+ fprintf(stdout, " size: %#"PRI_lx64" offset: %#"PRI_lx64" ref: %#"PRI_lx64" idx: %u tid: %d cpu: %d\n",
+ event->auxtrace.size, event->auxtrace.offset,
+ event->auxtrace.reference, event->auxtrace.idx,
+ event->auxtrace.tid, event->auxtrace.cpu);
+
+ if (auxtrace__dont_decode(session))
+ return event->auxtrace.size;
+
+ if (!session->auxtrace || event->header.type != PERF_RECORD_AUXTRACE)
+ return -EINVAL;
+
+ err = session->auxtrace->process_auxtrace_event(session, event, session->tool);
+ if (err < 0)
+ return err;
+
+ return event->auxtrace.size;
+}
+
+#define PERF_ITRACE_DEFAULT_PERIOD_TYPE PERF_ITRACE_PERIOD_NANOSECS
+#define PERF_ITRACE_DEFAULT_PERIOD 100000
+#define PERF_ITRACE_DEFAULT_CALLCHAIN_SZ 16
+#define PERF_ITRACE_MAX_CALLCHAIN_SZ 1024
+#define PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ 64
+#define PERF_ITRACE_MAX_LAST_BRANCH_SZ 1024
+
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
+ bool no_sample)
+{
+ synth_opts->branches = true;
+ synth_opts->transactions = true;
+ synth_opts->ptwrites = true;
+ synth_opts->pwr_events = true;
+ synth_opts->other_events = true;
+ synth_opts->intr_events = true;
+ synth_opts->errors = true;
+ synth_opts->flc = true;
+ synth_opts->llc = true;
+ synth_opts->tlb = true;
+ synth_opts->mem = true;
+ synth_opts->remote_access = true;
+
+ if (no_sample) {
+ synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS;
+ synth_opts->period = 1;
+ synth_opts->calls = true;
+ } else {
+ synth_opts->instructions = true;
+ synth_opts->cycles = true;
+ synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
+ synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+ }
+ synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
+ synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
+ synth_opts->initial_skip = 0;
+}
+
+static int get_flag(const char **ptr, unsigned int *flags)
+{
+ while (1) {
+ char c = **ptr;
+
+ if (c >= 'a' && c <= 'z') {
+ *flags |= 1 << (c - 'a');
+ ++*ptr;
+ return 0;
+ } else if (c == ' ') {
+ ++*ptr;
+ continue;
+ } else {
+ return -1;
+ }
+ }
+}
+
+static int get_flags(const char **ptr, unsigned int *plus_flags, unsigned int *minus_flags)
+{
+ while (1) {
+ switch (**ptr) {
+ case '+':
+ ++*ptr;
+ if (get_flag(ptr, plus_flags))
+ return -1;
+ break;
+ case '-':
+ ++*ptr;
+ if (get_flag(ptr, minus_flags))
+ return -1;
+ break;
+ case ' ':
+ ++*ptr;
+ break;
+ default:
+ return 0;
+ }
+ }
+}
+
+#define ITRACE_DFLT_LOG_ON_ERROR_SZ 16384
+
+static unsigned int itrace_log_on_error_size(void)
+{
+ unsigned int sz = 0;
+
+ perf_config_scan("itrace.debug-log-buffer-size", "%u", &sz);
+ return sz ?: ITRACE_DFLT_LOG_ON_ERROR_SZ;
+}
+
+/*
+ * Please check tools/perf/Documentation/perf-script.txt for information
+ * about the options parsed here, which is introduced after this cset,
+ * when support in 'perf script' for these options is introduced.
+ */
+int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts,
+ const char *str, int unset)
+{
+ const char *p;
+ char *endptr;
+ bool period_type_set = false;
+ bool period_set = false;
+
+ synth_opts->set = true;
+
+ if (unset) {
+ synth_opts->dont_decode = true;
+ return 0;
+ }
+
+ if (!str) {
+ itrace_synth_opts__set_default(synth_opts,
+ synth_opts->default_no_sample);
+ return 0;
+ }
+
+ for (p = str; *p;) {
+ switch (*p++) {
+ case 'i':
+ case 'y':
+ if (p[-1] == 'y')
+ synth_opts->cycles = true;
+ else
+ synth_opts->instructions = true;
+ while (*p == ' ' || *p == ',')
+ p += 1;
+ if (isdigit(*p)) {
+ synth_opts->period = strtoull(p, &endptr, 10);
+ period_set = true;
+ p = endptr;
+ while (*p == ' ' || *p == ',')
+ p += 1;
+ switch (*p++) {
+ case 'i':
+ synth_opts->period_type =
+ PERF_ITRACE_PERIOD_INSTRUCTIONS;
+ period_type_set = true;
+ break;
+ case 't':
+ synth_opts->period_type =
+ PERF_ITRACE_PERIOD_TICKS;
+ period_type_set = true;
+ break;
+ case 'm':
+ synth_opts->period *= 1000;
+ /* Fall through */
+ case 'u':
+ synth_opts->period *= 1000;
+ /* Fall through */
+ case 'n':
+ if (*p++ != 's')
+ goto out_err;
+ synth_opts->period_type =
+ PERF_ITRACE_PERIOD_NANOSECS;
+ period_type_set = true;
+ break;
+ case '\0':
+ goto out;
+ default:
+ goto out_err;
+ }
+ }
+ break;
+ case 'b':
+ synth_opts->branches = true;
+ break;
+ case 'x':
+ synth_opts->transactions = true;
+ break;
+ case 'w':
+ synth_opts->ptwrites = true;
+ break;
+ case 'p':
+ synth_opts->pwr_events = true;
+ break;
+ case 'o':
+ synth_opts->other_events = true;
+ break;
+ case 'I':
+ synth_opts->intr_events = true;
+ break;
+ case 'e':
+ synth_opts->errors = true;
+ if (get_flags(&p, &synth_opts->error_plus_flags,
+ &synth_opts->error_minus_flags))
+ goto out_err;
+ break;
+ case 'd':
+ synth_opts->log = true;
+ if (get_flags(&p, &synth_opts->log_plus_flags,
+ &synth_opts->log_minus_flags))
+ goto out_err;
+ if (synth_opts->log_plus_flags & AUXTRACE_LOG_FLG_ON_ERROR)
+ synth_opts->log_on_error_size = itrace_log_on_error_size();
+ break;
+ case 'c':
+ synth_opts->branches = true;
+ synth_opts->calls = true;
+ break;
+ case 'r':
+ synth_opts->branches = true;
+ synth_opts->returns = true;
+ break;
+ case 'G':
+ case 'g':
+ if (p[-1] == 'G')
+ synth_opts->add_callchain = true;
+ else
+ synth_opts->callchain = true;
+ synth_opts->callchain_sz =
+ PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
+ while (*p == ' ' || *p == ',')
+ p += 1;
+ if (isdigit(*p)) {
+ unsigned int val;
+
+ val = strtoul(p, &endptr, 10);
+ p = endptr;
+ if (!val || val > PERF_ITRACE_MAX_CALLCHAIN_SZ)
+ goto out_err;
+ synth_opts->callchain_sz = val;
+ }
+ break;
+ case 'L':
+ case 'l':
+ if (p[-1] == 'L')
+ synth_opts->add_last_branch = true;
+ else
+ synth_opts->last_branch = true;
+ synth_opts->last_branch_sz =
+ PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
+ while (*p == ' ' || *p == ',')
+ p += 1;
+ if (isdigit(*p)) {
+ unsigned int val;
+
+ val = strtoul(p, &endptr, 10);
+ p = endptr;
+ if (!val ||
+ val > PERF_ITRACE_MAX_LAST_BRANCH_SZ)
+ goto out_err;
+ synth_opts->last_branch_sz = val;
+ }
+ break;
+ case 's':
+ synth_opts->initial_skip = strtoul(p, &endptr, 10);
+ if (p == endptr)
+ goto out_err;
+ p = endptr;
+ break;
+ case 'f':
+ synth_opts->flc = true;
+ break;
+ case 'm':
+ synth_opts->llc = true;
+ break;
+ case 't':
+ synth_opts->tlb = true;
+ break;
+ case 'a':
+ synth_opts->remote_access = true;
+ break;
+ case 'M':
+ synth_opts->mem = true;
+ break;
+ case 'q':
+ synth_opts->quick += 1;
+ break;
+ case 'A':
+ synth_opts->approx_ipc = true;
+ break;
+ case 'Z':
+ synth_opts->timeless_decoding = true;
+ break;
+ case ' ':
+ case ',':
+ break;
+ default:
+ goto out_err;
+ }
+ }
+out:
+ if (synth_opts->instructions || synth_opts->cycles) {
+ if (!period_type_set)
+ synth_opts->period_type =
+ PERF_ITRACE_DEFAULT_PERIOD_TYPE;
+ if (!period_set)
+ synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+ }
+
+ return 0;
+
+out_err:
+ pr_err("Bad Instruction Tracing options '%s'\n", str);
+ return -EINVAL;
+}
+
+int itrace_parse_synth_opts(const struct option *opt, const char *str, int unset)
+{
+ return itrace_do_parse_synth_opts(opt->value, str, unset);
+}
+
+static const char * const auxtrace_error_type_name[] = {
+ [PERF_AUXTRACE_ERROR_ITRACE] = "instruction trace",
+};
+
+static const char *auxtrace_error_name(int type)
+{
+ const char *error_type_name = NULL;
+
+ if (type < PERF_AUXTRACE_ERROR_MAX)
+ error_type_name = auxtrace_error_type_name[type];
+ if (!error_type_name)
+ error_type_name = "unknown AUX";
+ return error_type_name;
+}
+
+size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp)
+{
+ struct perf_record_auxtrace_error *e = &event->auxtrace_error;
+ unsigned long long nsecs = e->time;
+ const char *msg = e->msg;
+ int ret;
+
+ ret = fprintf(fp, " %s error type %u",
+ auxtrace_error_name(e->type), e->type);
+
+ if (e->fmt && nsecs) {
+ unsigned long secs = nsecs / NSEC_PER_SEC;
+
+ nsecs -= secs * NSEC_PER_SEC;
+ ret += fprintf(fp, " time %lu.%09llu", secs, nsecs);
+ } else {
+ ret += fprintf(fp, " time 0");
+ }
+
+ if (!e->fmt)
+ msg = (const char *)&e->time;
+
+ if (e->fmt >= 2 && e->machine_pid)
+ ret += fprintf(fp, " machine_pid %d vcpu %d", e->machine_pid, e->vcpu);
+
+ ret += fprintf(fp, " cpu %d pid %d tid %d ip %#"PRI_lx64" code %u: %s\n",
+ e->cpu, e->pid, e->tid, e->ip, e->code, msg);
+ return ret;
+}
+
+void perf_session__auxtrace_error_inc(struct perf_session *session,
+ union perf_event *event)
+{
+ struct perf_record_auxtrace_error *e = &event->auxtrace_error;
+
+ if (e->type < PERF_AUXTRACE_ERROR_MAX)
+ session->evlist->stats.nr_auxtrace_errors[e->type] += 1;
+}
+
+void events_stats__auxtrace_error_warn(const struct events_stats *stats)
+{
+ int i;
+
+ for (i = 0; i < PERF_AUXTRACE_ERROR_MAX; i++) {
+ if (!stats->nr_auxtrace_errors[i])
+ continue;
+ ui__warning("%u %s errors\n",
+ stats->nr_auxtrace_errors[i],
+ auxtrace_error_name(i));
+ }
+}
+
+int perf_event__process_auxtrace_error(struct perf_session *session,
+ union perf_event *event)
+{
+ if (auxtrace__dont_decode(session))
+ return 0;
+
+ perf_event__fprintf_auxtrace_error(event, stdout);
+ return 0;
+}
+
+/*
+ * In the compat mode kernel runs in 64-bit and perf tool runs in 32-bit mode,
+ * 32-bit perf tool cannot access 64-bit value atomically, which might lead to
+ * the issues caused by the below sequence on multiple CPUs: when perf tool
+ * accesses either the load operation or the store operation for 64-bit value,
+ * on some architectures the operation is divided into two instructions, one
+ * is for accessing the low 32-bit value and another is for the high 32-bit;
+ * thus these two user operations can give the kernel chances to access the
+ * 64-bit value, and thus leads to the unexpected load values.
+ *
+ * kernel (64-bit) user (32-bit)
+ *
+ * if (LOAD ->aux_tail) { --, LOAD ->aux_head_lo
+ * STORE $aux_data | ,--->
+ * FLUSH $aux_data | | LOAD ->aux_head_hi
+ * STORE ->aux_head --|-------` smp_rmb()
+ * } | LOAD $data
+ * | smp_mb()
+ * | STORE ->aux_tail_lo
+ * `----------->
+ * STORE ->aux_tail_hi
+ *
+ * For this reason, it's impossible for the perf tool to work correctly when
+ * the AUX head or tail is bigger than 4GB (more than 32 bits length); and we
+ * can not simply limit the AUX ring buffer to less than 4GB, the reason is
+ * the pointers can be increased monotonically, whatever the buffer size it is,
+ * at the end the head and tail can be bigger than 4GB and carry out to the
+ * high 32-bit.
+ *
+ * To mitigate the issues and improve the user experience, we can allow the
+ * perf tool working in certain conditions and bail out with error if detect
+ * any overflow cannot be handled.
+ *
+ * For reading the AUX head, it reads out the values for three times, and
+ * compares the high 4 bytes of the values between the first time and the last
+ * time, if there has no change for high 4 bytes injected by the kernel during
+ * the user reading sequence, it's safe for use the second value.
+ *
+ * When compat_auxtrace_mmap__write_tail() detects any carrying in the high
+ * 32 bits, it means there have two store operations in user space and it cannot
+ * promise the atomicity for 64-bit write, so return '-1' in this case to tell
+ * the caller an overflow error has happened.
+ */
+u64 __weak compat_auxtrace_mmap__read_head(struct auxtrace_mmap *mm)
+{
+ struct perf_event_mmap_page *pc = mm->userpg;
+ u64 first, second, last;
+ u64 mask = (u64)(UINT32_MAX) << 32;
+
+ do {
+ first = READ_ONCE(pc->aux_head);
+ /* Ensure all reads are done after we read the head */
+ smp_rmb();
+ second = READ_ONCE(pc->aux_head);
+ /* Ensure all reads are done after we read the head */
+ smp_rmb();
+ last = READ_ONCE(pc->aux_head);
+ } while ((first & mask) != (last & mask));
+
+ return second;
+}
+
+int __weak compat_auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail)
+{
+ struct perf_event_mmap_page *pc = mm->userpg;
+ u64 mask = (u64)(UINT32_MAX) << 32;
+
+ if (tail & mask)
+ return -1;
+
+ /* Ensure all reads are done before we write the tail out */
+ smp_mb();
+ WRITE_ONCE(pc->aux_tail, tail);
+ return 0;
+}
+
+static int __auxtrace_mmap__read(struct mmap *map,
+ struct auxtrace_record *itr,
+ struct perf_tool *tool, process_auxtrace_t fn,
+ bool snapshot, size_t snapshot_size)
+{
+ struct auxtrace_mmap *mm = &map->auxtrace_mmap;
+ u64 head, old = mm->prev, offset, ref;
+ unsigned char *data = mm->base;
+ size_t size, head_off, old_off, len1, len2, padding;
+ union perf_event ev;
+ void *data1, *data2;
+ int kernel_is_64_bit = perf_env__kernel_is_64_bit(evsel__env(NULL));
+
+ head = auxtrace_mmap__read_head(mm, kernel_is_64_bit);
+
+ if (snapshot &&
+ auxtrace_record__find_snapshot(itr, mm->idx, mm, data, &head, &old))
+ return -1;
+
+ if (old == head)
+ return 0;
+
+ pr_debug3("auxtrace idx %d old %#"PRIx64" head %#"PRIx64" diff %#"PRIx64"\n",
+ mm->idx, old, head, head - old);
+
+ if (mm->mask) {
+ head_off = head & mm->mask;
+ old_off = old & mm->mask;
+ } else {
+ head_off = head % mm->len;
+ old_off = old % mm->len;
+ }
+
+ if (head_off > old_off)
+ size = head_off - old_off;
+ else
+ size = mm->len - (old_off - head_off);
+
+ if (snapshot && size > snapshot_size)
+ size = snapshot_size;
+
+ ref = auxtrace_record__reference(itr);
+
+ if (head > old || size <= head || mm->mask) {
+ offset = head - size;
+ } else {
+ /*
+ * When the buffer size is not a power of 2, 'head' wraps at the
+ * highest multiple of the buffer size, so we have to subtract
+ * the remainder here.
+ */
+ u64 rem = (0ULL - mm->len) % mm->len;
+
+ offset = head - size - rem;
+ }
+
+ if (size > head_off) {
+ len1 = size - head_off;
+ data1 = &data[mm->len - len1];
+ len2 = head_off;
+ data2 = &data[0];
+ } else {
+ len1 = size;
+ data1 = &data[head_off - len1];
+ len2 = 0;
+ data2 = NULL;
+ }
+
+ if (itr->alignment) {
+ unsigned int unwanted = len1 % itr->alignment;
+
+ len1 -= unwanted;
+ size -= unwanted;
+ }
+
+ /* padding must be written by fn() e.g. record__process_auxtrace() */
+ padding = size & (PERF_AUXTRACE_RECORD_ALIGNMENT - 1);
+ if (padding)
+ padding = PERF_AUXTRACE_RECORD_ALIGNMENT - padding;
+
+ memset(&ev, 0, sizeof(ev));
+ ev.auxtrace.header.type = PERF_RECORD_AUXTRACE;
+ ev.auxtrace.header.size = sizeof(ev.auxtrace);
+ ev.auxtrace.size = size + padding;
+ ev.auxtrace.offset = offset;
+ ev.auxtrace.reference = ref;
+ ev.auxtrace.idx = mm->idx;
+ ev.auxtrace.tid = mm->tid;
+ ev.auxtrace.cpu = mm->cpu;
+
+ if (fn(tool, map, &ev, data1, len1, data2, len2))
+ return -1;
+
+ mm->prev = head;
+
+ if (!snapshot) {
+ int err;
+
+ err = auxtrace_mmap__write_tail(mm, head, kernel_is_64_bit);
+ if (err < 0)
+ return err;
+
+ if (itr->read_finish) {
+ err = itr->read_finish(itr, mm->idx);
+ if (err < 0)
+ return err;
+ }
+ }
+
+ return 1;
+}
+
+int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr,
+ struct perf_tool *tool, process_auxtrace_t fn)
+{
+ return __auxtrace_mmap__read(map, itr, tool, fn, false, 0);
+}
+
+int auxtrace_mmap__read_snapshot(struct mmap *map,
+ struct auxtrace_record *itr,
+ struct perf_tool *tool, process_auxtrace_t fn,
+ size_t snapshot_size)
+{
+ return __auxtrace_mmap__read(map, itr, tool, fn, true, snapshot_size);
+}
+
+/**
+ * struct auxtrace_cache - hash table to implement a cache
+ * @hashtable: the hashtable
+ * @sz: hashtable size (number of hlists)
+ * @entry_size: size of an entry
+ * @limit: limit the number of entries to this maximum, when reached the cache
+ * is dropped and caching begins again with an empty cache
+ * @cnt: current number of entries
+ * @bits: hashtable size (@sz = 2^@bits)
+ */
+struct auxtrace_cache {
+ struct hlist_head *hashtable;
+ size_t sz;
+ size_t entry_size;
+ size_t limit;
+ size_t cnt;
+ unsigned int bits;
+};
+
+struct auxtrace_cache *auxtrace_cache__new(unsigned int bits, size_t entry_size,
+ unsigned int limit_percent)
+{
+ struct auxtrace_cache *c;
+ struct hlist_head *ht;
+ size_t sz, i;
+
+ c = zalloc(sizeof(struct auxtrace_cache));
+ if (!c)
+ return NULL;
+
+ sz = 1UL << bits;
+
+ ht = calloc(sz, sizeof(struct hlist_head));
+ if (!ht)
+ goto out_free;
+
+ for (i = 0; i < sz; i++)
+ INIT_HLIST_HEAD(&ht[i]);
+
+ c->hashtable = ht;
+ c->sz = sz;
+ c->entry_size = entry_size;
+ c->limit = (c->sz * limit_percent) / 100;
+ c->bits = bits;
+
+ return c;
+
+out_free:
+ free(c);
+ return NULL;
+}
+
+static void auxtrace_cache__drop(struct auxtrace_cache *c)
+{
+ struct auxtrace_cache_entry *entry;
+ struct hlist_node *tmp;
+ size_t i;
+
+ if (!c)
+ return;
+
+ for (i = 0; i < c->sz; i++) {
+ hlist_for_each_entry_safe(entry, tmp, &c->hashtable[i], hash) {
+ hlist_del(&entry->hash);
+ auxtrace_cache__free_entry(c, entry);
+ }
+ }
+
+ c->cnt = 0;
+}
+
+void auxtrace_cache__free(struct auxtrace_cache *c)
+{
+ if (!c)
+ return;
+
+ auxtrace_cache__drop(c);
+ zfree(&c->hashtable);
+ free(c);
+}
+
+void *auxtrace_cache__alloc_entry(struct auxtrace_cache *c)
+{
+ return malloc(c->entry_size);
+}
+
+void auxtrace_cache__free_entry(struct auxtrace_cache *c __maybe_unused,
+ void *entry)
+{
+ free(entry);
+}
+
+int auxtrace_cache__add(struct auxtrace_cache *c, u32 key,
+ struct auxtrace_cache_entry *entry)
+{
+ if (c->limit && ++c->cnt > c->limit)
+ auxtrace_cache__drop(c);
+
+ entry->key = key;
+ hlist_add_head(&entry->hash, &c->hashtable[hash_32(key, c->bits)]);
+
+ return 0;
+}
+
+static struct auxtrace_cache_entry *auxtrace_cache__rm(struct auxtrace_cache *c,
+ u32 key)
+{
+ struct auxtrace_cache_entry *entry;
+ struct hlist_head *hlist;
+ struct hlist_node *n;
+
+ if (!c)
+ return NULL;
+
+ hlist = &c->hashtable[hash_32(key, c->bits)];
+ hlist_for_each_entry_safe(entry, n, hlist, hash) {
+ if (entry->key == key) {
+ hlist_del(&entry->hash);
+ return entry;
+ }
+ }
+
+ return NULL;
+}
+
+void auxtrace_cache__remove(struct auxtrace_cache *c, u32 key)
+{
+ struct auxtrace_cache_entry *entry = auxtrace_cache__rm(c, key);
+
+ auxtrace_cache__free_entry(c, entry);
+}
+
+void *auxtrace_cache__lookup(struct auxtrace_cache *c, u32 key)
+{
+ struct auxtrace_cache_entry *entry;
+ struct hlist_head *hlist;
+
+ if (!c)
+ return NULL;
+
+ hlist = &c->hashtable[hash_32(key, c->bits)];
+ hlist_for_each_entry(entry, hlist, hash) {
+ if (entry->key == key)
+ return entry;
+ }
+
+ return NULL;
+}
+
+static void addr_filter__free_str(struct addr_filter *filt)
+{
+ zfree(&filt->str);
+ filt->action = NULL;
+ filt->sym_from = NULL;
+ filt->sym_to = NULL;
+ filt->filename = NULL;
+}
+
+static struct addr_filter *addr_filter__new(void)
+{
+ struct addr_filter *filt = zalloc(sizeof(*filt));
+
+ if (filt)
+ INIT_LIST_HEAD(&filt->list);
+
+ return filt;
+}
+
+static void addr_filter__free(struct addr_filter *filt)
+{
+ if (filt)
+ addr_filter__free_str(filt);
+ free(filt);
+}
+
+static void addr_filters__add(struct addr_filters *filts,
+ struct addr_filter *filt)
+{
+ list_add_tail(&filt->list, &filts->head);
+ filts->cnt += 1;
+}
+
+static void addr_filters__del(struct addr_filters *filts,
+ struct addr_filter *filt)
+{
+ list_del_init(&filt->list);
+ filts->cnt -= 1;
+}
+
+void addr_filters__init(struct addr_filters *filts)
+{
+ INIT_LIST_HEAD(&filts->head);
+ filts->cnt = 0;
+}
+
+void addr_filters__exit(struct addr_filters *filts)
+{
+ struct addr_filter *filt, *n;
+
+ list_for_each_entry_safe(filt, n, &filts->head, list) {
+ addr_filters__del(filts, filt);
+ addr_filter__free(filt);
+ }
+}
+
+static int parse_num_or_str(char **inp, u64 *num, const char **str,
+ const char *str_delim)
+{
+ *inp += strspn(*inp, " ");
+
+ if (isdigit(**inp)) {
+ char *endptr;
+
+ if (!num)
+ return -EINVAL;
+ errno = 0;
+ *num = strtoull(*inp, &endptr, 0);
+ if (errno)
+ return -errno;
+ if (endptr == *inp)
+ return -EINVAL;
+ *inp = endptr;
+ } else {
+ size_t n;
+
+ if (!str)
+ return -EINVAL;
+ *inp += strspn(*inp, " ");
+ *str = *inp;
+ n = strcspn(*inp, str_delim);
+ if (!n)
+ return -EINVAL;
+ *inp += n;
+ if (**inp) {
+ **inp = '\0';
+ *inp += 1;
+ }
+ }
+ return 0;
+}
+
+static int parse_action(struct addr_filter *filt)
+{
+ if (!strcmp(filt->action, "filter")) {
+ filt->start = true;
+ filt->range = true;
+ } else if (!strcmp(filt->action, "start")) {
+ filt->start = true;
+ } else if (!strcmp(filt->action, "stop")) {
+ filt->start = false;
+ } else if (!strcmp(filt->action, "tracestop")) {
+ filt->start = false;
+ filt->range = true;
+ filt->action += 5; /* Change 'tracestop' to 'stop' */
+ } else {
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int parse_sym_idx(char **inp, int *idx)
+{
+ *idx = -1;
+
+ *inp += strspn(*inp, " ");
+
+ if (**inp != '#')
+ return 0;
+
+ *inp += 1;
+
+ if (**inp == 'g' || **inp == 'G') {
+ *inp += 1;
+ *idx = 0;
+ } else {
+ unsigned long num;
+ char *endptr;
+
+ errno = 0;
+ num = strtoul(*inp, &endptr, 0);
+ if (errno)
+ return -errno;
+ if (endptr == *inp || num > INT_MAX)
+ return -EINVAL;
+ *inp = endptr;
+ *idx = num;
+ }
+
+ return 0;
+}
+
+static int parse_addr_size(char **inp, u64 *num, const char **str, int *idx)
+{
+ int err = parse_num_or_str(inp, num, str, " ");
+
+ if (!err && *str)
+ err = parse_sym_idx(inp, idx);
+
+ return err;
+}
+
+static int parse_one_filter(struct addr_filter *filt, const char **filter_inp)
+{
+ char *fstr;
+ int err;
+
+ filt->str = fstr = strdup(*filter_inp);
+ if (!fstr)
+ return -ENOMEM;
+
+ err = parse_num_or_str(&fstr, NULL, &filt->action, " ");
+ if (err)
+ goto out_err;
+
+ err = parse_action(filt);
+ if (err)
+ goto out_err;
+
+ err = parse_addr_size(&fstr, &filt->addr, &filt->sym_from,
+ &filt->sym_from_idx);
+ if (err)
+ goto out_err;
+
+ fstr += strspn(fstr, " ");
+
+ if (*fstr == '/') {
+ fstr += 1;
+ err = parse_addr_size(&fstr, &filt->size, &filt->sym_to,
+ &filt->sym_to_idx);
+ if (err)
+ goto out_err;
+ filt->range = true;
+ }
+
+ fstr += strspn(fstr, " ");
+
+ if (*fstr == '@') {
+ fstr += 1;
+ err = parse_num_or_str(&fstr, NULL, &filt->filename, " ,");
+ if (err)
+ goto out_err;
+ }
+
+ fstr += strspn(fstr, " ,");
+
+ *filter_inp += fstr - filt->str;
+
+ return 0;
+
+out_err:
+ addr_filter__free_str(filt);
+
+ return err;
+}
+
+int addr_filters__parse_bare_filter(struct addr_filters *filts,
+ const char *filter)
+{
+ struct addr_filter *filt;
+ const char *fstr = filter;
+ int err;
+
+ while (*fstr) {
+ filt = addr_filter__new();
+ err = parse_one_filter(filt, &fstr);
+ if (err) {
+ addr_filter__free(filt);
+ addr_filters__exit(filts);
+ return err;
+ }
+ addr_filters__add(filts, filt);
+ }
+
+ return 0;
+}
+
+struct sym_args {
+ const char *name;
+ u64 start;
+ u64 size;
+ int idx;
+ int cnt;
+ bool started;
+ bool global;
+ bool selected;
+ bool duplicate;
+ bool near;
+};
+
+static bool kern_sym_name_match(const char *kname, const char *name)
+{
+ size_t n = strlen(name);
+
+ return !strcmp(kname, name) ||
+ (!strncmp(kname, name, n) && kname[n] == '\t');
+}
+
+static bool kern_sym_match(struct sym_args *args, const char *name, char type)
+{
+ /* A function with the same name, and global or the n'th found or any */
+ return kallsyms__is_function(type) &&
+ kern_sym_name_match(name, args->name) &&
+ ((args->global && isupper(type)) ||
+ (args->selected && ++(args->cnt) == args->idx) ||
+ (!args->global && !args->selected));
+}
+
+static int find_kern_sym_cb(void *arg, const char *name, char type, u64 start)
+{
+ struct sym_args *args = arg;
+
+ if (args->started) {
+ if (!args->size)
+ args->size = start - args->start;
+ if (args->selected) {
+ if (args->size)
+ return 1;
+ } else if (kern_sym_match(args, name, type)) {
+ args->duplicate = true;
+ return 1;
+ }
+ } else if (kern_sym_match(args, name, type)) {
+ args->started = true;
+ args->start = start;
+ }
+
+ return 0;
+}
+
+static int print_kern_sym_cb(void *arg, const char *name, char type, u64 start)
+{
+ struct sym_args *args = arg;
+
+ if (kern_sym_match(args, name, type)) {
+ pr_err("#%d\t0x%"PRIx64"\t%c\t%s\n",
+ ++args->cnt, start, type, name);
+ args->near = true;
+ } else if (args->near) {
+ args->near = false;
+ pr_err("\t\twhich is near\t\t%s\n", name);
+ }
+
+ return 0;
+}
+
+static int sym_not_found_error(const char *sym_name, int idx)
+{
+ if (idx > 0) {
+ pr_err("N'th occurrence (N=%d) of symbol '%s' not found.\n",
+ idx, sym_name);
+ } else if (!idx) {
+ pr_err("Global symbol '%s' not found.\n", sym_name);
+ } else {
+ pr_err("Symbol '%s' not found.\n", sym_name);
+ }
+ pr_err("Note that symbols must be functions.\n");
+
+ return -EINVAL;
+}
+
+static int find_kern_sym(const char *sym_name, u64 *start, u64 *size, int idx)
+{
+ struct sym_args args = {
+ .name = sym_name,
+ .idx = idx,
+ .global = !idx,
+ .selected = idx > 0,
+ };
+ int err;
+
+ *start = 0;
+ *size = 0;
+
+ err = kallsyms__parse("/proc/kallsyms", &args, find_kern_sym_cb);
+ if (err < 0) {
+ pr_err("Failed to parse /proc/kallsyms\n");
+ return err;
+ }
+
+ if (args.duplicate) {
+ pr_err("Multiple kernel symbols with name '%s'\n", sym_name);
+ args.cnt = 0;
+ kallsyms__parse("/proc/kallsyms", &args, print_kern_sym_cb);
+ pr_err("Disambiguate symbol name by inserting #n after the name e.g. %s #2\n",
+ sym_name);
+ pr_err("Or select a global symbol by inserting #0 or #g or #G\n");
+ return -EINVAL;
+ }
+
+ if (!args.started) {
+ pr_err("Kernel symbol lookup: ");
+ return sym_not_found_error(sym_name, idx);
+ }
+
+ *start = args.start;
+ *size = args.size;
+
+ return 0;
+}
+
+static int find_entire_kern_cb(void *arg, const char *name __maybe_unused,
+ char type, u64 start)
+{
+ struct sym_args *args = arg;
+ u64 size;
+
+ if (!kallsyms__is_function(type))
+ return 0;
+
+ if (!args->started) {
+ args->started = true;
+ args->start = start;
+ }
+ /* Don't know exactly where the kernel ends, so we add a page */
+ size = round_up(start, page_size) + page_size - args->start;
+ if (size > args->size)
+ args->size = size;
+
+ return 0;
+}
+
+static int addr_filter__entire_kernel(struct addr_filter *filt)
+{
+ struct sym_args args = { .started = false };
+ int err;
+
+ err = kallsyms__parse("/proc/kallsyms", &args, find_entire_kern_cb);
+ if (err < 0 || !args.started) {
+ pr_err("Failed to parse /proc/kallsyms\n");
+ return err;
+ }
+
+ filt->addr = args.start;
+ filt->size = args.size;
+
+ return 0;
+}
+
+static int check_end_after_start(struct addr_filter *filt, u64 start, u64 size)
+{
+ if (start + size >= filt->addr)
+ return 0;
+
+ if (filt->sym_from) {
+ pr_err("Symbol '%s' (0x%"PRIx64") comes before '%s' (0x%"PRIx64")\n",
+ filt->sym_to, start, filt->sym_from, filt->addr);
+ } else {
+ pr_err("Symbol '%s' (0x%"PRIx64") comes before address 0x%"PRIx64")\n",
+ filt->sym_to, start, filt->addr);
+ }
+
+ return -EINVAL;
+}
+
+static int addr_filter__resolve_kernel_syms(struct addr_filter *filt)
+{
+ bool no_size = false;
+ u64 start, size;
+ int err;
+
+ if (symbol_conf.kptr_restrict) {
+ pr_err("Kernel addresses are restricted. Unable to resolve kernel symbols.\n");
+ return -EINVAL;
+ }
+
+ if (filt->sym_from && !strcmp(filt->sym_from, "*"))
+ return addr_filter__entire_kernel(filt);
+
+ if (filt->sym_from) {
+ err = find_kern_sym(filt->sym_from, &start, &size,
+ filt->sym_from_idx);
+ if (err)
+ return err;
+ filt->addr = start;
+ if (filt->range && !filt->size && !filt->sym_to) {
+ filt->size = size;
+ no_size = !size;
+ }
+ }
+
+ if (filt->sym_to) {
+ err = find_kern_sym(filt->sym_to, &start, &size,
+ filt->sym_to_idx);
+ if (err)
+ return err;
+
+ err = check_end_after_start(filt, start, size);
+ if (err)
+ return err;
+ filt->size = start + size - filt->addr;
+ no_size = !size;
+ }
+
+ /* The very last symbol in kallsyms does not imply a particular size */
+ if (no_size) {
+ pr_err("Cannot determine size of symbol '%s'\n",
+ filt->sym_to ? filt->sym_to : filt->sym_from);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct dso *load_dso(const char *name)
+{
+ struct map *map;
+ struct dso *dso;
+
+ map = dso__new_map(name);
+ if (!map)
+ return NULL;
+
+ if (map__load(map) < 0)
+ pr_err("File '%s' not found or has no symbols.\n", name);
+
+ dso = dso__get(map__dso(map));
+
+ map__put(map);
+
+ return dso;
+}
+
+static bool dso_sym_match(struct symbol *sym, const char *name, int *cnt,
+ int idx)
+{
+ /* Same name, and global or the n'th found or any */
+ return !arch__compare_symbol_names(name, sym->name) &&
+ ((!idx && sym->binding == STB_GLOBAL) ||
+ (idx > 0 && ++*cnt == idx) ||
+ idx < 0);
+}
+
+static void print_duplicate_syms(struct dso *dso, const char *sym_name)
+{
+ struct symbol *sym;
+ bool near = false;
+ int cnt = 0;
+
+ pr_err("Multiple symbols with name '%s'\n", sym_name);
+
+ sym = dso__first_symbol(dso);
+ while (sym) {
+ if (dso_sym_match(sym, sym_name, &cnt, -1)) {
+ pr_err("#%d\t0x%"PRIx64"\t%c\t%s\n",
+ ++cnt, sym->start,
+ sym->binding == STB_GLOBAL ? 'g' :
+ sym->binding == STB_LOCAL ? 'l' : 'w',
+ sym->name);
+ near = true;
+ } else if (near) {
+ near = false;
+ pr_err("\t\twhich is near\t\t%s\n", sym->name);
+ }
+ sym = dso__next_symbol(sym);
+ }
+
+ pr_err("Disambiguate symbol name by inserting #n after the name e.g. %s #2\n",
+ sym_name);
+ pr_err("Or select a global symbol by inserting #0 or #g or #G\n");
+}
+
+static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start,
+ u64 *size, int idx)
+{
+ struct symbol *sym;
+ int cnt = 0;
+
+ *start = 0;
+ *size = 0;
+
+ sym = dso__first_symbol(dso);
+ while (sym) {
+ if (*start) {
+ if (!*size)
+ *size = sym->start - *start;
+ if (idx > 0) {
+ if (*size)
+ return 0;
+ } else if (dso_sym_match(sym, sym_name, &cnt, idx)) {
+ print_duplicate_syms(dso, sym_name);
+ return -EINVAL;
+ }
+ } else if (dso_sym_match(sym, sym_name, &cnt, idx)) {
+ *start = sym->start;
+ *size = sym->end - sym->start;
+ }
+ sym = dso__next_symbol(sym);
+ }
+
+ if (!*start)
+ return sym_not_found_error(sym_name, idx);
+
+ return 0;
+}
+
+static int addr_filter__entire_dso(struct addr_filter *filt, struct dso *dso)
+{
+ if (dso__data_file_size(dso, NULL)) {
+ pr_err("Failed to determine filter for %s\nCannot determine file size.\n",
+ filt->filename);
+ return -EINVAL;
+ }
+
+ filt->addr = 0;
+ filt->size = dso->data.file_size;
+
+ return 0;
+}
+
+static int addr_filter__resolve_syms(struct addr_filter *filt)
+{
+ u64 start, size;
+ struct dso *dso;
+ int err = 0;
+
+ if (!filt->sym_from && !filt->sym_to)
+ return 0;
+
+ if (!filt->filename)
+ return addr_filter__resolve_kernel_syms(filt);
+
+ dso = load_dso(filt->filename);
+ if (!dso) {
+ pr_err("Failed to load symbols from: %s\n", filt->filename);
+ return -EINVAL;
+ }
+
+ if (filt->sym_from && !strcmp(filt->sym_from, "*")) {
+ err = addr_filter__entire_dso(filt, dso);
+ goto put_dso;
+ }
+
+ if (filt->sym_from) {
+ err = find_dso_sym(dso, filt->sym_from, &start, &size,
+ filt->sym_from_idx);
+ if (err)
+ goto put_dso;
+ filt->addr = start;
+ if (filt->range && !filt->size && !filt->sym_to)
+ filt->size = size;
+ }
+
+ if (filt->sym_to) {
+ err = find_dso_sym(dso, filt->sym_to, &start, &size,
+ filt->sym_to_idx);
+ if (err)
+ goto put_dso;
+
+ err = check_end_after_start(filt, start, size);
+ if (err)
+ return err;
+
+ filt->size = start + size - filt->addr;
+ }
+
+put_dso:
+ dso__put(dso);
+
+ return err;
+}
+
+static char *addr_filter__to_str(struct addr_filter *filt)
+{
+ char filename_buf[PATH_MAX];
+ const char *at = "";
+ const char *fn = "";
+ char *filter;
+ int err;
+
+ if (filt->filename) {
+ at = "@";
+ fn = realpath(filt->filename, filename_buf);
+ if (!fn)
+ return NULL;
+ }
+
+ if (filt->range) {
+ err = asprintf(&filter, "%s 0x%"PRIx64"/0x%"PRIx64"%s%s",
+ filt->action, filt->addr, filt->size, at, fn);
+ } else {
+ err = asprintf(&filter, "%s 0x%"PRIx64"%s%s",
+ filt->action, filt->addr, at, fn);
+ }
+
+ return err < 0 ? NULL : filter;
+}
+
+static int parse_addr_filter(struct evsel *evsel, const char *filter,
+ int max_nr)
+{
+ struct addr_filters filts;
+ struct addr_filter *filt;
+ int err;
+
+ addr_filters__init(&filts);
+
+ err = addr_filters__parse_bare_filter(&filts, filter);
+ if (err)
+ goto out_exit;
+
+ if (filts.cnt > max_nr) {
+ pr_err("Error: number of address filters (%d) exceeds maximum (%d)\n",
+ filts.cnt, max_nr);
+ err = -EINVAL;
+ goto out_exit;
+ }
+
+ list_for_each_entry(filt, &filts.head, list) {
+ char *new_filter;
+
+ err = addr_filter__resolve_syms(filt);
+ if (err)
+ goto out_exit;
+
+ new_filter = addr_filter__to_str(filt);
+ if (!new_filter) {
+ err = -ENOMEM;
+ goto out_exit;
+ }
+
+ if (evsel__append_addr_filter(evsel, new_filter)) {
+ err = -ENOMEM;
+ goto out_exit;
+ }
+ }
+
+out_exit:
+ addr_filters__exit(&filts);
+
+ if (err) {
+ pr_err("Failed to parse address filter: '%s'\n", filter);
+ pr_err("Filter format is: filter|start|stop|tracestop <start symbol or address> [/ <end symbol or size>] [@<file name>]\n");
+ pr_err("Where multiple filters are separated by space or comma.\n");
+ }
+
+ return err;
+}
+
+static int evsel__nr_addr_filter(struct evsel *evsel)
+{
+ struct perf_pmu *pmu = evsel__find_pmu(evsel);
+ int nr_addr_filters = 0;
+
+ if (!pmu)
+ return 0;
+
+ perf_pmu__scan_file(pmu, "nr_addr_filters", "%d", &nr_addr_filters);
+
+ return nr_addr_filters;
+}
+
+int auxtrace_parse_filters(struct evlist *evlist)
+{
+ struct evsel *evsel;
+ char *filter;
+ int err, max_nr;
+
+ evlist__for_each_entry(evlist, evsel) {
+ filter = evsel->filter;
+ max_nr = evsel__nr_addr_filter(evsel);
+ if (!filter || !max_nr)
+ continue;
+ evsel->filter = NULL;
+ err = parse_addr_filter(evsel, filter, max_nr);
+ free(filter);
+ if (err)
+ return err;
+ pr_debug("Address filter: %s\n", evsel->filter);
+ }
+
+ return 0;
+}
+
+int auxtrace__process_event(struct perf_session *session, union perf_event *event,
+ struct perf_sample *sample, struct perf_tool *tool)
+{
+ if (!session->auxtrace)
+ return 0;
+
+ return session->auxtrace->process_event(session, event, sample, tool);
+}
+
+void auxtrace__dump_auxtrace_sample(struct perf_session *session,
+ struct perf_sample *sample)
+{
+ if (!session->auxtrace || !session->auxtrace->dump_auxtrace_sample ||
+ auxtrace__dont_decode(session))
+ return;
+
+ session->auxtrace->dump_auxtrace_sample(session, sample);
+}
+
+int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool)
+{
+ if (!session->auxtrace)
+ return 0;
+
+ return session->auxtrace->flush_events(session, tool);
+}
+
+void auxtrace__free_events(struct perf_session *session)
+{
+ if (!session->auxtrace)
+ return;
+
+ return session->auxtrace->free_events(session);
+}
+
+void auxtrace__free(struct perf_session *session)
+{
+ if (!session->auxtrace)
+ return;
+
+ return session->auxtrace->free(session);
+}
+
+bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ if (!session->auxtrace || !session->auxtrace->evsel_is_auxtrace)
+ return false;
+
+ return session->auxtrace->evsel_is_auxtrace(session, evsel);
+}
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
new file mode 100644
index 000000000..29eb82dff
--- /dev/null
+++ b/tools/perf/util/auxtrace.h
@@ -0,0 +1,906 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * auxtrace.h: AUX area trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ */
+
+#ifndef __PERF_AUXTRACE_H
+#define __PERF_AUXTRACE_H
+
+#include <sys/types.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h> // FILE
+#include <linux/list.h>
+#include <linux/perf_event.h>
+#include <linux/types.h>
+#include <perf/cpumap.h>
+#include <asm/bitsperlong.h>
+#include <asm/barrier.h>
+
+union perf_event;
+struct perf_session;
+struct evlist;
+struct evsel;
+struct perf_tool;
+struct mmap;
+struct perf_sample;
+struct option;
+struct record_opts;
+struct perf_record_auxtrace_error;
+struct perf_record_auxtrace_info;
+struct events_stats;
+struct perf_pmu;
+
+enum auxtrace_error_type {
+ PERF_AUXTRACE_ERROR_ITRACE = 1,
+ PERF_AUXTRACE_ERROR_MAX
+};
+
+/* Auxtrace records must have the same alignment as perf event records */
+#define PERF_AUXTRACE_RECORD_ALIGNMENT 8
+
+enum auxtrace_type {
+ PERF_AUXTRACE_UNKNOWN,
+ PERF_AUXTRACE_INTEL_PT,
+ PERF_AUXTRACE_INTEL_BTS,
+ PERF_AUXTRACE_CS_ETM,
+ PERF_AUXTRACE_ARM_SPE,
+ PERF_AUXTRACE_S390_CPUMSF,
+ PERF_AUXTRACE_HISI_PTT,
+};
+
+enum itrace_period_type {
+ PERF_ITRACE_PERIOD_INSTRUCTIONS,
+ PERF_ITRACE_PERIOD_TICKS,
+ PERF_ITRACE_PERIOD_NANOSECS,
+};
+
+#define AUXTRACE_ERR_FLG_OVERFLOW (1 << ('o' - 'a'))
+#define AUXTRACE_ERR_FLG_DATA_LOST (1 << ('l' - 'a'))
+
+#define AUXTRACE_LOG_FLG_ALL_PERF_EVTS (1 << ('a' - 'a'))
+#define AUXTRACE_LOG_FLG_ON_ERROR (1 << ('e' - 'a'))
+#define AUXTRACE_LOG_FLG_USE_STDOUT (1 << ('o' - 'a'))
+
+/**
+ * struct itrace_synth_opts - AUX area tracing synthesis options.
+ * @set: indicates whether or not options have been set
+ * @default_no_sample: Default to no sampling.
+ * @inject: indicates the event (not just the sample) must be fully synthesized
+ * because 'perf inject' will write it out
+ * @instructions: whether to synthesize 'instructions' events
+ * @cycles: whether to synthesize 'cycles' events
+ * (not fully accurate, since CYC packets are only emitted
+ * together with other events, such as branches)
+ * @branches: whether to synthesize 'branches' events
+ * (branch misses only for Arm SPE)
+ * @transactions: whether to synthesize events for transactions
+ * @ptwrites: whether to synthesize events for ptwrites
+ * @pwr_events: whether to synthesize power events
+ * @other_events: whether to synthesize other events recorded due to the use of
+ * aux_output
+ * @intr_events: whether to synthesize interrupt events
+ * @errors: whether to synthesize decoder error events
+ * @dont_decode: whether to skip decoding entirely
+ * @log: write a decoding log
+ * @calls: limit branch samples to calls (can be combined with @returns)
+ * @returns: limit branch samples to returns (can be combined with @calls)
+ * @callchain: add callchain to 'instructions' events
+ * @add_callchain: add callchain to existing event records
+ * @thread_stack: feed branches to the thread_stack
+ * @last_branch: add branch context to 'instruction' events
+ * @add_last_branch: add branch context to existing event records
+ * @approx_ipc: approximate IPC
+ * @flc: whether to synthesize first level cache events
+ * @llc: whether to synthesize last level cache events
+ * @tlb: whether to synthesize TLB events
+ * @remote_access: whether to synthesize remote access events
+ * @mem: whether to synthesize memory events
+ * @timeless_decoding: prefer "timeless" decoding i.e. ignore timestamps
+ * @vm_time_correlation: perform VM Time Correlation
+ * @vm_tm_corr_dry_run: VM Time Correlation dry-run
+ * @vm_tm_corr_args: VM Time Correlation implementation-specific arguments
+ * @callchain_sz: maximum callchain size
+ * @last_branch_sz: branch context size
+ * @period: 'instructions' events period
+ * @period_type: 'instructions' events period type
+ * @initial_skip: skip N events at the beginning.
+ * @cpu_bitmap: CPUs for which to synthesize events, or NULL for all
+ * @ptime_range: time intervals to trace or NULL
+ * @range_num: number of time intervals to trace
+ * @error_plus_flags: flags to affect what errors are reported
+ * @error_minus_flags: flags to affect what errors are reported
+ * @log_plus_flags: flags to affect what is logged
+ * @log_minus_flags: flags to affect what is logged
+ * @quick: quicker (less detailed) decoding
+ * @log_on_error_size: size of log to keep for outputting log only on errors
+ */
+struct itrace_synth_opts {
+ bool set;
+ bool default_no_sample;
+ bool inject;
+ bool instructions;
+ bool cycles;
+ bool branches;
+ bool transactions;
+ bool ptwrites;
+ bool pwr_events;
+ bool other_events;
+ bool intr_events;
+ bool errors;
+ bool dont_decode;
+ bool log;
+ bool calls;
+ bool returns;
+ bool callchain;
+ bool add_callchain;
+ bool thread_stack;
+ bool last_branch;
+ bool add_last_branch;
+ bool approx_ipc;
+ bool flc;
+ bool llc;
+ bool tlb;
+ bool remote_access;
+ bool mem;
+ bool timeless_decoding;
+ bool vm_time_correlation;
+ bool vm_tm_corr_dry_run;
+ char *vm_tm_corr_args;
+ unsigned int callchain_sz;
+ unsigned int last_branch_sz;
+ unsigned long long period;
+ enum itrace_period_type period_type;
+ unsigned long initial_skip;
+ unsigned long *cpu_bitmap;
+ struct perf_time_interval *ptime_range;
+ int range_num;
+ unsigned int error_plus_flags;
+ unsigned int error_minus_flags;
+ unsigned int log_plus_flags;
+ unsigned int log_minus_flags;
+ unsigned int quick;
+ unsigned int log_on_error_size;
+};
+
+/**
+ * struct auxtrace_index_entry - indexes a AUX area tracing event within a
+ * perf.data file.
+ * @file_offset: offset within the perf.data file
+ * @sz: size of the event
+ */
+struct auxtrace_index_entry {
+ u64 file_offset;
+ u64 sz;
+};
+
+#define PERF_AUXTRACE_INDEX_ENTRY_COUNT 256
+
+/**
+ * struct auxtrace_index - index of AUX area tracing events within a perf.data
+ * file.
+ * @list: linking a number of arrays of entries
+ * @nr: number of entries
+ * @entries: array of entries
+ */
+struct auxtrace_index {
+ struct list_head list;
+ size_t nr;
+ struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT];
+};
+
+/**
+ * struct auxtrace - session callbacks to allow AUX area data decoding.
+ * @process_event: lets the decoder see all session events
+ * @process_auxtrace_event: process a PERF_RECORD_AUXTRACE event
+ * @queue_data: queue an AUX sample or PERF_RECORD_AUXTRACE event for later
+ * processing
+ * @dump_auxtrace_sample: dump AUX area sample data
+ * @flush_events: process any remaining data
+ * @free_events: free resources associated with event processing
+ * @free: free resources associated with the session
+ */
+struct auxtrace {
+ int (*process_event)(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_tool *tool);
+ int (*process_auxtrace_event)(struct perf_session *session,
+ union perf_event *event,
+ struct perf_tool *tool);
+ int (*queue_data)(struct perf_session *session,
+ struct perf_sample *sample, union perf_event *event,
+ u64 data_offset);
+ void (*dump_auxtrace_sample)(struct perf_session *session,
+ struct perf_sample *sample);
+ int (*flush_events)(struct perf_session *session,
+ struct perf_tool *tool);
+ void (*free_events)(struct perf_session *session);
+ void (*free)(struct perf_session *session);
+ bool (*evsel_is_auxtrace)(struct perf_session *session,
+ struct evsel *evsel);
+};
+
+/**
+ * struct auxtrace_buffer - a buffer containing AUX area tracing data.
+ * @list: buffers are queued in a list held by struct auxtrace_queue
+ * @size: size of the buffer in bytes
+ * @pid: in per-thread mode, the pid this buffer is associated with
+ * @tid: in per-thread mode, the tid this buffer is associated with
+ * @cpu: in per-cpu mode, the cpu this buffer is associated with
+ * @data: actual buffer data (can be null if the data has not been loaded)
+ * @data_offset: file offset at which the buffer can be read
+ * @mmap_addr: mmap address at which the buffer can be read
+ * @mmap_size: size of the mmap at @mmap_addr
+ * @data_needs_freeing: @data was malloc'd so free it when it is no longer
+ * needed
+ * @consecutive: the original data was split up and this buffer is consecutive
+ * to the previous buffer
+ * @offset: offset as determined by aux_head / aux_tail members of struct
+ * perf_event_mmap_page
+ * @reference: an implementation-specific reference determined when the data is
+ * recorded
+ * @buffer_nr: used to number each buffer
+ * @use_size: implementation actually only uses this number of bytes
+ * @use_data: implementation actually only uses data starting at this address
+ */
+struct auxtrace_buffer {
+ struct list_head list;
+ size_t size;
+ pid_t pid;
+ pid_t tid;
+ struct perf_cpu cpu;
+ void *data;
+ off_t data_offset;
+ void *mmap_addr;
+ size_t mmap_size;
+ bool data_needs_freeing;
+ bool consecutive;
+ u64 offset;
+ u64 reference;
+ u64 buffer_nr;
+ size_t use_size;
+ void *use_data;
+};
+
+/**
+ * struct auxtrace_queue - a queue of AUX area tracing data buffers.
+ * @head: head of buffer list
+ * @tid: in per-thread mode, the tid this queue is associated with
+ * @cpu: in per-cpu mode, the cpu this queue is associated with
+ * @set: %true once this queue has been dedicated to a specific thread or cpu
+ * @priv: implementation-specific data
+ */
+struct auxtrace_queue {
+ struct list_head head;
+ pid_t tid;
+ int cpu;
+ bool set;
+ void *priv;
+};
+
+/**
+ * struct auxtrace_queues - an array of AUX area tracing queues.
+ * @queue_array: array of queues
+ * @nr_queues: number of queues
+ * @new_data: set whenever new data is queued
+ * @populated: queues have been fully populated using the auxtrace_index
+ * @next_buffer_nr: used to number each buffer
+ */
+struct auxtrace_queues {
+ struct auxtrace_queue *queue_array;
+ unsigned int nr_queues;
+ bool new_data;
+ bool populated;
+ u64 next_buffer_nr;
+};
+
+/**
+ * struct auxtrace_heap_item - element of struct auxtrace_heap.
+ * @queue_nr: queue number
+ * @ordinal: value used for sorting (lowest ordinal is top of the heap) expected
+ * to be a timestamp
+ */
+struct auxtrace_heap_item {
+ unsigned int queue_nr;
+ u64 ordinal;
+};
+
+/**
+ * struct auxtrace_heap - a heap suitable for sorting AUX area tracing queues.
+ * @heap_array: the heap
+ * @heap_cnt: the number of elements in the heap
+ * @heap_sz: maximum number of elements (grows as needed)
+ */
+struct auxtrace_heap {
+ struct auxtrace_heap_item *heap_array;
+ unsigned int heap_cnt;
+ unsigned int heap_sz;
+};
+
+/**
+ * struct auxtrace_mmap - records an mmap of the auxtrace buffer.
+ * @base: address of mapped area
+ * @userpg: pointer to buffer's perf_event_mmap_page
+ * @mask: %0 if @len is not a power of two, otherwise (@len - %1)
+ * @len: size of mapped area
+ * @prev: previous aux_head
+ * @idx: index of this mmap
+ * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu
+ * mmap) otherwise %0
+ * @cpu: cpu number for a per-cpu mmap otherwise %-1
+ */
+struct auxtrace_mmap {
+ void *base;
+ void *userpg;
+ size_t mask;
+ size_t len;
+ u64 prev;
+ int idx;
+ pid_t tid;
+ int cpu;
+};
+
+/**
+ * struct auxtrace_mmap_params - parameters to set up struct auxtrace_mmap.
+ * @mask: %0 if @len is not a power of two, otherwise (@len - %1)
+ * @offset: file offset of mapped area
+ * @len: size of mapped area
+ * @prot: mmap memory protection
+ * @idx: index of this mmap
+ * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu
+ * mmap) otherwise %0
+ * @mmap_needed: set to %false for non-auxtrace events. This is needed because
+ * auxtrace mmapping is done in the same code path as non-auxtrace
+ * mmapping but not every evsel that needs non-auxtrace mmapping
+ * also needs auxtrace mmapping.
+ * @cpu: cpu number for a per-cpu mmap otherwise %-1
+ */
+struct auxtrace_mmap_params {
+ size_t mask;
+ off_t offset;
+ size_t len;
+ int prot;
+ int idx;
+ pid_t tid;
+ bool mmap_needed;
+ struct perf_cpu cpu;
+};
+
+/**
+ * struct auxtrace_record - callbacks for recording AUX area data.
+ * @recording_options: validate and process recording options
+ * @info_priv_size: return the size of the private data in auxtrace_info_event
+ * @info_fill: fill-in the private data in auxtrace_info_event
+ * @free: free this auxtrace record structure
+ * @snapshot_start: starting a snapshot
+ * @snapshot_finish: finishing a snapshot
+ * @find_snapshot: find data to snapshot within auxtrace mmap
+ * @parse_snapshot_options: parse snapshot options
+ * @reference: provide a 64-bit reference number for auxtrace_event
+ * @read_finish: called after reading from an auxtrace mmap
+ * @alignment: alignment (if any) for AUX area data
+ * @default_aux_sample_size: default sample size for --aux sample option
+ * @pmu: associated pmu
+ * @evlist: selected events list
+ */
+struct auxtrace_record {
+ int (*recording_options)(struct auxtrace_record *itr,
+ struct evlist *evlist,
+ struct record_opts *opts);
+ size_t (*info_priv_size)(struct auxtrace_record *itr,
+ struct evlist *evlist);
+ int (*info_fill)(struct auxtrace_record *itr,
+ struct perf_session *session,
+ struct perf_record_auxtrace_info *auxtrace_info,
+ size_t priv_size);
+ void (*free)(struct auxtrace_record *itr);
+ int (*snapshot_start)(struct auxtrace_record *itr);
+ int (*snapshot_finish)(struct auxtrace_record *itr);
+ int (*find_snapshot)(struct auxtrace_record *itr, int idx,
+ struct auxtrace_mmap *mm, unsigned char *data,
+ u64 *head, u64 *old);
+ int (*parse_snapshot_options)(struct auxtrace_record *itr,
+ struct record_opts *opts,
+ const char *str);
+ u64 (*reference)(struct auxtrace_record *itr);
+ int (*read_finish)(struct auxtrace_record *itr, int idx);
+ unsigned int alignment;
+ unsigned int default_aux_sample_size;
+ struct perf_pmu *pmu;
+ struct evlist *evlist;
+};
+
+/**
+ * struct addr_filter - address filter.
+ * @list: list node
+ * @range: true if it is a range filter
+ * @start: true if action is 'filter' or 'start'
+ * @action: 'filter', 'start' or 'stop' ('tracestop' is accepted but converted
+ * to 'stop')
+ * @sym_from: symbol name for the filter address
+ * @sym_to: symbol name that determines the filter size
+ * @sym_from_idx: selects n'th from symbols with the same name (0 means global
+ * and less than 0 means symbol must be unique)
+ * @sym_to_idx: same as @sym_from_idx but for @sym_to
+ * @addr: filter address
+ * @size: filter region size (for range filters)
+ * @filename: DSO file name or NULL for the kernel
+ * @str: allocated string that contains the other string members
+ */
+struct addr_filter {
+ struct list_head list;
+ bool range;
+ bool start;
+ const char *action;
+ const char *sym_from;
+ const char *sym_to;
+ int sym_from_idx;
+ int sym_to_idx;
+ u64 addr;
+ u64 size;
+ const char *filename;
+ char *str;
+};
+
+/**
+ * struct addr_filters - list of address filters.
+ * @head: list of address filters
+ * @cnt: number of address filters
+ */
+struct addr_filters {
+ struct list_head head;
+ int cnt;
+};
+
+struct auxtrace_cache;
+
+#ifdef HAVE_AUXTRACE_SUPPORT
+
+u64 compat_auxtrace_mmap__read_head(struct auxtrace_mmap *mm);
+int compat_auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail);
+
+static inline u64 auxtrace_mmap__read_head(struct auxtrace_mmap *mm,
+ int kernel_is_64_bit __maybe_unused)
+{
+ struct perf_event_mmap_page *pc = mm->userpg;
+ u64 head;
+
+#if BITS_PER_LONG == 32
+ if (kernel_is_64_bit)
+ return compat_auxtrace_mmap__read_head(mm);
+#endif
+ head = READ_ONCE(pc->aux_head);
+
+ /* Ensure all reads are done after we read the head */
+ smp_rmb();
+ return head;
+}
+
+static inline int auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail,
+ int kernel_is_64_bit __maybe_unused)
+{
+ struct perf_event_mmap_page *pc = mm->userpg;
+
+#if BITS_PER_LONG == 32
+ if (kernel_is_64_bit)
+ return compat_auxtrace_mmap__write_tail(mm, tail);
+#endif
+ /* Ensure all reads are done before we write the tail out */
+ smp_mb();
+ WRITE_ONCE(pc->aux_tail, tail);
+ return 0;
+}
+
+int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
+ struct auxtrace_mmap_params *mp,
+ void *userpg, int fd);
+void auxtrace_mmap__munmap(struct auxtrace_mmap *mm);
+void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
+ off_t auxtrace_offset,
+ unsigned int auxtrace_pages,
+ bool auxtrace_overwrite);
+void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
+ struct evlist *evlist,
+ struct evsel *evsel, int idx);
+
+typedef int (*process_auxtrace_t)(struct perf_tool *tool,
+ struct mmap *map,
+ union perf_event *event, void *data1,
+ size_t len1, void *data2, size_t len2);
+
+int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr,
+ struct perf_tool *tool, process_auxtrace_t fn);
+
+int auxtrace_mmap__read_snapshot(struct mmap *map,
+ struct auxtrace_record *itr,
+ struct perf_tool *tool, process_auxtrace_t fn,
+ size_t snapshot_size);
+
+int auxtrace_queues__init(struct auxtrace_queues *queues);
+int auxtrace_queues__add_event(struct auxtrace_queues *queues,
+ struct perf_session *session,
+ union perf_event *event, off_t data_offset,
+ struct auxtrace_buffer **buffer_ptr);
+struct auxtrace_queue *
+auxtrace_queues__sample_queue(struct auxtrace_queues *queues,
+ struct perf_sample *sample,
+ struct perf_session *session);
+int auxtrace_queues__add_sample(struct auxtrace_queues *queues,
+ struct perf_session *session,
+ struct perf_sample *sample, u64 data_offset,
+ u64 reference);
+void auxtrace_queues__free(struct auxtrace_queues *queues);
+int auxtrace_queues__process_index(struct auxtrace_queues *queues,
+ struct perf_session *session);
+int auxtrace_queue_data(struct perf_session *session, bool samples,
+ bool events);
+struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue,
+ struct auxtrace_buffer *buffer);
+void *auxtrace_buffer__get_data_rw(struct auxtrace_buffer *buffer, int fd, bool rw);
+static inline void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd)
+{
+ return auxtrace_buffer__get_data_rw(buffer, fd, false);
+}
+void auxtrace_buffer__put_data(struct auxtrace_buffer *buffer);
+void auxtrace_buffer__drop_data(struct auxtrace_buffer *buffer);
+void auxtrace_buffer__free(struct auxtrace_buffer *buffer);
+
+int auxtrace_heap__add(struct auxtrace_heap *heap, unsigned int queue_nr,
+ u64 ordinal);
+void auxtrace_heap__pop(struct auxtrace_heap *heap);
+void auxtrace_heap__free(struct auxtrace_heap *heap);
+
+struct auxtrace_cache_entry {
+ struct hlist_node hash;
+ u32 key;
+};
+
+struct auxtrace_cache *auxtrace_cache__new(unsigned int bits, size_t entry_size,
+ unsigned int limit_percent);
+void auxtrace_cache__free(struct auxtrace_cache *auxtrace_cache);
+void *auxtrace_cache__alloc_entry(struct auxtrace_cache *c);
+void auxtrace_cache__free_entry(struct auxtrace_cache *c, void *entry);
+int auxtrace_cache__add(struct auxtrace_cache *c, u32 key,
+ struct auxtrace_cache_entry *entry);
+void auxtrace_cache__remove(struct auxtrace_cache *c, u32 key);
+void *auxtrace_cache__lookup(struct auxtrace_cache *c, u32 key);
+
+struct auxtrace_record *auxtrace_record__init(struct evlist *evlist,
+ int *err);
+
+int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
+ struct record_opts *opts,
+ const char *str);
+int auxtrace_parse_sample_options(struct auxtrace_record *itr,
+ struct evlist *evlist,
+ struct record_opts *opts, const char *str);
+void auxtrace_regroup_aux_output(struct evlist *evlist);
+int auxtrace_record__options(struct auxtrace_record *itr,
+ struct evlist *evlist,
+ struct record_opts *opts);
+size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr,
+ struct evlist *evlist);
+int auxtrace_record__info_fill(struct auxtrace_record *itr,
+ struct perf_session *session,
+ struct perf_record_auxtrace_info *auxtrace_info,
+ size_t priv_size);
+void auxtrace_record__free(struct auxtrace_record *itr);
+int auxtrace_record__snapshot_start(struct auxtrace_record *itr);
+int auxtrace_record__snapshot_finish(struct auxtrace_record *itr, bool on_exit);
+int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx,
+ struct auxtrace_mmap *mm,
+ unsigned char *data, u64 *head, u64 *old);
+u64 auxtrace_record__reference(struct auxtrace_record *itr);
+int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx);
+
+int auxtrace_index__auxtrace_event(struct list_head *head, union perf_event *event,
+ off_t file_offset);
+int auxtrace_index__write(int fd, struct list_head *head);
+int auxtrace_index__process(int fd, u64 size, struct perf_session *session,
+ bool needs_swap);
+void auxtrace_index__free(struct list_head *head);
+
+void auxtrace_synth_guest_error(struct perf_record_auxtrace_error *auxtrace_error, int type,
+ int code, int cpu, pid_t pid, pid_t tid, u64 ip,
+ const char *msg, u64 timestamp,
+ pid_t machine_pid, int vcpu);
+void auxtrace_synth_error(struct perf_record_auxtrace_error *auxtrace_error, int type,
+ int code, int cpu, pid_t pid, pid_t tid, u64 ip,
+ const char *msg, u64 timestamp);
+
+int perf_event__process_auxtrace_info(struct perf_session *session,
+ union perf_event *event);
+s64 perf_event__process_auxtrace(struct perf_session *session,
+ union perf_event *event);
+int perf_event__process_auxtrace_error(struct perf_session *session,
+ union perf_event *event);
+int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts,
+ const char *str, int unset);
+int itrace_parse_synth_opts(const struct option *opt, const char *str,
+ int unset);
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
+ bool no_sample);
+
+size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp);
+void perf_session__auxtrace_error_inc(struct perf_session *session,
+ union perf_event *event);
+void events_stats__auxtrace_error_warn(const struct events_stats *stats);
+
+void addr_filters__init(struct addr_filters *filts);
+void addr_filters__exit(struct addr_filters *filts);
+int addr_filters__parse_bare_filter(struct addr_filters *filts,
+ const char *filter);
+int auxtrace_parse_filters(struct evlist *evlist);
+
+int auxtrace__process_event(struct perf_session *session, union perf_event *event,
+ struct perf_sample *sample, struct perf_tool *tool);
+void auxtrace__dump_auxtrace_sample(struct perf_session *session,
+ struct perf_sample *sample);
+int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool);
+void auxtrace__free_events(struct perf_session *session);
+void auxtrace__free(struct perf_session *session);
+bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel);
+
+#define ITRACE_HELP \
+" i[period]: synthesize instructions events\n" \
+" y[period]: synthesize cycles events (same period as i)\n" \
+" b: synthesize branches events (branch misses for Arm SPE)\n" \
+" c: synthesize branches events (calls only)\n" \
+" r: synthesize branches events (returns only)\n" \
+" x: synthesize transactions events\n" \
+" w: synthesize ptwrite events\n" \
+" p: synthesize power events\n" \
+" o: synthesize other events recorded due to the use\n" \
+" of aux-output (refer to perf record)\n" \
+" I: synthesize interrupt or similar (asynchronous) events\n" \
+" (e.g. Intel PT Event Trace)\n" \
+" e[flags]: synthesize error events\n" \
+" each flag must be preceded by + or -\n" \
+" error flags are: o (overflow)\n" \
+" l (data lost)\n" \
+" d[flags]: create a debug log\n" \
+" each flag must be preceded by + or -\n" \
+" log flags are: a (all perf events)\n" \
+" o (output to stdout)\n" \
+" f: synthesize first level cache events\n" \
+" m: synthesize last level cache events\n" \
+" t: synthesize TLB events\n" \
+" a: synthesize remote access events\n" \
+" g[len]: synthesize a call chain (use with i or x)\n" \
+" G[len]: synthesize a call chain on existing event records\n" \
+" l[len]: synthesize last branch entries (use with i or x)\n" \
+" L[len]: synthesize last branch entries on existing event records\n" \
+" sNUMBER: skip initial number of events\n" \
+" q: quicker (less detailed) decoding\n" \
+" A: approximate IPC\n" \
+" Z: prefer to ignore timestamps (so-called \"timeless\" decoding)\n" \
+" PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \
+" concatenate multiple options. Default is iybxwpe or cewp\n"
+
+static inline
+void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts,
+ struct perf_time_interval *ptime_range,
+ int range_num)
+{
+ opts->ptime_range = ptime_range;
+ opts->range_num = range_num;
+}
+
+static inline
+void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts)
+{
+ opts->ptime_range = NULL;
+ opts->range_num = 0;
+}
+
+#else
+#include "debug.h"
+
+static inline struct auxtrace_record *
+auxtrace_record__init(struct evlist *evlist __maybe_unused,
+ int *err)
+{
+ *err = 0;
+ return NULL;
+}
+
+static inline
+void auxtrace_record__free(struct auxtrace_record *itr __maybe_unused)
+{
+}
+
+static inline
+int auxtrace_record__options(struct auxtrace_record *itr __maybe_unused,
+ struct evlist *evlist __maybe_unused,
+ struct record_opts *opts __maybe_unused)
+{
+ return 0;
+}
+
+static inline
+int perf_event__process_auxtrace_info(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused)
+{
+ return 0;
+}
+
+static inline
+s64 perf_event__process_auxtrace(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused)
+{
+ return 0;
+}
+
+static inline
+int perf_event__process_auxtrace_error(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused)
+{
+ return 0;
+}
+
+static inline
+void perf_session__auxtrace_error_inc(struct perf_session *session
+ __maybe_unused,
+ union perf_event *event
+ __maybe_unused)
+{
+}
+
+static inline
+void events_stats__auxtrace_error_warn(const struct events_stats *stats
+ __maybe_unused)
+{
+}
+
+static inline
+int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts __maybe_unused,
+ const char *str __maybe_unused, int unset __maybe_unused)
+{
+ pr_err("AUX area tracing not supported\n");
+ return -EINVAL;
+}
+
+static inline
+int itrace_parse_synth_opts(const struct option *opt __maybe_unused,
+ const char *str __maybe_unused,
+ int unset __maybe_unused)
+{
+ pr_err("AUX area tracing not supported\n");
+ return -EINVAL;
+}
+
+static inline
+int auxtrace_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
+ struct record_opts *opts __maybe_unused,
+ const char *str)
+{
+ if (!str)
+ return 0;
+ pr_err("AUX area tracing not supported\n");
+ return -EINVAL;
+}
+
+static inline
+int auxtrace_parse_sample_options(struct auxtrace_record *itr __maybe_unused,
+ struct evlist *evlist __maybe_unused,
+ struct record_opts *opts __maybe_unused,
+ const char *str)
+{
+ if (!str)
+ return 0;
+ pr_err("AUX area tracing not supported\n");
+ return -EINVAL;
+}
+
+static inline
+void auxtrace_regroup_aux_output(struct evlist *evlist __maybe_unused)
+{
+}
+
+static inline
+int auxtrace__process_event(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct perf_tool *tool __maybe_unused)
+{
+ return 0;
+}
+
+static inline
+void auxtrace__dump_auxtrace_sample(struct perf_session *session __maybe_unused,
+ struct perf_sample *sample __maybe_unused)
+{
+}
+
+static inline
+int auxtrace__flush_events(struct perf_session *session __maybe_unused,
+ struct perf_tool *tool __maybe_unused)
+{
+ return 0;
+}
+
+static inline
+void auxtrace__free_events(struct perf_session *session __maybe_unused)
+{
+}
+
+static inline
+void auxtrace_cache__free(struct auxtrace_cache *auxtrace_cache __maybe_unused)
+{
+}
+
+static inline
+void auxtrace__free(struct perf_session *session __maybe_unused)
+{
+}
+
+static inline
+int auxtrace_index__write(int fd __maybe_unused,
+ struct list_head *head __maybe_unused)
+{
+ return -EINVAL;
+}
+
+static inline
+int auxtrace_index__process(int fd __maybe_unused,
+ u64 size __maybe_unused,
+ struct perf_session *session __maybe_unused,
+ bool needs_swap __maybe_unused)
+{
+ return -EINVAL;
+}
+
+static inline
+void auxtrace_index__free(struct list_head *head __maybe_unused)
+{
+}
+
+static inline
+bool auxtrace__evsel_is_auxtrace(struct perf_session *session __maybe_unused,
+ struct evsel *evsel __maybe_unused)
+{
+ return false;
+}
+
+static inline
+int auxtrace_parse_filters(struct evlist *evlist __maybe_unused)
+{
+ return 0;
+}
+
+int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
+ struct auxtrace_mmap_params *mp,
+ void *userpg, int fd);
+void auxtrace_mmap__munmap(struct auxtrace_mmap *mm);
+void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
+ off_t auxtrace_offset,
+ unsigned int auxtrace_pages,
+ bool auxtrace_overwrite);
+void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
+ struct evlist *evlist,
+ struct evsel *evsel, int idx);
+
+#define ITRACE_HELP ""
+
+static inline
+void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts
+ __maybe_unused,
+ struct perf_time_interval *ptime_range
+ __maybe_unused,
+ int range_num __maybe_unused)
+{
+}
+
+static inline
+void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts
+ __maybe_unused)
+{
+}
+
+#endif
+
+#endif
diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c
new file mode 100644
index 000000000..591fc1edd
--- /dev/null
+++ b/tools/perf/util/block-info.c
@@ -0,0 +1,498 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <string.h>
+#include <linux/zalloc.h>
+#include "block-info.h"
+#include "sort.h"
+#include "annotate.h"
+#include "symbol.h"
+#include "dso.h"
+#include "map.h"
+#include "srcline.h"
+#include "evlist.h"
+#include "hist.h"
+#include "ui/browsers/hists.h"
+
+static struct block_header_column {
+ const char *name;
+ int width;
+} block_columns[PERF_HPP_REPORT__BLOCK_MAX_INDEX] = {
+ [PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT] = {
+ .name = "Sampled Cycles%",
+ .width = 15,
+ },
+ [PERF_HPP_REPORT__BLOCK_LBR_CYCLES] = {
+ .name = "Sampled Cycles",
+ .width = 14,
+ },
+ [PERF_HPP_REPORT__BLOCK_CYCLES_PCT] = {
+ .name = "Avg Cycles%",
+ .width = 11,
+ },
+ [PERF_HPP_REPORT__BLOCK_AVG_CYCLES] = {
+ .name = "Avg Cycles",
+ .width = 10,
+ },
+ [PERF_HPP_REPORT__BLOCK_RANGE] = {
+ .name = "[Program Block Range]",
+ .width = 70,
+ },
+ [PERF_HPP_REPORT__BLOCK_DSO] = {
+ .name = "Shared Object",
+ .width = 20,
+ }
+};
+
+struct block_info *block_info__get(struct block_info *bi)
+{
+ if (bi)
+ refcount_inc(&bi->refcnt);
+ return bi;
+}
+
+void block_info__put(struct block_info *bi)
+{
+ if (bi && refcount_dec_and_test(&bi->refcnt))
+ free(bi);
+}
+
+struct block_info *block_info__new(void)
+{
+ struct block_info *bi = zalloc(sizeof(*bi));
+
+ if (bi)
+ refcount_set(&bi->refcnt, 1);
+ return bi;
+}
+
+int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ struct block_info *bi_l = left->block_info;
+ struct block_info *bi_r = right->block_info;
+ int cmp;
+
+ if (!bi_l->sym || !bi_r->sym) {
+ if (!bi_l->sym && !bi_r->sym)
+ return -1;
+ else if (!bi_l->sym)
+ return -1;
+ else
+ return 1;
+ }
+
+ cmp = strcmp(bi_l->sym->name, bi_r->sym->name);
+ if (cmp)
+ return cmp;
+
+ if (bi_l->start != bi_r->start)
+ return (int64_t)(bi_r->start - bi_l->start);
+
+ return (int64_t)(bi_r->end - bi_l->end);
+}
+
+int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ return __block_info__cmp(left, right);
+}
+
+static void init_block_info(struct block_info *bi, struct symbol *sym,
+ struct cyc_hist *ch, int offset,
+ u64 total_cycles)
+{
+ bi->sym = sym;
+ bi->start = ch->start;
+ bi->end = offset;
+ bi->cycles = ch->cycles;
+ bi->cycles_aggr = ch->cycles_aggr;
+ bi->num = ch->num;
+ bi->num_aggr = ch->num_aggr;
+ bi->total_cycles = total_cycles;
+
+ memcpy(bi->cycles_spark, ch->cycles_spark,
+ NUM_SPARKS * sizeof(u64));
+}
+
+int block_info__process_sym(struct hist_entry *he, struct block_hist *bh,
+ u64 *block_cycles_aggr, u64 total_cycles)
+{
+ struct annotation *notes;
+ struct cyc_hist *ch;
+ static struct addr_location al;
+ u64 cycles = 0;
+
+ if (!he->ms.map || !he->ms.sym)
+ return 0;
+
+ memset(&al, 0, sizeof(al));
+ al.map = he->ms.map;
+ al.sym = he->ms.sym;
+
+ notes = symbol__annotation(he->ms.sym);
+ if (!notes || !notes->src || !notes->src->cycles_hist)
+ return 0;
+ ch = notes->src->cycles_hist;
+ for (unsigned int i = 0; i < symbol__size(he->ms.sym); i++) {
+ if (ch[i].num_aggr) {
+ struct block_info *bi;
+ struct hist_entry *he_block;
+
+ bi = block_info__new();
+ if (!bi)
+ return -1;
+
+ init_block_info(bi, he->ms.sym, &ch[i], i,
+ total_cycles);
+ cycles += bi->cycles_aggr / bi->num_aggr;
+
+ he_block = hists__add_entry_block(&bh->block_hists,
+ &al, bi);
+ if (!he_block) {
+ block_info__put(bi);
+ return -1;
+ }
+ }
+ }
+
+ if (block_cycles_aggr)
+ *block_cycles_aggr += cycles;
+
+ return 0;
+}
+
+static int block_column_header(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp,
+ struct hists *hists __maybe_unused,
+ int line __maybe_unused,
+ int *span __maybe_unused)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+ block_fmt->header);
+}
+
+static int block_column_width(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp __maybe_unused,
+ struct hists *hists __maybe_unused)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+
+ return block_fmt->width;
+}
+
+static int color_pct(struct perf_hpp *hpp, int width, double pct)
+{
+#ifdef HAVE_SLANG_SUPPORT
+ if (use_browser) {
+ return __hpp__slsmg_color_printf(hpp, "%*.2f%%",
+ width - 1, pct);
+ }
+#endif
+ return hpp_color_scnprintf(hpp, "%*.2f%%", width - 1, pct);
+}
+
+static int block_total_cycles_pct_entry(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+ struct block_info *bi = he->block_info;
+ double ratio = 0.0;
+
+ if (block_fmt->total_cycles)
+ ratio = (double)bi->cycles_aggr / (double)block_fmt->total_cycles;
+
+ return color_pct(hpp, block_fmt->width, 100.0 * ratio);
+}
+
+static int64_t block_total_cycles_pct_sort(struct perf_hpp_fmt *fmt,
+ struct hist_entry *left,
+ struct hist_entry *right)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+ struct block_info *bi_l = left->block_info;
+ struct block_info *bi_r = right->block_info;
+ double l, r;
+
+ if (block_fmt->total_cycles) {
+ l = ((double)bi_l->cycles_aggr /
+ (double)block_fmt->total_cycles) * 100000.0;
+ r = ((double)bi_r->cycles_aggr /
+ (double)block_fmt->total_cycles) * 100000.0;
+ return (int64_t)l - (int64_t)r;
+ }
+
+ return 0;
+}
+
+static void cycles_string(u64 cycles, char *buf, int size)
+{
+ if (cycles >= 1000000)
+ scnprintf(buf, size, "%.1fM", (double)cycles / 1000000.0);
+ else if (cycles >= 1000)
+ scnprintf(buf, size, "%.1fK", (double)cycles / 1000.0);
+ else
+ scnprintf(buf, size, "%1d", cycles);
+}
+
+static int block_cycles_lbr_entry(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp, struct hist_entry *he)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+ struct block_info *bi = he->block_info;
+ char cycles_buf[16];
+
+ cycles_string(bi->cycles_aggr, cycles_buf, sizeof(cycles_buf));
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+ cycles_buf);
+}
+
+static int block_cycles_pct_entry(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp, struct hist_entry *he)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+ struct block_info *bi = he->block_info;
+ double ratio = 0.0;
+ u64 avg;
+
+ if (block_fmt->block_cycles && bi->num_aggr) {
+ avg = bi->cycles_aggr / bi->num_aggr;
+ ratio = (double)avg / (double)block_fmt->block_cycles;
+ }
+
+ return color_pct(hpp, block_fmt->width, 100.0 * ratio);
+}
+
+static int block_avg_cycles_entry(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+ struct block_info *bi = he->block_info;
+ char cycles_buf[16];
+
+ cycles_string(bi->cycles_aggr / bi->num_aggr, cycles_buf,
+ sizeof(cycles_buf));
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+ cycles_buf);
+}
+
+static int block_range_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+ struct block_info *bi = he->block_info;
+ char buf[128];
+ char *start_line, *end_line;
+
+ symbol_conf.disable_add2line_warn = true;
+
+ start_line = map__srcline(he->ms.map, bi->sym->start + bi->start,
+ he->ms.sym);
+
+ end_line = map__srcline(he->ms.map, bi->sym->start + bi->end,
+ he->ms.sym);
+
+ if (start_line != SRCLINE_UNKNOWN &&
+ end_line != SRCLINE_UNKNOWN) {
+ scnprintf(buf, sizeof(buf), "[%s -> %s]",
+ start_line, end_line);
+ } else {
+ scnprintf(buf, sizeof(buf), "[%7lx -> %7lx]",
+ bi->start, bi->end);
+ }
+
+ zfree_srcline(&start_line);
+ zfree_srcline(&end_line);
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
+}
+
+static int block_dso_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+ struct map *map = he->ms.map;
+
+ if (map && map__dso(map)) {
+ return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+ map__dso(map)->short_name);
+ }
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+ "[unknown]");
+}
+
+static void init_block_header(struct block_fmt *block_fmt)
+{
+ struct perf_hpp_fmt *fmt = &block_fmt->fmt;
+
+ BUG_ON(block_fmt->idx >= PERF_HPP_REPORT__BLOCK_MAX_INDEX);
+
+ block_fmt->header = block_columns[block_fmt->idx].name;
+ block_fmt->width = block_columns[block_fmt->idx].width;
+
+ fmt->header = block_column_header;
+ fmt->width = block_column_width;
+}
+
+static void hpp_register(struct block_fmt *block_fmt, int idx,
+ struct perf_hpp_list *hpp_list)
+{
+ struct perf_hpp_fmt *fmt = &block_fmt->fmt;
+
+ block_fmt->idx = idx;
+ INIT_LIST_HEAD(&fmt->list);
+ INIT_LIST_HEAD(&fmt->sort_list);
+
+ switch (idx) {
+ case PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT:
+ fmt->color = block_total_cycles_pct_entry;
+ fmt->cmp = block_info__cmp;
+ fmt->sort = block_total_cycles_pct_sort;
+ break;
+ case PERF_HPP_REPORT__BLOCK_LBR_CYCLES:
+ fmt->entry = block_cycles_lbr_entry;
+ break;
+ case PERF_HPP_REPORT__BLOCK_CYCLES_PCT:
+ fmt->color = block_cycles_pct_entry;
+ break;
+ case PERF_HPP_REPORT__BLOCK_AVG_CYCLES:
+ fmt->entry = block_avg_cycles_entry;
+ break;
+ case PERF_HPP_REPORT__BLOCK_RANGE:
+ fmt->entry = block_range_entry;
+ break;
+ case PERF_HPP_REPORT__BLOCK_DSO:
+ fmt->entry = block_dso_entry;
+ break;
+ default:
+ return;
+ }
+
+ init_block_header(block_fmt);
+ perf_hpp_list__column_register(hpp_list, fmt);
+}
+
+static void register_block_columns(struct perf_hpp_list *hpp_list,
+ struct block_fmt *block_fmts,
+ int *block_hpps, int nr_hpps)
+{
+ for (int i = 0; i < nr_hpps; i++)
+ hpp_register(&block_fmts[i], block_hpps[i], hpp_list);
+}
+
+static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts,
+ int *block_hpps, int nr_hpps)
+{
+ __hists__init(&bh->block_hists, &bh->block_list);
+ perf_hpp_list__init(&bh->block_list);
+ bh->block_list.nr_header_lines = 1;
+
+ register_block_columns(&bh->block_list, block_fmts,
+ block_hpps, nr_hpps);
+
+ /* Sort by the first fmt */
+ perf_hpp_list__register_sort_field(&bh->block_list, &block_fmts[0].fmt);
+}
+
+static int process_block_report(struct hists *hists,
+ struct block_report *block_report,
+ u64 total_cycles, int *block_hpps,
+ int nr_hpps)
+{
+ struct rb_node *next = rb_first_cached(&hists->entries);
+ struct block_hist *bh = &block_report->hist;
+ struct hist_entry *he;
+
+ if (nr_hpps > PERF_HPP_REPORT__BLOCK_MAX_INDEX)
+ return -1;
+
+ block_report->nr_fmts = nr_hpps;
+ init_block_hist(bh, block_report->fmts, block_hpps, nr_hpps);
+
+ while (next) {
+ he = rb_entry(next, struct hist_entry, rb_node);
+ block_info__process_sym(he, bh, &block_report->cycles,
+ total_cycles);
+ next = rb_next(&he->rb_node);
+ }
+
+ for (int i = 0; i < nr_hpps; i++) {
+ block_report->fmts[i].total_cycles = total_cycles;
+ block_report->fmts[i].block_cycles = block_report->cycles;
+ }
+
+ hists__output_resort(&bh->block_hists, NULL);
+ return 0;
+}
+
+struct block_report *block_info__create_report(struct evlist *evlist,
+ u64 total_cycles,
+ int *block_hpps, int nr_hpps,
+ int *nr_reps)
+{
+ struct block_report *block_reports;
+ int nr_hists = evlist->core.nr_entries, i = 0;
+ struct evsel *pos;
+
+ block_reports = calloc(nr_hists, sizeof(struct block_report));
+ if (!block_reports)
+ return NULL;
+
+ evlist__for_each_entry(evlist, pos) {
+ struct hists *hists = evsel__hists(pos);
+
+ process_block_report(hists, &block_reports[i], total_cycles,
+ block_hpps, nr_hpps);
+ i++;
+ }
+
+ *nr_reps = nr_hists;
+ return block_reports;
+}
+
+void block_info__free_report(struct block_report *reps, int nr_reps)
+{
+ for (int i = 0; i < nr_reps; i++)
+ hists__delete_entries(&reps[i].hist.block_hists);
+
+ free(reps);
+}
+
+int report__browse_block_hists(struct block_hist *bh, float min_percent,
+ struct evsel *evsel, struct perf_env *env,
+ struct annotation_options *annotation_opts)
+{
+ int ret;
+
+ switch (use_browser) {
+ case 0:
+ symbol_conf.report_individual_block = true;
+ hists__fprintf(&bh->block_hists, true, 0, 0, min_percent,
+ stdout, true);
+ return 0;
+ case 1:
+ symbol_conf.report_individual_block = true;
+ ret = block_hists_tui_browse(bh, evsel, min_percent,
+ env, annotation_opts);
+ return ret;
+ default:
+ return -1;
+ }
+
+ return 0;
+}
+
+float block_info__total_cycles_percent(struct hist_entry *he)
+{
+ struct block_info *bi = he->block_info;
+
+ if (bi->total_cycles)
+ return bi->cycles * 100.0 / bi->total_cycles;
+
+ return 0.0;
+}
diff --git a/tools/perf/util/block-info.h b/tools/perf/util/block-info.h
new file mode 100644
index 000000000..42e9dcc4c
--- /dev/null
+++ b/tools/perf/util/block-info.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_BLOCK_H
+#define __PERF_BLOCK_H
+
+#include <linux/types.h>
+#include <linux/refcount.h>
+#include "hist.h"
+#include "symbol.h"
+#include "sort.h"
+#include "ui/ui.h"
+
+struct block_info {
+ struct symbol *sym;
+ u64 start;
+ u64 end;
+ u64 cycles;
+ u64 cycles_aggr;
+ s64 cycles_spark[NUM_SPARKS];
+ u64 total_cycles;
+ int num;
+ int num_aggr;
+ refcount_t refcnt;
+};
+
+struct block_fmt {
+ struct perf_hpp_fmt fmt;
+ int idx;
+ int width;
+ const char *header;
+ u64 total_cycles;
+ u64 block_cycles;
+};
+
+enum {
+ PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT,
+ PERF_HPP_REPORT__BLOCK_LBR_CYCLES,
+ PERF_HPP_REPORT__BLOCK_CYCLES_PCT,
+ PERF_HPP_REPORT__BLOCK_AVG_CYCLES,
+ PERF_HPP_REPORT__BLOCK_RANGE,
+ PERF_HPP_REPORT__BLOCK_DSO,
+ PERF_HPP_REPORT__BLOCK_MAX_INDEX
+};
+
+struct block_report {
+ struct block_hist hist;
+ u64 cycles;
+ struct block_fmt fmts[PERF_HPP_REPORT__BLOCK_MAX_INDEX];
+ int nr_fmts;
+};
+
+struct block_hist;
+
+struct block_info *block_info__new(void);
+struct block_info *block_info__get(struct block_info *bi);
+void block_info__put(struct block_info *bi);
+
+static inline void __block_info__zput(struct block_info **bi)
+{
+ block_info__put(*bi);
+ *bi = NULL;
+}
+
+#define block_info__zput(bi) __block_info__zput(&bi)
+
+int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right);
+
+int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right);
+
+int block_info__process_sym(struct hist_entry *he, struct block_hist *bh,
+ u64 *block_cycles_aggr, u64 total_cycles);
+
+struct block_report *block_info__create_report(struct evlist *evlist,
+ u64 total_cycles,
+ int *block_hpps, int nr_hpps,
+ int *nr_reps);
+
+void block_info__free_report(struct block_report *reps, int nr_reps);
+
+int report__browse_block_hists(struct block_hist *bh, float min_percent,
+ struct evsel *evsel, struct perf_env *env,
+ struct annotation_options *annotation_opts);
+
+float block_info__total_cycles_percent(struct hist_entry *he);
+
+#endif /* __PERF_BLOCK_H */
diff --git a/tools/perf/util/block-range.c b/tools/perf/util/block-range.c
new file mode 100644
index 000000000..680e92774
--- /dev/null
+++ b/tools/perf/util/block-range.c
@@ -0,0 +1,327 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "block-range.h"
+#include "annotate.h"
+#include <assert.h>
+#include <stdlib.h>
+
+struct {
+ struct rb_root root;
+ u64 blocks;
+} block_ranges;
+
+static void block_range__debug(void)
+{
+#ifndef NDEBUG
+ struct rb_node *rb;
+ u64 old = 0; /* NULL isn't executable */
+
+ for (rb = rb_first(&block_ranges.root); rb; rb = rb_next(rb)) {
+ struct block_range *entry = rb_entry(rb, struct block_range, node);
+
+ assert(old < entry->start);
+ assert(entry->start <= entry->end); /* single instruction block; jump to a jump */
+
+ old = entry->end;
+ }
+#endif
+}
+
+struct block_range *block_range__find(u64 addr)
+{
+ struct rb_node **p = &block_ranges.root.rb_node;
+ struct rb_node *parent = NULL;
+ struct block_range *entry;
+
+ while (*p != NULL) {
+ parent = *p;
+ entry = rb_entry(parent, struct block_range, node);
+
+ if (addr < entry->start)
+ p = &parent->rb_left;
+ else if (addr > entry->end)
+ p = &parent->rb_right;
+ else
+ return entry;
+ }
+
+ return NULL;
+}
+
+static inline void rb_link_left_of_node(struct rb_node *left, struct rb_node *node)
+{
+ struct rb_node **p = &node->rb_left;
+ while (*p) {
+ node = *p;
+ p = &node->rb_right;
+ }
+ rb_link_node(left, node, p);
+}
+
+static inline void rb_link_right_of_node(struct rb_node *right, struct rb_node *node)
+{
+ struct rb_node **p = &node->rb_right;
+ while (*p) {
+ node = *p;
+ p = &node->rb_left;
+ }
+ rb_link_node(right, node, p);
+}
+
+/**
+ * block_range__create
+ * @start: branch target starting this basic block
+ * @end: branch ending this basic block
+ *
+ * Create all the required block ranges to precisely span the given range.
+ */
+struct block_range_iter block_range__create(u64 start, u64 end)
+{
+ struct rb_node **p = &block_ranges.root.rb_node;
+ struct rb_node *n, *parent = NULL;
+ struct block_range *next, *entry = NULL;
+ struct block_range_iter iter = { NULL, NULL };
+
+ while (*p != NULL) {
+ parent = *p;
+ entry = rb_entry(parent, struct block_range, node);
+
+ if (start < entry->start)
+ p = &parent->rb_left;
+ else if (start > entry->end)
+ p = &parent->rb_right;
+ else
+ break;
+ }
+
+ /*
+ * Didn't find anything.. there's a hole at @start, however @end might
+ * be inside/behind the next range.
+ */
+ if (!*p) {
+ if (!entry) /* tree empty */
+ goto do_whole;
+
+ /*
+ * If the last node is before, advance one to find the next.
+ */
+ n = parent;
+ if (entry->end < start) {
+ n = rb_next(n);
+ if (!n)
+ goto do_whole;
+ }
+ next = rb_entry(n, struct block_range, node);
+
+ if (next->start <= end) { /* add head: [start...][n->start...] */
+ struct block_range *head = malloc(sizeof(struct block_range));
+ if (!head)
+ return iter;
+
+ *head = (struct block_range){
+ .start = start,
+ .end = next->start - 1,
+ .is_target = 1,
+ .is_branch = 0,
+ };
+
+ rb_link_left_of_node(&head->node, &next->node);
+ rb_insert_color(&head->node, &block_ranges.root);
+ block_range__debug();
+
+ iter.start = head;
+ goto do_tail;
+ }
+
+do_whole:
+ /*
+ * The whole [start..end] range is non-overlapping.
+ */
+ entry = malloc(sizeof(struct block_range));
+ if (!entry)
+ return iter;
+
+ *entry = (struct block_range){
+ .start = start,
+ .end = end,
+ .is_target = 1,
+ .is_branch = 1,
+ };
+
+ rb_link_node(&entry->node, parent, p);
+ rb_insert_color(&entry->node, &block_ranges.root);
+ block_range__debug();
+
+ iter.start = entry;
+ iter.end = entry;
+ goto done;
+ }
+
+ /*
+ * We found a range that overlapped with ours, split if needed.
+ */
+ if (entry->start < start) { /* split: [e->start...][start...] */
+ struct block_range *head = malloc(sizeof(struct block_range));
+ if (!head)
+ return iter;
+
+ *head = (struct block_range){
+ .start = entry->start,
+ .end = start - 1,
+ .is_target = entry->is_target,
+ .is_branch = 0,
+
+ .coverage = entry->coverage,
+ .entry = entry->entry,
+ };
+
+ entry->start = start;
+ entry->is_target = 1;
+ entry->entry = 0;
+
+ rb_link_left_of_node(&head->node, &entry->node);
+ rb_insert_color(&head->node, &block_ranges.root);
+ block_range__debug();
+
+ } else if (entry->start == start)
+ entry->is_target = 1;
+
+ iter.start = entry;
+
+do_tail:
+ /*
+ * At this point we've got: @iter.start = [@start...] but @end can still be
+ * inside or beyond it.
+ */
+ entry = iter.start;
+ for (;;) {
+ /*
+ * If @end is inside @entry, split.
+ */
+ if (end < entry->end) { /* split: [...end][...e->end] */
+ struct block_range *tail = malloc(sizeof(struct block_range));
+ if (!tail)
+ return iter;
+
+ *tail = (struct block_range){
+ .start = end + 1,
+ .end = entry->end,
+ .is_target = 0,
+ .is_branch = entry->is_branch,
+
+ .coverage = entry->coverage,
+ .taken = entry->taken,
+ .pred = entry->pred,
+ };
+
+ entry->end = end;
+ entry->is_branch = 1;
+ entry->taken = 0;
+ entry->pred = 0;
+
+ rb_link_right_of_node(&tail->node, &entry->node);
+ rb_insert_color(&tail->node, &block_ranges.root);
+ block_range__debug();
+
+ iter.end = entry;
+ goto done;
+ }
+
+ /*
+ * If @end matches @entry, done
+ */
+ if (end == entry->end) {
+ entry->is_branch = 1;
+ iter.end = entry;
+ goto done;
+ }
+
+ next = block_range__next(entry);
+ if (!next)
+ goto add_tail;
+
+ /*
+ * If @end is in beyond @entry but not inside @next, add tail.
+ */
+ if (end < next->start) { /* add tail: [...e->end][...end] */
+ struct block_range *tail;
+add_tail:
+ tail = malloc(sizeof(struct block_range));
+ if (!tail)
+ return iter;
+
+ *tail = (struct block_range){
+ .start = entry->end + 1,
+ .end = end,
+ .is_target = 0,
+ .is_branch = 1,
+ };
+
+ rb_link_right_of_node(&tail->node, &entry->node);
+ rb_insert_color(&tail->node, &block_ranges.root);
+ block_range__debug();
+
+ iter.end = tail;
+ goto done;
+ }
+
+ /*
+ * If there is a hole between @entry and @next, fill it.
+ */
+ if (entry->end + 1 != next->start) {
+ struct block_range *hole = malloc(sizeof(struct block_range));
+ if (!hole)
+ return iter;
+
+ *hole = (struct block_range){
+ .start = entry->end + 1,
+ .end = next->start - 1,
+ .is_target = 0,
+ .is_branch = 0,
+ };
+
+ rb_link_left_of_node(&hole->node, &next->node);
+ rb_insert_color(&hole->node, &block_ranges.root);
+ block_range__debug();
+ }
+
+ entry = next;
+ }
+
+done:
+ assert(iter.start->start == start && iter.start->is_target);
+ assert(iter.end->end == end && iter.end->is_branch);
+
+ block_ranges.blocks++;
+
+ return iter;
+}
+
+
+/*
+ * Compute coverage as:
+ *
+ * br->coverage / br->sym->max_coverage
+ *
+ * This ensures each symbol has a 100% spot, to reflect that each symbol has a
+ * most covered section.
+ *
+ * Returns [0-1] for coverage and -1 if we had no data what so ever or the
+ * symbol does not exist.
+ */
+double block_range__coverage(struct block_range *br)
+{
+ struct symbol *sym;
+
+ if (!br) {
+ if (block_ranges.blocks)
+ return 0;
+
+ return -1;
+ }
+
+ sym = br->sym;
+ if (!sym)
+ return -1;
+
+ return (double)br->coverage / symbol__annotation(sym)->max_coverage;
+}
diff --git a/tools/perf/util/block-range.h b/tools/perf/util/block-range.h
new file mode 100644
index 000000000..ec0fb534b
--- /dev/null
+++ b/tools/perf/util/block-range.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_BLOCK_RANGE_H
+#define __PERF_BLOCK_RANGE_H
+
+#include <stdbool.h>
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+struct symbol;
+
+/*
+ * struct block_range - non-overlapping parts of basic blocks
+ * @node: treenode
+ * @start: inclusive start of range
+ * @end: inclusive end of range
+ * @is_target: @start is a jump target
+ * @is_branch: @end is a branch instruction
+ * @coverage: number of blocks that cover this range
+ * @taken: number of times the branch is taken (requires @is_branch)
+ * @pred: number of times the taken branch was predicted
+ */
+struct block_range {
+ struct rb_node node;
+
+ struct symbol *sym;
+
+ u64 start;
+ u64 end;
+
+ int is_target, is_branch;
+
+ u64 coverage;
+ u64 entry;
+ u64 taken;
+ u64 pred;
+};
+
+static inline struct block_range *block_range__next(struct block_range *br)
+{
+ struct rb_node *n = rb_next(&br->node);
+ if (!n)
+ return NULL;
+ return rb_entry(n, struct block_range, node);
+}
+
+struct block_range_iter {
+ struct block_range *start;
+ struct block_range *end;
+};
+
+static inline struct block_range *block_range_iter(struct block_range_iter *iter)
+{
+ return iter->start;
+}
+
+static inline bool block_range_iter__next(struct block_range_iter *iter)
+{
+ if (iter->start == iter->end)
+ return false;
+
+ iter->start = block_range__next(iter->start);
+ return true;
+}
+
+static inline bool block_range_iter__valid(struct block_range_iter *iter)
+{
+ if (!iter->start || !iter->end)
+ return false;
+ return true;
+}
+
+extern struct block_range *block_range__find(u64 addr);
+extern struct block_range_iter block_range__create(u64 start, u64 end);
+extern double block_range__coverage(struct block_range *br);
+
+#endif /* __PERF_BLOCK_RANGE_H */
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
new file mode 100644
index 000000000..b00b5a263
--- /dev/null
+++ b/tools/perf/util/bpf-event.c
@@ -0,0 +1,585 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdlib.h>
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#include <linux/btf.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <internal/lib.h>
+#include <symbol/kallsyms.h>
+#include "bpf-event.h"
+#include "bpf-utils.h"
+#include "debug.h"
+#include "dso.h"
+#include "symbol.h"
+#include "machine.h"
+#include "env.h"
+#include "session.h"
+#include "map.h"
+#include "evlist.h"
+#include "record.h"
+#include "util/synthetic-events.h"
+
+static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len)
+{
+ int ret = 0;
+ size_t i;
+
+ for (i = 0; i < len; i++)
+ ret += snprintf(buf + ret, size - ret, "%02x", data[i]);
+ return ret;
+}
+
+static int machine__process_bpf_event_load(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused)
+{
+ struct bpf_prog_info_node *info_node;
+ struct perf_env *env = machine->env;
+ struct perf_bpil *info_linear;
+ int id = event->bpf.id;
+ unsigned int i;
+
+ /* perf-record, no need to handle bpf-event */
+ if (env == NULL)
+ return 0;
+
+ info_node = perf_env__find_bpf_prog_info(env, id);
+ if (!info_node)
+ return 0;
+ info_linear = info_node->info_linear;
+
+ for (i = 0; i < info_linear->info.nr_jited_ksyms; i++) {
+ u64 *addrs = (u64 *)(uintptr_t)(info_linear->info.jited_ksyms);
+ u64 addr = addrs[i];
+ struct map *map = maps__find(machine__kernel_maps(machine), addr);
+
+ if (map) {
+ struct dso *dso = map__dso(map);
+
+ dso->binary_type = DSO_BINARY_TYPE__BPF_PROG_INFO;
+ dso->bpf_prog.id = id;
+ dso->bpf_prog.sub_id = i;
+ dso->bpf_prog.env = env;
+ }
+ }
+ return 0;
+}
+
+int machine__process_bpf(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample)
+{
+ if (dump_trace)
+ perf_event__fprintf_bpf(event, stdout);
+
+ switch (event->bpf.type) {
+ case PERF_BPF_EVENT_PROG_LOAD:
+ return machine__process_bpf_event_load(machine, event, sample);
+
+ case PERF_BPF_EVENT_PROG_UNLOAD:
+ /*
+ * Do not free bpf_prog_info and btf of the program here,
+ * as annotation still need them. They will be freed at
+ * the end of the session.
+ */
+ break;
+ default:
+ pr_debug("unexpected bpf event type of %d\n", event->bpf.type);
+ break;
+ }
+ return 0;
+}
+
+static int perf_env__fetch_btf(struct perf_env *env,
+ u32 btf_id,
+ struct btf *btf)
+{
+ struct btf_node *node;
+ u32 data_size;
+ const void *data;
+
+ data = btf__raw_data(btf, &data_size);
+
+ node = malloc(data_size + sizeof(struct btf_node));
+ if (!node)
+ return -1;
+
+ node->id = btf_id;
+ node->data_size = data_size;
+ memcpy(node->data, data, data_size);
+
+ if (!perf_env__insert_btf(env, node)) {
+ /* Insertion failed because of a duplicate. */
+ free(node);
+ return -1;
+ }
+ return 0;
+}
+
+static int synthesize_bpf_prog_name(char *buf, int size,
+ struct bpf_prog_info *info,
+ struct btf *btf,
+ u32 sub_id)
+{
+ u8 (*prog_tags)[BPF_TAG_SIZE] = (void *)(uintptr_t)(info->prog_tags);
+ void *func_infos = (void *)(uintptr_t)(info->func_info);
+ u32 sub_prog_cnt = info->nr_jited_ksyms;
+ const struct bpf_func_info *finfo;
+ const char *short_name = NULL;
+ const struct btf_type *t;
+ int name_len;
+
+ name_len = snprintf(buf, size, "bpf_prog_");
+ name_len += snprintf_hex(buf + name_len, size - name_len,
+ prog_tags[sub_id], BPF_TAG_SIZE);
+ if (btf) {
+ finfo = func_infos + sub_id * info->func_info_rec_size;
+ t = btf__type_by_id(btf, finfo->type_id);
+ short_name = btf__name_by_offset(btf, t->name_off);
+ } else if (sub_id == 0 && sub_prog_cnt == 1) {
+ /* no subprog */
+ if (info->name[0])
+ short_name = info->name;
+ } else
+ short_name = "F";
+ if (short_name)
+ name_len += snprintf(buf + name_len, size - name_len,
+ "_%s", short_name);
+ return name_len;
+}
+
+/*
+ * Synthesize PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT for one bpf
+ * program. One PERF_RECORD_BPF_EVENT is generated for the program. And
+ * one PERF_RECORD_KSYMBOL is generated for each sub program.
+ *
+ * Returns:
+ * 0 for success;
+ * -1 for failures;
+ * -2 for lack of kernel support.
+ */
+static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
+ perf_event__handler_t process,
+ struct machine *machine,
+ int fd,
+ union perf_event *event,
+ struct record_opts *opts)
+{
+ struct perf_record_ksymbol *ksymbol_event = &event->ksymbol;
+ struct perf_record_bpf_event *bpf_event = &event->bpf;
+ struct perf_tool *tool = session->tool;
+ struct bpf_prog_info_node *info_node;
+ struct perf_bpil *info_linear;
+ struct bpf_prog_info *info;
+ struct btf *btf = NULL;
+ struct perf_env *env;
+ u32 sub_prog_cnt, i;
+ int err = 0;
+ u64 arrays;
+
+ /*
+ * for perf-record and perf-report use header.env;
+ * otherwise, use global perf_env.
+ */
+ env = session->data ? &session->header.env : &perf_env;
+
+ arrays = 1UL << PERF_BPIL_JITED_KSYMS;
+ arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS;
+ arrays |= 1UL << PERF_BPIL_FUNC_INFO;
+ arrays |= 1UL << PERF_BPIL_PROG_TAGS;
+ arrays |= 1UL << PERF_BPIL_JITED_INSNS;
+ arrays |= 1UL << PERF_BPIL_LINE_INFO;
+ arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO;
+
+ info_linear = get_bpf_prog_info_linear(fd, arrays);
+ if (IS_ERR_OR_NULL(info_linear)) {
+ info_linear = NULL;
+ pr_debug("%s: failed to get BPF program info. aborting\n", __func__);
+ return -1;
+ }
+
+ if (info_linear->info_len < offsetof(struct bpf_prog_info, prog_tags)) {
+ free(info_linear);
+ pr_debug("%s: the kernel is too old, aborting\n", __func__);
+ return -2;
+ }
+
+ info = &info_linear->info;
+ if (!info->jited_ksyms) {
+ free(info_linear);
+ return -1;
+ }
+
+ /* number of ksyms, func_lengths, and tags should match */
+ sub_prog_cnt = info->nr_jited_ksyms;
+ if (sub_prog_cnt != info->nr_prog_tags ||
+ sub_prog_cnt != info->nr_jited_func_lens) {
+ free(info_linear);
+ return -1;
+ }
+
+ /* check BTF func info support */
+ if (info->btf_id && info->nr_func_info && info->func_info_rec_size) {
+ /* btf func info number should be same as sub_prog_cnt */
+ if (sub_prog_cnt != info->nr_func_info) {
+ pr_debug("%s: mismatch in BPF sub program count and BTF function info count, aborting\n", __func__);
+ free(info_linear);
+ return -1;
+ }
+ btf = btf__load_from_kernel_by_id(info->btf_id);
+ if (libbpf_get_error(btf)) {
+ pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info->btf_id);
+ err = -1;
+ goto out;
+ }
+ perf_env__fetch_btf(env, info->btf_id, btf);
+ }
+
+ /* Synthesize PERF_RECORD_KSYMBOL */
+ for (i = 0; i < sub_prog_cnt; i++) {
+ __u32 *prog_lens = (__u32 *)(uintptr_t)(info->jited_func_lens);
+ __u64 *prog_addrs = (__u64 *)(uintptr_t)(info->jited_ksyms);
+ int name_len;
+
+ *ksymbol_event = (struct perf_record_ksymbol) {
+ .header = {
+ .type = PERF_RECORD_KSYMBOL,
+ .size = offsetof(struct perf_record_ksymbol, name),
+ },
+ .addr = prog_addrs[i],
+ .len = prog_lens[i],
+ .ksym_type = PERF_RECORD_KSYMBOL_TYPE_BPF,
+ .flags = 0,
+ };
+
+ name_len = synthesize_bpf_prog_name(ksymbol_event->name,
+ KSYM_NAME_LEN, info, btf, i);
+ ksymbol_event->header.size += PERF_ALIGN(name_len + 1,
+ sizeof(u64));
+
+ memset((void *)event + event->header.size, 0, machine->id_hdr_size);
+ event->header.size += machine->id_hdr_size;
+ err = perf_tool__process_synth_event(tool, event,
+ machine, process);
+ }
+
+ if (!opts->no_bpf_event) {
+ /* Synthesize PERF_RECORD_BPF_EVENT */
+ *bpf_event = (struct perf_record_bpf_event) {
+ .header = {
+ .type = PERF_RECORD_BPF_EVENT,
+ .size = sizeof(struct perf_record_bpf_event),
+ },
+ .type = PERF_BPF_EVENT_PROG_LOAD,
+ .flags = 0,
+ .id = info->id,
+ };
+ memcpy(bpf_event->tag, info->tag, BPF_TAG_SIZE);
+ memset((void *)event + event->header.size, 0, machine->id_hdr_size);
+ event->header.size += machine->id_hdr_size;
+
+ /* save bpf_prog_info to env */
+ info_node = malloc(sizeof(struct bpf_prog_info_node));
+ if (!info_node) {
+ err = -1;
+ goto out;
+ }
+
+ info_node->info_linear = info_linear;
+ perf_env__insert_bpf_prog_info(env, info_node);
+ info_linear = NULL;
+
+ /*
+ * process after saving bpf_prog_info to env, so that
+ * required information is ready for look up
+ */
+ err = perf_tool__process_synth_event(tool, event,
+ machine, process);
+ }
+
+out:
+ free(info_linear);
+ btf__free(btf);
+ return err ? -1 : 0;
+}
+
+struct kallsyms_parse {
+ union perf_event *event;
+ perf_event__handler_t process;
+ struct machine *machine;
+ struct perf_tool *tool;
+};
+
+static int
+process_bpf_image(char *name, u64 addr, struct kallsyms_parse *data)
+{
+ struct machine *machine = data->machine;
+ union perf_event *event = data->event;
+ struct perf_record_ksymbol *ksymbol;
+ int len;
+
+ ksymbol = &event->ksymbol;
+
+ *ksymbol = (struct perf_record_ksymbol) {
+ .header = {
+ .type = PERF_RECORD_KSYMBOL,
+ .size = offsetof(struct perf_record_ksymbol, name),
+ },
+ .addr = addr,
+ .len = page_size,
+ .ksym_type = PERF_RECORD_KSYMBOL_TYPE_BPF,
+ .flags = 0,
+ };
+
+ len = scnprintf(ksymbol->name, KSYM_NAME_LEN, "%s", name);
+ ksymbol->header.size += PERF_ALIGN(len + 1, sizeof(u64));
+ memset((void *) event + event->header.size, 0, machine->id_hdr_size);
+ event->header.size += machine->id_hdr_size;
+
+ return perf_tool__process_synth_event(data->tool, event, machine,
+ data->process);
+}
+
+static int
+kallsyms_process_symbol(void *data, const char *_name,
+ char type __maybe_unused, u64 start)
+{
+ char disp[KSYM_NAME_LEN];
+ char *module, *name;
+ unsigned long id;
+ int err = 0;
+
+ module = strchr(_name, '\t');
+ if (!module)
+ return 0;
+
+ /* We are going after [bpf] module ... */
+ if (strcmp(module + 1, "[bpf]"))
+ return 0;
+
+ name = memdup(_name, (module - _name) + 1);
+ if (!name)
+ return -ENOMEM;
+
+ name[module - _name] = 0;
+
+ /* .. and only for trampolines and dispatchers */
+ if ((sscanf(name, "bpf_trampoline_%lu", &id) == 1) ||
+ (sscanf(name, "bpf_dispatcher_%s", disp) == 1))
+ err = process_bpf_image(name, start, data);
+
+ free(name);
+ return err;
+}
+
+int perf_event__synthesize_bpf_events(struct perf_session *session,
+ perf_event__handler_t process,
+ struct machine *machine,
+ struct record_opts *opts)
+{
+ const char *kallsyms_filename = "/proc/kallsyms";
+ struct kallsyms_parse arg;
+ union perf_event *event;
+ __u32 id = 0;
+ int err;
+ int fd;
+
+ event = malloc(sizeof(event->bpf) + KSYM_NAME_LEN + machine->id_hdr_size);
+ if (!event)
+ return -1;
+
+ /* Synthesize all the bpf programs in system. */
+ while (true) {
+ err = bpf_prog_get_next_id(id, &id);
+ if (err) {
+ if (errno == ENOENT) {
+ err = 0;
+ break;
+ }
+ pr_debug("%s: can't get next program: %s%s\n",
+ __func__, strerror(errno),
+ errno == EINVAL ? " -- kernel too old?" : "");
+ /* don't report error on old kernel or EPERM */
+ err = (errno == EINVAL || errno == EPERM) ? 0 : -1;
+ break;
+ }
+ fd = bpf_prog_get_fd_by_id(id);
+ if (fd < 0) {
+ pr_debug("%s: failed to get fd for prog_id %u\n",
+ __func__, id);
+ continue;
+ }
+
+ err = perf_event__synthesize_one_bpf_prog(session, process,
+ machine, fd,
+ event, opts);
+ close(fd);
+ if (err) {
+ /* do not return error for old kernel */
+ if (err == -2)
+ err = 0;
+ break;
+ }
+ }
+
+ /* Synthesize all the bpf images - trampolines/dispatchers. */
+ if (symbol_conf.kallsyms_name != NULL)
+ kallsyms_filename = symbol_conf.kallsyms_name;
+
+ arg = (struct kallsyms_parse) {
+ .event = event,
+ .process = process,
+ .machine = machine,
+ .tool = session->tool,
+ };
+
+ if (kallsyms__parse(kallsyms_filename, &arg, kallsyms_process_symbol)) {
+ pr_err("%s: failed to synthesize bpf images: %s\n",
+ __func__, strerror(errno));
+ }
+
+ free(event);
+ return err;
+}
+
+static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
+{
+ struct bpf_prog_info_node *info_node;
+ struct perf_bpil *info_linear;
+ struct btf *btf = NULL;
+ u64 arrays;
+ u32 btf_id;
+ int fd;
+
+ fd = bpf_prog_get_fd_by_id(id);
+ if (fd < 0)
+ return;
+
+ arrays = 1UL << PERF_BPIL_JITED_KSYMS;
+ arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS;
+ arrays |= 1UL << PERF_BPIL_FUNC_INFO;
+ arrays |= 1UL << PERF_BPIL_PROG_TAGS;
+ arrays |= 1UL << PERF_BPIL_JITED_INSNS;
+ arrays |= 1UL << PERF_BPIL_LINE_INFO;
+ arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO;
+
+ info_linear = get_bpf_prog_info_linear(fd, arrays);
+ if (IS_ERR_OR_NULL(info_linear)) {
+ pr_debug("%s: failed to get BPF program info. aborting\n", __func__);
+ goto out;
+ }
+
+ btf_id = info_linear->info.btf_id;
+
+ info_node = malloc(sizeof(struct bpf_prog_info_node));
+ if (info_node) {
+ info_node->info_linear = info_linear;
+ perf_env__insert_bpf_prog_info(env, info_node);
+ } else
+ free(info_linear);
+
+ if (btf_id == 0)
+ goto out;
+
+ btf = btf__load_from_kernel_by_id(btf_id);
+ if (libbpf_get_error(btf)) {
+ pr_debug("%s: failed to get BTF of id %u, aborting\n",
+ __func__, btf_id);
+ goto out;
+ }
+ perf_env__fetch_btf(env, btf_id, btf);
+
+out:
+ btf__free(btf);
+ close(fd);
+}
+
+static int bpf_event__sb_cb(union perf_event *event, void *data)
+{
+ struct perf_env *env = data;
+
+ if (event->header.type != PERF_RECORD_BPF_EVENT)
+ return -1;
+
+ switch (event->bpf.type) {
+ case PERF_BPF_EVENT_PROG_LOAD:
+ perf_env__add_bpf_info(env, event->bpf.id);
+
+ case PERF_BPF_EVENT_PROG_UNLOAD:
+ /*
+ * Do not free bpf_prog_info and btf of the program here,
+ * as annotation still need them. They will be freed at
+ * the end of the session.
+ */
+ break;
+ default:
+ pr_debug("unexpected bpf event type of %d\n", event->bpf.type);
+ break;
+ }
+
+ return 0;
+}
+
+int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_DUMMY,
+ .sample_id_all = 1,
+ .watermark = 1,
+ .bpf_event = 1,
+ .size = sizeof(attr), /* to capture ABI version */
+ };
+
+ /*
+ * Older gcc versions don't support designated initializers, like above,
+ * for unnamed union members, such as the following:
+ */
+ attr.wakeup_watermark = 1;
+
+ return evlist__add_sb_event(evlist, &attr, bpf_event__sb_cb, env);
+}
+
+void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info,
+ struct perf_env *env,
+ FILE *fp)
+{
+ __u32 *prog_lens = (__u32 *)(uintptr_t)(info->jited_func_lens);
+ __u64 *prog_addrs = (__u64 *)(uintptr_t)(info->jited_ksyms);
+ char name[KSYM_NAME_LEN];
+ struct btf *btf = NULL;
+ u32 sub_prog_cnt, i;
+
+ sub_prog_cnt = info->nr_jited_ksyms;
+ if (sub_prog_cnt != info->nr_prog_tags ||
+ sub_prog_cnt != info->nr_jited_func_lens)
+ return;
+
+ if (info->btf_id) {
+ struct btf_node *node;
+
+ node = __perf_env__find_btf(env, info->btf_id);
+ if (node)
+ btf = btf__new((__u8 *)(node->data),
+ node->data_size);
+ }
+
+ if (sub_prog_cnt == 1) {
+ synthesize_bpf_prog_name(name, KSYM_NAME_LEN, info, btf, 0);
+ fprintf(fp, "# bpf_prog_info %u: %s addr 0x%llx size %u\n",
+ info->id, name, prog_addrs[0], prog_lens[0]);
+ goto out;
+ }
+
+ fprintf(fp, "# bpf_prog_info %u:\n", info->id);
+ for (i = 0; i < sub_prog_cnt; i++) {
+ synthesize_bpf_prog_name(name, KSYM_NAME_LEN, info, btf, i);
+
+ fprintf(fp, "# \tsub_prog %u: %s addr 0x%llx size %u\n",
+ i, name, prog_addrs[i], prog_lens[i]);
+ }
+out:
+ btf__free(btf);
+}
diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
new file mode 100644
index 000000000..e2f042090
--- /dev/null
+++ b/tools/perf/util/bpf-event.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_BPF_EVENT_H
+#define __PERF_BPF_EVENT_H
+
+#include <linux/compiler.h>
+#include <linux/rbtree.h>
+#include <api/fd/array.h>
+#include <stdio.h>
+
+struct bpf_prog_info;
+struct machine;
+union perf_event;
+struct perf_env;
+struct perf_sample;
+struct perf_session;
+struct record_opts;
+struct evlist;
+struct target;
+
+struct bpf_prog_info_node {
+ struct perf_bpil *info_linear;
+ struct rb_node rb_node;
+};
+
+struct btf_node {
+ struct rb_node rb_node;
+ u32 id;
+ u32 data_size;
+ char data[];
+};
+
+#ifdef HAVE_LIBBPF_SUPPORT
+int machine__process_bpf(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample);
+int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env);
+void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info,
+ struct perf_env *env,
+ FILE *fp);
+#else
+static inline int machine__process_bpf(struct machine *machine __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused)
+{
+ return 0;
+}
+
+static inline int evlist__add_bpf_sb_event(struct evlist *evlist __maybe_unused,
+ struct perf_env *env __maybe_unused)
+{
+ return 0;
+}
+
+static inline void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info __maybe_unused,
+ struct perf_env *env __maybe_unused,
+ FILE *fp __maybe_unused)
+{
+
+}
+#endif // HAVE_LIBBPF_SUPPORT
+#endif
diff --git a/tools/perf/util/bpf-filter.c b/tools/perf/util/bpf-filter.c
new file mode 100644
index 000000000..b51544996
--- /dev/null
+++ b/tools/perf/util/bpf-filter.c
@@ -0,0 +1,207 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <stdlib.h>
+
+#include <bpf/bpf.h>
+#include <linux/err.h>
+#include <internal/xyarray.h>
+
+#include "util/debug.h"
+#include "util/evsel.h"
+
+#include "util/bpf-filter.h"
+#include <util/bpf-filter-flex.h>
+#include <util/bpf-filter-bison.h>
+
+#include "bpf_skel/sample-filter.h"
+#include "bpf_skel/sample_filter.skel.h"
+
+#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
+
+#define __PERF_SAMPLE_TYPE(st, opt) { st, #st, opt }
+#define PERF_SAMPLE_TYPE(_st, opt) __PERF_SAMPLE_TYPE(PERF_SAMPLE_##_st, opt)
+
+static const struct perf_sample_info {
+ u64 type;
+ const char *name;
+ const char *option;
+} sample_table[] = {
+ /* default sample flags */
+ PERF_SAMPLE_TYPE(IP, NULL),
+ PERF_SAMPLE_TYPE(TID, NULL),
+ PERF_SAMPLE_TYPE(PERIOD, NULL),
+ /* flags mostly set by default, but still have options */
+ PERF_SAMPLE_TYPE(ID, "--sample-identifier"),
+ PERF_SAMPLE_TYPE(CPU, "--sample-cpu"),
+ PERF_SAMPLE_TYPE(TIME, "-T"),
+ /* optional sample flags */
+ PERF_SAMPLE_TYPE(ADDR, "-d"),
+ PERF_SAMPLE_TYPE(DATA_SRC, "-d"),
+ PERF_SAMPLE_TYPE(PHYS_ADDR, "--phys-data"),
+ PERF_SAMPLE_TYPE(WEIGHT, "-W"),
+ PERF_SAMPLE_TYPE(WEIGHT_STRUCT, "-W"),
+ PERF_SAMPLE_TYPE(TRANSACTION, "--transaction"),
+ PERF_SAMPLE_TYPE(CODE_PAGE_SIZE, "--code-page-size"),
+ PERF_SAMPLE_TYPE(DATA_PAGE_SIZE, "--data-page-size"),
+};
+
+static const struct perf_sample_info *get_sample_info(u64 flags)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(sample_table); i++) {
+ if (sample_table[i].type == flags)
+ return &sample_table[i];
+ }
+ return NULL;
+}
+
+static int check_sample_flags(struct evsel *evsel, struct perf_bpf_filter_expr *expr)
+{
+ const struct perf_sample_info *info;
+
+ if (evsel->core.attr.sample_type & expr->sample_flags)
+ return 0;
+
+ if (expr->op == PBF_OP_GROUP_BEGIN) {
+ struct perf_bpf_filter_expr *group;
+
+ list_for_each_entry(group, &expr->groups, list) {
+ if (check_sample_flags(evsel, group) < 0)
+ return -1;
+ }
+ return 0;
+ }
+
+ info = get_sample_info(expr->sample_flags);
+ if (info == NULL) {
+ pr_err("Error: %s event does not have sample flags %lx\n",
+ evsel__name(evsel), expr->sample_flags);
+ return -1;
+ }
+
+ pr_err("Error: %s event does not have %s\n", evsel__name(evsel), info->name);
+ if (info->option)
+ pr_err(" Hint: please add %s option to perf record\n", info->option);
+ return -1;
+}
+
+int perf_bpf_filter__prepare(struct evsel *evsel)
+{
+ int i, x, y, fd;
+ struct sample_filter_bpf *skel;
+ struct bpf_program *prog;
+ struct bpf_link *link;
+ struct perf_bpf_filter_expr *expr;
+
+ skel = sample_filter_bpf__open_and_load();
+ if (!skel) {
+ pr_err("Failed to load perf sample-filter BPF skeleton\n");
+ return -1;
+ }
+
+ i = 0;
+ fd = bpf_map__fd(skel->maps.filters);
+ list_for_each_entry(expr, &evsel->bpf_filters, list) {
+ struct perf_bpf_filter_entry entry = {
+ .op = expr->op,
+ .part = expr->part,
+ .flags = expr->sample_flags,
+ .value = expr->val,
+ };
+
+ if (check_sample_flags(evsel, expr) < 0)
+ return -1;
+
+ bpf_map_update_elem(fd, &i, &entry, BPF_ANY);
+ i++;
+
+ if (expr->op == PBF_OP_GROUP_BEGIN) {
+ struct perf_bpf_filter_expr *group;
+
+ list_for_each_entry(group, &expr->groups, list) {
+ struct perf_bpf_filter_entry group_entry = {
+ .op = group->op,
+ .part = group->part,
+ .flags = group->sample_flags,
+ .value = group->val,
+ };
+ bpf_map_update_elem(fd, &i, &group_entry, BPF_ANY);
+ i++;
+ }
+
+ memset(&entry, 0, sizeof(entry));
+ entry.op = PBF_OP_GROUP_END;
+ bpf_map_update_elem(fd, &i, &entry, BPF_ANY);
+ i++;
+ }
+ }
+
+ if (i > MAX_FILTERS) {
+ pr_err("Too many filters: %d (max = %d)\n", i, MAX_FILTERS);
+ return -1;
+ }
+ prog = skel->progs.perf_sample_filter;
+ for (x = 0; x < xyarray__max_x(evsel->core.fd); x++) {
+ for (y = 0; y < xyarray__max_y(evsel->core.fd); y++) {
+ link = bpf_program__attach_perf_event(prog, FD(evsel, x, y));
+ if (IS_ERR(link)) {
+ pr_err("Failed to attach perf sample-filter program\n");
+ return PTR_ERR(link);
+ }
+ }
+ }
+ evsel->bpf_skel = skel;
+ return 0;
+}
+
+int perf_bpf_filter__destroy(struct evsel *evsel)
+{
+ struct perf_bpf_filter_expr *expr, *tmp;
+
+ list_for_each_entry_safe(expr, tmp, &evsel->bpf_filters, list) {
+ list_del(&expr->list);
+ free(expr);
+ }
+ sample_filter_bpf__destroy(evsel->bpf_skel);
+ return 0;
+}
+
+u64 perf_bpf_filter__lost_count(struct evsel *evsel)
+{
+ struct sample_filter_bpf *skel = evsel->bpf_skel;
+
+ return skel ? skel->bss->dropped : 0;
+}
+
+struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(unsigned long sample_flags, int part,
+ enum perf_bpf_filter_op op,
+ unsigned long val)
+{
+ struct perf_bpf_filter_expr *expr;
+
+ expr = malloc(sizeof(*expr));
+ if (expr != NULL) {
+ expr->sample_flags = sample_flags;
+ expr->part = part;
+ expr->op = op;
+ expr->val = val;
+ INIT_LIST_HEAD(&expr->groups);
+ }
+ return expr;
+}
+
+int perf_bpf_filter__parse(struct list_head *expr_head, const char *str)
+{
+ YY_BUFFER_STATE buffer;
+ int ret;
+
+ buffer = perf_bpf_filter__scan_string(str);
+
+ ret = perf_bpf_filter_parse(expr_head);
+
+ perf_bpf_filter__flush_buffer(buffer);
+ perf_bpf_filter__delete_buffer(buffer);
+ perf_bpf_filter_lex_destroy();
+
+ return ret;
+}
diff --git a/tools/perf/util/bpf-filter.h b/tools/perf/util/bpf-filter.h
new file mode 100644
index 000000000..7afd15941
--- /dev/null
+++ b/tools/perf/util/bpf-filter.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_UTIL_BPF_FILTER_H
+#define PERF_UTIL_BPF_FILTER_H
+
+#include <linux/list.h>
+
+#include "bpf_skel/sample-filter.h"
+
+struct perf_bpf_filter_expr {
+ struct list_head list;
+ struct list_head groups;
+ enum perf_bpf_filter_op op;
+ int part;
+ unsigned long sample_flags;
+ unsigned long val;
+};
+
+struct evsel;
+
+#ifdef HAVE_BPF_SKEL
+struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(unsigned long sample_flags, int part,
+ enum perf_bpf_filter_op op,
+ unsigned long val);
+int perf_bpf_filter__parse(struct list_head *expr_head, const char *str);
+int perf_bpf_filter__prepare(struct evsel *evsel);
+int perf_bpf_filter__destroy(struct evsel *evsel);
+u64 perf_bpf_filter__lost_count(struct evsel *evsel);
+
+#else /* !HAVE_BPF_SKEL */
+
+static inline int perf_bpf_filter__parse(struct list_head *expr_head __maybe_unused,
+ const char *str __maybe_unused)
+{
+ return -EOPNOTSUPP;
+}
+static inline int perf_bpf_filter__prepare(struct evsel *evsel __maybe_unused)
+{
+ return -EOPNOTSUPP;
+}
+static inline int perf_bpf_filter__destroy(struct evsel *evsel __maybe_unused)
+{
+ return -EOPNOTSUPP;
+}
+static inline u64 perf_bpf_filter__lost_count(struct evsel *evsel __maybe_unused)
+{
+ return 0;
+}
+#endif /* HAVE_BPF_SKEL*/
+#endif /* PERF_UTIL_BPF_FILTER_H */
diff --git a/tools/perf/util/bpf-filter.l b/tools/perf/util/bpf-filter.l
new file mode 100644
index 000000000..d4ff0f134
--- /dev/null
+++ b/tools/perf/util/bpf-filter.l
@@ -0,0 +1,159 @@
+%option prefix="perf_bpf_filter_"
+%option noyywrap
+
+%{
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/perf_event.h>
+
+#include "bpf-filter.h"
+#include "bpf-filter-bison.h"
+
+static int sample(unsigned long sample_flag)
+{
+ perf_bpf_filter_lval.sample.type = sample_flag;
+ perf_bpf_filter_lval.sample.part = 0;
+ return BFT_SAMPLE;
+}
+
+static int sample_part(unsigned long sample_flag, int part)
+{
+ perf_bpf_filter_lval.sample.type = sample_flag;
+ perf_bpf_filter_lval.sample.part = part;
+ return BFT_SAMPLE;
+}
+
+static int operator(enum perf_bpf_filter_op op)
+{
+ perf_bpf_filter_lval.op = op;
+ return BFT_OP;
+}
+
+static int value(int base)
+{
+ long num;
+
+ errno = 0;
+ num = strtoul(perf_bpf_filter_text, NULL, base);
+ if (errno)
+ return BFT_ERROR;
+
+ perf_bpf_filter_lval.num = num;
+ return BFT_NUM;
+}
+
+static int constant(int val)
+{
+ perf_bpf_filter_lval.num = val;
+ return BFT_NUM;
+}
+
+static int error(const char *str)
+{
+ printf("perf_bpf_filter: Unexpected filter %s: %s\n", str, perf_bpf_filter_text);
+ return BFT_ERROR;
+}
+
+%}
+
+num_dec [0-9]+
+num_hex 0[Xx][0-9a-fA-F]+
+space [ \t]+
+ident [_a-zA-Z][_a-zA-Z0-9]+
+
+%%
+
+{num_dec} { return value(10); }
+{num_hex} { return value(16); }
+{space} { }
+
+ip { return sample(PERF_SAMPLE_IP); }
+id { return sample(PERF_SAMPLE_ID); }
+tid { return sample(PERF_SAMPLE_TID); }
+pid { return sample_part(PERF_SAMPLE_TID, 1); }
+cpu { return sample(PERF_SAMPLE_CPU); }
+time { return sample(PERF_SAMPLE_TIME); }
+addr { return sample(PERF_SAMPLE_ADDR); }
+period { return sample(PERF_SAMPLE_PERIOD); }
+txn { return sample(PERF_SAMPLE_TRANSACTION); }
+weight { return sample(PERF_SAMPLE_WEIGHT); }
+weight1 { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 1); }
+weight2 { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 2); }
+weight3 { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 3); }
+ins_lat { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 2); } /* alias for weight2 */
+p_stage_cyc { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 3); } /* alias for weight3 */
+retire_lat { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 3); } /* alias for weight3 */
+phys_addr { return sample(PERF_SAMPLE_PHYS_ADDR); }
+code_pgsz { return sample(PERF_SAMPLE_CODE_PAGE_SIZE); }
+data_pgsz { return sample(PERF_SAMPLE_DATA_PAGE_SIZE); }
+mem_op { return sample_part(PERF_SAMPLE_DATA_SRC, 1); }
+mem_lvlnum { return sample_part(PERF_SAMPLE_DATA_SRC, 2); }
+mem_lvl { return sample_part(PERF_SAMPLE_DATA_SRC, 2); } /* alias for mem_lvlnum */
+mem_snoop { return sample_part(PERF_SAMPLE_DATA_SRC, 3); } /* include snoopx */
+mem_remote { return sample_part(PERF_SAMPLE_DATA_SRC, 4); }
+mem_lock { return sample_part(PERF_SAMPLE_DATA_SRC, 5); }
+mem_dtlb { return sample_part(PERF_SAMPLE_DATA_SRC, 6); }
+mem_blk { return sample_part(PERF_SAMPLE_DATA_SRC, 7); }
+mem_hops { return sample_part(PERF_SAMPLE_DATA_SRC, 8); }
+
+"==" { return operator(PBF_OP_EQ); }
+"!=" { return operator(PBF_OP_NEQ); }
+">" { return operator(PBF_OP_GT); }
+"<" { return operator(PBF_OP_LT); }
+">=" { return operator(PBF_OP_GE); }
+"<=" { return operator(PBF_OP_LE); }
+"&" { return operator(PBF_OP_AND); }
+
+na { return constant(PERF_MEM_OP_NA); }
+load { return constant(PERF_MEM_OP_LOAD); }
+store { return constant(PERF_MEM_OP_STORE); }
+pfetch { return constant(PERF_MEM_OP_PFETCH); }
+exec { return constant(PERF_MEM_OP_EXEC); }
+
+l1 { return constant(PERF_MEM_LVLNUM_L1); }
+l2 { return constant(PERF_MEM_LVLNUM_L2); }
+l3 { return constant(PERF_MEM_LVLNUM_L3); }
+l4 { return constant(PERF_MEM_LVLNUM_L4); }
+cxl { return constant(PERF_MEM_LVLNUM_CXL); }
+io { return constant(PERF_MEM_LVLNUM_IO); }
+any_cache { return constant(PERF_MEM_LVLNUM_ANY_CACHE); }
+lfb { return constant(PERF_MEM_LVLNUM_LFB); }
+ram { return constant(PERF_MEM_LVLNUM_RAM); }
+pmem { return constant(PERF_MEM_LVLNUM_PMEM); }
+
+none { return constant(PERF_MEM_SNOOP_NONE); }
+hit { return constant(PERF_MEM_SNOOP_HIT); }
+miss { return constant(PERF_MEM_SNOOP_MISS); }
+hitm { return constant(PERF_MEM_SNOOP_HITM); }
+fwd { return constant(PERF_MEM_SNOOPX_FWD); }
+peer { return constant(PERF_MEM_SNOOPX_PEER); }
+
+remote { return constant(PERF_MEM_REMOTE_REMOTE); }
+
+locked { return constant(PERF_MEM_LOCK_LOCKED); }
+
+l1_hit { return constant(PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT); }
+l1_miss { return constant(PERF_MEM_TLB_L1 | PERF_MEM_TLB_MISS); }
+l2_hit { return constant(PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT); }
+l2_miss { return constant(PERF_MEM_TLB_L2 | PERF_MEM_TLB_MISS); }
+any_hit { return constant(PERF_MEM_TLB_HIT); }
+any_miss { return constant(PERF_MEM_TLB_MISS); }
+walk { return constant(PERF_MEM_TLB_WK); }
+os { return constant(PERF_MEM_TLB_OS); }
+fault { return constant(PERF_MEM_TLB_OS); } /* alias for os */
+
+by_data { return constant(PERF_MEM_BLK_DATA); }
+by_addr { return constant(PERF_MEM_BLK_ADDR); }
+
+hops0 { return constant(PERF_MEM_HOPS_0); }
+hops1 { return constant(PERF_MEM_HOPS_1); }
+hops2 { return constant(PERF_MEM_HOPS_2); }
+hops3 { return constant(PERF_MEM_HOPS_3); }
+
+"," { return ','; }
+"||" { return BFT_LOGICAL_OR; }
+
+{ident} { return error("ident"); }
+. { return error("input"); }
+
+%%
diff --git a/tools/perf/util/bpf-filter.y b/tools/perf/util/bpf-filter.y
new file mode 100644
index 000000000..5dfa948fc
--- /dev/null
+++ b/tools/perf/util/bpf-filter.y
@@ -0,0 +1,80 @@
+%parse-param {struct list_head *expr_head}
+%define parse.error verbose
+
+%{
+
+#include <stdio.h>
+#include <string.h>
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include "bpf-filter.h"
+
+int perf_bpf_filter_lex(void);
+
+static void perf_bpf_filter_error(struct list_head *expr __maybe_unused,
+ char const *msg)
+{
+ printf("perf_bpf_filter: %s\n", msg);
+}
+
+%}
+
+%union
+{
+ unsigned long num;
+ struct {
+ unsigned long type;
+ int part;
+ } sample;
+ enum perf_bpf_filter_op op;
+ struct perf_bpf_filter_expr *expr;
+}
+
+%token BFT_SAMPLE BFT_OP BFT_ERROR BFT_NUM BFT_LOGICAL_OR
+%type <expr> filter_term filter_expr
+%destructor { free ($$); } <expr>
+%type <sample> BFT_SAMPLE
+%type <op> BFT_OP
+%type <num> BFT_NUM
+
+%%
+
+filter:
+filter ',' filter_term
+{
+ list_add_tail(&$3->list, expr_head);
+}
+|
+filter_term
+{
+ list_add_tail(&$1->list, expr_head);
+}
+
+filter_term:
+filter_term BFT_LOGICAL_OR filter_expr
+{
+ struct perf_bpf_filter_expr *expr;
+
+ if ($1->op == PBF_OP_GROUP_BEGIN) {
+ expr = $1;
+ } else {
+ expr = perf_bpf_filter_expr__new(0, 0, PBF_OP_GROUP_BEGIN, 1);
+ list_add_tail(&$1->list, &expr->groups);
+ }
+ expr->val++;
+ list_add_tail(&$3->list, &expr->groups);
+ $$ = expr;
+}
+|
+filter_expr
+{
+ $$ = $1;
+}
+
+filter_expr:
+BFT_SAMPLE BFT_OP BFT_NUM
+{
+ $$ = perf_bpf_filter_expr__new($1.type, $1.part, $2, $3);
+}
+
+%%
diff --git a/tools/perf/util/bpf-prologue.h b/tools/perf/util/bpf-prologue.h
new file mode 100644
index 000000000..66dcf751e
--- /dev/null
+++ b/tools/perf/util/bpf-prologue.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015, He Kuang <hekuang@huawei.com>
+ * Copyright (C) 2015, Huawei Inc.
+ */
+#ifndef __BPF_PROLOGUE_H
+#define __BPF_PROLOGUE_H
+
+struct probe_trace_arg;
+struct bpf_insn;
+
+#define BPF_PROLOGUE_MAX_ARGS 3
+#define BPF_PROLOGUE_START_ARG_REG BPF_REG_3
+#define BPF_PROLOGUE_FETCH_RESULT_REG BPF_REG_2
+
+#ifdef HAVE_BPF_PROLOGUE
+int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
+ struct bpf_insn *new_prog, size_t *new_cnt,
+ size_t cnt_space);
+#else
+#include <linux/compiler.h>
+#include <errno.h>
+
+static inline int
+bpf__gen_prologue(struct probe_trace_arg *args __maybe_unused,
+ int nargs __maybe_unused,
+ struct bpf_insn *new_prog __maybe_unused,
+ size_t *new_cnt,
+ size_t cnt_space __maybe_unused)
+{
+ if (!new_cnt)
+ return -EINVAL;
+ *new_cnt = 0;
+ return -ENOTSUP;
+}
+#endif
+#endif /* __BPF_PROLOGUE_H */
diff --git a/tools/perf/util/bpf-utils.c b/tools/perf/util/bpf-utils.c
new file mode 100644
index 000000000..80b1d2b37
--- /dev/null
+++ b/tools/perf/util/bpf-utils.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <errno.h>
+#include <stdlib.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <bpf/bpf.h>
+#include "bpf-utils.h"
+#include "debug.h"
+
+struct bpil_array_desc {
+ int array_offset; /* e.g. offset of jited_prog_insns */
+ int count_offset; /* e.g. offset of jited_prog_len */
+ int size_offset; /* > 0: offset of rec size,
+ * < 0: fix size of -size_offset
+ */
+};
+
+static struct bpil_array_desc bpil_array_desc[] = {
+ [PERF_BPIL_JITED_INSNS] = {
+ offsetof(struct bpf_prog_info, jited_prog_insns),
+ offsetof(struct bpf_prog_info, jited_prog_len),
+ -1,
+ },
+ [PERF_BPIL_XLATED_INSNS] = {
+ offsetof(struct bpf_prog_info, xlated_prog_insns),
+ offsetof(struct bpf_prog_info, xlated_prog_len),
+ -1,
+ },
+ [PERF_BPIL_MAP_IDS] = {
+ offsetof(struct bpf_prog_info, map_ids),
+ offsetof(struct bpf_prog_info, nr_map_ids),
+ -(int)sizeof(__u32),
+ },
+ [PERF_BPIL_JITED_KSYMS] = {
+ offsetof(struct bpf_prog_info, jited_ksyms),
+ offsetof(struct bpf_prog_info, nr_jited_ksyms),
+ -(int)sizeof(__u64),
+ },
+ [PERF_BPIL_JITED_FUNC_LENS] = {
+ offsetof(struct bpf_prog_info, jited_func_lens),
+ offsetof(struct bpf_prog_info, nr_jited_func_lens),
+ -(int)sizeof(__u32),
+ },
+ [PERF_BPIL_FUNC_INFO] = {
+ offsetof(struct bpf_prog_info, func_info),
+ offsetof(struct bpf_prog_info, nr_func_info),
+ offsetof(struct bpf_prog_info, func_info_rec_size),
+ },
+ [PERF_BPIL_LINE_INFO] = {
+ offsetof(struct bpf_prog_info, line_info),
+ offsetof(struct bpf_prog_info, nr_line_info),
+ offsetof(struct bpf_prog_info, line_info_rec_size),
+ },
+ [PERF_BPIL_JITED_LINE_INFO] = {
+ offsetof(struct bpf_prog_info, jited_line_info),
+ offsetof(struct bpf_prog_info, nr_jited_line_info),
+ offsetof(struct bpf_prog_info, jited_line_info_rec_size),
+ },
+ [PERF_BPIL_PROG_TAGS] = {
+ offsetof(struct bpf_prog_info, prog_tags),
+ offsetof(struct bpf_prog_info, nr_prog_tags),
+ -(int)sizeof(__u8) * BPF_TAG_SIZE,
+ },
+
+};
+
+static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info,
+ int offset)
+{
+ __u32 *array = (__u32 *)info;
+
+ if (offset >= 0)
+ return array[offset / sizeof(__u32)];
+ return -(int)offset;
+}
+
+static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info,
+ int offset)
+{
+ __u64 *array = (__u64 *)info;
+
+ if (offset >= 0)
+ return array[offset / sizeof(__u64)];
+ return -(int)offset;
+}
+
+static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset,
+ __u32 val)
+{
+ __u32 *array = (__u32 *)info;
+
+ if (offset >= 0)
+ array[offset / sizeof(__u32)] = val;
+}
+
+static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset,
+ __u64 val)
+{
+ __u64 *array = (__u64 *)info;
+
+ if (offset >= 0)
+ array[offset / sizeof(__u64)] = val;
+}
+
+struct perf_bpil *
+get_bpf_prog_info_linear(int fd, __u64 arrays)
+{
+ struct bpf_prog_info info = {};
+ struct perf_bpil *info_linear;
+ __u32 info_len = sizeof(info);
+ __u32 data_len = 0;
+ int i, err;
+ void *ptr;
+
+ if (arrays >> PERF_BPIL_LAST_ARRAY)
+ return ERR_PTR(-EINVAL);
+
+ /* step 1: get array dimensions */
+ err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
+ if (err) {
+ pr_debug("can't get prog info: %s", strerror(errno));
+ return ERR_PTR(-EFAULT);
+ }
+
+ /* step 2: calculate total size of all arrays */
+ for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) {
+ bool include_array = (arrays & (1UL << i)) > 0;
+ struct bpil_array_desc *desc;
+ __u32 count, size;
+
+ desc = bpil_array_desc + i;
+
+ /* kernel is too old to support this field */
+ if (info_len < desc->array_offset + sizeof(__u32) ||
+ info_len < desc->count_offset + sizeof(__u32) ||
+ (desc->size_offset > 0 && info_len < (__u32)desc->size_offset))
+ include_array = false;
+
+ if (!include_array) {
+ arrays &= ~(1UL << i); /* clear the bit */
+ continue;
+ }
+
+ count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
+ size = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
+
+ data_len += roundup(count * size, sizeof(__u64));
+ }
+
+ /* step 3: allocate continuous memory */
+ info_linear = malloc(sizeof(struct perf_bpil) + data_len);
+ if (!info_linear)
+ return ERR_PTR(-ENOMEM);
+
+ /* step 4: fill data to info_linear->info */
+ info_linear->arrays = arrays;
+ memset(&info_linear->info, 0, sizeof(info));
+ ptr = info_linear->data;
+
+ for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) {
+ struct bpil_array_desc *desc;
+ __u32 count, size;
+
+ if ((arrays & (1UL << i)) == 0)
+ continue;
+
+ desc = bpil_array_desc + i;
+ count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
+ size = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
+ bpf_prog_info_set_offset_u32(&info_linear->info,
+ desc->count_offset, count);
+ bpf_prog_info_set_offset_u32(&info_linear->info,
+ desc->size_offset, size);
+ bpf_prog_info_set_offset_u64(&info_linear->info,
+ desc->array_offset,
+ ptr_to_u64(ptr));
+ ptr += roundup(count * size, sizeof(__u64));
+ }
+
+ /* step 5: call syscall again to get required arrays */
+ err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len);
+ if (err) {
+ pr_debug("can't get prog info: %s", strerror(errno));
+ free(info_linear);
+ return ERR_PTR(-EFAULT);
+ }
+
+ /* step 6: verify the data */
+ for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) {
+ struct bpil_array_desc *desc;
+ __u32 v1, v2;
+
+ if ((arrays & (1UL << i)) == 0)
+ continue;
+
+ desc = bpil_array_desc + i;
+ v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
+ v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
+ desc->count_offset);
+ if (v1 != v2)
+ pr_warning("%s: mismatch in element count\n", __func__);
+
+ v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
+ v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
+ desc->size_offset);
+ if (v1 != v2)
+ pr_warning("%s: mismatch in rec size\n", __func__);
+ }
+
+ /* step 7: update info_len and data_len */
+ info_linear->info_len = sizeof(struct bpf_prog_info);
+ info_linear->data_len = data_len;
+
+ return info_linear;
+}
+
+void bpil_addr_to_offs(struct perf_bpil *info_linear)
+{
+ int i;
+
+ for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) {
+ struct bpil_array_desc *desc;
+ __u64 addr, offs;
+
+ if ((info_linear->arrays & (1UL << i)) == 0)
+ continue;
+
+ desc = bpil_array_desc + i;
+ addr = bpf_prog_info_read_offset_u64(&info_linear->info,
+ desc->array_offset);
+ offs = addr - ptr_to_u64(info_linear->data);
+ bpf_prog_info_set_offset_u64(&info_linear->info,
+ desc->array_offset, offs);
+ }
+}
+
+void bpil_offs_to_addr(struct perf_bpil *info_linear)
+{
+ int i;
+
+ for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) {
+ struct bpil_array_desc *desc;
+ __u64 addr, offs;
+
+ if ((info_linear->arrays & (1UL << i)) == 0)
+ continue;
+
+ desc = bpil_array_desc + i;
+ offs = bpf_prog_info_read_offset_u64(&info_linear->info,
+ desc->array_offset);
+ addr = offs + ptr_to_u64(info_linear->data);
+ bpf_prog_info_set_offset_u64(&info_linear->info,
+ desc->array_offset, addr);
+ }
+}
diff --git a/tools/perf/util/bpf-utils.h b/tools/perf/util/bpf-utils.h
new file mode 100644
index 000000000..86a5055cd
--- /dev/null
+++ b/tools/perf/util/bpf-utils.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __PERF_BPF_UTILS_H
+#define __PERF_BPF_UTILS_H
+
+#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr))
+
+#ifdef HAVE_LIBBPF_SUPPORT
+
+#include <bpf/libbpf.h>
+
+/*
+ * Get bpf_prog_info in continuous memory
+ *
+ * struct bpf_prog_info has multiple arrays. The user has option to choose
+ * arrays to fetch from kernel. The following APIs provide an uniform way to
+ * fetch these data. All arrays in bpf_prog_info are stored in a single
+ * continuous memory region. This makes it easy to store the info in a
+ * file.
+ *
+ * Before writing perf_bpil to files, it is necessary to
+ * translate pointers in bpf_prog_info to offsets. Helper functions
+ * bpil_addr_to_offs() and bpil_offs_to_addr()
+ * are introduced to switch between pointers and offsets.
+ *
+ * Examples:
+ * # To fetch map_ids and prog_tags:
+ * __u64 arrays = (1UL << PERF_BPIL_MAP_IDS) |
+ * (1UL << PERF_BPIL_PROG_TAGS);
+ * struct perf_bpil *info_linear =
+ * get_bpf_prog_info_linear(fd, arrays);
+ *
+ * # To save data in file
+ * bpil_addr_to_offs(info_linear);
+ * write(f, info_linear, sizeof(*info_linear) + info_linear->data_len);
+ *
+ * # To read data from file
+ * read(f, info_linear, <proper_size>);
+ * bpil_offs_to_addr(info_linear);
+ */
+enum perf_bpil_array_types {
+ PERF_BPIL_FIRST_ARRAY = 0,
+ PERF_BPIL_JITED_INSNS = 0,
+ PERF_BPIL_XLATED_INSNS,
+ PERF_BPIL_MAP_IDS,
+ PERF_BPIL_JITED_KSYMS,
+ PERF_BPIL_JITED_FUNC_LENS,
+ PERF_BPIL_FUNC_INFO,
+ PERF_BPIL_LINE_INFO,
+ PERF_BPIL_JITED_LINE_INFO,
+ PERF_BPIL_PROG_TAGS,
+ PERF_BPIL_LAST_ARRAY,
+};
+
+struct perf_bpil {
+ /* size of struct bpf_prog_info, when the tool is compiled */
+ __u32 info_len;
+ /* total bytes allocated for data, round up to 8 bytes */
+ __u32 data_len;
+ /* which arrays are included in data */
+ __u64 arrays;
+ struct bpf_prog_info info;
+ __u8 data[];
+};
+
+struct perf_bpil *
+get_bpf_prog_info_linear(int fd, __u64 arrays);
+
+void
+bpil_addr_to_offs(struct perf_bpil *info_linear);
+
+void
+bpil_offs_to_addr(struct perf_bpil *info_linear);
+
+#endif /* HAVE_LIBBPF_SUPPORT */
+#endif /* __PERF_BPF_UTILS_H */
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
new file mode 100644
index 000000000..6732cbbcf
--- /dev/null
+++ b/tools/perf/util/bpf_counter.c
@@ -0,0 +1,819 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (c) 2019 Facebook */
+
+#include <assert.h>
+#include <limits.h>
+#include <unistd.h>
+#include <sys/file.h>
+#include <sys/time.h>
+#include <linux/err.h>
+#include <linux/zalloc.h>
+#include <api/fs/fs.h>
+#include <perf/bpf_perf.h>
+
+#include "bpf_counter.h"
+#include "bpf-utils.h"
+#include "counts.h"
+#include "debug.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "target.h"
+#include "cgroup.h"
+#include "cpumap.h"
+#include "thread_map.h"
+
+#include "bpf_skel/bpf_prog_profiler.skel.h"
+#include "bpf_skel/bperf_u.h"
+#include "bpf_skel/bperf_leader.skel.h"
+#include "bpf_skel/bperf_follower.skel.h"
+
+#define ATTR_MAP_SIZE 16
+
+static inline void *u64_to_ptr(__u64 ptr)
+{
+ return (void *)(unsigned long)ptr;
+}
+
+static struct bpf_counter *bpf_counter_alloc(void)
+{
+ struct bpf_counter *counter;
+
+ counter = zalloc(sizeof(*counter));
+ if (counter)
+ INIT_LIST_HEAD(&counter->list);
+ return counter;
+}
+
+static int bpf_program_profiler__destroy(struct evsel *evsel)
+{
+ struct bpf_counter *counter, *tmp;
+
+ list_for_each_entry_safe(counter, tmp,
+ &evsel->bpf_counter_list, list) {
+ list_del_init(&counter->list);
+ bpf_prog_profiler_bpf__destroy(counter->skel);
+ free(counter);
+ }
+ assert(list_empty(&evsel->bpf_counter_list));
+
+ return 0;
+}
+
+static char *bpf_target_prog_name(int tgt_fd)
+{
+ struct bpf_func_info *func_info;
+ struct perf_bpil *info_linear;
+ const struct btf_type *t;
+ struct btf *btf = NULL;
+ char *name = NULL;
+
+ info_linear = get_bpf_prog_info_linear(tgt_fd, 1UL << PERF_BPIL_FUNC_INFO);
+ if (IS_ERR_OR_NULL(info_linear)) {
+ pr_debug("failed to get info_linear for prog FD %d\n", tgt_fd);
+ return NULL;
+ }
+
+ if (info_linear->info.btf_id == 0) {
+ pr_debug("prog FD %d doesn't have valid btf\n", tgt_fd);
+ goto out;
+ }
+
+ btf = btf__load_from_kernel_by_id(info_linear->info.btf_id);
+ if (libbpf_get_error(btf)) {
+ pr_debug("failed to load btf for prog FD %d\n", tgt_fd);
+ goto out;
+ }
+
+ func_info = u64_to_ptr(info_linear->info.func_info);
+ t = btf__type_by_id(btf, func_info[0].type_id);
+ if (!t) {
+ pr_debug("btf %d doesn't have type %d\n",
+ info_linear->info.btf_id, func_info[0].type_id);
+ goto out;
+ }
+ name = strdup(btf__name_by_offset(btf, t->name_off));
+out:
+ btf__free(btf);
+ free(info_linear);
+ return name;
+}
+
+static int bpf_program_profiler_load_one(struct evsel *evsel, u32 prog_id)
+{
+ struct bpf_prog_profiler_bpf *skel;
+ struct bpf_counter *counter;
+ struct bpf_program *prog;
+ char *prog_name;
+ int prog_fd;
+ int err;
+
+ prog_fd = bpf_prog_get_fd_by_id(prog_id);
+ if (prog_fd < 0) {
+ pr_err("Failed to open fd for bpf prog %u\n", prog_id);
+ return -1;
+ }
+ counter = bpf_counter_alloc();
+ if (!counter) {
+ close(prog_fd);
+ return -1;
+ }
+
+ skel = bpf_prog_profiler_bpf__open();
+ if (!skel) {
+ pr_err("Failed to open bpf skeleton\n");
+ goto err_out;
+ }
+
+ skel->rodata->num_cpu = evsel__nr_cpus(evsel);
+
+ bpf_map__set_max_entries(skel->maps.events, evsel__nr_cpus(evsel));
+ bpf_map__set_max_entries(skel->maps.fentry_readings, 1);
+ bpf_map__set_max_entries(skel->maps.accum_readings, 1);
+
+ prog_name = bpf_target_prog_name(prog_fd);
+ if (!prog_name) {
+ pr_err("Failed to get program name for bpf prog %u. Does it have BTF?\n", prog_id);
+ goto err_out;
+ }
+
+ bpf_object__for_each_program(prog, skel->obj) {
+ err = bpf_program__set_attach_target(prog, prog_fd, prog_name);
+ if (err) {
+ pr_err("bpf_program__set_attach_target failed.\n"
+ "Does bpf prog %u have BTF?\n", prog_id);
+ goto err_out;
+ }
+ }
+ set_max_rlimit();
+ err = bpf_prog_profiler_bpf__load(skel);
+ if (err) {
+ pr_err("bpf_prog_profiler_bpf__load failed\n");
+ goto err_out;
+ }
+
+ assert(skel != NULL);
+ counter->skel = skel;
+ list_add(&counter->list, &evsel->bpf_counter_list);
+ close(prog_fd);
+ return 0;
+err_out:
+ bpf_prog_profiler_bpf__destroy(skel);
+ free(counter);
+ close(prog_fd);
+ return -1;
+}
+
+static int bpf_program_profiler__load(struct evsel *evsel, struct target *target)
+{
+ char *bpf_str, *bpf_str_, *tok, *saveptr = NULL, *p;
+ u32 prog_id;
+ int ret;
+
+ bpf_str_ = bpf_str = strdup(target->bpf_str);
+ if (!bpf_str)
+ return -1;
+
+ while ((tok = strtok_r(bpf_str, ",", &saveptr)) != NULL) {
+ prog_id = strtoul(tok, &p, 10);
+ if (prog_id == 0 || prog_id == UINT_MAX ||
+ (*p != '\0' && *p != ',')) {
+ pr_err("Failed to parse bpf prog ids %s\n",
+ target->bpf_str);
+ return -1;
+ }
+
+ ret = bpf_program_profiler_load_one(evsel, prog_id);
+ if (ret) {
+ bpf_program_profiler__destroy(evsel);
+ free(bpf_str_);
+ return -1;
+ }
+ bpf_str = NULL;
+ }
+ free(bpf_str_);
+ return 0;
+}
+
+static int bpf_program_profiler__enable(struct evsel *evsel)
+{
+ struct bpf_counter *counter;
+ int ret;
+
+ list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
+ assert(counter->skel != NULL);
+ ret = bpf_prog_profiler_bpf__attach(counter->skel);
+ if (ret) {
+ bpf_program_profiler__destroy(evsel);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int bpf_program_profiler__disable(struct evsel *evsel)
+{
+ struct bpf_counter *counter;
+
+ list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
+ assert(counter->skel != NULL);
+ bpf_prog_profiler_bpf__detach(counter->skel);
+ }
+ return 0;
+}
+
+static int bpf_program_profiler__read(struct evsel *evsel)
+{
+ // BPF_MAP_TYPE_PERCPU_ARRAY uses /sys/devices/system/cpu/possible
+ // Sometimes possible > online, like on a Ryzen 3900X that has 24
+ // threads but its possible showed 0-31 -acme
+ int num_cpu_bpf = libbpf_num_possible_cpus();
+ struct bpf_perf_event_value values[num_cpu_bpf];
+ struct bpf_counter *counter;
+ struct perf_counts_values *counts;
+ int reading_map_fd;
+ __u32 key = 0;
+ int err, idx, bpf_cpu;
+
+ if (list_empty(&evsel->bpf_counter_list))
+ return -EAGAIN;
+
+ perf_cpu_map__for_each_idx(idx, evsel__cpus(evsel)) {
+ counts = perf_counts(evsel->counts, idx, 0);
+ counts->val = 0;
+ counts->ena = 0;
+ counts->run = 0;
+ }
+ list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
+ struct bpf_prog_profiler_bpf *skel = counter->skel;
+
+ assert(skel != NULL);
+ reading_map_fd = bpf_map__fd(skel->maps.accum_readings);
+
+ err = bpf_map_lookup_elem(reading_map_fd, &key, values);
+ if (err) {
+ pr_err("failed to read value\n");
+ return err;
+ }
+
+ for (bpf_cpu = 0; bpf_cpu < num_cpu_bpf; bpf_cpu++) {
+ idx = perf_cpu_map__idx(evsel__cpus(evsel),
+ (struct perf_cpu){.cpu = bpf_cpu});
+ if (idx == -1)
+ continue;
+ counts = perf_counts(evsel->counts, idx, 0);
+ counts->val += values[bpf_cpu].counter;
+ counts->ena += values[bpf_cpu].enabled;
+ counts->run += values[bpf_cpu].running;
+ }
+ }
+ return 0;
+}
+
+static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu_map_idx,
+ int fd)
+{
+ struct bpf_prog_profiler_bpf *skel;
+ struct bpf_counter *counter;
+ int ret;
+
+ list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
+ skel = counter->skel;
+ assert(skel != NULL);
+
+ ret = bpf_map_update_elem(bpf_map__fd(skel->maps.events),
+ &cpu_map_idx, &fd, BPF_ANY);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+struct bpf_counter_ops bpf_program_profiler_ops = {
+ .load = bpf_program_profiler__load,
+ .enable = bpf_program_profiler__enable,
+ .disable = bpf_program_profiler__disable,
+ .read = bpf_program_profiler__read,
+ .destroy = bpf_program_profiler__destroy,
+ .install_pe = bpf_program_profiler__install_pe,
+};
+
+static bool bperf_attr_map_compatible(int attr_map_fd)
+{
+ struct bpf_map_info map_info = {0};
+ __u32 map_info_len = sizeof(map_info);
+ int err;
+
+ err = bpf_obj_get_info_by_fd(attr_map_fd, &map_info, &map_info_len);
+
+ if (err)
+ return false;
+ return (map_info.key_size == sizeof(struct perf_event_attr)) &&
+ (map_info.value_size == sizeof(struct perf_event_attr_map_entry));
+}
+
+static int bperf_lock_attr_map(struct target *target)
+{
+ char path[PATH_MAX];
+ int map_fd, err;
+
+ if (target->attr_map) {
+ scnprintf(path, PATH_MAX, "%s", target->attr_map);
+ } else {
+ scnprintf(path, PATH_MAX, "%s/fs/bpf/%s", sysfs__mountpoint(),
+ BPF_PERF_DEFAULT_ATTR_MAP_PATH);
+ }
+
+ if (access(path, F_OK)) {
+ map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL,
+ sizeof(struct perf_event_attr),
+ sizeof(struct perf_event_attr_map_entry),
+ ATTR_MAP_SIZE, NULL);
+ if (map_fd < 0)
+ return -1;
+
+ err = bpf_obj_pin(map_fd, path);
+ if (err) {
+ /* someone pinned the map in parallel? */
+ close(map_fd);
+ map_fd = bpf_obj_get(path);
+ if (map_fd < 0)
+ return -1;
+ }
+ } else {
+ map_fd = bpf_obj_get(path);
+ if (map_fd < 0)
+ return -1;
+ }
+
+ if (!bperf_attr_map_compatible(map_fd)) {
+ close(map_fd);
+ return -1;
+
+ }
+ err = flock(map_fd, LOCK_EX);
+ if (err) {
+ close(map_fd);
+ return -1;
+ }
+ return map_fd;
+}
+
+static int bperf_check_target(struct evsel *evsel,
+ struct target *target,
+ enum bperf_filter_type *filter_type,
+ __u32 *filter_entry_cnt)
+{
+ if (evsel->core.leader->nr_members > 1) {
+ pr_err("bpf managed perf events do not yet support groups.\n");
+ return -1;
+ }
+
+ /* determine filter type based on target */
+ if (target->system_wide) {
+ *filter_type = BPERF_FILTER_GLOBAL;
+ *filter_entry_cnt = 1;
+ } else if (target->cpu_list) {
+ *filter_type = BPERF_FILTER_CPU;
+ *filter_entry_cnt = perf_cpu_map__nr(evsel__cpus(evsel));
+ } else if (target->tid) {
+ *filter_type = BPERF_FILTER_PID;
+ *filter_entry_cnt = perf_thread_map__nr(evsel->core.threads);
+ } else if (target->pid || evsel->evlist->workload.pid != -1) {
+ *filter_type = BPERF_FILTER_TGID;
+ *filter_entry_cnt = perf_thread_map__nr(evsel->core.threads);
+ } else {
+ pr_err("bpf managed perf events do not yet support these targets.\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static struct perf_cpu_map *all_cpu_map;
+
+static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
+ struct perf_event_attr_map_entry *entry)
+{
+ struct bperf_leader_bpf *skel = bperf_leader_bpf__open();
+ int link_fd, diff_map_fd, err;
+ struct bpf_link *link = NULL;
+
+ if (!skel) {
+ pr_err("Failed to open leader skeleton\n");
+ return -1;
+ }
+
+ bpf_map__set_max_entries(skel->maps.events, libbpf_num_possible_cpus());
+ err = bperf_leader_bpf__load(skel);
+ if (err) {
+ pr_err("Failed to load leader skeleton\n");
+ goto out;
+ }
+
+ link = bpf_program__attach(skel->progs.on_switch);
+ if (IS_ERR(link)) {
+ pr_err("Failed to attach leader program\n");
+ err = PTR_ERR(link);
+ goto out;
+ }
+
+ link_fd = bpf_link__fd(link);
+ diff_map_fd = bpf_map__fd(skel->maps.diff_readings);
+ entry->link_id = bpf_link_get_id(link_fd);
+ entry->diff_map_id = bpf_map_get_id(diff_map_fd);
+ err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, entry, BPF_ANY);
+ assert(err == 0);
+
+ evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry->link_id);
+ assert(evsel->bperf_leader_link_fd >= 0);
+
+ /*
+ * save leader_skel for install_pe, which is called within
+ * following evsel__open_per_cpu call
+ */
+ evsel->leader_skel = skel;
+ evsel__open_per_cpu(evsel, all_cpu_map, -1);
+
+out:
+ bperf_leader_bpf__destroy(skel);
+ bpf_link__destroy(link);
+ return err;
+}
+
+static int bperf__load(struct evsel *evsel, struct target *target)
+{
+ struct perf_event_attr_map_entry entry = {0xffffffff, 0xffffffff};
+ int attr_map_fd, diff_map_fd = -1, err;
+ enum bperf_filter_type filter_type;
+ __u32 filter_entry_cnt, i;
+
+ if (bperf_check_target(evsel, target, &filter_type, &filter_entry_cnt))
+ return -1;
+
+ if (!all_cpu_map) {
+ all_cpu_map = perf_cpu_map__new(NULL);
+ if (!all_cpu_map)
+ return -1;
+ }
+
+ evsel->bperf_leader_prog_fd = -1;
+ evsel->bperf_leader_link_fd = -1;
+
+ /*
+ * Step 1: hold a fd on the leader program and the bpf_link, if
+ * the program is not already gone, reload the program.
+ * Use flock() to ensure exclusive access to the perf_event_attr
+ * map.
+ */
+ attr_map_fd = bperf_lock_attr_map(target);
+ if (attr_map_fd < 0) {
+ pr_err("Failed to lock perf_event_attr map\n");
+ return -1;
+ }
+
+ err = bpf_map_lookup_elem(attr_map_fd, &evsel->core.attr, &entry);
+ if (err) {
+ err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, &entry, BPF_ANY);
+ if (err)
+ goto out;
+ }
+
+ evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry.link_id);
+ if (evsel->bperf_leader_link_fd < 0 &&
+ bperf_reload_leader_program(evsel, attr_map_fd, &entry)) {
+ err = -1;
+ goto out;
+ }
+ /*
+ * The bpf_link holds reference to the leader program, and the
+ * leader program holds reference to the maps. Therefore, if
+ * link_id is valid, diff_map_id should also be valid.
+ */
+ evsel->bperf_leader_prog_fd = bpf_prog_get_fd_by_id(
+ bpf_link_get_prog_id(evsel->bperf_leader_link_fd));
+ assert(evsel->bperf_leader_prog_fd >= 0);
+
+ diff_map_fd = bpf_map_get_fd_by_id(entry.diff_map_id);
+ assert(diff_map_fd >= 0);
+
+ /*
+ * bperf uses BPF_PROG_TEST_RUN to get accurate reading. Check
+ * whether the kernel support it
+ */
+ err = bperf_trigger_reading(evsel->bperf_leader_prog_fd, 0);
+ if (err) {
+ pr_err("The kernel does not support test_run for raw_tp BPF programs.\n"
+ "Therefore, --use-bpf might show inaccurate readings\n");
+ goto out;
+ }
+
+ /* Step 2: load the follower skeleton */
+ evsel->follower_skel = bperf_follower_bpf__open();
+ if (!evsel->follower_skel) {
+ err = -1;
+ pr_err("Failed to open follower skeleton\n");
+ goto out;
+ }
+
+ /* attach fexit program to the leader program */
+ bpf_program__set_attach_target(evsel->follower_skel->progs.fexit_XXX,
+ evsel->bperf_leader_prog_fd, "on_switch");
+
+ /* connect to leader diff_reading map */
+ bpf_map__reuse_fd(evsel->follower_skel->maps.diff_readings, diff_map_fd);
+
+ /* set up reading map */
+ bpf_map__set_max_entries(evsel->follower_skel->maps.accum_readings,
+ filter_entry_cnt);
+ /* set up follower filter based on target */
+ bpf_map__set_max_entries(evsel->follower_skel->maps.filter,
+ filter_entry_cnt);
+ err = bperf_follower_bpf__load(evsel->follower_skel);
+ if (err) {
+ pr_err("Failed to load follower skeleton\n");
+ bperf_follower_bpf__destroy(evsel->follower_skel);
+ evsel->follower_skel = NULL;
+ goto out;
+ }
+
+ for (i = 0; i < filter_entry_cnt; i++) {
+ int filter_map_fd;
+ __u32 key;
+
+ if (filter_type == BPERF_FILTER_PID ||
+ filter_type == BPERF_FILTER_TGID)
+ key = perf_thread_map__pid(evsel->core.threads, i);
+ else if (filter_type == BPERF_FILTER_CPU)
+ key = perf_cpu_map__cpu(evsel->core.cpus, i).cpu;
+ else
+ break;
+
+ filter_map_fd = bpf_map__fd(evsel->follower_skel->maps.filter);
+ bpf_map_update_elem(filter_map_fd, &key, &i, BPF_ANY);
+ }
+
+ evsel->follower_skel->bss->type = filter_type;
+
+ err = bperf_follower_bpf__attach(evsel->follower_skel);
+
+out:
+ if (err && evsel->bperf_leader_link_fd >= 0)
+ close(evsel->bperf_leader_link_fd);
+ if (err && evsel->bperf_leader_prog_fd >= 0)
+ close(evsel->bperf_leader_prog_fd);
+ if (diff_map_fd >= 0)
+ close(diff_map_fd);
+
+ flock(attr_map_fd, LOCK_UN);
+ close(attr_map_fd);
+
+ return err;
+}
+
+static int bperf__install_pe(struct evsel *evsel, int cpu_map_idx, int fd)
+{
+ struct bperf_leader_bpf *skel = evsel->leader_skel;
+
+ return bpf_map_update_elem(bpf_map__fd(skel->maps.events),
+ &cpu_map_idx, &fd, BPF_ANY);
+}
+
+/*
+ * trigger the leader prog on each cpu, so the accum_reading map could get
+ * the latest readings.
+ */
+static int bperf_sync_counters(struct evsel *evsel)
+{
+ int num_cpu, i, cpu;
+
+ num_cpu = perf_cpu_map__nr(all_cpu_map);
+ for (i = 0; i < num_cpu; i++) {
+ cpu = perf_cpu_map__cpu(all_cpu_map, i).cpu;
+ bperf_trigger_reading(evsel->bperf_leader_prog_fd, cpu);
+ }
+ return 0;
+}
+
+static int bperf__enable(struct evsel *evsel)
+{
+ evsel->follower_skel->bss->enabled = 1;
+ return 0;
+}
+
+static int bperf__disable(struct evsel *evsel)
+{
+ evsel->follower_skel->bss->enabled = 0;
+ return 0;
+}
+
+static int bperf__read(struct evsel *evsel)
+{
+ struct bperf_follower_bpf *skel = evsel->follower_skel;
+ __u32 num_cpu_bpf = cpu__max_cpu().cpu;
+ struct bpf_perf_event_value values[num_cpu_bpf];
+ struct perf_counts_values *counts;
+ int reading_map_fd, err = 0;
+ __u32 i;
+ int j;
+
+ bperf_sync_counters(evsel);
+ reading_map_fd = bpf_map__fd(skel->maps.accum_readings);
+
+ for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) {
+ struct perf_cpu entry;
+ __u32 cpu;
+
+ err = bpf_map_lookup_elem(reading_map_fd, &i, values);
+ if (err)
+ goto out;
+ switch (evsel->follower_skel->bss->type) {
+ case BPERF_FILTER_GLOBAL:
+ assert(i == 0);
+
+ perf_cpu_map__for_each_cpu(entry, j, evsel__cpus(evsel)) {
+ counts = perf_counts(evsel->counts, j, 0);
+ counts->val = values[entry.cpu].counter;
+ counts->ena = values[entry.cpu].enabled;
+ counts->run = values[entry.cpu].running;
+ }
+ break;
+ case BPERF_FILTER_CPU:
+ cpu = perf_cpu_map__cpu(evsel__cpus(evsel), i).cpu;
+ assert(cpu >= 0);
+ counts = perf_counts(evsel->counts, i, 0);
+ counts->val = values[cpu].counter;
+ counts->ena = values[cpu].enabled;
+ counts->run = values[cpu].running;
+ break;
+ case BPERF_FILTER_PID:
+ case BPERF_FILTER_TGID:
+ counts = perf_counts(evsel->counts, 0, i);
+ counts->val = 0;
+ counts->ena = 0;
+ counts->run = 0;
+
+ for (cpu = 0; cpu < num_cpu_bpf; cpu++) {
+ counts->val += values[cpu].counter;
+ counts->ena += values[cpu].enabled;
+ counts->run += values[cpu].running;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+out:
+ return err;
+}
+
+static int bperf__destroy(struct evsel *evsel)
+{
+ bperf_follower_bpf__destroy(evsel->follower_skel);
+ close(evsel->bperf_leader_prog_fd);
+ close(evsel->bperf_leader_link_fd);
+ return 0;
+}
+
+/*
+ * bperf: share hardware PMCs with BPF
+ *
+ * perf uses performance monitoring counters (PMC) to monitor system
+ * performance. The PMCs are limited hardware resources. For example,
+ * Intel CPUs have 3x fixed PMCs and 4x programmable PMCs per cpu.
+ *
+ * Modern data center systems use these PMCs in many different ways:
+ * system level monitoring, (maybe nested) container level monitoring, per
+ * process monitoring, profiling (in sample mode), etc. In some cases,
+ * there are more active perf_events than available hardware PMCs. To allow
+ * all perf_events to have a chance to run, it is necessary to do expensive
+ * time multiplexing of events.
+ *
+ * On the other hand, many monitoring tools count the common metrics
+ * (cycles, instructions). It is a waste to have multiple tools create
+ * multiple perf_events of "cycles" and occupy multiple PMCs.
+ *
+ * bperf tries to reduce such wastes by allowing multiple perf_events of
+ * "cycles" or "instructions" (at different scopes) to share PMUs. Instead
+ * of having each perf-stat session to read its own perf_events, bperf uses
+ * BPF programs to read the perf_events and aggregate readings to BPF maps.
+ * Then, the perf-stat session(s) reads the values from these BPF maps.
+ *
+ * ||
+ * shared progs and maps <- || -> per session progs and maps
+ * ||
+ * --------------- ||
+ * | perf_events | ||
+ * --------------- fexit || -----------------
+ * | --------||----> | follower prog |
+ * --------------- / || --- -----------------
+ * cs -> | leader prog |/ ||/ | |
+ * --> --------------- /|| -------------- ------------------
+ * / | | / || | filter map | | accum_readings |
+ * / ------------ ------------ || -------------- ------------------
+ * | | prev map | | diff map | || |
+ * | ------------ ------------ || |
+ * \ || |
+ * = \ ==================================================== | ============
+ * \ / user space
+ * \ /
+ * \ /
+ * BPF_PROG_TEST_RUN BPF_MAP_LOOKUP_ELEM
+ * \ /
+ * \ /
+ * \------ perf-stat ----------------------/
+ *
+ * The figure above shows the architecture of bperf. Note that the figure
+ * is divided into 3 regions: shared progs and maps (top left), per session
+ * progs and maps (top right), and user space (bottom).
+ *
+ * The leader prog is triggered on each context switch (cs). The leader
+ * prog reads perf_events and stores the difference (current_reading -
+ * previous_reading) to the diff map. For the same metric, e.g. "cycles",
+ * multiple perf-stat sessions share the same leader prog.
+ *
+ * Each perf-stat session creates a follower prog as fexit program to the
+ * leader prog. It is possible to attach up to BPF_MAX_TRAMP_PROGS (38)
+ * follower progs to the same leader prog. The follower prog checks current
+ * task and processor ID to decide whether to add the value from the diff
+ * map to its accumulated reading map (accum_readings).
+ *
+ * Finally, perf-stat user space reads the value from accum_reading map.
+ *
+ * Besides context switch, it is also necessary to trigger the leader prog
+ * before perf-stat reads the value. Otherwise, the accum_reading map may
+ * not have the latest reading from the perf_events. This is achieved by
+ * triggering the event via sys_bpf(BPF_PROG_TEST_RUN) to each CPU.
+ *
+ * Comment before the definition of struct perf_event_attr_map_entry
+ * describes how different sessions of perf-stat share information about
+ * the leader prog.
+ */
+
+struct bpf_counter_ops bperf_ops = {
+ .load = bperf__load,
+ .enable = bperf__enable,
+ .disable = bperf__disable,
+ .read = bperf__read,
+ .install_pe = bperf__install_pe,
+ .destroy = bperf__destroy,
+};
+
+extern struct bpf_counter_ops bperf_cgrp_ops;
+
+static inline bool bpf_counter_skip(struct evsel *evsel)
+{
+ return evsel->bpf_counter_ops == NULL;
+}
+
+int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd)
+{
+ if (bpf_counter_skip(evsel))
+ return 0;
+ return evsel->bpf_counter_ops->install_pe(evsel, cpu_map_idx, fd);
+}
+
+int bpf_counter__load(struct evsel *evsel, struct target *target)
+{
+ if (target->bpf_str)
+ evsel->bpf_counter_ops = &bpf_program_profiler_ops;
+ else if (cgrp_event_expanded && target->use_bpf)
+ evsel->bpf_counter_ops = &bperf_cgrp_ops;
+ else if (target->use_bpf || evsel->bpf_counter ||
+ evsel__match_bpf_counter_events(evsel->name))
+ evsel->bpf_counter_ops = &bperf_ops;
+
+ if (evsel->bpf_counter_ops)
+ return evsel->bpf_counter_ops->load(evsel, target);
+ return 0;
+}
+
+int bpf_counter__enable(struct evsel *evsel)
+{
+ if (bpf_counter_skip(evsel))
+ return 0;
+ return evsel->bpf_counter_ops->enable(evsel);
+}
+
+int bpf_counter__disable(struct evsel *evsel)
+{
+ if (bpf_counter_skip(evsel))
+ return 0;
+ return evsel->bpf_counter_ops->disable(evsel);
+}
+
+int bpf_counter__read(struct evsel *evsel)
+{
+ if (bpf_counter_skip(evsel))
+ return -EAGAIN;
+ return evsel->bpf_counter_ops->read(evsel);
+}
+
+void bpf_counter__destroy(struct evsel *evsel)
+{
+ if (bpf_counter_skip(evsel))
+ return;
+ evsel->bpf_counter_ops->destroy(evsel);
+ evsel->bpf_counter_ops = NULL;
+ evsel->bpf_skel = NULL;
+}
diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h
new file mode 100644
index 000000000..c6d21c07b
--- /dev/null
+++ b/tools/perf/util/bpf_counter.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_BPF_COUNTER_H
+#define __PERF_BPF_COUNTER_H 1
+
+#include <linux/list.h>
+#include <sys/resource.h>
+
+#ifdef HAVE_LIBBPF_SUPPORT
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#endif
+
+struct evsel;
+struct target;
+struct bpf_counter;
+
+typedef int (*bpf_counter_evsel_op)(struct evsel *evsel);
+typedef int (*bpf_counter_evsel_target_op)(struct evsel *evsel,
+ struct target *target);
+typedef int (*bpf_counter_evsel_install_pe_op)(struct evsel *evsel,
+ int cpu_map_idx,
+ int fd);
+
+struct bpf_counter_ops {
+ bpf_counter_evsel_target_op load;
+ bpf_counter_evsel_op enable;
+ bpf_counter_evsel_op disable;
+ bpf_counter_evsel_op read;
+ bpf_counter_evsel_op destroy;
+ bpf_counter_evsel_install_pe_op install_pe;
+};
+
+struct bpf_counter {
+ void *skel;
+ struct list_head list;
+};
+
+#ifdef HAVE_BPF_SKEL
+
+int bpf_counter__load(struct evsel *evsel, struct target *target);
+int bpf_counter__enable(struct evsel *evsel);
+int bpf_counter__disable(struct evsel *evsel);
+int bpf_counter__read(struct evsel *evsel);
+void bpf_counter__destroy(struct evsel *evsel);
+int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd);
+
+#else /* HAVE_BPF_SKEL */
+
+#include <linux/err.h>
+
+static inline int bpf_counter__load(struct evsel *evsel __maybe_unused,
+ struct target *target __maybe_unused)
+{
+ return 0;
+}
+
+static inline int bpf_counter__enable(struct evsel *evsel __maybe_unused)
+{
+ return 0;
+}
+
+static inline int bpf_counter__disable(struct evsel *evsel __maybe_unused)
+{
+ return 0;
+}
+
+static inline int bpf_counter__read(struct evsel *evsel __maybe_unused)
+{
+ return -EAGAIN;
+}
+
+static inline void bpf_counter__destroy(struct evsel *evsel __maybe_unused)
+{
+}
+
+static inline int bpf_counter__install_pe(struct evsel *evsel __maybe_unused,
+ int cpu __maybe_unused,
+ int fd __maybe_unused)
+{
+ return 0;
+}
+
+#endif /* HAVE_BPF_SKEL */
+
+static inline void set_max_rlimit(void)
+{
+ struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
+
+ setrlimit(RLIMIT_MEMLOCK, &rinf);
+}
+
+#ifdef HAVE_BPF_SKEL
+
+static inline __u32 bpf_link_get_id(int fd)
+{
+ struct bpf_link_info link_info = { .id = 0, };
+ __u32 link_info_len = sizeof(link_info);
+
+ bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len);
+ return link_info.id;
+}
+
+static inline __u32 bpf_link_get_prog_id(int fd)
+{
+ struct bpf_link_info link_info = { .id = 0, };
+ __u32 link_info_len = sizeof(link_info);
+
+ bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len);
+ return link_info.prog_id;
+}
+
+static inline __u32 bpf_map_get_id(int fd)
+{
+ struct bpf_map_info map_info = { .id = 0, };
+ __u32 map_info_len = sizeof(map_info);
+
+ bpf_obj_get_info_by_fd(fd, &map_info, &map_info_len);
+ return map_info.id;
+}
+
+/* trigger the leader program on a cpu */
+static inline int bperf_trigger_reading(int prog_fd, int cpu)
+{
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .ctx_in = NULL,
+ .ctx_size_in = 0,
+ .flags = BPF_F_TEST_RUN_ON_CPU,
+ .cpu = cpu,
+ .retval = 0,
+ );
+
+ return bpf_prog_test_run_opts(prog_fd, &opts);
+}
+#endif /* HAVE_BPF_SKEL */
+
+#endif /* __PERF_BPF_COUNTER_H */
diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c
new file mode 100644
index 000000000..1c82377ed
--- /dev/null
+++ b/tools/perf/util/bpf_counter_cgroup.c
@@ -0,0 +1,295 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (c) 2021 Facebook */
+/* Copyright (c) 2021 Google */
+
+#include <assert.h>
+#include <limits.h>
+#include <unistd.h>
+#include <sys/file.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <linux/err.h>
+#include <linux/zalloc.h>
+#include <linux/perf_event.h>
+#include <api/fs/fs.h>
+#include <perf/bpf_perf.h>
+
+#include "affinity.h"
+#include "bpf_counter.h"
+#include "cgroup.h"
+#include "counts.h"
+#include "debug.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "target.h"
+#include "cpumap.h"
+#include "thread_map.h"
+
+#include "bpf_skel/bperf_cgroup.skel.h"
+
+static struct perf_event_attr cgrp_switch_attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_CGROUP_SWITCHES,
+ .size = sizeof(cgrp_switch_attr),
+ .sample_period = 1,
+ .disabled = 1,
+};
+
+static struct evsel *cgrp_switch;
+static struct bperf_cgroup_bpf *skel;
+
+#define FD(evt, cpu) (*(int *)xyarray__entry(evt->core.fd, cpu, 0))
+
+static int bperf_load_program(struct evlist *evlist)
+{
+ struct bpf_link *link;
+ struct evsel *evsel;
+ struct cgroup *cgrp, *leader_cgrp;
+ int i, j;
+ struct perf_cpu cpu;
+ int total_cpus = cpu__max_cpu().cpu;
+ int map_size, map_fd;
+ int prog_fd, err;
+
+ skel = bperf_cgroup_bpf__open();
+ if (!skel) {
+ pr_err("Failed to open cgroup skeleton\n");
+ return -1;
+ }
+
+ skel->rodata->num_cpus = total_cpus;
+ skel->rodata->num_events = evlist->core.nr_entries / nr_cgroups;
+
+ BUG_ON(evlist->core.nr_entries % nr_cgroups != 0);
+
+ /* we need one copy of events per cpu for reading */
+ map_size = total_cpus * evlist->core.nr_entries / nr_cgroups;
+ bpf_map__set_max_entries(skel->maps.events, map_size);
+ bpf_map__set_max_entries(skel->maps.cgrp_idx, nr_cgroups);
+ /* previous result is saved in a per-cpu array */
+ map_size = evlist->core.nr_entries / nr_cgroups;
+ bpf_map__set_max_entries(skel->maps.prev_readings, map_size);
+ /* cgroup result needs all events (per-cpu) */
+ map_size = evlist->core.nr_entries;
+ bpf_map__set_max_entries(skel->maps.cgrp_readings, map_size);
+
+ set_max_rlimit();
+
+ err = bperf_cgroup_bpf__load(skel);
+ if (err) {
+ pr_err("Failed to load cgroup skeleton\n");
+ goto out;
+ }
+
+ if (cgroup_is_v2("perf_event") > 0)
+ skel->bss->use_cgroup_v2 = 1;
+
+ err = -1;
+
+ cgrp_switch = evsel__new(&cgrp_switch_attr);
+ if (evsel__open_per_cpu(cgrp_switch, evlist->core.all_cpus, -1) < 0) {
+ pr_err("Failed to open cgroup switches event\n");
+ goto out;
+ }
+
+ perf_cpu_map__for_each_cpu(cpu, i, evlist->core.all_cpus) {
+ link = bpf_program__attach_perf_event(skel->progs.on_cgrp_switch,
+ FD(cgrp_switch, i));
+ if (IS_ERR(link)) {
+ pr_err("Failed to attach cgroup program\n");
+ err = PTR_ERR(link);
+ goto out;
+ }
+ }
+
+ /*
+ * Update cgrp_idx map from cgroup-id to event index.
+ */
+ cgrp = NULL;
+ i = 0;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (cgrp == NULL || evsel->cgrp == leader_cgrp) {
+ leader_cgrp = evsel->cgrp;
+ evsel->cgrp = NULL;
+
+ /* open single copy of the events w/o cgroup */
+ err = evsel__open_per_cpu(evsel, evsel->core.cpus, -1);
+ if (err == 0)
+ evsel->supported = true;
+
+ map_fd = bpf_map__fd(skel->maps.events);
+ perf_cpu_map__for_each_cpu(cpu, j, evsel->core.cpus) {
+ int fd = FD(evsel, j);
+ __u32 idx = evsel->core.idx * total_cpus + cpu.cpu;
+
+ bpf_map_update_elem(map_fd, &idx, &fd, BPF_ANY);
+ }
+
+ evsel->cgrp = leader_cgrp;
+ }
+
+ if (evsel->cgrp == cgrp)
+ continue;
+
+ cgrp = evsel->cgrp;
+
+ if (read_cgroup_id(cgrp) < 0) {
+ pr_err("Failed to get cgroup id\n");
+ err = -1;
+ goto out;
+ }
+
+ map_fd = bpf_map__fd(skel->maps.cgrp_idx);
+ err = bpf_map_update_elem(map_fd, &cgrp->id, &i, BPF_ANY);
+ if (err < 0) {
+ pr_err("Failed to update cgroup index map\n");
+ goto out;
+ }
+
+ i++;
+ }
+
+ /*
+ * bperf uses BPF_PROG_TEST_RUN to get accurate reading. Check
+ * whether the kernel support it
+ */
+ prog_fd = bpf_program__fd(skel->progs.trigger_read);
+ err = bperf_trigger_reading(prog_fd, 0);
+ if (err) {
+ pr_warning("The kernel does not support test_run for raw_tp BPF programs.\n"
+ "Therefore, --for-each-cgroup might show inaccurate readings\n");
+ err = 0;
+ }
+
+out:
+ return err;
+}
+
+static int bperf_cgrp__load(struct evsel *evsel,
+ struct target *target __maybe_unused)
+{
+ static bool bperf_loaded = false;
+
+ evsel->bperf_leader_prog_fd = -1;
+ evsel->bperf_leader_link_fd = -1;
+
+ if (!bperf_loaded && bperf_load_program(evsel->evlist))
+ return -1;
+
+ bperf_loaded = true;
+ /* just to bypass bpf_counter_skip() */
+ evsel->follower_skel = (struct bperf_follower_bpf *)skel;
+
+ return 0;
+}
+
+static int bperf_cgrp__install_pe(struct evsel *evsel __maybe_unused,
+ int cpu __maybe_unused, int fd __maybe_unused)
+{
+ /* nothing to do */
+ return 0;
+}
+
+/*
+ * trigger the leader prog on each cpu, so the cgrp_reading map could get
+ * the latest results.
+ */
+static int bperf_cgrp__sync_counters(struct evlist *evlist)
+{
+ struct perf_cpu cpu;
+ int idx;
+ int prog_fd = bpf_program__fd(skel->progs.trigger_read);
+
+ perf_cpu_map__for_each_cpu(cpu, idx, evlist->core.all_cpus)
+ bperf_trigger_reading(prog_fd, cpu.cpu);
+
+ return 0;
+}
+
+static int bperf_cgrp__enable(struct evsel *evsel)
+{
+ if (evsel->core.idx)
+ return 0;
+
+ bperf_cgrp__sync_counters(evsel->evlist);
+
+ skel->bss->enabled = 1;
+ return 0;
+}
+
+static int bperf_cgrp__disable(struct evsel *evsel)
+{
+ if (evsel->core.idx)
+ return 0;
+
+ bperf_cgrp__sync_counters(evsel->evlist);
+
+ skel->bss->enabled = 0;
+ return 0;
+}
+
+static int bperf_cgrp__read(struct evsel *evsel)
+{
+ struct evlist *evlist = evsel->evlist;
+ int total_cpus = cpu__max_cpu().cpu;
+ struct perf_counts_values *counts;
+ struct bpf_perf_event_value *values;
+ int reading_map_fd, err = 0;
+
+ if (evsel->core.idx)
+ return 0;
+
+ bperf_cgrp__sync_counters(evsel->evlist);
+
+ values = calloc(total_cpus, sizeof(*values));
+ if (values == NULL)
+ return -ENOMEM;
+
+ reading_map_fd = bpf_map__fd(skel->maps.cgrp_readings);
+
+ evlist__for_each_entry(evlist, evsel) {
+ __u32 idx = evsel->core.idx;
+ int i;
+ struct perf_cpu cpu;
+
+ err = bpf_map_lookup_elem(reading_map_fd, &idx, values);
+ if (err) {
+ pr_err("bpf map lookup failed: idx=%u, event=%s, cgrp=%s\n",
+ idx, evsel__name(evsel), evsel->cgrp->name);
+ goto out;
+ }
+
+ perf_cpu_map__for_each_cpu(cpu, i, evsel->core.cpus) {
+ counts = perf_counts(evsel->counts, i, 0);
+ counts->val = values[cpu.cpu].counter;
+ counts->ena = values[cpu.cpu].enabled;
+ counts->run = values[cpu.cpu].running;
+ }
+ }
+
+out:
+ free(values);
+ return err;
+}
+
+static int bperf_cgrp__destroy(struct evsel *evsel)
+{
+ if (evsel->core.idx)
+ return 0;
+
+ bperf_cgroup_bpf__destroy(skel);
+ evsel__delete(cgrp_switch); // it'll destroy on_switch progs too
+
+ return 0;
+}
+
+struct bpf_counter_ops bperf_cgrp_ops = {
+ .load = bperf_cgrp__load,
+ .enable = bperf_cgrp__enable,
+ .disable = bperf_cgrp__disable,
+ .read = bperf_cgrp__read,
+ .install_pe = bperf_cgrp__install_pe,
+ .destroy = bperf_cgrp__destroy,
+};
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
new file mode 100644
index 000000000..7a4297d8f
--- /dev/null
+++ b/tools/perf/util/bpf_ftrace.c
@@ -0,0 +1,154 @@
+#include <stdio.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <linux/err.h>
+
+#include "util/ftrace.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
+#include "util/debug.h"
+#include "util/evlist.h"
+#include "util/bpf_counter.h"
+
+#include "util/bpf_skel/func_latency.skel.h"
+
+static struct func_latency_bpf *skel;
+
+int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
+{
+ int fd, err;
+ int i, ncpus = 1, ntasks = 1;
+ struct filter_entry *func;
+
+ if (!list_is_singular(&ftrace->filters)) {
+ pr_err("ERROR: %s target function(s).\n",
+ list_empty(&ftrace->filters) ? "No" : "Too many");
+ return -1;
+ }
+
+ func = list_first_entry(&ftrace->filters, struct filter_entry, list);
+
+ skel = func_latency_bpf__open();
+ if (!skel) {
+ pr_err("Failed to open func latency skeleton\n");
+ return -1;
+ }
+
+ /* don't need to set cpu filter for system-wide mode */
+ if (ftrace->target.cpu_list) {
+ ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);
+ bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
+ }
+
+ if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
+ ntasks = perf_thread_map__nr(ftrace->evlist->core.threads);
+ bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+ }
+
+ set_max_rlimit();
+
+ err = func_latency_bpf__load(skel);
+ if (err) {
+ pr_err("Failed to load func latency skeleton\n");
+ goto out;
+ }
+
+ if (ftrace->target.cpu_list) {
+ u32 cpu;
+ u8 val = 1;
+
+ skel->bss->has_cpu = 1;
+ fd = bpf_map__fd(skel->maps.cpu_filter);
+
+ for (i = 0; i < ncpus; i++) {
+ cpu = perf_cpu_map__cpu(ftrace->evlist->core.user_requested_cpus, i).cpu;
+ bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
+ }
+ }
+
+ if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
+ u32 pid;
+ u8 val = 1;
+
+ skel->bss->has_task = 1;
+ fd = bpf_map__fd(skel->maps.task_filter);
+
+ for (i = 0; i < ntasks; i++) {
+ pid = perf_thread_map__pid(ftrace->evlist->core.threads, i);
+ bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+ }
+ }
+
+ skel->bss->use_nsec = ftrace->use_nsec;
+
+ skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
+ false, func->name);
+ if (IS_ERR(skel->links.func_begin)) {
+ pr_err("Failed to attach fentry program\n");
+ err = PTR_ERR(skel->links.func_begin);
+ goto out;
+ }
+
+ skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
+ true, func->name);
+ if (IS_ERR(skel->links.func_end)) {
+ pr_err("Failed to attach fexit program\n");
+ err = PTR_ERR(skel->links.func_end);
+ goto out;
+ }
+
+ /* XXX: we don't actually use this fd - just for poll() */
+ return open("/dev/null", O_RDONLY);
+
+out:
+ return err;
+}
+
+int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ skel->bss->enabled = 1;
+ return 0;
+}
+
+int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ skel->bss->enabled = 0;
+ return 0;
+}
+
+int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
+ int buckets[])
+{
+ int i, fd, err;
+ u32 idx;
+ u64 *hist;
+ int ncpus = cpu__max_cpu().cpu;
+
+ fd = bpf_map__fd(skel->maps.latency);
+
+ hist = calloc(ncpus, sizeof(*hist));
+ if (hist == NULL)
+ return -ENOMEM;
+
+ for (idx = 0; idx < NUM_BUCKET; idx++) {
+ err = bpf_map_lookup_elem(fd, &idx, hist);
+ if (err) {
+ buckets[idx] = 0;
+ continue;
+ }
+
+ for (i = 0; i < ncpus; i++)
+ buckets[idx] += hist[i];
+ }
+
+ free(hist);
+ return 0;
+}
+
+int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ func_latency_bpf__destroy(skel);
+ return 0;
+}
diff --git a/tools/perf/util/bpf_kwork.c b/tools/perf/util/bpf_kwork.c
new file mode 100644
index 000000000..6eb2c78fd
--- /dev/null
+++ b/tools/perf/util/bpf_kwork.c
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * bpf_kwork.c
+ *
+ * Copyright (c) 2022 Huawei Inc, Yang Jihong <yangjihong1@huawei.com>
+ */
+
+#include <time.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <linux/time64.h>
+
+#include "util/debug.h"
+#include "util/evsel.h"
+#include "util/kwork.h"
+
+#include <bpf/bpf.h>
+#include <perf/cpumap.h>
+
+#include "util/bpf_skel/kwork_trace.skel.h"
+
+/*
+ * This should be in sync with "util/kwork_trace.bpf.c"
+ */
+#define MAX_KWORKNAME 128
+
+struct work_key {
+ u32 type;
+ u32 cpu;
+ u64 id;
+};
+
+struct report_data {
+ u64 nr;
+ u64 total_time;
+ u64 max_time;
+ u64 max_time_start;
+ u64 max_time_end;
+};
+
+struct kwork_class_bpf {
+ struct kwork_class *class;
+
+ void (*load_prepare)(struct perf_kwork *kwork);
+ int (*get_work_name)(struct work_key *key, char **ret_name);
+};
+
+static struct kwork_trace_bpf *skel;
+
+static struct timespec ts_start;
+static struct timespec ts_end;
+
+void perf_kwork__trace_start(void)
+{
+ clock_gettime(CLOCK_MONOTONIC, &ts_start);
+ skel->bss->enabled = 1;
+}
+
+void perf_kwork__trace_finish(void)
+{
+ clock_gettime(CLOCK_MONOTONIC, &ts_end);
+ skel->bss->enabled = 0;
+}
+
+static int get_work_name_from_map(struct work_key *key, char **ret_name)
+{
+ char name[MAX_KWORKNAME] = { 0 };
+ int fd = bpf_map__fd(skel->maps.perf_kwork_names);
+
+ *ret_name = NULL;
+
+ if (fd < 0) {
+ pr_debug("Invalid names map fd\n");
+ return 0;
+ }
+
+ if ((bpf_map_lookup_elem(fd, key, name) == 0) && (strlen(name) != 0)) {
+ *ret_name = strdup(name);
+ if (*ret_name == NULL) {
+ pr_err("Failed to copy work name\n");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static void irq_load_prepare(struct perf_kwork *kwork)
+{
+ if (kwork->report == KWORK_REPORT_RUNTIME) {
+ bpf_program__set_autoload(skel->progs.report_irq_handler_entry, true);
+ bpf_program__set_autoload(skel->progs.report_irq_handler_exit, true);
+ }
+}
+
+static struct kwork_class_bpf kwork_irq_bpf = {
+ .load_prepare = irq_load_prepare,
+ .get_work_name = get_work_name_from_map,
+};
+
+static void softirq_load_prepare(struct perf_kwork *kwork)
+{
+ if (kwork->report == KWORK_REPORT_RUNTIME) {
+ bpf_program__set_autoload(skel->progs.report_softirq_entry, true);
+ bpf_program__set_autoload(skel->progs.report_softirq_exit, true);
+ } else if (kwork->report == KWORK_REPORT_LATENCY) {
+ bpf_program__set_autoload(skel->progs.latency_softirq_raise, true);
+ bpf_program__set_autoload(skel->progs.latency_softirq_entry, true);
+ }
+}
+
+static struct kwork_class_bpf kwork_softirq_bpf = {
+ .load_prepare = softirq_load_prepare,
+ .get_work_name = get_work_name_from_map,
+};
+
+static void workqueue_load_prepare(struct perf_kwork *kwork)
+{
+ if (kwork->report == KWORK_REPORT_RUNTIME) {
+ bpf_program__set_autoload(skel->progs.report_workqueue_execute_start, true);
+ bpf_program__set_autoload(skel->progs.report_workqueue_execute_end, true);
+ } else if (kwork->report == KWORK_REPORT_LATENCY) {
+ bpf_program__set_autoload(skel->progs.latency_workqueue_activate_work, true);
+ bpf_program__set_autoload(skel->progs.latency_workqueue_execute_start, true);
+ }
+}
+
+static struct kwork_class_bpf kwork_workqueue_bpf = {
+ .load_prepare = workqueue_load_prepare,
+ .get_work_name = get_work_name_from_map,
+};
+
+static struct kwork_class_bpf *
+kwork_class_bpf_supported_list[KWORK_CLASS_MAX] = {
+ [KWORK_CLASS_IRQ] = &kwork_irq_bpf,
+ [KWORK_CLASS_SOFTIRQ] = &kwork_softirq_bpf,
+ [KWORK_CLASS_WORKQUEUE] = &kwork_workqueue_bpf,
+};
+
+static bool valid_kwork_class_type(enum kwork_class_type type)
+{
+ return type >= 0 && type < KWORK_CLASS_MAX ? true : false;
+}
+
+static int setup_filters(struct perf_kwork *kwork)
+{
+ u8 val = 1;
+ int i, nr_cpus, key, fd;
+ struct perf_cpu_map *map;
+
+ if (kwork->cpu_list != NULL) {
+ fd = bpf_map__fd(skel->maps.perf_kwork_cpu_filter);
+ if (fd < 0) {
+ pr_debug("Invalid cpu filter fd\n");
+ return -1;
+ }
+
+ map = perf_cpu_map__new(kwork->cpu_list);
+ if (map == NULL) {
+ pr_debug("Invalid cpu_list\n");
+ return -1;
+ }
+
+ nr_cpus = libbpf_num_possible_cpus();
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
+ struct perf_cpu cpu = perf_cpu_map__cpu(map, i);
+
+ if (cpu.cpu >= nr_cpus) {
+ perf_cpu_map__put(map);
+ pr_err("Requested cpu %d too large\n", cpu.cpu);
+ return -1;
+ }
+ bpf_map_update_elem(fd, &cpu.cpu, &val, BPF_ANY);
+ }
+ perf_cpu_map__put(map);
+
+ skel->bss->has_cpu_filter = 1;
+ }
+
+ if (kwork->profile_name != NULL) {
+ if (strlen(kwork->profile_name) >= MAX_KWORKNAME) {
+ pr_err("Requested name filter %s too large, limit to %d\n",
+ kwork->profile_name, MAX_KWORKNAME - 1);
+ return -1;
+ }
+
+ fd = bpf_map__fd(skel->maps.perf_kwork_name_filter);
+ if (fd < 0) {
+ pr_debug("Invalid name filter fd\n");
+ return -1;
+ }
+
+ key = 0;
+ bpf_map_update_elem(fd, &key, kwork->profile_name, BPF_ANY);
+
+ skel->bss->has_name_filter = 1;
+ }
+
+ return 0;
+}
+
+int perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork)
+{
+ struct bpf_program *prog;
+ struct kwork_class *class;
+ struct kwork_class_bpf *class_bpf;
+ enum kwork_class_type type;
+
+ skel = kwork_trace_bpf__open();
+ if (!skel) {
+ pr_debug("Failed to open kwork trace skeleton\n");
+ return -1;
+ }
+
+ /*
+ * set all progs to non-autoload,
+ * then set corresponding progs according to config
+ */
+ bpf_object__for_each_program(prog, skel->obj)
+ bpf_program__set_autoload(prog, false);
+
+ list_for_each_entry(class, &kwork->class_list, list) {
+ type = class->type;
+ if (!valid_kwork_class_type(type) ||
+ (kwork_class_bpf_supported_list[type] == NULL)) {
+ pr_err("Unsupported bpf trace class %s\n", class->name);
+ goto out;
+ }
+
+ class_bpf = kwork_class_bpf_supported_list[type];
+ class_bpf->class = class;
+
+ if (class_bpf->load_prepare != NULL)
+ class_bpf->load_prepare(kwork);
+ }
+
+ if (kwork_trace_bpf__load(skel)) {
+ pr_debug("Failed to load kwork trace skeleton\n");
+ goto out;
+ }
+
+ if (setup_filters(kwork))
+ goto out;
+
+ if (kwork_trace_bpf__attach(skel)) {
+ pr_debug("Failed to attach kwork trace skeleton\n");
+ goto out;
+ }
+
+ return 0;
+
+out:
+ kwork_trace_bpf__destroy(skel);
+ return -1;
+}
+
+static int add_work(struct perf_kwork *kwork,
+ struct work_key *key,
+ struct report_data *data)
+{
+ struct kwork_work *work;
+ struct kwork_class_bpf *bpf_trace;
+ struct kwork_work tmp = {
+ .id = key->id,
+ .name = NULL,
+ .cpu = key->cpu,
+ };
+ enum kwork_class_type type = key->type;
+
+ if (!valid_kwork_class_type(type)) {
+ pr_debug("Invalid class type %d to add work\n", type);
+ return -1;
+ }
+
+ bpf_trace = kwork_class_bpf_supported_list[type];
+ tmp.class = bpf_trace->class;
+
+ if ((bpf_trace->get_work_name != NULL) &&
+ (bpf_trace->get_work_name(key, &tmp.name)))
+ return -1;
+
+ work = perf_kwork_add_work(kwork, tmp.class, &tmp);
+ if (work == NULL)
+ return -1;
+
+ if (kwork->report == KWORK_REPORT_RUNTIME) {
+ work->nr_atoms = data->nr;
+ work->total_runtime = data->total_time;
+ work->max_runtime = data->max_time;
+ work->max_runtime_start = data->max_time_start;
+ work->max_runtime_end = data->max_time_end;
+ } else if (kwork->report == KWORK_REPORT_LATENCY) {
+ work->nr_atoms = data->nr;
+ work->total_latency = data->total_time;
+ work->max_latency = data->max_time;
+ work->max_latency_start = data->max_time_start;
+ work->max_latency_end = data->max_time_end;
+ } else {
+ pr_debug("Invalid bpf report type %d\n", kwork->report);
+ return -1;
+ }
+
+ kwork->timestart = (u64)ts_start.tv_sec * NSEC_PER_SEC + ts_start.tv_nsec;
+ kwork->timeend = (u64)ts_end.tv_sec * NSEC_PER_SEC + ts_end.tv_nsec;
+
+ return 0;
+}
+
+int perf_kwork__report_read_bpf(struct perf_kwork *kwork)
+{
+ struct report_data data;
+ struct work_key key = {
+ .type = 0,
+ .cpu = 0,
+ .id = 0,
+ };
+ struct work_key prev = {
+ .type = 0,
+ .cpu = 0,
+ .id = 0,
+ };
+ int fd = bpf_map__fd(skel->maps.perf_kwork_report);
+
+ if (fd < 0) {
+ pr_debug("Invalid report fd\n");
+ return -1;
+ }
+
+ while (!bpf_map_get_next_key(fd, &prev, &key)) {
+ if ((bpf_map_lookup_elem(fd, &key, &data)) != 0) {
+ pr_debug("Failed to lookup report elem\n");
+ return -1;
+ }
+
+ if ((data.nr != 0) && (add_work(kwork, &key, &data) != 0))
+ return -1;
+
+ prev = key;
+ }
+ return 0;
+}
+
+void perf_kwork__report_cleanup_bpf(void)
+{
+ kwork_trace_bpf__destroy(skel);
+}
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
new file mode 100644
index 000000000..e7dddf012
--- /dev/null
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -0,0 +1,375 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util/debug.h"
+#include "util/evlist.h"
+#include "util/machine.h"
+#include "util/map.h"
+#include "util/symbol.h"
+#include "util/target.h"
+#include "util/thread.h"
+#include "util/thread_map.h"
+#include "util/lock-contention.h"
+#include <linux/zalloc.h>
+#include <linux/string.h>
+#include <bpf/bpf.h>
+
+#include "bpf_skel/lock_contention.skel.h"
+#include "bpf_skel/lock_data.h"
+
+static struct lock_contention_bpf *skel;
+
+int lock_contention_prepare(struct lock_contention *con)
+{
+ int i, fd;
+ int ncpus = 1, ntasks = 1, ntypes = 1, naddrs = 1;
+ struct evlist *evlist = con->evlist;
+ struct target *target = con->target;
+
+ skel = lock_contention_bpf__open();
+ if (!skel) {
+ pr_err("Failed to open lock-contention BPF skeleton\n");
+ return -1;
+ }
+
+ bpf_map__set_value_size(skel->maps.stacks, con->max_stack * sizeof(u64));
+ bpf_map__set_max_entries(skel->maps.lock_stat, con->map_nr_entries);
+ bpf_map__set_max_entries(skel->maps.tstamp, con->map_nr_entries);
+
+ if (con->aggr_mode == LOCK_AGGR_TASK)
+ bpf_map__set_max_entries(skel->maps.task_data, con->map_nr_entries);
+ else
+ bpf_map__set_max_entries(skel->maps.task_data, 1);
+
+ if (con->save_callstack)
+ bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries);
+ else
+ bpf_map__set_max_entries(skel->maps.stacks, 1);
+
+ if (target__has_cpu(target))
+ ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
+ if (target__has_task(target))
+ ntasks = perf_thread_map__nr(evlist->core.threads);
+ if (con->filters->nr_types)
+ ntypes = con->filters->nr_types;
+
+ /* resolve lock name filters to addr */
+ if (con->filters->nr_syms) {
+ struct symbol *sym;
+ struct map *kmap;
+ unsigned long *addrs;
+
+ for (i = 0; i < con->filters->nr_syms; i++) {
+ sym = machine__find_kernel_symbol_by_name(con->machine,
+ con->filters->syms[i],
+ &kmap);
+ if (sym == NULL) {
+ pr_warning("ignore unknown symbol: %s\n",
+ con->filters->syms[i]);
+ continue;
+ }
+
+ addrs = realloc(con->filters->addrs,
+ (con->filters->nr_addrs + 1) * sizeof(*addrs));
+ if (addrs == NULL) {
+ pr_warning("memory allocation failure\n");
+ continue;
+ }
+
+ addrs[con->filters->nr_addrs++] = map__unmap_ip(kmap, sym->start);
+ con->filters->addrs = addrs;
+ }
+ naddrs = con->filters->nr_addrs;
+ }
+
+ bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
+ bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+ bpf_map__set_max_entries(skel->maps.type_filter, ntypes);
+ bpf_map__set_max_entries(skel->maps.addr_filter, naddrs);
+
+ if (lock_contention_bpf__load(skel) < 0) {
+ pr_err("Failed to load lock-contention BPF skeleton\n");
+ return -1;
+ }
+
+ if (target__has_cpu(target)) {
+ u32 cpu;
+ u8 val = 1;
+
+ skel->bss->has_cpu = 1;
+ fd = bpf_map__fd(skel->maps.cpu_filter);
+
+ for (i = 0; i < ncpus; i++) {
+ cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, i).cpu;
+ bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
+ }
+ }
+
+ if (target__has_task(target)) {
+ u32 pid;
+ u8 val = 1;
+
+ skel->bss->has_task = 1;
+ fd = bpf_map__fd(skel->maps.task_filter);
+
+ for (i = 0; i < ntasks; i++) {
+ pid = perf_thread_map__pid(evlist->core.threads, i);
+ bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+ }
+ }
+
+ if (target__none(target) && evlist->workload.pid > 0) {
+ u32 pid = evlist->workload.pid;
+ u8 val = 1;
+
+ skel->bss->has_task = 1;
+ fd = bpf_map__fd(skel->maps.task_filter);
+ bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+ }
+
+ if (con->filters->nr_types) {
+ u8 val = 1;
+
+ skel->bss->has_type = 1;
+ fd = bpf_map__fd(skel->maps.type_filter);
+
+ for (i = 0; i < con->filters->nr_types; i++)
+ bpf_map_update_elem(fd, &con->filters->types[i], &val, BPF_ANY);
+ }
+
+ if (con->filters->nr_addrs) {
+ u8 val = 1;
+
+ skel->bss->has_addr = 1;
+ fd = bpf_map__fd(skel->maps.addr_filter);
+
+ for (i = 0; i < con->filters->nr_addrs; i++)
+ bpf_map_update_elem(fd, &con->filters->addrs[i], &val, BPF_ANY);
+ }
+
+ /* these don't work well if in the rodata section */
+ skel->bss->stack_skip = con->stack_skip;
+ skel->bss->aggr_mode = con->aggr_mode;
+ skel->bss->needs_callstack = con->save_callstack;
+ skel->bss->lock_owner = con->owner;
+
+ bpf_program__set_autoload(skel->progs.collect_lock_syms, false);
+
+ lock_contention_bpf__attach(skel);
+ return 0;
+}
+
+int lock_contention_start(void)
+{
+ skel->bss->enabled = 1;
+ return 0;
+}
+
+int lock_contention_stop(void)
+{
+ skel->bss->enabled = 0;
+ return 0;
+}
+
+static const char *lock_contention_get_name(struct lock_contention *con,
+ struct contention_key *key,
+ u64 *stack_trace, u32 flags)
+{
+ int idx = 0;
+ u64 addr;
+ const char *name = "";
+ static char name_buf[KSYM_NAME_LEN];
+ struct symbol *sym;
+ struct map *kmap;
+ struct machine *machine = con->machine;
+
+ if (con->aggr_mode == LOCK_AGGR_TASK) {
+ struct contention_task_data task;
+ int pid = key->pid;
+ int task_fd = bpf_map__fd(skel->maps.task_data);
+
+ /* do not update idle comm which contains CPU number */
+ if (pid) {
+ struct thread *t = __machine__findnew_thread(machine, /*pid=*/-1, pid);
+
+ if (t == NULL)
+ return name;
+ if (!bpf_map_lookup_elem(task_fd, &pid, &task) &&
+ thread__set_comm(t, task.comm, /*timestamp=*/0))
+ name = task.comm;
+ }
+ return name;
+ }
+
+ if (con->aggr_mode == LOCK_AGGR_ADDR) {
+ int lock_fd = bpf_map__fd(skel->maps.lock_syms);
+
+ /* per-process locks set upper bits of the flags */
+ if (flags & LCD_F_MMAP_LOCK)
+ return "mmap_lock";
+ if (flags & LCD_F_SIGHAND_LOCK)
+ return "siglock";
+
+ /* global locks with symbols */
+ sym = machine__find_kernel_symbol(machine, key->lock_addr, &kmap);
+ if (sym)
+ return sym->name;
+
+ /* try semi-global locks collected separately */
+ if (!bpf_map_lookup_elem(lock_fd, &key->lock_addr, &flags)) {
+ if (flags == LOCK_CLASS_RQLOCK)
+ return "rq_lock";
+ }
+
+ return "";
+ }
+
+ /* LOCK_AGGR_CALLER: skip lock internal functions */
+ while (machine__is_lock_function(machine, stack_trace[idx]) &&
+ idx < con->max_stack - 1)
+ idx++;
+
+ addr = stack_trace[idx];
+ sym = machine__find_kernel_symbol(machine, addr, &kmap);
+
+ if (sym) {
+ unsigned long offset;
+
+ offset = map__map_ip(kmap, addr) - sym->start;
+
+ if (offset == 0)
+ return sym->name;
+
+ snprintf(name_buf, sizeof(name_buf), "%s+%#lx", sym->name, offset);
+ } else {
+ snprintf(name_buf, sizeof(name_buf), "%#lx", (unsigned long)addr);
+ }
+
+ return name_buf;
+}
+
+int lock_contention_read(struct lock_contention *con)
+{
+ int fd, stack, err = 0;
+ struct contention_key *prev_key, key = {};
+ struct contention_data data = {};
+ struct lock_stat *st = NULL;
+ struct machine *machine = con->machine;
+ u64 *stack_trace;
+ size_t stack_size = con->max_stack * sizeof(*stack_trace);
+
+ fd = bpf_map__fd(skel->maps.lock_stat);
+ stack = bpf_map__fd(skel->maps.stacks);
+
+ con->fails.task = skel->bss->task_fail;
+ con->fails.stack = skel->bss->stack_fail;
+ con->fails.time = skel->bss->time_fail;
+ con->fails.data = skel->bss->data_fail;
+
+ stack_trace = zalloc(stack_size);
+ if (stack_trace == NULL)
+ return -1;
+
+ if (con->aggr_mode == LOCK_AGGR_TASK) {
+ struct thread *idle = __machine__findnew_thread(machine,
+ /*pid=*/0,
+ /*tid=*/0);
+ thread__set_comm(idle, "swapper", /*timestamp=*/0);
+ }
+
+ if (con->aggr_mode == LOCK_AGGR_ADDR) {
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .flags = BPF_F_TEST_RUN_ON_CPU,
+ );
+ int prog_fd = bpf_program__fd(skel->progs.collect_lock_syms);
+
+ bpf_prog_test_run_opts(prog_fd, &opts);
+ }
+
+ /* make sure it loads the kernel map */
+ map__load(maps__first(machine->kmaps)->map);
+
+ prev_key = NULL;
+ while (!bpf_map_get_next_key(fd, prev_key, &key)) {
+ s64 ls_key;
+ const char *name;
+
+ /* to handle errors in the loop body */
+ err = -1;
+
+ bpf_map_lookup_elem(fd, &key, &data);
+ if (con->save_callstack) {
+ bpf_map_lookup_elem(stack, &key.stack_id, stack_trace);
+
+ if (!match_callstack_filter(machine, stack_trace)) {
+ con->nr_filtered += data.count;
+ goto next;
+ }
+ }
+
+ switch (con->aggr_mode) {
+ case LOCK_AGGR_CALLER:
+ ls_key = key.stack_id;
+ break;
+ case LOCK_AGGR_TASK:
+ ls_key = key.pid;
+ break;
+ case LOCK_AGGR_ADDR:
+ ls_key = key.lock_addr;
+ break;
+ default:
+ goto next;
+ }
+
+ st = lock_stat_find(ls_key);
+ if (st != NULL) {
+ st->wait_time_total += data.total_time;
+ if (st->wait_time_max < data.max_time)
+ st->wait_time_max = data.max_time;
+ if (st->wait_time_min > data.min_time)
+ st->wait_time_min = data.min_time;
+
+ st->nr_contended += data.count;
+ if (st->nr_contended)
+ st->avg_wait_time = st->wait_time_total / st->nr_contended;
+ goto next;
+ }
+
+ name = lock_contention_get_name(con, &key, stack_trace, data.flags);
+ st = lock_stat_findnew(ls_key, name, data.flags);
+ if (st == NULL)
+ break;
+
+ st->nr_contended = data.count;
+ st->wait_time_total = data.total_time;
+ st->wait_time_max = data.max_time;
+ st->wait_time_min = data.min_time;
+
+ if (data.count)
+ st->avg_wait_time = data.total_time / data.count;
+
+ if (con->aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
+ st->callstack = memdup(stack_trace, stack_size);
+ if (st->callstack == NULL)
+ break;
+ }
+
+next:
+ prev_key = &key;
+
+ /* we're fine now, reset the error */
+ err = 0;
+ }
+
+ free(stack_trace);
+
+ return err;
+}
+
+int lock_contention_finish(void)
+{
+ if (skel) {
+ skel->bss->enabled = 0;
+ lock_contention_bpf__destroy(skel);
+ }
+
+ return 0;
+}
diff --git a/tools/perf/util/bpf_map.c b/tools/perf/util/bpf_map.c
new file mode 100644
index 000000000..c863ae0c5
--- /dev/null
+++ b/tools/perf/util/bpf_map.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+#include "util/bpf_map.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+static bool bpf_map__is_per_cpu(enum bpf_map_type type)
+{
+ return type == BPF_MAP_TYPE_PERCPU_HASH ||
+ type == BPF_MAP_TYPE_PERCPU_ARRAY ||
+ type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+ type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;
+}
+
+static void *bpf_map__alloc_value(const struct bpf_map *map)
+{
+ if (bpf_map__is_per_cpu(bpf_map__type(map)))
+ return malloc(round_up(bpf_map__value_size(map), 8) *
+ sysconf(_SC_NPROCESSORS_CONF));
+
+ return malloc(bpf_map__value_size(map));
+}
+
+int bpf_map__fprintf(struct bpf_map *map, FILE *fp)
+{
+ void *prev_key = NULL, *key, *value;
+ int fd = bpf_map__fd(map), err;
+ int printed = 0;
+
+ if (fd < 0)
+ return fd;
+
+ if (!map)
+ return PTR_ERR(map);
+
+ err = -ENOMEM;
+ key = malloc(bpf_map__key_size(map));
+ if (key == NULL)
+ goto out;
+
+ value = bpf_map__alloc_value(map);
+ if (value == NULL)
+ goto out_free_key;
+
+ while ((err = bpf_map_get_next_key(fd, prev_key, key) == 0)) {
+ int intkey = *(int *)key;
+
+ if (!bpf_map_lookup_elem(fd, key, value)) {
+ bool boolval = *(bool *)value;
+ if (boolval)
+ printed += fprintf(fp, "[%d] = %d,\n", intkey, boolval);
+ } else {
+ printed += fprintf(fp, "[%d] = ERROR,\n", intkey);
+ }
+
+ prev_key = key;
+ }
+
+ if (err == ENOENT)
+ err = printed;
+
+ free(value);
+out_free_key:
+ free(key);
+out:
+ return err;
+}
diff --git a/tools/perf/util/bpf_map.h b/tools/perf/util/bpf_map.h
new file mode 100644
index 000000000..c2f7c13cb
--- /dev/null
+++ b/tools/perf/util/bpf_map.h
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+#ifndef __PERF_BPF_MAP_H
+#define __PERF_BPF_MAP_H 1
+
+#include <stdio.h>
+struct bpf_map;
+
+#ifdef HAVE_LIBBPF_SUPPORT
+
+int bpf_map__fprintf(struct bpf_map *map, FILE *fp);
+
+#else
+
+#include <linux/compiler.h>
+
+static inline int bpf_map__fprintf(struct bpf_map *map __maybe_unused, FILE *fp __maybe_unused)
+{
+ return 0;
+}
+
+#endif // HAVE_LIBBPF_SUPPORT
+
+#endif // __PERF_BPF_MAP_H
diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
new file mode 100644
index 000000000..21f4d9ba0
--- /dev/null
+++ b/tools/perf/util/bpf_off_cpu.c
@@ -0,0 +1,393 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util/bpf_counter.h"
+#include "util/debug.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/off_cpu.h"
+#include "util/perf-hooks.h"
+#include "util/record.h"
+#include "util/session.h"
+#include "util/target.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
+#include "util/cgroup.h"
+#include "util/strlist.h"
+#include <bpf/bpf.h>
+
+#include "bpf_skel/off_cpu.skel.h"
+
+#define MAX_STACKS 32
+#define MAX_PROC 4096
+/* we don't need actual timestamp, just want to put the samples at last */
+#define OFF_CPU_TIMESTAMP (~0ull << 32)
+
+static struct off_cpu_bpf *skel;
+
+struct off_cpu_key {
+ u32 pid;
+ u32 tgid;
+ u32 stack_id;
+ u32 state;
+ u64 cgroup_id;
+};
+
+union off_cpu_data {
+ struct perf_event_header hdr;
+ u64 array[1024 / sizeof(u64)];
+};
+
+static int off_cpu_config(struct evlist *evlist)
+{
+ struct evsel *evsel;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_BPF_OUTPUT,
+ .size = sizeof(attr), /* to capture ABI version */
+ };
+ char *evname = strdup(OFFCPU_EVENT);
+
+ if (evname == NULL)
+ return -ENOMEM;
+
+ evsel = evsel__new(&attr);
+ if (!evsel) {
+ free(evname);
+ return -ENOMEM;
+ }
+
+ evsel->core.attr.freq = 1;
+ evsel->core.attr.sample_period = 1;
+ /* off-cpu analysis depends on stack trace */
+ evsel->core.attr.sample_type = PERF_SAMPLE_CALLCHAIN;
+
+ evlist__add(evlist, evsel);
+
+ free(evsel->name);
+ evsel->name = evname;
+
+ return 0;
+}
+
+static void off_cpu_start(void *arg)
+{
+ struct evlist *evlist = arg;
+
+ /* update task filter for the given workload */
+ if (!skel->bss->has_cpu && !skel->bss->has_task &&
+ perf_thread_map__pid(evlist->core.threads, 0) != -1) {
+ int fd;
+ u32 pid;
+ u8 val = 1;
+
+ skel->bss->has_task = 1;
+ skel->bss->uses_tgid = 1;
+ fd = bpf_map__fd(skel->maps.task_filter);
+ pid = perf_thread_map__pid(evlist->core.threads, 0);
+ bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+ }
+
+ skel->bss->enabled = 1;
+}
+
+static void off_cpu_finish(void *arg __maybe_unused)
+{
+ skel->bss->enabled = 0;
+ off_cpu_bpf__destroy(skel);
+}
+
+/* v5.18 kernel added prev_state arg, so it needs to check the signature */
+static void check_sched_switch_args(void)
+{
+ const struct btf *btf = btf__load_vmlinux_btf();
+ const struct btf_type *t1, *t2, *t3;
+ u32 type_id;
+
+ type_id = btf__find_by_name_kind(btf, "btf_trace_sched_switch",
+ BTF_KIND_TYPEDEF);
+ if ((s32)type_id < 0)
+ return;
+
+ t1 = btf__type_by_id(btf, type_id);
+ if (t1 == NULL)
+ return;
+
+ t2 = btf__type_by_id(btf, t1->type);
+ if (t2 == NULL || !btf_is_ptr(t2))
+ return;
+
+ t3 = btf__type_by_id(btf, t2->type);
+ /* btf_trace func proto has one more argument for the context */
+ if (t3 && btf_is_func_proto(t3) && btf_vlen(t3) == 5) {
+ /* new format: pass prev_state as 4th arg */
+ skel->rodata->has_prev_state = true;
+ }
+}
+
+int off_cpu_prepare(struct evlist *evlist, struct target *target,
+ struct record_opts *opts)
+{
+ int err, fd, i;
+ int ncpus = 1, ntasks = 1, ncgrps = 1;
+ struct strlist *pid_slist = NULL;
+ struct str_node *pos;
+
+ if (off_cpu_config(evlist) < 0) {
+ pr_err("Failed to config off-cpu BPF event\n");
+ return -1;
+ }
+
+ skel = off_cpu_bpf__open();
+ if (!skel) {
+ pr_err("Failed to open off-cpu BPF skeleton\n");
+ return -1;
+ }
+
+ /* don't need to set cpu filter for system-wide mode */
+ if (target->cpu_list) {
+ ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
+ bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
+ }
+
+ if (target->pid) {
+ pid_slist = strlist__new(target->pid, NULL);
+ if (!pid_slist) {
+ pr_err("Failed to create a strlist for pid\n");
+ return -1;
+ }
+
+ ntasks = 0;
+ strlist__for_each_entry(pos, pid_slist) {
+ char *end_ptr;
+ int pid = strtol(pos->s, &end_ptr, 10);
+
+ if (pid == INT_MIN || pid == INT_MAX ||
+ (*end_ptr != '\0' && *end_ptr != ','))
+ continue;
+
+ ntasks++;
+ }
+
+ if (ntasks < MAX_PROC)
+ ntasks = MAX_PROC;
+
+ bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+ } else if (target__has_task(target)) {
+ ntasks = perf_thread_map__nr(evlist->core.threads);
+ bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+ } else if (target__none(target)) {
+ bpf_map__set_max_entries(skel->maps.task_filter, MAX_PROC);
+ }
+
+ if (evlist__first(evlist)->cgrp) {
+ ncgrps = evlist->core.nr_entries - 1; /* excluding a dummy */
+ bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps);
+
+ if (!cgroup_is_v2("perf_event"))
+ skel->rodata->uses_cgroup_v1 = true;
+ }
+
+ if (opts->record_cgroup) {
+ skel->rodata->needs_cgroup = true;
+
+ if (!cgroup_is_v2("perf_event"))
+ skel->rodata->uses_cgroup_v1 = true;
+ }
+
+ set_max_rlimit();
+ check_sched_switch_args();
+
+ err = off_cpu_bpf__load(skel);
+ if (err) {
+ pr_err("Failed to load off-cpu skeleton\n");
+ goto out;
+ }
+
+ if (target->cpu_list) {
+ u32 cpu;
+ u8 val = 1;
+
+ skel->bss->has_cpu = 1;
+ fd = bpf_map__fd(skel->maps.cpu_filter);
+
+ for (i = 0; i < ncpus; i++) {
+ cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, i).cpu;
+ bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
+ }
+ }
+
+ if (target->pid) {
+ u8 val = 1;
+
+ skel->bss->has_task = 1;
+ skel->bss->uses_tgid = 1;
+ fd = bpf_map__fd(skel->maps.task_filter);
+
+ strlist__for_each_entry(pos, pid_slist) {
+ char *end_ptr;
+ u32 tgid;
+ int pid = strtol(pos->s, &end_ptr, 10);
+
+ if (pid == INT_MIN || pid == INT_MAX ||
+ (*end_ptr != '\0' && *end_ptr != ','))
+ continue;
+
+ tgid = pid;
+ bpf_map_update_elem(fd, &tgid, &val, BPF_ANY);
+ }
+ } else if (target__has_task(target)) {
+ u32 pid;
+ u8 val = 1;
+
+ skel->bss->has_task = 1;
+ fd = bpf_map__fd(skel->maps.task_filter);
+
+ for (i = 0; i < ntasks; i++) {
+ pid = perf_thread_map__pid(evlist->core.threads, i);
+ bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+ }
+ }
+
+ if (evlist__first(evlist)->cgrp) {
+ struct evsel *evsel;
+ u8 val = 1;
+
+ skel->bss->has_cgroup = 1;
+ fd = bpf_map__fd(skel->maps.cgroup_filter);
+
+ evlist__for_each_entry(evlist, evsel) {
+ struct cgroup *cgrp = evsel->cgrp;
+
+ if (cgrp == NULL)
+ continue;
+
+ if (!cgrp->id && read_cgroup_id(cgrp) < 0) {
+ pr_err("Failed to read cgroup id of %s\n",
+ cgrp->name);
+ goto out;
+ }
+
+ bpf_map_update_elem(fd, &cgrp->id, &val, BPF_ANY);
+ }
+ }
+
+ err = off_cpu_bpf__attach(skel);
+ if (err) {
+ pr_err("Failed to attach off-cpu BPF skeleton\n");
+ goto out;
+ }
+
+ if (perf_hooks__set_hook("record_start", off_cpu_start, evlist) ||
+ perf_hooks__set_hook("record_end", off_cpu_finish, evlist)) {
+ pr_err("Failed to attach off-cpu skeleton\n");
+ goto out;
+ }
+
+ return 0;
+
+out:
+ off_cpu_bpf__destroy(skel);
+ return -1;
+}
+
+int off_cpu_write(struct perf_session *session)
+{
+ int bytes = 0, size;
+ int fd, stack;
+ u64 sample_type, val, sid = 0;
+ struct evsel *evsel;
+ struct perf_data_file *file = &session->data->file;
+ struct off_cpu_key prev, key;
+ union off_cpu_data data = {
+ .hdr = {
+ .type = PERF_RECORD_SAMPLE,
+ .misc = PERF_RECORD_MISC_USER,
+ },
+ };
+ u64 tstamp = OFF_CPU_TIMESTAMP;
+
+ skel->bss->enabled = 0;
+
+ evsel = evlist__find_evsel_by_str(session->evlist, OFFCPU_EVENT);
+ if (evsel == NULL) {
+ pr_err("%s evsel not found\n", OFFCPU_EVENT);
+ return 0;
+ }
+
+ sample_type = evsel->core.attr.sample_type;
+
+ if (sample_type & ~OFFCPU_SAMPLE_TYPES) {
+ pr_err("not supported sample type: %llx\n",
+ (unsigned long long)sample_type);
+ return -1;
+ }
+
+ if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) {
+ if (evsel->core.id)
+ sid = evsel->core.id[0];
+ }
+
+ fd = bpf_map__fd(skel->maps.off_cpu);
+ stack = bpf_map__fd(skel->maps.stacks);
+ memset(&prev, 0, sizeof(prev));
+
+ while (!bpf_map_get_next_key(fd, &prev, &key)) {
+ int n = 1; /* start from perf_event_header */
+ int ip_pos = -1;
+
+ bpf_map_lookup_elem(fd, &key, &val);
+
+ if (sample_type & PERF_SAMPLE_IDENTIFIER)
+ data.array[n++] = sid;
+ if (sample_type & PERF_SAMPLE_IP) {
+ ip_pos = n;
+ data.array[n++] = 0; /* will be updated */
+ }
+ if (sample_type & PERF_SAMPLE_TID)
+ data.array[n++] = (u64)key.pid << 32 | key.tgid;
+ if (sample_type & PERF_SAMPLE_TIME)
+ data.array[n++] = tstamp;
+ if (sample_type & PERF_SAMPLE_ID)
+ data.array[n++] = sid;
+ if (sample_type & PERF_SAMPLE_CPU)
+ data.array[n++] = 0;
+ if (sample_type & PERF_SAMPLE_PERIOD)
+ data.array[n++] = val;
+ if (sample_type & PERF_SAMPLE_CALLCHAIN) {
+ int len = 0;
+
+ /* data.array[n] is callchain->nr (updated later) */
+ data.array[n + 1] = PERF_CONTEXT_USER;
+ data.array[n + 2] = 0;
+
+ bpf_map_lookup_elem(stack, &key.stack_id, &data.array[n + 2]);
+ while (data.array[n + 2 + len])
+ len++;
+
+ /* update length of callchain */
+ data.array[n] = len + 1;
+
+ /* update sample ip with the first callchain entry */
+ if (ip_pos >= 0)
+ data.array[ip_pos] = data.array[n + 2];
+
+ /* calculate sample callchain data array length */
+ n += len + 2;
+ }
+ if (sample_type & PERF_SAMPLE_CGROUP)
+ data.array[n++] = key.cgroup_id;
+
+ size = n * sizeof(u64);
+ data.hdr.size = size;
+ bytes += size;
+
+ if (perf_data_file__write(file, &data, size) < 0) {
+ pr_err("failed to write perf data, error: %m\n");
+ return bytes;
+ }
+
+ prev = key;
+ /* increase dummy timestamp to sort later samples */
+ tstamp++;
+ }
+ return bytes;
+}
diff --git a/tools/perf/util/bpf_skel/.gitignore b/tools/perf/util/bpf_skel/.gitignore
new file mode 100644
index 000000000..cd01455e1
--- /dev/null
+++ b/tools/perf/util/bpf_skel/.gitignore
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+.tmp
+*.skel.h
+vmlinux.h
diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
new file mode 100644
index 000000000..52c270330
--- /dev/null
+++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
@@ -0,0 +1,422 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Augment the raw_syscalls tracepoints with the contents of the pointer arguments.
+ *
+ * This exactly matches what is marshalled into the raw_syscall:sys_enter
+ * payload expected by the 'perf trace' beautifiers.
+ */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/limits.h>
+
+/**
+ * is_power_of_2() - check if a value is a power of two
+ * @n: the value to check
+ *
+ * Determine whether some value is a power of two, where zero is *not*
+ * considered a power of two. Return: true if @n is a power of 2, otherwise
+ * false.
+ */
+#define is_power_of_2(n) (n != 0 && ((n & (n - 1)) == 0))
+
+#define MAX_CPUS 4096
+
+// FIXME: These should come from system headers
+#ifndef bool
+typedef char bool;
+#endif
+typedef int pid_t;
+typedef long long int __s64;
+typedef __s64 time64_t;
+
+struct timespec64 {
+ time64_t tv_sec;
+ long int tv_nsec;
+};
+
+/* bpf-output associated map */
+struct __augmented_syscalls__ {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __type(key, int);
+ __type(value, __u32);
+ __uint(max_entries, MAX_CPUS);
+} __augmented_syscalls__ SEC(".maps");
+
+/*
+ * What to augment at entry?
+ *
+ * Pointer arg payloads (filenames, etc) passed from userspace to the kernel
+ */
+struct syscalls_sys_enter {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 512);
+} syscalls_sys_enter SEC(".maps");
+
+/*
+ * What to augment at exit?
+ *
+ * Pointer arg payloads returned from the kernel (struct stat, etc) to userspace.
+ */
+struct syscalls_sys_exit {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 512);
+} syscalls_sys_exit SEC(".maps");
+
+struct syscall_enter_args {
+ unsigned long long common_tp_fields;
+ long syscall_nr;
+ unsigned long args[6];
+};
+
+struct syscall_exit_args {
+ unsigned long long common_tp_fields;
+ long syscall_nr;
+ long ret;
+};
+
+struct augmented_arg {
+ unsigned int size;
+ int err;
+ char value[PATH_MAX];
+};
+
+struct pids_filtered {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, pid_t);
+ __type(value, bool);
+ __uint(max_entries, 64);
+} pids_filtered SEC(".maps");
+
+/*
+ * Desired design of maximum size and alignment (see RFC2553)
+ */
+#define SS_MAXSIZE 128 /* Implementation specific max size */
+
+typedef unsigned short sa_family_t;
+
+/*
+ * FIXME: Should come from system headers
+ *
+ * The definition uses anonymous union and struct in order to control the
+ * default alignment.
+ */
+struct sockaddr_storage {
+ union {
+ struct {
+ sa_family_t ss_family; /* address family */
+ /* Following field(s) are implementation specific */
+ char __data[SS_MAXSIZE - sizeof(unsigned short)];
+ /* space to achieve desired size, */
+ /* _SS_MAXSIZE value minus size of ss_family */
+ };
+ void *__align; /* implementation specific desired alignment */
+ };
+};
+
+struct augmented_args_payload {
+ struct syscall_enter_args args;
+ union {
+ struct {
+ struct augmented_arg arg, arg2;
+ };
+ struct sockaddr_storage saddr;
+ char __data[sizeof(struct augmented_arg)];
+ };
+};
+
+// We need more tmp space than the BPF stack can give us
+struct augmented_args_tmp {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __type(key, int);
+ __type(value, struct augmented_args_payload);
+ __uint(max_entries, 1);
+} augmented_args_tmp SEC(".maps");
+
+static inline struct augmented_args_payload *augmented_args_payload(void)
+{
+ int key = 0;
+ return bpf_map_lookup_elem(&augmented_args_tmp, &key);
+}
+
+static inline int augmented__output(void *ctx, struct augmented_args_payload *args, int len)
+{
+ /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */
+ return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len);
+}
+
+static inline
+unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len)
+{
+ unsigned int augmented_len = sizeof(*augmented_arg);
+ int string_len = bpf_probe_read_user_str(&augmented_arg->value, arg_len, arg);
+
+ augmented_arg->size = augmented_arg->err = 0;
+ /*
+ * probe_read_str may return < 0, e.g. -EFAULT
+ * So we leave that in the augmented_arg->size that userspace will
+ */
+ if (string_len > 0) {
+ augmented_len -= sizeof(augmented_arg->value) - string_len;
+ _Static_assert(is_power_of_2(sizeof(augmented_arg->value)), "sizeof(augmented_arg->value) needs to be a power of two");
+ augmented_len &= sizeof(augmented_arg->value) - 1;
+ augmented_arg->size = string_len;
+ } else {
+ /*
+ * So that username notice the error while still being able
+ * to skip this augmented arg record
+ */
+ augmented_arg->err = string_len;
+ augmented_len = offsetof(struct augmented_arg, value);
+ }
+
+ return augmented_len;
+}
+
+SEC("tp/raw_syscalls/sys_enter")
+int syscall_unaugmented(struct syscall_enter_args *args)
+{
+ return 1;
+}
+
+/*
+ * These will be tail_called from SEC("raw_syscalls:sys_enter"), so will find in
+ * augmented_args_tmp what was read by that raw_syscalls:sys_enter and go
+ * on from there, reading the first syscall arg as a string, i.e. open's
+ * filename.
+ */
+SEC("tp/syscalls/sys_enter_connect")
+int sys_enter_connect(struct syscall_enter_args *args)
+{
+ struct augmented_args_payload *augmented_args = augmented_args_payload();
+ const void *sockaddr_arg = (const void *)args->args[1];
+ unsigned int socklen = args->args[2];
+ unsigned int len = sizeof(augmented_args->args);
+
+ if (augmented_args == NULL)
+ return 1; /* Failure: don't filter */
+
+ _Static_assert(is_power_of_2(sizeof(augmented_args->saddr)), "sizeof(augmented_args->saddr) needs to be a power of two");
+ socklen &= sizeof(augmented_args->saddr) - 1;
+
+ bpf_probe_read_user(&augmented_args->saddr, socklen, sockaddr_arg);
+
+ return augmented__output(args, augmented_args, len + socklen);
+}
+
+SEC("tp/syscalls/sys_enter_sendto")
+int sys_enter_sendto(struct syscall_enter_args *args)
+{
+ struct augmented_args_payload *augmented_args = augmented_args_payload();
+ const void *sockaddr_arg = (const void *)args->args[4];
+ unsigned int socklen = args->args[5];
+ unsigned int len = sizeof(augmented_args->args);
+
+ if (augmented_args == NULL)
+ return 1; /* Failure: don't filter */
+
+ socklen &= sizeof(augmented_args->saddr) - 1;
+
+ bpf_probe_read_user(&augmented_args->saddr, socklen, sockaddr_arg);
+
+ return augmented__output(args, augmented_args, len + socklen);
+}
+
+SEC("tp/syscalls/sys_enter_open")
+int sys_enter_open(struct syscall_enter_args *args)
+{
+ struct augmented_args_payload *augmented_args = augmented_args_payload();
+ const void *filename_arg = (const void *)args->args[0];
+ unsigned int len = sizeof(augmented_args->args);
+
+ if (augmented_args == NULL)
+ return 1; /* Failure: don't filter */
+
+ len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value));
+
+ return augmented__output(args, augmented_args, len);
+}
+
+SEC("tp/syscalls/sys_enter_openat")
+int sys_enter_openat(struct syscall_enter_args *args)
+{
+ struct augmented_args_payload *augmented_args = augmented_args_payload();
+ const void *filename_arg = (const void *)args->args[1];
+ unsigned int len = sizeof(augmented_args->args);
+
+ if (augmented_args == NULL)
+ return 1; /* Failure: don't filter */
+
+ len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value));
+
+ return augmented__output(args, augmented_args, len);
+}
+
+SEC("tp/syscalls/sys_enter_rename")
+int sys_enter_rename(struct syscall_enter_args *args)
+{
+ struct augmented_args_payload *augmented_args = augmented_args_payload();
+ const void *oldpath_arg = (const void *)args->args[0],
+ *newpath_arg = (const void *)args->args[1];
+ unsigned int len = sizeof(augmented_args->args), oldpath_len;
+
+ if (augmented_args == NULL)
+ return 1; /* Failure: don't filter */
+
+ oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
+ len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value));
+
+ return augmented__output(args, augmented_args, len);
+}
+
+SEC("tp/syscalls/sys_enter_renameat")
+int sys_enter_renameat(struct syscall_enter_args *args)
+{
+ struct augmented_args_payload *augmented_args = augmented_args_payload();
+ const void *oldpath_arg = (const void *)args->args[1],
+ *newpath_arg = (const void *)args->args[3];
+ unsigned int len = sizeof(augmented_args->args), oldpath_len;
+
+ if (augmented_args == NULL)
+ return 1; /* Failure: don't filter */
+
+ oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
+ len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value));
+
+ return augmented__output(args, augmented_args, len);
+}
+
+#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
+
+// we need just the start, get the size to then copy it
+struct perf_event_attr_size {
+ __u32 type;
+ /*
+ * Size of the attr structure, for fwd/bwd compat.
+ */
+ __u32 size;
+};
+
+SEC("tp/syscalls/sys_enter_perf_event_open")
+int sys_enter_perf_event_open(struct syscall_enter_args *args)
+{
+ struct augmented_args_payload *augmented_args = augmented_args_payload();
+ const struct perf_event_attr_size *attr = (const struct perf_event_attr_size *)args->args[0], *attr_read;
+ unsigned int len = sizeof(augmented_args->args);
+
+ if (augmented_args == NULL)
+ goto failure;
+
+ if (bpf_probe_read_user(&augmented_args->__data, sizeof(*attr), attr) < 0)
+ goto failure;
+
+ attr_read = (const struct perf_event_attr_size *)augmented_args->__data;
+
+ __u32 size = attr_read->size;
+
+ if (!size)
+ size = PERF_ATTR_SIZE_VER0;
+
+ if (size > sizeof(augmented_args->__data))
+ goto failure;
+
+ // Now that we read attr->size and tested it against the size limits, read it completely
+ if (bpf_probe_read_user(&augmented_args->__data, size, attr) < 0)
+ goto failure;
+
+ return augmented__output(args, augmented_args, len + size);
+failure:
+ return 1; /* Failure: don't filter */
+}
+
+SEC("tp/syscalls/sys_enter_clock_nanosleep")
+int sys_enter_clock_nanosleep(struct syscall_enter_args *args)
+{
+ struct augmented_args_payload *augmented_args = augmented_args_payload();
+ const void *rqtp_arg = (const void *)args->args[2];
+ unsigned int len = sizeof(augmented_args->args);
+ __u32 size = sizeof(struct timespec64);
+
+ if (augmented_args == NULL)
+ goto failure;
+
+ if (size > sizeof(augmented_args->__data))
+ goto failure;
+
+ bpf_probe_read_user(&augmented_args->__data, size, rqtp_arg);
+
+ return augmented__output(args, augmented_args, len + size);
+failure:
+ return 1; /* Failure: don't filter */
+}
+
+static pid_t getpid(void)
+{
+ return bpf_get_current_pid_tgid();
+}
+
+static bool pid_filter__has(struct pids_filtered *pids, pid_t pid)
+{
+ return bpf_map_lookup_elem(pids, &pid) != NULL;
+}
+
+SEC("tp/raw_syscalls/sys_enter")
+int sys_enter(struct syscall_enter_args *args)
+{
+ struct augmented_args_payload *augmented_args;
+ /*
+ * We start len, the amount of data that will be in the perf ring
+ * buffer, if this is not filtered out by one of pid_filter__has(),
+ * syscall->enabled, etc, with the non-augmented raw syscall payload,
+ * i.e. sizeof(augmented_args->args).
+ *
+ * We'll add to this as we add augmented syscalls right after that
+ * initial, non-augmented raw_syscalls:sys_enter payload.
+ */
+
+ if (pid_filter__has(&pids_filtered, getpid()))
+ return 0;
+
+ augmented_args = augmented_args_payload();
+ if (augmented_args == NULL)
+ return 1;
+
+ bpf_probe_read_kernel(&augmented_args->args, sizeof(augmented_args->args), args);
+
+ /*
+ * Jump to syscall specific augmenter, even if the default one,
+ * "!raw_syscalls:unaugmented" that will just return 1 to return the
+ * unaugmented tracepoint payload.
+ */
+ bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
+
+ // If not found on the PROG_ARRAY syscalls map, then we're filtering it:
+ return 0;
+}
+
+SEC("tp/raw_syscalls/sys_exit")
+int sys_exit(struct syscall_exit_args *args)
+{
+ struct syscall_exit_args exit_args;
+
+ if (pid_filter__has(&pids_filtered, getpid()))
+ return 0;
+
+ bpf_probe_read_kernel(&exit_args, sizeof(exit_args), args);
+ /*
+ * Jump to syscall specific return augmenter, even if the default one,
+ * "!raw_syscalls:unaugmented" that will just return 1 to return the
+ * unaugmented tracepoint payload.
+ */
+ bpf_tail_call(args, &syscalls_sys_exit, exit_args.syscall_nr);
+ /*
+ * If not found on the PROG_ARRAY syscalls map, then we're filtering it:
+ */
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/perf/util/bpf_skel/bench_uprobe.bpf.c b/tools/perf/util/bpf_skel/bench_uprobe.bpf.c
new file mode 100644
index 000000000..2c55896bb
--- /dev/null
+++ b/tools/perf/util/bpf_skel/bench_uprobe.bpf.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2023 Red Hat
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+
+unsigned int nr_uprobes;
+
+SEC("uprobe")
+int BPF_UPROBE(empty)
+{
+ return 0;
+}
+
+SEC("uprobe")
+int BPF_UPROBE(trace_printk)
+{
+ char fmt[] = "perf bench uprobe %u";
+
+ bpf_trace_printk(fmt, sizeof(fmt), ++nr_uprobes);
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
new file mode 100644
index 000000000..6a438e010
--- /dev/null
+++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2021 Facebook
+// Copyright (c) 2021 Google
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#define MAX_LEVELS 10 // max cgroup hierarchy level: arbitrary
+#define MAX_EVENTS 32 // max events per cgroup: arbitrary
+
+// NOTE: many of map and global data will be modified before loading
+// from the userspace (perf tool) using the skeleton helpers.
+
+// single set of global perf events to measure
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(int));
+ __uint(max_entries, 1);
+} events SEC(".maps");
+
+// from cgroup id to event index
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u64));
+ __uint(value_size, sizeof(__u32));
+ __uint(max_entries, 1);
+} cgrp_idx SEC(".maps");
+
+// per-cpu event snapshots to calculate delta
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_perf_event_value));
+} prev_readings SEC(".maps");
+
+// aggregated event values for each cgroup (per-cpu)
+// will be read from the user-space
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_perf_event_value));
+} cgrp_readings SEC(".maps");
+
+/* new kernel cgroup definition */
+struct cgroup___new {
+ int level;
+ struct cgroup *ancestors[];
+} __attribute__((preserve_access_index));
+
+/* old kernel cgroup definition */
+struct cgroup___old {
+ int level;
+ u64 ancestor_ids[];
+} __attribute__((preserve_access_index));
+
+const volatile __u32 num_events = 1;
+const volatile __u32 num_cpus = 1;
+
+int enabled = 0;
+int use_cgroup_v2 = 0;
+int perf_subsys_id = -1;
+
+static inline __u64 get_cgroup_v1_ancestor_id(struct cgroup *cgrp, int level)
+{
+ /* recast pointer to capture new type for compiler */
+ struct cgroup___new *cgrp_new = (void *)cgrp;
+
+ if (bpf_core_field_exists(cgrp_new->ancestors)) {
+ return BPF_CORE_READ(cgrp_new, ancestors[level], kn, id);
+ } else {
+ /* recast pointer to capture old type for compiler */
+ struct cgroup___old *cgrp_old = (void *)cgrp;
+
+ return BPF_CORE_READ(cgrp_old, ancestor_ids[level]);
+ }
+}
+
+static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
+{
+ struct task_struct *p = (void *)bpf_get_current_task();
+ struct cgroup *cgrp;
+ register int i = 0;
+ __u32 *elem;
+ int level;
+ int cnt;
+
+ if (perf_subsys_id == -1) {
+#if __has_builtin(__builtin_preserve_enum_value)
+ perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id,
+ perf_event_cgrp_id);
+#else
+ perf_subsys_id = perf_event_cgrp_id;
+#endif
+ }
+ cgrp = BPF_CORE_READ(p, cgroups, subsys[perf_subsys_id], cgroup);
+ level = BPF_CORE_READ(cgrp, level);
+
+ for (cnt = 0; i < MAX_LEVELS; i++) {
+ __u64 cgrp_id;
+
+ if (i > level)
+ break;
+
+ // convert cgroup-id to a map index
+ cgrp_id = get_cgroup_v1_ancestor_id(cgrp, i);
+ elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id);
+ if (!elem)
+ continue;
+
+ cgrps[cnt++] = *elem;
+ if (cnt == size)
+ break;
+ }
+
+ return cnt;
+}
+
+static inline int get_cgroup_v2_idx(__u32 *cgrps, int size)
+{
+ register int i = 0;
+ __u32 *elem;
+ int cnt;
+
+ for (cnt = 0; i < MAX_LEVELS; i++) {
+ __u64 cgrp_id = bpf_get_current_ancestor_cgroup_id(i);
+
+ if (cgrp_id == 0)
+ break;
+
+ // convert cgroup-id to a map index
+ elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id);
+ if (!elem)
+ continue;
+
+ cgrps[cnt++] = *elem;
+ if (cnt == size)
+ break;
+ }
+
+ return cnt;
+}
+
+static int bperf_cgroup_count(void)
+{
+ register __u32 idx = 0; // to have it in a register to pass BPF verifier
+ register int c = 0;
+ struct bpf_perf_event_value val, delta, *prev_val, *cgrp_val;
+ __u32 cpu = bpf_get_smp_processor_id();
+ __u32 cgrp_idx[MAX_LEVELS];
+ int cgrp_cnt;
+ __u32 key, cgrp;
+ long err;
+
+ if (use_cgroup_v2)
+ cgrp_cnt = get_cgroup_v2_idx(cgrp_idx, MAX_LEVELS);
+ else
+ cgrp_cnt = get_cgroup_v1_idx(cgrp_idx, MAX_LEVELS);
+
+ for ( ; idx < MAX_EVENTS; idx++) {
+ if (idx == num_events)
+ break;
+
+ // XXX: do not pass idx directly (for verifier)
+ key = idx;
+ // this is per-cpu array for diff
+ prev_val = bpf_map_lookup_elem(&prev_readings, &key);
+ if (!prev_val) {
+ val.counter = val.enabled = val.running = 0;
+ bpf_map_update_elem(&prev_readings, &key, &val, BPF_ANY);
+
+ prev_val = bpf_map_lookup_elem(&prev_readings, &key);
+ if (!prev_val)
+ continue;
+ }
+
+ // read from global perf_event array
+ key = idx * num_cpus + cpu;
+ err = bpf_perf_event_read_value(&events, key, &val, sizeof(val));
+ if (err)
+ continue;
+
+ if (enabled) {
+ delta.counter = val.counter - prev_val->counter;
+ delta.enabled = val.enabled - prev_val->enabled;
+ delta.running = val.running - prev_val->running;
+
+ for (c = 0; c < MAX_LEVELS; c++) {
+ if (c == cgrp_cnt)
+ break;
+
+ cgrp = cgrp_idx[c];
+
+ // aggregate the result by cgroup
+ key = cgrp * num_events + idx;
+ cgrp_val = bpf_map_lookup_elem(&cgrp_readings, &key);
+ if (cgrp_val) {
+ cgrp_val->counter += delta.counter;
+ cgrp_val->enabled += delta.enabled;
+ cgrp_val->running += delta.running;
+ } else {
+ bpf_map_update_elem(&cgrp_readings, &key,
+ &delta, BPF_ANY);
+ }
+ }
+ }
+
+ *prev_val = val;
+ }
+ return 0;
+}
+
+// This will be attached to cgroup-switches event for each cpu
+SEC("perf_event")
+int BPF_PROG(on_cgrp_switch)
+{
+ return bperf_cgroup_count();
+}
+
+SEC("raw_tp/sched_switch")
+int BPF_PROG(trigger_read)
+{
+ return bperf_cgroup_count();
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/bperf_follower.bpf.c b/tools/perf/util/bpf_skel/bperf_follower.bpf.c
new file mode 100644
index 000000000..f19399853
--- /dev/null
+++ b/tools/perf/util/bpf_skel/bperf_follower.bpf.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2021 Facebook
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bperf_u.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_perf_event_value));
+ __uint(max_entries, 1);
+} diff_readings SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_perf_event_value));
+ __uint(max_entries, 1);
+} accum_readings SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} filter SEC(".maps");
+
+enum bperf_filter_type type = 0;
+int enabled = 0;
+
+SEC("fexit/XXX")
+int BPF_PROG(fexit_XXX)
+{
+ struct bpf_perf_event_value *diff_val, *accum_val;
+ __u32 filter_key, zero = 0;
+ __u32 *accum_key;
+
+ if (!enabled)
+ return 0;
+
+ switch (type) {
+ case BPERF_FILTER_GLOBAL:
+ accum_key = &zero;
+ goto do_add;
+ case BPERF_FILTER_CPU:
+ filter_key = bpf_get_smp_processor_id();
+ break;
+ case BPERF_FILTER_PID:
+ filter_key = bpf_get_current_pid_tgid() & 0xffffffff;
+ break;
+ case BPERF_FILTER_TGID:
+ filter_key = bpf_get_current_pid_tgid() >> 32;
+ break;
+ default:
+ return 0;
+ }
+
+ accum_key = bpf_map_lookup_elem(&filter, &filter_key);
+ if (!accum_key)
+ return 0;
+
+do_add:
+ diff_val = bpf_map_lookup_elem(&diff_readings, &zero);
+ if (!diff_val)
+ return 0;
+
+ accum_val = bpf_map_lookup_elem(&accum_readings, accum_key);
+ if (!accum_val)
+ return 0;
+
+ accum_val->counter += diff_val->counter;
+ accum_val->enabled += diff_val->enabled;
+ accum_val->running += diff_val->running;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/bperf_leader.bpf.c b/tools/perf/util/bpf_skel/bperf_leader.bpf.c
new file mode 100644
index 000000000..e2a2d4cd7
--- /dev/null
+++ b/tools/perf/util/bpf_skel/bperf_leader.bpf.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2021 Facebook
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(int));
+ __uint(map_flags, BPF_F_PRESERVE_ELEMS);
+} events SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_perf_event_value));
+ __uint(max_entries, 1);
+} prev_readings SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_perf_event_value));
+ __uint(max_entries, 1);
+} diff_readings SEC(".maps");
+
+SEC("raw_tp/sched_switch")
+int BPF_PROG(on_switch)
+{
+ struct bpf_perf_event_value val, *prev_val, *diff_val;
+ __u32 key = bpf_get_smp_processor_id();
+ __u32 zero = 0;
+ long err;
+
+ prev_val = bpf_map_lookup_elem(&prev_readings, &zero);
+ if (!prev_val)
+ return 0;
+
+ diff_val = bpf_map_lookup_elem(&diff_readings, &zero);
+ if (!diff_val)
+ return 0;
+
+ err = bpf_perf_event_read_value(&events, key, &val, sizeof(val));
+ if (err)
+ return 0;
+
+ diff_val->counter = val.counter - prev_val->counter;
+ diff_val->enabled = val.enabled - prev_val->enabled;
+ diff_val->running = val.running - prev_val->running;
+ *prev_val = val;
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/bperf_u.h b/tools/perf/util/bpf_skel/bperf_u.h
new file mode 100644
index 000000000..1ce0c2c90
--- /dev/null
+++ b/tools/perf/util/bpf_skel/bperf_u.h
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2021 Facebook
+
+#ifndef __BPERF_STAT_U_H
+#define __BPERF_STAT_U_H
+
+enum bperf_filter_type {
+ BPERF_FILTER_GLOBAL = 1,
+ BPERF_FILTER_CPU,
+ BPERF_FILTER_PID,
+ BPERF_FILTER_TGID,
+};
+
+#endif /* __BPERF_STAT_U_H */
diff --git a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c
new file mode 100644
index 000000000..97037d3b3
--- /dev/null
+++ b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2020 Facebook
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* map of perf event fds, num_cpu * num_metric entries */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(int));
+} events SEC(".maps");
+
+/* readings at fentry */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_perf_event_value));
+ __uint(max_entries, 1);
+} fentry_readings SEC(".maps");
+
+/* accumulated readings */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_perf_event_value));
+ __uint(max_entries, 1);
+} accum_readings SEC(".maps");
+
+const volatile __u32 num_cpu = 1;
+
+SEC("fentry/XXX")
+int BPF_PROG(fentry_XXX)
+{
+ __u32 key = bpf_get_smp_processor_id();
+ struct bpf_perf_event_value *ptr;
+ __u32 zero = 0;
+ long err;
+
+ /* look up before reading, to reduce error */
+ ptr = bpf_map_lookup_elem(&fentry_readings, &zero);
+ if (!ptr)
+ return 0;
+
+ err = bpf_perf_event_read_value(&events, key, ptr, sizeof(*ptr));
+ if (err)
+ return 0;
+
+ return 0;
+}
+
+static inline void
+fexit_update_maps(struct bpf_perf_event_value *after)
+{
+ struct bpf_perf_event_value *before, diff;
+ __u32 zero = 0;
+
+ before = bpf_map_lookup_elem(&fentry_readings, &zero);
+ /* only account samples with a valid fentry_reading */
+ if (before && before->counter) {
+ struct bpf_perf_event_value *accum;
+
+ diff.counter = after->counter - before->counter;
+ diff.enabled = after->enabled - before->enabled;
+ diff.running = after->running - before->running;
+
+ accum = bpf_map_lookup_elem(&accum_readings, &zero);
+ if (accum) {
+ accum->counter += diff.counter;
+ accum->enabled += diff.enabled;
+ accum->running += diff.running;
+ }
+ }
+}
+
+SEC("fexit/XXX")
+int BPF_PROG(fexit_XXX)
+{
+ struct bpf_perf_event_value reading;
+ __u32 cpu = bpf_get_smp_processor_id();
+ int err;
+
+ /* read all events before updating the maps, to reduce error */
+ err = bpf_perf_event_read_value(&events, cpu, &reading, sizeof(reading));
+ if (err)
+ return 0;
+
+ fexit_update_maps(&reading);
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
new file mode 100644
index 000000000..9d01e3af7
--- /dev/null
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2021 Google
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+// This should be in sync with "util/ftrace.h"
+#define NUM_BUCKET 22
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u64));
+ __uint(value_size, sizeof(__u64));
+ __uint(max_entries, 10000);
+} functime SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} cpu_filter SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} task_filter SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u64));
+ __uint(max_entries, NUM_BUCKET);
+} latency SEC(".maps");
+
+
+int enabled = 0;
+int has_cpu = 0;
+int has_task = 0;
+int use_nsec = 0;
+
+SEC("kprobe/func")
+int BPF_PROG(func_begin)
+{
+ __u64 key, now;
+
+ if (!enabled)
+ return 0;
+
+ key = bpf_get_current_pid_tgid();
+
+ if (has_cpu) {
+ __u32 cpu = bpf_get_smp_processor_id();
+ __u8 *ok;
+
+ ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
+ if (!ok)
+ return 0;
+ }
+
+ if (has_task) {
+ __u32 pid = key & 0xffffffff;
+ __u8 *ok;
+
+ ok = bpf_map_lookup_elem(&task_filter, &pid);
+ if (!ok)
+ return 0;
+ }
+
+ now = bpf_ktime_get_ns();
+
+ // overwrite timestamp for nested functions
+ bpf_map_update_elem(&functime, &key, &now, BPF_ANY);
+ return 0;
+}
+
+SEC("kretprobe/func")
+int BPF_PROG(func_end)
+{
+ __u64 tid;
+ __u64 *start;
+ __u64 cmp_base = use_nsec ? 1 : 1000;
+
+ if (!enabled)
+ return 0;
+
+ tid = bpf_get_current_pid_tgid();
+
+ start = bpf_map_lookup_elem(&functime, &tid);
+ if (start) {
+ __s64 delta = bpf_ktime_get_ns() - *start;
+ __u32 key;
+ __u64 *hist;
+
+ bpf_map_delete_elem(&functime, &tid);
+
+ if (delta < 0)
+ return 0;
+
+ // calculate index using delta
+ for (key = 0; key < (NUM_BUCKET - 1); key++) {
+ if (delta < (cmp_base << key))
+ break;
+ }
+
+ hist = bpf_map_lookup_elem(&latency, &key);
+ if (!hist)
+ return 0;
+
+ *hist += 1;
+ }
+
+ return 0;
+}
diff --git a/tools/perf/util/bpf_skel/kwork_trace.bpf.c b/tools/perf/util/bpf_skel/kwork_trace.bpf.c
new file mode 100644
index 000000000..063c124e0
--- /dev/null
+++ b/tools/perf/util/bpf_skel/kwork_trace.bpf.c
@@ -0,0 +1,383 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2022, Huawei
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define KWORK_COUNT 100
+#define MAX_KWORKNAME 128
+
+/*
+ * This should be in sync with "util/kwork.h"
+ */
+enum kwork_class_type {
+ KWORK_CLASS_IRQ,
+ KWORK_CLASS_SOFTIRQ,
+ KWORK_CLASS_WORKQUEUE,
+ KWORK_CLASS_MAX,
+};
+
+struct work_key {
+ __u32 type;
+ __u32 cpu;
+ __u64 id;
+};
+
+struct report_data {
+ __u64 nr;
+ __u64 total_time;
+ __u64 max_time;
+ __u64 max_time_start;
+ __u64 max_time_end;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(struct work_key));
+ __uint(value_size, MAX_KWORKNAME);
+ __uint(max_entries, KWORK_COUNT);
+} perf_kwork_names SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(struct work_key));
+ __uint(value_size, sizeof(__u64));
+ __uint(max_entries, KWORK_COUNT);
+} perf_kwork_time SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(struct work_key));
+ __uint(value_size, sizeof(struct report_data));
+ __uint(max_entries, KWORK_COUNT);
+} perf_kwork_report SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} perf_kwork_cpu_filter SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, MAX_KWORKNAME);
+ __uint(max_entries, 1);
+} perf_kwork_name_filter SEC(".maps");
+
+int enabled = 0;
+int has_cpu_filter = 0;
+int has_name_filter = 0;
+
+static __always_inline int local_strncmp(const char *s1,
+ unsigned int sz, const char *s2)
+{
+ int ret = 0;
+ unsigned int i;
+
+ for (i = 0; i < sz; i++) {
+ ret = (unsigned char)s1[i] - (unsigned char)s2[i];
+ if (ret || !s1[i] || !s2[i])
+ break;
+ }
+
+ return ret;
+}
+
+static __always_inline int trace_event_match(struct work_key *key, char *name)
+{
+ __u8 *cpu_val;
+ char *name_val;
+ __u32 zero = 0;
+ __u32 cpu = bpf_get_smp_processor_id();
+
+ if (!enabled)
+ return 0;
+
+ if (has_cpu_filter) {
+ cpu_val = bpf_map_lookup_elem(&perf_kwork_cpu_filter, &cpu);
+ if (!cpu_val)
+ return 0;
+ }
+
+ if (has_name_filter && (name != NULL)) {
+ name_val = bpf_map_lookup_elem(&perf_kwork_name_filter, &zero);
+ if (name_val &&
+ (local_strncmp(name_val, MAX_KWORKNAME, name) != 0)) {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static __always_inline void do_update_time(void *map, struct work_key *key,
+ __u64 time_start, __u64 time_end)
+{
+ struct report_data zero, *data;
+ __s64 delta = time_end - time_start;
+
+ if (delta < 0)
+ return;
+
+ data = bpf_map_lookup_elem(map, key);
+ if (!data) {
+ __builtin_memset(&zero, 0, sizeof(zero));
+ bpf_map_update_elem(map, key, &zero, BPF_NOEXIST);
+ data = bpf_map_lookup_elem(map, key);
+ if (!data)
+ return;
+ }
+
+ if ((delta > data->max_time) ||
+ (data->max_time == 0)) {
+ data->max_time = delta;
+ data->max_time_start = time_start;
+ data->max_time_end = time_end;
+ }
+
+ data->total_time += delta;
+ data->nr++;
+}
+
+static __always_inline void do_update_timestart(void *map, struct work_key *key)
+{
+ __u64 ts = bpf_ktime_get_ns();
+
+ bpf_map_update_elem(map, key, &ts, BPF_ANY);
+}
+
+static __always_inline void do_update_timeend(void *report_map, void *time_map,
+ struct work_key *key)
+{
+ __u64 *time = bpf_map_lookup_elem(time_map, key);
+
+ if (time) {
+ bpf_map_delete_elem(time_map, key);
+ do_update_time(report_map, key, *time, bpf_ktime_get_ns());
+ }
+}
+
+static __always_inline void do_update_name(void *map,
+ struct work_key *key, char *name)
+{
+ if (!bpf_map_lookup_elem(map, key))
+ bpf_map_update_elem(map, key, name, BPF_ANY);
+}
+
+static __always_inline int update_timestart(void *map, struct work_key *key)
+{
+ if (!trace_event_match(key, NULL))
+ return 0;
+
+ do_update_timestart(map, key);
+ return 0;
+}
+
+static __always_inline int update_timestart_and_name(void *time_map,
+ void *names_map,
+ struct work_key *key,
+ char *name)
+{
+ if (!trace_event_match(key, name))
+ return 0;
+
+ do_update_timestart(time_map, key);
+ do_update_name(names_map, key, name);
+
+ return 0;
+}
+
+static __always_inline int update_timeend(void *report_map,
+ void *time_map, struct work_key *key)
+{
+ if (!trace_event_match(key, NULL))
+ return 0;
+
+ do_update_timeend(report_map, time_map, key);
+
+ return 0;
+}
+
+static __always_inline int update_timeend_and_name(void *report_map,
+ void *time_map,
+ void *names_map,
+ struct work_key *key,
+ char *name)
+{
+ if (!trace_event_match(key, name))
+ return 0;
+
+ do_update_timeend(report_map, time_map, key);
+ do_update_name(names_map, key, name);
+
+ return 0;
+}
+
+SEC("tracepoint/irq/irq_handler_entry")
+int report_irq_handler_entry(struct trace_event_raw_irq_handler_entry *ctx)
+{
+ char name[MAX_KWORKNAME];
+ struct work_key key = {
+ .type = KWORK_CLASS_IRQ,
+ .cpu = bpf_get_smp_processor_id(),
+ .id = (__u64)ctx->irq,
+ };
+ void *name_addr = (void *)ctx + (ctx->__data_loc_name & 0xffff);
+
+ bpf_probe_read_kernel_str(name, sizeof(name), name_addr);
+
+ return update_timestart_and_name(&perf_kwork_time,
+ &perf_kwork_names, &key, name);
+}
+
+SEC("tracepoint/irq/irq_handler_exit")
+int report_irq_handler_exit(struct trace_event_raw_irq_handler_exit *ctx)
+{
+ struct work_key key = {
+ .type = KWORK_CLASS_IRQ,
+ .cpu = bpf_get_smp_processor_id(),
+ .id = (__u64)ctx->irq,
+ };
+
+ return update_timeend(&perf_kwork_report, &perf_kwork_time, &key);
+}
+
+static char softirq_name_list[NR_SOFTIRQS][MAX_KWORKNAME] = {
+ { "HI" },
+ { "TIMER" },
+ { "NET_TX" },
+ { "NET_RX" },
+ { "BLOCK" },
+ { "IRQ_POLL" },
+ { "TASKLET" },
+ { "SCHED" },
+ { "HRTIMER" },
+ { "RCU" },
+};
+
+SEC("tracepoint/irq/softirq_entry")
+int report_softirq_entry(struct trace_event_raw_softirq *ctx)
+{
+ unsigned int vec = ctx->vec;
+ struct work_key key = {
+ .type = KWORK_CLASS_SOFTIRQ,
+ .cpu = bpf_get_smp_processor_id(),
+ .id = (__u64)vec,
+ };
+
+ if (vec < NR_SOFTIRQS) {
+ return update_timestart_and_name(&perf_kwork_time,
+ &perf_kwork_names, &key,
+ softirq_name_list[vec]);
+ }
+
+ return 0;
+}
+
+SEC("tracepoint/irq/softirq_exit")
+int report_softirq_exit(struct trace_event_raw_softirq *ctx)
+{
+ struct work_key key = {
+ .type = KWORK_CLASS_SOFTIRQ,
+ .cpu = bpf_get_smp_processor_id(),
+ .id = (__u64)ctx->vec,
+ };
+
+ return update_timeend(&perf_kwork_report, &perf_kwork_time, &key);
+}
+
+SEC("tracepoint/irq/softirq_raise")
+int latency_softirq_raise(struct trace_event_raw_softirq *ctx)
+{
+ unsigned int vec = ctx->vec;
+ struct work_key key = {
+ .type = KWORK_CLASS_SOFTIRQ,
+ .cpu = bpf_get_smp_processor_id(),
+ .id = (__u64)vec,
+ };
+
+ if (vec < NR_SOFTIRQS) {
+ return update_timestart_and_name(&perf_kwork_time,
+ &perf_kwork_names, &key,
+ softirq_name_list[vec]);
+ }
+
+ return 0;
+}
+
+SEC("tracepoint/irq/softirq_entry")
+int latency_softirq_entry(struct trace_event_raw_softirq *ctx)
+{
+ struct work_key key = {
+ .type = KWORK_CLASS_SOFTIRQ,
+ .cpu = bpf_get_smp_processor_id(),
+ .id = (__u64)ctx->vec,
+ };
+
+ return update_timeend(&perf_kwork_report, &perf_kwork_time, &key);
+}
+
+SEC("tracepoint/workqueue/workqueue_execute_start")
+int report_workqueue_execute_start(struct trace_event_raw_workqueue_execute_start *ctx)
+{
+ struct work_key key = {
+ .type = KWORK_CLASS_WORKQUEUE,
+ .cpu = bpf_get_smp_processor_id(),
+ .id = (__u64)ctx->work,
+ };
+
+ return update_timestart(&perf_kwork_time, &key);
+}
+
+SEC("tracepoint/workqueue/workqueue_execute_end")
+int report_workqueue_execute_end(struct trace_event_raw_workqueue_execute_end *ctx)
+{
+ char name[MAX_KWORKNAME];
+ struct work_key key = {
+ .type = KWORK_CLASS_WORKQUEUE,
+ .cpu = bpf_get_smp_processor_id(),
+ .id = (__u64)ctx->work,
+ };
+ unsigned long long func_addr = (unsigned long long)ctx->function;
+
+ __builtin_memset(name, 0, sizeof(name));
+ bpf_snprintf(name, sizeof(name), "%ps", &func_addr, sizeof(func_addr));
+
+ return update_timeend_and_name(&perf_kwork_report, &perf_kwork_time,
+ &perf_kwork_names, &key, name);
+}
+
+SEC("tracepoint/workqueue/workqueue_activate_work")
+int latency_workqueue_activate_work(struct trace_event_raw_workqueue_activate_work *ctx)
+{
+ struct work_key key = {
+ .type = KWORK_CLASS_WORKQUEUE,
+ .cpu = bpf_get_smp_processor_id(),
+ .id = (__u64)ctx->work,
+ };
+
+ return update_timestart(&perf_kwork_time, &key);
+}
+
+SEC("tracepoint/workqueue/workqueue_execute_start")
+int latency_workqueue_execute_start(struct trace_event_raw_workqueue_execute_start *ctx)
+{
+ char name[MAX_KWORKNAME];
+ struct work_key key = {
+ .type = KWORK_CLASS_WORKQUEUE,
+ .cpu = bpf_get_smp_processor_id(),
+ .id = (__u64)ctx->work,
+ };
+ unsigned long long func_addr = (unsigned long long)ctx->function;
+
+ __builtin_memset(name, 0, sizeof(name));
+ bpf_snprintf(name, sizeof(name), "%ps", &func_addr, sizeof(func_addr));
+
+ return update_timeend_and_name(&perf_kwork_report, &perf_kwork_time,
+ &perf_kwork_names, &key, name);
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
new file mode 100644
index 000000000..8d3cfbb3c
--- /dev/null
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -0,0 +1,453 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2022 Google
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include <asm-generic/errno-base.h>
+
+#include "lock_data.h"
+
+/* for collect_lock_syms(). 4096 was rejected by the verifier */
+#define MAX_CPUS 1024
+
+/* lock contention flags from include/trace/events/lock.h */
+#define LCB_F_SPIN (1U << 0)
+#define LCB_F_READ (1U << 1)
+#define LCB_F_WRITE (1U << 2)
+#define LCB_F_RT (1U << 3)
+#define LCB_F_PERCPU (1U << 4)
+#define LCB_F_MUTEX (1U << 5)
+
+struct tstamp_data {
+ __u64 timestamp;
+ __u64 lock;
+ __u32 flags;
+ __s32 stack_id;
+};
+
+/* callstack storage */
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u64));
+ __uint(max_entries, MAX_ENTRIES);
+} stacks SEC(".maps");
+
+/* maintain timestamp at the beginning of contention */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, struct tstamp_data);
+ __uint(max_entries, MAX_ENTRIES);
+} tstamp SEC(".maps");
+
+/* actual lock contention statistics */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(struct contention_key));
+ __uint(value_size, sizeof(struct contention_data));
+ __uint(max_entries, MAX_ENTRIES);
+} lock_stat SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct contention_task_data));
+ __uint(max_entries, MAX_ENTRIES);
+} task_data SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u64));
+ __uint(value_size, sizeof(__u32));
+ __uint(max_entries, MAX_ENTRIES);
+} lock_syms SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} cpu_filter SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} task_filter SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} type_filter SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u64));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} addr_filter SEC(".maps");
+
+struct rw_semaphore___old {
+ struct task_struct *owner;
+} __attribute__((preserve_access_index));
+
+struct rw_semaphore___new {
+ atomic_long_t owner;
+} __attribute__((preserve_access_index));
+
+struct mm_struct___old {
+ struct rw_semaphore mmap_sem;
+} __attribute__((preserve_access_index));
+
+struct mm_struct___new {
+ struct rw_semaphore mmap_lock;
+} __attribute__((preserve_access_index));
+
+/* control flags */
+int enabled;
+int has_cpu;
+int has_task;
+int has_type;
+int has_addr;
+int needs_callstack;
+int stack_skip;
+int lock_owner;
+
+/* determine the key of lock stat */
+int aggr_mode;
+
+/* error stat */
+int task_fail;
+int stack_fail;
+int time_fail;
+int data_fail;
+
+int task_map_full;
+int data_map_full;
+
+static inline int can_record(u64 *ctx)
+{
+ if (has_cpu) {
+ __u32 cpu = bpf_get_smp_processor_id();
+ __u8 *ok;
+
+ ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
+ if (!ok)
+ return 0;
+ }
+
+ if (has_task) {
+ __u8 *ok;
+ __u32 pid = bpf_get_current_pid_tgid();
+
+ ok = bpf_map_lookup_elem(&task_filter, &pid);
+ if (!ok)
+ return 0;
+ }
+
+ if (has_type) {
+ __u8 *ok;
+ __u32 flags = (__u32)ctx[1];
+
+ ok = bpf_map_lookup_elem(&type_filter, &flags);
+ if (!ok)
+ return 0;
+ }
+
+ if (has_addr) {
+ __u8 *ok;
+ __u64 addr = ctx[0];
+
+ ok = bpf_map_lookup_elem(&addr_filter, &addr);
+ if (!ok)
+ return 0;
+ }
+
+ return 1;
+}
+
+static inline int update_task_data(struct task_struct *task)
+{
+ struct contention_task_data *p;
+ int pid, err;
+
+ err = bpf_core_read(&pid, sizeof(pid), &task->pid);
+ if (err)
+ return -1;
+
+ p = bpf_map_lookup_elem(&task_data, &pid);
+ if (p == NULL && !task_map_full) {
+ struct contention_task_data data = {};
+
+ BPF_CORE_READ_STR_INTO(&data.comm, task, comm);
+ if (bpf_map_update_elem(&task_data, &pid, &data, BPF_NOEXIST) == -E2BIG)
+ task_map_full = 1;
+ }
+
+ return 0;
+}
+
+#ifndef __has_builtin
+# define __has_builtin(x) 0
+#endif
+
+static inline struct task_struct *get_lock_owner(__u64 lock, __u32 flags)
+{
+ struct task_struct *task;
+ __u64 owner = 0;
+
+ if (flags & LCB_F_MUTEX) {
+ struct mutex *mutex = (void *)lock;
+ owner = BPF_CORE_READ(mutex, owner.counter);
+ } else if (flags == LCB_F_READ || flags == LCB_F_WRITE) {
+ /*
+ * Support for the BPF_TYPE_MATCHES argument to the
+ * __builtin_preserve_type_info builtin was added at some point during
+ * development of clang 15 and it's what is needed for
+ * bpf_core_type_matches.
+ */
+#if __has_builtin(__builtin_preserve_type_info) && __clang_major__ >= 15
+ if (bpf_core_type_matches(struct rw_semaphore___old)) {
+ struct rw_semaphore___old *rwsem = (void *)lock;
+ owner = (unsigned long)BPF_CORE_READ(rwsem, owner);
+ } else if (bpf_core_type_matches(struct rw_semaphore___new)) {
+ struct rw_semaphore___new *rwsem = (void *)lock;
+ owner = BPF_CORE_READ(rwsem, owner.counter);
+ }
+#else
+ /* assume new struct */
+ struct rw_semaphore *rwsem = (void *)lock;
+ owner = BPF_CORE_READ(rwsem, owner.counter);
+#endif
+ }
+
+ if (!owner)
+ return NULL;
+
+ task = (void *)(owner & ~7UL);
+ return task;
+}
+
+static inline __u32 check_lock_type(__u64 lock, __u32 flags)
+{
+ struct task_struct *curr;
+ struct mm_struct___old *mm_old;
+ struct mm_struct___new *mm_new;
+
+ switch (flags) {
+ case LCB_F_READ: /* rwsem */
+ case LCB_F_WRITE:
+ curr = bpf_get_current_task_btf();
+ if (curr->mm == NULL)
+ break;
+ mm_new = (void *)curr->mm;
+ if (bpf_core_field_exists(mm_new->mmap_lock)) {
+ if (&mm_new->mmap_lock == (void *)lock)
+ return LCD_F_MMAP_LOCK;
+ break;
+ }
+ mm_old = (void *)curr->mm;
+ if (bpf_core_field_exists(mm_old->mmap_sem)) {
+ if (&mm_old->mmap_sem == (void *)lock)
+ return LCD_F_MMAP_LOCK;
+ }
+ break;
+ case LCB_F_SPIN: /* spinlock */
+ curr = bpf_get_current_task_btf();
+ if (&curr->sighand->siglock == (void *)lock)
+ return LCD_F_SIGHAND_LOCK;
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+SEC("tp_btf/contention_begin")
+int contention_begin(u64 *ctx)
+{
+ __u32 pid;
+ struct tstamp_data *pelem;
+
+ if (!enabled || !can_record(ctx))
+ return 0;
+
+ pid = bpf_get_current_pid_tgid();
+ pelem = bpf_map_lookup_elem(&tstamp, &pid);
+ if (pelem && pelem->lock)
+ return 0;
+
+ if (pelem == NULL) {
+ struct tstamp_data zero = {};
+
+ bpf_map_update_elem(&tstamp, &pid, &zero, BPF_ANY);
+ pelem = bpf_map_lookup_elem(&tstamp, &pid);
+ if (pelem == NULL) {
+ __sync_fetch_and_add(&task_fail, 1);
+ return 0;
+ }
+ }
+
+ pelem->timestamp = bpf_ktime_get_ns();
+ pelem->lock = (__u64)ctx[0];
+ pelem->flags = (__u32)ctx[1];
+
+ if (needs_callstack) {
+ pelem->stack_id = bpf_get_stackid(ctx, &stacks,
+ BPF_F_FAST_STACK_CMP | stack_skip);
+ if (pelem->stack_id < 0)
+ __sync_fetch_and_add(&stack_fail, 1);
+ } else if (aggr_mode == LOCK_AGGR_TASK) {
+ struct task_struct *task;
+
+ if (lock_owner) {
+ task = get_lock_owner(pelem->lock, pelem->flags);
+
+ /* The flags is not used anymore. Pass the owner pid. */
+ if (task)
+ pelem->flags = BPF_CORE_READ(task, pid);
+ else
+ pelem->flags = -1U;
+
+ } else {
+ task = bpf_get_current_task_btf();
+ }
+
+ if (task) {
+ if (update_task_data(task) < 0 && lock_owner)
+ pelem->flags = -1U;
+ }
+ }
+
+ return 0;
+}
+
+SEC("tp_btf/contention_end")
+int contention_end(u64 *ctx)
+{
+ __u32 pid;
+ struct tstamp_data *pelem;
+ struct contention_key key = {};
+ struct contention_data *data;
+ __u64 duration;
+
+ if (!enabled)
+ return 0;
+
+ pid = bpf_get_current_pid_tgid();
+ pelem = bpf_map_lookup_elem(&tstamp, &pid);
+ if (!pelem || pelem->lock != ctx[0])
+ return 0;
+
+ duration = bpf_ktime_get_ns() - pelem->timestamp;
+ if ((__s64)duration < 0) {
+ bpf_map_delete_elem(&tstamp, &pid);
+ __sync_fetch_and_add(&time_fail, 1);
+ return 0;
+ }
+
+ switch (aggr_mode) {
+ case LOCK_AGGR_CALLER:
+ key.stack_id = pelem->stack_id;
+ break;
+ case LOCK_AGGR_TASK:
+ if (lock_owner)
+ key.pid = pelem->flags;
+ else
+ key.pid = pid;
+ if (needs_callstack)
+ key.stack_id = pelem->stack_id;
+ break;
+ case LOCK_AGGR_ADDR:
+ key.lock_addr = pelem->lock;
+ if (needs_callstack)
+ key.stack_id = pelem->stack_id;
+ break;
+ default:
+ /* should not happen */
+ return 0;
+ }
+
+ data = bpf_map_lookup_elem(&lock_stat, &key);
+ if (!data) {
+ if (data_map_full) {
+ bpf_map_delete_elem(&tstamp, &pid);
+ __sync_fetch_and_add(&data_fail, 1);
+ return 0;
+ }
+
+ struct contention_data first = {
+ .total_time = duration,
+ .max_time = duration,
+ .min_time = duration,
+ .count = 1,
+ .flags = pelem->flags,
+ };
+ int err;
+
+ if (aggr_mode == LOCK_AGGR_ADDR)
+ first.flags |= check_lock_type(pelem->lock, pelem->flags);
+
+ err = bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST);
+ if (err < 0) {
+ if (err == -E2BIG)
+ data_map_full = 1;
+ __sync_fetch_and_add(&data_fail, 1);
+ }
+ bpf_map_delete_elem(&tstamp, &pid);
+ return 0;
+ }
+
+ __sync_fetch_and_add(&data->total_time, duration);
+ __sync_fetch_and_add(&data->count, 1);
+
+ /* FIXME: need atomic operations */
+ if (data->max_time < duration)
+ data->max_time = duration;
+ if (data->min_time > duration)
+ data->min_time = duration;
+
+ bpf_map_delete_elem(&tstamp, &pid);
+ return 0;
+}
+
+extern struct rq runqueues __ksym;
+
+struct rq___old {
+ raw_spinlock_t lock;
+} __attribute__((preserve_access_index));
+
+struct rq___new {
+ raw_spinlock_t __lock;
+} __attribute__((preserve_access_index));
+
+SEC("raw_tp/bpf_test_finish")
+int BPF_PROG(collect_lock_syms)
+{
+ __u64 lock_addr, lock_off;
+ __u32 lock_flag;
+
+ if (bpf_core_field_exists(struct rq___new, __lock))
+ lock_off = offsetof(struct rq___new, __lock);
+ else
+ lock_off = offsetof(struct rq___old, lock);
+
+ for (int i = 0; i < MAX_CPUS; i++) {
+ struct rq *rq = bpf_per_cpu_ptr(&runqueues, i);
+
+ if (rq == NULL)
+ break;
+
+ lock_addr = (__u64)(void *)rq + lock_off;
+ lock_flag = LOCK_CLASS_RQLOCK;
+ bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY);
+ }
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h
new file mode 100644
index 000000000..260062a9f
--- /dev/null
+++ b/tools/perf/util/bpf_skel/lock_data.h
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Data structures shared between BPF and tools. */
+#ifndef UTIL_BPF_SKEL_LOCK_DATA_H
+#define UTIL_BPF_SKEL_LOCK_DATA_H
+
+struct contention_key {
+ u32 stack_id;
+ u32 pid;
+ u64 lock_addr;
+};
+
+#define TASK_COMM_LEN 16
+
+struct contention_task_data {
+ char comm[TASK_COMM_LEN];
+};
+
+/* default buffer size */
+#define MAX_ENTRIES 16384
+
+/*
+ * Upper bits of the flags in the contention_data are used to identify
+ * some well-known locks which do not have symbols (non-global locks).
+ */
+#define LCD_F_MMAP_LOCK (1U << 31)
+#define LCD_F_SIGHAND_LOCK (1U << 30)
+
+#define LCB_F_MAX_FLAGS (1U << 7)
+
+struct contention_data {
+ u64 total_time;
+ u64 min_time;
+ u64 max_time;
+ u32 count;
+ u32 flags;
+};
+
+enum lock_aggr_mode {
+ LOCK_AGGR_ADDR = 0,
+ LOCK_AGGR_TASK,
+ LOCK_AGGR_CALLER,
+};
+
+enum lock_class_sym {
+ LOCK_CLASS_NONE,
+ LOCK_CLASS_RQLOCK,
+};
+
+#endif /* UTIL_BPF_SKEL_LOCK_DATA_H */
diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c
new file mode 100644
index 000000000..d877a0a97
--- /dev/null
+++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2022 Google
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+/* task->flags for off-cpu analysis */
+#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
+
+/* task->state for off-cpu analysis */
+#define TASK_INTERRUPTIBLE 0x0001
+#define TASK_UNINTERRUPTIBLE 0x0002
+
+/* create a new thread */
+#define CLONE_THREAD 0x10000
+
+#define MAX_STACKS 32
+#define MAX_ENTRIES 102400
+
+struct tstamp_data {
+ __u32 stack_id;
+ __u32 state;
+ __u64 timestamp;
+};
+
+struct offcpu_key {
+ __u32 pid;
+ __u32 tgid;
+ __u32 stack_id;
+ __u32 state;
+ __u64 cgroup_id;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, MAX_STACKS * sizeof(__u64));
+ __uint(max_entries, MAX_ENTRIES);
+} stacks SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct tstamp_data);
+} tstamp SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(struct offcpu_key));
+ __uint(value_size, sizeof(__u64));
+ __uint(max_entries, MAX_ENTRIES);
+} off_cpu SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} cpu_filter SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} task_filter SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u64));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} cgroup_filter SEC(".maps");
+
+/* new kernel task_struct definition */
+struct task_struct___new {
+ long __state;
+} __attribute__((preserve_access_index));
+
+/* old kernel task_struct definition */
+struct task_struct___old {
+ long state;
+} __attribute__((preserve_access_index));
+
+int enabled = 0;
+int has_cpu = 0;
+int has_task = 0;
+int has_cgroup = 0;
+int uses_tgid = 0;
+
+const volatile bool has_prev_state = false;
+const volatile bool needs_cgroup = false;
+const volatile bool uses_cgroup_v1 = false;
+
+int perf_subsys_id = -1;
+
+/*
+ * Old kernel used to call it task_struct->state and now it's '__state'.
+ * Use BPF CO-RE "ignored suffix rule" to deal with it like below:
+ *
+ * https://nakryiko.com/posts/bpf-core-reference-guide/#handling-incompatible-field-and-type-changes
+ */
+static inline int get_task_state(struct task_struct *t)
+{
+ /* recast pointer to capture new type for compiler */
+ struct task_struct___new *t_new = (void *)t;
+
+ if (bpf_core_field_exists(t_new->__state)) {
+ return BPF_CORE_READ(t_new, __state);
+ } else {
+ /* recast pointer to capture old type for compiler */
+ struct task_struct___old *t_old = (void *)t;
+
+ return BPF_CORE_READ(t_old, state);
+ }
+}
+
+static inline __u64 get_cgroup_id(struct task_struct *t)
+{
+ struct cgroup *cgrp;
+
+ if (!uses_cgroup_v1)
+ return BPF_CORE_READ(t, cgroups, dfl_cgrp, kn, id);
+
+ if (perf_subsys_id == -1) {
+#if __has_builtin(__builtin_preserve_enum_value)
+ perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id,
+ perf_event_cgrp_id);
+#else
+ perf_subsys_id = perf_event_cgrp_id;
+#endif
+ }
+
+ cgrp = BPF_CORE_READ(t, cgroups, subsys[perf_subsys_id], cgroup);
+ return BPF_CORE_READ(cgrp, kn, id);
+}
+
+static inline int can_record(struct task_struct *t, int state)
+{
+ /* kernel threads don't have user stack */
+ if (t->flags & PF_KTHREAD)
+ return 0;
+
+ if (state != TASK_INTERRUPTIBLE &&
+ state != TASK_UNINTERRUPTIBLE)
+ return 0;
+
+ if (has_cpu) {
+ __u32 cpu = bpf_get_smp_processor_id();
+ __u8 *ok;
+
+ ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
+ if (!ok)
+ return 0;
+ }
+
+ if (has_task) {
+ __u8 *ok;
+ __u32 pid;
+
+ if (uses_tgid)
+ pid = t->tgid;
+ else
+ pid = t->pid;
+
+ ok = bpf_map_lookup_elem(&task_filter, &pid);
+ if (!ok)
+ return 0;
+ }
+
+ if (has_cgroup) {
+ __u8 *ok;
+ __u64 cgrp_id = get_cgroup_id(t);
+
+ ok = bpf_map_lookup_elem(&cgroup_filter, &cgrp_id);
+ if (!ok)
+ return 0;
+ }
+
+ return 1;
+}
+
+static int off_cpu_stat(u64 *ctx, struct task_struct *prev,
+ struct task_struct *next, int state)
+{
+ __u64 ts;
+ __u32 stack_id;
+ struct tstamp_data *pelem;
+
+ ts = bpf_ktime_get_ns();
+
+ if (!can_record(prev, state))
+ goto next;
+
+ stack_id = bpf_get_stackid(ctx, &stacks,
+ BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK);
+
+ pelem = bpf_task_storage_get(&tstamp, prev, NULL,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!pelem)
+ goto next;
+
+ pelem->timestamp = ts;
+ pelem->state = state;
+ pelem->stack_id = stack_id;
+
+next:
+ pelem = bpf_task_storage_get(&tstamp, next, NULL, 0);
+
+ if (pelem && pelem->timestamp) {
+ struct offcpu_key key = {
+ .pid = next->pid,
+ .tgid = next->tgid,
+ .stack_id = pelem->stack_id,
+ .state = pelem->state,
+ .cgroup_id = needs_cgroup ? get_cgroup_id(next) : 0,
+ };
+ __u64 delta = ts - pelem->timestamp;
+ __u64 *total;
+
+ total = bpf_map_lookup_elem(&off_cpu, &key);
+ if (total)
+ *total += delta;
+ else
+ bpf_map_update_elem(&off_cpu, &key, &delta, BPF_ANY);
+
+ /* prevent to reuse the timestamp later */
+ pelem->timestamp = 0;
+ }
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int on_newtask(u64 *ctx)
+{
+ struct task_struct *task;
+ u64 clone_flags;
+ u32 pid;
+ u8 val = 1;
+
+ if (!uses_tgid)
+ return 0;
+
+ task = (struct task_struct *)bpf_get_current_task();
+
+ pid = BPF_CORE_READ(task, tgid);
+ if (!bpf_map_lookup_elem(&task_filter, &pid))
+ return 0;
+
+ task = (struct task_struct *)ctx[0];
+ clone_flags = ctx[1];
+
+ pid = task->tgid;
+ if (!(clone_flags & CLONE_THREAD))
+ bpf_map_update_elem(&task_filter, &pid, &val, BPF_NOEXIST);
+
+ return 0;
+}
+
+SEC("tp_btf/sched_switch")
+int on_switch(u64 *ctx)
+{
+ struct task_struct *prev, *next;
+ int prev_state;
+
+ if (!enabled)
+ return 0;
+
+ prev = (struct task_struct *)ctx[1];
+ next = (struct task_struct *)ctx[2];
+
+ if (has_prev_state)
+ prev_state = (int)ctx[3];
+ else
+ prev_state = get_task_state(prev);
+
+ return off_cpu_stat(ctx, prev, next, prev_state & 0xff);
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/sample-filter.h b/tools/perf/util/bpf_skel/sample-filter.h
new file mode 100644
index 000000000..2e96e1ab0
--- /dev/null
+++ b/tools/perf/util/bpf_skel/sample-filter.h
@@ -0,0 +1,27 @@
+#ifndef PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H
+#define PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H
+
+#define MAX_FILTERS 64
+
+/* supported filter operations */
+enum perf_bpf_filter_op {
+ PBF_OP_EQ,
+ PBF_OP_NEQ,
+ PBF_OP_GT,
+ PBF_OP_GE,
+ PBF_OP_LT,
+ PBF_OP_LE,
+ PBF_OP_AND,
+ PBF_OP_GROUP_BEGIN,
+ PBF_OP_GROUP_END,
+};
+
+/* BPF map entry for filtering */
+struct perf_bpf_filter_entry {
+ enum perf_bpf_filter_op op;
+ __u32 part; /* sub-sample type info when it has multiple values */
+ __u64 flags; /* perf sample type flags */
+ __u64 value;
+};
+
+#endif /* PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H */ \ No newline at end of file
diff --git a/tools/perf/util/bpf_skel/sample_filter.bpf.c b/tools/perf/util/bpf_skel/sample_filter.bpf.c
new file mode 100644
index 000000000..fb94f5280
--- /dev/null
+++ b/tools/perf/util/bpf_skel/sample_filter.bpf.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2023 Google
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#include "sample-filter.h"
+
+/* BPF map that will be filled by user space */
+struct filters {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct perf_bpf_filter_entry);
+ __uint(max_entries, MAX_FILTERS);
+} filters SEC(".maps");
+
+int dropped;
+
+void *bpf_cast_to_kern_ctx(void *) __ksym;
+
+/* new kernel perf_sample_data definition */
+struct perf_sample_data___new {
+ __u64 sample_flags;
+} __attribute__((preserve_access_index));
+
+/* new kernel perf_mem_data_src definition */
+union perf_mem_data_src___new {
+ __u64 val;
+ struct {
+ __u64 mem_op:5, /* type of opcode */
+ mem_lvl:14, /* memory hierarchy level */
+ mem_snoop:5, /* snoop mode */
+ mem_lock:2, /* lock instr */
+ mem_dtlb:7, /* tlb access */
+ mem_lvl_num:4, /* memory hierarchy level number */
+ mem_remote:1, /* remote */
+ mem_snoopx:2, /* snoop mode, ext */
+ mem_blk:3, /* access blocked */
+ mem_hops:3, /* hop level */
+ mem_rsvd:18;
+ };
+};
+
+/* helper function to return the given perf sample data */
+static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx,
+ struct perf_bpf_filter_entry *entry)
+{
+ struct perf_sample_data___new *data = (void *)kctx->data;
+
+ if (!bpf_core_field_exists(data->sample_flags) ||
+ (data->sample_flags & entry->flags) == 0)
+ return 0;
+
+ switch (entry->flags) {
+ case PERF_SAMPLE_IP:
+ return kctx->data->ip;
+ case PERF_SAMPLE_ID:
+ return kctx->data->id;
+ case PERF_SAMPLE_TID:
+ if (entry->part)
+ return kctx->data->tid_entry.pid;
+ else
+ return kctx->data->tid_entry.tid;
+ case PERF_SAMPLE_CPU:
+ return kctx->data->cpu_entry.cpu;
+ case PERF_SAMPLE_TIME:
+ return kctx->data->time;
+ case PERF_SAMPLE_ADDR:
+ return kctx->data->addr;
+ case PERF_SAMPLE_PERIOD:
+ return kctx->data->period;
+ case PERF_SAMPLE_TRANSACTION:
+ return kctx->data->txn;
+ case PERF_SAMPLE_WEIGHT_STRUCT:
+ if (entry->part == 1)
+ return kctx->data->weight.var1_dw;
+ if (entry->part == 2)
+ return kctx->data->weight.var2_w;
+ if (entry->part == 3)
+ return kctx->data->weight.var3_w;
+ /* fall through */
+ case PERF_SAMPLE_WEIGHT:
+ return kctx->data->weight.full;
+ case PERF_SAMPLE_PHYS_ADDR:
+ return kctx->data->phys_addr;
+ case PERF_SAMPLE_CODE_PAGE_SIZE:
+ return kctx->data->code_page_size;
+ case PERF_SAMPLE_DATA_PAGE_SIZE:
+ return kctx->data->data_page_size;
+ case PERF_SAMPLE_DATA_SRC:
+ if (entry->part == 1)
+ return kctx->data->data_src.mem_op;
+ if (entry->part == 2)
+ return kctx->data->data_src.mem_lvl_num;
+ if (entry->part == 3) {
+ __u32 snoop = kctx->data->data_src.mem_snoop;
+ __u32 snoopx = kctx->data->data_src.mem_snoopx;
+
+ return (snoopx << 5) | snoop;
+ }
+ if (entry->part == 4)
+ return kctx->data->data_src.mem_remote;
+ if (entry->part == 5)
+ return kctx->data->data_src.mem_lock;
+ if (entry->part == 6)
+ return kctx->data->data_src.mem_dtlb;
+ if (entry->part == 7)
+ return kctx->data->data_src.mem_blk;
+ if (entry->part == 8) {
+ union perf_mem_data_src___new *data = (void *)&kctx->data->data_src;
+
+ if (bpf_core_field_exists(data->mem_hops))
+ return data->mem_hops;
+
+ return 0;
+ }
+ /* return the whole word */
+ return kctx->data->data_src.val;
+ default:
+ break;
+ }
+ return 0;
+}
+
+#define CHECK_RESULT(data, op, val) \
+ if (!(data op val)) { \
+ if (!in_group) \
+ goto drop; \
+ } else if (in_group) { \
+ group_result = 1; \
+ }
+
+/* BPF program to be called from perf event overflow handler */
+SEC("perf_event")
+int perf_sample_filter(void *ctx)
+{
+ struct bpf_perf_event_data_kern *kctx;
+ struct perf_bpf_filter_entry *entry;
+ __u64 sample_data;
+ int in_group = 0;
+ int group_result = 0;
+ int i;
+
+ kctx = bpf_cast_to_kern_ctx(ctx);
+
+ for (i = 0; i < MAX_FILTERS; i++) {
+ int key = i; /* needed for verifier :( */
+
+ entry = bpf_map_lookup_elem(&filters, &key);
+ if (entry == NULL)
+ break;
+ sample_data = perf_get_sample(kctx, entry);
+
+ switch (entry->op) {
+ case PBF_OP_EQ:
+ CHECK_RESULT(sample_data, ==, entry->value)
+ break;
+ case PBF_OP_NEQ:
+ CHECK_RESULT(sample_data, !=, entry->value)
+ break;
+ case PBF_OP_GT:
+ CHECK_RESULT(sample_data, >, entry->value)
+ break;
+ case PBF_OP_GE:
+ CHECK_RESULT(sample_data, >=, entry->value)
+ break;
+ case PBF_OP_LT:
+ CHECK_RESULT(sample_data, <, entry->value)
+ break;
+ case PBF_OP_LE:
+ CHECK_RESULT(sample_data, <=, entry->value)
+ break;
+ case PBF_OP_AND:
+ CHECK_RESULT(sample_data, &, entry->value)
+ break;
+ case PBF_OP_GROUP_BEGIN:
+ in_group = 1;
+ group_result = 0;
+ break;
+ case PBF_OP_GROUP_END:
+ if (group_result == 0)
+ goto drop;
+ in_group = 0;
+ break;
+ }
+ }
+ /* generate sample data */
+ return 1;
+
+drop:
+ __sync_fetch_and_add(&dropped, 1);
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/vmlinux/.gitignore b/tools/perf/util/bpf_skel/vmlinux/.gitignore
new file mode 100644
index 000000000..49502c041
--- /dev/null
+++ b/tools/perf/util/bpf_skel/vmlinux/.gitignore
@@ -0,0 +1 @@
+!vmlinux.h
diff --git a/tools/perf/util/bpf_skel/vmlinux/vmlinux.h b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h
new file mode 100644
index 000000000..ab84a6e1d
--- /dev/null
+++ b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h
@@ -0,0 +1,184 @@
+#ifndef __VMLINUX_H
+#define __VMLINUX_H
+
+#include <linux/stddef.h> // for define __always_inline
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <linux/perf_event.h>
+#include <stdbool.h>
+
+// non-UAPI kernel data structures, used in the .bpf.c BPF tool component.
+
+// Just the fields used in these tools preserving the access index so that
+// libbpf can fixup offsets with the ones used in the kernel when loading the
+// BPF bytecode, if they differ from what is used here.
+
+typedef __u8 u8;
+typedef __u32 u32;
+typedef __u64 u64;
+typedef __s64 s64;
+
+typedef int pid_t;
+
+enum cgroup_subsys_id {
+ perf_event_cgrp_id = 8,
+};
+
+enum {
+ HI_SOFTIRQ = 0,
+ TIMER_SOFTIRQ,
+ NET_TX_SOFTIRQ,
+ NET_RX_SOFTIRQ,
+ BLOCK_SOFTIRQ,
+ IRQ_POLL_SOFTIRQ,
+ TASKLET_SOFTIRQ,
+ SCHED_SOFTIRQ,
+ HRTIMER_SOFTIRQ,
+ RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */
+
+ NR_SOFTIRQS
+};
+
+typedef struct {
+ s64 counter;
+} __attribute__((preserve_access_index)) atomic64_t;
+
+typedef atomic64_t atomic_long_t;
+
+struct raw_spinlock {
+ int rawlock;
+} __attribute__((preserve_access_index));
+
+typedef struct raw_spinlock raw_spinlock_t;
+
+typedef struct {
+ struct raw_spinlock rlock;
+} __attribute__((preserve_access_index)) spinlock_t;
+
+struct sighand_struct {
+ spinlock_t siglock;
+} __attribute__((preserve_access_index));
+
+struct rw_semaphore {
+ atomic_long_t owner;
+} __attribute__((preserve_access_index));
+
+struct mutex {
+ atomic_long_t owner;
+} __attribute__((preserve_access_index));
+
+struct kernfs_node {
+ u64 id;
+} __attribute__((preserve_access_index));
+
+struct cgroup {
+ struct kernfs_node *kn;
+ int level;
+} __attribute__((preserve_access_index));
+
+struct cgroup_subsys_state {
+ struct cgroup *cgroup;
+} __attribute__((preserve_access_index));
+
+struct css_set {
+ struct cgroup_subsys_state *subsys[13];
+ struct cgroup *dfl_cgrp;
+} __attribute__((preserve_access_index));
+
+struct mm_struct {
+ struct rw_semaphore mmap_lock;
+} __attribute__((preserve_access_index));
+
+struct task_struct {
+ unsigned int flags;
+ struct mm_struct *mm;
+ pid_t pid;
+ pid_t tgid;
+ char comm[16];
+ struct sighand_struct *sighand;
+ struct css_set *cgroups;
+} __attribute__((preserve_access_index));
+
+struct trace_entry {
+ short unsigned int type;
+ unsigned char flags;
+ unsigned char preempt_count;
+ int pid;
+} __attribute__((preserve_access_index));
+
+struct trace_event_raw_irq_handler_entry {
+ struct trace_entry ent;
+ int irq;
+ u32 __data_loc_name;
+ char __data[];
+} __attribute__((preserve_access_index));
+
+struct trace_event_raw_irq_handler_exit {
+ struct trace_entry ent;
+ int irq;
+ int ret;
+ char __data[];
+} __attribute__((preserve_access_index));
+
+struct trace_event_raw_softirq {
+ struct trace_entry ent;
+ unsigned int vec;
+ char __data[];
+} __attribute__((preserve_access_index));
+
+struct trace_event_raw_workqueue_execute_start {
+ struct trace_entry ent;
+ void *work;
+ void *function;
+ char __data[];
+} __attribute__((preserve_access_index));
+
+struct trace_event_raw_workqueue_execute_end {
+ struct trace_entry ent;
+ void *work;
+ void *function;
+ char __data[];
+} __attribute__((preserve_access_index));
+
+struct trace_event_raw_workqueue_activate_work {
+ struct trace_entry ent;
+ void *work;
+ char __data[];
+} __attribute__((preserve_access_index));
+
+struct perf_sample_data {
+ u64 addr;
+ u64 period;
+ union perf_sample_weight weight;
+ u64 txn;
+ union perf_mem_data_src data_src;
+ u64 ip;
+ struct {
+ u32 pid;
+ u32 tid;
+ } tid_entry;
+ u64 time;
+ u64 id;
+ struct {
+ u32 cpu;
+ } cpu_entry;
+ u64 phys_addr;
+ u64 data_page_size;
+ u64 code_page_size;
+} __attribute__((__aligned__(64))) __attribute__((preserve_access_index));
+
+struct bpf_perf_event_data_kern {
+ struct perf_sample_data *data;
+ struct perf_event *event;
+} __attribute__((preserve_access_index));
+
+/*
+ * If 'struct rq' isn't defined for lock_contention.bpf.c, for the sake of
+ * rq___old and rq___new, then the type for the 'runqueue' variable ends up
+ * being a forward declaration (BTF_KIND_FWD) while the kernel has it defined
+ * (BTF_KIND_STRUCT). The definition appears in vmlinux.h rather than
+ * lock_contention.bpf.c for consistency with a generated vmlinux.h.
+ */
+struct rq {};
+
+#endif // __VMLINUX_H
diff --git a/tools/perf/util/branch.c b/tools/perf/util/branch.c
new file mode 100644
index 000000000..378f16a24
--- /dev/null
+++ b/tools/perf/util/branch.c
@@ -0,0 +1,229 @@
+#include "util/map_symbol.h"
+#include "util/branch.h"
+#include <linux/kernel.h>
+
+static bool cross_area(u64 addr1, u64 addr2, int size)
+{
+ u64 align1, align2;
+
+ align1 = addr1 & ~(size - 1);
+ align2 = addr2 & ~(size - 1);
+
+ return (align1 != align2) ? true : false;
+}
+
+#define AREA_4K 4096
+#define AREA_2M (2 * 1024 * 1024)
+
+void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags,
+ u64 from, u64 to)
+{
+ if (flags->type == PERF_BR_UNKNOWN || from == 0)
+ return;
+
+ if (flags->type == PERF_BR_EXTEND_ABI)
+ st->new_counts[flags->new_type]++;
+ else
+ st->counts[flags->type]++;
+
+ if (flags->type == PERF_BR_COND) {
+ if (to > from)
+ st->cond_fwd++;
+ else
+ st->cond_bwd++;
+ }
+
+ if (cross_area(from, to, AREA_2M))
+ st->cross_2m++;
+ else if (cross_area(from, to, AREA_4K))
+ st->cross_4k++;
+}
+
+const char *branch_new_type_name(int new_type)
+{
+ const char *branch_new_names[PERF_BR_NEW_MAX] = {
+ "FAULT_ALGN",
+ "FAULT_DATA",
+ "FAULT_INST",
+/*
+ * TODO: This switch should happen on 'session->header.env.arch'
+ * instead, because an arm64 platform perf recording could be
+ * opened for analysis on other platforms as well.
+ */
+#ifdef __aarch64__
+ "ARM64_FIQ",
+ "ARM64_DEBUG_HALT",
+ "ARM64_DEBUG_EXIT",
+ "ARM64_DEBUG_INST",
+ "ARM64_DEBUG_DATA"
+#else
+ "ARCH_1",
+ "ARCH_2",
+ "ARCH_3",
+ "ARCH_4",
+ "ARCH_5"
+#endif
+ };
+
+ if (new_type >= 0 && new_type < PERF_BR_NEW_MAX)
+ return branch_new_names[new_type];
+
+ return NULL;
+}
+
+const char *branch_type_name(int type)
+{
+ const char *branch_names[PERF_BR_MAX] = {
+ "N/A",
+ "COND",
+ "UNCOND",
+ "IND",
+ "CALL",
+ "IND_CALL",
+ "RET",
+ "SYSCALL",
+ "SYSRET",
+ "COND_CALL",
+ "COND_RET",
+ "ERET",
+ "IRQ",
+ "SERROR",
+ "NO_TX",
+ "", // Needed for PERF_BR_EXTEND_ABI that ends up triggering some compiler warnings about NULL deref
+ };
+
+ if (type >= 0 && type < PERF_BR_MAX)
+ return branch_names[type];
+
+ return NULL;
+}
+
+const char *get_branch_type(struct branch_entry *e)
+{
+ if (e->flags.type == PERF_BR_UNKNOWN)
+ return "";
+
+ if (e->flags.type == PERF_BR_EXTEND_ABI)
+ return branch_new_type_name(e->flags.new_type);
+
+ return branch_type_name(e->flags.type);
+}
+
+void branch_type_stat_display(FILE *fp, struct branch_type_stat *st)
+{
+ u64 total = 0;
+ int i;
+
+ for (i = 0; i < PERF_BR_MAX; i++)
+ total += st->counts[i];
+
+ if (total == 0)
+ return;
+
+ fprintf(fp, "\n#");
+ fprintf(fp, "\n# Branch Statistics:");
+ fprintf(fp, "\n#");
+
+ if (st->cond_fwd > 0) {
+ fprintf(fp, "\n%8s: %5.1f%%",
+ "COND_FWD",
+ 100.0 * (double)st->cond_fwd / (double)total);
+ }
+
+ if (st->cond_bwd > 0) {
+ fprintf(fp, "\n%8s: %5.1f%%",
+ "COND_BWD",
+ 100.0 * (double)st->cond_bwd / (double)total);
+ }
+
+ if (st->cross_4k > 0) {
+ fprintf(fp, "\n%8s: %5.1f%%",
+ "CROSS_4K",
+ 100.0 * (double)st->cross_4k / (double)total);
+ }
+
+ if (st->cross_2m > 0) {
+ fprintf(fp, "\n%8s: %5.1f%%",
+ "CROSS_2M",
+ 100.0 * (double)st->cross_2m / (double)total);
+ }
+
+ for (i = 0; i < PERF_BR_MAX; i++) {
+ if (st->counts[i] > 0)
+ fprintf(fp, "\n%8s: %5.1f%%",
+ branch_type_name(i),
+ 100.0 *
+ (double)st->counts[i] / (double)total);
+ }
+
+ for (i = 0; i < PERF_BR_NEW_MAX; i++) {
+ if (st->new_counts[i] > 0)
+ fprintf(fp, "\n%8s: %5.1f%%",
+ branch_new_type_name(i),
+ 100.0 *
+ (double)st->new_counts[i] / (double)total);
+ }
+
+}
+
+static int count_str_scnprintf(int idx, const char *str, char *bf, int size)
+{
+ return scnprintf(bf, size, "%s%s", (idx) ? " " : " (", str);
+}
+
+int branch_type_str(struct branch_type_stat *st, char *bf, int size)
+{
+ int i, j = 0, printed = 0;
+ u64 total = 0;
+
+ for (i = 0; i < PERF_BR_MAX; i++)
+ total += st->counts[i];
+
+ for (i = 0; i < PERF_BR_NEW_MAX; i++)
+ total += st->new_counts[i];
+
+ if (total == 0)
+ return 0;
+
+ if (st->cond_fwd > 0)
+ printed += count_str_scnprintf(j++, "COND_FWD", bf + printed, size - printed);
+
+ if (st->cond_bwd > 0)
+ printed += count_str_scnprintf(j++, "COND_BWD", bf + printed, size - printed);
+
+ for (i = 0; i < PERF_BR_MAX; i++) {
+ if (i == PERF_BR_COND)
+ continue;
+
+ if (st->counts[i] > 0)
+ printed += count_str_scnprintf(j++, branch_type_name(i), bf + printed, size - printed);
+ }
+
+ for (i = 0; i < PERF_BR_NEW_MAX; i++) {
+ if (st->new_counts[i] > 0)
+ printed += count_str_scnprintf(j++, branch_new_type_name(i), bf + printed, size - printed);
+ }
+
+ if (st->cross_4k > 0)
+ printed += count_str_scnprintf(j++, "CROSS_4K", bf + printed, size - printed);
+
+ if (st->cross_2m > 0)
+ printed += count_str_scnprintf(j++, "CROSS_2M", bf + printed, size - printed);
+
+ return printed;
+}
+
+const char *branch_spec_desc(int spec)
+{
+ const char *branch_spec_outcomes[PERF_BR_SPEC_MAX] = {
+ "N/A",
+ "SPEC_WRONG_PATH",
+ "NON_SPEC_CORRECT_PATH",
+ "SPEC_CORRECT_PATH",
+ };
+
+ if (spec >= 0 && spec < PERF_BR_SPEC_MAX)
+ return branch_spec_outcomes[spec];
+
+ return NULL;
+}
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
new file mode 100644
index 000000000..e41bfffe2
--- /dev/null
+++ b/tools/perf/util/branch.h
@@ -0,0 +1,94 @@
+#ifndef _PERF_BRANCH_H
+#define _PERF_BRANCH_H 1
+/*
+ * The linux/stddef.h isn't need here, but is needed for __always_inline used
+ * in files included from uapi/linux/perf_event.h such as
+ * /usr/include/linux/swab.h and /usr/include/linux/byteorder/little_endian.h,
+ * detected in at least musl libc, used in Alpine Linux. -acme
+ */
+#include <stdio.h>
+#include <linux/perf_event.h>
+#include <linux/types.h>
+#include "util/map_symbol.h"
+#include "util/sample.h"
+
+struct branch_flags {
+ union {
+ u64 value;
+ struct {
+ u64 mispred:1;
+ u64 predicted:1;
+ u64 in_tx:1;
+ u64 abort:1;
+ u64 cycles:16;
+ u64 type:4;
+ u64 spec:2;
+ u64 new_type:4;
+ u64 priv:3;
+ u64 reserved:31;
+ };
+ };
+};
+
+struct branch_info {
+ struct addr_map_symbol from;
+ struct addr_map_symbol to;
+ struct branch_flags flags;
+ char *srcline_from;
+ char *srcline_to;
+};
+
+struct branch_entry {
+ u64 from;
+ u64 to;
+ struct branch_flags flags;
+};
+
+struct branch_stack {
+ u64 nr;
+ u64 hw_idx;
+ struct branch_entry entries[];
+};
+
+/*
+ * The hw_idx is only available when PERF_SAMPLE_BRANCH_HW_INDEX is applied.
+ * Otherwise, the output format of a sample with branch stack is
+ * struct branch_stack {
+ * u64 nr;
+ * struct branch_entry entries[0];
+ * }
+ * Check whether the hw_idx is available,
+ * and return the corresponding pointer of entries[0].
+ */
+static inline struct branch_entry *perf_sample__branch_entries(struct perf_sample *sample)
+{
+ u64 *entry = (u64 *)sample->branch_stack;
+
+ entry++;
+ if (sample->no_hw_idx)
+ return (struct branch_entry *)entry;
+ return (struct branch_entry *)(++entry);
+}
+
+struct branch_type_stat {
+ bool branch_to;
+ u64 counts[PERF_BR_MAX];
+ u64 new_counts[PERF_BR_NEW_MAX];
+ u64 cond_fwd;
+ u64 cond_bwd;
+ u64 cross_4k;
+ u64 cross_2m;
+};
+
+void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags,
+ u64 from, u64 to);
+
+const char *branch_type_name(int type);
+const char *branch_new_type_name(int new_type);
+const char *get_branch_type(struct branch_entry *e);
+void branch_type_stat_display(FILE *fp, struct branch_type_stat *st);
+int branch_type_str(struct branch_type_stat *st, char *bf, int bfsize);
+
+const char *branch_spec_desc(int spec);
+
+#endif /* _PERF_BRANCH_H */
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
new file mode 100644
index 000000000..03c64b853
--- /dev/null
+++ b/tools/perf/util/build-id.c
@@ -0,0 +1,1022 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * build-id.c
+ *
+ * build-id support
+ *
+ * Copyright (C) 2009, 2010 Red Hat Inc.
+ * Copyright (C) 2009, 2010 Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+#include "util.h" // lsdir(), mkdir_p(), rm_rf()
+#include <dirent.h>
+#include <errno.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include "util/copyfile.h"
+#include "dso.h"
+#include "build-id.h"
+#include "event.h"
+#include "namespaces.h"
+#include "map.h"
+#include "symbol.h"
+#include "thread.h"
+#include <linux/kernel.h>
+#include "debug.h"
+#include "session.h"
+#include "tool.h"
+#include "header.h"
+#include "vdso.h"
+#include "path.h"
+#include "probe-file.h"
+#include "strlist.h"
+
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+#include <elfutils/debuginfod.h>
+#endif
+
+#include <linux/ctype.h>
+#include <linux/zalloc.h>
+#include <linux/string.h>
+#include <asm/bug.h>
+
+static bool no_buildid_cache;
+
+int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel __maybe_unused,
+ struct machine *machine)
+{
+ struct addr_location al;
+ struct thread *thread = machine__findnew_thread(machine, sample->pid,
+ sample->tid);
+
+ if (thread == NULL) {
+ pr_err("problem processing %d event, skipping it.\n",
+ event->header.type);
+ return -1;
+ }
+
+ addr_location__init(&al);
+ if (thread__find_map(thread, sample->cpumode, sample->ip, &al))
+ map__dso(al.map)->hit = 1;
+
+ addr_location__exit(&al);
+ thread__put(thread);
+ return 0;
+}
+
+static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample
+ __maybe_unused,
+ struct machine *machine)
+{
+ struct thread *thread = machine__findnew_thread(machine,
+ event->fork.pid,
+ event->fork.tid);
+
+ dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
+ event->fork.ppid, event->fork.ptid);
+
+ if (thread) {
+ machine__remove_thread(machine, thread);
+ thread__put(thread);
+ }
+
+ return 0;
+}
+
+struct perf_tool build_id__mark_dso_hit_ops = {
+ .sample = build_id__mark_dso_hit,
+ .mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
+ .fork = perf_event__process_fork,
+ .exit = perf_event__exit_del_thread,
+ .attr = perf_event__process_attr,
+ .build_id = perf_event__process_build_id,
+ .ordered_events = true,
+};
+
+int build_id__sprintf(const struct build_id *build_id, char *bf)
+{
+ char *bid = bf;
+ const u8 *raw = build_id->data;
+ size_t i;
+
+ bf[0] = 0x0;
+
+ for (i = 0; i < build_id->size; ++i) {
+ sprintf(bid, "%02x", *raw);
+ ++raw;
+ bid += 2;
+ }
+
+ return (bid - bf) + 1;
+}
+
+int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id)
+{
+ char notes[PATH_MAX];
+ struct build_id bid;
+ int ret;
+
+ if (!root_dir)
+ root_dir = "";
+
+ scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir);
+
+ ret = sysfs__read_build_id(notes, &bid);
+ if (ret < 0)
+ return ret;
+
+ return build_id__sprintf(&bid, sbuild_id);
+}
+
+int filename__sprintf_build_id(const char *pathname, char *sbuild_id)
+{
+ struct build_id bid;
+ int ret;
+
+ ret = filename__read_build_id(pathname, &bid);
+ if (ret < 0)
+ return ret;
+
+ return build_id__sprintf(&bid, sbuild_id);
+}
+
+/* asnprintf consolidates asprintf and snprintf */
+static int asnprintf(char **strp, size_t size, const char *fmt, ...)
+{
+ va_list ap;
+ int ret;
+
+ if (!strp)
+ return -EINVAL;
+
+ va_start(ap, fmt);
+ if (*strp)
+ ret = vsnprintf(*strp, size, fmt, ap);
+ else
+ ret = vasprintf(strp, fmt, ap);
+ va_end(ap);
+
+ return ret;
+}
+
+char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
+ size_t size)
+{
+ bool retry_old = true;
+
+ snprintf(bf, size, "%s/%s/%s/kallsyms",
+ buildid_dir, DSO__NAME_KALLSYMS, sbuild_id);
+retry:
+ if (!access(bf, F_OK))
+ return bf;
+ if (retry_old) {
+ /* Try old style kallsyms cache */
+ snprintf(bf, size, "%s/%s/%s",
+ buildid_dir, DSO__NAME_KALLSYMS, sbuild_id);
+ retry_old = false;
+ goto retry;
+ }
+
+ return NULL;
+}
+
+char *build_id_cache__linkname(const char *sbuild_id, char *bf, size_t size)
+{
+ char *tmp = bf;
+ int ret = asnprintf(&bf, size, "%s/.build-id/%.2s/%s", buildid_dir,
+ sbuild_id, sbuild_id + 2);
+ if (ret < 0 || (tmp && size < (unsigned int)ret))
+ return NULL;
+ return bf;
+}
+
+/* The caller is responsible to free the returned buffer. */
+char *build_id_cache__origname(const char *sbuild_id)
+{
+ char *linkname;
+ char buf[PATH_MAX];
+ char *ret = NULL, *p;
+ size_t offs = 5; /* == strlen("../..") */
+ ssize_t len;
+
+ linkname = build_id_cache__linkname(sbuild_id, NULL, 0);
+ if (!linkname)
+ return NULL;
+
+ len = readlink(linkname, buf, sizeof(buf) - 1);
+ if (len <= 0)
+ goto out;
+ buf[len] = '\0';
+
+ /* The link should be "../..<origpath>/<sbuild_id>" */
+ p = strrchr(buf, '/'); /* Cut off the "/<sbuild_id>" */
+ if (p && (p > buf + offs)) {
+ *p = '\0';
+ if (buf[offs + 1] == '[')
+ offs++; /*
+ * This is a DSO name, like [kernel.kallsyms].
+ * Skip the first '/', since this is not the
+ * cache of a regular file.
+ */
+ ret = strdup(buf + offs); /* Skip "../..[/]" */
+ }
+out:
+ free(linkname);
+ return ret;
+}
+
+/* Check if the given build_id cache is valid on current running system */
+static bool build_id_cache__valid_id(char *sbuild_id)
+{
+ char real_sbuild_id[SBUILD_ID_SIZE] = "";
+ char *pathname;
+ int ret = 0;
+ bool result = false;
+
+ pathname = build_id_cache__origname(sbuild_id);
+ if (!pathname)
+ return false;
+
+ if (!strcmp(pathname, DSO__NAME_KALLSYMS))
+ ret = sysfs__sprintf_build_id("/", real_sbuild_id);
+ else if (pathname[0] == '/')
+ ret = filename__sprintf_build_id(pathname, real_sbuild_id);
+ else
+ ret = -EINVAL; /* Should we support other special DSO cache? */
+ if (ret >= 0)
+ result = (strcmp(sbuild_id, real_sbuild_id) == 0);
+ free(pathname);
+
+ return result;
+}
+
+static const char *build_id_cache__basename(bool is_kallsyms, bool is_vdso,
+ bool is_debug)
+{
+ return is_kallsyms ? "kallsyms" : (is_vdso ? "vdso" : (is_debug ?
+ "debug" : "elf"));
+}
+
+char *__dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
+ bool is_debug, bool is_kallsyms)
+{
+ bool is_vdso = dso__is_vdso((struct dso *)dso);
+ char sbuild_id[SBUILD_ID_SIZE];
+ char *linkname;
+ bool alloc = (bf == NULL);
+ int ret;
+
+ if (!dso->has_build_id)
+ return NULL;
+
+ build_id__sprintf(&dso->bid, sbuild_id);
+ linkname = build_id_cache__linkname(sbuild_id, NULL, 0);
+ if (!linkname)
+ return NULL;
+
+ /* Check if old style build_id cache */
+ if (is_regular_file(linkname))
+ ret = asnprintf(&bf, size, "%s", linkname);
+ else
+ ret = asnprintf(&bf, size, "%s/%s", linkname,
+ build_id_cache__basename(is_kallsyms, is_vdso,
+ is_debug));
+ if (ret < 0 || (!alloc && size < (unsigned int)ret))
+ bf = NULL;
+ free(linkname);
+
+ return bf;
+}
+
+char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
+ bool is_debug)
+{
+ bool is_kallsyms = dso__is_kallsyms((struct dso *)dso);
+
+ return __dso__build_id_filename(dso, bf, size, is_debug, is_kallsyms);
+}
+
+static int write_buildid(const char *name, size_t name_len, struct build_id *bid,
+ pid_t pid, u16 misc, struct feat_fd *fd)
+{
+ int err;
+ struct perf_record_header_build_id b;
+ size_t len;
+
+ len = name_len + 1;
+ len = PERF_ALIGN(len, NAME_ALIGN);
+
+ memset(&b, 0, sizeof(b));
+ memcpy(&b.data, bid->data, bid->size);
+ b.size = (u8) bid->size;
+ misc |= PERF_RECORD_MISC_BUILD_ID_SIZE;
+ b.pid = pid;
+ b.header.misc = misc;
+ b.header.size = sizeof(b) + len;
+
+ err = do_write(fd, &b, sizeof(b));
+ if (err < 0)
+ return err;
+
+ return write_padded(fd, name, name_len + 1, len);
+}
+
+static int machine__write_buildid_table(struct machine *machine,
+ struct feat_fd *fd)
+{
+ int err = 0;
+ struct dso *pos;
+ u16 kmisc = PERF_RECORD_MISC_KERNEL,
+ umisc = PERF_RECORD_MISC_USER;
+
+ if (!machine__is_host(machine)) {
+ kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
+ umisc = PERF_RECORD_MISC_GUEST_USER;
+ }
+
+ dsos__for_each_with_build_id(pos, &machine->dsos.head) {
+ const char *name;
+ size_t name_len;
+ bool in_kernel = false;
+
+ if (!pos->hit && !dso__is_vdso(pos))
+ continue;
+
+ if (dso__is_vdso(pos)) {
+ name = pos->short_name;
+ name_len = pos->short_name_len;
+ } else if (dso__is_kcore(pos)) {
+ name = machine->mmap_name;
+ name_len = strlen(name);
+ } else {
+ name = pos->long_name;
+ name_len = pos->long_name_len;
+ }
+
+ in_kernel = pos->kernel ||
+ is_kernel_module(name,
+ PERF_RECORD_MISC_CPUMODE_UNKNOWN);
+ err = write_buildid(name, name_len, &pos->bid, machine->pid,
+ in_kernel ? kmisc : umisc, fd);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+int perf_session__write_buildid_table(struct perf_session *session,
+ struct feat_fd *fd)
+{
+ struct rb_node *nd;
+ int err = machine__write_buildid_table(&session->machines.host, fd);
+
+ if (err)
+ return err;
+
+ for (nd = rb_first_cached(&session->machines.guests); nd;
+ nd = rb_next(nd)) {
+ struct machine *pos = rb_entry(nd, struct machine, rb_node);
+ err = machine__write_buildid_table(pos, fd);
+ if (err)
+ break;
+ }
+ return err;
+}
+
+static int __dsos__hit_all(struct list_head *head)
+{
+ struct dso *pos;
+
+ list_for_each_entry(pos, head, node)
+ pos->hit = true;
+
+ return 0;
+}
+
+static int machine__hit_all_dsos(struct machine *machine)
+{
+ return __dsos__hit_all(&machine->dsos.head);
+}
+
+int dsos__hit_all(struct perf_session *session)
+{
+ struct rb_node *nd;
+ int err;
+
+ err = machine__hit_all_dsos(&session->machines.host);
+ if (err)
+ return err;
+
+ for (nd = rb_first_cached(&session->machines.guests); nd;
+ nd = rb_next(nd)) {
+ struct machine *pos = rb_entry(nd, struct machine, rb_node);
+
+ err = machine__hit_all_dsos(pos);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+void disable_buildid_cache(void)
+{
+ no_buildid_cache = true;
+}
+
+static bool lsdir_bid_head_filter(const char *name __maybe_unused,
+ struct dirent *d)
+{
+ return (strlen(d->d_name) == 2) &&
+ isxdigit(d->d_name[0]) && isxdigit(d->d_name[1]);
+}
+
+static bool lsdir_bid_tail_filter(const char *name __maybe_unused,
+ struct dirent *d)
+{
+ int i = 0;
+ while (isxdigit(d->d_name[i]) && i < SBUILD_ID_SIZE - 3)
+ i++;
+ return (i >= SBUILD_ID_MIN_SIZE - 3) && (i <= SBUILD_ID_SIZE - 3) &&
+ (d->d_name[i] == '\0');
+}
+
+struct strlist *build_id_cache__list_all(bool validonly)
+{
+ struct strlist *toplist, *linklist = NULL, *bidlist;
+ struct str_node *nd, *nd2;
+ char *topdir, *linkdir = NULL;
+ char sbuild_id[SBUILD_ID_SIZE];
+
+ /* for filename__ functions */
+ if (validonly)
+ symbol__init(NULL);
+
+ /* Open the top-level directory */
+ if (asprintf(&topdir, "%s/.build-id/", buildid_dir) < 0)
+ return NULL;
+
+ bidlist = strlist__new(NULL, NULL);
+ if (!bidlist)
+ goto out;
+
+ toplist = lsdir(topdir, lsdir_bid_head_filter);
+ if (!toplist) {
+ pr_debug("Error in lsdir(%s): %d\n", topdir, errno);
+ /* If there is no buildid cache, return an empty list */
+ if (errno == ENOENT)
+ goto out;
+ goto err_out;
+ }
+
+ strlist__for_each_entry(nd, toplist) {
+ if (asprintf(&linkdir, "%s/%s", topdir, nd->s) < 0)
+ goto err_out;
+ /* Open the lower-level directory */
+ linklist = lsdir(linkdir, lsdir_bid_tail_filter);
+ if (!linklist) {
+ pr_debug("Error in lsdir(%s): %d\n", linkdir, errno);
+ goto err_out;
+ }
+ strlist__for_each_entry(nd2, linklist) {
+ if (snprintf(sbuild_id, SBUILD_ID_SIZE, "%s%s",
+ nd->s, nd2->s) > SBUILD_ID_SIZE - 1)
+ goto err_out;
+ if (validonly && !build_id_cache__valid_id(sbuild_id))
+ continue;
+ if (strlist__add(bidlist, sbuild_id) < 0)
+ goto err_out;
+ }
+ strlist__delete(linklist);
+ zfree(&linkdir);
+ }
+
+out_free:
+ strlist__delete(toplist);
+out:
+ free(topdir);
+
+ return bidlist;
+
+err_out:
+ strlist__delete(linklist);
+ zfree(&linkdir);
+ strlist__delete(bidlist);
+ bidlist = NULL;
+ goto out_free;
+}
+
+static bool str_is_build_id(const char *maybe_sbuild_id, size_t len)
+{
+ size_t i;
+
+ for (i = 0; i < len; i++) {
+ if (!isxdigit(maybe_sbuild_id[i]))
+ return false;
+ }
+ return true;
+}
+
+/* Return the valid complete build-id */
+char *build_id_cache__complement(const char *incomplete_sbuild_id)
+{
+ struct strlist *bidlist;
+ struct str_node *nd, *cand = NULL;
+ char *sbuild_id = NULL;
+ size_t len = strlen(incomplete_sbuild_id);
+
+ if (len >= SBUILD_ID_SIZE ||
+ !str_is_build_id(incomplete_sbuild_id, len))
+ return NULL;
+
+ bidlist = build_id_cache__list_all(true);
+ if (!bidlist)
+ return NULL;
+
+ strlist__for_each_entry(nd, bidlist) {
+ if (strncmp(nd->s, incomplete_sbuild_id, len) != 0)
+ continue;
+ if (cand) { /* Error: There are more than 2 candidates. */
+ cand = NULL;
+ break;
+ }
+ cand = nd;
+ }
+ if (cand)
+ sbuild_id = strdup(cand->s);
+ strlist__delete(bidlist);
+
+ return sbuild_id;
+}
+
+char *build_id_cache__cachedir(const char *sbuild_id, const char *name,
+ struct nsinfo *nsi, bool is_kallsyms,
+ bool is_vdso)
+{
+ char *realname = NULL, *filename;
+ bool slash = is_kallsyms || is_vdso;
+
+ if (!slash)
+ realname = nsinfo__realpath(name, nsi);
+
+ if (asprintf(&filename, "%s%s%s%s%s", buildid_dir, slash ? "/" : "",
+ is_vdso ? DSO__NAME_VDSO : (realname ? realname : name),
+ sbuild_id ? "/" : "", sbuild_id ?: "") < 0)
+ filename = NULL;
+
+ free(realname);
+ return filename;
+}
+
+int build_id_cache__list_build_ids(const char *pathname, struct nsinfo *nsi,
+ struct strlist **result)
+{
+ char *dir_name;
+ int ret = 0;
+
+ dir_name = build_id_cache__cachedir(NULL, pathname, nsi, false, false);
+ if (!dir_name)
+ return -ENOMEM;
+
+ *result = lsdir(dir_name, lsdir_no_dot_filter);
+ if (!*result)
+ ret = -errno;
+ free(dir_name);
+
+ return ret;
+}
+
+#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_GELF_GETNOTE_SUPPORT)
+static int build_id_cache__add_sdt_cache(const char *sbuild_id,
+ const char *realname,
+ struct nsinfo *nsi)
+{
+ struct probe_cache *cache;
+ int ret;
+ struct nscookie nsc;
+
+ cache = probe_cache__new(sbuild_id, nsi);
+ if (!cache)
+ return -1;
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ ret = probe_cache__scan_sdt(cache, realname);
+ nsinfo__mountns_exit(&nsc);
+ if (ret >= 0) {
+ pr_debug4("Found %d SDTs in %s\n", ret, realname);
+ if (probe_cache__commit(cache) < 0)
+ ret = -1;
+ }
+ probe_cache__delete(cache);
+ return ret;
+}
+#else
+#define build_id_cache__add_sdt_cache(sbuild_id, realname, nsi) (0)
+#endif
+
+static char *build_id_cache__find_debug(const char *sbuild_id,
+ struct nsinfo *nsi,
+ const char *root_dir)
+{
+ const char *dirname = "/usr/lib/debug/.build-id/";
+ char *realname = NULL;
+ char dirbuf[PATH_MAX];
+ char *debugfile;
+ struct nscookie nsc;
+ size_t len = 0;
+
+ debugfile = calloc(1, PATH_MAX);
+ if (!debugfile)
+ goto out;
+
+ if (root_dir) {
+ path__join(dirbuf, PATH_MAX, root_dir, dirname);
+ dirname = dirbuf;
+ }
+
+ len = __symbol__join_symfs(debugfile, PATH_MAX, dirname);
+ snprintf(debugfile + len, PATH_MAX - len, "%.2s/%s.debug", sbuild_id,
+ sbuild_id + 2);
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ realname = realpath(debugfile, NULL);
+ if (realname && access(realname, R_OK))
+ zfree(&realname);
+ nsinfo__mountns_exit(&nsc);
+
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+ if (realname == NULL) {
+ debuginfod_client* c;
+
+ pr_debug("Downloading debug info with build id %s\n", sbuild_id);
+
+ c = debuginfod_begin();
+ if (c != NULL) {
+ int fd = debuginfod_find_debuginfo(c,
+ (const unsigned char*)sbuild_id, 0,
+ &realname);
+ if (fd >= 0)
+ close(fd); /* retaining reference by realname */
+ debuginfod_end(c);
+ }
+ }
+#endif
+
+out:
+ free(debugfile);
+ return realname;
+}
+
+int
+build_id_cache__add(const char *sbuild_id, const char *name, const char *realname,
+ struct nsinfo *nsi, bool is_kallsyms, bool is_vdso,
+ const char *proper_name, const char *root_dir)
+{
+ const size_t size = PATH_MAX;
+ char *filename = NULL, *dir_name = NULL, *linkname = zalloc(size), *tmp;
+ char *debugfile = NULL;
+ int err = -1;
+
+ if (!proper_name)
+ proper_name = name;
+
+ dir_name = build_id_cache__cachedir(sbuild_id, proper_name, nsi, is_kallsyms,
+ is_vdso);
+ if (!dir_name)
+ goto out_free;
+
+ /* Remove old style build-id cache */
+ if (is_regular_file(dir_name))
+ if (unlink(dir_name))
+ goto out_free;
+
+ if (mkdir_p(dir_name, 0755))
+ goto out_free;
+
+ /* Save the allocated buildid dirname */
+ if (asprintf(&filename, "%s/%s", dir_name,
+ build_id_cache__basename(is_kallsyms, is_vdso,
+ false)) < 0) {
+ filename = NULL;
+ goto out_free;
+ }
+
+ if (access(filename, F_OK)) {
+ if (is_kallsyms) {
+ if (copyfile("/proc/kallsyms", filename))
+ goto out_free;
+ } else if (nsi && nsinfo__need_setns(nsi)) {
+ if (copyfile_ns(name, filename, nsi))
+ goto out_free;
+ } else if (link(realname, filename) && errno != EEXIST) {
+ struct stat f_stat;
+
+ if (!(stat(name, &f_stat) < 0) &&
+ copyfile_mode(name, filename, f_stat.st_mode))
+ goto out_free;
+ }
+ }
+
+ /* Some binaries are stripped, but have .debug files with their symbol
+ * table. Check to see if we can locate one of those, since the elf
+ * file itself may not be very useful to users of our tools without a
+ * symtab.
+ */
+ if (!is_kallsyms && !is_vdso &&
+ strncmp(".ko", name + strlen(name) - 3, 3)) {
+ debugfile = build_id_cache__find_debug(sbuild_id, nsi, root_dir);
+ if (debugfile) {
+ zfree(&filename);
+ if (asprintf(&filename, "%s/%s", dir_name,
+ build_id_cache__basename(false, false, true)) < 0) {
+ filename = NULL;
+ goto out_free;
+ }
+ if (access(filename, F_OK)) {
+ if (nsi && nsinfo__need_setns(nsi)) {
+ if (copyfile_ns(debugfile, filename,
+ nsi))
+ goto out_free;
+ } else if (link(debugfile, filename) &&
+ errno != EEXIST &&
+ copyfile(debugfile, filename))
+ goto out_free;
+ }
+ }
+ }
+
+ if (!build_id_cache__linkname(sbuild_id, linkname, size))
+ goto out_free;
+ tmp = strrchr(linkname, '/');
+ *tmp = '\0';
+
+ if (access(linkname, X_OK) && mkdir_p(linkname, 0755))
+ goto out_free;
+
+ *tmp = '/';
+ tmp = dir_name + strlen(buildid_dir) - 5;
+ memcpy(tmp, "../..", 5);
+
+ if (symlink(tmp, linkname) == 0) {
+ err = 0;
+ } else if (errno == EEXIST) {
+ char path[PATH_MAX];
+ ssize_t len;
+
+ len = readlink(linkname, path, sizeof(path) - 1);
+ if (len <= 0) {
+ pr_err("Can't read link: %s\n", linkname);
+ goto out_free;
+ }
+ path[len] = '\0';
+
+ if (strcmp(tmp, path)) {
+ pr_debug("build <%s> already linked to %s\n",
+ sbuild_id, linkname);
+ }
+ err = 0;
+ }
+
+ /* Update SDT cache : error is just warned */
+ if (realname &&
+ build_id_cache__add_sdt_cache(sbuild_id, realname, nsi) < 0)
+ pr_debug4("Failed to update/scan SDT cache for %s\n", realname);
+
+out_free:
+ free(filename);
+ free(debugfile);
+ free(dir_name);
+ free(linkname);
+ return err;
+}
+
+int __build_id_cache__add_s(const char *sbuild_id, const char *name,
+ struct nsinfo *nsi, bool is_kallsyms, bool is_vdso,
+ const char *proper_name, const char *root_dir)
+{
+ char *realname = NULL;
+ int err = -1;
+
+ if (!is_kallsyms) {
+ if (!is_vdso)
+ realname = nsinfo__realpath(name, nsi);
+ else
+ realname = realpath(name, NULL);
+ if (!realname)
+ goto out_free;
+ }
+
+ err = build_id_cache__add(sbuild_id, name, realname, nsi,
+ is_kallsyms, is_vdso, proper_name, root_dir);
+out_free:
+ if (!is_kallsyms)
+ free(realname);
+ return err;
+}
+
+static int build_id_cache__add_b(const struct build_id *bid,
+ const char *name, struct nsinfo *nsi,
+ bool is_kallsyms, bool is_vdso,
+ const char *proper_name,
+ const char *root_dir)
+{
+ char sbuild_id[SBUILD_ID_SIZE];
+
+ build_id__sprintf(bid, sbuild_id);
+
+ return __build_id_cache__add_s(sbuild_id, name, nsi, is_kallsyms,
+ is_vdso, proper_name, root_dir);
+}
+
+bool build_id_cache__cached(const char *sbuild_id)
+{
+ bool ret = false;
+ char *filename = build_id_cache__linkname(sbuild_id, NULL, 0);
+
+ if (filename && !access(filename, F_OK))
+ ret = true;
+ free(filename);
+
+ return ret;
+}
+
+int build_id_cache__remove_s(const char *sbuild_id)
+{
+ const size_t size = PATH_MAX;
+ char *filename = zalloc(size),
+ *linkname = zalloc(size), *tmp;
+ int err = -1;
+
+ if (filename == NULL || linkname == NULL)
+ goto out_free;
+
+ if (!build_id_cache__linkname(sbuild_id, linkname, size))
+ goto out_free;
+
+ if (access(linkname, F_OK))
+ goto out_free;
+
+ if (readlink(linkname, filename, size - 1) < 0)
+ goto out_free;
+
+ if (unlink(linkname))
+ goto out_free;
+
+ /*
+ * Since the link is relative, we must make it absolute:
+ */
+ tmp = strrchr(linkname, '/') + 1;
+ snprintf(tmp, size - (tmp - linkname), "%s", filename);
+
+ if (rm_rf(linkname))
+ goto out_free;
+
+ err = 0;
+out_free:
+ free(filename);
+ free(linkname);
+ return err;
+}
+
+static int filename__read_build_id_ns(const char *filename,
+ struct build_id *bid,
+ struct nsinfo *nsi)
+{
+ struct nscookie nsc;
+ int ret;
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ ret = filename__read_build_id(filename, bid);
+ nsinfo__mountns_exit(&nsc);
+
+ return ret;
+}
+
+static bool dso__build_id_mismatch(struct dso *dso, const char *name)
+{
+ struct build_id bid;
+ bool ret = false;
+
+ mutex_lock(&dso->lock);
+ if (filename__read_build_id_ns(name, &bid, dso->nsinfo) >= 0)
+ ret = !dso__build_id_equal(dso, &bid);
+
+ mutex_unlock(&dso->lock);
+
+ return ret;
+}
+
+static int dso__cache_build_id(struct dso *dso, struct machine *machine,
+ void *priv __maybe_unused)
+{
+ bool is_kallsyms = dso__is_kallsyms(dso);
+ bool is_vdso = dso__is_vdso(dso);
+ const char *name = dso->long_name;
+ const char *proper_name = NULL;
+ const char *root_dir = NULL;
+ char *allocated_name = NULL;
+ int ret = 0;
+
+ if (!dso->has_build_id)
+ return 0;
+
+ if (dso__is_kcore(dso)) {
+ is_kallsyms = true;
+ name = machine->mmap_name;
+ }
+
+ if (!machine__is_host(machine)) {
+ if (*machine->root_dir) {
+ root_dir = machine->root_dir;
+ ret = asprintf(&allocated_name, "%s/%s", root_dir, name);
+ if (ret < 0)
+ return ret;
+ proper_name = name;
+ name = allocated_name;
+ } else if (is_kallsyms) {
+ /* Cannot get guest kallsyms */
+ return 0;
+ }
+ }
+
+ if (!is_kallsyms && dso__build_id_mismatch(dso, name))
+ goto out_free;
+
+ mutex_lock(&dso->lock);
+ ret = build_id_cache__add_b(&dso->bid, name, dso->nsinfo,
+ is_kallsyms, is_vdso, proper_name, root_dir);
+ mutex_unlock(&dso->lock);
+out_free:
+ free(allocated_name);
+ return ret;
+}
+
+static int
+machines__for_each_dso(struct machines *machines, machine__dso_t fn, void *priv)
+{
+ int ret = machine__for_each_dso(&machines->host, fn, priv);
+ struct rb_node *nd;
+
+ for (nd = rb_first_cached(&machines->guests); nd;
+ nd = rb_next(nd)) {
+ struct machine *pos = rb_entry(nd, struct machine, rb_node);
+
+ ret |= machine__for_each_dso(pos, fn, priv);
+ }
+ return ret ? -1 : 0;
+}
+
+int __perf_session__cache_build_ids(struct perf_session *session,
+ machine__dso_t fn, void *priv)
+{
+ if (no_buildid_cache)
+ return 0;
+
+ if (mkdir(buildid_dir, 0755) != 0 && errno != EEXIST)
+ return -1;
+
+ return machines__for_each_dso(&session->machines, fn, priv) ? -1 : 0;
+}
+
+int perf_session__cache_build_ids(struct perf_session *session)
+{
+ return __perf_session__cache_build_ids(session, dso__cache_build_id, NULL);
+}
+
+static bool machine__read_build_ids(struct machine *machine, bool with_hits)
+{
+ return __dsos__read_build_ids(&machine->dsos.head, with_hits);
+}
+
+bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
+{
+ struct rb_node *nd;
+ bool ret = machine__read_build_ids(&session->machines.host, with_hits);
+
+ for (nd = rb_first_cached(&session->machines.guests); nd;
+ nd = rb_next(nd)) {
+ struct machine *pos = rb_entry(nd, struct machine, rb_node);
+ ret |= machine__read_build_ids(pos, with_hits);
+ }
+
+ return ret;
+}
+
+void build_id__init(struct build_id *bid, const u8 *data, size_t size)
+{
+ WARN_ON(size > BUILD_ID_SIZE);
+ memcpy(bid->data, data, size);
+ bid->size = size;
+}
+
+bool build_id__is_defined(const struct build_id *bid)
+{
+ return bid && bid->size ? !!memchr_inv(bid->data, 0, bid->size) : false;
+}
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
new file mode 100644
index 000000000..4e3a11693
--- /dev/null
+++ b/tools/perf/util/build-id.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_BUILD_ID_H_
+#define PERF_BUILD_ID_H_ 1
+
+#define BUILD_ID_SIZE 20 /* SHA-1 length in bytes */
+#define BUILD_ID_MIN_SIZE 16 /* MD5/UUID/GUID length in bytes */
+#define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1)
+#define SBUILD_ID_MIN_SIZE (BUILD_ID_MIN_SIZE * 2 + 1)
+
+#include "machine.h"
+#include "tool.h"
+#include <linux/types.h>
+
+struct build_id {
+ u8 data[BUILD_ID_SIZE];
+ size_t size;
+};
+
+struct nsinfo;
+
+extern struct perf_tool build_id__mark_dso_hit_ops;
+struct dso;
+struct feat_fd;
+
+void build_id__init(struct build_id *bid, const u8 *data, size_t size);
+int build_id__sprintf(const struct build_id *build_id, char *bf);
+bool build_id__is_defined(const struct build_id *bid);
+int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id);
+int filename__sprintf_build_id(const char *pathname, char *sbuild_id);
+char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
+ size_t size);
+
+char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
+ bool is_debug);
+char *__dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
+ bool is_debug, bool is_kallsyms);
+
+int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event,
+ struct perf_sample *sample, struct evsel *evsel,
+ struct machine *machine);
+
+int dsos__hit_all(struct perf_session *session);
+
+int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event,
+ struct perf_sample *sample, struct evsel *evsel,
+ struct machine *machine);
+
+bool perf_session__read_build_ids(struct perf_session *session, bool with_hits);
+int perf_session__write_buildid_table(struct perf_session *session,
+ struct feat_fd *fd);
+int perf_session__cache_build_ids(struct perf_session *session);
+int __perf_session__cache_build_ids(struct perf_session *session,
+ machine__dso_t fn, void *priv);
+
+char *build_id_cache__origname(const char *sbuild_id);
+char *build_id_cache__linkname(const char *sbuild_id, char *bf, size_t size);
+char *build_id_cache__cachedir(const char *sbuild_id, const char *name,
+ struct nsinfo *nsi, bool is_kallsyms,
+ bool is_vdso);
+
+struct strlist;
+
+struct strlist *build_id_cache__list_all(bool validonly);
+char *build_id_cache__complement(const char *incomplete_sbuild_id);
+int build_id_cache__list_build_ids(const char *pathname, struct nsinfo *nsi,
+ struct strlist **result);
+bool build_id_cache__cached(const char *sbuild_id);
+int build_id_cache__add(const char *sbuild_id, const char *name, const char *realname,
+ struct nsinfo *nsi, bool is_kallsyms, bool is_vdso,
+ const char *proper_name, const char *root_dir);
+int __build_id_cache__add_s(const char *sbuild_id,
+ const char *name, struct nsinfo *nsi,
+ bool is_kallsyms, bool is_vdso,
+ const char *proper_name, const char *root_dir);
+static inline int build_id_cache__add_s(const char *sbuild_id,
+ const char *name, struct nsinfo *nsi,
+ bool is_kallsyms, bool is_vdso)
+{
+ return __build_id_cache__add_s(sbuild_id, name, nsi, is_kallsyms, is_vdso, NULL, NULL);
+}
+int build_id_cache__remove_s(const char *sbuild_id);
+
+extern char buildid_dir[];
+
+void set_buildid_dir(const char *dir);
+void disable_buildid_cache(void);
+
+#endif
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
new file mode 100644
index 000000000..0b61840d4
--- /dev/null
+++ b/tools/perf/util/cache.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CACHE_H
+#define __PERF_CACHE_H
+
+#include "strbuf.h"
+#include <subcmd/pager.h>
+#include "../ui/ui.h"
+
+#include <linux/compiler.h>
+#include <linux/string.h>
+
+#define CMD_EXEC_PATH "--exec-path"
+#define CMD_DEBUGFS_DIR "--debugfs-dir="
+
+#define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH"
+#define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR"
+#define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR"
+#define PERF_PAGER_ENVIRONMENT "PERF_PAGER"
+
+int split_cmdline(char *cmdline, const char ***argv);
+
+#define alloc_nr(x) (((x)+16)*3/2)
+
+static inline int is_absolute_path(const char *path)
+{
+ return path[0] == '/';
+}
+
+char *mkpath(char *path_buf, size_t sz, const char *fmt, ...) __printf(3, 4);
+
+#endif /* __PERF_CACHE_H */
diff --git a/tools/perf/util/cacheline.c b/tools/perf/util/cacheline.c
new file mode 100644
index 000000000..e98b5250a
--- /dev/null
+++ b/tools/perf/util/cacheline.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "cacheline.h"
+#include <unistd.h>
+
+#ifdef _SC_LEVEL1_DCACHE_LINESIZE
+#define cache_line_size(cacheline_sizep) *cacheline_sizep = sysconf(_SC_LEVEL1_DCACHE_LINESIZE)
+#else
+#include <api/fs/fs.h>
+#include "debug.h"
+static void cache_line_size(int *cacheline_sizep)
+{
+ if (sysfs__read_int("devices/system/cpu/cpu0/cache/index0/coherency_line_size", cacheline_sizep))
+ pr_debug("cannot determine cache line size");
+}
+#endif
+
+int cacheline_size(void)
+{
+ static int size;
+
+ if (!size)
+ cache_line_size(&size);
+
+ return size;
+}
diff --git a/tools/perf/util/cacheline.h b/tools/perf/util/cacheline.h
new file mode 100644
index 000000000..fe6d5b60a
--- /dev/null
+++ b/tools/perf/util/cacheline.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_CACHELINE_H
+#define PERF_CACHELINE_H
+
+#include <linux/compiler.h>
+
+int __pure cacheline_size(void);
+
+
+/*
+ * Some architectures have 'Adjacent Cacheline Prefetch' feature,
+ * which performs like the cacheline size being doubled.
+ */
+static inline u64 cl_address(u64 address, bool double_cl)
+{
+ u64 size = cacheline_size();
+
+ if (double_cl)
+ size *= 2;
+
+ /* return the cacheline of the address */
+ return (address & ~(size - 1));
+}
+
+static inline u64 cl_offset(u64 address, bool double_cl)
+{
+ u64 size = cacheline_size();
+
+ if (double_cl)
+ size *= 2;
+
+ /* return the offset inside cacheline */
+ return (address & (size - 1));
+}
+
+#endif // PERF_CACHELINE_H
diff --git a/tools/perf/util/call-path.c b/tools/perf/util/call-path.c
new file mode 100644
index 000000000..5c60b8be1
--- /dev/null
+++ b/tools/perf/util/call-path.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * call-path.h: Manipulate a tree data structure containing function call paths
+ * Copyright (c) 2014, Intel Corporation.
+ */
+
+#include <linux/rbtree.h>
+#include <linux/list.h>
+#include <linux/zalloc.h>
+#include <stdlib.h>
+
+#include "call-path.h"
+
+static void call_path__init(struct call_path *cp, struct call_path *parent,
+ struct symbol *sym, u64 ip, bool in_kernel)
+{
+ cp->parent = parent;
+ cp->sym = sym;
+ cp->ip = sym ? 0 : ip;
+ cp->db_id = 0;
+ cp->in_kernel = in_kernel;
+ RB_CLEAR_NODE(&cp->rb_node);
+ cp->children = RB_ROOT;
+}
+
+struct call_path_root *call_path_root__new(void)
+{
+ struct call_path_root *cpr;
+
+ cpr = zalloc(sizeof(struct call_path_root));
+ if (!cpr)
+ return NULL;
+ call_path__init(&cpr->call_path, NULL, NULL, 0, false);
+ INIT_LIST_HEAD(&cpr->blocks);
+ return cpr;
+}
+
+void call_path_root__free(struct call_path_root *cpr)
+{
+ struct call_path_block *pos, *n;
+
+ list_for_each_entry_safe(pos, n, &cpr->blocks, node) {
+ list_del_init(&pos->node);
+ free(pos);
+ }
+ free(cpr);
+}
+
+static struct call_path *call_path__new(struct call_path_root *cpr,
+ struct call_path *parent,
+ struct symbol *sym, u64 ip,
+ bool in_kernel)
+{
+ struct call_path_block *cpb;
+ struct call_path *cp;
+ size_t n;
+
+ if (cpr->next < cpr->sz) {
+ cpb = list_last_entry(&cpr->blocks, struct call_path_block,
+ node);
+ } else {
+ cpb = zalloc(sizeof(struct call_path_block));
+ if (!cpb)
+ return NULL;
+ list_add_tail(&cpb->node, &cpr->blocks);
+ cpr->sz += CALL_PATH_BLOCK_SIZE;
+ }
+
+ n = cpr->next++ & CALL_PATH_BLOCK_MASK;
+ cp = &cpb->cp[n];
+
+ call_path__init(cp, parent, sym, ip, in_kernel);
+
+ return cp;
+}
+
+struct call_path *call_path__findnew(struct call_path_root *cpr,
+ struct call_path *parent,
+ struct symbol *sym, u64 ip, u64 ks)
+{
+ struct rb_node **p;
+ struct rb_node *node_parent = NULL;
+ struct call_path *cp;
+ bool in_kernel = ip >= ks;
+
+ if (sym)
+ ip = 0;
+
+ if (!parent)
+ return call_path__new(cpr, parent, sym, ip, in_kernel);
+
+ p = &parent->children.rb_node;
+ while (*p != NULL) {
+ node_parent = *p;
+ cp = rb_entry(node_parent, struct call_path, rb_node);
+
+ if (cp->sym == sym && cp->ip == ip)
+ return cp;
+
+ if (sym < cp->sym || (sym == cp->sym && ip < cp->ip))
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ cp = call_path__new(cpr, parent, sym, ip, in_kernel);
+ if (!cp)
+ return NULL;
+
+ rb_link_node(&cp->rb_node, node_parent, p);
+ rb_insert_color(&cp->rb_node, &parent->children);
+
+ return cp;
+}
diff --git a/tools/perf/util/call-path.h b/tools/perf/util/call-path.h
new file mode 100644
index 000000000..5875cfc81
--- /dev/null
+++ b/tools/perf/util/call-path.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * call-path.h: Manipulate a tree data structure containing function call paths
+ * Copyright (c) 2014, Intel Corporation.
+ */
+
+#ifndef __PERF_CALL_PATH_H
+#define __PERF_CALL_PATH_H
+
+#include <sys/types.h>
+
+#include <linux/types.h>
+#include <linux/rbtree.h>
+
+/**
+ * struct call_path - node in list of calls leading to a function call.
+ * @parent: call path to the parent function call
+ * @sym: symbol of function called
+ * @ip: only if sym is null, the ip of the function
+ * @db_id: id used for db-export
+ * @in_kernel: whether function is a in the kernel
+ * @rb_node: node in parent's tree of called functions
+ * @children: tree of call paths of functions called
+ *
+ * In combination with the call_return structure, the call_path structure
+ * defines a context-sensitive call-graph.
+ */
+struct call_path {
+ struct call_path *parent;
+ struct symbol *sym;
+ u64 ip;
+ u64 db_id;
+ bool in_kernel;
+ struct rb_node rb_node;
+ struct rb_root children;
+};
+
+#define CALL_PATH_BLOCK_SHIFT 8
+#define CALL_PATH_BLOCK_SIZE (1 << CALL_PATH_BLOCK_SHIFT)
+#define CALL_PATH_BLOCK_MASK (CALL_PATH_BLOCK_SIZE - 1)
+
+struct call_path_block {
+ struct call_path cp[CALL_PATH_BLOCK_SIZE];
+ struct list_head node;
+};
+
+/**
+ * struct call_path_root - root of all call paths.
+ * @call_path: root call path
+ * @blocks: list of blocks to store call paths
+ * @next: next free space
+ * @sz: number of spaces
+ */
+struct call_path_root {
+ struct call_path call_path;
+ struct list_head blocks;
+ size_t next;
+ size_t sz;
+};
+
+struct call_path_root *call_path_root__new(void);
+void call_path_root__free(struct call_path_root *cpr);
+
+struct call_path *call_path__findnew(struct call_path_root *cpr,
+ struct call_path *parent,
+ struct symbol *sym, u64 ip, u64 ks);
+
+#endif
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
new file mode 100644
index 000000000..aee937d14
--- /dev/null
+++ b/tools/perf/util/callchain.c
@@ -0,0 +1,1783 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2009-2011, Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ * Handle the callchains from the stream in an ad-hoc radix tree and then
+ * sort them in an rbtree.
+ *
+ * Using a radix for code path provides a fast retrieval and factorizes
+ * memory use. Also that lets us use the paths in a hierarchical graph view.
+ *
+ */
+
+#include <inttypes.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <math.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+
+#include "asm/bug.h"
+
+#include "debug.h"
+#include "dso.h"
+#include "event.h"
+#include "hist.h"
+#include "sort.h"
+#include "machine.h"
+#include "map.h"
+#include "callchain.h"
+#include "branch.h"
+#include "symbol.h"
+#include "util.h"
+#include "../perf.h"
+
+#define CALLCHAIN_PARAM_DEFAULT \
+ .mode = CHAIN_GRAPH_ABS, \
+ .min_percent = 0.5, \
+ .order = ORDER_CALLEE, \
+ .key = CCKEY_FUNCTION, \
+ .value = CCVAL_PERCENT, \
+
+struct callchain_param callchain_param = {
+ CALLCHAIN_PARAM_DEFAULT
+};
+
+/*
+ * Are there any events usind DWARF callchains?
+ *
+ * I.e.
+ *
+ * -e cycles/call-graph=dwarf/
+ */
+bool dwarf_callchain_users;
+
+struct callchain_param callchain_param_default = {
+ CALLCHAIN_PARAM_DEFAULT
+};
+
+/* Used for thread-local struct callchain_cursor. */
+static pthread_key_t callchain_cursor;
+
+int parse_callchain_record_opt(const char *arg, struct callchain_param *param)
+{
+ return parse_callchain_record(arg, param);
+}
+
+static int parse_callchain_mode(const char *value)
+{
+ if (!strncmp(value, "graph", strlen(value))) {
+ callchain_param.mode = CHAIN_GRAPH_ABS;
+ return 0;
+ }
+ if (!strncmp(value, "flat", strlen(value))) {
+ callchain_param.mode = CHAIN_FLAT;
+ return 0;
+ }
+ if (!strncmp(value, "fractal", strlen(value))) {
+ callchain_param.mode = CHAIN_GRAPH_REL;
+ return 0;
+ }
+ if (!strncmp(value, "folded", strlen(value))) {
+ callchain_param.mode = CHAIN_FOLDED;
+ return 0;
+ }
+ return -1;
+}
+
+static int parse_callchain_order(const char *value)
+{
+ if (!strncmp(value, "caller", strlen(value))) {
+ callchain_param.order = ORDER_CALLER;
+ callchain_param.order_set = true;
+ return 0;
+ }
+ if (!strncmp(value, "callee", strlen(value))) {
+ callchain_param.order = ORDER_CALLEE;
+ callchain_param.order_set = true;
+ return 0;
+ }
+ return -1;
+}
+
+static int parse_callchain_sort_key(const char *value)
+{
+ if (!strncmp(value, "function", strlen(value))) {
+ callchain_param.key = CCKEY_FUNCTION;
+ return 0;
+ }
+ if (!strncmp(value, "address", strlen(value))) {
+ callchain_param.key = CCKEY_ADDRESS;
+ return 0;
+ }
+ if (!strncmp(value, "srcline", strlen(value))) {
+ callchain_param.key = CCKEY_SRCLINE;
+ return 0;
+ }
+ if (!strncmp(value, "branch", strlen(value))) {
+ callchain_param.branch_callstack = 1;
+ return 0;
+ }
+ return -1;
+}
+
+static int parse_callchain_value(const char *value)
+{
+ if (!strncmp(value, "percent", strlen(value))) {
+ callchain_param.value = CCVAL_PERCENT;
+ return 0;
+ }
+ if (!strncmp(value, "period", strlen(value))) {
+ callchain_param.value = CCVAL_PERIOD;
+ return 0;
+ }
+ if (!strncmp(value, "count", strlen(value))) {
+ callchain_param.value = CCVAL_COUNT;
+ return 0;
+ }
+ return -1;
+}
+
+static int get_stack_size(const char *str, unsigned long *_size)
+{
+ char *endptr;
+ unsigned long size;
+ unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
+
+ size = strtoul(str, &endptr, 0);
+
+ do {
+ if (*endptr)
+ break;
+
+ size = round_up(size, sizeof(u64));
+ if (!size || size > max_size)
+ break;
+
+ *_size = size;
+ return 0;
+
+ } while (0);
+
+ pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
+ max_size, str);
+ return -1;
+}
+
+static int
+__parse_callchain_report_opt(const char *arg, bool allow_record_opt)
+{
+ char *tok;
+ char *endptr, *saveptr = NULL;
+ bool minpcnt_set = false;
+ bool record_opt_set = false;
+ bool try_stack_size = false;
+
+ callchain_param.enabled = true;
+ symbol_conf.use_callchain = true;
+
+ if (!arg)
+ return 0;
+
+ while ((tok = strtok_r((char *)arg, ",", &saveptr)) != NULL) {
+ if (!strncmp(tok, "none", strlen(tok))) {
+ callchain_param.mode = CHAIN_NONE;
+ callchain_param.enabled = false;
+ symbol_conf.use_callchain = false;
+ return 0;
+ }
+
+ if (!parse_callchain_mode(tok) ||
+ !parse_callchain_order(tok) ||
+ !parse_callchain_sort_key(tok) ||
+ !parse_callchain_value(tok)) {
+ /* parsing ok - move on to the next */
+ try_stack_size = false;
+ goto next;
+ } else if (allow_record_opt && !record_opt_set) {
+ if (parse_callchain_record(tok, &callchain_param))
+ goto try_numbers;
+
+ /* assume that number followed by 'dwarf' is stack size */
+ if (callchain_param.record_mode == CALLCHAIN_DWARF)
+ try_stack_size = true;
+
+ record_opt_set = true;
+ goto next;
+ }
+
+try_numbers:
+ if (try_stack_size) {
+ unsigned long size = 0;
+
+ if (get_stack_size(tok, &size) < 0)
+ return -1;
+ callchain_param.dump_size = size;
+ try_stack_size = false;
+ } else if (!minpcnt_set) {
+ /* try to get the min percent */
+ callchain_param.min_percent = strtod(tok, &endptr);
+ if (tok == endptr)
+ return -1;
+ minpcnt_set = true;
+ } else {
+ /* try print limit at last */
+ callchain_param.print_limit = strtoul(tok, &endptr, 0);
+ if (tok == endptr)
+ return -1;
+ }
+next:
+ arg = NULL;
+ }
+
+ if (callchain_register_param(&callchain_param) < 0) {
+ pr_err("Can't register callchain params\n");
+ return -1;
+ }
+ return 0;
+}
+
+int parse_callchain_report_opt(const char *arg)
+{
+ return __parse_callchain_report_opt(arg, false);
+}
+
+int parse_callchain_top_opt(const char *arg)
+{
+ return __parse_callchain_report_opt(arg, true);
+}
+
+int parse_callchain_record(const char *arg, struct callchain_param *param)
+{
+ char *tok, *name, *saveptr = NULL;
+ char *buf;
+ int ret = -1;
+
+ /* We need buffer that we know we can write to. */
+ buf = malloc(strlen(arg) + 1);
+ if (!buf)
+ return -ENOMEM;
+
+ strcpy(buf, arg);
+
+ tok = strtok_r((char *)buf, ",", &saveptr);
+ name = tok ? : (char *)buf;
+
+ do {
+ /* Framepointer style */
+ if (!strncmp(name, "fp", sizeof("fp"))) {
+ ret = 0;
+ param->record_mode = CALLCHAIN_FP;
+
+ tok = strtok_r(NULL, ",", &saveptr);
+ if (tok) {
+ unsigned long size;
+
+ size = strtoul(tok, &name, 0);
+ if (size < (unsigned) sysctl__max_stack())
+ param->max_stack = size;
+ }
+ break;
+
+ /* Dwarf style */
+ } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
+ const unsigned long default_stack_dump_size = 8192;
+
+ ret = 0;
+ param->record_mode = CALLCHAIN_DWARF;
+ param->dump_size = default_stack_dump_size;
+ dwarf_callchain_users = true;
+
+ tok = strtok_r(NULL, ",", &saveptr);
+ if (tok) {
+ unsigned long size = 0;
+
+ ret = get_stack_size(tok, &size);
+ param->dump_size = size;
+ }
+ } else if (!strncmp(name, "lbr", sizeof("lbr"))) {
+ if (!strtok_r(NULL, ",", &saveptr)) {
+ param->record_mode = CALLCHAIN_LBR;
+ ret = 0;
+ } else
+ pr_err("callchain: No more arguments "
+ "needed for --call-graph lbr\n");
+ break;
+ } else {
+ pr_err("callchain: Unknown --call-graph option "
+ "value: %s\n", arg);
+ break;
+ }
+
+ } while (0);
+
+ free(buf);
+ return ret;
+}
+
+int perf_callchain_config(const char *var, const char *value)
+{
+ char *endptr;
+
+ if (!strstarts(var, "call-graph."))
+ return 0;
+ var += sizeof("call-graph.") - 1;
+
+ if (!strcmp(var, "record-mode"))
+ return parse_callchain_record_opt(value, &callchain_param);
+ if (!strcmp(var, "dump-size")) {
+ unsigned long size = 0;
+ int ret;
+
+ ret = get_stack_size(value, &size);
+ callchain_param.dump_size = size;
+
+ return ret;
+ }
+ if (!strcmp(var, "print-type")){
+ int ret;
+ ret = parse_callchain_mode(value);
+ if (ret == -1)
+ pr_err("Invalid callchain mode: %s\n", value);
+ return ret;
+ }
+ if (!strcmp(var, "order")){
+ int ret;
+ ret = parse_callchain_order(value);
+ if (ret == -1)
+ pr_err("Invalid callchain order: %s\n", value);
+ return ret;
+ }
+ if (!strcmp(var, "sort-key")){
+ int ret;
+ ret = parse_callchain_sort_key(value);
+ if (ret == -1)
+ pr_err("Invalid callchain sort key: %s\n", value);
+ return ret;
+ }
+ if (!strcmp(var, "threshold")) {
+ callchain_param.min_percent = strtod(value, &endptr);
+ if (value == endptr) {
+ pr_err("Invalid callchain threshold: %s\n", value);
+ return -1;
+ }
+ }
+ if (!strcmp(var, "print-limit")) {
+ callchain_param.print_limit = strtod(value, &endptr);
+ if (value == endptr) {
+ pr_err("Invalid callchain print limit: %s\n", value);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static void
+rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
+ enum chain_mode mode)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct callchain_node *rnode;
+ u64 chain_cumul = callchain_cumul_hits(chain);
+
+ while (*p) {
+ u64 rnode_cumul;
+
+ parent = *p;
+ rnode = rb_entry(parent, struct callchain_node, rb_node);
+ rnode_cumul = callchain_cumul_hits(rnode);
+
+ switch (mode) {
+ case CHAIN_FLAT:
+ case CHAIN_FOLDED:
+ if (rnode->hit < chain->hit)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ break;
+ case CHAIN_GRAPH_ABS: /* Falldown */
+ case CHAIN_GRAPH_REL:
+ if (rnode_cumul < chain_cumul)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ break;
+ case CHAIN_NONE:
+ default:
+ break;
+ }
+ }
+
+ rb_link_node(&chain->rb_node, parent, p);
+ rb_insert_color(&chain->rb_node, root);
+}
+
+static void
+__sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node,
+ u64 min_hit)
+{
+ struct rb_node *n;
+ struct callchain_node *child;
+
+ n = rb_first(&node->rb_root_in);
+ while (n) {
+ child = rb_entry(n, struct callchain_node, rb_node_in);
+ n = rb_next(n);
+
+ __sort_chain_flat(rb_root, child, min_hit);
+ }
+
+ if (node->hit && node->hit >= min_hit)
+ rb_insert_callchain(rb_root, node, CHAIN_FLAT);
+}
+
+/*
+ * Once we get every callchains from the stream, we can now
+ * sort them by hit
+ */
+static void
+sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root,
+ u64 min_hit, struct callchain_param *param __maybe_unused)
+{
+ *rb_root = RB_ROOT;
+ __sort_chain_flat(rb_root, &root->node, min_hit);
+}
+
+static void __sort_chain_graph_abs(struct callchain_node *node,
+ u64 min_hit)
+{
+ struct rb_node *n;
+ struct callchain_node *child;
+
+ node->rb_root = RB_ROOT;
+ n = rb_first(&node->rb_root_in);
+
+ while (n) {
+ child = rb_entry(n, struct callchain_node, rb_node_in);
+ n = rb_next(n);
+
+ __sort_chain_graph_abs(child, min_hit);
+ if (callchain_cumul_hits(child) >= min_hit)
+ rb_insert_callchain(&node->rb_root, child,
+ CHAIN_GRAPH_ABS);
+ }
+}
+
+static void
+sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root,
+ u64 min_hit, struct callchain_param *param __maybe_unused)
+{
+ __sort_chain_graph_abs(&chain_root->node, min_hit);
+ rb_root->rb_node = chain_root->node.rb_root.rb_node;
+}
+
+static void __sort_chain_graph_rel(struct callchain_node *node,
+ double min_percent)
+{
+ struct rb_node *n;
+ struct callchain_node *child;
+ u64 min_hit;
+
+ node->rb_root = RB_ROOT;
+ min_hit = ceil(node->children_hit * min_percent);
+
+ n = rb_first(&node->rb_root_in);
+ while (n) {
+ child = rb_entry(n, struct callchain_node, rb_node_in);
+ n = rb_next(n);
+
+ __sort_chain_graph_rel(child, min_percent);
+ if (callchain_cumul_hits(child) >= min_hit)
+ rb_insert_callchain(&node->rb_root, child,
+ CHAIN_GRAPH_REL);
+ }
+}
+
+static void
+sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_root *chain_root,
+ u64 min_hit __maybe_unused, struct callchain_param *param)
+{
+ __sort_chain_graph_rel(&chain_root->node, param->min_percent / 100.0);
+ rb_root->rb_node = chain_root->node.rb_root.rb_node;
+}
+
+int callchain_register_param(struct callchain_param *param)
+{
+ switch (param->mode) {
+ case CHAIN_GRAPH_ABS:
+ param->sort = sort_chain_graph_abs;
+ break;
+ case CHAIN_GRAPH_REL:
+ param->sort = sort_chain_graph_rel;
+ break;
+ case CHAIN_FLAT:
+ case CHAIN_FOLDED:
+ param->sort = sort_chain_flat;
+ break;
+ case CHAIN_NONE:
+ default:
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Create a child for a parent. If inherit_children, then the new child
+ * will become the new parent of it's parent children
+ */
+static struct callchain_node *
+create_child(struct callchain_node *parent, bool inherit_children)
+{
+ struct callchain_node *new;
+
+ new = zalloc(sizeof(*new));
+ if (!new) {
+ perror("not enough memory to create child for code path tree");
+ return NULL;
+ }
+ new->parent = parent;
+ INIT_LIST_HEAD(&new->val);
+ INIT_LIST_HEAD(&new->parent_val);
+
+ if (inherit_children) {
+ struct rb_node *n;
+ struct callchain_node *child;
+
+ new->rb_root_in = parent->rb_root_in;
+ parent->rb_root_in = RB_ROOT;
+
+ n = rb_first(&new->rb_root_in);
+ while (n) {
+ child = rb_entry(n, struct callchain_node, rb_node_in);
+ child->parent = new;
+ n = rb_next(n);
+ }
+
+ /* make it the first child */
+ rb_link_node(&new->rb_node_in, NULL, &parent->rb_root_in.rb_node);
+ rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
+ }
+
+ return new;
+}
+
+
+/*
+ * Fill the node with callchain values
+ */
+static int
+fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
+{
+ struct callchain_cursor_node *cursor_node;
+
+ node->val_nr = cursor->nr - cursor->pos;
+ if (!node->val_nr)
+ pr_warning("Warning: empty node in callchain tree\n");
+
+ cursor_node = callchain_cursor_current(cursor);
+
+ while (cursor_node) {
+ struct callchain_list *call;
+
+ call = zalloc(sizeof(*call));
+ if (!call) {
+ perror("not enough memory for the code path tree");
+ return -1;
+ }
+ call->ip = cursor_node->ip;
+ call->ms = cursor_node->ms;
+ call->ms.map = map__get(call->ms.map);
+ call->ms.maps = maps__get(call->ms.maps);
+ call->srcline = cursor_node->srcline;
+
+ if (cursor_node->branch) {
+ call->branch_count = 1;
+
+ if (cursor_node->branch_from) {
+ /*
+ * branch_from is set with value somewhere else
+ * to imply it's "to" of a branch.
+ */
+ call->brtype_stat.branch_to = true;
+
+ if (cursor_node->branch_flags.predicted)
+ call->predicted_count = 1;
+
+ if (cursor_node->branch_flags.abort)
+ call->abort_count = 1;
+
+ branch_type_count(&call->brtype_stat,
+ &cursor_node->branch_flags,
+ cursor_node->branch_from,
+ cursor_node->ip);
+ } else {
+ /*
+ * It's "from" of a branch
+ */
+ call->brtype_stat.branch_to = false;
+ call->cycles_count =
+ cursor_node->branch_flags.cycles;
+ call->iter_count = cursor_node->nr_loop_iter;
+ call->iter_cycles = cursor_node->iter_cycles;
+ }
+ }
+
+ list_add_tail(&call->list, &node->val);
+
+ callchain_cursor_advance(cursor);
+ cursor_node = callchain_cursor_current(cursor);
+ }
+ return 0;
+}
+
+static struct callchain_node *
+add_child(struct callchain_node *parent,
+ struct callchain_cursor *cursor,
+ u64 period)
+{
+ struct callchain_node *new;
+
+ new = create_child(parent, false);
+ if (new == NULL)
+ return NULL;
+
+ if (fill_node(new, cursor) < 0) {
+ struct callchain_list *call, *tmp;
+
+ list_for_each_entry_safe(call, tmp, &new->val, list) {
+ list_del_init(&call->list);
+ map__zput(call->ms.map);
+ maps__zput(call->ms.maps);
+ free(call);
+ }
+ free(new);
+ return NULL;
+ }
+
+ new->children_hit = 0;
+ new->hit = period;
+ new->children_count = 0;
+ new->count = 1;
+ return new;
+}
+
+enum match_result {
+ MATCH_ERROR = -1,
+ MATCH_EQ,
+ MATCH_LT,
+ MATCH_GT,
+};
+
+static enum match_result match_chain_strings(const char *left,
+ const char *right)
+{
+ enum match_result ret = MATCH_EQ;
+ int cmp;
+
+ if (left && right)
+ cmp = strcmp(left, right);
+ else if (!left && right)
+ cmp = 1;
+ else if (left && !right)
+ cmp = -1;
+ else
+ return MATCH_ERROR;
+
+ if (cmp != 0)
+ ret = cmp < 0 ? MATCH_LT : MATCH_GT;
+
+ return ret;
+}
+
+/*
+ * We need to always use relative addresses because we're aggregating
+ * callchains from multiple threads, i.e. different address spaces, so
+ * comparing absolute addresses make no sense as a symbol in a DSO may end up
+ * in a different address when used in a different binary or even the same
+ * binary but with some sort of address randomization technique, thus we need
+ * to compare just relative addresses. -acme
+ */
+static enum match_result match_chain_dso_addresses(struct map *left_map, u64 left_ip,
+ struct map *right_map, u64 right_ip)
+{
+ struct dso *left_dso = left_map ? map__dso(left_map) : NULL;
+ struct dso *right_dso = right_map ? map__dso(right_map) : NULL;
+
+ if (left_dso != right_dso)
+ return left_dso < right_dso ? MATCH_LT : MATCH_GT;
+
+ if (left_ip != right_ip)
+ return left_ip < right_ip ? MATCH_LT : MATCH_GT;
+
+ return MATCH_EQ;
+}
+
+static enum match_result match_chain(struct callchain_cursor_node *node,
+ struct callchain_list *cnode)
+{
+ enum match_result match = MATCH_ERROR;
+
+ switch (callchain_param.key) {
+ case CCKEY_SRCLINE:
+ match = match_chain_strings(cnode->srcline, node->srcline);
+ if (match != MATCH_ERROR)
+ break;
+ /* otherwise fall-back to symbol-based comparison below */
+ fallthrough;
+ case CCKEY_FUNCTION:
+ if (node->ms.sym && cnode->ms.sym) {
+ /*
+ * Compare inlined frames based on their symbol name
+ * because different inlined frames will have the same
+ * symbol start. Otherwise do a faster comparison based
+ * on the symbol start address.
+ */
+ if (cnode->ms.sym->inlined || node->ms.sym->inlined) {
+ match = match_chain_strings(cnode->ms.sym->name,
+ node->ms.sym->name);
+ if (match != MATCH_ERROR)
+ break;
+ } else {
+ match = match_chain_dso_addresses(cnode->ms.map, cnode->ms.sym->start,
+ node->ms.map, node->ms.sym->start);
+ break;
+ }
+ }
+ /* otherwise fall-back to IP-based comparison below */
+ fallthrough;
+ case CCKEY_ADDRESS:
+ default:
+ match = match_chain_dso_addresses(cnode->ms.map, cnode->ip, node->ms.map, node->ip);
+ break;
+ }
+
+ if (match == MATCH_EQ && node->branch) {
+ cnode->branch_count++;
+
+ if (node->branch_from) {
+ /*
+ * It's "to" of a branch
+ */
+ cnode->brtype_stat.branch_to = true;
+
+ if (node->branch_flags.predicted)
+ cnode->predicted_count++;
+
+ if (node->branch_flags.abort)
+ cnode->abort_count++;
+
+ branch_type_count(&cnode->brtype_stat,
+ &node->branch_flags,
+ node->branch_from,
+ node->ip);
+ } else {
+ /*
+ * It's "from" of a branch
+ */
+ cnode->brtype_stat.branch_to = false;
+ cnode->cycles_count += node->branch_flags.cycles;
+ cnode->iter_count += node->nr_loop_iter;
+ cnode->iter_cycles += node->iter_cycles;
+ cnode->from_count++;
+ }
+ }
+
+ return match;
+}
+
+/*
+ * Split the parent in two parts (a new child is created) and
+ * give a part of its callchain to the created child.
+ * Then create another child to host the given callchain of new branch
+ */
+static int
+split_add_child(struct callchain_node *parent,
+ struct callchain_cursor *cursor,
+ struct callchain_list *to_split,
+ u64 idx_parents, u64 idx_local, u64 period)
+{
+ struct callchain_node *new;
+ struct list_head *old_tail;
+ unsigned int idx_total = idx_parents + idx_local;
+
+ /* split */
+ new = create_child(parent, true);
+ if (new == NULL)
+ return -1;
+
+ /* split the callchain and move a part to the new child */
+ old_tail = parent->val.prev;
+ list_del_range(&to_split->list, old_tail);
+ new->val.next = &to_split->list;
+ new->val.prev = old_tail;
+ to_split->list.prev = &new->val;
+ old_tail->next = &new->val;
+
+ /* split the hits */
+ new->hit = parent->hit;
+ new->children_hit = parent->children_hit;
+ parent->children_hit = callchain_cumul_hits(new);
+ new->val_nr = parent->val_nr - idx_local;
+ parent->val_nr = idx_local;
+ new->count = parent->count;
+ new->children_count = parent->children_count;
+ parent->children_count = callchain_cumul_counts(new);
+
+ /* create a new child for the new branch if any */
+ if (idx_total < cursor->nr) {
+ struct callchain_node *first;
+ struct callchain_list *cnode;
+ struct callchain_cursor_node *node;
+ struct rb_node *p, **pp;
+
+ parent->hit = 0;
+ parent->children_hit += period;
+ parent->count = 0;
+ parent->children_count += 1;
+
+ node = callchain_cursor_current(cursor);
+ new = add_child(parent, cursor, period);
+ if (new == NULL)
+ return -1;
+
+ /*
+ * This is second child since we moved parent's children
+ * to new (first) child above.
+ */
+ p = parent->rb_root_in.rb_node;
+ first = rb_entry(p, struct callchain_node, rb_node_in);
+ cnode = list_first_entry(&first->val, struct callchain_list,
+ list);
+
+ if (match_chain(node, cnode) == MATCH_LT)
+ pp = &p->rb_left;
+ else
+ pp = &p->rb_right;
+
+ rb_link_node(&new->rb_node_in, p, pp);
+ rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
+ } else {
+ parent->hit = period;
+ parent->count = 1;
+ }
+ return 0;
+}
+
+static enum match_result
+append_chain(struct callchain_node *root,
+ struct callchain_cursor *cursor,
+ u64 period);
+
+static int
+append_chain_children(struct callchain_node *root,
+ struct callchain_cursor *cursor,
+ u64 period)
+{
+ struct callchain_node *rnode;
+ struct callchain_cursor_node *node;
+ struct rb_node **p = &root->rb_root_in.rb_node;
+ struct rb_node *parent = NULL;
+
+ node = callchain_cursor_current(cursor);
+ if (!node)
+ return -1;
+
+ /* lookup in children */
+ while (*p) {
+ enum match_result ret;
+
+ parent = *p;
+ rnode = rb_entry(parent, struct callchain_node, rb_node_in);
+
+ /* If at least first entry matches, rely to children */
+ ret = append_chain(rnode, cursor, period);
+ if (ret == MATCH_EQ)
+ goto inc_children_hit;
+ if (ret == MATCH_ERROR)
+ return -1;
+
+ if (ret == MATCH_LT)
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
+ }
+ /* nothing in children, add to the current node */
+ rnode = add_child(root, cursor, period);
+ if (rnode == NULL)
+ return -1;
+
+ rb_link_node(&rnode->rb_node_in, parent, p);
+ rb_insert_color(&rnode->rb_node_in, &root->rb_root_in);
+
+inc_children_hit:
+ root->children_hit += period;
+ root->children_count++;
+ return 0;
+}
+
+static enum match_result
+append_chain(struct callchain_node *root,
+ struct callchain_cursor *cursor,
+ u64 period)
+{
+ struct callchain_list *cnode;
+ u64 start = cursor->pos;
+ bool found = false;
+ u64 matches;
+ enum match_result cmp = MATCH_ERROR;
+
+ /*
+ * Lookup in the current node
+ * If we have a symbol, then compare the start to match
+ * anywhere inside a function, unless function
+ * mode is disabled.
+ */
+ list_for_each_entry(cnode, &root->val, list) {
+ struct callchain_cursor_node *node;
+
+ node = callchain_cursor_current(cursor);
+ if (!node)
+ break;
+
+ cmp = match_chain(node, cnode);
+ if (cmp != MATCH_EQ)
+ break;
+
+ found = true;
+
+ callchain_cursor_advance(cursor);
+ }
+
+ /* matches not, relay no the parent */
+ if (!found) {
+ WARN_ONCE(cmp == MATCH_ERROR, "Chain comparison error\n");
+ return cmp;
+ }
+
+ matches = cursor->pos - start;
+
+ /* we match only a part of the node. Split it and add the new chain */
+ if (matches < root->val_nr) {
+ if (split_add_child(root, cursor, cnode, start, matches,
+ period) < 0)
+ return MATCH_ERROR;
+
+ return MATCH_EQ;
+ }
+
+ /* we match 100% of the path, increment the hit */
+ if (matches == root->val_nr && cursor->pos == cursor->nr) {
+ root->hit += period;
+ root->count++;
+ return MATCH_EQ;
+ }
+
+ /* We match the node and still have a part remaining */
+ if (append_chain_children(root, cursor, period) < 0)
+ return MATCH_ERROR;
+
+ return MATCH_EQ;
+}
+
+int callchain_append(struct callchain_root *root,
+ struct callchain_cursor *cursor,
+ u64 period)
+{
+ if (cursor == NULL)
+ return -1;
+
+ if (!cursor->nr)
+ return 0;
+
+ callchain_cursor_commit(cursor);
+
+ if (append_chain_children(&root->node, cursor, period) < 0)
+ return -1;
+
+ if (cursor->nr > root->max_depth)
+ root->max_depth = cursor->nr;
+
+ return 0;
+}
+
+static int
+merge_chain_branch(struct callchain_cursor *cursor,
+ struct callchain_node *dst, struct callchain_node *src)
+{
+ struct callchain_cursor_node **old_last = cursor->last;
+ struct callchain_node *child;
+ struct callchain_list *list, *next_list;
+ struct rb_node *n;
+ int old_pos = cursor->nr;
+ int err = 0;
+
+ list_for_each_entry_safe(list, next_list, &src->val, list) {
+ struct map_symbol ms = {
+ .maps = maps__get(list->ms.maps),
+ .map = map__get(list->ms.map),
+ };
+ callchain_cursor_append(cursor, list->ip, &ms, false, NULL, 0, 0, 0, list->srcline);
+ list_del_init(&list->list);
+ map__zput(ms.map);
+ maps__zput(ms.maps);
+ map__zput(list->ms.map);
+ maps__zput(list->ms.maps);
+ free(list);
+ }
+
+ if (src->hit) {
+ callchain_cursor_commit(cursor);
+ if (append_chain_children(dst, cursor, src->hit) < 0)
+ return -1;
+ }
+
+ n = rb_first(&src->rb_root_in);
+ while (n) {
+ child = container_of(n, struct callchain_node, rb_node_in);
+ n = rb_next(n);
+ rb_erase(&child->rb_node_in, &src->rb_root_in);
+
+ err = merge_chain_branch(cursor, dst, child);
+ if (err)
+ break;
+
+ free(child);
+ }
+
+ cursor->nr = old_pos;
+ cursor->last = old_last;
+
+ return err;
+}
+
+int callchain_merge(struct callchain_cursor *cursor,
+ struct callchain_root *dst, struct callchain_root *src)
+{
+ return merge_chain_branch(cursor, &dst->node, &src->node);
+}
+
+int callchain_cursor_append(struct callchain_cursor *cursor,
+ u64 ip, struct map_symbol *ms,
+ bool branch, struct branch_flags *flags,
+ int nr_loop_iter, u64 iter_cycles, u64 branch_from,
+ const char *srcline)
+{
+ struct callchain_cursor_node *node = *cursor->last;
+
+ if (!node) {
+ node = calloc(1, sizeof(*node));
+ if (!node)
+ return -ENOMEM;
+
+ *cursor->last = node;
+ }
+
+ node->ip = ip;
+ maps__zput(node->ms.maps);
+ map__zput(node->ms.map);
+ node->ms = *ms;
+ node->ms.maps = maps__get(ms->maps);
+ node->ms.map = map__get(ms->map);
+ node->branch = branch;
+ node->nr_loop_iter = nr_loop_iter;
+ node->iter_cycles = iter_cycles;
+ node->srcline = srcline;
+
+ if (flags)
+ memcpy(&node->branch_flags, flags,
+ sizeof(struct branch_flags));
+
+ node->branch_from = branch_from;
+ cursor->nr++;
+
+ cursor->last = &node->next;
+
+ return 0;
+}
+
+int sample__resolve_callchain(struct perf_sample *sample,
+ struct callchain_cursor *cursor, struct symbol **parent,
+ struct evsel *evsel, struct addr_location *al,
+ int max_stack)
+{
+ if (sample->callchain == NULL && !symbol_conf.show_branchflag_count)
+ return 0;
+
+ if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain ||
+ perf_hpp_list.parent || symbol_conf.show_branchflag_count) {
+ return thread__resolve_callchain(al->thread, cursor, evsel, sample,
+ parent, al, max_stack);
+ }
+ return 0;
+}
+
+int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample)
+{
+ if ((!symbol_conf.use_callchain || sample->callchain == NULL) &&
+ !symbol_conf.show_branchflag_count)
+ return 0;
+ return callchain_append(he->callchain, get_tls_callchain_cursor(), sample->period);
+}
+
+int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
+ bool hide_unresolved)
+{
+ struct machine *machine = maps__machine(node->ms.maps);
+
+ maps__put(al->maps);
+ al->maps = maps__get(node->ms.maps);
+ map__put(al->map);
+ al->map = map__get(node->ms.map);
+ al->sym = node->ms.sym;
+ al->srcline = node->srcline;
+ al->addr = node->ip;
+
+ if (al->sym == NULL) {
+ if (hide_unresolved)
+ return 0;
+ if (al->map == NULL)
+ goto out;
+ }
+ if (RC_CHK_ACCESS(al->maps) == RC_CHK_ACCESS(machine__kernel_maps(machine))) {
+ if (machine__is_host(machine)) {
+ al->cpumode = PERF_RECORD_MISC_KERNEL;
+ al->level = 'k';
+ } else {
+ al->cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
+ al->level = 'g';
+ }
+ } else {
+ if (machine__is_host(machine)) {
+ al->cpumode = PERF_RECORD_MISC_USER;
+ al->level = '.';
+ } else if (perf_guest) {
+ al->cpumode = PERF_RECORD_MISC_GUEST_USER;
+ al->level = 'u';
+ } else {
+ al->cpumode = PERF_RECORD_MISC_HYPERVISOR;
+ al->level = 'H';
+ }
+ }
+
+out:
+ return 1;
+}
+
+char *callchain_list__sym_name(struct callchain_list *cl,
+ char *bf, size_t bfsize, bool show_dso)
+{
+ bool show_addr = callchain_param.key == CCKEY_ADDRESS;
+ bool show_srcline = show_addr || callchain_param.key == CCKEY_SRCLINE;
+ int printed;
+
+ if (cl->ms.sym) {
+ const char *inlined = cl->ms.sym->inlined ? " (inlined)" : "";
+
+ if (show_srcline && cl->srcline)
+ printed = scnprintf(bf, bfsize, "%s %s%s",
+ cl->ms.sym->name, cl->srcline,
+ inlined);
+ else
+ printed = scnprintf(bf, bfsize, "%s%s",
+ cl->ms.sym->name, inlined);
+ } else
+ printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip);
+
+ if (show_dso)
+ scnprintf(bf + printed, bfsize - printed, " %s",
+ cl->ms.map ?
+ map__dso(cl->ms.map)->short_name :
+ "unknown");
+
+ return bf;
+}
+
+char *callchain_node__scnprintf_value(struct callchain_node *node,
+ char *bf, size_t bfsize, u64 total)
+{
+ double percent = 0.0;
+ u64 period = callchain_cumul_hits(node);
+ unsigned count = callchain_cumul_counts(node);
+
+ if (callchain_param.mode == CHAIN_FOLDED) {
+ period = node->hit;
+ count = node->count;
+ }
+
+ switch (callchain_param.value) {
+ case CCVAL_PERIOD:
+ scnprintf(bf, bfsize, "%"PRIu64, period);
+ break;
+ case CCVAL_COUNT:
+ scnprintf(bf, bfsize, "%u", count);
+ break;
+ case CCVAL_PERCENT:
+ default:
+ if (total)
+ percent = period * 100.0 / total;
+ scnprintf(bf, bfsize, "%.2f%%", percent);
+ break;
+ }
+ return bf;
+}
+
+int callchain_node__fprintf_value(struct callchain_node *node,
+ FILE *fp, u64 total)
+{
+ double percent = 0.0;
+ u64 period = callchain_cumul_hits(node);
+ unsigned count = callchain_cumul_counts(node);
+
+ if (callchain_param.mode == CHAIN_FOLDED) {
+ period = node->hit;
+ count = node->count;
+ }
+
+ switch (callchain_param.value) {
+ case CCVAL_PERIOD:
+ return fprintf(fp, "%"PRIu64, period);
+ case CCVAL_COUNT:
+ return fprintf(fp, "%u", count);
+ case CCVAL_PERCENT:
+ default:
+ if (total)
+ percent = period * 100.0 / total;
+ return percent_color_fprintf(fp, "%.2f%%", percent);
+ }
+ return 0;
+}
+
+static void callchain_counts_value(struct callchain_node *node,
+ u64 *branch_count, u64 *predicted_count,
+ u64 *abort_count, u64 *cycles_count)
+{
+ struct callchain_list *clist;
+
+ list_for_each_entry(clist, &node->val, list) {
+ if (branch_count)
+ *branch_count += clist->branch_count;
+
+ if (predicted_count)
+ *predicted_count += clist->predicted_count;
+
+ if (abort_count)
+ *abort_count += clist->abort_count;
+
+ if (cycles_count)
+ *cycles_count += clist->cycles_count;
+ }
+}
+
+static int callchain_node_branch_counts_cumul(struct callchain_node *node,
+ u64 *branch_count,
+ u64 *predicted_count,
+ u64 *abort_count,
+ u64 *cycles_count)
+{
+ struct callchain_node *child;
+ struct rb_node *n;
+
+ n = rb_first(&node->rb_root_in);
+ while (n) {
+ child = rb_entry(n, struct callchain_node, rb_node_in);
+ n = rb_next(n);
+
+ callchain_node_branch_counts_cumul(child, branch_count,
+ predicted_count,
+ abort_count,
+ cycles_count);
+
+ callchain_counts_value(child, branch_count,
+ predicted_count, abort_count,
+ cycles_count);
+ }
+
+ return 0;
+}
+
+int callchain_branch_counts(struct callchain_root *root,
+ u64 *branch_count, u64 *predicted_count,
+ u64 *abort_count, u64 *cycles_count)
+{
+ if (branch_count)
+ *branch_count = 0;
+
+ if (predicted_count)
+ *predicted_count = 0;
+
+ if (abort_count)
+ *abort_count = 0;
+
+ if (cycles_count)
+ *cycles_count = 0;
+
+ return callchain_node_branch_counts_cumul(&root->node,
+ branch_count,
+ predicted_count,
+ abort_count,
+ cycles_count);
+}
+
+static int count_pri64_printf(int idx, const char *str, u64 value, char *bf, int bfsize)
+{
+ return scnprintf(bf, bfsize, "%s%s:%" PRId64 "", (idx) ? " " : " (", str, value);
+}
+
+static int count_float_printf(int idx, const char *str, float value,
+ char *bf, int bfsize, float threshold)
+{
+ if (threshold != 0.0 && value < threshold)
+ return 0;
+
+ return scnprintf(bf, bfsize, "%s%s:%.1f%%", (idx) ? " " : " (", str, value);
+}
+
+static int branch_to_str(char *bf, int bfsize,
+ u64 branch_count, u64 predicted_count,
+ u64 abort_count,
+ struct branch_type_stat *brtype_stat)
+{
+ int printed, i = 0;
+
+ printed = branch_type_str(brtype_stat, bf, bfsize);
+ if (printed)
+ i++;
+
+ if (predicted_count < branch_count) {
+ printed += count_float_printf(i++, "predicted",
+ predicted_count * 100.0 / branch_count,
+ bf + printed, bfsize - printed, 0.0);
+ }
+
+ if (abort_count) {
+ printed += count_float_printf(i++, "abort",
+ abort_count * 100.0 / branch_count,
+ bf + printed, bfsize - printed, 0.1);
+ }
+
+ if (i)
+ printed += scnprintf(bf + printed, bfsize - printed, ")");
+
+ return printed;
+}
+
+static int branch_from_str(char *bf, int bfsize,
+ u64 branch_count,
+ u64 cycles_count, u64 iter_count,
+ u64 iter_cycles, u64 from_count)
+{
+ int printed = 0, i = 0;
+ u64 cycles, v = 0;
+
+ cycles = cycles_count / branch_count;
+ if (cycles) {
+ printed += count_pri64_printf(i++, "cycles",
+ cycles,
+ bf + printed, bfsize - printed);
+ }
+
+ if (iter_count && from_count) {
+ v = iter_count / from_count;
+ if (v) {
+ printed += count_pri64_printf(i++, "iter",
+ v, bf + printed, bfsize - printed);
+
+ printed += count_pri64_printf(i++, "avg_cycles",
+ iter_cycles / iter_count,
+ bf + printed, bfsize - printed);
+ }
+ }
+
+ if (i)
+ printed += scnprintf(bf + printed, bfsize - printed, ")");
+
+ return printed;
+}
+
+static int counts_str_build(char *bf, int bfsize,
+ u64 branch_count, u64 predicted_count,
+ u64 abort_count, u64 cycles_count,
+ u64 iter_count, u64 iter_cycles,
+ u64 from_count,
+ struct branch_type_stat *brtype_stat)
+{
+ int printed;
+
+ if (branch_count == 0)
+ return scnprintf(bf, bfsize, " (calltrace)");
+
+ if (brtype_stat->branch_to) {
+ printed = branch_to_str(bf, bfsize, branch_count,
+ predicted_count, abort_count, brtype_stat);
+ } else {
+ printed = branch_from_str(bf, bfsize, branch_count,
+ cycles_count, iter_count, iter_cycles,
+ from_count);
+ }
+
+ if (!printed)
+ bf[0] = 0;
+
+ return printed;
+}
+
+static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
+ u64 branch_count, u64 predicted_count,
+ u64 abort_count, u64 cycles_count,
+ u64 iter_count, u64 iter_cycles,
+ u64 from_count,
+ struct branch_type_stat *brtype_stat)
+{
+ char str[256];
+
+ counts_str_build(str, sizeof(str), branch_count,
+ predicted_count, abort_count, cycles_count,
+ iter_count, iter_cycles, from_count, brtype_stat);
+
+ if (fp)
+ return fprintf(fp, "%s", str);
+
+ return scnprintf(bf, bfsize, "%s", str);
+}
+
+int callchain_list_counts__printf_value(struct callchain_list *clist,
+ FILE *fp, char *bf, int bfsize)
+{
+ u64 branch_count, predicted_count;
+ u64 abort_count, cycles_count;
+ u64 iter_count, iter_cycles;
+ u64 from_count;
+
+ branch_count = clist->branch_count;
+ predicted_count = clist->predicted_count;
+ abort_count = clist->abort_count;
+ cycles_count = clist->cycles_count;
+ iter_count = clist->iter_count;
+ iter_cycles = clist->iter_cycles;
+ from_count = clist->from_count;
+
+ return callchain_counts_printf(fp, bf, bfsize, branch_count,
+ predicted_count, abort_count,
+ cycles_count, iter_count, iter_cycles,
+ from_count, &clist->brtype_stat);
+}
+
+static void free_callchain_node(struct callchain_node *node)
+{
+ struct callchain_list *list, *tmp;
+ struct callchain_node *child;
+ struct rb_node *n;
+
+ list_for_each_entry_safe(list, tmp, &node->parent_val, list) {
+ list_del_init(&list->list);
+ map__zput(list->ms.map);
+ maps__zput(list->ms.maps);
+ free(list);
+ }
+
+ list_for_each_entry_safe(list, tmp, &node->val, list) {
+ list_del_init(&list->list);
+ map__zput(list->ms.map);
+ maps__zput(list->ms.maps);
+ free(list);
+ }
+
+ n = rb_first(&node->rb_root_in);
+ while (n) {
+ child = container_of(n, struct callchain_node, rb_node_in);
+ n = rb_next(n);
+ rb_erase(&child->rb_node_in, &node->rb_root_in);
+
+ free_callchain_node(child);
+ free(child);
+ }
+}
+
+void free_callchain(struct callchain_root *root)
+{
+ if (!symbol_conf.use_callchain)
+ return;
+
+ free_callchain_node(&root->node);
+}
+
+static u64 decay_callchain_node(struct callchain_node *node)
+{
+ struct callchain_node *child;
+ struct rb_node *n;
+ u64 child_hits = 0;
+
+ n = rb_first(&node->rb_root_in);
+ while (n) {
+ child = container_of(n, struct callchain_node, rb_node_in);
+
+ child_hits += decay_callchain_node(child);
+ n = rb_next(n);
+ }
+
+ node->hit = (node->hit * 7) / 8;
+ node->children_hit = child_hits;
+
+ return node->hit;
+}
+
+void decay_callchain(struct callchain_root *root)
+{
+ if (!symbol_conf.use_callchain)
+ return;
+
+ decay_callchain_node(&root->node);
+}
+
+int callchain_node__make_parent_list(struct callchain_node *node)
+{
+ struct callchain_node *parent = node->parent;
+ struct callchain_list *chain, *new;
+ LIST_HEAD(head);
+
+ while (parent) {
+ list_for_each_entry_reverse(chain, &parent->val, list) {
+ new = malloc(sizeof(*new));
+ if (new == NULL)
+ goto out;
+ *new = *chain;
+ new->has_children = false;
+ new->ms.map = map__get(new->ms.map);
+ list_add_tail(&new->list, &head);
+ }
+ parent = parent->parent;
+ }
+
+ list_for_each_entry_safe_reverse(chain, new, &head, list)
+ list_move_tail(&chain->list, &node->parent_val);
+
+ if (!list_empty(&node->parent_val)) {
+ chain = list_first_entry(&node->parent_val, struct callchain_list, list);
+ chain->has_children = rb_prev(&node->rb_node) || rb_next(&node->rb_node);
+
+ chain = list_first_entry(&node->val, struct callchain_list, list);
+ chain->has_children = false;
+ }
+ return 0;
+
+out:
+ list_for_each_entry_safe(chain, new, &head, list) {
+ list_del_init(&chain->list);
+ map__zput(chain->ms.map);
+ maps__zput(chain->ms.maps);
+ free(chain);
+ }
+ return -ENOMEM;
+}
+
+static void callchain_cursor__delete(void *vcursor)
+{
+ struct callchain_cursor *cursor = vcursor;
+ struct callchain_cursor_node *node, *next;
+
+ callchain_cursor_reset(cursor);
+ for (node = cursor->first; node != NULL; node = next) {
+ next = node->next;
+ free(node);
+ }
+ free(cursor);
+}
+
+static void init_callchain_cursor_key(void)
+{
+ if (pthread_key_create(&callchain_cursor, callchain_cursor__delete)) {
+ pr_err("callchain cursor creation failed");
+ abort();
+ }
+}
+
+struct callchain_cursor *get_tls_callchain_cursor(void)
+{
+ static pthread_once_t once_control = PTHREAD_ONCE_INIT;
+ struct callchain_cursor *cursor;
+
+ pthread_once(&once_control, init_callchain_cursor_key);
+ cursor = pthread_getspecific(callchain_cursor);
+ if (!cursor) {
+ cursor = zalloc(sizeof(*cursor));
+ if (!cursor)
+ pr_debug3("%s: not enough memory\n", __func__);
+ pthread_setspecific(callchain_cursor, cursor);
+ }
+ return cursor;
+}
+
+int callchain_cursor__copy(struct callchain_cursor *dst,
+ struct callchain_cursor *src)
+{
+ int rc = 0;
+
+ callchain_cursor_reset(dst);
+ callchain_cursor_commit(src);
+
+ while (true) {
+ struct callchain_cursor_node *node;
+
+ node = callchain_cursor_current(src);
+ if (node == NULL)
+ break;
+
+ rc = callchain_cursor_append(dst, node->ip, &node->ms,
+ node->branch, &node->branch_flags,
+ node->nr_loop_iter,
+ node->iter_cycles,
+ node->branch_from, node->srcline);
+ if (rc)
+ break;
+
+ callchain_cursor_advance(src);
+ }
+
+ return rc;
+}
+
+/*
+ * Initialize a cursor before adding entries inside, but keep
+ * the previously allocated entries as a cache.
+ */
+void callchain_cursor_reset(struct callchain_cursor *cursor)
+{
+ struct callchain_cursor_node *node;
+
+ cursor->nr = 0;
+ cursor->last = &cursor->first;
+
+ for (node = cursor->first; node != NULL; node = node->next) {
+ map__zput(node->ms.map);
+ maps__zput(node->ms.maps);
+ }
+}
+
+void callchain_param_setup(u64 sample_type, const char *arch)
+{
+ if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
+ if ((sample_type & PERF_SAMPLE_REGS_USER) &&
+ (sample_type & PERF_SAMPLE_STACK_USER)) {
+ callchain_param.record_mode = CALLCHAIN_DWARF;
+ dwarf_callchain_users = true;
+ } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+ callchain_param.record_mode = CALLCHAIN_LBR;
+ else
+ callchain_param.record_mode = CALLCHAIN_FP;
+ }
+
+ /*
+ * It's necessary to use libunwind to reliably determine the caller of
+ * a leaf function on aarch64, as otherwise we cannot know whether to
+ * start from the LR or FP.
+ *
+ * Always starting from the LR can result in duplicate or entirely
+ * erroneous entries. Always skipping the LR and starting from the FP
+ * can result in missing entries.
+ */
+ if (callchain_param.record_mode == CALLCHAIN_FP && !strcmp(arch, "arm64"))
+ dwarf_callchain_users = true;
+}
+
+static bool chain_match(struct callchain_list *base_chain,
+ struct callchain_list *pair_chain)
+{
+ enum match_result match;
+
+ match = match_chain_strings(base_chain->srcline,
+ pair_chain->srcline);
+ if (match != MATCH_ERROR)
+ return match == MATCH_EQ;
+
+ match = match_chain_dso_addresses(base_chain->ms.map,
+ base_chain->ip,
+ pair_chain->ms.map,
+ pair_chain->ip);
+
+ return match == MATCH_EQ;
+}
+
+bool callchain_cnode_matched(struct callchain_node *base_cnode,
+ struct callchain_node *pair_cnode)
+{
+ struct callchain_list *base_chain, *pair_chain;
+ bool match = false;
+
+ pair_chain = list_first_entry(&pair_cnode->val,
+ struct callchain_list,
+ list);
+
+ list_for_each_entry(base_chain, &base_cnode->val, list) {
+ if (&pair_chain->list == &pair_cnode->val)
+ return false;
+
+ if (!base_chain->srcline || !pair_chain->srcline) {
+ pair_chain = list_next_entry(pair_chain, list);
+ continue;
+ }
+
+ match = chain_match(base_chain, pair_chain);
+ if (!match)
+ return false;
+
+ pair_chain = list_next_entry(pair_chain, list);
+ }
+
+ /*
+ * Say chain1 is ABC, chain2 is ABCD, we consider they are
+ * not fully matched.
+ */
+ if (pair_chain && (&pair_chain->list != &pair_cnode->val))
+ return false;
+
+ return match;
+}
+
+static u64 count_callchain_hits(struct hist_entry *he)
+{
+ struct rb_root *root = &he->sorted_chain;
+ struct rb_node *rb_node = rb_first(root);
+ struct callchain_node *node;
+ u64 chain_hits = 0;
+
+ while (rb_node) {
+ node = rb_entry(rb_node, struct callchain_node, rb_node);
+ chain_hits += node->hit;
+ rb_node = rb_next(rb_node);
+ }
+
+ return chain_hits;
+}
+
+u64 callchain_total_hits(struct hists *hists)
+{
+ struct rb_node *next = rb_first_cached(&hists->entries);
+ u64 chain_hits = 0;
+
+ while (next) {
+ struct hist_entry *he = rb_entry(next, struct hist_entry,
+ rb_node);
+
+ chain_hits += count_callchain_hits(he);
+ next = rb_next(&he->rb_node);
+ }
+
+ return chain_hits;
+}
+
+s64 callchain_avg_cycles(struct callchain_node *cnode)
+{
+ struct callchain_list *chain;
+ s64 cycles = 0;
+
+ list_for_each_entry(chain, &cnode->val, list) {
+ if (chain->srcline && chain->branch_count)
+ cycles += chain->cycles_count / chain->branch_count;
+ }
+
+ return cycles;
+}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
new file mode 100644
index 000000000..d2618a47d
--- /dev/null
+++ b/tools/perf/util/callchain.h
@@ -0,0 +1,314 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CALLCHAIN_H
+#define __PERF_CALLCHAIN_H
+
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include "map_symbol.h"
+#include "branch.h"
+
+struct addr_location;
+struct evsel;
+struct ip_callchain;
+struct map;
+struct perf_sample;
+struct thread;
+struct hists;
+
+#define HELP_PAD "\t\t\t\t"
+
+#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace):\n\n"
+
+# define RECORD_MODE_HELP HELP_PAD "record_mode:\tcall graph recording mode (fp|dwarf|lbr)\n"
+
+#define RECORD_SIZE_HELP \
+ HELP_PAD "record_size:\tif record_mode is 'dwarf', max size of stack recording (<bytes>)\n" \
+ HELP_PAD "\t\tdefault: 8192 (bytes)\n"
+
+#define CALLCHAIN_RECORD_HELP CALLCHAIN_HELP RECORD_MODE_HELP RECORD_SIZE_HELP
+
+#define CALLCHAIN_REPORT_HELP \
+ HELP_PAD "print_type:\tcall graph printing style (graph|flat|fractal|folded|none)\n" \
+ HELP_PAD "threshold:\tminimum call graph inclusion threshold (<percent>)\n" \
+ HELP_PAD "print_limit:\tmaximum number of call graph entry (<number>)\n" \
+ HELP_PAD "order:\t\tcall graph order (caller|callee)\n" \
+ HELP_PAD "sort_key:\tcall graph sort key (function|address)\n" \
+ HELP_PAD "branch:\t\tinclude last branch info to call graph (branch)\n" \
+ HELP_PAD "value:\t\tcall graph value (percent|period|count)\n"
+
+enum perf_call_graph_mode {
+ CALLCHAIN_NONE,
+ CALLCHAIN_FP,
+ CALLCHAIN_DWARF,
+ CALLCHAIN_LBR,
+ CALLCHAIN_MAX
+};
+
+enum chain_mode {
+ CHAIN_NONE,
+ CHAIN_FLAT,
+ CHAIN_GRAPH_ABS,
+ CHAIN_GRAPH_REL,
+ CHAIN_FOLDED,
+};
+
+enum chain_order {
+ ORDER_CALLER,
+ ORDER_CALLEE
+};
+
+struct callchain_node {
+ struct callchain_node *parent;
+ struct list_head val;
+ struct list_head parent_val;
+ struct rb_node rb_node_in; /* to insert nodes in an rbtree */
+ struct rb_node rb_node; /* to sort nodes in an output tree */
+ struct rb_root rb_root_in; /* input tree of children */
+ struct rb_root rb_root; /* sorted output tree of children */
+ unsigned int val_nr;
+ unsigned int count;
+ unsigned int children_count;
+ u64 hit;
+ u64 children_hit;
+};
+
+struct callchain_root {
+ u64 max_depth;
+ struct callchain_node node;
+};
+
+struct callchain_param;
+
+typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_root *,
+ u64, struct callchain_param *);
+
+enum chain_key {
+ CCKEY_FUNCTION,
+ CCKEY_ADDRESS,
+ CCKEY_SRCLINE
+};
+
+enum chain_value {
+ CCVAL_PERCENT,
+ CCVAL_PERIOD,
+ CCVAL_COUNT,
+};
+
+extern bool dwarf_callchain_users;
+
+struct callchain_param {
+ bool enabled;
+ enum perf_call_graph_mode record_mode;
+ u32 dump_size;
+ enum chain_mode mode;
+ u16 max_stack;
+ u32 print_limit;
+ double min_percent;
+ sort_chain_func_t sort;
+ enum chain_order order;
+ bool order_set;
+ enum chain_key key;
+ bool branch_callstack;
+ enum chain_value value;
+};
+
+extern struct callchain_param callchain_param;
+extern struct callchain_param callchain_param_default;
+
+struct callchain_list {
+ u64 ip;
+ struct map_symbol ms;
+ struct /* for TUI */ {
+ bool unfolded;
+ bool has_children;
+ };
+ u64 branch_count;
+ u64 from_count;
+ u64 predicted_count;
+ u64 abort_count;
+ u64 cycles_count;
+ u64 iter_count;
+ u64 iter_cycles;
+ struct branch_type_stat brtype_stat;
+ const char *srcline;
+ struct list_head list;
+};
+
+/*
+ * A callchain cursor is a single linked list that
+ * let one feed a callchain progressively.
+ * It keeps persistent allocated entries to minimize
+ * allocations.
+ */
+struct callchain_cursor_node {
+ u64 ip;
+ struct map_symbol ms;
+ const char *srcline;
+ /* Indicate valid cursor node for LBR stitch */
+ bool valid;
+
+ bool branch;
+ struct branch_flags branch_flags;
+ u64 branch_from;
+ int nr_loop_iter;
+ u64 iter_cycles;
+ struct callchain_cursor_node *next;
+};
+
+struct stitch_list {
+ struct list_head node;
+ struct callchain_cursor_node cursor;
+};
+
+struct callchain_cursor {
+ u64 nr;
+ struct callchain_cursor_node *first;
+ struct callchain_cursor_node **last;
+ u64 pos;
+ struct callchain_cursor_node *curr;
+};
+
+static inline void callchain_init(struct callchain_root *root)
+{
+ INIT_LIST_HEAD(&root->node.val);
+ INIT_LIST_HEAD(&root->node.parent_val);
+
+ root->node.parent = NULL;
+ root->node.hit = 0;
+ root->node.children_hit = 0;
+ root->node.rb_root_in = RB_ROOT;
+ root->max_depth = 0;
+}
+
+static inline u64 callchain_cumul_hits(struct callchain_node *node)
+{
+ return node->hit + node->children_hit;
+}
+
+static inline unsigned callchain_cumul_counts(struct callchain_node *node)
+{
+ return node->count + node->children_count;
+}
+
+int callchain_register_param(struct callchain_param *param);
+int callchain_append(struct callchain_root *root,
+ struct callchain_cursor *cursor,
+ u64 period);
+
+int callchain_merge(struct callchain_cursor *cursor,
+ struct callchain_root *dst, struct callchain_root *src);
+
+void callchain_cursor_reset(struct callchain_cursor *cursor);
+
+int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
+ struct map_symbol *ms,
+ bool branch, struct branch_flags *flags,
+ int nr_loop_iter, u64 iter_cycles, u64 branch_from,
+ const char *srcline);
+
+/* Close a cursor writing session. Initialize for the reader */
+static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
+{
+ if (cursor == NULL)
+ return;
+ cursor->curr = cursor->first;
+ cursor->pos = 0;
+}
+
+/* Cursor reading iteration helpers */
+static inline struct callchain_cursor_node *
+callchain_cursor_current(struct callchain_cursor *cursor)
+{
+ if (cursor == NULL || cursor->pos == cursor->nr)
+ return NULL;
+
+ return cursor->curr;
+}
+
+static inline void callchain_cursor_advance(struct callchain_cursor *cursor)
+{
+ cursor->curr = cursor->curr->next;
+ cursor->pos++;
+}
+
+struct callchain_cursor *get_tls_callchain_cursor(void);
+
+int callchain_cursor__copy(struct callchain_cursor *dst,
+ struct callchain_cursor *src);
+
+struct option;
+struct hist_entry;
+
+int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset);
+int record_callchain_opt(const struct option *opt, const char *arg, int unset);
+
+struct record_opts;
+
+int record_opts__parse_callchain(struct record_opts *record,
+ struct callchain_param *callchain,
+ const char *arg, bool unset);
+
+int sample__resolve_callchain(struct perf_sample *sample,
+ struct callchain_cursor *cursor, struct symbol **parent,
+ struct evsel *evsel, struct addr_location *al,
+ int max_stack);
+int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample);
+int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
+ bool hide_unresolved);
+
+extern const char record_callchain_help[];
+int parse_callchain_record(const char *arg, struct callchain_param *param);
+int parse_callchain_record_opt(const char *arg, struct callchain_param *param);
+int parse_callchain_report_opt(const char *arg);
+int parse_callchain_top_opt(const char *arg);
+int perf_callchain_config(const char *var, const char *value);
+
+static inline void callchain_cursor_snapshot(struct callchain_cursor *dest,
+ struct callchain_cursor *src)
+{
+ *dest = *src;
+
+ dest->first = src->curr;
+ dest->nr -= src->pos;
+}
+
+#ifdef HAVE_SKIP_CALLCHAIN_IDX
+int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain);
+#else
+static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
+ struct ip_callchain *chain __maybe_unused)
+{
+ return -1;
+}
+#endif
+
+void arch__add_leaf_frame_record_opts(struct record_opts *opts);
+
+char *callchain_list__sym_name(struct callchain_list *cl,
+ char *bf, size_t bfsize, bool show_dso);
+char *callchain_node__scnprintf_value(struct callchain_node *node,
+ char *bf, size_t bfsize, u64 total);
+int callchain_node__fprintf_value(struct callchain_node *node,
+ FILE *fp, u64 total);
+
+int callchain_list_counts__printf_value(struct callchain_list *clist,
+ FILE *fp, char *bf, int bfsize);
+
+void free_callchain(struct callchain_root *root);
+void decay_callchain(struct callchain_root *root);
+int callchain_node__make_parent_list(struct callchain_node *node);
+
+int callchain_branch_counts(struct callchain_root *root,
+ u64 *branch_count, u64 *predicted_count,
+ u64 *abort_count, u64 *cycles_count);
+
+void callchain_param_setup(u64 sample_type, const char *arch);
+
+bool callchain_cnode_matched(struct callchain_node *base_cnode,
+ struct callchain_node *pair_cnode);
+
+u64 callchain_total_hits(struct hists *hists);
+
+s64 callchain_avg_cycles(struct callchain_node *cnode);
+
+#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/cap.c b/tools/perf/util/cap.c
new file mode 100644
index 000000000..c3ba841bb
--- /dev/null
+++ b/tools/perf/util/cap.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Capability utilities
+ */
+
+#ifdef HAVE_LIBCAP_SUPPORT
+
+#include "cap.h"
+#include <stdbool.h>
+#include <sys/capability.h>
+
+bool perf_cap__capable(cap_value_t cap)
+{
+ cap_flag_value_t val;
+ cap_t caps = cap_get_proc();
+
+ if (!caps)
+ return false;
+
+ if (cap_get_flag(caps, cap, CAP_EFFECTIVE, &val) != 0)
+ val = CAP_CLEAR;
+
+ if (cap_free(caps) != 0)
+ return false;
+
+ return val == CAP_SET;
+}
+
+#endif /* HAVE_LIBCAP_SUPPORT */
diff --git a/tools/perf/util/cap.h b/tools/perf/util/cap.h
new file mode 100644
index 000000000..ae52878c0
--- /dev/null
+++ b/tools/perf/util/cap.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CAP_H
+#define __PERF_CAP_H
+
+#include <stdbool.h>
+#include <linux/capability.h>
+#include <linux/compiler.h>
+
+#ifdef HAVE_LIBCAP_SUPPORT
+
+#include <sys/capability.h>
+
+bool perf_cap__capable(cap_value_t cap);
+
+#else
+
+#include <unistd.h>
+#include <sys/types.h>
+
+static inline bool perf_cap__capable(int cap __maybe_unused)
+{
+ return geteuid() == 0;
+}
+
+#endif /* HAVE_LIBCAP_SUPPORT */
+
+/* For older systems */
+#ifndef CAP_SYSLOG
+#define CAP_SYSLOG 34
+#endif
+
+#ifndef CAP_PERFMON
+#define CAP_PERFMON 38
+#endif
+
+#endif /* __PERF_CAP_H */
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
new file mode 100644
index 000000000..bfb13306d
--- /dev/null
+++ b/tools/perf/util/cgroup.c
@@ -0,0 +1,589 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <subcmd/parse-options.h>
+#include "evsel.h"
+#include "cgroup.h"
+#include "evlist.h"
+#include "rblist.h"
+#include "metricgroup.h"
+#include "stat.h"
+#include <linux/zalloc.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <api/fs/fs.h>
+#include <ftw.h>
+#include <regex.h>
+
+int nr_cgroups;
+bool cgrp_event_expanded;
+
+/* used to match cgroup name with patterns */
+struct cgroup_name {
+ struct list_head list;
+ bool used;
+ char name[];
+};
+static LIST_HEAD(cgroup_list);
+
+static int open_cgroup(const char *name)
+{
+ char path[PATH_MAX + 1];
+ char mnt[PATH_MAX + 1];
+ int fd;
+
+
+ if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1, "perf_event"))
+ return -1;
+
+ scnprintf(path, PATH_MAX, "%s/%s", mnt, name);
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ fprintf(stderr, "no access to cgroup %s\n", path);
+
+ return fd;
+}
+
+#ifdef HAVE_FILE_HANDLE
+int read_cgroup_id(struct cgroup *cgrp)
+{
+ char path[PATH_MAX + 1];
+ char mnt[PATH_MAX + 1];
+ struct {
+ struct file_handle fh;
+ uint64_t cgroup_id;
+ } handle;
+ int mount_id;
+
+ if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1, "perf_event"))
+ return -1;
+
+ scnprintf(path, PATH_MAX, "%s/%s", mnt, cgrp->name);
+
+ handle.fh.handle_bytes = sizeof(handle.cgroup_id);
+ if (name_to_handle_at(AT_FDCWD, path, &handle.fh, &mount_id, 0) < 0)
+ return -1;
+
+ cgrp->id = handle.cgroup_id;
+ return 0;
+}
+#endif /* HAVE_FILE_HANDLE */
+
+#ifndef CGROUP2_SUPER_MAGIC
+#define CGROUP2_SUPER_MAGIC 0x63677270
+#endif
+
+int cgroup_is_v2(const char *subsys)
+{
+ char mnt[PATH_MAX + 1];
+ struct statfs stbuf;
+
+ if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1, subsys))
+ return -1;
+
+ if (statfs(mnt, &stbuf) < 0)
+ return -1;
+
+ return (stbuf.f_type == CGROUP2_SUPER_MAGIC);
+}
+
+static struct cgroup *evlist__find_cgroup(struct evlist *evlist, const char *str)
+{
+ struct evsel *counter;
+ /*
+ * check if cgrp is already defined, if so we reuse it
+ */
+ evlist__for_each_entry(evlist, counter) {
+ if (!counter->cgrp)
+ continue;
+ if (!strcmp(counter->cgrp->name, str))
+ return cgroup__get(counter->cgrp);
+ }
+
+ return NULL;
+}
+
+static struct cgroup *cgroup__new(const char *name, bool do_open)
+{
+ struct cgroup *cgroup = zalloc(sizeof(*cgroup));
+
+ if (cgroup != NULL) {
+ refcount_set(&cgroup->refcnt, 1);
+
+ cgroup->name = strdup(name);
+ if (!cgroup->name)
+ goto out_err;
+
+ if (do_open) {
+ cgroup->fd = open_cgroup(name);
+ if (cgroup->fd == -1)
+ goto out_free_name;
+ } else {
+ cgroup->fd = -1;
+ }
+ }
+
+ return cgroup;
+
+out_free_name:
+ zfree(&cgroup->name);
+out_err:
+ free(cgroup);
+ return NULL;
+}
+
+struct cgroup *evlist__findnew_cgroup(struct evlist *evlist, const char *name)
+{
+ struct cgroup *cgroup = evlist__find_cgroup(evlist, name);
+
+ return cgroup ?: cgroup__new(name, true);
+}
+
+static int add_cgroup(struct evlist *evlist, const char *str)
+{
+ struct evsel *counter;
+ struct cgroup *cgrp = evlist__findnew_cgroup(evlist, str);
+ int n;
+
+ if (!cgrp)
+ return -1;
+ /*
+ * find corresponding event
+ * if add cgroup N, then need to find event N
+ */
+ n = 0;
+ evlist__for_each_entry(evlist, counter) {
+ if (n == nr_cgroups)
+ goto found;
+ n++;
+ }
+
+ cgroup__put(cgrp);
+ return -1;
+found:
+ counter->cgrp = cgrp;
+ return 0;
+}
+
+static void cgroup__delete(struct cgroup *cgroup)
+{
+ if (cgroup->fd >= 0)
+ close(cgroup->fd);
+ zfree(&cgroup->name);
+ free(cgroup);
+}
+
+void cgroup__put(struct cgroup *cgrp)
+{
+ if (cgrp && refcount_dec_and_test(&cgrp->refcnt)) {
+ cgroup__delete(cgrp);
+ }
+}
+
+struct cgroup *cgroup__get(struct cgroup *cgroup)
+{
+ if (cgroup)
+ refcount_inc(&cgroup->refcnt);
+ return cgroup;
+}
+
+static void evsel__set_default_cgroup(struct evsel *evsel, struct cgroup *cgroup)
+{
+ if (evsel->cgrp == NULL)
+ evsel->cgrp = cgroup__get(cgroup);
+}
+
+void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel)
+ evsel__set_default_cgroup(evsel, cgroup);
+}
+
+/* helper function for ftw() in match_cgroups and list_cgroups */
+static int add_cgroup_name(const char *fpath, const struct stat *sb __maybe_unused,
+ int typeflag, struct FTW *ftwbuf __maybe_unused)
+{
+ struct cgroup_name *cn;
+
+ if (typeflag != FTW_D)
+ return 0;
+
+ cn = malloc(sizeof(*cn) + strlen(fpath) + 1);
+ if (cn == NULL)
+ return -1;
+
+ cn->used = false;
+ strcpy(cn->name, fpath);
+
+ list_add_tail(&cn->list, &cgroup_list);
+ return 0;
+}
+
+static int check_and_add_cgroup_name(const char *fpath)
+{
+ struct cgroup_name *cn;
+
+ list_for_each_entry(cn, &cgroup_list, list) {
+ if (!strcmp(cn->name, fpath))
+ return 0;
+ }
+
+ /* pretend if it's added by ftw() */
+ return add_cgroup_name(fpath, NULL, FTW_D, NULL);
+}
+
+static void release_cgroup_list(void)
+{
+ struct cgroup_name *cn;
+
+ while (!list_empty(&cgroup_list)) {
+ cn = list_first_entry(&cgroup_list, struct cgroup_name, list);
+ list_del(&cn->list);
+ free(cn);
+ }
+}
+
+/* collect given cgroups only */
+static int list_cgroups(const char *str)
+{
+ const char *p, *e, *eos = str + strlen(str);
+ struct cgroup_name *cn;
+ char *s;
+
+ /* use given name as is when no regex is given */
+ for (;;) {
+ p = strchr(str, ',');
+ e = p ? p : eos;
+
+ if (e - str) {
+ int ret;
+
+ s = strndup(str, e - str);
+ if (!s)
+ return -1;
+
+ ret = check_and_add_cgroup_name(s);
+ free(s);
+ if (ret < 0)
+ return -1;
+ } else {
+ if (check_and_add_cgroup_name("/") < 0)
+ return -1;
+ }
+
+ if (!p)
+ break;
+ str = p+1;
+ }
+
+ /* these groups will be used */
+ list_for_each_entry(cn, &cgroup_list, list)
+ cn->used = true;
+
+ return 0;
+}
+
+/* collect all cgroups first and then match with the pattern */
+static int match_cgroups(const char *str)
+{
+ char mnt[PATH_MAX];
+ const char *p, *e, *eos = str + strlen(str);
+ struct cgroup_name *cn;
+ regex_t reg;
+ int prefix_len;
+ char *s;
+
+ if (cgroupfs_find_mountpoint(mnt, sizeof(mnt), "perf_event"))
+ return -1;
+
+ /* cgroup_name will have a full path, skip the root directory */
+ prefix_len = strlen(mnt);
+
+ /* collect all cgroups in the cgroup_list */
+ if (nftw(mnt, add_cgroup_name, 20, 0) < 0)
+ return -1;
+
+ for (;;) {
+ p = strchr(str, ',');
+ e = p ? p : eos;
+
+ /* allow empty cgroups, i.e., skip */
+ if (e - str) {
+ /* termination added */
+ s = strndup(str, e - str);
+ if (!s)
+ return -1;
+ if (regcomp(&reg, s, REG_NOSUB)) {
+ free(s);
+ return -1;
+ }
+
+ /* check cgroup name with the pattern */
+ list_for_each_entry(cn, &cgroup_list, list) {
+ char *name = cn->name + prefix_len;
+
+ if (name[0] == '/' && name[1])
+ name++;
+ if (!regexec(&reg, name, 0, NULL, 0))
+ cn->used = true;
+ }
+ regfree(&reg);
+ free(s);
+ } else {
+ /* first entry to root cgroup */
+ cn = list_first_entry(&cgroup_list, struct cgroup_name,
+ list);
+ cn->used = true;
+ }
+
+ if (!p)
+ break;
+ str = p+1;
+ }
+ return prefix_len;
+}
+
+int parse_cgroups(const struct option *opt, const char *str,
+ int unset __maybe_unused)
+{
+ struct evlist *evlist = *(struct evlist **)opt->value;
+ struct evsel *counter;
+ struct cgroup *cgrp = NULL;
+ const char *p, *e, *eos = str + strlen(str);
+ char *s;
+ int ret, i;
+
+ if (list_empty(&evlist->core.entries)) {
+ fprintf(stderr, "must define events before cgroups\n");
+ return -1;
+ }
+
+ for (;;) {
+ p = strchr(str, ',');
+ e = p ? p : eos;
+
+ /* allow empty cgroups, i.e., skip */
+ if (e - str) {
+ /* termination added */
+ s = strndup(str, e - str);
+ if (!s)
+ return -1;
+ ret = add_cgroup(evlist, s);
+ free(s);
+ if (ret)
+ return -1;
+ }
+ /* nr_cgroups is increased een for empty cgroups */
+ nr_cgroups++;
+ if (!p)
+ break;
+ str = p+1;
+ }
+ /* for the case one cgroup combine to multiple events */
+ i = 0;
+ if (nr_cgroups == 1) {
+ evlist__for_each_entry(evlist, counter) {
+ if (i == 0)
+ cgrp = counter->cgrp;
+ else {
+ counter->cgrp = cgrp;
+ refcount_inc(&cgrp->refcnt);
+ }
+ i++;
+ }
+ }
+ return 0;
+}
+
+static bool has_pattern_string(const char *str)
+{
+ return !!strpbrk(str, "{}[]()|*+?^$");
+}
+
+int evlist__expand_cgroup(struct evlist *evlist, const char *str,
+ struct rblist *metric_events, bool open_cgroup)
+{
+ struct evlist *orig_list, *tmp_list;
+ struct evsel *pos, *evsel, *leader;
+ struct rblist orig_metric_events;
+ struct cgroup *cgrp = NULL;
+ struct cgroup_name *cn;
+ int ret = -1;
+ int prefix_len;
+
+ if (evlist->core.nr_entries == 0) {
+ fprintf(stderr, "must define events before cgroups\n");
+ return -EINVAL;
+ }
+
+ orig_list = evlist__new();
+ tmp_list = evlist__new();
+ if (orig_list == NULL || tmp_list == NULL) {
+ fprintf(stderr, "memory allocation failed\n");
+ return -ENOMEM;
+ }
+
+ /* save original events and init evlist */
+ evlist__splice_list_tail(orig_list, &evlist->core.entries);
+ evlist->core.nr_entries = 0;
+
+ if (metric_events) {
+ orig_metric_events = *metric_events;
+ rblist__init(metric_events);
+ } else {
+ rblist__init(&orig_metric_events);
+ }
+
+ if (has_pattern_string(str))
+ prefix_len = match_cgroups(str);
+ else
+ prefix_len = list_cgroups(str);
+
+ if (prefix_len < 0)
+ goto out_err;
+
+ list_for_each_entry(cn, &cgroup_list, list) {
+ char *name;
+
+ if (!cn->used)
+ continue;
+
+ /* cgroup_name might have a full path, skip the prefix */
+ name = cn->name + prefix_len;
+ if (name[0] == '/' && name[1])
+ name++;
+ cgrp = cgroup__new(name, open_cgroup);
+ if (cgrp == NULL)
+ goto out_err;
+
+ leader = NULL;
+ evlist__for_each_entry(orig_list, pos) {
+ evsel = evsel__clone(pos);
+ if (evsel == NULL)
+ goto out_err;
+
+ cgroup__put(evsel->cgrp);
+ evsel->cgrp = cgroup__get(cgrp);
+
+ if (evsel__is_group_leader(pos))
+ leader = evsel;
+ evsel__set_leader(evsel, leader);
+
+ evlist__add(tmp_list, evsel);
+ }
+ /* cgroup__new() has a refcount, release it here */
+ cgroup__put(cgrp);
+ nr_cgroups++;
+
+ if (metric_events) {
+ if (metricgroup__copy_metric_events(tmp_list, cgrp,
+ metric_events,
+ &orig_metric_events) < 0)
+ goto out_err;
+ }
+
+ evlist__splice_list_tail(evlist, &tmp_list->core.entries);
+ tmp_list->core.nr_entries = 0;
+ }
+
+ if (list_empty(&evlist->core.entries)) {
+ fprintf(stderr, "no cgroup matched: %s\n", str);
+ goto out_err;
+ }
+
+ ret = 0;
+ cgrp_event_expanded = true;
+
+out_err:
+ evlist__delete(orig_list);
+ evlist__delete(tmp_list);
+ rblist__exit(&orig_metric_events);
+ release_cgroup_list();
+
+ return ret;
+}
+
+static struct cgroup *__cgroup__findnew(struct rb_root *root, uint64_t id,
+ bool create, const char *path)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct cgroup *cgrp;
+
+ while (*p != NULL) {
+ parent = *p;
+ cgrp = rb_entry(parent, struct cgroup, node);
+
+ if (cgrp->id == id)
+ return cgrp;
+
+ if (cgrp->id < id)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ if (!create)
+ return NULL;
+
+ cgrp = malloc(sizeof(*cgrp));
+ if (cgrp == NULL)
+ return NULL;
+
+ cgrp->name = strdup(path);
+ if (cgrp->name == NULL) {
+ free(cgrp);
+ return NULL;
+ }
+
+ cgrp->fd = -1;
+ cgrp->id = id;
+ refcount_set(&cgrp->refcnt, 1);
+
+ rb_link_node(&cgrp->node, parent, p);
+ rb_insert_color(&cgrp->node, root);
+
+ return cgrp;
+}
+
+struct cgroup *cgroup__findnew(struct perf_env *env, uint64_t id,
+ const char *path)
+{
+ struct cgroup *cgrp;
+
+ down_write(&env->cgroups.lock);
+ cgrp = __cgroup__findnew(&env->cgroups.tree, id, true, path);
+ up_write(&env->cgroups.lock);
+ return cgrp;
+}
+
+struct cgroup *cgroup__find(struct perf_env *env, uint64_t id)
+{
+ struct cgroup *cgrp;
+
+ down_read(&env->cgroups.lock);
+ cgrp = __cgroup__findnew(&env->cgroups.tree, id, false, NULL);
+ up_read(&env->cgroups.lock);
+ return cgrp;
+}
+
+void perf_env__purge_cgroups(struct perf_env *env)
+{
+ struct rb_node *node;
+ struct cgroup *cgrp;
+
+ down_write(&env->cgroups.lock);
+ while (!RB_EMPTY_ROOT(&env->cgroups.tree)) {
+ node = rb_first(&env->cgroups.tree);
+ cgrp = rb_entry(node, struct cgroup, node);
+
+ rb_erase(node, &env->cgroups.tree);
+ cgroup__put(cgrp);
+ }
+ up_write(&env->cgroups.lock);
+}
diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
new file mode 100644
index 000000000..12256b786
--- /dev/null
+++ b/tools/perf/util/cgroup.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __CGROUP_H__
+#define __CGROUP_H__
+
+#include <linux/compiler.h>
+#include <linux/refcount.h>
+#include <linux/rbtree.h>
+#include "util/env.h"
+
+struct option;
+
+struct cgroup {
+ struct rb_node node;
+ u64 id;
+ char *name;
+ int fd;
+ refcount_t refcnt;
+};
+
+extern int nr_cgroups; /* number of explicit cgroups defined */
+extern bool cgrp_event_expanded;
+
+struct cgroup *cgroup__get(struct cgroup *cgroup);
+void cgroup__put(struct cgroup *cgroup);
+
+struct evlist;
+struct rblist;
+
+struct cgroup *evlist__findnew_cgroup(struct evlist *evlist, const char *name);
+int evlist__expand_cgroup(struct evlist *evlist, const char *cgroups,
+ struct rblist *metric_events, bool open_cgroup);
+
+void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup);
+
+int parse_cgroups(const struct option *opt, const char *str, int unset);
+
+struct cgroup *cgroup__findnew(struct perf_env *env, uint64_t id,
+ const char *path);
+struct cgroup *cgroup__find(struct perf_env *env, uint64_t id);
+
+void perf_env__purge_cgroups(struct perf_env *env);
+
+#ifdef HAVE_FILE_HANDLE
+int read_cgroup_id(struct cgroup *cgrp);
+#else
+static inline int read_cgroup_id(struct cgroup *cgrp __maybe_unused)
+{
+ return -1;
+}
+#endif /* HAVE_FILE_HANDLE */
+
+int cgroup_is_v2(const char *subsys);
+
+#endif /* __CGROUP_H__ */
diff --git a/tools/perf/util/clockid.c b/tools/perf/util/clockid.c
new file mode 100644
index 000000000..74365a5d9
--- /dev/null
+++ b/tools/perf/util/clockid.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <subcmd/parse-options.h>
+#include <stdio.h>
+#include <time.h>
+#include <strings.h>
+#include <linux/time64.h>
+#include "debug.h"
+#include "clockid.h"
+#include "record.h"
+
+struct clockid_map {
+ const char *name;
+ int clockid;
+};
+
+#define CLOCKID_MAP(n, c) \
+ { .name = n, .clockid = (c), }
+
+#define CLOCKID_END { .name = NULL, }
+
+
+/*
+ * Add the missing ones, we need to build on many distros...
+ */
+#ifndef CLOCK_MONOTONIC_RAW
+#define CLOCK_MONOTONIC_RAW 4
+#endif
+#ifndef CLOCK_BOOTTIME
+#define CLOCK_BOOTTIME 7
+#endif
+#ifndef CLOCK_TAI
+#define CLOCK_TAI 11
+#endif
+
+static const struct clockid_map clockids[] = {
+ /* available for all events, NMI safe */
+ CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
+ CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
+
+ /* available for some events */
+ CLOCKID_MAP("realtime", CLOCK_REALTIME),
+ CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
+ CLOCKID_MAP("tai", CLOCK_TAI),
+
+ /* available for the lazy */
+ CLOCKID_MAP("mono", CLOCK_MONOTONIC),
+ CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
+ CLOCKID_MAP("real", CLOCK_REALTIME),
+ CLOCKID_MAP("boot", CLOCK_BOOTTIME),
+
+ CLOCKID_END,
+};
+
+static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
+{
+ struct timespec res;
+
+ *res_ns = 0;
+ if (!clock_getres(clk_id, &res))
+ *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
+ else
+ pr_warning("WARNING: Failed to determine specified clock resolution.\n");
+
+ return 0;
+}
+
+int parse_clockid(const struct option *opt, const char *str, int unset)
+{
+ struct record_opts *opts = (struct record_opts *)opt->value;
+ const struct clockid_map *cm;
+ const char *ostr = str;
+
+ if (unset) {
+ opts->use_clockid = 0;
+ return 0;
+ }
+
+ /* no arg passed */
+ if (!str)
+ return 0;
+
+ /* no setting it twice */
+ if (opts->use_clockid)
+ return -1;
+
+ opts->use_clockid = true;
+
+ /* if its a number, we're done */
+ if (sscanf(str, "%d", &opts->clockid) == 1)
+ return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
+
+ /* allow a "CLOCK_" prefix to the name */
+ if (!strncasecmp(str, "CLOCK_", 6))
+ str += 6;
+
+ for (cm = clockids; cm->name; cm++) {
+ if (!strcasecmp(str, cm->name)) {
+ opts->clockid = cm->clockid;
+ return get_clockid_res(opts->clockid,
+ &opts->clockid_res_ns);
+ }
+ }
+
+ opts->use_clockid = false;
+ ui__warning("unknown clockid %s, check man page\n", ostr);
+ return -1;
+}
+
+const char *clockid_name(clockid_t clk_id)
+{
+ const struct clockid_map *cm;
+
+ for (cm = clockids; cm->name; cm++) {
+ if (cm->clockid == clk_id)
+ return cm->name;
+ }
+ return "(not found)";
+}
diff --git a/tools/perf/util/clockid.h b/tools/perf/util/clockid.h
new file mode 100644
index 000000000..9b49b4711
--- /dev/null
+++ b/tools/perf/util/clockid.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __PERF_CLOCKID_H
+#define __PERF_CLOCKID_H
+
+struct option;
+int parse_clockid(const struct option *opt, const char *str, int unset);
+
+const char *clockid_name(clockid_t clk_id);
+
+#endif
diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c
new file mode 100644
index 000000000..8830604c3
--- /dev/null
+++ b/tools/perf/util/cloexec.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <sched.h>
+#include "util.h" // for sched_getcpu()
+#include "../perf-sys.h"
+#include "cloexec.h"
+#include "event.h"
+#include "asm/bug.h"
+#include "debug.h"
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <linux/string.h>
+
+static unsigned long flag = PERF_FLAG_FD_CLOEXEC;
+
+static int perf_flag_probe(void)
+{
+ /* use 'safest' configuration as used in evsel__fallback() */
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_CPU_CLOCK,
+ .exclude_kernel = 1,
+ };
+ int fd;
+ int err;
+ int cpu;
+ pid_t pid = -1;
+ char sbuf[STRERR_BUFSIZE];
+
+ cpu = sched_getcpu();
+ if (cpu < 0)
+ cpu = 0;
+
+ /*
+ * Using -1 for the pid is a workaround to avoid gratuitous jump label
+ * changes.
+ */
+ while (1) {
+ /* check cloexec flag */
+ fd = sys_perf_event_open(&attr, pid, cpu, -1,
+ PERF_FLAG_FD_CLOEXEC);
+ if (fd < 0 && pid == -1 && errno == EACCES) {
+ pid = 0;
+ continue;
+ }
+ break;
+ }
+ err = errno;
+
+ if (fd >= 0) {
+ close(fd);
+ return 1;
+ }
+
+ WARN_ONCE(err != EINVAL && err != EBUSY && err != EACCES,
+ "perf_event_open(..., PERF_FLAG_FD_CLOEXEC) failed with unexpected error %d (%s)\n",
+ err, str_error_r(err, sbuf, sizeof(sbuf)));
+
+ /* not supported, confirm error related to PERF_FLAG_FD_CLOEXEC */
+ while (1) {
+ fd = sys_perf_event_open(&attr, pid, cpu, -1, 0);
+ if (fd < 0 && pid == -1 && errno == EACCES) {
+ pid = 0;
+ continue;
+ }
+ break;
+ }
+ err = errno;
+
+ if (fd >= 0)
+ close(fd);
+
+ if (WARN_ONCE(fd < 0 && err != EBUSY && err != EACCES,
+ "perf_event_open(..., 0) failed unexpectedly with error %d (%s)\n",
+ err, str_error_r(err, sbuf, sizeof(sbuf))))
+ return -1;
+
+ return 0;
+}
+
+unsigned long perf_event_open_cloexec_flag(void)
+{
+ static bool probed;
+
+ if (!probed) {
+ if (perf_flag_probe() <= 0)
+ flag = 0;
+ probed = true;
+ }
+
+ return flag;
+}
diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h
new file mode 100644
index 000000000..78216b101
--- /dev/null
+++ b/tools/perf/util/cloexec.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CLOEXEC_H
+#define __PERF_CLOEXEC_H
+
+unsigned long perf_event_open_cloexec_flag(void);
+
+#endif /* __PERF_CLOEXEC_H */
diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c
new file mode 100644
index 000000000..bffbdd216
--- /dev/null
+++ b/tools/perf/util/color.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <subcmd/pager.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "color.h"
+#include <math.h>
+#include <unistd.h>
+
+int perf_use_color_default = -1;
+
+static int __color_vsnprintf(char *bf, size_t size, const char *color,
+ const char *fmt, va_list args, const char *trail)
+{
+ int r = 0;
+
+ /*
+ * Auto-detect:
+ */
+ if (perf_use_color_default < 0) {
+ if (isatty(1) || pager_in_use())
+ perf_use_color_default = 1;
+ else
+ perf_use_color_default = 0;
+ }
+
+ if (perf_use_color_default && *color)
+ r += scnprintf(bf, size, "%s", color);
+ r += vscnprintf(bf + r, size - r, fmt, args);
+ if (perf_use_color_default && *color)
+ r += scnprintf(bf + r, size - r, "%s", PERF_COLOR_RESET);
+ if (trail)
+ r += scnprintf(bf + r, size - r, "%s", trail);
+ return r;
+}
+
+/* Colors are not included in return value */
+static int __color_vfprintf(FILE *fp, const char *color, const char *fmt,
+ va_list args)
+{
+ int r = 0;
+
+ /*
+ * Auto-detect:
+ */
+ if (perf_use_color_default < 0) {
+ if (isatty(fileno(fp)) || pager_in_use())
+ perf_use_color_default = 1;
+ else
+ perf_use_color_default = 0;
+ }
+
+ if (perf_use_color_default && *color)
+ fprintf(fp, "%s", color);
+ r += vfprintf(fp, fmt, args);
+ if (perf_use_color_default && *color)
+ fprintf(fp, "%s", PERF_COLOR_RESET);
+ return r;
+}
+
+int color_vsnprintf(char *bf, size_t size, const char *color,
+ const char *fmt, va_list args)
+{
+ return __color_vsnprintf(bf, size, color, fmt, args, NULL);
+}
+
+int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args)
+{
+ return __color_vfprintf(fp, color, fmt, args);
+}
+
+int color_snprintf(char *bf, size_t size, const char *color,
+ const char *fmt, ...)
+{
+ va_list args;
+ int r;
+
+ va_start(args, fmt);
+ r = color_vsnprintf(bf, size, color, fmt, args);
+ va_end(args);
+ return r;
+}
+
+int color_fprintf(FILE *fp, const char *color, const char *fmt, ...)
+{
+ va_list args;
+ int r;
+
+ va_start(args, fmt);
+ r = color_vfprintf(fp, color, fmt, args);
+ va_end(args);
+ return r;
+}
+
+/*
+ * This function splits the buffer by newlines and colors the lines individually.
+ *
+ * Returns 0 on success.
+ */
+int color_fwrite_lines(FILE *fp, const char *color,
+ size_t count, const char *buf)
+{
+ if (!*color)
+ return fwrite(buf, count, 1, fp) != 1;
+
+ while (count) {
+ char *p = memchr(buf, '\n', count);
+
+ if (p != buf && (fputs(color, fp) < 0 ||
+ fwrite(buf, p ? (size_t)(p - buf) : count, 1, fp) != 1 ||
+ fputs(PERF_COLOR_RESET, fp) < 0))
+ return -1;
+ if (!p)
+ return 0;
+ if (fputc('\n', fp) < 0)
+ return -1;
+ count -= p + 1 - buf;
+ buf = p + 1;
+ }
+ return 0;
+}
+
+const char *get_percent_color(double percent)
+{
+ const char *color = PERF_COLOR_NORMAL;
+
+ /*
+ * We color high-overhead entries in red, mid-overhead
+ * entries in green - and keep the low overhead places
+ * normal:
+ */
+ if (fabs(percent) >= MIN_RED)
+ color = PERF_COLOR_RED;
+ else {
+ if (fabs(percent) > MIN_GREEN)
+ color = PERF_COLOR_GREEN;
+ }
+ return color;
+}
+
+int percent_color_fprintf(FILE *fp, const char *fmt, double percent)
+{
+ int r;
+ const char *color;
+
+ color = get_percent_color(percent);
+ r = color_fprintf(fp, color, fmt, percent);
+
+ return r;
+}
+
+int value_color_snprintf(char *bf, size_t size, const char *fmt, double value)
+{
+ const char *color = get_percent_color(value);
+ return color_snprintf(bf, size, color, fmt, value);
+}
+
+int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...)
+{
+ va_list args;
+ double percent;
+
+ va_start(args, fmt);
+ percent = va_arg(args, double);
+ va_end(args);
+ return value_color_snprintf(bf, size, fmt, percent);
+}
+
+int percent_color_len_snprintf(char *bf, size_t size, const char *fmt, ...)
+{
+ va_list args;
+ int len;
+ double percent;
+ const char *color;
+
+ va_start(args, fmt);
+ len = va_arg(args, int);
+ percent = va_arg(args, double);
+ va_end(args);
+
+ color = get_percent_color(percent);
+ return color_snprintf(bf, size, color, fmt, len, percent);
+}
diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h
new file mode 100644
index 000000000..01f7bed21
--- /dev/null
+++ b/tools/perf/util/color.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_COLOR_H
+#define __PERF_COLOR_H
+
+#include <stdio.h>
+#include <stdarg.h>
+
+/* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */
+#define COLOR_MAXLEN 24
+
+#define PERF_COLOR_NORMAL ""
+#define PERF_COLOR_RESET "\033[m"
+#define PERF_COLOR_BOLD "\033[1m"
+#define PERF_COLOR_RED "\033[31m"
+#define PERF_COLOR_GREEN "\033[32m"
+#define PERF_COLOR_YELLOW "\033[33m"
+#define PERF_COLOR_BLUE "\033[34m"
+#define PERF_COLOR_MAGENTA "\033[35m"
+#define PERF_COLOR_CYAN "\033[36m"
+#define PERF_COLOR_BG_RED "\033[41m"
+
+#define MIN_GREEN 0.5
+#define MIN_RED 5.0
+
+/*
+ * This variable stores the value of color.ui
+ */
+extern int perf_use_color_default;
+
+
+/*
+ * Use this instead of perf_default_config if you need the value of color.ui.
+ */
+int perf_color_default_config(const char *var, const char *value, void *cb);
+
+int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty);
+int color_vsnprintf(char *bf, size_t size, const char *color,
+ const char *fmt, va_list args);
+int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args);
+int color_fprintf(FILE *fp, const char *color, const char *fmt, ...);
+int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...);
+int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf);
+int value_color_snprintf(char *bf, size_t size, const char *fmt, double value);
+int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...);
+int percent_color_len_snprintf(char *bf, size_t size, const char *fmt, ...);
+int percent_color_fprintf(FILE *fp, const char *fmt, double percent);
+const char *get_percent_color(double percent);
+
+#endif /* __PERF_COLOR_H */
diff --git a/tools/perf/util/color_config.c b/tools/perf/util/color_config.c
new file mode 100644
index 000000000..dc09ba7cb
--- /dev/null
+++ b/tools/perf/util/color_config.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <subcmd/pager.h>
+#include <string.h>
+#include "config.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include "color.h"
+#include <math.h>
+#include <unistd.h>
+
+int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty)
+{
+ if (value) {
+ if (!strcasecmp(value, "never"))
+ return 0;
+ if (!strcasecmp(value, "always"))
+ return 1;
+ if (!strcasecmp(value, "auto"))
+ goto auto_color;
+ }
+
+ /* Missing or explicit false to turn off colorization */
+ if (!perf_config_bool(var, value))
+ return 0;
+
+ /* any normal truth value defaults to 'auto' */
+ auto_color:
+ if (stdout_is_tty < 0)
+ stdout_is_tty = isatty(1);
+ if (stdout_is_tty || pager_in_use()) {
+ char *term = getenv("TERM");
+ if (term && strcmp(term, "dumb"))
+ return 1;
+ }
+ return 0;
+}
+
+int perf_color_default_config(const char *var, const char *value,
+ void *cb __maybe_unused)
+{
+ if (!strcmp(var, "color.ui")) {
+ perf_use_color_default = perf_config_colorbool(var, value, -1);
+ return 0;
+ }
+
+ return 0;
+}
diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
new file mode 100644
index 000000000..afb8d4fd2
--- /dev/null
+++ b/tools/perf/util/comm.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "comm.h"
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <linux/refcount.h>
+#include <linux/rbtree.h>
+#include <linux/zalloc.h>
+#include "rwsem.h"
+
+struct comm_str {
+ char *str;
+ struct rb_node rb_node;
+ refcount_t refcnt;
+};
+
+/* Should perhaps be moved to struct machine */
+static struct rb_root comm_str_root;
+static struct rw_semaphore comm_str_lock = {.lock = PTHREAD_RWLOCK_INITIALIZER,};
+
+static struct comm_str *comm_str__get(struct comm_str *cs)
+{
+ if (cs && refcount_inc_not_zero(&cs->refcnt))
+ return cs;
+
+ return NULL;
+}
+
+static void comm_str__put(struct comm_str *cs)
+{
+ if (cs && refcount_dec_and_test(&cs->refcnt)) {
+ down_write(&comm_str_lock);
+ rb_erase(&cs->rb_node, &comm_str_root);
+ up_write(&comm_str_lock);
+ zfree(&cs->str);
+ free(cs);
+ }
+}
+
+static struct comm_str *comm_str__alloc(const char *str)
+{
+ struct comm_str *cs;
+
+ cs = zalloc(sizeof(*cs));
+ if (!cs)
+ return NULL;
+
+ cs->str = strdup(str);
+ if (!cs->str) {
+ free(cs);
+ return NULL;
+ }
+
+ refcount_set(&cs->refcnt, 1);
+
+ return cs;
+}
+
+static
+struct comm_str *__comm_str__findnew(const char *str, struct rb_root *root)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct comm_str *iter, *new;
+ int cmp;
+
+ while (*p != NULL) {
+ parent = *p;
+ iter = rb_entry(parent, struct comm_str, rb_node);
+
+ /*
+ * If we race with comm_str__put, iter->refcnt is 0
+ * and it will be removed within comm_str__put call
+ * shortly, ignore it in this search.
+ */
+ cmp = strcmp(str, iter->str);
+ if (!cmp && comm_str__get(iter))
+ return iter;
+
+ if (cmp < 0)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ new = comm_str__alloc(str);
+ if (!new)
+ return NULL;
+
+ rb_link_node(&new->rb_node, parent, p);
+ rb_insert_color(&new->rb_node, root);
+
+ return new;
+}
+
+static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root)
+{
+ struct comm_str *cs;
+
+ down_write(&comm_str_lock);
+ cs = __comm_str__findnew(str, root);
+ up_write(&comm_str_lock);
+
+ return cs;
+}
+
+struct comm *comm__new(const char *str, u64 timestamp, bool exec)
+{
+ struct comm *comm = zalloc(sizeof(*comm));
+
+ if (!comm)
+ return NULL;
+
+ comm->start = timestamp;
+ comm->exec = exec;
+
+ comm->comm_str = comm_str__findnew(str, &comm_str_root);
+ if (!comm->comm_str) {
+ free(comm);
+ return NULL;
+ }
+
+ return comm;
+}
+
+int comm__override(struct comm *comm, const char *str, u64 timestamp, bool exec)
+{
+ struct comm_str *new, *old = comm->comm_str;
+
+ new = comm_str__findnew(str, &comm_str_root);
+ if (!new)
+ return -ENOMEM;
+
+ comm_str__put(old);
+ comm->comm_str = new;
+ comm->start = timestamp;
+ if (exec)
+ comm->exec = true;
+
+ return 0;
+}
+
+void comm__free(struct comm *comm)
+{
+ comm_str__put(comm->comm_str);
+ free(comm);
+}
+
+const char *comm__str(const struct comm *comm)
+{
+ return comm->comm_str->str;
+}
diff --git a/tools/perf/util/comm.h b/tools/perf/util/comm.h
new file mode 100644
index 000000000..f35d8fbfa
--- /dev/null
+++ b/tools/perf/util/comm.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_COMM_H
+#define __PERF_COMM_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <stdbool.h>
+
+struct comm_str;
+
+struct comm {
+ struct comm_str *comm_str;
+ u64 start;
+ struct list_head list;
+ bool exec;
+ union { /* Tool specific area */
+ void *priv;
+ u64 db_id;
+ };
+};
+
+void comm__free(struct comm *comm);
+struct comm *comm__new(const char *str, u64 timestamp, bool exec);
+const char *comm__str(const struct comm *comm);
+int comm__override(struct comm *comm, const char *str, u64 timestamp,
+ bool exec);
+
+#endif /* __PERF_COMM_H */
diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h
new file mode 100644
index 000000000..0cd3369af
--- /dev/null
+++ b/tools/perf/util/compress.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_COMPRESS_H
+#define PERF_COMPRESS_H
+
+#include <stdbool.h>
+#ifdef HAVE_ZSTD_SUPPORT
+#include <zstd.h>
+#endif
+
+#ifdef HAVE_ZLIB_SUPPORT
+int gzip_decompress_to_file(const char *input, int output_fd);
+bool gzip_is_compressed(const char *input);
+#endif
+
+#ifdef HAVE_LZMA_SUPPORT
+int lzma_decompress_to_file(const char *input, int output_fd);
+bool lzma_is_compressed(const char *input);
+#endif
+
+struct zstd_data {
+#ifdef HAVE_ZSTD_SUPPORT
+ ZSTD_CStream *cstream;
+ ZSTD_DStream *dstream;
+#endif
+};
+
+#ifdef HAVE_ZSTD_SUPPORT
+
+int zstd_init(struct zstd_data *data, int level);
+int zstd_fini(struct zstd_data *data);
+
+size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size,
+ void *src, size_t src_size, size_t max_record_size,
+ size_t process_header(void *record, size_t increment));
+
+size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size,
+ void *dst, size_t dst_size);
+#else /* !HAVE_ZSTD_SUPPORT */
+
+static inline int zstd_init(struct zstd_data *data __maybe_unused, int level __maybe_unused)
+{
+ return 0;
+}
+
+static inline int zstd_fini(struct zstd_data *data __maybe_unused)
+{
+ return 0;
+}
+
+static inline
+size_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused,
+ void *dst __maybe_unused, size_t dst_size __maybe_unused,
+ void *src __maybe_unused, size_t src_size __maybe_unused,
+ size_t max_record_size __maybe_unused,
+ size_t process_header(void *record, size_t increment) __maybe_unused)
+{
+ return 0;
+}
+
+static inline size_t zstd_decompress_stream(struct zstd_data *data __maybe_unused, void *src __maybe_unused,
+ size_t src_size __maybe_unused, void *dst __maybe_unused,
+ size_t dst_size __maybe_unused)
+{
+ return 0;
+}
+#endif
+
+#endif /* PERF_COMPRESS_H */
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
new file mode 100644
index 000000000..7a650de0d
--- /dev/null
+++ b/tools/perf/util/config.c
@@ -0,0 +1,941 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * config.c
+ *
+ * Helper functions for parsing config items.
+ * Originally copied from GIT source.
+ *
+ * Copyright (C) Linus Torvalds, 2005
+ * Copyright (C) Johannes Schindelin, 2005
+ *
+ */
+#include <errno.h>
+#include <sys/param.h>
+#include "cache.h"
+#include "callchain.h"
+#include <subcmd/exec-cmd.h>
+#include "util/event.h" /* proc_map_timeout */
+#include "util/hist.h" /* perf_hist_config */
+#include "util/stat.h" /* perf_stat__set_big_num */
+#include "util/evsel.h" /* evsel__hw_names, evsel__use_bpf_counters */
+#include "util/srcline.h" /* addr2line_timeout_ms */
+#include "build-id.h"
+#include "debug.h"
+#include "config.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <linux/ctype.h>
+
+#define MAXNAME (256)
+
+#define DEBUG_CACHE_DIR ".debug"
+
+
+char buildid_dir[MAXPATHLEN]; /* root dir for buildid, binary cache */
+
+static FILE *config_file;
+static const char *config_file_name;
+static int config_linenr;
+static int config_file_eof;
+static struct perf_config_set *config_set;
+
+const char *config_exclusive_filename;
+
+static int get_next_char(void)
+{
+ int c;
+ FILE *f;
+
+ c = '\n';
+ if ((f = config_file) != NULL) {
+ c = fgetc(f);
+ if (c == '\r') {
+ /* DOS like systems */
+ c = fgetc(f);
+ if (c != '\n') {
+ ungetc(c, f);
+ c = '\r';
+ }
+ }
+ if (c == '\n')
+ config_linenr++;
+ if (c == EOF) {
+ config_file_eof = 1;
+ c = '\n';
+ }
+ }
+ return c;
+}
+
+static char *parse_value(void)
+{
+ static char value[1024];
+ int quote = 0, comment = 0, space = 0;
+ size_t len = 0;
+
+ for (;;) {
+ int c = get_next_char();
+
+ if (len >= sizeof(value) - 1)
+ return NULL;
+ if (c == '\n') {
+ if (quote)
+ return NULL;
+ value[len] = 0;
+ return value;
+ }
+ if (comment)
+ continue;
+ if (isspace(c) && !quote) {
+ space = 1;
+ continue;
+ }
+ if (!quote) {
+ if (c == ';' || c == '#') {
+ comment = 1;
+ continue;
+ }
+ }
+ if (space) {
+ if (len)
+ value[len++] = ' ';
+ space = 0;
+ }
+ if (c == '\\') {
+ c = get_next_char();
+ switch (c) {
+ case '\n':
+ continue;
+ case 't':
+ c = '\t';
+ break;
+ case 'b':
+ c = '\b';
+ break;
+ case 'n':
+ c = '\n';
+ break;
+ /* Some characters escape as themselves */
+ case '\\': case '"':
+ break;
+ /* Reject unknown escape sequences */
+ default:
+ return NULL;
+ }
+ value[len++] = c;
+ continue;
+ }
+ if (c == '"') {
+ quote = 1-quote;
+ continue;
+ }
+ value[len++] = c;
+ }
+}
+
+static inline int iskeychar(int c)
+{
+ return isalnum(c) || c == '-' || c == '_';
+}
+
+static int get_value(config_fn_t fn, void *data, char *name, unsigned int len)
+{
+ int c;
+ char *value;
+
+ /* Get the full name */
+ for (;;) {
+ c = get_next_char();
+ if (config_file_eof)
+ break;
+ if (!iskeychar(c))
+ break;
+ name[len++] = c;
+ if (len >= MAXNAME)
+ return -1;
+ }
+ name[len] = 0;
+ while (c == ' ' || c == '\t')
+ c = get_next_char();
+
+ value = NULL;
+ if (c != '\n') {
+ if (c != '=')
+ return -1;
+ value = parse_value();
+ if (!value)
+ return -1;
+ }
+ return fn(name, value, data);
+}
+
+static int get_extended_base_var(char *name, int baselen, int c)
+{
+ do {
+ if (c == '\n')
+ return -1;
+ c = get_next_char();
+ } while (isspace(c));
+
+ /* We require the format to be '[base "extension"]' */
+ if (c != '"')
+ return -1;
+ name[baselen++] = '.';
+
+ for (;;) {
+ int ch = get_next_char();
+
+ if (ch == '\n')
+ return -1;
+ if (ch == '"')
+ break;
+ if (ch == '\\') {
+ ch = get_next_char();
+ if (ch == '\n')
+ return -1;
+ }
+ name[baselen++] = ch;
+ if (baselen > MAXNAME / 2)
+ return -1;
+ }
+
+ /* Final ']' */
+ if (get_next_char() != ']')
+ return -1;
+ return baselen;
+}
+
+static int get_base_var(char *name)
+{
+ int baselen = 0;
+
+ for (;;) {
+ int c = get_next_char();
+ if (config_file_eof)
+ return -1;
+ if (c == ']')
+ return baselen;
+ if (isspace(c))
+ return get_extended_base_var(name, baselen, c);
+ if (!iskeychar(c) && c != '.')
+ return -1;
+ if (baselen > MAXNAME / 2)
+ return -1;
+ name[baselen++] = tolower(c);
+ }
+}
+
+static int perf_parse_file(config_fn_t fn, void *data)
+{
+ int comment = 0;
+ int baselen = 0;
+ static char var[MAXNAME];
+
+ /* U+FEFF Byte Order Mark in UTF8 */
+ static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf";
+ const unsigned char *bomptr = utf8_bom;
+
+ for (;;) {
+ int line, c = get_next_char();
+
+ if (bomptr && *bomptr) {
+ /* We are at the file beginning; skip UTF8-encoded BOM
+ * if present. Sane editors won't put this in on their
+ * own, but e.g. Windows Notepad will do it happily. */
+ if ((unsigned char) c == *bomptr) {
+ bomptr++;
+ continue;
+ } else {
+ /* Do not tolerate partial BOM. */
+ if (bomptr != utf8_bom)
+ break;
+ /* No BOM at file beginning. Cool. */
+ bomptr = NULL;
+ }
+ }
+ if (c == '\n') {
+ if (config_file_eof)
+ return 0;
+ comment = 0;
+ continue;
+ }
+ if (comment || isspace(c))
+ continue;
+ if (c == '#' || c == ';') {
+ comment = 1;
+ continue;
+ }
+ if (c == '[') {
+ baselen = get_base_var(var);
+ if (baselen <= 0)
+ break;
+ var[baselen++] = '.';
+ var[baselen] = 0;
+ continue;
+ }
+ if (!isalpha(c))
+ break;
+ var[baselen] = tolower(c);
+
+ /*
+ * The get_value function might or might not reach the '\n',
+ * so saving the current line number for error reporting.
+ */
+ line = config_linenr;
+ if (get_value(fn, data, var, baselen+1) < 0) {
+ config_linenr = line;
+ break;
+ }
+ }
+ pr_err("bad config file line %d in %s\n", config_linenr, config_file_name);
+ return -1;
+}
+
+static int parse_unit_factor(const char *end, unsigned long *val)
+{
+ if (!*end)
+ return 1;
+ else if (!strcasecmp(end, "k")) {
+ *val *= 1024;
+ return 1;
+ }
+ else if (!strcasecmp(end, "m")) {
+ *val *= 1024 * 1024;
+ return 1;
+ }
+ else if (!strcasecmp(end, "g")) {
+ *val *= 1024 * 1024 * 1024;
+ return 1;
+ }
+ return 0;
+}
+
+static int perf_parse_llong(const char *value, long long *ret)
+{
+ if (value && *value) {
+ char *end;
+ long long val = strtoll(value, &end, 0);
+ unsigned long factor = 1;
+
+ if (!parse_unit_factor(end, &factor))
+ return 0;
+ *ret = val * factor;
+ return 1;
+ }
+ return 0;
+}
+
+static int perf_parse_long(const char *value, long *ret)
+{
+ if (value && *value) {
+ char *end;
+ long val = strtol(value, &end, 0);
+ unsigned long factor = 1;
+ if (!parse_unit_factor(end, &factor))
+ return 0;
+ *ret = val * factor;
+ return 1;
+ }
+ return 0;
+}
+
+static void bad_config(const char *name)
+{
+ if (config_file_name)
+ pr_warning("bad config value for '%s' in %s, ignoring...\n", name, config_file_name);
+ else
+ pr_warning("bad config value for '%s', ignoring...\n", name);
+}
+
+int perf_config_u64(u64 *dest, const char *name, const char *value)
+{
+ long long ret = 0;
+
+ if (!perf_parse_llong(value, &ret)) {
+ bad_config(name);
+ return -1;
+ }
+
+ *dest = ret;
+ return 0;
+}
+
+int perf_config_int(int *dest, const char *name, const char *value)
+{
+ long ret = 0;
+ if (!perf_parse_long(value, &ret)) {
+ bad_config(name);
+ return -1;
+ }
+ *dest = ret;
+ return 0;
+}
+
+int perf_config_u8(u8 *dest, const char *name, const char *value)
+{
+ long ret = 0;
+
+ if (!perf_parse_long(value, &ret)) {
+ bad_config(name);
+ return -1;
+ }
+ *dest = ret;
+ return 0;
+}
+
+static int perf_config_bool_or_int(const char *name, const char *value, int *is_bool)
+{
+ int ret;
+
+ *is_bool = 1;
+ if (!value)
+ return 1;
+ if (!*value)
+ return 0;
+ if (!strcasecmp(value, "true") || !strcasecmp(value, "yes") || !strcasecmp(value, "on"))
+ return 1;
+ if (!strcasecmp(value, "false") || !strcasecmp(value, "no") || !strcasecmp(value, "off"))
+ return 0;
+ *is_bool = 0;
+ return perf_config_int(&ret, name, value) < 0 ? -1 : ret;
+}
+
+int perf_config_bool(const char *name, const char *value)
+{
+ int discard;
+ return !!perf_config_bool_or_int(name, value, &discard);
+}
+
+static const char *perf_config_dirname(const char *name, const char *value)
+{
+ if (!name)
+ return NULL;
+ return value;
+}
+
+static int perf_buildid_config(const char *var, const char *value)
+{
+ /* same dir for all commands */
+ if (!strcmp(var, "buildid.dir")) {
+ const char *dir = perf_config_dirname(var, value);
+
+ if (!dir) {
+ pr_err("Invalid buildid directory!\n");
+ return -1;
+ }
+ strncpy(buildid_dir, dir, MAXPATHLEN-1);
+ buildid_dir[MAXPATHLEN-1] = '\0';
+ }
+
+ return 0;
+}
+
+static int perf_default_core_config(const char *var, const char *value)
+{
+ if (!strcmp(var, "core.proc-map-timeout"))
+ proc_map_timeout = strtoul(value, NULL, 10);
+
+ if (!strcmp(var, "core.addr2line-timeout"))
+ addr2line_timeout_ms = strtoul(value, NULL, 10);
+
+ /* Add other config variables here. */
+ return 0;
+}
+
+static int perf_ui_config(const char *var, const char *value)
+{
+ /* Add other config variables here. */
+ if (!strcmp(var, "ui.show-headers"))
+ symbol_conf.show_hist_headers = perf_config_bool(var, value);
+
+ return 0;
+}
+
+static int perf_stat_config(const char *var, const char *value)
+{
+ if (!strcmp(var, "stat.big-num"))
+ perf_stat__set_big_num(perf_config_bool(var, value));
+
+ if (!strcmp(var, "stat.no-csv-summary"))
+ perf_stat__set_no_csv_summary(perf_config_bool(var, value));
+
+ if (!strcmp(var, "stat.bpf-counter-events"))
+ evsel__bpf_counter_events = strdup(value);
+
+ /* Add other config variables here. */
+ return 0;
+}
+
+int perf_default_config(const char *var, const char *value,
+ void *dummy __maybe_unused)
+{
+ if (strstarts(var, "core."))
+ return perf_default_core_config(var, value);
+
+ if (strstarts(var, "hist."))
+ return perf_hist_config(var, value);
+
+ if (strstarts(var, "ui."))
+ return perf_ui_config(var, value);
+
+ if (strstarts(var, "call-graph."))
+ return perf_callchain_config(var, value);
+
+ if (strstarts(var, "buildid."))
+ return perf_buildid_config(var, value);
+
+ if (strstarts(var, "stat."))
+ return perf_stat_config(var, value);
+
+ /* Add other config variables here. */
+ return 0;
+}
+
+static int perf_config_from_file(config_fn_t fn, const char *filename, void *data)
+{
+ int ret;
+ FILE *f = fopen(filename, "r");
+
+ ret = -1;
+ if (f) {
+ config_file = f;
+ config_file_name = filename;
+ config_linenr = 1;
+ config_file_eof = 0;
+ ret = perf_parse_file(fn, data);
+ fclose(f);
+ config_file_name = NULL;
+ }
+ return ret;
+}
+
+const char *perf_etc_perfconfig(void)
+{
+ static const char *system_wide;
+ if (!system_wide)
+ system_wide = system_path(ETC_PERFCONFIG);
+ return system_wide;
+}
+
+static int perf_env_bool(const char *k, int def)
+{
+ const char *v = getenv(k);
+ return v ? perf_config_bool(k, v) : def;
+}
+
+int perf_config_system(void)
+{
+ return !perf_env_bool("PERF_CONFIG_NOSYSTEM", 0);
+}
+
+int perf_config_global(void)
+{
+ return !perf_env_bool("PERF_CONFIG_NOGLOBAL", 0);
+}
+
+static char *home_perfconfig(void)
+{
+ const char *home = NULL;
+ char *config;
+ struct stat st;
+ char path[PATH_MAX];
+
+ home = getenv("HOME");
+
+ /*
+ * Skip reading user config if:
+ * - there is no place to read it from (HOME)
+ * - we are asked not to (PERF_CONFIG_NOGLOBAL=1)
+ */
+ if (!home || !*home || !perf_config_global())
+ return NULL;
+
+ config = strdup(mkpath(path, sizeof(path), "%s/.perfconfig", home));
+ if (config == NULL) {
+ pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.\n", home);
+ return NULL;
+ }
+
+ if (stat(config, &st) < 0)
+ goto out_free;
+
+ if (st.st_uid && (st.st_uid != geteuid())) {
+ pr_warning("File %s not owned by current user or root, ignoring it.\n", config);
+ goto out_free;
+ }
+
+ if (st.st_size)
+ return config;
+
+out_free:
+ free(config);
+ return NULL;
+}
+
+const char *perf_home_perfconfig(void)
+{
+ static const char *config;
+ static bool failed;
+
+ if (failed || config)
+ return config;
+
+ config = home_perfconfig();
+ if (!config)
+ failed = true;
+
+ return config;
+}
+
+static struct perf_config_section *find_section(struct list_head *sections,
+ const char *section_name)
+{
+ struct perf_config_section *section;
+
+ list_for_each_entry(section, sections, node)
+ if (!strcmp(section->name, section_name))
+ return section;
+
+ return NULL;
+}
+
+static struct perf_config_item *find_config_item(const char *name,
+ struct perf_config_section *section)
+{
+ struct perf_config_item *item;
+
+ list_for_each_entry(item, &section->items, node)
+ if (!strcmp(item->name, name))
+ return item;
+
+ return NULL;
+}
+
+static struct perf_config_section *add_section(struct list_head *sections,
+ const char *section_name)
+{
+ struct perf_config_section *section = zalloc(sizeof(*section));
+
+ if (!section)
+ return NULL;
+
+ INIT_LIST_HEAD(&section->items);
+ section->name = strdup(section_name);
+ if (!section->name) {
+ pr_debug("%s: strdup failed\n", __func__);
+ free(section);
+ return NULL;
+ }
+
+ list_add_tail(&section->node, sections);
+ return section;
+}
+
+static struct perf_config_item *add_config_item(struct perf_config_section *section,
+ const char *name)
+{
+ struct perf_config_item *item = zalloc(sizeof(*item));
+
+ if (!item)
+ return NULL;
+
+ item->name = strdup(name);
+ if (!item->name) {
+ pr_debug("%s: strdup failed\n", __func__);
+ free(item);
+ return NULL;
+ }
+
+ list_add_tail(&item->node, &section->items);
+ return item;
+}
+
+static int set_value(struct perf_config_item *item, const char *value)
+{
+ char *val = strdup(value);
+
+ if (!val)
+ return -1;
+
+ zfree(&item->value);
+ item->value = val;
+ return 0;
+}
+
+static int collect_config(const char *var, const char *value,
+ void *perf_config_set)
+{
+ int ret = -1;
+ char *ptr, *key;
+ char *section_name, *name;
+ struct perf_config_section *section = NULL;
+ struct perf_config_item *item = NULL;
+ struct perf_config_set *set = perf_config_set;
+ struct list_head *sections;
+
+ if (set == NULL)
+ return -1;
+
+ sections = &set->sections;
+ key = ptr = strdup(var);
+ if (!key) {
+ pr_debug("%s: strdup failed\n", __func__);
+ return -1;
+ }
+
+ section_name = strsep(&ptr, ".");
+ name = ptr;
+ if (name == NULL || value == NULL)
+ goto out_free;
+
+ section = find_section(sections, section_name);
+ if (!section) {
+ section = add_section(sections, section_name);
+ if (!section)
+ goto out_free;
+ }
+
+ item = find_config_item(name, section);
+ if (!item) {
+ item = add_config_item(section, name);
+ if (!item)
+ goto out_free;
+ }
+
+ /* perf_config_set can contain both user and system config items.
+ * So we should know where each value is from.
+ * The classification would be needed when a particular config file
+ * is overwritten by setting feature i.e. set_config().
+ */
+ if (strcmp(config_file_name, perf_etc_perfconfig()) == 0) {
+ section->from_system_config = true;
+ item->from_system_config = true;
+ } else {
+ section->from_system_config = false;
+ item->from_system_config = false;
+ }
+
+ ret = set_value(item, value);
+
+out_free:
+ free(key);
+ return ret;
+}
+
+int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
+ const char *var, const char *value)
+{
+ config_file_name = file_name;
+ return collect_config(var, value, set);
+}
+
+static int perf_config_set__init(struct perf_config_set *set)
+{
+ int ret = -1;
+
+ /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
+ if (config_exclusive_filename)
+ return perf_config_from_file(collect_config, config_exclusive_filename, set);
+ if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) {
+ if (perf_config_from_file(collect_config, perf_etc_perfconfig(), set) < 0)
+ goto out;
+ }
+ if (perf_config_global() && perf_home_perfconfig()) {
+ if (perf_config_from_file(collect_config, perf_home_perfconfig(), set) < 0)
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+struct perf_config_set *perf_config_set__new(void)
+{
+ struct perf_config_set *set = zalloc(sizeof(*set));
+
+ if (set) {
+ INIT_LIST_HEAD(&set->sections);
+ perf_config_set__init(set);
+ }
+
+ return set;
+}
+
+struct perf_config_set *perf_config_set__load_file(const char *file)
+{
+ struct perf_config_set *set = zalloc(sizeof(*set));
+
+ if (set) {
+ INIT_LIST_HEAD(&set->sections);
+ perf_config_from_file(collect_config, file, set);
+ }
+
+ return set;
+}
+
+static int perf_config__init(void)
+{
+ if (config_set == NULL)
+ config_set = perf_config_set__new();
+
+ return config_set == NULL;
+}
+
+int perf_config_set(struct perf_config_set *set,
+ config_fn_t fn, void *data)
+{
+ int ret = 0;
+ char key[BUFSIZ];
+ struct perf_config_section *section;
+ struct perf_config_item *item;
+
+ perf_config_set__for_each_entry(set, section, item) {
+ char *value = item->value;
+
+ if (value) {
+ scnprintf(key, sizeof(key), "%s.%s",
+ section->name, item->name);
+ ret = fn(key, value, data);
+ if (ret < 0) {
+ pr_err("Error in the given config file: wrong config key-value pair %s=%s\n",
+ key, value);
+ /*
+ * Can't be just a 'break', as perf_config_set__for_each_entry()
+ * expands to two nested for() loops.
+ */
+ goto out;
+ }
+ }
+ }
+out:
+ return ret;
+}
+
+int perf_config(config_fn_t fn, void *data)
+{
+ if (config_set == NULL && perf_config__init())
+ return -1;
+
+ return perf_config_set(config_set, fn, data);
+}
+
+void perf_config__exit(void)
+{
+ perf_config_set__delete(config_set);
+ config_set = NULL;
+}
+
+void perf_config__refresh(void)
+{
+ perf_config__exit();
+ perf_config__init();
+}
+
+static void perf_config_item__delete(struct perf_config_item *item)
+{
+ zfree(&item->name);
+ zfree(&item->value);
+ free(item);
+}
+
+static void perf_config_section__purge(struct perf_config_section *section)
+{
+ struct perf_config_item *item, *tmp;
+
+ list_for_each_entry_safe(item, tmp, &section->items, node) {
+ list_del_init(&item->node);
+ perf_config_item__delete(item);
+ }
+}
+
+static void perf_config_section__delete(struct perf_config_section *section)
+{
+ perf_config_section__purge(section);
+ zfree(&section->name);
+ free(section);
+}
+
+static void perf_config_set__purge(struct perf_config_set *set)
+{
+ struct perf_config_section *section, *tmp;
+
+ list_for_each_entry_safe(section, tmp, &set->sections, node) {
+ list_del_init(&section->node);
+ perf_config_section__delete(section);
+ }
+}
+
+void perf_config_set__delete(struct perf_config_set *set)
+{
+ if (set == NULL)
+ return;
+
+ perf_config_set__purge(set);
+ free(set);
+}
+
+/*
+ * Call this to report error for your variable that should not
+ * get a boolean value (i.e. "[my] var" means "true").
+ */
+int config_error_nonbool(const char *var)
+{
+ pr_err("Missing value for '%s'", var);
+ return -1;
+}
+
+void set_buildid_dir(const char *dir)
+{
+ if (dir)
+ scnprintf(buildid_dir, MAXPATHLEN, "%s", dir);
+
+ /* default to $HOME/.debug */
+ if (buildid_dir[0] == '\0') {
+ char *home = getenv("HOME");
+
+ if (home) {
+ snprintf(buildid_dir, MAXPATHLEN, "%s/%s",
+ home, DEBUG_CACHE_DIR);
+ } else {
+ strncpy(buildid_dir, DEBUG_CACHE_DIR, MAXPATHLEN-1);
+ }
+ buildid_dir[MAXPATHLEN-1] = '\0';
+ }
+ /* for communicating with external commands */
+ setenv("PERF_BUILDID_DIR", buildid_dir, 1);
+}
+
+struct perf_config_scan_data {
+ const char *name;
+ const char *fmt;
+ va_list args;
+ int ret;
+};
+
+static int perf_config_scan_cb(const char *var, const char *value, void *data)
+{
+ struct perf_config_scan_data *d = data;
+
+ if (!strcmp(var, d->name))
+ d->ret = vsscanf(value, d->fmt, d->args);
+
+ return 0;
+}
+
+int perf_config_scan(const char *name, const char *fmt, ...)
+{
+ struct perf_config_scan_data d = {
+ .name = name,
+ .fmt = fmt,
+ };
+
+ va_start(d.args, fmt);
+ perf_config(perf_config_scan_cb, &d);
+ va_end(d.args);
+
+ return d.ret;
+}
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
new file mode 100644
index 000000000..2e5e80892
--- /dev/null
+++ b/tools/perf/util/config.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CONFIG_H
+#define __PERF_CONFIG_H
+
+#include <stdbool.h>
+#include <linux/list.h>
+
+struct perf_config_item {
+ char *name;
+ char *value;
+ bool from_system_config;
+ struct list_head node;
+};
+
+struct perf_config_section {
+ char *name;
+ struct list_head items;
+ bool from_system_config;
+ struct list_head node;
+};
+
+struct perf_config_set {
+ struct list_head sections;
+};
+
+extern const char *config_exclusive_filename;
+
+typedef int (*config_fn_t)(const char *, const char *, void *);
+
+int perf_default_config(const char *, const char *, void *);
+int perf_config(config_fn_t fn, void *);
+int perf_config_scan(const char *name, const char *fmt, ...) __scanf(2, 3);
+int perf_config_set(struct perf_config_set *set,
+ config_fn_t fn, void *data);
+int perf_config_int(int *dest, const char *, const char *);
+int perf_config_u8(u8 *dest, const char *name, const char *value);
+int perf_config_u64(u64 *dest, const char *, const char *);
+int perf_config_bool(const char *, const char *);
+int config_error_nonbool(const char *);
+const char *perf_etc_perfconfig(void);
+const char *perf_home_perfconfig(void);
+int perf_config_system(void);
+int perf_config_global(void);
+
+struct perf_config_set *perf_config_set__new(void);
+struct perf_config_set *perf_config_set__load_file(const char *file);
+void perf_config_set__delete(struct perf_config_set *set);
+int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
+ const char *var, const char *value);
+void perf_config__exit(void);
+void perf_config__refresh(void);
+
+/**
+ * perf_config_sections__for_each - iterate thru all the sections
+ * @list: list_head instance to iterate
+ * @section: struct perf_config_section iterator
+ */
+#define perf_config_sections__for_each_entry(list, section) \
+ list_for_each_entry(section, list, node)
+
+/**
+ * perf_config_items__for_each - iterate thru all the items
+ * @list: list_head instance to iterate
+ * @item: struct perf_config_item iterator
+ */
+#define perf_config_items__for_each_entry(list, item) \
+ list_for_each_entry(item, list, node)
+
+/**
+ * perf_config_set__for_each - iterate thru all the config section-item pairs
+ * @set: evlist instance to iterate
+ * @section: struct perf_config_section iterator
+ * @item: struct perf_config_item iterator
+ */
+#define perf_config_set__for_each_entry(set, section, item) \
+ perf_config_sections__for_each_entry(&set->sections, section) \
+ perf_config_items__for_each_entry(&section->items, item)
+
+#endif /* __PERF_CONFIG_H */
diff --git a/tools/perf/util/copyfile.c b/tools/perf/util/copyfile.c
new file mode 100644
index 000000000..47e03de7c
--- /dev/null
+++ b/tools/perf/util/copyfile.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util/copyfile.h"
+#include "util/namespaces.h"
+#include <internal/lib.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+static int slow_copyfile(const char *from, const char *to, struct nsinfo *nsi)
+{
+ int err = -1;
+ char *line = NULL;
+ size_t n;
+ FILE *from_fp, *to_fp;
+ struct nscookie nsc;
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ from_fp = fopen(from, "r");
+ nsinfo__mountns_exit(&nsc);
+ if (from_fp == NULL)
+ goto out;
+
+ to_fp = fopen(to, "w");
+ if (to_fp == NULL)
+ goto out_fclose_from;
+
+ while (getline(&line, &n, from_fp) > 0)
+ if (fputs(line, to_fp) == EOF)
+ goto out_fclose_to;
+ err = 0;
+out_fclose_to:
+ fclose(to_fp);
+ free(line);
+out_fclose_from:
+ fclose(from_fp);
+out:
+ return err;
+}
+
+int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size)
+{
+ void *ptr;
+ loff_t pgoff;
+
+ pgoff = off_in & ~(page_size - 1);
+ off_in -= pgoff;
+
+ ptr = mmap(NULL, off_in + size, PROT_READ, MAP_PRIVATE, ifd, pgoff);
+ if (ptr == MAP_FAILED)
+ return -1;
+
+ while (size) {
+ ssize_t ret = pwrite(ofd, ptr + off_in, size, off_out);
+ if (ret < 0 && errno == EINTR)
+ continue;
+ if (ret <= 0)
+ break;
+
+ size -= ret;
+ off_in += ret;
+ off_out += ret;
+ }
+ munmap(ptr, off_in + size);
+
+ return size ? -1 : 0;
+}
+
+static int copyfile_mode_ns(const char *from, const char *to, mode_t mode,
+ struct nsinfo *nsi)
+{
+ int fromfd, tofd;
+ struct stat st;
+ int err;
+ char *tmp = NULL, *ptr = NULL;
+ struct nscookie nsc;
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ err = stat(from, &st);
+ nsinfo__mountns_exit(&nsc);
+ if (err)
+ goto out;
+ err = -1;
+
+ /* extra 'x' at the end is to reserve space for '.' */
+ if (asprintf(&tmp, "%s.XXXXXXx", to) < 0) {
+ tmp = NULL;
+ goto out;
+ }
+ ptr = strrchr(tmp, '/');
+ if (!ptr)
+ goto out;
+ ptr = memmove(ptr + 1, ptr, strlen(ptr) - 1);
+ *ptr = '.';
+
+ tofd = mkstemp(tmp);
+ if (tofd < 0)
+ goto out;
+
+ if (st.st_size == 0) { /* /proc? do it slowly... */
+ err = slow_copyfile(from, tmp, nsi);
+ if (!err && fchmod(tofd, mode))
+ err = -1;
+ goto out_close_to;
+ }
+
+ if (fchmod(tofd, mode))
+ goto out_close_to;
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ fromfd = open(from, O_RDONLY);
+ nsinfo__mountns_exit(&nsc);
+ if (fromfd < 0)
+ goto out_close_to;
+
+ err = copyfile_offset(fromfd, 0, tofd, 0, st.st_size);
+
+ close(fromfd);
+out_close_to:
+ close(tofd);
+ if (!err)
+ err = link(tmp, to);
+ unlink(tmp);
+out:
+ free(tmp);
+ return err;
+}
+
+int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi)
+{
+ return copyfile_mode_ns(from, to, 0755, nsi);
+}
+
+int copyfile_mode(const char *from, const char *to, mode_t mode)
+{
+ return copyfile_mode_ns(from, to, mode, NULL);
+}
+
+int copyfile(const char *from, const char *to)
+{
+ return copyfile_mode(from, to, 0755);
+}
diff --git a/tools/perf/util/copyfile.h b/tools/perf/util/copyfile.h
new file mode 100644
index 000000000..e85d2f22f
--- /dev/null
+++ b/tools/perf/util/copyfile.h
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef PERF_COPYFILE_H_
+#define PERF_COPYFILE_H_
+
+#include <linux/types.h>
+#include <sys/types.h>
+#include <fcntl.h>
+
+struct nsinfo;
+
+int copyfile(const char *from, const char *to);
+int copyfile_mode(const char *from, const char *to, mode_t mode);
+int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi);
+int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size);
+
+#endif // PERF_COPYFILE_H_
diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c
new file mode 100644
index 000000000..11cd85b27
--- /dev/null
+++ b/tools/perf/util/counts.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include "evsel.h"
+#include "counts.h"
+#include <perf/threadmap.h>
+#include <linux/zalloc.h>
+
+struct perf_counts *perf_counts__new(int ncpus, int nthreads)
+{
+ struct perf_counts *counts = zalloc(sizeof(*counts));
+
+ if (counts) {
+ struct xyarray *values;
+
+ values = xyarray__new(ncpus, nthreads, sizeof(struct perf_counts_values));
+ if (!values) {
+ free(counts);
+ return NULL;
+ }
+
+ counts->values = values;
+
+ values = xyarray__new(ncpus, nthreads, sizeof(bool));
+ if (!values) {
+ xyarray__delete(counts->values);
+ free(counts);
+ return NULL;
+ }
+
+ counts->loaded = values;
+ }
+
+ return counts;
+}
+
+void perf_counts__delete(struct perf_counts *counts)
+{
+ if (counts) {
+ xyarray__delete(counts->loaded);
+ xyarray__delete(counts->values);
+ free(counts);
+ }
+}
+
+void perf_counts__reset(struct perf_counts *counts)
+{
+ xyarray__reset(counts->loaded);
+ xyarray__reset(counts->values);
+}
+
+void evsel__reset_counts(struct evsel *evsel)
+{
+ perf_counts__reset(evsel->counts);
+}
+
+int evsel__alloc_counts(struct evsel *evsel)
+{
+ struct perf_cpu_map *cpus = evsel__cpus(evsel);
+ int nthreads = perf_thread_map__nr(evsel->core.threads);
+
+ evsel->counts = perf_counts__new(perf_cpu_map__nr(cpus), nthreads);
+ return evsel->counts != NULL ? 0 : -ENOMEM;
+}
+
+void evsel__free_counts(struct evsel *evsel)
+{
+ perf_counts__delete(evsel->counts);
+ evsel->counts = NULL;
+}
diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
new file mode 100644
index 000000000..42760242e
--- /dev/null
+++ b/tools/perf/util/counts.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_COUNTS_H
+#define __PERF_COUNTS_H
+
+#include <linux/types.h>
+#include <internal/xyarray.h>
+#include <perf/evsel.h>
+#include <stdbool.h>
+
+struct evsel;
+
+struct perf_counts {
+ s8 scaled;
+ struct xyarray *values;
+ struct xyarray *loaded;
+};
+
+
+static inline struct perf_counts_values*
+perf_counts(struct perf_counts *counts, int cpu_map_idx, int thread)
+{
+ return xyarray__entry(counts->values, cpu_map_idx, thread);
+}
+
+static inline bool
+perf_counts__is_loaded(struct perf_counts *counts, int cpu_map_idx, int thread)
+{
+ return *((bool *) xyarray__entry(counts->loaded, cpu_map_idx, thread));
+}
+
+static inline void
+perf_counts__set_loaded(struct perf_counts *counts, int cpu_map_idx, int thread, bool loaded)
+{
+ *((bool *) xyarray__entry(counts->loaded, cpu_map_idx, thread)) = loaded;
+}
+
+struct perf_counts *perf_counts__new(int ncpus, int nthreads);
+void perf_counts__delete(struct perf_counts *counts);
+void perf_counts__reset(struct perf_counts *counts);
+
+void evsel__reset_counts(struct evsel *evsel);
+int evsel__alloc_counts(struct evsel *evsel);
+void evsel__free_counts(struct evsel *evsel);
+
+#endif /* __PERF_COUNTS_H */
diff --git a/tools/perf/util/cpu-set-sched.h b/tools/perf/util/cpu-set-sched.h
new file mode 100644
index 000000000..8cf4e40d3
--- /dev/null
+++ b/tools/perf/util/cpu-set-sched.h
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: LGPL-2.1
+// Definitions taken from glibc for use with older systems, same licensing.
+#ifndef _CPU_SET_SCHED_PERF_H
+#define _CPU_SET_SCHED_PERF_H
+
+#include <features.h>
+#include <sched.h>
+
+#ifndef CPU_EQUAL
+#ifndef __CPU_EQUAL_S
+#if __GNUC_PREREQ (2, 91)
+# define __CPU_EQUAL_S(setsize, cpusetp1, cpusetp2) \
+ (__builtin_memcmp (cpusetp1, cpusetp2, setsize) == 0)
+#else
+# define __CPU_EQUAL_S(setsize, cpusetp1, cpusetp2) \
+ (__extension__ \
+ ({ const __cpu_mask *__arr1 = (cpusetp1)->__bits; \
+ const __cpu_mask *__arr2 = (cpusetp2)->__bits; \
+ size_t __imax = (setsize) / sizeof (__cpu_mask); \
+ size_t __i; \
+ for (__i = 0; __i < __imax; ++__i) \
+ if (__arr1[__i] != __arr2[__i]) \
+ break; \
+ __i == __imax; }))
+#endif
+#endif // __CPU_EQUAL_S
+
+#define CPU_EQUAL(cpusetp1, cpusetp2) \
+ __CPU_EQUAL_S (sizeof (cpu_set_t), cpusetp1, cpusetp2)
+#endif // CPU_EQUAL
+
+#ifndef CPU_OR
+#ifndef __CPU_OP_S
+#define __CPU_OP_S(setsize, destset, srcset1, srcset2, op) \
+ (__extension__ \
+ ({ cpu_set_t *__dest = (destset); \
+ const __cpu_mask *__arr1 = (srcset1)->__bits; \
+ const __cpu_mask *__arr2 = (srcset2)->__bits; \
+ size_t __imax = (setsize) / sizeof (__cpu_mask); \
+ size_t __i; \
+ for (__i = 0; __i < __imax; ++__i) \
+ ((__cpu_mask *) __dest->__bits)[__i] = __arr1[__i] op __arr2[__i]; \
+ __dest; }))
+#endif // __CPU_OP_S
+
+#define CPU_OR(destset, srcset1, srcset2) \
+ __CPU_OP_S (sizeof (cpu_set_t), destset, srcset1, srcset2, |)
+#endif // CPU_OR
+
+#endif // _CPU_SET_SCHED_PERF_H
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
new file mode 100644
index 000000000..0e090e8bc
--- /dev/null
+++ b/tools/perf/util/cpumap.c
@@ -0,0 +1,717 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <api/fs/fs.h>
+#include "cpumap.h"
+#include "debug.h"
+#include "event.h"
+#include <assert.h>
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include "asm/bug.h"
+
+#include <linux/ctype.h>
+#include <linux/zalloc.h>
+#include <internal/cpumap.h>
+
+static struct perf_cpu max_cpu_num;
+static struct perf_cpu max_present_cpu_num;
+static int max_node_num;
+/**
+ * The numa node X as read from /sys/devices/system/node/nodeX indexed by the
+ * CPU number.
+ */
+static int *cpunode_map;
+
+bool perf_record_cpu_map_data__test_bit(int i,
+ const struct perf_record_cpu_map_data *data)
+{
+ int bit_word32 = i / 32;
+ __u32 bit_mask32 = 1U << (i & 31);
+ int bit_word64 = i / 64;
+ __u64 bit_mask64 = ((__u64)1) << (i & 63);
+
+ return (data->mask32_data.long_size == 4)
+ ? (bit_word32 < data->mask32_data.nr) &&
+ (data->mask32_data.mask[bit_word32] & bit_mask32) != 0
+ : (bit_word64 < data->mask64_data.nr) &&
+ (data->mask64_data.mask[bit_word64] & bit_mask64) != 0;
+}
+
+/* Read ith mask value from data into the given 64-bit sized bitmap */
+static void perf_record_cpu_map_data__read_one_mask(const struct perf_record_cpu_map_data *data,
+ int i, unsigned long *bitmap)
+{
+#if __SIZEOF_LONG__ == 8
+ if (data->mask32_data.long_size == 4)
+ bitmap[0] = data->mask32_data.mask[i];
+ else
+ bitmap[0] = data->mask64_data.mask[i];
+#else
+ if (data->mask32_data.long_size == 4) {
+ bitmap[0] = data->mask32_data.mask[i];
+ bitmap[1] = 0;
+ } else {
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ bitmap[0] = (unsigned long)(data->mask64_data.mask[i] >> 32);
+ bitmap[1] = (unsigned long)data->mask64_data.mask[i];
+#else
+ bitmap[0] = (unsigned long)data->mask64_data.mask[i];
+ bitmap[1] = (unsigned long)(data->mask64_data.mask[i] >> 32);
+#endif
+ }
+#endif
+}
+static struct perf_cpu_map *cpu_map__from_entries(const struct perf_record_cpu_map_data *data)
+{
+ struct perf_cpu_map *map;
+
+ map = perf_cpu_map__empty_new(data->cpus_data.nr);
+ if (map) {
+ unsigned i;
+
+ for (i = 0; i < data->cpus_data.nr; i++) {
+ /*
+ * Special treatment for -1, which is not real cpu number,
+ * and we need to use (int) -1 to initialize map[i],
+ * otherwise it would become 65535.
+ */
+ if (data->cpus_data.cpu[i] == (u16) -1)
+ RC_CHK_ACCESS(map)->map[i].cpu = -1;
+ else
+ RC_CHK_ACCESS(map)->map[i].cpu = (int) data->cpus_data.cpu[i];
+ }
+ }
+
+ return map;
+}
+
+static struct perf_cpu_map *cpu_map__from_mask(const struct perf_record_cpu_map_data *data)
+{
+ DECLARE_BITMAP(local_copy, 64);
+ int weight = 0, mask_nr = data->mask32_data.nr;
+ struct perf_cpu_map *map;
+
+ for (int i = 0; i < mask_nr; i++) {
+ perf_record_cpu_map_data__read_one_mask(data, i, local_copy);
+ weight += bitmap_weight(local_copy, 64);
+ }
+
+ map = perf_cpu_map__empty_new(weight);
+ if (!map)
+ return NULL;
+
+ for (int i = 0, j = 0; i < mask_nr; i++) {
+ int cpus_per_i = (i * data->mask32_data.long_size * BITS_PER_BYTE);
+ int cpu;
+
+ perf_record_cpu_map_data__read_one_mask(data, i, local_copy);
+ for_each_set_bit(cpu, local_copy, 64)
+ RC_CHK_ACCESS(map)->map[j++].cpu = cpu + cpus_per_i;
+ }
+ return map;
+
+}
+
+static struct perf_cpu_map *cpu_map__from_range(const struct perf_record_cpu_map_data *data)
+{
+ struct perf_cpu_map *map;
+ unsigned int i = 0;
+
+ map = perf_cpu_map__empty_new(data->range_cpu_data.end_cpu -
+ data->range_cpu_data.start_cpu + 1 + data->range_cpu_data.any_cpu);
+ if (!map)
+ return NULL;
+
+ if (data->range_cpu_data.any_cpu)
+ RC_CHK_ACCESS(map)->map[i++].cpu = -1;
+
+ for (int cpu = data->range_cpu_data.start_cpu; cpu <= data->range_cpu_data.end_cpu;
+ i++, cpu++)
+ RC_CHK_ACCESS(map)->map[i].cpu = cpu;
+
+ return map;
+}
+
+struct perf_cpu_map *cpu_map__new_data(const struct perf_record_cpu_map_data *data)
+{
+ switch (data->type) {
+ case PERF_CPU_MAP__CPUS:
+ return cpu_map__from_entries(data);
+ case PERF_CPU_MAP__MASK:
+ return cpu_map__from_mask(data);
+ case PERF_CPU_MAP__RANGE_CPUS:
+ return cpu_map__from_range(data);
+ default:
+ pr_err("cpu_map__new_data unknown type %d\n", data->type);
+ return NULL;
+ }
+}
+
+size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp)
+{
+#define BUFSIZE 1024
+ char buf[BUFSIZE];
+
+ cpu_map__snprint(map, buf, sizeof(buf));
+ return fprintf(fp, "%s\n", buf);
+#undef BUFSIZE
+}
+
+struct perf_cpu_map *perf_cpu_map__empty_new(int nr)
+{
+ struct perf_cpu_map *cpus = perf_cpu_map__alloc(nr);
+
+ if (cpus != NULL) {
+ for (int i = 0; i < nr; i++)
+ RC_CHK_ACCESS(cpus)->map[i].cpu = -1;
+ }
+
+ return cpus;
+}
+
+struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr)
+{
+ struct cpu_aggr_map *cpus = malloc(sizeof(*cpus) + sizeof(struct aggr_cpu_id) * nr);
+
+ if (cpus != NULL) {
+ int i;
+
+ cpus->nr = nr;
+ for (i = 0; i < nr; i++)
+ cpus->map[i] = aggr_cpu_id__empty();
+
+ refcount_set(&cpus->refcnt, 1);
+ }
+
+ return cpus;
+}
+
+static int cpu__get_topology_int(int cpu, const char *name, int *value)
+{
+ char path[PATH_MAX];
+
+ snprintf(path, PATH_MAX,
+ "devices/system/cpu/cpu%d/topology/%s", cpu, name);
+
+ return sysfs__read_int(path, value);
+}
+
+int cpu__get_socket_id(struct perf_cpu cpu)
+{
+ int value, ret = cpu__get_topology_int(cpu.cpu, "physical_package_id", &value);
+ return ret ?: value;
+}
+
+struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data __maybe_unused)
+{
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
+
+ id.socket = cpu__get_socket_id(cpu);
+ return id;
+}
+
+static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer)
+{
+ struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer;
+ struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer;
+
+ if (a->node != b->node)
+ return a->node - b->node;
+ else if (a->socket != b->socket)
+ return a->socket - b->socket;
+ else if (a->die != b->die)
+ return a->die - b->die;
+ else if (a->cache_lvl != b->cache_lvl)
+ return a->cache_lvl - b->cache_lvl;
+ else if (a->cache != b->cache)
+ return a->cache - b->cache;
+ else if (a->core != b->core)
+ return a->core - b->core;
+ else
+ return a->thread_idx - b->thread_idx;
+}
+
+struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus,
+ aggr_cpu_id_get_t get_id,
+ void *data, bool needs_sort)
+{
+ int idx;
+ struct perf_cpu cpu;
+ struct cpu_aggr_map *c = cpu_aggr_map__empty_new(perf_cpu_map__nr(cpus));
+
+ if (!c)
+ return NULL;
+
+ /* Reset size as it may only be partially filled */
+ c->nr = 0;
+
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ bool duplicate = false;
+ struct aggr_cpu_id cpu_id = get_id(cpu, data);
+
+ for (int j = 0; j < c->nr; j++) {
+ if (aggr_cpu_id__equal(&cpu_id, &c->map[j])) {
+ duplicate = true;
+ break;
+ }
+ }
+ if (!duplicate) {
+ c->map[c->nr] = cpu_id;
+ c->nr++;
+ }
+ }
+ /* Trim. */
+ if (c->nr != perf_cpu_map__nr(cpus)) {
+ struct cpu_aggr_map *trimmed_c =
+ realloc(c,
+ sizeof(struct cpu_aggr_map) + sizeof(struct aggr_cpu_id) * c->nr);
+
+ if (trimmed_c)
+ c = trimmed_c;
+ }
+
+ /* ensure we process id in increasing order */
+ if (needs_sort)
+ qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp);
+
+ return c;
+
+}
+
+int cpu__get_die_id(struct perf_cpu cpu)
+{
+ int value, ret = cpu__get_topology_int(cpu.cpu, "die_id", &value);
+
+ return ret ?: value;
+}
+
+struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data)
+{
+ struct aggr_cpu_id id;
+ int die;
+
+ die = cpu__get_die_id(cpu);
+ /* There is no die_id on legacy system. */
+ if (die == -1)
+ die = 0;
+
+ /*
+ * die_id is relative to socket, so start
+ * with the socket ID and then add die to
+ * make a unique ID.
+ */
+ id = aggr_cpu_id__socket(cpu, data);
+ if (aggr_cpu_id__is_empty(&id))
+ return id;
+
+ id.die = die;
+ return id;
+}
+
+int cpu__get_core_id(struct perf_cpu cpu)
+{
+ int value, ret = cpu__get_topology_int(cpu.cpu, "core_id", &value);
+ return ret ?: value;
+}
+
+struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data)
+{
+ struct aggr_cpu_id id;
+ int core = cpu__get_core_id(cpu);
+
+ /* aggr_cpu_id__die returns a struct with socket and die set. */
+ id = aggr_cpu_id__die(cpu, data);
+ if (aggr_cpu_id__is_empty(&id))
+ return id;
+
+ /*
+ * core_id is relative to socket and die, we need a global id.
+ * So we combine the result from cpu_map__get_die with the core id
+ */
+ id.core = core;
+ return id;
+
+}
+
+struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data)
+{
+ struct aggr_cpu_id id;
+
+ /* aggr_cpu_id__core returns a struct with socket, die and core set. */
+ id = aggr_cpu_id__core(cpu, data);
+ if (aggr_cpu_id__is_empty(&id))
+ return id;
+
+ id.cpu = cpu;
+ return id;
+
+}
+
+struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data __maybe_unused)
+{
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
+
+ id.node = cpu__get_node(cpu);
+ return id;
+}
+
+struct aggr_cpu_id aggr_cpu_id__global(struct perf_cpu cpu, void *data __maybe_unused)
+{
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
+
+ /* it always aggregates to the cpu 0 */
+ cpu.cpu = 0;
+ id.cpu = cpu;
+ return id;
+}
+
+/* setup simple routines to easily access node numbers given a cpu number */
+static int get_max_num(char *path, int *max)
+{
+ size_t num;
+ char *buf;
+ int err = 0;
+
+ if (filename__read_str(path, &buf, &num))
+ return -1;
+
+ buf[num] = '\0';
+
+ /* start on the right, to find highest node num */
+ while (--num) {
+ if ((buf[num] == ',') || (buf[num] == '-')) {
+ num++;
+ break;
+ }
+ }
+ if (sscanf(&buf[num], "%d", max) < 1) {
+ err = -1;
+ goto out;
+ }
+
+ /* convert from 0-based to 1-based */
+ (*max)++;
+
+out:
+ free(buf);
+ return err;
+}
+
+/* Determine highest possible cpu in the system for sparse allocation */
+static void set_max_cpu_num(void)
+{
+ const char *mnt;
+ char path[PATH_MAX];
+ int ret = -1;
+
+ /* set up default */
+ max_cpu_num.cpu = 4096;
+ max_present_cpu_num.cpu = 4096;
+
+ mnt = sysfs__mountpoint();
+ if (!mnt)
+ goto out;
+
+ /* get the highest possible cpu number for a sparse allocation */
+ ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/possible", mnt);
+ if (ret >= PATH_MAX) {
+ pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
+ goto out;
+ }
+
+ ret = get_max_num(path, &max_cpu_num.cpu);
+ if (ret)
+ goto out;
+
+ /* get the highest present cpu number for a sparse allocation */
+ ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
+ if (ret >= PATH_MAX) {
+ pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
+ goto out;
+ }
+
+ ret = get_max_num(path, &max_present_cpu_num.cpu);
+
+out:
+ if (ret)
+ pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu);
+}
+
+/* Determine highest possible node in the system for sparse allocation */
+static void set_max_node_num(void)
+{
+ const char *mnt;
+ char path[PATH_MAX];
+ int ret = -1;
+
+ /* set up default */
+ max_node_num = 8;
+
+ mnt = sysfs__mountpoint();
+ if (!mnt)
+ goto out;
+
+ /* get the highest possible cpu number for a sparse allocation */
+ ret = snprintf(path, PATH_MAX, "%s/devices/system/node/possible", mnt);
+ if (ret >= PATH_MAX) {
+ pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
+ goto out;
+ }
+
+ ret = get_max_num(path, &max_node_num);
+
+out:
+ if (ret)
+ pr_err("Failed to read max nodes, using default of %d\n", max_node_num);
+}
+
+int cpu__max_node(void)
+{
+ if (unlikely(!max_node_num))
+ set_max_node_num();
+
+ return max_node_num;
+}
+
+struct perf_cpu cpu__max_cpu(void)
+{
+ if (unlikely(!max_cpu_num.cpu))
+ set_max_cpu_num();
+
+ return max_cpu_num;
+}
+
+struct perf_cpu cpu__max_present_cpu(void)
+{
+ if (unlikely(!max_present_cpu_num.cpu))
+ set_max_cpu_num();
+
+ return max_present_cpu_num;
+}
+
+
+int cpu__get_node(struct perf_cpu cpu)
+{
+ if (unlikely(cpunode_map == NULL)) {
+ pr_debug("cpu_map not initialized\n");
+ return -1;
+ }
+
+ return cpunode_map[cpu.cpu];
+}
+
+static int init_cpunode_map(void)
+{
+ int i;
+
+ set_max_cpu_num();
+ set_max_node_num();
+
+ cpunode_map = calloc(max_cpu_num.cpu, sizeof(int));
+ if (!cpunode_map) {
+ pr_err("%s: calloc failed\n", __func__);
+ return -1;
+ }
+
+ for (i = 0; i < max_cpu_num.cpu; i++)
+ cpunode_map[i] = -1;
+
+ return 0;
+}
+
+int cpu__setup_cpunode_map(void)
+{
+ struct dirent *dent1, *dent2;
+ DIR *dir1, *dir2;
+ unsigned int cpu, mem;
+ char buf[PATH_MAX];
+ char path[PATH_MAX];
+ const char *mnt;
+ int n;
+
+ /* initialize globals */
+ if (init_cpunode_map())
+ return -1;
+
+ mnt = sysfs__mountpoint();
+ if (!mnt)
+ return 0;
+
+ n = snprintf(path, PATH_MAX, "%s/devices/system/node", mnt);
+ if (n >= PATH_MAX) {
+ pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
+ return -1;
+ }
+
+ dir1 = opendir(path);
+ if (!dir1)
+ return 0;
+
+ /* walk tree and setup map */
+ while ((dent1 = readdir(dir1)) != NULL) {
+ if (dent1->d_type != DT_DIR || sscanf(dent1->d_name, "node%u", &mem) < 1)
+ continue;
+
+ n = snprintf(buf, PATH_MAX, "%s/%s", path, dent1->d_name);
+ if (n >= PATH_MAX) {
+ pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
+ continue;
+ }
+
+ dir2 = opendir(buf);
+ if (!dir2)
+ continue;
+ while ((dent2 = readdir(dir2)) != NULL) {
+ if (dent2->d_type != DT_LNK || sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
+ continue;
+ cpunode_map[cpu] = mem;
+ }
+ closedir(dir2);
+ }
+ closedir(dir1);
+ return 0;
+}
+
+size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size)
+{
+ int i, start = -1;
+ bool first = true;
+ size_t ret = 0;
+
+#define COMMA first ? "" : ","
+
+ for (i = 0; i < perf_cpu_map__nr(map) + 1; i++) {
+ struct perf_cpu cpu = { .cpu = INT_MAX };
+ bool last = i == perf_cpu_map__nr(map);
+
+ if (!last)
+ cpu = perf_cpu_map__cpu(map, i);
+
+ if (start == -1) {
+ start = i;
+ if (last) {
+ ret += snprintf(buf + ret, size - ret,
+ "%s%d", COMMA,
+ perf_cpu_map__cpu(map, i).cpu);
+ }
+ } else if (((i - start) != (cpu.cpu - perf_cpu_map__cpu(map, start).cpu)) || last) {
+ int end = i - 1;
+
+ if (start == end) {
+ ret += snprintf(buf + ret, size - ret,
+ "%s%d", COMMA,
+ perf_cpu_map__cpu(map, start).cpu);
+ } else {
+ ret += snprintf(buf + ret, size - ret,
+ "%s%d-%d", COMMA,
+ perf_cpu_map__cpu(map, start).cpu, perf_cpu_map__cpu(map, end).cpu);
+ }
+ first = false;
+ start = i;
+ }
+ }
+
+#undef COMMA
+
+ pr_debug2("cpumask list: %s\n", buf);
+ return ret;
+}
+
+static char hex_char(unsigned char val)
+{
+ if (val < 10)
+ return val + '0';
+ if (val < 16)
+ return val - 10 + 'a';
+ return '?';
+}
+
+size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size)
+{
+ int i, cpu;
+ char *ptr = buf;
+ unsigned char *bitmap;
+ struct perf_cpu last_cpu = perf_cpu_map__cpu(map, perf_cpu_map__nr(map) - 1);
+
+ if (buf == NULL)
+ return 0;
+
+ bitmap = zalloc(last_cpu.cpu / 8 + 1);
+ if (bitmap == NULL) {
+ buf[0] = '\0';
+ return 0;
+ }
+
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
+ cpu = perf_cpu_map__cpu(map, i).cpu;
+ bitmap[cpu / 8] |= 1 << (cpu % 8);
+ }
+
+ for (cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) {
+ unsigned char bits = bitmap[cpu / 8];
+
+ if (cpu % 8)
+ bits >>= 4;
+ else
+ bits &= 0xf;
+
+ *ptr++ = hex_char(bits);
+ if ((cpu % 32) == 0 && cpu > 0)
+ *ptr++ = ',';
+ }
+ *ptr = '\0';
+ free(bitmap);
+
+ buf[size - 1] = '\0';
+ return ptr - buf;
+}
+
+struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */
+{
+ static struct perf_cpu_map *online;
+
+ if (!online)
+ online = perf_cpu_map__new(NULL); /* from /sys/devices/system/cpu/online */
+
+ return online;
+}
+
+bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b)
+{
+ return a->thread_idx == b->thread_idx &&
+ a->node == b->node &&
+ a->socket == b->socket &&
+ a->die == b->die &&
+ a->cache_lvl == b->cache_lvl &&
+ a->cache == b->cache &&
+ a->core == b->core &&
+ a->cpu.cpu == b->cpu.cpu;
+}
+
+bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a)
+{
+ return a->thread_idx == -1 &&
+ a->node == -1 &&
+ a->socket == -1 &&
+ a->die == -1 &&
+ a->cache_lvl == -1 &&
+ a->cache == -1 &&
+ a->core == -1 &&
+ a->cpu.cpu == -1;
+}
+
+struct aggr_cpu_id aggr_cpu_id__empty(void)
+{
+ struct aggr_cpu_id ret = {
+ .thread_idx = -1,
+ .node = -1,
+ .socket = -1,
+ .die = -1,
+ .cache_lvl = -1,
+ .cache = -1,
+ .core = -1,
+ .cpu = (struct perf_cpu){ .cpu = -1 },
+ };
+ return ret;
+}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
new file mode 100644
index 000000000..9df2aeb34
--- /dev/null
+++ b/tools/perf/util/cpumap.h
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CPUMAP_H
+#define __PERF_CPUMAP_H
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <perf/cpumap.h>
+#include <linux/refcount.h>
+
+/** Identify where counts are aggregated, -1 implies not to aggregate. */
+struct aggr_cpu_id {
+ /** A value in the range 0 to number of threads. */
+ int thread_idx;
+ /** The numa node X as read from /sys/devices/system/node/nodeX. */
+ int node;
+ /**
+ * The socket number as read from
+ * /sys/devices/system/cpu/cpuX/topology/physical_package_id.
+ */
+ int socket;
+ /** The die id as read from /sys/devices/system/cpu/cpuX/topology/die_id. */
+ int die;
+ /** The cache level as read from /sys/devices/system/cpu/cpuX/cache/indexY/level */
+ int cache_lvl;
+ /**
+ * The cache instance ID, which is the first CPU in the
+ * /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list
+ */
+ int cache;
+ /** The core id as read from /sys/devices/system/cpu/cpuX/topology/core_id. */
+ int core;
+ /** CPU aggregation, note there is one CPU for each SMT thread. */
+ struct perf_cpu cpu;
+};
+
+/** A collection of aggr_cpu_id values, the "built" version is sorted and uniqued. */
+struct cpu_aggr_map {
+ refcount_t refcnt;
+ /** Number of valid entries. */
+ int nr;
+ /** The entries. */
+ struct aggr_cpu_id map[];
+};
+
+#define cpu_aggr_map__for_each_idx(idx, aggr_map) \
+ for ((idx) = 0; (idx) < aggr_map->nr; (idx)++)
+
+struct perf_record_cpu_map_data;
+
+bool perf_record_cpu_map_data__test_bit(int i, const struct perf_record_cpu_map_data *data);
+
+struct perf_cpu_map *perf_cpu_map__empty_new(int nr);
+
+struct perf_cpu_map *cpu_map__new_data(const struct perf_record_cpu_map_data *data);
+size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size);
+size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size);
+size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp);
+struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */
+
+int cpu__setup_cpunode_map(void);
+
+int cpu__max_node(void);
+struct perf_cpu cpu__max_cpu(void);
+struct perf_cpu cpu__max_present_cpu(void);
+
+/**
+ * cpu_map__is_dummy - Events associated with a pid, rather than a CPU, use a single dummy map with an entry of -1.
+ */
+static inline bool cpu_map__is_dummy(const struct perf_cpu_map *cpus)
+{
+ return perf_cpu_map__nr(cpus) == 1 && perf_cpu_map__cpu(cpus, 0).cpu == -1;
+}
+
+/**
+ * cpu__get_node - Returns the numa node X as read from
+ * /sys/devices/system/node/nodeX for the given CPU.
+ */
+int cpu__get_node(struct perf_cpu cpu);
+/**
+ * cpu__get_socket_id - Returns the socket number as read from
+ * /sys/devices/system/cpu/cpuX/topology/physical_package_id for the given CPU.
+ */
+int cpu__get_socket_id(struct perf_cpu cpu);
+/**
+ * cpu__get_die_id - Returns the die id as read from
+ * /sys/devices/system/cpu/cpuX/topology/die_id for the given CPU.
+ */
+int cpu__get_die_id(struct perf_cpu cpu);
+/**
+ * cpu__get_core_id - Returns the core id as read from
+ * /sys/devices/system/cpu/cpuX/topology/core_id for the given CPU.
+ */
+int cpu__get_core_id(struct perf_cpu cpu);
+
+/**
+ * cpu_aggr_map__empty_new - Create a cpu_aggr_map of size nr with every entry
+ * being empty.
+ */
+struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr);
+
+typedef struct aggr_cpu_id (*aggr_cpu_id_get_t)(struct perf_cpu cpu, void *data);
+
+/**
+ * cpu_aggr_map__new - Create a cpu_aggr_map with an aggr_cpu_id for each cpu in
+ * cpus. The aggr_cpu_id is created with 'get_id' that may have a data value
+ * passed to it. The cpu_aggr_map is sorted with duplicate values removed.
+ */
+struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus,
+ aggr_cpu_id_get_t get_id,
+ void *data, bool needs_sort);
+
+bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b);
+bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a);
+struct aggr_cpu_id aggr_cpu_id__empty(void);
+
+
+/**
+ * aggr_cpu_id__socket - Create an aggr_cpu_id with the socket populated with
+ * the socket for cpu. The function signature is compatible with
+ * aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__die - Create an aggr_cpu_id with the die and socket populated
+ * with the die and socket for cpu. The function signature is compatible with
+ * aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__core - Create an aggr_cpu_id with the core, die and socket
+ * populated with the core, die and socket for cpu. The function signature is
+ * compatible with aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__core - Create an aggr_cpu_id with the cpu, core, die and socket
+ * populated with the cpu, core, die and socket for cpu. The function signature
+ * is compatible with aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__node - Create an aggr_cpu_id with the numa node populated for
+ * cpu. The function signature is compatible with aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__global - Create an aggr_cpu_id for global aggregation.
+ * The function signature is compatible with aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__global(struct perf_cpu cpu, void *data);
+#endif /* __PERF_CPUMAP_H */
diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c
new file mode 100644
index 000000000..81cfc85f4
--- /dev/null
+++ b/tools/perf/util/cputopo.c
@@ -0,0 +1,506 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/param.h>
+#include <sys/utsname.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+#include <api/fs/fs.h>
+#include <linux/zalloc.h>
+#include <perf/cpumap.h>
+
+#include "cputopo.h"
+#include "cpumap.h"
+#include "debug.h"
+#include "env.h"
+#include "pmu.h"
+#include "pmus.h"
+
+#define PACKAGE_CPUS_FMT \
+ "%s/devices/system/cpu/cpu%d/topology/package_cpus_list"
+#define PACKAGE_CPUS_FMT_OLD \
+ "%s/devices/system/cpu/cpu%d/topology/core_siblings_list"
+#define DIE_CPUS_FMT \
+ "%s/devices/system/cpu/cpu%d/topology/die_cpus_list"
+#define CORE_CPUS_FMT \
+ "%s/devices/system/cpu/cpu%d/topology/core_cpus_list"
+#define CORE_CPUS_FMT_OLD \
+ "%s/devices/system/cpu/cpu%d/topology/thread_siblings_list"
+#define NODE_ONLINE_FMT \
+ "%s/devices/system/node/online"
+#define NODE_MEMINFO_FMT \
+ "%s/devices/system/node/node%d/meminfo"
+#define NODE_CPULIST_FMT \
+ "%s/devices/system/node/node%d/cpulist"
+
+static int build_cpu_topology(struct cpu_topology *tp, int cpu)
+{
+ FILE *fp;
+ char filename[MAXPATHLEN];
+ char *buf = NULL, *p;
+ size_t len = 0;
+ ssize_t sret;
+ u32 i = 0;
+ int ret = -1;
+
+ scnprintf(filename, MAXPATHLEN, PACKAGE_CPUS_FMT,
+ sysfs__mountpoint(), cpu);
+ if (access(filename, F_OK) == -1) {
+ scnprintf(filename, MAXPATHLEN, PACKAGE_CPUS_FMT_OLD,
+ sysfs__mountpoint(), cpu);
+ }
+ fp = fopen(filename, "r");
+ if (!fp)
+ goto try_dies;
+
+ sret = getline(&buf, &len, fp);
+ fclose(fp);
+ if (sret <= 0)
+ goto try_dies;
+
+ p = strchr(buf, '\n');
+ if (p)
+ *p = '\0';
+
+ for (i = 0; i < tp->package_cpus_lists; i++) {
+ if (!strcmp(buf, tp->package_cpus_list[i]))
+ break;
+ }
+ if (i == tp->package_cpus_lists) {
+ tp->package_cpus_list[i] = buf;
+ tp->package_cpus_lists++;
+ buf = NULL;
+ len = 0;
+ }
+ ret = 0;
+
+try_dies:
+ if (!tp->die_cpus_list)
+ goto try_threads;
+
+ scnprintf(filename, MAXPATHLEN, DIE_CPUS_FMT,
+ sysfs__mountpoint(), cpu);
+ fp = fopen(filename, "r");
+ if (!fp)
+ goto try_threads;
+
+ sret = getline(&buf, &len, fp);
+ fclose(fp);
+ if (sret <= 0)
+ goto try_threads;
+
+ p = strchr(buf, '\n');
+ if (p)
+ *p = '\0';
+
+ for (i = 0; i < tp->die_cpus_lists; i++) {
+ if (!strcmp(buf, tp->die_cpus_list[i]))
+ break;
+ }
+ if (i == tp->die_cpus_lists) {
+ tp->die_cpus_list[i] = buf;
+ tp->die_cpus_lists++;
+ buf = NULL;
+ len = 0;
+ }
+ ret = 0;
+
+try_threads:
+ scnprintf(filename, MAXPATHLEN, CORE_CPUS_FMT,
+ sysfs__mountpoint(), cpu);
+ if (access(filename, F_OK) == -1) {
+ scnprintf(filename, MAXPATHLEN, CORE_CPUS_FMT_OLD,
+ sysfs__mountpoint(), cpu);
+ }
+ fp = fopen(filename, "r");
+ if (!fp)
+ goto done;
+
+ if (getline(&buf, &len, fp) <= 0)
+ goto done;
+
+ p = strchr(buf, '\n');
+ if (p)
+ *p = '\0';
+
+ for (i = 0; i < tp->core_cpus_lists; i++) {
+ if (!strcmp(buf, tp->core_cpus_list[i]))
+ break;
+ }
+ if (i == tp->core_cpus_lists) {
+ tp->core_cpus_list[i] = buf;
+ tp->core_cpus_lists++;
+ buf = NULL;
+ }
+ ret = 0;
+done:
+ if (fp)
+ fclose(fp);
+ free(buf);
+ return ret;
+}
+
+void cpu_topology__delete(struct cpu_topology *tp)
+{
+ u32 i;
+
+ if (!tp)
+ return;
+
+ for (i = 0 ; i < tp->package_cpus_lists; i++)
+ zfree(&tp->package_cpus_list[i]);
+
+ for (i = 0 ; i < tp->die_cpus_lists; i++)
+ zfree(&tp->die_cpus_list[i]);
+
+ for (i = 0 ; i < tp->core_cpus_lists; i++)
+ zfree(&tp->core_cpus_list[i]);
+
+ free(tp);
+}
+
+bool cpu_topology__smt_on(const struct cpu_topology *topology)
+{
+ for (u32 i = 0; i < topology->core_cpus_lists; i++) {
+ const char *cpu_list = topology->core_cpus_list[i];
+
+ /*
+ * If there is a need to separate siblings in a core then SMT is
+ * enabled.
+ */
+ if (strchr(cpu_list, ',') || strchr(cpu_list, '-'))
+ return true;
+ }
+ return false;
+}
+
+bool cpu_topology__core_wide(const struct cpu_topology *topology,
+ const char *user_requested_cpu_list)
+{
+ struct perf_cpu_map *user_requested_cpus;
+
+ /*
+ * If user_requested_cpu_list is empty then all CPUs are recorded and so
+ * core_wide is true.
+ */
+ if (!user_requested_cpu_list)
+ return true;
+
+ user_requested_cpus = perf_cpu_map__new(user_requested_cpu_list);
+ /* Check that every user requested CPU is the complete set of SMT threads on a core. */
+ for (u32 i = 0; i < topology->core_cpus_lists; i++) {
+ const char *core_cpu_list = topology->core_cpus_list[i];
+ struct perf_cpu_map *core_cpus = perf_cpu_map__new(core_cpu_list);
+ struct perf_cpu cpu;
+ int idx;
+ bool has_first, first = true;
+
+ perf_cpu_map__for_each_cpu(cpu, idx, core_cpus) {
+ if (first) {
+ has_first = perf_cpu_map__has(user_requested_cpus, cpu);
+ first = false;
+ } else {
+ /*
+ * If the first core CPU is user requested then
+ * all subsequent CPUs in the core must be user
+ * requested too. If the first CPU isn't user
+ * requested then none of the others must be
+ * too.
+ */
+ if (perf_cpu_map__has(user_requested_cpus, cpu) != has_first) {
+ perf_cpu_map__put(core_cpus);
+ perf_cpu_map__put(user_requested_cpus);
+ return false;
+ }
+ }
+ }
+ perf_cpu_map__put(core_cpus);
+ }
+ perf_cpu_map__put(user_requested_cpus);
+ return true;
+}
+
+static bool has_die_topology(void)
+{
+ char filename[MAXPATHLEN];
+ struct utsname uts;
+
+ if (uname(&uts) < 0)
+ return false;
+
+ if (strncmp(uts.machine, "x86_64", 6) &&
+ strncmp(uts.machine, "s390x", 5))
+ return false;
+
+ scnprintf(filename, MAXPATHLEN, DIE_CPUS_FMT,
+ sysfs__mountpoint(), 0);
+ if (access(filename, F_OK) == -1)
+ return false;
+
+ return true;
+}
+
+const struct cpu_topology *online_topology(void)
+{
+ static const struct cpu_topology *topology;
+
+ if (!topology) {
+ topology = cpu_topology__new();
+ if (!topology) {
+ pr_err("Error creating CPU topology");
+ abort();
+ }
+ }
+ return topology;
+}
+
+struct cpu_topology *cpu_topology__new(void)
+{
+ struct cpu_topology *tp = NULL;
+ void *addr;
+ u32 nr, i, nr_addr;
+ size_t sz;
+ long ncpus;
+ int ret = -1;
+ struct perf_cpu_map *map;
+ bool has_die = has_die_topology();
+
+ ncpus = cpu__max_present_cpu().cpu;
+
+ /* build online CPU map */
+ map = perf_cpu_map__new(NULL);
+ if (map == NULL) {
+ pr_debug("failed to get system cpumap\n");
+ return NULL;
+ }
+
+ nr = (u32)(ncpus & UINT_MAX);
+
+ sz = nr * sizeof(char *);
+ if (has_die)
+ nr_addr = 3;
+ else
+ nr_addr = 2;
+ addr = calloc(1, sizeof(*tp) + nr_addr * sz);
+ if (!addr)
+ goto out_free;
+
+ tp = addr;
+ addr += sizeof(*tp);
+ tp->package_cpus_list = addr;
+ addr += sz;
+ if (has_die) {
+ tp->die_cpus_list = addr;
+ addr += sz;
+ }
+ tp->core_cpus_list = addr;
+
+ for (i = 0; i < nr; i++) {
+ if (!perf_cpu_map__has(map, (struct perf_cpu){ .cpu = i }))
+ continue;
+
+ ret = build_cpu_topology(tp, i);
+ if (ret < 0)
+ break;
+ }
+
+out_free:
+ perf_cpu_map__put(map);
+ if (ret) {
+ cpu_topology__delete(tp);
+ tp = NULL;
+ }
+ return tp;
+}
+
+static int load_numa_node(struct numa_topology_node *node, int nr)
+{
+ char str[MAXPATHLEN];
+ char field[32];
+ char *buf = NULL, *p;
+ size_t len = 0;
+ int ret = -1;
+ FILE *fp;
+ u64 mem;
+
+ node->node = (u32) nr;
+
+ scnprintf(str, MAXPATHLEN, NODE_MEMINFO_FMT,
+ sysfs__mountpoint(), nr);
+ fp = fopen(str, "r");
+ if (!fp)
+ return -1;
+
+ while (getline(&buf, &len, fp) > 0) {
+ /* skip over invalid lines */
+ if (!strchr(buf, ':'))
+ continue;
+ if (sscanf(buf, "%*s %*d %31s %"PRIu64, field, &mem) != 2)
+ goto err;
+ if (!strcmp(field, "MemTotal:"))
+ node->mem_total = mem;
+ if (!strcmp(field, "MemFree:"))
+ node->mem_free = mem;
+ if (node->mem_total && node->mem_free)
+ break;
+ }
+
+ fclose(fp);
+ fp = NULL;
+
+ scnprintf(str, MAXPATHLEN, NODE_CPULIST_FMT,
+ sysfs__mountpoint(), nr);
+
+ fp = fopen(str, "r");
+ if (!fp)
+ return -1;
+
+ if (getline(&buf, &len, fp) <= 0)
+ goto err;
+
+ p = strchr(buf, '\n');
+ if (p)
+ *p = '\0';
+
+ node->cpus = buf;
+ fclose(fp);
+ return 0;
+
+err:
+ free(buf);
+ if (fp)
+ fclose(fp);
+ return ret;
+}
+
+struct numa_topology *numa_topology__new(void)
+{
+ struct perf_cpu_map *node_map = NULL;
+ struct numa_topology *tp = NULL;
+ char path[MAXPATHLEN];
+ char *buf = NULL;
+ size_t len = 0;
+ u32 nr, i;
+ FILE *fp;
+ char *c;
+
+ scnprintf(path, MAXPATHLEN, NODE_ONLINE_FMT,
+ sysfs__mountpoint());
+
+ fp = fopen(path, "r");
+ if (!fp)
+ return NULL;
+
+ if (getline(&buf, &len, fp) <= 0)
+ goto out;
+
+ c = strchr(buf, '\n');
+ if (c)
+ *c = '\0';
+
+ node_map = perf_cpu_map__new(buf);
+ if (!node_map)
+ goto out;
+
+ nr = (u32) perf_cpu_map__nr(node_map);
+
+ tp = zalloc(sizeof(*tp) + sizeof(tp->nodes[0])*nr);
+ if (!tp)
+ goto out;
+
+ tp->nr = nr;
+
+ for (i = 0; i < nr; i++) {
+ if (load_numa_node(&tp->nodes[i], perf_cpu_map__cpu(node_map, i).cpu)) {
+ numa_topology__delete(tp);
+ tp = NULL;
+ break;
+ }
+ }
+
+out:
+ free(buf);
+ fclose(fp);
+ perf_cpu_map__put(node_map);
+ return tp;
+}
+
+void numa_topology__delete(struct numa_topology *tp)
+{
+ u32 i;
+
+ for (i = 0; i < tp->nr; i++)
+ zfree(&tp->nodes[i].cpus);
+
+ free(tp);
+}
+
+static int load_hybrid_node(struct hybrid_topology_node *node,
+ struct perf_pmu *pmu)
+{
+ char *buf = NULL, *p;
+ FILE *fp;
+ size_t len = 0;
+
+ node->pmu_name = strdup(pmu->name);
+ if (!node->pmu_name)
+ return -1;
+
+ fp = perf_pmu__open_file(pmu, "cpus");
+ if (!fp)
+ goto err;
+
+ if (getline(&buf, &len, fp) <= 0) {
+ fclose(fp);
+ goto err;
+ }
+
+ p = strchr(buf, '\n');
+ if (p)
+ *p = '\0';
+
+ fclose(fp);
+ node->cpus = buf;
+ return 0;
+
+err:
+ zfree(&node->pmu_name);
+ free(buf);
+ return -1;
+}
+
+struct hybrid_topology *hybrid_topology__new(void)
+{
+ struct perf_pmu *pmu = NULL;
+ struct hybrid_topology *tp = NULL;
+ int nr = perf_pmus__num_core_pmus(), i = 0;
+
+ if (nr <= 1)
+ return NULL;
+
+ tp = zalloc(sizeof(*tp) + sizeof(tp->nodes[0]) * nr);
+ if (!tp)
+ return NULL;
+
+ tp->nr = nr;
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ if (load_hybrid_node(&tp->nodes[i], pmu)) {
+ hybrid_topology__delete(tp);
+ return NULL;
+ }
+ i++;
+ }
+
+ return tp;
+}
+
+void hybrid_topology__delete(struct hybrid_topology *tp)
+{
+ u32 i;
+
+ for (i = 0; i < tp->nr; i++) {
+ zfree(&tp->nodes[i].pmu_name);
+ zfree(&tp->nodes[i].cpus);
+ }
+
+ free(tp);
+}
diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h
new file mode 100644
index 000000000..8d42f6102
--- /dev/null
+++ b/tools/perf/util/cputopo.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CPUTOPO_H
+#define __PERF_CPUTOPO_H
+
+#include <linux/types.h>
+
+struct cpu_topology {
+ /* The number of unique package_cpus_lists below. */
+ u32 package_cpus_lists;
+ /* The number of unique die_cpu_lists below. */
+ u32 die_cpus_lists;
+ /* The number of unique core_cpu_lists below. */
+ u32 core_cpus_lists;
+ /*
+ * An array of strings where each string is unique and read from
+ * /sys/devices/system/cpu/cpuX/topology/package_cpus_list. From the ABI
+ * each of these is a human-readable list of CPUs sharing the same
+ * physical_package_id. The format is like 0-3, 8-11, 14,17.
+ */
+ const char **package_cpus_list;
+ /*
+ * An array of string where each string is unique and from
+ * /sys/devices/system/cpu/cpuX/topology/die_cpus_list. From the ABI
+ * each of these is a human-readable list of CPUs within the same die.
+ * The format is like 0-3, 8-11, 14,17.
+ */
+ const char **die_cpus_list;
+ /*
+ * An array of string where each string is unique and from
+ * /sys/devices/system/cpu/cpuX/topology/core_cpus_list. From the ABI
+ * each of these is a human-readable list of CPUs within the same
+ * core. The format is like 0-3, 8-11, 14,17.
+ */
+ const char **core_cpus_list;
+};
+
+struct numa_topology_node {
+ char *cpus;
+ u32 node;
+ u64 mem_total;
+ u64 mem_free;
+};
+
+struct numa_topology {
+ u32 nr;
+ struct numa_topology_node nodes[];
+};
+
+struct hybrid_topology_node {
+ char *pmu_name;
+ char *cpus;
+};
+
+struct hybrid_topology {
+ u32 nr;
+ struct hybrid_topology_node nodes[];
+};
+
+/*
+ * The topology for online CPUs, lazily created.
+ */
+const struct cpu_topology *online_topology(void);
+
+struct cpu_topology *cpu_topology__new(void);
+void cpu_topology__delete(struct cpu_topology *tp);
+/* Determine from the core list whether SMT was enabled. */
+bool cpu_topology__smt_on(const struct cpu_topology *topology);
+/* Are the sets of SMT siblings all enabled or all disabled in user_requested_cpus. */
+bool cpu_topology__core_wide(const struct cpu_topology *topology,
+ const char *user_requested_cpu_list);
+
+struct numa_topology *numa_topology__new(void);
+void numa_topology__delete(struct numa_topology *tp);
+
+struct hybrid_topology *hybrid_topology__new(void);
+void hybrid_topology__delete(struct hybrid_topology *tp);
+
+#endif /* __PERF_CPUTOPO_H */
diff --git a/tools/perf/util/cs-etm-base.c b/tools/perf/util/cs-etm-base.c
new file mode 100644
index 000000000..4abe416e3
--- /dev/null
+++ b/tools/perf/util/cs-etm-base.c
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * File for any parts of the Coresight decoding that don't require
+ * OpenCSD.
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+
+#include "cs-etm.h"
+
+static const char * const cs_etm_global_header_fmts[] = {
+ [CS_HEADER_VERSION] = " Header version %llx\n",
+ [CS_PMU_TYPE_CPUS] = " PMU type/num cpus %llx\n",
+ [CS_ETM_SNAPSHOT] = " Snapshot %llx\n",
+};
+
+static const char * const cs_etm_priv_fmts[] = {
+ [CS_ETM_MAGIC] = " Magic number %llx\n",
+ [CS_ETM_CPU] = " CPU %lld\n",
+ [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n",
+ [CS_ETM_ETMCR] = " ETMCR %llx\n",
+ [CS_ETM_ETMTRACEIDR] = " ETMTRACEIDR %llx\n",
+ [CS_ETM_ETMCCER] = " ETMCCER %llx\n",
+ [CS_ETM_ETMIDR] = " ETMIDR %llx\n",
+};
+
+static const char * const cs_etmv4_priv_fmts[] = {
+ [CS_ETM_MAGIC] = " Magic number %llx\n",
+ [CS_ETM_CPU] = " CPU %lld\n",
+ [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n",
+ [CS_ETMV4_TRCCONFIGR] = " TRCCONFIGR %llx\n",
+ [CS_ETMV4_TRCTRACEIDR] = " TRCTRACEIDR %llx\n",
+ [CS_ETMV4_TRCIDR0] = " TRCIDR0 %llx\n",
+ [CS_ETMV4_TRCIDR1] = " TRCIDR1 %llx\n",
+ [CS_ETMV4_TRCIDR2] = " TRCIDR2 %llx\n",
+ [CS_ETMV4_TRCIDR8] = " TRCIDR8 %llx\n",
+ [CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n",
+ [CS_ETMV4_TS_SOURCE] = " TS_SOURCE %lld\n",
+};
+
+static const char * const cs_ete_priv_fmts[] = {
+ [CS_ETM_MAGIC] = " Magic number %llx\n",
+ [CS_ETM_CPU] = " CPU %lld\n",
+ [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n",
+ [CS_ETE_TRCCONFIGR] = " TRCCONFIGR %llx\n",
+ [CS_ETE_TRCTRACEIDR] = " TRCTRACEIDR %llx\n",
+ [CS_ETE_TRCIDR0] = " TRCIDR0 %llx\n",
+ [CS_ETE_TRCIDR1] = " TRCIDR1 %llx\n",
+ [CS_ETE_TRCIDR2] = " TRCIDR2 %llx\n",
+ [CS_ETE_TRCIDR8] = " TRCIDR8 %llx\n",
+ [CS_ETE_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n",
+ [CS_ETE_TRCDEVARCH] = " TRCDEVARCH %llx\n",
+ [CS_ETE_TS_SOURCE] = " TS_SOURCE %lld\n",
+};
+
+static const char * const param_unk_fmt =
+ " Unknown parameter [%d] %"PRIx64"\n";
+static const char * const magic_unk_fmt =
+ " Magic number Unknown %"PRIx64"\n";
+
+static int cs_etm__print_cpu_metadata_v0(u64 *val, int *offset)
+{
+ int i = *offset, j, nr_params = 0, fmt_offset;
+ u64 magic;
+
+ /* check magic value */
+ magic = val[i + CS_ETM_MAGIC];
+ if ((magic != __perf_cs_etmv3_magic) &&
+ (magic != __perf_cs_etmv4_magic)) {
+ /* failure - note bad magic value */
+ fprintf(stdout, magic_unk_fmt, magic);
+ return -EINVAL;
+ }
+
+ /* print common header block */
+ fprintf(stdout, cs_etm_priv_fmts[CS_ETM_MAGIC], val[i++]);
+ fprintf(stdout, cs_etm_priv_fmts[CS_ETM_CPU], val[i++]);
+
+ if (magic == __perf_cs_etmv3_magic) {
+ nr_params = CS_ETM_NR_TRC_PARAMS_V0;
+ fmt_offset = CS_ETM_ETMCR;
+ /* after common block, offset format index past NR_PARAMS */
+ for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++)
+ fprintf(stdout, cs_etm_priv_fmts[j], val[i]);
+ } else if (magic == __perf_cs_etmv4_magic) {
+ nr_params = CS_ETMV4_NR_TRC_PARAMS_V0;
+ fmt_offset = CS_ETMV4_TRCCONFIGR;
+ /* after common block, offset format index past NR_PARAMS */
+ for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++)
+ fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
+ }
+ *offset = i;
+ return 0;
+}
+
+static int cs_etm__print_cpu_metadata_v1(u64 *val, int *offset)
+{
+ int i = *offset, j, total_params = 0;
+ u64 magic;
+
+ magic = val[i + CS_ETM_MAGIC];
+ /* total params to print is NR_PARAMS + common block size for v1 */
+ total_params = val[i + CS_ETM_NR_TRC_PARAMS] + CS_ETM_COMMON_BLK_MAX_V1;
+
+ if (magic == __perf_cs_etmv3_magic) {
+ for (j = 0; j < total_params; j++, i++) {
+ /* if newer record - could be excess params */
+ if (j >= CS_ETM_PRIV_MAX)
+ fprintf(stdout, param_unk_fmt, j, val[i]);
+ else
+ fprintf(stdout, cs_etm_priv_fmts[j], val[i]);
+ }
+ } else if (magic == __perf_cs_etmv4_magic) {
+ for (j = 0; j < total_params; j++, i++) {
+ /* if newer record - could be excess params */
+ if (j >= CS_ETMV4_PRIV_MAX)
+ fprintf(stdout, param_unk_fmt, j, val[i]);
+ else
+ fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
+ }
+ } else if (magic == __perf_cs_ete_magic) {
+ for (j = 0; j < total_params; j++, i++) {
+ /* if newer record - could be excess params */
+ if (j >= CS_ETE_PRIV_MAX)
+ fprintf(stdout, param_unk_fmt, j, val[i]);
+ else
+ fprintf(stdout, cs_ete_priv_fmts[j], val[i]);
+ }
+ } else {
+ /* failure - note bad magic value and error out */
+ fprintf(stdout, magic_unk_fmt, magic);
+ return -EINVAL;
+ }
+ *offset = i;
+ return 0;
+}
+
+static void cs_etm__print_auxtrace_info(u64 *val, int num)
+{
+ int i, cpu = 0, version, err;
+
+ version = val[0];
+
+ for (i = 0; i < CS_HEADER_VERSION_MAX; i++)
+ fprintf(stdout, cs_etm_global_header_fmts[i], val[i]);
+
+ for (i = CS_HEADER_VERSION_MAX; cpu < num; cpu++) {
+ if (version == 0)
+ err = cs_etm__print_cpu_metadata_v0(val, &i);
+ /* printing same for both, but value bit flags added on v2 */
+ else if ((version == 1) || (version == 2))
+ err = cs_etm__print_cpu_metadata_v1(val, &i);
+ if (err)
+ return;
+ }
+}
+
+/*
+ * Do some basic checks and print the auxtrace info header before calling
+ * into cs_etm__process_auxtrace_info_full() which requires OpenCSD to be
+ * linked in. This allows some basic debugging if OpenCSD is missing.
+ */
+int cs_etm__process_auxtrace_info(union perf_event *event,
+ struct perf_session *session)
+{
+ struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
+ int event_header_size = sizeof(struct perf_event_header);
+ int num_cpu;
+ u64 *ptr = NULL;
+ u64 hdr_version;
+
+ if (auxtrace_info->header.size < (event_header_size + INFO_HEADER_SIZE))
+ return -EINVAL;
+
+ /* First the global part */
+ ptr = (u64 *) auxtrace_info->priv;
+
+ /* Look for version of the header */
+ hdr_version = ptr[0];
+ if (hdr_version > CS_HEADER_CURRENT_VERSION) {
+ pr_err("\nCS ETM Trace: Unknown Header Version = %#" PRIx64, hdr_version);
+ pr_err(", version supported <= %x\n", CS_HEADER_CURRENT_VERSION);
+ return -EINVAL;
+ }
+
+ if (dump_trace) {
+ num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
+ cs_etm__print_auxtrace_info(ptr, num_cpu);
+ }
+
+ return cs_etm__process_auxtrace_info_full(event, session);
+}
diff --git a/tools/perf/util/cs-etm-decoder/Build b/tools/perf/util/cs-etm-decoder/Build
new file mode 100644
index 000000000..216cb17a3
--- /dev/null
+++ b/tools/perf/util/cs-etm-decoder/Build
@@ -0,0 +1 @@
+perf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
new file mode 100644
index 000000000..e917985bb
--- /dev/null
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -0,0 +1,837 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2015-2018 Linaro Limited.
+ *
+ * Author: Tor Jeremiassen <tor@ti.com>
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <asm/bug.h>
+#include <linux/coresight-pmu.h>
+#include <linux/err.h>
+#include <linux/list.h>
+#include <linux/zalloc.h>
+#include <stdlib.h>
+#include <opencsd/c_api/opencsd_c_api.h>
+
+#include "cs-etm.h"
+#include "cs-etm-decoder.h"
+#include "debug.h"
+#include "intlist.h"
+
+/* use raw logging */
+#ifdef CS_DEBUG_RAW
+#define CS_LOG_RAW_FRAMES
+#ifdef CS_RAW_PACKED
+#define CS_RAW_DEBUG_FLAGS (OCSD_DFRMTR_UNPACKED_RAW_OUT | \
+ OCSD_DFRMTR_PACKED_RAW_OUT)
+#else
+#define CS_RAW_DEBUG_FLAGS (OCSD_DFRMTR_UNPACKED_RAW_OUT)
+#endif
+#endif
+
+/*
+ * Assume a maximum of 0.1ns elapsed per instruction. This would be the
+ * case with a theoretical 10GHz core executing 1 instruction per cycle.
+ * Used to estimate the sample time for synthesized instructions because
+ * Coresight only emits a timestamp for a range of instructions rather
+ * than per instruction.
+ */
+const u32 INSTR_PER_NS = 10;
+
+struct cs_etm_decoder {
+ void *data;
+ void (*packet_printer)(const char *msg);
+ bool suppress_printing;
+ dcd_tree_handle_t dcd_tree;
+ cs_etm_mem_cb_type mem_access;
+ ocsd_datapath_resp_t prev_return;
+ const char *decoder_name;
+};
+
+static u32
+cs_etm_decoder__mem_access(const void *context,
+ const ocsd_vaddr_t address,
+ const ocsd_mem_space_acc_t mem_space,
+ const u8 trace_chan_id,
+ const u32 req_size,
+ u8 *buffer)
+{
+ struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context;
+
+ return decoder->mem_access(decoder->data, trace_chan_id, address,
+ req_size, buffer, mem_space);
+}
+
+int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder,
+ u64 start, u64 end,
+ cs_etm_mem_cb_type cb_func)
+{
+ decoder->mem_access = cb_func;
+
+ if (ocsd_dt_add_callback_trcid_mem_acc(decoder->dcd_tree, start, end,
+ OCSD_MEM_SPACE_ANY,
+ cs_etm_decoder__mem_access,
+ decoder))
+ return -1;
+
+ return 0;
+}
+
+int cs_etm_decoder__reset(struct cs_etm_decoder *decoder)
+{
+ ocsd_datapath_resp_t dp_ret;
+
+ decoder->prev_return = OCSD_RESP_CONT;
+ decoder->suppress_printing = true;
+ dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET,
+ 0, 0, NULL, NULL);
+ decoder->suppress_printing = false;
+ if (OCSD_DATA_RESP_IS_FATAL(dp_ret))
+ return -1;
+
+ return 0;
+}
+
+int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue,
+ struct cs_etm_packet *packet)
+{
+ if (!packet_queue || !packet)
+ return -EINVAL;
+
+ /* Nothing to do, might as well just return */
+ if (packet_queue->packet_count == 0)
+ return 0;
+ /*
+ * The queueing process in function cs_etm_decoder__buffer_packet()
+ * increments the tail *before* using it. This is somewhat counter
+ * intuitive but it has the advantage of centralizing tail management
+ * at a single location. Because of that we need to follow the same
+ * heuristic with the head, i.e we increment it before using its
+ * value. Otherwise the first element of the packet queue is not
+ * used.
+ */
+ packet_queue->head = (packet_queue->head + 1) &
+ (CS_ETM_PACKET_MAX_BUFFER - 1);
+
+ *packet = packet_queue->packet_buffer[packet_queue->head];
+
+ packet_queue->packet_count--;
+
+ return 1;
+}
+
+/*
+ * Calculate the number of nanoseconds elapsed.
+ *
+ * instr_count is updated in place with the remainder of the instructions
+ * which didn't make up a whole nanosecond.
+ */
+static u32 cs_etm_decoder__dec_instr_count_to_ns(u32 *instr_count)
+{
+ const u32 instr_copy = *instr_count;
+
+ *instr_count %= INSTR_PER_NS;
+ return instr_copy / INSTR_PER_NS;
+}
+
+static int cs_etm_decoder__gen_etmv3_config(struct cs_etm_trace_params *params,
+ ocsd_etmv3_cfg *config)
+{
+ config->reg_idr = params->etmv3.reg_idr;
+ config->reg_ctrl = params->etmv3.reg_ctrl;
+ config->reg_ccer = params->etmv3.reg_ccer;
+ config->reg_trc_id = params->etmv3.reg_trc_id;
+ config->arch_ver = ARCH_V7;
+ config->core_prof = profile_CortexA;
+
+ return 0;
+}
+
+#define TRCIDR1_TRCARCHMIN_SHIFT 4
+#define TRCIDR1_TRCARCHMIN_MASK GENMASK(7, 4)
+#define TRCIDR1_TRCARCHMIN(x) (((x) & TRCIDR1_TRCARCHMIN_MASK) >> TRCIDR1_TRCARCHMIN_SHIFT)
+
+static enum _ocsd_arch_version cs_etm_decoder__get_etmv4_arch_ver(u32 reg_idr1)
+{
+ /*
+ * For ETMv4 if the trace minor version is 4 or more then we can assume
+ * the architecture is ARCH_AA64 rather than just V8.
+ * ARCH_V8 = V8 architecture
+ * ARCH_AA64 = Min v8r3 plus additional AA64 PE features
+ */
+ return TRCIDR1_TRCARCHMIN(reg_idr1) >= 4 ? ARCH_AA64 : ARCH_V8;
+}
+
+static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params,
+ ocsd_etmv4_cfg *config)
+{
+ config->reg_configr = params->etmv4.reg_configr;
+ config->reg_traceidr = params->etmv4.reg_traceidr;
+ config->reg_idr0 = params->etmv4.reg_idr0;
+ config->reg_idr1 = params->etmv4.reg_idr1;
+ config->reg_idr2 = params->etmv4.reg_idr2;
+ config->reg_idr8 = params->etmv4.reg_idr8;
+ config->reg_idr9 = 0;
+ config->reg_idr10 = 0;
+ config->reg_idr11 = 0;
+ config->reg_idr12 = 0;
+ config->reg_idr13 = 0;
+ config->arch_ver = cs_etm_decoder__get_etmv4_arch_ver(params->etmv4.reg_idr1);
+ config->core_prof = profile_CortexA;
+}
+
+static void cs_etm_decoder__gen_ete_config(struct cs_etm_trace_params *params,
+ ocsd_ete_cfg *config)
+{
+ config->reg_configr = params->ete.reg_configr;
+ config->reg_traceidr = params->ete.reg_traceidr;
+ config->reg_idr0 = params->ete.reg_idr0;
+ config->reg_idr1 = params->ete.reg_idr1;
+ config->reg_idr2 = params->ete.reg_idr2;
+ config->reg_idr8 = params->ete.reg_idr8;
+ config->reg_devarch = params->ete.reg_devarch;
+ config->arch_ver = ARCH_AA64;
+ config->core_prof = profile_CortexA;
+}
+
+static void cs_etm_decoder__print_str_cb(const void *p_context,
+ const char *msg,
+ const int str_len)
+{
+ const struct cs_etm_decoder *decoder = p_context;
+
+ if (p_context && str_len && !decoder->suppress_printing)
+ decoder->packet_printer(msg);
+}
+
+static int
+cs_etm_decoder__init_def_logger_printing(struct cs_etm_decoder_params *d_params,
+ struct cs_etm_decoder *decoder)
+{
+ int ret = 0;
+
+ if (d_params->packet_printer == NULL)
+ return -1;
+
+ decoder->packet_printer = d_params->packet_printer;
+
+ /*
+ * Set up a library default logger to process any printers
+ * (packet/raw frame) we add later.
+ */
+ ret = ocsd_def_errlog_init(OCSD_ERR_SEV_ERROR, 1);
+ if (ret != 0)
+ return -1;
+
+ /* no stdout / err / file output */
+ ret = ocsd_def_errlog_config_output(C_API_MSGLOGOUT_FLG_NONE, NULL);
+ if (ret != 0)
+ return -1;
+
+ /*
+ * Set the string CB for the default logger, passes strings to
+ * perf print logger.
+ */
+ ret = ocsd_def_errlog_set_strprint_cb(decoder->dcd_tree,
+ (void *)decoder,
+ cs_etm_decoder__print_str_cb);
+ if (ret != 0)
+ ret = -1;
+
+ return 0;
+}
+
+#ifdef CS_LOG_RAW_FRAMES
+static void
+cs_etm_decoder__init_raw_frame_logging(struct cs_etm_decoder_params *d_params,
+ struct cs_etm_decoder *decoder)
+{
+ /* Only log these during a --dump operation */
+ if (d_params->operation == CS_ETM_OPERATION_PRINT) {
+ /* set up a library default logger to process the
+ * raw frame printer we add later
+ */
+ ocsd_def_errlog_init(OCSD_ERR_SEV_ERROR, 1);
+
+ /* no stdout / err / file output */
+ ocsd_def_errlog_config_output(C_API_MSGLOGOUT_FLG_NONE, NULL);
+
+ /* set the string CB for the default logger,
+ * passes strings to perf print logger.
+ */
+ ocsd_def_errlog_set_strprint_cb(decoder->dcd_tree,
+ (void *)decoder,
+ cs_etm_decoder__print_str_cb);
+
+ /* use the built in library printer for the raw frames */
+ ocsd_dt_set_raw_frame_printer(decoder->dcd_tree,
+ CS_RAW_DEBUG_FLAGS);
+ }
+}
+#else
+static void
+cs_etm_decoder__init_raw_frame_logging(
+ struct cs_etm_decoder_params *d_params __maybe_unused,
+ struct cs_etm_decoder *decoder __maybe_unused)
+{
+}
+#endif
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq,
+ struct cs_etm_packet_queue *packet_queue,
+ const uint8_t trace_chan_id)
+{
+ u64 estimated_ts;
+
+ /* No timestamp packet has been received, nothing to do */
+ if (!packet_queue->next_cs_timestamp)
+ return OCSD_RESP_CONT;
+
+ estimated_ts = packet_queue->cs_timestamp +
+ cs_etm_decoder__dec_instr_count_to_ns(&packet_queue->instr_count);
+
+ /* Estimated TS can never be higher than the next real one in the trace */
+ packet_queue->cs_timestamp = min(packet_queue->next_cs_timestamp, estimated_ts);
+
+ /* Tell the front end which traceid_queue needs attention */
+ cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id);
+
+ return OCSD_RESP_WAIT;
+}
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
+ const ocsd_generic_trace_elem *elem,
+ const uint8_t trace_chan_id,
+ const ocsd_trc_index_t indx)
+{
+ struct cs_etm_packet_queue *packet_queue;
+ u64 converted_timestamp;
+ u64 estimated_first_ts;
+
+ /* First get the packet queue for this traceID */
+ packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id);
+ if (!packet_queue)
+ return OCSD_RESP_FATAL_SYS_ERR;
+
+ /*
+ * Coresight timestamps are raw timer values which need to be scaled to ns. Assume
+ * 0 is a bad value so don't try to convert it.
+ */
+ converted_timestamp = elem->timestamp ?
+ cs_etm__convert_sample_time(etmq, elem->timestamp) : 0;
+
+ /*
+ * We've seen a timestamp packet before - simply record the new value.
+ * Function do_soft_timestamp() will report the value to the front end,
+ * hence asking the decoder to keep decoding rather than stopping.
+ */
+ if (packet_queue->next_cs_timestamp) {
+ /*
+ * What was next is now where new ranges start from, overwriting
+ * any previous estimate in cs_timestamp
+ */
+ packet_queue->cs_timestamp = packet_queue->next_cs_timestamp;
+ packet_queue->next_cs_timestamp = converted_timestamp;
+ return OCSD_RESP_CONT;
+ }
+
+ if (!converted_timestamp) {
+ /*
+ * Zero timestamps can be seen due to misconfiguration or hardware bugs.
+ * Warn once, and don't try to subtract instr_count as it would result in an
+ * underflow.
+ */
+ packet_queue->cs_timestamp = 0;
+ if (!cs_etm__etmq_is_timeless(etmq))
+ pr_warning_once("Zero Coresight timestamp found at Idx:%" OCSD_TRC_IDX_STR
+ ". Decoding may be improved by prepending 'Z' to your current --itrace arguments.\n",
+ indx);
+
+ } else if (packet_queue->instr_count / INSTR_PER_NS > converted_timestamp) {
+ /*
+ * Sanity check that the elem->timestamp - packet_queue->instr_count would not
+ * result in an underflow. Warn and clamp at 0 if it would.
+ */
+ packet_queue->cs_timestamp = 0;
+ pr_err("Timestamp calculation underflow at Idx:%" OCSD_TRC_IDX_STR "\n", indx);
+ } else {
+ /*
+ * This is the first timestamp we've seen since the beginning of traces
+ * or a discontinuity. Since timestamps packets are generated *after*
+ * range packets have been generated, we need to estimate the time at
+ * which instructions started by subtracting the number of instructions
+ * executed to the timestamp. Don't estimate earlier than the last used
+ * timestamp though.
+ */
+ estimated_first_ts = converted_timestamp -
+ (packet_queue->instr_count / INSTR_PER_NS);
+ packet_queue->cs_timestamp = max(packet_queue->cs_timestamp, estimated_first_ts);
+ }
+ packet_queue->next_cs_timestamp = converted_timestamp;
+ packet_queue->instr_count = 0;
+
+ /* Tell the front end which traceid_queue needs attention */
+ cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id);
+
+ /* Halt processing until we are being told to proceed */
+ return OCSD_RESP_WAIT;
+}
+
+static void
+cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue)
+{
+ packet_queue->next_cs_timestamp = 0;
+ packet_queue->instr_count = 0;
+}
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue,
+ const u8 trace_chan_id,
+ enum cs_etm_sample_type sample_type)
+{
+ u32 et = 0;
+ int cpu;
+
+ if (packet_queue->packet_count >= CS_ETM_PACKET_MAX_BUFFER - 1)
+ return OCSD_RESP_FATAL_SYS_ERR;
+
+ if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0)
+ return OCSD_RESP_FATAL_SYS_ERR;
+
+ et = packet_queue->tail;
+ et = (et + 1) & (CS_ETM_PACKET_MAX_BUFFER - 1);
+ packet_queue->tail = et;
+ packet_queue->packet_count++;
+
+ packet_queue->packet_buffer[et].sample_type = sample_type;
+ packet_queue->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN;
+ packet_queue->packet_buffer[et].cpu = cpu;
+ packet_queue->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR;
+ packet_queue->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
+ packet_queue->packet_buffer[et].instr_count = 0;
+ packet_queue->packet_buffer[et].last_instr_taken_branch = false;
+ packet_queue->packet_buffer[et].last_instr_size = 0;
+ packet_queue->packet_buffer[et].last_instr_type = 0;
+ packet_queue->packet_buffer[et].last_instr_subtype = 0;
+ packet_queue->packet_buffer[et].last_instr_cond = 0;
+ packet_queue->packet_buffer[et].flags = 0;
+ packet_queue->packet_buffer[et].exception_number = UINT32_MAX;
+ packet_queue->packet_buffer[et].trace_chan_id = trace_chan_id;
+
+ if (packet_queue->packet_count == CS_ETM_PACKET_MAX_BUFFER - 1)
+ return OCSD_RESP_WAIT;
+
+ return OCSD_RESP_CONT;
+}
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__buffer_range(struct cs_etm_queue *etmq,
+ struct cs_etm_packet_queue *packet_queue,
+ const ocsd_generic_trace_elem *elem,
+ const uint8_t trace_chan_id)
+{
+ int ret = 0;
+ struct cs_etm_packet *packet;
+
+ ret = cs_etm_decoder__buffer_packet(packet_queue, trace_chan_id,
+ CS_ETM_RANGE);
+ if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
+ return ret;
+
+ packet = &packet_queue->packet_buffer[packet_queue->tail];
+
+ switch (elem->isa) {
+ case ocsd_isa_aarch64:
+ packet->isa = CS_ETM_ISA_A64;
+ break;
+ case ocsd_isa_arm:
+ packet->isa = CS_ETM_ISA_A32;
+ break;
+ case ocsd_isa_thumb2:
+ packet->isa = CS_ETM_ISA_T32;
+ break;
+ case ocsd_isa_tee:
+ case ocsd_isa_jazelle:
+ case ocsd_isa_custom:
+ case ocsd_isa_unknown:
+ default:
+ packet->isa = CS_ETM_ISA_UNKNOWN;
+ }
+
+ packet->start_addr = elem->st_addr;
+ packet->end_addr = elem->en_addr;
+ packet->instr_count = elem->num_instr_range;
+ packet->last_instr_type = elem->last_i_type;
+ packet->last_instr_subtype = elem->last_i_subtype;
+ packet->last_instr_cond = elem->last_instr_cond;
+
+ if (elem->last_i_type == OCSD_INSTR_BR || elem->last_i_type == OCSD_INSTR_BR_INDIRECT)
+ packet->last_instr_taken_branch = elem->last_instr_exec;
+ else
+ packet->last_instr_taken_branch = false;
+
+ packet->last_instr_size = elem->last_instr_sz;
+
+ /* per-thread scenario, no need to generate a timestamp */
+ if (cs_etm__etmq_is_timeless(etmq))
+ goto out;
+
+ /*
+ * The packet queue is full and we haven't seen a timestamp (had we
+ * seen one the packet queue wouldn't be full). Let the front end
+ * deal with it.
+ */
+ if (ret == OCSD_RESP_WAIT)
+ goto out;
+
+ packet_queue->instr_count += elem->num_instr_range;
+ /* Tell the front end we have a new timestamp to process */
+ ret = cs_etm_decoder__do_soft_timestamp(etmq, packet_queue,
+ trace_chan_id);
+out:
+ return ret;
+}
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__buffer_discontinuity(struct cs_etm_packet_queue *queue,
+ const uint8_t trace_chan_id)
+{
+ /*
+ * Something happened and who knows when we'll get new traces so
+ * reset time statistics.
+ */
+ cs_etm_decoder__reset_timestamp(queue);
+ return cs_etm_decoder__buffer_packet(queue, trace_chan_id,
+ CS_ETM_DISCONTINUITY);
+}
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__buffer_exception(struct cs_etm_packet_queue *queue,
+ const ocsd_generic_trace_elem *elem,
+ const uint8_t trace_chan_id)
+{ int ret = 0;
+ struct cs_etm_packet *packet;
+
+ ret = cs_etm_decoder__buffer_packet(queue, trace_chan_id,
+ CS_ETM_EXCEPTION);
+ if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
+ return ret;
+
+ packet = &queue->packet_buffer[queue->tail];
+ packet->exception_number = elem->exception_number;
+
+ return ret;
+}
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__buffer_exception_ret(struct cs_etm_packet_queue *queue,
+ const uint8_t trace_chan_id)
+{
+ return cs_etm_decoder__buffer_packet(queue, trace_chan_id,
+ CS_ETM_EXCEPTION_RET);
+}
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__set_tid(struct cs_etm_queue *etmq,
+ struct cs_etm_packet_queue *packet_queue,
+ const ocsd_generic_trace_elem *elem,
+ const uint8_t trace_chan_id)
+{
+ pid_t tid = -1;
+
+ /*
+ * Process the PE_CONTEXT packets if we have a valid contextID or VMID.
+ * If the kernel is running at EL2, the PID is traced in CONTEXTIDR_EL2
+ * as VMID, Bit ETM_OPT_CTXTID2 is set in this case.
+ */
+ switch (cs_etm__get_pid_fmt(etmq)) {
+ case CS_ETM_PIDFMT_CTXTID:
+ if (elem->context.ctxt_id_valid)
+ tid = elem->context.context_id;
+ break;
+ case CS_ETM_PIDFMT_CTXTID2:
+ if (elem->context.vmid_valid)
+ tid = elem->context.vmid;
+ break;
+ case CS_ETM_PIDFMT_NONE:
+ default:
+ break;
+ }
+
+ if (cs_etm__etmq_set_tid_el(etmq, tid, trace_chan_id,
+ elem->context.exception_level))
+ return OCSD_RESP_FATAL_SYS_ERR;
+
+ if (tid == -1)
+ return OCSD_RESP_CONT;
+
+ /*
+ * A timestamp is generated after a PE_CONTEXT element so make sure
+ * to rely on that coming one.
+ */
+ cs_etm_decoder__reset_timestamp(packet_queue);
+
+ return OCSD_RESP_CONT;
+}
+
+static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
+ const void *context,
+ const ocsd_trc_index_t indx,
+ const u8 trace_chan_id __maybe_unused,
+ const ocsd_generic_trace_elem *elem)
+{
+ ocsd_datapath_resp_t resp = OCSD_RESP_CONT;
+ struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context;
+ struct cs_etm_queue *etmq = decoder->data;
+ struct cs_etm_packet_queue *packet_queue;
+
+ /* First get the packet queue for this traceID */
+ packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id);
+ if (!packet_queue)
+ return OCSD_RESP_FATAL_SYS_ERR;
+
+ switch (elem->elem_type) {
+ case OCSD_GEN_TRC_ELEM_UNKNOWN:
+ break;
+ case OCSD_GEN_TRC_ELEM_EO_TRACE:
+ case OCSD_GEN_TRC_ELEM_NO_SYNC:
+ case OCSD_GEN_TRC_ELEM_TRACE_ON:
+ resp = cs_etm_decoder__buffer_discontinuity(packet_queue,
+ trace_chan_id);
+ break;
+ case OCSD_GEN_TRC_ELEM_INSTR_RANGE:
+ resp = cs_etm_decoder__buffer_range(etmq, packet_queue, elem,
+ trace_chan_id);
+ break;
+ case OCSD_GEN_TRC_ELEM_EXCEPTION:
+ resp = cs_etm_decoder__buffer_exception(packet_queue, elem,
+ trace_chan_id);
+ break;
+ case OCSD_GEN_TRC_ELEM_EXCEPTION_RET:
+ resp = cs_etm_decoder__buffer_exception_ret(packet_queue,
+ trace_chan_id);
+ break;
+ case OCSD_GEN_TRC_ELEM_TIMESTAMP:
+ resp = cs_etm_decoder__do_hard_timestamp(etmq, elem,
+ trace_chan_id,
+ indx);
+ break;
+ case OCSD_GEN_TRC_ELEM_PE_CONTEXT:
+ resp = cs_etm_decoder__set_tid(etmq, packet_queue,
+ elem, trace_chan_id);
+ break;
+ /* Unused packet types */
+ case OCSD_GEN_TRC_ELEM_I_RANGE_NOPATH:
+ case OCSD_GEN_TRC_ELEM_ADDR_NACC:
+ case OCSD_GEN_TRC_ELEM_CYCLE_COUNT:
+ case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN:
+ case OCSD_GEN_TRC_ELEM_EVENT:
+ case OCSD_GEN_TRC_ELEM_SWTRACE:
+ case OCSD_GEN_TRC_ELEM_CUSTOM:
+ case OCSD_GEN_TRC_ELEM_SYNC_MARKER:
+ case OCSD_GEN_TRC_ELEM_MEMTRANS:
+#if (OCSD_VER_NUM >= 0x010400)
+ case OCSD_GEN_TRC_ELEM_INSTRUMENTATION:
+#endif
+ default:
+ break;
+ }
+
+ return resp;
+}
+
+static int
+cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params,
+ struct cs_etm_trace_params *t_params,
+ struct cs_etm_decoder *decoder)
+{
+ ocsd_etmv3_cfg config_etmv3;
+ ocsd_etmv4_cfg trace_config_etmv4;
+ ocsd_ete_cfg trace_config_ete;
+ void *trace_config;
+ u8 csid;
+
+ switch (t_params->protocol) {
+ case CS_ETM_PROTO_ETMV3:
+ case CS_ETM_PROTO_PTM:
+ csid = (t_params->etmv3.reg_idr & CORESIGHT_TRACE_ID_VAL_MASK);
+ cs_etm_decoder__gen_etmv3_config(t_params, &config_etmv3);
+ decoder->decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ?
+ OCSD_BUILTIN_DCD_ETMV3 :
+ OCSD_BUILTIN_DCD_PTM;
+ trace_config = &config_etmv3;
+ break;
+ case CS_ETM_PROTO_ETMV4i:
+ csid = (t_params->etmv4.reg_traceidr & CORESIGHT_TRACE_ID_VAL_MASK);
+ cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4);
+ decoder->decoder_name = OCSD_BUILTIN_DCD_ETMV4I;
+ trace_config = &trace_config_etmv4;
+ break;
+ case CS_ETM_PROTO_ETE:
+ csid = (t_params->ete.reg_traceidr & CORESIGHT_TRACE_ID_VAL_MASK);
+ cs_etm_decoder__gen_ete_config(t_params, &trace_config_ete);
+ decoder->decoder_name = OCSD_BUILTIN_DCD_ETE;
+ trace_config = &trace_config_ete;
+ break;
+ default:
+ return -1;
+ }
+
+ /* if the CPU has no trace ID associated, no decoder needed */
+ if (csid == CORESIGHT_TRACE_ID_UNUSED_VAL)
+ return 0;
+
+ if (d_params->operation == CS_ETM_OPERATION_DECODE) {
+ if (ocsd_dt_create_decoder(decoder->dcd_tree,
+ decoder->decoder_name,
+ OCSD_CREATE_FLG_FULL_DECODER,
+ trace_config, &csid))
+ return -1;
+
+ if (ocsd_dt_set_gen_elem_outfn(decoder->dcd_tree,
+ cs_etm_decoder__gen_trace_elem_printer,
+ decoder))
+ return -1;
+
+ return 0;
+ } else if (d_params->operation == CS_ETM_OPERATION_PRINT) {
+ if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder->decoder_name,
+ OCSD_CREATE_FLG_PACKET_PROC,
+ trace_config, &csid))
+ return -1;
+
+ if (ocsd_dt_set_pkt_protocol_printer(decoder->dcd_tree, csid, 0))
+ return -1;
+
+ return 0;
+ }
+
+ return -1;
+}
+
+struct cs_etm_decoder *
+cs_etm_decoder__new(int decoders, struct cs_etm_decoder_params *d_params,
+ struct cs_etm_trace_params t_params[])
+{
+ struct cs_etm_decoder *decoder;
+ ocsd_dcd_tree_src_t format;
+ u32 flags;
+ int i, ret;
+
+ if ((!t_params) || (!d_params))
+ return NULL;
+
+ decoder = zalloc(sizeof(*decoder));
+
+ if (!decoder)
+ return NULL;
+
+ decoder->data = d_params->data;
+ decoder->prev_return = OCSD_RESP_CONT;
+ format = (d_params->formatted ? OCSD_TRC_SRC_FRAME_FORMATTED :
+ OCSD_TRC_SRC_SINGLE);
+ flags = 0;
+ flags |= (d_params->fsyncs ? OCSD_DFRMTR_HAS_FSYNCS : 0);
+ flags |= (d_params->hsyncs ? OCSD_DFRMTR_HAS_HSYNCS : 0);
+ flags |= (d_params->frame_aligned ? OCSD_DFRMTR_FRAME_MEM_ALIGN : 0);
+
+ /*
+ * Drivers may add barrier frames when used with perf, set up to
+ * handle this. Barriers const of FSYNC packet repeated 4 times.
+ */
+ flags |= OCSD_DFRMTR_RESET_ON_4X_FSYNC;
+
+ /* Create decode tree for the data source */
+ decoder->dcd_tree = ocsd_create_dcd_tree(format, flags);
+
+ if (decoder->dcd_tree == 0)
+ goto err_free_decoder;
+
+ /* init library print logging support */
+ ret = cs_etm_decoder__init_def_logger_printing(d_params, decoder);
+ if (ret != 0)
+ goto err_free_decoder;
+
+ /* init raw frame logging if required */
+ cs_etm_decoder__init_raw_frame_logging(d_params, decoder);
+
+ for (i = 0; i < decoders; i++) {
+ ret = cs_etm_decoder__create_etm_decoder(d_params,
+ &t_params[i],
+ decoder);
+ if (ret != 0)
+ goto err_free_decoder;
+ }
+
+ return decoder;
+
+err_free_decoder:
+ cs_etm_decoder__free(decoder);
+ return NULL;
+}
+
+int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder,
+ u64 indx, const u8 *buf,
+ size_t len, size_t *consumed)
+{
+ int ret = 0;
+ ocsd_datapath_resp_t cur = OCSD_RESP_CONT;
+ ocsd_datapath_resp_t prev_return = decoder->prev_return;
+ size_t processed = 0;
+ u32 count;
+
+ while (processed < len) {
+ if (OCSD_DATA_RESP_IS_WAIT(prev_return)) {
+ cur = ocsd_dt_process_data(decoder->dcd_tree,
+ OCSD_OP_FLUSH,
+ 0,
+ 0,
+ NULL,
+ NULL);
+ } else if (OCSD_DATA_RESP_IS_CONT(prev_return)) {
+ cur = ocsd_dt_process_data(decoder->dcd_tree,
+ OCSD_OP_DATA,
+ indx + processed,
+ len - processed,
+ &buf[processed],
+ &count);
+ processed += count;
+ } else {
+ ret = -EINVAL;
+ break;
+ }
+
+ /*
+ * Return to the input code if the packet buffer is full.
+ * Flushing will get done once the packet buffer has been
+ * processed.
+ */
+ if (OCSD_DATA_RESP_IS_WAIT(cur))
+ break;
+
+ prev_return = cur;
+ }
+
+ decoder->prev_return = cur;
+ *consumed = processed;
+
+ return ret;
+}
+
+void cs_etm_decoder__free(struct cs_etm_decoder *decoder)
+{
+ if (!decoder)
+ return;
+
+ ocsd_destroy_dcd_tree(decoder->dcd_tree);
+ decoder->dcd_tree = NULL;
+ free(decoder);
+}
+
+const char *cs_etm_decoder__get_name(struct cs_etm_decoder *decoder)
+{
+ return decoder->decoder_name;
+}
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
new file mode 100644
index 000000000..272c2efe7
--- /dev/null
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -0,0 +1,111 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright(C) 2015-2018 Linaro Limited.
+ *
+ * Author: Tor Jeremiassen <tor@ti.com>
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#ifndef INCLUDE__CS_ETM_DECODER_H__
+#define INCLUDE__CS_ETM_DECODER_H__
+
+#include <linux/types.h>
+#include <opencsd/ocsd_if_types.h>
+#include <stdio.h>
+
+struct cs_etm_decoder;
+struct cs_etm_packet;
+struct cs_etm_packet_queue;
+
+struct cs_etm_queue;
+
+typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u8, u64, size_t, u8 *,
+ const ocsd_mem_space_acc_t);
+
+struct cs_etmv3_trace_params {
+ u32 reg_ctrl;
+ u32 reg_trc_id;
+ u32 reg_ccer;
+ u32 reg_idr;
+};
+
+struct cs_etmv4_trace_params {
+ u32 reg_idr0;
+ u32 reg_idr1;
+ u32 reg_idr2;
+ u32 reg_idr8;
+ u32 reg_configr;
+ u32 reg_traceidr;
+};
+
+struct cs_ete_trace_params {
+ u32 reg_idr0;
+ u32 reg_idr1;
+ u32 reg_idr2;
+ u32 reg_idr8;
+ u32 reg_configr;
+ u32 reg_traceidr;
+ u32 reg_devarch;
+};
+
+struct cs_etm_trace_params {
+ int protocol;
+ union {
+ struct cs_etmv3_trace_params etmv3;
+ struct cs_etmv4_trace_params etmv4;
+ struct cs_ete_trace_params ete;
+ };
+};
+
+struct cs_etm_decoder_params {
+ int operation;
+ void (*packet_printer)(const char *msg);
+ cs_etm_mem_cb_type mem_acc_cb;
+ bool formatted;
+ bool fsyncs;
+ bool hsyncs;
+ bool frame_aligned;
+ void *data;
+};
+
+/*
+ * The following enums are indexed starting with 1 to align with the
+ * open source coresight trace decoder library.
+ */
+enum {
+ CS_ETM_PROTO_ETMV3 = 1,
+ CS_ETM_PROTO_ETMV4i,
+ CS_ETM_PROTO_ETMV4d,
+ CS_ETM_PROTO_PTM,
+ CS_ETM_PROTO_ETE
+};
+
+enum cs_etm_decoder_operation {
+ CS_ETM_OPERATION_PRINT = 1,
+ CS_ETM_OPERATION_DECODE,
+ CS_ETM_OPERATION_MAX,
+};
+
+int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder,
+ u64 indx, const u8 *buf,
+ size_t len, size_t *consumed);
+
+struct cs_etm_decoder *
+cs_etm_decoder__new(int num_cpu,
+ struct cs_etm_decoder_params *d_params,
+ struct cs_etm_trace_params t_params[]);
+
+void cs_etm_decoder__free(struct cs_etm_decoder *decoder);
+
+int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder,
+ u64 start, u64 end,
+ cs_etm_mem_cb_type cb_func);
+
+int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue,
+ struct cs_etm_packet *packet);
+
+int cs_etm_decoder__reset(struct cs_etm_decoder *decoder);
+const char *cs_etm_decoder__get_name(struct cs_etm_decoder *decoder);
+
+#endif /* INCLUDE__CS_ETM_DECODER_H__ */
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
new file mode 100644
index 000000000..9729d0065
--- /dev/null
+++ b/tools/perf/util/cs-etm.c
@@ -0,0 +1,3417 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2015-2018 Linaro Limited.
+ *
+ * Author: Tor Jeremiassen <tor@ti.com>
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/coresight-pmu.h>
+#include <linux/err.h>
+#include <linux/log2.h>
+#include <linux/types.h>
+#include <linux/zalloc.h>
+
+#include <stdlib.h>
+
+#include "auxtrace.h"
+#include "color.h"
+#include "cs-etm.h"
+#include "cs-etm-decoder/cs-etm-decoder.h"
+#include "debug.h"
+#include "dso.h"
+#include "evlist.h"
+#include "intlist.h"
+#include "machine.h"
+#include "map.h"
+#include "perf.h"
+#include "session.h"
+#include "map_symbol.h"
+#include "branch.h"
+#include "symbol.h"
+#include "tool.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "tsc.h"
+#include <tools/libc_compat.h>
+#include "util/synthetic-events.h"
+#include "util/util.h"
+
+struct cs_etm_auxtrace {
+ struct auxtrace auxtrace;
+ struct auxtrace_queues queues;
+ struct auxtrace_heap heap;
+ struct itrace_synth_opts synth_opts;
+ struct perf_session *session;
+ struct perf_tsc_conversion tc;
+
+ /*
+ * Timeless has no timestamps in the trace so overlapping mmap lookups
+ * are less accurate but produces smaller trace data. We use context IDs
+ * in the trace instead of matching timestamps with fork records so
+ * they're not really needed in the general case. Overlapping mmaps
+ * happen in cases like between a fork and an exec.
+ */
+ bool timeless_decoding;
+
+ /*
+ * Per-thread ignores the trace channel ID and instead assumes that
+ * everything in a buffer comes from the same process regardless of
+ * which CPU it ran on. It also implies no context IDs so the TID is
+ * taken from the auxtrace buffer.
+ */
+ bool per_thread_decoding;
+ bool snapshot_mode;
+ bool data_queued;
+ bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */
+
+ int num_cpu;
+ u64 latest_kernel_timestamp;
+ u32 auxtrace_type;
+ u64 branches_sample_type;
+ u64 branches_id;
+ u64 instructions_sample_type;
+ u64 instructions_sample_period;
+ u64 instructions_id;
+ u64 **metadata;
+ unsigned int pmu_type;
+ enum cs_etm_pid_fmt pid_fmt;
+};
+
+struct cs_etm_traceid_queue {
+ u8 trace_chan_id;
+ u64 period_instructions;
+ size_t last_branch_pos;
+ union perf_event *event_buf;
+ struct thread *thread;
+ struct thread *prev_packet_thread;
+ ocsd_ex_level prev_packet_el;
+ ocsd_ex_level el;
+ struct branch_stack *last_branch;
+ struct branch_stack *last_branch_rb;
+ struct cs_etm_packet *prev_packet;
+ struct cs_etm_packet *packet;
+ struct cs_etm_packet_queue packet_queue;
+};
+
+struct cs_etm_queue {
+ struct cs_etm_auxtrace *etm;
+ struct cs_etm_decoder *decoder;
+ struct auxtrace_buffer *buffer;
+ unsigned int queue_nr;
+ u8 pending_timestamp_chan_id;
+ u64 offset;
+ const unsigned char *buf;
+ size_t buf_len, buf_used;
+ /* Conversion between traceID and index in traceid_queues array */
+ struct intlist *traceid_queues_list;
+ struct cs_etm_traceid_queue **traceid_queues;
+};
+
+/* RB tree for quick conversion between traceID and metadata pointers */
+static struct intlist *traceid_list;
+
+static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
+static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
+ pid_t tid);
+static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
+static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
+
+/* PTMs ETMIDR [11:8] set to b0011 */
+#define ETMIDR_PTM_VERSION 0x00000300
+
+/*
+ * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
+ * work with. One option is to modify to auxtrace_heap_XYZ() API or simply
+ * encode the etm queue number as the upper 16 bit and the channel as
+ * the lower 16 bit.
+ */
+#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id) \
+ (queue_nr << 16 | trace_chan_id)
+#define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
+#define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
+
+static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
+{
+ etmidr &= ETMIDR_PTM_VERSION;
+
+ if (etmidr == ETMIDR_PTM_VERSION)
+ return CS_ETM_PROTO_PTM;
+
+ return CS_ETM_PROTO_ETMV3;
+}
+
+static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
+{
+ struct int_node *inode;
+ u64 *metadata;
+
+ inode = intlist__find(traceid_list, trace_chan_id);
+ if (!inode)
+ return -EINVAL;
+
+ metadata = inode->priv;
+ *magic = metadata[CS_ETM_MAGIC];
+ return 0;
+}
+
+int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
+{
+ struct int_node *inode;
+ u64 *metadata;
+
+ inode = intlist__find(traceid_list, trace_chan_id);
+ if (!inode)
+ return -EINVAL;
+
+ metadata = inode->priv;
+ *cpu = (int)metadata[CS_ETM_CPU];
+ return 0;
+}
+
+/*
+ * The returned PID format is presented as an enum:
+ *
+ * CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced.
+ * CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced.
+ * CS_ETM_PIDFMT_NONE: No context IDs
+ *
+ * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
+ * are enabled at the same time when the session runs on an EL2 kernel.
+ * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be
+ * recorded in the trace data, the tool will selectively use
+ * CONTEXTIDR_EL2 as PID.
+ *
+ * The result is cached in etm->pid_fmt so this function only needs to be called
+ * when processing the aux info.
+ */
+static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata)
+{
+ u64 val;
+
+ if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
+ val = metadata[CS_ETM_ETMCR];
+ /* CONTEXTIDR is traced */
+ if (val & BIT(ETM_OPT_CTXTID))
+ return CS_ETM_PIDFMT_CTXTID;
+ } else {
+ val = metadata[CS_ETMV4_TRCCONFIGR];
+ /* CONTEXTIDR_EL2 is traced */
+ if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
+ return CS_ETM_PIDFMT_CTXTID2;
+ /* CONTEXTIDR_EL1 is traced */
+ else if (val & BIT(ETM4_CFG_BIT_CTXTID))
+ return CS_ETM_PIDFMT_CTXTID;
+ }
+
+ return CS_ETM_PIDFMT_NONE;
+}
+
+enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
+{
+ return etmq->etm->pid_fmt;
+}
+
+static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
+{
+ struct int_node *inode;
+
+ /* Get an RB node for this CPU */
+ inode = intlist__findnew(traceid_list, trace_chan_id);
+
+ /* Something went wrong, no need to continue */
+ if (!inode)
+ return -ENOMEM;
+
+ /*
+ * The node for that CPU should not be taken.
+ * Back out if that's the case.
+ */
+ if (inode->priv)
+ return -EINVAL;
+
+ /* All good, associate the traceID with the metadata pointer */
+ inode->priv = cpu_metadata;
+
+ return 0;
+}
+
+static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
+{
+ u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
+
+ switch (cs_etm_magic) {
+ case __perf_cs_etmv3_magic:
+ *trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] &
+ CORESIGHT_TRACE_ID_VAL_MASK);
+ break;
+ case __perf_cs_etmv4_magic:
+ case __perf_cs_ete_magic:
+ *trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] &
+ CORESIGHT_TRACE_ID_VAL_MASK);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/*
+ * update metadata trace ID from the value found in the AUX_HW_INFO packet.
+ * This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present.
+ */
+static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
+{
+ u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
+
+ switch (cs_etm_magic) {
+ case __perf_cs_etmv3_magic:
+ cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
+ break;
+ case __perf_cs_etmv4_magic:
+ case __perf_cs_ete_magic:
+ cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/*
+ * Get a metadata for a specific cpu from an array.
+ *
+ */
+static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
+{
+ int i;
+ u64 *metadata = NULL;
+
+ for (i = 0; i < etm->num_cpu; i++) {
+ if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) {
+ metadata = etm->metadata[i];
+ break;
+ }
+ }
+
+ return metadata;
+}
+
+/*
+ * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
+ *
+ * The payload associates the Trace ID and the CPU.
+ * The routine is tolerant of seeing multiple packets with the same association,
+ * but a CPU / Trace ID association changing during a session is an error.
+ */
+static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
+ union perf_event *event)
+{
+ struct cs_etm_auxtrace *etm;
+ struct perf_sample sample;
+ struct int_node *inode;
+ struct evsel *evsel;
+ u64 *cpu_data;
+ u64 hw_id;
+ int cpu, version, err;
+ u8 trace_chan_id, curr_chan_id;
+
+ /* extract and parse the HW ID */
+ hw_id = event->aux_output_hw_id.hw_id;
+ version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
+ trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
+
+ /* check that we can handle this version */
+ if (version > CS_AUX_HW_ID_CURR_VERSION)
+ return -EINVAL;
+
+ /* get access to the etm metadata */
+ etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
+ if (!etm || !etm->metadata)
+ return -EINVAL;
+
+ /* parse the sample to get the CPU */
+ evsel = evlist__event2evsel(session->evlist, event);
+ if (!evsel)
+ return -EINVAL;
+ err = evsel__parse_sample(evsel, event, &sample);
+ if (err)
+ return err;
+ cpu = sample.cpu;
+ if (cpu == -1) {
+ /* no CPU in the sample - possibly recorded with an old version of perf */
+ pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.");
+ return -EINVAL;
+ }
+
+ /* See if the ID is mapped to a CPU, and it matches the current CPU */
+ inode = intlist__find(traceid_list, trace_chan_id);
+ if (inode) {
+ cpu_data = inode->priv;
+ if ((int)cpu_data[CS_ETM_CPU] != cpu) {
+ pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
+ return -EINVAL;
+ }
+
+ /* check that the mapped ID matches */
+ err = cs_etm__metadata_get_trace_id(&curr_chan_id, cpu_data);
+ if (err)
+ return err;
+ if (curr_chan_id != trace_chan_id) {
+ pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
+ return -EINVAL;
+ }
+
+ /* mapped and matched - return OK */
+ return 0;
+ }
+
+ cpu_data = get_cpu_data(etm, cpu);
+ if (cpu_data == NULL)
+ return err;
+
+ /* not one we've seen before - lets map it */
+ err = cs_etm__map_trace_id(trace_chan_id, cpu_data);
+ if (err)
+ return err;
+
+ /*
+ * if we are picking up the association from the packet, need to plug
+ * the correct trace ID into the metadata for setting up decoders later.
+ */
+ err = cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
+ return err;
+}
+
+void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
+ u8 trace_chan_id)
+{
+ /*
+ * When a timestamp packet is encountered the backend code
+ * is stopped so that the front end has time to process packets
+ * that were accumulated in the traceID queue. Since there can
+ * be more than one channel per cs_etm_queue, we need to specify
+ * what traceID queue needs servicing.
+ */
+ etmq->pending_timestamp_chan_id = trace_chan_id;
+}
+
+static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
+ u8 *trace_chan_id)
+{
+ struct cs_etm_packet_queue *packet_queue;
+
+ if (!etmq->pending_timestamp_chan_id)
+ return 0;
+
+ if (trace_chan_id)
+ *trace_chan_id = etmq->pending_timestamp_chan_id;
+
+ packet_queue = cs_etm__etmq_get_packet_queue(etmq,
+ etmq->pending_timestamp_chan_id);
+ if (!packet_queue)
+ return 0;
+
+ /* Acknowledge pending status */
+ etmq->pending_timestamp_chan_id = 0;
+
+ /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
+ return packet_queue->cs_timestamp;
+}
+
+static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
+{
+ int i;
+
+ queue->head = 0;
+ queue->tail = 0;
+ queue->packet_count = 0;
+ for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
+ queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
+ queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
+ queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
+ queue->packet_buffer[i].instr_count = 0;
+ queue->packet_buffer[i].last_instr_taken_branch = false;
+ queue->packet_buffer[i].last_instr_size = 0;
+ queue->packet_buffer[i].last_instr_type = 0;
+ queue->packet_buffer[i].last_instr_subtype = 0;
+ queue->packet_buffer[i].last_instr_cond = 0;
+ queue->packet_buffer[i].flags = 0;
+ queue->packet_buffer[i].exception_number = UINT32_MAX;
+ queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
+ queue->packet_buffer[i].cpu = INT_MIN;
+ }
+}
+
+static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
+{
+ int idx;
+ struct int_node *inode;
+ struct cs_etm_traceid_queue *tidq;
+ struct intlist *traceid_queues_list = etmq->traceid_queues_list;
+
+ intlist__for_each_entry(inode, traceid_queues_list) {
+ idx = (int)(intptr_t)inode->priv;
+ tidq = etmq->traceid_queues[idx];
+ cs_etm__clear_packet_queue(&tidq->packet_queue);
+ }
+}
+
+static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq,
+ u8 trace_chan_id)
+{
+ int rc = -ENOMEM;
+ struct auxtrace_queue *queue;
+ struct cs_etm_auxtrace *etm = etmq->etm;
+
+ cs_etm__clear_packet_queue(&tidq->packet_queue);
+
+ queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
+ tidq->trace_chan_id = trace_chan_id;
+ tidq->el = tidq->prev_packet_el = ocsd_EL_unknown;
+ tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
+ queue->tid);
+ tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host);
+
+ tidq->packet = zalloc(sizeof(struct cs_etm_packet));
+ if (!tidq->packet)
+ goto out;
+
+ tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
+ if (!tidq->prev_packet)
+ goto out_free;
+
+ if (etm->synth_opts.last_branch) {
+ size_t sz = sizeof(struct branch_stack);
+
+ sz += etm->synth_opts.last_branch_sz *
+ sizeof(struct branch_entry);
+ tidq->last_branch = zalloc(sz);
+ if (!tidq->last_branch)
+ goto out_free;
+ tidq->last_branch_rb = zalloc(sz);
+ if (!tidq->last_branch_rb)
+ goto out_free;
+ }
+
+ tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
+ if (!tidq->event_buf)
+ goto out_free;
+
+ return 0;
+
+out_free:
+ zfree(&tidq->last_branch_rb);
+ zfree(&tidq->last_branch);
+ zfree(&tidq->prev_packet);
+ zfree(&tidq->packet);
+out:
+ return rc;
+}
+
+static struct cs_etm_traceid_queue
+*cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
+{
+ int idx;
+ struct int_node *inode;
+ struct intlist *traceid_queues_list;
+ struct cs_etm_traceid_queue *tidq, **traceid_queues;
+ struct cs_etm_auxtrace *etm = etmq->etm;
+
+ if (etm->per_thread_decoding)
+ trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
+
+ traceid_queues_list = etmq->traceid_queues_list;
+
+ /*
+ * Check if the traceid_queue exist for this traceID by looking
+ * in the queue list.
+ */
+ inode = intlist__find(traceid_queues_list, trace_chan_id);
+ if (inode) {
+ idx = (int)(intptr_t)inode->priv;
+ return etmq->traceid_queues[idx];
+ }
+
+ /* We couldn't find a traceid_queue for this traceID, allocate one */
+ tidq = malloc(sizeof(*tidq));
+ if (!tidq)
+ return NULL;
+
+ memset(tidq, 0, sizeof(*tidq));
+
+ /* Get a valid index for the new traceid_queue */
+ idx = intlist__nr_entries(traceid_queues_list);
+ /* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */
+ inode = intlist__findnew(traceid_queues_list, trace_chan_id);
+ if (!inode)
+ goto out_free;
+
+ /* Associate this traceID with this index */
+ inode->priv = (void *)(intptr_t)idx;
+
+ if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
+ goto out_free;
+
+ /* Grow the traceid_queues array by one unit */
+ traceid_queues = etmq->traceid_queues;
+ traceid_queues = reallocarray(traceid_queues,
+ idx + 1,
+ sizeof(*traceid_queues));
+
+ /*
+ * On failure reallocarray() returns NULL and the original block of
+ * memory is left untouched.
+ */
+ if (!traceid_queues)
+ goto out_free;
+
+ traceid_queues[idx] = tidq;
+ etmq->traceid_queues = traceid_queues;
+
+ return etmq->traceid_queues[idx];
+
+out_free:
+ /*
+ * Function intlist__remove() removes the inode from the list
+ * and delete the memory associated to it.
+ */
+ intlist__remove(traceid_queues_list, inode);
+ free(tidq);
+
+ return NULL;
+}
+
+struct cs_etm_packet_queue
+*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
+{
+ struct cs_etm_traceid_queue *tidq;
+
+ tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
+ if (tidq)
+ return &tidq->packet_queue;
+
+ return NULL;
+}
+
+static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
+ struct cs_etm_traceid_queue *tidq)
+{
+ struct cs_etm_packet *tmp;
+
+ if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
+ etm->synth_opts.instructions) {
+ /*
+ * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
+ * the next incoming packet.
+ *
+ * Threads and exception levels are also tracked for both the
+ * previous and current packets. This is because the previous
+ * packet is used for the 'from' IP for branch samples, so the
+ * thread at that time must also be assigned to that sample.
+ * Across discontinuity packets the thread can change, so by
+ * tracking the thread for the previous packet the branch sample
+ * will have the correct info.
+ */
+ tmp = tidq->packet;
+ tidq->packet = tidq->prev_packet;
+ tidq->prev_packet = tmp;
+ tidq->prev_packet_el = tidq->el;
+ thread__put(tidq->prev_packet_thread);
+ tidq->prev_packet_thread = thread__get(tidq->thread);
+ }
+}
+
+static void cs_etm__packet_dump(const char *pkt_string)
+{
+ const char *color = PERF_COLOR_BLUE;
+ int len = strlen(pkt_string);
+
+ if (len && (pkt_string[len-1] == '\n'))
+ color_fprintf(stdout, color, " %s", pkt_string);
+ else
+ color_fprintf(stdout, color, " %s\n", pkt_string);
+
+ fflush(stdout);
+}
+
+static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
+ struct cs_etm_auxtrace *etm, int idx,
+ u32 etmidr)
+{
+ u64 **metadata = etm->metadata;
+
+ t_params[idx].protocol = cs_etm__get_v7_protocol_version(etmidr);
+ t_params[idx].etmv3.reg_ctrl = metadata[idx][CS_ETM_ETMCR];
+ t_params[idx].etmv3.reg_trc_id = metadata[idx][CS_ETM_ETMTRACEIDR];
+}
+
+static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
+ struct cs_etm_auxtrace *etm, int idx)
+{
+ u64 **metadata = etm->metadata;
+
+ t_params[idx].protocol = CS_ETM_PROTO_ETMV4i;
+ t_params[idx].etmv4.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0];
+ t_params[idx].etmv4.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1];
+ t_params[idx].etmv4.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2];
+ t_params[idx].etmv4.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8];
+ t_params[idx].etmv4.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR];
+ t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR];
+}
+
+static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
+ struct cs_etm_auxtrace *etm, int idx)
+{
+ u64 **metadata = etm->metadata;
+
+ t_params[idx].protocol = CS_ETM_PROTO_ETE;
+ t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETE_TRCIDR0];
+ t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETE_TRCIDR1];
+ t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETE_TRCIDR2];
+ t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETE_TRCIDR8];
+ t_params[idx].ete.reg_configr = metadata[idx][CS_ETE_TRCCONFIGR];
+ t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETE_TRCTRACEIDR];
+ t_params[idx].ete.reg_devarch = metadata[idx][CS_ETE_TRCDEVARCH];
+}
+
+static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
+ struct cs_etm_auxtrace *etm,
+ int decoders)
+{
+ int i;
+ u32 etmidr;
+ u64 architecture;
+
+ for (i = 0; i < decoders; i++) {
+ architecture = etm->metadata[i][CS_ETM_MAGIC];
+
+ switch (architecture) {
+ case __perf_cs_etmv3_magic:
+ etmidr = etm->metadata[i][CS_ETM_ETMIDR];
+ cs_etm__set_trace_param_etmv3(t_params, etm, i, etmidr);
+ break;
+ case __perf_cs_etmv4_magic:
+ cs_etm__set_trace_param_etmv4(t_params, etm, i);
+ break;
+ case __perf_cs_ete_magic:
+ cs_etm__set_trace_param_ete(t_params, etm, i);
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
+ struct cs_etm_queue *etmq,
+ enum cs_etm_decoder_operation mode,
+ bool formatted)
+{
+ int ret = -EINVAL;
+
+ if (!(mode < CS_ETM_OPERATION_MAX))
+ goto out;
+
+ d_params->packet_printer = cs_etm__packet_dump;
+ d_params->operation = mode;
+ d_params->data = etmq;
+ d_params->formatted = formatted;
+ d_params->fsyncs = false;
+ d_params->hsyncs = false;
+ d_params->frame_aligned = true;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static void cs_etm__dump_event(struct cs_etm_queue *etmq,
+ struct auxtrace_buffer *buffer)
+{
+ int ret;
+ const char *color = PERF_COLOR_BLUE;
+ size_t buffer_used = 0;
+
+ fprintf(stdout, "\n");
+ color_fprintf(stdout, color,
+ ". ... CoreSight %s Trace data: size %#zx bytes\n",
+ cs_etm_decoder__get_name(etmq->decoder), buffer->size);
+
+ do {
+ size_t consumed;
+
+ ret = cs_etm_decoder__process_data_block(
+ etmq->decoder, buffer->offset,
+ &((u8 *)buffer->data)[buffer_used],
+ buffer->size - buffer_used, &consumed);
+ if (ret)
+ break;
+
+ buffer_used += consumed;
+ } while (buffer_used < buffer->size);
+
+ cs_etm_decoder__reset(etmq->decoder);
+}
+
+static int cs_etm__flush_events(struct perf_session *session,
+ struct perf_tool *tool)
+{
+ struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
+ struct cs_etm_auxtrace,
+ auxtrace);
+ if (dump_trace)
+ return 0;
+
+ if (!tool->ordered_events)
+ return -EINVAL;
+
+ if (etm->timeless_decoding) {
+ /*
+ * Pass tid = -1 to process all queues. But likely they will have
+ * already been processed on PERF_RECORD_EXIT anyway.
+ */
+ return cs_etm__process_timeless_queues(etm, -1);
+ }
+
+ return cs_etm__process_timestamped_queues(etm);
+}
+
+static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
+{
+ int idx;
+ uintptr_t priv;
+ struct int_node *inode, *tmp;
+ struct cs_etm_traceid_queue *tidq;
+ struct intlist *traceid_queues_list = etmq->traceid_queues_list;
+
+ intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
+ priv = (uintptr_t)inode->priv;
+ idx = priv;
+
+ /* Free this traceid_queue from the array */
+ tidq = etmq->traceid_queues[idx];
+ thread__zput(tidq->thread);
+ thread__zput(tidq->prev_packet_thread);
+ zfree(&tidq->event_buf);
+ zfree(&tidq->last_branch);
+ zfree(&tidq->last_branch_rb);
+ zfree(&tidq->prev_packet);
+ zfree(&tidq->packet);
+ zfree(&tidq);
+
+ /*
+ * Function intlist__remove() removes the inode from the list
+ * and delete the memory associated to it.
+ */
+ intlist__remove(traceid_queues_list, inode);
+ }
+
+ /* Then the RB tree itself */
+ intlist__delete(traceid_queues_list);
+ etmq->traceid_queues_list = NULL;
+
+ /* finally free the traceid_queues array */
+ zfree(&etmq->traceid_queues);
+}
+
+static void cs_etm__free_queue(void *priv)
+{
+ struct cs_etm_queue *etmq = priv;
+
+ if (!etmq)
+ return;
+
+ cs_etm_decoder__free(etmq->decoder);
+ cs_etm__free_traceid_queues(etmq);
+ free(etmq);
+}
+
+static void cs_etm__free_events(struct perf_session *session)
+{
+ unsigned int i;
+ struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
+ struct cs_etm_auxtrace,
+ auxtrace);
+ struct auxtrace_queues *queues = &aux->queues;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ cs_etm__free_queue(queues->queue_array[i].priv);
+ queues->queue_array[i].priv = NULL;
+ }
+
+ auxtrace_queues__free(queues);
+}
+
+static void cs_etm__free(struct perf_session *session)
+{
+ int i;
+ struct int_node *inode, *tmp;
+ struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
+ struct cs_etm_auxtrace,
+ auxtrace);
+ cs_etm__free_events(session);
+ session->auxtrace = NULL;
+
+ /* First remove all traceID/metadata nodes for the RB tree */
+ intlist__for_each_entry_safe(inode, tmp, traceid_list)
+ intlist__remove(traceid_list, inode);
+ /* Then the RB tree itself */
+ intlist__delete(traceid_list);
+
+ for (i = 0; i < aux->num_cpu; i++)
+ zfree(&aux->metadata[i]);
+
+ zfree(&aux->metadata);
+ zfree(&aux);
+}
+
+static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
+ struct cs_etm_auxtrace,
+ auxtrace);
+
+ return evsel->core.attr.type == aux->pmu_type;
+}
+
+static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq,
+ ocsd_ex_level el)
+{
+ enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq);
+
+ /*
+ * For any virtualisation based on nVHE (e.g. pKVM), or host kernels
+ * running at EL1 assume everything is the host.
+ */
+ if (pid_fmt == CS_ETM_PIDFMT_CTXTID)
+ return &etmq->etm->session->machines.host;
+
+ /*
+ * Not perfect, but otherwise assume anything in EL1 is the default
+ * guest, and everything else is the host. Distinguishing between guest
+ * and host userspaces isn't currently supported either. Neither is
+ * multiple guest support. All this does is reduce the likeliness of
+ * decode errors where we look into the host kernel maps when it should
+ * have been the guest maps.
+ */
+ switch (el) {
+ case ocsd_EL1:
+ return machines__find_guest(&etmq->etm->session->machines,
+ DEFAULT_GUEST_KERNEL_ID);
+ case ocsd_EL3:
+ case ocsd_EL2:
+ case ocsd_EL0:
+ case ocsd_EL_unknown:
+ default:
+ return &etmq->etm->session->machines.host;
+ }
+}
+
+static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address,
+ ocsd_ex_level el)
+{
+ struct machine *machine = cs_etm__get_machine(etmq, el);
+
+ if (address >= machine__kernel_start(machine)) {
+ if (machine__is_host(machine))
+ return PERF_RECORD_MISC_KERNEL;
+ else
+ return PERF_RECORD_MISC_GUEST_KERNEL;
+ } else {
+ if (machine__is_host(machine))
+ return PERF_RECORD_MISC_USER;
+ else {
+ /*
+ * Can't really happen at the moment because
+ * cs_etm__get_machine() will always return
+ * machines.host for any non EL1 trace.
+ */
+ return PERF_RECORD_MISC_GUEST_USER;
+ }
+ }
+}
+
+static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
+ u64 address, size_t size, u8 *buffer,
+ const ocsd_mem_space_acc_t mem_space)
+{
+ u8 cpumode;
+ u64 offset;
+ int len;
+ struct addr_location al;
+ struct dso *dso;
+ struct cs_etm_traceid_queue *tidq;
+ int ret = 0;
+
+ if (!etmq)
+ return 0;
+
+ addr_location__init(&al);
+ tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
+ if (!tidq)
+ goto out;
+
+ /*
+ * We've already tracked EL along side the PID in cs_etm__set_thread()
+ * so double check that it matches what OpenCSD thinks as well. It
+ * doesn't distinguish between EL0 and EL1 for this mem access callback
+ * so we had to do the extra tracking. Skip validation if it's any of
+ * the 'any' values.
+ */
+ if (!(mem_space == OCSD_MEM_SPACE_ANY ||
+ mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) {
+ if (mem_space & OCSD_MEM_SPACE_EL1N) {
+ /* Includes both non secure EL1 and EL0 */
+ assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0);
+ } else if (mem_space & OCSD_MEM_SPACE_EL2)
+ assert(tidq->el == ocsd_EL2);
+ else if (mem_space & OCSD_MEM_SPACE_EL3)
+ assert(tidq->el == ocsd_EL3);
+ }
+
+ cpumode = cs_etm__cpu_mode(etmq, address, tidq->el);
+
+ if (!thread__find_map(tidq->thread, cpumode, address, &al))
+ goto out;
+
+ dso = map__dso(al.map);
+ if (!dso)
+ goto out;
+
+ if (dso->data.status == DSO_DATA_STATUS_ERROR &&
+ dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
+ goto out;
+
+ offset = map__map_ip(al.map, address);
+
+ map__load(al.map);
+
+ len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)),
+ offset, buffer, size);
+
+ if (len <= 0) {
+ ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
+ " Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
+ if (!dso->auxtrace_warned) {
+ pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
+ address,
+ dso->long_name ? dso->long_name : "Unknown");
+ dso->auxtrace_warned = true;
+ }
+ goto out;
+ }
+ ret = len;
+out:
+ addr_location__exit(&al);
+ return ret;
+}
+
+static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
+ bool formatted)
+{
+ struct cs_etm_decoder_params d_params;
+ struct cs_etm_trace_params *t_params = NULL;
+ struct cs_etm_queue *etmq;
+ /*
+ * Each queue can only contain data from one CPU when unformatted, so only one decoder is
+ * needed.
+ */
+ int decoders = formatted ? etm->num_cpu : 1;
+
+ etmq = zalloc(sizeof(*etmq));
+ if (!etmq)
+ return NULL;
+
+ etmq->traceid_queues_list = intlist__new(NULL);
+ if (!etmq->traceid_queues_list)
+ goto out_free;
+
+ /* Use metadata to fill in trace parameters for trace decoder */
+ t_params = zalloc(sizeof(*t_params) * decoders);
+
+ if (!t_params)
+ goto out_free;
+
+ if (cs_etm__init_trace_params(t_params, etm, decoders))
+ goto out_free;
+
+ /* Set decoder parameters to decode trace packets */
+ if (cs_etm__init_decoder_params(&d_params, etmq,
+ dump_trace ? CS_ETM_OPERATION_PRINT :
+ CS_ETM_OPERATION_DECODE,
+ formatted))
+ goto out_free;
+
+ etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
+ t_params);
+
+ if (!etmq->decoder)
+ goto out_free;
+
+ /*
+ * Register a function to handle all memory accesses required by
+ * the trace decoder library.
+ */
+ if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
+ 0x0L, ((u64) -1L),
+ cs_etm__mem_access))
+ goto out_free_decoder;
+
+ zfree(&t_params);
+ return etmq;
+
+out_free_decoder:
+ cs_etm_decoder__free(etmq->decoder);
+out_free:
+ intlist__delete(etmq->traceid_queues_list);
+ free(etmq);
+
+ return NULL;
+}
+
+static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
+ struct auxtrace_queue *queue,
+ unsigned int queue_nr,
+ bool formatted)
+{
+ struct cs_etm_queue *etmq = queue->priv;
+
+ if (list_empty(&queue->head) || etmq)
+ return 0;
+
+ etmq = cs_etm__alloc_queue(etm, formatted);
+
+ if (!etmq)
+ return -ENOMEM;
+
+ queue->priv = etmq;
+ etmq->etm = etm;
+ etmq->queue_nr = queue_nr;
+ etmq->offset = 0;
+
+ return 0;
+}
+
+static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
+ struct cs_etm_queue *etmq,
+ unsigned int queue_nr)
+{
+ int ret = 0;
+ unsigned int cs_queue_nr;
+ u8 trace_chan_id;
+ u64 cs_timestamp;
+
+ /*
+ * We are under a CPU-wide trace scenario. As such we need to know
+ * when the code that generated the traces started to execute so that
+ * it can be correlated with execution on other CPUs. So we get a
+ * handle on the beginning of traces and decode until we find a
+ * timestamp. The timestamp is then added to the auxtrace min heap
+ * in order to know what nibble (of all the etmqs) to decode first.
+ */
+ while (1) {
+ /*
+ * Fetch an aux_buffer from this etmq. Bail if no more
+ * blocks or an error has been encountered.
+ */
+ ret = cs_etm__get_data_block(etmq);
+ if (ret <= 0)
+ goto out;
+
+ /*
+ * Run decoder on the trace block. The decoder will stop when
+ * encountering a CS timestamp, a full packet queue or the end of
+ * trace for that block.
+ */
+ ret = cs_etm__decode_data_block(etmq);
+ if (ret)
+ goto out;
+
+ /*
+ * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
+ * the timestamp calculation for us.
+ */
+ cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
+
+ /* We found a timestamp, no need to continue. */
+ if (cs_timestamp)
+ break;
+
+ /*
+ * We didn't find a timestamp so empty all the traceid packet
+ * queues before looking for another timestamp packet, either
+ * in the current data block or a new one. Packets that were
+ * just decoded are useless since no timestamp has been
+ * associated with them. As such simply discard them.
+ */
+ cs_etm__clear_all_packet_queues(etmq);
+ }
+
+ /*
+ * We have a timestamp. Add it to the min heap to reflect when
+ * instructions conveyed by the range packets of this traceID queue
+ * started to execute. Once the same has been done for all the traceID
+ * queues of each etmq, redenring and decoding can start in
+ * chronological order.
+ *
+ * Note that packets decoded above are still in the traceID's packet
+ * queue and will be processed in cs_etm__process_timestamped_queues().
+ */
+ cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
+ ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
+out:
+ return ret;
+}
+
+static inline
+void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
+{
+ struct branch_stack *bs_src = tidq->last_branch_rb;
+ struct branch_stack *bs_dst = tidq->last_branch;
+ size_t nr = 0;
+
+ /*
+ * Set the number of records before early exit: ->nr is used to
+ * determine how many branches to copy from ->entries.
+ */
+ bs_dst->nr = bs_src->nr;
+
+ /*
+ * Early exit when there is nothing to copy.
+ */
+ if (!bs_src->nr)
+ return;
+
+ /*
+ * As bs_src->entries is a circular buffer, we need to copy from it in
+ * two steps. First, copy the branches from the most recently inserted
+ * branch ->last_branch_pos until the end of bs_src->entries buffer.
+ */
+ nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
+ memcpy(&bs_dst->entries[0],
+ &bs_src->entries[tidq->last_branch_pos],
+ sizeof(struct branch_entry) * nr);
+
+ /*
+ * If we wrapped around at least once, the branches from the beginning
+ * of the bs_src->entries buffer and until the ->last_branch_pos element
+ * are older valid branches: copy them over. The total number of
+ * branches copied over will be equal to the number of branches asked by
+ * the user in last_branch_sz.
+ */
+ if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
+ memcpy(&bs_dst->entries[nr],
+ &bs_src->entries[0],
+ sizeof(struct branch_entry) * tidq->last_branch_pos);
+ }
+}
+
+static inline
+void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
+{
+ tidq->last_branch_pos = 0;
+ tidq->last_branch_rb->nr = 0;
+}
+
+static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
+ u8 trace_chan_id, u64 addr)
+{
+ u8 instrBytes[2];
+
+ cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes),
+ instrBytes, 0);
+ /*
+ * T32 instruction size is indicated by bits[15:11] of the first
+ * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
+ * denote a 32-bit instruction.
+ */
+ return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
+}
+
+static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
+{
+ /* Returns 0 for the CS_ETM_DISCONTINUITY packet */
+ if (packet->sample_type == CS_ETM_DISCONTINUITY)
+ return 0;
+
+ return packet->start_addr;
+}
+
+static inline
+u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
+{
+ /* Returns 0 for the CS_ETM_DISCONTINUITY packet */
+ if (packet->sample_type == CS_ETM_DISCONTINUITY)
+ return 0;
+
+ return packet->end_addr - packet->last_instr_size;
+}
+
+static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
+ u64 trace_chan_id,
+ const struct cs_etm_packet *packet,
+ u64 offset)
+{
+ if (packet->isa == CS_ETM_ISA_T32) {
+ u64 addr = packet->start_addr;
+
+ while (offset) {
+ addr += cs_etm__t32_instr_size(etmq,
+ trace_chan_id, addr);
+ offset--;
+ }
+ return addr;
+ }
+
+ /* Assume a 4 byte instruction size (A32/A64) */
+ return packet->start_addr + offset * 4;
+}
+
+static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
+{
+ struct branch_stack *bs = tidq->last_branch_rb;
+ struct branch_entry *be;
+
+ /*
+ * The branches are recorded in a circular buffer in reverse
+ * chronological order: we start recording from the last element of the
+ * buffer down. After writing the first element of the stack, move the
+ * insert position back to the end of the buffer.
+ */
+ if (!tidq->last_branch_pos)
+ tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
+
+ tidq->last_branch_pos -= 1;
+
+ be = &bs->entries[tidq->last_branch_pos];
+ be->from = cs_etm__last_executed_instr(tidq->prev_packet);
+ be->to = cs_etm__first_executed_instr(tidq->packet);
+ /* No support for mispredict */
+ be->flags.mispred = 0;
+ be->flags.predicted = 1;
+
+ /*
+ * Increment bs->nr until reaching the number of last branches asked by
+ * the user on the command line.
+ */
+ if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
+ bs->nr += 1;
+}
+
+static int cs_etm__inject_event(union perf_event *event,
+ struct perf_sample *sample, u64 type)
+{
+ event->header.size = perf_event__sample_event_size(sample, type, 0);
+ return perf_event__synthesize_sample(event, type, 0, sample);
+}
+
+
+static int
+cs_etm__get_trace(struct cs_etm_queue *etmq)
+{
+ struct auxtrace_buffer *aux_buffer = etmq->buffer;
+ struct auxtrace_buffer *old_buffer = aux_buffer;
+ struct auxtrace_queue *queue;
+
+ queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
+
+ aux_buffer = auxtrace_buffer__next(queue, aux_buffer);
+
+ /* If no more data, drop the previous auxtrace_buffer and return */
+ if (!aux_buffer) {
+ if (old_buffer)
+ auxtrace_buffer__drop_data(old_buffer);
+ etmq->buf_len = 0;
+ return 0;
+ }
+
+ etmq->buffer = aux_buffer;
+
+ /* If the aux_buffer doesn't have data associated, try to load it */
+ if (!aux_buffer->data) {
+ /* get the file desc associated with the perf data file */
+ int fd = perf_data__fd(etmq->etm->session->data);
+
+ aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
+ if (!aux_buffer->data)
+ return -ENOMEM;
+ }
+
+ /* If valid, drop the previous buffer */
+ if (old_buffer)
+ auxtrace_buffer__drop_data(old_buffer);
+
+ etmq->buf_used = 0;
+ etmq->buf_len = aux_buffer->size;
+ etmq->buf = aux_buffer->data;
+
+ return etmq->buf_len;
+}
+
+static void cs_etm__set_thread(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq, pid_t tid,
+ ocsd_ex_level el)
+{
+ struct machine *machine = cs_etm__get_machine(etmq, el);
+
+ if (tid != -1) {
+ thread__zput(tidq->thread);
+ tidq->thread = machine__find_thread(machine, -1, tid);
+ }
+
+ /* Couldn't find a known thread */
+ if (!tidq->thread)
+ tidq->thread = machine__idle_thread(machine);
+
+ tidq->el = el;
+}
+
+int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
+ u8 trace_chan_id, ocsd_ex_level el)
+{
+ struct cs_etm_traceid_queue *tidq;
+
+ tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
+ if (!tidq)
+ return -EINVAL;
+
+ cs_etm__set_thread(etmq, tidq, tid, el);
+ return 0;
+}
+
+bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
+{
+ return !!etmq->etm->timeless_decoding;
+}
+
+static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
+ u64 trace_chan_id,
+ const struct cs_etm_packet *packet,
+ struct perf_sample *sample)
+{
+ /*
+ * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
+ * packet, so directly bail out with 'insn_len' = 0.
+ */
+ if (packet->sample_type == CS_ETM_DISCONTINUITY) {
+ sample->insn_len = 0;
+ return;
+ }
+
+ /*
+ * T32 instruction size might be 32-bit or 16-bit, decide by calling
+ * cs_etm__t32_instr_size().
+ */
+ if (packet->isa == CS_ETM_ISA_T32)
+ sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
+ sample->ip);
+ /* Otherwise, A64 and A32 instruction size are always 32-bit. */
+ else
+ sample->insn_len = 4;
+
+ cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len,
+ (void *)sample->insn, 0);
+}
+
+u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
+{
+ struct cs_etm_auxtrace *etm = etmq->etm;
+
+ if (etm->has_virtual_ts)
+ return tsc_to_perf_time(cs_timestamp, &etm->tc);
+ else
+ return cs_timestamp;
+}
+
+static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
+{
+ struct cs_etm_auxtrace *etm = etmq->etm;
+ struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;
+
+ if (!etm->timeless_decoding && etm->has_virtual_ts)
+ return packet_queue->cs_timestamp;
+ else
+ return etm->latest_kernel_timestamp;
+}
+
+static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq,
+ u64 addr, u64 period)
+{
+ int ret = 0;
+ struct cs_etm_auxtrace *etm = etmq->etm;
+ union perf_event *event = tidq->event_buf;
+ struct perf_sample sample = {.ip = 0,};
+
+ event->sample.header.type = PERF_RECORD_SAMPLE;
+ event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
+ event->sample.header.size = sizeof(struct perf_event_header);
+
+ /* Set time field based on etm auxtrace config. */
+ sample.time = cs_etm__resolve_sample_time(etmq, tidq);
+
+ sample.ip = addr;
+ sample.pid = thread__pid(tidq->thread);
+ sample.tid = thread__tid(tidq->thread);
+ sample.id = etmq->etm->instructions_id;
+ sample.stream_id = etmq->etm->instructions_id;
+ sample.period = period;
+ sample.cpu = tidq->packet->cpu;
+ sample.flags = tidq->prev_packet->flags;
+ sample.cpumode = event->sample.header.misc;
+
+ cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
+
+ if (etm->synth_opts.last_branch)
+ sample.branch_stack = tidq->last_branch;
+
+ if (etm->synth_opts.inject) {
+ ret = cs_etm__inject_event(event, &sample,
+ etm->instructions_sample_type);
+ if (ret)
+ return ret;
+ }
+
+ ret = perf_session__deliver_synth_event(etm->session, event, &sample);
+
+ if (ret)
+ pr_err(
+ "CS ETM Trace: failed to deliver instruction event, error %d\n",
+ ret);
+
+ return ret;
+}
+
+/*
+ * The cs etm packet encodes an instruction range between a branch target
+ * and the next taken branch. Generate sample accordingly.
+ */
+static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
+{
+ int ret = 0;
+ struct cs_etm_auxtrace *etm = etmq->etm;
+ struct perf_sample sample = {.ip = 0,};
+ union perf_event *event = tidq->event_buf;
+ struct dummy_branch_stack {
+ u64 nr;
+ u64 hw_idx;
+ struct branch_entry entries;
+ } dummy_bs;
+ u64 ip;
+
+ ip = cs_etm__last_executed_instr(tidq->prev_packet);
+
+ event->sample.header.type = PERF_RECORD_SAMPLE;
+ event->sample.header.misc = cs_etm__cpu_mode(etmq, ip,
+ tidq->prev_packet_el);
+ event->sample.header.size = sizeof(struct perf_event_header);
+
+ /* Set time field based on etm auxtrace config. */
+ sample.time = cs_etm__resolve_sample_time(etmq, tidq);
+
+ sample.ip = ip;
+ sample.pid = thread__pid(tidq->prev_packet_thread);
+ sample.tid = thread__tid(tidq->prev_packet_thread);
+ sample.addr = cs_etm__first_executed_instr(tidq->packet);
+ sample.id = etmq->etm->branches_id;
+ sample.stream_id = etmq->etm->branches_id;
+ sample.period = 1;
+ sample.cpu = tidq->packet->cpu;
+ sample.flags = tidq->prev_packet->flags;
+ sample.cpumode = event->sample.header.misc;
+
+ cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet,
+ &sample);
+
+ /*
+ * perf report cannot handle events without a branch stack
+ */
+ if (etm->synth_opts.last_branch) {
+ dummy_bs = (struct dummy_branch_stack){
+ .nr = 1,
+ .hw_idx = -1ULL,
+ .entries = {
+ .from = sample.ip,
+ .to = sample.addr,
+ },
+ };
+ sample.branch_stack = (struct branch_stack *)&dummy_bs;
+ }
+
+ if (etm->synth_opts.inject) {
+ ret = cs_etm__inject_event(event, &sample,
+ etm->branches_sample_type);
+ if (ret)
+ return ret;
+ }
+
+ ret = perf_session__deliver_synth_event(etm->session, event, &sample);
+
+ if (ret)
+ pr_err(
+ "CS ETM Trace: failed to deliver instruction event, error %d\n",
+ ret);
+
+ return ret;
+}
+
+struct cs_etm_synth {
+ struct perf_tool dummy_tool;
+ struct perf_session *session;
+};
+
+static int cs_etm__event_synth(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct cs_etm_synth *cs_etm_synth =
+ container_of(tool, struct cs_etm_synth, dummy_tool);
+
+ return perf_session__deliver_synth_event(cs_etm_synth->session,
+ event, NULL);
+}
+
+static int cs_etm__synth_event(struct perf_session *session,
+ struct perf_event_attr *attr, u64 id)
+{
+ struct cs_etm_synth cs_etm_synth;
+
+ memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth));
+ cs_etm_synth.session = session;
+
+ return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1,
+ &id, cs_etm__event_synth);
+}
+
+static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
+ struct perf_session *session)
+{
+ struct evlist *evlist = session->evlist;
+ struct evsel *evsel;
+ struct perf_event_attr attr;
+ bool found = false;
+ u64 id;
+ int err;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.type == etm->pmu_type) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ pr_debug("No selected events with CoreSight Trace data\n");
+ return 0;
+ }
+
+ memset(&attr, 0, sizeof(struct perf_event_attr));
+ attr.size = sizeof(struct perf_event_attr);
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
+ attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+ PERF_SAMPLE_PERIOD;
+ if (etm->timeless_decoding)
+ attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
+ else
+ attr.sample_type |= PERF_SAMPLE_TIME;
+
+ attr.exclude_user = evsel->core.attr.exclude_user;
+ attr.exclude_kernel = evsel->core.attr.exclude_kernel;
+ attr.exclude_hv = evsel->core.attr.exclude_hv;
+ attr.exclude_host = evsel->core.attr.exclude_host;
+ attr.exclude_guest = evsel->core.attr.exclude_guest;
+ attr.sample_id_all = evsel->core.attr.sample_id_all;
+ attr.read_format = evsel->core.attr.read_format;
+
+ /* create new id val to be a fixed offset from evsel id */
+ id = evsel->core.id[0] + 1000000000;
+
+ if (!id)
+ id = 1;
+
+ if (etm->synth_opts.branches) {
+ attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+ attr.sample_period = 1;
+ attr.sample_type |= PERF_SAMPLE_ADDR;
+ err = cs_etm__synth_event(session, &attr, id);
+ if (err)
+ return err;
+ etm->branches_sample_type = attr.sample_type;
+ etm->branches_id = id;
+ id += 1;
+ attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
+ }
+
+ if (etm->synth_opts.last_branch) {
+ attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ /*
+ * We don't use the hardware index, but the sample generation
+ * code uses the new format branch_stack with this field,
+ * so the event attributes must indicate that it's present.
+ */
+ attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
+ }
+
+ if (etm->synth_opts.instructions) {
+ attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+ attr.sample_period = etm->synth_opts.period;
+ etm->instructions_sample_period = attr.sample_period;
+ err = cs_etm__synth_event(session, &attr, id);
+ if (err)
+ return err;
+ etm->instructions_sample_type = attr.sample_type;
+ etm->instructions_id = id;
+ id += 1;
+ }
+
+ return 0;
+}
+
+static int cs_etm__sample(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
+{
+ struct cs_etm_auxtrace *etm = etmq->etm;
+ int ret;
+ u8 trace_chan_id = tidq->trace_chan_id;
+ u64 instrs_prev;
+
+ /* Get instructions remainder from previous packet */
+ instrs_prev = tidq->period_instructions;
+
+ tidq->period_instructions += tidq->packet->instr_count;
+
+ /*
+ * Record a branch when the last instruction in
+ * PREV_PACKET is a branch.
+ */
+ if (etm->synth_opts.last_branch &&
+ tidq->prev_packet->sample_type == CS_ETM_RANGE &&
+ tidq->prev_packet->last_instr_taken_branch)
+ cs_etm__update_last_branch_rb(etmq, tidq);
+
+ if (etm->synth_opts.instructions &&
+ tidq->period_instructions >= etm->instructions_sample_period) {
+ /*
+ * Emit instruction sample periodically
+ * TODO: allow period to be defined in cycles and clock time
+ */
+
+ /*
+ * Below diagram demonstrates the instruction samples
+ * generation flows:
+ *
+ * Instrs Instrs Instrs Instrs
+ * Sample(n) Sample(n+1) Sample(n+2) Sample(n+3)
+ * | | | |
+ * V V V V
+ * --------------------------------------------------
+ * ^ ^
+ * | |
+ * Period Period
+ * instructions(Pi) instructions(Pi')
+ *
+ * | |
+ * \---------------- -----------------/
+ * V
+ * tidq->packet->instr_count
+ *
+ * Instrs Sample(n...) are the synthesised samples occurring
+ * every etm->instructions_sample_period instructions - as
+ * defined on the perf command line. Sample(n) is being the
+ * last sample before the current etm packet, n+1 to n+3
+ * samples are generated from the current etm packet.
+ *
+ * tidq->packet->instr_count represents the number of
+ * instructions in the current etm packet.
+ *
+ * Period instructions (Pi) contains the number of
+ * instructions executed after the sample point(n) from the
+ * previous etm packet. This will always be less than
+ * etm->instructions_sample_period.
+ *
+ * When generate new samples, it combines with two parts
+ * instructions, one is the tail of the old packet and another
+ * is the head of the new coming packet, to generate
+ * sample(n+1); sample(n+2) and sample(n+3) consume the
+ * instructions with sample period. After sample(n+3), the rest
+ * instructions will be used by later packet and it is assigned
+ * to tidq->period_instructions for next round calculation.
+ */
+
+ /*
+ * Get the initial offset into the current packet instructions;
+ * entry conditions ensure that instrs_prev is less than
+ * etm->instructions_sample_period.
+ */
+ u64 offset = etm->instructions_sample_period - instrs_prev;
+ u64 addr;
+
+ /* Prepare last branches for instruction sample */
+ if (etm->synth_opts.last_branch)
+ cs_etm__copy_last_branch_rb(etmq, tidq);
+
+ while (tidq->period_instructions >=
+ etm->instructions_sample_period) {
+ /*
+ * Calculate the address of the sampled instruction (-1
+ * as sample is reported as though instruction has just
+ * been executed, but PC has not advanced to next
+ * instruction)
+ */
+ addr = cs_etm__instr_addr(etmq, trace_chan_id,
+ tidq->packet, offset - 1);
+ ret = cs_etm__synth_instruction_sample(
+ etmq, tidq, addr,
+ etm->instructions_sample_period);
+ if (ret)
+ return ret;
+
+ offset += etm->instructions_sample_period;
+ tidq->period_instructions -=
+ etm->instructions_sample_period;
+ }
+ }
+
+ if (etm->synth_opts.branches) {
+ bool generate_sample = false;
+
+ /* Generate sample for tracing on packet */
+ if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
+ generate_sample = true;
+
+ /* Generate sample for branch taken packet */
+ if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
+ tidq->prev_packet->last_instr_taken_branch)
+ generate_sample = true;
+
+ if (generate_sample) {
+ ret = cs_etm__synth_branch_sample(etmq, tidq);
+ if (ret)
+ return ret;
+ }
+ }
+
+ cs_etm__packet_swap(etm, tidq);
+
+ return 0;
+}
+
+static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
+{
+ /*
+ * When the exception packet is inserted, whether the last instruction
+ * in previous range packet is taken branch or not, we need to force
+ * to set 'prev_packet->last_instr_taken_branch' to true. This ensures
+ * to generate branch sample for the instruction range before the
+ * exception is trapped to kernel or before the exception returning.
+ *
+ * The exception packet includes the dummy address values, so don't
+ * swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful
+ * for generating instruction and branch samples.
+ */
+ if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
+ tidq->prev_packet->last_instr_taken_branch = true;
+
+ return 0;
+}
+
+static int cs_etm__flush(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
+{
+ int err = 0;
+ struct cs_etm_auxtrace *etm = etmq->etm;
+
+ /* Handle start tracing packet */
+ if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
+ goto swap_packet;
+
+ if (etmq->etm->synth_opts.last_branch &&
+ etmq->etm->synth_opts.instructions &&
+ tidq->prev_packet->sample_type == CS_ETM_RANGE) {
+ u64 addr;
+
+ /* Prepare last branches for instruction sample */
+ cs_etm__copy_last_branch_rb(etmq, tidq);
+
+ /*
+ * Generate a last branch event for the branches left in the
+ * circular buffer at the end of the trace.
+ *
+ * Use the address of the end of the last reported execution
+ * range
+ */
+ addr = cs_etm__last_executed_instr(tidq->prev_packet);
+
+ err = cs_etm__synth_instruction_sample(
+ etmq, tidq, addr,
+ tidq->period_instructions);
+ if (err)
+ return err;
+
+ tidq->period_instructions = 0;
+
+ }
+
+ if (etm->synth_opts.branches &&
+ tidq->prev_packet->sample_type == CS_ETM_RANGE) {
+ err = cs_etm__synth_branch_sample(etmq, tidq);
+ if (err)
+ return err;
+ }
+
+swap_packet:
+ cs_etm__packet_swap(etm, tidq);
+
+ /* Reset last branches after flush the trace */
+ if (etm->synth_opts.last_branch)
+ cs_etm__reset_last_branch_rb(tidq);
+
+ return err;
+}
+
+static int cs_etm__end_block(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
+{
+ int err;
+
+ /*
+ * It has no new packet coming and 'etmq->packet' contains the stale
+ * packet which was set at the previous time with packets swapping;
+ * so skip to generate branch sample to avoid stale packet.
+ *
+ * For this case only flush branch stack and generate a last branch
+ * event for the branches left in the circular buffer at the end of
+ * the trace.
+ */
+ if (etmq->etm->synth_opts.last_branch &&
+ etmq->etm->synth_opts.instructions &&
+ tidq->prev_packet->sample_type == CS_ETM_RANGE) {
+ u64 addr;
+
+ /* Prepare last branches for instruction sample */
+ cs_etm__copy_last_branch_rb(etmq, tidq);
+
+ /*
+ * Use the address of the end of the last reported execution
+ * range.
+ */
+ addr = cs_etm__last_executed_instr(tidq->prev_packet);
+
+ err = cs_etm__synth_instruction_sample(
+ etmq, tidq, addr,
+ tidq->period_instructions);
+ if (err)
+ return err;
+
+ tidq->period_instructions = 0;
+ }
+
+ return 0;
+}
+/*
+ * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
+ * if need be.
+ * Returns: < 0 if error
+ * = 0 if no more auxtrace_buffer to read
+ * > 0 if the current buffer isn't empty yet
+ */
+static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
+{
+ int ret;
+
+ if (!etmq->buf_len) {
+ ret = cs_etm__get_trace(etmq);
+ if (ret <= 0)
+ return ret;
+ /*
+ * We cannot assume consecutive blocks in the data file
+ * are contiguous, reset the decoder to force re-sync.
+ */
+ ret = cs_etm_decoder__reset(etmq->decoder);
+ if (ret)
+ return ret;
+ }
+
+ return etmq->buf_len;
+}
+
+static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
+ struct cs_etm_packet *packet,
+ u64 end_addr)
+{
+ /* Initialise to keep compiler happy */
+ u16 instr16 = 0;
+ u32 instr32 = 0;
+ u64 addr;
+
+ switch (packet->isa) {
+ case CS_ETM_ISA_T32:
+ /*
+ * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
+ *
+ * b'15 b'8
+ * +-----------------+--------+
+ * | 1 1 0 1 1 1 1 1 | imm8 |
+ * +-----------------+--------+
+ *
+ * According to the specification, it only defines SVC for T32
+ * with 16 bits instruction and has no definition for 32bits;
+ * so below only read 2 bytes as instruction size for T32.
+ */
+ addr = end_addr - 2;
+ cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
+ (u8 *)&instr16, 0);
+ if ((instr16 & 0xFF00) == 0xDF00)
+ return true;
+
+ break;
+ case CS_ETM_ISA_A32:
+ /*
+ * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
+ *
+ * b'31 b'28 b'27 b'24
+ * +---------+---------+-------------------------+
+ * | !1111 | 1 1 1 1 | imm24 |
+ * +---------+---------+-------------------------+
+ */
+ addr = end_addr - 4;
+ cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
+ (u8 *)&instr32, 0);
+ if ((instr32 & 0x0F000000) == 0x0F000000 &&
+ (instr32 & 0xF0000000) != 0xF0000000)
+ return true;
+
+ break;
+ case CS_ETM_ISA_A64:
+ /*
+ * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
+ *
+ * b'31 b'21 b'4 b'0
+ * +-----------------------+---------+-----------+
+ * | 1 1 0 1 0 1 0 0 0 0 0 | imm16 | 0 0 0 0 1 |
+ * +-----------------------+---------+-----------+
+ */
+ addr = end_addr - 4;
+ cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
+ (u8 *)&instr32, 0);
+ if ((instr32 & 0xFFE0001F) == 0xd4000001)
+ return true;
+
+ break;
+ case CS_ETM_ISA_UNKNOWN:
+ default:
+ break;
+ }
+
+ return false;
+}
+
+static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq, u64 magic)
+{
+ u8 trace_chan_id = tidq->trace_chan_id;
+ struct cs_etm_packet *packet = tidq->packet;
+ struct cs_etm_packet *prev_packet = tidq->prev_packet;
+
+ if (magic == __perf_cs_etmv3_magic)
+ if (packet->exception_number == CS_ETMV3_EXC_SVC)
+ return true;
+
+ /*
+ * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
+ * HVC cases; need to check if it's SVC instruction based on
+ * packet address.
+ */
+ if (magic == __perf_cs_etmv4_magic) {
+ if (packet->exception_number == CS_ETMV4_EXC_CALL &&
+ cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
+ prev_packet->end_addr))
+ return true;
+ }
+
+ return false;
+}
+
+static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
+ u64 magic)
+{
+ struct cs_etm_packet *packet = tidq->packet;
+
+ if (magic == __perf_cs_etmv3_magic)
+ if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
+ packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
+ packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
+ packet->exception_number == CS_ETMV3_EXC_IRQ ||
+ packet->exception_number == CS_ETMV3_EXC_FIQ)
+ return true;
+
+ if (magic == __perf_cs_etmv4_magic)
+ if (packet->exception_number == CS_ETMV4_EXC_RESET ||
+ packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
+ packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
+ packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
+ packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
+ packet->exception_number == CS_ETMV4_EXC_IRQ ||
+ packet->exception_number == CS_ETMV4_EXC_FIQ)
+ return true;
+
+ return false;
+}
+
+static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq,
+ u64 magic)
+{
+ u8 trace_chan_id = tidq->trace_chan_id;
+ struct cs_etm_packet *packet = tidq->packet;
+ struct cs_etm_packet *prev_packet = tidq->prev_packet;
+
+ if (magic == __perf_cs_etmv3_magic)
+ if (packet->exception_number == CS_ETMV3_EXC_SMC ||
+ packet->exception_number == CS_ETMV3_EXC_HYP ||
+ packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
+ packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
+ packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
+ packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
+ packet->exception_number == CS_ETMV3_EXC_GENERIC)
+ return true;
+
+ if (magic == __perf_cs_etmv4_magic) {
+ if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
+ packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
+ packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
+ packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
+ return true;
+
+ /*
+ * For CS_ETMV4_EXC_CALL, except SVC other instructions
+ * (SMC, HVC) are taken as sync exceptions.
+ */
+ if (packet->exception_number == CS_ETMV4_EXC_CALL &&
+ !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
+ prev_packet->end_addr))
+ return true;
+
+ /*
+ * ETMv4 has 5 bits for exception number; if the numbers
+ * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ]
+ * they are implementation defined exceptions.
+ *
+ * For this case, simply take it as sync exception.
+ */
+ if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
+ packet->exception_number <= CS_ETMV4_EXC_END)
+ return true;
+ }
+
+ return false;
+}
+
+static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
+{
+ struct cs_etm_packet *packet = tidq->packet;
+ struct cs_etm_packet *prev_packet = tidq->prev_packet;
+ u8 trace_chan_id = tidq->trace_chan_id;
+ u64 magic;
+ int ret;
+
+ switch (packet->sample_type) {
+ case CS_ETM_RANGE:
+ /*
+ * Immediate branch instruction without neither link nor
+ * return flag, it's normal branch instruction within
+ * the function.
+ */
+ if (packet->last_instr_type == OCSD_INSTR_BR &&
+ packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
+ packet->flags = PERF_IP_FLAG_BRANCH;
+
+ if (packet->last_instr_cond)
+ packet->flags |= PERF_IP_FLAG_CONDITIONAL;
+ }
+
+ /*
+ * Immediate branch instruction with link (e.g. BL), this is
+ * branch instruction for function call.
+ */
+ if (packet->last_instr_type == OCSD_INSTR_BR &&
+ packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
+ packet->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_CALL;
+
+ /*
+ * Indirect branch instruction with link (e.g. BLR), this is
+ * branch instruction for function call.
+ */
+ if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
+ packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
+ packet->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_CALL;
+
+ /*
+ * Indirect branch instruction with subtype of
+ * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for
+ * function return for A32/T32.
+ */
+ if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
+ packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
+ packet->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_RETURN;
+
+ /*
+ * Indirect branch instruction without link (e.g. BR), usually
+ * this is used for function return, especially for functions
+ * within dynamic link lib.
+ */
+ if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
+ packet->last_instr_subtype == OCSD_S_INSTR_NONE)
+ packet->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_RETURN;
+
+ /* Return instruction for function return. */
+ if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
+ packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
+ packet->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_RETURN;
+
+ /*
+ * Decoder might insert a discontinuity in the middle of
+ * instruction packets, fixup prev_packet with flag
+ * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace.
+ */
+ if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
+ prev_packet->flags |= PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_TRACE_BEGIN;
+
+ /*
+ * If the previous packet is an exception return packet
+ * and the return address just follows SVC instruction,
+ * it needs to calibrate the previous packet sample flags
+ * as PERF_IP_FLAG_SYSCALLRET.
+ */
+ if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_RETURN |
+ PERF_IP_FLAG_INTERRUPT) &&
+ cs_etm__is_svc_instr(etmq, trace_chan_id,
+ packet, packet->start_addr))
+ prev_packet->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_RETURN |
+ PERF_IP_FLAG_SYSCALLRET;
+ break;
+ case CS_ETM_DISCONTINUITY:
+ /*
+ * The trace is discontinuous, if the previous packet is
+ * instruction packet, set flag PERF_IP_FLAG_TRACE_END
+ * for previous packet.
+ */
+ if (prev_packet->sample_type == CS_ETM_RANGE)
+ prev_packet->flags |= PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_TRACE_END;
+ break;
+ case CS_ETM_EXCEPTION:
+ ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
+ if (ret)
+ return ret;
+
+ /* The exception is for system call. */
+ if (cs_etm__is_syscall(etmq, tidq, magic))
+ packet->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_SYSCALLRET;
+ /*
+ * The exceptions are triggered by external signals from bus,
+ * interrupt controller, debug module, PE reset or halt.
+ */
+ else if (cs_etm__is_async_exception(tidq, magic))
+ packet->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_INTERRUPT;
+ /*
+ * Otherwise, exception is caused by trap, instruction &
+ * data fault, or alignment errors.
+ */
+ else if (cs_etm__is_sync_exception(etmq, tidq, magic))
+ packet->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_INTERRUPT;
+
+ /*
+ * When the exception packet is inserted, since exception
+ * packet is not used standalone for generating samples
+ * and it's affiliation to the previous instruction range
+ * packet; so set previous range packet flags to tell perf
+ * it is an exception taken branch.
+ */
+ if (prev_packet->sample_type == CS_ETM_RANGE)
+ prev_packet->flags = packet->flags;
+ break;
+ case CS_ETM_EXCEPTION_RET:
+ /*
+ * When the exception return packet is inserted, since
+ * exception return packet is not used standalone for
+ * generating samples and it's affiliation to the previous
+ * instruction range packet; so set previous range packet
+ * flags to tell perf it is an exception return branch.
+ *
+ * The exception return can be for either system call or
+ * other exception types; unfortunately the packet doesn't
+ * contain exception type related info so we cannot decide
+ * the exception type purely based on exception return packet.
+ * If we record the exception number from exception packet and
+ * reuse it for exception return packet, this is not reliable
+ * due the trace can be discontinuity or the interrupt can
+ * be nested, thus the recorded exception number cannot be
+ * used for exception return packet for these two cases.
+ *
+ * For exception return packet, we only need to distinguish the
+ * packet is for system call or for other types. Thus the
+ * decision can be deferred when receive the next packet which
+ * contains the return address, based on the return address we
+ * can read out the previous instruction and check if it's a
+ * system call instruction and then calibrate the sample flag
+ * as needed.
+ */
+ if (prev_packet->sample_type == CS_ETM_RANGE)
+ prev_packet->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_RETURN |
+ PERF_IP_FLAG_INTERRUPT;
+ break;
+ case CS_ETM_EMPTY:
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
+{
+ int ret = 0;
+ size_t processed = 0;
+
+ /*
+ * Packets are decoded and added to the decoder's packet queue
+ * until the decoder packet processing callback has requested that
+ * processing stops or there is nothing left in the buffer. Normal
+ * operations that stop processing are a timestamp packet or a full
+ * decoder buffer queue.
+ */
+ ret = cs_etm_decoder__process_data_block(etmq->decoder,
+ etmq->offset,
+ &etmq->buf[etmq->buf_used],
+ etmq->buf_len,
+ &processed);
+ if (ret)
+ goto out;
+
+ etmq->offset += processed;
+ etmq->buf_used += processed;
+ etmq->buf_len -= processed;
+
+out:
+ return ret;
+}
+
+static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
+{
+ int ret;
+ struct cs_etm_packet_queue *packet_queue;
+
+ packet_queue = &tidq->packet_queue;
+
+ /* Process each packet in this chunk */
+ while (1) {
+ ret = cs_etm_decoder__get_packet(packet_queue,
+ tidq->packet);
+ if (ret <= 0)
+ /*
+ * Stop processing this chunk on
+ * end of data or error
+ */
+ break;
+
+ /*
+ * Since packet addresses are swapped in packet
+ * handling within below switch() statements,
+ * thus setting sample flags must be called
+ * prior to switch() statement to use address
+ * information before packets swapping.
+ */
+ ret = cs_etm__set_sample_flags(etmq, tidq);
+ if (ret < 0)
+ break;
+
+ switch (tidq->packet->sample_type) {
+ case CS_ETM_RANGE:
+ /*
+ * If the packet contains an instruction
+ * range, generate instruction sequence
+ * events.
+ */
+ cs_etm__sample(etmq, tidq);
+ break;
+ case CS_ETM_EXCEPTION:
+ case CS_ETM_EXCEPTION_RET:
+ /*
+ * If the exception packet is coming,
+ * make sure the previous instruction
+ * range packet to be handled properly.
+ */
+ cs_etm__exception(tidq);
+ break;
+ case CS_ETM_DISCONTINUITY:
+ /*
+ * Discontinuity in trace, flush
+ * previous branch stack
+ */
+ cs_etm__flush(etmq, tidq);
+ break;
+ case CS_ETM_EMPTY:
+ /*
+ * Should not receive empty packet,
+ * report error.
+ */
+ pr_err("CS ETM Trace: empty packet\n");
+ return -EINVAL;
+ default:
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
+{
+ int idx;
+ struct int_node *inode;
+ struct cs_etm_traceid_queue *tidq;
+ struct intlist *traceid_queues_list = etmq->traceid_queues_list;
+
+ intlist__for_each_entry(inode, traceid_queues_list) {
+ idx = (int)(intptr_t)inode->priv;
+ tidq = etmq->traceid_queues[idx];
+
+ /* Ignore return value */
+ cs_etm__process_traceid_queue(etmq, tidq);
+
+ /*
+ * Generate an instruction sample with the remaining
+ * branchstack entries.
+ */
+ cs_etm__flush(etmq, tidq);
+ }
+}
+
+static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq)
+{
+ int err = 0;
+ struct cs_etm_traceid_queue *tidq;
+
+ tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
+ if (!tidq)
+ return -EINVAL;
+
+ /* Go through each buffer in the queue and decode them one by one */
+ while (1) {
+ err = cs_etm__get_data_block(etmq);
+ if (err <= 0)
+ return err;
+
+ /* Run trace decoder until buffer consumed or end of trace */
+ do {
+ err = cs_etm__decode_data_block(etmq);
+ if (err)
+ return err;
+
+ /*
+ * Process each packet in this chunk, nothing to do if
+ * an error occurs other than hoping the next one will
+ * be better.
+ */
+ err = cs_etm__process_traceid_queue(etmq, tidq);
+
+ } while (etmq->buf_len);
+
+ if (err == 0)
+ /* Flush any remaining branch stack entries */
+ err = cs_etm__end_block(etmq, tidq);
+ }
+
+ return err;
+}
+
+static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq)
+{
+ int idx, err = 0;
+ struct cs_etm_traceid_queue *tidq;
+ struct int_node *inode;
+
+ /* Go through each buffer in the queue and decode them one by one */
+ while (1) {
+ err = cs_etm__get_data_block(etmq);
+ if (err <= 0)
+ return err;
+
+ /* Run trace decoder until buffer consumed or end of trace */
+ do {
+ err = cs_etm__decode_data_block(etmq);
+ if (err)
+ return err;
+
+ /*
+ * cs_etm__run_per_thread_timeless_decoder() runs on a
+ * single traceID queue because each TID has a separate
+ * buffer. But here in per-cpu mode we need to iterate
+ * over each channel instead.
+ */
+ intlist__for_each_entry(inode,
+ etmq->traceid_queues_list) {
+ idx = (int)(intptr_t)inode->priv;
+ tidq = etmq->traceid_queues[idx];
+ cs_etm__process_traceid_queue(etmq, tidq);
+ }
+ } while (etmq->buf_len);
+
+ intlist__for_each_entry(inode, etmq->traceid_queues_list) {
+ idx = (int)(intptr_t)inode->priv;
+ tidq = etmq->traceid_queues[idx];
+ /* Flush any remaining branch stack entries */
+ err = cs_etm__end_block(etmq, tidq);
+ if (err)
+ return err;
+ }
+ }
+
+ return err;
+}
+
+static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
+ pid_t tid)
+{
+ unsigned int i;
+ struct auxtrace_queues *queues = &etm->queues;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ struct auxtrace_queue *queue = &etm->queues.queue_array[i];
+ struct cs_etm_queue *etmq = queue->priv;
+ struct cs_etm_traceid_queue *tidq;
+
+ if (!etmq)
+ continue;
+
+ if (etm->per_thread_decoding) {
+ tidq = cs_etm__etmq_get_traceid_queue(
+ etmq, CS_ETM_PER_THREAD_TRACEID);
+
+ if (!tidq)
+ continue;
+
+ if (tid == -1 || thread__tid(tidq->thread) == tid)
+ cs_etm__run_per_thread_timeless_decoder(etmq);
+ } else
+ cs_etm__run_per_cpu_timeless_decoder(etmq);
+ }
+
+ return 0;
+}
+
+static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
+{
+ int ret = 0;
+ unsigned int cs_queue_nr, queue_nr, i;
+ u8 trace_chan_id;
+ u64 cs_timestamp;
+ struct auxtrace_queue *queue;
+ struct cs_etm_queue *etmq;
+ struct cs_etm_traceid_queue *tidq;
+
+ /*
+ * Pre-populate the heap with one entry from each queue so that we can
+ * start processing in time order across all queues.
+ */
+ for (i = 0; i < etm->queues.nr_queues; i++) {
+ etmq = etm->queues.queue_array[i].priv;
+ if (!etmq)
+ continue;
+
+ ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
+ if (ret)
+ return ret;
+ }
+
+ while (1) {
+ if (!etm->heap.heap_cnt)
+ goto out;
+
+ /* Take the entry at the top of the min heap */
+ cs_queue_nr = etm->heap.heap_array[0].queue_nr;
+ queue_nr = TO_QUEUE_NR(cs_queue_nr);
+ trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
+ queue = &etm->queues.queue_array[queue_nr];
+ etmq = queue->priv;
+
+ /*
+ * Remove the top entry from the heap since we are about
+ * to process it.
+ */
+ auxtrace_heap__pop(&etm->heap);
+
+ tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
+ if (!tidq) {
+ /*
+ * No traceID queue has been allocated for this traceID,
+ * which means something somewhere went very wrong. No
+ * other choice than simply exit.
+ */
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Packets associated with this timestamp are already in
+ * the etmq's traceID queue, so process them.
+ */
+ ret = cs_etm__process_traceid_queue(etmq, tidq);
+ if (ret < 0)
+ goto out;
+
+ /*
+ * Packets for this timestamp have been processed, time to
+ * move on to the next timestamp, fetching a new auxtrace_buffer
+ * if need be.
+ */
+refetch:
+ ret = cs_etm__get_data_block(etmq);
+ if (ret < 0)
+ goto out;
+
+ /*
+ * No more auxtrace_buffers to process in this etmq, simply
+ * move on to another entry in the auxtrace_heap.
+ */
+ if (!ret)
+ continue;
+
+ ret = cs_etm__decode_data_block(etmq);
+ if (ret)
+ goto out;
+
+ cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
+
+ if (!cs_timestamp) {
+ /*
+ * Function cs_etm__decode_data_block() returns when
+ * there is no more traces to decode in the current
+ * auxtrace_buffer OR when a timestamp has been
+ * encountered on any of the traceID queues. Since we
+ * did not get a timestamp, there is no more traces to
+ * process in this auxtrace_buffer. As such empty and
+ * flush all traceID queues.
+ */
+ cs_etm__clear_all_traceid_queues(etmq);
+
+ /* Fetch another auxtrace_buffer for this etmq */
+ goto refetch;
+ }
+
+ /*
+ * Add to the min heap the timestamp for packets that have
+ * just been decoded. They will be processed and synthesized
+ * during the next call to cs_etm__process_traceid_queue() for
+ * this queue/traceID.
+ */
+ cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
+ ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
+ }
+
+out:
+ return ret;
+}
+
+static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
+ union perf_event *event)
+{
+ struct thread *th;
+
+ if (etm->timeless_decoding)
+ return 0;
+
+ /*
+ * Add the tid/pid to the log so that we can get a match when we get a
+ * contextID from the decoder. Only track for the host: only kernel
+ * trace is supported for guests which wouldn't need pids so this should
+ * be fine.
+ */
+ th = machine__findnew_thread(&etm->session->machines.host,
+ event->itrace_start.pid,
+ event->itrace_start.tid);
+ if (!th)
+ return -ENOMEM;
+
+ thread__put(th);
+
+ return 0;
+}
+
+static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
+ union perf_event *event)
+{
+ struct thread *th;
+ bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+
+ /*
+ * Context switch in per-thread mode are irrelevant since perf
+ * will start/stop tracing as the process is scheduled.
+ */
+ if (etm->timeless_decoding)
+ return 0;
+
+ /*
+ * SWITCH_IN events carry the next process to be switched out while
+ * SWITCH_OUT events carry the process to be switched in. As such
+ * we don't care about IN events.
+ */
+ if (!out)
+ return 0;
+
+ /*
+ * Add the tid/pid to the log so that we can get a match when we get a
+ * contextID from the decoder. Only track for the host: only kernel
+ * trace is supported for guests which wouldn't need pids so this should
+ * be fine.
+ */
+ th = machine__findnew_thread(&etm->session->machines.host,
+ event->context_switch.next_prev_pid,
+ event->context_switch.next_prev_tid);
+ if (!th)
+ return -ENOMEM;
+
+ thread__put(th);
+
+ return 0;
+}
+
+static int cs_etm__process_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_tool *tool)
+{
+ struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
+ struct cs_etm_auxtrace,
+ auxtrace);
+
+ if (dump_trace)
+ return 0;
+
+ if (!tool->ordered_events) {
+ pr_err("CoreSight ETM Trace requires ordered events\n");
+ return -EINVAL;
+ }
+
+ switch (event->header.type) {
+ case PERF_RECORD_EXIT:
+ /*
+ * Don't need to wait for cs_etm__flush_events() in per-thread mode to
+ * start the decode because we know there will be no more trace from
+ * this thread. All this does is emit samples earlier than waiting for
+ * the flush in other modes, but with timestamps it makes sense to wait
+ * for flush so that events from different threads are interleaved
+ * properly.
+ */
+ if (etm->per_thread_decoding && etm->timeless_decoding)
+ return cs_etm__process_timeless_queues(etm,
+ event->fork.tid);
+ break;
+
+ case PERF_RECORD_ITRACE_START:
+ return cs_etm__process_itrace_start(etm, event);
+
+ case PERF_RECORD_SWITCH_CPU_WIDE:
+ return cs_etm__process_switch_cpu_wide(etm, event);
+
+ case PERF_RECORD_AUX:
+ /*
+ * Record the latest kernel timestamp available in the header
+ * for samples so that synthesised samples occur from this point
+ * onwards.
+ */
+ if (sample->time && (sample->time != (u64)-1))
+ etm->latest_kernel_timestamp = sample->time;
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void dump_queued_data(struct cs_etm_auxtrace *etm,
+ struct perf_record_auxtrace *event)
+{
+ struct auxtrace_buffer *buf;
+ unsigned int i;
+ /*
+ * Find all buffers with same reference in the queues and dump them.
+ * This is because the queues can contain multiple entries of the same
+ * buffer that were split on aux records.
+ */
+ for (i = 0; i < etm->queues.nr_queues; ++i)
+ list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
+ if (buf->reference == event->reference)
+ cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
+}
+
+static int cs_etm__process_auxtrace_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_tool *tool __maybe_unused)
+{
+ struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
+ struct cs_etm_auxtrace,
+ auxtrace);
+ if (!etm->data_queued) {
+ struct auxtrace_buffer *buffer;
+ off_t data_offset;
+ int fd = perf_data__fd(session->data);
+ bool is_pipe = perf_data__is_pipe(session->data);
+ int err;
+ int idx = event->auxtrace.idx;
+
+ if (is_pipe)
+ data_offset = 0;
+ else {
+ data_offset = lseek(fd, 0, SEEK_CUR);
+ if (data_offset == -1)
+ return -errno;
+ }
+
+ err = auxtrace_queues__add_event(&etm->queues, session,
+ event, data_offset, &buffer);
+ if (err)
+ return err;
+
+ /*
+ * Knowing if the trace is formatted or not requires a lookup of
+ * the aux record so only works in non-piped mode where data is
+ * queued in cs_etm__queue_aux_records(). Always assume
+ * formatted in piped mode (true).
+ */
+ err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
+ idx, true);
+ if (err)
+ return err;
+
+ if (dump_trace)
+ if (auxtrace_buffer__get_data(buffer, fd)) {
+ cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
+ auxtrace_buffer__put_data(buffer);
+ }
+ } else if (dump_trace)
+ dump_queued_data(etm, &event->auxtrace);
+
+ return 0;
+}
+
+static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm)
+{
+ struct evsel *evsel;
+ struct evlist *evlist = etm->session->evlist;
+
+ /* Override timeless mode with user input from --itrace=Z */
+ if (etm->synth_opts.timeless_decoding) {
+ etm->timeless_decoding = true;
+ return 0;
+ }
+
+ /*
+ * Find the cs_etm evsel and look at what its timestamp setting was
+ */
+ evlist__for_each_entry(evlist, evsel)
+ if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) {
+ etm->timeless_decoding =
+ !(evsel->core.attr.config & BIT(ETM_OPT_TS));
+ return 0;
+ }
+
+ pr_err("CS ETM: Couldn't find ETM evsel\n");
+ return -EINVAL;
+}
+
+/*
+ * Read a single cpu parameter block from the auxtrace_info priv block.
+ *
+ * For version 1 there is a per cpu nr_params entry. If we are handling
+ * version 1 file, then there may be less, the same, or more params
+ * indicated by this value than the compile time number we understand.
+ *
+ * For a version 0 info block, there are a fixed number, and we need to
+ * fill out the nr_param value in the metadata we create.
+ */
+static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
+ int out_blk_size, int nr_params_v0)
+{
+ u64 *metadata = NULL;
+ int hdr_version;
+ int nr_in_params, nr_out_params, nr_cmn_params;
+ int i, k;
+
+ metadata = zalloc(sizeof(*metadata) * out_blk_size);
+ if (!metadata)
+ return NULL;
+
+ /* read block current index & version */
+ i = *buff_in_offset;
+ hdr_version = buff_in[CS_HEADER_VERSION];
+
+ if (!hdr_version) {
+ /* read version 0 info block into a version 1 metadata block */
+ nr_in_params = nr_params_v0;
+ metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
+ metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
+ metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
+ /* remaining block params at offset +1 from source */
+ for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
+ metadata[k + 1] = buff_in[i + k];
+ /* version 0 has 2 common params */
+ nr_cmn_params = 2;
+ } else {
+ /* read version 1 info block - input and output nr_params may differ */
+ /* version 1 has 3 common params */
+ nr_cmn_params = 3;
+ nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];
+
+ /* if input has more params than output - skip excess */
+ nr_out_params = nr_in_params + nr_cmn_params;
+ if (nr_out_params > out_blk_size)
+ nr_out_params = out_blk_size;
+
+ for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
+ metadata[k] = buff_in[i + k];
+
+ /* record the actual nr params we copied */
+ metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
+ }
+
+ /* adjust in offset by number of in params used */
+ i += nr_in_params + nr_cmn_params;
+ *buff_in_offset = i;
+ return metadata;
+}
+
+/**
+ * Puts a fragment of an auxtrace buffer into the auxtrace queues based
+ * on the bounds of aux_event, if it matches with the buffer that's at
+ * file_offset.
+ *
+ * Normally, whole auxtrace buffers would be added to the queue. But we
+ * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
+ * is reset across each buffer, so splitting the buffers up in advance has
+ * the same effect.
+ */
+static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
+ struct perf_record_aux *aux_event, struct perf_sample *sample)
+{
+ int err;
+ char buf[PERF_SAMPLE_MAX_SIZE];
+ union perf_event *auxtrace_event_union;
+ struct perf_record_auxtrace *auxtrace_event;
+ union perf_event auxtrace_fragment;
+ __u64 aux_offset, aux_size;
+ __u32 idx;
+ bool formatted;
+
+ struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
+ struct cs_etm_auxtrace,
+ auxtrace);
+
+ /*
+ * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
+ * from looping through the auxtrace index.
+ */
+ err = perf_session__peek_event(session, file_offset, buf,
+ PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL);
+ if (err)
+ return err;
+ auxtrace_event = &auxtrace_event_union->auxtrace;
+ if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
+ return -EINVAL;
+
+ if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) ||
+ auxtrace_event->header.size != sz) {
+ return -EINVAL;
+ }
+
+ /*
+ * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
+ * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
+ * CPU as we set this always for the AUX_OUTPUT_HW_ID event.
+ * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1.
+ * Return 'not found' if mismatch.
+ */
+ if (auxtrace_event->cpu == (__u32) -1) {
+ etm->per_thread_decoding = true;
+ if (auxtrace_event->tid != sample->tid)
+ return 1;
+ } else if (auxtrace_event->cpu != sample->cpu) {
+ if (etm->per_thread_decoding) {
+ /*
+ * Found a per-cpu buffer after a per-thread one was
+ * already found
+ */
+ pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n");
+ return -EINVAL;
+ }
+ return 1;
+ }
+
+ if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
+ /*
+ * Clamp size in snapshot mode. The buffer size is clamped in
+ * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
+ * the buffer size.
+ */
+ aux_size = min(aux_event->aux_size, auxtrace_event->size);
+
+ /*
+ * In this mode, the head also points to the end of the buffer so aux_offset
+ * needs to have the size subtracted so it points to the beginning as in normal mode
+ */
+ aux_offset = aux_event->aux_offset - aux_size;
+ } else {
+ aux_size = aux_event->aux_size;
+ aux_offset = aux_event->aux_offset;
+ }
+
+ if (aux_offset >= auxtrace_event->offset &&
+ aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
+ /*
+ * If this AUX event was inside this buffer somewhere, create a new auxtrace event
+ * based on the sizes of the aux event, and queue that fragment.
+ */
+ auxtrace_fragment.auxtrace = *auxtrace_event;
+ auxtrace_fragment.auxtrace.size = aux_size;
+ auxtrace_fragment.auxtrace.offset = aux_offset;
+ file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;
+
+ pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
+ " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
+ err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
+ file_offset, NULL);
+ if (err)
+ return err;
+
+ idx = auxtrace_event->idx;
+ formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
+ return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
+ idx, formatted);
+ }
+
+ /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
+ return 1;
+}
+
+static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event,
+ u64 offset __maybe_unused, void *data __maybe_unused)
+{
+ /* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */
+ if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) {
+ (*(int *)data)++; /* increment found count */
+ return cs_etm__process_aux_output_hw_id(session, event);
+ }
+ return 0;
+}
+
+static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
+ u64 offset __maybe_unused, void *data __maybe_unused)
+{
+ struct perf_sample sample;
+ int ret;
+ struct auxtrace_index_entry *ent;
+ struct auxtrace_index *auxtrace_index;
+ struct evsel *evsel;
+ size_t i;
+
+ /* Don't care about any other events, we're only queuing buffers for AUX events */
+ if (event->header.type != PERF_RECORD_AUX)
+ return 0;
+
+ if (event->header.size < sizeof(struct perf_record_aux))
+ return -EINVAL;
+
+ /* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
+ if (!event->aux.aux_size)
+ return 0;
+
+ /*
+ * Parse the sample, we need the sample_id_all data that comes after the event so that the
+ * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
+ */
+ evsel = evlist__event2evsel(session->evlist, event);
+ if (!evsel)
+ return -EINVAL;
+ ret = evsel__parse_sample(evsel, event, &sample);
+ if (ret)
+ return ret;
+
+ /*
+ * Loop through the auxtrace index to find the buffer that matches up with this aux event.
+ */
+ list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
+ for (i = 0; i < auxtrace_index->nr; i++) {
+ ent = &auxtrace_index->entries[i];
+ ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
+ ent->sz, &event->aux, &sample);
+ /*
+ * Stop search on error or successful values. Continue search on
+ * 1 ('not found')
+ */
+ if (ret != 1)
+ return ret;
+ }
+ }
+
+ /*
+ * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
+ * don't exit with an error because it will still be possible to decode other aux records.
+ */
+ pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
+ " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
+ return 0;
+}
+
+static int cs_etm__queue_aux_records(struct perf_session *session)
+{
+ struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
+ struct auxtrace_index, list);
+ if (index && index->nr > 0)
+ return perf_session__peek_events(session, session->header.data_offset,
+ session->header.data_size,
+ cs_etm__queue_aux_records_cb, NULL);
+
+ /*
+ * We would get here if there are no entries in the index (either no auxtrace
+ * buffers or no index at all). Fail silently as there is the possibility of
+ * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
+ * false.
+ *
+ * In that scenario, buffers will not be split by AUX records.
+ */
+ return 0;
+}
+
+#define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
+ (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))
+
+/*
+ * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual
+ * timestamps).
+ */
+static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
+{
+ int j;
+
+ for (j = 0; j < num_cpu; j++) {
+ switch (metadata[j][CS_ETM_MAGIC]) {
+ case __perf_cs_etmv4_magic:
+ if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
+ return false;
+ break;
+ case __perf_cs_ete_magic:
+ if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
+ return false;
+ break;
+ default:
+ /* Unknown / unsupported magic number. */
+ return false;
+ }
+ }
+ return true;
+}
+
+/* map trace ids to correct metadata block, from information in metadata */
+static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
+{
+ u64 cs_etm_magic;
+ u8 trace_chan_id;
+ int i, err;
+
+ for (i = 0; i < num_cpu; i++) {
+ cs_etm_magic = metadata[i][CS_ETM_MAGIC];
+ switch (cs_etm_magic) {
+ case __perf_cs_etmv3_magic:
+ metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
+ trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]);
+ break;
+ case __perf_cs_etmv4_magic:
+ case __perf_cs_ete_magic:
+ metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
+ trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]);
+ break;
+ default:
+ /* unknown magic number */
+ return -EINVAL;
+ }
+ err = cs_etm__map_trace_id(trace_chan_id, metadata[i]);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+/*
+ * If we found AUX_HW_ID packets, then set any metadata marked as unused to the
+ * unused value to reduce the number of unneeded decoders created.
+ */
+static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
+{
+ u64 cs_etm_magic;
+ int i;
+
+ for (i = 0; i < num_cpu; i++) {
+ cs_etm_magic = metadata[i][CS_ETM_MAGIC];
+ switch (cs_etm_magic) {
+ case __perf_cs_etmv3_magic:
+ if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
+ metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
+ break;
+ case __perf_cs_etmv4_magic:
+ case __perf_cs_ete_magic:
+ if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
+ metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
+ break;
+ default:
+ /* unknown magic number */
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+int cs_etm__process_auxtrace_info_full(union perf_event *event,
+ struct perf_session *session)
+{
+ struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
+ struct cs_etm_auxtrace *etm = NULL;
+ struct perf_record_time_conv *tc = &session->time_conv;
+ int event_header_size = sizeof(struct perf_event_header);
+ int total_size = auxtrace_info->header.size;
+ int priv_size = 0;
+ int num_cpu;
+ int err = 0;
+ int aux_hw_id_found;
+ int i, j;
+ u64 *ptr = NULL;
+ u64 **metadata = NULL;
+
+ /*
+ * Create an RB tree for traceID-metadata tuple. Since the conversion
+ * has to be made for each packet that gets decoded, optimizing access
+ * in anything other than a sequential array is worth doing.
+ */
+ traceid_list = intlist__new(NULL);
+ if (!traceid_list)
+ return -ENOMEM;
+
+ /* First the global part */
+ ptr = (u64 *) auxtrace_info->priv;
+ num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
+ metadata = zalloc(sizeof(*metadata) * num_cpu);
+ if (!metadata) {
+ err = -ENOMEM;
+ goto err_free_traceid_list;
+ }
+
+ /* Start parsing after the common part of the header */
+ i = CS_HEADER_VERSION_MAX;
+
+ /*
+ * The metadata is stored in the auxtrace_info section and encodes
+ * the configuration of the ARM embedded trace macrocell which is
+ * required by the trace decoder to properly decode the trace due
+ * to its highly compressed nature.
+ */
+ for (j = 0; j < num_cpu; j++) {
+ if (ptr[i] == __perf_cs_etmv3_magic) {
+ metadata[j] =
+ cs_etm__create_meta_blk(ptr, &i,
+ CS_ETM_PRIV_MAX,
+ CS_ETM_NR_TRC_PARAMS_V0);
+ } else if (ptr[i] == __perf_cs_etmv4_magic) {
+ metadata[j] =
+ cs_etm__create_meta_blk(ptr, &i,
+ CS_ETMV4_PRIV_MAX,
+ CS_ETMV4_NR_TRC_PARAMS_V0);
+ } else if (ptr[i] == __perf_cs_ete_magic) {
+ metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
+ } else {
+ ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
+ ptr[i]);
+ err = -EINVAL;
+ goto err_free_metadata;
+ }
+
+ if (!metadata[j]) {
+ err = -ENOMEM;
+ goto err_free_metadata;
+ }
+ }
+
+ /*
+ * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
+ * CS_ETMV4_PRIV_MAX mark how many double words are in the
+ * global metadata, and each cpu's metadata respectively.
+ * The following tests if the correct number of double words was
+ * present in the auxtrace info section.
+ */
+ priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
+ if (i * 8 != priv_size) {
+ err = -EINVAL;
+ goto err_free_metadata;
+ }
+
+ etm = zalloc(sizeof(*etm));
+
+ if (!etm) {
+ err = -ENOMEM;
+ goto err_free_metadata;
+ }
+
+ /*
+ * As all the ETMs run at the same exception level, the system should
+ * have the same PID format crossing CPUs. So cache the PID format
+ * and reuse it for sequential decoding.
+ */
+ etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
+
+ err = auxtrace_queues__init(&etm->queues);
+ if (err)
+ goto err_free_etm;
+
+ if (session->itrace_synth_opts->set) {
+ etm->synth_opts = *session->itrace_synth_opts;
+ } else {
+ itrace_synth_opts__set_default(&etm->synth_opts,
+ session->itrace_synth_opts->default_no_sample);
+ etm->synth_opts.callchain = false;
+ }
+
+ etm->session = session;
+
+ etm->num_cpu = num_cpu;
+ etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
+ etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
+ etm->metadata = metadata;
+ etm->auxtrace_type = auxtrace_info->type;
+
+ /* Use virtual timestamps if all ETMs report ts_source = 1 */
+ etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
+
+ if (!etm->has_virtual_ts)
+ ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
+ "The time field of the samples will not be set accurately.\n\n");
+
+ etm->auxtrace.process_event = cs_etm__process_event;
+ etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
+ etm->auxtrace.flush_events = cs_etm__flush_events;
+ etm->auxtrace.free_events = cs_etm__free_events;
+ etm->auxtrace.free = cs_etm__free;
+ etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
+ session->auxtrace = &etm->auxtrace;
+
+ err = cs_etm__setup_timeless_decoding(etm);
+ if (err)
+ return err;
+
+ etm->tc.time_shift = tc->time_shift;
+ etm->tc.time_mult = tc->time_mult;
+ etm->tc.time_zero = tc->time_zero;
+ if (event_contains(*tc, time_cycles)) {
+ etm->tc.time_cycles = tc->time_cycles;
+ etm->tc.time_mask = tc->time_mask;
+ etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
+ etm->tc.cap_user_time_short = tc->cap_user_time_short;
+ }
+ err = cs_etm__synth_events(etm, session);
+ if (err)
+ goto err_free_queues;
+
+ /*
+ * Map Trace ID values to CPU metadata.
+ *
+ * Trace metadata will always contain Trace ID values from the legacy algorithm. If the
+ * files has been recorded by a "new" perf updated to handle AUX_HW_ID then the metadata
+ * ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set.
+ *
+ * The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use
+ * the same IDs as the old algorithm as far as is possible, unless there are clashes
+ * in which case a different value will be used. This means an older perf may still
+ * be able to record and read files generate on a newer system.
+ *
+ * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
+ * those packets. If they are there then the values will be mapped and plugged into
+ * the metadata. We then set any remaining metadata values with the used flag to a
+ * value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required.
+ *
+ * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
+ * then we map Trace ID values to CPU directly from the metadata - clearing any unused
+ * flags if present.
+ */
+
+ /* first scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
+ aux_hw_id_found = 0;
+ err = perf_session__peek_events(session, session->header.data_offset,
+ session->header.data_size,
+ cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
+ if (err)
+ goto err_free_queues;
+
+ /* if HW ID found then clear any unused metadata ID values */
+ if (aux_hw_id_found)
+ err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata);
+ /* otherwise, this is a file with metadata values only, map from metadata */
+ else
+ err = cs_etm__map_trace_ids_metadata(num_cpu, metadata);
+
+ if (err)
+ goto err_free_queues;
+
+ err = cs_etm__queue_aux_records(session);
+ if (err)
+ goto err_free_queues;
+
+ etm->data_queued = etm->queues.populated;
+ return 0;
+
+err_free_queues:
+ auxtrace_queues__free(&etm->queues);
+ session->auxtrace = NULL;
+err_free_etm:
+ zfree(&etm);
+err_free_metadata:
+ /* No need to check @metadata[j], free(NULL) is supported */
+ for (j = 0; j < num_cpu; j++)
+ zfree(&metadata[j]);
+ zfree(&metadata);
+err_free_traceid_list:
+ intlist__delete(traceid_list);
+ return err;
+}
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
new file mode 100644
index 000000000..7cca37887
--- /dev/null
+++ b/tools/perf/util/cs-etm.h
@@ -0,0 +1,277 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#ifndef INCLUDE__UTIL_PERF_CS_ETM_H__
+#define INCLUDE__UTIL_PERF_CS_ETM_H__
+
+#include "debug.h"
+#include "util/event.h"
+#include <linux/bits.h>
+
+struct perf_session;
+struct perf_pmu;
+
+/*
+ * Versioning header in case things need to change in the future. That way
+ * decoding of old snapshot is still possible.
+ */
+enum {
+ /* Starting with 0x0 */
+ CS_HEADER_VERSION,
+ /* PMU->type (32 bit), total # of CPUs (32 bit) */
+ CS_PMU_TYPE_CPUS,
+ CS_ETM_SNAPSHOT,
+ CS_HEADER_VERSION_MAX,
+};
+
+/*
+ * Update the version for new format.
+ *
+ * Version 1: format adds a param count to the per cpu metadata.
+ * This allows easy adding of new metadata parameters.
+ * Requires that new params always added after current ones.
+ * Also allows client reader to handle file versions that are different by
+ * checking the number of params in the file vs the number expected.
+ *
+ * Version 2: Drivers will use PERF_RECORD_AUX_OUTPUT_HW_ID to output
+ * CoreSight Trace ID. ...TRACEIDR metadata will be set to legacy values
+ * but with addition flags.
+ */
+#define CS_HEADER_CURRENT_VERSION 2
+
+/* Beginning of header common to both ETMv3 and V4 */
+enum {
+ CS_ETM_MAGIC,
+ CS_ETM_CPU,
+ /* Number of trace config params in following ETM specific block */
+ CS_ETM_NR_TRC_PARAMS,
+ CS_ETM_COMMON_BLK_MAX_V1,
+};
+
+/* ETMv3/PTM metadata */
+enum {
+ /* Dynamic, configurable parameters */
+ CS_ETM_ETMCR = CS_ETM_COMMON_BLK_MAX_V1,
+ CS_ETM_ETMTRACEIDR,
+ /* RO, taken from sysFS */
+ CS_ETM_ETMCCER,
+ CS_ETM_ETMIDR,
+ CS_ETM_PRIV_MAX,
+};
+
+/* define fixed version 0 length - allow new format reader to read old files. */
+#define CS_ETM_NR_TRC_PARAMS_V0 (CS_ETM_ETMIDR - CS_ETM_ETMCR + 1)
+
+/* ETMv4 metadata */
+enum {
+ /* Dynamic, configurable parameters */
+ CS_ETMV4_TRCCONFIGR = CS_ETM_COMMON_BLK_MAX_V1,
+ CS_ETMV4_TRCTRACEIDR,
+ /* RO, taken from sysFS */
+ CS_ETMV4_TRCIDR0,
+ CS_ETMV4_TRCIDR1,
+ CS_ETMV4_TRCIDR2,
+ CS_ETMV4_TRCIDR8,
+ CS_ETMV4_TRCAUTHSTATUS,
+ CS_ETMV4_TS_SOURCE,
+ CS_ETMV4_PRIV_MAX,
+};
+
+/* define fixed version 0 length - allow new format reader to read old files. */
+#define CS_ETMV4_NR_TRC_PARAMS_V0 (CS_ETMV4_TRCAUTHSTATUS - CS_ETMV4_TRCCONFIGR + 1)
+
+/*
+ * ETE metadata is ETMv4 plus TRCDEVARCH register and doesn't support header V0 since it was
+ * added in header V1
+ */
+enum {
+ /* Dynamic, configurable parameters */
+ CS_ETE_TRCCONFIGR = CS_ETM_COMMON_BLK_MAX_V1,
+ CS_ETE_TRCTRACEIDR,
+ /* RO, taken from sysFS */
+ CS_ETE_TRCIDR0,
+ CS_ETE_TRCIDR1,
+ CS_ETE_TRCIDR2,
+ CS_ETE_TRCIDR8,
+ CS_ETE_TRCAUTHSTATUS,
+ CS_ETE_TRCDEVARCH,
+ CS_ETE_TS_SOURCE,
+ CS_ETE_PRIV_MAX
+};
+
+/*
+ * Check for valid CoreSight trace ID. If an invalid value is present in the metadata,
+ * then IDs are present in the hardware ID packet in the data file.
+ */
+#define CS_IS_VALID_TRACE_ID(id) ((id > 0) && (id < 0x70))
+
+/*
+ * ETMv3 exception encoding number:
+ * See Embedded Trace Macrocell specification (ARM IHI 0014Q)
+ * table 7-12 Encoding of Exception[3:0] for non-ARMv7-M processors.
+ */
+enum {
+ CS_ETMV3_EXC_NONE = 0,
+ CS_ETMV3_EXC_DEBUG_HALT = 1,
+ CS_ETMV3_EXC_SMC = 2,
+ CS_ETMV3_EXC_HYP = 3,
+ CS_ETMV3_EXC_ASYNC_DATA_ABORT = 4,
+ CS_ETMV3_EXC_JAZELLE_THUMBEE = 5,
+ CS_ETMV3_EXC_PE_RESET = 8,
+ CS_ETMV3_EXC_UNDEFINED_INSTR = 9,
+ CS_ETMV3_EXC_SVC = 10,
+ CS_ETMV3_EXC_PREFETCH_ABORT = 11,
+ CS_ETMV3_EXC_DATA_FAULT = 12,
+ CS_ETMV3_EXC_GENERIC = 13,
+ CS_ETMV3_EXC_IRQ = 14,
+ CS_ETMV3_EXC_FIQ = 15,
+};
+
+/*
+ * ETMv4 exception encoding number:
+ * See ARM Embedded Trace Macrocell Architecture Specification (ARM IHI 0064D)
+ * table 6-12 Possible values for the TYPE field in an Exception instruction
+ * trace packet, for ARMv7-A/R and ARMv8-A/R PEs.
+ */
+enum {
+ CS_ETMV4_EXC_RESET = 0,
+ CS_ETMV4_EXC_DEBUG_HALT = 1,
+ CS_ETMV4_EXC_CALL = 2,
+ CS_ETMV4_EXC_TRAP = 3,
+ CS_ETMV4_EXC_SYSTEM_ERROR = 4,
+ CS_ETMV4_EXC_INST_DEBUG = 6,
+ CS_ETMV4_EXC_DATA_DEBUG = 7,
+ CS_ETMV4_EXC_ALIGNMENT = 10,
+ CS_ETMV4_EXC_INST_FAULT = 11,
+ CS_ETMV4_EXC_DATA_FAULT = 12,
+ CS_ETMV4_EXC_IRQ = 14,
+ CS_ETMV4_EXC_FIQ = 15,
+ CS_ETMV4_EXC_END = 31,
+};
+
+enum cs_etm_sample_type {
+ CS_ETM_EMPTY,
+ CS_ETM_RANGE,
+ CS_ETM_DISCONTINUITY,
+ CS_ETM_EXCEPTION,
+ CS_ETM_EXCEPTION_RET,
+};
+
+enum cs_etm_isa {
+ CS_ETM_ISA_UNKNOWN,
+ CS_ETM_ISA_A64,
+ CS_ETM_ISA_A32,
+ CS_ETM_ISA_T32,
+};
+
+struct cs_etm_queue;
+
+struct cs_etm_packet {
+ enum cs_etm_sample_type sample_type;
+ enum cs_etm_isa isa;
+ u64 start_addr;
+ u64 end_addr;
+ u32 instr_count;
+ u32 last_instr_type;
+ u32 last_instr_subtype;
+ u32 flags;
+ u32 exception_number;
+ bool last_instr_cond;
+ bool last_instr_taken_branch;
+ u8 last_instr_size;
+ u8 trace_chan_id;
+ int cpu;
+};
+
+#define CS_ETM_PACKET_MAX_BUFFER 1024
+
+/*
+ * When working with per-thread scenarios the process under trace can
+ * be scheduled on any CPU and as such, more than one traceID may be
+ * associated with the same process. Since a traceID of '0' is illegal
+ * as per the CoreSight architecture, use that specific value to
+ * identify the queue where all packets (with any traceID) are
+ * aggregated.
+ */
+#define CS_ETM_PER_THREAD_TRACEID 0
+
+struct cs_etm_packet_queue {
+ u32 packet_count;
+ u32 head;
+ u32 tail;
+ u32 instr_count;
+ u64 cs_timestamp; /* Timestamp from trace data, converted to ns if possible */
+ u64 next_cs_timestamp;
+ struct cs_etm_packet packet_buffer[CS_ETM_PACKET_MAX_BUFFER];
+};
+
+#define KiB(x) ((x) * 1024)
+#define MiB(x) ((x) * 1024 * 1024)
+
+#define CS_ETM_INVAL_ADDR 0xdeadbeefdeadbeefUL
+
+#define BMVAL(val, lsb, msb) ((val & GENMASK(msb, lsb)) >> lsb)
+
+#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_MAX * sizeof(u64))
+
+#define __perf_cs_etmv3_magic 0x3030303030303030ULL
+#define __perf_cs_etmv4_magic 0x4040404040404040ULL
+#define __perf_cs_ete_magic 0x5050505050505050ULL
+#define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64))
+#define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64))
+#define CS_ETE_PRIV_SIZE (CS_ETE_PRIV_MAX * sizeof(u64))
+
+#define INFO_HEADER_SIZE (sizeof(((struct perf_record_auxtrace_info *)0)->type) + \
+ sizeof(((struct perf_record_auxtrace_info *)0)->reserved__))
+
+/* CoreSight trace ID is currently the bottom 7 bits of the value */
+#define CORESIGHT_TRACE_ID_VAL_MASK GENMASK(6, 0)
+
+/*
+ * perf record will set the legacy meta data values as unused initially.
+ * This allows perf report to manage the decoders created when dynamic
+ * allocation in operation.
+ */
+#define CORESIGHT_TRACE_ID_UNUSED_FLAG BIT(31)
+
+/* Value to set for unused trace ID values */
+#define CORESIGHT_TRACE_ID_UNUSED_VAL 0x7F
+
+int cs_etm__process_auxtrace_info(union perf_event *event,
+ struct perf_session *session);
+struct perf_event_attr *cs_etm_get_default_config(struct perf_pmu *pmu);
+
+enum cs_etm_pid_fmt {
+ CS_ETM_PIDFMT_NONE,
+ CS_ETM_PIDFMT_CTXTID,
+ CS_ETM_PIDFMT_CTXTID2
+};
+
+#ifdef HAVE_CSTRACE_SUPPORT
+#include <opencsd/ocsd_if_types.h>
+int cs_etm__get_cpu(u8 trace_chan_id, int *cpu);
+enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq);
+int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
+ u8 trace_chan_id, ocsd_ex_level el);
+bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq);
+void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
+ u8 trace_chan_id);
+struct cs_etm_packet_queue
+*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id);
+int cs_etm__process_auxtrace_info_full(union perf_event *event __maybe_unused,
+ struct perf_session *session __maybe_unused);
+u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp);
+#else
+static inline int
+cs_etm__process_auxtrace_info_full(union perf_event *event __maybe_unused,
+ struct perf_session *session __maybe_unused)
+{
+ pr_err("\nCS ETM Trace: OpenCSD is not linked in, please recompile with CORESIGHT=1\n");
+ return -1;
+}
+#endif
+
+#endif
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
new file mode 100644
index 000000000..2b732bcca
--- /dev/null
+++ b/tools/perf/util/data-convert-bt.c
@@ -0,0 +1,1702 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * CTF writing support via babeltrace.
+ *
+ * Copyright (C) 2014, Jiri Olsa <jolsa@redhat.com>
+ * Copyright (C) 2014, Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+#include <babeltrace/ctf-writer/writer.h>
+#include <babeltrace/ctf-writer/clock.h>
+#include <babeltrace/ctf-writer/stream.h>
+#include <babeltrace/ctf-writer/event.h>
+#include <babeltrace/ctf-writer/event-types.h>
+#include <babeltrace/ctf-writer/event-fields.h>
+#include <babeltrace/ctf-ir/utils.h>
+#include <babeltrace/ctf/events.h>
+#include "asm/bug.h"
+#include "data-convert.h"
+#include "session.h"
+#include "debug.h"
+#include "tool.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "machine.h"
+#include "config.h"
+#include <linux/ctype.h>
+#include <linux/err.h>
+#include <linux/time64.h>
+#include "util.h"
+#include "clockid.h"
+#include "util/sample.h"
+
+#ifdef HAVE_LIBTRACEEVENT
+#include <traceevent/event-parse.h>
+#endif
+
+#define pr_N(n, fmt, ...) \
+ eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__)
+
+#define pr(fmt, ...) pr_N(1, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr2(fmt, ...) pr_N(2, pr_fmt(fmt), ##__VA_ARGS__)
+
+#define pr_time2(t, fmt, ...) pr_time_N(2, debug_data_convert, t, pr_fmt(fmt), ##__VA_ARGS__)
+
+struct evsel_priv {
+ struct bt_ctf_event_class *event_class;
+};
+
+#define MAX_CPUS 4096
+
+struct ctf_stream {
+ struct bt_ctf_stream *stream;
+ int cpu;
+ u32 count;
+};
+
+struct ctf_writer {
+ /* writer primitives */
+ struct bt_ctf_writer *writer;
+ struct ctf_stream **stream;
+ int stream_cnt;
+ struct bt_ctf_stream_class *stream_class;
+ struct bt_ctf_clock *clock;
+
+ /* data types */
+ union {
+ struct {
+ struct bt_ctf_field_type *s64;
+ struct bt_ctf_field_type *u64;
+ struct bt_ctf_field_type *s32;
+ struct bt_ctf_field_type *u32;
+ struct bt_ctf_field_type *string;
+ struct bt_ctf_field_type *u32_hex;
+ struct bt_ctf_field_type *u64_hex;
+ };
+ struct bt_ctf_field_type *array[6];
+ } data;
+ struct bt_ctf_event_class *comm_class;
+ struct bt_ctf_event_class *exit_class;
+ struct bt_ctf_event_class *fork_class;
+ struct bt_ctf_event_class *mmap_class;
+ struct bt_ctf_event_class *mmap2_class;
+};
+
+struct convert {
+ struct perf_tool tool;
+ struct ctf_writer writer;
+
+ u64 events_size;
+ u64 events_count;
+ u64 non_sample_count;
+
+ /* Ordered events configured queue size. */
+ u64 queue_size;
+};
+
+static int value_set(struct bt_ctf_field_type *type,
+ struct bt_ctf_event *event,
+ const char *name, u64 val)
+{
+ struct bt_ctf_field *field;
+ bool sign = bt_ctf_field_type_integer_get_signed(type);
+ int ret;
+
+ field = bt_ctf_field_create(type);
+ if (!field) {
+ pr_err("failed to create a field %s\n", name);
+ return -1;
+ }
+
+ if (sign) {
+ ret = bt_ctf_field_signed_integer_set_value(field, val);
+ if (ret) {
+ pr_err("failed to set field value %s\n", name);
+ goto err;
+ }
+ } else {
+ ret = bt_ctf_field_unsigned_integer_set_value(field, val);
+ if (ret) {
+ pr_err("failed to set field value %s\n", name);
+ goto err;
+ }
+ }
+
+ ret = bt_ctf_event_set_payload(event, name, field);
+ if (ret) {
+ pr_err("failed to set payload %s\n", name);
+ goto err;
+ }
+
+ pr2(" SET [%s = %" PRIu64 "]\n", name, val);
+
+err:
+ bt_ctf_field_put(field);
+ return ret;
+}
+
+#define __FUNC_VALUE_SET(_name, _val_type) \
+static __maybe_unused int value_set_##_name(struct ctf_writer *cw, \
+ struct bt_ctf_event *event, \
+ const char *name, \
+ _val_type val) \
+{ \
+ struct bt_ctf_field_type *type = cw->data._name; \
+ return value_set(type, event, name, (u64) val); \
+}
+
+#define FUNC_VALUE_SET(_name) __FUNC_VALUE_SET(_name, _name)
+
+FUNC_VALUE_SET(s32)
+FUNC_VALUE_SET(u32)
+FUNC_VALUE_SET(s64)
+FUNC_VALUE_SET(u64)
+__FUNC_VALUE_SET(u64_hex, u64)
+
+static int string_set_value(struct bt_ctf_field *field, const char *string);
+static __maybe_unused int
+value_set_string(struct ctf_writer *cw, struct bt_ctf_event *event,
+ const char *name, const char *string)
+{
+ struct bt_ctf_field_type *type = cw->data.string;
+ struct bt_ctf_field *field;
+ int ret = 0;
+
+ field = bt_ctf_field_create(type);
+ if (!field) {
+ pr_err("failed to create a field %s\n", name);
+ return -1;
+ }
+
+ ret = string_set_value(field, string);
+ if (ret) {
+ pr_err("failed to set value %s\n", name);
+ goto err_put_field;
+ }
+
+ ret = bt_ctf_event_set_payload(event, name, field);
+ if (ret)
+ pr_err("failed to set payload %s\n", name);
+
+err_put_field:
+ bt_ctf_field_put(field);
+ return ret;
+}
+
+static struct bt_ctf_field_type*
+get_tracepoint_field_type(struct ctf_writer *cw, struct tep_format_field *field)
+{
+ unsigned long flags = field->flags;
+
+ if (flags & TEP_FIELD_IS_STRING)
+ return cw->data.string;
+
+ if (!(flags & TEP_FIELD_IS_SIGNED)) {
+ /* unsigned long are mostly pointers */
+ if (flags & TEP_FIELD_IS_LONG || flags & TEP_FIELD_IS_POINTER)
+ return cw->data.u64_hex;
+ }
+
+ if (flags & TEP_FIELD_IS_SIGNED) {
+ if (field->size == 8)
+ return cw->data.s64;
+ else
+ return cw->data.s32;
+ }
+
+ if (field->size == 8)
+ return cw->data.u64;
+ else
+ return cw->data.u32;
+}
+
+static unsigned long long adjust_signedness(unsigned long long value_int, int size)
+{
+ unsigned long long value_mask;
+
+ /*
+ * value_mask = (1 << (size * 8 - 1)) - 1.
+ * Directly set value_mask for code readers.
+ */
+ switch (size) {
+ case 1:
+ value_mask = 0x7fULL;
+ break;
+ case 2:
+ value_mask = 0x7fffULL;
+ break;
+ case 4:
+ value_mask = 0x7fffffffULL;
+ break;
+ case 8:
+ /*
+ * For 64 bit value, return it self. There is no need
+ * to fill high bit.
+ */
+ /* Fall through */
+ default:
+ /* BUG! */
+ return value_int;
+ }
+
+ /* If it is a positive value, don't adjust. */
+ if ((value_int & (~0ULL - value_mask)) == 0)
+ return value_int;
+
+ /* Fill upper part of value_int with 1 to make it a negative long long. */
+ return (value_int & value_mask) | ~value_mask;
+}
+
+static int string_set_value(struct bt_ctf_field *field, const char *string)
+{
+ char *buffer = NULL;
+ size_t len = strlen(string), i, p;
+ int err;
+
+ for (i = p = 0; i < len; i++, p++) {
+ if (isprint(string[i])) {
+ if (!buffer)
+ continue;
+ buffer[p] = string[i];
+ } else {
+ char numstr[5];
+
+ snprintf(numstr, sizeof(numstr), "\\x%02x",
+ (unsigned int)(string[i]) & 0xff);
+
+ if (!buffer) {
+ buffer = zalloc(i + (len - i) * 4 + 2);
+ if (!buffer) {
+ pr_err("failed to set unprintable string '%s'\n", string);
+ return bt_ctf_field_string_set_value(field, "UNPRINTABLE-STRING");
+ }
+ if (i > 0)
+ strncpy(buffer, string, i);
+ }
+ memcpy(buffer + p, numstr, 4);
+ p += 3;
+ }
+ }
+
+ if (!buffer)
+ return bt_ctf_field_string_set_value(field, string);
+ err = bt_ctf_field_string_set_value(field, buffer);
+ free(buffer);
+ return err;
+}
+
+static int add_tracepoint_field_value(struct ctf_writer *cw,
+ struct bt_ctf_event_class *event_class,
+ struct bt_ctf_event *event,
+ struct perf_sample *sample,
+ struct tep_format_field *fmtf)
+{
+ struct bt_ctf_field_type *type;
+ struct bt_ctf_field *array_field;
+ struct bt_ctf_field *field;
+ const char *name = fmtf->name;
+ void *data = sample->raw_data;
+ unsigned long flags = fmtf->flags;
+ unsigned int n_items;
+ unsigned int i;
+ unsigned int offset;
+ unsigned int len;
+ int ret;
+
+ name = fmtf->alias;
+ offset = fmtf->offset;
+ len = fmtf->size;
+ if (flags & TEP_FIELD_IS_STRING)
+ flags &= ~TEP_FIELD_IS_ARRAY;
+
+ if (flags & TEP_FIELD_IS_DYNAMIC) {
+ unsigned long long tmp_val;
+
+ tmp_val = tep_read_number(fmtf->event->tep,
+ data + offset, len);
+ offset = tmp_val;
+ len = offset >> 16;
+ offset &= 0xffff;
+ if (tep_field_is_relative(flags))
+ offset += fmtf->offset + fmtf->size;
+ }
+
+ if (flags & TEP_FIELD_IS_ARRAY) {
+
+ type = bt_ctf_event_class_get_field_by_name(
+ event_class, name);
+ array_field = bt_ctf_field_create(type);
+ bt_ctf_field_type_put(type);
+ if (!array_field) {
+ pr_err("Failed to create array type %s\n", name);
+ return -1;
+ }
+
+ len = fmtf->size / fmtf->arraylen;
+ n_items = fmtf->arraylen;
+ } else {
+ n_items = 1;
+ array_field = NULL;
+ }
+
+ type = get_tracepoint_field_type(cw, fmtf);
+
+ for (i = 0; i < n_items; i++) {
+ if (flags & TEP_FIELD_IS_ARRAY)
+ field = bt_ctf_field_array_get_field(array_field, i);
+ else
+ field = bt_ctf_field_create(type);
+
+ if (!field) {
+ pr_err("failed to create a field %s\n", name);
+ return -1;
+ }
+
+ if (flags & TEP_FIELD_IS_STRING)
+ ret = string_set_value(field, data + offset + i * len);
+ else {
+ unsigned long long value_int;
+
+ value_int = tep_read_number(
+ fmtf->event->tep,
+ data + offset + i * len, len);
+
+ if (!(flags & TEP_FIELD_IS_SIGNED))
+ ret = bt_ctf_field_unsigned_integer_set_value(
+ field, value_int);
+ else
+ ret = bt_ctf_field_signed_integer_set_value(
+ field, adjust_signedness(value_int, len));
+ }
+
+ if (ret) {
+ pr_err("failed to set file value %s\n", name);
+ goto err_put_field;
+ }
+ if (!(flags & TEP_FIELD_IS_ARRAY)) {
+ ret = bt_ctf_event_set_payload(event, name, field);
+ if (ret) {
+ pr_err("failed to set payload %s\n", name);
+ goto err_put_field;
+ }
+ }
+ bt_ctf_field_put(field);
+ }
+ if (flags & TEP_FIELD_IS_ARRAY) {
+ ret = bt_ctf_event_set_payload(event, name, array_field);
+ if (ret) {
+ pr_err("Failed add payload array %s\n", name);
+ return -1;
+ }
+ bt_ctf_field_put(array_field);
+ }
+ return 0;
+
+err_put_field:
+ bt_ctf_field_put(field);
+ return -1;
+}
+
+static int add_tracepoint_fields_values(struct ctf_writer *cw,
+ struct bt_ctf_event_class *event_class,
+ struct bt_ctf_event *event,
+ struct tep_format_field *fields,
+ struct perf_sample *sample)
+{
+ struct tep_format_field *field;
+ int ret;
+
+ for (field = fields; field; field = field->next) {
+ ret = add_tracepoint_field_value(cw, event_class, event, sample,
+ field);
+ if (ret)
+ return -1;
+ }
+ return 0;
+}
+
+static int add_tracepoint_values(struct ctf_writer *cw,
+ struct bt_ctf_event_class *event_class,
+ struct bt_ctf_event *event,
+ struct evsel *evsel,
+ struct perf_sample *sample)
+{
+ struct tep_format_field *common_fields = evsel->tp_format->format.common_fields;
+ struct tep_format_field *fields = evsel->tp_format->format.fields;
+ int ret;
+
+ ret = add_tracepoint_fields_values(cw, event_class, event,
+ common_fields, sample);
+ if (!ret)
+ ret = add_tracepoint_fields_values(cw, event_class, event,
+ fields, sample);
+
+ return ret;
+}
+
+static int
+add_bpf_output_values(struct bt_ctf_event_class *event_class,
+ struct bt_ctf_event *event,
+ struct perf_sample *sample)
+{
+ struct bt_ctf_field_type *len_type, *seq_type;
+ struct bt_ctf_field *len_field, *seq_field;
+ unsigned int raw_size = sample->raw_size;
+ unsigned int nr_elements = raw_size / sizeof(u32);
+ unsigned int i;
+ int ret;
+
+ if (nr_elements * sizeof(u32) != raw_size)
+ pr_warning("Incorrect raw_size (%u) in bpf output event, skip %zu bytes\n",
+ raw_size, nr_elements * sizeof(u32) - raw_size);
+
+ len_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_len");
+ len_field = bt_ctf_field_create(len_type);
+ if (!len_field) {
+ pr_err("failed to create 'raw_len' for bpf output event\n");
+ ret = -1;
+ goto put_len_type;
+ }
+
+ ret = bt_ctf_field_unsigned_integer_set_value(len_field, nr_elements);
+ if (ret) {
+ pr_err("failed to set field value for raw_len\n");
+ goto put_len_field;
+ }
+ ret = bt_ctf_event_set_payload(event, "raw_len", len_field);
+ if (ret) {
+ pr_err("failed to set payload to raw_len\n");
+ goto put_len_field;
+ }
+
+ seq_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_data");
+ seq_field = bt_ctf_field_create(seq_type);
+ if (!seq_field) {
+ pr_err("failed to create 'raw_data' for bpf output event\n");
+ ret = -1;
+ goto put_seq_type;
+ }
+
+ ret = bt_ctf_field_sequence_set_length(seq_field, len_field);
+ if (ret) {
+ pr_err("failed to set length of 'raw_data'\n");
+ goto put_seq_field;
+ }
+
+ for (i = 0; i < nr_elements; i++) {
+ struct bt_ctf_field *elem_field =
+ bt_ctf_field_sequence_get_field(seq_field, i);
+
+ ret = bt_ctf_field_unsigned_integer_set_value(elem_field,
+ ((u32 *)(sample->raw_data))[i]);
+
+ bt_ctf_field_put(elem_field);
+ if (ret) {
+ pr_err("failed to set raw_data[%d]\n", i);
+ goto put_seq_field;
+ }
+ }
+
+ ret = bt_ctf_event_set_payload(event, "raw_data", seq_field);
+ if (ret)
+ pr_err("failed to set payload for raw_data\n");
+
+put_seq_field:
+ bt_ctf_field_put(seq_field);
+put_seq_type:
+ bt_ctf_field_type_put(seq_type);
+put_len_field:
+ bt_ctf_field_put(len_field);
+put_len_type:
+ bt_ctf_field_type_put(len_type);
+ return ret;
+}
+
+static int
+add_callchain_output_values(struct bt_ctf_event_class *event_class,
+ struct bt_ctf_event *event,
+ struct ip_callchain *callchain)
+{
+ struct bt_ctf_field_type *len_type, *seq_type;
+ struct bt_ctf_field *len_field, *seq_field;
+ unsigned int nr_elements = callchain->nr;
+ unsigned int i;
+ int ret;
+
+ len_type = bt_ctf_event_class_get_field_by_name(
+ event_class, "perf_callchain_size");
+ len_field = bt_ctf_field_create(len_type);
+ if (!len_field) {
+ pr_err("failed to create 'perf_callchain_size' for callchain output event\n");
+ ret = -1;
+ goto put_len_type;
+ }
+
+ ret = bt_ctf_field_unsigned_integer_set_value(len_field, nr_elements);
+ if (ret) {
+ pr_err("failed to set field value for perf_callchain_size\n");
+ goto put_len_field;
+ }
+ ret = bt_ctf_event_set_payload(event, "perf_callchain_size", len_field);
+ if (ret) {
+ pr_err("failed to set payload to perf_callchain_size\n");
+ goto put_len_field;
+ }
+
+ seq_type = bt_ctf_event_class_get_field_by_name(
+ event_class, "perf_callchain");
+ seq_field = bt_ctf_field_create(seq_type);
+ if (!seq_field) {
+ pr_err("failed to create 'perf_callchain' for callchain output event\n");
+ ret = -1;
+ goto put_seq_type;
+ }
+
+ ret = bt_ctf_field_sequence_set_length(seq_field, len_field);
+ if (ret) {
+ pr_err("failed to set length of 'perf_callchain'\n");
+ goto put_seq_field;
+ }
+
+ for (i = 0; i < nr_elements; i++) {
+ struct bt_ctf_field *elem_field =
+ bt_ctf_field_sequence_get_field(seq_field, i);
+
+ ret = bt_ctf_field_unsigned_integer_set_value(elem_field,
+ ((u64 *)(callchain->ips))[i]);
+
+ bt_ctf_field_put(elem_field);
+ if (ret) {
+ pr_err("failed to set callchain[%d]\n", i);
+ goto put_seq_field;
+ }
+ }
+
+ ret = bt_ctf_event_set_payload(event, "perf_callchain", seq_field);
+ if (ret)
+ pr_err("failed to set payload for raw_data\n");
+
+put_seq_field:
+ bt_ctf_field_put(seq_field);
+put_seq_type:
+ bt_ctf_field_type_put(seq_type);
+put_len_field:
+ bt_ctf_field_put(len_field);
+put_len_type:
+ bt_ctf_field_type_put(len_type);
+ return ret;
+}
+
+static int add_generic_values(struct ctf_writer *cw,
+ struct bt_ctf_event *event,
+ struct evsel *evsel,
+ struct perf_sample *sample)
+{
+ u64 type = evsel->core.attr.sample_type;
+ int ret;
+
+ /*
+ * missing:
+ * PERF_SAMPLE_TIME - not needed as we have it in
+ * ctf event header
+ * PERF_SAMPLE_READ - TODO
+ * PERF_SAMPLE_RAW - tracepoint fields are handled separately
+ * PERF_SAMPLE_BRANCH_STACK - TODO
+ * PERF_SAMPLE_REGS_USER - TODO
+ * PERF_SAMPLE_STACK_USER - TODO
+ */
+
+ if (type & PERF_SAMPLE_IP) {
+ ret = value_set_u64_hex(cw, event, "perf_ip", sample->ip);
+ if (ret)
+ return -1;
+ }
+
+ if (type & PERF_SAMPLE_TID) {
+ ret = value_set_s32(cw, event, "perf_tid", sample->tid);
+ if (ret)
+ return -1;
+
+ ret = value_set_s32(cw, event, "perf_pid", sample->pid);
+ if (ret)
+ return -1;
+ }
+
+ if ((type & PERF_SAMPLE_ID) ||
+ (type & PERF_SAMPLE_IDENTIFIER)) {
+ ret = value_set_u64(cw, event, "perf_id", sample->id);
+ if (ret)
+ return -1;
+ }
+
+ if (type & PERF_SAMPLE_STREAM_ID) {
+ ret = value_set_u64(cw, event, "perf_stream_id", sample->stream_id);
+ if (ret)
+ return -1;
+ }
+
+ if (type & PERF_SAMPLE_PERIOD) {
+ ret = value_set_u64(cw, event, "perf_period", sample->period);
+ if (ret)
+ return -1;
+ }
+
+ if (type & PERF_SAMPLE_WEIGHT) {
+ ret = value_set_u64(cw, event, "perf_weight", sample->weight);
+ if (ret)
+ return -1;
+ }
+
+ if (type & PERF_SAMPLE_DATA_SRC) {
+ ret = value_set_u64(cw, event, "perf_data_src",
+ sample->data_src);
+ if (ret)
+ return -1;
+ }
+
+ if (type & PERF_SAMPLE_TRANSACTION) {
+ ret = value_set_u64(cw, event, "perf_transaction",
+ sample->transaction);
+ if (ret)
+ return -1;
+ }
+
+ return 0;
+}
+
+static int ctf_stream__flush(struct ctf_stream *cs)
+{
+ int err = 0;
+
+ if (cs) {
+ err = bt_ctf_stream_flush(cs->stream);
+ if (err)
+ pr_err("CTF stream %d flush failed\n", cs->cpu);
+
+ pr("Flush stream for cpu %d (%u samples)\n",
+ cs->cpu, cs->count);
+
+ cs->count = 0;
+ }
+
+ return err;
+}
+
+static struct ctf_stream *ctf_stream__create(struct ctf_writer *cw, int cpu)
+{
+ struct ctf_stream *cs;
+ struct bt_ctf_field *pkt_ctx = NULL;
+ struct bt_ctf_field *cpu_field = NULL;
+ struct bt_ctf_stream *stream = NULL;
+ int ret;
+
+ cs = zalloc(sizeof(*cs));
+ if (!cs) {
+ pr_err("Failed to allocate ctf stream\n");
+ return NULL;
+ }
+
+ stream = bt_ctf_writer_create_stream(cw->writer, cw->stream_class);
+ if (!stream) {
+ pr_err("Failed to create CTF stream\n");
+ goto out;
+ }
+
+ pkt_ctx = bt_ctf_stream_get_packet_context(stream);
+ if (!pkt_ctx) {
+ pr_err("Failed to obtain packet context\n");
+ goto out;
+ }
+
+ cpu_field = bt_ctf_field_structure_get_field(pkt_ctx, "cpu_id");
+ bt_ctf_field_put(pkt_ctx);
+ if (!cpu_field) {
+ pr_err("Failed to obtain cpu field\n");
+ goto out;
+ }
+
+ ret = bt_ctf_field_unsigned_integer_set_value(cpu_field, (u32) cpu);
+ if (ret) {
+ pr_err("Failed to update CPU number\n");
+ goto out;
+ }
+
+ bt_ctf_field_put(cpu_field);
+
+ cs->cpu = cpu;
+ cs->stream = stream;
+ return cs;
+
+out:
+ if (cpu_field)
+ bt_ctf_field_put(cpu_field);
+ if (stream)
+ bt_ctf_stream_put(stream);
+
+ free(cs);
+ return NULL;
+}
+
+static void ctf_stream__delete(struct ctf_stream *cs)
+{
+ if (cs) {
+ bt_ctf_stream_put(cs->stream);
+ free(cs);
+ }
+}
+
+static struct ctf_stream *ctf_stream(struct ctf_writer *cw, int cpu)
+{
+ struct ctf_stream *cs = cw->stream[cpu];
+
+ if (!cs) {
+ cs = ctf_stream__create(cw, cpu);
+ cw->stream[cpu] = cs;
+ }
+
+ return cs;
+}
+
+static int get_sample_cpu(struct ctf_writer *cw, struct perf_sample *sample,
+ struct evsel *evsel)
+{
+ int cpu = 0;
+
+ if (evsel->core.attr.sample_type & PERF_SAMPLE_CPU)
+ cpu = sample->cpu;
+
+ if (cpu > cw->stream_cnt) {
+ pr_err("Event was recorded for CPU %d, limit is at %d.\n",
+ cpu, cw->stream_cnt);
+ cpu = 0;
+ }
+
+ return cpu;
+}
+
+#define STREAM_FLUSH_COUNT 100000
+
+/*
+ * Currently we have no other way to determine the
+ * time for the stream flush other than keep track
+ * of the number of events and check it against
+ * threshold.
+ */
+static bool is_flush_needed(struct ctf_stream *cs)
+{
+ return cs->count >= STREAM_FLUSH_COUNT;
+}
+
+static int process_sample_event(struct perf_tool *tool,
+ union perf_event *_event,
+ struct perf_sample *sample,
+ struct evsel *evsel,
+ struct machine *machine __maybe_unused)
+{
+ struct convert *c = container_of(tool, struct convert, tool);
+ struct evsel_priv *priv = evsel->priv;
+ struct ctf_writer *cw = &c->writer;
+ struct ctf_stream *cs;
+ struct bt_ctf_event_class *event_class;
+ struct bt_ctf_event *event;
+ int ret;
+ unsigned long type = evsel->core.attr.sample_type;
+
+ if (WARN_ONCE(!priv, "Failed to setup all events.\n"))
+ return 0;
+
+ event_class = priv->event_class;
+
+ /* update stats */
+ c->events_count++;
+ c->events_size += _event->header.size;
+
+ pr_time2(sample->time, "sample %" PRIu64 "\n", c->events_count);
+
+ event = bt_ctf_event_create(event_class);
+ if (!event) {
+ pr_err("Failed to create an CTF event\n");
+ return -1;
+ }
+
+ bt_ctf_clock_set_time(cw->clock, sample->time);
+
+ ret = add_generic_values(cw, event, evsel, sample);
+ if (ret)
+ return -1;
+
+ if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT) {
+ ret = add_tracepoint_values(cw, event_class, event,
+ evsel, sample);
+ if (ret)
+ return -1;
+ }
+
+ if (type & PERF_SAMPLE_CALLCHAIN) {
+ ret = add_callchain_output_values(event_class,
+ event, sample->callchain);
+ if (ret)
+ return -1;
+ }
+
+ if (evsel__is_bpf_output(evsel)) {
+ ret = add_bpf_output_values(event_class, event, sample);
+ if (ret)
+ return -1;
+ }
+
+ cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel));
+ if (cs) {
+ if (is_flush_needed(cs))
+ ctf_stream__flush(cs);
+
+ cs->count++;
+ bt_ctf_stream_append_event(cs->stream, event);
+ }
+
+ bt_ctf_event_put(event);
+ return cs ? 0 : -1;
+}
+
+#define __NON_SAMPLE_SET_FIELD(_name, _type, _field) \
+do { \
+ ret = value_set_##_type(cw, event, #_field, _event->_name._field);\
+ if (ret) \
+ return -1; \
+} while(0)
+
+#define __FUNC_PROCESS_NON_SAMPLE(_name, body) \
+static int process_##_name##_event(struct perf_tool *tool, \
+ union perf_event *_event, \
+ struct perf_sample *sample, \
+ struct machine *machine) \
+{ \
+ struct convert *c = container_of(tool, struct convert, tool);\
+ struct ctf_writer *cw = &c->writer; \
+ struct bt_ctf_event_class *event_class = cw->_name##_class;\
+ struct bt_ctf_event *event; \
+ struct ctf_stream *cs; \
+ int ret; \
+ \
+ c->non_sample_count++; \
+ c->events_size += _event->header.size; \
+ event = bt_ctf_event_create(event_class); \
+ if (!event) { \
+ pr_err("Failed to create an CTF event\n"); \
+ return -1; \
+ } \
+ \
+ bt_ctf_clock_set_time(cw->clock, sample->time); \
+ body \
+ cs = ctf_stream(cw, 0); \
+ if (cs) { \
+ if (is_flush_needed(cs)) \
+ ctf_stream__flush(cs); \
+ \
+ cs->count++; \
+ bt_ctf_stream_append_event(cs->stream, event); \
+ } \
+ bt_ctf_event_put(event); \
+ \
+ return perf_event__process_##_name(tool, _event, sample, machine);\
+}
+
+__FUNC_PROCESS_NON_SAMPLE(comm,
+ __NON_SAMPLE_SET_FIELD(comm, u32, pid);
+ __NON_SAMPLE_SET_FIELD(comm, u32, tid);
+ __NON_SAMPLE_SET_FIELD(comm, string, comm);
+)
+__FUNC_PROCESS_NON_SAMPLE(fork,
+ __NON_SAMPLE_SET_FIELD(fork, u32, pid);
+ __NON_SAMPLE_SET_FIELD(fork, u32, ppid);
+ __NON_SAMPLE_SET_FIELD(fork, u32, tid);
+ __NON_SAMPLE_SET_FIELD(fork, u32, ptid);
+ __NON_SAMPLE_SET_FIELD(fork, u64, time);
+)
+
+__FUNC_PROCESS_NON_SAMPLE(exit,
+ __NON_SAMPLE_SET_FIELD(fork, u32, pid);
+ __NON_SAMPLE_SET_FIELD(fork, u32, ppid);
+ __NON_SAMPLE_SET_FIELD(fork, u32, tid);
+ __NON_SAMPLE_SET_FIELD(fork, u32, ptid);
+ __NON_SAMPLE_SET_FIELD(fork, u64, time);
+)
+__FUNC_PROCESS_NON_SAMPLE(mmap,
+ __NON_SAMPLE_SET_FIELD(mmap, u32, pid);
+ __NON_SAMPLE_SET_FIELD(mmap, u32, tid);
+ __NON_SAMPLE_SET_FIELD(mmap, u64_hex, start);
+ __NON_SAMPLE_SET_FIELD(mmap, string, filename);
+)
+__FUNC_PROCESS_NON_SAMPLE(mmap2,
+ __NON_SAMPLE_SET_FIELD(mmap2, u32, pid);
+ __NON_SAMPLE_SET_FIELD(mmap2, u32, tid);
+ __NON_SAMPLE_SET_FIELD(mmap2, u64_hex, start);
+ __NON_SAMPLE_SET_FIELD(mmap2, string, filename);
+)
+#undef __NON_SAMPLE_SET_FIELD
+#undef __FUNC_PROCESS_NON_SAMPLE
+
+/* If dup < 0, add a prefix. Else, add _dupl_X suffix. */
+static char *change_name(char *name, char *orig_name, int dup)
+{
+ char *new_name = NULL;
+ size_t len;
+
+ if (!name)
+ name = orig_name;
+
+ if (dup >= 10)
+ goto out;
+ /*
+ * Add '_' prefix to potential keywork. According to
+ * Mathieu Desnoyers (https://lore.kernel.org/lkml/1074266107.40857.1422045946295.JavaMail.zimbra@efficios.com),
+ * further CTF spec updating may require us to use '$'.
+ */
+ if (dup < 0)
+ len = strlen(name) + sizeof("_");
+ else
+ len = strlen(orig_name) + sizeof("_dupl_X");
+
+ new_name = malloc(len);
+ if (!new_name)
+ goto out;
+
+ if (dup < 0)
+ snprintf(new_name, len, "_%s", name);
+ else
+ snprintf(new_name, len, "%s_dupl_%d", orig_name, dup);
+
+out:
+ if (name != orig_name)
+ free(name);
+ return new_name;
+}
+
+static int event_class_add_field(struct bt_ctf_event_class *event_class,
+ struct bt_ctf_field_type *type,
+ struct tep_format_field *field)
+{
+ struct bt_ctf_field_type *t = NULL;
+ char *name;
+ int dup = 1;
+ int ret;
+
+ /* alias was already assigned */
+ if (field->alias != field->name)
+ return bt_ctf_event_class_add_field(event_class, type,
+ (char *)field->alias);
+
+ name = field->name;
+
+ /* If 'name' is a keywork, add prefix. */
+ if (bt_ctf_validate_identifier(name))
+ name = change_name(name, field->name, -1);
+
+ if (!name) {
+ pr_err("Failed to fix invalid identifier.");
+ return -1;
+ }
+ while ((t = bt_ctf_event_class_get_field_by_name(event_class, name))) {
+ bt_ctf_field_type_put(t);
+ name = change_name(name, field->name, dup++);
+ if (!name) {
+ pr_err("Failed to create dup name for '%s'\n", field->name);
+ return -1;
+ }
+ }
+
+ ret = bt_ctf_event_class_add_field(event_class, type, name);
+ if (!ret)
+ field->alias = name;
+
+ return ret;
+}
+
+static int add_tracepoint_fields_types(struct ctf_writer *cw,
+ struct tep_format_field *fields,
+ struct bt_ctf_event_class *event_class)
+{
+ struct tep_format_field *field;
+ int ret;
+
+ for (field = fields; field; field = field->next) {
+ struct bt_ctf_field_type *type;
+ unsigned long flags = field->flags;
+
+ pr2(" field '%s'\n", field->name);
+
+ type = get_tracepoint_field_type(cw, field);
+ if (!type)
+ return -1;
+
+ /*
+ * A string is an array of chars. For this we use the string
+ * type and don't care that it is an array. What we don't
+ * support is an array of strings.
+ */
+ if (flags & TEP_FIELD_IS_STRING)
+ flags &= ~TEP_FIELD_IS_ARRAY;
+
+ if (flags & TEP_FIELD_IS_ARRAY)
+ type = bt_ctf_field_type_array_create(type, field->arraylen);
+
+ ret = event_class_add_field(event_class, type, field);
+
+ if (flags & TEP_FIELD_IS_ARRAY)
+ bt_ctf_field_type_put(type);
+
+ if (ret) {
+ pr_err("Failed to add field '%s': %d\n",
+ field->name, ret);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int add_tracepoint_types(struct ctf_writer *cw,
+ struct evsel *evsel,
+ struct bt_ctf_event_class *class)
+{
+ struct tep_format_field *common_fields = evsel->tp_format->format.common_fields;
+ struct tep_format_field *fields = evsel->tp_format->format.fields;
+ int ret;
+
+ ret = add_tracepoint_fields_types(cw, common_fields, class);
+ if (!ret)
+ ret = add_tracepoint_fields_types(cw, fields, class);
+
+ return ret;
+}
+
+static int add_bpf_output_types(struct ctf_writer *cw,
+ struct bt_ctf_event_class *class)
+{
+ struct bt_ctf_field_type *len_type = cw->data.u32;
+ struct bt_ctf_field_type *seq_base_type = cw->data.u32_hex;
+ struct bt_ctf_field_type *seq_type;
+ int ret;
+
+ ret = bt_ctf_event_class_add_field(class, len_type, "raw_len");
+ if (ret)
+ return ret;
+
+ seq_type = bt_ctf_field_type_sequence_create(seq_base_type, "raw_len");
+ if (!seq_type)
+ return -1;
+
+ return bt_ctf_event_class_add_field(class, seq_type, "raw_data");
+}
+
+static int add_generic_types(struct ctf_writer *cw, struct evsel *evsel,
+ struct bt_ctf_event_class *event_class)
+{
+ u64 type = evsel->core.attr.sample_type;
+
+ /*
+ * missing:
+ * PERF_SAMPLE_TIME - not needed as we have it in
+ * ctf event header
+ * PERF_SAMPLE_READ - TODO
+ * PERF_SAMPLE_CALLCHAIN - TODO
+ * PERF_SAMPLE_RAW - tracepoint fields and BPF output
+ * are handled separately
+ * PERF_SAMPLE_BRANCH_STACK - TODO
+ * PERF_SAMPLE_REGS_USER - TODO
+ * PERF_SAMPLE_STACK_USER - TODO
+ */
+
+#define ADD_FIELD(cl, t, n) \
+ do { \
+ pr2(" field '%s'\n", n); \
+ if (bt_ctf_event_class_add_field(cl, t, n)) { \
+ pr_err("Failed to add field '%s';\n", n); \
+ return -1; \
+ } \
+ } while (0)
+
+ if (type & PERF_SAMPLE_IP)
+ ADD_FIELD(event_class, cw->data.u64_hex, "perf_ip");
+
+ if (type & PERF_SAMPLE_TID) {
+ ADD_FIELD(event_class, cw->data.s32, "perf_tid");
+ ADD_FIELD(event_class, cw->data.s32, "perf_pid");
+ }
+
+ if ((type & PERF_SAMPLE_ID) ||
+ (type & PERF_SAMPLE_IDENTIFIER))
+ ADD_FIELD(event_class, cw->data.u64, "perf_id");
+
+ if (type & PERF_SAMPLE_STREAM_ID)
+ ADD_FIELD(event_class, cw->data.u64, "perf_stream_id");
+
+ if (type & PERF_SAMPLE_PERIOD)
+ ADD_FIELD(event_class, cw->data.u64, "perf_period");
+
+ if (type & PERF_SAMPLE_WEIGHT)
+ ADD_FIELD(event_class, cw->data.u64, "perf_weight");
+
+ if (type & PERF_SAMPLE_DATA_SRC)
+ ADD_FIELD(event_class, cw->data.u64, "perf_data_src");
+
+ if (type & PERF_SAMPLE_TRANSACTION)
+ ADD_FIELD(event_class, cw->data.u64, "perf_transaction");
+
+ if (type & PERF_SAMPLE_CALLCHAIN) {
+ ADD_FIELD(event_class, cw->data.u32, "perf_callchain_size");
+ ADD_FIELD(event_class,
+ bt_ctf_field_type_sequence_create(
+ cw->data.u64_hex, "perf_callchain_size"),
+ "perf_callchain");
+ }
+
+#undef ADD_FIELD
+ return 0;
+}
+
+static int add_event(struct ctf_writer *cw, struct evsel *evsel)
+{
+ struct bt_ctf_event_class *event_class;
+ struct evsel_priv *priv;
+ const char *name = evsel__name(evsel);
+ int ret;
+
+ pr("Adding event '%s' (type %d)\n", name, evsel->core.attr.type);
+
+ event_class = bt_ctf_event_class_create(name);
+ if (!event_class)
+ return -1;
+
+ ret = add_generic_types(cw, evsel, event_class);
+ if (ret)
+ goto err;
+
+ if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT) {
+ ret = add_tracepoint_types(cw, evsel, event_class);
+ if (ret)
+ goto err;
+ }
+
+ if (evsel__is_bpf_output(evsel)) {
+ ret = add_bpf_output_types(cw, event_class);
+ if (ret)
+ goto err;
+ }
+
+ ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class);
+ if (ret) {
+ pr("Failed to add event class into stream.\n");
+ goto err;
+ }
+
+ priv = malloc(sizeof(*priv));
+ if (!priv)
+ goto err;
+
+ priv->event_class = event_class;
+ evsel->priv = priv;
+ return 0;
+
+err:
+ bt_ctf_event_class_put(event_class);
+ pr_err("Failed to add event '%s'.\n", name);
+ return -1;
+}
+
+static int setup_events(struct ctf_writer *cw, struct perf_session *session)
+{
+ struct evlist *evlist = session->evlist;
+ struct evsel *evsel;
+ int ret;
+
+ evlist__for_each_entry(evlist, evsel) {
+ ret = add_event(cw, evsel);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+#define __NON_SAMPLE_ADD_FIELD(t, n) \
+ do { \
+ pr2(" field '%s'\n", #n); \
+ if (bt_ctf_event_class_add_field(event_class, cw->data.t, #n)) {\
+ pr_err("Failed to add field '%s';\n", #n);\
+ return -1; \
+ } \
+ } while(0)
+
+#define __FUNC_ADD_NON_SAMPLE_EVENT_CLASS(_name, body) \
+static int add_##_name##_event(struct ctf_writer *cw) \
+{ \
+ struct bt_ctf_event_class *event_class; \
+ int ret; \
+ \
+ pr("Adding "#_name" event\n"); \
+ event_class = bt_ctf_event_class_create("perf_" #_name);\
+ if (!event_class) \
+ return -1; \
+ body \
+ \
+ ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class);\
+ if (ret) { \
+ pr("Failed to add event class '"#_name"' into stream.\n");\
+ return ret; \
+ } \
+ \
+ cw->_name##_class = event_class; \
+ bt_ctf_event_class_put(event_class); \
+ return 0; \
+}
+
+__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(comm,
+ __NON_SAMPLE_ADD_FIELD(u32, pid);
+ __NON_SAMPLE_ADD_FIELD(u32, tid);
+ __NON_SAMPLE_ADD_FIELD(string, comm);
+)
+
+__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(fork,
+ __NON_SAMPLE_ADD_FIELD(u32, pid);
+ __NON_SAMPLE_ADD_FIELD(u32, ppid);
+ __NON_SAMPLE_ADD_FIELD(u32, tid);
+ __NON_SAMPLE_ADD_FIELD(u32, ptid);
+ __NON_SAMPLE_ADD_FIELD(u64, time);
+)
+
+__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(exit,
+ __NON_SAMPLE_ADD_FIELD(u32, pid);
+ __NON_SAMPLE_ADD_FIELD(u32, ppid);
+ __NON_SAMPLE_ADD_FIELD(u32, tid);
+ __NON_SAMPLE_ADD_FIELD(u32, ptid);
+ __NON_SAMPLE_ADD_FIELD(u64, time);
+)
+
+__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(mmap,
+ __NON_SAMPLE_ADD_FIELD(u32, pid);
+ __NON_SAMPLE_ADD_FIELD(u32, tid);
+ __NON_SAMPLE_ADD_FIELD(u64_hex, start);
+ __NON_SAMPLE_ADD_FIELD(string, filename);
+)
+
+__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(mmap2,
+ __NON_SAMPLE_ADD_FIELD(u32, pid);
+ __NON_SAMPLE_ADD_FIELD(u32, tid);
+ __NON_SAMPLE_ADD_FIELD(u64_hex, start);
+ __NON_SAMPLE_ADD_FIELD(string, filename);
+)
+#undef __NON_SAMPLE_ADD_FIELD
+#undef __FUNC_ADD_NON_SAMPLE_EVENT_CLASS
+
+static int setup_non_sample_events(struct ctf_writer *cw,
+ struct perf_session *session __maybe_unused)
+{
+ int ret;
+
+ ret = add_comm_event(cw);
+ if (ret)
+ return ret;
+ ret = add_exit_event(cw);
+ if (ret)
+ return ret;
+ ret = add_fork_event(cw);
+ if (ret)
+ return ret;
+ ret = add_mmap_event(cw);
+ if (ret)
+ return ret;
+ ret = add_mmap2_event(cw);
+ if (ret)
+ return ret;
+ return 0;
+}
+
+static void cleanup_events(struct perf_session *session)
+{
+ struct evlist *evlist = session->evlist;
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ struct evsel_priv *priv;
+
+ priv = evsel->priv;
+ bt_ctf_event_class_put(priv->event_class);
+ zfree(&evsel->priv);
+ }
+
+ evlist__delete(evlist);
+ session->evlist = NULL;
+}
+
+static int setup_streams(struct ctf_writer *cw, struct perf_session *session)
+{
+ struct ctf_stream **stream;
+ struct perf_header *ph = &session->header;
+ int ncpus;
+
+ /*
+ * Try to get the number of cpus used in the data file,
+ * if not present fallback to the MAX_CPUS.
+ */
+ ncpus = ph->env.nr_cpus_avail ?: MAX_CPUS;
+
+ stream = zalloc(sizeof(*stream) * ncpus);
+ if (!stream) {
+ pr_err("Failed to allocate streams.\n");
+ return -ENOMEM;
+ }
+
+ cw->stream = stream;
+ cw->stream_cnt = ncpus;
+ return 0;
+}
+
+static void free_streams(struct ctf_writer *cw)
+{
+ int cpu;
+
+ for (cpu = 0; cpu < cw->stream_cnt; cpu++)
+ ctf_stream__delete(cw->stream[cpu]);
+
+ zfree(&cw->stream);
+}
+
+static int ctf_writer__setup_env(struct ctf_writer *cw,
+ struct perf_session *session)
+{
+ struct perf_header *header = &session->header;
+ struct bt_ctf_writer *writer = cw->writer;
+
+#define ADD(__n, __v) \
+do { \
+ if (bt_ctf_writer_add_environment_field(writer, __n, __v)) \
+ return -1; \
+} while (0)
+
+ ADD("host", header->env.hostname);
+ ADD("sysname", "Linux");
+ ADD("release", header->env.os_release);
+ ADD("version", header->env.version);
+ ADD("machine", header->env.arch);
+ ADD("domain", "kernel");
+ ADD("tracer_name", "perf");
+
+#undef ADD
+ return 0;
+}
+
+static int ctf_writer__setup_clock(struct ctf_writer *cw,
+ struct perf_session *session,
+ bool tod)
+{
+ struct bt_ctf_clock *clock = cw->clock;
+ const char *desc = "perf clock";
+ int64_t offset = 0;
+
+ if (tod) {
+ struct perf_env *env = &session->header.env;
+
+ if (!env->clock.enabled) {
+ pr_err("Can't provide --tod time, missing clock data. "
+ "Please record with -k/--clockid option.\n");
+ return -1;
+ }
+
+ desc = clockid_name(env->clock.clockid);
+ offset = env->clock.tod_ns - env->clock.clockid_ns;
+ }
+
+#define SET(__n, __v) \
+do { \
+ if (bt_ctf_clock_set_##__n(clock, __v)) \
+ return -1; \
+} while (0)
+
+ SET(frequency, 1000000000);
+ SET(offset, offset);
+ SET(description, desc);
+ SET(precision, 10);
+ SET(is_absolute, 0);
+
+#undef SET
+ return 0;
+}
+
+static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex)
+{
+ struct bt_ctf_field_type *type;
+
+ type = bt_ctf_field_type_integer_create(size);
+ if (!type)
+ return NULL;
+
+ if (sign &&
+ bt_ctf_field_type_integer_set_signed(type, 1))
+ goto err;
+
+ if (hex &&
+ bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL))
+ goto err;
+
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_BIG_ENDIAN);
+#else
+ bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_LITTLE_ENDIAN);
+#endif
+
+ pr2("Created type: INTEGER %d-bit %ssigned %s\n",
+ size, sign ? "un" : "", hex ? "hex" : "");
+ return type;
+
+err:
+ bt_ctf_field_type_put(type);
+ return NULL;
+}
+
+static void ctf_writer__cleanup_data(struct ctf_writer *cw)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(cw->data.array); i++)
+ bt_ctf_field_type_put(cw->data.array[i]);
+}
+
+static int ctf_writer__init_data(struct ctf_writer *cw)
+{
+#define CREATE_INT_TYPE(type, size, sign, hex) \
+do { \
+ (type) = create_int_type(size, sign, hex); \
+ if (!(type)) \
+ goto err; \
+} while (0)
+
+ CREATE_INT_TYPE(cw->data.s64, 64, true, false);
+ CREATE_INT_TYPE(cw->data.u64, 64, false, false);
+ CREATE_INT_TYPE(cw->data.s32, 32, true, false);
+ CREATE_INT_TYPE(cw->data.u32, 32, false, false);
+ CREATE_INT_TYPE(cw->data.u32_hex, 32, false, true);
+ CREATE_INT_TYPE(cw->data.u64_hex, 64, false, true);
+
+ cw->data.string = bt_ctf_field_type_string_create();
+ if (cw->data.string)
+ return 0;
+
+err:
+ ctf_writer__cleanup_data(cw);
+ pr_err("Failed to create data types.\n");
+ return -1;
+}
+
+static void ctf_writer__cleanup(struct ctf_writer *cw)
+{
+ ctf_writer__cleanup_data(cw);
+
+ bt_ctf_clock_put(cw->clock);
+ free_streams(cw);
+ bt_ctf_stream_class_put(cw->stream_class);
+ bt_ctf_writer_put(cw->writer);
+
+ /* and NULL all the pointers */
+ memset(cw, 0, sizeof(*cw));
+}
+
+static int ctf_writer__init(struct ctf_writer *cw, const char *path,
+ struct perf_session *session, bool tod)
+{
+ struct bt_ctf_writer *writer;
+ struct bt_ctf_stream_class *stream_class;
+ struct bt_ctf_clock *clock;
+ struct bt_ctf_field_type *pkt_ctx_type;
+ int ret;
+
+ /* CTF writer */
+ writer = bt_ctf_writer_create(path);
+ if (!writer)
+ goto err;
+
+ cw->writer = writer;
+
+ /* CTF clock */
+ clock = bt_ctf_clock_create("perf_clock");
+ if (!clock) {
+ pr("Failed to create CTF clock.\n");
+ goto err_cleanup;
+ }
+
+ cw->clock = clock;
+
+ if (ctf_writer__setup_clock(cw, session, tod)) {
+ pr("Failed to setup CTF clock.\n");
+ goto err_cleanup;
+ }
+
+ /* CTF stream class */
+ stream_class = bt_ctf_stream_class_create("perf_stream");
+ if (!stream_class) {
+ pr("Failed to create CTF stream class.\n");
+ goto err_cleanup;
+ }
+
+ cw->stream_class = stream_class;
+
+ /* CTF clock stream setup */
+ if (bt_ctf_stream_class_set_clock(stream_class, clock)) {
+ pr("Failed to assign CTF clock to stream class.\n");
+ goto err_cleanup;
+ }
+
+ if (ctf_writer__init_data(cw))
+ goto err_cleanup;
+
+ /* Add cpu_id for packet context */
+ pkt_ctx_type = bt_ctf_stream_class_get_packet_context_type(stream_class);
+ if (!pkt_ctx_type)
+ goto err_cleanup;
+
+ ret = bt_ctf_field_type_structure_add_field(pkt_ctx_type, cw->data.u32, "cpu_id");
+ bt_ctf_field_type_put(pkt_ctx_type);
+ if (ret)
+ goto err_cleanup;
+
+ /* CTF clock writer setup */
+ if (bt_ctf_writer_add_clock(writer, clock)) {
+ pr("Failed to assign CTF clock to writer.\n");
+ goto err_cleanup;
+ }
+
+ return 0;
+
+err_cleanup:
+ ctf_writer__cleanup(cw);
+err:
+ pr_err("Failed to setup CTF writer.\n");
+ return -1;
+}
+
+static int ctf_writer__flush_streams(struct ctf_writer *cw)
+{
+ int cpu, ret = 0;
+
+ for (cpu = 0; cpu < cw->stream_cnt && !ret; cpu++)
+ ret = ctf_stream__flush(cw->stream[cpu]);
+
+ return ret;
+}
+
+static int convert__config(const char *var, const char *value, void *cb)
+{
+ struct convert *c = cb;
+
+ if (!strcmp(var, "convert.queue-size"))
+ return perf_config_u64(&c->queue_size, var, value);
+
+ return 0;
+}
+
+int bt_convert__perf2ctf(const char *input, const char *path,
+ struct perf_data_convert_opts *opts)
+{
+ struct perf_session *session;
+ struct perf_data data = {
+ .path = input,
+ .mode = PERF_DATA_MODE_READ,
+ .force = opts->force,
+ };
+ struct convert c = {
+ .tool = {
+ .sample = process_sample_event,
+ .mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
+ .comm = perf_event__process_comm,
+ .exit = perf_event__process_exit,
+ .fork = perf_event__process_fork,
+ .lost = perf_event__process_lost,
+ .tracing_data = perf_event__process_tracing_data,
+ .build_id = perf_event__process_build_id,
+ .namespaces = perf_event__process_namespaces,
+ .ordered_events = true,
+ .ordering_requires_timestamps = true,
+ },
+ };
+ struct ctf_writer *cw = &c.writer;
+ int err;
+
+ if (opts->all) {
+ c.tool.comm = process_comm_event;
+ c.tool.exit = process_exit_event;
+ c.tool.fork = process_fork_event;
+ c.tool.mmap = process_mmap_event;
+ c.tool.mmap2 = process_mmap2_event;
+ }
+
+ err = perf_config(convert__config, &c);
+ if (err)
+ return err;
+
+ err = -1;
+ /* perf.data session */
+ session = perf_session__new(&data, &c.tool);
+ if (IS_ERR(session))
+ return PTR_ERR(session);
+
+ /* CTF writer */
+ if (ctf_writer__init(cw, path, session, opts->tod))
+ goto free_session;
+
+ if (c.queue_size) {
+ ordered_events__set_alloc_size(&session->ordered_events,
+ c.queue_size);
+ }
+
+ /* CTF writer env/clock setup */
+ if (ctf_writer__setup_env(cw, session))
+ goto free_writer;
+
+ /* CTF events setup */
+ if (setup_events(cw, session))
+ goto free_writer;
+
+ if (opts->all && setup_non_sample_events(cw, session))
+ goto free_writer;
+
+ if (setup_streams(cw, session))
+ goto free_writer;
+
+ err = perf_session__process_events(session);
+ if (!err)
+ err = ctf_writer__flush_streams(cw);
+ else
+ pr_err("Error during conversion.\n");
+
+ fprintf(stderr,
+ "[ perf data convert: Converted '%s' into CTF data '%s' ]\n",
+ data.path, path);
+
+ fprintf(stderr,
+ "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples",
+ (double) c.events_size / 1024.0 / 1024.0,
+ c.events_count);
+
+ if (!c.non_sample_count)
+ fprintf(stderr, ") ]\n");
+ else
+ fprintf(stderr, ", %" PRIu64 " non-samples) ]\n", c.non_sample_count);
+
+ cleanup_events(session);
+ perf_session__delete(session);
+ ctf_writer__cleanup(cw);
+
+ return err;
+
+free_writer:
+ ctf_writer__cleanup(cw);
+free_session:
+ perf_session__delete(session);
+ pr_err("Error during conversion setup.\n");
+ return err;
+}
diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c
new file mode 100644
index 000000000..5bb3c2ba9
--- /dev/null
+++ b/tools/perf/util/data-convert-json.c
@@ -0,0 +1,422 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * JSON export.
+ *
+ * Copyright (C) 2021, CodeWeavers Inc. <nfraser@codeweavers.com>
+ */
+
+#include "data-convert.h"
+
+#include <fcntl.h>
+#include <inttypes.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "linux/compiler.h"
+#include "linux/err.h"
+#include "util/auxtrace.h"
+#include "util/debug.h"
+#include "util/dso.h"
+#include "util/event.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/header.h"
+#include "util/map.h"
+#include "util/session.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/tool.h"
+
+#ifdef HAVE_LIBTRACEEVENT
+#include <traceevent/event-parse.h>
+#endif
+
+struct convert_json {
+ struct perf_tool tool;
+ FILE *out;
+ bool first;
+ u64 events_count;
+};
+
+// Outputs a JSON-encoded string surrounded by quotes with characters escaped.
+static void output_json_string(FILE *out, const char *s)
+{
+ fputc('"', out);
+ while (*s) {
+ switch (*s) {
+
+ // required escapes with special forms as per RFC 8259
+ case '"': fputs("\\\"", out); break;
+ case '\\': fputs("\\\\", out); break;
+ case '\b': fputs("\\b", out); break;
+ case '\f': fputs("\\f", out); break;
+ case '\n': fputs("\\n", out); break;
+ case '\r': fputs("\\r", out); break;
+ case '\t': fputs("\\t", out); break;
+
+ default:
+ // all other control characters must be escaped by hex code
+ if (*s <= 0x1f)
+ fprintf(out, "\\u%04x", *s);
+ else
+ fputc(*s, out);
+ break;
+ }
+
+ ++s;
+ }
+ fputc('"', out);
+}
+
+// Outputs an optional comma, newline and indentation to delimit a new value
+// from the previous one in a JSON object or array.
+static void output_json_delimiters(FILE *out, bool comma, int depth)
+{
+ int i;
+
+ if (comma)
+ fputc(',', out);
+ fputc('\n', out);
+ for (i = 0; i < depth; ++i)
+ fputc('\t', out);
+}
+
+// Outputs a printf format string (with delimiter) as a JSON value.
+__printf(4, 5)
+static void output_json_format(FILE *out, bool comma, int depth, const char *format, ...)
+{
+ va_list args;
+
+ output_json_delimiters(out, comma, depth);
+ va_start(args, format);
+ vfprintf(out, format, args);
+ va_end(args);
+}
+
+// Outputs a JSON key-value pair where the value is a string.
+static void output_json_key_string(FILE *out, bool comma, int depth,
+ const char *key, const char *value)
+{
+ output_json_delimiters(out, comma, depth);
+ output_json_string(out, key);
+ fputs(": ", out);
+ output_json_string(out, value);
+}
+
+// Outputs a JSON key-value pair where the value is a printf format string.
+__printf(5, 6)
+static void output_json_key_format(FILE *out, bool comma, int depth,
+ const char *key, const char *format, ...)
+{
+ va_list args;
+
+ output_json_delimiters(out, comma, depth);
+ output_json_string(out, key);
+ fputs(": ", out);
+ va_start(args, format);
+ vfprintf(out, format, args);
+ va_end(args);
+}
+
+static void output_sample_callchain_entry(struct perf_tool *tool,
+ u64 ip, struct addr_location *al)
+{
+ struct convert_json *c = container_of(tool, struct convert_json, tool);
+ FILE *out = c->out;
+
+ output_json_format(out, false, 4, "{");
+ output_json_key_format(out, false, 5, "ip", "\"0x%" PRIx64 "\"", ip);
+
+ if (al && al->sym && al->sym->namelen) {
+ struct dso *dso = al->map ? map__dso(al->map) : NULL;
+
+ fputc(',', out);
+ output_json_key_string(out, false, 5, "symbol", al->sym->name);
+
+ if (dso) {
+ const char *dso_name = dso->short_name;
+
+ if (dso_name && strlen(dso_name) > 0) {
+ fputc(',', out);
+ output_json_key_string(out, false, 5, "dso", dso_name);
+ }
+ }
+ }
+
+ output_json_format(out, false, 4, "}");
+}
+
+static int process_sample_event(struct perf_tool *tool,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample,
+ struct evsel *evsel __maybe_unused,
+ struct machine *machine)
+{
+ struct convert_json *c = container_of(tool, struct convert_json, tool);
+ FILE *out = c->out;
+ struct addr_location al;
+ u64 sample_type = __evlist__combined_sample_type(evsel->evlist);
+ u8 cpumode = PERF_RECORD_MISC_USER;
+
+ addr_location__init(&al);
+ if (machine__resolve(machine, &al, sample) < 0) {
+ pr_err("Sample resolution failed!\n");
+ addr_location__exit(&al);
+ return -1;
+ }
+
+ ++c->events_count;
+
+ if (c->first)
+ c->first = false;
+ else
+ fputc(',', out);
+ output_json_format(out, false, 2, "{");
+
+ output_json_key_format(out, false, 3, "timestamp", "%" PRIi64, sample->time);
+ output_json_key_format(out, true, 3, "pid", "%i", thread__pid(al.thread));
+ output_json_key_format(out, true, 3, "tid", "%i", thread__tid(al.thread));
+
+ if ((sample_type & PERF_SAMPLE_CPU))
+ output_json_key_format(out, true, 3, "cpu", "%i", sample->cpu);
+ else if (thread__cpu(al.thread) >= 0)
+ output_json_key_format(out, true, 3, "cpu", "%i", thread__cpu(al.thread));
+
+ output_json_key_string(out, true, 3, "comm", thread__comm_str(al.thread));
+
+ output_json_key_format(out, true, 3, "callchain", "[");
+ if (sample->callchain) {
+ unsigned int i;
+ bool ok;
+ bool first_callchain = true;
+
+ for (i = 0; i < sample->callchain->nr; ++i) {
+ u64 ip = sample->callchain->ips[i];
+ struct addr_location tal;
+
+ if (ip >= PERF_CONTEXT_MAX) {
+ switch (ip) {
+ case PERF_CONTEXT_HV:
+ cpumode = PERF_RECORD_MISC_HYPERVISOR;
+ break;
+ case PERF_CONTEXT_KERNEL:
+ cpumode = PERF_RECORD_MISC_KERNEL;
+ break;
+ case PERF_CONTEXT_USER:
+ cpumode = PERF_RECORD_MISC_USER;
+ break;
+ default:
+ pr_debug("invalid callchain context: %"
+ PRId64 "\n", (s64) ip);
+ break;
+ }
+ continue;
+ }
+
+ if (first_callchain)
+ first_callchain = false;
+ else
+ fputc(',', out);
+
+ addr_location__init(&tal);
+ ok = thread__find_symbol(al.thread, cpumode, ip, &tal);
+ output_sample_callchain_entry(tool, ip, ok ? &tal : NULL);
+ addr_location__exit(&tal);
+ }
+ } else {
+ output_sample_callchain_entry(tool, sample->ip, &al);
+ }
+ output_json_format(out, false, 3, "]");
+
+#ifdef HAVE_LIBTRACEEVENT
+ if (sample->raw_data) {
+ int i;
+ struct tep_format_field **fields;
+
+ fields = tep_event_fields(evsel->tp_format);
+ if (fields) {
+ i = 0;
+ while (fields[i]) {
+ struct trace_seq s;
+
+ trace_seq_init(&s);
+ tep_print_field(&s, sample->raw_data, fields[i]);
+ output_json_key_string(out, true, 3, fields[i]->name, s.buffer);
+
+ i++;
+ }
+ free(fields);
+ }
+ }
+#endif
+ output_json_format(out, false, 2, "}");
+ addr_location__exit(&al);
+ return 0;
+}
+
+static void output_headers(struct perf_session *session, struct convert_json *c)
+{
+ struct stat st;
+ struct perf_header *header = &session->header;
+ int ret;
+ int fd = perf_data__fd(session->data);
+ int i;
+ FILE *out = c->out;
+
+ output_json_key_format(out, false, 2, "header-version", "%u", header->version);
+
+ ret = fstat(fd, &st);
+ if (ret >= 0) {
+ time_t stctime = st.st_mtime;
+ char buf[256];
+
+ strftime(buf, sizeof(buf), "%FT%TZ", gmtime(&stctime));
+ output_json_key_string(out, true, 2, "captured-on", buf);
+ } else {
+ pr_debug("Failed to get mtime of source file, not writing captured-on");
+ }
+
+ output_json_key_format(out, true, 2, "data-offset", "%" PRIu64, header->data_offset);
+ output_json_key_format(out, true, 2, "data-size", "%" PRIu64, header->data_size);
+ output_json_key_format(out, true, 2, "feat-offset", "%" PRIu64, header->feat_offset);
+
+ output_json_key_string(out, true, 2, "hostname", header->env.hostname);
+ output_json_key_string(out, true, 2, "os-release", header->env.os_release);
+ output_json_key_string(out, true, 2, "arch", header->env.arch);
+
+ output_json_key_string(out, true, 2, "cpu-desc", header->env.cpu_desc);
+ output_json_key_string(out, true, 2, "cpuid", header->env.cpuid);
+ output_json_key_format(out, true, 2, "nrcpus-online", "%u", header->env.nr_cpus_online);
+ output_json_key_format(out, true, 2, "nrcpus-avail", "%u", header->env.nr_cpus_avail);
+
+ if (header->env.clock.enabled) {
+ output_json_key_format(out, true, 2, "clockid",
+ "%u", header->env.clock.clockid);
+ output_json_key_format(out, true, 2, "clock-time",
+ "%" PRIu64, header->env.clock.clockid_ns);
+ output_json_key_format(out, true, 2, "real-time",
+ "%" PRIu64, header->env.clock.tod_ns);
+ }
+
+ output_json_key_string(out, true, 2, "perf-version", header->env.version);
+
+ output_json_key_format(out, true, 2, "cmdline", "[");
+ for (i = 0; i < header->env.nr_cmdline; i++) {
+ output_json_delimiters(out, i != 0, 3);
+ output_json_string(c->out, header->env.cmdline_argv[i]);
+ }
+ output_json_format(out, false, 2, "]");
+}
+
+int bt_convert__perf2json(const char *input_name, const char *output_name,
+ struct perf_data_convert_opts *opts __maybe_unused)
+{
+ struct perf_session *session;
+ int fd;
+ int ret = -1;
+
+ struct convert_json c = {
+ .tool = {
+ .sample = process_sample_event,
+ .mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
+ .comm = perf_event__process_comm,
+ .namespaces = perf_event__process_namespaces,
+ .cgroup = perf_event__process_cgroup,
+ .exit = perf_event__process_exit,
+ .fork = perf_event__process_fork,
+ .lost = perf_event__process_lost,
+#ifdef HAVE_LIBTRACEEVENT
+ .tracing_data = perf_event__process_tracing_data,
+#endif
+ .build_id = perf_event__process_build_id,
+ .id_index = perf_event__process_id_index,
+ .auxtrace_info = perf_event__process_auxtrace_info,
+ .auxtrace = perf_event__process_auxtrace,
+ .event_update = perf_event__process_event_update,
+ .ordered_events = true,
+ .ordering_requires_timestamps = true,
+ },
+ .first = true,
+ .events_count = 0,
+ };
+
+ struct perf_data data = {
+ .mode = PERF_DATA_MODE_READ,
+ .path = input_name,
+ .force = opts->force,
+ };
+
+ if (opts->all) {
+ pr_err("--all is currently unsupported for JSON output.\n");
+ goto err;
+ }
+ if (opts->tod) {
+ pr_err("--tod is currently unsupported for JSON output.\n");
+ goto err;
+ }
+
+ fd = open(output_name, O_CREAT | O_WRONLY | (opts->force ? O_TRUNC : O_EXCL), 0666);
+ if (fd == -1) {
+ if (errno == EEXIST)
+ pr_err("Output file exists. Use --force to overwrite it.\n");
+ else
+ pr_err("Error opening output file!\n");
+ goto err;
+ }
+
+ c.out = fdopen(fd, "w");
+ if (!c.out) {
+ fprintf(stderr, "Error opening output file!\n");
+ close(fd);
+ goto err;
+ }
+
+ session = perf_session__new(&data, &c.tool);
+ if (IS_ERR(session)) {
+ fprintf(stderr, "Error creating perf session!\n");
+ goto err_fclose;
+ }
+
+ if (symbol__init(&session->header.env) < 0) {
+ fprintf(stderr, "Symbol init error!\n");
+ goto err_session_delete;
+ }
+
+ // The opening brace is printed manually because it isn't delimited from a
+ // previous value (i.e. we don't want a leading newline)
+ fputc('{', c.out);
+
+ // Version number for future-proofing. Most additions should be able to be
+ // done in a backwards-compatible way so this should only need to be bumped
+ // if some major breaking change must be made.
+ output_json_format(c.out, false, 1, "\"linux-perf-json-version\": 1");
+
+ // Output headers
+ output_json_format(c.out, true, 1, "\"headers\": {");
+ output_headers(session, &c);
+ output_json_format(c.out, false, 1, "}");
+
+ // Output samples
+ output_json_format(c.out, true, 1, "\"samples\": [");
+ perf_session__process_events(session);
+ output_json_format(c.out, false, 1, "]");
+ output_json_format(c.out, false, 0, "}");
+ fputc('\n', c.out);
+
+ fprintf(stderr,
+ "[ perf data convert: Converted '%s' into JSON data '%s' ]\n",
+ data.path, output_name);
+
+ fprintf(stderr,
+ "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples) ]\n",
+ (ftell(c.out)) / 1024.0 / 1024.0, c.events_count);
+
+ ret = 0;
+err_session_delete:
+ perf_session__delete(session);
+err_fclose:
+ fclose(c.out);
+err:
+ return ret;
+}
diff --git a/tools/perf/util/data-convert.h b/tools/perf/util/data-convert.h
new file mode 100644
index 000000000..1b4c5f598
--- /dev/null
+++ b/tools/perf/util/data-convert.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __DATA_CONVERT_H
+#define __DATA_CONVERT_H
+
+#include <stdbool.h>
+
+struct perf_data_convert_opts {
+ bool force;
+ bool all;
+ bool tod;
+};
+
+#ifdef HAVE_LIBBABELTRACE_SUPPORT
+int bt_convert__perf2ctf(const char *input_name, const char *to_ctf,
+ struct perf_data_convert_opts *opts);
+#endif /* HAVE_LIBBABELTRACE_SUPPORT */
+
+int bt_convert__perf2json(const char *input_name, const char *to_ctf,
+ struct perf_data_convert_opts *opts);
+
+#endif /* __DATA_CONVERT_H */
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
new file mode 100644
index 000000000..fc16299c9
--- /dev/null
+++ b/tools/perf/util/data.c
@@ -0,0 +1,559 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <linux/err.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <asm/bug.h>
+#include <dirent.h>
+
+#include "data.h"
+#include "util.h" // rm_rf_perf_data()
+#include "debug.h"
+#include "header.h"
+#include <internal/lib.h>
+
+static void close_dir(struct perf_data_file *files, int nr)
+{
+ while (--nr >= 0) {
+ close(files[nr].fd);
+ zfree(&files[nr].path);
+ }
+ free(files);
+}
+
+void perf_data__close_dir(struct perf_data *data)
+{
+ close_dir(data->dir.files, data->dir.nr);
+}
+
+int perf_data__create_dir(struct perf_data *data, int nr)
+{
+ struct perf_data_file *files = NULL;
+ int i, ret;
+
+ if (WARN_ON(!data->is_dir))
+ return -EINVAL;
+
+ files = zalloc(nr * sizeof(*files));
+ if (!files)
+ return -ENOMEM;
+
+ for (i = 0; i < nr; i++) {
+ struct perf_data_file *file = &files[i];
+
+ ret = asprintf(&file->path, "%s/data.%d", data->path, i);
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ ret = open(file->path, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR);
+ if (ret < 0) {
+ ret = -errno;
+ goto out_err;
+ }
+
+ file->fd = ret;
+ }
+
+ data->dir.version = PERF_DIR_VERSION;
+ data->dir.files = files;
+ data->dir.nr = nr;
+ return 0;
+
+out_err:
+ close_dir(files, i);
+ return ret;
+}
+
+int perf_data__open_dir(struct perf_data *data)
+{
+ struct perf_data_file *files = NULL;
+ struct dirent *dent;
+ int ret = -1;
+ DIR *dir;
+ int nr = 0;
+
+ /*
+ * Directory containing a single regular perf data file which is already
+ * open, means there is nothing more to do here.
+ */
+ if (perf_data__is_single_file(data))
+ return 0;
+
+ if (WARN_ON(!data->is_dir))
+ return -EINVAL;
+
+ /* The version is provided by DIR_FORMAT feature. */
+ if (WARN_ON(data->dir.version != PERF_DIR_VERSION))
+ return -1;
+
+ dir = opendir(data->path);
+ if (!dir)
+ return -EINVAL;
+
+ while ((dent = readdir(dir)) != NULL) {
+ struct perf_data_file *file;
+ char path[PATH_MAX];
+ struct stat st;
+
+ snprintf(path, sizeof(path), "%s/%s", data->path, dent->d_name);
+ if (stat(path, &st))
+ continue;
+
+ if (!S_ISREG(st.st_mode) || strncmp(dent->d_name, "data.", 5))
+ continue;
+
+ ret = -ENOMEM;
+
+ file = realloc(files, (nr + 1) * sizeof(*files));
+ if (!file)
+ goto out_err;
+
+ files = file;
+ file = &files[nr++];
+
+ file->path = strdup(path);
+ if (!file->path)
+ goto out_err;
+
+ ret = open(file->path, O_RDONLY);
+ if (ret < 0)
+ goto out_err;
+
+ file->fd = ret;
+ file->size = st.st_size;
+ }
+
+ closedir(dir);
+ if (!files)
+ return -EINVAL;
+
+ data->dir.files = files;
+ data->dir.nr = nr;
+ return 0;
+
+out_err:
+ closedir(dir);
+ close_dir(files, nr);
+ return ret;
+}
+
+int perf_data__update_dir(struct perf_data *data)
+{
+ int i;
+
+ if (WARN_ON(!data->is_dir))
+ return -EINVAL;
+
+ for (i = 0; i < data->dir.nr; i++) {
+ struct perf_data_file *file = &data->dir.files[i];
+ struct stat st;
+
+ if (fstat(file->fd, &st))
+ return -1;
+
+ file->size = st.st_size;
+ }
+
+ return 0;
+}
+
+static bool check_pipe(struct perf_data *data)
+{
+ struct stat st;
+ bool is_pipe = false;
+ int fd = perf_data__is_read(data) ?
+ STDIN_FILENO : STDOUT_FILENO;
+
+ if (!data->path) {
+ if (!fstat(fd, &st) && S_ISFIFO(st.st_mode))
+ is_pipe = true;
+ } else {
+ if (!strcmp(data->path, "-"))
+ is_pipe = true;
+ }
+
+ if (is_pipe) {
+ if (data->use_stdio) {
+ const char *mode;
+
+ mode = perf_data__is_read(data) ? "r" : "w";
+ data->file.fptr = fdopen(fd, mode);
+
+ if (data->file.fptr == NULL) {
+ data->file.fd = fd;
+ data->use_stdio = false;
+ }
+ } else {
+ data->file.fd = fd;
+ }
+ }
+
+ return data->is_pipe = is_pipe;
+}
+
+static int check_backup(struct perf_data *data)
+{
+ struct stat st;
+
+ if (perf_data__is_read(data))
+ return 0;
+
+ if (!stat(data->path, &st) && st.st_size) {
+ char oldname[PATH_MAX];
+ int ret;
+
+ snprintf(oldname, sizeof(oldname), "%s.old",
+ data->path);
+
+ ret = rm_rf_perf_data(oldname);
+ if (ret) {
+ pr_err("Can't remove old data: %s (%s)\n",
+ ret == -2 ?
+ "Unknown file found" : strerror(errno),
+ oldname);
+ return -1;
+ }
+
+ if (rename(data->path, oldname)) {
+ pr_err("Can't move data: %s (%s to %s)\n",
+ strerror(errno),
+ data->path, oldname);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static bool is_dir(struct perf_data *data)
+{
+ struct stat st;
+
+ if (stat(data->path, &st))
+ return false;
+
+ return (st.st_mode & S_IFMT) == S_IFDIR;
+}
+
+static int open_file_read(struct perf_data *data)
+{
+ int flags = data->in_place_update ? O_RDWR : O_RDONLY;
+ struct stat st;
+ int fd;
+ char sbuf[STRERR_BUFSIZE];
+
+ fd = open(data->file.path, flags);
+ if (fd < 0) {
+ int err = errno;
+
+ pr_err("failed to open %s: %s", data->file.path,
+ str_error_r(err, sbuf, sizeof(sbuf)));
+ if (err == ENOENT && !strcmp(data->file.path, "perf.data"))
+ pr_err(" (try 'perf record' first)");
+ pr_err("\n");
+ return -err;
+ }
+
+ if (fstat(fd, &st) < 0)
+ goto out_close;
+
+ if (!data->force && st.st_uid && (st.st_uid != geteuid())) {
+ pr_err("File %s not owned by current user or root (use -f to override)\n",
+ data->file.path);
+ goto out_close;
+ }
+
+ if (!st.st_size) {
+ pr_info("zero-sized data (%s), nothing to do!\n",
+ data->file.path);
+ goto out_close;
+ }
+
+ data->file.size = st.st_size;
+ return fd;
+
+ out_close:
+ close(fd);
+ return -1;
+}
+
+static int open_file_write(struct perf_data *data)
+{
+ int fd;
+ char sbuf[STRERR_BUFSIZE];
+
+ fd = open(data->file.path, O_CREAT|O_RDWR|O_TRUNC|O_CLOEXEC,
+ S_IRUSR|S_IWUSR);
+
+ if (fd < 0)
+ pr_err("failed to open %s : %s\n", data->file.path,
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+
+ return fd;
+}
+
+static int open_file(struct perf_data *data)
+{
+ int fd;
+
+ fd = perf_data__is_read(data) ?
+ open_file_read(data) : open_file_write(data);
+
+ if (fd < 0) {
+ zfree(&data->file.path);
+ return -1;
+ }
+
+ data->file.fd = fd;
+ return 0;
+}
+
+static int open_file_dup(struct perf_data *data)
+{
+ data->file.path = strdup(data->path);
+ if (!data->file.path)
+ return -ENOMEM;
+
+ return open_file(data);
+}
+
+static int open_dir(struct perf_data *data)
+{
+ int ret;
+
+ /*
+ * So far we open only the header, so we can read the data version and
+ * layout.
+ */
+ if (asprintf(&data->file.path, "%s/data", data->path) < 0)
+ return -1;
+
+ if (perf_data__is_write(data) &&
+ mkdir(data->path, S_IRWXU) < 0)
+ return -1;
+
+ ret = open_file(data);
+
+ /* Cleanup whatever we managed to create so far. */
+ if (ret && perf_data__is_write(data))
+ rm_rf_perf_data(data->path);
+
+ return ret;
+}
+
+int perf_data__open(struct perf_data *data)
+{
+ if (check_pipe(data))
+ return 0;
+
+ /* currently it allows stdio for pipe only */
+ data->use_stdio = false;
+
+ if (!data->path)
+ data->path = "perf.data";
+
+ if (check_backup(data))
+ return -1;
+
+ if (perf_data__is_read(data))
+ data->is_dir = is_dir(data);
+
+ return perf_data__is_dir(data) ?
+ open_dir(data) : open_file_dup(data);
+}
+
+void perf_data__close(struct perf_data *data)
+{
+ if (perf_data__is_dir(data))
+ perf_data__close_dir(data);
+
+ zfree(&data->file.path);
+
+ if (data->use_stdio)
+ fclose(data->file.fptr);
+ else
+ close(data->file.fd);
+}
+
+ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size)
+{
+ if (data->use_stdio) {
+ if (fread(buf, size, 1, data->file.fptr) == 1)
+ return size;
+ return feof(data->file.fptr) ? 0 : -1;
+ }
+ return readn(data->file.fd, buf, size);
+}
+
+ssize_t perf_data_file__write(struct perf_data_file *file,
+ void *buf, size_t size)
+{
+ return writen(file->fd, buf, size);
+}
+
+ssize_t perf_data__write(struct perf_data *data,
+ void *buf, size_t size)
+{
+ if (data->use_stdio) {
+ if (fwrite(buf, size, 1, data->file.fptr) == 1)
+ return size;
+ return -1;
+ }
+ return perf_data_file__write(&data->file, buf, size);
+}
+
+int perf_data__switch(struct perf_data *data,
+ const char *postfix,
+ size_t pos, bool at_exit,
+ char **new_filepath)
+{
+ int ret;
+
+ if (check_pipe(data))
+ return -EINVAL;
+ if (perf_data__is_read(data))
+ return -EINVAL;
+
+ if (asprintf(new_filepath, "%s.%s", data->path, postfix) < 0)
+ return -ENOMEM;
+
+ /*
+ * Only fire a warning, don't return error, continue fill
+ * original file.
+ */
+ if (rename(data->path, *new_filepath))
+ pr_warning("Failed to rename %s to %s\n", data->path, *new_filepath);
+
+ if (!at_exit) {
+ close(data->file.fd);
+ ret = perf_data__open(data);
+ if (ret < 0)
+ goto out;
+
+ if (lseek(data->file.fd, pos, SEEK_SET) == (off_t)-1) {
+ ret = -errno;
+ pr_debug("Failed to lseek to %zu: %s",
+ pos, strerror(errno));
+ goto out;
+ }
+ }
+ ret = data->file.fd;
+out:
+ return ret;
+}
+
+unsigned long perf_data__size(struct perf_data *data)
+{
+ u64 size = data->file.size;
+ int i;
+
+ if (perf_data__is_single_file(data))
+ return size;
+
+ for (i = 0; i < data->dir.nr; i++) {
+ struct perf_data_file *file = &data->dir.files[i];
+
+ size += file->size;
+ }
+
+ return size;
+}
+
+int perf_data__make_kcore_dir(struct perf_data *data, char *buf, size_t buf_sz)
+{
+ int ret;
+
+ if (!data->is_dir)
+ return -1;
+
+ ret = snprintf(buf, buf_sz, "%s/kcore_dir", data->path);
+ if (ret < 0 || (size_t)ret >= buf_sz)
+ return -1;
+
+ return mkdir(buf, S_IRWXU);
+}
+
+bool has_kcore_dir(const char *path)
+{
+ struct dirent *d = ERR_PTR(-EINVAL);
+ const char *name = "kcore_dir";
+ DIR *dir = opendir(path);
+ size_t n = strlen(name);
+ bool result = false;
+
+ if (dir) {
+ while (d && !result) {
+ d = readdir(dir);
+ result = d ? strncmp(d->d_name, name, n) : false;
+ }
+ closedir(dir);
+ }
+
+ return result;
+}
+
+char *perf_data__kallsyms_name(struct perf_data *data)
+{
+ char *kallsyms_name;
+ struct stat st;
+
+ if (!data->is_dir)
+ return NULL;
+
+ if (asprintf(&kallsyms_name, "%s/kcore_dir/kallsyms", data->path) < 0)
+ return NULL;
+
+ if (stat(kallsyms_name, &st)) {
+ free(kallsyms_name);
+ return NULL;
+ }
+
+ return kallsyms_name;
+}
+
+char *perf_data__guest_kallsyms_name(struct perf_data *data, pid_t machine_pid)
+{
+ char *kallsyms_name;
+ struct stat st;
+
+ if (!data->is_dir)
+ return NULL;
+
+ if (asprintf(&kallsyms_name, "%s/kcore_dir__%d/kallsyms", data->path, machine_pid) < 0)
+ return NULL;
+
+ if (stat(kallsyms_name, &st)) {
+ free(kallsyms_name);
+ return NULL;
+ }
+
+ return kallsyms_name;
+}
+
+bool is_perf_data(const char *path)
+{
+ bool ret = false;
+ FILE *file;
+ u64 magic;
+
+ file = fopen(path, "r");
+ if (!file)
+ return false;
+
+ if (fread(&magic, 1, 8, file) < 8)
+ goto out;
+
+ ret = is_perf_magic(magic);
+out:
+ fclose(file);
+ return ret;
+}
diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h
new file mode 100644
index 000000000..effcc195d
--- /dev/null
+++ b/tools/perf/util/data.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DATA_H
+#define __PERF_DATA_H
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <linux/types.h>
+
+enum perf_data_mode {
+ PERF_DATA_MODE_WRITE,
+ PERF_DATA_MODE_READ,
+};
+
+enum perf_dir_version {
+ PERF_DIR_SINGLE_FILE = 0,
+ PERF_DIR_VERSION = 1,
+};
+
+struct perf_data_file {
+ char *path;
+ union {
+ int fd;
+ FILE *fptr;
+ };
+ unsigned long size;
+};
+
+struct perf_data {
+ const char *path;
+ struct perf_data_file file;
+ bool is_pipe;
+ bool is_dir;
+ bool force;
+ bool use_stdio;
+ bool in_place_update;
+ enum perf_data_mode mode;
+
+ struct {
+ u64 version;
+ struct perf_data_file *files;
+ int nr;
+ } dir;
+};
+
+static inline bool perf_data__is_read(struct perf_data *data)
+{
+ return data->mode == PERF_DATA_MODE_READ;
+}
+
+static inline bool perf_data__is_write(struct perf_data *data)
+{
+ return data->mode == PERF_DATA_MODE_WRITE;
+}
+
+static inline int perf_data__is_pipe(struct perf_data *data)
+{
+ return data->is_pipe;
+}
+
+static inline bool perf_data__is_dir(struct perf_data *data)
+{
+ return data->is_dir;
+}
+
+static inline bool perf_data__is_single_file(struct perf_data *data)
+{
+ return data->dir.version == PERF_DIR_SINGLE_FILE;
+}
+
+static inline int perf_data__fd(struct perf_data *data)
+{
+ if (data->use_stdio)
+ return fileno(data->file.fptr);
+
+ return data->file.fd;
+}
+
+int perf_data__open(struct perf_data *data);
+void perf_data__close(struct perf_data *data);
+ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size);
+ssize_t perf_data__write(struct perf_data *data,
+ void *buf, size_t size);
+ssize_t perf_data_file__write(struct perf_data_file *file,
+ void *buf, size_t size);
+/*
+ * If at_exit is set, only rename current perf.data to
+ * perf.data.<postfix>, continue write on original data.
+ * Set at_exit when flushing the last output.
+ *
+ * Return value is fd of new output.
+ */
+int perf_data__switch(struct perf_data *data,
+ const char *postfix,
+ size_t pos, bool at_exit, char **new_filepath);
+
+int perf_data__create_dir(struct perf_data *data, int nr);
+int perf_data__open_dir(struct perf_data *data);
+void perf_data__close_dir(struct perf_data *data);
+int perf_data__update_dir(struct perf_data *data);
+unsigned long perf_data__size(struct perf_data *data);
+int perf_data__make_kcore_dir(struct perf_data *data, char *buf, size_t buf_sz);
+bool has_kcore_dir(const char *path);
+char *perf_data__kallsyms_name(struct perf_data *data);
+char *perf_data__guest_kallsyms_name(struct perf_data *data, pid_t machine_pid);
+bool is_perf_data(const char *path);
+#endif /* __PERF_DATA_H */
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
new file mode 100644
index 000000000..106429155
--- /dev/null
+++ b/tools/perf/util/db-export.c
@@ -0,0 +1,615 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * db-export.c: Support for exporting data suitable for import to a database
+ * Copyright (c) 2014, Intel Corporation.
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+
+#include "dso.h"
+#include "evsel.h"
+#include "machine.h"
+#include "thread.h"
+#include "comm.h"
+#include "symbol.h"
+#include "map.h"
+#include "event.h"
+#include "thread-stack.h"
+#include "callchain.h"
+#include "call-path.h"
+#include "db-export.h"
+#include <linux/zalloc.h>
+
+int db_export__init(struct db_export *dbe)
+{
+ memset(dbe, 0, sizeof(struct db_export));
+ return 0;
+}
+
+void db_export__exit(struct db_export *dbe)
+{
+ call_return_processor__free(dbe->crp);
+ dbe->crp = NULL;
+}
+
+int db_export__evsel(struct db_export *dbe, struct evsel *evsel)
+{
+ if (evsel->db_id)
+ return 0;
+
+ evsel->db_id = ++dbe->evsel_last_db_id;
+
+ if (dbe->export_evsel)
+ return dbe->export_evsel(dbe, evsel);
+
+ return 0;
+}
+
+int db_export__machine(struct db_export *dbe, struct machine *machine)
+{
+ if (machine->db_id)
+ return 0;
+
+ machine->db_id = ++dbe->machine_last_db_id;
+
+ if (dbe->export_machine)
+ return dbe->export_machine(dbe, machine);
+
+ return 0;
+}
+
+int db_export__thread(struct db_export *dbe, struct thread *thread,
+ struct machine *machine, struct thread *main_thread)
+{
+ u64 main_thread_db_id = 0;
+
+ if (thread__db_id(thread))
+ return 0;
+
+ thread__set_db_id(thread, ++dbe->thread_last_db_id);
+
+ if (main_thread)
+ main_thread_db_id = thread__db_id(main_thread);
+
+ if (dbe->export_thread)
+ return dbe->export_thread(dbe, thread, main_thread_db_id,
+ machine);
+
+ return 0;
+}
+
+static int __db_export__comm(struct db_export *dbe, struct comm *comm,
+ struct thread *thread)
+{
+ comm->db_id = ++dbe->comm_last_db_id;
+
+ if (dbe->export_comm)
+ return dbe->export_comm(dbe, comm, thread);
+
+ return 0;
+}
+
+int db_export__comm(struct db_export *dbe, struct comm *comm,
+ struct thread *thread)
+{
+ if (comm->db_id)
+ return 0;
+
+ return __db_export__comm(dbe, comm, thread);
+}
+
+/*
+ * Export the "exec" comm. The "exec" comm is the program / application command
+ * name at the time it first executes. It is used to group threads for the same
+ * program. Note that the main thread pid (or thread group id tgid) cannot be
+ * used because it does not change when a new program is exec'ed.
+ */
+int db_export__exec_comm(struct db_export *dbe, struct comm *comm,
+ struct thread *main_thread)
+{
+ int err;
+
+ if (comm->db_id)
+ return 0;
+
+ err = __db_export__comm(dbe, comm, main_thread);
+ if (err)
+ return err;
+
+ /*
+ * Record the main thread for this comm. Note that the main thread can
+ * have many "exec" comms because there will be a new one every time it
+ * exec's. An "exec" comm however will only ever have 1 main thread.
+ * That is different to any other threads for that same program because
+ * exec() will effectively kill them, so the relationship between the
+ * "exec" comm and non-main threads is 1-to-1. That is why
+ * db_export__comm_thread() is called here for the main thread, but it
+ * is called for non-main threads when they are exported.
+ */
+ return db_export__comm_thread(dbe, comm, main_thread);
+}
+
+int db_export__comm_thread(struct db_export *dbe, struct comm *comm,
+ struct thread *thread)
+{
+ u64 db_id;
+
+ db_id = ++dbe->comm_thread_last_db_id;
+
+ if (dbe->export_comm_thread)
+ return dbe->export_comm_thread(dbe, db_id, comm, thread);
+
+ return 0;
+}
+
+int db_export__dso(struct db_export *dbe, struct dso *dso,
+ struct machine *machine)
+{
+ if (dso->db_id)
+ return 0;
+
+ dso->db_id = ++dbe->dso_last_db_id;
+
+ if (dbe->export_dso)
+ return dbe->export_dso(dbe, dso, machine);
+
+ return 0;
+}
+
+int db_export__symbol(struct db_export *dbe, struct symbol *sym,
+ struct dso *dso)
+{
+ u64 *sym_db_id = symbol__priv(sym);
+
+ if (*sym_db_id)
+ return 0;
+
+ *sym_db_id = ++dbe->symbol_last_db_id;
+
+ if (dbe->export_symbol)
+ return dbe->export_symbol(dbe, sym, dso);
+
+ return 0;
+}
+
+static int db_ids_from_al(struct db_export *dbe, struct addr_location *al,
+ u64 *dso_db_id, u64 *sym_db_id, u64 *offset)
+{
+ int err;
+
+ if (al->map) {
+ struct dso *dso = map__dso(al->map);
+
+ err = db_export__dso(dbe, dso, maps__machine(al->maps));
+ if (err)
+ return err;
+ *dso_db_id = dso->db_id;
+
+ if (!al->sym) {
+ al->sym = symbol__new(al->addr, 0, 0, 0, "unknown");
+ if (al->sym)
+ dso__insert_symbol(dso, al->sym);
+ }
+
+ if (al->sym) {
+ u64 *db_id = symbol__priv(al->sym);
+
+ err = db_export__symbol(dbe, al->sym, dso);
+ if (err)
+ return err;
+ *sym_db_id = *db_id;
+ *offset = al->addr - al->sym->start;
+ }
+ }
+
+ return 0;
+}
+
+static struct call_path *call_path_from_sample(struct db_export *dbe,
+ struct machine *machine,
+ struct thread *thread,
+ struct perf_sample *sample,
+ struct evsel *evsel)
+{
+ u64 kernel_start = machine__kernel_start(machine);
+ struct call_path *current = &dbe->cpr->call_path;
+ enum chain_order saved_order = callchain_param.order;
+ struct callchain_cursor *cursor;
+ int err;
+
+ if (!symbol_conf.use_callchain || !sample->callchain)
+ return NULL;
+
+ /*
+ * Since the call path tree must be built starting with the root, we
+ * must use ORDER_CALL for call chain resolution, in order to process
+ * the callchain starting with the root node and ending with the leaf.
+ */
+ callchain_param.order = ORDER_CALLER;
+ cursor = get_tls_callchain_cursor();
+ err = thread__resolve_callchain(thread, cursor, evsel,
+ sample, NULL, NULL, PERF_MAX_STACK_DEPTH);
+ if (err) {
+ callchain_param.order = saved_order;
+ return NULL;
+ }
+ callchain_cursor_commit(cursor);
+
+ while (1) {
+ struct callchain_cursor_node *node;
+ struct addr_location al;
+ u64 dso_db_id = 0, sym_db_id = 0, offset = 0;
+
+
+ node = callchain_cursor_current(cursor);
+ if (!node)
+ break;
+
+ /*
+ * Handle export of symbol and dso for this node by
+ * constructing an addr_location struct and then passing it to
+ * db_ids_from_al() to perform the export.
+ */
+ addr_location__init(&al);
+ al.sym = node->ms.sym;
+ al.map = map__get(node->ms.map);
+ al.maps = maps__get(thread__maps(thread));
+ al.addr = node->ip;
+
+ if (al.map && !al.sym)
+ al.sym = dso__find_symbol(map__dso(al.map), al.addr);
+
+ db_ids_from_al(dbe, &al, &dso_db_id, &sym_db_id, &offset);
+
+ /* add node to the call path tree if it doesn't exist */
+ current = call_path__findnew(dbe->cpr, current,
+ al.sym, node->ip,
+ kernel_start);
+
+ callchain_cursor_advance(cursor);
+ addr_location__exit(&al);
+ }
+
+ /* Reset the callchain order to its prior value. */
+ callchain_param.order = saved_order;
+
+ if (current == &dbe->cpr->call_path) {
+ /* Bail because the callchain was empty. */
+ return NULL;
+ }
+
+ return current;
+}
+
+int db_export__branch_type(struct db_export *dbe, u32 branch_type,
+ const char *name)
+{
+ if (dbe->export_branch_type)
+ return dbe->export_branch_type(dbe, branch_type, name);
+
+ return 0;
+}
+
+static int db_export__threads(struct db_export *dbe, struct thread *thread,
+ struct thread *main_thread,
+ struct machine *machine, struct comm **comm_ptr)
+{
+ struct comm *comm = NULL;
+ struct comm *curr_comm;
+ int err;
+
+ if (main_thread) {
+ /*
+ * A thread has a reference to the main thread, so export the
+ * main thread first.
+ */
+ err = db_export__thread(dbe, main_thread, machine, main_thread);
+ if (err)
+ return err;
+ /*
+ * Export comm before exporting the non-main thread because
+ * db_export__comm_thread() can be called further below.
+ */
+ comm = machine__thread_exec_comm(machine, main_thread);
+ if (comm) {
+ err = db_export__exec_comm(dbe, comm, main_thread);
+ if (err)
+ return err;
+ *comm_ptr = comm;
+ }
+ }
+
+ if (thread != main_thread) {
+ /*
+ * For a non-main thread, db_export__comm_thread() must be
+ * called only if thread has not previously been exported.
+ */
+ bool export_comm_thread = comm && !thread__db_id(thread);
+
+ err = db_export__thread(dbe, thread, machine, main_thread);
+ if (err)
+ return err;
+
+ if (export_comm_thread) {
+ err = db_export__comm_thread(dbe, comm, thread);
+ if (err)
+ return err;
+ }
+ }
+
+ curr_comm = thread__comm(thread);
+ if (curr_comm)
+ return db_export__comm(dbe, curr_comm, thread);
+
+ return 0;
+}
+
+int db_export__sample(struct db_export *dbe, union perf_event *event,
+ struct perf_sample *sample, struct evsel *evsel,
+ struct addr_location *al, struct addr_location *addr_al)
+{
+ struct thread *thread = al->thread;
+ struct export_sample es = {
+ .event = event,
+ .sample = sample,
+ .evsel = evsel,
+ .al = al,
+ };
+ struct thread *main_thread;
+ struct comm *comm = NULL;
+ struct machine *machine;
+ int err;
+
+ err = db_export__evsel(dbe, evsel);
+ if (err)
+ return err;
+
+ machine = maps__machine(al->maps);
+ err = db_export__machine(dbe, machine);
+ if (err)
+ return err;
+
+ main_thread = thread__main_thread(machine, thread);
+
+ err = db_export__threads(dbe, thread, main_thread, machine, &comm);
+ if (err)
+ goto out_put;
+
+ if (comm)
+ es.comm_db_id = comm->db_id;
+
+ es.db_id = ++dbe->sample_last_db_id;
+
+ err = db_ids_from_al(dbe, al, &es.dso_db_id, &es.sym_db_id, &es.offset);
+ if (err)
+ goto out_put;
+
+ if (dbe->cpr) {
+ struct call_path *cp = call_path_from_sample(dbe, machine,
+ thread, sample,
+ evsel);
+ if (cp) {
+ db_export__call_path(dbe, cp);
+ es.call_path_id = cp->db_id;
+ }
+ }
+
+ if (addr_al) {
+ err = db_ids_from_al(dbe, addr_al, &es.addr_dso_db_id,
+ &es.addr_sym_db_id, &es.addr_offset);
+ if (err)
+ goto out_put;
+ if (dbe->crp) {
+ err = thread_stack__process(thread, comm, sample, al,
+ addr_al, es.db_id,
+ dbe->crp);
+ if (err)
+ goto out_put;
+ }
+ }
+
+ if (dbe->export_sample)
+ err = dbe->export_sample(dbe, &es);
+
+out_put:
+ thread__put(main_thread);
+ return err;
+}
+
+static struct {
+ u32 branch_type;
+ const char *name;
+} branch_types[] = {
+ {0, "no branch"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL, "call"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN, "return"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL, "conditional jump"},
+ {PERF_IP_FLAG_BRANCH, "unconditional jump"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_INTERRUPT,
+ "software interrupt"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_INTERRUPT,
+ "return from interrupt"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_SYSCALLRET,
+ "system call"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_SYSCALLRET,
+ "return from system call"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_ASYNC, "asynchronous branch"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_INTERRUPT, "hardware interrupt"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT, "transaction abort"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_BEGIN, "trace begin"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "trace end"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMENTRY, "vm entry"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMEXIT, "vm exit"},
+ {0, NULL}
+};
+
+int db_export__branch_types(struct db_export *dbe)
+{
+ int i, err = 0;
+
+ for (i = 0; branch_types[i].name ; i++) {
+ err = db_export__branch_type(dbe, branch_types[i].branch_type,
+ branch_types[i].name);
+ if (err)
+ break;
+ }
+
+ /* Add trace begin / end variants */
+ for (i = 0; branch_types[i].name ; i++) {
+ const char *name = branch_types[i].name;
+ u32 type = branch_types[i].branch_type;
+ char buf[64];
+
+ if (type == PERF_IP_FLAG_BRANCH ||
+ (type & (PERF_IP_FLAG_TRACE_BEGIN | PERF_IP_FLAG_TRACE_END)))
+ continue;
+
+ snprintf(buf, sizeof(buf), "trace begin / %s", name);
+ err = db_export__branch_type(dbe, type | PERF_IP_FLAG_TRACE_BEGIN, buf);
+ if (err)
+ break;
+
+ snprintf(buf, sizeof(buf), "%s / trace end", name);
+ err = db_export__branch_type(dbe, type | PERF_IP_FLAG_TRACE_END, buf);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+int db_export__call_path(struct db_export *dbe, struct call_path *cp)
+{
+ int err;
+
+ if (cp->db_id)
+ return 0;
+
+ if (cp->parent) {
+ err = db_export__call_path(dbe, cp->parent);
+ if (err)
+ return err;
+ }
+
+ cp->db_id = ++dbe->call_path_last_db_id;
+
+ if (dbe->export_call_path)
+ return dbe->export_call_path(dbe, cp);
+
+ return 0;
+}
+
+int db_export__call_return(struct db_export *dbe, struct call_return *cr,
+ u64 *parent_db_id)
+{
+ int err;
+
+ err = db_export__call_path(dbe, cr->cp);
+ if (err)
+ return err;
+
+ if (!cr->db_id)
+ cr->db_id = ++dbe->call_return_last_db_id;
+
+ if (parent_db_id) {
+ if (!*parent_db_id)
+ *parent_db_id = ++dbe->call_return_last_db_id;
+ cr->parent_db_id = *parent_db_id;
+ }
+
+ if (dbe->export_call_return)
+ return dbe->export_call_return(dbe, cr);
+
+ return 0;
+}
+
+static int db_export__pid_tid(struct db_export *dbe, struct machine *machine,
+ pid_t pid, pid_t tid, u64 *db_id,
+ struct comm **comm_ptr, bool *is_idle)
+{
+ struct thread *thread = machine__find_thread(machine, pid, tid);
+ struct thread *main_thread;
+ int err = 0;
+
+ if (!thread || !thread__comm_set(thread))
+ goto out_put;
+
+ *is_idle = !thread__pid(thread) && !thread__tid(thread);
+
+ main_thread = thread__main_thread(machine, thread);
+
+ err = db_export__threads(dbe, thread, main_thread, machine, comm_ptr);
+
+ *db_id = thread__db_id(thread);
+
+ thread__put(main_thread);
+out_put:
+ thread__put(thread);
+
+ return err;
+}
+
+int db_export__switch(struct db_export *dbe, union perf_event *event,
+ struct perf_sample *sample, struct machine *machine)
+{
+ bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+ bool out_preempt = out &&
+ (event->header.misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT);
+ int flags = out | (out_preempt << 1);
+ bool is_idle_a = false, is_idle_b = false;
+ u64 th_a_id = 0, th_b_id = 0;
+ u64 comm_out_id, comm_in_id;
+ struct comm *comm_a = NULL;
+ struct comm *comm_b = NULL;
+ u64 th_out_id, th_in_id;
+ u64 db_id;
+ int err;
+
+ err = db_export__machine(dbe, machine);
+ if (err)
+ return err;
+
+ err = db_export__pid_tid(dbe, machine, sample->pid, sample->tid,
+ &th_a_id, &comm_a, &is_idle_a);
+ if (err)
+ return err;
+
+ if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) {
+ pid_t pid = event->context_switch.next_prev_pid;
+ pid_t tid = event->context_switch.next_prev_tid;
+
+ err = db_export__pid_tid(dbe, machine, pid, tid, &th_b_id,
+ &comm_b, &is_idle_b);
+ if (err)
+ return err;
+ }
+
+ /*
+ * Do not export if both threads are unknown (i.e. not being traced),
+ * or one is unknown and the other is the idle task.
+ */
+ if ((!th_a_id || is_idle_a) && (!th_b_id || is_idle_b))
+ return 0;
+
+ db_id = ++dbe->context_switch_last_db_id;
+
+ if (out) {
+ th_out_id = th_a_id;
+ th_in_id = th_b_id;
+ comm_out_id = comm_a ? comm_a->db_id : 0;
+ comm_in_id = comm_b ? comm_b->db_id : 0;
+ } else {
+ th_out_id = th_b_id;
+ th_in_id = th_a_id;
+ comm_out_id = comm_b ? comm_b->db_id : 0;
+ comm_in_id = comm_a ? comm_a->db_id : 0;
+ }
+
+ if (dbe->export_context_switch)
+ return dbe->export_context_switch(dbe, db_id, machine, sample,
+ th_out_id, comm_out_id,
+ th_in_id, comm_in_id, flags);
+ return 0;
+}
diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h
new file mode 100644
index 000000000..23983cb35
--- /dev/null
+++ b/tools/perf/util/db-export.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * db-export.h: Support for exporting data suitable for import to a database
+ * Copyright (c) 2014, Intel Corporation.
+ */
+
+#ifndef __PERF_DB_EXPORT_H
+#define __PERF_DB_EXPORT_H
+
+#include <linux/types.h>
+#include <linux/list.h>
+
+struct evsel;
+struct machine;
+struct thread;
+struct comm;
+struct dso;
+struct perf_sample;
+struct addr_location;
+struct call_return_processor;
+struct call_path_root;
+struct call_path;
+struct call_return;
+
+struct export_sample {
+ union perf_event *event;
+ struct perf_sample *sample;
+ struct evsel *evsel;
+ struct addr_location *al;
+ u64 db_id;
+ u64 comm_db_id;
+ u64 dso_db_id;
+ u64 sym_db_id;
+ u64 offset; /* ip offset from symbol start */
+ u64 addr_dso_db_id;
+ u64 addr_sym_db_id;
+ u64 addr_offset; /* addr offset from symbol start */
+ u64 call_path_id;
+};
+
+struct db_export {
+ int (*export_evsel)(struct db_export *dbe, struct evsel *evsel);
+ int (*export_machine)(struct db_export *dbe, struct machine *machine);
+ int (*export_thread)(struct db_export *dbe, struct thread *thread,
+ u64 main_thread_db_id, struct machine *machine);
+ int (*export_comm)(struct db_export *dbe, struct comm *comm,
+ struct thread *thread);
+ int (*export_comm_thread)(struct db_export *dbe, u64 db_id,
+ struct comm *comm, struct thread *thread);
+ int (*export_dso)(struct db_export *dbe, struct dso *dso,
+ struct machine *machine);
+ int (*export_symbol)(struct db_export *dbe, struct symbol *sym,
+ struct dso *dso);
+ int (*export_branch_type)(struct db_export *dbe, u32 branch_type,
+ const char *name);
+ int (*export_sample)(struct db_export *dbe, struct export_sample *es);
+ int (*export_call_path)(struct db_export *dbe, struct call_path *cp);
+ int (*export_call_return)(struct db_export *dbe,
+ struct call_return *cr);
+ int (*export_context_switch)(struct db_export *dbe, u64 db_id,
+ struct machine *machine,
+ struct perf_sample *sample,
+ u64 th_out_id, u64 comm_out_id,
+ u64 th_in_id, u64 comm_in_id, int flags);
+ struct call_return_processor *crp;
+ struct call_path_root *cpr;
+ u64 evsel_last_db_id;
+ u64 machine_last_db_id;
+ u64 thread_last_db_id;
+ u64 comm_last_db_id;
+ u64 comm_thread_last_db_id;
+ u64 dso_last_db_id;
+ u64 symbol_last_db_id;
+ u64 sample_last_db_id;
+ u64 call_path_last_db_id;
+ u64 call_return_last_db_id;
+ u64 context_switch_last_db_id;
+};
+
+int db_export__init(struct db_export *dbe);
+void db_export__exit(struct db_export *dbe);
+int db_export__evsel(struct db_export *dbe, struct evsel *evsel);
+int db_export__machine(struct db_export *dbe, struct machine *machine);
+int db_export__thread(struct db_export *dbe, struct thread *thread,
+ struct machine *machine, struct thread *main_thread);
+int db_export__comm(struct db_export *dbe, struct comm *comm,
+ struct thread *thread);
+int db_export__exec_comm(struct db_export *dbe, struct comm *comm,
+ struct thread *main_thread);
+int db_export__comm_thread(struct db_export *dbe, struct comm *comm,
+ struct thread *thread);
+int db_export__dso(struct db_export *dbe, struct dso *dso,
+ struct machine *machine);
+int db_export__symbol(struct db_export *dbe, struct symbol *sym,
+ struct dso *dso);
+int db_export__branch_type(struct db_export *dbe, u32 branch_type,
+ const char *name);
+int db_export__sample(struct db_export *dbe, union perf_event *event,
+ struct perf_sample *sample, struct evsel *evsel,
+ struct addr_location *al, struct addr_location *addr_al);
+
+int db_export__branch_types(struct db_export *dbe);
+
+int db_export__call_path(struct db_export *dbe, struct call_path *cp);
+int db_export__call_return(struct db_export *dbe, struct call_return *cr,
+ u64 *parent_db_id);
+int db_export__switch(struct db_export *dbe, union perf_event *event,
+ struct perf_sample *sample, struct machine *machine);
+
+#endif
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
new file mode 100644
index 000000000..88378c4c5
--- /dev/null
+++ b/tools/perf/util/debug.c
@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: GPL-2.0
+/* For general debugging purposes */
+
+#include <inttypes.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+#include <api/debug.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+#include <sys/time.h>
+#ifdef HAVE_BACKTRACE_SUPPORT
+#include <execinfo.h>
+#endif
+#include "color.h"
+#include "event.h"
+#include "debug.h"
+#include "print_binary.h"
+#include "target.h"
+#include "trace-event.h"
+#include "ui/helpline.h"
+#include "ui/ui.h"
+#include "util/parse-sublevel-options.h"
+
+#include <linux/ctype.h>
+
+#ifdef HAVE_LIBTRACEEVENT
+#include <traceevent/event-parse.h>
+#else
+#define LIBTRACEEVENT_VERSION 0
+#endif
+
+int verbose;
+int debug_peo_args;
+bool dump_trace = false, quiet = false;
+int debug_ordered_events;
+static int redirect_to_stderr;
+int debug_data_convert;
+static FILE *debug_file;
+bool debug_display_time;
+
+void debug_set_file(FILE *file)
+{
+ debug_file = file;
+}
+
+void debug_set_display_time(bool set)
+{
+ debug_display_time = set;
+}
+
+static int fprintf_time(FILE *file)
+{
+ struct timeval tod;
+ struct tm ltime;
+ char date[64];
+
+ if (!debug_display_time)
+ return 0;
+
+ if (gettimeofday(&tod, NULL) != 0)
+ return 0;
+
+ if (localtime_r(&tod.tv_sec, &ltime) == NULL)
+ return 0;
+
+ strftime(date, sizeof(date), "%F %H:%M:%S", &ltime);
+ return fprintf(file, "[%s.%06lu] ", date, (long)tod.tv_usec);
+}
+
+int veprintf(int level, int var, const char *fmt, va_list args)
+{
+ int ret = 0;
+
+ if (var >= level) {
+ if (use_browser >= 1 && !redirect_to_stderr) {
+ ui_helpline__vshow(fmt, args);
+ } else {
+ ret = fprintf_time(debug_file);
+ ret += vfprintf(debug_file, fmt, args);
+ }
+ }
+
+ return ret;
+}
+
+int eprintf(int level, int var, const char *fmt, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, fmt);
+ ret = veprintf(level, var, fmt, args);
+ va_end(args);
+
+ return ret;
+}
+
+static int veprintf_time(u64 t, const char *fmt, va_list args)
+{
+ int ret = 0;
+ u64 secs, usecs, nsecs = t;
+
+ secs = nsecs / NSEC_PER_SEC;
+ nsecs -= secs * NSEC_PER_SEC;
+ usecs = nsecs / NSEC_PER_USEC;
+
+ ret = fprintf(stderr, "[%13" PRIu64 ".%06" PRIu64 "] ",
+ secs, usecs);
+ ret += vfprintf(stderr, fmt, args);
+ return ret;
+}
+
+int eprintf_time(int level, int var, u64 t, const char *fmt, ...)
+{
+ int ret = 0;
+ va_list args;
+
+ if (var >= level) {
+ va_start(args, fmt);
+ ret = veprintf_time(t, fmt, args);
+ va_end(args);
+ }
+
+ return ret;
+}
+
+/*
+ * Overloading libtraceevent standard info print
+ * function, display with -v in perf.
+ */
+void pr_stat(const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ veprintf(1, verbose, fmt, args);
+ va_end(args);
+ eprintf(1, verbose, "\n");
+}
+
+int dump_printf(const char *fmt, ...)
+{
+ va_list args;
+ int ret = 0;
+
+ if (dump_trace) {
+ va_start(args, fmt);
+ ret = vprintf(fmt, args);
+ va_end(args);
+ }
+
+ return ret;
+}
+
+static int trace_event_printer(enum binary_printer_ops op,
+ unsigned int val, void *extra, FILE *fp)
+{
+ const char *color = PERF_COLOR_BLUE;
+ union perf_event *event = (union perf_event *)extra;
+ unsigned char ch = (unsigned char)val;
+ int printed = 0;
+
+ switch (op) {
+ case BINARY_PRINT_DATA_BEGIN:
+ printed += fprintf(fp, ".");
+ printed += color_fprintf(fp, color, "\n. ... raw event: size %d bytes\n",
+ event->header.size);
+ break;
+ case BINARY_PRINT_LINE_BEGIN:
+ printed += fprintf(fp, ".");
+ break;
+ case BINARY_PRINT_ADDR:
+ printed += color_fprintf(fp, color, " %04x: ", val);
+ break;
+ case BINARY_PRINT_NUM_DATA:
+ printed += color_fprintf(fp, color, " %02x", val);
+ break;
+ case BINARY_PRINT_NUM_PAD:
+ printed += color_fprintf(fp, color, " ");
+ break;
+ case BINARY_PRINT_SEP:
+ printed += color_fprintf(fp, color, " ");
+ break;
+ case BINARY_PRINT_CHAR_DATA:
+ printed += color_fprintf(fp, color, "%c",
+ isprint(ch) && isascii(ch) ? ch : '.');
+ break;
+ case BINARY_PRINT_CHAR_PAD:
+ printed += color_fprintf(fp, color, " ");
+ break;
+ case BINARY_PRINT_LINE_END:
+ printed += color_fprintf(fp, color, "\n");
+ break;
+ case BINARY_PRINT_DATA_END:
+ printed += fprintf(fp, "\n");
+ break;
+ default:
+ break;
+ }
+
+ return printed;
+}
+
+void trace_event(union perf_event *event)
+{
+ unsigned char *raw_event = (void *)event;
+
+ if (!dump_trace)
+ return;
+
+ print_binary(raw_event, event->header.size, 16,
+ trace_event_printer, event);
+}
+
+static struct sublevel_option debug_opts[] = {
+ { .name = "verbose", .value_ptr = &verbose },
+ { .name = "ordered-events", .value_ptr = &debug_ordered_events},
+ { .name = "stderr", .value_ptr = &redirect_to_stderr},
+ { .name = "data-convert", .value_ptr = &debug_data_convert },
+ { .name = "perf-event-open", .value_ptr = &debug_peo_args },
+ { .name = NULL, }
+};
+
+int perf_debug_option(const char *str)
+{
+ int ret;
+
+ ret = perf_parse_sublevel_options(str, debug_opts);
+ if (ret)
+ return ret;
+
+ /* Allow only verbose value in range (0, 10), otherwise set 0. */
+ verbose = (verbose < 0) || (verbose > 10) ? 0 : verbose;
+
+#if LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 3, 0)
+ if (verbose == 1)
+ tep_set_loglevel(TEP_LOG_INFO);
+ else if (verbose == 2)
+ tep_set_loglevel(TEP_LOG_DEBUG);
+ else if (verbose >= 3)
+ tep_set_loglevel(TEP_LOG_ALL);
+#endif
+ return 0;
+}
+
+int perf_quiet_option(void)
+{
+ struct sublevel_option *opt = &debug_opts[0];
+
+ /* disable all debug messages */
+ while (opt->name) {
+ *opt->value_ptr = -1;
+ opt++;
+ }
+
+ /* For debug variables that are used as bool types, set to 0. */
+ redirect_to_stderr = 0;
+ debug_peo_args = 0;
+
+ return 0;
+}
+
+#define DEBUG_WRAPPER(__n, __l) \
+static int pr_ ## __n ## _wrapper(const char *fmt, ...) \
+{ \
+ va_list args; \
+ int ret; \
+ \
+ va_start(args, fmt); \
+ ret = veprintf(__l, verbose, fmt, args); \
+ va_end(args); \
+ return ret; \
+}
+
+DEBUG_WRAPPER(warning, 0);
+DEBUG_WRAPPER(debug, 1);
+
+void perf_debug_setup(void)
+{
+ debug_set_file(stderr);
+ libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper);
+}
+
+/* Obtain a backtrace and print it to stdout. */
+#ifdef HAVE_BACKTRACE_SUPPORT
+void dump_stack(void)
+{
+ void *array[16];
+ size_t size = backtrace(array, ARRAY_SIZE(array));
+ char **strings = backtrace_symbols(array, size);
+ size_t i;
+
+ printf("Obtained %zd stack frames.\n", size);
+
+ for (i = 0; i < size; i++)
+ printf("%s\n", strings[i]);
+
+ free(strings);
+}
+#else
+void dump_stack(void) {}
+#endif
+
+void sighandler_dump_stack(int sig)
+{
+ psignal(sig, "perf");
+ dump_stack();
+ signal(sig, SIG_DFL);
+ raise(sig);
+}
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
new file mode 100644
index 000000000..f99468a7f
--- /dev/null
+++ b/tools/perf/util/debug.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* For debugging general purposes */
+#ifndef __PERF_DEBUG_H
+#define __PERF_DEBUG_H
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <linux/compiler.h>
+
+extern int verbose;
+extern int debug_peo_args;
+extern bool quiet, dump_trace;
+extern int debug_ordered_events;
+extern int debug_data_convert;
+
+#ifndef pr_fmt
+#define pr_fmt(fmt) fmt
+#endif
+
+#define pr_err(fmt, ...) \
+ eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warning(fmt, ...) \
+ eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warning_once(fmt, ...) ({ \
+ static int __warned; \
+ if (unlikely(!__warned)) { \
+ pr_warning(fmt, ##__VA_ARGS__); \
+ __warned = 1; \
+ } \
+})
+#define pr_info(fmt, ...) \
+ eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debug(fmt, ...) \
+ eprintf(1, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debugN(n, fmt, ...) \
+ eprintf(n, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debug2(fmt, ...) pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debug3(fmt, ...) pr_debugN(3, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debug4(fmt, ...) pr_debugN(4, pr_fmt(fmt), ##__VA_ARGS__)
+
+/* Special macro to print perf_event_open arguments/return value. */
+#define pr_debug2_peo(fmt, ...) { \
+ if (debug_peo_args) \
+ pr_debugN(0, pr_fmt(fmt), ##__VA_ARGS__); \
+ else \
+ pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__); \
+}
+
+#define pr_time_N(n, var, t, fmt, ...) \
+ eprintf_time(n, var, t, fmt, ##__VA_ARGS__)
+
+#define pr_oe_time(t, fmt, ...) pr_time_N(1, debug_ordered_events, t, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_oe_time2(t, fmt, ...) pr_time_N(2, debug_ordered_events, t, pr_fmt(fmt), ##__VA_ARGS__)
+
+#define STRERR_BUFSIZE 128 /* For the buffer size of str_error_r */
+
+union perf_event;
+
+int dump_printf(const char *fmt, ...) __printf(1, 2);
+void trace_event(union perf_event *event);
+
+int ui__error(const char *format, ...) __printf(1, 2);
+int ui__warning(const char *format, ...) __printf(1, 2);
+#define ui__warning_once(format, ...) ({ \
+ static int __warned; \
+ if (unlikely(!__warned)) { \
+ ui__warning(format, ##__VA_ARGS__); \
+ __warned = 1; \
+ } \
+})
+
+void pr_stat(const char *fmt, ...);
+
+int eprintf(int level, int var, const char *fmt, ...) __printf(3, 4);
+int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __printf(4, 5);
+int veprintf(int level, int var, const char *fmt, va_list args);
+
+int perf_debug_option(const char *str);
+void debug_set_file(FILE *file);
+void debug_set_display_time(bool set);
+void perf_debug_setup(void);
+int perf_quiet_option(void);
+
+void dump_stack(void);
+void sighandler_dump_stack(int sig);
+
+#endif /* __PERF_DEBUG_H */
diff --git a/tools/perf/util/demangle-cxx.cpp b/tools/perf/util/demangle-cxx.cpp
new file mode 100644
index 000000000..85b706641
--- /dev/null
+++ b/tools/perf/util/demangle-cxx.cpp
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "demangle-cxx.h"
+#include <stdlib.h>
+#include <string.h>
+#include <linux/compiler.h>
+
+#ifdef HAVE_LIBBFD_SUPPORT
+#define PACKAGE 'perf'
+#include <bfd.h>
+#endif
+
+#ifdef HAVE_CXA_DEMANGLE_SUPPORT
+#include <cxxabi.h>
+#endif
+
+#if defined(HAVE_LIBBFD_SUPPORT) || defined(HAVE_CPLUS_DEMANGLE_SUPPORT)
+#ifndef DMGL_PARAMS
+#define DMGL_PARAMS (1 << 0) /* Include function args */
+#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
+#endif
+#endif
+
+/*
+ * Demangle C++ function signature
+ *
+ * Note: caller is responsible for freeing demangled string
+ */
+extern "C"
+char *cxx_demangle_sym(const char *str, bool params __maybe_unused,
+ bool modifiers __maybe_unused)
+{
+#ifdef HAVE_LIBBFD_SUPPORT
+ int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0);
+
+ return bfd_demangle(NULL, str, flags);
+#elif defined(HAVE_CPLUS_DEMANGLE_SUPPORT)
+ int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0);
+
+ return cplus_demangle(str, flags);
+#elif defined(HAVE_CXA_DEMANGLE_SUPPORT)
+ char *output;
+ int status;
+
+ output = abi::__cxa_demangle(str, /*output_buffer=*/NULL, /*length=*/NULL, &status);
+ return output;
+#else
+ return NULL;
+#endif
+}
diff --git a/tools/perf/util/demangle-cxx.h b/tools/perf/util/demangle-cxx.h
new file mode 100644
index 000000000..26b5b66c0
--- /dev/null
+++ b/tools/perf/util/demangle-cxx.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DEMANGLE_CXX
+#define __PERF_DEMANGLE_CXX 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+char *cxx_demangle_sym(const char *str, bool params, bool modifiers);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* __PERF_DEMANGLE_CXX */
diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c
new file mode 100644
index 000000000..ddf33d58b
--- /dev/null
+++ b/tools/perf/util/demangle-java.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "symbol.h"
+
+#include "demangle-java.h"
+
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+
+enum {
+ MODE_PREFIX = 0,
+ MODE_CLASS = 1,
+ MODE_FUNC = 2,
+ MODE_TYPE = 3,
+ MODE_CTYPE = 4, /* class arg */
+};
+
+#define BASE_ENT(c, n) [c - 'A']=n
+static const char *base_types['Z' - 'A' + 1] = {
+ BASE_ENT('B', "byte" ),
+ BASE_ENT('C', "char" ),
+ BASE_ENT('D', "double" ),
+ BASE_ENT('F', "float" ),
+ BASE_ENT('I', "int" ),
+ BASE_ENT('J', "long" ),
+ BASE_ENT('S', "short" ),
+ BASE_ENT('Z', "boolean" ),
+};
+
+/*
+ * demangle Java symbol between str and end positions and stores
+ * up to maxlen characters into buf. The parser starts in mode.
+ *
+ * Use MODE_PREFIX to process entire prototype till end position
+ * Use MODE_TYPE to process return type if str starts on return type char
+ *
+ * Return:
+ * success: buf
+ * error : NULL
+ */
+static char *
+__demangle_java_sym(const char *str, const char *end, char *buf, int maxlen, int mode)
+{
+ int rlen = 0;
+ int array = 0;
+ int narg = 0;
+ const char *q;
+
+ if (!end)
+ end = str + strlen(str);
+
+ for (q = str; q != end; q++) {
+
+ if (rlen == (maxlen - 1))
+ break;
+
+ switch (*q) {
+ case 'L':
+ if (mode == MODE_PREFIX || mode == MODE_TYPE) {
+ if (mode == MODE_TYPE) {
+ if (narg)
+ rlen += scnprintf(buf + rlen, maxlen - rlen, ", ");
+ narg++;
+ }
+ if (mode == MODE_PREFIX)
+ mode = MODE_CLASS;
+ else
+ mode = MODE_CTYPE;
+ } else
+ buf[rlen++] = *q;
+ break;
+ case 'B':
+ case 'C':
+ case 'D':
+ case 'F':
+ case 'I':
+ case 'J':
+ case 'S':
+ case 'Z':
+ if (mode == MODE_TYPE) {
+ if (narg)
+ rlen += scnprintf(buf + rlen, maxlen - rlen, ", ");
+ rlen += scnprintf(buf + rlen, maxlen - rlen, "%s", base_types[*q - 'A']);
+ while (array--)
+ rlen += scnprintf(buf + rlen, maxlen - rlen, "[]");
+ array = 0;
+ narg++;
+ } else
+ buf[rlen++] = *q;
+ break;
+ case 'V':
+ if (mode == MODE_TYPE) {
+ rlen += scnprintf(buf + rlen, maxlen - rlen, "void");
+ while (array--)
+ rlen += scnprintf(buf + rlen, maxlen - rlen, "[]");
+ array = 0;
+ } else
+ buf[rlen++] = *q;
+ break;
+ case '[':
+ if (mode != MODE_TYPE)
+ goto error;
+ array++;
+ break;
+ case '(':
+ if (mode != MODE_FUNC)
+ goto error;
+ buf[rlen++] = *q;
+ mode = MODE_TYPE;
+ break;
+ case ')':
+ if (mode != MODE_TYPE)
+ goto error;
+ buf[rlen++] = *q;
+ narg = 0;
+ break;
+ case ';':
+ if (mode != MODE_CLASS && mode != MODE_CTYPE)
+ goto error;
+ /* safe because at least one other char to process */
+ if (isalpha(*(q + 1)) && mode == MODE_CLASS)
+ rlen += scnprintf(buf + rlen, maxlen - rlen, ".");
+ if (mode == MODE_CLASS)
+ mode = MODE_FUNC;
+ else if (mode == MODE_CTYPE)
+ mode = MODE_TYPE;
+ break;
+ case '/':
+ if (mode != MODE_CLASS && mode != MODE_CTYPE)
+ goto error;
+ rlen += scnprintf(buf + rlen, maxlen - rlen, ".");
+ break;
+ default :
+ buf[rlen++] = *q;
+ }
+ }
+ buf[rlen] = '\0';
+ return buf;
+error:
+ return NULL;
+}
+
+/*
+ * Demangle Java function signature (openJDK, not GCJ)
+ * input:
+ * str: string to parse. String is not modified
+ * flags: combination of JAVA_DEMANGLE_* flags to modify demangling
+ * return:
+ * if input can be demangled, then a newly allocated string is returned.
+ * if input cannot be demangled, then NULL is returned
+ *
+ * Note: caller is responsible for freeing demangled string
+ */
+char *
+java_demangle_sym(const char *str, int flags)
+{
+ char *buf, *ptr;
+ char *p;
+ size_t len, l1 = 0;
+
+ if (!str)
+ return NULL;
+
+ /* find start of return type */
+ p = strrchr(str, ')');
+ if (!p)
+ return NULL;
+
+ /*
+ * expansion factor estimated to 3x
+ */
+ len = strlen(str) * 3 + 1;
+ buf = malloc(len);
+ if (!buf)
+ return NULL;
+
+ buf[0] = '\0';
+ if (!(flags & JAVA_DEMANGLE_NORET)) {
+ /*
+ * get return type first
+ */
+ ptr = __demangle_java_sym(p + 1, NULL, buf, len, MODE_TYPE);
+ if (!ptr)
+ goto error;
+
+ /* add space between return type and function prototype */
+ l1 = strlen(buf);
+ buf[l1++] = ' ';
+ }
+
+ /* process function up to return type */
+ ptr = __demangle_java_sym(str, p + 1, buf + l1, len - l1, MODE_PREFIX);
+ if (!ptr)
+ goto error;
+
+ return buf;
+error:
+ free(buf);
+ return NULL;
+}
diff --git a/tools/perf/util/demangle-java.h b/tools/perf/util/demangle-java.h
new file mode 100644
index 000000000..f936c8eab
--- /dev/null
+++ b/tools/perf/util/demangle-java.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DEMANGLE_JAVA
+#define __PERF_DEMANGLE_JAVA 1
+/*
+ * demangle function flags
+ */
+#define JAVA_DEMANGLE_NORET 0x1 /* do not process return type */
+
+char * java_demangle_sym(const char *str, int flags);
+
+#endif /* __PERF_DEMANGLE_JAVA */
diff --git a/tools/perf/util/demangle-ocaml.c b/tools/perf/util/demangle-ocaml.c
new file mode 100644
index 000000000..9d707bb60
--- /dev/null
+++ b/tools/perf/util/demangle-ocaml.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include <stdlib.h>
+#include "util/string2.h"
+
+#include "demangle-ocaml.h"
+
+#include <linux/ctype.h>
+
+static const char *caml_prefix = "caml";
+static const size_t caml_prefix_len = 4;
+
+/* mangled OCaml symbols start with "caml" followed by an upper-case letter */
+static bool
+ocaml_is_mangled(const char *sym)
+{
+ return 0 == strncmp(sym, caml_prefix, caml_prefix_len)
+ && isupper(sym[caml_prefix_len]);
+}
+
+/*
+ * input:
+ * sym: a symbol which may have been mangled by the OCaml compiler
+ * return:
+ * if the input doesn't look like a mangled OCaml symbol, NULL is returned
+ * otherwise, a newly allocated string containing the demangled symbol is returned
+ */
+char *
+ocaml_demangle_sym(const char *sym)
+{
+ char *result;
+ int j = 0;
+ int i;
+ int len;
+
+ if (!ocaml_is_mangled(sym)) {
+ return NULL;
+ }
+
+ len = strlen(sym);
+
+ /* the demangled symbol is always smaller than the mangled symbol */
+ result = malloc(len + 1);
+ if (!result)
+ return NULL;
+
+ /* skip "caml" prefix */
+ i = caml_prefix_len;
+
+ while (i < len) {
+ if (sym[i] == '_' && sym[i + 1] == '_') {
+ /* "__" -> "." */
+ result[j++] = '.';
+ i += 2;
+ }
+ else if (sym[i] == '$' && isxdigit(sym[i + 1]) && isxdigit(sym[i + 2])) {
+ /* "$xx" is a hex-encoded character */
+ result[j++] = (hex(sym[i + 1]) << 4) | hex(sym[i + 2]);
+ i += 3;
+ }
+ else {
+ result[j++] = sym[i++];
+ }
+ }
+ result[j] = '\0';
+
+ return result;
+}
diff --git a/tools/perf/util/demangle-ocaml.h b/tools/perf/util/demangle-ocaml.h
new file mode 100644
index 000000000..843cc4fa1
--- /dev/null
+++ b/tools/perf/util/demangle-ocaml.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DEMANGLE_OCAML
+#define __PERF_DEMANGLE_OCAML 1
+
+char * ocaml_demangle_sym(const char *str);
+
+#endif /* __PERF_DEMANGLE_OCAML */
diff --git a/tools/perf/util/demangle-rust.c b/tools/perf/util/demangle-rust.c
new file mode 100644
index 000000000..a659fc69f
--- /dev/null
+++ b/tools/perf/util/demangle-rust.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include "debug.h"
+
+#include "demangle-rust.h"
+
+/*
+ * Mangled Rust symbols look like this:
+ *
+ * _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
+ *
+ * The original symbol is:
+ *
+ * <std::sys::fd::FileDesc as core::ops::Drop>::drop
+ *
+ * The last component of the path is a 64-bit hash in lowercase hex, prefixed
+ * with "h". Rust does not have a global namespace between crates, an illusion
+ * which Rust maintains by using the hash to distinguish things that would
+ * otherwise have the same symbol.
+ *
+ * Any path component not starting with a XID_Start character is prefixed with
+ * "_".
+ *
+ * The following escape sequences are used:
+ *
+ * "," => $C$
+ * "@" => $SP$
+ * "*" => $BP$
+ * "&" => $RF$
+ * "<" => $LT$
+ * ">" => $GT$
+ * "(" => $LP$
+ * ")" => $RP$
+ * " " => $u20$
+ * "'" => $u27$
+ * "[" => $u5b$
+ * "]" => $u5d$
+ * "~" => $u7e$
+ *
+ * A double ".." means "::" and a single "." means "-".
+ *
+ * The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$
+ */
+
+static const char *hash_prefix = "::h";
+static const size_t hash_prefix_len = 3;
+static const size_t hash_len = 16;
+
+static bool is_prefixed_hash(const char *start);
+static bool looks_like_rust(const char *sym, size_t len);
+static bool unescape(const char **in, char **out, const char *seq, char value);
+
+/*
+ * INPUT:
+ * sym: symbol that has been through BFD-demangling
+ *
+ * This function looks for the following indicators:
+ *
+ * 1. The hash must consist of "h" followed by 16 lowercase hex digits.
+ *
+ * 2. As a sanity check, the hash must use between 5 and 15 of the 16 possible
+ * hex digits. This is true of 99.9998% of hashes so once in your life you
+ * may see a false negative. The point is to notice path components that
+ * could be Rust hashes but are probably not, like "haaaaaaaaaaaaaaaa". In
+ * this case a false positive (non-Rust symbol has an important path
+ * component removed because it looks like a Rust hash) is worse than a
+ * false negative (the rare Rust symbol is not demangled) so this sets the
+ * balance in favor of false negatives.
+ *
+ * 3. There must be no characters other than a-zA-Z0-9 and _.:$
+ *
+ * 4. There must be no unrecognized $-sign sequences.
+ *
+ * 5. There must be no sequence of three or more dots in a row ("...").
+ */
+bool
+rust_is_mangled(const char *sym)
+{
+ size_t len, len_without_hash;
+
+ if (!sym)
+ return false;
+
+ len = strlen(sym);
+ if (len <= hash_prefix_len + hash_len)
+ /* Not long enough to contain "::h" + hash + something else */
+ return false;
+
+ len_without_hash = len - (hash_prefix_len + hash_len);
+ if (!is_prefixed_hash(sym + len_without_hash))
+ return false;
+
+ return looks_like_rust(sym, len_without_hash);
+}
+
+/*
+ * A hash is the prefix "::h" followed by 16 lowercase hex digits. The hex
+ * digits must comprise between 5 and 15 (inclusive) distinct digits.
+ */
+static bool is_prefixed_hash(const char *str)
+{
+ const char *end;
+ bool seen[16];
+ size_t i;
+ int count;
+
+ if (strncmp(str, hash_prefix, hash_prefix_len))
+ return false;
+ str += hash_prefix_len;
+
+ memset(seen, false, sizeof(seen));
+ for (end = str + hash_len; str < end; str++)
+ if (*str >= '0' && *str <= '9')
+ seen[*str - '0'] = true;
+ else if (*str >= 'a' && *str <= 'f')
+ seen[*str - 'a' + 10] = true;
+ else
+ return false;
+
+ /* Count how many distinct digits seen */
+ count = 0;
+ for (i = 0; i < 16; i++)
+ if (seen[i])
+ count++;
+
+ return count >= 5 && count <= 15;
+}
+
+static bool looks_like_rust(const char *str, size_t len)
+{
+ const char *end = str + len;
+
+ while (str < end)
+ switch (*str) {
+ case '$':
+ if (!strncmp(str, "$C$", 3))
+ str += 3;
+ else if (!strncmp(str, "$SP$", 4)
+ || !strncmp(str, "$BP$", 4)
+ || !strncmp(str, "$RF$", 4)
+ || !strncmp(str, "$LT$", 4)
+ || !strncmp(str, "$GT$", 4)
+ || !strncmp(str, "$LP$", 4)
+ || !strncmp(str, "$RP$", 4))
+ str += 4;
+ else if (!strncmp(str, "$u20$", 5)
+ || !strncmp(str, "$u27$", 5)
+ || !strncmp(str, "$u5b$", 5)
+ || !strncmp(str, "$u5d$", 5)
+ || !strncmp(str, "$u7e$", 5))
+ str += 5;
+ else
+ return false;
+ break;
+ case '.':
+ /* Do not allow three or more consecutive dots */
+ if (!strncmp(str, "...", 3))
+ return false;
+ /* Fall through */
+ case 'a' ... 'z':
+ case 'A' ... 'Z':
+ case '0' ... '9':
+ case '_':
+ case ':':
+ str++;
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * INPUT:
+ * sym: symbol for which rust_is_mangled(sym) returns true
+ *
+ * The input is demangled in-place because the mangled name is always longer
+ * than the demangled one.
+ */
+void
+rust_demangle_sym(char *sym)
+{
+ const char *in;
+ char *out;
+ const char *end;
+
+ if (!sym)
+ return;
+
+ in = sym;
+ out = sym;
+ end = sym + strlen(sym) - (hash_prefix_len + hash_len);
+
+ while (in < end)
+ switch (*in) {
+ case '$':
+ if (!(unescape(&in, &out, "$C$", ',')
+ || unescape(&in, &out, "$SP$", '@')
+ || unescape(&in, &out, "$BP$", '*')
+ || unescape(&in, &out, "$RF$", '&')
+ || unescape(&in, &out, "$LT$", '<')
+ || unescape(&in, &out, "$GT$", '>')
+ || unescape(&in, &out, "$LP$", '(')
+ || unescape(&in, &out, "$RP$", ')')
+ || unescape(&in, &out, "$u20$", ' ')
+ || unescape(&in, &out, "$u27$", '\'')
+ || unescape(&in, &out, "$u5b$", '[')
+ || unescape(&in, &out, "$u5d$", ']')
+ || unescape(&in, &out, "$u7e$", '~'))) {
+ pr_err("demangle-rust: unexpected escape sequence");
+ goto done;
+ }
+ break;
+ case '_':
+ /*
+ * If this is the start of a path component and the next
+ * character is an escape sequence, ignore the
+ * underscore. The mangler inserts an underscore to make
+ * sure the path component begins with a XID_Start
+ * character.
+ */
+ if ((in == sym || in[-1] == ':') && in[1] == '$')
+ in++;
+ else
+ *out++ = *in++;
+ break;
+ case '.':
+ if (in[1] == '.') {
+ /* ".." becomes "::" */
+ *out++ = ':';
+ *out++ = ':';
+ in += 2;
+ } else {
+ /* "." becomes "-" */
+ *out++ = '-';
+ in++;
+ }
+ break;
+ case 'a' ... 'z':
+ case 'A' ... 'Z':
+ case '0' ... '9':
+ case ':':
+ *out++ = *in++;
+ break;
+ default:
+ pr_err("demangle-rust: unexpected character '%c' in symbol\n",
+ *in);
+ goto done;
+ }
+
+done:
+ *out = '\0';
+}
+
+static bool unescape(const char **in, char **out, const char *seq, char value)
+{
+ size_t len = strlen(seq);
+
+ if (strncmp(*in, seq, len))
+ return false;
+
+ **out = value;
+
+ *in += len;
+ *out += 1;
+
+ return true;
+}
diff --git a/tools/perf/util/demangle-rust.h b/tools/perf/util/demangle-rust.h
new file mode 100644
index 000000000..2fca618b1
--- /dev/null
+++ b/tools/perf/util/demangle-rust.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DEMANGLE_RUST
+#define __PERF_DEMANGLE_RUST 1
+
+bool rust_is_mangled(const char *str);
+void rust_demangle_sym(char *str);
+
+#endif /* __PERF_DEMANGLE_RUST */
diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c
new file mode 100644
index 000000000..4a1dc21b0
--- /dev/null
+++ b/tools/perf/util/dlfilter.c
@@ -0,0 +1,660 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dlfilter.c: Interface to perf script --dlfilter shared object
+ * Copyright (c) 2021, Intel Corporation.
+ */
+#include <dlfcn.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dirent.h>
+#include <subcmd/exec-cmd.h>
+#include <linux/zalloc.h>
+#include <linux/build_bug.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+#include "debug.h"
+#include "event.h"
+#include "evsel.h"
+#include "dso.h"
+#include "map.h"
+#include "thread.h"
+#include "trace-event.h"
+#include "symbol.h"
+#include "srcline.h"
+#include "dlfilter.h"
+#include "../include/perf/perf_dlfilter.h"
+
+static void al_to_d_al(struct addr_location *al, struct perf_dlfilter_al *d_al)
+{
+ struct symbol *sym = al->sym;
+
+ d_al->size = sizeof(*d_al);
+ if (al->map) {
+ struct dso *dso = map__dso(al->map);
+
+ if (symbol_conf.show_kernel_path && dso->long_name)
+ d_al->dso = dso->long_name;
+ else
+ d_al->dso = dso->name;
+ d_al->is_64_bit = dso->is_64_bit;
+ d_al->buildid_size = dso->bid.size;
+ d_al->buildid = dso->bid.data;
+ } else {
+ d_al->dso = NULL;
+ d_al->is_64_bit = 0;
+ d_al->buildid_size = 0;
+ d_al->buildid = NULL;
+ }
+ if (sym) {
+ d_al->sym = sym->name;
+ d_al->sym_start = sym->start;
+ d_al->sym_end = sym->end;
+ if (al->addr < sym->end)
+ d_al->symoff = al->addr - sym->start;
+ else
+ d_al->symoff = al->addr - map__start(al->map) - sym->start;
+ d_al->sym_binding = sym->binding;
+ } else {
+ d_al->sym = NULL;
+ d_al->sym_start = 0;
+ d_al->sym_end = 0;
+ d_al->symoff = 0;
+ d_al->sym_binding = 0;
+ }
+ d_al->addr = al->addr;
+ d_al->comm = NULL;
+ d_al->filtered = 0;
+ d_al->priv = NULL;
+}
+
+static struct addr_location *get_al(struct dlfilter *d)
+{
+ struct addr_location *al = d->al;
+
+ if (!al->thread && machine__resolve(d->machine, al, d->sample) < 0)
+ return NULL;
+ return al;
+}
+
+static struct thread *get_thread(struct dlfilter *d)
+{
+ struct addr_location *al = get_al(d);
+
+ return al ? al->thread : NULL;
+}
+
+static const struct perf_dlfilter_al *dlfilter__resolve_ip(void *ctx)
+{
+ struct dlfilter *d = (struct dlfilter *)ctx;
+ struct perf_dlfilter_al *d_al = d->d_ip_al;
+ struct addr_location *al;
+
+ if (!d->ctx_valid)
+ return NULL;
+
+ /* 'size' is also used to indicate already initialized */
+ if (d_al->size)
+ return d_al;
+
+ al = get_al(d);
+ if (!al)
+ return NULL;
+
+ al_to_d_al(al, d_al);
+
+ d_al->is_kernel_ip = machine__kernel_ip(d->machine, d->sample->ip);
+ d_al->comm = al->thread ? thread__comm_str(al->thread) : ":-1";
+ d_al->filtered = al->filtered;
+
+ return d_al;
+}
+
+static const struct perf_dlfilter_al *dlfilter__resolve_addr(void *ctx)
+{
+ struct dlfilter *d = (struct dlfilter *)ctx;
+ struct perf_dlfilter_al *d_addr_al = d->d_addr_al;
+ struct addr_location *addr_al = d->addr_al;
+
+ if (!d->ctx_valid || !d->d_sample->addr_correlates_sym)
+ return NULL;
+
+ /* 'size' is also used to indicate already initialized */
+ if (d_addr_al->size)
+ return d_addr_al;
+
+ if (!addr_al->thread) {
+ struct thread *thread = get_thread(d);
+
+ if (!thread)
+ return NULL;
+ thread__resolve(thread, addr_al, d->sample);
+ }
+
+ al_to_d_al(addr_al, d_addr_al);
+
+ d_addr_al->is_kernel_ip = machine__kernel_ip(d->machine, d->sample->addr);
+
+ return d_addr_al;
+}
+
+static char **dlfilter__args(void *ctx, int *dlargc)
+{
+ struct dlfilter *d = (struct dlfilter *)ctx;
+
+ if (dlargc)
+ *dlargc = 0;
+ else
+ return NULL;
+
+ if (!d->ctx_valid && !d->in_start && !d->in_stop)
+ return NULL;
+
+ *dlargc = d->dlargc;
+ return d->dlargv;
+}
+
+static bool has_priv(struct perf_dlfilter_al *d_al_p)
+{
+ return d_al_p->size >= offsetof(struct perf_dlfilter_al, priv) + sizeof(d_al_p->priv);
+}
+
+static __s32 dlfilter__resolve_address(void *ctx, __u64 address, struct perf_dlfilter_al *d_al_p)
+{
+ struct dlfilter *d = (struct dlfilter *)ctx;
+ struct perf_dlfilter_al d_al;
+ struct addr_location al;
+ struct thread *thread;
+ __u32 sz;
+
+ if (!d->ctx_valid || !d_al_p)
+ return -1;
+
+ thread = get_thread(d);
+ if (!thread)
+ return -1;
+
+ addr_location__init(&al);
+ thread__find_symbol_fb(thread, d->sample->cpumode, address, &al);
+
+ al_to_d_al(&al, &d_al);
+
+ d_al.is_kernel_ip = machine__kernel_ip(d->machine, address);
+
+ sz = d_al_p->size;
+ memcpy(d_al_p, &d_al, min((size_t)sz, sizeof(d_al)));
+ d_al_p->size = sz;
+
+ if (has_priv(d_al_p))
+ d_al_p->priv = memdup(&al, sizeof(al));
+ else /* Avoid leak for v0 API */
+ addr_location__exit(&al);
+
+ return 0;
+}
+
+static void dlfilter__al_cleanup(void *ctx __maybe_unused, struct perf_dlfilter_al *d_al_p)
+{
+ struct addr_location *al;
+
+ /* Ensure backward compatibility */
+ if (!has_priv(d_al_p) || !d_al_p->priv)
+ return;
+
+ al = d_al_p->priv;
+
+ d_al_p->priv = NULL;
+
+ addr_location__exit(al);
+
+ free(al);
+}
+
+static const __u8 *dlfilter__insn(void *ctx, __u32 *len)
+{
+ struct dlfilter *d = (struct dlfilter *)ctx;
+
+ if (!len)
+ return NULL;
+
+ *len = 0;
+
+ if (!d->ctx_valid)
+ return NULL;
+
+ if (d->sample->ip && !d->sample->insn_len) {
+ struct addr_location *al = d->al;
+
+ if (!al->thread && machine__resolve(d->machine, al, d->sample) < 0)
+ return NULL;
+
+ if (thread__maps(al->thread)) {
+ struct machine *machine = maps__machine(thread__maps(al->thread));
+
+ if (machine)
+ script_fetch_insn(d->sample, al->thread, machine);
+ }
+ }
+
+ if (!d->sample->insn_len)
+ return NULL;
+
+ *len = d->sample->insn_len;
+
+ return (__u8 *)d->sample->insn;
+}
+
+static const char *dlfilter__srcline(void *ctx, __u32 *line_no)
+{
+ struct dlfilter *d = (struct dlfilter *)ctx;
+ struct addr_location *al;
+ unsigned int line = 0;
+ char *srcfile = NULL;
+ struct map *map;
+ struct dso *dso;
+ u64 addr;
+
+ if (!d->ctx_valid || !line_no)
+ return NULL;
+
+ al = get_al(d);
+ if (!al)
+ return NULL;
+
+ map = al->map;
+ addr = al->addr;
+ dso = map ? map__dso(map) : NULL;
+
+ if (dso)
+ srcfile = get_srcline_split(dso, map__rip_2objdump(map, addr), &line);
+
+ *line_no = line;
+ return srcfile;
+}
+
+static struct perf_event_attr *dlfilter__attr(void *ctx)
+{
+ struct dlfilter *d = (struct dlfilter *)ctx;
+
+ if (!d->ctx_valid)
+ return NULL;
+
+ return &d->evsel->core.attr;
+}
+
+static __s32 code_read(__u64 ip, struct map *map, struct machine *machine, void *buf, __u32 len)
+{
+ u64 offset = map__map_ip(map, ip);
+
+ if (ip + len >= map__end(map))
+ len = map__end(map) - ip;
+
+ return dso__data_read_offset(map__dso(map), machine, offset, buf, len);
+}
+
+static __s32 dlfilter__object_code(void *ctx, __u64 ip, void *buf, __u32 len)
+{
+ struct dlfilter *d = (struct dlfilter *)ctx;
+ struct addr_location *al;
+ struct addr_location a;
+ __s32 ret;
+
+ if (!d->ctx_valid)
+ return -1;
+
+ al = get_al(d);
+ if (!al)
+ return -1;
+
+ if (al->map && ip >= map__start(al->map) && ip < map__end(al->map) &&
+ machine__kernel_ip(d->machine, ip) == machine__kernel_ip(d->machine, d->sample->ip))
+ return code_read(ip, al->map, d->machine, buf, len);
+
+ addr_location__init(&a);
+
+ thread__find_map_fb(al->thread, d->sample->cpumode, ip, &a);
+ ret = a.map ? code_read(ip, a.map, d->machine, buf, len) : -1;
+
+ addr_location__exit(&a);
+
+ return ret;
+}
+
+static const struct perf_dlfilter_fns perf_dlfilter_fns = {
+ .resolve_ip = dlfilter__resolve_ip,
+ .resolve_addr = dlfilter__resolve_addr,
+ .args = dlfilter__args,
+ .resolve_address = dlfilter__resolve_address,
+ .al_cleanup = dlfilter__al_cleanup,
+ .insn = dlfilter__insn,
+ .srcline = dlfilter__srcline,
+ .attr = dlfilter__attr,
+ .object_code = dlfilter__object_code,
+};
+
+static char *find_dlfilter(const char *file)
+{
+ char path[PATH_MAX];
+ char *exec_path;
+
+ if (strchr(file, '/'))
+ goto out;
+
+ if (!access(file, R_OK)) {
+ /*
+ * Prepend "./" so that dlopen will find the file in the
+ * current directory.
+ */
+ snprintf(path, sizeof(path), "./%s", file);
+ file = path;
+ goto out;
+ }
+
+ exec_path = get_argv_exec_path();
+ if (!exec_path)
+ goto out;
+ snprintf(path, sizeof(path), "%s/dlfilters/%s", exec_path, file);
+ free(exec_path);
+ if (!access(path, R_OK))
+ file = path;
+out:
+ return strdup(file);
+}
+
+#define CHECK_FLAG(x) BUILD_BUG_ON((u64)PERF_DLFILTER_FLAG_ ## x != (u64)PERF_IP_FLAG_ ## x)
+
+static int dlfilter__init(struct dlfilter *d, const char *file, int dlargc, char **dlargv)
+{
+ CHECK_FLAG(BRANCH);
+ CHECK_FLAG(CALL);
+ CHECK_FLAG(RETURN);
+ CHECK_FLAG(CONDITIONAL);
+ CHECK_FLAG(SYSCALLRET);
+ CHECK_FLAG(ASYNC);
+ CHECK_FLAG(INTERRUPT);
+ CHECK_FLAG(TX_ABORT);
+ CHECK_FLAG(TRACE_BEGIN);
+ CHECK_FLAG(TRACE_END);
+ CHECK_FLAG(IN_TX);
+ CHECK_FLAG(VMENTRY);
+ CHECK_FLAG(VMEXIT);
+
+ memset(d, 0, sizeof(*d));
+ d->file = find_dlfilter(file);
+ if (!d->file)
+ return -1;
+ d->dlargc = dlargc;
+ d->dlargv = dlargv;
+ return 0;
+}
+
+static void dlfilter__exit(struct dlfilter *d)
+{
+ zfree(&d->file);
+}
+
+static int dlfilter__open(struct dlfilter *d)
+{
+ d->handle = dlopen(d->file, RTLD_NOW);
+ if (!d->handle) {
+ pr_err("dlopen failed for: '%s'\n", d->file);
+ return -1;
+ }
+ d->start = dlsym(d->handle, "start");
+ d->filter_event = dlsym(d->handle, "filter_event");
+ d->filter_event_early = dlsym(d->handle, "filter_event_early");
+ d->stop = dlsym(d->handle, "stop");
+ d->fns = dlsym(d->handle, "perf_dlfilter_fns");
+ if (d->fns)
+ memcpy(d->fns, &perf_dlfilter_fns, sizeof(struct perf_dlfilter_fns));
+ return 0;
+}
+
+static int dlfilter__close(struct dlfilter *d)
+{
+ return dlclose(d->handle);
+}
+
+struct dlfilter *dlfilter__new(const char *file, int dlargc, char **dlargv)
+{
+ struct dlfilter *d = malloc(sizeof(*d));
+
+ if (!d)
+ return NULL;
+
+ if (dlfilter__init(d, file, dlargc, dlargv))
+ goto err_free;
+
+ if (dlfilter__open(d))
+ goto err_exit;
+
+ return d;
+
+err_exit:
+ dlfilter__exit(d);
+err_free:
+ free(d);
+ return NULL;
+}
+
+static void dlfilter__free(struct dlfilter *d)
+{
+ if (d) {
+ dlfilter__exit(d);
+ free(d);
+ }
+}
+
+int dlfilter__start(struct dlfilter *d, struct perf_session *session)
+{
+ if (d) {
+ d->session = session;
+ if (d->start) {
+ int ret;
+
+ d->in_start = true;
+ ret = d->start(&d->data, d);
+ d->in_start = false;
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int dlfilter__stop(struct dlfilter *d)
+{
+ if (d && d->stop) {
+ int ret;
+
+ d->in_stop = true;
+ ret = d->stop(d->data, d);
+ d->in_stop = false;
+ return ret;
+ }
+ return 0;
+}
+
+void dlfilter__cleanup(struct dlfilter *d)
+{
+ if (d) {
+ dlfilter__stop(d);
+ dlfilter__close(d);
+ dlfilter__free(d);
+ }
+}
+
+#define ASSIGN(x) d_sample.x = sample->x
+
+int dlfilter__do_filter_event(struct dlfilter *d,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel,
+ struct machine *machine,
+ struct addr_location *al,
+ struct addr_location *addr_al,
+ bool early)
+{
+ struct perf_dlfilter_sample d_sample;
+ struct perf_dlfilter_al d_ip_al;
+ struct perf_dlfilter_al d_addr_al;
+ int ret;
+
+ d->event = event;
+ d->sample = sample;
+ d->evsel = evsel;
+ d->machine = machine;
+ d->al = al;
+ d->addr_al = addr_al;
+ d->d_sample = &d_sample;
+ d->d_ip_al = &d_ip_al;
+ d->d_addr_al = &d_addr_al;
+
+ d_sample.size = sizeof(d_sample);
+ d_ip_al.size = 0; /* To indicate d_ip_al is not initialized */
+ d_addr_al.size = 0; /* To indicate d_addr_al is not initialized */
+
+ ASSIGN(ip);
+ ASSIGN(pid);
+ ASSIGN(tid);
+ ASSIGN(time);
+ ASSIGN(addr);
+ ASSIGN(id);
+ ASSIGN(stream_id);
+ ASSIGN(period);
+ ASSIGN(weight);
+ ASSIGN(ins_lat);
+ ASSIGN(p_stage_cyc);
+ ASSIGN(transaction);
+ ASSIGN(insn_cnt);
+ ASSIGN(cyc_cnt);
+ ASSIGN(cpu);
+ ASSIGN(flags);
+ ASSIGN(data_src);
+ ASSIGN(phys_addr);
+ ASSIGN(data_page_size);
+ ASSIGN(code_page_size);
+ ASSIGN(cgroup);
+ ASSIGN(cpumode);
+ ASSIGN(misc);
+ ASSIGN(raw_size);
+ ASSIGN(raw_data);
+ ASSIGN(machine_pid);
+ ASSIGN(vcpu);
+
+ if (sample->branch_stack) {
+ d_sample.brstack_nr = sample->branch_stack->nr;
+ d_sample.brstack = (struct perf_branch_entry *)perf_sample__branch_entries(sample);
+ } else {
+ d_sample.brstack_nr = 0;
+ d_sample.brstack = NULL;
+ }
+
+ if (sample->callchain) {
+ d_sample.raw_callchain_nr = sample->callchain->nr;
+ d_sample.raw_callchain = (__u64 *)sample->callchain->ips;
+ } else {
+ d_sample.raw_callchain_nr = 0;
+ d_sample.raw_callchain = NULL;
+ }
+
+ d_sample.addr_correlates_sym =
+ (evsel->core.attr.sample_type & PERF_SAMPLE_ADDR) &&
+ sample_addr_correlates_sym(&evsel->core.attr);
+
+ d_sample.event = evsel__name(evsel);
+
+ d->ctx_valid = true;
+
+ if (early)
+ ret = d->filter_event_early(d->data, &d_sample, d);
+ else
+ ret = d->filter_event(d->data, &d_sample, d);
+
+ d->ctx_valid = false;
+
+ return ret;
+}
+
+bool get_filter_desc(const char *dirname, const char *name, char **desc,
+ char **long_desc)
+{
+ char path[PATH_MAX];
+ void *handle;
+ const char *(*desc_fn)(const char **long_description);
+
+ snprintf(path, sizeof(path), "%s/%s", dirname, name);
+ handle = dlopen(path, RTLD_NOW);
+ if (!handle || !(dlsym(handle, "filter_event") || dlsym(handle, "filter_event_early")))
+ return false;
+ desc_fn = dlsym(handle, "filter_description");
+ if (desc_fn) {
+ const char *dsc;
+ const char *long_dsc;
+
+ dsc = desc_fn(&long_dsc);
+ if (dsc)
+ *desc = strdup(dsc);
+ if (long_dsc)
+ *long_desc = strdup(long_dsc);
+ }
+ dlclose(handle);
+ return true;
+}
+
+static void list_filters(const char *dirname)
+{
+ struct dirent *entry;
+ DIR *dir;
+
+ dir = opendir(dirname);
+ if (!dir)
+ return;
+
+ while ((entry = readdir(dir)) != NULL)
+ {
+ size_t n = strlen(entry->d_name);
+ char *long_desc = NULL;
+ char *desc = NULL;
+
+ if (entry->d_type == DT_DIR || n < 4 ||
+ strcmp(".so", entry->d_name + n - 3))
+ continue;
+ if (!get_filter_desc(dirname, entry->d_name, &desc, &long_desc))
+ continue;
+ printf(" %-36s %s\n", entry->d_name, desc ? desc : "");
+ if (verbose > 0) {
+ char *p = long_desc;
+ char *line;
+
+ while ((line = strsep(&p, "\n")) != NULL)
+ printf("%39s%s\n", "", line);
+ }
+ free(long_desc);
+ free(desc);
+ }
+
+ closedir(dir);
+}
+
+int list_available_dlfilters(const struct option *opt __maybe_unused,
+ const char *s __maybe_unused,
+ int unset __maybe_unused)
+{
+ char path[PATH_MAX];
+ char *exec_path;
+
+ printf("List of available dlfilters:\n");
+
+ list_filters(".");
+
+ exec_path = get_argv_exec_path();
+ if (!exec_path)
+ goto out;
+ snprintf(path, sizeof(path), "%s/dlfilters", exec_path);
+
+ list_filters(path);
+
+ free(exec_path);
+out:
+ exit(0);
+}
diff --git a/tools/perf/util/dlfilter.h b/tools/perf/util/dlfilter.h
new file mode 100644
index 000000000..cc4bb9657
--- /dev/null
+++ b/tools/perf/util/dlfilter.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * dlfilter.h: Interface to perf script --dlfilter shared object
+ * Copyright (c) 2021, Intel Corporation.
+ */
+
+#ifndef PERF_UTIL_DLFILTER_H
+#define PERF_UTIL_DLFILTER_H
+
+struct perf_session;
+union perf_event;
+struct perf_sample;
+struct evsel;
+struct machine;
+struct addr_location;
+struct perf_dlfilter_fns;
+struct perf_dlfilter_sample;
+struct perf_dlfilter_al;
+
+struct dlfilter {
+ char *file;
+ void *handle;
+ void *data;
+ struct perf_session *session;
+ bool ctx_valid;
+ bool in_start;
+ bool in_stop;
+ int dlargc;
+ char **dlargv;
+
+ union perf_event *event;
+ struct perf_sample *sample;
+ struct evsel *evsel;
+ struct machine *machine;
+ struct addr_location *al;
+ struct addr_location *addr_al;
+ struct perf_dlfilter_sample *d_sample;
+ struct perf_dlfilter_al *d_ip_al;
+ struct perf_dlfilter_al *d_addr_al;
+
+ int (*start)(void **data, void *ctx);
+ int (*stop)(void *data, void *ctx);
+
+ int (*filter_event)(void *data,
+ const struct perf_dlfilter_sample *sample,
+ void *ctx);
+ int (*filter_event_early)(void *data,
+ const struct perf_dlfilter_sample *sample,
+ void *ctx);
+
+ struct perf_dlfilter_fns *fns;
+};
+
+struct dlfilter *dlfilter__new(const char *file, int dlargc, char **dlargv);
+
+int dlfilter__start(struct dlfilter *d, struct perf_session *session);
+
+int dlfilter__do_filter_event(struct dlfilter *d,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel,
+ struct machine *machine,
+ struct addr_location *al,
+ struct addr_location *addr_al,
+ bool early);
+
+void dlfilter__cleanup(struct dlfilter *d);
+
+static inline int dlfilter__filter_event(struct dlfilter *d,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel,
+ struct machine *machine,
+ struct addr_location *al,
+ struct addr_location *addr_al)
+{
+ if (!d || !d->filter_event)
+ return 0;
+ return dlfilter__do_filter_event(d, event, sample, evsel, machine, al, addr_al, false);
+}
+
+static inline int dlfilter__filter_event_early(struct dlfilter *d,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel,
+ struct machine *machine,
+ struct addr_location *al,
+ struct addr_location *addr_al)
+{
+ if (!d || !d->filter_event_early)
+ return 0;
+ return dlfilter__do_filter_event(d, event, sample, evsel, machine, al, addr_al, true);
+}
+
+int list_available_dlfilters(const struct option *opt, const char *s, int unset);
+bool get_filter_desc(const char *dirname, const char *name, char **desc,
+ char **long_desc);
+
+#endif
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
new file mode 100644
index 000000000..bdfead36b
--- /dev/null
+++ b/tools/perf/util/dso.c
@@ -0,0 +1,1528 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <asm/bug.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#ifdef HAVE_LIBBPF_SUPPORT
+#include <bpf/libbpf.h>
+#include "bpf-event.h"
+#include "bpf-utils.h"
+#endif
+#include "compress.h"
+#include "env.h"
+#include "namespaces.h"
+#include "path.h"
+#include "map.h"
+#include "symbol.h"
+#include "srcline.h"
+#include "dso.h"
+#include "dsos.h"
+#include "machine.h"
+#include "auxtrace.h"
+#include "util.h" /* O_CLOEXEC for older systems */
+#include "debug.h"
+#include "string2.h"
+#include "vdso.h"
+
+static const char * const debuglink_paths[] = {
+ "%.0s%s",
+ "%s/%s",
+ "%s/.debug/%s",
+ "/usr/lib/debug%s/%s"
+};
+
+char dso__symtab_origin(const struct dso *dso)
+{
+ static const char origin[] = {
+ [DSO_BINARY_TYPE__KALLSYMS] = 'k',
+ [DSO_BINARY_TYPE__VMLINUX] = 'v',
+ [DSO_BINARY_TYPE__JAVA_JIT] = 'j',
+ [DSO_BINARY_TYPE__DEBUGLINK] = 'l',
+ [DSO_BINARY_TYPE__BUILD_ID_CACHE] = 'B',
+ [DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO] = 'D',
+ [DSO_BINARY_TYPE__FEDORA_DEBUGINFO] = 'f',
+ [DSO_BINARY_TYPE__UBUNTU_DEBUGINFO] = 'u',
+ [DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO] = 'x',
+ [DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO] = 'o',
+ [DSO_BINARY_TYPE__BUILDID_DEBUGINFO] = 'b',
+ [DSO_BINARY_TYPE__SYSTEM_PATH_DSO] = 'd',
+ [DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE] = 'K',
+ [DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP] = 'm',
+ [DSO_BINARY_TYPE__GUEST_KALLSYMS] = 'g',
+ [DSO_BINARY_TYPE__GUEST_KMODULE] = 'G',
+ [DSO_BINARY_TYPE__GUEST_KMODULE_COMP] = 'M',
+ [DSO_BINARY_TYPE__GUEST_VMLINUX] = 'V',
+ };
+
+ if (dso == NULL || dso->symtab_type == DSO_BINARY_TYPE__NOT_FOUND)
+ return '!';
+ return origin[dso->symtab_type];
+}
+
+bool dso__is_object_file(const struct dso *dso)
+{
+ switch (dso->binary_type) {
+ case DSO_BINARY_TYPE__KALLSYMS:
+ case DSO_BINARY_TYPE__GUEST_KALLSYMS:
+ case DSO_BINARY_TYPE__JAVA_JIT:
+ case DSO_BINARY_TYPE__BPF_PROG_INFO:
+ case DSO_BINARY_TYPE__BPF_IMAGE:
+ case DSO_BINARY_TYPE__OOL:
+ return false;
+ case DSO_BINARY_TYPE__VMLINUX:
+ case DSO_BINARY_TYPE__GUEST_VMLINUX:
+ case DSO_BINARY_TYPE__DEBUGLINK:
+ case DSO_BINARY_TYPE__BUILD_ID_CACHE:
+ case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO:
+ case DSO_BINARY_TYPE__FEDORA_DEBUGINFO:
+ case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO:
+ case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
+ case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
+ case DSO_BINARY_TYPE__GUEST_KMODULE:
+ case DSO_BINARY_TYPE__GUEST_KMODULE_COMP:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP:
+ case DSO_BINARY_TYPE__KCORE:
+ case DSO_BINARY_TYPE__GUEST_KCORE:
+ case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
+ case DSO_BINARY_TYPE__NOT_FOUND:
+ default:
+ return true;
+ }
+}
+
+int dso__read_binary_type_filename(const struct dso *dso,
+ enum dso_binary_type type,
+ char *root_dir, char *filename, size_t size)
+{
+ char build_id_hex[SBUILD_ID_SIZE];
+ int ret = 0;
+ size_t len;
+
+ switch (type) {
+ case DSO_BINARY_TYPE__DEBUGLINK:
+ {
+ const char *last_slash;
+ char dso_dir[PATH_MAX];
+ char symfile[PATH_MAX];
+ unsigned int i;
+
+ len = __symbol__join_symfs(filename, size, dso->long_name);
+ last_slash = filename + len;
+ while (last_slash != filename && *last_slash != '/')
+ last_slash--;
+
+ strncpy(dso_dir, filename, last_slash - filename);
+ dso_dir[last_slash-filename] = '\0';
+
+ if (!is_regular_file(filename)) {
+ ret = -1;
+ break;
+ }
+
+ ret = filename__read_debuglink(filename, symfile, PATH_MAX);
+ if (ret)
+ break;
+
+ /* Check predefined locations where debug file might reside */
+ ret = -1;
+ for (i = 0; i < ARRAY_SIZE(debuglink_paths); i++) {
+ snprintf(filename, size,
+ debuglink_paths[i], dso_dir, symfile);
+ if (is_regular_file(filename)) {
+ ret = 0;
+ break;
+ }
+ }
+
+ break;
+ }
+ case DSO_BINARY_TYPE__BUILD_ID_CACHE:
+ if (dso__build_id_filename(dso, filename, size, false) == NULL)
+ ret = -1;
+ break;
+
+ case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO:
+ if (dso__build_id_filename(dso, filename, size, true) == NULL)
+ ret = -1;
+ break;
+
+ case DSO_BINARY_TYPE__FEDORA_DEBUGINFO:
+ len = __symbol__join_symfs(filename, size, "/usr/lib/debug");
+ snprintf(filename + len, size - len, "%s.debug", dso->long_name);
+ break;
+
+ case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO:
+ len = __symbol__join_symfs(filename, size, "/usr/lib/debug");
+ snprintf(filename + len, size - len, "%s", dso->long_name);
+ break;
+
+ case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
+ /*
+ * Ubuntu can mixup /usr/lib with /lib, putting debuginfo in
+ * /usr/lib/debug/lib when it is expected to be in
+ * /usr/lib/debug/usr/lib
+ */
+ if (strlen(dso->long_name) < 9 ||
+ strncmp(dso->long_name, "/usr/lib/", 9)) {
+ ret = -1;
+ break;
+ }
+ len = __symbol__join_symfs(filename, size, "/usr/lib/debug");
+ snprintf(filename + len, size - len, "%s", dso->long_name + 4);
+ break;
+
+ case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
+ {
+ const char *last_slash;
+ size_t dir_size;
+
+ last_slash = dso->long_name + dso->long_name_len;
+ while (last_slash != dso->long_name && *last_slash != '/')
+ last_slash--;
+
+ len = __symbol__join_symfs(filename, size, "");
+ dir_size = last_slash - dso->long_name + 2;
+ if (dir_size > (size - len)) {
+ ret = -1;
+ break;
+ }
+ len += scnprintf(filename + len, dir_size, "%s", dso->long_name);
+ len += scnprintf(filename + len , size - len, ".debug%s",
+ last_slash);
+ break;
+ }
+
+ case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
+ if (!dso->has_build_id) {
+ ret = -1;
+ break;
+ }
+
+ build_id__sprintf(&dso->bid, build_id_hex);
+ len = __symbol__join_symfs(filename, size, "/usr/lib/debug/.build-id/");
+ snprintf(filename + len, size - len, "%.2s/%s.debug",
+ build_id_hex, build_id_hex + 2);
+ break;
+
+ case DSO_BINARY_TYPE__VMLINUX:
+ case DSO_BINARY_TYPE__GUEST_VMLINUX:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
+ __symbol__join_symfs(filename, size, dso->long_name);
+ break;
+
+ case DSO_BINARY_TYPE__GUEST_KMODULE:
+ case DSO_BINARY_TYPE__GUEST_KMODULE_COMP:
+ path__join3(filename, size, symbol_conf.symfs,
+ root_dir, dso->long_name);
+ break;
+
+ case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP:
+ __symbol__join_symfs(filename, size, dso->long_name);
+ break;
+
+ case DSO_BINARY_TYPE__KCORE:
+ case DSO_BINARY_TYPE__GUEST_KCORE:
+ snprintf(filename, size, "%s", dso->long_name);
+ break;
+
+ default:
+ case DSO_BINARY_TYPE__KALLSYMS:
+ case DSO_BINARY_TYPE__GUEST_KALLSYMS:
+ case DSO_BINARY_TYPE__JAVA_JIT:
+ case DSO_BINARY_TYPE__BPF_PROG_INFO:
+ case DSO_BINARY_TYPE__BPF_IMAGE:
+ case DSO_BINARY_TYPE__OOL:
+ case DSO_BINARY_TYPE__NOT_FOUND:
+ ret = -1;
+ break;
+ }
+
+ return ret;
+}
+
+enum {
+ COMP_ID__NONE = 0,
+};
+
+static const struct {
+ const char *fmt;
+ int (*decompress)(const char *input, int output);
+ bool (*is_compressed)(const char *input);
+} compressions[] = {
+ [COMP_ID__NONE] = { .fmt = NULL, },
+#ifdef HAVE_ZLIB_SUPPORT
+ { "gz", gzip_decompress_to_file, gzip_is_compressed },
+#endif
+#ifdef HAVE_LZMA_SUPPORT
+ { "xz", lzma_decompress_to_file, lzma_is_compressed },
+#endif
+ { NULL, NULL, NULL },
+};
+
+static int is_supported_compression(const char *ext)
+{
+ unsigned i;
+
+ for (i = 1; compressions[i].fmt; i++) {
+ if (!strcmp(ext, compressions[i].fmt))
+ return i;
+ }
+ return COMP_ID__NONE;
+}
+
+bool is_kernel_module(const char *pathname, int cpumode)
+{
+ struct kmod_path m;
+ int mode = cpumode & PERF_RECORD_MISC_CPUMODE_MASK;
+
+ WARN_ONCE(mode != cpumode,
+ "Internal error: passing unmasked cpumode (%x) to is_kernel_module",
+ cpumode);
+
+ switch (mode) {
+ case PERF_RECORD_MISC_USER:
+ case PERF_RECORD_MISC_HYPERVISOR:
+ case PERF_RECORD_MISC_GUEST_USER:
+ return false;
+ /* Treat PERF_RECORD_MISC_CPUMODE_UNKNOWN as kernel */
+ default:
+ if (kmod_path__parse(&m, pathname)) {
+ pr_err("Failed to check whether %s is a kernel module or not. Assume it is.",
+ pathname);
+ return true;
+ }
+ }
+
+ return m.kmod;
+}
+
+bool dso__needs_decompress(struct dso *dso)
+{
+ return dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP ||
+ dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
+}
+
+int filename__decompress(const char *name, char *pathname,
+ size_t len, int comp, int *err)
+{
+ char tmpbuf[] = KMOD_DECOMP_NAME;
+ int fd = -1;
+
+ /*
+ * We have proper compression id for DSO and yet the file
+ * behind the 'name' can still be plain uncompressed object.
+ *
+ * The reason is behind the logic we open the DSO object files,
+ * when we try all possible 'debug' objects until we find the
+ * data. So even if the DSO is represented by 'krava.xz' module,
+ * we can end up here opening ~/.debug/....23432432/debug' file
+ * which is not compressed.
+ *
+ * To keep this transparent, we detect this and return the file
+ * descriptor to the uncompressed file.
+ */
+ if (!compressions[comp].is_compressed(name))
+ return open(name, O_RDONLY);
+
+ fd = mkstemp(tmpbuf);
+ if (fd < 0) {
+ *err = errno;
+ return -1;
+ }
+
+ if (compressions[comp].decompress(name, fd)) {
+ *err = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE;
+ close(fd);
+ fd = -1;
+ }
+
+ if (!pathname || (fd < 0))
+ unlink(tmpbuf);
+
+ if (pathname && (fd >= 0))
+ strlcpy(pathname, tmpbuf, len);
+
+ return fd;
+}
+
+static int decompress_kmodule(struct dso *dso, const char *name,
+ char *pathname, size_t len)
+{
+ if (!dso__needs_decompress(dso))
+ return -1;
+
+ if (dso->comp == COMP_ID__NONE)
+ return -1;
+
+ return filename__decompress(name, pathname, len, dso->comp,
+ &dso->load_errno);
+}
+
+int dso__decompress_kmodule_fd(struct dso *dso, const char *name)
+{
+ return decompress_kmodule(dso, name, NULL, 0);
+}
+
+int dso__decompress_kmodule_path(struct dso *dso, const char *name,
+ char *pathname, size_t len)
+{
+ int fd = decompress_kmodule(dso, name, pathname, len);
+
+ close(fd);
+ return fd >= 0 ? 0 : -1;
+}
+
+/*
+ * Parses kernel module specified in @path and updates
+ * @m argument like:
+ *
+ * @comp - true if @path contains supported compression suffix,
+ * false otherwise
+ * @kmod - true if @path contains '.ko' suffix in right position,
+ * false otherwise
+ * @name - if (@alloc_name && @kmod) is true, it contains strdup-ed base name
+ * of the kernel module without suffixes, otherwise strudup-ed
+ * base name of @path
+ * @ext - if (@alloc_ext && @comp) is true, it contains strdup-ed string
+ * the compression suffix
+ *
+ * Returns 0 if there's no strdup error, -ENOMEM otherwise.
+ */
+int __kmod_path__parse(struct kmod_path *m, const char *path,
+ bool alloc_name)
+{
+ const char *name = strrchr(path, '/');
+ const char *ext = strrchr(path, '.');
+ bool is_simple_name = false;
+
+ memset(m, 0x0, sizeof(*m));
+ name = name ? name + 1 : path;
+
+ /*
+ * '.' is also a valid character for module name. For example:
+ * [aaa.bbb] is a valid module name. '[' should have higher
+ * priority than '.ko' suffix.
+ *
+ * The kernel names are from machine__mmap_name. Such
+ * name should belong to kernel itself, not kernel module.
+ */
+ if (name[0] == '[') {
+ is_simple_name = true;
+ if ((strncmp(name, "[kernel.kallsyms]", 17) == 0) ||
+ (strncmp(name, "[guest.kernel.kallsyms", 22) == 0) ||
+ (strncmp(name, "[vdso]", 6) == 0) ||
+ (strncmp(name, "[vdso32]", 8) == 0) ||
+ (strncmp(name, "[vdsox32]", 9) == 0) ||
+ (strncmp(name, "[vsyscall]", 10) == 0)) {
+ m->kmod = false;
+
+ } else
+ m->kmod = true;
+ }
+
+ /* No extension, just return name. */
+ if ((ext == NULL) || is_simple_name) {
+ if (alloc_name) {
+ m->name = strdup(name);
+ return m->name ? 0 : -ENOMEM;
+ }
+ return 0;
+ }
+
+ m->comp = is_supported_compression(ext + 1);
+ if (m->comp > COMP_ID__NONE)
+ ext -= 3;
+
+ /* Check .ko extension only if there's enough name left. */
+ if (ext > name)
+ m->kmod = !strncmp(ext, ".ko", 3);
+
+ if (alloc_name) {
+ if (m->kmod) {
+ if (asprintf(&m->name, "[%.*s]", (int) (ext - name), name) == -1)
+ return -ENOMEM;
+ } else {
+ if (asprintf(&m->name, "%s", name) == -1)
+ return -ENOMEM;
+ }
+
+ strreplace(m->name, '-', '_');
+ }
+
+ return 0;
+}
+
+void dso__set_module_info(struct dso *dso, struct kmod_path *m,
+ struct machine *machine)
+{
+ if (machine__is_host(machine))
+ dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE;
+ else
+ dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE;
+
+ /* _KMODULE_COMP should be next to _KMODULE */
+ if (m->kmod && m->comp) {
+ dso->symtab_type++;
+ dso->comp = m->comp;
+ }
+
+ dso__set_short_name(dso, strdup(m->name), true);
+}
+
+/*
+ * Global list of open DSOs and the counter.
+ */
+static LIST_HEAD(dso__data_open);
+static long dso__data_open_cnt;
+static pthread_mutex_t dso__data_open_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static void dso__list_add(struct dso *dso)
+{
+ list_add_tail(&dso->data.open_entry, &dso__data_open);
+ dso__data_open_cnt++;
+}
+
+static void dso__list_del(struct dso *dso)
+{
+ list_del_init(&dso->data.open_entry);
+ WARN_ONCE(dso__data_open_cnt <= 0,
+ "DSO data fd counter out of bounds.");
+ dso__data_open_cnt--;
+}
+
+static void close_first_dso(void);
+
+static int do_open(char *name)
+{
+ int fd;
+ char sbuf[STRERR_BUFSIZE];
+
+ do {
+ fd = open(name, O_RDONLY|O_CLOEXEC);
+ if (fd >= 0)
+ return fd;
+
+ pr_debug("dso open failed: %s\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ if (!dso__data_open_cnt || errno != EMFILE)
+ break;
+
+ close_first_dso();
+ } while (1);
+
+ return -1;
+}
+
+char *dso__filename_with_chroot(const struct dso *dso, const char *filename)
+{
+ return filename_with_chroot(nsinfo__pid(dso->nsinfo), filename);
+}
+
+static int __open_dso(struct dso *dso, struct machine *machine)
+{
+ int fd = -EINVAL;
+ char *root_dir = (char *)"";
+ char *name = malloc(PATH_MAX);
+ bool decomp = false;
+
+ if (!name)
+ return -ENOMEM;
+
+ mutex_lock(&dso->lock);
+ if (machine)
+ root_dir = machine->root_dir;
+
+ if (dso__read_binary_type_filename(dso, dso->binary_type,
+ root_dir, name, PATH_MAX))
+ goto out;
+
+ if (!is_regular_file(name)) {
+ char *new_name;
+
+ if (errno != ENOENT || dso->nsinfo == NULL)
+ goto out;
+
+ new_name = dso__filename_with_chroot(dso, name);
+ if (!new_name)
+ goto out;
+
+ free(name);
+ name = new_name;
+ }
+
+ if (dso__needs_decompress(dso)) {
+ char newpath[KMOD_DECOMP_LEN];
+ size_t len = sizeof(newpath);
+
+ if (dso__decompress_kmodule_path(dso, name, newpath, len) < 0) {
+ fd = -dso->load_errno;
+ goto out;
+ }
+
+ decomp = true;
+ strcpy(name, newpath);
+ }
+
+ fd = do_open(name);
+
+ if (decomp)
+ unlink(name);
+
+out:
+ mutex_unlock(&dso->lock);
+ free(name);
+ return fd;
+}
+
+static void check_data_close(void);
+
+/**
+ * dso_close - Open DSO data file
+ * @dso: dso object
+ *
+ * Open @dso's data file descriptor and updates
+ * list/count of open DSO objects.
+ */
+static int open_dso(struct dso *dso, struct machine *machine)
+{
+ int fd;
+ struct nscookie nsc;
+
+ if (dso->binary_type != DSO_BINARY_TYPE__BUILD_ID_CACHE) {
+ mutex_lock(&dso->lock);
+ nsinfo__mountns_enter(dso->nsinfo, &nsc);
+ mutex_unlock(&dso->lock);
+ }
+ fd = __open_dso(dso, machine);
+ if (dso->binary_type != DSO_BINARY_TYPE__BUILD_ID_CACHE)
+ nsinfo__mountns_exit(&nsc);
+
+ if (fd >= 0) {
+ dso__list_add(dso);
+ /*
+ * Check if we crossed the allowed number
+ * of opened DSOs and close one if needed.
+ */
+ check_data_close();
+ }
+
+ return fd;
+}
+
+static void close_data_fd(struct dso *dso)
+{
+ if (dso->data.fd >= 0) {
+ close(dso->data.fd);
+ dso->data.fd = -1;
+ dso->data.file_size = 0;
+ dso__list_del(dso);
+ }
+}
+
+/**
+ * dso_close - Close DSO data file
+ * @dso: dso object
+ *
+ * Close @dso's data file descriptor and updates
+ * list/count of open DSO objects.
+ */
+static void close_dso(struct dso *dso)
+{
+ close_data_fd(dso);
+}
+
+static void close_first_dso(void)
+{
+ struct dso *dso;
+
+ dso = list_first_entry(&dso__data_open, struct dso, data.open_entry);
+ close_dso(dso);
+}
+
+static rlim_t get_fd_limit(void)
+{
+ struct rlimit l;
+ rlim_t limit = 0;
+
+ /* Allow half of the current open fd limit. */
+ if (getrlimit(RLIMIT_NOFILE, &l) == 0) {
+ if (l.rlim_cur == RLIM_INFINITY)
+ limit = l.rlim_cur;
+ else
+ limit = l.rlim_cur / 2;
+ } else {
+ pr_err("failed to get fd limit\n");
+ limit = 1;
+ }
+
+ return limit;
+}
+
+static rlim_t fd_limit;
+
+/*
+ * Used only by tests/dso-data.c to reset the environment
+ * for tests. I dont expect we should change this during
+ * standard runtime.
+ */
+void reset_fd_limit(void)
+{
+ fd_limit = 0;
+}
+
+static bool may_cache_fd(void)
+{
+ if (!fd_limit)
+ fd_limit = get_fd_limit();
+
+ if (fd_limit == RLIM_INFINITY)
+ return true;
+
+ return fd_limit > (rlim_t) dso__data_open_cnt;
+}
+
+/*
+ * Check and close LRU dso if we crossed allowed limit
+ * for opened dso file descriptors. The limit is half
+ * of the RLIMIT_NOFILE files opened.
+*/
+static void check_data_close(void)
+{
+ bool cache_fd = may_cache_fd();
+
+ if (!cache_fd)
+ close_first_dso();
+}
+
+/**
+ * dso__data_close - Close DSO data file
+ * @dso: dso object
+ *
+ * External interface to close @dso's data file descriptor.
+ */
+void dso__data_close(struct dso *dso)
+{
+ pthread_mutex_lock(&dso__data_open_lock);
+ close_dso(dso);
+ pthread_mutex_unlock(&dso__data_open_lock);
+}
+
+static void try_to_open_dso(struct dso *dso, struct machine *machine)
+{
+ enum dso_binary_type binary_type_data[] = {
+ DSO_BINARY_TYPE__BUILD_ID_CACHE,
+ DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
+ DSO_BINARY_TYPE__NOT_FOUND,
+ };
+ int i = 0;
+
+ if (dso->data.fd >= 0)
+ return;
+
+ if (dso->binary_type != DSO_BINARY_TYPE__NOT_FOUND) {
+ dso->data.fd = open_dso(dso, machine);
+ goto out;
+ }
+
+ do {
+ dso->binary_type = binary_type_data[i++];
+
+ dso->data.fd = open_dso(dso, machine);
+ if (dso->data.fd >= 0)
+ goto out;
+
+ } while (dso->binary_type != DSO_BINARY_TYPE__NOT_FOUND);
+out:
+ if (dso->data.fd >= 0)
+ dso->data.status = DSO_DATA_STATUS_OK;
+ else
+ dso->data.status = DSO_DATA_STATUS_ERROR;
+}
+
+/**
+ * dso__data_get_fd - Get dso's data file descriptor
+ * @dso: dso object
+ * @machine: machine object
+ *
+ * External interface to find dso's file, open it and
+ * returns file descriptor. It should be paired with
+ * dso__data_put_fd() if it returns non-negative value.
+ */
+int dso__data_get_fd(struct dso *dso, struct machine *machine)
+{
+ if (dso->data.status == DSO_DATA_STATUS_ERROR)
+ return -1;
+
+ if (pthread_mutex_lock(&dso__data_open_lock) < 0)
+ return -1;
+
+ try_to_open_dso(dso, machine);
+
+ if (dso->data.fd < 0)
+ pthread_mutex_unlock(&dso__data_open_lock);
+
+ return dso->data.fd;
+}
+
+void dso__data_put_fd(struct dso *dso __maybe_unused)
+{
+ pthread_mutex_unlock(&dso__data_open_lock);
+}
+
+bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by)
+{
+ u32 flag = 1 << by;
+
+ if (dso->data.status_seen & flag)
+ return true;
+
+ dso->data.status_seen |= flag;
+
+ return false;
+}
+
+#ifdef HAVE_LIBBPF_SUPPORT
+static ssize_t bpf_read(struct dso *dso, u64 offset, char *data)
+{
+ struct bpf_prog_info_node *node;
+ ssize_t size = DSO__DATA_CACHE_SIZE;
+ u64 len;
+ u8 *buf;
+
+ node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, dso->bpf_prog.id);
+ if (!node || !node->info_linear) {
+ dso->data.status = DSO_DATA_STATUS_ERROR;
+ return -1;
+ }
+
+ len = node->info_linear->info.jited_prog_len;
+ buf = (u8 *)(uintptr_t)node->info_linear->info.jited_prog_insns;
+
+ if (offset >= len)
+ return -1;
+
+ size = (ssize_t)min(len - offset, (u64)size);
+ memcpy(data, buf + offset, size);
+ return size;
+}
+
+static int bpf_size(struct dso *dso)
+{
+ struct bpf_prog_info_node *node;
+
+ node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, dso->bpf_prog.id);
+ if (!node || !node->info_linear) {
+ dso->data.status = DSO_DATA_STATUS_ERROR;
+ return -1;
+ }
+
+ dso->data.file_size = node->info_linear->info.jited_prog_len;
+ return 0;
+}
+#endif // HAVE_LIBBPF_SUPPORT
+
+static void
+dso_cache__free(struct dso *dso)
+{
+ struct rb_root *root = &dso->data.cache;
+ struct rb_node *next = rb_first(root);
+
+ mutex_lock(&dso->lock);
+ while (next) {
+ struct dso_cache *cache;
+
+ cache = rb_entry(next, struct dso_cache, rb_node);
+ next = rb_next(&cache->rb_node);
+ rb_erase(&cache->rb_node, root);
+ free(cache);
+ }
+ mutex_unlock(&dso->lock);
+}
+
+static struct dso_cache *__dso_cache__find(struct dso *dso, u64 offset)
+{
+ const struct rb_root *root = &dso->data.cache;
+ struct rb_node * const *p = &root->rb_node;
+ const struct rb_node *parent = NULL;
+ struct dso_cache *cache;
+
+ while (*p != NULL) {
+ u64 end;
+
+ parent = *p;
+ cache = rb_entry(parent, struct dso_cache, rb_node);
+ end = cache->offset + DSO__DATA_CACHE_SIZE;
+
+ if (offset < cache->offset)
+ p = &(*p)->rb_left;
+ else if (offset >= end)
+ p = &(*p)->rb_right;
+ else
+ return cache;
+ }
+
+ return NULL;
+}
+
+static struct dso_cache *
+dso_cache__insert(struct dso *dso, struct dso_cache *new)
+{
+ struct rb_root *root = &dso->data.cache;
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct dso_cache *cache;
+ u64 offset = new->offset;
+
+ mutex_lock(&dso->lock);
+ while (*p != NULL) {
+ u64 end;
+
+ parent = *p;
+ cache = rb_entry(parent, struct dso_cache, rb_node);
+ end = cache->offset + DSO__DATA_CACHE_SIZE;
+
+ if (offset < cache->offset)
+ p = &(*p)->rb_left;
+ else if (offset >= end)
+ p = &(*p)->rb_right;
+ else
+ goto out;
+ }
+
+ rb_link_node(&new->rb_node, parent, p);
+ rb_insert_color(&new->rb_node, root);
+
+ cache = NULL;
+out:
+ mutex_unlock(&dso->lock);
+ return cache;
+}
+
+static ssize_t dso_cache__memcpy(struct dso_cache *cache, u64 offset, u8 *data,
+ u64 size, bool out)
+{
+ u64 cache_offset = offset - cache->offset;
+ u64 cache_size = min(cache->size - cache_offset, size);
+
+ if (out)
+ memcpy(data, cache->data + cache_offset, cache_size);
+ else
+ memcpy(cache->data + cache_offset, data, cache_size);
+ return cache_size;
+}
+
+static ssize_t file_read(struct dso *dso, struct machine *machine,
+ u64 offset, char *data)
+{
+ ssize_t ret;
+
+ pthread_mutex_lock(&dso__data_open_lock);
+
+ /*
+ * dso->data.fd might be closed if other thread opened another
+ * file (dso) due to open file limit (RLIMIT_NOFILE).
+ */
+ try_to_open_dso(dso, machine);
+
+ if (dso->data.fd < 0) {
+ dso->data.status = DSO_DATA_STATUS_ERROR;
+ ret = -errno;
+ goto out;
+ }
+
+ ret = pread(dso->data.fd, data, DSO__DATA_CACHE_SIZE, offset);
+out:
+ pthread_mutex_unlock(&dso__data_open_lock);
+ return ret;
+}
+
+static struct dso_cache *dso_cache__populate(struct dso *dso,
+ struct machine *machine,
+ u64 offset, ssize_t *ret)
+{
+ u64 cache_offset = offset & DSO__DATA_CACHE_MASK;
+ struct dso_cache *cache;
+ struct dso_cache *old;
+
+ cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE);
+ if (!cache) {
+ *ret = -ENOMEM;
+ return NULL;
+ }
+#ifdef HAVE_LIBBPF_SUPPORT
+ if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
+ *ret = bpf_read(dso, cache_offset, cache->data);
+ else
+#endif
+ if (dso->binary_type == DSO_BINARY_TYPE__OOL)
+ *ret = DSO__DATA_CACHE_SIZE;
+ else
+ *ret = file_read(dso, machine, cache_offset, cache->data);
+
+ if (*ret <= 0) {
+ free(cache);
+ return NULL;
+ }
+
+ cache->offset = cache_offset;
+ cache->size = *ret;
+
+ old = dso_cache__insert(dso, cache);
+ if (old) {
+ /* we lose the race */
+ free(cache);
+ cache = old;
+ }
+
+ return cache;
+}
+
+static struct dso_cache *dso_cache__find(struct dso *dso,
+ struct machine *machine,
+ u64 offset,
+ ssize_t *ret)
+{
+ struct dso_cache *cache = __dso_cache__find(dso, offset);
+
+ return cache ? cache : dso_cache__populate(dso, machine, offset, ret);
+}
+
+static ssize_t dso_cache_io(struct dso *dso, struct machine *machine,
+ u64 offset, u8 *data, ssize_t size, bool out)
+{
+ struct dso_cache *cache;
+ ssize_t ret = 0;
+
+ cache = dso_cache__find(dso, machine, offset, &ret);
+ if (!cache)
+ return ret;
+
+ return dso_cache__memcpy(cache, offset, data, size, out);
+}
+
+/*
+ * Reads and caches dso data DSO__DATA_CACHE_SIZE size chunks
+ * in the rb_tree. Any read to already cached data is served
+ * by cached data. Writes update the cache only, not the backing file.
+ */
+static ssize_t cached_io(struct dso *dso, struct machine *machine,
+ u64 offset, u8 *data, ssize_t size, bool out)
+{
+ ssize_t r = 0;
+ u8 *p = data;
+
+ do {
+ ssize_t ret;
+
+ ret = dso_cache_io(dso, machine, offset, p, size, out);
+ if (ret < 0)
+ return ret;
+
+ /* Reached EOF, return what we have. */
+ if (!ret)
+ break;
+
+ BUG_ON(ret > size);
+
+ r += ret;
+ p += ret;
+ offset += ret;
+ size -= ret;
+
+ } while (size);
+
+ return r;
+}
+
+static int file_size(struct dso *dso, struct machine *machine)
+{
+ int ret = 0;
+ struct stat st;
+ char sbuf[STRERR_BUFSIZE];
+
+ pthread_mutex_lock(&dso__data_open_lock);
+
+ /*
+ * dso->data.fd might be closed if other thread opened another
+ * file (dso) due to open file limit (RLIMIT_NOFILE).
+ */
+ try_to_open_dso(dso, machine);
+
+ if (dso->data.fd < 0) {
+ ret = -errno;
+ dso->data.status = DSO_DATA_STATUS_ERROR;
+ goto out;
+ }
+
+ if (fstat(dso->data.fd, &st) < 0) {
+ ret = -errno;
+ pr_err("dso cache fstat failed: %s\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ dso->data.status = DSO_DATA_STATUS_ERROR;
+ goto out;
+ }
+ dso->data.file_size = st.st_size;
+
+out:
+ pthread_mutex_unlock(&dso__data_open_lock);
+ return ret;
+}
+
+int dso__data_file_size(struct dso *dso, struct machine *machine)
+{
+ if (dso->data.file_size)
+ return 0;
+
+ if (dso->data.status == DSO_DATA_STATUS_ERROR)
+ return -1;
+#ifdef HAVE_LIBBPF_SUPPORT
+ if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
+ return bpf_size(dso);
+#endif
+ return file_size(dso, machine);
+}
+
+/**
+ * dso__data_size - Return dso data size
+ * @dso: dso object
+ * @machine: machine object
+ *
+ * Return: dso data size
+ */
+off_t dso__data_size(struct dso *dso, struct machine *machine)
+{
+ if (dso__data_file_size(dso, machine))
+ return -1;
+
+ /* For now just estimate dso data size is close to file size */
+ return dso->data.file_size;
+}
+
+static ssize_t data_read_write_offset(struct dso *dso, struct machine *machine,
+ u64 offset, u8 *data, ssize_t size,
+ bool out)
+{
+ if (dso__data_file_size(dso, machine))
+ return -1;
+
+ /* Check the offset sanity. */
+ if (offset > dso->data.file_size)
+ return -1;
+
+ if (offset + size < offset)
+ return -1;
+
+ return cached_io(dso, machine, offset, data, size, out);
+}
+
+/**
+ * dso__data_read_offset - Read data from dso file offset
+ * @dso: dso object
+ * @machine: machine object
+ * @offset: file offset
+ * @data: buffer to store data
+ * @size: size of the @data buffer
+ *
+ * External interface to read data from dso file offset. Open
+ * dso data file and use cached_read to get the data.
+ */
+ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
+ u64 offset, u8 *data, ssize_t size)
+{
+ if (dso->data.status == DSO_DATA_STATUS_ERROR)
+ return -1;
+
+ return data_read_write_offset(dso, machine, offset, data, size, true);
+}
+
+/**
+ * dso__data_read_addr - Read data from dso address
+ * @dso: dso object
+ * @machine: machine object
+ * @add: virtual memory address
+ * @data: buffer to store data
+ * @size: size of the @data buffer
+ *
+ * External interface to read data from dso address.
+ */
+ssize_t dso__data_read_addr(struct dso *dso, struct map *map,
+ struct machine *machine, u64 addr,
+ u8 *data, ssize_t size)
+{
+ u64 offset = map__map_ip(map, addr);
+
+ return dso__data_read_offset(dso, machine, offset, data, size);
+}
+
+/**
+ * dso__data_write_cache_offs - Write data to dso data cache at file offset
+ * @dso: dso object
+ * @machine: machine object
+ * @offset: file offset
+ * @data: buffer to write
+ * @size: size of the @data buffer
+ *
+ * Write into the dso file data cache, but do not change the file itself.
+ */
+ssize_t dso__data_write_cache_offs(struct dso *dso, struct machine *machine,
+ u64 offset, const u8 *data_in, ssize_t size)
+{
+ u8 *data = (u8 *)data_in; /* cast away const to use same fns for r/w */
+
+ if (dso->data.status == DSO_DATA_STATUS_ERROR)
+ return -1;
+
+ return data_read_write_offset(dso, machine, offset, data, size, false);
+}
+
+/**
+ * dso__data_write_cache_addr - Write data to dso data cache at dso address
+ * @dso: dso object
+ * @machine: machine object
+ * @add: virtual memory address
+ * @data: buffer to write
+ * @size: size of the @data buffer
+ *
+ * External interface to write into the dso file data cache, but do not change
+ * the file itself.
+ */
+ssize_t dso__data_write_cache_addr(struct dso *dso, struct map *map,
+ struct machine *machine, u64 addr,
+ const u8 *data, ssize_t size)
+{
+ u64 offset = map__map_ip(map, addr);
+
+ return dso__data_write_cache_offs(dso, machine, offset, data, size);
+}
+
+struct map *dso__new_map(const char *name)
+{
+ struct map *map = NULL;
+ struct dso *dso = dso__new(name);
+
+ if (dso) {
+ map = map__new2(0, dso);
+ dso__put(dso);
+ }
+
+ return map;
+}
+
+struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
+ const char *short_name, int dso_type)
+{
+ /*
+ * The kernel dso could be created by build_id processing.
+ */
+ struct dso *dso = machine__findnew_dso(machine, name);
+
+ /*
+ * We need to run this in all cases, since during the build_id
+ * processing we had no idea this was the kernel dso.
+ */
+ if (dso != NULL) {
+ dso__set_short_name(dso, short_name, false);
+ dso->kernel = dso_type;
+ }
+
+ return dso;
+}
+
+static void dso__set_long_name_id(struct dso *dso, const char *name, struct dso_id *id, bool name_allocated)
+{
+ struct rb_root *root = dso->root;
+
+ if (name == NULL)
+ return;
+
+ if (dso->long_name_allocated)
+ free((char *)dso->long_name);
+
+ if (root) {
+ rb_erase(&dso->rb_node, root);
+ /*
+ * __dsos__findnew_link_by_longname_id() isn't guaranteed to
+ * add it back, so a clean removal is required here.
+ */
+ RB_CLEAR_NODE(&dso->rb_node);
+ dso->root = NULL;
+ }
+
+ dso->long_name = name;
+ dso->long_name_len = strlen(name);
+ dso->long_name_allocated = name_allocated;
+
+ if (root)
+ __dsos__findnew_link_by_longname_id(root, dso, NULL, id);
+}
+
+void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated)
+{
+ dso__set_long_name_id(dso, name, NULL, name_allocated);
+}
+
+void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated)
+{
+ if (name == NULL)
+ return;
+
+ if (dso->short_name_allocated)
+ free((char *)dso->short_name);
+
+ dso->short_name = name;
+ dso->short_name_len = strlen(name);
+ dso->short_name_allocated = name_allocated;
+}
+
+int dso__name_len(const struct dso *dso)
+{
+ if (!dso)
+ return strlen("[unknown]");
+ if (verbose > 0)
+ return dso->long_name_len;
+
+ return dso->short_name_len;
+}
+
+bool dso__loaded(const struct dso *dso)
+{
+ return dso->loaded;
+}
+
+bool dso__sorted_by_name(const struct dso *dso)
+{
+ return dso->sorted_by_name;
+}
+
+void dso__set_sorted_by_name(struct dso *dso)
+{
+ dso->sorted_by_name = true;
+}
+
+struct dso *dso__new_id(const char *name, struct dso_id *id)
+{
+ struct dso *dso = calloc(1, sizeof(*dso) + strlen(name) + 1);
+
+ if (dso != NULL) {
+ strcpy(dso->name, name);
+ if (id)
+ dso->id = *id;
+ dso__set_long_name_id(dso, dso->name, id, false);
+ dso__set_short_name(dso, dso->name, false);
+ dso->symbols = RB_ROOT_CACHED;
+ dso->symbol_names = NULL;
+ dso->symbol_names_len = 0;
+ dso->data.cache = RB_ROOT;
+ dso->inlined_nodes = RB_ROOT_CACHED;
+ dso->srclines = RB_ROOT_CACHED;
+ dso->data.fd = -1;
+ dso->data.status = DSO_DATA_STATUS_UNKNOWN;
+ dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND;
+ dso->binary_type = DSO_BINARY_TYPE__NOT_FOUND;
+ dso->is_64_bit = (sizeof(void *) == 8);
+ dso->loaded = 0;
+ dso->rel = 0;
+ dso->sorted_by_name = 0;
+ dso->has_build_id = 0;
+ dso->has_srcline = 1;
+ dso->a2l_fails = 1;
+ dso->kernel = DSO_SPACE__USER;
+ dso->needs_swap = DSO_SWAP__UNSET;
+ dso->comp = COMP_ID__NONE;
+ RB_CLEAR_NODE(&dso->rb_node);
+ dso->root = NULL;
+ INIT_LIST_HEAD(&dso->node);
+ INIT_LIST_HEAD(&dso->data.open_entry);
+ mutex_init(&dso->lock);
+ refcount_set(&dso->refcnt, 1);
+ }
+
+ return dso;
+}
+
+struct dso *dso__new(const char *name)
+{
+ return dso__new_id(name, NULL);
+}
+
+void dso__delete(struct dso *dso)
+{
+ if (!RB_EMPTY_NODE(&dso->rb_node))
+ pr_err("DSO %s is still in rbtree when being deleted!\n",
+ dso->long_name);
+
+ /* free inlines first, as they reference symbols */
+ inlines__tree_delete(&dso->inlined_nodes);
+ srcline__tree_delete(&dso->srclines);
+ symbols__delete(&dso->symbols);
+ dso->symbol_names_len = 0;
+ zfree(&dso->symbol_names);
+ if (dso->short_name_allocated) {
+ zfree((char **)&dso->short_name);
+ dso->short_name_allocated = false;
+ }
+
+ if (dso->long_name_allocated) {
+ zfree((char **)&dso->long_name);
+ dso->long_name_allocated = false;
+ }
+
+ dso__data_close(dso);
+ auxtrace_cache__free(dso->auxtrace_cache);
+ dso_cache__free(dso);
+ dso__free_a2l(dso);
+ zfree(&dso->symsrc_filename);
+ nsinfo__zput(dso->nsinfo);
+ mutex_destroy(&dso->lock);
+ free(dso);
+}
+
+struct dso *dso__get(struct dso *dso)
+{
+ if (dso)
+ refcount_inc(&dso->refcnt);
+ return dso;
+}
+
+void dso__put(struct dso *dso)
+{
+ if (dso && refcount_dec_and_test(&dso->refcnt))
+ dso__delete(dso);
+}
+
+void dso__set_build_id(struct dso *dso, struct build_id *bid)
+{
+ dso->bid = *bid;
+ dso->has_build_id = 1;
+}
+
+bool dso__build_id_equal(const struct dso *dso, struct build_id *bid)
+{
+ if (dso->bid.size > bid->size && dso->bid.size == BUILD_ID_SIZE) {
+ /*
+ * For the backward compatibility, it allows a build-id has
+ * trailing zeros.
+ */
+ return !memcmp(dso->bid.data, bid->data, bid->size) &&
+ !memchr_inv(&dso->bid.data[bid->size], 0,
+ dso->bid.size - bid->size);
+ }
+
+ return dso->bid.size == bid->size &&
+ memcmp(dso->bid.data, bid->data, dso->bid.size) == 0;
+}
+
+void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine)
+{
+ char path[PATH_MAX];
+
+ if (machine__is_default_guest(machine))
+ return;
+ sprintf(path, "%s/sys/kernel/notes", machine->root_dir);
+ if (sysfs__read_build_id(path, &dso->bid) == 0)
+ dso->has_build_id = true;
+}
+
+int dso__kernel_module_get_build_id(struct dso *dso,
+ const char *root_dir)
+{
+ char filename[PATH_MAX];
+ /*
+ * kernel module short names are of the form "[module]" and
+ * we need just "module" here.
+ */
+ const char *name = dso->short_name + 1;
+
+ snprintf(filename, sizeof(filename),
+ "%s/sys/module/%.*s/notes/.note.gnu.build-id",
+ root_dir, (int)strlen(name) - 1, name);
+
+ if (sysfs__read_build_id(filename, &dso->bid) == 0)
+ dso->has_build_id = true;
+
+ return 0;
+}
+
+static size_t dso__fprintf_buildid(struct dso *dso, FILE *fp)
+{
+ char sbuild_id[SBUILD_ID_SIZE];
+
+ build_id__sprintf(&dso->bid, sbuild_id);
+ return fprintf(fp, "%s", sbuild_id);
+}
+
+size_t dso__fprintf(struct dso *dso, FILE *fp)
+{
+ struct rb_node *nd;
+ size_t ret = fprintf(fp, "dso: %s (", dso->short_name);
+
+ if (dso->short_name != dso->long_name)
+ ret += fprintf(fp, "%s, ", dso->long_name);
+ ret += fprintf(fp, "%sloaded, ", dso__loaded(dso) ? "" : "NOT ");
+ ret += dso__fprintf_buildid(dso, fp);
+ ret += fprintf(fp, ")\n");
+ for (nd = rb_first_cached(&dso->symbols); nd; nd = rb_next(nd)) {
+ struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
+ ret += symbol__fprintf(pos, fp);
+ }
+
+ return ret;
+}
+
+enum dso_type dso__type(struct dso *dso, struct machine *machine)
+{
+ int fd;
+ enum dso_type type = DSO__TYPE_UNKNOWN;
+
+ fd = dso__data_get_fd(dso, machine);
+ if (fd >= 0) {
+ type = dso__type_fd(fd);
+ dso__data_put_fd(dso);
+ }
+
+ return type;
+}
+
+int dso__strerror_load(struct dso *dso, char *buf, size_t buflen)
+{
+ int idx, errnum = dso->load_errno;
+ /*
+ * This must have a same ordering as the enum dso_load_errno.
+ */
+ static const char *dso_load__error_str[] = {
+ "Internal tools/perf/ library error",
+ "Invalid ELF file",
+ "Can not read build id",
+ "Mismatching build id",
+ "Decompression failure",
+ };
+
+ BUG_ON(buflen == 0);
+
+ if (errnum >= 0) {
+ const char *err = str_error_r(errnum, buf, buflen);
+
+ if (err != buf)
+ scnprintf(buf, buflen, "%s", err);
+
+ return 0;
+ }
+
+ if (errnum < __DSO_LOAD_ERRNO__START || errnum >= __DSO_LOAD_ERRNO__END)
+ return -1;
+
+ idx = errnum - __DSO_LOAD_ERRNO__START;
+ scnprintf(buf, buflen, "%s", dso_load__error_str[idx]);
+ return 0;
+}
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
new file mode 100644
index 000000000..b41c9782c
--- /dev/null
+++ b/tools/perf/util/dso.h
@@ -0,0 +1,410 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DSO
+#define __PERF_DSO
+
+#include <linux/refcount.h>
+#include <linux/types.h>
+#include <linux/rbtree.h>
+#include <sys/types.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <linux/bitops.h>
+#include "build-id.h"
+#include "mutex.h"
+
+struct machine;
+struct map;
+struct perf_env;
+
+#define DSO__NAME_KALLSYMS "[kernel.kallsyms]"
+#define DSO__NAME_KCORE "[kernel.kcore]"
+
+enum dso_binary_type {
+ DSO_BINARY_TYPE__KALLSYMS = 0,
+ DSO_BINARY_TYPE__GUEST_KALLSYMS,
+ DSO_BINARY_TYPE__VMLINUX,
+ DSO_BINARY_TYPE__GUEST_VMLINUX,
+ DSO_BINARY_TYPE__JAVA_JIT,
+ DSO_BINARY_TYPE__DEBUGLINK,
+ DSO_BINARY_TYPE__BUILD_ID_CACHE,
+ DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO,
+ DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
+ DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
+ DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO,
+ DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
+ DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
+ DSO_BINARY_TYPE__GUEST_KMODULE,
+ DSO_BINARY_TYPE__GUEST_KMODULE_COMP,
+ DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE,
+ DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP,
+ DSO_BINARY_TYPE__KCORE,
+ DSO_BINARY_TYPE__GUEST_KCORE,
+ DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
+ DSO_BINARY_TYPE__BPF_PROG_INFO,
+ DSO_BINARY_TYPE__BPF_IMAGE,
+ DSO_BINARY_TYPE__OOL,
+ DSO_BINARY_TYPE__NOT_FOUND,
+};
+
+enum dso_space_type {
+ DSO_SPACE__USER = 0,
+ DSO_SPACE__KERNEL,
+ DSO_SPACE__KERNEL_GUEST
+};
+
+enum dso_swap_type {
+ DSO_SWAP__UNSET,
+ DSO_SWAP__NO,
+ DSO_SWAP__YES,
+};
+
+enum dso_data_status {
+ DSO_DATA_STATUS_ERROR = -1,
+ DSO_DATA_STATUS_UNKNOWN = 0,
+ DSO_DATA_STATUS_OK = 1,
+};
+
+enum dso_data_status_seen {
+ DSO_DATA_STATUS_SEEN_ITRACE,
+};
+
+enum dso_type {
+ DSO__TYPE_UNKNOWN,
+ DSO__TYPE_64BIT,
+ DSO__TYPE_32BIT,
+ DSO__TYPE_X32BIT,
+};
+
+enum dso_load_errno {
+ DSO_LOAD_ERRNO__SUCCESS = 0,
+
+ /*
+ * Choose an arbitrary negative big number not to clash with standard
+ * errno since SUS requires the errno has distinct positive values.
+ * See 'Issue 6' in the link below.
+ *
+ * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
+ */
+ __DSO_LOAD_ERRNO__START = -10000,
+
+ DSO_LOAD_ERRNO__INTERNAL_ERROR = __DSO_LOAD_ERRNO__START,
+
+ /* for symsrc__init() */
+ DSO_LOAD_ERRNO__INVALID_ELF,
+ DSO_LOAD_ERRNO__CANNOT_READ_BUILDID,
+ DSO_LOAD_ERRNO__MISMATCHING_BUILDID,
+
+ /* for decompress_kmodule */
+ DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE,
+
+ __DSO_LOAD_ERRNO__END,
+};
+
+#define DSO__SWAP(dso, type, val) \
+({ \
+ type ____r = val; \
+ BUG_ON(dso->needs_swap == DSO_SWAP__UNSET); \
+ if (dso->needs_swap == DSO_SWAP__YES) { \
+ switch (sizeof(____r)) { \
+ case 2: \
+ ____r = bswap_16(val); \
+ break; \
+ case 4: \
+ ____r = bswap_32(val); \
+ break; \
+ case 8: \
+ ____r = bswap_64(val); \
+ break; \
+ default: \
+ BUG_ON(1); \
+ } \
+ } \
+ ____r; \
+})
+
+#define DSO__DATA_CACHE_SIZE 4096
+#define DSO__DATA_CACHE_MASK ~(DSO__DATA_CACHE_SIZE - 1)
+
+/*
+ * Data about backing storage DSO, comes from PERF_RECORD_MMAP2 meta events
+ */
+struct dso_id {
+ u32 maj;
+ u32 min;
+ u64 ino;
+ u64 ino_generation;
+};
+
+struct dso_cache {
+ struct rb_node rb_node;
+ u64 offset;
+ u64 size;
+ char data[];
+};
+
+struct auxtrace_cache;
+
+struct dso {
+ struct mutex lock;
+ struct list_head node;
+ struct rb_node rb_node; /* rbtree node sorted by long name */
+ struct rb_root *root; /* root of rbtree that rb_node is in */
+ struct rb_root_cached symbols;
+ struct symbol **symbol_names;
+ size_t symbol_names_len;
+ struct rb_root_cached inlined_nodes;
+ struct rb_root_cached srclines;
+ struct {
+ u64 addr;
+ struct symbol *symbol;
+ } last_find_result;
+ void *a2l;
+ char *symsrc_filename;
+ unsigned int a2l_fails;
+ enum dso_space_type kernel;
+ enum dso_swap_type needs_swap;
+ enum dso_binary_type symtab_type;
+ enum dso_binary_type binary_type;
+ enum dso_load_errno load_errno;
+ u8 adjust_symbols:1;
+ u8 has_build_id:1;
+ u8 header_build_id:1;
+ u8 has_srcline:1;
+ u8 hit:1;
+ u8 annotate_warned:1;
+ u8 auxtrace_warned:1;
+ u8 short_name_allocated:1;
+ u8 long_name_allocated:1;
+ u8 is_64_bit:1;
+ bool sorted_by_name;
+ bool loaded;
+ u8 rel;
+ struct build_id bid;
+ u64 text_offset;
+ const char *short_name;
+ const char *long_name;
+ u16 long_name_len;
+ u16 short_name_len;
+ void *dwfl; /* DWARF debug info */
+ struct auxtrace_cache *auxtrace_cache;
+ int comp;
+
+ /* dso data file */
+ struct {
+ struct rb_root cache;
+ int fd;
+ int status;
+ u32 status_seen;
+ u64 file_size;
+ struct list_head open_entry;
+ u64 elf_base_addr;
+ u64 debug_frame_offset;
+ u64 eh_frame_hdr_addr;
+ u64 eh_frame_hdr_offset;
+ } data;
+ /* bpf prog information */
+ struct {
+ u32 id;
+ u32 sub_id;
+ struct perf_env *env;
+ } bpf_prog;
+
+ union { /* Tool specific area */
+ void *priv;
+ u64 db_id;
+ };
+ struct nsinfo *nsinfo;
+ struct dso_id id;
+ refcount_t refcnt;
+ char name[];
+};
+
+/* dso__for_each_symbol - iterate over the symbols of given type
+ *
+ * @dso: the 'struct dso *' in which symbols are iterated
+ * @pos: the 'struct symbol *' to use as a loop cursor
+ * @n: the 'struct rb_node *' to use as a temporary storage
+ */
+#define dso__for_each_symbol(dso, pos, n) \
+ symbols__for_each_entry(&(dso)->symbols, pos, n)
+
+#define dsos__for_each_with_build_id(pos, head) \
+ list_for_each_entry(pos, head, node) \
+ if (!pos->has_build_id) \
+ continue; \
+ else
+
+static inline void dso__set_loaded(struct dso *dso)
+{
+ dso->loaded = true;
+}
+
+struct dso *dso__new_id(const char *name, struct dso_id *id);
+struct dso *dso__new(const char *name);
+void dso__delete(struct dso *dso);
+
+int dso__cmp_id(struct dso *a, struct dso *b);
+void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated);
+void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated);
+
+int dso__name_len(const struct dso *dso);
+
+struct dso *dso__get(struct dso *dso);
+void dso__put(struct dso *dso);
+
+static inline void __dso__zput(struct dso **dso)
+{
+ dso__put(*dso);
+ *dso = NULL;
+}
+
+#define dso__zput(dso) __dso__zput(&dso)
+
+bool dso__loaded(const struct dso *dso);
+
+static inline bool dso__has_symbols(const struct dso *dso)
+{
+ return !RB_EMPTY_ROOT(&dso->symbols.rb_root);
+}
+
+char *dso__filename_with_chroot(const struct dso *dso, const char *filename);
+
+bool dso__sorted_by_name(const struct dso *dso);
+void dso__set_sorted_by_name(struct dso *dso);
+void dso__sort_by_name(struct dso *dso);
+
+void dso__set_build_id(struct dso *dso, struct build_id *bid);
+bool dso__build_id_equal(const struct dso *dso, struct build_id *bid);
+void dso__read_running_kernel_build_id(struct dso *dso,
+ struct machine *machine);
+int dso__kernel_module_get_build_id(struct dso *dso, const char *root_dir);
+
+char dso__symtab_origin(const struct dso *dso);
+int dso__read_binary_type_filename(const struct dso *dso, enum dso_binary_type type,
+ char *root_dir, char *filename, size_t size);
+bool is_kernel_module(const char *pathname, int cpumode);
+bool dso__needs_decompress(struct dso *dso);
+int dso__decompress_kmodule_fd(struct dso *dso, const char *name);
+int dso__decompress_kmodule_path(struct dso *dso, const char *name,
+ char *pathname, size_t len);
+int filename__decompress(const char *name, char *pathname,
+ size_t len, int comp, int *err);
+
+#define KMOD_DECOMP_NAME "/tmp/perf-kmod-XXXXXX"
+#define KMOD_DECOMP_LEN sizeof(KMOD_DECOMP_NAME)
+
+struct kmod_path {
+ char *name;
+ int comp;
+ bool kmod;
+};
+
+int __kmod_path__parse(struct kmod_path *m, const char *path,
+ bool alloc_name);
+
+#define kmod_path__parse(__m, __p) __kmod_path__parse(__m, __p, false)
+#define kmod_path__parse_name(__m, __p) __kmod_path__parse(__m, __p, true)
+
+void dso__set_module_info(struct dso *dso, struct kmod_path *m,
+ struct machine *machine);
+
+/*
+ * The dso__data_* external interface provides following functions:
+ * dso__data_get_fd
+ * dso__data_put_fd
+ * dso__data_close
+ * dso__data_size
+ * dso__data_read_offset
+ * dso__data_read_addr
+ * dso__data_write_cache_offs
+ * dso__data_write_cache_addr
+ *
+ * Please refer to the dso.c object code for each function and
+ * arguments documentation. Following text tries to explain the
+ * dso file descriptor caching.
+ *
+ * The dso__data* interface allows caching of opened file descriptors
+ * to speed up the dso data accesses. The idea is to leave the file
+ * descriptor opened ideally for the whole life of the dso object.
+ *
+ * The current usage of the dso__data_* interface is as follows:
+ *
+ * Get DSO's fd:
+ * int fd = dso__data_get_fd(dso, machine);
+ * if (fd >= 0) {
+ * USE 'fd' SOMEHOW
+ * dso__data_put_fd(dso);
+ * }
+ *
+ * Read DSO's data:
+ * n = dso__data_read_offset(dso_0, &machine, 0, buf, BUFSIZE);
+ * n = dso__data_read_addr(dso_0, &machine, 0, buf, BUFSIZE);
+ *
+ * Eventually close DSO's fd:
+ * dso__data_close(dso);
+ *
+ * It is not necessary to close the DSO object data file. Each time new
+ * DSO data file is opened, the limit (RLIMIT_NOFILE/2) is checked. Once
+ * it is crossed, the oldest opened DSO object is closed.
+ *
+ * The dso__delete function calls close_dso function to ensure the
+ * data file descriptor gets closed/unmapped before the dso object
+ * is freed.
+ *
+ * TODO
+*/
+int dso__data_get_fd(struct dso *dso, struct machine *machine);
+void dso__data_put_fd(struct dso *dso);
+void dso__data_close(struct dso *dso);
+
+int dso__data_file_size(struct dso *dso, struct machine *machine);
+off_t dso__data_size(struct dso *dso, struct machine *machine);
+ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
+ u64 offset, u8 *data, ssize_t size);
+ssize_t dso__data_read_addr(struct dso *dso, struct map *map,
+ struct machine *machine, u64 addr,
+ u8 *data, ssize_t size);
+bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by);
+ssize_t dso__data_write_cache_offs(struct dso *dso, struct machine *machine,
+ u64 offset, const u8 *data, ssize_t size);
+ssize_t dso__data_write_cache_addr(struct dso *dso, struct map *map,
+ struct machine *machine, u64 addr,
+ const u8 *data, ssize_t size);
+
+struct map *dso__new_map(const char *name);
+struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
+ const char *short_name, int dso_type);
+
+void dso__reset_find_symbol_cache(struct dso *dso);
+
+size_t dso__fprintf_symbols_by_name(struct dso *dso, FILE *fp);
+size_t dso__fprintf(struct dso *dso, FILE *fp);
+
+static inline bool dso__is_vmlinux(const struct dso *dso)
+{
+ return dso->binary_type == DSO_BINARY_TYPE__VMLINUX ||
+ dso->binary_type == DSO_BINARY_TYPE__GUEST_VMLINUX;
+}
+
+static inline bool dso__is_kcore(const struct dso *dso)
+{
+ return dso->binary_type == DSO_BINARY_TYPE__KCORE ||
+ dso->binary_type == DSO_BINARY_TYPE__GUEST_KCORE;
+}
+
+static inline bool dso__is_kallsyms(const struct dso *dso)
+{
+ return dso->kernel && dso->long_name[0] != '/';
+}
+
+bool dso__is_object_file(const struct dso *dso);
+
+void dso__free_a2l(struct dso *dso);
+
+enum dso_type dso__type(struct dso *dso, struct machine *machine);
+
+int dso__strerror_load(struct dso *dso, char *buf, size_t buflen);
+
+void reset_fd_limit(void);
+
+#endif /* __PERF_DSO */
diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c
new file mode 100644
index 000000000..cf80aa42d
--- /dev/null
+++ b/tools/perf/util/dsos.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "debug.h"
+#include "dsos.h"
+#include "dso.h"
+#include "util.h"
+#include "vdso.h"
+#include "namespaces.h"
+#include <errno.h>
+#include <libgen.h>
+#include <stdlib.h>
+#include <string.h>
+#include <symbol.h> // filename__read_build_id
+#include <unistd.h>
+
+static int __dso_id__cmp(struct dso_id *a, struct dso_id *b)
+{
+ if (a->maj > b->maj) return -1;
+ if (a->maj < b->maj) return 1;
+
+ if (a->min > b->min) return -1;
+ if (a->min < b->min) return 1;
+
+ if (a->ino > b->ino) return -1;
+ if (a->ino < b->ino) return 1;
+
+ /*
+ * Synthesized MMAP events have zero ino_generation, avoid comparing
+ * them with MMAP events with actual ino_generation.
+ *
+ * I found it harmful because the mismatch resulted in a new
+ * dso that did not have a build ID whereas the original dso did have a
+ * build ID. The build ID was essential because the object was not found
+ * otherwise. - Adrian
+ */
+ if (a->ino_generation && b->ino_generation) {
+ if (a->ino_generation > b->ino_generation) return -1;
+ if (a->ino_generation < b->ino_generation) return 1;
+ }
+
+ return 0;
+}
+
+static bool dso_id__empty(struct dso_id *id)
+{
+ if (!id)
+ return true;
+
+ return !id->maj && !id->min && !id->ino && !id->ino_generation;
+}
+
+static void dso__inject_id(struct dso *dso, struct dso_id *id)
+{
+ dso->id.maj = id->maj;
+ dso->id.min = id->min;
+ dso->id.ino = id->ino;
+ dso->id.ino_generation = id->ino_generation;
+}
+
+static int dso_id__cmp(struct dso_id *a, struct dso_id *b)
+{
+ /*
+ * The second is always dso->id, so zeroes if not set, assume passing
+ * NULL for a means a zeroed id
+ */
+ if (dso_id__empty(a) || dso_id__empty(b))
+ return 0;
+
+ return __dso_id__cmp(a, b);
+}
+
+int dso__cmp_id(struct dso *a, struct dso *b)
+{
+ return __dso_id__cmp(&a->id, &b->id);
+}
+
+bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
+{
+ bool have_build_id = false;
+ struct dso *pos;
+ struct nscookie nsc;
+
+ list_for_each_entry(pos, head, node) {
+ if (with_hits && !pos->hit && !dso__is_vdso(pos))
+ continue;
+ if (pos->has_build_id) {
+ have_build_id = true;
+ continue;
+ }
+ nsinfo__mountns_enter(pos->nsinfo, &nsc);
+ if (filename__read_build_id(pos->long_name, &pos->bid) > 0) {
+ have_build_id = true;
+ pos->has_build_id = true;
+ } else if (errno == ENOENT && pos->nsinfo) {
+ char *new_name = dso__filename_with_chroot(pos, pos->long_name);
+
+ if (new_name && filename__read_build_id(new_name,
+ &pos->bid) > 0) {
+ have_build_id = true;
+ pos->has_build_id = true;
+ }
+ free(new_name);
+ }
+ nsinfo__mountns_exit(&nsc);
+ }
+
+ return have_build_id;
+}
+
+static int __dso__cmp_long_name(const char *long_name, struct dso_id *id, struct dso *b)
+{
+ int rc = strcmp(long_name, b->long_name);
+ return rc ?: dso_id__cmp(id, &b->id);
+}
+
+static int __dso__cmp_short_name(const char *short_name, struct dso_id *id, struct dso *b)
+{
+ int rc = strcmp(short_name, b->short_name);
+ return rc ?: dso_id__cmp(id, &b->id);
+}
+
+static int dso__cmp_short_name(struct dso *a, struct dso *b)
+{
+ return __dso__cmp_short_name(a->short_name, &a->id, b);
+}
+
+/*
+ * Find a matching entry and/or link current entry to RB tree.
+ * Either one of the dso or name parameter must be non-NULL or the
+ * function will not work.
+ */
+struct dso *__dsos__findnew_link_by_longname_id(struct rb_root *root, struct dso *dso,
+ const char *name, struct dso_id *id)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+
+ if (!name)
+ name = dso->long_name;
+ /*
+ * Find node with the matching name
+ */
+ while (*p) {
+ struct dso *this = rb_entry(*p, struct dso, rb_node);
+ int rc = __dso__cmp_long_name(name, id, this);
+
+ parent = *p;
+ if (rc == 0) {
+ /*
+ * In case the new DSO is a duplicate of an existing
+ * one, print a one-time warning & put the new entry
+ * at the end of the list of duplicates.
+ */
+ if (!dso || (dso == this))
+ return this; /* Find matching dso */
+ /*
+ * The core kernel DSOs may have duplicated long name.
+ * In this case, the short name should be different.
+ * Comparing the short names to differentiate the DSOs.
+ */
+ rc = dso__cmp_short_name(dso, this);
+ if (rc == 0) {
+ pr_err("Duplicated dso name: %s\n", name);
+ return NULL;
+ }
+ }
+ if (rc < 0)
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
+ }
+ if (dso) {
+ /* Add new node and rebalance tree */
+ rb_link_node(&dso->rb_node, parent, p);
+ rb_insert_color(&dso->rb_node, root);
+ dso->root = root;
+ }
+ return NULL;
+}
+
+void __dsos__add(struct dsos *dsos, struct dso *dso)
+{
+ list_add_tail(&dso->node, &dsos->head);
+ __dsos__findnew_link_by_longname_id(&dsos->root, dso, NULL, &dso->id);
+ /*
+ * It is now in the linked list, grab a reference, then garbage collect
+ * this when needing memory, by looking at LRU dso instances in the
+ * list with atomic_read(&dso->refcnt) == 1, i.e. no references
+ * anywhere besides the one for the list, do, under a lock for the
+ * list: remove it from the list, then a dso__put(), that probably will
+ * be the last and will then call dso__delete(), end of life.
+ *
+ * That, or at the end of the 'struct machine' lifetime, when all
+ * 'struct dso' instances will be removed from the list, in
+ * dsos__exit(), if they have no other reference from some other data
+ * structure.
+ *
+ * E.g.: after processing a 'perf.data' file and storing references
+ * to objects instantiated while processing events, we will have
+ * references to the 'thread', 'map', 'dso' structs all from 'struct
+ * hist_entry' instances, but we may not need anything not referenced,
+ * so we might as well call machines__exit()/machines__delete() and
+ * garbage collect it.
+ */
+ dso__get(dso);
+}
+
+void dsos__add(struct dsos *dsos, struct dso *dso)
+{
+ down_write(&dsos->lock);
+ __dsos__add(dsos, dso);
+ up_write(&dsos->lock);
+}
+
+static struct dso *__dsos__findnew_by_longname_id(struct rb_root *root, const char *name, struct dso_id *id)
+{
+ return __dsos__findnew_link_by_longname_id(root, NULL, name, id);
+}
+
+static struct dso *__dsos__find_id(struct dsos *dsos, const char *name, struct dso_id *id, bool cmp_short)
+{
+ struct dso *pos;
+
+ if (cmp_short) {
+ list_for_each_entry(pos, &dsos->head, node)
+ if (__dso__cmp_short_name(name, id, pos) == 0)
+ return pos;
+ return NULL;
+ }
+ return __dsos__findnew_by_longname_id(&dsos->root, name, id);
+}
+
+struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
+{
+ return __dsos__find_id(dsos, name, NULL, cmp_short);
+}
+
+static void dso__set_basename(struct dso *dso)
+{
+ char *base, *lname;
+ int tid;
+
+ if (sscanf(dso->long_name, "/tmp/perf-%d.map", &tid) == 1) {
+ if (asprintf(&base, "[JIT] tid %d", tid) < 0)
+ return;
+ } else {
+ /*
+ * basename() may modify path buffer, so we must pass
+ * a copy.
+ */
+ lname = strdup(dso->long_name);
+ if (!lname)
+ return;
+
+ /*
+ * basename() may return a pointer to internal
+ * storage which is reused in subsequent calls
+ * so copy the result.
+ */
+ base = strdup(basename(lname));
+
+ free(lname);
+
+ if (!base)
+ return;
+ }
+ dso__set_short_name(dso, base, true);
+}
+
+static struct dso *__dsos__addnew_id(struct dsos *dsos, const char *name, struct dso_id *id)
+{
+ struct dso *dso = dso__new_id(name, id);
+
+ if (dso != NULL) {
+ __dsos__add(dsos, dso);
+ dso__set_basename(dso);
+ /* Put dso here because __dsos_add already got it */
+ dso__put(dso);
+ }
+ return dso;
+}
+
+struct dso *__dsos__addnew(struct dsos *dsos, const char *name)
+{
+ return __dsos__addnew_id(dsos, name, NULL);
+}
+
+static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id)
+{
+ struct dso *dso = __dsos__find_id(dsos, name, id, false);
+
+ if (dso && dso_id__empty(&dso->id) && !dso_id__empty(id))
+ dso__inject_id(dso, id);
+
+ return dso ? dso : __dsos__addnew_id(dsos, name, id);
+}
+
+struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id)
+{
+ struct dso *dso;
+ down_write(&dsos->lock);
+ dso = dso__get(__dsos__findnew_id(dsos, name, id));
+ up_write(&dsos->lock);
+ return dso;
+}
+
+size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
+ bool (skip)(struct dso *dso, int parm), int parm)
+{
+ struct dso *pos;
+ size_t ret = 0;
+
+ list_for_each_entry(pos, head, node) {
+ char sbuild_id[SBUILD_ID_SIZE];
+
+ if (skip && skip(pos, parm))
+ continue;
+ build_id__sprintf(&pos->bid, sbuild_id);
+ ret += fprintf(fp, "%-40s %s\n", sbuild_id, pos->long_name);
+ }
+ return ret;
+}
+
+size_t __dsos__fprintf(struct list_head *head, FILE *fp)
+{
+ struct dso *pos;
+ size_t ret = 0;
+
+ list_for_each_entry(pos, head, node) {
+ ret += dso__fprintf(pos, fp);
+ }
+
+ return ret;
+}
diff --git a/tools/perf/util/dsos.h b/tools/perf/util/dsos.h
new file mode 100644
index 000000000..5dbec2bc6
--- /dev/null
+++ b/tools/perf/util/dsos.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DSOS
+#define __PERF_DSOS
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include "rwsem.h"
+
+struct dso;
+struct dso_id;
+
+/*
+ * DSOs are put into both a list for fast iteration and rbtree for fast
+ * long name lookup.
+ */
+struct dsos {
+ struct list_head head;
+ struct rb_root root; /* rbtree root sorted by long name */
+ struct rw_semaphore lock;
+};
+
+void __dsos__add(struct dsos *dsos, struct dso *dso);
+void dsos__add(struct dsos *dsos, struct dso *dso);
+struct dso *__dsos__addnew(struct dsos *dsos, const char *name);
+struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short);
+
+struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id);
+
+struct dso *__dsos__findnew_link_by_longname_id(struct rb_root *root, struct dso *dso,
+ const char *name, struct dso_id *id);
+
+bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
+
+size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
+ bool (skip)(struct dso *dso, int parm), int parm);
+size_t __dsos__fprintf(struct list_head *head, FILE *fp);
+
+#endif /* __PERF_DSOS */
diff --git a/tools/perf/util/dump-insn.c b/tools/perf/util/dump-insn.c
new file mode 100644
index 000000000..2bd8585db
--- /dev/null
+++ b/tools/perf/util/dump-insn.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include "dump-insn.h"
+
+/* Fallback code */
+
+__weak
+const char *dump_insn(struct perf_insn *x __maybe_unused,
+ u64 ip __maybe_unused, u8 *inbuf __maybe_unused,
+ int inlen __maybe_unused, int *lenp)
+{
+ if (lenp)
+ *lenp = 0;
+ return "?";
+}
+
+__weak
+int arch_is_branch(const unsigned char *buf __maybe_unused,
+ size_t len __maybe_unused,
+ int x86_64 __maybe_unused)
+{
+ return 0;
+}
diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h
new file mode 100644
index 000000000..650125061
--- /dev/null
+++ b/tools/perf/util/dump-insn.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DUMP_INSN_H
+#define __PERF_DUMP_INSN_H 1
+
+#define MAXINSN 15
+
+#include <linux/types.h>
+
+struct thread;
+
+struct perf_insn {
+ /* Initialized by callers: */
+ struct thread *thread;
+ u8 cpumode;
+ bool is64bit;
+ int cpu;
+ /* Temporary */
+ char out[256];
+};
+
+const char *dump_insn(struct perf_insn *x, u64 ip,
+ u8 *inbuf, int inlen, int *lenp);
+int arch_is_branch(const unsigned char *buf, size_t len, int x86_64);
+
+#endif
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
new file mode 100644
index 000000000..2941d88f2
--- /dev/null
+++ b/tools/perf/util/dwarf-aux.c
@@ -0,0 +1,1427 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * dwarf-aux.c : libdw auxiliary interfaces
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include "debug.h"
+#include "dwarf-aux.h"
+#include "strbuf.h"
+#include "string2.h"
+
+/**
+ * cu_find_realpath - Find the realpath of the target file
+ * @cu_die: A DIE(dwarf information entry) of CU(compilation Unit)
+ * @fname: The tail filename of the target file
+ *
+ * Find the real(long) path of @fname in @cu_die.
+ */
+const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname)
+{
+ Dwarf_Files *files;
+ size_t nfiles, i;
+ const char *src = NULL;
+ int ret;
+
+ if (!fname)
+ return NULL;
+
+ ret = dwarf_getsrcfiles(cu_die, &files, &nfiles);
+ if (ret != 0)
+ return NULL;
+
+ for (i = 0; i < nfiles; i++) {
+ src = dwarf_filesrc(files, i, NULL, NULL);
+ if (strtailcmp(src, fname) == 0)
+ break;
+ }
+ if (i == nfiles)
+ return NULL;
+ return src;
+}
+
+/**
+ * cu_get_comp_dir - Get the path of compilation directory
+ * @cu_die: a CU DIE
+ *
+ * Get the path of compilation directory of given @cu_die.
+ * Since this depends on DW_AT_comp_dir, older gcc will not
+ * embedded it. In that case, this returns NULL.
+ */
+const char *cu_get_comp_dir(Dwarf_Die *cu_die)
+{
+ Dwarf_Attribute attr;
+ if (dwarf_attr(cu_die, DW_AT_comp_dir, &attr) == NULL)
+ return NULL;
+ return dwarf_formstring(&attr);
+}
+
+/* Unlike dwarf_getsrc_die(), cu_getsrc_die() only returns statement line */
+static Dwarf_Line *cu_getsrc_die(Dwarf_Die *cu_die, Dwarf_Addr addr)
+{
+ Dwarf_Addr laddr;
+ Dwarf_Lines *lines;
+ Dwarf_Line *line;
+ size_t nlines, l, u, n;
+ bool flag;
+
+ if (dwarf_getsrclines(cu_die, &lines, &nlines) != 0 ||
+ nlines == 0)
+ return NULL;
+
+ /* Lines are sorted by address, use binary search */
+ l = 0; u = nlines - 1;
+ while (l < u) {
+ n = u - (u - l) / 2;
+ line = dwarf_onesrcline(lines, n);
+ if (!line || dwarf_lineaddr(line, &laddr) != 0)
+ return NULL;
+ if (addr < laddr)
+ u = n - 1;
+ else
+ l = n;
+ }
+ /* Going backward to find the lowest line */
+ do {
+ line = dwarf_onesrcline(lines, --l);
+ if (!line || dwarf_lineaddr(line, &laddr) != 0)
+ return NULL;
+ } while (laddr == addr);
+ l++;
+ /* Going forward to find the statement line */
+ do {
+ line = dwarf_onesrcline(lines, l++);
+ if (!line || dwarf_lineaddr(line, &laddr) != 0 ||
+ dwarf_linebeginstatement(line, &flag) != 0)
+ return NULL;
+ if (laddr > addr)
+ return NULL;
+ } while (!flag);
+
+ return line;
+}
+
+/**
+ * cu_find_lineinfo - Get a line number and file name for given address
+ * @cu_die: a CU DIE
+ * @addr: An address
+ * @fname: a pointer which returns the file name string
+ * @lineno: a pointer which returns the line number
+ *
+ * Find a line number and file name for @addr in @cu_die.
+ */
+int cu_find_lineinfo(Dwarf_Die *cu_die, Dwarf_Addr addr,
+ const char **fname, int *lineno)
+{
+ Dwarf_Line *line;
+ Dwarf_Die die_mem;
+ Dwarf_Addr faddr;
+
+ if (die_find_realfunc(cu_die, addr, &die_mem)
+ && die_entrypc(&die_mem, &faddr) == 0 &&
+ faddr == addr) {
+ *fname = die_get_decl_file(&die_mem);
+ dwarf_decl_line(&die_mem, lineno);
+ goto out;
+ }
+
+ line = cu_getsrc_die(cu_die, addr);
+ if (line && dwarf_lineno(line, lineno) == 0) {
+ *fname = dwarf_linesrc(line, NULL, NULL);
+ if (!*fname)
+ /* line number is useless without filename */
+ *lineno = 0;
+ }
+
+out:
+ return (*lineno && *fname) ? *lineno : -ENOENT;
+}
+
+static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data);
+
+/**
+ * cu_walk_functions_at - Walk on function DIEs at given address
+ * @cu_die: A CU DIE
+ * @addr: An address
+ * @callback: A callback which called with found DIEs
+ * @data: A user data
+ *
+ * Walk on function DIEs at given @addr in @cu_die. Passed DIEs
+ * should be subprogram or inlined-subroutines.
+ */
+int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
+ int (*callback)(Dwarf_Die *, void *), void *data)
+{
+ Dwarf_Die die_mem;
+ Dwarf_Die *sc_die;
+ int ret = -ENOENT;
+
+ /* Inlined function could be recursive. Trace it until fail */
+ for (sc_die = die_find_realfunc(cu_die, addr, &die_mem);
+ sc_die != NULL;
+ sc_die = die_find_child(sc_die, __die_find_inline_cb, &addr,
+ &die_mem)) {
+ ret = callback(sc_die, data);
+ if (ret)
+ break;
+ }
+
+ return ret;
+
+}
+
+/**
+ * die_get_linkage_name - Get the linkage name of the object
+ * @dw_die: A DIE of the object
+ *
+ * Get the linkage name attribute of given @dw_die.
+ * For C++ binary, the linkage name will be the mangled symbol.
+ */
+const char *die_get_linkage_name(Dwarf_Die *dw_die)
+{
+ Dwarf_Attribute attr;
+
+ if (dwarf_attr_integrate(dw_die, DW_AT_linkage_name, &attr) == NULL)
+ return NULL;
+ return dwarf_formstring(&attr);
+}
+
+/**
+ * die_compare_name - Compare diename and tname
+ * @dw_die: a DIE
+ * @tname: a string of target name
+ *
+ * Compare the name of @dw_die and @tname. Return false if @dw_die has no name.
+ */
+bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
+{
+ const char *name;
+
+ name = dwarf_diename(dw_die);
+ return name ? (strcmp(tname, name) == 0) : false;
+}
+
+/**
+ * die_match_name - Match diename/linkage name and glob
+ * @dw_die: a DIE
+ * @glob: a string of target glob pattern
+ *
+ * Glob matching the name of @dw_die and @glob. Return false if matching fail.
+ * This also match linkage name.
+ */
+bool die_match_name(Dwarf_Die *dw_die, const char *glob)
+{
+ const char *name;
+
+ name = dwarf_diename(dw_die);
+ if (name && strglobmatch(name, glob))
+ return true;
+ /* fall back to check linkage name */
+ name = die_get_linkage_name(dw_die);
+ if (name && strglobmatch(name, glob))
+ return true;
+
+ return false;
+}
+
+/**
+ * die_get_call_lineno - Get callsite line number of inline-function instance
+ * @in_die: a DIE of an inlined function instance
+ *
+ * Get call-site line number of @in_die. This means from where the inline
+ * function is called.
+ */
+int die_get_call_lineno(Dwarf_Die *in_die)
+{
+ Dwarf_Attribute attr;
+ Dwarf_Word ret;
+
+ if (!dwarf_attr(in_die, DW_AT_call_line, &attr))
+ return -ENOENT;
+
+ dwarf_formudata(&attr, &ret);
+ return (int)ret;
+}
+
+/**
+ * die_get_type - Get type DIE
+ * @vr_die: a DIE of a variable
+ * @die_mem: where to store a type DIE
+ *
+ * Get a DIE of the type of given variable (@vr_die), and store
+ * it to die_mem. Return NULL if fails to get a type DIE.
+ */
+Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
+{
+ Dwarf_Attribute attr;
+
+ if (dwarf_attr_integrate(vr_die, DW_AT_type, &attr) &&
+ dwarf_formref_die(&attr, die_mem))
+ return die_mem;
+ else
+ return NULL;
+}
+
+/* Get a type die, but skip qualifiers */
+static Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
+{
+ int tag;
+
+ do {
+ vr_die = die_get_type(vr_die, die_mem);
+ if (!vr_die)
+ break;
+ tag = dwarf_tag(vr_die);
+ } while (tag == DW_TAG_const_type ||
+ tag == DW_TAG_restrict_type ||
+ tag == DW_TAG_volatile_type ||
+ tag == DW_TAG_shared_type);
+
+ return vr_die;
+}
+
+/**
+ * die_get_real_type - Get a type die, but skip qualifiers and typedef
+ * @vr_die: a DIE of a variable
+ * @die_mem: where to store a type DIE
+ *
+ * Get a DIE of the type of given variable (@vr_die), and store
+ * it to die_mem. Return NULL if fails to get a type DIE.
+ * If the type is qualifiers (e.g. const) or typedef, this skips it
+ * and tries to find real type (structure or basic types, e.g. int).
+ */
+Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
+{
+ do {
+ vr_die = __die_get_real_type(vr_die, die_mem);
+ } while (vr_die && dwarf_tag(vr_die) == DW_TAG_typedef);
+
+ return vr_die;
+}
+
+/* Get attribute and translate it as a udata */
+static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
+ Dwarf_Word *result)
+{
+ Dwarf_Attribute attr;
+
+ if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL ||
+ dwarf_formudata(&attr, result) != 0)
+ return -ENOENT;
+
+ return 0;
+}
+
+/**
+ * die_is_signed_type - Check whether a type DIE is signed or not
+ * @tp_die: a DIE of a type
+ *
+ * Get the encoding of @tp_die and return true if the encoding
+ * is signed.
+ */
+bool die_is_signed_type(Dwarf_Die *tp_die)
+{
+ Dwarf_Word ret;
+
+ if (die_get_attr_udata(tp_die, DW_AT_encoding, &ret))
+ return false;
+
+ return (ret == DW_ATE_signed_char || ret == DW_ATE_signed ||
+ ret == DW_ATE_signed_fixed);
+}
+
+/**
+ * die_is_func_def - Ensure that this DIE is a subprogram and definition
+ * @dw_die: a DIE
+ *
+ * Ensure that this DIE is a subprogram and NOT a declaration. This
+ * returns true if @dw_die is a function definition.
+ **/
+bool die_is_func_def(Dwarf_Die *dw_die)
+{
+ Dwarf_Attribute attr;
+ Dwarf_Addr addr = 0;
+
+ if (dwarf_tag(dw_die) != DW_TAG_subprogram)
+ return false;
+
+ if (dwarf_attr(dw_die, DW_AT_declaration, &attr))
+ return false;
+
+ /*
+ * DW_AT_declaration can be lost from function declaration
+ * by gcc's bug #97060.
+ * So we need to check this subprogram DIE has DW_AT_inline
+ * or an entry address.
+ */
+ if (!dwarf_attr(dw_die, DW_AT_inline, &attr) &&
+ die_entrypc(dw_die, &addr) < 0)
+ return false;
+
+ return true;
+}
+
+/**
+ * die_entrypc - Returns entry PC (the lowest address) of a DIE
+ * @dw_die: a DIE
+ * @addr: where to store entry PC
+ *
+ * Since dwarf_entrypc() does not return entry PC if the DIE has only address
+ * range, we have to use this to retrieve the lowest address from the address
+ * range attribute.
+ */
+int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr)
+{
+ Dwarf_Addr base, end;
+ Dwarf_Attribute attr;
+
+ if (!addr)
+ return -EINVAL;
+
+ if (dwarf_entrypc(dw_die, addr) == 0)
+ return 0;
+
+ /*
+ * Since the dwarf_ranges() will return 0 if there is no
+ * DW_AT_ranges attribute, we should check it first.
+ */
+ if (!dwarf_attr(dw_die, DW_AT_ranges, &attr))
+ return -ENOENT;
+
+ return dwarf_ranges(dw_die, 0, &base, addr, &end) < 0 ? -ENOENT : 0;
+}
+
+/**
+ * die_is_func_instance - Ensure that this DIE is an instance of a subprogram
+ * @dw_die: a DIE
+ *
+ * Ensure that this DIE is an instance (which has an entry address).
+ * This returns true if @dw_die is a function instance. If not, the @dw_die
+ * must be a prototype. You can use die_walk_instances() to find actual
+ * instances.
+ **/
+bool die_is_func_instance(Dwarf_Die *dw_die)
+{
+ Dwarf_Addr tmp;
+ Dwarf_Attribute attr_mem;
+ int tag = dwarf_tag(dw_die);
+
+ if (tag != DW_TAG_subprogram &&
+ tag != DW_TAG_inlined_subroutine)
+ return false;
+
+ return dwarf_entrypc(dw_die, &tmp) == 0 ||
+ dwarf_attr(dw_die, DW_AT_ranges, &attr_mem) != NULL;
+}
+
+/**
+ * die_get_data_member_location - Get the data-member offset
+ * @mb_die: a DIE of a member of a data structure
+ * @offs: The offset of the member in the data structure
+ *
+ * Get the offset of @mb_die in the data structure including @mb_die, and
+ * stores result offset to @offs. If any error occurs this returns errno.
+ */
+int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs)
+{
+ Dwarf_Attribute attr;
+ Dwarf_Op *expr;
+ size_t nexpr;
+ int ret;
+
+ if (dwarf_attr(mb_die, DW_AT_data_member_location, &attr) == NULL)
+ return -ENOENT;
+
+ if (dwarf_formudata(&attr, offs) != 0) {
+ /* DW_AT_data_member_location should be DW_OP_plus_uconst */
+ ret = dwarf_getlocation(&attr, &expr, &nexpr);
+ if (ret < 0 || nexpr == 0)
+ return -ENOENT;
+
+ if (expr[0].atom != DW_OP_plus_uconst || nexpr != 1) {
+ pr_debug("Unable to get offset:Unexpected OP %x (%zd)\n",
+ expr[0].atom, nexpr);
+ return -ENOTSUP;
+ }
+ *offs = (Dwarf_Word)expr[0].number;
+ }
+ return 0;
+}
+
+/* Get the call file index number in CU DIE */
+static int die_get_call_fileno(Dwarf_Die *in_die)
+{
+ Dwarf_Word idx;
+
+ if (die_get_attr_udata(in_die, DW_AT_call_file, &idx) == 0)
+ return (int)idx;
+ else
+ return -ENOENT;
+}
+
+/* Get the declared file index number in CU DIE */
+static int die_get_decl_fileno(Dwarf_Die *pdie)
+{
+ Dwarf_Word idx;
+
+ if (die_get_attr_udata(pdie, DW_AT_decl_file, &idx) == 0)
+ return (int)idx;
+ else
+ return -ENOENT;
+}
+
+/* Return the file name by index */
+static const char *die_get_file_name(Dwarf_Die *dw_die, int idx)
+{
+ Dwarf_Die cu_die;
+ Dwarf_Files *files;
+ Dwarf_Attribute attr_mem;
+
+ if (idx < 0 || !dwarf_attr_integrate(dw_die, DW_AT_decl_file, &attr_mem) ||
+ !dwarf_cu_die(attr_mem.cu, &cu_die, NULL, NULL, NULL, NULL, NULL, NULL) ||
+ dwarf_getsrcfiles(&cu_die, &files, NULL) != 0)
+ return NULL;
+
+ return dwarf_filesrc(files, idx, NULL, NULL);
+}
+
+/**
+ * die_get_call_file - Get callsite file name of inlined function instance
+ * @in_die: a DIE of an inlined function instance
+ *
+ * Get call-site file name of @in_die. This means from which file the inline
+ * function is called.
+ */
+const char *die_get_call_file(Dwarf_Die *in_die)
+{
+ return die_get_file_name(in_die, die_get_call_fileno(in_die));
+}
+
+/**
+ * die_get_decl_file - Find the declared file name of this DIE
+ * @dw_die: a DIE for something declared.
+ *
+ * Get declared file name of @dw_die.
+ * NOTE: Since some version of clang DWARF5 implementation incorrectly uses
+ * file index 0 for DW_AT_decl_file, die_get_decl_file() will return NULL for
+ * such cases. Use this function instead.
+ */
+const char *die_get_decl_file(Dwarf_Die *dw_die)
+{
+ return die_get_file_name(dw_die, die_get_decl_fileno(dw_die));
+}
+
+/**
+ * die_find_child - Generic DIE search function in DIE tree
+ * @rt_die: a root DIE
+ * @callback: a callback function
+ * @data: a user data passed to the callback function
+ * @die_mem: a buffer for result DIE
+ *
+ * Trace DIE tree from @rt_die and call @callback for each child DIE.
+ * If @callback returns DIE_FIND_CB_END, this stores the DIE into
+ * @die_mem and returns it. If @callback returns DIE_FIND_CB_CONTINUE,
+ * this continues to trace the tree. Optionally, @callback can return
+ * DIE_FIND_CB_CHILD and DIE_FIND_CB_SIBLING, those means trace only
+ * the children and trace only the siblings respectively.
+ * Returns NULL if @callback can't find any appropriate DIE.
+ */
+Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
+ int (*callback)(Dwarf_Die *, void *),
+ void *data, Dwarf_Die *die_mem)
+{
+ Dwarf_Die child_die;
+ int ret;
+
+ ret = dwarf_child(rt_die, die_mem);
+ if (ret != 0)
+ return NULL;
+
+ do {
+ ret = callback(die_mem, data);
+ if (ret == DIE_FIND_CB_END)
+ return die_mem;
+
+ if ((ret & DIE_FIND_CB_CHILD) &&
+ die_find_child(die_mem, callback, data, &child_die)) {
+ memcpy(die_mem, &child_die, sizeof(Dwarf_Die));
+ return die_mem;
+ }
+ } while ((ret & DIE_FIND_CB_SIBLING) &&
+ dwarf_siblingof(die_mem, die_mem) == 0);
+
+ return NULL;
+}
+
+struct __addr_die_search_param {
+ Dwarf_Addr addr;
+ Dwarf_Die *die_mem;
+};
+
+static int __die_search_func_tail_cb(Dwarf_Die *fn_die, void *data)
+{
+ struct __addr_die_search_param *ad = data;
+ Dwarf_Addr addr = 0;
+
+ if (dwarf_tag(fn_die) == DW_TAG_subprogram &&
+ !dwarf_highpc(fn_die, &addr) &&
+ addr == ad->addr) {
+ memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die));
+ return DWARF_CB_ABORT;
+ }
+ return DWARF_CB_OK;
+}
+
+/**
+ * die_find_tailfunc - Search for a non-inlined function with tail call at
+ * given address
+ * @cu_die: a CU DIE which including @addr
+ * @addr: target address
+ * @die_mem: a buffer for result DIE
+ *
+ * Search for a non-inlined function DIE with tail call at @addr. Stores the
+ * DIE to @die_mem and returns it if found. Returns NULL if failed.
+ */
+Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+ Dwarf_Die *die_mem)
+{
+ struct __addr_die_search_param ad;
+ ad.addr = addr;
+ ad.die_mem = die_mem;
+ /* dwarf_getscopes can't find subprogram. */
+ if (!dwarf_getfuncs(cu_die, __die_search_func_tail_cb, &ad, 0))
+ return NULL;
+ else
+ return die_mem;
+}
+
+/* die_find callback for non-inlined function search */
+static int __die_search_func_cb(Dwarf_Die *fn_die, void *data)
+{
+ struct __addr_die_search_param *ad = data;
+
+ /*
+ * Since a declaration entry doesn't has given pc, this always returns
+ * function definition entry.
+ */
+ if (dwarf_tag(fn_die) == DW_TAG_subprogram &&
+ dwarf_haspc(fn_die, ad->addr)) {
+ memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die));
+ return DWARF_CB_ABORT;
+ }
+ return DWARF_CB_OK;
+}
+
+/**
+ * die_find_realfunc - Search a non-inlined function at given address
+ * @cu_die: a CU DIE which including @addr
+ * @addr: target address
+ * @die_mem: a buffer for result DIE
+ *
+ * Search a non-inlined function DIE which includes @addr. Stores the
+ * DIE to @die_mem and returns it if found. Returns NULL if failed.
+ */
+Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+ Dwarf_Die *die_mem)
+{
+ struct __addr_die_search_param ad;
+ ad.addr = addr;
+ ad.die_mem = die_mem;
+ /* dwarf_getscopes can't find subprogram. */
+ if (!dwarf_getfuncs(cu_die, __die_search_func_cb, &ad, 0))
+ return NULL;
+ else
+ return die_mem;
+}
+
+/* die_find callback for inline function search */
+static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data)
+{
+ Dwarf_Addr *addr = data;
+
+ if (dwarf_tag(die_mem) == DW_TAG_inlined_subroutine &&
+ dwarf_haspc(die_mem, *addr))
+ return DIE_FIND_CB_END;
+
+ return DIE_FIND_CB_CONTINUE;
+}
+
+/**
+ * die_find_top_inlinefunc - Search the top inlined function at given address
+ * @sp_die: a subprogram DIE which including @addr
+ * @addr: target address
+ * @die_mem: a buffer for result DIE
+ *
+ * Search an inlined function DIE which includes @addr. Stores the
+ * DIE to @die_mem and returns it if found. Returns NULL if failed.
+ * Even if several inlined functions are expanded recursively, this
+ * doesn't trace it down, and returns the topmost one.
+ */
+Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+ Dwarf_Die *die_mem)
+{
+ return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem);
+}
+
+/**
+ * die_find_inlinefunc - Search an inlined function at given address
+ * @sp_die: a subprogram DIE which including @addr
+ * @addr: target address
+ * @die_mem: a buffer for result DIE
+ *
+ * Search an inlined function DIE which includes @addr. Stores the
+ * DIE to @die_mem and returns it if found. Returns NULL if failed.
+ * If several inlined functions are expanded recursively, this trace
+ * it down and returns deepest one.
+ */
+Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+ Dwarf_Die *die_mem)
+{
+ Dwarf_Die tmp_die;
+
+ sp_die = die_find_child(sp_die, __die_find_inline_cb, &addr, &tmp_die);
+ if (!sp_die)
+ return NULL;
+
+ /* Inlined function could be recursive. Trace it until fail */
+ while (sp_die) {
+ memcpy(die_mem, sp_die, sizeof(Dwarf_Die));
+ sp_die = die_find_child(sp_die, __die_find_inline_cb, &addr,
+ &tmp_die);
+ }
+
+ return die_mem;
+}
+
+struct __instance_walk_param {
+ void *addr;
+ int (*callback)(Dwarf_Die *, void *);
+ void *data;
+ int retval;
+};
+
+static int __die_walk_instances_cb(Dwarf_Die *inst, void *data)
+{
+ struct __instance_walk_param *iwp = data;
+ Dwarf_Attribute attr_mem;
+ Dwarf_Die origin_mem;
+ Dwarf_Attribute *attr;
+ Dwarf_Die *origin;
+ int tmp;
+
+ if (!die_is_func_instance(inst))
+ return DIE_FIND_CB_CONTINUE;
+
+ attr = dwarf_attr(inst, DW_AT_abstract_origin, &attr_mem);
+ if (attr == NULL)
+ return DIE_FIND_CB_CONTINUE;
+
+ origin = dwarf_formref_die(attr, &origin_mem);
+ if (origin == NULL || origin->addr != iwp->addr)
+ return DIE_FIND_CB_CONTINUE;
+
+ /* Ignore redundant instances */
+ if (dwarf_tag(inst) == DW_TAG_inlined_subroutine) {
+ dwarf_decl_line(origin, &tmp);
+ if (die_get_call_lineno(inst) == tmp) {
+ tmp = die_get_decl_fileno(origin);
+ if (die_get_call_fileno(inst) == tmp)
+ return DIE_FIND_CB_CONTINUE;
+ }
+ }
+
+ iwp->retval = iwp->callback(inst, iwp->data);
+
+ return (iwp->retval) ? DIE_FIND_CB_END : DIE_FIND_CB_CONTINUE;
+}
+
+/**
+ * die_walk_instances - Walk on instances of given DIE
+ * @or_die: an abstract original DIE
+ * @callback: a callback function which is called with instance DIE
+ * @data: user data
+ *
+ * Walk on the instances of give @in_die. @in_die must be an inlined function
+ * declaration. This returns the return value of @callback if it returns
+ * non-zero value, or -ENOENT if there is no instance.
+ */
+int die_walk_instances(Dwarf_Die *or_die, int (*callback)(Dwarf_Die *, void *),
+ void *data)
+{
+ Dwarf_Die cu_die;
+ Dwarf_Die die_mem;
+ struct __instance_walk_param iwp = {
+ .addr = or_die->addr,
+ .callback = callback,
+ .data = data,
+ .retval = -ENOENT,
+ };
+
+ if (dwarf_diecu(or_die, &cu_die, NULL, NULL) == NULL)
+ return -ENOENT;
+
+ die_find_child(&cu_die, __die_walk_instances_cb, &iwp, &die_mem);
+
+ return iwp.retval;
+}
+
+/* Line walker internal parameters */
+struct __line_walk_param {
+ bool recursive;
+ line_walk_callback_t callback;
+ void *data;
+ int retval;
+};
+
+static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data)
+{
+ struct __line_walk_param *lw = data;
+ Dwarf_Addr addr = 0;
+ const char *fname;
+ int lineno;
+
+ if (dwarf_tag(in_die) == DW_TAG_inlined_subroutine) {
+ fname = die_get_call_file(in_die);
+ lineno = die_get_call_lineno(in_die);
+ if (fname && lineno > 0 && die_entrypc(in_die, &addr) == 0) {
+ lw->retval = lw->callback(fname, lineno, addr, lw->data);
+ if (lw->retval != 0)
+ return DIE_FIND_CB_END;
+ }
+ if (!lw->recursive)
+ return DIE_FIND_CB_SIBLING;
+ }
+
+ if (addr) {
+ fname = die_get_decl_file(in_die);
+ if (fname && dwarf_decl_line(in_die, &lineno) == 0) {
+ lw->retval = lw->callback(fname, lineno, addr, lw->data);
+ if (lw->retval != 0)
+ return DIE_FIND_CB_END;
+ }
+ }
+
+ /* Continue to search nested inlined function call-sites */
+ return DIE_FIND_CB_CONTINUE;
+}
+
+/* Walk on lines of blocks included in given DIE */
+static int __die_walk_funclines(Dwarf_Die *sp_die, bool recursive,
+ line_walk_callback_t callback, void *data)
+{
+ struct __line_walk_param lw = {
+ .recursive = recursive,
+ .callback = callback,
+ .data = data,
+ .retval = 0,
+ };
+ Dwarf_Die die_mem;
+ Dwarf_Addr addr;
+ const char *fname;
+ int lineno;
+
+ /* Handle function declaration line */
+ fname = die_get_decl_file(sp_die);
+ if (fname && dwarf_decl_line(sp_die, &lineno) == 0 &&
+ die_entrypc(sp_die, &addr) == 0) {
+ lw.retval = callback(fname, lineno, addr, data);
+ if (lw.retval != 0)
+ goto done;
+ }
+ die_find_child(sp_die, __die_walk_funclines_cb, &lw, &die_mem);
+done:
+ return lw.retval;
+}
+
+static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data)
+{
+ struct __line_walk_param *lw = data;
+
+ /*
+ * Since inlined function can include another inlined function in
+ * the same file, we need to walk in it recursively.
+ */
+ lw->retval = __die_walk_funclines(sp_die, true, lw->callback, lw->data);
+ if (lw->retval != 0)
+ return DWARF_CB_ABORT;
+
+ return DWARF_CB_OK;
+}
+
+/**
+ * die_walk_lines - Walk on lines inside given DIE
+ * @rt_die: a root DIE (CU, subprogram or inlined_subroutine)
+ * @callback: callback routine
+ * @data: user data
+ *
+ * Walk on all lines inside given @rt_die and call @callback on each line.
+ * If the @rt_die is a function, walk only on the lines inside the function,
+ * otherwise @rt_die must be a CU DIE.
+ * Note that this walks not only dwarf line list, but also function entries
+ * and inline call-site.
+ */
+int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
+{
+ Dwarf_Lines *lines;
+ Dwarf_Line *line;
+ Dwarf_Addr addr;
+ const char *fname, *decf = NULL, *inf = NULL;
+ int lineno, ret = 0;
+ int decl = 0, inl;
+ Dwarf_Die die_mem, *cu_die;
+ size_t nlines, i;
+ bool flag;
+
+ /* Get the CU die */
+ if (dwarf_tag(rt_die) != DW_TAG_compile_unit) {
+ cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL);
+ dwarf_decl_line(rt_die, &decl);
+ decf = die_get_decl_file(rt_die);
+ if (!decf) {
+ pr_debug2("Failed to get the declared file name of %s\n",
+ dwarf_diename(rt_die));
+ return -EINVAL;
+ }
+ } else
+ cu_die = rt_die;
+ if (!cu_die) {
+ pr_debug2("Failed to get CU from given DIE.\n");
+ return -EINVAL;
+ }
+
+ /* Get lines list in the CU */
+ if (dwarf_getsrclines(cu_die, &lines, &nlines) != 0) {
+ pr_debug2("Failed to get source lines on this CU.\n");
+ return -ENOENT;
+ }
+ pr_debug2("Get %zd lines from this CU\n", nlines);
+
+ /* Walk on the lines on lines list */
+ for (i = 0; i < nlines; i++) {
+ line = dwarf_onesrcline(lines, i);
+ if (line == NULL ||
+ dwarf_lineno(line, &lineno) != 0 ||
+ dwarf_lineaddr(line, &addr) != 0) {
+ pr_debug2("Failed to get line info. "
+ "Possible error in debuginfo.\n");
+ continue;
+ }
+ /* Skip end-of-sequence */
+ if (dwarf_lineendsequence(line, &flag) != 0 || flag)
+ continue;
+ /* Skip Non statement line-info */
+ if (dwarf_linebeginstatement(line, &flag) != 0 || !flag)
+ continue;
+ /* Filter lines based on address */
+ if (rt_die != cu_die) {
+ /*
+ * Address filtering
+ * The line is included in given function, and
+ * no inline block includes it.
+ */
+ if (!dwarf_haspc(rt_die, addr))
+ continue;
+
+ if (die_find_inlinefunc(rt_die, addr, &die_mem)) {
+ /* Call-site check */
+ inf = die_get_call_file(&die_mem);
+ if ((inf && !strcmp(inf, decf)) &&
+ die_get_call_lineno(&die_mem) == lineno)
+ goto found;
+
+ dwarf_decl_line(&die_mem, &inl);
+ if (inl != decl ||
+ decf != die_get_decl_file(&die_mem))
+ continue;
+ }
+ }
+found:
+ /* Get source line */
+ fname = dwarf_linesrc(line, NULL, NULL);
+
+ ret = callback(fname, lineno, addr, data);
+ if (ret != 0)
+ return ret;
+ }
+
+ /*
+ * Dwarf lines doesn't include function declarations and inlined
+ * subroutines. We have to check functions list or given function.
+ */
+ if (rt_die != cu_die)
+ /*
+ * Don't need walk inlined functions recursively, because
+ * inner inlined functions don't have the lines of the
+ * specified function.
+ */
+ ret = __die_walk_funclines(rt_die, false, callback, data);
+ else {
+ struct __line_walk_param param = {
+ .callback = callback,
+ .data = data,
+ .retval = 0,
+ };
+ dwarf_getfuncs(cu_die, __die_walk_culines_cb, &param, 0);
+ ret = param.retval;
+ }
+
+ return ret;
+}
+
+struct __find_variable_param {
+ const char *name;
+ Dwarf_Addr addr;
+};
+
+static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data)
+{
+ struct __find_variable_param *fvp = data;
+ Dwarf_Attribute attr;
+ int tag;
+
+ tag = dwarf_tag(die_mem);
+ if ((tag == DW_TAG_formal_parameter ||
+ tag == DW_TAG_variable) &&
+ die_compare_name(die_mem, fvp->name) &&
+ /*
+ * Does the DIE have location information or const value
+ * or external instance?
+ */
+ (dwarf_attr(die_mem, DW_AT_external, &attr) ||
+ dwarf_attr(die_mem, DW_AT_location, &attr) ||
+ dwarf_attr(die_mem, DW_AT_const_value, &attr)))
+ return DIE_FIND_CB_END;
+ if (dwarf_haspc(die_mem, fvp->addr))
+ return DIE_FIND_CB_CONTINUE;
+ else
+ return DIE_FIND_CB_SIBLING;
+}
+
+/**
+ * die_find_variable_at - Find a given name variable at given address
+ * @sp_die: a function DIE
+ * @name: variable name
+ * @addr: address
+ * @die_mem: a buffer for result DIE
+ *
+ * Find a variable DIE called @name at @addr in @sp_die.
+ */
+Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
+ Dwarf_Addr addr, Dwarf_Die *die_mem)
+{
+ struct __find_variable_param fvp = { .name = name, .addr = addr};
+
+ return die_find_child(sp_die, __die_find_variable_cb, (void *)&fvp,
+ die_mem);
+}
+
+static int __die_find_member_cb(Dwarf_Die *die_mem, void *data)
+{
+ const char *name = data;
+
+ if (dwarf_tag(die_mem) == DW_TAG_member) {
+ if (die_compare_name(die_mem, name))
+ return DIE_FIND_CB_END;
+ else if (!dwarf_diename(die_mem)) { /* Unnamed structure */
+ Dwarf_Die type_die, tmp_die;
+ if (die_get_type(die_mem, &type_die) &&
+ die_find_member(&type_die, name, &tmp_die))
+ return DIE_FIND_CB_END;
+ }
+ }
+ return DIE_FIND_CB_SIBLING;
+}
+
+/**
+ * die_find_member - Find a given name member in a data structure
+ * @st_die: a data structure type DIE
+ * @name: member name
+ * @die_mem: a buffer for result DIE
+ *
+ * Find a member DIE called @name in @st_die.
+ */
+Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
+ Dwarf_Die *die_mem)
+{
+ return die_find_child(st_die, __die_find_member_cb, (void *)name,
+ die_mem);
+}
+
+/**
+ * die_get_typename - Get the name of given variable DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for result type name
+ *
+ * Get the name of @vr_die and stores it to @buf. Return 0 if succeeded.
+ * and Return -ENOENT if failed to find type name.
+ * Note that the result will stores typedef name if possible, and stores
+ * "*(function_type)" if the type is a function pointer.
+ */
+int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf)
+{
+ Dwarf_Die type;
+ int tag, ret;
+ const char *tmp = "";
+
+ if (__die_get_real_type(vr_die, &type) == NULL)
+ return -ENOENT;
+
+ tag = dwarf_tag(&type);
+ if (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type)
+ tmp = "*";
+ else if (tag == DW_TAG_subroutine_type) {
+ /* Function pointer */
+ return strbuf_add(buf, "(function_type)", 15);
+ } else {
+ const char *name = dwarf_diename(&type);
+
+ if (tag == DW_TAG_union_type)
+ tmp = "union ";
+ else if (tag == DW_TAG_structure_type)
+ tmp = "struct ";
+ else if (tag == DW_TAG_enumeration_type)
+ tmp = "enum ";
+ else if (name == NULL)
+ return -ENOENT;
+ /* Write a base name */
+ return strbuf_addf(buf, "%s%s", tmp, name ?: "");
+ }
+ ret = die_get_typename(&type, buf);
+ return ret ? ret : strbuf_addstr(buf, tmp);
+}
+
+/**
+ * die_get_varname - Get the name and type of given variable DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for type and variable name
+ *
+ * Get the name and type of @vr_die and stores it in @buf as "type\tname".
+ */
+int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf)
+{
+ int ret;
+
+ ret = die_get_typename(vr_die, buf);
+ if (ret < 0) {
+ pr_debug("Failed to get type, make it unknown.\n");
+ ret = strbuf_add(buf, "(unknown_type)", 14);
+ }
+
+ return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die));
+}
+
+#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT
+/**
+ * die_get_var_innermost_scope - Get innermost scope range of given variable DIE
+ * @sp_die: a subprogram DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for variable byte offset range
+ *
+ * Get the innermost scope range of @vr_die and stores it in @buf as
+ * "@<function_name+[NN-NN,NN-NN]>".
+ */
+static int die_get_var_innermost_scope(Dwarf_Die *sp_die, Dwarf_Die *vr_die,
+ struct strbuf *buf)
+{
+ Dwarf_Die *scopes;
+ int count;
+ size_t offset = 0;
+ Dwarf_Addr base;
+ Dwarf_Addr start, end;
+ Dwarf_Addr entry;
+ int ret;
+ bool first = true;
+ const char *name;
+
+ ret = die_entrypc(sp_die, &entry);
+ if (ret)
+ return ret;
+
+ name = dwarf_diename(sp_die);
+ if (!name)
+ return -ENOENT;
+
+ count = dwarf_getscopes_die(vr_die, &scopes);
+
+ /* (*SCOPES)[1] is the DIE for the scope containing that scope */
+ if (count <= 1) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ while ((offset = dwarf_ranges(&scopes[1], offset, &base,
+ &start, &end)) > 0) {
+ start -= entry;
+ end -= entry;
+
+ if (first) {
+ ret = strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
+ name, start, end);
+ first = false;
+ } else {
+ ret = strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
+ start, end);
+ }
+ if (ret < 0)
+ goto out;
+ }
+
+ if (!first)
+ ret = strbuf_add(buf, "]>", 2);
+
+out:
+ free(scopes);
+ return ret;
+}
+
+/**
+ * die_get_var_range - Get byte offset range of given variable DIE
+ * @sp_die: a subprogram DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for type and variable name and byte offset range
+ *
+ * Get the byte offset range of @vr_die and stores it in @buf as
+ * "@<function_name+[NN-NN,NN-NN]>".
+ */
+int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf)
+{
+ int ret = 0;
+ Dwarf_Addr base;
+ Dwarf_Addr start, end;
+ Dwarf_Addr entry;
+ Dwarf_Op *op;
+ size_t nops;
+ size_t offset = 0;
+ Dwarf_Attribute attr;
+ bool first = true;
+ const char *name;
+
+ ret = die_entrypc(sp_die, &entry);
+ if (ret)
+ return ret;
+
+ name = dwarf_diename(sp_die);
+ if (!name)
+ return -ENOENT;
+
+ if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL)
+ return -EINVAL;
+
+ while ((offset = dwarf_getlocations(&attr, offset, &base,
+ &start, &end, &op, &nops)) > 0) {
+ if (start == 0) {
+ /* Single Location Descriptions */
+ ret = die_get_var_innermost_scope(sp_die, vr_die, buf);
+ goto out;
+ }
+
+ /* Location Lists */
+ start -= entry;
+ end -= entry;
+ if (first) {
+ ret = strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
+ name, start, end);
+ first = false;
+ } else {
+ ret = strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
+ start, end);
+ }
+ if (ret < 0)
+ goto out;
+ }
+
+ if (!first)
+ ret = strbuf_add(buf, "]>", 2);
+out:
+ return ret;
+}
+#else
+int die_get_var_range(Dwarf_Die *sp_die __maybe_unused,
+ Dwarf_Die *vr_die __maybe_unused,
+ struct strbuf *buf __maybe_unused)
+{
+ return -ENOTSUP;
+}
+#endif
+
+/*
+ * die_has_loclist - Check if DW_AT_location of @vr_die is a location list
+ * @vr_die: a variable DIE
+ */
+static bool die_has_loclist(Dwarf_Die *vr_die)
+{
+ Dwarf_Attribute loc;
+ int tag = dwarf_tag(vr_die);
+
+ if (tag != DW_TAG_formal_parameter &&
+ tag != DW_TAG_variable)
+ return false;
+
+ return (dwarf_attr_integrate(vr_die, DW_AT_location, &loc) &&
+ dwarf_whatform(&loc) == DW_FORM_sec_offset);
+}
+
+/*
+ * die_is_optimized_target - Check if target program is compiled with
+ * optimization
+ * @cu_die: a CU DIE
+ *
+ * For any object in given CU whose DW_AT_location is a location list,
+ * target program is compiled with optimization. This is applicable to
+ * clang as well.
+ */
+bool die_is_optimized_target(Dwarf_Die *cu_die)
+{
+ Dwarf_Die tmp_die;
+
+ if (die_has_loclist(cu_die))
+ return true;
+
+ if (!dwarf_child(cu_die, &tmp_die) &&
+ die_is_optimized_target(&tmp_die))
+ return true;
+
+ if (!dwarf_siblingof(cu_die, &tmp_die) &&
+ die_is_optimized_target(&tmp_die))
+ return true;
+
+ return false;
+}
+
+/*
+ * die_search_idx - Search index of given line address
+ * @lines: Line records of single CU
+ * @nr_lines: Number of @lines
+ * @addr: address we are looking for
+ * @idx: index to be set by this function (return value)
+ *
+ * Search for @addr by looping over every lines of CU. If address
+ * matches, set index of that line in @idx. Note that single source
+ * line can have multiple line records. i.e. single source line can
+ * have multiple index.
+ */
+static bool die_search_idx(Dwarf_Lines *lines, unsigned long nr_lines,
+ Dwarf_Addr addr, unsigned long *idx)
+{
+ unsigned long i;
+ Dwarf_Addr tmp;
+
+ for (i = 0; i < nr_lines; i++) {
+ if (dwarf_lineaddr(dwarf_onesrcline(lines, i), &tmp))
+ return false;
+
+ if (tmp == addr) {
+ *idx = i;
+ return true;
+ }
+ }
+ return false;
+}
+
+/*
+ * die_get_postprologue_addr - Search next address after function prologue
+ * @entrypc_idx: entrypc index
+ * @lines: Line records of single CU
+ * @nr_lines: Number of @lines
+ * @hignpc: high PC address of function
+ * @postprologue_addr: Next address after function prologue (return value)
+ *
+ * Look for prologue-end marker. If there is no explicit marker, return
+ * address of next line record or next source line.
+ */
+static bool die_get_postprologue_addr(unsigned long entrypc_idx,
+ Dwarf_Lines *lines,
+ unsigned long nr_lines,
+ Dwarf_Addr highpc,
+ Dwarf_Addr *postprologue_addr)
+{
+ unsigned long i;
+ int entrypc_lno, lno;
+ Dwarf_Line *line;
+ Dwarf_Addr addr;
+ bool p_end;
+
+ /* entrypc_lno is actual source line number */
+ line = dwarf_onesrcline(lines, entrypc_idx);
+ if (dwarf_lineno(line, &entrypc_lno))
+ return false;
+
+ for (i = entrypc_idx; i < nr_lines; i++) {
+ line = dwarf_onesrcline(lines, i);
+
+ if (dwarf_lineaddr(line, &addr) ||
+ dwarf_lineno(line, &lno) ||
+ dwarf_lineprologueend(line, &p_end))
+ return false;
+
+ /* highpc is exclusive. [entrypc,highpc) */
+ if (addr >= highpc)
+ break;
+
+ /* clang supports prologue-end marker */
+ if (p_end)
+ break;
+
+ /* Actual next line in source */
+ if (lno != entrypc_lno)
+ break;
+
+ /*
+ * Single source line can have multiple line records.
+ * For Example,
+ * void foo() { printf("hello\n"); }
+ * contains two line records. One points to declaration and
+ * other points to printf() line. Variable 'lno' won't get
+ * incremented in this case but 'i' will.
+ */
+ if (i != entrypc_idx)
+ break;
+ }
+
+ dwarf_lineaddr(line, postprologue_addr);
+ if (*postprologue_addr >= highpc)
+ dwarf_lineaddr(dwarf_onesrcline(lines, i - 1),
+ postprologue_addr);
+
+ return true;
+}
+
+/*
+ * die_skip_prologue - Use next address after prologue as probe location
+ * @sp_die: a subprogram DIE
+ * @cu_die: a CU DIE
+ * @entrypc: entrypc of the function
+ *
+ * Function prologue prepares stack and registers before executing function
+ * logic. When target program is compiled without optimization, function
+ * parameter information is only valid after prologue. When we probe entrypc
+ * of the function, and try to record function parameter, it contains
+ * garbage value.
+ */
+void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die,
+ Dwarf_Addr *entrypc)
+{
+ size_t nr_lines = 0;
+ unsigned long entrypc_idx = 0;
+ Dwarf_Lines *lines = NULL;
+ Dwarf_Addr postprologue_addr;
+ Dwarf_Addr highpc;
+
+ if (dwarf_highpc(sp_die, &highpc))
+ return;
+
+ if (dwarf_getsrclines(cu_die, &lines, &nr_lines))
+ return;
+
+ if (!die_search_idx(lines, nr_lines, *entrypc, &entrypc_idx))
+ return;
+
+ if (!die_get_postprologue_addr(entrypc_idx, lines, nr_lines,
+ highpc, &postprologue_addr))
+ return;
+
+ *entrypc = postprologue_addr;
+}
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
new file mode 100644
index 000000000..7ec8bc108
--- /dev/null
+++ b/tools/perf/util/dwarf-aux.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _DWARF_AUX_H
+#define _DWARF_AUX_H
+/*
+ * dwarf-aux.h : libdw auxiliary interfaces
+ */
+
+#include <dwarf.h>
+#include <elfutils/libdw.h>
+#include <elfutils/libdwfl.h>
+#include <elfutils/version.h>
+
+struct strbuf;
+
+/* Find the realpath of the target file */
+const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname);
+
+/* Get DW_AT_comp_dir (should be NULL with older gcc) */
+const char *cu_get_comp_dir(Dwarf_Die *cu_die);
+
+/* Get a line number and file name for given address */
+int cu_find_lineinfo(Dwarf_Die *cudie, Dwarf_Addr addr,
+ const char **fname, int *lineno);
+
+/* Walk on functions at given address */
+int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
+ int (*callback)(Dwarf_Die *, void *), void *data);
+
+/* Get DW_AT_linkage_name (should be NULL for C binary) */
+const char *die_get_linkage_name(Dwarf_Die *dw_die);
+
+/* Get the lowest PC in DIE (including range list) */
+int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr);
+
+/* Ensure that this DIE is a subprogram and definition (not declaration) */
+bool die_is_func_def(Dwarf_Die *dw_die);
+
+/* Ensure that this DIE is an instance of a subprogram */
+bool die_is_func_instance(Dwarf_Die *dw_die);
+
+/* Compare diename and tname */
+bool die_compare_name(Dwarf_Die *dw_die, const char *tname);
+
+/* Matching diename with glob pattern */
+bool die_match_name(Dwarf_Die *dw_die, const char *glob);
+
+/* Get callsite line number of inline-function instance */
+int die_get_call_lineno(Dwarf_Die *in_die);
+
+/* Get callsite file name of inlined function instance */
+const char *die_get_call_file(Dwarf_Die *in_die);
+
+/* Get declared file name of a DIE */
+const char *die_get_decl_file(Dwarf_Die *dw_die);
+
+/* Get type die */
+Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
+
+/* Get a type die, but skip qualifiers and typedef */
+Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
+
+/* Check whether the DIE is signed or not */
+bool die_is_signed_type(Dwarf_Die *tp_die);
+
+/* Get data_member_location offset */
+int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs);
+
+/* Return values for die_find_child() callbacks */
+enum {
+ DIE_FIND_CB_END = 0, /* End of Search */
+ DIE_FIND_CB_CHILD = 1, /* Search only children */
+ DIE_FIND_CB_SIBLING = 2, /* Search only siblings */
+ DIE_FIND_CB_CONTINUE = 3, /* Search children and siblings */
+};
+
+/* Search child DIEs */
+Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
+ int (*callback)(Dwarf_Die *, void *),
+ void *data, Dwarf_Die *die_mem);
+
+/* Search a non-inlined function including given address */
+Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+ Dwarf_Die *die_mem);
+
+/* Search a non-inlined function with tail call at given address */
+Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+ Dwarf_Die *die_mem);
+
+/* Search the top inlined function including given address */
+Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+ Dwarf_Die *die_mem);
+
+/* Search the deepest inlined function including given address */
+Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+ Dwarf_Die *die_mem);
+
+/* Walk on the instances of given DIE */
+int die_walk_instances(Dwarf_Die *in_die,
+ int (*callback)(Dwarf_Die *, void *), void *data);
+
+/* Walker on lines (Note: line number will not be sorted) */
+typedef int (* line_walk_callback_t) (const char *fname, int lineno,
+ Dwarf_Addr addr, void *data);
+
+/*
+ * Walk on lines inside given DIE. If the DIE is a subprogram, walk only on
+ * the lines inside the subprogram, otherwise the DIE must be a CU DIE.
+ */
+int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data);
+
+/* Find a variable called 'name' at given address */
+Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
+ Dwarf_Addr addr, Dwarf_Die *die_mem);
+
+/* Find a member called 'name' */
+Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
+ Dwarf_Die *die_mem);
+
+/* Get the name of given variable DIE */
+int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf);
+
+/* Get the name and type of given variable DIE, stored as "type\tname" */
+int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf);
+int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf);
+
+/* Check if target program is compiled with optimization */
+bool die_is_optimized_target(Dwarf_Die *cu_die);
+
+/* Use next address after prologue as probe location */
+void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die,
+ Dwarf_Addr *entrypc);
+
+#endif
diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c
new file mode 100644
index 000000000..69cfaa595
--- /dev/null
+++ b/tools/perf/util/dwarf-regs.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dwarf-regs.c : Mapping of DWARF debug register numbers into register names.
+ *
+ * Written by: Masami Hiramatsu <mhiramat@kernel.org>
+ */
+
+#include <debug.h>
+#include <dwarf-regs.h>
+#include <elf.h>
+#include <linux/kernel.h>
+
+#ifndef EM_AARCH64
+#define EM_AARCH64 183 /* ARM 64 bit */
+#endif
+
+#ifndef EM_LOONGARCH
+#define EM_LOONGARCH 258 /* LoongArch */
+#endif
+
+/* Define const char * {arch}_register_tbl[] */
+#define DEFINE_DWARF_REGSTR_TABLE
+#include "../arch/x86/include/dwarf-regs-table.h"
+#include "../arch/arm/include/dwarf-regs-table.h"
+#include "../arch/arm64/include/dwarf-regs-table.h"
+#include "../arch/sh/include/dwarf-regs-table.h"
+#include "../arch/powerpc/include/dwarf-regs-table.h"
+#include "../arch/s390/include/dwarf-regs-table.h"
+#include "../arch/sparc/include/dwarf-regs-table.h"
+#include "../arch/xtensa/include/dwarf-regs-table.h"
+#include "../arch/mips/include/dwarf-regs-table.h"
+#include "../arch/loongarch/include/dwarf-regs-table.h"
+
+#define __get_dwarf_regstr(tbl, n) (((n) < ARRAY_SIZE(tbl)) ? (tbl)[(n)] : NULL)
+
+/* Return architecture dependent register string (for kprobe-tracer) */
+const char *get_dwarf_regstr(unsigned int n, unsigned int machine)
+{
+ switch (machine) {
+ case EM_NONE: /* Generic arch - use host arch */
+ return get_arch_regstr(n);
+ case EM_386:
+ return __get_dwarf_regstr(x86_32_regstr_tbl, n);
+ case EM_X86_64:
+ return __get_dwarf_regstr(x86_64_regstr_tbl, n);
+ case EM_ARM:
+ return __get_dwarf_regstr(arm_regstr_tbl, n);
+ case EM_AARCH64:
+ return __get_dwarf_regstr(aarch64_regstr_tbl, n);
+ case EM_SH:
+ return __get_dwarf_regstr(sh_regstr_tbl, n);
+ case EM_S390:
+ return __get_dwarf_regstr(s390_regstr_tbl, n);
+ case EM_PPC:
+ case EM_PPC64:
+ return __get_dwarf_regstr(powerpc_regstr_tbl, n);
+ case EM_SPARC:
+ case EM_SPARCV9:
+ return __get_dwarf_regstr(sparc_regstr_tbl, n);
+ case EM_XTENSA:
+ return __get_dwarf_regstr(xtensa_regstr_tbl, n);
+ case EM_MIPS:
+ return __get_dwarf_regstr(mips_regstr_tbl, n);
+ case EM_LOONGARCH:
+ return __get_dwarf_regstr(loongarch_regstr_tbl, n);
+ default:
+ pr_err("ELF MACHINE %x is not supported.\n", machine);
+ }
+ return NULL;
+}
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
new file mode 100644
index 000000000..d2c7b6e6e
--- /dev/null
+++ b/tools/perf/util/env.c
@@ -0,0 +1,594 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "cpumap.h"
+#include "debug.h"
+#include "env.h"
+#include "util/header.h"
+#include <linux/ctype.h>
+#include <linux/zalloc.h>
+#include "cgroup.h"
+#include <errno.h>
+#include <sys/utsname.h>
+#include <stdlib.h>
+#include <string.h>
+#include "pmus.h"
+#include "strbuf.h"
+
+struct perf_env perf_env;
+
+#ifdef HAVE_LIBBPF_SUPPORT
+#include "bpf-event.h"
+#include "bpf-utils.h"
+#include <bpf/libbpf.h>
+
+void perf_env__insert_bpf_prog_info(struct perf_env *env,
+ struct bpf_prog_info_node *info_node)
+{
+ down_write(&env->bpf_progs.lock);
+ __perf_env__insert_bpf_prog_info(env, info_node);
+ up_write(&env->bpf_progs.lock);
+}
+
+void __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node)
+{
+ __u32 prog_id = info_node->info_linear->info.id;
+ struct bpf_prog_info_node *node;
+ struct rb_node *parent = NULL;
+ struct rb_node **p;
+
+ p = &env->bpf_progs.infos.rb_node;
+
+ while (*p != NULL) {
+ parent = *p;
+ node = rb_entry(parent, struct bpf_prog_info_node, rb_node);
+ if (prog_id < node->info_linear->info.id) {
+ p = &(*p)->rb_left;
+ } else if (prog_id > node->info_linear->info.id) {
+ p = &(*p)->rb_right;
+ } else {
+ pr_debug("duplicated bpf prog info %u\n", prog_id);
+ return;
+ }
+ }
+
+ rb_link_node(&info_node->rb_node, parent, p);
+ rb_insert_color(&info_node->rb_node, &env->bpf_progs.infos);
+ env->bpf_progs.infos_cnt++;
+}
+
+struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
+ __u32 prog_id)
+{
+ struct bpf_prog_info_node *node = NULL;
+ struct rb_node *n;
+
+ down_read(&env->bpf_progs.lock);
+ n = env->bpf_progs.infos.rb_node;
+
+ while (n) {
+ node = rb_entry(n, struct bpf_prog_info_node, rb_node);
+ if (prog_id < node->info_linear->info.id)
+ n = n->rb_left;
+ else if (prog_id > node->info_linear->info.id)
+ n = n->rb_right;
+ else
+ goto out;
+ }
+ node = NULL;
+
+out:
+ up_read(&env->bpf_progs.lock);
+ return node;
+}
+
+bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node)
+{
+ bool ret;
+
+ down_write(&env->bpf_progs.lock);
+ ret = __perf_env__insert_btf(env, btf_node);
+ up_write(&env->bpf_progs.lock);
+ return ret;
+}
+
+bool __perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node)
+{
+ struct rb_node *parent = NULL;
+ __u32 btf_id = btf_node->id;
+ struct btf_node *node;
+ struct rb_node **p;
+
+ p = &env->bpf_progs.btfs.rb_node;
+
+ while (*p != NULL) {
+ parent = *p;
+ node = rb_entry(parent, struct btf_node, rb_node);
+ if (btf_id < node->id) {
+ p = &(*p)->rb_left;
+ } else if (btf_id > node->id) {
+ p = &(*p)->rb_right;
+ } else {
+ pr_debug("duplicated btf %u\n", btf_id);
+ return false;
+ }
+ }
+
+ rb_link_node(&btf_node->rb_node, parent, p);
+ rb_insert_color(&btf_node->rb_node, &env->bpf_progs.btfs);
+ env->bpf_progs.btfs_cnt++;
+ return true;
+}
+
+struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id)
+{
+ struct btf_node *res;
+
+ down_read(&env->bpf_progs.lock);
+ res = __perf_env__find_btf(env, btf_id);
+ up_read(&env->bpf_progs.lock);
+ return res;
+}
+
+struct btf_node *__perf_env__find_btf(struct perf_env *env, __u32 btf_id)
+{
+ struct btf_node *node = NULL;
+ struct rb_node *n;
+
+ n = env->bpf_progs.btfs.rb_node;
+
+ while (n) {
+ node = rb_entry(n, struct btf_node, rb_node);
+ if (btf_id < node->id)
+ n = n->rb_left;
+ else if (btf_id > node->id)
+ n = n->rb_right;
+ else
+ return node;
+ }
+ return NULL;
+}
+
+/* purge data in bpf_progs.infos tree */
+static void perf_env__purge_bpf(struct perf_env *env)
+{
+ struct rb_root *root;
+ struct rb_node *next;
+
+ down_write(&env->bpf_progs.lock);
+
+ root = &env->bpf_progs.infos;
+ next = rb_first(root);
+
+ while (next) {
+ struct bpf_prog_info_node *node;
+
+ node = rb_entry(next, struct bpf_prog_info_node, rb_node);
+ next = rb_next(&node->rb_node);
+ rb_erase(&node->rb_node, root);
+ zfree(&node->info_linear);
+ free(node);
+ }
+
+ env->bpf_progs.infos_cnt = 0;
+
+ root = &env->bpf_progs.btfs;
+ next = rb_first(root);
+
+ while (next) {
+ struct btf_node *node;
+
+ node = rb_entry(next, struct btf_node, rb_node);
+ next = rb_next(&node->rb_node);
+ rb_erase(&node->rb_node, root);
+ free(node);
+ }
+
+ env->bpf_progs.btfs_cnt = 0;
+
+ up_write(&env->bpf_progs.lock);
+}
+#else // HAVE_LIBBPF_SUPPORT
+static void perf_env__purge_bpf(struct perf_env *env __maybe_unused)
+{
+}
+#endif // HAVE_LIBBPF_SUPPORT
+
+void perf_env__exit(struct perf_env *env)
+{
+ int i, j;
+
+ perf_env__purge_bpf(env);
+ perf_env__purge_cgroups(env);
+ zfree(&env->hostname);
+ zfree(&env->os_release);
+ zfree(&env->version);
+ zfree(&env->arch);
+ zfree(&env->cpu_desc);
+ zfree(&env->cpuid);
+ zfree(&env->cmdline);
+ zfree(&env->cmdline_argv);
+ zfree(&env->sibling_dies);
+ zfree(&env->sibling_cores);
+ zfree(&env->sibling_threads);
+ zfree(&env->pmu_mappings);
+ zfree(&env->cpu);
+ for (i = 0; i < env->nr_cpu_pmu_caps; i++)
+ zfree(&env->cpu_pmu_caps[i]);
+ zfree(&env->cpu_pmu_caps);
+ zfree(&env->numa_map);
+
+ for (i = 0; i < env->nr_numa_nodes; i++)
+ perf_cpu_map__put(env->numa_nodes[i].map);
+ zfree(&env->numa_nodes);
+
+ for (i = 0; i < env->caches_cnt; i++)
+ cpu_cache_level__free(&env->caches[i]);
+ zfree(&env->caches);
+
+ for (i = 0; i < env->nr_memory_nodes; i++)
+ zfree(&env->memory_nodes[i].set);
+ zfree(&env->memory_nodes);
+
+ for (i = 0; i < env->nr_hybrid_nodes; i++) {
+ zfree(&env->hybrid_nodes[i].pmu_name);
+ zfree(&env->hybrid_nodes[i].cpus);
+ }
+ zfree(&env->hybrid_nodes);
+
+ for (i = 0; i < env->nr_pmus_with_caps; i++) {
+ for (j = 0; j < env->pmu_caps[i].nr_caps; j++)
+ zfree(&env->pmu_caps[i].caps[j]);
+ zfree(&env->pmu_caps[i].caps);
+ zfree(&env->pmu_caps[i].pmu_name);
+ }
+ zfree(&env->pmu_caps);
+}
+
+void perf_env__init(struct perf_env *env)
+{
+#ifdef HAVE_LIBBPF_SUPPORT
+ env->bpf_progs.infos = RB_ROOT;
+ env->bpf_progs.btfs = RB_ROOT;
+ init_rwsem(&env->bpf_progs.lock);
+#endif
+ env->kernel_is_64_bit = -1;
+}
+
+static void perf_env__init_kernel_mode(struct perf_env *env)
+{
+ const char *arch = perf_env__raw_arch(env);
+
+ if (!strncmp(arch, "x86_64", 6) || !strncmp(arch, "aarch64", 7) ||
+ !strncmp(arch, "arm64", 5) || !strncmp(arch, "mips64", 6) ||
+ !strncmp(arch, "parisc64", 8) || !strncmp(arch, "riscv64", 7) ||
+ !strncmp(arch, "s390x", 5) || !strncmp(arch, "sparc64", 7))
+ env->kernel_is_64_bit = 1;
+ else
+ env->kernel_is_64_bit = 0;
+}
+
+int perf_env__kernel_is_64_bit(struct perf_env *env)
+{
+ if (env->kernel_is_64_bit == -1)
+ perf_env__init_kernel_mode(env);
+
+ return env->kernel_is_64_bit;
+}
+
+int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[])
+{
+ int i;
+
+ /* do not include NULL termination */
+ env->cmdline_argv = calloc(argc, sizeof(char *));
+ if (env->cmdline_argv == NULL)
+ goto out_enomem;
+
+ /*
+ * Must copy argv contents because it gets moved around during option
+ * parsing:
+ */
+ for (i = 0; i < argc ; i++) {
+ env->cmdline_argv[i] = argv[i];
+ if (env->cmdline_argv[i] == NULL)
+ goto out_free;
+ }
+
+ env->nr_cmdline = argc;
+
+ return 0;
+out_free:
+ zfree(&env->cmdline_argv);
+out_enomem:
+ return -ENOMEM;
+}
+
+int perf_env__read_cpu_topology_map(struct perf_env *env)
+{
+ int idx, nr_cpus;
+
+ if (env->cpu != NULL)
+ return 0;
+
+ if (env->nr_cpus_avail == 0)
+ env->nr_cpus_avail = cpu__max_present_cpu().cpu;
+
+ nr_cpus = env->nr_cpus_avail;
+ if (nr_cpus == -1)
+ return -EINVAL;
+
+ env->cpu = calloc(nr_cpus, sizeof(env->cpu[0]));
+ if (env->cpu == NULL)
+ return -ENOMEM;
+
+ for (idx = 0; idx < nr_cpus; ++idx) {
+ struct perf_cpu cpu = { .cpu = idx };
+
+ env->cpu[idx].core_id = cpu__get_core_id(cpu);
+ env->cpu[idx].socket_id = cpu__get_socket_id(cpu);
+ env->cpu[idx].die_id = cpu__get_die_id(cpu);
+ }
+
+ env->nr_cpus_avail = nr_cpus;
+ return 0;
+}
+
+int perf_env__read_pmu_mappings(struct perf_env *env)
+{
+ struct perf_pmu *pmu = NULL;
+ u32 pmu_num = 0;
+ struct strbuf sb;
+
+ while ((pmu = perf_pmus__scan(pmu)))
+ pmu_num++;
+
+ if (!pmu_num) {
+ pr_debug("pmu mappings not available\n");
+ return -ENOENT;
+ }
+ env->nr_pmu_mappings = pmu_num;
+
+ if (strbuf_init(&sb, 128 * pmu_num) < 0)
+ return -ENOMEM;
+
+ while ((pmu = perf_pmus__scan(pmu))) {
+ if (strbuf_addf(&sb, "%u:%s", pmu->type, pmu->name) < 0)
+ goto error;
+ /* include a NULL character at the end */
+ if (strbuf_add(&sb, "", 1) < 0)
+ goto error;
+ }
+
+ env->pmu_mappings = strbuf_detach(&sb, NULL);
+
+ return 0;
+
+error:
+ strbuf_release(&sb);
+ return -1;
+}
+
+int perf_env__read_cpuid(struct perf_env *env)
+{
+ char cpuid[128];
+ int err = get_cpuid(cpuid, sizeof(cpuid));
+
+ if (err)
+ return err;
+
+ free(env->cpuid);
+ env->cpuid = strdup(cpuid);
+ if (env->cpuid == NULL)
+ return ENOMEM;
+ return 0;
+}
+
+static int perf_env__read_arch(struct perf_env *env)
+{
+ struct utsname uts;
+
+ if (env->arch)
+ return 0;
+
+ if (!uname(&uts))
+ env->arch = strdup(uts.machine);
+
+ return env->arch ? 0 : -ENOMEM;
+}
+
+static int perf_env__read_nr_cpus_avail(struct perf_env *env)
+{
+ if (env->nr_cpus_avail == 0)
+ env->nr_cpus_avail = cpu__max_present_cpu().cpu;
+
+ return env->nr_cpus_avail ? 0 : -ENOENT;
+}
+
+const char *perf_env__raw_arch(struct perf_env *env)
+{
+ return env && !perf_env__read_arch(env) ? env->arch : "unknown";
+}
+
+int perf_env__nr_cpus_avail(struct perf_env *env)
+{
+ return env && !perf_env__read_nr_cpus_avail(env) ? env->nr_cpus_avail : 0;
+}
+
+void cpu_cache_level__free(struct cpu_cache_level *cache)
+{
+ zfree(&cache->type);
+ zfree(&cache->map);
+ zfree(&cache->size);
+}
+
+/*
+ * Return architecture name in a normalized form.
+ * The conversion logic comes from the Makefile.
+ */
+static const char *normalize_arch(char *arch)
+{
+ if (!strcmp(arch, "x86_64"))
+ return "x86";
+ if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6')
+ return "x86";
+ if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5))
+ return "sparc";
+ if (!strncmp(arch, "aarch64", 7) || !strncmp(arch, "arm64", 5))
+ return "arm64";
+ if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110"))
+ return "arm";
+ if (!strncmp(arch, "s390", 4))
+ return "s390";
+ if (!strncmp(arch, "parisc", 6))
+ return "parisc";
+ if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3))
+ return "powerpc";
+ if (!strncmp(arch, "mips", 4))
+ return "mips";
+ if (!strncmp(arch, "sh", 2) && isdigit(arch[2]))
+ return "sh";
+ if (!strncmp(arch, "loongarch", 9))
+ return "loongarch";
+
+ return arch;
+}
+
+const char *perf_env__arch(struct perf_env *env)
+{
+ char *arch_name;
+
+ if (!env || !env->arch) { /* Assume local operation */
+ static struct utsname uts = { .machine[0] = '\0', };
+ if (uts.machine[0] == '\0' && uname(&uts) < 0)
+ return NULL;
+ arch_name = uts.machine;
+ } else
+ arch_name = env->arch;
+
+ return normalize_arch(arch_name);
+}
+
+const char *perf_env__cpuid(struct perf_env *env)
+{
+ int status;
+
+ if (!env || !env->cpuid) { /* Assume local operation */
+ status = perf_env__read_cpuid(env);
+ if (status)
+ return NULL;
+ }
+
+ return env->cpuid;
+}
+
+int perf_env__nr_pmu_mappings(struct perf_env *env)
+{
+ int status;
+
+ if (!env || !env->nr_pmu_mappings) { /* Assume local operation */
+ status = perf_env__read_pmu_mappings(env);
+ if (status)
+ return 0;
+ }
+
+ return env->nr_pmu_mappings;
+}
+
+const char *perf_env__pmu_mappings(struct perf_env *env)
+{
+ int status;
+
+ if (!env || !env->pmu_mappings) { /* Assume local operation */
+ status = perf_env__read_pmu_mappings(env);
+ if (status)
+ return NULL;
+ }
+
+ return env->pmu_mappings;
+}
+
+int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu)
+{
+ if (!env->nr_numa_map) {
+ struct numa_node *nn;
+ int i, nr = 0;
+
+ for (i = 0; i < env->nr_numa_nodes; i++) {
+ nn = &env->numa_nodes[i];
+ nr = max(nr, perf_cpu_map__max(nn->map).cpu);
+ }
+
+ nr++;
+
+ /*
+ * We initialize the numa_map array to prepare
+ * it for missing cpus, which return node -1
+ */
+ env->numa_map = malloc(nr * sizeof(int));
+ if (!env->numa_map)
+ return -1;
+
+ for (i = 0; i < nr; i++)
+ env->numa_map[i] = -1;
+
+ env->nr_numa_map = nr;
+
+ for (i = 0; i < env->nr_numa_nodes; i++) {
+ struct perf_cpu tmp;
+ int j;
+
+ nn = &env->numa_nodes[i];
+ perf_cpu_map__for_each_cpu(tmp, j, nn->map)
+ env->numa_map[tmp.cpu] = i;
+ }
+ }
+
+ return cpu.cpu >= 0 && cpu.cpu < env->nr_numa_map ? env->numa_map[cpu.cpu] : -1;
+}
+
+char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name,
+ const char *cap)
+{
+ char *cap_eq;
+ int cap_size;
+ char **ptr;
+ int i, j;
+
+ if (!pmu_name || !cap)
+ return NULL;
+
+ cap_size = strlen(cap);
+ cap_eq = zalloc(cap_size + 2);
+ if (!cap_eq)
+ return NULL;
+
+ memcpy(cap_eq, cap, cap_size);
+ cap_eq[cap_size] = '=';
+
+ if (!strcmp(pmu_name, "cpu")) {
+ for (i = 0; i < env->nr_cpu_pmu_caps; i++) {
+ if (!strncmp(env->cpu_pmu_caps[i], cap_eq, cap_size + 1)) {
+ free(cap_eq);
+ return &env->cpu_pmu_caps[i][cap_size + 1];
+ }
+ }
+ goto out;
+ }
+
+ for (i = 0; i < env->nr_pmus_with_caps; i++) {
+ if (strcmp(env->pmu_caps[i].pmu_name, pmu_name))
+ continue;
+
+ ptr = env->pmu_caps[i].caps;
+
+ for (j = 0; j < env->pmu_caps[i].nr_caps; j++) {
+ if (!strncmp(ptr[j], cap_eq, cap_size + 1)) {
+ free(cap_eq);
+ return &ptr[j][cap_size + 1];
+ }
+ }
+ }
+
+out:
+ free(cap_eq);
+ return NULL;
+}
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
new file mode 100644
index 000000000..359eff51c
--- /dev/null
+++ b/tools/perf/util/env.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_ENV_H
+#define __PERF_ENV_H
+
+#include <linux/types.h>
+#include <linux/rbtree.h>
+#include "cpumap.h"
+#include "rwsem.h"
+
+struct perf_cpu_map;
+
+struct cpu_topology_map {
+ int socket_id;
+ int die_id;
+ int core_id;
+};
+
+struct cpu_cache_level {
+ u32 level;
+ u32 line_size;
+ u32 sets;
+ u32 ways;
+ char *type;
+ char *size;
+ char *map;
+};
+
+struct numa_node {
+ u32 node;
+ u64 mem_total;
+ u64 mem_free;
+ struct perf_cpu_map *map;
+};
+
+struct memory_node {
+ u64 node;
+ u64 size;
+ unsigned long *set;
+};
+
+struct hybrid_node {
+ char *pmu_name;
+ char *cpus;
+};
+
+struct pmu_caps {
+ int nr_caps;
+ unsigned int max_branches;
+ char **caps;
+ char *pmu_name;
+};
+
+struct perf_env {
+ char *hostname;
+ char *os_release;
+ char *version;
+ char *arch;
+ int nr_cpus_online;
+ int nr_cpus_avail;
+ char *cpu_desc;
+ char *cpuid;
+ unsigned long long total_mem;
+ unsigned int msr_pmu_type;
+ unsigned int max_branches;
+ int kernel_is_64_bit;
+
+ int nr_cmdline;
+ int nr_sibling_cores;
+ int nr_sibling_dies;
+ int nr_sibling_threads;
+ int nr_numa_nodes;
+ int nr_memory_nodes;
+ int nr_pmu_mappings;
+ int nr_groups;
+ int nr_cpu_pmu_caps;
+ int nr_hybrid_nodes;
+ int nr_pmus_with_caps;
+ char *cmdline;
+ const char **cmdline_argv;
+ char *sibling_cores;
+ char *sibling_dies;
+ char *sibling_threads;
+ char *pmu_mappings;
+ char **cpu_pmu_caps;
+ struct cpu_topology_map *cpu;
+ struct cpu_cache_level *caches;
+ int caches_cnt;
+ u32 comp_ratio;
+ u32 comp_ver;
+ u32 comp_type;
+ u32 comp_level;
+ u32 comp_mmap_len;
+ struct numa_node *numa_nodes;
+ struct memory_node *memory_nodes;
+ unsigned long long memory_bsize;
+ struct hybrid_node *hybrid_nodes;
+ struct pmu_caps *pmu_caps;
+#ifdef HAVE_LIBBPF_SUPPORT
+ /*
+ * bpf_info_lock protects bpf rbtrees. This is needed because the
+ * trees are accessed by different threads in perf-top
+ */
+ struct {
+ struct rw_semaphore lock;
+ struct rb_root infos;
+ u32 infos_cnt;
+ struct rb_root btfs;
+ u32 btfs_cnt;
+ } bpf_progs;
+#endif // HAVE_LIBBPF_SUPPORT
+ /* same reason as above (for perf-top) */
+ struct {
+ struct rw_semaphore lock;
+ struct rb_root tree;
+ } cgroups;
+
+ /* For fast cpu to numa node lookup via perf_env__numa_node */
+ int *numa_map;
+ int nr_numa_map;
+
+ /* For real clock time reference. */
+ struct {
+ u64 tod_ns;
+ u64 clockid_ns;
+ u64 clockid_res_ns;
+ int clockid;
+ /*
+ * enabled is valid for report mode, and is true if above
+ * values are set, it's set in process_clock_data
+ */
+ bool enabled;
+ } clock;
+};
+
+enum perf_compress_type {
+ PERF_COMP_NONE = 0,
+ PERF_COMP_ZSTD,
+ PERF_COMP_MAX
+};
+
+struct bpf_prog_info_node;
+struct btf_node;
+
+extern struct perf_env perf_env;
+
+void perf_env__exit(struct perf_env *env);
+
+int perf_env__kernel_is_64_bit(struct perf_env *env);
+
+int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]);
+
+int perf_env__read_cpuid(struct perf_env *env);
+int perf_env__read_pmu_mappings(struct perf_env *env);
+int perf_env__nr_pmu_mappings(struct perf_env *env);
+const char *perf_env__pmu_mappings(struct perf_env *env);
+
+int perf_env__read_cpu_topology_map(struct perf_env *env);
+
+void cpu_cache_level__free(struct cpu_cache_level *cache);
+
+const char *perf_env__arch(struct perf_env *env);
+const char *perf_env__cpuid(struct perf_env *env);
+const char *perf_env__raw_arch(struct perf_env *env);
+int perf_env__nr_cpus_avail(struct perf_env *env);
+
+void perf_env__init(struct perf_env *env);
+void __perf_env__insert_bpf_prog_info(struct perf_env *env,
+ struct bpf_prog_info_node *info_node);
+void perf_env__insert_bpf_prog_info(struct perf_env *env,
+ struct bpf_prog_info_node *info_node);
+struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
+ __u32 prog_id);
+bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
+bool __perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
+struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id);
+struct btf_node *__perf_env__find_btf(struct perf_env *env, __u32 btf_id);
+
+int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu);
+char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name,
+ const char *cap);
+#endif /* __PERF_ENV_H */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
new file mode 100644
index 000000000..923c0fb15
--- /dev/null
+++ b/tools/perf/util/event.c
@@ -0,0 +1,829 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <perf/cpumap.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
+#include <linux/perf_event.h>
+#include <linux/zalloc.h>
+#include "cpumap.h"
+#include "dso.h"
+#include "event.h"
+#include "debug.h"
+#include "hist.h"
+#include "machine.h"
+#include "sort.h"
+#include "string2.h"
+#include "strlist.h"
+#include "thread.h"
+#include "thread_map.h"
+#include "time-utils.h"
+#include <linux/ctype.h>
+#include "map.h"
+#include "util/namespaces.h"
+#include "symbol.h"
+#include "symbol/kallsyms.h"
+#include "asm/bug.h"
+#include "stat.h"
+#include "session.h"
+#include "bpf-event.h"
+#include "print_binary.h"
+#include "tool.h"
+#include "util.h"
+
+static const char *perf_event__names[] = {
+ [0] = "TOTAL",
+ [PERF_RECORD_MMAP] = "MMAP",
+ [PERF_RECORD_MMAP2] = "MMAP2",
+ [PERF_RECORD_LOST] = "LOST",
+ [PERF_RECORD_COMM] = "COMM",
+ [PERF_RECORD_EXIT] = "EXIT",
+ [PERF_RECORD_THROTTLE] = "THROTTLE",
+ [PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE",
+ [PERF_RECORD_FORK] = "FORK",
+ [PERF_RECORD_READ] = "READ",
+ [PERF_RECORD_SAMPLE] = "SAMPLE",
+ [PERF_RECORD_AUX] = "AUX",
+ [PERF_RECORD_ITRACE_START] = "ITRACE_START",
+ [PERF_RECORD_LOST_SAMPLES] = "LOST_SAMPLES",
+ [PERF_RECORD_SWITCH] = "SWITCH",
+ [PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE",
+ [PERF_RECORD_NAMESPACES] = "NAMESPACES",
+ [PERF_RECORD_KSYMBOL] = "KSYMBOL",
+ [PERF_RECORD_BPF_EVENT] = "BPF_EVENT",
+ [PERF_RECORD_CGROUP] = "CGROUP",
+ [PERF_RECORD_TEXT_POKE] = "TEXT_POKE",
+ [PERF_RECORD_AUX_OUTPUT_HW_ID] = "AUX_OUTPUT_HW_ID",
+ [PERF_RECORD_HEADER_ATTR] = "ATTR",
+ [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
+ [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
+ [PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID",
+ [PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND",
+ [PERF_RECORD_ID_INDEX] = "ID_INDEX",
+ [PERF_RECORD_AUXTRACE_INFO] = "AUXTRACE_INFO",
+ [PERF_RECORD_AUXTRACE] = "AUXTRACE",
+ [PERF_RECORD_AUXTRACE_ERROR] = "AUXTRACE_ERROR",
+ [PERF_RECORD_THREAD_MAP] = "THREAD_MAP",
+ [PERF_RECORD_CPU_MAP] = "CPU_MAP",
+ [PERF_RECORD_STAT_CONFIG] = "STAT_CONFIG",
+ [PERF_RECORD_STAT] = "STAT",
+ [PERF_RECORD_STAT_ROUND] = "STAT_ROUND",
+ [PERF_RECORD_EVENT_UPDATE] = "EVENT_UPDATE",
+ [PERF_RECORD_TIME_CONV] = "TIME_CONV",
+ [PERF_RECORD_HEADER_FEATURE] = "FEATURE",
+ [PERF_RECORD_COMPRESSED] = "COMPRESSED",
+ [PERF_RECORD_FINISHED_INIT] = "FINISHED_INIT",
+};
+
+const char *perf_event__name(unsigned int id)
+{
+ if (id >= ARRAY_SIZE(perf_event__names))
+ return "INVALID";
+ if (!perf_event__names[id])
+ return "UNKNOWN";
+ return perf_event__names[id];
+}
+
+struct process_symbol_args {
+ const char *name;
+ u64 start;
+};
+
+static int find_func_symbol_cb(void *arg, const char *name, char type,
+ u64 start)
+{
+ struct process_symbol_args *args = arg;
+
+ /*
+ * Must be a function or at least an alias, as in PARISC64, where "_text" is
+ * an 'A' to the same address as "_stext".
+ */
+ if (!(kallsyms__is_function(type) ||
+ type == 'A') || strcmp(name, args->name))
+ return 0;
+
+ args->start = start;
+ return 1;
+}
+
+static int find_any_symbol_cb(void *arg, const char *name,
+ char type __maybe_unused, u64 start)
+{
+ struct process_symbol_args *args = arg;
+
+ if (strcmp(name, args->name))
+ return 0;
+
+ args->start = start;
+ return 1;
+}
+
+int kallsyms__get_function_start(const char *kallsyms_filename,
+ const char *symbol_name, u64 *addr)
+{
+ struct process_symbol_args args = { .name = symbol_name, };
+
+ if (kallsyms__parse(kallsyms_filename, &args, find_func_symbol_cb) <= 0)
+ return -1;
+
+ *addr = args.start;
+ return 0;
+}
+
+int kallsyms__get_symbol_start(const char *kallsyms_filename,
+ const char *symbol_name, u64 *addr)
+{
+ struct process_symbol_args args = { .name = symbol_name, };
+
+ if (kallsyms__parse(kallsyms_filename, &args, find_any_symbol_cb) <= 0)
+ return -1;
+
+ *addr = args.start;
+ return 0;
+}
+
+void perf_event__read_stat_config(struct perf_stat_config *config,
+ struct perf_record_stat_config *event)
+{
+ unsigned i;
+
+ for (i = 0; i < event->nr; i++) {
+
+ switch (event->data[i].tag) {
+#define CASE(__term, __val) \
+ case PERF_STAT_CONFIG_TERM__##__term: \
+ config->__val = event->data[i].val; \
+ break;
+
+ CASE(AGGR_MODE, aggr_mode)
+ CASE(SCALE, scale)
+ CASE(INTERVAL, interval)
+ CASE(AGGR_LEVEL, aggr_level)
+#undef CASE
+ default:
+ pr_warning("unknown stat config term %" PRI_lu64 "\n",
+ event->data[i].tag);
+ }
+ }
+}
+
+size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp)
+{
+ const char *s;
+
+ if (event->header.misc & PERF_RECORD_MISC_COMM_EXEC)
+ s = " exec";
+ else
+ s = "";
+
+ return fprintf(fp, "%s: %s:%d/%d\n", s, event->comm.comm, event->comm.pid, event->comm.tid);
+}
+
+size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp)
+{
+ size_t ret = 0;
+ struct perf_ns_link_info *ns_link_info;
+ u32 nr_namespaces, idx;
+
+ ns_link_info = event->namespaces.link_info;
+ nr_namespaces = event->namespaces.nr_namespaces;
+
+ ret += fprintf(fp, " %d/%d - nr_namespaces: %u\n\t\t[",
+ event->namespaces.pid,
+ event->namespaces.tid,
+ nr_namespaces);
+
+ for (idx = 0; idx < nr_namespaces; idx++) {
+ if (idx && (idx % 4 == 0))
+ ret += fprintf(fp, "\n\t\t ");
+
+ ret += fprintf(fp, "%u/%s: %" PRIu64 "/%#" PRIx64 "%s", idx,
+ perf_ns__name(idx), (u64)ns_link_info[idx].dev,
+ (u64)ns_link_info[idx].ino,
+ ((idx + 1) != nr_namespaces) ? ", " : "]\n");
+ }
+
+ return ret;
+}
+
+size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, " cgroup: %" PRI_lu64 " %s\n",
+ event->cgroup.id, event->cgroup.path);
+}
+
+int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_comm_event(machine, event, sample);
+}
+
+int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_namespaces_event(machine, event, sample);
+}
+
+int perf_event__process_cgroup(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_cgroup_event(machine, event, sample);
+}
+
+int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_lost_event(machine, event, sample);
+}
+
+int perf_event__process_aux(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine)
+{
+ return machine__process_aux_event(machine, event);
+}
+
+int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine)
+{
+ return machine__process_itrace_start_event(machine, event);
+}
+
+int perf_event__process_aux_output_hw_id(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine)
+{
+ return machine__process_aux_output_hw_id_event(machine, event);
+}
+
+int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_lost_samples_event(machine, event, sample);
+}
+
+int perf_event__process_switch(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine)
+{
+ return machine__process_switch_event(machine, event);
+}
+
+int perf_event__process_ksymbol(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine)
+{
+ return machine__process_ksymbol(machine, event, sample);
+}
+
+int perf_event__process_bpf(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_bpf(machine, event, sample);
+}
+
+int perf_event__process_text_poke(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_text_poke(machine, event, sample);
+}
+
+size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64 "]: %c %s\n",
+ event->mmap.pid, event->mmap.tid, event->mmap.start,
+ event->mmap.len, event->mmap.pgoff,
+ (event->header.misc & PERF_RECORD_MISC_MMAP_DATA) ? 'r' : 'x',
+ event->mmap.filename);
+}
+
+size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp)
+{
+ if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
+ char sbuild_id[SBUILD_ID_SIZE];
+ struct build_id bid;
+
+ build_id__init(&bid, event->mmap2.build_id,
+ event->mmap2.build_id_size);
+ build_id__sprintf(&bid, sbuild_id);
+
+ return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64
+ " <%s>]: %c%c%c%c %s\n",
+ event->mmap2.pid, event->mmap2.tid, event->mmap2.start,
+ event->mmap2.len, event->mmap2.pgoff, sbuild_id,
+ (event->mmap2.prot & PROT_READ) ? 'r' : '-',
+ (event->mmap2.prot & PROT_WRITE) ? 'w' : '-',
+ (event->mmap2.prot & PROT_EXEC) ? 'x' : '-',
+ (event->mmap2.flags & MAP_SHARED) ? 's' : 'p',
+ event->mmap2.filename);
+ } else {
+ return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64
+ " %02x:%02x %"PRI_lu64" %"PRI_lu64"]: %c%c%c%c %s\n",
+ event->mmap2.pid, event->mmap2.tid, event->mmap2.start,
+ event->mmap2.len, event->mmap2.pgoff, event->mmap2.maj,
+ event->mmap2.min, event->mmap2.ino,
+ event->mmap2.ino_generation,
+ (event->mmap2.prot & PROT_READ) ? 'r' : '-',
+ (event->mmap2.prot & PROT_WRITE) ? 'w' : '-',
+ (event->mmap2.prot & PROT_EXEC) ? 'x' : '-',
+ (event->mmap2.flags & MAP_SHARED) ? 's' : 'p',
+ event->mmap2.filename);
+ }
+}
+
+size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp)
+{
+ struct perf_thread_map *threads = thread_map__new_event(&event->thread_map);
+ size_t ret;
+
+ ret = fprintf(fp, " nr: ");
+
+ if (threads)
+ ret += thread_map__fprintf(threads, fp);
+ else
+ ret += fprintf(fp, "failed to get threads from event\n");
+
+ perf_thread_map__put(threads);
+ return ret;
+}
+
+size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp)
+{
+ struct perf_cpu_map *cpus = cpu_map__new_data(&event->cpu_map.data);
+ size_t ret;
+
+ ret = fprintf(fp, ": ");
+
+ if (cpus)
+ ret += cpu_map__fprintf(cpus, fp);
+ else
+ ret += fprintf(fp, "failed to get cpumap from event\n");
+
+ perf_cpu_map__put(cpus);
+ return ret;
+}
+
+int perf_event__process_mmap(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_mmap_event(machine, event, sample);
+}
+
+int perf_event__process_mmap2(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_mmap2_event(machine, event, sample);
+}
+
+size_t perf_event__fprintf_task(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, "(%d:%d):(%d:%d)\n",
+ event->fork.pid, event->fork.tid,
+ event->fork.ppid, event->fork.ptid);
+}
+
+int perf_event__process_fork(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_fork_event(machine, event, sample);
+}
+
+int perf_event__process_exit(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_exit_event(machine, event, sample);
+}
+
+size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, " offset: %#"PRI_lx64" size: %#"PRI_lx64" flags: %#"PRI_lx64" [%s%s%s]\n",
+ event->aux.aux_offset, event->aux.aux_size,
+ event->aux.flags,
+ event->aux.flags & PERF_AUX_FLAG_TRUNCATED ? "T" : "",
+ event->aux.flags & PERF_AUX_FLAG_OVERWRITE ? "O" : "",
+ event->aux.flags & PERF_AUX_FLAG_PARTIAL ? "P" : "");
+}
+
+size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, " pid: %u tid: %u\n",
+ event->itrace_start.pid, event->itrace_start.tid);
+}
+
+size_t perf_event__fprintf_aux_output_hw_id(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, " hw_id: %#"PRI_lx64"\n",
+ event->aux_output_hw_id.hw_id);
+}
+
+size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp)
+{
+ bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+ const char *in_out = !out ? "IN " :
+ !(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT) ?
+ "OUT " : "OUT preempt";
+
+ if (event->header.type == PERF_RECORD_SWITCH)
+ return fprintf(fp, " %s\n", in_out);
+
+ return fprintf(fp, " %s %s pid/tid: %5d/%-5d\n",
+ in_out, out ? "next" : "prev",
+ event->context_switch.next_prev_pid,
+ event->context_switch.next_prev_tid);
+}
+
+static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, " lost %" PRI_lu64 "\n", event->lost.lost);
+}
+
+size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, " addr %" PRI_lx64 " len %u type %u flags 0x%x name %s\n",
+ event->ksymbol.addr, event->ksymbol.len,
+ event->ksymbol.ksym_type,
+ event->ksymbol.flags, event->ksymbol.name);
+}
+
+size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, " type %u, flags %u, id %u\n",
+ event->bpf.type, event->bpf.flags, event->bpf.id);
+}
+
+static int text_poke_printer(enum binary_printer_ops op, unsigned int val,
+ void *extra, FILE *fp)
+{
+ bool old = *(bool *)extra;
+
+ switch ((int)op) {
+ case BINARY_PRINT_LINE_BEGIN:
+ return fprintf(fp, " %s bytes:", old ? "Old" : "New");
+ case BINARY_PRINT_NUM_DATA:
+ return fprintf(fp, " %02x", val);
+ case BINARY_PRINT_LINE_END:
+ return fprintf(fp, "\n");
+ default:
+ return 0;
+ }
+}
+
+size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *machine, FILE *fp)
+{
+ struct perf_record_text_poke_event *tp = &event->text_poke;
+ size_t ret;
+ bool old;
+
+ ret = fprintf(fp, " %" PRI_lx64 " ", tp->addr);
+ if (machine) {
+ struct addr_location al;
+
+ addr_location__init(&al);
+ al.map = map__get(maps__find(machine__kernel_maps(machine), tp->addr));
+ if (al.map && map__load(al.map) >= 0) {
+ al.addr = map__map_ip(al.map, tp->addr);
+ al.sym = map__find_symbol(al.map, al.addr);
+ if (al.sym)
+ ret += symbol__fprintf_symname_offs(al.sym, &al, fp);
+ }
+ addr_location__exit(&al);
+ }
+ ret += fprintf(fp, " old len %u new len %u\n", tp->old_len, tp->new_len);
+ old = true;
+ ret += binary__fprintf(tp->bytes, tp->old_len, 16, text_poke_printer,
+ &old, fp);
+ old = false;
+ ret += binary__fprintf(tp->bytes + tp->old_len, tp->new_len, 16,
+ text_poke_printer, &old, fp);
+ return ret;
+}
+
+size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FILE *fp)
+{
+ size_t ret = fprintf(fp, "PERF_RECORD_%s",
+ perf_event__name(event->header.type));
+
+ switch (event->header.type) {
+ case PERF_RECORD_COMM:
+ ret += perf_event__fprintf_comm(event, fp);
+ break;
+ case PERF_RECORD_FORK:
+ case PERF_RECORD_EXIT:
+ ret += perf_event__fprintf_task(event, fp);
+ break;
+ case PERF_RECORD_MMAP:
+ ret += perf_event__fprintf_mmap(event, fp);
+ break;
+ case PERF_RECORD_NAMESPACES:
+ ret += perf_event__fprintf_namespaces(event, fp);
+ break;
+ case PERF_RECORD_CGROUP:
+ ret += perf_event__fprintf_cgroup(event, fp);
+ break;
+ case PERF_RECORD_MMAP2:
+ ret += perf_event__fprintf_mmap2(event, fp);
+ break;
+ case PERF_RECORD_AUX:
+ ret += perf_event__fprintf_aux(event, fp);
+ break;
+ case PERF_RECORD_ITRACE_START:
+ ret += perf_event__fprintf_itrace_start(event, fp);
+ break;
+ case PERF_RECORD_SWITCH:
+ case PERF_RECORD_SWITCH_CPU_WIDE:
+ ret += perf_event__fprintf_switch(event, fp);
+ break;
+ case PERF_RECORD_LOST:
+ ret += perf_event__fprintf_lost(event, fp);
+ break;
+ case PERF_RECORD_KSYMBOL:
+ ret += perf_event__fprintf_ksymbol(event, fp);
+ break;
+ case PERF_RECORD_BPF_EVENT:
+ ret += perf_event__fprintf_bpf(event, fp);
+ break;
+ case PERF_RECORD_TEXT_POKE:
+ ret += perf_event__fprintf_text_poke(event, machine, fp);
+ break;
+ case PERF_RECORD_AUX_OUTPUT_HW_ID:
+ ret += perf_event__fprintf_aux_output_hw_id(event, fp);
+ break;
+ default:
+ ret += fprintf(fp, "\n");
+ }
+
+ return ret;
+}
+
+int perf_event__process(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_event(machine, event, sample);
+}
+
+struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
+ struct addr_location *al)
+{
+ struct maps *maps = thread__maps(thread);
+ struct machine *machine = maps__machine(maps);
+ bool load_map = false;
+
+ maps__zput(al->maps);
+ map__zput(al->map);
+ thread__zput(al->thread);
+ al->thread = thread__get(thread);
+
+ al->addr = addr;
+ al->cpumode = cpumode;
+ al->filtered = 0;
+
+ if (machine == NULL)
+ return NULL;
+
+ if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) {
+ al->level = 'k';
+ maps = machine__kernel_maps(machine);
+ load_map = true;
+ } else if (cpumode == PERF_RECORD_MISC_USER && perf_host) {
+ al->level = '.';
+ } else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) {
+ al->level = 'g';
+ maps = machine__kernel_maps(machine);
+ load_map = true;
+ } else if (cpumode == PERF_RECORD_MISC_GUEST_USER && perf_guest) {
+ al->level = 'u';
+ } else {
+ al->level = 'H';
+
+ if ((cpumode == PERF_RECORD_MISC_GUEST_USER ||
+ cpumode == PERF_RECORD_MISC_GUEST_KERNEL) &&
+ !perf_guest)
+ al->filtered |= (1 << HIST_FILTER__GUEST);
+ if ((cpumode == PERF_RECORD_MISC_USER ||
+ cpumode == PERF_RECORD_MISC_KERNEL) &&
+ !perf_host)
+ al->filtered |= (1 << HIST_FILTER__HOST);
+
+ return NULL;
+ }
+ al->maps = maps__get(maps);
+ al->map = map__get(maps__find(maps, al->addr));
+ if (al->map != NULL) {
+ /*
+ * Kernel maps might be changed when loading symbols so loading
+ * must be done prior to using kernel maps.
+ */
+ if (load_map)
+ map__load(al->map);
+ al->addr = map__map_ip(al->map, al->addr);
+ }
+
+ return al->map;
+}
+
+/*
+ * For branch stacks or branch samples, the sample cpumode might not be correct
+ * because it applies only to the sample 'ip' and not necessary to 'addr' or
+ * branch stack addresses. If possible, use a fallback to deal with those cases.
+ */
+struct map *thread__find_map_fb(struct thread *thread, u8 cpumode, u64 addr,
+ struct addr_location *al)
+{
+ struct map *map = thread__find_map(thread, cpumode, addr, al);
+ struct machine *machine = maps__machine(thread__maps(thread));
+ u8 addr_cpumode = machine__addr_cpumode(machine, cpumode, addr);
+
+ if (map || addr_cpumode == cpumode)
+ return map;
+
+ return thread__find_map(thread, addr_cpumode, addr, al);
+}
+
+struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode,
+ u64 addr, struct addr_location *al)
+{
+ al->sym = NULL;
+ if (thread__find_map(thread, cpumode, addr, al))
+ al->sym = map__find_symbol(al->map, al->addr);
+ return al->sym;
+}
+
+struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode,
+ u64 addr, struct addr_location *al)
+{
+ al->sym = NULL;
+ if (thread__find_map_fb(thread, cpumode, addr, al))
+ al->sym = map__find_symbol(al->map, al->addr);
+ return al->sym;
+}
+
+static bool check_address_range(struct intlist *addr_list, int addr_range,
+ unsigned long addr)
+{
+ struct int_node *pos;
+
+ intlist__for_each_entry(pos, addr_list) {
+ if (addr >= pos->i && addr < pos->i + addr_range)
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Callers need to drop the reference to al->thread, obtained in
+ * machine__findnew_thread()
+ */
+int machine__resolve(struct machine *machine, struct addr_location *al,
+ struct perf_sample *sample)
+{
+ struct thread *thread;
+ struct dso *dso;
+
+ if (symbol_conf.guest_code && !machine__is_host(machine))
+ thread = machine__findnew_guest_code(machine, sample->pid);
+ else
+ thread = machine__findnew_thread(machine, sample->pid, sample->tid);
+ if (thread == NULL)
+ return -1;
+
+ dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread__tid(thread));
+ thread__find_map(thread, sample->cpumode, sample->ip, al);
+ dso = al->map ? map__dso(al->map) : NULL;
+ dump_printf(" ...... dso: %s\n",
+ dso
+ ? dso->long_name
+ : (al->level == 'H' ? "[hypervisor]" : "<not found>"));
+
+ if (thread__is_filtered(thread))
+ al->filtered |= (1 << HIST_FILTER__THREAD);
+
+ thread__put(thread);
+ thread = NULL;
+
+ al->sym = NULL;
+ al->cpu = sample->cpu;
+ al->socket = -1;
+ al->srcline = NULL;
+
+ if (al->cpu >= 0) {
+ struct perf_env *env = machine->env;
+
+ if (env && env->cpu)
+ al->socket = env->cpu[al->cpu].socket_id;
+ }
+
+ if (al->map) {
+ if (symbol_conf.dso_list &&
+ (!dso || !(strlist__has_entry(symbol_conf.dso_list,
+ dso->short_name) ||
+ (dso->short_name != dso->long_name &&
+ strlist__has_entry(symbol_conf.dso_list,
+ dso->long_name))))) {
+ al->filtered |= (1 << HIST_FILTER__DSO);
+ }
+
+ al->sym = map__find_symbol(al->map, al->addr);
+ } else if (symbol_conf.dso_list) {
+ al->filtered |= (1 << HIST_FILTER__DSO);
+ }
+
+ if (symbol_conf.sym_list) {
+ int ret = 0;
+ char al_addr_str[32];
+ size_t sz = sizeof(al_addr_str);
+
+ if (al->sym) {
+ ret = strlist__has_entry(symbol_conf.sym_list,
+ al->sym->name);
+ }
+ if (!ret && al->sym) {
+ snprintf(al_addr_str, sz, "0x%"PRIx64,
+ map__unmap_ip(al->map, al->sym->start));
+ ret = strlist__has_entry(symbol_conf.sym_list,
+ al_addr_str);
+ }
+ if (!ret && symbol_conf.addr_list && al->map) {
+ unsigned long addr = map__unmap_ip(al->map, al->addr);
+
+ ret = intlist__has_entry(symbol_conf.addr_list, addr);
+ if (!ret && symbol_conf.addr_range) {
+ ret = check_address_range(symbol_conf.addr_list,
+ symbol_conf.addr_range,
+ addr);
+ }
+ }
+
+ if (!ret)
+ al->filtered |= (1 << HIST_FILTER__SYMBOL);
+ }
+
+ return 0;
+}
+
+bool is_bts_event(struct perf_event_attr *attr)
+{
+ return attr->type == PERF_TYPE_HARDWARE &&
+ (attr->config & PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
+ attr->sample_period == 1;
+}
+
+bool sample_addr_correlates_sym(struct perf_event_attr *attr)
+{
+ if (attr->type == PERF_TYPE_SOFTWARE &&
+ (attr->config == PERF_COUNT_SW_PAGE_FAULTS ||
+ attr->config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
+ attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ))
+ return true;
+
+ if (is_bts_event(attr))
+ return true;
+
+ return false;
+}
+
+void thread__resolve(struct thread *thread, struct addr_location *al,
+ struct perf_sample *sample)
+{
+ thread__find_map_fb(thread, sample->cpumode, sample->addr, al);
+
+ al->cpu = sample->cpu;
+ al->sym = NULL;
+
+ if (al->map)
+ al->sym = map__find_symbol(al->map, al->addr);
+}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
new file mode 100644
index 000000000..d8bcee2e9
--- /dev/null
+++ b/tools/perf/util/event.h
@@ -0,0 +1,404 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_RECORD_H
+#define __PERF_RECORD_H
+/*
+ * The linux/stddef.h isn't need here, but is needed for __always_inline used
+ * in files included from uapi/linux/perf_event.h such as
+ * /usr/include/linux/swab.h and /usr/include/linux/byteorder/little_endian.h,
+ * detected in at least musl libc, used in Alpine Linux. -acme
+ */
+#include <stdio.h>
+#include <linux/stddef.h>
+#include <perf/event.h>
+#include <linux/types.h>
+
+struct dso;
+struct machine;
+struct perf_event_attr;
+struct perf_sample;
+
+#ifdef __LP64__
+/*
+ * /usr/include/inttypes.h uses just 'lu' for PRIu64, but we end up defining
+ * __u64 as long long unsigned int, and then -Werror=format= kicks in and
+ * complains of the mismatched types, so use these two special extra PRI
+ * macros to overcome that.
+ */
+#define PRI_lu64 "l" PRIu64
+#define PRI_lx64 "l" PRIx64
+#define PRI_ld64 "l" PRId64
+#else
+#define PRI_lu64 PRIu64
+#define PRI_lx64 PRIx64
+#define PRI_ld64 PRId64
+#endif
+
+#define PERF_SAMPLE_MASK \
+ (PERF_SAMPLE_IP | PERF_SAMPLE_TID | \
+ PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR | \
+ PERF_SAMPLE_ID | PERF_SAMPLE_STREAM_ID | \
+ PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD | \
+ PERF_SAMPLE_IDENTIFIER)
+
+/* perf sample has 16 bits size limit */
+#define PERF_SAMPLE_MAX_SIZE (1 << 16)
+
+struct ip_callchain {
+ u64 nr;
+ u64 ips[];
+};
+
+struct branch_stack;
+
+enum {
+ PERF_IP_FLAG_BRANCH = 1ULL << 0,
+ PERF_IP_FLAG_CALL = 1ULL << 1,
+ PERF_IP_FLAG_RETURN = 1ULL << 2,
+ PERF_IP_FLAG_CONDITIONAL = 1ULL << 3,
+ PERF_IP_FLAG_SYSCALLRET = 1ULL << 4,
+ PERF_IP_FLAG_ASYNC = 1ULL << 5,
+ PERF_IP_FLAG_INTERRUPT = 1ULL << 6,
+ PERF_IP_FLAG_TX_ABORT = 1ULL << 7,
+ PERF_IP_FLAG_TRACE_BEGIN = 1ULL << 8,
+ PERF_IP_FLAG_TRACE_END = 1ULL << 9,
+ PERF_IP_FLAG_IN_TX = 1ULL << 10,
+ PERF_IP_FLAG_VMENTRY = 1ULL << 11,
+ PERF_IP_FLAG_VMEXIT = 1ULL << 12,
+ PERF_IP_FLAG_INTR_DISABLE = 1ULL << 13,
+ PERF_IP_FLAG_INTR_TOGGLE = 1ULL << 14,
+};
+
+#define PERF_IP_FLAG_CHARS "bcrosyiABExghDt"
+
+#define PERF_BRANCH_MASK (\
+ PERF_IP_FLAG_BRANCH |\
+ PERF_IP_FLAG_CALL |\
+ PERF_IP_FLAG_RETURN |\
+ PERF_IP_FLAG_CONDITIONAL |\
+ PERF_IP_FLAG_SYSCALLRET |\
+ PERF_IP_FLAG_ASYNC |\
+ PERF_IP_FLAG_INTERRUPT |\
+ PERF_IP_FLAG_TX_ABORT |\
+ PERF_IP_FLAG_TRACE_BEGIN |\
+ PERF_IP_FLAG_TRACE_END |\
+ PERF_IP_FLAG_VMENTRY |\
+ PERF_IP_FLAG_VMEXIT)
+
+#define PERF_MEM_DATA_SRC_NONE \
+ (PERF_MEM_S(OP, NA) |\
+ PERF_MEM_S(LVL, NA) |\
+ PERF_MEM_S(SNOOP, NA) |\
+ PERF_MEM_S(LOCK, NA) |\
+ PERF_MEM_S(TLB, NA) |\
+ PERF_MEM_S(LVLNUM, NA))
+
+/* Attribute type for custom synthesized events */
+#define PERF_TYPE_SYNTH (INT_MAX + 1U)
+
+/* Attribute config for custom synthesized events */
+enum perf_synth_id {
+ PERF_SYNTH_INTEL_PTWRITE,
+ PERF_SYNTH_INTEL_MWAIT,
+ PERF_SYNTH_INTEL_PWRE,
+ PERF_SYNTH_INTEL_EXSTOP,
+ PERF_SYNTH_INTEL_PWRX,
+ PERF_SYNTH_INTEL_CBR,
+ PERF_SYNTH_INTEL_PSB,
+ PERF_SYNTH_INTEL_EVT,
+ PERF_SYNTH_INTEL_IFLAG_CHG,
+};
+
+/*
+ * Raw data formats for synthesized events. Note that 4 bytes of padding are
+ * present to match the 'size' member of PERF_SAMPLE_RAW data which is always
+ * 8-byte aligned. That means we must dereference raw_data with an offset of 4.
+ * Refer perf_sample__synth_ptr() and perf_synth__raw_data(). It also means the
+ * structure sizes are 4 bytes bigger than the raw_size, refer
+ * perf_synth__raw_size().
+ */
+
+struct perf_synth_intel_ptwrite {
+ u32 padding;
+ union {
+ struct {
+ u32 ip : 1,
+ reserved : 31;
+ };
+ u32 flags;
+ };
+ u64 payload;
+};
+
+struct perf_synth_intel_mwait {
+ u32 padding;
+ u32 reserved;
+ union {
+ struct {
+ u64 hints : 8,
+ reserved1 : 24,
+ extensions : 2,
+ reserved2 : 30;
+ };
+ u64 payload;
+ };
+};
+
+struct perf_synth_intel_pwre {
+ u32 padding;
+ u32 reserved;
+ union {
+ struct {
+ u64 reserved1 : 7,
+ hw : 1,
+ subcstate : 4,
+ cstate : 4,
+ reserved2 : 48;
+ };
+ u64 payload;
+ };
+};
+
+struct perf_synth_intel_exstop {
+ u32 padding;
+ union {
+ struct {
+ u32 ip : 1,
+ reserved : 31;
+ };
+ u32 flags;
+ };
+};
+
+struct perf_synth_intel_pwrx {
+ u32 padding;
+ u32 reserved;
+ union {
+ struct {
+ u64 deepest_cstate : 4,
+ last_cstate : 4,
+ wake_reason : 4,
+ reserved1 : 52;
+ };
+ u64 payload;
+ };
+};
+
+struct perf_synth_intel_cbr {
+ u32 padding;
+ union {
+ struct {
+ u32 cbr : 8,
+ reserved1 : 8,
+ max_nonturbo : 8,
+ reserved2 : 8;
+ };
+ u32 flags;
+ };
+ u32 freq;
+ u32 reserved3;
+};
+
+struct perf_synth_intel_psb {
+ u32 padding;
+ u32 reserved;
+ u64 offset;
+};
+
+struct perf_synth_intel_evd {
+ union {
+ struct {
+ u8 evd_type;
+ u8 reserved[7];
+ };
+ u64 et;
+ };
+ u64 payload;
+};
+
+/* Intel PT Event Trace */
+struct perf_synth_intel_evt {
+ u32 padding;
+ union {
+ struct {
+ u32 type : 5,
+ reserved : 2,
+ ip : 1,
+ vector : 8,
+ evd_cnt : 16;
+ };
+ u32 cfe;
+ };
+ struct perf_synth_intel_evd evd[0];
+};
+
+struct perf_synth_intel_iflag_chg {
+ u32 padding;
+ union {
+ struct {
+ u32 iflag : 1,
+ via_branch : 1;
+ };
+ u32 flags;
+ };
+ u64 branch_ip; /* If via_branch */
+};
+
+static inline void *perf_synth__raw_data(void *p)
+{
+ return p + 4;
+}
+
+#define perf_synth__raw_size(d) (sizeof(d) - 4)
+
+#define perf_sample__bad_synth_size(s, d) ((s)->raw_size < sizeof(d) - 4)
+
+enum {
+ PERF_STAT_ROUND_TYPE__INTERVAL = 0,
+ PERF_STAT_ROUND_TYPE__FINAL = 1,
+};
+
+void perf_event__print_totals(void);
+
+struct perf_cpu_map;
+struct perf_record_stat_config;
+struct perf_stat_config;
+struct perf_tool;
+
+void perf_event__read_stat_config(struct perf_stat_config *config,
+ struct perf_record_stat_config *event);
+
+int perf_event__process_comm(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_lost(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_lost_samples(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_aux(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_itrace_start(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_aux_output_hw_id(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_switch(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_namespaces(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_cgroup(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_mmap(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_mmap2(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_fork(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_exit(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_ksymbol(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_bpf(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_text_poke(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+
+bool is_bts_event(struct perf_event_attr *attr);
+bool sample_addr_correlates_sym(struct perf_event_attr *attr);
+
+const char *perf_event__name(unsigned int id);
+
+size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_task(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_aux_output_hw_id(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *machine,FILE *fp);
+size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FILE *fp);
+
+int kallsyms__get_function_start(const char *kallsyms_filename,
+ const char *symbol_name, u64 *addr);
+int kallsyms__get_symbol_start(const char *kallsyms_filename,
+ const char *symbol_name, u64 *addr);
+
+void event_attr_init(struct perf_event_attr *attr);
+
+int perf_event_paranoid(void);
+bool perf_event_paranoid_check(int max_level);
+
+extern int sysctl_perf_event_max_stack;
+extern int sysctl_perf_event_max_contexts_per_stack;
+extern unsigned int proc_map_timeout;
+
+#define PAGE_SIZE_NAME_LEN 32
+char *get_page_size_name(u64 size, char *str);
+
+void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type);
+void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type);
+const char *arch_perf_header_entry(const char *se_header);
+int arch_support_sort_key(const char *sort_key);
+
+static inline bool perf_event_header__cpumode_is_guest(u8 cpumode)
+{
+ return cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
+ cpumode == PERF_RECORD_MISC_GUEST_USER;
+}
+
+static inline bool perf_event_header__misc_is_guest(u16 misc)
+{
+ return perf_event_header__cpumode_is_guest(misc & PERF_RECORD_MISC_CPUMODE_MASK);
+}
+
+static inline bool perf_event_header__is_guest(const struct perf_event_header *header)
+{
+ return perf_event_header__misc_is_guest(header->misc);
+}
+
+static inline bool perf_event__is_guest(const union perf_event *event)
+{
+ return perf_event_header__is_guest(&event->header);
+}
+
+#endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/events_stats.h b/tools/perf/util/events_stats.h
new file mode 100644
index 000000000..8fecc9fba
--- /dev/null
+++ b/tools/perf/util/events_stats.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_EVENTS_STATS_
+#define __PERF_EVENTS_STATS_
+
+#include <stdio.h>
+#include <perf/event.h>
+#include <linux/types.h>
+#include "auxtrace.h"
+
+/*
+ * The kernel collects the number of events it couldn't send in a stretch and
+ * when possible sends this number in a PERF_RECORD_LOST event. The number of
+ * such "chunks" of lost events is stored in .nr_events[PERF_EVENT_LOST] while
+ * total_lost tells exactly how many events the kernel in fact lost, i.e. it is
+ * the sum of all struct perf_record_lost.lost fields reported.
+ *
+ * The kernel discards mixed up samples and sends the number in a
+ * PERF_RECORD_LOST_SAMPLES event. The number of lost-samples events is stored
+ * in .nr_events[PERF_RECORD_LOST_SAMPLES] while total_lost_samples tells
+ * exactly how many samples the kernel in fact dropped, i.e. it is the sum of
+ * all struct perf_record_lost_samples.lost fields reported.
+ *
+ * The total_period is needed because by default auto-freq is used, so
+ * multiplying nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get
+ * the total number of low level events, it is necessary to sum all struct
+ * perf_record_sample.period and stash the result in total_period.
+ */
+struct events_stats {
+ u64 total_lost;
+ u64 total_lost_samples;
+ u64 total_aux_lost;
+ u64 total_aux_partial;
+ u64 total_aux_collision;
+ u64 total_invalid_chains;
+ u32 nr_events[PERF_RECORD_HEADER_MAX];
+ u32 nr_lost_warned;
+ u32 nr_unknown_events;
+ u32 nr_invalid_chains;
+ u32 nr_unknown_id;
+ u32 nr_unprocessable_samples;
+ u32 nr_auxtrace_errors[PERF_AUXTRACE_ERROR_MAX];
+ u32 nr_proc_map_timeout;
+};
+
+struct hists_stats {
+ u64 total_period;
+ u64 total_non_filtered_period;
+ u32 nr_samples;
+ u32 nr_non_filtered_samples;
+ u32 nr_lost_samples;
+};
+
+void events_stats__inc(struct events_stats *stats, u32 type);
+
+size_t events_stats__fprintf(struct events_stats *stats, FILE *fp,
+ bool skip_empty);
+
+#endif /* __PERF_EVENTS_STATS_ */
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
new file mode 100644
index 000000000..c779b9f2e
--- /dev/null
+++ b/tools/perf/util/evlist.c
@@ -0,0 +1,2502 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from builtin-{top,stat,record}.c, see those files for further
+ * copyright notes.
+ */
+#include <api/fs/fs.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <poll.h>
+#include "cpumap.h"
+#include "util/mmap.h"
+#include "thread_map.h"
+#include "target.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "record.h"
+#include "debug.h"
+#include "units.h"
+#include "bpf_counter.h"
+#include <internal/lib.h> // page_size
+#include "affinity.h"
+#include "../perf.h"
+#include "asm/bug.h"
+#include "bpf-event.h"
+#include "util/event.h"
+#include "util/string2.h"
+#include "util/perf_api_probe.h"
+#include "util/evsel_fprintf.h"
+#include "util/pmu.h"
+#include "util/sample.h"
+#include "util/bpf-filter.h"
+#include "util/stat.h"
+#include "util/util.h"
+#include <signal.h>
+#include <unistd.h>
+#include <sched.h>
+#include <stdlib.h>
+
+#include "parse-events.h"
+#include <subcmd/parse-options.h>
+
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/timerfd.h>
+
+#include <linux/bitops.h>
+#include <linux/hash.h>
+#include <linux/log2.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/time64.h>
+#include <linux/zalloc.h>
+#include <perf/evlist.h>
+#include <perf/evsel.h>
+#include <perf/cpumap.h>
+#include <perf/mmap.h>
+
+#include <internal/xyarray.h>
+
+#ifdef LACKS_SIGQUEUE_PROTOTYPE
+int sigqueue(pid_t pid, int sig, const union sigval value);
+#endif
+
+#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
+#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
+
+void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
+ struct perf_thread_map *threads)
+{
+ perf_evlist__init(&evlist->core);
+ perf_evlist__set_maps(&evlist->core, cpus, threads);
+ evlist->workload.pid = -1;
+ evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
+ evlist->ctl_fd.fd = -1;
+ evlist->ctl_fd.ack = -1;
+ evlist->ctl_fd.pos = -1;
+}
+
+struct evlist *evlist__new(void)
+{
+ struct evlist *evlist = zalloc(sizeof(*evlist));
+
+ if (evlist != NULL)
+ evlist__init(evlist, NULL, NULL);
+
+ return evlist;
+}
+
+struct evlist *evlist__new_default(void)
+{
+ struct evlist *evlist = evlist__new();
+ bool can_profile_kernel;
+ int err;
+
+ if (!evlist)
+ return NULL;
+
+ can_profile_kernel = perf_event_paranoid_check(1);
+ err = parse_event(evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
+ if (err) {
+ evlist__delete(evlist);
+ evlist = NULL;
+ }
+
+ return evlist;
+}
+
+struct evlist *evlist__new_dummy(void)
+{
+ struct evlist *evlist = evlist__new();
+
+ if (evlist && evlist__add_dummy(evlist)) {
+ evlist__delete(evlist);
+ evlist = NULL;
+ }
+
+ return evlist;
+}
+
+/**
+ * evlist__set_id_pos - set the positions of event ids.
+ * @evlist: selected event list
+ *
+ * Events with compatible sample types all have the same id_pos
+ * and is_pos. For convenience, put a copy on evlist.
+ */
+void evlist__set_id_pos(struct evlist *evlist)
+{
+ struct evsel *first = evlist__first(evlist);
+
+ evlist->id_pos = first->id_pos;
+ evlist->is_pos = first->is_pos;
+}
+
+static void evlist__update_id_pos(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel)
+ evsel__calc_id_pos(evsel);
+
+ evlist__set_id_pos(evlist);
+}
+
+static void evlist__purge(struct evlist *evlist)
+{
+ struct evsel *pos, *n;
+
+ evlist__for_each_entry_safe(evlist, n, pos) {
+ list_del_init(&pos->core.node);
+ pos->evlist = NULL;
+ evsel__delete(pos);
+ }
+
+ evlist->core.nr_entries = 0;
+}
+
+void evlist__exit(struct evlist *evlist)
+{
+ event_enable_timer__exit(&evlist->eet);
+ zfree(&evlist->mmap);
+ zfree(&evlist->overwrite_mmap);
+ perf_evlist__exit(&evlist->core);
+}
+
+void evlist__delete(struct evlist *evlist)
+{
+ if (evlist == NULL)
+ return;
+
+ evlist__free_stats(evlist);
+ evlist__munmap(evlist);
+ evlist__close(evlist);
+ evlist__purge(evlist);
+ evlist__exit(evlist);
+ free(evlist);
+}
+
+void evlist__add(struct evlist *evlist, struct evsel *entry)
+{
+ perf_evlist__add(&evlist->core, &entry->core);
+ entry->evlist = evlist;
+ entry->tracking = !entry->core.idx;
+
+ if (evlist->core.nr_entries == 1)
+ evlist__set_id_pos(evlist);
+}
+
+void evlist__remove(struct evlist *evlist, struct evsel *evsel)
+{
+ evsel->evlist = NULL;
+ perf_evlist__remove(&evlist->core, &evsel->core);
+}
+
+void evlist__splice_list_tail(struct evlist *evlist, struct list_head *list)
+{
+ while (!list_empty(list)) {
+ struct evsel *evsel, *temp, *leader = NULL;
+
+ __evlist__for_each_entry_safe(list, temp, evsel) {
+ list_del_init(&evsel->core.node);
+ evlist__add(evlist, evsel);
+ leader = evsel;
+ break;
+ }
+
+ __evlist__for_each_entry_safe(list, temp, evsel) {
+ if (evsel__has_leader(evsel, leader)) {
+ list_del_init(&evsel->core.node);
+ evlist__add(evlist, evsel);
+ }
+ }
+ }
+}
+
+int __evlist__set_tracepoints_handlers(struct evlist *evlist,
+ const struct evsel_str_handler *assocs, size_t nr_assocs)
+{
+ size_t i;
+ int err;
+
+ for (i = 0; i < nr_assocs; i++) {
+ // Adding a handler for an event not in this evlist, just ignore it.
+ struct evsel *evsel = evlist__find_tracepoint_by_name(evlist, assocs[i].name);
+ if (evsel == NULL)
+ continue;
+
+ err = -EEXIST;
+ if (evsel->handler != NULL)
+ goto out;
+ evsel->handler = assocs[i].handler;
+ }
+
+ err = 0;
+out:
+ return err;
+}
+
+static void evlist__set_leader(struct evlist *evlist)
+{
+ perf_evlist__set_leader(&evlist->core);
+}
+
+static struct evsel *evlist__dummy_event(struct evlist *evlist)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_DUMMY,
+ .size = sizeof(attr), /* to capture ABI version */
+ /* Avoid frequency mode for dummy events to avoid associated timers. */
+ .freq = 0,
+ .sample_period = 1,
+ };
+
+ return evsel__new_idx(&attr, evlist->core.nr_entries);
+}
+
+int evlist__add_dummy(struct evlist *evlist)
+{
+ struct evsel *evsel = evlist__dummy_event(evlist);
+
+ if (evsel == NULL)
+ return -ENOMEM;
+
+ evlist__add(evlist, evsel);
+ return 0;
+}
+
+struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide)
+{
+ struct evsel *evsel = evlist__dummy_event(evlist);
+
+ if (!evsel)
+ return NULL;
+
+ evsel->core.attr.exclude_kernel = 1;
+ evsel->core.attr.exclude_guest = 1;
+ evsel->core.attr.exclude_hv = 1;
+ evsel->core.system_wide = system_wide;
+ evsel->no_aux_samples = true;
+ evsel->name = strdup("dummy:u");
+
+ evlist__add(evlist, evsel);
+ return evsel;
+}
+
+#ifdef HAVE_LIBTRACEEVENT
+struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide)
+{
+ struct evsel *evsel = evsel__newtp_idx("sched", "sched_switch", 0);
+
+ if (IS_ERR(evsel))
+ return evsel;
+
+ evsel__set_sample_bit(evsel, CPU);
+ evsel__set_sample_bit(evsel, TIME);
+
+ evsel->core.system_wide = system_wide;
+ evsel->no_aux_samples = true;
+
+ evlist__add(evlist, evsel);
+ return evsel;
+}
+#endif
+
+int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
+{
+ struct evsel *evsel, *n;
+ LIST_HEAD(head);
+ size_t i;
+
+ for (i = 0; i < nr_attrs; i++) {
+ evsel = evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
+ if (evsel == NULL)
+ goto out_delete_partial_list;
+ list_add_tail(&evsel->core.node, &head);
+ }
+
+ evlist__splice_list_tail(evlist, &head);
+
+ return 0;
+
+out_delete_partial_list:
+ __evlist__for_each_entry_safe(&head, n, evsel)
+ evsel__delete(evsel);
+ return -1;
+}
+
+int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
+{
+ size_t i;
+
+ for (i = 0; i < nr_attrs; i++)
+ event_attr_init(attrs + i);
+
+ return evlist__add_attrs(evlist, attrs, nr_attrs);
+}
+
+__weak int arch_evlist__add_default_attrs(struct evlist *evlist,
+ struct perf_event_attr *attrs,
+ size_t nr_attrs)
+{
+ if (!nr_attrs)
+ return 0;
+
+ return __evlist__add_default_attrs(evlist, attrs, nr_attrs);
+}
+
+struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT &&
+ (int)evsel->core.attr.config == id)
+ return evsel;
+ }
+
+ return NULL;
+}
+
+struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if ((evsel->core.attr.type == PERF_TYPE_TRACEPOINT) &&
+ (strcmp(evsel->name, name) == 0))
+ return evsel;
+ }
+
+ return NULL;
+}
+
+#ifdef HAVE_LIBTRACEEVENT
+int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler)
+{
+ struct evsel *evsel = evsel__newtp(sys, name);
+
+ if (IS_ERR(evsel))
+ return -1;
+
+ evsel->handler = handler;
+ evlist__add(evlist, evsel);
+ return 0;
+}
+#endif
+
+struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity)
+{
+ struct evlist_cpu_iterator itr = {
+ .container = evlist,
+ .evsel = NULL,
+ .cpu_map_idx = 0,
+ .evlist_cpu_map_idx = 0,
+ .evlist_cpu_map_nr = perf_cpu_map__nr(evlist->core.all_cpus),
+ .cpu = (struct perf_cpu){ .cpu = -1},
+ .affinity = affinity,
+ };
+
+ if (evlist__empty(evlist)) {
+ /* Ensure the empty list doesn't iterate. */
+ itr.evlist_cpu_map_idx = itr.evlist_cpu_map_nr;
+ } else {
+ itr.evsel = evlist__first(evlist);
+ if (itr.affinity) {
+ itr.cpu = perf_cpu_map__cpu(evlist->core.all_cpus, 0);
+ affinity__set(itr.affinity, itr.cpu.cpu);
+ itr.cpu_map_idx = perf_cpu_map__idx(itr.evsel->core.cpus, itr.cpu);
+ /*
+ * If this CPU isn't in the evsel's cpu map then advance
+ * through the list.
+ */
+ if (itr.cpu_map_idx == -1)
+ evlist_cpu_iterator__next(&itr);
+ }
+ }
+ return itr;
+}
+
+void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr)
+{
+ while (evlist_cpu_itr->evsel != evlist__last(evlist_cpu_itr->container)) {
+ evlist_cpu_itr->evsel = evsel__next(evlist_cpu_itr->evsel);
+ evlist_cpu_itr->cpu_map_idx =
+ perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus,
+ evlist_cpu_itr->cpu);
+ if (evlist_cpu_itr->cpu_map_idx != -1)
+ return;
+ }
+ evlist_cpu_itr->evlist_cpu_map_idx++;
+ if (evlist_cpu_itr->evlist_cpu_map_idx < evlist_cpu_itr->evlist_cpu_map_nr) {
+ evlist_cpu_itr->evsel = evlist__first(evlist_cpu_itr->container);
+ evlist_cpu_itr->cpu =
+ perf_cpu_map__cpu(evlist_cpu_itr->container->core.all_cpus,
+ evlist_cpu_itr->evlist_cpu_map_idx);
+ if (evlist_cpu_itr->affinity)
+ affinity__set(evlist_cpu_itr->affinity, evlist_cpu_itr->cpu.cpu);
+ evlist_cpu_itr->cpu_map_idx =
+ perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus,
+ evlist_cpu_itr->cpu);
+ /*
+ * If this CPU isn't in the evsel's cpu map then advance through
+ * the list.
+ */
+ if (evlist_cpu_itr->cpu_map_idx == -1)
+ evlist_cpu_iterator__next(evlist_cpu_itr);
+ }
+}
+
+bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr)
+{
+ return evlist_cpu_itr->evlist_cpu_map_idx >= evlist_cpu_itr->evlist_cpu_map_nr;
+}
+
+static int evsel__strcmp(struct evsel *pos, char *evsel_name)
+{
+ if (!evsel_name)
+ return 0;
+ if (evsel__is_dummy_event(pos))
+ return 1;
+ return !evsel__name_is(pos, evsel_name);
+}
+
+static int evlist__is_enabled(struct evlist *evlist)
+{
+ struct evsel *pos;
+
+ evlist__for_each_entry(evlist, pos) {
+ if (!evsel__is_group_leader(pos) || !pos->core.fd)
+ continue;
+ /* If at least one event is enabled, evlist is enabled. */
+ if (!pos->disabled)
+ return true;
+ }
+ return false;
+}
+
+static void __evlist__disable(struct evlist *evlist, char *evsel_name, bool excl_dummy)
+{
+ struct evsel *pos;
+ struct evlist_cpu_iterator evlist_cpu_itr;
+ struct affinity saved_affinity, *affinity = NULL;
+ bool has_imm = false;
+
+ // See explanation in evlist__close()
+ if (!cpu_map__is_dummy(evlist->core.user_requested_cpus)) {
+ if (affinity__setup(&saved_affinity) < 0)
+ return;
+ affinity = &saved_affinity;
+ }
+
+ /* Disable 'immediate' events last */
+ for (int imm = 0; imm <= 1; imm++) {
+ evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) {
+ pos = evlist_cpu_itr.evsel;
+ if (evsel__strcmp(pos, evsel_name))
+ continue;
+ if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
+ continue;
+ if (excl_dummy && evsel__is_dummy_event(pos))
+ continue;
+ if (pos->immediate)
+ has_imm = true;
+ if (pos->immediate != imm)
+ continue;
+ evsel__disable_cpu(pos, evlist_cpu_itr.cpu_map_idx);
+ }
+ if (!has_imm)
+ break;
+ }
+
+ affinity__cleanup(affinity);
+ evlist__for_each_entry(evlist, pos) {
+ if (evsel__strcmp(pos, evsel_name))
+ continue;
+ if (!evsel__is_group_leader(pos) || !pos->core.fd)
+ continue;
+ if (excl_dummy && evsel__is_dummy_event(pos))
+ continue;
+ pos->disabled = true;
+ }
+
+ /*
+ * If we disabled only single event, we need to check
+ * the enabled state of the evlist manually.
+ */
+ if (evsel_name)
+ evlist->enabled = evlist__is_enabled(evlist);
+ else
+ evlist->enabled = false;
+}
+
+void evlist__disable(struct evlist *evlist)
+{
+ __evlist__disable(evlist, NULL, false);
+}
+
+void evlist__disable_non_dummy(struct evlist *evlist)
+{
+ __evlist__disable(evlist, NULL, true);
+}
+
+void evlist__disable_evsel(struct evlist *evlist, char *evsel_name)
+{
+ __evlist__disable(evlist, evsel_name, false);
+}
+
+static void __evlist__enable(struct evlist *evlist, char *evsel_name, bool excl_dummy)
+{
+ struct evsel *pos;
+ struct evlist_cpu_iterator evlist_cpu_itr;
+ struct affinity saved_affinity, *affinity = NULL;
+
+ // See explanation in evlist__close()
+ if (!cpu_map__is_dummy(evlist->core.user_requested_cpus)) {
+ if (affinity__setup(&saved_affinity) < 0)
+ return;
+ affinity = &saved_affinity;
+ }
+
+ evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) {
+ pos = evlist_cpu_itr.evsel;
+ if (evsel__strcmp(pos, evsel_name))
+ continue;
+ if (!evsel__is_group_leader(pos) || !pos->core.fd)
+ continue;
+ if (excl_dummy && evsel__is_dummy_event(pos))
+ continue;
+ evsel__enable_cpu(pos, evlist_cpu_itr.cpu_map_idx);
+ }
+ affinity__cleanup(affinity);
+ evlist__for_each_entry(evlist, pos) {
+ if (evsel__strcmp(pos, evsel_name))
+ continue;
+ if (!evsel__is_group_leader(pos) || !pos->core.fd)
+ continue;
+ if (excl_dummy && evsel__is_dummy_event(pos))
+ continue;
+ pos->disabled = false;
+ }
+
+ /*
+ * Even single event sets the 'enabled' for evlist,
+ * so the toggle can work properly and toggle to
+ * 'disabled' state.
+ */
+ evlist->enabled = true;
+}
+
+void evlist__enable(struct evlist *evlist)
+{
+ __evlist__enable(evlist, NULL, false);
+}
+
+void evlist__enable_non_dummy(struct evlist *evlist)
+{
+ __evlist__enable(evlist, NULL, true);
+}
+
+void evlist__enable_evsel(struct evlist *evlist, char *evsel_name)
+{
+ __evlist__enable(evlist, evsel_name, false);
+}
+
+void evlist__toggle_enable(struct evlist *evlist)
+{
+ (evlist->enabled ? evlist__disable : evlist__enable)(evlist);
+}
+
+int evlist__add_pollfd(struct evlist *evlist, int fd)
+{
+ return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN, fdarray_flag__default);
+}
+
+int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
+{
+ return perf_evlist__filter_pollfd(&evlist->core, revents_and_mask);
+}
+
+#ifdef HAVE_EVENTFD_SUPPORT
+int evlist__add_wakeup_eventfd(struct evlist *evlist, int fd)
+{
+ return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN,
+ fdarray_flag__nonfilterable |
+ fdarray_flag__non_perf_event);
+}
+#endif
+
+int evlist__poll(struct evlist *evlist, int timeout)
+{
+ return perf_evlist__poll(&evlist->core, timeout);
+}
+
+struct perf_sample_id *evlist__id2sid(struct evlist *evlist, u64 id)
+{
+ struct hlist_head *head;
+ struct perf_sample_id *sid;
+ int hash;
+
+ hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
+ head = &evlist->core.heads[hash];
+
+ hlist_for_each_entry(sid, head, node)
+ if (sid->id == id)
+ return sid;
+
+ return NULL;
+}
+
+struct evsel *evlist__id2evsel(struct evlist *evlist, u64 id)
+{
+ struct perf_sample_id *sid;
+
+ if (evlist->core.nr_entries == 1 || !id)
+ return evlist__first(evlist);
+
+ sid = evlist__id2sid(evlist, id);
+ if (sid)
+ return container_of(sid->evsel, struct evsel, core);
+
+ if (!evlist__sample_id_all(evlist))
+ return evlist__first(evlist);
+
+ return NULL;
+}
+
+struct evsel *evlist__id2evsel_strict(struct evlist *evlist, u64 id)
+{
+ struct perf_sample_id *sid;
+
+ if (!id)
+ return NULL;
+
+ sid = evlist__id2sid(evlist, id);
+ if (sid)
+ return container_of(sid->evsel, struct evsel, core);
+
+ return NULL;
+}
+
+static int evlist__event2id(struct evlist *evlist, union perf_event *event, u64 *id)
+{
+ const __u64 *array = event->sample.array;
+ ssize_t n;
+
+ n = (event->header.size - sizeof(event->header)) >> 3;
+
+ if (event->header.type == PERF_RECORD_SAMPLE) {
+ if (evlist->id_pos >= n)
+ return -1;
+ *id = array[evlist->id_pos];
+ } else {
+ if (evlist->is_pos > n)
+ return -1;
+ n -= evlist->is_pos;
+ *id = array[n];
+ }
+ return 0;
+}
+
+struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event)
+{
+ struct evsel *first = evlist__first(evlist);
+ struct hlist_head *head;
+ struct perf_sample_id *sid;
+ int hash;
+ u64 id;
+
+ if (evlist->core.nr_entries == 1)
+ return first;
+
+ if (!first->core.attr.sample_id_all &&
+ event->header.type != PERF_RECORD_SAMPLE)
+ return first;
+
+ if (evlist__event2id(evlist, event, &id))
+ return NULL;
+
+ /* Synthesized events have an id of zero */
+ if (!id)
+ return first;
+
+ hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
+ head = &evlist->core.heads[hash];
+
+ hlist_for_each_entry(sid, head, node) {
+ if (sid->id == id)
+ return container_of(sid->evsel, struct evsel, core);
+ }
+ return NULL;
+}
+
+static int evlist__set_paused(struct evlist *evlist, bool value)
+{
+ int i;
+
+ if (!evlist->overwrite_mmap)
+ return 0;
+
+ for (i = 0; i < evlist->core.nr_mmaps; i++) {
+ int fd = evlist->overwrite_mmap[i].core.fd;
+ int err;
+
+ if (fd < 0)
+ continue;
+ err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int evlist__pause(struct evlist *evlist)
+{
+ return evlist__set_paused(evlist, true);
+}
+
+static int evlist__resume(struct evlist *evlist)
+{
+ return evlist__set_paused(evlist, false);
+}
+
+static void evlist__munmap_nofree(struct evlist *evlist)
+{
+ int i;
+
+ if (evlist->mmap)
+ for (i = 0; i < evlist->core.nr_mmaps; i++)
+ perf_mmap__munmap(&evlist->mmap[i].core);
+
+ if (evlist->overwrite_mmap)
+ for (i = 0; i < evlist->core.nr_mmaps; i++)
+ perf_mmap__munmap(&evlist->overwrite_mmap[i].core);
+}
+
+void evlist__munmap(struct evlist *evlist)
+{
+ evlist__munmap_nofree(evlist);
+ zfree(&evlist->mmap);
+ zfree(&evlist->overwrite_mmap);
+}
+
+static void perf_mmap__unmap_cb(struct perf_mmap *map)
+{
+ struct mmap *m = container_of(map, struct mmap, core);
+
+ mmap__munmap(m);
+}
+
+static struct mmap *evlist__alloc_mmap(struct evlist *evlist,
+ bool overwrite)
+{
+ int i;
+ struct mmap *map;
+
+ map = zalloc(evlist->core.nr_mmaps * sizeof(struct mmap));
+ if (!map)
+ return NULL;
+
+ for (i = 0; i < evlist->core.nr_mmaps; i++) {
+ struct perf_mmap *prev = i ? &map[i - 1].core : NULL;
+
+ /*
+ * When the perf_mmap() call is made we grab one refcount, plus
+ * one extra to let perf_mmap__consume() get the last
+ * events after all real references (perf_mmap__get()) are
+ * dropped.
+ *
+ * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
+ * thus does perf_mmap__get() on it.
+ */
+ perf_mmap__init(&map[i].core, prev, overwrite, perf_mmap__unmap_cb);
+ }
+
+ return map;
+}
+
+static void
+perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist,
+ struct perf_evsel *_evsel,
+ struct perf_mmap_param *_mp,
+ int idx)
+{
+ struct evlist *evlist = container_of(_evlist, struct evlist, core);
+ struct mmap_params *mp = container_of(_mp, struct mmap_params, core);
+ struct evsel *evsel = container_of(_evsel, struct evsel, core);
+
+ auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, evsel, idx);
+}
+
+static struct perf_mmap*
+perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx)
+{
+ struct evlist *evlist = container_of(_evlist, struct evlist, core);
+ struct mmap *maps;
+
+ maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
+
+ if (!maps) {
+ maps = evlist__alloc_mmap(evlist, overwrite);
+ if (!maps)
+ return NULL;
+
+ if (overwrite) {
+ evlist->overwrite_mmap = maps;
+ if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
+ evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
+ } else {
+ evlist->mmap = maps;
+ }
+ }
+
+ return &maps[idx].core;
+}
+
+static int
+perf_evlist__mmap_cb_mmap(struct perf_mmap *_map, struct perf_mmap_param *_mp,
+ int output, struct perf_cpu cpu)
+{
+ struct mmap *map = container_of(_map, struct mmap, core);
+ struct mmap_params *mp = container_of(_mp, struct mmap_params, core);
+
+ return mmap__mmap(map, mp, output, cpu);
+}
+
+unsigned long perf_event_mlock_kb_in_pages(void)
+{
+ unsigned long pages;
+ int max;
+
+ if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
+ /*
+ * Pick a once upon a time good value, i.e. things look
+ * strange since we can't read a sysctl value, but lets not
+ * die yet...
+ */
+ max = 512;
+ } else {
+ max -= (page_size / 1024);
+ }
+
+ pages = (max * 1024) / page_size;
+ if (!is_power_of_2(pages))
+ pages = rounddown_pow_of_two(pages);
+
+ return pages;
+}
+
+size_t evlist__mmap_size(unsigned long pages)
+{
+ if (pages == UINT_MAX)
+ pages = perf_event_mlock_kb_in_pages();
+ else if (!is_power_of_2(pages))
+ return 0;
+
+ return (pages + 1) * page_size;
+}
+
+static long parse_pages_arg(const char *str, unsigned long min,
+ unsigned long max)
+{
+ unsigned long pages, val;
+ static struct parse_tag tags[] = {
+ { .tag = 'B', .mult = 1 },
+ { .tag = 'K', .mult = 1 << 10 },
+ { .tag = 'M', .mult = 1 << 20 },
+ { .tag = 'G', .mult = 1 << 30 },
+ { .tag = 0 },
+ };
+
+ if (str == NULL)
+ return -EINVAL;
+
+ val = parse_tag_value(str, tags);
+ if (val != (unsigned long) -1) {
+ /* we got file size value */
+ pages = PERF_ALIGN(val, page_size) / page_size;
+ } else {
+ /* we got pages count value */
+ char *eptr;
+ pages = strtoul(str, &eptr, 10);
+ if (*eptr != '\0')
+ return -EINVAL;
+ }
+
+ if (pages == 0 && min == 0) {
+ /* leave number of pages at 0 */
+ } else if (!is_power_of_2(pages)) {
+ char buf[100];
+
+ /* round pages up to next power of 2 */
+ pages = roundup_pow_of_two(pages);
+ if (!pages)
+ return -EINVAL;
+
+ unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
+ pr_info("rounding mmap pages size to %s (%lu pages)\n",
+ buf, pages);
+ }
+
+ if (pages > max)
+ return -EINVAL;
+
+ return pages;
+}
+
+int __evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
+{
+ unsigned long max = UINT_MAX;
+ long pages;
+
+ if (max > SIZE_MAX / page_size)
+ max = SIZE_MAX / page_size;
+
+ pages = parse_pages_arg(str, 1, max);
+ if (pages < 0) {
+ pr_err("Invalid argument for --mmap_pages/-m\n");
+ return -1;
+ }
+
+ *mmap_pages = pages;
+ return 0;
+}
+
+int evlist__parse_mmap_pages(const struct option *opt, const char *str, int unset __maybe_unused)
+{
+ return __evlist__parse_mmap_pages(opt->value, str);
+}
+
+/**
+ * evlist__mmap_ex - Create mmaps to receive events.
+ * @evlist: list of events
+ * @pages: map length in pages
+ * @overwrite: overwrite older events?
+ * @auxtrace_pages - auxtrace map length in pages
+ * @auxtrace_overwrite - overwrite older auxtrace data?
+ *
+ * If @overwrite is %false the user needs to signal event consumption using
+ * perf_mmap__write_tail(). Using evlist__mmap_read() does this
+ * automatically.
+ *
+ * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
+ * consumption using auxtrace_mmap__write_tail().
+ *
+ * Return: %0 on success, negative error code otherwise.
+ */
+int evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
+ unsigned int auxtrace_pages,
+ bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
+ int comp_level)
+{
+ /*
+ * Delay setting mp.prot: set it before calling perf_mmap__mmap.
+ * Its value is decided by evsel's write_backward.
+ * So &mp should not be passed through const pointer.
+ */
+ struct mmap_params mp = {
+ .nr_cblocks = nr_cblocks,
+ .affinity = affinity,
+ .flush = flush,
+ .comp_level = comp_level
+ };
+ struct perf_evlist_mmap_ops ops = {
+ .idx = perf_evlist__mmap_cb_idx,
+ .get = perf_evlist__mmap_cb_get,
+ .mmap = perf_evlist__mmap_cb_mmap,
+ };
+
+ evlist->core.mmap_len = evlist__mmap_size(pages);
+ pr_debug("mmap size %zuB\n", evlist->core.mmap_len);
+
+ auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->core.mmap_len,
+ auxtrace_pages, auxtrace_overwrite);
+
+ return perf_evlist__mmap_ops(&evlist->core, &ops, &mp.core);
+}
+
+int evlist__mmap(struct evlist *evlist, unsigned int pages)
+{
+ return evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
+}
+
+int evlist__create_maps(struct evlist *evlist, struct target *target)
+{
+ bool all_threads = (target->per_thread && target->system_wide);
+ struct perf_cpu_map *cpus;
+ struct perf_thread_map *threads;
+
+ /*
+ * If specify '-a' and '--per-thread' to perf record, perf record
+ * will override '--per-thread'. target->per_thread = false and
+ * target->system_wide = true.
+ *
+ * If specify '--per-thread' only to perf record,
+ * target->per_thread = true and target->system_wide = false.
+ *
+ * So target->per_thread && target->system_wide is false.
+ * For perf record, thread_map__new_str doesn't call
+ * thread_map__new_all_cpus. That will keep perf record's
+ * current behavior.
+ *
+ * For perf stat, it allows the case that target->per_thread and
+ * target->system_wide are all true. It means to collect system-wide
+ * per-thread data. thread_map__new_str will call
+ * thread_map__new_all_cpus to enumerate all threads.
+ */
+ threads = thread_map__new_str(target->pid, target->tid, target->uid,
+ all_threads);
+
+ if (!threads)
+ return -1;
+
+ if (target__uses_dummy_map(target))
+ cpus = perf_cpu_map__dummy_new();
+ else
+ cpus = perf_cpu_map__new(target->cpu_list);
+
+ if (!cpus)
+ goto out_delete_threads;
+
+ evlist->core.has_user_cpus = !!target->cpu_list;
+
+ perf_evlist__set_maps(&evlist->core, cpus, threads);
+
+ /* as evlist now has references, put count here */
+ perf_cpu_map__put(cpus);
+ perf_thread_map__put(threads);
+
+ return 0;
+
+out_delete_threads:
+ perf_thread_map__put(threads);
+ return -1;
+}
+
+int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
+{
+ struct evsel *evsel;
+ int err = 0;
+
+ evlist__for_each_entry(evlist, evsel) {
+ /*
+ * filters only work for tracepoint event, which doesn't have cpu limit.
+ * So evlist and evsel should always be same.
+ */
+ if (evsel->filter) {
+ err = perf_evsel__apply_filter(&evsel->core, evsel->filter);
+ if (err) {
+ *err_evsel = evsel;
+ break;
+ }
+ }
+
+ /*
+ * non-tracepoint events can have BPF filters.
+ */
+ if (!list_empty(&evsel->bpf_filters)) {
+ err = perf_bpf_filter__prepare(evsel);
+ if (err) {
+ *err_evsel = evsel;
+ break;
+ }
+ }
+ }
+
+ return err;
+}
+
+int evlist__set_tp_filter(struct evlist *evlist, const char *filter)
+{
+ struct evsel *evsel;
+ int err = 0;
+
+ if (filter == NULL)
+ return -1;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
+ continue;
+
+ err = evsel__set_filter(evsel, filter);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+int evlist__append_tp_filter(struct evlist *evlist, const char *filter)
+{
+ struct evsel *evsel;
+ int err = 0;
+
+ if (filter == NULL)
+ return -1;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
+ continue;
+
+ err = evsel__append_tp_filter(evsel, filter);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+char *asprintf__tp_filter_pids(size_t npids, pid_t *pids)
+{
+ char *filter;
+ size_t i;
+
+ for (i = 0; i < npids; ++i) {
+ if (i == 0) {
+ if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
+ return NULL;
+ } else {
+ char *tmp;
+
+ if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
+ goto out_free;
+
+ free(filter);
+ filter = tmp;
+ }
+ }
+
+ return filter;
+out_free:
+ free(filter);
+ return NULL;
+}
+
+int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
+{
+ char *filter = asprintf__tp_filter_pids(npids, pids);
+ int ret = evlist__set_tp_filter(evlist, filter);
+
+ free(filter);
+ return ret;
+}
+
+int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid)
+{
+ return evlist__set_tp_filter_pids(evlist, 1, &pid);
+}
+
+int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
+{
+ char *filter = asprintf__tp_filter_pids(npids, pids);
+ int ret = evlist__append_tp_filter(evlist, filter);
+
+ free(filter);
+ return ret;
+}
+
+int evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid)
+{
+ return evlist__append_tp_filter_pids(evlist, 1, &pid);
+}
+
+bool evlist__valid_sample_type(struct evlist *evlist)
+{
+ struct evsel *pos;
+
+ if (evlist->core.nr_entries == 1)
+ return true;
+
+ if (evlist->id_pos < 0 || evlist->is_pos < 0)
+ return false;
+
+ evlist__for_each_entry(evlist, pos) {
+ if (pos->id_pos != evlist->id_pos ||
+ pos->is_pos != evlist->is_pos)
+ return false;
+ }
+
+ return true;
+}
+
+u64 __evlist__combined_sample_type(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ if (evlist->combined_sample_type)
+ return evlist->combined_sample_type;
+
+ evlist__for_each_entry(evlist, evsel)
+ evlist->combined_sample_type |= evsel->core.attr.sample_type;
+
+ return evlist->combined_sample_type;
+}
+
+u64 evlist__combined_sample_type(struct evlist *evlist)
+{
+ evlist->combined_sample_type = 0;
+ return __evlist__combined_sample_type(evlist);
+}
+
+u64 evlist__combined_branch_type(struct evlist *evlist)
+{
+ struct evsel *evsel;
+ u64 branch_type = 0;
+
+ evlist__for_each_entry(evlist, evsel)
+ branch_type |= evsel->core.attr.branch_sample_type;
+ return branch_type;
+}
+
+bool evlist__valid_read_format(struct evlist *evlist)
+{
+ struct evsel *first = evlist__first(evlist), *pos = first;
+ u64 read_format = first->core.attr.read_format;
+ u64 sample_type = first->core.attr.sample_type;
+
+ evlist__for_each_entry(evlist, pos) {
+ if (read_format != pos->core.attr.read_format) {
+ pr_debug("Read format differs %#" PRIx64 " vs %#" PRIx64 "\n",
+ read_format, (u64)pos->core.attr.read_format);
+ }
+ }
+
+ /* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
+ if ((sample_type & PERF_SAMPLE_READ) &&
+ !(read_format & PERF_FORMAT_ID)) {
+ return false;
+ }
+
+ return true;
+}
+
+u16 evlist__id_hdr_size(struct evlist *evlist)
+{
+ struct evsel *first = evlist__first(evlist);
+
+ return first->core.attr.sample_id_all ? evsel__id_hdr_size(first) : 0;
+}
+
+bool evlist__valid_sample_id_all(struct evlist *evlist)
+{
+ struct evsel *first = evlist__first(evlist), *pos = first;
+
+ evlist__for_each_entry_continue(evlist, pos) {
+ if (first->core.attr.sample_id_all != pos->core.attr.sample_id_all)
+ return false;
+ }
+
+ return true;
+}
+
+bool evlist__sample_id_all(struct evlist *evlist)
+{
+ struct evsel *first = evlist__first(evlist);
+ return first->core.attr.sample_id_all;
+}
+
+void evlist__set_selected(struct evlist *evlist, struct evsel *evsel)
+{
+ evlist->selected = evsel;
+}
+
+void evlist__close(struct evlist *evlist)
+{
+ struct evsel *evsel;
+ struct evlist_cpu_iterator evlist_cpu_itr;
+ struct affinity affinity;
+
+ /*
+ * With perf record core.user_requested_cpus is usually NULL.
+ * Use the old method to handle this for now.
+ */
+ if (!evlist->core.user_requested_cpus ||
+ cpu_map__is_dummy(evlist->core.user_requested_cpus)) {
+ evlist__for_each_entry_reverse(evlist, evsel)
+ evsel__close(evsel);
+ return;
+ }
+
+ if (affinity__setup(&affinity) < 0)
+ return;
+
+ evlist__for_each_cpu(evlist_cpu_itr, evlist, &affinity) {
+ perf_evsel__close_cpu(&evlist_cpu_itr.evsel->core,
+ evlist_cpu_itr.cpu_map_idx);
+ }
+
+ affinity__cleanup(&affinity);
+ evlist__for_each_entry_reverse(evlist, evsel) {
+ perf_evsel__free_fd(&evsel->core);
+ perf_evsel__free_id(&evsel->core);
+ }
+ perf_evlist__reset_id_hash(&evlist->core);
+}
+
+static int evlist__create_syswide_maps(struct evlist *evlist)
+{
+ struct perf_cpu_map *cpus;
+ struct perf_thread_map *threads;
+
+ /*
+ * Try reading /sys/devices/system/cpu/online to get
+ * an all cpus map.
+ *
+ * FIXME: -ENOMEM is the best we can do here, the cpu_map
+ * code needs an overhaul to properly forward the
+ * error, and we may not want to do that fallback to a
+ * default cpu identity map :-\
+ */
+ cpus = perf_cpu_map__new(NULL);
+ if (!cpus)
+ goto out;
+
+ threads = perf_thread_map__new_dummy();
+ if (!threads)
+ goto out_put;
+
+ perf_evlist__set_maps(&evlist->core, cpus, threads);
+
+ perf_thread_map__put(threads);
+out_put:
+ perf_cpu_map__put(cpus);
+out:
+ return -ENOMEM;
+}
+
+int evlist__open(struct evlist *evlist)
+{
+ struct evsel *evsel;
+ int err;
+
+ /*
+ * Default: one fd per CPU, all threads, aka systemwide
+ * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
+ */
+ if (evlist->core.threads == NULL && evlist->core.user_requested_cpus == NULL) {
+ err = evlist__create_syswide_maps(evlist);
+ if (err < 0)
+ goto out_err;
+ }
+
+ evlist__update_id_pos(evlist);
+
+ evlist__for_each_entry(evlist, evsel) {
+ err = evsel__open(evsel, evsel->core.cpus, evsel->core.threads);
+ if (err < 0)
+ goto out_err;
+ }
+
+ return 0;
+out_err:
+ evlist__close(evlist);
+ errno = -err;
+ return err;
+}
+
+int evlist__prepare_workload(struct evlist *evlist, struct target *target, const char *argv[],
+ bool pipe_output, void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
+{
+ int child_ready_pipe[2], go_pipe[2];
+ char bf;
+
+ if (pipe(child_ready_pipe) < 0) {
+ perror("failed to create 'ready' pipe");
+ return -1;
+ }
+
+ if (pipe(go_pipe) < 0) {
+ perror("failed to create 'go' pipe");
+ goto out_close_ready_pipe;
+ }
+
+ evlist->workload.pid = fork();
+ if (evlist->workload.pid < 0) {
+ perror("failed to fork");
+ goto out_close_pipes;
+ }
+
+ if (!evlist->workload.pid) {
+ int ret;
+
+ if (pipe_output)
+ dup2(2, 1);
+
+ signal(SIGTERM, SIG_DFL);
+
+ close(child_ready_pipe[0]);
+ close(go_pipe[1]);
+ fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
+
+ /*
+ * Change the name of this process not to confuse --exclude-perf users
+ * that sees 'perf' in the window up to the execvp() and thinks that
+ * perf samples are not being excluded.
+ */
+ prctl(PR_SET_NAME, "perf-exec");
+
+ /*
+ * Tell the parent we're ready to go
+ */
+ close(child_ready_pipe[1]);
+
+ /*
+ * Wait until the parent tells us to go.
+ */
+ ret = read(go_pipe[0], &bf, 1);
+ /*
+ * The parent will ask for the execvp() to be performed by
+ * writing exactly one byte, in workload.cork_fd, usually via
+ * evlist__start_workload().
+ *
+ * For cancelling the workload without actually running it,
+ * the parent will just close workload.cork_fd, without writing
+ * anything, i.e. read will return zero and we just exit()
+ * here.
+ */
+ if (ret != 1) {
+ if (ret == -1)
+ perror("unable to read pipe");
+ exit(ret);
+ }
+
+ execvp(argv[0], (char **)argv);
+
+ if (exec_error) {
+ union sigval val;
+
+ val.sival_int = errno;
+ if (sigqueue(getppid(), SIGUSR1, val))
+ perror(argv[0]);
+ } else
+ perror(argv[0]);
+ exit(-1);
+ }
+
+ if (exec_error) {
+ struct sigaction act = {
+ .sa_flags = SA_SIGINFO,
+ .sa_sigaction = exec_error,
+ };
+ sigaction(SIGUSR1, &act, NULL);
+ }
+
+ if (target__none(target)) {
+ if (evlist->core.threads == NULL) {
+ fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
+ __func__, __LINE__);
+ goto out_close_pipes;
+ }
+ perf_thread_map__set_pid(evlist->core.threads, 0, evlist->workload.pid);
+ }
+
+ close(child_ready_pipe[1]);
+ close(go_pipe[0]);
+ /*
+ * wait for child to settle
+ */
+ if (read(child_ready_pipe[0], &bf, 1) == -1) {
+ perror("unable to read pipe");
+ goto out_close_pipes;
+ }
+
+ fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
+ evlist->workload.cork_fd = go_pipe[1];
+ close(child_ready_pipe[0]);
+ return 0;
+
+out_close_pipes:
+ close(go_pipe[0]);
+ close(go_pipe[1]);
+out_close_ready_pipe:
+ close(child_ready_pipe[0]);
+ close(child_ready_pipe[1]);
+ return -1;
+}
+
+int evlist__start_workload(struct evlist *evlist)
+{
+ if (evlist->workload.cork_fd > 0) {
+ char bf = 0;
+ int ret;
+ /*
+ * Remove the cork, let it rip!
+ */
+ ret = write(evlist->workload.cork_fd, &bf, 1);
+ if (ret < 0)
+ perror("unable to write to pipe");
+
+ close(evlist->workload.cork_fd);
+ return ret;
+ }
+
+ return 0;
+}
+
+int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample)
+{
+ struct evsel *evsel = evlist__event2evsel(evlist, event);
+ int ret;
+
+ if (!evsel)
+ return -EFAULT;
+ ret = evsel__parse_sample(evsel, event, sample);
+ if (ret)
+ return ret;
+ if (perf_guest && sample->id) {
+ struct perf_sample_id *sid = evlist__id2sid(evlist, sample->id);
+
+ if (sid) {
+ sample->machine_pid = sid->machine_pid;
+ sample->vcpu = sid->vcpu.cpu;
+ }
+ }
+ return 0;
+}
+
+int evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *event, u64 *timestamp)
+{
+ struct evsel *evsel = evlist__event2evsel(evlist, event);
+
+ if (!evsel)
+ return -EFAULT;
+ return evsel__parse_sample_timestamp(evsel, event, timestamp);
+}
+
+int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size)
+{
+ int printed, value;
+ char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
+
+ switch (err) {
+ case EACCES:
+ case EPERM:
+ printed = scnprintf(buf, size,
+ "Error:\t%s.\n"
+ "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);
+
+ value = perf_event_paranoid();
+
+ printed += scnprintf(buf + printed, size - printed, "\nHint:\t");
+
+ if (value >= 2) {
+ printed += scnprintf(buf + printed, size - printed,
+ "For your workloads it needs to be <= 1\nHint:\t");
+ }
+ printed += scnprintf(buf + printed, size - printed,
+ "For system wide tracing it needs to be set to -1.\n");
+
+ printed += scnprintf(buf + printed, size - printed,
+ "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
+ "Hint:\tThe current value is %d.", value);
+ break;
+ case EINVAL: {
+ struct evsel *first = evlist__first(evlist);
+ int max_freq;
+
+ if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
+ goto out_default;
+
+ if (first->core.attr.sample_freq < (u64)max_freq)
+ goto out_default;
+
+ printed = scnprintf(buf, size,
+ "Error:\t%s.\n"
+ "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
+ "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
+ emsg, max_freq, first->core.attr.sample_freq);
+ break;
+ }
+ default:
+out_default:
+ scnprintf(buf, size, "%s", emsg);
+ break;
+ }
+
+ return 0;
+}
+
+int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
+{
+ char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
+ int pages_attempted = evlist->core.mmap_len / 1024, pages_max_per_user, printed = 0;
+
+ switch (err) {
+ case EPERM:
+ sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
+ printed += scnprintf(buf + printed, size - printed,
+ "Error:\t%s.\n"
+ "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
+ "Hint:\tTried using %zd kB.\n",
+ emsg, pages_max_per_user, pages_attempted);
+
+ if (pages_attempted >= pages_max_per_user) {
+ printed += scnprintf(buf + printed, size - printed,
+ "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
+ pages_max_per_user + pages_attempted);
+ }
+
+ printed += scnprintf(buf + printed, size - printed,
+ "Hint:\tTry using a smaller -m/--mmap-pages value.");
+ break;
+ default:
+ scnprintf(buf, size, "%s", emsg);
+ break;
+ }
+
+ return 0;
+}
+
+void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel)
+{
+ struct evsel *evsel, *n;
+ LIST_HEAD(move);
+
+ if (move_evsel == evlist__first(evlist))
+ return;
+
+ evlist__for_each_entry_safe(evlist, n, evsel) {
+ if (evsel__leader(evsel) == evsel__leader(move_evsel))
+ list_move_tail(&evsel->core.node, &move);
+ }
+
+ list_splice(&move, &evlist->core.entries);
+}
+
+struct evsel *evlist__get_tracking_event(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->tracking)
+ return evsel;
+ }
+
+ return evlist__first(evlist);
+}
+
+void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel)
+{
+ struct evsel *evsel;
+
+ if (tracking_evsel->tracking)
+ return;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel != tracking_evsel)
+ evsel->tracking = false;
+ }
+
+ tracking_evsel->tracking = true;
+}
+
+struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (!evsel->name)
+ continue;
+ if (evsel__name_is(evsel, str))
+ return evsel;
+ }
+
+ return NULL;
+}
+
+void evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state)
+{
+ enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
+ enum action {
+ NONE,
+ PAUSE,
+ RESUME,
+ } action = NONE;
+
+ if (!evlist->overwrite_mmap)
+ return;
+
+ switch (old_state) {
+ case BKW_MMAP_NOTREADY: {
+ if (state != BKW_MMAP_RUNNING)
+ goto state_err;
+ break;
+ }
+ case BKW_MMAP_RUNNING: {
+ if (state != BKW_MMAP_DATA_PENDING)
+ goto state_err;
+ action = PAUSE;
+ break;
+ }
+ case BKW_MMAP_DATA_PENDING: {
+ if (state != BKW_MMAP_EMPTY)
+ goto state_err;
+ break;
+ }
+ case BKW_MMAP_EMPTY: {
+ if (state != BKW_MMAP_RUNNING)
+ goto state_err;
+ action = RESUME;
+ break;
+ }
+ default:
+ WARN_ONCE(1, "Shouldn't get there\n");
+ }
+
+ evlist->bkw_mmap_state = state;
+
+ switch (action) {
+ case PAUSE:
+ evlist__pause(evlist);
+ break;
+ case RESUME:
+ evlist__resume(evlist);
+ break;
+ case NONE:
+ default:
+ break;
+ }
+
+state_err:
+ return;
+}
+
+bool evlist__exclude_kernel(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (!evsel->core.attr.exclude_kernel)
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Events in data file are not collect in groups, but we still want
+ * the group display. Set the artificial group and set the leader's
+ * forced_leader flag to notify the display code.
+ */
+void evlist__force_leader(struct evlist *evlist)
+{
+ if (evlist__nr_groups(evlist) == 0) {
+ struct evsel *leader = evlist__first(evlist);
+
+ evlist__set_leader(evlist);
+ leader->forced_leader = true;
+ }
+}
+
+struct evsel *evlist__reset_weak_group(struct evlist *evsel_list, struct evsel *evsel, bool close)
+{
+ struct evsel *c2, *leader;
+ bool is_open = true;
+
+ leader = evsel__leader(evsel);
+
+ pr_debug("Weak group for %s/%d failed\n",
+ leader->name, leader->core.nr_members);
+
+ /*
+ * for_each_group_member doesn't work here because it doesn't
+ * include the first entry.
+ */
+ evlist__for_each_entry(evsel_list, c2) {
+ if (c2 == evsel)
+ is_open = false;
+ if (evsel__has_leader(c2, leader)) {
+ if (is_open && close)
+ perf_evsel__close(&c2->core);
+ /*
+ * We want to close all members of the group and reopen
+ * them. Some events, like Intel topdown, require being
+ * in a group and so keep these in the group.
+ */
+ evsel__remove_from_group(c2, leader);
+
+ /*
+ * Set this for all former members of the group
+ * to indicate they get reopened.
+ */
+ c2->reset_group = true;
+ }
+ }
+ /* Reset the leader count if all entries were removed. */
+ if (leader->core.nr_members == 1)
+ leader->core.nr_members = 0;
+ return leader;
+}
+
+static int evlist__parse_control_fifo(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close)
+{
+ char *s, *p;
+ int ret = 0, fd;
+
+ if (strncmp(str, "fifo:", 5))
+ return -EINVAL;
+
+ str += 5;
+ if (!*str || *str == ',')
+ return -EINVAL;
+
+ s = strdup(str);
+ if (!s)
+ return -ENOMEM;
+
+ p = strchr(s, ',');
+ if (p)
+ *p = '\0';
+
+ /*
+ * O_RDWR avoids POLLHUPs which is necessary to allow the other
+ * end of a FIFO to be repeatedly opened and closed.
+ */
+ fd = open(s, O_RDWR | O_NONBLOCK | O_CLOEXEC);
+ if (fd < 0) {
+ pr_err("Failed to open '%s'\n", s);
+ ret = -errno;
+ goto out_free;
+ }
+ *ctl_fd = fd;
+ *ctl_fd_close = true;
+
+ if (p && *++p) {
+ /* O_RDWR | O_NONBLOCK means the other end need not be open */
+ fd = open(p, O_RDWR | O_NONBLOCK | O_CLOEXEC);
+ if (fd < 0) {
+ pr_err("Failed to open '%s'\n", p);
+ ret = -errno;
+ goto out_free;
+ }
+ *ctl_fd_ack = fd;
+ }
+
+out_free:
+ free(s);
+ return ret;
+}
+
+int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close)
+{
+ char *comma = NULL, *endptr = NULL;
+
+ *ctl_fd_close = false;
+
+ if (strncmp(str, "fd:", 3))
+ return evlist__parse_control_fifo(str, ctl_fd, ctl_fd_ack, ctl_fd_close);
+
+ *ctl_fd = strtoul(&str[3], &endptr, 0);
+ if (endptr == &str[3])
+ return -EINVAL;
+
+ comma = strchr(str, ',');
+ if (comma) {
+ if (endptr != comma)
+ return -EINVAL;
+
+ *ctl_fd_ack = strtoul(comma + 1, &endptr, 0);
+ if (endptr == comma + 1 || *endptr != '\0')
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+void evlist__close_control(int ctl_fd, int ctl_fd_ack, bool *ctl_fd_close)
+{
+ if (*ctl_fd_close) {
+ *ctl_fd_close = false;
+ close(ctl_fd);
+ if (ctl_fd_ack >= 0)
+ close(ctl_fd_ack);
+ }
+}
+
+int evlist__initialize_ctlfd(struct evlist *evlist, int fd, int ack)
+{
+ if (fd == -1) {
+ pr_debug("Control descriptor is not initialized\n");
+ return 0;
+ }
+
+ evlist->ctl_fd.pos = perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN,
+ fdarray_flag__nonfilterable |
+ fdarray_flag__non_perf_event);
+ if (evlist->ctl_fd.pos < 0) {
+ evlist->ctl_fd.pos = -1;
+ pr_err("Failed to add ctl fd entry: %m\n");
+ return -1;
+ }
+
+ evlist->ctl_fd.fd = fd;
+ evlist->ctl_fd.ack = ack;
+
+ return 0;
+}
+
+bool evlist__ctlfd_initialized(struct evlist *evlist)
+{
+ return evlist->ctl_fd.pos >= 0;
+}
+
+int evlist__finalize_ctlfd(struct evlist *evlist)
+{
+ struct pollfd *entries = evlist->core.pollfd.entries;
+
+ if (!evlist__ctlfd_initialized(evlist))
+ return 0;
+
+ entries[evlist->ctl_fd.pos].fd = -1;
+ entries[evlist->ctl_fd.pos].events = 0;
+ entries[evlist->ctl_fd.pos].revents = 0;
+
+ evlist->ctl_fd.pos = -1;
+ evlist->ctl_fd.ack = -1;
+ evlist->ctl_fd.fd = -1;
+
+ return 0;
+}
+
+static int evlist__ctlfd_recv(struct evlist *evlist, enum evlist_ctl_cmd *cmd,
+ char *cmd_data, size_t data_size)
+{
+ int err;
+ char c;
+ size_t bytes_read = 0;
+
+ *cmd = EVLIST_CTL_CMD_UNSUPPORTED;
+ memset(cmd_data, 0, data_size);
+ data_size--;
+
+ do {
+ err = read(evlist->ctl_fd.fd, &c, 1);
+ if (err > 0) {
+ if (c == '\n' || c == '\0')
+ break;
+ cmd_data[bytes_read++] = c;
+ if (bytes_read == data_size)
+ break;
+ continue;
+ } else if (err == -1) {
+ if (errno == EINTR)
+ continue;
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ err = 0;
+ else
+ pr_err("Failed to read from ctlfd %d: %m\n", evlist->ctl_fd.fd);
+ }
+ break;
+ } while (1);
+
+ pr_debug("Message from ctl_fd: \"%s%s\"\n", cmd_data,
+ bytes_read == data_size ? "" : c == '\n' ? "\\n" : "\\0");
+
+ if (bytes_read > 0) {
+ if (!strncmp(cmd_data, EVLIST_CTL_CMD_ENABLE_TAG,
+ (sizeof(EVLIST_CTL_CMD_ENABLE_TAG)-1))) {
+ *cmd = EVLIST_CTL_CMD_ENABLE;
+ } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_DISABLE_TAG,
+ (sizeof(EVLIST_CTL_CMD_DISABLE_TAG)-1))) {
+ *cmd = EVLIST_CTL_CMD_DISABLE;
+ } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_SNAPSHOT_TAG,
+ (sizeof(EVLIST_CTL_CMD_SNAPSHOT_TAG)-1))) {
+ *cmd = EVLIST_CTL_CMD_SNAPSHOT;
+ pr_debug("is snapshot\n");
+ } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_EVLIST_TAG,
+ (sizeof(EVLIST_CTL_CMD_EVLIST_TAG)-1))) {
+ *cmd = EVLIST_CTL_CMD_EVLIST;
+ } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_STOP_TAG,
+ (sizeof(EVLIST_CTL_CMD_STOP_TAG)-1))) {
+ *cmd = EVLIST_CTL_CMD_STOP;
+ } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_PING_TAG,
+ (sizeof(EVLIST_CTL_CMD_PING_TAG)-1))) {
+ *cmd = EVLIST_CTL_CMD_PING;
+ }
+ }
+
+ return bytes_read ? (int)bytes_read : err;
+}
+
+int evlist__ctlfd_ack(struct evlist *evlist)
+{
+ int err;
+
+ if (evlist->ctl_fd.ack == -1)
+ return 0;
+
+ err = write(evlist->ctl_fd.ack, EVLIST_CTL_CMD_ACK_TAG,
+ sizeof(EVLIST_CTL_CMD_ACK_TAG));
+ if (err == -1)
+ pr_err("failed to write to ctl_ack_fd %d: %m\n", evlist->ctl_fd.ack);
+
+ return err;
+}
+
+static int get_cmd_arg(char *cmd_data, size_t cmd_size, char **arg)
+{
+ char *data = cmd_data + cmd_size;
+
+ /* no argument */
+ if (!*data)
+ return 0;
+
+ /* there's argument */
+ if (*data == ' ') {
+ *arg = data + 1;
+ return 1;
+ }
+
+ /* malformed */
+ return -1;
+}
+
+static int evlist__ctlfd_enable(struct evlist *evlist, char *cmd_data, bool enable)
+{
+ struct evsel *evsel;
+ char *name;
+ int err;
+
+ err = get_cmd_arg(cmd_data,
+ enable ? sizeof(EVLIST_CTL_CMD_ENABLE_TAG) - 1 :
+ sizeof(EVLIST_CTL_CMD_DISABLE_TAG) - 1,
+ &name);
+ if (err < 0) {
+ pr_info("failed: wrong command\n");
+ return -1;
+ }
+
+ if (err) {
+ evsel = evlist__find_evsel_by_str(evlist, name);
+ if (evsel) {
+ if (enable)
+ evlist__enable_evsel(evlist, name);
+ else
+ evlist__disable_evsel(evlist, name);
+ pr_info("Event %s %s\n", evsel->name,
+ enable ? "enabled" : "disabled");
+ } else {
+ pr_info("failed: can't find '%s' event\n", name);
+ }
+ } else {
+ if (enable) {
+ evlist__enable(evlist);
+ pr_info(EVLIST_ENABLED_MSG);
+ } else {
+ evlist__disable(evlist);
+ pr_info(EVLIST_DISABLED_MSG);
+ }
+ }
+
+ return 0;
+}
+
+static int evlist__ctlfd_list(struct evlist *evlist, char *cmd_data)
+{
+ struct perf_attr_details details = { .verbose = false, };
+ struct evsel *evsel;
+ char *arg;
+ int err;
+
+ err = get_cmd_arg(cmd_data,
+ sizeof(EVLIST_CTL_CMD_EVLIST_TAG) - 1,
+ &arg);
+ if (err < 0) {
+ pr_info("failed: wrong command\n");
+ return -1;
+ }
+
+ if (err) {
+ if (!strcmp(arg, "-v")) {
+ details.verbose = true;
+ } else if (!strcmp(arg, "-g")) {
+ details.event_group = true;
+ } else if (!strcmp(arg, "-F")) {
+ details.freq = true;
+ } else {
+ pr_info("failed: wrong command\n");
+ return -1;
+ }
+ }
+
+ evlist__for_each_entry(evlist, evsel)
+ evsel__fprintf(evsel, &details, stderr);
+
+ return 0;
+}
+
+int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd)
+{
+ int err = 0;
+ char cmd_data[EVLIST_CTL_CMD_MAX_LEN];
+ int ctlfd_pos = evlist->ctl_fd.pos;
+ struct pollfd *entries = evlist->core.pollfd.entries;
+
+ if (!evlist__ctlfd_initialized(evlist) || !entries[ctlfd_pos].revents)
+ return 0;
+
+ if (entries[ctlfd_pos].revents & POLLIN) {
+ err = evlist__ctlfd_recv(evlist, cmd, cmd_data,
+ EVLIST_CTL_CMD_MAX_LEN);
+ if (err > 0) {
+ switch (*cmd) {
+ case EVLIST_CTL_CMD_ENABLE:
+ case EVLIST_CTL_CMD_DISABLE:
+ err = evlist__ctlfd_enable(evlist, cmd_data,
+ *cmd == EVLIST_CTL_CMD_ENABLE);
+ break;
+ case EVLIST_CTL_CMD_EVLIST:
+ err = evlist__ctlfd_list(evlist, cmd_data);
+ break;
+ case EVLIST_CTL_CMD_SNAPSHOT:
+ case EVLIST_CTL_CMD_STOP:
+ case EVLIST_CTL_CMD_PING:
+ break;
+ case EVLIST_CTL_CMD_ACK:
+ case EVLIST_CTL_CMD_UNSUPPORTED:
+ default:
+ pr_debug("ctlfd: unsupported %d\n", *cmd);
+ break;
+ }
+ if (!(*cmd == EVLIST_CTL_CMD_ACK || *cmd == EVLIST_CTL_CMD_UNSUPPORTED ||
+ *cmd == EVLIST_CTL_CMD_SNAPSHOT))
+ evlist__ctlfd_ack(evlist);
+ }
+ }
+
+ if (entries[ctlfd_pos].revents & (POLLHUP | POLLERR))
+ evlist__finalize_ctlfd(evlist);
+ else
+ entries[ctlfd_pos].revents = 0;
+
+ return err;
+}
+
+/**
+ * struct event_enable_time - perf record -D/--delay single time range.
+ * @start: start of time range to enable events in milliseconds
+ * @end: end of time range to enable events in milliseconds
+ *
+ * N.B. this structure is also accessed as an array of int.
+ */
+struct event_enable_time {
+ int start;
+ int end;
+};
+
+static int parse_event_enable_time(const char *str, struct event_enable_time *range, bool first)
+{
+ const char *fmt = first ? "%u - %u %n" : " , %u - %u %n";
+ int ret, start, end, n;
+
+ ret = sscanf(str, fmt, &start, &end, &n);
+ if (ret != 2 || end <= start)
+ return -EINVAL;
+ if (range) {
+ range->start = start;
+ range->end = end;
+ }
+ return n;
+}
+
+static ssize_t parse_event_enable_times(const char *str, struct event_enable_time *range)
+{
+ int incr = !!range;
+ bool first = true;
+ ssize_t ret, cnt;
+
+ for (cnt = 0; *str; cnt++) {
+ ret = parse_event_enable_time(str, range, first);
+ if (ret < 0)
+ return ret;
+ /* Check no overlap */
+ if (!first && range && range->start <= range[-1].end)
+ return -EINVAL;
+ str += ret;
+ range += incr;
+ first = false;
+ }
+ return cnt;
+}
+
+/**
+ * struct event_enable_timer - control structure for perf record -D/--delay.
+ * @evlist: event list
+ * @times: time ranges that events are enabled (N.B. this is also accessed as an
+ * array of int)
+ * @times_cnt: number of time ranges
+ * @timerfd: timer file descriptor
+ * @pollfd_pos: position in @evlist array of file descriptors to poll (fdarray)
+ * @times_step: current position in (int *)@times)[],
+ * refer event_enable_timer__process()
+ *
+ * Note, this structure is only used when there are time ranges, not when there
+ * is only an initial delay.
+ */
+struct event_enable_timer {
+ struct evlist *evlist;
+ struct event_enable_time *times;
+ size_t times_cnt;
+ int timerfd;
+ int pollfd_pos;
+ size_t times_step;
+};
+
+static int str_to_delay(const char *str)
+{
+ char *endptr;
+ long d;
+
+ d = strtol(str, &endptr, 10);
+ if (*endptr || d > INT_MAX || d < -1)
+ return 0;
+ return d;
+}
+
+int evlist__parse_event_enable_time(struct evlist *evlist, struct record_opts *opts,
+ const char *str, int unset)
+{
+ enum fdarray_flags flags = fdarray_flag__nonfilterable | fdarray_flag__non_perf_event;
+ struct event_enable_timer *eet;
+ ssize_t times_cnt;
+ ssize_t ret;
+ int err;
+
+ if (unset)
+ return 0;
+
+ opts->target.initial_delay = str_to_delay(str);
+ if (opts->target.initial_delay)
+ return 0;
+
+ ret = parse_event_enable_times(str, NULL);
+ if (ret < 0)
+ return ret;
+
+ times_cnt = ret;
+ if (times_cnt == 0)
+ return -EINVAL;
+
+ eet = zalloc(sizeof(*eet));
+ if (!eet)
+ return -ENOMEM;
+
+ eet->times = calloc(times_cnt, sizeof(*eet->times));
+ if (!eet->times) {
+ err = -ENOMEM;
+ goto free_eet;
+ }
+
+ if (parse_event_enable_times(str, eet->times) != times_cnt) {
+ err = -EINVAL;
+ goto free_eet_times;
+ }
+
+ eet->times_cnt = times_cnt;
+
+ eet->timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
+ if (eet->timerfd == -1) {
+ err = -errno;
+ pr_err("timerfd_create failed: %s\n", strerror(errno));
+ goto free_eet_times;
+ }
+
+ eet->pollfd_pos = perf_evlist__add_pollfd(&evlist->core, eet->timerfd, NULL, POLLIN, flags);
+ if (eet->pollfd_pos < 0) {
+ err = eet->pollfd_pos;
+ goto close_timerfd;
+ }
+
+ eet->evlist = evlist;
+ evlist->eet = eet;
+ opts->target.initial_delay = eet->times[0].start;
+
+ return 0;
+
+close_timerfd:
+ close(eet->timerfd);
+free_eet_times:
+ zfree(&eet->times);
+free_eet:
+ free(eet);
+ return err;
+}
+
+static int event_enable_timer__set_timer(struct event_enable_timer *eet, int ms)
+{
+ struct itimerspec its = {
+ .it_value.tv_sec = ms / MSEC_PER_SEC,
+ .it_value.tv_nsec = (ms % MSEC_PER_SEC) * NSEC_PER_MSEC,
+ };
+ int err = 0;
+
+ if (timerfd_settime(eet->timerfd, 0, &its, NULL) < 0) {
+ err = -errno;
+ pr_err("timerfd_settime failed: %s\n", strerror(errno));
+ }
+ return err;
+}
+
+int event_enable_timer__start(struct event_enable_timer *eet)
+{
+ int ms;
+
+ if (!eet)
+ return 0;
+
+ ms = eet->times[0].end - eet->times[0].start;
+ eet->times_step = 1;
+
+ return event_enable_timer__set_timer(eet, ms);
+}
+
+int event_enable_timer__process(struct event_enable_timer *eet)
+{
+ struct pollfd *entries;
+ short revents;
+
+ if (!eet)
+ return 0;
+
+ entries = eet->evlist->core.pollfd.entries;
+ revents = entries[eet->pollfd_pos].revents;
+ entries[eet->pollfd_pos].revents = 0;
+
+ if (revents & POLLIN) {
+ size_t step = eet->times_step;
+ size_t pos = step / 2;
+
+ if (step & 1) {
+ evlist__disable_non_dummy(eet->evlist);
+ pr_info(EVLIST_DISABLED_MSG);
+ if (pos >= eet->times_cnt - 1) {
+ /* Disarm timer */
+ event_enable_timer__set_timer(eet, 0);
+ return 1; /* Stop */
+ }
+ } else {
+ evlist__enable_non_dummy(eet->evlist);
+ pr_info(EVLIST_ENABLED_MSG);
+ }
+
+ step += 1;
+ pos = step / 2;
+
+ if (pos < eet->times_cnt) {
+ int *times = (int *)eet->times; /* Accessing 'times' as array of int */
+ int ms = times[step] - times[step - 1];
+
+ eet->times_step = step;
+ return event_enable_timer__set_timer(eet, ms);
+ }
+ }
+
+ return 0;
+}
+
+void event_enable_timer__exit(struct event_enable_timer **ep)
+{
+ if (!ep || !*ep)
+ return;
+ zfree(&(*ep)->times);
+ zfree(ep);
+}
+
+struct evsel *evlist__find_evsel(struct evlist *evlist, int idx)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.idx == idx)
+ return evsel;
+ }
+ return NULL;
+}
+
+int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf)
+{
+ struct evsel *evsel;
+ int printed = 0;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel__is_dummy_event(evsel))
+ continue;
+ if (size > (strlen(evsel__name(evsel)) + (printed ? 2 : 1))) {
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "," : "", evsel__name(evsel));
+ } else {
+ printed += scnprintf(bf + printed, size - printed, "%s...", printed ? "," : "");
+ break;
+ }
+ }
+
+ return printed;
+}
+
+void evlist__check_mem_load_aux(struct evlist *evlist)
+{
+ struct evsel *leader, *evsel, *pos;
+
+ /*
+ * For some platforms, the 'mem-loads' event is required to use
+ * together with 'mem-loads-aux' within a group and 'mem-loads-aux'
+ * must be the group leader. Now we disable this group before reporting
+ * because 'mem-loads-aux' is just an auxiliary event. It doesn't carry
+ * any valid memory load information.
+ */
+ evlist__for_each_entry(evlist, evsel) {
+ leader = evsel__leader(evsel);
+ if (leader == evsel)
+ continue;
+
+ if (leader->name && strstr(leader->name, "mem-loads-aux")) {
+ for_each_group_evsel(pos, leader) {
+ evsel__set_leader(pos, pos);
+ pos->core.nr_members = 0;
+ }
+ }
+ }
+}
+
+/**
+ * evlist__warn_user_requested_cpus() - Check each evsel against requested CPUs
+ * and warn if the user CPU list is inapplicable for the event's PMU's
+ * CPUs. Not core PMUs list a CPU in sysfs, but this may be overwritten by a
+ * user requested CPU and so any online CPU is applicable. Core PMUs handle
+ * events on the CPUs in their list and otherwise the event isn't supported.
+ * @evlist: The list of events being checked.
+ * @cpu_list: The user provided list of CPUs.
+ */
+void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list)
+{
+ struct perf_cpu_map *user_requested_cpus;
+ struct evsel *pos;
+
+ if (!cpu_list)
+ return;
+
+ user_requested_cpus = perf_cpu_map__new(cpu_list);
+ if (!user_requested_cpus)
+ return;
+
+ evlist__for_each_entry(evlist, pos) {
+ struct perf_cpu_map *intersect, *to_test;
+ const struct perf_pmu *pmu = evsel__find_pmu(pos);
+
+ to_test = pmu && pmu->is_core ? pmu->cpus : cpu_map__online();
+ intersect = perf_cpu_map__intersect(to_test, user_requested_cpus);
+ if (!perf_cpu_map__equal(intersect, user_requested_cpus)) {
+ char buf[128];
+
+ cpu_map__snprint(to_test, buf, sizeof(buf));
+ pr_warning("WARNING: A requested CPU in '%s' is not supported by PMU '%s' (CPUs %s) for event '%s'\n",
+ cpu_list, pmu ? pmu->name : "cpu", buf, evsel__name(pos));
+ }
+ perf_cpu_map__put(intersect);
+ }
+ perf_cpu_map__put(user_requested_cpus);
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
new file mode 100644
index 000000000..664c6bf7b
--- /dev/null
+++ b/tools/perf/util/evlist.h
@@ -0,0 +1,445 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_EVLIST_H
+#define __PERF_EVLIST_H 1
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/refcount.h>
+#include <linux/list.h>
+#include <api/fd/array.h>
+#include <internal/evlist.h>
+#include <internal/evsel.h>
+#include <perf/evlist.h>
+#include "events_stats.h"
+#include "evsel.h"
+#include <pthread.h>
+#include <signal.h>
+#include <unistd.h>
+
+struct pollfd;
+struct thread_map;
+struct perf_cpu_map;
+struct record_opts;
+
+/*
+ * State machine of bkw_mmap_state:
+ *
+ * .________________(forbid)_____________.
+ * | V
+ * NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY
+ * ^ ^ | ^ |
+ * | |__(forbid)____/ |___(forbid)___/|
+ * | |
+ * \_________________(3)_______________/
+ *
+ * NOTREADY : Backward ring buffers are not ready
+ * RUNNING : Backward ring buffers are recording
+ * DATA_PENDING : We are required to collect data from backward ring buffers
+ * EMPTY : We have collected data from backward ring buffers.
+ *
+ * (0): Setup backward ring buffer
+ * (1): Pause ring buffers for reading
+ * (2): Read from ring buffers
+ * (3): Resume ring buffers for recording
+ */
+enum bkw_mmap_state {
+ BKW_MMAP_NOTREADY,
+ BKW_MMAP_RUNNING,
+ BKW_MMAP_DATA_PENDING,
+ BKW_MMAP_EMPTY,
+};
+
+struct event_enable_timer;
+
+struct evlist {
+ struct perf_evlist core;
+ bool enabled;
+ int id_pos;
+ int is_pos;
+ u64 combined_sample_type;
+ enum bkw_mmap_state bkw_mmap_state;
+ struct {
+ int cork_fd;
+ pid_t pid;
+ } workload;
+ struct mmap *mmap;
+ struct mmap *overwrite_mmap;
+ struct evsel *selected;
+ struct events_stats stats;
+ struct perf_env *env;
+ void (*trace_event_sample_raw)(struct evlist *evlist,
+ union perf_event *event,
+ struct perf_sample *sample);
+ u64 first_sample_time;
+ u64 last_sample_time;
+ struct {
+ pthread_t th;
+ volatile int done;
+ } thread;
+ struct {
+ int fd; /* control file descriptor */
+ int ack; /* ack file descriptor for control commands */
+ int pos; /* index at evlist core object to check signals */
+ } ctl_fd;
+ struct event_enable_timer *eet;
+};
+
+struct evsel_str_handler {
+ const char *name;
+ void *handler;
+};
+
+struct evlist *evlist__new(void);
+struct evlist *evlist__new_default(void);
+struct evlist *evlist__new_dummy(void);
+void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
+ struct perf_thread_map *threads);
+void evlist__exit(struct evlist *evlist);
+void evlist__delete(struct evlist *evlist);
+
+void evlist__add(struct evlist *evlist, struct evsel *entry);
+void evlist__remove(struct evlist *evlist, struct evsel *evsel);
+
+int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs);
+
+int __evlist__add_default_attrs(struct evlist *evlist,
+ struct perf_event_attr *attrs, size_t nr_attrs);
+
+int arch_evlist__add_default_attrs(struct evlist *evlist,
+ struct perf_event_attr *attrs,
+ size_t nr_attrs);
+
+#define evlist__add_default_attrs(evlist, array) \
+ arch_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
+
+int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs);
+
+int evlist__add_dummy(struct evlist *evlist);
+struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide);
+static inline struct evsel *evlist__add_dummy_on_all_cpus(struct evlist *evlist)
+{
+ return evlist__add_aux_dummy(evlist, true);
+}
+#ifdef HAVE_LIBTRACEEVENT
+struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide);
+#endif
+
+int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr,
+ evsel__sb_cb_t cb, void *data);
+void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data);
+int evlist__start_sb_thread(struct evlist *evlist, struct target *target);
+void evlist__stop_sb_thread(struct evlist *evlist);
+
+#ifdef HAVE_LIBTRACEEVENT
+int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler);
+#endif
+
+int __evlist__set_tracepoints_handlers(struct evlist *evlist,
+ const struct evsel_str_handler *assocs,
+ size_t nr_assocs);
+
+#define evlist__set_tracepoints_handlers(evlist, array) \
+ __evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array))
+
+int evlist__set_tp_filter(struct evlist *evlist, const char *filter);
+int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid);
+int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids);
+
+int evlist__append_tp_filter(struct evlist *evlist, const char *filter);
+
+int evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid);
+int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids);
+
+struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id);
+struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name);
+
+int evlist__add_pollfd(struct evlist *evlist, int fd);
+int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask);
+
+#ifdef HAVE_EVENTFD_SUPPORT
+int evlist__add_wakeup_eventfd(struct evlist *evlist, int fd);
+#endif
+
+int evlist__poll(struct evlist *evlist, int timeout);
+
+struct evsel *evlist__id2evsel(struct evlist *evlist, u64 id);
+struct evsel *evlist__id2evsel_strict(struct evlist *evlist, u64 id);
+
+struct perf_sample_id *evlist__id2sid(struct evlist *evlist, u64 id);
+
+void evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state);
+
+void evlist__mmap_consume(struct evlist *evlist, int idx);
+
+int evlist__open(struct evlist *evlist);
+void evlist__close(struct evlist *evlist);
+
+struct callchain_param;
+
+void evlist__set_id_pos(struct evlist *evlist);
+void evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain);
+int record_opts__config(struct record_opts *opts);
+
+int evlist__prepare_workload(struct evlist *evlist, struct target *target,
+ const char *argv[], bool pipe_output,
+ void (*exec_error)(int signo, siginfo_t *info, void *ucontext));
+int evlist__start_workload(struct evlist *evlist);
+
+struct option;
+
+int __evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str);
+int evlist__parse_mmap_pages(const struct option *opt, const char *str, int unset);
+
+unsigned long perf_event_mlock_kb_in_pages(void);
+
+int evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
+ unsigned int auxtrace_pages,
+ bool auxtrace_overwrite, int nr_cblocks,
+ int affinity, int flush, int comp_level);
+int evlist__mmap(struct evlist *evlist, unsigned int pages);
+void evlist__munmap(struct evlist *evlist);
+
+size_t evlist__mmap_size(unsigned long pages);
+
+void evlist__disable(struct evlist *evlist);
+void evlist__enable(struct evlist *evlist);
+void evlist__toggle_enable(struct evlist *evlist);
+void evlist__disable_evsel(struct evlist *evlist, char *evsel_name);
+void evlist__enable_evsel(struct evlist *evlist, char *evsel_name);
+void evlist__disable_non_dummy(struct evlist *evlist);
+void evlist__enable_non_dummy(struct evlist *evlist);
+
+void evlist__set_selected(struct evlist *evlist, struct evsel *evsel);
+
+int evlist__create_maps(struct evlist *evlist, struct target *target);
+int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel);
+
+u64 __evlist__combined_sample_type(struct evlist *evlist);
+u64 evlist__combined_sample_type(struct evlist *evlist);
+u64 evlist__combined_branch_type(struct evlist *evlist);
+bool evlist__sample_id_all(struct evlist *evlist);
+u16 evlist__id_hdr_size(struct evlist *evlist);
+
+int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample);
+int evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *event, u64 *timestamp);
+
+bool evlist__valid_sample_type(struct evlist *evlist);
+bool evlist__valid_sample_id_all(struct evlist *evlist);
+bool evlist__valid_read_format(struct evlist *evlist);
+
+void evlist__splice_list_tail(struct evlist *evlist, struct list_head *list);
+
+static inline bool evlist__empty(struct evlist *evlist)
+{
+ return list_empty(&evlist->core.entries);
+}
+
+static inline struct evsel *evlist__first(struct evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(&evlist->core);
+
+ return container_of(evsel, struct evsel, core);
+}
+
+static inline struct evsel *evlist__last(struct evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__last(&evlist->core);
+
+ return container_of(evsel, struct evsel, core);
+}
+
+static inline int evlist__nr_groups(struct evlist *evlist)
+{
+ return perf_evlist__nr_groups(&evlist->core);
+}
+
+int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size);
+int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size);
+
+bool evlist__can_select_event(struct evlist *evlist, const char *str);
+void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel);
+
+/**
+ * __evlist__for_each_entry - iterate thru all the evsels
+ * @list: list_head instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define __evlist__for_each_entry(list, evsel) \
+ list_for_each_entry(evsel, list, core.node)
+
+/**
+ * evlist__for_each_entry - iterate thru all the evsels
+ * @evlist: evlist instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define evlist__for_each_entry(evlist, evsel) \
+ __evlist__for_each_entry(&(evlist)->core.entries, evsel)
+
+/**
+ * __evlist__for_each_entry_continue - continue iteration thru all the evsels
+ * @list: list_head instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define __evlist__for_each_entry_continue(list, evsel) \
+ list_for_each_entry_continue(evsel, list, core.node)
+
+/**
+ * evlist__for_each_entry_continue - continue iteration thru all the evsels
+ * @evlist: evlist instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define evlist__for_each_entry_continue(evlist, evsel) \
+ __evlist__for_each_entry_continue(&(evlist)->core.entries, evsel)
+
+/**
+ * __evlist__for_each_entry_from - continue iteration from @evsel (included)
+ * @list: list_head instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define __evlist__for_each_entry_from(list, evsel) \
+ list_for_each_entry_from(evsel, list, core.node)
+
+/**
+ * evlist__for_each_entry_from - continue iteration from @evsel (included)
+ * @evlist: evlist instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define evlist__for_each_entry_from(evlist, evsel) \
+ __evlist__for_each_entry_from(&(evlist)->core.entries, evsel)
+
+/**
+ * __evlist__for_each_entry_reverse - iterate thru all the evsels in reverse order
+ * @list: list_head instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define __evlist__for_each_entry_reverse(list, evsel) \
+ list_for_each_entry_reverse(evsel, list, core.node)
+
+/**
+ * evlist__for_each_entry_reverse - iterate thru all the evsels in reverse order
+ * @evlist: evlist instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define evlist__for_each_entry_reverse(evlist, evsel) \
+ __evlist__for_each_entry_reverse(&(evlist)->core.entries, evsel)
+
+/**
+ * __evlist__for_each_entry_safe - safely iterate thru all the evsels
+ * @list: list_head instance to iterate
+ * @tmp: struct evsel temp iterator
+ * @evsel: struct evsel iterator
+ */
+#define __evlist__for_each_entry_safe(list, tmp, evsel) \
+ list_for_each_entry_safe(evsel, tmp, list, core.node)
+
+/**
+ * evlist__for_each_entry_safe - safely iterate thru all the evsels
+ * @evlist: evlist instance to iterate
+ * @evsel: struct evsel iterator
+ * @tmp: struct evsel temp iterator
+ */
+#define evlist__for_each_entry_safe(evlist, tmp, evsel) \
+ __evlist__for_each_entry_safe(&(evlist)->core.entries, tmp, evsel)
+
+/** Iterator state for evlist__for_each_cpu */
+struct evlist_cpu_iterator {
+ /** The list being iterated through. */
+ struct evlist *container;
+ /** The current evsel of the iterator. */
+ struct evsel *evsel;
+ /** The CPU map index corresponding to the evsel->core.cpus for the current CPU. */
+ int cpu_map_idx;
+ /**
+ * The CPU map index corresponding to evlist->core.all_cpus for the
+ * current CPU. Distinct from cpu_map_idx as the evsel's cpu map may
+ * contain fewer entries.
+ */
+ int evlist_cpu_map_idx;
+ /** The number of CPU map entries in evlist->core.all_cpus. */
+ int evlist_cpu_map_nr;
+ /** The current CPU of the iterator. */
+ struct perf_cpu cpu;
+ /** If present, used to set the affinity when switching between CPUs. */
+ struct affinity *affinity;
+};
+
+/**
+ * evlist__for_each_cpu - without affinity, iterate over the evlist. With
+ * affinity, iterate over all CPUs and then the evlist
+ * for each evsel on that CPU. When switching between
+ * CPUs the affinity is set to the CPU to avoid IPIs
+ * during syscalls.
+ * @evlist_cpu_itr: the iterator instance.
+ * @evlist: evlist instance to iterate.
+ * @affinity: NULL or used to set the affinity to the current CPU.
+ */
+#define evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) \
+ for ((evlist_cpu_itr) = evlist__cpu_begin(evlist, affinity); \
+ !evlist_cpu_iterator__end(&evlist_cpu_itr); \
+ evlist_cpu_iterator__next(&evlist_cpu_itr))
+
+/** Returns an iterator set to the first CPU/evsel of evlist. */
+struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity);
+/** Move to next element in iterator, updating CPU, evsel and the affinity. */
+void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr);
+/** Returns true when iterator is at the end of the CPUs and evlist. */
+bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr);
+
+struct evsel *evlist__get_tracking_event(struct evlist *evlist);
+void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel);
+
+struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str);
+
+struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event);
+
+bool evlist__exclude_kernel(struct evlist *evlist);
+
+void evlist__force_leader(struct evlist *evlist);
+
+struct evsel *evlist__reset_weak_group(struct evlist *evlist, struct evsel *evsel, bool close);
+
+#define EVLIST_CTL_CMD_ENABLE_TAG "enable"
+#define EVLIST_CTL_CMD_DISABLE_TAG "disable"
+#define EVLIST_CTL_CMD_ACK_TAG "ack\n"
+#define EVLIST_CTL_CMD_SNAPSHOT_TAG "snapshot"
+#define EVLIST_CTL_CMD_EVLIST_TAG "evlist"
+#define EVLIST_CTL_CMD_STOP_TAG "stop"
+#define EVLIST_CTL_CMD_PING_TAG "ping"
+
+#define EVLIST_CTL_CMD_MAX_LEN 64
+
+enum evlist_ctl_cmd {
+ EVLIST_CTL_CMD_UNSUPPORTED = 0,
+ EVLIST_CTL_CMD_ENABLE,
+ EVLIST_CTL_CMD_DISABLE,
+ EVLIST_CTL_CMD_ACK,
+ EVLIST_CTL_CMD_SNAPSHOT,
+ EVLIST_CTL_CMD_EVLIST,
+ EVLIST_CTL_CMD_STOP,
+ EVLIST_CTL_CMD_PING,
+};
+
+int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close);
+void evlist__close_control(int ctl_fd, int ctl_fd_ack, bool *ctl_fd_close);
+int evlist__initialize_ctlfd(struct evlist *evlist, int ctl_fd, int ctl_fd_ack);
+int evlist__finalize_ctlfd(struct evlist *evlist);
+bool evlist__ctlfd_initialized(struct evlist *evlist);
+int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd);
+int evlist__ctlfd_ack(struct evlist *evlist);
+
+#define EVLIST_ENABLED_MSG "Events enabled\n"
+#define EVLIST_DISABLED_MSG "Events disabled\n"
+
+int evlist__parse_event_enable_time(struct evlist *evlist, struct record_opts *opts,
+ const char *str, int unset);
+int event_enable_timer__start(struct event_enable_timer *eet);
+void event_enable_timer__exit(struct event_enable_timer **ep);
+int event_enable_timer__process(struct event_enable_timer *eet);
+
+struct evsel *evlist__find_evsel(struct evlist *evlist, int idx);
+
+int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf);
+void evlist__check_mem_load_aux(struct evlist *evlist);
+void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list);
+
+#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
new file mode 100644
index 000000000..a8a5ff87c
--- /dev/null
+++ b/tools/perf/util/evsel.c
@@ -0,0 +1,3171 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from builtin-{top,stat,record}.c, see those files for further
+ * copyright notes.
+ */
+
+#include <byteswap.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/bitops.h>
+#include <api/fs/fs.h>
+#include <api/fs/tracing_path.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/perf_event.h>
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/zalloc.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <stdlib.h>
+#include <perf/evsel.h>
+#include "asm/bug.h"
+#include "bpf_counter.h"
+#include "callchain.h"
+#include "cgroup.h"
+#include "counts.h"
+#include "event.h"
+#include "evsel.h"
+#include "util/env.h"
+#include "util/evsel_config.h"
+#include "util/evsel_fprintf.h"
+#include "evlist.h"
+#include <perf/cpumap.h>
+#include "thread_map.h"
+#include "target.h"
+#include "perf_regs.h"
+#include "record.h"
+#include "debug.h"
+#include "trace-event.h"
+#include "stat.h"
+#include "string2.h"
+#include "memswap.h"
+#include "util.h"
+#include "util/hashmap.h"
+#include "off_cpu.h"
+#include "pmu.h"
+#include "pmus.h"
+#include "../perf-sys.h"
+#include "util/parse-branch-options.h"
+#include "util/bpf-filter.h"
+#include <internal/xyarray.h>
+#include <internal/lib.h>
+#include <internal/threadmap.h>
+
+#include <linux/ctype.h>
+
+#ifdef HAVE_LIBTRACEEVENT
+#include <traceevent/event-parse.h>
+#endif
+
+struct perf_missing_features perf_missing_features;
+
+static clockid_t clockid;
+
+static const char *const perf_tool_event__tool_names[PERF_TOOL_MAX] = {
+ NULL,
+ "duration_time",
+ "user_time",
+ "system_time",
+};
+
+const char *perf_tool_event__to_str(enum perf_tool_event ev)
+{
+ if (ev > PERF_TOOL_NONE && ev < PERF_TOOL_MAX)
+ return perf_tool_event__tool_names[ev];
+
+ return NULL;
+}
+
+enum perf_tool_event perf_tool_event__from_str(const char *str)
+{
+ int i;
+
+ perf_tool_event__for_each_event(i) {
+ if (!strcmp(str, perf_tool_event__tool_names[i]))
+ return i;
+ }
+ return PERF_TOOL_NONE;
+}
+
+
+static int evsel__no_extra_init(struct evsel *evsel __maybe_unused)
+{
+ return 0;
+}
+
+void __weak test_attr__ready(void) { }
+
+static void evsel__no_extra_fini(struct evsel *evsel __maybe_unused)
+{
+}
+
+static struct {
+ size_t size;
+ int (*init)(struct evsel *evsel);
+ void (*fini)(struct evsel *evsel);
+} perf_evsel__object = {
+ .size = sizeof(struct evsel),
+ .init = evsel__no_extra_init,
+ .fini = evsel__no_extra_fini,
+};
+
+int evsel__object_config(size_t object_size, int (*init)(struct evsel *evsel),
+ void (*fini)(struct evsel *evsel))
+{
+
+ if (object_size == 0)
+ goto set_methods;
+
+ if (perf_evsel__object.size > object_size)
+ return -EINVAL;
+
+ perf_evsel__object.size = object_size;
+
+set_methods:
+ if (init != NULL)
+ perf_evsel__object.init = init;
+
+ if (fini != NULL)
+ perf_evsel__object.fini = fini;
+
+ return 0;
+}
+
+#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
+
+int __evsel__sample_size(u64 sample_type)
+{
+ u64 mask = sample_type & PERF_SAMPLE_MASK;
+ int size = 0;
+ int i;
+
+ for (i = 0; i < 64; i++) {
+ if (mask & (1ULL << i))
+ size++;
+ }
+
+ size *= sizeof(u64);
+
+ return size;
+}
+
+/**
+ * __perf_evsel__calc_id_pos - calculate id_pos.
+ * @sample_type: sample type
+ *
+ * This function returns the position of the event id (PERF_SAMPLE_ID or
+ * PERF_SAMPLE_IDENTIFIER) in a sample event i.e. in the array of struct
+ * perf_record_sample.
+ */
+static int __perf_evsel__calc_id_pos(u64 sample_type)
+{
+ int idx = 0;
+
+ if (sample_type & PERF_SAMPLE_IDENTIFIER)
+ return 0;
+
+ if (!(sample_type & PERF_SAMPLE_ID))
+ return -1;
+
+ if (sample_type & PERF_SAMPLE_IP)
+ idx += 1;
+
+ if (sample_type & PERF_SAMPLE_TID)
+ idx += 1;
+
+ if (sample_type & PERF_SAMPLE_TIME)
+ idx += 1;
+
+ if (sample_type & PERF_SAMPLE_ADDR)
+ idx += 1;
+
+ return idx;
+}
+
+/**
+ * __perf_evsel__calc_is_pos - calculate is_pos.
+ * @sample_type: sample type
+ *
+ * This function returns the position (counting backwards) of the event id
+ * (PERF_SAMPLE_ID or PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if
+ * sample_id_all is used there is an id sample appended to non-sample events.
+ */
+static int __perf_evsel__calc_is_pos(u64 sample_type)
+{
+ int idx = 1;
+
+ if (sample_type & PERF_SAMPLE_IDENTIFIER)
+ return 1;
+
+ if (!(sample_type & PERF_SAMPLE_ID))
+ return -1;
+
+ if (sample_type & PERF_SAMPLE_CPU)
+ idx += 1;
+
+ if (sample_type & PERF_SAMPLE_STREAM_ID)
+ idx += 1;
+
+ return idx;
+}
+
+void evsel__calc_id_pos(struct evsel *evsel)
+{
+ evsel->id_pos = __perf_evsel__calc_id_pos(evsel->core.attr.sample_type);
+ evsel->is_pos = __perf_evsel__calc_is_pos(evsel->core.attr.sample_type);
+}
+
+void __evsel__set_sample_bit(struct evsel *evsel,
+ enum perf_event_sample_format bit)
+{
+ if (!(evsel->core.attr.sample_type & bit)) {
+ evsel->core.attr.sample_type |= bit;
+ evsel->sample_size += sizeof(u64);
+ evsel__calc_id_pos(evsel);
+ }
+}
+
+void __evsel__reset_sample_bit(struct evsel *evsel,
+ enum perf_event_sample_format bit)
+{
+ if (evsel->core.attr.sample_type & bit) {
+ evsel->core.attr.sample_type &= ~bit;
+ evsel->sample_size -= sizeof(u64);
+ evsel__calc_id_pos(evsel);
+ }
+}
+
+void evsel__set_sample_id(struct evsel *evsel,
+ bool can_sample_identifier)
+{
+ if (can_sample_identifier) {
+ evsel__reset_sample_bit(evsel, ID);
+ evsel__set_sample_bit(evsel, IDENTIFIER);
+ } else {
+ evsel__set_sample_bit(evsel, ID);
+ }
+ evsel->core.attr.read_format |= PERF_FORMAT_ID;
+}
+
+/**
+ * evsel__is_function_event - Return whether given evsel is a function
+ * trace event
+ *
+ * @evsel - evsel selector to be tested
+ *
+ * Return %true if event is function trace event
+ */
+bool evsel__is_function_event(struct evsel *evsel)
+{
+#define FUNCTION_EVENT "ftrace:function"
+
+ return evsel->name &&
+ !strncmp(FUNCTION_EVENT, evsel->name, sizeof(FUNCTION_EVENT));
+
+#undef FUNCTION_EVENT
+}
+
+void evsel__init(struct evsel *evsel,
+ struct perf_event_attr *attr, int idx)
+{
+ perf_evsel__init(&evsel->core, attr, idx);
+ evsel->tracking = !idx;
+ evsel->unit = strdup("");
+ evsel->scale = 1.0;
+ evsel->max_events = ULONG_MAX;
+ evsel->evlist = NULL;
+ evsel->bpf_obj = NULL;
+ evsel->bpf_fd = -1;
+ INIT_LIST_HEAD(&evsel->config_terms);
+ INIT_LIST_HEAD(&evsel->bpf_counter_list);
+ INIT_LIST_HEAD(&evsel->bpf_filters);
+ perf_evsel__object.init(evsel);
+ evsel->sample_size = __evsel__sample_size(attr->sample_type);
+ evsel__calc_id_pos(evsel);
+ evsel->cmdline_group_boundary = false;
+ evsel->metric_events = NULL;
+ evsel->per_pkg_mask = NULL;
+ evsel->collect_stat = false;
+ evsel->pmu_name = NULL;
+ evsel->group_pmu_name = NULL;
+ evsel->skippable = false;
+}
+
+struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx)
+{
+ struct evsel *evsel = zalloc(perf_evsel__object.size);
+
+ if (!evsel)
+ return NULL;
+ evsel__init(evsel, attr, idx);
+
+ if (evsel__is_bpf_output(evsel) && !attr->sample_type) {
+ evsel->core.attr.sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
+ PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
+ evsel->core.attr.sample_period = 1;
+ }
+
+ if (evsel__is_clock(evsel)) {
+ free((char *)evsel->unit);
+ evsel->unit = strdup("msec");
+ evsel->scale = 1e-6;
+ }
+
+ return evsel;
+}
+
+int copy_config_terms(struct list_head *dst, struct list_head *src)
+{
+ struct evsel_config_term *pos, *tmp;
+
+ list_for_each_entry(pos, src, list) {
+ tmp = malloc(sizeof(*tmp));
+ if (tmp == NULL)
+ return -ENOMEM;
+
+ *tmp = *pos;
+ if (tmp->free_str) {
+ tmp->val.str = strdup(pos->val.str);
+ if (tmp->val.str == NULL) {
+ free(tmp);
+ return -ENOMEM;
+ }
+ }
+ list_add_tail(&tmp->list, dst);
+ }
+ return 0;
+}
+
+static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src)
+{
+ return copy_config_terms(&dst->config_terms, &src->config_terms);
+}
+
+/**
+ * evsel__clone - create a new evsel copied from @orig
+ * @orig: original evsel
+ *
+ * The assumption is that @orig is not configured nor opened yet.
+ * So we only care about the attributes that can be set while it's parsed.
+ */
+struct evsel *evsel__clone(struct evsel *orig)
+{
+ struct evsel *evsel;
+
+ BUG_ON(orig->core.fd);
+ BUG_ON(orig->counts);
+ BUG_ON(orig->priv);
+ BUG_ON(orig->per_pkg_mask);
+
+ /* cannot handle BPF objects for now */
+ if (orig->bpf_obj)
+ return NULL;
+
+ evsel = evsel__new(&orig->core.attr);
+ if (evsel == NULL)
+ return NULL;
+
+ evsel->core.cpus = perf_cpu_map__get(orig->core.cpus);
+ evsel->core.own_cpus = perf_cpu_map__get(orig->core.own_cpus);
+ evsel->core.threads = perf_thread_map__get(orig->core.threads);
+ evsel->core.nr_members = orig->core.nr_members;
+ evsel->core.system_wide = orig->core.system_wide;
+ evsel->core.requires_cpu = orig->core.requires_cpu;
+ evsel->core.is_pmu_core = orig->core.is_pmu_core;
+
+ if (orig->name) {
+ evsel->name = strdup(orig->name);
+ if (evsel->name == NULL)
+ goto out_err;
+ }
+ if (orig->group_name) {
+ evsel->group_name = strdup(orig->group_name);
+ if (evsel->group_name == NULL)
+ goto out_err;
+ }
+ if (orig->pmu_name) {
+ evsel->pmu_name = strdup(orig->pmu_name);
+ if (evsel->pmu_name == NULL)
+ goto out_err;
+ }
+ if (orig->group_pmu_name) {
+ evsel->group_pmu_name = strdup(orig->group_pmu_name);
+ if (evsel->group_pmu_name == NULL)
+ goto out_err;
+ }
+ if (orig->filter) {
+ evsel->filter = strdup(orig->filter);
+ if (evsel->filter == NULL)
+ goto out_err;
+ }
+ if (orig->metric_id) {
+ evsel->metric_id = strdup(orig->metric_id);
+ if (evsel->metric_id == NULL)
+ goto out_err;
+ }
+ evsel->cgrp = cgroup__get(orig->cgrp);
+#ifdef HAVE_LIBTRACEEVENT
+ evsel->tp_format = orig->tp_format;
+#endif
+ evsel->handler = orig->handler;
+ evsel->core.leader = orig->core.leader;
+
+ evsel->max_events = orig->max_events;
+ evsel->tool_event = orig->tool_event;
+ free((char *)evsel->unit);
+ evsel->unit = strdup(orig->unit);
+ if (evsel->unit == NULL)
+ goto out_err;
+
+ evsel->scale = orig->scale;
+ evsel->snapshot = orig->snapshot;
+ evsel->per_pkg = orig->per_pkg;
+ evsel->percore = orig->percore;
+ evsel->precise_max = orig->precise_max;
+ evsel->is_libpfm_event = orig->is_libpfm_event;
+
+ evsel->exclude_GH = orig->exclude_GH;
+ evsel->sample_read = orig->sample_read;
+ evsel->auto_merge_stats = orig->auto_merge_stats;
+ evsel->collect_stat = orig->collect_stat;
+ evsel->weak_group = orig->weak_group;
+ evsel->use_config_name = orig->use_config_name;
+ evsel->pmu = orig->pmu;
+
+ if (evsel__copy_config_terms(evsel, orig) < 0)
+ goto out_err;
+
+ return evsel;
+
+out_err:
+ evsel__delete(evsel);
+ return NULL;
+}
+
+/*
+ * Returns pointer with encoded error via <linux/err.h> interface.
+ */
+#ifdef HAVE_LIBTRACEEVENT
+struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx)
+{
+ struct evsel *evsel = zalloc(perf_evsel__object.size);
+ int err = -ENOMEM;
+
+ if (evsel == NULL) {
+ goto out_err;
+ } else {
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_TRACEPOINT,
+ .sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
+ PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
+ };
+
+ if (asprintf(&evsel->name, "%s:%s", sys, name) < 0)
+ goto out_free;
+
+ evsel->tp_format = trace_event__tp_format(sys, name);
+ if (IS_ERR(evsel->tp_format)) {
+ err = PTR_ERR(evsel->tp_format);
+ goto out_free;
+ }
+
+ event_attr_init(&attr);
+ attr.config = evsel->tp_format->id;
+ attr.sample_period = 1;
+ evsel__init(evsel, &attr, idx);
+ }
+
+ return evsel;
+
+out_free:
+ zfree(&evsel->name);
+ free(evsel);
+out_err:
+ return ERR_PTR(err);
+}
+#endif
+
+const char *const evsel__hw_names[PERF_COUNT_HW_MAX] = {
+ "cycles",
+ "instructions",
+ "cache-references",
+ "cache-misses",
+ "branches",
+ "branch-misses",
+ "bus-cycles",
+ "stalled-cycles-frontend",
+ "stalled-cycles-backend",
+ "ref-cycles",
+};
+
+char *evsel__bpf_counter_events;
+
+bool evsel__match_bpf_counter_events(const char *name)
+{
+ int name_len;
+ bool match;
+ char *ptr;
+
+ if (!evsel__bpf_counter_events)
+ return false;
+
+ ptr = strstr(evsel__bpf_counter_events, name);
+ name_len = strlen(name);
+
+ /* check name matches a full token in evsel__bpf_counter_events */
+ match = (ptr != NULL) &&
+ ((ptr == evsel__bpf_counter_events) || (*(ptr - 1) == ',')) &&
+ ((*(ptr + name_len) == ',') || (*(ptr + name_len) == '\0'));
+
+ return match;
+}
+
+static const char *__evsel__hw_name(u64 config)
+{
+ if (config < PERF_COUNT_HW_MAX && evsel__hw_names[config])
+ return evsel__hw_names[config];
+
+ return "unknown-hardware";
+}
+
+static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size)
+{
+ int colon = 0, r = 0;
+ struct perf_event_attr *attr = &evsel->core.attr;
+ bool exclude_guest_default = false;
+
+#define MOD_PRINT(context, mod) do { \
+ if (!attr->exclude_##context) { \
+ if (!colon) colon = ++r; \
+ r += scnprintf(bf + r, size - r, "%c", mod); \
+ } } while(0)
+
+ if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv) {
+ MOD_PRINT(kernel, 'k');
+ MOD_PRINT(user, 'u');
+ MOD_PRINT(hv, 'h');
+ exclude_guest_default = true;
+ }
+
+ if (attr->precise_ip) {
+ if (!colon)
+ colon = ++r;
+ r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp");
+ exclude_guest_default = true;
+ }
+
+ if (attr->exclude_host || attr->exclude_guest == exclude_guest_default) {
+ MOD_PRINT(host, 'H');
+ MOD_PRINT(guest, 'G');
+ }
+#undef MOD_PRINT
+ if (colon)
+ bf[colon - 1] = ':';
+ return r;
+}
+
+int __weak arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
+{
+ return scnprintf(bf, size, "%s", __evsel__hw_name(evsel->core.attr.config));
+}
+
+static int evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
+{
+ int r = arch_evsel__hw_name(evsel, bf, size);
+ return r + evsel__add_modifiers(evsel, bf + r, size - r);
+}
+
+const char *const evsel__sw_names[PERF_COUNT_SW_MAX] = {
+ "cpu-clock",
+ "task-clock",
+ "page-faults",
+ "context-switches",
+ "cpu-migrations",
+ "minor-faults",
+ "major-faults",
+ "alignment-faults",
+ "emulation-faults",
+ "dummy",
+};
+
+static const char *__evsel__sw_name(u64 config)
+{
+ if (config < PERF_COUNT_SW_MAX && evsel__sw_names[config])
+ return evsel__sw_names[config];
+ return "unknown-software";
+}
+
+static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size)
+{
+ int r = scnprintf(bf, size, "%s", __evsel__sw_name(evsel->core.attr.config));
+ return r + evsel__add_modifiers(evsel, bf + r, size - r);
+}
+
+static int evsel__tool_name(enum perf_tool_event ev, char *bf, size_t size)
+{
+ return scnprintf(bf, size, "%s", perf_tool_event__to_str(ev));
+}
+
+static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
+{
+ int r;
+
+ r = scnprintf(bf, size, "mem:0x%" PRIx64 ":", addr);
+
+ if (type & HW_BREAKPOINT_R)
+ r += scnprintf(bf + r, size - r, "r");
+
+ if (type & HW_BREAKPOINT_W)
+ r += scnprintf(bf + r, size - r, "w");
+
+ if (type & HW_BREAKPOINT_X)
+ r += scnprintf(bf + r, size - r, "x");
+
+ return r;
+}
+
+static int evsel__bp_name(struct evsel *evsel, char *bf, size_t size)
+{
+ struct perf_event_attr *attr = &evsel->core.attr;
+ int r = __evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
+ return r + evsel__add_modifiers(evsel, bf + r, size - r);
+}
+
+const char *const evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = {
+ { "L1-dcache", "l1-d", "l1d", "L1-data", },
+ { "L1-icache", "l1-i", "l1i", "L1-instruction", },
+ { "LLC", "L2", },
+ { "dTLB", "d-tlb", "Data-TLB", },
+ { "iTLB", "i-tlb", "Instruction-TLB", },
+ { "branch", "branches", "bpu", "btb", "bpc", },
+ { "node", },
+};
+
+const char *const evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES] = {
+ { "load", "loads", "read", },
+ { "store", "stores", "write", },
+ { "prefetch", "prefetches", "speculative-read", "speculative-load", },
+};
+
+const char *const evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES] = {
+ { "refs", "Reference", "ops", "access", },
+ { "misses", "miss", },
+};
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+#define CACHE_READ (1 << C(OP_READ))
+#define CACHE_WRITE (1 << C(OP_WRITE))
+#define CACHE_PREFETCH (1 << C(OP_PREFETCH))
+#define COP(x) (1 << x)
+
+/*
+ * cache operation stat
+ * L1I : Read and prefetch only
+ * ITLB and BPU : Read-only
+ */
+static const unsigned long evsel__hw_cache_stat[C(MAX)] = {
+ [C(L1D)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(L1I)] = (CACHE_READ | CACHE_PREFETCH),
+ [C(LL)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(DTLB)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(ITLB)] = (CACHE_READ),
+ [C(BPU)] = (CACHE_READ),
+ [C(NODE)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+};
+
+bool evsel__is_cache_op_valid(u8 type, u8 op)
+{
+ if (evsel__hw_cache_stat[type] & COP(op))
+ return true; /* valid */
+ else
+ return false; /* invalid */
+}
+
+int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size)
+{
+ if (result) {
+ return scnprintf(bf, size, "%s-%s-%s", evsel__hw_cache[type][0],
+ evsel__hw_cache_op[op][0],
+ evsel__hw_cache_result[result][0]);
+ }
+
+ return scnprintf(bf, size, "%s-%s", evsel__hw_cache[type][0],
+ evsel__hw_cache_op[op][1]);
+}
+
+static int __evsel__hw_cache_name(u64 config, char *bf, size_t size)
+{
+ u8 op, result, type = (config >> 0) & 0xff;
+ const char *err = "unknown-ext-hardware-cache-type";
+
+ if (type >= PERF_COUNT_HW_CACHE_MAX)
+ goto out_err;
+
+ op = (config >> 8) & 0xff;
+ err = "unknown-ext-hardware-cache-op";
+ if (op >= PERF_COUNT_HW_CACHE_OP_MAX)
+ goto out_err;
+
+ result = (config >> 16) & 0xff;
+ err = "unknown-ext-hardware-cache-result";
+ if (result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+ goto out_err;
+
+ err = "invalid-cache";
+ if (!evsel__is_cache_op_valid(type, op))
+ goto out_err;
+
+ return __evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
+out_err:
+ return scnprintf(bf, size, "%s", err);
+}
+
+static int evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size)
+{
+ int ret = __evsel__hw_cache_name(evsel->core.attr.config, bf, size);
+ return ret + evsel__add_modifiers(evsel, bf + ret, size - ret);
+}
+
+static int evsel__raw_name(struct evsel *evsel, char *bf, size_t size)
+{
+ int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->core.attr.config);
+ return ret + evsel__add_modifiers(evsel, bf + ret, size - ret);
+}
+
+const char *evsel__name(struct evsel *evsel)
+{
+ char bf[128];
+
+ if (!evsel)
+ goto out_unknown;
+
+ if (evsel->name)
+ return evsel->name;
+
+ switch (evsel->core.attr.type) {
+ case PERF_TYPE_RAW:
+ evsel__raw_name(evsel, bf, sizeof(bf));
+ break;
+
+ case PERF_TYPE_HARDWARE:
+ evsel__hw_name(evsel, bf, sizeof(bf));
+ break;
+
+ case PERF_TYPE_HW_CACHE:
+ evsel__hw_cache_name(evsel, bf, sizeof(bf));
+ break;
+
+ case PERF_TYPE_SOFTWARE:
+ if (evsel__is_tool(evsel))
+ evsel__tool_name(evsel->tool_event, bf, sizeof(bf));
+ else
+ evsel__sw_name(evsel, bf, sizeof(bf));
+ break;
+
+ case PERF_TYPE_TRACEPOINT:
+ scnprintf(bf, sizeof(bf), "%s", "unknown tracepoint");
+ break;
+
+ case PERF_TYPE_BREAKPOINT:
+ evsel__bp_name(evsel, bf, sizeof(bf));
+ break;
+
+ default:
+ scnprintf(bf, sizeof(bf), "unknown attr type: %d",
+ evsel->core.attr.type);
+ break;
+ }
+
+ evsel->name = strdup(bf);
+
+ if (evsel->name)
+ return evsel->name;
+out_unknown:
+ return "unknown";
+}
+
+bool evsel__name_is(struct evsel *evsel, const char *name)
+{
+ return !strcmp(evsel__name(evsel), name);
+}
+
+const char *evsel__metric_id(const struct evsel *evsel)
+{
+ if (evsel->metric_id)
+ return evsel->metric_id;
+
+ if (evsel__is_tool(evsel))
+ return perf_tool_event__to_str(evsel->tool_event);
+
+ return "unknown";
+}
+
+const char *evsel__group_name(struct evsel *evsel)
+{
+ return evsel->group_name ?: "anon group";
+}
+
+/*
+ * Returns the group details for the specified leader,
+ * with following rules.
+ *
+ * For record -e '{cycles,instructions}'
+ * 'anon group { cycles:u, instructions:u }'
+ *
+ * For record -e 'cycles,instructions' and report --group
+ * 'cycles:u, instructions:u'
+ */
+int evsel__group_desc(struct evsel *evsel, char *buf, size_t size)
+{
+ int ret = 0;
+ struct evsel *pos;
+ const char *group_name = evsel__group_name(evsel);
+
+ if (!evsel->forced_leader)
+ ret = scnprintf(buf, size, "%s { ", group_name);
+
+ ret += scnprintf(buf + ret, size - ret, "%s", evsel__name(evsel));
+
+ for_each_group_member(pos, evsel)
+ ret += scnprintf(buf + ret, size - ret, ", %s", evsel__name(pos));
+
+ if (!evsel->forced_leader)
+ ret += scnprintf(buf + ret, size - ret, " }");
+
+ return ret;
+}
+
+static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
+ struct callchain_param *param)
+{
+ bool function = evsel__is_function_event(evsel);
+ struct perf_event_attr *attr = &evsel->core.attr;
+ const char *arch = perf_env__arch(evsel__env(evsel));
+
+ evsel__set_sample_bit(evsel, CALLCHAIN);
+
+ attr->sample_max_stack = param->max_stack;
+
+ if (opts->kernel_callchains)
+ attr->exclude_callchain_user = 1;
+ if (opts->user_callchains)
+ attr->exclude_callchain_kernel = 1;
+ if (param->record_mode == CALLCHAIN_LBR) {
+ if (!opts->branch_stack) {
+ if (attr->exclude_user) {
+ pr_warning("LBR callstack option is only available "
+ "to get user callchain information. "
+ "Falling back to framepointers.\n");
+ } else {
+ evsel__set_sample_bit(evsel, BRANCH_STACK);
+ attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
+ PERF_SAMPLE_BRANCH_CALL_STACK |
+ PERF_SAMPLE_BRANCH_NO_CYCLES |
+ PERF_SAMPLE_BRANCH_NO_FLAGS |
+ PERF_SAMPLE_BRANCH_HW_INDEX;
+ }
+ } else
+ pr_warning("Cannot use LBR callstack with branch stack. "
+ "Falling back to framepointers.\n");
+ }
+
+ if (param->record_mode == CALLCHAIN_DWARF) {
+ if (!function) {
+ evsel__set_sample_bit(evsel, REGS_USER);
+ evsel__set_sample_bit(evsel, STACK_USER);
+ if (opts->sample_user_regs &&
+ DWARF_MINIMAL_REGS(arch) != arch__user_reg_mask()) {
+ attr->sample_regs_user |= DWARF_MINIMAL_REGS(arch);
+ pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, "
+ "specifying a subset with --user-regs may render DWARF unwinding unreliable, "
+ "so the minimal registers set (IP, SP) is explicitly forced.\n");
+ } else {
+ attr->sample_regs_user |= arch__user_reg_mask();
+ }
+ attr->sample_stack_user = param->dump_size;
+ attr->exclude_callchain_user = 1;
+ } else {
+ pr_info("Cannot use DWARF unwind for function trace event,"
+ " falling back to framepointers.\n");
+ }
+ }
+
+ if (function) {
+ pr_info("Disabling user space callchains for function trace event.\n");
+ attr->exclude_callchain_user = 1;
+ }
+}
+
+void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
+ struct callchain_param *param)
+{
+ if (param->enabled)
+ return __evsel__config_callchain(evsel, opts, param);
+}
+
+static void evsel__reset_callgraph(struct evsel *evsel, struct callchain_param *param)
+{
+ struct perf_event_attr *attr = &evsel->core.attr;
+
+ evsel__reset_sample_bit(evsel, CALLCHAIN);
+ if (param->record_mode == CALLCHAIN_LBR) {
+ evsel__reset_sample_bit(evsel, BRANCH_STACK);
+ attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
+ PERF_SAMPLE_BRANCH_CALL_STACK |
+ PERF_SAMPLE_BRANCH_HW_INDEX);
+ }
+ if (param->record_mode == CALLCHAIN_DWARF) {
+ evsel__reset_sample_bit(evsel, REGS_USER);
+ evsel__reset_sample_bit(evsel, STACK_USER);
+ }
+}
+
+static void evsel__apply_config_terms(struct evsel *evsel,
+ struct record_opts *opts, bool track)
+{
+ struct evsel_config_term *term;
+ struct list_head *config_terms = &evsel->config_terms;
+ struct perf_event_attr *attr = &evsel->core.attr;
+ /* callgraph default */
+ struct callchain_param param = {
+ .record_mode = callchain_param.record_mode,
+ };
+ u32 dump_size = 0;
+ int max_stack = 0;
+ const char *callgraph_buf = NULL;
+
+ list_for_each_entry(term, config_terms, list) {
+ switch (term->type) {
+ case EVSEL__CONFIG_TERM_PERIOD:
+ if (!(term->weak && opts->user_interval != ULLONG_MAX)) {
+ attr->sample_period = term->val.period;
+ attr->freq = 0;
+ evsel__reset_sample_bit(evsel, PERIOD);
+ }
+ break;
+ case EVSEL__CONFIG_TERM_FREQ:
+ if (!(term->weak && opts->user_freq != UINT_MAX)) {
+ attr->sample_freq = term->val.freq;
+ attr->freq = 1;
+ evsel__set_sample_bit(evsel, PERIOD);
+ }
+ break;
+ case EVSEL__CONFIG_TERM_TIME:
+ if (term->val.time)
+ evsel__set_sample_bit(evsel, TIME);
+ else
+ evsel__reset_sample_bit(evsel, TIME);
+ break;
+ case EVSEL__CONFIG_TERM_CALLGRAPH:
+ callgraph_buf = term->val.str;
+ break;
+ case EVSEL__CONFIG_TERM_BRANCH:
+ if (term->val.str && strcmp(term->val.str, "no")) {
+ evsel__set_sample_bit(evsel, BRANCH_STACK);
+ parse_branch_str(term->val.str,
+ &attr->branch_sample_type);
+ } else
+ evsel__reset_sample_bit(evsel, BRANCH_STACK);
+ break;
+ case EVSEL__CONFIG_TERM_STACK_USER:
+ dump_size = term->val.stack_user;
+ break;
+ case EVSEL__CONFIG_TERM_MAX_STACK:
+ max_stack = term->val.max_stack;
+ break;
+ case EVSEL__CONFIG_TERM_MAX_EVENTS:
+ evsel->max_events = term->val.max_events;
+ break;
+ case EVSEL__CONFIG_TERM_INHERIT:
+ /*
+ * attr->inherit should has already been set by
+ * evsel__config. If user explicitly set
+ * inherit using config terms, override global
+ * opt->no_inherit setting.
+ */
+ attr->inherit = term->val.inherit ? 1 : 0;
+ break;
+ case EVSEL__CONFIG_TERM_OVERWRITE:
+ attr->write_backward = term->val.overwrite ? 1 : 0;
+ break;
+ case EVSEL__CONFIG_TERM_DRV_CFG:
+ break;
+ case EVSEL__CONFIG_TERM_PERCORE:
+ break;
+ case EVSEL__CONFIG_TERM_AUX_OUTPUT:
+ attr->aux_output = term->val.aux_output ? 1 : 0;
+ break;
+ case EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE:
+ /* Already applied by auxtrace */
+ break;
+ case EVSEL__CONFIG_TERM_CFG_CHG:
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* User explicitly set per-event callgraph, clear the old setting and reset. */
+ if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) {
+ bool sample_address = false;
+
+ if (max_stack) {
+ param.max_stack = max_stack;
+ if (callgraph_buf == NULL)
+ callgraph_buf = "fp";
+ }
+
+ /* parse callgraph parameters */
+ if (callgraph_buf != NULL) {
+ if (!strcmp(callgraph_buf, "no")) {
+ param.enabled = false;
+ param.record_mode = CALLCHAIN_NONE;
+ } else {
+ param.enabled = true;
+ if (parse_callchain_record(callgraph_buf, &param)) {
+ pr_err("per-event callgraph setting for %s failed. "
+ "Apply callgraph global setting for it\n",
+ evsel->name);
+ return;
+ }
+ if (param.record_mode == CALLCHAIN_DWARF)
+ sample_address = true;
+ }
+ }
+ if (dump_size > 0) {
+ dump_size = round_up(dump_size, sizeof(u64));
+ param.dump_size = dump_size;
+ }
+
+ /* If global callgraph set, clear it */
+ if (callchain_param.enabled)
+ evsel__reset_callgraph(evsel, &callchain_param);
+
+ /* set perf-event callgraph */
+ if (param.enabled) {
+ if (sample_address) {
+ evsel__set_sample_bit(evsel, ADDR);
+ evsel__set_sample_bit(evsel, DATA_SRC);
+ evsel->core.attr.mmap_data = track;
+ }
+ evsel__config_callchain(evsel, opts, &param);
+ }
+ }
+}
+
+struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type)
+{
+ struct evsel_config_term *term, *found_term = NULL;
+
+ list_for_each_entry(term, &evsel->config_terms, list) {
+ if (term->type == type)
+ found_term = term;
+ }
+
+ return found_term;
+}
+
+void __weak arch_evsel__set_sample_weight(struct evsel *evsel)
+{
+ evsel__set_sample_bit(evsel, WEIGHT);
+}
+
+void __weak arch__post_evsel_config(struct evsel *evsel __maybe_unused,
+ struct perf_event_attr *attr __maybe_unused)
+{
+}
+
+static void evsel__set_default_freq_period(struct record_opts *opts,
+ struct perf_event_attr *attr)
+{
+ if (opts->freq) {
+ attr->freq = 1;
+ attr->sample_freq = opts->freq;
+ } else {
+ attr->sample_period = opts->default_interval;
+ }
+}
+
+static bool evsel__is_offcpu_event(struct evsel *evsel)
+{
+ return evsel__is_bpf_output(evsel) && evsel__name_is(evsel, OFFCPU_EVENT);
+}
+
+/*
+ * The enable_on_exec/disabled value strategy:
+ *
+ * 1) For any type of traced program:
+ * - all independent events and group leaders are disabled
+ * - all group members are enabled
+ *
+ * Group members are ruled by group leaders. They need to
+ * be enabled, because the group scheduling relies on that.
+ *
+ * 2) For traced programs executed by perf:
+ * - all independent events and group leaders have
+ * enable_on_exec set
+ * - we don't specifically enable or disable any event during
+ * the record command
+ *
+ * Independent events and group leaders are initially disabled
+ * and get enabled by exec. Group members are ruled by group
+ * leaders as stated in 1).
+ *
+ * 3) For traced programs attached by perf (pid/tid):
+ * - we specifically enable or disable all events during
+ * the record command
+ *
+ * When attaching events to already running traced we
+ * enable/disable events specifically, as there's no
+ * initial traced exec call.
+ */
+void evsel__config(struct evsel *evsel, struct record_opts *opts,
+ struct callchain_param *callchain)
+{
+ struct evsel *leader = evsel__leader(evsel);
+ struct perf_event_attr *attr = &evsel->core.attr;
+ int track = evsel->tracking;
+ bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread;
+
+ attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1;
+ attr->inherit = !opts->no_inherit;
+ attr->write_backward = opts->overwrite ? 1 : 0;
+ attr->read_format = PERF_FORMAT_LOST;
+
+ evsel__set_sample_bit(evsel, IP);
+ evsel__set_sample_bit(evsel, TID);
+
+ if (evsel->sample_read) {
+ evsel__set_sample_bit(evsel, READ);
+
+ /*
+ * We need ID even in case of single event, because
+ * PERF_SAMPLE_READ process ID specific data.
+ */
+ evsel__set_sample_id(evsel, false);
+
+ /*
+ * Apply group format only if we belong to group
+ * with more than one members.
+ */
+ if (leader->core.nr_members > 1) {
+ attr->read_format |= PERF_FORMAT_GROUP;
+ attr->inherit = 0;
+ }
+ }
+
+ /*
+ * We default some events to have a default interval. But keep
+ * it a weak assumption overridable by the user.
+ */
+ if ((evsel->is_libpfm_event && !attr->sample_period) ||
+ (!evsel->is_libpfm_event && (!attr->sample_period ||
+ opts->user_freq != UINT_MAX ||
+ opts->user_interval != ULLONG_MAX)))
+ evsel__set_default_freq_period(opts, attr);
+
+ /*
+ * If attr->freq was set (here or earlier), ask for period
+ * to be sampled.
+ */
+ if (attr->freq)
+ evsel__set_sample_bit(evsel, PERIOD);
+
+ if (opts->no_samples)
+ attr->sample_freq = 0;
+
+ if (opts->inherit_stat) {
+ evsel->core.attr.read_format |=
+ PERF_FORMAT_TOTAL_TIME_ENABLED |
+ PERF_FORMAT_TOTAL_TIME_RUNNING |
+ PERF_FORMAT_ID;
+ attr->inherit_stat = 1;
+ }
+
+ if (opts->sample_address) {
+ evsel__set_sample_bit(evsel, ADDR);
+ attr->mmap_data = track;
+ }
+
+ /*
+ * We don't allow user space callchains for function trace
+ * event, due to issues with page faults while tracing page
+ * fault handler and its overall trickiness nature.
+ */
+ if (evsel__is_function_event(evsel))
+ evsel->core.attr.exclude_callchain_user = 1;
+
+ if (callchain && callchain->enabled && !evsel->no_aux_samples)
+ evsel__config_callchain(evsel, opts, callchain);
+
+ if (opts->sample_intr_regs && !evsel->no_aux_samples &&
+ !evsel__is_dummy_event(evsel)) {
+ attr->sample_regs_intr = opts->sample_intr_regs;
+ evsel__set_sample_bit(evsel, REGS_INTR);
+ }
+
+ if (opts->sample_user_regs && !evsel->no_aux_samples &&
+ !evsel__is_dummy_event(evsel)) {
+ attr->sample_regs_user |= opts->sample_user_regs;
+ evsel__set_sample_bit(evsel, REGS_USER);
+ }
+
+ if (target__has_cpu(&opts->target) || opts->sample_cpu)
+ evsel__set_sample_bit(evsel, CPU);
+
+ /*
+ * When the user explicitly disabled time don't force it here.
+ */
+ if (opts->sample_time &&
+ (!perf_missing_features.sample_id_all &&
+ (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu ||
+ opts->sample_time_set)))
+ evsel__set_sample_bit(evsel, TIME);
+
+ if (opts->raw_samples && !evsel->no_aux_samples) {
+ evsel__set_sample_bit(evsel, TIME);
+ evsel__set_sample_bit(evsel, RAW);
+ evsel__set_sample_bit(evsel, CPU);
+ }
+
+ if (opts->sample_address)
+ evsel__set_sample_bit(evsel, DATA_SRC);
+
+ if (opts->sample_phys_addr)
+ evsel__set_sample_bit(evsel, PHYS_ADDR);
+
+ if (opts->no_buffering) {
+ attr->watermark = 0;
+ attr->wakeup_events = 1;
+ }
+ if (opts->branch_stack && !evsel->no_aux_samples) {
+ evsel__set_sample_bit(evsel, BRANCH_STACK);
+ attr->branch_sample_type = opts->branch_stack;
+ }
+
+ if (opts->sample_weight)
+ arch_evsel__set_sample_weight(evsel);
+
+ attr->task = track;
+ attr->mmap = track;
+ attr->mmap2 = track && !perf_missing_features.mmap2;
+ attr->comm = track;
+ attr->build_id = track && opts->build_id;
+
+ /*
+ * ksymbol is tracked separately with text poke because it needs to be
+ * system wide and enabled immediately.
+ */
+ if (!opts->text_poke)
+ attr->ksymbol = track && !perf_missing_features.ksymbol;
+ attr->bpf_event = track && !opts->no_bpf_event && !perf_missing_features.bpf;
+
+ if (opts->record_namespaces)
+ attr->namespaces = track;
+
+ if (opts->record_cgroup) {
+ attr->cgroup = track && !perf_missing_features.cgroup;
+ evsel__set_sample_bit(evsel, CGROUP);
+ }
+
+ if (opts->sample_data_page_size)
+ evsel__set_sample_bit(evsel, DATA_PAGE_SIZE);
+
+ if (opts->sample_code_page_size)
+ evsel__set_sample_bit(evsel, CODE_PAGE_SIZE);
+
+ if (opts->record_switch_events)
+ attr->context_switch = track;
+
+ if (opts->sample_transaction)
+ evsel__set_sample_bit(evsel, TRANSACTION);
+
+ if (opts->running_time) {
+ evsel->core.attr.read_format |=
+ PERF_FORMAT_TOTAL_TIME_ENABLED |
+ PERF_FORMAT_TOTAL_TIME_RUNNING;
+ }
+
+ /*
+ * XXX see the function comment above
+ *
+ * Disabling only independent events or group leaders,
+ * keeping group members enabled.
+ */
+ if (evsel__is_group_leader(evsel))
+ attr->disabled = 1;
+
+ /*
+ * Setting enable_on_exec for independent events and
+ * group leaders for traced executed by perf.
+ */
+ if (target__none(&opts->target) && evsel__is_group_leader(evsel) &&
+ !opts->target.initial_delay)
+ attr->enable_on_exec = 1;
+
+ if (evsel->immediate) {
+ attr->disabled = 0;
+ attr->enable_on_exec = 0;
+ }
+
+ clockid = opts->clockid;
+ if (opts->use_clockid) {
+ attr->use_clockid = 1;
+ attr->clockid = opts->clockid;
+ }
+
+ if (evsel->precise_max)
+ attr->precise_ip = 3;
+
+ if (opts->all_user) {
+ attr->exclude_kernel = 1;
+ attr->exclude_user = 0;
+ }
+
+ if (opts->all_kernel) {
+ attr->exclude_kernel = 0;
+ attr->exclude_user = 1;
+ }
+
+ if (evsel->core.own_cpus || evsel->unit)
+ evsel->core.attr.read_format |= PERF_FORMAT_ID;
+
+ /*
+ * Apply event specific term settings,
+ * it overloads any global configuration.
+ */
+ evsel__apply_config_terms(evsel, opts, track);
+
+ evsel->ignore_missing_thread = opts->ignore_missing_thread;
+
+ /* The --period option takes the precedence. */
+ if (opts->period_set) {
+ if (opts->period)
+ evsel__set_sample_bit(evsel, PERIOD);
+ else
+ evsel__reset_sample_bit(evsel, PERIOD);
+ }
+
+ /*
+ * A dummy event never triggers any actual counter and therefore
+ * cannot be used with branch_stack.
+ *
+ * For initial_delay, a dummy event is added implicitly.
+ * The software event will trigger -EOPNOTSUPP error out,
+ * if BRANCH_STACK bit is set.
+ */
+ if (evsel__is_dummy_event(evsel))
+ evsel__reset_sample_bit(evsel, BRANCH_STACK);
+
+ if (evsel__is_offcpu_event(evsel))
+ evsel->core.attr.sample_type &= OFFCPU_SAMPLE_TYPES;
+
+ arch__post_evsel_config(evsel, attr);
+}
+
+int evsel__set_filter(struct evsel *evsel, const char *filter)
+{
+ char *new_filter = strdup(filter);
+
+ if (new_filter != NULL) {
+ free(evsel->filter);
+ evsel->filter = new_filter;
+ return 0;
+ }
+
+ return -1;
+}
+
+static int evsel__append_filter(struct evsel *evsel, const char *fmt, const char *filter)
+{
+ char *new_filter;
+
+ if (evsel->filter == NULL)
+ return evsel__set_filter(evsel, filter);
+
+ if (asprintf(&new_filter, fmt, evsel->filter, filter) > 0) {
+ free(evsel->filter);
+ evsel->filter = new_filter;
+ return 0;
+ }
+
+ return -1;
+}
+
+int evsel__append_tp_filter(struct evsel *evsel, const char *filter)
+{
+ return evsel__append_filter(evsel, "(%s) && (%s)", filter);
+}
+
+int evsel__append_addr_filter(struct evsel *evsel, const char *filter)
+{
+ return evsel__append_filter(evsel, "%s,%s", filter);
+}
+
+/* Caller has to clear disabled after going through all CPUs. */
+int evsel__enable_cpu(struct evsel *evsel, int cpu_map_idx)
+{
+ return perf_evsel__enable_cpu(&evsel->core, cpu_map_idx);
+}
+
+int evsel__enable(struct evsel *evsel)
+{
+ int err = perf_evsel__enable(&evsel->core);
+
+ if (!err)
+ evsel->disabled = false;
+ return err;
+}
+
+/* Caller has to set disabled after going through all CPUs. */
+int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx)
+{
+ return perf_evsel__disable_cpu(&evsel->core, cpu_map_idx);
+}
+
+int evsel__disable(struct evsel *evsel)
+{
+ int err = perf_evsel__disable(&evsel->core);
+ /*
+ * We mark it disabled here so that tools that disable a event can
+ * ignore events after they disable it. I.e. the ring buffer may have
+ * already a few more events queued up before the kernel got the stop
+ * request.
+ */
+ if (!err)
+ evsel->disabled = true;
+
+ return err;
+}
+
+void free_config_terms(struct list_head *config_terms)
+{
+ struct evsel_config_term *term, *h;
+
+ list_for_each_entry_safe(term, h, config_terms, list) {
+ list_del_init(&term->list);
+ if (term->free_str)
+ zfree(&term->val.str);
+ free(term);
+ }
+}
+
+static void evsel__free_config_terms(struct evsel *evsel)
+{
+ free_config_terms(&evsel->config_terms);
+}
+
+void evsel__exit(struct evsel *evsel)
+{
+ assert(list_empty(&evsel->core.node));
+ assert(evsel->evlist == NULL);
+ bpf_counter__destroy(evsel);
+ perf_bpf_filter__destroy(evsel);
+ evsel__free_counts(evsel);
+ perf_evsel__free_fd(&evsel->core);
+ perf_evsel__free_id(&evsel->core);
+ evsel__free_config_terms(evsel);
+ cgroup__put(evsel->cgrp);
+ perf_cpu_map__put(evsel->core.cpus);
+ perf_cpu_map__put(evsel->core.own_cpus);
+ perf_thread_map__put(evsel->core.threads);
+ zfree(&evsel->group_name);
+ zfree(&evsel->name);
+ zfree(&evsel->filter);
+ zfree(&evsel->pmu_name);
+ zfree(&evsel->group_pmu_name);
+ zfree(&evsel->unit);
+ zfree(&evsel->metric_id);
+ evsel__zero_per_pkg(evsel);
+ hashmap__free(evsel->per_pkg_mask);
+ evsel->per_pkg_mask = NULL;
+ zfree(&evsel->metric_events);
+ perf_evsel__object.fini(evsel);
+}
+
+void evsel__delete(struct evsel *evsel)
+{
+ if (!evsel)
+ return;
+
+ evsel__exit(evsel);
+ free(evsel);
+}
+
+void evsel__compute_deltas(struct evsel *evsel, int cpu_map_idx, int thread,
+ struct perf_counts_values *count)
+{
+ struct perf_counts_values tmp;
+
+ if (!evsel->prev_raw_counts)
+ return;
+
+ tmp = *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
+ *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread) = *count;
+
+ count->val = count->val - tmp.val;
+ count->ena = count->ena - tmp.ena;
+ count->run = count->run - tmp.run;
+}
+
+static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread)
+{
+ struct perf_counts_values *count = perf_counts(evsel->counts, cpu_map_idx, thread);
+
+ return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count);
+}
+
+static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
+ u64 val, u64 ena, u64 run, u64 lost)
+{
+ struct perf_counts_values *count;
+
+ count = perf_counts(counter->counts, cpu_map_idx, thread);
+
+ count->val = val;
+ count->ena = ena;
+ count->run = run;
+ count->lost = lost;
+
+ perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
+}
+
+static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int thread, u64 *data)
+{
+ u64 read_format = leader->core.attr.read_format;
+ struct sample_read_value *v;
+ u64 nr, ena = 0, run = 0, lost = 0;
+
+ nr = *data++;
+
+ if (nr != (u64) leader->core.nr_members)
+ return -EINVAL;
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ ena = *data++;
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ run = *data++;
+
+ v = (void *)data;
+ sample_read_group__for_each(v, nr, read_format) {
+ struct evsel *counter;
+
+ counter = evlist__id2evsel(leader->evlist, v->id);
+ if (!counter)
+ return -EINVAL;
+
+ if (read_format & PERF_FORMAT_LOST)
+ lost = v->lost;
+
+ evsel__set_count(counter, cpu_map_idx, thread, v->value, ena, run, lost);
+ }
+
+ return 0;
+}
+
+static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread)
+{
+ struct perf_stat_evsel *ps = leader->stats;
+ u64 read_format = leader->core.attr.read_format;
+ int size = perf_evsel__read_size(&leader->core);
+ u64 *data = ps->group_data;
+
+ if (!(read_format & PERF_FORMAT_ID))
+ return -EINVAL;
+
+ if (!evsel__is_group_leader(leader))
+ return -EINVAL;
+
+ if (!data) {
+ data = zalloc(size);
+ if (!data)
+ return -ENOMEM;
+
+ ps->group_data = data;
+ }
+
+ if (FD(leader, cpu_map_idx, thread) < 0)
+ return -EINVAL;
+
+ if (readn(FD(leader, cpu_map_idx, thread), data, size) <= 0)
+ return -errno;
+
+ return evsel__process_group_data(leader, cpu_map_idx, thread, data);
+}
+
+int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread)
+{
+ u64 read_format = evsel->core.attr.read_format;
+
+ if (read_format & PERF_FORMAT_GROUP)
+ return evsel__read_group(evsel, cpu_map_idx, thread);
+
+ return evsel__read_one(evsel, cpu_map_idx, thread);
+}
+
+int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale)
+{
+ struct perf_counts_values count;
+ size_t nv = scale ? 3 : 1;
+
+ if (FD(evsel, cpu_map_idx, thread) < 0)
+ return -EINVAL;
+
+ if (evsel->counts == NULL && evsel__alloc_counts(evsel) < 0)
+ return -ENOMEM;
+
+ if (readn(FD(evsel, cpu_map_idx, thread), &count, nv * sizeof(u64)) <= 0)
+ return -errno;
+
+ evsel__compute_deltas(evsel, cpu_map_idx, thread, &count);
+ perf_counts_values__scale(&count, scale, NULL);
+ *perf_counts(evsel->counts, cpu_map_idx, thread) = count;
+ return 0;
+}
+
+static int evsel__match_other_cpu(struct evsel *evsel, struct evsel *other,
+ int cpu_map_idx)
+{
+ struct perf_cpu cpu;
+
+ cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx);
+ return perf_cpu_map__idx(other->core.cpus, cpu);
+}
+
+static int evsel__hybrid_group_cpu_map_idx(struct evsel *evsel, int cpu_map_idx)
+{
+ struct evsel *leader = evsel__leader(evsel);
+
+ if ((evsel__is_hybrid(evsel) && !evsel__is_hybrid(leader)) ||
+ (!evsel__is_hybrid(evsel) && evsel__is_hybrid(leader))) {
+ return evsel__match_other_cpu(evsel, leader, cpu_map_idx);
+ }
+
+ return cpu_map_idx;
+}
+
+static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread)
+{
+ struct evsel *leader = evsel__leader(evsel);
+ int fd;
+
+ if (evsel__is_group_leader(evsel))
+ return -1;
+
+ /*
+ * Leader must be already processed/open,
+ * if not it's a bug.
+ */
+ BUG_ON(!leader->core.fd);
+
+ cpu_map_idx = evsel__hybrid_group_cpu_map_idx(evsel, cpu_map_idx);
+ if (cpu_map_idx == -1)
+ return -1;
+
+ fd = FD(leader, cpu_map_idx, thread);
+ BUG_ON(fd == -1 && !leader->skippable);
+
+ /*
+ * When the leader has been skipped, return -2 to distinguish from no
+ * group leader case.
+ */
+ return fd == -1 ? -2 : fd;
+}
+
+static void evsel__remove_fd(struct evsel *pos, int nr_cpus, int nr_threads, int thread_idx)
+{
+ for (int cpu = 0; cpu < nr_cpus; cpu++)
+ for (int thread = thread_idx; thread < nr_threads - 1; thread++)
+ FD(pos, cpu, thread) = FD(pos, cpu, thread + 1);
+}
+
+static int update_fds(struct evsel *evsel,
+ int nr_cpus, int cpu_map_idx,
+ int nr_threads, int thread_idx)
+{
+ struct evsel *pos;
+
+ if (cpu_map_idx >= nr_cpus || thread_idx >= nr_threads)
+ return -EINVAL;
+
+ evlist__for_each_entry(evsel->evlist, pos) {
+ nr_cpus = pos != evsel ? nr_cpus : cpu_map_idx;
+
+ evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx);
+
+ /*
+ * Since fds for next evsel has not been created,
+ * there is no need to iterate whole event list.
+ */
+ if (pos == evsel)
+ break;
+ }
+ return 0;
+}
+
+static bool evsel__ignore_missing_thread(struct evsel *evsel,
+ int nr_cpus, int cpu_map_idx,
+ struct perf_thread_map *threads,
+ int thread, int err)
+{
+ pid_t ignore_pid = perf_thread_map__pid(threads, thread);
+
+ if (!evsel->ignore_missing_thread)
+ return false;
+
+ /* The system wide setup does not work with threads. */
+ if (evsel->core.system_wide)
+ return false;
+
+ /* The -ESRCH is perf event syscall errno for pid's not found. */
+ if (err != -ESRCH)
+ return false;
+
+ /* If there's only one thread, let it fail. */
+ if (threads->nr == 1)
+ return false;
+
+ /*
+ * We should remove fd for missing_thread first
+ * because thread_map__remove() will decrease threads->nr.
+ */
+ if (update_fds(evsel, nr_cpus, cpu_map_idx, threads->nr, thread))
+ return false;
+
+ if (thread_map__remove(threads, thread))
+ return false;
+
+ pr_warning("WARNING: Ignored open failure for pid %d\n",
+ ignore_pid);
+ return true;
+}
+
+static int __open_attr__fprintf(FILE *fp, const char *name, const char *val,
+ void *priv __maybe_unused)
+{
+ return fprintf(fp, " %-32s %s\n", name, val);
+}
+
+static void display_attr(struct perf_event_attr *attr)
+{
+ if (verbose >= 2 || debug_peo_args) {
+ fprintf(stderr, "%.60s\n", graph_dotted_line);
+ fprintf(stderr, "perf_event_attr:\n");
+ perf_event_attr__fprintf(stderr, attr, __open_attr__fprintf, NULL);
+ fprintf(stderr, "%.60s\n", graph_dotted_line);
+ }
+}
+
+bool evsel__precise_ip_fallback(struct evsel *evsel)
+{
+ /* Do not try less precise if not requested. */
+ if (!evsel->precise_max)
+ return false;
+
+ /*
+ * We tried all the precise_ip values, and it's
+ * still failing, so leave it to standard fallback.
+ */
+ if (!evsel->core.attr.precise_ip) {
+ evsel->core.attr.precise_ip = evsel->precise_ip_original;
+ return false;
+ }
+
+ if (!evsel->precise_ip_original)
+ evsel->precise_ip_original = evsel->core.attr.precise_ip;
+
+ evsel->core.attr.precise_ip--;
+ pr_debug2_peo("decreasing precise_ip by one (%d)\n", evsel->core.attr.precise_ip);
+ display_attr(&evsel->core.attr);
+ return true;
+}
+
+static struct perf_cpu_map *empty_cpu_map;
+static struct perf_thread_map *empty_thread_map;
+
+static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
+ struct perf_thread_map *threads)
+{
+ int nthreads = perf_thread_map__nr(threads);
+
+ if ((perf_missing_features.write_backward && evsel->core.attr.write_backward) ||
+ (perf_missing_features.aux_output && evsel->core.attr.aux_output))
+ return -EINVAL;
+
+ if (cpus == NULL) {
+ if (empty_cpu_map == NULL) {
+ empty_cpu_map = perf_cpu_map__dummy_new();
+ if (empty_cpu_map == NULL)
+ return -ENOMEM;
+ }
+
+ cpus = empty_cpu_map;
+ }
+
+ if (threads == NULL) {
+ if (empty_thread_map == NULL) {
+ empty_thread_map = thread_map__new_by_tid(-1);
+ if (empty_thread_map == NULL)
+ return -ENOMEM;
+ }
+
+ threads = empty_thread_map;
+ }
+
+ if (evsel->core.fd == NULL &&
+ perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0)
+ return -ENOMEM;
+
+ evsel->open_flags = PERF_FLAG_FD_CLOEXEC;
+ if (evsel->cgrp)
+ evsel->open_flags |= PERF_FLAG_PID_CGROUP;
+
+ return 0;
+}
+
+static void evsel__disable_missing_features(struct evsel *evsel)
+{
+ if (perf_missing_features.read_lost)
+ evsel->core.attr.read_format &= ~PERF_FORMAT_LOST;
+ if (perf_missing_features.weight_struct) {
+ evsel__set_sample_bit(evsel, WEIGHT);
+ evsel__reset_sample_bit(evsel, WEIGHT_STRUCT);
+ }
+ if (perf_missing_features.clockid_wrong)
+ evsel->core.attr.clockid = CLOCK_MONOTONIC; /* should always work */
+ if (perf_missing_features.clockid) {
+ evsel->core.attr.use_clockid = 0;
+ evsel->core.attr.clockid = 0;
+ }
+ if (perf_missing_features.cloexec)
+ evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
+ if (perf_missing_features.mmap2)
+ evsel->core.attr.mmap2 = 0;
+ if (evsel->pmu && evsel->pmu->missing_features.exclude_guest)
+ evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0;
+ if (perf_missing_features.lbr_flags)
+ evsel->core.attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
+ PERF_SAMPLE_BRANCH_NO_CYCLES);
+ if (perf_missing_features.group_read && evsel->core.attr.inherit)
+ evsel->core.attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID);
+ if (perf_missing_features.ksymbol)
+ evsel->core.attr.ksymbol = 0;
+ if (perf_missing_features.bpf)
+ evsel->core.attr.bpf_event = 0;
+ if (perf_missing_features.branch_hw_idx)
+ evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_HW_INDEX;
+ if (perf_missing_features.sample_id_all)
+ evsel->core.attr.sample_id_all = 0;
+}
+
+int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
+ struct perf_thread_map *threads)
+{
+ int err;
+
+ err = __evsel__prepare_open(evsel, cpus, threads);
+ if (err)
+ return err;
+
+ evsel__disable_missing_features(evsel);
+
+ return err;
+}
+
+bool evsel__detect_missing_features(struct evsel *evsel)
+{
+ /*
+ * Must probe features in the order they were added to the
+ * perf_event_attr interface.
+ */
+ if (!perf_missing_features.read_lost &&
+ (evsel->core.attr.read_format & PERF_FORMAT_LOST)) {
+ perf_missing_features.read_lost = true;
+ pr_debug2("switching off PERF_FORMAT_LOST support\n");
+ return true;
+ } else if (!perf_missing_features.weight_struct &&
+ (evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) {
+ perf_missing_features.weight_struct = true;
+ pr_debug2("switching off weight struct support\n");
+ return true;
+ } else if (!perf_missing_features.code_page_size &&
+ (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)) {
+ perf_missing_features.code_page_size = true;
+ pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support, bailing out\n");
+ return false;
+ } else if (!perf_missing_features.data_page_size &&
+ (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)) {
+ perf_missing_features.data_page_size = true;
+ pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support, bailing out\n");
+ return false;
+ } else if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) {
+ perf_missing_features.cgroup = true;
+ pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n");
+ return false;
+ } else if (!perf_missing_features.branch_hw_idx &&
+ (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) {
+ perf_missing_features.branch_hw_idx = true;
+ pr_debug2("switching off branch HW index support\n");
+ return true;
+ } else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) {
+ perf_missing_features.aux_output = true;
+ pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n");
+ return false;
+ } else if (!perf_missing_features.bpf && evsel->core.attr.bpf_event) {
+ perf_missing_features.bpf = true;
+ pr_debug2_peo("switching off bpf_event\n");
+ return true;
+ } else if (!perf_missing_features.ksymbol && evsel->core.attr.ksymbol) {
+ perf_missing_features.ksymbol = true;
+ pr_debug2_peo("switching off ksymbol\n");
+ return true;
+ } else if (!perf_missing_features.write_backward && evsel->core.attr.write_backward) {
+ perf_missing_features.write_backward = true;
+ pr_debug2_peo("switching off write_backward\n");
+ return false;
+ } else if (!perf_missing_features.clockid_wrong && evsel->core.attr.use_clockid) {
+ perf_missing_features.clockid_wrong = true;
+ pr_debug2_peo("switching off clockid\n");
+ return true;
+ } else if (!perf_missing_features.clockid && evsel->core.attr.use_clockid) {
+ perf_missing_features.clockid = true;
+ pr_debug2_peo("switching off use_clockid\n");
+ return true;
+ } else if (!perf_missing_features.cloexec && (evsel->open_flags & PERF_FLAG_FD_CLOEXEC)) {
+ perf_missing_features.cloexec = true;
+ pr_debug2_peo("switching off cloexec flag\n");
+ return true;
+ } else if (!perf_missing_features.mmap2 && evsel->core.attr.mmap2) {
+ perf_missing_features.mmap2 = true;
+ pr_debug2_peo("switching off mmap2\n");
+ return true;
+ } else if (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) {
+ if (evsel->pmu == NULL)
+ evsel->pmu = evsel__find_pmu(evsel);
+
+ if (evsel->pmu)
+ evsel->pmu->missing_features.exclude_guest = true;
+ else {
+ /* we cannot find PMU, disable attrs now */
+ evsel->core.attr.exclude_host = false;
+ evsel->core.attr.exclude_guest = false;
+ }
+
+ if (evsel->exclude_GH) {
+ pr_debug2_peo("PMU has no exclude_host/guest support, bailing out\n");
+ return false;
+ }
+ if (!perf_missing_features.exclude_guest) {
+ perf_missing_features.exclude_guest = true;
+ pr_debug2_peo("switching off exclude_guest, exclude_host\n");
+ }
+ return true;
+ } else if (!perf_missing_features.sample_id_all) {
+ perf_missing_features.sample_id_all = true;
+ pr_debug2_peo("switching off sample_id_all\n");
+ return true;
+ } else if (!perf_missing_features.lbr_flags &&
+ (evsel->core.attr.branch_sample_type &
+ (PERF_SAMPLE_BRANCH_NO_CYCLES |
+ PERF_SAMPLE_BRANCH_NO_FLAGS))) {
+ perf_missing_features.lbr_flags = true;
+ pr_debug2_peo("switching off branch sample type no (cycles/flags)\n");
+ return true;
+ } else if (!perf_missing_features.group_read &&
+ evsel->core.attr.inherit &&
+ (evsel->core.attr.read_format & PERF_FORMAT_GROUP) &&
+ evsel__is_group_leader(evsel)) {
+ perf_missing_features.group_read = true;
+ pr_debug2_peo("switching off group read\n");
+ return true;
+ } else {
+ return false;
+ }
+}
+
+bool evsel__increase_rlimit(enum rlimit_action *set_rlimit)
+{
+ int old_errno;
+ struct rlimit l;
+
+ if (*set_rlimit < INCREASED_MAX) {
+ old_errno = errno;
+
+ if (getrlimit(RLIMIT_NOFILE, &l) == 0) {
+ if (*set_rlimit == NO_CHANGE) {
+ l.rlim_cur = l.rlim_max;
+ } else {
+ l.rlim_cur = l.rlim_max + 1000;
+ l.rlim_max = l.rlim_cur;
+ }
+ if (setrlimit(RLIMIT_NOFILE, &l) == 0) {
+ (*set_rlimit) += 1;
+ errno = old_errno;
+ return true;
+ }
+ }
+ errno = old_errno;
+ }
+
+ return false;
+}
+
+static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
+ struct perf_thread_map *threads,
+ int start_cpu_map_idx, int end_cpu_map_idx)
+{
+ int idx, thread, nthreads;
+ int pid = -1, err, old_errno;
+ enum rlimit_action set_rlimit = NO_CHANGE;
+
+ err = __evsel__prepare_open(evsel, cpus, threads);
+ if (err)
+ return err;
+
+ if (cpus == NULL)
+ cpus = empty_cpu_map;
+
+ if (threads == NULL)
+ threads = empty_thread_map;
+
+ nthreads = perf_thread_map__nr(threads);
+
+ if (evsel->cgrp)
+ pid = evsel->cgrp->fd;
+
+fallback_missing_features:
+ evsel__disable_missing_features(evsel);
+
+ pr_debug3("Opening: %s\n", evsel__name(evsel));
+ display_attr(&evsel->core.attr);
+
+ for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
+
+ for (thread = 0; thread < nthreads; thread++) {
+ int fd, group_fd;
+retry_open:
+ if (thread >= nthreads)
+ break;
+
+ if (!evsel->cgrp && !evsel->core.system_wide)
+ pid = perf_thread_map__pid(threads, thread);
+
+ group_fd = get_group_fd(evsel, idx, thread);
+
+ if (group_fd == -2) {
+ pr_debug("broken group leader for %s\n", evsel->name);
+ err = -EINVAL;
+ goto out_close;
+ }
+
+ test_attr__ready();
+
+ /* Debug message used by test scripts */
+ pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
+ pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags);
+
+ fd = sys_perf_event_open(&evsel->core.attr, pid,
+ perf_cpu_map__cpu(cpus, idx).cpu,
+ group_fd, evsel->open_flags);
+
+ FD(evsel, idx, thread) = fd;
+
+ if (fd < 0) {
+ err = -errno;
+
+ pr_debug2_peo("\nsys_perf_event_open failed, error %d\n",
+ err);
+ goto try_fallback;
+ }
+
+ bpf_counter__install_pe(evsel, idx, fd);
+
+ if (unlikely(test_attr__enabled)) {
+ test_attr__open(&evsel->core.attr, pid,
+ perf_cpu_map__cpu(cpus, idx),
+ fd, group_fd, evsel->open_flags);
+ }
+
+ /* Debug message used by test scripts */
+ pr_debug2_peo(" = %d\n", fd);
+
+ if (evsel->bpf_fd >= 0) {
+ int evt_fd = fd;
+ int bpf_fd = evsel->bpf_fd;
+
+ err = ioctl(evt_fd,
+ PERF_EVENT_IOC_SET_BPF,
+ bpf_fd);
+ if (err && errno != EEXIST) {
+ pr_err("failed to attach bpf fd %d: %s\n",
+ bpf_fd, strerror(errno));
+ err = -EINVAL;
+ goto out_close;
+ }
+ }
+
+ set_rlimit = NO_CHANGE;
+
+ /*
+ * If we succeeded but had to kill clockid, fail and
+ * have evsel__open_strerror() print us a nice error.
+ */
+ if (perf_missing_features.clockid ||
+ perf_missing_features.clockid_wrong) {
+ err = -EINVAL;
+ goto out_close;
+ }
+ }
+ }
+
+ return 0;
+
+try_fallback:
+ if (evsel__precise_ip_fallback(evsel))
+ goto retry_open;
+
+ if (evsel__ignore_missing_thread(evsel, perf_cpu_map__nr(cpus),
+ idx, threads, thread, err)) {
+ /* We just removed 1 thread, so lower the upper nthreads limit. */
+ nthreads--;
+
+ /* ... and pretend like nothing have happened. */
+ err = 0;
+ goto retry_open;
+ }
+ /*
+ * perf stat needs between 5 and 22 fds per CPU. When we run out
+ * of them try to increase the limits.
+ */
+ if (err == -EMFILE && evsel__increase_rlimit(&set_rlimit))
+ goto retry_open;
+
+ if (err != -EINVAL || idx > 0 || thread > 0)
+ goto out_close;
+
+ if (evsel__detect_missing_features(evsel))
+ goto fallback_missing_features;
+out_close:
+ if (err)
+ threads->err_thread = thread;
+
+ old_errno = errno;
+ do {
+ while (--thread >= 0) {
+ if (FD(evsel, idx, thread) >= 0)
+ close(FD(evsel, idx, thread));
+ FD(evsel, idx, thread) = -1;
+ }
+ thread = nthreads;
+ } while (--idx >= 0);
+ errno = old_errno;
+ return err;
+}
+
+int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
+ struct perf_thread_map *threads)
+{
+ return evsel__open_cpu(evsel, cpus, threads, 0, perf_cpu_map__nr(cpus));
+}
+
+void evsel__close(struct evsel *evsel)
+{
+ perf_evsel__close(&evsel->core);
+ perf_evsel__free_id(&evsel->core);
+}
+
+int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx)
+{
+ if (cpu_map_idx == -1)
+ return evsel__open_cpu(evsel, cpus, NULL, 0, perf_cpu_map__nr(cpus));
+
+ return evsel__open_cpu(evsel, cpus, NULL, cpu_map_idx, cpu_map_idx + 1);
+}
+
+int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads)
+{
+ return evsel__open(evsel, NULL, threads);
+}
+
+static int perf_evsel__parse_id_sample(const struct evsel *evsel,
+ const union perf_event *event,
+ struct perf_sample *sample)
+{
+ u64 type = evsel->core.attr.sample_type;
+ const __u64 *array = event->sample.array;
+ bool swapped = evsel->needs_swap;
+ union u64_swap u;
+
+ array += ((event->header.size -
+ sizeof(event->header)) / sizeof(u64)) - 1;
+
+ if (type & PERF_SAMPLE_IDENTIFIER) {
+ sample->id = *array;
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_CPU) {
+ u.val64 = *array;
+ if (swapped) {
+ /* undo swap of u64, then swap on individual u32s */
+ u.val64 = bswap_64(u.val64);
+ u.val32[0] = bswap_32(u.val32[0]);
+ }
+
+ sample->cpu = u.val32[0];
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_STREAM_ID) {
+ sample->stream_id = *array;
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_ID) {
+ sample->id = *array;
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_TIME) {
+ sample->time = *array;
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_TID) {
+ u.val64 = *array;
+ if (swapped) {
+ /* undo swap of u64, then swap on individual u32s */
+ u.val64 = bswap_64(u.val64);
+ u.val32[0] = bswap_32(u.val32[0]);
+ u.val32[1] = bswap_32(u.val32[1]);
+ }
+
+ sample->pid = u.val32[0];
+ sample->tid = u.val32[1];
+ array--;
+ }
+
+ return 0;
+}
+
+static inline bool overflow(const void *endp, u16 max_size, const void *offset,
+ u64 size)
+{
+ return size > max_size || offset + size > endp;
+}
+
+#define OVERFLOW_CHECK(offset, size, max_size) \
+ do { \
+ if (overflow(endp, (max_size), (offset), (size))) \
+ return -EFAULT; \
+ } while (0)
+
+#define OVERFLOW_CHECK_u64(offset) \
+ OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64))
+
+static int
+perf_event__check_size(union perf_event *event, unsigned int sample_size)
+{
+ /*
+ * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes
+ * up to PERF_SAMPLE_PERIOD. After that overflow() must be used to
+ * check the format does not go past the end of the event.
+ */
+ if (sample_size + sizeof(event->header) > event->header.size)
+ return -EFAULT;
+
+ return 0;
+}
+
+void __weak arch_perf_parse_sample_weight(struct perf_sample *data,
+ const __u64 *array,
+ u64 type __maybe_unused)
+{
+ data->weight = *array;
+}
+
+u64 evsel__bitfield_swap_branch_flags(u64 value)
+{
+ u64 new_val = 0;
+
+ /*
+ * branch_flags
+ * union {
+ * u64 values;
+ * struct {
+ * mispred:1 //target mispredicted
+ * predicted:1 //target predicted
+ * in_tx:1 //in transaction
+ * abort:1 //transaction abort
+ * cycles:16 //cycle count to last branch
+ * type:4 //branch type
+ * spec:2 //branch speculation info
+ * new_type:4 //additional branch type
+ * priv:3 //privilege level
+ * reserved:31
+ * }
+ * }
+ *
+ * Avoid bswap64() the entire branch_flag.value,
+ * as it has variable bit-field sizes. Instead the
+ * macro takes the bit-field position/size,
+ * swaps it based on the host endianness.
+ */
+ if (host_is_bigendian()) {
+ new_val = bitfield_swap(value, 0, 1);
+ new_val |= bitfield_swap(value, 1, 1);
+ new_val |= bitfield_swap(value, 2, 1);
+ new_val |= bitfield_swap(value, 3, 1);
+ new_val |= bitfield_swap(value, 4, 16);
+ new_val |= bitfield_swap(value, 20, 4);
+ new_val |= bitfield_swap(value, 24, 2);
+ new_val |= bitfield_swap(value, 26, 4);
+ new_val |= bitfield_swap(value, 30, 3);
+ new_val |= bitfield_swap(value, 33, 31);
+ } else {
+ new_val = bitfield_swap(value, 63, 1);
+ new_val |= bitfield_swap(value, 62, 1);
+ new_val |= bitfield_swap(value, 61, 1);
+ new_val |= bitfield_swap(value, 60, 1);
+ new_val |= bitfield_swap(value, 44, 16);
+ new_val |= bitfield_swap(value, 40, 4);
+ new_val |= bitfield_swap(value, 38, 2);
+ new_val |= bitfield_swap(value, 34, 4);
+ new_val |= bitfield_swap(value, 31, 3);
+ new_val |= bitfield_swap(value, 0, 31);
+ }
+
+ return new_val;
+}
+
+int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
+ struct perf_sample *data)
+{
+ u64 type = evsel->core.attr.sample_type;
+ bool swapped = evsel->needs_swap;
+ const __u64 *array;
+ u16 max_size = event->header.size;
+ const void *endp = (void *)event + max_size;
+ u64 sz;
+
+ /*
+ * used for cross-endian analysis. See git commit 65014ab3
+ * for why this goofiness is needed.
+ */
+ union u64_swap u;
+
+ memset(data, 0, sizeof(*data));
+ data->cpu = data->pid = data->tid = -1;
+ data->stream_id = data->id = data->time = -1ULL;
+ data->period = evsel->core.attr.sample_period;
+ data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ data->misc = event->header.misc;
+ data->id = -1ULL;
+ data->data_src = PERF_MEM_DATA_SRC_NONE;
+ data->vcpu = -1;
+
+ if (event->header.type != PERF_RECORD_SAMPLE) {
+ if (!evsel->core.attr.sample_id_all)
+ return 0;
+ return perf_evsel__parse_id_sample(evsel, event, data);
+ }
+
+ array = event->sample.array;
+
+ if (perf_event__check_size(event, evsel->sample_size))
+ return -EFAULT;
+
+ if (type & PERF_SAMPLE_IDENTIFIER) {
+ data->id = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_IP) {
+ data->ip = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_TID) {
+ u.val64 = *array;
+ if (swapped) {
+ /* undo swap of u64, then swap on individual u32s */
+ u.val64 = bswap_64(u.val64);
+ u.val32[0] = bswap_32(u.val32[0]);
+ u.val32[1] = bswap_32(u.val32[1]);
+ }
+
+ data->pid = u.val32[0];
+ data->tid = u.val32[1];
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_TIME) {
+ data->time = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_ADDR) {
+ data->addr = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_ID) {
+ data->id = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_STREAM_ID) {
+ data->stream_id = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_CPU) {
+
+ u.val64 = *array;
+ if (swapped) {
+ /* undo swap of u64, then swap on individual u32s */
+ u.val64 = bswap_64(u.val64);
+ u.val32[0] = bswap_32(u.val32[0]);
+ }
+
+ data->cpu = u.val32[0];
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_PERIOD) {
+ data->period = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_READ) {
+ u64 read_format = evsel->core.attr.read_format;
+
+ OVERFLOW_CHECK_u64(array);
+ if (read_format & PERF_FORMAT_GROUP)
+ data->read.group.nr = *array;
+ else
+ data->read.one.value = *array;
+
+ array++;
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ OVERFLOW_CHECK_u64(array);
+ data->read.time_enabled = *array;
+ array++;
+ }
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ OVERFLOW_CHECK_u64(array);
+ data->read.time_running = *array;
+ array++;
+ }
+
+ /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+ if (read_format & PERF_FORMAT_GROUP) {
+ const u64 max_group_nr = UINT64_MAX /
+ sizeof(struct sample_read_value);
+
+ if (data->read.group.nr > max_group_nr)
+ return -EFAULT;
+
+ sz = data->read.group.nr * sample_read_value_size(read_format);
+ OVERFLOW_CHECK(array, sz, max_size);
+ data->read.group.values =
+ (struct sample_read_value *)array;
+ array = (void *)array + sz;
+ } else {
+ OVERFLOW_CHECK_u64(array);
+ data->read.one.id = *array;
+ array++;
+
+ if (read_format & PERF_FORMAT_LOST) {
+ OVERFLOW_CHECK_u64(array);
+ data->read.one.lost = *array;
+ array++;
+ }
+ }
+ }
+
+ if (type & PERF_SAMPLE_CALLCHAIN) {
+ const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
+
+ OVERFLOW_CHECK_u64(array);
+ data->callchain = (struct ip_callchain *)array++;
+ if (data->callchain->nr > max_callchain_nr)
+ return -EFAULT;
+ sz = data->callchain->nr * sizeof(u64);
+ OVERFLOW_CHECK(array, sz, max_size);
+ array = (void *)array + sz;
+ }
+
+ if (type & PERF_SAMPLE_RAW) {
+ OVERFLOW_CHECK_u64(array);
+ u.val64 = *array;
+
+ /*
+ * Undo swap of u64, then swap on individual u32s,
+ * get the size of the raw area and undo all of the
+ * swap. The pevent interface handles endianness by
+ * itself.
+ */
+ if (swapped) {
+ u.val64 = bswap_64(u.val64);
+ u.val32[0] = bswap_32(u.val32[0]);
+ u.val32[1] = bswap_32(u.val32[1]);
+ }
+ data->raw_size = u.val32[0];
+
+ /*
+ * The raw data is aligned on 64bits including the
+ * u32 size, so it's safe to use mem_bswap_64.
+ */
+ if (swapped)
+ mem_bswap_64((void *) array, data->raw_size);
+
+ array = (void *)array + sizeof(u32);
+
+ OVERFLOW_CHECK(array, data->raw_size, max_size);
+ data->raw_data = (void *)array;
+ array = (void *)array + data->raw_size;
+ }
+
+ if (type & PERF_SAMPLE_BRANCH_STACK) {
+ const u64 max_branch_nr = UINT64_MAX /
+ sizeof(struct branch_entry);
+ struct branch_entry *e;
+ unsigned int i;
+
+ OVERFLOW_CHECK_u64(array);
+ data->branch_stack = (struct branch_stack *)array++;
+
+ if (data->branch_stack->nr > max_branch_nr)
+ return -EFAULT;
+
+ sz = data->branch_stack->nr * sizeof(struct branch_entry);
+ if (evsel__has_branch_hw_idx(evsel)) {
+ sz += sizeof(u64);
+ e = &data->branch_stack->entries[0];
+ } else {
+ data->no_hw_idx = true;
+ /*
+ * if the PERF_SAMPLE_BRANCH_HW_INDEX is not applied,
+ * only nr and entries[] will be output by kernel.
+ */
+ e = (struct branch_entry *)&data->branch_stack->hw_idx;
+ }
+
+ if (swapped) {
+ /*
+ * struct branch_flag does not have endian
+ * specific bit field definition. And bswap
+ * will not resolve the issue, since these
+ * are bit fields.
+ *
+ * evsel__bitfield_swap_branch_flags() uses a
+ * bitfield_swap macro to swap the bit position
+ * based on the host endians.
+ */
+ for (i = 0; i < data->branch_stack->nr; i++, e++)
+ e->flags.value = evsel__bitfield_swap_branch_flags(e->flags.value);
+ }
+
+ OVERFLOW_CHECK(array, sz, max_size);
+ array = (void *)array + sz;
+ }
+
+ if (type & PERF_SAMPLE_REGS_USER) {
+ OVERFLOW_CHECK_u64(array);
+ data->user_regs.abi = *array;
+ array++;
+
+ if (data->user_regs.abi) {
+ u64 mask = evsel->core.attr.sample_regs_user;
+
+ sz = hweight64(mask) * sizeof(u64);
+ OVERFLOW_CHECK(array, sz, max_size);
+ data->user_regs.mask = mask;
+ data->user_regs.regs = (u64 *)array;
+ array = (void *)array + sz;
+ }
+ }
+
+ if (type & PERF_SAMPLE_STACK_USER) {
+ OVERFLOW_CHECK_u64(array);
+ sz = *array++;
+
+ data->user_stack.offset = ((char *)(array - 1)
+ - (char *) event);
+
+ if (!sz) {
+ data->user_stack.size = 0;
+ } else {
+ OVERFLOW_CHECK(array, sz, max_size);
+ data->user_stack.data = (char *)array;
+ array = (void *)array + sz;
+ OVERFLOW_CHECK_u64(array);
+ data->user_stack.size = *array++;
+ if (WARN_ONCE(data->user_stack.size > sz,
+ "user stack dump failure\n"))
+ return -EFAULT;
+ }
+ }
+
+ if (type & PERF_SAMPLE_WEIGHT_TYPE) {
+ OVERFLOW_CHECK_u64(array);
+ arch_perf_parse_sample_weight(data, array, type);
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_DATA_SRC) {
+ OVERFLOW_CHECK_u64(array);
+ data->data_src = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_TRANSACTION) {
+ OVERFLOW_CHECK_u64(array);
+ data->transaction = *array;
+ array++;
+ }
+
+ data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE;
+ if (type & PERF_SAMPLE_REGS_INTR) {
+ OVERFLOW_CHECK_u64(array);
+ data->intr_regs.abi = *array;
+ array++;
+
+ if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) {
+ u64 mask = evsel->core.attr.sample_regs_intr;
+
+ sz = hweight64(mask) * sizeof(u64);
+ OVERFLOW_CHECK(array, sz, max_size);
+ data->intr_regs.mask = mask;
+ data->intr_regs.regs = (u64 *)array;
+ array = (void *)array + sz;
+ }
+ }
+
+ data->phys_addr = 0;
+ if (type & PERF_SAMPLE_PHYS_ADDR) {
+ data->phys_addr = *array;
+ array++;
+ }
+
+ data->cgroup = 0;
+ if (type & PERF_SAMPLE_CGROUP) {
+ data->cgroup = *array;
+ array++;
+ }
+
+ data->data_page_size = 0;
+ if (type & PERF_SAMPLE_DATA_PAGE_SIZE) {
+ data->data_page_size = *array;
+ array++;
+ }
+
+ data->code_page_size = 0;
+ if (type & PERF_SAMPLE_CODE_PAGE_SIZE) {
+ data->code_page_size = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_AUX) {
+ OVERFLOW_CHECK_u64(array);
+ sz = *array++;
+
+ OVERFLOW_CHECK(array, sz, max_size);
+ /* Undo swap of data */
+ if (swapped)
+ mem_bswap_64((char *)array, sz);
+ data->aux_sample.size = sz;
+ data->aux_sample.data = (char *)array;
+ array = (void *)array + sz;
+ }
+
+ return 0;
+}
+
+int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event,
+ u64 *timestamp)
+{
+ u64 type = evsel->core.attr.sample_type;
+ const __u64 *array;
+
+ if (!(type & PERF_SAMPLE_TIME))
+ return -1;
+
+ if (event->header.type != PERF_RECORD_SAMPLE) {
+ struct perf_sample data = {
+ .time = -1ULL,
+ };
+
+ if (!evsel->core.attr.sample_id_all)
+ return -1;
+ if (perf_evsel__parse_id_sample(evsel, event, &data))
+ return -1;
+
+ *timestamp = data.time;
+ return 0;
+ }
+
+ array = event->sample.array;
+
+ if (perf_event__check_size(event, evsel->sample_size))
+ return -EFAULT;
+
+ if (type & PERF_SAMPLE_IDENTIFIER)
+ array++;
+
+ if (type & PERF_SAMPLE_IP)
+ array++;
+
+ if (type & PERF_SAMPLE_TID)
+ array++;
+
+ if (type & PERF_SAMPLE_TIME)
+ *timestamp = *array;
+
+ return 0;
+}
+
+u16 evsel__id_hdr_size(struct evsel *evsel)
+{
+ u64 sample_type = evsel->core.attr.sample_type;
+ u16 size = 0;
+
+ if (sample_type & PERF_SAMPLE_TID)
+ size += sizeof(u64);
+
+ if (sample_type & PERF_SAMPLE_TIME)
+ size += sizeof(u64);
+
+ if (sample_type & PERF_SAMPLE_ID)
+ size += sizeof(u64);
+
+ if (sample_type & PERF_SAMPLE_STREAM_ID)
+ size += sizeof(u64);
+
+ if (sample_type & PERF_SAMPLE_CPU)
+ size += sizeof(u64);
+
+ if (sample_type & PERF_SAMPLE_IDENTIFIER)
+ size += sizeof(u64);
+
+ return size;
+}
+
+#ifdef HAVE_LIBTRACEEVENT
+struct tep_format_field *evsel__field(struct evsel *evsel, const char *name)
+{
+ return tep_find_field(evsel->tp_format, name);
+}
+
+void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name)
+{
+ struct tep_format_field *field = evsel__field(evsel, name);
+ int offset;
+
+ if (!field)
+ return NULL;
+
+ offset = field->offset;
+
+ if (field->flags & TEP_FIELD_IS_DYNAMIC) {
+ offset = *(int *)(sample->raw_data + field->offset);
+ offset &= 0xffff;
+ if (tep_field_is_relative(field->flags))
+ offset += field->offset + field->size;
+ }
+
+ return sample->raw_data + offset;
+}
+
+u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample,
+ bool needs_swap)
+{
+ u64 value;
+ void *ptr = sample->raw_data + field->offset;
+
+ switch (field->size) {
+ case 1:
+ return *(u8 *)ptr;
+ case 2:
+ value = *(u16 *)ptr;
+ break;
+ case 4:
+ value = *(u32 *)ptr;
+ break;
+ case 8:
+ memcpy(&value, ptr, sizeof(u64));
+ break;
+ default:
+ return 0;
+ }
+
+ if (!needs_swap)
+ return value;
+
+ switch (field->size) {
+ case 2:
+ return bswap_16(value);
+ case 4:
+ return bswap_32(value);
+ case 8:
+ return bswap_64(value);
+ default:
+ return 0;
+ }
+
+ return 0;
+}
+
+u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name)
+{
+ struct tep_format_field *field = evsel__field(evsel, name);
+
+ return field ? format_field__intval(field, sample, evsel->needs_swap) : 0;
+}
+#endif
+
+bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize)
+{
+ int paranoid;
+
+ if ((err == ENOENT || err == ENXIO || err == ENODEV) &&
+ evsel->core.attr.type == PERF_TYPE_HARDWARE &&
+ evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES) {
+ /*
+ * If it's cycles then fall back to hrtimer based
+ * cpu-clock-tick sw counter, which is always available even if
+ * no PMU support.
+ *
+ * PPC returns ENXIO until 2.6.37 (behavior changed with commit
+ * b0a873e).
+ */
+ scnprintf(msg, msgsize, "%s",
+"The cycles event is not supported, trying to fall back to cpu-clock-ticks");
+
+ evsel->core.attr.type = PERF_TYPE_SOFTWARE;
+ evsel->core.attr.config = PERF_COUNT_SW_CPU_CLOCK;
+
+ zfree(&evsel->name);
+ return true;
+ } else if (err == EACCES && !evsel->core.attr.exclude_kernel &&
+ (paranoid = perf_event_paranoid()) > 1) {
+ const char *name = evsel__name(evsel);
+ char *new_name;
+ const char *sep = ":";
+
+ /* If event has exclude user then don't exclude kernel. */
+ if (evsel->core.attr.exclude_user)
+ return false;
+
+ /* Is there already the separator in the name. */
+ if (strchr(name, '/') ||
+ (strchr(name, ':') && !evsel->is_libpfm_event))
+ sep = "";
+
+ if (asprintf(&new_name, "%s%su", name, sep) < 0)
+ return false;
+
+ free(evsel->name);
+ evsel->name = new_name;
+ scnprintf(msg, msgsize, "kernel.perf_event_paranoid=%d, trying "
+ "to fall back to excluding kernel and hypervisor "
+ " samples", paranoid);
+ evsel->core.attr.exclude_kernel = 1;
+ evsel->core.attr.exclude_hv = 1;
+
+ return true;
+ }
+
+ return false;
+}
+
+static bool find_process(const char *name)
+{
+ size_t len = strlen(name);
+ DIR *dir;
+ struct dirent *d;
+ int ret = -1;
+
+ dir = opendir(procfs__mountpoint());
+ if (!dir)
+ return false;
+
+ /* Walk through the directory. */
+ while (ret && (d = readdir(dir)) != NULL) {
+ char path[PATH_MAX];
+ char *data;
+ size_t size;
+
+ if ((d->d_type != DT_DIR) ||
+ !strcmp(".", d->d_name) ||
+ !strcmp("..", d->d_name))
+ continue;
+
+ scnprintf(path, sizeof(path), "%s/%s/comm",
+ procfs__mountpoint(), d->d_name);
+
+ if (filename__read_str(path, &data, &size))
+ continue;
+
+ ret = strncmp(name, data, len);
+ free(data);
+ }
+
+ closedir(dir);
+ return ret ? false : true;
+}
+
+int __weak arch_evsel__open_strerror(struct evsel *evsel __maybe_unused,
+ char *msg __maybe_unused,
+ size_t size __maybe_unused)
+{
+ return 0;
+}
+
+int evsel__open_strerror(struct evsel *evsel, struct target *target,
+ int err, char *msg, size_t size)
+{
+ char sbuf[STRERR_BUFSIZE];
+ int printed = 0, enforced = 0;
+ int ret;
+
+ switch (err) {
+ case EPERM:
+ case EACCES:
+ printed += scnprintf(msg + printed, size - printed,
+ "Access to performance monitoring and observability operations is limited.\n");
+
+ if (!sysfs__read_int("fs/selinux/enforce", &enforced)) {
+ if (enforced) {
+ printed += scnprintf(msg + printed, size - printed,
+ "Enforced MAC policy settings (SELinux) can limit access to performance\n"
+ "monitoring and observability operations. Inspect system audit records for\n"
+ "more perf_event access control information and adjusting the policy.\n");
+ }
+ }
+
+ if (err == EPERM)
+ printed += scnprintf(msg, size,
+ "No permission to enable %s event.\n\n", evsel__name(evsel));
+
+ return scnprintf(msg + printed, size - printed,
+ "Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n"
+ "access to performance monitoring and observability operations for processes\n"
+ "without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability.\n"
+ "More information can be found at 'Perf events and tool security' document:\n"
+ "https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html\n"
+ "perf_event_paranoid setting is %d:\n"
+ " -1: Allow use of (almost) all events by all users\n"
+ " Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n"
+ ">= 0: Disallow raw and ftrace function tracepoint access\n"
+ ">= 1: Disallow CPU event access\n"
+ ">= 2: Disallow kernel profiling\n"
+ "To make the adjusted perf_event_paranoid setting permanent preserve it\n"
+ "in /etc/sysctl.conf (e.g. kernel.perf_event_paranoid = <setting>)",
+ perf_event_paranoid());
+ case ENOENT:
+ return scnprintf(msg, size, "The %s event is not supported.", evsel__name(evsel));
+ case EMFILE:
+ return scnprintf(msg, size, "%s",
+ "Too many events are opened.\n"
+ "Probably the maximum number of open file descriptors has been reached.\n"
+ "Hint: Try again after reducing the number of events.\n"
+ "Hint: Try increasing the limit with 'ulimit -n <limit>'");
+ case ENOMEM:
+ if (evsel__has_callchain(evsel) &&
+ access("/proc/sys/kernel/perf_event_max_stack", F_OK) == 0)
+ return scnprintf(msg, size,
+ "Not enough memory to setup event with callchain.\n"
+ "Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack\n"
+ "Hint: Current value: %d", sysctl__max_stack());
+ break;
+ case ENODEV:
+ if (target->cpu_list)
+ return scnprintf(msg, size, "%s",
+ "No such device - did you specify an out-of-range profile CPU?");
+ break;
+ case EOPNOTSUPP:
+ if (evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK)
+ return scnprintf(msg, size,
+ "%s: PMU Hardware or event type doesn't support branch stack sampling.",
+ evsel__name(evsel));
+ if (evsel->core.attr.aux_output)
+ return scnprintf(msg, size,
+ "%s: PMU Hardware doesn't support 'aux_output' feature",
+ evsel__name(evsel));
+ if (evsel->core.attr.sample_period != 0)
+ return scnprintf(msg, size,
+ "%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'",
+ evsel__name(evsel));
+ if (evsel->core.attr.precise_ip)
+ return scnprintf(msg, size, "%s",
+ "\'precise\' request may not be supported. Try removing 'p' modifier.");
+#if defined(__i386__) || defined(__x86_64__)
+ if (evsel->core.attr.type == PERF_TYPE_HARDWARE)
+ return scnprintf(msg, size, "%s",
+ "No hardware sampling interrupt available.\n");
+#endif
+ break;
+ case EBUSY:
+ if (find_process("oprofiled"))
+ return scnprintf(msg, size,
+ "The PMU counters are busy/taken by another profiler.\n"
+ "We found oprofile daemon running, please stop it and try again.");
+ break;
+ case EINVAL:
+ if (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE && perf_missing_features.code_page_size)
+ return scnprintf(msg, size, "Asking for the code page size isn't supported by this kernel.");
+ if (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE && perf_missing_features.data_page_size)
+ return scnprintf(msg, size, "Asking for the data page size isn't supported by this kernel.");
+ if (evsel->core.attr.write_backward && perf_missing_features.write_backward)
+ return scnprintf(msg, size, "Reading from overwrite event is not supported by this kernel.");
+ if (perf_missing_features.clockid)
+ return scnprintf(msg, size, "clockid feature not supported.");
+ if (perf_missing_features.clockid_wrong)
+ return scnprintf(msg, size, "wrong clockid (%d).", clockid);
+ if (perf_missing_features.aux_output)
+ return scnprintf(msg, size, "The 'aux_output' feature is not supported, update the kernel.");
+ if (!target__has_cpu(target))
+ return scnprintf(msg, size,
+ "Invalid event (%s) in per-thread mode, enable system wide with '-a'.",
+ evsel__name(evsel));
+
+ break;
+ case ENODATA:
+ return scnprintf(msg, size, "Cannot collect data source with the load latency event alone. "
+ "Please add an auxiliary event in front of the load latency event.");
+ default:
+ break;
+ }
+
+ ret = arch_evsel__open_strerror(evsel, msg, size);
+ if (ret)
+ return ret;
+
+ return scnprintf(msg, size,
+ "The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n"
+ "/bin/dmesg | grep -i perf may provide additional information.\n",
+ err, str_error_r(err, sbuf, sizeof(sbuf)), evsel__name(evsel));
+}
+
+struct perf_env *evsel__env(struct evsel *evsel)
+{
+ if (evsel && evsel->evlist && evsel->evlist->env)
+ return evsel->evlist->env;
+ return &perf_env;
+}
+
+static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
+{
+ int cpu_map_idx, thread;
+
+ for (cpu_map_idx = 0; cpu_map_idx < xyarray__max_x(evsel->core.fd); cpu_map_idx++) {
+ for (thread = 0; thread < xyarray__max_y(evsel->core.fd);
+ thread++) {
+ int fd = FD(evsel, cpu_map_idx, thread);
+
+ if (perf_evlist__id_add_fd(&evlist->core, &evsel->core,
+ cpu_map_idx, thread, fd) < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
+{
+ struct perf_cpu_map *cpus = evsel->core.cpus;
+ struct perf_thread_map *threads = evsel->core.threads;
+
+ if (perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr))
+ return -ENOMEM;
+
+ return store_evsel_ids(evsel, evlist);
+}
+
+void evsel__zero_per_pkg(struct evsel *evsel)
+{
+ struct hashmap_entry *cur;
+ size_t bkt;
+
+ if (evsel->per_pkg_mask) {
+ hashmap__for_each_entry(evsel->per_pkg_mask, cur, bkt)
+ zfree(&cur->pkey);
+
+ hashmap__clear(evsel->per_pkg_mask);
+ }
+}
+
+/**
+ * evsel__is_hybrid - does the evsel have a known PMU that is hybrid. Note, this
+ * will be false on hybrid systems for hardware and legacy
+ * cache events.
+ */
+bool evsel__is_hybrid(const struct evsel *evsel)
+{
+ if (perf_pmus__num_core_pmus() == 1)
+ return false;
+
+ return evsel->core.is_pmu_core;
+}
+
+struct evsel *evsel__leader(const struct evsel *evsel)
+{
+ return container_of(evsel->core.leader, struct evsel, core);
+}
+
+bool evsel__has_leader(struct evsel *evsel, struct evsel *leader)
+{
+ return evsel->core.leader == &leader->core;
+}
+
+bool evsel__is_leader(struct evsel *evsel)
+{
+ return evsel__has_leader(evsel, evsel);
+}
+
+void evsel__set_leader(struct evsel *evsel, struct evsel *leader)
+{
+ evsel->core.leader = &leader->core;
+}
+
+int evsel__source_count(const struct evsel *evsel)
+{
+ struct evsel *pos;
+ int count = 0;
+
+ evlist__for_each_entry(evsel->evlist, pos) {
+ if (pos->metric_leader == evsel)
+ count++;
+ }
+ return count;
+}
+
+bool __weak arch_evsel__must_be_in_group(const struct evsel *evsel __maybe_unused)
+{
+ return false;
+}
+
+/*
+ * Remove an event from a given group (leader).
+ * Some events, e.g., perf metrics Topdown events,
+ * must always be grouped. Ignore the events.
+ */
+void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader)
+{
+ if (!arch_evsel__must_be_in_group(evsel) && evsel != leader) {
+ evsel__set_leader(evsel, evsel);
+ evsel->core.nr_members = 0;
+ leader->core.nr_members--;
+ }
+}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
new file mode 100644
index 000000000..848534ec7
--- /dev/null
+++ b/tools/perf/util/evsel.h
@@ -0,0 +1,558 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_EVSEL_H
+#define __PERF_EVSEL_H 1
+
+#include <linux/list.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <linux/perf_event.h>
+#include <linux/types.h>
+#include <internal/evsel.h>
+#include <perf/evsel.h>
+#include "symbol_conf.h"
+#include "pmus.h"
+
+struct bpf_object;
+struct cgroup;
+struct perf_counts;
+struct perf_stat_evsel;
+union perf_event;
+struct bpf_counter_ops;
+struct target;
+struct hashmap;
+struct bperf_leader_bpf;
+struct bperf_follower_bpf;
+struct perf_pmu;
+
+typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
+
+enum perf_tool_event {
+ PERF_TOOL_NONE = 0,
+ PERF_TOOL_DURATION_TIME = 1,
+ PERF_TOOL_USER_TIME = 2,
+ PERF_TOOL_SYSTEM_TIME = 3,
+
+ PERF_TOOL_MAX,
+};
+
+const char *perf_tool_event__to_str(enum perf_tool_event ev);
+enum perf_tool_event perf_tool_event__from_str(const char *str);
+
+#define perf_tool_event__for_each_event(ev) \
+ for ((ev) = PERF_TOOL_DURATION_TIME; (ev) < PERF_TOOL_MAX; ev++)
+
+/** struct evsel - event selector
+ *
+ * @evlist - evlist this evsel is in, if it is in one.
+ * @core - libperf evsel object
+ * @name - Can be set to retain the original event name passed by the user,
+ * so that when showing results in tools such as 'perf stat', we
+ * show the name used, not some alias.
+ * @id_pos: the position of the event id (PERF_SAMPLE_ID or
+ * PERF_SAMPLE_IDENTIFIER) in a sample event i.e. in the array of
+ * struct perf_record_sample
+ * @is_pos: the position (counting backwards) of the event id (PERF_SAMPLE_ID or
+ * PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if sample_id_all
+ * is used there is an id sample appended to non-sample events
+ * @priv: And what is in its containing unnamed union are tool specific
+ */
+struct evsel {
+ struct perf_evsel core;
+ struct evlist *evlist;
+ off_t id_offset;
+ int id_pos;
+ int is_pos;
+ unsigned int sample_size;
+
+ /*
+ * These fields can be set in the parse-events code or similar.
+ * Please check evsel__clone() to copy them properly so that
+ * they can be released properly.
+ */
+ struct {
+ char *name;
+ char *group_name;
+ const char *pmu_name;
+ const char *group_pmu_name;
+#ifdef HAVE_LIBTRACEEVENT
+ struct tep_event *tp_format;
+#endif
+ char *filter;
+ unsigned long max_events;
+ double scale;
+ const char *unit;
+ struct cgroup *cgrp;
+ const char *metric_id;
+ enum perf_tool_event tool_event;
+ /* parse modifier helper */
+ int exclude_GH;
+ int sample_read;
+ bool snapshot;
+ bool per_pkg;
+ bool percore;
+ bool precise_max;
+ bool is_libpfm_event;
+ bool auto_merge_stats;
+ bool collect_stat;
+ bool weak_group;
+ bool bpf_counter;
+ bool use_config_name;
+ bool skippable;
+ int bpf_fd;
+ struct bpf_object *bpf_obj;
+ struct list_head config_terms;
+ };
+
+ /*
+ * metric fields are similar, but needs more care as they can have
+ * references to other metric (evsel).
+ */
+ struct evsel **metric_events;
+ struct evsel *metric_leader;
+
+ void *handler;
+ struct perf_counts *counts;
+ struct perf_counts *prev_raw_counts;
+ unsigned long nr_events_printed;
+ struct perf_stat_evsel *stats;
+ void *priv;
+ u64 db_id;
+ bool uniquified_name;
+ bool supported;
+ bool needs_swap;
+ bool disabled;
+ bool no_aux_samples;
+ bool immediate;
+ bool tracking;
+ bool ignore_missing_thread;
+ bool forced_leader;
+ bool cmdline_group_boundary;
+ bool merged_stat;
+ bool reset_group;
+ bool errored;
+ bool needs_auxtrace_mmap;
+ bool default_metricgroup; /* A member of the Default metricgroup */
+ struct hashmap *per_pkg_mask;
+ int err;
+ struct {
+ evsel__sb_cb_t *cb;
+ void *data;
+ } side_band;
+ /*
+ * For reporting purposes, an evsel sample can have a callchain
+ * synthesized from AUX area data. Keep track of synthesized sample
+ * types here. Note, the recorded sample_type cannot be changed because
+ * it is needed to continue to parse events.
+ * See also evsel__has_callchain().
+ */
+ __u64 synth_sample_type;
+
+ /*
+ * bpf_counter_ops serves two use cases:
+ * 1. perf-stat -b counting events used byBPF programs
+ * 2. perf-stat --use-bpf use BPF programs to aggregate counts
+ */
+ struct bpf_counter_ops *bpf_counter_ops;
+
+ struct list_head bpf_counter_list; /* for perf-stat -b */
+ struct list_head bpf_filters; /* for perf-record --filter */
+
+ /* for perf-stat --use-bpf */
+ int bperf_leader_prog_fd;
+ int bperf_leader_link_fd;
+ union {
+ struct bperf_leader_bpf *leader_skel;
+ struct bperf_follower_bpf *follower_skel;
+ void *bpf_skel;
+ };
+ unsigned long open_flags;
+ int precise_ip_original;
+
+ /* for missing_features */
+ struct perf_pmu *pmu;
+};
+
+struct perf_missing_features {
+ bool sample_id_all;
+ bool exclude_guest;
+ bool mmap2;
+ bool cloexec;
+ bool clockid;
+ bool clockid_wrong;
+ bool lbr_flags;
+ bool write_backward;
+ bool group_read;
+ bool ksymbol;
+ bool bpf;
+ bool aux_output;
+ bool branch_hw_idx;
+ bool cgroup;
+ bool data_page_size;
+ bool code_page_size;
+ bool weight_struct;
+ bool read_lost;
+};
+
+extern struct perf_missing_features perf_missing_features;
+
+struct perf_cpu_map;
+struct thread_map;
+struct record_opts;
+
+static inline struct perf_cpu_map *evsel__cpus(struct evsel *evsel)
+{
+ return perf_evsel__cpus(&evsel->core);
+}
+
+static inline int evsel__nr_cpus(struct evsel *evsel)
+{
+ return perf_cpu_map__nr(evsel__cpus(evsel));
+}
+
+void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
+ struct perf_counts_values *count);
+
+int evsel__object_config(size_t object_size,
+ int (*init)(struct evsel *evsel),
+ void (*fini)(struct evsel *evsel));
+
+struct perf_pmu *evsel__find_pmu(const struct evsel *evsel);
+bool evsel__is_aux_event(const struct evsel *evsel);
+
+struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx);
+
+static inline struct evsel *evsel__new(struct perf_event_attr *attr)
+{
+ return evsel__new_idx(attr, 0);
+}
+
+struct evsel *evsel__clone(struct evsel *orig);
+
+int copy_config_terms(struct list_head *dst, struct list_head *src);
+void free_config_terms(struct list_head *config_terms);
+
+
+#ifdef HAVE_LIBTRACEEVENT
+struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx);
+
+/*
+ * Returns pointer with encoded error via <linux/err.h> interface.
+ */
+static inline struct evsel *evsel__newtp(const char *sys, const char *name)
+{
+ return evsel__newtp_idx(sys, name, 0);
+}
+#endif
+
+#ifdef HAVE_LIBTRACEEVENT
+struct tep_event *event_format__new(const char *sys, const char *name);
+#endif
+
+void evsel__init(struct evsel *evsel, struct perf_event_attr *attr, int idx);
+void evsel__exit(struct evsel *evsel);
+void evsel__delete(struct evsel *evsel);
+
+struct callchain_param;
+
+void evsel__config(struct evsel *evsel, struct record_opts *opts,
+ struct callchain_param *callchain);
+void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
+ struct callchain_param *callchain);
+
+int __evsel__sample_size(u64 sample_type);
+void evsel__calc_id_pos(struct evsel *evsel);
+
+bool evsel__is_cache_op_valid(u8 type, u8 op);
+
+static inline bool evsel__is_bpf(struct evsel *evsel)
+{
+ return evsel->bpf_counter_ops != NULL;
+}
+
+static inline bool evsel__is_bperf(struct evsel *evsel)
+{
+ return evsel->bpf_counter_ops != NULL && list_empty(&evsel->bpf_counter_list);
+}
+
+#define EVSEL__MAX_ALIASES 8
+
+extern const char *const evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES];
+extern const char *const evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES];
+extern const char *const evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES];
+extern const char *const evsel__hw_names[PERF_COUNT_HW_MAX];
+extern const char *const evsel__sw_names[PERF_COUNT_SW_MAX];
+extern char *evsel__bpf_counter_events;
+bool evsel__match_bpf_counter_events(const char *name);
+int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size);
+
+int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size);
+const char *evsel__name(struct evsel *evsel);
+bool evsel__name_is(struct evsel *evsel, const char *name);
+const char *evsel__metric_id(const struct evsel *evsel);
+
+static inline bool evsel__is_tool(const struct evsel *evsel)
+{
+ return evsel->tool_event != PERF_TOOL_NONE;
+}
+
+const char *evsel__group_name(struct evsel *evsel);
+int evsel__group_desc(struct evsel *evsel, char *buf, size_t size);
+
+void __evsel__set_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit);
+void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit);
+
+#define evsel__set_sample_bit(evsel, bit) \
+ __evsel__set_sample_bit(evsel, PERF_SAMPLE_##bit)
+
+#define evsel__reset_sample_bit(evsel, bit) \
+ __evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit)
+
+void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier);
+
+void arch_evsel__set_sample_weight(struct evsel *evsel);
+void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr);
+int arch_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size);
+
+int evsel__set_filter(struct evsel *evsel, const char *filter);
+int evsel__append_tp_filter(struct evsel *evsel, const char *filter);
+int evsel__append_addr_filter(struct evsel *evsel, const char *filter);
+int evsel__enable_cpu(struct evsel *evsel, int cpu_map_idx);
+int evsel__enable(struct evsel *evsel);
+int evsel__disable(struct evsel *evsel);
+int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx);
+
+int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx);
+int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads);
+int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
+ struct perf_thread_map *threads);
+void evsel__close(struct evsel *evsel);
+int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
+ struct perf_thread_map *threads);
+bool evsel__detect_missing_features(struct evsel *evsel);
+
+enum rlimit_action { NO_CHANGE, SET_TO_MAX, INCREASED_MAX };
+bool evsel__increase_rlimit(enum rlimit_action *set_rlimit);
+
+bool evsel__precise_ip_fallback(struct evsel *evsel);
+
+struct perf_sample;
+
+#ifdef HAVE_LIBTRACEEVENT
+void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name);
+u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name);
+
+static inline char *evsel__strval(struct evsel *evsel, struct perf_sample *sample, const char *name)
+{
+ return evsel__rawptr(evsel, sample, name);
+}
+#endif
+
+struct tep_format_field;
+
+u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample, bool needs_swap);
+
+struct tep_format_field *evsel__field(struct evsel *evsel, const char *name);
+
+static inline bool __evsel__match(const struct evsel *evsel, u32 type, u64 config)
+{
+ if (evsel->core.attr.type != type)
+ return false;
+
+ if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) &&
+ perf_pmus__supports_extended_type())
+ return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == config;
+
+ return evsel->core.attr.config == config;
+}
+
+#define evsel__match(evsel, t, c) __evsel__match(evsel, PERF_TYPE_##t, PERF_COUNT_##c)
+
+static inline bool evsel__match2(struct evsel *e1, struct evsel *e2)
+{
+ return (e1->core.attr.type == e2->core.attr.type) &&
+ (e1->core.attr.config == e2->core.attr.config);
+}
+
+int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread);
+
+int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale);
+
+/**
+ * evsel__read_on_cpu - Read out the results on a CPU and thread
+ *
+ * @evsel - event selector to read value
+ * @cpu_map_idx - CPU of interest
+ * @thread - thread of interest
+ */
+static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread)
+{
+ return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, false);
+}
+
+/**
+ * evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
+ *
+ * @evsel - event selector to read value
+ * @cpu_map_idx - CPU of interest
+ * @thread - thread of interest
+ */
+static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu_map_idx, int thread)
+{
+ return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, true);
+}
+
+int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
+ struct perf_sample *sample);
+
+int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event,
+ u64 *timestamp);
+
+u16 evsel__id_hdr_size(struct evsel *evsel);
+
+static inline struct evsel *evsel__next(struct evsel *evsel)
+{
+ return list_entry(evsel->core.node.next, struct evsel, core.node);
+}
+
+static inline struct evsel *evsel__prev(struct evsel *evsel)
+{
+ return list_entry(evsel->core.node.prev, struct evsel, core.node);
+}
+
+/**
+ * evsel__is_group_leader - Return whether given evsel is a leader event
+ *
+ * @evsel - evsel selector to be tested
+ *
+ * Return %true if @evsel is a group leader or a stand-alone event
+ */
+static inline bool evsel__is_group_leader(const struct evsel *evsel)
+{
+ return evsel->core.leader == &evsel->core;
+}
+
+/**
+ * evsel__is_group_event - Return whether given evsel is a group event
+ *
+ * @evsel - evsel selector to be tested
+ *
+ * Return %true iff event group view is enabled and @evsel is a actual group
+ * leader which has other members in the group
+ */
+static inline bool evsel__is_group_event(struct evsel *evsel)
+{
+ if (!symbol_conf.event_group)
+ return false;
+
+ return evsel__is_group_leader(evsel) && evsel->core.nr_members > 1;
+}
+
+bool evsel__is_function_event(struct evsel *evsel);
+
+static inline bool evsel__is_bpf_output(struct evsel *evsel)
+{
+ return evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT);
+}
+
+static inline bool evsel__is_clock(const struct evsel *evsel)
+{
+ return evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
+ evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK);
+}
+
+bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize);
+int evsel__open_strerror(struct evsel *evsel, struct target *target,
+ int err, char *msg, size_t size);
+
+static inline int evsel__group_idx(struct evsel *evsel)
+{
+ return evsel->core.idx - evsel->core.leader->idx;
+}
+
+/* Iterates group WITHOUT the leader. */
+#define for_each_group_member_head(_evsel, _leader, _head) \
+for ((_evsel) = list_entry((_leader)->core.node.next, struct evsel, core.node); \
+ (_evsel) && &(_evsel)->core.node != (_head) && \
+ (_evsel)->core.leader == &(_leader)->core; \
+ (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node))
+
+#define for_each_group_member(_evsel, _leader) \
+ for_each_group_member_head(_evsel, _leader, &(_leader)->evlist->core.entries)
+
+/* Iterates group WITH the leader. */
+#define for_each_group_evsel_head(_evsel, _leader, _head) \
+for ((_evsel) = _leader; \
+ (_evsel) && &(_evsel)->core.node != (_head) && \
+ (_evsel)->core.leader == &(_leader)->core; \
+ (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node))
+
+#define for_each_group_evsel(_evsel, _leader) \
+ for_each_group_evsel_head(_evsel, _leader, &(_leader)->evlist->core.entries)
+
+static inline bool evsel__has_branch_callstack(const struct evsel *evsel)
+{
+ return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
+}
+
+static inline bool evsel__has_branch_hw_idx(const struct evsel *evsel)
+{
+ return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
+}
+
+static inline bool evsel__has_callchain(const struct evsel *evsel)
+{
+ /*
+ * For reporting purposes, an evsel sample can have a recorded callchain
+ * or a callchain synthesized from AUX area data.
+ */
+ return evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN ||
+ evsel->synth_sample_type & PERF_SAMPLE_CALLCHAIN;
+}
+
+static inline bool evsel__has_br_stack(const struct evsel *evsel)
+{
+ /*
+ * For reporting purposes, an evsel sample can have a recorded branch
+ * stack or a branch stack synthesized from AUX area data.
+ */
+ return evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK ||
+ evsel->synth_sample_type & PERF_SAMPLE_BRANCH_STACK;
+}
+
+static inline bool evsel__is_dummy_event(struct evsel *evsel)
+{
+ return (evsel->core.attr.type == PERF_TYPE_SOFTWARE) &&
+ (evsel->core.attr.config == PERF_COUNT_SW_DUMMY);
+}
+
+struct perf_env *evsel__env(struct evsel *evsel);
+
+int evsel__store_ids(struct evsel *evsel, struct evlist *evlist);
+
+void evsel__zero_per_pkg(struct evsel *evsel);
+bool evsel__is_hybrid(const struct evsel *evsel);
+struct evsel *evsel__leader(const struct evsel *evsel);
+bool evsel__has_leader(struct evsel *evsel, struct evsel *leader);
+bool evsel__is_leader(struct evsel *evsel);
+void evsel__set_leader(struct evsel *evsel, struct evsel *leader);
+int evsel__source_count(const struct evsel *evsel);
+void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader);
+
+bool arch_evsel__must_be_in_group(const struct evsel *evsel);
+
+/*
+ * Macro to swap the bit-field postition and size.
+ * Used when,
+ * - dont need to swap the entire u64 &&
+ * - when u64 has variable bit-field sizes &&
+ * - when presented in a host endian which is different
+ * than the source endian of the perf.data file
+ */
+#define bitfield_swap(src, pos, size) \
+ ((((src) >> (pos)) & ((1ull << (size)) - 1)) << (63 - ((pos) + (size) - 1)))
+
+u64 evsel__bitfield_swap_branch_flags(u64 value);
+void evsel__set_config_if_unset(struct perf_pmu *pmu, struct evsel *evsel,
+ const char *config_name, u64 val);
+
+#endif /* __PERF_EVSEL_H */
diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h
new file mode 100644
index 000000000..aee6f808b
--- /dev/null
+++ b/tools/perf/util/evsel_config.h
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __PERF_EVSEL_CONFIG_H
+#define __PERF_EVSEL_CONFIG_H 1
+
+#include <linux/types.h>
+#include <stdbool.h>
+
+/*
+ * The 'struct evsel_config_term' is used to pass event
+ * specific configuration data to evsel__config routine.
+ * It is allocated within event parsing and attached to
+ * evsel::config_terms list head.
+*/
+enum evsel_term_type {
+ EVSEL__CONFIG_TERM_PERIOD,
+ EVSEL__CONFIG_TERM_FREQ,
+ EVSEL__CONFIG_TERM_TIME,
+ EVSEL__CONFIG_TERM_CALLGRAPH,
+ EVSEL__CONFIG_TERM_STACK_USER,
+ EVSEL__CONFIG_TERM_INHERIT,
+ EVSEL__CONFIG_TERM_MAX_STACK,
+ EVSEL__CONFIG_TERM_MAX_EVENTS,
+ EVSEL__CONFIG_TERM_OVERWRITE,
+ EVSEL__CONFIG_TERM_DRV_CFG,
+ EVSEL__CONFIG_TERM_BRANCH,
+ EVSEL__CONFIG_TERM_PERCORE,
+ EVSEL__CONFIG_TERM_AUX_OUTPUT,
+ EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE,
+ EVSEL__CONFIG_TERM_CFG_CHG,
+};
+
+struct evsel_config_term {
+ struct list_head list;
+ enum evsel_term_type type;
+ bool free_str;
+ union {
+ u64 period;
+ u64 freq;
+ bool time;
+ u64 stack_user;
+ int max_stack;
+ bool inherit;
+ bool overwrite;
+ unsigned long max_events;
+ bool percore;
+ bool aux_output;
+ u32 aux_sample_size;
+ u64 cfg_chg;
+ char *str;
+ } val;
+ bool weak;
+};
+
+struct evsel;
+
+struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type);
+
+#define evsel__get_config_term(evsel, type) \
+ __evsel__get_config_term(evsel, EVSEL__CONFIG_TERM_ ## type)
+
+#endif // __PERF_EVSEL_CONFIG_H
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
new file mode 100644
index 000000000..8719b3cb5
--- /dev/null
+++ b/tools/perf/util/evsel_fprintf.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include "util/evlist.h"
+#include "evsel.h"
+#include "util/evsel_fprintf.h"
+#include "util/event.h"
+#include "callchain.h"
+#include "map.h"
+#include "strlist.h"
+#include "symbol.h"
+#include "srcline.h"
+#include "dso.h"
+
+#ifdef HAVE_LIBTRACEEVENT
+#include <traceevent/event-parse.h>
+#endif
+
+static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...)
+{
+ va_list args;
+ int ret = 0;
+
+ if (!*first) {
+ ret += fprintf(fp, ",");
+ } else {
+ ret += fprintf(fp, ":");
+ *first = false;
+ }
+
+ va_start(args, fmt);
+ ret += vfprintf(fp, fmt, args);
+ va_end(args);
+ return ret;
+}
+
+static int __print_attr__fprintf(FILE *fp, const char *name, const char *val, void *priv)
+{
+ return comma_fprintf(fp, (bool *)priv, " %s: %s", name, val);
+}
+
+int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE *fp)
+{
+ bool first = true;
+ int printed = 0;
+
+ if (details->event_group) {
+ struct evsel *pos;
+
+ if (!evsel__is_group_leader(evsel))
+ return 0;
+
+ if (evsel->core.nr_members > 1)
+ printed += fprintf(fp, "%s{", evsel->group_name ?: "");
+
+ printed += fprintf(fp, "%s", evsel__name(evsel));
+ for_each_group_member(pos, evsel)
+ printed += fprintf(fp, ",%s", evsel__name(pos));
+
+ if (evsel->core.nr_members > 1)
+ printed += fprintf(fp, "}");
+ goto out;
+ }
+
+ printed += fprintf(fp, "%s", evsel__name(evsel));
+
+ if (details->verbose) {
+ printed += perf_event_attr__fprintf(fp, &evsel->core.attr,
+ __print_attr__fprintf, &first);
+ } else if (details->freq) {
+ const char *term = "sample_freq";
+
+ if (!evsel->core.attr.freq)
+ term = "sample_period";
+
+ printed += comma_fprintf(fp, &first, " %s=%" PRIu64,
+ term, (u64)evsel->core.attr.sample_freq);
+ }
+
+#ifdef HAVE_LIBTRACEEVENT
+ if (details->trace_fields) {
+ struct tep_format_field *field;
+
+ if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) {
+ printed += comma_fprintf(fp, &first, " (not a tracepoint)");
+ goto out;
+ }
+
+ field = evsel->tp_format->format.fields;
+ if (field == NULL) {
+ printed += comma_fprintf(fp, &first, " (no trace field)");
+ goto out;
+ }
+
+ printed += comma_fprintf(fp, &first, " trace_fields: %s", field->name);
+
+ field = field->next;
+ while (field) {
+ printed += comma_fprintf(fp, &first, "%s", field->name);
+ field = field->next;
+ }
+ }
+#endif
+out:
+ fputc('\n', fp);
+ return ++printed;
+}
+
+#ifndef PYTHON_PERF
+int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
+ unsigned int print_opts, struct callchain_cursor *cursor,
+ struct strlist *bt_stop_list, FILE *fp)
+{
+ int printed = 0;
+ struct callchain_cursor_node *node;
+ int print_ip = print_opts & EVSEL__PRINT_IP;
+ int print_sym = print_opts & EVSEL__PRINT_SYM;
+ int print_dso = print_opts & EVSEL__PRINT_DSO;
+ int print_dsoff = print_opts & EVSEL__PRINT_DSOFF;
+ int print_symoffset = print_opts & EVSEL__PRINT_SYMOFFSET;
+ int print_oneline = print_opts & EVSEL__PRINT_ONELINE;
+ int print_srcline = print_opts & EVSEL__PRINT_SRCLINE;
+ int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR;
+ int print_arrow = print_opts & EVSEL__PRINT_CALLCHAIN_ARROW;
+ int print_skip_ignored = print_opts & EVSEL__PRINT_SKIP_IGNORED;
+ char s = print_oneline ? ' ' : '\t';
+ bool first = true;
+
+ if (cursor == NULL)
+ return fprintf(fp, "<not enough memory for the callchain cursor>%s", print_oneline ? "" : "\n");
+
+ if (sample->callchain) {
+ callchain_cursor_commit(cursor);
+
+ while (1) {
+ struct symbol *sym;
+ struct map *map;
+ u64 addr = 0;
+
+ node = callchain_cursor_current(cursor);
+ if (!node)
+ break;
+
+ sym = node->ms.sym;
+ map = node->ms.map;
+
+ if (sym && sym->ignore && print_skip_ignored)
+ goto next;
+
+ printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");
+
+ if (print_arrow && !first)
+ printed += fprintf(fp, " <-");
+
+ if (map)
+ addr = map__map_ip(map, node->ip);
+
+ if (print_ip)
+ printed += fprintf(fp, "%c%16" PRIx64, s, node->ip);
+
+ if (print_sym) {
+ struct addr_location node_al;
+
+ addr_location__init(&node_al);
+ printed += fprintf(fp, " ");
+ node_al.addr = addr;
+ node_al.map = map__get(map);
+
+ if (print_symoffset) {
+ printed += __symbol__fprintf_symname_offs(sym, &node_al,
+ print_unknown_as_addr,
+ true, fp);
+ } else {
+ printed += __symbol__fprintf_symname(sym, &node_al,
+ print_unknown_as_addr, fp);
+ }
+ addr_location__exit(&node_al);
+ }
+
+ if (print_dso && (!sym || !sym->inlined))
+ printed += map__fprintf_dsoname_dsoff(map, print_dsoff, addr, fp);
+
+ if (print_srcline)
+ printed += map__fprintf_srcline(map, addr, "\n ", fp);
+
+ if (sym && sym->inlined)
+ printed += fprintf(fp, " (inlined)");
+
+ if (!print_oneline)
+ printed += fprintf(fp, "\n");
+
+ /* Add srccode here too? */
+ if (bt_stop_list && sym &&
+ strlist__has_entry(bt_stop_list, sym->name)) {
+ break;
+ }
+
+ first = false;
+next:
+ callchain_cursor_advance(cursor);
+ }
+ }
+
+ return printed;
+}
+
+int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
+ int left_alignment, unsigned int print_opts,
+ struct callchain_cursor *cursor, struct strlist *bt_stop_list, FILE *fp)
+{
+ int printed = 0;
+ int print_ip = print_opts & EVSEL__PRINT_IP;
+ int print_sym = print_opts & EVSEL__PRINT_SYM;
+ int print_dso = print_opts & EVSEL__PRINT_DSO;
+ int print_dsoff = print_opts & EVSEL__PRINT_DSOFF;
+ int print_symoffset = print_opts & EVSEL__PRINT_SYMOFFSET;
+ int print_srcline = print_opts & EVSEL__PRINT_SRCLINE;
+ int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR;
+
+ if (cursor != NULL) {
+ printed += sample__fprintf_callchain(sample, left_alignment, print_opts,
+ cursor, bt_stop_list, fp);
+ } else {
+ printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");
+
+ if (print_ip)
+ printed += fprintf(fp, "%16" PRIx64, sample->ip);
+
+ if (print_sym) {
+ printed += fprintf(fp, " ");
+ if (print_symoffset) {
+ printed += __symbol__fprintf_symname_offs(al->sym, al,
+ print_unknown_as_addr,
+ true, fp);
+ } else {
+ printed += __symbol__fprintf_symname(al->sym, al,
+ print_unknown_as_addr, fp);
+ }
+ }
+
+ if (print_dso)
+ printed += map__fprintf_dsoname_dsoff(al->map, print_dsoff, al->addr, fp);
+
+ if (print_srcline)
+ printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp);
+ }
+
+ return printed;
+}
+#endif /* PYTHON_PERF */
diff --git a/tools/perf/util/evsel_fprintf.h b/tools/perf/util/evsel_fprintf.h
new file mode 100644
index 000000000..c8a9fac2f
--- /dev/null
+++ b/tools/perf/util/evsel_fprintf.h
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __PERF_EVSEL_FPRINTF_H
+#define __PERF_EVSEL_FPRINTF_H 1
+
+#include <stdio.h>
+#include <stdbool.h>
+
+struct evsel;
+
+struct perf_attr_details {
+ bool freq;
+ bool verbose;
+ bool event_group;
+ bool force;
+ bool trace_fields;
+};
+
+int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE *fp);
+
+#define EVSEL__PRINT_IP (1<<0)
+#define EVSEL__PRINT_SYM (1<<1)
+#define EVSEL__PRINT_DSO (1<<2)
+#define EVSEL__PRINT_SYMOFFSET (1<<3)
+#define EVSEL__PRINT_ONELINE (1<<4)
+#define EVSEL__PRINT_SRCLINE (1<<5)
+#define EVSEL__PRINT_UNKNOWN_AS_ADDR (1<<6)
+#define EVSEL__PRINT_CALLCHAIN_ARROW (1<<7)
+#define EVSEL__PRINT_SKIP_IGNORED (1<<8)
+#define EVSEL__PRINT_DSOFF (1<<9)
+
+struct addr_location;
+struct perf_event_attr;
+struct perf_sample;
+struct callchain_cursor;
+struct strlist;
+
+int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
+ unsigned int print_opts, struct callchain_cursor *cursor,
+ struct strlist *bt_stop_list, FILE *fp);
+
+int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
+ int left_alignment, unsigned int print_opts,
+ struct callchain_cursor *cursor,
+ struct strlist *bt_stop_list, FILE *fp);
+
+typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *);
+
+int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
+ attr__fprintf_f attr__fprintf, void *priv);
+#endif // __PERF_EVSEL_H
diff --git a/tools/perf/util/evswitch.c b/tools/perf/util/evswitch.c
new file mode 100644
index 000000000..40cb56a93
--- /dev/null
+++ b/tools/perf/util/evswitch.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+
+#include "evswitch.h"
+#include "evlist.h"
+
+bool evswitch__discard(struct evswitch *evswitch, struct evsel *evsel)
+{
+ if (evswitch->on && evswitch->discarding) {
+ if (evswitch->on != evsel)
+ return true;
+
+ evswitch->discarding = false;
+
+ if (!evswitch->show_on_off_events)
+ return true;
+
+ return false;
+ }
+
+ if (evswitch->off && !evswitch->discarding) {
+ if (evswitch->off != evsel)
+ return false;
+
+ evswitch->discarding = true;
+
+ if (!evswitch->show_on_off_events)
+ return true;
+ }
+
+ return false;
+}
+
+static int evswitch__fprintf_enoent(FILE *fp, const char *evtype, const char *evname)
+{
+ int printed = fprintf(fp, "ERROR: switch-%s event not found (%s)\n", evtype, evname);
+
+ return printed += fprintf(fp, "HINT: use 'perf evlist' to see the available event names\n");
+}
+
+int evswitch__init(struct evswitch *evswitch, struct evlist *evlist, FILE *fp)
+{
+ if (evswitch->on_name) {
+ evswitch->on = evlist__find_evsel_by_str(evlist, evswitch->on_name);
+ if (evswitch->on == NULL) {
+ evswitch__fprintf_enoent(fp, "on", evswitch->on_name);
+ return -ENOENT;
+ }
+ evswitch->discarding = true;
+ }
+
+ if (evswitch->off_name) {
+ evswitch->off = evlist__find_evsel_by_str(evlist, evswitch->off_name);
+ if (evswitch->off == NULL) {
+ evswitch__fprintf_enoent(fp, "off", evswitch->off_name);
+ return -ENOENT;
+ }
+ }
+
+ return 0;
+}
diff --git a/tools/perf/util/evswitch.h b/tools/perf/util/evswitch.h
new file mode 100644
index 000000000..8ffdbe526
--- /dev/null
+++ b/tools/perf/util/evswitch.h
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+#ifndef __PERF_EVSWITCH_H
+#define __PERF_EVSWITCH_H 1
+
+#include <stdbool.h>
+#include <stdio.h>
+
+struct evsel;
+struct evlist;
+
+struct evswitch {
+ struct evsel *on, *off;
+ const char *on_name, *off_name;
+ bool discarding;
+ bool show_on_off_events;
+};
+
+int evswitch__init(struct evswitch *evswitch, struct evlist *evlist, FILE *fp);
+
+bool evswitch__discard(struct evswitch *evswitch, struct evsel *evsel);
+
+#define OPTS_EVSWITCH(evswitch) \
+ OPT_STRING(0, "switch-on", &(evswitch)->on_name, \
+ "event", "Consider events after the occurrence of this event"), \
+ OPT_STRING(0, "switch-off", &(evswitch)->off_name, \
+ "event", "Stop considering events after the occurrence of this event"), \
+ OPT_BOOLEAN(0, "show-on-off-events", &(evswitch)->show_on_off_events, \
+ "Show the on/off switch events, used with --switch-on and --switch-off")
+
+#endif /* __PERF_EVSWITCH_H */
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
new file mode 100644
index 000000000..4488f306d
--- /dev/null
+++ b/tools/perf/util/expr.c
@@ -0,0 +1,522 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdbool.h>
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include "metricgroup.h"
+#include "cpumap.h"
+#include "cputopo.h"
+#include "debug.h"
+#include "evlist.h"
+#include "expr.h"
+#include <util/expr-bison.h>
+#include <util/expr-flex.h>
+#include "util/hashmap.h"
+#include "util/header.h"
+#include "util/pmu.h"
+#include "smt.h"
+#include "tsc.h"
+#include <api/fs/fs.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+#include <ctype.h>
+#include <math.h>
+#include "pmu.h"
+
+#ifdef PARSER_DEBUG
+extern int expr_debug;
+#endif
+
+struct expr_id_data {
+ union {
+ struct {
+ double val;
+ int source_count;
+ } val;
+ struct {
+ double val;
+ const char *metric_name;
+ const char *metric_expr;
+ } ref;
+ };
+
+ enum {
+ /* Holding a double value. */
+ EXPR_ID_DATA__VALUE,
+ /* Reference to another metric. */
+ EXPR_ID_DATA__REF,
+ /* A reference but the value has been computed. */
+ EXPR_ID_DATA__REF_VALUE,
+ } kind;
+};
+
+static size_t key_hash(long key, void *ctx __maybe_unused)
+{
+ const char *str = (const char *)key;
+ size_t hash = 0;
+
+ while (*str != '\0') {
+ hash *= 31;
+ hash += *str;
+ str++;
+ }
+ return hash;
+}
+
+static bool key_equal(long key1, long key2, void *ctx __maybe_unused)
+{
+ return !strcmp((const char *)key1, (const char *)key2);
+}
+
+struct hashmap *ids__new(void)
+{
+ struct hashmap *hash;
+
+ hash = hashmap__new(key_hash, key_equal, NULL);
+ if (IS_ERR(hash))
+ return NULL;
+ return hash;
+}
+
+void ids__free(struct hashmap *ids)
+{
+ struct hashmap_entry *cur;
+ size_t bkt;
+
+ if (ids == NULL)
+ return;
+
+ hashmap__for_each_entry(ids, cur, bkt) {
+ zfree(&cur->pkey);
+ zfree(&cur->pvalue);
+ }
+
+ hashmap__free(ids);
+}
+
+int ids__insert(struct hashmap *ids, const char *id)
+{
+ struct expr_id_data *data_ptr = NULL, *old_data = NULL;
+ char *old_key = NULL;
+ int ret;
+
+ ret = hashmap__set(ids, id, data_ptr, &old_key, &old_data);
+ if (ret)
+ free(data_ptr);
+ free(old_key);
+ free(old_data);
+ return ret;
+}
+
+struct hashmap *ids__union(struct hashmap *ids1, struct hashmap *ids2)
+{
+ size_t bkt;
+ struct hashmap_entry *cur;
+ int ret;
+ struct expr_id_data *old_data = NULL;
+ char *old_key = NULL;
+
+ if (!ids1)
+ return ids2;
+
+ if (!ids2)
+ return ids1;
+
+ if (hashmap__size(ids1) < hashmap__size(ids2)) {
+ struct hashmap *tmp = ids1;
+
+ ids1 = ids2;
+ ids2 = tmp;
+ }
+ hashmap__for_each_entry(ids2, cur, bkt) {
+ ret = hashmap__set(ids1, cur->key, cur->value, &old_key, &old_data);
+ free(old_key);
+ free(old_data);
+
+ if (ret) {
+ hashmap__free(ids1);
+ hashmap__free(ids2);
+ return NULL;
+ }
+ }
+ hashmap__free(ids2);
+ return ids1;
+}
+
+/* Caller must make sure id is allocated */
+int expr__add_id(struct expr_parse_ctx *ctx, const char *id)
+{
+ return ids__insert(ctx->ids, id);
+}
+
+/* Caller must make sure id is allocated */
+int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val)
+{
+ return expr__add_id_val_source_count(ctx, id, val, /*source_count=*/1);
+}
+
+/* Caller must make sure id is allocated */
+int expr__add_id_val_source_count(struct expr_parse_ctx *ctx, const char *id,
+ double val, int source_count)
+{
+ struct expr_id_data *data_ptr = NULL, *old_data = NULL;
+ char *old_key = NULL;
+ int ret;
+
+ data_ptr = malloc(sizeof(*data_ptr));
+ if (!data_ptr)
+ return -ENOMEM;
+ data_ptr->val.val = val;
+ data_ptr->val.source_count = source_count;
+ data_ptr->kind = EXPR_ID_DATA__VALUE;
+
+ ret = hashmap__set(ctx->ids, id, data_ptr, &old_key, &old_data);
+ if (ret)
+ free(data_ptr);
+ free(old_key);
+ free(old_data);
+ return ret;
+}
+
+int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref)
+{
+ struct expr_id_data *data_ptr = NULL, *old_data = NULL;
+ char *old_key = NULL;
+ char *name;
+ int ret;
+
+ data_ptr = zalloc(sizeof(*data_ptr));
+ if (!data_ptr)
+ return -ENOMEM;
+
+ name = strdup(ref->metric_name);
+ if (!name) {
+ free(data_ptr);
+ return -ENOMEM;
+ }
+
+ /*
+ * Intentionally passing just const char pointers,
+ * originally from 'struct pmu_event' object.
+ * We don't need to change them, so there's no
+ * need to create our own copy.
+ */
+ data_ptr->ref.metric_name = ref->metric_name;
+ data_ptr->ref.metric_expr = ref->metric_expr;
+ data_ptr->kind = EXPR_ID_DATA__REF;
+
+ ret = hashmap__set(ctx->ids, name, data_ptr, &old_key, &old_data);
+ if (ret)
+ free(data_ptr);
+
+ pr_debug2("adding ref metric %s: %s\n",
+ ref->metric_name, ref->metric_expr);
+
+ free(old_key);
+ free(old_data);
+ return ret;
+}
+
+int expr__get_id(struct expr_parse_ctx *ctx, const char *id,
+ struct expr_id_data **data)
+{
+ return hashmap__find(ctx->ids, id, data) ? 0 : -1;
+}
+
+bool expr__subset_of_ids(struct expr_parse_ctx *haystack,
+ struct expr_parse_ctx *needles)
+{
+ struct hashmap_entry *cur;
+ size_t bkt;
+ struct expr_id_data *data;
+
+ hashmap__for_each_entry(needles->ids, cur, bkt) {
+ if (expr__get_id(haystack, cur->pkey, &data))
+ return false;
+ }
+ return true;
+}
+
+
+int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id,
+ struct expr_id_data **datap)
+{
+ struct expr_id_data *data;
+
+ if (expr__get_id(ctx, id, datap) || !*datap) {
+ pr_debug("%s not found\n", id);
+ return -1;
+ }
+
+ data = *datap;
+
+ switch (data->kind) {
+ case EXPR_ID_DATA__VALUE:
+ pr_debug2("lookup(%s): val %f\n", id, data->val.val);
+ break;
+ case EXPR_ID_DATA__REF:
+ pr_debug2("lookup(%s): ref metric name %s\n", id,
+ data->ref.metric_name);
+ pr_debug("processing metric: %s ENTRY\n", id);
+ data->kind = EXPR_ID_DATA__REF_VALUE;
+ if (expr__parse(&data->ref.val, ctx, data->ref.metric_expr)) {
+ pr_debug("%s failed to count\n", id);
+ return -1;
+ }
+ pr_debug("processing metric: %s EXIT: %f\n", id, data->ref.val);
+ break;
+ case EXPR_ID_DATA__REF_VALUE:
+ pr_debug2("lookup(%s): ref val %f metric name %s\n", id,
+ data->ref.val, data->ref.metric_name);
+ break;
+ default:
+ assert(0); /* Unreachable. */
+ }
+
+ return 0;
+}
+
+void expr__del_id(struct expr_parse_ctx *ctx, const char *id)
+{
+ struct expr_id_data *old_val = NULL;
+ char *old_key = NULL;
+
+ hashmap__delete(ctx->ids, id, &old_key, &old_val);
+ free(old_key);
+ free(old_val);
+}
+
+struct expr_parse_ctx *expr__ctx_new(void)
+{
+ struct expr_parse_ctx *ctx;
+
+ ctx = malloc(sizeof(struct expr_parse_ctx));
+ if (!ctx)
+ return NULL;
+
+ ctx->ids = hashmap__new(key_hash, key_equal, NULL);
+ if (IS_ERR(ctx->ids)) {
+ free(ctx);
+ return NULL;
+ }
+ ctx->sctx.user_requested_cpu_list = NULL;
+ ctx->sctx.runtime = 0;
+ ctx->sctx.system_wide = false;
+
+ return ctx;
+}
+
+void expr__ctx_clear(struct expr_parse_ctx *ctx)
+{
+ struct hashmap_entry *cur;
+ size_t bkt;
+
+ hashmap__for_each_entry(ctx->ids, cur, bkt) {
+ zfree(&cur->pkey);
+ zfree(&cur->pvalue);
+ }
+ hashmap__clear(ctx->ids);
+}
+
+void expr__ctx_free(struct expr_parse_ctx *ctx)
+{
+ struct hashmap_entry *cur;
+ size_t bkt;
+
+ if (!ctx)
+ return;
+
+ zfree(&ctx->sctx.user_requested_cpu_list);
+ hashmap__for_each_entry(ctx->ids, cur, bkt) {
+ zfree(&cur->pkey);
+ zfree(&cur->pvalue);
+ }
+ hashmap__free(ctx->ids);
+ free(ctx);
+}
+
+static int
+__expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr,
+ bool compute_ids)
+{
+ YY_BUFFER_STATE buffer;
+ void *scanner;
+ int ret;
+
+ pr_debug2("parsing metric: %s\n", expr);
+
+ ret = expr_lex_init_extra(&ctx->sctx, &scanner);
+ if (ret)
+ return ret;
+
+ buffer = expr__scan_string(expr, scanner);
+
+#ifdef PARSER_DEBUG
+ expr_debug = 1;
+ expr_set_debug(1, scanner);
+#endif
+
+ ret = expr_parse(val, ctx, compute_ids, scanner);
+
+ expr__flush_buffer(buffer, scanner);
+ expr__delete_buffer(buffer, scanner);
+ expr_lex_destroy(scanner);
+ return ret;
+}
+
+int expr__parse(double *final_val, struct expr_parse_ctx *ctx,
+ const char *expr)
+{
+ return __expr__parse(final_val, ctx, expr, /*compute_ids=*/false) ? -1 : 0;
+}
+
+int expr__find_ids(const char *expr, const char *one,
+ struct expr_parse_ctx *ctx)
+{
+ int ret = __expr__parse(NULL, ctx, expr, /*compute_ids=*/true);
+
+ if (one)
+ expr__del_id(ctx, one);
+
+ return ret;
+}
+
+double expr_id_data__value(const struct expr_id_data *data)
+{
+ if (data->kind == EXPR_ID_DATA__VALUE)
+ return data->val.val;
+ assert(data->kind == EXPR_ID_DATA__REF_VALUE);
+ return data->ref.val;
+}
+
+double expr_id_data__source_count(const struct expr_id_data *data)
+{
+ assert(data->kind == EXPR_ID_DATA__VALUE);
+ return data->val.source_count;
+}
+
+#if !defined(__i386__) && !defined(__x86_64__)
+double arch_get_tsc_freq(void)
+{
+ return 0.0;
+}
+#endif
+
+static double has_pmem(void)
+{
+ static bool has_pmem, cached;
+ const char *sysfs = sysfs__mountpoint();
+ char path[PATH_MAX];
+
+ if (!cached) {
+ snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs);
+ has_pmem = access(path, F_OK) == 0;
+ cached = true;
+ }
+ return has_pmem ? 1.0 : 0.0;
+}
+
+double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx)
+{
+ const struct cpu_topology *topology;
+ double result = NAN;
+
+ if (!strcmp("#num_cpus", literal)) {
+ result = cpu__max_present_cpu().cpu;
+ goto out;
+ }
+ if (!strcmp("#num_cpus_online", literal)) {
+ struct perf_cpu_map *online = cpu_map__online();
+
+ if (online)
+ result = perf_cpu_map__nr(online);
+ goto out;
+ }
+
+ if (!strcasecmp("#system_tsc_freq", literal)) {
+ result = arch_get_tsc_freq();
+ goto out;
+ }
+
+ /*
+ * Assume that topology strings are consistent, such as CPUs "0-1"
+ * wouldn't be listed as "0,1", and so after deduplication the number of
+ * these strings gives an indication of the number of packages, dies,
+ * etc.
+ */
+ if (!strcasecmp("#smt_on", literal)) {
+ result = smt_on() ? 1.0 : 0.0;
+ goto out;
+ }
+ if (!strcmp("#core_wide", literal)) {
+ result = core_wide(ctx->system_wide, ctx->user_requested_cpu_list)
+ ? 1.0 : 0.0;
+ goto out;
+ }
+ if (!strcmp("#num_packages", literal)) {
+ topology = online_topology();
+ result = topology->package_cpus_lists;
+ goto out;
+ }
+ if (!strcmp("#num_dies", literal)) {
+ topology = online_topology();
+ result = topology->die_cpus_lists;
+ goto out;
+ }
+ if (!strcmp("#num_cores", literal)) {
+ topology = online_topology();
+ result = topology->core_cpus_lists;
+ goto out;
+ }
+ if (!strcmp("#slots", literal)) {
+ result = perf_pmu__cpu_slots_per_cycle();
+ goto out;
+ }
+ if (!strcmp("#has_pmem", literal)) {
+ result = has_pmem();
+ goto out;
+ }
+
+ pr_err("Unrecognized literal '%s'", literal);
+out:
+ pr_debug2("literal: %s = %f\n", literal, result);
+ return result;
+}
+
+/* Does the event 'id' parse? Determine via ctx->ids if possible. */
+double expr__has_event(const struct expr_parse_ctx *ctx, bool compute_ids, const char *id)
+{
+ struct evlist *tmp;
+ double ret;
+
+ if (hashmap__find(ctx->ids, id, /*value=*/NULL))
+ return 1.0;
+
+ if (!compute_ids)
+ return 0.0;
+
+ tmp = evlist__new();
+ if (!tmp)
+ return NAN;
+ ret = parse_event(tmp, id) ? 0 : 1;
+ evlist__delete(tmp);
+ return ret;
+}
+
+double expr__strcmp_cpuid_str(const struct expr_parse_ctx *ctx __maybe_unused,
+ bool compute_ids __maybe_unused, const char *test_id)
+{
+ double ret;
+ struct perf_pmu *pmu = pmu__find_core_pmu();
+ char *cpuid = perf_pmu__getcpuid(pmu);
+
+ if (!cpuid)
+ return NAN;
+
+ ret = !strcmp_cpuid_str(test_id, cpuid);
+
+ free(cpuid);
+ return ret;
+}
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
new file mode 100644
index 000000000..c0cec29dd
--- /dev/null
+++ b/tools/perf/util/expr.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PARSE_CTX_H
+#define PARSE_CTX_H 1
+
+struct hashmap;
+struct metric_ref;
+
+struct expr_scanner_ctx {
+ char *user_requested_cpu_list;
+ int runtime;
+ bool system_wide;
+ bool is_test;
+};
+
+struct expr_parse_ctx {
+ struct hashmap *ids;
+ struct expr_scanner_ctx sctx;
+};
+
+struct expr_id_data;
+
+struct hashmap *ids__new(void);
+void ids__free(struct hashmap *ids);
+int ids__insert(struct hashmap *ids, const char *id);
+/*
+ * Union two sets of ids (hashmaps) and construct a third, freeing ids1 and
+ * ids2.
+ */
+struct hashmap *ids__union(struct hashmap *ids1, struct hashmap *ids2);
+
+struct expr_parse_ctx *expr__ctx_new(void);
+void expr__ctx_clear(struct expr_parse_ctx *ctx);
+void expr__ctx_free(struct expr_parse_ctx *ctx);
+
+void expr__del_id(struct expr_parse_ctx *ctx, const char *id);
+int expr__add_id(struct expr_parse_ctx *ctx, const char *id);
+int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val);
+int expr__add_id_val_source_count(struct expr_parse_ctx *ctx, const char *id,
+ double val, int source_count);
+int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref);
+int expr__get_id(struct expr_parse_ctx *ctx, const char *id,
+ struct expr_id_data **data);
+bool expr__subset_of_ids(struct expr_parse_ctx *haystack,
+ struct expr_parse_ctx *needles);
+int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id,
+ struct expr_id_data **datap);
+
+int expr__parse(double *final_val, struct expr_parse_ctx *ctx,
+ const char *expr);
+
+int expr__find_ids(const char *expr, const char *one,
+ struct expr_parse_ctx *ids);
+
+double expr_id_data__value(const struct expr_id_data *data);
+double expr_id_data__source_count(const struct expr_id_data *data);
+double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx);
+double expr__has_event(const struct expr_parse_ctx *ctx, bool compute_ids, const char *id);
+double expr__strcmp_cpuid_str(const struct expr_parse_ctx *ctx, bool compute_ids, const char *id);
+
+#endif
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
new file mode 100644
index 000000000..0feef0726
--- /dev/null
+++ b/tools/perf/util/expr.l
@@ -0,0 +1,140 @@
+%option prefix="expr_"
+%option reentrant
+%option bison-bridge
+
+%{
+#include <linux/compiler.h>
+#include "expr.h"
+#include "expr-bison.h"
+#include <math.h>
+
+char *expr_get_text(yyscan_t yyscanner);
+YYSTYPE *expr_get_lval(yyscan_t yyscanner);
+
+static double __value(YYSTYPE *yylval, char *str, int token)
+{
+ double num;
+
+ errno = 0;
+ num = strtod(str, NULL);
+ if (errno)
+ return EXPR_ERROR;
+
+ yylval->num = num;
+ return token;
+}
+
+static int value(yyscan_t scanner)
+{
+ YYSTYPE *yylval = expr_get_lval(scanner);
+ char *text = expr_get_text(scanner);
+
+ return __value(yylval, text, NUMBER);
+}
+
+/*
+ * Allow @ instead of / to be able to specify pmu/event/ without
+ * conflicts with normal division.
+ */
+static char *normalize(char *str, int runtime)
+{
+ char *ret = str;
+ char *dst = str;
+
+ while (*str) {
+ if (*str == '\\') {
+ *dst++ = *++str;
+ if (!*str)
+ break;
+ }
+ else if (*str == '?') {
+ char *paramval;
+ int i = 0;
+ int size = asprintf(&paramval, "%d", runtime);
+
+ if (size < 0)
+ *dst++ = '0';
+ else {
+ while (i < size)
+ *dst++ = paramval[i++];
+ free(paramval);
+ }
+ }
+ else
+ *dst++ = *str;
+ str++;
+ }
+
+ *dst = 0x0;
+ return ret;
+}
+
+static int str(yyscan_t scanner, int token, int runtime)
+{
+ YYSTYPE *yylval = expr_get_lval(scanner);
+ char *text = expr_get_text(scanner);
+
+ yylval->str = normalize(strdup(text), runtime);
+ if (!yylval->str)
+ return EXPR_ERROR;
+
+ yylval->str = normalize(yylval->str, runtime);
+ return token;
+}
+
+static int literal(yyscan_t scanner, const struct expr_scanner_ctx *sctx)
+{
+ YYSTYPE *yylval = expr_get_lval(scanner);
+
+ yylval->num = expr__get_literal(expr_get_text(scanner), sctx);
+ if (isnan(yylval->num)) {
+ if (!sctx->is_test)
+ return EXPR_ERROR;
+ yylval->num = 1;
+ }
+ return LITERAL;
+}
+%}
+
+number ([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+)(e-?[0-9]+)?
+
+sch [-,=]
+spec \\{sch}
+sym [0-9a-zA-Z_\.:@?]+
+symbol ({spec}|{sym})+
+literal #[0-9a-zA-Z_\.\-]+
+
+%%
+ struct expr_scanner_ctx *sctx = expr_get_extra(yyscanner);
+
+d_ratio { return D_RATIO; }
+max { return MAX; }
+min { return MIN; }
+if { return IF; }
+else { return ELSE; }
+source_count { return SOURCE_COUNT; }
+has_event { return HAS_EVENT; }
+strcmp_cpuid_str { return STRCMP_CPUID_STR; }
+{literal} { return literal(yyscanner, sctx); }
+{number} { return value(yyscanner); }
+{symbol} { return str(yyscanner, ID, sctx->runtime); }
+"|" { return '|'; }
+"^" { return '^'; }
+"&" { return '&'; }
+"<" { return '<'; }
+">" { return '>'; }
+"-" { return '-'; }
+"+" { return '+'; }
+"*" { return '*'; }
+"/" { return '/'; }
+"%" { return '%'; }
+"(" { return '('; }
+")" { return ')'; }
+"," { return ','; }
+. { }
+%%
+
+int expr_wrap(void *scanner __maybe_unused)
+{
+ return 1;
+}
diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y
new file mode 100644
index 000000000..6c93b358c
--- /dev/null
+++ b/tools/perf/util/expr.y
@@ -0,0 +1,379 @@
+/* Simple expression parser */
+%{
+#define YYDEBUG 1
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>
+#include "util/debug.h"
+#define IN_EXPR_Y 1
+#include "expr.h"
+#include "expr-bison.h"
+int expr_lex(YYSTYPE * yylval_param , void *yyscanner);
+%}
+
+%define api.pure full
+
+%parse-param { double *final_val }
+%parse-param { struct expr_parse_ctx *ctx }
+%parse-param { bool compute_ids }
+%parse-param {void *scanner}
+%lex-param {void* scanner}
+
+%union {
+ double num;
+ char *str;
+ struct ids {
+ /*
+ * When creating ids, holds the working set of event ids. NULL
+ * implies the set is empty.
+ */
+ struct hashmap *ids;
+ /*
+ * The metric value. When not creating ids this is the value
+ * read from a counter, a constant or some computed value. When
+ * creating ids the value is either a constant or BOTTOM. NAN is
+ * used as the special BOTTOM value, representing a "set of all
+ * values" case.
+ */
+ double val;
+ } ids;
+}
+
+%token ID NUMBER MIN MAX IF ELSE LITERAL D_RATIO SOURCE_COUNT HAS_EVENT STRCMP_CPUID_STR EXPR_ERROR
+%left MIN MAX IF
+%left '|'
+%left '^'
+%left '&'
+%left '<' '>'
+%left '-' '+'
+%left '*' '/' '%'
+%left NEG NOT
+%type <num> NUMBER LITERAL
+%type <str> ID
+%destructor { free ($$); } <str>
+%type <ids> expr if_expr
+%destructor { ids__free($$.ids); } <ids>
+
+%{
+static void expr_error(double *final_val __maybe_unused,
+ struct expr_parse_ctx *ctx __maybe_unused,
+ bool compute_ids __maybe_unused,
+ void *scanner __maybe_unused,
+ const char *s)
+{
+ pr_debug("%s\n", s);
+}
+
+/*
+ * During compute ids, the special "bottom" value uses NAN to represent the set
+ * of all values. NAN is selected as it isn't a useful constant value.
+ */
+#define BOTTOM NAN
+
+/* During computing ids, does val represent a constant (non-BOTTOM) value? */
+static bool is_const(double val)
+{
+ return isfinite(val);
+}
+
+static struct ids union_expr(struct ids ids1, struct ids ids2)
+{
+ struct ids result = {
+ .val = BOTTOM,
+ .ids = ids__union(ids1.ids, ids2.ids),
+ };
+ return result;
+}
+
+static struct ids handle_id(struct expr_parse_ctx *ctx, char *id,
+ bool compute_ids, bool source_count)
+{
+ struct ids result;
+
+ if (!compute_ids) {
+ /*
+ * Compute the event's value from ID. If the ID isn't known then
+ * it isn't used to compute the formula so set to NAN.
+ */
+ struct expr_id_data *data;
+
+ result.val = NAN;
+ if (expr__resolve_id(ctx, id, &data) == 0) {
+ result.val = source_count
+ ? expr_id_data__source_count(data)
+ : expr_id_data__value(data);
+ }
+ result.ids = NULL;
+ free(id);
+ } else {
+ /*
+ * Set the value to BOTTOM to show that any value is possible
+ * when the event is computed. Create a set of just the ID.
+ */
+ result.val = BOTTOM;
+ result.ids = ids__new();
+ if (!result.ids || ids__insert(result.ids, id)) {
+ pr_err("Error creating IDs for '%s'", id);
+ free(id);
+ }
+ }
+ return result;
+}
+
+/*
+ * If we're not computing ids or $1 and $3 are constants, compute the new
+ * constant value using OP. Its invariant that there are no ids. If computing
+ * ids for non-constants union the set of IDs that must be computed.
+ */
+#define BINARY_OP(RESULT, OP, LHS, RHS) \
+ if (!compute_ids || (is_const(LHS.val) && is_const(RHS.val))) { \
+ assert(LHS.ids == NULL); \
+ assert(RHS.ids == NULL); \
+ if (isnan(LHS.val) || isnan(RHS.val)) { \
+ RESULT.val = NAN; \
+ } else { \
+ RESULT.val = LHS.val OP RHS.val; \
+ } \
+ RESULT.ids = NULL; \
+ } else { \
+ RESULT = union_expr(LHS, RHS); \
+ }
+
+%}
+%%
+
+start: if_expr
+{
+ if (compute_ids)
+ ctx->ids = ids__union($1.ids, ctx->ids);
+
+ if (final_val)
+ *final_val = $1.val;
+}
+;
+
+if_expr: expr IF expr ELSE if_expr
+{
+ if (fpclassify($3.val) == FP_ZERO) {
+ /*
+ * The IF expression evaluated to 0 so treat as false, take the
+ * ELSE and discard everything else.
+ */
+ $$.val = $5.val;
+ $$.ids = $5.ids;
+ ids__free($1.ids);
+ ids__free($3.ids);
+ } else if (!compute_ids || is_const($3.val)) {
+ /*
+ * If ids aren't computed then treat the expression as true. If
+ * ids are being computed and the IF expr is a non-zero
+ * constant, then also evaluate the true case.
+ */
+ $$.val = $1.val;
+ $$.ids = $1.ids;
+ ids__free($3.ids);
+ ids__free($5.ids);
+ } else if ($1.val == $5.val) {
+ /*
+ * LHS == RHS, so both are an identical constant. No need to
+ * evaluate any events.
+ */
+ $$.val = $1.val;
+ $$.ids = NULL;
+ ids__free($1.ids);
+ ids__free($3.ids);
+ ids__free($5.ids);
+ } else {
+ /*
+ * Value is either the LHS or RHS and we need the IF expression
+ * to compute it.
+ */
+ $$ = union_expr($1, union_expr($3, $5));
+ }
+}
+| expr
+;
+
+expr: NUMBER
+{
+ $$.val = $1;
+ $$.ids = NULL;
+}
+| ID { $$ = handle_id(ctx, $1, compute_ids, /*source_count=*/false); }
+| SOURCE_COUNT '(' ID ')' { $$ = handle_id(ctx, $3, compute_ids, /*source_count=*/true); }
+| HAS_EVENT '(' ID ')'
+{
+ $$.val = expr__has_event(ctx, compute_ids, $3);
+ $$.ids = NULL;
+ free($3);
+}
+| STRCMP_CPUID_STR '(' ID ')'
+{
+ $$.val = expr__strcmp_cpuid_str(ctx, compute_ids, $3);
+ $$.ids = NULL;
+ free($3);
+}
+| expr '|' expr
+{
+ if (is_const($1.val) && is_const($3.val)) {
+ assert($1.ids == NULL);
+ assert($3.ids == NULL);
+ $$.ids = NULL;
+ $$.val = (fpclassify($1.val) == FP_ZERO && fpclassify($3.val) == FP_ZERO) ? 0 : 1;
+ } else if (is_const($1.val)) {
+ assert($1.ids == NULL);
+ if (fpclassify($1.val) == FP_ZERO) {
+ $$ = $3;
+ } else {
+ $$.val = 1;
+ $$.ids = NULL;
+ ids__free($3.ids);
+ }
+ } else if (is_const($3.val)) {
+ assert($3.ids == NULL);
+ if (fpclassify($3.val) == FP_ZERO) {
+ $$ = $1;
+ } else {
+ $$.val = 1;
+ $$.ids = NULL;
+ ids__free($1.ids);
+ }
+ } else {
+ $$ = union_expr($1, $3);
+ }
+}
+| expr '&' expr
+{
+ if (is_const($1.val) && is_const($3.val)) {
+ assert($1.ids == NULL);
+ assert($3.ids == NULL);
+ $$.val = (fpclassify($1.val) != FP_ZERO && fpclassify($3.val) != FP_ZERO) ? 1 : 0;
+ $$.ids = NULL;
+ } else if (is_const($1.val)) {
+ assert($1.ids == NULL);
+ if (fpclassify($1.val) != FP_ZERO) {
+ $$ = $3;
+ } else {
+ $$.val = 0;
+ $$.ids = NULL;
+ ids__free($3.ids);
+ }
+ } else if (is_const($3.val)) {
+ assert($3.ids == NULL);
+ if (fpclassify($3.val) != FP_ZERO) {
+ $$ = $1;
+ } else {
+ $$.val = 0;
+ $$.ids = NULL;
+ ids__free($1.ids);
+ }
+ } else {
+ $$ = union_expr($1, $3);
+ }
+}
+| expr '^' expr
+{
+ if (is_const($1.val) && is_const($3.val)) {
+ assert($1.ids == NULL);
+ assert($3.ids == NULL);
+ $$.val = (fpclassify($1.val) == FP_ZERO) != (fpclassify($3.val) == FP_ZERO) ? 1 : 0;
+ $$.ids = NULL;
+ } else {
+ $$ = union_expr($1, $3);
+ }
+}
+| expr '<' expr { BINARY_OP($$, <, $1, $3); }
+| expr '>' expr { BINARY_OP($$, >, $1, $3); }
+| expr '+' expr { BINARY_OP($$, +, $1, $3); }
+| expr '-' expr { BINARY_OP($$, -, $1, $3); }
+| expr '*' expr { BINARY_OP($$, *, $1, $3); }
+| expr '/' expr
+{
+ if (fpclassify($3.val) == FP_ZERO) {
+ pr_debug("division by zero\n");
+ assert($3.ids == NULL);
+ if (compute_ids)
+ ids__free($1.ids);
+ $$.val = NAN;
+ $$.ids = NULL;
+ } else if (!compute_ids || (is_const($1.val) && is_const($3.val))) {
+ assert($1.ids == NULL);
+ assert($3.ids == NULL);
+ $$.val = $1.val / $3.val;
+ $$.ids = NULL;
+ } else {
+ /* LHS and/or RHS need computing from event IDs so union. */
+ $$ = union_expr($1, $3);
+ }
+}
+| expr '%' expr
+{
+ if (fpclassify($3.val) == FP_ZERO) {
+ pr_debug("division by zero\n");
+ YYABORT;
+ } else if (!compute_ids || (is_const($1.val) && is_const($3.val))) {
+ assert($1.ids == NULL);
+ assert($3.ids == NULL);
+ $$.val = (long)$1.val % (long)$3.val;
+ $$.ids = NULL;
+ } else {
+ /* LHS and/or RHS need computing from event IDs so union. */
+ $$ = union_expr($1, $3);
+ }
+}
+| D_RATIO '(' expr ',' expr ')'
+{
+ if (fpclassify($5.val) == FP_ZERO) {
+ /*
+ * Division by constant zero always yields zero and no events
+ * are necessary.
+ */
+ assert($5.ids == NULL);
+ $$.val = 0.0;
+ $$.ids = NULL;
+ ids__free($3.ids);
+ } else if (!compute_ids || (is_const($3.val) && is_const($5.val))) {
+ assert($3.ids == NULL);
+ assert($5.ids == NULL);
+ $$.val = $3.val / $5.val;
+ $$.ids = NULL;
+ } else {
+ /* LHS and/or RHS need computing from event IDs so union. */
+ $$ = union_expr($3, $5);
+ }
+}
+| '-' expr %prec NEG
+{
+ $$.val = -$2.val;
+ $$.ids = $2.ids;
+}
+| '(' if_expr ')'
+{
+ $$ = $2;
+}
+| MIN '(' expr ',' expr ')'
+{
+ if (!compute_ids) {
+ $$.val = $3.val < $5.val ? $3.val : $5.val;
+ $$.ids = NULL;
+ } else {
+ $$ = union_expr($3, $5);
+ }
+}
+| MAX '(' expr ',' expr ')'
+{
+ if (!compute_ids) {
+ $$.val = $3.val > $5.val ? $3.val : $5.val;
+ $$.ids = NULL;
+ } else {
+ $$ = union_expr($3, $5);
+ }
+}
+| LITERAL
+{
+ $$.val = $1;
+ $$.ids = NULL;
+}
+;
+
+%%
diff --git a/tools/perf/util/find-map.c b/tools/perf/util/find-map.c
new file mode 100644
index 000000000..7b2300588
--- /dev/null
+++ b/tools/perf/util/find-map.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+static int find_map(void **start, void **end, const char *name)
+{
+ FILE *maps;
+ char line[128];
+ int found = 0;
+
+ maps = fopen("/proc/self/maps", "r");
+ if (!maps) {
+ fprintf(stderr, "cannot open maps\n");
+ return -1;
+ }
+
+ while (!found && fgets(line, sizeof(line), maps)) {
+ int m = -1;
+
+ /* We care only about private r-x mappings. */
+ if (2 != sscanf(line, "%p-%p r-xp %*x %*x:%*x %*u %n",
+ start, end, &m))
+ continue;
+ if (m < 0)
+ continue;
+
+ if (!strncmp(&line[m], name, strlen(name)))
+ found = 1;
+ }
+
+ fclose(maps);
+ return !found;
+}
diff --git a/tools/perf/util/fncache.c b/tools/perf/util/fncache.c
new file mode 100644
index 000000000..6225cbc52
--- /dev/null
+++ b/tools/perf/util/fncache.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Manage a cache of file names' existence */
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <linux/list.h>
+#include "fncache.h"
+
+struct fncache {
+ struct hlist_node nd;
+ bool res;
+ char name[];
+};
+
+#define FNHSIZE 61
+
+static struct hlist_head fncache_hash[FNHSIZE];
+
+unsigned shash(const unsigned char *s)
+{
+ unsigned h = 0;
+ while (*s)
+ h = 65599 * h + *s++;
+ return h ^ (h >> 16);
+}
+
+static bool lookup_fncache(const char *name, bool *res)
+{
+ int h = shash((const unsigned char *)name) % FNHSIZE;
+ struct fncache *n;
+
+ hlist_for_each_entry(n, &fncache_hash[h], nd) {
+ if (!strcmp(n->name, name)) {
+ *res = n->res;
+ return true;
+ }
+ }
+ return false;
+}
+
+static void update_fncache(const char *name, bool res)
+{
+ struct fncache *n = malloc(sizeof(struct fncache) + strlen(name) + 1);
+ int h = shash((const unsigned char *)name) % FNHSIZE;
+
+ if (!n)
+ return;
+ strcpy(n->name, name);
+ n->res = res;
+ hlist_add_head(&n->nd, &fncache_hash[h]);
+}
+
+/* No LRU, only use when bounded in some other way. */
+bool file_available(const char *name)
+{
+ bool res;
+
+ if (lookup_fncache(name, &res))
+ return res;
+ res = access(name, R_OK) == 0;
+ update_fncache(name, res);
+ return res;
+}
diff --git a/tools/perf/util/fncache.h b/tools/perf/util/fncache.h
new file mode 100644
index 000000000..fe020beae
--- /dev/null
+++ b/tools/perf/util/fncache.h
@@ -0,0 +1,7 @@
+#ifndef _FCACHE_H
+#define _FCACHE_H 1
+
+unsigned shash(const unsigned char *s);
+bool file_available(const char *name);
+
+#endif
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
new file mode 100644
index 000000000..558efcb98
--- /dev/null
+++ b/tools/perf/util/ftrace.h
@@ -0,0 +1,81 @@
+#ifndef __PERF_FTRACE_H__
+#define __PERF_FTRACE_H__
+
+#include <linux/list.h>
+
+#include "target.h"
+
+struct evlist;
+
+struct perf_ftrace {
+ struct evlist *evlist;
+ struct target target;
+ const char *tracer;
+ struct list_head filters;
+ struct list_head notrace;
+ struct list_head graph_funcs;
+ struct list_head nograph_funcs;
+ unsigned long percpu_buffer_size;
+ bool inherit;
+ bool use_nsec;
+ int graph_depth;
+ int func_stack_trace;
+ int func_irq_info;
+ int graph_nosleep_time;
+ int graph_noirqs;
+ int graph_verbose;
+ int graph_thresh;
+};
+
+struct filter_entry {
+ struct list_head list;
+ char name[];
+};
+
+#define NUM_BUCKET 22 /* 20 + 2 (for outliers in both direction) */
+
+#ifdef HAVE_BPF_SKEL
+
+int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace);
+int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace);
+int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace);
+int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
+ int buckets[]);
+int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace);
+
+#else /* !HAVE_BPF_SKEL */
+
+static inline int
+perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
+ int buckets[] __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+#endif /* HAVE_BPF_SKEL */
+
+#endif /* __PERF_FTRACE_H__ */
diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
new file mode 100644
index 000000000..ac17a3cb5
--- /dev/null
+++ b/tools/perf/util/genelf.c
@@ -0,0 +1,529 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * genelf.c
+ * Copyright (C) 2014, Google, Inc
+ *
+ * Contributed by:
+ * Stephane Eranian <eranian@gmail.com>
+ */
+
+#include <sys/types.h>
+#include <stddef.h>
+#include <libelf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <fcntl.h>
+#include <err.h>
+#ifdef HAVE_DWARF_SUPPORT
+#include <dwarf.h>
+#endif
+
+#include "genelf.h"
+#include "../util/jitdump.h"
+#include <linux/compiler.h>
+
+#ifndef NT_GNU_BUILD_ID
+#define NT_GNU_BUILD_ID 3
+#endif
+
+#define BUILD_ID_URANDOM /* different uuid for each run */
+
+#ifdef HAVE_LIBCRYPTO_SUPPORT
+
+#define BUILD_ID_MD5
+#undef BUILD_ID_SHA /* does not seem to work well when linked with Java */
+#undef BUILD_ID_URANDOM /* different uuid for each run */
+
+#ifdef BUILD_ID_SHA
+#include <openssl/sha.h>
+#endif
+
+#ifdef BUILD_ID_MD5
+#include <openssl/evp.h>
+#include <openssl/md5.h>
+#endif
+#endif
+
+
+typedef struct {
+ unsigned int namesz; /* Size of entry's owner string */
+ unsigned int descsz; /* Size of the note descriptor */
+ unsigned int type; /* Interpretation of the descriptor */
+ char name[0]; /* Start of the name+desc data */
+} Elf_Note;
+
+struct options {
+ char *output;
+ int fd;
+};
+
+static char shd_string_table[] = {
+ 0,
+ '.', 't', 'e', 'x', 't', 0, /* 1 */
+ '.', 's', 'h', 's', 't', 'r', 't', 'a', 'b', 0, /* 7 */
+ '.', 's', 'y', 'm', 't', 'a', 'b', 0, /* 17 */
+ '.', 's', 't', 'r', 't', 'a', 'b', 0, /* 25 */
+ '.', 'n', 'o', 't', 'e', '.', 'g', 'n', 'u', '.', 'b', 'u', 'i', 'l', 'd', '-', 'i', 'd', 0, /* 33 */
+ '.', 'd', 'e', 'b', 'u', 'g', '_', 'l', 'i', 'n', 'e', 0, /* 52 */
+ '.', 'd', 'e', 'b', 'u', 'g', '_', 'i', 'n', 'f', 'o', 0, /* 64 */
+ '.', 'd', 'e', 'b', 'u', 'g', '_', 'a', 'b', 'b', 'r', 'e', 'v', 0, /* 76 */
+ '.', 'e', 'h', '_', 'f', 'r', 'a', 'm', 'e', '_', 'h', 'd', 'r', 0, /* 90 */
+ '.', 'e', 'h', '_', 'f', 'r', 'a', 'm', 'e', 0, /* 104 */
+};
+
+static struct buildid_note {
+ Elf_Note desc; /* descsz: size of build-id, must be multiple of 4 */
+ char name[4]; /* GNU\0 */
+ char build_id[20];
+} bnote;
+
+static Elf_Sym symtab[]={
+ /* symbol 0 MUST be the undefined symbol */
+ { .st_name = 0, /* index in sym_string table */
+ .st_info = ELF_ST_TYPE(STT_NOTYPE),
+ .st_shndx = 0, /* for now */
+ .st_value = 0x0,
+ .st_other = ELF_ST_VIS(STV_DEFAULT),
+ .st_size = 0,
+ },
+ { .st_name = 1, /* index in sym_string table */
+ .st_info = ELF_ST_BIND(STB_LOCAL) | ELF_ST_TYPE(STT_FUNC),
+ .st_shndx = 1,
+ .st_value = 0, /* for now */
+ .st_other = ELF_ST_VIS(STV_DEFAULT),
+ .st_size = 0, /* for now */
+ }
+};
+
+#ifdef BUILD_ID_URANDOM
+static void
+gen_build_id(struct buildid_note *note,
+ unsigned long load_addr __maybe_unused,
+ const void *code __maybe_unused,
+ size_t csize __maybe_unused)
+{
+ int fd;
+ size_t sz = sizeof(note->build_id);
+ ssize_t sret;
+
+ fd = open("/dev/urandom", O_RDONLY);
+ if (fd == -1)
+ err(1, "cannot access /dev/urandom for buildid");
+
+ sret = read(fd, note->build_id, sz);
+
+ close(fd);
+
+ if (sret != (ssize_t)sz)
+ memset(note->build_id, 0, sz);
+}
+#endif
+
+#ifdef BUILD_ID_SHA
+static void
+gen_build_id(struct buildid_note *note,
+ unsigned long load_addr __maybe_unused,
+ const void *code,
+ size_t csize)
+{
+ if (sizeof(note->build_id) < SHA_DIGEST_LENGTH)
+ errx(1, "build_id too small for SHA1");
+
+ SHA1(code, csize, (unsigned char *)note->build_id);
+}
+#endif
+
+#ifdef BUILD_ID_MD5
+static void
+gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *code, size_t csize)
+{
+ EVP_MD_CTX *mdctx;
+
+ if (sizeof(note->build_id) < 16)
+ errx(1, "build_id too small for MD5");
+
+ mdctx = EVP_MD_CTX_new();
+ if (!mdctx)
+ errx(2, "failed to create EVP_MD_CTX");
+
+ EVP_DigestInit_ex(mdctx, EVP_md5(), NULL);
+ EVP_DigestUpdate(mdctx, &load_addr, sizeof(load_addr));
+ EVP_DigestUpdate(mdctx, code, csize);
+ EVP_DigestFinal_ex(mdctx, (unsigned char *)note->build_id, NULL);
+ EVP_MD_CTX_free(mdctx);
+}
+#endif
+
+static int
+jit_add_eh_frame_info(Elf *e, void* unwinding, uint64_t unwinding_header_size,
+ uint64_t unwinding_size, uint64_t base_offset)
+{
+ Elf_Data *d;
+ Elf_Scn *scn;
+ Elf_Shdr *shdr;
+ uint64_t unwinding_table_size = unwinding_size - unwinding_header_size;
+
+ /*
+ * setup eh_frame section
+ */
+ scn = elf_newscn(e);
+ if (!scn) {
+ warnx("cannot create section");
+ return -1;
+ }
+
+ d = elf_newdata(scn);
+ if (!d) {
+ warnx("cannot get new data");
+ return -1;
+ }
+
+ d->d_align = 8;
+ d->d_off = 0LL;
+ d->d_buf = unwinding;
+ d->d_type = ELF_T_BYTE;
+ d->d_size = unwinding_table_size;
+ d->d_version = EV_CURRENT;
+
+ shdr = elf_getshdr(scn);
+ if (!shdr) {
+ warnx("cannot get section header");
+ return -1;
+ }
+
+ shdr->sh_name = 104;
+ shdr->sh_type = SHT_PROGBITS;
+ shdr->sh_addr = base_offset;
+ shdr->sh_flags = SHF_ALLOC;
+ shdr->sh_entsize = 0;
+
+ /*
+ * setup eh_frame_hdr section
+ */
+ scn = elf_newscn(e);
+ if (!scn) {
+ warnx("cannot create section");
+ return -1;
+ }
+
+ d = elf_newdata(scn);
+ if (!d) {
+ warnx("cannot get new data");
+ return -1;
+ }
+
+ d->d_align = 4;
+ d->d_off = 0LL;
+ d->d_buf = unwinding + unwinding_table_size;
+ d->d_type = ELF_T_BYTE;
+ d->d_size = unwinding_header_size;
+ d->d_version = EV_CURRENT;
+
+ shdr = elf_getshdr(scn);
+ if (!shdr) {
+ warnx("cannot get section header");
+ return -1;
+ }
+
+ shdr->sh_name = 90;
+ shdr->sh_type = SHT_PROGBITS;
+ shdr->sh_addr = base_offset + unwinding_table_size;
+ shdr->sh_flags = SHF_ALLOC;
+ shdr->sh_entsize = 0;
+
+ return 0;
+}
+
+/*
+ * fd: file descriptor open for writing for the output file
+ * load_addr: code load address (could be zero, just used for buildid)
+ * sym: function name (for native code - used as the symbol)
+ * code: the native code
+ * csize: the code size in bytes
+ */
+int
+jit_write_elf(int fd, uint64_t load_addr, const char *sym,
+ const void *code, int csize,
+ void *debug __maybe_unused, int nr_debug_entries __maybe_unused,
+ void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size)
+{
+ Elf *e;
+ Elf_Data *d;
+ Elf_Scn *scn;
+ Elf_Ehdr *ehdr;
+ Elf_Phdr *phdr;
+ Elf_Shdr *shdr;
+ uint64_t eh_frame_base_offset;
+ char *strsym = NULL;
+ int symlen;
+ int retval = -1;
+
+ if (elf_version(EV_CURRENT) == EV_NONE) {
+ warnx("ELF initialization failed");
+ return -1;
+ }
+
+ e = elf_begin(fd, ELF_C_WRITE, NULL);
+ if (!e) {
+ warnx("elf_begin failed");
+ goto error;
+ }
+
+ /*
+ * setup ELF header
+ */
+ ehdr = elf_newehdr(e);
+ if (!ehdr) {
+ warnx("cannot get ehdr");
+ goto error;
+ }
+
+ ehdr->e_ident[EI_DATA] = GEN_ELF_ENDIAN;
+ ehdr->e_ident[EI_CLASS] = GEN_ELF_CLASS;
+ ehdr->e_machine = GEN_ELF_ARCH;
+ ehdr->e_type = ET_DYN;
+ ehdr->e_entry = GEN_ELF_TEXT_OFFSET;
+ ehdr->e_version = EV_CURRENT;
+ ehdr->e_shstrndx= unwinding ? 4 : 2; /* shdr index for section name */
+
+ /*
+ * setup program header
+ */
+ phdr = elf_newphdr(e, 1);
+ phdr[0].p_type = PT_LOAD;
+ phdr[0].p_offset = GEN_ELF_TEXT_OFFSET;
+ phdr[0].p_vaddr = GEN_ELF_TEXT_OFFSET;
+ phdr[0].p_paddr = GEN_ELF_TEXT_OFFSET;
+ phdr[0].p_filesz = csize;
+ phdr[0].p_memsz = csize;
+ phdr[0].p_flags = PF_X | PF_R;
+ phdr[0].p_align = 8;
+
+ /*
+ * setup text section
+ */
+ scn = elf_newscn(e);
+ if (!scn) {
+ warnx("cannot create section");
+ goto error;
+ }
+
+ d = elf_newdata(scn);
+ if (!d) {
+ warnx("cannot get new data");
+ goto error;
+ }
+
+ d->d_align = 16;
+ d->d_off = 0LL;
+ d->d_buf = (void *)code;
+ d->d_type = ELF_T_BYTE;
+ d->d_size = csize;
+ d->d_version = EV_CURRENT;
+
+ shdr = elf_getshdr(scn);
+ if (!shdr) {
+ warnx("cannot get section header");
+ goto error;
+ }
+
+ shdr->sh_name = 1;
+ shdr->sh_type = SHT_PROGBITS;
+ shdr->sh_addr = GEN_ELF_TEXT_OFFSET;
+ shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+ shdr->sh_entsize = 0;
+
+ /*
+ * Setup .eh_frame_hdr and .eh_frame
+ */
+ if (unwinding) {
+ eh_frame_base_offset = ALIGN_8(GEN_ELF_TEXT_OFFSET + csize);
+ retval = jit_add_eh_frame_info(e, unwinding,
+ unwinding_header_size, unwinding_size,
+ eh_frame_base_offset);
+ if (retval)
+ goto error;
+ retval = -1;
+ }
+
+ /*
+ * setup section headers string table
+ */
+ scn = elf_newscn(e);
+ if (!scn) {
+ warnx("cannot create section");
+ goto error;
+ }
+
+ d = elf_newdata(scn);
+ if (!d) {
+ warnx("cannot get new data");
+ goto error;
+ }
+
+ d->d_align = 1;
+ d->d_off = 0LL;
+ d->d_buf = shd_string_table;
+ d->d_type = ELF_T_BYTE;
+ d->d_size = sizeof(shd_string_table);
+ d->d_version = EV_CURRENT;
+
+ shdr = elf_getshdr(scn);
+ if (!shdr) {
+ warnx("cannot get section header");
+ goto error;
+ }
+
+ shdr->sh_name = 7; /* offset of '.shstrtab' in shd_string_table */
+ shdr->sh_type = SHT_STRTAB;
+ shdr->sh_flags = 0;
+ shdr->sh_entsize = 0;
+
+ /*
+ * setup symtab section
+ */
+ symtab[1].st_size = csize;
+ symtab[1].st_value = GEN_ELF_TEXT_OFFSET;
+
+ scn = elf_newscn(e);
+ if (!scn) {
+ warnx("cannot create section");
+ goto error;
+ }
+
+ d = elf_newdata(scn);
+ if (!d) {
+ warnx("cannot get new data");
+ goto error;
+ }
+
+ d->d_align = 8;
+ d->d_off = 0LL;
+ d->d_buf = symtab;
+ d->d_type = ELF_T_SYM;
+ d->d_size = sizeof(symtab);
+ d->d_version = EV_CURRENT;
+
+ shdr = elf_getshdr(scn);
+ if (!shdr) {
+ warnx("cannot get section header");
+ goto error;
+ }
+
+ shdr->sh_name = 17; /* offset of '.symtab' in shd_string_table */
+ shdr->sh_type = SHT_SYMTAB;
+ shdr->sh_flags = 0;
+ shdr->sh_entsize = sizeof(Elf_Sym);
+ shdr->sh_link = unwinding ? 6 : 4; /* index of .strtab section */
+
+ /*
+ * setup symbols string table
+ * 2 = 1 for 0 in 1st entry, 1 for the 0 at end of symbol for 2nd entry
+ */
+ symlen = 2 + strlen(sym);
+ strsym = calloc(1, symlen);
+ if (!strsym) {
+ warnx("cannot allocate strsym");
+ goto error;
+ }
+ strcpy(strsym + 1, sym);
+
+ scn = elf_newscn(e);
+ if (!scn) {
+ warnx("cannot create section");
+ goto error;
+ }
+
+ d = elf_newdata(scn);
+ if (!d) {
+ warnx("cannot get new data");
+ goto error;
+ }
+
+ d->d_align = 1;
+ d->d_off = 0LL;
+ d->d_buf = strsym;
+ d->d_type = ELF_T_BYTE;
+ d->d_size = symlen;
+ d->d_version = EV_CURRENT;
+
+ shdr = elf_getshdr(scn);
+ if (!shdr) {
+ warnx("cannot get section header");
+ goto error;
+ }
+
+ shdr->sh_name = 25; /* offset in shd_string_table */
+ shdr->sh_type = SHT_STRTAB;
+ shdr->sh_flags = 0;
+ shdr->sh_entsize = 0;
+
+ /*
+ * setup build-id section
+ */
+ scn = elf_newscn(e);
+ if (!scn) {
+ warnx("cannot create section");
+ goto error;
+ }
+
+ d = elf_newdata(scn);
+ if (!d) {
+ warnx("cannot get new data");
+ goto error;
+ }
+
+ /*
+ * build-id generation
+ */
+ gen_build_id(&bnote, load_addr, code, csize);
+ bnote.desc.namesz = sizeof(bnote.name); /* must include 0 termination */
+ bnote.desc.descsz = sizeof(bnote.build_id);
+ bnote.desc.type = NT_GNU_BUILD_ID;
+ strcpy(bnote.name, "GNU");
+
+ d->d_align = 4;
+ d->d_off = 0LL;
+ d->d_buf = &bnote;
+ d->d_type = ELF_T_BYTE;
+ d->d_size = sizeof(bnote);
+ d->d_version = EV_CURRENT;
+
+ shdr = elf_getshdr(scn);
+ if (!shdr) {
+ warnx("cannot get section header");
+ goto error;
+ }
+
+ shdr->sh_name = 33; /* offset in shd_string_table */
+ shdr->sh_type = SHT_NOTE;
+ shdr->sh_addr = 0x0;
+ shdr->sh_flags = SHF_ALLOC;
+ shdr->sh_size = sizeof(bnote);
+ shdr->sh_entsize = 0;
+
+#ifdef HAVE_DWARF_SUPPORT
+ if (debug && nr_debug_entries) {
+ retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries);
+ if (retval)
+ goto error;
+ } else
+#endif
+ {
+ if (elf_update(e, ELF_C_WRITE) < 0) {
+ warnx("elf_update 4 failed");
+ goto error;
+ }
+ }
+
+ retval = 0;
+error:
+ (void)elf_end(e);
+
+ free(strsym);
+
+
+ return retval;
+}
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
new file mode 100644
index 000000000..5f18d20ea
--- /dev/null
+++ b/tools/perf/util/genelf.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __GENELF_H__
+#define __GENELF_H__
+
+#include <linux/math.h>
+
+/* genelf.c */
+int jit_write_elf(int fd, uint64_t code_addr, const char *sym,
+ const void *code, int csize, void *debug, int nr_debug_entries,
+ void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size);
+#ifdef HAVE_DWARF_SUPPORT
+/* genelf_debug.c */
+int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries);
+#endif
+
+#if defined(__arm__)
+#define GEN_ELF_ARCH EM_ARM
+#define GEN_ELF_CLASS ELFCLASS32
+#elif defined(__aarch64__)
+#define GEN_ELF_ARCH EM_AARCH64
+#define GEN_ELF_CLASS ELFCLASS64
+#elif defined(__x86_64__)
+#define GEN_ELF_ARCH EM_X86_64
+#define GEN_ELF_CLASS ELFCLASS64
+#elif defined(__i386__)
+#define GEN_ELF_ARCH EM_386
+#define GEN_ELF_CLASS ELFCLASS32
+#elif defined(__powerpc64__)
+#define GEN_ELF_ARCH EM_PPC64
+#define GEN_ELF_CLASS ELFCLASS64
+#elif defined(__powerpc__)
+#define GEN_ELF_ARCH EM_PPC
+#define GEN_ELF_CLASS ELFCLASS32
+#elif defined(__sparc__) && defined(__arch64__)
+#define GEN_ELF_ARCH EM_SPARCV9
+#define GEN_ELF_CLASS ELFCLASS64
+#elif defined(__sparc__)
+#define GEN_ELF_ARCH EM_SPARC
+#define GEN_ELF_CLASS ELFCLASS32
+#elif defined(__s390x__)
+#define GEN_ELF_ARCH EM_S390
+#define GEN_ELF_CLASS ELFCLASS64
+#elif defined(__riscv) && __riscv_xlen == 64
+#define GEN_ELF_ARCH EM_RISCV
+#define GEN_ELF_CLASS ELFCLASS64
+#elif defined(__loongarch__)
+#define GEN_ELF_ARCH EM_LOONGARCH
+#define GEN_ELF_CLASS ELFCLASS64
+#else
+#error "unsupported architecture"
+#endif
+
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define GEN_ELF_ENDIAN ELFDATA2MSB
+#else
+#define GEN_ELF_ENDIAN ELFDATA2LSB
+#endif
+
+#if GEN_ELF_CLASS == ELFCLASS64
+#define elf_newehdr elf64_newehdr
+#define elf_newphdr elf64_newphdr
+#define elf_getshdr elf64_getshdr
+#define Elf_Ehdr Elf64_Ehdr
+#define Elf_Phdr Elf64_Phdr
+#define Elf_Shdr Elf64_Shdr
+#define Elf_Sym Elf64_Sym
+#define ELF_ST_TYPE(a) ELF64_ST_TYPE(a)
+#define ELF_ST_BIND(a) ELF64_ST_BIND(a)
+#define ELF_ST_VIS(a) ELF64_ST_VISIBILITY(a)
+#else
+#define elf_newehdr elf32_newehdr
+#define elf_newphdr elf32_newphdr
+#define elf_getshdr elf32_getshdr
+#define Elf_Ehdr Elf32_Ehdr
+#define Elf_Phdr Elf32_Phdr
+#define Elf_Shdr Elf32_Shdr
+#define Elf_Sym Elf32_Sym
+#define ELF_ST_TYPE(a) ELF32_ST_TYPE(a)
+#define ELF_ST_BIND(a) ELF32_ST_BIND(a)
+#define ELF_ST_VIS(a) ELF32_ST_VISIBILITY(a)
+#endif
+
+/* The .text section is directly after the ELF header */
+#define GEN_ELF_TEXT_OFFSET round_up(sizeof(Elf_Ehdr) + sizeof(Elf_Phdr), 16)
+
+#endif
diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c
new file mode 100644
index 000000000..8588b3e35
--- /dev/null
+++ b/tools/perf/util/genelf_debug.c
@@ -0,0 +1,625 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * genelf_debug.c
+ * Copyright (C) 2015, Google, Inc
+ *
+ * Contributed by:
+ * Stephane Eranian <eranian@google.com>
+ *
+ * based on GPLv2 source code from Oprofile
+ * @remark Copyright 2007 OProfile authors
+ * @author Philippe Elie
+ */
+#include <linux/compiler.h>
+#include <linux/zalloc.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <libelf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <err.h>
+#include <dwarf.h>
+
+#include "genelf.h"
+#include "../util/jitdump.h"
+
+#define BUFFER_EXT_DFL_SIZE (4 * 1024)
+
+typedef uint32_t uword;
+typedef uint16_t uhalf;
+typedef int32_t sword;
+typedef int16_t shalf;
+typedef uint8_t ubyte;
+typedef int8_t sbyte;
+
+struct buffer_ext {
+ size_t cur_pos;
+ size_t max_sz;
+ void *data;
+};
+
+static void
+buffer_ext_dump(struct buffer_ext *be, const char *msg)
+{
+ size_t i;
+ warnx("DUMP for %s", msg);
+ for (i = 0 ; i < be->cur_pos; i++)
+ warnx("%4zu 0x%02x", i, (((char *)be->data)[i]) & 0xff);
+}
+
+static inline int
+buffer_ext_add(struct buffer_ext *be, void *addr, size_t sz)
+{
+ void *tmp;
+ size_t be_sz = be->max_sz;
+
+retry:
+ if ((be->cur_pos + sz) < be_sz) {
+ memcpy(be->data + be->cur_pos, addr, sz);
+ be->cur_pos += sz;
+ return 0;
+ }
+
+ if (!be_sz)
+ be_sz = BUFFER_EXT_DFL_SIZE;
+ else
+ be_sz <<= 1;
+
+ tmp = realloc(be->data, be_sz);
+ if (!tmp)
+ return -1;
+
+ be->data = tmp;
+ be->max_sz = be_sz;
+
+ goto retry;
+}
+
+static void
+buffer_ext_init(struct buffer_ext *be)
+{
+ be->data = NULL;
+ be->cur_pos = 0;
+ be->max_sz = 0;
+}
+
+static void
+buffer_ext_exit(struct buffer_ext *be)
+{
+ zfree(&be->data);
+}
+
+static inline size_t
+buffer_ext_size(struct buffer_ext *be)
+{
+ return be->cur_pos;
+}
+
+static inline void *
+buffer_ext_addr(struct buffer_ext *be)
+{
+ return be->data;
+}
+
+struct debug_line_header {
+ // Not counting this field
+ uword total_length;
+ // version number (2 currently)
+ uhalf version;
+ // relative offset from next field to
+ // program statement
+ uword prolog_length;
+ ubyte minimum_instruction_length;
+ ubyte default_is_stmt;
+ // line_base - see DWARF 2 specs
+ sbyte line_base;
+ // line_range - see DWARF 2 specs
+ ubyte line_range;
+ // number of opcode + 1
+ ubyte opcode_base;
+ /* follow the array of opcode args nr: ubytes [nr_opcode_base] */
+ /* follow the search directories index, zero terminated string
+ * terminated by an empty string.
+ */
+ /* follow an array of { filename, LEB128, LEB128, LEB128 }, first is
+ * the directory index entry, 0 means current directory, then mtime
+ * and filesize, last entry is followed by en empty string.
+ */
+ /* follow the first program statement */
+} __packed;
+
+/* DWARF 2 spec talk only about one possible compilation unit header while
+ * binutils can handle two flavours of dwarf 2, 32 and 64 bits, this is not
+ * related to the used arch, an ELF 32 can hold more than 4 Go of debug
+ * information. For now we handle only DWARF 2 32 bits comp unit. It'll only
+ * become a problem if we generate more than 4GB of debug information.
+ */
+struct compilation_unit_header {
+ uword total_length;
+ uhalf version;
+ uword debug_abbrev_offset;
+ ubyte pointer_size;
+} __packed;
+
+#define DW_LNS_num_opcode (DW_LNS_set_isa + 1)
+
+/* field filled at run time are marked with -1 */
+static struct debug_line_header const default_debug_line_header = {
+ .total_length = -1,
+ .version = 2,
+ .prolog_length = -1,
+ .minimum_instruction_length = 1, /* could be better when min instruction size != 1 */
+ .default_is_stmt = 1, /* we don't take care about basic block */
+ .line_base = -5, /* sensible value for line base ... */
+ .line_range = -14, /* ... and line range are guessed statically */
+ .opcode_base = DW_LNS_num_opcode
+};
+
+static ubyte standard_opcode_length[] =
+{
+ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1
+};
+#if 0
+{
+ [DW_LNS_advance_pc] = 1,
+ [DW_LNS_advance_line] = 1,
+ [DW_LNS_set_file] = 1,
+ [DW_LNS_set_column] = 1,
+ [DW_LNS_fixed_advance_pc] = 1,
+ [DW_LNS_set_isa] = 1,
+};
+#endif
+
+/* field filled at run time are marked with -1 */
+static struct compilation_unit_header default_comp_unit_header = {
+ .total_length = -1,
+ .version = 2,
+ .debug_abbrev_offset = 0, /* we reuse the same abbrev entries for all comp unit */
+ .pointer_size = sizeof(void *)
+};
+
+static void emit_uword(struct buffer_ext *be, uword data)
+{
+ buffer_ext_add(be, &data, sizeof(uword));
+}
+
+static void emit_string(struct buffer_ext *be, const char *s)
+{
+ buffer_ext_add(be, (void *)s, strlen(s) + 1);
+}
+
+static void emit_unsigned_LEB128(struct buffer_ext *be,
+ unsigned long data)
+{
+ do {
+ ubyte cur = data & 0x7F;
+ data >>= 7;
+ if (data)
+ cur |= 0x80;
+ buffer_ext_add(be, &cur, 1);
+ } while (data);
+}
+
+static void emit_signed_LEB128(struct buffer_ext *be, long data)
+{
+ int more = 1;
+ int negative = data < 0;
+ int size = sizeof(long) * CHAR_BIT;
+ while (more) {
+ ubyte cur = data & 0x7F;
+ data >>= 7;
+ if (negative)
+ data |= - (1 << (size - 7));
+ if ((data == 0 && !(cur & 0x40)) ||
+ (data == -1l && (cur & 0x40)))
+ more = 0;
+ else
+ cur |= 0x80;
+ buffer_ext_add(be, &cur, 1);
+ }
+}
+
+static void emit_extended_opcode(struct buffer_ext *be, ubyte opcode,
+ void *data, size_t data_len)
+{
+ buffer_ext_add(be, (char *)"", 1);
+
+ emit_unsigned_LEB128(be, data_len + 1);
+
+ buffer_ext_add(be, &opcode, 1);
+ buffer_ext_add(be, data, data_len);
+}
+
+static void emit_opcode(struct buffer_ext *be, ubyte opcode)
+{
+ buffer_ext_add(be, &opcode, 1);
+}
+
+static void emit_opcode_signed(struct buffer_ext *be,
+ ubyte opcode, long data)
+{
+ buffer_ext_add(be, &opcode, 1);
+ emit_signed_LEB128(be, data);
+}
+
+static void emit_opcode_unsigned(struct buffer_ext *be, ubyte opcode,
+ unsigned long data)
+{
+ buffer_ext_add(be, &opcode, 1);
+ emit_unsigned_LEB128(be, data);
+}
+
+static void emit_advance_pc(struct buffer_ext *be, unsigned long delta_pc)
+{
+ emit_opcode_unsigned(be, DW_LNS_advance_pc, delta_pc);
+}
+
+static void emit_advance_lineno(struct buffer_ext *be, long delta_lineno)
+{
+ emit_opcode_signed(be, DW_LNS_advance_line, delta_lineno);
+}
+
+static void emit_lne_end_of_sequence(struct buffer_ext *be)
+{
+ emit_extended_opcode(be, DW_LNE_end_sequence, NULL, 0);
+}
+
+static void emit_set_file(struct buffer_ext *be, unsigned long idx)
+{
+ emit_opcode_unsigned(be, DW_LNS_set_file, idx);
+}
+
+static void emit_lne_define_filename(struct buffer_ext *be,
+ const char *filename)
+{
+ buffer_ext_add(be, (void *)"", 1);
+
+ /* LNE field, strlen(filename) + zero termination, 3 bytes for: the dir entry, timestamp, filesize */
+ emit_unsigned_LEB128(be, strlen(filename) + 5);
+ emit_opcode(be, DW_LNE_define_file);
+ emit_string(be, filename);
+ /* directory index 0=do not know */
+ emit_unsigned_LEB128(be, 0);
+ /* last modification date on file 0=do not know */
+ emit_unsigned_LEB128(be, 0);
+ /* filesize 0=do not know */
+ emit_unsigned_LEB128(be, 0);
+}
+
+static void emit_lne_set_address(struct buffer_ext *be,
+ void *address)
+{
+ emit_extended_opcode(be, DW_LNE_set_address, &address, sizeof(unsigned long));
+}
+
+static ubyte get_special_opcode(struct debug_entry *ent,
+ unsigned int last_line,
+ unsigned long last_vma)
+{
+ unsigned int temp;
+ unsigned long delta_addr;
+
+ /*
+ * delta from line_base
+ */
+ temp = (ent->lineno - last_line) - default_debug_line_header.line_base;
+
+ if (temp >= default_debug_line_header.line_range)
+ return 0;
+
+ /*
+ * delta of addresses
+ */
+ delta_addr = (ent->addr - last_vma) / default_debug_line_header.minimum_instruction_length;
+
+ /* This is not sufficient to ensure opcode will be in [0-256] but
+ * sufficient to ensure when summing with the delta lineno we will
+ * not overflow the unsigned long opcode */
+
+ if (delta_addr <= 256 / default_debug_line_header.line_range) {
+ unsigned long opcode = temp +
+ (delta_addr * default_debug_line_header.line_range) +
+ default_debug_line_header.opcode_base;
+
+ return opcode <= 255 ? opcode : 0;
+ }
+ return 0;
+}
+
+static void emit_lineno_info(struct buffer_ext *be,
+ struct debug_entry *ent, size_t nr_entry,
+ unsigned long code_addr)
+{
+ size_t i;
+
+ /* as described in the jitdump format */
+ const char repeated_name_marker[] = {'\xff', '\0'};
+
+ /*
+ * Machine state at start of a statement program
+ * address = 0
+ * file = 1
+ * line = 1
+ * column = 0
+ * is_stmt = default_is_stmt as given in the debug_line_header
+ * basic block = 0
+ * end sequence = 0
+ */
+
+ /* start state of the state machine we take care of */
+ unsigned long last_vma = 0;
+ char const *cur_filename = NULL;
+ unsigned long cur_file_idx = 0;
+ int last_line = 1;
+
+ emit_lne_set_address(be, (void *)code_addr);
+
+ for (i = 0; i < nr_entry; i++, ent = debug_entry_next(ent)) {
+ int need_copy = 0;
+ ubyte special_opcode;
+
+ /*
+ * check if filename changed, if so add it
+ */
+ if ((!cur_filename || strcmp(cur_filename, ent->name)) &&
+ strcmp(repeated_name_marker, ent->name)) {
+ emit_lne_define_filename(be, ent->name);
+ cur_filename = ent->name;
+ emit_set_file(be, ++cur_file_idx);
+ need_copy = 1;
+ }
+
+ special_opcode = get_special_opcode(ent, last_line, last_vma);
+ if (special_opcode != 0) {
+ last_line = ent->lineno;
+ last_vma = ent->addr;
+ emit_opcode(be, special_opcode);
+ } else {
+ /*
+ * lines differ, emit line delta
+ */
+ if (last_line != ent->lineno) {
+ emit_advance_lineno(be, ent->lineno - last_line);
+ last_line = ent->lineno;
+ need_copy = 1;
+ }
+ /*
+ * addresses differ, emit address delta
+ */
+ if (last_vma != ent->addr) {
+ emit_advance_pc(be, ent->addr - last_vma);
+ last_vma = ent->addr;
+ need_copy = 1;
+ }
+ /*
+ * add new row to matrix
+ */
+ if (need_copy)
+ emit_opcode(be, DW_LNS_copy);
+ }
+ }
+}
+
+static void add_debug_line(struct buffer_ext *be,
+ struct debug_entry *ent, size_t nr_entry,
+ unsigned long code_addr)
+{
+ struct debug_line_header * dbg_header;
+ size_t old_size;
+
+ old_size = buffer_ext_size(be);
+
+ buffer_ext_add(be, (void *)&default_debug_line_header,
+ sizeof(default_debug_line_header));
+
+ buffer_ext_add(be, &standard_opcode_length, sizeof(standard_opcode_length));
+
+ // empty directory entry
+ buffer_ext_add(be, (void *)"", 1);
+
+ // empty filename directory
+ buffer_ext_add(be, (void *)"", 1);
+
+ dbg_header = buffer_ext_addr(be) + old_size;
+ dbg_header->prolog_length = (buffer_ext_size(be) - old_size) -
+ offsetof(struct debug_line_header, minimum_instruction_length);
+
+ emit_lineno_info(be, ent, nr_entry, code_addr);
+
+ emit_lne_end_of_sequence(be);
+
+ dbg_header = buffer_ext_addr(be) + old_size;
+ dbg_header->total_length = (buffer_ext_size(be) - old_size) -
+ offsetof(struct debug_line_header, version);
+}
+
+static void
+add_debug_abbrev(struct buffer_ext *be)
+{
+ emit_unsigned_LEB128(be, 1);
+ emit_unsigned_LEB128(be, DW_TAG_compile_unit);
+ emit_unsigned_LEB128(be, DW_CHILDREN_yes);
+ emit_unsigned_LEB128(be, DW_AT_stmt_list);
+ emit_unsigned_LEB128(be, DW_FORM_data4);
+ emit_unsigned_LEB128(be, 0);
+ emit_unsigned_LEB128(be, 0);
+ emit_unsigned_LEB128(be, 0);
+}
+
+static void
+add_compilation_unit(struct buffer_ext *be,
+ size_t offset_debug_line)
+{
+ struct compilation_unit_header *comp_unit_header;
+ size_t old_size = buffer_ext_size(be);
+
+ buffer_ext_add(be, &default_comp_unit_header,
+ sizeof(default_comp_unit_header));
+
+ emit_unsigned_LEB128(be, 1);
+ emit_uword(be, offset_debug_line);
+
+ comp_unit_header = buffer_ext_addr(be) + old_size;
+ comp_unit_header->total_length = (buffer_ext_size(be) - old_size) -
+ offsetof(struct compilation_unit_header, version);
+}
+
+static int
+jit_process_debug_info(uint64_t code_addr,
+ void *debug, int nr_debug_entries,
+ struct buffer_ext *dl,
+ struct buffer_ext *da,
+ struct buffer_ext *di)
+{
+ struct debug_entry *ent = debug;
+ int i;
+
+ for (i = 0; i < nr_debug_entries; i++) {
+ ent->addr = ent->addr - code_addr;
+ ent = debug_entry_next(ent);
+ }
+ add_compilation_unit(di, buffer_ext_size(dl));
+ add_debug_line(dl, debug, nr_debug_entries, GEN_ELF_TEXT_OFFSET);
+ add_debug_abbrev(da);
+ if (0) buffer_ext_dump(da, "abbrev");
+
+ return 0;
+}
+
+int
+jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries)
+{
+ Elf_Data *d;
+ Elf_Scn *scn;
+ Elf_Shdr *shdr;
+ struct buffer_ext dl, di, da;
+ int ret = -1;
+
+ buffer_ext_init(&dl);
+ buffer_ext_init(&di);
+ buffer_ext_init(&da);
+
+ if (jit_process_debug_info(code_addr, debug, nr_debug_entries, &dl, &da, &di))
+ goto out;
+
+ /*
+ * setup .debug_line section
+ */
+ scn = elf_newscn(e);
+ if (!scn) {
+ warnx("cannot create section");
+ goto out;
+ }
+
+ d = elf_newdata(scn);
+ if (!d) {
+ warnx("cannot get new data");
+ goto out;
+ }
+
+ d->d_align = 1;
+ d->d_off = 0LL;
+ d->d_buf = buffer_ext_addr(&dl);
+ d->d_type = ELF_T_BYTE;
+ d->d_size = buffer_ext_size(&dl);
+ d->d_version = EV_CURRENT;
+
+ shdr = elf_getshdr(scn);
+ if (!shdr) {
+ warnx("cannot get section header");
+ goto out;
+ }
+
+ shdr->sh_name = 52; /* .debug_line */
+ shdr->sh_type = SHT_PROGBITS;
+ shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */
+ shdr->sh_flags = 0;
+ shdr->sh_entsize = 0;
+
+ /*
+ * setup .debug_info section
+ */
+ scn = elf_newscn(e);
+ if (!scn) {
+ warnx("cannot create section");
+ goto out;
+ }
+
+ d = elf_newdata(scn);
+ if (!d) {
+ warnx("cannot get new data");
+ goto out;
+ }
+
+ d->d_align = 1;
+ d->d_off = 0LL;
+ d->d_buf = buffer_ext_addr(&di);
+ d->d_type = ELF_T_BYTE;
+ d->d_size = buffer_ext_size(&di);
+ d->d_version = EV_CURRENT;
+
+ shdr = elf_getshdr(scn);
+ if (!shdr) {
+ warnx("cannot get section header");
+ goto out;
+ }
+
+ shdr->sh_name = 64; /* .debug_info */
+ shdr->sh_type = SHT_PROGBITS;
+ shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */
+ shdr->sh_flags = 0;
+ shdr->sh_entsize = 0;
+
+ /*
+ * setup .debug_abbrev section
+ */
+ scn = elf_newscn(e);
+ if (!scn) {
+ warnx("cannot create section");
+ goto out;
+ }
+
+ d = elf_newdata(scn);
+ if (!d) {
+ warnx("cannot get new data");
+ goto out;
+ }
+
+ d->d_align = 1;
+ d->d_off = 0LL;
+ d->d_buf = buffer_ext_addr(&da);
+ d->d_type = ELF_T_BYTE;
+ d->d_size = buffer_ext_size(&da);
+ d->d_version = EV_CURRENT;
+
+ shdr = elf_getshdr(scn);
+ if (!shdr) {
+ warnx("cannot get section header");
+ goto out;
+ }
+
+ shdr->sh_name = 76; /* .debug_info */
+ shdr->sh_type = SHT_PROGBITS;
+ shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */
+ shdr->sh_flags = 0;
+ shdr->sh_entsize = 0;
+
+ /*
+ * now we update the ELF image with all the sections
+ */
+ if (elf_update(e, ELF_C_WRITE) < 0)
+ warnx("elf_update debug failed");
+ else
+ ret = 0;
+
+out:
+ buffer_ext_exit(&dl);
+ buffer_ext_exit(&di);
+ buffer_ext_exit(&da);
+ return ret;
+}
diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh
new file mode 100755
index 000000000..1b5140e5c
--- /dev/null
+++ b/tools/perf/util/generate-cmdlist.sh
@@ -0,0 +1,70 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+echo "/* Automatically generated by $0 */
+struct cmdname_help
+{
+ char name[16];
+ char help[80];
+};
+
+static struct cmdname_help common_cmds[] = {"
+
+sed -n -e 's/^perf-\([^ ]*\)[ ].* common.*/\1/p' command-list.txt |
+sort |
+while read cmd
+do
+ sed -n '
+ /^NAME/,/perf-'"$cmd"'/H
+ ${
+ x
+ s/.*perf-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/
+ p
+ }' "Documentation/perf-$cmd.txt"
+done
+
+echo "#ifdef HAVE_LIBELF_SUPPORT"
+sed -n -e 's/^perf-\([^ ]*\)[ ].* full.*/\1/p' command-list.txt |
+sort |
+while read cmd
+do
+ sed -n '
+ /^NAME/,/perf-'"$cmd"'/H
+ ${
+ x
+ s/.*perf-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/
+ p
+ }' "Documentation/perf-$cmd.txt"
+done
+echo "#endif /* HAVE_LIBELF_SUPPORT */"
+
+echo "#if defined(HAVE_LIBTRACEEVENT) && (defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT))"
+sed -n -e 's/^perf-\([^ ]*\)[ ].* audit*/\1/p' command-list.txt |
+sort |
+while read cmd
+do
+ sed -n '
+ /^NAME/,/perf-'"$cmd"'/H
+ ${
+ x
+ s/.*perf-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/
+ p
+ }' "Documentation/perf-$cmd.txt"
+done
+echo "#endif /* HAVE_LIBTRACEEVENT && (HAVE_LIBAUDIT_SUPPORT || HAVE_SYSCALL_TABLE_SUPPORT) */"
+
+echo "#ifdef HAVE_LIBTRACEEVENT"
+sed -n -e 's/^perf-\([^ ]*\)[ ].* traceevent.*/\1/p' command-list.txt |
+sort |
+while read cmd
+do
+ sed -n '
+ /^NAME/,/perf-'"$cmd"'/H
+ ${
+ x
+ s/.*perf-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/
+ p
+ }' "Documentation/perf-$cmd.txt"
+done
+echo "#endif /* HAVE_LIBTRACEEVENT */"
+echo "};"
diff --git a/tools/perf/util/get_current_dir_name.c b/tools/perf/util/get_current_dir_name.c
new file mode 100644
index 000000000..e68935e9a
--- /dev/null
+++ b/tools/perf/util/get_current_dir_name.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: LGPL-2.1
+// Copyright (C) 2018, 2019 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+//
+#ifndef HAVE_GET_CURRENT_DIR_NAME
+#include "get_current_dir_name.h"
+#include <limits.h>
+#include <string.h>
+#include <unistd.h>
+
+/* Android's 'bionic' library, for one, doesn't have this */
+
+char *get_current_dir_name(void)
+{
+ char pwd[PATH_MAX];
+
+ return getcwd(pwd, sizeof(pwd)) == NULL ? NULL : strdup(pwd);
+}
+#endif // HAVE_GET_CURRENT_DIR_NAME
diff --git a/tools/perf/util/get_current_dir_name.h b/tools/perf/util/get_current_dir_name.h
new file mode 100644
index 000000000..69f7d5537
--- /dev/null
+++ b/tools/perf/util/get_current_dir_name.h
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
+// Copyright (C) 2018, 2019 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+//
+#ifndef __PERF_GET_CURRENT_DIR_NAME_H
+#ifndef HAVE_GET_CURRENT_DIR_NAME
+char *get_current_dir_name(void);
+#endif // HAVE_GET_CURRENT_DIR_NAME
+#endif // __PERF_GET_CURRENT_DIR_NAME_H
diff --git a/tools/perf/util/hashmap.c b/tools/perf/util/hashmap.c
new file mode 100644
index 000000000..140ee4055
--- /dev/null
+++ b/tools/perf/util/hashmap.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * Generic non-thread safe hash map implementation.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <linux/err.h>
+#include "hashmap.h"
+
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
+/* prevent accidental re-addition of reallocarray() */
+#pragma GCC poison reallocarray
+
+/* start with 4 buckets */
+#define HASHMAP_MIN_CAP_BITS 2
+
+static void hashmap_add_entry(struct hashmap_entry **pprev,
+ struct hashmap_entry *entry)
+{
+ entry->next = *pprev;
+ *pprev = entry;
+}
+
+static void hashmap_del_entry(struct hashmap_entry **pprev,
+ struct hashmap_entry *entry)
+{
+ *pprev = entry->next;
+ entry->next = NULL;
+}
+
+void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
+ hashmap_equal_fn equal_fn, void *ctx)
+{
+ map->hash_fn = hash_fn;
+ map->equal_fn = equal_fn;
+ map->ctx = ctx;
+
+ map->buckets = NULL;
+ map->cap = 0;
+ map->cap_bits = 0;
+ map->sz = 0;
+}
+
+struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
+ hashmap_equal_fn equal_fn,
+ void *ctx)
+{
+ struct hashmap *map = malloc(sizeof(struct hashmap));
+
+ if (!map)
+ return ERR_PTR(-ENOMEM);
+ hashmap__init(map, hash_fn, equal_fn, ctx);
+ return map;
+}
+
+void hashmap__clear(struct hashmap *map)
+{
+ struct hashmap_entry *cur, *tmp;
+ size_t bkt;
+
+ hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
+ free(cur);
+ }
+ free(map->buckets);
+ map->buckets = NULL;
+ map->cap = map->cap_bits = map->sz = 0;
+}
+
+void hashmap__free(struct hashmap *map)
+{
+ if (IS_ERR_OR_NULL(map))
+ return;
+
+ hashmap__clear(map);
+ free(map);
+}
+
+size_t hashmap__size(const struct hashmap *map)
+{
+ return map->sz;
+}
+
+size_t hashmap__capacity(const struct hashmap *map)
+{
+ return map->cap;
+}
+
+static bool hashmap_needs_to_grow(struct hashmap *map)
+{
+ /* grow if empty or more than 75% filled */
+ return (map->cap == 0) || ((map->sz + 1) * 4 / 3 > map->cap);
+}
+
+static int hashmap_grow(struct hashmap *map)
+{
+ struct hashmap_entry **new_buckets;
+ struct hashmap_entry *cur, *tmp;
+ size_t new_cap_bits, new_cap;
+ size_t h, bkt;
+
+ new_cap_bits = map->cap_bits + 1;
+ if (new_cap_bits < HASHMAP_MIN_CAP_BITS)
+ new_cap_bits = HASHMAP_MIN_CAP_BITS;
+
+ new_cap = 1UL << new_cap_bits;
+ new_buckets = calloc(new_cap, sizeof(new_buckets[0]));
+ if (!new_buckets)
+ return -ENOMEM;
+
+ hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
+ h = hash_bits(map->hash_fn(cur->key, map->ctx), new_cap_bits);
+ hashmap_add_entry(&new_buckets[h], cur);
+ }
+
+ map->cap = new_cap;
+ map->cap_bits = new_cap_bits;
+ free(map->buckets);
+ map->buckets = new_buckets;
+
+ return 0;
+}
+
+static bool hashmap_find_entry(const struct hashmap *map,
+ const long key, size_t hash,
+ struct hashmap_entry ***pprev,
+ struct hashmap_entry **entry)
+{
+ struct hashmap_entry *cur, **prev_ptr;
+
+ if (!map->buckets)
+ return false;
+
+ for (prev_ptr = &map->buckets[hash], cur = *prev_ptr;
+ cur;
+ prev_ptr = &cur->next, cur = cur->next) {
+ if (map->equal_fn(cur->key, key, map->ctx)) {
+ if (pprev)
+ *pprev = prev_ptr;
+ *entry = cur;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+int hashmap_insert(struct hashmap *map, long key, long value,
+ enum hashmap_insert_strategy strategy,
+ long *old_key, long *old_value)
+{
+ struct hashmap_entry *entry;
+ size_t h;
+ int err;
+
+ if (old_key)
+ *old_key = 0;
+ if (old_value)
+ *old_value = 0;
+
+ h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+ if (strategy != HASHMAP_APPEND &&
+ hashmap_find_entry(map, key, h, NULL, &entry)) {
+ if (old_key)
+ *old_key = entry->key;
+ if (old_value)
+ *old_value = entry->value;
+
+ if (strategy == HASHMAP_SET || strategy == HASHMAP_UPDATE) {
+ entry->key = key;
+ entry->value = value;
+ return 0;
+ } else if (strategy == HASHMAP_ADD) {
+ return -EEXIST;
+ }
+ }
+
+ if (strategy == HASHMAP_UPDATE)
+ return -ENOENT;
+
+ if (hashmap_needs_to_grow(map)) {
+ err = hashmap_grow(map);
+ if (err)
+ return err;
+ h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+ }
+
+ entry = malloc(sizeof(struct hashmap_entry));
+ if (!entry)
+ return -ENOMEM;
+
+ entry->key = key;
+ entry->value = value;
+ hashmap_add_entry(&map->buckets[h], entry);
+ map->sz++;
+
+ return 0;
+}
+
+bool hashmap_find(const struct hashmap *map, long key, long *value)
+{
+ struct hashmap_entry *entry;
+ size_t h;
+
+ h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+ if (!hashmap_find_entry(map, key, h, NULL, &entry))
+ return false;
+
+ if (value)
+ *value = entry->value;
+ return true;
+}
+
+bool hashmap_delete(struct hashmap *map, long key,
+ long *old_key, long *old_value)
+{
+ struct hashmap_entry **pprev, *entry;
+ size_t h;
+
+ h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+ if (!hashmap_find_entry(map, key, h, &pprev, &entry))
+ return false;
+
+ if (old_key)
+ *old_key = entry->key;
+ if (old_value)
+ *old_value = entry->value;
+
+ hashmap_del_entry(pprev, entry);
+ free(entry);
+ map->sz--;
+
+ return true;
+}
diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h
new file mode 100644
index 000000000..c12f8320e
--- /dev/null
+++ b/tools/perf/util/hashmap.h
@@ -0,0 +1,208 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Generic non-thread safe hash map implementation.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#ifndef __LIBBPF_HASHMAP_H
+#define __LIBBPF_HASHMAP_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <limits.h>
+
+static inline size_t hash_bits(size_t h, int bits)
+{
+ /* shuffle bits and return requested number of upper bits */
+ if (bits == 0)
+ return 0;
+
+#if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__)
+ /* LP64 case */
+ return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits);
+#elif (__SIZEOF_SIZE_T__ <= __SIZEOF_LONG__)
+ return (h * 2654435769lu) >> (__SIZEOF_LONG__ * 8 - bits);
+#else
+# error "Unsupported size_t size"
+#endif
+}
+
+/* generic C-string hashing function */
+static inline size_t str_hash(const char *s)
+{
+ size_t h = 0;
+
+ while (*s) {
+ h = h * 31 + *s;
+ s++;
+ }
+ return h;
+}
+
+typedef size_t (*hashmap_hash_fn)(long key, void *ctx);
+typedef bool (*hashmap_equal_fn)(long key1, long key2, void *ctx);
+
+/*
+ * Hashmap interface is polymorphic, keys and values could be either
+ * long-sized integers or pointers, this is achieved as follows:
+ * - interface functions that operate on keys and values are hidden
+ * behind auxiliary macros, e.g. hashmap_insert <-> hashmap__insert;
+ * - these auxiliary macros cast the key and value parameters as
+ * long or long *, so the user does not have to specify the casts explicitly;
+ * - for pointer parameters (e.g. old_key) the size of the pointed
+ * type is verified by hashmap_cast_ptr using _Static_assert;
+ * - when iterating using hashmap__for_each_* forms
+ * hasmap_entry->key should be used for integer keys and
+ * hasmap_entry->pkey should be used for pointer keys,
+ * same goes for values.
+ */
+struct hashmap_entry {
+ union {
+ long key;
+ const void *pkey;
+ };
+ union {
+ long value;
+ void *pvalue;
+ };
+ struct hashmap_entry *next;
+};
+
+struct hashmap {
+ hashmap_hash_fn hash_fn;
+ hashmap_equal_fn equal_fn;
+ void *ctx;
+
+ struct hashmap_entry **buckets;
+ size_t cap;
+ size_t cap_bits;
+ size_t sz;
+};
+
+void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
+ hashmap_equal_fn equal_fn, void *ctx);
+struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
+ hashmap_equal_fn equal_fn,
+ void *ctx);
+void hashmap__clear(struct hashmap *map);
+void hashmap__free(struct hashmap *map);
+
+size_t hashmap__size(const struct hashmap *map);
+size_t hashmap__capacity(const struct hashmap *map);
+
+/*
+ * Hashmap insertion strategy:
+ * - HASHMAP_ADD - only add key/value if key doesn't exist yet;
+ * - HASHMAP_SET - add key/value pair if key doesn't exist yet; otherwise,
+ * update value;
+ * - HASHMAP_UPDATE - update value, if key already exists; otherwise, do
+ * nothing and return -ENOENT;
+ * - HASHMAP_APPEND - always add key/value pair, even if key already exists.
+ * This turns hashmap into a multimap by allowing multiple values to be
+ * associated with the same key. Most useful read API for such hashmap is
+ * hashmap__for_each_key_entry() iteration. If hashmap__find() is still
+ * used, it will return last inserted key/value entry (first in a bucket
+ * chain).
+ */
+enum hashmap_insert_strategy {
+ HASHMAP_ADD,
+ HASHMAP_SET,
+ HASHMAP_UPDATE,
+ HASHMAP_APPEND,
+};
+
+#define hashmap_cast_ptr(p) ({ \
+ _Static_assert((__builtin_constant_p((p)) ? (p) == NULL : 0) || \
+ sizeof(*(p)) == sizeof(long), \
+ #p " pointee should be a long-sized integer or a pointer"); \
+ (long *)(p); \
+})
+
+/*
+ * hashmap__insert() adds key/value entry w/ various semantics, depending on
+ * provided strategy value. If a given key/value pair replaced already
+ * existing key/value pair, both old key and old value will be returned
+ * through old_key and old_value to allow calling code do proper memory
+ * management.
+ */
+int hashmap_insert(struct hashmap *map, long key, long value,
+ enum hashmap_insert_strategy strategy,
+ long *old_key, long *old_value);
+
+#define hashmap__insert(map, key, value, strategy, old_key, old_value) \
+ hashmap_insert((map), (long)(key), (long)(value), (strategy), \
+ hashmap_cast_ptr(old_key), \
+ hashmap_cast_ptr(old_value))
+
+#define hashmap__add(map, key, value) \
+ hashmap__insert((map), (key), (value), HASHMAP_ADD, NULL, NULL)
+
+#define hashmap__set(map, key, value, old_key, old_value) \
+ hashmap__insert((map), (key), (value), HASHMAP_SET, (old_key), (old_value))
+
+#define hashmap__update(map, key, value, old_key, old_value) \
+ hashmap__insert((map), (key), (value), HASHMAP_UPDATE, (old_key), (old_value))
+
+#define hashmap__append(map, key, value) \
+ hashmap__insert((map), (key), (value), HASHMAP_APPEND, NULL, NULL)
+
+bool hashmap_delete(struct hashmap *map, long key, long *old_key, long *old_value);
+
+#define hashmap__delete(map, key, old_key, old_value) \
+ hashmap_delete((map), (long)(key), \
+ hashmap_cast_ptr(old_key), \
+ hashmap_cast_ptr(old_value))
+
+bool hashmap_find(const struct hashmap *map, long key, long *value);
+
+#define hashmap__find(map, key, value) \
+ hashmap_find((map), (long)(key), hashmap_cast_ptr(value))
+
+/*
+ * hashmap__for_each_entry - iterate over all entries in hashmap
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @bkt: integer used as a bucket loop cursor
+ */
+#define hashmap__for_each_entry(map, cur, bkt) \
+ for (bkt = 0; bkt < map->cap; bkt++) \
+ for (cur = map->buckets[bkt]; cur; cur = cur->next)
+
+/*
+ * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe
+ * against removals
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @tmp: struct hashmap_entry * used as a temporary next cursor storage
+ * @bkt: integer used as a bucket loop cursor
+ */
+#define hashmap__for_each_entry_safe(map, cur, tmp, bkt) \
+ for (bkt = 0; bkt < map->cap; bkt++) \
+ for (cur = map->buckets[bkt]; \
+ cur && ({tmp = cur->next; true; }); \
+ cur = tmp)
+
+/*
+ * hashmap__for_each_key_entry - iterate over entries associated with given key
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @key: key to iterate entries for
+ */
+#define hashmap__for_each_key_entry(map, cur, _key) \
+ for (cur = map->buckets \
+ ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \
+ : NULL; \
+ cur; \
+ cur = cur->next) \
+ if (map->equal_fn(cur->key, (_key), map->ctx))
+
+#define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \
+ for (cur = map->buckets \
+ ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \
+ : NULL; \
+ cur && ({ tmp = cur->next; true; }); \
+ cur = tmp) \
+ if (map->equal_fn(cur->key, (_key), map->ctx))
+
+#endif /* __LIBBPF_HASHMAP_H */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
new file mode 100644
index 000000000..1482567e5
--- /dev/null
+++ b/tools/perf/util/header.c
@@ -0,0 +1,4523 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include "string2.h"
+#include <sys/param.h>
+#include <sys/types.h>
+#include <byteswap.h>
+#include <unistd.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/string.h>
+#include <linux/stringify.h>
+#include <linux/zalloc.h>
+#include <sys/stat.h>
+#include <sys/utsname.h>
+#include <linux/time64.h>
+#include <dirent.h>
+#ifdef HAVE_LIBBPF_SUPPORT
+#include <bpf/libbpf.h>
+#endif
+#include <perf/cpumap.h>
+#include <tools/libc_compat.h> // reallocarray
+
+#include "dso.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "util/evsel_fprintf.h"
+#include "header.h"
+#include "memswap.h"
+#include "trace-event.h"
+#include "session.h"
+#include "symbol.h"
+#include "debug.h"
+#include "cpumap.h"
+#include "pmu.h"
+#include "pmus.h"
+#include "vdso.h"
+#include "strbuf.h"
+#include "build-id.h"
+#include "data.h"
+#include <api/fs/fs.h>
+#include "asm/bug.h"
+#include "tool.h"
+#include "time-utils.h"
+#include "units.h"
+#include "util/util.h" // perf_exe()
+#include "cputopo.h"
+#include "bpf-event.h"
+#include "bpf-utils.h"
+#include "clockid.h"
+
+#include <linux/ctype.h>
+#include <internal/lib.h>
+
+#ifdef HAVE_LIBTRACEEVENT
+#include <traceevent/event-parse.h>
+#endif
+
+/*
+ * magic2 = "PERFILE2"
+ * must be a numerical value to let the endianness
+ * determine the memory layout. That way we are able
+ * to detect endianness when reading the perf.data file
+ * back.
+ *
+ * we check for legacy (PERFFILE) format.
+ */
+static const char *__perf_magic1 = "PERFFILE";
+static const u64 __perf_magic2 = 0x32454c4946524550ULL;
+static const u64 __perf_magic2_sw = 0x50455246494c4532ULL;
+
+#define PERF_MAGIC __perf_magic2
+
+const char perf_version_string[] = PERF_VERSION;
+
+struct perf_file_attr {
+ struct perf_event_attr attr;
+ struct perf_file_section ids;
+};
+
+void perf_header__set_feat(struct perf_header *header, int feat)
+{
+ __set_bit(feat, header->adds_features);
+}
+
+void perf_header__clear_feat(struct perf_header *header, int feat)
+{
+ __clear_bit(feat, header->adds_features);
+}
+
+bool perf_header__has_feat(const struct perf_header *header, int feat)
+{
+ return test_bit(feat, header->adds_features);
+}
+
+static int __do_write_fd(struct feat_fd *ff, const void *buf, size_t size)
+{
+ ssize_t ret = writen(ff->fd, buf, size);
+
+ if (ret != (ssize_t)size)
+ return ret < 0 ? (int)ret : -1;
+ return 0;
+}
+
+static int __do_write_buf(struct feat_fd *ff, const void *buf, size_t size)
+{
+ /* struct perf_event_header::size is u16 */
+ const size_t max_size = 0xffff - sizeof(struct perf_event_header);
+ size_t new_size = ff->size;
+ void *addr;
+
+ if (size + ff->offset > max_size)
+ return -E2BIG;
+
+ while (size > (new_size - ff->offset))
+ new_size <<= 1;
+ new_size = min(max_size, new_size);
+
+ if (ff->size < new_size) {
+ addr = realloc(ff->buf, new_size);
+ if (!addr)
+ return -ENOMEM;
+ ff->buf = addr;
+ ff->size = new_size;
+ }
+
+ memcpy(ff->buf + ff->offset, buf, size);
+ ff->offset += size;
+
+ return 0;
+}
+
+/* Return: 0 if succeeded, -ERR if failed. */
+int do_write(struct feat_fd *ff, const void *buf, size_t size)
+{
+ if (!ff->buf)
+ return __do_write_fd(ff, buf, size);
+ return __do_write_buf(ff, buf, size);
+}
+
+/* Return: 0 if succeeded, -ERR if failed. */
+static int do_write_bitmap(struct feat_fd *ff, unsigned long *set, u64 size)
+{
+ u64 *p = (u64 *) set;
+ int i, ret;
+
+ ret = do_write(ff, &size, sizeof(size));
+ if (ret < 0)
+ return ret;
+
+ for (i = 0; (u64) i < BITS_TO_U64(size); i++) {
+ ret = do_write(ff, p + i, sizeof(*p));
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+/* Return: 0 if succeeded, -ERR if failed. */
+int write_padded(struct feat_fd *ff, const void *bf,
+ size_t count, size_t count_aligned)
+{
+ static const char zero_buf[NAME_ALIGN];
+ int err = do_write(ff, bf, count);
+
+ if (!err)
+ err = do_write(ff, zero_buf, count_aligned - count);
+
+ return err;
+}
+
+#define string_size(str) \
+ (PERF_ALIGN((strlen(str) + 1), NAME_ALIGN) + sizeof(u32))
+
+/* Return: 0 if succeeded, -ERR if failed. */
+static int do_write_string(struct feat_fd *ff, const char *str)
+{
+ u32 len, olen;
+ int ret;
+
+ olen = strlen(str) + 1;
+ len = PERF_ALIGN(olen, NAME_ALIGN);
+
+ /* write len, incl. \0 */
+ ret = do_write(ff, &len, sizeof(len));
+ if (ret < 0)
+ return ret;
+
+ return write_padded(ff, str, olen, len);
+}
+
+static int __do_read_fd(struct feat_fd *ff, void *addr, ssize_t size)
+{
+ ssize_t ret = readn(ff->fd, addr, size);
+
+ if (ret != size)
+ return ret < 0 ? (int)ret : -1;
+ return 0;
+}
+
+static int __do_read_buf(struct feat_fd *ff, void *addr, ssize_t size)
+{
+ if (size > (ssize_t)ff->size - ff->offset)
+ return -1;
+
+ memcpy(addr, ff->buf + ff->offset, size);
+ ff->offset += size;
+
+ return 0;
+
+}
+
+static int __do_read(struct feat_fd *ff, void *addr, ssize_t size)
+{
+ if (!ff->buf)
+ return __do_read_fd(ff, addr, size);
+ return __do_read_buf(ff, addr, size);
+}
+
+static int do_read_u32(struct feat_fd *ff, u32 *addr)
+{
+ int ret;
+
+ ret = __do_read(ff, addr, sizeof(*addr));
+ if (ret)
+ return ret;
+
+ if (ff->ph->needs_swap)
+ *addr = bswap_32(*addr);
+ return 0;
+}
+
+static int do_read_u64(struct feat_fd *ff, u64 *addr)
+{
+ int ret;
+
+ ret = __do_read(ff, addr, sizeof(*addr));
+ if (ret)
+ return ret;
+
+ if (ff->ph->needs_swap)
+ *addr = bswap_64(*addr);
+ return 0;
+}
+
+static char *do_read_string(struct feat_fd *ff)
+{
+ u32 len;
+ char *buf;
+
+ if (do_read_u32(ff, &len))
+ return NULL;
+
+ buf = malloc(len);
+ if (!buf)
+ return NULL;
+
+ if (!__do_read(ff, buf, len)) {
+ /*
+ * strings are padded by zeroes
+ * thus the actual strlen of buf
+ * may be less than len
+ */
+ return buf;
+ }
+
+ free(buf);
+ return NULL;
+}
+
+/* Return: 0 if succeeded, -ERR if failed. */
+static int do_read_bitmap(struct feat_fd *ff, unsigned long **pset, u64 *psize)
+{
+ unsigned long *set;
+ u64 size, *p;
+ int i, ret;
+
+ ret = do_read_u64(ff, &size);
+ if (ret)
+ return ret;
+
+ set = bitmap_zalloc(size);
+ if (!set)
+ return -ENOMEM;
+
+ p = (u64 *) set;
+
+ for (i = 0; (u64) i < BITS_TO_U64(size); i++) {
+ ret = do_read_u64(ff, p + i);
+ if (ret < 0) {
+ free(set);
+ return ret;
+ }
+ }
+
+ *pset = set;
+ *psize = size;
+ return 0;
+}
+
+#ifdef HAVE_LIBTRACEEVENT
+static int write_tracing_data(struct feat_fd *ff,
+ struct evlist *evlist)
+{
+ if (WARN(ff->buf, "Error: calling %s in pipe-mode.\n", __func__))
+ return -1;
+
+ return read_tracing_data(ff->fd, &evlist->core.entries);
+}
+#endif
+
+static int write_build_id(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ struct perf_session *session;
+ int err;
+
+ session = container_of(ff->ph, struct perf_session, header);
+
+ if (!perf_session__read_build_ids(session, true))
+ return -1;
+
+ if (WARN(ff->buf, "Error: calling %s in pipe-mode.\n", __func__))
+ return -1;
+
+ err = perf_session__write_buildid_table(session, ff);
+ if (err < 0) {
+ pr_debug("failed to write buildid table\n");
+ return err;
+ }
+ perf_session__cache_build_ids(session);
+
+ return 0;
+}
+
+static int write_hostname(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ struct utsname uts;
+ int ret;
+
+ ret = uname(&uts);
+ if (ret < 0)
+ return -1;
+
+ return do_write_string(ff, uts.nodename);
+}
+
+static int write_osrelease(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ struct utsname uts;
+ int ret;
+
+ ret = uname(&uts);
+ if (ret < 0)
+ return -1;
+
+ return do_write_string(ff, uts.release);
+}
+
+static int write_arch(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ struct utsname uts;
+ int ret;
+
+ ret = uname(&uts);
+ if (ret < 0)
+ return -1;
+
+ return do_write_string(ff, uts.machine);
+}
+
+static int write_version(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ return do_write_string(ff, perf_version_string);
+}
+
+static int __write_cpudesc(struct feat_fd *ff, const char *cpuinfo_proc)
+{
+ FILE *file;
+ char *buf = NULL;
+ char *s, *p;
+ const char *search = cpuinfo_proc;
+ size_t len = 0;
+ int ret = -1;
+
+ if (!search)
+ return -1;
+
+ file = fopen("/proc/cpuinfo", "r");
+ if (!file)
+ return -1;
+
+ while (getline(&buf, &len, file) > 0) {
+ ret = strncmp(buf, search, strlen(search));
+ if (!ret)
+ break;
+ }
+
+ if (ret) {
+ ret = -1;
+ goto done;
+ }
+
+ s = buf;
+
+ p = strchr(buf, ':');
+ if (p && *(p+1) == ' ' && *(p+2))
+ s = p + 2;
+ p = strchr(s, '\n');
+ if (p)
+ *p = '\0';
+
+ /* squash extra space characters (branding string) */
+ p = s;
+ while (*p) {
+ if (isspace(*p)) {
+ char *r = p + 1;
+ char *q = skip_spaces(r);
+ *p = ' ';
+ if (q != (p+1))
+ while ((*r++ = *q++));
+ }
+ p++;
+ }
+ ret = do_write_string(ff, s);
+done:
+ free(buf);
+ fclose(file);
+ return ret;
+}
+
+static int write_cpudesc(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+#if defined(__powerpc__) || defined(__hppa__) || defined(__sparc__)
+#define CPUINFO_PROC { "cpu", }
+#elif defined(__s390__)
+#define CPUINFO_PROC { "vendor_id", }
+#elif defined(__sh__)
+#define CPUINFO_PROC { "cpu type", }
+#elif defined(__alpha__) || defined(__mips__)
+#define CPUINFO_PROC { "cpu model", }
+#elif defined(__arm__)
+#define CPUINFO_PROC { "model name", "Processor", }
+#elif defined(__arc__)
+#define CPUINFO_PROC { "Processor", }
+#elif defined(__xtensa__)
+#define CPUINFO_PROC { "core ID", }
+#elif defined(__loongarch__)
+#define CPUINFO_PROC { "Model Name", }
+#else
+#define CPUINFO_PROC { "model name", }
+#endif
+ const char *cpuinfo_procs[] = CPUINFO_PROC;
+#undef CPUINFO_PROC
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(cpuinfo_procs); i++) {
+ int ret;
+ ret = __write_cpudesc(ff, cpuinfo_procs[i]);
+ if (ret >= 0)
+ return ret;
+ }
+ return -1;
+}
+
+
+static int write_nrcpus(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ long nr;
+ u32 nrc, nra;
+ int ret;
+
+ nrc = cpu__max_present_cpu().cpu;
+
+ nr = sysconf(_SC_NPROCESSORS_ONLN);
+ if (nr < 0)
+ return -1;
+
+ nra = (u32)(nr & UINT_MAX);
+
+ ret = do_write(ff, &nrc, sizeof(nrc));
+ if (ret < 0)
+ return ret;
+
+ return do_write(ff, &nra, sizeof(nra));
+}
+
+static int write_event_desc(struct feat_fd *ff,
+ struct evlist *evlist)
+{
+ struct evsel *evsel;
+ u32 nre, nri, sz;
+ int ret;
+
+ nre = evlist->core.nr_entries;
+
+ /*
+ * write number of events
+ */
+ ret = do_write(ff, &nre, sizeof(nre));
+ if (ret < 0)
+ return ret;
+
+ /*
+ * size of perf_event_attr struct
+ */
+ sz = (u32)sizeof(evsel->core.attr);
+ ret = do_write(ff, &sz, sizeof(sz));
+ if (ret < 0)
+ return ret;
+
+ evlist__for_each_entry(evlist, evsel) {
+ ret = do_write(ff, &evsel->core.attr, sz);
+ if (ret < 0)
+ return ret;
+ /*
+ * write number of unique id per event
+ * there is one id per instance of an event
+ *
+ * copy into an nri to be independent of the
+ * type of ids,
+ */
+ nri = evsel->core.ids;
+ ret = do_write(ff, &nri, sizeof(nri));
+ if (ret < 0)
+ return ret;
+
+ /*
+ * write event string as passed on cmdline
+ */
+ ret = do_write_string(ff, evsel__name(evsel));
+ if (ret < 0)
+ return ret;
+ /*
+ * write unique ids for this event
+ */
+ ret = do_write(ff, evsel->core.id, evsel->core.ids * sizeof(u64));
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+static int write_cmdline(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ char pbuf[MAXPATHLEN], *buf;
+ int i, ret, n;
+
+ /* actual path to perf binary */
+ buf = perf_exe(pbuf, MAXPATHLEN);
+
+ /* account for binary path */
+ n = perf_env.nr_cmdline + 1;
+
+ ret = do_write(ff, &n, sizeof(n));
+ if (ret < 0)
+ return ret;
+
+ ret = do_write_string(ff, buf);
+ if (ret < 0)
+ return ret;
+
+ for (i = 0 ; i < perf_env.nr_cmdline; i++) {
+ ret = do_write_string(ff, perf_env.cmdline_argv[i]);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+
+static int write_cpu_topology(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ struct cpu_topology *tp;
+ u32 i;
+ int ret, j;
+
+ tp = cpu_topology__new();
+ if (!tp)
+ return -1;
+
+ ret = do_write(ff, &tp->package_cpus_lists, sizeof(tp->package_cpus_lists));
+ if (ret < 0)
+ goto done;
+
+ for (i = 0; i < tp->package_cpus_lists; i++) {
+ ret = do_write_string(ff, tp->package_cpus_list[i]);
+ if (ret < 0)
+ goto done;
+ }
+ ret = do_write(ff, &tp->core_cpus_lists, sizeof(tp->core_cpus_lists));
+ if (ret < 0)
+ goto done;
+
+ for (i = 0; i < tp->core_cpus_lists; i++) {
+ ret = do_write_string(ff, tp->core_cpus_list[i]);
+ if (ret < 0)
+ break;
+ }
+
+ ret = perf_env__read_cpu_topology_map(&perf_env);
+ if (ret < 0)
+ goto done;
+
+ for (j = 0; j < perf_env.nr_cpus_avail; j++) {
+ ret = do_write(ff, &perf_env.cpu[j].core_id,
+ sizeof(perf_env.cpu[j].core_id));
+ if (ret < 0)
+ return ret;
+ ret = do_write(ff, &perf_env.cpu[j].socket_id,
+ sizeof(perf_env.cpu[j].socket_id));
+ if (ret < 0)
+ return ret;
+ }
+
+ if (!tp->die_cpus_lists)
+ goto done;
+
+ ret = do_write(ff, &tp->die_cpus_lists, sizeof(tp->die_cpus_lists));
+ if (ret < 0)
+ goto done;
+
+ for (i = 0; i < tp->die_cpus_lists; i++) {
+ ret = do_write_string(ff, tp->die_cpus_list[i]);
+ if (ret < 0)
+ goto done;
+ }
+
+ for (j = 0; j < perf_env.nr_cpus_avail; j++) {
+ ret = do_write(ff, &perf_env.cpu[j].die_id,
+ sizeof(perf_env.cpu[j].die_id));
+ if (ret < 0)
+ return ret;
+ }
+
+done:
+ cpu_topology__delete(tp);
+ return ret;
+}
+
+
+
+static int write_total_mem(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ char *buf = NULL;
+ FILE *fp;
+ size_t len = 0;
+ int ret = -1, n;
+ uint64_t mem;
+
+ fp = fopen("/proc/meminfo", "r");
+ if (!fp)
+ return -1;
+
+ while (getline(&buf, &len, fp) > 0) {
+ ret = strncmp(buf, "MemTotal:", 9);
+ if (!ret)
+ break;
+ }
+ if (!ret) {
+ n = sscanf(buf, "%*s %"PRIu64, &mem);
+ if (n == 1)
+ ret = do_write(ff, &mem, sizeof(mem));
+ } else
+ ret = -1;
+ free(buf);
+ fclose(fp);
+ return ret;
+}
+
+static int write_numa_topology(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ struct numa_topology *tp;
+ int ret = -1;
+ u32 i;
+
+ tp = numa_topology__new();
+ if (!tp)
+ return -ENOMEM;
+
+ ret = do_write(ff, &tp->nr, sizeof(u32));
+ if (ret < 0)
+ goto err;
+
+ for (i = 0; i < tp->nr; i++) {
+ struct numa_topology_node *n = &tp->nodes[i];
+
+ ret = do_write(ff, &n->node, sizeof(u32));
+ if (ret < 0)
+ goto err;
+
+ ret = do_write(ff, &n->mem_total, sizeof(u64));
+ if (ret)
+ goto err;
+
+ ret = do_write(ff, &n->mem_free, sizeof(u64));
+ if (ret)
+ goto err;
+
+ ret = do_write_string(ff, n->cpus);
+ if (ret < 0)
+ goto err;
+ }
+
+ ret = 0;
+
+err:
+ numa_topology__delete(tp);
+ return ret;
+}
+
+/*
+ * File format:
+ *
+ * struct pmu_mappings {
+ * u32 pmu_num;
+ * struct pmu_map {
+ * u32 type;
+ * char name[];
+ * }[pmu_num];
+ * };
+ */
+
+static int write_pmu_mappings(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ struct perf_pmu *pmu = NULL;
+ u32 pmu_num = 0;
+ int ret;
+
+ /*
+ * Do a first pass to count number of pmu to avoid lseek so this
+ * works in pipe mode as well.
+ */
+ while ((pmu = perf_pmus__scan(pmu)))
+ pmu_num++;
+
+ ret = do_write(ff, &pmu_num, sizeof(pmu_num));
+ if (ret < 0)
+ return ret;
+
+ while ((pmu = perf_pmus__scan(pmu))) {
+ ret = do_write(ff, &pmu->type, sizeof(pmu->type));
+ if (ret < 0)
+ return ret;
+
+ ret = do_write_string(ff, pmu->name);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * File format:
+ *
+ * struct group_descs {
+ * u32 nr_groups;
+ * struct group_desc {
+ * char name[];
+ * u32 leader_idx;
+ * u32 nr_members;
+ * }[nr_groups];
+ * };
+ */
+static int write_group_desc(struct feat_fd *ff,
+ struct evlist *evlist)
+{
+ u32 nr_groups = evlist__nr_groups(evlist);
+ struct evsel *evsel;
+ int ret;
+
+ ret = do_write(ff, &nr_groups, sizeof(nr_groups));
+ if (ret < 0)
+ return ret;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel__is_group_leader(evsel) && evsel->core.nr_members > 1) {
+ const char *name = evsel->group_name ?: "{anon_group}";
+ u32 leader_idx = evsel->core.idx;
+ u32 nr_members = evsel->core.nr_members;
+
+ ret = do_write_string(ff, name);
+ if (ret < 0)
+ return ret;
+
+ ret = do_write(ff, &leader_idx, sizeof(leader_idx));
+ if (ret < 0)
+ return ret;
+
+ ret = do_write(ff, &nr_members, sizeof(nr_members));
+ if (ret < 0)
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Return the CPU id as a raw string.
+ *
+ * Each architecture should provide a more precise id string that
+ * can be use to match the architecture's "mapfile".
+ */
+char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
+{
+ return NULL;
+}
+
+/* Return zero when the cpuid from the mapfile.csv matches the
+ * cpuid string generated on this platform.
+ * Otherwise return non-zero.
+ */
+int __weak strcmp_cpuid_str(const char *mapcpuid, const char *cpuid)
+{
+ regex_t re;
+ regmatch_t pmatch[1];
+ int match;
+
+ if (regcomp(&re, mapcpuid, REG_EXTENDED) != 0) {
+ /* Warn unable to generate match particular string. */
+ pr_info("Invalid regular expression %s\n", mapcpuid);
+ return 1;
+ }
+
+ match = !regexec(&re, cpuid, 1, pmatch, 0);
+ regfree(&re);
+ if (match) {
+ size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so);
+
+ /* Verify the entire string matched. */
+ if (match_len == strlen(cpuid))
+ return 0;
+ }
+ return 1;
+}
+
+/*
+ * default get_cpuid(): nothing gets recorded
+ * actual implementation must be in arch/$(SRCARCH)/util/header.c
+ */
+int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused)
+{
+ return ENOSYS; /* Not implemented */
+}
+
+static int write_cpuid(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ char buffer[64];
+ int ret;
+
+ ret = get_cpuid(buffer, sizeof(buffer));
+ if (ret)
+ return -1;
+
+ return do_write_string(ff, buffer);
+}
+
+static int write_branch_stack(struct feat_fd *ff __maybe_unused,
+ struct evlist *evlist __maybe_unused)
+{
+ return 0;
+}
+
+static int write_auxtrace(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ struct perf_session *session;
+ int err;
+
+ if (WARN(ff->buf, "Error: calling %s in pipe-mode.\n", __func__))
+ return -1;
+
+ session = container_of(ff->ph, struct perf_session, header);
+
+ err = auxtrace_index__write(ff->fd, &session->auxtrace_index);
+ if (err < 0)
+ pr_err("Failed to write auxtrace index\n");
+ return err;
+}
+
+static int write_clockid(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ return do_write(ff, &ff->ph->env.clock.clockid_res_ns,
+ sizeof(ff->ph->env.clock.clockid_res_ns));
+}
+
+static int write_clock_data(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ u64 *data64;
+ u32 data32;
+ int ret;
+
+ /* version */
+ data32 = 1;
+
+ ret = do_write(ff, &data32, sizeof(data32));
+ if (ret < 0)
+ return ret;
+
+ /* clockid */
+ data32 = ff->ph->env.clock.clockid;
+
+ ret = do_write(ff, &data32, sizeof(data32));
+ if (ret < 0)
+ return ret;
+
+ /* TOD ref time */
+ data64 = &ff->ph->env.clock.tod_ns;
+
+ ret = do_write(ff, data64, sizeof(*data64));
+ if (ret < 0)
+ return ret;
+
+ /* clockid ref time */
+ data64 = &ff->ph->env.clock.clockid_ns;
+
+ return do_write(ff, data64, sizeof(*data64));
+}
+
+static int write_hybrid_topology(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ struct hybrid_topology *tp;
+ int ret;
+ u32 i;
+
+ tp = hybrid_topology__new();
+ if (!tp)
+ return -ENOENT;
+
+ ret = do_write(ff, &tp->nr, sizeof(u32));
+ if (ret < 0)
+ goto err;
+
+ for (i = 0; i < tp->nr; i++) {
+ struct hybrid_topology_node *n = &tp->nodes[i];
+
+ ret = do_write_string(ff, n->pmu_name);
+ if (ret < 0)
+ goto err;
+
+ ret = do_write_string(ff, n->cpus);
+ if (ret < 0)
+ goto err;
+ }
+
+ ret = 0;
+
+err:
+ hybrid_topology__delete(tp);
+ return ret;
+}
+
+static int write_dir_format(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ struct perf_session *session;
+ struct perf_data *data;
+
+ session = container_of(ff->ph, struct perf_session, header);
+ data = session->data;
+
+ if (WARN_ON(!perf_data__is_dir(data)))
+ return -1;
+
+ return do_write(ff, &data->dir.version, sizeof(data->dir.version));
+}
+
+/*
+ * Check whether a CPU is online
+ *
+ * Returns:
+ * 1 -> if CPU is online
+ * 0 -> if CPU is offline
+ * -1 -> error case
+ */
+int is_cpu_online(unsigned int cpu)
+{
+ char *str;
+ size_t strlen;
+ char buf[256];
+ int status = -1;
+ struct stat statbuf;
+
+ snprintf(buf, sizeof(buf),
+ "/sys/devices/system/cpu/cpu%d", cpu);
+ if (stat(buf, &statbuf) != 0)
+ return 0;
+
+ /*
+ * Check if /sys/devices/system/cpu/cpux/online file
+ * exists. Some cases cpu0 won't have online file since
+ * it is not expected to be turned off generally.
+ * In kernels without CONFIG_HOTPLUG_CPU, this
+ * file won't exist
+ */
+ snprintf(buf, sizeof(buf),
+ "/sys/devices/system/cpu/cpu%d/online", cpu);
+ if (stat(buf, &statbuf) != 0)
+ return 1;
+
+ /*
+ * Read online file using sysfs__read_str.
+ * If read or open fails, return -1.
+ * If read succeeds, return value from file
+ * which gets stored in "str"
+ */
+ snprintf(buf, sizeof(buf),
+ "devices/system/cpu/cpu%d/online", cpu);
+
+ if (sysfs__read_str(buf, &str, &strlen) < 0)
+ return status;
+
+ status = atoi(str);
+
+ free(str);
+ return status;
+}
+
+#ifdef HAVE_LIBBPF_SUPPORT
+static int write_bpf_prog_info(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ struct perf_env *env = &ff->ph->env;
+ struct rb_root *root;
+ struct rb_node *next;
+ int ret;
+
+ down_read(&env->bpf_progs.lock);
+
+ ret = do_write(ff, &env->bpf_progs.infos_cnt,
+ sizeof(env->bpf_progs.infos_cnt));
+ if (ret < 0)
+ goto out;
+
+ root = &env->bpf_progs.infos;
+ next = rb_first(root);
+ while (next) {
+ struct bpf_prog_info_node *node;
+ size_t len;
+
+ node = rb_entry(next, struct bpf_prog_info_node, rb_node);
+ next = rb_next(&node->rb_node);
+ len = sizeof(struct perf_bpil) +
+ node->info_linear->data_len;
+
+ /* before writing to file, translate address to offset */
+ bpil_addr_to_offs(node->info_linear);
+ ret = do_write(ff, node->info_linear, len);
+ /*
+ * translate back to address even when do_write() fails,
+ * so that this function never changes the data.
+ */
+ bpil_offs_to_addr(node->info_linear);
+ if (ret < 0)
+ goto out;
+ }
+out:
+ up_read(&env->bpf_progs.lock);
+ return ret;
+}
+
+static int write_bpf_btf(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ struct perf_env *env = &ff->ph->env;
+ struct rb_root *root;
+ struct rb_node *next;
+ int ret;
+
+ down_read(&env->bpf_progs.lock);
+
+ ret = do_write(ff, &env->bpf_progs.btfs_cnt,
+ sizeof(env->bpf_progs.btfs_cnt));
+
+ if (ret < 0)
+ goto out;
+
+ root = &env->bpf_progs.btfs;
+ next = rb_first(root);
+ while (next) {
+ struct btf_node *node;
+
+ node = rb_entry(next, struct btf_node, rb_node);
+ next = rb_next(&node->rb_node);
+ ret = do_write(ff, &node->id,
+ sizeof(u32) * 2 + node->data_size);
+ if (ret < 0)
+ goto out;
+ }
+out:
+ up_read(&env->bpf_progs.lock);
+ return ret;
+}
+#endif // HAVE_LIBBPF_SUPPORT
+
+static int cpu_cache_level__sort(const void *a, const void *b)
+{
+ struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
+ struct cpu_cache_level *cache_b = (struct cpu_cache_level *)b;
+
+ return cache_a->level - cache_b->level;
+}
+
+static bool cpu_cache_level__cmp(struct cpu_cache_level *a, struct cpu_cache_level *b)
+{
+ if (a->level != b->level)
+ return false;
+
+ if (a->line_size != b->line_size)
+ return false;
+
+ if (a->sets != b->sets)
+ return false;
+
+ if (a->ways != b->ways)
+ return false;
+
+ if (strcmp(a->type, b->type))
+ return false;
+
+ if (strcmp(a->size, b->size))
+ return false;
+
+ if (strcmp(a->map, b->map))
+ return false;
+
+ return true;
+}
+
+static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 level)
+{
+ char path[PATH_MAX], file[PATH_MAX];
+ struct stat st;
+ size_t len;
+
+ scnprintf(path, PATH_MAX, "devices/system/cpu/cpu%d/cache/index%d/", cpu, level);
+ scnprintf(file, PATH_MAX, "%s/%s", sysfs__mountpoint(), path);
+
+ if (stat(file, &st))
+ return 1;
+
+ scnprintf(file, PATH_MAX, "%s/level", path);
+ if (sysfs__read_int(file, (int *) &cache->level))
+ return -1;
+
+ scnprintf(file, PATH_MAX, "%s/coherency_line_size", path);
+ if (sysfs__read_int(file, (int *) &cache->line_size))
+ return -1;
+
+ scnprintf(file, PATH_MAX, "%s/number_of_sets", path);
+ if (sysfs__read_int(file, (int *) &cache->sets))
+ return -1;
+
+ scnprintf(file, PATH_MAX, "%s/ways_of_associativity", path);
+ if (sysfs__read_int(file, (int *) &cache->ways))
+ return -1;
+
+ scnprintf(file, PATH_MAX, "%s/type", path);
+ if (sysfs__read_str(file, &cache->type, &len))
+ return -1;
+
+ cache->type[len] = 0;
+ cache->type = strim(cache->type);
+
+ scnprintf(file, PATH_MAX, "%s/size", path);
+ if (sysfs__read_str(file, &cache->size, &len)) {
+ zfree(&cache->type);
+ return -1;
+ }
+
+ cache->size[len] = 0;
+ cache->size = strim(cache->size);
+
+ scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path);
+ if (sysfs__read_str(file, &cache->map, &len)) {
+ zfree(&cache->size);
+ zfree(&cache->type);
+ return -1;
+ }
+
+ cache->map[len] = 0;
+ cache->map = strim(cache->map);
+ return 0;
+}
+
+static void cpu_cache_level__fprintf(FILE *out, struct cpu_cache_level *c)
+{
+ fprintf(out, "L%d %-15s %8s [%s]\n", c->level, c->type, c->size, c->map);
+}
+
+/*
+ * Build caches levels for a particular CPU from the data in
+ * /sys/devices/system/cpu/cpu<cpu>/cache/
+ * The cache level data is stored in caches[] from index at
+ * *cntp.
+ */
+int build_caches_for_cpu(u32 cpu, struct cpu_cache_level caches[], u32 *cntp)
+{
+ u16 level;
+
+ for (level = 0; level < MAX_CACHE_LVL; level++) {
+ struct cpu_cache_level c;
+ int err;
+ u32 i;
+
+ err = cpu_cache_level__read(&c, cpu, level);
+ if (err < 0)
+ return err;
+
+ if (err == 1)
+ break;
+
+ for (i = 0; i < *cntp; i++) {
+ if (cpu_cache_level__cmp(&c, &caches[i]))
+ break;
+ }
+
+ if (i == *cntp) {
+ caches[*cntp] = c;
+ *cntp = *cntp + 1;
+ } else
+ cpu_cache_level__free(&c);
+ }
+
+ return 0;
+}
+
+static int build_caches(struct cpu_cache_level caches[], u32 *cntp)
+{
+ u32 nr, cpu, cnt = 0;
+
+ nr = cpu__max_cpu().cpu;
+
+ for (cpu = 0; cpu < nr; cpu++) {
+ int ret = build_caches_for_cpu(cpu, caches, &cnt);
+
+ if (ret)
+ return ret;
+ }
+ *cntp = cnt;
+ return 0;
+}
+
+static int write_cache(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ u32 max_caches = cpu__max_cpu().cpu * MAX_CACHE_LVL;
+ struct cpu_cache_level caches[max_caches];
+ u32 cnt = 0, i, version = 1;
+ int ret;
+
+ ret = build_caches(caches, &cnt);
+ if (ret)
+ goto out;
+
+ qsort(&caches, cnt, sizeof(struct cpu_cache_level), cpu_cache_level__sort);
+
+ ret = do_write(ff, &version, sizeof(u32));
+ if (ret < 0)
+ goto out;
+
+ ret = do_write(ff, &cnt, sizeof(u32));
+ if (ret < 0)
+ goto out;
+
+ for (i = 0; i < cnt; i++) {
+ struct cpu_cache_level *c = &caches[i];
+
+ #define _W(v) \
+ ret = do_write(ff, &c->v, sizeof(u32)); \
+ if (ret < 0) \
+ goto out;
+
+ _W(level)
+ _W(line_size)
+ _W(sets)
+ _W(ways)
+ #undef _W
+
+ #define _W(v) \
+ ret = do_write_string(ff, (const char *) c->v); \
+ if (ret < 0) \
+ goto out;
+
+ _W(type)
+ _W(size)
+ _W(map)
+ #undef _W
+ }
+
+out:
+ for (i = 0; i < cnt; i++)
+ cpu_cache_level__free(&caches[i]);
+ return ret;
+}
+
+static int write_stat(struct feat_fd *ff __maybe_unused,
+ struct evlist *evlist __maybe_unused)
+{
+ return 0;
+}
+
+static int write_sample_time(struct feat_fd *ff,
+ struct evlist *evlist)
+{
+ int ret;
+
+ ret = do_write(ff, &evlist->first_sample_time,
+ sizeof(evlist->first_sample_time));
+ if (ret < 0)
+ return ret;
+
+ return do_write(ff, &evlist->last_sample_time,
+ sizeof(evlist->last_sample_time));
+}
+
+
+static int memory_node__read(struct memory_node *n, unsigned long idx)
+{
+ unsigned int phys, size = 0;
+ char path[PATH_MAX];
+ struct dirent *ent;
+ DIR *dir;
+
+#define for_each_memory(mem, dir) \
+ while ((ent = readdir(dir))) \
+ if (strcmp(ent->d_name, ".") && \
+ strcmp(ent->d_name, "..") && \
+ sscanf(ent->d_name, "memory%u", &mem) == 1)
+
+ scnprintf(path, PATH_MAX,
+ "%s/devices/system/node/node%lu",
+ sysfs__mountpoint(), idx);
+
+ dir = opendir(path);
+ if (!dir) {
+ pr_warning("failed: can't open memory sysfs data\n");
+ return -1;
+ }
+
+ for_each_memory(phys, dir) {
+ size = max(phys, size);
+ }
+
+ size++;
+
+ n->set = bitmap_zalloc(size);
+ if (!n->set) {
+ closedir(dir);
+ return -ENOMEM;
+ }
+
+ n->node = idx;
+ n->size = size;
+
+ rewinddir(dir);
+
+ for_each_memory(phys, dir) {
+ __set_bit(phys, n->set);
+ }
+
+ closedir(dir);
+ return 0;
+}
+
+static void memory_node__delete_nodes(struct memory_node *nodesp, u64 cnt)
+{
+ for (u64 i = 0; i < cnt; i++)
+ bitmap_free(nodesp[i].set);
+
+ free(nodesp);
+}
+
+static int memory_node__sort(const void *a, const void *b)
+{
+ const struct memory_node *na = a;
+ const struct memory_node *nb = b;
+
+ return na->node - nb->node;
+}
+
+static int build_mem_topology(struct memory_node **nodesp, u64 *cntp)
+{
+ char path[PATH_MAX];
+ struct dirent *ent;
+ DIR *dir;
+ int ret = 0;
+ size_t cnt = 0, size = 0;
+ struct memory_node *nodes = NULL;
+
+ scnprintf(path, PATH_MAX, "%s/devices/system/node/",
+ sysfs__mountpoint());
+
+ dir = opendir(path);
+ if (!dir) {
+ pr_debug2("%s: couldn't read %s, does this arch have topology information?\n",
+ __func__, path);
+ return -1;
+ }
+
+ while (!ret && (ent = readdir(dir))) {
+ unsigned int idx;
+ int r;
+
+ if (!strcmp(ent->d_name, ".") ||
+ !strcmp(ent->d_name, ".."))
+ continue;
+
+ r = sscanf(ent->d_name, "node%u", &idx);
+ if (r != 1)
+ continue;
+
+ if (cnt >= size) {
+ struct memory_node *new_nodes =
+ reallocarray(nodes, cnt + 4, sizeof(*nodes));
+
+ if (!new_nodes) {
+ pr_err("Failed to write MEM_TOPOLOGY, size %zd nodes\n", size);
+ ret = -ENOMEM;
+ goto out;
+ }
+ nodes = new_nodes;
+ size += 4;
+ }
+ ret = memory_node__read(&nodes[cnt], idx);
+ if (!ret)
+ cnt += 1;
+ }
+out:
+ closedir(dir);
+ if (!ret) {
+ *cntp = cnt;
+ *nodesp = nodes;
+ qsort(nodes, cnt, sizeof(nodes[0]), memory_node__sort);
+ } else
+ memory_node__delete_nodes(nodes, cnt);
+
+ return ret;
+}
+
+/*
+ * The MEM_TOPOLOGY holds physical memory map for every
+ * node in system. The format of data is as follows:
+ *
+ * 0 - version | for future changes
+ * 8 - block_size_bytes | /sys/devices/system/memory/block_size_bytes
+ * 16 - count | number of nodes
+ *
+ * For each node we store map of physical indexes for
+ * each node:
+ *
+ * 32 - node id | node index
+ * 40 - size | size of bitmap
+ * 48 - bitmap | bitmap of memory indexes that belongs to node
+ */
+static int write_mem_topology(struct feat_fd *ff __maybe_unused,
+ struct evlist *evlist __maybe_unused)
+{
+ struct memory_node *nodes = NULL;
+ u64 bsize, version = 1, i, nr = 0;
+ int ret;
+
+ ret = sysfs__read_xll("devices/system/memory/block_size_bytes",
+ (unsigned long long *) &bsize);
+ if (ret)
+ return ret;
+
+ ret = build_mem_topology(&nodes, &nr);
+ if (ret)
+ return ret;
+
+ ret = do_write(ff, &version, sizeof(version));
+ if (ret < 0)
+ goto out;
+
+ ret = do_write(ff, &bsize, sizeof(bsize));
+ if (ret < 0)
+ goto out;
+
+ ret = do_write(ff, &nr, sizeof(nr));
+ if (ret < 0)
+ goto out;
+
+ for (i = 0; i < nr; i++) {
+ struct memory_node *n = &nodes[i];
+
+ #define _W(v) \
+ ret = do_write(ff, &n->v, sizeof(n->v)); \
+ if (ret < 0) \
+ goto out;
+
+ _W(node)
+ _W(size)
+
+ #undef _W
+
+ ret = do_write_bitmap(ff, n->set, n->size);
+ if (ret < 0)
+ goto out;
+ }
+
+out:
+ memory_node__delete_nodes(nodes, nr);
+ return ret;
+}
+
+static int write_compressed(struct feat_fd *ff __maybe_unused,
+ struct evlist *evlist __maybe_unused)
+{
+ int ret;
+
+ ret = do_write(ff, &(ff->ph->env.comp_ver), sizeof(ff->ph->env.comp_ver));
+ if (ret)
+ return ret;
+
+ ret = do_write(ff, &(ff->ph->env.comp_type), sizeof(ff->ph->env.comp_type));
+ if (ret)
+ return ret;
+
+ ret = do_write(ff, &(ff->ph->env.comp_level), sizeof(ff->ph->env.comp_level));
+ if (ret)
+ return ret;
+
+ ret = do_write(ff, &(ff->ph->env.comp_ratio), sizeof(ff->ph->env.comp_ratio));
+ if (ret)
+ return ret;
+
+ return do_write(ff, &(ff->ph->env.comp_mmap_len), sizeof(ff->ph->env.comp_mmap_len));
+}
+
+static int __write_pmu_caps(struct feat_fd *ff, struct perf_pmu *pmu,
+ bool write_pmu)
+{
+ struct perf_pmu_caps *caps = NULL;
+ int ret;
+
+ ret = do_write(ff, &pmu->nr_caps, sizeof(pmu->nr_caps));
+ if (ret < 0)
+ return ret;
+
+ list_for_each_entry(caps, &pmu->caps, list) {
+ ret = do_write_string(ff, caps->name);
+ if (ret < 0)
+ return ret;
+
+ ret = do_write_string(ff, caps->value);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (write_pmu) {
+ ret = do_write_string(ff, pmu->name);
+ if (ret < 0)
+ return ret;
+ }
+
+ return ret;
+}
+
+static int write_cpu_pmu_caps(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ struct perf_pmu *cpu_pmu = perf_pmus__find("cpu");
+ int ret;
+
+ if (!cpu_pmu)
+ return -ENOENT;
+
+ ret = perf_pmu__caps_parse(cpu_pmu);
+ if (ret < 0)
+ return ret;
+
+ return __write_pmu_caps(ff, cpu_pmu, false);
+}
+
+static int write_pmu_caps(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ struct perf_pmu *pmu = NULL;
+ int nr_pmu = 0;
+ int ret;
+
+ while ((pmu = perf_pmus__scan(pmu))) {
+ if (!strcmp(pmu->name, "cpu")) {
+ /*
+ * The "cpu" PMU is special and covered by
+ * HEADER_CPU_PMU_CAPS. Note, core PMUs are
+ * counted/written here for ARM, s390 and Intel hybrid.
+ */
+ continue;
+ }
+ if (perf_pmu__caps_parse(pmu) <= 0)
+ continue;
+ nr_pmu++;
+ }
+
+ ret = do_write(ff, &nr_pmu, sizeof(nr_pmu));
+ if (ret < 0)
+ return ret;
+
+ if (!nr_pmu)
+ return 0;
+
+ /*
+ * Note older perf tools assume core PMUs come first, this is a property
+ * of perf_pmus__scan.
+ */
+ pmu = NULL;
+ while ((pmu = perf_pmus__scan(pmu))) {
+ if (!strcmp(pmu->name, "cpu")) {
+ /* Skip as above. */
+ continue;
+ }
+ if (perf_pmu__caps_parse(pmu) <= 0)
+ continue;
+ ret = __write_pmu_caps(ff, pmu, true);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+static void print_hostname(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname);
+}
+
+static void print_osrelease(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# os release : %s\n", ff->ph->env.os_release);
+}
+
+static void print_arch(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# arch : %s\n", ff->ph->env.arch);
+}
+
+static void print_cpudesc(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# cpudesc : %s\n", ff->ph->env.cpu_desc);
+}
+
+static void print_nrcpus(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# nrcpus online : %u\n", ff->ph->env.nr_cpus_online);
+ fprintf(fp, "# nrcpus avail : %u\n", ff->ph->env.nr_cpus_avail);
+}
+
+static void print_version(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# perf version : %s\n", ff->ph->env.version);
+}
+
+static void print_cmdline(struct feat_fd *ff, FILE *fp)
+{
+ int nr, i;
+
+ nr = ff->ph->env.nr_cmdline;
+
+ fprintf(fp, "# cmdline : ");
+
+ for (i = 0; i < nr; i++) {
+ char *argv_i = strdup(ff->ph->env.cmdline_argv[i]);
+ if (!argv_i) {
+ fprintf(fp, "%s ", ff->ph->env.cmdline_argv[i]);
+ } else {
+ char *mem = argv_i;
+ do {
+ char *quote = strchr(argv_i, '\'');
+ if (!quote)
+ break;
+ *quote++ = '\0';
+ fprintf(fp, "%s\\\'", argv_i);
+ argv_i = quote;
+ } while (1);
+ fprintf(fp, "%s ", argv_i);
+ free(mem);
+ }
+ }
+ fputc('\n', fp);
+}
+
+static void print_cpu_topology(struct feat_fd *ff, FILE *fp)
+{
+ struct perf_header *ph = ff->ph;
+ int cpu_nr = ph->env.nr_cpus_avail;
+ int nr, i;
+ char *str;
+
+ nr = ph->env.nr_sibling_cores;
+ str = ph->env.sibling_cores;
+
+ for (i = 0; i < nr; i++) {
+ fprintf(fp, "# sibling sockets : %s\n", str);
+ str += strlen(str) + 1;
+ }
+
+ if (ph->env.nr_sibling_dies) {
+ nr = ph->env.nr_sibling_dies;
+ str = ph->env.sibling_dies;
+
+ for (i = 0; i < nr; i++) {
+ fprintf(fp, "# sibling dies : %s\n", str);
+ str += strlen(str) + 1;
+ }
+ }
+
+ nr = ph->env.nr_sibling_threads;
+ str = ph->env.sibling_threads;
+
+ for (i = 0; i < nr; i++) {
+ fprintf(fp, "# sibling threads : %s\n", str);
+ str += strlen(str) + 1;
+ }
+
+ if (ph->env.nr_sibling_dies) {
+ if (ph->env.cpu != NULL) {
+ for (i = 0; i < cpu_nr; i++)
+ fprintf(fp, "# CPU %d: Core ID %d, "
+ "Die ID %d, Socket ID %d\n",
+ i, ph->env.cpu[i].core_id,
+ ph->env.cpu[i].die_id,
+ ph->env.cpu[i].socket_id);
+ } else
+ fprintf(fp, "# Core ID, Die ID and Socket ID "
+ "information is not available\n");
+ } else {
+ if (ph->env.cpu != NULL) {
+ for (i = 0; i < cpu_nr; i++)
+ fprintf(fp, "# CPU %d: Core ID %d, "
+ "Socket ID %d\n",
+ i, ph->env.cpu[i].core_id,
+ ph->env.cpu[i].socket_id);
+ } else
+ fprintf(fp, "# Core ID and Socket ID "
+ "information is not available\n");
+ }
+}
+
+static void print_clockid(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# clockid frequency: %"PRIu64" MHz\n",
+ ff->ph->env.clock.clockid_res_ns * 1000);
+}
+
+static void print_clock_data(struct feat_fd *ff, FILE *fp)
+{
+ struct timespec clockid_ns;
+ char tstr[64], date[64];
+ struct timeval tod_ns;
+ clockid_t clockid;
+ struct tm ltime;
+ u64 ref;
+
+ if (!ff->ph->env.clock.enabled) {
+ fprintf(fp, "# reference time disabled\n");
+ return;
+ }
+
+ /* Compute TOD time. */
+ ref = ff->ph->env.clock.tod_ns;
+ tod_ns.tv_sec = ref / NSEC_PER_SEC;
+ ref -= tod_ns.tv_sec * NSEC_PER_SEC;
+ tod_ns.tv_usec = ref / NSEC_PER_USEC;
+
+ /* Compute clockid time. */
+ ref = ff->ph->env.clock.clockid_ns;
+ clockid_ns.tv_sec = ref / NSEC_PER_SEC;
+ ref -= clockid_ns.tv_sec * NSEC_PER_SEC;
+ clockid_ns.tv_nsec = ref;
+
+ clockid = ff->ph->env.clock.clockid;
+
+ if (localtime_r(&tod_ns.tv_sec, &ltime) == NULL)
+ snprintf(tstr, sizeof(tstr), "<error>");
+ else {
+ strftime(date, sizeof(date), "%F %T", &ltime);
+ scnprintf(tstr, sizeof(tstr), "%s.%06d",
+ date, (int) tod_ns.tv_usec);
+ }
+
+ fprintf(fp, "# clockid: %s (%u)\n", clockid_name(clockid), clockid);
+ fprintf(fp, "# reference time: %s = %ld.%06d (TOD) = %ld.%09ld (%s)\n",
+ tstr, (long) tod_ns.tv_sec, (int) tod_ns.tv_usec,
+ (long) clockid_ns.tv_sec, clockid_ns.tv_nsec,
+ clockid_name(clockid));
+}
+
+static void print_hybrid_topology(struct feat_fd *ff, FILE *fp)
+{
+ int i;
+ struct hybrid_node *n;
+
+ fprintf(fp, "# hybrid cpu system:\n");
+ for (i = 0; i < ff->ph->env.nr_hybrid_nodes; i++) {
+ n = &ff->ph->env.hybrid_nodes[i];
+ fprintf(fp, "# %s cpu list : %s\n", n->pmu_name, n->cpus);
+ }
+}
+
+static void print_dir_format(struct feat_fd *ff, FILE *fp)
+{
+ struct perf_session *session;
+ struct perf_data *data;
+
+ session = container_of(ff->ph, struct perf_session, header);
+ data = session->data;
+
+ fprintf(fp, "# directory data version : %"PRIu64"\n", data->dir.version);
+}
+
+#ifdef HAVE_LIBBPF_SUPPORT
+static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp)
+{
+ struct perf_env *env = &ff->ph->env;
+ struct rb_root *root;
+ struct rb_node *next;
+
+ down_read(&env->bpf_progs.lock);
+
+ root = &env->bpf_progs.infos;
+ next = rb_first(root);
+
+ while (next) {
+ struct bpf_prog_info_node *node;
+
+ node = rb_entry(next, struct bpf_prog_info_node, rb_node);
+ next = rb_next(&node->rb_node);
+
+ __bpf_event__print_bpf_prog_info(&node->info_linear->info,
+ env, fp);
+ }
+
+ up_read(&env->bpf_progs.lock);
+}
+
+static void print_bpf_btf(struct feat_fd *ff, FILE *fp)
+{
+ struct perf_env *env = &ff->ph->env;
+ struct rb_root *root;
+ struct rb_node *next;
+
+ down_read(&env->bpf_progs.lock);
+
+ root = &env->bpf_progs.btfs;
+ next = rb_first(root);
+
+ while (next) {
+ struct btf_node *node;
+
+ node = rb_entry(next, struct btf_node, rb_node);
+ next = rb_next(&node->rb_node);
+ fprintf(fp, "# btf info of id %u\n", node->id);
+ }
+
+ up_read(&env->bpf_progs.lock);
+}
+#endif // HAVE_LIBBPF_SUPPORT
+
+static void free_event_desc(struct evsel *events)
+{
+ struct evsel *evsel;
+
+ if (!events)
+ return;
+
+ for (evsel = events; evsel->core.attr.size; evsel++) {
+ zfree(&evsel->name);
+ zfree(&evsel->core.id);
+ }
+
+ free(events);
+}
+
+static bool perf_attr_check(struct perf_event_attr *attr)
+{
+ if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) {
+ pr_warning("Reserved bits are set unexpectedly. "
+ "Please update perf tool.\n");
+ return false;
+ }
+
+ if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) {
+ pr_warning("Unknown sample type (0x%llx) is detected. "
+ "Please update perf tool.\n",
+ attr->sample_type);
+ return false;
+ }
+
+ if (attr->read_format & ~(PERF_FORMAT_MAX-1)) {
+ pr_warning("Unknown read format (0x%llx) is detected. "
+ "Please update perf tool.\n",
+ attr->read_format);
+ return false;
+ }
+
+ if ((attr->sample_type & PERF_SAMPLE_BRANCH_STACK) &&
+ (attr->branch_sample_type & ~(PERF_SAMPLE_BRANCH_MAX-1))) {
+ pr_warning("Unknown branch sample type (0x%llx) is detected. "
+ "Please update perf tool.\n",
+ attr->branch_sample_type);
+
+ return false;
+ }
+
+ return true;
+}
+
+static struct evsel *read_event_desc(struct feat_fd *ff)
+{
+ struct evsel *evsel, *events = NULL;
+ u64 *id;
+ void *buf = NULL;
+ u32 nre, sz, nr, i, j;
+ size_t msz;
+
+ /* number of events */
+ if (do_read_u32(ff, &nre))
+ goto error;
+
+ if (do_read_u32(ff, &sz))
+ goto error;
+
+ /* buffer to hold on file attr struct */
+ buf = malloc(sz);
+ if (!buf)
+ goto error;
+
+ /* the last event terminates with evsel->core.attr.size == 0: */
+ events = calloc(nre + 1, sizeof(*events));
+ if (!events)
+ goto error;
+
+ msz = sizeof(evsel->core.attr);
+ if (sz < msz)
+ msz = sz;
+
+ for (i = 0, evsel = events; i < nre; evsel++, i++) {
+ evsel->core.idx = i;
+
+ /*
+ * must read entire on-file attr struct to
+ * sync up with layout.
+ */
+ if (__do_read(ff, buf, sz))
+ goto error;
+
+ if (ff->ph->needs_swap)
+ perf_event__attr_swap(buf);
+
+ memcpy(&evsel->core.attr, buf, msz);
+
+ if (!perf_attr_check(&evsel->core.attr))
+ goto error;
+
+ if (do_read_u32(ff, &nr))
+ goto error;
+
+ if (ff->ph->needs_swap)
+ evsel->needs_swap = true;
+
+ evsel->name = do_read_string(ff);
+ if (!evsel->name)
+ goto error;
+
+ if (!nr)
+ continue;
+
+ id = calloc(nr, sizeof(*id));
+ if (!id)
+ goto error;
+ evsel->core.ids = nr;
+ evsel->core.id = id;
+
+ for (j = 0 ; j < nr; j++) {
+ if (do_read_u64(ff, id))
+ goto error;
+ id++;
+ }
+ }
+out:
+ free(buf);
+ return events;
+error:
+ free_event_desc(events);
+ events = NULL;
+ goto out;
+}
+
+static int __desc_attr__fprintf(FILE *fp, const char *name, const char *val,
+ void *priv __maybe_unused)
+{
+ return fprintf(fp, ", %s = %s", name, val);
+}
+
+static void print_event_desc(struct feat_fd *ff, FILE *fp)
+{
+ struct evsel *evsel, *events;
+ u32 j;
+ u64 *id;
+
+ if (ff->events)
+ events = ff->events;
+ else
+ events = read_event_desc(ff);
+
+ if (!events) {
+ fprintf(fp, "# event desc: not available or unable to read\n");
+ return;
+ }
+
+ for (evsel = events; evsel->core.attr.size; evsel++) {
+ fprintf(fp, "# event : name = %s, ", evsel->name);
+
+ if (evsel->core.ids) {
+ fprintf(fp, ", id = {");
+ for (j = 0, id = evsel->core.id; j < evsel->core.ids; j++, id++) {
+ if (j)
+ fputc(',', fp);
+ fprintf(fp, " %"PRIu64, *id);
+ }
+ fprintf(fp, " }");
+ }
+
+ perf_event_attr__fprintf(fp, &evsel->core.attr, __desc_attr__fprintf, NULL);
+
+ fputc('\n', fp);
+ }
+
+ free_event_desc(events);
+ ff->events = NULL;
+}
+
+static void print_total_mem(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# total memory : %llu kB\n", ff->ph->env.total_mem);
+}
+
+static void print_numa_topology(struct feat_fd *ff, FILE *fp)
+{
+ int i;
+ struct numa_node *n;
+
+ for (i = 0; i < ff->ph->env.nr_numa_nodes; i++) {
+ n = &ff->ph->env.numa_nodes[i];
+
+ fprintf(fp, "# node%u meminfo : total = %"PRIu64" kB,"
+ " free = %"PRIu64" kB\n",
+ n->node, n->mem_total, n->mem_free);
+
+ fprintf(fp, "# node%u cpu list : ", n->node);
+ cpu_map__fprintf(n->map, fp);
+ }
+}
+
+static void print_cpuid(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# cpuid : %s\n", ff->ph->env.cpuid);
+}
+
+static void print_branch_stack(struct feat_fd *ff __maybe_unused, FILE *fp)
+{
+ fprintf(fp, "# contains samples with branch stack\n");
+}
+
+static void print_auxtrace(struct feat_fd *ff __maybe_unused, FILE *fp)
+{
+ fprintf(fp, "# contains AUX area data (e.g. instruction trace)\n");
+}
+
+static void print_stat(struct feat_fd *ff __maybe_unused, FILE *fp)
+{
+ fprintf(fp, "# contains stat data\n");
+}
+
+static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused)
+{
+ int i;
+
+ fprintf(fp, "# CPU cache info:\n");
+ for (i = 0; i < ff->ph->env.caches_cnt; i++) {
+ fprintf(fp, "# ");
+ cpu_cache_level__fprintf(fp, &ff->ph->env.caches[i]);
+ }
+}
+
+static void print_compressed(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# compressed : %s, level = %d, ratio = %d\n",
+ ff->ph->env.comp_type == PERF_COMP_ZSTD ? "Zstd" : "Unknown",
+ ff->ph->env.comp_level, ff->ph->env.comp_ratio);
+}
+
+static void __print_pmu_caps(FILE *fp, int nr_caps, char **caps, char *pmu_name)
+{
+ const char *delimiter = "";
+ int i;
+
+ if (!nr_caps) {
+ fprintf(fp, "# %s pmu capabilities: not available\n", pmu_name);
+ return;
+ }
+
+ fprintf(fp, "# %s pmu capabilities: ", pmu_name);
+ for (i = 0; i < nr_caps; i++) {
+ fprintf(fp, "%s%s", delimiter, caps[i]);
+ delimiter = ", ";
+ }
+
+ fprintf(fp, "\n");
+}
+
+static void print_cpu_pmu_caps(struct feat_fd *ff, FILE *fp)
+{
+ __print_pmu_caps(fp, ff->ph->env.nr_cpu_pmu_caps,
+ ff->ph->env.cpu_pmu_caps, (char *)"cpu");
+}
+
+static void print_pmu_caps(struct feat_fd *ff, FILE *fp)
+{
+ struct pmu_caps *pmu_caps;
+
+ for (int i = 0; i < ff->ph->env.nr_pmus_with_caps; i++) {
+ pmu_caps = &ff->ph->env.pmu_caps[i];
+ __print_pmu_caps(fp, pmu_caps->nr_caps, pmu_caps->caps,
+ pmu_caps->pmu_name);
+ }
+}
+
+static void print_pmu_mappings(struct feat_fd *ff, FILE *fp)
+{
+ const char *delimiter = "# pmu mappings: ";
+ char *str, *tmp;
+ u32 pmu_num;
+ u32 type;
+
+ pmu_num = ff->ph->env.nr_pmu_mappings;
+ if (!pmu_num) {
+ fprintf(fp, "# pmu mappings: not available\n");
+ return;
+ }
+
+ str = ff->ph->env.pmu_mappings;
+
+ while (pmu_num) {
+ type = strtoul(str, &tmp, 0);
+ if (*tmp != ':')
+ goto error;
+
+ str = tmp + 1;
+ fprintf(fp, "%s%s = %" PRIu32, delimiter, str, type);
+
+ delimiter = ", ";
+ str += strlen(str) + 1;
+ pmu_num--;
+ }
+
+ fprintf(fp, "\n");
+
+ if (!pmu_num)
+ return;
+error:
+ fprintf(fp, "# pmu mappings: unable to read\n");
+}
+
+static void print_group_desc(struct feat_fd *ff, FILE *fp)
+{
+ struct perf_session *session;
+ struct evsel *evsel;
+ u32 nr = 0;
+
+ session = container_of(ff->ph, struct perf_session, header);
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (evsel__is_group_leader(evsel) && evsel->core.nr_members > 1) {
+ fprintf(fp, "# group: %s{%s", evsel->group_name ?: "", evsel__name(evsel));
+
+ nr = evsel->core.nr_members - 1;
+ } else if (nr) {
+ fprintf(fp, ",%s", evsel__name(evsel));
+
+ if (--nr == 0)
+ fprintf(fp, "}\n");
+ }
+ }
+}
+
+static void print_sample_time(struct feat_fd *ff, FILE *fp)
+{
+ struct perf_session *session;
+ char time_buf[32];
+ double d;
+
+ session = container_of(ff->ph, struct perf_session, header);
+
+ timestamp__scnprintf_usec(session->evlist->first_sample_time,
+ time_buf, sizeof(time_buf));
+ fprintf(fp, "# time of first sample : %s\n", time_buf);
+
+ timestamp__scnprintf_usec(session->evlist->last_sample_time,
+ time_buf, sizeof(time_buf));
+ fprintf(fp, "# time of last sample : %s\n", time_buf);
+
+ d = (double)(session->evlist->last_sample_time -
+ session->evlist->first_sample_time) / NSEC_PER_MSEC;
+
+ fprintf(fp, "# sample duration : %10.3f ms\n", d);
+}
+
+static void memory_node__fprintf(struct memory_node *n,
+ unsigned long long bsize, FILE *fp)
+{
+ char buf_map[100], buf_size[50];
+ unsigned long long size;
+
+ size = bsize * bitmap_weight(n->set, n->size);
+ unit_number__scnprintf(buf_size, 50, size);
+
+ bitmap_scnprintf(n->set, n->size, buf_map, 100);
+ fprintf(fp, "# %3" PRIu64 " [%s]: %s\n", n->node, buf_size, buf_map);
+}
+
+static void print_mem_topology(struct feat_fd *ff, FILE *fp)
+{
+ struct memory_node *nodes;
+ int i, nr;
+
+ nodes = ff->ph->env.memory_nodes;
+ nr = ff->ph->env.nr_memory_nodes;
+
+ fprintf(fp, "# memory nodes (nr %d, block size 0x%llx):\n",
+ nr, ff->ph->env.memory_bsize);
+
+ for (i = 0; i < nr; i++) {
+ memory_node__fprintf(&nodes[i], ff->ph->env.memory_bsize, fp);
+ }
+}
+
+static int __event_process_build_id(struct perf_record_header_build_id *bev,
+ char *filename,
+ struct perf_session *session)
+{
+ int err = -1;
+ struct machine *machine;
+ u16 cpumode;
+ struct dso *dso;
+ enum dso_space_type dso_space;
+
+ machine = perf_session__findnew_machine(session, bev->pid);
+ if (!machine)
+ goto out;
+
+ cpumode = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+
+ switch (cpumode) {
+ case PERF_RECORD_MISC_KERNEL:
+ dso_space = DSO_SPACE__KERNEL;
+ break;
+ case PERF_RECORD_MISC_GUEST_KERNEL:
+ dso_space = DSO_SPACE__KERNEL_GUEST;
+ break;
+ case PERF_RECORD_MISC_USER:
+ case PERF_RECORD_MISC_GUEST_USER:
+ dso_space = DSO_SPACE__USER;
+ break;
+ default:
+ goto out;
+ }
+
+ dso = machine__findnew_dso(machine, filename);
+ if (dso != NULL) {
+ char sbuild_id[SBUILD_ID_SIZE];
+ struct build_id bid;
+ size_t size = BUILD_ID_SIZE;
+
+ if (bev->header.misc & PERF_RECORD_MISC_BUILD_ID_SIZE)
+ size = bev->size;
+
+ build_id__init(&bid, bev->data, size);
+ dso__set_build_id(dso, &bid);
+ dso->header_build_id = 1;
+
+ if (dso_space != DSO_SPACE__USER) {
+ struct kmod_path m = { .name = NULL, };
+
+ if (!kmod_path__parse_name(&m, filename) && m.kmod)
+ dso__set_module_info(dso, &m, machine);
+
+ dso->kernel = dso_space;
+ free(m.name);
+ }
+
+ build_id__sprintf(&dso->bid, sbuild_id);
+ pr_debug("build id event received for %s: %s [%zu]\n",
+ dso->long_name, sbuild_id, size);
+ dso__put(dso);
+ }
+
+ err = 0;
+out:
+ return err;
+}
+
+static int perf_header__read_build_ids_abi_quirk(struct perf_header *header,
+ int input, u64 offset, u64 size)
+{
+ struct perf_session *session = container_of(header, struct perf_session, header);
+ struct {
+ struct perf_event_header header;
+ u8 build_id[PERF_ALIGN(BUILD_ID_SIZE, sizeof(u64))];
+ char filename[0];
+ } old_bev;
+ struct perf_record_header_build_id bev;
+ char filename[PATH_MAX];
+ u64 limit = offset + size;
+
+ while (offset < limit) {
+ ssize_t len;
+
+ if (readn(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev))
+ return -1;
+
+ if (header->needs_swap)
+ perf_event_header__bswap(&old_bev.header);
+
+ len = old_bev.header.size - sizeof(old_bev);
+ if (readn(input, filename, len) != len)
+ return -1;
+
+ bev.header = old_bev.header;
+
+ /*
+ * As the pid is the missing value, we need to fill
+ * it properly. The header.misc value give us nice hint.
+ */
+ bev.pid = HOST_KERNEL_ID;
+ if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER ||
+ bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL)
+ bev.pid = DEFAULT_GUEST_KERNEL_ID;
+
+ memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id));
+ __event_process_build_id(&bev, filename, session);
+
+ offset += bev.header.size;
+ }
+
+ return 0;
+}
+
+static int perf_header__read_build_ids(struct perf_header *header,
+ int input, u64 offset, u64 size)
+{
+ struct perf_session *session = container_of(header, struct perf_session, header);
+ struct perf_record_header_build_id bev;
+ char filename[PATH_MAX];
+ u64 limit = offset + size, orig_offset = offset;
+ int err = -1;
+
+ while (offset < limit) {
+ ssize_t len;
+
+ if (readn(input, &bev, sizeof(bev)) != sizeof(bev))
+ goto out;
+
+ if (header->needs_swap)
+ perf_event_header__bswap(&bev.header);
+
+ len = bev.header.size - sizeof(bev);
+ if (readn(input, filename, len) != len)
+ goto out;
+ /*
+ * The a1645ce1 changeset:
+ *
+ * "perf: 'perf kvm' tool for monitoring guest performance from host"
+ *
+ * Added a field to struct perf_record_header_build_id that broke the file
+ * format.
+ *
+ * Since the kernel build-id is the first entry, process the
+ * table using the old format if the well known
+ * '[kernel.kallsyms]' string for the kernel build-id has the
+ * first 4 characters chopped off (where the pid_t sits).
+ */
+ if (memcmp(filename, "nel.kallsyms]", 13) == 0) {
+ if (lseek(input, orig_offset, SEEK_SET) == (off_t)-1)
+ return -1;
+ return perf_header__read_build_ids_abi_quirk(header, input, offset, size);
+ }
+
+ __event_process_build_id(&bev, filename, session);
+
+ offset += bev.header.size;
+ }
+ err = 0;
+out:
+ return err;
+}
+
+/* Macro for features that simply need to read and store a string. */
+#define FEAT_PROCESS_STR_FUN(__feat, __feat_env) \
+static int process_##__feat(struct feat_fd *ff, void *data __maybe_unused) \
+{\
+ free(ff->ph->env.__feat_env); \
+ ff->ph->env.__feat_env = do_read_string(ff); \
+ return ff->ph->env.__feat_env ? 0 : -ENOMEM; \
+}
+
+FEAT_PROCESS_STR_FUN(hostname, hostname);
+FEAT_PROCESS_STR_FUN(osrelease, os_release);
+FEAT_PROCESS_STR_FUN(version, version);
+FEAT_PROCESS_STR_FUN(arch, arch);
+FEAT_PROCESS_STR_FUN(cpudesc, cpu_desc);
+FEAT_PROCESS_STR_FUN(cpuid, cpuid);
+
+#ifdef HAVE_LIBTRACEEVENT
+static int process_tracing_data(struct feat_fd *ff, void *data)
+{
+ ssize_t ret = trace_report(ff->fd, data, false);
+
+ return ret < 0 ? -1 : 0;
+}
+#endif
+
+static int process_build_id(struct feat_fd *ff, void *data __maybe_unused)
+{
+ if (perf_header__read_build_ids(ff->ph, ff->fd, ff->offset, ff->size))
+ pr_debug("Failed to read buildids, continuing...\n");
+ return 0;
+}
+
+static int process_nrcpus(struct feat_fd *ff, void *data __maybe_unused)
+{
+ int ret;
+ u32 nr_cpus_avail, nr_cpus_online;
+
+ ret = do_read_u32(ff, &nr_cpus_avail);
+ if (ret)
+ return ret;
+
+ ret = do_read_u32(ff, &nr_cpus_online);
+ if (ret)
+ return ret;
+ ff->ph->env.nr_cpus_avail = (int)nr_cpus_avail;
+ ff->ph->env.nr_cpus_online = (int)nr_cpus_online;
+ return 0;
+}
+
+static int process_total_mem(struct feat_fd *ff, void *data __maybe_unused)
+{
+ u64 total_mem;
+ int ret;
+
+ ret = do_read_u64(ff, &total_mem);
+ if (ret)
+ return -1;
+ ff->ph->env.total_mem = (unsigned long long)total_mem;
+ return 0;
+}
+
+static struct evsel *evlist__find_by_index(struct evlist *evlist, int idx)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.idx == idx)
+ return evsel;
+ }
+
+ return NULL;
+}
+
+static void evlist__set_event_name(struct evlist *evlist, struct evsel *event)
+{
+ struct evsel *evsel;
+
+ if (!event->name)
+ return;
+
+ evsel = evlist__find_by_index(evlist, event->core.idx);
+ if (!evsel)
+ return;
+
+ if (evsel->name)
+ return;
+
+ evsel->name = strdup(event->name);
+}
+
+static int
+process_event_desc(struct feat_fd *ff, void *data __maybe_unused)
+{
+ struct perf_session *session;
+ struct evsel *evsel, *events = read_event_desc(ff);
+
+ if (!events)
+ return 0;
+
+ session = container_of(ff->ph, struct perf_session, header);
+
+ if (session->data->is_pipe) {
+ /* Save events for reading later by print_event_desc,
+ * since they can't be read again in pipe mode. */
+ ff->events = events;
+ }
+
+ for (evsel = events; evsel->core.attr.size; evsel++)
+ evlist__set_event_name(session->evlist, evsel);
+
+ if (!session->data->is_pipe)
+ free_event_desc(events);
+
+ return 0;
+}
+
+static int process_cmdline(struct feat_fd *ff, void *data __maybe_unused)
+{
+ char *str, *cmdline = NULL, **argv = NULL;
+ u32 nr, i, len = 0;
+
+ if (do_read_u32(ff, &nr))
+ return -1;
+
+ ff->ph->env.nr_cmdline = nr;
+
+ cmdline = zalloc(ff->size + nr + 1);
+ if (!cmdline)
+ return -1;
+
+ argv = zalloc(sizeof(char *) * (nr + 1));
+ if (!argv)
+ goto error;
+
+ for (i = 0; i < nr; i++) {
+ str = do_read_string(ff);
+ if (!str)
+ goto error;
+
+ argv[i] = cmdline + len;
+ memcpy(argv[i], str, strlen(str) + 1);
+ len += strlen(str) + 1;
+ free(str);
+ }
+ ff->ph->env.cmdline = cmdline;
+ ff->ph->env.cmdline_argv = (const char **) argv;
+ return 0;
+
+error:
+ free(argv);
+ free(cmdline);
+ return -1;
+}
+
+static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused)
+{
+ u32 nr, i;
+ char *str;
+ struct strbuf sb;
+ int cpu_nr = ff->ph->env.nr_cpus_avail;
+ u64 size = 0;
+ struct perf_header *ph = ff->ph;
+ bool do_core_id_test = true;
+
+ ph->env.cpu = calloc(cpu_nr, sizeof(*ph->env.cpu));
+ if (!ph->env.cpu)
+ return -1;
+
+ if (do_read_u32(ff, &nr))
+ goto free_cpu;
+
+ ph->env.nr_sibling_cores = nr;
+ size += sizeof(u32);
+ if (strbuf_init(&sb, 128) < 0)
+ goto free_cpu;
+
+ for (i = 0; i < nr; i++) {
+ str = do_read_string(ff);
+ if (!str)
+ goto error;
+
+ /* include a NULL character at the end */
+ if (strbuf_add(&sb, str, strlen(str) + 1) < 0)
+ goto error;
+ size += string_size(str);
+ free(str);
+ }
+ ph->env.sibling_cores = strbuf_detach(&sb, NULL);
+
+ if (do_read_u32(ff, &nr))
+ return -1;
+
+ ph->env.nr_sibling_threads = nr;
+ size += sizeof(u32);
+
+ for (i = 0; i < nr; i++) {
+ str = do_read_string(ff);
+ if (!str)
+ goto error;
+
+ /* include a NULL character at the end */
+ if (strbuf_add(&sb, str, strlen(str) + 1) < 0)
+ goto error;
+ size += string_size(str);
+ free(str);
+ }
+ ph->env.sibling_threads = strbuf_detach(&sb, NULL);
+
+ /*
+ * The header may be from old perf,
+ * which doesn't include core id and socket id information.
+ */
+ if (ff->size <= size) {
+ zfree(&ph->env.cpu);
+ return 0;
+ }
+
+ /* On s390 the socket_id number is not related to the numbers of cpus.
+ * The socket_id number might be higher than the numbers of cpus.
+ * This depends on the configuration.
+ * AArch64 is the same.
+ */
+ if (ph->env.arch && (!strncmp(ph->env.arch, "s390", 4)
+ || !strncmp(ph->env.arch, "aarch64", 7)))
+ do_core_id_test = false;
+
+ for (i = 0; i < (u32)cpu_nr; i++) {
+ if (do_read_u32(ff, &nr))
+ goto free_cpu;
+
+ ph->env.cpu[i].core_id = nr;
+ size += sizeof(u32);
+
+ if (do_read_u32(ff, &nr))
+ goto free_cpu;
+
+ if (do_core_id_test && nr != (u32)-1 && nr > (u32)cpu_nr) {
+ pr_debug("socket_id number is too big."
+ "You may need to upgrade the perf tool.\n");
+ goto free_cpu;
+ }
+
+ ph->env.cpu[i].socket_id = nr;
+ size += sizeof(u32);
+ }
+
+ /*
+ * The header may be from old perf,
+ * which doesn't include die information.
+ */
+ if (ff->size <= size)
+ return 0;
+
+ if (do_read_u32(ff, &nr))
+ return -1;
+
+ ph->env.nr_sibling_dies = nr;
+ size += sizeof(u32);
+
+ for (i = 0; i < nr; i++) {
+ str = do_read_string(ff);
+ if (!str)
+ goto error;
+
+ /* include a NULL character at the end */
+ if (strbuf_add(&sb, str, strlen(str) + 1) < 0)
+ goto error;
+ size += string_size(str);
+ free(str);
+ }
+ ph->env.sibling_dies = strbuf_detach(&sb, NULL);
+
+ for (i = 0; i < (u32)cpu_nr; i++) {
+ if (do_read_u32(ff, &nr))
+ goto free_cpu;
+
+ ph->env.cpu[i].die_id = nr;
+ }
+
+ return 0;
+
+error:
+ strbuf_release(&sb);
+free_cpu:
+ zfree(&ph->env.cpu);
+ return -1;
+}
+
+static int process_numa_topology(struct feat_fd *ff, void *data __maybe_unused)
+{
+ struct numa_node *nodes, *n;
+ u32 nr, i;
+ char *str;
+
+ /* nr nodes */
+ if (do_read_u32(ff, &nr))
+ return -1;
+
+ nodes = zalloc(sizeof(*nodes) * nr);
+ if (!nodes)
+ return -ENOMEM;
+
+ for (i = 0; i < nr; i++) {
+ n = &nodes[i];
+
+ /* node number */
+ if (do_read_u32(ff, &n->node))
+ goto error;
+
+ if (do_read_u64(ff, &n->mem_total))
+ goto error;
+
+ if (do_read_u64(ff, &n->mem_free))
+ goto error;
+
+ str = do_read_string(ff);
+ if (!str)
+ goto error;
+
+ n->map = perf_cpu_map__new(str);
+ if (!n->map)
+ goto error;
+
+ free(str);
+ }
+ ff->ph->env.nr_numa_nodes = nr;
+ ff->ph->env.numa_nodes = nodes;
+ return 0;
+
+error:
+ free(nodes);
+ return -1;
+}
+
+static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused)
+{
+ char *name;
+ u32 pmu_num;
+ u32 type;
+ struct strbuf sb;
+
+ if (do_read_u32(ff, &pmu_num))
+ return -1;
+
+ if (!pmu_num) {
+ pr_debug("pmu mappings not available\n");
+ return 0;
+ }
+
+ ff->ph->env.nr_pmu_mappings = pmu_num;
+ if (strbuf_init(&sb, 128) < 0)
+ return -1;
+
+ while (pmu_num) {
+ if (do_read_u32(ff, &type))
+ goto error;
+
+ name = do_read_string(ff);
+ if (!name)
+ goto error;
+
+ if (strbuf_addf(&sb, "%u:%s", type, name) < 0)
+ goto error;
+ /* include a NULL character at the end */
+ if (strbuf_add(&sb, "", 1) < 0)
+ goto error;
+
+ if (!strcmp(name, "msr"))
+ ff->ph->env.msr_pmu_type = type;
+
+ free(name);
+ pmu_num--;
+ }
+ ff->ph->env.pmu_mappings = strbuf_detach(&sb, NULL);
+ return 0;
+
+error:
+ strbuf_release(&sb);
+ return -1;
+}
+
+static int process_group_desc(struct feat_fd *ff, void *data __maybe_unused)
+{
+ size_t ret = -1;
+ u32 i, nr, nr_groups;
+ struct perf_session *session;
+ struct evsel *evsel, *leader = NULL;
+ struct group_desc {
+ char *name;
+ u32 leader_idx;
+ u32 nr_members;
+ } *desc;
+
+ if (do_read_u32(ff, &nr_groups))
+ return -1;
+
+ ff->ph->env.nr_groups = nr_groups;
+ if (!nr_groups) {
+ pr_debug("group desc not available\n");
+ return 0;
+ }
+
+ desc = calloc(nr_groups, sizeof(*desc));
+ if (!desc)
+ return -1;
+
+ for (i = 0; i < nr_groups; i++) {
+ desc[i].name = do_read_string(ff);
+ if (!desc[i].name)
+ goto out_free;
+
+ if (do_read_u32(ff, &desc[i].leader_idx))
+ goto out_free;
+
+ if (do_read_u32(ff, &desc[i].nr_members))
+ goto out_free;
+ }
+
+ /*
+ * Rebuild group relationship based on the group_desc
+ */
+ session = container_of(ff->ph, struct perf_session, header);
+
+ i = nr = 0;
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (i < nr_groups && evsel->core.idx == (int) desc[i].leader_idx) {
+ evsel__set_leader(evsel, evsel);
+ /* {anon_group} is a dummy name */
+ if (strcmp(desc[i].name, "{anon_group}")) {
+ evsel->group_name = desc[i].name;
+ desc[i].name = NULL;
+ }
+ evsel->core.nr_members = desc[i].nr_members;
+
+ if (i >= nr_groups || nr > 0) {
+ pr_debug("invalid group desc\n");
+ goto out_free;
+ }
+
+ leader = evsel;
+ nr = evsel->core.nr_members - 1;
+ i++;
+ } else if (nr) {
+ /* This is a group member */
+ evsel__set_leader(evsel, leader);
+
+ nr--;
+ }
+ }
+
+ if (i != nr_groups || nr != 0) {
+ pr_debug("invalid group desc\n");
+ goto out_free;
+ }
+
+ ret = 0;
+out_free:
+ for (i = 0; i < nr_groups; i++)
+ zfree(&desc[i].name);
+ free(desc);
+
+ return ret;
+}
+
+static int process_auxtrace(struct feat_fd *ff, void *data __maybe_unused)
+{
+ struct perf_session *session;
+ int err;
+
+ session = container_of(ff->ph, struct perf_session, header);
+
+ err = auxtrace_index__process(ff->fd, ff->size, session,
+ ff->ph->needs_swap);
+ if (err < 0)
+ pr_err("Failed to process auxtrace index\n");
+ return err;
+}
+
+static int process_cache(struct feat_fd *ff, void *data __maybe_unused)
+{
+ struct cpu_cache_level *caches;
+ u32 cnt, i, version;
+
+ if (do_read_u32(ff, &version))
+ return -1;
+
+ if (version != 1)
+ return -1;
+
+ if (do_read_u32(ff, &cnt))
+ return -1;
+
+ caches = zalloc(sizeof(*caches) * cnt);
+ if (!caches)
+ return -1;
+
+ for (i = 0; i < cnt; i++) {
+ struct cpu_cache_level c;
+
+ #define _R(v) \
+ if (do_read_u32(ff, &c.v))\
+ goto out_free_caches; \
+
+ _R(level)
+ _R(line_size)
+ _R(sets)
+ _R(ways)
+ #undef _R
+
+ #define _R(v) \
+ c.v = do_read_string(ff); \
+ if (!c.v) \
+ goto out_free_caches;
+
+ _R(type)
+ _R(size)
+ _R(map)
+ #undef _R
+
+ caches[i] = c;
+ }
+
+ ff->ph->env.caches = caches;
+ ff->ph->env.caches_cnt = cnt;
+ return 0;
+out_free_caches:
+ free(caches);
+ return -1;
+}
+
+static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused)
+{
+ struct perf_session *session;
+ u64 first_sample_time, last_sample_time;
+ int ret;
+
+ session = container_of(ff->ph, struct perf_session, header);
+
+ ret = do_read_u64(ff, &first_sample_time);
+ if (ret)
+ return -1;
+
+ ret = do_read_u64(ff, &last_sample_time);
+ if (ret)
+ return -1;
+
+ session->evlist->first_sample_time = first_sample_time;
+ session->evlist->last_sample_time = last_sample_time;
+ return 0;
+}
+
+static int process_mem_topology(struct feat_fd *ff,
+ void *data __maybe_unused)
+{
+ struct memory_node *nodes;
+ u64 version, i, nr, bsize;
+ int ret = -1;
+
+ if (do_read_u64(ff, &version))
+ return -1;
+
+ if (version != 1)
+ return -1;
+
+ if (do_read_u64(ff, &bsize))
+ return -1;
+
+ if (do_read_u64(ff, &nr))
+ return -1;
+
+ nodes = zalloc(sizeof(*nodes) * nr);
+ if (!nodes)
+ return -1;
+
+ for (i = 0; i < nr; i++) {
+ struct memory_node n;
+
+ #define _R(v) \
+ if (do_read_u64(ff, &n.v)) \
+ goto out; \
+
+ _R(node)
+ _R(size)
+
+ #undef _R
+
+ if (do_read_bitmap(ff, &n.set, &n.size))
+ goto out;
+
+ nodes[i] = n;
+ }
+
+ ff->ph->env.memory_bsize = bsize;
+ ff->ph->env.memory_nodes = nodes;
+ ff->ph->env.nr_memory_nodes = nr;
+ ret = 0;
+
+out:
+ if (ret)
+ free(nodes);
+ return ret;
+}
+
+static int process_clockid(struct feat_fd *ff,
+ void *data __maybe_unused)
+{
+ if (do_read_u64(ff, &ff->ph->env.clock.clockid_res_ns))
+ return -1;
+
+ return 0;
+}
+
+static int process_clock_data(struct feat_fd *ff,
+ void *_data __maybe_unused)
+{
+ u32 data32;
+ u64 data64;
+
+ /* version */
+ if (do_read_u32(ff, &data32))
+ return -1;
+
+ if (data32 != 1)
+ return -1;
+
+ /* clockid */
+ if (do_read_u32(ff, &data32))
+ return -1;
+
+ ff->ph->env.clock.clockid = data32;
+
+ /* TOD ref time */
+ if (do_read_u64(ff, &data64))
+ return -1;
+
+ ff->ph->env.clock.tod_ns = data64;
+
+ /* clockid ref time */
+ if (do_read_u64(ff, &data64))
+ return -1;
+
+ ff->ph->env.clock.clockid_ns = data64;
+ ff->ph->env.clock.enabled = true;
+ return 0;
+}
+
+static int process_hybrid_topology(struct feat_fd *ff,
+ void *data __maybe_unused)
+{
+ struct hybrid_node *nodes, *n;
+ u32 nr, i;
+
+ /* nr nodes */
+ if (do_read_u32(ff, &nr))
+ return -1;
+
+ nodes = zalloc(sizeof(*nodes) * nr);
+ if (!nodes)
+ return -ENOMEM;
+
+ for (i = 0; i < nr; i++) {
+ n = &nodes[i];
+
+ n->pmu_name = do_read_string(ff);
+ if (!n->pmu_name)
+ goto error;
+
+ n->cpus = do_read_string(ff);
+ if (!n->cpus)
+ goto error;
+ }
+
+ ff->ph->env.nr_hybrid_nodes = nr;
+ ff->ph->env.hybrid_nodes = nodes;
+ return 0;
+
+error:
+ for (i = 0; i < nr; i++) {
+ free(nodes[i].pmu_name);
+ free(nodes[i].cpus);
+ }
+
+ free(nodes);
+ return -1;
+}
+
+static int process_dir_format(struct feat_fd *ff,
+ void *_data __maybe_unused)
+{
+ struct perf_session *session;
+ struct perf_data *data;
+
+ session = container_of(ff->ph, struct perf_session, header);
+ data = session->data;
+
+ if (WARN_ON(!perf_data__is_dir(data)))
+ return -1;
+
+ return do_read_u64(ff, &data->dir.version);
+}
+
+#ifdef HAVE_LIBBPF_SUPPORT
+static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused)
+{
+ struct bpf_prog_info_node *info_node;
+ struct perf_env *env = &ff->ph->env;
+ struct perf_bpil *info_linear;
+ u32 count, i;
+ int err = -1;
+
+ if (ff->ph->needs_swap) {
+ pr_warning("interpreting bpf_prog_info from systems with endianness is not yet supported\n");
+ return 0;
+ }
+
+ if (do_read_u32(ff, &count))
+ return -1;
+
+ down_write(&env->bpf_progs.lock);
+
+ for (i = 0; i < count; ++i) {
+ u32 info_len, data_len;
+
+ info_linear = NULL;
+ info_node = NULL;
+ if (do_read_u32(ff, &info_len))
+ goto out;
+ if (do_read_u32(ff, &data_len))
+ goto out;
+
+ if (info_len > sizeof(struct bpf_prog_info)) {
+ pr_warning("detected invalid bpf_prog_info\n");
+ goto out;
+ }
+
+ info_linear = malloc(sizeof(struct perf_bpil) +
+ data_len);
+ if (!info_linear)
+ goto out;
+ info_linear->info_len = sizeof(struct bpf_prog_info);
+ info_linear->data_len = data_len;
+ if (do_read_u64(ff, (u64 *)(&info_linear->arrays)))
+ goto out;
+ if (__do_read(ff, &info_linear->info, info_len))
+ goto out;
+ if (info_len < sizeof(struct bpf_prog_info))
+ memset(((void *)(&info_linear->info)) + info_len, 0,
+ sizeof(struct bpf_prog_info) - info_len);
+
+ if (__do_read(ff, info_linear->data, data_len))
+ goto out;
+
+ info_node = malloc(sizeof(struct bpf_prog_info_node));
+ if (!info_node)
+ goto out;
+
+ /* after reading from file, translate offset to address */
+ bpil_offs_to_addr(info_linear);
+ info_node->info_linear = info_linear;
+ __perf_env__insert_bpf_prog_info(env, info_node);
+ }
+
+ up_write(&env->bpf_progs.lock);
+ return 0;
+out:
+ free(info_linear);
+ free(info_node);
+ up_write(&env->bpf_progs.lock);
+ return err;
+}
+
+static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused)
+{
+ struct perf_env *env = &ff->ph->env;
+ struct btf_node *node = NULL;
+ u32 count, i;
+ int err = -1;
+
+ if (ff->ph->needs_swap) {
+ pr_warning("interpreting btf from systems with endianness is not yet supported\n");
+ return 0;
+ }
+
+ if (do_read_u32(ff, &count))
+ return -1;
+
+ down_write(&env->bpf_progs.lock);
+
+ for (i = 0; i < count; ++i) {
+ u32 id, data_size;
+
+ if (do_read_u32(ff, &id))
+ goto out;
+ if (do_read_u32(ff, &data_size))
+ goto out;
+
+ node = malloc(sizeof(struct btf_node) + data_size);
+ if (!node)
+ goto out;
+
+ node->id = id;
+ node->data_size = data_size;
+
+ if (__do_read(ff, node->data, data_size))
+ goto out;
+
+ __perf_env__insert_btf(env, node);
+ node = NULL;
+ }
+
+ err = 0;
+out:
+ up_write(&env->bpf_progs.lock);
+ free(node);
+ return err;
+}
+#endif // HAVE_LIBBPF_SUPPORT
+
+static int process_compressed(struct feat_fd *ff,
+ void *data __maybe_unused)
+{
+ if (do_read_u32(ff, &(ff->ph->env.comp_ver)))
+ return -1;
+
+ if (do_read_u32(ff, &(ff->ph->env.comp_type)))
+ return -1;
+
+ if (do_read_u32(ff, &(ff->ph->env.comp_level)))
+ return -1;
+
+ if (do_read_u32(ff, &(ff->ph->env.comp_ratio)))
+ return -1;
+
+ if (do_read_u32(ff, &(ff->ph->env.comp_mmap_len)))
+ return -1;
+
+ return 0;
+}
+
+static int __process_pmu_caps(struct feat_fd *ff, int *nr_caps,
+ char ***caps, unsigned int *max_branches)
+{
+ char *name, *value, *ptr;
+ u32 nr_pmu_caps, i;
+
+ *nr_caps = 0;
+ *caps = NULL;
+
+ if (do_read_u32(ff, &nr_pmu_caps))
+ return -1;
+
+ if (!nr_pmu_caps)
+ return 0;
+
+ *caps = zalloc(sizeof(char *) * nr_pmu_caps);
+ if (!*caps)
+ return -1;
+
+ for (i = 0; i < nr_pmu_caps; i++) {
+ name = do_read_string(ff);
+ if (!name)
+ goto error;
+
+ value = do_read_string(ff);
+ if (!value)
+ goto free_name;
+
+ if (asprintf(&ptr, "%s=%s", name, value) < 0)
+ goto free_value;
+
+ (*caps)[i] = ptr;
+
+ if (!strcmp(name, "branches"))
+ *max_branches = atoi(value);
+
+ free(value);
+ free(name);
+ }
+ *nr_caps = nr_pmu_caps;
+ return 0;
+
+free_value:
+ free(value);
+free_name:
+ free(name);
+error:
+ for (; i > 0; i--)
+ free((*caps)[i - 1]);
+ free(*caps);
+ *caps = NULL;
+ *nr_caps = 0;
+ return -1;
+}
+
+static int process_cpu_pmu_caps(struct feat_fd *ff,
+ void *data __maybe_unused)
+{
+ int ret = __process_pmu_caps(ff, &ff->ph->env.nr_cpu_pmu_caps,
+ &ff->ph->env.cpu_pmu_caps,
+ &ff->ph->env.max_branches);
+
+ if (!ret && !ff->ph->env.cpu_pmu_caps)
+ pr_debug("cpu pmu capabilities not available\n");
+ return ret;
+}
+
+static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused)
+{
+ struct pmu_caps *pmu_caps;
+ u32 nr_pmu, i;
+ int ret;
+ int j;
+
+ if (do_read_u32(ff, &nr_pmu))
+ return -1;
+
+ if (!nr_pmu) {
+ pr_debug("pmu capabilities not available\n");
+ return 0;
+ }
+
+ pmu_caps = zalloc(sizeof(*pmu_caps) * nr_pmu);
+ if (!pmu_caps)
+ return -ENOMEM;
+
+ for (i = 0; i < nr_pmu; i++) {
+ ret = __process_pmu_caps(ff, &pmu_caps[i].nr_caps,
+ &pmu_caps[i].caps,
+ &pmu_caps[i].max_branches);
+ if (ret)
+ goto err;
+
+ pmu_caps[i].pmu_name = do_read_string(ff);
+ if (!pmu_caps[i].pmu_name) {
+ ret = -1;
+ goto err;
+ }
+ if (!pmu_caps[i].nr_caps) {
+ pr_debug("%s pmu capabilities not available\n",
+ pmu_caps[i].pmu_name);
+ }
+ }
+
+ ff->ph->env.nr_pmus_with_caps = nr_pmu;
+ ff->ph->env.pmu_caps = pmu_caps;
+ return 0;
+
+err:
+ for (i = 0; i < nr_pmu; i++) {
+ for (j = 0; j < pmu_caps[i].nr_caps; j++)
+ free(pmu_caps[i].caps[j]);
+ free(pmu_caps[i].caps);
+ free(pmu_caps[i].pmu_name);
+ }
+
+ free(pmu_caps);
+ return ret;
+}
+
+#define FEAT_OPR(n, func, __full_only) \
+ [HEADER_##n] = { \
+ .name = __stringify(n), \
+ .write = write_##func, \
+ .print = print_##func, \
+ .full_only = __full_only, \
+ .process = process_##func, \
+ .synthesize = true \
+ }
+
+#define FEAT_OPN(n, func, __full_only) \
+ [HEADER_##n] = { \
+ .name = __stringify(n), \
+ .write = write_##func, \
+ .print = print_##func, \
+ .full_only = __full_only, \
+ .process = process_##func \
+ }
+
+/* feature_ops not implemented: */
+#define print_tracing_data NULL
+#define print_build_id NULL
+
+#define process_branch_stack NULL
+#define process_stat NULL
+
+// Only used in util/synthetic-events.c
+const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE];
+
+const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
+#ifdef HAVE_LIBTRACEEVENT
+ FEAT_OPN(TRACING_DATA, tracing_data, false),
+#endif
+ FEAT_OPN(BUILD_ID, build_id, false),
+ FEAT_OPR(HOSTNAME, hostname, false),
+ FEAT_OPR(OSRELEASE, osrelease, false),
+ FEAT_OPR(VERSION, version, false),
+ FEAT_OPR(ARCH, arch, false),
+ FEAT_OPR(NRCPUS, nrcpus, false),
+ FEAT_OPR(CPUDESC, cpudesc, false),
+ FEAT_OPR(CPUID, cpuid, false),
+ FEAT_OPR(TOTAL_MEM, total_mem, false),
+ FEAT_OPR(EVENT_DESC, event_desc, false),
+ FEAT_OPR(CMDLINE, cmdline, false),
+ FEAT_OPR(CPU_TOPOLOGY, cpu_topology, true),
+ FEAT_OPR(NUMA_TOPOLOGY, numa_topology, true),
+ FEAT_OPN(BRANCH_STACK, branch_stack, false),
+ FEAT_OPR(PMU_MAPPINGS, pmu_mappings, false),
+ FEAT_OPR(GROUP_DESC, group_desc, false),
+ FEAT_OPN(AUXTRACE, auxtrace, false),
+ FEAT_OPN(STAT, stat, false),
+ FEAT_OPN(CACHE, cache, true),
+ FEAT_OPR(SAMPLE_TIME, sample_time, false),
+ FEAT_OPR(MEM_TOPOLOGY, mem_topology, true),
+ FEAT_OPR(CLOCKID, clockid, false),
+ FEAT_OPN(DIR_FORMAT, dir_format, false),
+#ifdef HAVE_LIBBPF_SUPPORT
+ FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false),
+ FEAT_OPR(BPF_BTF, bpf_btf, false),
+#endif
+ FEAT_OPR(COMPRESSED, compressed, false),
+ FEAT_OPR(CPU_PMU_CAPS, cpu_pmu_caps, false),
+ FEAT_OPR(CLOCK_DATA, clock_data, false),
+ FEAT_OPN(HYBRID_TOPOLOGY, hybrid_topology, true),
+ FEAT_OPR(PMU_CAPS, pmu_caps, false),
+};
+
+struct header_print_data {
+ FILE *fp;
+ bool full; /* extended list of headers */
+};
+
+static int perf_file_section__fprintf_info(struct perf_file_section *section,
+ struct perf_header *ph,
+ int feat, int fd, void *data)
+{
+ struct header_print_data *hd = data;
+ struct feat_fd ff;
+
+ if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) {
+ pr_debug("Failed to lseek to %" PRIu64 " offset for feature "
+ "%d, continuing...\n", section->offset, feat);
+ return 0;
+ }
+ if (feat >= HEADER_LAST_FEATURE) {
+ pr_warning("unknown feature %d\n", feat);
+ return 0;
+ }
+ if (!feat_ops[feat].print)
+ return 0;
+
+ ff = (struct feat_fd) {
+ .fd = fd,
+ .ph = ph,
+ };
+
+ if (!feat_ops[feat].full_only || hd->full)
+ feat_ops[feat].print(&ff, hd->fp);
+ else
+ fprintf(hd->fp, "# %s info available, use -I to display\n",
+ feat_ops[feat].name);
+
+ return 0;
+}
+
+int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full)
+{
+ struct header_print_data hd;
+ struct perf_header *header = &session->header;
+ int fd = perf_data__fd(session->data);
+ struct stat st;
+ time_t stctime;
+ int ret, bit;
+
+ hd.fp = fp;
+ hd.full = full;
+
+ ret = fstat(fd, &st);
+ if (ret == -1)
+ return -1;
+
+ stctime = st.st_mtime;
+ fprintf(fp, "# captured on : %s", ctime(&stctime));
+
+ fprintf(fp, "# header version : %u\n", header->version);
+ fprintf(fp, "# data offset : %" PRIu64 "\n", header->data_offset);
+ fprintf(fp, "# data size : %" PRIu64 "\n", header->data_size);
+ fprintf(fp, "# feat offset : %" PRIu64 "\n", header->feat_offset);
+
+ perf_header__process_sections(header, fd, &hd,
+ perf_file_section__fprintf_info);
+
+ if (session->data->is_pipe)
+ return 0;
+
+ fprintf(fp, "# missing features: ");
+ for_each_clear_bit(bit, header->adds_features, HEADER_LAST_FEATURE) {
+ if (bit)
+ fprintf(fp, "%s ", feat_ops[bit].name);
+ }
+
+ fprintf(fp, "\n");
+ return 0;
+}
+
+struct header_fw {
+ struct feat_writer fw;
+ struct feat_fd *ff;
+};
+
+static int feat_writer_cb(struct feat_writer *fw, void *buf, size_t sz)
+{
+ struct header_fw *h = container_of(fw, struct header_fw, fw);
+
+ return do_write(h->ff, buf, sz);
+}
+
+static int do_write_feat(struct feat_fd *ff, int type,
+ struct perf_file_section **p,
+ struct evlist *evlist,
+ struct feat_copier *fc)
+{
+ int err;
+ int ret = 0;
+
+ if (perf_header__has_feat(ff->ph, type)) {
+ if (!feat_ops[type].write)
+ return -1;
+
+ if (WARN(ff->buf, "Error: calling %s in pipe-mode.\n", __func__))
+ return -1;
+
+ (*p)->offset = lseek(ff->fd, 0, SEEK_CUR);
+
+ /*
+ * Hook to let perf inject copy features sections from the input
+ * file.
+ */
+ if (fc && fc->copy) {
+ struct header_fw h = {
+ .fw.write = feat_writer_cb,
+ .ff = ff,
+ };
+
+ /* ->copy() returns 0 if the feature was not copied */
+ err = fc->copy(fc, type, &h.fw);
+ } else {
+ err = 0;
+ }
+ if (!err)
+ err = feat_ops[type].write(ff, evlist);
+ if (err < 0) {
+ pr_debug("failed to write feature %s\n", feat_ops[type].name);
+
+ /* undo anything written */
+ lseek(ff->fd, (*p)->offset, SEEK_SET);
+
+ return -1;
+ }
+ (*p)->size = lseek(ff->fd, 0, SEEK_CUR) - (*p)->offset;
+ (*p)++;
+ }
+ return ret;
+}
+
+static int perf_header__adds_write(struct perf_header *header,
+ struct evlist *evlist, int fd,
+ struct feat_copier *fc)
+{
+ int nr_sections;
+ struct feat_fd ff;
+ struct perf_file_section *feat_sec, *p;
+ int sec_size;
+ u64 sec_start;
+ int feat;
+ int err;
+
+ ff = (struct feat_fd){
+ .fd = fd,
+ .ph = header,
+ };
+
+ nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS);
+ if (!nr_sections)
+ return 0;
+
+ feat_sec = p = calloc(nr_sections, sizeof(*feat_sec));
+ if (feat_sec == NULL)
+ return -ENOMEM;
+
+ sec_size = sizeof(*feat_sec) * nr_sections;
+
+ sec_start = header->feat_offset;
+ lseek(fd, sec_start + sec_size, SEEK_SET);
+
+ for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) {
+ if (do_write_feat(&ff, feat, &p, evlist, fc))
+ perf_header__clear_feat(header, feat);
+ }
+
+ lseek(fd, sec_start, SEEK_SET);
+ /*
+ * may write more than needed due to dropped feature, but
+ * this is okay, reader will skip the missing entries
+ */
+ err = do_write(&ff, feat_sec, sec_size);
+ if (err < 0)
+ pr_debug("failed to write feature section\n");
+ free(feat_sec);
+ return err;
+}
+
+int perf_header__write_pipe(int fd)
+{
+ struct perf_pipe_file_header f_header;
+ struct feat_fd ff;
+ int err;
+
+ ff = (struct feat_fd){ .fd = fd };
+
+ f_header = (struct perf_pipe_file_header){
+ .magic = PERF_MAGIC,
+ .size = sizeof(f_header),
+ };
+
+ err = do_write(&ff, &f_header, sizeof(f_header));
+ if (err < 0) {
+ pr_debug("failed to write perf pipe header\n");
+ return err;
+ }
+
+ return 0;
+}
+
+static int perf_session__do_write_header(struct perf_session *session,
+ struct evlist *evlist,
+ int fd, bool at_exit,
+ struct feat_copier *fc)
+{
+ struct perf_file_header f_header;
+ struct perf_file_attr f_attr;
+ struct perf_header *header = &session->header;
+ struct evsel *evsel;
+ struct feat_fd ff;
+ u64 attr_offset;
+ int err;
+
+ ff = (struct feat_fd){ .fd = fd};
+ lseek(fd, sizeof(f_header), SEEK_SET);
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ evsel->id_offset = lseek(fd, 0, SEEK_CUR);
+ err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
+ if (err < 0) {
+ pr_debug("failed to write perf header\n");
+ return err;
+ }
+ }
+
+ attr_offset = lseek(ff.fd, 0, SEEK_CUR);
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.size < sizeof(evsel->core.attr)) {
+ /*
+ * We are likely in "perf inject" and have read
+ * from an older file. Update attr size so that
+ * reader gets the right offset to the ids.
+ */
+ evsel->core.attr.size = sizeof(evsel->core.attr);
+ }
+ f_attr = (struct perf_file_attr){
+ .attr = evsel->core.attr,
+ .ids = {
+ .offset = evsel->id_offset,
+ .size = evsel->core.ids * sizeof(u64),
+ }
+ };
+ err = do_write(&ff, &f_attr, sizeof(f_attr));
+ if (err < 0) {
+ pr_debug("failed to write perf header attribute\n");
+ return err;
+ }
+ }
+
+ if (!header->data_offset)
+ header->data_offset = lseek(fd, 0, SEEK_CUR);
+ header->feat_offset = header->data_offset + header->data_size;
+
+ if (at_exit) {
+ err = perf_header__adds_write(header, evlist, fd, fc);
+ if (err < 0)
+ return err;
+ }
+
+ f_header = (struct perf_file_header){
+ .magic = PERF_MAGIC,
+ .size = sizeof(f_header),
+ .attr_size = sizeof(f_attr),
+ .attrs = {
+ .offset = attr_offset,
+ .size = evlist->core.nr_entries * sizeof(f_attr),
+ },
+ .data = {
+ .offset = header->data_offset,
+ .size = header->data_size,
+ },
+ /* event_types is ignored, store zeros */
+ };
+
+ memcpy(&f_header.adds_features, &header->adds_features, sizeof(header->adds_features));
+
+ lseek(fd, 0, SEEK_SET);
+ err = do_write(&ff, &f_header, sizeof(f_header));
+ if (err < 0) {
+ pr_debug("failed to write perf header\n");
+ return err;
+ }
+ lseek(fd, header->data_offset + header->data_size, SEEK_SET);
+
+ return 0;
+}
+
+int perf_session__write_header(struct perf_session *session,
+ struct evlist *evlist,
+ int fd, bool at_exit)
+{
+ return perf_session__do_write_header(session, evlist, fd, at_exit, NULL);
+}
+
+size_t perf_session__data_offset(const struct evlist *evlist)
+{
+ struct evsel *evsel;
+ size_t data_offset;
+
+ data_offset = sizeof(struct perf_file_header);
+ evlist__for_each_entry(evlist, evsel) {
+ data_offset += evsel->core.ids * sizeof(u64);
+ }
+ data_offset += evlist->core.nr_entries * sizeof(struct perf_file_attr);
+
+ return data_offset;
+}
+
+int perf_session__inject_header(struct perf_session *session,
+ struct evlist *evlist,
+ int fd,
+ struct feat_copier *fc)
+{
+ return perf_session__do_write_header(session, evlist, fd, true, fc);
+}
+
+static int perf_header__getbuffer64(struct perf_header *header,
+ int fd, void *buf, size_t size)
+{
+ if (readn(fd, buf, size) <= 0)
+ return -1;
+
+ if (header->needs_swap)
+ mem_bswap_64(buf, size);
+
+ return 0;
+}
+
+int perf_header__process_sections(struct perf_header *header, int fd,
+ void *data,
+ int (*process)(struct perf_file_section *section,
+ struct perf_header *ph,
+ int feat, int fd, void *data))
+{
+ struct perf_file_section *feat_sec, *sec;
+ int nr_sections;
+ int sec_size;
+ int feat;
+ int err;
+
+ nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS);
+ if (!nr_sections)
+ return 0;
+
+ feat_sec = sec = calloc(nr_sections, sizeof(*feat_sec));
+ if (!feat_sec)
+ return -1;
+
+ sec_size = sizeof(*feat_sec) * nr_sections;
+
+ lseek(fd, header->feat_offset, SEEK_SET);
+
+ err = perf_header__getbuffer64(header, fd, feat_sec, sec_size);
+ if (err < 0)
+ goto out_free;
+
+ for_each_set_bit(feat, header->adds_features, HEADER_LAST_FEATURE) {
+ err = process(sec++, header, feat, fd, data);
+ if (err < 0)
+ goto out_free;
+ }
+ err = 0;
+out_free:
+ free(feat_sec);
+ return err;
+}
+
+static const int attr_file_abi_sizes[] = {
+ [0] = PERF_ATTR_SIZE_VER0,
+ [1] = PERF_ATTR_SIZE_VER1,
+ [2] = PERF_ATTR_SIZE_VER2,
+ [3] = PERF_ATTR_SIZE_VER3,
+ [4] = PERF_ATTR_SIZE_VER4,
+ 0,
+};
+
+/*
+ * In the legacy file format, the magic number is not used to encode endianness.
+ * hdr_sz was used to encode endianness. But given that hdr_sz can vary based
+ * on ABI revisions, we need to try all combinations for all endianness to
+ * detect the endianness.
+ */
+static int try_all_file_abis(uint64_t hdr_sz, struct perf_header *ph)
+{
+ uint64_t ref_size, attr_size;
+ int i;
+
+ for (i = 0 ; attr_file_abi_sizes[i]; i++) {
+ ref_size = attr_file_abi_sizes[i]
+ + sizeof(struct perf_file_section);
+ if (hdr_sz != ref_size) {
+ attr_size = bswap_64(hdr_sz);
+ if (attr_size != ref_size)
+ continue;
+
+ ph->needs_swap = true;
+ }
+ pr_debug("ABI%d perf.data file detected, need_swap=%d\n",
+ i,
+ ph->needs_swap);
+ return 0;
+ }
+ /* could not determine endianness */
+ return -1;
+}
+
+#define PERF_PIPE_HDR_VER0 16
+
+static const size_t attr_pipe_abi_sizes[] = {
+ [0] = PERF_PIPE_HDR_VER0,
+ 0,
+};
+
+/*
+ * In the legacy pipe format, there is an implicit assumption that endianness
+ * between host recording the samples, and host parsing the samples is the
+ * same. This is not always the case given that the pipe output may always be
+ * redirected into a file and analyzed on a different machine with possibly a
+ * different endianness and perf_event ABI revisions in the perf tool itself.
+ */
+static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph)
+{
+ u64 attr_size;
+ int i;
+
+ for (i = 0 ; attr_pipe_abi_sizes[i]; i++) {
+ if (hdr_sz != attr_pipe_abi_sizes[i]) {
+ attr_size = bswap_64(hdr_sz);
+ if (attr_size != hdr_sz)
+ continue;
+
+ ph->needs_swap = true;
+ }
+ pr_debug("Pipe ABI%d perf.data file detected\n", i);
+ return 0;
+ }
+ return -1;
+}
+
+bool is_perf_magic(u64 magic)
+{
+ if (!memcmp(&magic, __perf_magic1, sizeof(magic))
+ || magic == __perf_magic2
+ || magic == __perf_magic2_sw)
+ return true;
+
+ return false;
+}
+
+static int check_magic_endian(u64 magic, uint64_t hdr_sz,
+ bool is_pipe, struct perf_header *ph)
+{
+ int ret;
+
+ /* check for legacy format */
+ ret = memcmp(&magic, __perf_magic1, sizeof(magic));
+ if (ret == 0) {
+ ph->version = PERF_HEADER_VERSION_1;
+ pr_debug("legacy perf.data format\n");
+ if (is_pipe)
+ return try_all_pipe_abis(hdr_sz, ph);
+
+ return try_all_file_abis(hdr_sz, ph);
+ }
+ /*
+ * the new magic number serves two purposes:
+ * - unique number to identify actual perf.data files
+ * - encode endianness of file
+ */
+ ph->version = PERF_HEADER_VERSION_2;
+
+ /* check magic number with one endianness */
+ if (magic == __perf_magic2)
+ return 0;
+
+ /* check magic number with opposite endianness */
+ if (magic != __perf_magic2_sw)
+ return -1;
+
+ ph->needs_swap = true;
+
+ return 0;
+}
+
+int perf_file_header__read(struct perf_file_header *header,
+ struct perf_header *ph, int fd)
+{
+ ssize_t ret;
+
+ lseek(fd, 0, SEEK_SET);
+
+ ret = readn(fd, header, sizeof(*header));
+ if (ret <= 0)
+ return -1;
+
+ if (check_magic_endian(header->magic,
+ header->attr_size, false, ph) < 0) {
+ pr_debug("magic/endian check failed\n");
+ return -1;
+ }
+
+ if (ph->needs_swap) {
+ mem_bswap_64(header, offsetof(struct perf_file_header,
+ adds_features));
+ }
+
+ if (header->size != sizeof(*header)) {
+ /* Support the previous format */
+ if (header->size == offsetof(typeof(*header), adds_features))
+ bitmap_zero(header->adds_features, HEADER_FEAT_BITS);
+ else
+ return -1;
+ } else if (ph->needs_swap) {
+ /*
+ * feature bitmap is declared as an array of unsigned longs --
+ * not good since its size can differ between the host that
+ * generated the data file and the host analyzing the file.
+ *
+ * We need to handle endianness, but we don't know the size of
+ * the unsigned long where the file was generated. Take a best
+ * guess at determining it: try 64-bit swap first (ie., file
+ * created on a 64-bit host), and check if the hostname feature
+ * bit is set (this feature bit is forced on as of fbe96f2).
+ * If the bit is not, undo the 64-bit swap and try a 32-bit
+ * swap. If the hostname bit is still not set (e.g., older data
+ * file), punt and fallback to the original behavior --
+ * clearing all feature bits and setting buildid.
+ */
+ mem_bswap_64(&header->adds_features,
+ BITS_TO_U64(HEADER_FEAT_BITS));
+
+ if (!test_bit(HEADER_HOSTNAME, header->adds_features)) {
+ /* unswap as u64 */
+ mem_bswap_64(&header->adds_features,
+ BITS_TO_U64(HEADER_FEAT_BITS));
+
+ /* unswap as u32 */
+ mem_bswap_32(&header->adds_features,
+ BITS_TO_U32(HEADER_FEAT_BITS));
+ }
+
+ if (!test_bit(HEADER_HOSTNAME, header->adds_features)) {
+ bitmap_zero(header->adds_features, HEADER_FEAT_BITS);
+ __set_bit(HEADER_BUILD_ID, header->adds_features);
+ }
+ }
+
+ memcpy(&ph->adds_features, &header->adds_features,
+ sizeof(ph->adds_features));
+
+ ph->data_offset = header->data.offset;
+ ph->data_size = header->data.size;
+ ph->feat_offset = header->data.offset + header->data.size;
+ return 0;
+}
+
+static int perf_file_section__process(struct perf_file_section *section,
+ struct perf_header *ph,
+ int feat, int fd, void *data)
+{
+ struct feat_fd fdd = {
+ .fd = fd,
+ .ph = ph,
+ .size = section->size,
+ .offset = section->offset,
+ };
+
+ if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) {
+ pr_debug("Failed to lseek to %" PRIu64 " offset for feature "
+ "%d, continuing...\n", section->offset, feat);
+ return 0;
+ }
+
+ if (feat >= HEADER_LAST_FEATURE) {
+ pr_debug("unknown feature %d, continuing...\n", feat);
+ return 0;
+ }
+
+ if (!feat_ops[feat].process)
+ return 0;
+
+ return feat_ops[feat].process(&fdd, data);
+}
+
+static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
+ struct perf_header *ph,
+ struct perf_data* data,
+ bool repipe, int repipe_fd)
+{
+ struct feat_fd ff = {
+ .fd = repipe_fd,
+ .ph = ph,
+ };
+ ssize_t ret;
+
+ ret = perf_data__read(data, header, sizeof(*header));
+ if (ret <= 0)
+ return -1;
+
+ if (check_magic_endian(header->magic, header->size, true, ph) < 0) {
+ pr_debug("endian/magic failed\n");
+ return -1;
+ }
+
+ if (ph->needs_swap)
+ header->size = bswap_64(header->size);
+
+ if (repipe && do_write(&ff, header, sizeof(*header)) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int perf_header__read_pipe(struct perf_session *session, int repipe_fd)
+{
+ struct perf_header *header = &session->header;
+ struct perf_pipe_file_header f_header;
+
+ if (perf_file_header__read_pipe(&f_header, header, session->data,
+ session->repipe, repipe_fd) < 0) {
+ pr_debug("incompatible file format\n");
+ return -EINVAL;
+ }
+
+ return f_header.size == sizeof(f_header) ? 0 : -1;
+}
+
+static int read_attr(int fd, struct perf_header *ph,
+ struct perf_file_attr *f_attr)
+{
+ struct perf_event_attr *attr = &f_attr->attr;
+ size_t sz, left;
+ size_t our_sz = sizeof(f_attr->attr);
+ ssize_t ret;
+
+ memset(f_attr, 0, sizeof(*f_attr));
+
+ /* read minimal guaranteed structure */
+ ret = readn(fd, attr, PERF_ATTR_SIZE_VER0);
+ if (ret <= 0) {
+ pr_debug("cannot read %d bytes of header attr\n",
+ PERF_ATTR_SIZE_VER0);
+ return -1;
+ }
+
+ /* on file perf_event_attr size */
+ sz = attr->size;
+
+ if (ph->needs_swap)
+ sz = bswap_32(sz);
+
+ if (sz == 0) {
+ /* assume ABI0 */
+ sz = PERF_ATTR_SIZE_VER0;
+ } else if (sz > our_sz) {
+ pr_debug("file uses a more recent and unsupported ABI"
+ " (%zu bytes extra)\n", sz - our_sz);
+ return -1;
+ }
+ /* what we have not yet read and that we know about */
+ left = sz - PERF_ATTR_SIZE_VER0;
+ if (left) {
+ void *ptr = attr;
+ ptr += PERF_ATTR_SIZE_VER0;
+
+ ret = readn(fd, ptr, left);
+ }
+ /* read perf_file_section, ids are read in caller */
+ ret = readn(fd, &f_attr->ids, sizeof(f_attr->ids));
+
+ return ret <= 0 ? -1 : 0;
+}
+
+#ifdef HAVE_LIBTRACEEVENT
+static int evsel__prepare_tracepoint_event(struct evsel *evsel, struct tep_handle *pevent)
+{
+ struct tep_event *event;
+ char bf[128];
+
+ /* already prepared */
+ if (evsel->tp_format)
+ return 0;
+
+ if (pevent == NULL) {
+ pr_debug("broken or missing trace data\n");
+ return -1;
+ }
+
+ event = tep_find_event(pevent, evsel->core.attr.config);
+ if (event == NULL) {
+ pr_debug("cannot find event format for %d\n", (int)evsel->core.attr.config);
+ return -1;
+ }
+
+ if (!evsel->name) {
+ snprintf(bf, sizeof(bf), "%s:%s", event->system, event->name);
+ evsel->name = strdup(bf);
+ if (evsel->name == NULL)
+ return -1;
+ }
+
+ evsel->tp_format = event;
+ return 0;
+}
+
+static int evlist__prepare_tracepoint_events(struct evlist *evlist, struct tep_handle *pevent)
+{
+ struct evsel *pos;
+
+ evlist__for_each_entry(evlist, pos) {
+ if (pos->core.attr.type == PERF_TYPE_TRACEPOINT &&
+ evsel__prepare_tracepoint_event(pos, pevent))
+ return -1;
+ }
+
+ return 0;
+}
+#endif
+
+int perf_session__read_header(struct perf_session *session, int repipe_fd)
+{
+ struct perf_data *data = session->data;
+ struct perf_header *header = &session->header;
+ struct perf_file_header f_header;
+ struct perf_file_attr f_attr;
+ u64 f_id;
+ int nr_attrs, nr_ids, i, j, err;
+ int fd = perf_data__fd(data);
+
+ session->evlist = evlist__new();
+ if (session->evlist == NULL)
+ return -ENOMEM;
+
+ session->evlist->env = &header->env;
+ session->machines.host.env = &header->env;
+
+ /*
+ * We can read 'pipe' data event from regular file,
+ * check for the pipe header regardless of source.
+ */
+ err = perf_header__read_pipe(session, repipe_fd);
+ if (!err || perf_data__is_pipe(data)) {
+ data->is_pipe = true;
+ return err;
+ }
+
+ if (perf_file_header__read(&f_header, header, fd) < 0)
+ return -EINVAL;
+
+ if (header->needs_swap && data->in_place_update) {
+ pr_err("In-place update not supported when byte-swapping is required\n");
+ return -EINVAL;
+ }
+
+ /*
+ * Sanity check that perf.data was written cleanly; data size is
+ * initialized to 0 and updated only if the on_exit function is run.
+ * If data size is still 0 then the file contains only partial
+ * information. Just warn user and process it as much as it can.
+ */
+ if (f_header.data.size == 0) {
+ pr_warning("WARNING: The %s file's data size field is 0 which is unexpected.\n"
+ "Was the 'perf record' command properly terminated?\n",
+ data->file.path);
+ }
+
+ if (f_header.attr_size == 0) {
+ pr_err("ERROR: The %s file's attr size field is 0 which is unexpected.\n"
+ "Was the 'perf record' command properly terminated?\n",
+ data->file.path);
+ return -EINVAL;
+ }
+
+ nr_attrs = f_header.attrs.size / f_header.attr_size;
+ lseek(fd, f_header.attrs.offset, SEEK_SET);
+
+ for (i = 0; i < nr_attrs; i++) {
+ struct evsel *evsel;
+ off_t tmp;
+
+ if (read_attr(fd, header, &f_attr) < 0)
+ goto out_errno;
+
+ if (header->needs_swap) {
+ f_attr.ids.size = bswap_64(f_attr.ids.size);
+ f_attr.ids.offset = bswap_64(f_attr.ids.offset);
+ perf_event__attr_swap(&f_attr.attr);
+ }
+
+ tmp = lseek(fd, 0, SEEK_CUR);
+ evsel = evsel__new(&f_attr.attr);
+
+ if (evsel == NULL)
+ goto out_delete_evlist;
+
+ evsel->needs_swap = header->needs_swap;
+ /*
+ * Do it before so that if perf_evsel__alloc_id fails, this
+ * entry gets purged too at evlist__delete().
+ */
+ evlist__add(session->evlist, evsel);
+
+ nr_ids = f_attr.ids.size / sizeof(u64);
+ /*
+ * We don't have the cpu and thread maps on the header, so
+ * for allocating the perf_sample_id table we fake 1 cpu and
+ * hattr->ids threads.
+ */
+ if (perf_evsel__alloc_id(&evsel->core, 1, nr_ids))
+ goto out_delete_evlist;
+
+ lseek(fd, f_attr.ids.offset, SEEK_SET);
+
+ for (j = 0; j < nr_ids; j++) {
+ if (perf_header__getbuffer64(header, fd, &f_id, sizeof(f_id)))
+ goto out_errno;
+
+ perf_evlist__id_add(&session->evlist->core, &evsel->core, 0, j, f_id);
+ }
+
+ lseek(fd, tmp, SEEK_SET);
+ }
+
+#ifdef HAVE_LIBTRACEEVENT
+ perf_header__process_sections(header, fd, &session->tevent,
+ perf_file_section__process);
+
+ if (evlist__prepare_tracepoint_events(session->evlist, session->tevent.pevent))
+ goto out_delete_evlist;
+#else
+ perf_header__process_sections(header, fd, NULL, perf_file_section__process);
+#endif
+
+ return 0;
+out_errno:
+ return -errno;
+
+out_delete_evlist:
+ evlist__delete(session->evlist);
+ session->evlist = NULL;
+ return -ENOMEM;
+}
+
+int perf_event__process_feature(struct perf_session *session,
+ union perf_event *event)
+{
+ struct perf_tool *tool = session->tool;
+ struct feat_fd ff = { .fd = 0 };
+ struct perf_record_header_feature *fe = (struct perf_record_header_feature *)event;
+ int type = fe->header.type;
+ u64 feat = fe->feat_id;
+ int ret = 0;
+
+ if (type < 0 || type >= PERF_RECORD_HEADER_MAX) {
+ pr_warning("invalid record type %d in pipe-mode\n", type);
+ return 0;
+ }
+ if (feat == HEADER_RESERVED || feat >= HEADER_LAST_FEATURE) {
+ pr_warning("invalid record type %d in pipe-mode\n", type);
+ return -1;
+ }
+
+ if (!feat_ops[feat].process)
+ return 0;
+
+ ff.buf = (void *)fe->data;
+ ff.size = event->header.size - sizeof(*fe);
+ ff.ph = &session->header;
+
+ if (feat_ops[feat].process(&ff, NULL)) {
+ ret = -1;
+ goto out;
+ }
+
+ if (!feat_ops[feat].print || !tool->show_feat_hdr)
+ goto out;
+
+ if (!feat_ops[feat].full_only ||
+ tool->show_feat_hdr >= SHOW_FEAT_HEADER_FULL_INFO) {
+ feat_ops[feat].print(&ff, stdout);
+ } else {
+ fprintf(stdout, "# %s info available, use -I to display\n",
+ feat_ops[feat].name);
+ }
+out:
+ free_event_desc(ff.events);
+ return ret;
+}
+
+size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp)
+{
+ struct perf_record_event_update *ev = &event->event_update;
+ struct perf_cpu_map *map;
+ size_t ret;
+
+ ret = fprintf(fp, "\n... id: %" PRI_lu64 "\n", ev->id);
+
+ switch (ev->type) {
+ case PERF_EVENT_UPDATE__SCALE:
+ ret += fprintf(fp, "... scale: %f\n", ev->scale.scale);
+ break;
+ case PERF_EVENT_UPDATE__UNIT:
+ ret += fprintf(fp, "... unit: %s\n", ev->unit);
+ break;
+ case PERF_EVENT_UPDATE__NAME:
+ ret += fprintf(fp, "... name: %s\n", ev->name);
+ break;
+ case PERF_EVENT_UPDATE__CPUS:
+ ret += fprintf(fp, "... ");
+
+ map = cpu_map__new_data(&ev->cpus.cpus);
+ if (map) {
+ ret += cpu_map__fprintf(map, fp);
+ perf_cpu_map__put(map);
+ } else
+ ret += fprintf(fp, "failed to get cpus\n");
+ break;
+ default:
+ ret += fprintf(fp, "... unknown type\n");
+ break;
+ }
+
+ return ret;
+}
+
+int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct evlist **pevlist)
+{
+ u32 i, n_ids;
+ u64 *ids;
+ struct evsel *evsel;
+ struct evlist *evlist = *pevlist;
+
+ if (evlist == NULL) {
+ *pevlist = evlist = evlist__new();
+ if (evlist == NULL)
+ return -ENOMEM;
+ }
+
+ evsel = evsel__new(&event->attr.attr);
+ if (evsel == NULL)
+ return -ENOMEM;
+
+ evlist__add(evlist, evsel);
+
+ n_ids = event->header.size - sizeof(event->header) - event->attr.attr.size;
+ n_ids = n_ids / sizeof(u64);
+ /*
+ * We don't have the cpu and thread maps on the header, so
+ * for allocating the perf_sample_id table we fake 1 cpu and
+ * hattr->ids threads.
+ */
+ if (perf_evsel__alloc_id(&evsel->core, 1, n_ids))
+ return -ENOMEM;
+
+ ids = perf_record_header_attr_id(event);
+ for (i = 0; i < n_ids; i++) {
+ perf_evlist__id_add(&evlist->core, &evsel->core, 0, i, ids[i]);
+ }
+
+ return 0;
+}
+
+int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct evlist **pevlist)
+{
+ struct perf_record_event_update *ev = &event->event_update;
+ struct evlist *evlist;
+ struct evsel *evsel;
+ struct perf_cpu_map *map;
+
+ if (dump_trace)
+ perf_event__fprintf_event_update(event, stdout);
+
+ if (!pevlist || *pevlist == NULL)
+ return -EINVAL;
+
+ evlist = *pevlist;
+
+ evsel = evlist__id2evsel(evlist, ev->id);
+ if (evsel == NULL)
+ return -EINVAL;
+
+ switch (ev->type) {
+ case PERF_EVENT_UPDATE__UNIT:
+ free((char *)evsel->unit);
+ evsel->unit = strdup(ev->unit);
+ break;
+ case PERF_EVENT_UPDATE__NAME:
+ free(evsel->name);
+ evsel->name = strdup(ev->name);
+ break;
+ case PERF_EVENT_UPDATE__SCALE:
+ evsel->scale = ev->scale.scale;
+ break;
+ case PERF_EVENT_UPDATE__CPUS:
+ map = cpu_map__new_data(&ev->cpus.cpus);
+ if (map) {
+ perf_cpu_map__put(evsel->core.own_cpus);
+ evsel->core.own_cpus = map;
+ } else
+ pr_err("failed to get event_update cpus\n");
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+#ifdef HAVE_LIBTRACEEVENT
+int perf_event__process_tracing_data(struct perf_session *session,
+ union perf_event *event)
+{
+ ssize_t size_read, padding, size = event->tracing_data.size;
+ int fd = perf_data__fd(session->data);
+ char buf[BUFSIZ];
+
+ /*
+ * The pipe fd is already in proper place and in any case
+ * we can't move it, and we'd screw the case where we read
+ * 'pipe' data from regular file. The trace_report reads
+ * data from 'fd' so we need to set it directly behind the
+ * event, where the tracing data starts.
+ */
+ if (!perf_data__is_pipe(session->data)) {
+ off_t offset = lseek(fd, 0, SEEK_CUR);
+
+ /* setup for reading amidst mmap */
+ lseek(fd, offset + sizeof(struct perf_record_header_tracing_data),
+ SEEK_SET);
+ }
+
+ size_read = trace_report(fd, &session->tevent,
+ session->repipe);
+ padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read;
+
+ if (readn(fd, buf, padding) < 0) {
+ pr_err("%s: reading input file", __func__);
+ return -1;
+ }
+ if (session->repipe) {
+ int retw = write(STDOUT_FILENO, buf, padding);
+ if (retw <= 0 || retw != padding) {
+ pr_err("%s: repiping tracing data padding", __func__);
+ return -1;
+ }
+ }
+
+ if (size_read + padding != size) {
+ pr_err("%s: tracing data size mismatch", __func__);
+ return -1;
+ }
+
+ evlist__prepare_tracepoint_events(session->evlist, session->tevent.pevent);
+
+ return size_read + padding;
+}
+#endif
+
+int perf_event__process_build_id(struct perf_session *session,
+ union perf_event *event)
+{
+ __event_process_build_id(&event->build_id,
+ event->build_id.filename,
+ session);
+ return 0;
+}
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
new file mode 100644
index 000000000..7c16a250e
--- /dev/null
+++ b/tools/perf/util/header.h
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_HEADER_H
+#define __PERF_HEADER_H
+
+#include <linux/stddef.h>
+#include <linux/perf_event.h>
+#include <sys/types.h>
+#include <stdio.h> // FILE
+#include <stdbool.h>
+#include <linux/bitmap.h>
+#include <linux/types.h>
+#include "env.h"
+#include "pmu.h"
+
+enum {
+ HEADER_RESERVED = 0, /* always cleared */
+ HEADER_FIRST_FEATURE = 1,
+ HEADER_TRACING_DATA = 1,
+ HEADER_BUILD_ID,
+
+ HEADER_HOSTNAME,
+ HEADER_OSRELEASE,
+ HEADER_VERSION,
+ HEADER_ARCH,
+ HEADER_NRCPUS,
+ HEADER_CPUDESC,
+ HEADER_CPUID,
+ HEADER_TOTAL_MEM,
+ HEADER_CMDLINE,
+ HEADER_EVENT_DESC,
+ HEADER_CPU_TOPOLOGY,
+ HEADER_NUMA_TOPOLOGY,
+ HEADER_BRANCH_STACK,
+ HEADER_PMU_MAPPINGS,
+ HEADER_GROUP_DESC,
+ HEADER_AUXTRACE,
+ HEADER_STAT,
+ HEADER_CACHE,
+ HEADER_SAMPLE_TIME,
+ HEADER_MEM_TOPOLOGY,
+ HEADER_CLOCKID,
+ HEADER_DIR_FORMAT,
+ HEADER_BPF_PROG_INFO,
+ HEADER_BPF_BTF,
+ HEADER_COMPRESSED,
+ HEADER_CPU_PMU_CAPS,
+ HEADER_CLOCK_DATA,
+ HEADER_HYBRID_TOPOLOGY,
+ HEADER_PMU_CAPS,
+ HEADER_LAST_FEATURE,
+ HEADER_FEAT_BITS = 256,
+};
+
+enum perf_header_version {
+ PERF_HEADER_VERSION_1,
+ PERF_HEADER_VERSION_2,
+};
+
+struct perf_file_section {
+ u64 offset;
+ u64 size;
+};
+
+struct perf_file_header {
+ u64 magic;
+ u64 size;
+ u64 attr_size;
+ struct perf_file_section attrs;
+ struct perf_file_section data;
+ /* event_types is ignored */
+ struct perf_file_section event_types;
+ DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
+};
+
+struct perf_pipe_file_header {
+ u64 magic;
+ u64 size;
+};
+
+struct perf_header;
+
+int perf_file_header__read(struct perf_file_header *header,
+ struct perf_header *ph, int fd);
+
+struct perf_header {
+ enum perf_header_version version;
+ bool needs_swap;
+ u64 data_offset;
+ u64 data_size;
+ u64 feat_offset;
+ DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
+ struct perf_env env;
+};
+
+struct feat_fd {
+ struct perf_header *ph;
+ int fd;
+ void *buf; /* Either buf != NULL or fd >= 0 */
+ ssize_t offset;
+ size_t size;
+ struct evsel *events;
+};
+
+struct perf_header_feature_ops {
+ int (*write)(struct feat_fd *ff, struct evlist *evlist);
+ void (*print)(struct feat_fd *ff, FILE *fp);
+ int (*process)(struct feat_fd *ff, void *data);
+ const char *name;
+ bool full_only;
+ bool synthesize;
+};
+
+struct evlist;
+struct perf_session;
+struct perf_tool;
+union perf_event;
+
+extern const char perf_version_string[];
+
+int perf_session__read_header(struct perf_session *session, int repipe_fd);
+int perf_session__write_header(struct perf_session *session,
+ struct evlist *evlist,
+ int fd, bool at_exit);
+int perf_header__write_pipe(int fd);
+
+/* feat_writer writes a feature section to output */
+struct feat_writer {
+ int (*write)(struct feat_writer *fw, void *buf, size_t sz);
+};
+
+/* feat_copier copies a feature section using feat_writer to output */
+struct feat_copier {
+ int (*copy)(struct feat_copier *fc, int feat, struct feat_writer *fw);
+};
+
+int perf_session__inject_header(struct perf_session *session,
+ struct evlist *evlist,
+ int fd,
+ struct feat_copier *fc);
+
+size_t perf_session__data_offset(const struct evlist *evlist);
+
+void perf_header__set_feat(struct perf_header *header, int feat);
+void perf_header__clear_feat(struct perf_header *header, int feat);
+bool perf_header__has_feat(const struct perf_header *header, int feat);
+
+int perf_header__set_cmdline(int argc, const char **argv);
+
+int perf_header__process_sections(struct perf_header *header, int fd,
+ void *data,
+ int (*process)(struct perf_file_section *section,
+ struct perf_header *ph,
+ int feat, int fd, void *data));
+
+int perf_header__fprintf_info(struct perf_session *s, FILE *fp, bool full);
+
+int perf_event__process_feature(struct perf_session *session,
+ union perf_event *event);
+int perf_event__process_attr(struct perf_tool *tool, union perf_event *event,
+ struct evlist **pevlist);
+int perf_event__process_event_update(struct perf_tool *tool,
+ union perf_event *event,
+ struct evlist **pevlist);
+size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp);
+#ifdef HAVE_LIBTRACEEVENT
+int perf_event__process_tracing_data(struct perf_session *session,
+ union perf_event *event);
+#endif
+int perf_event__process_build_id(struct perf_session *session,
+ union perf_event *event);
+bool is_perf_magic(u64 magic);
+
+#define NAME_ALIGN 64
+
+struct feat_fd;
+
+int do_write(struct feat_fd *fd, const void *buf, size_t size);
+
+int write_padded(struct feat_fd *fd, const void *bf,
+ size_t count, size_t count_aligned);
+
+#define MAX_CACHE_LVL 4
+
+int is_cpu_online(unsigned int cpu);
+int build_caches_for_cpu(u32 cpu, struct cpu_cache_level caches[], u32 *cntp);
+
+/*
+ * arch specific callback
+ */
+int get_cpuid(char *buffer, size_t sz);
+
+char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused);
+int strcmp_cpuid_str(const char *s1, const char *s2);
+#endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c
new file mode 100644
index 000000000..eab99ea6a
--- /dev/null
+++ b/tools/perf/util/help-unknown-cmd.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "cache.h"
+#include "config.h"
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <subcmd/help.h>
+#include "../builtin.h"
+#include "levenshtein.h"
+#include <linux/zalloc.h>
+
+static int autocorrect;
+
+static int perf_unknown_cmd_config(const char *var, const char *value,
+ void *cb __maybe_unused)
+{
+ if (!strcmp(var, "help.autocorrect"))
+ return perf_config_int(&autocorrect, var,value);
+
+ return 0;
+}
+
+static int levenshtein_compare(const void *p1, const void *p2)
+{
+ const struct cmdname *const *c1 = p1, *const *c2 = p2;
+ const char *s1 = (*c1)->name, *s2 = (*c2)->name;
+ int l1 = (*c1)->len;
+ int l2 = (*c2)->len;
+ return l1 != l2 ? l1 - l2 : strcmp(s1, s2);
+}
+
+static int add_cmd_list(struct cmdnames *cmds, struct cmdnames *old)
+{
+ unsigned int i, nr = cmds->cnt + old->cnt;
+ void *tmp;
+
+ if (nr > cmds->alloc) {
+ /* Choose bigger one to alloc */
+ if (alloc_nr(cmds->alloc) < nr)
+ cmds->alloc = nr;
+ else
+ cmds->alloc = alloc_nr(cmds->alloc);
+ tmp = realloc(cmds->names, cmds->alloc * sizeof(*cmds->names));
+ if (!tmp)
+ return -1;
+ cmds->names = tmp;
+ }
+ for (i = 0; i < old->cnt; i++)
+ cmds->names[cmds->cnt++] = old->names[i];
+ zfree(&old->names);
+ old->cnt = 0;
+ return 0;
+}
+
+const char *help_unknown_cmd(const char *cmd)
+{
+ unsigned int i, n = 0, best_similarity = 0;
+ struct cmdnames main_cmds, other_cmds;
+
+ memset(&main_cmds, 0, sizeof(main_cmds));
+ memset(&other_cmds, 0, sizeof(main_cmds));
+
+ perf_config(perf_unknown_cmd_config, NULL);
+
+ load_command_list("perf-", &main_cmds, &other_cmds);
+
+ if (add_cmd_list(&main_cmds, &other_cmds) < 0) {
+ fprintf(stderr, "ERROR: Failed to allocate command list for unknown command.\n");
+ goto end;
+ }
+ qsort(main_cmds.names, main_cmds.cnt,
+ sizeof(main_cmds.names), cmdname_compare);
+ uniq(&main_cmds);
+
+ if (main_cmds.cnt) {
+ /* This reuses cmdname->len for similarity index */
+ for (i = 0; i < main_cmds.cnt; ++i)
+ main_cmds.names[i]->len =
+ levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4);
+
+ qsort(main_cmds.names, main_cmds.cnt,
+ sizeof(*main_cmds.names), levenshtein_compare);
+
+ best_similarity = main_cmds.names[0]->len;
+ n = 1;
+ while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len)
+ ++n;
+ }
+
+ if (autocorrect && n == 1) {
+ const char *assumed = main_cmds.names[0]->name;
+
+ main_cmds.names[0] = NULL;
+ clean_cmdnames(&main_cmds);
+ clean_cmdnames(&other_cmds);
+ fprintf(stderr, "WARNING: You called a perf program named '%s', "
+ "which does not exist.\n"
+ "Continuing under the assumption that you meant '%s'\n",
+ cmd, assumed);
+ if (autocorrect > 0) {
+ fprintf(stderr, "in %0.1f seconds automatically...\n",
+ (float)autocorrect/10.0);
+ poll(NULL, 0, autocorrect * 100);
+ }
+ return assumed;
+ }
+
+ fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd);
+
+ if (main_cmds.cnt && best_similarity < 6) {
+ fprintf(stderr, "\nDid you mean %s?\n",
+ n < 2 ? "this": "one of these");
+
+ for (i = 0; i < n; i++)
+ fprintf(stderr, "\t%s\n", main_cmds.names[i]->name);
+ }
+end:
+ clean_cmdnames(&main_cmds);
+ clean_cmdnames(&other_cmds);
+ exit(1);
+}
diff --git a/tools/perf/util/help-unknown-cmd.h b/tools/perf/util/help-unknown-cmd.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/perf/util/help-unknown-cmd.h
diff --git a/tools/perf/util/hisi-ptt-decoder/Build b/tools/perf/util/hisi-ptt-decoder/Build
new file mode 100644
index 000000000..db3db8b75
--- /dev/null
+++ b/tools/perf/util/hisi-ptt-decoder/Build
@@ -0,0 +1 @@
+perf-$(CONFIG_AUXTRACE) += hisi-ptt-pkt-decoder.o
diff --git a/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.c b/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.c
new file mode 100644
index 000000000..a17c423a5
--- /dev/null
+++ b/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * HiSilicon PCIe Trace and Tuning (PTT) support
+ * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <endian.h>
+#include <byteswap.h>
+#include <linux/bitops.h>
+#include <stdarg.h>
+
+#include "../color.h"
+#include "hisi-ptt-pkt-decoder.h"
+
+/*
+ * For 8DW format, the bit[31:11] of DW0 is always 0x1fffff, which can be
+ * used to distinguish the data format.
+ * 8DW format is like:
+ * bits [ 31:11 ][ 10:0 ]
+ * |---------------------------------------|-------------------|
+ * DW0 [ 0x1fffff ][ Reserved (0x7ff) ]
+ * DW1 [ Prefix ]
+ * DW2 [ Header DW0 ]
+ * DW3 [ Header DW1 ]
+ * DW4 [ Header DW2 ]
+ * DW5 [ Header DW3 ]
+ * DW6 [ Reserved (0x0) ]
+ * DW7 [ Time ]
+ *
+ * 4DW format is like:
+ * bits [31:30] [ 29:25 ][24][23][22][21][ 20:11 ][ 10:0 ]
+ * |-----|---------|---|---|---|---|-------------|-------------|
+ * DW0 [ Fmt ][ Type ][T9][T8][TH][SO][ Length ][ Time ]
+ * DW1 [ Header DW1 ]
+ * DW2 [ Header DW2 ]
+ * DW3 [ Header DW3 ]
+ */
+
+enum hisi_ptt_8dw_pkt_field_type {
+ HISI_PTT_8DW_CHK_AND_RSV0,
+ HISI_PTT_8DW_PREFIX,
+ HISI_PTT_8DW_HEAD0,
+ HISI_PTT_8DW_HEAD1,
+ HISI_PTT_8DW_HEAD2,
+ HISI_PTT_8DW_HEAD3,
+ HISI_PTT_8DW_RSV1,
+ HISI_PTT_8DW_TIME,
+ HISI_PTT_8DW_TYPE_MAX
+};
+
+enum hisi_ptt_4dw_pkt_field_type {
+ HISI_PTT_4DW_HEAD1,
+ HISI_PTT_4DW_HEAD2,
+ HISI_PTT_4DW_HEAD3,
+ HISI_PTT_4DW_TYPE_MAX
+};
+
+static const char * const hisi_ptt_8dw_pkt_field_name[] = {
+ [HISI_PTT_8DW_PREFIX] = "Prefix",
+ [HISI_PTT_8DW_HEAD0] = "Header DW0",
+ [HISI_PTT_8DW_HEAD1] = "Header DW1",
+ [HISI_PTT_8DW_HEAD2] = "Header DW2",
+ [HISI_PTT_8DW_HEAD3] = "Header DW3",
+ [HISI_PTT_8DW_TIME] = "Time"
+};
+
+static const char * const hisi_ptt_4dw_pkt_field_name[] = {
+ [HISI_PTT_4DW_HEAD1] = "Header DW1",
+ [HISI_PTT_4DW_HEAD2] = "Header DW2",
+ [HISI_PTT_4DW_HEAD3] = "Header DW3",
+};
+
+union hisi_ptt_4dw {
+ struct {
+ uint32_t format : 2;
+ uint32_t type : 5;
+ uint32_t t9 : 1;
+ uint32_t t8 : 1;
+ uint32_t th : 1;
+ uint32_t so : 1;
+ uint32_t len : 10;
+ uint32_t time : 11;
+ };
+ uint32_t value;
+};
+
+static void hisi_ptt_print_pkt(const unsigned char *buf, int pos, const char *desc)
+{
+ const char *color = PERF_COLOR_BLUE;
+ int i;
+
+ printf(".");
+ color_fprintf(stdout, color, " %08x: ", pos);
+ for (i = 0; i < HISI_PTT_FIELD_LENTH; i++)
+ color_fprintf(stdout, color, "%02x ", buf[pos + i]);
+ for (i = 0; i < HISI_PTT_MAX_SPACE_LEN; i++)
+ color_fprintf(stdout, color, " ");
+ color_fprintf(stdout, color, " %s\n", desc);
+}
+
+static int hisi_ptt_8dw_kpt_desc(const unsigned char *buf, int pos)
+{
+ int i;
+
+ for (i = 0; i < HISI_PTT_8DW_TYPE_MAX; i++) {
+ /* Do not show 8DW check field and reserved fields */
+ if (i == HISI_PTT_8DW_CHK_AND_RSV0 || i == HISI_PTT_8DW_RSV1) {
+ pos += HISI_PTT_FIELD_LENTH;
+ continue;
+ }
+
+ hisi_ptt_print_pkt(buf, pos, hisi_ptt_8dw_pkt_field_name[i]);
+ pos += HISI_PTT_FIELD_LENTH;
+ }
+
+ return hisi_ptt_pkt_size[HISI_PTT_8DW_PKT];
+}
+
+static void hisi_ptt_4dw_print_dw0(const unsigned char *buf, int pos)
+{
+ const char *color = PERF_COLOR_BLUE;
+ union hisi_ptt_4dw dw0;
+ int i;
+
+ dw0.value = *(uint32_t *)(buf + pos);
+ printf(".");
+ color_fprintf(stdout, color, " %08x: ", pos);
+ for (i = 0; i < HISI_PTT_FIELD_LENTH; i++)
+ color_fprintf(stdout, color, "%02x ", buf[pos + i]);
+ for (i = 0; i < HISI_PTT_MAX_SPACE_LEN; i++)
+ color_fprintf(stdout, color, " ");
+
+ color_fprintf(stdout, color,
+ " %s %x %s %x %s %x %s %x %s %x %s %x %s %x %s %x\n",
+ "Format", dw0.format, "Type", dw0.type, "T9", dw0.t9,
+ "T8", dw0.t8, "TH", dw0.th, "SO", dw0.so, "Length",
+ dw0.len, "Time", dw0.time);
+}
+
+static int hisi_ptt_4dw_kpt_desc(const unsigned char *buf, int pos)
+{
+ int i;
+
+ hisi_ptt_4dw_print_dw0(buf, pos);
+ pos += HISI_PTT_FIELD_LENTH;
+
+ for (i = 0; i < HISI_PTT_4DW_TYPE_MAX; i++) {
+ hisi_ptt_print_pkt(buf, pos, hisi_ptt_4dw_pkt_field_name[i]);
+ pos += HISI_PTT_FIELD_LENTH;
+ }
+
+ return hisi_ptt_pkt_size[HISI_PTT_4DW_PKT];
+}
+
+int hisi_ptt_pkt_desc(const unsigned char *buf, int pos, enum hisi_ptt_pkt_type type)
+{
+ if (type == HISI_PTT_8DW_PKT)
+ return hisi_ptt_8dw_kpt_desc(buf, pos);
+
+ return hisi_ptt_4dw_kpt_desc(buf, pos);
+}
diff --git a/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.h b/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.h
new file mode 100644
index 000000000..e78f1b5bc
--- /dev/null
+++ b/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * HiSilicon PCIe Trace and Tuning (PTT) support
+ * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
+ */
+
+#ifndef INCLUDE__HISI_PTT_PKT_DECODER_H__
+#define INCLUDE__HISI_PTT_PKT_DECODER_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define HISI_PTT_8DW_CHECK_MASK GENMASK(31, 11)
+#define HISI_PTT_IS_8DW_PKT GENMASK(31, 11)
+#define HISI_PTT_MAX_SPACE_LEN 10
+#define HISI_PTT_FIELD_LENTH 4
+
+enum hisi_ptt_pkt_type {
+ HISI_PTT_4DW_PKT,
+ HISI_PTT_8DW_PKT,
+ HISI_PTT_PKT_MAX
+};
+
+static int hisi_ptt_pkt_size[] = {
+ [HISI_PTT_4DW_PKT] = 16,
+ [HISI_PTT_8DW_PKT] = 32,
+};
+
+int hisi_ptt_pkt_desc(const unsigned char *buf, int pos, enum hisi_ptt_pkt_type type);
+
+#endif
diff --git a/tools/perf/util/hisi-ptt.c b/tools/perf/util/hisi-ptt.c
new file mode 100644
index 000000000..764d660d3
--- /dev/null
+++ b/tools/perf/util/hisi-ptt.c
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * HiSilicon PCIe Trace and Tuning (PTT) support
+ * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
+ */
+
+#include <byteswap.h>
+#include <endian.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/types.h>
+#include <linux/zalloc.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "auxtrace.h"
+#include "color.h"
+#include "debug.h"
+#include "evsel.h"
+#include "hisi-ptt.h"
+#include "hisi-ptt-decoder/hisi-ptt-pkt-decoder.h"
+#include "machine.h"
+#include "session.h"
+#include "tool.h"
+#include <internal/lib.h>
+
+struct hisi_ptt {
+ struct auxtrace auxtrace;
+ u32 auxtrace_type;
+ struct perf_session *session;
+ struct machine *machine;
+ u32 pmu_type;
+};
+
+struct hisi_ptt_queue {
+ struct hisi_ptt *ptt;
+ struct auxtrace_buffer *buffer;
+};
+
+static enum hisi_ptt_pkt_type hisi_ptt_check_packet_type(unsigned char *buf)
+{
+ uint32_t head = *(uint32_t *)buf;
+
+ if ((HISI_PTT_8DW_CHECK_MASK & head) == HISI_PTT_IS_8DW_PKT)
+ return HISI_PTT_8DW_PKT;
+
+ return HISI_PTT_4DW_PKT;
+}
+
+static void hisi_ptt_dump(struct hisi_ptt *ptt __maybe_unused,
+ unsigned char *buf, size_t len)
+{
+ const char *color = PERF_COLOR_BLUE;
+ enum hisi_ptt_pkt_type type;
+ size_t pos = 0;
+ int pkt_len;
+
+ type = hisi_ptt_check_packet_type(buf);
+ len = round_down(len, hisi_ptt_pkt_size[type]);
+ color_fprintf(stdout, color, ". ... HISI PTT data: size %zu bytes\n",
+ len);
+
+ while (len > 0) {
+ pkt_len = hisi_ptt_pkt_desc(buf, pos, type);
+ if (!pkt_len)
+ color_fprintf(stdout, color, " Bad packet!\n");
+
+ pos += pkt_len;
+ len -= pkt_len;
+ }
+}
+
+static void hisi_ptt_dump_event(struct hisi_ptt *ptt, unsigned char *buf,
+ size_t len)
+{
+ printf(".\n");
+
+ hisi_ptt_dump(ptt, buf, len);
+}
+
+static int hisi_ptt_process_event(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct perf_tool *tool __maybe_unused)
+{
+ return 0;
+}
+
+static int hisi_ptt_process_auxtrace_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_tool *tool __maybe_unused)
+{
+ struct hisi_ptt *ptt = container_of(session->auxtrace, struct hisi_ptt,
+ auxtrace);
+ int fd = perf_data__fd(session->data);
+ int size = event->auxtrace.size;
+ void *data = malloc(size);
+ off_t data_offset;
+ int err;
+
+ if (!data)
+ return -errno;
+
+ if (perf_data__is_pipe(session->data)) {
+ data_offset = 0;
+ } else {
+ data_offset = lseek(fd, 0, SEEK_CUR);
+ if (data_offset == -1)
+ return -errno;
+ }
+
+ err = readn(fd, data, size);
+ if (err != (ssize_t)size) {
+ free(data);
+ return -errno;
+ }
+
+ if (dump_trace)
+ hisi_ptt_dump_event(ptt, data, size);
+
+ free(data);
+ return 0;
+}
+
+static int hisi_ptt_flush(struct perf_session *session __maybe_unused,
+ struct perf_tool *tool __maybe_unused)
+{
+ return 0;
+}
+
+static void hisi_ptt_free_events(struct perf_session *session __maybe_unused)
+{
+}
+
+static void hisi_ptt_free(struct perf_session *session)
+{
+ struct hisi_ptt *ptt = container_of(session->auxtrace, struct hisi_ptt,
+ auxtrace);
+
+ session->auxtrace = NULL;
+ free(ptt);
+}
+
+static bool hisi_ptt_evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ struct hisi_ptt *ptt = container_of(session->auxtrace, struct hisi_ptt, auxtrace);
+
+ return evsel->core.attr.type == ptt->pmu_type;
+}
+
+static void hisi_ptt_print_info(__u64 type)
+{
+ if (!dump_trace)
+ return;
+
+ fprintf(stdout, " PMU Type %" PRId64 "\n", (s64) type);
+}
+
+int hisi_ptt_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session)
+{
+ struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
+ struct hisi_ptt *ptt;
+
+ if (auxtrace_info->header.size < HISI_PTT_AUXTRACE_PRIV_SIZE +
+ sizeof(struct perf_record_auxtrace_info))
+ return -EINVAL;
+
+ ptt = zalloc(sizeof(*ptt));
+ if (!ptt)
+ return -ENOMEM;
+
+ ptt->session = session;
+ ptt->machine = &session->machines.host; /* No kvm support */
+ ptt->auxtrace_type = auxtrace_info->type;
+ ptt->pmu_type = auxtrace_info->priv[0];
+
+ ptt->auxtrace.process_event = hisi_ptt_process_event;
+ ptt->auxtrace.process_auxtrace_event = hisi_ptt_process_auxtrace_event;
+ ptt->auxtrace.flush_events = hisi_ptt_flush;
+ ptt->auxtrace.free_events = hisi_ptt_free_events;
+ ptt->auxtrace.free = hisi_ptt_free;
+ ptt->auxtrace.evsel_is_auxtrace = hisi_ptt_evsel_is_auxtrace;
+ session->auxtrace = &ptt->auxtrace;
+
+ hisi_ptt_print_info(auxtrace_info->priv[0]);
+
+ return 0;
+}
diff --git a/tools/perf/util/hisi-ptt.h b/tools/perf/util/hisi-ptt.h
new file mode 100644
index 000000000..2db9b4056
--- /dev/null
+++ b/tools/perf/util/hisi-ptt.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * HiSilicon PCIe Trace and Tuning (PTT) support
+ * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
+ */
+
+#ifndef INCLUDE__PERF_HISI_PTT_H__
+#define INCLUDE__PERF_HISI_PTT_H__
+
+#define HISI_PTT_PMU_NAME "hisi_ptt"
+#define HISI_PTT_AUXTRACE_PRIV_SIZE sizeof(u64)
+
+struct auxtrace_record *hisi_ptt_recording_init(int *err,
+ struct perf_pmu *hisi_ptt_pmu);
+
+int hisi_ptt_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session);
+
+#endif
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
new file mode 100644
index 000000000..ac8c0ef48
--- /dev/null
+++ b/tools/perf/util/hist.c
@@ -0,0 +1,2927 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "callchain.h"
+#include "debug.h"
+#include "dso.h"
+#include "build-id.h"
+#include "hist.h"
+#include "kvm-stat.h"
+#include "map.h"
+#include "map_symbol.h"
+#include "branch.h"
+#include "mem-events.h"
+#include "session.h"
+#include "namespaces.h"
+#include "cgroup.h"
+#include "sort.h"
+#include "units.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "annotate.h"
+#include "srcline.h"
+#include "symbol.h"
+#include "thread.h"
+#include "block-info.h"
+#include "ui/progress.h"
+#include <errno.h>
+#include <math.h>
+#include <inttypes.h>
+#include <sys/param.h>
+#include <linux/rbtree.h>
+#include <linux/string.h>
+#include <linux/time64.h>
+#include <linux/zalloc.h>
+
+static bool hists__filter_entry_by_dso(struct hists *hists,
+ struct hist_entry *he);
+static bool hists__filter_entry_by_thread(struct hists *hists,
+ struct hist_entry *he);
+static bool hists__filter_entry_by_symbol(struct hists *hists,
+ struct hist_entry *he);
+static bool hists__filter_entry_by_socket(struct hists *hists,
+ struct hist_entry *he);
+
+u16 hists__col_len(struct hists *hists, enum hist_column col)
+{
+ return hists->col_len[col];
+}
+
+void hists__set_col_len(struct hists *hists, enum hist_column col, u16 len)
+{
+ hists->col_len[col] = len;
+}
+
+bool hists__new_col_len(struct hists *hists, enum hist_column col, u16 len)
+{
+ if (len > hists__col_len(hists, col)) {
+ hists__set_col_len(hists, col, len);
+ return true;
+ }
+ return false;
+}
+
+void hists__reset_col_len(struct hists *hists)
+{
+ enum hist_column col;
+
+ for (col = 0; col < HISTC_NR_COLS; ++col)
+ hists__set_col_len(hists, col, 0);
+}
+
+static void hists__set_unres_dso_col_len(struct hists *hists, int dso)
+{
+ const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
+
+ if (hists__col_len(hists, dso) < unresolved_col_width &&
+ !symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
+ !symbol_conf.dso_list)
+ hists__set_col_len(hists, dso, unresolved_col_width);
+}
+
+void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
+{
+ const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
+ int symlen;
+ u16 len;
+
+ if (h->block_info)
+ return;
+ /*
+ * +4 accounts for '[x] ' priv level info
+ * +2 accounts for 0x prefix on raw addresses
+ * +3 accounts for ' y ' symtab origin info
+ */
+ if (h->ms.sym) {
+ symlen = h->ms.sym->namelen + 4;
+ if (verbose > 0)
+ symlen += BITS_PER_LONG / 4 + 2 + 3;
+ hists__new_col_len(hists, HISTC_SYMBOL, symlen);
+ } else {
+ symlen = unresolved_col_width + 4 + 2;
+ hists__new_col_len(hists, HISTC_SYMBOL, symlen);
+ hists__set_unres_dso_col_len(hists, HISTC_DSO);
+ }
+
+ len = thread__comm_len(h->thread);
+ if (hists__new_col_len(hists, HISTC_COMM, len))
+ hists__set_col_len(hists, HISTC_THREAD, len + 8);
+
+ if (h->ms.map) {
+ len = dso__name_len(map__dso(h->ms.map));
+ hists__new_col_len(hists, HISTC_DSO, len);
+ }
+
+ if (h->parent)
+ hists__new_col_len(hists, HISTC_PARENT, h->parent->namelen);
+
+ if (h->branch_info) {
+ if (h->branch_info->from.ms.sym) {
+ symlen = (int)h->branch_info->from.ms.sym->namelen + 4;
+ if (verbose > 0)
+ symlen += BITS_PER_LONG / 4 + 2 + 3;
+ hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
+
+ symlen = dso__name_len(map__dso(h->branch_info->from.ms.map));
+ hists__new_col_len(hists, HISTC_DSO_FROM, symlen);
+ } else {
+ symlen = unresolved_col_width + 4 + 2;
+ hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
+ hists__new_col_len(hists, HISTC_ADDR_FROM, symlen);
+ hists__set_unres_dso_col_len(hists, HISTC_DSO_FROM);
+ }
+
+ if (h->branch_info->to.ms.sym) {
+ symlen = (int)h->branch_info->to.ms.sym->namelen + 4;
+ if (verbose > 0)
+ symlen += BITS_PER_LONG / 4 + 2 + 3;
+ hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
+
+ symlen = dso__name_len(map__dso(h->branch_info->to.ms.map));
+ hists__new_col_len(hists, HISTC_DSO_TO, symlen);
+ } else {
+ symlen = unresolved_col_width + 4 + 2;
+ hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
+ hists__new_col_len(hists, HISTC_ADDR_TO, symlen);
+ hists__set_unres_dso_col_len(hists, HISTC_DSO_TO);
+ }
+
+ if (h->branch_info->srcline_from)
+ hists__new_col_len(hists, HISTC_SRCLINE_FROM,
+ strlen(h->branch_info->srcline_from));
+ if (h->branch_info->srcline_to)
+ hists__new_col_len(hists, HISTC_SRCLINE_TO,
+ strlen(h->branch_info->srcline_to));
+ }
+
+ if (h->mem_info) {
+ if (h->mem_info->daddr.ms.sym) {
+ symlen = (int)h->mem_info->daddr.ms.sym->namelen + 4
+ + unresolved_col_width + 2;
+ hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL,
+ symlen);
+ hists__new_col_len(hists, HISTC_MEM_DCACHELINE,
+ symlen + 1);
+ } else {
+ symlen = unresolved_col_width + 4 + 2;
+ hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL,
+ symlen);
+ hists__new_col_len(hists, HISTC_MEM_DCACHELINE,
+ symlen);
+ }
+
+ if (h->mem_info->iaddr.ms.sym) {
+ symlen = (int)h->mem_info->iaddr.ms.sym->namelen + 4
+ + unresolved_col_width + 2;
+ hists__new_col_len(hists, HISTC_MEM_IADDR_SYMBOL,
+ symlen);
+ } else {
+ symlen = unresolved_col_width + 4 + 2;
+ hists__new_col_len(hists, HISTC_MEM_IADDR_SYMBOL,
+ symlen);
+ }
+
+ if (h->mem_info->daddr.ms.map) {
+ symlen = dso__name_len(map__dso(h->mem_info->daddr.ms.map));
+ hists__new_col_len(hists, HISTC_MEM_DADDR_DSO,
+ symlen);
+ } else {
+ symlen = unresolved_col_width + 4 + 2;
+ hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
+ }
+
+ hists__new_col_len(hists, HISTC_MEM_PHYS_DADDR,
+ unresolved_col_width + 4 + 2);
+
+ hists__new_col_len(hists, HISTC_MEM_DATA_PAGE_SIZE,
+ unresolved_col_width + 4 + 2);
+
+ } else {
+ symlen = unresolved_col_width + 4 + 2;
+ hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen);
+ hists__new_col_len(hists, HISTC_MEM_IADDR_SYMBOL, symlen);
+ hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
+ }
+
+ hists__new_col_len(hists, HISTC_CGROUP, 6);
+ hists__new_col_len(hists, HISTC_CGROUP_ID, 20);
+ hists__new_col_len(hists, HISTC_CPU, 3);
+ hists__new_col_len(hists, HISTC_SOCKET, 6);
+ hists__new_col_len(hists, HISTC_MEM_LOCKED, 6);
+ hists__new_col_len(hists, HISTC_MEM_TLB, 22);
+ hists__new_col_len(hists, HISTC_MEM_SNOOP, 12);
+ hists__new_col_len(hists, HISTC_MEM_LVL, 36 + 3);
+ hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12);
+ hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12);
+ hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
+ hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
+ hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
+ hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
+ hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
+ hists__new_col_len(hists, HISTC_ADDR, BITS_PER_LONG / 4 + 2);
+
+ if (symbol_conf.nanosecs)
+ hists__new_col_len(hists, HISTC_TIME, 16);
+ else
+ hists__new_col_len(hists, HISTC_TIME, 12);
+ hists__new_col_len(hists, HISTC_CODE_PAGE_SIZE, 6);
+
+ if (h->srcline) {
+ len = MAX(strlen(h->srcline), strlen(sort_srcline.se_header));
+ hists__new_col_len(hists, HISTC_SRCLINE, len);
+ }
+
+ if (h->srcfile)
+ hists__new_col_len(hists, HISTC_SRCFILE, strlen(h->srcfile));
+
+ if (h->transaction)
+ hists__new_col_len(hists, HISTC_TRANSACTION,
+ hist_entry__transaction_len());
+
+ if (h->trace_output)
+ hists__new_col_len(hists, HISTC_TRACE, strlen(h->trace_output));
+
+ if (h->cgroup) {
+ const char *cgrp_name = "unknown";
+ struct cgroup *cgrp = cgroup__find(maps__machine(h->ms.maps)->env,
+ h->cgroup);
+ if (cgrp != NULL)
+ cgrp_name = cgrp->name;
+
+ hists__new_col_len(hists, HISTC_CGROUP, strlen(cgrp_name));
+ }
+}
+
+void hists__output_recalc_col_len(struct hists *hists, int max_rows)
+{
+ struct rb_node *next = rb_first_cached(&hists->entries);
+ struct hist_entry *n;
+ int row = 0;
+
+ hists__reset_col_len(hists);
+
+ while (next && row++ < max_rows) {
+ n = rb_entry(next, struct hist_entry, rb_node);
+ if (!n->filtered)
+ hists__calc_col_len(hists, n);
+ next = rb_next(&n->rb_node);
+ }
+}
+
+static void he_stat__add_cpumode_period(struct he_stat *he_stat,
+ unsigned int cpumode, u64 period)
+{
+ switch (cpumode) {
+ case PERF_RECORD_MISC_KERNEL:
+ he_stat->period_sys += period;
+ break;
+ case PERF_RECORD_MISC_USER:
+ he_stat->period_us += period;
+ break;
+ case PERF_RECORD_MISC_GUEST_KERNEL:
+ he_stat->period_guest_sys += period;
+ break;
+ case PERF_RECORD_MISC_GUEST_USER:
+ he_stat->period_guest_us += period;
+ break;
+ default:
+ break;
+ }
+}
+
+static long hist_time(unsigned long htime)
+{
+ unsigned long time_quantum = symbol_conf.time_quantum;
+ if (time_quantum)
+ return (htime / time_quantum) * time_quantum;
+ return htime;
+}
+
+static void he_stat__add_period(struct he_stat *he_stat, u64 period)
+{
+ he_stat->period += period;
+ he_stat->nr_events += 1;
+}
+
+static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
+{
+ dest->period += src->period;
+ dest->period_sys += src->period_sys;
+ dest->period_us += src->period_us;
+ dest->period_guest_sys += src->period_guest_sys;
+ dest->period_guest_us += src->period_guest_us;
+ dest->nr_events += src->nr_events;
+}
+
+static void he_stat__decay(struct he_stat *he_stat)
+{
+ he_stat->period = (he_stat->period * 7) / 8;
+ he_stat->nr_events = (he_stat->nr_events * 7) / 8;
+ /* XXX need decay for weight too? */
+}
+
+static void hists__delete_entry(struct hists *hists, struct hist_entry *he);
+
+static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
+{
+ u64 prev_period = he->stat.period;
+ u64 diff;
+
+ if (prev_period == 0)
+ return true;
+
+ he_stat__decay(&he->stat);
+ if (symbol_conf.cumulate_callchain)
+ he_stat__decay(he->stat_acc);
+ decay_callchain(he->callchain);
+
+ diff = prev_period - he->stat.period;
+
+ if (!he->depth) {
+ hists->stats.total_period -= diff;
+ if (!he->filtered)
+ hists->stats.total_non_filtered_period -= diff;
+ }
+
+ if (!he->leaf) {
+ struct hist_entry *child;
+ struct rb_node *node = rb_first_cached(&he->hroot_out);
+ while (node) {
+ child = rb_entry(node, struct hist_entry, rb_node);
+ node = rb_next(node);
+
+ if (hists__decay_entry(hists, child))
+ hists__delete_entry(hists, child);
+ }
+ }
+
+ return he->stat.period == 0;
+}
+
+static void hists__delete_entry(struct hists *hists, struct hist_entry *he)
+{
+ struct rb_root_cached *root_in;
+ struct rb_root_cached *root_out;
+
+ if (he->parent_he) {
+ root_in = &he->parent_he->hroot_in;
+ root_out = &he->parent_he->hroot_out;
+ } else {
+ if (hists__has(hists, need_collapse))
+ root_in = &hists->entries_collapsed;
+ else
+ root_in = hists->entries_in;
+ root_out = &hists->entries;
+ }
+
+ rb_erase_cached(&he->rb_node_in, root_in);
+ rb_erase_cached(&he->rb_node, root_out);
+
+ --hists->nr_entries;
+ if (!he->filtered)
+ --hists->nr_non_filtered_entries;
+
+ hist_entry__delete(he);
+}
+
+void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel)
+{
+ struct rb_node *next = rb_first_cached(&hists->entries);
+ struct hist_entry *n;
+
+ while (next) {
+ n = rb_entry(next, struct hist_entry, rb_node);
+ next = rb_next(&n->rb_node);
+ if (((zap_user && n->level == '.') ||
+ (zap_kernel && n->level != '.') ||
+ hists__decay_entry(hists, n))) {
+ hists__delete_entry(hists, n);
+ }
+ }
+}
+
+void hists__delete_entries(struct hists *hists)
+{
+ struct rb_node *next = rb_first_cached(&hists->entries);
+ struct hist_entry *n;
+
+ while (next) {
+ n = rb_entry(next, struct hist_entry, rb_node);
+ next = rb_next(&n->rb_node);
+
+ hists__delete_entry(hists, n);
+ }
+}
+
+struct hist_entry *hists__get_entry(struct hists *hists, int idx)
+{
+ struct rb_node *next = rb_first_cached(&hists->entries);
+ struct hist_entry *n;
+ int i = 0;
+
+ while (next) {
+ n = rb_entry(next, struct hist_entry, rb_node);
+ if (i == idx)
+ return n;
+
+ next = rb_next(&n->rb_node);
+ i++;
+ }
+
+ return NULL;
+}
+
+/*
+ * histogram, sorted on item, collects periods
+ */
+
+static int hist_entry__init(struct hist_entry *he,
+ struct hist_entry *template,
+ bool sample_self,
+ size_t callchain_size)
+{
+ *he = *template;
+ he->callchain_size = callchain_size;
+
+ if (symbol_conf.cumulate_callchain) {
+ he->stat_acc = malloc(sizeof(he->stat));
+ if (he->stat_acc == NULL)
+ return -ENOMEM;
+ memcpy(he->stat_acc, &he->stat, sizeof(he->stat));
+ if (!sample_self)
+ memset(&he->stat, 0, sizeof(he->stat));
+ }
+
+ he->ms.maps = maps__get(he->ms.maps);
+ he->ms.map = map__get(he->ms.map);
+
+ if (he->branch_info) {
+ /*
+ * This branch info is (a part of) allocated from
+ * sample__resolve_bstack() and will be freed after
+ * adding new entries. So we need to save a copy.
+ */
+ he->branch_info = malloc(sizeof(*he->branch_info));
+ if (he->branch_info == NULL)
+ goto err;
+
+ memcpy(he->branch_info, template->branch_info,
+ sizeof(*he->branch_info));
+
+ he->branch_info->from.ms.map = map__get(he->branch_info->from.ms.map);
+ he->branch_info->to.ms.map = map__get(he->branch_info->to.ms.map);
+ }
+
+ if (he->mem_info) {
+ he->mem_info->iaddr.ms.map = map__get(he->mem_info->iaddr.ms.map);
+ he->mem_info->daddr.ms.map = map__get(he->mem_info->daddr.ms.map);
+ }
+
+ if (hist_entry__has_callchains(he) && symbol_conf.use_callchain)
+ callchain_init(he->callchain);
+
+ if (he->raw_data) {
+ he->raw_data = memdup(he->raw_data, he->raw_size);
+ if (he->raw_data == NULL)
+ goto err_infos;
+ }
+
+ if (he->srcline && he->srcline != SRCLINE_UNKNOWN) {
+ he->srcline = strdup(he->srcline);
+ if (he->srcline == NULL)
+ goto err_rawdata;
+ }
+
+ if (symbol_conf.res_sample) {
+ he->res_samples = calloc(sizeof(struct res_sample),
+ symbol_conf.res_sample);
+ if (!he->res_samples)
+ goto err_srcline;
+ }
+
+ INIT_LIST_HEAD(&he->pairs.node);
+ he->thread = thread__get(he->thread);
+ he->hroot_in = RB_ROOT_CACHED;
+ he->hroot_out = RB_ROOT_CACHED;
+
+ if (!symbol_conf.report_hierarchy)
+ he->leaf = true;
+
+ return 0;
+
+err_srcline:
+ zfree(&he->srcline);
+
+err_rawdata:
+ zfree(&he->raw_data);
+
+err_infos:
+ if (he->branch_info) {
+ map__put(he->branch_info->from.ms.map);
+ map__put(he->branch_info->to.ms.map);
+ zfree(&he->branch_info);
+ }
+ if (he->mem_info) {
+ map__put(he->mem_info->iaddr.ms.map);
+ map__put(he->mem_info->daddr.ms.map);
+ }
+err:
+ maps__zput(he->ms.maps);
+ map__zput(he->ms.map);
+ zfree(&he->stat_acc);
+ return -ENOMEM;
+}
+
+static void *hist_entry__zalloc(size_t size)
+{
+ return zalloc(size + sizeof(struct hist_entry));
+}
+
+static void hist_entry__free(void *ptr)
+{
+ free(ptr);
+}
+
+static struct hist_entry_ops default_ops = {
+ .new = hist_entry__zalloc,
+ .free = hist_entry__free,
+};
+
+static struct hist_entry *hist_entry__new(struct hist_entry *template,
+ bool sample_self)
+{
+ struct hist_entry_ops *ops = template->ops;
+ size_t callchain_size = 0;
+ struct hist_entry *he;
+ int err = 0;
+
+ if (!ops)
+ ops = template->ops = &default_ops;
+
+ if (symbol_conf.use_callchain)
+ callchain_size = sizeof(struct callchain_root);
+
+ he = ops->new(callchain_size);
+ if (he) {
+ err = hist_entry__init(he, template, sample_self, callchain_size);
+ if (err) {
+ ops->free(he);
+ he = NULL;
+ }
+ }
+
+ return he;
+}
+
+static u8 symbol__parent_filter(const struct symbol *parent)
+{
+ if (symbol_conf.exclude_other && parent == NULL)
+ return 1 << HIST_FILTER__PARENT;
+ return 0;
+}
+
+static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period)
+{
+ if (!hist_entry__has_callchains(he) || !symbol_conf.use_callchain)
+ return;
+
+ he->hists->callchain_period += period;
+ if (!he->filtered)
+ he->hists->callchain_non_filtered_period += period;
+}
+
+static struct hist_entry *hists__findnew_entry(struct hists *hists,
+ struct hist_entry *entry,
+ const struct addr_location *al,
+ bool sample_self)
+{
+ struct rb_node **p;
+ struct rb_node *parent = NULL;
+ struct hist_entry *he;
+ int64_t cmp;
+ u64 period = entry->stat.period;
+ bool leftmost = true;
+
+ p = &hists->entries_in->rb_root.rb_node;
+
+ while (*p != NULL) {
+ parent = *p;
+ he = rb_entry(parent, struct hist_entry, rb_node_in);
+
+ /*
+ * Make sure that it receives arguments in a same order as
+ * hist_entry__collapse() so that we can use an appropriate
+ * function when searching an entry regardless which sort
+ * keys were used.
+ */
+ cmp = hist_entry__cmp(he, entry);
+ if (!cmp) {
+ if (sample_self) {
+ he_stat__add_period(&he->stat, period);
+ hist_entry__add_callchain_period(he, period);
+ }
+ if (symbol_conf.cumulate_callchain)
+ he_stat__add_period(he->stat_acc, period);
+
+ /*
+ * This mem info was allocated from sample__resolve_mem
+ * and will not be used anymore.
+ */
+ mem_info__zput(entry->mem_info);
+
+ block_info__zput(entry->block_info);
+
+ kvm_info__zput(entry->kvm_info);
+
+ /* If the map of an existing hist_entry has
+ * become out-of-date due to an exec() or
+ * similar, update it. Otherwise we will
+ * mis-adjust symbol addresses when computing
+ * the history counter to increment.
+ */
+ if (he->ms.map != entry->ms.map) {
+ map__put(he->ms.map);
+ he->ms.map = map__get(entry->ms.map);
+ }
+ goto out;
+ }
+
+ if (cmp < 0)
+ p = &(*p)->rb_left;
+ else {
+ p = &(*p)->rb_right;
+ leftmost = false;
+ }
+ }
+
+ he = hist_entry__new(entry, sample_self);
+ if (!he)
+ return NULL;
+
+ if (sample_self)
+ hist_entry__add_callchain_period(he, period);
+ hists->nr_entries++;
+
+ rb_link_node(&he->rb_node_in, parent, p);
+ rb_insert_color_cached(&he->rb_node_in, hists->entries_in, leftmost);
+out:
+ if (sample_self)
+ he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
+ if (symbol_conf.cumulate_callchain)
+ he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period);
+ return he;
+}
+
+static unsigned random_max(unsigned high)
+{
+ unsigned thresh = -high % high;
+ for (;;) {
+ unsigned r = random();
+ if (r >= thresh)
+ return r % high;
+ }
+}
+
+static void hists__res_sample(struct hist_entry *he, struct perf_sample *sample)
+{
+ struct res_sample *r;
+ int j;
+
+ if (he->num_res < symbol_conf.res_sample) {
+ j = he->num_res++;
+ } else {
+ j = random_max(symbol_conf.res_sample);
+ }
+ r = &he->res_samples[j];
+ r->time = sample->time;
+ r->cpu = sample->cpu;
+ r->tid = sample->tid;
+}
+
+static struct hist_entry*
+__hists__add_entry(struct hists *hists,
+ struct addr_location *al,
+ struct symbol *sym_parent,
+ struct branch_info *bi,
+ struct mem_info *mi,
+ struct kvm_info *ki,
+ struct block_info *block_info,
+ struct perf_sample *sample,
+ bool sample_self,
+ struct hist_entry_ops *ops)
+{
+ struct namespaces *ns = thread__namespaces(al->thread);
+ struct hist_entry entry = {
+ .thread = al->thread,
+ .comm = thread__comm(al->thread),
+ .cgroup_id = {
+ .dev = ns ? ns->link_info[CGROUP_NS_INDEX].dev : 0,
+ .ino = ns ? ns->link_info[CGROUP_NS_INDEX].ino : 0,
+ },
+ .cgroup = sample->cgroup,
+ .ms = {
+ .maps = al->maps,
+ .map = al->map,
+ .sym = al->sym,
+ },
+ .srcline = (char *) al->srcline,
+ .socket = al->socket,
+ .cpu = al->cpu,
+ .cpumode = al->cpumode,
+ .ip = al->addr,
+ .level = al->level,
+ .code_page_size = sample->code_page_size,
+ .stat = {
+ .nr_events = 1,
+ .period = sample->period,
+ },
+ .parent = sym_parent,
+ .filtered = symbol__parent_filter(sym_parent) | al->filtered,
+ .hists = hists,
+ .branch_info = bi,
+ .mem_info = mi,
+ .kvm_info = ki,
+ .block_info = block_info,
+ .transaction = sample->transaction,
+ .raw_data = sample->raw_data,
+ .raw_size = sample->raw_size,
+ .ops = ops,
+ .time = hist_time(sample->time),
+ .weight = sample->weight,
+ .ins_lat = sample->ins_lat,
+ .p_stage_cyc = sample->p_stage_cyc,
+ .simd_flags = sample->simd_flags,
+ }, *he = hists__findnew_entry(hists, &entry, al, sample_self);
+
+ if (!hists->has_callchains && he && he->callchain_size != 0)
+ hists->has_callchains = true;
+ if (he && symbol_conf.res_sample)
+ hists__res_sample(he, sample);
+ return he;
+}
+
+struct hist_entry *hists__add_entry(struct hists *hists,
+ struct addr_location *al,
+ struct symbol *sym_parent,
+ struct branch_info *bi,
+ struct mem_info *mi,
+ struct kvm_info *ki,
+ struct perf_sample *sample,
+ bool sample_self)
+{
+ return __hists__add_entry(hists, al, sym_parent, bi, mi, ki, NULL,
+ sample, sample_self, NULL);
+}
+
+struct hist_entry *hists__add_entry_ops(struct hists *hists,
+ struct hist_entry_ops *ops,
+ struct addr_location *al,
+ struct symbol *sym_parent,
+ struct branch_info *bi,
+ struct mem_info *mi,
+ struct kvm_info *ki,
+ struct perf_sample *sample,
+ bool sample_self)
+{
+ return __hists__add_entry(hists, al, sym_parent, bi, mi, ki, NULL,
+ sample, sample_self, ops);
+}
+
+struct hist_entry *hists__add_entry_block(struct hists *hists,
+ struct addr_location *al,
+ struct block_info *block_info)
+{
+ struct hist_entry entry = {
+ .block_info = block_info,
+ .hists = hists,
+ .ms = {
+ .maps = al->maps,
+ .map = al->map,
+ .sym = al->sym,
+ },
+ }, *he = hists__findnew_entry(hists, &entry, al, false);
+
+ return he;
+}
+
+static int
+iter_next_nop_entry(struct hist_entry_iter *iter __maybe_unused,
+ struct addr_location *al __maybe_unused)
+{
+ return 0;
+}
+
+static int
+iter_add_next_nop_entry(struct hist_entry_iter *iter __maybe_unused,
+ struct addr_location *al __maybe_unused)
+{
+ return 0;
+}
+
+static int
+iter_prepare_mem_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+ struct perf_sample *sample = iter->sample;
+ struct mem_info *mi;
+
+ mi = sample__resolve_mem(sample, al);
+ if (mi == NULL)
+ return -ENOMEM;
+
+ iter->priv = mi;
+ return 0;
+}
+
+static int
+iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+ u64 cost;
+ struct mem_info *mi = iter->priv;
+ struct hists *hists = evsel__hists(iter->evsel);
+ struct perf_sample *sample = iter->sample;
+ struct hist_entry *he;
+
+ if (mi == NULL)
+ return -EINVAL;
+
+ cost = sample->weight;
+ if (!cost)
+ cost = 1;
+
+ /*
+ * must pass period=weight in order to get the correct
+ * sorting from hists__collapse_resort() which is solely
+ * based on periods. We want sorting be done on nr_events * weight
+ * and this is indirectly achieved by passing period=weight here
+ * and the he_stat__add_period() function.
+ */
+ sample->period = cost;
+
+ he = hists__add_entry(hists, al, iter->parent, NULL, mi, NULL,
+ sample, true);
+ if (!he)
+ return -ENOMEM;
+
+ iter->he = he;
+ return 0;
+}
+
+static int
+iter_finish_mem_entry(struct hist_entry_iter *iter,
+ struct addr_location *al __maybe_unused)
+{
+ struct evsel *evsel = iter->evsel;
+ struct hists *hists = evsel__hists(evsel);
+ struct hist_entry *he = iter->he;
+ int err = -EINVAL;
+
+ if (he == NULL)
+ goto out;
+
+ hists__inc_nr_samples(hists, he->filtered);
+
+ err = hist_entry__append_callchain(he, iter->sample);
+
+out:
+ /*
+ * We don't need to free iter->priv (mem_info) here since the mem info
+ * was either already freed in hists__findnew_entry() or passed to a
+ * new hist entry by hist_entry__new().
+ */
+ iter->priv = NULL;
+
+ iter->he = NULL;
+ return err;
+}
+
+static int
+iter_prepare_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+ struct branch_info *bi;
+ struct perf_sample *sample = iter->sample;
+
+ bi = sample__resolve_bstack(sample, al);
+ if (!bi)
+ return -ENOMEM;
+
+ iter->curr = 0;
+ iter->total = sample->branch_stack->nr;
+
+ iter->priv = bi;
+ return 0;
+}
+
+static int
+iter_add_single_branch_entry(struct hist_entry_iter *iter __maybe_unused,
+ struct addr_location *al __maybe_unused)
+{
+ return 0;
+}
+
+static int
+iter_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+ struct branch_info *bi = iter->priv;
+ int i = iter->curr;
+
+ if (bi == NULL)
+ return 0;
+
+ if (iter->curr >= iter->total)
+ return 0;
+
+ maps__put(al->maps);
+ al->maps = maps__get(bi[i].to.ms.maps);
+ map__put(al->map);
+ al->map = map__get(bi[i].to.ms.map);
+ al->sym = bi[i].to.ms.sym;
+ al->addr = bi[i].to.addr;
+ return 1;
+}
+
+static int
+iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+ struct branch_info *bi;
+ struct evsel *evsel = iter->evsel;
+ struct hists *hists = evsel__hists(evsel);
+ struct perf_sample *sample = iter->sample;
+ struct hist_entry *he = NULL;
+ int i = iter->curr;
+ int err = 0;
+
+ bi = iter->priv;
+
+ if (iter->hide_unresolved && !(bi[i].from.ms.sym && bi[i].to.ms.sym))
+ goto out;
+
+ /*
+ * The report shows the percentage of total branches captured
+ * and not events sampled. Thus we use a pseudo period of 1.
+ */
+ sample->period = 1;
+ sample->weight = bi->flags.cycles ? bi->flags.cycles : 1;
+
+ he = hists__add_entry(hists, al, iter->parent, &bi[i], NULL, NULL,
+ sample, true);
+ if (he == NULL)
+ return -ENOMEM;
+
+ hists__inc_nr_samples(hists, he->filtered);
+
+out:
+ iter->he = he;
+ iter->curr++;
+ return err;
+}
+
+static int
+iter_finish_branch_entry(struct hist_entry_iter *iter,
+ struct addr_location *al __maybe_unused)
+{
+ zfree(&iter->priv);
+ iter->he = NULL;
+
+ return iter->curr >= iter->total ? 0 : -1;
+}
+
+static int
+iter_prepare_normal_entry(struct hist_entry_iter *iter __maybe_unused,
+ struct addr_location *al __maybe_unused)
+{
+ return 0;
+}
+
+static int
+iter_add_single_normal_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+ struct evsel *evsel = iter->evsel;
+ struct perf_sample *sample = iter->sample;
+ struct hist_entry *he;
+
+ he = hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL,
+ NULL, sample, true);
+ if (he == NULL)
+ return -ENOMEM;
+
+ iter->he = he;
+ return 0;
+}
+
+static int
+iter_finish_normal_entry(struct hist_entry_iter *iter,
+ struct addr_location *al __maybe_unused)
+{
+ struct hist_entry *he = iter->he;
+ struct evsel *evsel = iter->evsel;
+ struct perf_sample *sample = iter->sample;
+
+ if (he == NULL)
+ return 0;
+
+ iter->he = NULL;
+
+ hists__inc_nr_samples(evsel__hists(evsel), he->filtered);
+
+ return hist_entry__append_callchain(he, sample);
+}
+
+static int
+iter_prepare_cumulative_entry(struct hist_entry_iter *iter,
+ struct addr_location *al __maybe_unused)
+{
+ struct hist_entry **he_cache;
+ struct callchain_cursor *cursor = get_tls_callchain_cursor();
+
+ if (cursor == NULL)
+ return -ENOMEM;
+
+ callchain_cursor_commit(cursor);
+
+ /*
+ * This is for detecting cycles or recursions so that they're
+ * cumulated only one time to prevent entries more than 100%
+ * overhead.
+ */
+ he_cache = malloc(sizeof(*he_cache) * (cursor->nr + 1));
+ if (he_cache == NULL)
+ return -ENOMEM;
+
+ iter->priv = he_cache;
+ iter->curr = 0;
+
+ return 0;
+}
+
+static int
+iter_add_single_cumulative_entry(struct hist_entry_iter *iter,
+ struct addr_location *al)
+{
+ struct evsel *evsel = iter->evsel;
+ struct hists *hists = evsel__hists(evsel);
+ struct perf_sample *sample = iter->sample;
+ struct hist_entry **he_cache = iter->priv;
+ struct hist_entry *he;
+ int err = 0;
+
+ he = hists__add_entry(hists, al, iter->parent, NULL, NULL, NULL,
+ sample, true);
+ if (he == NULL)
+ return -ENOMEM;
+
+ iter->he = he;
+ he_cache[iter->curr++] = he;
+
+ hist_entry__append_callchain(he, sample);
+
+ /*
+ * We need to re-initialize the cursor since callchain_append()
+ * advanced the cursor to the end.
+ */
+ callchain_cursor_commit(get_tls_callchain_cursor());
+
+ hists__inc_nr_samples(hists, he->filtered);
+
+ return err;
+}
+
+static int
+iter_next_cumulative_entry(struct hist_entry_iter *iter,
+ struct addr_location *al)
+{
+ struct callchain_cursor_node *node;
+
+ node = callchain_cursor_current(get_tls_callchain_cursor());
+ if (node == NULL)
+ return 0;
+
+ return fill_callchain_info(al, node, iter->hide_unresolved);
+}
+
+static bool
+hist_entry__fast__sym_diff(struct hist_entry *left,
+ struct hist_entry *right)
+{
+ struct symbol *sym_l = left->ms.sym;
+ struct symbol *sym_r = right->ms.sym;
+
+ if (!sym_l && !sym_r)
+ return left->ip != right->ip;
+
+ return !!_sort__sym_cmp(sym_l, sym_r);
+}
+
+
+static int
+iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
+ struct addr_location *al)
+{
+ struct evsel *evsel = iter->evsel;
+ struct perf_sample *sample = iter->sample;
+ struct hist_entry **he_cache = iter->priv;
+ struct hist_entry *he;
+ struct hist_entry he_tmp = {
+ .hists = evsel__hists(evsel),
+ .cpu = al->cpu,
+ .thread = al->thread,
+ .comm = thread__comm(al->thread),
+ .ip = al->addr,
+ .ms = {
+ .maps = al->maps,
+ .map = al->map,
+ .sym = al->sym,
+ },
+ .srcline = (char *) al->srcline,
+ .parent = iter->parent,
+ .raw_data = sample->raw_data,
+ .raw_size = sample->raw_size,
+ };
+ int i;
+ struct callchain_cursor cursor, *tls_cursor = get_tls_callchain_cursor();
+ bool fast = hists__has(he_tmp.hists, sym);
+
+ if (tls_cursor == NULL)
+ return -ENOMEM;
+
+ callchain_cursor_snapshot(&cursor, tls_cursor);
+
+ callchain_cursor_advance(tls_cursor);
+
+ /*
+ * Check if there's duplicate entries in the callchain.
+ * It's possible that it has cycles or recursive calls.
+ */
+ for (i = 0; i < iter->curr; i++) {
+ /*
+ * For most cases, there are no duplicate entries in callchain.
+ * The symbols are usually different. Do a quick check for
+ * symbols first.
+ */
+ if (fast && hist_entry__fast__sym_diff(he_cache[i], &he_tmp))
+ continue;
+
+ if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) {
+ /* to avoid calling callback function */
+ iter->he = NULL;
+ return 0;
+ }
+ }
+
+ he = hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL,
+ NULL, sample, false);
+ if (he == NULL)
+ return -ENOMEM;
+
+ iter->he = he;
+ he_cache[iter->curr++] = he;
+
+ if (hist_entry__has_callchains(he) && symbol_conf.use_callchain)
+ callchain_append(he->callchain, &cursor, sample->period);
+ return 0;
+}
+
+static int
+iter_finish_cumulative_entry(struct hist_entry_iter *iter,
+ struct addr_location *al __maybe_unused)
+{
+ zfree(&iter->priv);
+ iter->he = NULL;
+
+ return 0;
+}
+
+const struct hist_iter_ops hist_iter_mem = {
+ .prepare_entry = iter_prepare_mem_entry,
+ .add_single_entry = iter_add_single_mem_entry,
+ .next_entry = iter_next_nop_entry,
+ .add_next_entry = iter_add_next_nop_entry,
+ .finish_entry = iter_finish_mem_entry,
+};
+
+const struct hist_iter_ops hist_iter_branch = {
+ .prepare_entry = iter_prepare_branch_entry,
+ .add_single_entry = iter_add_single_branch_entry,
+ .next_entry = iter_next_branch_entry,
+ .add_next_entry = iter_add_next_branch_entry,
+ .finish_entry = iter_finish_branch_entry,
+};
+
+const struct hist_iter_ops hist_iter_normal = {
+ .prepare_entry = iter_prepare_normal_entry,
+ .add_single_entry = iter_add_single_normal_entry,
+ .next_entry = iter_next_nop_entry,
+ .add_next_entry = iter_add_next_nop_entry,
+ .finish_entry = iter_finish_normal_entry,
+};
+
+const struct hist_iter_ops hist_iter_cumulative = {
+ .prepare_entry = iter_prepare_cumulative_entry,
+ .add_single_entry = iter_add_single_cumulative_entry,
+ .next_entry = iter_next_cumulative_entry,
+ .add_next_entry = iter_add_next_cumulative_entry,
+ .finish_entry = iter_finish_cumulative_entry,
+};
+
+int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
+ int max_stack_depth, void *arg)
+{
+ int err, err2;
+ struct map *alm = NULL;
+
+ if (al)
+ alm = map__get(al->map);
+
+ err = sample__resolve_callchain(iter->sample, get_tls_callchain_cursor(), &iter->parent,
+ iter->evsel, al, max_stack_depth);
+ if (err) {
+ map__put(alm);
+ return err;
+ }
+
+ err = iter->ops->prepare_entry(iter, al);
+ if (err)
+ goto out;
+
+ err = iter->ops->add_single_entry(iter, al);
+ if (err)
+ goto out;
+
+ if (iter->he && iter->add_entry_cb) {
+ err = iter->add_entry_cb(iter, al, true, arg);
+ if (err)
+ goto out;
+ }
+
+ while (iter->ops->next_entry(iter, al)) {
+ err = iter->ops->add_next_entry(iter, al);
+ if (err)
+ break;
+
+ if (iter->he && iter->add_entry_cb) {
+ err = iter->add_entry_cb(iter, al, false, arg);
+ if (err)
+ goto out;
+ }
+ }
+
+out:
+ err2 = iter->ops->finish_entry(iter, al);
+ if (!err)
+ err = err2;
+
+ map__put(alm);
+
+ return err;
+}
+
+int64_t
+hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ struct hists *hists = left->hists;
+ struct perf_hpp_fmt *fmt;
+ int64_t cmp = 0;
+
+ hists__for_each_sort_list(hists, fmt) {
+ if (perf_hpp__is_dynamic_entry(fmt) &&
+ !perf_hpp__defined_dynamic_entry(fmt, hists))
+ continue;
+
+ cmp = fmt->cmp(fmt, left, right);
+ if (cmp)
+ break;
+ }
+
+ return cmp;
+}
+
+int64_t
+hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
+{
+ struct hists *hists = left->hists;
+ struct perf_hpp_fmt *fmt;
+ int64_t cmp = 0;
+
+ hists__for_each_sort_list(hists, fmt) {
+ if (perf_hpp__is_dynamic_entry(fmt) &&
+ !perf_hpp__defined_dynamic_entry(fmt, hists))
+ continue;
+
+ cmp = fmt->collapse(fmt, left, right);
+ if (cmp)
+ break;
+ }
+
+ return cmp;
+}
+
+void hist_entry__delete(struct hist_entry *he)
+{
+ struct hist_entry_ops *ops = he->ops;
+
+ thread__zput(he->thread);
+ maps__zput(he->ms.maps);
+ map__zput(he->ms.map);
+
+ if (he->branch_info) {
+ map__zput(he->branch_info->from.ms.map);
+ map__zput(he->branch_info->to.ms.map);
+ zfree_srcline(&he->branch_info->srcline_from);
+ zfree_srcline(&he->branch_info->srcline_to);
+ zfree(&he->branch_info);
+ }
+
+ if (he->mem_info) {
+ map__zput(he->mem_info->iaddr.ms.map);
+ map__zput(he->mem_info->daddr.ms.map);
+ mem_info__zput(he->mem_info);
+ }
+
+ if (he->block_info)
+ block_info__zput(he->block_info);
+
+ if (he->kvm_info)
+ kvm_info__zput(he->kvm_info);
+
+ zfree(&he->res_samples);
+ zfree(&he->stat_acc);
+ zfree_srcline(&he->srcline);
+ if (he->srcfile && he->srcfile[0])
+ zfree(&he->srcfile);
+ free_callchain(he->callchain);
+ zfree(&he->trace_output);
+ zfree(&he->raw_data);
+ ops->free(he);
+}
+
+/*
+ * If this is not the last column, then we need to pad it according to the
+ * pre-calculated max length for this column, otherwise don't bother adding
+ * spaces because that would break viewing this with, for instance, 'less',
+ * that would show tons of trailing spaces when a long C++ demangled method
+ * names is sampled.
+*/
+int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp,
+ struct perf_hpp_fmt *fmt, int printed)
+{
+ if (!list_is_last(&fmt->list, &he->hists->hpp_list->fields)) {
+ const int width = fmt->width(fmt, hpp, he->hists);
+ if (printed < width) {
+ advance_hpp(hpp, printed);
+ printed = scnprintf(hpp->buf, hpp->size, "%-*s", width - printed, " ");
+ }
+ }
+
+ return printed;
+}
+
+/*
+ * collapse the histogram
+ */
+
+static void hists__apply_filters(struct hists *hists, struct hist_entry *he);
+static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *he,
+ enum hist_filter type);
+
+typedef bool (*fmt_chk_fn)(struct perf_hpp_fmt *fmt);
+
+static bool check_thread_entry(struct perf_hpp_fmt *fmt)
+{
+ return perf_hpp__is_thread_entry(fmt) || perf_hpp__is_comm_entry(fmt);
+}
+
+static void hist_entry__check_and_remove_filter(struct hist_entry *he,
+ enum hist_filter type,
+ fmt_chk_fn check)
+{
+ struct perf_hpp_fmt *fmt;
+ bool type_match = false;
+ struct hist_entry *parent = he->parent_he;
+
+ switch (type) {
+ case HIST_FILTER__THREAD:
+ if (symbol_conf.comm_list == NULL &&
+ symbol_conf.pid_list == NULL &&
+ symbol_conf.tid_list == NULL)
+ return;
+ break;
+ case HIST_FILTER__DSO:
+ if (symbol_conf.dso_list == NULL)
+ return;
+ break;
+ case HIST_FILTER__SYMBOL:
+ if (symbol_conf.sym_list == NULL)
+ return;
+ break;
+ case HIST_FILTER__PARENT:
+ case HIST_FILTER__GUEST:
+ case HIST_FILTER__HOST:
+ case HIST_FILTER__SOCKET:
+ case HIST_FILTER__C2C:
+ default:
+ return;
+ }
+
+ /* if it's filtered by own fmt, it has to have filter bits */
+ perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+ if (check(fmt)) {
+ type_match = true;
+ break;
+ }
+ }
+
+ if (type_match) {
+ /*
+ * If the filter is for current level entry, propagate
+ * filter marker to parents. The marker bit was
+ * already set by default so it only needs to clear
+ * non-filtered entries.
+ */
+ if (!(he->filtered & (1 << type))) {
+ while (parent) {
+ parent->filtered &= ~(1 << type);
+ parent = parent->parent_he;
+ }
+ }
+ } else {
+ /*
+ * If current entry doesn't have matching formats, set
+ * filter marker for upper level entries. it will be
+ * cleared if its lower level entries is not filtered.
+ *
+ * For lower-level entries, it inherits parent's
+ * filter bit so that lower level entries of a
+ * non-filtered entry won't set the filter marker.
+ */
+ if (parent == NULL)
+ he->filtered |= (1 << type);
+ else
+ he->filtered |= (parent->filtered & (1 << type));
+ }
+}
+
+static void hist_entry__apply_hierarchy_filters(struct hist_entry *he)
+{
+ hist_entry__check_and_remove_filter(he, HIST_FILTER__THREAD,
+ check_thread_entry);
+
+ hist_entry__check_and_remove_filter(he, HIST_FILTER__DSO,
+ perf_hpp__is_dso_entry);
+
+ hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL,
+ perf_hpp__is_sym_entry);
+
+ hists__apply_filters(he->hists, he);
+}
+
+static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
+ struct rb_root_cached *root,
+ struct hist_entry *he,
+ struct hist_entry *parent_he,
+ struct perf_hpp_list *hpp_list)
+{
+ struct rb_node **p = &root->rb_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct hist_entry *iter, *new;
+ struct perf_hpp_fmt *fmt;
+ int64_t cmp;
+ bool leftmost = true;
+
+ while (*p != NULL) {
+ parent = *p;
+ iter = rb_entry(parent, struct hist_entry, rb_node_in);
+
+ cmp = 0;
+ perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
+ cmp = fmt->collapse(fmt, iter, he);
+ if (cmp)
+ break;
+ }
+
+ if (!cmp) {
+ he_stat__add_stat(&iter->stat, &he->stat);
+ return iter;
+ }
+
+ if (cmp < 0)
+ p = &parent->rb_left;
+ else {
+ p = &parent->rb_right;
+ leftmost = false;
+ }
+ }
+
+ new = hist_entry__new(he, true);
+ if (new == NULL)
+ return NULL;
+
+ hists->nr_entries++;
+
+ /* save related format list for output */
+ new->hpp_list = hpp_list;
+ new->parent_he = parent_he;
+
+ hist_entry__apply_hierarchy_filters(new);
+
+ /* some fields are now passed to 'new' */
+ perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
+ if (perf_hpp__is_trace_entry(fmt) || perf_hpp__is_dynamic_entry(fmt))
+ he->trace_output = NULL;
+ else
+ new->trace_output = NULL;
+
+ if (perf_hpp__is_srcline_entry(fmt))
+ he->srcline = NULL;
+ else
+ new->srcline = NULL;
+
+ if (perf_hpp__is_srcfile_entry(fmt))
+ he->srcfile = NULL;
+ else
+ new->srcfile = NULL;
+ }
+
+ rb_link_node(&new->rb_node_in, parent, p);
+ rb_insert_color_cached(&new->rb_node_in, root, leftmost);
+ return new;
+}
+
+static int hists__hierarchy_insert_entry(struct hists *hists,
+ struct rb_root_cached *root,
+ struct hist_entry *he)
+{
+ struct perf_hpp_list_node *node;
+ struct hist_entry *new_he = NULL;
+ struct hist_entry *parent = NULL;
+ int depth = 0;
+ int ret = 0;
+
+ list_for_each_entry(node, &hists->hpp_formats, list) {
+ /* skip period (overhead) and elided columns */
+ if (node->level == 0 || node->skip)
+ continue;
+
+ /* insert copy of 'he' for each fmt into the hierarchy */
+ new_he = hierarchy_insert_entry(hists, root, he, parent, &node->hpp);
+ if (new_he == NULL) {
+ ret = -1;
+ break;
+ }
+
+ root = &new_he->hroot_in;
+ new_he->depth = depth++;
+ parent = new_he;
+ }
+
+ if (new_he) {
+ new_he->leaf = true;
+
+ if (hist_entry__has_callchains(new_he) &&
+ symbol_conf.use_callchain) {
+ struct callchain_cursor *cursor = get_tls_callchain_cursor();
+
+ if (cursor == NULL)
+ return -1;
+
+ callchain_cursor_reset(cursor);
+ if (callchain_merge(cursor,
+ new_he->callchain,
+ he->callchain) < 0)
+ ret = -1;
+ }
+ }
+
+ /* 'he' is no longer used */
+ hist_entry__delete(he);
+
+ /* return 0 (or -1) since it already applied filters */
+ return ret;
+}
+
+static int hists__collapse_insert_entry(struct hists *hists,
+ struct rb_root_cached *root,
+ struct hist_entry *he)
+{
+ struct rb_node **p = &root->rb_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct hist_entry *iter;
+ int64_t cmp;
+ bool leftmost = true;
+
+ if (symbol_conf.report_hierarchy)
+ return hists__hierarchy_insert_entry(hists, root, he);
+
+ while (*p != NULL) {
+ parent = *p;
+ iter = rb_entry(parent, struct hist_entry, rb_node_in);
+
+ cmp = hist_entry__collapse(iter, he);
+
+ if (!cmp) {
+ int ret = 0;
+
+ he_stat__add_stat(&iter->stat, &he->stat);
+ if (symbol_conf.cumulate_callchain)
+ he_stat__add_stat(iter->stat_acc, he->stat_acc);
+
+ if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) {
+ struct callchain_cursor *cursor = get_tls_callchain_cursor();
+
+ if (cursor != NULL) {
+ callchain_cursor_reset(cursor);
+ if (callchain_merge(cursor, iter->callchain, he->callchain) < 0)
+ ret = -1;
+ } else {
+ ret = 0;
+ }
+ }
+ hist_entry__delete(he);
+ return ret;
+ }
+
+ if (cmp < 0)
+ p = &(*p)->rb_left;
+ else {
+ p = &(*p)->rb_right;
+ leftmost = false;
+ }
+ }
+ hists->nr_entries++;
+
+ rb_link_node(&he->rb_node_in, parent, p);
+ rb_insert_color_cached(&he->rb_node_in, root, leftmost);
+ return 1;
+}
+
+struct rb_root_cached *hists__get_rotate_entries_in(struct hists *hists)
+{
+ struct rb_root_cached *root;
+
+ mutex_lock(&hists->lock);
+
+ root = hists->entries_in;
+ if (++hists->entries_in > &hists->entries_in_array[1])
+ hists->entries_in = &hists->entries_in_array[0];
+
+ mutex_unlock(&hists->lock);
+
+ return root;
+}
+
+static void hists__apply_filters(struct hists *hists, struct hist_entry *he)
+{
+ hists__filter_entry_by_dso(hists, he);
+ hists__filter_entry_by_thread(hists, he);
+ hists__filter_entry_by_symbol(hists, he);
+ hists__filter_entry_by_socket(hists, he);
+}
+
+int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
+{
+ struct rb_root_cached *root;
+ struct rb_node *next;
+ struct hist_entry *n;
+ int ret;
+
+ if (!hists__has(hists, need_collapse))
+ return 0;
+
+ hists->nr_entries = 0;
+
+ root = hists__get_rotate_entries_in(hists);
+
+ next = rb_first_cached(root);
+
+ while (next) {
+ if (session_done())
+ break;
+ n = rb_entry(next, struct hist_entry, rb_node_in);
+ next = rb_next(&n->rb_node_in);
+
+ rb_erase_cached(&n->rb_node_in, root);
+ ret = hists__collapse_insert_entry(hists, &hists->entries_collapsed, n);
+ if (ret < 0)
+ return -1;
+
+ if (ret) {
+ /*
+ * If it wasn't combined with one of the entries already
+ * collapsed, we need to apply the filters that may have
+ * been set by, say, the hist_browser.
+ */
+ hists__apply_filters(hists, n);
+ }
+ if (prog)
+ ui_progress__update(prog, 1);
+ }
+ return 0;
+}
+
+static int64_t hist_entry__sort(struct hist_entry *a, struct hist_entry *b)
+{
+ struct hists *hists = a->hists;
+ struct perf_hpp_fmt *fmt;
+ int64_t cmp = 0;
+
+ hists__for_each_sort_list(hists, fmt) {
+ if (perf_hpp__should_skip(fmt, a->hists))
+ continue;
+
+ cmp = fmt->sort(fmt, a, b);
+ if (cmp)
+ break;
+ }
+
+ return cmp;
+}
+
+static void hists__reset_filter_stats(struct hists *hists)
+{
+ hists->nr_non_filtered_entries = 0;
+ hists->stats.total_non_filtered_period = 0;
+}
+
+void hists__reset_stats(struct hists *hists)
+{
+ hists->nr_entries = 0;
+ hists->stats.total_period = 0;
+
+ hists__reset_filter_stats(hists);
+}
+
+static void hists__inc_filter_stats(struct hists *hists, struct hist_entry *h)
+{
+ hists->nr_non_filtered_entries++;
+ hists->stats.total_non_filtered_period += h->stat.period;
+}
+
+void hists__inc_stats(struct hists *hists, struct hist_entry *h)
+{
+ if (!h->filtered)
+ hists__inc_filter_stats(hists, h);
+
+ hists->nr_entries++;
+ hists->stats.total_period += h->stat.period;
+}
+
+static void hierarchy_recalc_total_periods(struct hists *hists)
+{
+ struct rb_node *node;
+ struct hist_entry *he;
+
+ node = rb_first_cached(&hists->entries);
+
+ hists->stats.total_period = 0;
+ hists->stats.total_non_filtered_period = 0;
+
+ /*
+ * recalculate total period using top-level entries only
+ * since lower level entries only see non-filtered entries
+ * but upper level entries have sum of both entries.
+ */
+ while (node) {
+ he = rb_entry(node, struct hist_entry, rb_node);
+ node = rb_next(node);
+
+ hists->stats.total_period += he->stat.period;
+ if (!he->filtered)
+ hists->stats.total_non_filtered_period += he->stat.period;
+ }
+}
+
+static void hierarchy_insert_output_entry(struct rb_root_cached *root,
+ struct hist_entry *he)
+{
+ struct rb_node **p = &root->rb_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct hist_entry *iter;
+ struct perf_hpp_fmt *fmt;
+ bool leftmost = true;
+
+ while (*p != NULL) {
+ parent = *p;
+ iter = rb_entry(parent, struct hist_entry, rb_node);
+
+ if (hist_entry__sort(he, iter) > 0)
+ p = &parent->rb_left;
+ else {
+ p = &parent->rb_right;
+ leftmost = false;
+ }
+ }
+
+ rb_link_node(&he->rb_node, parent, p);
+ rb_insert_color_cached(&he->rb_node, root, leftmost);
+
+ /* update column width of dynamic entry */
+ perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
+ if (fmt->init)
+ fmt->init(fmt, he);
+ }
+}
+
+static void hists__hierarchy_output_resort(struct hists *hists,
+ struct ui_progress *prog,
+ struct rb_root_cached *root_in,
+ struct rb_root_cached *root_out,
+ u64 min_callchain_hits,
+ bool use_callchain)
+{
+ struct rb_node *node;
+ struct hist_entry *he;
+
+ *root_out = RB_ROOT_CACHED;
+ node = rb_first_cached(root_in);
+
+ while (node) {
+ he = rb_entry(node, struct hist_entry, rb_node_in);
+ node = rb_next(node);
+
+ hierarchy_insert_output_entry(root_out, he);
+
+ if (prog)
+ ui_progress__update(prog, 1);
+
+ hists->nr_entries++;
+ if (!he->filtered) {
+ hists->nr_non_filtered_entries++;
+ hists__calc_col_len(hists, he);
+ }
+
+ if (!he->leaf) {
+ hists__hierarchy_output_resort(hists, prog,
+ &he->hroot_in,
+ &he->hroot_out,
+ min_callchain_hits,
+ use_callchain);
+ continue;
+ }
+
+ if (!use_callchain)
+ continue;
+
+ if (callchain_param.mode == CHAIN_GRAPH_REL) {
+ u64 total = he->stat.period;
+
+ if (symbol_conf.cumulate_callchain)
+ total = he->stat_acc->period;
+
+ min_callchain_hits = total * (callchain_param.min_percent / 100);
+ }
+
+ callchain_param.sort(&he->sorted_chain, he->callchain,
+ min_callchain_hits, &callchain_param);
+ }
+}
+
+static void __hists__insert_output_entry(struct rb_root_cached *entries,
+ struct hist_entry *he,
+ u64 min_callchain_hits,
+ bool use_callchain)
+{
+ struct rb_node **p = &entries->rb_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct hist_entry *iter;
+ struct perf_hpp_fmt *fmt;
+ bool leftmost = true;
+
+ if (use_callchain) {
+ if (callchain_param.mode == CHAIN_GRAPH_REL) {
+ u64 total = he->stat.period;
+
+ if (symbol_conf.cumulate_callchain)
+ total = he->stat_acc->period;
+
+ min_callchain_hits = total * (callchain_param.min_percent / 100);
+ }
+ callchain_param.sort(&he->sorted_chain, he->callchain,
+ min_callchain_hits, &callchain_param);
+ }
+
+ while (*p != NULL) {
+ parent = *p;
+ iter = rb_entry(parent, struct hist_entry, rb_node);
+
+ if (hist_entry__sort(he, iter) > 0)
+ p = &(*p)->rb_left;
+ else {
+ p = &(*p)->rb_right;
+ leftmost = false;
+ }
+ }
+
+ rb_link_node(&he->rb_node, parent, p);
+ rb_insert_color_cached(&he->rb_node, entries, leftmost);
+
+ /* update column width of dynamic entries */
+ perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) {
+ if (fmt->init)
+ fmt->init(fmt, he);
+ }
+}
+
+static void output_resort(struct hists *hists, struct ui_progress *prog,
+ bool use_callchain, hists__resort_cb_t cb,
+ void *cb_arg)
+{
+ struct rb_root_cached *root;
+ struct rb_node *next;
+ struct hist_entry *n;
+ u64 callchain_total;
+ u64 min_callchain_hits;
+
+ callchain_total = hists->callchain_period;
+ if (symbol_conf.filter_relative)
+ callchain_total = hists->callchain_non_filtered_period;
+
+ min_callchain_hits = callchain_total * (callchain_param.min_percent / 100);
+
+ hists__reset_stats(hists);
+ hists__reset_col_len(hists);
+
+ if (symbol_conf.report_hierarchy) {
+ hists__hierarchy_output_resort(hists, prog,
+ &hists->entries_collapsed,
+ &hists->entries,
+ min_callchain_hits,
+ use_callchain);
+ hierarchy_recalc_total_periods(hists);
+ return;
+ }
+
+ if (hists__has(hists, need_collapse))
+ root = &hists->entries_collapsed;
+ else
+ root = hists->entries_in;
+
+ next = rb_first_cached(root);
+ hists->entries = RB_ROOT_CACHED;
+
+ while (next) {
+ n = rb_entry(next, struct hist_entry, rb_node_in);
+ next = rb_next(&n->rb_node_in);
+
+ if (cb && cb(n, cb_arg))
+ continue;
+
+ __hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain);
+ hists__inc_stats(hists, n);
+
+ if (!n->filtered)
+ hists__calc_col_len(hists, n);
+
+ if (prog)
+ ui_progress__update(prog, 1);
+ }
+}
+
+void evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog,
+ hists__resort_cb_t cb, void *cb_arg)
+{
+ bool use_callchain;
+
+ if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph)
+ use_callchain = evsel__has_callchain(evsel);
+ else
+ use_callchain = symbol_conf.use_callchain;
+
+ use_callchain |= symbol_conf.show_branchflag_count;
+
+ output_resort(evsel__hists(evsel), prog, use_callchain, cb, cb_arg);
+}
+
+void evsel__output_resort(struct evsel *evsel, struct ui_progress *prog)
+{
+ return evsel__output_resort_cb(evsel, prog, NULL, NULL);
+}
+
+void hists__output_resort(struct hists *hists, struct ui_progress *prog)
+{
+ output_resort(hists, prog, symbol_conf.use_callchain, NULL, NULL);
+}
+
+void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog,
+ hists__resort_cb_t cb)
+{
+ output_resort(hists, prog, symbol_conf.use_callchain, cb, NULL);
+}
+
+static bool can_goto_child(struct hist_entry *he, enum hierarchy_move_dir hmd)
+{
+ if (he->leaf || hmd == HMD_FORCE_SIBLING)
+ return false;
+
+ if (he->unfolded || hmd == HMD_FORCE_CHILD)
+ return true;
+
+ return false;
+}
+
+struct rb_node *rb_hierarchy_last(struct rb_node *node)
+{
+ struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+ while (can_goto_child(he, HMD_NORMAL)) {
+ node = rb_last(&he->hroot_out.rb_root);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ }
+ return node;
+}
+
+struct rb_node *__rb_hierarchy_next(struct rb_node *node, enum hierarchy_move_dir hmd)
+{
+ struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+ if (can_goto_child(he, hmd))
+ node = rb_first_cached(&he->hroot_out);
+ else
+ node = rb_next(node);
+
+ while (node == NULL) {
+ he = he->parent_he;
+ if (he == NULL)
+ break;
+
+ node = rb_next(&he->rb_node);
+ }
+ return node;
+}
+
+struct rb_node *rb_hierarchy_prev(struct rb_node *node)
+{
+ struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+ node = rb_prev(node);
+ if (node)
+ return rb_hierarchy_last(node);
+
+ he = he->parent_he;
+ if (he == NULL)
+ return NULL;
+
+ return &he->rb_node;
+}
+
+bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit)
+{
+ struct rb_node *node;
+ struct hist_entry *child;
+ float percent;
+
+ if (he->leaf)
+ return false;
+
+ node = rb_first_cached(&he->hroot_out);
+ child = rb_entry(node, struct hist_entry, rb_node);
+
+ while (node && child->filtered) {
+ node = rb_next(node);
+ child = rb_entry(node, struct hist_entry, rb_node);
+ }
+
+ if (node)
+ percent = hist_entry__get_percent_limit(child);
+ else
+ percent = 0;
+
+ return node && percent >= limit;
+}
+
+static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h,
+ enum hist_filter filter)
+{
+ h->filtered &= ~(1 << filter);
+
+ if (symbol_conf.report_hierarchy) {
+ struct hist_entry *parent = h->parent_he;
+
+ while (parent) {
+ he_stat__add_stat(&parent->stat, &h->stat);
+
+ parent->filtered &= ~(1 << filter);
+
+ if (parent->filtered)
+ goto next;
+
+ /* force fold unfiltered entry for simplicity */
+ parent->unfolded = false;
+ parent->has_no_entry = false;
+ parent->row_offset = 0;
+ parent->nr_rows = 0;
+next:
+ parent = parent->parent_he;
+ }
+ }
+
+ if (h->filtered)
+ return;
+
+ /* force fold unfiltered entry for simplicity */
+ h->unfolded = false;
+ h->has_no_entry = false;
+ h->row_offset = 0;
+ h->nr_rows = 0;
+
+ hists->stats.nr_non_filtered_samples += h->stat.nr_events;
+
+ hists__inc_filter_stats(hists, h);
+ hists__calc_col_len(hists, h);
+}
+
+
+static bool hists__filter_entry_by_dso(struct hists *hists,
+ struct hist_entry *he)
+{
+ if (hists->dso_filter != NULL &&
+ (he->ms.map == NULL || map__dso(he->ms.map) != hists->dso_filter)) {
+ he->filtered |= (1 << HIST_FILTER__DSO);
+ return true;
+ }
+
+ return false;
+}
+
+static bool hists__filter_entry_by_thread(struct hists *hists,
+ struct hist_entry *he)
+{
+ if (hists->thread_filter != NULL &&
+ RC_CHK_ACCESS(he->thread) != RC_CHK_ACCESS(hists->thread_filter)) {
+ he->filtered |= (1 << HIST_FILTER__THREAD);
+ return true;
+ }
+
+ return false;
+}
+
+static bool hists__filter_entry_by_symbol(struct hists *hists,
+ struct hist_entry *he)
+{
+ if (hists->symbol_filter_str != NULL &&
+ (!he->ms.sym || strstr(he->ms.sym->name,
+ hists->symbol_filter_str) == NULL)) {
+ he->filtered |= (1 << HIST_FILTER__SYMBOL);
+ return true;
+ }
+
+ return false;
+}
+
+static bool hists__filter_entry_by_socket(struct hists *hists,
+ struct hist_entry *he)
+{
+ if ((hists->socket_filter > -1) &&
+ (he->socket != hists->socket_filter)) {
+ he->filtered |= (1 << HIST_FILTER__SOCKET);
+ return true;
+ }
+
+ return false;
+}
+
+typedef bool (*filter_fn_t)(struct hists *hists, struct hist_entry *he);
+
+static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t filter)
+{
+ struct rb_node *nd;
+
+ hists->stats.nr_non_filtered_samples = 0;
+
+ hists__reset_filter_stats(hists);
+ hists__reset_col_len(hists);
+
+ for (nd = rb_first_cached(&hists->entries); nd; nd = rb_next(nd)) {
+ struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+ if (filter(hists, h))
+ continue;
+
+ hists__remove_entry_filter(hists, h, type);
+ }
+}
+
+static void resort_filtered_entry(struct rb_root_cached *root,
+ struct hist_entry *he)
+{
+ struct rb_node **p = &root->rb_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct hist_entry *iter;
+ struct rb_root_cached new_root = RB_ROOT_CACHED;
+ struct rb_node *nd;
+ bool leftmost = true;
+
+ while (*p != NULL) {
+ parent = *p;
+ iter = rb_entry(parent, struct hist_entry, rb_node);
+
+ if (hist_entry__sort(he, iter) > 0)
+ p = &(*p)->rb_left;
+ else {
+ p = &(*p)->rb_right;
+ leftmost = false;
+ }
+ }
+
+ rb_link_node(&he->rb_node, parent, p);
+ rb_insert_color_cached(&he->rb_node, root, leftmost);
+
+ if (he->leaf || he->filtered)
+ return;
+
+ nd = rb_first_cached(&he->hroot_out);
+ while (nd) {
+ struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+ nd = rb_next(nd);
+ rb_erase_cached(&h->rb_node, &he->hroot_out);
+
+ resort_filtered_entry(&new_root, h);
+ }
+
+ he->hroot_out = new_root;
+}
+
+static void hists__filter_hierarchy(struct hists *hists, int type, const void *arg)
+{
+ struct rb_node *nd;
+ struct rb_root_cached new_root = RB_ROOT_CACHED;
+
+ hists->stats.nr_non_filtered_samples = 0;
+
+ hists__reset_filter_stats(hists);
+ hists__reset_col_len(hists);
+
+ nd = rb_first_cached(&hists->entries);
+ while (nd) {
+ struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+ int ret;
+
+ ret = hist_entry__filter(h, type, arg);
+
+ /*
+ * case 1. non-matching type
+ * zero out the period, set filter marker and move to child
+ */
+ if (ret < 0) {
+ memset(&h->stat, 0, sizeof(h->stat));
+ h->filtered |= (1 << type);
+
+ nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_CHILD);
+ }
+ /*
+ * case 2. matched type (filter out)
+ * set filter marker and move to next
+ */
+ else if (ret == 1) {
+ h->filtered |= (1 << type);
+
+ nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING);
+ }
+ /*
+ * case 3. ok (not filtered)
+ * add period to hists and parents, erase the filter marker
+ * and move to next sibling
+ */
+ else {
+ hists__remove_entry_filter(hists, h, type);
+
+ nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING);
+ }
+ }
+
+ hierarchy_recalc_total_periods(hists);
+
+ /*
+ * resort output after applying a new filter since filter in a lower
+ * hierarchy can change periods in a upper hierarchy.
+ */
+ nd = rb_first_cached(&hists->entries);
+ while (nd) {
+ struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+ nd = rb_next(nd);
+ rb_erase_cached(&h->rb_node, &hists->entries);
+
+ resort_filtered_entry(&new_root, h);
+ }
+
+ hists->entries = new_root;
+}
+
+void hists__filter_by_thread(struct hists *hists)
+{
+ if (symbol_conf.report_hierarchy)
+ hists__filter_hierarchy(hists, HIST_FILTER__THREAD,
+ hists->thread_filter);
+ else
+ hists__filter_by_type(hists, HIST_FILTER__THREAD,
+ hists__filter_entry_by_thread);
+}
+
+void hists__filter_by_dso(struct hists *hists)
+{
+ if (symbol_conf.report_hierarchy)
+ hists__filter_hierarchy(hists, HIST_FILTER__DSO,
+ hists->dso_filter);
+ else
+ hists__filter_by_type(hists, HIST_FILTER__DSO,
+ hists__filter_entry_by_dso);
+}
+
+void hists__filter_by_symbol(struct hists *hists)
+{
+ if (symbol_conf.report_hierarchy)
+ hists__filter_hierarchy(hists, HIST_FILTER__SYMBOL,
+ hists->symbol_filter_str);
+ else
+ hists__filter_by_type(hists, HIST_FILTER__SYMBOL,
+ hists__filter_entry_by_symbol);
+}
+
+void hists__filter_by_socket(struct hists *hists)
+{
+ if (symbol_conf.report_hierarchy)
+ hists__filter_hierarchy(hists, HIST_FILTER__SOCKET,
+ &hists->socket_filter);
+ else
+ hists__filter_by_type(hists, HIST_FILTER__SOCKET,
+ hists__filter_entry_by_socket);
+}
+
+void events_stats__inc(struct events_stats *stats, u32 type)
+{
+ ++stats->nr_events[0];
+ ++stats->nr_events[type];
+}
+
+static void hists_stats__inc(struct hists_stats *stats)
+{
+ ++stats->nr_samples;
+}
+
+void hists__inc_nr_events(struct hists *hists)
+{
+ hists_stats__inc(&hists->stats);
+}
+
+void hists__inc_nr_samples(struct hists *hists, bool filtered)
+{
+ hists_stats__inc(&hists->stats);
+ if (!filtered)
+ hists->stats.nr_non_filtered_samples++;
+}
+
+void hists__inc_nr_lost_samples(struct hists *hists, u32 lost)
+{
+ hists->stats.nr_lost_samples += lost;
+}
+
+static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
+ struct hist_entry *pair)
+{
+ struct rb_root_cached *root;
+ struct rb_node **p;
+ struct rb_node *parent = NULL;
+ struct hist_entry *he;
+ int64_t cmp;
+ bool leftmost = true;
+
+ if (hists__has(hists, need_collapse))
+ root = &hists->entries_collapsed;
+ else
+ root = hists->entries_in;
+
+ p = &root->rb_root.rb_node;
+
+ while (*p != NULL) {
+ parent = *p;
+ he = rb_entry(parent, struct hist_entry, rb_node_in);
+
+ cmp = hist_entry__collapse(he, pair);
+
+ if (!cmp)
+ goto out;
+
+ if (cmp < 0)
+ p = &(*p)->rb_left;
+ else {
+ p = &(*p)->rb_right;
+ leftmost = false;
+ }
+ }
+
+ he = hist_entry__new(pair, true);
+ if (he) {
+ memset(&he->stat, 0, sizeof(he->stat));
+ he->hists = hists;
+ if (symbol_conf.cumulate_callchain)
+ memset(he->stat_acc, 0, sizeof(he->stat));
+ rb_link_node(&he->rb_node_in, parent, p);
+ rb_insert_color_cached(&he->rb_node_in, root, leftmost);
+ hists__inc_stats(hists, he);
+ he->dummy = true;
+ }
+out:
+ return he;
+}
+
+static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists,
+ struct rb_root_cached *root,
+ struct hist_entry *pair)
+{
+ struct rb_node **p;
+ struct rb_node *parent = NULL;
+ struct hist_entry *he;
+ struct perf_hpp_fmt *fmt;
+ bool leftmost = true;
+
+ p = &root->rb_root.rb_node;
+ while (*p != NULL) {
+ int64_t cmp = 0;
+
+ parent = *p;
+ he = rb_entry(parent, struct hist_entry, rb_node_in);
+
+ perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
+ cmp = fmt->collapse(fmt, he, pair);
+ if (cmp)
+ break;
+ }
+ if (!cmp)
+ goto out;
+
+ if (cmp < 0)
+ p = &parent->rb_left;
+ else {
+ p = &parent->rb_right;
+ leftmost = false;
+ }
+ }
+
+ he = hist_entry__new(pair, true);
+ if (he) {
+ rb_link_node(&he->rb_node_in, parent, p);
+ rb_insert_color_cached(&he->rb_node_in, root, leftmost);
+
+ he->dummy = true;
+ he->hists = hists;
+ memset(&he->stat, 0, sizeof(he->stat));
+ hists__inc_stats(hists, he);
+ }
+out:
+ return he;
+}
+
+static struct hist_entry *hists__find_entry(struct hists *hists,
+ struct hist_entry *he)
+{
+ struct rb_node *n;
+
+ if (hists__has(hists, need_collapse))
+ n = hists->entries_collapsed.rb_root.rb_node;
+ else
+ n = hists->entries_in->rb_root.rb_node;
+
+ while (n) {
+ struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node_in);
+ int64_t cmp = hist_entry__collapse(iter, he);
+
+ if (cmp < 0)
+ n = n->rb_left;
+ else if (cmp > 0)
+ n = n->rb_right;
+ else
+ return iter;
+ }
+
+ return NULL;
+}
+
+static struct hist_entry *hists__find_hierarchy_entry(struct rb_root_cached *root,
+ struct hist_entry *he)
+{
+ struct rb_node *n = root->rb_root.rb_node;
+
+ while (n) {
+ struct hist_entry *iter;
+ struct perf_hpp_fmt *fmt;
+ int64_t cmp = 0;
+
+ iter = rb_entry(n, struct hist_entry, rb_node_in);
+ perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
+ cmp = fmt->collapse(fmt, iter, he);
+ if (cmp)
+ break;
+ }
+
+ if (cmp < 0)
+ n = n->rb_left;
+ else if (cmp > 0)
+ n = n->rb_right;
+ else
+ return iter;
+ }
+
+ return NULL;
+}
+
+static void hists__match_hierarchy(struct rb_root_cached *leader_root,
+ struct rb_root_cached *other_root)
+{
+ struct rb_node *nd;
+ struct hist_entry *pos, *pair;
+
+ for (nd = rb_first_cached(leader_root); nd; nd = rb_next(nd)) {
+ pos = rb_entry(nd, struct hist_entry, rb_node_in);
+ pair = hists__find_hierarchy_entry(other_root, pos);
+
+ if (pair) {
+ hist_entry__add_pair(pair, pos);
+ hists__match_hierarchy(&pos->hroot_in, &pair->hroot_in);
+ }
+ }
+}
+
+/*
+ * Look for pairs to link to the leader buckets (hist_entries):
+ */
+void hists__match(struct hists *leader, struct hists *other)
+{
+ struct rb_root_cached *root;
+ struct rb_node *nd;
+ struct hist_entry *pos, *pair;
+
+ if (symbol_conf.report_hierarchy) {
+ /* hierarchy report always collapses entries */
+ return hists__match_hierarchy(&leader->entries_collapsed,
+ &other->entries_collapsed);
+ }
+
+ if (hists__has(leader, need_collapse))
+ root = &leader->entries_collapsed;
+ else
+ root = leader->entries_in;
+
+ for (nd = rb_first_cached(root); nd; nd = rb_next(nd)) {
+ pos = rb_entry(nd, struct hist_entry, rb_node_in);
+ pair = hists__find_entry(other, pos);
+
+ if (pair)
+ hist_entry__add_pair(pair, pos);
+ }
+}
+
+static int hists__link_hierarchy(struct hists *leader_hists,
+ struct hist_entry *parent,
+ struct rb_root_cached *leader_root,
+ struct rb_root_cached *other_root)
+{
+ struct rb_node *nd;
+ struct hist_entry *pos, *leader;
+
+ for (nd = rb_first_cached(other_root); nd; nd = rb_next(nd)) {
+ pos = rb_entry(nd, struct hist_entry, rb_node_in);
+
+ if (hist_entry__has_pairs(pos)) {
+ bool found = false;
+
+ list_for_each_entry(leader, &pos->pairs.head, pairs.node) {
+ if (leader->hists == leader_hists) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ return -1;
+ } else {
+ leader = add_dummy_hierarchy_entry(leader_hists,
+ leader_root, pos);
+ if (leader == NULL)
+ return -1;
+
+ /* do not point parent in the pos */
+ leader->parent_he = parent;
+
+ hist_entry__add_pair(pos, leader);
+ }
+
+ if (!pos->leaf) {
+ if (hists__link_hierarchy(leader_hists, leader,
+ &leader->hroot_in,
+ &pos->hroot_in) < 0)
+ return -1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Look for entries in the other hists that are not present in the leader, if
+ * we find them, just add a dummy entry on the leader hists, with period=0,
+ * nr_events=0, to serve as the list header.
+ */
+int hists__link(struct hists *leader, struct hists *other)
+{
+ struct rb_root_cached *root;
+ struct rb_node *nd;
+ struct hist_entry *pos, *pair;
+
+ if (symbol_conf.report_hierarchy) {
+ /* hierarchy report always collapses entries */
+ return hists__link_hierarchy(leader, NULL,
+ &leader->entries_collapsed,
+ &other->entries_collapsed);
+ }
+
+ if (hists__has(other, need_collapse))
+ root = &other->entries_collapsed;
+ else
+ root = other->entries_in;
+
+ for (nd = rb_first_cached(root); nd; nd = rb_next(nd)) {
+ pos = rb_entry(nd, struct hist_entry, rb_node_in);
+
+ if (!hist_entry__has_pairs(pos)) {
+ pair = hists__add_dummy_entry(leader, pos);
+ if (pair == NULL)
+ return -1;
+ hist_entry__add_pair(pos, pair);
+ }
+ }
+
+ return 0;
+}
+
+int hists__unlink(struct hists *hists)
+{
+ struct rb_root_cached *root;
+ struct rb_node *nd;
+ struct hist_entry *pos;
+
+ if (hists__has(hists, need_collapse))
+ root = &hists->entries_collapsed;
+ else
+ root = hists->entries_in;
+
+ for (nd = rb_first_cached(root); nd; nd = rb_next(nd)) {
+ pos = rb_entry(nd, struct hist_entry, rb_node_in);
+ list_del_init(&pos->pairs.node);
+ }
+
+ return 0;
+}
+
+void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
+ struct perf_sample *sample, bool nonany_branch_mode,
+ u64 *total_cycles)
+{
+ struct branch_info *bi;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
+
+ /* If we have branch cycles always annotate them. */
+ if (bs && bs->nr && entries[0].flags.cycles) {
+ bi = sample__resolve_bstack(sample, al);
+ if (bi) {
+ struct addr_map_symbol *prev = NULL;
+
+ /*
+ * Ignore errors, still want to process the
+ * other entries.
+ *
+ * For non standard branch modes always
+ * force no IPC (prev == NULL)
+ *
+ * Note that perf stores branches reversed from
+ * program order!
+ */
+ for (int i = bs->nr - 1; i >= 0; i--) {
+ addr_map_symbol__account_cycles(&bi[i].from,
+ nonany_branch_mode ? NULL : prev,
+ bi[i].flags.cycles);
+ prev = &bi[i].to;
+
+ if (total_cycles)
+ *total_cycles += bi[i].flags.cycles;
+ }
+ for (unsigned int i = 0; i < bs->nr; i++) {
+ map__put(bi[i].to.ms.map);
+ maps__put(bi[i].to.ms.maps);
+ map__put(bi[i].from.ms.map);
+ maps__put(bi[i].from.ms.maps);
+ }
+ free(bi);
+ }
+ }
+}
+
+size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp,
+ bool skip_empty)
+{
+ struct evsel *pos;
+ size_t ret = 0;
+
+ evlist__for_each_entry(evlist, pos) {
+ struct hists *hists = evsel__hists(pos);
+
+ if (skip_empty && !hists->stats.nr_samples && !hists->stats.nr_lost_samples)
+ continue;
+
+ ret += fprintf(fp, "%s stats:\n", evsel__name(pos));
+ if (hists->stats.nr_samples)
+ ret += fprintf(fp, "%16s events: %10d\n",
+ "SAMPLE", hists->stats.nr_samples);
+ if (hists->stats.nr_lost_samples)
+ ret += fprintf(fp, "%16s events: %10d\n",
+ "LOST_SAMPLES", hists->stats.nr_lost_samples);
+ }
+
+ return ret;
+}
+
+
+u64 hists__total_period(struct hists *hists)
+{
+ return symbol_conf.filter_relative ? hists->stats.total_non_filtered_period :
+ hists->stats.total_period;
+}
+
+int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool show_freq)
+{
+ char unit;
+ int printed;
+ const struct dso *dso = hists->dso_filter;
+ struct thread *thread = hists->thread_filter;
+ int socket_id = hists->socket_filter;
+ unsigned long nr_samples = hists->stats.nr_samples;
+ u64 nr_events = hists->stats.total_period;
+ struct evsel *evsel = hists_to_evsel(hists);
+ const char *ev_name = evsel__name(evsel);
+ char buf[512], sample_freq_str[64] = "";
+ size_t buflen = sizeof(buf);
+ char ref[30] = " show reference callgraph, ";
+ bool enable_ref = false;
+
+ if (symbol_conf.filter_relative) {
+ nr_samples = hists->stats.nr_non_filtered_samples;
+ nr_events = hists->stats.total_non_filtered_period;
+ }
+
+ if (evsel__is_group_event(evsel)) {
+ struct evsel *pos;
+
+ evsel__group_desc(evsel, buf, buflen);
+ ev_name = buf;
+
+ for_each_group_member(pos, evsel) {
+ struct hists *pos_hists = evsel__hists(pos);
+
+ if (symbol_conf.filter_relative) {
+ nr_samples += pos_hists->stats.nr_non_filtered_samples;
+ nr_events += pos_hists->stats.total_non_filtered_period;
+ } else {
+ nr_samples += pos_hists->stats.nr_samples;
+ nr_events += pos_hists->stats.total_period;
+ }
+ }
+ }
+
+ if (symbol_conf.show_ref_callgraph &&
+ strstr(ev_name, "call-graph=no"))
+ enable_ref = true;
+
+ if (show_freq)
+ scnprintf(sample_freq_str, sizeof(sample_freq_str), " %d Hz,", evsel->core.attr.sample_freq);
+
+ nr_samples = convert_unit(nr_samples, &unit);
+ printed = scnprintf(bf, size,
+ "Samples: %lu%c of event%s '%s',%s%sEvent count (approx.): %" PRIu64,
+ nr_samples, unit, evsel->core.nr_members > 1 ? "s" : "",
+ ev_name, sample_freq_str, enable_ref ? ref : " ", nr_events);
+
+
+ if (hists->uid_filter_str)
+ printed += snprintf(bf + printed, size - printed,
+ ", UID: %s", hists->uid_filter_str);
+ if (thread) {
+ if (hists__has(hists, thread)) {
+ printed += scnprintf(bf + printed, size - printed,
+ ", Thread: %s(%d)",
+ (thread__comm_set(thread) ? thread__comm_str(thread) : ""),
+ thread__tid(thread));
+ } else {
+ printed += scnprintf(bf + printed, size - printed,
+ ", Thread: %s",
+ (thread__comm_set(thread) ? thread__comm_str(thread) : ""));
+ }
+ }
+ if (dso)
+ printed += scnprintf(bf + printed, size - printed,
+ ", DSO: %s", dso->short_name);
+ if (socket_id > -1)
+ printed += scnprintf(bf + printed, size - printed,
+ ", Processor Socket: %d", socket_id);
+
+ return printed;
+}
+
+int parse_filter_percentage(const struct option *opt __maybe_unused,
+ const char *arg, int unset __maybe_unused)
+{
+ if (!strcmp(arg, "relative"))
+ symbol_conf.filter_relative = true;
+ else if (!strcmp(arg, "absolute"))
+ symbol_conf.filter_relative = false;
+ else {
+ pr_debug("Invalid percentage: %s\n", arg);
+ return -1;
+ }
+
+ return 0;
+}
+
+int perf_hist_config(const char *var, const char *value)
+{
+ if (!strcmp(var, "hist.percentage"))
+ return parse_filter_percentage(NULL, value, 0);
+
+ return 0;
+}
+
+int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list)
+{
+ memset(hists, 0, sizeof(*hists));
+ hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT_CACHED;
+ hists->entries_in = &hists->entries_in_array[0];
+ hists->entries_collapsed = RB_ROOT_CACHED;
+ hists->entries = RB_ROOT_CACHED;
+ mutex_init(&hists->lock);
+ hists->socket_filter = -1;
+ hists->hpp_list = hpp_list;
+ INIT_LIST_HEAD(&hists->hpp_formats);
+ return 0;
+}
+
+static void hists__delete_remaining_entries(struct rb_root_cached *root)
+{
+ struct rb_node *node;
+ struct hist_entry *he;
+
+ while (!RB_EMPTY_ROOT(&root->rb_root)) {
+ node = rb_first_cached(root);
+ rb_erase_cached(node, root);
+
+ he = rb_entry(node, struct hist_entry, rb_node_in);
+ hist_entry__delete(he);
+ }
+}
+
+static void hists__delete_all_entries(struct hists *hists)
+{
+ hists__delete_entries(hists);
+ hists__delete_remaining_entries(&hists->entries_in_array[0]);
+ hists__delete_remaining_entries(&hists->entries_in_array[1]);
+ hists__delete_remaining_entries(&hists->entries_collapsed);
+}
+
+static void hists_evsel__exit(struct evsel *evsel)
+{
+ struct hists *hists = evsel__hists(evsel);
+ struct perf_hpp_fmt *fmt, *pos;
+ struct perf_hpp_list_node *node, *tmp;
+
+ hists__delete_all_entries(hists);
+
+ list_for_each_entry_safe(node, tmp, &hists->hpp_formats, list) {
+ perf_hpp_list__for_each_format_safe(&node->hpp, fmt, pos) {
+ list_del_init(&fmt->list);
+ free(fmt);
+ }
+ list_del_init(&node->list);
+ free(node);
+ }
+}
+
+static int hists_evsel__init(struct evsel *evsel)
+{
+ struct hists *hists = evsel__hists(evsel);
+
+ __hists__init(hists, &perf_hpp_list);
+ return 0;
+}
+
+/*
+ * XXX We probably need a hists_evsel__exit() to free the hist_entries
+ * stored in the rbtree...
+ */
+
+int hists__init(void)
+{
+ int err = evsel__object_config(sizeof(struct hists_evsel),
+ hists_evsel__init, hists_evsel__exit);
+ if (err)
+ fputs("FATAL ERROR: Couldn't setup hists class\n", stderr);
+
+ return err;
+}
+
+void perf_hpp_list__init(struct perf_hpp_list *list)
+{
+ INIT_LIST_HEAD(&list->fields);
+ INIT_LIST_HEAD(&list->sorts);
+}
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
new file mode 100644
index 000000000..afc9f1c7f
--- /dev/null
+++ b/tools/perf/util/hist.h
@@ -0,0 +1,600 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_HIST_H
+#define __PERF_HIST_H
+
+#include <linux/rbtree.h>
+#include <linux/types.h>
+#include "evsel.h"
+#include "color.h"
+#include "events_stats.h"
+#include "mutex.h"
+
+struct hist_entry;
+struct hist_entry_ops;
+struct addr_location;
+struct map_symbol;
+struct mem_info;
+struct kvm_info;
+struct branch_info;
+struct branch_stack;
+struct block_info;
+struct symbol;
+struct ui_progress;
+
+enum hist_filter {
+ HIST_FILTER__DSO,
+ HIST_FILTER__THREAD,
+ HIST_FILTER__PARENT,
+ HIST_FILTER__SYMBOL,
+ HIST_FILTER__GUEST,
+ HIST_FILTER__HOST,
+ HIST_FILTER__SOCKET,
+ HIST_FILTER__C2C,
+};
+
+enum hist_column {
+ HISTC_SYMBOL,
+ HISTC_TIME,
+ HISTC_DSO,
+ HISTC_THREAD,
+ HISTC_COMM,
+ HISTC_CGROUP_ID,
+ HISTC_CGROUP,
+ HISTC_PARENT,
+ HISTC_CPU,
+ HISTC_SOCKET,
+ HISTC_SRCLINE,
+ HISTC_SRCFILE,
+ HISTC_MISPREDICT,
+ HISTC_IN_TX,
+ HISTC_ABORT,
+ HISTC_SYMBOL_FROM,
+ HISTC_SYMBOL_TO,
+ HISTC_DSO_FROM,
+ HISTC_DSO_TO,
+ HISTC_LOCAL_WEIGHT,
+ HISTC_GLOBAL_WEIGHT,
+ HISTC_CODE_PAGE_SIZE,
+ HISTC_MEM_DADDR_SYMBOL,
+ HISTC_MEM_DADDR_DSO,
+ HISTC_MEM_PHYS_DADDR,
+ HISTC_MEM_DATA_PAGE_SIZE,
+ HISTC_MEM_LOCKED,
+ HISTC_MEM_TLB,
+ HISTC_MEM_LVL,
+ HISTC_MEM_SNOOP,
+ HISTC_MEM_DCACHELINE,
+ HISTC_MEM_IADDR_SYMBOL,
+ HISTC_TRANSACTION,
+ HISTC_CYCLES,
+ HISTC_SRCLINE_FROM,
+ HISTC_SRCLINE_TO,
+ HISTC_TRACE,
+ HISTC_SYM_SIZE,
+ HISTC_DSO_SIZE,
+ HISTC_SYMBOL_IPC,
+ HISTC_MEM_BLOCKED,
+ HISTC_LOCAL_INS_LAT,
+ HISTC_GLOBAL_INS_LAT,
+ HISTC_LOCAL_P_STAGE_CYC,
+ HISTC_GLOBAL_P_STAGE_CYC,
+ HISTC_ADDR_FROM,
+ HISTC_ADDR_TO,
+ HISTC_ADDR,
+ HISTC_SIMD,
+ HISTC_NR_COLS, /* Last entry */
+};
+
+struct thread;
+struct dso;
+
+struct hists {
+ struct rb_root_cached entries_in_array[2];
+ struct rb_root_cached *entries_in;
+ struct rb_root_cached entries;
+ struct rb_root_cached entries_collapsed;
+ u64 nr_entries;
+ u64 nr_non_filtered_entries;
+ u64 callchain_period;
+ u64 callchain_non_filtered_period;
+ struct thread *thread_filter;
+ const struct dso *dso_filter;
+ const char *uid_filter_str;
+ const char *symbol_filter_str;
+ struct mutex lock;
+ struct hists_stats stats;
+ u64 event_stream;
+ u16 col_len[HISTC_NR_COLS];
+ bool has_callchains;
+ int socket_filter;
+ struct perf_hpp_list *hpp_list;
+ struct list_head hpp_formats;
+ int nr_hpp_node;
+};
+
+#define hists__has(__h, __f) (__h)->hpp_list->__f
+
+struct hist_entry_iter;
+
+struct hist_iter_ops {
+ int (*prepare_entry)(struct hist_entry_iter *, struct addr_location *);
+ int (*add_single_entry)(struct hist_entry_iter *, struct addr_location *);
+ int (*next_entry)(struct hist_entry_iter *, struct addr_location *);
+ int (*add_next_entry)(struct hist_entry_iter *, struct addr_location *);
+ int (*finish_entry)(struct hist_entry_iter *, struct addr_location *);
+};
+
+struct hist_entry_iter {
+ int total;
+ int curr;
+
+ bool hide_unresolved;
+
+ struct evsel *evsel;
+ struct perf_sample *sample;
+ struct hist_entry *he;
+ struct symbol *parent;
+ void *priv;
+
+ const struct hist_iter_ops *ops;
+ /* user-defined callback function (optional) */
+ int (*add_entry_cb)(struct hist_entry_iter *iter,
+ struct addr_location *al, bool single, void *arg);
+};
+
+extern const struct hist_iter_ops hist_iter_normal;
+extern const struct hist_iter_ops hist_iter_branch;
+extern const struct hist_iter_ops hist_iter_mem;
+extern const struct hist_iter_ops hist_iter_cumulative;
+
+struct hist_entry *hists__add_entry(struct hists *hists,
+ struct addr_location *al,
+ struct symbol *parent,
+ struct branch_info *bi,
+ struct mem_info *mi,
+ struct kvm_info *ki,
+ struct perf_sample *sample,
+ bool sample_self);
+
+struct hist_entry *hists__add_entry_ops(struct hists *hists,
+ struct hist_entry_ops *ops,
+ struct addr_location *al,
+ struct symbol *sym_parent,
+ struct branch_info *bi,
+ struct mem_info *mi,
+ struct kvm_info *ki,
+ struct perf_sample *sample,
+ bool sample_self);
+
+struct hist_entry *hists__add_entry_block(struct hists *hists,
+ struct addr_location *al,
+ struct block_info *bi);
+
+int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
+ int max_stack_depth, void *arg);
+
+struct perf_hpp;
+struct perf_hpp_fmt;
+
+int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
+int hist_entry__transaction_len(void);
+int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size,
+ struct hists *hists);
+int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp,
+ struct perf_hpp_fmt *fmt, int printed);
+void hist_entry__delete(struct hist_entry *he);
+
+typedef int (*hists__resort_cb_t)(struct hist_entry *he, void *arg);
+
+void evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog,
+ hists__resort_cb_t cb, void *cb_arg);
+void evsel__output_resort(struct evsel *evsel, struct ui_progress *prog);
+void hists__output_resort(struct hists *hists, struct ui_progress *prog);
+void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog,
+ hists__resort_cb_t cb);
+int hists__collapse_resort(struct hists *hists, struct ui_progress *prog);
+
+void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel);
+void hists__delete_entries(struct hists *hists);
+void hists__output_recalc_col_len(struct hists *hists, int max_rows);
+
+struct hist_entry *hists__get_entry(struct hists *hists, int idx);
+
+u64 hists__total_period(struct hists *hists);
+void hists__reset_stats(struct hists *hists);
+void hists__inc_stats(struct hists *hists, struct hist_entry *h);
+void hists__inc_nr_events(struct hists *hists);
+void hists__inc_nr_samples(struct hists *hists, bool filtered);
+void hists__inc_nr_lost_samples(struct hists *hists, u32 lost);
+
+size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
+ int max_cols, float min_pcnt, FILE *fp,
+ bool ignore_callchains);
+size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp,
+ bool skip_empty);
+
+void hists__filter_by_dso(struct hists *hists);
+void hists__filter_by_thread(struct hists *hists);
+void hists__filter_by_symbol(struct hists *hists);
+void hists__filter_by_socket(struct hists *hists);
+
+static inline bool hists__has_filter(struct hists *hists)
+{
+ return hists->thread_filter || hists->dso_filter ||
+ hists->symbol_filter_str || (hists->socket_filter > -1);
+}
+
+u16 hists__col_len(struct hists *hists, enum hist_column col);
+void hists__set_col_len(struct hists *hists, enum hist_column col, u16 len);
+bool hists__new_col_len(struct hists *hists, enum hist_column col, u16 len);
+void hists__reset_col_len(struct hists *hists);
+void hists__calc_col_len(struct hists *hists, struct hist_entry *he);
+
+void hists__match(struct hists *leader, struct hists *other);
+int hists__link(struct hists *leader, struct hists *other);
+int hists__unlink(struct hists *hists);
+
+struct hists_evsel {
+ struct evsel evsel;
+ struct hists hists;
+};
+
+static inline struct evsel *hists_to_evsel(struct hists *hists)
+{
+ struct hists_evsel *hevsel = container_of(hists, struct hists_evsel, hists);
+ return &hevsel->evsel;
+}
+
+static inline struct hists *evsel__hists(struct evsel *evsel)
+{
+ struct hists_evsel *hevsel = (struct hists_evsel *)evsel;
+ return &hevsel->hists;
+}
+
+static __pure inline bool hists__has_callchains(struct hists *hists)
+{
+ return hists->has_callchains;
+}
+
+int hists__init(void);
+int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list);
+
+struct rb_root_cached *hists__get_rotate_entries_in(struct hists *hists);
+
+struct perf_hpp {
+ char *buf;
+ size_t size;
+ const char *sep;
+ void *ptr;
+ bool skip;
+};
+
+struct perf_hpp_fmt {
+ const char *name;
+ int (*header)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hists *hists, int line, int *span);
+ int (*width)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hists *hists);
+ void (*init)(struct perf_hpp_fmt *fmt, struct hist_entry *he);
+ int (*color)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he);
+ int (*entry)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he);
+ int64_t (*cmp)(struct perf_hpp_fmt *fmt,
+ struct hist_entry *a, struct hist_entry *b);
+ int64_t (*collapse)(struct perf_hpp_fmt *fmt,
+ struct hist_entry *a, struct hist_entry *b);
+ int64_t (*sort)(struct perf_hpp_fmt *fmt,
+ struct hist_entry *a, struct hist_entry *b);
+ bool (*equal)(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b);
+ void (*free)(struct perf_hpp_fmt *fmt);
+
+ struct list_head list;
+ struct list_head sort_list;
+ bool elide;
+ int len;
+ int user_len;
+ int idx;
+ int level;
+};
+
+struct perf_hpp_list {
+ struct list_head fields;
+ struct list_head sorts;
+
+ int nr_header_lines;
+ int need_collapse;
+ int parent;
+ int sym;
+ int dso;
+ int socket;
+ int thread;
+ int comm;
+};
+
+extern struct perf_hpp_list perf_hpp_list;
+
+struct perf_hpp_list_node {
+ struct list_head list;
+ struct perf_hpp_list hpp;
+ int level;
+ bool skip;
+};
+
+void perf_hpp_list__column_register(struct perf_hpp_list *list,
+ struct perf_hpp_fmt *format);
+void perf_hpp_list__register_sort_field(struct perf_hpp_list *list,
+ struct perf_hpp_fmt *format);
+void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list,
+ struct perf_hpp_fmt *format);
+
+static inline void perf_hpp__column_register(struct perf_hpp_fmt *format)
+{
+ perf_hpp_list__column_register(&perf_hpp_list, format);
+}
+
+static inline void perf_hpp__register_sort_field(struct perf_hpp_fmt *format)
+{
+ perf_hpp_list__register_sort_field(&perf_hpp_list, format);
+}
+
+static inline void perf_hpp__prepend_sort_field(struct perf_hpp_fmt *format)
+{
+ perf_hpp_list__prepend_sort_field(&perf_hpp_list, format);
+}
+
+#define perf_hpp_list__for_each_format(_list, format) \
+ list_for_each_entry(format, &(_list)->fields, list)
+
+#define perf_hpp_list__for_each_format_safe(_list, format, tmp) \
+ list_for_each_entry_safe(format, tmp, &(_list)->fields, list)
+
+#define perf_hpp_list__for_each_sort_list(_list, format) \
+ list_for_each_entry(format, &(_list)->sorts, sort_list)
+
+#define perf_hpp_list__for_each_sort_list_safe(_list, format, tmp) \
+ list_for_each_entry_safe(format, tmp, &(_list)->sorts, sort_list)
+
+#define hists__for_each_format(hists, format) \
+ perf_hpp_list__for_each_format((hists)->hpp_list, format)
+
+#define hists__for_each_sort_list(hists, format) \
+ perf_hpp_list__for_each_sort_list((hists)->hpp_list, format)
+
+extern struct perf_hpp_fmt perf_hpp__format[];
+
+enum {
+ /* Matches perf_hpp__format array. */
+ PERF_HPP__OVERHEAD,
+ PERF_HPP__OVERHEAD_SYS,
+ PERF_HPP__OVERHEAD_US,
+ PERF_HPP__OVERHEAD_GUEST_SYS,
+ PERF_HPP__OVERHEAD_GUEST_US,
+ PERF_HPP__OVERHEAD_ACC,
+ PERF_HPP__SAMPLES,
+ PERF_HPP__PERIOD,
+
+ PERF_HPP__MAX_INDEX
+};
+
+void perf_hpp__init(void);
+void perf_hpp__cancel_cumulate(void);
+void perf_hpp__setup_output_field(struct perf_hpp_list *list);
+void perf_hpp__reset_output_field(struct perf_hpp_list *list);
+void perf_hpp__append_sort_keys(struct perf_hpp_list *list);
+int perf_hpp__setup_hists_formats(struct perf_hpp_list *list,
+ struct evlist *evlist);
+
+
+bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format);
+bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *format);
+bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists);
+bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt);
+
+struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt);
+
+int hist_entry__filter(struct hist_entry *he, int type, const void *arg);
+
+static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format,
+ struct hists *hists)
+{
+ if (format->elide)
+ return true;
+
+ if (perf_hpp__is_dynamic_entry(format) &&
+ !perf_hpp__defined_dynamic_entry(format, hists))
+ return true;
+
+ return false;
+}
+
+void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists);
+void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists);
+void perf_hpp__set_user_width(const char *width_list_str);
+void hists__reset_column_width(struct hists *hists);
+
+typedef u64 (*hpp_field_fn)(struct hist_entry *he);
+typedef int (*hpp_callback_fn)(struct perf_hpp *hpp, bool front);
+typedef int (*hpp_snprint_fn)(struct perf_hpp *hpp, const char *fmt, ...);
+
+int hpp__fmt(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he, hpp_field_fn get_field,
+ const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent);
+int hpp__fmt_acc(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he, hpp_field_fn get_field,
+ const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent);
+
+static inline void advance_hpp(struct perf_hpp *hpp, int inc)
+{
+ hpp->buf += inc;
+ hpp->size -= inc;
+}
+
+static inline size_t perf_hpp__use_color(void)
+{
+ return !symbol_conf.field_sep;
+}
+
+static inline size_t perf_hpp__color_overhead(void)
+{
+ return perf_hpp__use_color() ?
+ (COLOR_MAXLEN + sizeof(PERF_COLOR_RESET)) * PERF_HPP__MAX_INDEX
+ : 0;
+}
+
+struct evlist;
+
+struct hist_browser_timer {
+ void (*timer)(void *arg);
+ void *arg;
+ int refresh;
+};
+
+struct annotation_options;
+struct res_sample;
+
+enum rstype {
+ A_NORMAL,
+ A_ASM,
+ A_SOURCE
+};
+
+struct block_hist;
+
+#ifdef HAVE_SLANG_SUPPORT
+#include "../ui/keysyms.h"
+void attr_to_script(char *buf, struct perf_event_attr *attr);
+
+int map_symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
+ struct hist_browser_timer *hbt,
+ struct annotation_options *annotation_opts);
+
+int hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel,
+ struct hist_browser_timer *hbt,
+ struct annotation_options *annotation_opts);
+
+int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt,
+ float min_pcnt, struct perf_env *env, bool warn_lost_event,
+ struct annotation_options *annotation_options);
+
+int script_browse(const char *script_opt, struct evsel *evsel);
+
+void run_script(char *cmd);
+int res_sample_browse(struct res_sample *res_samples, int num_res,
+ struct evsel *evsel, enum rstype rstype);
+void res_sample_init(void);
+
+int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel,
+ float min_percent, struct perf_env *env,
+ struct annotation_options *annotation_opts);
+#else
+static inline
+int evlist__tui_browse_hists(struct evlist *evlist __maybe_unused,
+ const char *help __maybe_unused,
+ struct hist_browser_timer *hbt __maybe_unused,
+ float min_pcnt __maybe_unused,
+ struct perf_env *env __maybe_unused,
+ bool warn_lost_event __maybe_unused,
+ struct annotation_options *annotation_options __maybe_unused)
+{
+ return 0;
+}
+static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused,
+ struct evsel *evsel __maybe_unused,
+ struct hist_browser_timer *hbt __maybe_unused,
+ struct annotation_options *annotation_options __maybe_unused)
+{
+ return 0;
+}
+
+static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused,
+ struct evsel *evsel __maybe_unused,
+ struct hist_browser_timer *hbt __maybe_unused,
+ struct annotation_options *annotation_opts __maybe_unused)
+{
+ return 0;
+}
+
+static inline int script_browse(const char *script_opt __maybe_unused,
+ struct evsel *evsel __maybe_unused)
+{
+ return 0;
+}
+
+static inline int res_sample_browse(struct res_sample *res_samples __maybe_unused,
+ int num_res __maybe_unused,
+ struct evsel *evsel __maybe_unused,
+ enum rstype rstype __maybe_unused)
+{
+ return 0;
+}
+
+static inline void res_sample_init(void) {}
+
+static inline int block_hists_tui_browse(struct block_hist *bh __maybe_unused,
+ struct evsel *evsel __maybe_unused,
+ float min_percent __maybe_unused,
+ struct perf_env *env __maybe_unused,
+ struct annotation_options *annotation_opts __maybe_unused)
+{
+ return 0;
+}
+
+#define K_LEFT -1000
+#define K_RIGHT -2000
+#define K_SWITCH_INPUT_DATA -3000
+#define K_RELOAD -4000
+#endif
+
+unsigned int hists__sort_list_width(struct hists *hists);
+unsigned int hists__overhead_width(struct hists *hists);
+
+void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
+ struct perf_sample *sample, bool nonany_branch_mode,
+ u64 *total_cycles);
+
+struct option;
+int parse_filter_percentage(const struct option *opt, const char *arg, int unset);
+int perf_hist_config(const char *var, const char *value);
+
+void perf_hpp_list__init(struct perf_hpp_list *list);
+
+enum hierarchy_move_dir {
+ HMD_NORMAL,
+ HMD_FORCE_SIBLING,
+ HMD_FORCE_CHILD,
+};
+
+struct rb_node *rb_hierarchy_last(struct rb_node *node);
+struct rb_node *__rb_hierarchy_next(struct rb_node *node,
+ enum hierarchy_move_dir hmd);
+struct rb_node *rb_hierarchy_prev(struct rb_node *node);
+
+static inline struct rb_node *rb_hierarchy_next(struct rb_node *node)
+{
+ return __rb_hierarchy_next(node, HMD_NORMAL);
+}
+
+#define HIERARCHY_INDENT 3
+
+bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit);
+int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...);
+int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...);
+int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
+ struct perf_hpp_list *hpp_list);
+int hists__fprintf_headers(struct hists *hists, FILE *fp);
+int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool show_freq);
+
+static inline int hists__scnprintf_title(struct hists *hists, char *bf, size_t size)
+{
+ return __hists__scnprintf_title(hists, bf, size, true);
+}
+
+#endif /* __PERF_HIST_H */
diff --git a/tools/perf/util/include/asm/asm-offsets.h b/tools/perf/util/include/asm/asm-offsets.h
new file mode 100644
index 000000000..3aff4cf44
--- /dev/null
+++ b/tools/perf/util/include/asm/asm-offsets.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* stub */
diff --git a/tools/perf/util/include/asm/cpufeature.h b/tools/perf/util/include/asm/cpufeature.h
new file mode 100644
index 000000000..2270481c7
--- /dev/null
+++ b/tools/perf/util/include/asm/cpufeature.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef PERF_CPUFEATURE_H
+#define PERF_CPUFEATURE_H
+
+/* cpufeature.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
+
+#define X86_FEATURE_REP_GOOD 0
+
+#endif /* PERF_CPUFEATURE_H */
diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h
new file mode 100644
index 000000000..e9876be63
--- /dev/null
+++ b/tools/perf/util/include/asm/dwarf2.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef PERF_DWARF2_H
+#define PERF_DWARF2_H
+
+/* dwarf2.h ... dummy header file for including arch/x86/lib/mem{cpy,set}_64.S */
+
+#define CFI_STARTPROC
+#define CFI_ENDPROC
+#define CFI_REMEMBER_STATE
+#define CFI_RESTORE_STATE
+
+#endif /* PERF_DWARF2_H */
+
diff --git a/tools/perf/util/include/asm/swab.h b/tools/perf/util/include/asm/swab.h
new file mode 100644
index 000000000..ed5389425
--- /dev/null
+++ b/tools/perf/util/include/asm/swab.h
@@ -0,0 +1 @@
+/* stub */
diff --git a/tools/perf/util/include/asm/system.h b/tools/perf/util/include/asm/system.h
new file mode 100644
index 000000000..710cecca9
--- /dev/null
+++ b/tools/perf/util/include/asm/system.h
@@ -0,0 +1 @@
+/* Empty */
diff --git a/tools/perf/util/include/asm/uaccess.h b/tools/perf/util/include/asm/uaccess.h
new file mode 100644
index 000000000..548100315
--- /dev/null
+++ b/tools/perf/util/include/asm/uaccess.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_ASM_UACCESS_H_
+#define _PERF_ASM_UACCESS_H_
+
+#define __get_user(src, dest) \
+({ \
+ (src) = *dest; \
+ 0; \
+})
+
+#define get_user __get_user
+
+#define access_ok(addr, size) 1
+
+#endif
diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h
new file mode 100644
index 000000000..7d99a084e
--- /dev/null
+++ b/tools/perf/util/include/dwarf-regs.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_DWARF_REGS_H_
+#define _PERF_DWARF_REGS_H_
+
+#ifdef HAVE_DWARF_SUPPORT
+const char *get_arch_regstr(unsigned int n);
+/*
+ * get_dwarf_regstr - Returns ftrace register string from DWARF regnum
+ * n: DWARF register number
+ * machine: ELF machine signature (EM_*)
+ */
+const char *get_dwarf_regstr(unsigned int n, unsigned int machine);
+#endif
+
+#ifdef HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
+/*
+ * Arch should support fetching the offset of a register in pt_regs
+ * by its name. See kernel's regs_query_register_offset in
+ * arch/xxx/kernel/ptrace.c.
+ */
+int regs_query_register_offset(const char *name);
+#endif
+#endif
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
new file mode 100644
index 000000000..75e224841
--- /dev/null
+++ b/tools/perf/util/include/linux/linkage.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef PERF_LINUX_LINKAGE_H_
+#define PERF_LINUX_LINKAGE_H_
+
+/* linkage.h ... for including arch/x86/lib/memcpy_64.S */
+
+/* Some toolchains use other characters (e.g. '`') to mark new line in macro */
+#ifndef ASM_NL
+#define ASM_NL ;
+#endif
+
+#ifndef __ALIGN
+#define __ALIGN .align 4,0x90
+#define __ALIGN_STR ".align 4,0x90"
+#endif
+
+/* SYM_T_FUNC -- type used by assembler to mark functions */
+#ifndef SYM_T_FUNC
+#define SYM_T_FUNC STT_FUNC
+#endif
+
+/* SYM_A_* -- align the symbol? */
+#define SYM_A_ALIGN ALIGN
+
+/* SYM_L_* -- linkage of symbols */
+#define SYM_L_GLOBAL(name) .globl name
+#define SYM_L_WEAK(name) .weak name
+#define SYM_L_LOCAL(name) /* nothing */
+
+#define ALIGN __ALIGN
+
+/* === generic annotations === */
+
+/* SYM_ENTRY -- use only if you have to for non-paired symbols */
+#ifndef SYM_ENTRY
+#define SYM_ENTRY(name, linkage, align...) \
+ linkage(name) ASM_NL \
+ align ASM_NL \
+ name:
+#endif
+
+/* SYM_START -- use only if you have to */
+#ifndef SYM_START
+#define SYM_START(name, linkage, align...) \
+ SYM_ENTRY(name, linkage, align)
+#endif
+
+/* SYM_END -- use only if you have to */
+#ifndef SYM_END
+#define SYM_END(name, sym_type) \
+ .type name sym_type ASM_NL \
+ .set .L__sym_size_##name, .-name ASM_NL \
+ .size name, .-name
+#endif
+
+/* SYM_ALIAS -- use only if you have to */
+#ifndef SYM_ALIAS
+#define SYM_ALIAS(alias, name, sym_type, linkage) \
+ linkage(alias) ASM_NL \
+ .set alias, name ASM_NL \
+ .type alias sym_type ASM_NL \
+ .set .L__sym_size_##alias, .L__sym_size_##name ASM_NL \
+ .size alias, .L__sym_size_##alias
+#endif
+
+/* SYM_FUNC_START -- use for global functions */
+#ifndef SYM_FUNC_START
+#define SYM_FUNC_START(name) \
+ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
+#endif
+
+/* SYM_FUNC_START_LOCAL -- use for local functions */
+#ifndef SYM_FUNC_START_LOCAL
+#define SYM_FUNC_START_LOCAL(name) \
+ SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN)
+#endif
+
+/* SYM_FUNC_START_WEAK -- use for weak functions */
+#ifndef SYM_FUNC_START_WEAK
+#define SYM_FUNC_START_WEAK(name) \
+ SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN)
+#endif
+
+/*
+ * SYM_FUNC_END -- the end of SYM_FUNC_START_LOCAL, SYM_FUNC_START,
+ * SYM_FUNC_START_WEAK, ...
+ */
+#ifndef SYM_FUNC_END
+#define SYM_FUNC_END(name) \
+ SYM_END(name, SYM_T_FUNC)
+#endif
+
+/*
+ * SYM_FUNC_ALIAS -- define a global alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS
+#define SYM_FUNC_ALIAS(alias, name) \
+ SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_GLOBAL)
+#endif
+
+/*
+ * SYM_FUNC_ALIAS_LOCAL -- define a local alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS_LOCAL
+#define SYM_FUNC_ALIAS_LOCAL(alias, name) \
+ SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_LOCAL)
+#endif
+
+/*
+ * SYM_FUNC_ALIAS_WEAK -- define a weak global alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS_WEAK
+#define SYM_FUNC_ALIAS_WEAK(alias, name) \
+ SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK)
+#endif
+
+// In the kernel sources (include/linux/cfi_types.h), this has a different
+// definition when CONFIG_CFI_CLANG is used, for tools/ just use the !clang
+// definition:
+#ifndef SYM_TYPED_START
+#define SYM_TYPED_START(name, linkage, align...) \
+ SYM_START(name, linkage, align)
+#endif
+
+#ifndef SYM_TYPED_FUNC_START
+#define SYM_TYPED_FUNC_START(name) \
+ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
+#endif
+
+#endif /* PERF_LINUX_LINKAGE_H_ */
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
new file mode 100644
index 000000000..ec1b3bd9f
--- /dev/null
+++ b/tools/perf/util/intel-bts.c
@@ -0,0 +1,939 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * intel-bts.c: Intel Processor Trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ */
+
+#include <endian.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <inttypes.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+#include <linux/zalloc.h>
+
+#include "color.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "machine.h"
+#include "symbol.h"
+#include "session.h"
+#include "tool.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "debug.h"
+#include "tsc.h"
+#include "auxtrace.h"
+#include "intel-pt-decoder/intel-pt-insn-decoder.h"
+#include "intel-bts.h"
+#include "util/synthetic-events.h"
+
+#define MAX_TIMESTAMP (~0ULL)
+
+#define INTEL_BTS_ERR_NOINSN 5
+#define INTEL_BTS_ERR_LOST 9
+
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define le64_to_cpu bswap_64
+#else
+#define le64_to_cpu
+#endif
+
+struct intel_bts {
+ struct auxtrace auxtrace;
+ struct auxtrace_queues queues;
+ struct auxtrace_heap heap;
+ u32 auxtrace_type;
+ struct perf_session *session;
+ struct machine *machine;
+ bool sampling_mode;
+ bool snapshot_mode;
+ bool data_queued;
+ u32 pmu_type;
+ struct perf_tsc_conversion tc;
+ bool cap_user_time_zero;
+ struct itrace_synth_opts synth_opts;
+ bool sample_branches;
+ u32 branches_filter;
+ u64 branches_sample_type;
+ u64 branches_id;
+ size_t branches_event_size;
+ unsigned long num_events;
+};
+
+struct intel_bts_queue {
+ struct intel_bts *bts;
+ unsigned int queue_nr;
+ struct auxtrace_buffer *buffer;
+ bool on_heap;
+ bool done;
+ pid_t pid;
+ pid_t tid;
+ int cpu;
+ u64 time;
+ struct intel_pt_insn intel_pt_insn;
+ u32 sample_flags;
+};
+
+struct branch {
+ u64 from;
+ u64 to;
+ u64 misc;
+};
+
+static void intel_bts_dump(struct intel_bts *bts __maybe_unused,
+ unsigned char *buf, size_t len)
+{
+ struct branch *branch;
+ size_t i, pos = 0, br_sz = sizeof(struct branch), sz;
+ const char *color = PERF_COLOR_BLUE;
+
+ color_fprintf(stdout, color,
+ ". ... Intel BTS data: size %zu bytes\n",
+ len);
+
+ while (len) {
+ if (len >= br_sz)
+ sz = br_sz;
+ else
+ sz = len;
+ printf(".");
+ color_fprintf(stdout, color, " %08x: ", pos);
+ for (i = 0; i < sz; i++)
+ color_fprintf(stdout, color, " %02x", buf[i]);
+ for (; i < br_sz; i++)
+ color_fprintf(stdout, color, " ");
+ if (len >= br_sz) {
+ branch = (struct branch *)buf;
+ color_fprintf(stdout, color, " %"PRIx64" -> %"PRIx64" %s\n",
+ le64_to_cpu(branch->from),
+ le64_to_cpu(branch->to),
+ le64_to_cpu(branch->misc) & 0x10 ?
+ "pred" : "miss");
+ } else {
+ color_fprintf(stdout, color, " Bad record!\n");
+ }
+ pos += sz;
+ buf += sz;
+ len -= sz;
+ }
+}
+
+static void intel_bts_dump_event(struct intel_bts *bts, unsigned char *buf,
+ size_t len)
+{
+ printf(".\n");
+ intel_bts_dump(bts, buf, len);
+}
+
+static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample)
+{
+ union perf_event event;
+ int err;
+
+ auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
+ INTEL_BTS_ERR_LOST, sample->cpu, sample->pid,
+ sample->tid, 0, "Lost trace data", sample->time);
+
+ err = perf_session__deliver_synth_event(bts->session, &event, NULL);
+ if (err)
+ pr_err("Intel BTS: failed to deliver error event, error %d\n",
+ err);
+
+ return err;
+}
+
+static struct intel_bts_queue *intel_bts_alloc_queue(struct intel_bts *bts,
+ unsigned int queue_nr)
+{
+ struct intel_bts_queue *btsq;
+
+ btsq = zalloc(sizeof(struct intel_bts_queue));
+ if (!btsq)
+ return NULL;
+
+ btsq->bts = bts;
+ btsq->queue_nr = queue_nr;
+ btsq->pid = -1;
+ btsq->tid = -1;
+ btsq->cpu = -1;
+
+ return btsq;
+}
+
+static int intel_bts_setup_queue(struct intel_bts *bts,
+ struct auxtrace_queue *queue,
+ unsigned int queue_nr)
+{
+ struct intel_bts_queue *btsq = queue->priv;
+
+ if (list_empty(&queue->head))
+ return 0;
+
+ if (!btsq) {
+ btsq = intel_bts_alloc_queue(bts, queue_nr);
+ if (!btsq)
+ return -ENOMEM;
+ queue->priv = btsq;
+
+ if (queue->cpu != -1)
+ btsq->cpu = queue->cpu;
+ btsq->tid = queue->tid;
+ }
+
+ if (bts->sampling_mode)
+ return 0;
+
+ if (!btsq->on_heap && !btsq->buffer) {
+ int ret;
+
+ btsq->buffer = auxtrace_buffer__next(queue, NULL);
+ if (!btsq->buffer)
+ return 0;
+
+ ret = auxtrace_heap__add(&bts->heap, queue_nr,
+ btsq->buffer->reference);
+ if (ret)
+ return ret;
+ btsq->on_heap = true;
+ }
+
+ return 0;
+}
+
+static int intel_bts_setup_queues(struct intel_bts *bts)
+{
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < bts->queues.nr_queues; i++) {
+ ret = intel_bts_setup_queue(bts, &bts->queues.queue_array[i],
+ i);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static inline int intel_bts_update_queues(struct intel_bts *bts)
+{
+ if (bts->queues.new_data) {
+ bts->queues.new_data = false;
+ return intel_bts_setup_queues(bts);
+ }
+ return 0;
+}
+
+static unsigned char *intel_bts_find_overlap(unsigned char *buf_a, size_t len_a,
+ unsigned char *buf_b, size_t len_b)
+{
+ size_t offs, len;
+
+ if (len_a > len_b)
+ offs = len_a - len_b;
+ else
+ offs = 0;
+
+ for (; offs < len_a; offs += sizeof(struct branch)) {
+ len = len_a - offs;
+ if (!memcmp(buf_a + offs, buf_b, len))
+ return buf_b + len;
+ }
+
+ return buf_b;
+}
+
+static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue,
+ struct auxtrace_buffer *b)
+{
+ struct auxtrace_buffer *a;
+ void *start;
+
+ if (b->list.prev == &queue->head)
+ return 0;
+ a = list_entry(b->list.prev, struct auxtrace_buffer, list);
+ start = intel_bts_find_overlap(a->data, a->size, b->data, b->size);
+ if (!start)
+ return -EINVAL;
+ b->use_size = b->data + b->size - start;
+ b->use_data = start;
+ return 0;
+}
+
+static inline u8 intel_bts_cpumode(struct intel_bts *bts, uint64_t ip)
+{
+ return machine__kernel_ip(bts->machine, ip) ?
+ PERF_RECORD_MISC_KERNEL :
+ PERF_RECORD_MISC_USER;
+}
+
+static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
+ struct branch *branch)
+{
+ int ret;
+ struct intel_bts *bts = btsq->bts;
+ union perf_event event;
+ struct perf_sample sample = { .ip = 0, };
+
+ if (bts->synth_opts.initial_skip &&
+ bts->num_events++ <= bts->synth_opts.initial_skip)
+ return 0;
+
+ sample.ip = le64_to_cpu(branch->from);
+ sample.cpumode = intel_bts_cpumode(bts, sample.ip);
+ sample.pid = btsq->pid;
+ sample.tid = btsq->tid;
+ sample.addr = le64_to_cpu(branch->to);
+ sample.id = btsq->bts->branches_id;
+ sample.stream_id = btsq->bts->branches_id;
+ sample.period = 1;
+ sample.cpu = btsq->cpu;
+ sample.flags = btsq->sample_flags;
+ sample.insn_len = btsq->intel_pt_insn.length;
+ memcpy(sample.insn, btsq->intel_pt_insn.buf, INTEL_PT_INSN_BUF_SZ);
+
+ event.sample.header.type = PERF_RECORD_SAMPLE;
+ event.sample.header.misc = sample.cpumode;
+ event.sample.header.size = sizeof(struct perf_event_header);
+
+ if (bts->synth_opts.inject) {
+ event.sample.header.size = bts->branches_event_size;
+ ret = perf_event__synthesize_sample(&event,
+ bts->branches_sample_type,
+ 0, &sample);
+ if (ret)
+ return ret;
+ }
+
+ ret = perf_session__deliver_synth_event(bts->session, &event, &sample);
+ if (ret)
+ pr_err("Intel BTS: failed to deliver branch event, error %d\n",
+ ret);
+
+ return ret;
+}
+
+static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip)
+{
+ struct machine *machine = btsq->bts->machine;
+ struct thread *thread;
+ unsigned char buf[INTEL_PT_INSN_BUF_SZ];
+ ssize_t len;
+ bool x86_64;
+ int err = -1;
+
+ thread = machine__find_thread(machine, -1, btsq->tid);
+ if (!thread)
+ return -1;
+
+ len = thread__memcpy(thread, machine, buf, ip, INTEL_PT_INSN_BUF_SZ, &x86_64);
+ if (len <= 0)
+ goto out_put;
+
+ if (intel_pt_get_insn(buf, len, x86_64, &btsq->intel_pt_insn))
+ goto out_put;
+
+ err = 0;
+out_put:
+ thread__put(thread);
+ return err;
+}
+
+static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid,
+ pid_t tid, u64 ip)
+{
+ union perf_event event;
+ int err;
+
+ auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
+ INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip,
+ "Failed to get instruction", 0);
+
+ err = perf_session__deliver_synth_event(bts->session, &event, NULL);
+ if (err)
+ pr_err("Intel BTS: failed to deliver error event, error %d\n",
+ err);
+
+ return err;
+}
+
+static int intel_bts_get_branch_type(struct intel_bts_queue *btsq,
+ struct branch *branch)
+{
+ int err;
+
+ if (!branch->from) {
+ if (branch->to)
+ btsq->sample_flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_TRACE_BEGIN;
+ else
+ btsq->sample_flags = 0;
+ btsq->intel_pt_insn.length = 0;
+ } else if (!branch->to) {
+ btsq->sample_flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_TRACE_END;
+ btsq->intel_pt_insn.length = 0;
+ } else {
+ err = intel_bts_get_next_insn(btsq, branch->from);
+ if (err) {
+ btsq->sample_flags = 0;
+ btsq->intel_pt_insn.length = 0;
+ if (!btsq->bts->synth_opts.errors)
+ return 0;
+ err = intel_bts_synth_error(btsq->bts, btsq->cpu,
+ btsq->pid, btsq->tid,
+ branch->from);
+ return err;
+ }
+ btsq->sample_flags = intel_pt_insn_type(btsq->intel_pt_insn.op);
+ /* Check for an async branch into the kernel */
+ if (!machine__kernel_ip(btsq->bts->machine, branch->from) &&
+ machine__kernel_ip(btsq->bts->machine, branch->to) &&
+ btsq->sample_flags != (PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_SYSCALLRET))
+ btsq->sample_flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_INTERRUPT;
+ }
+
+ return 0;
+}
+
+static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
+ struct auxtrace_buffer *buffer,
+ struct thread *thread)
+{
+ struct branch *branch;
+ size_t sz, bsz = sizeof(struct branch);
+ u32 filter = btsq->bts->branches_filter;
+ int err = 0;
+
+ if (buffer->use_data) {
+ sz = buffer->use_size;
+ branch = buffer->use_data;
+ } else {
+ sz = buffer->size;
+ branch = buffer->data;
+ }
+
+ if (!btsq->bts->sample_branches)
+ return 0;
+
+ for (; sz > bsz; branch += 1, sz -= bsz) {
+ if (!branch->from && !branch->to)
+ continue;
+ intel_bts_get_branch_type(btsq, branch);
+ if (btsq->bts->synth_opts.thread_stack)
+ thread_stack__event(thread, btsq->cpu, btsq->sample_flags,
+ le64_to_cpu(branch->from),
+ le64_to_cpu(branch->to),
+ btsq->intel_pt_insn.length,
+ buffer->buffer_nr + 1, true, 0, 0);
+ if (filter && !(filter & btsq->sample_flags))
+ continue;
+ err = intel_bts_synth_branch_sample(btsq, branch);
+ if (err)
+ break;
+ }
+ return err;
+}
+
+static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
+{
+ struct auxtrace_buffer *buffer = btsq->buffer, *old_buffer = buffer;
+ struct auxtrace_queue *queue;
+ struct thread *thread;
+ int err;
+
+ if (btsq->done)
+ return 1;
+
+ if (btsq->pid == -1) {
+ thread = machine__find_thread(btsq->bts->machine, -1,
+ btsq->tid);
+ if (thread)
+ btsq->pid = thread__pid(thread);
+ } else {
+ thread = machine__findnew_thread(btsq->bts->machine, btsq->pid,
+ btsq->tid);
+ }
+
+ queue = &btsq->bts->queues.queue_array[btsq->queue_nr];
+
+ if (!buffer)
+ buffer = auxtrace_buffer__next(queue, NULL);
+
+ if (!buffer) {
+ if (!btsq->bts->sampling_mode)
+ btsq->done = 1;
+ err = 1;
+ goto out_put;
+ }
+
+ /* Currently there is no support for split buffers */
+ if (buffer->consecutive) {
+ err = -EINVAL;
+ goto out_put;
+ }
+
+ if (!buffer->data) {
+ int fd = perf_data__fd(btsq->bts->session->data);
+
+ buffer->data = auxtrace_buffer__get_data(buffer, fd);
+ if (!buffer->data) {
+ err = -ENOMEM;
+ goto out_put;
+ }
+ }
+
+ if (btsq->bts->snapshot_mode && !buffer->consecutive &&
+ intel_bts_do_fix_overlap(queue, buffer)) {
+ err = -ENOMEM;
+ goto out_put;
+ }
+
+ if (!btsq->bts->synth_opts.callchain &&
+ !btsq->bts->synth_opts.thread_stack && thread &&
+ (!old_buffer || btsq->bts->sampling_mode ||
+ (btsq->bts->snapshot_mode && !buffer->consecutive)))
+ thread_stack__set_trace_nr(thread, btsq->cpu, buffer->buffer_nr + 1);
+
+ err = intel_bts_process_buffer(btsq, buffer, thread);
+
+ auxtrace_buffer__drop_data(buffer);
+
+ btsq->buffer = auxtrace_buffer__next(queue, buffer);
+ if (btsq->buffer) {
+ if (timestamp)
+ *timestamp = btsq->buffer->reference;
+ } else {
+ if (!btsq->bts->sampling_mode)
+ btsq->done = 1;
+ }
+out_put:
+ thread__put(thread);
+ return err;
+}
+
+static int intel_bts_flush_queue(struct intel_bts_queue *btsq)
+{
+ u64 ts = 0;
+ int ret;
+
+ while (1) {
+ ret = intel_bts_process_queue(btsq, &ts);
+ if (ret < 0)
+ return ret;
+ if (ret)
+ break;
+ }
+ return 0;
+}
+
+static int intel_bts_process_tid_exit(struct intel_bts *bts, pid_t tid)
+{
+ struct auxtrace_queues *queues = &bts->queues;
+ unsigned int i;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ struct auxtrace_queue *queue = &bts->queues.queue_array[i];
+ struct intel_bts_queue *btsq = queue->priv;
+
+ if (btsq && btsq->tid == tid)
+ return intel_bts_flush_queue(btsq);
+ }
+ return 0;
+}
+
+static int intel_bts_process_queues(struct intel_bts *bts, u64 timestamp)
+{
+ while (1) {
+ unsigned int queue_nr;
+ struct auxtrace_queue *queue;
+ struct intel_bts_queue *btsq;
+ u64 ts = 0;
+ int ret;
+
+ if (!bts->heap.heap_cnt)
+ return 0;
+
+ if (bts->heap.heap_array[0].ordinal > timestamp)
+ return 0;
+
+ queue_nr = bts->heap.heap_array[0].queue_nr;
+ queue = &bts->queues.queue_array[queue_nr];
+ btsq = queue->priv;
+
+ auxtrace_heap__pop(&bts->heap);
+
+ ret = intel_bts_process_queue(btsq, &ts);
+ if (ret < 0) {
+ auxtrace_heap__add(&bts->heap, queue_nr, ts);
+ return ret;
+ }
+
+ if (!ret) {
+ ret = auxtrace_heap__add(&bts->heap, queue_nr, ts);
+ if (ret < 0)
+ return ret;
+ } else {
+ btsq->on_heap = false;
+ }
+ }
+
+ return 0;
+}
+
+static int intel_bts_process_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_tool *tool)
+{
+ struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+ auxtrace);
+ u64 timestamp;
+ int err;
+
+ if (dump_trace)
+ return 0;
+
+ if (!tool->ordered_events) {
+ pr_err("Intel BTS requires ordered events\n");
+ return -EINVAL;
+ }
+
+ if (sample->time && sample->time != (u64)-1)
+ timestamp = perf_time_to_tsc(sample->time, &bts->tc);
+ else
+ timestamp = 0;
+
+ err = intel_bts_update_queues(bts);
+ if (err)
+ return err;
+
+ err = intel_bts_process_queues(bts, timestamp);
+ if (err)
+ return err;
+ if (event->header.type == PERF_RECORD_EXIT) {
+ err = intel_bts_process_tid_exit(bts, event->fork.tid);
+ if (err)
+ return err;
+ }
+
+ if (event->header.type == PERF_RECORD_AUX &&
+ (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
+ bts->synth_opts.errors)
+ err = intel_bts_lost(bts, sample);
+
+ return err;
+}
+
+static int intel_bts_process_auxtrace_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_tool *tool __maybe_unused)
+{
+ struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+ auxtrace);
+
+ if (bts->sampling_mode)
+ return 0;
+
+ if (!bts->data_queued) {
+ struct auxtrace_buffer *buffer;
+ off_t data_offset;
+ int fd = perf_data__fd(session->data);
+ int err;
+
+ if (perf_data__is_pipe(session->data)) {
+ data_offset = 0;
+ } else {
+ data_offset = lseek(fd, 0, SEEK_CUR);
+ if (data_offset == -1)
+ return -errno;
+ }
+
+ err = auxtrace_queues__add_event(&bts->queues, session, event,
+ data_offset, &buffer);
+ if (err)
+ return err;
+
+ /* Dump here now we have copied a piped trace out of the pipe */
+ if (dump_trace) {
+ if (auxtrace_buffer__get_data(buffer, fd)) {
+ intel_bts_dump_event(bts, buffer->data,
+ buffer->size);
+ auxtrace_buffer__put_data(buffer);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int intel_bts_flush(struct perf_session *session,
+ struct perf_tool *tool __maybe_unused)
+{
+ struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+ auxtrace);
+ int ret;
+
+ if (dump_trace || bts->sampling_mode)
+ return 0;
+
+ if (!tool->ordered_events)
+ return -EINVAL;
+
+ ret = intel_bts_update_queues(bts);
+ if (ret < 0)
+ return ret;
+
+ return intel_bts_process_queues(bts, MAX_TIMESTAMP);
+}
+
+static void intel_bts_free_queue(void *priv)
+{
+ struct intel_bts_queue *btsq = priv;
+
+ if (!btsq)
+ return;
+ free(btsq);
+}
+
+static void intel_bts_free_events(struct perf_session *session)
+{
+ struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+ auxtrace);
+ struct auxtrace_queues *queues = &bts->queues;
+ unsigned int i;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ intel_bts_free_queue(queues->queue_array[i].priv);
+ queues->queue_array[i].priv = NULL;
+ }
+ auxtrace_queues__free(queues);
+}
+
+static void intel_bts_free(struct perf_session *session)
+{
+ struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+ auxtrace);
+
+ auxtrace_heap__free(&bts->heap);
+ intel_bts_free_events(session);
+ session->auxtrace = NULL;
+ free(bts);
+}
+
+static bool intel_bts_evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+ auxtrace);
+
+ return evsel->core.attr.type == bts->pmu_type;
+}
+
+struct intel_bts_synth {
+ struct perf_tool dummy_tool;
+ struct perf_session *session;
+};
+
+static int intel_bts_event_synth(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct intel_bts_synth *intel_bts_synth =
+ container_of(tool, struct intel_bts_synth, dummy_tool);
+
+ return perf_session__deliver_synth_event(intel_bts_synth->session,
+ event, NULL);
+}
+
+static int intel_bts_synth_event(struct perf_session *session,
+ struct perf_event_attr *attr, u64 id)
+{
+ struct intel_bts_synth intel_bts_synth;
+
+ memset(&intel_bts_synth, 0, sizeof(struct intel_bts_synth));
+ intel_bts_synth.session = session;
+
+ return perf_event__synthesize_attr(&intel_bts_synth.dummy_tool, attr, 1,
+ &id, intel_bts_event_synth);
+}
+
+static int intel_bts_synth_events(struct intel_bts *bts,
+ struct perf_session *session)
+{
+ struct evlist *evlist = session->evlist;
+ struct evsel *evsel;
+ struct perf_event_attr attr;
+ bool found = false;
+ u64 id;
+ int err;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.type == bts->pmu_type && evsel->core.ids) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ pr_debug("There are no selected events with Intel BTS data\n");
+ return 0;
+ }
+
+ memset(&attr, 0, sizeof(struct perf_event_attr));
+ attr.size = sizeof(struct perf_event_attr);
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
+ attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+ PERF_SAMPLE_PERIOD;
+ attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
+ attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
+ attr.exclude_user = evsel->core.attr.exclude_user;
+ attr.exclude_kernel = evsel->core.attr.exclude_kernel;
+ attr.exclude_hv = evsel->core.attr.exclude_hv;
+ attr.exclude_host = evsel->core.attr.exclude_host;
+ attr.exclude_guest = evsel->core.attr.exclude_guest;
+ attr.sample_id_all = evsel->core.attr.sample_id_all;
+ attr.read_format = evsel->core.attr.read_format;
+
+ id = evsel->core.id[0] + 1000000000;
+ if (!id)
+ id = 1;
+
+ if (bts->synth_opts.branches) {
+ attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+ attr.sample_period = 1;
+ attr.sample_type |= PERF_SAMPLE_ADDR;
+ pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
+ id, (u64)attr.sample_type);
+ err = intel_bts_synth_event(session, &attr, id);
+ if (err) {
+ pr_err("%s: failed to synthesize 'branches' event type\n",
+ __func__);
+ return err;
+ }
+ bts->sample_branches = true;
+ bts->branches_sample_type = attr.sample_type;
+ bts->branches_id = id;
+ /*
+ * We only use sample types from PERF_SAMPLE_MASK so we can use
+ * __evsel__sample_size() here.
+ */
+ bts->branches_event_size = sizeof(struct perf_record_sample) +
+ __evsel__sample_size(attr.sample_type);
+ }
+
+ return 0;
+}
+
+static const char * const intel_bts_info_fmts[] = {
+ [INTEL_BTS_PMU_TYPE] = " PMU Type %"PRId64"\n",
+ [INTEL_BTS_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
+ [INTEL_BTS_TIME_MULT] = " Time Muliplier %"PRIu64"\n",
+ [INTEL_BTS_TIME_ZERO] = " Time Zero %"PRIu64"\n",
+ [INTEL_BTS_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n",
+ [INTEL_BTS_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n",
+};
+
+static void intel_bts_print_info(__u64 *arr, int start, int finish)
+{
+ int i;
+
+ if (!dump_trace)
+ return;
+
+ for (i = start; i <= finish; i++)
+ fprintf(stdout, intel_bts_info_fmts[i], arr[i]);
+}
+
+int intel_bts_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session)
+{
+ struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
+ size_t min_sz = sizeof(u64) * INTEL_BTS_SNAPSHOT_MODE;
+ struct intel_bts *bts;
+ int err;
+
+ if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
+ min_sz)
+ return -EINVAL;
+
+ bts = zalloc(sizeof(struct intel_bts));
+ if (!bts)
+ return -ENOMEM;
+
+ err = auxtrace_queues__init(&bts->queues);
+ if (err)
+ goto err_free;
+
+ bts->session = session;
+ bts->machine = &session->machines.host; /* No kvm support */
+ bts->auxtrace_type = auxtrace_info->type;
+ bts->pmu_type = auxtrace_info->priv[INTEL_BTS_PMU_TYPE];
+ bts->tc.time_shift = auxtrace_info->priv[INTEL_BTS_TIME_SHIFT];
+ bts->tc.time_mult = auxtrace_info->priv[INTEL_BTS_TIME_MULT];
+ bts->tc.time_zero = auxtrace_info->priv[INTEL_BTS_TIME_ZERO];
+ bts->cap_user_time_zero =
+ auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO];
+ bts->snapshot_mode = auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE];
+
+ bts->sampling_mode = false;
+
+ bts->auxtrace.process_event = intel_bts_process_event;
+ bts->auxtrace.process_auxtrace_event = intel_bts_process_auxtrace_event;
+ bts->auxtrace.flush_events = intel_bts_flush;
+ bts->auxtrace.free_events = intel_bts_free_events;
+ bts->auxtrace.free = intel_bts_free;
+ bts->auxtrace.evsel_is_auxtrace = intel_bts_evsel_is_auxtrace;
+ session->auxtrace = &bts->auxtrace;
+
+ intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE,
+ INTEL_BTS_SNAPSHOT_MODE);
+
+ if (dump_trace)
+ return 0;
+
+ if (session->itrace_synth_opts->set) {
+ bts->synth_opts = *session->itrace_synth_opts;
+ } else {
+ itrace_synth_opts__set_default(&bts->synth_opts,
+ session->itrace_synth_opts->default_no_sample);
+ bts->synth_opts.thread_stack =
+ session->itrace_synth_opts->thread_stack;
+ }
+
+ if (bts->synth_opts.calls)
+ bts->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_TRACE_END;
+ if (bts->synth_opts.returns)
+ bts->branches_filter |= PERF_IP_FLAG_RETURN |
+ PERF_IP_FLAG_TRACE_BEGIN;
+
+ err = intel_bts_synth_events(bts, session);
+ if (err)
+ goto err_free_queues;
+
+ err = auxtrace_queues__process_index(&bts->queues, session);
+ if (err)
+ goto err_free_queues;
+
+ if (bts->queues.populated)
+ bts->data_queued = true;
+
+ return 0;
+
+err_free_queues:
+ auxtrace_queues__free(&bts->queues);
+ session->auxtrace = NULL;
+err_free:
+ free(bts);
+ return err;
+}
diff --git a/tools/perf/util/intel-bts.h b/tools/perf/util/intel-bts.h
new file mode 100644
index 000000000..53d5aa027
--- /dev/null
+++ b/tools/perf/util/intel-bts.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * intel-bts.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ */
+
+#ifndef INCLUDE__PERF_INTEL_BTS_H__
+#define INCLUDE__PERF_INTEL_BTS_H__
+
+#define INTEL_BTS_PMU_NAME "intel_bts"
+
+enum {
+ INTEL_BTS_PMU_TYPE,
+ INTEL_BTS_TIME_SHIFT,
+ INTEL_BTS_TIME_MULT,
+ INTEL_BTS_TIME_ZERO,
+ INTEL_BTS_CAP_USER_TIME_ZERO,
+ INTEL_BTS_SNAPSHOT_MODE,
+ INTEL_BTS_AUXTRACE_PRIV_MAX,
+};
+
+#define INTEL_BTS_AUXTRACE_PRIV_SIZE (INTEL_BTS_AUXTRACE_PRIV_MAX * sizeof(u64))
+
+struct auxtrace_record;
+struct perf_tool;
+union perf_event;
+struct perf_session;
+
+struct auxtrace_record *intel_bts_recording_init(int *err);
+
+int intel_bts_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session);
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build
new file mode 100644
index 000000000..b41c2e9c6
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/Build
@@ -0,0 +1,22 @@
+perf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o
+
+inat_tables_script = $(srctree)/tools/arch/x86/tools/gen-insn-attr-x86.awk
+inat_tables_maps = $(srctree)/tools/arch/x86/lib/x86-opcode-map.txt
+
+$(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
+ $(call rule_mkdir)
+ @$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@
+
+# Busybox's diff doesn't have -I, avoid warning in the case
+
+$(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/intel-pt-insn-decoder.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
+
+CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder
+
+ifeq ($(CC_NO_CLANG), 1)
+ CFLAGS_intel-pt-insn-decoder.o += -Wno-override-init
+endif
+
+CFLAGS_intel-pt-insn-decoder.o += -Wno-packed
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
new file mode 100644
index 000000000..b450178e3
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -0,0 +1,4513 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * intel_pt_decoder.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <linux/compiler.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+
+#include "../auxtrace.h"
+
+#include "intel-pt-insn-decoder.h"
+#include "intel-pt-pkt-decoder.h"
+#include "intel-pt-decoder.h"
+#include "intel-pt-log.h"
+
+#define BITULL(x) (1ULL << (x))
+
+/* IA32_RTIT_CTL MSR bits */
+#define INTEL_PT_CYC_ENABLE BITULL(1)
+#define INTEL_PT_CYC_THRESHOLD (BITULL(22) | BITULL(21) | BITULL(20) | BITULL(19))
+#define INTEL_PT_CYC_THRESHOLD_SHIFT 19
+
+#define INTEL_PT_BLK_SIZE 1024
+
+#define BIT63 (((uint64_t)1 << 63))
+
+#define SEVEN_BYTES 0xffffffffffffffULL
+
+#define NO_VMCS 0xffffffffffULL
+
+#define INTEL_PT_RETURN 1
+
+/*
+ * Default maximum number of loops with no packets consumed i.e. stuck in a
+ * loop.
+ */
+#define INTEL_PT_MAX_LOOPS 100000
+
+struct intel_pt_blk {
+ struct intel_pt_blk *prev;
+ uint64_t ip[INTEL_PT_BLK_SIZE];
+};
+
+struct intel_pt_stack {
+ struct intel_pt_blk *blk;
+ struct intel_pt_blk *spare;
+ int pos;
+};
+
+enum intel_pt_p_once {
+ INTEL_PT_PRT_ONCE_UNK_VMCS,
+ INTEL_PT_PRT_ONCE_ERANGE,
+};
+
+enum intel_pt_pkt_state {
+ INTEL_PT_STATE_NO_PSB,
+ INTEL_PT_STATE_NO_IP,
+ INTEL_PT_STATE_ERR_RESYNC,
+ INTEL_PT_STATE_IN_SYNC,
+ INTEL_PT_STATE_TNT_CONT,
+ INTEL_PT_STATE_TNT,
+ INTEL_PT_STATE_TIP,
+ INTEL_PT_STATE_TIP_PGD,
+ INTEL_PT_STATE_FUP,
+ INTEL_PT_STATE_FUP_NO_TIP,
+ INTEL_PT_STATE_FUP_IN_PSB,
+ INTEL_PT_STATE_RESAMPLE,
+ INTEL_PT_STATE_VM_TIME_CORRELATION,
+};
+
+static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
+{
+ switch (pkt_state) {
+ case INTEL_PT_STATE_NO_PSB:
+ case INTEL_PT_STATE_NO_IP:
+ case INTEL_PT_STATE_ERR_RESYNC:
+ case INTEL_PT_STATE_IN_SYNC:
+ case INTEL_PT_STATE_TNT_CONT:
+ case INTEL_PT_STATE_RESAMPLE:
+ case INTEL_PT_STATE_VM_TIME_CORRELATION:
+ return true;
+ case INTEL_PT_STATE_TNT:
+ case INTEL_PT_STATE_TIP:
+ case INTEL_PT_STATE_TIP_PGD:
+ case INTEL_PT_STATE_FUP:
+ case INTEL_PT_STATE_FUP_NO_TIP:
+ case INTEL_PT_STATE_FUP_IN_PSB:
+ return false;
+ default:
+ return true;
+ };
+}
+
+#ifdef INTEL_PT_STRICT
+#define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB
+#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB
+#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_NO_PSB
+#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_NO_PSB
+#else
+#define INTEL_PT_STATE_ERR1 (decoder->pkt_state)
+#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_IP
+#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_ERR_RESYNC
+#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_IN_SYNC
+#endif
+
+struct intel_pt_decoder {
+ int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
+ int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
+ uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
+ uint64_t max_insn_cnt, void *data);
+ bool (*pgd_ip)(uint64_t ip, void *data);
+ int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data);
+ struct intel_pt_vmcs_info *(*findnew_vmcs_info)(void *data, uint64_t vmcs);
+ void *data;
+ struct intel_pt_state state;
+ const unsigned char *buf;
+ size_t len;
+ bool return_compression;
+ bool branch_enable;
+ bool mtc_insn;
+ bool pge;
+ bool have_tma;
+ bool have_cyc;
+ bool fixup_last_mtc;
+ bool have_last_ip;
+ bool in_psb;
+ bool hop;
+ bool leap;
+ bool emulated_ptwrite;
+ bool vm_time_correlation;
+ bool vm_tm_corr_dry_run;
+ bool vm_tm_corr_reliable;
+ bool vm_tm_corr_same_buf;
+ bool vm_tm_corr_continuous;
+ bool nr;
+ bool next_nr;
+ bool iflag;
+ bool next_iflag;
+ enum intel_pt_param_flags flags;
+ uint64_t pos;
+ uint64_t last_ip;
+ uint64_t ip;
+ uint64_t pip_payload;
+ uint64_t timestamp;
+ uint64_t tsc_timestamp;
+ uint64_t ref_timestamp;
+ uint64_t buf_timestamp;
+ uint64_t sample_timestamp;
+ uint64_t ret_addr;
+ uint64_t ctc_timestamp;
+ uint64_t ctc_delta;
+ uint64_t cycle_cnt;
+ uint64_t cyc_ref_timestamp;
+ uint64_t first_timestamp;
+ uint64_t last_reliable_timestamp;
+ uint64_t vmcs;
+ uint64_t print_once;
+ uint64_t last_ctc;
+ uint32_t last_mtc;
+ uint32_t tsc_ctc_ratio_n;
+ uint32_t tsc_ctc_ratio_d;
+ uint32_t tsc_ctc_mult;
+ uint32_t tsc_slip;
+ uint32_t ctc_rem_mask;
+ int mtc_shift;
+ struct intel_pt_stack stack;
+ enum intel_pt_pkt_state pkt_state;
+ enum intel_pt_pkt_ctx pkt_ctx;
+ enum intel_pt_pkt_ctx prev_pkt_ctx;
+ enum intel_pt_blk_type blk_type;
+ int blk_type_pos;
+ struct intel_pt_pkt packet;
+ struct intel_pt_pkt tnt;
+ int pkt_step;
+ int pkt_len;
+ int last_packet_type;
+ unsigned int cbr;
+ unsigned int cbr_seen;
+ unsigned int max_non_turbo_ratio;
+ double max_non_turbo_ratio_fp;
+ double cbr_cyc_to_tsc;
+ double calc_cyc_to_tsc;
+ bool have_calc_cyc_to_tsc;
+ int exec_mode;
+ unsigned int insn_bytes;
+ uint64_t period;
+ enum intel_pt_period_type period_type;
+ uint64_t tot_insn_cnt;
+ uint64_t period_insn_cnt;
+ uint64_t period_mask;
+ uint64_t period_ticks;
+ uint64_t last_masked_timestamp;
+ uint64_t tot_cyc_cnt;
+ uint64_t sample_tot_cyc_cnt;
+ uint64_t base_cyc_cnt;
+ uint64_t cyc_cnt_timestamp;
+ uint64_t ctl;
+ uint64_t cyc_threshold;
+ double tsc_to_cyc;
+ bool continuous_period;
+ bool overflow;
+ bool set_fup_tx_flags;
+ bool set_fup_ptw;
+ bool set_fup_mwait;
+ bool set_fup_pwre;
+ bool set_fup_exstop;
+ bool set_fup_bep;
+ bool set_fup_cfe_ip;
+ bool set_fup_cfe;
+ bool set_fup_mode_exec;
+ bool sample_cyc;
+ unsigned int fup_tx_flags;
+ unsigned int tx_flags;
+ uint64_t fup_ptw_payload;
+ uint64_t fup_mwait_payload;
+ uint64_t fup_pwre_payload;
+ uint64_t cbr_payload;
+ uint64_t timestamp_insn_cnt;
+ uint64_t sample_insn_cnt;
+ uint64_t stuck_ip;
+ struct intel_pt_pkt fup_cfe_pkt;
+ int max_loops;
+ int no_progress;
+ int stuck_ip_prd;
+ int stuck_ip_cnt;
+ uint64_t psb_ip;
+ const unsigned char *next_buf;
+ size_t next_len;
+ unsigned char temp_buf[INTEL_PT_PKT_MAX_SZ];
+ int evd_cnt;
+ struct intel_pt_evd evd[INTEL_PT_MAX_EVDS];
+};
+
+static uint64_t intel_pt_lower_power_of_2(uint64_t x)
+{
+ int i;
+
+ for (i = 0; x != 1; i++)
+ x >>= 1;
+
+ return x << i;
+}
+
+__printf(1, 2)
+static void p_log(const char *fmt, ...)
+{
+ char buf[512];
+ va_list args;
+
+ va_start(args, fmt);
+ vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+
+ fprintf(stderr, "%s\n", buf);
+ intel_pt_log("%s\n", buf);
+}
+
+static bool intel_pt_print_once(struct intel_pt_decoder *decoder,
+ enum intel_pt_p_once id)
+{
+ uint64_t bit = 1ULL << id;
+
+ if (decoder->print_once & bit)
+ return false;
+ decoder->print_once |= bit;
+ return true;
+}
+
+static uint64_t intel_pt_cyc_threshold(uint64_t ctl)
+{
+ if (!(ctl & INTEL_PT_CYC_ENABLE))
+ return 0;
+
+ return (ctl & INTEL_PT_CYC_THRESHOLD) >> INTEL_PT_CYC_THRESHOLD_SHIFT;
+}
+
+static void intel_pt_setup_period(struct intel_pt_decoder *decoder)
+{
+ if (decoder->period_type == INTEL_PT_PERIOD_TICKS) {
+ uint64_t period;
+
+ period = intel_pt_lower_power_of_2(decoder->period);
+ decoder->period_mask = ~(period - 1);
+ decoder->period_ticks = period;
+ }
+}
+
+static uint64_t multdiv(uint64_t t, uint32_t n, uint32_t d)
+{
+ if (!d)
+ return 0;
+ return (t / d) * n + ((t % d) * n) / d;
+}
+
+struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
+{
+ struct intel_pt_decoder *decoder;
+
+ if (!params->get_trace || !params->walk_insn)
+ return NULL;
+
+ decoder = zalloc(sizeof(struct intel_pt_decoder));
+ if (!decoder)
+ return NULL;
+
+ decoder->get_trace = params->get_trace;
+ decoder->walk_insn = params->walk_insn;
+ decoder->pgd_ip = params->pgd_ip;
+ decoder->lookahead = params->lookahead;
+ decoder->findnew_vmcs_info = params->findnew_vmcs_info;
+ decoder->data = params->data;
+ decoder->return_compression = params->return_compression;
+ decoder->branch_enable = params->branch_enable;
+ decoder->hop = params->quick >= 1;
+ decoder->leap = params->quick >= 2;
+ decoder->vm_time_correlation = params->vm_time_correlation;
+ decoder->vm_tm_corr_dry_run = params->vm_tm_corr_dry_run;
+ decoder->first_timestamp = params->first_timestamp;
+ decoder->last_reliable_timestamp = params->first_timestamp;
+ decoder->max_loops = params->max_loops ? params->max_loops : INTEL_PT_MAX_LOOPS;
+
+ decoder->flags = params->flags;
+
+ decoder->ctl = params->ctl;
+ decoder->period = params->period;
+ decoder->period_type = params->period_type;
+
+ decoder->max_non_turbo_ratio = params->max_non_turbo_ratio;
+ decoder->max_non_turbo_ratio_fp = params->max_non_turbo_ratio;
+
+ decoder->cyc_threshold = intel_pt_cyc_threshold(decoder->ctl);
+
+ intel_pt_setup_period(decoder);
+
+ decoder->mtc_shift = params->mtc_period;
+ decoder->ctc_rem_mask = (1 << decoder->mtc_shift) - 1;
+
+ decoder->tsc_ctc_ratio_n = params->tsc_ctc_ratio_n;
+ decoder->tsc_ctc_ratio_d = params->tsc_ctc_ratio_d;
+
+ if (!decoder->tsc_ctc_ratio_n)
+ decoder->tsc_ctc_ratio_d = 0;
+
+ if (decoder->tsc_ctc_ratio_d) {
+ if (!(decoder->tsc_ctc_ratio_n % decoder->tsc_ctc_ratio_d))
+ decoder->tsc_ctc_mult = decoder->tsc_ctc_ratio_n /
+ decoder->tsc_ctc_ratio_d;
+ }
+
+ /*
+ * A TSC packet can slip past MTC packets so that the timestamp appears
+ * to go backwards. One estimate is that can be up to about 40 CPU
+ * cycles, which is certainly less than 0x1000 TSC ticks, but accept
+ * slippage an order of magnitude more to be on the safe side.
+ */
+ decoder->tsc_slip = 0x10000;
+
+ intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift);
+ intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n);
+ intel_pt_log("timestamp: tsc_ctc_ratio_d %u\n", decoder->tsc_ctc_ratio_d);
+ intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult);
+ intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip);
+
+ if (decoder->hop)
+ intel_pt_log("Hop mode: decoding FUP and TIPs, but not TNT\n");
+
+ return decoder;
+}
+
+void intel_pt_set_first_timestamp(struct intel_pt_decoder *decoder,
+ uint64_t first_timestamp)
+{
+ decoder->first_timestamp = first_timestamp;
+}
+
+static void intel_pt_pop_blk(struct intel_pt_stack *stack)
+{
+ struct intel_pt_blk *blk = stack->blk;
+
+ stack->blk = blk->prev;
+ if (!stack->spare)
+ stack->spare = blk;
+ else
+ free(blk);
+}
+
+static uint64_t intel_pt_pop(struct intel_pt_stack *stack)
+{
+ if (!stack->pos) {
+ if (!stack->blk)
+ return 0;
+ intel_pt_pop_blk(stack);
+ if (!stack->blk)
+ return 0;
+ stack->pos = INTEL_PT_BLK_SIZE;
+ }
+ return stack->blk->ip[--stack->pos];
+}
+
+static int intel_pt_alloc_blk(struct intel_pt_stack *stack)
+{
+ struct intel_pt_blk *blk;
+
+ if (stack->spare) {
+ blk = stack->spare;
+ stack->spare = NULL;
+ } else {
+ blk = malloc(sizeof(struct intel_pt_blk));
+ if (!blk)
+ return -ENOMEM;
+ }
+
+ blk->prev = stack->blk;
+ stack->blk = blk;
+ stack->pos = 0;
+ return 0;
+}
+
+static int intel_pt_push(struct intel_pt_stack *stack, uint64_t ip)
+{
+ int err;
+
+ if (!stack->blk || stack->pos == INTEL_PT_BLK_SIZE) {
+ err = intel_pt_alloc_blk(stack);
+ if (err)
+ return err;
+ }
+
+ stack->blk->ip[stack->pos++] = ip;
+ return 0;
+}
+
+static void intel_pt_clear_stack(struct intel_pt_stack *stack)
+{
+ while (stack->blk)
+ intel_pt_pop_blk(stack);
+ stack->pos = 0;
+}
+
+static void intel_pt_free_stack(struct intel_pt_stack *stack)
+{
+ intel_pt_clear_stack(stack);
+ zfree(&stack->blk);
+ zfree(&stack->spare);
+}
+
+void intel_pt_decoder_free(struct intel_pt_decoder *decoder)
+{
+ intel_pt_free_stack(&decoder->stack);
+ free(decoder);
+}
+
+static int intel_pt_ext_err(int code)
+{
+ switch (code) {
+ case -ENOMEM:
+ return INTEL_PT_ERR_NOMEM;
+ case -ENOSYS:
+ return INTEL_PT_ERR_INTERN;
+ case -EBADMSG:
+ return INTEL_PT_ERR_BADPKT;
+ case -ENODATA:
+ return INTEL_PT_ERR_NODATA;
+ case -EILSEQ:
+ return INTEL_PT_ERR_NOINSN;
+ case -ENOENT:
+ return INTEL_PT_ERR_MISMAT;
+ case -EOVERFLOW:
+ return INTEL_PT_ERR_OVR;
+ case -ENOSPC:
+ return INTEL_PT_ERR_LOST;
+ case -ELOOP:
+ return INTEL_PT_ERR_NELOOP;
+ case -ECONNRESET:
+ return INTEL_PT_ERR_EPTW;
+ default:
+ return INTEL_PT_ERR_UNK;
+ }
+}
+
+static const char *intel_pt_err_msgs[] = {
+ [INTEL_PT_ERR_NOMEM] = "Memory allocation failed",
+ [INTEL_PT_ERR_INTERN] = "Internal error",
+ [INTEL_PT_ERR_BADPKT] = "Bad packet",
+ [INTEL_PT_ERR_NODATA] = "No more data",
+ [INTEL_PT_ERR_NOINSN] = "Failed to get instruction",
+ [INTEL_PT_ERR_MISMAT] = "Trace doesn't match instruction",
+ [INTEL_PT_ERR_OVR] = "Overflow packet",
+ [INTEL_PT_ERR_LOST] = "Lost trace data",
+ [INTEL_PT_ERR_UNK] = "Unknown error!",
+ [INTEL_PT_ERR_NELOOP] = "Never-ending loop (refer perf config intel-pt.max-loops)",
+ [INTEL_PT_ERR_EPTW] = "Broken emulated ptwrite",
+};
+
+int intel_pt__strerror(int code, char *buf, size_t buflen)
+{
+ if (code < 1 || code >= INTEL_PT_ERR_MAX)
+ code = INTEL_PT_ERR_UNK;
+ strlcpy(buf, intel_pt_err_msgs[code], buflen);
+ return 0;
+}
+
+static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet,
+ uint64_t last_ip)
+{
+ uint64_t ip;
+
+ switch (packet->count) {
+ case 1:
+ ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) |
+ packet->payload;
+ break;
+ case 2:
+ ip = (last_ip & (uint64_t)0xffffffff00000000ULL) |
+ packet->payload;
+ break;
+ case 3:
+ ip = packet->payload;
+ /* Sign-extend 6-byte ip */
+ if (ip & (uint64_t)0x800000000000ULL)
+ ip |= (uint64_t)0xffff000000000000ULL;
+ break;
+ case 4:
+ ip = (last_ip & (uint64_t)0xffff000000000000ULL) |
+ packet->payload;
+ break;
+ case 6:
+ ip = packet->payload;
+ break;
+ default:
+ return 0;
+ }
+
+ return ip;
+}
+
+static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
+{
+ decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip);
+ decoder->have_last_ip = true;
+}
+
+static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
+{
+ intel_pt_set_last_ip(decoder);
+ decoder->ip = decoder->last_ip;
+}
+
+static void intel_pt_decoder_log_packet(struct intel_pt_decoder *decoder)
+{
+ intel_pt_log_packet(&decoder->packet, decoder->pkt_len, decoder->pos,
+ decoder->buf);
+}
+
+static int intel_pt_bug(struct intel_pt_decoder *decoder)
+{
+ intel_pt_log("ERROR: Internal error\n");
+ decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
+ return -ENOSYS;
+}
+
+static inline void intel_pt_clear_tx_flags(struct intel_pt_decoder *decoder)
+{
+ decoder->tx_flags = 0;
+}
+
+static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder)
+{
+ decoder->tx_flags = decoder->packet.payload & INTEL_PT_IN_TX;
+}
+
+static inline void intel_pt_update_pip(struct intel_pt_decoder *decoder)
+{
+ decoder->pip_payload = decoder->packet.payload;
+}
+
+static inline void intel_pt_update_nr(struct intel_pt_decoder *decoder)
+{
+ decoder->next_nr = decoder->pip_payload & 1;
+}
+
+static inline void intel_pt_set_nr(struct intel_pt_decoder *decoder)
+{
+ decoder->nr = decoder->pip_payload & 1;
+ decoder->next_nr = decoder->nr;
+}
+
+static inline void intel_pt_set_pip(struct intel_pt_decoder *decoder)
+{
+ intel_pt_update_pip(decoder);
+ intel_pt_set_nr(decoder);
+}
+
+static int intel_pt_bad_packet(struct intel_pt_decoder *decoder)
+{
+ intel_pt_clear_tx_flags(decoder);
+ decoder->have_tma = false;
+ decoder->pkt_len = 1;
+ decoder->pkt_step = 1;
+ intel_pt_decoder_log_packet(decoder);
+ if (decoder->pkt_state != INTEL_PT_STATE_NO_PSB) {
+ intel_pt_log("ERROR: Bad packet\n");
+ decoder->pkt_state = INTEL_PT_STATE_ERR1;
+ }
+ return -EBADMSG;
+}
+
+static inline void intel_pt_update_sample_time(struct intel_pt_decoder *decoder)
+{
+ decoder->sample_timestamp = decoder->timestamp;
+ decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+ decoder->state.cycles = decoder->tot_cyc_cnt;
+}
+
+static void intel_pt_reposition(struct intel_pt_decoder *decoder)
+{
+ decoder->ip = 0;
+ decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
+ decoder->timestamp = 0;
+ decoder->have_tma = false;
+}
+
+static int intel_pt_get_data(struct intel_pt_decoder *decoder, bool reposition)
+{
+ struct intel_pt_buffer buffer = { .buf = 0, };
+ int ret;
+
+ decoder->pkt_step = 0;
+
+ intel_pt_log("Getting more data\n");
+ ret = decoder->get_trace(&buffer, decoder->data);
+ if (ret)
+ return ret;
+ decoder->buf = buffer.buf;
+ decoder->len = buffer.len;
+ if (!decoder->len) {
+ intel_pt_log("No more data\n");
+ return -ENODATA;
+ }
+ decoder->buf_timestamp = buffer.ref_timestamp;
+ if (!buffer.consecutive || reposition) {
+ intel_pt_reposition(decoder);
+ decoder->ref_timestamp = buffer.ref_timestamp;
+ decoder->state.trace_nr = buffer.trace_nr;
+ decoder->vm_tm_corr_same_buf = false;
+ intel_pt_log("Reference timestamp 0x%" PRIx64 "\n",
+ decoder->ref_timestamp);
+ return -ENOLINK;
+ }
+
+ return 0;
+}
+
+static int intel_pt_get_next_data(struct intel_pt_decoder *decoder,
+ bool reposition)
+{
+ if (!decoder->next_buf)
+ return intel_pt_get_data(decoder, reposition);
+
+ decoder->buf = decoder->next_buf;
+ decoder->len = decoder->next_len;
+ decoder->next_buf = 0;
+ decoder->next_len = 0;
+ return 0;
+}
+
+static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
+{
+ unsigned char *buf = decoder->temp_buf;
+ size_t old_len, len, n;
+ int ret;
+
+ old_len = decoder->len;
+ len = decoder->len;
+ memcpy(buf, decoder->buf, len);
+
+ ret = intel_pt_get_data(decoder, false);
+ if (ret) {
+ decoder->pos += old_len;
+ return ret < 0 ? ret : -EINVAL;
+ }
+
+ n = INTEL_PT_PKT_MAX_SZ - len;
+ if (n > decoder->len)
+ n = decoder->len;
+ memcpy(buf + len, decoder->buf, n);
+ len += n;
+
+ decoder->prev_pkt_ctx = decoder->pkt_ctx;
+ ret = intel_pt_get_packet(buf, len, &decoder->packet, &decoder->pkt_ctx);
+ if (ret < (int)old_len) {
+ decoder->next_buf = decoder->buf;
+ decoder->next_len = decoder->len;
+ decoder->buf = buf;
+ decoder->len = old_len;
+ return intel_pt_bad_packet(decoder);
+ }
+
+ decoder->next_buf = decoder->buf + (ret - old_len);
+ decoder->next_len = decoder->len - (ret - old_len);
+
+ decoder->buf = buf;
+ decoder->len = ret;
+
+ return ret;
+}
+
+struct intel_pt_pkt_info {
+ struct intel_pt_decoder *decoder;
+ struct intel_pt_pkt packet;
+ uint64_t pos;
+ int pkt_len;
+ int last_packet_type;
+ void *data;
+};
+
+typedef int (*intel_pt_pkt_cb_t)(struct intel_pt_pkt_info *pkt_info);
+
+/* Lookahead packets in current buffer */
+static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder,
+ intel_pt_pkt_cb_t cb, void *data)
+{
+ struct intel_pt_pkt_info pkt_info;
+ const unsigned char *buf = decoder->buf;
+ enum intel_pt_pkt_ctx pkt_ctx = decoder->pkt_ctx;
+ size_t len = decoder->len;
+ int ret;
+
+ pkt_info.decoder = decoder;
+ pkt_info.pos = decoder->pos;
+ pkt_info.pkt_len = decoder->pkt_step;
+ pkt_info.last_packet_type = decoder->last_packet_type;
+ pkt_info.data = data;
+
+ while (1) {
+ do {
+ pkt_info.pos += pkt_info.pkt_len;
+ buf += pkt_info.pkt_len;
+ len -= pkt_info.pkt_len;
+
+ if (!len)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ ret = intel_pt_get_packet(buf, len, &pkt_info.packet,
+ &pkt_ctx);
+ if (!ret)
+ return INTEL_PT_NEED_MORE_BYTES;
+ if (ret < 0)
+ return ret;
+
+ pkt_info.pkt_len = ret;
+ } while (pkt_info.packet.type == INTEL_PT_PAD);
+
+ ret = cb(&pkt_info);
+ if (ret)
+ return 0;
+
+ pkt_info.last_packet_type = pkt_info.packet.type;
+ }
+}
+
+struct intel_pt_calc_cyc_to_tsc_info {
+ uint64_t cycle_cnt;
+ unsigned int cbr;
+ uint32_t last_mtc;
+ uint64_t ctc_timestamp;
+ uint64_t ctc_delta;
+ uint64_t tsc_timestamp;
+ uint64_t timestamp;
+ bool have_tma;
+ bool fixup_last_mtc;
+ bool from_mtc;
+ double cbr_cyc_to_tsc;
+};
+
+/*
+ * MTC provides a 8-bit slice of CTC but the TMA packet only provides the lower
+ * 16 bits of CTC. If mtc_shift > 8 then some of the MTC bits are not in the CTC
+ * provided by the TMA packet. Fix-up the last_mtc calculated from the TMA
+ * packet by copying the missing bits from the current MTC assuming the least
+ * difference between the two, and that the current MTC comes after last_mtc.
+ */
+static void intel_pt_fixup_last_mtc(uint32_t mtc, int mtc_shift,
+ uint32_t *last_mtc)
+{
+ uint32_t first_missing_bit = 1U << (16 - mtc_shift);
+ uint32_t mask = ~(first_missing_bit - 1);
+
+ *last_mtc |= mtc & mask;
+ if (*last_mtc >= mtc) {
+ *last_mtc -= first_missing_bit;
+ *last_mtc &= 0xff;
+ }
+}
+
+static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
+{
+ struct intel_pt_decoder *decoder = pkt_info->decoder;
+ struct intel_pt_calc_cyc_to_tsc_info *data = pkt_info->data;
+ uint64_t timestamp;
+ double cyc_to_tsc;
+ unsigned int cbr;
+ uint32_t mtc, mtc_delta, ctc, fc, ctc_rem;
+
+ switch (pkt_info->packet.type) {
+ case INTEL_PT_TNT:
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TIP:
+ case INTEL_PT_FUP:
+ case INTEL_PT_PSB:
+ case INTEL_PT_PIP:
+ case INTEL_PT_MODE_EXEC:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_PAD:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
+ return 0;
+
+ case INTEL_PT_MTC:
+ if (!data->have_tma)
+ return 0;
+
+ mtc = pkt_info->packet.payload;
+ if (decoder->mtc_shift > 8 && data->fixup_last_mtc) {
+ data->fixup_last_mtc = false;
+ intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift,
+ &data->last_mtc);
+ }
+ if (mtc > data->last_mtc)
+ mtc_delta = mtc - data->last_mtc;
+ else
+ mtc_delta = mtc + 256 - data->last_mtc;
+ data->ctc_delta += mtc_delta << decoder->mtc_shift;
+ data->last_mtc = mtc;
+
+ if (decoder->tsc_ctc_mult) {
+ timestamp = data->ctc_timestamp +
+ data->ctc_delta * decoder->tsc_ctc_mult;
+ } else {
+ timestamp = data->ctc_timestamp +
+ multdiv(data->ctc_delta,
+ decoder->tsc_ctc_ratio_n,
+ decoder->tsc_ctc_ratio_d);
+ }
+
+ if (timestamp < data->timestamp)
+ return 1;
+
+ if (pkt_info->last_packet_type != INTEL_PT_CYC) {
+ data->timestamp = timestamp;
+ return 0;
+ }
+
+ break;
+
+ case INTEL_PT_TSC:
+ /*
+ * For now, do not support using TSC packets - refer
+ * intel_pt_calc_cyc_to_tsc().
+ */
+ if (data->from_mtc)
+ return 1;
+ timestamp = pkt_info->packet.payload |
+ (data->timestamp & (0xffULL << 56));
+ if (data->from_mtc && timestamp < data->timestamp &&
+ data->timestamp - timestamp < decoder->tsc_slip)
+ return 1;
+ if (timestamp < data->timestamp)
+ timestamp += (1ULL << 56);
+ if (pkt_info->last_packet_type != INTEL_PT_CYC) {
+ if (data->from_mtc)
+ return 1;
+ data->tsc_timestamp = timestamp;
+ data->timestamp = timestamp;
+ return 0;
+ }
+ break;
+
+ case INTEL_PT_TMA:
+ if (data->from_mtc)
+ return 1;
+
+ if (!decoder->tsc_ctc_ratio_d)
+ return 0;
+
+ ctc = pkt_info->packet.payload;
+ fc = pkt_info->packet.count;
+ ctc_rem = ctc & decoder->ctc_rem_mask;
+
+ data->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
+
+ data->ctc_timestamp = data->tsc_timestamp - fc;
+ if (decoder->tsc_ctc_mult) {
+ data->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult;
+ } else {
+ data->ctc_timestamp -=
+ multdiv(ctc_rem, decoder->tsc_ctc_ratio_n,
+ decoder->tsc_ctc_ratio_d);
+ }
+
+ data->ctc_delta = 0;
+ data->have_tma = true;
+ data->fixup_last_mtc = true;
+
+ return 0;
+
+ case INTEL_PT_CYC:
+ data->cycle_cnt += pkt_info->packet.payload;
+ return 0;
+
+ case INTEL_PT_CBR:
+ cbr = pkt_info->packet.payload;
+ if (data->cbr && data->cbr != cbr)
+ return 1;
+ data->cbr = cbr;
+ data->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
+ return 0;
+
+ case INTEL_PT_TIP_PGD:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ case INTEL_PT_OVF:
+ case INTEL_PT_BAD: /* Does not happen */
+ default:
+ return 1;
+ }
+
+ if (!data->cbr && decoder->cbr) {
+ data->cbr = decoder->cbr;
+ data->cbr_cyc_to_tsc = decoder->cbr_cyc_to_tsc;
+ }
+
+ if (!data->cycle_cnt)
+ return 1;
+
+ cyc_to_tsc = (double)(timestamp - decoder->timestamp) / data->cycle_cnt;
+
+ if (data->cbr && cyc_to_tsc > data->cbr_cyc_to_tsc &&
+ cyc_to_tsc / data->cbr_cyc_to_tsc > 1.25) {
+ intel_pt_log("Timestamp: calculated %g TSC ticks per cycle too big (c.f. CBR-based value %g), pos " x64_fmt "\n",
+ cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
+ return 1;
+ }
+
+ decoder->calc_cyc_to_tsc = cyc_to_tsc;
+ decoder->have_calc_cyc_to_tsc = true;
+
+ if (data->cbr) {
+ intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. CBR-based value %g, pos " x64_fmt "\n",
+ cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
+ } else {
+ intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. unknown CBR-based value, pos " x64_fmt "\n",
+ cyc_to_tsc, pkt_info->pos);
+ }
+
+ return 1;
+}
+
+static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder,
+ bool from_mtc)
+{
+ struct intel_pt_calc_cyc_to_tsc_info data = {
+ .cycle_cnt = 0,
+ .cbr = 0,
+ .last_mtc = decoder->last_mtc,
+ .ctc_timestamp = decoder->ctc_timestamp,
+ .ctc_delta = decoder->ctc_delta,
+ .tsc_timestamp = decoder->tsc_timestamp,
+ .timestamp = decoder->timestamp,
+ .have_tma = decoder->have_tma,
+ .fixup_last_mtc = decoder->fixup_last_mtc,
+ .from_mtc = from_mtc,
+ .cbr_cyc_to_tsc = 0,
+ };
+
+ /*
+ * For now, do not support using TSC packets for at least the reasons:
+ * 1) timing might have stopped
+ * 2) TSC packets within PSB+ can slip against CYC packets
+ */
+ if (!from_mtc)
+ return;
+
+ intel_pt_pkt_lookahead(decoder, intel_pt_calc_cyc_cb, &data);
+}
+
+static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
+{
+ int ret;
+
+ decoder->last_packet_type = decoder->packet.type;
+
+ do {
+ decoder->pos += decoder->pkt_step;
+ decoder->buf += decoder->pkt_step;
+ decoder->len -= decoder->pkt_step;
+
+ if (!decoder->len) {
+ ret = intel_pt_get_next_data(decoder, false);
+ if (ret)
+ return ret;
+ }
+
+ decoder->prev_pkt_ctx = decoder->pkt_ctx;
+ ret = intel_pt_get_packet(decoder->buf, decoder->len,
+ &decoder->packet, &decoder->pkt_ctx);
+ if (ret == INTEL_PT_NEED_MORE_BYTES && BITS_PER_LONG == 32 &&
+ decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) {
+ ret = intel_pt_get_split_packet(decoder);
+ if (ret < 0)
+ return ret;
+ }
+ if (ret <= 0)
+ return intel_pt_bad_packet(decoder);
+
+ decoder->pkt_len = ret;
+ decoder->pkt_step = ret;
+ intel_pt_decoder_log_packet(decoder);
+ } while (decoder->packet.type == INTEL_PT_PAD);
+
+ return 0;
+}
+
+static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder)
+{
+ uint64_t timestamp, masked_timestamp;
+
+ timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
+ masked_timestamp = timestamp & decoder->period_mask;
+ if (decoder->continuous_period) {
+ if (masked_timestamp > decoder->last_masked_timestamp)
+ return 1;
+ } else {
+ timestamp += 1;
+ masked_timestamp = timestamp & decoder->period_mask;
+ if (masked_timestamp > decoder->last_masked_timestamp) {
+ decoder->last_masked_timestamp = masked_timestamp;
+ decoder->continuous_period = true;
+ }
+ }
+
+ if (masked_timestamp < decoder->last_masked_timestamp)
+ return decoder->period_ticks;
+
+ return decoder->period_ticks - (timestamp - masked_timestamp);
+}
+
+static uint64_t intel_pt_next_sample(struct intel_pt_decoder *decoder)
+{
+ switch (decoder->period_type) {
+ case INTEL_PT_PERIOD_INSTRUCTIONS:
+ return decoder->period - decoder->period_insn_cnt;
+ case INTEL_PT_PERIOD_TICKS:
+ return intel_pt_next_period(decoder);
+ case INTEL_PT_PERIOD_NONE:
+ case INTEL_PT_PERIOD_MTC:
+ default:
+ return 0;
+ }
+}
+
+static void intel_pt_sample_insn(struct intel_pt_decoder *decoder)
+{
+ uint64_t timestamp, masked_timestamp;
+
+ switch (decoder->period_type) {
+ case INTEL_PT_PERIOD_INSTRUCTIONS:
+ decoder->period_insn_cnt = 0;
+ break;
+ case INTEL_PT_PERIOD_TICKS:
+ timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
+ masked_timestamp = timestamp & decoder->period_mask;
+ if (masked_timestamp > decoder->last_masked_timestamp)
+ decoder->last_masked_timestamp = masked_timestamp;
+ else
+ decoder->last_masked_timestamp += decoder->period_ticks;
+ break;
+ case INTEL_PT_PERIOD_NONE:
+ case INTEL_PT_PERIOD_MTC:
+ default:
+ break;
+ }
+
+ decoder->state.type |= INTEL_PT_INSTRUCTION;
+}
+
+/*
+ * Sample FUP instruction at the same time as reporting the FUP event, so the
+ * instruction sample gets the same flags as the FUP event.
+ */
+static void intel_pt_sample_fup_insn(struct intel_pt_decoder *decoder)
+{
+ struct intel_pt_insn intel_pt_insn;
+ uint64_t max_insn_cnt, insn_cnt = 0;
+ int err;
+
+ decoder->state.insn_op = INTEL_PT_OP_OTHER;
+ decoder->state.insn_len = 0;
+
+ if (!decoder->branch_enable || !decoder->pge || decoder->hop ||
+ decoder->ip != decoder->last_ip)
+ return;
+
+ if (!decoder->mtc_insn)
+ decoder->mtc_insn = true;
+
+ max_insn_cnt = intel_pt_next_sample(decoder);
+ if (max_insn_cnt != 1)
+ return;
+
+ err = decoder->walk_insn(&intel_pt_insn, &insn_cnt, &decoder->ip,
+ 0, max_insn_cnt, decoder->data);
+ /* Ignore error, it will be reported next walk anyway */
+ if (err)
+ return;
+
+ if (intel_pt_insn.branch != INTEL_PT_BR_NO_BRANCH) {
+ intel_pt_log_at("ERROR: Unexpected branch at FUP instruction", decoder->ip);
+ return;
+ }
+
+ decoder->tot_insn_cnt += insn_cnt;
+ decoder->timestamp_insn_cnt += insn_cnt;
+ decoder->sample_insn_cnt += insn_cnt;
+ decoder->period_insn_cnt += insn_cnt;
+
+ intel_pt_sample_insn(decoder);
+
+ decoder->state.type |= INTEL_PT_INSTRUCTION;
+ decoder->ip += intel_pt_insn.length;
+}
+
+static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
+ struct intel_pt_insn *intel_pt_insn, uint64_t ip)
+{
+ uint64_t max_insn_cnt, insn_cnt = 0;
+ int err;
+
+ if (!decoder->mtc_insn)
+ decoder->mtc_insn = true;
+
+ max_insn_cnt = intel_pt_next_sample(decoder);
+
+ err = decoder->walk_insn(intel_pt_insn, &insn_cnt, &decoder->ip, ip,
+ max_insn_cnt, decoder->data);
+
+ decoder->tot_insn_cnt += insn_cnt;
+ decoder->timestamp_insn_cnt += insn_cnt;
+ decoder->sample_insn_cnt += insn_cnt;
+ decoder->period_insn_cnt += insn_cnt;
+
+ if (err) {
+ decoder->no_progress = 0;
+ decoder->pkt_state = INTEL_PT_STATE_ERR2;
+ intel_pt_log_at("ERROR: Failed to get instruction",
+ decoder->ip);
+ if (err == -ENOENT)
+ return -ENOLINK;
+ return -EILSEQ;
+ }
+
+ if (ip && decoder->ip == ip) {
+ err = -EAGAIN;
+ goto out;
+ }
+
+ if (max_insn_cnt && insn_cnt >= max_insn_cnt)
+ intel_pt_sample_insn(decoder);
+
+ if (intel_pt_insn->branch == INTEL_PT_BR_NO_BRANCH) {
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->ip += intel_pt_insn->length;
+ err = INTEL_PT_RETURN;
+ goto out;
+ }
+
+ if (intel_pt_insn->op == INTEL_PT_OP_CALL) {
+ /* Zero-length calls are excluded */
+ if (intel_pt_insn->branch != INTEL_PT_BR_UNCONDITIONAL ||
+ intel_pt_insn->rel) {
+ err = intel_pt_push(&decoder->stack, decoder->ip +
+ intel_pt_insn->length);
+ if (err)
+ goto out;
+ }
+ } else if (intel_pt_insn->op == INTEL_PT_OP_RET) {
+ decoder->ret_addr = intel_pt_pop(&decoder->stack);
+ }
+
+ if (intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL) {
+ int cnt = decoder->no_progress++;
+
+ decoder->state.from_ip = decoder->ip;
+ decoder->ip += intel_pt_insn->length +
+ intel_pt_insn->rel;
+ decoder->state.to_ip = decoder->ip;
+ err = INTEL_PT_RETURN;
+
+ /*
+ * Check for being stuck in a loop. This can happen if a
+ * decoder error results in the decoder erroneously setting the
+ * ip to an address that is itself in an infinite loop that
+ * consumes no packets. When that happens, there must be an
+ * unconditional branch.
+ */
+ if (cnt) {
+ if (cnt == 1) {
+ decoder->stuck_ip = decoder->state.to_ip;
+ decoder->stuck_ip_prd = 1;
+ decoder->stuck_ip_cnt = 1;
+ } else if (cnt > decoder->max_loops ||
+ decoder->state.to_ip == decoder->stuck_ip) {
+ intel_pt_log_at("ERROR: Never-ending loop",
+ decoder->state.to_ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+ err = -ELOOP;
+ goto out;
+ } else if (!--decoder->stuck_ip_cnt) {
+ decoder->stuck_ip_prd += 1;
+ decoder->stuck_ip_cnt = decoder->stuck_ip_prd;
+ decoder->stuck_ip = decoder->state.to_ip;
+ }
+ }
+ goto out_no_progress;
+ }
+out:
+ decoder->no_progress = 0;
+out_no_progress:
+ decoder->state.insn_op = intel_pt_insn->op;
+ decoder->state.insn_len = intel_pt_insn->length;
+ memcpy(decoder->state.insn, intel_pt_insn->buf,
+ INTEL_PT_INSN_BUF_SZ);
+
+ if (decoder->tx_flags & INTEL_PT_IN_TX)
+ decoder->state.flags |= INTEL_PT_IN_TX;
+
+ return err;
+}
+
+static void intel_pt_mode_exec_status(struct intel_pt_decoder *decoder)
+{
+ bool iflag = decoder->packet.count & INTEL_PT_IFLAG;
+
+ decoder->exec_mode = decoder->packet.payload;
+ decoder->iflag = iflag;
+ decoder->next_iflag = iflag;
+ decoder->state.from_iflag = iflag;
+ decoder->state.to_iflag = iflag;
+}
+
+static void intel_pt_mode_exec(struct intel_pt_decoder *decoder)
+{
+ bool iflag = decoder->packet.count & INTEL_PT_IFLAG;
+
+ decoder->exec_mode = decoder->packet.payload;
+ decoder->next_iflag = iflag;
+}
+
+static void intel_pt_sample_iflag(struct intel_pt_decoder *decoder)
+{
+ decoder->state.type |= INTEL_PT_IFLAG_CHG;
+ decoder->state.from_iflag = decoder->iflag;
+ decoder->state.to_iflag = decoder->next_iflag;
+ decoder->iflag = decoder->next_iflag;
+}
+
+static void intel_pt_sample_iflag_chg(struct intel_pt_decoder *decoder)
+{
+ if (decoder->iflag != decoder->next_iflag)
+ intel_pt_sample_iflag(decoder);
+}
+
+static void intel_pt_clear_fup_event(struct intel_pt_decoder *decoder)
+{
+ decoder->set_fup_tx_flags = false;
+ decoder->set_fup_ptw = false;
+ decoder->set_fup_mwait = false;
+ decoder->set_fup_pwre = false;
+ decoder->set_fup_exstop = false;
+ decoder->set_fup_bep = false;
+ decoder->set_fup_cfe_ip = false;
+ decoder->set_fup_cfe = false;
+ decoder->evd_cnt = 0;
+ decoder->set_fup_mode_exec = false;
+ decoder->iflag = decoder->next_iflag;
+}
+
+static bool intel_pt_fup_event(struct intel_pt_decoder *decoder, bool no_tip)
+{
+ enum intel_pt_sample_type type = decoder->state.type;
+ bool sample_fup_insn = false;
+ bool ret = false;
+
+ decoder->state.type &= ~INTEL_PT_BRANCH;
+
+ if (decoder->set_fup_cfe_ip || decoder->set_fup_cfe) {
+ bool ip = decoder->set_fup_cfe_ip;
+
+ decoder->set_fup_cfe_ip = false;
+ decoder->set_fup_cfe = false;
+ decoder->state.type |= INTEL_PT_EVT;
+ if (!ip && decoder->pge)
+ decoder->state.type |= INTEL_PT_BRANCH;
+ decoder->state.cfe_type = decoder->fup_cfe_pkt.count;
+ decoder->state.cfe_vector = decoder->fup_cfe_pkt.payload;
+ decoder->state.evd_cnt = decoder->evd_cnt;
+ decoder->state.evd = decoder->evd;
+ decoder->evd_cnt = 0;
+ if (ip || decoder->pge)
+ decoder->state.flags |= INTEL_PT_FUP_IP;
+ ret = true;
+ }
+ if (decoder->set_fup_mode_exec) {
+ decoder->set_fup_mode_exec = false;
+ intel_pt_sample_iflag(decoder);
+ sample_fup_insn = no_tip;
+ ret = true;
+ }
+ if (decoder->set_fup_tx_flags) {
+ decoder->set_fup_tx_flags = false;
+ decoder->tx_flags = decoder->fup_tx_flags;
+ decoder->state.type |= INTEL_PT_TRANSACTION;
+ if (decoder->fup_tx_flags & INTEL_PT_ABORT_TX)
+ decoder->state.type |= INTEL_PT_BRANCH;
+ decoder->state.flags = decoder->fup_tx_flags;
+ ret = true;
+ }
+ if (decoder->set_fup_ptw) {
+ decoder->set_fup_ptw = false;
+ decoder->state.type |= INTEL_PT_PTW;
+ decoder->state.flags |= INTEL_PT_FUP_IP;
+ decoder->state.ptw_payload = decoder->fup_ptw_payload;
+ ret = true;
+ }
+ if (decoder->set_fup_mwait) {
+ decoder->set_fup_mwait = false;
+ decoder->state.type |= INTEL_PT_MWAIT_OP;
+ decoder->state.mwait_payload = decoder->fup_mwait_payload;
+ ret = true;
+ }
+ if (decoder->set_fup_pwre) {
+ decoder->set_fup_pwre = false;
+ decoder->state.type |= INTEL_PT_PWR_ENTRY;
+ decoder->state.pwre_payload = decoder->fup_pwre_payload;
+ ret = true;
+ }
+ if (decoder->set_fup_exstop) {
+ decoder->set_fup_exstop = false;
+ decoder->state.type |= INTEL_PT_EX_STOP;
+ decoder->state.flags |= INTEL_PT_FUP_IP;
+ ret = true;
+ }
+ if (decoder->set_fup_bep) {
+ decoder->set_fup_bep = false;
+ decoder->state.type |= INTEL_PT_BLK_ITEMS;
+ ret = true;
+ }
+ if (decoder->overflow) {
+ decoder->overflow = false;
+ if (!ret && !decoder->pge) {
+ if (decoder->hop) {
+ decoder->state.type = 0;
+ decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+ }
+ decoder->pge = true;
+ decoder->state.type |= INTEL_PT_BRANCH | INTEL_PT_TRACE_BEGIN;
+ decoder->state.from_ip = 0;
+ decoder->state.to_ip = decoder->ip;
+ return true;
+ }
+ }
+ if (ret) {
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ if (sample_fup_insn)
+ intel_pt_sample_fup_insn(decoder);
+ } else {
+ decoder->state.type = type;
+ }
+ return ret;
+}
+
+static inline bool intel_pt_fup_with_nlip(struct intel_pt_decoder *decoder,
+ struct intel_pt_insn *intel_pt_insn,
+ uint64_t ip, int err)
+{
+ return decoder->flags & INTEL_PT_FUP_WITH_NLIP && !err &&
+ intel_pt_insn->branch == INTEL_PT_BR_INDIRECT &&
+ ip == decoder->ip + intel_pt_insn->length;
+}
+
+static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
+{
+ struct intel_pt_insn intel_pt_insn;
+ uint64_t ip;
+ int err;
+
+ ip = decoder->last_ip;
+
+ while (1) {
+ err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip);
+ if (err == INTEL_PT_RETURN)
+ return 0;
+ if (err == -EAGAIN ||
+ intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) {
+ bool no_tip = decoder->pkt_state != INTEL_PT_STATE_FUP;
+
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ if (intel_pt_fup_event(decoder, no_tip) && no_tip)
+ return 0;
+ return -EAGAIN;
+ }
+ decoder->set_fup_tx_flags = false;
+ if (err)
+ return err;
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
+ intel_pt_log_at("ERROR: Unexpected indirect branch",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+ return -ENOENT;
+ }
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
+ intel_pt_log_at("ERROR: Unexpected conditional branch",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+ return -ENOENT;
+ }
+
+ intel_pt_bug(decoder);
+ }
+}
+
+static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
+{
+ struct intel_pt_insn intel_pt_insn;
+ int err;
+
+ err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
+ if (err == INTEL_PT_RETURN &&
+ decoder->pgd_ip &&
+ decoder->pkt_state == INTEL_PT_STATE_TIP_PGD &&
+ (decoder->state.type & INTEL_PT_BRANCH) &&
+ decoder->pgd_ip(decoder->state.to_ip, decoder->data)) {
+ /* Unconditional branch leaving filter region */
+ decoder->no_progress = 0;
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->state.type |= INTEL_PT_TRACE_END;
+ intel_pt_update_nr(decoder);
+ return 0;
+ }
+ if (err == INTEL_PT_RETURN)
+ return 0;
+ if (err)
+ return err;
+
+ intel_pt_update_nr(decoder);
+ intel_pt_sample_iflag_chg(decoder);
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
+ if (decoder->pkt_state == INTEL_PT_STATE_TIP_PGD) {
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->state.from_ip = decoder->ip;
+ if (decoder->packet.count == 0) {
+ decoder->state.to_ip = 0;
+ } else {
+ decoder->state.to_ip = decoder->last_ip;
+ decoder->ip = decoder->last_ip;
+ }
+ decoder->state.type |= INTEL_PT_TRACE_END;
+ } else {
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->state.from_ip = decoder->ip;
+ if (decoder->packet.count == 0) {
+ decoder->state.to_ip = 0;
+ } else {
+ decoder->state.to_ip = decoder->last_ip;
+ decoder->ip = decoder->last_ip;
+ }
+ }
+ return 0;
+ }
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
+ uint64_t to_ip = decoder->ip + intel_pt_insn.length +
+ intel_pt_insn.rel;
+
+ if (decoder->pgd_ip &&
+ decoder->pkt_state == INTEL_PT_STATE_TIP_PGD &&
+ decoder->pgd_ip(to_ip, decoder->data)) {
+ /* Conditional branch leaving filter region */
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->ip = to_ip;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = to_ip;
+ decoder->state.type |= INTEL_PT_TRACE_END;
+ return 0;
+ }
+ intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+ return -ENOENT;
+ }
+
+ return intel_pt_bug(decoder);
+}
+
+struct eptw_data {
+ int bit_countdown;
+ uint64_t payload;
+};
+
+static int intel_pt_eptw_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
+{
+ struct eptw_data *data = pkt_info->data;
+ int nr_bits;
+
+ switch (pkt_info->packet.type) {
+ case INTEL_PT_PAD:
+ case INTEL_PT_MNT:
+ case INTEL_PT_MODE_EXEC:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_MTC:
+ case INTEL_PT_FUP:
+ case INTEL_PT_CYC:
+ case INTEL_PT_CBR:
+ case INTEL_PT_TSC:
+ case INTEL_PT_TMA:
+ case INTEL_PT_PIP:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_PSB:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
+ break;
+
+ case INTEL_PT_TNT:
+ nr_bits = data->bit_countdown;
+ if (nr_bits > pkt_info->packet.count)
+ nr_bits = pkt_info->packet.count;
+ data->payload <<= nr_bits;
+ data->payload |= pkt_info->packet.payload >> (64 - nr_bits);
+ data->bit_countdown -= nr_bits;
+ return !data->bit_countdown;
+
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TIP_PGD:
+ case INTEL_PT_TIP:
+ case INTEL_PT_BAD:
+ case INTEL_PT_OVF:
+ case INTEL_PT_TRACESTOP:
+ default:
+ return 1;
+ }
+
+ return 0;
+}
+
+static int intel_pt_emulated_ptwrite(struct intel_pt_decoder *decoder)
+{
+ int n = 64 - decoder->tnt.count;
+ struct eptw_data data = {
+ .bit_countdown = n,
+ .payload = decoder->tnt.payload >> n,
+ };
+
+ decoder->emulated_ptwrite = false;
+ intel_pt_log("Emulated ptwrite detected\n");
+
+ intel_pt_pkt_lookahead(decoder, intel_pt_eptw_lookahead_cb, &data);
+ if (data.bit_countdown)
+ return -ECONNRESET;
+
+ decoder->state.type = INTEL_PT_PTW;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.ptw_payload = data.payload;
+ return 0;
+}
+
+static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
+{
+ struct intel_pt_insn intel_pt_insn;
+ int err;
+
+ while (1) {
+ if (decoder->emulated_ptwrite)
+ return intel_pt_emulated_ptwrite(decoder);
+ err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
+ if (err == INTEL_PT_RETURN) {
+ decoder->emulated_ptwrite = intel_pt_insn.emulated_ptwrite;
+ return 0;
+ }
+ if (err) {
+ decoder->emulated_ptwrite = false;
+ return err;
+ }
+
+ if (intel_pt_insn.op == INTEL_PT_OP_RET) {
+ if (!decoder->return_compression) {
+ intel_pt_log_at("ERROR: RET when expecting conditional branch",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ return -ENOENT;
+ }
+ if (!decoder->ret_addr) {
+ intel_pt_log_at("ERROR: Bad RET compression (stack empty)",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ return -ENOENT;
+ }
+ if (!(decoder->tnt.payload & BIT63)) {
+ intel_pt_log_at("ERROR: Bad RET compression (TNT=N)",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ return -ENOENT;
+ }
+ decoder->tnt.count -= 1;
+ if (decoder->tnt.count)
+ decoder->pkt_state = INTEL_PT_STATE_TNT_CONT;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->tnt.payload <<= 1;
+ decoder->state.from_ip = decoder->ip;
+ decoder->ip = decoder->ret_addr;
+ decoder->state.to_ip = decoder->ip;
+ return 0;
+ }
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
+ /* Handle deferred TIPs */
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type != INTEL_PT_TIP ||
+ decoder->packet.count == 0) {
+ intel_pt_log_at("ERROR: Missing deferred TIP for indirect branch",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ decoder->pkt_step = 0;
+ return -ENOENT;
+ }
+ intel_pt_set_last_ip(decoder);
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = decoder->last_ip;
+ decoder->ip = decoder->last_ip;
+ intel_pt_update_nr(decoder);
+ intel_pt_sample_iflag_chg(decoder);
+ return 0;
+ }
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
+ decoder->tnt.count -= 1;
+ if (decoder->tnt.count)
+ decoder->pkt_state = INTEL_PT_STATE_TNT_CONT;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ if (decoder->tnt.payload & BIT63) {
+ decoder->tnt.payload <<= 1;
+ decoder->state.from_ip = decoder->ip;
+ decoder->ip += intel_pt_insn.length +
+ intel_pt_insn.rel;
+ decoder->state.to_ip = decoder->ip;
+ return 0;
+ }
+ /* Instruction sample for a non-taken branch */
+ if (decoder->state.type & INTEL_PT_INSTRUCTION) {
+ decoder->tnt.payload <<= 1;
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->ip += intel_pt_insn.length;
+ return 0;
+ }
+ decoder->sample_cyc = false;
+ decoder->ip += intel_pt_insn.length;
+ if (!decoder->tnt.count) {
+ intel_pt_update_sample_time(decoder);
+ return -EAGAIN;
+ }
+ decoder->tnt.payload <<= 1;
+ continue;
+ }
+
+ return intel_pt_bug(decoder);
+ }
+}
+
+static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip)
+{
+ unsigned int fup_tx_flags;
+ int err;
+
+ fup_tx_flags = decoder->packet.payload &
+ (INTEL_PT_IN_TX | INTEL_PT_ABORT_TX);
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->fup_tx_flags = fup_tx_flags;
+ decoder->set_fup_tx_flags = true;
+ if (!(decoder->fup_tx_flags & INTEL_PT_ABORT_TX))
+ *no_tip = true;
+ } else {
+ intel_pt_log_at("ERROR: Missing FUP after MODE.TSX",
+ decoder->pos);
+ intel_pt_update_in_tx(decoder);
+ }
+ return 0;
+}
+
+static int intel_pt_evd(struct intel_pt_decoder *decoder)
+{
+ if (decoder->evd_cnt >= INTEL_PT_MAX_EVDS) {
+ intel_pt_log_at("ERROR: Too many EVD packets", decoder->pos);
+ return -ENOSYS;
+ }
+ decoder->evd[decoder->evd_cnt++] = (struct intel_pt_evd){
+ .type = decoder->packet.count,
+ .payload = decoder->packet.payload,
+ };
+ return 0;
+}
+
+static uint64_t intel_pt_8b_tsc(uint64_t timestamp, uint64_t ref_timestamp)
+{
+ timestamp |= (ref_timestamp & (0xffULL << 56));
+
+ if (timestamp < ref_timestamp) {
+ if (ref_timestamp - timestamp > (1ULL << 55))
+ timestamp += (1ULL << 56);
+ } else {
+ if (timestamp - ref_timestamp > (1ULL << 55))
+ timestamp -= (1ULL << 56);
+ }
+
+ return timestamp;
+}
+
+/* For use only when decoder->vm_time_correlation is true */
+static bool intel_pt_time_in_range(struct intel_pt_decoder *decoder,
+ uint64_t timestamp)
+{
+ uint64_t max_timestamp = decoder->buf_timestamp;
+
+ if (!max_timestamp) {
+ max_timestamp = decoder->last_reliable_timestamp +
+ 0x400000000ULL;
+ }
+ return timestamp >= decoder->last_reliable_timestamp &&
+ timestamp < decoder->buf_timestamp;
+}
+
+static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
+{
+ uint64_t timestamp;
+ bool bad = false;
+
+ decoder->have_tma = false;
+
+ if (decoder->ref_timestamp) {
+ timestamp = intel_pt_8b_tsc(decoder->packet.payload,
+ decoder->ref_timestamp);
+ decoder->tsc_timestamp = timestamp;
+ decoder->timestamp = timestamp;
+ decoder->ref_timestamp = 0;
+ decoder->timestamp_insn_cnt = 0;
+ } else if (decoder->timestamp) {
+ timestamp = decoder->packet.payload |
+ (decoder->timestamp & (0xffULL << 56));
+ decoder->tsc_timestamp = timestamp;
+ if (timestamp < decoder->timestamp &&
+ decoder->timestamp - timestamp < decoder->tsc_slip) {
+ intel_pt_log_to("Suppressing backwards timestamp",
+ timestamp);
+ timestamp = decoder->timestamp;
+ }
+ if (timestamp < decoder->timestamp) {
+ if (!decoder->buf_timestamp ||
+ (timestamp + (1ULL << 56) < decoder->buf_timestamp)) {
+ intel_pt_log_to("Wraparound timestamp", timestamp);
+ timestamp += (1ULL << 56);
+ decoder->tsc_timestamp = timestamp;
+ } else {
+ intel_pt_log_to("Suppressing bad timestamp", timestamp);
+ timestamp = decoder->timestamp;
+ bad = true;
+ }
+ }
+ if (decoder->vm_time_correlation &&
+ (bad || !intel_pt_time_in_range(decoder, timestamp)) &&
+ intel_pt_print_once(decoder, INTEL_PT_PRT_ONCE_ERANGE))
+ p_log("Timestamp out of range");
+ decoder->timestamp = timestamp;
+ decoder->timestamp_insn_cnt = 0;
+ }
+
+ if (decoder->last_packet_type == INTEL_PT_CYC) {
+ decoder->cyc_ref_timestamp = decoder->timestamp;
+ decoder->cycle_cnt = 0;
+ decoder->have_calc_cyc_to_tsc = false;
+ intel_pt_calc_cyc_to_tsc(decoder, false);
+ }
+
+ intel_pt_log_to("Setting timestamp", decoder->timestamp);
+}
+
+static int intel_pt_overflow(struct intel_pt_decoder *decoder)
+{
+ intel_pt_log("ERROR: Buffer overflow\n");
+ intel_pt_clear_tx_flags(decoder);
+ intel_pt_set_nr(decoder);
+ decoder->timestamp_insn_cnt = 0;
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->state.from_ip = decoder->ip;
+ decoder->ip = 0;
+ decoder->pge = false;
+ intel_pt_clear_fup_event(decoder);
+ decoder->overflow = true;
+ return -EOVERFLOW;
+}
+
+static inline void intel_pt_mtc_cyc_cnt_pge(struct intel_pt_decoder *decoder)
+{
+ if (decoder->have_cyc)
+ return;
+
+ decoder->cyc_cnt_timestamp = decoder->timestamp;
+ decoder->base_cyc_cnt = decoder->tot_cyc_cnt;
+}
+
+static inline void intel_pt_mtc_cyc_cnt_cbr(struct intel_pt_decoder *decoder)
+{
+ decoder->tsc_to_cyc = decoder->cbr / decoder->max_non_turbo_ratio_fp;
+
+ if (decoder->pge)
+ intel_pt_mtc_cyc_cnt_pge(decoder);
+}
+
+static inline void intel_pt_mtc_cyc_cnt_upd(struct intel_pt_decoder *decoder)
+{
+ uint64_t tot_cyc_cnt, tsc_delta;
+
+ if (decoder->have_cyc)
+ return;
+
+ decoder->sample_cyc = true;
+
+ if (!decoder->pge || decoder->timestamp <= decoder->cyc_cnt_timestamp)
+ return;
+
+ tsc_delta = decoder->timestamp - decoder->cyc_cnt_timestamp;
+ tot_cyc_cnt = tsc_delta * decoder->tsc_to_cyc + decoder->base_cyc_cnt;
+
+ if (tot_cyc_cnt > decoder->tot_cyc_cnt)
+ decoder->tot_cyc_cnt = tot_cyc_cnt;
+}
+
+static void intel_pt_calc_tma(struct intel_pt_decoder *decoder)
+{
+ uint32_t ctc = decoder->packet.payload;
+ uint32_t fc = decoder->packet.count;
+ uint32_t ctc_rem = ctc & decoder->ctc_rem_mask;
+
+ if (!decoder->tsc_ctc_ratio_d)
+ return;
+
+ if (decoder->pge && !decoder->in_psb)
+ intel_pt_mtc_cyc_cnt_pge(decoder);
+ else
+ intel_pt_mtc_cyc_cnt_upd(decoder);
+
+ decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
+ decoder->last_ctc = ctc - ctc_rem;
+ decoder->ctc_timestamp = decoder->tsc_timestamp - fc;
+ if (decoder->tsc_ctc_mult) {
+ decoder->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult;
+ } else {
+ decoder->ctc_timestamp -= multdiv(ctc_rem,
+ decoder->tsc_ctc_ratio_n,
+ decoder->tsc_ctc_ratio_d);
+ }
+ decoder->ctc_delta = 0;
+ decoder->have_tma = true;
+ decoder->fixup_last_mtc = true;
+ intel_pt_log("CTC timestamp " x64_fmt " last MTC %#x CTC rem %#x\n",
+ decoder->ctc_timestamp, decoder->last_mtc, ctc_rem);
+}
+
+static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
+{
+ uint64_t timestamp;
+ uint32_t mtc, mtc_delta;
+
+ if (!decoder->have_tma)
+ return;
+
+ mtc = decoder->packet.payload;
+
+ if (decoder->mtc_shift > 8 && decoder->fixup_last_mtc) {
+ decoder->fixup_last_mtc = false;
+ intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift,
+ &decoder->last_mtc);
+ }
+
+ if (mtc > decoder->last_mtc)
+ mtc_delta = mtc - decoder->last_mtc;
+ else
+ mtc_delta = mtc + 256 - decoder->last_mtc;
+
+ decoder->ctc_delta += mtc_delta << decoder->mtc_shift;
+
+ if (decoder->tsc_ctc_mult) {
+ timestamp = decoder->ctc_timestamp +
+ decoder->ctc_delta * decoder->tsc_ctc_mult;
+ } else {
+ timestamp = decoder->ctc_timestamp +
+ multdiv(decoder->ctc_delta,
+ decoder->tsc_ctc_ratio_n,
+ decoder->tsc_ctc_ratio_d);
+ }
+
+ if (timestamp < decoder->timestamp)
+ intel_pt_log("Suppressing MTC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n",
+ timestamp, decoder->timestamp);
+ else
+ decoder->timestamp = timestamp;
+
+ intel_pt_mtc_cyc_cnt_upd(decoder);
+
+ decoder->timestamp_insn_cnt = 0;
+ decoder->last_mtc = mtc;
+
+ if (decoder->last_packet_type == INTEL_PT_CYC) {
+ decoder->cyc_ref_timestamp = decoder->timestamp;
+ decoder->cycle_cnt = 0;
+ decoder->have_calc_cyc_to_tsc = false;
+ intel_pt_calc_cyc_to_tsc(decoder, true);
+ }
+
+ intel_pt_log_to("Setting timestamp", decoder->timestamp);
+}
+
+static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
+{
+ unsigned int cbr = decoder->packet.payload & 0xff;
+
+ decoder->cbr_payload = decoder->packet.payload;
+
+ if (decoder->cbr == cbr)
+ return;
+
+ decoder->cbr = cbr;
+ decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
+ decoder->cyc_ref_timestamp = decoder->timestamp;
+ decoder->cycle_cnt = 0;
+
+ intel_pt_mtc_cyc_cnt_cbr(decoder);
+}
+
+static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
+{
+ uint64_t timestamp = decoder->cyc_ref_timestamp;
+
+ decoder->have_cyc = true;
+
+ decoder->cycle_cnt += decoder->packet.payload;
+ if (decoder->pge)
+ decoder->tot_cyc_cnt += decoder->packet.payload;
+ decoder->sample_cyc = true;
+
+ if (!decoder->cyc_ref_timestamp)
+ return;
+
+ if (decoder->have_calc_cyc_to_tsc)
+ timestamp += decoder->cycle_cnt * decoder->calc_cyc_to_tsc;
+ else if (decoder->cbr)
+ timestamp += decoder->cycle_cnt * decoder->cbr_cyc_to_tsc;
+ else
+ return;
+
+ if (timestamp < decoder->timestamp)
+ intel_pt_log("Suppressing CYC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n",
+ timestamp, decoder->timestamp);
+ else
+ decoder->timestamp = timestamp;
+
+ decoder->timestamp_insn_cnt = 0;
+
+ intel_pt_log_to("Setting timestamp", decoder->timestamp);
+}
+
+static void intel_pt_bbp(struct intel_pt_decoder *decoder)
+{
+ if (decoder->prev_pkt_ctx == INTEL_PT_NO_CTX) {
+ memset(decoder->state.items.mask, 0, sizeof(decoder->state.items.mask));
+ decoder->state.items.is_32_bit = false;
+ }
+ decoder->blk_type = decoder->packet.payload;
+ decoder->blk_type_pos = intel_pt_blk_type_pos(decoder->blk_type);
+ if (decoder->blk_type == INTEL_PT_GP_REGS)
+ decoder->state.items.is_32_bit = decoder->packet.count;
+ if (decoder->blk_type_pos < 0) {
+ intel_pt_log("WARNING: Unknown block type %u\n",
+ decoder->blk_type);
+ } else if (decoder->state.items.mask[decoder->blk_type_pos]) {
+ intel_pt_log("WARNING: Duplicate block type %u\n",
+ decoder->blk_type);
+ }
+}
+
+static void intel_pt_bip(struct intel_pt_decoder *decoder)
+{
+ uint32_t id = decoder->packet.count;
+ uint32_t bit = 1 << id;
+ int pos = decoder->blk_type_pos;
+
+ if (pos < 0 || id >= INTEL_PT_BLK_ITEM_ID_CNT) {
+ intel_pt_log("WARNING: Unknown block item %u type %d\n",
+ id, decoder->blk_type);
+ return;
+ }
+
+ if (decoder->state.items.mask[pos] & bit) {
+ intel_pt_log("WARNING: Duplicate block item %u type %d\n",
+ id, decoder->blk_type);
+ }
+
+ decoder->state.items.mask[pos] |= bit;
+ decoder->state.items.val[pos][id] = decoder->packet.payload;
+}
+
+/* Walk PSB+ packets when already in sync. */
+static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ decoder->in_psb = true;
+
+ while (1) {
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ goto out;
+
+ switch (decoder->packet.type) {
+ case INTEL_PT_PSBEND:
+ err = 0;
+ goto out;
+
+ case INTEL_PT_TIP_PGD:
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TIP:
+ case INTEL_PT_TNT:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_BAD:
+ case INTEL_PT_PSB:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
+ decoder->have_tma = false;
+ intel_pt_log("ERROR: Unexpected packet\n");
+ err = -EAGAIN;
+ goto out;
+
+ case INTEL_PT_OVF:
+ err = intel_pt_overflow(decoder);
+ goto out;
+
+ case INTEL_PT_TSC:
+ intel_pt_calc_tsc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TMA:
+ intel_pt_calc_tma(decoder);
+ break;
+
+ case INTEL_PT_CBR:
+ intel_pt_calc_cbr(decoder);
+ break;
+
+ case INTEL_PT_MODE_EXEC:
+ intel_pt_mode_exec_status(decoder);
+ break;
+
+ case INTEL_PT_PIP:
+ intel_pt_set_pip(decoder);
+ break;
+
+ case INTEL_PT_FUP:
+ decoder->pge = true;
+ if (decoder->packet.count) {
+ intel_pt_set_last_ip(decoder);
+ decoder->psb_ip = decoder->last_ip;
+ }
+ break;
+
+ case INTEL_PT_MODE_TSX:
+ intel_pt_update_in_tx(decoder);
+ break;
+
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ if (decoder->period_type == INTEL_PT_PERIOD_MTC)
+ decoder->state.type |= INTEL_PT_INSTRUCTION;
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PAD:
+ default:
+ break;
+ }
+ }
+out:
+ decoder->in_psb = false;
+
+ return err;
+}
+
+static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ if (decoder->tx_flags & INTEL_PT_ABORT_TX) {
+ decoder->tx_flags = 0;
+ decoder->state.flags &= ~INTEL_PT_IN_TX;
+ decoder->state.flags |= INTEL_PT_ABORT_TX;
+ } else {
+ decoder->state.flags |= INTEL_PT_ASYNC;
+ }
+
+ while (1) {
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+
+ switch (decoder->packet.type) {
+ case INTEL_PT_TNT:
+ case INTEL_PT_FUP:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_PSB:
+ case INTEL_PT_TSC:
+ case INTEL_PT_TMA:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_BAD:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
+ intel_pt_log("ERROR: Missing TIP after FUP\n");
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ decoder->pkt_step = 0;
+ return -ENOENT;
+
+ case INTEL_PT_CBR:
+ intel_pt_calc_cbr(decoder);
+ break;
+
+ case INTEL_PT_OVF:
+ return intel_pt_overflow(decoder);
+
+ case INTEL_PT_TIP_PGD:
+ decoder->state.from_ip = decoder->ip;
+ if (decoder->packet.count == 0) {
+ decoder->state.to_ip = 0;
+ } else {
+ intel_pt_set_ip(decoder);
+ decoder->state.to_ip = decoder->ip;
+ }
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ decoder->state.type |= INTEL_PT_TRACE_END;
+ intel_pt_update_nr(decoder);
+ return 0;
+
+ case INTEL_PT_TIP_PGE:
+ decoder->pge = true;
+ intel_pt_log("Omitting PGE ip " x64_fmt "\n",
+ decoder->ip);
+ decoder->state.from_ip = 0;
+ if (decoder->packet.count == 0) {
+ decoder->state.to_ip = 0;
+ } else {
+ intel_pt_set_ip(decoder);
+ decoder->state.to_ip = decoder->ip;
+ }
+ decoder->state.type |= INTEL_PT_TRACE_BEGIN;
+ intel_pt_mtc_cyc_cnt_pge(decoder);
+ intel_pt_set_nr(decoder);
+ return 0;
+
+ case INTEL_PT_TIP:
+ decoder->state.from_ip = decoder->ip;
+ if (decoder->packet.count == 0) {
+ decoder->state.to_ip = 0;
+ } else {
+ intel_pt_set_ip(decoder);
+ decoder->state.to_ip = decoder->ip;
+ }
+ intel_pt_update_nr(decoder);
+ intel_pt_sample_iflag_chg(decoder);
+ return 0;
+
+ case INTEL_PT_PIP:
+ intel_pt_update_pip(decoder);
+ break;
+
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ if (decoder->period_type == INTEL_PT_PERIOD_MTC)
+ decoder->state.type |= INTEL_PT_INSTRUCTION;
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_MODE_EXEC:
+ intel_pt_mode_exec(decoder);
+ break;
+
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PAD:
+ break;
+
+ default:
+ return intel_pt_bug(decoder);
+ }
+ }
+}
+
+static int intel_pt_resample(struct intel_pt_decoder *decoder)
+{
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return 0;
+}
+
+struct intel_pt_vm_tsc_info {
+ struct intel_pt_pkt pip_packet;
+ struct intel_pt_pkt vmcs_packet;
+ struct intel_pt_pkt tma_packet;
+ bool tsc, pip, vmcs, tma, psbend;
+ uint64_t ctc_delta;
+ uint64_t last_ctc;
+ int max_lookahead;
+};
+
+/* Lookahead and get the PIP, VMCS and TMA packets from PSB+ */
+static int intel_pt_vm_psb_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
+{
+ struct intel_pt_vm_tsc_info *data = pkt_info->data;
+
+ switch (pkt_info->packet.type) {
+ case INTEL_PT_PAD:
+ case INTEL_PT_MNT:
+ case INTEL_PT_MODE_EXEC:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_MTC:
+ case INTEL_PT_FUP:
+ case INTEL_PT_CYC:
+ case INTEL_PT_CBR:
+ break;
+
+ case INTEL_PT_TSC:
+ data->tsc = true;
+ break;
+
+ case INTEL_PT_TMA:
+ data->tma_packet = pkt_info->packet;
+ data->tma = true;
+ break;
+
+ case INTEL_PT_PIP:
+ data->pip_packet = pkt_info->packet;
+ data->pip = true;
+ break;
+
+ case INTEL_PT_VMCS:
+ data->vmcs_packet = pkt_info->packet;
+ data->vmcs = true;
+ break;
+
+ case INTEL_PT_PSBEND:
+ data->psbend = true;
+ return 1;
+
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
+ case INTEL_PT_OVF:
+ case INTEL_PT_BAD:
+ case INTEL_PT_TNT:
+ case INTEL_PT_TIP_PGD:
+ case INTEL_PT_TIP:
+ case INTEL_PT_PSB:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
+ default:
+ return 1;
+ }
+
+ return 0;
+}
+
+struct intel_pt_ovf_fup_info {
+ int max_lookahead;
+ bool found;
+};
+
+/* Lookahead to detect a FUP packet after OVF */
+static int intel_pt_ovf_fup_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
+{
+ struct intel_pt_ovf_fup_info *data = pkt_info->data;
+
+ if (pkt_info->packet.type == INTEL_PT_CYC ||
+ pkt_info->packet.type == INTEL_PT_MTC ||
+ pkt_info->packet.type == INTEL_PT_TSC)
+ return !--(data->max_lookahead);
+ data->found = pkt_info->packet.type == INTEL_PT_FUP;
+ return 1;
+}
+
+static bool intel_pt_ovf_fup_lookahead(struct intel_pt_decoder *decoder)
+{
+ struct intel_pt_ovf_fup_info data = {
+ .max_lookahead = 16,
+ .found = false,
+ };
+
+ intel_pt_pkt_lookahead(decoder, intel_pt_ovf_fup_lookahead_cb, &data);
+ return data.found;
+}
+
+/* Lookahead and get the TMA packet after TSC */
+static int intel_pt_tma_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
+{
+ struct intel_pt_vm_tsc_info *data = pkt_info->data;
+
+ if (pkt_info->packet.type == INTEL_PT_CYC ||
+ pkt_info->packet.type == INTEL_PT_MTC)
+ return !--(data->max_lookahead);
+
+ if (pkt_info->packet.type == INTEL_PT_TMA) {
+ data->tma_packet = pkt_info->packet;
+ data->tma = true;
+ }
+ return 1;
+}
+
+static uint64_t intel_pt_ctc_to_tsc(struct intel_pt_decoder *decoder, uint64_t ctc)
+{
+ if (decoder->tsc_ctc_mult)
+ return ctc * decoder->tsc_ctc_mult;
+ else
+ return multdiv(ctc, decoder->tsc_ctc_ratio_n, decoder->tsc_ctc_ratio_d);
+}
+
+static uint64_t intel_pt_calc_expected_tsc(struct intel_pt_decoder *decoder,
+ uint32_t ctc,
+ uint32_t fc,
+ uint64_t last_ctc_timestamp,
+ uint64_t ctc_delta,
+ uint32_t last_ctc)
+{
+ /* Number of CTC ticks from last_ctc_timestamp to last_mtc */
+ uint64_t last_mtc_ctc = last_ctc + ctc_delta;
+ /*
+ * Number of CTC ticks from there until current TMA packet. We would
+ * expect last_mtc_ctc to be before ctc, but the TSC packet can slip
+ * past an MTC, so a sign-extended value is used.
+ */
+ uint64_t delta = (int16_t)((uint16_t)ctc - (uint16_t)last_mtc_ctc);
+ /* Total CTC ticks from last_ctc_timestamp to current TMA packet */
+ uint64_t new_ctc_delta = ctc_delta + delta;
+ uint64_t expected_tsc;
+
+ /*
+ * Convert CTC ticks to TSC ticks, add the starting point
+ * (last_ctc_timestamp) and the fast counter from the TMA packet.
+ */
+ expected_tsc = last_ctc_timestamp + intel_pt_ctc_to_tsc(decoder, new_ctc_delta) + fc;
+
+ if (intel_pt_enable_logging) {
+ intel_pt_log_x64(last_mtc_ctc);
+ intel_pt_log_x32(last_ctc);
+ intel_pt_log_x64(ctc_delta);
+ intel_pt_log_x64(delta);
+ intel_pt_log_x32(ctc);
+ intel_pt_log_x64(new_ctc_delta);
+ intel_pt_log_x64(last_ctc_timestamp);
+ intel_pt_log_x32(fc);
+ intel_pt_log_x64(intel_pt_ctc_to_tsc(decoder, new_ctc_delta));
+ intel_pt_log_x64(expected_tsc);
+ }
+
+ return expected_tsc;
+}
+
+static uint64_t intel_pt_expected_tsc(struct intel_pt_decoder *decoder,
+ struct intel_pt_vm_tsc_info *data)
+{
+ uint32_t ctc = data->tma_packet.payload;
+ uint32_t fc = data->tma_packet.count;
+
+ return intel_pt_calc_expected_tsc(decoder, ctc, fc,
+ decoder->ctc_timestamp,
+ data->ctc_delta, data->last_ctc);
+}
+
+static void intel_pt_translate_vm_tsc(struct intel_pt_decoder *decoder,
+ struct intel_pt_vmcs_info *vmcs_info)
+{
+ uint64_t payload = decoder->packet.payload;
+
+ /* VMX adds the TSC Offset, so subtract to get host TSC */
+ decoder->packet.payload -= vmcs_info->tsc_offset;
+ /* TSC packet has only 7 bytes */
+ decoder->packet.payload &= SEVEN_BYTES;
+
+ /*
+ * The buffer is mmapped from the data file, so this also updates the
+ * data file.
+ */
+ if (!decoder->vm_tm_corr_dry_run)
+ memcpy((void *)decoder->buf + 1, &decoder->packet.payload, 7);
+
+ intel_pt_log("Translated VM TSC %#" PRIx64 " -> %#" PRIx64
+ " VMCS %#" PRIx64 " TSC Offset %#" PRIx64 "\n",
+ payload, decoder->packet.payload, vmcs_info->vmcs,
+ vmcs_info->tsc_offset);
+}
+
+static void intel_pt_translate_vm_tsc_offset(struct intel_pt_decoder *decoder,
+ uint64_t tsc_offset)
+{
+ struct intel_pt_vmcs_info vmcs_info = {
+ .vmcs = NO_VMCS,
+ .tsc_offset = tsc_offset
+ };
+
+ intel_pt_translate_vm_tsc(decoder, &vmcs_info);
+}
+
+static inline bool in_vm(uint64_t pip_payload)
+{
+ return pip_payload & 1;
+}
+
+static inline bool pip_in_vm(struct intel_pt_pkt *pip_packet)
+{
+ return pip_packet->payload & 1;
+}
+
+static void intel_pt_print_vmcs_info(struct intel_pt_vmcs_info *vmcs_info)
+{
+ p_log("VMCS: %#" PRIx64 " TSC Offset %#" PRIx64,
+ vmcs_info->vmcs, vmcs_info->tsc_offset);
+}
+
+static void intel_pt_vm_tm_corr_psb(struct intel_pt_decoder *decoder,
+ struct intel_pt_vm_tsc_info *data)
+{
+ memset(data, 0, sizeof(*data));
+ data->ctc_delta = decoder->ctc_delta;
+ data->last_ctc = decoder->last_ctc;
+ intel_pt_pkt_lookahead(decoder, intel_pt_vm_psb_lookahead_cb, data);
+ if (data->tsc && !data->psbend)
+ p_log("ERROR: PSB without PSBEND");
+ decoder->in_psb = data->psbend;
+}
+
+static void intel_pt_vm_tm_corr_first_tsc(struct intel_pt_decoder *decoder,
+ struct intel_pt_vm_tsc_info *data,
+ struct intel_pt_vmcs_info *vmcs_info,
+ uint64_t host_tsc)
+{
+ if (!decoder->in_psb) {
+ /* Can't happen */
+ p_log("ERROR: First TSC is not in PSB+");
+ }
+
+ if (data->pip) {
+ if (pip_in_vm(&data->pip_packet)) { /* Guest */
+ if (vmcs_info && vmcs_info->tsc_offset) {
+ intel_pt_translate_vm_tsc(decoder, vmcs_info);
+ decoder->vm_tm_corr_reliable = true;
+ } else {
+ p_log("ERROR: First TSC, unknown TSC Offset");
+ }
+ } else { /* Host */
+ decoder->vm_tm_corr_reliable = true;
+ }
+ } else { /* Host or Guest */
+ decoder->vm_tm_corr_reliable = false;
+ if (intel_pt_time_in_range(decoder, host_tsc)) {
+ /* Assume Host */
+ } else {
+ /* Assume Guest */
+ if (vmcs_info && vmcs_info->tsc_offset)
+ intel_pt_translate_vm_tsc(decoder, vmcs_info);
+ else
+ p_log("ERROR: First TSC, no PIP, unknown TSC Offset");
+ }
+ }
+}
+
+static void intel_pt_vm_tm_corr_tsc(struct intel_pt_decoder *decoder,
+ struct intel_pt_vm_tsc_info *data)
+{
+ struct intel_pt_vmcs_info *vmcs_info;
+ uint64_t tsc_offset = 0;
+ uint64_t vmcs;
+ bool reliable = true;
+ uint64_t expected_tsc;
+ uint64_t host_tsc;
+ uint64_t ref_timestamp;
+
+ bool assign = false;
+ bool assign_reliable = false;
+
+ /* Already have 'data' for the in_psb case */
+ if (!decoder->in_psb) {
+ memset(data, 0, sizeof(*data));
+ data->ctc_delta = decoder->ctc_delta;
+ data->last_ctc = decoder->last_ctc;
+ data->max_lookahead = 16;
+ intel_pt_pkt_lookahead(decoder, intel_pt_tma_lookahead_cb, data);
+ if (decoder->pge) {
+ data->pip = true;
+ data->pip_packet.payload = decoder->pip_payload;
+ }
+ }
+
+ /* Calculations depend on having TMA packets */
+ if (!data->tma) {
+ p_log("ERROR: TSC without TMA");
+ return;
+ }
+
+ vmcs = data->vmcs ? data->vmcs_packet.payload : decoder->vmcs;
+ if (vmcs == NO_VMCS)
+ vmcs = 0;
+
+ vmcs_info = decoder->findnew_vmcs_info(decoder->data, vmcs);
+
+ ref_timestamp = decoder->timestamp ? decoder->timestamp : decoder->buf_timestamp;
+ host_tsc = intel_pt_8b_tsc(decoder->packet.payload, ref_timestamp);
+
+ if (!decoder->ctc_timestamp) {
+ intel_pt_vm_tm_corr_first_tsc(decoder, data, vmcs_info, host_tsc);
+ return;
+ }
+
+ expected_tsc = intel_pt_expected_tsc(decoder, data);
+
+ tsc_offset = host_tsc - expected_tsc;
+
+ /* Determine if TSC is from Host or Guest */
+ if (data->pip) {
+ if (pip_in_vm(&data->pip_packet)) { /* Guest */
+ if (!vmcs_info) {
+ /* PIP NR=1 without VMCS cannot happen */
+ p_log("ERROR: Missing VMCS");
+ intel_pt_translate_vm_tsc_offset(decoder, tsc_offset);
+ decoder->vm_tm_corr_reliable = false;
+ return;
+ }
+ } else { /* Host */
+ decoder->last_reliable_timestamp = host_tsc;
+ decoder->vm_tm_corr_reliable = true;
+ return;
+ }
+ } else { /* Host or Guest */
+ reliable = false; /* Host/Guest is a guess, so not reliable */
+ if (decoder->in_psb) {
+ if (!tsc_offset)
+ return; /* Zero TSC Offset, assume Host */
+ /*
+ * TSC packet has only 7 bytes of TSC. We have no
+ * information about the Guest's 8th byte, but it
+ * doesn't matter because we only need 7 bytes.
+ * Here, since the 8th byte is unreliable and
+ * irrelevant, compare only 7 byes.
+ */
+ if (vmcs_info &&
+ (tsc_offset & SEVEN_BYTES) ==
+ (vmcs_info->tsc_offset & SEVEN_BYTES)) {
+ /* Same TSC Offset as last VMCS, assume Guest */
+ goto guest;
+ }
+ }
+ /*
+ * Check if the host_tsc is within the expected range.
+ * Note, we could narrow the range more by looking ahead for
+ * the next host TSC in the same buffer, but we don't bother to
+ * do that because this is probably good enough.
+ */
+ if (host_tsc >= expected_tsc && intel_pt_time_in_range(decoder, host_tsc)) {
+ /* Within expected range for Host TSC, assume Host */
+ decoder->vm_tm_corr_reliable = false;
+ return;
+ }
+ }
+
+guest: /* Assuming Guest */
+
+ /* Determine whether to assign TSC Offset */
+ if (vmcs_info && vmcs_info->vmcs) {
+ if (vmcs_info->tsc_offset && vmcs_info->reliable) {
+ assign = false;
+ } else if (decoder->in_psb && data->pip && decoder->vm_tm_corr_reliable &&
+ decoder->vm_tm_corr_continuous && decoder->vm_tm_corr_same_buf) {
+ /* Continuous tracing, TSC in a PSB is not a time loss */
+ assign = true;
+ assign_reliable = true;
+ } else if (decoder->in_psb && data->pip && decoder->vm_tm_corr_same_buf) {
+ /*
+ * Unlikely to be a time loss TSC in a PSB which is not
+ * at the start of a buffer.
+ */
+ assign = true;
+ assign_reliable = false;
+ }
+ }
+
+ /* Record VMCS TSC Offset */
+ if (assign && (vmcs_info->tsc_offset != tsc_offset ||
+ vmcs_info->reliable != assign_reliable)) {
+ bool print = vmcs_info->tsc_offset != tsc_offset;
+
+ vmcs_info->tsc_offset = tsc_offset;
+ vmcs_info->reliable = assign_reliable;
+ if (print)
+ intel_pt_print_vmcs_info(vmcs_info);
+ }
+
+ /* Determine what TSC Offset to use */
+ if (vmcs_info && vmcs_info->tsc_offset) {
+ if (!vmcs_info->reliable)
+ reliable = false;
+ intel_pt_translate_vm_tsc(decoder, vmcs_info);
+ } else {
+ reliable = false;
+ if (vmcs_info) {
+ if (!vmcs_info->error_printed) {
+ p_log("ERROR: Unknown TSC Offset for VMCS %#" PRIx64,
+ vmcs_info->vmcs);
+ vmcs_info->error_printed = true;
+ }
+ } else {
+ if (intel_pt_print_once(decoder, INTEL_PT_PRT_ONCE_UNK_VMCS))
+ p_log("ERROR: Unknown VMCS");
+ }
+ intel_pt_translate_vm_tsc_offset(decoder, tsc_offset);
+ }
+
+ decoder->vm_tm_corr_reliable = reliable;
+}
+
+static void intel_pt_vm_tm_corr_pebs_tsc(struct intel_pt_decoder *decoder)
+{
+ uint64_t host_tsc = decoder->packet.payload;
+ uint64_t guest_tsc = decoder->packet.payload;
+ struct intel_pt_vmcs_info *vmcs_info;
+ uint64_t vmcs;
+
+ vmcs = decoder->vmcs;
+ if (vmcs == NO_VMCS)
+ vmcs = 0;
+
+ vmcs_info = decoder->findnew_vmcs_info(decoder->data, vmcs);
+
+ if (decoder->pge) {
+ if (in_vm(decoder->pip_payload)) { /* Guest */
+ if (!vmcs_info) {
+ /* PIP NR=1 without VMCS cannot happen */
+ p_log("ERROR: Missing VMCS");
+ }
+ } else { /* Host */
+ return;
+ }
+ } else { /* Host or Guest */
+ if (intel_pt_time_in_range(decoder, host_tsc)) {
+ /* Within expected range for Host TSC, assume Host */
+ return;
+ }
+ }
+
+ if (vmcs_info) {
+ /* Translate Guest TSC to Host TSC */
+ host_tsc = ((guest_tsc & SEVEN_BYTES) - vmcs_info->tsc_offset) & SEVEN_BYTES;
+ host_tsc = intel_pt_8b_tsc(host_tsc, decoder->timestamp);
+ intel_pt_log("Translated VM TSC %#" PRIx64 " -> %#" PRIx64
+ " VMCS %#" PRIx64 " TSC Offset %#" PRIx64 "\n",
+ guest_tsc, host_tsc, vmcs_info->vmcs,
+ vmcs_info->tsc_offset);
+ if (!intel_pt_time_in_range(decoder, host_tsc) &&
+ intel_pt_print_once(decoder, INTEL_PT_PRT_ONCE_ERANGE))
+ p_log("Timestamp out of range");
+ } else {
+ if (intel_pt_print_once(decoder, INTEL_PT_PRT_ONCE_UNK_VMCS))
+ p_log("ERROR: Unknown VMCS");
+ host_tsc = decoder->timestamp;
+ }
+
+ decoder->packet.payload = host_tsc;
+
+ if (!decoder->vm_tm_corr_dry_run)
+ memcpy((void *)decoder->buf + 1, &host_tsc, 8);
+}
+
+static int intel_pt_vm_time_correlation(struct intel_pt_decoder *decoder)
+{
+ struct intel_pt_vm_tsc_info data = { .psbend = false };
+ bool pge;
+ int err;
+
+ if (decoder->in_psb)
+ intel_pt_vm_tm_corr_psb(decoder, &data);
+
+ while (1) {
+ err = intel_pt_get_next_packet(decoder);
+ if (err == -ENOLINK)
+ continue;
+ if (err)
+ break;
+
+ switch (decoder->packet.type) {
+ case INTEL_PT_TIP_PGD:
+ decoder->pge = false;
+ decoder->vm_tm_corr_continuous = false;
+ break;
+
+ case INTEL_PT_TNT:
+ case INTEL_PT_TIP:
+ case INTEL_PT_TIP_PGE:
+ decoder->pge = true;
+ break;
+
+ case INTEL_PT_OVF:
+ decoder->in_psb = false;
+ pge = decoder->pge;
+ decoder->pge = intel_pt_ovf_fup_lookahead(decoder);
+ if (pge != decoder->pge)
+ intel_pt_log("Surprising PGE change in OVF!");
+ if (!decoder->pge)
+ decoder->vm_tm_corr_continuous = false;
+ break;
+
+ case INTEL_PT_FUP:
+ if (decoder->in_psb)
+ decoder->pge = true;
+ break;
+
+ case INTEL_PT_TRACESTOP:
+ decoder->pge = false;
+ decoder->vm_tm_corr_continuous = false;
+ decoder->have_tma = false;
+ break;
+
+ case INTEL_PT_PSB:
+ intel_pt_vm_tm_corr_psb(decoder, &data);
+ break;
+
+ case INTEL_PT_PIP:
+ decoder->pip_payload = decoder->packet.payload;
+ break;
+
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TSC:
+ intel_pt_vm_tm_corr_tsc(decoder, &data);
+ intel_pt_calc_tsc_timestamp(decoder);
+ decoder->vm_tm_corr_same_buf = true;
+ decoder->vm_tm_corr_continuous = decoder->pge;
+ break;
+
+ case INTEL_PT_TMA:
+ intel_pt_calc_tma(decoder);
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_CBR:
+ intel_pt_calc_cbr(decoder);
+ break;
+
+ case INTEL_PT_PSBEND:
+ decoder->in_psb = false;
+ data.psbend = false;
+ break;
+
+ case INTEL_PT_VMCS:
+ if (decoder->packet.payload != NO_VMCS)
+ decoder->vmcs = decoder->packet.payload;
+ break;
+
+ case INTEL_PT_BBP:
+ decoder->blk_type = decoder->packet.payload;
+ break;
+
+ case INTEL_PT_BIP:
+ if (decoder->blk_type == INTEL_PT_PEBS_BASIC &&
+ decoder->packet.count == 2)
+ intel_pt_vm_tm_corr_pebs_tsc(decoder);
+ break;
+
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
+ decoder->blk_type = 0;
+ break;
+
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
+ case INTEL_PT_MODE_EXEC:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PAD:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_PWRX:
+ case INTEL_PT_BAD: /* Does not happen */
+ default:
+ break;
+ }
+ }
+
+ return err;
+}
+
+#define HOP_PROCESS 0
+#define HOP_IGNORE 1
+#define HOP_RETURN 2
+#define HOP_AGAIN 3
+
+static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder);
+
+/* Hop mode: Ignore TNT, do not walk code, but get ip from FUPs and TIPs */
+static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, int *err)
+{
+ *err = 0;
+
+ /* Leap from PSB to PSB, getting ip from FUP within PSB+ */
+ if (decoder->leap && !decoder->in_psb && decoder->packet.type != INTEL_PT_PSB) {
+ *err = intel_pt_scan_for_psb(decoder);
+ if (*err)
+ return HOP_RETURN;
+ }
+
+ switch (decoder->packet.type) {
+ case INTEL_PT_TNT:
+ return HOP_IGNORE;
+
+ case INTEL_PT_TIP_PGD:
+ decoder->pge = false;
+ if (!decoder->packet.count) {
+ intel_pt_set_nr(decoder);
+ return HOP_IGNORE;
+ }
+ intel_pt_set_ip(decoder);
+ decoder->state.type |= INTEL_PT_TRACE_END;
+ decoder->state.from_ip = 0;
+ decoder->state.to_ip = decoder->ip;
+ intel_pt_update_nr(decoder);
+ return HOP_RETURN;
+
+ case INTEL_PT_TIP:
+ if (!decoder->packet.count) {
+ intel_pt_set_nr(decoder);
+ return HOP_IGNORE;
+ }
+ intel_pt_set_ip(decoder);
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ intel_pt_update_nr(decoder);
+ intel_pt_sample_iflag_chg(decoder);
+ return HOP_RETURN;
+
+ case INTEL_PT_FUP:
+ if (!decoder->packet.count)
+ return HOP_IGNORE;
+ intel_pt_set_ip(decoder);
+ if (decoder->set_fup_mwait || decoder->set_fup_pwre)
+ *no_tip = true;
+ if (!decoder->branch_enable || !decoder->pge)
+ *no_tip = true;
+ if (*no_tip) {
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ intel_pt_fup_event(decoder, *no_tip);
+ return HOP_RETURN;
+ }
+ intel_pt_fup_event(decoder, *no_tip);
+ decoder->state.type |= INTEL_PT_INSTRUCTION | INTEL_PT_BRANCH;
+ *err = intel_pt_walk_fup_tip(decoder);
+ if (!*err && decoder->state.to_ip)
+ decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+ return HOP_RETURN;
+
+ case INTEL_PT_PSB:
+ decoder->state.psb_offset = decoder->pos;
+ decoder->psb_ip = 0;
+ decoder->last_ip = 0;
+ decoder->have_last_ip = true;
+ *err = intel_pt_walk_psbend(decoder);
+ if (*err == -EAGAIN)
+ return HOP_AGAIN;
+ if (*err)
+ return HOP_RETURN;
+ decoder->state.type = INTEL_PT_PSB_EVT;
+ if (decoder->psb_ip) {
+ decoder->state.type |= INTEL_PT_INSTRUCTION;
+ decoder->ip = decoder->psb_ip;
+ }
+ decoder->state.from_ip = decoder->psb_ip;
+ decoder->state.to_ip = 0;
+ return HOP_RETURN;
+
+ case INTEL_PT_BAD:
+ case INTEL_PT_PAD:
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TSC:
+ case INTEL_PT_TMA:
+ case INTEL_PT_MODE_EXEC:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_MTC:
+ case INTEL_PT_CYC:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_CBR:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_PIP:
+ case INTEL_PT_OVF:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
+ default:
+ return HOP_PROCESS;
+ }
+}
+
+struct intel_pt_psb_info {
+ struct intel_pt_pkt fup_packet;
+ bool fup;
+ int after_psbend;
+};
+
+/* Lookahead and get the FUP packet from PSB+ */
+static int intel_pt_psb_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
+{
+ struct intel_pt_psb_info *data = pkt_info->data;
+
+ switch (pkt_info->packet.type) {
+ case INTEL_PT_PAD:
+ case INTEL_PT_MNT:
+ case INTEL_PT_TSC:
+ case INTEL_PT_TMA:
+ case INTEL_PT_MODE_EXEC:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_MTC:
+ case INTEL_PT_CYC:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_CBR:
+ case INTEL_PT_PIP:
+ if (data->after_psbend) {
+ data->after_psbend -= 1;
+ if (!data->after_psbend)
+ return 1;
+ }
+ break;
+
+ case INTEL_PT_FUP:
+ if (data->after_psbend)
+ return 1;
+ if (data->fup || pkt_info->packet.count == 0)
+ return 1;
+ data->fup_packet = pkt_info->packet;
+ data->fup = true;
+ break;
+
+ case INTEL_PT_PSBEND:
+ if (!data->fup)
+ return 1;
+ /* Keep going to check for a TIP.PGE */
+ data->after_psbend = 6;
+ break;
+
+ case INTEL_PT_TIP_PGE:
+ /* Ignore FUP in PSB+ if followed by TIP.PGE */
+ if (data->after_psbend)
+ data->fup = false;
+ return 1;
+
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
+ if (data->after_psbend) {
+ data->after_psbend -= 1;
+ if (!data->after_psbend)
+ return 1;
+ break;
+ }
+ return 1;
+
+ case INTEL_PT_OVF:
+ case INTEL_PT_BAD:
+ case INTEL_PT_TNT:
+ case INTEL_PT_TIP_PGD:
+ case INTEL_PT_TIP:
+ case INTEL_PT_PSB:
+ case INTEL_PT_TRACESTOP:
+ default:
+ return 1;
+ }
+
+ return 0;
+}
+
+static int intel_pt_psb(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ decoder->last_ip = 0;
+ decoder->psb_ip = 0;
+ decoder->have_last_ip = true;
+ intel_pt_clear_stack(&decoder->stack);
+ err = intel_pt_walk_psbend(decoder);
+ if (err)
+ return err;
+ decoder->state.type = INTEL_PT_PSB_EVT;
+ decoder->state.from_ip = decoder->psb_ip;
+ decoder->state.to_ip = 0;
+ return 0;
+}
+
+static int intel_pt_fup_in_psb(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ if (decoder->ip != decoder->last_ip) {
+ err = intel_pt_walk_fup(decoder);
+ if (!err || err != -EAGAIN)
+ return err;
+ }
+
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ err = intel_pt_psb(decoder);
+ if (err) {
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static bool intel_pt_psb_with_fup(struct intel_pt_decoder *decoder, int *err)
+{
+ struct intel_pt_psb_info data = { .fup = false };
+
+ if (!decoder->branch_enable)
+ return false;
+
+ intel_pt_pkt_lookahead(decoder, intel_pt_psb_lookahead_cb, &data);
+ if (!data.fup)
+ return false;
+
+ decoder->packet = data.fup_packet;
+ intel_pt_set_last_ip(decoder);
+ decoder->pkt_state = INTEL_PT_STATE_FUP_IN_PSB;
+
+ *err = intel_pt_fup_in_psb(decoder);
+
+ return true;
+}
+
+static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
+{
+ int last_packet_type = INTEL_PT_PAD;
+ bool no_tip = false;
+ int err;
+
+ while (1) {
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+next:
+ err = 0;
+ if (decoder->cyc_threshold) {
+ if (decoder->sample_cyc && last_packet_type != INTEL_PT_CYC)
+ decoder->sample_cyc = false;
+ last_packet_type = decoder->packet.type;
+ }
+
+ if (decoder->hop) {
+ switch (intel_pt_hop_trace(decoder, &no_tip, &err)) {
+ case HOP_IGNORE:
+ continue;
+ case HOP_RETURN:
+ return err;
+ case HOP_AGAIN:
+ goto next;
+ default:
+ break;
+ }
+ }
+
+ switch (decoder->packet.type) {
+ case INTEL_PT_TNT:
+ if (!decoder->packet.count)
+ break;
+ decoder->tnt = decoder->packet;
+ decoder->pkt_state = INTEL_PT_STATE_TNT;
+ err = intel_pt_walk_tnt(decoder);
+ if (err == -EAGAIN)
+ break;
+ return err;
+
+ case INTEL_PT_TIP_PGD:
+ if (decoder->packet.count != 0)
+ intel_pt_set_last_ip(decoder);
+ decoder->pkt_state = INTEL_PT_STATE_TIP_PGD;
+ return intel_pt_walk_tip(decoder);
+
+ case INTEL_PT_TIP_PGE: {
+ decoder->pge = true;
+ decoder->overflow = false;
+ intel_pt_mtc_cyc_cnt_pge(decoder);
+ intel_pt_set_nr(decoder);
+ if (decoder->packet.count == 0) {
+ intel_pt_log_at("Skipping zero TIP.PGE",
+ decoder->pos);
+ break;
+ }
+ intel_pt_sample_iflag_chg(decoder);
+ intel_pt_set_ip(decoder);
+ decoder->state.from_ip = 0;
+ decoder->state.to_ip = decoder->ip;
+ decoder->state.type |= INTEL_PT_TRACE_BEGIN;
+ /*
+ * In hop mode, resample to get the to_ip as an
+ * "instruction" sample.
+ */
+ if (decoder->hop)
+ decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+ return 0;
+ }
+
+ case INTEL_PT_OVF:
+ return intel_pt_overflow(decoder);
+
+ case INTEL_PT_TIP:
+ if (decoder->packet.count != 0)
+ intel_pt_set_last_ip(decoder);
+ decoder->pkt_state = INTEL_PT_STATE_TIP;
+ return intel_pt_walk_tip(decoder);
+
+ case INTEL_PT_FUP:
+ if (decoder->packet.count == 0) {
+ intel_pt_log_at("Skipping zero FUP",
+ decoder->pos);
+ no_tip = false;
+ break;
+ }
+ intel_pt_set_last_ip(decoder);
+ if (!decoder->branch_enable || !decoder->pge) {
+ decoder->ip = decoder->last_ip;
+ if (intel_pt_fup_event(decoder, no_tip))
+ return 0;
+ no_tip = false;
+ break;
+ }
+ if (decoder->set_fup_mwait)
+ no_tip = true;
+ if (no_tip)
+ decoder->pkt_state = INTEL_PT_STATE_FUP_NO_TIP;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_FUP;
+ err = intel_pt_walk_fup(decoder);
+ if (err != -EAGAIN)
+ return err;
+ if (no_tip) {
+ no_tip = false;
+ break;
+ }
+ return intel_pt_walk_fup_tip(decoder);
+
+ case INTEL_PT_TRACESTOP:
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ intel_pt_clear_tx_flags(decoder);
+ decoder->have_tma = false;
+ break;
+
+ case INTEL_PT_PSB:
+ decoder->state.psb_offset = decoder->pos;
+ decoder->psb_ip = 0;
+ if (intel_pt_psb_with_fup(decoder, &err))
+ return err;
+ err = intel_pt_psb(decoder);
+ if (err == -EAGAIN)
+ goto next;
+ return err;
+
+ case INTEL_PT_PIP:
+ intel_pt_update_pip(decoder);
+ break;
+
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ if (decoder->period_type != INTEL_PT_PERIOD_MTC)
+ break;
+ /*
+ * Ensure that there has been an instruction since the
+ * last MTC.
+ */
+ if (!decoder->mtc_insn)
+ break;
+ decoder->mtc_insn = false;
+ /* Ensure that there is a timestamp */
+ if (!decoder->timestamp)
+ break;
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->mtc_insn = false;
+ return 0;
+
+ case INTEL_PT_TSC:
+ intel_pt_calc_tsc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TMA:
+ intel_pt_calc_tma(decoder);
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_CBR:
+ intel_pt_calc_cbr(decoder);
+ if (decoder->cbr != decoder->cbr_seen) {
+ decoder->state.type = 0;
+ return 0;
+ }
+ break;
+
+ case INTEL_PT_MODE_EXEC:
+ intel_pt_mode_exec(decoder);
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->set_fup_mode_exec = true;
+ no_tip = true;
+ }
+ goto next;
+
+ case INTEL_PT_MODE_TSX:
+ /* MODE_TSX need not be followed by FUP */
+ if (!decoder->pge || decoder->in_psb) {
+ intel_pt_update_in_tx(decoder);
+ break;
+ }
+ err = intel_pt_mode_tsx(decoder, &no_tip);
+ if (err)
+ return err;
+ goto next;
+
+ case INTEL_PT_BAD: /* Does not happen */
+ return intel_pt_bug(decoder);
+
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PAD:
+ break;
+
+ case INTEL_PT_PTWRITE_IP:
+ decoder->fup_ptw_payload = decoder->packet.payload;
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->set_fup_ptw = true;
+ no_tip = true;
+ } else {
+ intel_pt_log_at("ERROR: Missing FUP after PTWRITE",
+ decoder->pos);
+ }
+ goto next;
+
+ case INTEL_PT_PTWRITE:
+ decoder->state.type = INTEL_PT_PTW;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.ptw_payload = decoder->packet.payload;
+ return 0;
+
+ case INTEL_PT_MWAIT:
+ decoder->fup_mwait_payload = decoder->packet.payload;
+ decoder->set_fup_mwait = true;
+ break;
+
+ case INTEL_PT_PWRE:
+ if (decoder->set_fup_mwait) {
+ decoder->fup_pwre_payload =
+ decoder->packet.payload;
+ decoder->set_fup_pwre = true;
+ break;
+ }
+ decoder->state.type = INTEL_PT_PWR_ENTRY;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.pwrx_payload = decoder->packet.payload;
+ return 0;
+
+ case INTEL_PT_EXSTOP_IP:
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->set_fup_exstop = true;
+ no_tip = true;
+ } else {
+ intel_pt_log_at("ERROR: Missing FUP after EXSTOP",
+ decoder->pos);
+ }
+ goto next;
+
+ case INTEL_PT_EXSTOP:
+ decoder->state.type = INTEL_PT_EX_STOP;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return 0;
+
+ case INTEL_PT_PWRX:
+ decoder->state.type = INTEL_PT_PWR_EXIT;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.pwrx_payload = decoder->packet.payload;
+ return 0;
+
+ case INTEL_PT_BBP:
+ intel_pt_bbp(decoder);
+ break;
+
+ case INTEL_PT_BIP:
+ intel_pt_bip(decoder);
+ break;
+
+ case INTEL_PT_BEP:
+ decoder->state.type = INTEL_PT_BLK_ITEMS;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return 0;
+
+ case INTEL_PT_BEP_IP:
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->set_fup_bep = true;
+ no_tip = true;
+ } else {
+ intel_pt_log_at("ERROR: Missing FUP after BEP",
+ decoder->pos);
+ }
+ goto next;
+
+ case INTEL_PT_CFE:
+ decoder->fup_cfe_pkt = decoder->packet;
+ decoder->set_fup_cfe = true;
+ if (!decoder->pge) {
+ intel_pt_fup_event(decoder, true);
+ return 0;
+ }
+ break;
+
+ case INTEL_PT_CFE_IP:
+ decoder->fup_cfe_pkt = decoder->packet;
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->set_fup_cfe_ip = true;
+ no_tip = true;
+ } else {
+ intel_pt_log_at("ERROR: Missing FUP after CFE",
+ decoder->pos);
+ }
+ goto next;
+
+ case INTEL_PT_EVD:
+ err = intel_pt_evd(decoder);
+ if (err)
+ return err;
+ break;
+
+ default:
+ return intel_pt_bug(decoder);
+ }
+ }
+}
+
+static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder)
+{
+ return decoder->packet.count &&
+ (decoder->have_last_ip || decoder->packet.count == 3 ||
+ decoder->packet.count == 6);
+}
+
+/* Walk PSB+ packets to get in sync. */
+static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ decoder->in_psb = true;
+
+ while (1) {
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ goto out;
+
+ switch (decoder->packet.type) {
+ case INTEL_PT_TIP_PGD:
+ decoder->continuous_period = false;
+ fallthrough;
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TIP:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
+ intel_pt_log("ERROR: Unexpected packet\n");
+ err = -ENOENT;
+ goto out;
+
+ case INTEL_PT_FUP:
+ decoder->pge = true;
+ if (intel_pt_have_ip(decoder)) {
+ uint64_t current_ip = decoder->ip;
+
+ intel_pt_set_ip(decoder);
+ decoder->psb_ip = decoder->ip;
+ if (current_ip)
+ intel_pt_log_to("Setting IP",
+ decoder->ip);
+ }
+ break;
+
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TSC:
+ intel_pt_calc_tsc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TMA:
+ intel_pt_calc_tma(decoder);
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_CBR:
+ intel_pt_calc_cbr(decoder);
+ break;
+
+ case INTEL_PT_PIP:
+ intel_pt_set_pip(decoder);
+ break;
+
+ case INTEL_PT_MODE_EXEC:
+ intel_pt_mode_exec_status(decoder);
+ break;
+
+ case INTEL_PT_MODE_TSX:
+ intel_pt_update_in_tx(decoder);
+ break;
+
+ case INTEL_PT_TRACESTOP:
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ intel_pt_clear_tx_flags(decoder);
+ fallthrough;
+
+ case INTEL_PT_TNT:
+ decoder->have_tma = false;
+ intel_pt_log("ERROR: Unexpected packet\n");
+ if (decoder->ip)
+ decoder->pkt_state = INTEL_PT_STATE_ERR4;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ err = -ENOENT;
+ goto out;
+
+ case INTEL_PT_BAD: /* Does not happen */
+ err = intel_pt_bug(decoder);
+ goto out;
+
+ case INTEL_PT_OVF:
+ err = intel_pt_overflow(decoder);
+ goto out;
+
+ case INTEL_PT_PSBEND:
+ err = 0;
+ goto out;
+
+ case INTEL_PT_PSB:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PAD:
+ default:
+ break;
+ }
+ }
+out:
+ decoder->in_psb = false;
+
+ return err;
+}
+
+static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ while (1) {
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+
+ switch (decoder->packet.type) {
+ case INTEL_PT_TIP_PGD:
+ decoder->continuous_period = false;
+ decoder->pge = false;
+ if (intel_pt_have_ip(decoder))
+ intel_pt_set_ip(decoder);
+ if (!decoder->ip)
+ break;
+ decoder->state.type |= INTEL_PT_TRACE_END;
+ return 0;
+
+ case INTEL_PT_TIP_PGE:
+ decoder->pge = true;
+ intel_pt_mtc_cyc_cnt_pge(decoder);
+ if (intel_pt_have_ip(decoder))
+ intel_pt_set_ip(decoder);
+ if (!decoder->ip)
+ break;
+ decoder->state.type |= INTEL_PT_TRACE_BEGIN;
+ return 0;
+
+ case INTEL_PT_TIP:
+ decoder->pge = true;
+ if (intel_pt_have_ip(decoder))
+ intel_pt_set_ip(decoder);
+ if (!decoder->ip)
+ break;
+ return 0;
+
+ case INTEL_PT_FUP:
+ if (intel_pt_have_ip(decoder))
+ intel_pt_set_ip(decoder);
+ if (decoder->ip)
+ return 0;
+ break;
+
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TSC:
+ intel_pt_calc_tsc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TMA:
+ intel_pt_calc_tma(decoder);
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_CBR:
+ intel_pt_calc_cbr(decoder);
+ break;
+
+ case INTEL_PT_PIP:
+ intel_pt_set_pip(decoder);
+ break;
+
+ case INTEL_PT_MODE_EXEC:
+ intel_pt_mode_exec_status(decoder);
+ break;
+
+ case INTEL_PT_MODE_TSX:
+ intel_pt_update_in_tx(decoder);
+ break;
+
+ case INTEL_PT_OVF:
+ return intel_pt_overflow(decoder);
+
+ case INTEL_PT_BAD: /* Does not happen */
+ return intel_pt_bug(decoder);
+
+ case INTEL_PT_TRACESTOP:
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ intel_pt_clear_tx_flags(decoder);
+ decoder->have_tma = false;
+ break;
+
+ case INTEL_PT_PSB:
+ decoder->state.psb_offset = decoder->pos;
+ decoder->psb_ip = 0;
+ decoder->last_ip = 0;
+ decoder->have_last_ip = true;
+ intel_pt_clear_stack(&decoder->stack);
+ err = intel_pt_walk_psb(decoder);
+ if (err)
+ return err;
+ decoder->state.type = INTEL_PT_PSB_EVT;
+ decoder->state.from_ip = decoder->psb_ip;
+ decoder->state.to_ip = 0;
+ return 0;
+
+ case INTEL_PT_TNT:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PAD:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
+ default:
+ break;
+ }
+ }
+}
+
+static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ intel_pt_clear_fup_event(decoder);
+ decoder->overflow = false;
+
+ if (!decoder->branch_enable) {
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->state.type = 0; /* Do not have a sample */
+ return 0;
+ }
+
+ intel_pt_log("Scanning for full IP\n");
+ err = intel_pt_walk_to_ip(decoder);
+ if (err || ((decoder->state.type & INTEL_PT_PSB_EVT) && !decoder->ip))
+ return err;
+
+ /* In hop mode, resample to get the to_ip as an "instruction" sample */
+ if (decoder->hop)
+ decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+
+ decoder->state.from_ip = 0;
+ decoder->state.to_ip = decoder->ip;
+ intel_pt_log_to("Setting IP", decoder->ip);
+
+ return 0;
+}
+
+static int intel_pt_part_psb(struct intel_pt_decoder *decoder)
+{
+ const unsigned char *end = decoder->buf + decoder->len;
+ size_t i;
+
+ for (i = INTEL_PT_PSB_LEN - 1; i; i--) {
+ if (i > decoder->len)
+ continue;
+ if (!memcmp(end - i, INTEL_PT_PSB_STR, i))
+ return i;
+ }
+ return 0;
+}
+
+static int intel_pt_rest_psb(struct intel_pt_decoder *decoder, int part_psb)
+{
+ size_t rest_psb = INTEL_PT_PSB_LEN - part_psb;
+ const char *psb = INTEL_PT_PSB_STR;
+
+ if (rest_psb > decoder->len ||
+ memcmp(decoder->buf, psb + part_psb, rest_psb))
+ return 0;
+
+ return rest_psb;
+}
+
+static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder,
+ int part_psb)
+{
+ int rest_psb, ret;
+
+ decoder->pos += decoder->len;
+ decoder->len = 0;
+
+ ret = intel_pt_get_next_data(decoder, false);
+ if (ret)
+ return ret;
+
+ rest_psb = intel_pt_rest_psb(decoder, part_psb);
+ if (!rest_psb)
+ return 0;
+
+ decoder->pos -= part_psb;
+ decoder->next_buf = decoder->buf + rest_psb;
+ decoder->next_len = decoder->len - rest_psb;
+ memcpy(decoder->temp_buf, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
+ decoder->buf = decoder->temp_buf;
+ decoder->len = INTEL_PT_PSB_LEN;
+
+ return 0;
+}
+
+static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder)
+{
+ unsigned char *next;
+ int ret;
+
+ intel_pt_log("Scanning for PSB\n");
+ while (1) {
+ if (!decoder->len) {
+ ret = intel_pt_get_next_data(decoder, false);
+ if (ret)
+ return ret;
+ }
+
+ next = memmem(decoder->buf, decoder->len, INTEL_PT_PSB_STR,
+ INTEL_PT_PSB_LEN);
+ if (!next) {
+ int part_psb;
+
+ part_psb = intel_pt_part_psb(decoder);
+ if (part_psb) {
+ ret = intel_pt_get_split_psb(decoder, part_psb);
+ if (ret)
+ return ret;
+ } else {
+ decoder->pos += decoder->len;
+ decoder->len = 0;
+ }
+ continue;
+ }
+
+ decoder->pkt_step = next - decoder->buf;
+ return intel_pt_get_next_packet(decoder);
+ }
+}
+
+static int intel_pt_sync(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ decoder->have_last_ip = false;
+ decoder->last_ip = 0;
+ decoder->psb_ip = 0;
+ decoder->ip = 0;
+ intel_pt_clear_stack(&decoder->stack);
+
+ err = intel_pt_scan_for_psb(decoder);
+ if (err)
+ return err;
+
+ if (decoder->vm_time_correlation) {
+ decoder->in_psb = true;
+ if (!decoder->timestamp)
+ decoder->timestamp = 1;
+ decoder->state.type = 0;
+ decoder->pkt_state = INTEL_PT_STATE_VM_TIME_CORRELATION;
+ return 0;
+ }
+
+ decoder->have_last_ip = true;
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+
+ err = intel_pt_walk_psb(decoder);
+ if (err)
+ return err;
+
+ decoder->state.type = INTEL_PT_PSB_EVT; /* Only PSB sample */
+ decoder->state.from_ip = decoder->psb_ip;
+ decoder->state.to_ip = 0;
+
+ if (decoder->ip) {
+ /*
+ * In hop mode, resample to get the PSB FUP ip as an
+ * "instruction" sample.
+ */
+ if (decoder->hop)
+ decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ }
+
+ return 0;
+}
+
+static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder)
+{
+ uint64_t est = decoder->sample_insn_cnt << 1;
+
+ if (!decoder->cbr || !decoder->max_non_turbo_ratio)
+ goto out;
+
+ est *= decoder->max_non_turbo_ratio;
+ est /= decoder->cbr;
+out:
+ return decoder->sample_timestamp + est;
+}
+
+const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ do {
+ decoder->state.type = INTEL_PT_BRANCH;
+ decoder->state.flags = 0;
+
+ switch (decoder->pkt_state) {
+ case INTEL_PT_STATE_NO_PSB:
+ err = intel_pt_sync(decoder);
+ break;
+ case INTEL_PT_STATE_NO_IP:
+ decoder->have_last_ip = false;
+ decoder->last_ip = 0;
+ decoder->ip = 0;
+ fallthrough;
+ case INTEL_PT_STATE_ERR_RESYNC:
+ err = intel_pt_sync_ip(decoder);
+ break;
+ case INTEL_PT_STATE_IN_SYNC:
+ err = intel_pt_walk_trace(decoder);
+ break;
+ case INTEL_PT_STATE_TNT:
+ case INTEL_PT_STATE_TNT_CONT:
+ err = intel_pt_walk_tnt(decoder);
+ if (err == -EAGAIN)
+ err = intel_pt_walk_trace(decoder);
+ break;
+ case INTEL_PT_STATE_TIP:
+ case INTEL_PT_STATE_TIP_PGD:
+ err = intel_pt_walk_tip(decoder);
+ break;
+ case INTEL_PT_STATE_FUP:
+ err = intel_pt_walk_fup(decoder);
+ if (err == -EAGAIN)
+ err = intel_pt_walk_fup_tip(decoder);
+ break;
+ case INTEL_PT_STATE_FUP_NO_TIP:
+ err = intel_pt_walk_fup(decoder);
+ if (err == -EAGAIN)
+ err = intel_pt_walk_trace(decoder);
+ break;
+ case INTEL_PT_STATE_FUP_IN_PSB:
+ err = intel_pt_fup_in_psb(decoder);
+ break;
+ case INTEL_PT_STATE_RESAMPLE:
+ err = intel_pt_resample(decoder);
+ break;
+ case INTEL_PT_STATE_VM_TIME_CORRELATION:
+ err = intel_pt_vm_time_correlation(decoder);
+ break;
+ default:
+ err = intel_pt_bug(decoder);
+ break;
+ }
+ } while (err == -ENOLINK);
+
+ if (err) {
+ decoder->state.err = intel_pt_ext_err(err);
+ if (err != -EOVERFLOW)
+ decoder->state.from_ip = decoder->ip;
+ intel_pt_update_sample_time(decoder);
+ decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
+ intel_pt_set_nr(decoder);
+ } else {
+ decoder->state.err = 0;
+ if (decoder->cbr != decoder->cbr_seen) {
+ decoder->cbr_seen = decoder->cbr;
+ if (!decoder->state.type) {
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ }
+ decoder->state.type |= INTEL_PT_CBR_CHG;
+ decoder->state.cbr_payload = decoder->cbr_payload;
+ decoder->state.cbr = decoder->cbr;
+ }
+ if (intel_pt_sample_time(decoder->pkt_state)) {
+ intel_pt_update_sample_time(decoder);
+ if (decoder->sample_cyc) {
+ decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
+ decoder->state.flags |= INTEL_PT_SAMPLE_IPC;
+ decoder->sample_cyc = false;
+ }
+ }
+ /*
+ * When using only TSC/MTC to compute cycles, IPC can be
+ * sampled as soon as the cycle count changes.
+ */
+ if (!decoder->have_cyc)
+ decoder->state.flags |= INTEL_PT_SAMPLE_IPC;
+ }
+
+ /* Let PSB event always have TSC timestamp */
+ if ((decoder->state.type & INTEL_PT_PSB_EVT) && decoder->tsc_timestamp)
+ decoder->sample_timestamp = decoder->tsc_timestamp;
+
+ decoder->state.from_nr = decoder->nr;
+ decoder->state.to_nr = decoder->next_nr;
+ decoder->nr = decoder->next_nr;
+
+ decoder->state.timestamp = decoder->sample_timestamp;
+ decoder->state.est_timestamp = intel_pt_est_timestamp(decoder);
+ decoder->state.tot_insn_cnt = decoder->tot_insn_cnt;
+ decoder->state.tot_cyc_cnt = decoder->sample_tot_cyc_cnt;
+
+ return &decoder->state;
+}
+
+/**
+ * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet.
+ * @buf: pointer to buffer pointer
+ * @len: size of buffer
+ *
+ * Updates the buffer pointer to point to the start of the next PSB packet if
+ * there is one, otherwise the buffer pointer is unchanged. If @buf is updated,
+ * @len is adjusted accordingly.
+ *
+ * Return: %true if a PSB packet is found, %false otherwise.
+ */
+static bool intel_pt_next_psb(unsigned char **buf, size_t *len)
+{
+ unsigned char *next;
+
+ next = memmem(*buf, *len, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
+ if (next) {
+ *len -= next - *buf;
+ *buf = next;
+ return true;
+ }
+ return false;
+}
+
+/**
+ * intel_pt_step_psb - move buffer pointer to the start of the following PSB
+ * packet.
+ * @buf: pointer to buffer pointer
+ * @len: size of buffer
+ *
+ * Updates the buffer pointer to point to the start of the following PSB packet
+ * (skipping the PSB at @buf itself) if there is one, otherwise the buffer
+ * pointer is unchanged. If @buf is updated, @len is adjusted accordingly.
+ *
+ * Return: %true if a PSB packet is found, %false otherwise.
+ */
+static bool intel_pt_step_psb(unsigned char **buf, size_t *len)
+{
+ unsigned char *next;
+
+ if (!*len)
+ return false;
+
+ next = memmem(*buf + 1, *len - 1, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
+ if (next) {
+ *len -= next - *buf;
+ *buf = next;
+ return true;
+ }
+ return false;
+}
+
+/**
+ * intel_pt_last_psb - find the last PSB packet in a buffer.
+ * @buf: buffer
+ * @len: size of buffer
+ *
+ * This function finds the last PSB in a buffer.
+ *
+ * Return: A pointer to the last PSB in @buf if found, %NULL otherwise.
+ */
+static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
+{
+ const char *n = INTEL_PT_PSB_STR;
+ unsigned char *p;
+ size_t k;
+
+ if (len < INTEL_PT_PSB_LEN)
+ return NULL;
+
+ k = len - INTEL_PT_PSB_LEN + 1;
+ while (1) {
+ p = memrchr(buf, n[0], k);
+ if (!p)
+ return NULL;
+ if (!memcmp(p + 1, n + 1, INTEL_PT_PSB_LEN - 1))
+ return p;
+ k = p - buf;
+ if (!k)
+ return NULL;
+ }
+}
+
+/**
+ * intel_pt_next_tsc - find and return next TSC.
+ * @buf: buffer
+ * @len: size of buffer
+ * @tsc: TSC value returned
+ * @rem: returns remaining size when TSC is found
+ *
+ * Find a TSC packet in @buf and return the TSC value. This function assumes
+ * that @buf starts at a PSB and that PSB+ will contain TSC and so stops if a
+ * PSBEND packet is found.
+ *
+ * Return: %true if TSC is found, false otherwise.
+ */
+static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc,
+ size_t *rem)
+{
+ enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX;
+ struct intel_pt_pkt packet;
+ int ret;
+
+ while (len) {
+ ret = intel_pt_get_packet(buf, len, &packet, &ctx);
+ if (ret <= 0)
+ return false;
+ if (packet.type == INTEL_PT_TSC) {
+ *tsc = packet.payload;
+ *rem = len;
+ return true;
+ }
+ if (packet.type == INTEL_PT_PSBEND)
+ return false;
+ buf += ret;
+ len -= ret;
+ }
+ return false;
+}
+
+/**
+ * intel_pt_tsc_cmp - compare 7-byte TSCs.
+ * @tsc1: first TSC to compare
+ * @tsc2: second TSC to compare
+ *
+ * This function compares 7-byte TSC values allowing for the possibility that
+ * TSC wrapped around. Generally it is not possible to know if TSC has wrapped
+ * around so for that purpose this function assumes the absolute difference is
+ * less than half the maximum difference.
+ *
+ * Return: %-1 if @tsc1 is before @tsc2, %0 if @tsc1 == @tsc2, %1 if @tsc1 is
+ * after @tsc2.
+ */
+static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
+{
+ const uint64_t halfway = (1ULL << 55);
+
+ if (tsc1 == tsc2)
+ return 0;
+
+ if (tsc1 < tsc2) {
+ if (tsc2 - tsc1 < halfway)
+ return -1;
+ else
+ return 1;
+ } else {
+ if (tsc1 - tsc2 < halfway)
+ return 1;
+ else
+ return -1;
+ }
+}
+
+#define MAX_PADDING (PERF_AUXTRACE_RECORD_ALIGNMENT - 1)
+
+/**
+ * adj_for_padding - adjust overlap to account for padding.
+ * @buf_b: second buffer
+ * @buf_a: first buffer
+ * @len_a: size of first buffer
+ *
+ * @buf_a might have up to 7 bytes of padding appended. Adjust the overlap
+ * accordingly.
+ *
+ * Return: A pointer into @buf_b from where non-overlapped data starts
+ */
+static unsigned char *adj_for_padding(unsigned char *buf_b,
+ unsigned char *buf_a, size_t len_a)
+{
+ unsigned char *p = buf_b - MAX_PADDING;
+ unsigned char *q = buf_a + len_a - MAX_PADDING;
+ int i;
+
+ for (i = MAX_PADDING; i; i--, p++, q++) {
+ if (*p != *q)
+ break;
+ }
+
+ return p;
+}
+
+/**
+ * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data
+ * using TSC.
+ * @buf_a: first buffer
+ * @len_a: size of first buffer
+ * @buf_b: second buffer
+ * @len_b: size of second buffer
+ * @consecutive: returns true if there is data in buf_b that is consecutive
+ * to buf_a
+ * @ooo_tsc: out-of-order TSC due to VM TSC offset / scaling
+ *
+ * If the trace contains TSC we can look at the last TSC of @buf_a and the
+ * first TSC of @buf_b in order to determine if the buffers overlap, and then
+ * walk forward in @buf_b until a later TSC is found. A precondition is that
+ * @buf_a and @buf_b are positioned at a PSB.
+ *
+ * Return: A pointer into @buf_b from where non-overlapped data starts, or
+ * @buf_b + @len_b if there is no non-overlapped data.
+ */
+static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
+ size_t len_a,
+ unsigned char *buf_b,
+ size_t len_b, bool *consecutive,
+ bool ooo_tsc)
+{
+ uint64_t tsc_a, tsc_b;
+ unsigned char *p;
+ size_t len, rem_a, rem_b;
+
+ p = intel_pt_last_psb(buf_a, len_a);
+ if (!p)
+ return buf_b; /* No PSB in buf_a => no overlap */
+
+ len = len_a - (p - buf_a);
+ if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a)) {
+ /* The last PSB+ in buf_a is incomplete, so go back one more */
+ len_a -= len;
+ p = intel_pt_last_psb(buf_a, len_a);
+ if (!p)
+ return buf_b; /* No full PSB+ => assume no overlap */
+ len = len_a - (p - buf_a);
+ if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a))
+ return buf_b; /* No TSC in buf_a => assume no overlap */
+ }
+
+ while (1) {
+ /* Ignore PSB+ with no TSC */
+ if (intel_pt_next_tsc(buf_b, len_b, &tsc_b, &rem_b)) {
+ int cmp = intel_pt_tsc_cmp(tsc_a, tsc_b);
+
+ /* Same TSC, so buffers are consecutive */
+ if (!cmp && rem_b >= rem_a) {
+ unsigned char *start;
+
+ *consecutive = true;
+ start = buf_b + len_b - (rem_b - rem_a);
+ return adj_for_padding(start, buf_a, len_a);
+ }
+ if (cmp < 0 && !ooo_tsc)
+ return buf_b; /* tsc_a < tsc_b => no overlap */
+ }
+
+ if (!intel_pt_step_psb(&buf_b, &len_b))
+ return buf_b + len_b; /* No PSB in buf_b => no data */
+ }
+}
+
+/**
+ * intel_pt_find_overlap - determine start of non-overlapped trace data.
+ * @buf_a: first buffer
+ * @len_a: size of first buffer
+ * @buf_b: second buffer
+ * @len_b: size of second buffer
+ * @have_tsc: can use TSC packets to detect overlap
+ * @consecutive: returns true if there is data in buf_b that is consecutive
+ * to buf_a
+ * @ooo_tsc: out-of-order TSC due to VM TSC offset / scaling
+ *
+ * When trace samples or snapshots are recorded there is the possibility that
+ * the data overlaps. Note that, for the purposes of decoding, data is only
+ * useful if it begins with a PSB packet.
+ *
+ * Return: A pointer into @buf_b from where non-overlapped data starts, or
+ * @buf_b + @len_b if there is no non-overlapped data.
+ */
+unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
+ unsigned char *buf_b, size_t len_b,
+ bool have_tsc, bool *consecutive,
+ bool ooo_tsc)
+{
+ unsigned char *found;
+
+ /* Buffer 'b' must start at PSB so throw away everything before that */
+ if (!intel_pt_next_psb(&buf_b, &len_b))
+ return buf_b + len_b; /* No PSB */
+
+ if (!intel_pt_next_psb(&buf_a, &len_a))
+ return buf_b; /* No overlap */
+
+ if (have_tsc) {
+ found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b,
+ consecutive, ooo_tsc);
+ if (found)
+ return found;
+ }
+
+ /*
+ * Buffer 'b' cannot end within buffer 'a' so, for comparison purposes,
+ * we can ignore the first part of buffer 'a'.
+ */
+ while (len_b < len_a) {
+ if (!intel_pt_step_psb(&buf_a, &len_a))
+ return buf_b; /* No overlap */
+ }
+
+ /* Now len_b >= len_a */
+ while (1) {
+ /* Potential overlap so check the bytes */
+ found = memmem(buf_a, len_a, buf_b, len_a);
+ if (found) {
+ *consecutive = true;
+ return adj_for_padding(buf_b + len_a, buf_a, len_a);
+ }
+
+ /* Try again at next PSB in buffer 'a' */
+ if (!intel_pt_step_psb(&buf_a, &len_a))
+ return buf_b; /* No overlap */
+ }
+}
+
+/**
+ * struct fast_forward_data - data used by intel_pt_ff_cb().
+ * @timestamp: timestamp to fast forward towards
+ * @buf_timestamp: buffer timestamp of last buffer with trace data earlier than
+ * the fast forward timestamp.
+ */
+struct fast_forward_data {
+ uint64_t timestamp;
+ uint64_t buf_timestamp;
+};
+
+/**
+ * intel_pt_ff_cb - fast forward lookahead callback.
+ * @buffer: Intel PT trace buffer
+ * @data: opaque pointer to fast forward data (struct fast_forward_data)
+ *
+ * Determine if @buffer trace is past the fast forward timestamp.
+ *
+ * Return: 1 (stop lookahead) if @buffer trace is past the fast forward
+ * timestamp, and 0 otherwise.
+ */
+static int intel_pt_ff_cb(struct intel_pt_buffer *buffer, void *data)
+{
+ struct fast_forward_data *d = data;
+ unsigned char *buf;
+ uint64_t tsc;
+ size_t rem;
+ size_t len;
+
+ buf = (unsigned char *)buffer->buf;
+ len = buffer->len;
+
+ if (!intel_pt_next_psb(&buf, &len) ||
+ !intel_pt_next_tsc(buf, len, &tsc, &rem))
+ return 0;
+
+ tsc = intel_pt_8b_tsc(tsc, buffer->ref_timestamp);
+
+ intel_pt_log("Buffer 1st timestamp " x64_fmt " ref timestamp " x64_fmt "\n",
+ tsc, buffer->ref_timestamp);
+
+ /*
+ * If the buffer contains a timestamp earlier that the fast forward
+ * timestamp, then record it, else stop.
+ */
+ if (tsc < d->timestamp)
+ d->buf_timestamp = buffer->ref_timestamp;
+ else
+ return 1;
+
+ return 0;
+}
+
+/**
+ * intel_pt_fast_forward - reposition decoder forwards.
+ * @decoder: Intel PT decoder
+ * @timestamp: timestamp to fast forward towards
+ *
+ * Reposition decoder at the last PSB with a timestamp earlier than @timestamp.
+ *
+ * Return: 0 on success or negative error code on failure.
+ */
+int intel_pt_fast_forward(struct intel_pt_decoder *decoder, uint64_t timestamp)
+{
+ struct fast_forward_data d = { .timestamp = timestamp };
+ unsigned char *buf;
+ size_t len;
+ int err;
+
+ intel_pt_log("Fast forward towards timestamp " x64_fmt "\n", timestamp);
+
+ /* Find buffer timestamp of buffer to fast forward to */
+ err = decoder->lookahead(decoder->data, intel_pt_ff_cb, &d);
+ if (err < 0)
+ return err;
+
+ /* Walk to buffer with same buffer timestamp */
+ if (d.buf_timestamp) {
+ do {
+ decoder->pos += decoder->len;
+ decoder->len = 0;
+ err = intel_pt_get_next_data(decoder, true);
+ /* -ENOLINK means non-consecutive trace */
+ if (err && err != -ENOLINK)
+ return err;
+ } while (decoder->buf_timestamp != d.buf_timestamp);
+ }
+
+ if (!decoder->buf)
+ return 0;
+
+ buf = (unsigned char *)decoder->buf;
+ len = decoder->len;
+
+ if (!intel_pt_next_psb(&buf, &len))
+ return 0;
+
+ /*
+ * Walk PSBs while the PSB timestamp is less than the fast forward
+ * timestamp.
+ */
+ do {
+ uint64_t tsc;
+ size_t rem;
+
+ if (!intel_pt_next_tsc(buf, len, &tsc, &rem))
+ break;
+ tsc = intel_pt_8b_tsc(tsc, decoder->buf_timestamp);
+ /*
+ * A TSC packet can slip past MTC packets but, after fast
+ * forward, decoding starts at the TSC timestamp. That means
+ * the timestamps may not be exactly the same as the timestamps
+ * that would have been decoded without fast forward.
+ */
+ if (tsc < timestamp) {
+ intel_pt_log("Fast forward to next PSB timestamp " x64_fmt "\n", tsc);
+ decoder->pos += decoder->len - len;
+ decoder->buf = buf;
+ decoder->len = len;
+ intel_pt_reposition(decoder);
+ } else {
+ break;
+ }
+ } while (intel_pt_step_psb(&buf, &len));
+
+ return 0;
+}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
new file mode 100644
index 000000000..c773028df
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -0,0 +1,318 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * intel_pt_decoder.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ */
+
+#ifndef INCLUDE__INTEL_PT_DECODER_H__
+#define INCLUDE__INTEL_PT_DECODER_H__
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+
+#include <linux/rbtree.h>
+
+#include "intel-pt-insn-decoder.h"
+
+#define INTEL_PT_IN_TX (1 << 0)
+#define INTEL_PT_ABORT_TX (1 << 1)
+#define INTEL_PT_IFLAG (1 << 2)
+#define INTEL_PT_ASYNC (1 << 2)
+#define INTEL_PT_FUP_IP (1 << 3)
+#define INTEL_PT_SAMPLE_IPC (1 << 4)
+
+enum intel_pt_sample_type {
+ INTEL_PT_BRANCH = 1 << 0,
+ INTEL_PT_INSTRUCTION = 1 << 1,
+ INTEL_PT_TRANSACTION = 1 << 2,
+ INTEL_PT_PTW = 1 << 3,
+ INTEL_PT_MWAIT_OP = 1 << 4,
+ INTEL_PT_PWR_ENTRY = 1 << 5,
+ INTEL_PT_EX_STOP = 1 << 6,
+ INTEL_PT_PWR_EXIT = 1 << 7,
+ INTEL_PT_CBR_CHG = 1 << 8,
+ INTEL_PT_TRACE_BEGIN = 1 << 9,
+ INTEL_PT_TRACE_END = 1 << 10,
+ INTEL_PT_BLK_ITEMS = 1 << 11,
+ INTEL_PT_PSB_EVT = 1 << 12,
+ INTEL_PT_EVT = 1 << 13,
+ INTEL_PT_IFLAG_CHG = 1 << 14,
+};
+
+enum intel_pt_period_type {
+ INTEL_PT_PERIOD_NONE,
+ INTEL_PT_PERIOD_INSTRUCTIONS,
+ INTEL_PT_PERIOD_TICKS,
+ INTEL_PT_PERIOD_MTC,
+};
+
+enum {
+ INTEL_PT_ERR_NOMEM = 1,
+ INTEL_PT_ERR_INTERN,
+ INTEL_PT_ERR_BADPKT,
+ INTEL_PT_ERR_NODATA,
+ INTEL_PT_ERR_NOINSN,
+ INTEL_PT_ERR_MISMAT,
+ INTEL_PT_ERR_OVR,
+ INTEL_PT_ERR_LOST,
+ INTEL_PT_ERR_UNK,
+ INTEL_PT_ERR_NELOOP,
+ INTEL_PT_ERR_EPTW,
+ INTEL_PT_ERR_MAX,
+};
+
+enum intel_pt_param_flags {
+ /*
+ * FUP packet can contain next linear instruction pointer instead of
+ * current linear instruction pointer.
+ */
+ INTEL_PT_FUP_WITH_NLIP = 1 << 0,
+};
+
+enum intel_pt_blk_type {
+ INTEL_PT_GP_REGS = 1,
+ INTEL_PT_PEBS_BASIC = 4,
+ INTEL_PT_PEBS_MEM = 5,
+ INTEL_PT_LBR_0 = 8,
+ INTEL_PT_LBR_1 = 9,
+ INTEL_PT_LBR_2 = 10,
+ INTEL_PT_XMM = 16,
+ INTEL_PT_BLK_TYPE_MAX
+};
+
+/*
+ * The block type numbers are not sequential but here they are given sequential
+ * positions to avoid wasting space for array placement.
+ */
+enum intel_pt_blk_type_pos {
+ INTEL_PT_GP_REGS_POS,
+ INTEL_PT_PEBS_BASIC_POS,
+ INTEL_PT_PEBS_MEM_POS,
+ INTEL_PT_LBR_0_POS,
+ INTEL_PT_LBR_1_POS,
+ INTEL_PT_LBR_2_POS,
+ INTEL_PT_XMM_POS,
+ INTEL_PT_BLK_TYPE_CNT
+};
+
+/* Get the array position for a block type */
+static inline int intel_pt_blk_type_pos(enum intel_pt_blk_type blk_type)
+{
+#define BLK_TYPE(bt) [INTEL_PT_##bt] = INTEL_PT_##bt##_POS + 1
+ const int map[INTEL_PT_BLK_TYPE_MAX] = {
+ BLK_TYPE(GP_REGS),
+ BLK_TYPE(PEBS_BASIC),
+ BLK_TYPE(PEBS_MEM),
+ BLK_TYPE(LBR_0),
+ BLK_TYPE(LBR_1),
+ BLK_TYPE(LBR_2),
+ BLK_TYPE(XMM),
+ };
+#undef BLK_TYPE
+
+ return blk_type < INTEL_PT_BLK_TYPE_MAX ? map[blk_type] - 1 : -1;
+}
+
+#define INTEL_PT_BLK_ITEM_ID_CNT 32
+
+/*
+ * Use unions so that the block items can be accessed by name or by array index.
+ * There is an array of 32-bit masks for each block type, which indicate which
+ * values are present. Then arrays of 32 64-bit values for each block type.
+ */
+struct intel_pt_blk_items {
+ union {
+ uint32_t mask[INTEL_PT_BLK_TYPE_CNT];
+ struct {
+ uint32_t has_rflags:1;
+ uint32_t has_rip:1;
+ uint32_t has_rax:1;
+ uint32_t has_rcx:1;
+ uint32_t has_rdx:1;
+ uint32_t has_rbx:1;
+ uint32_t has_rsp:1;
+ uint32_t has_rbp:1;
+ uint32_t has_rsi:1;
+ uint32_t has_rdi:1;
+ uint32_t has_r8:1;
+ uint32_t has_r9:1;
+ uint32_t has_r10:1;
+ uint32_t has_r11:1;
+ uint32_t has_r12:1;
+ uint32_t has_r13:1;
+ uint32_t has_r14:1;
+ uint32_t has_r15:1;
+ uint32_t has_unused_0:14;
+ uint32_t has_ip:1;
+ uint32_t has_applicable_counters:1;
+ uint32_t has_timestamp:1;
+ uint32_t has_unused_1:29;
+ uint32_t has_mem_access_address:1;
+ uint32_t has_mem_aux_info:1;
+ uint32_t has_mem_access_latency:1;
+ uint32_t has_tsx_aux_info:1;
+ uint32_t has_unused_2:28;
+ uint32_t has_lbr_0;
+ uint32_t has_lbr_1;
+ uint32_t has_lbr_2;
+ uint32_t has_xmm;
+ };
+ };
+ union {
+ uint64_t val[INTEL_PT_BLK_TYPE_CNT][INTEL_PT_BLK_ITEM_ID_CNT];
+ struct {
+ struct {
+ uint64_t rflags;
+ uint64_t rip;
+ uint64_t rax;
+ uint64_t rcx;
+ uint64_t rdx;
+ uint64_t rbx;
+ uint64_t rsp;
+ uint64_t rbp;
+ uint64_t rsi;
+ uint64_t rdi;
+ uint64_t r8;
+ uint64_t r9;
+ uint64_t r10;
+ uint64_t r11;
+ uint64_t r12;
+ uint64_t r13;
+ uint64_t r14;
+ uint64_t r15;
+ uint64_t unused_0[INTEL_PT_BLK_ITEM_ID_CNT - 18];
+ };
+ struct {
+ uint64_t ip;
+ uint64_t applicable_counters;
+ uint64_t timestamp;
+ uint64_t unused_1[INTEL_PT_BLK_ITEM_ID_CNT - 3];
+ };
+ struct {
+ uint64_t mem_access_address;
+ uint64_t mem_aux_info;
+ uint64_t mem_access_latency;
+ uint64_t tsx_aux_info;
+ uint64_t unused_2[INTEL_PT_BLK_ITEM_ID_CNT - 4];
+ };
+ uint64_t lbr_0[INTEL_PT_BLK_ITEM_ID_CNT];
+ uint64_t lbr_1[INTEL_PT_BLK_ITEM_ID_CNT];
+ uint64_t lbr_2[INTEL_PT_BLK_ITEM_ID_CNT];
+ uint64_t xmm[INTEL_PT_BLK_ITEM_ID_CNT];
+ };
+ };
+ bool is_32_bit;
+};
+
+struct intel_pt_vmcs_info {
+ struct rb_node rb_node;
+ uint64_t vmcs;
+ uint64_t tsc_offset;
+ bool reliable;
+ bool error_printed;
+};
+
+/*
+ * Maximum number of event trace data in one go, assuming at most 1 per type
+ * and 6-bits of type in the EVD packet.
+ */
+#define INTEL_PT_MAX_EVDS 64
+
+/* Event trace data from EVD packet */
+struct intel_pt_evd {
+ int type;
+ uint64_t payload;
+};
+
+struct intel_pt_state {
+ enum intel_pt_sample_type type;
+ bool from_nr;
+ bool to_nr;
+ bool from_iflag;
+ bool to_iflag;
+ int err;
+ uint64_t from_ip;
+ uint64_t to_ip;
+ uint64_t tot_insn_cnt;
+ uint64_t tot_cyc_cnt;
+ uint64_t cycles;
+ uint64_t timestamp;
+ uint64_t est_timestamp;
+ uint64_t trace_nr;
+ uint64_t ptw_payload;
+ uint64_t mwait_payload;
+ uint64_t pwre_payload;
+ uint64_t pwrx_payload;
+ uint64_t cbr_payload;
+ uint64_t psb_offset;
+ uint32_t cbr;
+ uint32_t flags;
+ enum intel_pt_insn_op insn_op;
+ int insn_len;
+ char insn[INTEL_PT_INSN_BUF_SZ];
+ struct intel_pt_blk_items items;
+ int cfe_type;
+ int cfe_vector;
+ int evd_cnt;
+ struct intel_pt_evd *evd;
+};
+
+struct intel_pt_insn;
+
+struct intel_pt_buffer {
+ const unsigned char *buf;
+ size_t len;
+ bool consecutive;
+ uint64_t ref_timestamp;
+ uint64_t trace_nr;
+};
+
+typedef int (*intel_pt_lookahead_cb_t)(struct intel_pt_buffer *, void *);
+
+struct intel_pt_params {
+ int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
+ int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
+ uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
+ uint64_t max_insn_cnt, void *data);
+ bool (*pgd_ip)(uint64_t ip, void *data);
+ int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data);
+ struct intel_pt_vmcs_info *(*findnew_vmcs_info)(void *data, uint64_t vmcs);
+ void *data;
+ bool return_compression;
+ bool branch_enable;
+ bool vm_time_correlation;
+ bool vm_tm_corr_dry_run;
+ uint64_t first_timestamp;
+ uint64_t ctl;
+ uint64_t period;
+ enum intel_pt_period_type period_type;
+ unsigned max_non_turbo_ratio;
+ unsigned int mtc_period;
+ uint32_t tsc_ctc_ratio_n;
+ uint32_t tsc_ctc_ratio_d;
+ enum intel_pt_param_flags flags;
+ unsigned int quick;
+ int max_loops;
+};
+
+struct intel_pt_decoder;
+
+struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params);
+void intel_pt_decoder_free(struct intel_pt_decoder *decoder);
+
+const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder);
+
+int intel_pt_fast_forward(struct intel_pt_decoder *decoder, uint64_t timestamp);
+
+unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
+ unsigned char *buf_b, size_t len_b,
+ bool have_tsc, bool *consecutive,
+ bool ooo_tsc);
+
+int intel_pt__strerror(int code, char *buf, size_t buflen);
+
+void intel_pt_set_first_timestamp(struct intel_pt_decoder *decoder,
+ uint64_t first_timestamp);
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
new file mode 100644
index 000000000..c5d57027e
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * intel_pt_insn_decoder.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ */
+
+#include <linux/kernel.h>
+#include <stdio.h>
+#include <string.h>
+#include <endian.h>
+#include <byteswap.h>
+#include "../../../arch/x86/include/asm/insn.h"
+
+#include "../../../arch/x86/lib/inat.c"
+#include "../../../arch/x86/lib/insn.c"
+
+#include "event.h"
+
+#include "intel-pt-insn-decoder.h"
+#include "dump-insn.h"
+#include "util/sample.h"
+
+#if INTEL_PT_INSN_BUF_SZ < MAX_INSN_SIZE || INTEL_PT_INSN_BUF_SZ > MAX_INSN
+#error Instruction buffer size too small
+#endif
+
+/* Based on branch_type() from arch/x86/events/intel/lbr.c */
+static void intel_pt_insn_decoder(struct insn *insn,
+ struct intel_pt_insn *intel_pt_insn)
+{
+ enum intel_pt_insn_op op = INTEL_PT_OP_OTHER;
+ enum intel_pt_insn_branch branch = INTEL_PT_BR_NO_BRANCH;
+ int ext;
+
+ intel_pt_insn->rel = 0;
+ intel_pt_insn->emulated_ptwrite = false;
+
+ if (insn_is_avx(insn)) {
+ intel_pt_insn->op = INTEL_PT_OP_OTHER;
+ intel_pt_insn->branch = INTEL_PT_BR_NO_BRANCH;
+ intel_pt_insn->length = insn->length;
+ return;
+ }
+
+ switch (insn->opcode.bytes[0]) {
+ case 0xf:
+ switch (insn->opcode.bytes[1]) {
+ case 0x01:
+ switch (insn->modrm.bytes[0]) {
+ case 0xc2: /* vmlaunch */
+ case 0xc3: /* vmresume */
+ op = INTEL_PT_OP_VMENTRY;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0xca:
+ switch (insn->prefixes.bytes[3]) {
+ case 0xf2: /* erets */
+ op = INTEL_PT_OP_ERETS;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0xf3: /* eretu */
+ op = INTEL_PT_OP_ERETU;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ break;
+ case 0x05: /* syscall */
+ case 0x34: /* sysenter */
+ op = INTEL_PT_OP_SYSCALL;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0x07: /* sysret */
+ case 0x35: /* sysexit */
+ op = INTEL_PT_OP_SYSRET;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0x80 ... 0x8f: /* jcc */
+ op = INTEL_PT_OP_JCC;
+ branch = INTEL_PT_BR_CONDITIONAL;
+ break;
+ default:
+ break;
+ }
+ break;
+ case 0x70 ... 0x7f: /* jcc */
+ op = INTEL_PT_OP_JCC;
+ branch = INTEL_PT_BR_CONDITIONAL;
+ break;
+ case 0xc2: /* near ret */
+ case 0xc3: /* near ret */
+ case 0xca: /* far ret */
+ case 0xcb: /* far ret */
+ op = INTEL_PT_OP_RET;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0xcf: /* iret */
+ op = INTEL_PT_OP_IRET;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0xcc ... 0xce: /* int */
+ op = INTEL_PT_OP_INT;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0xe8: /* call near rel */
+ op = INTEL_PT_OP_CALL;
+ branch = INTEL_PT_BR_UNCONDITIONAL;
+ break;
+ case 0x9a: /* call far absolute */
+ op = INTEL_PT_OP_CALL;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0xe0 ... 0xe2: /* loop */
+ op = INTEL_PT_OP_LOOP;
+ branch = INTEL_PT_BR_CONDITIONAL;
+ break;
+ case 0xe3: /* jcc */
+ op = INTEL_PT_OP_JCC;
+ branch = INTEL_PT_BR_CONDITIONAL;
+ break;
+ case 0xe9: /* jmp */
+ case 0xeb: /* jmp */
+ op = INTEL_PT_OP_JMP;
+ branch = INTEL_PT_BR_UNCONDITIONAL;
+ break;
+ case 0xea: /* far jmp */
+ op = INTEL_PT_OP_JMP;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0xff: /* call near absolute, call far absolute ind */
+ ext = (insn->modrm.bytes[0] >> 3) & 0x7;
+ switch (ext) {
+ case 2: /* near ind call */
+ case 3: /* far ind call */
+ op = INTEL_PT_OP_CALL;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 4:
+ case 5:
+ op = INTEL_PT_OP_JMP;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+ intel_pt_insn->op = op;
+ intel_pt_insn->branch = branch;
+ intel_pt_insn->length = insn->length;
+
+ if (branch == INTEL_PT_BR_CONDITIONAL ||
+ branch == INTEL_PT_BR_UNCONDITIONAL) {
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ switch (insn->immediate.nbytes) {
+ case 1:
+ intel_pt_insn->rel = insn->immediate.value;
+ break;
+ case 2:
+ intel_pt_insn->rel =
+ bswap_16((short)insn->immediate.value);
+ break;
+ case 4:
+ intel_pt_insn->rel = bswap_32(insn->immediate.value);
+ break;
+ default:
+ intel_pt_insn->rel = 0;
+ break;
+ }
+#else
+ intel_pt_insn->rel = insn->immediate.value;
+#endif
+ }
+}
+
+int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
+ struct intel_pt_insn *intel_pt_insn)
+{
+ struct insn insn;
+ int ret;
+
+ ret = insn_decode(&insn, buf, len,
+ x86_64 ? INSN_MODE_64 : INSN_MODE_32);
+ if (ret < 0 || insn.length > len)
+ return -1;
+
+ intel_pt_insn_decoder(&insn, intel_pt_insn);
+ if (insn.length < INTEL_PT_INSN_BUF_SZ)
+ memcpy(intel_pt_insn->buf, buf, insn.length);
+ else
+ memcpy(intel_pt_insn->buf, buf, INTEL_PT_INSN_BUF_SZ);
+ return 0;
+}
+
+int arch_is_branch(const unsigned char *buf, size_t len, int x86_64)
+{
+ struct intel_pt_insn in;
+ if (intel_pt_get_insn(buf, len, x86_64, &in) < 0)
+ return -1;
+ return in.branch != INTEL_PT_BR_NO_BRANCH;
+}
+
+const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused,
+ u8 *inbuf, int inlen, int *lenp)
+{
+ struct insn insn;
+ int n, i, ret;
+ int left;
+
+ ret = insn_decode(&insn, inbuf, inlen,
+ x->is64bit ? INSN_MODE_64 : INSN_MODE_32);
+
+ if (ret < 0 || insn.length > inlen)
+ return "<bad>";
+ if (lenp)
+ *lenp = insn.length;
+ left = sizeof(x->out);
+ n = snprintf(x->out, left, "insn: ");
+ left -= n;
+ for (i = 0; i < insn.length; i++) {
+ n += snprintf(x->out + n, left, "%02x ", inbuf[i]);
+ left -= n;
+ }
+ return x->out;
+}
+
+const char *branch_name[] = {
+ [INTEL_PT_OP_OTHER] = "Other",
+ [INTEL_PT_OP_CALL] = "Call",
+ [INTEL_PT_OP_RET] = "Ret",
+ [INTEL_PT_OP_JCC] = "Jcc",
+ [INTEL_PT_OP_JMP] = "Jmp",
+ [INTEL_PT_OP_LOOP] = "Loop",
+ [INTEL_PT_OP_IRET] = "IRet",
+ [INTEL_PT_OP_INT] = "Int",
+ [INTEL_PT_OP_SYSCALL] = "Syscall",
+ [INTEL_PT_OP_SYSRET] = "Sysret",
+ [INTEL_PT_OP_VMENTRY] = "VMentry",
+ [INTEL_PT_OP_ERETS] = "Erets",
+ [INTEL_PT_OP_ERETU] = "Eretu",
+};
+
+const char *intel_pt_insn_name(enum intel_pt_insn_op op)
+{
+ return branch_name[op];
+}
+
+int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf,
+ size_t buf_len)
+{
+ switch (intel_pt_insn->branch) {
+ case INTEL_PT_BR_CONDITIONAL:
+ case INTEL_PT_BR_UNCONDITIONAL:
+ return snprintf(buf, buf_len, "%s %s%d",
+ intel_pt_insn_name(intel_pt_insn->op),
+ intel_pt_insn->rel > 0 ? "+" : "",
+ intel_pt_insn->rel);
+ case INTEL_PT_BR_NO_BRANCH:
+ case INTEL_PT_BR_INDIRECT:
+ return snprintf(buf, buf_len, "%s",
+ intel_pt_insn_name(intel_pt_insn->op));
+ default:
+ break;
+ }
+ return 0;
+}
+
+int intel_pt_insn_type(enum intel_pt_insn_op op)
+{
+ switch (op) {
+ case INTEL_PT_OP_OTHER:
+ return 0;
+ case INTEL_PT_OP_CALL:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL;
+ case INTEL_PT_OP_RET:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN;
+ case INTEL_PT_OP_JCC:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL;
+ case INTEL_PT_OP_JMP:
+ return PERF_IP_FLAG_BRANCH;
+ case INTEL_PT_OP_LOOP:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL;
+ case INTEL_PT_OP_IRET:
+ case INTEL_PT_OP_ERETS:
+ case INTEL_PT_OP_ERETU:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN |
+ PERF_IP_FLAG_INTERRUPT;
+ case INTEL_PT_OP_INT:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_INTERRUPT;
+ case INTEL_PT_OP_SYSCALL:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_SYSCALLRET;
+ case INTEL_PT_OP_SYSRET:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN |
+ PERF_IP_FLAG_SYSCALLRET;
+ case INTEL_PT_OP_VMENTRY:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_VMENTRY;
+ default:
+ return 0;
+ }
+}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
new file mode 100644
index 000000000..7fb7fe3a1
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * intel_pt_insn_decoder.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ */
+
+#ifndef INCLUDE__INTEL_PT_INSN_DECODER_H__
+#define INCLUDE__INTEL_PT_INSN_DECODER_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define INTEL_PT_INSN_DESC_MAX 32
+#define INTEL_PT_INSN_BUF_SZ 16
+
+enum intel_pt_insn_op {
+ INTEL_PT_OP_OTHER,
+ INTEL_PT_OP_CALL,
+ INTEL_PT_OP_RET,
+ INTEL_PT_OP_JCC,
+ INTEL_PT_OP_JMP,
+ INTEL_PT_OP_LOOP,
+ INTEL_PT_OP_IRET,
+ INTEL_PT_OP_INT,
+ INTEL_PT_OP_SYSCALL,
+ INTEL_PT_OP_SYSRET,
+ INTEL_PT_OP_VMENTRY,
+ INTEL_PT_OP_ERETS,
+ INTEL_PT_OP_ERETU,
+};
+
+enum intel_pt_insn_branch {
+ INTEL_PT_BR_NO_BRANCH,
+ INTEL_PT_BR_INDIRECT,
+ INTEL_PT_BR_CONDITIONAL,
+ INTEL_PT_BR_UNCONDITIONAL,
+};
+
+struct intel_pt_insn {
+ enum intel_pt_insn_op op;
+ enum intel_pt_insn_branch branch;
+ bool emulated_ptwrite;
+ int length;
+ int32_t rel;
+ unsigned char buf[INTEL_PT_INSN_BUF_SZ];
+};
+
+int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
+ struct intel_pt_insn *intel_pt_insn);
+
+const char *intel_pt_insn_name(enum intel_pt_insn_op op);
+
+int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf,
+ size_t buf_len);
+
+int intel_pt_insn_type(enum intel_pt_insn_op op);
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
new file mode 100644
index 000000000..ef55d6232
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * intel_pt_log.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include <linux/zalloc.h>
+#include <linux/kernel.h>
+
+#include "intel-pt-log.h"
+#include "intel-pt-insn-decoder.h"
+
+#include "intel-pt-pkt-decoder.h"
+
+#define MAX_LOG_NAME 256
+
+#define DFLT_BUF_SZ (16 * 1024)
+
+struct log_buf {
+ char *buf;
+ size_t buf_sz;
+ size_t head;
+ bool wrapped;
+ FILE *backend;
+};
+
+static FILE *f;
+static char log_name[MAX_LOG_NAME];
+bool intel_pt_enable_logging;
+static bool intel_pt_dump_log_on_error;
+static unsigned int intel_pt_log_on_error_size;
+static struct log_buf log_buf;
+
+void *intel_pt_log_fp(void)
+{
+ return f;
+}
+
+void intel_pt_log_enable(bool dump_log_on_error, unsigned int log_on_error_size)
+{
+ intel_pt_enable_logging = true;
+ intel_pt_dump_log_on_error = dump_log_on_error;
+ intel_pt_log_on_error_size = log_on_error_size;
+}
+
+void intel_pt_log_disable(void)
+{
+ if (f)
+ fflush(f);
+ intel_pt_enable_logging = false;
+}
+
+void intel_pt_log_set_name(const char *name)
+{
+ strncpy(log_name, name, MAX_LOG_NAME - 5);
+ strcat(log_name, ".log");
+}
+
+static void intel_pt_print_data(const unsigned char *buf, int len, uint64_t pos,
+ int indent)
+{
+ int i;
+
+ for (i = 0; i < indent; i++)
+ fprintf(f, " ");
+
+ fprintf(f, " %08" PRIx64 ": ", pos);
+ for (i = 0; i < len; i++)
+ fprintf(f, " %02x", buf[i]);
+ for (; i < 16; i++)
+ fprintf(f, " ");
+ fprintf(f, " ");
+}
+
+static void intel_pt_print_no_data(uint64_t pos, int indent)
+{
+ int i;
+
+ for (i = 0; i < indent; i++)
+ fprintf(f, " ");
+
+ fprintf(f, " %08" PRIx64 ": ", pos);
+ for (i = 0; i < 16; i++)
+ fprintf(f, " ");
+ fprintf(f, " ");
+}
+
+static ssize_t log_buf__write(void *cookie, const char *buf, size_t size)
+{
+ struct log_buf *b = cookie;
+ size_t sz = size;
+
+ if (!b->buf)
+ return size;
+
+ while (sz) {
+ size_t space = b->buf_sz - b->head;
+ size_t n = min(space, sz);
+
+ memcpy(b->buf + b->head, buf, n);
+ sz -= n;
+ buf += n;
+ b->head += n;
+ if (sz && b->head >= b->buf_sz) {
+ b->head = 0;
+ b->wrapped = true;
+ }
+ }
+ return size;
+}
+
+static int log_buf__close(void *cookie)
+{
+ struct log_buf *b = cookie;
+
+ zfree(&b->buf);
+ return 0;
+}
+
+static FILE *log_buf__open(struct log_buf *b, FILE *backend, unsigned int sz)
+{
+ cookie_io_functions_t fns = {
+ .write = log_buf__write,
+ .close = log_buf__close,
+ };
+ FILE *file;
+
+ memset(b, 0, sizeof(*b));
+ b->buf_sz = sz;
+ b->buf = malloc(b->buf_sz);
+ b->backend = backend;
+ file = fopencookie(b, "a", fns);
+ if (!file)
+ zfree(&b->buf);
+ return file;
+}
+
+static bool remove_first_line(const char **p, size_t *n)
+{
+ for (; *n && **p != '\n'; ++*p, --*n)
+ ;
+ if (*n) {
+ *p += 1;
+ *n -= 1;
+ return true;
+ }
+ return false;
+}
+
+static void write_lines(const char *p, size_t n, FILE *fp, bool *remove_first)
+{
+ if (*remove_first)
+ *remove_first = !remove_first_line(&p, &n);
+ fwrite(p, n, 1, fp);
+}
+
+static void log_buf__dump(struct log_buf *b)
+{
+ bool remove_first = false;
+
+ if (!b->buf)
+ return;
+
+ fflush(f); /* Could update b->head and b->wrapped */
+ fprintf(b->backend, "Dumping debug log buffer\n");
+ if (b->wrapped) {
+ remove_first = true;
+ write_lines(b->buf + b->head, b->buf_sz - b->head, b->backend, &remove_first);
+ }
+ write_lines(b->buf, b->head, b->backend, &remove_first);
+ fprintf(b->backend, "End of debug log buffer dump\n");
+
+ b->head = 0;
+ b->wrapped = false;
+}
+
+void intel_pt_log_dump_buf(void)
+{
+ log_buf__dump(&log_buf);
+}
+
+static int intel_pt_log_open(void)
+{
+ if (!intel_pt_enable_logging)
+ return -1;
+
+ if (f)
+ return 0;
+
+ if (log_name[0])
+ f = fopen(log_name, "w+");
+ else
+ f = stdout;
+ if (f && intel_pt_dump_log_on_error)
+ f = log_buf__open(&log_buf, f, intel_pt_log_on_error_size);
+ if (!f) {
+ intel_pt_enable_logging = false;
+ return -1;
+ }
+
+ return 0;
+}
+
+void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
+ uint64_t pos, const unsigned char *buf)
+{
+ char desc[INTEL_PT_PKT_DESC_MAX];
+
+ if (intel_pt_log_open())
+ return;
+
+ intel_pt_print_data(buf, pkt_len, pos, 0);
+ intel_pt_pkt_desc(packet, desc, INTEL_PT_PKT_DESC_MAX);
+ fprintf(f, "%s\n", desc);
+}
+
+void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip)
+{
+ char desc[INTEL_PT_INSN_DESC_MAX];
+ size_t len = intel_pt_insn->length;
+
+ if (intel_pt_log_open())
+ return;
+
+ if (len > INTEL_PT_INSN_BUF_SZ)
+ len = INTEL_PT_INSN_BUF_SZ;
+ intel_pt_print_data(intel_pt_insn->buf, len, ip, 8);
+ if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0)
+ fprintf(f, "%s\n", desc);
+ else
+ fprintf(f, "Bad instruction!\n");
+}
+
+void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn,
+ uint64_t ip)
+{
+ char desc[INTEL_PT_INSN_DESC_MAX];
+
+ if (intel_pt_log_open())
+ return;
+
+ intel_pt_print_no_data(ip, 8);
+ if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0)
+ fprintf(f, "%s\n", desc);
+ else
+ fprintf(f, "Bad instruction!\n");
+}
+
+void __intel_pt_log(const char *fmt, ...)
+{
+ va_list args;
+
+ if (intel_pt_log_open())
+ return;
+
+ va_start(args, fmt);
+ vfprintf(f, fmt, args);
+ va_end(args);
+}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
new file mode 100644
index 000000000..354d7d23f
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * intel_pt_log.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ */
+
+#ifndef INCLUDE__INTEL_PT_LOG_H__
+#define INCLUDE__INTEL_PT_LOG_H__
+
+#include <linux/compiler.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+struct intel_pt_pkt;
+
+void *intel_pt_log_fp(void);
+void intel_pt_log_enable(bool dump_log_on_error, unsigned int log_on_error_size);
+void intel_pt_log_disable(void);
+void intel_pt_log_set_name(const char *name);
+void intel_pt_log_dump_buf(void);
+
+void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
+ uint64_t pos, const unsigned char *buf);
+
+struct intel_pt_insn;
+
+void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip);
+void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn,
+ uint64_t ip);
+
+void __intel_pt_log(const char *fmt, ...) __printf(1, 2);
+
+#define intel_pt_log(fmt, ...) \
+ do { \
+ if (intel_pt_enable_logging) \
+ __intel_pt_log(fmt, ##__VA_ARGS__); \
+ } while (0)
+
+#define intel_pt_log_packet(arg, ...) \
+ do { \
+ if (intel_pt_enable_logging) \
+ __intel_pt_log_packet(arg, ##__VA_ARGS__); \
+ } while (0)
+
+#define intel_pt_log_insn(arg, ...) \
+ do { \
+ if (intel_pt_enable_logging) \
+ __intel_pt_log_insn(arg, ##__VA_ARGS__); \
+ } while (0)
+
+#define intel_pt_log_insn_no_data(arg, ...) \
+ do { \
+ if (intel_pt_enable_logging) \
+ __intel_pt_log_insn_no_data(arg, ##__VA_ARGS__); \
+ } while (0)
+
+#define x64_fmt "0x%" PRIx64
+
+extern bool intel_pt_enable_logging;
+
+static inline void intel_pt_log_at(const char *msg, uint64_t u)
+{
+ intel_pt_log("%s at " x64_fmt "\n", msg, u);
+}
+
+static inline void intel_pt_log_to(const char *msg, uint64_t u)
+{
+ intel_pt_log("%s to " x64_fmt "\n", msg, u);
+}
+
+#define intel_pt_log_var(var, fmt) intel_pt_log("%s: " #var " " fmt "\n", __func__, var)
+
+#define intel_pt_log_x32(var) intel_pt_log_var(var, "%#x")
+#define intel_pt_log_x64(var) intel_pt_log_var(var, "%#" PRIx64)
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
new file mode 100644
index 000000000..af9710622
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -0,0 +1,800 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * intel_pt_pkt_decoder.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <endian.h>
+#include <byteswap.h>
+#include <linux/compiler.h>
+
+#include "intel-pt-pkt-decoder.h"
+
+#define BIT(n) (1 << (n))
+
+#define BIT63 ((uint64_t)1 << 63)
+
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define le16_to_cpu bswap_16
+#define le32_to_cpu bswap_32
+#define le64_to_cpu bswap_64
+#define memcpy_le64(d, s, n) do { \
+ memcpy((d), (s), (n)); \
+ *(d) = le64_to_cpu(*(d)); \
+} while (0)
+#else
+#define le16_to_cpu
+#define le32_to_cpu
+#define le64_to_cpu
+#define memcpy_le64 memcpy
+#endif
+
+static const char * const packet_name[] = {
+ [INTEL_PT_BAD] = "Bad Packet!",
+ [INTEL_PT_PAD] = "PAD",
+ [INTEL_PT_TNT] = "TNT",
+ [INTEL_PT_TIP_PGD] = "TIP.PGD",
+ [INTEL_PT_TIP_PGE] = "TIP.PGE",
+ [INTEL_PT_TSC] = "TSC",
+ [INTEL_PT_TMA] = "TMA",
+ [INTEL_PT_MODE_EXEC] = "MODE.Exec",
+ [INTEL_PT_MODE_TSX] = "MODE.TSX",
+ [INTEL_PT_MTC] = "MTC",
+ [INTEL_PT_TIP] = "TIP",
+ [INTEL_PT_FUP] = "FUP",
+ [INTEL_PT_CYC] = "CYC",
+ [INTEL_PT_VMCS] = "VMCS",
+ [INTEL_PT_PSB] = "PSB",
+ [INTEL_PT_PSBEND] = "PSBEND",
+ [INTEL_PT_CBR] = "CBR",
+ [INTEL_PT_TRACESTOP] = "TraceSTOP",
+ [INTEL_PT_PIP] = "PIP",
+ [INTEL_PT_OVF] = "OVF",
+ [INTEL_PT_MNT] = "MNT",
+ [INTEL_PT_PTWRITE] = "PTWRITE",
+ [INTEL_PT_PTWRITE_IP] = "PTWRITE",
+ [INTEL_PT_EXSTOP] = "EXSTOP",
+ [INTEL_PT_EXSTOP_IP] = "EXSTOP",
+ [INTEL_PT_MWAIT] = "MWAIT",
+ [INTEL_PT_PWRE] = "PWRE",
+ [INTEL_PT_PWRX] = "PWRX",
+ [INTEL_PT_BBP] = "BBP",
+ [INTEL_PT_BIP] = "BIP",
+ [INTEL_PT_BEP] = "BEP",
+ [INTEL_PT_BEP_IP] = "BEP",
+ [INTEL_PT_CFE] = "CFE",
+ [INTEL_PT_CFE_IP] = "CFE",
+ [INTEL_PT_EVD] = "EVD",
+};
+
+const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
+{
+ return packet_name[type];
+}
+
+static int intel_pt_get_long_tnt(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ uint64_t payload;
+ int count;
+
+ if (len < 8)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ payload = le64_to_cpu(*(uint64_t *)buf);
+
+ for (count = 47; count; count--) {
+ if (payload & BIT63)
+ break;
+ payload <<= 1;
+ }
+
+ packet->type = INTEL_PT_TNT;
+ packet->count = count;
+ packet->payload = payload << 1;
+ return 8;
+}
+
+static int intel_pt_get_pip(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ uint64_t payload = 0;
+
+ if (len < 8)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ packet->type = INTEL_PT_PIP;
+ memcpy_le64(&payload, buf + 2, 6);
+ packet->payload = payload;
+
+ return 8;
+}
+
+static int intel_pt_get_tracestop(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_TRACESTOP;
+ return 2;
+}
+
+static int intel_pt_get_cbr(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 4)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_CBR;
+ packet->payload = le16_to_cpu(*(uint16_t *)(buf + 2));
+ return 4;
+}
+
+static int intel_pt_get_vmcs(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ unsigned int count = (52 - 5) >> 3;
+
+ if (count < 1 || count > 7)
+ return INTEL_PT_BAD_PACKET;
+
+ if (len < count + 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ packet->type = INTEL_PT_VMCS;
+ packet->count = count;
+ memcpy_le64(&packet->payload, buf + 2, count);
+
+ return count + 2;
+}
+
+static int intel_pt_get_ovf(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_OVF;
+ return 2;
+}
+
+static int intel_pt_get_psb(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ int i;
+
+ if (len < 16)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ for (i = 2; i < 16; i += 2) {
+ if (buf[i] != 2 || buf[i + 1] != 0x82)
+ return INTEL_PT_BAD_PACKET;
+ }
+
+ packet->type = INTEL_PT_PSB;
+ return 16;
+}
+
+static int intel_pt_get_psbend(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_PSBEND;
+ return 2;
+}
+
+static int intel_pt_get_tma(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 7)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ packet->type = INTEL_PT_TMA;
+ packet->payload = buf[2] | (buf[3] << 8);
+ packet->count = buf[5] | ((buf[6] & BIT(0)) << 8);
+ return 7;
+}
+
+static int intel_pt_get_pad(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_PAD;
+ return 1;
+}
+
+static int intel_pt_get_mnt(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 11)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_MNT;
+ memcpy_le64(&packet->payload, buf + 3, 8);
+ return 11;
+}
+
+static int intel_pt_get_3byte(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 3)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ switch (buf[2]) {
+ case 0x88: /* MNT */
+ return intel_pt_get_mnt(buf, len, packet);
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+}
+
+static int intel_pt_get_ptwrite(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ packet->count = (buf[1] >> 5) & 0x3;
+ packet->type = buf[1] & BIT(7) ? INTEL_PT_PTWRITE_IP :
+ INTEL_PT_PTWRITE;
+
+ switch (packet->count) {
+ case 0:
+ if (len < 6)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->payload = le32_to_cpu(*(uint32_t *)(buf + 2));
+ return 6;
+ case 1:
+ if (len < 10)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2));
+ return 10;
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+}
+
+static int intel_pt_get_exstop(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_EXSTOP;
+ return 2;
+}
+
+static int intel_pt_get_exstop_ip(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_EXSTOP_IP;
+ return 2;
+}
+
+static int intel_pt_get_mwait(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 10)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_MWAIT;
+ packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2));
+ return 10;
+}
+
+static int intel_pt_get_pwre(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 4)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_PWRE;
+ memcpy_le64(&packet->payload, buf + 2, 2);
+ return 4;
+}
+
+static int intel_pt_get_pwrx(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 7)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_PWRX;
+ memcpy_le64(&packet->payload, buf + 2, 5);
+ return 7;
+}
+
+static int intel_pt_get_bbp(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 3)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_BBP;
+ packet->count = buf[2] >> 7;
+ packet->payload = buf[2] & 0x1f;
+ return 3;
+}
+
+static int intel_pt_get_bip_4(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 5)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_BIP;
+ packet->count = buf[0] >> 3;
+ memcpy_le64(&packet->payload, buf + 1, 4);
+ return 5;
+}
+
+static int intel_pt_get_bip_8(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 9)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_BIP;
+ packet->count = buf[0] >> 3;
+ memcpy_le64(&packet->payload, buf + 1, 8);
+ return 9;
+}
+
+static int intel_pt_get_bep(size_t len, struct intel_pt_pkt *packet)
+{
+ if (len < 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_BEP;
+ return 2;
+}
+
+static int intel_pt_get_bep_ip(size_t len, struct intel_pt_pkt *packet)
+{
+ if (len < 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_BEP_IP;
+ return 2;
+}
+
+static int intel_pt_get_cfe(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 4)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = buf[2] & 0x80 ? INTEL_PT_CFE_IP : INTEL_PT_CFE;
+ packet->count = buf[2] & 0x1f;
+ packet->payload = buf[3];
+ return 4;
+}
+
+static int intel_pt_get_evd(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 11)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_EVD;
+ packet->count = buf[2] & 0x3f;
+ packet->payload = buf[3];
+ memcpy_le64(&packet->payload, buf + 3, 8);
+ return 11;
+}
+
+static int intel_pt_get_ext(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ if ((buf[1] & 0x1f) == 0x12)
+ return intel_pt_get_ptwrite(buf, len, packet);
+
+ switch (buf[1]) {
+ case 0xa3: /* Long TNT */
+ return intel_pt_get_long_tnt(buf, len, packet);
+ case 0x43: /* PIP */
+ return intel_pt_get_pip(buf, len, packet);
+ case 0x83: /* TraceStop */
+ return intel_pt_get_tracestop(packet);
+ case 0x03: /* CBR */
+ return intel_pt_get_cbr(buf, len, packet);
+ case 0xc8: /* VMCS */
+ return intel_pt_get_vmcs(buf, len, packet);
+ case 0xf3: /* OVF */
+ return intel_pt_get_ovf(packet);
+ case 0x82: /* PSB */
+ return intel_pt_get_psb(buf, len, packet);
+ case 0x23: /* PSBEND */
+ return intel_pt_get_psbend(packet);
+ case 0x73: /* TMA */
+ return intel_pt_get_tma(buf, len, packet);
+ case 0xC3: /* 3-byte header */
+ return intel_pt_get_3byte(buf, len, packet);
+ case 0x62: /* EXSTOP no IP */
+ return intel_pt_get_exstop(packet);
+ case 0xE2: /* EXSTOP with IP */
+ return intel_pt_get_exstop_ip(packet);
+ case 0xC2: /* MWAIT */
+ return intel_pt_get_mwait(buf, len, packet);
+ case 0x22: /* PWRE */
+ return intel_pt_get_pwre(buf, len, packet);
+ case 0xA2: /* PWRX */
+ return intel_pt_get_pwrx(buf, len, packet);
+ case 0x63: /* BBP */
+ return intel_pt_get_bbp(buf, len, packet);
+ case 0x33: /* BEP no IP */
+ return intel_pt_get_bep(len, packet);
+ case 0xb3: /* BEP with IP */
+ return intel_pt_get_bep_ip(len, packet);
+ case 0x13: /* CFE */
+ return intel_pt_get_cfe(buf, len, packet);
+ case 0x53: /* EVD */
+ return intel_pt_get_evd(buf, len, packet);
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+}
+
+static int intel_pt_get_short_tnt(unsigned int byte,
+ struct intel_pt_pkt *packet)
+{
+ int count;
+
+ for (count = 6; count; count--) {
+ if (byte & BIT(7))
+ break;
+ byte <<= 1;
+ }
+
+ packet->type = INTEL_PT_TNT;
+ packet->count = count;
+ packet->payload = (uint64_t)byte << 57;
+
+ return 1;
+}
+
+static int intel_pt_get_cyc(unsigned int byte, const unsigned char *buf,
+ size_t len, struct intel_pt_pkt *packet)
+{
+ unsigned int offs = 1, shift;
+ uint64_t payload = byte >> 3;
+
+ byte >>= 2;
+ len -= 1;
+ for (shift = 5; byte & 1; shift += 7) {
+ if (offs > 9)
+ return INTEL_PT_BAD_PACKET;
+ if (len < offs)
+ return INTEL_PT_NEED_MORE_BYTES;
+ byte = buf[offs++];
+ payload |= ((uint64_t)byte >> 1) << shift;
+ }
+
+ packet->type = INTEL_PT_CYC;
+ packet->payload = payload;
+ return offs;
+}
+
+static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte,
+ const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ int ip_len;
+
+ packet->count = byte >> 5;
+
+ switch (packet->count) {
+ case 0:
+ ip_len = 0;
+ break;
+ case 1:
+ if (len < 3)
+ return INTEL_PT_NEED_MORE_BYTES;
+ ip_len = 2;
+ packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1));
+ break;
+ case 2:
+ if (len < 5)
+ return INTEL_PT_NEED_MORE_BYTES;
+ ip_len = 4;
+ packet->payload = le32_to_cpu(*(uint32_t *)(buf + 1));
+ break;
+ case 3:
+ case 4:
+ if (len < 7)
+ return INTEL_PT_NEED_MORE_BYTES;
+ ip_len = 6;
+ memcpy_le64(&packet->payload, buf + 1, 6);
+ break;
+ case 6:
+ if (len < 9)
+ return INTEL_PT_NEED_MORE_BYTES;
+ ip_len = 8;
+ packet->payload = le64_to_cpu(*(uint64_t *)(buf + 1));
+ break;
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+
+ packet->type = type;
+
+ return ip_len + 1;
+}
+
+static int intel_pt_get_mode(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ switch (buf[1] >> 5) {
+ case 0:
+ packet->type = INTEL_PT_MODE_EXEC;
+ packet->count = buf[1];
+ switch (buf[1] & 3) {
+ case 0:
+ packet->payload = 16;
+ break;
+ case 1:
+ packet->payload = 64;
+ break;
+ case 2:
+ packet->payload = 32;
+ break;
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+ break;
+ case 1:
+ packet->type = INTEL_PT_MODE_TSX;
+ if ((buf[1] & 3) == 3)
+ return INTEL_PT_BAD_PACKET;
+ packet->payload = buf[1] & 3;
+ break;
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+
+ return 2;
+}
+
+static int intel_pt_get_tsc(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 8)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_TSC;
+ memcpy_le64(&packet->payload, buf + 1, 7);
+ return 8;
+}
+
+static int intel_pt_get_mtc(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_MTC;
+ packet->payload = buf[1];
+ return 2;
+}
+
+static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet,
+ enum intel_pt_pkt_ctx ctx)
+{
+ unsigned int byte;
+
+ memset(packet, 0, sizeof(struct intel_pt_pkt));
+
+ if (!len)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ byte = buf[0];
+
+ switch (ctx) {
+ case INTEL_PT_NO_CTX:
+ break;
+ case INTEL_PT_BLK_4_CTX:
+ if ((byte & 0x7) == 4)
+ return intel_pt_get_bip_4(buf, len, packet);
+ break;
+ case INTEL_PT_BLK_8_CTX:
+ if ((byte & 0x7) == 4)
+ return intel_pt_get_bip_8(buf, len, packet);
+ break;
+ default:
+ break;
+ }
+
+ if (!(byte & BIT(0))) {
+ if (byte == 0)
+ return intel_pt_get_pad(packet);
+ if (byte == 2)
+ return intel_pt_get_ext(buf, len, packet);
+ return intel_pt_get_short_tnt(byte, packet);
+ }
+
+ if ((byte & 2))
+ return intel_pt_get_cyc(byte, buf, len, packet);
+
+ switch (byte & 0x1f) {
+ case 0x0D:
+ return intel_pt_get_ip(INTEL_PT_TIP, byte, buf, len, packet);
+ case 0x11:
+ return intel_pt_get_ip(INTEL_PT_TIP_PGE, byte, buf, len,
+ packet);
+ case 0x01:
+ return intel_pt_get_ip(INTEL_PT_TIP_PGD, byte, buf, len,
+ packet);
+ case 0x1D:
+ return intel_pt_get_ip(INTEL_PT_FUP, byte, buf, len, packet);
+ case 0x19:
+ switch (byte) {
+ case 0x99:
+ return intel_pt_get_mode(buf, len, packet);
+ case 0x19:
+ return intel_pt_get_tsc(buf, len, packet);
+ case 0x59:
+ return intel_pt_get_mtc(buf, len, packet);
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+}
+
+void intel_pt_upd_pkt_ctx(const struct intel_pt_pkt *packet,
+ enum intel_pt_pkt_ctx *ctx)
+{
+ switch (packet->type) {
+ case INTEL_PT_BAD:
+ case INTEL_PT_PAD:
+ case INTEL_PT_TSC:
+ case INTEL_PT_TMA:
+ case INTEL_PT_MTC:
+ case INTEL_PT_FUP:
+ case INTEL_PT_CYC:
+ case INTEL_PT_CBR:
+ case INTEL_PT_MNT:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ case INTEL_PT_BIP:
+ break;
+ case INTEL_PT_TNT:
+ case INTEL_PT_TIP:
+ case INTEL_PT_TIP_PGD:
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_MODE_EXEC:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_PIP:
+ case INTEL_PT_OVF:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_PSB:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
+ *ctx = INTEL_PT_NO_CTX;
+ break;
+ case INTEL_PT_BBP:
+ if (packet->count)
+ *ctx = INTEL_PT_BLK_4_CTX;
+ else
+ *ctx = INTEL_PT_BLK_8_CTX;
+ break;
+ default:
+ break;
+ }
+}
+
+int intel_pt_get_packet(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet, enum intel_pt_pkt_ctx *ctx)
+{
+ int ret;
+
+ ret = intel_pt_do_get_packet(buf, len, packet, *ctx);
+ if (ret > 0) {
+ while (ret < 8 && len > (size_t)ret && !buf[ret])
+ ret += 1;
+ intel_pt_upd_pkt_ctx(packet, ctx);
+ }
+ return ret;
+}
+
+int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
+ size_t buf_len)
+{
+ int ret, i, nr;
+ unsigned long long payload = packet->payload;
+ const char *name = intel_pt_pkt_name(packet->type);
+
+ switch (packet->type) {
+ case INTEL_PT_BAD:
+ case INTEL_PT_PAD:
+ case INTEL_PT_PSB:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_OVF:
+ return snprintf(buf, buf_len, "%s", name);
+ case INTEL_PT_TNT: {
+ size_t blen = buf_len;
+
+ ret = snprintf(buf, blen, "%s ", name);
+ if (ret < 0)
+ return ret;
+ buf += ret;
+ blen -= ret;
+ for (i = 0; i < packet->count; i++) {
+ if (payload & BIT63)
+ ret = snprintf(buf, blen, "T");
+ else
+ ret = snprintf(buf, blen, "N");
+ if (ret < 0)
+ return ret;
+ buf += ret;
+ blen -= ret;
+ payload <<= 1;
+ }
+ ret = snprintf(buf, blen, " (%d)", packet->count);
+ if (ret < 0)
+ return ret;
+ blen -= ret;
+ return buf_len - blen;
+ }
+ case INTEL_PT_TIP_PGD:
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TIP:
+ case INTEL_PT_FUP:
+ if (!(packet->count))
+ return snprintf(buf, buf_len, "%s no ip", name);
+ fallthrough;
+ case INTEL_PT_CYC:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MTC:
+ case INTEL_PT_MNT:
+ case INTEL_PT_CBR:
+ case INTEL_PT_TSC:
+ return snprintf(buf, buf_len, "%s 0x%llx", name, payload);
+ case INTEL_PT_TMA:
+ return snprintf(buf, buf_len, "%s CTC 0x%x FC 0x%x", name,
+ (unsigned)payload, packet->count);
+ case INTEL_PT_MODE_EXEC:
+ return snprintf(buf, buf_len, "%s IF:%d %lld",
+ name, !!(packet->count & 4), payload);
+ case INTEL_PT_MODE_TSX:
+ return snprintf(buf, buf_len, "%s TXAbort:%u InTX:%u",
+ name, (unsigned)(payload >> 1) & 1,
+ (unsigned)payload & 1);
+ case INTEL_PT_PIP:
+ nr = packet->payload & INTEL_PT_VMX_NR_FLAG ? 1 : 0;
+ payload &= ~INTEL_PT_VMX_NR_FLAG;
+ ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)",
+ name, payload >> 1, nr);
+ return ret;
+ case INTEL_PT_PTWRITE:
+ return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload);
+ case INTEL_PT_PTWRITE_IP:
+ return snprintf(buf, buf_len, "%s 0x%llx IP:1", name, payload);
+ case INTEL_PT_BEP:
+ case INTEL_PT_EXSTOP:
+ return snprintf(buf, buf_len, "%s IP:0", name);
+ case INTEL_PT_BEP_IP:
+ case INTEL_PT_EXSTOP_IP:
+ return snprintf(buf, buf_len, "%s IP:1", name);
+ case INTEL_PT_MWAIT:
+ return snprintf(buf, buf_len, "%s 0x%llx Hints 0x%x Extensions 0x%x",
+ name, payload, (unsigned int)(payload & 0xff),
+ (unsigned int)((payload >> 32) & 0x3));
+ case INTEL_PT_PWRE:
+ return snprintf(buf, buf_len, "%s 0x%llx HW:%u CState:%u Sub-CState:%u",
+ name, payload, !!(payload & 0x80),
+ (unsigned int)((payload >> 12) & 0xf),
+ (unsigned int)((payload >> 8) & 0xf));
+ case INTEL_PT_PWRX:
+ return snprintf(buf, buf_len, "%s 0x%llx Last CState:%u Deepest CState:%u Wake Reason 0x%x",
+ name, payload,
+ (unsigned int)((payload >> 4) & 0xf),
+ (unsigned int)(payload & 0xf),
+ (unsigned int)((payload >> 8) & 0xf));
+ case INTEL_PT_BBP:
+ return snprintf(buf, buf_len, "%s SZ %s-byte Type 0x%llx",
+ name, packet->count ? "4" : "8", payload);
+ case INTEL_PT_BIP:
+ return snprintf(buf, buf_len, "%s ID 0x%02x Value 0x%llx",
+ name, packet->count, payload);
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ return snprintf(buf, buf_len, "%s IP:%d Type 0x%02x Vector 0x%llx",
+ name, packet->type == INTEL_PT_CFE_IP, packet->count, payload);
+ case INTEL_PT_EVD:
+ return snprintf(buf, buf_len, "%s Type 0x%02x Payload 0x%llx",
+ name, packet->count, payload);
+ default:
+ break;
+ }
+ return snprintf(buf, buf_len, "%s 0x%llx (%d)",
+ name, payload, packet->count);
+}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
new file mode 100644
index 000000000..496ba4be8
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * intel_pt_pkt_decoder.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ */
+
+#ifndef INCLUDE__INTEL_PT_PKT_DECODER_H__
+#define INCLUDE__INTEL_PT_PKT_DECODER_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define INTEL_PT_PKT_DESC_MAX 256
+
+#define INTEL_PT_NEED_MORE_BYTES -1
+#define INTEL_PT_BAD_PACKET -2
+
+#define INTEL_PT_PSB_STR "\002\202\002\202\002\202\002\202" \
+ "\002\202\002\202\002\202\002\202"
+#define INTEL_PT_PSB_LEN 16
+
+#define INTEL_PT_PKT_MAX_SZ 16
+
+#define INTEL_PT_VMX_NR_FLAG 1
+
+enum intel_pt_pkt_type {
+ INTEL_PT_BAD,
+ INTEL_PT_PAD,
+ INTEL_PT_TNT,
+ INTEL_PT_TIP_PGD,
+ INTEL_PT_TIP_PGE,
+ INTEL_PT_TSC,
+ INTEL_PT_TMA,
+ INTEL_PT_MODE_EXEC,
+ INTEL_PT_MODE_TSX,
+ INTEL_PT_MTC,
+ INTEL_PT_TIP,
+ INTEL_PT_FUP,
+ INTEL_PT_CYC,
+ INTEL_PT_VMCS,
+ INTEL_PT_PSB,
+ INTEL_PT_PSBEND,
+ INTEL_PT_CBR,
+ INTEL_PT_TRACESTOP,
+ INTEL_PT_PIP,
+ INTEL_PT_OVF,
+ INTEL_PT_MNT,
+ INTEL_PT_PTWRITE,
+ INTEL_PT_PTWRITE_IP,
+ INTEL_PT_EXSTOP,
+ INTEL_PT_EXSTOP_IP,
+ INTEL_PT_MWAIT,
+ INTEL_PT_PWRE,
+ INTEL_PT_PWRX,
+ INTEL_PT_BBP,
+ INTEL_PT_BIP,
+ INTEL_PT_BEP,
+ INTEL_PT_BEP_IP,
+ INTEL_PT_CFE,
+ INTEL_PT_CFE_IP,
+ INTEL_PT_EVD,
+};
+
+struct intel_pt_pkt {
+ enum intel_pt_pkt_type type;
+ int count;
+ uint64_t payload;
+};
+
+/*
+ * Decoding of BIP packets conflicts with single-byte TNT packets. Since BIP
+ * packets only occur in the context of a block (i.e. between BBP and BEP), that
+ * context must be recorded and passed to the packet decoder.
+ */
+enum intel_pt_pkt_ctx {
+ INTEL_PT_NO_CTX, /* BIP packets are invalid */
+ INTEL_PT_BLK_4_CTX, /* 4-byte BIP packets */
+ INTEL_PT_BLK_8_CTX, /* 8-byte BIP packets */
+};
+
+const char *intel_pt_pkt_name(enum intel_pt_pkt_type);
+
+int intel_pt_get_packet(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet,
+ enum intel_pt_pkt_ctx *ctx);
+
+void intel_pt_upd_pkt_ctx(const struct intel_pt_pkt *packet,
+ enum intel_pt_pkt_ctx *ctx);
+
+int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, size_t len);
+
+#endif
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
new file mode 100644
index 000000000..f38893e0b
--- /dev/null
+++ b/tools/perf/util/intel-pt.c
@@ -0,0 +1,4496 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * intel_pt.c: Intel Processor Trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ */
+
+#include <inttypes.h>
+#include <linux/perf_event.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/zalloc.h>
+
+#include "session.h"
+#include "machine.h"
+#include "memswap.h"
+#include "sort.h"
+#include "tool.h"
+#include "event.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "map.h"
+#include "color.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "symbol.h"
+#include "callchain.h"
+#include "dso.h"
+#include "debug.h"
+#include "auxtrace.h"
+#include "tsc.h"
+#include "intel-pt.h"
+#include "config.h"
+#include "util/perf_api_probe.h"
+#include "util/synthetic-events.h"
+#include "time-utils.h"
+
+#include "../arch/x86/include/uapi/asm/perf_regs.h"
+
+#include "intel-pt-decoder/intel-pt-log.h"
+#include "intel-pt-decoder/intel-pt-decoder.h"
+#include "intel-pt-decoder/intel-pt-insn-decoder.h"
+#include "intel-pt-decoder/intel-pt-pkt-decoder.h"
+
+#define MAX_TIMESTAMP (~0ULL)
+
+#define INTEL_PT_CFG_PASS_THRU BIT_ULL(0)
+#define INTEL_PT_CFG_PWR_EVT_EN BIT_ULL(4)
+#define INTEL_PT_CFG_BRANCH_EN BIT_ULL(13)
+#define INTEL_PT_CFG_EVT_EN BIT_ULL(31)
+#define INTEL_PT_CFG_TNT_DIS BIT_ULL(55)
+
+struct range {
+ u64 start;
+ u64 end;
+};
+
+struct intel_pt {
+ struct auxtrace auxtrace;
+ struct auxtrace_queues queues;
+ struct auxtrace_heap heap;
+ u32 auxtrace_type;
+ struct perf_session *session;
+ struct machine *machine;
+ struct evsel *switch_evsel;
+ struct thread *unknown_thread;
+ bool timeless_decoding;
+ bool sampling_mode;
+ bool snapshot_mode;
+ bool per_cpu_mmaps;
+ bool have_tsc;
+ bool data_queued;
+ bool est_tsc;
+ bool sync_switch;
+ bool sync_switch_not_supported;
+ bool mispred_all;
+ bool use_thread_stack;
+ bool callstack;
+ bool cap_event_trace;
+ bool have_guest_sideband;
+ unsigned int br_stack_sz;
+ unsigned int br_stack_sz_plus;
+ int have_sched_switch;
+ u32 pmu_type;
+ u64 kernel_start;
+ u64 switch_ip;
+ u64 ptss_ip;
+ u64 first_timestamp;
+
+ struct perf_tsc_conversion tc;
+ bool cap_user_time_zero;
+
+ struct itrace_synth_opts synth_opts;
+
+ bool sample_instructions;
+ u64 instructions_sample_type;
+ u64 instructions_id;
+
+ bool sample_cycles;
+ u64 cycles_sample_type;
+ u64 cycles_id;
+
+ bool sample_branches;
+ u32 branches_filter;
+ u64 branches_sample_type;
+ u64 branches_id;
+
+ bool sample_transactions;
+ u64 transactions_sample_type;
+ u64 transactions_id;
+
+ bool sample_ptwrites;
+ u64 ptwrites_sample_type;
+ u64 ptwrites_id;
+
+ bool sample_pwr_events;
+ u64 pwr_events_sample_type;
+ u64 mwait_id;
+ u64 pwre_id;
+ u64 exstop_id;
+ u64 pwrx_id;
+ u64 cbr_id;
+ u64 psb_id;
+
+ bool single_pebs;
+ bool sample_pebs;
+ struct evsel *pebs_evsel;
+
+ u64 evt_sample_type;
+ u64 evt_id;
+
+ u64 iflag_chg_sample_type;
+ u64 iflag_chg_id;
+
+ u64 tsc_bit;
+ u64 mtc_bit;
+ u64 mtc_freq_bits;
+ u32 tsc_ctc_ratio_n;
+ u32 tsc_ctc_ratio_d;
+ u64 cyc_bit;
+ u64 noretcomp_bit;
+ unsigned max_non_turbo_ratio;
+ unsigned cbr2khz;
+ int max_loops;
+
+ unsigned long num_events;
+
+ char *filter;
+ struct addr_filters filts;
+
+ struct range *time_ranges;
+ unsigned int range_cnt;
+
+ struct ip_callchain *chain;
+ struct branch_stack *br_stack;
+
+ u64 dflt_tsc_offset;
+ struct rb_root vmcs_info;
+};
+
+enum switch_state {
+ INTEL_PT_SS_NOT_TRACING,
+ INTEL_PT_SS_UNKNOWN,
+ INTEL_PT_SS_TRACING,
+ INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
+ INTEL_PT_SS_EXPECTING_SWITCH_IP,
+};
+
+/* applicable_counters is 64-bits */
+#define INTEL_PT_MAX_PEBS 64
+
+struct intel_pt_pebs_event {
+ struct evsel *evsel;
+ u64 id;
+};
+
+struct intel_pt_queue {
+ struct intel_pt *pt;
+ unsigned int queue_nr;
+ struct auxtrace_buffer *buffer;
+ struct auxtrace_buffer *old_buffer;
+ void *decoder;
+ const struct intel_pt_state *state;
+ struct ip_callchain *chain;
+ struct branch_stack *last_branch;
+ union perf_event *event_buf;
+ bool on_heap;
+ bool stop;
+ bool step_through_buffers;
+ bool use_buffer_pid_tid;
+ bool sync_switch;
+ bool sample_ipc;
+ pid_t pid, tid;
+ int cpu;
+ int switch_state;
+ pid_t next_tid;
+ struct thread *thread;
+ struct machine *guest_machine;
+ struct thread *guest_thread;
+ struct thread *unknown_guest_thread;
+ pid_t guest_machine_pid;
+ pid_t guest_pid;
+ pid_t guest_tid;
+ int vcpu;
+ bool exclude_kernel;
+ bool have_sample;
+ u64 time;
+ u64 timestamp;
+ u64 sel_timestamp;
+ bool sel_start;
+ unsigned int sel_idx;
+ u32 flags;
+ u16 insn_len;
+ u64 last_insn_cnt;
+ u64 ipc_insn_cnt;
+ u64 ipc_cyc_cnt;
+ u64 last_in_insn_cnt;
+ u64 last_in_cyc_cnt;
+ u64 last_cy_insn_cnt;
+ u64 last_cy_cyc_cnt;
+ u64 last_br_insn_cnt;
+ u64 last_br_cyc_cnt;
+ unsigned int cbr_seen;
+ char insn[INTEL_PT_INSN_BUF_SZ];
+ struct intel_pt_pebs_event pebs[INTEL_PT_MAX_PEBS];
+};
+
+static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
+ unsigned char *buf, size_t len)
+{
+ struct intel_pt_pkt packet;
+ size_t pos = 0;
+ int ret, pkt_len, i;
+ char desc[INTEL_PT_PKT_DESC_MAX];
+ const char *color = PERF_COLOR_BLUE;
+ enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX;
+
+ color_fprintf(stdout, color,
+ ". ... Intel Processor Trace data: size %zu bytes\n",
+ len);
+
+ while (len) {
+ ret = intel_pt_get_packet(buf, len, &packet, &ctx);
+ if (ret > 0)
+ pkt_len = ret;
+ else
+ pkt_len = 1;
+ printf(".");
+ color_fprintf(stdout, color, " %08x: ", pos);
+ for (i = 0; i < pkt_len; i++)
+ color_fprintf(stdout, color, " %02x", buf[i]);
+ for (; i < 16; i++)
+ color_fprintf(stdout, color, " ");
+ if (ret > 0) {
+ ret = intel_pt_pkt_desc(&packet, desc,
+ INTEL_PT_PKT_DESC_MAX);
+ if (ret > 0)
+ color_fprintf(stdout, color, " %s\n", desc);
+ } else {
+ color_fprintf(stdout, color, " Bad packet!\n");
+ }
+ pos += pkt_len;
+ buf += pkt_len;
+ len -= pkt_len;
+ }
+}
+
+static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
+ size_t len)
+{
+ printf(".\n");
+ intel_pt_dump(pt, buf, len);
+}
+
+static void intel_pt_log_event(union perf_event *event)
+{
+ FILE *f = intel_pt_log_fp();
+
+ if (!intel_pt_enable_logging || !f)
+ return;
+
+ perf_event__fprintf(event, NULL, f);
+}
+
+static void intel_pt_dump_sample(struct perf_session *session,
+ struct perf_sample *sample)
+{
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+ auxtrace);
+
+ printf("\n");
+ intel_pt_dump(pt, sample->aux_sample.data, sample->aux_sample.size);
+}
+
+static bool intel_pt_log_events(struct intel_pt *pt, u64 tm)
+{
+ struct perf_time_interval *range = pt->synth_opts.ptime_range;
+ int n = pt->synth_opts.range_num;
+
+ if (pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)
+ return true;
+
+ if (pt->synth_opts.log_minus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)
+ return false;
+
+ /* perf_time__ranges_skip_sample does not work if time is zero */
+ if (!tm)
+ tm = 1;
+
+ return !n || !perf_time__ranges_skip_sample(range, n, tm);
+}
+
+static struct intel_pt_vmcs_info *intel_pt_findnew_vmcs(struct rb_root *rb_root,
+ u64 vmcs,
+ u64 dflt_tsc_offset)
+{
+ struct rb_node **p = &rb_root->rb_node;
+ struct rb_node *parent = NULL;
+ struct intel_pt_vmcs_info *v;
+
+ while (*p) {
+ parent = *p;
+ v = rb_entry(parent, struct intel_pt_vmcs_info, rb_node);
+
+ if (v->vmcs == vmcs)
+ return v;
+
+ if (vmcs < v->vmcs)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ v = zalloc(sizeof(*v));
+ if (v) {
+ v->vmcs = vmcs;
+ v->tsc_offset = dflt_tsc_offset;
+ v->reliable = dflt_tsc_offset;
+
+ rb_link_node(&v->rb_node, parent, p);
+ rb_insert_color(&v->rb_node, rb_root);
+ }
+
+ return v;
+}
+
+static struct intel_pt_vmcs_info *intel_pt_findnew_vmcs_info(void *data, uint64_t vmcs)
+{
+ struct intel_pt_queue *ptq = data;
+ struct intel_pt *pt = ptq->pt;
+
+ if (!vmcs && !pt->dflt_tsc_offset)
+ return NULL;
+
+ return intel_pt_findnew_vmcs(&pt->vmcs_info, vmcs, pt->dflt_tsc_offset);
+}
+
+static void intel_pt_free_vmcs_info(struct intel_pt *pt)
+{
+ struct intel_pt_vmcs_info *v;
+ struct rb_node *n;
+
+ n = rb_first(&pt->vmcs_info);
+ while (n) {
+ v = rb_entry(n, struct intel_pt_vmcs_info, rb_node);
+ n = rb_next(n);
+ rb_erase(&v->rb_node, &pt->vmcs_info);
+ free(v);
+ }
+}
+
+static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
+ struct auxtrace_buffer *b)
+{
+ bool consecutive = false;
+ void *start;
+
+ start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
+ pt->have_tsc, &consecutive,
+ pt->synth_opts.vm_time_correlation);
+ if (!start)
+ return -EINVAL;
+ /*
+ * In the case of vm_time_correlation, the overlap might contain TSC
+ * packets that will not be fixed, and that will then no longer work for
+ * overlap detection. Avoid that by zeroing out the overlap.
+ */
+ if (pt->synth_opts.vm_time_correlation)
+ memset(b->data, 0, start - b->data);
+ b->use_size = b->data + b->size - start;
+ b->use_data = start;
+ if (b->use_size && consecutive)
+ b->consecutive = true;
+ return 0;
+}
+
+static int intel_pt_get_buffer(struct intel_pt_queue *ptq,
+ struct auxtrace_buffer *buffer,
+ struct auxtrace_buffer *old_buffer,
+ struct intel_pt_buffer *b)
+{
+ bool might_overlap;
+
+ if (!buffer->data) {
+ int fd = perf_data__fd(ptq->pt->session->data);
+
+ buffer->data = auxtrace_buffer__get_data(buffer, fd);
+ if (!buffer->data)
+ return -ENOMEM;
+ }
+
+ might_overlap = ptq->pt->snapshot_mode || ptq->pt->sampling_mode;
+ if (might_overlap && !buffer->consecutive && old_buffer &&
+ intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
+ return -ENOMEM;
+
+ if (buffer->use_data) {
+ b->len = buffer->use_size;
+ b->buf = buffer->use_data;
+ } else {
+ b->len = buffer->size;
+ b->buf = buffer->data;
+ }
+ b->ref_timestamp = buffer->reference;
+
+ if (!old_buffer || (might_overlap && !buffer->consecutive)) {
+ b->consecutive = false;
+ b->trace_nr = buffer->buffer_nr + 1;
+ } else {
+ b->consecutive = true;
+ }
+
+ return 0;
+}
+
+/* Do not drop buffers with references - refer intel_pt_get_trace() */
+static void intel_pt_lookahead_drop_buffer(struct intel_pt_queue *ptq,
+ struct auxtrace_buffer *buffer)
+{
+ if (!buffer || buffer == ptq->buffer || buffer == ptq->old_buffer)
+ return;
+
+ auxtrace_buffer__drop_data(buffer);
+}
+
+/* Must be serialized with respect to intel_pt_get_trace() */
+static int intel_pt_lookahead(void *data, intel_pt_lookahead_cb_t cb,
+ void *cb_data)
+{
+ struct intel_pt_queue *ptq = data;
+ struct auxtrace_buffer *buffer = ptq->buffer;
+ struct auxtrace_buffer *old_buffer = ptq->old_buffer;
+ struct auxtrace_queue *queue;
+ int err = 0;
+
+ queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
+
+ while (1) {
+ struct intel_pt_buffer b = { .len = 0 };
+
+ buffer = auxtrace_buffer__next(queue, buffer);
+ if (!buffer)
+ break;
+
+ err = intel_pt_get_buffer(ptq, buffer, old_buffer, &b);
+ if (err)
+ break;
+
+ if (b.len) {
+ intel_pt_lookahead_drop_buffer(ptq, old_buffer);
+ old_buffer = buffer;
+ } else {
+ intel_pt_lookahead_drop_buffer(ptq, buffer);
+ continue;
+ }
+
+ err = cb(&b, cb_data);
+ if (err)
+ break;
+ }
+
+ if (buffer != old_buffer)
+ intel_pt_lookahead_drop_buffer(ptq, buffer);
+ intel_pt_lookahead_drop_buffer(ptq, old_buffer);
+
+ return err;
+}
+
+/*
+ * This function assumes data is processed sequentially only.
+ * Must be serialized with respect to intel_pt_lookahead()
+ */
+static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
+{
+ struct intel_pt_queue *ptq = data;
+ struct auxtrace_buffer *buffer = ptq->buffer;
+ struct auxtrace_buffer *old_buffer = ptq->old_buffer;
+ struct auxtrace_queue *queue;
+ int err;
+
+ if (ptq->stop) {
+ b->len = 0;
+ return 0;
+ }
+
+ queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
+
+ buffer = auxtrace_buffer__next(queue, buffer);
+ if (!buffer) {
+ if (old_buffer)
+ auxtrace_buffer__drop_data(old_buffer);
+ b->len = 0;
+ return 0;
+ }
+
+ ptq->buffer = buffer;
+
+ err = intel_pt_get_buffer(ptq, buffer, old_buffer, b);
+ if (err)
+ return err;
+
+ if (ptq->step_through_buffers)
+ ptq->stop = true;
+
+ if (b->len) {
+ if (old_buffer)
+ auxtrace_buffer__drop_data(old_buffer);
+ ptq->old_buffer = buffer;
+ } else {
+ auxtrace_buffer__drop_data(buffer);
+ return intel_pt_get_trace(b, data);
+ }
+
+ return 0;
+}
+
+struct intel_pt_cache_entry {
+ struct auxtrace_cache_entry entry;
+ u64 insn_cnt;
+ u64 byte_cnt;
+ enum intel_pt_insn_op op;
+ enum intel_pt_insn_branch branch;
+ bool emulated_ptwrite;
+ int length;
+ int32_t rel;
+ char insn[INTEL_PT_INSN_BUF_SZ];
+};
+
+static int intel_pt_config_div(const char *var, const char *value, void *data)
+{
+ int *d = data;
+ long val;
+
+ if (!strcmp(var, "intel-pt.cache-divisor")) {
+ val = strtol(value, NULL, 0);
+ if (val > 0 && val <= INT_MAX)
+ *d = val;
+ }
+
+ return 0;
+}
+
+static int intel_pt_cache_divisor(void)
+{
+ static int d;
+
+ if (d)
+ return d;
+
+ perf_config(intel_pt_config_div, &d);
+
+ if (!d)
+ d = 64;
+
+ return d;
+}
+
+static unsigned int intel_pt_cache_size(struct dso *dso,
+ struct machine *machine)
+{
+ off_t size;
+
+ size = dso__data_size(dso, machine);
+ size /= intel_pt_cache_divisor();
+ if (size < 1000)
+ return 10;
+ if (size > (1 << 21))
+ return 21;
+ return 32 - __builtin_clz(size);
+}
+
+static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
+ struct machine *machine)
+{
+ struct auxtrace_cache *c;
+ unsigned int bits;
+
+ if (dso->auxtrace_cache)
+ return dso->auxtrace_cache;
+
+ bits = intel_pt_cache_size(dso, machine);
+
+ /* Ignoring cache creation failure */
+ c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);
+
+ dso->auxtrace_cache = c;
+
+ return c;
+}
+
+static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
+ u64 offset, u64 insn_cnt, u64 byte_cnt,
+ struct intel_pt_insn *intel_pt_insn)
+{
+ struct auxtrace_cache *c = intel_pt_cache(dso, machine);
+ struct intel_pt_cache_entry *e;
+ int err;
+
+ if (!c)
+ return -ENOMEM;
+
+ e = auxtrace_cache__alloc_entry(c);
+ if (!e)
+ return -ENOMEM;
+
+ e->insn_cnt = insn_cnt;
+ e->byte_cnt = byte_cnt;
+ e->op = intel_pt_insn->op;
+ e->branch = intel_pt_insn->branch;
+ e->emulated_ptwrite = intel_pt_insn->emulated_ptwrite;
+ e->length = intel_pt_insn->length;
+ e->rel = intel_pt_insn->rel;
+ memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ);
+
+ err = auxtrace_cache__add(c, offset, &e->entry);
+ if (err)
+ auxtrace_cache__free_entry(c, e);
+
+ return err;
+}
+
+static struct intel_pt_cache_entry *
+intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
+{
+ struct auxtrace_cache *c = intel_pt_cache(dso, machine);
+
+ if (!c)
+ return NULL;
+
+ return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
+}
+
+static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine,
+ u64 offset)
+{
+ struct auxtrace_cache *c = intel_pt_cache(dso, machine);
+
+ if (!c)
+ return;
+
+ auxtrace_cache__remove(dso->auxtrace_cache, offset);
+}
+
+static inline bool intel_pt_guest_kernel_ip(uint64_t ip)
+{
+ /* Assumes 64-bit kernel */
+ return ip & (1ULL << 63);
+}
+
+static inline u8 intel_pt_nr_cpumode(struct intel_pt_queue *ptq, uint64_t ip, bool nr)
+{
+ if (nr) {
+ return intel_pt_guest_kernel_ip(ip) ?
+ PERF_RECORD_MISC_GUEST_KERNEL :
+ PERF_RECORD_MISC_GUEST_USER;
+ }
+
+ return ip >= ptq->pt->kernel_start ?
+ PERF_RECORD_MISC_KERNEL :
+ PERF_RECORD_MISC_USER;
+}
+
+static inline u8 intel_pt_cpumode(struct intel_pt_queue *ptq, uint64_t from_ip, uint64_t to_ip)
+{
+ /* No support for non-zero CS base */
+ if (from_ip)
+ return intel_pt_nr_cpumode(ptq, from_ip, ptq->state->from_nr);
+ return intel_pt_nr_cpumode(ptq, to_ip, ptq->state->to_nr);
+}
+
+static int intel_pt_get_guest(struct intel_pt_queue *ptq)
+{
+ struct machines *machines = &ptq->pt->session->machines;
+ struct machine *machine;
+ pid_t pid = ptq->pid <= 0 ? DEFAULT_GUEST_KERNEL_ID : ptq->pid;
+
+ if (ptq->guest_machine && pid == ptq->guest_machine->pid)
+ return 0;
+
+ ptq->guest_machine = NULL;
+ thread__zput(ptq->unknown_guest_thread);
+
+ if (symbol_conf.guest_code) {
+ thread__zput(ptq->guest_thread);
+ ptq->guest_thread = machines__findnew_guest_code(machines, pid);
+ }
+
+ machine = machines__find_guest(machines, pid);
+ if (!machine)
+ return -1;
+
+ ptq->unknown_guest_thread = machine__idle_thread(machine);
+ if (!ptq->unknown_guest_thread)
+ return -1;
+
+ ptq->guest_machine = machine;
+
+ return 0;
+}
+
+static inline bool intel_pt_jmp_16(struct intel_pt_insn *intel_pt_insn)
+{
+ return intel_pt_insn->rel == 16 && intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL;
+}
+
+#define PTWRITE_MAGIC "\x0f\x0bperf,ptwrite "
+#define PTWRITE_MAGIC_LEN 16
+
+static bool intel_pt_emulated_ptwrite(struct dso *dso, struct machine *machine, u64 offset)
+{
+ unsigned char buf[PTWRITE_MAGIC_LEN];
+ ssize_t len;
+
+ len = dso__data_read_offset(dso, machine, offset, buf, PTWRITE_MAGIC_LEN);
+ if (len == PTWRITE_MAGIC_LEN && !memcmp(buf, PTWRITE_MAGIC, PTWRITE_MAGIC_LEN)) {
+ intel_pt_log("Emulated ptwrite signature found\n");
+ return true;
+ }
+ intel_pt_log("Emulated ptwrite signature not found\n");
+ return false;
+}
+
+static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
+ uint64_t *insn_cnt_ptr, uint64_t *ip,
+ uint64_t to_ip, uint64_t max_insn_cnt,
+ void *data)
+{
+ struct intel_pt_queue *ptq = data;
+ struct machine *machine = ptq->pt->machine;
+ struct thread *thread;
+ struct addr_location al;
+ unsigned char buf[INTEL_PT_INSN_BUF_SZ];
+ ssize_t len;
+ int x86_64, ret = 0;
+ u8 cpumode;
+ u64 offset, start_offset, start_ip;
+ u64 insn_cnt = 0;
+ bool one_map = true;
+ bool nr;
+
+
+ addr_location__init(&al);
+ intel_pt_insn->length = 0;
+
+ if (to_ip && *ip == to_ip)
+ goto out_no_cache;
+
+ nr = ptq->state->to_nr;
+ cpumode = intel_pt_nr_cpumode(ptq, *ip, nr);
+
+ if (nr) {
+ if (ptq->pt->have_guest_sideband) {
+ if (!ptq->guest_machine || ptq->guest_machine_pid != ptq->pid) {
+ intel_pt_log("ERROR: guest sideband but no guest machine\n");
+ ret = -EINVAL;
+ goto out_ret;
+ }
+ } else if ((!symbol_conf.guest_code && cpumode != PERF_RECORD_MISC_GUEST_KERNEL) ||
+ intel_pt_get_guest(ptq)) {
+ intel_pt_log("ERROR: no guest machine\n");
+ ret = -EINVAL;
+ goto out_ret;
+ }
+ machine = ptq->guest_machine;
+ thread = ptq->guest_thread;
+ if (!thread) {
+ if (cpumode != PERF_RECORD_MISC_GUEST_KERNEL) {
+ intel_pt_log("ERROR: no guest thread\n");
+ ret = -EINVAL;
+ goto out_ret;
+ }
+ thread = ptq->unknown_guest_thread;
+ }
+ } else {
+ thread = ptq->thread;
+ if (!thread) {
+ if (cpumode != PERF_RECORD_MISC_KERNEL) {
+ intel_pt_log("ERROR: no thread\n");
+ ret = -EINVAL;
+ goto out_ret;
+ }
+ thread = ptq->pt->unknown_thread;
+ }
+ }
+
+ while (1) {
+ struct dso *dso;
+
+ if (!thread__find_map(thread, cpumode, *ip, &al) || !map__dso(al.map)) {
+ if (al.map)
+ intel_pt_log("ERROR: thread has no dso for %#" PRIx64 "\n", *ip);
+ else
+ intel_pt_log("ERROR: thread has no map for %#" PRIx64 "\n", *ip);
+ addr_location__exit(&al);
+ ret = -EINVAL;
+ goto out_ret;
+ }
+ dso = map__dso(al.map);
+
+ if (dso->data.status == DSO_DATA_STATUS_ERROR &&
+ dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE)) {
+ ret = -ENOENT;
+ goto out_ret;
+ }
+
+ offset = map__map_ip(al.map, *ip);
+
+ if (!to_ip && one_map) {
+ struct intel_pt_cache_entry *e;
+
+ e = intel_pt_cache_lookup(dso, machine, offset);
+ if (e &&
+ (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
+ *insn_cnt_ptr = e->insn_cnt;
+ *ip += e->byte_cnt;
+ intel_pt_insn->op = e->op;
+ intel_pt_insn->branch = e->branch;
+ intel_pt_insn->emulated_ptwrite = e->emulated_ptwrite;
+ intel_pt_insn->length = e->length;
+ intel_pt_insn->rel = e->rel;
+ memcpy(intel_pt_insn->buf, e->insn, INTEL_PT_INSN_BUF_SZ);
+ intel_pt_log_insn_no_data(intel_pt_insn, *ip);
+ ret = 0;
+ goto out_ret;
+ }
+ }
+
+ start_offset = offset;
+ start_ip = *ip;
+
+ /* Load maps to ensure dso->is_64_bit has been updated */
+ map__load(al.map);
+
+ x86_64 = dso->is_64_bit;
+
+ while (1) {
+ len = dso__data_read_offset(dso, machine,
+ offset, buf,
+ INTEL_PT_INSN_BUF_SZ);
+ if (len <= 0) {
+ intel_pt_log("ERROR: failed to read at offset %#" PRIx64 " ",
+ offset);
+ if (intel_pt_enable_logging)
+ dso__fprintf(dso, intel_pt_log_fp());
+ ret = -EINVAL;
+ goto out_ret;
+ }
+
+ if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn)) {
+ ret = -EINVAL;
+ goto out_ret;
+ }
+
+ intel_pt_log_insn(intel_pt_insn, *ip);
+
+ insn_cnt += 1;
+
+ if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) {
+ bool eptw;
+ u64 offs;
+
+ if (!intel_pt_jmp_16(intel_pt_insn))
+ goto out;
+ /* Check for emulated ptwrite */
+ offs = offset + intel_pt_insn->length;
+ eptw = intel_pt_emulated_ptwrite(dso, machine, offs);
+ intel_pt_insn->emulated_ptwrite = eptw;
+ goto out;
+ }
+
+ if (max_insn_cnt && insn_cnt >= max_insn_cnt)
+ goto out_no_cache;
+
+ *ip += intel_pt_insn->length;
+
+ if (to_ip && *ip == to_ip) {
+ intel_pt_insn->length = 0;
+ goto out_no_cache;
+ }
+
+ if (*ip >= map__end(al.map))
+ break;
+
+ offset += intel_pt_insn->length;
+ }
+ one_map = false;
+ }
+out:
+ *insn_cnt_ptr = insn_cnt;
+
+ if (!one_map)
+ goto out_no_cache;
+
+ /*
+ * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate
+ * entries.
+ */
+ if (to_ip) {
+ struct intel_pt_cache_entry *e;
+
+ e = intel_pt_cache_lookup(map__dso(al.map), machine, start_offset);
+ if (e)
+ goto out_ret;
+ }
+
+ /* Ignore cache errors */
+ intel_pt_cache_add(map__dso(al.map), machine, start_offset, insn_cnt,
+ *ip - start_ip, intel_pt_insn);
+
+out_ret:
+ addr_location__exit(&al);
+ return ret;
+
+out_no_cache:
+ *insn_cnt_ptr = insn_cnt;
+ addr_location__exit(&al);
+ return 0;
+}
+
+static bool intel_pt_match_pgd_ip(struct intel_pt *pt, uint64_t ip,
+ uint64_t offset, const char *filename)
+{
+ struct addr_filter *filt;
+ bool have_filter = false;
+ bool hit_tracestop = false;
+ bool hit_filter = false;
+
+ list_for_each_entry(filt, &pt->filts.head, list) {
+ if (filt->start)
+ have_filter = true;
+
+ if ((filename && !filt->filename) ||
+ (!filename && filt->filename) ||
+ (filename && strcmp(filename, filt->filename)))
+ continue;
+
+ if (!(offset >= filt->addr && offset < filt->addr + filt->size))
+ continue;
+
+ intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s hit filter: %s offset %#"PRIx64" size %#"PRIx64"\n",
+ ip, offset, filename ? filename : "[kernel]",
+ filt->start ? "filter" : "stop",
+ filt->addr, filt->size);
+
+ if (filt->start)
+ hit_filter = true;
+ else
+ hit_tracestop = true;
+ }
+
+ if (!hit_tracestop && !hit_filter)
+ intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s is not in a filter region\n",
+ ip, offset, filename ? filename : "[kernel]");
+
+ return hit_tracestop || (have_filter && !hit_filter);
+}
+
+static int __intel_pt_pgd_ip(uint64_t ip, void *data)
+{
+ struct intel_pt_queue *ptq = data;
+ struct thread *thread;
+ struct addr_location al;
+ u8 cpumode;
+ u64 offset;
+ int res;
+
+ if (ptq->state->to_nr) {
+ if (intel_pt_guest_kernel_ip(ip))
+ return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
+ /* No support for decoding guest user space */
+ return -EINVAL;
+ } else if (ip >= ptq->pt->kernel_start) {
+ return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
+ }
+
+ cpumode = PERF_RECORD_MISC_USER;
+
+ thread = ptq->thread;
+ if (!thread)
+ return -EINVAL;
+
+ addr_location__init(&al);
+ if (!thread__find_map(thread, cpumode, ip, &al) || !map__dso(al.map))
+ return -EINVAL;
+
+ offset = map__map_ip(al.map, ip);
+
+ res = intel_pt_match_pgd_ip(ptq->pt, ip, offset, map__dso(al.map)->long_name);
+ addr_location__exit(&al);
+ return res;
+}
+
+static bool intel_pt_pgd_ip(uint64_t ip, void *data)
+{
+ return __intel_pt_pgd_ip(ip, data) > 0;
+}
+
+static bool intel_pt_get_config(struct intel_pt *pt,
+ struct perf_event_attr *attr, u64 *config)
+{
+ if (attr->type == pt->pmu_type) {
+ if (config)
+ *config = attr->config;
+ return true;
+ }
+
+ return false;
+}
+
+static bool intel_pt_exclude_kernel(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->core.attr, NULL) &&
+ !evsel->core.attr.exclude_kernel)
+ return false;
+ }
+ return true;
+}
+
+static bool intel_pt_return_compression(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+ u64 config;
+
+ if (!pt->noretcomp_bit)
+ return true;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
+ (config & pt->noretcomp_bit))
+ return false;
+ }
+ return true;
+}
+
+static bool intel_pt_branch_enable(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+ u64 config;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
+ (config & INTEL_PT_CFG_PASS_THRU) &&
+ !(config & INTEL_PT_CFG_BRANCH_EN))
+ return false;
+ }
+ return true;
+}
+
+static bool intel_pt_disabled_tnt(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+ u64 config;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
+ config & INTEL_PT_CFG_TNT_DIS)
+ return true;
+ }
+ return false;
+}
+
+static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+ unsigned int shift;
+ u64 config;
+
+ if (!pt->mtc_freq_bits)
+ return 0;
+
+ for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)
+ config >>= 1;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->core.attr, &config))
+ return (config & pt->mtc_freq_bits) >> shift;
+ }
+ return 0;
+}
+
+static bool intel_pt_timeless_decoding(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+ bool timeless_decoding = true;
+ u64 config;
+
+ if (!pt->tsc_bit || !pt->cap_user_time_zero || pt->synth_opts.timeless_decoding)
+ return true;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (!(evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
+ return true;
+ if (intel_pt_get_config(pt, &evsel->core.attr, &config)) {
+ if (config & pt->tsc_bit)
+ timeless_decoding = false;
+ else
+ return true;
+ }
+ }
+ return timeless_decoding;
+}
+
+static bool intel_pt_tracing_kernel(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->core.attr, NULL) &&
+ !evsel->core.attr.exclude_kernel)
+ return true;
+ }
+ return false;
+}
+
+static bool intel_pt_have_tsc(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+ bool have_tsc = false;
+ u64 config;
+
+ if (!pt->tsc_bit)
+ return false;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->core.attr, &config)) {
+ if (config & pt->tsc_bit)
+ have_tsc = true;
+ else
+ return false;
+ }
+ }
+ return have_tsc;
+}
+
+static bool intel_pt_have_mtc(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+ u64 config;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
+ (config & pt->mtc_bit))
+ return true;
+ }
+ return false;
+}
+
+static bool intel_pt_sampling_mode(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if ((evsel->core.attr.sample_type & PERF_SAMPLE_AUX) &&
+ evsel->core.attr.aux_sample_size)
+ return true;
+ }
+ return false;
+}
+
+static u64 intel_pt_ctl(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+ u64 config;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->core.attr, &config))
+ return config;
+ }
+ return 0;
+}
+
+static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
+{
+ u64 quot, rem;
+
+ quot = ns / pt->tc.time_mult;
+ rem = ns % pt->tc.time_mult;
+ return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
+ pt->tc.time_mult;
+}
+
+static struct ip_callchain *intel_pt_alloc_chain(struct intel_pt *pt)
+{
+ size_t sz = sizeof(struct ip_callchain);
+
+ /* Add 1 to callchain_sz for callchain context */
+ sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
+ return zalloc(sz);
+}
+
+static int intel_pt_callchain_init(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN))
+ evsel->synth_sample_type |= PERF_SAMPLE_CALLCHAIN;
+ }
+
+ pt->chain = intel_pt_alloc_chain(pt);
+ if (!pt->chain)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void intel_pt_add_callchain(struct intel_pt *pt,
+ struct perf_sample *sample)
+{
+ struct thread *thread = machine__findnew_thread(pt->machine,
+ sample->pid,
+ sample->tid);
+
+ thread_stack__sample_late(thread, sample->cpu, pt->chain,
+ pt->synth_opts.callchain_sz + 1, sample->ip,
+ pt->kernel_start);
+
+ sample->callchain = pt->chain;
+}
+
+static struct branch_stack *intel_pt_alloc_br_stack(unsigned int entry_cnt)
+{
+ size_t sz = sizeof(struct branch_stack);
+
+ sz += entry_cnt * sizeof(struct branch_entry);
+ return zalloc(sz);
+}
+
+static int intel_pt_br_stack_init(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (!(evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK))
+ evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ }
+
+ pt->br_stack = intel_pt_alloc_br_stack(pt->br_stack_sz);
+ if (!pt->br_stack)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void intel_pt_add_br_stack(struct intel_pt *pt,
+ struct perf_sample *sample)
+{
+ struct thread *thread = machine__findnew_thread(pt->machine,
+ sample->pid,
+ sample->tid);
+
+ thread_stack__br_sample_late(thread, sample->cpu, pt->br_stack,
+ pt->br_stack_sz, sample->ip,
+ pt->kernel_start);
+
+ sample->branch_stack = pt->br_stack;
+ thread__put(thread);
+}
+
+/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */
+#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3U)
+
+static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
+ unsigned int queue_nr)
+{
+ struct intel_pt_params params = { .get_trace = 0, };
+ struct perf_env *env = pt->machine->env;
+ struct intel_pt_queue *ptq;
+
+ ptq = zalloc(sizeof(struct intel_pt_queue));
+ if (!ptq)
+ return NULL;
+
+ if (pt->synth_opts.callchain) {
+ ptq->chain = intel_pt_alloc_chain(pt);
+ if (!ptq->chain)
+ goto out_free;
+ }
+
+ if (pt->synth_opts.last_branch || pt->synth_opts.other_events) {
+ unsigned int entry_cnt = max(LBRS_MAX, pt->br_stack_sz);
+
+ ptq->last_branch = intel_pt_alloc_br_stack(entry_cnt);
+ if (!ptq->last_branch)
+ goto out_free;
+ }
+
+ ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
+ if (!ptq->event_buf)
+ goto out_free;
+
+ ptq->pt = pt;
+ ptq->queue_nr = queue_nr;
+ ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
+ ptq->pid = -1;
+ ptq->tid = -1;
+ ptq->cpu = -1;
+ ptq->next_tid = -1;
+
+ params.get_trace = intel_pt_get_trace;
+ params.walk_insn = intel_pt_walk_next_insn;
+ params.lookahead = intel_pt_lookahead;
+ params.findnew_vmcs_info = intel_pt_findnew_vmcs_info;
+ params.data = ptq;
+ params.return_compression = intel_pt_return_compression(pt);
+ params.branch_enable = intel_pt_branch_enable(pt);
+ params.ctl = intel_pt_ctl(pt);
+ params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
+ params.mtc_period = intel_pt_mtc_period(pt);
+ params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
+ params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
+ params.quick = pt->synth_opts.quick;
+ params.vm_time_correlation = pt->synth_opts.vm_time_correlation;
+ params.vm_tm_corr_dry_run = pt->synth_opts.vm_tm_corr_dry_run;
+ params.first_timestamp = pt->first_timestamp;
+ params.max_loops = pt->max_loops;
+
+ /* Cannot walk code without TNT, so force 'quick' mode */
+ if (params.branch_enable && intel_pt_disabled_tnt(pt) && !params.quick)
+ params.quick = 1;
+
+ if (pt->filts.cnt > 0)
+ params.pgd_ip = intel_pt_pgd_ip;
+
+ if (pt->synth_opts.instructions || pt->synth_opts.cycles) {
+ if (pt->synth_opts.period) {
+ switch (pt->synth_opts.period_type) {
+ case PERF_ITRACE_PERIOD_INSTRUCTIONS:
+ params.period_type =
+ INTEL_PT_PERIOD_INSTRUCTIONS;
+ params.period = pt->synth_opts.period;
+ break;
+ case PERF_ITRACE_PERIOD_TICKS:
+ params.period_type = INTEL_PT_PERIOD_TICKS;
+ params.period = pt->synth_opts.period;
+ break;
+ case PERF_ITRACE_PERIOD_NANOSECS:
+ params.period_type = INTEL_PT_PERIOD_TICKS;
+ params.period = intel_pt_ns_to_ticks(pt,
+ pt->synth_opts.period);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!params.period) {
+ params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
+ params.period = 1;
+ }
+ }
+
+ if (env->cpuid && !strncmp(env->cpuid, "GenuineIntel,6,92,", 18))
+ params.flags |= INTEL_PT_FUP_WITH_NLIP;
+
+ ptq->decoder = intel_pt_decoder_new(&params);
+ if (!ptq->decoder)
+ goto out_free;
+
+ return ptq;
+
+out_free:
+ zfree(&ptq->event_buf);
+ zfree(&ptq->last_branch);
+ zfree(&ptq->chain);
+ free(ptq);
+ return NULL;
+}
+
+static void intel_pt_free_queue(void *priv)
+{
+ struct intel_pt_queue *ptq = priv;
+
+ if (!ptq)
+ return;
+ thread__zput(ptq->thread);
+ thread__zput(ptq->guest_thread);
+ thread__zput(ptq->unknown_guest_thread);
+ intel_pt_decoder_free(ptq->decoder);
+ zfree(&ptq->event_buf);
+ zfree(&ptq->last_branch);
+ zfree(&ptq->chain);
+ free(ptq);
+}
+
+static void intel_pt_first_timestamp(struct intel_pt *pt, u64 timestamp)
+{
+ unsigned int i;
+
+ pt->first_timestamp = timestamp;
+
+ for (i = 0; i < pt->queues.nr_queues; i++) {
+ struct auxtrace_queue *queue = &pt->queues.queue_array[i];
+ struct intel_pt_queue *ptq = queue->priv;
+
+ if (ptq && ptq->decoder)
+ intel_pt_set_first_timestamp(ptq->decoder, timestamp);
+ }
+}
+
+static int intel_pt_get_guest_from_sideband(struct intel_pt_queue *ptq)
+{
+ struct machines *machines = &ptq->pt->session->machines;
+ struct machine *machine;
+ pid_t machine_pid = ptq->pid;
+ pid_t tid;
+ int vcpu;
+
+ if (machine_pid <= 0)
+ return 0; /* Not a guest machine */
+
+ machine = machines__find(machines, machine_pid);
+ if (!machine)
+ return 0; /* Not a guest machine */
+
+ if (ptq->guest_machine != machine) {
+ ptq->guest_machine = NULL;
+ thread__zput(ptq->guest_thread);
+ thread__zput(ptq->unknown_guest_thread);
+
+ ptq->unknown_guest_thread = machine__find_thread(machine, 0, 0);
+ if (!ptq->unknown_guest_thread)
+ return -1;
+ ptq->guest_machine = machine;
+ }
+
+ vcpu = ptq->thread ? thread__guest_cpu(ptq->thread) : -1;
+ if (vcpu < 0)
+ return -1;
+
+ tid = machine__get_current_tid(machine, vcpu);
+
+ if (ptq->guest_thread && thread__tid(ptq->guest_thread) != tid)
+ thread__zput(ptq->guest_thread);
+
+ if (!ptq->guest_thread) {
+ ptq->guest_thread = machine__find_thread(machine, -1, tid);
+ if (!ptq->guest_thread)
+ return -1;
+ }
+
+ ptq->guest_machine_pid = machine_pid;
+ ptq->guest_pid = thread__pid(ptq->guest_thread);
+ ptq->guest_tid = tid;
+ ptq->vcpu = vcpu;
+
+ return 0;
+}
+
+static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
+ struct auxtrace_queue *queue)
+{
+ struct intel_pt_queue *ptq = queue->priv;
+
+ if (queue->tid == -1 || pt->have_sched_switch) {
+ ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
+ if (ptq->tid == -1)
+ ptq->pid = -1;
+ thread__zput(ptq->thread);
+ }
+
+ if (!ptq->thread && ptq->tid != -1)
+ ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);
+
+ if (ptq->thread) {
+ ptq->pid = thread__pid(ptq->thread);
+ if (queue->cpu == -1)
+ ptq->cpu = thread__cpu(ptq->thread);
+ }
+
+ if (pt->have_guest_sideband && intel_pt_get_guest_from_sideband(ptq)) {
+ ptq->guest_machine_pid = 0;
+ ptq->guest_pid = -1;
+ ptq->guest_tid = -1;
+ ptq->vcpu = -1;
+ }
+}
+
+static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+
+ ptq->insn_len = 0;
+ if (ptq->state->flags & INTEL_PT_ABORT_TX) {
+ ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
+ } else if (ptq->state->flags & INTEL_PT_ASYNC) {
+ if (!ptq->state->to_ip)
+ ptq->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_TRACE_END;
+ else if (ptq->state->from_nr && !ptq->state->to_nr)
+ ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_VMEXIT;
+ else
+ ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_INTERRUPT;
+ } else {
+ if (ptq->state->from_ip)
+ ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
+ else
+ ptq->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_TRACE_BEGIN;
+ if (ptq->state->flags & INTEL_PT_IN_TX)
+ ptq->flags |= PERF_IP_FLAG_IN_TX;
+ ptq->insn_len = ptq->state->insn_len;
+ memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ);
+ }
+
+ if (ptq->state->type & INTEL_PT_TRACE_BEGIN)
+ ptq->flags |= PERF_IP_FLAG_TRACE_BEGIN;
+ if (ptq->state->type & INTEL_PT_TRACE_END)
+ ptq->flags |= PERF_IP_FLAG_TRACE_END;
+
+ if (pt->cap_event_trace) {
+ if (ptq->state->type & INTEL_PT_IFLAG_CHG) {
+ if (!ptq->state->from_iflag)
+ ptq->flags |= PERF_IP_FLAG_INTR_DISABLE;
+ if (ptq->state->from_iflag != ptq->state->to_iflag)
+ ptq->flags |= PERF_IP_FLAG_INTR_TOGGLE;
+ } else if (!ptq->state->to_iflag) {
+ ptq->flags |= PERF_IP_FLAG_INTR_DISABLE;
+ }
+ }
+}
+
+static void intel_pt_setup_time_range(struct intel_pt *pt,
+ struct intel_pt_queue *ptq)
+{
+ if (!pt->range_cnt)
+ return;
+
+ ptq->sel_timestamp = pt->time_ranges[0].start;
+ ptq->sel_idx = 0;
+
+ if (ptq->sel_timestamp) {
+ ptq->sel_start = true;
+ } else {
+ ptq->sel_timestamp = pt->time_ranges[0].end;
+ ptq->sel_start = false;
+ }
+}
+
+static int intel_pt_setup_queue(struct intel_pt *pt,
+ struct auxtrace_queue *queue,
+ unsigned int queue_nr)
+{
+ struct intel_pt_queue *ptq = queue->priv;
+
+ if (list_empty(&queue->head))
+ return 0;
+
+ if (!ptq) {
+ ptq = intel_pt_alloc_queue(pt, queue_nr);
+ if (!ptq)
+ return -ENOMEM;
+ queue->priv = ptq;
+
+ if (queue->cpu != -1)
+ ptq->cpu = queue->cpu;
+ ptq->tid = queue->tid;
+
+ ptq->cbr_seen = UINT_MAX;
+
+ if (pt->sampling_mode && !pt->snapshot_mode &&
+ pt->timeless_decoding)
+ ptq->step_through_buffers = true;
+
+ ptq->sync_switch = pt->sync_switch;
+
+ intel_pt_setup_time_range(pt, ptq);
+ }
+
+ if (!ptq->on_heap &&
+ (!ptq->sync_switch ||
+ ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
+ const struct intel_pt_state *state;
+ int ret;
+
+ if (pt->timeless_decoding)
+ return 0;
+
+ intel_pt_log("queue %u getting timestamp\n", queue_nr);
+ intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
+ queue_nr, ptq->cpu, ptq->pid, ptq->tid);
+
+ if (ptq->sel_start && ptq->sel_timestamp) {
+ ret = intel_pt_fast_forward(ptq->decoder,
+ ptq->sel_timestamp);
+ if (ret)
+ return ret;
+ }
+
+ while (1) {
+ state = intel_pt_decode(ptq->decoder);
+ if (state->err) {
+ if (state->err == INTEL_PT_ERR_NODATA) {
+ intel_pt_log("queue %u has no timestamp\n",
+ queue_nr);
+ return 0;
+ }
+ continue;
+ }
+ if (state->timestamp)
+ break;
+ }
+
+ ptq->timestamp = state->timestamp;
+ intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
+ queue_nr, ptq->timestamp);
+ ptq->state = state;
+ ptq->have_sample = true;
+ if (ptq->sel_start && ptq->sel_timestamp &&
+ ptq->timestamp < ptq->sel_timestamp)
+ ptq->have_sample = false;
+ intel_pt_sample_flags(ptq);
+ ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
+ if (ret)
+ return ret;
+ ptq->on_heap = true;
+ }
+
+ return 0;
+}
+
+static int intel_pt_setup_queues(struct intel_pt *pt)
+{
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < pt->queues.nr_queues; i++) {
+ ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static inline bool intel_pt_skip_event(struct intel_pt *pt)
+{
+ return pt->synth_opts.initial_skip &&
+ pt->num_events++ < pt->synth_opts.initial_skip;
+}
+
+/*
+ * Cannot count CBR as skipped because it won't go away until cbr == cbr_seen.
+ * Also ensure CBR is first non-skipped event by allowing for 4 more samples
+ * from this decoder state.
+ */
+static inline bool intel_pt_skip_cbr_event(struct intel_pt *pt)
+{
+ return pt->synth_opts.initial_skip &&
+ pt->num_events + 4 < pt->synth_opts.initial_skip;
+}
+
+static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ event->sample.header.type = PERF_RECORD_SAMPLE;
+ event->sample.header.size = sizeof(struct perf_event_header);
+
+ sample->pid = ptq->pid;
+ sample->tid = ptq->tid;
+
+ if (ptq->pt->have_guest_sideband) {
+ if ((ptq->state->from_ip && ptq->state->from_nr) ||
+ (ptq->state->to_ip && ptq->state->to_nr)) {
+ sample->pid = ptq->guest_pid;
+ sample->tid = ptq->guest_tid;
+ sample->machine_pid = ptq->guest_machine_pid;
+ sample->vcpu = ptq->vcpu;
+ }
+ }
+
+ sample->cpu = ptq->cpu;
+ sample->insn_len = ptq->insn_len;
+ memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
+}
+
+static void intel_pt_prep_b_sample(struct intel_pt *pt,
+ struct intel_pt_queue *ptq,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ intel_pt_prep_a_sample(ptq, event, sample);
+
+ if (!pt->timeless_decoding)
+ sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
+
+ sample->ip = ptq->state->from_ip;
+ sample->addr = ptq->state->to_ip;
+ sample->cpumode = intel_pt_cpumode(ptq, sample->ip, sample->addr);
+ sample->period = 1;
+ sample->flags = ptq->flags;
+
+ event->sample.header.misc = sample->cpumode;
+}
+
+static int intel_pt_inject_event(union perf_event *event,
+ struct perf_sample *sample, u64 type)
+{
+ event->header.size = perf_event__sample_event_size(sample, type, 0);
+ return perf_event__synthesize_sample(event, type, 0, sample);
+}
+
+static inline int intel_pt_opt_inject(struct intel_pt *pt,
+ union perf_event *event,
+ struct perf_sample *sample, u64 type)
+{
+ if (!pt->synth_opts.inject)
+ return 0;
+
+ return intel_pt_inject_event(event, sample, type);
+}
+
+static int intel_pt_deliver_synth_event(struct intel_pt *pt,
+ union perf_event *event,
+ struct perf_sample *sample, u64 type)
+{
+ int ret;
+
+ ret = intel_pt_opt_inject(pt, event, sample, type);
+ if (ret)
+ return ret;
+
+ ret = perf_session__deliver_synth_event(pt->session, event, sample);
+ if (ret)
+ pr_err("Intel PT: failed to deliver event, error %d\n", ret);
+
+ return ret;
+}
+
+static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct dummy_branch_stack {
+ u64 nr;
+ u64 hw_idx;
+ struct branch_entry entries;
+ } dummy_bs;
+
+ if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
+ return 0;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_b_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->branches_id;
+ sample.stream_id = ptq->pt->branches_id;
+
+ /*
+ * perf report cannot handle events without a branch stack when using
+ * SORT_MODE__BRANCH so make a dummy one.
+ */
+ if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
+ dummy_bs = (struct dummy_branch_stack){
+ .nr = 1,
+ .hw_idx = -1ULL,
+ .entries = {
+ .from = sample.ip,
+ .to = sample.addr,
+ },
+ };
+ sample.branch_stack = (struct branch_stack *)&dummy_bs;
+ }
+
+ if (ptq->sample_ipc)
+ sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
+ if (sample.cyc_cnt) {
+ sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt;
+ ptq->last_br_insn_cnt = ptq->ipc_insn_cnt;
+ ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt;
+ }
+
+ return intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->branches_sample_type);
+}
+
+static void intel_pt_prep_sample(struct intel_pt *pt,
+ struct intel_pt_queue *ptq,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ intel_pt_prep_b_sample(pt, ptq, event, sample);
+
+ if (pt->synth_opts.callchain) {
+ thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
+ pt->synth_opts.callchain_sz + 1,
+ sample->ip, pt->kernel_start);
+ sample->callchain = ptq->chain;
+ }
+
+ if (pt->synth_opts.last_branch) {
+ thread_stack__br_sample(ptq->thread, ptq->cpu, ptq->last_branch,
+ pt->br_stack_sz);
+ sample->branch_stack = ptq->last_branch;
+ }
+}
+
+static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->instructions_id;
+ sample.stream_id = ptq->pt->instructions_id;
+ if (pt->synth_opts.quick)
+ sample.period = 1;
+ else
+ sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
+
+ if (ptq->sample_ipc)
+ sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
+ if (sample.cyc_cnt) {
+ sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt;
+ ptq->last_in_insn_cnt = ptq->ipc_insn_cnt;
+ ptq->last_in_cyc_cnt = ptq->ipc_cyc_cnt;
+ }
+
+ ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
+
+ return intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->instructions_sample_type);
+}
+
+static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ u64 period = 0;
+
+ if (ptq->sample_ipc)
+ period = ptq->ipc_cyc_cnt - ptq->last_cy_cyc_cnt;
+
+ if (!period || intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->cycles_id;
+ sample.stream_id = ptq->pt->cycles_id;
+ sample.period = period;
+
+ sample.cyc_cnt = period;
+ sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_cy_insn_cnt;
+ ptq->last_cy_insn_cnt = ptq->ipc_insn_cnt;
+ ptq->last_cy_cyc_cnt = ptq->ipc_cyc_cnt;
+
+ return intel_pt_deliver_synth_event(pt, event, &sample, pt->cycles_sample_type);
+}
+
+static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->transactions_id;
+ sample.stream_id = ptq->pt->transactions_id;
+
+ return intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->transactions_sample_type);
+}
+
+static void intel_pt_prep_p_sample(struct intel_pt *pt,
+ struct intel_pt_queue *ptq,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ intel_pt_prep_sample(pt, ptq, event, sample);
+
+ /*
+ * Zero IP is used to mean "trace start" but that is not the case for
+ * power or PTWRITE events with no IP, so clear the flags.
+ */
+ if (!sample->ip)
+ sample->flags = 0;
+}
+
+static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct perf_synth_intel_ptwrite raw;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->ptwrites_id;
+ sample.stream_id = ptq->pt->ptwrites_id;
+
+ raw.flags = 0;
+ raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
+ raw.payload = cpu_to_le64(ptq->state->ptw_payload);
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->ptwrites_sample_type);
+}
+
+static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct perf_synth_intel_cbr raw;
+ u32 flags;
+
+ if (intel_pt_skip_cbr_event(pt))
+ return 0;
+
+ ptq->cbr_seen = ptq->state->cbr;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->cbr_id;
+ sample.stream_id = ptq->pt->cbr_id;
+
+ flags = (u16)ptq->state->cbr_payload | (pt->max_non_turbo_ratio << 16);
+ raw.flags = cpu_to_le32(flags);
+ raw.freq = cpu_to_le32(raw.cbr * pt->cbr2khz);
+ raw.reserved3 = 0;
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct perf_synth_intel_psb raw;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->psb_id;
+ sample.stream_id = ptq->pt->psb_id;
+ sample.flags = 0;
+
+ raw.reserved = 0;
+ raw.offset = ptq->state->psb_offset;
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct perf_synth_intel_mwait raw;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->mwait_id;
+ sample.stream_id = ptq->pt->mwait_id;
+
+ raw.reserved = 0;
+ raw.payload = cpu_to_le64(ptq->state->mwait_payload);
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct perf_synth_intel_pwre raw;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->pwre_id;
+ sample.stream_id = ptq->pt->pwre_id;
+
+ raw.reserved = 0;
+ raw.payload = cpu_to_le64(ptq->state->pwre_payload);
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct perf_synth_intel_exstop raw;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->exstop_id;
+ sample.stream_id = ptq->pt->exstop_id;
+
+ raw.flags = 0;
+ raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct perf_synth_intel_pwrx raw;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->pwrx_id;
+ sample.stream_id = ptq->pt->pwrx_id;
+
+ raw.reserved = 0;
+ raw.payload = cpu_to_le64(ptq->state->pwrx_payload);
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+}
+
+/*
+ * PEBS gp_regs array indexes plus 1 so that 0 means not present. Refer
+ * intel_pt_add_gp_regs().
+ */
+static const int pebs_gp_regs[] = {
+ [PERF_REG_X86_FLAGS] = 1,
+ [PERF_REG_X86_IP] = 2,
+ [PERF_REG_X86_AX] = 3,
+ [PERF_REG_X86_CX] = 4,
+ [PERF_REG_X86_DX] = 5,
+ [PERF_REG_X86_BX] = 6,
+ [PERF_REG_X86_SP] = 7,
+ [PERF_REG_X86_BP] = 8,
+ [PERF_REG_X86_SI] = 9,
+ [PERF_REG_X86_DI] = 10,
+ [PERF_REG_X86_R8] = 11,
+ [PERF_REG_X86_R9] = 12,
+ [PERF_REG_X86_R10] = 13,
+ [PERF_REG_X86_R11] = 14,
+ [PERF_REG_X86_R12] = 15,
+ [PERF_REG_X86_R13] = 16,
+ [PERF_REG_X86_R14] = 17,
+ [PERF_REG_X86_R15] = 18,
+};
+
+static u64 *intel_pt_add_gp_regs(struct regs_dump *intr_regs, u64 *pos,
+ const struct intel_pt_blk_items *items,
+ u64 regs_mask)
+{
+ const u64 *gp_regs = items->val[INTEL_PT_GP_REGS_POS];
+ u32 mask = items->mask[INTEL_PT_GP_REGS_POS];
+ u32 bit;
+ int i;
+
+ for (i = 0, bit = 1; i < PERF_REG_X86_64_MAX; i++, bit <<= 1) {
+ /* Get the PEBS gp_regs array index */
+ int n = pebs_gp_regs[i] - 1;
+
+ if (n < 0)
+ continue;
+ /*
+ * Add only registers that were requested (i.e. 'regs_mask') and
+ * that were provided (i.e. 'mask'), and update the resulting
+ * mask (i.e. 'intr_regs->mask') accordingly.
+ */
+ if (mask & 1 << n && regs_mask & bit) {
+ intr_regs->mask |= bit;
+ *pos++ = gp_regs[n];
+ }
+ }
+
+ return pos;
+}
+
+#ifndef PERF_REG_X86_XMM0
+#define PERF_REG_X86_XMM0 32
+#endif
+
+static void intel_pt_add_xmm(struct regs_dump *intr_regs, u64 *pos,
+ const struct intel_pt_blk_items *items,
+ u64 regs_mask)
+{
+ u32 mask = items->has_xmm & (regs_mask >> PERF_REG_X86_XMM0);
+ const u64 *xmm = items->xmm;
+
+ /*
+ * If there are any XMM registers, then there should be all of them.
+ * Nevertheless, follow the logic to add only registers that were
+ * requested (i.e. 'regs_mask') and that were provided (i.e. 'mask'),
+ * and update the resulting mask (i.e. 'intr_regs->mask') accordingly.
+ */
+ intr_regs->mask |= (u64)mask << PERF_REG_X86_XMM0;
+
+ for (; mask; mask >>= 1, xmm++) {
+ if (mask & 1)
+ *pos++ = *xmm;
+ }
+}
+
+#define LBR_INFO_MISPRED (1ULL << 63)
+#define LBR_INFO_IN_TX (1ULL << 62)
+#define LBR_INFO_ABORT (1ULL << 61)
+#define LBR_INFO_CYCLES 0xffff
+
+/* Refer kernel's intel_pmu_store_pebs_lbrs() */
+static u64 intel_pt_lbr_flags(u64 info)
+{
+ union {
+ struct branch_flags flags;
+ u64 result;
+ } u;
+
+ u.result = 0;
+ u.flags.mispred = !!(info & LBR_INFO_MISPRED);
+ u.flags.predicted = !(info & LBR_INFO_MISPRED);
+ u.flags.in_tx = !!(info & LBR_INFO_IN_TX);
+ u.flags.abort = !!(info & LBR_INFO_ABORT);
+ u.flags.cycles = info & LBR_INFO_CYCLES;
+
+ return u.result;
+}
+
+static void intel_pt_add_lbrs(struct branch_stack *br_stack,
+ const struct intel_pt_blk_items *items)
+{
+ u64 *to;
+ int i;
+
+ br_stack->nr = 0;
+
+ to = &br_stack->entries[0].from;
+
+ for (i = INTEL_PT_LBR_0_POS; i <= INTEL_PT_LBR_2_POS; i++) {
+ u32 mask = items->mask[i];
+ const u64 *from = items->val[i];
+
+ for (; mask; mask >>= 3, from += 3) {
+ if ((mask & 7) == 7) {
+ *to++ = from[0];
+ *to++ = from[1];
+ *to++ = intel_pt_lbr_flags(from[2]);
+ br_stack->nr += 1;
+ }
+ }
+ }
+}
+
+static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id)
+{
+ const struct intel_pt_blk_items *items = &ptq->state->items;
+ struct perf_sample sample = { .ip = 0, };
+ union perf_event *event = ptq->event_buf;
+ struct intel_pt *pt = ptq->pt;
+ u64 sample_type = evsel->core.attr.sample_type;
+ u8 cpumode;
+ u64 regs[8 * sizeof(sample.intr_regs.mask)];
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_a_sample(ptq, event, &sample);
+
+ sample.id = id;
+ sample.stream_id = id;
+
+ if (!evsel->core.attr.freq)
+ sample.period = evsel->core.attr.sample_period;
+
+ /* No support for non-zero CS base */
+ if (items->has_ip)
+ sample.ip = items->ip;
+ else if (items->has_rip)
+ sample.ip = items->rip;
+ else
+ sample.ip = ptq->state->from_ip;
+
+ cpumode = intel_pt_cpumode(ptq, sample.ip, 0);
+
+ event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP;
+
+ sample.cpumode = cpumode;
+
+ if (sample_type & PERF_SAMPLE_TIME) {
+ u64 timestamp = 0;
+
+ if (items->has_timestamp)
+ timestamp = items->timestamp;
+ else if (!pt->timeless_decoding)
+ timestamp = ptq->timestamp;
+ if (timestamp)
+ sample.time = tsc_to_perf_time(timestamp, &pt->tc);
+ }
+
+ if (sample_type & PERF_SAMPLE_CALLCHAIN &&
+ pt->synth_opts.callchain) {
+ thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
+ pt->synth_opts.callchain_sz, sample.ip,
+ pt->kernel_start);
+ sample.callchain = ptq->chain;
+ }
+
+ if (sample_type & PERF_SAMPLE_REGS_INTR &&
+ (items->mask[INTEL_PT_GP_REGS_POS] ||
+ items->mask[INTEL_PT_XMM_POS])) {
+ u64 regs_mask = evsel->core.attr.sample_regs_intr;
+ u64 *pos;
+
+ sample.intr_regs.abi = items->is_32_bit ?
+ PERF_SAMPLE_REGS_ABI_32 :
+ PERF_SAMPLE_REGS_ABI_64;
+ sample.intr_regs.regs = regs;
+
+ pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask);
+
+ intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask);
+ }
+
+ if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+ if (items->mask[INTEL_PT_LBR_0_POS] ||
+ items->mask[INTEL_PT_LBR_1_POS] ||
+ items->mask[INTEL_PT_LBR_2_POS]) {
+ intel_pt_add_lbrs(ptq->last_branch, items);
+ } else if (pt->synth_opts.last_branch) {
+ thread_stack__br_sample(ptq->thread, ptq->cpu,
+ ptq->last_branch,
+ pt->br_stack_sz);
+ } else {
+ ptq->last_branch->nr = 0;
+ }
+ sample.branch_stack = ptq->last_branch;
+ }
+
+ if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address)
+ sample.addr = items->mem_access_address;
+
+ if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
+ /*
+ * Refer kernel's setup_pebs_adaptive_sample_data() and
+ * intel_hsw_weight().
+ */
+ if (items->has_mem_access_latency) {
+ u64 weight = items->mem_access_latency >> 32;
+
+ /*
+ * Starts from SPR, the mem access latency field
+ * contains both cache latency [47:32] and instruction
+ * latency [15:0]. The cache latency is the same as the
+ * mem access latency on previous platforms.
+ *
+ * In practice, no memory access could last than 4G
+ * cycles. Use latency >> 32 to distinguish the
+ * different format of the mem access latency field.
+ */
+ if (weight > 0) {
+ sample.weight = weight & 0xffff;
+ sample.ins_lat = items->mem_access_latency & 0xffff;
+ } else
+ sample.weight = items->mem_access_latency;
+ }
+ if (!sample.weight && items->has_tsx_aux_info) {
+ /* Cycles last block */
+ sample.weight = (u32)items->tsx_aux_info;
+ }
+ }
+
+ if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) {
+ u64 ax = items->has_rax ? items->rax : 0;
+ /* Refer kernel's intel_hsw_transaction() */
+ u64 txn = (u8)(items->tsx_aux_info >> 32);
+
+ /* For RTM XABORTs also log the abort code from AX */
+ if (txn & PERF_TXN_TRANSACTION && ax & 1)
+ txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
+ sample.transaction = txn;
+ }
+
+ return intel_pt_deliver_synth_event(pt, event, &sample, sample_type);
+}
+
+static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ struct evsel *evsel = pt->pebs_evsel;
+ u64 id = evsel->core.id[0];
+
+ return intel_pt_do_synth_pebs_sample(ptq, evsel, id);
+}
+
+static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
+{
+ const struct intel_pt_blk_items *items = &ptq->state->items;
+ struct intel_pt_pebs_event *pe;
+ struct intel_pt *pt = ptq->pt;
+ int err = -EINVAL;
+ int hw_id;
+
+ if (!items->has_applicable_counters || !items->applicable_counters) {
+ if (!pt->single_pebs)
+ pr_err("PEBS-via-PT record with no applicable_counters\n");
+ return intel_pt_synth_single_pebs_sample(ptq);
+ }
+
+ for_each_set_bit(hw_id, (unsigned long *)&items->applicable_counters, INTEL_PT_MAX_PEBS) {
+ pe = &ptq->pebs[hw_id];
+ if (!pe->evsel) {
+ if (!pt->single_pebs)
+ pr_err("PEBS-via-PT record with no matching event, hw_id %d\n",
+ hw_id);
+ return intel_pt_synth_single_pebs_sample(ptq);
+ }
+ err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id);
+ if (err)
+ return err;
+ }
+
+ return err;
+}
+
+static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct {
+ struct perf_synth_intel_evt cfe;
+ struct perf_synth_intel_evd evd[INTEL_PT_MAX_EVDS];
+ } raw;
+ int i;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->evt_id;
+ sample.stream_id = ptq->pt->evt_id;
+
+ raw.cfe.type = ptq->state->cfe_type;
+ raw.cfe.reserved = 0;
+ raw.cfe.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
+ raw.cfe.vector = ptq->state->cfe_vector;
+ raw.cfe.evd_cnt = ptq->state->evd_cnt;
+
+ for (i = 0; i < ptq->state->evd_cnt; i++) {
+ raw.evd[i].et = 0;
+ raw.evd[i].evd_type = ptq->state->evd[i].type;
+ raw.evd[i].payload = ptq->state->evd[i].payload;
+ }
+
+ sample.raw_size = perf_synth__raw_size(raw) +
+ ptq->state->evd_cnt * sizeof(struct perf_synth_intel_evd);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->evt_sample_type);
+}
+
+static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct perf_synth_intel_iflag_chg raw;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->iflag_chg_id;
+ sample.stream_id = ptq->pt->iflag_chg_id;
+
+ raw.flags = 0;
+ raw.iflag = ptq->state->to_iflag;
+
+ if (ptq->state->type & INTEL_PT_BRANCH) {
+ raw.via_branch = 1;
+ raw.branch_ip = ptq->state->to_ip;
+ } else {
+ sample.addr = 0;
+ }
+ sample.flags = ptq->flags;
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->iflag_chg_sample_type);
+}
+
+static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
+ pid_t pid, pid_t tid, u64 ip, u64 timestamp,
+ pid_t machine_pid, int vcpu)
+{
+ bool dump_log_on_error = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ON_ERROR;
+ bool log_on_stdout = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_USE_STDOUT;
+ union perf_event event;
+ char msg[MAX_AUXTRACE_ERROR_MSG];
+ int err;
+
+ if (pt->synth_opts.error_minus_flags) {
+ if (code == INTEL_PT_ERR_OVR &&
+ pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_OVERFLOW)
+ return 0;
+ if (code == INTEL_PT_ERR_LOST &&
+ pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_DATA_LOST)
+ return 0;
+ }
+
+ intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
+
+ auxtrace_synth_guest_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
+ code, cpu, pid, tid, ip, msg, timestamp,
+ machine_pid, vcpu);
+
+ if (intel_pt_enable_logging && !log_on_stdout) {
+ FILE *fp = intel_pt_log_fp();
+
+ if (fp)
+ perf_event__fprintf_auxtrace_error(&event, fp);
+ }
+
+ if (code != INTEL_PT_ERR_LOST && dump_log_on_error)
+ intel_pt_log_dump_buf();
+
+ err = perf_session__deliver_synth_event(pt->session, &event, NULL);
+ if (err)
+ pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
+ err);
+
+ return err;
+}
+
+static int intel_ptq_synth_error(struct intel_pt_queue *ptq,
+ const struct intel_pt_state *state)
+{
+ struct intel_pt *pt = ptq->pt;
+ u64 tm = ptq->timestamp;
+ pid_t machine_pid = 0;
+ pid_t pid = ptq->pid;
+ pid_t tid = ptq->tid;
+ int vcpu = -1;
+
+ tm = pt->timeless_decoding ? 0 : tsc_to_perf_time(tm, &pt->tc);
+
+ if (pt->have_guest_sideband && state->from_nr) {
+ machine_pid = ptq->guest_machine_pid;
+ vcpu = ptq->vcpu;
+ pid = ptq->guest_pid;
+ tid = ptq->guest_tid;
+ }
+
+ return intel_pt_synth_error(pt, state->err, ptq->cpu, pid, tid,
+ state->from_ip, tm, machine_pid, vcpu);
+}
+
+static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
+{
+ struct auxtrace_queue *queue;
+ pid_t tid = ptq->next_tid;
+ int err;
+
+ if (tid == -1)
+ return 0;
+
+ intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);
+
+ err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);
+
+ queue = &pt->queues.queue_array[ptq->queue_nr];
+ intel_pt_set_pid_tid_cpu(pt, queue);
+
+ ptq->next_tid = -1;
+
+ return err;
+}
+
+static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
+{
+ struct intel_pt *pt = ptq->pt;
+
+ return ip == pt->switch_ip &&
+ (ptq->flags & PERF_IP_FLAG_BRANCH) &&
+ !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
+}
+
+#define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \
+ INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT)
+
+static int intel_pt_sample(struct intel_pt_queue *ptq)
+{
+ const struct intel_pt_state *state = ptq->state;
+ struct intel_pt *pt = ptq->pt;
+ int err;
+
+ if (!ptq->have_sample)
+ return 0;
+
+ ptq->have_sample = false;
+
+ if (pt->synth_opts.approx_ipc) {
+ ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
+ ptq->ipc_cyc_cnt = ptq->state->cycles;
+ ptq->sample_ipc = true;
+ } else {
+ ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
+ ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;
+ ptq->sample_ipc = ptq->state->flags & INTEL_PT_SAMPLE_IPC;
+ }
+
+ /* Ensure guest code maps are set up */
+ if (symbol_conf.guest_code && (state->from_nr || state->to_nr))
+ intel_pt_get_guest(ptq);
+
+ /*
+ * Do PEBS first to allow for the possibility that the PEBS timestamp
+ * precedes the current timestamp.
+ */
+ if (pt->sample_pebs && state->type & INTEL_PT_BLK_ITEMS) {
+ err = intel_pt_synth_pebs_sample(ptq);
+ if (err)
+ return err;
+ }
+
+ if (pt->synth_opts.intr_events) {
+ if (state->type & INTEL_PT_EVT) {
+ err = intel_pt_synth_events_sample(ptq);
+ if (err)
+ return err;
+ }
+ if (state->type & INTEL_PT_IFLAG_CHG) {
+ err = intel_pt_synth_iflag_chg_sample(ptq);
+ if (err)
+ return err;
+ }
+ }
+
+ if (pt->sample_pwr_events) {
+ if (state->type & INTEL_PT_PSB_EVT) {
+ err = intel_pt_synth_psb_sample(ptq);
+ if (err)
+ return err;
+ }
+ if (ptq->state->cbr != ptq->cbr_seen) {
+ err = intel_pt_synth_cbr_sample(ptq);
+ if (err)
+ return err;
+ }
+ if (state->type & INTEL_PT_PWR_EVT) {
+ if (state->type & INTEL_PT_MWAIT_OP) {
+ err = intel_pt_synth_mwait_sample(ptq);
+ if (err)
+ return err;
+ }
+ if (state->type & INTEL_PT_PWR_ENTRY) {
+ err = intel_pt_synth_pwre_sample(ptq);
+ if (err)
+ return err;
+ }
+ if (state->type & INTEL_PT_EX_STOP) {
+ err = intel_pt_synth_exstop_sample(ptq);
+ if (err)
+ return err;
+ }
+ if (state->type & INTEL_PT_PWR_EXIT) {
+ err = intel_pt_synth_pwrx_sample(ptq);
+ if (err)
+ return err;
+ }
+ }
+ }
+
+ if (state->type & INTEL_PT_INSTRUCTION) {
+ if (pt->sample_instructions) {
+ err = intel_pt_synth_instruction_sample(ptq);
+ if (err)
+ return err;
+ }
+ if (pt->sample_cycles) {
+ err = intel_pt_synth_cycle_sample(ptq);
+ if (err)
+ return err;
+ }
+ }
+
+ if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) {
+ err = intel_pt_synth_transaction_sample(ptq);
+ if (err)
+ return err;
+ }
+
+ if (pt->sample_ptwrites && (state->type & INTEL_PT_PTW)) {
+ err = intel_pt_synth_ptwrite_sample(ptq);
+ if (err)
+ return err;
+ }
+
+ if (!(state->type & INTEL_PT_BRANCH))
+ return 0;
+
+ if (pt->use_thread_stack) {
+ thread_stack__event(ptq->thread, ptq->cpu, ptq->flags,
+ state->from_ip, state->to_ip, ptq->insn_len,
+ state->trace_nr, pt->callstack,
+ pt->br_stack_sz_plus,
+ pt->mispred_all);
+ } else {
+ thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
+ }
+
+ if (pt->sample_branches) {
+ if (state->from_nr != state->to_nr &&
+ state->from_ip && state->to_ip) {
+ struct intel_pt_state *st = (struct intel_pt_state *)state;
+ u64 to_ip = st->to_ip;
+ u64 from_ip = st->from_ip;
+
+ /*
+ * perf cannot handle having different machines for ip
+ * and addr, so create 2 branches.
+ */
+ st->to_ip = 0;
+ err = intel_pt_synth_branch_sample(ptq);
+ if (err)
+ return err;
+ st->from_ip = 0;
+ st->to_ip = to_ip;
+ err = intel_pt_synth_branch_sample(ptq);
+ st->from_ip = from_ip;
+ } else {
+ err = intel_pt_synth_branch_sample(ptq);
+ }
+ if (err)
+ return err;
+ }
+
+ if (!ptq->sync_switch)
+ return 0;
+
+ if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
+ switch (ptq->switch_state) {
+ case INTEL_PT_SS_NOT_TRACING:
+ case INTEL_PT_SS_UNKNOWN:
+ case INTEL_PT_SS_EXPECTING_SWITCH_IP:
+ err = intel_pt_next_tid(pt, ptq);
+ if (err)
+ return err;
+ ptq->switch_state = INTEL_PT_SS_TRACING;
+ break;
+ default:
+ ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
+ return 1;
+ }
+ } else if (!state->to_ip) {
+ ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
+ } else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
+ ptq->switch_state = INTEL_PT_SS_UNKNOWN;
+ } else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
+ state->to_ip == pt->ptss_ip &&
+ (ptq->flags & PERF_IP_FLAG_CALL)) {
+ ptq->switch_state = INTEL_PT_SS_TRACING;
+ }
+
+ return 0;
+}
+
+static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
+{
+ struct machine *machine = pt->machine;
+ struct map *map;
+ struct symbol *sym, *start;
+ u64 ip, switch_ip = 0;
+ const char *ptss;
+
+ if (ptss_ip)
+ *ptss_ip = 0;
+
+ map = machine__kernel_map(machine);
+ if (!map)
+ return 0;
+
+ if (map__load(map))
+ return 0;
+
+ start = dso__first_symbol(map__dso(map));
+
+ for (sym = start; sym; sym = dso__next_symbol(sym)) {
+ if (sym->binding == STB_GLOBAL &&
+ !strcmp(sym->name, "__switch_to")) {
+ ip = map__unmap_ip(map, sym->start);
+ if (ip >= map__start(map) && ip < map__end(map)) {
+ switch_ip = ip;
+ break;
+ }
+ }
+ }
+
+ if (!switch_ip || !ptss_ip)
+ return 0;
+
+ if (pt->have_sched_switch == 1)
+ ptss = "perf_trace_sched_switch";
+ else
+ ptss = "__perf_event_task_sched_out";
+
+ for (sym = start; sym; sym = dso__next_symbol(sym)) {
+ if (!strcmp(sym->name, ptss)) {
+ ip = map__unmap_ip(map, sym->start);
+ if (ip >= map__start(map) && ip < map__end(map)) {
+ *ptss_ip = ip;
+ break;
+ }
+ }
+ }
+
+ return switch_ip;
+}
+
+static void intel_pt_enable_sync_switch(struct intel_pt *pt)
+{
+ unsigned int i;
+
+ if (pt->sync_switch_not_supported)
+ return;
+
+ pt->sync_switch = true;
+
+ for (i = 0; i < pt->queues.nr_queues; i++) {
+ struct auxtrace_queue *queue = &pt->queues.queue_array[i];
+ struct intel_pt_queue *ptq = queue->priv;
+
+ if (ptq)
+ ptq->sync_switch = true;
+ }
+}
+
+static void intel_pt_disable_sync_switch(struct intel_pt *pt)
+{
+ unsigned int i;
+
+ pt->sync_switch = false;
+
+ for (i = 0; i < pt->queues.nr_queues; i++) {
+ struct auxtrace_queue *queue = &pt->queues.queue_array[i];
+ struct intel_pt_queue *ptq = queue->priv;
+
+ if (ptq) {
+ ptq->sync_switch = false;
+ intel_pt_next_tid(pt, ptq);
+ }
+ }
+}
+
+/*
+ * To filter against time ranges, it is only necessary to look at the next start
+ * or end time.
+ */
+static bool intel_pt_next_time(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+
+ if (ptq->sel_start) {
+ /* Next time is an end time */
+ ptq->sel_start = false;
+ ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end;
+ return true;
+ } else if (ptq->sel_idx + 1 < pt->range_cnt) {
+ /* Next time is a start time */
+ ptq->sel_start = true;
+ ptq->sel_idx += 1;
+ ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].start;
+ return true;
+ }
+
+ /* No next time */
+ return false;
+}
+
+static int intel_pt_time_filter(struct intel_pt_queue *ptq, u64 *ff_timestamp)
+{
+ int err;
+
+ while (1) {
+ if (ptq->sel_start) {
+ if (ptq->timestamp >= ptq->sel_timestamp) {
+ /* After start time, so consider next time */
+ intel_pt_next_time(ptq);
+ if (!ptq->sel_timestamp) {
+ /* No end time */
+ return 0;
+ }
+ /* Check against end time */
+ continue;
+ }
+ /* Before start time, so fast forward */
+ ptq->have_sample = false;
+ if (ptq->sel_timestamp > *ff_timestamp) {
+ if (ptq->sync_switch) {
+ intel_pt_next_tid(ptq->pt, ptq);
+ ptq->switch_state = INTEL_PT_SS_UNKNOWN;
+ }
+ *ff_timestamp = ptq->sel_timestamp;
+ err = intel_pt_fast_forward(ptq->decoder,
+ ptq->sel_timestamp);
+ if (err)
+ return err;
+ }
+ return 0;
+ } else if (ptq->timestamp > ptq->sel_timestamp) {
+ /* After end time, so consider next time */
+ if (!intel_pt_next_time(ptq)) {
+ /* No next time range, so stop decoding */
+ ptq->have_sample = false;
+ ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
+ return 1;
+ }
+ /* Check against next start time */
+ continue;
+ } else {
+ /* Before end time */
+ return 0;
+ }
+ }
+}
+
+static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
+{
+ const struct intel_pt_state *state = ptq->state;
+ struct intel_pt *pt = ptq->pt;
+ u64 ff_timestamp = 0;
+ int err;
+
+ if (!pt->kernel_start) {
+ pt->kernel_start = machine__kernel_start(pt->machine);
+ if (pt->per_cpu_mmaps &&
+ (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
+ !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
+ !pt->sampling_mode && !pt->synth_opts.vm_time_correlation) {
+ pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
+ if (pt->switch_ip) {
+ intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
+ pt->switch_ip, pt->ptss_ip);
+ intel_pt_enable_sync_switch(pt);
+ }
+ }
+ }
+
+ intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
+ ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
+ while (1) {
+ err = intel_pt_sample(ptq);
+ if (err)
+ return err;
+
+ state = intel_pt_decode(ptq->decoder);
+ if (state->err) {
+ if (state->err == INTEL_PT_ERR_NODATA)
+ return 1;
+ if (ptq->sync_switch &&
+ state->from_ip >= pt->kernel_start) {
+ ptq->sync_switch = false;
+ intel_pt_next_tid(pt, ptq);
+ }
+ ptq->timestamp = state->est_timestamp;
+ if (pt->synth_opts.errors) {
+ err = intel_ptq_synth_error(ptq, state);
+ if (err)
+ return err;
+ }
+ continue;
+ }
+
+ ptq->state = state;
+ ptq->have_sample = true;
+ intel_pt_sample_flags(ptq);
+
+ /* Use estimated TSC upon return to user space */
+ if (pt->est_tsc &&
+ (state->from_ip >= pt->kernel_start || !state->from_ip) &&
+ state->to_ip && state->to_ip < pt->kernel_start) {
+ intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
+ state->timestamp, state->est_timestamp);
+ ptq->timestamp = state->est_timestamp;
+ /* Use estimated TSC in unknown switch state */
+ } else if (ptq->sync_switch &&
+ ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
+ intel_pt_is_switch_ip(ptq, state->to_ip) &&
+ ptq->next_tid == -1) {
+ intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
+ state->timestamp, state->est_timestamp);
+ ptq->timestamp = state->est_timestamp;
+ } else if (state->timestamp > ptq->timestamp) {
+ ptq->timestamp = state->timestamp;
+ }
+
+ if (ptq->sel_timestamp) {
+ err = intel_pt_time_filter(ptq, &ff_timestamp);
+ if (err)
+ return err;
+ }
+
+ if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
+ *timestamp = ptq->timestamp;
+ return 0;
+ }
+ }
+ return 0;
+}
+
+static inline int intel_pt_update_queues(struct intel_pt *pt)
+{
+ if (pt->queues.new_data) {
+ pt->queues.new_data = false;
+ return intel_pt_setup_queues(pt);
+ }
+ return 0;
+}
+
+static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
+{
+ unsigned int queue_nr;
+ u64 ts;
+ int ret;
+
+ while (1) {
+ struct auxtrace_queue *queue;
+ struct intel_pt_queue *ptq;
+
+ if (!pt->heap.heap_cnt)
+ return 0;
+
+ if (pt->heap.heap_array[0].ordinal >= timestamp)
+ return 0;
+
+ queue_nr = pt->heap.heap_array[0].queue_nr;
+ queue = &pt->queues.queue_array[queue_nr];
+ ptq = queue->priv;
+
+ intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
+ queue_nr, pt->heap.heap_array[0].ordinal,
+ timestamp);
+
+ auxtrace_heap__pop(&pt->heap);
+
+ if (pt->heap.heap_cnt) {
+ ts = pt->heap.heap_array[0].ordinal + 1;
+ if (ts > timestamp)
+ ts = timestamp;
+ } else {
+ ts = timestamp;
+ }
+
+ intel_pt_set_pid_tid_cpu(pt, queue);
+
+ ret = intel_pt_run_decoder(ptq, &ts);
+
+ if (ret < 0) {
+ auxtrace_heap__add(&pt->heap, queue_nr, ts);
+ return ret;
+ }
+
+ if (!ret) {
+ ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
+ if (ret < 0)
+ return ret;
+ } else {
+ ptq->on_heap = false;
+ }
+ }
+
+ return 0;
+}
+
+static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
+ u64 time_)
+{
+ struct auxtrace_queues *queues = &pt->queues;
+ unsigned int i;
+ u64 ts = 0;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ struct auxtrace_queue *queue = &pt->queues.queue_array[i];
+ struct intel_pt_queue *ptq = queue->priv;
+
+ if (ptq && (tid == -1 || ptq->tid == tid)) {
+ ptq->time = time_;
+ intel_pt_set_pid_tid_cpu(pt, queue);
+ intel_pt_run_decoder(ptq, &ts);
+ }
+ }
+ return 0;
+}
+
+static void intel_pt_sample_set_pid_tid_cpu(struct intel_pt_queue *ptq,
+ struct auxtrace_queue *queue,
+ struct perf_sample *sample)
+{
+ struct machine *m = ptq->pt->machine;
+
+ ptq->pid = sample->pid;
+ ptq->tid = sample->tid;
+ ptq->cpu = queue->cpu;
+
+ intel_pt_log("queue %u cpu %d pid %d tid %d\n",
+ ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
+
+ thread__zput(ptq->thread);
+
+ if (ptq->tid == -1)
+ return;
+
+ if (ptq->pid == -1) {
+ ptq->thread = machine__find_thread(m, -1, ptq->tid);
+ if (ptq->thread)
+ ptq->pid = thread__pid(ptq->thread);
+ return;
+ }
+
+ ptq->thread = machine__findnew_thread(m, ptq->pid, ptq->tid);
+}
+
+static int intel_pt_process_timeless_sample(struct intel_pt *pt,
+ struct perf_sample *sample)
+{
+ struct auxtrace_queue *queue;
+ struct intel_pt_queue *ptq;
+ u64 ts = 0;
+
+ queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session);
+ if (!queue)
+ return -EINVAL;
+
+ ptq = queue->priv;
+ if (!ptq)
+ return 0;
+
+ ptq->stop = false;
+ ptq->time = sample->time;
+ intel_pt_sample_set_pid_tid_cpu(ptq, queue, sample);
+ intel_pt_run_decoder(ptq, &ts);
+ return 0;
+}
+
+static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
+{
+ return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
+ sample->pid, sample->tid, 0, sample->time,
+ sample->machine_pid, sample->vcpu);
+}
+
+static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
+{
+ unsigned i, j;
+
+ if (cpu < 0 || !pt->queues.nr_queues)
+ return NULL;
+
+ if ((unsigned)cpu >= pt->queues.nr_queues)
+ i = pt->queues.nr_queues - 1;
+ else
+ i = cpu;
+
+ if (pt->queues.queue_array[i].cpu == cpu)
+ return pt->queues.queue_array[i].priv;
+
+ for (j = 0; i > 0; j++) {
+ if (pt->queues.queue_array[--i].cpu == cpu)
+ return pt->queues.queue_array[i].priv;
+ }
+
+ for (; j < pt->queues.nr_queues; j++) {
+ if (pt->queues.queue_array[j].cpu == cpu)
+ return pt->queues.queue_array[j].priv;
+ }
+
+ return NULL;
+}
+
+static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
+ u64 timestamp)
+{
+ struct intel_pt_queue *ptq;
+ int err;
+
+ if (!pt->sync_switch)
+ return 1;
+
+ ptq = intel_pt_cpu_to_ptq(pt, cpu);
+ if (!ptq || !ptq->sync_switch)
+ return 1;
+
+ switch (ptq->switch_state) {
+ case INTEL_PT_SS_NOT_TRACING:
+ break;
+ case INTEL_PT_SS_UNKNOWN:
+ case INTEL_PT_SS_TRACING:
+ ptq->next_tid = tid;
+ ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
+ return 0;
+ case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
+ if (!ptq->on_heap) {
+ ptq->timestamp = perf_time_to_tsc(timestamp,
+ &pt->tc);
+ err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
+ ptq->timestamp);
+ if (err)
+ return err;
+ ptq->on_heap = true;
+ }
+ ptq->switch_state = INTEL_PT_SS_TRACING;
+ break;
+ case INTEL_PT_SS_EXPECTING_SWITCH_IP:
+ intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
+ break;
+ default:
+ break;
+ }
+
+ ptq->next_tid = -1;
+
+ return 1;
+}
+
+#ifdef HAVE_LIBTRACEEVENT
+static int intel_pt_process_switch(struct intel_pt *pt,
+ struct perf_sample *sample)
+{
+ pid_t tid;
+ int cpu, ret;
+ struct evsel *evsel = evlist__id2evsel(pt->session->evlist, sample->id);
+
+ if (evsel != pt->switch_evsel)
+ return 0;
+
+ tid = evsel__intval(evsel, sample, "next_pid");
+ cpu = sample->cpu;
+
+ intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
+ cpu, tid, sample->time, perf_time_to_tsc(sample->time,
+ &pt->tc));
+
+ ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
+ if (ret <= 0)
+ return ret;
+
+ return machine__set_current_tid(pt->machine, cpu, -1, tid);
+}
+#endif /* HAVE_LIBTRACEEVENT */
+
+static int intel_pt_context_switch_in(struct intel_pt *pt,
+ struct perf_sample *sample)
+{
+ pid_t pid = sample->pid;
+ pid_t tid = sample->tid;
+ int cpu = sample->cpu;
+
+ if (pt->sync_switch) {
+ struct intel_pt_queue *ptq;
+
+ ptq = intel_pt_cpu_to_ptq(pt, cpu);
+ if (ptq && ptq->sync_switch) {
+ ptq->next_tid = -1;
+ switch (ptq->switch_state) {
+ case INTEL_PT_SS_NOT_TRACING:
+ case INTEL_PT_SS_UNKNOWN:
+ case INTEL_PT_SS_TRACING:
+ break;
+ case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
+ case INTEL_PT_SS_EXPECTING_SWITCH_IP:
+ ptq->switch_state = INTEL_PT_SS_TRACING;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ /*
+ * If the current tid has not been updated yet, ensure it is now that
+ * a "switch in" event has occurred.
+ */
+ if (machine__get_current_tid(pt->machine, cpu) == tid)
+ return 0;
+
+ return machine__set_current_tid(pt->machine, cpu, pid, tid);
+}
+
+static int intel_pt_guest_context_switch(struct intel_pt *pt,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+ struct machines *machines = &pt->session->machines;
+ struct machine *machine = machines__find(machines, sample->machine_pid);
+
+ pt->have_guest_sideband = true;
+
+ /*
+ * sync_switch cannot handle guest machines at present, so just disable
+ * it.
+ */
+ pt->sync_switch_not_supported = true;
+ if (pt->sync_switch)
+ intel_pt_disable_sync_switch(pt);
+
+ if (out)
+ return 0;
+
+ if (!machine)
+ return -EINVAL;
+
+ return machine__set_current_tid(machine, sample->vcpu, sample->pid, sample->tid);
+}
+
+static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
+ struct perf_sample *sample)
+{
+ bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+ pid_t pid, tid;
+ int cpu, ret;
+
+ if (perf_event__is_guest(event))
+ return intel_pt_guest_context_switch(pt, event, sample);
+
+ cpu = sample->cpu;
+
+ if (pt->have_sched_switch == 3) {
+ if (!out)
+ return intel_pt_context_switch_in(pt, sample);
+ if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
+ pr_err("Expecting CPU-wide context switch event\n");
+ return -EINVAL;
+ }
+ pid = event->context_switch.next_prev_pid;
+ tid = event->context_switch.next_prev_tid;
+ } else {
+ if (out)
+ return 0;
+ pid = sample->pid;
+ tid = sample->tid;
+ }
+
+ if (tid == -1)
+ intel_pt_log("context_switch event has no tid\n");
+
+ ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
+ if (ret <= 0)
+ return ret;
+
+ return machine__set_current_tid(pt->machine, cpu, pid, tid);
+}
+
+static int intel_pt_process_itrace_start(struct intel_pt *pt,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ if (!pt->per_cpu_mmaps)
+ return 0;
+
+ intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
+ sample->cpu, event->itrace_start.pid,
+ event->itrace_start.tid, sample->time,
+ perf_time_to_tsc(sample->time, &pt->tc));
+
+ return machine__set_current_tid(pt->machine, sample->cpu,
+ event->itrace_start.pid,
+ event->itrace_start.tid);
+}
+
+static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ u64 hw_id = event->aux_output_hw_id.hw_id;
+ struct auxtrace_queue *queue;
+ struct intel_pt_queue *ptq;
+ struct evsel *evsel;
+
+ queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session);
+ evsel = evlist__id2evsel_strict(pt->session->evlist, sample->id);
+ if (!queue || !queue->priv || !evsel || hw_id > INTEL_PT_MAX_PEBS) {
+ pr_err("Bad AUX output hardware ID\n");
+ return -EINVAL;
+ }
+
+ ptq = queue->priv;
+
+ ptq->pebs[hw_id].evsel = evsel;
+ ptq->pebs[hw_id].id = sample->id;
+
+ return 0;
+}
+
+static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr,
+ struct addr_location *al)
+{
+ if (!al->map || addr < map__start(al->map) || addr >= map__end(al->map)) {
+ if (!thread__find_map(thread, cpumode, addr, al))
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Invalidate all instruction cache entries that overlap the text poke */
+static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event)
+{
+ u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ u64 addr = event->text_poke.addr + event->text_poke.new_len - 1;
+ /* Assume text poke begins in a basic block no more than 4096 bytes */
+ int cnt = 4096 + event->text_poke.new_len;
+ struct thread *thread = pt->unknown_thread;
+ struct addr_location al;
+ struct machine *machine = pt->machine;
+ struct intel_pt_cache_entry *e;
+ u64 offset;
+ int ret = 0;
+
+ addr_location__init(&al);
+ if (!event->text_poke.new_len)
+ goto out;
+
+ for (; cnt; cnt--, addr--) {
+ struct dso *dso;
+
+ if (intel_pt_find_map(thread, cpumode, addr, &al)) {
+ if (addr < event->text_poke.addr)
+ goto out;
+ continue;
+ }
+
+ dso = map__dso(al.map);
+ if (!dso || !dso->auxtrace_cache)
+ continue;
+
+ offset = map__map_ip(al.map, addr);
+
+ e = intel_pt_cache_lookup(dso, machine, offset);
+ if (!e)
+ continue;
+
+ if (addr + e->byte_cnt + e->length <= event->text_poke.addr) {
+ /*
+ * No overlap. Working backwards there cannot be another
+ * basic block that overlaps the text poke if there is a
+ * branch instruction before the text poke address.
+ */
+ if (e->branch != INTEL_PT_BR_NO_BRANCH)
+ goto out;
+ } else {
+ intel_pt_cache_invalidate(dso, machine, offset);
+ intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n",
+ dso->long_name, addr);
+ }
+ }
+out:
+ addr_location__exit(&al);
+ return ret;
+}
+
+static int intel_pt_process_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_tool *tool)
+{
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+ auxtrace);
+ u64 timestamp;
+ int err = 0;
+
+ if (dump_trace)
+ return 0;
+
+ if (!tool->ordered_events) {
+ pr_err("Intel Processor Trace requires ordered events\n");
+ return -EINVAL;
+ }
+
+ if (sample->time && sample->time != (u64)-1)
+ timestamp = perf_time_to_tsc(sample->time, &pt->tc);
+ else
+ timestamp = 0;
+
+ if (timestamp || pt->timeless_decoding) {
+ err = intel_pt_update_queues(pt);
+ if (err)
+ return err;
+ }
+
+ if (pt->timeless_decoding) {
+ if (pt->sampling_mode) {
+ if (sample->aux_sample.size)
+ err = intel_pt_process_timeless_sample(pt,
+ sample);
+ } else if (event->header.type == PERF_RECORD_EXIT) {
+ err = intel_pt_process_timeless_queues(pt,
+ event->fork.tid,
+ sample->time);
+ }
+ } else if (timestamp) {
+ if (!pt->first_timestamp)
+ intel_pt_first_timestamp(pt, timestamp);
+ err = intel_pt_process_queues(pt, timestamp);
+ }
+ if (err)
+ return err;
+
+ if (event->header.type == PERF_RECORD_SAMPLE) {
+ if (pt->synth_opts.add_callchain && !sample->callchain)
+ intel_pt_add_callchain(pt, sample);
+ if (pt->synth_opts.add_last_branch && !sample->branch_stack)
+ intel_pt_add_br_stack(pt, sample);
+ }
+
+ if (event->header.type == PERF_RECORD_AUX &&
+ (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
+ pt->synth_opts.errors) {
+ err = intel_pt_lost(pt, sample);
+ if (err)
+ return err;
+ }
+
+#ifdef HAVE_LIBTRACEEVENT
+ if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
+ err = intel_pt_process_switch(pt, sample);
+ else
+#endif
+ if (event->header.type == PERF_RECORD_ITRACE_START)
+ err = intel_pt_process_itrace_start(pt, event, sample);
+ else if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID)
+ err = intel_pt_process_aux_output_hw_id(pt, event, sample);
+ else if (event->header.type == PERF_RECORD_SWITCH ||
+ event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
+ err = intel_pt_context_switch(pt, event, sample);
+
+ if (!err && event->header.type == PERF_RECORD_TEXT_POKE)
+ err = intel_pt_text_poke(pt, event);
+
+ if (intel_pt_enable_logging && intel_pt_log_events(pt, sample->time)) {
+ intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ",
+ event->header.type, sample->cpu, sample->time, timestamp);
+ intel_pt_log_event(event);
+ }
+
+ return err;
+}
+
+static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
+{
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+ auxtrace);
+ int ret;
+
+ if (dump_trace)
+ return 0;
+
+ if (!tool->ordered_events)
+ return -EINVAL;
+
+ ret = intel_pt_update_queues(pt);
+ if (ret < 0)
+ return ret;
+
+ if (pt->timeless_decoding)
+ return intel_pt_process_timeless_queues(pt, -1,
+ MAX_TIMESTAMP - 1);
+
+ return intel_pt_process_queues(pt, MAX_TIMESTAMP);
+}
+
+static void intel_pt_free_events(struct perf_session *session)
+{
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+ auxtrace);
+ struct auxtrace_queues *queues = &pt->queues;
+ unsigned int i;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ intel_pt_free_queue(queues->queue_array[i].priv);
+ queues->queue_array[i].priv = NULL;
+ }
+ intel_pt_log_disable();
+ auxtrace_queues__free(queues);
+}
+
+static void intel_pt_free(struct perf_session *session)
+{
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+ auxtrace);
+
+ auxtrace_heap__free(&pt->heap);
+ intel_pt_free_events(session);
+ session->auxtrace = NULL;
+ intel_pt_free_vmcs_info(pt);
+ thread__put(pt->unknown_thread);
+ addr_filters__exit(&pt->filts);
+ zfree(&pt->chain);
+ zfree(&pt->filter);
+ zfree(&pt->time_ranges);
+ zfree(&pt->br_stack);
+ free(pt);
+}
+
+static bool intel_pt_evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+ auxtrace);
+
+ return evsel->core.attr.type == pt->pmu_type;
+}
+
+static int intel_pt_process_auxtrace_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_tool *tool __maybe_unused)
+{
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+ auxtrace);
+
+ if (!pt->data_queued) {
+ struct auxtrace_buffer *buffer;
+ off_t data_offset;
+ int fd = perf_data__fd(session->data);
+ int err;
+
+ if (perf_data__is_pipe(session->data)) {
+ data_offset = 0;
+ } else {
+ data_offset = lseek(fd, 0, SEEK_CUR);
+ if (data_offset == -1)
+ return -errno;
+ }
+
+ err = auxtrace_queues__add_event(&pt->queues, session, event,
+ data_offset, &buffer);
+ if (err)
+ return err;
+
+ /* Dump here now we have copied a piped trace out of the pipe */
+ if (dump_trace) {
+ if (auxtrace_buffer__get_data(buffer, fd)) {
+ intel_pt_dump_event(pt, buffer->data,
+ buffer->size);
+ auxtrace_buffer__put_data(buffer);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int intel_pt_queue_data(struct perf_session *session,
+ struct perf_sample *sample,
+ union perf_event *event, u64 data_offset)
+{
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+ auxtrace);
+ u64 timestamp;
+
+ if (event) {
+ return auxtrace_queues__add_event(&pt->queues, session, event,
+ data_offset, NULL);
+ }
+
+ if (sample->time && sample->time != (u64)-1)
+ timestamp = perf_time_to_tsc(sample->time, &pt->tc);
+ else
+ timestamp = 0;
+
+ return auxtrace_queues__add_sample(&pt->queues, session, sample,
+ data_offset, timestamp);
+}
+
+struct intel_pt_synth {
+ struct perf_tool dummy_tool;
+ struct perf_session *session;
+};
+
+static int intel_pt_event_synth(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct intel_pt_synth *intel_pt_synth =
+ container_of(tool, struct intel_pt_synth, dummy_tool);
+
+ return perf_session__deliver_synth_event(intel_pt_synth->session, event,
+ NULL);
+}
+
+static int intel_pt_synth_event(struct perf_session *session, const char *name,
+ struct perf_event_attr *attr, u64 id)
+{
+ struct intel_pt_synth intel_pt_synth;
+ int err;
+
+ pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
+ name, id, (u64)attr->sample_type);
+
+ memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
+ intel_pt_synth.session = session;
+
+ err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
+ &id, intel_pt_event_synth);
+ if (err)
+ pr_err("%s: failed to synthesize '%s' event type\n",
+ __func__, name);
+
+ return err;
+}
+
+static void intel_pt_set_event_name(struct evlist *evlist, u64 id,
+ const char *name)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.id && evsel->core.id[0] == id) {
+ if (evsel->name)
+ zfree(&evsel->name);
+ evsel->name = strdup(name);
+ break;
+ }
+ }
+}
+
+static struct evsel *intel_pt_evsel(struct intel_pt *pt,
+ struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.type == pt->pmu_type && evsel->core.ids)
+ return evsel;
+ }
+
+ return NULL;
+}
+
+static int intel_pt_synth_events(struct intel_pt *pt,
+ struct perf_session *session)
+{
+ struct evlist *evlist = session->evlist;
+ struct evsel *evsel = intel_pt_evsel(pt, evlist);
+ struct perf_event_attr attr;
+ u64 id;
+ int err;
+
+ if (!evsel) {
+ pr_debug("There are no selected events with Intel Processor Trace data\n");
+ return 0;
+ }
+
+ memset(&attr, 0, sizeof(struct perf_event_attr));
+ attr.size = sizeof(struct perf_event_attr);
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
+ attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+ PERF_SAMPLE_PERIOD;
+ if (pt->timeless_decoding)
+ attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
+ else
+ attr.sample_type |= PERF_SAMPLE_TIME;
+ if (!pt->per_cpu_mmaps)
+ attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
+ attr.exclude_user = evsel->core.attr.exclude_user;
+ attr.exclude_kernel = evsel->core.attr.exclude_kernel;
+ attr.exclude_hv = evsel->core.attr.exclude_hv;
+ attr.exclude_host = evsel->core.attr.exclude_host;
+ attr.exclude_guest = evsel->core.attr.exclude_guest;
+ attr.sample_id_all = evsel->core.attr.sample_id_all;
+ attr.read_format = evsel->core.attr.read_format;
+
+ id = evsel->core.id[0] + 1000000000;
+ if (!id)
+ id = 1;
+
+ if (pt->synth_opts.branches) {
+ attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+ attr.sample_period = 1;
+ attr.sample_type |= PERF_SAMPLE_ADDR;
+ err = intel_pt_synth_event(session, "branches", &attr, id);
+ if (err)
+ return err;
+ pt->sample_branches = true;
+ pt->branches_sample_type = attr.sample_type;
+ pt->branches_id = id;
+ id += 1;
+ attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
+ }
+
+ if (pt->synth_opts.callchain)
+ attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+ if (pt->synth_opts.last_branch) {
+ attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ /*
+ * We don't use the hardware index, but the sample generation
+ * code uses the new format branch_stack with this field,
+ * so the event attributes must indicate that it's present.
+ */
+ attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
+ }
+
+ if (pt->synth_opts.instructions) {
+ attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+ if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
+ attr.sample_period =
+ intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
+ else
+ attr.sample_period = pt->synth_opts.period;
+ err = intel_pt_synth_event(session, "instructions", &attr, id);
+ if (err)
+ return err;
+ pt->sample_instructions = true;
+ pt->instructions_sample_type = attr.sample_type;
+ pt->instructions_id = id;
+ id += 1;
+ }
+
+ if (pt->synth_opts.cycles) {
+ attr.config = PERF_COUNT_HW_CPU_CYCLES;
+ if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
+ attr.sample_period =
+ intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
+ else
+ attr.sample_period = pt->synth_opts.period;
+ err = intel_pt_synth_event(session, "cycles", &attr, id);
+ if (err)
+ return err;
+ pt->sample_cycles = true;
+ pt->cycles_sample_type = attr.sample_type;
+ pt->cycles_id = id;
+ id += 1;
+ }
+
+ attr.sample_type &= ~(u64)PERF_SAMPLE_PERIOD;
+ attr.sample_period = 1;
+
+ if (pt->synth_opts.transactions) {
+ attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+ err = intel_pt_synth_event(session, "transactions", &attr, id);
+ if (err)
+ return err;
+ pt->sample_transactions = true;
+ pt->transactions_sample_type = attr.sample_type;
+ pt->transactions_id = id;
+ intel_pt_set_event_name(evlist, id, "transactions");
+ id += 1;
+ }
+
+ attr.type = PERF_TYPE_SYNTH;
+ attr.sample_type |= PERF_SAMPLE_RAW;
+
+ if (pt->synth_opts.ptwrites) {
+ attr.config = PERF_SYNTH_INTEL_PTWRITE;
+ err = intel_pt_synth_event(session, "ptwrite", &attr, id);
+ if (err)
+ return err;
+ pt->sample_ptwrites = true;
+ pt->ptwrites_sample_type = attr.sample_type;
+ pt->ptwrites_id = id;
+ intel_pt_set_event_name(evlist, id, "ptwrite");
+ id += 1;
+ }
+
+ if (pt->synth_opts.pwr_events) {
+ pt->sample_pwr_events = true;
+ pt->pwr_events_sample_type = attr.sample_type;
+
+ attr.config = PERF_SYNTH_INTEL_CBR;
+ err = intel_pt_synth_event(session, "cbr", &attr, id);
+ if (err)
+ return err;
+ pt->cbr_id = id;
+ intel_pt_set_event_name(evlist, id, "cbr");
+ id += 1;
+
+ attr.config = PERF_SYNTH_INTEL_PSB;
+ err = intel_pt_synth_event(session, "psb", &attr, id);
+ if (err)
+ return err;
+ pt->psb_id = id;
+ intel_pt_set_event_name(evlist, id, "psb");
+ id += 1;
+ }
+
+ if (pt->synth_opts.pwr_events && (evsel->core.attr.config & INTEL_PT_CFG_PWR_EVT_EN)) {
+ attr.config = PERF_SYNTH_INTEL_MWAIT;
+ err = intel_pt_synth_event(session, "mwait", &attr, id);
+ if (err)
+ return err;
+ pt->mwait_id = id;
+ intel_pt_set_event_name(evlist, id, "mwait");
+ id += 1;
+
+ attr.config = PERF_SYNTH_INTEL_PWRE;
+ err = intel_pt_synth_event(session, "pwre", &attr, id);
+ if (err)
+ return err;
+ pt->pwre_id = id;
+ intel_pt_set_event_name(evlist, id, "pwre");
+ id += 1;
+
+ attr.config = PERF_SYNTH_INTEL_EXSTOP;
+ err = intel_pt_synth_event(session, "exstop", &attr, id);
+ if (err)
+ return err;
+ pt->exstop_id = id;
+ intel_pt_set_event_name(evlist, id, "exstop");
+ id += 1;
+
+ attr.config = PERF_SYNTH_INTEL_PWRX;
+ err = intel_pt_synth_event(session, "pwrx", &attr, id);
+ if (err)
+ return err;
+ pt->pwrx_id = id;
+ intel_pt_set_event_name(evlist, id, "pwrx");
+ id += 1;
+ }
+
+ if (pt->synth_opts.intr_events && (evsel->core.attr.config & INTEL_PT_CFG_EVT_EN)) {
+ attr.config = PERF_SYNTH_INTEL_EVT;
+ err = intel_pt_synth_event(session, "evt", &attr, id);
+ if (err)
+ return err;
+ pt->evt_sample_type = attr.sample_type;
+ pt->evt_id = id;
+ intel_pt_set_event_name(evlist, id, "evt");
+ id += 1;
+ }
+
+ if (pt->synth_opts.intr_events && pt->cap_event_trace) {
+ attr.config = PERF_SYNTH_INTEL_IFLAG_CHG;
+ err = intel_pt_synth_event(session, "iflag", &attr, id);
+ if (err)
+ return err;
+ pt->iflag_chg_sample_type = attr.sample_type;
+ pt->iflag_chg_id = id;
+ intel_pt_set_event_name(evlist, id, "iflag");
+ id += 1;
+ }
+
+ return 0;
+}
+
+static void intel_pt_setup_pebs_events(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+
+ if (!pt->synth_opts.other_events)
+ return;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (evsel->core.attr.aux_output && evsel->core.id) {
+ if (pt->single_pebs) {
+ pt->single_pebs = false;
+ return;
+ }
+ pt->single_pebs = true;
+ pt->sample_pebs = true;
+ pt->pebs_evsel = evsel;
+ }
+ }
+}
+
+static struct evsel *intel_pt_find_sched_switch(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry_reverse(evlist, evsel) {
+ const char *name = evsel__name(evsel);
+
+ if (!strcmp(name, "sched:sched_switch"))
+ return evsel;
+ }
+
+ return NULL;
+}
+
+static bool intel_pt_find_switch(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.context_switch)
+ return true;
+ }
+
+ return false;
+}
+
+static int intel_pt_perf_config(const char *var, const char *value, void *data)
+{
+ struct intel_pt *pt = data;
+
+ if (!strcmp(var, "intel-pt.mispred-all"))
+ pt->mispred_all = perf_config_bool(var, value);
+
+ if (!strcmp(var, "intel-pt.max-loops"))
+ perf_config_int(&pt->max_loops, var, value);
+
+ return 0;
+}
+
+/* Find least TSC which converts to ns or later */
+static u64 intel_pt_tsc_start(u64 ns, struct intel_pt *pt)
+{
+ u64 tsc, tm;
+
+ tsc = perf_time_to_tsc(ns, &pt->tc);
+
+ while (1) {
+ tm = tsc_to_perf_time(tsc, &pt->tc);
+ if (tm < ns)
+ break;
+ tsc -= 1;
+ }
+
+ while (tm < ns)
+ tm = tsc_to_perf_time(++tsc, &pt->tc);
+
+ return tsc;
+}
+
+/* Find greatest TSC which converts to ns or earlier */
+static u64 intel_pt_tsc_end(u64 ns, struct intel_pt *pt)
+{
+ u64 tsc, tm;
+
+ tsc = perf_time_to_tsc(ns, &pt->tc);
+
+ while (1) {
+ tm = tsc_to_perf_time(tsc, &pt->tc);
+ if (tm > ns)
+ break;
+ tsc += 1;
+ }
+
+ while (tm > ns)
+ tm = tsc_to_perf_time(--tsc, &pt->tc);
+
+ return tsc;
+}
+
+static int intel_pt_setup_time_ranges(struct intel_pt *pt,
+ struct itrace_synth_opts *opts)
+{
+ struct perf_time_interval *p = opts->ptime_range;
+ int n = opts->range_num;
+ int i;
+
+ if (!n || !p || pt->timeless_decoding)
+ return 0;
+
+ pt->time_ranges = calloc(n, sizeof(struct range));
+ if (!pt->time_ranges)
+ return -ENOMEM;
+
+ pt->range_cnt = n;
+
+ intel_pt_log("%s: %u range(s)\n", __func__, n);
+
+ for (i = 0; i < n; i++) {
+ struct range *r = &pt->time_ranges[i];
+ u64 ts = p[i].start;
+ u64 te = p[i].end;
+
+ /*
+ * Take care to ensure the TSC range matches the perf-time range
+ * when converted back to perf-time.
+ */
+ r->start = ts ? intel_pt_tsc_start(ts, pt) : 0;
+ r->end = te ? intel_pt_tsc_end(te, pt) : 0;
+
+ intel_pt_log("range %d: perf time interval: %"PRIu64" to %"PRIu64"\n",
+ i, ts, te);
+ intel_pt_log("range %d: TSC time interval: %#"PRIx64" to %#"PRIx64"\n",
+ i, r->start, r->end);
+ }
+
+ return 0;
+}
+
+static int intel_pt_parse_vm_tm_corr_arg(struct intel_pt *pt, char **args)
+{
+ struct intel_pt_vmcs_info *vmcs_info;
+ u64 tsc_offset, vmcs;
+ char *p = *args;
+
+ errno = 0;
+
+ p = skip_spaces(p);
+ if (!*p)
+ return 1;
+
+ tsc_offset = strtoull(p, &p, 0);
+ if (errno)
+ return -errno;
+ p = skip_spaces(p);
+ if (*p != ':') {
+ pt->dflt_tsc_offset = tsc_offset;
+ *args = p;
+ return 0;
+ }
+ p += 1;
+ while (1) {
+ vmcs = strtoull(p, &p, 0);
+ if (errno)
+ return -errno;
+ if (!vmcs)
+ return -EINVAL;
+ vmcs_info = intel_pt_findnew_vmcs(&pt->vmcs_info, vmcs, tsc_offset);
+ if (!vmcs_info)
+ return -ENOMEM;
+ p = skip_spaces(p);
+ if (*p != ',')
+ break;
+ p += 1;
+ }
+ *args = p;
+ return 0;
+}
+
+static int intel_pt_parse_vm_tm_corr_args(struct intel_pt *pt)
+{
+ char *args = pt->synth_opts.vm_tm_corr_args;
+ int ret;
+
+ if (!args)
+ return 0;
+
+ do {
+ ret = intel_pt_parse_vm_tm_corr_arg(pt, &args);
+ } while (!ret);
+
+ if (ret < 0) {
+ pr_err("Failed to parse VM Time Correlation options\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static const char * const intel_pt_info_fmts[] = {
+ [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n",
+ [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
+ [INTEL_PT_TIME_MULT] = " Time Muliplier %"PRIu64"\n",
+ [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n",
+ [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n",
+ [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n",
+ [INTEL_PT_NORETCOMP_BIT] = " NoRETComp bit %#"PRIx64"\n",
+ [INTEL_PT_HAVE_SCHED_SWITCH] = " Have sched_switch %"PRId64"\n",
+ [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n",
+ [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n",
+ [INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n",
+ [INTEL_PT_MTC_FREQ_BITS] = " MTC freq bits %#"PRIx64"\n",
+ [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n",
+ [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n",
+ [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n",
+ [INTEL_PT_MAX_NONTURBO_RATIO] = " Max non-turbo ratio %"PRIu64"\n",
+ [INTEL_PT_FILTER_STR_LEN] = " Filter string len. %"PRIu64"\n",
+};
+
+static void intel_pt_print_info(__u64 *arr, int start, int finish)
+{
+ int i;
+
+ if (!dump_trace)
+ return;
+
+ for (i = start; i <= finish; i++) {
+ const char *fmt = intel_pt_info_fmts[i];
+
+ if (fmt)
+ fprintf(stdout, fmt, arr[i]);
+ }
+}
+
+static void intel_pt_print_info_str(const char *name, const char *str)
+{
+ if (!dump_trace)
+ return;
+
+ fprintf(stdout, " %-20s%s\n", name, str ? str : "");
+}
+
+static bool intel_pt_has(struct perf_record_auxtrace_info *auxtrace_info, int pos)
+{
+ return auxtrace_info->header.size >=
+ sizeof(struct perf_record_auxtrace_info) + (sizeof(u64) * (pos + 1));
+}
+
+int intel_pt_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session)
+{
+ struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
+ size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
+ struct intel_pt *pt;
+ void *info_end;
+ __u64 *info;
+ int err;
+
+ if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
+ min_sz)
+ return -EINVAL;
+
+ pt = zalloc(sizeof(struct intel_pt));
+ if (!pt)
+ return -ENOMEM;
+
+ pt->vmcs_info = RB_ROOT;
+
+ addr_filters__init(&pt->filts);
+
+ err = perf_config(intel_pt_perf_config, pt);
+ if (err)
+ goto err_free;
+
+ err = auxtrace_queues__init(&pt->queues);
+ if (err)
+ goto err_free;
+
+ if (session->itrace_synth_opts->set) {
+ pt->synth_opts = *session->itrace_synth_opts;
+ } else {
+ struct itrace_synth_opts *opts = session->itrace_synth_opts;
+
+ itrace_synth_opts__set_default(&pt->synth_opts, opts->default_no_sample);
+ if (!opts->default_no_sample && !opts->inject) {
+ pt->synth_opts.branches = false;
+ pt->synth_opts.callchain = true;
+ pt->synth_opts.add_callchain = true;
+ }
+ pt->synth_opts.thread_stack = opts->thread_stack;
+ }
+
+ if (!(pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_USE_STDOUT))
+ intel_pt_log_set_name(INTEL_PT_PMU_NAME);
+
+ pt->session = session;
+ pt->machine = &session->machines.host; /* No kvm support */
+ pt->auxtrace_type = auxtrace_info->type;
+ pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
+ pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
+ pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
+ pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
+ pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
+ pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
+ pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
+ pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
+ pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
+ pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
+ intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
+ INTEL_PT_PER_CPU_MMAPS);
+
+ if (intel_pt_has(auxtrace_info, INTEL_PT_CYC_BIT)) {
+ pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
+ pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
+ pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
+ pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
+ pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
+ intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
+ INTEL_PT_CYC_BIT);
+ }
+
+ if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO)) {
+ pt->max_non_turbo_ratio =
+ auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO];
+ intel_pt_print_info(&auxtrace_info->priv[0],
+ INTEL_PT_MAX_NONTURBO_RATIO,
+ INTEL_PT_MAX_NONTURBO_RATIO);
+ }
+
+ info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
+ info_end = (void *)auxtrace_info + auxtrace_info->header.size;
+
+ if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) {
+ size_t len;
+
+ len = auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN];
+ intel_pt_print_info(&auxtrace_info->priv[0],
+ INTEL_PT_FILTER_STR_LEN,
+ INTEL_PT_FILTER_STR_LEN);
+ if (len) {
+ const char *filter = (const char *)info;
+
+ len = roundup(len + 1, 8);
+ info += len >> 3;
+ if ((void *)info > info_end) {
+ pr_err("%s: bad filter string length\n", __func__);
+ err = -EINVAL;
+ goto err_free_queues;
+ }
+ pt->filter = memdup(filter, len);
+ if (!pt->filter) {
+ err = -ENOMEM;
+ goto err_free_queues;
+ }
+ if (session->header.needs_swap)
+ mem_bswap_64(pt->filter, len);
+ if (pt->filter[len - 1]) {
+ pr_err("%s: filter string not null terminated\n", __func__);
+ err = -EINVAL;
+ goto err_free_queues;
+ }
+ err = addr_filters__parse_bare_filter(&pt->filts,
+ filter);
+ if (err)
+ goto err_free_queues;
+ }
+ intel_pt_print_info_str("Filter string", pt->filter);
+ }
+
+ if ((void *)info < info_end) {
+ pt->cap_event_trace = *info++;
+ if (dump_trace)
+ fprintf(stdout, " Cap Event Trace %d\n",
+ pt->cap_event_trace);
+ }
+
+ pt->timeless_decoding = intel_pt_timeless_decoding(pt);
+ if (pt->timeless_decoding && !pt->tc.time_mult)
+ pt->tc.time_mult = 1;
+ pt->have_tsc = intel_pt_have_tsc(pt);
+ pt->sampling_mode = intel_pt_sampling_mode(pt);
+ pt->est_tsc = !pt->timeless_decoding;
+
+ if (pt->synth_opts.vm_time_correlation) {
+ if (pt->timeless_decoding) {
+ pr_err("Intel PT has no time information for VM Time Correlation\n");
+ err = -EINVAL;
+ goto err_free_queues;
+ }
+ if (session->itrace_synth_opts->ptime_range) {
+ pr_err("Time ranges cannot be specified with VM Time Correlation\n");
+ err = -EINVAL;
+ goto err_free_queues;
+ }
+ /* Currently TSC Offset is calculated using MTC packets */
+ if (!intel_pt_have_mtc(pt)) {
+ pr_err("MTC packets must have been enabled for VM Time Correlation\n");
+ err = -EINVAL;
+ goto err_free_queues;
+ }
+ err = intel_pt_parse_vm_tm_corr_args(pt);
+ if (err)
+ goto err_free_queues;
+ }
+
+ pt->unknown_thread = thread__new(999999999, 999999999);
+ if (!pt->unknown_thread) {
+ err = -ENOMEM;
+ goto err_free_queues;
+ }
+
+ err = thread__set_comm(pt->unknown_thread, "unknown", 0);
+ if (err)
+ goto err_delete_thread;
+ if (thread__init_maps(pt->unknown_thread, pt->machine)) {
+ err = -ENOMEM;
+ goto err_delete_thread;
+ }
+
+ pt->auxtrace.process_event = intel_pt_process_event;
+ pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
+ pt->auxtrace.queue_data = intel_pt_queue_data;
+ pt->auxtrace.dump_auxtrace_sample = intel_pt_dump_sample;
+ pt->auxtrace.flush_events = intel_pt_flush;
+ pt->auxtrace.free_events = intel_pt_free_events;
+ pt->auxtrace.free = intel_pt_free;
+ pt->auxtrace.evsel_is_auxtrace = intel_pt_evsel_is_auxtrace;
+ session->auxtrace = &pt->auxtrace;
+
+ if (dump_trace)
+ return 0;
+
+ if (pt->have_sched_switch == 1) {
+ pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
+ if (!pt->switch_evsel) {
+ pr_err("%s: missing sched_switch event\n", __func__);
+ err = -EINVAL;
+ goto err_delete_thread;
+ }
+ } else if (pt->have_sched_switch == 2 &&
+ !intel_pt_find_switch(session->evlist)) {
+ pr_err("%s: missing context_switch attribute flag\n", __func__);
+ err = -EINVAL;
+ goto err_delete_thread;
+ }
+
+ if (pt->synth_opts.log) {
+ bool log_on_error = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ON_ERROR;
+ unsigned int log_on_error_size = pt->synth_opts.log_on_error_size;
+
+ intel_pt_log_enable(log_on_error, log_on_error_size);
+ }
+
+ /* Maximum non-turbo ratio is TSC freq / 100 MHz */
+ if (pt->tc.time_mult) {
+ u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);
+
+ if (!pt->max_non_turbo_ratio)
+ pt->max_non_turbo_ratio =
+ (tsc_freq + 50000000) / 100000000;
+ intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
+ intel_pt_log("Maximum non-turbo ratio %u\n",
+ pt->max_non_turbo_ratio);
+ pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000;
+ }
+
+ err = intel_pt_setup_time_ranges(pt, session->itrace_synth_opts);
+ if (err)
+ goto err_delete_thread;
+
+ if (pt->synth_opts.calls)
+ pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_TRACE_END;
+ if (pt->synth_opts.returns)
+ pt->branches_filter |= PERF_IP_FLAG_RETURN |
+ PERF_IP_FLAG_TRACE_BEGIN;
+
+ if ((pt->synth_opts.callchain || pt->synth_opts.add_callchain) &&
+ !symbol_conf.use_callchain) {
+ symbol_conf.use_callchain = true;
+ if (callchain_register_param(&callchain_param) < 0) {
+ symbol_conf.use_callchain = false;
+ pt->synth_opts.callchain = false;
+ pt->synth_opts.add_callchain = false;
+ }
+ }
+
+ if (pt->synth_opts.add_callchain) {
+ err = intel_pt_callchain_init(pt);
+ if (err)
+ goto err_delete_thread;
+ }
+
+ if (pt->synth_opts.last_branch || pt->synth_opts.add_last_branch) {
+ pt->br_stack_sz = pt->synth_opts.last_branch_sz;
+ pt->br_stack_sz_plus = pt->br_stack_sz;
+ }
+
+ if (pt->synth_opts.add_last_branch) {
+ err = intel_pt_br_stack_init(pt);
+ if (err)
+ goto err_delete_thread;
+ /*
+ * Additional branch stack size to cater for tracing from the
+ * actual sample ip to where the sample time is recorded.
+ * Measured at about 200 branches, but generously set to 1024.
+ * If kernel space is not being traced, then add just 1 for the
+ * branch to kernel space.
+ */
+ if (intel_pt_tracing_kernel(pt))
+ pt->br_stack_sz_plus += 1024;
+ else
+ pt->br_stack_sz_plus += 1;
+ }
+
+ pt->use_thread_stack = pt->synth_opts.callchain ||
+ pt->synth_opts.add_callchain ||
+ pt->synth_opts.thread_stack ||
+ pt->synth_opts.last_branch ||
+ pt->synth_opts.add_last_branch;
+
+ pt->callstack = pt->synth_opts.callchain ||
+ pt->synth_opts.add_callchain ||
+ pt->synth_opts.thread_stack;
+
+ err = intel_pt_synth_events(pt, session);
+ if (err)
+ goto err_delete_thread;
+
+ intel_pt_setup_pebs_events(pt);
+
+ if (perf_data__is_pipe(session->data)) {
+ pr_warning("WARNING: Intel PT with pipe mode is not recommended.\n"
+ " The output cannot relied upon. In particular,\n"
+ " timestamps and the order of events may be incorrect.\n");
+ }
+
+ if (pt->sampling_mode || list_empty(&session->auxtrace_index))
+ err = auxtrace_queue_data(session, true, true);
+ else
+ err = auxtrace_queues__process_index(&pt->queues, session);
+ if (err)
+ goto err_delete_thread;
+
+ if (pt->queues.populated)
+ pt->data_queued = true;
+
+ if (pt->timeless_decoding)
+ pr_debug2("Intel PT decoding without timestamps\n");
+
+ return 0;
+
+err_delete_thread:
+ zfree(&pt->chain);
+ thread__zput(pt->unknown_thread);
+err_free_queues:
+ intel_pt_log_disable();
+ auxtrace_queues__free(&pt->queues);
+ session->auxtrace = NULL;
+err_free:
+ addr_filters__exit(&pt->filts);
+ zfree(&pt->filter);
+ zfree(&pt->time_ranges);
+ free(pt);
+ return err;
+}
diff --git a/tools/perf/util/intel-pt.h b/tools/perf/util/intel-pt.h
new file mode 100644
index 000000000..c7d6068e3
--- /dev/null
+++ b/tools/perf/util/intel-pt.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * intel_pt.h: Intel Processor Trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ */
+
+#ifndef INCLUDE__PERF_INTEL_PT_H__
+#define INCLUDE__PERF_INTEL_PT_H__
+
+#define INTEL_PT_PMU_NAME "intel_pt"
+
+enum {
+ INTEL_PT_PMU_TYPE,
+ INTEL_PT_TIME_SHIFT,
+ INTEL_PT_TIME_MULT,
+ INTEL_PT_TIME_ZERO,
+ INTEL_PT_CAP_USER_TIME_ZERO,
+ INTEL_PT_TSC_BIT,
+ INTEL_PT_NORETCOMP_BIT,
+ INTEL_PT_HAVE_SCHED_SWITCH,
+ INTEL_PT_SNAPSHOT_MODE,
+ INTEL_PT_PER_CPU_MMAPS,
+ INTEL_PT_MTC_BIT,
+ INTEL_PT_MTC_FREQ_BITS,
+ INTEL_PT_TSC_CTC_N,
+ INTEL_PT_TSC_CTC_D,
+ INTEL_PT_CYC_BIT,
+ INTEL_PT_MAX_NONTURBO_RATIO,
+ INTEL_PT_FILTER_STR_LEN,
+ INTEL_PT_AUXTRACE_PRIV_MAX,
+};
+
+struct auxtrace_record;
+struct perf_tool;
+union perf_event;
+struct perf_session;
+struct perf_event_attr;
+struct perf_pmu;
+
+struct auxtrace_record *intel_pt_recording_init(int *err);
+
+int intel_pt_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session);
+
+struct perf_event_attr *intel_pt_pmu_default_config(struct perf_pmu *pmu);
+
+#endif
diff --git a/tools/perf/util/intlist.c b/tools/perf/util/intlist.c
new file mode 100644
index 000000000..934092199
--- /dev/null
+++ b/tools/perf/util/intlist.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Based on intlist.c by:
+ * (c) 2009 Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <linux/compiler.h>
+
+#include "intlist.h"
+
+static struct rb_node *intlist__node_new(struct rblist *rblist __maybe_unused,
+ const void *entry)
+{
+ unsigned long i = (unsigned long)entry;
+ struct rb_node *rc = NULL;
+ struct int_node *node = malloc(sizeof(*node));
+
+ if (node != NULL) {
+ node->i = i;
+ node->priv = NULL;
+ rc = &node->rb_node;
+ }
+
+ return rc;
+}
+
+static void int_node__delete(struct int_node *ilist)
+{
+ free(ilist);
+}
+
+static void intlist__node_delete(struct rblist *rblist __maybe_unused,
+ struct rb_node *rb_node)
+{
+ struct int_node *node = container_of(rb_node, struct int_node, rb_node);
+
+ int_node__delete(node);
+}
+
+static int intlist__node_cmp(struct rb_node *rb_node, const void *entry)
+{
+ unsigned long i = (unsigned long)entry;
+ struct int_node *node = container_of(rb_node, struct int_node, rb_node);
+
+ if (node->i > i)
+ return 1;
+ else if (node->i < i)
+ return -1;
+
+ return 0;
+}
+
+int intlist__add(struct intlist *ilist, unsigned long i)
+{
+ return rblist__add_node(&ilist->rblist, (void *)i);
+}
+
+void intlist__remove(struct intlist *ilist, struct int_node *node)
+{
+ rblist__remove_node(&ilist->rblist, &node->rb_node);
+}
+
+static struct int_node *__intlist__findnew(struct intlist *ilist,
+ unsigned long i, bool create)
+{
+ struct int_node *node = NULL;
+ struct rb_node *rb_node;
+
+ if (ilist == NULL)
+ return NULL;
+
+ if (create)
+ rb_node = rblist__findnew(&ilist->rblist, (void *)i);
+ else
+ rb_node = rblist__find(&ilist->rblist, (void *)i);
+
+ if (rb_node)
+ node = container_of(rb_node, struct int_node, rb_node);
+
+ return node;
+}
+
+struct int_node *intlist__find(struct intlist *ilist, unsigned long i)
+{
+ return __intlist__findnew(ilist, i, false);
+}
+
+struct int_node *intlist__findnew(struct intlist *ilist, unsigned long i)
+{
+ return __intlist__findnew(ilist, i, true);
+}
+
+static int intlist__parse_list(struct intlist *ilist, const char *s)
+{
+ char *sep;
+ int err;
+
+ do {
+ unsigned long value = strtol(s, &sep, 10);
+ err = -EINVAL;
+ if (*sep != ',' && *sep != '\0')
+ break;
+ err = intlist__add(ilist, value);
+ if (err)
+ break;
+ s = sep + 1;
+ } while (*sep != '\0');
+
+ return err;
+}
+
+struct intlist *intlist__new(const char *slist)
+{
+ struct intlist *ilist = malloc(sizeof(*ilist));
+
+ if (ilist != NULL) {
+ rblist__init(&ilist->rblist);
+ ilist->rblist.node_cmp = intlist__node_cmp;
+ ilist->rblist.node_new = intlist__node_new;
+ ilist->rblist.node_delete = intlist__node_delete;
+
+ if (slist && intlist__parse_list(ilist, slist))
+ goto out_delete;
+ }
+
+ return ilist;
+out_delete:
+ intlist__delete(ilist);
+ return NULL;
+}
+
+void intlist__delete(struct intlist *ilist)
+{
+ if (ilist != NULL)
+ rblist__delete(&ilist->rblist);
+}
+
+struct int_node *intlist__entry(const struct intlist *ilist, unsigned int idx)
+{
+ struct int_node *node = NULL;
+ struct rb_node *rb_node;
+
+ rb_node = rblist__entry(&ilist->rblist, idx);
+ if (rb_node)
+ node = container_of(rb_node, struct int_node, rb_node);
+
+ return node;
+}
diff --git a/tools/perf/util/intlist.h b/tools/perf/util/intlist.h
new file mode 100644
index 000000000..e336b174d
--- /dev/null
+++ b/tools/perf/util/intlist.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_INTLIST_H
+#define __PERF_INTLIST_H
+
+#include <linux/rbtree.h>
+#include <stdbool.h>
+
+#include "rblist.h"
+
+struct int_node {
+ struct rb_node rb_node;
+ unsigned long i;
+ void *priv;
+};
+
+struct intlist {
+ struct rblist rblist;
+};
+
+struct intlist *intlist__new(const char *slist);
+void intlist__delete(struct intlist *ilist);
+
+void intlist__remove(struct intlist *ilist, struct int_node *in);
+int intlist__add(struct intlist *ilist, unsigned long i);
+
+struct int_node *intlist__entry(const struct intlist *ilist, unsigned int idx);
+struct int_node *intlist__find(struct intlist *ilist, unsigned long i);
+struct int_node *intlist__findnew(struct intlist *ilist, unsigned long i);
+
+static inline bool intlist__has_entry(struct intlist *ilist, unsigned long i)
+{
+ return intlist__find(ilist, i) != NULL;
+}
+
+static inline bool intlist__empty(const struct intlist *ilist)
+{
+ return rblist__empty(&ilist->rblist);
+}
+
+static inline unsigned int intlist__nr_entries(const struct intlist *ilist)
+{
+ return rblist__nr_entries(&ilist->rblist);
+}
+
+/* For intlist iteration */
+static inline struct int_node *intlist__first(struct intlist *ilist)
+{
+ struct rb_node *rn = rb_first_cached(&ilist->rblist.entries);
+ return rn ? rb_entry(rn, struct int_node, rb_node) : NULL;
+}
+static inline struct int_node *intlist__next(struct int_node *in)
+{
+ struct rb_node *rn;
+ if (!in)
+ return NULL;
+ rn = rb_next(&in->rb_node);
+ return rn ? rb_entry(rn, struct int_node, rb_node) : NULL;
+}
+
+/**
+ * intlist__for_each_entry - iterate over a intlist
+ * @pos: the &struct int_node to use as a loop cursor.
+ * @ilist: the &struct intlist for loop.
+ */
+#define intlist__for_each_entry(pos, ilist) \
+ for (pos = intlist__first(ilist); pos; pos = intlist__next(pos))
+
+/**
+ * intlist__for_each_entry_safe - iterate over a intlist safe against removal of
+ * int_node
+ * @pos: the &struct int_node to use as a loop cursor.
+ * @n: another &struct int_node to use as temporary storage.
+ * @ilist: the &struct intlist for loop.
+ */
+#define intlist__for_each_entry_safe(pos, n, ilist) \
+ for (pos = intlist__first(ilist), n = intlist__next(pos); pos;\
+ pos = n, n = intlist__next(n))
+#endif /* __PERF_INTLIST_H */
diff --git a/tools/perf/util/iostat.c b/tools/perf/util/iostat.c
new file mode 100644
index 000000000..b770bd473
--- /dev/null
+++ b/tools/perf/util/iostat.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util/iostat.h"
+#include "util/debug.h"
+
+enum iostat_mode_t iostat_mode = IOSTAT_NONE;
+
+__weak int iostat_prepare(struct evlist *evlist __maybe_unused,
+ struct perf_stat_config *config __maybe_unused)
+{
+ return -1;
+}
+
+__weak int iostat_parse(const struct option *opt __maybe_unused,
+ const char *str __maybe_unused,
+ int unset __maybe_unused)
+{
+ pr_err("iostat mode is not supported on current platform\n");
+ return -1;
+}
+
+__weak void iostat_list(struct evlist *evlist __maybe_unused,
+ struct perf_stat_config *config __maybe_unused)
+{
+}
+
+__weak void iostat_release(struct evlist *evlist __maybe_unused)
+{
+}
+
+__weak void iostat_print_header_prefix(struct perf_stat_config *config __maybe_unused)
+{
+}
+
+__weak void iostat_print_metric(struct perf_stat_config *config __maybe_unused,
+ struct evsel *evsel __maybe_unused,
+ struct perf_stat_output_ctx *out __maybe_unused)
+{
+}
+
+__weak void iostat_prefix(struct evlist *evlist __maybe_unused,
+ struct perf_stat_config *config __maybe_unused,
+ char *prefix __maybe_unused,
+ struct timespec *ts __maybe_unused)
+{
+}
+
+__weak void iostat_print_counters(struct evlist *evlist __maybe_unused,
+ struct perf_stat_config *config __maybe_unused,
+ struct timespec *ts __maybe_unused,
+ char *prefix __maybe_unused,
+ iostat_print_counter_t print_cnt_cb __maybe_unused,
+ void *arg __maybe_unused)
+{
+}
diff --git a/tools/perf/util/iostat.h b/tools/perf/util/iostat.h
new file mode 100644
index 000000000..a4e7299c5
--- /dev/null
+++ b/tools/perf/util/iostat.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * perf iostat
+ *
+ * Copyright (C) 2020, Intel Corporation
+ *
+ * Authors: Alexander Antonov <alexander.antonov@linux.intel.com>
+ */
+
+#ifndef _IOSTAT_H
+#define _IOSTAT_H
+
+#include <subcmd/parse-options.h>
+#include "util/stat.h"
+#include "util/parse-events.h"
+#include "util/evlist.h"
+
+struct option;
+struct perf_stat_config;
+struct evlist;
+struct timespec;
+
+enum iostat_mode_t {
+ IOSTAT_NONE = -1,
+ IOSTAT_RUN = 0,
+ IOSTAT_LIST = 1
+};
+
+extern enum iostat_mode_t iostat_mode;
+
+typedef void (*iostat_print_counter_t)(struct perf_stat_config *, struct evsel *, void *);
+
+int iostat_prepare(struct evlist *evlist, struct perf_stat_config *config);
+int iostat_parse(const struct option *opt, const char *str,
+ int unset __maybe_unused);
+void iostat_list(struct evlist *evlist, struct perf_stat_config *config);
+void iostat_release(struct evlist *evlist);
+void iostat_prefix(struct evlist *evlist, struct perf_stat_config *config,
+ char *prefix, struct timespec *ts);
+void iostat_print_header_prefix(struct perf_stat_config *config);
+void iostat_print_metric(struct perf_stat_config *config, struct evsel *evsel,
+ struct perf_stat_output_ctx *out);
+void iostat_print_counters(struct evlist *evlist,
+ struct perf_stat_config *config, struct timespec *ts,
+ char *prefix, iostat_print_counter_t print_cnt_cb, void *arg);
+
+#endif /* _IOSTAT_H */
diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h
new file mode 100644
index 000000000..fb810e1b2
--- /dev/null
+++ b/tools/perf/util/jit.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __JIT_H__
+#define __JIT_H__
+
+#include <data.h>
+
+int jit_process(struct perf_session *session, struct perf_data *output,
+ struct machine *machine, char *filename, pid_t pid, pid_t tid, u64 *nbytes);
+
+int jit_inject_record(const char *filename);
+
+#endif /* __JIT_H__ */
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
new file mode 100644
index 000000000..6b2b96c16
--- /dev/null
+++ b/tools/perf/util/jitdump.c
@@ -0,0 +1,888 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <errno.h>
+#include <libgen.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <byteswap.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <linux/stringify.h>
+
+#include "build-id.h"
+#include "event.h"
+#include "debug.h"
+#include "evlist.h"
+#include "namespaces.h"
+#include "symbol.h"
+#include <elf.h>
+
+#include "tsc.h"
+#include "session.h"
+#include "jit.h"
+#include "jitdump.h"
+#include "genelf.h"
+#include "thread.h"
+
+#include <linux/ctype.h>
+#include <linux/zalloc.h>
+
+struct jit_buf_desc {
+ struct perf_data *output;
+ struct perf_session *session;
+ struct machine *machine;
+ struct nsinfo *nsi;
+ union jr_entry *entry;
+ void *buf;
+ uint64_t sample_type;
+ size_t bufsize;
+ FILE *in;
+ bool needs_bswap; /* handles cross-endianness */
+ bool use_arch_timestamp;
+ void *debug_data;
+ void *unwinding_data;
+ uint64_t unwinding_size;
+ uint64_t unwinding_mapped_size;
+ uint64_t eh_frame_hdr_size;
+ size_t nr_debug_entries;
+ uint32_t code_load_count;
+ u64 bytes_written;
+ struct rb_root code_root;
+ char dir[PATH_MAX];
+};
+
+struct jit_tool {
+ struct perf_tool tool;
+ struct perf_data output;
+ struct perf_data input;
+ u64 bytes_written;
+};
+
+#define hmax(a, b) ((a) > (b) ? (a) : (b))
+#define get_jit_tool(t) (container_of(tool, struct jit_tool, tool))
+
+static int
+jit_emit_elf(struct jit_buf_desc *jd,
+ char *filename,
+ const char *sym,
+ uint64_t code_addr,
+ const void *code,
+ int csize,
+ void *debug,
+ int nr_debug_entries,
+ void *unwinding,
+ uint32_t unwinding_header_size,
+ uint32_t unwinding_size)
+{
+ int ret, fd, saved_errno;
+ struct nscookie nsc;
+
+ if (verbose > 0)
+ fprintf(stderr, "write ELF image %s\n", filename);
+
+ nsinfo__mountns_enter(jd->nsi, &nsc);
+ fd = open(filename, O_CREAT|O_TRUNC|O_WRONLY, 0644);
+ saved_errno = errno;
+ nsinfo__mountns_exit(&nsc);
+ if (fd == -1) {
+ pr_warning("cannot create jit ELF %s: %s\n", filename, strerror(saved_errno));
+ return -1;
+ }
+
+ ret = jit_write_elf(fd, code_addr, sym, (const void *)code, csize, debug, nr_debug_entries,
+ unwinding, unwinding_header_size, unwinding_size);
+
+ close(fd);
+
+ if (ret) {
+ nsinfo__mountns_enter(jd->nsi, &nsc);
+ unlink(filename);
+ nsinfo__mountns_exit(&nsc);
+ }
+
+ return ret;
+}
+
+static void
+jit_close(struct jit_buf_desc *jd)
+{
+ if (!(jd && jd->in))
+ return;
+ funlockfile(jd->in);
+ fclose(jd->in);
+ jd->in = NULL;
+}
+
+static int
+jit_validate_events(struct perf_session *session)
+{
+ struct evsel *evsel;
+
+ /*
+ * check that all events use CLOCK_MONOTONIC
+ */
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (evsel->core.attr.use_clockid == 0 || evsel->core.attr.clockid != CLOCK_MONOTONIC)
+ return -1;
+ }
+ return 0;
+}
+
+static int
+jit_open(struct jit_buf_desc *jd, const char *name)
+{
+ struct jitheader header;
+ struct nscookie nsc;
+ struct jr_prefix *prefix;
+ ssize_t bs, bsz = 0;
+ void *n, *buf = NULL;
+ int ret, retval = -1;
+
+ nsinfo__mountns_enter(jd->nsi, &nsc);
+ jd->in = fopen(name, "r");
+ nsinfo__mountns_exit(&nsc);
+ if (!jd->in)
+ return -1;
+
+ bsz = hmax(sizeof(header), sizeof(*prefix));
+
+ buf = malloc(bsz);
+ if (!buf)
+ goto error;
+
+ /*
+ * protect from writer modifying the file while we are reading it
+ */
+ flockfile(jd->in);
+
+ ret = fread(buf, sizeof(header), 1, jd->in);
+ if (ret != 1)
+ goto error;
+
+ memcpy(&header, buf, sizeof(header));
+
+ if (header.magic != JITHEADER_MAGIC) {
+ if (header.magic != JITHEADER_MAGIC_SW)
+ goto error;
+ jd->needs_bswap = true;
+ }
+
+ if (jd->needs_bswap) {
+ header.version = bswap_32(header.version);
+ header.total_size = bswap_32(header.total_size);
+ header.pid = bswap_32(header.pid);
+ header.elf_mach = bswap_32(header.elf_mach);
+ header.timestamp = bswap_64(header.timestamp);
+ header.flags = bswap_64(header.flags);
+ }
+
+ jd->use_arch_timestamp = header.flags & JITDUMP_FLAGS_ARCH_TIMESTAMP;
+
+ if (verbose > 2)
+ pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\nuse_arch_timestamp=%d\n",
+ header.version,
+ header.total_size,
+ (unsigned long long)header.timestamp,
+ header.pid,
+ header.elf_mach,
+ jd->use_arch_timestamp);
+
+ if (header.version > JITHEADER_VERSION) {
+ pr_err("wrong jitdump version %u, expected " __stringify(JITHEADER_VERSION),
+ header.version);
+ goto error;
+ }
+
+ if (header.flags & JITDUMP_FLAGS_RESERVED) {
+ pr_err("jitdump file contains invalid or unsupported flags 0x%llx\n",
+ (unsigned long long)header.flags & JITDUMP_FLAGS_RESERVED);
+ goto error;
+ }
+
+ if (jd->use_arch_timestamp && !jd->session->time_conv.time_mult) {
+ pr_err("jitdump file uses arch timestamps but there is no timestamp conversion\n");
+ goto error;
+ }
+
+ /*
+ * validate event is using the correct clockid
+ */
+ if (!jd->use_arch_timestamp && jit_validate_events(jd->session)) {
+ pr_err("error, jitted code must be sampled with perf record -k 1\n");
+ goto error;
+ }
+
+ bs = header.total_size - sizeof(header);
+
+ if (bs > bsz) {
+ n = realloc(buf, bs);
+ if (!n)
+ goto error;
+ bsz = bs;
+ buf = n;
+ /* read extra we do not know about */
+ ret = fread(buf, bs - bsz, 1, jd->in);
+ if (ret != 1)
+ goto error;
+ }
+ /*
+ * keep dirname for generating files and mmap records
+ */
+ strcpy(jd->dir, name);
+ dirname(jd->dir);
+ free(buf);
+
+ return 0;
+error:
+ free(buf);
+ funlockfile(jd->in);
+ fclose(jd->in);
+ return retval;
+}
+
+static union jr_entry *
+jit_get_next_entry(struct jit_buf_desc *jd)
+{
+ struct jr_prefix *prefix;
+ union jr_entry *jr;
+ void *addr;
+ size_t bs, size;
+ int id, ret;
+
+ if (!(jd && jd->in))
+ return NULL;
+
+ if (jd->buf == NULL) {
+ size_t sz = getpagesize();
+ if (sz < sizeof(*prefix))
+ sz = sizeof(*prefix);
+
+ jd->buf = malloc(sz);
+ if (jd->buf == NULL)
+ return NULL;
+
+ jd->bufsize = sz;
+ }
+
+ prefix = jd->buf;
+
+ /*
+ * file is still locked at this point
+ */
+ ret = fread(prefix, sizeof(*prefix), 1, jd->in);
+ if (ret != 1)
+ return NULL;
+
+ if (jd->needs_bswap) {
+ prefix->id = bswap_32(prefix->id);
+ prefix->total_size = bswap_32(prefix->total_size);
+ prefix->timestamp = bswap_64(prefix->timestamp);
+ }
+ id = prefix->id;
+ size = prefix->total_size;
+
+ bs = (size_t)size;
+ if (bs < sizeof(*prefix))
+ return NULL;
+
+ if (id >= JIT_CODE_MAX) {
+ pr_warning("next_entry: unknown record type %d, skipping\n", id);
+ }
+ if (bs > jd->bufsize) {
+ void *n;
+ n = realloc(jd->buf, bs);
+ if (!n)
+ return NULL;
+ jd->buf = n;
+ jd->bufsize = bs;
+ }
+
+ addr = ((void *)jd->buf) + sizeof(*prefix);
+
+ ret = fread(addr, bs - sizeof(*prefix), 1, jd->in);
+ if (ret != 1)
+ return NULL;
+
+ jr = (union jr_entry *)jd->buf;
+
+ switch(id) {
+ case JIT_CODE_DEBUG_INFO:
+ if (jd->needs_bswap) {
+ uint64_t n;
+ jr->info.code_addr = bswap_64(jr->info.code_addr);
+ jr->info.nr_entry = bswap_64(jr->info.nr_entry);
+ for (n = 0 ; n < jr->info.nr_entry; n++) {
+ jr->info.entries[n].addr = bswap_64(jr->info.entries[n].addr);
+ jr->info.entries[n].lineno = bswap_32(jr->info.entries[n].lineno);
+ jr->info.entries[n].discrim = bswap_32(jr->info.entries[n].discrim);
+ }
+ }
+ break;
+ case JIT_CODE_UNWINDING_INFO:
+ if (jd->needs_bswap) {
+ jr->unwinding.unwinding_size = bswap_64(jr->unwinding.unwinding_size);
+ jr->unwinding.eh_frame_hdr_size = bswap_64(jr->unwinding.eh_frame_hdr_size);
+ jr->unwinding.mapped_size = bswap_64(jr->unwinding.mapped_size);
+ }
+ break;
+ case JIT_CODE_CLOSE:
+ break;
+ case JIT_CODE_LOAD:
+ if (jd->needs_bswap) {
+ jr->load.pid = bswap_32(jr->load.pid);
+ jr->load.tid = bswap_32(jr->load.tid);
+ jr->load.vma = bswap_64(jr->load.vma);
+ jr->load.code_addr = bswap_64(jr->load.code_addr);
+ jr->load.code_size = bswap_64(jr->load.code_size);
+ jr->load.code_index= bswap_64(jr->load.code_index);
+ }
+ jd->code_load_count++;
+ break;
+ case JIT_CODE_MOVE:
+ if (jd->needs_bswap) {
+ jr->move.pid = bswap_32(jr->move.pid);
+ jr->move.tid = bswap_32(jr->move.tid);
+ jr->move.vma = bswap_64(jr->move.vma);
+ jr->move.old_code_addr = bswap_64(jr->move.old_code_addr);
+ jr->move.new_code_addr = bswap_64(jr->move.new_code_addr);
+ jr->move.code_size = bswap_64(jr->move.code_size);
+ jr->move.code_index = bswap_64(jr->move.code_index);
+ }
+ break;
+ case JIT_CODE_MAX:
+ default:
+ /* skip unknown record (we have read them) */
+ break;
+ }
+ return jr;
+}
+
+static int
+jit_inject_event(struct jit_buf_desc *jd, union perf_event *event)
+{
+ ssize_t size;
+
+ size = perf_data__write(jd->output, event, event->header.size);
+ if (size < 0)
+ return -1;
+
+ jd->bytes_written += size;
+ return 0;
+}
+
+static pid_t jr_entry_pid(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+ if (jd->nsi && nsinfo__in_pidns(jd->nsi))
+ return nsinfo__tgid(jd->nsi);
+ return jr->load.pid;
+}
+
+static pid_t jr_entry_tid(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+ if (jd->nsi && nsinfo__in_pidns(jd->nsi))
+ return nsinfo__pid(jd->nsi);
+ return jr->load.tid;
+}
+
+static uint64_t convert_timestamp(struct jit_buf_desc *jd, uint64_t timestamp)
+{
+ struct perf_tsc_conversion tc = { .time_shift = 0, };
+ struct perf_record_time_conv *time_conv = &jd->session->time_conv;
+
+ if (!jd->use_arch_timestamp)
+ return timestamp;
+
+ tc.time_shift = time_conv->time_shift;
+ tc.time_mult = time_conv->time_mult;
+ tc.time_zero = time_conv->time_zero;
+
+ /*
+ * The event TIME_CONV was extended for the fields from "time_cycles"
+ * when supported cap_user_time_short, for backward compatibility,
+ * checks the event size and assigns these extended fields if these
+ * fields are contained in the event.
+ */
+ if (event_contains(*time_conv, time_cycles)) {
+ tc.time_cycles = time_conv->time_cycles;
+ tc.time_mask = time_conv->time_mask;
+ tc.cap_user_time_zero = time_conv->cap_user_time_zero;
+ tc.cap_user_time_short = time_conv->cap_user_time_short;
+
+ if (!tc.cap_user_time_zero)
+ return 0;
+ }
+
+ return tsc_to_perf_time(timestamp, &tc);
+}
+
+static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+ struct perf_sample sample;
+ union perf_event *event;
+ struct perf_tool *tool = jd->session->tool;
+ uint64_t code, addr;
+ uintptr_t uaddr;
+ char *filename;
+ struct stat st;
+ size_t size;
+ u16 idr_size;
+ const char *sym;
+ uint64_t count;
+ int ret, csize, usize;
+ pid_t nspid, pid, tid;
+ struct {
+ u32 pid, tid;
+ u64 time;
+ } *id;
+
+ nspid = jr->load.pid;
+ pid = jr_entry_pid(jd, jr);
+ tid = jr_entry_tid(jd, jr);
+ csize = jr->load.code_size;
+ usize = jd->unwinding_mapped_size;
+ addr = jr->load.code_addr;
+ sym = (void *)((unsigned long)jr + sizeof(jr->load));
+ code = (unsigned long)jr + jr->load.p.total_size - csize;
+ count = jr->load.code_index;
+ idr_size = jd->machine->id_hdr_size;
+
+ event = calloc(1, sizeof(*event) + idr_size);
+ if (!event)
+ return -1;
+
+ filename = event->mmap2.filename;
+ size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%" PRIu64 ".so",
+ jd->dir,
+ nspid,
+ count);
+
+ size++; /* for \0 */
+
+ size = PERF_ALIGN(size, sizeof(u64));
+ uaddr = (uintptr_t)code;
+ ret = jit_emit_elf(jd, filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries,
+ jd->unwinding_data, jd->eh_frame_hdr_size, jd->unwinding_size);
+
+ if (jd->debug_data && jd->nr_debug_entries) {
+ zfree(&jd->debug_data);
+ jd->nr_debug_entries = 0;
+ }
+
+ if (jd->unwinding_data && jd->eh_frame_hdr_size) {
+ zfree(&jd->unwinding_data);
+ jd->eh_frame_hdr_size = 0;
+ jd->unwinding_mapped_size = 0;
+ jd->unwinding_size = 0;
+ }
+
+ if (ret) {
+ free(event);
+ return -1;
+ }
+ if (nsinfo__stat(filename, &st, jd->nsi))
+ memset(&st, 0, sizeof(st));
+
+ event->mmap2.header.type = PERF_RECORD_MMAP2;
+ event->mmap2.header.misc = PERF_RECORD_MISC_USER;
+ event->mmap2.header.size = (sizeof(event->mmap2) -
+ (sizeof(event->mmap2.filename) - size) + idr_size);
+
+ event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET;
+ event->mmap2.start = addr;
+ event->mmap2.len = usize ? ALIGN_8(csize) + usize : csize;
+ event->mmap2.pid = pid;
+ event->mmap2.tid = tid;
+ event->mmap2.ino = st.st_ino;
+ event->mmap2.maj = major(st.st_dev);
+ event->mmap2.min = minor(st.st_dev);
+ event->mmap2.prot = st.st_mode;
+ event->mmap2.flags = MAP_SHARED;
+ event->mmap2.ino_generation = 1;
+
+ id = (void *)((unsigned long)event + event->mmap.header.size - idr_size);
+ if (jd->sample_type & PERF_SAMPLE_TID) {
+ id->pid = pid;
+ id->tid = tid;
+ }
+ if (jd->sample_type & PERF_SAMPLE_TIME)
+ id->time = convert_timestamp(jd, jr->load.p.timestamp);
+
+ /*
+ * create pseudo sample to induce dso hit increment
+ * use first address as sample address
+ */
+ memset(&sample, 0, sizeof(sample));
+ sample.cpumode = PERF_RECORD_MISC_USER;
+ sample.pid = pid;
+ sample.tid = tid;
+ sample.time = id->time;
+ sample.ip = addr;
+
+ ret = perf_event__process_mmap2(tool, event, &sample, jd->machine);
+ if (ret)
+ goto out;
+
+ ret = jit_inject_event(jd, event);
+ /*
+ * mark dso as use to generate buildid in the header
+ */
+ if (!ret)
+ build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine);
+
+out:
+ free(event);
+ return ret;
+}
+
+static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+ struct perf_sample sample;
+ union perf_event *event;
+ struct perf_tool *tool = jd->session->tool;
+ char *filename;
+ size_t size;
+ struct stat st;
+ int usize;
+ u16 idr_size;
+ int ret;
+ pid_t nspid, pid, tid;
+ struct {
+ u32 pid, tid;
+ u64 time;
+ } *id;
+
+ nspid = jr->load.pid;
+ pid = jr_entry_pid(jd, jr);
+ tid = jr_entry_tid(jd, jr);
+ usize = jd->unwinding_mapped_size;
+ idr_size = jd->machine->id_hdr_size;
+
+ /*
+ * +16 to account for sample_id_all (hack)
+ */
+ event = calloc(1, sizeof(*event) + 16);
+ if (!event)
+ return -1;
+
+ filename = event->mmap2.filename;
+ size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%" PRIu64 ".so",
+ jd->dir,
+ nspid,
+ jr->move.code_index);
+
+ size++; /* for \0 */
+
+ if (nsinfo__stat(filename, &st, jd->nsi))
+ memset(&st, 0, sizeof(st));
+
+ size = PERF_ALIGN(size, sizeof(u64));
+
+ event->mmap2.header.type = PERF_RECORD_MMAP2;
+ event->mmap2.header.misc = PERF_RECORD_MISC_USER;
+ event->mmap2.header.size = (sizeof(event->mmap2) -
+ (sizeof(event->mmap2.filename) - size) + idr_size);
+ event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET;
+ event->mmap2.start = jr->move.new_code_addr;
+ event->mmap2.len = usize ? ALIGN_8(jr->move.code_size) + usize
+ : jr->move.code_size;
+ event->mmap2.pid = pid;
+ event->mmap2.tid = tid;
+ event->mmap2.ino = st.st_ino;
+ event->mmap2.maj = major(st.st_dev);
+ event->mmap2.min = minor(st.st_dev);
+ event->mmap2.prot = st.st_mode;
+ event->mmap2.flags = MAP_SHARED;
+ event->mmap2.ino_generation = 1;
+
+ id = (void *)((unsigned long)event + event->mmap.header.size - idr_size);
+ if (jd->sample_type & PERF_SAMPLE_TID) {
+ id->pid = pid;
+ id->tid = tid;
+ }
+ if (jd->sample_type & PERF_SAMPLE_TIME)
+ id->time = convert_timestamp(jd, jr->load.p.timestamp);
+
+ /*
+ * create pseudo sample to induce dso hit increment
+ * use first address as sample address
+ */
+ memset(&sample, 0, sizeof(sample));
+ sample.cpumode = PERF_RECORD_MISC_USER;
+ sample.pid = pid;
+ sample.tid = tid;
+ sample.time = id->time;
+ sample.ip = jr->move.new_code_addr;
+
+ ret = perf_event__process_mmap2(tool, event, &sample, jd->machine);
+ if (ret)
+ return ret;
+
+ ret = jit_inject_event(jd, event);
+ if (!ret)
+ build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine);
+
+ return ret;
+}
+
+static int jit_repipe_debug_info(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+ void *data;
+ size_t sz;
+
+ if (!(jd && jr))
+ return -1;
+
+ sz = jr->prefix.total_size - sizeof(jr->info);
+ data = malloc(sz);
+ if (!data)
+ return -1;
+
+ memcpy(data, &jr->info.entries, sz);
+
+ jd->debug_data = data;
+
+ /*
+ * we must use nr_entry instead of size here because
+ * we cannot distinguish actual entry from padding otherwise
+ */
+ jd->nr_debug_entries = jr->info.nr_entry;
+
+ return 0;
+}
+
+static int
+jit_repipe_unwinding_info(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+ void *unwinding_data;
+ uint32_t unwinding_data_size;
+
+ if (!(jd && jr))
+ return -1;
+
+ unwinding_data_size = jr->prefix.total_size - sizeof(jr->unwinding);
+ unwinding_data = malloc(unwinding_data_size);
+ if (!unwinding_data)
+ return -1;
+
+ memcpy(unwinding_data, &jr->unwinding.unwinding_data,
+ unwinding_data_size);
+
+ jd->eh_frame_hdr_size = jr->unwinding.eh_frame_hdr_size;
+ jd->unwinding_size = jr->unwinding.unwinding_size;
+ jd->unwinding_mapped_size = jr->unwinding.mapped_size;
+ jd->unwinding_data = unwinding_data;
+
+ return 0;
+}
+
+static int
+jit_process_dump(struct jit_buf_desc *jd)
+{
+ union jr_entry *jr;
+ int ret = 0;
+
+ while ((jr = jit_get_next_entry(jd))) {
+ switch(jr->prefix.id) {
+ case JIT_CODE_LOAD:
+ ret = jit_repipe_code_load(jd, jr);
+ break;
+ case JIT_CODE_MOVE:
+ ret = jit_repipe_code_move(jd, jr);
+ break;
+ case JIT_CODE_DEBUG_INFO:
+ ret = jit_repipe_debug_info(jd, jr);
+ break;
+ case JIT_CODE_UNWINDING_INFO:
+ ret = jit_repipe_unwinding_info(jd, jr);
+ break;
+ default:
+ ret = 0;
+ continue;
+ }
+ }
+ return ret;
+}
+
+static int
+jit_inject(struct jit_buf_desc *jd, char *path)
+{
+ int ret;
+
+ if (verbose > 0)
+ fprintf(stderr, "injecting: %s\n", path);
+
+ ret = jit_open(jd, path);
+ if (ret)
+ return -1;
+
+ ret = jit_process_dump(jd);
+
+ jit_close(jd);
+
+ if (verbose > 0)
+ fprintf(stderr, "injected: %s (%d)\n", path, ret);
+
+ return 0;
+}
+
+/*
+ * File must be with pattern .../jit-XXXX.dump
+ * where XXXX is the PID of the process which did the mmap()
+ * as captured in the RECORD_MMAP record
+ */
+static int
+jit_detect(char *mmap_name, pid_t pid, struct nsinfo *nsi)
+ {
+ char *p;
+ char *end = NULL;
+ pid_t pid2;
+
+ if (verbose > 2)
+ fprintf(stderr, "jit marker trying : %s\n", mmap_name);
+ /*
+ * get file name
+ */
+ p = strrchr(mmap_name, '/');
+ if (!p)
+ return -1;
+
+ /*
+ * match prefix
+ */
+ if (strncmp(p, "/jit-", 5))
+ return -1;
+
+ /*
+ * skip prefix
+ */
+ p += 5;
+
+ /*
+ * must be followed by a pid
+ */
+ if (!isdigit(*p))
+ return -1;
+
+ pid2 = (int)strtol(p, &end, 10);
+ if (!end)
+ return -1;
+
+ /*
+ * pid does not match mmap pid
+ * pid==0 in system-wide mode (synthesized)
+ */
+ if (pid && pid2 != nsinfo__nstgid(nsi))
+ return -1;
+ /*
+ * validate suffix
+ */
+ if (strcmp(end, ".dump"))
+ return -1;
+
+ if (verbose > 0)
+ fprintf(stderr, "jit marker found: %s\n", mmap_name);
+
+ return 0;
+}
+
+static void jit_add_pid(struct machine *machine, pid_t pid)
+{
+ struct thread *thread = machine__findnew_thread(machine, pid, pid);
+
+ if (!thread) {
+ pr_err("%s: thread %d not found or created\n", __func__, pid);
+ return;
+ }
+
+ thread__set_priv(thread, (void *)true);
+ thread__put(thread);
+}
+
+static bool jit_has_pid(struct machine *machine, pid_t pid)
+{
+ struct thread *thread = machine__find_thread(machine, pid, pid);
+ void *priv;
+
+ if (!thread)
+ return false;
+
+ priv = thread__priv(thread);
+ thread__put(thread);
+ return (bool)priv;
+}
+
+int
+jit_process(struct perf_session *session,
+ struct perf_data *output,
+ struct machine *machine,
+ char *filename,
+ pid_t pid,
+ pid_t tid,
+ u64 *nbytes)
+{
+ struct thread *thread;
+ struct nsinfo *nsi;
+ struct evsel *first;
+ struct jit_buf_desc jd;
+ int ret;
+
+ thread = machine__findnew_thread(machine, pid, tid);
+ if (thread == NULL) {
+ pr_err("problem processing JIT mmap event, skipping it.\n");
+ return 0;
+ }
+
+ nsi = nsinfo__get(thread__nsinfo(thread));
+ thread__put(thread);
+
+ /*
+ * first, detect marker mmap (i.e., the jitdump mmap)
+ */
+ if (jit_detect(filename, pid, nsi)) {
+ nsinfo__put(nsi);
+
+ /*
+ * Strip //anon*, [anon:* and /memfd:* mmaps if we processed a jitdump for this pid
+ */
+ if (jit_has_pid(machine, pid) &&
+ ((strncmp(filename, "//anon", 6) == 0) ||
+ (strncmp(filename, "[anon:", 6) == 0) ||
+ (strncmp(filename, "/memfd:", 7) == 0)))
+ return 1;
+
+ return 0;
+ }
+
+ memset(&jd, 0, sizeof(jd));
+
+ jd.session = session;
+ jd.output = output;
+ jd.machine = machine;
+ jd.nsi = nsi;
+
+ /*
+ * track sample_type to compute id_all layout
+ * perf sets the same sample type to all events as of now
+ */
+ first = evlist__first(session->evlist);
+ jd.sample_type = first->core.attr.sample_type;
+
+ *nbytes = 0;
+
+ ret = jit_inject(&jd, filename);
+ if (!ret) {
+ jit_add_pid(machine, pid);
+ *nbytes = jd.bytes_written;
+ ret = 1;
+ }
+
+ nsinfo__put(jd.nsi);
+ free(jd.buf);
+
+ return ret;
+}
diff --git a/tools/perf/util/jitdump.h b/tools/perf/util/jitdump.h
new file mode 100644
index 000000000..ab2842def
--- /dev/null
+++ b/tools/perf/util/jitdump.h
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * jitdump.h: jitted code info encapsulation file format
+ *
+ * Adapted from OProfile GPLv2 support jidump.h:
+ * Copyright 2007 OProfile authors
+ * Jens Wilke
+ * Daniel Hansel
+ * Copyright IBM Corporation 2007
+ */
+#ifndef JITDUMP_H
+#define JITDUMP_H
+
+#include <sys/time.h>
+#include <time.h>
+#include <stdint.h>
+
+/* JiTD */
+#define JITHEADER_MAGIC 0x4A695444
+#define JITHEADER_MAGIC_SW 0x4454694A
+
+#define PADDING_8ALIGNED(x) ((((x) + 7) & 7) ^ 7)
+#define ALIGN_8(x) (((x) + 7) & (~7))
+
+#define JITHEADER_VERSION 1
+
+enum jitdump_flags_bits {
+ JITDUMP_FLAGS_ARCH_TIMESTAMP_BIT,
+ JITDUMP_FLAGS_MAX_BIT,
+};
+
+#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << JITDUMP_FLAGS_ARCH_TIMESTAMP_BIT)
+
+#define JITDUMP_FLAGS_RESERVED (JITDUMP_FLAGS_MAX_BIT < 64 ? \
+ (~((1ULL << JITDUMP_FLAGS_MAX_BIT) - 1)) : 0)
+
+struct jitheader {
+ uint32_t magic; /* characters "jItD" */
+ uint32_t version; /* header version */
+ uint32_t total_size; /* total size of header */
+ uint32_t elf_mach; /* elf mach target */
+ uint32_t pad1; /* reserved */
+ uint32_t pid; /* JIT process id */
+ uint64_t timestamp; /* timestamp */
+ uint64_t flags; /* flags */
+};
+
+enum jit_record_type {
+ JIT_CODE_LOAD = 0,
+ JIT_CODE_MOVE = 1,
+ JIT_CODE_DEBUG_INFO = 2,
+ JIT_CODE_CLOSE = 3,
+ JIT_CODE_UNWINDING_INFO = 4,
+
+ JIT_CODE_MAX,
+};
+
+/* record prefix (mandatory in each record) */
+struct jr_prefix {
+ uint32_t id;
+ uint32_t total_size;
+ uint64_t timestamp;
+};
+
+struct jr_code_load {
+ struct jr_prefix p;
+
+ uint32_t pid;
+ uint32_t tid;
+ uint64_t vma;
+ uint64_t code_addr;
+ uint64_t code_size;
+ uint64_t code_index;
+};
+
+struct jr_code_close {
+ struct jr_prefix p;
+};
+
+struct jr_code_move {
+ struct jr_prefix p;
+
+ uint32_t pid;
+ uint32_t tid;
+ uint64_t vma;
+ uint64_t old_code_addr;
+ uint64_t new_code_addr;
+ uint64_t code_size;
+ uint64_t code_index;
+};
+
+struct debug_entry {
+ uint64_t addr;
+ int lineno; /* source line number starting at 1 */
+ int discrim; /* column discriminator, 0 is default */
+ const char name[]; /* null terminated filename, \xff\0 if same as previous entry */
+};
+
+struct jr_code_debug_info {
+ struct jr_prefix p;
+
+ uint64_t code_addr;
+ uint64_t nr_entry;
+ struct debug_entry entries[];
+};
+
+struct jr_code_unwinding_info {
+ struct jr_prefix p;
+
+ uint64_t unwinding_size;
+ uint64_t eh_frame_hdr_size;
+ uint64_t mapped_size;
+ const char unwinding_data[];
+};
+
+union jr_entry {
+ struct jr_code_debug_info info;
+ struct jr_code_close close;
+ struct jr_code_load load;
+ struct jr_code_move move;
+ struct jr_prefix prefix;
+ struct jr_code_unwinding_info unwinding;
+};
+
+static inline struct debug_entry *
+debug_entry_next(struct debug_entry *ent)
+{
+ void *a = ent + 1;
+ size_t l = strlen(ent->name) + 1;
+ return a + l;
+}
+
+static inline char *
+debug_entry_file(struct debug_entry *ent)
+{
+ void *a = ent + 1;
+ return a;
+}
+
+#endif /* !JITDUMP_H */
diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h
new file mode 100644
index 000000000..3e9ac754c
--- /dev/null
+++ b/tools/perf/util/kvm-stat.h
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_KVM_STAT_H
+#define __PERF_KVM_STAT_H
+
+#ifdef HAVE_KVM_STAT_SUPPORT
+
+#include "tool.h"
+#include "sort.h"
+#include "stat.h"
+#include "symbol.h"
+#include "record.h"
+
+#include <stdlib.h>
+#include <linux/zalloc.h>
+
+#define KVM_EVENT_NAME_LEN 40
+
+struct evsel;
+struct evlist;
+struct perf_session;
+
+struct event_key {
+ #define INVALID_KEY (~0ULL)
+ u64 key;
+ int info;
+ struct exit_reasons_table *exit_reasons;
+};
+
+struct kvm_info {
+ char name[KVM_EVENT_NAME_LEN];
+ refcount_t refcnt;
+};
+
+struct kvm_event_stats {
+ u64 time;
+ struct stats stats;
+};
+
+struct perf_kvm_stat;
+
+struct kvm_event {
+ struct list_head hash_entry;
+
+ struct perf_kvm_stat *perf_kvm;
+ struct event_key key;
+
+ struct kvm_event_stats total;
+
+ #define DEFAULT_VCPU_NUM 8
+ int max_vcpu;
+ struct kvm_event_stats *vcpu;
+
+ struct hist_entry he;
+};
+
+struct child_event_ops {
+ void (*get_key)(struct evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key);
+ const char *name;
+};
+
+struct kvm_events_ops {
+ bool (*is_begin_event)(struct evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key);
+ bool (*is_end_event)(struct evsel *evsel,
+ struct perf_sample *sample, struct event_key *key);
+ struct child_event_ops *child_ops;
+ void (*decode_key)(struct perf_kvm_stat *kvm, struct event_key *key,
+ char *decode);
+ const char *name;
+};
+
+struct exit_reasons_table {
+ unsigned long exit_code;
+ const char *reason;
+};
+
+struct perf_kvm_stat {
+ struct perf_tool tool;
+ struct record_opts opts;
+ struct evlist *evlist;
+ struct perf_session *session;
+
+ const char *file_name;
+ const char *report_event;
+ const char *sort_key;
+ int trace_vcpu;
+
+ /* Used when process events */
+ struct addr_location al;
+
+ struct exit_reasons_table *exit_reasons;
+ const char *exit_reasons_isa;
+
+ struct kvm_events_ops *events_ops;
+
+ u64 total_time;
+ u64 total_count;
+ u64 lost_events;
+ u64 duration;
+
+ struct intlist *pid_list;
+
+ int timerfd;
+ unsigned int display_time;
+ bool live;
+ bool force;
+ bool use_stdio;
+};
+
+struct kvm_reg_events_ops {
+ const char *name;
+ struct kvm_events_ops *ops;
+};
+
+void exit_event_get_key(struct evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key);
+bool exit_event_begin(struct evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key);
+bool exit_event_end(struct evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key);
+void exit_event_decode_key(struct perf_kvm_stat *kvm,
+ struct event_key *key,
+ char *decode);
+
+bool kvm_exit_event(struct evsel *evsel);
+bool kvm_entry_event(struct evsel *evsel);
+int setup_kvm_events_tp(struct perf_kvm_stat *kvm);
+
+#define define_exit_reasons_table(name, symbols) \
+ static struct exit_reasons_table name[] = { \
+ symbols, { -1, NULL } \
+ }
+
+/*
+ * arch specific callbacks and data structures
+ */
+int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid);
+
+extern const char *kvm_events_tp[];
+extern struct kvm_reg_events_ops kvm_reg_events_ops[];
+extern const char * const kvm_skip_events[];
+extern const char *vcpu_id_str;
+extern const char *kvm_exit_reason;
+extern const char *kvm_entry_trace;
+extern const char *kvm_exit_trace;
+
+static inline struct kvm_info *kvm_info__get(struct kvm_info *ki)
+{
+ if (ki)
+ refcount_inc(&ki->refcnt);
+ return ki;
+}
+
+static inline void kvm_info__put(struct kvm_info *ki)
+{
+ if (ki && refcount_dec_and_test(&ki->refcnt))
+ free(ki);
+}
+
+static inline void __kvm_info__zput(struct kvm_info **ki)
+{
+ kvm_info__put(*ki);
+ *ki = NULL;
+}
+
+#define kvm_info__zput(ki) __kvm_info__zput(&ki)
+
+static inline struct kvm_info *kvm_info__new(void)
+{
+ struct kvm_info *ki;
+
+ ki = zalloc(sizeof(*ki));
+ if (ki)
+ refcount_set(&ki->refcnt, 1);
+
+ return ki;
+}
+
+#else /* HAVE_KVM_STAT_SUPPORT */
+// We use this unconditionally in hists__findnew_entry() and hist_entry__delete()
+#define kvm_info__zput(ki) do { } while (0)
+#endif /* HAVE_KVM_STAT_SUPPORT */
+
+extern int kvm_add_default_arch_event(int *argc, const char **argv);
+#endif /* __PERF_KVM_STAT_H */
diff --git a/tools/perf/util/kwork.h b/tools/perf/util/kwork.h
new file mode 100644
index 000000000..53b732755
--- /dev/null
+++ b/tools/perf/util/kwork.h
@@ -0,0 +1,257 @@
+#ifndef PERF_UTIL_KWORK_H
+#define PERF_UTIL_KWORK_H
+
+#include "util/tool.h"
+#include "util/time-utils.h"
+
+#include <linux/bitmap.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+struct perf_sample;
+struct perf_session;
+
+enum kwork_class_type {
+ KWORK_CLASS_IRQ,
+ KWORK_CLASS_SOFTIRQ,
+ KWORK_CLASS_WORKQUEUE,
+ KWORK_CLASS_MAX,
+};
+
+enum kwork_report_type {
+ KWORK_REPORT_RUNTIME,
+ KWORK_REPORT_LATENCY,
+ KWORK_REPORT_TIMEHIST,
+};
+
+enum kwork_trace_type {
+ KWORK_TRACE_RAISE,
+ KWORK_TRACE_ENTRY,
+ KWORK_TRACE_EXIT,
+ KWORK_TRACE_MAX,
+};
+
+/*
+ * data structure:
+ *
+ * +==================+ +============+ +======================+
+ * | class | | work | | atom |
+ * +==================+ +============+ +======================+
+ * +------------+ | +-----+ | | +------+ | | +-------+ +-----+ |
+ * | perf_kwork | +-> | irq | --------|+-> | eth0 | --+-> | raise | - | ... | --+ +-----------+
+ * +-----+------+ || +-----+ ||| +------+ ||| +-------+ +-----+ | | | |
+ * | || ||| ||| | +-> | atom_page |
+ * | || ||| ||| +-------+ +-----+ | | |
+ * | class_list ||| |+-> | entry | - | ... | ----> | |
+ * | || ||| ||| +-------+ +-----+ | | |
+ * | || ||| ||| | +-> | |
+ * | || ||| ||| +-------+ +-----+ | | | |
+ * | || ||| |+-> | exit | - | ... | --+ +-----+-----+
+ * | || ||| | | +-------+ +-----+ | |
+ * | || ||| | | | |
+ * | || ||| +-----+ | | | |
+ * | || |+-> | ... | | | | |
+ * | || | | +-----+ | | | |
+ * | || | | | | | |
+ * | || +---------+ | | +-----+ | | +-------+ +-----+ | |
+ * | +-> | softirq | -------> | RCU | ---+-> | raise | - | ... | --+ +-----+-----+
+ * | || +---------+ | | +-----+ ||| +-------+ +-----+ | | | |
+ * | || | | ||| | +-> | atom_page |
+ * | || | | ||| +-------+ +-----+ | | |
+ * | || | | |+-> | entry | - | ... | ----> | |
+ * | || | | ||| +-------+ +-----+ | | |
+ * | || | | ||| | +-> | |
+ * | || | | ||| +-------+ +-----+ | | | |
+ * | || | | |+-> | exit | - | ... | --+ +-----+-----+
+ * | || | | | | +-------+ +-----+ | |
+ * | || | | | | | |
+ * | || +-----------+ | | +-----+ | | | |
+ * | +-> | workqueue | -----> | ... | | | | |
+ * | | +-----------+ | | +-----+ | | | |
+ * | +==================+ +============+ +======================+ |
+ * | |
+ * +----> atom_page_list ---------------------------------------------------------+
+ *
+ */
+
+struct kwork_atom {
+ struct list_head list;
+ u64 time;
+ struct kwork_atom *prev;
+
+ void *page_addr;
+ unsigned long bit_inpage;
+};
+
+#define NR_ATOM_PER_PAGE 128
+struct kwork_atom_page {
+ struct list_head list;
+ struct kwork_atom atoms[NR_ATOM_PER_PAGE];
+ DECLARE_BITMAP(bitmap, NR_ATOM_PER_PAGE);
+};
+
+struct kwork_class;
+struct kwork_work {
+ /*
+ * class field
+ */
+ struct rb_node node;
+ struct kwork_class *class;
+
+ /*
+ * work field
+ */
+ u64 id;
+ int cpu;
+ char *name;
+
+ /*
+ * atom field
+ */
+ u64 nr_atoms;
+ struct list_head atom_list[KWORK_TRACE_MAX];
+
+ /*
+ * runtime report
+ */
+ u64 max_runtime;
+ u64 max_runtime_start;
+ u64 max_runtime_end;
+ u64 total_runtime;
+
+ /*
+ * latency report
+ */
+ u64 max_latency;
+ u64 max_latency_start;
+ u64 max_latency_end;
+ u64 total_latency;
+};
+
+struct kwork_class {
+ struct list_head list;
+ const char *name;
+ enum kwork_class_type type;
+
+ unsigned int nr_tracepoints;
+ const struct evsel_str_handler *tp_handlers;
+
+ struct rb_root_cached work_root;
+
+ int (*class_init)(struct kwork_class *class,
+ struct perf_session *session);
+
+ void (*work_init)(struct kwork_class *class,
+ struct kwork_work *work,
+ struct evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine);
+
+ void (*work_name)(struct kwork_work *work,
+ char *buf, int len);
+};
+
+struct perf_kwork;
+struct trace_kwork_handler {
+ int (*raise_event)(struct perf_kwork *kwork,
+ struct kwork_class *class, struct evsel *evsel,
+ struct perf_sample *sample, struct machine *machine);
+
+ int (*entry_event)(struct perf_kwork *kwork,
+ struct kwork_class *class, struct evsel *evsel,
+ struct perf_sample *sample, struct machine *machine);
+
+ int (*exit_event)(struct perf_kwork *kwork,
+ struct kwork_class *class, struct evsel *evsel,
+ struct perf_sample *sample, struct machine *machine);
+};
+
+struct perf_kwork {
+ /*
+ * metadata
+ */
+ struct perf_tool tool;
+ struct list_head class_list;
+ struct list_head atom_page_list;
+ struct list_head sort_list, cmp_id;
+ struct rb_root_cached sorted_work_root;
+ const struct trace_kwork_handler *tp_handler;
+
+ /*
+ * profile filters
+ */
+ const char *profile_name;
+
+ const char *cpu_list;
+ DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+
+ const char *time_str;
+ struct perf_time_interval ptime;
+
+ /*
+ * options for command
+ */
+ bool force;
+ const char *event_list_str;
+ enum kwork_report_type report;
+
+ /*
+ * options for subcommand
+ */
+ bool summary;
+ const char *sort_order;
+ bool show_callchain;
+ unsigned int max_stack;
+ bool use_bpf;
+
+ /*
+ * statistics
+ */
+ u64 timestart;
+ u64 timeend;
+
+ unsigned long nr_events;
+ unsigned long nr_lost_chunks;
+ unsigned long nr_lost_events;
+
+ u64 all_runtime;
+ u64 all_count;
+ u64 nr_skipped_events[KWORK_TRACE_MAX + 1];
+};
+
+struct kwork_work *perf_kwork_add_work(struct perf_kwork *kwork,
+ struct kwork_class *class,
+ struct kwork_work *key);
+
+#ifdef HAVE_BPF_SKEL
+
+int perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork);
+int perf_kwork__report_read_bpf(struct perf_kwork *kwork);
+void perf_kwork__report_cleanup_bpf(void);
+
+void perf_kwork__trace_start(void);
+void perf_kwork__trace_finish(void);
+
+#else /* !HAVE_BPF_SKEL */
+
+static inline int
+perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_kwork__report_read_bpf(struct perf_kwork *kwork __maybe_unused)
+{
+ return -1;
+}
+
+static inline void perf_kwork__report_cleanup_bpf(void) {}
+
+static inline void perf_kwork__trace_start(void) {}
+static inline void perf_kwork__trace_finish(void) {}
+
+#endif /* HAVE_BPF_SKEL */
+
+#endif /* PERF_UTIL_KWORK_H */
diff --git a/tools/perf/util/levenshtein.c b/tools/perf/util/levenshtein.c
new file mode 100644
index 000000000..6a6712635
--- /dev/null
+++ b/tools/perf/util/levenshtein.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "levenshtein.h"
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * This function implements the Damerau-Levenshtein algorithm to
+ * calculate a distance between strings.
+ *
+ * Basically, it says how many letters need to be swapped, substituted,
+ * deleted from, or added to string1, at least, to get string2.
+ *
+ * The idea is to build a distance matrix for the substrings of both
+ * strings. To avoid a large space complexity, only the last three rows
+ * are kept in memory (if swaps had the same or higher cost as one deletion
+ * plus one insertion, only two rows would be needed).
+ *
+ * At any stage, "i + 1" denotes the length of the current substring of
+ * string1 that the distance is calculated for.
+ *
+ * row2 holds the current row, row1 the previous row (i.e. for the substring
+ * of string1 of length "i"), and row0 the row before that.
+ *
+ * In other words, at the start of the big loop, row2[j + 1] contains the
+ * Damerau-Levenshtein distance between the substring of string1 of length
+ * "i" and the substring of string2 of length "j + 1".
+ *
+ * All the big loop does is determine the partial minimum-cost paths.
+ *
+ * It does so by calculating the costs of the path ending in characters
+ * i (in string1) and j (in string2), respectively, given that the last
+ * operation is a substitution, a swap, a deletion, or an insertion.
+ *
+ * This implementation allows the costs to be weighted:
+ *
+ * - w (as in "sWap")
+ * - s (as in "Substitution")
+ * - a (for insertion, AKA "Add")
+ * - d (as in "Deletion")
+ *
+ * Note that this algorithm calculates a distance _iff_ d == a.
+ */
+int levenshtein(const char *string1, const char *string2,
+ int w, int s, int a, int d)
+{
+ int len1 = strlen(string1), len2 = strlen(string2);
+ int *row0 = malloc(sizeof(int) * (len2 + 1));
+ int *row1 = malloc(sizeof(int) * (len2 + 1));
+ int *row2 = malloc(sizeof(int) * (len2 + 1));
+ int i, j;
+
+ for (j = 0; j <= len2; j++)
+ row1[j] = j * a;
+ for (i = 0; i < len1; i++) {
+ int *dummy;
+
+ row2[0] = (i + 1) * d;
+ for (j = 0; j < len2; j++) {
+ /* substitution */
+ row2[j + 1] = row1[j] + s * (string1[i] != string2[j]);
+ /* swap */
+ if (i > 0 && j > 0 && string1[i - 1] == string2[j] &&
+ string1[i] == string2[j - 1] &&
+ row2[j + 1] > row0[j - 1] + w)
+ row2[j + 1] = row0[j - 1] + w;
+ /* deletion */
+ if (row2[j + 1] > row1[j + 1] + d)
+ row2[j + 1] = row1[j + 1] + d;
+ /* insertion */
+ if (row2[j + 1] > row2[j] + a)
+ row2[j + 1] = row2[j] + a;
+ }
+
+ dummy = row0;
+ row0 = row1;
+ row1 = row2;
+ row2 = dummy;
+ }
+
+ i = row1[len2];
+ free(row0);
+ free(row1);
+ free(row2);
+
+ return i;
+}
diff --git a/tools/perf/util/levenshtein.h b/tools/perf/util/levenshtein.h
new file mode 100644
index 000000000..34ca173c5
--- /dev/null
+++ b/tools/perf/util/levenshtein.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_LEVENSHTEIN_H
+#define __PERF_LEVENSHTEIN_H
+
+int levenshtein(const char *string1, const char *string2,
+ int swap_penalty, int substition_penalty,
+ int insertion_penalty, int deletion_penalty);
+
+#endif /* __PERF_LEVENSHTEIN_H */
diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c
new file mode 100644
index 000000000..37ecef0c5
--- /dev/null
+++ b/tools/perf/util/libunwind/arm64.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file setups defines to compile arch specific binary from the
+ * generic one.
+ *
+ * The function 'LIBUNWIND__ARCH_REG_ID' name is set according to arch
+ * name and the definition of this function is included directly from
+ * 'arch/arm64/util/unwind-libunwind.c', to make sure that this function
+ * is defined no matter what arch the host is.
+ *
+ * Finally, the arch specific unwind methods are exported which will
+ * be assigned to each arm64 thread.
+ */
+
+#define REMOTE_UNWIND_LIBUNWIND
+
+/* Define arch specific functions & regs for libunwind, should be
+ * defined before including "unwind.h"
+ */
+#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__arm64_reg_id(regnum)
+
+#include "unwind.h"
+#include "libunwind-aarch64.h"
+#define perf_event_arm_regs perf_event_arm64_regs
+#include <../../../arch/arm64/include/uapi/asm/perf_regs.h>
+#undef perf_event_arm_regs
+#include "../../arch/arm64/util/unwind-libunwind.c"
+
+/* NO_LIBUNWIND_DEBUG_FRAME is a feature flag for local libunwind,
+ * assign NO_LIBUNWIND_DEBUG_FRAME_AARCH64 to it for compiling arm64
+ * unwind methods.
+ */
+#undef NO_LIBUNWIND_DEBUG_FRAME
+#ifdef NO_LIBUNWIND_DEBUG_FRAME_AARCH64
+#define NO_LIBUNWIND_DEBUG_FRAME
+#endif
+#include "util/unwind-libunwind-local.c"
+
+struct unwind_libunwind_ops *
+arm64_unwind_libunwind_ops = &_unwind_libunwind_ops;
diff --git a/tools/perf/util/libunwind/x86_32.c b/tools/perf/util/libunwind/x86_32.c
new file mode 100644
index 000000000..1697dece1
--- /dev/null
+++ b/tools/perf/util/libunwind/x86_32.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file setups defines to compile arch specific binary from the
+ * generic one.
+ *
+ * The function 'LIBUNWIND__ARCH_REG_ID' name is set according to arch
+ * name and the definition of this function is included directly from
+ * 'arch/x86/util/unwind-libunwind.c', to make sure that this function
+ * is defined no matter what arch the host is.
+ *
+ * Finally, the arch specific unwind methods are exported which will
+ * be assigned to each x86 thread.
+ */
+
+#define REMOTE_UNWIND_LIBUNWIND
+
+/* Define arch specific functions & regs for libunwind, should be
+ * defined before including "unwind.h"
+ */
+#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__x86_reg_id(regnum)
+
+#include "unwind.h"
+#include "libunwind-x86.h"
+#include <../../../../arch/x86/include/uapi/asm/perf_regs.h>
+
+/* HAVE_ARCH_X86_64_SUPPORT is used in'arch/x86/util/unwind-libunwind.c'
+ * for x86_32, we undef it to compile code for x86_32 only.
+ */
+#undef HAVE_ARCH_X86_64_SUPPORT
+#include "../../arch/x86/util/unwind-libunwind.c"
+
+/* Explicitly define NO_LIBUNWIND_DEBUG_FRAME, because non-ARM has no
+ * dwarf_find_debug_frame() function.
+ */
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+#define NO_LIBUNWIND_DEBUG_FRAME
+#endif
+#include "util/unwind-libunwind-local.c"
+
+struct unwind_libunwind_ops *
+x86_32_unwind_libunwind_ops = &_unwind_libunwind_ops;
diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h
new file mode 100644
index 000000000..fa16532c9
--- /dev/null
+++ b/tools/perf/util/lock-contention.h
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef PERF_LOCK_CONTENTION_H
+#define PERF_LOCK_CONTENTION_H
+
+#include <linux/list.h>
+#include <linux/rbtree.h>
+
+struct lock_filter {
+ int nr_types;
+ int nr_addrs;
+ int nr_syms;
+ unsigned int *types;
+ unsigned long *addrs;
+ char **syms;
+};
+
+struct lock_stat {
+ struct hlist_node hash_entry;
+ struct rb_node rb; /* used for sorting */
+
+ u64 addr; /* address of lockdep_map, used as ID */
+ char *name; /* for strcpy(), we cannot use const */
+ u64 *callstack;
+
+ unsigned int nr_acquire;
+ unsigned int nr_acquired;
+ unsigned int nr_contended;
+ unsigned int nr_release;
+
+ union {
+ unsigned int nr_readlock;
+ unsigned int flags;
+ };
+ unsigned int nr_trylock;
+
+ /* these times are in nano sec. */
+ u64 avg_wait_time;
+ u64 wait_time_total;
+ u64 wait_time_min;
+ u64 wait_time_max;
+
+ int broken; /* flag of blacklist */
+ int combined;
+};
+
+/*
+ * States of lock_seq_stat
+ *
+ * UNINITIALIZED is required for detecting first event of acquire.
+ * As the nature of lock events, there is no guarantee
+ * that the first event for the locks are acquire,
+ * it can be acquired, contended or release.
+ */
+#define SEQ_STATE_UNINITIALIZED 0 /* initial state */
+#define SEQ_STATE_RELEASED 1
+#define SEQ_STATE_ACQUIRING 2
+#define SEQ_STATE_ACQUIRED 3
+#define SEQ_STATE_READ_ACQUIRED 4
+#define SEQ_STATE_CONTENDED 5
+
+/*
+ * MAX_LOCK_DEPTH
+ * Imported from include/linux/sched.h.
+ * Should this be synchronized?
+ */
+#define MAX_LOCK_DEPTH 48
+
+struct lock_stat *lock_stat_find(u64 addr);
+struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags);
+
+bool match_callstack_filter(struct machine *machine, u64 *callstack);
+
+/*
+ * struct lock_seq_stat:
+ * Place to put on state of one lock sequence
+ * 1) acquire -> acquired -> release
+ * 2) acquire -> contended -> acquired -> release
+ * 3) acquire (with read or try) -> release
+ * 4) Are there other patterns?
+ */
+struct lock_seq_stat {
+ struct list_head list;
+ int state;
+ u64 prev_event_time;
+ u64 addr;
+
+ int read_count;
+};
+
+struct thread_stat {
+ struct rb_node rb;
+
+ u32 tid;
+ struct list_head seq_list;
+};
+
+/*
+ * CONTENTION_STACK_DEPTH
+ * Number of stack trace entries to find callers
+ */
+#define CONTENTION_STACK_DEPTH 8
+
+/*
+ * CONTENTION_STACK_SKIP
+ * Number of stack trace entries to skip when finding callers.
+ * The first few entries belong to the locking implementation itself.
+ */
+#define CONTENTION_STACK_SKIP 4
+
+/*
+ * flags for lock:contention_begin
+ * Imported from include/trace/events/lock.h.
+ */
+#define LCB_F_SPIN (1U << 0)
+#define LCB_F_READ (1U << 1)
+#define LCB_F_WRITE (1U << 2)
+#define LCB_F_RT (1U << 3)
+#define LCB_F_PERCPU (1U << 4)
+#define LCB_F_MUTEX (1U << 5)
+
+struct evlist;
+struct machine;
+struct target;
+
+struct lock_contention_fails {
+ int task;
+ int stack;
+ int time;
+ int data;
+};
+
+struct lock_contention {
+ struct evlist *evlist;
+ struct target *target;
+ struct machine *machine;
+ struct hlist_head *result;
+ struct lock_filter *filters;
+ struct lock_contention_fails fails;
+ unsigned long map_nr_entries;
+ int max_stack;
+ int stack_skip;
+ int aggr_mode;
+ int owner;
+ int nr_filtered;
+ bool save_callstack;
+};
+
+#ifdef HAVE_BPF_SKEL
+
+int lock_contention_prepare(struct lock_contention *con);
+int lock_contention_start(void);
+int lock_contention_stop(void);
+int lock_contention_read(struct lock_contention *con);
+int lock_contention_finish(void);
+
+#else /* !HAVE_BPF_SKEL */
+
+static inline int lock_contention_prepare(struct lock_contention *con __maybe_unused)
+{
+ return 0;
+}
+
+static inline int lock_contention_start(void) { return 0; }
+static inline int lock_contention_stop(void) { return 0; }
+static inline int lock_contention_finish(void) { return 0; }
+
+static inline int lock_contention_read(struct lock_contention *con __maybe_unused)
+{
+ return 0;
+}
+
+#endif /* HAVE_BPF_SKEL */
+
+#endif /* PERF_LOCK_CONTENTION_H */
diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c
new file mode 100644
index 000000000..af9a97612
--- /dev/null
+++ b/tools/perf/util/lzma.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <lzma.h>
+#include <stdio.h>
+#include <linux/compiler.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "compress.h"
+#include "debug.h"
+#include <string.h>
+#include <unistd.h>
+#include <internal/lib.h>
+
+#define BUFSIZE 8192
+
+static const char *lzma_strerror(lzma_ret ret)
+{
+ switch ((int) ret) {
+ case LZMA_MEM_ERROR:
+ return "Memory allocation failed";
+ case LZMA_OPTIONS_ERROR:
+ return "Unsupported decompressor flags";
+ case LZMA_FORMAT_ERROR:
+ return "The input is not in the .xz format";
+ case LZMA_DATA_ERROR:
+ return "Compressed file is corrupt";
+ case LZMA_BUF_ERROR:
+ return "Compressed file is truncated or otherwise corrupt";
+ default:
+ return "Unknown error, possibly a bug";
+ }
+}
+
+int lzma_decompress_to_file(const char *input, int output_fd)
+{
+ lzma_action action = LZMA_RUN;
+ lzma_stream strm = LZMA_STREAM_INIT;
+ lzma_ret ret;
+ int err = -1;
+
+ u8 buf_in[BUFSIZE];
+ u8 buf_out[BUFSIZE];
+ FILE *infile;
+
+ infile = fopen(input, "rb");
+ if (!infile) {
+ pr_debug("lzma: fopen failed on %s: '%s'\n", input, strerror(errno));
+ return -1;
+ }
+
+ ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED);
+ if (ret != LZMA_OK) {
+ pr_debug("lzma: lzma_stream_decoder failed %s (%d)\n", lzma_strerror(ret), ret);
+ goto err_fclose;
+ }
+
+ strm.next_in = NULL;
+ strm.avail_in = 0;
+ strm.next_out = buf_out;
+ strm.avail_out = sizeof(buf_out);
+
+ while (1) {
+ if (strm.avail_in == 0 && !feof(infile)) {
+ strm.next_in = buf_in;
+ strm.avail_in = fread(buf_in, 1, sizeof(buf_in), infile);
+
+ if (ferror(infile)) {
+ pr_debug("lzma: read error: %s\n", strerror(errno));
+ goto err_lzma_end;
+ }
+
+ if (feof(infile))
+ action = LZMA_FINISH;
+ }
+
+ ret = lzma_code(&strm, action);
+
+ if (strm.avail_out == 0 || ret == LZMA_STREAM_END) {
+ ssize_t write_size = sizeof(buf_out) - strm.avail_out;
+
+ if (writen(output_fd, buf_out, write_size) != write_size) {
+ pr_debug("lzma: write error: %s\n", strerror(errno));
+ goto err_lzma_end;
+ }
+
+ strm.next_out = buf_out;
+ strm.avail_out = sizeof(buf_out);
+ }
+
+ if (ret != LZMA_OK) {
+ if (ret == LZMA_STREAM_END)
+ break;
+
+ pr_debug("lzma: failed %s\n", lzma_strerror(ret));
+ goto err_lzma_end;
+ }
+ }
+
+ err = 0;
+err_lzma_end:
+ lzma_end(&strm);
+err_fclose:
+ fclose(infile);
+ return err;
+}
+
+bool lzma_is_compressed(const char *input)
+{
+ int fd = open(input, O_RDONLY);
+ const uint8_t magic[6] = { 0xFD, '7', 'z', 'X', 'Z', 0x00 };
+ char buf[6] = { 0 };
+ ssize_t rc;
+
+ if (fd < 0)
+ return -1;
+
+ rc = read(fd, buf, sizeof(buf));
+ close(fd);
+ return rc == sizeof(buf) ?
+ memcmp(buf, magic, sizeof(buf)) == 0 : false;
+}
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
new file mode 100644
index 000000000..e6a8d758f
--- /dev/null
+++ b/tools/perf/util/machine.c
@@ -0,0 +1,3453 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <dirent.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <regex.h>
+#include <stdlib.h>
+#include "callchain.h"
+#include "debug.h"
+#include "dso.h"
+#include "env.h"
+#include "event.h"
+#include "evsel.h"
+#include "hist.h"
+#include "machine.h"
+#include "map.h"
+#include "map_symbol.h"
+#include "branch.h"
+#include "mem-events.h"
+#include "path.h"
+#include "srcline.h"
+#include "symbol.h"
+#include "sort.h"
+#include "strlist.h"
+#include "target.h"
+#include "thread.h"
+#include "util.h"
+#include "vdso.h"
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "unwind.h"
+#include "linux/hash.h"
+#include "asm/bug.h"
+#include "bpf-event.h"
+#include <internal/lib.h> // page_size
+#include "cgroup.h"
+#include "arm64-frame-pointer-unwind-support.h"
+
+#include <linux/ctype.h>
+#include <symbol/kallsyms.h>
+#include <linux/mman.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+
+static void __machine__remove_thread(struct machine *machine, struct thread_rb_node *nd,
+ struct thread *th, bool lock);
+
+static struct dso *machine__kernel_dso(struct machine *machine)
+{
+ return map__dso(machine->vmlinux_map);
+}
+
+static void dsos__init(struct dsos *dsos)
+{
+ INIT_LIST_HEAD(&dsos->head);
+ dsos->root = RB_ROOT;
+ init_rwsem(&dsos->lock);
+}
+
+static void machine__threads_init(struct machine *machine)
+{
+ int i;
+
+ for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+ struct threads *threads = &machine->threads[i];
+ threads->entries = RB_ROOT_CACHED;
+ init_rwsem(&threads->lock);
+ threads->nr = 0;
+ INIT_LIST_HEAD(&threads->dead);
+ threads->last_match = NULL;
+ }
+}
+
+static int thread_rb_node__cmp_tid(const void *key, const struct rb_node *nd)
+{
+ int to_find = (int) *((pid_t *)key);
+
+ return to_find - (int)thread__tid(rb_entry(nd, struct thread_rb_node, rb_node)->thread);
+}
+
+static struct thread_rb_node *thread_rb_node__find(const struct thread *th,
+ struct rb_root *tree)
+{
+ pid_t to_find = thread__tid(th);
+ struct rb_node *nd = rb_find(&to_find, tree, thread_rb_node__cmp_tid);
+
+ return rb_entry(nd, struct thread_rb_node, rb_node);
+}
+
+static int machine__set_mmap_name(struct machine *machine)
+{
+ if (machine__is_host(machine))
+ machine->mmap_name = strdup("[kernel.kallsyms]");
+ else if (machine__is_default_guest(machine))
+ machine->mmap_name = strdup("[guest.kernel.kallsyms]");
+ else if (asprintf(&machine->mmap_name, "[guest.kernel.kallsyms.%d]",
+ machine->pid) < 0)
+ machine->mmap_name = NULL;
+
+ return machine->mmap_name ? 0 : -ENOMEM;
+}
+
+static void thread__set_guest_comm(struct thread *thread, pid_t pid)
+{
+ char comm[64];
+
+ snprintf(comm, sizeof(comm), "[guest/%d]", pid);
+ thread__set_comm(thread, comm, 0);
+}
+
+int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
+{
+ int err = -ENOMEM;
+
+ memset(machine, 0, sizeof(*machine));
+ machine->kmaps = maps__new(machine);
+ if (machine->kmaps == NULL)
+ return -ENOMEM;
+
+ RB_CLEAR_NODE(&machine->rb_node);
+ dsos__init(&machine->dsos);
+
+ machine__threads_init(machine);
+
+ machine->vdso_info = NULL;
+ machine->env = NULL;
+
+ machine->pid = pid;
+
+ machine->id_hdr_size = 0;
+ machine->kptr_restrict_warned = false;
+ machine->comm_exec = false;
+ machine->kernel_start = 0;
+ machine->vmlinux_map = NULL;
+
+ machine->root_dir = strdup(root_dir);
+ if (machine->root_dir == NULL)
+ goto out;
+
+ if (machine__set_mmap_name(machine))
+ goto out;
+
+ if (pid != HOST_KERNEL_ID) {
+ struct thread *thread = machine__findnew_thread(machine, -1,
+ pid);
+
+ if (thread == NULL)
+ goto out;
+
+ thread__set_guest_comm(thread, pid);
+ thread__put(thread);
+ }
+
+ machine->current_tid = NULL;
+ err = 0;
+
+out:
+ if (err) {
+ zfree(&machine->kmaps);
+ zfree(&machine->root_dir);
+ zfree(&machine->mmap_name);
+ }
+ return 0;
+}
+
+struct machine *machine__new_host(void)
+{
+ struct machine *machine = malloc(sizeof(*machine));
+
+ if (machine != NULL) {
+ machine__init(machine, "", HOST_KERNEL_ID);
+
+ if (machine__create_kernel_maps(machine) < 0)
+ goto out_delete;
+ }
+
+ return machine;
+out_delete:
+ free(machine);
+ return NULL;
+}
+
+struct machine *machine__new_kallsyms(void)
+{
+ struct machine *machine = machine__new_host();
+ /*
+ * FIXME:
+ * 1) We should switch to machine__load_kallsyms(), i.e. not explicitly
+ * ask for not using the kcore parsing code, once this one is fixed
+ * to create a map per module.
+ */
+ if (machine && machine__load_kallsyms(machine, "/proc/kallsyms") <= 0) {
+ machine__delete(machine);
+ machine = NULL;
+ }
+
+ return machine;
+}
+
+static void dsos__purge(struct dsos *dsos)
+{
+ struct dso *pos, *n;
+
+ down_write(&dsos->lock);
+
+ list_for_each_entry_safe(pos, n, &dsos->head, node) {
+ RB_CLEAR_NODE(&pos->rb_node);
+ pos->root = NULL;
+ list_del_init(&pos->node);
+ dso__put(pos);
+ }
+
+ up_write(&dsos->lock);
+}
+
+static void dsos__exit(struct dsos *dsos)
+{
+ dsos__purge(dsos);
+ exit_rwsem(&dsos->lock);
+}
+
+void machine__delete_threads(struct machine *machine)
+{
+ struct rb_node *nd;
+ int i;
+
+ for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+ struct threads *threads = &machine->threads[i];
+ down_write(&threads->lock);
+ nd = rb_first_cached(&threads->entries);
+ while (nd) {
+ struct thread_rb_node *trb = rb_entry(nd, struct thread_rb_node, rb_node);
+
+ nd = rb_next(nd);
+ __machine__remove_thread(machine, trb, trb->thread, false);
+ }
+ up_write(&threads->lock);
+ }
+}
+
+void machine__exit(struct machine *machine)
+{
+ int i;
+
+ if (machine == NULL)
+ return;
+
+ machine__destroy_kernel_maps(machine);
+ maps__zput(machine->kmaps);
+ dsos__exit(&machine->dsos);
+ machine__exit_vdso(machine);
+ zfree(&machine->root_dir);
+ zfree(&machine->mmap_name);
+ zfree(&machine->current_tid);
+ zfree(&machine->kallsyms_filename);
+
+ machine__delete_threads(machine);
+ for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+ struct threads *threads = &machine->threads[i];
+
+ exit_rwsem(&threads->lock);
+ }
+}
+
+void machine__delete(struct machine *machine)
+{
+ if (machine) {
+ machine__exit(machine);
+ free(machine);
+ }
+}
+
+void machines__init(struct machines *machines)
+{
+ machine__init(&machines->host, "", HOST_KERNEL_ID);
+ machines->guests = RB_ROOT_CACHED;
+}
+
+void machines__exit(struct machines *machines)
+{
+ machine__exit(&machines->host);
+ /* XXX exit guest */
+}
+
+struct machine *machines__add(struct machines *machines, pid_t pid,
+ const char *root_dir)
+{
+ struct rb_node **p = &machines->guests.rb_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct machine *pos, *machine = malloc(sizeof(*machine));
+ bool leftmost = true;
+
+ if (machine == NULL)
+ return NULL;
+
+ if (machine__init(machine, root_dir, pid) != 0) {
+ free(machine);
+ return NULL;
+ }
+
+ while (*p != NULL) {
+ parent = *p;
+ pos = rb_entry(parent, struct machine, rb_node);
+ if (pid < pos->pid)
+ p = &(*p)->rb_left;
+ else {
+ p = &(*p)->rb_right;
+ leftmost = false;
+ }
+ }
+
+ rb_link_node(&machine->rb_node, parent, p);
+ rb_insert_color_cached(&machine->rb_node, &machines->guests, leftmost);
+
+ machine->machines = machines;
+
+ return machine;
+}
+
+void machines__set_comm_exec(struct machines *machines, bool comm_exec)
+{
+ struct rb_node *nd;
+
+ machines->host.comm_exec = comm_exec;
+
+ for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
+ struct machine *machine = rb_entry(nd, struct machine, rb_node);
+
+ machine->comm_exec = comm_exec;
+ }
+}
+
+struct machine *machines__find(struct machines *machines, pid_t pid)
+{
+ struct rb_node **p = &machines->guests.rb_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct machine *machine;
+ struct machine *default_machine = NULL;
+
+ if (pid == HOST_KERNEL_ID)
+ return &machines->host;
+
+ while (*p != NULL) {
+ parent = *p;
+ machine = rb_entry(parent, struct machine, rb_node);
+ if (pid < machine->pid)
+ p = &(*p)->rb_left;
+ else if (pid > machine->pid)
+ p = &(*p)->rb_right;
+ else
+ return machine;
+ if (!machine->pid)
+ default_machine = machine;
+ }
+
+ return default_machine;
+}
+
+struct machine *machines__findnew(struct machines *machines, pid_t pid)
+{
+ char path[PATH_MAX];
+ const char *root_dir = "";
+ struct machine *machine = machines__find(machines, pid);
+
+ if (machine && (machine->pid == pid))
+ goto out;
+
+ if ((pid != HOST_KERNEL_ID) &&
+ (pid != DEFAULT_GUEST_KERNEL_ID) &&
+ (symbol_conf.guestmount)) {
+ sprintf(path, "%s/%d", symbol_conf.guestmount, pid);
+ if (access(path, R_OK)) {
+ static struct strlist *seen;
+
+ if (!seen)
+ seen = strlist__new(NULL, NULL);
+
+ if (!strlist__has_entry(seen, path)) {
+ pr_err("Can't access file %s\n", path);
+ strlist__add(seen, path);
+ }
+ machine = NULL;
+ goto out;
+ }
+ root_dir = path;
+ }
+
+ machine = machines__add(machines, pid, root_dir);
+out:
+ return machine;
+}
+
+struct machine *machines__find_guest(struct machines *machines, pid_t pid)
+{
+ struct machine *machine = machines__find(machines, pid);
+
+ if (!machine)
+ machine = machines__findnew(machines, DEFAULT_GUEST_KERNEL_ID);
+ return machine;
+}
+
+/*
+ * A common case for KVM test programs is that the test program acts as the
+ * hypervisor, creating, running and destroying the virtual machine, and
+ * providing the guest object code from its own object code. In this case,
+ * the VM is not running an OS, but only the functions loaded into it by the
+ * hypervisor test program, and conveniently, loaded at the same virtual
+ * addresses.
+ *
+ * Normally to resolve addresses, MMAP events are needed to map addresses
+ * back to the object code and debug symbols for that object code.
+ *
+ * Currently, there is no way to get such mapping information from guests
+ * but, in the scenario described above, the guest has the same mappings
+ * as the hypervisor, so support for that scenario can be achieved.
+ *
+ * To support that, copy the host thread's maps to the guest thread's maps.
+ * Note, we do not discover the guest until we encounter a guest event,
+ * which works well because it is not until then that we know that the host
+ * thread's maps have been set up.
+ *
+ * This function returns the guest thread. Apart from keeping the data
+ * structures sane, using a thread belonging to the guest machine, instead
+ * of the host thread, allows it to have its own comm (refer
+ * thread__set_guest_comm()).
+ */
+static struct thread *findnew_guest_code(struct machine *machine,
+ struct machine *host_machine,
+ pid_t pid)
+{
+ struct thread *host_thread;
+ struct thread *thread;
+ int err;
+
+ if (!machine)
+ return NULL;
+
+ thread = machine__findnew_thread(machine, -1, pid);
+ if (!thread)
+ return NULL;
+
+ /* Assume maps are set up if there are any */
+ if (maps__nr_maps(thread__maps(thread)))
+ return thread;
+
+ host_thread = machine__find_thread(host_machine, -1, pid);
+ if (!host_thread)
+ goto out_err;
+
+ thread__set_guest_comm(thread, pid);
+
+ /*
+ * Guest code can be found in hypervisor process at the same address
+ * so copy host maps.
+ */
+ err = maps__clone(thread, thread__maps(host_thread));
+ thread__put(host_thread);
+ if (err)
+ goto out_err;
+
+ return thread;
+
+out_err:
+ thread__zput(thread);
+ return NULL;
+}
+
+struct thread *machines__findnew_guest_code(struct machines *machines, pid_t pid)
+{
+ struct machine *host_machine = machines__find(machines, HOST_KERNEL_ID);
+ struct machine *machine = machines__findnew(machines, pid);
+
+ return findnew_guest_code(machine, host_machine, pid);
+}
+
+struct thread *machine__findnew_guest_code(struct machine *machine, pid_t pid)
+{
+ struct machines *machines = machine->machines;
+ struct machine *host_machine;
+
+ if (!machines)
+ return NULL;
+
+ host_machine = machines__find(machines, HOST_KERNEL_ID);
+
+ return findnew_guest_code(machine, host_machine, pid);
+}
+
+void machines__process_guests(struct machines *machines,
+ machine__process_t process, void *data)
+{
+ struct rb_node *nd;
+
+ for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
+ struct machine *pos = rb_entry(nd, struct machine, rb_node);
+ process(pos, data);
+ }
+}
+
+void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size)
+{
+ struct rb_node *node;
+ struct machine *machine;
+
+ machines->host.id_hdr_size = id_hdr_size;
+
+ for (node = rb_first_cached(&machines->guests); node;
+ node = rb_next(node)) {
+ machine = rb_entry(node, struct machine, rb_node);
+ machine->id_hdr_size = id_hdr_size;
+ }
+
+ return;
+}
+
+static void machine__update_thread_pid(struct machine *machine,
+ struct thread *th, pid_t pid)
+{
+ struct thread *leader;
+
+ if (pid == thread__pid(th) || pid == -1 || thread__pid(th) != -1)
+ return;
+
+ thread__set_pid(th, pid);
+
+ if (thread__pid(th) == thread__tid(th))
+ return;
+
+ leader = __machine__findnew_thread(machine, thread__pid(th), thread__pid(th));
+ if (!leader)
+ goto out_err;
+
+ if (!thread__maps(leader))
+ thread__set_maps(leader, maps__new(machine));
+
+ if (!thread__maps(leader))
+ goto out_err;
+
+ if (thread__maps(th) == thread__maps(leader))
+ goto out_put;
+
+ if (thread__maps(th)) {
+ /*
+ * Maps are created from MMAP events which provide the pid and
+ * tid. Consequently there never should be any maps on a thread
+ * with an unknown pid. Just print an error if there are.
+ */
+ if (!maps__empty(thread__maps(th)))
+ pr_err("Discarding thread maps for %d:%d\n",
+ thread__pid(th), thread__tid(th));
+ maps__put(thread__maps(th));
+ }
+
+ thread__set_maps(th, maps__get(thread__maps(leader)));
+out_put:
+ thread__put(leader);
+ return;
+out_err:
+ pr_err("Failed to join map groups for %d:%d\n", thread__pid(th), thread__tid(th));
+ goto out_put;
+}
+
+/*
+ * Front-end cache - TID lookups come in blocks,
+ * so most of the time we dont have to look up
+ * the full rbtree:
+ */
+static struct thread*
+__threads__get_last_match(struct threads *threads, struct machine *machine,
+ int pid, int tid)
+{
+ struct thread *th;
+
+ th = threads->last_match;
+ if (th != NULL) {
+ if (thread__tid(th) == tid) {
+ machine__update_thread_pid(machine, th, pid);
+ return thread__get(th);
+ }
+ thread__put(threads->last_match);
+ threads->last_match = NULL;
+ }
+
+ return NULL;
+}
+
+static struct thread*
+threads__get_last_match(struct threads *threads, struct machine *machine,
+ int pid, int tid)
+{
+ struct thread *th = NULL;
+
+ if (perf_singlethreaded)
+ th = __threads__get_last_match(threads, machine, pid, tid);
+
+ return th;
+}
+
+static void
+__threads__set_last_match(struct threads *threads, struct thread *th)
+{
+ thread__put(threads->last_match);
+ threads->last_match = thread__get(th);
+}
+
+static void
+threads__set_last_match(struct threads *threads, struct thread *th)
+{
+ if (perf_singlethreaded)
+ __threads__set_last_match(threads, th);
+}
+
+/*
+ * Caller must eventually drop thread->refcnt returned with a successful
+ * lookup/new thread inserted.
+ */
+static struct thread *____machine__findnew_thread(struct machine *machine,
+ struct threads *threads,
+ pid_t pid, pid_t tid,
+ bool create)
+{
+ struct rb_node **p = &threads->entries.rb_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct thread *th;
+ struct thread_rb_node *nd;
+ bool leftmost = true;
+
+ th = threads__get_last_match(threads, machine, pid, tid);
+ if (th)
+ return th;
+
+ while (*p != NULL) {
+ parent = *p;
+ th = rb_entry(parent, struct thread_rb_node, rb_node)->thread;
+
+ if (thread__tid(th) == tid) {
+ threads__set_last_match(threads, th);
+ machine__update_thread_pid(machine, th, pid);
+ return thread__get(th);
+ }
+
+ if (tid < thread__tid(th))
+ p = &(*p)->rb_left;
+ else {
+ p = &(*p)->rb_right;
+ leftmost = false;
+ }
+ }
+
+ if (!create)
+ return NULL;
+
+ th = thread__new(pid, tid);
+ if (th == NULL)
+ return NULL;
+
+ nd = malloc(sizeof(*nd));
+ if (nd == NULL) {
+ thread__put(th);
+ return NULL;
+ }
+ nd->thread = th;
+
+ rb_link_node(&nd->rb_node, parent, p);
+ rb_insert_color_cached(&nd->rb_node, &threads->entries, leftmost);
+ /*
+ * We have to initialize maps separately after rb tree is updated.
+ *
+ * The reason is that we call machine__findnew_thread within
+ * thread__init_maps to find the thread leader and that would screwed
+ * the rb tree.
+ */
+ if (thread__init_maps(th, machine)) {
+ pr_err("Thread init failed thread %d\n", pid);
+ rb_erase_cached(&nd->rb_node, &threads->entries);
+ RB_CLEAR_NODE(&nd->rb_node);
+ free(nd);
+ thread__put(th);
+ return NULL;
+ }
+ /*
+ * It is now in the rbtree, get a ref
+ */
+ threads__set_last_match(threads, th);
+ ++threads->nr;
+
+ return thread__get(th);
+}
+
+struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid)
+{
+ return ____machine__findnew_thread(machine, machine__threads(machine, tid), pid, tid, true);
+}
+
+struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
+ pid_t tid)
+{
+ struct threads *threads = machine__threads(machine, tid);
+ struct thread *th;
+
+ down_write(&threads->lock);
+ th = __machine__findnew_thread(machine, pid, tid);
+ up_write(&threads->lock);
+ return th;
+}
+
+struct thread *machine__find_thread(struct machine *machine, pid_t pid,
+ pid_t tid)
+{
+ struct threads *threads = machine__threads(machine, tid);
+ struct thread *th;
+
+ down_read(&threads->lock);
+ th = ____machine__findnew_thread(machine, threads, pid, tid, false);
+ up_read(&threads->lock);
+ return th;
+}
+
+/*
+ * Threads are identified by pid and tid, and the idle task has pid == tid == 0.
+ * So here a single thread is created for that, but actually there is a separate
+ * idle task per cpu, so there should be one 'struct thread' per cpu, but there
+ * is only 1. That causes problems for some tools, requiring workarounds. For
+ * example get_idle_thread() in builtin-sched.c, or thread_stack__per_cpu().
+ */
+struct thread *machine__idle_thread(struct machine *machine)
+{
+ struct thread *thread = machine__findnew_thread(machine, 0, 0);
+
+ if (!thread || thread__set_comm(thread, "swapper", 0) ||
+ thread__set_namespaces(thread, 0, NULL))
+ pr_err("problem inserting idle task for machine pid %d\n", machine->pid);
+
+ return thread;
+}
+
+struct comm *machine__thread_exec_comm(struct machine *machine,
+ struct thread *thread)
+{
+ if (machine->comm_exec)
+ return thread__exec_comm(thread);
+ else
+ return thread__comm(thread);
+}
+
+int machine__process_comm_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample)
+{
+ struct thread *thread = machine__findnew_thread(machine,
+ event->comm.pid,
+ event->comm.tid);
+ bool exec = event->header.misc & PERF_RECORD_MISC_COMM_EXEC;
+ int err = 0;
+
+ if (exec)
+ machine->comm_exec = true;
+
+ if (dump_trace)
+ perf_event__fprintf_comm(event, stdout);
+
+ if (thread == NULL ||
+ __thread__set_comm(thread, event->comm.comm, sample->time, exec)) {
+ dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
+ err = -1;
+ }
+
+ thread__put(thread);
+
+ return err;
+}
+
+int machine__process_namespaces_event(struct machine *machine __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused)
+{
+ struct thread *thread = machine__findnew_thread(machine,
+ event->namespaces.pid,
+ event->namespaces.tid);
+ int err = 0;
+
+ WARN_ONCE(event->namespaces.nr_namespaces > NR_NAMESPACES,
+ "\nWARNING: kernel seems to support more namespaces than perf"
+ " tool.\nTry updating the perf tool..\n\n");
+
+ WARN_ONCE(event->namespaces.nr_namespaces < NR_NAMESPACES,
+ "\nWARNING: perf tool seems to support more namespaces than"
+ " the kernel.\nTry updating the kernel..\n\n");
+
+ if (dump_trace)
+ perf_event__fprintf_namespaces(event, stdout);
+
+ if (thread == NULL ||
+ thread__set_namespaces(thread, sample->time, &event->namespaces)) {
+ dump_printf("problem processing PERF_RECORD_NAMESPACES, skipping event.\n");
+ err = -1;
+ }
+
+ thread__put(thread);
+
+ return err;
+}
+
+int machine__process_cgroup_event(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused)
+{
+ struct cgroup *cgrp;
+
+ if (dump_trace)
+ perf_event__fprintf_cgroup(event, stdout);
+
+ cgrp = cgroup__findnew(machine->env, event->cgroup.id, event->cgroup.path);
+ if (cgrp == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int machine__process_lost_event(struct machine *machine __maybe_unused,
+ union perf_event *event, struct perf_sample *sample __maybe_unused)
+{
+ dump_printf(": id:%" PRI_lu64 ": lost:%" PRI_lu64 "\n",
+ event->lost.id, event->lost.lost);
+ return 0;
+}
+
+int machine__process_lost_samples_event(struct machine *machine __maybe_unused,
+ union perf_event *event, struct perf_sample *sample)
+{
+ dump_printf(": id:%" PRIu64 ": lost samples :%" PRI_lu64 "\n",
+ sample->id, event->lost_samples.lost);
+ return 0;
+}
+
+static struct dso *machine__findnew_module_dso(struct machine *machine,
+ struct kmod_path *m,
+ const char *filename)
+{
+ struct dso *dso;
+
+ down_write(&machine->dsos.lock);
+
+ dso = __dsos__find(&machine->dsos, m->name, true);
+ if (!dso) {
+ dso = __dsos__addnew(&machine->dsos, m->name);
+ if (dso == NULL)
+ goto out_unlock;
+
+ dso__set_module_info(dso, m, machine);
+ dso__set_long_name(dso, strdup(filename), true);
+ dso->kernel = DSO_SPACE__KERNEL;
+ }
+
+ dso__get(dso);
+out_unlock:
+ up_write(&machine->dsos.lock);
+ return dso;
+}
+
+int machine__process_aux_event(struct machine *machine __maybe_unused,
+ union perf_event *event)
+{
+ if (dump_trace)
+ perf_event__fprintf_aux(event, stdout);
+ return 0;
+}
+
+int machine__process_itrace_start_event(struct machine *machine __maybe_unused,
+ union perf_event *event)
+{
+ if (dump_trace)
+ perf_event__fprintf_itrace_start(event, stdout);
+ return 0;
+}
+
+int machine__process_aux_output_hw_id_event(struct machine *machine __maybe_unused,
+ union perf_event *event)
+{
+ if (dump_trace)
+ perf_event__fprintf_aux_output_hw_id(event, stdout);
+ return 0;
+}
+
+int machine__process_switch_event(struct machine *machine __maybe_unused,
+ union perf_event *event)
+{
+ if (dump_trace)
+ perf_event__fprintf_switch(event, stdout);
+ return 0;
+}
+
+static int machine__process_ksymbol_register(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused)
+{
+ struct symbol *sym;
+ struct dso *dso;
+ struct map *map = maps__find(machine__kernel_maps(machine), event->ksymbol.addr);
+ bool put_map = false;
+ int err = 0;
+
+ if (!map) {
+ dso = dso__new(event->ksymbol.name);
+
+ if (!dso) {
+ err = -ENOMEM;
+ goto out;
+ }
+ dso->kernel = DSO_SPACE__KERNEL;
+ map = map__new2(0, dso);
+ dso__put(dso);
+ if (!map) {
+ err = -ENOMEM;
+ goto out;
+ }
+ /*
+ * The inserted map has a get on it, we need to put to release
+ * the reference count here, but do it after all accesses are
+ * done.
+ */
+ put_map = true;
+ if (event->ksymbol.ksym_type == PERF_RECORD_KSYMBOL_TYPE_OOL) {
+ dso->binary_type = DSO_BINARY_TYPE__OOL;
+ dso->data.file_size = event->ksymbol.len;
+ dso__set_loaded(dso);
+ }
+
+ map__set_start(map, event->ksymbol.addr);
+ map__set_end(map, map__start(map) + event->ksymbol.len);
+ err = maps__insert(machine__kernel_maps(machine), map);
+ if (err) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ dso__set_loaded(dso);
+
+ if (is_bpf_image(event->ksymbol.name)) {
+ dso->binary_type = DSO_BINARY_TYPE__BPF_IMAGE;
+ dso__set_long_name(dso, "", false);
+ }
+ } else {
+ dso = map__dso(map);
+ }
+
+ sym = symbol__new(map__map_ip(map, map__start(map)),
+ event->ksymbol.len,
+ 0, 0, event->ksymbol.name);
+ if (!sym) {
+ err = -ENOMEM;
+ goto out;
+ }
+ dso__insert_symbol(dso, sym);
+out:
+ if (put_map)
+ map__put(map);
+ return err;
+}
+
+static int machine__process_ksymbol_unregister(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused)
+{
+ struct symbol *sym;
+ struct map *map;
+
+ map = maps__find(machine__kernel_maps(machine), event->ksymbol.addr);
+ if (!map)
+ return 0;
+
+ if (RC_CHK_ACCESS(map) != RC_CHK_ACCESS(machine->vmlinux_map))
+ maps__remove(machine__kernel_maps(machine), map);
+ else {
+ struct dso *dso = map__dso(map);
+
+ sym = dso__find_symbol(dso, map__map_ip(map, map__start(map)));
+ if (sym)
+ dso__delete_symbol(dso, sym);
+ }
+
+ return 0;
+}
+
+int machine__process_ksymbol(struct machine *machine __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ if (dump_trace)
+ perf_event__fprintf_ksymbol(event, stdout);
+
+ if (event->ksymbol.flags & PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER)
+ return machine__process_ksymbol_unregister(machine, event,
+ sample);
+ return machine__process_ksymbol_register(machine, event, sample);
+}
+
+int machine__process_text_poke(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample __maybe_unused)
+{
+ struct map *map = maps__find(machine__kernel_maps(machine), event->text_poke.addr);
+ u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ struct dso *dso = map ? map__dso(map) : NULL;
+
+ if (dump_trace)
+ perf_event__fprintf_text_poke(event, machine, stdout);
+
+ if (!event->text_poke.new_len)
+ return 0;
+
+ if (cpumode != PERF_RECORD_MISC_KERNEL) {
+ pr_debug("%s: unsupported cpumode - ignoring\n", __func__);
+ return 0;
+ }
+
+ if (dso) {
+ u8 *new_bytes = event->text_poke.bytes + event->text_poke.old_len;
+ int ret;
+
+ /*
+ * Kernel maps might be changed when loading symbols so loading
+ * must be done prior to using kernel maps.
+ */
+ map__load(map);
+ ret = dso__data_write_cache_addr(dso, map, machine,
+ event->text_poke.addr,
+ new_bytes,
+ event->text_poke.new_len);
+ if (ret != event->text_poke.new_len)
+ pr_debug("Failed to write kernel text poke at %#" PRI_lx64 "\n",
+ event->text_poke.addr);
+ } else {
+ pr_debug("Failed to find kernel text poke address map for %#" PRI_lx64 "\n",
+ event->text_poke.addr);
+ }
+
+ return 0;
+}
+
+static struct map *machine__addnew_module_map(struct machine *machine, u64 start,
+ const char *filename)
+{
+ struct map *map = NULL;
+ struct kmod_path m;
+ struct dso *dso;
+ int err;
+
+ if (kmod_path__parse_name(&m, filename))
+ return NULL;
+
+ dso = machine__findnew_module_dso(machine, &m, filename);
+ if (dso == NULL)
+ goto out;
+
+ map = map__new2(start, dso);
+ if (map == NULL)
+ goto out;
+
+ err = maps__insert(machine__kernel_maps(machine), map);
+ /* If maps__insert failed, return NULL. */
+ if (err) {
+ map__put(map);
+ map = NULL;
+ }
+out:
+ /* put the dso here, corresponding to machine__findnew_module_dso */
+ dso__put(dso);
+ zfree(&m.name);
+ return map;
+}
+
+size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
+{
+ struct rb_node *nd;
+ size_t ret = __dsos__fprintf(&machines->host.dsos.head, fp);
+
+ for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
+ struct machine *pos = rb_entry(nd, struct machine, rb_node);
+ ret += __dsos__fprintf(&pos->dsos.head, fp);
+ }
+
+ return ret;
+}
+
+size_t machine__fprintf_dsos_buildid(struct machine *m, FILE *fp,
+ bool (skip)(struct dso *dso, int parm), int parm)
+{
+ return __dsos__fprintf_buildid(&m->dsos.head, fp, skip, parm);
+}
+
+size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
+ bool (skip)(struct dso *dso, int parm), int parm)
+{
+ struct rb_node *nd;
+ size_t ret = machine__fprintf_dsos_buildid(&machines->host, fp, skip, parm);
+
+ for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
+ struct machine *pos = rb_entry(nd, struct machine, rb_node);
+ ret += machine__fprintf_dsos_buildid(pos, fp, skip, parm);
+ }
+ return ret;
+}
+
+size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
+{
+ int i;
+ size_t printed = 0;
+ struct dso *kdso = machine__kernel_dso(machine);
+
+ if (kdso->has_build_id) {
+ char filename[PATH_MAX];
+ if (dso__build_id_filename(kdso, filename, sizeof(filename),
+ false))
+ printed += fprintf(fp, "[0] %s\n", filename);
+ }
+
+ for (i = 0; i < vmlinux_path__nr_entries; ++i)
+ printed += fprintf(fp, "[%d] %s\n",
+ i + kdso->has_build_id, vmlinux_path[i]);
+
+ return printed;
+}
+
+size_t machine__fprintf(struct machine *machine, FILE *fp)
+{
+ struct rb_node *nd;
+ size_t ret;
+ int i;
+
+ for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+ struct threads *threads = &machine->threads[i];
+
+ down_read(&threads->lock);
+
+ ret = fprintf(fp, "Threads: %u\n", threads->nr);
+
+ for (nd = rb_first_cached(&threads->entries); nd;
+ nd = rb_next(nd)) {
+ struct thread *pos = rb_entry(nd, struct thread_rb_node, rb_node)->thread;
+
+ ret += thread__fprintf(pos, fp);
+ }
+
+ up_read(&threads->lock);
+ }
+ return ret;
+}
+
+static struct dso *machine__get_kernel(struct machine *machine)
+{
+ const char *vmlinux_name = machine->mmap_name;
+ struct dso *kernel;
+
+ if (machine__is_host(machine)) {
+ if (symbol_conf.vmlinux_name)
+ vmlinux_name = symbol_conf.vmlinux_name;
+
+ kernel = machine__findnew_kernel(machine, vmlinux_name,
+ "[kernel]", DSO_SPACE__KERNEL);
+ } else {
+ if (symbol_conf.default_guest_vmlinux_name)
+ vmlinux_name = symbol_conf.default_guest_vmlinux_name;
+
+ kernel = machine__findnew_kernel(machine, vmlinux_name,
+ "[guest.kernel]",
+ DSO_SPACE__KERNEL_GUEST);
+ }
+
+ if (kernel != NULL && (!kernel->has_build_id))
+ dso__read_running_kernel_build_id(kernel, machine);
+
+ return kernel;
+}
+
+void machine__get_kallsyms_filename(struct machine *machine, char *buf,
+ size_t bufsz)
+{
+ if (machine__is_default_guest(machine))
+ scnprintf(buf, bufsz, "%s", symbol_conf.default_guest_kallsyms);
+ else
+ scnprintf(buf, bufsz, "%s/proc/kallsyms", machine->root_dir);
+}
+
+const char *ref_reloc_sym_names[] = {"_text", "_stext", NULL};
+
+/* Figure out the start address of kernel map from /proc/kallsyms.
+ * Returns the name of the start symbol in *symbol_name. Pass in NULL as
+ * symbol_name if it's not that important.
+ */
+static int machine__get_running_kernel_start(struct machine *machine,
+ const char **symbol_name,
+ u64 *start, u64 *end)
+{
+ char filename[PATH_MAX];
+ int i, err = -1;
+ const char *name;
+ u64 addr = 0;
+
+ machine__get_kallsyms_filename(machine, filename, PATH_MAX);
+
+ if (symbol__restricted_filename(filename, "/proc/kallsyms"))
+ return 0;
+
+ for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) {
+ err = kallsyms__get_function_start(filename, name, &addr);
+ if (!err)
+ break;
+ }
+
+ if (err)
+ return -1;
+
+ if (symbol_name)
+ *symbol_name = name;
+
+ *start = addr;
+
+ err = kallsyms__get_symbol_start(filename, "_edata", &addr);
+ if (err)
+ err = kallsyms__get_function_start(filename, "_etext", &addr);
+ if (!err)
+ *end = addr;
+
+ return 0;
+}
+
+int machine__create_extra_kernel_map(struct machine *machine,
+ struct dso *kernel,
+ struct extra_kernel_map *xm)
+{
+ struct kmap *kmap;
+ struct map *map;
+ int err;
+
+ map = map__new2(xm->start, kernel);
+ if (!map)
+ return -ENOMEM;
+
+ map__set_end(map, xm->end);
+ map__set_pgoff(map, xm->pgoff);
+
+ kmap = map__kmap(map);
+
+ strlcpy(kmap->name, xm->name, KMAP_NAME_LEN);
+
+ err = maps__insert(machine__kernel_maps(machine), map);
+
+ if (!err) {
+ pr_debug2("Added extra kernel map %s %" PRIx64 "-%" PRIx64 "\n",
+ kmap->name, map__start(map), map__end(map));
+ }
+
+ map__put(map);
+
+ return err;
+}
+
+static u64 find_entry_trampoline(struct dso *dso)
+{
+ /* Duplicates are removed so lookup all aliases */
+ const char *syms[] = {
+ "_entry_trampoline",
+ "__entry_trampoline_start",
+ "entry_SYSCALL_64_trampoline",
+ };
+ struct symbol *sym = dso__first_symbol(dso);
+ unsigned int i;
+
+ for (; sym; sym = dso__next_symbol(sym)) {
+ if (sym->binding != STB_GLOBAL)
+ continue;
+ for (i = 0; i < ARRAY_SIZE(syms); i++) {
+ if (!strcmp(sym->name, syms[i]))
+ return sym->start;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * These values can be used for kernels that do not have symbols for the entry
+ * trampolines in kallsyms.
+ */
+#define X86_64_CPU_ENTRY_AREA_PER_CPU 0xfffffe0000000000ULL
+#define X86_64_CPU_ENTRY_AREA_SIZE 0x2c000
+#define X86_64_ENTRY_TRAMPOLINE 0x6000
+
+/* Map x86_64 PTI entry trampolines */
+int machine__map_x86_64_entry_trampolines(struct machine *machine,
+ struct dso *kernel)
+{
+ struct maps *kmaps = machine__kernel_maps(machine);
+ int nr_cpus_avail, cpu;
+ bool found = false;
+ struct map_rb_node *rb_node;
+ u64 pgoff;
+
+ /*
+ * In the vmlinux case, pgoff is a virtual address which must now be
+ * mapped to a vmlinux offset.
+ */
+ maps__for_each_entry(kmaps, rb_node) {
+ struct map *dest_map, *map = rb_node->map;
+ struct kmap *kmap = __map__kmap(map);
+
+ if (!kmap || !is_entry_trampoline(kmap->name))
+ continue;
+
+ dest_map = maps__find(kmaps, map__pgoff(map));
+ if (dest_map != map)
+ map__set_pgoff(map, map__map_ip(dest_map, map__pgoff(map)));
+ found = true;
+ }
+ if (found || machine->trampolines_mapped)
+ return 0;
+
+ pgoff = find_entry_trampoline(kernel);
+ if (!pgoff)
+ return 0;
+
+ nr_cpus_avail = machine__nr_cpus_avail(machine);
+
+ /* Add a 1 page map for each CPU's entry trampoline */
+ for (cpu = 0; cpu < nr_cpus_avail; cpu++) {
+ u64 va = X86_64_CPU_ENTRY_AREA_PER_CPU +
+ cpu * X86_64_CPU_ENTRY_AREA_SIZE +
+ X86_64_ENTRY_TRAMPOLINE;
+ struct extra_kernel_map xm = {
+ .start = va,
+ .end = va + page_size,
+ .pgoff = pgoff,
+ };
+
+ strlcpy(xm.name, ENTRY_TRAMPOLINE_NAME, KMAP_NAME_LEN);
+
+ if (machine__create_extra_kernel_map(machine, kernel, &xm) < 0)
+ return -1;
+ }
+
+ machine->trampolines_mapped = nr_cpus_avail;
+
+ return 0;
+}
+
+int __weak machine__create_extra_kernel_maps(struct machine *machine __maybe_unused,
+ struct dso *kernel __maybe_unused)
+{
+ return 0;
+}
+
+static int
+__machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
+{
+ /* In case of renewal the kernel map, destroy previous one */
+ machine__destroy_kernel_maps(machine);
+
+ map__put(machine->vmlinux_map);
+ machine->vmlinux_map = map__new2(0, kernel);
+ if (machine->vmlinux_map == NULL)
+ return -ENOMEM;
+
+ map__set_map_ip(machine->vmlinux_map, identity__map_ip);
+ map__set_unmap_ip(machine->vmlinux_map, identity__map_ip);
+ return maps__insert(machine__kernel_maps(machine), machine->vmlinux_map);
+}
+
+void machine__destroy_kernel_maps(struct machine *machine)
+{
+ struct kmap *kmap;
+ struct map *map = machine__kernel_map(machine);
+
+ if (map == NULL)
+ return;
+
+ kmap = map__kmap(map);
+ maps__remove(machine__kernel_maps(machine), map);
+ if (kmap && kmap->ref_reloc_sym) {
+ zfree((char **)&kmap->ref_reloc_sym->name);
+ zfree(&kmap->ref_reloc_sym);
+ }
+
+ map__zput(machine->vmlinux_map);
+}
+
+int machines__create_guest_kernel_maps(struct machines *machines)
+{
+ int ret = 0;
+ struct dirent **namelist = NULL;
+ int i, items = 0;
+ char path[PATH_MAX];
+ pid_t pid;
+ char *endp;
+
+ if (symbol_conf.default_guest_vmlinux_name ||
+ symbol_conf.default_guest_modules ||
+ symbol_conf.default_guest_kallsyms) {
+ machines__create_kernel_maps(machines, DEFAULT_GUEST_KERNEL_ID);
+ }
+
+ if (symbol_conf.guestmount) {
+ items = scandir(symbol_conf.guestmount, &namelist, NULL, NULL);
+ if (items <= 0)
+ return -ENOENT;
+ for (i = 0; i < items; i++) {
+ if (!isdigit(namelist[i]->d_name[0])) {
+ /* Filter out . and .. */
+ continue;
+ }
+ pid = (pid_t)strtol(namelist[i]->d_name, &endp, 10);
+ if ((*endp != '\0') ||
+ (endp == namelist[i]->d_name) ||
+ (errno == ERANGE)) {
+ pr_debug("invalid directory (%s). Skipping.\n",
+ namelist[i]->d_name);
+ continue;
+ }
+ sprintf(path, "%s/%s/proc/kallsyms",
+ symbol_conf.guestmount,
+ namelist[i]->d_name);
+ ret = access(path, R_OK);
+ if (ret) {
+ pr_debug("Can't access file %s\n", path);
+ goto failure;
+ }
+ machines__create_kernel_maps(machines, pid);
+ }
+failure:
+ free(namelist);
+ }
+
+ return ret;
+}
+
+void machines__destroy_kernel_maps(struct machines *machines)
+{
+ struct rb_node *next = rb_first_cached(&machines->guests);
+
+ machine__destroy_kernel_maps(&machines->host);
+
+ while (next) {
+ struct machine *pos = rb_entry(next, struct machine, rb_node);
+
+ next = rb_next(&pos->rb_node);
+ rb_erase_cached(&pos->rb_node, &machines->guests);
+ machine__delete(pos);
+ }
+}
+
+int machines__create_kernel_maps(struct machines *machines, pid_t pid)
+{
+ struct machine *machine = machines__findnew(machines, pid);
+
+ if (machine == NULL)
+ return -1;
+
+ return machine__create_kernel_maps(machine);
+}
+
+int machine__load_kallsyms(struct machine *machine, const char *filename)
+{
+ struct map *map = machine__kernel_map(machine);
+ struct dso *dso = map__dso(map);
+ int ret = __dso__load_kallsyms(dso, filename, map, true);
+
+ if (ret > 0) {
+ dso__set_loaded(dso);
+ /*
+ * Since /proc/kallsyms will have multiple sessions for the
+ * kernel, with modules between them, fixup the end of all
+ * sections.
+ */
+ maps__fixup_end(machine__kernel_maps(machine));
+ }
+
+ return ret;
+}
+
+int machine__load_vmlinux_path(struct machine *machine)
+{
+ struct map *map = machine__kernel_map(machine);
+ struct dso *dso = map__dso(map);
+ int ret = dso__load_vmlinux_path(dso, map);
+
+ if (ret > 0)
+ dso__set_loaded(dso);
+
+ return ret;
+}
+
+static char *get_kernel_version(const char *root_dir)
+{
+ char version[PATH_MAX];
+ FILE *file;
+ char *name, *tmp;
+ const char *prefix = "Linux version ";
+
+ sprintf(version, "%s/proc/version", root_dir);
+ file = fopen(version, "r");
+ if (!file)
+ return NULL;
+
+ tmp = fgets(version, sizeof(version), file);
+ fclose(file);
+ if (!tmp)
+ return NULL;
+
+ name = strstr(version, prefix);
+ if (!name)
+ return NULL;
+ name += strlen(prefix);
+ tmp = strchr(name, ' ');
+ if (tmp)
+ *tmp = '\0';
+
+ return strdup(name);
+}
+
+static bool is_kmod_dso(struct dso *dso)
+{
+ return dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE ||
+ dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE;
+}
+
+static int maps__set_module_path(struct maps *maps, const char *path, struct kmod_path *m)
+{
+ char *long_name;
+ struct dso *dso;
+ struct map *map = maps__find_by_name(maps, m->name);
+
+ if (map == NULL)
+ return 0;
+
+ long_name = strdup(path);
+ if (long_name == NULL)
+ return -ENOMEM;
+
+ dso = map__dso(map);
+ dso__set_long_name(dso, long_name, true);
+ dso__kernel_module_get_build_id(dso, "");
+
+ /*
+ * Full name could reveal us kmod compression, so
+ * we need to update the symtab_type if needed.
+ */
+ if (m->comp && is_kmod_dso(dso)) {
+ dso->symtab_type++;
+ dso->comp = m->comp;
+ }
+
+ return 0;
+}
+
+static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, int depth)
+{
+ struct dirent *dent;
+ DIR *dir = opendir(dir_name);
+ int ret = 0;
+
+ if (!dir) {
+ pr_debug("%s: cannot open %s dir\n", __func__, dir_name);
+ return -1;
+ }
+
+ while ((dent = readdir(dir)) != NULL) {
+ char path[PATH_MAX];
+ struct stat st;
+
+ /*sshfs might return bad dent->d_type, so we have to stat*/
+ path__join(path, sizeof(path), dir_name, dent->d_name);
+ if (stat(path, &st))
+ continue;
+
+ if (S_ISDIR(st.st_mode)) {
+ if (!strcmp(dent->d_name, ".") ||
+ !strcmp(dent->d_name, ".."))
+ continue;
+
+ /* Do not follow top-level source and build symlinks */
+ if (depth == 0) {
+ if (!strcmp(dent->d_name, "source") ||
+ !strcmp(dent->d_name, "build"))
+ continue;
+ }
+
+ ret = maps__set_modules_path_dir(maps, path, depth + 1);
+ if (ret < 0)
+ goto out;
+ } else {
+ struct kmod_path m;
+
+ ret = kmod_path__parse_name(&m, dent->d_name);
+ if (ret)
+ goto out;
+
+ if (m.kmod)
+ ret = maps__set_module_path(maps, path, &m);
+
+ zfree(&m.name);
+
+ if (ret)
+ goto out;
+ }
+ }
+
+out:
+ closedir(dir);
+ return ret;
+}
+
+static int machine__set_modules_path(struct machine *machine)
+{
+ char *version;
+ char modules_path[PATH_MAX];
+
+ version = get_kernel_version(machine->root_dir);
+ if (!version)
+ return -1;
+
+ snprintf(modules_path, sizeof(modules_path), "%s/lib/modules/%s",
+ machine->root_dir, version);
+ free(version);
+
+ return maps__set_modules_path_dir(machine__kernel_maps(machine), modules_path, 0);
+}
+int __weak arch__fix_module_text_start(u64 *start __maybe_unused,
+ u64 *size __maybe_unused,
+ const char *name __maybe_unused)
+{
+ return 0;
+}
+
+static int machine__create_module(void *arg, const char *name, u64 start,
+ u64 size)
+{
+ struct machine *machine = arg;
+ struct map *map;
+
+ if (arch__fix_module_text_start(&start, &size, name) < 0)
+ return -1;
+
+ map = machine__addnew_module_map(machine, start, name);
+ if (map == NULL)
+ return -1;
+ map__set_end(map, start + size);
+
+ dso__kernel_module_get_build_id(map__dso(map), machine->root_dir);
+ map__put(map);
+ return 0;
+}
+
+static int machine__create_modules(struct machine *machine)
+{
+ const char *modules;
+ char path[PATH_MAX];
+
+ if (machine__is_default_guest(machine)) {
+ modules = symbol_conf.default_guest_modules;
+ } else {
+ snprintf(path, PATH_MAX, "%s/proc/modules", machine->root_dir);
+ modules = path;
+ }
+
+ if (symbol__restricted_filename(modules, "/proc/modules"))
+ return -1;
+
+ if (modules__parse(modules, machine, machine__create_module))
+ return -1;
+
+ if (!machine__set_modules_path(machine))
+ return 0;
+
+ pr_debug("Problems setting modules path maps, continuing anyway...\n");
+
+ return 0;
+}
+
+static void machine__set_kernel_mmap(struct machine *machine,
+ u64 start, u64 end)
+{
+ map__set_start(machine->vmlinux_map, start);
+ map__set_end(machine->vmlinux_map, end);
+ /*
+ * Be a bit paranoid here, some perf.data file came with
+ * a zero sized synthesized MMAP event for the kernel.
+ */
+ if (start == 0 && end == 0)
+ map__set_end(machine->vmlinux_map, ~0ULL);
+}
+
+static int machine__update_kernel_mmap(struct machine *machine,
+ u64 start, u64 end)
+{
+ struct map *orig, *updated;
+ int err;
+
+ orig = machine->vmlinux_map;
+ updated = map__get(orig);
+
+ machine->vmlinux_map = updated;
+ machine__set_kernel_mmap(machine, start, end);
+ maps__remove(machine__kernel_maps(machine), orig);
+ err = maps__insert(machine__kernel_maps(machine), updated);
+ map__put(orig);
+
+ return err;
+}
+
+int machine__create_kernel_maps(struct machine *machine)
+{
+ struct dso *kernel = machine__get_kernel(machine);
+ const char *name = NULL;
+ u64 start = 0, end = ~0ULL;
+ int ret;
+
+ if (kernel == NULL)
+ return -1;
+
+ ret = __machine__create_kernel_maps(machine, kernel);
+ if (ret < 0)
+ goto out_put;
+
+ if (symbol_conf.use_modules && machine__create_modules(machine) < 0) {
+ if (machine__is_host(machine))
+ pr_debug("Problems creating module maps, "
+ "continuing anyway...\n");
+ else
+ pr_debug("Problems creating module maps for guest %d, "
+ "continuing anyway...\n", machine->pid);
+ }
+
+ if (!machine__get_running_kernel_start(machine, &name, &start, &end)) {
+ if (name &&
+ map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map, name, start)) {
+ machine__destroy_kernel_maps(machine);
+ ret = -1;
+ goto out_put;
+ }
+
+ /*
+ * we have a real start address now, so re-order the kmaps
+ * assume it's the last in the kmaps
+ */
+ ret = machine__update_kernel_mmap(machine, start, end);
+ if (ret < 0)
+ goto out_put;
+ }
+
+ if (machine__create_extra_kernel_maps(machine, kernel))
+ pr_debug("Problems creating extra kernel maps, continuing anyway...\n");
+
+ if (end == ~0ULL) {
+ /* update end address of the kernel map using adjacent module address */
+ struct map_rb_node *rb_node = maps__find_node(machine__kernel_maps(machine),
+ machine__kernel_map(machine));
+ struct map_rb_node *next = map_rb_node__next(rb_node);
+
+ if (next)
+ machine__set_kernel_mmap(machine, start, map__start(next->map));
+ }
+
+out_put:
+ dso__put(kernel);
+ return ret;
+}
+
+static bool machine__uses_kcore(struct machine *machine)
+{
+ struct dso *dso;
+
+ list_for_each_entry(dso, &machine->dsos.head, node) {
+ if (dso__is_kcore(dso))
+ return true;
+ }
+
+ return false;
+}
+
+static bool perf_event__is_extra_kernel_mmap(struct machine *machine,
+ struct extra_kernel_map *xm)
+{
+ return machine__is(machine, "x86_64") &&
+ is_entry_trampoline(xm->name);
+}
+
+static int machine__process_extra_kernel_map(struct machine *machine,
+ struct extra_kernel_map *xm)
+{
+ struct dso *kernel = machine__kernel_dso(machine);
+
+ if (kernel == NULL)
+ return -1;
+
+ return machine__create_extra_kernel_map(machine, kernel, xm);
+}
+
+static int machine__process_kernel_mmap_event(struct machine *machine,
+ struct extra_kernel_map *xm,
+ struct build_id *bid)
+{
+ enum dso_space_type dso_space;
+ bool is_kernel_mmap;
+ const char *mmap_name = machine->mmap_name;
+
+ /* If we have maps from kcore then we do not need or want any others */
+ if (machine__uses_kcore(machine))
+ return 0;
+
+ if (machine__is_host(machine))
+ dso_space = DSO_SPACE__KERNEL;
+ else
+ dso_space = DSO_SPACE__KERNEL_GUEST;
+
+ is_kernel_mmap = memcmp(xm->name, mmap_name, strlen(mmap_name) - 1) == 0;
+ if (!is_kernel_mmap && !machine__is_host(machine)) {
+ /*
+ * If the event was recorded inside the guest and injected into
+ * the host perf.data file, then it will match a host mmap_name,
+ * so try that - see machine__set_mmap_name().
+ */
+ mmap_name = "[kernel.kallsyms]";
+ is_kernel_mmap = memcmp(xm->name, mmap_name, strlen(mmap_name) - 1) == 0;
+ }
+ if (xm->name[0] == '/' ||
+ (!is_kernel_mmap && xm->name[0] == '[')) {
+ struct map *map = machine__addnew_module_map(machine, xm->start, xm->name);
+
+ if (map == NULL)
+ goto out_problem;
+
+ map__set_end(map, map__start(map) + xm->end - xm->start);
+
+ if (build_id__is_defined(bid))
+ dso__set_build_id(map__dso(map), bid);
+
+ map__put(map);
+ } else if (is_kernel_mmap) {
+ const char *symbol_name = xm->name + strlen(mmap_name);
+ /*
+ * Should be there already, from the build-id table in
+ * the header.
+ */
+ struct dso *kernel = NULL;
+ struct dso *dso;
+
+ down_read(&machine->dsos.lock);
+
+ list_for_each_entry(dso, &machine->dsos.head, node) {
+
+ /*
+ * The cpumode passed to is_kernel_module is not the
+ * cpumode of *this* event. If we insist on passing
+ * correct cpumode to is_kernel_module, we should
+ * record the cpumode when we adding this dso to the
+ * linked list.
+ *
+ * However we don't really need passing correct
+ * cpumode. We know the correct cpumode must be kernel
+ * mode (if not, we should not link it onto kernel_dsos
+ * list).
+ *
+ * Therefore, we pass PERF_RECORD_MISC_CPUMODE_UNKNOWN.
+ * is_kernel_module() treats it as a kernel cpumode.
+ */
+
+ if (!dso->kernel ||
+ is_kernel_module(dso->long_name,
+ PERF_RECORD_MISC_CPUMODE_UNKNOWN))
+ continue;
+
+
+ kernel = dso__get(dso);
+ break;
+ }
+
+ up_read(&machine->dsos.lock);
+
+ if (kernel == NULL)
+ kernel = machine__findnew_dso(machine, machine->mmap_name);
+ if (kernel == NULL)
+ goto out_problem;
+
+ kernel->kernel = dso_space;
+ if (__machine__create_kernel_maps(machine, kernel) < 0) {
+ dso__put(kernel);
+ goto out_problem;
+ }
+
+ if (strstr(kernel->long_name, "vmlinux"))
+ dso__set_short_name(kernel, "[kernel.vmlinux]", false);
+
+ if (machine__update_kernel_mmap(machine, xm->start, xm->end) < 0) {
+ dso__put(kernel);
+ goto out_problem;
+ }
+
+ if (build_id__is_defined(bid))
+ dso__set_build_id(kernel, bid);
+
+ /*
+ * Avoid using a zero address (kptr_restrict) for the ref reloc
+ * symbol. Effectively having zero here means that at record
+ * time /proc/sys/kernel/kptr_restrict was non zero.
+ */
+ if (xm->pgoff != 0) {
+ map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map,
+ symbol_name,
+ xm->pgoff);
+ }
+
+ if (machine__is_default_guest(machine)) {
+ /*
+ * preload dso of guest kernel and modules
+ */
+ dso__load(kernel, machine__kernel_map(machine));
+ }
+ dso__put(kernel);
+ } else if (perf_event__is_extra_kernel_mmap(machine, xm)) {
+ return machine__process_extra_kernel_map(machine, xm);
+ }
+ return 0;
+out_problem:
+ return -1;
+}
+
+int machine__process_mmap2_event(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ struct thread *thread;
+ struct map *map;
+ struct dso_id dso_id = {
+ .maj = event->mmap2.maj,
+ .min = event->mmap2.min,
+ .ino = event->mmap2.ino,
+ .ino_generation = event->mmap2.ino_generation,
+ };
+ struct build_id __bid, *bid = NULL;
+ int ret = 0;
+
+ if (dump_trace)
+ perf_event__fprintf_mmap2(event, stdout);
+
+ if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
+ bid = &__bid;
+ build_id__init(bid, event->mmap2.build_id, event->mmap2.build_id_size);
+ }
+
+ if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
+ sample->cpumode == PERF_RECORD_MISC_KERNEL) {
+ struct extra_kernel_map xm = {
+ .start = event->mmap2.start,
+ .end = event->mmap2.start + event->mmap2.len,
+ .pgoff = event->mmap2.pgoff,
+ };
+
+ strlcpy(xm.name, event->mmap2.filename, KMAP_NAME_LEN);
+ ret = machine__process_kernel_mmap_event(machine, &xm, bid);
+ if (ret < 0)
+ goto out_problem;
+ return 0;
+ }
+
+ thread = machine__findnew_thread(machine, event->mmap2.pid,
+ event->mmap2.tid);
+ if (thread == NULL)
+ goto out_problem;
+
+ map = map__new(machine, event->mmap2.start,
+ event->mmap2.len, event->mmap2.pgoff,
+ &dso_id, event->mmap2.prot,
+ event->mmap2.flags, bid,
+ event->mmap2.filename, thread);
+
+ if (map == NULL)
+ goto out_problem_map;
+
+ ret = thread__insert_map(thread, map);
+ if (ret)
+ goto out_problem_insert;
+
+ thread__put(thread);
+ map__put(map);
+ return 0;
+
+out_problem_insert:
+ map__put(map);
+out_problem_map:
+ thread__put(thread);
+out_problem:
+ dump_printf("problem processing PERF_RECORD_MMAP2, skipping event.\n");
+ return 0;
+}
+
+int machine__process_mmap_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample)
+{
+ struct thread *thread;
+ struct map *map;
+ u32 prot = 0;
+ int ret = 0;
+
+ if (dump_trace)
+ perf_event__fprintf_mmap(event, stdout);
+
+ if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
+ sample->cpumode == PERF_RECORD_MISC_KERNEL) {
+ struct extra_kernel_map xm = {
+ .start = event->mmap.start,
+ .end = event->mmap.start + event->mmap.len,
+ .pgoff = event->mmap.pgoff,
+ };
+
+ strlcpy(xm.name, event->mmap.filename, KMAP_NAME_LEN);
+ ret = machine__process_kernel_mmap_event(machine, &xm, NULL);
+ if (ret < 0)
+ goto out_problem;
+ return 0;
+ }
+
+ thread = machine__findnew_thread(machine, event->mmap.pid,
+ event->mmap.tid);
+ if (thread == NULL)
+ goto out_problem;
+
+ if (!(event->header.misc & PERF_RECORD_MISC_MMAP_DATA))
+ prot = PROT_EXEC;
+
+ map = map__new(machine, event->mmap.start,
+ event->mmap.len, event->mmap.pgoff,
+ NULL, prot, 0, NULL, event->mmap.filename, thread);
+
+ if (map == NULL)
+ goto out_problem_map;
+
+ ret = thread__insert_map(thread, map);
+ if (ret)
+ goto out_problem_insert;
+
+ thread__put(thread);
+ map__put(map);
+ return 0;
+
+out_problem_insert:
+ map__put(map);
+out_problem_map:
+ thread__put(thread);
+out_problem:
+ dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
+ return 0;
+}
+
+static void __machine__remove_thread(struct machine *machine, struct thread_rb_node *nd,
+ struct thread *th, bool lock)
+{
+ struct threads *threads = machine__threads(machine, thread__tid(th));
+
+ if (!nd)
+ nd = thread_rb_node__find(th, &threads->entries.rb_root);
+
+ if (threads->last_match && RC_CHK_ACCESS(threads->last_match) == RC_CHK_ACCESS(th))
+ threads__set_last_match(threads, NULL);
+
+ if (lock)
+ down_write(&threads->lock);
+
+ BUG_ON(refcount_read(thread__refcnt(th)) == 0);
+
+ thread__put(nd->thread);
+ rb_erase_cached(&nd->rb_node, &threads->entries);
+ RB_CLEAR_NODE(&nd->rb_node);
+ --threads->nr;
+
+ free(nd);
+
+ if (lock)
+ up_write(&threads->lock);
+}
+
+void machine__remove_thread(struct machine *machine, struct thread *th)
+{
+ return __machine__remove_thread(machine, NULL, th, true);
+}
+
+int machine__process_fork_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample)
+{
+ struct thread *thread = machine__find_thread(machine,
+ event->fork.pid,
+ event->fork.tid);
+ struct thread *parent = machine__findnew_thread(machine,
+ event->fork.ppid,
+ event->fork.ptid);
+ bool do_maps_clone = true;
+ int err = 0;
+
+ if (dump_trace)
+ perf_event__fprintf_task(event, stdout);
+
+ /*
+ * There may be an existing thread that is not actually the parent,
+ * either because we are processing events out of order, or because the
+ * (fork) event that would have removed the thread was lost. Assume the
+ * latter case and continue on as best we can.
+ */
+ if (thread__pid(parent) != (pid_t)event->fork.ppid) {
+ dump_printf("removing erroneous parent thread %d/%d\n",
+ thread__pid(parent), thread__tid(parent));
+ machine__remove_thread(machine, parent);
+ thread__put(parent);
+ parent = machine__findnew_thread(machine, event->fork.ppid,
+ event->fork.ptid);
+ }
+
+ /* if a thread currently exists for the thread id remove it */
+ if (thread != NULL) {
+ machine__remove_thread(machine, thread);
+ thread__put(thread);
+ }
+
+ thread = machine__findnew_thread(machine, event->fork.pid,
+ event->fork.tid);
+ /*
+ * When synthesizing FORK events, we are trying to create thread
+ * objects for the already running tasks on the machine.
+ *
+ * Normally, for a kernel FORK event, we want to clone the parent's
+ * maps because that is what the kernel just did.
+ *
+ * But when synthesizing, this should not be done. If we do, we end up
+ * with overlapping maps as we process the synthesized MMAP2 events that
+ * get delivered shortly thereafter.
+ *
+ * Use the FORK event misc flags in an internal way to signal this
+ * situation, so we can elide the map clone when appropriate.
+ */
+ if (event->fork.header.misc & PERF_RECORD_MISC_FORK_EXEC)
+ do_maps_clone = false;
+
+ if (thread == NULL || parent == NULL ||
+ thread__fork(thread, parent, sample->time, do_maps_clone) < 0) {
+ dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
+ err = -1;
+ }
+ thread__put(thread);
+ thread__put(parent);
+
+ return err;
+}
+
+int machine__process_exit_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample __maybe_unused)
+{
+ struct thread *thread = machine__find_thread(machine,
+ event->fork.pid,
+ event->fork.tid);
+
+ if (dump_trace)
+ perf_event__fprintf_task(event, stdout);
+
+ if (thread != NULL)
+ thread__put(thread);
+
+ return 0;
+}
+
+int machine__process_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample)
+{
+ int ret;
+
+ switch (event->header.type) {
+ case PERF_RECORD_COMM:
+ ret = machine__process_comm_event(machine, event, sample); break;
+ case PERF_RECORD_MMAP:
+ ret = machine__process_mmap_event(machine, event, sample); break;
+ case PERF_RECORD_NAMESPACES:
+ ret = machine__process_namespaces_event(machine, event, sample); break;
+ case PERF_RECORD_CGROUP:
+ ret = machine__process_cgroup_event(machine, event, sample); break;
+ case PERF_RECORD_MMAP2:
+ ret = machine__process_mmap2_event(machine, event, sample); break;
+ case PERF_RECORD_FORK:
+ ret = machine__process_fork_event(machine, event, sample); break;
+ case PERF_RECORD_EXIT:
+ ret = machine__process_exit_event(machine, event, sample); break;
+ case PERF_RECORD_LOST:
+ ret = machine__process_lost_event(machine, event, sample); break;
+ case PERF_RECORD_AUX:
+ ret = machine__process_aux_event(machine, event); break;
+ case PERF_RECORD_ITRACE_START:
+ ret = machine__process_itrace_start_event(machine, event); break;
+ case PERF_RECORD_LOST_SAMPLES:
+ ret = machine__process_lost_samples_event(machine, event, sample); break;
+ case PERF_RECORD_SWITCH:
+ case PERF_RECORD_SWITCH_CPU_WIDE:
+ ret = machine__process_switch_event(machine, event); break;
+ case PERF_RECORD_KSYMBOL:
+ ret = machine__process_ksymbol(machine, event, sample); break;
+ case PERF_RECORD_BPF_EVENT:
+ ret = machine__process_bpf(machine, event, sample); break;
+ case PERF_RECORD_TEXT_POKE:
+ ret = machine__process_text_poke(machine, event, sample); break;
+ case PERF_RECORD_AUX_OUTPUT_HW_ID:
+ ret = machine__process_aux_output_hw_id_event(machine, event); break;
+ default:
+ ret = -1;
+ break;
+ }
+
+ return ret;
+}
+
+static bool symbol__match_regex(struct symbol *sym, regex_t *regex)
+{
+ if (!regexec(regex, sym->name, 0, NULL, 0))
+ return true;
+ return false;
+}
+
+static void ip__resolve_ams(struct thread *thread,
+ struct addr_map_symbol *ams,
+ u64 ip)
+{
+ struct addr_location al;
+
+ addr_location__init(&al);
+ /*
+ * We cannot use the header.misc hint to determine whether a
+ * branch stack address is user, kernel, guest, hypervisor.
+ * Branches may straddle the kernel/user/hypervisor boundaries.
+ * Thus, we have to try consecutively until we find a match
+ * or else, the symbol is unknown
+ */
+ thread__find_cpumode_addr_location(thread, ip, &al);
+
+ ams->addr = ip;
+ ams->al_addr = al.addr;
+ ams->al_level = al.level;
+ ams->ms.maps = maps__get(al.maps);
+ ams->ms.sym = al.sym;
+ ams->ms.map = map__get(al.map);
+ ams->phys_addr = 0;
+ ams->data_page_size = 0;
+ addr_location__exit(&al);
+}
+
+static void ip__resolve_data(struct thread *thread,
+ u8 m, struct addr_map_symbol *ams,
+ u64 addr, u64 phys_addr, u64 daddr_page_size)
+{
+ struct addr_location al;
+
+ addr_location__init(&al);
+
+ thread__find_symbol(thread, m, addr, &al);
+
+ ams->addr = addr;
+ ams->al_addr = al.addr;
+ ams->al_level = al.level;
+ ams->ms.maps = maps__get(al.maps);
+ ams->ms.sym = al.sym;
+ ams->ms.map = map__get(al.map);
+ ams->phys_addr = phys_addr;
+ ams->data_page_size = daddr_page_size;
+ addr_location__exit(&al);
+}
+
+struct mem_info *sample__resolve_mem(struct perf_sample *sample,
+ struct addr_location *al)
+{
+ struct mem_info *mi = mem_info__new();
+
+ if (!mi)
+ return NULL;
+
+ ip__resolve_ams(al->thread, &mi->iaddr, sample->ip);
+ ip__resolve_data(al->thread, al->cpumode, &mi->daddr,
+ sample->addr, sample->phys_addr,
+ sample->data_page_size);
+ mi->data_src.val = sample->data_src;
+
+ return mi;
+}
+
+static char *callchain_srcline(struct map_symbol *ms, u64 ip)
+{
+ struct map *map = ms->map;
+ char *srcline = NULL;
+ struct dso *dso;
+
+ if (!map || callchain_param.key == CCKEY_FUNCTION)
+ return srcline;
+
+ dso = map__dso(map);
+ srcline = srcline__tree_find(&dso->srclines, ip);
+ if (!srcline) {
+ bool show_sym = false;
+ bool show_addr = callchain_param.key == CCKEY_ADDRESS;
+
+ srcline = get_srcline(dso, map__rip_2objdump(map, ip),
+ ms->sym, show_sym, show_addr, ip);
+ srcline__tree_insert(&dso->srclines, ip, srcline);
+ }
+
+ return srcline;
+}
+
+struct iterations {
+ int nr_loop_iter;
+ u64 cycles;
+};
+
+static int add_callchain_ip(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ u8 *cpumode,
+ u64 ip,
+ bool branch,
+ struct branch_flags *flags,
+ struct iterations *iter,
+ u64 branch_from)
+{
+ struct map_symbol ms = {};
+ struct addr_location al;
+ int nr_loop_iter = 0, err = 0;
+ u64 iter_cycles = 0;
+ const char *srcline = NULL;
+
+ addr_location__init(&al);
+ al.filtered = 0;
+ al.sym = NULL;
+ al.srcline = NULL;
+ if (!cpumode) {
+ thread__find_cpumode_addr_location(thread, ip, &al);
+ } else {
+ if (ip >= PERF_CONTEXT_MAX) {
+ switch (ip) {
+ case PERF_CONTEXT_HV:
+ *cpumode = PERF_RECORD_MISC_HYPERVISOR;
+ break;
+ case PERF_CONTEXT_KERNEL:
+ *cpumode = PERF_RECORD_MISC_KERNEL;
+ break;
+ case PERF_CONTEXT_USER:
+ *cpumode = PERF_RECORD_MISC_USER;
+ break;
+ default:
+ pr_debug("invalid callchain context: "
+ "%"PRId64"\n", (s64) ip);
+ /*
+ * It seems the callchain is corrupted.
+ * Discard all.
+ */
+ callchain_cursor_reset(cursor);
+ err = 1;
+ goto out;
+ }
+ goto out;
+ }
+ thread__find_symbol(thread, *cpumode, ip, &al);
+ }
+
+ if (al.sym != NULL) {
+ if (perf_hpp_list.parent && !*parent &&
+ symbol__match_regex(al.sym, &parent_regex))
+ *parent = al.sym;
+ else if (have_ignore_callees && root_al &&
+ symbol__match_regex(al.sym, &ignore_callees_regex)) {
+ /* Treat this symbol as the root,
+ forgetting its callees. */
+ addr_location__copy(root_al, &al);
+ callchain_cursor_reset(cursor);
+ }
+ }
+
+ if (symbol_conf.hide_unresolved && al.sym == NULL)
+ goto out;
+
+ if (iter) {
+ nr_loop_iter = iter->nr_loop_iter;
+ iter_cycles = iter->cycles;
+ }
+
+ ms.maps = maps__get(al.maps);
+ ms.map = map__get(al.map);
+ ms.sym = al.sym;
+ srcline = callchain_srcline(&ms, al.addr);
+ err = callchain_cursor_append(cursor, ip, &ms,
+ branch, flags, nr_loop_iter,
+ iter_cycles, branch_from, srcline);
+out:
+ addr_location__exit(&al);
+ maps__put(ms.maps);
+ map__put(ms.map);
+ return err;
+}
+
+struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
+ struct addr_location *al)
+{
+ unsigned int i;
+ const struct branch_stack *bs = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
+ struct branch_info *bi = calloc(bs->nr, sizeof(struct branch_info));
+
+ if (!bi)
+ return NULL;
+
+ for (i = 0; i < bs->nr; i++) {
+ ip__resolve_ams(al->thread, &bi[i].to, entries[i].to);
+ ip__resolve_ams(al->thread, &bi[i].from, entries[i].from);
+ bi[i].flags = entries[i].flags;
+ }
+ return bi;
+}
+
+static void save_iterations(struct iterations *iter,
+ struct branch_entry *be, int nr)
+{
+ int i;
+
+ iter->nr_loop_iter++;
+ iter->cycles = 0;
+
+ for (i = 0; i < nr; i++)
+ iter->cycles += be[i].flags.cycles;
+}
+
+#define CHASHSZ 127
+#define CHASHBITS 7
+#define NO_ENTRY 0xff
+
+#define PERF_MAX_BRANCH_DEPTH 127
+
+/* Remove loops. */
+static int remove_loops(struct branch_entry *l, int nr,
+ struct iterations *iter)
+{
+ int i, j, off;
+ unsigned char chash[CHASHSZ];
+
+ memset(chash, NO_ENTRY, sizeof(chash));
+
+ BUG_ON(PERF_MAX_BRANCH_DEPTH > 255);
+
+ for (i = 0; i < nr; i++) {
+ int h = hash_64(l[i].from, CHASHBITS) % CHASHSZ;
+
+ /* no collision handling for now */
+ if (chash[h] == NO_ENTRY) {
+ chash[h] = i;
+ } else if (l[chash[h]].from == l[i].from) {
+ bool is_loop = true;
+ /* check if it is a real loop */
+ off = 0;
+ for (j = chash[h]; j < i && i + off < nr; j++, off++)
+ if (l[j].from != l[i + off].from) {
+ is_loop = false;
+ break;
+ }
+ if (is_loop) {
+ j = nr - (i + off);
+ if (j > 0) {
+ save_iterations(iter + i + off,
+ l + i, off);
+
+ memmove(iter + i, iter + i + off,
+ j * sizeof(*iter));
+
+ memmove(l + i, l + i + off,
+ j * sizeof(*l));
+ }
+
+ nr -= off;
+ }
+ }
+ }
+ return nr;
+}
+
+static int lbr_callchain_add_kernel_ip(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ u64 branch_from,
+ bool callee, int end)
+{
+ struct ip_callchain *chain = sample->callchain;
+ u8 cpumode = PERF_RECORD_MISC_USER;
+ int err, i;
+
+ if (callee) {
+ for (i = 0; i < end + 1; i++) {
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, chain->ips[i],
+ false, NULL, NULL, branch_from);
+ if (err)
+ return err;
+ }
+ return 0;
+ }
+
+ for (i = end; i >= 0; i--) {
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, chain->ips[i],
+ false, NULL, NULL, branch_from);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static void save_lbr_cursor_node(struct thread *thread,
+ struct callchain_cursor *cursor,
+ int idx)
+{
+ struct lbr_stitch *lbr_stitch = thread__lbr_stitch(thread);
+
+ if (!lbr_stitch)
+ return;
+
+ if (cursor->pos == cursor->nr) {
+ lbr_stitch->prev_lbr_cursor[idx].valid = false;
+ return;
+ }
+
+ if (!cursor->curr)
+ cursor->curr = cursor->first;
+ else
+ cursor->curr = cursor->curr->next;
+ memcpy(&lbr_stitch->prev_lbr_cursor[idx], cursor->curr,
+ sizeof(struct callchain_cursor_node));
+
+ lbr_stitch->prev_lbr_cursor[idx].valid = true;
+ cursor->pos++;
+}
+
+static int lbr_callchain_add_lbr_ip(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ u64 *branch_from,
+ bool callee)
+{
+ struct branch_stack *lbr_stack = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
+ u8 cpumode = PERF_RECORD_MISC_USER;
+ int lbr_nr = lbr_stack->nr;
+ struct branch_flags *flags;
+ int err, i;
+ u64 ip;
+
+ /*
+ * The curr and pos are not used in writing session. They are cleared
+ * in callchain_cursor_commit() when the writing session is closed.
+ * Using curr and pos to track the current cursor node.
+ */
+ if (thread__lbr_stitch(thread)) {
+ cursor->curr = NULL;
+ cursor->pos = cursor->nr;
+ if (cursor->nr) {
+ cursor->curr = cursor->first;
+ for (i = 0; i < (int)(cursor->nr - 1); i++)
+ cursor->curr = cursor->curr->next;
+ }
+ }
+
+ if (callee) {
+ /* Add LBR ip from first entries.to */
+ ip = entries[0].to;
+ flags = &entries[0].flags;
+ *branch_from = entries[0].from;
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, ip,
+ true, flags, NULL,
+ *branch_from);
+ if (err)
+ return err;
+
+ /*
+ * The number of cursor node increases.
+ * Move the current cursor node.
+ * But does not need to save current cursor node for entry 0.
+ * It's impossible to stitch the whole LBRs of previous sample.
+ */
+ if (thread__lbr_stitch(thread) && (cursor->pos != cursor->nr)) {
+ if (!cursor->curr)
+ cursor->curr = cursor->first;
+ else
+ cursor->curr = cursor->curr->next;
+ cursor->pos++;
+ }
+
+ /* Add LBR ip from entries.from one by one. */
+ for (i = 0; i < lbr_nr; i++) {
+ ip = entries[i].from;
+ flags = &entries[i].flags;
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, ip,
+ true, flags, NULL,
+ *branch_from);
+ if (err)
+ return err;
+ save_lbr_cursor_node(thread, cursor, i);
+ }
+ return 0;
+ }
+
+ /* Add LBR ip from entries.from one by one. */
+ for (i = lbr_nr - 1; i >= 0; i--) {
+ ip = entries[i].from;
+ flags = &entries[i].flags;
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, ip,
+ true, flags, NULL,
+ *branch_from);
+ if (err)
+ return err;
+ save_lbr_cursor_node(thread, cursor, i);
+ }
+
+ if (lbr_nr > 0) {
+ /* Add LBR ip from first entries.to */
+ ip = entries[0].to;
+ flags = &entries[0].flags;
+ *branch_from = entries[0].from;
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, ip,
+ true, flags, NULL,
+ *branch_from);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int lbr_callchain_add_stitched_lbr_ip(struct thread *thread,
+ struct callchain_cursor *cursor)
+{
+ struct lbr_stitch *lbr_stitch = thread__lbr_stitch(thread);
+ struct callchain_cursor_node *cnode;
+ struct stitch_list *stitch_node;
+ int err;
+
+ list_for_each_entry(stitch_node, &lbr_stitch->lists, node) {
+ cnode = &stitch_node->cursor;
+
+ err = callchain_cursor_append(cursor, cnode->ip,
+ &cnode->ms,
+ cnode->branch,
+ &cnode->branch_flags,
+ cnode->nr_loop_iter,
+ cnode->iter_cycles,
+ cnode->branch_from,
+ cnode->srcline);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static struct stitch_list *get_stitch_node(struct thread *thread)
+{
+ struct lbr_stitch *lbr_stitch = thread__lbr_stitch(thread);
+ struct stitch_list *stitch_node;
+
+ if (!list_empty(&lbr_stitch->free_lists)) {
+ stitch_node = list_first_entry(&lbr_stitch->free_lists,
+ struct stitch_list, node);
+ list_del(&stitch_node->node);
+
+ return stitch_node;
+ }
+
+ return malloc(sizeof(struct stitch_list));
+}
+
+static bool has_stitched_lbr(struct thread *thread,
+ struct perf_sample *cur,
+ struct perf_sample *prev,
+ unsigned int max_lbr,
+ bool callee)
+{
+ struct branch_stack *cur_stack = cur->branch_stack;
+ struct branch_entry *cur_entries = perf_sample__branch_entries(cur);
+ struct branch_stack *prev_stack = prev->branch_stack;
+ struct branch_entry *prev_entries = perf_sample__branch_entries(prev);
+ struct lbr_stitch *lbr_stitch = thread__lbr_stitch(thread);
+ int i, j, nr_identical_branches = 0;
+ struct stitch_list *stitch_node;
+ u64 cur_base, distance;
+
+ if (!cur_stack || !prev_stack)
+ return false;
+
+ /* Find the physical index of the base-of-stack for current sample. */
+ cur_base = max_lbr - cur_stack->nr + cur_stack->hw_idx + 1;
+
+ distance = (prev_stack->hw_idx > cur_base) ? (prev_stack->hw_idx - cur_base) :
+ (max_lbr + prev_stack->hw_idx - cur_base);
+ /* Previous sample has shorter stack. Nothing can be stitched. */
+ if (distance + 1 > prev_stack->nr)
+ return false;
+
+ /*
+ * Check if there are identical LBRs between two samples.
+ * Identical LBRs must have same from, to and flags values. Also,
+ * they have to be saved in the same LBR registers (same physical
+ * index).
+ *
+ * Starts from the base-of-stack of current sample.
+ */
+ for (i = distance, j = cur_stack->nr - 1; (i >= 0) && (j >= 0); i--, j--) {
+ if ((prev_entries[i].from != cur_entries[j].from) ||
+ (prev_entries[i].to != cur_entries[j].to) ||
+ (prev_entries[i].flags.value != cur_entries[j].flags.value))
+ break;
+ nr_identical_branches++;
+ }
+
+ if (!nr_identical_branches)
+ return false;
+
+ /*
+ * Save the LBRs between the base-of-stack of previous sample
+ * and the base-of-stack of current sample into lbr_stitch->lists.
+ * These LBRs will be stitched later.
+ */
+ for (i = prev_stack->nr - 1; i > (int)distance; i--) {
+
+ if (!lbr_stitch->prev_lbr_cursor[i].valid)
+ continue;
+
+ stitch_node = get_stitch_node(thread);
+ if (!stitch_node)
+ return false;
+
+ memcpy(&stitch_node->cursor, &lbr_stitch->prev_lbr_cursor[i],
+ sizeof(struct callchain_cursor_node));
+
+ if (callee)
+ list_add(&stitch_node->node, &lbr_stitch->lists);
+ else
+ list_add_tail(&stitch_node->node, &lbr_stitch->lists);
+ }
+
+ return true;
+}
+
+static bool alloc_lbr_stitch(struct thread *thread, unsigned int max_lbr)
+{
+ if (thread__lbr_stitch(thread))
+ return true;
+
+ thread__set_lbr_stitch(thread, zalloc(sizeof(struct lbr_stitch)));
+ if (!thread__lbr_stitch(thread))
+ goto err;
+
+ thread__lbr_stitch(thread)->prev_lbr_cursor =
+ calloc(max_lbr + 1, sizeof(struct callchain_cursor_node));
+ if (!thread__lbr_stitch(thread)->prev_lbr_cursor)
+ goto free_lbr_stitch;
+
+ INIT_LIST_HEAD(&thread__lbr_stitch(thread)->lists);
+ INIT_LIST_HEAD(&thread__lbr_stitch(thread)->free_lists);
+
+ return true;
+
+free_lbr_stitch:
+ free(thread__lbr_stitch(thread));
+ thread__set_lbr_stitch(thread, NULL);
+err:
+ pr_warning("Failed to allocate space for stitched LBRs. Disable LBR stitch\n");
+ thread__set_lbr_stitch_enable(thread, false);
+ return false;
+}
+
+/*
+ * Resolve LBR callstack chain sample
+ * Return:
+ * 1 on success get LBR callchain information
+ * 0 no available LBR callchain information, should try fp
+ * negative error code on other errors.
+ */
+static int resolve_lbr_callchain_sample(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ int max_stack,
+ unsigned int max_lbr)
+{
+ bool callee = (callchain_param.order == ORDER_CALLEE);
+ struct ip_callchain *chain = sample->callchain;
+ int chain_nr = min(max_stack, (int)chain->nr), i;
+ struct lbr_stitch *lbr_stitch;
+ bool stitched_lbr = false;
+ u64 branch_from = 0;
+ int err;
+
+ for (i = 0; i < chain_nr; i++) {
+ if (chain->ips[i] == PERF_CONTEXT_USER)
+ break;
+ }
+
+ /* LBR only affects the user callchain */
+ if (i == chain_nr)
+ return 0;
+
+ if (thread__lbr_stitch_enable(thread) && !sample->no_hw_idx &&
+ (max_lbr > 0) && alloc_lbr_stitch(thread, max_lbr)) {
+ lbr_stitch = thread__lbr_stitch(thread);
+
+ stitched_lbr = has_stitched_lbr(thread, sample,
+ &lbr_stitch->prev_sample,
+ max_lbr, callee);
+
+ if (!stitched_lbr && !list_empty(&lbr_stitch->lists)) {
+ list_replace_init(&lbr_stitch->lists,
+ &lbr_stitch->free_lists);
+ }
+ memcpy(&lbr_stitch->prev_sample, sample, sizeof(*sample));
+ }
+
+ if (callee) {
+ /* Add kernel ip */
+ err = lbr_callchain_add_kernel_ip(thread, cursor, sample,
+ parent, root_al, branch_from,
+ true, i);
+ if (err)
+ goto error;
+
+ err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent,
+ root_al, &branch_from, true);
+ if (err)
+ goto error;
+
+ if (stitched_lbr) {
+ err = lbr_callchain_add_stitched_lbr_ip(thread, cursor);
+ if (err)
+ goto error;
+ }
+
+ } else {
+ if (stitched_lbr) {
+ err = lbr_callchain_add_stitched_lbr_ip(thread, cursor);
+ if (err)
+ goto error;
+ }
+ err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent,
+ root_al, &branch_from, false);
+ if (err)
+ goto error;
+
+ /* Add kernel ip */
+ err = lbr_callchain_add_kernel_ip(thread, cursor, sample,
+ parent, root_al, branch_from,
+ false, i);
+ if (err)
+ goto error;
+ }
+ return 1;
+
+error:
+ return (err < 0) ? err : 0;
+}
+
+static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ u8 *cpumode, int ent)
+{
+ int err = 0;
+
+ while (--ent >= 0) {
+ u64 ip = chain->ips[ent];
+
+ if (ip >= PERF_CONTEXT_MAX) {
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, cpumode, ip,
+ false, NULL, NULL, 0);
+ break;
+ }
+ }
+ return err;
+}
+
+static u64 get_leaf_frame_caller(struct perf_sample *sample,
+ struct thread *thread, int usr_idx)
+{
+ if (machine__normalized_is(maps__machine(thread__maps(thread)), "arm64"))
+ return get_leaf_frame_caller_aarch64(sample, thread, usr_idx);
+ else
+ return 0;
+}
+
+static int thread__resolve_callchain_sample(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct evsel *evsel,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ int max_stack)
+{
+ struct branch_stack *branch = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
+ struct ip_callchain *chain = sample->callchain;
+ int chain_nr = 0;
+ u8 cpumode = PERF_RECORD_MISC_USER;
+ int i, j, err, nr_entries, usr_idx;
+ int skip_idx = -1;
+ int first_call = 0;
+ u64 leaf_frame_caller;
+
+ if (chain)
+ chain_nr = chain->nr;
+
+ if (evsel__has_branch_callstack(evsel)) {
+ struct perf_env *env = evsel__env(evsel);
+
+ err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
+ root_al, max_stack,
+ !env ? 0 : env->max_branches);
+ if (err)
+ return (err < 0) ? err : 0;
+ }
+
+ /*
+ * Based on DWARF debug information, some architectures skip
+ * a callchain entry saved by the kernel.
+ */
+ skip_idx = arch_skip_callchain_idx(thread, chain);
+
+ /*
+ * Add branches to call stack for easier browsing. This gives
+ * more context for a sample than just the callers.
+ *
+ * This uses individual histograms of paths compared to the
+ * aggregated histograms the normal LBR mode uses.
+ *
+ * Limitations for now:
+ * - No extra filters
+ * - No annotations (should annotate somehow)
+ */
+
+ if (branch && callchain_param.branch_callstack) {
+ int nr = min(max_stack, (int)branch->nr);
+ struct branch_entry be[nr];
+ struct iterations iter[nr];
+
+ if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
+ pr_warning("corrupted branch chain. skipping...\n");
+ goto check_calls;
+ }
+
+ for (i = 0; i < nr; i++) {
+ if (callchain_param.order == ORDER_CALLEE) {
+ be[i] = entries[i];
+
+ if (chain == NULL)
+ continue;
+
+ /*
+ * Check for overlap into the callchain.
+ * The return address is one off compared to
+ * the branch entry. To adjust for this
+ * assume the calling instruction is not longer
+ * than 8 bytes.
+ */
+ if (i == skip_idx ||
+ chain->ips[first_call] >= PERF_CONTEXT_MAX)
+ first_call++;
+ else if (be[i].from < chain->ips[first_call] &&
+ be[i].from >= chain->ips[first_call] - 8)
+ first_call++;
+ } else
+ be[i] = entries[branch->nr - i - 1];
+ }
+
+ memset(iter, 0, sizeof(struct iterations) * nr);
+ nr = remove_loops(be, nr, iter);
+
+ for (i = 0; i < nr; i++) {
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al,
+ NULL, be[i].to,
+ true, &be[i].flags,
+ NULL, be[i].from);
+
+ if (!err)
+ err = add_callchain_ip(thread, cursor, parent, root_al,
+ NULL, be[i].from,
+ true, &be[i].flags,
+ &iter[i], 0);
+ if (err == -EINVAL)
+ break;
+ if (err)
+ return err;
+ }
+
+ if (chain_nr == 0)
+ return 0;
+
+ chain_nr -= nr;
+ }
+
+check_calls:
+ if (chain && callchain_param.order != ORDER_CALLEE) {
+ err = find_prev_cpumode(chain, thread, cursor, parent, root_al,
+ &cpumode, chain->nr - first_call);
+ if (err)
+ return (err < 0) ? err : 0;
+ }
+ for (i = first_call, nr_entries = 0;
+ i < chain_nr && nr_entries < max_stack; i++) {
+ u64 ip;
+
+ if (callchain_param.order == ORDER_CALLEE)
+ j = i;
+ else
+ j = chain->nr - i - 1;
+
+#ifdef HAVE_SKIP_CALLCHAIN_IDX
+ if (j == skip_idx)
+ continue;
+#endif
+ ip = chain->ips[j];
+ if (ip < PERF_CONTEXT_MAX)
+ ++nr_entries;
+ else if (callchain_param.order != ORDER_CALLEE) {
+ err = find_prev_cpumode(chain, thread, cursor, parent,
+ root_al, &cpumode, j);
+ if (err)
+ return (err < 0) ? err : 0;
+ continue;
+ }
+
+ /*
+ * PERF_CONTEXT_USER allows us to locate where the user stack ends.
+ * Depending on callchain_param.order and the position of PERF_CONTEXT_USER,
+ * the index will be different in order to add the missing frame
+ * at the right place.
+ */
+
+ usr_idx = callchain_param.order == ORDER_CALLEE ? j-2 : j-1;
+
+ if (usr_idx >= 0 && chain->ips[usr_idx] == PERF_CONTEXT_USER) {
+
+ leaf_frame_caller = get_leaf_frame_caller(sample, thread, usr_idx);
+
+ /*
+ * check if leaf_frame_Caller != ip to not add the same
+ * value twice.
+ */
+
+ if (leaf_frame_caller && leaf_frame_caller != ip) {
+
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, leaf_frame_caller,
+ false, NULL, NULL, 0);
+ if (err)
+ return (err < 0) ? err : 0;
+ }
+ }
+
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, ip,
+ false, NULL, NULL, 0);
+
+ if (err)
+ return (err < 0) ? err : 0;
+ }
+
+ return 0;
+}
+
+static int append_inlines(struct callchain_cursor *cursor, struct map_symbol *ms, u64 ip)
+{
+ struct symbol *sym = ms->sym;
+ struct map *map = ms->map;
+ struct inline_node *inline_node;
+ struct inline_list *ilist;
+ struct dso *dso;
+ u64 addr;
+ int ret = 1;
+ struct map_symbol ilist_ms;
+
+ if (!symbol_conf.inline_name || !map || !sym)
+ return ret;
+
+ addr = map__dso_map_ip(map, ip);
+ addr = map__rip_2objdump(map, addr);
+ dso = map__dso(map);
+
+ inline_node = inlines__tree_find(&dso->inlined_nodes, addr);
+ if (!inline_node) {
+ inline_node = dso__parse_addr_inlines(dso, addr, sym);
+ if (!inline_node)
+ return ret;
+ inlines__tree_insert(&dso->inlined_nodes, inline_node);
+ }
+
+ ilist_ms = (struct map_symbol) {
+ .maps = maps__get(ms->maps),
+ .map = map__get(map),
+ };
+ list_for_each_entry(ilist, &inline_node->val, list) {
+ ilist_ms.sym = ilist->symbol;
+ ret = callchain_cursor_append(cursor, ip, &ilist_ms, false,
+ NULL, 0, 0, 0, ilist->srcline);
+
+ if (ret != 0)
+ return ret;
+ }
+ map__put(ilist_ms.map);
+ maps__put(ilist_ms.maps);
+
+ return ret;
+}
+
+static int unwind_entry(struct unwind_entry *entry, void *arg)
+{
+ struct callchain_cursor *cursor = arg;
+ const char *srcline = NULL;
+ u64 addr = entry->ip;
+
+ if (symbol_conf.hide_unresolved && entry->ms.sym == NULL)
+ return 0;
+
+ if (append_inlines(cursor, &entry->ms, entry->ip) == 0)
+ return 0;
+
+ /*
+ * Convert entry->ip from a virtual address to an offset in
+ * its corresponding binary.
+ */
+ if (entry->ms.map)
+ addr = map__dso_map_ip(entry->ms.map, entry->ip);
+
+ srcline = callchain_srcline(&entry->ms, addr);
+ return callchain_cursor_append(cursor, entry->ip, &entry->ms,
+ false, NULL, 0, 0, 0, srcline);
+}
+
+static int thread__resolve_callchain_unwind(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct evsel *evsel,
+ struct perf_sample *sample,
+ int max_stack)
+{
+ /* Can we do dwarf post unwind? */
+ if (!((evsel->core.attr.sample_type & PERF_SAMPLE_REGS_USER) &&
+ (evsel->core.attr.sample_type & PERF_SAMPLE_STACK_USER)))
+ return 0;
+
+ /* Bail out if nothing was captured. */
+ if ((!sample->user_regs.regs) ||
+ (!sample->user_stack.size))
+ return 0;
+
+ return unwind__get_entries(unwind_entry, cursor,
+ thread, sample, max_stack, false);
+}
+
+int thread__resolve_callchain(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct evsel *evsel,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ int max_stack)
+{
+ int ret = 0;
+
+ if (cursor == NULL)
+ return -ENOMEM;
+
+ callchain_cursor_reset(cursor);
+
+ if (callchain_param.order == ORDER_CALLEE) {
+ ret = thread__resolve_callchain_sample(thread, cursor,
+ evsel, sample,
+ parent, root_al,
+ max_stack);
+ if (ret)
+ return ret;
+ ret = thread__resolve_callchain_unwind(thread, cursor,
+ evsel, sample,
+ max_stack);
+ } else {
+ ret = thread__resolve_callchain_unwind(thread, cursor,
+ evsel, sample,
+ max_stack);
+ if (ret)
+ return ret;
+ ret = thread__resolve_callchain_sample(thread, cursor,
+ evsel, sample,
+ parent, root_al,
+ max_stack);
+ }
+
+ return ret;
+}
+
+int machine__for_each_thread(struct machine *machine,
+ int (*fn)(struct thread *thread, void *p),
+ void *priv)
+{
+ struct threads *threads;
+ struct rb_node *nd;
+ int rc = 0;
+ int i;
+
+ for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+ threads = &machine->threads[i];
+ for (nd = rb_first_cached(&threads->entries); nd;
+ nd = rb_next(nd)) {
+ struct thread_rb_node *trb = rb_entry(nd, struct thread_rb_node, rb_node);
+
+ rc = fn(trb->thread, priv);
+ if (rc != 0)
+ return rc;
+ }
+ }
+ return rc;
+}
+
+int machines__for_each_thread(struct machines *machines,
+ int (*fn)(struct thread *thread, void *p),
+ void *priv)
+{
+ struct rb_node *nd;
+ int rc = 0;
+
+ rc = machine__for_each_thread(&machines->host, fn, priv);
+ if (rc != 0)
+ return rc;
+
+ for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
+ struct machine *machine = rb_entry(nd, struct machine, rb_node);
+
+ rc = machine__for_each_thread(machine, fn, priv);
+ if (rc != 0)
+ return rc;
+ }
+ return rc;
+}
+
+pid_t machine__get_current_tid(struct machine *machine, int cpu)
+{
+ if (cpu < 0 || (size_t)cpu >= machine->current_tid_sz)
+ return -1;
+
+ return machine->current_tid[cpu];
+}
+
+int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
+ pid_t tid)
+{
+ struct thread *thread;
+ const pid_t init_val = -1;
+
+ if (cpu < 0)
+ return -EINVAL;
+
+ if (realloc_array_as_needed(machine->current_tid,
+ machine->current_tid_sz,
+ (unsigned int)cpu,
+ &init_val))
+ return -ENOMEM;
+
+ machine->current_tid[cpu] = tid;
+
+ thread = machine__findnew_thread(machine, pid, tid);
+ if (!thread)
+ return -ENOMEM;
+
+ thread__set_cpu(thread, cpu);
+ thread__put(thread);
+
+ return 0;
+}
+
+/*
+ * Compares the raw arch string. N.B. see instead perf_env__arch() or
+ * machine__normalized_is() if a normalized arch is needed.
+ */
+bool machine__is(struct machine *machine, const char *arch)
+{
+ return machine && !strcmp(perf_env__raw_arch(machine->env), arch);
+}
+
+bool machine__normalized_is(struct machine *machine, const char *arch)
+{
+ return machine && !strcmp(perf_env__arch(machine->env), arch);
+}
+
+int machine__nr_cpus_avail(struct machine *machine)
+{
+ return machine ? perf_env__nr_cpus_avail(machine->env) : 0;
+}
+
+int machine__get_kernel_start(struct machine *machine)
+{
+ struct map *map = machine__kernel_map(machine);
+ int err = 0;
+
+ /*
+ * The only addresses above 2^63 are kernel addresses of a 64-bit
+ * kernel. Note that addresses are unsigned so that on a 32-bit system
+ * all addresses including kernel addresses are less than 2^32. In
+ * that case (32-bit system), if the kernel mapping is unknown, all
+ * addresses will be assumed to be in user space - see
+ * machine__kernel_ip().
+ */
+ machine->kernel_start = 1ULL << 63;
+ if (map) {
+ err = map__load(map);
+ /*
+ * On x86_64, PTI entry trampolines are less than the
+ * start of kernel text, but still above 2^63. So leave
+ * kernel_start = 1ULL << 63 for x86_64.
+ */
+ if (!err && !machine__is(machine, "x86_64"))
+ machine->kernel_start = map__start(map);
+ }
+ return err;
+}
+
+u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr)
+{
+ u8 addr_cpumode = cpumode;
+ bool kernel_ip;
+
+ if (!machine->single_address_space)
+ goto out;
+
+ kernel_ip = machine__kernel_ip(machine, addr);
+ switch (cpumode) {
+ case PERF_RECORD_MISC_KERNEL:
+ case PERF_RECORD_MISC_USER:
+ addr_cpumode = kernel_ip ? PERF_RECORD_MISC_KERNEL :
+ PERF_RECORD_MISC_USER;
+ break;
+ case PERF_RECORD_MISC_GUEST_KERNEL:
+ case PERF_RECORD_MISC_GUEST_USER:
+ addr_cpumode = kernel_ip ? PERF_RECORD_MISC_GUEST_KERNEL :
+ PERF_RECORD_MISC_GUEST_USER;
+ break;
+ default:
+ break;
+ }
+out:
+ return addr_cpumode;
+}
+
+struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, struct dso_id *id)
+{
+ return dsos__findnew_id(&machine->dsos, filename, id);
+}
+
+struct dso *machine__findnew_dso(struct machine *machine, const char *filename)
+{
+ return machine__findnew_dso_id(machine, filename, NULL);
+}
+
+char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
+{
+ struct machine *machine = vmachine;
+ struct map *map;
+ struct symbol *sym = machine__find_kernel_symbol(machine, *addrp, &map);
+
+ if (sym == NULL)
+ return NULL;
+
+ *modp = __map__is_kmodule(map) ? (char *)map__dso(map)->short_name : NULL;
+ *addrp = map__unmap_ip(map, sym->start);
+ return sym->name;
+}
+
+int machine__for_each_dso(struct machine *machine, machine__dso_t fn, void *priv)
+{
+ struct dso *pos;
+ int err = 0;
+
+ list_for_each_entry(pos, &machine->dsos.head, node) {
+ if (fn(pos, machine, priv))
+ err = -1;
+ }
+ return err;
+}
+
+int machine__for_each_kernel_map(struct machine *machine, machine__map_t fn, void *priv)
+{
+ struct maps *maps = machine__kernel_maps(machine);
+ struct map_rb_node *pos;
+ int err = 0;
+
+ maps__for_each_entry(maps, pos) {
+ err = fn(pos->map, priv);
+ if (err != 0) {
+ break;
+ }
+ }
+ return err;
+}
+
+bool machine__is_lock_function(struct machine *machine, u64 addr)
+{
+ if (!machine->sched.text_start) {
+ struct map *kmap;
+ struct symbol *sym = machine__find_kernel_symbol_by_name(machine, "__sched_text_start", &kmap);
+
+ if (!sym) {
+ /* to avoid retry */
+ machine->sched.text_start = 1;
+ return false;
+ }
+
+ machine->sched.text_start = map__unmap_ip(kmap, sym->start);
+
+ /* should not fail from here */
+ sym = machine__find_kernel_symbol_by_name(machine, "__sched_text_end", &kmap);
+ machine->sched.text_end = map__unmap_ip(kmap, sym->start);
+
+ sym = machine__find_kernel_symbol_by_name(machine, "__lock_text_start", &kmap);
+ machine->lock.text_start = map__unmap_ip(kmap, sym->start);
+
+ sym = machine__find_kernel_symbol_by_name(machine, "__lock_text_end", &kmap);
+ machine->lock.text_end = map__unmap_ip(kmap, sym->start);
+ }
+
+ /* failed to get kernel symbols */
+ if (machine->sched.text_start == 1)
+ return false;
+
+ /* mutex and rwsem functions are in sched text section */
+ if (machine->sched.text_start <= addr && addr < machine->sched.text_end)
+ return true;
+
+ /* spinlock functions are in lock text section */
+ if (machine->lock.text_start <= addr && addr < machine->lock.text_end)
+ return true;
+
+ return false;
+}
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
new file mode 100644
index 000000000..d034ecaf8
--- /dev/null
+++ b/tools/perf/util/machine.h
@@ -0,0 +1,316 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_MACHINE_H
+#define __PERF_MACHINE_H
+
+#include <sys/types.h>
+#include <linux/rbtree.h>
+#include "maps.h"
+#include "dsos.h"
+#include "rwsem.h"
+
+struct addr_location;
+struct branch_stack;
+struct dso;
+struct dso_id;
+struct evsel;
+struct perf_sample;
+struct symbol;
+struct target;
+struct thread;
+union perf_event;
+struct machines;
+
+/* Native host kernel uses -1 as pid index in machine */
+#define HOST_KERNEL_ID (-1)
+#define DEFAULT_GUEST_KERNEL_ID (0)
+
+extern const char *ref_reloc_sym_names[];
+
+struct vdso_info;
+
+#define THREADS__TABLE_BITS 8
+#define THREADS__TABLE_SIZE (1 << THREADS__TABLE_BITS)
+
+struct threads {
+ struct rb_root_cached entries;
+ struct rw_semaphore lock;
+ unsigned int nr;
+ struct list_head dead;
+ struct thread *last_match;
+};
+
+struct machine {
+ struct rb_node rb_node;
+ pid_t pid;
+ u16 id_hdr_size;
+ bool comm_exec;
+ bool kptr_restrict_warned;
+ bool single_address_space;
+ char *root_dir;
+ char *mmap_name;
+ char *kallsyms_filename;
+ struct threads threads[THREADS__TABLE_SIZE];
+ struct vdso_info *vdso_info;
+ struct perf_env *env;
+ struct dsos dsos;
+ struct maps *kmaps;
+ struct map *vmlinux_map;
+ u64 kernel_start;
+ struct {
+ u64 text_start;
+ u64 text_end;
+ } sched, lock;
+ pid_t *current_tid;
+ size_t current_tid_sz;
+ union { /* Tool specific area */
+ void *priv;
+ u64 db_id;
+ };
+ struct machines *machines;
+ bool trampolines_mapped;
+};
+
+static inline struct threads *machine__threads(struct machine *machine, pid_t tid)
+{
+ /* Cast it to handle tid == -1 */
+ return &machine->threads[(unsigned int)tid % THREADS__TABLE_SIZE];
+}
+
+/*
+ * The main kernel (vmlinux) map
+ */
+static inline
+struct map *machine__kernel_map(struct machine *machine)
+{
+ return machine->vmlinux_map;
+}
+
+/*
+ * kernel (the one returned by machine__kernel_map()) plus kernel modules maps
+ */
+static inline
+struct maps *machine__kernel_maps(struct machine *machine)
+{
+ return machine->kmaps;
+}
+
+int machine__get_kernel_start(struct machine *machine);
+
+static inline u64 machine__kernel_start(struct machine *machine)
+{
+ if (!machine->kernel_start)
+ machine__get_kernel_start(machine);
+ return machine->kernel_start;
+}
+
+static inline bool machine__kernel_ip(struct machine *machine, u64 ip)
+{
+ u64 kernel_start = machine__kernel_start(machine);
+
+ return ip >= kernel_start;
+}
+
+u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr);
+
+struct thread *machine__find_thread(struct machine *machine, pid_t pid,
+ pid_t tid);
+struct thread *machine__idle_thread(struct machine *machine);
+struct comm *machine__thread_exec_comm(struct machine *machine,
+ struct thread *thread);
+
+int machine__process_comm_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample);
+int machine__process_exit_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample);
+int machine__process_fork_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample);
+int machine__process_lost_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample);
+int machine__process_lost_samples_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample);
+int machine__process_aux_event(struct machine *machine,
+ union perf_event *event);
+int machine__process_itrace_start_event(struct machine *machine,
+ union perf_event *event);
+int machine__process_aux_output_hw_id_event(struct machine *machine,
+ union perf_event *event);
+int machine__process_switch_event(struct machine *machine,
+ union perf_event *event);
+int machine__process_namespaces_event(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample);
+int machine__process_cgroup_event(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample);
+int machine__process_mmap_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample);
+int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample);
+int machine__process_ksymbol(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample);
+int machine__process_text_poke(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample);
+int machine__process_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample);
+
+typedef void (*machine__process_t)(struct machine *machine, void *data);
+
+struct machines {
+ struct machine host;
+ struct rb_root_cached guests;
+};
+
+void machines__init(struct machines *machines);
+void machines__exit(struct machines *machines);
+
+void machines__process_guests(struct machines *machines,
+ machine__process_t process, void *data);
+
+struct machine *machines__add(struct machines *machines, pid_t pid,
+ const char *root_dir);
+struct machine *machines__find(struct machines *machines, pid_t pid);
+struct machine *machines__findnew(struct machines *machines, pid_t pid);
+struct machine *machines__find_guest(struct machines *machines, pid_t pid);
+struct thread *machines__findnew_guest_code(struct machines *machines, pid_t pid);
+struct thread *machine__findnew_guest_code(struct machine *machine, pid_t pid);
+
+void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size);
+void machines__set_comm_exec(struct machines *machines, bool comm_exec);
+
+struct machine *machine__new_host(void);
+struct machine *machine__new_kallsyms(void);
+int machine__init(struct machine *machine, const char *root_dir, pid_t pid);
+void machine__exit(struct machine *machine);
+void machine__delete_threads(struct machine *machine);
+void machine__delete(struct machine *machine);
+void machine__remove_thread(struct machine *machine, struct thread *th);
+
+struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
+ struct addr_location *al);
+struct mem_info *sample__resolve_mem(struct perf_sample *sample,
+ struct addr_location *al);
+
+struct callchain_cursor;
+
+int thread__resolve_callchain(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct evsel *evsel,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ int max_stack);
+
+/*
+ * Default guest kernel is defined by parameter --guestkallsyms
+ * and --guestmodules
+ */
+static inline bool machine__is_default_guest(struct machine *machine)
+{
+ return machine ? machine->pid == DEFAULT_GUEST_KERNEL_ID : false;
+}
+
+static inline bool machine__is_host(struct machine *machine)
+{
+ return machine ? machine->pid == HOST_KERNEL_ID : false;
+}
+
+bool machine__is_lock_function(struct machine *machine, u64 addr);
+bool machine__is(struct machine *machine, const char *arch);
+bool machine__normalized_is(struct machine *machine, const char *arch);
+int machine__nr_cpus_avail(struct machine *machine);
+
+struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
+struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
+
+struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, struct dso_id *id);
+struct dso *machine__findnew_dso(struct machine *machine, const char *filename);
+
+size_t machine__fprintf(struct machine *machine, FILE *fp);
+
+static inline
+struct symbol *machine__find_kernel_symbol(struct machine *machine, u64 addr,
+ struct map **mapp)
+{
+ return maps__find_symbol(machine->kmaps, addr, mapp);
+}
+
+static inline
+struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine,
+ const char *name,
+ struct map **mapp)
+{
+ return maps__find_symbol_by_name(machine->kmaps, name, mapp);
+}
+
+int arch__fix_module_text_start(u64 *start, u64 *size, const char *name);
+
+int machine__load_kallsyms(struct machine *machine, const char *filename);
+
+int machine__load_vmlinux_path(struct machine *machine);
+
+size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
+ bool (skip)(struct dso *dso, int parm), int parm);
+size_t machines__fprintf_dsos(struct machines *machines, FILE *fp);
+size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
+ bool (skip)(struct dso *dso, int parm), int parm);
+
+void machine__destroy_kernel_maps(struct machine *machine);
+int machine__create_kernel_maps(struct machine *machine);
+
+int machines__create_kernel_maps(struct machines *machines, pid_t pid);
+int machines__create_guest_kernel_maps(struct machines *machines);
+void machines__destroy_kernel_maps(struct machines *machines);
+
+size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
+
+typedef int (*machine__dso_t)(struct dso *dso, struct machine *machine, void *priv);
+
+int machine__for_each_dso(struct machine *machine, machine__dso_t fn,
+ void *priv);
+
+typedef int (*machine__map_t)(struct map *map, void *priv);
+int machine__for_each_kernel_map(struct machine *machine, machine__map_t fn,
+ void *priv);
+
+int machine__for_each_thread(struct machine *machine,
+ int (*fn)(struct thread *thread, void *p),
+ void *priv);
+int machines__for_each_thread(struct machines *machines,
+ int (*fn)(struct thread *thread, void *p),
+ void *priv);
+
+pid_t machine__get_current_tid(struct machine *machine, int cpu);
+int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
+ pid_t tid);
+/*
+ * For use with libtraceevent's tep_set_function_resolver()
+ */
+char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp);
+
+void machine__get_kallsyms_filename(struct machine *machine, char *buf,
+ size_t bufsz);
+
+int machine__create_extra_kernel_maps(struct machine *machine,
+ struct dso *kernel);
+
+/* Kernel-space maps for symbols that are outside the main kernel map and module maps */
+struct extra_kernel_map {
+ u64 start;
+ u64 end;
+ u64 pgoff;
+ char name[KMAP_NAME_LEN];
+};
+
+int machine__create_extra_kernel_map(struct machine *machine,
+ struct dso *kernel,
+ struct extra_kernel_map *xm);
+
+int machine__map_x86_64_entry_trampolines(struct machine *machine,
+ struct dso *kernel);
+
+int machine__resolve(struct machine *machine, struct addr_location *al,
+ struct perf_sample *sample);
+
+#endif /* __PERF_MACHINE_H */
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
new file mode 100644
index 000000000..f64b83004
--- /dev/null
+++ b/tools/perf/util/map.c
@@ -0,0 +1,647 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
+#include "debug.h"
+#include "dso.h"
+#include "map.h"
+#include "namespaces.h"
+#include "srcline.h"
+#include "symbol.h"
+#include "thread.h"
+#include "vdso.h"
+
+static inline int is_android_lib(const char *filename)
+{
+ return strstarts(filename, "/data/app-lib/") ||
+ strstarts(filename, "/system/lib/");
+}
+
+static inline bool replace_android_lib(const char *filename, char *newfilename)
+{
+ const char *libname;
+ char *app_abi;
+ size_t app_abi_length, new_length;
+ size_t lib_length = 0;
+
+ libname = strrchr(filename, '/');
+ if (libname)
+ lib_length = strlen(libname);
+
+ app_abi = getenv("APP_ABI");
+ if (!app_abi)
+ return false;
+
+ app_abi_length = strlen(app_abi);
+
+ if (strstarts(filename, "/data/app-lib/")) {
+ char *apk_path;
+
+ if (!app_abi_length)
+ return false;
+
+ new_length = 7 + app_abi_length + lib_length;
+
+ apk_path = getenv("APK_PATH");
+ if (apk_path) {
+ new_length += strlen(apk_path) + 1;
+ if (new_length > PATH_MAX)
+ return false;
+ snprintf(newfilename, new_length,
+ "%s/libs/%s/%s", apk_path, app_abi, libname);
+ } else {
+ if (new_length > PATH_MAX)
+ return false;
+ snprintf(newfilename, new_length,
+ "libs/%s/%s", app_abi, libname);
+ }
+ return true;
+ }
+
+ if (strstarts(filename, "/system/lib/")) {
+ char *ndk, *app;
+ const char *arch;
+ int ndk_length, app_length;
+
+ ndk = getenv("NDK_ROOT");
+ app = getenv("APP_PLATFORM");
+
+ if (!(ndk && app))
+ return false;
+
+ ndk_length = strlen(ndk);
+ app_length = strlen(app);
+
+ if (!(ndk_length && app_length && app_abi_length))
+ return false;
+
+ arch = !strncmp(app_abi, "arm", 3) ? "arm" :
+ !strncmp(app_abi, "mips", 4) ? "mips" :
+ !strncmp(app_abi, "x86", 3) ? "x86" : NULL;
+
+ if (!arch)
+ return false;
+
+ new_length = 27 + ndk_length +
+ app_length + lib_length
+ + strlen(arch);
+
+ if (new_length > PATH_MAX)
+ return false;
+ snprintf(newfilename, new_length,
+ "%.*s/platforms/%.*s/arch-%s/usr/lib/%s",
+ ndk_length, ndk, app_length, app, arch, libname);
+
+ return true;
+ }
+ return false;
+}
+
+void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso)
+{
+ map__set_start(map, start);
+ map__set_end(map, end);
+ map__set_pgoff(map, pgoff);
+ map__set_reloc(map, 0);
+ map__set_dso(map, dso__get(dso));
+ map__set_map_ip(map, map__dso_map_ip);
+ map__set_unmap_ip(map, map__dso_unmap_ip);
+ map__set_erange_warned(map, false);
+ refcount_set(map__refcnt(map), 1);
+}
+
+struct map *map__new(struct machine *machine, u64 start, u64 len,
+ u64 pgoff, struct dso_id *id,
+ u32 prot, u32 flags, struct build_id *bid,
+ char *filename, struct thread *thread)
+{
+ struct map *result;
+ RC_STRUCT(map) *map;
+ struct nsinfo *nsi = NULL;
+ struct nsinfo *nnsi;
+
+ map = malloc(sizeof(*map));
+ if (ADD_RC_CHK(result, map)) {
+ char newfilename[PATH_MAX];
+ struct dso *dso, *header_bid_dso;
+ int anon, no_dso, vdso, android;
+
+ android = is_android_lib(filename);
+ anon = is_anon_memory(filename) || flags & MAP_HUGETLB;
+ vdso = is_vdso_map(filename);
+ no_dso = is_no_dso_memory(filename);
+ map->prot = prot;
+ map->flags = flags;
+ nsi = nsinfo__get(thread__nsinfo(thread));
+
+ if ((anon || no_dso) && nsi && (prot & PROT_EXEC)) {
+ snprintf(newfilename, sizeof(newfilename),
+ "/tmp/perf-%d.map", nsinfo__pid(nsi));
+ filename = newfilename;
+ }
+
+ if (android) {
+ if (replace_android_lib(filename, newfilename))
+ filename = newfilename;
+ }
+
+ if (vdso) {
+ /* The vdso maps are always on the host and not the
+ * container. Ensure that we don't use setns to look
+ * them up.
+ */
+ nnsi = nsinfo__copy(nsi);
+ if (nnsi) {
+ nsinfo__put(nsi);
+ nsinfo__clear_need_setns(nnsi);
+ nsi = nnsi;
+ }
+ pgoff = 0;
+ dso = machine__findnew_vdso(machine, thread);
+ } else
+ dso = machine__findnew_dso_id(machine, filename, id);
+
+ if (dso == NULL)
+ goto out_delete;
+
+ map__init(result, start, start + len, pgoff, dso);
+
+ if (anon || no_dso) {
+ map->map_ip = map->unmap_ip = identity__map_ip;
+
+ /*
+ * Set memory without DSO as loaded. All map__find_*
+ * functions still return NULL, and we avoid the
+ * unnecessary map__load warning.
+ */
+ if (!(prot & PROT_EXEC))
+ dso__set_loaded(dso);
+ }
+ mutex_lock(&dso->lock);
+ nsinfo__put(dso->nsinfo);
+ dso->nsinfo = nsi;
+ mutex_unlock(&dso->lock);
+
+ if (build_id__is_defined(bid)) {
+ dso__set_build_id(dso, bid);
+ } else {
+ /*
+ * If the mmap event had no build ID, search for an existing dso from the
+ * build ID header by name. Otherwise only the dso loaded at the time of
+ * reading the header will have the build ID set and all future mmaps will
+ * have it missing.
+ */
+ down_read(&machine->dsos.lock);
+ header_bid_dso = __dsos__find(&machine->dsos, filename, false);
+ up_read(&machine->dsos.lock);
+ if (header_bid_dso && header_bid_dso->header_build_id) {
+ dso__set_build_id(dso, &header_bid_dso->bid);
+ dso->header_build_id = 1;
+ }
+ }
+ dso__put(dso);
+ }
+ return result;
+out_delete:
+ nsinfo__put(nsi);
+ RC_CHK_FREE(result);
+ return NULL;
+}
+
+/*
+ * Constructor variant for modules (where we know from /proc/modules where
+ * they are loaded) and for vmlinux, where only after we load all the
+ * symbols we'll know where it starts and ends.
+ */
+struct map *map__new2(u64 start, struct dso *dso)
+{
+ struct map *result;
+ RC_STRUCT(map) *map;
+
+ map = calloc(1, sizeof(*map) + (dso->kernel ? sizeof(struct kmap) : 0));
+ if (ADD_RC_CHK(result, map)) {
+ /*
+ * ->end will be filled after we load all the symbols
+ */
+ map__init(result, start, 0, 0, dso);
+ }
+
+ return result;
+}
+
+bool __map__is_kernel(const struct map *map)
+{
+ if (!map__dso(map)->kernel)
+ return false;
+ return machine__kernel_map(maps__machine(map__kmaps((struct map *)map))) == map;
+}
+
+bool __map__is_extra_kernel_map(const struct map *map)
+{
+ struct kmap *kmap = __map__kmap((struct map *)map);
+
+ return kmap && kmap->name[0];
+}
+
+bool __map__is_bpf_prog(const struct map *map)
+{
+ const char *name;
+ struct dso *dso = map__dso(map);
+
+ if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
+ return true;
+
+ /*
+ * If PERF_RECORD_BPF_EVENT is not included, the dso will not have
+ * type of DSO_BINARY_TYPE__BPF_PROG_INFO. In such cases, we can
+ * guess the type based on name.
+ */
+ name = dso->short_name;
+ return name && (strstr(name, "bpf_prog_") == name);
+}
+
+bool __map__is_bpf_image(const struct map *map)
+{
+ const char *name;
+ struct dso *dso = map__dso(map);
+
+ if (dso->binary_type == DSO_BINARY_TYPE__BPF_IMAGE)
+ return true;
+
+ /*
+ * If PERF_RECORD_KSYMBOL is not included, the dso will not have
+ * type of DSO_BINARY_TYPE__BPF_IMAGE. In such cases, we can
+ * guess the type based on name.
+ */
+ name = dso->short_name;
+ return name && is_bpf_image(name);
+}
+
+bool __map__is_ool(const struct map *map)
+{
+ const struct dso *dso = map__dso(map);
+
+ return dso && dso->binary_type == DSO_BINARY_TYPE__OOL;
+}
+
+bool map__has_symbols(const struct map *map)
+{
+ return dso__has_symbols(map__dso(map));
+}
+
+static void map__exit(struct map *map)
+{
+ BUG_ON(refcount_read(map__refcnt(map)) != 0);
+ dso__zput(RC_CHK_ACCESS(map)->dso);
+}
+
+void map__delete(struct map *map)
+{
+ map__exit(map);
+ RC_CHK_FREE(map);
+}
+
+void map__put(struct map *map)
+{
+ if (map && refcount_dec_and_test(map__refcnt(map)))
+ map__delete(map);
+ else
+ RC_CHK_PUT(map);
+}
+
+void map__fixup_start(struct map *map)
+{
+ struct dso *dso = map__dso(map);
+ struct rb_root_cached *symbols = &dso->symbols;
+ struct rb_node *nd = rb_first_cached(symbols);
+
+ if (nd != NULL) {
+ struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
+
+ map__set_start(map, sym->start);
+ }
+}
+
+void map__fixup_end(struct map *map)
+{
+ struct dso *dso = map__dso(map);
+ struct rb_root_cached *symbols = &dso->symbols;
+ struct rb_node *nd = rb_last(&symbols->rb_root);
+
+ if (nd != NULL) {
+ struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
+ map__set_end(map, sym->end);
+ }
+}
+
+#define DSO__DELETED "(deleted)"
+
+int map__load(struct map *map)
+{
+ struct dso *dso = map__dso(map);
+ const char *name = dso->long_name;
+ int nr;
+
+ if (dso__loaded(dso))
+ return 0;
+
+ nr = dso__load(dso, map);
+ if (nr < 0) {
+ if (dso->has_build_id) {
+ char sbuild_id[SBUILD_ID_SIZE];
+
+ build_id__sprintf(&dso->bid, sbuild_id);
+ pr_debug("%s with build id %s not found", name, sbuild_id);
+ } else
+ pr_debug("Failed to open %s", name);
+
+ pr_debug(", continuing without symbols\n");
+ return -1;
+ } else if (nr == 0) {
+#ifdef HAVE_LIBELF_SUPPORT
+ const size_t len = strlen(name);
+ const size_t real_len = len - sizeof(DSO__DELETED);
+
+ if (len > sizeof(DSO__DELETED) &&
+ strcmp(name + real_len + 1, DSO__DELETED) == 0) {
+ pr_debug("%.*s was updated (is prelink enabled?). "
+ "Restart the long running apps that use it!\n",
+ (int)real_len, name);
+ } else {
+ pr_debug("no symbols found in %s, maybe install a debug package?\n", name);
+ }
+#endif
+ return -1;
+ }
+
+ return 0;
+}
+
+struct symbol *map__find_symbol(struct map *map, u64 addr)
+{
+ if (map__load(map) < 0)
+ return NULL;
+
+ return dso__find_symbol(map__dso(map), addr);
+}
+
+struct symbol *map__find_symbol_by_name_idx(struct map *map, const char *name, size_t *idx)
+{
+ struct dso *dso;
+
+ if (map__load(map) < 0)
+ return NULL;
+
+ dso = map__dso(map);
+ dso__sort_by_name(dso);
+
+ return dso__find_symbol_by_name(dso, name, idx);
+}
+
+struct symbol *map__find_symbol_by_name(struct map *map, const char *name)
+{
+ size_t idx;
+
+ return map__find_symbol_by_name_idx(map, name, &idx);
+}
+
+struct map *map__clone(struct map *from)
+{
+ struct map *result;
+ RC_STRUCT(map) *map;
+ size_t size = sizeof(RC_STRUCT(map));
+ struct dso *dso = map__dso(from);
+
+ if (dso && dso->kernel)
+ size += sizeof(struct kmap);
+
+ map = memdup(RC_CHK_ACCESS(from), size);
+ if (ADD_RC_CHK(result, map)) {
+ refcount_set(&map->refcnt, 1);
+ map->dso = dso__get(dso);
+ }
+
+ return result;
+}
+
+size_t map__fprintf(struct map *map, FILE *fp)
+{
+ const struct dso *dso = map__dso(map);
+
+ return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s\n",
+ map__start(map), map__end(map), map__pgoff(map), dso->name);
+}
+
+static bool prefer_dso_long_name(const struct dso *dso, bool print_off)
+{
+ return dso->long_name &&
+ (symbol_conf.show_kernel_path ||
+ (print_off && (dso->name[0] == '[' || dso__is_kcore(dso))));
+}
+
+static size_t __map__fprintf_dsoname(struct map *map, bool print_off, FILE *fp)
+{
+ char buf[symbol_conf.pad_output_len_dso + 1];
+ const char *dsoname = "[unknown]";
+ const struct dso *dso = map ? map__dso(map) : NULL;
+
+ if (dso) {
+ if (prefer_dso_long_name(dso, print_off))
+ dsoname = dso->long_name;
+ else
+ dsoname = dso->name;
+ }
+
+ if (symbol_conf.pad_output_len_dso) {
+ scnprintf_pad(buf, symbol_conf.pad_output_len_dso, "%s", dsoname);
+ dsoname = buf;
+ }
+
+ return fprintf(fp, "%s", dsoname);
+}
+
+size_t map__fprintf_dsoname(struct map *map, FILE *fp)
+{
+ return __map__fprintf_dsoname(map, false, fp);
+}
+
+size_t map__fprintf_dsoname_dsoff(struct map *map, bool print_off, u64 addr, FILE *fp)
+{
+ const struct dso *dso = map ? map__dso(map) : NULL;
+ int printed = 0;
+
+ if (print_off && (!dso || !dso__is_object_file(dso)))
+ print_off = false;
+ printed += fprintf(fp, " (");
+ printed += __map__fprintf_dsoname(map, print_off, fp);
+ if (print_off)
+ printed += fprintf(fp, "+0x%" PRIx64, addr);
+ printed += fprintf(fp, ")");
+
+ return printed;
+}
+
+char *map__srcline(struct map *map, u64 addr, struct symbol *sym)
+{
+ if (map == NULL)
+ return SRCLINE_UNKNOWN;
+
+ return get_srcline(map__dso(map), map__rip_2objdump(map, addr), sym, true, true, addr);
+}
+
+int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
+ FILE *fp)
+{
+ const struct dso *dso = map ? map__dso(map) : NULL;
+ int ret = 0;
+
+ if (dso) {
+ char *srcline = map__srcline(map, addr, NULL);
+ if (srcline != SRCLINE_UNKNOWN)
+ ret = fprintf(fp, "%s%s", prefix, srcline);
+ zfree_srcline(&srcline);
+ }
+ return ret;
+}
+
+void srccode_state_free(struct srccode_state *state)
+{
+ zfree(&state->srcfile);
+ state->line = 0;
+}
+
+/**
+ * map__rip_2objdump - convert symbol start address to objdump address.
+ * @map: memory map
+ * @rip: symbol start address
+ *
+ * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN.
+ * map->dso->adjust_symbols==1 for ET_EXEC-like cases except ET_REL which is
+ * relative to section start.
+ *
+ * Return: Address suitable for passing to "objdump --start-address="
+ */
+u64 map__rip_2objdump(struct map *map, u64 rip)
+{
+ struct kmap *kmap = __map__kmap(map);
+ const struct dso *dso = map__dso(map);
+
+ /*
+ * vmlinux does not have program headers for PTI entry trampolines and
+ * kcore may not either. However the trampoline object code is on the
+ * main kernel map, so just use that instead.
+ */
+ if (kmap && is_entry_trampoline(kmap->name) && kmap->kmaps) {
+ struct machine *machine = maps__machine(kmap->kmaps);
+
+ if (machine) {
+ struct map *kernel_map = machine__kernel_map(machine);
+
+ if (kernel_map)
+ map = kernel_map;
+ }
+ }
+
+ if (!dso->adjust_symbols)
+ return rip;
+
+ if (dso->rel)
+ return rip - map__pgoff(map);
+
+ /*
+ * kernel modules also have DSO_TYPE_USER in dso->kernel,
+ * but all kernel modules are ET_REL, so won't get here.
+ */
+ if (dso->kernel == DSO_SPACE__USER)
+ return rip + dso->text_offset;
+
+ return map__unmap_ip(map, rip) - map__reloc(map);
+}
+
+/**
+ * map__objdump_2mem - convert objdump address to a memory address.
+ * @map: memory map
+ * @ip: objdump address
+ *
+ * Closely related to map__rip_2objdump(), this function takes an address from
+ * objdump and converts it to a memory address. Note this assumes that @map
+ * contains the address. To be sure the result is valid, check it forwards
+ * e.g. map__rip_2objdump(map__map_ip(map, map__objdump_2mem(map, ip))) == ip
+ *
+ * Return: Memory address.
+ */
+u64 map__objdump_2mem(struct map *map, u64 ip)
+{
+ const struct dso *dso = map__dso(map);
+
+ if (!dso->adjust_symbols)
+ return map__unmap_ip(map, ip);
+
+ if (dso->rel)
+ return map__unmap_ip(map, ip + map__pgoff(map));
+
+ /*
+ * kernel modules also have DSO_TYPE_USER in dso->kernel,
+ * but all kernel modules are ET_REL, so won't get here.
+ */
+ if (dso->kernel == DSO_SPACE__USER)
+ return map__unmap_ip(map, ip - dso->text_offset);
+
+ return ip + map__reloc(map);
+}
+
+bool map__contains_symbol(const struct map *map, const struct symbol *sym)
+{
+ u64 ip = map__unmap_ip(map, sym->start);
+
+ return ip >= map__start(map) && ip < map__end(map);
+}
+
+struct kmap *__map__kmap(struct map *map)
+{
+ const struct dso *dso = map__dso(map);
+
+ if (!dso || !dso->kernel)
+ return NULL;
+ return (struct kmap *)(&RC_CHK_ACCESS(map)[1]);
+}
+
+struct kmap *map__kmap(struct map *map)
+{
+ struct kmap *kmap = __map__kmap(map);
+
+ if (!kmap)
+ pr_err("Internal error: map__kmap with a non-kernel map\n");
+ return kmap;
+}
+
+struct maps *map__kmaps(struct map *map)
+{
+ struct kmap *kmap = map__kmap(map);
+
+ if (!kmap || !kmap->kmaps) {
+ pr_err("Internal error: map__kmaps with a non-kernel map\n");
+ return NULL;
+ }
+ return kmap->kmaps;
+}
+
+u64 map__dso_map_ip(const struct map *map, u64 ip)
+{
+ return ip - map__start(map) + map__pgoff(map);
+}
+
+u64 map__dso_unmap_ip(const struct map *map, u64 ip)
+{
+ return ip + map__start(map) - map__pgoff(map);
+}
+
+u64 identity__map_ip(const struct map *map __maybe_unused, u64 ip)
+{
+ return ip;
+}
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
new file mode 100644
index 000000000..1b53d53ad
--- /dev/null
+++ b/tools/perf/util/map.h
@@ -0,0 +1,306 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_MAP_H
+#define __PERF_MAP_H
+
+#include <linux/refcount.h>
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <linux/types.h>
+#include <internal/rc_check.h>
+
+struct dso;
+struct maps;
+struct machine;
+
+DECLARE_RC_STRUCT(map) {
+ u64 start;
+ u64 end;
+ bool erange_warned:1;
+ bool priv:1;
+ u32 prot;
+ u64 pgoff;
+ u64 reloc;
+
+ /* ip -> dso rip */
+ u64 (*map_ip)(const struct map *, u64);
+ /* dso rip -> ip */
+ u64 (*unmap_ip)(const struct map *, u64);
+
+ struct dso *dso;
+ refcount_t refcnt;
+ u32 flags;
+};
+
+struct kmap;
+
+struct kmap *__map__kmap(struct map *map);
+struct kmap *map__kmap(struct map *map);
+struct maps *map__kmaps(struct map *map);
+
+/* ip -> dso rip */
+u64 map__dso_map_ip(const struct map *map, u64 ip);
+/* dso rip -> ip */
+u64 map__dso_unmap_ip(const struct map *map, u64 ip);
+/* Returns ip */
+u64 identity__map_ip(const struct map *map __maybe_unused, u64 ip);
+
+static inline struct dso *map__dso(const struct map *map)
+{
+ return RC_CHK_ACCESS(map)->dso;
+}
+
+static inline u64 map__map_ip(const struct map *map, u64 ip)
+{
+ return RC_CHK_ACCESS(map)->map_ip(map, ip);
+}
+
+static inline u64 map__unmap_ip(const struct map *map, u64 ip)
+{
+ return RC_CHK_ACCESS(map)->unmap_ip(map, ip);
+}
+
+static inline void *map__map_ip_ptr(struct map *map)
+{
+ return RC_CHK_ACCESS(map)->map_ip;
+}
+
+static inline void* map__unmap_ip_ptr(struct map *map)
+{
+ return RC_CHK_ACCESS(map)->unmap_ip;
+}
+
+static inline u64 map__start(const struct map *map)
+{
+ return RC_CHK_ACCESS(map)->start;
+}
+
+static inline u64 map__end(const struct map *map)
+{
+ return RC_CHK_ACCESS(map)->end;
+}
+
+static inline u64 map__pgoff(const struct map *map)
+{
+ return RC_CHK_ACCESS(map)->pgoff;
+}
+
+static inline u64 map__reloc(const struct map *map)
+{
+ return RC_CHK_ACCESS(map)->reloc;
+}
+
+static inline u32 map__flags(const struct map *map)
+{
+ return RC_CHK_ACCESS(map)->flags;
+}
+
+static inline u32 map__prot(const struct map *map)
+{
+ return RC_CHK_ACCESS(map)->prot;
+}
+
+static inline bool map__priv(const struct map *map)
+{
+ return RC_CHK_ACCESS(map)->priv;
+}
+
+static inline refcount_t *map__refcnt(struct map *map)
+{
+ return &RC_CHK_ACCESS(map)->refcnt;
+}
+
+static inline bool map__erange_warned(struct map *map)
+{
+ return RC_CHK_ACCESS(map)->erange_warned;
+}
+
+static inline size_t map__size(const struct map *map)
+{
+ return map__end(map) - map__start(map);
+}
+
+/* rip/ip <-> addr suitable for passing to `objdump --start-address=` */
+u64 map__rip_2objdump(struct map *map, u64 rip);
+
+/* objdump address -> memory address */
+u64 map__objdump_2mem(struct map *map, u64 ip);
+
+struct symbol;
+struct thread;
+
+/* map__for_each_symbol - iterate over the symbols in the given map
+ *
+ * @map: the 'struct map *' in which symbols are iterated
+ * @pos: the 'struct symbol *' to use as a loop cursor
+ * @n: the 'struct rb_node *' to use as a temporary storage
+ * Note: caller must ensure map->dso is not NULL (map is loaded).
+ */
+#define map__for_each_symbol(map, pos, n) \
+ dso__for_each_symbol(map__dso(map), pos, n)
+
+/* map__for_each_symbol_with_name - iterate over the symbols in the given map
+ * that have the given name
+ *
+ * @map: the 'struct map *' in which symbols are iterated
+ * @sym_name: the symbol name
+ * @pos: the 'struct symbol *' to use as a loop cursor
+ * @idx: the cursor index in the symbol names array
+ */
+#define __map__for_each_symbol_by_name(map, sym_name, pos, idx) \
+ for (pos = map__find_symbol_by_name_idx(map, sym_name, &idx); \
+ pos && \
+ !symbol__match_symbol_name(pos->name, sym_name, \
+ SYMBOL_TAG_INCLUDE__DEFAULT_ONLY); \
+ pos = dso__next_symbol_by_name(map__dso(map), &idx))
+
+#define map__for_each_symbol_by_name(map, sym_name, pos, idx) \
+ __map__for_each_symbol_by_name(map, sym_name, (pos), idx)
+
+void map__init(struct map *map,
+ u64 start, u64 end, u64 pgoff, struct dso *dso);
+
+struct dso_id;
+struct build_id;
+
+struct map *map__new(struct machine *machine, u64 start, u64 len,
+ u64 pgoff, struct dso_id *id, u32 prot, u32 flags,
+ struct build_id *bid, char *filename, struct thread *thread);
+struct map *map__new2(u64 start, struct dso *dso);
+void map__delete(struct map *map);
+struct map *map__clone(struct map *map);
+
+static inline struct map *map__get(struct map *map)
+{
+ struct map *result;
+
+ if (RC_CHK_GET(result, map))
+ refcount_inc(map__refcnt(map));
+
+ return result;
+}
+
+void map__put(struct map *map);
+
+static inline void __map__zput(struct map **map)
+{
+ map__put(*map);
+ *map = NULL;
+}
+
+#define map__zput(map) __map__zput(&map)
+
+size_t map__fprintf(struct map *map, FILE *fp);
+size_t map__fprintf_dsoname(struct map *map, FILE *fp);
+size_t map__fprintf_dsoname_dsoff(struct map *map, bool print_off, u64 addr, FILE *fp);
+char *map__srcline(struct map *map, u64 addr, struct symbol *sym);
+int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
+ FILE *fp);
+
+int map__load(struct map *map);
+struct symbol *map__find_symbol(struct map *map, u64 addr);
+struct symbol *map__find_symbol_by_name(struct map *map, const char *name);
+struct symbol *map__find_symbol_by_name_idx(struct map *map, const char *name, size_t *idx);
+void map__fixup_start(struct map *map);
+void map__fixup_end(struct map *map);
+
+int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name,
+ u64 addr);
+
+bool __map__is_kernel(const struct map *map);
+bool __map__is_extra_kernel_map(const struct map *map);
+bool __map__is_bpf_prog(const struct map *map);
+bool __map__is_bpf_image(const struct map *map);
+bool __map__is_ool(const struct map *map);
+
+static inline bool __map__is_kmodule(const struct map *map)
+{
+ return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map) &&
+ !__map__is_bpf_prog(map) && !__map__is_ool(map) &&
+ !__map__is_bpf_image(map);
+}
+
+bool map__has_symbols(const struct map *map);
+
+bool map__contains_symbol(const struct map *map, const struct symbol *sym);
+
+#define ENTRY_TRAMPOLINE_NAME "__entry_SYSCALL_64_trampoline"
+
+static inline bool is_entry_trampoline(const char *name)
+{
+ return !strcmp(name, ENTRY_TRAMPOLINE_NAME);
+}
+
+static inline bool is_bpf_image(const char *name)
+{
+ return strncmp(name, "bpf_trampoline_", sizeof("bpf_trampoline_") - 1) == 0 ||
+ strncmp(name, "bpf_dispatcher_", sizeof("bpf_dispatcher_") - 1) == 0;
+}
+
+static inline int is_anon_memory(const char *filename)
+{
+ return !strcmp(filename, "//anon") ||
+ !strncmp(filename, "/dev/zero", sizeof("/dev/zero") - 1) ||
+ !strncmp(filename, "/anon_hugepage", sizeof("/anon_hugepage") - 1);
+}
+
+static inline int is_no_dso_memory(const char *filename)
+{
+ return !strncmp(filename, "[stack", 6) ||
+ !strncmp(filename, "/SYSV", 5) ||
+ !strcmp(filename, "[heap]");
+}
+
+static inline void map__set_start(struct map *map, u64 start)
+{
+ RC_CHK_ACCESS(map)->start = start;
+}
+
+static inline void map__set_end(struct map *map, u64 end)
+{
+ RC_CHK_ACCESS(map)->end = end;
+}
+
+static inline void map__set_pgoff(struct map *map, u64 pgoff)
+{
+ RC_CHK_ACCESS(map)->pgoff = pgoff;
+}
+
+static inline void map__add_pgoff(struct map *map, u64 inc)
+{
+ RC_CHK_ACCESS(map)->pgoff += inc;
+}
+
+static inline void map__set_reloc(struct map *map, u64 reloc)
+{
+ RC_CHK_ACCESS(map)->reloc = reloc;
+}
+
+static inline void map__set_priv(struct map *map, int priv)
+{
+ RC_CHK_ACCESS(map)->priv = priv;
+}
+
+static inline void map__set_erange_warned(struct map *map, bool erange_warned)
+{
+ RC_CHK_ACCESS(map)->erange_warned = erange_warned;
+}
+
+static inline void map__set_dso(struct map *map, struct dso *dso)
+{
+ RC_CHK_ACCESS(map)->dso = dso;
+}
+
+static inline void map__set_map_ip(struct map *map, u64 (*map_ip)(const struct map *map, u64 ip))
+{
+ RC_CHK_ACCESS(map)->map_ip = map_ip;
+}
+
+static inline void map__set_unmap_ip(struct map *map, u64 (*unmap_ip)(const struct map *map, u64 rip))
+{
+ RC_CHK_ACCESS(map)->unmap_ip = unmap_ip;
+}
+#endif /* __PERF_MAP_H */
diff --git a/tools/perf/util/map_symbol.h b/tools/perf/util/map_symbol.h
new file mode 100644
index 000000000..e08817b0c
--- /dev/null
+++ b/tools/perf/util/map_symbol.h
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __PERF_MAP_SYMBOL
+#define __PERF_MAP_SYMBOL 1
+
+#include <linux/types.h>
+
+struct maps;
+struct map;
+struct symbol;
+
+struct map_symbol {
+ struct maps *maps;
+ struct map *map;
+ struct symbol *sym;
+};
+
+struct addr_map_symbol {
+ struct map_symbol ms;
+ u64 addr;
+ u64 al_addr;
+ char al_level;
+ u64 phys_addr;
+ u64 data_page_size;
+};
+#endif // __PERF_MAP_SYMBOL
diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c
new file mode 100644
index 000000000..233438c95
--- /dev/null
+++ b/tools/perf/util/maps.c
@@ -0,0 +1,477 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdlib.h>
+#include <linux/zalloc.h>
+#include "debug.h"
+#include "dso.h"
+#include "map.h"
+#include "maps.h"
+#include "thread.h"
+#include "ui/ui.h"
+#include "unwind.h"
+
+static void maps__init(struct maps *maps, struct machine *machine)
+{
+ refcount_set(maps__refcnt(maps), 1);
+ init_rwsem(maps__lock(maps));
+ RC_CHK_ACCESS(maps)->entries = RB_ROOT;
+ RC_CHK_ACCESS(maps)->machine = machine;
+ RC_CHK_ACCESS(maps)->last_search_by_name = NULL;
+ RC_CHK_ACCESS(maps)->nr_maps = 0;
+ RC_CHK_ACCESS(maps)->maps_by_name = NULL;
+}
+
+static void __maps__free_maps_by_name(struct maps *maps)
+{
+ /*
+ * Free everything to try to do it from the rbtree in the next search
+ */
+ for (unsigned int i = 0; i < maps__nr_maps(maps); i++)
+ map__put(maps__maps_by_name(maps)[i]);
+
+ zfree(&RC_CHK_ACCESS(maps)->maps_by_name);
+ RC_CHK_ACCESS(maps)->nr_maps_allocated = 0;
+}
+
+static int __maps__insert(struct maps *maps, struct map *map)
+{
+ struct rb_node **p = &maps__entries(maps)->rb_node;
+ struct rb_node *parent = NULL;
+ const u64 ip = map__start(map);
+ struct map_rb_node *m, *new_rb_node;
+
+ new_rb_node = malloc(sizeof(*new_rb_node));
+ if (!new_rb_node)
+ return -ENOMEM;
+
+ RB_CLEAR_NODE(&new_rb_node->rb_node);
+ new_rb_node->map = map__get(map);
+
+ while (*p != NULL) {
+ parent = *p;
+ m = rb_entry(parent, struct map_rb_node, rb_node);
+ if (ip < map__start(m->map))
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ rb_link_node(&new_rb_node->rb_node, parent, p);
+ rb_insert_color(&new_rb_node->rb_node, maps__entries(maps));
+ return 0;
+}
+
+int maps__insert(struct maps *maps, struct map *map)
+{
+ int err;
+ const struct dso *dso = map__dso(map);
+
+ down_write(maps__lock(maps));
+ err = __maps__insert(maps, map);
+ if (err)
+ goto out;
+
+ ++RC_CHK_ACCESS(maps)->nr_maps;
+
+ if (dso && dso->kernel) {
+ struct kmap *kmap = map__kmap(map);
+
+ if (kmap)
+ kmap->kmaps = maps;
+ else
+ pr_err("Internal error: kernel dso with non kernel map\n");
+ }
+
+
+ /*
+ * If we already performed some search by name, then we need to add the just
+ * inserted map and resort.
+ */
+ if (maps__maps_by_name(maps)) {
+ if (maps__nr_maps(maps) > RC_CHK_ACCESS(maps)->nr_maps_allocated) {
+ int nr_allocate = maps__nr_maps(maps) * 2;
+ struct map **maps_by_name = realloc(maps__maps_by_name(maps),
+ nr_allocate * sizeof(map));
+
+ if (maps_by_name == NULL) {
+ __maps__free_maps_by_name(maps);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ RC_CHK_ACCESS(maps)->maps_by_name = maps_by_name;
+ RC_CHK_ACCESS(maps)->nr_maps_allocated = nr_allocate;
+ }
+ maps__maps_by_name(maps)[maps__nr_maps(maps) - 1] = map__get(map);
+ __maps__sort_by_name(maps);
+ }
+ out:
+ up_write(maps__lock(maps));
+ return err;
+}
+
+static void __maps__remove(struct maps *maps, struct map_rb_node *rb_node)
+{
+ rb_erase_init(&rb_node->rb_node, maps__entries(maps));
+ map__put(rb_node->map);
+ free(rb_node);
+}
+
+void maps__remove(struct maps *maps, struct map *map)
+{
+ struct map_rb_node *rb_node;
+
+ down_write(maps__lock(maps));
+ if (RC_CHK_ACCESS(maps)->last_search_by_name == map)
+ RC_CHK_ACCESS(maps)->last_search_by_name = NULL;
+
+ rb_node = maps__find_node(maps, map);
+ assert(rb_node->RC_CHK_ACCESS(map) == RC_CHK_ACCESS(map));
+ __maps__remove(maps, rb_node);
+ if (maps__maps_by_name(maps))
+ __maps__free_maps_by_name(maps);
+ --RC_CHK_ACCESS(maps)->nr_maps;
+ up_write(maps__lock(maps));
+}
+
+static void __maps__purge(struct maps *maps)
+{
+ struct map_rb_node *pos, *next;
+
+ if (maps__maps_by_name(maps))
+ __maps__free_maps_by_name(maps);
+
+ maps__for_each_entry_safe(maps, pos, next) {
+ rb_erase_init(&pos->rb_node, maps__entries(maps));
+ map__put(pos->map);
+ free(pos);
+ }
+}
+
+static void maps__exit(struct maps *maps)
+{
+ down_write(maps__lock(maps));
+ __maps__purge(maps);
+ up_write(maps__lock(maps));
+}
+
+bool maps__empty(struct maps *maps)
+{
+ return !maps__first(maps);
+}
+
+struct maps *maps__new(struct machine *machine)
+{
+ struct maps *result;
+ RC_STRUCT(maps) *maps = zalloc(sizeof(*maps));
+
+ if (ADD_RC_CHK(result, maps))
+ maps__init(result, machine);
+
+ return result;
+}
+
+static void maps__delete(struct maps *maps)
+{
+ maps__exit(maps);
+ unwind__finish_access(maps);
+ RC_CHK_FREE(maps);
+}
+
+struct maps *maps__get(struct maps *maps)
+{
+ struct maps *result;
+
+ if (RC_CHK_GET(result, maps))
+ refcount_inc(maps__refcnt(maps));
+
+ return result;
+}
+
+void maps__put(struct maps *maps)
+{
+ if (maps && refcount_dec_and_test(maps__refcnt(maps)))
+ maps__delete(maps);
+ else
+ RC_CHK_PUT(maps);
+}
+
+struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp)
+{
+ struct map *map = maps__find(maps, addr);
+
+ /* Ensure map is loaded before using map->map_ip */
+ if (map != NULL && map__load(map) >= 0) {
+ if (mapp != NULL)
+ *mapp = map;
+ return map__find_symbol(map, map__map_ip(map, addr));
+ }
+
+ return NULL;
+}
+
+struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp)
+{
+ struct symbol *sym;
+ struct map_rb_node *pos;
+
+ down_read(maps__lock(maps));
+
+ maps__for_each_entry(maps, pos) {
+ sym = map__find_symbol_by_name(pos->map, name);
+
+ if (sym == NULL)
+ continue;
+ if (!map__contains_symbol(pos->map, sym)) {
+ sym = NULL;
+ continue;
+ }
+ if (mapp != NULL)
+ *mapp = pos->map;
+ goto out;
+ }
+
+ sym = NULL;
+out:
+ up_read(maps__lock(maps));
+ return sym;
+}
+
+int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams)
+{
+ if (ams->addr < map__start(ams->ms.map) || ams->addr >= map__end(ams->ms.map)) {
+ if (maps == NULL)
+ return -1;
+ ams->ms.map = maps__find(maps, ams->addr);
+ if (ams->ms.map == NULL)
+ return -1;
+ }
+
+ ams->al_addr = map__map_ip(ams->ms.map, ams->addr);
+ ams->ms.sym = map__find_symbol(ams->ms.map, ams->al_addr);
+
+ return ams->ms.sym ? 0 : -1;
+}
+
+size_t maps__fprintf(struct maps *maps, FILE *fp)
+{
+ size_t printed = 0;
+ struct map_rb_node *pos;
+
+ down_read(maps__lock(maps));
+
+ maps__for_each_entry(maps, pos) {
+ printed += fprintf(fp, "Map:");
+ printed += map__fprintf(pos->map, fp);
+ if (verbose > 2) {
+ printed += dso__fprintf(map__dso(pos->map), fp);
+ printed += fprintf(fp, "--\n");
+ }
+ }
+
+ up_read(maps__lock(maps));
+
+ return printed;
+}
+
+int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp)
+{
+ struct rb_root *root;
+ struct rb_node *next, *first;
+ int err = 0;
+
+ down_write(maps__lock(maps));
+
+ root = maps__entries(maps);
+
+ /*
+ * Find first map where end > map->start.
+ * Same as find_vma() in kernel.
+ */
+ next = root->rb_node;
+ first = NULL;
+ while (next) {
+ struct map_rb_node *pos = rb_entry(next, struct map_rb_node, rb_node);
+
+ if (map__end(pos->map) > map__start(map)) {
+ first = next;
+ if (map__start(pos->map) <= map__start(map))
+ break;
+ next = next->rb_left;
+ } else
+ next = next->rb_right;
+ }
+
+ next = first;
+ while (next && !err) {
+ struct map_rb_node *pos = rb_entry(next, struct map_rb_node, rb_node);
+ next = rb_next(&pos->rb_node);
+
+ /*
+ * Stop if current map starts after map->end.
+ * Maps are ordered by start: next will not overlap for sure.
+ */
+ if (map__start(pos->map) >= map__end(map))
+ break;
+
+ if (verbose >= 2) {
+
+ if (use_browser) {
+ pr_debug("overlapping maps in %s (disable tui for more info)\n",
+ map__dso(map)->name);
+ } else {
+ fputs("overlapping maps:\n", fp);
+ map__fprintf(map, fp);
+ map__fprintf(pos->map, fp);
+ }
+ }
+
+ rb_erase_init(&pos->rb_node, root);
+ /*
+ * Now check if we need to create new maps for areas not
+ * overlapped by the new map:
+ */
+ if (map__start(map) > map__start(pos->map)) {
+ struct map *before = map__clone(pos->map);
+
+ if (before == NULL) {
+ err = -ENOMEM;
+ goto put_map;
+ }
+
+ map__set_end(before, map__start(map));
+ err = __maps__insert(maps, before);
+ if (err) {
+ map__put(before);
+ goto put_map;
+ }
+
+ if (verbose >= 2 && !use_browser)
+ map__fprintf(before, fp);
+ map__put(before);
+ }
+
+ if (map__end(map) < map__end(pos->map)) {
+ struct map *after = map__clone(pos->map);
+
+ if (after == NULL) {
+ err = -ENOMEM;
+ goto put_map;
+ }
+
+ map__set_start(after, map__end(map));
+ map__add_pgoff(after, map__end(map) - map__start(pos->map));
+ assert(map__map_ip(pos->map, map__end(map)) ==
+ map__map_ip(after, map__end(map)));
+ err = __maps__insert(maps, after);
+ if (err) {
+ map__put(after);
+ goto put_map;
+ }
+ if (verbose >= 2 && !use_browser)
+ map__fprintf(after, fp);
+ map__put(after);
+ }
+put_map:
+ map__put(pos->map);
+ free(pos);
+ }
+ up_write(maps__lock(maps));
+ return err;
+}
+
+/*
+ * XXX This should not really _copy_ te maps, but refcount them.
+ */
+int maps__clone(struct thread *thread, struct maps *parent)
+{
+ struct maps *maps = thread__maps(thread);
+ int err;
+ struct map_rb_node *rb_node;
+
+ down_read(maps__lock(parent));
+
+ maps__for_each_entry(parent, rb_node) {
+ struct map *new = map__clone(rb_node->map);
+
+ if (new == NULL) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ err = unwind__prepare_access(maps, new, NULL);
+ if (err)
+ goto out_unlock;
+
+ err = maps__insert(maps, new);
+ if (err)
+ goto out_unlock;
+
+ map__put(new);
+ }
+
+ err = 0;
+out_unlock:
+ up_read(maps__lock(parent));
+ return err;
+}
+
+struct map_rb_node *maps__find_node(struct maps *maps, struct map *map)
+{
+ struct map_rb_node *rb_node;
+
+ maps__for_each_entry(maps, rb_node) {
+ if (rb_node->RC_CHK_ACCESS(map) == RC_CHK_ACCESS(map))
+ return rb_node;
+ }
+ return NULL;
+}
+
+struct map *maps__find(struct maps *maps, u64 ip)
+{
+ struct rb_node *p;
+ struct map_rb_node *m;
+
+
+ down_read(maps__lock(maps));
+
+ p = maps__entries(maps)->rb_node;
+ while (p != NULL) {
+ m = rb_entry(p, struct map_rb_node, rb_node);
+ if (ip < map__start(m->map))
+ p = p->rb_left;
+ else if (ip >= map__end(m->map))
+ p = p->rb_right;
+ else
+ goto out;
+ }
+
+ m = NULL;
+out:
+ up_read(maps__lock(maps));
+ return m ? m->map : NULL;
+}
+
+struct map_rb_node *maps__first(struct maps *maps)
+{
+ struct rb_node *first = rb_first(maps__entries(maps));
+
+ if (first)
+ return rb_entry(first, struct map_rb_node, rb_node);
+ return NULL;
+}
+
+struct map_rb_node *map_rb_node__next(struct map_rb_node *node)
+{
+ struct rb_node *next;
+
+ if (!node)
+ return NULL;
+
+ next = rb_next(&node->rb_node);
+
+ if (!next)
+ return NULL;
+
+ return rb_entry(next, struct map_rb_node, rb_node);
+}
diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h
new file mode 100644
index 000000000..83144e064
--- /dev/null
+++ b/tools/perf/util/maps.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_MAPS_H
+#define __PERF_MAPS_H
+
+#include <linux/refcount.h>
+#include <linux/rbtree.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <linux/types.h>
+#include "rwsem.h"
+#include <internal/rc_check.h>
+
+struct ref_reloc_sym;
+struct machine;
+struct map;
+struct maps;
+struct thread;
+
+struct map_rb_node {
+ struct rb_node rb_node;
+ struct map *map;
+};
+
+struct map_rb_node *maps__first(struct maps *maps);
+struct map_rb_node *map_rb_node__next(struct map_rb_node *node);
+struct map_rb_node *maps__find_node(struct maps *maps, struct map *map);
+struct map *maps__find(struct maps *maps, u64 addr);
+
+#define maps__for_each_entry(maps, map) \
+ for (map = maps__first(maps); map; map = map_rb_node__next(map))
+
+#define maps__for_each_entry_safe(maps, map, next) \
+ for (map = maps__first(maps), next = map_rb_node__next(map); map; \
+ map = next, next = map_rb_node__next(map))
+
+DECLARE_RC_STRUCT(maps) {
+ struct rb_root entries;
+ struct rw_semaphore lock;
+ struct machine *machine;
+ struct map *last_search_by_name;
+ struct map **maps_by_name;
+ refcount_t refcnt;
+ unsigned int nr_maps;
+ unsigned int nr_maps_allocated;
+#ifdef HAVE_LIBUNWIND_SUPPORT
+ void *addr_space;
+ const struct unwind_libunwind_ops *unwind_libunwind_ops;
+#endif
+};
+
+#define KMAP_NAME_LEN 256
+
+struct kmap {
+ struct ref_reloc_sym *ref_reloc_sym;
+ struct maps *kmaps;
+ char name[KMAP_NAME_LEN];
+};
+
+struct maps *maps__new(struct machine *machine);
+bool maps__empty(struct maps *maps);
+int maps__clone(struct thread *thread, struct maps *parent);
+
+struct maps *maps__get(struct maps *maps);
+void maps__put(struct maps *maps);
+
+static inline void __maps__zput(struct maps **map)
+{
+ maps__put(*map);
+ *map = NULL;
+}
+
+#define maps__zput(map) __maps__zput(&map)
+
+static inline struct rb_root *maps__entries(struct maps *maps)
+{
+ return &RC_CHK_ACCESS(maps)->entries;
+}
+
+static inline struct machine *maps__machine(struct maps *maps)
+{
+ return RC_CHK_ACCESS(maps)->machine;
+}
+
+static inline struct rw_semaphore *maps__lock(struct maps *maps)
+{
+ return &RC_CHK_ACCESS(maps)->lock;
+}
+
+static inline struct map **maps__maps_by_name(struct maps *maps)
+{
+ return RC_CHK_ACCESS(maps)->maps_by_name;
+}
+
+static inline unsigned int maps__nr_maps(const struct maps *maps)
+{
+ return RC_CHK_ACCESS(maps)->nr_maps;
+}
+
+static inline refcount_t *maps__refcnt(struct maps *maps)
+{
+ return &RC_CHK_ACCESS(maps)->refcnt;
+}
+
+#ifdef HAVE_LIBUNWIND_SUPPORT
+static inline void *maps__addr_space(struct maps *maps)
+{
+ return RC_CHK_ACCESS(maps)->addr_space;
+}
+
+static inline const struct unwind_libunwind_ops *maps__unwind_libunwind_ops(const struct maps *maps)
+{
+ return RC_CHK_ACCESS(maps)->unwind_libunwind_ops;
+}
+#endif
+
+size_t maps__fprintf(struct maps *maps, FILE *fp);
+
+int maps__insert(struct maps *maps, struct map *map);
+void maps__remove(struct maps *maps, struct map *map);
+
+struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp);
+struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp);
+
+struct addr_map_symbol;
+
+int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams);
+
+int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp);
+
+struct map *maps__find_by_name(struct maps *maps, const char *name);
+
+int maps__merge_in(struct maps *kmaps, struct map *new_map);
+
+void __maps__sort_by_name(struct maps *maps);
+
+#endif // __PERF_MAPS_H
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
new file mode 100644
index 000000000..3a2e36878
--- /dev/null
+++ b/tools/perf/util/mem-events.c
@@ -0,0 +1,707 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <api/fs/fs.h>
+#include <linux/kernel.h>
+#include "map_symbol.h"
+#include "mem-events.h"
+#include "debug.h"
+#include "symbol.h"
+#include "pmu.h"
+#include "pmus.h"
+
+unsigned int perf_mem_events__loads_ldlat = 30;
+
+#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
+
+static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
+ E("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "cpu/events/mem-loads"),
+ E("ldlat-stores", "cpu/mem-stores/P", "cpu/events/mem-stores"),
+ E(NULL, NULL, NULL),
+};
+#undef E
+
+static char mem_loads_name[100];
+static bool mem_loads_name__init;
+
+struct perf_mem_event * __weak perf_mem_events__ptr(int i)
+{
+ if (i >= PERF_MEM_EVENTS__MAX)
+ return NULL;
+
+ return &perf_mem_events[i];
+}
+
+const char * __weak perf_mem_events__name(int i, const char *pmu_name __maybe_unused)
+{
+ struct perf_mem_event *e = perf_mem_events__ptr(i);
+
+ if (!e)
+ return NULL;
+
+ if (i == PERF_MEM_EVENTS__LOAD) {
+ if (!mem_loads_name__init) {
+ mem_loads_name__init = true;
+ scnprintf(mem_loads_name, sizeof(mem_loads_name),
+ e->name, perf_mem_events__loads_ldlat);
+ }
+ return mem_loads_name;
+ }
+
+ return e->name;
+}
+
+__weak bool is_mem_loads_aux_event(struct evsel *leader __maybe_unused)
+{
+ return false;
+}
+
+int perf_mem_events__parse(const char *str)
+{
+ char *tok, *saveptr = NULL;
+ bool found = false;
+ char *buf;
+ int j;
+
+ /* We need buffer that we know we can write to. */
+ buf = malloc(strlen(str) + 1);
+ if (!buf)
+ return -ENOMEM;
+
+ strcpy(buf, str);
+
+ tok = strtok_r((char *)buf, ",", &saveptr);
+
+ while (tok) {
+ for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+ struct perf_mem_event *e = perf_mem_events__ptr(j);
+
+ if (!e->tag)
+ continue;
+
+ if (strstr(e->tag, tok))
+ e->record = found = true;
+ }
+
+ tok = strtok_r(NULL, ",", &saveptr);
+ }
+
+ free(buf);
+
+ if (found)
+ return 0;
+
+ pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str);
+ return -1;
+}
+
+static bool perf_mem_event__supported(const char *mnt, struct perf_pmu *pmu,
+ struct perf_mem_event *e)
+{
+ char sysfs_name[100];
+ char path[PATH_MAX];
+ struct stat st;
+
+ scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, pmu->name);
+ scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, sysfs_name);
+ return !stat(path, &st);
+}
+
+int perf_mem_events__init(void)
+{
+ const char *mnt = sysfs__mount();
+ bool found = false;
+ int j;
+
+ if (!mnt)
+ return -ENOENT;
+
+ for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+ struct perf_mem_event *e = perf_mem_events__ptr(j);
+ struct perf_pmu *pmu = NULL;
+
+ /*
+ * If the event entry isn't valid, skip initialization
+ * and "e->supported" will keep false.
+ */
+ if (!e->tag)
+ continue;
+
+ /*
+ * Scan all PMUs not just core ones, since perf mem/c2c on
+ * platforms like AMD uses IBS OP PMU which is independent
+ * of core PMU.
+ */
+ while ((pmu = perf_pmus__scan(pmu)) != NULL) {
+ e->supported |= perf_mem_event__supported(mnt, pmu, e);
+ if (e->supported) {
+ found = true;
+ break;
+ }
+ }
+ }
+
+ return found ? 0 : -ENOENT;
+}
+
+void perf_mem_events__list(void)
+{
+ int j;
+
+ for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+ struct perf_mem_event *e = perf_mem_events__ptr(j);
+
+ fprintf(stderr, "%-*s%-*s%s",
+ e->tag ? 13 : 0,
+ e->tag ? : "",
+ e->tag && verbose > 0 ? 25 : 0,
+ e->tag && verbose > 0 ? perf_mem_events__name(j, NULL) : "",
+ e->supported ? ": available\n" : "");
+ }
+}
+
+static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e,
+ int idx)
+{
+ const char *mnt = sysfs__mount();
+ struct perf_pmu *pmu = NULL;
+
+ while ((pmu = perf_pmus__scan(pmu)) != NULL) {
+ if (!perf_mem_event__supported(mnt, pmu, e)) {
+ pr_err("failed: event '%s' not supported\n",
+ perf_mem_events__name(idx, pmu->name));
+ }
+ }
+}
+
+int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
+ char **rec_tmp, int *tmp_nr)
+{
+ const char *mnt = sysfs__mount();
+ int i = *argv_nr, k = 0;
+ struct perf_mem_event *e;
+
+ for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+ e = perf_mem_events__ptr(j);
+ if (!e->record)
+ continue;
+
+ if (perf_pmus__num_mem_pmus() == 1) {
+ if (!e->supported) {
+ pr_err("failed: event '%s' not supported\n",
+ perf_mem_events__name(j, NULL));
+ return -1;
+ }
+
+ rec_argv[i++] = "-e";
+ rec_argv[i++] = perf_mem_events__name(j, NULL);
+ } else {
+ struct perf_pmu *pmu = NULL;
+
+ if (!e->supported) {
+ perf_mem_events__print_unsupport_hybrid(e, j);
+ return -1;
+ }
+
+ while ((pmu = perf_pmus__scan(pmu)) != NULL) {
+ const char *s = perf_mem_events__name(j, pmu->name);
+
+ if (!perf_mem_event__supported(mnt, pmu, e))
+ continue;
+
+ rec_argv[i++] = "-e";
+ if (s) {
+ char *copy = strdup(s);
+ if (!copy)
+ return -1;
+
+ rec_argv[i++] = copy;
+ rec_tmp[k++] = copy;
+ }
+ }
+ }
+ }
+
+ *argv_nr = i;
+ *tmp_nr = k;
+ return 0;
+}
+
+static const char * const tlb_access[] = {
+ "N/A",
+ "HIT",
+ "MISS",
+ "L1",
+ "L2",
+ "Walker",
+ "Fault",
+};
+
+int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+ size_t l = 0, i;
+ u64 m = PERF_MEM_TLB_NA;
+ u64 hit, miss;
+
+ sz -= 1; /* -1 for null termination */
+ out[0] = '\0';
+
+ if (mem_info)
+ m = mem_info->data_src.mem_dtlb;
+
+ hit = m & PERF_MEM_TLB_HIT;
+ miss = m & PERF_MEM_TLB_MISS;
+
+ /* already taken care of */
+ m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);
+
+ for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) {
+ if (!(m & 0x1))
+ continue;
+ if (l) {
+ strcat(out, " or ");
+ l += 4;
+ }
+ l += scnprintf(out + l, sz - l, tlb_access[i]);
+ }
+ if (*out == '\0')
+ l += scnprintf(out, sz - l, "N/A");
+ if (hit)
+ l += scnprintf(out + l, sz - l, " hit");
+ if (miss)
+ l += scnprintf(out + l, sz - l, " miss");
+
+ return l;
+}
+
+static const char * const mem_lvl[] = {
+ "N/A",
+ "HIT",
+ "MISS",
+ "L1",
+ "LFB/MAB",
+ "L2",
+ "L3",
+ "Local RAM",
+ "Remote RAM (1 hop)",
+ "Remote RAM (2 hops)",
+ "Remote Cache (1 hop)",
+ "Remote Cache (2 hops)",
+ "I/O",
+ "Uncached",
+};
+
+static const char * const mem_lvlnum[] = {
+ [PERF_MEM_LVLNUM_UNC] = "Uncached",
+ [PERF_MEM_LVLNUM_CXL] = "CXL",
+ [PERF_MEM_LVLNUM_IO] = "I/O",
+ [PERF_MEM_LVLNUM_ANY_CACHE] = "Any cache",
+ [PERF_MEM_LVLNUM_LFB] = "LFB/MAB",
+ [PERF_MEM_LVLNUM_RAM] = "RAM",
+ [PERF_MEM_LVLNUM_PMEM] = "PMEM",
+ [PERF_MEM_LVLNUM_NA] = "N/A",
+};
+
+static const char * const mem_hops[] = {
+ "N/A",
+ /*
+ * While printing, 'Remote' will be added to represent
+ * 'Remote core, same node' accesses as remote field need
+ * to be set with mem_hops field.
+ */
+ "core, same node",
+ "node, same socket",
+ "socket, same board",
+ "board",
+};
+
+static int perf_mem__op_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+ u64 op = PERF_MEM_LOCK_NA;
+ int l;
+
+ if (mem_info)
+ op = mem_info->data_src.mem_op;
+
+ if (op & PERF_MEM_OP_NA)
+ l = scnprintf(out, sz, "N/A");
+ else if (op & PERF_MEM_OP_LOAD)
+ l = scnprintf(out, sz, "LOAD");
+ else if (op & PERF_MEM_OP_STORE)
+ l = scnprintf(out, sz, "STORE");
+ else if (op & PERF_MEM_OP_PFETCH)
+ l = scnprintf(out, sz, "PFETCH");
+ else if (op & PERF_MEM_OP_EXEC)
+ l = scnprintf(out, sz, "EXEC");
+ else
+ l = scnprintf(out, sz, "No");
+
+ return l;
+}
+
+int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+ union perf_mem_data_src data_src;
+ int printed = 0;
+ size_t l = 0;
+ size_t i;
+ int lvl;
+ char hit_miss[5] = {0};
+
+ sz -= 1; /* -1 for null termination */
+ out[0] = '\0';
+
+ if (!mem_info)
+ goto na;
+
+ data_src = mem_info->data_src;
+
+ if (data_src.mem_lvl & PERF_MEM_LVL_HIT)
+ memcpy(hit_miss, "hit", 3);
+ else if (data_src.mem_lvl & PERF_MEM_LVL_MISS)
+ memcpy(hit_miss, "miss", 4);
+
+ lvl = data_src.mem_lvl_num;
+ if (lvl && lvl != PERF_MEM_LVLNUM_NA) {
+ if (data_src.mem_remote) {
+ strcat(out, "Remote ");
+ l += 7;
+ }
+
+ if (data_src.mem_hops)
+ l += scnprintf(out + l, sz - l, "%s ", mem_hops[data_src.mem_hops]);
+
+ if (mem_lvlnum[lvl])
+ l += scnprintf(out + l, sz - l, mem_lvlnum[lvl]);
+ else
+ l += scnprintf(out + l, sz - l, "L%d", lvl);
+
+ l += scnprintf(out + l, sz - l, " %s", hit_miss);
+ return l;
+ }
+
+ lvl = data_src.mem_lvl;
+ if (!lvl)
+ goto na;
+
+ lvl &= ~(PERF_MEM_LVL_NA | PERF_MEM_LVL_HIT | PERF_MEM_LVL_MISS);
+ if (!lvl)
+ goto na;
+
+ for (i = 0; lvl && i < ARRAY_SIZE(mem_lvl); i++, lvl >>= 1) {
+ if (!(lvl & 0x1))
+ continue;
+ if (printed++) {
+ strcat(out, " or ");
+ l += 4;
+ }
+ l += scnprintf(out + l, sz - l, mem_lvl[i]);
+ }
+
+ if (printed) {
+ l += scnprintf(out + l, sz - l, " %s", hit_miss);
+ return l;
+ }
+
+na:
+ strcat(out, "N/A");
+ return 3;
+}
+
+static const char * const snoop_access[] = {
+ "N/A",
+ "None",
+ "Hit",
+ "Miss",
+ "HitM",
+};
+
+static const char * const snoopx_access[] = {
+ "Fwd",
+ "Peer",
+};
+
+int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+ size_t i, l = 0;
+ u64 m = PERF_MEM_SNOOP_NA;
+
+ sz -= 1; /* -1 for null termination */
+ out[0] = '\0';
+
+ if (mem_info)
+ m = mem_info->data_src.mem_snoop;
+
+ for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) {
+ if (!(m & 0x1))
+ continue;
+ if (l) {
+ strcat(out, " or ");
+ l += 4;
+ }
+ l += scnprintf(out + l, sz - l, snoop_access[i]);
+ }
+
+ m = 0;
+ if (mem_info)
+ m = mem_info->data_src.mem_snoopx;
+
+ for (i = 0; m && i < ARRAY_SIZE(snoopx_access); i++, m >>= 1) {
+ if (!(m & 0x1))
+ continue;
+
+ if (l) {
+ strcat(out, " or ");
+ l += 4;
+ }
+ l += scnprintf(out + l, sz - l, snoopx_access[i]);
+ }
+
+ if (*out == '\0')
+ l += scnprintf(out, sz - l, "N/A");
+
+ return l;
+}
+
+int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+ u64 mask = PERF_MEM_LOCK_NA;
+ int l;
+
+ if (mem_info)
+ mask = mem_info->data_src.mem_lock;
+
+ if (mask & PERF_MEM_LOCK_NA)
+ l = scnprintf(out, sz, "N/A");
+ else if (mask & PERF_MEM_LOCK_LOCKED)
+ l = scnprintf(out, sz, "Yes");
+ else
+ l = scnprintf(out, sz, "No");
+
+ return l;
+}
+
+int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+ size_t l = 0;
+ u64 mask = PERF_MEM_BLK_NA;
+
+ sz -= 1; /* -1 for null termination */
+ out[0] = '\0';
+
+ if (mem_info)
+ mask = mem_info->data_src.mem_blk;
+
+ if (!mask || (mask & PERF_MEM_BLK_NA)) {
+ l += scnprintf(out + l, sz - l, " N/A");
+ return l;
+ }
+ if (mask & PERF_MEM_BLK_DATA)
+ l += scnprintf(out + l, sz - l, " Data");
+ if (mask & PERF_MEM_BLK_ADDR)
+ l += scnprintf(out + l, sz - l, " Addr");
+
+ return l;
+}
+
+int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+ int i = 0;
+
+ i += scnprintf(out, sz, "|OP ");
+ i += perf_mem__op_scnprintf(out + i, sz - i, mem_info);
+ i += scnprintf(out + i, sz - i, "|LVL ");
+ i += perf_mem__lvl_scnprintf(out + i, sz, mem_info);
+ i += scnprintf(out + i, sz - i, "|SNP ");
+ i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info);
+ i += scnprintf(out + i, sz - i, "|TLB ");
+ i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info);
+ i += scnprintf(out + i, sz - i, "|LCK ");
+ i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info);
+ i += scnprintf(out + i, sz - i, "|BLK ");
+ i += perf_mem__blk_scnprintf(out + i, sz - i, mem_info);
+
+ return i;
+}
+
+int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
+{
+ union perf_mem_data_src *data_src = &mi->data_src;
+ u64 daddr = mi->daddr.addr;
+ u64 op = data_src->mem_op;
+ u64 lvl = data_src->mem_lvl;
+ u64 snoop = data_src->mem_snoop;
+ u64 snoopx = data_src->mem_snoopx;
+ u64 lock = data_src->mem_lock;
+ u64 blk = data_src->mem_blk;
+ /*
+ * Skylake might report unknown remote level via this
+ * bit, consider it when evaluating remote HITMs.
+ *
+ * Incase of power, remote field can also be used to denote cache
+ * accesses from the another core of same node. Hence, setting
+ * mrem only when HOPS is zero along with set remote field.
+ */
+ bool mrem = (data_src->mem_remote && !data_src->mem_hops);
+ int err = 0;
+
+#define HITM_INC(__f) \
+do { \
+ stats->__f++; \
+ stats->tot_hitm++; \
+} while (0)
+
+#define PEER_INC(__f) \
+do { \
+ stats->__f++; \
+ stats->tot_peer++; \
+} while (0)
+
+#define P(a, b) PERF_MEM_##a##_##b
+
+ stats->nr_entries++;
+
+ if (lock & P(LOCK, LOCKED)) stats->locks++;
+
+ if (blk & P(BLK, DATA)) stats->blk_data++;
+ if (blk & P(BLK, ADDR)) stats->blk_addr++;
+
+ if (op & P(OP, LOAD)) {
+ /* load */
+ stats->load++;
+
+ if (!daddr) {
+ stats->ld_noadrs++;
+ return -1;
+ }
+
+ if (lvl & P(LVL, HIT)) {
+ if (lvl & P(LVL, UNC)) stats->ld_uncache++;
+ if (lvl & P(LVL, IO)) stats->ld_io++;
+ if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
+ if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
+ if (lvl & P(LVL, L2)) {
+ stats->ld_l2hit++;
+
+ if (snoopx & P(SNOOPX, PEER))
+ PEER_INC(lcl_peer);
+ }
+ if (lvl & P(LVL, L3 )) {
+ if (snoop & P(SNOOP, HITM))
+ HITM_INC(lcl_hitm);
+ else
+ stats->ld_llchit++;
+
+ if (snoopx & P(SNOOPX, PEER))
+ PEER_INC(lcl_peer);
+ }
+
+ if (lvl & P(LVL, LOC_RAM)) {
+ stats->lcl_dram++;
+ if (snoop & P(SNOOP, HIT))
+ stats->ld_shared++;
+ else
+ stats->ld_excl++;
+ }
+
+ if ((lvl & P(LVL, REM_RAM1)) ||
+ (lvl & P(LVL, REM_RAM2)) ||
+ mrem) {
+ stats->rmt_dram++;
+ if (snoop & P(SNOOP, HIT))
+ stats->ld_shared++;
+ else
+ stats->ld_excl++;
+ }
+ }
+
+ if ((lvl & P(LVL, REM_CCE1)) ||
+ (lvl & P(LVL, REM_CCE2)) ||
+ mrem) {
+ if (snoop & P(SNOOP, HIT)) {
+ stats->rmt_hit++;
+ } else if (snoop & P(SNOOP, HITM)) {
+ HITM_INC(rmt_hitm);
+ } else if (snoopx & P(SNOOPX, PEER)) {
+ stats->rmt_hit++;
+ PEER_INC(rmt_peer);
+ }
+ }
+
+ if ((lvl & P(LVL, MISS)))
+ stats->ld_miss++;
+
+ } else if (op & P(OP, STORE)) {
+ /* store */
+ stats->store++;
+
+ if (!daddr) {
+ stats->st_noadrs++;
+ return -1;
+ }
+
+ if (lvl & P(LVL, HIT)) {
+ if (lvl & P(LVL, UNC)) stats->st_uncache++;
+ if (lvl & P(LVL, L1 )) stats->st_l1hit++;
+ }
+ if (lvl & P(LVL, MISS))
+ if (lvl & P(LVL, L1)) stats->st_l1miss++;
+ if (lvl & P(LVL, NA))
+ stats->st_na++;
+ } else {
+ /* unparsable data_src? */
+ stats->noparse++;
+ return -1;
+ }
+
+ if (!mi->daddr.ms.map || !mi->iaddr.ms.map) {
+ stats->nomap++;
+ return -1;
+ }
+
+#undef P
+#undef HITM_INC
+ return err;
+}
+
+void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
+{
+ stats->nr_entries += add->nr_entries;
+
+ stats->locks += add->locks;
+ stats->store += add->store;
+ stats->st_uncache += add->st_uncache;
+ stats->st_noadrs += add->st_noadrs;
+ stats->st_l1hit += add->st_l1hit;
+ stats->st_l1miss += add->st_l1miss;
+ stats->st_na += add->st_na;
+ stats->load += add->load;
+ stats->ld_excl += add->ld_excl;
+ stats->ld_shared += add->ld_shared;
+ stats->ld_uncache += add->ld_uncache;
+ stats->ld_io += add->ld_io;
+ stats->ld_miss += add->ld_miss;
+ stats->ld_noadrs += add->ld_noadrs;
+ stats->ld_fbhit += add->ld_fbhit;
+ stats->ld_l1hit += add->ld_l1hit;
+ stats->ld_l2hit += add->ld_l2hit;
+ stats->ld_llchit += add->ld_llchit;
+ stats->lcl_hitm += add->lcl_hitm;
+ stats->rmt_hitm += add->rmt_hitm;
+ stats->tot_hitm += add->tot_hitm;
+ stats->lcl_peer += add->lcl_peer;
+ stats->rmt_peer += add->rmt_peer;
+ stats->tot_peer += add->tot_peer;
+ stats->rmt_hit += add->rmt_hit;
+ stats->lcl_dram += add->lcl_dram;
+ stats->rmt_dram += add->rmt_dram;
+ stats->blk_data += add->blk_data;
+ stats->blk_addr += add->blk_addr;
+ stats->nomap += add->nomap;
+ stats->noparse += add->noparse;
+}
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
new file mode 100644
index 000000000..b40ad6ea9
--- /dev/null
+++ b/tools/perf/util/mem-events.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_MEM_EVENTS_H
+#define __PERF_MEM_EVENTS_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <linux/types.h>
+#include <linux/refcount.h>
+#include <linux/perf_event.h>
+#include "stat.h"
+#include "evsel.h"
+
+struct perf_mem_event {
+ bool record;
+ bool supported;
+ const char *tag;
+ const char *name;
+ const char *sysfs_name;
+};
+
+struct mem_info {
+ struct addr_map_symbol iaddr;
+ struct addr_map_symbol daddr;
+ union perf_mem_data_src data_src;
+ refcount_t refcnt;
+};
+
+enum {
+ PERF_MEM_EVENTS__LOAD,
+ PERF_MEM_EVENTS__STORE,
+ PERF_MEM_EVENTS__LOAD_STORE,
+ PERF_MEM_EVENTS__MAX,
+};
+
+extern unsigned int perf_mem_events__loads_ldlat;
+
+int perf_mem_events__parse(const char *str);
+int perf_mem_events__init(void);
+
+const char *perf_mem_events__name(int i, const char *pmu_name);
+struct perf_mem_event *perf_mem_events__ptr(int i);
+bool is_mem_loads_aux_event(struct evsel *leader);
+
+void perf_mem_events__list(void);
+int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
+ char **rec_tmp, int *tmp_nr);
+
+int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+
+int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info);
+
+struct c2c_stats {
+ u32 nr_entries;
+
+ u32 locks; /* count of 'lock' transactions */
+ u32 store; /* count of all stores in trace */
+ u32 st_uncache; /* stores to uncacheable address */
+ u32 st_noadrs; /* cacheable store with no address */
+ u32 st_l1hit; /* count of stores that hit L1D */
+ u32 st_l1miss; /* count of stores that miss L1D */
+ u32 st_na; /* count of stores with memory level is not available */
+ u32 load; /* count of all loads in trace */
+ u32 ld_excl; /* exclusive loads, rmt/lcl DRAM - snp none/miss */
+ u32 ld_shared; /* shared loads, rmt/lcl DRAM - snp hit */
+ u32 ld_uncache; /* loads to uncacheable address */
+ u32 ld_io; /* loads to io address */
+ u32 ld_miss; /* loads miss */
+ u32 ld_noadrs; /* cacheable load with no address */
+ u32 ld_fbhit; /* count of loads hitting Fill Buffer */
+ u32 ld_l1hit; /* count of loads that hit L1D */
+ u32 ld_l2hit; /* count of loads that hit L2D */
+ u32 ld_llchit; /* count of loads that hit LLC */
+ u32 lcl_hitm; /* count of loads with local HITM */
+ u32 rmt_hitm; /* count of loads with remote HITM */
+ u32 tot_hitm; /* count of loads with local and remote HITM */
+ u32 lcl_peer; /* count of loads with local peer cache */
+ u32 rmt_peer; /* count of loads with remote peer cache */
+ u32 tot_peer; /* count of loads with local and remote peer cache */
+ u32 rmt_hit; /* count of loads with remote hit clean; */
+ u32 lcl_dram; /* count of loads miss to local DRAM */
+ u32 rmt_dram; /* count of loads miss to remote DRAM */
+ u32 blk_data; /* count of loads blocked by data */
+ u32 blk_addr; /* count of loads blocked by address conflict */
+ u32 nomap; /* count of load/stores with no phys addrs */
+ u32 noparse; /* count of unparsable data sources */
+};
+
+struct hist_entry;
+int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi);
+void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add);
+
+#endif /* __PERF_MEM_EVENTS_H */
diff --git a/tools/perf/util/mem2node.c b/tools/perf/util/mem2node.c
new file mode 100644
index 000000000..03a7d7b27
--- /dev/null
+++ b/tools/perf/util/mem2node.c
@@ -0,0 +1,139 @@
+#include <errno.h>
+#include <inttypes.h>
+#include <asm/bug.h>
+#include <linux/bitmap.h>
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+#include "debug.h"
+#include "env.h"
+#include "mem2node.h"
+
+struct phys_entry {
+ struct rb_node rb_node;
+ u64 start;
+ u64 end;
+ u64 node;
+};
+
+static void phys_entry__insert(struct phys_entry *entry, struct rb_root *root)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct phys_entry *e;
+
+ while (*p != NULL) {
+ parent = *p;
+ e = rb_entry(parent, struct phys_entry, rb_node);
+
+ if (entry->start < e->start)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ rb_link_node(&entry->rb_node, parent, p);
+ rb_insert_color(&entry->rb_node, root);
+}
+
+static void
+phys_entry__init(struct phys_entry *entry, u64 start, u64 bsize, u64 node)
+{
+ entry->start = start;
+ entry->end = start + bsize;
+ entry->node = node;
+ RB_CLEAR_NODE(&entry->rb_node);
+}
+
+int mem2node__init(struct mem2node *map, struct perf_env *env)
+{
+ struct memory_node *n, *nodes = &env->memory_nodes[0];
+ struct phys_entry *entries, *tmp_entries;
+ u64 bsize = env->memory_bsize;
+ int i, j = 0, max = 0;
+
+ memset(map, 0x0, sizeof(*map));
+ map->root = RB_ROOT;
+
+ for (i = 0; i < env->nr_memory_nodes; i++) {
+ n = &nodes[i];
+ max += bitmap_weight(n->set, n->size);
+ }
+
+ entries = zalloc(sizeof(*entries) * max);
+ if (!entries)
+ return -ENOMEM;
+
+ for (i = 0; i < env->nr_memory_nodes; i++) {
+ u64 bit;
+
+ n = &nodes[i];
+
+ for (bit = 0; bit < n->size; bit++) {
+ u64 start;
+
+ if (!test_bit(bit, n->set))
+ continue;
+
+ start = bit * bsize;
+
+ /*
+ * Merge nearby areas, we walk in order
+ * through the bitmap, so no need to sort.
+ */
+ if (j > 0) {
+ struct phys_entry *prev = &entries[j - 1];
+
+ if ((prev->end == start) &&
+ (prev->node == n->node)) {
+ prev->end += bsize;
+ continue;
+ }
+ }
+
+ phys_entry__init(&entries[j++], start, bsize, n->node);
+ }
+ }
+
+ /* Cut unused entries, due to merging. */
+ tmp_entries = realloc(entries, sizeof(*entries) * j);
+ if (tmp_entries ||
+ WARN_ONCE(j == 0, "No memory nodes, is CONFIG_MEMORY_HOTPLUG enabled?\n"))
+ entries = tmp_entries;
+
+ for (i = 0; i < j; i++) {
+ pr_debug("mem2node %03" PRIu64 " [0x%016" PRIx64 "-0x%016" PRIx64 "]\n",
+ entries[i].node, entries[i].start, entries[i].end);
+
+ phys_entry__insert(&entries[i], &map->root);
+ }
+
+ map->entries = entries;
+ return 0;
+}
+
+void mem2node__exit(struct mem2node *map)
+{
+ zfree(&map->entries);
+}
+
+int mem2node__node(struct mem2node *map, u64 addr)
+{
+ struct rb_node **p, *parent = NULL;
+ struct phys_entry *entry;
+
+ p = &map->root.rb_node;
+ while (*p != NULL) {
+ parent = *p;
+ entry = rb_entry(parent, struct phys_entry, rb_node);
+ if (addr < entry->start)
+ p = &(*p)->rb_left;
+ else if (addr >= entry->end)
+ p = &(*p)->rb_right;
+ else
+ goto out;
+ }
+
+ entry = NULL;
+out:
+ return entry ? (int) entry->node : -1;
+}
diff --git a/tools/perf/util/mem2node.h b/tools/perf/util/mem2node.h
new file mode 100644
index 000000000..8dfa2b58d
--- /dev/null
+++ b/tools/perf/util/mem2node.h
@@ -0,0 +1,20 @@
+#ifndef __MEM2NODE_H
+#define __MEM2NODE_H
+
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+struct perf_env;
+struct phys_entry;
+
+struct mem2node {
+ struct rb_root root;
+ struct phys_entry *entries;
+ int cnt;
+};
+
+int mem2node__init(struct mem2node *map, struct perf_env *env);
+void mem2node__exit(struct mem2node *map);
+int mem2node__node(struct mem2node *map, u64 addr);
+
+#endif /* __MEM2NODE_H */
diff --git a/tools/perf/util/memswap.c b/tools/perf/util/memswap.c
new file mode 100644
index 000000000..c1317e498
--- /dev/null
+++ b/tools/perf/util/memswap.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <byteswap.h>
+#include "memswap.h"
+#include <linux/types.h>
+
+void mem_bswap_32(void *src, int byte_size)
+{
+ u32 *m = src;
+ while (byte_size > 0) {
+ *m = bswap_32(*m);
+ byte_size -= sizeof(u32);
+ ++m;
+ }
+}
+
+void mem_bswap_64(void *src, int byte_size)
+{
+ u64 *m = src;
+
+ while (byte_size > 0) {
+ *m = bswap_64(*m);
+ byte_size -= sizeof(u64);
+ ++m;
+ }
+}
diff --git a/tools/perf/util/memswap.h b/tools/perf/util/memswap.h
new file mode 100644
index 000000000..2c38e8c2d
--- /dev/null
+++ b/tools/perf/util/memswap.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_MEMSWAP_H_
+#define PERF_MEMSWAP_H_
+
+#include <linux/types.h>
+
+union u64_swap {
+ u64 val64;
+ u32 val32[2];
+};
+
+void mem_bswap_64(void *src, int byte_size);
+void mem_bswap_32(void *src, int byte_size);
+
+#endif /* PERF_MEMSWAP_H_ */
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
new file mode 100644
index 000000000..bb5faaa25
--- /dev/null
+++ b/tools/perf/util/metricgroup.c
@@ -0,0 +1,1864 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2017, Intel Corporation.
+ */
+
+/* Manage metrics and groups of metrics from JSON files */
+
+#include "metricgroup.h"
+#include "debug.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "strbuf.h"
+#include "pmu.h"
+#include "pmus.h"
+#include "print-events.h"
+#include "smt.h"
+#include "expr.h"
+#include "rblist.h"
+#include <string.h>
+#include <errno.h>
+#include "strlist.h"
+#include <assert.h>
+#include <linux/ctype.h>
+#include <linux/list_sort.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <perf/cpumap.h>
+#include <subcmd/parse-options.h>
+#include <api/fs/fs.h>
+#include "util.h"
+#include <asm/bug.h>
+#include "cgroup.h"
+#include "util/hashmap.h"
+
+struct metric_event *metricgroup__lookup(struct rblist *metric_events,
+ struct evsel *evsel,
+ bool create)
+{
+ struct rb_node *nd;
+ struct metric_event me = {
+ .evsel = evsel
+ };
+
+ if (!metric_events)
+ return NULL;
+
+ nd = rblist__find(metric_events, &me);
+ if (nd)
+ return container_of(nd, struct metric_event, nd);
+ if (create) {
+ rblist__add_node(metric_events, &me);
+ nd = rblist__find(metric_events, &me);
+ if (nd)
+ return container_of(nd, struct metric_event, nd);
+ }
+ return NULL;
+}
+
+static int metric_event_cmp(struct rb_node *rb_node, const void *entry)
+{
+ struct metric_event *a = container_of(rb_node,
+ struct metric_event,
+ nd);
+ const struct metric_event *b = entry;
+
+ if (a->evsel == b->evsel)
+ return 0;
+ if ((char *)a->evsel < (char *)b->evsel)
+ return -1;
+ return +1;
+}
+
+static struct rb_node *metric_event_new(struct rblist *rblist __maybe_unused,
+ const void *entry)
+{
+ struct metric_event *me = malloc(sizeof(struct metric_event));
+
+ if (!me)
+ return NULL;
+ memcpy(me, entry, sizeof(struct metric_event));
+ me->evsel = ((struct metric_event *)entry)->evsel;
+ me->is_default = false;
+ INIT_LIST_HEAD(&me->head);
+ return &me->nd;
+}
+
+static void metric_event_delete(struct rblist *rblist __maybe_unused,
+ struct rb_node *rb_node)
+{
+ struct metric_event *me = container_of(rb_node, struct metric_event, nd);
+ struct metric_expr *expr, *tmp;
+
+ list_for_each_entry_safe(expr, tmp, &me->head, nd) {
+ zfree(&expr->metric_name);
+ zfree(&expr->metric_refs);
+ zfree(&expr->metric_events);
+ free(expr);
+ }
+
+ free(me);
+}
+
+static void metricgroup__rblist_init(struct rblist *metric_events)
+{
+ rblist__init(metric_events);
+ metric_events->node_cmp = metric_event_cmp;
+ metric_events->node_new = metric_event_new;
+ metric_events->node_delete = metric_event_delete;
+}
+
+void metricgroup__rblist_exit(struct rblist *metric_events)
+{
+ rblist__exit(metric_events);
+}
+
+/**
+ * The metric under construction. The data held here will be placed in a
+ * metric_expr.
+ */
+struct metric {
+ struct list_head nd;
+ /**
+ * The expression parse context importantly holding the IDs contained
+ * within the expression.
+ */
+ struct expr_parse_ctx *pctx;
+ const char *pmu;
+ /** The name of the metric such as "IPC". */
+ const char *metric_name;
+ /** Modifier on the metric such as "u" or NULL for none. */
+ const char *modifier;
+ /** The expression to parse, for example, "instructions/cycles". */
+ const char *metric_expr;
+ /** Optional threshold expression where zero value is green, otherwise red. */
+ const char *metric_threshold;
+ /**
+ * The "ScaleUnit" that scales and adds a unit to the metric during
+ * output.
+ */
+ const char *metric_unit;
+ /**
+ * Optional name of the metric group reported
+ * if the Default metric group is being processed.
+ */
+ const char *default_metricgroup_name;
+ /** Optional null terminated array of referenced metrics. */
+ struct metric_ref *metric_refs;
+ /**
+ * Should events of the metric be grouped?
+ */
+ bool group_events;
+ /**
+ * Parsed events for the metric. Optional as events may be taken from a
+ * different metric whose group contains all the IDs necessary for this
+ * one.
+ */
+ struct evlist *evlist;
+};
+
+static void metric__watchdog_constraint_hint(const char *name, bool foot)
+{
+ static bool violate_nmi_constraint;
+
+ if (!foot) {
+ pr_warning("Not grouping metric %s's events.\n", name);
+ violate_nmi_constraint = true;
+ return;
+ }
+
+ if (!violate_nmi_constraint)
+ return;
+
+ pr_warning("Try disabling the NMI watchdog to comply NO_NMI_WATCHDOG metric constraint:\n"
+ " echo 0 > /proc/sys/kernel/nmi_watchdog\n"
+ " perf stat ...\n"
+ " echo 1 > /proc/sys/kernel/nmi_watchdog\n");
+}
+
+static bool metric__group_events(const struct pmu_metric *pm)
+{
+ switch (pm->event_grouping) {
+ case MetricNoGroupEvents:
+ return false;
+ case MetricNoGroupEventsNmi:
+ if (!sysctl__nmi_watchdog_enabled())
+ return true;
+ metric__watchdog_constraint_hint(pm->metric_name, /*foot=*/false);
+ return false;
+ case MetricNoGroupEventsSmt:
+ return !smt_on();
+ case MetricGroupEvents:
+ default:
+ return true;
+ }
+}
+
+static void metric__free(struct metric *m)
+{
+ if (!m)
+ return;
+
+ zfree(&m->metric_refs);
+ expr__ctx_free(m->pctx);
+ zfree(&m->modifier);
+ evlist__delete(m->evlist);
+ free(m);
+}
+
+static struct metric *metric__new(const struct pmu_metric *pm,
+ const char *modifier,
+ bool metric_no_group,
+ int runtime,
+ const char *user_requested_cpu_list,
+ bool system_wide)
+{
+ struct metric *m;
+
+ m = zalloc(sizeof(*m));
+ if (!m)
+ return NULL;
+
+ m->pctx = expr__ctx_new();
+ if (!m->pctx)
+ goto out_err;
+
+ m->pmu = pm->pmu ?: "cpu";
+ m->metric_name = pm->metric_name;
+ m->default_metricgroup_name = pm->default_metricgroup_name ?: "";
+ m->modifier = NULL;
+ if (modifier) {
+ m->modifier = strdup(modifier);
+ if (!m->modifier)
+ goto out_err;
+ }
+ m->metric_expr = pm->metric_expr;
+ m->metric_threshold = pm->metric_threshold;
+ m->metric_unit = pm->unit;
+ m->pctx->sctx.user_requested_cpu_list = NULL;
+ if (user_requested_cpu_list) {
+ m->pctx->sctx.user_requested_cpu_list = strdup(user_requested_cpu_list);
+ if (!m->pctx->sctx.user_requested_cpu_list)
+ goto out_err;
+ }
+ m->pctx->sctx.runtime = runtime;
+ m->pctx->sctx.system_wide = system_wide;
+ m->group_events = !metric_no_group && metric__group_events(pm);
+ m->metric_refs = NULL;
+ m->evlist = NULL;
+
+ return m;
+out_err:
+ metric__free(m);
+ return NULL;
+}
+
+static bool contains_metric_id(struct evsel **metric_events, int num_events,
+ const char *metric_id)
+{
+ int i;
+
+ for (i = 0; i < num_events; i++) {
+ if (!strcmp(evsel__metric_id(metric_events[i]), metric_id))
+ return true;
+ }
+ return false;
+}
+
+/**
+ * setup_metric_events - Find a group of events in metric_evlist that correspond
+ * to the IDs from a parsed metric expression.
+ * @pmu: The PMU for the IDs.
+ * @ids: the metric IDs to match.
+ * @metric_evlist: the list of perf events.
+ * @out_metric_events: holds the created metric events array.
+ */
+static int setup_metric_events(const char *pmu, struct hashmap *ids,
+ struct evlist *metric_evlist,
+ struct evsel ***out_metric_events)
+{
+ struct evsel **metric_events;
+ const char *metric_id;
+ struct evsel *ev;
+ size_t ids_size, matched_events, i;
+ bool all_pmus = !strcmp(pmu, "all") || perf_pmus__num_core_pmus() == 1 || !is_pmu_core(pmu);
+
+ *out_metric_events = NULL;
+ ids_size = hashmap__size(ids);
+
+ metric_events = calloc(sizeof(void *), ids_size + 1);
+ if (!metric_events)
+ return -ENOMEM;
+
+ matched_events = 0;
+ evlist__for_each_entry(metric_evlist, ev) {
+ struct expr_id_data *val_ptr;
+
+ /* Don't match events for the wrong hybrid PMU. */
+ if (!all_pmus && ev->pmu_name && evsel__is_hybrid(ev) &&
+ strcmp(ev->pmu_name, pmu))
+ continue;
+ /*
+ * Check for duplicate events with the same name. For
+ * example, uncore_imc/cas_count_read/ will turn into 6
+ * events per socket on skylakex. Only the first such
+ * event is placed in metric_events.
+ */
+ metric_id = evsel__metric_id(ev);
+ if (contains_metric_id(metric_events, matched_events, metric_id))
+ continue;
+ /*
+ * Does this event belong to the parse context? For
+ * combined or shared groups, this metric may not care
+ * about this event.
+ */
+ if (hashmap__find(ids, metric_id, &val_ptr)) {
+ pr_debug("Matched metric-id %s to %s\n", metric_id, evsel__name(ev));
+ metric_events[matched_events++] = ev;
+
+ if (matched_events >= ids_size)
+ break;
+ }
+ }
+ if (matched_events < ids_size) {
+ free(metric_events);
+ return -EINVAL;
+ }
+ for (i = 0; i < ids_size; i++) {
+ ev = metric_events[i];
+ ev->collect_stat = true;
+
+ /*
+ * The metric leader points to the identically named
+ * event in metric_events.
+ */
+ ev->metric_leader = ev;
+ /*
+ * Mark two events with identical names in the same
+ * group (or globally) as being in use as uncore events
+ * may be duplicated for each pmu. Set the metric leader
+ * of such events to be the event that appears in
+ * metric_events.
+ */
+ metric_id = evsel__metric_id(ev);
+ evlist__for_each_entry_continue(metric_evlist, ev) {
+ if (!strcmp(evsel__metric_id(ev), metric_id))
+ ev->metric_leader = metric_events[i];
+ }
+ }
+ *out_metric_events = metric_events;
+ return 0;
+}
+
+static bool match_metric(const char *n, const char *list)
+{
+ int len;
+ char *m;
+
+ if (!list)
+ return false;
+ if (!strcmp(list, "all"))
+ return true;
+ if (!n)
+ return !strcasecmp(list, "No_group");
+ len = strlen(list);
+ m = strcasestr(n, list);
+ if (!m)
+ return false;
+ if ((m == n || m[-1] == ';' || m[-1] == ' ') &&
+ (m[len] == 0 || m[len] == ';'))
+ return true;
+ return false;
+}
+
+static bool match_pm_metric(const struct pmu_metric *pm, const char *pmu, const char *metric)
+{
+ const char *pm_pmu = pm->pmu ?: "cpu";
+
+ if (strcmp(pmu, "all") && strcmp(pm_pmu, pmu))
+ return false;
+
+ return match_metric(pm->metric_group, metric) ||
+ match_metric(pm->metric_name, metric);
+}
+
+/** struct mep - RB-tree node for building printing information. */
+struct mep {
+ /** nd - RB-tree element. */
+ struct rb_node nd;
+ /** @metric_group: Owned metric group name, separated others with ';'. */
+ char *metric_group;
+ const char *metric_name;
+ const char *metric_desc;
+ const char *metric_long_desc;
+ const char *metric_expr;
+ const char *metric_threshold;
+ const char *metric_unit;
+};
+
+static int mep_cmp(struct rb_node *rb_node, const void *entry)
+{
+ struct mep *a = container_of(rb_node, struct mep, nd);
+ struct mep *b = (struct mep *)entry;
+ int ret;
+
+ ret = strcmp(a->metric_group, b->metric_group);
+ if (ret)
+ return ret;
+
+ return strcmp(a->metric_name, b->metric_name);
+}
+
+static struct rb_node *mep_new(struct rblist *rl __maybe_unused, const void *entry)
+{
+ struct mep *me = malloc(sizeof(struct mep));
+
+ if (!me)
+ return NULL;
+
+ memcpy(me, entry, sizeof(struct mep));
+ return &me->nd;
+}
+
+static void mep_delete(struct rblist *rl __maybe_unused,
+ struct rb_node *nd)
+{
+ struct mep *me = container_of(nd, struct mep, nd);
+
+ zfree(&me->metric_group);
+ free(me);
+}
+
+static struct mep *mep_lookup(struct rblist *groups, const char *metric_group,
+ const char *metric_name)
+{
+ struct rb_node *nd;
+ struct mep me = {
+ .metric_group = strdup(metric_group),
+ .metric_name = metric_name,
+ };
+ nd = rblist__find(groups, &me);
+ if (nd) {
+ free(me.metric_group);
+ return container_of(nd, struct mep, nd);
+ }
+ rblist__add_node(groups, &me);
+ nd = rblist__find(groups, &me);
+ if (nd)
+ return container_of(nd, struct mep, nd);
+ return NULL;
+}
+
+static int metricgroup__add_to_mep_groups(const struct pmu_metric *pm,
+ struct rblist *groups)
+{
+ const char *g;
+ char *omg, *mg;
+
+ mg = strdup(pm->metric_group ?: "No_group");
+ if (!mg)
+ return -ENOMEM;
+ omg = mg;
+ while ((g = strsep(&mg, ";")) != NULL) {
+ struct mep *me;
+
+ g = skip_spaces(g);
+ if (strlen(g))
+ me = mep_lookup(groups, g, pm->metric_name);
+ else
+ me = mep_lookup(groups, "No_group", pm->metric_name);
+
+ if (me) {
+ me->metric_desc = pm->desc;
+ me->metric_long_desc = pm->long_desc;
+ me->metric_expr = pm->metric_expr;
+ me->metric_threshold = pm->metric_threshold;
+ me->metric_unit = pm->unit;
+ }
+ }
+ free(omg);
+
+ return 0;
+}
+
+struct metricgroup_iter_data {
+ pmu_metric_iter_fn fn;
+ void *data;
+};
+
+static int metricgroup__sys_event_iter(const struct pmu_metric *pm,
+ const struct pmu_metrics_table *table,
+ void *data)
+{
+ struct metricgroup_iter_data *d = data;
+ struct perf_pmu *pmu = NULL;
+
+ if (!pm->metric_expr || !pm->compat)
+ return 0;
+
+ while ((pmu = perf_pmus__scan(pmu))) {
+
+ if (!pmu->id || strcmp(pmu->id, pm->compat))
+ continue;
+
+ return d->fn(pm, table, d->data);
+ }
+ return 0;
+}
+
+static int metricgroup__add_to_mep_groups_callback(const struct pmu_metric *pm,
+ const struct pmu_metrics_table *table __maybe_unused,
+ void *vdata)
+{
+ struct rblist *groups = vdata;
+
+ return metricgroup__add_to_mep_groups(pm, groups);
+}
+
+void metricgroup__print(const struct print_callbacks *print_cb, void *print_state)
+{
+ struct rblist groups;
+ const struct pmu_metrics_table *table;
+ struct rb_node *node, *next;
+
+ rblist__init(&groups);
+ groups.node_new = mep_new;
+ groups.node_cmp = mep_cmp;
+ groups.node_delete = mep_delete;
+ table = pmu_metrics_table__find();
+ if (table) {
+ pmu_metrics_table__for_each_metric(table,
+ metricgroup__add_to_mep_groups_callback,
+ &groups);
+ }
+ {
+ struct metricgroup_iter_data data = {
+ .fn = metricgroup__add_to_mep_groups_callback,
+ .data = &groups,
+ };
+ pmu_for_each_sys_metric(metricgroup__sys_event_iter, &data);
+ }
+
+ for (node = rb_first_cached(&groups.entries); node; node = next) {
+ struct mep *me = container_of(node, struct mep, nd);
+
+ print_cb->print_metric(print_state,
+ me->metric_group,
+ me->metric_name,
+ me->metric_desc,
+ me->metric_long_desc,
+ me->metric_expr,
+ me->metric_threshold,
+ me->metric_unit);
+ next = rb_next(node);
+ rblist__remove_node(&groups, node);
+ }
+}
+
+static const char *code_characters = ",-=@";
+
+static int encode_metric_id(struct strbuf *sb, const char *x)
+{
+ char *c;
+ int ret = 0;
+
+ for (; *x; x++) {
+ c = strchr(code_characters, *x);
+ if (c) {
+ ret = strbuf_addch(sb, '!');
+ if (ret)
+ break;
+
+ ret = strbuf_addch(sb, '0' + (c - code_characters));
+ if (ret)
+ break;
+ } else {
+ ret = strbuf_addch(sb, *x);
+ if (ret)
+ break;
+ }
+ }
+ return ret;
+}
+
+static int decode_metric_id(struct strbuf *sb, const char *x)
+{
+ const char *orig = x;
+ size_t i;
+ char c;
+ int ret;
+
+ for (; *x; x++) {
+ c = *x;
+ if (*x == '!') {
+ x++;
+ i = *x - '0';
+ if (i > strlen(code_characters)) {
+ pr_err("Bad metric-id encoding in: '%s'", orig);
+ return -1;
+ }
+ c = code_characters[i];
+ }
+ ret = strbuf_addch(sb, c);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static int decode_all_metric_ids(struct evlist *perf_evlist, const char *modifier)
+{
+ struct evsel *ev;
+ struct strbuf sb = STRBUF_INIT;
+ char *cur;
+ int ret = 0;
+
+ evlist__for_each_entry(perf_evlist, ev) {
+ if (!ev->metric_id)
+ continue;
+
+ ret = strbuf_setlen(&sb, 0);
+ if (ret)
+ break;
+
+ ret = decode_metric_id(&sb, ev->metric_id);
+ if (ret)
+ break;
+
+ free((char *)ev->metric_id);
+ ev->metric_id = strdup(sb.buf);
+ if (!ev->metric_id) {
+ ret = -ENOMEM;
+ break;
+ }
+ /*
+ * If the name is just the parsed event, use the metric-id to
+ * give a more friendly display version.
+ */
+ if (strstr(ev->name, "metric-id=")) {
+ bool has_slash = false;
+
+ zfree(&ev->name);
+ for (cur = strchr(sb.buf, '@') ; cur; cur = strchr(++cur, '@')) {
+ *cur = '/';
+ has_slash = true;
+ }
+
+ if (modifier) {
+ if (!has_slash && !strchr(sb.buf, ':')) {
+ ret = strbuf_addch(&sb, ':');
+ if (ret)
+ break;
+ }
+ ret = strbuf_addstr(&sb, modifier);
+ if (ret)
+ break;
+ }
+ ev->name = strdup(sb.buf);
+ if (!ev->name) {
+ ret = -ENOMEM;
+ break;
+ }
+ }
+ }
+ strbuf_release(&sb);
+ return ret;
+}
+
+static int metricgroup__build_event_string(struct strbuf *events,
+ const struct expr_parse_ctx *ctx,
+ const char *modifier,
+ bool group_events)
+{
+ struct hashmap_entry *cur;
+ size_t bkt;
+ bool no_group = true, has_tool_events = false;
+ bool tool_events[PERF_TOOL_MAX] = {false};
+ int ret = 0;
+
+#define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0)
+
+ hashmap__for_each_entry(ctx->ids, cur, bkt) {
+ const char *sep, *rsep, *id = cur->pkey;
+ enum perf_tool_event ev;
+
+ pr_debug("found event %s\n", id);
+
+ /* Always move tool events outside of the group. */
+ ev = perf_tool_event__from_str(id);
+ if (ev != PERF_TOOL_NONE) {
+ has_tool_events = true;
+ tool_events[ev] = true;
+ continue;
+ }
+ /* Separate events with commas and open the group if necessary. */
+ if (no_group) {
+ if (group_events) {
+ ret = strbuf_addch(events, '{');
+ RETURN_IF_NON_ZERO(ret);
+ }
+
+ no_group = false;
+ } else {
+ ret = strbuf_addch(events, ',');
+ RETURN_IF_NON_ZERO(ret);
+ }
+ /*
+ * Encode the ID as an event string. Add a qualifier for
+ * metric_id that is the original name except with characters
+ * that parse-events can't parse replaced. For example,
+ * 'msr@tsc@' gets added as msr/tsc,metric-id=msr!3tsc!3/
+ */
+ sep = strchr(id, '@');
+ if (sep != NULL) {
+ ret = strbuf_add(events, id, sep - id);
+ RETURN_IF_NON_ZERO(ret);
+ ret = strbuf_addch(events, '/');
+ RETURN_IF_NON_ZERO(ret);
+ rsep = strrchr(sep, '@');
+ ret = strbuf_add(events, sep + 1, rsep - sep - 1);
+ RETURN_IF_NON_ZERO(ret);
+ ret = strbuf_addstr(events, ",metric-id=");
+ RETURN_IF_NON_ZERO(ret);
+ sep = rsep;
+ } else {
+ sep = strchr(id, ':');
+ if (sep != NULL) {
+ ret = strbuf_add(events, id, sep - id);
+ RETURN_IF_NON_ZERO(ret);
+ } else {
+ ret = strbuf_addstr(events, id);
+ RETURN_IF_NON_ZERO(ret);
+ }
+ ret = strbuf_addstr(events, "/metric-id=");
+ RETURN_IF_NON_ZERO(ret);
+ }
+ ret = encode_metric_id(events, id);
+ RETURN_IF_NON_ZERO(ret);
+ ret = strbuf_addstr(events, "/");
+ RETURN_IF_NON_ZERO(ret);
+
+ if (sep != NULL) {
+ ret = strbuf_addstr(events, sep + 1);
+ RETURN_IF_NON_ZERO(ret);
+ }
+ if (modifier) {
+ ret = strbuf_addstr(events, modifier);
+ RETURN_IF_NON_ZERO(ret);
+ }
+ }
+ if (!no_group && group_events) {
+ ret = strbuf_addf(events, "}:W");
+ RETURN_IF_NON_ZERO(ret);
+ }
+ if (has_tool_events) {
+ int i;
+
+ perf_tool_event__for_each_event(i) {
+ if (tool_events[i]) {
+ if (!no_group) {
+ ret = strbuf_addch(events, ',');
+ RETURN_IF_NON_ZERO(ret);
+ }
+ no_group = false;
+ ret = strbuf_addstr(events, perf_tool_event__to_str(i));
+ RETURN_IF_NON_ZERO(ret);
+ }
+ }
+ }
+
+ return ret;
+#undef RETURN_IF_NON_ZERO
+}
+
+int __weak arch_get_runtimeparam(const struct pmu_metric *pm __maybe_unused)
+{
+ return 1;
+}
+
+/*
+ * A singly linked list on the stack of the names of metrics being
+ * processed. Used to identify recursion.
+ */
+struct visited_metric {
+ const char *name;
+ const struct visited_metric *parent;
+};
+
+struct metricgroup_add_iter_data {
+ struct list_head *metric_list;
+ const char *pmu;
+ const char *metric_name;
+ const char *modifier;
+ int *ret;
+ bool *has_match;
+ bool metric_no_group;
+ bool metric_no_threshold;
+ const char *user_requested_cpu_list;
+ bool system_wide;
+ struct metric *root_metric;
+ const struct visited_metric *visited;
+ const struct pmu_metrics_table *table;
+};
+
+static bool metricgroup__find_metric(const char *pmu,
+ const char *metric,
+ const struct pmu_metrics_table *table,
+ struct pmu_metric *pm);
+
+static int add_metric(struct list_head *metric_list,
+ const struct pmu_metric *pm,
+ const char *modifier,
+ bool metric_no_group,
+ bool metric_no_threshold,
+ const char *user_requested_cpu_list,
+ bool system_wide,
+ struct metric *root_metric,
+ const struct visited_metric *visited,
+ const struct pmu_metrics_table *table);
+
+/**
+ * resolve_metric - Locate metrics within the root metric and recursively add
+ * references to them.
+ * @metric_list: The list the metric is added to.
+ * @pmu: The PMU name to resolve metrics on, or "all" for all PMUs.
+ * @modifier: if non-null event modifiers like "u".
+ * @metric_no_group: Should events written to events be grouped "{}" or
+ * global. Grouping is the default but due to multiplexing the
+ * user may override.
+ * @user_requested_cpu_list: Command line specified CPUs to record on.
+ * @system_wide: Are events for all processes recorded.
+ * @root_metric: Metrics may reference other metrics to form a tree. In this
+ * case the root_metric holds all the IDs and a list of referenced
+ * metrics. When adding a root this argument is NULL.
+ * @visited: A singly linked list of metric names being added that is used to
+ * detect recursion.
+ * @table: The table that is searched for metrics, most commonly the table for the
+ * architecture perf is running upon.
+ */
+static int resolve_metric(struct list_head *metric_list,
+ const char *pmu,
+ const char *modifier,
+ bool metric_no_group,
+ bool metric_no_threshold,
+ const char *user_requested_cpu_list,
+ bool system_wide,
+ struct metric *root_metric,
+ const struct visited_metric *visited,
+ const struct pmu_metrics_table *table)
+{
+ struct hashmap_entry *cur;
+ size_t bkt;
+ struct to_resolve {
+ /* The metric to resolve. */
+ struct pmu_metric pm;
+ /*
+ * The key in the IDs map, this may differ from in case,
+ * etc. from pm->metric_name.
+ */
+ const char *key;
+ } *pending = NULL;
+ int i, ret = 0, pending_cnt = 0;
+
+ /*
+ * Iterate all the parsed IDs and if there's a matching metric and it to
+ * the pending array.
+ */
+ hashmap__for_each_entry(root_metric->pctx->ids, cur, bkt) {
+ struct pmu_metric pm;
+
+ if (metricgroup__find_metric(pmu, cur->pkey, table, &pm)) {
+ pending = realloc(pending,
+ (pending_cnt + 1) * sizeof(struct to_resolve));
+ if (!pending)
+ return -ENOMEM;
+
+ memcpy(&pending[pending_cnt].pm, &pm, sizeof(pm));
+ pending[pending_cnt].key = cur->pkey;
+ pending_cnt++;
+ }
+ }
+
+ /* Remove the metric IDs from the context. */
+ for (i = 0; i < pending_cnt; i++)
+ expr__del_id(root_metric->pctx, pending[i].key);
+
+ /*
+ * Recursively add all the metrics, IDs are added to the root metric's
+ * context.
+ */
+ for (i = 0; i < pending_cnt; i++) {
+ ret = add_metric(metric_list, &pending[i].pm, modifier, metric_no_group,
+ metric_no_threshold, user_requested_cpu_list, system_wide,
+ root_metric, visited, table);
+ if (ret)
+ break;
+ }
+
+ free(pending);
+ return ret;
+}
+
+/**
+ * __add_metric - Add a metric to metric_list.
+ * @metric_list: The list the metric is added to.
+ * @pm: The pmu_metric containing the metric to be added.
+ * @modifier: if non-null event modifiers like "u".
+ * @metric_no_group: Should events written to events be grouped "{}" or
+ * global. Grouping is the default but due to multiplexing the
+ * user may override.
+ * @metric_no_threshold: Should threshold expressions be ignored?
+ * @runtime: A special argument for the parser only known at runtime.
+ * @user_requested_cpu_list: Command line specified CPUs to record on.
+ * @system_wide: Are events for all processes recorded.
+ * @root_metric: Metrics may reference other metrics to form a tree. In this
+ * case the root_metric holds all the IDs and a list of referenced
+ * metrics. When adding a root this argument is NULL.
+ * @visited: A singly linked list of metric names being added that is used to
+ * detect recursion.
+ * @table: The table that is searched for metrics, most commonly the table for the
+ * architecture perf is running upon.
+ */
+static int __add_metric(struct list_head *metric_list,
+ const struct pmu_metric *pm,
+ const char *modifier,
+ bool metric_no_group,
+ bool metric_no_threshold,
+ int runtime,
+ const char *user_requested_cpu_list,
+ bool system_wide,
+ struct metric *root_metric,
+ const struct visited_metric *visited,
+ const struct pmu_metrics_table *table)
+{
+ const struct visited_metric *vm;
+ int ret;
+ bool is_root = !root_metric;
+ const char *expr;
+ struct visited_metric visited_node = {
+ .name = pm->metric_name,
+ .parent = visited,
+ };
+
+ for (vm = visited; vm; vm = vm->parent) {
+ if (!strcmp(pm->metric_name, vm->name)) {
+ pr_err("failed: recursion detected for %s\n", pm->metric_name);
+ return -1;
+ }
+ }
+
+ if (is_root) {
+ /*
+ * This metric is the root of a tree and may reference other
+ * metrics that are added recursively.
+ */
+ root_metric = metric__new(pm, modifier, metric_no_group, runtime,
+ user_requested_cpu_list, system_wide);
+ if (!root_metric)
+ return -ENOMEM;
+
+ } else {
+ int cnt = 0;
+
+ /*
+ * This metric was referenced in a metric higher in the
+ * tree. Check if the same metric is already resolved in the
+ * metric_refs list.
+ */
+ if (root_metric->metric_refs) {
+ for (; root_metric->metric_refs[cnt].metric_name; cnt++) {
+ if (!strcmp(pm->metric_name,
+ root_metric->metric_refs[cnt].metric_name))
+ return 0;
+ }
+ }
+
+ /* Create reference. Need space for the entry and the terminator. */
+ root_metric->metric_refs = realloc(root_metric->metric_refs,
+ (cnt + 2) * sizeof(struct metric_ref));
+ if (!root_metric->metric_refs)
+ return -ENOMEM;
+
+ /*
+ * Intentionally passing just const char pointers,
+ * from 'pe' object, so they never go away. We don't
+ * need to change them, so there's no need to create
+ * our own copy.
+ */
+ root_metric->metric_refs[cnt].metric_name = pm->metric_name;
+ root_metric->metric_refs[cnt].metric_expr = pm->metric_expr;
+
+ /* Null terminate array. */
+ root_metric->metric_refs[cnt+1].metric_name = NULL;
+ root_metric->metric_refs[cnt+1].metric_expr = NULL;
+ }
+
+ /*
+ * For both the parent and referenced metrics, we parse
+ * all the metric's IDs and add it to the root context.
+ */
+ ret = 0;
+ expr = pm->metric_expr;
+ if (is_root && pm->metric_threshold) {
+ /*
+ * Threshold expressions are built off the actual metric. Switch
+ * to use that in case of additional necessary events. Change
+ * the visited node name to avoid this being flagged as
+ * recursion. If the threshold events are disabled, just use the
+ * metric's name as a reference. This allows metric threshold
+ * computation if there are sufficient events.
+ */
+ assert(strstr(pm->metric_threshold, pm->metric_name));
+ expr = metric_no_threshold ? pm->metric_name : pm->metric_threshold;
+ visited_node.name = "__threshold__";
+ }
+ if (expr__find_ids(expr, NULL, root_metric->pctx) < 0) {
+ /* Broken metric. */
+ ret = -EINVAL;
+ }
+ if (!ret) {
+ /* Resolve referenced metrics. */
+ const char *pmu = pm->pmu ?: "cpu";
+
+ ret = resolve_metric(metric_list, pmu, modifier, metric_no_group,
+ metric_no_threshold, user_requested_cpu_list,
+ system_wide, root_metric, &visited_node,
+ table);
+ }
+ if (ret) {
+ if (is_root)
+ metric__free(root_metric);
+
+ } else if (is_root)
+ list_add(&root_metric->nd, metric_list);
+
+ return ret;
+}
+
+struct metricgroup__find_metric_data {
+ const char *pmu;
+ const char *metric;
+ struct pmu_metric *pm;
+};
+
+static int metricgroup__find_metric_callback(const struct pmu_metric *pm,
+ const struct pmu_metrics_table *table __maybe_unused,
+ void *vdata)
+{
+ struct metricgroup__find_metric_data *data = vdata;
+ const char *pm_pmu = pm->pmu ?: "cpu";
+
+ if (strcmp(data->pmu, "all") && strcmp(pm_pmu, data->pmu))
+ return 0;
+
+ if (!match_metric(pm->metric_name, data->metric))
+ return 0;
+
+ memcpy(data->pm, pm, sizeof(*pm));
+ return 1;
+}
+
+static bool metricgroup__find_metric(const char *pmu,
+ const char *metric,
+ const struct pmu_metrics_table *table,
+ struct pmu_metric *pm)
+{
+ struct metricgroup__find_metric_data data = {
+ .pmu = pmu,
+ .metric = metric,
+ .pm = pm,
+ };
+
+ return pmu_metrics_table__for_each_metric(table, metricgroup__find_metric_callback, &data)
+ ? true : false;
+}
+
+static int add_metric(struct list_head *metric_list,
+ const struct pmu_metric *pm,
+ const char *modifier,
+ bool metric_no_group,
+ bool metric_no_threshold,
+ const char *user_requested_cpu_list,
+ bool system_wide,
+ struct metric *root_metric,
+ const struct visited_metric *visited,
+ const struct pmu_metrics_table *table)
+{
+ int ret = 0;
+
+ pr_debug("metric expr %s for %s\n", pm->metric_expr, pm->metric_name);
+
+ if (!strstr(pm->metric_expr, "?")) {
+ ret = __add_metric(metric_list, pm, modifier, metric_no_group,
+ metric_no_threshold, 0, user_requested_cpu_list,
+ system_wide, root_metric, visited, table);
+ } else {
+ int j, count;
+
+ count = arch_get_runtimeparam(pm);
+
+ /* This loop is added to create multiple
+ * events depend on count value and add
+ * those events to metric_list.
+ */
+
+ for (j = 0; j < count && !ret; j++)
+ ret = __add_metric(metric_list, pm, modifier, metric_no_group,
+ metric_no_threshold, j, user_requested_cpu_list,
+ system_wide, root_metric, visited, table);
+ }
+
+ return ret;
+}
+
+static int metricgroup__add_metric_sys_event_iter(const struct pmu_metric *pm,
+ const struct pmu_metrics_table *table __maybe_unused,
+ void *data)
+{
+ struct metricgroup_add_iter_data *d = data;
+ int ret;
+
+ if (!match_pm_metric(pm, d->pmu, d->metric_name))
+ return 0;
+
+ ret = add_metric(d->metric_list, pm, d->modifier, d->metric_no_group,
+ d->metric_no_threshold, d->user_requested_cpu_list,
+ d->system_wide, d->root_metric, d->visited, d->table);
+ if (ret)
+ goto out;
+
+ *(d->has_match) = true;
+
+out:
+ *(d->ret) = ret;
+ return ret;
+}
+
+/**
+ * metric_list_cmp - list_sort comparator that sorts metrics with more events to
+ * the front. tool events are excluded from the count.
+ */
+static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l,
+ const struct list_head *r)
+{
+ const struct metric *left = container_of(l, struct metric, nd);
+ const struct metric *right = container_of(r, struct metric, nd);
+ struct expr_id_data *data;
+ int i, left_count, right_count;
+
+ left_count = hashmap__size(left->pctx->ids);
+ perf_tool_event__for_each_event(i) {
+ if (!expr__get_id(left->pctx, perf_tool_event__to_str(i), &data))
+ left_count--;
+ }
+
+ right_count = hashmap__size(right->pctx->ids);
+ perf_tool_event__for_each_event(i) {
+ if (!expr__get_id(right->pctx, perf_tool_event__to_str(i), &data))
+ right_count--;
+ }
+
+ return right_count - left_count;
+}
+
+/**
+ * default_metricgroup_cmp - Implements complex key for the Default metricgroup
+ * that first sorts by default_metricgroup_name, then
+ * metric_name.
+ */
+static int default_metricgroup_cmp(void *priv __maybe_unused,
+ const struct list_head *l,
+ const struct list_head *r)
+{
+ const struct metric *left = container_of(l, struct metric, nd);
+ const struct metric *right = container_of(r, struct metric, nd);
+ int diff = strcmp(right->default_metricgroup_name, left->default_metricgroup_name);
+
+ if (diff)
+ return diff;
+
+ return strcmp(right->metric_name, left->metric_name);
+}
+
+struct metricgroup__add_metric_data {
+ struct list_head *list;
+ const char *pmu;
+ const char *metric_name;
+ const char *modifier;
+ const char *user_requested_cpu_list;
+ bool metric_no_group;
+ bool metric_no_threshold;
+ bool system_wide;
+ bool has_match;
+};
+
+static int metricgroup__add_metric_callback(const struct pmu_metric *pm,
+ const struct pmu_metrics_table *table,
+ void *vdata)
+{
+ struct metricgroup__add_metric_data *data = vdata;
+ int ret = 0;
+
+ if (pm->metric_expr && match_pm_metric(pm, data->pmu, data->metric_name)) {
+ bool metric_no_group = data->metric_no_group ||
+ match_metric(pm->metricgroup_no_group, data->metric_name);
+
+ data->has_match = true;
+ ret = add_metric(data->list, pm, data->modifier, metric_no_group,
+ data->metric_no_threshold, data->user_requested_cpu_list,
+ data->system_wide, /*root_metric=*/NULL,
+ /*visited_metrics=*/NULL, table);
+ }
+ return ret;
+}
+
+/**
+ * metricgroup__add_metric - Find and add a metric, or a metric group.
+ * @pmu: The PMU name to search for metrics on, or "all" for all PMUs.
+ * @metric_name: The name of the metric or metric group. For example, "IPC"
+ * could be the name of a metric and "TopDownL1" the name of a
+ * metric group.
+ * @modifier: if non-null event modifiers like "u".
+ * @metric_no_group: Should events written to events be grouped "{}" or
+ * global. Grouping is the default but due to multiplexing the
+ * user may override.
+ * @user_requested_cpu_list: Command line specified CPUs to record on.
+ * @system_wide: Are events for all processes recorded.
+ * @metric_list: The list that the metric or metric group are added to.
+ * @table: The table that is searched for metrics, most commonly the table for the
+ * architecture perf is running upon.
+ */
+static int metricgroup__add_metric(const char *pmu, const char *metric_name, const char *modifier,
+ bool metric_no_group, bool metric_no_threshold,
+ const char *user_requested_cpu_list,
+ bool system_wide,
+ struct list_head *metric_list,
+ const struct pmu_metrics_table *table)
+{
+ LIST_HEAD(list);
+ int ret;
+ bool has_match = false;
+
+ {
+ struct metricgroup__add_metric_data data = {
+ .list = &list,
+ .pmu = pmu,
+ .metric_name = metric_name,
+ .modifier = modifier,
+ .metric_no_group = metric_no_group,
+ .metric_no_threshold = metric_no_threshold,
+ .user_requested_cpu_list = user_requested_cpu_list,
+ .system_wide = system_wide,
+ .has_match = false,
+ };
+ /*
+ * Iterate over all metrics seeing if metric matches either the
+ * name or group. When it does add the metric to the list.
+ */
+ ret = pmu_metrics_table__for_each_metric(table, metricgroup__add_metric_callback,
+ &data);
+ if (ret)
+ goto out;
+
+ has_match = data.has_match;
+ }
+ {
+ struct metricgroup_iter_data data = {
+ .fn = metricgroup__add_metric_sys_event_iter,
+ .data = (void *) &(struct metricgroup_add_iter_data) {
+ .metric_list = &list,
+ .pmu = pmu,
+ .metric_name = metric_name,
+ .modifier = modifier,
+ .metric_no_group = metric_no_group,
+ .user_requested_cpu_list = user_requested_cpu_list,
+ .system_wide = system_wide,
+ .has_match = &has_match,
+ .ret = &ret,
+ .table = table,
+ },
+ };
+
+ pmu_for_each_sys_metric(metricgroup__sys_event_iter, &data);
+ }
+ /* End of pmu events. */
+ if (!has_match)
+ ret = -EINVAL;
+
+out:
+ /*
+ * add to metric_list so that they can be released
+ * even if it's failed
+ */
+ list_splice(&list, metric_list);
+ return ret;
+}
+
+/**
+ * metricgroup__add_metric_list - Find and add metrics, or metric groups,
+ * specified in a list.
+ * @pmu: A pmu to restrict the metrics to, or "all" for all PMUS.
+ * @list: the list of metrics or metric groups. For example, "IPC,CPI,TopDownL1"
+ * would match the IPC and CPI metrics, and TopDownL1 would match all
+ * the metrics in the TopDownL1 group.
+ * @metric_no_group: Should events written to events be grouped "{}" or
+ * global. Grouping is the default but due to multiplexing the
+ * user may override.
+ * @user_requested_cpu_list: Command line specified CPUs to record on.
+ * @system_wide: Are events for all processes recorded.
+ * @metric_list: The list that metrics are added to.
+ * @table: The table that is searched for metrics, most commonly the table for the
+ * architecture perf is running upon.
+ */
+static int metricgroup__add_metric_list(const char *pmu, const char *list,
+ bool metric_no_group,
+ bool metric_no_threshold,
+ const char *user_requested_cpu_list,
+ bool system_wide, struct list_head *metric_list,
+ const struct pmu_metrics_table *table)
+{
+ char *list_itr, *list_copy, *metric_name, *modifier;
+ int ret, count = 0;
+
+ list_copy = strdup(list);
+ if (!list_copy)
+ return -ENOMEM;
+ list_itr = list_copy;
+
+ while ((metric_name = strsep(&list_itr, ",")) != NULL) {
+ modifier = strchr(metric_name, ':');
+ if (modifier)
+ *modifier++ = '\0';
+
+ ret = metricgroup__add_metric(pmu, metric_name, modifier,
+ metric_no_group, metric_no_threshold,
+ user_requested_cpu_list,
+ system_wide, metric_list, table);
+ if (ret == -EINVAL)
+ pr_err("Cannot find metric or group `%s'\n", metric_name);
+
+ if (ret)
+ break;
+
+ count++;
+ }
+ free(list_copy);
+
+ if (!ret) {
+ /*
+ * Warn about nmi_watchdog if any parsed metrics had the
+ * NO_NMI_WATCHDOG constraint.
+ */
+ metric__watchdog_constraint_hint(NULL, /*foot=*/true);
+ /* No metrics. */
+ if (count == 0)
+ return -EINVAL;
+ }
+ return ret;
+}
+
+static void metricgroup__free_metrics(struct list_head *metric_list)
+{
+ struct metric *m, *tmp;
+
+ list_for_each_entry_safe (m, tmp, metric_list, nd) {
+ list_del_init(&m->nd);
+ metric__free(m);
+ }
+}
+
+/**
+ * find_tool_events - Search for the pressence of tool events in metric_list.
+ * @metric_list: List to take metrics from.
+ * @tool_events: Array of false values, indices corresponding to tool events set
+ * to true if tool event is found.
+ */
+static void find_tool_events(const struct list_head *metric_list,
+ bool tool_events[PERF_TOOL_MAX])
+{
+ struct metric *m;
+
+ list_for_each_entry(m, metric_list, nd) {
+ int i;
+
+ perf_tool_event__for_each_event(i) {
+ struct expr_id_data *data;
+
+ if (!tool_events[i] &&
+ !expr__get_id(m->pctx, perf_tool_event__to_str(i), &data))
+ tool_events[i] = true;
+ }
+ }
+}
+
+/**
+ * build_combined_expr_ctx - Make an expr_parse_ctx with all !group_events
+ * metric IDs, as the IDs are held in a set,
+ * duplicates will be removed.
+ * @metric_list: List to take metrics from.
+ * @combined: Out argument for result.
+ */
+static int build_combined_expr_ctx(const struct list_head *metric_list,
+ struct expr_parse_ctx **combined)
+{
+ struct hashmap_entry *cur;
+ size_t bkt;
+ struct metric *m;
+ char *dup;
+ int ret;
+
+ *combined = expr__ctx_new();
+ if (!*combined)
+ return -ENOMEM;
+
+ list_for_each_entry(m, metric_list, nd) {
+ if (!m->group_events && !m->modifier) {
+ hashmap__for_each_entry(m->pctx->ids, cur, bkt) {
+ dup = strdup(cur->pkey);
+ if (!dup) {
+ ret = -ENOMEM;
+ goto err_out;
+ }
+ ret = expr__add_id(*combined, dup);
+ if (ret)
+ goto err_out;
+ }
+ }
+ }
+ return 0;
+err_out:
+ expr__ctx_free(*combined);
+ *combined = NULL;
+ return ret;
+}
+
+/**
+ * parse_ids - Build the event string for the ids and parse them creating an
+ * evlist. The encoded metric_ids are decoded.
+ * @metric_no_merge: is metric sharing explicitly disabled.
+ * @fake_pmu: used when testing metrics not supported by the current CPU.
+ * @ids: the event identifiers parsed from a metric.
+ * @modifier: any modifiers added to the events.
+ * @group_events: should events be placed in a weak group.
+ * @tool_events: entries set true if the tool event of index could be present in
+ * the overall list of metrics.
+ * @out_evlist: the created list of events.
+ */
+static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
+ struct expr_parse_ctx *ids, const char *modifier,
+ bool group_events, const bool tool_events[PERF_TOOL_MAX],
+ struct evlist **out_evlist)
+{
+ struct parse_events_error parse_error;
+ struct evlist *parsed_evlist;
+ struct strbuf events = STRBUF_INIT;
+ int ret;
+
+ *out_evlist = NULL;
+ if (!metric_no_merge || hashmap__size(ids->ids) == 0) {
+ bool added_event = false;
+ int i;
+ /*
+ * We may fail to share events between metrics because a tool
+ * event isn't present in one metric. For example, a ratio of
+ * cache misses doesn't need duration_time but the same events
+ * may be used for a misses per second. Events without sharing
+ * implies multiplexing, that is best avoided, so place
+ * all tool events in every group.
+ *
+ * Also, there may be no ids/events in the expression parsing
+ * context because of constant evaluation, e.g.:
+ * event1 if #smt_on else 0
+ * Add a tool event to avoid a parse error on an empty string.
+ */
+ perf_tool_event__for_each_event(i) {
+ if (tool_events[i]) {
+ char *tmp = strdup(perf_tool_event__to_str(i));
+
+ if (!tmp)
+ return -ENOMEM;
+ ids__insert(ids->ids, tmp);
+ added_event = true;
+ }
+ }
+ if (!added_event && hashmap__size(ids->ids) == 0) {
+ char *tmp = strdup("duration_time");
+
+ if (!tmp)
+ return -ENOMEM;
+ ids__insert(ids->ids, tmp);
+ }
+ }
+ ret = metricgroup__build_event_string(&events, ids, modifier,
+ group_events);
+ if (ret)
+ return ret;
+
+ parsed_evlist = evlist__new();
+ if (!parsed_evlist) {
+ ret = -ENOMEM;
+ goto err_out;
+ }
+ pr_debug("Parsing metric events '%s'\n", events.buf);
+ parse_events_error__init(&parse_error);
+ ret = __parse_events(parsed_evlist, events.buf, /*pmu_filter=*/NULL,
+ &parse_error, fake_pmu, /*warn_if_reordered=*/false);
+ if (ret) {
+ parse_events_error__print(&parse_error, events.buf);
+ goto err_out;
+ }
+ ret = decode_all_metric_ids(parsed_evlist, modifier);
+ if (ret)
+ goto err_out;
+
+ *out_evlist = parsed_evlist;
+ parsed_evlist = NULL;
+err_out:
+ parse_events_error__exit(&parse_error);
+ evlist__delete(parsed_evlist);
+ strbuf_release(&events);
+ return ret;
+}
+
+static int parse_groups(struct evlist *perf_evlist,
+ const char *pmu, const char *str,
+ bool metric_no_group,
+ bool metric_no_merge,
+ bool metric_no_threshold,
+ const char *user_requested_cpu_list,
+ bool system_wide,
+ struct perf_pmu *fake_pmu,
+ struct rblist *metric_events_list,
+ const struct pmu_metrics_table *table)
+{
+ struct evlist *combined_evlist = NULL;
+ LIST_HEAD(metric_list);
+ struct metric *m;
+ bool tool_events[PERF_TOOL_MAX] = {false};
+ bool is_default = !strcmp(str, "Default");
+ int ret;
+
+ if (metric_events_list->nr_entries == 0)
+ metricgroup__rblist_init(metric_events_list);
+ ret = metricgroup__add_metric_list(pmu, str, metric_no_group, metric_no_threshold,
+ user_requested_cpu_list,
+ system_wide, &metric_list, table);
+ if (ret)
+ goto out;
+
+ /* Sort metrics from largest to smallest. */
+ list_sort(NULL, &metric_list, metric_list_cmp);
+
+ if (!metric_no_merge) {
+ struct expr_parse_ctx *combined = NULL;
+
+ find_tool_events(&metric_list, tool_events);
+
+ ret = build_combined_expr_ctx(&metric_list, &combined);
+
+ if (!ret && combined && hashmap__size(combined->ids)) {
+ ret = parse_ids(metric_no_merge, fake_pmu, combined,
+ /*modifier=*/NULL,
+ /*group_events=*/false,
+ tool_events,
+ &combined_evlist);
+ }
+ if (combined)
+ expr__ctx_free(combined);
+
+ if (ret)
+ goto out;
+ }
+
+ if (is_default)
+ list_sort(NULL, &metric_list, default_metricgroup_cmp);
+
+ list_for_each_entry(m, &metric_list, nd) {
+ struct metric_event *me;
+ struct evsel **metric_events;
+ struct evlist *metric_evlist = NULL;
+ struct metric *n;
+ struct metric_expr *expr;
+
+ if (combined_evlist && !m->group_events) {
+ metric_evlist = combined_evlist;
+ } else if (!metric_no_merge) {
+ /*
+ * See if the IDs for this metric are a subset of an
+ * earlier metric.
+ */
+ list_for_each_entry(n, &metric_list, nd) {
+ if (m == n)
+ break;
+
+ if (n->evlist == NULL)
+ continue;
+
+ if ((!m->modifier && n->modifier) ||
+ (m->modifier && !n->modifier) ||
+ (m->modifier && n->modifier &&
+ strcmp(m->modifier, n->modifier)))
+ continue;
+
+ if ((!m->pmu && n->pmu) ||
+ (m->pmu && !n->pmu) ||
+ (m->pmu && n->pmu && strcmp(m->pmu, n->pmu)))
+ continue;
+
+ if (expr__subset_of_ids(n->pctx, m->pctx)) {
+ pr_debug("Events in '%s' fully contained within '%s'\n",
+ m->metric_name, n->metric_name);
+ metric_evlist = n->evlist;
+ break;
+ }
+
+ }
+ }
+ if (!metric_evlist) {
+ ret = parse_ids(metric_no_merge, fake_pmu, m->pctx, m->modifier,
+ m->group_events, tool_events, &m->evlist);
+ if (ret)
+ goto out;
+
+ metric_evlist = m->evlist;
+ }
+ ret = setup_metric_events(fake_pmu ? "all" : m->pmu, m->pctx->ids,
+ metric_evlist, &metric_events);
+ if (ret) {
+ pr_err("Cannot resolve IDs for %s: %s\n",
+ m->metric_name, m->metric_expr);
+ goto out;
+ }
+
+ me = metricgroup__lookup(metric_events_list, metric_events[0], true);
+
+ expr = malloc(sizeof(struct metric_expr));
+ if (!expr) {
+ ret = -ENOMEM;
+ free(metric_events);
+ goto out;
+ }
+
+ expr->metric_refs = m->metric_refs;
+ m->metric_refs = NULL;
+ expr->metric_expr = m->metric_expr;
+ if (m->modifier) {
+ char *tmp;
+
+ if (asprintf(&tmp, "%s:%s", m->metric_name, m->modifier) < 0)
+ expr->metric_name = NULL;
+ else
+ expr->metric_name = tmp;
+ } else
+ expr->metric_name = strdup(m->metric_name);
+
+ if (!expr->metric_name) {
+ ret = -ENOMEM;
+ free(metric_events);
+ goto out;
+ }
+ expr->metric_threshold = m->metric_threshold;
+ expr->metric_unit = m->metric_unit;
+ expr->metric_events = metric_events;
+ expr->runtime = m->pctx->sctx.runtime;
+ expr->default_metricgroup_name = m->default_metricgroup_name;
+ me->is_default = is_default;
+ list_add(&expr->nd, &me->head);
+ }
+
+
+ if (combined_evlist) {
+ evlist__splice_list_tail(perf_evlist, &combined_evlist->core.entries);
+ evlist__delete(combined_evlist);
+ }
+
+ list_for_each_entry(m, &metric_list, nd) {
+ if (m->evlist)
+ evlist__splice_list_tail(perf_evlist, &m->evlist->core.entries);
+ }
+
+out:
+ metricgroup__free_metrics(&metric_list);
+ return ret;
+}
+
+int metricgroup__parse_groups(struct evlist *perf_evlist,
+ const char *pmu,
+ const char *str,
+ bool metric_no_group,
+ bool metric_no_merge,
+ bool metric_no_threshold,
+ const char *user_requested_cpu_list,
+ bool system_wide,
+ struct rblist *metric_events)
+{
+ const struct pmu_metrics_table *table = pmu_metrics_table__find();
+
+ if (!table)
+ return -EINVAL;
+
+ return parse_groups(perf_evlist, pmu, str, metric_no_group, metric_no_merge,
+ metric_no_threshold, user_requested_cpu_list, system_wide,
+ /*fake_pmu=*/NULL, metric_events, table);
+}
+
+int metricgroup__parse_groups_test(struct evlist *evlist,
+ const struct pmu_metrics_table *table,
+ const char *str,
+ struct rblist *metric_events)
+{
+ return parse_groups(evlist, "all", str,
+ /*metric_no_group=*/false,
+ /*metric_no_merge=*/false,
+ /*metric_no_threshold=*/false,
+ /*user_requested_cpu_list=*/NULL,
+ /*system_wide=*/false,
+ &perf_pmu__fake, metric_events, table);
+}
+
+struct metricgroup__has_metric_data {
+ const char *pmu;
+ const char *metric;
+};
+static int metricgroup__has_metric_callback(const struct pmu_metric *pm,
+ const struct pmu_metrics_table *table __maybe_unused,
+ void *vdata)
+{
+ struct metricgroup__has_metric_data *data = vdata;
+
+ return match_pm_metric(pm, data->pmu, data->metric) ? 1 : 0;
+}
+
+bool metricgroup__has_metric(const char *pmu, const char *metric)
+{
+ const struct pmu_metrics_table *table = pmu_metrics_table__find();
+ struct metricgroup__has_metric_data data = {
+ .pmu = pmu,
+ .metric = metric,
+ };
+
+ if (!table)
+ return false;
+
+ return pmu_metrics_table__for_each_metric(table, metricgroup__has_metric_callback, &data)
+ ? true : false;
+}
+
+static int metricgroup__topdown_max_level_callback(const struct pmu_metric *pm,
+ const struct pmu_metrics_table *table __maybe_unused,
+ void *data)
+{
+ unsigned int *max_level = data;
+ unsigned int level;
+ const char *p = strstr(pm->metric_group ?: "", "TopdownL");
+
+ if (!p || p[8] == '\0')
+ return 0;
+
+ level = p[8] - '0';
+ if (level > *max_level)
+ *max_level = level;
+
+ return 0;
+}
+
+unsigned int metricgroups__topdown_max_level(void)
+{
+ unsigned int max_level = 0;
+ const struct pmu_metrics_table *table = pmu_metrics_table__find();
+
+ if (!table)
+ return false;
+
+ pmu_metrics_table__for_each_metric(table, metricgroup__topdown_max_level_callback,
+ &max_level);
+ return max_level;
+}
+
+int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,
+ struct rblist *new_metric_events,
+ struct rblist *old_metric_events)
+{
+ unsigned int i;
+
+ for (i = 0; i < rblist__nr_entries(old_metric_events); i++) {
+ struct rb_node *nd;
+ struct metric_event *old_me, *new_me;
+ struct metric_expr *old_expr, *new_expr;
+ struct evsel *evsel;
+ size_t alloc_size;
+ int idx, nr;
+
+ nd = rblist__entry(old_metric_events, i);
+ old_me = container_of(nd, struct metric_event, nd);
+
+ evsel = evlist__find_evsel(evlist, old_me->evsel->core.idx);
+ if (!evsel)
+ return -EINVAL;
+ new_me = metricgroup__lookup(new_metric_events, evsel, true);
+ if (!new_me)
+ return -ENOMEM;
+
+ pr_debug("copying metric event for cgroup '%s': %s (idx=%d)\n",
+ cgrp ? cgrp->name : "root", evsel->name, evsel->core.idx);
+
+ list_for_each_entry(old_expr, &old_me->head, nd) {
+ new_expr = malloc(sizeof(*new_expr));
+ if (!new_expr)
+ return -ENOMEM;
+
+ new_expr->metric_expr = old_expr->metric_expr;
+ new_expr->metric_threshold = old_expr->metric_threshold;
+ new_expr->metric_name = strdup(old_expr->metric_name);
+ if (!new_expr->metric_name)
+ return -ENOMEM;
+
+ new_expr->metric_unit = old_expr->metric_unit;
+ new_expr->runtime = old_expr->runtime;
+
+ if (old_expr->metric_refs) {
+ /* calculate number of metric_events */
+ for (nr = 0; old_expr->metric_refs[nr].metric_name; nr++)
+ continue;
+ alloc_size = sizeof(*new_expr->metric_refs);
+ new_expr->metric_refs = calloc(nr + 1, alloc_size);
+ if (!new_expr->metric_refs) {
+ free(new_expr);
+ return -ENOMEM;
+ }
+
+ memcpy(new_expr->metric_refs, old_expr->metric_refs,
+ nr * alloc_size);
+ } else {
+ new_expr->metric_refs = NULL;
+ }
+
+ /* calculate number of metric_events */
+ for (nr = 0; old_expr->metric_events[nr]; nr++)
+ continue;
+ alloc_size = sizeof(*new_expr->metric_events);
+ new_expr->metric_events = calloc(nr + 1, alloc_size);
+ if (!new_expr->metric_events) {
+ zfree(&new_expr->metric_refs);
+ free(new_expr);
+ return -ENOMEM;
+ }
+
+ /* copy evsel in the same position */
+ for (idx = 0; idx < nr; idx++) {
+ evsel = old_expr->metric_events[idx];
+ evsel = evlist__find_evsel(evlist, evsel->core.idx);
+ if (evsel == NULL) {
+ zfree(&new_expr->metric_events);
+ zfree(&new_expr->metric_refs);
+ free(new_expr);
+ return -EINVAL;
+ }
+ new_expr->metric_events[idx] = evsel;
+ }
+
+ list_add(&new_expr->nd, &new_me->head);
+ }
+ }
+ return 0;
+}
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
new file mode 100644
index 000000000..d5325c6ec
--- /dev/null
+++ b/tools/perf/util/metricgroup.h
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef METRICGROUP_H
+#define METRICGROUP_H 1
+
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <stdbool.h>
+#include "pmu-events/pmu-events.h"
+
+struct evlist;
+struct evsel;
+struct option;
+struct print_callbacks;
+struct rblist;
+struct cgroup;
+
+/**
+ * A node in a rblist keyed by the evsel. The global rblist of metric events
+ * generally exists in perf_stat_config. The evsel is looked up in the rblist
+ * yielding a list of metric_expr.
+ */
+struct metric_event {
+ struct rb_node nd;
+ struct evsel *evsel;
+ bool is_default; /* the metric evsel from the Default metricgroup */
+ struct list_head head; /* list of metric_expr */
+};
+
+/**
+ * A metric referenced by a metric_expr. When parsing a metric expression IDs
+ * will be looked up, matching either a value (from metric_events) or a
+ * metric_ref. A metric_ref will then be parsed recursively. The metric_refs and
+ * metric_events need to be known before parsing so that their values may be
+ * placed in the parse context for lookup.
+ */
+struct metric_ref {
+ const char *metric_name;
+ const char *metric_expr;
+};
+
+/**
+ * One in a list of metric_expr associated with an evsel. The data is used to
+ * generate a metric value during stat output.
+ */
+struct metric_expr {
+ struct list_head nd;
+ /** The expression to parse, for example, "instructions/cycles". */
+ const char *metric_expr;
+ /** The name of the meric such as "IPC". */
+ const char *metric_name;
+ const char *metric_threshold;
+ /**
+ * The "ScaleUnit" that scales and adds a unit to the metric during
+ * output. For example, "6.4e-05MiB" means to scale the resulting metric
+ * by 6.4e-05 (typically converting a unit like cache lines to something
+ * more human intelligible) and then add "MiB" afterward when displayed.
+ */
+ const char *metric_unit;
+ /** Displayed metricgroup name of the Default metricgroup */
+ const char *default_metricgroup_name;
+ /** Null terminated array of events used by the metric. */
+ struct evsel **metric_events;
+ /** Null terminated array of referenced metrics. */
+ struct metric_ref *metric_refs;
+ /** A value substituted for '?' during parsing. */
+ int runtime;
+};
+
+struct metric_event *metricgroup__lookup(struct rblist *metric_events,
+ struct evsel *evsel,
+ bool create);
+int metricgroup__parse_groups(struct evlist *perf_evlist,
+ const char *pmu,
+ const char *str,
+ bool metric_no_group,
+ bool metric_no_merge,
+ bool metric_no_threshold,
+ const char *user_requested_cpu_list,
+ bool system_wide,
+ struct rblist *metric_events);
+int metricgroup__parse_groups_test(struct evlist *evlist,
+ const struct pmu_metrics_table *table,
+ const char *str,
+ struct rblist *metric_events);
+
+void metricgroup__print(const struct print_callbacks *print_cb, void *print_state);
+bool metricgroup__has_metric(const char *pmu, const char *metric);
+unsigned int metricgroups__topdown_max_level(void);
+int arch_get_runtimeparam(const struct pmu_metric *pm);
+void metricgroup__rblist_exit(struct rblist *metric_events);
+
+int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,
+ struct rblist *new_metric_events,
+ struct rblist *old_metric_events);
+#endif
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
new file mode 100644
index 000000000..49093b21e
--- /dev/null
+++ b/tools/perf/util/mmap.c
@@ -0,0 +1,374 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2011-2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from evlist.c builtin-{top,stat,record}.c, see those files for further
+ * copyright notes.
+ */
+
+#include <sys/mman.h>
+#include <inttypes.h>
+#include <asm/bug.h>
+#include <linux/zalloc.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h> // sysconf()
+#include <perf/mmap.h>
+#ifdef HAVE_LIBNUMA_SUPPORT
+#include <numaif.h>
+#endif
+#include "cpumap.h"
+#include "debug.h"
+#include "event.h"
+#include "mmap.h"
+#include "../perf.h"
+#include <internal/lib.h> /* page_size */
+#include <linux/bitmap.h>
+
+#define MASK_SIZE 1023
+void mmap_cpu_mask__scnprintf(struct mmap_cpu_mask *mask, const char *tag)
+{
+ char buf[MASK_SIZE + 1];
+ size_t len;
+
+ len = bitmap_scnprintf(mask->bits, mask->nbits, buf, MASK_SIZE);
+ buf[len] = '\0';
+ pr_debug("%p: %s mask[%zd]: %s\n", mask, tag, mask->nbits, buf);
+}
+
+size_t mmap__mmap_len(struct mmap *map)
+{
+ return perf_mmap__mmap_len(&map->core);
+}
+
+int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
+ struct auxtrace_mmap_params *mp __maybe_unused,
+ void *userpg __maybe_unused,
+ int fd __maybe_unused)
+{
+ return 0;
+}
+
+void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
+{
+}
+
+void __weak auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp __maybe_unused,
+ off_t auxtrace_offset __maybe_unused,
+ unsigned int auxtrace_pages __maybe_unused,
+ bool auxtrace_overwrite __maybe_unused)
+{
+}
+
+void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __maybe_unused,
+ struct evlist *evlist __maybe_unused,
+ struct evsel *evsel __maybe_unused,
+ int idx __maybe_unused)
+{
+}
+
+#ifdef HAVE_AIO_SUPPORT
+static int perf_mmap__aio_enabled(struct mmap *map)
+{
+ return map->aio.nr_cblocks > 0;
+}
+
+#ifdef HAVE_LIBNUMA_SUPPORT
+static int perf_mmap__aio_alloc(struct mmap *map, int idx)
+{
+ map->aio.data[idx] = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+ if (map->aio.data[idx] == MAP_FAILED) {
+ map->aio.data[idx] = NULL;
+ return -1;
+ }
+
+ return 0;
+}
+
+static void perf_mmap__aio_free(struct mmap *map, int idx)
+{
+ if (map->aio.data[idx]) {
+ munmap(map->aio.data[idx], mmap__mmap_len(map));
+ map->aio.data[idx] = NULL;
+ }
+}
+
+static int perf_mmap__aio_bind(struct mmap *map, int idx, struct perf_cpu cpu, int affinity)
+{
+ void *data;
+ size_t mmap_len;
+ unsigned long *node_mask;
+ unsigned long node_index;
+ int err = 0;
+
+ if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) {
+ data = map->aio.data[idx];
+ mmap_len = mmap__mmap_len(map);
+ node_index = cpu__get_node(cpu);
+ node_mask = bitmap_zalloc(node_index + 1);
+ if (!node_mask) {
+ pr_err("Failed to allocate node mask for mbind: error %m\n");
+ return -1;
+ }
+ __set_bit(node_index, node_mask);
+ if (mbind(data, mmap_len, MPOL_BIND, node_mask, node_index + 1 + 1, 0)) {
+ pr_err("Failed to bind [%p-%p] AIO buffer to node %lu: error %m\n",
+ data, data + mmap_len, node_index);
+ err = -1;
+ }
+ bitmap_free(node_mask);
+ }
+
+ return err;
+}
+#else /* !HAVE_LIBNUMA_SUPPORT */
+static int perf_mmap__aio_alloc(struct mmap *map, int idx)
+{
+ map->aio.data[idx] = malloc(mmap__mmap_len(map));
+ if (map->aio.data[idx] == NULL)
+ return -1;
+
+ return 0;
+}
+
+static void perf_mmap__aio_free(struct mmap *map, int idx)
+{
+ zfree(&(map->aio.data[idx]));
+}
+
+static int perf_mmap__aio_bind(struct mmap *map __maybe_unused, int idx __maybe_unused,
+ struct perf_cpu cpu __maybe_unused, int affinity __maybe_unused)
+{
+ return 0;
+}
+#endif
+
+static int perf_mmap__aio_mmap(struct mmap *map, struct mmap_params *mp)
+{
+ int delta_max, i, prio, ret;
+
+ map->aio.nr_cblocks = mp->nr_cblocks;
+ if (map->aio.nr_cblocks) {
+ map->aio.aiocb = calloc(map->aio.nr_cblocks, sizeof(struct aiocb *));
+ if (!map->aio.aiocb) {
+ pr_debug2("failed to allocate aiocb for data buffer, error %m\n");
+ return -1;
+ }
+ map->aio.cblocks = calloc(map->aio.nr_cblocks, sizeof(struct aiocb));
+ if (!map->aio.cblocks) {
+ pr_debug2("failed to allocate cblocks for data buffer, error %m\n");
+ return -1;
+ }
+ map->aio.data = calloc(map->aio.nr_cblocks, sizeof(void *));
+ if (!map->aio.data) {
+ pr_debug2("failed to allocate data buffer, error %m\n");
+ return -1;
+ }
+ delta_max = sysconf(_SC_AIO_PRIO_DELTA_MAX);
+ for (i = 0; i < map->aio.nr_cblocks; ++i) {
+ ret = perf_mmap__aio_alloc(map, i);
+ if (ret == -1) {
+ pr_debug2("failed to allocate data buffer area, error %m");
+ return -1;
+ }
+ ret = perf_mmap__aio_bind(map, i, map->core.cpu, mp->affinity);
+ if (ret == -1)
+ return -1;
+ /*
+ * Use cblock.aio_fildes value different from -1
+ * to denote started aio write operation on the
+ * cblock so it requires explicit record__aio_sync()
+ * call prior the cblock may be reused again.
+ */
+ map->aio.cblocks[i].aio_fildes = -1;
+ /*
+ * Allocate cblocks with priority delta to have
+ * faster aio write system calls because queued requests
+ * are kept in separate per-prio queues and adding
+ * a new request will iterate thru shorter per-prio
+ * list. Blocks with numbers higher than
+ * _SC_AIO_PRIO_DELTA_MAX go with priority 0.
+ */
+ prio = delta_max - i;
+ map->aio.cblocks[i].aio_reqprio = prio >= 0 ? prio : 0;
+ }
+ }
+
+ return 0;
+}
+
+static void perf_mmap__aio_munmap(struct mmap *map)
+{
+ int i;
+
+ for (i = 0; i < map->aio.nr_cblocks; ++i)
+ perf_mmap__aio_free(map, i);
+ if (map->aio.data)
+ zfree(&map->aio.data);
+ zfree(&map->aio.cblocks);
+ zfree(&map->aio.aiocb);
+}
+#else /* !HAVE_AIO_SUPPORT */
+static int perf_mmap__aio_enabled(struct mmap *map __maybe_unused)
+{
+ return 0;
+}
+
+static int perf_mmap__aio_mmap(struct mmap *map __maybe_unused,
+ struct mmap_params *mp __maybe_unused)
+{
+ return 0;
+}
+
+static void perf_mmap__aio_munmap(struct mmap *map __maybe_unused)
+{
+}
+#endif
+
+void mmap__munmap(struct mmap *map)
+{
+ bitmap_free(map->affinity_mask.bits);
+
+#ifndef PYTHON_PERF
+ zstd_fini(&map->zstd_data);
+#endif
+
+ perf_mmap__aio_munmap(map);
+ if (map->data != NULL) {
+ munmap(map->data, mmap__mmap_len(map));
+ map->data = NULL;
+ }
+ auxtrace_mmap__munmap(&map->auxtrace_mmap);
+}
+
+static void build_node_mask(int node, struct mmap_cpu_mask *mask)
+{
+ int idx, nr_cpus;
+ struct perf_cpu cpu;
+ const struct perf_cpu_map *cpu_map = NULL;
+
+ cpu_map = cpu_map__online();
+ if (!cpu_map)
+ return;
+
+ nr_cpus = perf_cpu_map__nr(cpu_map);
+ for (idx = 0; idx < nr_cpus; idx++) {
+ cpu = perf_cpu_map__cpu(cpu_map, idx); /* map c index to online cpu index */
+ if (cpu__get_node(cpu) == node)
+ __set_bit(cpu.cpu, mask->bits);
+ }
+}
+
+static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp)
+{
+ map->affinity_mask.nbits = cpu__max_cpu().cpu;
+ map->affinity_mask.bits = bitmap_zalloc(map->affinity_mask.nbits);
+ if (!map->affinity_mask.bits)
+ return -1;
+
+ if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1)
+ build_node_mask(cpu__get_node(map->core.cpu), &map->affinity_mask);
+ else if (mp->affinity == PERF_AFFINITY_CPU)
+ __set_bit(map->core.cpu.cpu, map->affinity_mask.bits);
+
+ return 0;
+}
+
+int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu cpu)
+{
+ if (perf_mmap__mmap(&map->core, &mp->core, fd, cpu)) {
+ pr_debug2("failed to mmap perf event ring buffer, error %d\n",
+ errno);
+ return -1;
+ }
+
+ if (mp->affinity != PERF_AFFINITY_SYS &&
+ perf_mmap__setup_affinity_mask(map, mp)) {
+ pr_debug2("failed to alloc mmap affinity mask, error %d\n",
+ errno);
+ return -1;
+ }
+
+ if (verbose == 2)
+ mmap_cpu_mask__scnprintf(&map->affinity_mask, "mmap");
+
+ map->core.flush = mp->flush;
+
+ map->comp_level = mp->comp_level;
+#ifndef PYTHON_PERF
+ if (zstd_init(&map->zstd_data, map->comp_level)) {
+ pr_debug2("failed to init mmap compressor, error %d\n", errno);
+ return -1;
+ }
+#endif
+
+ if (map->comp_level && !perf_mmap__aio_enabled(map)) {
+ map->data = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+ if (map->data == MAP_FAILED) {
+ pr_debug2("failed to mmap data buffer, error %d\n",
+ errno);
+ map->data = NULL;
+ return -1;
+ }
+ }
+
+ if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
+ &mp->auxtrace_mp, map->core.base, fd))
+ return -1;
+
+ return perf_mmap__aio_mmap(map, mp);
+}
+
+int perf_mmap__push(struct mmap *md, void *to,
+ int push(struct mmap *map, void *to, void *buf, size_t size))
+{
+ u64 head = perf_mmap__read_head(&md->core);
+ unsigned char *data = md->core.base + page_size;
+ unsigned long size;
+ void *buf;
+ int rc = 0;
+
+ rc = perf_mmap__read_init(&md->core);
+ if (rc < 0)
+ return (rc == -EAGAIN) ? 1 : -1;
+
+ size = md->core.end - md->core.start;
+
+ if ((md->core.start & md->core.mask) + size != (md->core.end & md->core.mask)) {
+ buf = &data[md->core.start & md->core.mask];
+ size = md->core.mask + 1 - (md->core.start & md->core.mask);
+ md->core.start += size;
+
+ if (push(md, to, buf, size) < 0) {
+ rc = -1;
+ goto out;
+ }
+ }
+
+ buf = &data[md->core.start & md->core.mask];
+ size = md->core.end - md->core.start;
+ md->core.start += size;
+
+ if (push(md, to, buf, size) < 0) {
+ rc = -1;
+ goto out;
+ }
+
+ md->core.prev = head;
+ perf_mmap__consume(&md->core);
+out:
+ return rc;
+}
+
+int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original, struct mmap_cpu_mask *clone)
+{
+ clone->nbits = original->nbits;
+ clone->bits = bitmap_zalloc(original->nbits);
+ if (!clone->bits)
+ return -ENOMEM;
+
+ memcpy(clone->bits, original->bits, MMAP_CPU_MASK_BYTES(original));
+ return 0;
+}
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
new file mode 100644
index 000000000..f944c3cd5
--- /dev/null
+++ b/tools/perf/util/mmap.h
@@ -0,0 +1,68 @@
+#ifndef __PERF_MMAP_H
+#define __PERF_MMAP_H 1
+
+#include <internal/mmap.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <perf/cpumap.h>
+#ifdef HAVE_AIO_SUPPORT
+#include <aio.h>
+#endif
+#include "auxtrace.h"
+#include "util/compress.h"
+
+struct aiocb;
+
+struct mmap_cpu_mask {
+ unsigned long *bits;
+ size_t nbits;
+};
+
+#define MMAP_CPU_MASK_BYTES(m) \
+ (BITS_TO_LONGS(((struct mmap_cpu_mask *)m)->nbits) * sizeof(unsigned long))
+
+/**
+ * struct mmap - perf's ring buffer mmap details
+ *
+ * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this
+ */
+struct mmap {
+ struct perf_mmap core;
+ struct auxtrace_mmap auxtrace_mmap;
+#ifdef HAVE_AIO_SUPPORT
+ struct {
+ void **data;
+ struct aiocb *cblocks;
+ struct aiocb **aiocb;
+ int nr_cblocks;
+ } aio;
+#endif
+ struct mmap_cpu_mask affinity_mask;
+ void *data;
+ int comp_level;
+ struct perf_data_file *file;
+ struct zstd_data zstd_data;
+};
+
+struct mmap_params {
+ struct perf_mmap_param core;
+ int nr_cblocks, affinity, flush, comp_level;
+ struct auxtrace_mmap_params auxtrace_mp;
+};
+
+int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu cpu);
+void mmap__munmap(struct mmap *map);
+
+union perf_event *perf_mmap__read_forward(struct mmap *map);
+
+int perf_mmap__push(struct mmap *md, void *to,
+ int push(struct mmap *map, void *to, void *buf, size_t size));
+
+size_t mmap__mmap_len(struct mmap *map);
+
+void mmap_cpu_mask__scnprintf(struct mmap_cpu_mask *mask, const char *tag);
+
+int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original,
+ struct mmap_cpu_mask *clone);
+
+#endif /*__PERF_MMAP_H */
diff --git a/tools/perf/util/mutex.c b/tools/perf/util/mutex.c
new file mode 100644
index 000000000..bca7f0717
--- /dev/null
+++ b/tools/perf/util/mutex.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "mutex.h"
+
+#include "debug.h"
+#include <linux/string.h>
+#include <errno.h>
+
+static void check_err(const char *fn, int err)
+{
+ char sbuf[STRERR_BUFSIZE];
+
+ if (err == 0)
+ return;
+
+ pr_err("%s error: '%s'\n", fn, str_error_r(err, sbuf, sizeof(sbuf)));
+}
+
+#define CHECK_ERR(err) check_err(__func__, err)
+
+static void __mutex_init(struct mutex *mtx, bool pshared)
+{
+ pthread_mutexattr_t attr;
+
+ CHECK_ERR(pthread_mutexattr_init(&attr));
+
+#ifndef NDEBUG
+ /* In normal builds enable error checking, such as recursive usage. */
+ CHECK_ERR(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK));
+#endif
+ if (pshared)
+ CHECK_ERR(pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED));
+
+ CHECK_ERR(pthread_mutex_init(&mtx->lock, &attr));
+ CHECK_ERR(pthread_mutexattr_destroy(&attr));
+}
+
+void mutex_init(struct mutex *mtx)
+{
+ __mutex_init(mtx, /*pshared=*/false);
+}
+
+void mutex_init_pshared(struct mutex *mtx)
+{
+ __mutex_init(mtx, /*pshared=*/true);
+}
+
+void mutex_destroy(struct mutex *mtx)
+{
+ CHECK_ERR(pthread_mutex_destroy(&mtx->lock));
+}
+
+void mutex_lock(struct mutex *mtx)
+ NO_THREAD_SAFETY_ANALYSIS
+{
+ CHECK_ERR(pthread_mutex_lock(&mtx->lock));
+}
+
+void mutex_unlock(struct mutex *mtx)
+ NO_THREAD_SAFETY_ANALYSIS
+{
+ CHECK_ERR(pthread_mutex_unlock(&mtx->lock));
+}
+
+bool mutex_trylock(struct mutex *mtx)
+{
+ int ret = pthread_mutex_trylock(&mtx->lock);
+
+ if (ret == 0)
+ return true; /* Lock acquired. */
+
+ if (ret == EBUSY)
+ return false; /* Lock busy. */
+
+ /* Print error. */
+ CHECK_ERR(ret);
+ return false;
+}
+
+static void __cond_init(struct cond *cnd, bool pshared)
+{
+ pthread_condattr_t attr;
+
+ CHECK_ERR(pthread_condattr_init(&attr));
+ if (pshared)
+ CHECK_ERR(pthread_condattr_setpshared(&attr, PTHREAD_PROCESS_SHARED));
+
+ CHECK_ERR(pthread_cond_init(&cnd->cond, &attr));
+ CHECK_ERR(pthread_condattr_destroy(&attr));
+}
+
+void cond_init(struct cond *cnd)
+{
+ __cond_init(cnd, /*pshared=*/false);
+}
+
+void cond_init_pshared(struct cond *cnd)
+{
+ __cond_init(cnd, /*pshared=*/true);
+}
+
+void cond_destroy(struct cond *cnd)
+{
+ CHECK_ERR(pthread_cond_destroy(&cnd->cond));
+}
+
+void cond_wait(struct cond *cnd, struct mutex *mtx)
+{
+ CHECK_ERR(pthread_cond_wait(&cnd->cond, &mtx->lock));
+}
+
+void cond_signal(struct cond *cnd)
+{
+ CHECK_ERR(pthread_cond_signal(&cnd->cond));
+}
+
+void cond_broadcast(struct cond *cnd)
+{
+ CHECK_ERR(pthread_cond_broadcast(&cnd->cond));
+}
diff --git a/tools/perf/util/mutex.h b/tools/perf/util/mutex.h
new file mode 100644
index 000000000..40661120c
--- /dev/null
+++ b/tools/perf/util/mutex.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_MUTEX_H
+#define __PERF_MUTEX_H
+
+#include <pthread.h>
+#include <stdbool.h>
+
+/*
+ * A function-like feature checking macro that is a wrapper around
+ * `__has_attribute`, which is defined by GCC 5+ and Clang and evaluates to a
+ * nonzero constant integer if the attribute is supported or 0 if not.
+ */
+#ifdef __has_attribute
+#define HAVE_ATTRIBUTE(x) __has_attribute(x)
+#else
+#define HAVE_ATTRIBUTE(x) 0
+#endif
+
+#if HAVE_ATTRIBUTE(guarded_by) && HAVE_ATTRIBUTE(pt_guarded_by) && \
+ HAVE_ATTRIBUTE(lockable) && HAVE_ATTRIBUTE(exclusive_lock_function) && \
+ HAVE_ATTRIBUTE(exclusive_trylock_function) && HAVE_ATTRIBUTE(exclusive_locks_required) && \
+ HAVE_ATTRIBUTE(no_thread_safety_analysis)
+
+/* Documents if a shared field or global variable needs to be protected by a mutex. */
+#define GUARDED_BY(x) __attribute__((guarded_by(x)))
+
+/*
+ * Documents if the memory location pointed to by a pointer should be guarded by
+ * a mutex when dereferencing the pointer.
+ */
+#define PT_GUARDED_BY(x) __attribute__((pt_guarded_by(x)))
+
+/* Documents if a type is a lockable type. */
+#define LOCKABLE __attribute__((lockable))
+
+/* Documents functions that acquire a lock in the body of a function, and do not release it. */
+#define EXCLUSIVE_LOCK_FUNCTION(...) __attribute__((exclusive_lock_function(__VA_ARGS__)))
+
+/*
+ * Documents functions that expect a lock to be held on entry to the function,
+ * and release it in the body of the function.
+ */
+#define UNLOCK_FUNCTION(...) __attribute__((unlock_function(__VA_ARGS__)))
+
+/* Documents functions that try to acquire a lock, and return success or failure. */
+#define EXCLUSIVE_TRYLOCK_FUNCTION(...) \
+ __attribute__((exclusive_trylock_function(__VA_ARGS__)))
+
+/* Documents a function that expects a mutex to be held prior to entry. */
+#define EXCLUSIVE_LOCKS_REQUIRED(...) __attribute__((exclusive_locks_required(__VA_ARGS__)))
+
+/* Turns off thread safety checking within the body of a particular function. */
+#define NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis))
+
+#else
+
+#define GUARDED_BY(x)
+#define PT_GUARDED_BY(x)
+#define LOCKABLE
+#define EXCLUSIVE_LOCK_FUNCTION(...)
+#define UNLOCK_FUNCTION(...)
+#define EXCLUSIVE_TRYLOCK_FUNCTION(...)
+#define EXCLUSIVE_LOCKS_REQUIRED(...)
+#define NO_THREAD_SAFETY_ANALYSIS
+
+#endif
+
+/*
+ * A wrapper around the mutex implementation that allows perf to error check
+ * usage, etc.
+ */
+struct LOCKABLE mutex {
+ pthread_mutex_t lock;
+};
+
+/* A wrapper around the condition variable implementation. */
+struct cond {
+ pthread_cond_t cond;
+};
+
+/* Default initialize the mtx struct. */
+void mutex_init(struct mutex *mtx);
+/*
+ * Initialize the mtx struct and set the process-shared rather than default
+ * process-private attribute.
+ */
+void mutex_init_pshared(struct mutex *mtx);
+void mutex_destroy(struct mutex *mtx);
+
+void mutex_lock(struct mutex *mtx) EXCLUSIVE_LOCK_FUNCTION(*mtx);
+void mutex_unlock(struct mutex *mtx) UNLOCK_FUNCTION(*mtx);
+/* Tries to acquire the lock and returns true on success. */
+bool mutex_trylock(struct mutex *mtx) EXCLUSIVE_TRYLOCK_FUNCTION(true, *mtx);
+
+/* Default initialize the cond struct. */
+void cond_init(struct cond *cnd);
+/*
+ * Initialize the cond struct and specify the process-shared rather than default
+ * process-private attribute.
+ */
+void cond_init_pshared(struct cond *cnd);
+void cond_destroy(struct cond *cnd);
+
+void cond_wait(struct cond *cnd, struct mutex *mtx) EXCLUSIVE_LOCKS_REQUIRED(mtx);
+void cond_signal(struct cond *cnd);
+void cond_broadcast(struct cond *cnd);
+
+#endif /* __PERF_MUTEX_H */
diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c
new file mode 100644
index 000000000..cb185c565
--- /dev/null
+++ b/tools/perf/util/namespaces.c
@@ -0,0 +1,376 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright (C) 2017 Hari Bathini, IBM Corporation
+ */
+
+#include "namespaces.h"
+#include "event.h"
+#include "get_current_dir_name.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <asm/bug.h>
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+
+static const char *perf_ns__names[] = {
+ [NET_NS_INDEX] = "net",
+ [UTS_NS_INDEX] = "uts",
+ [IPC_NS_INDEX] = "ipc",
+ [PID_NS_INDEX] = "pid",
+ [USER_NS_INDEX] = "user",
+ [MNT_NS_INDEX] = "mnt",
+ [CGROUP_NS_INDEX] = "cgroup",
+};
+
+const char *perf_ns__name(unsigned int id)
+{
+ if (id >= ARRAY_SIZE(perf_ns__names))
+ return "UNKNOWN";
+ return perf_ns__names[id];
+}
+
+struct namespaces *namespaces__new(struct perf_record_namespaces *event)
+{
+ struct namespaces *namespaces;
+ u64 link_info_size = ((event ? event->nr_namespaces : NR_NAMESPACES) *
+ sizeof(struct perf_ns_link_info));
+
+ namespaces = zalloc(sizeof(struct namespaces) + link_info_size);
+ if (!namespaces)
+ return NULL;
+
+ namespaces->end_time = -1;
+
+ if (event)
+ memcpy(namespaces->link_info, event->link_info, link_info_size);
+
+ return namespaces;
+}
+
+void namespaces__free(struct namespaces *namespaces)
+{
+ free(namespaces);
+}
+
+static int nsinfo__get_nspid(pid_t *tgid, pid_t *nstgid, bool *in_pidns, const char *path)
+{
+ FILE *f = NULL;
+ char *statln = NULL;
+ size_t linesz = 0;
+ char *nspid;
+
+ f = fopen(path, "r");
+ if (f == NULL)
+ return -1;
+
+ while (getline(&statln, &linesz, f) != -1) {
+ /* Use tgid if CONFIG_PID_NS is not defined. */
+ if (strstr(statln, "Tgid:") != NULL) {
+ *tgid = (pid_t)strtol(strrchr(statln, '\t'), NULL, 10);
+ *nstgid = *tgid;
+ }
+
+ if (strstr(statln, "NStgid:") != NULL) {
+ nspid = strrchr(statln, '\t');
+ *nstgid = (pid_t)strtol(nspid, NULL, 10);
+ /*
+ * If innermost tgid is not the first, process is in a different
+ * PID namespace.
+ */
+ *in_pidns = (statln + sizeof("NStgid:") - 1) != nspid;
+ break;
+ }
+ }
+
+ fclose(f);
+ free(statln);
+ return 0;
+}
+
+int nsinfo__init(struct nsinfo *nsi)
+{
+ char oldns[PATH_MAX];
+ char spath[PATH_MAX];
+ char *newns = NULL;
+ struct stat old_stat;
+ struct stat new_stat;
+ int rv = -1;
+
+ if (snprintf(oldns, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX)
+ return rv;
+
+ if (asprintf(&newns, "/proc/%d/ns/mnt", nsinfo__pid(nsi)) == -1)
+ return rv;
+
+ if (stat(oldns, &old_stat) < 0)
+ goto out;
+
+ if (stat(newns, &new_stat) < 0)
+ goto out;
+
+ /* Check if the mount namespaces differ, if so then indicate that we
+ * want to switch as part of looking up dso/map data.
+ */
+ if (old_stat.st_ino != new_stat.st_ino) {
+ RC_CHK_ACCESS(nsi)->need_setns = true;
+ RC_CHK_ACCESS(nsi)->mntns_path = newns;
+ newns = NULL;
+ }
+
+ /* If we're dealing with a process that is in a different PID namespace,
+ * attempt to work out the innermost tgid for the process.
+ */
+ if (snprintf(spath, PATH_MAX, "/proc/%d/status", nsinfo__pid(nsi)) >= PATH_MAX)
+ goto out;
+
+ rv = nsinfo__get_nspid(&RC_CHK_ACCESS(nsi)->tgid, &RC_CHK_ACCESS(nsi)->nstgid,
+ &RC_CHK_ACCESS(nsi)->in_pidns, spath);
+
+out:
+ free(newns);
+ return rv;
+}
+
+static struct nsinfo *nsinfo__alloc(void)
+{
+ struct nsinfo *res;
+ RC_STRUCT(nsinfo) *nsi;
+
+ nsi = calloc(1, sizeof(*nsi));
+ if (ADD_RC_CHK(res, nsi))
+ refcount_set(&nsi->refcnt, 1);
+
+ return res;
+}
+
+struct nsinfo *nsinfo__new(pid_t pid)
+{
+ struct nsinfo *nsi;
+
+ if (pid == 0)
+ return NULL;
+
+ nsi = nsinfo__alloc();
+ if (!nsi)
+ return NULL;
+
+ RC_CHK_ACCESS(nsi)->pid = pid;
+ RC_CHK_ACCESS(nsi)->tgid = pid;
+ RC_CHK_ACCESS(nsi)->nstgid = pid;
+ nsinfo__clear_need_setns(nsi);
+ RC_CHK_ACCESS(nsi)->in_pidns = false;
+ /* Init may fail if the process exits while we're trying to look at its
+ * proc information. In that case, save the pid but don't try to enter
+ * the namespace.
+ */
+ if (nsinfo__init(nsi) == -1)
+ nsinfo__clear_need_setns(nsi);
+
+ return nsi;
+}
+
+static const char *nsinfo__mntns_path(const struct nsinfo *nsi)
+{
+ return RC_CHK_ACCESS(nsi)->mntns_path;
+}
+
+struct nsinfo *nsinfo__copy(const struct nsinfo *nsi)
+{
+ struct nsinfo *nnsi;
+
+ if (nsi == NULL)
+ return NULL;
+
+ nnsi = nsinfo__alloc();
+ if (!nnsi)
+ return NULL;
+
+ RC_CHK_ACCESS(nnsi)->pid = nsinfo__pid(nsi);
+ RC_CHK_ACCESS(nnsi)->tgid = nsinfo__tgid(nsi);
+ RC_CHK_ACCESS(nnsi)->nstgid = nsinfo__nstgid(nsi);
+ RC_CHK_ACCESS(nnsi)->need_setns = nsinfo__need_setns(nsi);
+ RC_CHK_ACCESS(nnsi)->in_pidns = nsinfo__in_pidns(nsi);
+ if (nsinfo__mntns_path(nsi)) {
+ RC_CHK_ACCESS(nnsi)->mntns_path = strdup(nsinfo__mntns_path(nsi));
+ if (!RC_CHK_ACCESS(nnsi)->mntns_path) {
+ nsinfo__put(nnsi);
+ return NULL;
+ }
+ }
+
+ return nnsi;
+}
+
+static refcount_t *nsinfo__refcnt(struct nsinfo *nsi)
+{
+ return &RC_CHK_ACCESS(nsi)->refcnt;
+}
+
+static void nsinfo__delete(struct nsinfo *nsi)
+{
+ if (nsi) {
+ WARN_ONCE(refcount_read(nsinfo__refcnt(nsi)) != 0, "nsinfo refcnt unbalanced\n");
+ zfree(&RC_CHK_ACCESS(nsi)->mntns_path);
+ RC_CHK_FREE(nsi);
+ }
+}
+
+struct nsinfo *nsinfo__get(struct nsinfo *nsi)
+{
+ struct nsinfo *result;
+
+ if (RC_CHK_GET(result, nsi))
+ refcount_inc(nsinfo__refcnt(nsi));
+
+ return result;
+}
+
+void nsinfo__put(struct nsinfo *nsi)
+{
+ if (nsi && refcount_dec_and_test(nsinfo__refcnt(nsi)))
+ nsinfo__delete(nsi);
+ else
+ RC_CHK_PUT(nsi);
+}
+
+bool nsinfo__need_setns(const struct nsinfo *nsi)
+{
+ return RC_CHK_ACCESS(nsi)->need_setns;
+}
+
+void nsinfo__clear_need_setns(struct nsinfo *nsi)
+{
+ RC_CHK_ACCESS(nsi)->need_setns = false;
+}
+
+pid_t nsinfo__tgid(const struct nsinfo *nsi)
+{
+ return RC_CHK_ACCESS(nsi)->tgid;
+}
+
+pid_t nsinfo__nstgid(const struct nsinfo *nsi)
+{
+ return RC_CHK_ACCESS(nsi)->nstgid;
+}
+
+pid_t nsinfo__pid(const struct nsinfo *nsi)
+{
+ return RC_CHK_ACCESS(nsi)->pid;
+}
+
+pid_t nsinfo__in_pidns(const struct nsinfo *nsi)
+{
+ return RC_CHK_ACCESS(nsi)->in_pidns;
+}
+
+void nsinfo__mountns_enter(struct nsinfo *nsi,
+ struct nscookie *nc)
+{
+ char curpath[PATH_MAX];
+ int oldns = -1;
+ int newns = -1;
+ char *oldcwd = NULL;
+
+ if (nc == NULL)
+ return;
+
+ nc->oldns = -1;
+ nc->newns = -1;
+
+ if (!nsi || !nsinfo__need_setns(nsi))
+ return;
+
+ if (snprintf(curpath, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX)
+ return;
+
+ oldcwd = get_current_dir_name();
+ if (!oldcwd)
+ return;
+
+ oldns = open(curpath, O_RDONLY);
+ if (oldns < 0)
+ goto errout;
+
+ newns = open(nsinfo__mntns_path(nsi), O_RDONLY);
+ if (newns < 0)
+ goto errout;
+
+ if (setns(newns, CLONE_NEWNS) < 0)
+ goto errout;
+
+ nc->oldcwd = oldcwd;
+ nc->oldns = oldns;
+ nc->newns = newns;
+ return;
+
+errout:
+ free(oldcwd);
+ if (oldns > -1)
+ close(oldns);
+ if (newns > -1)
+ close(newns);
+}
+
+void nsinfo__mountns_exit(struct nscookie *nc)
+{
+ if (nc == NULL || nc->oldns == -1 || nc->newns == -1 || !nc->oldcwd)
+ return;
+
+ setns(nc->oldns, CLONE_NEWNS);
+
+ if (nc->oldcwd) {
+ WARN_ON_ONCE(chdir(nc->oldcwd));
+ zfree(&nc->oldcwd);
+ }
+
+ if (nc->oldns > -1) {
+ close(nc->oldns);
+ nc->oldns = -1;
+ }
+
+ if (nc->newns > -1) {
+ close(nc->newns);
+ nc->newns = -1;
+ }
+}
+
+char *nsinfo__realpath(const char *path, struct nsinfo *nsi)
+{
+ char *rpath;
+ struct nscookie nsc;
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ rpath = realpath(path, NULL);
+ nsinfo__mountns_exit(&nsc);
+
+ return rpath;
+}
+
+int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi)
+{
+ int ret;
+ struct nscookie nsc;
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ ret = stat(filename, st);
+ nsinfo__mountns_exit(&nsc);
+
+ return ret;
+}
+
+bool nsinfo__is_in_root_namespace(void)
+{
+ pid_t tgid = 0, nstgid = 0;
+ bool in_pidns = false;
+
+ nsinfo__get_nspid(&tgid, &nstgid, &in_pidns, "/proc/self/status");
+ return !in_pidns;
+}
diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h
new file mode 100644
index 000000000..8c0731c6c
--- /dev/null
+++ b/tools/perf/util/namespaces.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright (C) 2017 Hari Bathini, IBM Corporation
+ */
+
+#ifndef __PERF_NAMESPACES_H
+#define __PERF_NAMESPACES_H
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <linux/stddef.h>
+#include <linux/perf_event.h>
+#include <linux/refcount.h>
+#include <linux/types.h>
+#include <internal/rc_check.h>
+
+#ifndef HAVE_SETNS_SUPPORT
+int setns(int fd, int nstype);
+#endif
+
+struct perf_record_namespaces;
+
+struct namespaces {
+ struct list_head list;
+ u64 end_time;
+ struct perf_ns_link_info link_info[];
+};
+
+struct namespaces *namespaces__new(struct perf_record_namespaces *event);
+void namespaces__free(struct namespaces *namespaces);
+
+DECLARE_RC_STRUCT(nsinfo) {
+ pid_t pid;
+ pid_t tgid;
+ pid_t nstgid;
+ bool need_setns;
+ bool in_pidns;
+ char *mntns_path;
+ refcount_t refcnt;
+};
+
+struct nscookie {
+ int oldns;
+ int newns;
+ char *oldcwd;
+};
+
+int nsinfo__init(struct nsinfo *nsi);
+struct nsinfo *nsinfo__new(pid_t pid);
+struct nsinfo *nsinfo__copy(const struct nsinfo *nsi);
+
+struct nsinfo *nsinfo__get(struct nsinfo *nsi);
+void nsinfo__put(struct nsinfo *nsi);
+
+bool nsinfo__need_setns(const struct nsinfo *nsi);
+void nsinfo__clear_need_setns(struct nsinfo *nsi);
+pid_t nsinfo__tgid(const struct nsinfo *nsi);
+pid_t nsinfo__nstgid(const struct nsinfo *nsi);
+pid_t nsinfo__pid(const struct nsinfo *nsi);
+pid_t nsinfo__in_pidns(const struct nsinfo *nsi);
+
+void nsinfo__mountns_enter(struct nsinfo *nsi, struct nscookie *nc);
+void nsinfo__mountns_exit(struct nscookie *nc);
+
+char *nsinfo__realpath(const char *path, struct nsinfo *nsi);
+int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi);
+
+bool nsinfo__is_in_root_namespace(void);
+
+static inline void __nsinfo__zput(struct nsinfo **nsip)
+{
+ if (nsip) {
+ nsinfo__put(*nsip);
+ *nsip = NULL;
+ }
+}
+
+#define nsinfo__zput(nsi) __nsinfo__zput(&nsi)
+
+const char *perf_ns__name(unsigned int id);
+
+#endif /* __PERF_NAMESPACES_H */
diff --git a/tools/perf/util/off_cpu.h b/tools/perf/util/off_cpu.h
new file mode 100644
index 000000000..2dd67c60f
--- /dev/null
+++ b/tools/perf/util/off_cpu.h
@@ -0,0 +1,38 @@
+#ifndef PERF_UTIL_OFF_CPU_H
+#define PERF_UTIL_OFF_CPU_H
+
+#include <linux/perf_event.h>
+
+struct evlist;
+struct target;
+struct perf_session;
+struct record_opts;
+
+#define OFFCPU_EVENT "offcpu-time"
+
+#define OFFCPU_SAMPLE_TYPES (PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP | \
+ PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \
+ PERF_SAMPLE_ID | PERF_SAMPLE_CPU | \
+ PERF_SAMPLE_PERIOD | PERF_SAMPLE_CALLCHAIN | \
+ PERF_SAMPLE_CGROUP)
+
+
+#ifdef HAVE_BPF_SKEL
+int off_cpu_prepare(struct evlist *evlist, struct target *target,
+ struct record_opts *opts);
+int off_cpu_write(struct perf_session *session);
+#else
+static inline int off_cpu_prepare(struct evlist *evlist __maybe_unused,
+ struct target *target __maybe_unused,
+ struct record_opts *opts __maybe_unused)
+{
+ return -1;
+}
+
+static inline int off_cpu_write(struct perf_session *session __maybe_unused)
+{
+ return -1;
+}
+#endif
+
+#endif /* PERF_UTIL_OFF_CPU_H */
diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c
new file mode 100644
index 000000000..8c62611f1
--- /dev/null
+++ b/tools/perf/util/ordered-events.c
@@ -0,0 +1,417 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/list.h>
+#include <linux/compiler.h>
+#include <linux/string.h>
+#include "ordered-events.h"
+#include "session.h"
+#include "asm/bug.h"
+#include "debug.h"
+#include "ui/progress.h"
+
+#define pr_N(n, fmt, ...) \
+ eprintf(n, debug_ordered_events, fmt, ##__VA_ARGS__)
+
+#define pr(fmt, ...) pr_N(1, pr_fmt(fmt), ##__VA_ARGS__)
+
+static void queue_event(struct ordered_events *oe, struct ordered_event *new)
+{
+ struct ordered_event *last = oe->last;
+ u64 timestamp = new->timestamp;
+ struct list_head *p;
+
+ ++oe->nr_events;
+ oe->last = new;
+
+ pr_oe_time2(timestamp, "queue_event nr_events %u\n", oe->nr_events);
+
+ if (!last) {
+ list_add(&new->list, &oe->events);
+ oe->max_timestamp = timestamp;
+ return;
+ }
+
+ /*
+ * last event might point to some random place in the list as it's
+ * the last queued event. We expect that the new event is close to
+ * this.
+ */
+ if (last->timestamp <= timestamp) {
+ while (last->timestamp <= timestamp) {
+ p = last->list.next;
+ if (p == &oe->events) {
+ list_add_tail(&new->list, &oe->events);
+ oe->max_timestamp = timestamp;
+ return;
+ }
+ last = list_entry(p, struct ordered_event, list);
+ }
+ list_add_tail(&new->list, &last->list);
+ } else {
+ while (last->timestamp > timestamp) {
+ p = last->list.prev;
+ if (p == &oe->events) {
+ list_add(&new->list, &oe->events);
+ return;
+ }
+ last = list_entry(p, struct ordered_event, list);
+ }
+ list_add(&new->list, &last->list);
+ }
+}
+
+static union perf_event *__dup_event(struct ordered_events *oe,
+ union perf_event *event)
+{
+ union perf_event *new_event = NULL;
+
+ if (oe->cur_alloc_size < oe->max_alloc_size) {
+ new_event = memdup(event, event->header.size);
+ if (new_event)
+ oe->cur_alloc_size += event->header.size;
+ }
+
+ return new_event;
+}
+
+static union perf_event *dup_event(struct ordered_events *oe,
+ union perf_event *event)
+{
+ return oe->copy_on_queue ? __dup_event(oe, event) : event;
+}
+
+static void __free_dup_event(struct ordered_events *oe, union perf_event *event)
+{
+ if (event) {
+ oe->cur_alloc_size -= event->header.size;
+ free(event);
+ }
+}
+
+static void free_dup_event(struct ordered_events *oe, union perf_event *event)
+{
+ if (oe->copy_on_queue)
+ __free_dup_event(oe, event);
+}
+
+#define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct ordered_event))
+static struct ordered_event *alloc_event(struct ordered_events *oe,
+ union perf_event *event)
+{
+ struct list_head *cache = &oe->cache;
+ struct ordered_event *new = NULL;
+ union perf_event *new_event;
+ size_t size;
+
+ new_event = dup_event(oe, event);
+ if (!new_event)
+ return NULL;
+
+ /*
+ * We maintain the following scheme of buffers for ordered
+ * event allocation:
+ *
+ * to_free list -> buffer1 (64K)
+ * buffer2 (64K)
+ * ...
+ *
+ * Each buffer keeps an array of ordered events objects:
+ * buffer -> event[0]
+ * event[1]
+ * ...
+ *
+ * Each allocated ordered event is linked to one of
+ * following lists:
+ * - time ordered list 'events'
+ * - list of currently removed events 'cache'
+ *
+ * Allocation of the ordered event uses the following order
+ * to get the memory:
+ * - use recently removed object from 'cache' list
+ * - use available object in current allocation buffer
+ * - allocate new buffer if the current buffer is full
+ *
+ * Removal of ordered event object moves it from events to
+ * the cache list.
+ */
+ size = sizeof(*oe->buffer) + MAX_SAMPLE_BUFFER * sizeof(*new);
+
+ if (!list_empty(cache)) {
+ new = list_entry(cache->next, struct ordered_event, list);
+ list_del_init(&new->list);
+ } else if (oe->buffer) {
+ new = &oe->buffer->event[oe->buffer_idx];
+ if (++oe->buffer_idx == MAX_SAMPLE_BUFFER)
+ oe->buffer = NULL;
+ } else if ((oe->cur_alloc_size + size) < oe->max_alloc_size) {
+ oe->buffer = malloc(size);
+ if (!oe->buffer) {
+ free_dup_event(oe, new_event);
+ return NULL;
+ }
+
+ pr("alloc size %" PRIu64 "B (+%zu), max %" PRIu64 "B\n",
+ oe->cur_alloc_size, size, oe->max_alloc_size);
+
+ oe->cur_alloc_size += size;
+ list_add(&oe->buffer->list, &oe->to_free);
+
+ oe->buffer_idx = 1;
+ new = &oe->buffer->event[0];
+ } else {
+ pr("allocation limit reached %" PRIu64 "B\n", oe->max_alloc_size);
+ return NULL;
+ }
+
+ new->event = new_event;
+ return new;
+}
+
+static struct ordered_event *
+ordered_events__new_event(struct ordered_events *oe, u64 timestamp,
+ union perf_event *event)
+{
+ struct ordered_event *new;
+
+ new = alloc_event(oe, event);
+ if (new) {
+ new->timestamp = timestamp;
+ queue_event(oe, new);
+ }
+
+ return new;
+}
+
+void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event)
+{
+ list_move(&event->list, &oe->cache);
+ oe->nr_events--;
+ free_dup_event(oe, event->event);
+ event->event = NULL;
+}
+
+int ordered_events__queue(struct ordered_events *oe, union perf_event *event,
+ u64 timestamp, u64 file_offset, const char *file_path)
+{
+ struct ordered_event *oevent;
+
+ if (!timestamp || timestamp == ~0ULL)
+ return -ETIME;
+
+ if (timestamp < oe->last_flush) {
+ pr_oe_time(timestamp, "out of order event\n");
+ pr_oe_time(oe->last_flush, "last flush, last_flush_type %d\n",
+ oe->last_flush_type);
+
+ oe->nr_unordered_events++;
+ }
+
+ oevent = ordered_events__new_event(oe, timestamp, event);
+ if (!oevent) {
+ ordered_events__flush(oe, OE_FLUSH__HALF);
+ oevent = ordered_events__new_event(oe, timestamp, event);
+ }
+
+ if (!oevent)
+ return -ENOMEM;
+
+ oevent->file_offset = file_offset;
+ oevent->file_path = file_path;
+ return 0;
+}
+
+static int do_flush(struct ordered_events *oe, bool show_progress)
+{
+ struct list_head *head = &oe->events;
+ struct ordered_event *tmp, *iter;
+ u64 limit = oe->next_flush;
+ u64 last_ts = oe->last ? oe->last->timestamp : 0ULL;
+ struct ui_progress prog;
+ int ret;
+
+ if (!limit)
+ return 0;
+
+ if (show_progress)
+ ui_progress__init(&prog, oe->nr_events, "Processing time ordered events...");
+
+ list_for_each_entry_safe(iter, tmp, head, list) {
+ if (session_done())
+ return 0;
+
+ if (iter->timestamp > limit)
+ break;
+ ret = oe->deliver(oe, iter);
+ if (ret)
+ return ret;
+
+ ordered_events__delete(oe, iter);
+ oe->last_flush = iter->timestamp;
+
+ if (show_progress)
+ ui_progress__update(&prog, 1);
+ }
+
+ if (list_empty(head))
+ oe->last = NULL;
+ else if (last_ts <= limit)
+ oe->last = list_entry(head->prev, struct ordered_event, list);
+
+ if (show_progress)
+ ui_progress__finish();
+
+ return 0;
+}
+
+static int __ordered_events__flush(struct ordered_events *oe, enum oe_flush how,
+ u64 timestamp)
+{
+ static const char * const str[] = {
+ "NONE",
+ "FINAL",
+ "ROUND",
+ "HALF ",
+ "TOP ",
+ "TIME ",
+ };
+ int err;
+ bool show_progress = false;
+
+ if (oe->nr_events == 0)
+ return 0;
+
+ switch (how) {
+ case OE_FLUSH__FINAL:
+ show_progress = true;
+ fallthrough;
+ case OE_FLUSH__TOP:
+ oe->next_flush = ULLONG_MAX;
+ break;
+
+ case OE_FLUSH__HALF:
+ {
+ struct ordered_event *first, *last;
+ struct list_head *head = &oe->events;
+
+ first = list_entry(head->next, struct ordered_event, list);
+ last = oe->last;
+
+ /* Warn if we are called before any event got allocated. */
+ if (WARN_ONCE(!last || list_empty(head), "empty queue"))
+ return 0;
+
+ oe->next_flush = first->timestamp;
+ oe->next_flush += (last->timestamp - first->timestamp) / 2;
+ break;
+ }
+
+ case OE_FLUSH__TIME:
+ oe->next_flush = timestamp;
+ show_progress = false;
+ break;
+
+ case OE_FLUSH__ROUND:
+ case OE_FLUSH__NONE:
+ default:
+ break;
+ }
+
+ pr_oe_time(oe->next_flush, "next_flush - ordered_events__flush PRE %s, nr_events %u\n",
+ str[how], oe->nr_events);
+ pr_oe_time(oe->max_timestamp, "max_timestamp\n");
+
+ err = do_flush(oe, show_progress);
+
+ if (!err) {
+ if (how == OE_FLUSH__ROUND)
+ oe->next_flush = oe->max_timestamp;
+
+ oe->last_flush_type = how;
+ }
+
+ pr_oe_time(oe->next_flush, "next_flush - ordered_events__flush POST %s, nr_events %u\n",
+ str[how], oe->nr_events);
+ pr_oe_time(oe->last_flush, "last_flush\n");
+
+ return err;
+}
+
+int ordered_events__flush(struct ordered_events *oe, enum oe_flush how)
+{
+ return __ordered_events__flush(oe, how, 0);
+}
+
+int ordered_events__flush_time(struct ordered_events *oe, u64 timestamp)
+{
+ return __ordered_events__flush(oe, OE_FLUSH__TIME, timestamp);
+}
+
+u64 ordered_events__first_time(struct ordered_events *oe)
+{
+ struct ordered_event *event;
+
+ if (list_empty(&oe->events))
+ return 0;
+
+ event = list_first_entry(&oe->events, struct ordered_event, list);
+ return event->timestamp;
+}
+
+void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver,
+ void *data)
+{
+ INIT_LIST_HEAD(&oe->events);
+ INIT_LIST_HEAD(&oe->cache);
+ INIT_LIST_HEAD(&oe->to_free);
+ oe->max_alloc_size = (u64) -1;
+ oe->cur_alloc_size = 0;
+ oe->deliver = deliver;
+ oe->data = data;
+}
+
+static void
+ordered_events_buffer__free(struct ordered_events_buffer *buffer,
+ unsigned int max, struct ordered_events *oe)
+{
+ if (oe->copy_on_queue) {
+ unsigned int i;
+
+ for (i = 0; i < max; i++)
+ __free_dup_event(oe, buffer->event[i].event);
+ }
+
+ free(buffer);
+}
+
+void ordered_events__free(struct ordered_events *oe)
+{
+ struct ordered_events_buffer *buffer, *tmp;
+
+ if (list_empty(&oe->to_free))
+ return;
+
+ /*
+ * Current buffer might not have all the events allocated
+ * yet, we need to free only allocated ones ...
+ */
+ if (oe->buffer) {
+ list_del_init(&oe->buffer->list);
+ ordered_events_buffer__free(oe->buffer, oe->buffer_idx, oe);
+ }
+
+ /* ... and continue with the rest */
+ list_for_each_entry_safe(buffer, tmp, &oe->to_free, list) {
+ list_del_init(&buffer->list);
+ ordered_events_buffer__free(buffer, MAX_SAMPLE_BUFFER, oe);
+ }
+}
+
+void ordered_events__reinit(struct ordered_events *oe)
+{
+ ordered_events__deliver_t old_deliver = oe->deliver;
+
+ ordered_events__free(oe);
+ memset(oe, '\0', sizeof(*oe));
+ ordered_events__init(oe, old_deliver, oe->data);
+}
diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h
new file mode 100644
index 000000000..8febbd7c9
--- /dev/null
+++ b/tools/perf/util/ordered-events.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ORDERED_EVENTS_H
+#define __ORDERED_EVENTS_H
+
+#include <linux/types.h>
+
+struct perf_sample;
+
+struct ordered_event {
+ u64 timestamp;
+ u64 file_offset;
+ const char *file_path;
+ union perf_event *event;
+ struct list_head list;
+};
+
+enum oe_flush {
+ OE_FLUSH__NONE,
+ OE_FLUSH__FINAL,
+ OE_FLUSH__ROUND,
+ OE_FLUSH__HALF,
+ OE_FLUSH__TOP,
+ OE_FLUSH__TIME,
+};
+
+struct ordered_events;
+
+typedef int (*ordered_events__deliver_t)(struct ordered_events *oe,
+ struct ordered_event *event);
+
+struct ordered_events_buffer {
+ struct list_head list;
+ struct ordered_event event[];
+};
+
+struct ordered_events {
+ u64 last_flush;
+ u64 next_flush;
+ u64 max_timestamp;
+ u64 max_alloc_size;
+ u64 cur_alloc_size;
+ struct list_head events;
+ struct list_head cache;
+ struct list_head to_free;
+ struct ordered_events_buffer *buffer;
+ struct ordered_event *last;
+ ordered_events__deliver_t deliver;
+ int buffer_idx;
+ unsigned int nr_events;
+ enum oe_flush last_flush_type;
+ u32 nr_unordered_events;
+ bool copy_on_queue;
+ void *data;
+};
+
+int ordered_events__queue(struct ordered_events *oe, union perf_event *event,
+ u64 timestamp, u64 file_offset, const char *file_path);
+void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event);
+int ordered_events__flush(struct ordered_events *oe, enum oe_flush how);
+int ordered_events__flush_time(struct ordered_events *oe, u64 timestamp);
+void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver,
+ void *data);
+void ordered_events__free(struct ordered_events *oe);
+void ordered_events__reinit(struct ordered_events *oe);
+u64 ordered_events__first_time(struct ordered_events *oe);
+
+static inline
+void ordered_events__set_alloc_size(struct ordered_events *oe, u64 size)
+{
+ oe->max_alloc_size = size;
+}
+
+static inline
+void ordered_events__set_copy_on_queue(struct ordered_events *oe, bool copy)
+{
+ oe->copy_on_queue = copy;
+}
+
+static inline u64 ordered_events__last_flush_time(struct ordered_events *oe)
+{
+ return oe->last_flush;
+}
+
+#endif /* __ORDERED_EVENTS_H */
diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c
new file mode 100644
index 000000000..fd67d204d
--- /dev/null
+++ b/tools/perf/util/parse-branch-options.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util/debug.h"
+#include "util/event.h"
+#include <subcmd/parse-options.h>
+#include "util/parse-branch-options.h"
+#include <stdlib.h>
+#include <string.h>
+
+#define BRANCH_OPT(n, m) \
+ { .name = n, .mode = (m) }
+
+#define BRANCH_END { .name = NULL }
+
+struct branch_mode {
+ const char *name;
+ int mode;
+};
+
+static const struct branch_mode branch_modes[] = {
+ BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
+ BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
+ BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
+ BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
+ BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
+ BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
+ BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
+ BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
+ BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
+ BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
+ BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
+ BRANCH_OPT("ind_jmp", PERF_SAMPLE_BRANCH_IND_JUMP),
+ BRANCH_OPT("call", PERF_SAMPLE_BRANCH_CALL),
+ BRANCH_OPT("no_flags", PERF_SAMPLE_BRANCH_NO_FLAGS),
+ BRANCH_OPT("no_cycles", PERF_SAMPLE_BRANCH_NO_CYCLES),
+ BRANCH_OPT("save_type", PERF_SAMPLE_BRANCH_TYPE_SAVE),
+ BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK),
+ BRANCH_OPT("hw_index", PERF_SAMPLE_BRANCH_HW_INDEX),
+ BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE),
+ BRANCH_END
+};
+
+int parse_branch_str(const char *str, __u64 *mode)
+{
+#define ONLY_PLM \
+ (PERF_SAMPLE_BRANCH_USER |\
+ PERF_SAMPLE_BRANCH_KERNEL |\
+ PERF_SAMPLE_BRANCH_HV)
+
+ int ret = 0;
+ char *p, *s;
+ char *os = NULL;
+ const struct branch_mode *br;
+
+ if (str == NULL) {
+ *mode = PERF_SAMPLE_BRANCH_ANY;
+ return 0;
+ }
+
+ /* because str is read-only */
+ s = os = strdup(str);
+ if (!s)
+ return -1;
+
+ for (;;) {
+ p = strchr(s, ',');
+ if (p)
+ *p = '\0';
+
+ for (br = branch_modes; br->name; br++) {
+ if (!strcasecmp(s, br->name))
+ break;
+ }
+ if (!br->name) {
+ ret = -1;
+ pr_warning("unknown branch filter %s,"
+ " check man page\n", s);
+ goto error;
+ }
+
+ *mode |= br->mode;
+
+ if (!p)
+ break;
+
+ s = p + 1;
+ }
+
+ /* default to any branch */
+ if ((*mode & ~ONLY_PLM) == 0) {
+ *mode = PERF_SAMPLE_BRANCH_ANY;
+ }
+error:
+ free(os);
+ return ret;
+}
+
+int
+parse_branch_stack(const struct option *opt, const char *str, int unset)
+{
+ __u64 *mode = (__u64 *)opt->value;
+
+ if (unset)
+ return 0;
+
+ /*
+ * cannot set it twice, -b + --branch-filter for instance
+ */
+ if (*mode) {
+ pr_err("Error: Can't use --branch-any (-b) with --branch-filter (-j).\n");
+ return -1;
+ }
+
+ return parse_branch_str(str, mode);
+}
diff --git a/tools/perf/util/parse-branch-options.h b/tools/perf/util/parse-branch-options.h
new file mode 100644
index 000000000..11d172273
--- /dev/null
+++ b/tools/perf/util/parse-branch-options.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_PARSE_BRANCH_OPTIONS_H
+#define _PERF_PARSE_BRANCH_OPTIONS_H 1
+#include <stdint.h>
+int parse_branch_stack(const struct option *opt, const char *str, int unset);
+int parse_branch_str(const char *str, __u64 *mode);
+#endif /* _PERF_PARSE_BRANCH_OPTIONS_H */
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
new file mode 100644
index 000000000..65608a3cb
--- /dev/null
+++ b/tools/perf/util/parse-events.c
@@ -0,0 +1,2694 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/hw_breakpoint.h>
+#include <linux/err.h>
+#include <linux/list_sort.h>
+#include <linux/zalloc.h>
+#include <dirent.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <sys/param.h>
+#include "term.h"
+#include "evlist.h"
+#include "evsel.h"
+#include <subcmd/parse-options.h>
+#include "parse-events.h"
+#include "string2.h"
+#include "strbuf.h"
+#include "debug.h"
+#include <api/fs/tracing_path.h>
+#include <perf/cpumap.h>
+#include <util/parse-events-bison.h>
+#include <util/parse-events-flex.h>
+#include "pmu.h"
+#include "pmus.h"
+#include "asm/bug.h"
+#include "util/parse-branch-options.h"
+#include "util/evsel_config.h"
+#include "util/event.h"
+#include "util/bpf-filter.h"
+#include "util/util.h"
+#include "tracepoint.h"
+
+#define MAX_NAME_LEN 100
+
+#ifdef PARSER_DEBUG
+extern int parse_events_debug;
+#endif
+static int get_config_terms(struct list_head *head_config,
+ struct list_head *head_terms __maybe_unused);
+
+struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = {
+ .symbol = "cpu-cycles",
+ .alias = "cycles",
+ },
+ [PERF_COUNT_HW_INSTRUCTIONS] = {
+ .symbol = "instructions",
+ .alias = "",
+ },
+ [PERF_COUNT_HW_CACHE_REFERENCES] = {
+ .symbol = "cache-references",
+ .alias = "",
+ },
+ [PERF_COUNT_HW_CACHE_MISSES] = {
+ .symbol = "cache-misses",
+ .alias = "",
+ },
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = {
+ .symbol = "branch-instructions",
+ .alias = "branches",
+ },
+ [PERF_COUNT_HW_BRANCH_MISSES] = {
+ .symbol = "branch-misses",
+ .alias = "",
+ },
+ [PERF_COUNT_HW_BUS_CYCLES] = {
+ .symbol = "bus-cycles",
+ .alias = "",
+ },
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = {
+ .symbol = "stalled-cycles-frontend",
+ .alias = "idle-cycles-frontend",
+ },
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = {
+ .symbol = "stalled-cycles-backend",
+ .alias = "idle-cycles-backend",
+ },
+ [PERF_COUNT_HW_REF_CPU_CYCLES] = {
+ .symbol = "ref-cycles",
+ .alias = "",
+ },
+};
+
+struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
+ [PERF_COUNT_SW_CPU_CLOCK] = {
+ .symbol = "cpu-clock",
+ .alias = "",
+ },
+ [PERF_COUNT_SW_TASK_CLOCK] = {
+ .symbol = "task-clock",
+ .alias = "",
+ },
+ [PERF_COUNT_SW_PAGE_FAULTS] = {
+ .symbol = "page-faults",
+ .alias = "faults",
+ },
+ [PERF_COUNT_SW_CONTEXT_SWITCHES] = {
+ .symbol = "context-switches",
+ .alias = "cs",
+ },
+ [PERF_COUNT_SW_CPU_MIGRATIONS] = {
+ .symbol = "cpu-migrations",
+ .alias = "migrations",
+ },
+ [PERF_COUNT_SW_PAGE_FAULTS_MIN] = {
+ .symbol = "minor-faults",
+ .alias = "",
+ },
+ [PERF_COUNT_SW_PAGE_FAULTS_MAJ] = {
+ .symbol = "major-faults",
+ .alias = "",
+ },
+ [PERF_COUNT_SW_ALIGNMENT_FAULTS] = {
+ .symbol = "alignment-faults",
+ .alias = "",
+ },
+ [PERF_COUNT_SW_EMULATION_FAULTS] = {
+ .symbol = "emulation-faults",
+ .alias = "",
+ },
+ [PERF_COUNT_SW_DUMMY] = {
+ .symbol = "dummy",
+ .alias = "",
+ },
+ [PERF_COUNT_SW_BPF_OUTPUT] = {
+ .symbol = "bpf-output",
+ .alias = "",
+ },
+ [PERF_COUNT_SW_CGROUP_SWITCHES] = {
+ .symbol = "cgroup-switches",
+ .alias = "",
+ },
+};
+
+const char *event_type(int type)
+{
+ switch (type) {
+ case PERF_TYPE_HARDWARE:
+ return "hardware";
+
+ case PERF_TYPE_SOFTWARE:
+ return "software";
+
+ case PERF_TYPE_TRACEPOINT:
+ return "tracepoint";
+
+ case PERF_TYPE_HW_CACHE:
+ return "hardware-cache";
+
+ default:
+ break;
+ }
+
+ return "unknown";
+}
+
+static char *get_config_str(struct list_head *head_terms, enum parse_events__term_type type_term)
+{
+ struct parse_events_term *term;
+
+ if (!head_terms)
+ return NULL;
+
+ list_for_each_entry(term, head_terms, list)
+ if (term->type_term == type_term)
+ return term->val.str;
+
+ return NULL;
+}
+
+static char *get_config_metric_id(struct list_head *head_terms)
+{
+ return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_METRIC_ID);
+}
+
+static char *get_config_name(struct list_head *head_terms)
+{
+ return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_NAME);
+}
+
+/**
+ * fix_raw - For each raw term see if there is an event (aka alias) in pmu that
+ * matches the raw's string value. If the string value matches an
+ * event then change the term to be an event, if not then change it to
+ * be a config term. For example, "read" may be an event of the PMU or
+ * a raw hex encoding of 0xead. The fix-up is done late so the PMU of
+ * the event can be determined and we don't need to scan all PMUs
+ * ahead-of-time.
+ * @config_terms: the list of terms that may contain a raw term.
+ * @pmu: the PMU to scan for events from.
+ */
+static void fix_raw(struct list_head *config_terms, struct perf_pmu *pmu)
+{
+ struct parse_events_term *term;
+
+ list_for_each_entry(term, config_terms, list) {
+ u64 num;
+
+ if (term->type_term != PARSE_EVENTS__TERM_TYPE_RAW)
+ continue;
+
+ if (perf_pmu__have_event(pmu, term->val.str)) {
+ zfree(&term->config);
+ term->config = term->val.str;
+ term->type_val = PARSE_EVENTS__TERM_TYPE_NUM;
+ term->type_term = PARSE_EVENTS__TERM_TYPE_USER;
+ term->val.num = 1;
+ term->no_value = true;
+ continue;
+ }
+
+ zfree(&term->config);
+ term->config = strdup("config");
+ errno = 0;
+ num = strtoull(term->val.str + 1, NULL, 16);
+ assert(errno == 0);
+ free(term->val.str);
+ term->type_val = PARSE_EVENTS__TERM_TYPE_NUM;
+ term->type_term = PARSE_EVENTS__TERM_TYPE_CONFIG;
+ term->val.num = num;
+ term->no_value = false;
+ }
+}
+
+static struct evsel *
+__add_event(struct list_head *list, int *idx,
+ struct perf_event_attr *attr,
+ bool init_attr,
+ const char *name, const char *metric_id, struct perf_pmu *pmu,
+ struct list_head *config_terms, bool auto_merge_stats,
+ const char *cpu_list)
+{
+ struct evsel *evsel;
+ struct perf_cpu_map *cpus = pmu ? perf_cpu_map__get(pmu->cpus) :
+ cpu_list ? perf_cpu_map__new(cpu_list) : NULL;
+
+ if (pmu)
+ perf_pmu__warn_invalid_formats(pmu);
+
+ if (pmu && (attr->type == PERF_TYPE_RAW || attr->type >= PERF_TYPE_MAX)) {
+ perf_pmu__warn_invalid_config(pmu, attr->config, name,
+ PERF_PMU_FORMAT_VALUE_CONFIG, "config");
+ perf_pmu__warn_invalid_config(pmu, attr->config1, name,
+ PERF_PMU_FORMAT_VALUE_CONFIG1, "config1");
+ perf_pmu__warn_invalid_config(pmu, attr->config2, name,
+ PERF_PMU_FORMAT_VALUE_CONFIG2, "config2");
+ perf_pmu__warn_invalid_config(pmu, attr->config3, name,
+ PERF_PMU_FORMAT_VALUE_CONFIG3, "config3");
+ }
+ if (init_attr)
+ event_attr_init(attr);
+
+ evsel = evsel__new_idx(attr, *idx);
+ if (!evsel) {
+ perf_cpu_map__put(cpus);
+ return NULL;
+ }
+
+ (*idx)++;
+ evsel->core.cpus = cpus;
+ evsel->core.own_cpus = perf_cpu_map__get(cpus);
+ evsel->core.requires_cpu = pmu ? pmu->is_uncore : false;
+ evsel->core.is_pmu_core = pmu ? pmu->is_core : false;
+ evsel->auto_merge_stats = auto_merge_stats;
+ evsel->pmu = pmu;
+ evsel->pmu_name = pmu ? strdup(pmu->name) : NULL;
+
+ if (name)
+ evsel->name = strdup(name);
+
+ if (metric_id)
+ evsel->metric_id = strdup(metric_id);
+
+ if (config_terms)
+ list_splice_init(config_terms, &evsel->config_terms);
+
+ if (list)
+ list_add_tail(&evsel->core.node, list);
+
+ return evsel;
+}
+
+struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr,
+ const char *name, const char *metric_id,
+ struct perf_pmu *pmu)
+{
+ return __add_event(/*list=*/NULL, &idx, attr, /*init_attr=*/false, name,
+ metric_id, pmu, /*config_terms=*/NULL,
+ /*auto_merge_stats=*/false, /*cpu_list=*/NULL);
+}
+
+static int add_event(struct list_head *list, int *idx,
+ struct perf_event_attr *attr, const char *name,
+ const char *metric_id, struct list_head *config_terms)
+{
+ return __add_event(list, idx, attr, /*init_attr*/true, name, metric_id,
+ /*pmu=*/NULL, config_terms,
+ /*auto_merge_stats=*/false, /*cpu_list=*/NULL) ? 0 : -ENOMEM;
+}
+
+static int add_event_tool(struct list_head *list, int *idx,
+ enum perf_tool_event tool_event)
+{
+ struct evsel *evsel;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_DUMMY,
+ };
+
+ evsel = __add_event(list, idx, &attr, /*init_attr=*/true, /*name=*/NULL,
+ /*metric_id=*/NULL, /*pmu=*/NULL,
+ /*config_terms=*/NULL, /*auto_merge_stats=*/false,
+ /*cpu_list=*/"0");
+ if (!evsel)
+ return -ENOMEM;
+ evsel->tool_event = tool_event;
+ if (tool_event == PERF_TOOL_DURATION_TIME
+ || tool_event == PERF_TOOL_USER_TIME
+ || tool_event == PERF_TOOL_SYSTEM_TIME) {
+ free((char *)evsel->unit);
+ evsel->unit = strdup("ns");
+ }
+ return 0;
+}
+
+/**
+ * parse_aliases - search names for entries beginning or equalling str ignoring
+ * case. If mutliple entries in names match str then the longest
+ * is chosen.
+ * @str: The needle to look for.
+ * @names: The haystack to search.
+ * @size: The size of the haystack.
+ * @longest: Out argument giving the length of the matching entry.
+ */
+static int parse_aliases(const char *str, const char *const names[][EVSEL__MAX_ALIASES], int size,
+ int *longest)
+{
+ *longest = -1;
+ for (int i = 0; i < size; i++) {
+ for (int j = 0; j < EVSEL__MAX_ALIASES && names[i][j]; j++) {
+ int n = strlen(names[i][j]);
+
+ if (n > *longest && !strncasecmp(str, names[i][j], n))
+ *longest = n;
+ }
+ if (*longest > 0)
+ return i;
+ }
+
+ return -1;
+}
+
+typedef int config_term_func_t(struct perf_event_attr *attr,
+ struct parse_events_term *term,
+ struct parse_events_error *err);
+static int config_term_common(struct perf_event_attr *attr,
+ struct parse_events_term *term,
+ struct parse_events_error *err);
+static int config_attr(struct perf_event_attr *attr,
+ struct list_head *head,
+ struct parse_events_error *err,
+ config_term_func_t config_term);
+
+/**
+ * parse_events__decode_legacy_cache - Search name for the legacy cache event
+ * name composed of 1, 2 or 3 hyphen
+ * separated sections. The first section is
+ * the cache type while the others are the
+ * optional op and optional result. To make
+ * life hard the names in the table also
+ * contain hyphens and the longest name
+ * should always be selected.
+ */
+int parse_events__decode_legacy_cache(const char *name, int extended_pmu_type, __u64 *config)
+{
+ int len, cache_type = -1, cache_op = -1, cache_result = -1;
+ const char *name_end = &name[strlen(name) + 1];
+ const char *str = name;
+
+ cache_type = parse_aliases(str, evsel__hw_cache, PERF_COUNT_HW_CACHE_MAX, &len);
+ if (cache_type == -1)
+ return -EINVAL;
+ str += len + 1;
+
+ if (str < name_end) {
+ cache_op = parse_aliases(str, evsel__hw_cache_op,
+ PERF_COUNT_HW_CACHE_OP_MAX, &len);
+ if (cache_op >= 0) {
+ if (!evsel__is_cache_op_valid(cache_type, cache_op))
+ return -EINVAL;
+ str += len + 1;
+ } else {
+ cache_result = parse_aliases(str, evsel__hw_cache_result,
+ PERF_COUNT_HW_CACHE_RESULT_MAX, &len);
+ if (cache_result >= 0)
+ str += len + 1;
+ }
+ }
+ if (str < name_end) {
+ if (cache_op < 0) {
+ cache_op = parse_aliases(str, evsel__hw_cache_op,
+ PERF_COUNT_HW_CACHE_OP_MAX, &len);
+ if (cache_op >= 0) {
+ if (!evsel__is_cache_op_valid(cache_type, cache_op))
+ return -EINVAL;
+ }
+ } else if (cache_result < 0) {
+ cache_result = parse_aliases(str, evsel__hw_cache_result,
+ PERF_COUNT_HW_CACHE_RESULT_MAX, &len);
+ }
+ }
+
+ /*
+ * Fall back to reads:
+ */
+ if (cache_op == -1)
+ cache_op = PERF_COUNT_HW_CACHE_OP_READ;
+
+ /*
+ * Fall back to accesses:
+ */
+ if (cache_result == -1)
+ cache_result = PERF_COUNT_HW_CACHE_RESULT_ACCESS;
+
+ *config = cache_type | (cache_op << 8) | (cache_result << 16);
+ if (perf_pmus__supports_extended_type())
+ *config |= (__u64)extended_pmu_type << PERF_PMU_TYPE_SHIFT;
+ return 0;
+}
+
+/**
+ * parse_events__filter_pmu - returns false if a wildcard PMU should be
+ * considered, true if it should be filtered.
+ */
+bool parse_events__filter_pmu(const struct parse_events_state *parse_state,
+ const struct perf_pmu *pmu)
+{
+ if (parse_state->pmu_filter == NULL)
+ return false;
+
+ return strcmp(parse_state->pmu_filter, pmu->name) != 0;
+}
+
+int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
+ struct parse_events_state *parse_state,
+ struct list_head *head_config)
+{
+ struct perf_pmu *pmu = NULL;
+ bool found_supported = false;
+ const char *config_name = get_config_name(head_config);
+ const char *metric_id = get_config_metric_id(head_config);
+
+ /* Legacy cache events are only supported by core PMUs. */
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ LIST_HEAD(config_terms);
+ struct perf_event_attr attr;
+ int ret;
+
+ if (parse_events__filter_pmu(parse_state, pmu))
+ continue;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.type = PERF_TYPE_HW_CACHE;
+
+ ret = parse_events__decode_legacy_cache(name, pmu->type, &attr.config);
+ if (ret)
+ return ret;
+
+ found_supported = true;
+
+ if (head_config) {
+ if (config_attr(&attr, head_config, parse_state->error, config_term_common))
+ return -EINVAL;
+
+ if (get_config_terms(head_config, &config_terms))
+ return -ENOMEM;
+ }
+
+ if (__add_event(list, idx, &attr, /*init_attr*/true, config_name ?: name,
+ metric_id, pmu, &config_terms, /*auto_merge_stats=*/false,
+ /*cpu_list=*/NULL) == NULL)
+ return -ENOMEM;
+
+ free_config_terms(&config_terms);
+ }
+ return found_supported ? 0 : -EINVAL;
+}
+
+#ifdef HAVE_LIBTRACEEVENT
+static void tracepoint_error(struct parse_events_error *e, int err,
+ const char *sys, const char *name, int column)
+{
+ const char *str;
+ char help[BUFSIZ];
+
+ if (!e)
+ return;
+
+ /*
+ * We get error directly from syscall errno ( > 0),
+ * or from encoded pointer's error ( < 0).
+ */
+ err = abs(err);
+
+ switch (err) {
+ case EACCES:
+ str = "can't access trace events";
+ break;
+ case ENOENT:
+ str = "unknown tracepoint";
+ break;
+ default:
+ str = "failed to add tracepoint";
+ break;
+ }
+
+ tracing_path__strerror_open_tp(err, help, sizeof(help), sys, name);
+ parse_events_error__handle(e, column, strdup(str), strdup(help));
+}
+
+static int add_tracepoint(struct list_head *list, int *idx,
+ const char *sys_name, const char *evt_name,
+ struct parse_events_error *err,
+ struct list_head *head_config, void *loc_)
+{
+ YYLTYPE *loc = loc_;
+ struct evsel *evsel = evsel__newtp_idx(sys_name, evt_name, (*idx)++);
+
+ if (IS_ERR(evsel)) {
+ tracepoint_error(err, PTR_ERR(evsel), sys_name, evt_name, loc->first_column);
+ return PTR_ERR(evsel);
+ }
+
+ if (head_config) {
+ LIST_HEAD(config_terms);
+
+ if (get_config_terms(head_config, &config_terms))
+ return -ENOMEM;
+ list_splice(&config_terms, &evsel->config_terms);
+ }
+
+ list_add_tail(&evsel->core.node, list);
+ return 0;
+}
+
+static int add_tracepoint_multi_event(struct list_head *list, int *idx,
+ const char *sys_name, const char *evt_name,
+ struct parse_events_error *err,
+ struct list_head *head_config, YYLTYPE *loc)
+{
+ char *evt_path;
+ struct dirent *evt_ent;
+ DIR *evt_dir;
+ int ret = 0, found = 0;
+
+ evt_path = get_events_file(sys_name);
+ if (!evt_path) {
+ tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
+ return -1;
+ }
+ evt_dir = opendir(evt_path);
+ if (!evt_dir) {
+ put_events_file(evt_path);
+ tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
+ return -1;
+ }
+
+ while (!ret && (evt_ent = readdir(evt_dir))) {
+ if (!strcmp(evt_ent->d_name, ".")
+ || !strcmp(evt_ent->d_name, "..")
+ || !strcmp(evt_ent->d_name, "enable")
+ || !strcmp(evt_ent->d_name, "filter"))
+ continue;
+
+ if (!strglobmatch(evt_ent->d_name, evt_name))
+ continue;
+
+ found++;
+
+ ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name,
+ err, head_config, loc);
+ }
+
+ if (!found) {
+ tracepoint_error(err, ENOENT, sys_name, evt_name, loc->first_column);
+ ret = -1;
+ }
+
+ put_events_file(evt_path);
+ closedir(evt_dir);
+ return ret;
+}
+
+static int add_tracepoint_event(struct list_head *list, int *idx,
+ const char *sys_name, const char *evt_name,
+ struct parse_events_error *err,
+ struct list_head *head_config, YYLTYPE *loc)
+{
+ return strpbrk(evt_name, "*?") ?
+ add_tracepoint_multi_event(list, idx, sys_name, evt_name,
+ err, head_config, loc) :
+ add_tracepoint(list, idx, sys_name, evt_name,
+ err, head_config, loc);
+}
+
+static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
+ const char *sys_name, const char *evt_name,
+ struct parse_events_error *err,
+ struct list_head *head_config, YYLTYPE *loc)
+{
+ struct dirent *events_ent;
+ DIR *events_dir;
+ int ret = 0;
+
+ events_dir = tracing_events__opendir();
+ if (!events_dir) {
+ tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
+ return -1;
+ }
+
+ while (!ret && (events_ent = readdir(events_dir))) {
+ if (!strcmp(events_ent->d_name, ".")
+ || !strcmp(events_ent->d_name, "..")
+ || !strcmp(events_ent->d_name, "enable")
+ || !strcmp(events_ent->d_name, "header_event")
+ || !strcmp(events_ent->d_name, "header_page"))
+ continue;
+
+ if (!strglobmatch(events_ent->d_name, sys_name))
+ continue;
+
+ ret = add_tracepoint_event(list, idx, events_ent->d_name,
+ evt_name, err, head_config, loc);
+ }
+
+ closedir(events_dir);
+ return ret;
+}
+#endif /* HAVE_LIBTRACEEVENT */
+
+static int
+parse_breakpoint_type(const char *type, struct perf_event_attr *attr)
+{
+ int i;
+
+ for (i = 0; i < 3; i++) {
+ if (!type || !type[i])
+ break;
+
+#define CHECK_SET_TYPE(bit) \
+do { \
+ if (attr->bp_type & bit) \
+ return -EINVAL; \
+ else \
+ attr->bp_type |= bit; \
+} while (0)
+
+ switch (type[i]) {
+ case 'r':
+ CHECK_SET_TYPE(HW_BREAKPOINT_R);
+ break;
+ case 'w':
+ CHECK_SET_TYPE(HW_BREAKPOINT_W);
+ break;
+ case 'x':
+ CHECK_SET_TYPE(HW_BREAKPOINT_X);
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+#undef CHECK_SET_TYPE
+
+ if (!attr->bp_type) /* Default */
+ attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
+
+ return 0;
+}
+
+int parse_events_add_breakpoint(struct parse_events_state *parse_state,
+ struct list_head *list,
+ u64 addr, char *type, u64 len,
+ struct list_head *head_config __maybe_unused)
+{
+ struct perf_event_attr attr;
+ LIST_HEAD(config_terms);
+ const char *name;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.bp_addr = addr;
+
+ if (parse_breakpoint_type(type, &attr))
+ return -EINVAL;
+
+ /* Provide some defaults if len is not specified */
+ if (!len) {
+ if (attr.bp_type == HW_BREAKPOINT_X)
+ len = sizeof(long);
+ else
+ len = HW_BREAKPOINT_LEN_4;
+ }
+
+ attr.bp_len = len;
+
+ attr.type = PERF_TYPE_BREAKPOINT;
+ attr.sample_period = 1;
+
+ if (head_config) {
+ if (config_attr(&attr, head_config, parse_state->error,
+ config_term_common))
+ return -EINVAL;
+
+ if (get_config_terms(head_config, &config_terms))
+ return -ENOMEM;
+ }
+
+ name = get_config_name(head_config);
+
+ return add_event(list, &parse_state->idx, &attr, name, /*mertic_id=*/NULL,
+ &config_terms);
+}
+
+static int check_type_val(struct parse_events_term *term,
+ struct parse_events_error *err,
+ enum parse_events__term_val_type type)
+{
+ if (type == term->type_val)
+ return 0;
+
+ if (err) {
+ parse_events_error__handle(err, term->err_val,
+ type == PARSE_EVENTS__TERM_TYPE_NUM
+ ? strdup("expected numeric value")
+ : strdup("expected string value"),
+ NULL);
+ }
+ return -EINVAL;
+}
+
+static bool config_term_shrinked;
+
+static const char *config_term_name(enum parse_events__term_type term_type)
+{
+ /*
+ * Update according to parse-events.l
+ */
+ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = {
+ [PARSE_EVENTS__TERM_TYPE_USER] = "<sysfs term>",
+ [PARSE_EVENTS__TERM_TYPE_CONFIG] = "config",
+ [PARSE_EVENTS__TERM_TYPE_CONFIG1] = "config1",
+ [PARSE_EVENTS__TERM_TYPE_CONFIG2] = "config2",
+ [PARSE_EVENTS__TERM_TYPE_CONFIG3] = "config3",
+ [PARSE_EVENTS__TERM_TYPE_NAME] = "name",
+ [PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD] = "period",
+ [PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ] = "freq",
+ [PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE] = "branch_type",
+ [PARSE_EVENTS__TERM_TYPE_TIME] = "time",
+ [PARSE_EVENTS__TERM_TYPE_CALLGRAPH] = "call-graph",
+ [PARSE_EVENTS__TERM_TYPE_STACKSIZE] = "stack-size",
+ [PARSE_EVENTS__TERM_TYPE_NOINHERIT] = "no-inherit",
+ [PARSE_EVENTS__TERM_TYPE_INHERIT] = "inherit",
+ [PARSE_EVENTS__TERM_TYPE_MAX_STACK] = "max-stack",
+ [PARSE_EVENTS__TERM_TYPE_MAX_EVENTS] = "nr",
+ [PARSE_EVENTS__TERM_TYPE_OVERWRITE] = "overwrite",
+ [PARSE_EVENTS__TERM_TYPE_NOOVERWRITE] = "no-overwrite",
+ [PARSE_EVENTS__TERM_TYPE_DRV_CFG] = "driver-config",
+ [PARSE_EVENTS__TERM_TYPE_PERCORE] = "percore",
+ [PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT] = "aux-output",
+ [PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE] = "aux-sample-size",
+ [PARSE_EVENTS__TERM_TYPE_METRIC_ID] = "metric-id",
+ [PARSE_EVENTS__TERM_TYPE_RAW] = "raw",
+ [PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE] = "legacy-cache",
+ [PARSE_EVENTS__TERM_TYPE_HARDWARE] = "hardware",
+ };
+ if ((unsigned int)term_type >= __PARSE_EVENTS__TERM_TYPE_NR)
+ return "unknown term";
+
+ return config_term_names[term_type];
+}
+
+static bool
+config_term_avail(enum parse_events__term_type term_type, struct parse_events_error *err)
+{
+ char *err_str;
+
+ if (term_type < 0 || term_type >= __PARSE_EVENTS__TERM_TYPE_NR) {
+ parse_events_error__handle(err, -1,
+ strdup("Invalid term_type"), NULL);
+ return false;
+ }
+ if (!config_term_shrinked)
+ return true;
+
+ switch (term_type) {
+ case PARSE_EVENTS__TERM_TYPE_CONFIG:
+ case PARSE_EVENTS__TERM_TYPE_CONFIG1:
+ case PARSE_EVENTS__TERM_TYPE_CONFIG2:
+ case PARSE_EVENTS__TERM_TYPE_CONFIG3:
+ case PARSE_EVENTS__TERM_TYPE_NAME:
+ case PARSE_EVENTS__TERM_TYPE_METRIC_ID:
+ case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
+ case PARSE_EVENTS__TERM_TYPE_PERCORE:
+ return true;
+ case PARSE_EVENTS__TERM_TYPE_USER:
+ case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
+ case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
+ case PARSE_EVENTS__TERM_TYPE_TIME:
+ case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+ case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+ case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
+ case PARSE_EVENTS__TERM_TYPE_INHERIT:
+ case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+ case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+ case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
+ case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
+ case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
+ case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
+ case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
+ case PARSE_EVENTS__TERM_TYPE_RAW:
+ case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
+ case PARSE_EVENTS__TERM_TYPE_HARDWARE:
+ default:
+ if (!err)
+ return false;
+
+ /* term_type is validated so indexing is safe */
+ if (asprintf(&err_str, "'%s' is not usable in 'perf stat'",
+ config_term_name(term_type)) >= 0)
+ parse_events_error__handle(err, -1, err_str, NULL);
+ return false;
+ }
+}
+
+void parse_events__shrink_config_terms(void)
+{
+ config_term_shrinked = true;
+}
+
+static int config_term_common(struct perf_event_attr *attr,
+ struct parse_events_term *term,
+ struct parse_events_error *err)
+{
+#define CHECK_TYPE_VAL(type) \
+do { \
+ if (check_type_val(term, err, PARSE_EVENTS__TERM_TYPE_ ## type)) \
+ return -EINVAL; \
+} while (0)
+
+ switch (term->type_term) {
+ case PARSE_EVENTS__TERM_TYPE_CONFIG:
+ CHECK_TYPE_VAL(NUM);
+ attr->config = term->val.num;
+ break;
+ case PARSE_EVENTS__TERM_TYPE_CONFIG1:
+ CHECK_TYPE_VAL(NUM);
+ attr->config1 = term->val.num;
+ break;
+ case PARSE_EVENTS__TERM_TYPE_CONFIG2:
+ CHECK_TYPE_VAL(NUM);
+ attr->config2 = term->val.num;
+ break;
+ case PARSE_EVENTS__TERM_TYPE_CONFIG3:
+ CHECK_TYPE_VAL(NUM);
+ attr->config3 = term->val.num;
+ break;
+ case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
+ CHECK_TYPE_VAL(NUM);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
+ CHECK_TYPE_VAL(NUM);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
+ CHECK_TYPE_VAL(STR);
+ if (strcmp(term->val.str, "no") &&
+ parse_branch_str(term->val.str,
+ &attr->branch_sample_type)) {
+ parse_events_error__handle(err, term->err_val,
+ strdup("invalid branch sample type"),
+ NULL);
+ return -EINVAL;
+ }
+ break;
+ case PARSE_EVENTS__TERM_TYPE_TIME:
+ CHECK_TYPE_VAL(NUM);
+ if (term->val.num > 1) {
+ parse_events_error__handle(err, term->err_val,
+ strdup("expected 0 or 1"),
+ NULL);
+ return -EINVAL;
+ }
+ break;
+ case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+ CHECK_TYPE_VAL(STR);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+ CHECK_TYPE_VAL(NUM);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_INHERIT:
+ CHECK_TYPE_VAL(NUM);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
+ CHECK_TYPE_VAL(NUM);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
+ CHECK_TYPE_VAL(NUM);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
+ CHECK_TYPE_VAL(NUM);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_NAME:
+ CHECK_TYPE_VAL(STR);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_METRIC_ID:
+ CHECK_TYPE_VAL(STR);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_RAW:
+ CHECK_TYPE_VAL(STR);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+ CHECK_TYPE_VAL(NUM);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+ CHECK_TYPE_VAL(NUM);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_PERCORE:
+ CHECK_TYPE_VAL(NUM);
+ if ((unsigned int)term->val.num > 1) {
+ parse_events_error__handle(err, term->err_val,
+ strdup("expected 0 or 1"),
+ NULL);
+ return -EINVAL;
+ }
+ break;
+ case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
+ CHECK_TYPE_VAL(NUM);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
+ CHECK_TYPE_VAL(NUM);
+ if (term->val.num > UINT_MAX) {
+ parse_events_error__handle(err, term->err_val,
+ strdup("too big"),
+ NULL);
+ return -EINVAL;
+ }
+ break;
+ case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
+ case PARSE_EVENTS__TERM_TYPE_USER:
+ case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
+ case PARSE_EVENTS__TERM_TYPE_HARDWARE:
+ default:
+ parse_events_error__handle(err, term->err_term,
+ strdup(config_term_name(term->type_term)),
+ parse_events_formats_error_string(NULL));
+ return -EINVAL;
+ }
+
+ /*
+ * Check term availability after basic checking so
+ * PARSE_EVENTS__TERM_TYPE_USER can be found and filtered.
+ *
+ * If check availability at the entry of this function,
+ * user will see "'<sysfs term>' is not usable in 'perf stat'"
+ * if an invalid config term is provided for legacy events
+ * (for example, instructions/badterm/...), which is confusing.
+ */
+ if (!config_term_avail(term->type_term, err))
+ return -EINVAL;
+ return 0;
+#undef CHECK_TYPE_VAL
+}
+
+static int config_term_pmu(struct perf_event_attr *attr,
+ struct parse_events_term *term,
+ struct parse_events_error *err)
+{
+ if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE) {
+ const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type);
+
+ if (!pmu) {
+ char *err_str;
+
+ if (asprintf(&err_str, "Failed to find PMU for type %d", attr->type) >= 0)
+ parse_events_error__handle(err, term->err_term,
+ err_str, /*help=*/NULL);
+ return -EINVAL;
+ }
+ if (perf_pmu__supports_legacy_cache(pmu)) {
+ attr->type = PERF_TYPE_HW_CACHE;
+ return parse_events__decode_legacy_cache(term->config, pmu->type,
+ &attr->config);
+ } else
+ term->type_term = PARSE_EVENTS__TERM_TYPE_USER;
+ }
+ if (term->type_term == PARSE_EVENTS__TERM_TYPE_HARDWARE) {
+ const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type);
+
+ if (!pmu) {
+ char *err_str;
+
+ if (asprintf(&err_str, "Failed to find PMU for type %d", attr->type) >= 0)
+ parse_events_error__handle(err, term->err_term,
+ err_str, /*help=*/NULL);
+ return -EINVAL;
+ }
+ attr->type = PERF_TYPE_HARDWARE;
+ attr->config = term->val.num;
+ if (perf_pmus__supports_extended_type())
+ attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
+ return 0;
+ }
+ if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER ||
+ term->type_term == PARSE_EVENTS__TERM_TYPE_DRV_CFG) {
+ /*
+ * Always succeed for sysfs terms, as we dont know
+ * at this point what type they need to have.
+ */
+ return 0;
+ }
+ return config_term_common(attr, term, err);
+}
+
+#ifdef HAVE_LIBTRACEEVENT
+static int config_term_tracepoint(struct perf_event_attr *attr,
+ struct parse_events_term *term,
+ struct parse_events_error *err)
+{
+ switch (term->type_term) {
+ case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+ case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+ case PARSE_EVENTS__TERM_TYPE_INHERIT:
+ case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
+ case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+ case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+ case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
+ case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
+ case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
+ case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
+ return config_term_common(attr, term, err);
+ case PARSE_EVENTS__TERM_TYPE_USER:
+ case PARSE_EVENTS__TERM_TYPE_CONFIG:
+ case PARSE_EVENTS__TERM_TYPE_CONFIG1:
+ case PARSE_EVENTS__TERM_TYPE_CONFIG2:
+ case PARSE_EVENTS__TERM_TYPE_CONFIG3:
+ case PARSE_EVENTS__TERM_TYPE_NAME:
+ case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
+ case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
+ case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
+ case PARSE_EVENTS__TERM_TYPE_TIME:
+ case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
+ case PARSE_EVENTS__TERM_TYPE_PERCORE:
+ case PARSE_EVENTS__TERM_TYPE_METRIC_ID:
+ case PARSE_EVENTS__TERM_TYPE_RAW:
+ case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
+ case PARSE_EVENTS__TERM_TYPE_HARDWARE:
+ default:
+ if (err) {
+ parse_events_error__handle(err, term->err_term,
+ strdup(config_term_name(term->type_term)),
+ strdup("valid terms: call-graph,stack-size\n"));
+ }
+ return -EINVAL;
+ }
+
+ return 0;
+}
+#endif
+
+static int config_attr(struct perf_event_attr *attr,
+ struct list_head *head,
+ struct parse_events_error *err,
+ config_term_func_t config_term)
+{
+ struct parse_events_term *term;
+
+ list_for_each_entry(term, head, list)
+ if (config_term(attr, term, err))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int get_config_terms(struct list_head *head_config,
+ struct list_head *head_terms __maybe_unused)
+{
+#define ADD_CONFIG_TERM(__type, __weak) \
+ struct evsel_config_term *__t; \
+ \
+ __t = zalloc(sizeof(*__t)); \
+ if (!__t) \
+ return -ENOMEM; \
+ \
+ INIT_LIST_HEAD(&__t->list); \
+ __t->type = EVSEL__CONFIG_TERM_ ## __type; \
+ __t->weak = __weak; \
+ list_add_tail(&__t->list, head_terms)
+
+#define ADD_CONFIG_TERM_VAL(__type, __name, __val, __weak) \
+do { \
+ ADD_CONFIG_TERM(__type, __weak); \
+ __t->val.__name = __val; \
+} while (0)
+
+#define ADD_CONFIG_TERM_STR(__type, __val, __weak) \
+do { \
+ ADD_CONFIG_TERM(__type, __weak); \
+ __t->val.str = strdup(__val); \
+ if (!__t->val.str) { \
+ zfree(&__t); \
+ return -ENOMEM; \
+ } \
+ __t->free_str = true; \
+} while (0)
+
+ struct parse_events_term *term;
+
+ list_for_each_entry(term, head_config, list) {
+ switch (term->type_term) {
+ case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
+ ADD_CONFIG_TERM_VAL(PERIOD, period, term->val.num, term->weak);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
+ ADD_CONFIG_TERM_VAL(FREQ, freq, term->val.num, term->weak);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_TIME:
+ ADD_CONFIG_TERM_VAL(TIME, time, term->val.num, term->weak);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+ ADD_CONFIG_TERM_STR(CALLGRAPH, term->val.str, term->weak);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
+ ADD_CONFIG_TERM_STR(BRANCH, term->val.str, term->weak);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+ ADD_CONFIG_TERM_VAL(STACK_USER, stack_user,
+ term->val.num, term->weak);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_INHERIT:
+ ADD_CONFIG_TERM_VAL(INHERIT, inherit,
+ term->val.num ? 1 : 0, term->weak);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
+ ADD_CONFIG_TERM_VAL(INHERIT, inherit,
+ term->val.num ? 0 : 1, term->weak);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+ ADD_CONFIG_TERM_VAL(MAX_STACK, max_stack,
+ term->val.num, term->weak);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+ ADD_CONFIG_TERM_VAL(MAX_EVENTS, max_events,
+ term->val.num, term->weak);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
+ ADD_CONFIG_TERM_VAL(OVERWRITE, overwrite,
+ term->val.num ? 1 : 0, term->weak);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
+ ADD_CONFIG_TERM_VAL(OVERWRITE, overwrite,
+ term->val.num ? 0 : 1, term->weak);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
+ ADD_CONFIG_TERM_STR(DRV_CFG, term->val.str, term->weak);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_PERCORE:
+ ADD_CONFIG_TERM_VAL(PERCORE, percore,
+ term->val.num ? true : false, term->weak);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
+ ADD_CONFIG_TERM_VAL(AUX_OUTPUT, aux_output,
+ term->val.num ? 1 : 0, term->weak);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
+ ADD_CONFIG_TERM_VAL(AUX_SAMPLE_SIZE, aux_sample_size,
+ term->val.num, term->weak);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_USER:
+ case PARSE_EVENTS__TERM_TYPE_CONFIG:
+ case PARSE_EVENTS__TERM_TYPE_CONFIG1:
+ case PARSE_EVENTS__TERM_TYPE_CONFIG2:
+ case PARSE_EVENTS__TERM_TYPE_CONFIG3:
+ case PARSE_EVENTS__TERM_TYPE_NAME:
+ case PARSE_EVENTS__TERM_TYPE_METRIC_ID:
+ case PARSE_EVENTS__TERM_TYPE_RAW:
+ case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
+ case PARSE_EVENTS__TERM_TYPE_HARDWARE:
+ default:
+ break;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Add EVSEL__CONFIG_TERM_CFG_CHG where cfg_chg will have a bit set for
+ * each bit of attr->config that the user has changed.
+ */
+static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config,
+ struct list_head *head_terms)
+{
+ struct parse_events_term *term;
+ u64 bits = 0;
+ int type;
+
+ list_for_each_entry(term, head_config, list) {
+ switch (term->type_term) {
+ case PARSE_EVENTS__TERM_TYPE_USER:
+ type = perf_pmu__format_type(pmu, term->config);
+ if (type != PERF_PMU_FORMAT_VALUE_CONFIG)
+ continue;
+ bits |= perf_pmu__format_bits(pmu, term->config);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_CONFIG:
+ bits = ~(u64)0;
+ break;
+ case PARSE_EVENTS__TERM_TYPE_CONFIG1:
+ case PARSE_EVENTS__TERM_TYPE_CONFIG2:
+ case PARSE_EVENTS__TERM_TYPE_CONFIG3:
+ case PARSE_EVENTS__TERM_TYPE_NAME:
+ case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
+ case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
+ case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
+ case PARSE_EVENTS__TERM_TYPE_TIME:
+ case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+ case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+ case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
+ case PARSE_EVENTS__TERM_TYPE_INHERIT:
+ case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+ case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+ case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
+ case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
+ case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
+ case PARSE_EVENTS__TERM_TYPE_PERCORE:
+ case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
+ case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
+ case PARSE_EVENTS__TERM_TYPE_METRIC_ID:
+ case PARSE_EVENTS__TERM_TYPE_RAW:
+ case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
+ case PARSE_EVENTS__TERM_TYPE_HARDWARE:
+ default:
+ break;
+ }
+ }
+
+ if (bits)
+ ADD_CONFIG_TERM_VAL(CFG_CHG, cfg_chg, bits, false);
+
+#undef ADD_CONFIG_TERM
+ return 0;
+}
+
+int parse_events_add_tracepoint(struct list_head *list, int *idx,
+ const char *sys, const char *event,
+ struct parse_events_error *err,
+ struct list_head *head_config, void *loc_)
+{
+ YYLTYPE *loc = loc_;
+#ifdef HAVE_LIBTRACEEVENT
+ if (head_config) {
+ struct perf_event_attr attr;
+
+ if (config_attr(&attr, head_config, err,
+ config_term_tracepoint))
+ return -EINVAL;
+ }
+
+ if (strpbrk(sys, "*?"))
+ return add_tracepoint_multi_sys(list, idx, sys, event,
+ err, head_config, loc);
+ else
+ return add_tracepoint_event(list, idx, sys, event,
+ err, head_config, loc);
+#else
+ (void)list;
+ (void)idx;
+ (void)sys;
+ (void)event;
+ (void)head_config;
+ parse_events_error__handle(err, loc->first_column, strdup("unsupported tracepoint"),
+ strdup("libtraceevent is necessary for tracepoint support"));
+ return -1;
+#endif
+}
+
+static int __parse_events_add_numeric(struct parse_events_state *parse_state,
+ struct list_head *list,
+ struct perf_pmu *pmu, u32 type, u32 extended_type,
+ u64 config, struct list_head *head_config)
+{
+ struct perf_event_attr attr;
+ LIST_HEAD(config_terms);
+ const char *name, *metric_id;
+ int ret;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.type = type;
+ attr.config = config;
+ if (extended_type && (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE)) {
+ assert(perf_pmus__supports_extended_type());
+ attr.config |= (u64)extended_type << PERF_PMU_TYPE_SHIFT;
+ }
+
+ if (head_config) {
+ if (config_attr(&attr, head_config, parse_state->error,
+ config_term_common))
+ return -EINVAL;
+
+ if (get_config_terms(head_config, &config_terms))
+ return -ENOMEM;
+ }
+
+ name = get_config_name(head_config);
+ metric_id = get_config_metric_id(head_config);
+ ret = __add_event(list, &parse_state->idx, &attr, /*init_attr*/true, name,
+ metric_id, pmu, &config_terms, /*auto_merge_stats=*/false,
+ /*cpu_list=*/NULL) ? 0 : -ENOMEM;
+ free_config_terms(&config_terms);
+ return ret;
+}
+
+int parse_events_add_numeric(struct parse_events_state *parse_state,
+ struct list_head *list,
+ u32 type, u64 config,
+ struct list_head *head_config,
+ bool wildcard)
+{
+ struct perf_pmu *pmu = NULL;
+ bool found_supported = false;
+
+ /* Wildcards on numeric values are only supported by core PMUs. */
+ if (wildcard && perf_pmus__supports_extended_type()) {
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ int ret;
+
+ found_supported = true;
+ if (parse_events__filter_pmu(parse_state, pmu))
+ continue;
+
+ ret = __parse_events_add_numeric(parse_state, list, pmu,
+ type, pmu->type,
+ config, head_config);
+ if (ret)
+ return ret;
+ }
+ if (found_supported)
+ return 0;
+ }
+ return __parse_events_add_numeric(parse_state, list, perf_pmus__find_by_type(type),
+ type, /*extended_type=*/0, config, head_config);
+}
+
+int parse_events_add_tool(struct parse_events_state *parse_state,
+ struct list_head *list,
+ int tool_event)
+{
+ return add_event_tool(list, &parse_state->idx, tool_event);
+}
+
+static bool config_term_percore(struct list_head *config_terms)
+{
+ struct evsel_config_term *term;
+
+ list_for_each_entry(term, config_terms, list) {
+ if (term->type == EVSEL__CONFIG_TERM_PERCORE)
+ return term->val.percore;
+ }
+
+ return false;
+}
+
+int parse_events_add_pmu(struct parse_events_state *parse_state,
+ struct list_head *list, const char *name,
+ struct list_head *head_config,
+ bool auto_merge_stats, void *loc_)
+{
+ struct perf_event_attr attr;
+ struct perf_pmu_info info;
+ struct perf_pmu *pmu;
+ struct evsel *evsel;
+ struct parse_events_error *err = parse_state->error;
+ YYLTYPE *loc = loc_;
+ LIST_HEAD(config_terms);
+
+ pmu = parse_state->fake_pmu ?: perf_pmus__find(name);
+
+ if (!pmu) {
+ char *err_str;
+
+ if (asprintf(&err_str,
+ "Cannot find PMU `%s'. Missing kernel support?",
+ name) >= 0)
+ parse_events_error__handle(err, loc->first_column, err_str, NULL);
+ return -EINVAL;
+ }
+
+ if (verbose > 1) {
+ struct strbuf sb;
+
+ strbuf_init(&sb, /*hint=*/ 0);
+ if (pmu->selectable && !head_config) {
+ strbuf_addf(&sb, "%s//", name);
+ } else {
+ strbuf_addf(&sb, "%s/", name);
+ parse_events_term__to_strbuf(head_config, &sb);
+ strbuf_addch(&sb, '/');
+ }
+ fprintf(stderr, "Attempt to add: %s\n", sb.buf);
+ strbuf_release(&sb);
+ }
+ if (head_config)
+ fix_raw(head_config, pmu);
+
+ if (pmu->default_config) {
+ memcpy(&attr, pmu->default_config,
+ sizeof(struct perf_event_attr));
+ } else {
+ memset(&attr, 0, sizeof(attr));
+ }
+ attr.type = pmu->type;
+
+ if (!head_config) {
+ evsel = __add_event(list, &parse_state->idx, &attr,
+ /*init_attr=*/true, /*name=*/NULL,
+ /*metric_id=*/NULL, pmu,
+ /*config_terms=*/NULL, auto_merge_stats,
+ /*cpu_list=*/NULL);
+ return evsel ? 0 : -ENOMEM;
+ }
+
+ if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, head_config, &info, err))
+ return -EINVAL;
+
+ if (verbose > 1) {
+ struct strbuf sb;
+
+ strbuf_init(&sb, /*hint=*/ 0);
+ parse_events_term__to_strbuf(head_config, &sb);
+ fprintf(stderr, "..after resolving event: %s/%s/\n", name, sb.buf);
+ strbuf_release(&sb);
+ }
+
+ /*
+ * Configure hardcoded terms first, no need to check
+ * return value when called with fail == 0 ;)
+ */
+ if (config_attr(&attr, head_config, parse_state->error, config_term_pmu))
+ return -EINVAL;
+
+ if (get_config_terms(head_config, &config_terms))
+ return -ENOMEM;
+
+ /*
+ * When using default config, record which bits of attr->config were
+ * changed by the user.
+ */
+ if (pmu->default_config && get_config_chgs(pmu, head_config, &config_terms))
+ return -ENOMEM;
+
+ if (!parse_state->fake_pmu && perf_pmu__config(pmu, &attr, head_config, parse_state->error)) {
+ free_config_terms(&config_terms);
+ return -EINVAL;
+ }
+
+ evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true,
+ get_config_name(head_config),
+ get_config_metric_id(head_config), pmu,
+ &config_terms, auto_merge_stats, /*cpu_list=*/NULL);
+ if (!evsel)
+ return -ENOMEM;
+
+ if (evsel->name)
+ evsel->use_config_name = true;
+
+ evsel->percore = config_term_percore(&evsel->config_terms);
+
+ if (parse_state->fake_pmu)
+ return 0;
+
+ free((char *)evsel->unit);
+ evsel->unit = strdup(info.unit);
+ evsel->scale = info.scale;
+ evsel->per_pkg = info.per_pkg;
+ evsel->snapshot = info.snapshot;
+ return 0;
+}
+
+int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
+ char *str, struct list_head *head,
+ struct list_head **listp, void *loc_)
+{
+ struct parse_events_term *term;
+ struct list_head *list = NULL;
+ struct list_head *orig_head = NULL;
+ struct perf_pmu *pmu = NULL;
+ YYLTYPE *loc = loc_;
+ int ok = 0;
+ const char *config;
+
+ *listp = NULL;
+
+ if (!head) {
+ head = malloc(sizeof(struct list_head));
+ if (!head)
+ goto out_err;
+
+ INIT_LIST_HEAD(head);
+ }
+ config = strdup(str);
+ if (!config)
+ goto out_err;
+
+ if (parse_events_term__num(&term,
+ PARSE_EVENTS__TERM_TYPE_USER,
+ config, /*num=*/1, /*novalue=*/true,
+ loc, /*loc_val=*/NULL) < 0) {
+ zfree(&config);
+ goto out_err;
+ }
+ list_add_tail(&term->list, head);
+
+ /* Add it for all PMUs that support the alias */
+ list = malloc(sizeof(struct list_head));
+ if (!list)
+ goto out_err;
+
+ INIT_LIST_HEAD(list);
+
+ while ((pmu = perf_pmus__scan(pmu)) != NULL) {
+ bool auto_merge_stats;
+
+ if (parse_events__filter_pmu(parse_state, pmu))
+ continue;
+
+ if (!perf_pmu__have_event(pmu, str))
+ continue;
+
+ auto_merge_stats = perf_pmu__auto_merge_stats(pmu);
+ parse_events_copy_term_list(head, &orig_head);
+ if (!parse_events_add_pmu(parse_state, list, pmu->name,
+ orig_head, auto_merge_stats, loc)) {
+ struct strbuf sb;
+
+ strbuf_init(&sb, /*hint=*/ 0);
+ parse_events_term__to_strbuf(orig_head, &sb);
+ pr_debug("%s -> %s/%s/\n", str, pmu->name, sb.buf);
+ strbuf_release(&sb);
+ ok++;
+ }
+ parse_events_terms__delete(orig_head);
+ }
+
+ if (parse_state->fake_pmu) {
+ if (!parse_events_add_pmu(parse_state, list, str, head,
+ /*auto_merge_stats=*/true, loc)) {
+ struct strbuf sb;
+
+ strbuf_init(&sb, /*hint=*/ 0);
+ parse_events_term__to_strbuf(head, &sb);
+ pr_debug("%s -> %s/%s/\n", str, "fake_pmu", sb.buf);
+ strbuf_release(&sb);
+ ok++;
+ }
+ }
+
+out_err:
+ if (ok)
+ *listp = list;
+ else
+ free(list);
+
+ parse_events_terms__delete(head);
+ return ok ? 0 : -1;
+}
+
+int parse_events__modifier_group(struct list_head *list,
+ char *event_mod)
+{
+ return parse_events__modifier_event(list, event_mod, true);
+}
+
+void parse_events__set_leader(char *name, struct list_head *list)
+{
+ struct evsel *leader;
+
+ if (list_empty(list)) {
+ WARN_ONCE(true, "WARNING: failed to set leader: empty list");
+ return;
+ }
+
+ leader = list_first_entry(list, struct evsel, core.node);
+ __perf_evlist__set_leader(list, &leader->core);
+ leader->group_name = name;
+}
+
+/* list_event is assumed to point to malloc'ed memory */
+void parse_events_update_lists(struct list_head *list_event,
+ struct list_head *list_all)
+{
+ /*
+ * Called for single event definition. Update the
+ * 'all event' list, and reinit the 'single event'
+ * list, for next event definition.
+ */
+ list_splice_tail(list_event, list_all);
+ free(list_event);
+}
+
+struct event_modifier {
+ int eu;
+ int ek;
+ int eh;
+ int eH;
+ int eG;
+ int eI;
+ int precise;
+ int precise_max;
+ int exclude_GH;
+ int sample_read;
+ int pinned;
+ int weak;
+ int exclusive;
+ int bpf_counter;
+};
+
+static int get_event_modifier(struct event_modifier *mod, char *str,
+ struct evsel *evsel)
+{
+ int eu = evsel ? evsel->core.attr.exclude_user : 0;
+ int ek = evsel ? evsel->core.attr.exclude_kernel : 0;
+ int eh = evsel ? evsel->core.attr.exclude_hv : 0;
+ int eH = evsel ? evsel->core.attr.exclude_host : 0;
+ int eG = evsel ? evsel->core.attr.exclude_guest : 0;
+ int eI = evsel ? evsel->core.attr.exclude_idle : 0;
+ int precise = evsel ? evsel->core.attr.precise_ip : 0;
+ int precise_max = 0;
+ int sample_read = 0;
+ int pinned = evsel ? evsel->core.attr.pinned : 0;
+ int exclusive = evsel ? evsel->core.attr.exclusive : 0;
+
+ int exclude = eu | ek | eh;
+ int exclude_GH = evsel ? evsel->exclude_GH : 0;
+ int weak = 0;
+ int bpf_counter = 0;
+
+ memset(mod, 0, sizeof(*mod));
+
+ while (*str) {
+ if (*str == 'u') {
+ if (!exclude)
+ exclude = eu = ek = eh = 1;
+ if (!exclude_GH && !perf_guest)
+ eG = 1;
+ eu = 0;
+ } else if (*str == 'k') {
+ if (!exclude)
+ exclude = eu = ek = eh = 1;
+ ek = 0;
+ } else if (*str == 'h') {
+ if (!exclude)
+ exclude = eu = ek = eh = 1;
+ eh = 0;
+ } else if (*str == 'G') {
+ if (!exclude_GH)
+ exclude_GH = eG = eH = 1;
+ eG = 0;
+ } else if (*str == 'H') {
+ if (!exclude_GH)
+ exclude_GH = eG = eH = 1;
+ eH = 0;
+ } else if (*str == 'I') {
+ eI = 1;
+ } else if (*str == 'p') {
+ precise++;
+ /* use of precise requires exclude_guest */
+ if (!exclude_GH)
+ eG = 1;
+ } else if (*str == 'P') {
+ precise_max = 1;
+ } else if (*str == 'S') {
+ sample_read = 1;
+ } else if (*str == 'D') {
+ pinned = 1;
+ } else if (*str == 'e') {
+ exclusive = 1;
+ } else if (*str == 'W') {
+ weak = 1;
+ } else if (*str == 'b') {
+ bpf_counter = 1;
+ } else
+ break;
+
+ ++str;
+ }
+
+ /*
+ * precise ip:
+ *
+ * 0 - SAMPLE_IP can have arbitrary skid
+ * 1 - SAMPLE_IP must have constant skid
+ * 2 - SAMPLE_IP requested to have 0 skid
+ * 3 - SAMPLE_IP must have 0 skid
+ *
+ * See also PERF_RECORD_MISC_EXACT_IP
+ */
+ if (precise > 3)
+ return -EINVAL;
+
+ mod->eu = eu;
+ mod->ek = ek;
+ mod->eh = eh;
+ mod->eH = eH;
+ mod->eG = eG;
+ mod->eI = eI;
+ mod->precise = precise;
+ mod->precise_max = precise_max;
+ mod->exclude_GH = exclude_GH;
+ mod->sample_read = sample_read;
+ mod->pinned = pinned;
+ mod->weak = weak;
+ mod->bpf_counter = bpf_counter;
+ mod->exclusive = exclusive;
+
+ return 0;
+}
+
+/*
+ * Basic modifier sanity check to validate it contains only one
+ * instance of any modifier (apart from 'p') present.
+ */
+static int check_modifier(char *str)
+{
+ char *p = str;
+
+ /* The sizeof includes 0 byte as well. */
+ if (strlen(str) > (sizeof("ukhGHpppPSDIWeb") - 1))
+ return -1;
+
+ while (*p) {
+ if (*p != 'p' && strchr(p + 1, *p))
+ return -1;
+ p++;
+ }
+
+ return 0;
+}
+
+int parse_events__modifier_event(struct list_head *list, char *str, bool add)
+{
+ struct evsel *evsel;
+ struct event_modifier mod;
+
+ if (str == NULL)
+ return 0;
+
+ if (check_modifier(str))
+ return -EINVAL;
+
+ if (!add && get_event_modifier(&mod, str, NULL))
+ return -EINVAL;
+
+ __evlist__for_each_entry(list, evsel) {
+ if (add && get_event_modifier(&mod, str, evsel))
+ return -EINVAL;
+
+ evsel->core.attr.exclude_user = mod.eu;
+ evsel->core.attr.exclude_kernel = mod.ek;
+ evsel->core.attr.exclude_hv = mod.eh;
+ evsel->core.attr.precise_ip = mod.precise;
+ evsel->core.attr.exclude_host = mod.eH;
+ evsel->core.attr.exclude_guest = mod.eG;
+ evsel->core.attr.exclude_idle = mod.eI;
+ evsel->exclude_GH = mod.exclude_GH;
+ evsel->sample_read = mod.sample_read;
+ evsel->precise_max = mod.precise_max;
+ evsel->weak_group = mod.weak;
+ evsel->bpf_counter = mod.bpf_counter;
+
+ if (evsel__is_group_leader(evsel)) {
+ evsel->core.attr.pinned = mod.pinned;
+ evsel->core.attr.exclusive = mod.exclusive;
+ }
+ }
+
+ return 0;
+}
+
+int parse_events_name(struct list_head *list, const char *name)
+{
+ struct evsel *evsel;
+
+ __evlist__for_each_entry(list, evsel) {
+ if (!evsel->name) {
+ evsel->name = strdup(name);
+ if (!evsel->name)
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+static int parse_events__scanner(const char *str,
+ FILE *input,
+ struct parse_events_state *parse_state)
+{
+ YY_BUFFER_STATE buffer;
+ void *scanner;
+ int ret;
+
+ ret = parse_events_lex_init_extra(parse_state, &scanner);
+ if (ret)
+ return ret;
+
+ if (str)
+ buffer = parse_events__scan_string(str, scanner);
+ else
+ parse_events_set_in(input, scanner);
+
+#ifdef PARSER_DEBUG
+ parse_events_debug = 1;
+ parse_events_set_debug(1, scanner);
+#endif
+ ret = parse_events_parse(parse_state, scanner);
+
+ if (str) {
+ parse_events__flush_buffer(buffer, scanner);
+ parse_events__delete_buffer(buffer, scanner);
+ }
+ parse_events_lex_destroy(scanner);
+ return ret;
+}
+
+/*
+ * parse event config string, return a list of event terms.
+ */
+int parse_events_terms(struct list_head *terms, const char *str, FILE *input)
+{
+ struct parse_events_state parse_state = {
+ .terms = NULL,
+ .stoken = PE_START_TERMS,
+ };
+ int ret;
+
+ ret = parse_events__scanner(str, input, &parse_state);
+
+ if (!ret) {
+ list_splice(parse_state.terms, terms);
+ zfree(&parse_state.terms);
+ return 0;
+ }
+
+ parse_events_terms__delete(parse_state.terms);
+ return ret;
+}
+
+static int evsel__compute_group_pmu_name(struct evsel *evsel,
+ const struct list_head *head)
+{
+ struct evsel *leader = evsel__leader(evsel);
+ struct evsel *pos;
+ const char *group_pmu_name;
+ struct perf_pmu *pmu = evsel__find_pmu(evsel);
+
+ if (!pmu) {
+ /*
+ * For PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE types the PMU
+ * is a core PMU, but in heterogeneous systems this is
+ * unknown. For now pick the first core PMU.
+ */
+ pmu = perf_pmus__scan_core(NULL);
+ }
+ if (!pmu) {
+ pr_debug("No PMU found for '%s'\n", evsel__name(evsel));
+ return -EINVAL;
+ }
+ group_pmu_name = pmu->name;
+ /*
+ * Software events may be in a group with other uncore PMU events. Use
+ * the pmu_name of the first non-software event to avoid breaking the
+ * software event out of the group.
+ *
+ * Aux event leaders, like intel_pt, expect a group with events from
+ * other PMUs, so substitute the AUX event's PMU in this case.
+ */
+ if (perf_pmu__is_software(pmu) || evsel__is_aux_event(leader)) {
+ struct perf_pmu *leader_pmu = evsel__find_pmu(leader);
+
+ if (!leader_pmu) {
+ /* As with determining pmu above. */
+ leader_pmu = perf_pmus__scan_core(NULL);
+ }
+ /*
+ * Starting with the leader, find the first event with a named
+ * non-software PMU. for_each_group_(member|evsel) isn't used as
+ * the list isn't yet sorted putting evsel's in the same group
+ * together.
+ */
+ if (leader_pmu && !perf_pmu__is_software(leader_pmu)) {
+ group_pmu_name = leader_pmu->name;
+ } else if (leader->core.nr_members > 1) {
+ list_for_each_entry(pos, head, core.node) {
+ struct perf_pmu *pos_pmu;
+
+ if (pos == leader || evsel__leader(pos) != leader)
+ continue;
+ pos_pmu = evsel__find_pmu(pos);
+ if (!pos_pmu) {
+ /* As with determining pmu above. */
+ pos_pmu = perf_pmus__scan_core(NULL);
+ }
+ if (pos_pmu && !perf_pmu__is_software(pos_pmu)) {
+ group_pmu_name = pos_pmu->name;
+ break;
+ }
+ }
+ }
+ }
+ /* Assign the actual name taking care that the fake PMU lacks a name. */
+ evsel->group_pmu_name = strdup(group_pmu_name ?: "fake");
+ return evsel->group_pmu_name ? 0 : -ENOMEM;
+}
+
+__weak int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
+{
+ /* Order by insertion index. */
+ return lhs->core.idx - rhs->core.idx;
+}
+
+static int evlist__cmp(void *_fg_idx, const struct list_head *l, const struct list_head *r)
+{
+ const struct perf_evsel *lhs_core = container_of(l, struct perf_evsel, node);
+ const struct evsel *lhs = container_of(lhs_core, struct evsel, core);
+ const struct perf_evsel *rhs_core = container_of(r, struct perf_evsel, node);
+ const struct evsel *rhs = container_of(rhs_core, struct evsel, core);
+ int *force_grouped_idx = _fg_idx;
+ int lhs_sort_idx, rhs_sort_idx, ret;
+ const char *lhs_pmu_name, *rhs_pmu_name;
+ bool lhs_has_group, rhs_has_group;
+
+ /*
+ * First sort by grouping/leader. Read the leader idx only if the evsel
+ * is part of a group, by default ungrouped events will be sorted
+ * relative to grouped events based on where the first ungrouped event
+ * occurs. If both events don't have a group we want to fall-through to
+ * the arch specific sorting, that can reorder and fix things like
+ * Intel's topdown events.
+ */
+ if (lhs_core->leader != lhs_core || lhs_core->nr_members > 1) {
+ lhs_has_group = true;
+ lhs_sort_idx = lhs_core->leader->idx;
+ } else {
+ lhs_has_group = false;
+ lhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs)
+ ? *force_grouped_idx
+ : lhs_core->idx;
+ }
+ if (rhs_core->leader != rhs_core || rhs_core->nr_members > 1) {
+ rhs_has_group = true;
+ rhs_sort_idx = rhs_core->leader->idx;
+ } else {
+ rhs_has_group = false;
+ rhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs)
+ ? *force_grouped_idx
+ : rhs_core->idx;
+ }
+
+ if (lhs_sort_idx != rhs_sort_idx)
+ return lhs_sort_idx - rhs_sort_idx;
+
+ /* Group by PMU if there is a group. Groups can't span PMUs. */
+ if (lhs_has_group && rhs_has_group) {
+ lhs_pmu_name = lhs->group_pmu_name;
+ rhs_pmu_name = rhs->group_pmu_name;
+ ret = strcmp(lhs_pmu_name, rhs_pmu_name);
+ if (ret)
+ return ret;
+ }
+
+ /* Architecture specific sorting. */
+ return arch_evlist__cmp(lhs, rhs);
+}
+
+static int parse_events__sort_events_and_fix_groups(struct list_head *list)
+{
+ int idx = 0, force_grouped_idx = -1;
+ struct evsel *pos, *cur_leader = NULL;
+ struct perf_evsel *cur_leaders_grp = NULL;
+ bool idx_changed = false, cur_leader_force_grouped = false;
+ int orig_num_leaders = 0, num_leaders = 0;
+ int ret;
+
+ /*
+ * Compute index to insert ungrouped events at. Place them where the
+ * first ungrouped event appears.
+ */
+ list_for_each_entry(pos, list, core.node) {
+ const struct evsel *pos_leader = evsel__leader(pos);
+
+ ret = evsel__compute_group_pmu_name(pos, list);
+ if (ret)
+ return ret;
+
+ if (pos == pos_leader)
+ orig_num_leaders++;
+
+ /*
+ * Ensure indexes are sequential, in particular for multiple
+ * event lists being merged. The indexes are used to detect when
+ * the user order is modified.
+ */
+ pos->core.idx = idx++;
+
+ /* Remember an index to sort all forced grouped events together to. */
+ if (force_grouped_idx == -1 && pos == pos_leader && pos->core.nr_members < 2 &&
+ arch_evsel__must_be_in_group(pos))
+ force_grouped_idx = pos->core.idx;
+ }
+
+ /* Sort events. */
+ list_sort(&force_grouped_idx, list, evlist__cmp);
+
+ /*
+ * Recompute groups, splitting for PMUs and adding groups for events
+ * that require them.
+ */
+ idx = 0;
+ list_for_each_entry(pos, list, core.node) {
+ const struct evsel *pos_leader = evsel__leader(pos);
+ const char *pos_pmu_name = pos->group_pmu_name;
+ const char *cur_leader_pmu_name;
+ bool pos_force_grouped = force_grouped_idx != -1 &&
+ arch_evsel__must_be_in_group(pos);
+
+ /* Reset index and nr_members. */
+ if (pos->core.idx != idx)
+ idx_changed = true;
+ pos->core.idx = idx++;
+ pos->core.nr_members = 0;
+
+ /*
+ * Set the group leader respecting the given groupings and that
+ * groups can't span PMUs.
+ */
+ if (!cur_leader)
+ cur_leader = pos;
+
+ cur_leader_pmu_name = cur_leader->group_pmu_name;
+ if ((cur_leaders_grp != pos->core.leader &&
+ (!pos_force_grouped || !cur_leader_force_grouped)) ||
+ strcmp(cur_leader_pmu_name, pos_pmu_name)) {
+ /* Event is for a different group/PMU than last. */
+ cur_leader = pos;
+ /*
+ * Remember the leader's group before it is overwritten,
+ * so that later events match as being in the same
+ * group.
+ */
+ cur_leaders_grp = pos->core.leader;
+ /*
+ * Avoid forcing events into groups with events that
+ * don't need to be in the group.
+ */
+ cur_leader_force_grouped = pos_force_grouped;
+ }
+ if (pos_leader != cur_leader) {
+ /* The leader changed so update it. */
+ evsel__set_leader(pos, cur_leader);
+ }
+ }
+ list_for_each_entry(pos, list, core.node) {
+ struct evsel *pos_leader = evsel__leader(pos);
+
+ if (pos == pos_leader)
+ num_leaders++;
+ pos_leader->core.nr_members++;
+ }
+ return (idx_changed || num_leaders != orig_num_leaders) ? 1 : 0;
+}
+
+int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filter,
+ struct parse_events_error *err, struct perf_pmu *fake_pmu,
+ bool warn_if_reordered)
+{
+ struct parse_events_state parse_state = {
+ .list = LIST_HEAD_INIT(parse_state.list),
+ .idx = evlist->core.nr_entries,
+ .error = err,
+ .stoken = PE_START_EVENTS,
+ .fake_pmu = fake_pmu,
+ .pmu_filter = pmu_filter,
+ .match_legacy_cache_terms = true,
+ };
+ int ret, ret2;
+
+ ret = parse_events__scanner(str, /*input=*/ NULL, &parse_state);
+
+ if (!ret && list_empty(&parse_state.list)) {
+ WARN_ONCE(true, "WARNING: event parser found nothing\n");
+ return -1;
+ }
+
+ ret2 = parse_events__sort_events_and_fix_groups(&parse_state.list);
+ if (ret2 < 0)
+ return ret;
+
+ if (ret2 && warn_if_reordered && !parse_state.wild_card_pmus)
+ pr_warning("WARNING: events were regrouped to match PMUs\n");
+
+ /*
+ * Add list to the evlist even with errors to allow callers to clean up.
+ */
+ evlist__splice_list_tail(evlist, &parse_state.list);
+
+ if (!ret) {
+ struct evsel *last;
+
+ last = evlist__last(evlist);
+ last->cmdline_group_boundary = true;
+
+ return 0;
+ }
+
+ /*
+ * There are 2 users - builtin-record and builtin-test objects.
+ * Both call evlist__delete in case of error, so we dont
+ * need to bother.
+ */
+ return ret;
+}
+
+int parse_event(struct evlist *evlist, const char *str)
+{
+ struct parse_events_error err;
+ int ret;
+
+ parse_events_error__init(&err);
+ ret = parse_events(evlist, str, &err);
+ parse_events_error__exit(&err);
+ return ret;
+}
+
+void parse_events_error__init(struct parse_events_error *err)
+{
+ bzero(err, sizeof(*err));
+}
+
+void parse_events_error__exit(struct parse_events_error *err)
+{
+ zfree(&err->str);
+ zfree(&err->help);
+ zfree(&err->first_str);
+ zfree(&err->first_help);
+}
+
+void parse_events_error__handle(struct parse_events_error *err, int idx,
+ char *str, char *help)
+{
+ if (WARN(!str || !err, "WARNING: failed to provide error string or struct\n"))
+ goto out_free;
+ switch (err->num_errors) {
+ case 0:
+ err->idx = idx;
+ err->str = str;
+ err->help = help;
+ break;
+ case 1:
+ err->first_idx = err->idx;
+ err->idx = idx;
+ err->first_str = err->str;
+ err->str = str;
+ err->first_help = err->help;
+ err->help = help;
+ break;
+ default:
+ pr_debug("Multiple errors dropping message: %s (%s)\n",
+ err->str, err->help ?: "<no help>");
+ free(err->str);
+ err->str = str;
+ free(err->help);
+ err->help = help;
+ break;
+ }
+ err->num_errors++;
+ return;
+
+out_free:
+ free(str);
+ free(help);
+}
+
+#define MAX_WIDTH 1000
+static int get_term_width(void)
+{
+ struct winsize ws;
+
+ get_term_dimensions(&ws);
+ return ws.ws_col > MAX_WIDTH ? MAX_WIDTH : ws.ws_col;
+}
+
+static void __parse_events_error__print(int err_idx, const char *err_str,
+ const char *err_help, const char *event)
+{
+ const char *str = "invalid or unsupported event: ";
+ char _buf[MAX_WIDTH];
+ char *buf = (char *) event;
+ int idx = 0;
+ if (err_str) {
+ /* -2 for extra '' in the final fprintf */
+ int width = get_term_width() - 2;
+ int len_event = strlen(event);
+ int len_str, max_len, cut = 0;
+
+ /*
+ * Maximum error index indent, we will cut
+ * the event string if it's bigger.
+ */
+ int max_err_idx = 13;
+
+ /*
+ * Let's be specific with the message when
+ * we have the precise error.
+ */
+ str = "event syntax error: ";
+ len_str = strlen(str);
+ max_len = width - len_str;
+
+ buf = _buf;
+
+ /* We're cutting from the beginning. */
+ if (err_idx > max_err_idx)
+ cut = err_idx - max_err_idx;
+
+ strncpy(buf, event + cut, max_len);
+
+ /* Mark cut parts with '..' on both sides. */
+ if (cut)
+ buf[0] = buf[1] = '.';
+
+ if ((len_event - cut) > max_len) {
+ buf[max_len - 1] = buf[max_len - 2] = '.';
+ buf[max_len] = 0;
+ }
+
+ idx = len_str + err_idx - cut;
+ }
+
+ fprintf(stderr, "%s'%s'\n", str, buf);
+ if (idx) {
+ fprintf(stderr, "%*s\\___ %s\n", idx + 1, "", err_str);
+ if (err_help)
+ fprintf(stderr, "\n%s\n", err_help);
+ }
+}
+
+void parse_events_error__print(struct parse_events_error *err,
+ const char *event)
+{
+ if (!err->num_errors)
+ return;
+
+ __parse_events_error__print(err->idx, err->str, err->help, event);
+
+ if (err->num_errors > 1) {
+ fputs("\nInitial error:\n", stderr);
+ __parse_events_error__print(err->first_idx, err->first_str,
+ err->first_help, event);
+ }
+}
+
+#undef MAX_WIDTH
+
+int parse_events_option(const struct option *opt, const char *str,
+ int unset __maybe_unused)
+{
+ struct parse_events_option_args *args = opt->value;
+ struct parse_events_error err;
+ int ret;
+
+ parse_events_error__init(&err);
+ ret = __parse_events(*args->evlistp, str, args->pmu_filter, &err,
+ /*fake_pmu=*/NULL, /*warn_if_reordered=*/true);
+
+ if (ret) {
+ parse_events_error__print(&err, str);
+ fprintf(stderr, "Run 'perf list' for a list of valid events\n");
+ }
+ parse_events_error__exit(&err);
+
+ return ret;
+}
+
+int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset)
+{
+ struct parse_events_option_args *args = opt->value;
+ int ret;
+
+ if (*args->evlistp == NULL) {
+ *args->evlistp = evlist__new();
+
+ if (*args->evlistp == NULL) {
+ fprintf(stderr, "Not enough memory to create evlist\n");
+ return -1;
+ }
+ }
+ ret = parse_events_option(opt, str, unset);
+ if (ret) {
+ evlist__delete(*args->evlistp);
+ *args->evlistp = NULL;
+ }
+
+ return ret;
+}
+
+static int
+foreach_evsel_in_last_glob(struct evlist *evlist,
+ int (*func)(struct evsel *evsel,
+ const void *arg),
+ const void *arg)
+{
+ struct evsel *last = NULL;
+ int err;
+
+ /*
+ * Don't return when list_empty, give func a chance to report
+ * error when it found last == NULL.
+ *
+ * So no need to WARN here, let *func do this.
+ */
+ if (evlist->core.nr_entries > 0)
+ last = evlist__last(evlist);
+
+ do {
+ err = (*func)(last, arg);
+ if (err)
+ return -1;
+ if (!last)
+ return 0;
+
+ if (last->core.node.prev == &evlist->core.entries)
+ return 0;
+ last = list_entry(last->core.node.prev, struct evsel, core.node);
+ } while (!last->cmdline_group_boundary);
+
+ return 0;
+}
+
+static int set_filter(struct evsel *evsel, const void *arg)
+{
+ const char *str = arg;
+ bool found = false;
+ int nr_addr_filters = 0;
+ struct perf_pmu *pmu = NULL;
+
+ if (evsel == NULL) {
+ fprintf(stderr,
+ "--filter option should follow a -e tracepoint or HW tracer option\n");
+ return -1;
+ }
+
+ if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT) {
+ if (evsel__append_tp_filter(evsel, str) < 0) {
+ fprintf(stderr,
+ "not enough memory to hold filter string\n");
+ return -1;
+ }
+
+ return 0;
+ }
+
+ while ((pmu = perf_pmus__scan(pmu)) != NULL)
+ if (pmu->type == evsel->core.attr.type) {
+ found = true;
+ break;
+ }
+
+ if (found)
+ perf_pmu__scan_file(pmu, "nr_addr_filters",
+ "%d", &nr_addr_filters);
+
+ if (!nr_addr_filters)
+ return perf_bpf_filter__parse(&evsel->bpf_filters, str);
+
+ if (evsel__append_addr_filter(evsel, str) < 0) {
+ fprintf(stderr,
+ "not enough memory to hold filter string\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+int parse_filter(const struct option *opt, const char *str,
+ int unset __maybe_unused)
+{
+ struct evlist *evlist = *(struct evlist **)opt->value;
+
+ return foreach_evsel_in_last_glob(evlist, set_filter,
+ (const void *)str);
+}
+
+static int add_exclude_perf_filter(struct evsel *evsel,
+ const void *arg __maybe_unused)
+{
+ char new_filter[64];
+
+ if (evsel == NULL || evsel->core.attr.type != PERF_TYPE_TRACEPOINT) {
+ fprintf(stderr,
+ "--exclude-perf option should follow a -e tracepoint option\n");
+ return -1;
+ }
+
+ snprintf(new_filter, sizeof(new_filter), "common_pid != %d", getpid());
+
+ if (evsel__append_tp_filter(evsel, new_filter) < 0) {
+ fprintf(stderr,
+ "not enough memory to hold filter string\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+int exclude_perf(const struct option *opt,
+ const char *arg __maybe_unused,
+ int unset __maybe_unused)
+{
+ struct evlist *evlist = *(struct evlist **)opt->value;
+
+ return foreach_evsel_in_last_glob(evlist, add_exclude_perf_filter,
+ NULL);
+}
+
+int parse_events__is_hardcoded_term(struct parse_events_term *term)
+{
+ return term->type_term != PARSE_EVENTS__TERM_TYPE_USER;
+}
+
+static int new_term(struct parse_events_term **_term,
+ struct parse_events_term *temp,
+ char *str, u64 num)
+{
+ struct parse_events_term *term;
+
+ term = malloc(sizeof(*term));
+ if (!term)
+ return -ENOMEM;
+
+ *term = *temp;
+ INIT_LIST_HEAD(&term->list);
+ term->weak = false;
+
+ switch (term->type_val) {
+ case PARSE_EVENTS__TERM_TYPE_NUM:
+ term->val.num = num;
+ break;
+ case PARSE_EVENTS__TERM_TYPE_STR:
+ term->val.str = str;
+ break;
+ default:
+ free(term);
+ return -EINVAL;
+ }
+
+ *_term = term;
+ return 0;
+}
+
+int parse_events_term__num(struct parse_events_term **term,
+ enum parse_events__term_type type_term,
+ const char *config, u64 num,
+ bool no_value,
+ void *loc_term_, void *loc_val_)
+{
+ YYLTYPE *loc_term = loc_term_;
+ YYLTYPE *loc_val = loc_val_;
+
+ struct parse_events_term temp = {
+ .type_val = PARSE_EVENTS__TERM_TYPE_NUM,
+ .type_term = type_term,
+ .config = config ? : strdup(config_term_name(type_term)),
+ .no_value = no_value,
+ .err_term = loc_term ? loc_term->first_column : 0,
+ .err_val = loc_val ? loc_val->first_column : 0,
+ };
+
+ return new_term(term, &temp, /*str=*/NULL, num);
+}
+
+int parse_events_term__str(struct parse_events_term **term,
+ enum parse_events__term_type type_term,
+ char *config, char *str,
+ void *loc_term_, void *loc_val_)
+{
+ YYLTYPE *loc_term = loc_term_;
+ YYLTYPE *loc_val = loc_val_;
+
+ struct parse_events_term temp = {
+ .type_val = PARSE_EVENTS__TERM_TYPE_STR,
+ .type_term = type_term,
+ .config = config,
+ .err_term = loc_term ? loc_term->first_column : 0,
+ .err_val = loc_val ? loc_val->first_column : 0,
+ };
+
+ return new_term(term, &temp, str, /*num=*/0);
+}
+
+int parse_events_term__term(struct parse_events_term **term,
+ enum parse_events__term_type term_lhs,
+ enum parse_events__term_type term_rhs,
+ void *loc_term, void *loc_val)
+{
+ return parse_events_term__str(term, term_lhs, NULL,
+ strdup(config_term_name(term_rhs)),
+ loc_term, loc_val);
+}
+
+int parse_events_term__clone(struct parse_events_term **new,
+ struct parse_events_term *term)
+{
+ char *str;
+ struct parse_events_term temp = *term;
+
+ temp.used = false;
+ if (term->config) {
+ temp.config = strdup(term->config);
+ if (!temp.config)
+ return -ENOMEM;
+ }
+ if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM)
+ return new_term(new, &temp, /*str=*/NULL, term->val.num);
+
+ str = strdup(term->val.str);
+ if (!str)
+ return -ENOMEM;
+ return new_term(new, &temp, str, /*num=*/0);
+}
+
+void parse_events_term__delete(struct parse_events_term *term)
+{
+ if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM)
+ zfree(&term->val.str);
+
+ zfree(&term->config);
+ free(term);
+}
+
+int parse_events_copy_term_list(struct list_head *old,
+ struct list_head **new)
+{
+ struct parse_events_term *term, *n;
+ int ret;
+
+ if (!old) {
+ *new = NULL;
+ return 0;
+ }
+
+ *new = malloc(sizeof(struct list_head));
+ if (!*new)
+ return -ENOMEM;
+ INIT_LIST_HEAD(*new);
+
+ list_for_each_entry (term, old, list) {
+ ret = parse_events_term__clone(&n, term);
+ if (ret)
+ return ret;
+ list_add_tail(&n->list, *new);
+ }
+ return 0;
+}
+
+void parse_events_terms__purge(struct list_head *terms)
+{
+ struct parse_events_term *term, *h;
+
+ list_for_each_entry_safe(term, h, terms, list) {
+ list_del_init(&term->list);
+ parse_events_term__delete(term);
+ }
+}
+
+void parse_events_terms__delete(struct list_head *terms)
+{
+ if (!terms)
+ return;
+ parse_events_terms__purge(terms);
+ free(terms);
+}
+
+int parse_events_term__to_strbuf(struct list_head *term_list, struct strbuf *sb)
+{
+ struct parse_events_term *term;
+ bool first = true;
+
+ if (!term_list)
+ return 0;
+
+ list_for_each_entry(term, term_list, list) {
+ int ret;
+
+ if (!first) {
+ ret = strbuf_addch(sb, ',');
+ if (ret < 0)
+ return ret;
+ }
+ first = false;
+
+ if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM)
+ if (term->no_value) {
+ assert(term->val.num == 1);
+ ret = strbuf_addf(sb, "%s", term->config);
+ } else
+ ret = strbuf_addf(sb, "%s=%#"PRIx64, term->config, term->val.num);
+ else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) {
+ if (term->config) {
+ ret = strbuf_addf(sb, "%s=", term->config);
+ if (ret < 0)
+ return ret;
+ } else if ((unsigned int)term->type_term < __PARSE_EVENTS__TERM_TYPE_NR) {
+ ret = strbuf_addf(sb, "%s=", config_term_name(term->type_term));
+ if (ret < 0)
+ return ret;
+ }
+ assert(!term->no_value);
+ ret = strbuf_addf(sb, "%s", term->val.str);
+ }
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+void parse_events_evlist_error(struct parse_events_state *parse_state,
+ int idx, const char *str)
+{
+ if (!parse_state->error)
+ return;
+
+ parse_events_error__handle(parse_state->error, idx, strdup(str), NULL);
+}
+
+static void config_terms_list(char *buf, size_t buf_sz)
+{
+ int i;
+ bool first = true;
+
+ buf[0] = '\0';
+ for (i = 0; i < __PARSE_EVENTS__TERM_TYPE_NR; i++) {
+ const char *name = config_term_name(i);
+
+ if (!config_term_avail(i, NULL))
+ continue;
+ if (!name)
+ continue;
+ if (name[0] == '<')
+ continue;
+
+ if (strlen(buf) + strlen(name) + 2 >= buf_sz)
+ return;
+
+ if (!first)
+ strcat(buf, ",");
+ else
+ first = false;
+ strcat(buf, name);
+ }
+}
+
+/*
+ * Return string contains valid config terms of an event.
+ * @additional_terms: For terms such as PMU sysfs terms.
+ */
+char *parse_events_formats_error_string(char *additional_terms)
+{
+ char *str;
+ /* "no-overwrite" is the longest name */
+ char static_terms[__PARSE_EVENTS__TERM_TYPE_NR *
+ (sizeof("no-overwrite") - 1)];
+
+ config_terms_list(static_terms, sizeof(static_terms));
+ /* valid terms */
+ if (additional_terms) {
+ if (asprintf(&str, "valid terms: %s,%s",
+ additional_terms, static_terms) < 0)
+ goto fail;
+ } else {
+ if (asprintf(&str, "valid terms: %s", static_terms) < 0)
+ goto fail;
+ }
+ return str;
+
+fail:
+ return NULL;
+}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
new file mode 100644
index 000000000..594e5d2dc
--- /dev/null
+++ b/tools/perf/util/parse-events.h
@@ -0,0 +1,268 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_PARSE_EVENTS_H
+#define __PERF_PARSE_EVENTS_H
+/*
+ * Parse symbolic events/counts passed in as options:
+ */
+
+#include <linux/list.h>
+#include <stdbool.h>
+#include <linux/types.h>
+#include <linux/perf_event.h>
+#include <stdio.h>
+#include <string.h>
+
+struct evsel;
+struct evlist;
+struct parse_events_error;
+
+struct option;
+struct perf_pmu;
+struct strbuf;
+
+const char *event_type(int type);
+
+/* Arguments encoded in opt->value. */
+struct parse_events_option_args {
+ struct evlist **evlistp;
+ const char *pmu_filter;
+};
+int parse_events_option(const struct option *opt, const char *str, int unset);
+int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset);
+__attribute__((nonnull(1, 2, 4)))
+int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filter,
+ struct parse_events_error *error, struct perf_pmu *fake_pmu,
+ bool warn_if_reordered);
+
+__attribute__((nonnull(1, 2, 3)))
+static inline int parse_events(struct evlist *evlist, const char *str,
+ struct parse_events_error *err)
+{
+ return __parse_events(evlist, str, /*pmu_filter=*/NULL, err, /*fake_pmu=*/NULL,
+ /*warn_if_reordered=*/true);
+}
+
+int parse_event(struct evlist *evlist, const char *str);
+
+int parse_events_terms(struct list_head *terms, const char *str, FILE *input);
+int parse_filter(const struct option *opt, const char *str, int unset);
+int exclude_perf(const struct option *opt, const char *arg, int unset);
+
+enum parse_events__term_val_type {
+ PARSE_EVENTS__TERM_TYPE_NUM,
+ PARSE_EVENTS__TERM_TYPE_STR,
+};
+
+enum parse_events__term_type {
+ PARSE_EVENTS__TERM_TYPE_USER,
+ PARSE_EVENTS__TERM_TYPE_CONFIG,
+ PARSE_EVENTS__TERM_TYPE_CONFIG1,
+ PARSE_EVENTS__TERM_TYPE_CONFIG2,
+ PARSE_EVENTS__TERM_TYPE_CONFIG3,
+ PARSE_EVENTS__TERM_TYPE_NAME,
+ PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD,
+ PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ,
+ PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
+ PARSE_EVENTS__TERM_TYPE_TIME,
+ PARSE_EVENTS__TERM_TYPE_CALLGRAPH,
+ PARSE_EVENTS__TERM_TYPE_STACKSIZE,
+ PARSE_EVENTS__TERM_TYPE_NOINHERIT,
+ PARSE_EVENTS__TERM_TYPE_INHERIT,
+ PARSE_EVENTS__TERM_TYPE_MAX_STACK,
+ PARSE_EVENTS__TERM_TYPE_MAX_EVENTS,
+ PARSE_EVENTS__TERM_TYPE_NOOVERWRITE,
+ PARSE_EVENTS__TERM_TYPE_OVERWRITE,
+ PARSE_EVENTS__TERM_TYPE_DRV_CFG,
+ PARSE_EVENTS__TERM_TYPE_PERCORE,
+ PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT,
+ PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE,
+ PARSE_EVENTS__TERM_TYPE_METRIC_ID,
+ PARSE_EVENTS__TERM_TYPE_RAW,
+ PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE,
+ PARSE_EVENTS__TERM_TYPE_HARDWARE,
+#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_HARDWARE + 1)
+};
+
+struct parse_events_term {
+ /** @list: The term list the term is a part of. */
+ struct list_head list;
+ /**
+ * @config: The left-hand side of a term assignment, so the term
+ * "event=8" would have the config be "event"
+ */
+ const char *config;
+ /**
+ * @val: The right-hand side of a term assignment that can either be a
+ * string or a number depending on type_val.
+ */
+ union {
+ char *str;
+ u64 num;
+ } val;
+ /** @type_val: The union variable in val to be used for the term. */
+ enum parse_events__term_val_type type_val;
+ /**
+ * @type_term: A predefined term type or PARSE_EVENTS__TERM_TYPE_USER
+ * when not inbuilt.
+ */
+ enum parse_events__term_type type_term;
+ /**
+ * @err_term: The column index of the term from parsing, used during
+ * error output.
+ */
+ int err_term;
+ /**
+ * @err_val: The column index of the val from parsing, used during error
+ * output.
+ */
+ int err_val;
+ /** @used: Was the term used during parameterized-eval. */
+ bool used;
+ /**
+ * @weak: A term from the sysfs or json encoding of an event that
+ * shouldn't override terms coming from the command line.
+ */
+ bool weak;
+ /**
+ * @no_value: Is there no value. If a numeric term has no value then the
+ * value is assumed to be 1. An event name also has no value.
+ */
+ bool no_value;
+};
+
+struct parse_events_error {
+ int num_errors; /* number of errors encountered */
+ int idx; /* index in the parsed string */
+ char *str; /* string to display at the index */
+ char *help; /* optional help string */
+ int first_idx;/* as above, but for the first encountered error */
+ char *first_str;
+ char *first_help;
+};
+
+struct parse_events_state {
+ /* The list parsed events are placed on. */
+ struct list_head list;
+ /* The updated index used by entries as they are added. */
+ int idx;
+ /* Error information. */
+ struct parse_events_error *error;
+ /* Holds returned terms for term parsing. */
+ struct list_head *terms;
+ /* Start token. */
+ int stoken;
+ /* Special fake PMU marker for testing. */
+ struct perf_pmu *fake_pmu;
+ /* If non-null, when wildcard matching only match the given PMU. */
+ const char *pmu_filter;
+ /* Should PE_LEGACY_NAME tokens be generated for config terms? */
+ bool match_legacy_cache_terms;
+ /* Were multiple PMUs scanned to find events? */
+ bool wild_card_pmus;
+};
+
+bool parse_events__filter_pmu(const struct parse_events_state *parse_state,
+ const struct perf_pmu *pmu);
+void parse_events__shrink_config_terms(void);
+int parse_events__is_hardcoded_term(struct parse_events_term *term);
+int parse_events_term__num(struct parse_events_term **term,
+ enum parse_events__term_type type_term,
+ const char *config, u64 num,
+ bool novalue,
+ void *loc_term, void *loc_val);
+int parse_events_term__str(struct parse_events_term **term,
+ enum parse_events__term_type type_term,
+ char *config, char *str,
+ void *loc_term, void *loc_val);
+int parse_events_term__term(struct parse_events_term **term,
+ enum parse_events__term_type term_lhs,
+ enum parse_events__term_type term_rhs,
+ void *loc_term, void *loc_val);
+int parse_events_term__clone(struct parse_events_term **new,
+ struct parse_events_term *term);
+void parse_events_term__delete(struct parse_events_term *term);
+void parse_events_terms__delete(struct list_head *terms);
+void parse_events_terms__purge(struct list_head *terms);
+int parse_events_term__to_strbuf(struct list_head *term_list, struct strbuf *sb);
+int parse_events__modifier_event(struct list_head *list, char *str, bool add);
+int parse_events__modifier_group(struct list_head *list, char *event_mod);
+int parse_events_name(struct list_head *list, const char *name);
+int parse_events_add_tracepoint(struct list_head *list, int *idx,
+ const char *sys, const char *event,
+ struct parse_events_error *error,
+ struct list_head *head_config, void *loc);
+int parse_events_add_numeric(struct parse_events_state *parse_state,
+ struct list_head *list,
+ u32 type, u64 config,
+ struct list_head *head_config,
+ bool wildcard);
+int parse_events_add_tool(struct parse_events_state *parse_state,
+ struct list_head *list,
+ int tool_event);
+int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
+ struct parse_events_state *parse_state,
+ struct list_head *head_config);
+int parse_events__decode_legacy_cache(const char *name, int pmu_type, __u64 *config);
+int parse_events_add_breakpoint(struct parse_events_state *parse_state,
+ struct list_head *list,
+ u64 addr, char *type, u64 len,
+ struct list_head *head_config);
+int parse_events_add_pmu(struct parse_events_state *parse_state,
+ struct list_head *list, const char *name,
+ struct list_head *head_config,
+ bool auto_merge_stats, void *loc);
+
+struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr,
+ const char *name, const char *metric_id,
+ struct perf_pmu *pmu);
+
+int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
+ char *str,
+ struct list_head *head_config,
+ struct list_head **listp, void *loc);
+
+int parse_events_copy_term_list(struct list_head *old,
+ struct list_head **new);
+
+void parse_events__set_leader(char *name, struct list_head *list);
+void parse_events_update_lists(struct list_head *list_event,
+ struct list_head *list_all);
+void parse_events_evlist_error(struct parse_events_state *parse_state,
+ int idx, const char *str);
+
+struct event_symbol {
+ const char *symbol;
+ const char *alias;
+};
+extern struct event_symbol event_symbols_hw[];
+extern struct event_symbol event_symbols_sw[];
+
+char *parse_events_formats_error_string(char *additional_terms);
+
+void parse_events_error__init(struct parse_events_error *err);
+void parse_events_error__exit(struct parse_events_error *err);
+void parse_events_error__handle(struct parse_events_error *err, int idx,
+ char *str, char *help);
+void parse_events_error__print(struct parse_events_error *err,
+ const char *event);
+
+#ifdef HAVE_LIBELF_SUPPORT
+/*
+ * If the probe point starts with '%',
+ * or starts with "sdt_" and has a ':' but no '=',
+ * then it should be a SDT/cached probe point.
+ */
+static inline bool is_sdt_event(char *str)
+{
+ return (str[0] == '%' ||
+ (!strncmp(str, "sdt_", 4) &&
+ !!strchr(str, ':') && !strchr(str, '=')));
+}
+#else
+static inline bool is_sdt_event(char *str __maybe_unused)
+{
+ return false;
+}
+#endif /* HAVE_LIBELF_SUPPORT */
+
+#endif /* __PERF_PARSE_EVENTS_H */
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
new file mode 100644
index 000000000..4ef4b6f17
--- /dev/null
+++ b/tools/perf/util/parse-events.l
@@ -0,0 +1,353 @@
+
+%option reentrant
+%option bison-bridge
+%option prefix="parse_events_"
+%option stack
+%option bison-locations
+%option yylineno
+%option reject
+
+%{
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "parse-events.h"
+#include "parse-events-bison.h"
+#include "evsel.h"
+
+char *parse_events_get_text(yyscan_t yyscanner);
+YYSTYPE *parse_events_get_lval(yyscan_t yyscanner);
+
+static int __value(YYSTYPE *yylval, char *str, int base, int token)
+{
+ u64 num;
+
+ errno = 0;
+ num = strtoull(str, NULL, base);
+ if (errno)
+ return PE_ERROR;
+
+ yylval->num = num;
+ return token;
+}
+
+static int value(yyscan_t scanner, int base)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+ char *text = parse_events_get_text(scanner);
+
+ return __value(yylval, text, base, PE_VALUE);
+}
+
+static int str(yyscan_t scanner, int token)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+ char *text = parse_events_get_text(scanner);
+
+ if (text[0] != '\'') {
+ yylval->str = strdup(text);
+ } else {
+ /*
+ * If a text tag specified on the command line
+ * contains opening single quite ' then it is
+ * expected that the tag ends with single quote
+ * as well, like this:
+ * name=\'CPU_CLK_UNHALTED.THREAD:cmask=1\'
+ * quotes need to be escaped to bypass shell
+ * processing.
+ */
+ yylval->str = strndup(&text[1], strlen(text) - 2);
+ }
+
+ return token;
+}
+
+static int lc_str(yyscan_t scanner, const struct parse_events_state *state)
+{
+ return str(scanner, state->match_legacy_cache_terms ? PE_LEGACY_CACHE : PE_NAME);
+}
+
+/*
+ * This function is called when the parser gets two kind of input:
+ *
+ * @cfg1 or @cfg2=config
+ *
+ * The leading '@' is stripped off before 'cfg1' and 'cfg2=config' are given to
+ * bison. In the latter case it is necessary to keep the string intact so that
+ * the PMU kernel driver can determine what configurable is associated to
+ * 'config'.
+ */
+static int drv_str(yyscan_t scanner, int token)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+ char *text = parse_events_get_text(scanner);
+
+ /* Strip off the '@' */
+ yylval->str = strdup(text + 1);
+ return token;
+}
+
+#define REWIND(__alloc) \
+do { \
+ YYSTYPE *__yylval = parse_events_get_lval(yyscanner); \
+ char *text = parse_events_get_text(yyscanner); \
+ \
+ if (__alloc) \
+ __yylval->str = strdup(text); \
+ \
+ yycolumn -= strlen(text); \
+ yyless(0); \
+} while (0)
+
+static int sym(yyscan_t scanner, int type, int config)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+
+ yylval->num = (type << 16) + config;
+ return type == PERF_TYPE_HARDWARE ? PE_VALUE_SYM_HW : PE_VALUE_SYM_SW;
+}
+
+static int tool(yyscan_t scanner, enum perf_tool_event event)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+
+ yylval->num = event;
+ return PE_VALUE_SYM_TOOL;
+}
+
+static int term(yyscan_t scanner, enum parse_events__term_type type)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+
+ yylval->num = type;
+ return PE_TERM;
+}
+
+static int hw_term(yyscan_t scanner, int config)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+ char *text = parse_events_get_text(scanner);
+
+ yylval->hardware_term.str = strdup(text);
+ yylval->hardware_term.num = PERF_TYPE_HARDWARE + config;
+ return PE_TERM_HW;
+}
+
+#define YY_USER_ACTION \
+do { \
+ yylloc->last_column = yylloc->first_column; \
+ yylloc->first_column = yycolumn; \
+ yycolumn += yyleng; \
+} while (0);
+
+#define USER_REJECT \
+ yycolumn -= yyleng; \
+ REJECT
+
+%}
+
+%x mem
+%s config
+%x event
+
+group [^,{}/]*[{][^}]*[}][^,{}/]*
+event_pmu [^,{}/]+[/][^/]*[/][^,{}/]*
+event [^,{}/]+
+
+num_dec [0-9]+
+num_hex 0x[a-fA-F0-9]+
+num_raw_hex [a-fA-F0-9]+
+name [a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]!\-]*
+name_tag [\'][a-zA-Z_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\']
+name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
+drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
+/*
+ * If you add a modifier you need to update check_modifier().
+ * Also, the letters in modifier_event must not be in modifier_bp.
+ */
+modifier_event [ukhpPGHSDIWeb]+
+modifier_bp [rwx]{1,3}
+lc_type (L1-dcache|l1-d|l1d|L1-data|L1-icache|l1-i|l1i|L1-instruction|LLC|L2|dTLB|d-tlb|Data-TLB|iTLB|i-tlb|Instruction-TLB|branch|branches|bpu|btb|bpc|node)
+lc_op_result (load|loads|read|store|stores|write|prefetch|prefetches|speculative-read|speculative-load|refs|Reference|ops|access|misses|miss)
+digit [0-9]
+non_digit [^0-9]
+
+%%
+
+%{
+ struct parse_events_state *_parse_state = parse_events_get_extra(yyscanner);
+ {
+ int start_token = _parse_state->stoken;
+
+ if (start_token == PE_START_TERMS)
+ BEGIN(config);
+ else if (start_token == PE_START_EVENTS)
+ BEGIN(event);
+
+ if (start_token) {
+ _parse_state->stoken = 0;
+ /*
+ * The flex parser does not init locations variable
+ * via the scan_string interface, so we need do the
+ * init in here.
+ */
+ yycolumn = 0;
+ return start_token;
+ }
+ }
+%}
+
+<event>{
+
+{group} {
+ BEGIN(INITIAL);
+ REWIND(0);
+ }
+
+{event_pmu} |
+{event} {
+ BEGIN(INITIAL);
+ REWIND(1);
+ return PE_EVENT_NAME;
+ }
+
+<<EOF>> {
+ BEGIN(INITIAL);
+ REWIND(0);
+ }
+, {
+ return ',';
+ }
+}
+
+<config>{
+ /*
+ * Please update config_term_names when new static term is added.
+ */
+config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); }
+config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); }
+config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); }
+config3 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG3); }
+name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); }
+period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
+freq { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ); }
+branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
+time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
+call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
+stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
+max-stack { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); }
+nr { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_EVENTS); }
+inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); }
+no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); }
+overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); }
+no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); }
+percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); }
+aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); }
+aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); }
+metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); }
+cpu-cycles|cycles { return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); }
+stalled-cycles-frontend|idle-cycles-frontend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
+stalled-cycles-backend|idle-cycles-backend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
+instructions { return hw_term(yyscanner, PERF_COUNT_HW_INSTRUCTIONS); }
+cache-references { return hw_term(yyscanner, PERF_COUNT_HW_CACHE_REFERENCES); }
+cache-misses { return hw_term(yyscanner, PERF_COUNT_HW_CACHE_MISSES); }
+branch-instructions|branches { return hw_term(yyscanner, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); }
+branch-misses { return hw_term(yyscanner, PERF_COUNT_HW_BRANCH_MISSES); }
+bus-cycles { return hw_term(yyscanner, PERF_COUNT_HW_BUS_CYCLES); }
+ref-cycles { return hw_term(yyscanner, PERF_COUNT_HW_REF_CPU_CYCLES); }
+r{num_raw_hex} { return str(yyscanner, PE_RAW); }
+r0x{num_raw_hex} { return str(yyscanner, PE_RAW); }
+, { return ','; }
+"/" { BEGIN(INITIAL); return '/'; }
+{lc_type} { return lc_str(yyscanner, _parse_state); }
+{lc_type}-{lc_op_result} { return lc_str(yyscanner, _parse_state); }
+{lc_type}-{lc_op_result}-{lc_op_result} { return lc_str(yyscanner, _parse_state); }
+{name_minus} { return str(yyscanner, PE_NAME); }
+@{drv_cfg_term} { return drv_str(yyscanner, PE_DRV_CFG_TERM); }
+}
+
+<mem>{
+{modifier_bp} { return str(yyscanner, PE_MODIFIER_BP); }
+ /*
+ * The colon before memory access modifiers can get mixed up with the
+ * colon before event modifiers. Fortunately none of the option letters
+ * are the same, so trailing context can be used disambiguate the two
+ * cases.
+ */
+":"/{modifier_bp} { return PE_BP_COLON; }
+ /*
+ * The slash before memory length can get mixed up with the slash before
+ * config terms. Fortunately config terms do not start with a numeric
+ * digit, so trailing context can be used disambiguate the two cases.
+ */
+"/"/{digit} { return PE_BP_SLASH; }
+"/"/{non_digit} { BEGIN(config); return '/'; }
+{num_dec} { return value(yyscanner, 10); }
+{num_hex} { return value(yyscanner, 16); }
+ /*
+ * We need to separate 'mem:' scanner part, in order to get specific
+ * modifier bits parsed out. Otherwise we would need to handle PE_NAME
+ * and we'd need to parse it manually. During the escape from <mem>
+ * state we need to put the escaping char back, so we dont miss it.
+ */
+. { unput(*yytext); BEGIN(INITIAL); }
+ /*
+ * We destroy the scanner after reaching EOF,
+ * but anyway just to be sure get back to INIT state.
+ */
+<<EOF>> { BEGIN(INITIAL); }
+}
+
+cpu-cycles|cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); }
+stalled-cycles-frontend|idle-cycles-frontend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
+stalled-cycles-backend|idle-cycles-backend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
+instructions { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); }
+cache-references { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES); }
+cache-misses { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); }
+branch-instructions|branches { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); }
+branch-misses { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES); }
+bus-cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES); }
+ref-cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES); }
+cpu-clock { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK); }
+task-clock { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK); }
+page-faults|faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS); }
+minor-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN); }
+major-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ); }
+context-switches|cs { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES); }
+cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS); }
+alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); }
+emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
+dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
+duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); }
+user_time { return tool(yyscanner, PERF_TOOL_USER_TIME); }
+system_time { return tool(yyscanner, PERF_TOOL_SYSTEM_TIME); }
+bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
+cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); }
+
+{lc_type} { return str(yyscanner, PE_LEGACY_CACHE); }
+{lc_type}-{lc_op_result} { return str(yyscanner, PE_LEGACY_CACHE); }
+{lc_type}-{lc_op_result}-{lc_op_result} { return str(yyscanner, PE_LEGACY_CACHE); }
+mem: { BEGIN(mem); return PE_PREFIX_MEM; }
+r{num_raw_hex} { return str(yyscanner, PE_RAW); }
+{num_dec} { return value(yyscanner, 10); }
+{num_hex} { return value(yyscanner, 16); }
+
+{modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); }
+{name} { return str(yyscanner, PE_NAME); }
+{name_tag} { return str(yyscanner, PE_NAME); }
+"/" { BEGIN(config); return '/'; }
+, { BEGIN(event); return ','; }
+: { return ':'; }
+"{" { BEGIN(event); return '{'; }
+"}" { return '}'; }
+= { return '='; }
+\n { }
+. { }
+
+%%
+
+int parse_events_wrap(void *scanner __maybe_unused)
+{
+ return 1;
+}
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
new file mode 100644
index 000000000..c3a86ef4b
--- /dev/null
+++ b/tools/perf/util/parse-events.y
@@ -0,0 +1,870 @@
+%define api.pure full
+%parse-param {void *_parse_state}
+%parse-param {void *scanner}
+%lex-param {void* scanner}
+%locations
+
+%{
+
+#define YYDEBUG 1
+
+#include <errno.h>
+#include <fnmatch.h>
+#include <stdio.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/zalloc.h>
+#include "pmu.h"
+#include "pmus.h"
+#include "evsel.h"
+#include "parse-events.h"
+#include "parse-events-bison.h"
+
+int parse_events_lex(YYSTYPE * yylval_param, YYLTYPE * yylloc_param , void *yyscanner);
+void parse_events_error(YYLTYPE *loc, void *parse_state, void *scanner, char const *msg);
+
+#define PE_ABORT(val) \
+do { \
+ if (val == -ENOMEM) \
+ YYNOMEM; \
+ YYABORT; \
+} while (0)
+
+static struct list_head* alloc_list(void)
+{
+ struct list_head *list;
+
+ list = malloc(sizeof(*list));
+ if (!list)
+ return NULL;
+
+ INIT_LIST_HEAD(list);
+ return list;
+}
+
+static void free_list_evsel(struct list_head* list_evsel)
+{
+ struct evsel *evsel, *tmp;
+
+ list_for_each_entry_safe(evsel, tmp, list_evsel, core.node) {
+ list_del_init(&evsel->core.node);
+ evsel__delete(evsel);
+ }
+ free(list_evsel);
+}
+
+%}
+
+%token PE_START_EVENTS PE_START_TERMS
+%token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_TERM
+%token PE_VALUE_SYM_TOOL
+%token PE_EVENT_NAME
+%token PE_RAW PE_NAME
+%token PE_MODIFIER_EVENT PE_MODIFIER_BP PE_BP_COLON PE_BP_SLASH
+%token PE_LEGACY_CACHE
+%token PE_PREFIX_MEM
+%token PE_ERROR
+%token PE_DRV_CFG_TERM
+%token PE_TERM_HW
+%type <num> PE_VALUE
+%type <num> PE_VALUE_SYM_HW
+%type <num> PE_VALUE_SYM_SW
+%type <num> PE_VALUE_SYM_TOOL
+%type <num> PE_TERM
+%type <num> value_sym
+%type <str> PE_RAW
+%type <str> PE_NAME
+%type <str> PE_LEGACY_CACHE
+%type <str> PE_MODIFIER_EVENT
+%type <str> PE_MODIFIER_BP
+%type <str> PE_EVENT_NAME
+%type <str> PE_DRV_CFG_TERM
+%type <str> name_or_raw
+%destructor { free ($$); } <str>
+%type <term> event_term
+%destructor { parse_events_term__delete ($$); } <term>
+%type <list_terms> event_config
+%type <list_terms> opt_event_config
+%type <list_terms> opt_pmu_config
+%destructor { parse_events_terms__delete ($$); } <list_terms>
+%type <list_evsel> event_pmu
+%type <list_evsel> event_legacy_symbol
+%type <list_evsel> event_legacy_cache
+%type <list_evsel> event_legacy_mem
+%type <list_evsel> event_legacy_tracepoint
+%type <list_evsel> event_legacy_numeric
+%type <list_evsel> event_legacy_raw
+%type <list_evsel> event_def
+%type <list_evsel> event_mod
+%type <list_evsel> event_name
+%type <list_evsel> event
+%type <list_evsel> events
+%type <list_evsel> group_def
+%type <list_evsel> group
+%type <list_evsel> groups
+%destructor { free_list_evsel ($$); } <list_evsel>
+%type <tracepoint_name> tracepoint_name
+%destructor { free ($$.sys); free ($$.event); } <tracepoint_name>
+%type <hardware_term> PE_TERM_HW
+%destructor { free ($$.str); } <hardware_term>
+
+%union
+{
+ char *str;
+ u64 num;
+ struct list_head *list_evsel;
+ struct list_head *list_terms;
+ struct parse_events_term *term;
+ struct tracepoint_name {
+ char *sys;
+ char *event;
+ } tracepoint_name;
+ struct hardware_term {
+ char *str;
+ u64 num;
+ } hardware_term;
+}
+%%
+
+start:
+PE_START_EVENTS start_events
+|
+PE_START_TERMS start_terms
+
+start_events: groups
+{
+ struct parse_events_state *parse_state = _parse_state;
+
+ /* frees $1 */
+ parse_events_update_lists($1, &parse_state->list);
+}
+
+groups:
+groups ',' group
+{
+ struct list_head *list = $1;
+ struct list_head *group = $3;
+
+ /* frees $3 */
+ parse_events_update_lists(group, list);
+ $$ = list;
+}
+|
+groups ',' event
+{
+ struct list_head *list = $1;
+ struct list_head *event = $3;
+
+ /* frees $3 */
+ parse_events_update_lists(event, list);
+ $$ = list;
+}
+|
+group
+|
+event
+
+group:
+group_def ':' PE_MODIFIER_EVENT
+{
+ struct list_head *list = $1;
+ int err;
+
+ err = parse_events__modifier_group(list, $3);
+ free($3);
+ if (err) {
+ struct parse_events_state *parse_state = _parse_state;
+ struct parse_events_error *error = parse_state->error;
+
+ parse_events_error__handle(error, @3.first_column,
+ strdup("Bad modifier"), NULL);
+ free_list_evsel(list);
+ YYABORT;
+ }
+ $$ = list;
+}
+|
+group_def
+
+group_def:
+PE_NAME '{' events '}'
+{
+ struct list_head *list = $3;
+
+ /* Takes ownership of $1. */
+ parse_events__set_leader($1, list);
+ $$ = list;
+}
+|
+'{' events '}'
+{
+ struct list_head *list = $2;
+
+ parse_events__set_leader(NULL, list);
+ $$ = list;
+}
+
+events:
+events ',' event
+{
+ struct list_head *event = $3;
+ struct list_head *list = $1;
+
+ /* frees $3 */
+ parse_events_update_lists(event, list);
+ $$ = list;
+}
+|
+event
+
+event: event_mod
+
+event_mod:
+event_name PE_MODIFIER_EVENT
+{
+ struct list_head *list = $1;
+ int err;
+
+ /*
+ * Apply modifier on all events added by single event definition
+ * (there could be more events added for multiple tracepoint
+ * definitions via '*?'.
+ */
+ err = parse_events__modifier_event(list, $2, false);
+ free($2);
+ if (err) {
+ struct parse_events_state *parse_state = _parse_state;
+ struct parse_events_error *error = parse_state->error;
+
+ parse_events_error__handle(error, @2.first_column,
+ strdup("Bad modifier"), NULL);
+ free_list_evsel(list);
+ YYABORT;
+ }
+ $$ = list;
+}
+|
+event_name
+
+event_name:
+PE_EVENT_NAME event_def
+{
+ int err;
+
+ err = parse_events_name($2, $1);
+ free($1);
+ if (err) {
+ free_list_evsel($2);
+ YYNOMEM;
+ }
+ $$ = $2;
+}
+|
+event_def
+
+event_def: event_pmu |
+ event_legacy_symbol |
+ event_legacy_cache sep_dc |
+ event_legacy_mem sep_dc |
+ event_legacy_tracepoint sep_dc |
+ event_legacy_numeric sep_dc |
+ event_legacy_raw sep_dc
+
+event_pmu:
+PE_NAME opt_pmu_config
+{
+ struct parse_events_state *parse_state = _parse_state;
+ struct list_head *list = NULL, *orig_terms = NULL, *terms= NULL;
+ char *pattern = NULL;
+
+#define CLEANUP \
+ do { \
+ parse_events_terms__delete($2); \
+ parse_events_terms__delete(orig_terms); \
+ free(list); \
+ free($1); \
+ free(pattern); \
+ } while(0)
+
+ if (parse_events_copy_term_list($2, &orig_terms)) {
+ CLEANUP;
+ YYNOMEM;
+ }
+
+ list = alloc_list();
+ if (!list) {
+ CLEANUP;
+ YYNOMEM;
+ }
+ /* Attempt to add to list assuming $1 is a PMU name. */
+ if (parse_events_add_pmu(parse_state, list, $1, $2, /*auto_merge_stats=*/false, &@1)) {
+ struct perf_pmu *pmu = NULL;
+ int ok = 0;
+
+ /* Failure to add, try wildcard expansion of $1 as a PMU name. */
+ if (asprintf(&pattern, "%s*", $1) < 0) {
+ CLEANUP;
+ YYNOMEM;
+ }
+
+ while ((pmu = perf_pmus__scan(pmu)) != NULL) {
+ const char *name = pmu->name;
+
+ if (parse_events__filter_pmu(parse_state, pmu))
+ continue;
+
+ if (!strncmp(name, "uncore_", 7) &&
+ strncmp($1, "uncore_", 7))
+ name += 7;
+ if (!perf_pmu__match(pattern, name, $1) ||
+ !perf_pmu__match(pattern, pmu->alias_name, $1)) {
+ bool auto_merge_stats = perf_pmu__auto_merge_stats(pmu);
+
+ if (parse_events_copy_term_list(orig_terms, &terms)) {
+ CLEANUP;
+ YYNOMEM;
+ }
+ if (!parse_events_add_pmu(parse_state, list, pmu->name, terms,
+ auto_merge_stats, &@1)) {
+ ok++;
+ parse_state->wild_card_pmus = true;
+ }
+ parse_events_terms__delete(terms);
+ }
+ }
+
+ if (!ok) {
+ /* Failure to add, assume $1 is an event name. */
+ zfree(&list);
+ ok = !parse_events_multi_pmu_add(parse_state, $1, $2, &list, &@1);
+ $2 = NULL;
+ }
+ if (!ok) {
+ struct parse_events_error *error = parse_state->error;
+ char *help;
+
+ if (asprintf(&help, "Unable to find PMU or event on a PMU of '%s'", $1) < 0)
+ help = NULL;
+ parse_events_error__handle(error, @1.first_column,
+ strdup("Bad event or PMU"),
+ help);
+ CLEANUP;
+ YYABORT;
+ }
+ }
+ $$ = list;
+ list = NULL;
+ CLEANUP;
+#undef CLEANUP
+}
+|
+PE_NAME sep_dc
+{
+ struct list_head *list;
+ int err;
+
+ err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list, &@1);
+ if (err < 0) {
+ struct parse_events_state *parse_state = _parse_state;
+ struct parse_events_error *error = parse_state->error;
+ char *help;
+
+ if (asprintf(&help, "Unable to find event on a PMU of '%s'", $1) < 0)
+ help = NULL;
+ parse_events_error__handle(error, @1.first_column, strdup("Bad event name"), help);
+ free($1);
+ PE_ABORT(err);
+ }
+ free($1);
+ $$ = list;
+}
+
+value_sym:
+PE_VALUE_SYM_HW
+|
+PE_VALUE_SYM_SW
+
+event_legacy_symbol:
+value_sym '/' event_config '/'
+{
+ struct list_head *list;
+ int type = $1 >> 16;
+ int config = $1 & 255;
+ int err;
+ bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE);
+
+ list = alloc_list();
+ if (!list)
+ YYNOMEM;
+ err = parse_events_add_numeric(_parse_state, list, type, config, $3, wildcard);
+ parse_events_terms__delete($3);
+ if (err) {
+ free_list_evsel(list);
+ PE_ABORT(err);
+ }
+ $$ = list;
+}
+|
+value_sym sep_slash_slash_dc
+{
+ struct list_head *list;
+ int type = $1 >> 16;
+ int config = $1 & 255;
+ bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE);
+ int err;
+
+ list = alloc_list();
+ if (!list)
+ YYNOMEM;
+ err = parse_events_add_numeric(_parse_state, list, type, config, /*head_config=*/NULL, wildcard);
+ if (err)
+ PE_ABORT(err);
+ $$ = list;
+}
+|
+PE_VALUE_SYM_TOOL sep_slash_slash_dc
+{
+ struct list_head *list;
+ int err;
+
+ list = alloc_list();
+ if (!list)
+ YYNOMEM;
+ err = parse_events_add_tool(_parse_state, list, $1);
+ if (err)
+ YYNOMEM;
+ $$ = list;
+}
+
+event_legacy_cache:
+PE_LEGACY_CACHE opt_event_config
+{
+ struct parse_events_state *parse_state = _parse_state;
+ struct list_head *list;
+ int err;
+
+ list = alloc_list();
+ if (!list)
+ YYNOMEM;
+
+ err = parse_events_add_cache(list, &parse_state->idx, $1, parse_state, $2);
+
+ parse_events_terms__delete($2);
+ free($1);
+ if (err) {
+ free_list_evsel(list);
+ PE_ABORT(err);
+ }
+ $$ = list;
+}
+
+event_legacy_mem:
+PE_PREFIX_MEM PE_VALUE PE_BP_SLASH PE_VALUE PE_BP_COLON PE_MODIFIER_BP opt_event_config
+{
+ struct list_head *list;
+ int err;
+
+ list = alloc_list();
+ if (!list)
+ YYNOMEM;
+
+ err = parse_events_add_breakpoint(_parse_state, list,
+ $2, $6, $4, $7);
+ parse_events_terms__delete($7);
+ free($6);
+ if (err) {
+ free(list);
+ PE_ABORT(err);
+ }
+ $$ = list;
+}
+|
+PE_PREFIX_MEM PE_VALUE PE_BP_SLASH PE_VALUE opt_event_config
+{
+ struct list_head *list;
+ int err;
+
+ list = alloc_list();
+ if (!list)
+ YYNOMEM;
+
+ err = parse_events_add_breakpoint(_parse_state, list,
+ $2, NULL, $4, $5);
+ parse_events_terms__delete($5);
+ if (err) {
+ free(list);
+ PE_ABORT(err);
+ }
+ $$ = list;
+}
+|
+PE_PREFIX_MEM PE_VALUE PE_BP_COLON PE_MODIFIER_BP opt_event_config
+{
+ struct list_head *list;
+ int err;
+
+ list = alloc_list();
+ if (!list)
+ YYNOMEM;
+
+ err = parse_events_add_breakpoint(_parse_state, list,
+ $2, $4, 0, $5);
+ parse_events_terms__delete($5);
+ free($4);
+ if (err) {
+ free(list);
+ PE_ABORT(err);
+ }
+ $$ = list;
+}
+|
+PE_PREFIX_MEM PE_VALUE opt_event_config
+{
+ struct list_head *list;
+ int err;
+
+ list = alloc_list();
+ if (!list)
+ YYNOMEM;
+ err = parse_events_add_breakpoint(_parse_state, list,
+ $2, NULL, 0, $3);
+ parse_events_terms__delete($3);
+ if (err) {
+ free(list);
+ PE_ABORT(err);
+ }
+ $$ = list;
+}
+
+event_legacy_tracepoint:
+tracepoint_name opt_event_config
+{
+ struct parse_events_state *parse_state = _parse_state;
+ struct parse_events_error *error = parse_state->error;
+ struct list_head *list;
+ int err;
+
+ list = alloc_list();
+ if (!list)
+ YYNOMEM;
+ if (error)
+ error->idx = @1.first_column;
+
+ err = parse_events_add_tracepoint(list, &parse_state->idx, $1.sys, $1.event,
+ error, $2, &@1);
+
+ parse_events_terms__delete($2);
+ free($1.sys);
+ free($1.event);
+ if (err) {
+ free(list);
+ PE_ABORT(err);
+ }
+ $$ = list;
+}
+
+tracepoint_name:
+PE_NAME ':' PE_NAME
+{
+ struct tracepoint_name tracepoint = {$1, $3};
+
+ $$ = tracepoint;
+}
+
+event_legacy_numeric:
+PE_VALUE ':' PE_VALUE opt_event_config
+{
+ struct list_head *list;
+ int err;
+
+ list = alloc_list();
+ if (!list)
+ YYNOMEM;
+ err = parse_events_add_numeric(_parse_state, list, (u32)$1, $3, $4,
+ /*wildcard=*/false);
+ parse_events_terms__delete($4);
+ if (err) {
+ free(list);
+ PE_ABORT(err);
+ }
+ $$ = list;
+}
+
+event_legacy_raw:
+PE_RAW opt_event_config
+{
+ struct list_head *list;
+ int err;
+ u64 num;
+
+ list = alloc_list();
+ if (!list)
+ YYNOMEM;
+ errno = 0;
+ num = strtoull($1 + 1, NULL, 16);
+ /* Given the lexer will only give [a-fA-F0-9]+ a failure here should be impossible. */
+ if (errno)
+ YYABORT;
+ free($1);
+ err = parse_events_add_numeric(_parse_state, list, PERF_TYPE_RAW, num, $2,
+ /*wildcard=*/false);
+ parse_events_terms__delete($2);
+ if (err) {
+ free(list);
+ PE_ABORT(err);
+ }
+ $$ = list;
+}
+
+opt_event_config:
+'/' event_config '/'
+{
+ $$ = $2;
+}
+|
+'/' '/'
+{
+ $$ = NULL;
+}
+|
+{
+ $$ = NULL;
+}
+
+opt_pmu_config:
+'/' event_config '/'
+{
+ $$ = $2;
+}
+|
+'/' '/'
+{
+ $$ = NULL;
+}
+
+start_terms: event_config
+{
+ struct parse_events_state *parse_state = _parse_state;
+ if (parse_state->terms) {
+ parse_events_terms__delete ($1);
+ YYABORT;
+ }
+ parse_state->terms = $1;
+}
+
+event_config:
+event_config ',' event_term
+{
+ struct list_head *head = $1;
+ struct parse_events_term *term = $3;
+
+ if (!head) {
+ parse_events_term__delete(term);
+ YYABORT;
+ }
+ list_add_tail(&term->list, head);
+ $$ = $1;
+}
+|
+event_term
+{
+ struct list_head *head = malloc(sizeof(*head));
+ struct parse_events_term *term = $1;
+
+ if (!head)
+ YYNOMEM;
+ INIT_LIST_HEAD(head);
+ list_add_tail(&term->list, head);
+ $$ = head;
+}
+
+name_or_raw: PE_RAW | PE_NAME | PE_LEGACY_CACHE
+
+event_term:
+PE_RAW
+{
+ struct parse_events_term *term;
+ int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_RAW,
+ strdup("raw"), $1, &@1, &@1);
+
+ if (err) {
+ free($1);
+ PE_ABORT(err);
+ }
+ $$ = term;
+}
+|
+name_or_raw '=' name_or_raw
+{
+ struct parse_events_term *term;
+ int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, $1, $3, &@1, &@3);
+
+ if (err) {
+ free($1);
+ free($3);
+ PE_ABORT(err);
+ }
+ $$ = term;
+}
+|
+name_or_raw '=' PE_VALUE
+{
+ struct parse_events_term *term;
+ int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+ $1, $3, /*novalue=*/false, &@1, &@3);
+
+ if (err) {
+ free($1);
+ PE_ABORT(err);
+ }
+ $$ = term;
+}
+|
+name_or_raw '=' PE_TERM_HW
+{
+ struct parse_events_term *term;
+ int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
+ $1, $3.str, &@1, &@3);
+
+ if (err) {
+ free($1);
+ free($3.str);
+ PE_ABORT(err);
+ }
+ $$ = term;
+}
+|
+PE_LEGACY_CACHE
+{
+ struct parse_events_term *term;
+ int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE,
+ $1, /*num=*/1, /*novalue=*/true, &@1, /*loc_val=*/NULL);
+
+ if (err) {
+ free($1);
+ PE_ABORT(err);
+ }
+ $$ = term;
+}
+|
+PE_NAME
+{
+ struct parse_events_term *term;
+ int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+ $1, /*num=*/1, /*novalue=*/true, &@1, /*loc_val=*/NULL);
+
+ if (err) {
+ free($1);
+ PE_ABORT(err);
+ }
+ $$ = term;
+}
+|
+PE_TERM_HW
+{
+ struct parse_events_term *term;
+ int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_HARDWARE,
+ $1.str, $1.num & 255, /*novalue=*/false,
+ &@1, /*loc_val=*/NULL);
+
+ if (err) {
+ free($1.str);
+ PE_ABORT(err);
+ }
+ $$ = term;
+}
+|
+PE_TERM '=' name_or_raw
+{
+ struct parse_events_term *term;
+ int err = parse_events_term__str(&term, (enum parse_events__term_type)$1,
+ /*config=*/NULL, $3, &@1, &@3);
+
+ if (err) {
+ free($3);
+ PE_ABORT(err);
+ }
+ $$ = term;
+}
+|
+PE_TERM '=' PE_TERM_HW
+{
+ struct parse_events_term *term;
+ int err = parse_events_term__str(&term, (enum parse_events__term_type)$1,
+ /*config=*/NULL, $3.str, &@1, &@3);
+
+ if (err) {
+ free($3.str);
+ PE_ABORT(err);
+ }
+ $$ = term;
+}
+|
+PE_TERM '=' PE_TERM
+{
+ struct parse_events_term *term;
+ int err = parse_events_term__term(&term,
+ (enum parse_events__term_type)$1,
+ (enum parse_events__term_type)$3,
+ &@1, &@3);
+
+ if (err)
+ PE_ABORT(err);
+
+ $$ = term;
+}
+|
+PE_TERM '=' PE_VALUE
+{
+ struct parse_events_term *term;
+ int err = parse_events_term__num(&term, (enum parse_events__term_type)$1,
+ /*config=*/NULL, $3, /*novalue=*/false, &@1, &@3);
+
+ if (err)
+ PE_ABORT(err);
+
+ $$ = term;
+}
+|
+PE_TERM
+{
+ struct parse_events_term *term;
+ int err = parse_events_term__num(&term, (enum parse_events__term_type)$1,
+ /*config=*/NULL, /*num=*/1, /*novalue=*/true,
+ &@1, /*loc_val=*/NULL);
+
+ if (err)
+ PE_ABORT(err);
+
+ $$ = term;
+}
+|
+PE_DRV_CFG_TERM
+{
+ struct parse_events_term *term;
+ char *config = strdup($1);
+ int err;
+
+ if (!config)
+ YYNOMEM;
+ err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_DRV_CFG, config, $1, &@1, NULL);
+ if (err) {
+ free($1);
+ free(config);
+ PE_ABORT(err);
+ }
+ $$ = term;
+}
+
+sep_dc: ':' |
+
+sep_slash_slash_dc: '/' '/' | ':' |
+
+%%
+
+void parse_events_error(YYLTYPE *loc, void *parse_state,
+ void *scanner __maybe_unused,
+ char const *msg __maybe_unused)
+{
+ parse_events_evlist_error(parse_state, loc->last_column, "parser error");
+}
diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c
new file mode 100644
index 000000000..a4a100425
--- /dev/null
+++ b/tools/perf/util/parse-regs-options.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include "util/debug.h"
+#include <subcmd/parse-options.h>
+#include "util/perf_regs.h"
+#include "util/parse-regs-options.h"
+
+static int
+__parse_regs(const struct option *opt, const char *str, int unset, bool intr)
+{
+ uint64_t *mode = (uint64_t *)opt->value;
+ const struct sample_reg *r = NULL;
+ char *s, *os = NULL, *p;
+ int ret = -1;
+ uint64_t mask;
+
+ if (unset)
+ return 0;
+
+ /*
+ * cannot set it twice
+ */
+ if (*mode)
+ return -1;
+
+ if (intr)
+ mask = arch__intr_reg_mask();
+ else
+ mask = arch__user_reg_mask();
+
+ /* str may be NULL in case no arg is passed to -I */
+ if (str) {
+ /* because str is read-only */
+ s = os = strdup(str);
+ if (!s)
+ return -1;
+
+ for (;;) {
+ p = strchr(s, ',');
+ if (p)
+ *p = '\0';
+
+ if (!strcmp(s, "?")) {
+ fprintf(stderr, "available registers: ");
+#ifdef HAVE_PERF_REGS_SUPPORT
+ for (r = sample_reg_masks; r->name; r++) {
+ if (r->mask & mask)
+ fprintf(stderr, "%s ", r->name);
+ }
+#endif
+ fputc('\n', stderr);
+ /* just printing available regs */
+ goto error;
+ }
+#ifdef HAVE_PERF_REGS_SUPPORT
+ for (r = sample_reg_masks; r->name; r++) {
+ if ((r->mask & mask) && !strcasecmp(s, r->name))
+ break;
+ }
+#endif
+ if (!r || !r->name) {
+ ui__warning("Unknown register \"%s\", check man page or run \"perf record %s?\"\n",
+ s, intr ? "-I" : "--user-regs=");
+ goto error;
+ }
+
+ *mode |= r->mask;
+
+ if (!p)
+ break;
+
+ s = p + 1;
+ }
+ }
+ ret = 0;
+
+ /* default to all possible regs */
+ if (*mode == 0)
+ *mode = mask;
+error:
+ free(os);
+ return ret;
+}
+
+int
+parse_user_regs(const struct option *opt, const char *str, int unset)
+{
+ return __parse_regs(opt, str, unset, false);
+}
+
+int
+parse_intr_regs(const struct option *opt, const char *str, int unset)
+{
+ return __parse_regs(opt, str, unset, true);
+}
diff --git a/tools/perf/util/parse-regs-options.h b/tools/perf/util/parse-regs-options.h
new file mode 100644
index 000000000..2b23d25c6
--- /dev/null
+++ b/tools/perf/util/parse-regs-options.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_PARSE_REGS_OPTIONS_H
+#define _PERF_PARSE_REGS_OPTIONS_H 1
+struct option;
+int parse_user_regs(const struct option *opt, const char *str, int unset);
+int parse_intr_regs(const struct option *opt, const char *str, int unset);
+#endif /* _PERF_PARSE_REGS_OPTIONS_H */
diff --git a/tools/perf/util/parse-sublevel-options.c b/tools/perf/util/parse-sublevel-options.c
new file mode 100644
index 000000000..a841d17ff
--- /dev/null
+++ b/tools/perf/util/parse-sublevel-options.c
@@ -0,0 +1,70 @@
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "util/debug.h"
+#include "util/parse-sublevel-options.h"
+
+static int parse_one_sublevel_option(const char *str,
+ struct sublevel_option *opts)
+{
+ struct sublevel_option *opt = opts;
+ char *vstr, *s = strdup(str);
+ int v = 1;
+
+ if (!s) {
+ pr_err("no memory\n");
+ return -1;
+ }
+
+ vstr = strchr(s, '=');
+ if (vstr)
+ *vstr++ = 0;
+
+ while (opt->name) {
+ if (!strcmp(s, opt->name))
+ break;
+ opt++;
+ }
+
+ if (!opt->name) {
+ pr_err("Unknown option name '%s'\n", s);
+ free(s);
+ return -1;
+ }
+
+ if (vstr)
+ v = atoi(vstr);
+
+ *opt->value_ptr = v;
+ free(s);
+ return 0;
+}
+
+/* parse options like --foo a=<n>,b,c... */
+int perf_parse_sublevel_options(const char *str, struct sublevel_option *opts)
+{
+ char *s = strdup(str);
+ char *p = NULL;
+ int ret;
+
+ if (!s) {
+ pr_err("no memory\n");
+ return -1;
+ }
+
+ p = strtok(s, ",");
+ while (p) {
+ ret = parse_one_sublevel_option(p, opts);
+ if (ret) {
+ free(s);
+ return ret;
+ }
+
+ p = strtok(NULL, ",");
+ }
+
+ free(s);
+ return 0;
+}
diff --git a/tools/perf/util/parse-sublevel-options.h b/tools/perf/util/parse-sublevel-options.h
new file mode 100644
index 000000000..578b18ef0
--- /dev/null
+++ b/tools/perf/util/parse-sublevel-options.h
@@ -0,0 +1,11 @@
+#ifndef _PERF_PARSE_SUBLEVEL_OPTIONS_H
+#define _PERF_PARSE_SUBLEVEL_OPTIONS_H
+
+struct sublevel_option {
+ const char *name;
+ int *value_ptr;
+};
+
+int perf_parse_sublevel_options(const char *str, struct sublevel_option *opts);
+
+#endif
diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c
new file mode 100644
index 000000000..00adf872b
--- /dev/null
+++ b/tools/perf/util/path.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "path.h"
+#include "cache.h"
+#include <linux/kernel.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <dirent.h>
+#include <unistd.h>
+
+static char *cleanup_path(char *path)
+{
+ /* Clean it up */
+ if (!memcmp(path, "./", 2)) {
+ path += 2;
+ while (*path == '/')
+ path++;
+ }
+ return path;
+}
+
+char *mkpath(char *path_buf, size_t sz, const char *fmt, ...)
+{
+ va_list args;
+ unsigned len;
+
+ va_start(args, fmt);
+ len = vsnprintf(path_buf, sz, fmt, args);
+ va_end(args);
+ if (len >= sz)
+ strncpy(path_buf, "/bad-path/", sz);
+ return cleanup_path(path_buf);
+}
+
+int path__join(char *bf, size_t size, const char *path1, const char *path2)
+{
+ return scnprintf(bf, size, "%s%s%s", path1, path1[0] ? "/" : "", path2);
+}
+
+int path__join3(char *bf, size_t size, const char *path1, const char *path2, const char *path3)
+{
+ return scnprintf(bf, size, "%s%s%s%s%s", path1, path1[0] ? "/" : "",
+ path2, path2[0] ? "/" : "", path3);
+}
+
+bool is_regular_file(const char *file)
+{
+ struct stat st;
+
+ if (stat(file, &st))
+ return false;
+
+ return S_ISREG(st.st_mode);
+}
+
+/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */
+bool is_directory(const char *base_path, const struct dirent *dent)
+{
+ char path[PATH_MAX];
+ struct stat st;
+
+ snprintf(path, sizeof(path), "%s/%s", base_path, dent->d_name);
+ if (stat(path, &st))
+ return false;
+
+ return S_ISDIR(st.st_mode);
+}
+
+bool is_executable_file(const char *base_path, const struct dirent *dent)
+{
+ char path[PATH_MAX];
+ struct stat st;
+
+ snprintf(path, sizeof(path), "%s/%s", base_path, dent->d_name);
+ if (stat(path, &st))
+ return false;
+
+ return !S_ISDIR(st.st_mode) && (st.st_mode & S_IXUSR);
+}
diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h
new file mode 100644
index 000000000..d94902c22
--- /dev/null
+++ b/tools/perf/util/path.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_PATH_H
+#define _PERF_PATH_H
+
+#include <stddef.h>
+#include <stdbool.h>
+
+struct dirent;
+
+int path__join(char *bf, size_t size, const char *path1, const char *path2);
+int path__join3(char *bf, size_t size, const char *path1, const char *path2, const char *path3);
+
+bool is_regular_file(const char *file);
+bool is_directory(const char *base_path, const struct dirent *dent);
+bool is_executable_file(const char *base_path, const struct dirent *dent);
+
+#endif /* _PERF_PATH_H */
diff --git a/tools/perf/util/perf-hooks-list.h b/tools/perf/util/perf-hooks-list.h
new file mode 100644
index 000000000..2867c07ee
--- /dev/null
+++ b/tools/perf/util/perf-hooks-list.h
@@ -0,0 +1,3 @@
+PERF_HOOK(record_start)
+PERF_HOOK(record_end)
+PERF_HOOK(test)
diff --git a/tools/perf/util/perf-hooks.c b/tools/perf/util/perf-hooks.c
new file mode 100644
index 000000000..7a0ab3507
--- /dev/null
+++ b/tools/perf/util/perf-hooks.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * perf_hooks.c
+ *
+ * Copyright (C) 2016 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2016 Huawei Inc.
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <setjmp.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include "util/debug.h"
+#include "util/perf-hooks.h"
+
+static sigjmp_buf jmpbuf;
+static const struct perf_hook_desc *current_perf_hook;
+
+void perf_hooks__invoke(const struct perf_hook_desc *desc)
+{
+ if (!(desc && desc->p_hook_func && *desc->p_hook_func))
+ return;
+
+ if (sigsetjmp(jmpbuf, 1)) {
+ pr_warning("Fatal error (SEGFAULT) in perf hook '%s'\n",
+ desc->hook_name);
+ *(current_perf_hook->p_hook_func) = NULL;
+ } else {
+ current_perf_hook = desc;
+ (**desc->p_hook_func)(desc->hook_ctx);
+ }
+ current_perf_hook = NULL;
+}
+
+void perf_hooks__recover(void)
+{
+ if (current_perf_hook)
+ siglongjmp(jmpbuf, 1);
+}
+
+#define PERF_HOOK(name) \
+perf_hook_func_t __perf_hook_func_##name = NULL; \
+struct perf_hook_desc __perf_hook_desc_##name = \
+ {.hook_name = #name, \
+ .p_hook_func = &__perf_hook_func_##name, \
+ .hook_ctx = NULL};
+#include "perf-hooks-list.h"
+#undef PERF_HOOK
+
+#define PERF_HOOK(name) \
+ &__perf_hook_desc_##name,
+
+static struct perf_hook_desc *perf_hooks[] = {
+#include "perf-hooks-list.h"
+};
+#undef PERF_HOOK
+
+int perf_hooks__set_hook(const char *hook_name,
+ perf_hook_func_t hook_func,
+ void *hook_ctx)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(perf_hooks); i++) {
+ if (strcmp(hook_name, perf_hooks[i]->hook_name) != 0)
+ continue;
+
+ if (*(perf_hooks[i]->p_hook_func))
+ pr_warning("Overwrite existing hook: %s\n", hook_name);
+ *(perf_hooks[i]->p_hook_func) = hook_func;
+ perf_hooks[i]->hook_ctx = hook_ctx;
+ return 0;
+ }
+ return -ENOENT;
+}
+
+perf_hook_func_t perf_hooks__get_hook(const char *hook_name)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(perf_hooks); i++) {
+ if (strcmp(hook_name, perf_hooks[i]->hook_name) != 0)
+ continue;
+
+ return *(perf_hooks[i]->p_hook_func);
+ }
+ return ERR_PTR(-ENOENT);
+}
diff --git a/tools/perf/util/perf-hooks.h b/tools/perf/util/perf-hooks.h
new file mode 100644
index 000000000..27fbec62d
--- /dev/null
+++ b/tools/perf/util/perf-hooks.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_UTIL_PERF_HOOKS_H
+#define PERF_UTIL_PERF_HOOKS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void (*perf_hook_func_t)(void *ctx);
+struct perf_hook_desc {
+ const char * const hook_name;
+ perf_hook_func_t * const p_hook_func;
+ void *hook_ctx;
+};
+
+extern void perf_hooks__invoke(const struct perf_hook_desc *);
+extern void perf_hooks__recover(void);
+
+#define PERF_HOOK(name) \
+extern struct perf_hook_desc __perf_hook_desc_##name; \
+static inline void perf_hooks__invoke_##name(void) \
+{ \
+ perf_hooks__invoke(&__perf_hook_desc_##name); \
+}
+
+#include "perf-hooks-list.h"
+#undef PERF_HOOK
+
+extern int
+perf_hooks__set_hook(const char *hook_name,
+ perf_hook_func_t hook_func,
+ void *hook_ctx);
+
+extern perf_hook_func_t
+perf_hooks__get_hook(const char *hook_name);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/tools/perf/util/perf-regs-arch/Build b/tools/perf/util/perf-regs-arch/Build
new file mode 100644
index 000000000..d9d596d33
--- /dev/null
+++ b/tools/perf/util/perf-regs-arch/Build
@@ -0,0 +1,9 @@
+perf-y += perf_regs_aarch64.o
+perf-y += perf_regs_arm.o
+perf-y += perf_regs_csky.o
+perf-y += perf_regs_loongarch.o
+perf-y += perf_regs_mips.o
+perf-y += perf_regs_powerpc.o
+perf-y += perf_regs_riscv.o
+perf-y += perf_regs_s390.o
+perf-y += perf_regs_x86.o
diff --git a/tools/perf/util/perf-regs-arch/perf_regs_aarch64.c b/tools/perf/util/perf-regs-arch/perf_regs_aarch64.c
new file mode 100644
index 000000000..696566c54
--- /dev/null
+++ b/tools/perf/util/perf-regs-arch/perf_regs_aarch64.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+
+#include "../perf_regs.h"
+#include "../../../arch/arm64/include/uapi/asm/perf_regs.h"
+
+const char *__perf_reg_name_arm64(int id)
+{
+ switch (id) {
+ case PERF_REG_ARM64_X0:
+ return "x0";
+ case PERF_REG_ARM64_X1:
+ return "x1";
+ case PERF_REG_ARM64_X2:
+ return "x2";
+ case PERF_REG_ARM64_X3:
+ return "x3";
+ case PERF_REG_ARM64_X4:
+ return "x4";
+ case PERF_REG_ARM64_X5:
+ return "x5";
+ case PERF_REG_ARM64_X6:
+ return "x6";
+ case PERF_REG_ARM64_X7:
+ return "x7";
+ case PERF_REG_ARM64_X8:
+ return "x8";
+ case PERF_REG_ARM64_X9:
+ return "x9";
+ case PERF_REG_ARM64_X10:
+ return "x10";
+ case PERF_REG_ARM64_X11:
+ return "x11";
+ case PERF_REG_ARM64_X12:
+ return "x12";
+ case PERF_REG_ARM64_X13:
+ return "x13";
+ case PERF_REG_ARM64_X14:
+ return "x14";
+ case PERF_REG_ARM64_X15:
+ return "x15";
+ case PERF_REG_ARM64_X16:
+ return "x16";
+ case PERF_REG_ARM64_X17:
+ return "x17";
+ case PERF_REG_ARM64_X18:
+ return "x18";
+ case PERF_REG_ARM64_X19:
+ return "x19";
+ case PERF_REG_ARM64_X20:
+ return "x20";
+ case PERF_REG_ARM64_X21:
+ return "x21";
+ case PERF_REG_ARM64_X22:
+ return "x22";
+ case PERF_REG_ARM64_X23:
+ return "x23";
+ case PERF_REG_ARM64_X24:
+ return "x24";
+ case PERF_REG_ARM64_X25:
+ return "x25";
+ case PERF_REG_ARM64_X26:
+ return "x26";
+ case PERF_REG_ARM64_X27:
+ return "x27";
+ case PERF_REG_ARM64_X28:
+ return "x28";
+ case PERF_REG_ARM64_X29:
+ return "x29";
+ case PERF_REG_ARM64_SP:
+ return "sp";
+ case PERF_REG_ARM64_LR:
+ return "lr";
+ case PERF_REG_ARM64_PC:
+ return "pc";
+ case PERF_REG_ARM64_VG:
+ return "vg";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+uint64_t __perf_reg_ip_arm64(void)
+{
+ return PERF_REG_ARM64_PC;
+}
+
+uint64_t __perf_reg_sp_arm64(void)
+{
+ return PERF_REG_ARM64_SP;
+}
+
+#endif
diff --git a/tools/perf/util/perf-regs-arch/perf_regs_arm.c b/tools/perf/util/perf-regs-arch/perf_regs_arm.c
new file mode 100644
index 000000000..700fd07cd
--- /dev/null
+++ b/tools/perf/util/perf-regs-arch/perf_regs_arm.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+
+#include "../perf_regs.h"
+#include "../../../arch/arm/include/uapi/asm/perf_regs.h"
+
+const char *__perf_reg_name_arm(int id)
+{
+ switch (id) {
+ case PERF_REG_ARM_R0:
+ return "r0";
+ case PERF_REG_ARM_R1:
+ return "r1";
+ case PERF_REG_ARM_R2:
+ return "r2";
+ case PERF_REG_ARM_R3:
+ return "r3";
+ case PERF_REG_ARM_R4:
+ return "r4";
+ case PERF_REG_ARM_R5:
+ return "r5";
+ case PERF_REG_ARM_R6:
+ return "r6";
+ case PERF_REG_ARM_R7:
+ return "r7";
+ case PERF_REG_ARM_R8:
+ return "r8";
+ case PERF_REG_ARM_R9:
+ return "r9";
+ case PERF_REG_ARM_R10:
+ return "r10";
+ case PERF_REG_ARM_FP:
+ return "fp";
+ case PERF_REG_ARM_IP:
+ return "ip";
+ case PERF_REG_ARM_SP:
+ return "sp";
+ case PERF_REG_ARM_LR:
+ return "lr";
+ case PERF_REG_ARM_PC:
+ return "pc";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+uint64_t __perf_reg_ip_arm(void)
+{
+ return PERF_REG_ARM_PC;
+}
+
+uint64_t __perf_reg_sp_arm(void)
+{
+ return PERF_REG_ARM_SP;
+}
+
+#endif
diff --git a/tools/perf/util/perf-regs-arch/perf_regs_csky.c b/tools/perf/util/perf-regs-arch/perf_regs_csky.c
new file mode 100644
index 000000000..a2841094e
--- /dev/null
+++ b/tools/perf/util/perf-regs-arch/perf_regs_csky.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+
+#include "../perf_regs.h"
+#include "../../arch/csky/include/uapi/asm/perf_regs.h"
+
+const char *__perf_reg_name_csky(int id)
+{
+ switch (id) {
+ case PERF_REG_CSKY_A0:
+ return "a0";
+ case PERF_REG_CSKY_A1:
+ return "a1";
+ case PERF_REG_CSKY_A2:
+ return "a2";
+ case PERF_REG_CSKY_A3:
+ return "a3";
+ case PERF_REG_CSKY_REGS0:
+ return "regs0";
+ case PERF_REG_CSKY_REGS1:
+ return "regs1";
+ case PERF_REG_CSKY_REGS2:
+ return "regs2";
+ case PERF_REG_CSKY_REGS3:
+ return "regs3";
+ case PERF_REG_CSKY_REGS4:
+ return "regs4";
+ case PERF_REG_CSKY_REGS5:
+ return "regs5";
+ case PERF_REG_CSKY_REGS6:
+ return "regs6";
+ case PERF_REG_CSKY_REGS7:
+ return "regs7";
+ case PERF_REG_CSKY_REGS8:
+ return "regs8";
+ case PERF_REG_CSKY_REGS9:
+ return "regs9";
+ case PERF_REG_CSKY_SP:
+ return "sp";
+ case PERF_REG_CSKY_LR:
+ return "lr";
+ case PERF_REG_CSKY_PC:
+ return "pc";
+#if defined(__CSKYABIV2__)
+ case PERF_REG_CSKY_EXREGS0:
+ return "exregs0";
+ case PERF_REG_CSKY_EXREGS1:
+ return "exregs1";
+ case PERF_REG_CSKY_EXREGS2:
+ return "exregs2";
+ case PERF_REG_CSKY_EXREGS3:
+ return "exregs3";
+ case PERF_REG_CSKY_EXREGS4:
+ return "exregs4";
+ case PERF_REG_CSKY_EXREGS5:
+ return "exregs5";
+ case PERF_REG_CSKY_EXREGS6:
+ return "exregs6";
+ case PERF_REG_CSKY_EXREGS7:
+ return "exregs7";
+ case PERF_REG_CSKY_EXREGS8:
+ return "exregs8";
+ case PERF_REG_CSKY_EXREGS9:
+ return "exregs9";
+ case PERF_REG_CSKY_EXREGS10:
+ return "exregs10";
+ case PERF_REG_CSKY_EXREGS11:
+ return "exregs11";
+ case PERF_REG_CSKY_EXREGS12:
+ return "exregs12";
+ case PERF_REG_CSKY_EXREGS13:
+ return "exregs13";
+ case PERF_REG_CSKY_EXREGS14:
+ return "exregs14";
+ case PERF_REG_CSKY_TLS:
+ return "tls";
+ case PERF_REG_CSKY_HI:
+ return "hi";
+ case PERF_REG_CSKY_LO:
+ return "lo";
+#endif
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+uint64_t __perf_reg_ip_csky(void)
+{
+ return PERF_REG_CSKY_PC;
+}
+
+uint64_t __perf_reg_sp_csky(void)
+{
+ return PERF_REG_CSKY_SP;
+}
+
+#endif
diff --git a/tools/perf/util/perf-regs-arch/perf_regs_loongarch.c b/tools/perf/util/perf-regs-arch/perf_regs_loongarch.c
new file mode 100644
index 000000000..a9ba0f934
--- /dev/null
+++ b/tools/perf/util/perf-regs-arch/perf_regs_loongarch.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+
+#include "../perf_regs.h"
+#include "../../../arch/loongarch/include/uapi/asm/perf_regs.h"
+
+const char *__perf_reg_name_loongarch(int id)
+{
+ switch (id) {
+ case PERF_REG_LOONGARCH_PC:
+ return "PC";
+ case PERF_REG_LOONGARCH_R1:
+ return "%r1";
+ case PERF_REG_LOONGARCH_R2:
+ return "%r2";
+ case PERF_REG_LOONGARCH_R3:
+ return "%r3";
+ case PERF_REG_LOONGARCH_R4:
+ return "%r4";
+ case PERF_REG_LOONGARCH_R5:
+ return "%r5";
+ case PERF_REG_LOONGARCH_R6:
+ return "%r6";
+ case PERF_REG_LOONGARCH_R7:
+ return "%r7";
+ case PERF_REG_LOONGARCH_R8:
+ return "%r8";
+ case PERF_REG_LOONGARCH_R9:
+ return "%r9";
+ case PERF_REG_LOONGARCH_R10:
+ return "%r10";
+ case PERF_REG_LOONGARCH_R11:
+ return "%r11";
+ case PERF_REG_LOONGARCH_R12:
+ return "%r12";
+ case PERF_REG_LOONGARCH_R13:
+ return "%r13";
+ case PERF_REG_LOONGARCH_R14:
+ return "%r14";
+ case PERF_REG_LOONGARCH_R15:
+ return "%r15";
+ case PERF_REG_LOONGARCH_R16:
+ return "%r16";
+ case PERF_REG_LOONGARCH_R17:
+ return "%r17";
+ case PERF_REG_LOONGARCH_R18:
+ return "%r18";
+ case PERF_REG_LOONGARCH_R19:
+ return "%r19";
+ case PERF_REG_LOONGARCH_R20:
+ return "%r20";
+ case PERF_REG_LOONGARCH_R21:
+ return "%r21";
+ case PERF_REG_LOONGARCH_R22:
+ return "%r22";
+ case PERF_REG_LOONGARCH_R23:
+ return "%r23";
+ case PERF_REG_LOONGARCH_R24:
+ return "%r24";
+ case PERF_REG_LOONGARCH_R25:
+ return "%r25";
+ case PERF_REG_LOONGARCH_R26:
+ return "%r26";
+ case PERF_REG_LOONGARCH_R27:
+ return "%r27";
+ case PERF_REG_LOONGARCH_R28:
+ return "%r28";
+ case PERF_REG_LOONGARCH_R29:
+ return "%r29";
+ case PERF_REG_LOONGARCH_R30:
+ return "%r30";
+ case PERF_REG_LOONGARCH_R31:
+ return "%r31";
+ default:
+ break;
+ }
+ return NULL;
+}
+
+uint64_t __perf_reg_ip_loongarch(void)
+{
+ return PERF_REG_LOONGARCH_PC;
+}
+
+uint64_t __perf_reg_sp_loongarch(void)
+{
+ return PERF_REG_LOONGARCH_R3;
+}
+
+#endif
diff --git a/tools/perf/util/perf-regs-arch/perf_regs_mips.c b/tools/perf/util/perf-regs-arch/perf_regs_mips.c
new file mode 100644
index 000000000..5a45830cf
--- /dev/null
+++ b/tools/perf/util/perf-regs-arch/perf_regs_mips.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+
+#include "../perf_regs.h"
+#include "../../../arch/mips/include/uapi/asm/perf_regs.h"
+
+const char *__perf_reg_name_mips(int id)
+{
+ switch (id) {
+ case PERF_REG_MIPS_PC:
+ return "PC";
+ case PERF_REG_MIPS_R1:
+ return "$1";
+ case PERF_REG_MIPS_R2:
+ return "$2";
+ case PERF_REG_MIPS_R3:
+ return "$3";
+ case PERF_REG_MIPS_R4:
+ return "$4";
+ case PERF_REG_MIPS_R5:
+ return "$5";
+ case PERF_REG_MIPS_R6:
+ return "$6";
+ case PERF_REG_MIPS_R7:
+ return "$7";
+ case PERF_REG_MIPS_R8:
+ return "$8";
+ case PERF_REG_MIPS_R9:
+ return "$9";
+ case PERF_REG_MIPS_R10:
+ return "$10";
+ case PERF_REG_MIPS_R11:
+ return "$11";
+ case PERF_REG_MIPS_R12:
+ return "$12";
+ case PERF_REG_MIPS_R13:
+ return "$13";
+ case PERF_REG_MIPS_R14:
+ return "$14";
+ case PERF_REG_MIPS_R15:
+ return "$15";
+ case PERF_REG_MIPS_R16:
+ return "$16";
+ case PERF_REG_MIPS_R17:
+ return "$17";
+ case PERF_REG_MIPS_R18:
+ return "$18";
+ case PERF_REG_MIPS_R19:
+ return "$19";
+ case PERF_REG_MIPS_R20:
+ return "$20";
+ case PERF_REG_MIPS_R21:
+ return "$21";
+ case PERF_REG_MIPS_R22:
+ return "$22";
+ case PERF_REG_MIPS_R23:
+ return "$23";
+ case PERF_REG_MIPS_R24:
+ return "$24";
+ case PERF_REG_MIPS_R25:
+ return "$25";
+ case PERF_REG_MIPS_R28:
+ return "$28";
+ case PERF_REG_MIPS_R29:
+ return "$29";
+ case PERF_REG_MIPS_R30:
+ return "$30";
+ case PERF_REG_MIPS_R31:
+ return "$31";
+ default:
+ break;
+ }
+ return NULL;
+}
+
+uint64_t __perf_reg_ip_mips(void)
+{
+ return PERF_REG_MIPS_PC;
+}
+
+uint64_t __perf_reg_sp_mips(void)
+{
+ return PERF_REG_MIPS_R29;
+}
+
+#endif
diff --git a/tools/perf/util/perf-regs-arch/perf_regs_powerpc.c b/tools/perf/util/perf-regs-arch/perf_regs_powerpc.c
new file mode 100644
index 000000000..1f0d682db
--- /dev/null
+++ b/tools/perf/util/perf-regs-arch/perf_regs_powerpc.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+
+#include "../perf_regs.h"
+#include "../../../arch/powerpc/include/uapi/asm/perf_regs.h"
+
+const char *__perf_reg_name_powerpc(int id)
+{
+ switch (id) {
+ case PERF_REG_POWERPC_R0:
+ return "r0";
+ case PERF_REG_POWERPC_R1:
+ return "r1";
+ case PERF_REG_POWERPC_R2:
+ return "r2";
+ case PERF_REG_POWERPC_R3:
+ return "r3";
+ case PERF_REG_POWERPC_R4:
+ return "r4";
+ case PERF_REG_POWERPC_R5:
+ return "r5";
+ case PERF_REG_POWERPC_R6:
+ return "r6";
+ case PERF_REG_POWERPC_R7:
+ return "r7";
+ case PERF_REG_POWERPC_R8:
+ return "r8";
+ case PERF_REG_POWERPC_R9:
+ return "r9";
+ case PERF_REG_POWERPC_R10:
+ return "r10";
+ case PERF_REG_POWERPC_R11:
+ return "r11";
+ case PERF_REG_POWERPC_R12:
+ return "r12";
+ case PERF_REG_POWERPC_R13:
+ return "r13";
+ case PERF_REG_POWERPC_R14:
+ return "r14";
+ case PERF_REG_POWERPC_R15:
+ return "r15";
+ case PERF_REG_POWERPC_R16:
+ return "r16";
+ case PERF_REG_POWERPC_R17:
+ return "r17";
+ case PERF_REG_POWERPC_R18:
+ return "r18";
+ case PERF_REG_POWERPC_R19:
+ return "r19";
+ case PERF_REG_POWERPC_R20:
+ return "r20";
+ case PERF_REG_POWERPC_R21:
+ return "r21";
+ case PERF_REG_POWERPC_R22:
+ return "r22";
+ case PERF_REG_POWERPC_R23:
+ return "r23";
+ case PERF_REG_POWERPC_R24:
+ return "r24";
+ case PERF_REG_POWERPC_R25:
+ return "r25";
+ case PERF_REG_POWERPC_R26:
+ return "r26";
+ case PERF_REG_POWERPC_R27:
+ return "r27";
+ case PERF_REG_POWERPC_R28:
+ return "r28";
+ case PERF_REG_POWERPC_R29:
+ return "r29";
+ case PERF_REG_POWERPC_R30:
+ return "r30";
+ case PERF_REG_POWERPC_R31:
+ return "r31";
+ case PERF_REG_POWERPC_NIP:
+ return "nip";
+ case PERF_REG_POWERPC_MSR:
+ return "msr";
+ case PERF_REG_POWERPC_ORIG_R3:
+ return "orig_r3";
+ case PERF_REG_POWERPC_CTR:
+ return "ctr";
+ case PERF_REG_POWERPC_LINK:
+ return "link";
+ case PERF_REG_POWERPC_XER:
+ return "xer";
+ case PERF_REG_POWERPC_CCR:
+ return "ccr";
+ case PERF_REG_POWERPC_SOFTE:
+ return "softe";
+ case PERF_REG_POWERPC_TRAP:
+ return "trap";
+ case PERF_REG_POWERPC_DAR:
+ return "dar";
+ case PERF_REG_POWERPC_DSISR:
+ return "dsisr";
+ case PERF_REG_POWERPC_SIER:
+ return "sier";
+ case PERF_REG_POWERPC_MMCRA:
+ return "mmcra";
+ case PERF_REG_POWERPC_MMCR0:
+ return "mmcr0";
+ case PERF_REG_POWERPC_MMCR1:
+ return "mmcr1";
+ case PERF_REG_POWERPC_MMCR2:
+ return "mmcr2";
+ case PERF_REG_POWERPC_MMCR3:
+ return "mmcr3";
+ case PERF_REG_POWERPC_SIER2:
+ return "sier2";
+ case PERF_REG_POWERPC_SIER3:
+ return "sier3";
+ case PERF_REG_POWERPC_PMC1:
+ return "pmc1";
+ case PERF_REG_POWERPC_PMC2:
+ return "pmc2";
+ case PERF_REG_POWERPC_PMC3:
+ return "pmc3";
+ case PERF_REG_POWERPC_PMC4:
+ return "pmc4";
+ case PERF_REG_POWERPC_PMC5:
+ return "pmc5";
+ case PERF_REG_POWERPC_PMC6:
+ return "pmc6";
+ case PERF_REG_POWERPC_SDAR:
+ return "sdar";
+ case PERF_REG_POWERPC_SIAR:
+ return "siar";
+ default:
+ break;
+ }
+ return NULL;
+}
+
+uint64_t __perf_reg_ip_powerpc(void)
+{
+ return PERF_REG_POWERPC_NIP;
+}
+
+uint64_t __perf_reg_sp_powerpc(void)
+{
+ return PERF_REG_POWERPC_R1;
+}
+
+#endif
diff --git a/tools/perf/util/perf-regs-arch/perf_regs_riscv.c b/tools/perf/util/perf-regs-arch/perf_regs_riscv.c
new file mode 100644
index 000000000..e432630be
--- /dev/null
+++ b/tools/perf/util/perf-regs-arch/perf_regs_riscv.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+
+#include "../perf_regs.h"
+#include "../../../arch/riscv/include/uapi/asm/perf_regs.h"
+
+const char *__perf_reg_name_riscv(int id)
+{
+ switch (id) {
+ case PERF_REG_RISCV_PC:
+ return "pc";
+ case PERF_REG_RISCV_RA:
+ return "ra";
+ case PERF_REG_RISCV_SP:
+ return "sp";
+ case PERF_REG_RISCV_GP:
+ return "gp";
+ case PERF_REG_RISCV_TP:
+ return "tp";
+ case PERF_REG_RISCV_T0:
+ return "t0";
+ case PERF_REG_RISCV_T1:
+ return "t1";
+ case PERF_REG_RISCV_T2:
+ return "t2";
+ case PERF_REG_RISCV_S0:
+ return "s0";
+ case PERF_REG_RISCV_S1:
+ return "s1";
+ case PERF_REG_RISCV_A0:
+ return "a0";
+ case PERF_REG_RISCV_A1:
+ return "a1";
+ case PERF_REG_RISCV_A2:
+ return "a2";
+ case PERF_REG_RISCV_A3:
+ return "a3";
+ case PERF_REG_RISCV_A4:
+ return "a4";
+ case PERF_REG_RISCV_A5:
+ return "a5";
+ case PERF_REG_RISCV_A6:
+ return "a6";
+ case PERF_REG_RISCV_A7:
+ return "a7";
+ case PERF_REG_RISCV_S2:
+ return "s2";
+ case PERF_REG_RISCV_S3:
+ return "s3";
+ case PERF_REG_RISCV_S4:
+ return "s4";
+ case PERF_REG_RISCV_S5:
+ return "s5";
+ case PERF_REG_RISCV_S6:
+ return "s6";
+ case PERF_REG_RISCV_S7:
+ return "s7";
+ case PERF_REG_RISCV_S8:
+ return "s8";
+ case PERF_REG_RISCV_S9:
+ return "s9";
+ case PERF_REG_RISCV_S10:
+ return "s10";
+ case PERF_REG_RISCV_S11:
+ return "s11";
+ case PERF_REG_RISCV_T3:
+ return "t3";
+ case PERF_REG_RISCV_T4:
+ return "t4";
+ case PERF_REG_RISCV_T5:
+ return "t5";
+ case PERF_REG_RISCV_T6:
+ return "t6";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+uint64_t __perf_reg_ip_riscv(void)
+{
+ return PERF_REG_RISCV_PC;
+}
+
+uint64_t __perf_reg_sp_riscv(void)
+{
+ return PERF_REG_RISCV_SP;
+}
+
+#endif
diff --git a/tools/perf/util/perf-regs-arch/perf_regs_s390.c b/tools/perf/util/perf-regs-arch/perf_regs_s390.c
new file mode 100644
index 000000000..1c7a46db7
--- /dev/null
+++ b/tools/perf/util/perf-regs-arch/perf_regs_s390.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+
+#include "../perf_regs.h"
+#include "../../../arch/s390/include/uapi/asm/perf_regs.h"
+
+const char *__perf_reg_name_s390(int id)
+{
+ switch (id) {
+ case PERF_REG_S390_R0:
+ return "R0";
+ case PERF_REG_S390_R1:
+ return "R1";
+ case PERF_REG_S390_R2:
+ return "R2";
+ case PERF_REG_S390_R3:
+ return "R3";
+ case PERF_REG_S390_R4:
+ return "R4";
+ case PERF_REG_S390_R5:
+ return "R5";
+ case PERF_REG_S390_R6:
+ return "R6";
+ case PERF_REG_S390_R7:
+ return "R7";
+ case PERF_REG_S390_R8:
+ return "R8";
+ case PERF_REG_S390_R9:
+ return "R9";
+ case PERF_REG_S390_R10:
+ return "R10";
+ case PERF_REG_S390_R11:
+ return "R11";
+ case PERF_REG_S390_R12:
+ return "R12";
+ case PERF_REG_S390_R13:
+ return "R13";
+ case PERF_REG_S390_R14:
+ return "R14";
+ case PERF_REG_S390_R15:
+ return "R15";
+ case PERF_REG_S390_FP0:
+ return "FP0";
+ case PERF_REG_S390_FP1:
+ return "FP1";
+ case PERF_REG_S390_FP2:
+ return "FP2";
+ case PERF_REG_S390_FP3:
+ return "FP3";
+ case PERF_REG_S390_FP4:
+ return "FP4";
+ case PERF_REG_S390_FP5:
+ return "FP5";
+ case PERF_REG_S390_FP6:
+ return "FP6";
+ case PERF_REG_S390_FP7:
+ return "FP7";
+ case PERF_REG_S390_FP8:
+ return "FP8";
+ case PERF_REG_S390_FP9:
+ return "FP9";
+ case PERF_REG_S390_FP10:
+ return "FP10";
+ case PERF_REG_S390_FP11:
+ return "FP11";
+ case PERF_REG_S390_FP12:
+ return "FP12";
+ case PERF_REG_S390_FP13:
+ return "FP13";
+ case PERF_REG_S390_FP14:
+ return "FP14";
+ case PERF_REG_S390_FP15:
+ return "FP15";
+ case PERF_REG_S390_MASK:
+ return "MASK";
+ case PERF_REG_S390_PC:
+ return "PC";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+uint64_t __perf_reg_ip_s390(void)
+{
+ return PERF_REG_S390_PC;
+}
+
+uint64_t __perf_reg_sp_s390(void)
+{
+ return PERF_REG_S390_R15;
+}
+
+#endif
diff --git a/tools/perf/util/perf-regs-arch/perf_regs_x86.c b/tools/perf/util/perf-regs-arch/perf_regs_x86.c
new file mode 100644
index 000000000..873c620f0
--- /dev/null
+++ b/tools/perf/util/perf-regs-arch/perf_regs_x86.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+
+#include "../perf_regs.h"
+#include "../../../arch/x86/include/uapi/asm/perf_regs.h"
+
+const char *__perf_reg_name_x86(int id)
+{
+ switch (id) {
+ case PERF_REG_X86_AX:
+ return "AX";
+ case PERF_REG_X86_BX:
+ return "BX";
+ case PERF_REG_X86_CX:
+ return "CX";
+ case PERF_REG_X86_DX:
+ return "DX";
+ case PERF_REG_X86_SI:
+ return "SI";
+ case PERF_REG_X86_DI:
+ return "DI";
+ case PERF_REG_X86_BP:
+ return "BP";
+ case PERF_REG_X86_SP:
+ return "SP";
+ case PERF_REG_X86_IP:
+ return "IP";
+ case PERF_REG_X86_FLAGS:
+ return "FLAGS";
+ case PERF_REG_X86_CS:
+ return "CS";
+ case PERF_REG_X86_SS:
+ return "SS";
+ case PERF_REG_X86_DS:
+ return "DS";
+ case PERF_REG_X86_ES:
+ return "ES";
+ case PERF_REG_X86_FS:
+ return "FS";
+ case PERF_REG_X86_GS:
+ return "GS";
+ case PERF_REG_X86_R8:
+ return "R8";
+ case PERF_REG_X86_R9:
+ return "R9";
+ case PERF_REG_X86_R10:
+ return "R10";
+ case PERF_REG_X86_R11:
+ return "R11";
+ case PERF_REG_X86_R12:
+ return "R12";
+ case PERF_REG_X86_R13:
+ return "R13";
+ case PERF_REG_X86_R14:
+ return "R14";
+ case PERF_REG_X86_R15:
+ return "R15";
+
+#define XMM(x) \
+ case PERF_REG_X86_XMM ## x: \
+ case PERF_REG_X86_XMM ## x + 1: \
+ return "XMM" #x;
+ XMM(0)
+ XMM(1)
+ XMM(2)
+ XMM(3)
+ XMM(4)
+ XMM(5)
+ XMM(6)
+ XMM(7)
+ XMM(8)
+ XMM(9)
+ XMM(10)
+ XMM(11)
+ XMM(12)
+ XMM(13)
+ XMM(14)
+ XMM(15)
+#undef XMM
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+uint64_t __perf_reg_ip_x86(void)
+{
+ return PERF_REG_X86_IP;
+}
+
+uint64_t __perf_reg_sp_x86(void)
+{
+ return PERF_REG_X86_SP;
+}
+
+#endif
diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c
new file mode 100644
index 000000000..e1e2d7015
--- /dev/null
+++ b/tools/perf/util/perf_api_probe.c
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "perf-sys.h"
+#include "util/cloexec.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/parse-events.h"
+#include "util/perf_api_probe.h"
+#include <perf/cpumap.h>
+#include <errno.h>
+
+typedef void (*setup_probe_fn_t)(struct evsel *evsel);
+
+static int perf_do_probe_api(setup_probe_fn_t fn, struct perf_cpu cpu, const char *str)
+{
+ struct evlist *evlist;
+ struct evsel *evsel;
+ unsigned long flags = perf_event_open_cloexec_flag();
+ int err = -EAGAIN, fd;
+ static pid_t pid = -1;
+
+ evlist = evlist__new();
+ if (!evlist)
+ return -ENOMEM;
+
+ if (parse_event(evlist, str))
+ goto out_delete;
+
+ evsel = evlist__first(evlist);
+
+ while (1) {
+ fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1, flags);
+ if (fd < 0) {
+ if (pid == -1 && errno == EACCES) {
+ pid = 0;
+ continue;
+ }
+ goto out_delete;
+ }
+ break;
+ }
+ close(fd);
+
+ fn(evsel);
+
+ fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1, flags);
+ if (fd < 0) {
+ if (errno == EINVAL)
+ err = -EINVAL;
+ goto out_delete;
+ }
+ close(fd);
+ err = 0;
+
+out_delete:
+ evlist__delete(evlist);
+ return err;
+}
+
+static bool perf_probe_api(setup_probe_fn_t fn)
+{
+ const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL};
+ struct perf_cpu_map *cpus;
+ struct perf_cpu cpu;
+ int ret, i = 0;
+
+ cpus = perf_cpu_map__new(NULL);
+ if (!cpus)
+ return false;
+ cpu = perf_cpu_map__cpu(cpus, 0);
+ perf_cpu_map__put(cpus);
+
+ do {
+ ret = perf_do_probe_api(fn, cpu, try[i++]);
+ if (!ret)
+ return true;
+ } while (ret == -EAGAIN && try[i]);
+
+ return false;
+}
+
+static void perf_probe_sample_identifier(struct evsel *evsel)
+{
+ evsel->core.attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
+}
+
+static void perf_probe_comm_exec(struct evsel *evsel)
+{
+ evsel->core.attr.comm_exec = 1;
+}
+
+static void perf_probe_context_switch(struct evsel *evsel)
+{
+ evsel->core.attr.context_switch = 1;
+}
+
+static void perf_probe_text_poke(struct evsel *evsel)
+{
+ evsel->core.attr.text_poke = 1;
+}
+
+static void perf_probe_build_id(struct evsel *evsel)
+{
+ evsel->core.attr.build_id = 1;
+}
+
+static void perf_probe_cgroup(struct evsel *evsel)
+{
+ evsel->core.attr.cgroup = 1;
+}
+
+bool perf_can_sample_identifier(void)
+{
+ return perf_probe_api(perf_probe_sample_identifier);
+}
+
+bool perf_can_comm_exec(void)
+{
+ return perf_probe_api(perf_probe_comm_exec);
+}
+
+bool perf_can_record_switch_events(void)
+{
+ return perf_probe_api(perf_probe_context_switch);
+}
+
+bool perf_can_record_text_poke_events(void)
+{
+ return perf_probe_api(perf_probe_text_poke);
+}
+
+bool perf_can_record_cpu_wide(void)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_CPU_CLOCK,
+ .exclude_kernel = 1,
+ };
+ struct perf_cpu_map *cpus;
+ struct perf_cpu cpu;
+ int fd;
+
+ cpus = perf_cpu_map__new(NULL);
+ if (!cpus)
+ return false;
+
+ cpu = perf_cpu_map__cpu(cpus, 0);
+ perf_cpu_map__put(cpus);
+
+ fd = sys_perf_event_open(&attr, -1, cpu.cpu, -1, 0);
+ if (fd < 0)
+ return false;
+ close(fd);
+
+ return true;
+}
+
+/*
+ * Architectures are expected to know if AUX area sampling is supported by the
+ * hardware. Here we check for kernel support.
+ */
+bool perf_can_aux_sample(void)
+{
+ struct perf_event_attr attr = {
+ .size = sizeof(struct perf_event_attr),
+ .exclude_kernel = 1,
+ /*
+ * Non-zero value causes the kernel to calculate the effective
+ * attribute size up to that byte.
+ */
+ .aux_sample_size = 1,
+ };
+ int fd;
+
+ fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
+ /*
+ * If the kernel attribute is big enough to contain aux_sample_size
+ * then we assume that it is supported. We are relying on the kernel to
+ * validate the attribute size before anything else that could be wrong.
+ */
+ if (fd < 0 && errno == E2BIG)
+ return false;
+ if (fd >= 0)
+ close(fd);
+
+ return true;
+}
+
+bool perf_can_record_build_id(void)
+{
+ return perf_probe_api(perf_probe_build_id);
+}
+
+bool perf_can_record_cgroup(void)
+{
+ return perf_probe_api(perf_probe_cgroup);
+}
diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h
new file mode 100644
index 000000000..b104168ef
--- /dev/null
+++ b/tools/perf/util/perf_api_probe.h
@@ -0,0 +1,17 @@
+
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_API_PROBE_H
+#define __PERF_API_PROBE_H
+
+#include <stdbool.h>
+
+bool perf_can_aux_sample(void);
+bool perf_can_comm_exec(void);
+bool perf_can_record_cpu_wide(void);
+bool perf_can_record_switch_events(void);
+bool perf_can_record_text_poke_events(void);
+bool perf_can_sample_identifier(void);
+bool perf_can_record_build_id(void);
+bool perf_can_record_cgroup(void);
+
+#endif // __PERF_API_PROBE_H
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
new file mode 100644
index 000000000..224799145
--- /dev/null
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -0,0 +1,327 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/perf_event.h>
+#include "util/evsel_fprintf.h"
+#include "trace-event.h"
+
+struct bit_names {
+ int bit;
+ const char *name;
+};
+
+static void __p_bits(char *buf, size_t size, u64 value, struct bit_names *bits)
+{
+ bool first_bit = true;
+ int i = 0;
+
+ do {
+ if (value & bits[i].bit) {
+ buf += scnprintf(buf, size, "%s%s", first_bit ? "" : "|", bits[i].name);
+ first_bit = false;
+ }
+ } while (bits[++i].name != NULL);
+}
+
+static void __p_sample_type(char *buf, size_t size, u64 value)
+{
+#define bit_name(n) { PERF_SAMPLE_##n, #n }
+ struct bit_names bits[] = {
+ bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR),
+ bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU),
+ bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
+ bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
+ bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
+ bit_name(WEIGHT), bit_name(PHYS_ADDR), bit_name(AUX),
+ bit_name(CGROUP), bit_name(DATA_PAGE_SIZE), bit_name(CODE_PAGE_SIZE),
+ bit_name(WEIGHT_STRUCT),
+ { .name = NULL, }
+ };
+#undef bit_name
+ __p_bits(buf, size, value, bits);
+}
+
+static void __p_branch_sample_type(char *buf, size_t size, u64 value)
+{
+#define bit_name(n) { PERF_SAMPLE_BRANCH_##n, #n }
+ struct bit_names bits[] = {
+ bit_name(USER), bit_name(KERNEL), bit_name(HV), bit_name(ANY),
+ bit_name(ANY_CALL), bit_name(ANY_RETURN), bit_name(IND_CALL),
+ bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
+ bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
+ bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
+ bit_name(TYPE_SAVE), bit_name(HW_INDEX), bit_name(PRIV_SAVE),
+ { .name = NULL, }
+ };
+#undef bit_name
+ __p_bits(buf, size, value, bits);
+}
+
+static void __p_read_format(char *buf, size_t size, u64 value)
+{
+#define bit_name(n) { PERF_FORMAT_##n, #n }
+ struct bit_names bits[] = {
+ bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING),
+ bit_name(ID), bit_name(GROUP), bit_name(LOST),
+ { .name = NULL, }
+ };
+#undef bit_name
+ __p_bits(buf, size, value, bits);
+}
+
+#define ENUM_ID_TO_STR_CASE(x) case x: return (#x);
+static const char *stringify_perf_type_id(u64 value)
+{
+ switch (value) {
+ ENUM_ID_TO_STR_CASE(PERF_TYPE_HARDWARE)
+ ENUM_ID_TO_STR_CASE(PERF_TYPE_SOFTWARE)
+ ENUM_ID_TO_STR_CASE(PERF_TYPE_TRACEPOINT)
+ ENUM_ID_TO_STR_CASE(PERF_TYPE_HW_CACHE)
+ ENUM_ID_TO_STR_CASE(PERF_TYPE_RAW)
+ ENUM_ID_TO_STR_CASE(PERF_TYPE_BREAKPOINT)
+ default:
+ return NULL;
+ }
+}
+
+static const char *stringify_perf_hw_id(u64 value)
+{
+ switch (value) {
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CPU_CYCLES)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_INSTRUCTIONS)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_REFERENCES)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_MISSES)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_BRANCH_MISSES)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_BUS_CYCLES)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_STALLED_CYCLES_FRONTEND)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_STALLED_CYCLES_BACKEND)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_REF_CPU_CYCLES)
+ default:
+ return NULL;
+ }
+}
+
+static const char *stringify_perf_hw_cache_id(u64 value)
+{
+ switch (value) {
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_L1D)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_L1I)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_LL)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_DTLB)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_ITLB)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_BPU)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_NODE)
+ default:
+ return NULL;
+ }
+}
+
+static const char *stringify_perf_hw_cache_op_id(u64 value)
+{
+ switch (value) {
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_OP_READ)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_OP_WRITE)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_OP_PREFETCH)
+ default:
+ return NULL;
+ }
+}
+
+static const char *stringify_perf_hw_cache_op_result_id(u64 value)
+{
+ switch (value) {
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_RESULT_ACCESS)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_RESULT_MISS)
+ default:
+ return NULL;
+ }
+}
+
+static const char *stringify_perf_sw_id(u64 value)
+{
+ switch (value) {
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_CPU_CLOCK)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_TASK_CLOCK)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_PAGE_FAULTS)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_CONTEXT_SWITCHES)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_CPU_MIGRATIONS)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_PAGE_FAULTS_MIN)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_PAGE_FAULTS_MAJ)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_ALIGNMENT_FAULTS)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_EMULATION_FAULTS)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_DUMMY)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_BPF_OUTPUT)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_CGROUP_SWITCHES)
+ default:
+ return NULL;
+ }
+}
+#undef ENUM_ID_TO_STR_CASE
+
+#define PRINT_ID(_s, _f) \
+do { \
+ const char *__s = _s; \
+ if (__s == NULL) \
+ snprintf(buf, size, _f, value); \
+ else \
+ snprintf(buf, size, _f" (%s)", value, __s); \
+} while (0)
+#define print_id_unsigned(_s) PRINT_ID(_s, "%"PRIu64)
+#define print_id_hex(_s) PRINT_ID(_s, "%#"PRIx64)
+
+static void __p_type_id(char *buf, size_t size, u64 value)
+{
+ print_id_unsigned(stringify_perf_type_id(value));
+}
+
+static void __p_config_hw_id(char *buf, size_t size, u64 value)
+{
+ print_id_hex(stringify_perf_hw_id(value));
+}
+
+static void __p_config_sw_id(char *buf, size_t size, u64 value)
+{
+ print_id_hex(stringify_perf_sw_id(value));
+}
+
+static void __p_config_hw_cache_id(char *buf, size_t size, u64 value)
+{
+ const char *hw_cache_str = stringify_perf_hw_cache_id(value & 0xff);
+ const char *hw_cache_op_str =
+ stringify_perf_hw_cache_op_id((value & 0xff00) >> 8);
+ const char *hw_cache_op_result_str =
+ stringify_perf_hw_cache_op_result_id((value & 0xff0000) >> 16);
+
+ if (hw_cache_str == NULL || hw_cache_op_str == NULL ||
+ hw_cache_op_result_str == NULL) {
+ snprintf(buf, size, "%#"PRIx64, value);
+ } else {
+ snprintf(buf, size, "%#"PRIx64" (%s | %s | %s)", value,
+ hw_cache_op_result_str, hw_cache_op_str, hw_cache_str);
+ }
+}
+
+#ifdef HAVE_LIBTRACEEVENT
+static void __p_config_tracepoint_id(char *buf, size_t size, u64 value)
+{
+ char *str = tracepoint_id_to_name(value);
+
+ print_id_hex(str);
+ free(str);
+}
+#endif
+
+static void __p_config_id(char *buf, size_t size, u32 type, u64 value)
+{
+ switch (type) {
+ case PERF_TYPE_HARDWARE:
+ return __p_config_hw_id(buf, size, value);
+ case PERF_TYPE_SOFTWARE:
+ return __p_config_sw_id(buf, size, value);
+ case PERF_TYPE_HW_CACHE:
+ return __p_config_hw_cache_id(buf, size, value);
+ case PERF_TYPE_TRACEPOINT:
+#ifdef HAVE_LIBTRACEEVENT
+ return __p_config_tracepoint_id(buf, size, value);
+#endif
+ case PERF_TYPE_RAW:
+ case PERF_TYPE_BREAKPOINT:
+ default:
+ snprintf(buf, size, "%#"PRIx64, value);
+ return;
+ }
+}
+
+#define BUF_SIZE 1024
+
+#define p_hex(val) snprintf(buf, BUF_SIZE, "%#"PRIx64, (uint64_t)(val))
+#define p_unsigned(val) snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val))
+#define p_signed(val) snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val))
+#define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val)
+#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val)
+#define p_read_format(val) __p_read_format(buf, BUF_SIZE, val)
+#define p_type_id(val) __p_type_id(buf, BUF_SIZE, val)
+#define p_config_id(val) __p_config_id(buf, BUF_SIZE, attr->type, val)
+
+#define PRINT_ATTRn(_n, _f, _p, _a) \
+do { \
+ if (_a || attr->_f) { \
+ _p(attr->_f); \
+ ret += attr__fprintf(fp, _n, buf, priv);\
+ } \
+} while (0)
+
+#define PRINT_ATTRf(_f, _p) PRINT_ATTRn(#_f, _f, _p, false)
+
+int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
+ attr__fprintf_f attr__fprintf, void *priv)
+{
+ char buf[BUF_SIZE];
+ int ret = 0;
+
+ PRINT_ATTRn("type", type, p_type_id, true);
+ PRINT_ATTRf(size, p_unsigned);
+ PRINT_ATTRn("config", config, p_config_id, true);
+ PRINT_ATTRn("{ sample_period, sample_freq }", sample_period, p_unsigned, false);
+ PRINT_ATTRf(sample_type, p_sample_type);
+ PRINT_ATTRf(read_format, p_read_format);
+
+ PRINT_ATTRf(disabled, p_unsigned);
+ PRINT_ATTRf(inherit, p_unsigned);
+ PRINT_ATTRf(pinned, p_unsigned);
+ PRINT_ATTRf(exclusive, p_unsigned);
+ PRINT_ATTRf(exclude_user, p_unsigned);
+ PRINT_ATTRf(exclude_kernel, p_unsigned);
+ PRINT_ATTRf(exclude_hv, p_unsigned);
+ PRINT_ATTRf(exclude_idle, p_unsigned);
+ PRINT_ATTRf(mmap, p_unsigned);
+ PRINT_ATTRf(comm, p_unsigned);
+ PRINT_ATTRf(freq, p_unsigned);
+ PRINT_ATTRf(inherit_stat, p_unsigned);
+ PRINT_ATTRf(enable_on_exec, p_unsigned);
+ PRINT_ATTRf(task, p_unsigned);
+ PRINT_ATTRf(watermark, p_unsigned);
+ PRINT_ATTRf(precise_ip, p_unsigned);
+ PRINT_ATTRf(mmap_data, p_unsigned);
+ PRINT_ATTRf(sample_id_all, p_unsigned);
+ PRINT_ATTRf(exclude_host, p_unsigned);
+ PRINT_ATTRf(exclude_guest, p_unsigned);
+ PRINT_ATTRf(exclude_callchain_kernel, p_unsigned);
+ PRINT_ATTRf(exclude_callchain_user, p_unsigned);
+ PRINT_ATTRf(mmap2, p_unsigned);
+ PRINT_ATTRf(comm_exec, p_unsigned);
+ PRINT_ATTRf(use_clockid, p_unsigned);
+ PRINT_ATTRf(context_switch, p_unsigned);
+ PRINT_ATTRf(write_backward, p_unsigned);
+ PRINT_ATTRf(namespaces, p_unsigned);
+ PRINT_ATTRf(ksymbol, p_unsigned);
+ PRINT_ATTRf(bpf_event, p_unsigned);
+ PRINT_ATTRf(aux_output, p_unsigned);
+ PRINT_ATTRf(cgroup, p_unsigned);
+ PRINT_ATTRf(text_poke, p_unsigned);
+ PRINT_ATTRf(build_id, p_unsigned);
+ PRINT_ATTRf(inherit_thread, p_unsigned);
+ PRINT_ATTRf(remove_on_exec, p_unsigned);
+ PRINT_ATTRf(sigtrap, p_unsigned);
+
+ PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned, false);
+ PRINT_ATTRf(bp_type, p_unsigned);
+ PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex, false);
+ PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex, false);
+ PRINT_ATTRf(branch_sample_type, p_branch_sample_type);
+ PRINT_ATTRf(sample_regs_user, p_hex);
+ PRINT_ATTRf(sample_stack_user, p_unsigned);
+ PRINT_ATTRf(clockid, p_signed);
+ PRINT_ATTRf(sample_regs_intr, p_hex);
+ PRINT_ATTRf(aux_watermark, p_unsigned);
+ PRINT_ATTRf(sample_max_stack, p_unsigned);
+ PRINT_ATTRf(aux_sample_size, p_unsigned);
+ PRINT_ATTRf(sig_data, p_unsigned);
+
+ return ret;
+}
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
new file mode 100644
index 000000000..e2275856b
--- /dev/null
+++ b/tools/perf/util/perf_regs.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <string.h>
+#include "perf_regs.h"
+#include "util/sample.h"
+#include "debug.h"
+
+int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused,
+ char **new_op __maybe_unused)
+{
+ return SDT_ARG_SKIP;
+}
+
+uint64_t __weak arch__intr_reg_mask(void)
+{
+ return 0;
+}
+
+uint64_t __weak arch__user_reg_mask(void)
+{
+ return 0;
+}
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+
+const char *perf_reg_name(int id, const char *arch)
+{
+ const char *reg_name = NULL;
+
+ if (!strcmp(arch, "csky"))
+ reg_name = __perf_reg_name_csky(id);
+ else if (!strcmp(arch, "loongarch"))
+ reg_name = __perf_reg_name_loongarch(id);
+ else if (!strcmp(arch, "mips"))
+ reg_name = __perf_reg_name_mips(id);
+ else if (!strcmp(arch, "powerpc"))
+ reg_name = __perf_reg_name_powerpc(id);
+ else if (!strcmp(arch, "riscv"))
+ reg_name = __perf_reg_name_riscv(id);
+ else if (!strcmp(arch, "s390"))
+ reg_name = __perf_reg_name_s390(id);
+ else if (!strcmp(arch, "x86"))
+ reg_name = __perf_reg_name_x86(id);
+ else if (!strcmp(arch, "arm"))
+ reg_name = __perf_reg_name_arm(id);
+ else if (!strcmp(arch, "arm64"))
+ reg_name = __perf_reg_name_arm64(id);
+
+ return reg_name ?: "unknown";
+}
+
+int perf_reg_value(u64 *valp, struct regs_dump *regs, int id)
+{
+ int i, idx = 0;
+ u64 mask = regs->mask;
+
+ if ((u64)id >= PERF_SAMPLE_REGS_CACHE_SIZE)
+ return -EINVAL;
+
+ if (regs->cache_mask & (1ULL << id))
+ goto out;
+
+ if (!(mask & (1ULL << id)))
+ return -EINVAL;
+
+ for (i = 0; i < id; i++) {
+ if (mask & (1ULL << i))
+ idx++;
+ }
+
+ regs->cache_mask |= (1ULL << id);
+ regs->cache_regs[id] = regs->regs[idx];
+
+out:
+ *valp = regs->cache_regs[id];
+ return 0;
+}
+
+uint64_t perf_arch_reg_ip(const char *arch)
+{
+ if (!strcmp(arch, "arm"))
+ return __perf_reg_ip_arm();
+ else if (!strcmp(arch, "arm64"))
+ return __perf_reg_ip_arm64();
+ else if (!strcmp(arch, "csky"))
+ return __perf_reg_ip_csky();
+ else if (!strcmp(arch, "loongarch"))
+ return __perf_reg_ip_loongarch();
+ else if (!strcmp(arch, "mips"))
+ return __perf_reg_ip_mips();
+ else if (!strcmp(arch, "powerpc"))
+ return __perf_reg_ip_powerpc();
+ else if (!strcmp(arch, "riscv"))
+ return __perf_reg_ip_riscv();
+ else if (!strcmp(arch, "s390"))
+ return __perf_reg_ip_s390();
+ else if (!strcmp(arch, "x86"))
+ return __perf_reg_ip_x86();
+
+ pr_err("Fail to find IP register for arch %s, returns 0\n", arch);
+ return 0;
+}
+
+uint64_t perf_arch_reg_sp(const char *arch)
+{
+ if (!strcmp(arch, "arm"))
+ return __perf_reg_sp_arm();
+ else if (!strcmp(arch, "arm64"))
+ return __perf_reg_sp_arm64();
+ else if (!strcmp(arch, "csky"))
+ return __perf_reg_sp_csky();
+ else if (!strcmp(arch, "loongarch"))
+ return __perf_reg_sp_loongarch();
+ else if (!strcmp(arch, "mips"))
+ return __perf_reg_sp_mips();
+ else if (!strcmp(arch, "powerpc"))
+ return __perf_reg_sp_powerpc();
+ else if (!strcmp(arch, "riscv"))
+ return __perf_reg_sp_riscv();
+ else if (!strcmp(arch, "s390"))
+ return __perf_reg_sp_s390();
+ else if (!strcmp(arch, "x86"))
+ return __perf_reg_sp_x86();
+
+ pr_err("Fail to find SP register for arch %s, returns 0\n", arch);
+ return 0;
+}
+
+#endif
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
new file mode 100644
index 000000000..ecd2a5362
--- /dev/null
+++ b/tools/perf/util/perf_regs.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_REGS_H
+#define __PERF_REGS_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+
+struct regs_dump;
+
+struct sample_reg {
+ const char *name;
+ uint64_t mask;
+};
+
+#define SMPL_REG_MASK(b) (1ULL << (b))
+#define SMPL_REG(n, b) { .name = #n, .mask = SMPL_REG_MASK(b) }
+#define SMPL_REG2_MASK(b) (3ULL << (b))
+#define SMPL_REG2(n, b) { .name = #n, .mask = SMPL_REG2_MASK(b) }
+#define SMPL_REG_END { .name = NULL }
+
+enum {
+ SDT_ARG_VALID = 0,
+ SDT_ARG_SKIP,
+};
+
+int arch_sdt_arg_parse_op(char *old_op, char **new_op);
+uint64_t arch__intr_reg_mask(void);
+uint64_t arch__user_reg_mask(void);
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+extern const struct sample_reg sample_reg_masks[];
+
+const char *perf_reg_name(int id, const char *arch);
+int perf_reg_value(u64 *valp, struct regs_dump *regs, int id);
+uint64_t perf_arch_reg_ip(const char *arch);
+uint64_t perf_arch_reg_sp(const char *arch);
+const char *__perf_reg_name_arm64(int id);
+uint64_t __perf_reg_ip_arm64(void);
+uint64_t __perf_reg_sp_arm64(void);
+const char *__perf_reg_name_arm(int id);
+uint64_t __perf_reg_ip_arm(void);
+uint64_t __perf_reg_sp_arm(void);
+const char *__perf_reg_name_csky(int id);
+uint64_t __perf_reg_ip_csky(void);
+uint64_t __perf_reg_sp_csky(void);
+const char *__perf_reg_name_loongarch(int id);
+uint64_t __perf_reg_ip_loongarch(void);
+uint64_t __perf_reg_sp_loongarch(void);
+const char *__perf_reg_name_mips(int id);
+uint64_t __perf_reg_ip_mips(void);
+uint64_t __perf_reg_sp_mips(void);
+const char *__perf_reg_name_powerpc(int id);
+uint64_t __perf_reg_ip_powerpc(void);
+uint64_t __perf_reg_sp_powerpc(void);
+const char *__perf_reg_name_riscv(int id);
+uint64_t __perf_reg_ip_riscv(void);
+uint64_t __perf_reg_sp_riscv(void);
+const char *__perf_reg_name_s390(int id);
+uint64_t __perf_reg_ip_s390(void);
+uint64_t __perf_reg_sp_s390(void);
+const char *__perf_reg_name_x86(int id);
+uint64_t __perf_reg_ip_x86(void);
+uint64_t __perf_reg_sp_x86(void);
+
+static inline uint64_t DWARF_MINIMAL_REGS(const char *arch)
+{
+ return (1ULL << perf_arch_reg_ip(arch)) | (1ULL << perf_arch_reg_sp(arch));
+}
+
+#else
+
+static inline uint64_t DWARF_MINIMAL_REGS(const char *arch __maybe_unused)
+{
+ return 0;
+}
+
+static inline const char *perf_reg_name(int id __maybe_unused, const char *arch __maybe_unused)
+{
+ return "unknown";
+}
+
+static inline int perf_reg_value(u64 *valp __maybe_unused,
+ struct regs_dump *regs __maybe_unused,
+ int id __maybe_unused)
+{
+ return 0;
+}
+
+static inline uint64_t perf_arch_reg_ip(const char *arch __maybe_unused)
+{
+ return 0;
+}
+
+static inline uint64_t perf_arch_reg_sp(const char *arch __maybe_unused)
+{
+ return 0;
+}
+
+#endif /* HAVE_PERF_REGS_SUPPORT */
+#endif /* __PERF_REGS_H */
diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c
new file mode 100644
index 000000000..862e4a689
--- /dev/null
+++ b/tools/perf/util/pfm.c
@@ -0,0 +1,309 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for libpfm4 event encoding.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#include "util/cpumap.h"
+#include "util/debug.h"
+#include "util/event.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/parse-events.h"
+#include "util/pmus.h"
+#include "util/pfm.h"
+#include "util/strbuf.h"
+#include "util/thread_map.h"
+
+#include <string.h>
+#include <linux/kernel.h>
+#include <perfmon/pfmlib_perf_event.h>
+
+static void libpfm_initialize(void)
+{
+ int ret;
+
+ ret = pfm_initialize();
+ if (ret != PFM_SUCCESS) {
+ ui__warning("libpfm failed to initialize: %s\n",
+ pfm_strerror(ret));
+ }
+}
+
+int parse_libpfm_events_option(const struct option *opt, const char *str,
+ int unset __maybe_unused)
+{
+ struct evlist *evlist = *(struct evlist **)opt->value;
+ struct perf_event_attr attr;
+ struct perf_pmu *pmu;
+ struct evsel *evsel, *grp_leader = NULL;
+ char *p, *q, *p_orig;
+ const char *sep;
+ int grp_evt = -1;
+ int ret;
+
+ libpfm_initialize();
+
+ p_orig = p = strdup(str);
+ if (!p)
+ return -1;
+ /*
+ * force loading of the PMU list
+ */
+ perf_pmus__scan(NULL);
+
+ for (q = p; strsep(&p, ",{}"); q = p) {
+ sep = p ? str + (p - p_orig - 1) : "";
+ if (*sep == '{') {
+ if (grp_evt > -1) {
+ ui__error(
+ "nested event groups not supported\n");
+ goto error;
+ }
+ grp_evt++;
+ }
+
+ /* no event */
+ if (*q == '\0') {
+ if (*sep == '}') {
+ if (grp_evt < 0) {
+ ui__error("cannot close a non-existing event group\n");
+ goto error;
+ }
+ grp_evt--;
+ }
+ continue;
+ }
+
+ memset(&attr, 0, sizeof(attr));
+ event_attr_init(&attr);
+
+ ret = pfm_get_perf_event_encoding(q, PFM_PLM0|PFM_PLM3,
+ &attr, NULL, NULL);
+
+ if (ret != PFM_SUCCESS) {
+ ui__error("failed to parse event %s : %s\n", str,
+ pfm_strerror(ret));
+ goto error;
+ }
+
+ pmu = perf_pmus__find_by_type((unsigned int)attr.type);
+ evsel = parse_events__add_event(evlist->core.nr_entries,
+ &attr, q, /*metric_id=*/NULL,
+ pmu);
+ if (evsel == NULL)
+ goto error;
+
+ evsel->is_libpfm_event = true;
+
+ evlist__add(evlist, evsel);
+
+ if (grp_evt == 0)
+ grp_leader = evsel;
+
+ if (grp_evt > -1) {
+ evsel__set_leader(evsel, grp_leader);
+ grp_leader->core.nr_members++;
+ grp_evt++;
+ }
+
+ if (*sep == '}') {
+ if (grp_evt < 0) {
+ ui__error(
+ "cannot close a non-existing event group\n");
+ goto error;
+ }
+ grp_leader = NULL;
+ grp_evt = -1;
+ }
+ }
+ free(p_orig);
+ return 0;
+error:
+ free(p_orig);
+ return -1;
+}
+
+static bool is_libpfm_event_supported(const char *name, struct perf_cpu_map *cpus,
+ struct perf_thread_map *threads)
+{
+ struct perf_pmu *pmu;
+ struct evsel *evsel;
+ struct perf_event_attr attr = {};
+ bool result = true;
+ int ret;
+
+ ret = pfm_get_perf_event_encoding(name, PFM_PLM0|PFM_PLM3,
+ &attr, NULL, NULL);
+ if (ret != PFM_SUCCESS)
+ return false;
+
+ pmu = perf_pmus__find_by_type((unsigned int)attr.type);
+ evsel = parse_events__add_event(0, &attr, name, /*metric_id=*/NULL, pmu);
+ if (evsel == NULL)
+ return false;
+
+ evsel->is_libpfm_event = true;
+
+ if (evsel__open(evsel, cpus, threads) < 0)
+ result = false;
+
+ evsel__close(evsel);
+ evsel__delete(evsel);
+
+ return result;
+}
+
+static const char *srcs[PFM_ATTR_CTRL_MAX] = {
+ [PFM_ATTR_CTRL_UNKNOWN] = "???",
+ [PFM_ATTR_CTRL_PMU] = "PMU",
+ [PFM_ATTR_CTRL_PERF_EVENT] = "perf_event",
+};
+
+static void
+print_attr_flags(struct strbuf *buf, const pfm_event_attr_info_t *info)
+{
+ if (info->is_dfl)
+ strbuf_addf(buf, "[default] ");
+
+ if (info->is_precise)
+ strbuf_addf(buf, "[precise] ");
+}
+
+static void
+print_libpfm_event(const struct print_callbacks *print_cb, void *print_state,
+ const pfm_pmu_info_t *pinfo, const pfm_event_info_t *info,
+ struct strbuf *buf)
+{
+ int j, ret;
+ char topic[80], name[80];
+ struct perf_cpu_map *cpus = perf_cpu_map__empty_new(1);
+ struct perf_thread_map *threads = thread_map__new_by_tid(0);
+
+ strbuf_setlen(buf, 0);
+ snprintf(topic, sizeof(topic), "pfm %s", pinfo->name);
+
+ snprintf(name, sizeof(name), "%s::%s", pinfo->name, info->name);
+ strbuf_addf(buf, "Code: 0x%"PRIx64"\n", info->code);
+
+ pfm_for_each_event_attr(j, info) {
+ pfm_event_attr_info_t ainfo;
+ const char *src;
+
+ ainfo.size = sizeof(ainfo);
+ ret = pfm_get_event_attr_info(info->idx, j, PFM_OS_PERF_EVENT_EXT, &ainfo);
+ if (ret != PFM_SUCCESS)
+ continue;
+
+ if (ainfo.ctrl >= PFM_ATTR_CTRL_MAX)
+ ainfo.ctrl = PFM_ATTR_CTRL_UNKNOWN;
+
+ src = srcs[ainfo.ctrl];
+ switch (ainfo.type) {
+ case PFM_ATTR_UMASK: /* Ignore for now */
+ break;
+ case PFM_ATTR_MOD_BOOL:
+ strbuf_addf(buf, " Modif: %s: [%s] : %s (boolean)\n", src,
+ ainfo.name, ainfo.desc);
+ break;
+ case PFM_ATTR_MOD_INTEGER:
+ strbuf_addf(buf, " Modif: %s: [%s] : %s (integer)\n", src,
+ ainfo.name, ainfo.desc);
+ break;
+ case PFM_ATTR_NONE:
+ case PFM_ATTR_RAW_UMASK:
+ case PFM_ATTR_MAX:
+ default:
+ strbuf_addf(buf, " Attr: %s: [%s] : %s\n", src,
+ ainfo.name, ainfo.desc);
+ }
+ }
+
+ if (is_libpfm_event_supported(name, cpus, threads)) {
+ print_cb->print_event(print_state, pinfo->name, topic,
+ name, info->equiv,
+ /*scale_unit=*/NULL,
+ /*deprecated=*/NULL, "PFM event",
+ info->desc, /*long_desc=*/NULL,
+ /*encoding_desc=*/buf->buf);
+ }
+
+ pfm_for_each_event_attr(j, info) {
+ pfm_event_attr_info_t ainfo;
+ const char *src;
+
+ strbuf_setlen(buf, 0);
+
+ ainfo.size = sizeof(ainfo);
+ ret = pfm_get_event_attr_info(info->idx, j, PFM_OS_PERF_EVENT_EXT, &ainfo);
+ if (ret != PFM_SUCCESS)
+ continue;
+
+ if (ainfo.ctrl >= PFM_ATTR_CTRL_MAX)
+ ainfo.ctrl = PFM_ATTR_CTRL_UNKNOWN;
+
+ src = srcs[ainfo.ctrl];
+ if (ainfo.type == PFM_ATTR_UMASK) {
+ strbuf_addf(buf, "Umask: 0x%02"PRIx64" : %s: ",
+ ainfo.code, src);
+ print_attr_flags(buf, &ainfo);
+ snprintf(name, sizeof(name), "%s::%s:%s",
+ pinfo->name, info->name, ainfo.name);
+
+ if (!is_libpfm_event_supported(name, cpus, threads))
+ continue;
+
+ print_cb->print_event(print_state,
+ pinfo->name,
+ topic,
+ name, /*alias=*/NULL,
+ /*scale_unit=*/NULL,
+ /*deprecated=*/NULL, "PFM event",
+ ainfo.desc, /*long_desc=*/NULL,
+ /*encoding_desc=*/buf->buf);
+ }
+ }
+
+ perf_cpu_map__put(cpus);
+ perf_thread_map__put(threads);
+}
+
+void print_libpfm_events(const struct print_callbacks *print_cb, void *print_state)
+{
+ pfm_event_info_t info;
+ pfm_pmu_info_t pinfo;
+ int p, ret;
+ struct strbuf storage;
+
+ libpfm_initialize();
+
+ /* initialize to zero to indicate ABI version */
+ info.size = sizeof(info);
+ pinfo.size = sizeof(pinfo);
+
+ strbuf_init(&storage, 2048);
+
+ pfm_for_all_pmus(p) {
+ ret = pfm_get_pmu_info(p, &pinfo);
+ if (ret != PFM_SUCCESS)
+ continue;
+
+ /* only print events that are supported by host HW */
+ if (!pinfo.is_present)
+ continue;
+
+ /* handled by perf directly */
+ if (pinfo.pmu == PFM_PMU_PERF_EVENT)
+ continue;
+
+ for (int i = pinfo.first_event; i != -1; i = pfm_get_event_next(i)) {
+ ret = pfm_get_event_info(i, PFM_OS_PERF_EVENT_EXT,
+ &info);
+ if (ret != PFM_SUCCESS)
+ continue;
+
+ print_libpfm_event(print_cb, print_state, &pinfo, &info, &storage);
+ }
+ }
+ strbuf_release(&storage);
+}
diff --git a/tools/perf/util/pfm.h b/tools/perf/util/pfm.h
new file mode 100644
index 000000000..fb25c2749
--- /dev/null
+++ b/tools/perf/util/pfm.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Support for libpfm4 event encoding.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#ifndef __PERF_PFM_H
+#define __PERF_PFM_H
+
+#include "print-events.h"
+#include <subcmd/parse-options.h>
+
+#ifdef HAVE_LIBPFM
+int parse_libpfm_events_option(const struct option *opt, const char *str,
+ int unset);
+
+void print_libpfm_events(const struct print_callbacks *print_cb, void *print_state);
+
+#else
+#include <linux/compiler.h>
+
+static inline int parse_libpfm_events_option(
+ const struct option *opt __maybe_unused,
+ const char *str __maybe_unused,
+ int unset __maybe_unused)
+{
+ return 0;
+}
+
+static inline void print_libpfm_events(const struct print_callbacks *print_cb __maybe_unused,
+ void *print_state __maybe_unused)
+{
+}
+
+#endif
+
+
+#endif /* __PERF_PFM_H */
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
new file mode 100644
index 000000000..d515ba8a0
--- /dev/null
+++ b/tools/perf/util/pmu.c
@@ -0,0 +1,2068 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/list.h>
+#include <linux/compiler.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <linux/ctype.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <dirent.h>
+#include <api/fs/fs.h>
+#include <locale.h>
+#include <fnmatch.h>
+#include <math.h>
+#include "debug.h"
+#include "evsel.h"
+#include "pmu.h"
+#include "pmus.h"
+#include <util/pmu-bison.h>
+#include <util/pmu-flex.h>
+#include "parse-events.h"
+#include "print-events.h"
+#include "header.h"
+#include "string2.h"
+#include "strbuf.h"
+#include "fncache.h"
+#include "util/evsel_config.h"
+
+struct perf_pmu perf_pmu__fake = {
+ .name = "fake",
+};
+
+#define UNIT_MAX_LEN 31 /* max length for event unit name */
+
+/**
+ * struct perf_pmu_alias - An event either read from sysfs or builtin in
+ * pmu-events.c, created by parsing the pmu-events json files.
+ */
+struct perf_pmu_alias {
+ /** @name: Name of the event like "mem-loads". */
+ char *name;
+ /** @desc: Optional short description of the event. */
+ char *desc;
+ /** @long_desc: Optional long description. */
+ char *long_desc;
+ /**
+ * @topic: Optional topic such as cache or pipeline, particularly for
+ * json events.
+ */
+ char *topic;
+ /** @terms: Owned list of the original parsed parameters. */
+ struct list_head terms;
+ /** @list: List element of struct perf_pmu aliases. */
+ struct list_head list;
+ /**
+ * @pmu_name: The name copied from the json struct pmu_event. This can
+ * differ from the PMU name as it won't have suffixes.
+ */
+ char *pmu_name;
+ /** @unit: Units for the event, such as bytes or cache lines. */
+ char unit[UNIT_MAX_LEN+1];
+ /** @scale: Value to scale read counter values by. */
+ double scale;
+ /**
+ * @per_pkg: Does the file
+ * <sysfs>/bus/event_source/devices/<pmu_name>/events/<name>.per-pkg or
+ * equivalent json value exist and have the value 1.
+ */
+ bool per_pkg;
+ /**
+ * @snapshot: Does the file
+ * <sysfs>/bus/event_source/devices/<pmu_name>/events/<name>.snapshot
+ * exist and have the value 1.
+ */
+ bool snapshot;
+ /**
+ * @deprecated: Is the event hidden and so not shown in perf list by
+ * default.
+ */
+ bool deprecated;
+ /** @from_sysfs: Was the alias from sysfs or a json event? */
+ bool from_sysfs;
+ /** @info_loaded: Have the scale, unit and other values been read from disk? */
+ bool info_loaded;
+};
+
+/**
+ * struct perf_pmu_format - Values from a format file read from
+ * <sysfs>/devices/cpu/format/ held in struct perf_pmu.
+ *
+ * For example, the contents of <sysfs>/devices/cpu/format/event may be
+ * "config:0-7" and will be represented here as name="event",
+ * value=PERF_PMU_FORMAT_VALUE_CONFIG and bits 0 to 7 will be set.
+ */
+struct perf_pmu_format {
+ /** @list: Element on list within struct perf_pmu. */
+ struct list_head list;
+ /** @bits: Which config bits are set by this format value. */
+ DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS);
+ /** @name: The modifier/file name. */
+ char *name;
+ /**
+ * @value : Which config value the format relates to. Supported values
+ * are from PERF_PMU_FORMAT_VALUE_CONFIG to
+ * PERF_PMU_FORMAT_VALUE_CONFIG_END.
+ */
+ u16 value;
+ /** @loaded: Has the contents been loaded/parsed. */
+ bool loaded;
+};
+
+static int pmu_aliases_parse(struct perf_pmu *pmu);
+
+static struct perf_pmu_format *perf_pmu__new_format(struct list_head *list, char *name)
+{
+ struct perf_pmu_format *format;
+
+ format = zalloc(sizeof(*format));
+ if (!format)
+ return NULL;
+
+ format->name = strdup(name);
+ if (!format->name) {
+ free(format);
+ return NULL;
+ }
+ list_add_tail(&format->list, list);
+ return format;
+}
+
+/* Called at the end of parsing a format. */
+void perf_pmu_format__set_value(void *vformat, int config, unsigned long *bits)
+{
+ struct perf_pmu_format *format = vformat;
+
+ format->value = config;
+ memcpy(format->bits, bits, sizeof(format->bits));
+}
+
+static void __perf_pmu_format__load(struct perf_pmu_format *format, FILE *file)
+{
+ void *scanner;
+ int ret;
+
+ ret = perf_pmu_lex_init(&scanner);
+ if (ret)
+ return;
+
+ perf_pmu_set_in(file, scanner);
+ ret = perf_pmu_parse(format, scanner);
+ perf_pmu_lex_destroy(scanner);
+ format->loaded = true;
+}
+
+static void perf_pmu_format__load(struct perf_pmu *pmu, struct perf_pmu_format *format)
+{
+ char path[PATH_MAX];
+ FILE *file = NULL;
+
+ if (format->loaded)
+ return;
+
+ if (!perf_pmu__pathname_scnprintf(path, sizeof(path), pmu->name, "format"))
+ return;
+
+ assert(strlen(path) + strlen(format->name) + 2 < sizeof(path));
+ strcat(path, "/");
+ strcat(path, format->name);
+
+ file = fopen(path, "r");
+ if (!file)
+ return;
+ __perf_pmu_format__load(format, file);
+ fclose(file);
+}
+
+/*
+ * Parse & process all the sysfs attributes located under
+ * the directory specified in 'dir' parameter.
+ */
+int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_load)
+{
+ struct dirent *evt_ent;
+ DIR *format_dir;
+ int ret = 0;
+
+ format_dir = fdopendir(dirfd);
+ if (!format_dir)
+ return -EINVAL;
+
+ while ((evt_ent = readdir(format_dir)) != NULL) {
+ struct perf_pmu_format *format;
+ char *name = evt_ent->d_name;
+
+ if (!strcmp(name, ".") || !strcmp(name, ".."))
+ continue;
+
+ format = perf_pmu__new_format(&pmu->format, name);
+ if (!format) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ if (eager_load) {
+ FILE *file;
+ int fd = openat(dirfd, name, O_RDONLY);
+
+ if (fd < 0) {
+ ret = -errno;
+ break;
+ }
+ file = fdopen(fd, "r");
+ if (!file) {
+ close(fd);
+ break;
+ }
+ __perf_pmu_format__load(format, file);
+ fclose(file);
+ }
+ }
+
+ closedir(format_dir);
+ return ret;
+}
+
+/*
+ * Reading/parsing the default pmu format definition, which should be
+ * located at:
+ * /sys/bus/event_source/devices/<dev>/format as sysfs group attributes.
+ */
+static int pmu_format(struct perf_pmu *pmu, int dirfd, const char *name)
+{
+ int fd;
+
+ fd = perf_pmu__pathname_fd(dirfd, name, "format", O_DIRECTORY);
+ if (fd < 0)
+ return 0;
+
+ /* it'll close the fd */
+ if (perf_pmu__format_parse(pmu, fd, /*eager_load=*/false))
+ return -1;
+
+ return 0;
+}
+
+int perf_pmu__convert_scale(const char *scale, char **end, double *sval)
+{
+ char *lc;
+ int ret = 0;
+
+ /*
+ * save current locale
+ */
+ lc = setlocale(LC_NUMERIC, NULL);
+
+ /*
+ * The lc string may be allocated in static storage,
+ * so get a dynamic copy to make it survive setlocale
+ * call below.
+ */
+ lc = strdup(lc);
+ if (!lc) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * force to C locale to ensure kernel
+ * scale string is converted correctly.
+ * kernel uses default C locale.
+ */
+ setlocale(LC_NUMERIC, "C");
+
+ *sval = strtod(scale, end);
+
+out:
+ /* restore locale */
+ setlocale(LC_NUMERIC, lc);
+ free(lc);
+ return ret;
+}
+
+static int perf_pmu__parse_scale(struct perf_pmu *pmu, struct perf_pmu_alias *alias)
+{
+ struct stat st;
+ ssize_t sret;
+ size_t len;
+ char scale[128];
+ int fd, ret = -1;
+ char path[PATH_MAX];
+
+ len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path));
+ if (!len)
+ return 0;
+ scnprintf(path + len, sizeof(path) - len, "%s/events/%s.scale", pmu->name, alias->name);
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return -1;
+
+ if (fstat(fd, &st) < 0)
+ goto error;
+
+ sret = read(fd, scale, sizeof(scale)-1);
+ if (sret < 0)
+ goto error;
+
+ if (scale[sret - 1] == '\n')
+ scale[sret - 1] = '\0';
+ else
+ scale[sret] = '\0';
+
+ ret = perf_pmu__convert_scale(scale, NULL, &alias->scale);
+error:
+ close(fd);
+ return ret;
+}
+
+static int perf_pmu__parse_unit(struct perf_pmu *pmu, struct perf_pmu_alias *alias)
+{
+ char path[PATH_MAX];
+ size_t len;
+ ssize_t sret;
+ int fd;
+
+
+ len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path));
+ if (!len)
+ return 0;
+ scnprintf(path + len, sizeof(path) - len, "%s/events/%s.unit", pmu->name, alias->name);
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return -1;
+
+ sret = read(fd, alias->unit, UNIT_MAX_LEN);
+ if (sret < 0)
+ goto error;
+
+ close(fd);
+
+ if (alias->unit[sret - 1] == '\n')
+ alias->unit[sret - 1] = '\0';
+ else
+ alias->unit[sret] = '\0';
+
+ return 0;
+error:
+ close(fd);
+ alias->unit[0] = '\0';
+ return -1;
+}
+
+static int
+perf_pmu__parse_per_pkg(struct perf_pmu *pmu, struct perf_pmu_alias *alias)
+{
+ char path[PATH_MAX];
+ size_t len;
+ int fd;
+
+ len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path));
+ if (!len)
+ return 0;
+ scnprintf(path + len, sizeof(path) - len, "%s/events/%s.per-pkg", pmu->name, alias->name);
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return -1;
+
+ close(fd);
+
+ alias->per_pkg = true;
+ return 0;
+}
+
+static int perf_pmu__parse_snapshot(struct perf_pmu *pmu, struct perf_pmu_alias *alias)
+{
+ char path[PATH_MAX];
+ size_t len;
+ int fd;
+
+ len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path));
+ if (!len)
+ return 0;
+ scnprintf(path + len, sizeof(path) - len, "%s/events/%s.snapshot", pmu->name, alias->name);
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return -1;
+
+ alias->snapshot = true;
+ close(fd);
+ return 0;
+}
+
+/* Delete an alias entry. */
+static void perf_pmu_free_alias(struct perf_pmu_alias *newalias)
+{
+ zfree(&newalias->name);
+ zfree(&newalias->desc);
+ zfree(&newalias->long_desc);
+ zfree(&newalias->topic);
+ zfree(&newalias->pmu_name);
+ parse_events_terms__purge(&newalias->terms);
+ free(newalias);
+}
+
+static void perf_pmu__del_aliases(struct perf_pmu *pmu)
+{
+ struct perf_pmu_alias *alias, *tmp;
+
+ list_for_each_entry_safe(alias, tmp, &pmu->aliases, list) {
+ list_del(&alias->list);
+ perf_pmu_free_alias(alias);
+ }
+}
+
+static struct perf_pmu_alias *perf_pmu__find_alias(struct perf_pmu *pmu,
+ const char *name,
+ bool load)
+{
+ struct perf_pmu_alias *alias;
+
+ if (load && !pmu->sysfs_aliases_loaded)
+ pmu_aliases_parse(pmu);
+
+ list_for_each_entry(alias, &pmu->aliases, list) {
+ if (!strcasecmp(alias->name, name))
+ return alias;
+ }
+ return NULL;
+}
+
+static bool assign_str(const char *name, const char *field, char **old_str,
+ const char *new_str)
+{
+ if (!*old_str && new_str) {
+ *old_str = strdup(new_str);
+ return true;
+ }
+
+ if (!new_str || !strcasecmp(*old_str, new_str))
+ return false; /* Nothing to update. */
+
+ pr_debug("alias %s differs in field '%s' ('%s' != '%s')\n",
+ name, field, *old_str, new_str);
+ zfree(old_str);
+ *old_str = strdup(new_str);
+ return true;
+}
+
+static void read_alias_info(struct perf_pmu *pmu, struct perf_pmu_alias *alias)
+{
+ if (!alias->from_sysfs || alias->info_loaded)
+ return;
+
+ /*
+ * load unit name and scale if available
+ */
+ perf_pmu__parse_unit(pmu, alias);
+ perf_pmu__parse_scale(pmu, alias);
+ perf_pmu__parse_per_pkg(pmu, alias);
+ perf_pmu__parse_snapshot(pmu, alias);
+}
+
+struct update_alias_data {
+ struct perf_pmu *pmu;
+ struct perf_pmu_alias *alias;
+};
+
+static int update_alias(const struct pmu_event *pe,
+ const struct pmu_events_table *table __maybe_unused,
+ void *vdata)
+{
+ struct update_alias_data *data = vdata;
+ int ret = 0;
+
+ read_alias_info(data->pmu, data->alias);
+ assign_str(pe->name, "desc", &data->alias->desc, pe->desc);
+ assign_str(pe->name, "long_desc", &data->alias->long_desc, pe->long_desc);
+ assign_str(pe->name, "topic", &data->alias->topic, pe->topic);
+ data->alias->per_pkg = pe->perpkg;
+ if (pe->event) {
+ parse_events_terms__purge(&data->alias->terms);
+ ret = parse_events_terms(&data->alias->terms, pe->event, /*input=*/NULL);
+ }
+ if (!ret && pe->unit) {
+ char *unit;
+
+ ret = perf_pmu__convert_scale(pe->unit, &unit, &data->alias->scale);
+ if (!ret)
+ snprintf(data->alias->unit, sizeof(data->alias->unit), "%s", unit);
+ }
+ return ret;
+}
+
+static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name,
+ const char *desc, const char *val, FILE *val_fd,
+ const struct pmu_event *pe)
+{
+ struct perf_pmu_alias *alias;
+ int ret;
+ const char *long_desc = NULL, *topic = NULL, *unit = NULL, *pmu_name = NULL;
+ bool deprecated = false, perpkg = false;
+
+ if (perf_pmu__find_alias(pmu, name, /*load=*/ false)) {
+ /* Alias was already created/loaded. */
+ return 0;
+ }
+
+ if (pe) {
+ long_desc = pe->long_desc;
+ topic = pe->topic;
+ unit = pe->unit;
+ perpkg = pe->perpkg;
+ deprecated = pe->deprecated;
+ pmu_name = pe->pmu;
+ }
+
+ alias = zalloc(sizeof(*alias));
+ if (!alias)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&alias->terms);
+ alias->scale = 1.0;
+ alias->unit[0] = '\0';
+ alias->per_pkg = perpkg;
+ alias->snapshot = false;
+ alias->deprecated = deprecated;
+
+ ret = parse_events_terms(&alias->terms, val, val_fd);
+ if (ret) {
+ pr_err("Cannot parse alias %s: %d\n", val, ret);
+ free(alias);
+ return ret;
+ }
+
+ alias->name = strdup(name);
+ alias->desc = desc ? strdup(desc) : NULL;
+ alias->long_desc = long_desc ? strdup(long_desc) :
+ desc ? strdup(desc) : NULL;
+ alias->topic = topic ? strdup(topic) : NULL;
+ alias->pmu_name = pmu_name ? strdup(pmu_name) : NULL;
+ if (unit) {
+ if (perf_pmu__convert_scale(unit, (char **)&unit, &alias->scale) < 0) {
+ perf_pmu_free_alias(alias);
+ return -1;
+ }
+ snprintf(alias->unit, sizeof(alias->unit), "%s", unit);
+ }
+ if (!pe) {
+ /* Update an event from sysfs with json data. */
+ struct update_alias_data data = {
+ .pmu = pmu,
+ .alias = alias,
+ };
+
+ alias->from_sysfs = true;
+ if (pmu->events_table) {
+ if (pmu_events_table__find_event(pmu->events_table, pmu, name,
+ update_alias, &data) == 0)
+ pmu->loaded_json_aliases++;
+ }
+ }
+
+ if (!pe)
+ pmu->sysfs_aliases++;
+ else
+ pmu->loaded_json_aliases++;
+ list_add_tail(&alias->list, &pmu->aliases);
+ return 0;
+}
+
+static inline bool pmu_alias_info_file(char *name)
+{
+ size_t len;
+
+ len = strlen(name);
+ if (len > 5 && !strcmp(name + len - 5, ".unit"))
+ return true;
+ if (len > 6 && !strcmp(name + len - 6, ".scale"))
+ return true;
+ if (len > 8 && !strcmp(name + len - 8, ".per-pkg"))
+ return true;
+ if (len > 9 && !strcmp(name + len - 9, ".snapshot"))
+ return true;
+
+ return false;
+}
+
+/*
+ * Reading the pmu event aliases definition, which should be located at:
+ * /sys/bus/event_source/devices/<dev>/events as sysfs group attributes.
+ */
+static int pmu_aliases_parse(struct perf_pmu *pmu)
+{
+ char path[PATH_MAX];
+ struct dirent *evt_ent;
+ DIR *event_dir;
+ size_t len;
+ int fd, dir_fd;
+
+ len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path));
+ if (!len)
+ return 0;
+ scnprintf(path + len, sizeof(path) - len, "%s/events", pmu->name);
+
+ dir_fd = open(path, O_DIRECTORY);
+ if (dir_fd == -1) {
+ pmu->sysfs_aliases_loaded = true;
+ return 0;
+ }
+
+ event_dir = fdopendir(dir_fd);
+ if (!event_dir){
+ close (dir_fd);
+ return -EINVAL;
+ }
+
+ while ((evt_ent = readdir(event_dir))) {
+ char *name = evt_ent->d_name;
+ FILE *file;
+
+ if (!strcmp(name, ".") || !strcmp(name, ".."))
+ continue;
+
+ /*
+ * skip info files parsed in perf_pmu__new_alias()
+ */
+ if (pmu_alias_info_file(name))
+ continue;
+
+ fd = openat(dir_fd, name, O_RDONLY);
+ if (fd == -1) {
+ pr_debug("Cannot open %s\n", name);
+ continue;
+ }
+ file = fdopen(fd, "r");
+ if (!file) {
+ close(fd);
+ continue;
+ }
+
+ if (perf_pmu__new_alias(pmu, name, /*desc=*/ NULL,
+ /*val=*/ NULL, file, /*pe=*/ NULL) < 0)
+ pr_debug("Cannot set up %s\n", name);
+ fclose(file);
+ }
+
+ closedir(event_dir);
+ close (dir_fd);
+ pmu->sysfs_aliases_loaded = true;
+ return 0;
+}
+
+static int pmu_alias_terms(struct perf_pmu_alias *alias,
+ struct list_head *terms)
+{
+ struct parse_events_term *term, *cloned;
+ LIST_HEAD(list);
+ int ret;
+
+ list_for_each_entry(term, &alias->terms, list) {
+ ret = parse_events_term__clone(&cloned, term);
+ if (ret) {
+ parse_events_terms__purge(&list);
+ return ret;
+ }
+ /*
+ * Weak terms don't override command line options,
+ * which we don't want for implicit terms in aliases.
+ */
+ cloned->weak = true;
+ list_add_tail(&cloned->list, &list);
+ }
+ list_splice(&list, terms);
+ return 0;
+}
+
+/*
+ * Uncore PMUs have a "cpumask" file under sysfs. CPU PMUs (e.g. on arm/arm64)
+ * may have a "cpus" file.
+ */
+static struct perf_cpu_map *pmu_cpumask(int dirfd, const char *name, bool is_core)
+{
+ struct perf_cpu_map *cpus;
+ const char *templates[] = {
+ "cpumask",
+ "cpus",
+ NULL
+ };
+ const char **template;
+ char pmu_name[PATH_MAX];
+ struct perf_pmu pmu = {.name = pmu_name};
+ FILE *file;
+
+ strlcpy(pmu_name, name, sizeof(pmu_name));
+ for (template = templates; *template; template++) {
+ file = perf_pmu__open_file_at(&pmu, dirfd, *template);
+ if (!file)
+ continue;
+ cpus = perf_cpu_map__read(file);
+ fclose(file);
+ if (cpus)
+ return cpus;
+ }
+
+ /* Nothing found, for core PMUs assume this means all CPUs. */
+ return is_core ? perf_cpu_map__get(cpu_map__online()) : NULL;
+}
+
+static bool pmu_is_uncore(int dirfd, const char *name)
+{
+ int fd;
+
+ fd = perf_pmu__pathname_fd(dirfd, name, "cpumask", O_PATH);
+ if (fd < 0)
+ return false;
+
+ close(fd);
+ return true;
+}
+
+static char *pmu_id(const char *name)
+{
+ char path[PATH_MAX], *str;
+ size_t len;
+
+ perf_pmu__pathname_scnprintf(path, sizeof(path), name, "identifier");
+
+ if (filename__read_str(path, &str, &len) < 0)
+ return NULL;
+
+ str[len - 1] = 0; /* remove line feed */
+
+ return str;
+}
+
+/**
+ * is_sysfs_pmu_core() - PMU CORE devices have different name other than cpu in
+ * sysfs on some platforms like ARM or Intel hybrid. Looking for
+ * possible the cpus file in sysfs files to identify whether this is a
+ * core device.
+ * @name: The PMU name such as "cpu_atom".
+ */
+static int is_sysfs_pmu_core(const char *name)
+{
+ char path[PATH_MAX];
+
+ if (!perf_pmu__pathname_scnprintf(path, sizeof(path), name, "cpus"))
+ return 0;
+ return file_available(path);
+}
+
+char *perf_pmu__getcpuid(struct perf_pmu *pmu)
+{
+ char *cpuid;
+ static bool printed;
+
+ cpuid = getenv("PERF_CPUID");
+ if (cpuid)
+ cpuid = strdup(cpuid);
+ if (!cpuid)
+ cpuid = get_cpuid_str(pmu);
+ if (!cpuid)
+ return NULL;
+
+ if (!printed) {
+ pr_debug("Using CPUID %s\n", cpuid);
+ printed = true;
+ }
+ return cpuid;
+}
+
+__weak const struct pmu_events_table *pmu_events_table__find(void)
+{
+ return perf_pmu__find_events_table(NULL);
+}
+
+__weak const struct pmu_metrics_table *pmu_metrics_table__find(void)
+{
+ return perf_pmu__find_metrics_table(NULL);
+}
+
+/**
+ * perf_pmu__match_ignoring_suffix - Does the pmu_name match tok ignoring any
+ * trailing suffix? The Suffix must be in form
+ * tok_{digits}, or tok{digits}.
+ * @pmu_name: The pmu_name with possible suffix.
+ * @tok: The possible match to pmu_name without suffix.
+ */
+static bool perf_pmu__match_ignoring_suffix(const char *pmu_name, const char *tok)
+{
+ const char *p;
+
+ if (strncmp(pmu_name, tok, strlen(tok)))
+ return false;
+
+ p = pmu_name + strlen(tok);
+ if (*p == 0)
+ return true;
+
+ if (*p == '_')
+ ++p;
+
+ /* Ensure we end in a number */
+ while (1) {
+ if (!isdigit(*p))
+ return false;
+ if (*(++p) == 0)
+ break;
+ }
+
+ return true;
+}
+
+/**
+ * pmu_uncore_alias_match - does name match the PMU name?
+ * @pmu_name: the json struct pmu_event name. This may lack a suffix (which
+ * matches) or be of the form "socket,pmuname" which will match
+ * "socketX_pmunameY".
+ * @name: a real full PMU name as from sysfs.
+ */
+static bool pmu_uncore_alias_match(const char *pmu_name, const char *name)
+{
+ char *tmp = NULL, *tok, *str;
+ bool res;
+
+ if (strchr(pmu_name, ',') == NULL)
+ return perf_pmu__match_ignoring_suffix(name, pmu_name);
+
+ str = strdup(pmu_name);
+ if (!str)
+ return false;
+
+ /*
+ * uncore alias may be from different PMU with common prefix
+ */
+ tok = strtok_r(str, ",", &tmp);
+ if (strncmp(pmu_name, tok, strlen(tok))) {
+ res = false;
+ goto out;
+ }
+
+ /*
+ * Match more complex aliases where the alias name is a comma-delimited
+ * list of tokens, orderly contained in the matching PMU name.
+ *
+ * Example: For alias "socket,pmuname" and PMU "socketX_pmunameY", we
+ * match "socket" in "socketX_pmunameY" and then "pmuname" in
+ * "pmunameY".
+ */
+ while (1) {
+ char *next_tok = strtok_r(NULL, ",", &tmp);
+
+ name = strstr(name, tok);
+ if (!name ||
+ (!next_tok && !perf_pmu__match_ignoring_suffix(name, tok))) {
+ res = false;
+ goto out;
+ }
+ if (!next_tok)
+ break;
+ tok = next_tok;
+ name += strlen(tok);
+ }
+
+ res = true;
+out:
+ free(str);
+ return res;
+}
+
+static int pmu_add_cpu_aliases_map_callback(const struct pmu_event *pe,
+ const struct pmu_events_table *table __maybe_unused,
+ void *vdata)
+{
+ struct perf_pmu *pmu = vdata;
+
+ perf_pmu__new_alias(pmu, pe->name, pe->desc, pe->event, /*val_fd=*/ NULL, pe);
+ return 0;
+}
+
+/*
+ * From the pmu_events_table, find the events that correspond to the given
+ * PMU and add them to the list 'head'.
+ */
+void pmu_add_cpu_aliases_table(struct perf_pmu *pmu, const struct pmu_events_table *table)
+{
+ pmu_events_table__for_each_event(table, pmu, pmu_add_cpu_aliases_map_callback, pmu);
+}
+
+static void pmu_add_cpu_aliases(struct perf_pmu *pmu)
+{
+ if (!pmu->events_table)
+ return;
+
+ if (pmu->cpu_aliases_added)
+ return;
+
+ pmu_add_cpu_aliases_table(pmu, pmu->events_table);
+ pmu->cpu_aliases_added = true;
+}
+
+static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe,
+ const struct pmu_events_table *table __maybe_unused,
+ void *vdata)
+{
+ struct perf_pmu *pmu = vdata;
+
+ if (!pe->compat || !pe->pmu)
+ return 0;
+
+ if (!strcmp(pmu->id, pe->compat) &&
+ pmu_uncore_alias_match(pe->pmu, pmu->name)) {
+ perf_pmu__new_alias(pmu,
+ pe->name,
+ pe->desc,
+ pe->event,
+ /*val_fd=*/ NULL,
+ pe);
+ }
+
+ return 0;
+}
+
+void pmu_add_sys_aliases(struct perf_pmu *pmu)
+{
+ if (!pmu->id)
+ return;
+
+ pmu_for_each_sys_event(pmu_add_sys_aliases_iter_fn, pmu);
+}
+
+struct perf_event_attr * __weak
+perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
+{
+ return NULL;
+}
+
+const char * __weak
+pmu_find_real_name(const char *name)
+{
+ return name;
+}
+
+const char * __weak
+pmu_find_alias_name(const char *name __maybe_unused)
+{
+ return NULL;
+}
+
+static int pmu_max_precise(int dirfd, struct perf_pmu *pmu)
+{
+ int max_precise = -1;
+
+ perf_pmu__scan_file_at(pmu, dirfd, "caps/max_precise", "%d", &max_precise);
+ return max_precise;
+}
+
+struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *lookup_name)
+{
+ struct perf_pmu *pmu;
+ __u32 type;
+ const char *name = pmu_find_real_name(lookup_name);
+ const char *alias_name;
+
+ pmu = zalloc(sizeof(*pmu));
+ if (!pmu)
+ return NULL;
+
+ pmu->name = strdup(name);
+ if (!pmu->name)
+ goto err;
+
+ /*
+ * Read type early to fail fast if a lookup name isn't a PMU. Ensure
+ * that type value is successfully assigned (return 1).
+ */
+ if (perf_pmu__scan_file_at(pmu, dirfd, "type", "%u", &type) != 1)
+ goto err;
+
+ INIT_LIST_HEAD(&pmu->format);
+ INIT_LIST_HEAD(&pmu->aliases);
+ INIT_LIST_HEAD(&pmu->caps);
+
+ /*
+ * The pmu data we store & need consists of the pmu
+ * type value and format definitions. Load both right
+ * now.
+ */
+ if (pmu_format(pmu, dirfd, name)) {
+ free(pmu);
+ return NULL;
+ }
+ pmu->is_core = is_pmu_core(name);
+ pmu->cpus = pmu_cpumask(dirfd, name, pmu->is_core);
+
+ alias_name = pmu_find_alias_name(name);
+ if (alias_name) {
+ pmu->alias_name = strdup(alias_name);
+ if (!pmu->alias_name)
+ goto err;
+ }
+
+ pmu->type = type;
+ pmu->is_uncore = pmu_is_uncore(dirfd, name);
+ if (pmu->is_uncore)
+ pmu->id = pmu_id(name);
+ pmu->max_precise = pmu_max_precise(dirfd, pmu);
+ pmu->events_table = perf_pmu__find_events_table(pmu);
+ pmu_add_sys_aliases(pmu);
+ list_add_tail(&pmu->list, pmus);
+
+ pmu->default_config = perf_pmu__get_default_config(pmu);
+
+ return pmu;
+err:
+ zfree(&pmu->name);
+ free(pmu);
+ return NULL;
+}
+
+/* Creates the PMU when sysfs scanning fails. */
+struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pmus)
+{
+ struct perf_pmu *pmu = zalloc(sizeof(*pmu));
+
+ if (!pmu)
+ return NULL;
+
+ pmu->name = strdup("cpu");
+ if (!pmu->name) {
+ free(pmu);
+ return NULL;
+ }
+
+ pmu->is_core = true;
+ pmu->type = PERF_TYPE_RAW;
+ pmu->cpus = cpu_map__online();
+
+ INIT_LIST_HEAD(&pmu->format);
+ INIT_LIST_HEAD(&pmu->aliases);
+ INIT_LIST_HEAD(&pmu->caps);
+ list_add_tail(&pmu->list, core_pmus);
+ return pmu;
+}
+
+void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu)
+{
+ struct perf_pmu_format *format;
+
+ if (pmu->formats_checked)
+ return;
+
+ pmu->formats_checked = true;
+
+ /* fake pmu doesn't have format list */
+ if (pmu == &perf_pmu__fake)
+ return;
+
+ list_for_each_entry(format, &pmu->format, list) {
+ perf_pmu_format__load(pmu, format);
+ if (format->value >= PERF_PMU_FORMAT_VALUE_CONFIG_END) {
+ pr_warning("WARNING: '%s' format '%s' requires 'perf_event_attr::config%d'"
+ "which is not supported by this version of perf!\n",
+ pmu->name, format->name, format->value);
+ return;
+ }
+ }
+}
+
+bool evsel__is_aux_event(const struct evsel *evsel)
+{
+ struct perf_pmu *pmu = evsel__find_pmu(evsel);
+
+ return pmu && pmu->auxtrace;
+}
+
+/*
+ * Set @config_name to @val as long as the user hasn't already set or cleared it
+ * by passing a config term on the command line.
+ *
+ * @val is the value to put into the bits specified by @config_name rather than
+ * the bit pattern. It is shifted into position by this function, so to set
+ * something to true, pass 1 for val rather than a pre shifted value.
+ */
+#define field_prep(_mask, _val) (((_val) << (ffsll(_mask) - 1)) & (_mask))
+void evsel__set_config_if_unset(struct perf_pmu *pmu, struct evsel *evsel,
+ const char *config_name, u64 val)
+{
+ u64 user_bits = 0, bits;
+ struct evsel_config_term *term = evsel__get_config_term(evsel, CFG_CHG);
+
+ if (term)
+ user_bits = term->val.cfg_chg;
+
+ bits = perf_pmu__format_bits(pmu, config_name);
+
+ /* Do nothing if the user changed the value */
+ if (bits & user_bits)
+ return;
+
+ /* Otherwise replace it */
+ evsel->core.attr.config &= ~bits;
+ evsel->core.attr.config |= field_prep(bits, val);
+}
+
+static struct perf_pmu_format *
+pmu_find_format(struct list_head *formats, const char *name)
+{
+ struct perf_pmu_format *format;
+
+ list_for_each_entry(format, formats, list)
+ if (!strcmp(format->name, name))
+ return format;
+
+ return NULL;
+}
+
+__u64 perf_pmu__format_bits(struct perf_pmu *pmu, const char *name)
+{
+ struct perf_pmu_format *format = pmu_find_format(&pmu->format, name);
+ __u64 bits = 0;
+ int fbit;
+
+ if (!format)
+ return 0;
+
+ for_each_set_bit(fbit, format->bits, PERF_PMU_FORMAT_BITS)
+ bits |= 1ULL << fbit;
+
+ return bits;
+}
+
+int perf_pmu__format_type(struct perf_pmu *pmu, const char *name)
+{
+ struct perf_pmu_format *format = pmu_find_format(&pmu->format, name);
+
+ if (!format)
+ return -1;
+
+ perf_pmu_format__load(pmu, format);
+ return format->value;
+}
+
+/*
+ * Sets value based on the format definition (format parameter)
+ * and unformatted value (value parameter).
+ */
+static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v,
+ bool zero)
+{
+ unsigned long fbit, vbit;
+
+ for (fbit = 0, vbit = 0; fbit < PERF_PMU_FORMAT_BITS; fbit++) {
+
+ if (!test_bit(fbit, format))
+ continue;
+
+ if (value & (1llu << vbit++))
+ *v |= (1llu << fbit);
+ else if (zero)
+ *v &= ~(1llu << fbit);
+ }
+}
+
+static __u64 pmu_format_max_value(const unsigned long *format)
+{
+ int w;
+
+ w = bitmap_weight(format, PERF_PMU_FORMAT_BITS);
+ if (!w)
+ return 0;
+ if (w < 64)
+ return (1ULL << w) - 1;
+ return -1;
+}
+
+/*
+ * Term is a string term, and might be a param-term. Try to look up it's value
+ * in the remaining terms.
+ * - We have a term like "base-or-format-term=param-term",
+ * - We need to find the value supplied for "param-term" (with param-term named
+ * in a config string) later on in the term list.
+ */
+static int pmu_resolve_param_term(struct parse_events_term *term,
+ struct list_head *head_terms,
+ __u64 *value)
+{
+ struct parse_events_term *t;
+
+ list_for_each_entry(t, head_terms, list) {
+ if (t->type_val == PARSE_EVENTS__TERM_TYPE_NUM &&
+ t->config && !strcmp(t->config, term->config)) {
+ t->used = true;
+ *value = t->val.num;
+ return 0;
+ }
+ }
+
+ if (verbose > 0)
+ printf("Required parameter '%s' not specified\n", term->config);
+
+ return -1;
+}
+
+static char *pmu_formats_string(struct list_head *formats)
+{
+ struct perf_pmu_format *format;
+ char *str = NULL;
+ struct strbuf buf = STRBUF_INIT;
+ unsigned int i = 0;
+
+ if (!formats)
+ return NULL;
+
+ /* sysfs exported terms */
+ list_for_each_entry(format, formats, list)
+ if (strbuf_addf(&buf, i++ ? ",%s" : "%s", format->name) < 0)
+ goto error;
+
+ str = strbuf_detach(&buf, NULL);
+error:
+ strbuf_release(&buf);
+
+ return str;
+}
+
+/*
+ * Setup one of config[12] attr members based on the
+ * user input data - term parameter.
+ */
+static int pmu_config_term(struct perf_pmu *pmu,
+ struct perf_event_attr *attr,
+ struct parse_events_term *term,
+ struct list_head *head_terms,
+ bool zero, struct parse_events_error *err)
+{
+ struct perf_pmu_format *format;
+ __u64 *vp;
+ __u64 val, max_val;
+
+ /*
+ * If this is a parameter we've already used for parameterized-eval,
+ * skip it in normal eval.
+ */
+ if (term->used)
+ return 0;
+
+ /*
+ * Hardcoded terms should be already in, so nothing
+ * to be done for them.
+ */
+ if (parse_events__is_hardcoded_term(term))
+ return 0;
+
+ format = pmu_find_format(&pmu->format, term->config);
+ if (!format) {
+ char *pmu_term = pmu_formats_string(&pmu->format);
+ char *unknown_term;
+ char *help_msg;
+
+ if (asprintf(&unknown_term,
+ "unknown term '%s' for pmu '%s'",
+ term->config, pmu->name) < 0)
+ unknown_term = NULL;
+ help_msg = parse_events_formats_error_string(pmu_term);
+ if (err) {
+ parse_events_error__handle(err, term->err_term,
+ unknown_term,
+ help_msg);
+ } else {
+ pr_debug("%s (%s)\n", unknown_term, help_msg);
+ free(unknown_term);
+ }
+ free(pmu_term);
+ return -EINVAL;
+ }
+ perf_pmu_format__load(pmu, format);
+ switch (format->value) {
+ case PERF_PMU_FORMAT_VALUE_CONFIG:
+ vp = &attr->config;
+ break;
+ case PERF_PMU_FORMAT_VALUE_CONFIG1:
+ vp = &attr->config1;
+ break;
+ case PERF_PMU_FORMAT_VALUE_CONFIG2:
+ vp = &attr->config2;
+ break;
+ case PERF_PMU_FORMAT_VALUE_CONFIG3:
+ vp = &attr->config3;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /*
+ * Either directly use a numeric term, or try to translate string terms
+ * using event parameters.
+ */
+ if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
+ if (term->no_value &&
+ bitmap_weight(format->bits, PERF_PMU_FORMAT_BITS) > 1) {
+ if (err) {
+ parse_events_error__handle(err, term->err_val,
+ strdup("no value assigned for term"),
+ NULL);
+ }
+ return -EINVAL;
+ }
+
+ val = term->val.num;
+ } else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) {
+ if (strcmp(term->val.str, "?")) {
+ if (verbose > 0) {
+ pr_info("Invalid sysfs entry %s=%s\n",
+ term->config, term->val.str);
+ }
+ if (err) {
+ parse_events_error__handle(err, term->err_val,
+ strdup("expected numeric value"),
+ NULL);
+ }
+ return -EINVAL;
+ }
+
+ if (pmu_resolve_param_term(term, head_terms, &val))
+ return -EINVAL;
+ } else
+ return -EINVAL;
+
+ max_val = pmu_format_max_value(format->bits);
+ if (val > max_val) {
+ if (err) {
+ char *err_str;
+
+ parse_events_error__handle(err, term->err_val,
+ asprintf(&err_str,
+ "value too big for format, maximum is %llu",
+ (unsigned long long)max_val) < 0
+ ? strdup("value too big for format")
+ : err_str,
+ NULL);
+ return -EINVAL;
+ }
+ /*
+ * Assume we don't care if !err, in which case the value will be
+ * silently truncated.
+ */
+ }
+
+ pmu_format_value(format->bits, val, vp, zero);
+ return 0;
+}
+
+int perf_pmu__config_terms(struct perf_pmu *pmu,
+ struct perf_event_attr *attr,
+ struct list_head *head_terms,
+ bool zero, struct parse_events_error *err)
+{
+ struct parse_events_term *term;
+
+ list_for_each_entry(term, head_terms, list) {
+ if (pmu_config_term(pmu, attr, term, head_terms, zero, err))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Configures event's 'attr' parameter based on the:
+ * 1) users input - specified in terms parameter
+ * 2) pmu format definitions - specified by pmu parameter
+ */
+int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
+ struct list_head *head_terms,
+ struct parse_events_error *err)
+{
+ bool zero = !!pmu->default_config;
+
+ return perf_pmu__config_terms(pmu, attr, head_terms, zero, err);
+}
+
+static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu,
+ struct parse_events_term *term)
+{
+ struct perf_pmu_alias *alias;
+ const char *name;
+
+ if (parse_events__is_hardcoded_term(term))
+ return NULL;
+
+ if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
+ if (!term->no_value)
+ return NULL;
+ if (pmu_find_format(&pmu->format, term->config))
+ return NULL;
+ name = term->config;
+
+ } else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) {
+ if (strcasecmp(term->config, "event"))
+ return NULL;
+ name = term->val.str;
+ } else {
+ return NULL;
+ }
+
+ alias = perf_pmu__find_alias(pmu, name, /*load=*/ true);
+ if (alias || pmu->cpu_aliases_added)
+ return alias;
+
+ /* Alias doesn't exist, try to get it from the json events. */
+ if (pmu->events_table &&
+ pmu_events_table__find_event(pmu->events_table, pmu, name,
+ pmu_add_cpu_aliases_map_callback,
+ pmu) == 0) {
+ alias = perf_pmu__find_alias(pmu, name, /*load=*/ false);
+ }
+ return alias;
+}
+
+
+static int check_info_data(struct perf_pmu *pmu,
+ struct perf_pmu_alias *alias,
+ struct perf_pmu_info *info,
+ struct parse_events_error *err,
+ int column)
+{
+ read_alias_info(pmu, alias);
+ /*
+ * Only one term in event definition can
+ * define unit, scale and snapshot, fail
+ * if there's more than one.
+ */
+ if (info->unit && alias->unit[0]) {
+ parse_events_error__handle(err, column,
+ strdup("Attempt to set event's unit twice"),
+ NULL);
+ return -EINVAL;
+ }
+ if (info->scale && alias->scale) {
+ parse_events_error__handle(err, column,
+ strdup("Attempt to set event's scale twice"),
+ NULL);
+ return -EINVAL;
+ }
+ if (info->snapshot && alias->snapshot) {
+ parse_events_error__handle(err, column,
+ strdup("Attempt to set event snapshot twice"),
+ NULL);
+ return -EINVAL;
+ }
+
+ if (alias->unit[0])
+ info->unit = alias->unit;
+
+ if (alias->scale)
+ info->scale = alias->scale;
+
+ if (alias->snapshot)
+ info->snapshot = alias->snapshot;
+
+ return 0;
+}
+
+/*
+ * Find alias in the terms list and replace it with the terms
+ * defined for the alias
+ */
+int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
+ struct perf_pmu_info *info, struct parse_events_error *err)
+{
+ struct parse_events_term *term, *h;
+ struct perf_pmu_alias *alias;
+ int ret;
+
+ info->per_pkg = false;
+
+ /*
+ * Mark unit and scale as not set
+ * (different from default values, see below)
+ */
+ info->unit = NULL;
+ info->scale = 0.0;
+ info->snapshot = false;
+
+ list_for_each_entry_safe(term, h, head_terms, list) {
+ alias = pmu_find_alias(pmu, term);
+ if (!alias)
+ continue;
+ ret = pmu_alias_terms(alias, &term->list);
+ if (ret) {
+ parse_events_error__handle(err, term->err_term,
+ strdup("Failure to duplicate terms"),
+ NULL);
+ return ret;
+ }
+
+ ret = check_info_data(pmu, alias, info, err, term->err_term);
+ if (ret)
+ return ret;
+
+ if (alias->per_pkg)
+ info->per_pkg = true;
+
+ list_del_init(&term->list);
+ parse_events_term__delete(term);
+ }
+
+ /*
+ * if no unit or scale found in aliases, then
+ * set defaults as for evsel
+ * unit cannot left to NULL
+ */
+ if (info->unit == NULL)
+ info->unit = "";
+
+ if (info->scale == 0.0)
+ info->scale = 1.0;
+
+ return 0;
+}
+
+struct find_event_args {
+ const char *event;
+ void *state;
+ pmu_event_callback cb;
+};
+
+static int find_event_callback(void *state, struct pmu_event_info *info)
+{
+ struct find_event_args *args = state;
+
+ if (!strcmp(args->event, info->name))
+ return args->cb(args->state, info);
+
+ return 0;
+}
+
+int perf_pmu__find_event(struct perf_pmu *pmu, const char *event, void *state, pmu_event_callback cb)
+{
+ struct find_event_args args = {
+ .event = event,
+ .state = state,
+ .cb = cb,
+ };
+
+ /* Sub-optimal, but function is only used by tests. */
+ return perf_pmu__for_each_event(pmu, /*skip_duplicate_pmus=*/ false,
+ &args, find_event_callback);
+}
+
+static void perf_pmu__del_formats(struct list_head *formats)
+{
+ struct perf_pmu_format *fmt, *tmp;
+
+ list_for_each_entry_safe(fmt, tmp, formats, list) {
+ list_del(&fmt->list);
+ zfree(&fmt->name);
+ free(fmt);
+ }
+}
+
+bool perf_pmu__has_format(const struct perf_pmu *pmu, const char *name)
+{
+ struct perf_pmu_format *format;
+
+ list_for_each_entry(format, &pmu->format, list) {
+ if (!strcmp(format->name, name))
+ return true;
+ }
+ return false;
+}
+
+bool is_pmu_core(const char *name)
+{
+ return !strcmp(name, "cpu") || !strcmp(name, "cpum_cf") || is_sysfs_pmu_core(name);
+}
+
+bool perf_pmu__supports_legacy_cache(const struct perf_pmu *pmu)
+{
+ return pmu->is_core;
+}
+
+bool perf_pmu__auto_merge_stats(const struct perf_pmu *pmu)
+{
+ return !pmu->is_core || perf_pmus__num_core_pmus() == 1;
+}
+
+bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name)
+{
+ if (perf_pmu__find_alias(pmu, name, /*load=*/ true) != NULL)
+ return true;
+ if (pmu->cpu_aliases_added || !pmu->events_table)
+ return false;
+ return pmu_events_table__find_event(pmu->events_table, pmu, name, NULL, NULL) == 0;
+}
+
+size_t perf_pmu__num_events(struct perf_pmu *pmu)
+{
+ size_t nr;
+
+ if (!pmu->sysfs_aliases_loaded)
+ pmu_aliases_parse(pmu);
+
+ nr = pmu->sysfs_aliases;
+
+ if (pmu->cpu_aliases_added)
+ nr += pmu->loaded_json_aliases;
+ else if (pmu->events_table)
+ nr += pmu_events_table__num_events(pmu->events_table, pmu) - pmu->loaded_json_aliases;
+
+ return pmu->selectable ? nr + 1 : nr;
+}
+
+static int sub_non_neg(int a, int b)
+{
+ if (b > a)
+ return 0;
+ return a - b;
+}
+
+static char *format_alias(char *buf, int len, const struct perf_pmu *pmu,
+ const struct perf_pmu_alias *alias, bool skip_duplicate_pmus)
+{
+ struct parse_events_term *term;
+ int pmu_name_len = skip_duplicate_pmus
+ ? pmu_name_len_no_suffix(pmu->name, /*num=*/NULL)
+ : (int)strlen(pmu->name);
+ int used = snprintf(buf, len, "%.*s/%s", pmu_name_len, pmu->name, alias->name);
+
+ list_for_each_entry(term, &alias->terms, list) {
+ if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR)
+ used += snprintf(buf + used, sub_non_neg(len, used),
+ ",%s=%s", term->config,
+ term->val.str);
+ }
+
+ if (sub_non_neg(len, used) > 0) {
+ buf[used] = '/';
+ used++;
+ }
+ if (sub_non_neg(len, used) > 0) {
+ buf[used] = '\0';
+ used++;
+ } else
+ buf[len - 1] = '\0';
+
+ return buf;
+}
+
+int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus,
+ void *state, pmu_event_callback cb)
+{
+ char buf[1024];
+ struct perf_pmu_alias *event;
+ struct pmu_event_info info = {
+ .pmu = pmu,
+ };
+ int ret = 0;
+ struct strbuf sb;
+
+ strbuf_init(&sb, /*hint=*/ 0);
+ pmu_add_cpu_aliases(pmu);
+ list_for_each_entry(event, &pmu->aliases, list) {
+ size_t buf_used;
+
+ info.pmu_name = event->pmu_name ?: pmu->name;
+ info.alias = NULL;
+ if (event->desc) {
+ info.name = event->name;
+ buf_used = 0;
+ } else {
+ info.name = format_alias(buf, sizeof(buf), pmu, event,
+ skip_duplicate_pmus);
+ if (pmu->is_core) {
+ info.alias = info.name;
+ info.name = event->name;
+ }
+ buf_used = strlen(buf) + 1;
+ }
+ info.scale_unit = NULL;
+ if (strlen(event->unit) || event->scale != 1.0) {
+ info.scale_unit = buf + buf_used;
+ buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used,
+ "%G%s", event->scale, event->unit) + 1;
+ }
+ info.desc = event->desc;
+ info.long_desc = event->long_desc;
+ info.encoding_desc = buf + buf_used;
+ parse_events_term__to_strbuf(&event->terms, &sb);
+ buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used,
+ "%s/%s/", info.pmu_name, sb.buf) + 1;
+ info.topic = event->topic;
+ info.str = sb.buf;
+ info.deprecated = event->deprecated;
+ ret = cb(state, &info);
+ if (ret)
+ goto out;
+ strbuf_setlen(&sb, /*len=*/ 0);
+ }
+ if (pmu->selectable) {
+ info.name = buf;
+ snprintf(buf, sizeof(buf), "%s//", pmu->name);
+ info.alias = NULL;
+ info.scale_unit = NULL;
+ info.desc = NULL;
+ info.long_desc = NULL;
+ info.encoding_desc = NULL;
+ info.topic = NULL;
+ info.pmu_name = pmu->name;
+ info.deprecated = false;
+ ret = cb(state, &info);
+ }
+out:
+ strbuf_release(&sb);
+ return ret;
+}
+
+bool pmu__name_match(const struct perf_pmu *pmu, const char *pmu_name)
+{
+ return !strcmp(pmu->name, pmu_name) ||
+ (pmu->is_uncore && pmu_uncore_alias_match(pmu_name, pmu->name)) ||
+ /*
+ * jevents and tests use default_core as a marker for any core
+ * PMU as the PMU name varies across architectures.
+ */
+ (pmu->is_core && !strcmp(pmu_name, "default_core"));
+}
+
+bool perf_pmu__is_software(const struct perf_pmu *pmu)
+{
+ if (pmu->is_core || pmu->is_uncore || pmu->auxtrace)
+ return false;
+ switch (pmu->type) {
+ case PERF_TYPE_HARDWARE: return false;
+ case PERF_TYPE_SOFTWARE: return true;
+ case PERF_TYPE_TRACEPOINT: return true;
+ case PERF_TYPE_HW_CACHE: return false;
+ case PERF_TYPE_RAW: return false;
+ case PERF_TYPE_BREAKPOINT: return true;
+ default: break;
+ }
+ return !strcmp(pmu->name, "kprobe") || !strcmp(pmu->name, "uprobe");
+}
+
+FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name)
+{
+ char path[PATH_MAX];
+
+ if (!perf_pmu__pathname_scnprintf(path, sizeof(path), pmu->name, name) ||
+ !file_available(path))
+ return NULL;
+
+ return fopen(path, "r");
+}
+
+FILE *perf_pmu__open_file_at(struct perf_pmu *pmu, int dirfd, const char *name)
+{
+ int fd;
+
+ fd = perf_pmu__pathname_fd(dirfd, pmu->name, name, O_RDONLY);
+ if (fd < 0)
+ return NULL;
+
+ return fdopen(fd, "r");
+}
+
+int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt,
+ ...)
+{
+ va_list args;
+ FILE *file;
+ int ret = EOF;
+
+ va_start(args, fmt);
+ file = perf_pmu__open_file(pmu, name);
+ if (file) {
+ ret = vfscanf(file, fmt, args);
+ fclose(file);
+ }
+ va_end(args);
+ return ret;
+}
+
+int perf_pmu__scan_file_at(struct perf_pmu *pmu, int dirfd, const char *name,
+ const char *fmt, ...)
+{
+ va_list args;
+ FILE *file;
+ int ret = EOF;
+
+ va_start(args, fmt);
+ file = perf_pmu__open_file_at(pmu, dirfd, name);
+ if (file) {
+ ret = vfscanf(file, fmt, args);
+ fclose(file);
+ }
+ va_end(args);
+ return ret;
+}
+
+bool perf_pmu__file_exists(struct perf_pmu *pmu, const char *name)
+{
+ char path[PATH_MAX];
+
+ if (!perf_pmu__pathname_scnprintf(path, sizeof(path), pmu->name, name))
+ return false;
+
+ return file_available(path);
+}
+
+static int perf_pmu__new_caps(struct list_head *list, char *name, char *value)
+{
+ struct perf_pmu_caps *caps = zalloc(sizeof(*caps));
+
+ if (!caps)
+ return -ENOMEM;
+
+ caps->name = strdup(name);
+ if (!caps->name)
+ goto free_caps;
+ caps->value = strndup(value, strlen(value) - 1);
+ if (!caps->value)
+ goto free_name;
+ list_add_tail(&caps->list, list);
+ return 0;
+
+free_name:
+ zfree(&caps->name);
+free_caps:
+ free(caps);
+
+ return -ENOMEM;
+}
+
+static void perf_pmu__del_caps(struct perf_pmu *pmu)
+{
+ struct perf_pmu_caps *caps, *tmp;
+
+ list_for_each_entry_safe(caps, tmp, &pmu->caps, list) {
+ list_del(&caps->list);
+ zfree(&caps->name);
+ zfree(&caps->value);
+ free(caps);
+ }
+}
+
+/*
+ * Reading/parsing the given pmu capabilities, which should be located at:
+ * /sys/bus/event_source/devices/<dev>/caps as sysfs group attributes.
+ * Return the number of capabilities
+ */
+int perf_pmu__caps_parse(struct perf_pmu *pmu)
+{
+ struct stat st;
+ char caps_path[PATH_MAX];
+ DIR *caps_dir;
+ struct dirent *evt_ent;
+ int caps_fd;
+
+ if (pmu->caps_initialized)
+ return pmu->nr_caps;
+
+ pmu->nr_caps = 0;
+
+ if (!perf_pmu__pathname_scnprintf(caps_path, sizeof(caps_path), pmu->name, "caps"))
+ return -1;
+
+ if (stat(caps_path, &st) < 0) {
+ pmu->caps_initialized = true;
+ return 0; /* no error if caps does not exist */
+ }
+
+ caps_dir = opendir(caps_path);
+ if (!caps_dir)
+ return -EINVAL;
+
+ caps_fd = dirfd(caps_dir);
+
+ while ((evt_ent = readdir(caps_dir)) != NULL) {
+ char *name = evt_ent->d_name;
+ char value[128];
+ FILE *file;
+ int fd;
+
+ if (!strcmp(name, ".") || !strcmp(name, ".."))
+ continue;
+
+ fd = openat(caps_fd, name, O_RDONLY);
+ if (fd == -1)
+ continue;
+ file = fdopen(fd, "r");
+ if (!file) {
+ close(fd);
+ continue;
+ }
+
+ if (!fgets(value, sizeof(value), file) ||
+ (perf_pmu__new_caps(&pmu->caps, name, value) < 0)) {
+ fclose(file);
+ continue;
+ }
+
+ pmu->nr_caps++;
+ fclose(file);
+ }
+
+ closedir(caps_dir);
+
+ pmu->caps_initialized = true;
+ return pmu->nr_caps;
+}
+
+static void perf_pmu__compute_config_masks(struct perf_pmu *pmu)
+{
+ struct perf_pmu_format *format;
+
+ if (pmu->config_masks_computed)
+ return;
+
+ list_for_each_entry(format, &pmu->format, list) {
+ unsigned int i;
+ __u64 *mask;
+
+ if (format->value >= PERF_PMU_FORMAT_VALUE_CONFIG_END)
+ continue;
+
+ pmu->config_masks_present = true;
+ mask = &pmu->config_masks[format->value];
+
+ for_each_set_bit(i, format->bits, PERF_PMU_FORMAT_BITS)
+ *mask |= 1ULL << i;
+ }
+ pmu->config_masks_computed = true;
+}
+
+void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
+ const char *name, int config_num,
+ const char *config_name)
+{
+ __u64 bits;
+ char buf[100];
+
+ perf_pmu__compute_config_masks(pmu);
+
+ /*
+ * Kernel doesn't export any valid format bits.
+ */
+ if (!pmu->config_masks_present)
+ return;
+
+ bits = config & ~pmu->config_masks[config_num];
+ if (bits == 0)
+ return;
+
+ bitmap_scnprintf((unsigned long *)&bits, sizeof(bits) * 8, buf, sizeof(buf));
+
+ pr_warning("WARNING: event '%s' not valid (bits %s of %s "
+ "'%llx' not supported by kernel)!\n",
+ name ?: "N/A", buf, config_name, config);
+}
+
+int perf_pmu__match(const char *pattern, const char *name, const char *tok)
+{
+ if (!name)
+ return -1;
+
+ if (fnmatch(pattern, name, 0))
+ return -1;
+
+ if (tok && !perf_pmu__match_ignoring_suffix(name, tok))
+ return -1;
+
+ return 0;
+}
+
+double __weak perf_pmu__cpu_slots_per_cycle(void)
+{
+ return NAN;
+}
+
+int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size)
+{
+ const char *sysfs = sysfs__mountpoint();
+
+ if (!sysfs)
+ return 0;
+ return scnprintf(pathname, size, "%s/bus/event_source/devices/", sysfs);
+}
+
+int perf_pmu__event_source_devices_fd(void)
+{
+ char path[PATH_MAX];
+ const char *sysfs = sysfs__mountpoint();
+
+ if (!sysfs)
+ return -1;
+
+ scnprintf(path, sizeof(path), "%s/bus/event_source/devices/", sysfs);
+ return open(path, O_DIRECTORY);
+}
+
+/*
+ * Fill 'buf' with the path to a file or folder in 'pmu_name' in
+ * sysfs. For example if pmu_name = "cs_etm" and 'filename' = "format"
+ * then pathname will be filled with
+ * "/sys/bus/event_source/devices/cs_etm/format"
+ *
+ * Return 0 if the sysfs mountpoint couldn't be found, if no characters were
+ * written or if the buffer size is exceeded.
+ */
+int perf_pmu__pathname_scnprintf(char *buf, size_t size,
+ const char *pmu_name, const char *filename)
+{
+ size_t len;
+
+ len = perf_pmu__event_source_devices_scnprintf(buf, size);
+ if (!len || (len + strlen(pmu_name) + strlen(filename) + 1) >= size)
+ return 0;
+
+ return scnprintf(buf + len, size - len, "%s/%s", pmu_name, filename);
+}
+
+int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename, int flags)
+{
+ char path[PATH_MAX];
+
+ scnprintf(path, sizeof(path), "%s/%s", pmu_name, filename);
+ return openat(dirfd, path, flags);
+}
+
+void perf_pmu__delete(struct perf_pmu *pmu)
+{
+ perf_pmu__del_formats(&pmu->format);
+ perf_pmu__del_aliases(pmu);
+ perf_pmu__del_caps(pmu);
+
+ perf_cpu_map__put(pmu->cpus);
+
+ zfree(&pmu->default_config);
+ zfree(&pmu->name);
+ zfree(&pmu->alias_name);
+ zfree(&pmu->id);
+ free(pmu);
+}
+
+struct perf_pmu *pmu__find_core_pmu(void)
+{
+ struct perf_pmu *pmu = NULL;
+
+ while ((pmu = perf_pmus__scan_core(pmu))) {
+ /*
+ * The cpumap should cover all CPUs. Otherwise, some CPUs may
+ * not support some events or have different event IDs.
+ */
+ if (RC_CHK_ACCESS(pmu->cpus)->nr != cpu__max_cpu().cpu)
+ return NULL;
+
+ return pmu;
+ }
+ return NULL;
+}
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
new file mode 100644
index 000000000..6a4e170c6
--- /dev/null
+++ b/tools/perf/util/pmu.h
@@ -0,0 +1,269 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PMU_H
+#define __PMU_H
+
+#include <linux/bitmap.h>
+#include <linux/compiler.h>
+#include <linux/perf_event.h>
+#include <linux/list.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include "parse-events.h"
+#include "pmu-events/pmu-events.h"
+
+struct evsel_config_term;
+struct perf_cpu_map;
+struct print_callbacks;
+
+enum {
+ PERF_PMU_FORMAT_VALUE_CONFIG,
+ PERF_PMU_FORMAT_VALUE_CONFIG1,
+ PERF_PMU_FORMAT_VALUE_CONFIG2,
+ PERF_PMU_FORMAT_VALUE_CONFIG3,
+ PERF_PMU_FORMAT_VALUE_CONFIG_END,
+};
+
+#define PERF_PMU_FORMAT_BITS 64
+#define MAX_PMU_NAME_LEN 128
+
+struct perf_event_attr;
+
+struct perf_pmu_caps {
+ char *name;
+ char *value;
+ struct list_head list;
+};
+
+/**
+ * struct perf_pmu
+ */
+struct perf_pmu {
+ /** @name: The name of the PMU such as "cpu". */
+ const char *name;
+ /**
+ * @alias_name: Optional alternate name for the PMU determined in
+ * architecture specific code.
+ */
+ char *alias_name;
+ /**
+ * @id: Optional PMU identifier read from
+ * <sysfs>/bus/event_source/devices/<name>/identifier.
+ */
+ const char *id;
+ /**
+ * @type: Perf event attributed type value, read from
+ * <sysfs>/bus/event_source/devices/<name>/type.
+ */
+ __u32 type;
+ /**
+ * @selectable: Can the PMU name be selected as if it were an event?
+ */
+ bool selectable;
+ /**
+ * @is_core: Is the PMU the core CPU PMU? Determined by the name being
+ * "cpu" or by the presence of
+ * <sysfs>/bus/event_source/devices/<name>/cpus. There may be >1 core
+ * PMU on systems like Intel hybrid.
+ */
+ bool is_core;
+ /**
+ * @is_uncore: Is the PMU not within the CPU core? Determined by the
+ * presence of <sysfs>/bus/event_source/devices/<name>/cpumask.
+ */
+ bool is_uncore;
+ /**
+ * @auxtrace: Are events auxiliary events? Determined in architecture
+ * specific code.
+ */
+ bool auxtrace;
+ /**
+ * @formats_checked: Only check PMU's formats are valid for
+ * perf_event_attr once.
+ */
+ bool formats_checked;
+ /** @config_masks_present: Are there config format values? */
+ bool config_masks_present;
+ /** @config_masks_computed: Set when masks are lazily computed. */
+ bool config_masks_computed;
+ /**
+ * @max_precise: Number of levels of :ppp precision supported by the
+ * PMU, read from
+ * <sysfs>/bus/event_source/devices/<name>/caps/max_precise.
+ */
+ int max_precise;
+ /**
+ * @default_config: Optional default perf_event_attr determined in
+ * architecture specific code.
+ */
+ struct perf_event_attr *default_config;
+ /**
+ * @cpus: Empty or the contents of either of:
+ * <sysfs>/bus/event_source/devices/<name>/cpumask.
+ * <sysfs>/bus/event_source/devices/<cpu>/cpus.
+ */
+ struct perf_cpu_map *cpus;
+ /**
+ * @format: Holds the contents of files read from
+ * <sysfs>/bus/event_source/devices/<name>/format/. The contents specify
+ * which event parameter changes what config, config1 or config2 bits.
+ */
+ struct list_head format;
+ /**
+ * @aliases: List of struct perf_pmu_alias. Each alias corresponds to an
+ * event read from <sysfs>/bus/event_source/devices/<name>/events/ or
+ * from json events in pmu-events.c.
+ */
+ struct list_head aliases;
+ /**
+ * @events_table: The events table for json events in pmu-events.c.
+ */
+ const struct pmu_events_table *events_table;
+ /** @sysfs_aliases: Number of sysfs aliases loaded. */
+ uint32_t sysfs_aliases;
+ /** @sysfs_aliases: Number of json event aliases loaded. */
+ uint32_t loaded_json_aliases;
+ /** @sysfs_aliases_loaded: Are sysfs aliases loaded from disk? */
+ bool sysfs_aliases_loaded;
+ /**
+ * @cpu_aliases_added: Have all json events table entries for the PMU
+ * been added?
+ */
+ bool cpu_aliases_added;
+ /** @caps_initialized: Has the list caps been initialized? */
+ bool caps_initialized;
+ /** @nr_caps: The length of the list caps. */
+ u32 nr_caps;
+ /**
+ * @caps: Holds the contents of files read from
+ * <sysfs>/bus/event_source/devices/<name>/caps/.
+ *
+ * The contents are pairs of the filename with the value of its
+ * contents, for example, max_precise (see above) may have a value of 3.
+ */
+ struct list_head caps;
+ /** @list: Element on pmus list in pmu.c. */
+ struct list_head list;
+
+ /**
+ * @config_masks: Derived from the PMU's format data, bits that are
+ * valid within the config value.
+ */
+ __u64 config_masks[PERF_PMU_FORMAT_VALUE_CONFIG_END];
+
+ /**
+ * @missing_features: Features to inhibit when events on this PMU are
+ * opened.
+ */
+ struct {
+ /**
+ * @exclude_guest: Disables perf_event_attr exclude_guest and
+ * exclude_host.
+ */
+ bool exclude_guest;
+ } missing_features;
+};
+
+/** @perf_pmu__fake: A special global PMU used for testing. */
+extern struct perf_pmu perf_pmu__fake;
+
+struct perf_pmu_info {
+ const char *unit;
+ double scale;
+ bool per_pkg;
+ bool snapshot;
+};
+
+struct pmu_event_info {
+ const struct perf_pmu *pmu;
+ const char *name;
+ const char* alias;
+ const char *scale_unit;
+ const char *desc;
+ const char *long_desc;
+ const char *encoding_desc;
+ const char *topic;
+ const char *pmu_name;
+ const char *str;
+ bool deprecated;
+};
+
+typedef int (*pmu_event_callback)(void *state, struct pmu_event_info *info);
+
+void pmu_add_sys_aliases(struct perf_pmu *pmu);
+int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
+ struct list_head *head_terms,
+ struct parse_events_error *error);
+int perf_pmu__config_terms(struct perf_pmu *pmu,
+ struct perf_event_attr *attr,
+ struct list_head *head_terms,
+ bool zero, struct parse_events_error *error);
+__u64 perf_pmu__format_bits(struct perf_pmu *pmu, const char *name);
+int perf_pmu__format_type(struct perf_pmu *pmu, const char *name);
+int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
+ struct perf_pmu_info *info, struct parse_events_error *err);
+int perf_pmu__find_event(struct perf_pmu *pmu, const char *event, void *state, pmu_event_callback cb);
+
+int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_load);
+void perf_pmu_format__set_value(void *format, int config, unsigned long *bits);
+bool perf_pmu__has_format(const struct perf_pmu *pmu, const char *name);
+
+bool is_pmu_core(const char *name);
+bool perf_pmu__supports_legacy_cache(const struct perf_pmu *pmu);
+bool perf_pmu__auto_merge_stats(const struct perf_pmu *pmu);
+bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name);
+size_t perf_pmu__num_events(struct perf_pmu *pmu);
+int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus,
+ void *state, pmu_event_callback cb);
+bool pmu__name_match(const struct perf_pmu *pmu, const char *pmu_name);
+
+/**
+ * perf_pmu_is_software - is the PMU a software PMU as in it uses the
+ * perf_sw_context in the kernel?
+ */
+bool perf_pmu__is_software(const struct perf_pmu *pmu);
+
+FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name);
+FILE *perf_pmu__open_file_at(struct perf_pmu *pmu, int dirfd, const char *name);
+
+int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) __scanf(3, 4);
+int perf_pmu__scan_file_at(struct perf_pmu *pmu, int dirfd, const char *name,
+ const char *fmt, ...) __scanf(4, 5);
+
+bool perf_pmu__file_exists(struct perf_pmu *pmu, const char *name);
+
+int perf_pmu__test(void);
+
+struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu);
+void pmu_add_cpu_aliases_table(struct perf_pmu *pmu,
+ const struct pmu_events_table *table);
+
+char *perf_pmu__getcpuid(struct perf_pmu *pmu);
+const struct pmu_events_table *pmu_events_table__find(void);
+const struct pmu_metrics_table *pmu_metrics_table__find(void);
+
+int perf_pmu__convert_scale(const char *scale, char **end, double *sval);
+
+int perf_pmu__caps_parse(struct perf_pmu *pmu);
+
+void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
+ const char *name, int config_num,
+ const char *config_name);
+void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu);
+
+int perf_pmu__match(const char *pattern, const char *name, const char *tok);
+
+const char *pmu_find_real_name(const char *name);
+const char *pmu_find_alias_name(const char *name);
+double perf_pmu__cpu_slots_per_cycle(void);
+int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size);
+int perf_pmu__pathname_scnprintf(char *buf, size_t size,
+ const char *pmu_name, const char *filename);
+int perf_pmu__event_source_devices_fd(void);
+int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename, int flags);
+
+struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *lookup_name);
+struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pmus);
+void perf_pmu__delete(struct perf_pmu *pmu);
+struct perf_pmu *pmu__find_core_pmu(void);
+
+#endif /* __PMU_H */
diff --git a/tools/perf/util/pmu.l b/tools/perf/util/pmu.l
new file mode 100644
index 000000000..67b247be6
--- /dev/null
+++ b/tools/perf/util/pmu.l
@@ -0,0 +1,48 @@
+%option prefix="perf_pmu_"
+%option reentrant
+%option bison-bridge
+
+%{
+#include <stdlib.h>
+#include <linux/bitops.h>
+#include "pmu.h"
+#include "pmu-bison.h"
+
+char *perf_pmu_get_text(yyscan_t yyscanner);
+YYSTYPE *perf_pmu_get_lval(yyscan_t yyscanner);
+
+static int value(yyscan_t scanner, int base)
+{
+ YYSTYPE *yylval = perf_pmu_get_lval(scanner);
+ char *text = perf_pmu_get_text(scanner);
+ long num;
+
+ errno = 0;
+ num = strtoul(text, NULL, base);
+ if (errno)
+ return PP_ERROR;
+
+ yylval->num = num;
+ return PP_VALUE;
+}
+
+%}
+
+num_dec [0-9]+
+
+%%
+
+{num_dec} { return value(yyscanner, 10); }
+config { return PP_CONFIG; }
+- { return '-'; }
+: { return ':'; }
+, { return ','; }
+. { ; }
+\n { ; }
+
+%%
+
+int perf_pmu_wrap(void *scanner __maybe_unused)
+{
+ return 1;
+}
diff --git a/tools/perf/util/pmu.y b/tools/perf/util/pmu.y
new file mode 100644
index 000000000..600c8c158
--- /dev/null
+++ b/tools/perf/util/pmu.y
@@ -0,0 +1,97 @@
+%define api.pure full
+%parse-param {void *format}
+%parse-param {void *scanner}
+%lex-param {void* scanner}
+
+%{
+
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/bitmap.h>
+#include <string.h>
+#include "pmu.h"
+#include "pmu-bison.h"
+
+int perf_pmu_lex(YYSTYPE * yylval_param , void *yyscanner);
+
+#define ABORT_ON(val) \
+do { \
+ if (val) \
+ YYABORT; \
+} while (0)
+
+static void perf_pmu_error(void *format, void *scanner, const char *msg);
+
+static void perf_pmu__set_format(unsigned long *bits, long from, long to)
+{
+ long b;
+
+ if (!to)
+ to = from;
+
+ memset(bits, 0, BITS_TO_BYTES(PERF_PMU_FORMAT_BITS));
+ for (b = from; b <= to; b++)
+ __set_bit(b, bits);
+}
+
+%}
+
+%token PP_CONFIG
+%token PP_VALUE PP_ERROR
+%type <num> PP_VALUE
+%type <bits> bit_term
+%type <bits> bits
+
+%union
+{
+ unsigned long num;
+ DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS);
+}
+
+%%
+
+format:
+format format_term
+|
+format_term
+
+format_term:
+PP_CONFIG ':' bits
+{
+ perf_pmu_format__set_value(format, PERF_PMU_FORMAT_VALUE_CONFIG, $3);
+}
+|
+PP_CONFIG PP_VALUE ':' bits
+{
+ perf_pmu_format__set_value(format, $2, $4);
+}
+
+bits:
+bits ',' bit_term
+{
+ bitmap_or($$, $1, $3, 64);
+}
+|
+bit_term
+{
+ memcpy($$, $1, sizeof($1));
+}
+
+bit_term:
+PP_VALUE '-' PP_VALUE
+{
+ perf_pmu__set_format($$, $1, $3);
+}
+|
+PP_VALUE
+{
+ perf_pmu__set_format($$, $1, 0);
+}
+
+%%
+
+static void perf_pmu_error(void *format __maybe_unused,
+ void *scanner __maybe_unused,
+ const char *msg __maybe_unused)
+{
+}
diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
new file mode 100644
index 000000000..6631367c7
--- /dev/null
+++ b/tools/perf/util/pmus.c
@@ -0,0 +1,594 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/list.h>
+#include <linux/list_sort.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <subcmd/pager.h>
+#include <sys/types.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <pthread.h>
+#include <string.h>
+#include <unistd.h>
+#include "debug.h"
+#include "evsel.h"
+#include "pmus.h"
+#include "pmu.h"
+#include "print-events.h"
+
+/*
+ * core_pmus: A PMU belongs to core_pmus if it's name is "cpu" or it's sysfs
+ * directory contains "cpus" file. All PMUs belonging to core_pmus
+ * must have pmu->is_core=1. If there are more than one PMU in
+ * this list, perf interprets it as a heterogeneous platform.
+ * (FWIW, certain ARM platforms having heterogeneous cores uses
+ * homogeneous PMU, and thus they are treated as homogeneous
+ * platform by perf because core_pmus will have only one entry)
+ * other_pmus: All other PMUs which are not part of core_pmus list. It doesn't
+ * matter whether PMU is present per SMT-thread or outside of the
+ * core in the hw. For e.g., an instance of AMD ibs_fetch// and
+ * ibs_op// PMUs is present in each hw SMT thread, however they
+ * are captured under other_pmus. PMUs belonging to other_pmus
+ * must have pmu->is_core=0 but pmu->is_uncore could be 0 or 1.
+ */
+static LIST_HEAD(core_pmus);
+static LIST_HEAD(other_pmus);
+static bool read_sysfs_core_pmus;
+static bool read_sysfs_all_pmus;
+
+int pmu_name_len_no_suffix(const char *str, unsigned long *num)
+{
+ int orig_len, len;
+
+ orig_len = len = strlen(str);
+
+ /* Non-uncore PMUs have their full length, for example, i915. */
+ if (!strstarts(str, "uncore_"))
+ return len;
+
+ /*
+ * Count trailing digits and '_', if '_{num}' suffix isn't present use
+ * the full length.
+ */
+ while (len > 0 && isdigit(str[len - 1]))
+ len--;
+
+ if (len > 0 && len != orig_len && str[len - 1] == '_') {
+ if (num)
+ *num = strtoul(&str[len], NULL, 10);
+ return len - 1;
+ }
+ return orig_len;
+}
+
+void perf_pmus__destroy(void)
+{
+ struct perf_pmu *pmu, *tmp;
+
+ list_for_each_entry_safe(pmu, tmp, &core_pmus, list) {
+ list_del(&pmu->list);
+
+ perf_pmu__delete(pmu);
+ }
+ list_for_each_entry_safe(pmu, tmp, &other_pmus, list) {
+ list_del(&pmu->list);
+
+ perf_pmu__delete(pmu);
+ }
+ read_sysfs_core_pmus = false;
+ read_sysfs_all_pmus = false;
+}
+
+static struct perf_pmu *pmu_find(const char *name)
+{
+ struct perf_pmu *pmu;
+
+ list_for_each_entry(pmu, &core_pmus, list) {
+ if (!strcmp(pmu->name, name) ||
+ (pmu->alias_name && !strcmp(pmu->alias_name, name)))
+ return pmu;
+ }
+ list_for_each_entry(pmu, &other_pmus, list) {
+ if (!strcmp(pmu->name, name) ||
+ (pmu->alias_name && !strcmp(pmu->alias_name, name)))
+ return pmu;
+ }
+
+ return NULL;
+}
+
+struct perf_pmu *perf_pmus__find(const char *name)
+{
+ struct perf_pmu *pmu;
+ int dirfd;
+ bool core_pmu;
+
+ /*
+ * Once PMU is loaded it stays in the list,
+ * so we keep us from multiple reading/parsing
+ * the pmu format definitions.
+ */
+ pmu = pmu_find(name);
+ if (pmu)
+ return pmu;
+
+ if (read_sysfs_all_pmus)
+ return NULL;
+
+ core_pmu = is_pmu_core(name);
+ if (core_pmu && read_sysfs_core_pmus)
+ return NULL;
+
+ dirfd = perf_pmu__event_source_devices_fd();
+ pmu = perf_pmu__lookup(core_pmu ? &core_pmus : &other_pmus, dirfd, name);
+ close(dirfd);
+
+ return pmu;
+}
+
+static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name)
+{
+ struct perf_pmu *pmu;
+ bool core_pmu;
+
+ /*
+ * Once PMU is loaded it stays in the list,
+ * so we keep us from multiple reading/parsing
+ * the pmu format definitions.
+ */
+ pmu = pmu_find(name);
+ if (pmu)
+ return pmu;
+
+ if (read_sysfs_all_pmus)
+ return NULL;
+
+ core_pmu = is_pmu_core(name);
+ if (core_pmu && read_sysfs_core_pmus)
+ return NULL;
+
+ return perf_pmu__lookup(core_pmu ? &core_pmus : &other_pmus, dirfd, name);
+}
+
+static int pmus_cmp(void *priv __maybe_unused,
+ const struct list_head *lhs, const struct list_head *rhs)
+{
+ unsigned long lhs_num = 0, rhs_num = 0;
+ struct perf_pmu *lhs_pmu = container_of(lhs, struct perf_pmu, list);
+ struct perf_pmu *rhs_pmu = container_of(rhs, struct perf_pmu, list);
+ const char *lhs_pmu_name = lhs_pmu->name ?: "";
+ const char *rhs_pmu_name = rhs_pmu->name ?: "";
+ int lhs_pmu_name_len = pmu_name_len_no_suffix(lhs_pmu_name, &lhs_num);
+ int rhs_pmu_name_len = pmu_name_len_no_suffix(rhs_pmu_name, &rhs_num);
+ int ret = strncmp(lhs_pmu_name, rhs_pmu_name,
+ lhs_pmu_name_len < rhs_pmu_name_len ? lhs_pmu_name_len : rhs_pmu_name_len);
+
+ if (lhs_pmu_name_len != rhs_pmu_name_len || ret != 0 || lhs_pmu_name_len == 0)
+ return ret;
+
+ return lhs_num < rhs_num ? -1 : (lhs_num > rhs_num ? 1 : 0);
+}
+
+/* Add all pmus in sysfs to pmu list: */
+static void pmu_read_sysfs(bool core_only)
+{
+ int fd;
+ DIR *dir;
+ struct dirent *dent;
+
+ if (read_sysfs_all_pmus || (core_only && read_sysfs_core_pmus))
+ return;
+
+ fd = perf_pmu__event_source_devices_fd();
+ if (fd < 0)
+ return;
+
+ dir = fdopendir(fd);
+ if (!dir) {
+ close(fd);
+ return;
+ }
+
+ while ((dent = readdir(dir))) {
+ if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
+ continue;
+ if (core_only && !is_pmu_core(dent->d_name))
+ continue;
+ /* add to static LIST_HEAD(core_pmus) or LIST_HEAD(other_pmus): */
+ perf_pmu__find2(fd, dent->d_name);
+ }
+
+ closedir(dir);
+ if (list_empty(&core_pmus)) {
+ if (!perf_pmu__create_placeholder_core_pmu(&core_pmus))
+ pr_err("Failure to set up any core PMUs\n");
+ }
+ list_sort(NULL, &core_pmus, pmus_cmp);
+ list_sort(NULL, &other_pmus, pmus_cmp);
+ if (!list_empty(&core_pmus)) {
+ read_sysfs_core_pmus = true;
+ if (!core_only)
+ read_sysfs_all_pmus = true;
+ }
+}
+
+static struct perf_pmu *__perf_pmus__find_by_type(unsigned int type)
+{
+ struct perf_pmu *pmu;
+
+ list_for_each_entry(pmu, &core_pmus, list) {
+ if (pmu->type == type)
+ return pmu;
+ }
+
+ list_for_each_entry(pmu, &other_pmus, list) {
+ if (pmu->type == type)
+ return pmu;
+ }
+ return NULL;
+}
+
+struct perf_pmu *perf_pmus__find_by_type(unsigned int type)
+{
+ struct perf_pmu *pmu = __perf_pmus__find_by_type(type);
+
+ if (pmu || read_sysfs_all_pmus)
+ return pmu;
+
+ pmu_read_sysfs(/*core_only=*/false);
+ pmu = __perf_pmus__find_by_type(type);
+ return pmu;
+}
+
+/*
+ * pmu iterator: If pmu is NULL, we start at the begin, otherwise return the
+ * next pmu. Returns NULL on end.
+ */
+struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu)
+{
+ bool use_core_pmus = !pmu || pmu->is_core;
+
+ if (!pmu) {
+ pmu_read_sysfs(/*core_only=*/false);
+ pmu = list_prepare_entry(pmu, &core_pmus, list);
+ }
+ if (use_core_pmus) {
+ list_for_each_entry_continue(pmu, &core_pmus, list)
+ return pmu;
+
+ pmu = NULL;
+ pmu = list_prepare_entry(pmu, &other_pmus, list);
+ }
+ list_for_each_entry_continue(pmu, &other_pmus, list)
+ return pmu;
+ return NULL;
+}
+
+struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu)
+{
+ if (!pmu) {
+ pmu_read_sysfs(/*core_only=*/true);
+ pmu = list_prepare_entry(pmu, &core_pmus, list);
+ }
+ list_for_each_entry_continue(pmu, &core_pmus, list)
+ return pmu;
+
+ return NULL;
+}
+
+static struct perf_pmu *perf_pmus__scan_skip_duplicates(struct perf_pmu *pmu)
+{
+ bool use_core_pmus = !pmu || pmu->is_core;
+ int last_pmu_name_len = 0;
+ const char *last_pmu_name = (pmu && pmu->name) ? pmu->name : "";
+
+ if (!pmu) {
+ pmu_read_sysfs(/*core_only=*/false);
+ pmu = list_prepare_entry(pmu, &core_pmus, list);
+ } else
+ last_pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: "", NULL);
+
+ if (use_core_pmus) {
+ list_for_each_entry_continue(pmu, &core_pmus, list) {
+ int pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: "", /*num=*/NULL);
+
+ if (last_pmu_name_len == pmu_name_len &&
+ !strncmp(last_pmu_name, pmu->name ?: "", pmu_name_len))
+ continue;
+
+ return pmu;
+ }
+ pmu = NULL;
+ pmu = list_prepare_entry(pmu, &other_pmus, list);
+ }
+ list_for_each_entry_continue(pmu, &other_pmus, list) {
+ int pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: "", /*num=*/NULL);
+
+ if (last_pmu_name_len == pmu_name_len &&
+ !strncmp(last_pmu_name, pmu->name ?: "", pmu_name_len))
+ continue;
+
+ return pmu;
+ }
+ return NULL;
+}
+
+const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str)
+{
+ struct perf_pmu *pmu = NULL;
+
+ while ((pmu = perf_pmus__scan(pmu)) != NULL) {
+ if (!strcmp(pmu->name, str))
+ return pmu;
+ /* Ignore "uncore_" prefix. */
+ if (!strncmp(pmu->name, "uncore_", 7)) {
+ if (!strcmp(pmu->name + 7, str))
+ return pmu;
+ }
+ /* Ignore "cpu_" prefix on Intel hybrid PMUs. */
+ if (!strncmp(pmu->name, "cpu_", 4)) {
+ if (!strcmp(pmu->name + 4, str))
+ return pmu;
+ }
+ }
+ return NULL;
+}
+
+int __weak perf_pmus__num_mem_pmus(void)
+{
+ /* All core PMUs are for mem events. */
+ return perf_pmus__num_core_pmus();
+}
+
+/** Struct for ordering events as output in perf list. */
+struct sevent {
+ /** PMU for event. */
+ const struct perf_pmu *pmu;
+ const char *name;
+ const char* alias;
+ const char *scale_unit;
+ const char *desc;
+ const char *long_desc;
+ const char *encoding_desc;
+ const char *topic;
+ const char *pmu_name;
+ bool deprecated;
+};
+
+static int cmp_sevent(const void *a, const void *b)
+{
+ const struct sevent *as = a;
+ const struct sevent *bs = b;
+ bool a_iscpu, b_iscpu;
+ int ret;
+
+ /* Put extra events last. */
+ if (!!as->desc != !!bs->desc)
+ return !!as->desc - !!bs->desc;
+
+ /* Order by topics. */
+ ret = strcmp(as->topic ?: "", bs->topic ?: "");
+ if (ret)
+ return ret;
+
+ /* Order CPU core events to be first */
+ a_iscpu = as->pmu ? as->pmu->is_core : true;
+ b_iscpu = bs->pmu ? bs->pmu->is_core : true;
+ if (a_iscpu != b_iscpu)
+ return a_iscpu ? -1 : 1;
+
+ /* Order by PMU name. */
+ if (as->pmu != bs->pmu) {
+ ret = strcmp(as->pmu_name ?: "", bs->pmu_name ?: "");
+ if (ret)
+ return ret;
+ }
+
+ /* Order by event name. */
+ return strcmp(as->name, bs->name);
+}
+
+static bool pmu_alias_is_duplicate(struct sevent *a, struct sevent *b)
+{
+ /* Different names -> never duplicates */
+ if (strcmp(a->name ?: "//", b->name ?: "//"))
+ return false;
+
+ /* Don't remove duplicates for different PMUs */
+ return strcmp(a->pmu_name, b->pmu_name) == 0;
+}
+
+struct events_callback_state {
+ struct sevent *aliases;
+ size_t aliases_len;
+ size_t index;
+};
+
+static int perf_pmus__print_pmu_events__callback(void *vstate,
+ struct pmu_event_info *info)
+{
+ struct events_callback_state *state = vstate;
+ struct sevent *s;
+
+ if (state->index >= state->aliases_len) {
+ pr_err("Unexpected event %s/%s/\n", info->pmu->name, info->name);
+ return 1;
+ }
+ s = &state->aliases[state->index];
+ s->pmu = info->pmu;
+#define COPY_STR(str) s->str = info->str ? strdup(info->str) : NULL
+ COPY_STR(name);
+ COPY_STR(alias);
+ COPY_STR(scale_unit);
+ COPY_STR(desc);
+ COPY_STR(long_desc);
+ COPY_STR(encoding_desc);
+ COPY_STR(topic);
+ COPY_STR(pmu_name);
+#undef COPY_STR
+ s->deprecated = info->deprecated;
+ state->index++;
+ return 0;
+}
+
+void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *print_state)
+{
+ struct perf_pmu *pmu;
+ int printed = 0;
+ int len;
+ struct sevent *aliases;
+ struct events_callback_state state;
+ bool skip_duplicate_pmus = print_cb->skip_duplicate_pmus(print_state);
+ struct perf_pmu *(*scan_fn)(struct perf_pmu *);
+
+ if (skip_duplicate_pmus)
+ scan_fn = perf_pmus__scan_skip_duplicates;
+ else
+ scan_fn = perf_pmus__scan;
+
+ pmu = NULL;
+ len = 0;
+ while ((pmu = scan_fn(pmu)) != NULL)
+ len += perf_pmu__num_events(pmu);
+
+ aliases = zalloc(sizeof(struct sevent) * len);
+ if (!aliases) {
+ pr_err("FATAL: not enough memory to print PMU events\n");
+ return;
+ }
+ pmu = NULL;
+ state = (struct events_callback_state) {
+ .aliases = aliases,
+ .aliases_len = len,
+ .index = 0,
+ };
+ while ((pmu = scan_fn(pmu)) != NULL) {
+ perf_pmu__for_each_event(pmu, skip_duplicate_pmus, &state,
+ perf_pmus__print_pmu_events__callback);
+ }
+ qsort(aliases, len, sizeof(struct sevent), cmp_sevent);
+ for (int j = 0; j < len; j++) {
+ /* Skip duplicates */
+ if (j > 0 && pmu_alias_is_duplicate(&aliases[j], &aliases[j - 1]))
+ continue;
+
+ print_cb->print_event(print_state,
+ aliases[j].pmu_name,
+ aliases[j].topic,
+ aliases[j].name,
+ aliases[j].alias,
+ aliases[j].scale_unit,
+ aliases[j].deprecated,
+ "Kernel PMU event",
+ aliases[j].desc,
+ aliases[j].long_desc,
+ aliases[j].encoding_desc);
+ zfree(&aliases[j].name);
+ zfree(&aliases[j].alias);
+ zfree(&aliases[j].scale_unit);
+ zfree(&aliases[j].desc);
+ zfree(&aliases[j].long_desc);
+ zfree(&aliases[j].encoding_desc);
+ zfree(&aliases[j].topic);
+ zfree(&aliases[j].pmu_name);
+ }
+ if (printed && pager_in_use())
+ printf("\n");
+
+ zfree(&aliases);
+}
+
+bool perf_pmus__have_event(const char *pname, const char *name)
+{
+ struct perf_pmu *pmu = perf_pmus__find(pname);
+
+ return pmu && perf_pmu__have_event(pmu, name);
+}
+
+int perf_pmus__num_core_pmus(void)
+{
+ static int count;
+
+ if (!count) {
+ struct perf_pmu *pmu = NULL;
+
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL)
+ count++;
+ }
+ return count;
+}
+
+static bool __perf_pmus__supports_extended_type(void)
+{
+ struct perf_pmu *pmu = NULL;
+
+ if (perf_pmus__num_core_pmus() <= 1)
+ return false;
+
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ if (!is_event_supported(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES | ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT)))
+ return false;
+ }
+
+ return true;
+}
+
+static bool perf_pmus__do_support_extended_type;
+
+static void perf_pmus__init_supports_extended_type(void)
+{
+ perf_pmus__do_support_extended_type = __perf_pmus__supports_extended_type();
+}
+
+bool perf_pmus__supports_extended_type(void)
+{
+ static pthread_once_t extended_type_once = PTHREAD_ONCE_INIT;
+
+ pthread_once(&extended_type_once, perf_pmus__init_supports_extended_type);
+
+ return perf_pmus__do_support_extended_type;
+}
+
+char *perf_pmus__default_pmu_name(void)
+{
+ int fd;
+ DIR *dir;
+ struct dirent *dent;
+ char *result = NULL;
+
+ if (!list_empty(&core_pmus))
+ return strdup(list_first_entry(&core_pmus, struct perf_pmu, list)->name);
+
+ fd = perf_pmu__event_source_devices_fd();
+ if (fd < 0)
+ return strdup("cpu");
+
+ dir = fdopendir(fd);
+ if (!dir) {
+ close(fd);
+ return strdup("cpu");
+ }
+
+ while ((dent = readdir(dir))) {
+ if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
+ continue;
+ if (is_pmu_core(dent->d_name)) {
+ result = strdup(dent->d_name);
+ break;
+ }
+ }
+
+ closedir(dir);
+ return result ?: strdup("cpu");
+}
+
+struct perf_pmu *evsel__find_pmu(const struct evsel *evsel)
+{
+ struct perf_pmu *pmu = evsel->pmu;
+
+ if (!pmu) {
+ pmu = perf_pmus__find_by_type(evsel->core.attr.type);
+ ((struct evsel *)evsel)->pmu = pmu;
+ }
+ return pmu;
+}
diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h
new file mode 100644
index 000000000..4c67153ac
--- /dev/null
+++ b/tools/perf/util/pmus.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PMUS_H
+#define __PMUS_H
+
+struct perf_pmu;
+struct print_callbacks;
+
+int pmu_name_len_no_suffix(const char *str, unsigned long *num);
+
+void perf_pmus__destroy(void);
+
+struct perf_pmu *perf_pmus__find(const char *name);
+struct perf_pmu *perf_pmus__find_by_type(unsigned int type);
+
+struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu);
+struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu);
+
+const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str);
+
+int perf_pmus__num_mem_pmus(void);
+void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *print_state);
+bool perf_pmus__have_event(const char *pname, const char *name);
+int perf_pmus__num_core_pmus(void);
+bool perf_pmus__supports_extended_type(void);
+char *perf_pmus__default_pmu_name(void);
+
+#endif /* __PMUS_H */
diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c
new file mode 100644
index 000000000..a7566edc8
--- /dev/null
+++ b/tools/perf/util/print-events.c
@@ -0,0 +1,452 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <dirent.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/param.h>
+#include <unistd.h>
+
+#include <api/fs/tracing_path.h>
+#include <linux/stddef.h>
+#include <linux/perf_event.h>
+#include <linux/zalloc.h>
+#include <subcmd/pager.h>
+
+#include "build-id.h"
+#include "debug.h"
+#include "evsel.h"
+#include "metricgroup.h"
+#include "parse-events.h"
+#include "pmu.h"
+#include "pmus.h"
+#include "print-events.h"
+#include "probe-file.h"
+#include "string2.h"
+#include "strlist.h"
+#include "tracepoint.h"
+#include "pfm.h"
+#include "thread_map.h"
+
+#define MAX_NAME_LEN 100
+
+/** Strings corresponding to enum perf_type_id. */
+static const char * const event_type_descriptors[] = {
+ "Hardware event",
+ "Software event",
+ "Tracepoint event",
+ "Hardware cache event",
+ "Raw hardware event descriptor",
+ "Hardware breakpoint",
+};
+
+static const struct event_symbol event_symbols_tool[PERF_TOOL_MAX] = {
+ [PERF_TOOL_DURATION_TIME] = {
+ .symbol = "duration_time",
+ .alias = "",
+ },
+ [PERF_TOOL_USER_TIME] = {
+ .symbol = "user_time",
+ .alias = "",
+ },
+ [PERF_TOOL_SYSTEM_TIME] = {
+ .symbol = "system_time",
+ .alias = "",
+ },
+};
+
+/*
+ * Print the events from <debugfs_mount_point>/tracing/events
+ */
+void print_tracepoint_events(const struct print_callbacks *print_cb __maybe_unused, void *print_state __maybe_unused)
+{
+ char *events_path = get_tracing_file("events");
+ int events_fd = open(events_path, O_PATH);
+
+ put_tracing_file(events_path);
+ if (events_fd < 0) {
+ printf("Error: failed to open tracing events directory\n");
+ return;
+ }
+
+#ifdef HAVE_SCANDIRAT_SUPPORT
+{
+ struct dirent **sys_namelist = NULL;
+ int sys_items = tracing_events__scandir_alphasort(&sys_namelist);
+
+ for (int i = 0; i < sys_items; i++) {
+ struct dirent *sys_dirent = sys_namelist[i];
+ struct dirent **evt_namelist = NULL;
+ int dir_fd;
+ int evt_items;
+
+ if (sys_dirent->d_type != DT_DIR ||
+ !strcmp(sys_dirent->d_name, ".") ||
+ !strcmp(sys_dirent->d_name, ".."))
+ goto next_sys;
+
+ dir_fd = openat(events_fd, sys_dirent->d_name, O_PATH);
+ if (dir_fd < 0)
+ goto next_sys;
+
+ evt_items = scandirat(events_fd, sys_dirent->d_name, &evt_namelist, NULL, alphasort);
+ for (int j = 0; j < evt_items; j++) {
+ struct dirent *evt_dirent = evt_namelist[j];
+ char evt_path[MAXPATHLEN];
+ int evt_fd;
+
+ if (evt_dirent->d_type != DT_DIR ||
+ !strcmp(evt_dirent->d_name, ".") ||
+ !strcmp(evt_dirent->d_name, ".."))
+ goto next_evt;
+
+ snprintf(evt_path, sizeof(evt_path), "%s/id", evt_dirent->d_name);
+ evt_fd = openat(dir_fd, evt_path, O_RDONLY);
+ if (evt_fd < 0)
+ goto next_evt;
+ close(evt_fd);
+
+ snprintf(evt_path, MAXPATHLEN, "%s:%s",
+ sys_dirent->d_name, evt_dirent->d_name);
+ print_cb->print_event(print_state,
+ /*topic=*/NULL,
+ /*pmu_name=*/NULL,
+ evt_path,
+ /*event_alias=*/NULL,
+ /*scale_unit=*/NULL,
+ /*deprecated=*/false,
+ "Tracepoint event",
+ /*desc=*/NULL,
+ /*long_desc=*/NULL,
+ /*encoding_desc=*/NULL);
+next_evt:
+ free(evt_namelist[j]);
+ }
+ close(dir_fd);
+ free(evt_namelist);
+next_sys:
+ free(sys_namelist[i]);
+ }
+
+ free(sys_namelist);
+}
+#else
+ printf("\nWARNING: Your libc doesn't have the scandirat function, please ask its maintainers to implement it.\n"
+ " As a rough fallback, please do 'ls %s' to see the available tracepoint events.\n", events_path);
+#endif
+ close(events_fd);
+}
+
+void print_sdt_events(const struct print_callbacks *print_cb, void *print_state)
+{
+ struct strlist *bidlist, *sdtlist;
+ struct str_node *bid_nd, *sdt_name, *next_sdt_name;
+ const char *last_sdt_name = NULL;
+
+ /*
+ * The implicitly sorted sdtlist will hold the tracepoint name followed
+ * by @<buildid>. If the tracepoint name is unique (determined by
+ * looking at the adjacent nodes) the @<buildid> is dropped otherwise
+ * the executable path and buildid are added to the name.
+ */
+ sdtlist = strlist__new(NULL, NULL);
+ if (!sdtlist) {
+ pr_debug("Failed to allocate new strlist for SDT\n");
+ return;
+ }
+ bidlist = build_id_cache__list_all(true);
+ if (!bidlist) {
+ pr_debug("Failed to get buildids: %d\n", errno);
+ return;
+ }
+ strlist__for_each_entry(bid_nd, bidlist) {
+ struct probe_cache *pcache;
+ struct probe_cache_entry *ent;
+
+ pcache = probe_cache__new(bid_nd->s, NULL);
+ if (!pcache)
+ continue;
+ list_for_each_entry(ent, &pcache->entries, node) {
+ char buf[1024];
+
+ snprintf(buf, sizeof(buf), "%s:%s@%s",
+ ent->pev.group, ent->pev.event, bid_nd->s);
+ strlist__add(sdtlist, buf);
+ }
+ probe_cache__delete(pcache);
+ }
+ strlist__delete(bidlist);
+
+ strlist__for_each_entry(sdt_name, sdtlist) {
+ bool show_detail = false;
+ char *bid = strchr(sdt_name->s, '@');
+ char *evt_name = NULL;
+
+ if (bid)
+ *(bid++) = '\0';
+
+ if (last_sdt_name && !strcmp(last_sdt_name, sdt_name->s)) {
+ show_detail = true;
+ } else {
+ next_sdt_name = strlist__next(sdt_name);
+ if (next_sdt_name) {
+ char *bid2 = strchr(next_sdt_name->s, '@');
+
+ if (bid2)
+ *bid2 = '\0';
+ if (strcmp(sdt_name->s, next_sdt_name->s) == 0)
+ show_detail = true;
+ if (bid2)
+ *bid2 = '@';
+ }
+ }
+ last_sdt_name = sdt_name->s;
+
+ if (show_detail) {
+ char *path = build_id_cache__origname(bid);
+
+ if (path) {
+ if (asprintf(&evt_name, "%s@%s(%.12s)", sdt_name->s, path, bid) < 0)
+ evt_name = NULL;
+ free(path);
+ }
+ }
+ print_cb->print_event(print_state,
+ /*topic=*/NULL,
+ /*pmu_name=*/NULL,
+ evt_name ?: sdt_name->s,
+ /*event_alias=*/NULL,
+ /*deprecated=*/false,
+ /*scale_unit=*/NULL,
+ "SDT event",
+ /*desc=*/NULL,
+ /*long_desc=*/NULL,
+ /*encoding_desc=*/NULL);
+
+ free(evt_name);
+ }
+ strlist__delete(sdtlist);
+}
+
+bool is_event_supported(u8 type, u64 config)
+{
+ bool ret = true;
+ int open_return;
+ struct evsel *evsel;
+ struct perf_event_attr attr = {
+ .type = type,
+ .config = config,
+ .disabled = 1,
+ };
+ struct perf_thread_map *tmap = thread_map__new_by_tid(0);
+
+ if (tmap == NULL)
+ return false;
+
+ evsel = evsel__new(&attr);
+ if (evsel) {
+ open_return = evsel__open(evsel, NULL, tmap);
+ ret = open_return >= 0;
+
+ if (open_return == -EACCES) {
+ /*
+ * This happens if the paranoid value
+ * /proc/sys/kernel/perf_event_paranoid is set to 2
+ * Re-run with exclude_kernel set; we don't do that
+ * by default as some ARM machines do not support it.
+ *
+ */
+ evsel->core.attr.exclude_kernel = 1;
+ ret = evsel__open(evsel, NULL, tmap) >= 0;
+ }
+ evsel__delete(evsel);
+ }
+
+ perf_thread_map__put(tmap);
+ return ret;
+}
+
+int print_hwcache_events(const struct print_callbacks *print_cb, void *print_state)
+{
+ struct perf_pmu *pmu = NULL;
+ const char *event_type_descriptor = event_type_descriptors[PERF_TYPE_HW_CACHE];
+
+ /*
+ * Only print core PMUs, skipping uncore for performance and
+ * PERF_TYPE_SOFTWARE that can succeed in opening legacy cache evenst.
+ */
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ if (pmu->is_uncore || pmu->type == PERF_TYPE_SOFTWARE)
+ continue;
+
+ for (int type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
+ for (int op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
+ /* skip invalid cache type */
+ if (!evsel__is_cache_op_valid(type, op))
+ continue;
+
+ for (int res = 0; res < PERF_COUNT_HW_CACHE_RESULT_MAX; res++) {
+ char name[64];
+ char alias_name[128];
+ __u64 config;
+ int ret;
+
+ __evsel__hw_cache_type_op_res_name(type, op, res,
+ name, sizeof(name));
+
+ ret = parse_events__decode_legacy_cache(name, pmu->type,
+ &config);
+ if (ret || !is_event_supported(PERF_TYPE_HW_CACHE, config))
+ continue;
+ snprintf(alias_name, sizeof(alias_name), "%s/%s/",
+ pmu->name, name);
+ print_cb->print_event(print_state,
+ "cache",
+ pmu->name,
+ name,
+ alias_name,
+ /*scale_unit=*/NULL,
+ /*deprecated=*/false,
+ event_type_descriptor,
+ /*desc=*/NULL,
+ /*long_desc=*/NULL,
+ /*encoding_desc=*/NULL);
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+void print_tool_events(const struct print_callbacks *print_cb, void *print_state)
+{
+ // Start at 1 because the first enum entry means no tool event.
+ for (int i = 1; i < PERF_TOOL_MAX; ++i) {
+ print_cb->print_event(print_state,
+ "tool",
+ /*pmu_name=*/NULL,
+ event_symbols_tool[i].symbol,
+ event_symbols_tool[i].alias,
+ /*scale_unit=*/NULL,
+ /*deprecated=*/false,
+ "Tool event",
+ /*desc=*/NULL,
+ /*long_desc=*/NULL,
+ /*encoding_desc=*/NULL);
+ }
+}
+
+void print_symbol_events(const struct print_callbacks *print_cb, void *print_state,
+ unsigned int type, const struct event_symbol *syms,
+ unsigned int max)
+{
+ struct strlist *evt_name_list = strlist__new(NULL, NULL);
+ struct str_node *nd;
+
+ if (!evt_name_list) {
+ pr_debug("Failed to allocate new strlist for symbol events\n");
+ return;
+ }
+ for (unsigned int i = 0; i < max; i++) {
+ /*
+ * New attr.config still not supported here, the latest
+ * example was PERF_COUNT_SW_CGROUP_SWITCHES
+ */
+ if (syms[i].symbol == NULL)
+ continue;
+
+ if (!is_event_supported(type, i))
+ continue;
+
+ if (strlen(syms[i].alias)) {
+ char name[MAX_NAME_LEN];
+
+ snprintf(name, MAX_NAME_LEN, "%s OR %s", syms[i].symbol, syms[i].alias);
+ strlist__add(evt_name_list, name);
+ } else
+ strlist__add(evt_name_list, syms[i].symbol);
+ }
+
+ strlist__for_each_entry(nd, evt_name_list) {
+ char *alias = strstr(nd->s, " OR ");
+
+ if (alias) {
+ *alias = '\0';
+ alias += 4;
+ }
+ print_cb->print_event(print_state,
+ /*topic=*/NULL,
+ /*pmu_name=*/NULL,
+ nd->s,
+ alias,
+ /*scale_unit=*/NULL,
+ /*deprecated=*/false,
+ event_type_descriptors[type],
+ /*desc=*/NULL,
+ /*long_desc=*/NULL,
+ /*encoding_desc=*/NULL);
+ }
+ strlist__delete(evt_name_list);
+}
+
+/*
+ * Print the help text for the event symbols:
+ */
+void print_events(const struct print_callbacks *print_cb, void *print_state)
+{
+ print_symbol_events(print_cb, print_state, PERF_TYPE_HARDWARE,
+ event_symbols_hw, PERF_COUNT_HW_MAX);
+ print_symbol_events(print_cb, print_state, PERF_TYPE_SOFTWARE,
+ event_symbols_sw, PERF_COUNT_SW_MAX);
+
+ print_tool_events(print_cb, print_state);
+
+ print_hwcache_events(print_cb, print_state);
+
+ perf_pmus__print_pmu_events(print_cb, print_state);
+
+ print_cb->print_event(print_state,
+ /*topic=*/NULL,
+ /*pmu_name=*/NULL,
+ "rNNN",
+ /*event_alias=*/NULL,
+ /*scale_unit=*/NULL,
+ /*deprecated=*/false,
+ event_type_descriptors[PERF_TYPE_RAW],
+ /*desc=*/NULL,
+ /*long_desc=*/NULL,
+ /*encoding_desc=*/NULL);
+
+ print_cb->print_event(print_state,
+ /*topic=*/NULL,
+ /*pmu_name=*/NULL,
+ "cpu/t1=v1[,t2=v2,t3 ...]/modifier",
+ /*event_alias=*/NULL,
+ /*scale_unit=*/NULL,
+ /*deprecated=*/false,
+ event_type_descriptors[PERF_TYPE_RAW],
+ "(see 'man perf-list' on how to encode it)",
+ /*long_desc=*/NULL,
+ /*encoding_desc=*/NULL);
+
+ print_cb->print_event(print_state,
+ /*topic=*/NULL,
+ /*pmu_name=*/NULL,
+ "mem:<addr>[/len][:access]",
+ /*scale_unit=*/NULL,
+ /*event_alias=*/NULL,
+ /*deprecated=*/false,
+ event_type_descriptors[PERF_TYPE_BREAKPOINT],
+ /*desc=*/NULL,
+ /*long_desc=*/NULL,
+ /*encoding_desc=*/NULL);
+
+ print_tracepoint_events(print_cb, print_state);
+
+ print_sdt_events(print_cb, print_state);
+
+ metricgroup__print(print_cb, print_state);
+
+ print_libpfm_events(print_cb, print_state);
+}
diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h
new file mode 100644
index 000000000..bf4290bef
--- /dev/null
+++ b/tools/perf/util/print-events.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_PRINT_EVENTS_H
+#define __PERF_PRINT_EVENTS_H
+
+#include <linux/perf_event.h>
+#include <linux/types.h>
+#include <stdbool.h>
+
+struct event_symbol;
+
+struct print_callbacks {
+ void (*print_start)(void *print_state);
+ void (*print_end)(void *print_state);
+ void (*print_event)(void *print_state, const char *topic,
+ const char *pmu_name,
+ const char *event_name, const char *event_alias,
+ const char *scale_unit,
+ bool deprecated, const char *event_type_desc,
+ const char *desc, const char *long_desc,
+ const char *encoding_desc);
+ void (*print_metric)(void *print_state,
+ const char *group,
+ const char *name,
+ const char *desc,
+ const char *long_desc,
+ const char *expr,
+ const char *threshold,
+ const char *unit);
+ bool (*skip_duplicate_pmus)(void *print_state);
+};
+
+/** Print all events, the default when no options are specified. */
+void print_events(const struct print_callbacks *print_cb, void *print_state);
+int print_hwcache_events(const struct print_callbacks *print_cb, void *print_state);
+void print_sdt_events(const struct print_callbacks *print_cb, void *print_state);
+void print_symbol_events(const struct print_callbacks *print_cb, void *print_state,
+ unsigned int type, const struct event_symbol *syms,
+ unsigned int max);
+void print_tool_events(const struct print_callbacks *print_cb, void *print_state);
+void print_tracepoint_events(const struct print_callbacks *print_cb, void *print_state);
+bool is_event_supported(u8 type, u64 config);
+
+#endif /* __PERF_PRINT_EVENTS_H */
diff --git a/tools/perf/util/print_binary.c b/tools/perf/util/print_binary.c
new file mode 100644
index 000000000..13fdc51c6
--- /dev/null
+++ b/tools/perf/util/print_binary.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "print_binary.h"
+#include <linux/log2.h>
+#include <linux/ctype.h>
+
+int binary__fprintf(unsigned char *data, size_t len,
+ size_t bytes_per_line, binary__fprintf_t printer,
+ void *extra, FILE *fp)
+{
+ size_t i, j, mask;
+ int printed = 0;
+
+ if (!printer)
+ return 0;
+
+ bytes_per_line = roundup_pow_of_two(bytes_per_line);
+ mask = bytes_per_line - 1;
+
+ printed += printer(BINARY_PRINT_DATA_BEGIN, 0, extra, fp);
+ for (i = 0; i < len; i++) {
+ if ((i & mask) == 0) {
+ printed += printer(BINARY_PRINT_LINE_BEGIN, -1, extra, fp);
+ printed += printer(BINARY_PRINT_ADDR, i, extra, fp);
+ }
+
+ printed += printer(BINARY_PRINT_NUM_DATA, data[i], extra, fp);
+
+ if (((i & mask) == mask) || i == len - 1) {
+ for (j = 0; j < mask-(i & mask); j++)
+ printed += printer(BINARY_PRINT_NUM_PAD, -1, extra, fp);
+
+ printer(BINARY_PRINT_SEP, i, extra, fp);
+ for (j = i & ~mask; j <= i; j++)
+ printed += printer(BINARY_PRINT_CHAR_DATA, data[j], extra, fp);
+ for (j = 0; j < mask-(i & mask); j++)
+ printed += printer(BINARY_PRINT_CHAR_PAD, i, extra, fp);
+ printed += printer(BINARY_PRINT_LINE_END, -1, extra, fp);
+ }
+ }
+ printed += printer(BINARY_PRINT_DATA_END, -1, extra, fp);
+ return printed;
+}
+
+int is_printable_array(char *p, unsigned int len)
+{
+ unsigned int i;
+
+ if (!p || !len || p[len - 1] != 0)
+ return 0;
+
+ len--;
+
+ for (i = 0; i < len && p[i]; i++) {
+ if (!isprint(p[i]) && !isspace(p[i]))
+ return 0;
+ }
+ return 1;
+}
diff --git a/tools/perf/util/print_binary.h b/tools/perf/util/print_binary.h
new file mode 100644
index 000000000..2a1554afc
--- /dev/null
+++ b/tools/perf/util/print_binary.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_PRINT_BINARY_H
+#define PERF_PRINT_BINARY_H
+
+#include <stddef.h>
+#include <stdio.h>
+
+enum binary_printer_ops {
+ BINARY_PRINT_DATA_BEGIN,
+ BINARY_PRINT_LINE_BEGIN,
+ BINARY_PRINT_ADDR,
+ BINARY_PRINT_NUM_DATA,
+ BINARY_PRINT_NUM_PAD,
+ BINARY_PRINT_SEP,
+ BINARY_PRINT_CHAR_DATA,
+ BINARY_PRINT_CHAR_PAD,
+ BINARY_PRINT_LINE_END,
+ BINARY_PRINT_DATA_END,
+};
+
+typedef int (*binary__fprintf_t)(enum binary_printer_ops op,
+ unsigned int val, void *extra, FILE *fp);
+
+int binary__fprintf(unsigned char *data, size_t len,
+ size_t bytes_per_line, binary__fprintf_t printer,
+ void *extra, FILE *fp);
+
+static inline void print_binary(unsigned char *data, size_t len,
+ size_t bytes_per_line, binary__fprintf_t printer,
+ void *extra)
+{
+ binary__fprintf(data, len, bytes_per_line, printer, extra, stdout);
+}
+
+int is_printable_array(char *p, unsigned int len);
+
+#endif /* PERF_PRINT_BINARY_H */
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
new file mode 100644
index 000000000..1a5b7fa45
--- /dev/null
+++ b/tools/perf/util/probe-event.c
@@ -0,0 +1,3817 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * probe-event.c : perf-probe definition to probe_events format converter
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ */
+
+#include <inttypes.h>
+#include <sys/utsname.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <limits.h>
+#include <elf.h>
+
+#include "build-id.h"
+#include "event.h"
+#include "namespaces.h"
+#include "strlist.h"
+#include "strfilter.h"
+#include "debug.h"
+#include "dso.h"
+#include "color.h"
+#include "map.h"
+#include "maps.h"
+#include "mutex.h"
+#include "symbol.h"
+#include <api/fs/fs.h>
+#include "trace-event.h" /* For __maybe_unused */
+#include "probe-event.h"
+#include "probe-finder.h"
+#include "probe-file.h"
+#include "session.h"
+#include "string2.h"
+#include "strbuf.h"
+
+#include <subcmd/pager.h>
+#include <linux/ctype.h>
+#include <linux/zalloc.h>
+
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+#include <elfutils/debuginfod.h>
+#endif
+
+#define PERFPROBE_GROUP "probe"
+
+bool probe_event_dry_run; /* Dry run flag */
+struct probe_conf probe_conf = { .magic_num = DEFAULT_PROBE_MAGIC_NUM };
+
+static char *synthesize_perf_probe_point(struct perf_probe_point *pp);
+
+#define semantic_error(msg ...) pr_err("Semantic error :" msg)
+
+int e_snprintf(char *str, size_t size, const char *format, ...)
+{
+ int ret;
+ va_list ap;
+ va_start(ap, format);
+ ret = vsnprintf(str, size, format, ap);
+ va_end(ap);
+ if (ret >= (int)size)
+ ret = -E2BIG;
+ return ret;
+}
+
+static struct machine *host_machine;
+
+/* Initialize symbol maps and path of vmlinux/modules */
+int init_probe_symbol_maps(bool user_only)
+{
+ int ret;
+
+ symbol_conf.allow_aliases = true;
+ ret = symbol__init(NULL);
+ if (ret < 0) {
+ pr_debug("Failed to init symbol map.\n");
+ goto out;
+ }
+
+ if (host_machine || user_only) /* already initialized */
+ return 0;
+
+ if (symbol_conf.vmlinux_name)
+ pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name);
+
+ host_machine = machine__new_host();
+ if (!host_machine) {
+ pr_debug("machine__new_host() failed.\n");
+ symbol__exit();
+ ret = -1;
+ }
+out:
+ if (ret < 0)
+ pr_warning("Failed to init vmlinux path.\n");
+ return ret;
+}
+
+void exit_probe_symbol_maps(void)
+{
+ machine__delete(host_machine);
+ host_machine = NULL;
+ symbol__exit();
+}
+
+static struct ref_reloc_sym *kernel_get_ref_reloc_sym(struct map **pmap)
+{
+ struct kmap *kmap;
+ struct map *map = machine__kernel_map(host_machine);
+
+ if (map__load(map) < 0)
+ return NULL;
+
+ kmap = map__kmap(map);
+ if (!kmap)
+ return NULL;
+
+ if (pmap)
+ *pmap = map;
+
+ return kmap->ref_reloc_sym;
+}
+
+static int kernel_get_symbol_address_by_name(const char *name, u64 *addr,
+ bool reloc, bool reladdr)
+{
+ struct ref_reloc_sym *reloc_sym;
+ struct symbol *sym;
+ struct map *map;
+
+ /* ref_reloc_sym is just a label. Need a special fix*/
+ reloc_sym = kernel_get_ref_reloc_sym(&map);
+ if (reloc_sym && strcmp(name, reloc_sym->name) == 0)
+ *addr = (!map__reloc(map) || reloc) ? reloc_sym->addr :
+ reloc_sym->unrelocated_addr;
+ else {
+ sym = machine__find_kernel_symbol_by_name(host_machine, name, &map);
+ if (!sym)
+ return -ENOENT;
+ *addr = map__unmap_ip(map, sym->start) -
+ ((reloc) ? 0 : map__reloc(map)) -
+ ((reladdr) ? map__start(map) : 0);
+ }
+ return 0;
+}
+
+static struct map *kernel_get_module_map(const char *module)
+{
+ struct maps *maps = machine__kernel_maps(host_machine);
+ struct map_rb_node *pos;
+
+ /* A file path -- this is an offline module */
+ if (module && strchr(module, '/'))
+ return dso__new_map(module);
+
+ if (!module) {
+ struct map *map = machine__kernel_map(host_machine);
+
+ return map__get(map);
+ }
+
+ maps__for_each_entry(maps, pos) {
+ /* short_name is "[module]" */
+ struct dso *dso = map__dso(pos->map);
+ const char *short_name = dso->short_name;
+ u16 short_name_len = dso->short_name_len;
+
+ if (strncmp(short_name + 1, module,
+ short_name_len - 2) == 0 &&
+ module[short_name_len - 2] == '\0') {
+ return map__get(pos->map);
+ }
+ }
+ return NULL;
+}
+
+struct map *get_target_map(const char *target, struct nsinfo *nsi, bool user)
+{
+ /* Init maps of given executable or kernel */
+ if (user) {
+ struct map *map;
+ struct dso *dso;
+
+ map = dso__new_map(target);
+ dso = map ? map__dso(map) : NULL;
+ if (dso) {
+ mutex_lock(&dso->lock);
+ nsinfo__put(dso->nsinfo);
+ dso->nsinfo = nsinfo__get(nsi);
+ mutex_unlock(&dso->lock);
+ }
+ return map;
+ } else {
+ return kernel_get_module_map(target);
+ }
+}
+
+static int convert_exec_to_group(const char *exec, char **result)
+{
+ char *ptr1, *ptr2, *exec_copy;
+ char buf[64];
+ int ret;
+
+ exec_copy = strdup(exec);
+ if (!exec_copy)
+ return -ENOMEM;
+
+ ptr1 = basename(exec_copy);
+ if (!ptr1) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ for (ptr2 = ptr1; *ptr2 != '\0'; ptr2++) {
+ if (!isalnum(*ptr2) && *ptr2 != '_') {
+ *ptr2 = '\0';
+ break;
+ }
+ }
+
+ ret = e_snprintf(buf, 64, "%s_%s", PERFPROBE_GROUP, ptr1);
+ if (ret < 0)
+ goto out;
+
+ *result = strdup(buf);
+ ret = *result ? 0 : -ENOMEM;
+
+out:
+ free(exec_copy);
+ return ret;
+}
+
+static void clear_perf_probe_point(struct perf_probe_point *pp)
+{
+ zfree(&pp->file);
+ zfree(&pp->function);
+ zfree(&pp->lazy_line);
+}
+
+static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs)
+{
+ int i;
+
+ for (i = 0; i < ntevs; i++)
+ clear_probe_trace_event(tevs + i);
+}
+
+static bool kprobe_blacklist__listed(u64 address);
+static bool kprobe_warn_out_range(const char *symbol, u64 address)
+{
+ struct map *map;
+ bool ret = false;
+
+ map = kernel_get_module_map(NULL);
+ if (map) {
+ ret = address <= map__start(map) || map__end(map) < address;
+ if (ret)
+ pr_warning("%s is out of .text, skip it.\n", symbol);
+ map__put(map);
+ }
+ if (!ret && kprobe_blacklist__listed(address)) {
+ pr_warning("%s is blacklisted function, skip it.\n", symbol);
+ ret = true;
+ }
+
+ return ret;
+}
+
+/*
+ * @module can be module name of module file path. In case of path,
+ * inspect elf and find out what is actual module name.
+ * Caller has to free mod_name after using it.
+ */
+static char *find_module_name(const char *module)
+{
+ int fd;
+ Elf *elf;
+ GElf_Ehdr ehdr;
+ GElf_Shdr shdr;
+ Elf_Data *data;
+ Elf_Scn *sec;
+ char *mod_name = NULL;
+ int name_offset;
+
+ fd = open(module, O_RDONLY);
+ if (fd < 0)
+ return NULL;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL)
+ goto elf_err;
+
+ if (gelf_getehdr(elf, &ehdr) == NULL)
+ goto ret_err;
+
+ sec = elf_section_by_name(elf, &ehdr, &shdr,
+ ".gnu.linkonce.this_module", NULL);
+ if (!sec)
+ goto ret_err;
+
+ data = elf_getdata(sec, NULL);
+ if (!data || !data->d_buf)
+ goto ret_err;
+
+ /*
+ * NOTE:
+ * '.gnu.linkonce.this_module' section of kernel module elf directly
+ * maps to 'struct module' from linux/module.h. This section contains
+ * actual module name which will be used by kernel after loading it.
+ * But, we cannot use 'struct module' here since linux/module.h is not
+ * exposed to user-space. Offset of 'name' has remained same from long
+ * time, so hardcoding it here.
+ */
+ if (ehdr.e_ident[EI_CLASS] == ELFCLASS32)
+ name_offset = 12;
+ else /* expect ELFCLASS64 by default */
+ name_offset = 24;
+
+ mod_name = strdup((char *)data->d_buf + name_offset);
+
+ret_err:
+ elf_end(elf);
+elf_err:
+ close(fd);
+ return mod_name;
+}
+
+#ifdef HAVE_DWARF_SUPPORT
+
+static int kernel_get_module_dso(const char *module, struct dso **pdso)
+{
+ struct dso *dso;
+ struct map *map;
+ const char *vmlinux_name;
+ int ret = 0;
+
+ if (module) {
+ char module_name[128];
+
+ snprintf(module_name, sizeof(module_name), "[%s]", module);
+ map = maps__find_by_name(machine__kernel_maps(host_machine), module_name);
+ if (map) {
+ dso = map__dso(map);
+ goto found;
+ }
+ pr_debug("Failed to find module %s.\n", module);
+ return -ENOENT;
+ }
+
+ map = machine__kernel_map(host_machine);
+ dso = map__dso(map);
+ if (!dso->has_build_id)
+ dso__read_running_kernel_build_id(dso, host_machine);
+
+ vmlinux_name = symbol_conf.vmlinux_name;
+ dso->load_errno = 0;
+ if (vmlinux_name)
+ ret = dso__load_vmlinux(dso, map, vmlinux_name, false);
+ else
+ ret = dso__load_vmlinux_path(dso, map);
+found:
+ *pdso = dso;
+ return ret;
+}
+
+/*
+ * Some binaries like glibc have special symbols which are on the symbol
+ * table, but not in the debuginfo. If we can find the address of the
+ * symbol from map, we can translate the address back to the probe point.
+ */
+static int find_alternative_probe_point(struct debuginfo *dinfo,
+ struct perf_probe_point *pp,
+ struct perf_probe_point *result,
+ const char *target, struct nsinfo *nsi,
+ bool uprobes)
+{
+ struct map *map = NULL;
+ struct symbol *sym;
+ u64 address = 0;
+ int ret = -ENOENT;
+ size_t idx;
+
+ /* This can work only for function-name based one */
+ if (!pp->function || pp->file)
+ return -ENOTSUP;
+
+ map = get_target_map(target, nsi, uprobes);
+ if (!map)
+ return -EINVAL;
+
+ /* Find the address of given function */
+ map__for_each_symbol_by_name(map, pp->function, sym, idx) {
+ if (uprobes) {
+ address = sym->start;
+ if (sym->type == STT_GNU_IFUNC)
+ pr_warning("Warning: The probe function (%s) is a GNU indirect function.\n"
+ "Consider identifying the final function used at run time and set the probe directly on that.\n",
+ pp->function);
+ } else
+ address = map__unmap_ip(map, sym->start) - map__reloc(map);
+ break;
+ }
+ if (!address) {
+ ret = -ENOENT;
+ goto out;
+ }
+ pr_debug("Symbol %s address found : %" PRIx64 "\n",
+ pp->function, address);
+
+ ret = debuginfo__find_probe_point(dinfo, address, result);
+ if (ret <= 0)
+ ret = (!ret) ? -ENOENT : ret;
+ else {
+ result->offset += pp->offset;
+ result->line += pp->line;
+ result->retprobe = pp->retprobe;
+ ret = 0;
+ }
+
+out:
+ map__put(map);
+ return ret;
+
+}
+
+static int get_alternative_probe_event(struct debuginfo *dinfo,
+ struct perf_probe_event *pev,
+ struct perf_probe_point *tmp)
+{
+ int ret;
+
+ memcpy(tmp, &pev->point, sizeof(*tmp));
+ memset(&pev->point, 0, sizeof(pev->point));
+ ret = find_alternative_probe_point(dinfo, tmp, &pev->point, pev->target,
+ pev->nsi, pev->uprobes);
+ if (ret < 0)
+ memcpy(&pev->point, tmp, sizeof(*tmp));
+
+ return ret;
+}
+
+static int get_alternative_line_range(struct debuginfo *dinfo,
+ struct line_range *lr,
+ const char *target, bool user)
+{
+ struct perf_probe_point pp = { .function = lr->function,
+ .file = lr->file,
+ .line = lr->start };
+ struct perf_probe_point result;
+ int ret, len = 0;
+
+ memset(&result, 0, sizeof(result));
+
+ if (lr->end != INT_MAX)
+ len = lr->end - lr->start;
+ ret = find_alternative_probe_point(dinfo, &pp, &result,
+ target, NULL, user);
+ if (!ret) {
+ lr->function = result.function;
+ lr->file = result.file;
+ lr->start = result.line;
+ if (lr->end != INT_MAX)
+ lr->end = lr->start + len;
+ clear_perf_probe_point(&pp);
+ }
+ return ret;
+}
+
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+static struct debuginfo *open_from_debuginfod(struct dso *dso, struct nsinfo *nsi,
+ bool silent)
+{
+ debuginfod_client *c = debuginfod_begin();
+ char sbuild_id[SBUILD_ID_SIZE + 1];
+ struct debuginfo *ret = NULL;
+ struct nscookie nsc;
+ char *path;
+ int fd;
+
+ if (!c)
+ return NULL;
+
+ build_id__sprintf(&dso->bid, sbuild_id);
+ fd = debuginfod_find_debuginfo(c, (const unsigned char *)sbuild_id,
+ 0, &path);
+ if (fd >= 0)
+ close(fd);
+ debuginfod_end(c);
+ if (fd < 0) {
+ if (!silent)
+ pr_debug("Failed to find debuginfo in debuginfod.\n");
+ return NULL;
+ }
+ if (!silent)
+ pr_debug("Load debuginfo from debuginfod (%s)\n", path);
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ ret = debuginfo__new((const char *)path);
+ nsinfo__mountns_exit(&nsc);
+ return ret;
+}
+#else
+static inline
+struct debuginfo *open_from_debuginfod(struct dso *dso __maybe_unused,
+ struct nsinfo *nsi __maybe_unused,
+ bool silent __maybe_unused)
+{
+ return NULL;
+}
+#endif
+
+/* Open new debuginfo of given module */
+static struct debuginfo *open_debuginfo(const char *module, struct nsinfo *nsi,
+ bool silent)
+{
+ const char *path = module;
+ char reason[STRERR_BUFSIZE];
+ struct debuginfo *ret = NULL;
+ struct dso *dso = NULL;
+ struct nscookie nsc;
+ int err;
+
+ if (!module || !strchr(module, '/')) {
+ err = kernel_get_module_dso(module, &dso);
+ if (err < 0) {
+ if (!dso || dso->load_errno == 0) {
+ if (!str_error_r(-err, reason, STRERR_BUFSIZE))
+ strcpy(reason, "(unknown)");
+ } else
+ dso__strerror_load(dso, reason, STRERR_BUFSIZE);
+ if (dso)
+ ret = open_from_debuginfod(dso, nsi, silent);
+ if (ret)
+ return ret;
+ if (!silent) {
+ if (module)
+ pr_err("Module %s is not loaded, please specify its full path name.\n", module);
+ else
+ pr_err("Failed to find the path for the kernel: %s\n", reason);
+ }
+ return NULL;
+ }
+ path = dso->long_name;
+ }
+ nsinfo__mountns_enter(nsi, &nsc);
+ ret = debuginfo__new(path);
+ if (!ret && !silent) {
+ pr_warning("The %s file has no debug information.\n", path);
+ if (!module || !strtailcmp(path, ".ko"))
+ pr_warning("Rebuild with CONFIG_DEBUG_INFO=y, ");
+ else
+ pr_warning("Rebuild with -g, ");
+ pr_warning("or install an appropriate debuginfo package.\n");
+ }
+ nsinfo__mountns_exit(&nsc);
+ return ret;
+}
+
+/* For caching the last debuginfo */
+static struct debuginfo *debuginfo_cache;
+static char *debuginfo_cache_path;
+
+static struct debuginfo *debuginfo_cache__open(const char *module, bool silent)
+{
+ const char *path = module;
+
+ /* If the module is NULL, it should be the kernel. */
+ if (!module)
+ path = "kernel";
+
+ if (debuginfo_cache_path && !strcmp(debuginfo_cache_path, path))
+ goto out;
+
+ /* Copy module path */
+ free(debuginfo_cache_path);
+ debuginfo_cache_path = strdup(path);
+ if (!debuginfo_cache_path) {
+ debuginfo__delete(debuginfo_cache);
+ debuginfo_cache = NULL;
+ goto out;
+ }
+
+ debuginfo_cache = open_debuginfo(module, NULL, silent);
+ if (!debuginfo_cache)
+ zfree(&debuginfo_cache_path);
+out:
+ return debuginfo_cache;
+}
+
+static void debuginfo_cache__exit(void)
+{
+ debuginfo__delete(debuginfo_cache);
+ debuginfo_cache = NULL;
+ zfree(&debuginfo_cache_path);
+}
+
+
+static int get_text_start_address(const char *exec, u64 *address,
+ struct nsinfo *nsi)
+{
+ Elf *elf;
+ GElf_Ehdr ehdr;
+ GElf_Shdr shdr;
+ int fd, ret = -ENOENT;
+ struct nscookie nsc;
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ fd = open(exec, O_RDONLY);
+ nsinfo__mountns_exit(&nsc);
+ if (fd < 0)
+ return -errno;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL) {
+ ret = -EINVAL;
+ goto out_close;
+ }
+
+ if (gelf_getehdr(elf, &ehdr) == NULL)
+ goto out;
+
+ if (!elf_section_by_name(elf, &ehdr, &shdr, ".text", NULL))
+ goto out;
+
+ *address = shdr.sh_addr - shdr.sh_offset;
+ ret = 0;
+out:
+ elf_end(elf);
+out_close:
+ close(fd);
+
+ return ret;
+}
+
+/*
+ * Convert trace point to probe point with debuginfo
+ */
+static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp,
+ struct perf_probe_point *pp,
+ bool is_kprobe)
+{
+ struct debuginfo *dinfo = NULL;
+ u64 stext = 0;
+ u64 addr = tp->address;
+ int ret = -ENOENT;
+
+ /* convert the address to dwarf address */
+ if (!is_kprobe) {
+ if (!addr) {
+ ret = -EINVAL;
+ goto error;
+ }
+ ret = get_text_start_address(tp->module, &stext, NULL);
+ if (ret < 0)
+ goto error;
+ addr += stext;
+ } else if (tp->symbol) {
+ /* If the module is given, this returns relative address */
+ ret = kernel_get_symbol_address_by_name(tp->symbol, &addr,
+ false, !!tp->module);
+ if (ret != 0)
+ goto error;
+ addr += tp->offset;
+ }
+
+ pr_debug("try to find information at %" PRIx64 " in %s\n", addr,
+ tp->module ? : "kernel");
+
+ dinfo = debuginfo_cache__open(tp->module, verbose <= 0);
+ if (dinfo)
+ ret = debuginfo__find_probe_point(dinfo, addr, pp);
+ else
+ ret = -ENOENT;
+
+ if (ret > 0) {
+ pp->retprobe = tp->retprobe;
+ return 0;
+ }
+error:
+ pr_debug("Failed to find corresponding probes from debuginfo.\n");
+ return ret ? : -ENOENT;
+}
+
+/* Adjust symbol name and address */
+static int post_process_probe_trace_point(struct probe_trace_point *tp,
+ struct map *map, u64 offs)
+{
+ struct symbol *sym;
+ u64 addr = tp->address - offs;
+
+ sym = map__find_symbol(map, addr);
+ if (!sym) {
+ /*
+ * If the address is in the inittext section, map can not
+ * find it. Ignore it if we are probing offline kernel.
+ */
+ return (symbol_conf.ignore_vmlinux_buildid) ? 0 : -ENOENT;
+ }
+
+ if (strcmp(sym->name, tp->symbol)) {
+ /* If we have no realname, use symbol for it */
+ if (!tp->realname)
+ tp->realname = tp->symbol;
+ else
+ free(tp->symbol);
+ tp->symbol = strdup(sym->name);
+ if (!tp->symbol)
+ return -ENOMEM;
+ }
+ tp->offset = addr - sym->start;
+ tp->address -= offs;
+
+ return 0;
+}
+
+/*
+ * Rename DWARF symbols to ELF symbols -- gcc sometimes optimizes functions
+ * and generate new symbols with suffixes such as .constprop.N or .isra.N
+ * etc. Since those symbols are not recorded in DWARF, we have to find
+ * correct generated symbols from offline ELF binary.
+ * For online kernel or uprobes we don't need this because those are
+ * rebased on _text, or already a section relative address.
+ */
+static int
+post_process_offline_probe_trace_events(struct probe_trace_event *tevs,
+ int ntevs, const char *pathname)
+{
+ struct map *map;
+ u64 stext = 0;
+ int i, ret = 0;
+
+ /* Prepare a map for offline binary */
+ map = dso__new_map(pathname);
+ if (!map || get_text_start_address(pathname, &stext, NULL) < 0) {
+ pr_warning("Failed to get ELF symbols for %s\n", pathname);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < ntevs; i++) {
+ ret = post_process_probe_trace_point(&tevs[i].point,
+ map, stext);
+ if (ret < 0)
+ break;
+ }
+ map__put(map);
+
+ return ret;
+}
+
+static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs,
+ int ntevs, const char *exec,
+ struct nsinfo *nsi)
+{
+ int i, ret = 0;
+ u64 stext = 0;
+
+ if (!exec)
+ return 0;
+
+ ret = get_text_start_address(exec, &stext, nsi);
+ if (ret < 0)
+ return ret;
+
+ for (i = 0; i < ntevs && ret >= 0; i++) {
+ /* point.address is the address of point.symbol + point.offset */
+ tevs[i].point.address -= stext;
+ tevs[i].point.module = strdup(exec);
+ if (!tevs[i].point.module) {
+ ret = -ENOMEM;
+ break;
+ }
+ tevs[i].uprobes = true;
+ }
+
+ return ret;
+}
+
+static int
+post_process_module_probe_trace_events(struct probe_trace_event *tevs,
+ int ntevs, const char *module,
+ struct debuginfo *dinfo)
+{
+ Dwarf_Addr text_offs = 0;
+ int i, ret = 0;
+ char *mod_name = NULL;
+ struct map *map;
+
+ if (!module)
+ return 0;
+
+ map = get_target_map(module, NULL, false);
+ if (!map || debuginfo__get_text_offset(dinfo, &text_offs, true) < 0) {
+ pr_warning("Failed to get ELF symbols for %s\n", module);
+ return -EINVAL;
+ }
+
+ mod_name = find_module_name(module);
+ for (i = 0; i < ntevs; i++) {
+ ret = post_process_probe_trace_point(&tevs[i].point,
+ map, text_offs);
+ if (ret < 0)
+ break;
+ tevs[i].point.module =
+ strdup(mod_name ? mod_name : module);
+ if (!tevs[i].point.module) {
+ ret = -ENOMEM;
+ break;
+ }
+ }
+
+ free(mod_name);
+ map__put(map);
+
+ return ret;
+}
+
+static int
+post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
+ int ntevs)
+{
+ struct ref_reloc_sym *reloc_sym;
+ struct map *map;
+ char *tmp;
+ int i, skipped = 0;
+
+ /* Skip post process if the target is an offline kernel */
+ if (symbol_conf.ignore_vmlinux_buildid)
+ return post_process_offline_probe_trace_events(tevs, ntevs,
+ symbol_conf.vmlinux_name);
+
+ reloc_sym = kernel_get_ref_reloc_sym(&map);
+ if (!reloc_sym) {
+ pr_warning("Relocated base symbol is not found! "
+ "Check /proc/sys/kernel/kptr_restrict\n"
+ "and /proc/sys/kernel/perf_event_paranoid. "
+ "Or run as privileged perf user.\n\n");
+ return -EINVAL;
+ }
+
+ for (i = 0; i < ntevs; i++) {
+ if (!tevs[i].point.address)
+ continue;
+ if (tevs[i].point.retprobe && !kretprobe_offset_is_supported())
+ continue;
+ /*
+ * If we found a wrong one, mark it by NULL symbol.
+ * Since addresses in debuginfo is same as objdump, we need
+ * to convert it to addresses on memory.
+ */
+ if (kprobe_warn_out_range(tevs[i].point.symbol,
+ map__objdump_2mem(map, tevs[i].point.address))) {
+ tmp = NULL;
+ skipped++;
+ } else {
+ tmp = strdup(reloc_sym->name);
+ if (!tmp)
+ return -ENOMEM;
+ }
+ /* If we have no realname, use symbol for it */
+ if (!tevs[i].point.realname)
+ tevs[i].point.realname = tevs[i].point.symbol;
+ else
+ free(tevs[i].point.symbol);
+ tevs[i].point.symbol = tmp;
+ tevs[i].point.offset = tevs[i].point.address -
+ (map__reloc(map) ? reloc_sym->unrelocated_addr :
+ reloc_sym->addr);
+ }
+ return skipped;
+}
+
+void __weak
+arch__post_process_probe_trace_events(struct perf_probe_event *pev __maybe_unused,
+ int ntevs __maybe_unused)
+{
+}
+
+/* Post processing the probe events */
+static int post_process_probe_trace_events(struct perf_probe_event *pev,
+ struct probe_trace_event *tevs,
+ int ntevs, const char *module,
+ bool uprobe, struct debuginfo *dinfo)
+{
+ int ret;
+
+ if (uprobe)
+ ret = add_exec_to_probe_trace_events(tevs, ntevs, module,
+ pev->nsi);
+ else if (module)
+ /* Currently ref_reloc_sym based probe is not for drivers */
+ ret = post_process_module_probe_trace_events(tevs, ntevs,
+ module, dinfo);
+ else
+ ret = post_process_kernel_probe_trace_events(tevs, ntevs);
+
+ if (ret >= 0)
+ arch__post_process_probe_trace_events(pev, ntevs);
+
+ return ret;
+}
+
+/* Try to find perf_probe_event with debuginfo */
+static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
+ struct probe_trace_event **tevs)
+{
+ bool need_dwarf = perf_probe_event_need_dwarf(pev);
+ struct perf_probe_point tmp;
+ struct debuginfo *dinfo;
+ int ntevs, ret = 0;
+
+ /* Workaround for gcc #98776 issue.
+ * Perf failed to add kretprobe event with debuginfo of vmlinux which is
+ * compiled by gcc with -fpatchable-function-entry option enabled. The
+ * same issue with kernel module. The retprobe doesn`t need debuginfo.
+ * This workaround solution use map to query the probe function address
+ * for retprobe event.
+ */
+ if (pev->point.retprobe)
+ return 0;
+
+ dinfo = open_debuginfo(pev->target, pev->nsi, !need_dwarf);
+ if (!dinfo) {
+ if (need_dwarf)
+ return -ENODATA;
+ pr_debug("Could not open debuginfo. Try to use symbols.\n");
+ return 0;
+ }
+
+ pr_debug("Try to find probe point from debuginfo.\n");
+ /* Searching trace events corresponding to a probe event */
+ ntevs = debuginfo__find_trace_events(dinfo, pev, tevs);
+
+ if (ntevs == 0) { /* Not found, retry with an alternative */
+ ret = get_alternative_probe_event(dinfo, pev, &tmp);
+ if (!ret) {
+ ntevs = debuginfo__find_trace_events(dinfo, pev, tevs);
+ /*
+ * Write back to the original probe_event for
+ * setting appropriate (user given) event name
+ */
+ clear_perf_probe_point(&pev->point);
+ memcpy(&pev->point, &tmp, sizeof(tmp));
+ }
+ }
+
+ if (ntevs > 0) { /* Succeeded to find trace events */
+ pr_debug("Found %d probe_trace_events.\n", ntevs);
+ ret = post_process_probe_trace_events(pev, *tevs, ntevs,
+ pev->target, pev->uprobes, dinfo);
+ if (ret < 0 || ret == ntevs) {
+ pr_debug("Post processing failed or all events are skipped. (%d)\n", ret);
+ clear_probe_trace_events(*tevs, ntevs);
+ zfree(tevs);
+ ntevs = 0;
+ }
+ }
+
+ debuginfo__delete(dinfo);
+
+ if (ntevs == 0) { /* No error but failed to find probe point. */
+ char *probe_point = synthesize_perf_probe_point(&pev->point);
+ pr_warning("Probe point '%s' not found.\n", probe_point);
+ free(probe_point);
+ return -ENODEV;
+ } else if (ntevs < 0) {
+ /* Error path : ntevs < 0 */
+ pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs);
+ if (ntevs == -EBADF)
+ pr_warning("Warning: No dwarf info found in the vmlinux - "
+ "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n");
+ if (!need_dwarf) {
+ pr_debug("Trying to use symbols.\n");
+ return 0;
+ }
+ }
+ return ntevs;
+}
+
+#define LINEBUF_SIZE 256
+#define NR_ADDITIONAL_LINES 2
+
+static int __show_one_line(FILE *fp, int l, bool skip, bool show_num)
+{
+ char buf[LINEBUF_SIZE], sbuf[STRERR_BUFSIZE];
+ const char *color = show_num ? "" : PERF_COLOR_BLUE;
+ const char *prefix = NULL;
+
+ do {
+ if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
+ goto error;
+ if (skip)
+ continue;
+ if (!prefix) {
+ prefix = show_num ? "%7d " : " ";
+ color_fprintf(stdout, color, prefix, l);
+ }
+ color_fprintf(stdout, color, "%s", buf);
+
+ } while (strchr(buf, '\n') == NULL);
+
+ return 1;
+error:
+ if (ferror(fp)) {
+ pr_warning("File read error: %s\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ return -1;
+ }
+ return 0;
+}
+
+static int _show_one_line(FILE *fp, int l, bool skip, bool show_num)
+{
+ int rv = __show_one_line(fp, l, skip, show_num);
+ if (rv == 0) {
+ pr_warning("Source file is shorter than expected.\n");
+ rv = -1;
+ }
+ return rv;
+}
+
+#define show_one_line_with_num(f,l) _show_one_line(f,l,false,true)
+#define show_one_line(f,l) _show_one_line(f,l,false,false)
+#define skip_one_line(f,l) _show_one_line(f,l,true,false)
+#define show_one_line_or_eof(f,l) __show_one_line(f,l,false,false)
+
+/*
+ * Show line-range always requires debuginfo to find source file and
+ * line number.
+ */
+static int __show_line_range(struct line_range *lr, const char *module,
+ bool user)
+{
+ struct build_id bid;
+ int l = 1;
+ struct int_node *ln;
+ struct debuginfo *dinfo;
+ FILE *fp;
+ int ret;
+ char *tmp;
+ char sbuf[STRERR_BUFSIZE];
+ char sbuild_id[SBUILD_ID_SIZE] = "";
+
+ /* Search a line range */
+ dinfo = open_debuginfo(module, NULL, false);
+ if (!dinfo)
+ return -ENOENT;
+
+ ret = debuginfo__find_line_range(dinfo, lr);
+ if (!ret) { /* Not found, retry with an alternative */
+ ret = get_alternative_line_range(dinfo, lr, module, user);
+ if (!ret)
+ ret = debuginfo__find_line_range(dinfo, lr);
+ }
+ if (dinfo->build_id) {
+ build_id__init(&bid, dinfo->build_id, BUILD_ID_SIZE);
+ build_id__sprintf(&bid, sbuild_id);
+ }
+ debuginfo__delete(dinfo);
+ if (ret == 0 || ret == -ENOENT) {
+ pr_warning("Specified source line is not found.\n");
+ return -ENOENT;
+ } else if (ret < 0) {
+ pr_warning("Debuginfo analysis failed.\n");
+ return ret;
+ }
+
+ /* Convert source file path */
+ tmp = lr->path;
+ ret = find_source_path(tmp, sbuild_id, lr->comp_dir, &lr->path);
+
+ /* Free old path when new path is assigned */
+ if (tmp != lr->path)
+ free(tmp);
+
+ if (ret < 0) {
+ pr_warning("Failed to find source file path.\n");
+ return ret;
+ }
+
+ setup_pager();
+
+ if (lr->function)
+ fprintf(stdout, "<%s@%s:%d>\n", lr->function, lr->path,
+ lr->start - lr->offset);
+ else
+ fprintf(stdout, "<%s:%d>\n", lr->path, lr->start);
+
+ fp = fopen(lr->path, "r");
+ if (fp == NULL) {
+ pr_warning("Failed to open %s: %s\n", lr->path,
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ return -errno;
+ }
+ /* Skip to starting line number */
+ while (l < lr->start) {
+ ret = skip_one_line(fp, l++);
+ if (ret < 0)
+ goto end;
+ }
+
+ intlist__for_each_entry(ln, lr->line_list) {
+ for (; ln->i > (unsigned long)l; l++) {
+ ret = show_one_line(fp, l - lr->offset);
+ if (ret < 0)
+ goto end;
+ }
+ ret = show_one_line_with_num(fp, l++ - lr->offset);
+ if (ret < 0)
+ goto end;
+ }
+
+ if (lr->end == INT_MAX)
+ lr->end = l + NR_ADDITIONAL_LINES;
+ while (l <= lr->end) {
+ ret = show_one_line_or_eof(fp, l++ - lr->offset);
+ if (ret <= 0)
+ break;
+ }
+end:
+ fclose(fp);
+ return ret;
+}
+
+int show_line_range(struct line_range *lr, const char *module,
+ struct nsinfo *nsi, bool user)
+{
+ int ret;
+ struct nscookie nsc;
+
+ ret = init_probe_symbol_maps(user);
+ if (ret < 0)
+ return ret;
+ nsinfo__mountns_enter(nsi, &nsc);
+ ret = __show_line_range(lr, module, user);
+ nsinfo__mountns_exit(&nsc);
+ exit_probe_symbol_maps();
+
+ return ret;
+}
+
+static int show_available_vars_at(struct debuginfo *dinfo,
+ struct perf_probe_event *pev,
+ struct strfilter *_filter)
+{
+ char *buf;
+ int ret, i, nvars;
+ struct str_node *node;
+ struct variable_list *vls = NULL, *vl;
+ struct perf_probe_point tmp;
+ const char *var;
+
+ buf = synthesize_perf_probe_point(&pev->point);
+ if (!buf)
+ return -EINVAL;
+ pr_debug("Searching variables at %s\n", buf);
+
+ ret = debuginfo__find_available_vars_at(dinfo, pev, &vls);
+ if (!ret) { /* Not found, retry with an alternative */
+ ret = get_alternative_probe_event(dinfo, pev, &tmp);
+ if (!ret) {
+ ret = debuginfo__find_available_vars_at(dinfo, pev,
+ &vls);
+ /* Release the old probe_point */
+ clear_perf_probe_point(&tmp);
+ }
+ }
+ if (ret <= 0) {
+ if (ret == 0 || ret == -ENOENT) {
+ pr_err("Failed to find the address of %s\n", buf);
+ ret = -ENOENT;
+ } else
+ pr_warning("Debuginfo analysis failed.\n");
+ goto end;
+ }
+
+ /* Some variables are found */
+ fprintf(stdout, "Available variables at %s\n", buf);
+ for (i = 0; i < ret; i++) {
+ vl = &vls[i];
+ /*
+ * A probe point might be converted to
+ * several trace points.
+ */
+ fprintf(stdout, "\t@<%s+%lu>\n", vl->point.symbol,
+ vl->point.offset);
+ zfree(&vl->point.symbol);
+ nvars = 0;
+ if (vl->vars) {
+ strlist__for_each_entry(node, vl->vars) {
+ var = strchr(node->s, '\t') + 1;
+ if (strfilter__compare(_filter, var)) {
+ fprintf(stdout, "\t\t%s\n", node->s);
+ nvars++;
+ }
+ }
+ strlist__delete(vl->vars);
+ }
+ if (nvars == 0)
+ fprintf(stdout, "\t\t(No matched variables)\n");
+ }
+ free(vls);
+end:
+ free(buf);
+ return ret;
+}
+
+/* Show available variables on given probe point */
+int show_available_vars(struct perf_probe_event *pevs, int npevs,
+ struct strfilter *_filter)
+{
+ int i, ret = 0;
+ struct debuginfo *dinfo;
+
+ ret = init_probe_symbol_maps(pevs->uprobes);
+ if (ret < 0)
+ return ret;
+
+ dinfo = open_debuginfo(pevs->target, pevs->nsi, false);
+ if (!dinfo) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ setup_pager();
+
+ for (i = 0; i < npevs && ret >= 0; i++)
+ ret = show_available_vars_at(dinfo, &pevs[i], _filter);
+
+ debuginfo__delete(dinfo);
+out:
+ exit_probe_symbol_maps();
+ return ret;
+}
+
+#else /* !HAVE_DWARF_SUPPORT */
+
+static void debuginfo_cache__exit(void)
+{
+}
+
+static int
+find_perf_probe_point_from_dwarf(struct probe_trace_point *tp __maybe_unused,
+ struct perf_probe_point *pp __maybe_unused,
+ bool is_kprobe __maybe_unused)
+{
+ return -ENOSYS;
+}
+
+static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
+ struct probe_trace_event **tevs __maybe_unused)
+{
+ if (perf_probe_event_need_dwarf(pev)) {
+ pr_warning("Debuginfo-analysis is not supported.\n");
+ return -ENOSYS;
+ }
+
+ return 0;
+}
+
+int show_line_range(struct line_range *lr __maybe_unused,
+ const char *module __maybe_unused,
+ struct nsinfo *nsi __maybe_unused,
+ bool user __maybe_unused)
+{
+ pr_warning("Debuginfo-analysis is not supported.\n");
+ return -ENOSYS;
+}
+
+int show_available_vars(struct perf_probe_event *pevs __maybe_unused,
+ int npevs __maybe_unused,
+ struct strfilter *filter __maybe_unused)
+{
+ pr_warning("Debuginfo-analysis is not supported.\n");
+ return -ENOSYS;
+}
+#endif
+
+void line_range__clear(struct line_range *lr)
+{
+ zfree(&lr->function);
+ zfree(&lr->file);
+ zfree(&lr->path);
+ zfree(&lr->comp_dir);
+ intlist__delete(lr->line_list);
+}
+
+int line_range__init(struct line_range *lr)
+{
+ memset(lr, 0, sizeof(*lr));
+ lr->line_list = intlist__new(NULL);
+ if (!lr->line_list)
+ return -ENOMEM;
+ else
+ return 0;
+}
+
+static int parse_line_num(char **ptr, int *val, const char *what)
+{
+ const char *start = *ptr;
+
+ errno = 0;
+ *val = strtol(*ptr, ptr, 0);
+ if (errno || *ptr == start) {
+ semantic_error("'%s' is not a valid number.\n", what);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* Check the name is good for event, group or function */
+static bool is_c_func_name(const char *name)
+{
+ if (!isalpha(*name) && *name != '_')
+ return false;
+ while (*++name != '\0') {
+ if (!isalpha(*name) && !isdigit(*name) && *name != '_')
+ return false;
+ }
+ return true;
+}
+
+/*
+ * Stuff 'lr' according to the line range described by 'arg'.
+ * The line range syntax is described by:
+ *
+ * SRC[:SLN[+NUM|-ELN]]
+ * FNC[@SRC][:SLN[+NUM|-ELN]]
+ */
+int parse_line_range_desc(const char *arg, struct line_range *lr)
+{
+ char *range, *file, *name = strdup(arg);
+ int err;
+
+ if (!name)
+ return -ENOMEM;
+
+ lr->start = 0;
+ lr->end = INT_MAX;
+
+ range = strchr(name, ':');
+ if (range) {
+ *range++ = '\0';
+
+ err = parse_line_num(&range, &lr->start, "start line");
+ if (err)
+ goto err;
+
+ if (*range == '+' || *range == '-') {
+ const char c = *range++;
+
+ err = parse_line_num(&range, &lr->end, "end line");
+ if (err)
+ goto err;
+
+ if (c == '+') {
+ lr->end += lr->start;
+ /*
+ * Adjust the number of lines here.
+ * If the number of lines == 1, the
+ * end of line should be equal to
+ * the start of line.
+ */
+ lr->end--;
+ }
+ }
+
+ pr_debug("Line range is %d to %d\n", lr->start, lr->end);
+
+ err = -EINVAL;
+ if (lr->start > lr->end) {
+ semantic_error("Start line must be smaller"
+ " than end line.\n");
+ goto err;
+ }
+ if (*range != '\0') {
+ semantic_error("Tailing with invalid str '%s'.\n", range);
+ goto err;
+ }
+ }
+
+ file = strchr(name, '@');
+ if (file) {
+ *file = '\0';
+ lr->file = strdup(++file);
+ if (lr->file == NULL) {
+ err = -ENOMEM;
+ goto err;
+ }
+ lr->function = name;
+ } else if (strchr(name, '/') || strchr(name, '.'))
+ lr->file = name;
+ else if (is_c_func_name(name))/* We reuse it for checking funcname */
+ lr->function = name;
+ else { /* Invalid name */
+ semantic_error("'%s' is not a valid function name.\n", name);
+ err = -EINVAL;
+ goto err;
+ }
+
+ return 0;
+err:
+ free(name);
+ return err;
+}
+
+static int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev)
+{
+ char *ptr;
+
+ ptr = strpbrk_esc(*arg, ":");
+ if (ptr) {
+ *ptr = '\0';
+ if (!pev->sdt && !is_c_func_name(*arg))
+ goto ng_name;
+ pev->group = strdup_esc(*arg);
+ if (!pev->group)
+ return -ENOMEM;
+ *arg = ptr + 1;
+ } else
+ pev->group = NULL;
+
+ pev->event = strdup_esc(*arg);
+ if (pev->event == NULL)
+ return -ENOMEM;
+
+ if (!pev->sdt && !is_c_func_name(pev->event)) {
+ zfree(&pev->event);
+ng_name:
+ zfree(&pev->group);
+ semantic_error("%s is bad for event name -it must "
+ "follow C symbol-naming rule.\n", *arg);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* Parse probepoint definition. */
+static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
+{
+ struct perf_probe_point *pp = &pev->point;
+ char *ptr, *tmp;
+ char c, nc = 0;
+ bool file_spec = false;
+ int ret;
+
+ /*
+ * <Syntax>
+ * perf probe [GRP:][EVENT=]SRC[:LN|;PTN]
+ * perf probe [GRP:][EVENT=]FUNC[@SRC][+OFFS|%return|:LN|;PAT]
+ * perf probe %[GRP:]SDT_EVENT
+ */
+ if (!arg)
+ return -EINVAL;
+
+ if (is_sdt_event(arg)) {
+ pev->sdt = true;
+ if (arg[0] == '%')
+ arg++;
+ }
+
+ ptr = strpbrk_esc(arg, ";=@+%");
+ if (pev->sdt) {
+ if (ptr) {
+ if (*ptr != '@') {
+ semantic_error("%s must be an SDT name.\n",
+ arg);
+ return -EINVAL;
+ }
+ /* This must be a target file name or build id */
+ tmp = build_id_cache__complement(ptr + 1);
+ if (tmp) {
+ pev->target = build_id_cache__origname(tmp);
+ free(tmp);
+ } else
+ pev->target = strdup_esc(ptr + 1);
+ if (!pev->target)
+ return -ENOMEM;
+ *ptr = '\0';
+ }
+ ret = parse_perf_probe_event_name(&arg, pev);
+ if (ret == 0) {
+ if (asprintf(&pev->point.function, "%%%s", pev->event) < 0)
+ ret = -errno;
+ }
+ return ret;
+ }
+
+ if (ptr && *ptr == '=') { /* Event name */
+ *ptr = '\0';
+ tmp = ptr + 1;
+ ret = parse_perf_probe_event_name(&arg, pev);
+ if (ret < 0)
+ return ret;
+
+ arg = tmp;
+ }
+
+ /*
+ * Check arg is function or file name and copy it.
+ *
+ * We consider arg to be a file spec if and only if it satisfies
+ * all of the below criteria::
+ * - it does not include any of "+@%",
+ * - it includes one of ":;", and
+ * - it has a period '.' in the name.
+ *
+ * Otherwise, we consider arg to be a function specification.
+ */
+ if (!strpbrk_esc(arg, "+@%")) {
+ ptr = strpbrk_esc(arg, ";:");
+ /* This is a file spec if it includes a '.' before ; or : */
+ if (ptr && memchr(arg, '.', ptr - arg))
+ file_spec = true;
+ }
+
+ ptr = strpbrk_esc(arg, ";:+@%");
+ if (ptr) {
+ nc = *ptr;
+ *ptr++ = '\0';
+ }
+
+ if (arg[0] == '\0')
+ tmp = NULL;
+ else {
+ tmp = strdup_esc(arg);
+ if (tmp == NULL)
+ return -ENOMEM;
+ }
+
+ if (file_spec)
+ pp->file = tmp;
+ else {
+ pp->function = tmp;
+
+ /*
+ * Keep pp->function even if this is absolute address,
+ * so it can mark whether abs_address is valid.
+ * Which make 'perf probe lib.bin 0x0' possible.
+ *
+ * Note that checking length of tmp is not needed
+ * because when we access tmp[1] we know tmp[0] is '0',
+ * so tmp[1] should always valid (but could be '\0').
+ */
+ if (tmp && !strncmp(tmp, "0x", 2)) {
+ pp->abs_address = strtoull(pp->function, &tmp, 0);
+ if (*tmp != '\0') {
+ semantic_error("Invalid absolute address.\n");
+ return -EINVAL;
+ }
+ }
+ }
+
+ /* Parse other options */
+ while (ptr) {
+ arg = ptr;
+ c = nc;
+ if (c == ';') { /* Lazy pattern must be the last part */
+ pp->lazy_line = strdup(arg); /* let leave escapes */
+ if (pp->lazy_line == NULL)
+ return -ENOMEM;
+ break;
+ }
+ ptr = strpbrk_esc(arg, ";:+@%");
+ if (ptr) {
+ nc = *ptr;
+ *ptr++ = '\0';
+ }
+ switch (c) {
+ case ':': /* Line number */
+ pp->line = strtoul(arg, &tmp, 0);
+ if (*tmp != '\0') {
+ semantic_error("There is non-digit char"
+ " in line number.\n");
+ return -EINVAL;
+ }
+ break;
+ case '+': /* Byte offset from a symbol */
+ pp->offset = strtoul(arg, &tmp, 0);
+ if (*tmp != '\0') {
+ semantic_error("There is non-digit character"
+ " in offset.\n");
+ return -EINVAL;
+ }
+ break;
+ case '@': /* File name */
+ if (pp->file) {
+ semantic_error("SRC@SRC is not allowed.\n");
+ return -EINVAL;
+ }
+ pp->file = strdup_esc(arg);
+ if (pp->file == NULL)
+ return -ENOMEM;
+ break;
+ case '%': /* Probe places */
+ if (strcmp(arg, "return") == 0) {
+ pp->retprobe = 1;
+ } else { /* Others not supported yet */
+ semantic_error("%%%s is not supported.\n", arg);
+ return -ENOTSUP;
+ }
+ break;
+ default: /* Buggy case */
+ pr_err("This program has a bug at %s:%d.\n",
+ __FILE__, __LINE__);
+ return -ENOTSUP;
+ break;
+ }
+ }
+
+ /* Exclusion check */
+ if (pp->lazy_line && pp->line) {
+ semantic_error("Lazy pattern can't be used with"
+ " line number.\n");
+ return -EINVAL;
+ }
+
+ if (pp->lazy_line && pp->offset) {
+ semantic_error("Lazy pattern can't be used with offset.\n");
+ return -EINVAL;
+ }
+
+ if (pp->line && pp->offset) {
+ semantic_error("Offset can't be used with line number.\n");
+ return -EINVAL;
+ }
+
+ if (!pp->line && !pp->lazy_line && pp->file && !pp->function) {
+ semantic_error("File always requires line number or "
+ "lazy pattern.\n");
+ return -EINVAL;
+ }
+
+ if (pp->offset && !pp->function) {
+ semantic_error("Offset requires an entry function.\n");
+ return -EINVAL;
+ }
+
+ if ((pp->offset || pp->line || pp->lazy_line) && pp->retprobe) {
+ semantic_error("Offset/Line/Lazy pattern can't be used with "
+ "return probe.\n");
+ return -EINVAL;
+ }
+
+ pr_debug("symbol:%s file:%s line:%d offset:%lu return:%d lazy:%s\n",
+ pp->function, pp->file, pp->line, pp->offset, pp->retprobe,
+ pp->lazy_line);
+ return 0;
+}
+
+/* Parse perf-probe event argument */
+static int parse_perf_probe_arg(char *str, struct perf_probe_arg *arg)
+{
+ char *tmp, *goodname;
+ struct perf_probe_arg_field **fieldp;
+
+ pr_debug("parsing arg: %s into ", str);
+
+ tmp = strchr(str, '=');
+ if (tmp) {
+ arg->name = strndup(str, tmp - str);
+ if (arg->name == NULL)
+ return -ENOMEM;
+ pr_debug("name:%s ", arg->name);
+ str = tmp + 1;
+ }
+
+ tmp = strchr(str, '@');
+ if (tmp && tmp != str && !strcmp(tmp + 1, "user")) { /* user attr */
+ if (!user_access_is_supported()) {
+ semantic_error("ftrace does not support user access\n");
+ return -EINVAL;
+ }
+ *tmp = '\0';
+ arg->user_access = true;
+ pr_debug("user_access ");
+ }
+
+ tmp = strchr(str, ':');
+ if (tmp) { /* Type setting */
+ *tmp = '\0';
+ arg->type = strdup(tmp + 1);
+ if (arg->type == NULL)
+ return -ENOMEM;
+ pr_debug("type:%s ", arg->type);
+ }
+
+ tmp = strpbrk(str, "-.[");
+ if (!is_c_varname(str) || !tmp) {
+ /* A variable, register, symbol or special value */
+ arg->var = strdup(str);
+ if (arg->var == NULL)
+ return -ENOMEM;
+ pr_debug("%s\n", arg->var);
+ return 0;
+ }
+
+ /* Structure fields or array element */
+ arg->var = strndup(str, tmp - str);
+ if (arg->var == NULL)
+ return -ENOMEM;
+ goodname = arg->var;
+ pr_debug("%s, ", arg->var);
+ fieldp = &arg->field;
+
+ do {
+ *fieldp = zalloc(sizeof(struct perf_probe_arg_field));
+ if (*fieldp == NULL)
+ return -ENOMEM;
+ if (*tmp == '[') { /* Array */
+ str = tmp;
+ (*fieldp)->index = strtol(str + 1, &tmp, 0);
+ (*fieldp)->ref = true;
+ if (*tmp != ']' || tmp == str + 1) {
+ semantic_error("Array index must be a"
+ " number.\n");
+ return -EINVAL;
+ }
+ tmp++;
+ if (*tmp == '\0')
+ tmp = NULL;
+ } else { /* Structure */
+ if (*tmp == '.') {
+ str = tmp + 1;
+ (*fieldp)->ref = false;
+ } else if (tmp[1] == '>') {
+ str = tmp + 2;
+ (*fieldp)->ref = true;
+ } else {
+ semantic_error("Argument parse error: %s\n",
+ str);
+ return -EINVAL;
+ }
+ tmp = strpbrk(str, "-.[");
+ }
+ if (tmp) {
+ (*fieldp)->name = strndup(str, tmp - str);
+ if ((*fieldp)->name == NULL)
+ return -ENOMEM;
+ if (*str != '[')
+ goodname = (*fieldp)->name;
+ pr_debug("%s(%d), ", (*fieldp)->name, (*fieldp)->ref);
+ fieldp = &(*fieldp)->next;
+ }
+ } while (tmp);
+ (*fieldp)->name = strdup(str);
+ if ((*fieldp)->name == NULL)
+ return -ENOMEM;
+ if (*str != '[')
+ goodname = (*fieldp)->name;
+ pr_debug("%s(%d)\n", (*fieldp)->name, (*fieldp)->ref);
+
+ /* If no name is specified, set the last field name (not array index)*/
+ if (!arg->name) {
+ arg->name = strdup(goodname);
+ if (arg->name == NULL)
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+/* Parse perf-probe event command */
+int parse_perf_probe_command(const char *cmd, struct perf_probe_event *pev)
+{
+ char **argv;
+ int argc, i, ret = 0;
+
+ argv = argv_split(cmd, &argc);
+ if (!argv) {
+ pr_debug("Failed to split arguments.\n");
+ return -ENOMEM;
+ }
+ if (argc - 1 > MAX_PROBE_ARGS) {
+ semantic_error("Too many probe arguments (%d).\n", argc - 1);
+ ret = -ERANGE;
+ goto out;
+ }
+ /* Parse probe point */
+ ret = parse_perf_probe_point(argv[0], pev);
+ if (ret < 0)
+ goto out;
+
+ /* Generate event name if needed */
+ if (!pev->event && pev->point.function && pev->point.line
+ && !pev->point.lazy_line && !pev->point.offset) {
+ if (asprintf(&pev->event, "%s_L%d", pev->point.function,
+ pev->point.line) < 0) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ /* Copy arguments and ensure return probe has no C argument */
+ pev->nargs = argc - 1;
+ pev->args = zalloc(sizeof(struct perf_probe_arg) * pev->nargs);
+ if (pev->args == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < pev->nargs && ret >= 0; i++) {
+ ret = parse_perf_probe_arg(argv[i + 1], &pev->args[i]);
+ if (ret >= 0 &&
+ is_c_varname(pev->args[i].var) && pev->point.retprobe) {
+ semantic_error("You can't specify local variable for"
+ " kretprobe.\n");
+ ret = -EINVAL;
+ }
+ }
+out:
+ argv_free(argv);
+
+ return ret;
+}
+
+/* Returns true if *any* ARG is either C variable, $params or $vars. */
+bool perf_probe_with_var(struct perf_probe_event *pev)
+{
+ int i = 0;
+
+ for (i = 0; i < pev->nargs; i++)
+ if (is_c_varname(pev->args[i].var) ||
+ !strcmp(pev->args[i].var, PROBE_ARG_PARAMS) ||
+ !strcmp(pev->args[i].var, PROBE_ARG_VARS))
+ return true;
+ return false;
+}
+
+/* Return true if this perf_probe_event requires debuginfo */
+bool perf_probe_event_need_dwarf(struct perf_probe_event *pev)
+{
+ if (pev->point.file || pev->point.line || pev->point.lazy_line)
+ return true;
+
+ if (perf_probe_with_var(pev))
+ return true;
+
+ return false;
+}
+
+/* Parse probe_events event into struct probe_point */
+int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev)
+{
+ struct probe_trace_point *tp = &tev->point;
+ char pr;
+ char *p;
+ char *argv0_str = NULL, *fmt, *fmt1_str, *fmt2_str, *fmt3_str;
+ int ret, i, argc;
+ char **argv;
+
+ pr_debug("Parsing probe_events: %s\n", cmd);
+ argv = argv_split(cmd, &argc);
+ if (!argv) {
+ pr_debug("Failed to split arguments.\n");
+ return -ENOMEM;
+ }
+ if (argc < 2) {
+ semantic_error("Too few probe arguments.\n");
+ ret = -ERANGE;
+ goto out;
+ }
+
+ /* Scan event and group name. */
+ argv0_str = strdup(argv[0]);
+ if (argv0_str == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ fmt1_str = strtok_r(argv0_str, ":", &fmt);
+ fmt2_str = strtok_r(NULL, "/", &fmt);
+ fmt3_str = strtok_r(NULL, " \t", &fmt);
+ if (fmt1_str == NULL || fmt2_str == NULL || fmt3_str == NULL) {
+ semantic_error("Failed to parse event name: %s\n", argv[0]);
+ ret = -EINVAL;
+ goto out;
+ }
+ pr = fmt1_str[0];
+ tev->group = strdup(fmt2_str);
+ tev->event = strdup(fmt3_str);
+ if (tev->group == NULL || tev->event == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ pr_debug("Group:%s Event:%s probe:%c\n", tev->group, tev->event, pr);
+
+ tp->retprobe = (pr == 'r');
+
+ /* Scan module name(if there), function name and offset */
+ p = strchr(argv[1], ':');
+ if (p) {
+ tp->module = strndup(argv[1], p - argv[1]);
+ if (!tp->module) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ tev->uprobes = (tp->module[0] == '/');
+ p++;
+ } else
+ p = argv[1];
+ fmt1_str = strtok_r(p, "+", &fmt);
+ /* only the address started with 0x */
+ if (fmt1_str[0] == '0') {
+ /*
+ * Fix a special case:
+ * if address == 0, kernel reports something like:
+ * p:probe_libc/abs_0 /lib/libc-2.18.so:0x (null) arg1=%ax
+ * Newer kernel may fix that, but we want to
+ * support old kernel also.
+ */
+ if (strcmp(fmt1_str, "0x") == 0) {
+ if (!argv[2] || strcmp(argv[2], "(null)")) {
+ ret = -EINVAL;
+ goto out;
+ }
+ tp->address = 0;
+
+ free(argv[2]);
+ for (i = 2; argv[i + 1] != NULL; i++)
+ argv[i] = argv[i + 1];
+
+ argv[i] = NULL;
+ argc -= 1;
+ } else
+ tp->address = strtoull(fmt1_str, NULL, 0);
+ } else {
+ /* Only the symbol-based probe has offset */
+ tp->symbol = strdup(fmt1_str);
+ if (tp->symbol == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ fmt2_str = strtok_r(NULL, "", &fmt);
+ if (fmt2_str == NULL)
+ tp->offset = 0;
+ else
+ tp->offset = strtoul(fmt2_str, NULL, 10);
+ }
+
+ if (tev->uprobes) {
+ fmt2_str = strchr(p, '(');
+ if (fmt2_str)
+ tp->ref_ctr_offset = strtoul(fmt2_str + 1, NULL, 0);
+ }
+
+ tev->nargs = argc - 2;
+ tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
+ if (tev->args == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < tev->nargs; i++) {
+ p = strchr(argv[i + 2], '=');
+ if (p) /* We don't need which register is assigned. */
+ *p++ = '\0';
+ else
+ p = argv[i + 2];
+ tev->args[i].name = strdup(argv[i + 2]);
+ /* TODO: parse regs and offset */
+ tev->args[i].value = strdup(p);
+ if (tev->args[i].name == NULL || tev->args[i].value == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+ ret = 0;
+out:
+ free(argv0_str);
+ argv_free(argv);
+ return ret;
+}
+
+/* Compose only probe arg */
+char *synthesize_perf_probe_arg(struct perf_probe_arg *pa)
+{
+ struct perf_probe_arg_field *field = pa->field;
+ struct strbuf buf;
+ char *ret = NULL;
+ int err;
+
+ if (strbuf_init(&buf, 64) < 0)
+ return NULL;
+
+ if (pa->name && pa->var)
+ err = strbuf_addf(&buf, "%s=%s", pa->name, pa->var);
+ else
+ err = strbuf_addstr(&buf, pa->name ?: pa->var);
+ if (err)
+ goto out;
+
+ while (field) {
+ if (field->name[0] == '[')
+ err = strbuf_addstr(&buf, field->name);
+ else
+ err = strbuf_addf(&buf, "%s%s", field->ref ? "->" : ".",
+ field->name);
+ field = field->next;
+ if (err)
+ goto out;
+ }
+
+ if (pa->type)
+ if (strbuf_addf(&buf, ":%s", pa->type) < 0)
+ goto out;
+
+ ret = strbuf_detach(&buf, NULL);
+out:
+ strbuf_release(&buf);
+ return ret;
+}
+
+/* Compose only probe point (not argument) */
+static char *synthesize_perf_probe_point(struct perf_probe_point *pp)
+{
+ struct strbuf buf;
+ char *tmp, *ret = NULL;
+ int len, err = 0;
+
+ if (strbuf_init(&buf, 64) < 0)
+ return NULL;
+
+ if (pp->function) {
+ if (strbuf_addstr(&buf, pp->function) < 0)
+ goto out;
+ if (pp->offset)
+ err = strbuf_addf(&buf, "+%lu", pp->offset);
+ else if (pp->line)
+ err = strbuf_addf(&buf, ":%d", pp->line);
+ else if (pp->retprobe)
+ err = strbuf_addstr(&buf, "%return");
+ if (err)
+ goto out;
+ }
+ if (pp->file) {
+ tmp = pp->file;
+ len = strlen(tmp);
+ if (len > 30) {
+ tmp = strchr(pp->file + len - 30, '/');
+ tmp = tmp ? tmp + 1 : pp->file + len - 30;
+ }
+ err = strbuf_addf(&buf, "@%s", tmp);
+ if (!err && !pp->function && pp->line)
+ err = strbuf_addf(&buf, ":%d", pp->line);
+ }
+ if (!err)
+ ret = strbuf_detach(&buf, NULL);
+out:
+ strbuf_release(&buf);
+ return ret;
+}
+
+char *synthesize_perf_probe_command(struct perf_probe_event *pev)
+{
+ struct strbuf buf;
+ char *tmp, *ret = NULL;
+ int i;
+
+ if (strbuf_init(&buf, 64))
+ return NULL;
+ if (pev->event)
+ if (strbuf_addf(&buf, "%s:%s=", pev->group ?: PERFPROBE_GROUP,
+ pev->event) < 0)
+ goto out;
+
+ tmp = synthesize_perf_probe_point(&pev->point);
+ if (!tmp || strbuf_addstr(&buf, tmp) < 0) {
+ free(tmp);
+ goto out;
+ }
+ free(tmp);
+
+ for (i = 0; i < pev->nargs; i++) {
+ tmp = synthesize_perf_probe_arg(pev->args + i);
+ if (!tmp || strbuf_addf(&buf, " %s", tmp) < 0) {
+ free(tmp);
+ goto out;
+ }
+ free(tmp);
+ }
+
+ ret = strbuf_detach(&buf, NULL);
+out:
+ strbuf_release(&buf);
+ return ret;
+}
+
+static int __synthesize_probe_trace_arg_ref(struct probe_trace_arg_ref *ref,
+ struct strbuf *buf, int depth)
+{
+ int err;
+ if (ref->next) {
+ depth = __synthesize_probe_trace_arg_ref(ref->next, buf,
+ depth + 1);
+ if (depth < 0)
+ return depth;
+ }
+ if (ref->user_access)
+ err = strbuf_addf(buf, "%s%ld(", "+u", ref->offset);
+ else
+ err = strbuf_addf(buf, "%+ld(", ref->offset);
+ return (err < 0) ? err : depth;
+}
+
+static int synthesize_probe_trace_arg(struct probe_trace_arg *arg,
+ struct strbuf *buf)
+{
+ struct probe_trace_arg_ref *ref = arg->ref;
+ int depth = 0, err;
+
+ /* Argument name or separator */
+ if (arg->name)
+ err = strbuf_addf(buf, " %s=", arg->name);
+ else
+ err = strbuf_addch(buf, ' ');
+ if (err)
+ return err;
+
+ /* Special case: @XXX */
+ if (arg->value[0] == '@' && arg->ref)
+ ref = ref->next;
+
+ /* Dereferencing arguments */
+ if (ref) {
+ depth = __synthesize_probe_trace_arg_ref(ref, buf, 1);
+ if (depth < 0)
+ return depth;
+ }
+
+ /* Print argument value */
+ if (arg->value[0] == '@' && arg->ref)
+ err = strbuf_addf(buf, "%s%+ld", arg->value, arg->ref->offset);
+ else
+ err = strbuf_addstr(buf, arg->value);
+
+ /* Closing */
+ while (!err && depth--)
+ err = strbuf_addch(buf, ')');
+
+ /* Print argument type */
+ if (!err && arg->type)
+ err = strbuf_addf(buf, ":%s", arg->type);
+
+ return err;
+}
+
+static int
+synthesize_probe_trace_args(struct probe_trace_event *tev, struct strbuf *buf)
+{
+ int i, ret = 0;
+
+ for (i = 0; i < tev->nargs && ret >= 0; i++)
+ ret = synthesize_probe_trace_arg(&tev->args[i], buf);
+
+ return ret;
+}
+
+static int
+synthesize_uprobe_trace_def(struct probe_trace_point *tp, struct strbuf *buf)
+{
+ int err;
+
+ /* Uprobes must have tp->module */
+ if (!tp->module)
+ return -EINVAL;
+ /*
+ * If tp->address == 0, then this point must be a
+ * absolute address uprobe.
+ * try_to_find_absolute_address() should have made
+ * tp->symbol to "0x0".
+ */
+ if (!tp->address && (!tp->symbol || strcmp(tp->symbol, "0x0")))
+ return -EINVAL;
+
+ /* Use the tp->address for uprobes */
+ err = strbuf_addf(buf, "%s:0x%" PRIx64, tp->module, tp->address);
+
+ if (err >= 0 && tp->ref_ctr_offset) {
+ if (!uprobe_ref_ctr_is_supported())
+ return -EINVAL;
+ err = strbuf_addf(buf, "(0x%lx)", tp->ref_ctr_offset);
+ }
+ return err >= 0 ? 0 : err;
+}
+
+static int
+synthesize_kprobe_trace_def(struct probe_trace_point *tp, struct strbuf *buf)
+{
+ if (!strncmp(tp->symbol, "0x", 2)) {
+ /* Absolute address. See try_to_find_absolute_address() */
+ return strbuf_addf(buf, "%s%s0x%" PRIx64, tp->module ?: "",
+ tp->module ? ":" : "", tp->address);
+ } else {
+ return strbuf_addf(buf, "%s%s%s+%lu", tp->module ?: "",
+ tp->module ? ":" : "", tp->symbol, tp->offset);
+ }
+}
+
+char *synthesize_probe_trace_command(struct probe_trace_event *tev)
+{
+ struct probe_trace_point *tp = &tev->point;
+ struct strbuf buf;
+ char *ret = NULL;
+ int err;
+
+ if (strbuf_init(&buf, 32) < 0)
+ return NULL;
+
+ if (strbuf_addf(&buf, "%c:%s/%s ", tp->retprobe ? 'r' : 'p',
+ tev->group, tev->event) < 0)
+ goto error;
+
+ if (tev->uprobes)
+ err = synthesize_uprobe_trace_def(tp, &buf);
+ else
+ err = synthesize_kprobe_trace_def(tp, &buf);
+
+ if (err >= 0)
+ err = synthesize_probe_trace_args(tev, &buf);
+
+ if (err >= 0)
+ ret = strbuf_detach(&buf, NULL);
+error:
+ strbuf_release(&buf);
+ return ret;
+}
+
+static int find_perf_probe_point_from_map(struct probe_trace_point *tp,
+ struct perf_probe_point *pp,
+ bool is_kprobe)
+{
+ struct symbol *sym = NULL;
+ struct map *map = NULL;
+ u64 addr = tp->address;
+ int ret = -ENOENT;
+
+ if (!is_kprobe) {
+ map = dso__new_map(tp->module);
+ if (!map)
+ goto out;
+ sym = map__find_symbol(map, addr);
+ } else {
+ if (tp->symbol && !addr) {
+ if (kernel_get_symbol_address_by_name(tp->symbol,
+ &addr, true, false) < 0)
+ goto out;
+ }
+ if (addr) {
+ addr += tp->offset;
+ sym = machine__find_kernel_symbol(host_machine, addr, &map);
+ }
+ }
+
+ if (!sym)
+ goto out;
+
+ pp->retprobe = tp->retprobe;
+ pp->offset = addr - map__unmap_ip(map, sym->start);
+ pp->function = strdup(sym->name);
+ ret = pp->function ? 0 : -ENOMEM;
+
+out:
+ if (map && !is_kprobe) {
+ map__put(map);
+ }
+
+ return ret;
+}
+
+static int convert_to_perf_probe_point(struct probe_trace_point *tp,
+ struct perf_probe_point *pp,
+ bool is_kprobe)
+{
+ char buf[128];
+ int ret;
+
+ ret = find_perf_probe_point_from_dwarf(tp, pp, is_kprobe);
+ if (!ret)
+ return 0;
+ ret = find_perf_probe_point_from_map(tp, pp, is_kprobe);
+ if (!ret)
+ return 0;
+
+ pr_debug("Failed to find probe point from both of dwarf and map.\n");
+
+ if (tp->symbol) {
+ pp->function = strdup(tp->symbol);
+ pp->offset = tp->offset;
+ } else {
+ ret = e_snprintf(buf, 128, "0x%" PRIx64, tp->address);
+ if (ret < 0)
+ return ret;
+ pp->function = strdup(buf);
+ pp->offset = 0;
+ }
+ if (pp->function == NULL)
+ return -ENOMEM;
+
+ pp->retprobe = tp->retprobe;
+
+ return 0;
+}
+
+static int convert_to_perf_probe_event(struct probe_trace_event *tev,
+ struct perf_probe_event *pev, bool is_kprobe)
+{
+ struct strbuf buf = STRBUF_INIT;
+ int i, ret;
+
+ /* Convert event/group name */
+ pev->event = strdup(tev->event);
+ pev->group = strdup(tev->group);
+ if (pev->event == NULL || pev->group == NULL)
+ return -ENOMEM;
+
+ /* Convert trace_point to probe_point */
+ ret = convert_to_perf_probe_point(&tev->point, &pev->point, is_kprobe);
+ if (ret < 0)
+ return ret;
+
+ /* Convert trace_arg to probe_arg */
+ pev->nargs = tev->nargs;
+ pev->args = zalloc(sizeof(struct perf_probe_arg) * pev->nargs);
+ if (pev->args == NULL)
+ return -ENOMEM;
+ for (i = 0; i < tev->nargs && ret >= 0; i++) {
+ if (tev->args[i].name)
+ pev->args[i].name = strdup(tev->args[i].name);
+ else {
+ if ((ret = strbuf_init(&buf, 32)) < 0)
+ goto error;
+ ret = synthesize_probe_trace_arg(&tev->args[i], &buf);
+ pev->args[i].name = strbuf_detach(&buf, NULL);
+ }
+ if (pev->args[i].name == NULL && ret >= 0)
+ ret = -ENOMEM;
+ }
+error:
+ if (ret < 0)
+ clear_perf_probe_event(pev);
+
+ return ret;
+}
+
+void clear_perf_probe_event(struct perf_probe_event *pev)
+{
+ struct perf_probe_arg_field *field, *next;
+ int i;
+
+ zfree(&pev->event);
+ zfree(&pev->group);
+ zfree(&pev->target);
+ clear_perf_probe_point(&pev->point);
+
+ for (i = 0; i < pev->nargs; i++) {
+ zfree(&pev->args[i].name);
+ zfree(&pev->args[i].var);
+ zfree(&pev->args[i].type);
+ field = pev->args[i].field;
+ while (field) {
+ next = field->next;
+ zfree(&field->name);
+ free(field);
+ field = next;
+ }
+ }
+ pev->nargs = 0;
+ zfree(&pev->args);
+}
+
+#define strdup_or_goto(str, label) \
+({ char *__p = NULL; if (str && !(__p = strdup(str))) goto label; __p; })
+
+static int perf_probe_point__copy(struct perf_probe_point *dst,
+ struct perf_probe_point *src)
+{
+ dst->file = strdup_or_goto(src->file, out_err);
+ dst->function = strdup_or_goto(src->function, out_err);
+ dst->lazy_line = strdup_or_goto(src->lazy_line, out_err);
+ dst->line = src->line;
+ dst->retprobe = src->retprobe;
+ dst->offset = src->offset;
+ return 0;
+
+out_err:
+ clear_perf_probe_point(dst);
+ return -ENOMEM;
+}
+
+static int perf_probe_arg__copy(struct perf_probe_arg *dst,
+ struct perf_probe_arg *src)
+{
+ struct perf_probe_arg_field *field, **ppfield;
+
+ dst->name = strdup_or_goto(src->name, out_err);
+ dst->var = strdup_or_goto(src->var, out_err);
+ dst->type = strdup_or_goto(src->type, out_err);
+
+ field = src->field;
+ ppfield = &(dst->field);
+ while (field) {
+ *ppfield = zalloc(sizeof(*field));
+ if (!*ppfield)
+ goto out_err;
+ (*ppfield)->name = strdup_or_goto(field->name, out_err);
+ (*ppfield)->index = field->index;
+ (*ppfield)->ref = field->ref;
+ field = field->next;
+ ppfield = &((*ppfield)->next);
+ }
+ return 0;
+out_err:
+ return -ENOMEM;
+}
+
+int perf_probe_event__copy(struct perf_probe_event *dst,
+ struct perf_probe_event *src)
+{
+ int i;
+
+ dst->event = strdup_or_goto(src->event, out_err);
+ dst->group = strdup_or_goto(src->group, out_err);
+ dst->target = strdup_or_goto(src->target, out_err);
+ dst->uprobes = src->uprobes;
+
+ if (perf_probe_point__copy(&dst->point, &src->point) < 0)
+ goto out_err;
+
+ dst->args = zalloc(sizeof(struct perf_probe_arg) * src->nargs);
+ if (!dst->args)
+ goto out_err;
+ dst->nargs = src->nargs;
+
+ for (i = 0; i < src->nargs; i++)
+ if (perf_probe_arg__copy(&dst->args[i], &src->args[i]) < 0)
+ goto out_err;
+ return 0;
+
+out_err:
+ clear_perf_probe_event(dst);
+ return -ENOMEM;
+}
+
+void clear_probe_trace_event(struct probe_trace_event *tev)
+{
+ struct probe_trace_arg_ref *ref, *next;
+ int i;
+
+ zfree(&tev->event);
+ zfree(&tev->group);
+ zfree(&tev->point.symbol);
+ zfree(&tev->point.realname);
+ zfree(&tev->point.module);
+ for (i = 0; i < tev->nargs; i++) {
+ zfree(&tev->args[i].name);
+ zfree(&tev->args[i].value);
+ zfree(&tev->args[i].type);
+ ref = tev->args[i].ref;
+ while (ref) {
+ next = ref->next;
+ free(ref);
+ ref = next;
+ }
+ }
+ zfree(&tev->args);
+ tev->nargs = 0;
+}
+
+struct kprobe_blacklist_node {
+ struct list_head list;
+ u64 start;
+ u64 end;
+ char *symbol;
+};
+
+static void kprobe_blacklist__delete(struct list_head *blacklist)
+{
+ struct kprobe_blacklist_node *node;
+
+ while (!list_empty(blacklist)) {
+ node = list_first_entry(blacklist,
+ struct kprobe_blacklist_node, list);
+ list_del_init(&node->list);
+ zfree(&node->symbol);
+ free(node);
+ }
+}
+
+static int kprobe_blacklist__load(struct list_head *blacklist)
+{
+ struct kprobe_blacklist_node *node;
+ const char *__debugfs = debugfs__mountpoint();
+ char buf[PATH_MAX], *p;
+ FILE *fp;
+ int ret;
+
+ if (__debugfs == NULL)
+ return -ENOTSUP;
+
+ ret = e_snprintf(buf, PATH_MAX, "%s/kprobes/blacklist", __debugfs);
+ if (ret < 0)
+ return ret;
+
+ fp = fopen(buf, "r");
+ if (!fp)
+ return -errno;
+
+ ret = 0;
+ while (fgets(buf, PATH_MAX, fp)) {
+ node = zalloc(sizeof(*node));
+ if (!node) {
+ ret = -ENOMEM;
+ break;
+ }
+ INIT_LIST_HEAD(&node->list);
+ list_add_tail(&node->list, blacklist);
+ if (sscanf(buf, "0x%" PRIx64 "-0x%" PRIx64, &node->start, &node->end) != 2) {
+ ret = -EINVAL;
+ break;
+ }
+ p = strchr(buf, '\t');
+ if (p) {
+ p++;
+ if (p[strlen(p) - 1] == '\n')
+ p[strlen(p) - 1] = '\0';
+ } else
+ p = (char *)"unknown";
+ node->symbol = strdup(p);
+ if (!node->symbol) {
+ ret = -ENOMEM;
+ break;
+ }
+ pr_debug2("Blacklist: 0x%" PRIx64 "-0x%" PRIx64 ", %s\n",
+ node->start, node->end, node->symbol);
+ ret++;
+ }
+ if (ret < 0)
+ kprobe_blacklist__delete(blacklist);
+ fclose(fp);
+
+ return ret;
+}
+
+static struct kprobe_blacklist_node *
+kprobe_blacklist__find_by_address(struct list_head *blacklist, u64 address)
+{
+ struct kprobe_blacklist_node *node;
+
+ list_for_each_entry(node, blacklist, list) {
+ if (node->start <= address && address < node->end)
+ return node;
+ }
+
+ return NULL;
+}
+
+static LIST_HEAD(kprobe_blacklist);
+
+static void kprobe_blacklist__init(void)
+{
+ if (!list_empty(&kprobe_blacklist))
+ return;
+
+ if (kprobe_blacklist__load(&kprobe_blacklist) < 0)
+ pr_debug("No kprobe blacklist support, ignored\n");
+}
+
+static void kprobe_blacklist__release(void)
+{
+ kprobe_blacklist__delete(&kprobe_blacklist);
+}
+
+static bool kprobe_blacklist__listed(u64 address)
+{
+ return !!kprobe_blacklist__find_by_address(&kprobe_blacklist, address);
+}
+
+static int perf_probe_event__sprintf(const char *group, const char *event,
+ struct perf_probe_event *pev,
+ const char *module,
+ struct strbuf *result)
+{
+ int i, ret;
+ char *buf;
+
+ if (asprintf(&buf, "%s:%s", group, event) < 0)
+ return -errno;
+ ret = strbuf_addf(result, " %-20s (on ", buf);
+ free(buf);
+ if (ret)
+ return ret;
+
+ /* Synthesize only event probe point */
+ buf = synthesize_perf_probe_point(&pev->point);
+ if (!buf)
+ return -ENOMEM;
+ ret = strbuf_addstr(result, buf);
+ free(buf);
+
+ if (!ret && module)
+ ret = strbuf_addf(result, " in %s", module);
+
+ if (!ret && pev->nargs > 0) {
+ ret = strbuf_add(result, " with", 5);
+ for (i = 0; !ret && i < pev->nargs; i++) {
+ buf = synthesize_perf_probe_arg(&pev->args[i]);
+ if (!buf)
+ return -ENOMEM;
+ ret = strbuf_addf(result, " %s", buf);
+ free(buf);
+ }
+ }
+ if (!ret)
+ ret = strbuf_addch(result, ')');
+
+ return ret;
+}
+
+/* Show an event */
+int show_perf_probe_event(const char *group, const char *event,
+ struct perf_probe_event *pev,
+ const char *module, bool use_stdout)
+{
+ struct strbuf buf = STRBUF_INIT;
+ int ret;
+
+ ret = perf_probe_event__sprintf(group, event, pev, module, &buf);
+ if (ret >= 0) {
+ if (use_stdout)
+ printf("%s\n", buf.buf);
+ else
+ pr_info("%s\n", buf.buf);
+ }
+ strbuf_release(&buf);
+
+ return ret;
+}
+
+static bool filter_probe_trace_event(struct probe_trace_event *tev,
+ struct strfilter *filter)
+{
+ char tmp[128];
+
+ /* At first, check the event name itself */
+ if (strfilter__compare(filter, tev->event))
+ return true;
+
+ /* Next, check the combination of name and group */
+ if (e_snprintf(tmp, 128, "%s:%s", tev->group, tev->event) < 0)
+ return false;
+ return strfilter__compare(filter, tmp);
+}
+
+static int __show_perf_probe_events(int fd, bool is_kprobe,
+ struct strfilter *filter)
+{
+ int ret = 0;
+ struct probe_trace_event tev;
+ struct perf_probe_event pev;
+ struct strlist *rawlist;
+ struct str_node *ent;
+
+ memset(&tev, 0, sizeof(tev));
+ memset(&pev, 0, sizeof(pev));
+
+ rawlist = probe_file__get_rawlist(fd);
+ if (!rawlist)
+ return -ENOMEM;
+
+ strlist__for_each_entry(ent, rawlist) {
+ ret = parse_probe_trace_command(ent->s, &tev);
+ if (ret >= 0) {
+ if (!filter_probe_trace_event(&tev, filter))
+ goto next;
+ ret = convert_to_perf_probe_event(&tev, &pev,
+ is_kprobe);
+ if (ret < 0)
+ goto next;
+ ret = show_perf_probe_event(pev.group, pev.event,
+ &pev, tev.point.module,
+ true);
+ }
+next:
+ clear_perf_probe_event(&pev);
+ clear_probe_trace_event(&tev);
+ if (ret < 0)
+ break;
+ }
+ strlist__delete(rawlist);
+ /* Cleanup cached debuginfo if needed */
+ debuginfo_cache__exit();
+
+ return ret;
+}
+
+/* List up current perf-probe events */
+int show_perf_probe_events(struct strfilter *filter)
+{
+ int kp_fd, up_fd, ret;
+
+ setup_pager();
+
+ if (probe_conf.cache)
+ return probe_cache__show_all_caches(filter);
+
+ ret = init_probe_symbol_maps(false);
+ if (ret < 0)
+ return ret;
+
+ ret = probe_file__open_both(&kp_fd, &up_fd, 0);
+ if (ret < 0)
+ return ret;
+
+ if (kp_fd >= 0)
+ ret = __show_perf_probe_events(kp_fd, true, filter);
+ if (up_fd >= 0 && ret >= 0)
+ ret = __show_perf_probe_events(up_fd, false, filter);
+ if (kp_fd > 0)
+ close(kp_fd);
+ if (up_fd > 0)
+ close(up_fd);
+ exit_probe_symbol_maps();
+
+ return ret;
+}
+
+static int get_new_event_name(char *buf, size_t len, const char *base,
+ struct strlist *namelist, bool ret_event,
+ bool allow_suffix)
+{
+ int i, ret;
+ char *p, *nbase;
+
+ if (*base == '.')
+ base++;
+ nbase = strdup(base);
+ if (!nbase)
+ return -ENOMEM;
+
+ /* Cut off the dot suffixes (e.g. .const, .isra) and version suffixes */
+ p = strpbrk(nbase, ".@");
+ if (p && p != nbase)
+ *p = '\0';
+
+ /* Try no suffix number */
+ ret = e_snprintf(buf, len, "%s%s", nbase, ret_event ? "__return" : "");
+ if (ret < 0) {
+ pr_debug("snprintf() failed: %d\n", ret);
+ goto out;
+ }
+ if (!strlist__has_entry(namelist, buf))
+ goto out;
+
+ if (!allow_suffix) {
+ pr_warning("Error: event \"%s\" already exists.\n"
+ " Hint: Remove existing event by 'perf probe -d'\n"
+ " or force duplicates by 'perf probe -f'\n"
+ " or set 'force=yes' in BPF source.\n",
+ buf);
+ ret = -EEXIST;
+ goto out;
+ }
+
+ /* Try to add suffix */
+ for (i = 1; i < MAX_EVENT_INDEX; i++) {
+ ret = e_snprintf(buf, len, "%s_%d", nbase, i);
+ if (ret < 0) {
+ pr_debug("snprintf() failed: %d\n", ret);
+ goto out;
+ }
+ if (!strlist__has_entry(namelist, buf))
+ break;
+ }
+ if (i == MAX_EVENT_INDEX) {
+ pr_warning("Too many events are on the same function.\n");
+ ret = -ERANGE;
+ }
+
+out:
+ free(nbase);
+
+ /* Final validation */
+ if (ret >= 0 && !is_c_func_name(buf)) {
+ pr_warning("Internal error: \"%s\" is an invalid event name.\n",
+ buf);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+/* Warn if the current kernel's uprobe implementation is old */
+static void warn_uprobe_event_compat(struct probe_trace_event *tev)
+{
+ int i;
+ char *buf = synthesize_probe_trace_command(tev);
+ struct probe_trace_point *tp = &tev->point;
+
+ if (tp->ref_ctr_offset && !uprobe_ref_ctr_is_supported()) {
+ pr_warning("A semaphore is associated with %s:%s and "
+ "seems your kernel doesn't support it.\n",
+ tev->group, tev->event);
+ }
+
+ /* Old uprobe event doesn't support memory dereference */
+ if (!tev->uprobes || tev->nargs == 0 || !buf)
+ goto out;
+
+ for (i = 0; i < tev->nargs; i++) {
+ if (strchr(tev->args[i].value, '@')) {
+ pr_warning("%s accesses a variable by symbol name, but that is not supported for user application probe.\n",
+ tev->args[i].value);
+ break;
+ }
+ if (strglobmatch(tev->args[i].value, "[$+-]*")) {
+ pr_warning("Please upgrade your kernel to at least 3.14 to have access to feature %s\n",
+ tev->args[i].value);
+ break;
+ }
+ }
+out:
+ free(buf);
+}
+
+/* Set new name from original perf_probe_event and namelist */
+static int probe_trace_event__set_name(struct probe_trace_event *tev,
+ struct perf_probe_event *pev,
+ struct strlist *namelist,
+ bool allow_suffix)
+{
+ const char *event, *group;
+ char buf[64];
+ int ret;
+
+ /* If probe_event or trace_event already have the name, reuse it */
+ if (pev->event && !pev->sdt)
+ event = pev->event;
+ else if (tev->event)
+ event = tev->event;
+ else {
+ /* Or generate new one from probe point */
+ if (pev->point.function &&
+ (strncmp(pev->point.function, "0x", 2) != 0) &&
+ !strisglob(pev->point.function))
+ event = pev->point.function;
+ else
+ event = tev->point.realname;
+ }
+ if (pev->group && !pev->sdt)
+ group = pev->group;
+ else if (tev->group)
+ group = tev->group;
+ else
+ group = PERFPROBE_GROUP;
+
+ /* Get an unused new event name */
+ ret = get_new_event_name(buf, 64, event, namelist,
+ tev->point.retprobe, allow_suffix);
+ if (ret < 0)
+ return ret;
+
+ event = buf;
+
+ tev->event = strdup(event);
+ tev->group = strdup(group);
+ if (tev->event == NULL || tev->group == NULL)
+ return -ENOMEM;
+
+ /*
+ * Add new event name to namelist if multiprobe event is NOT
+ * supported, since we have to use new event name for following
+ * probes in that case.
+ */
+ if (!multiprobe_event_is_supported())
+ strlist__add(namelist, event);
+ return 0;
+}
+
+static int __open_probe_file_and_namelist(bool uprobe,
+ struct strlist **namelist)
+{
+ int fd;
+
+ fd = probe_file__open(PF_FL_RW | (uprobe ? PF_FL_UPROBE : 0));
+ if (fd < 0)
+ return fd;
+
+ /* Get current event names */
+ *namelist = probe_file__get_namelist(fd);
+ if (!(*namelist)) {
+ pr_debug("Failed to get current event list.\n");
+ close(fd);
+ return -ENOMEM;
+ }
+ return fd;
+}
+
+static int __add_probe_trace_events(struct perf_probe_event *pev,
+ struct probe_trace_event *tevs,
+ int ntevs, bool allow_suffix)
+{
+ int i, fd[2] = {-1, -1}, up, ret;
+ struct probe_trace_event *tev = NULL;
+ struct probe_cache *cache = NULL;
+ struct strlist *namelist[2] = {NULL, NULL};
+ struct nscookie nsc;
+
+ up = pev->uprobes ? 1 : 0;
+ fd[up] = __open_probe_file_and_namelist(up, &namelist[up]);
+ if (fd[up] < 0)
+ return fd[up];
+
+ ret = 0;
+ for (i = 0; i < ntevs; i++) {
+ tev = &tevs[i];
+ up = tev->uprobes ? 1 : 0;
+ if (fd[up] == -1) { /* Open the kprobe/uprobe_events */
+ fd[up] = __open_probe_file_and_namelist(up,
+ &namelist[up]);
+ if (fd[up] < 0)
+ goto close_out;
+ }
+ /* Skip if the symbol is out of .text or blacklisted */
+ if (!tev->point.symbol && !pev->uprobes)
+ continue;
+
+ /* Set new name for tev (and update namelist) */
+ ret = probe_trace_event__set_name(tev, pev, namelist[up],
+ allow_suffix);
+ if (ret < 0)
+ break;
+
+ nsinfo__mountns_enter(pev->nsi, &nsc);
+ ret = probe_file__add_event(fd[up], tev);
+ nsinfo__mountns_exit(&nsc);
+ if (ret < 0)
+ break;
+
+ /*
+ * Probes after the first probe which comes from same
+ * user input are always allowed to add suffix, because
+ * there might be several addresses corresponding to
+ * one code line.
+ */
+ allow_suffix = true;
+ }
+ if (ret == -EINVAL && pev->uprobes)
+ warn_uprobe_event_compat(tev);
+ if (ret == 0 && probe_conf.cache) {
+ cache = probe_cache__new(pev->target, pev->nsi);
+ if (!cache ||
+ probe_cache__add_entry(cache, pev, tevs, ntevs) < 0 ||
+ probe_cache__commit(cache) < 0)
+ pr_warning("Failed to add event to probe cache\n");
+ probe_cache__delete(cache);
+ }
+
+close_out:
+ for (up = 0; up < 2; up++) {
+ strlist__delete(namelist[up]);
+ if (fd[up] >= 0)
+ close(fd[up]);
+ }
+ return ret;
+}
+
+static int find_probe_functions(struct map *map, char *name,
+ struct symbol **syms)
+{
+ int found = 0;
+ struct symbol *sym;
+ struct rb_node *tmp;
+ const char *norm, *ver;
+ char *buf = NULL;
+ bool cut_version = true;
+
+ if (map__load(map) < 0)
+ return -EACCES; /* Possible permission error to load symbols */
+
+ /* If user gives a version, don't cut off the version from symbols */
+ if (strchr(name, '@'))
+ cut_version = false;
+
+ map__for_each_symbol(map, sym, tmp) {
+ norm = arch__normalize_symbol_name(sym->name);
+ if (!norm)
+ continue;
+
+ if (cut_version) {
+ /* We don't care about default symbol or not */
+ ver = strchr(norm, '@');
+ if (ver) {
+ buf = strndup(norm, ver - norm);
+ if (!buf)
+ return -ENOMEM;
+ norm = buf;
+ }
+ }
+
+ if (strglobmatch(norm, name)) {
+ found++;
+ if (syms && found < probe_conf.max_probes)
+ syms[found - 1] = sym;
+ }
+ if (buf)
+ zfree(&buf);
+ }
+
+ return found;
+}
+
+void __weak arch__fix_tev_from_maps(struct perf_probe_event *pev __maybe_unused,
+ struct probe_trace_event *tev __maybe_unused,
+ struct map *map __maybe_unused,
+ struct symbol *sym __maybe_unused) { }
+
+
+static void pr_kallsyms_access_error(void)
+{
+ pr_err("Please ensure you can read the /proc/kallsyms symbol addresses.\n"
+ "If /proc/sys/kernel/kptr_restrict is '2', you can not read\n"
+ "kernel symbol addresses even if you are a superuser. Please change\n"
+ "it to '1'. If kptr_restrict is '1', the superuser can read the\n"
+ "symbol addresses.\n"
+ "In that case, please run this command again with sudo.\n");
+}
+
+/*
+ * Find probe function addresses from map.
+ * Return an error or the number of found probe_trace_event
+ */
+static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
+ struct probe_trace_event **tevs)
+{
+ struct map *map = NULL;
+ struct ref_reloc_sym *reloc_sym = NULL;
+ struct symbol *sym;
+ struct symbol **syms = NULL;
+ struct probe_trace_event *tev;
+ struct perf_probe_point *pp = &pev->point;
+ struct probe_trace_point *tp;
+ int num_matched_functions;
+ int ret, i, j, skipped = 0;
+ char *mod_name;
+
+ map = get_target_map(pev->target, pev->nsi, pev->uprobes);
+ if (!map) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ syms = malloc(sizeof(struct symbol *) * probe_conf.max_probes);
+ if (!syms) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * Load matched symbols: Since the different local symbols may have
+ * same name but different addresses, this lists all the symbols.
+ */
+ num_matched_functions = find_probe_functions(map, pp->function, syms);
+ if (num_matched_functions <= 0) {
+ if (num_matched_functions == -EACCES) {
+ pr_err("Failed to load symbols from %s\n",
+ pev->target ?: "/proc/kallsyms");
+ if (pev->target)
+ pr_err("Please ensure the file is not stripped.\n");
+ else
+ pr_kallsyms_access_error();
+ } else
+ pr_err("Failed to find symbol %s in %s\n", pp->function,
+ pev->target ? : "kernel");
+ ret = -ENOENT;
+ goto out;
+ } else if (num_matched_functions > probe_conf.max_probes) {
+ pr_err("Too many functions matched in %s\n",
+ pev->target ? : "kernel");
+ ret = -E2BIG;
+ goto out;
+ }
+
+ /* Note that the symbols in the kmodule are not relocated */
+ if (!pev->uprobes && !pev->target &&
+ (!pp->retprobe || kretprobe_offset_is_supported())) {
+ reloc_sym = kernel_get_ref_reloc_sym(NULL);
+ if (!reloc_sym) {
+ pr_warning("Relocated base symbol is not found! "
+ "Check /proc/sys/kernel/kptr_restrict\n"
+ "and /proc/sys/kernel/perf_event_paranoid. "
+ "Or run as privileged perf user.\n\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ /* Setup result trace-probe-events */
+ *tevs = zalloc(sizeof(*tev) * num_matched_functions);
+ if (!*tevs) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = 0;
+
+ for (j = 0; j < num_matched_functions; j++) {
+ sym = syms[j];
+
+ if (sym->type != STT_FUNC)
+ continue;
+
+ /* There can be duplicated symbols in the map */
+ for (i = 0; i < j; i++)
+ if (sym->start == syms[i]->start) {
+ pr_debug("Found duplicated symbol %s @ %" PRIx64 "\n",
+ sym->name, sym->start);
+ break;
+ }
+ if (i != j)
+ continue;
+
+ tev = (*tevs) + ret;
+ tp = &tev->point;
+ if (ret == num_matched_functions) {
+ pr_warning("Too many symbols are listed. Skip it.\n");
+ break;
+ }
+ ret++;
+
+ if (pp->offset > sym->end - sym->start) {
+ pr_warning("Offset %ld is bigger than the size of %s\n",
+ pp->offset, sym->name);
+ ret = -ENOENT;
+ goto err_out;
+ }
+ /* Add one probe point */
+ tp->address = map__unmap_ip(map, sym->start) + pp->offset;
+
+ /* Check the kprobe (not in module) is within .text */
+ if (!pev->uprobes && !pev->target &&
+ kprobe_warn_out_range(sym->name, tp->address)) {
+ tp->symbol = NULL; /* Skip it */
+ skipped++;
+ } else if (reloc_sym) {
+ tp->symbol = strdup_or_goto(reloc_sym->name, nomem_out);
+ tp->offset = tp->address - reloc_sym->addr;
+ } else {
+ tp->symbol = strdup_or_goto(sym->name, nomem_out);
+ tp->offset = pp->offset;
+ }
+ tp->realname = strdup_or_goto(sym->name, nomem_out);
+
+ tp->retprobe = pp->retprobe;
+ if (pev->target) {
+ if (pev->uprobes) {
+ tev->point.module = strdup_or_goto(pev->target,
+ nomem_out);
+ } else {
+ mod_name = find_module_name(pev->target);
+ tev->point.module =
+ strdup(mod_name ? mod_name : pev->target);
+ free(mod_name);
+ if (!tev->point.module)
+ goto nomem_out;
+ }
+ }
+ tev->uprobes = pev->uprobes;
+ tev->nargs = pev->nargs;
+ if (tev->nargs) {
+ tev->args = zalloc(sizeof(struct probe_trace_arg) *
+ tev->nargs);
+ if (tev->args == NULL)
+ goto nomem_out;
+ }
+ for (i = 0; i < tev->nargs; i++) {
+ if (pev->args[i].name)
+ tev->args[i].name =
+ strdup_or_goto(pev->args[i].name,
+ nomem_out);
+
+ tev->args[i].value = strdup_or_goto(pev->args[i].var,
+ nomem_out);
+ if (pev->args[i].type)
+ tev->args[i].type =
+ strdup_or_goto(pev->args[i].type,
+ nomem_out);
+ }
+ arch__fix_tev_from_maps(pev, tev, map, sym);
+ }
+ if (ret == skipped) {
+ ret = -ENOENT;
+ goto err_out;
+ }
+
+out:
+ map__put(map);
+ free(syms);
+ return ret;
+
+nomem_out:
+ ret = -ENOMEM;
+err_out:
+ clear_probe_trace_events(*tevs, num_matched_functions);
+ zfree(tevs);
+ goto out;
+}
+
+static int try_to_find_absolute_address(struct perf_probe_event *pev,
+ struct probe_trace_event **tevs)
+{
+ struct perf_probe_point *pp = &pev->point;
+ struct probe_trace_event *tev;
+ struct probe_trace_point *tp;
+ int i, err;
+
+ if (!(pev->point.function && !strncmp(pev->point.function, "0x", 2)))
+ return -EINVAL;
+ if (perf_probe_event_need_dwarf(pev))
+ return -EINVAL;
+
+ /*
+ * This is 'perf probe /lib/libc.so 0xabcd'. Try to probe at
+ * absolute address.
+ *
+ * Only one tev can be generated by this.
+ */
+ *tevs = zalloc(sizeof(*tev));
+ if (!*tevs)
+ return -ENOMEM;
+
+ tev = *tevs;
+ tp = &tev->point;
+
+ /*
+ * Don't use tp->offset, use address directly, because
+ * in synthesize_probe_trace_command() address cannot be
+ * zero.
+ */
+ tp->address = pev->point.abs_address;
+ tp->retprobe = pp->retprobe;
+ tev->uprobes = pev->uprobes;
+
+ err = -ENOMEM;
+ /*
+ * Give it a '0x' leading symbol name.
+ * In __add_probe_trace_events, a NULL symbol is interpreted as
+ * invalid.
+ */
+ if (asprintf(&tp->symbol, "0x%" PRIx64, tp->address) < 0)
+ goto errout;
+
+ /* For kprobe, check range */
+ if ((!tev->uprobes) &&
+ (kprobe_warn_out_range(tev->point.symbol,
+ tev->point.address))) {
+ err = -EACCES;
+ goto errout;
+ }
+
+ if (asprintf(&tp->realname, "abs_%" PRIx64, tp->address) < 0)
+ goto errout;
+
+ if (pev->target) {
+ tp->module = strdup(pev->target);
+ if (!tp->module)
+ goto errout;
+ }
+
+ if (tev->group) {
+ tev->group = strdup(pev->group);
+ if (!tev->group)
+ goto errout;
+ }
+
+ if (pev->event) {
+ tev->event = strdup(pev->event);
+ if (!tev->event)
+ goto errout;
+ }
+
+ tev->nargs = pev->nargs;
+ tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
+ if (!tev->args)
+ goto errout;
+
+ for (i = 0; i < tev->nargs; i++)
+ copy_to_probe_trace_arg(&tev->args[i], &pev->args[i]);
+
+ return 1;
+
+errout:
+ clear_probe_trace_events(*tevs, 1);
+ *tevs = NULL;
+ return err;
+}
+
+/* Concatenate two arrays */
+static void *memcat(void *a, size_t sz_a, void *b, size_t sz_b)
+{
+ void *ret;
+
+ ret = malloc(sz_a + sz_b);
+ if (ret) {
+ memcpy(ret, a, sz_a);
+ memcpy(ret + sz_a, b, sz_b);
+ }
+ return ret;
+}
+
+static int
+concat_probe_trace_events(struct probe_trace_event **tevs, int *ntevs,
+ struct probe_trace_event **tevs2, int ntevs2)
+{
+ struct probe_trace_event *new_tevs;
+ int ret = 0;
+
+ if (*ntevs == 0) {
+ *tevs = *tevs2;
+ *ntevs = ntevs2;
+ *tevs2 = NULL;
+ return 0;
+ }
+
+ if (*ntevs + ntevs2 > probe_conf.max_probes)
+ ret = -E2BIG;
+ else {
+ /* Concatenate the array of probe_trace_event */
+ new_tevs = memcat(*tevs, (*ntevs) * sizeof(**tevs),
+ *tevs2, ntevs2 * sizeof(**tevs2));
+ if (!new_tevs)
+ ret = -ENOMEM;
+ else {
+ free(*tevs);
+ *tevs = new_tevs;
+ *ntevs += ntevs2;
+ }
+ }
+ if (ret < 0)
+ clear_probe_trace_events(*tevs2, ntevs2);
+ zfree(tevs2);
+
+ return ret;
+}
+
+/*
+ * Try to find probe_trace_event from given probe caches. Return the number
+ * of cached events found, if an error occurs return the error.
+ */
+static int find_cached_events(struct perf_probe_event *pev,
+ struct probe_trace_event **tevs,
+ const char *target)
+{
+ struct probe_cache *cache;
+ struct probe_cache_entry *entry;
+ struct probe_trace_event *tmp_tevs = NULL;
+ int ntevs = 0;
+ int ret = 0;
+
+ cache = probe_cache__new(target, pev->nsi);
+ /* Return 0 ("not found") if the target has no probe cache. */
+ if (!cache)
+ return 0;
+
+ for_each_probe_cache_entry(entry, cache) {
+ /* Skip the cache entry which has no name */
+ if (!entry->pev.event || !entry->pev.group)
+ continue;
+ if ((!pev->group || strglobmatch(entry->pev.group, pev->group)) &&
+ strglobmatch(entry->pev.event, pev->event)) {
+ ret = probe_cache_entry__get_event(entry, &tmp_tevs);
+ if (ret > 0)
+ ret = concat_probe_trace_events(tevs, &ntevs,
+ &tmp_tevs, ret);
+ if (ret < 0)
+ break;
+ }
+ }
+ probe_cache__delete(cache);
+ if (ret < 0) {
+ clear_probe_trace_events(*tevs, ntevs);
+ zfree(tevs);
+ } else {
+ ret = ntevs;
+ if (ntevs > 0 && target && target[0] == '/')
+ pev->uprobes = true;
+ }
+
+ return ret;
+}
+
+/* Try to find probe_trace_event from all probe caches */
+static int find_cached_events_all(struct perf_probe_event *pev,
+ struct probe_trace_event **tevs)
+{
+ struct probe_trace_event *tmp_tevs = NULL;
+ struct strlist *bidlist;
+ struct str_node *nd;
+ char *pathname;
+ int ntevs = 0;
+ int ret;
+
+ /* Get the buildid list of all valid caches */
+ bidlist = build_id_cache__list_all(true);
+ if (!bidlist) {
+ ret = -errno;
+ pr_debug("Failed to get buildids: %d\n", ret);
+ return ret;
+ }
+
+ ret = 0;
+ strlist__for_each_entry(nd, bidlist) {
+ pathname = build_id_cache__origname(nd->s);
+ ret = find_cached_events(pev, &tmp_tevs, pathname);
+ /* In the case of cnt == 0, we just skip it */
+ if (ret > 0)
+ ret = concat_probe_trace_events(tevs, &ntevs,
+ &tmp_tevs, ret);
+ free(pathname);
+ if (ret < 0)
+ break;
+ }
+ strlist__delete(bidlist);
+
+ if (ret < 0) {
+ clear_probe_trace_events(*tevs, ntevs);
+ zfree(tevs);
+ } else
+ ret = ntevs;
+
+ return ret;
+}
+
+static int find_probe_trace_events_from_cache(struct perf_probe_event *pev,
+ struct probe_trace_event **tevs)
+{
+ struct probe_cache *cache;
+ struct probe_cache_entry *entry;
+ struct probe_trace_event *tev;
+ struct str_node *node;
+ int ret, i;
+
+ if (pev->sdt) {
+ /* For SDT/cached events, we use special search functions */
+ if (!pev->target)
+ return find_cached_events_all(pev, tevs);
+ else
+ return find_cached_events(pev, tevs, pev->target);
+ }
+ cache = probe_cache__new(pev->target, pev->nsi);
+ if (!cache)
+ return 0;
+
+ entry = probe_cache__find(cache, pev);
+ if (!entry) {
+ /* SDT must be in the cache */
+ ret = pev->sdt ? -ENOENT : 0;
+ goto out;
+ }
+
+ ret = strlist__nr_entries(entry->tevlist);
+ if (ret > probe_conf.max_probes) {
+ pr_debug("Too many entries matched in the cache of %s\n",
+ pev->target ? : "kernel");
+ ret = -E2BIG;
+ goto out;
+ }
+
+ *tevs = zalloc(ret * sizeof(*tev));
+ if (!*tevs) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ i = 0;
+ strlist__for_each_entry(node, entry->tevlist) {
+ tev = &(*tevs)[i++];
+ ret = parse_probe_trace_command(node->s, tev);
+ if (ret < 0)
+ goto out;
+ /* Set the uprobes attribute as same as original */
+ tev->uprobes = pev->uprobes;
+ }
+ ret = i;
+
+out:
+ probe_cache__delete(cache);
+ return ret;
+}
+
+static int convert_to_probe_trace_events(struct perf_probe_event *pev,
+ struct probe_trace_event **tevs)
+{
+ int ret;
+
+ if (!pev->group && !pev->sdt) {
+ /* Set group name if not given */
+ if (!pev->uprobes) {
+ pev->group = strdup(PERFPROBE_GROUP);
+ ret = pev->group ? 0 : -ENOMEM;
+ } else
+ ret = convert_exec_to_group(pev->target, &pev->group);
+ if (ret != 0) {
+ pr_warning("Failed to make a group name.\n");
+ return ret;
+ }
+ }
+
+ ret = try_to_find_absolute_address(pev, tevs);
+ if (ret > 0)
+ return ret;
+
+ /* At first, we need to lookup cache entry */
+ ret = find_probe_trace_events_from_cache(pev, tevs);
+ if (ret > 0 || pev->sdt) /* SDT can be found only in the cache */
+ return ret == 0 ? -ENOENT : ret; /* Found in probe cache */
+
+ /* Convert perf_probe_event with debuginfo */
+ ret = try_to_find_probe_trace_events(pev, tevs);
+ if (ret != 0)
+ return ret; /* Found in debuginfo or got an error */
+
+ return find_probe_trace_events_from_map(pev, tevs);
+}
+
+int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs)
+{
+ int i, ret;
+
+ /* Loop 1: convert all events */
+ for (i = 0; i < npevs; i++) {
+ /* Init kprobe blacklist if needed */
+ if (!pevs[i].uprobes)
+ kprobe_blacklist__init();
+ /* Convert with or without debuginfo */
+ ret = convert_to_probe_trace_events(&pevs[i], &pevs[i].tevs);
+ if (ret < 0)
+ return ret;
+ pevs[i].ntevs = ret;
+ }
+ /* This just release blacklist only if allocated */
+ kprobe_blacklist__release();
+
+ return 0;
+}
+
+static int show_probe_trace_event(struct probe_trace_event *tev)
+{
+ char *buf = synthesize_probe_trace_command(tev);
+
+ if (!buf) {
+ pr_debug("Failed to synthesize probe trace event.\n");
+ return -EINVAL;
+ }
+
+ /* Showing definition always go stdout */
+ printf("%s\n", buf);
+ free(buf);
+
+ return 0;
+}
+
+int show_probe_trace_events(struct perf_probe_event *pevs, int npevs)
+{
+ struct strlist *namelist = strlist__new(NULL, NULL);
+ struct probe_trace_event *tev;
+ struct perf_probe_event *pev;
+ int i, j, ret = 0;
+
+ if (!namelist)
+ return -ENOMEM;
+
+ for (j = 0; j < npevs && !ret; j++) {
+ pev = &pevs[j];
+ for (i = 0; i < pev->ntevs && !ret; i++) {
+ tev = &pev->tevs[i];
+ /* Skip if the symbol is out of .text or blacklisted */
+ if (!tev->point.symbol && !pev->uprobes)
+ continue;
+
+ /* Set new name for tev (and update namelist) */
+ ret = probe_trace_event__set_name(tev, pev,
+ namelist, true);
+ if (!ret)
+ ret = show_probe_trace_event(tev);
+ }
+ }
+ strlist__delete(namelist);
+
+ return ret;
+}
+
+static int show_bootconfig_event(struct probe_trace_event *tev)
+{
+ struct probe_trace_point *tp = &tev->point;
+ struct strbuf buf;
+ char *ret = NULL;
+ int err;
+
+ if (strbuf_init(&buf, 32) < 0)
+ return -ENOMEM;
+
+ err = synthesize_kprobe_trace_def(tp, &buf);
+ if (err >= 0)
+ err = synthesize_probe_trace_args(tev, &buf);
+ if (err >= 0)
+ ret = strbuf_detach(&buf, NULL);
+ strbuf_release(&buf);
+
+ if (ret) {
+ printf("'%s'", ret);
+ free(ret);
+ }
+
+ return err;
+}
+
+int show_bootconfig_events(struct perf_probe_event *pevs, int npevs)
+{
+ struct strlist *namelist = strlist__new(NULL, NULL);
+ struct probe_trace_event *tev;
+ struct perf_probe_event *pev;
+ char *cur_name = NULL;
+ int i, j, ret = 0;
+
+ if (!namelist)
+ return -ENOMEM;
+
+ for (j = 0; j < npevs && !ret; j++) {
+ pev = &pevs[j];
+ if (pev->group && strcmp(pev->group, "probe"))
+ pr_warning("WARN: Group name %s is ignored\n", pev->group);
+ if (pev->uprobes) {
+ pr_warning("ERROR: Bootconfig doesn't support uprobes\n");
+ ret = -EINVAL;
+ break;
+ }
+ for (i = 0; i < pev->ntevs && !ret; i++) {
+ tev = &pev->tevs[i];
+ /* Skip if the symbol is out of .text or blacklisted */
+ if (!tev->point.symbol && !pev->uprobes)
+ continue;
+
+ /* Set new name for tev (and update namelist) */
+ ret = probe_trace_event__set_name(tev, pev,
+ namelist, true);
+ if (ret)
+ break;
+
+ if (!cur_name || strcmp(cur_name, tev->event)) {
+ printf("%sftrace.event.kprobes.%s.probe = ",
+ cur_name ? "\n" : "", tev->event);
+ cur_name = tev->event;
+ } else
+ printf(", ");
+ ret = show_bootconfig_event(tev);
+ }
+ }
+ printf("\n");
+ strlist__delete(namelist);
+
+ return ret;
+}
+
+int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs)
+{
+ int i, ret = 0;
+
+ /* Loop 2: add all events */
+ for (i = 0; i < npevs; i++) {
+ ret = __add_probe_trace_events(&pevs[i], pevs[i].tevs,
+ pevs[i].ntevs,
+ probe_conf.force_add);
+ if (ret < 0)
+ break;
+ }
+ return ret;
+}
+
+void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs)
+{
+ int i, j;
+ struct perf_probe_event *pev;
+
+ /* Loop 3: cleanup and free trace events */
+ for (i = 0; i < npevs; i++) {
+ pev = &pevs[i];
+ for (j = 0; j < pevs[i].ntevs; j++)
+ clear_probe_trace_event(&pevs[i].tevs[j]);
+ zfree(&pevs[i].tevs);
+ pevs[i].ntevs = 0;
+ nsinfo__zput(pev->nsi);
+ clear_perf_probe_event(&pevs[i]);
+ }
+}
+
+int add_perf_probe_events(struct perf_probe_event *pevs, int npevs)
+{
+ int ret;
+
+ ret = init_probe_symbol_maps(pevs->uprobes);
+ if (ret < 0)
+ return ret;
+
+ ret = convert_perf_probe_events(pevs, npevs);
+ if (ret == 0)
+ ret = apply_perf_probe_events(pevs, npevs);
+
+ cleanup_perf_probe_events(pevs, npevs);
+
+ exit_probe_symbol_maps();
+ return ret;
+}
+
+int del_perf_probe_events(struct strfilter *filter)
+{
+ int ret, ret2, ufd = -1, kfd = -1;
+ char *str = strfilter__string(filter);
+
+ if (!str)
+ return -EINVAL;
+
+ /* Get current event names */
+ ret = probe_file__open_both(&kfd, &ufd, PF_FL_RW);
+ if (ret < 0)
+ goto out;
+
+ ret = probe_file__del_events(kfd, filter);
+ if (ret < 0 && ret != -ENOENT)
+ goto error;
+
+ ret2 = probe_file__del_events(ufd, filter);
+ if (ret2 < 0 && ret2 != -ENOENT) {
+ ret = ret2;
+ goto error;
+ }
+ ret = 0;
+
+error:
+ if (kfd >= 0)
+ close(kfd);
+ if (ufd >= 0)
+ close(ufd);
+out:
+ free(str);
+
+ return ret;
+}
+
+int show_available_funcs(const char *target, struct nsinfo *nsi,
+ struct strfilter *_filter, bool user)
+{
+ struct map *map;
+ struct dso *dso;
+ int ret;
+
+ ret = init_probe_symbol_maps(user);
+ if (ret < 0)
+ return ret;
+
+ /* Get a symbol map */
+ map = get_target_map(target, nsi, user);
+ if (!map) {
+ pr_err("Failed to get a map for %s\n", (target) ? : "kernel");
+ return -EINVAL;
+ }
+
+ ret = map__load(map);
+ if (ret) {
+ if (ret == -2) {
+ char *str = strfilter__string(_filter);
+ pr_err("Failed to find symbols matched to \"%s\"\n",
+ str);
+ free(str);
+ } else
+ pr_err("Failed to load symbols in %s\n",
+ (target) ? : "kernel");
+ goto end;
+ }
+ dso = map__dso(map);
+ dso__sort_by_name(dso);
+
+ /* Show all (filtered) symbols */
+ setup_pager();
+
+ for (size_t i = 0; i < dso->symbol_names_len; i++) {
+ struct symbol *pos = dso->symbol_names[i];
+
+ if (strfilter__compare(_filter, pos->name))
+ printf("%s\n", pos->name);
+ }
+end:
+ map__put(map);
+ exit_probe_symbol_maps();
+
+ return ret;
+}
+
+int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
+ struct perf_probe_arg *pvar)
+{
+ tvar->value = strdup(pvar->var);
+ if (tvar->value == NULL)
+ return -ENOMEM;
+ if (pvar->type) {
+ tvar->type = strdup(pvar->type);
+ if (tvar->type == NULL)
+ return -ENOMEM;
+ }
+ if (pvar->name) {
+ tvar->name = strdup(pvar->name);
+ if (tvar->name == NULL)
+ return -ENOMEM;
+ } else
+ tvar->name = NULL;
+ return 0;
+}
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
new file mode 100644
index 000000000..7e3b6c3d1
--- /dev/null
+++ b/tools/perf/util/probe-event.h
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PROBE_EVENT_H
+#define _PROBE_EVENT_H
+
+#include <linux/compiler.h>
+#include <stdbool.h>
+
+struct intlist;
+struct nsinfo;
+
+/* Probe related configurations */
+struct probe_conf {
+ bool show_ext_vars;
+ bool show_location_range;
+ bool force_add;
+ bool no_inlines;
+ bool cache;
+ bool bootconfig;
+ int max_probes;
+ unsigned long magic_num;
+};
+extern struct probe_conf probe_conf;
+extern bool probe_event_dry_run;
+
+#define DEFAULT_PROBE_MAGIC_NUM 0xdeade12d /* u32: 3735937325 */
+
+struct symbol;
+
+/* kprobe-tracer and uprobe-tracer tracing point */
+struct probe_trace_point {
+ char *realname; /* function real name (if needed) */
+ char *symbol; /* Base symbol */
+ char *module; /* Module name */
+ unsigned long offset; /* Offset from symbol */
+ unsigned long ref_ctr_offset; /* SDT reference counter offset */
+ u64 address; /* Actual address of the trace point */
+ bool retprobe; /* Return probe flag */
+};
+
+/* probe-tracer tracing argument referencing offset */
+struct probe_trace_arg_ref {
+ struct probe_trace_arg_ref *next; /* Next reference */
+ long offset; /* Offset value */
+ bool user_access; /* User-memory access */
+};
+
+/* kprobe-tracer and uprobe-tracer tracing argument */
+struct probe_trace_arg {
+ char *name; /* Argument name */
+ char *value; /* Base value */
+ char *type; /* Type name */
+ struct probe_trace_arg_ref *ref; /* Referencing offset */
+};
+
+/* kprobe-tracer and uprobe-tracer tracing event (point + arg) */
+struct probe_trace_event {
+ char *event; /* Event name */
+ char *group; /* Group name */
+ struct probe_trace_point point; /* Trace point */
+ int nargs; /* Number of args */
+ bool uprobes; /* uprobes only */
+ struct probe_trace_arg *args; /* Arguments */
+};
+
+/* Perf probe probing point */
+struct perf_probe_point {
+ char *file; /* File path */
+ char *function; /* Function name */
+ int line; /* Line number */
+ bool retprobe; /* Return probe flag */
+ char *lazy_line; /* Lazy matching pattern */
+ unsigned long offset; /* Offset from function entry */
+ u64 abs_address; /* Absolute address of the point */
+};
+
+/* Perf probe probing argument field chain */
+struct perf_probe_arg_field {
+ struct perf_probe_arg_field *next; /* Next field */
+ char *name; /* Name of the field */
+ long index; /* Array index number */
+ bool ref; /* Referencing flag */
+};
+
+/* Perf probe probing argument */
+struct perf_probe_arg {
+ char *name; /* Argument name */
+ char *var; /* Variable name */
+ char *type; /* Type name */
+ struct perf_probe_arg_field *field; /* Structure fields */
+ bool user_access; /* User-memory access */
+};
+
+/* Perf probe probing event (point + arg) */
+struct perf_probe_event {
+ char *event; /* Event name */
+ char *group; /* Group name */
+ struct perf_probe_point point; /* Probe point */
+ int nargs; /* Number of arguments */
+ bool sdt; /* SDT/cached event flag */
+ bool uprobes; /* Uprobe event flag */
+ char *target; /* Target binary */
+ struct perf_probe_arg *args; /* Arguments */
+ struct probe_trace_event *tevs;
+ int ntevs;
+ struct nsinfo *nsi; /* Target namespace */
+};
+
+/* Line range */
+struct line_range {
+ char *file; /* File name */
+ char *function; /* Function name */
+ int start; /* Start line number */
+ int end; /* End line number */
+ int offset; /* Start line offset */
+ char *path; /* Real path name */
+ char *comp_dir; /* Compile directory */
+ struct intlist *line_list; /* Visible lines */
+};
+
+struct strlist;
+
+/* List of variables */
+struct variable_list {
+ struct probe_trace_point point; /* Actual probepoint */
+ struct strlist *vars; /* Available variables */
+};
+
+struct map;
+int init_probe_symbol_maps(bool user_only);
+void exit_probe_symbol_maps(void);
+
+/* Command string to events */
+int parse_perf_probe_command(const char *cmd, struct perf_probe_event *pev);
+int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev);
+
+/* Events to command string */
+char *synthesize_perf_probe_command(struct perf_probe_event *pev);
+char *synthesize_probe_trace_command(struct probe_trace_event *tev);
+char *synthesize_perf_probe_arg(struct perf_probe_arg *pa);
+
+int perf_probe_event__copy(struct perf_probe_event *dst,
+ struct perf_probe_event *src);
+
+bool perf_probe_with_var(struct perf_probe_event *pev);
+
+/* Check the perf_probe_event needs debuginfo */
+bool perf_probe_event_need_dwarf(struct perf_probe_event *pev);
+
+/* Release event contents */
+void clear_perf_probe_event(struct perf_probe_event *pev);
+void clear_probe_trace_event(struct probe_trace_event *tev);
+
+/* Command string to line-range */
+int parse_line_range_desc(const char *cmd, struct line_range *lr);
+
+/* Release line range members */
+void line_range__clear(struct line_range *lr);
+
+/* Initialize line range */
+int line_range__init(struct line_range *lr);
+
+int add_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int show_probe_trace_events(struct perf_probe_event *pevs, int npevs);
+int show_bootconfig_events(struct perf_probe_event *pevs, int npevs);
+void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+
+struct strfilter;
+
+int del_perf_probe_events(struct strfilter *filter);
+
+int show_perf_probe_event(const char *group, const char *event,
+ struct perf_probe_event *pev,
+ const char *module, bool use_stdout);
+int show_perf_probe_events(struct strfilter *filter);
+int show_line_range(struct line_range *lr, const char *module,
+ struct nsinfo *nsi, bool user);
+int show_available_vars(struct perf_probe_event *pevs, int npevs,
+ struct strfilter *filter);
+int show_available_funcs(const char *module, struct nsinfo *nsi,
+ struct strfilter *filter, bool user);
+void arch__fix_tev_from_maps(struct perf_probe_event *pev,
+ struct probe_trace_event *tev, struct map *map,
+ struct symbol *sym);
+
+/* If there is no space to write, returns -E2BIG. */
+int e_snprintf(char *str, size_t size, const char *format, ...) __printf(3, 4);
+
+/* Maximum index number of event-name postfix */
+#define MAX_EVENT_INDEX 1024
+
+int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
+ struct perf_probe_arg *pvar);
+
+struct map *get_target_map(const char *target, struct nsinfo *nsi, bool user);
+
+void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
+ int ntevs);
+
+#endif /*_PROBE_EVENT_H */
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
new file mode 100644
index 000000000..3d50de321
--- /dev/null
+++ b/tools/perf/util/probe-file.c
@@ -0,0 +1,1200 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * probe-file.c : operate ftrace k/uprobe events files
+ *
+ * Written by Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ */
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include <linux/zalloc.h>
+#include "namespaces.h"
+#include "event.h"
+#include "strlist.h"
+#include "strfilter.h"
+#include "debug.h"
+#include "build-id.h"
+#include "dso.h"
+#include "color.h"
+#include "symbol.h"
+#include "strbuf.h"
+#include <api/fs/tracing_path.h>
+#include <api/fs/fs.h>
+#include "probe-event.h"
+#include "probe-file.h"
+#include "session.h"
+#include "perf_regs.h"
+#include "string2.h"
+
+/* 4096 - 2 ('\n' + '\0') */
+#define MAX_CMDLEN 4094
+
+static bool print_common_warning(int err, bool readwrite)
+{
+ if (err == -EACCES)
+ pr_warning("No permission to %s tracefs.\nPlease %s\n",
+ readwrite ? "write" : "read",
+ readwrite ? "run this command again with sudo." :
+ "try 'sudo mount -o remount,mode=755 /sys/kernel/tracing/'");
+ else
+ return false;
+
+ return true;
+}
+
+static bool print_configure_probe_event(int kerr, int uerr)
+{
+ const char *config, *file;
+
+ if (kerr == -ENOENT && uerr == -ENOENT) {
+ file = "{k,u}probe_events";
+ config = "CONFIG_KPROBE_EVENTS=y and CONFIG_UPROBE_EVENTS=y";
+ } else if (kerr == -ENOENT) {
+ file = "kprobe_events";
+ config = "CONFIG_KPROBE_EVENTS=y";
+ } else if (uerr == -ENOENT) {
+ file = "uprobe_events";
+ config = "CONFIG_UPROBE_EVENTS=y";
+ } else
+ return false;
+
+ if (!debugfs__configured() && !tracefs__configured())
+ pr_warning("Debugfs or tracefs is not mounted\n"
+ "Please try 'sudo mount -t tracefs nodev /sys/kernel/tracing/'\n");
+ else
+ pr_warning("%s/%s does not exist.\nPlease rebuild kernel with %s.\n",
+ tracing_path_mount(), file, config);
+
+ return true;
+}
+
+static void print_open_warning(int err, bool uprobe, bool readwrite)
+{
+ char sbuf[STRERR_BUFSIZE];
+
+ if (print_common_warning(err, readwrite))
+ return;
+
+ if (print_configure_probe_event(uprobe ? 0 : err, uprobe ? err : 0))
+ return;
+
+ pr_warning("Failed to open %s/%cprobe_events: %s\n",
+ tracing_path_mount(), uprobe ? 'u' : 'k',
+ str_error_r(-err, sbuf, sizeof(sbuf)));
+}
+
+static void print_both_open_warning(int kerr, int uerr, bool readwrite)
+{
+ char sbuf[STRERR_BUFSIZE];
+
+ if (kerr == uerr && print_common_warning(kerr, readwrite))
+ return;
+
+ if (print_configure_probe_event(kerr, uerr))
+ return;
+
+ if (kerr < 0)
+ pr_warning("Failed to open %s/kprobe_events: %s.\n",
+ tracing_path_mount(),
+ str_error_r(-kerr, sbuf, sizeof(sbuf)));
+ if (uerr < 0)
+ pr_warning("Failed to open %s/uprobe_events: %s.\n",
+ tracing_path_mount(),
+ str_error_r(-uerr, sbuf, sizeof(sbuf)));
+}
+
+int open_trace_file(const char *trace_file, bool readwrite)
+{
+ char buf[PATH_MAX];
+ int ret;
+
+ ret = e_snprintf(buf, PATH_MAX, "%s/%s", tracing_path_mount(), trace_file);
+ if (ret >= 0) {
+ pr_debug("Opening %s write=%d\n", buf, readwrite);
+ if (readwrite && !probe_event_dry_run)
+ ret = open(buf, O_RDWR | O_APPEND, 0);
+ else
+ ret = open(buf, O_RDONLY, 0);
+
+ if (ret < 0)
+ ret = -errno;
+ }
+ return ret;
+}
+
+static int open_kprobe_events(bool readwrite)
+{
+ return open_trace_file("kprobe_events", readwrite);
+}
+
+static int open_uprobe_events(bool readwrite)
+{
+ return open_trace_file("uprobe_events", readwrite);
+}
+
+int probe_file__open(int flag)
+{
+ int fd;
+
+ if (flag & PF_FL_UPROBE)
+ fd = open_uprobe_events(flag & PF_FL_RW);
+ else
+ fd = open_kprobe_events(flag & PF_FL_RW);
+ if (fd < 0)
+ print_open_warning(fd, flag & PF_FL_UPROBE, flag & PF_FL_RW);
+
+ return fd;
+}
+
+int probe_file__open_both(int *kfd, int *ufd, int flag)
+{
+ if (!kfd || !ufd)
+ return -EINVAL;
+
+ *kfd = open_kprobe_events(flag & PF_FL_RW);
+ *ufd = open_uprobe_events(flag & PF_FL_RW);
+ if (*kfd < 0 && *ufd < 0) {
+ print_both_open_warning(*kfd, *ufd, flag & PF_FL_RW);
+ return *kfd;
+ }
+
+ return 0;
+}
+
+/* Get raw string list of current kprobe_events or uprobe_events */
+struct strlist *probe_file__get_rawlist(int fd)
+{
+ int ret, idx, fddup;
+ FILE *fp;
+ char buf[MAX_CMDLEN];
+ char *p;
+ struct strlist *sl;
+
+ if (fd < 0)
+ return NULL;
+
+ sl = strlist__new(NULL, NULL);
+ if (sl == NULL)
+ return NULL;
+
+ fddup = dup(fd);
+ if (fddup < 0)
+ goto out_free_sl;
+
+ fp = fdopen(fddup, "r");
+ if (!fp)
+ goto out_close_fddup;
+
+ while (!feof(fp)) {
+ p = fgets(buf, MAX_CMDLEN, fp);
+ if (!p)
+ break;
+
+ idx = strlen(p) - 1;
+ if (p[idx] == '\n')
+ p[idx] = '\0';
+ ret = strlist__add(sl, buf);
+ if (ret < 0) {
+ pr_debug("strlist__add failed (%d)\n", ret);
+ goto out_close_fp;
+ }
+ }
+ fclose(fp);
+
+ return sl;
+
+out_close_fp:
+ fclose(fp);
+ goto out_free_sl;
+out_close_fddup:
+ close(fddup);
+out_free_sl:
+ strlist__delete(sl);
+ return NULL;
+}
+
+static struct strlist *__probe_file__get_namelist(int fd, bool include_group)
+{
+ char buf[128];
+ struct strlist *sl, *rawlist;
+ struct str_node *ent;
+ struct probe_trace_event tev;
+ int ret = 0;
+
+ memset(&tev, 0, sizeof(tev));
+ rawlist = probe_file__get_rawlist(fd);
+ if (!rawlist)
+ return NULL;
+ sl = strlist__new(NULL, NULL);
+ strlist__for_each_entry(ent, rawlist) {
+ ret = parse_probe_trace_command(ent->s, &tev);
+ if (ret < 0)
+ break;
+ if (include_group) {
+ ret = e_snprintf(buf, 128, "%s:%s", tev.group,
+ tev.event);
+ if (ret >= 0)
+ ret = strlist__add(sl, buf);
+ } else
+ ret = strlist__add(sl, tev.event);
+ clear_probe_trace_event(&tev);
+ /* Skip if there is same name multi-probe event in the list */
+ if (ret == -EEXIST)
+ ret = 0;
+ if (ret < 0)
+ break;
+ }
+ strlist__delete(rawlist);
+
+ if (ret < 0) {
+ strlist__delete(sl);
+ return NULL;
+ }
+ return sl;
+}
+
+/* Get current perf-probe event names */
+struct strlist *probe_file__get_namelist(int fd)
+{
+ return __probe_file__get_namelist(fd, false);
+}
+
+int probe_file__add_event(int fd, struct probe_trace_event *tev)
+{
+ int ret = 0;
+ char *buf = synthesize_probe_trace_command(tev);
+ char sbuf[STRERR_BUFSIZE];
+
+ if (!buf) {
+ pr_debug("Failed to synthesize probe trace event.\n");
+ return -EINVAL;
+ }
+
+ pr_debug("Writing event: %s\n", buf);
+ if (!probe_event_dry_run) {
+ if (write(fd, buf, strlen(buf)) < (int)strlen(buf)) {
+ ret = -errno;
+ pr_warning("Failed to write event: %s\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ }
+ }
+ free(buf);
+
+ return ret;
+}
+
+static int __del_trace_probe_event(int fd, struct str_node *ent)
+{
+ char *p;
+ char buf[128];
+ int ret;
+
+ /* Convert from perf-probe event to trace-probe event */
+ ret = e_snprintf(buf, 128, "-:%s", ent->s);
+ if (ret < 0)
+ goto error;
+
+ p = strchr(buf + 2, ':');
+ if (!p) {
+ pr_debug("Internal error: %s should have ':' but not.\n",
+ ent->s);
+ ret = -ENOTSUP;
+ goto error;
+ }
+ *p = '/';
+
+ pr_debug("Writing event: %s\n", buf);
+ ret = write(fd, buf, strlen(buf));
+ if (ret < 0) {
+ ret = -errno;
+ goto error;
+ }
+
+ return 0;
+error:
+ pr_warning("Failed to delete event: %s\n",
+ str_error_r(-ret, buf, sizeof(buf)));
+ return ret;
+}
+
+int probe_file__get_events(int fd, struct strfilter *filter,
+ struct strlist *plist)
+{
+ struct strlist *namelist;
+ struct str_node *ent;
+ const char *p;
+ int ret = -ENOENT;
+
+ if (!plist)
+ return -EINVAL;
+
+ namelist = __probe_file__get_namelist(fd, true);
+ if (!namelist)
+ return -ENOENT;
+
+ strlist__for_each_entry(ent, namelist) {
+ p = strchr(ent->s, ':');
+ if ((p && strfilter__compare(filter, p + 1)) ||
+ strfilter__compare(filter, ent->s)) {
+ ret = strlist__add(plist, ent->s);
+ if (ret == -ENOMEM) {
+ pr_err("strlist__add failed with -ENOMEM\n");
+ goto out;
+ }
+ ret = 0;
+ }
+ }
+out:
+ strlist__delete(namelist);
+
+ return ret;
+}
+
+int probe_file__del_strlist(int fd, struct strlist *namelist)
+{
+ int ret = 0;
+ struct str_node *ent;
+
+ strlist__for_each_entry(ent, namelist) {
+ ret = __del_trace_probe_event(fd, ent);
+ if (ret < 0)
+ break;
+ }
+ return ret;
+}
+
+int probe_file__del_events(int fd, struct strfilter *filter)
+{
+ struct strlist *namelist;
+ int ret;
+
+ namelist = strlist__new(NULL, NULL);
+ if (!namelist)
+ return -ENOMEM;
+
+ ret = probe_file__get_events(fd, filter, namelist);
+ if (ret < 0)
+ goto out;
+
+ ret = probe_file__del_strlist(fd, namelist);
+out:
+ strlist__delete(namelist);
+ return ret;
+}
+
+/* Caller must ensure to remove this entry from list */
+static void probe_cache_entry__delete(struct probe_cache_entry *entry)
+{
+ if (entry) {
+ BUG_ON(!list_empty(&entry->node));
+
+ strlist__delete(entry->tevlist);
+ clear_perf_probe_event(&entry->pev);
+ zfree(&entry->spev);
+ free(entry);
+ }
+}
+
+static struct probe_cache_entry *
+probe_cache_entry__new(struct perf_probe_event *pev)
+{
+ struct probe_cache_entry *entry = zalloc(sizeof(*entry));
+
+ if (entry) {
+ INIT_LIST_HEAD(&entry->node);
+ entry->tevlist = strlist__new(NULL, NULL);
+ if (!entry->tevlist)
+ zfree(&entry);
+ else if (pev) {
+ entry->spev = synthesize_perf_probe_command(pev);
+ if (!entry->spev ||
+ perf_probe_event__copy(&entry->pev, pev) < 0) {
+ probe_cache_entry__delete(entry);
+ return NULL;
+ }
+ }
+ }
+
+ return entry;
+}
+
+int probe_cache_entry__get_event(struct probe_cache_entry *entry,
+ struct probe_trace_event **tevs)
+{
+ struct probe_trace_event *tev;
+ struct str_node *node;
+ int ret, i;
+
+ ret = strlist__nr_entries(entry->tevlist);
+ if (ret > probe_conf.max_probes)
+ return -E2BIG;
+
+ *tevs = zalloc(ret * sizeof(*tev));
+ if (!*tevs)
+ return -ENOMEM;
+
+ i = 0;
+ strlist__for_each_entry(node, entry->tevlist) {
+ tev = &(*tevs)[i++];
+ ret = parse_probe_trace_command(node->s, tev);
+ if (ret < 0)
+ break;
+ }
+ return i;
+}
+
+/* For the kernel probe caches, pass target = NULL or DSO__NAME_KALLSYMS */
+static int probe_cache__open(struct probe_cache *pcache, const char *target,
+ struct nsinfo *nsi)
+{
+ char cpath[PATH_MAX];
+ char sbuildid[SBUILD_ID_SIZE];
+ char *dir_name = NULL;
+ bool is_kallsyms = false;
+ int ret, fd;
+ struct nscookie nsc;
+
+ if (target && build_id_cache__cached(target)) {
+ /* This is a cached buildid */
+ strlcpy(sbuildid, target, SBUILD_ID_SIZE);
+ dir_name = build_id_cache__linkname(sbuildid, NULL, 0);
+ goto found;
+ }
+
+ if (!target || !strcmp(target, DSO__NAME_KALLSYMS)) {
+ target = DSO__NAME_KALLSYMS;
+ is_kallsyms = true;
+ ret = sysfs__sprintf_build_id("/", sbuildid);
+ } else {
+ nsinfo__mountns_enter(nsi, &nsc);
+ ret = filename__sprintf_build_id(target, sbuildid);
+ nsinfo__mountns_exit(&nsc);
+ }
+
+ if (ret < 0) {
+ pr_debug("Failed to get build-id from %s.\n", target);
+ return ret;
+ }
+
+ /* If we have no buildid cache, make it */
+ if (!build_id_cache__cached(sbuildid)) {
+ ret = build_id_cache__add_s(sbuildid, target, nsi,
+ is_kallsyms, NULL);
+ if (ret < 0) {
+ pr_debug("Failed to add build-id cache: %s\n", target);
+ return ret;
+ }
+ }
+
+ dir_name = build_id_cache__cachedir(sbuildid, target, nsi, is_kallsyms,
+ false);
+found:
+ if (!dir_name) {
+ pr_debug("Failed to get cache from %s\n", target);
+ return -ENOMEM;
+ }
+
+ snprintf(cpath, PATH_MAX, "%s/probes", dir_name);
+ fd = open(cpath, O_CREAT | O_RDWR, 0644);
+ if (fd < 0)
+ pr_debug("Failed to open cache(%d): %s\n", fd, cpath);
+ free(dir_name);
+ pcache->fd = fd;
+
+ return fd;
+}
+
+static int probe_cache__load(struct probe_cache *pcache)
+{
+ struct probe_cache_entry *entry = NULL;
+ char buf[MAX_CMDLEN], *p;
+ int ret = 0, fddup;
+ FILE *fp;
+
+ fddup = dup(pcache->fd);
+ if (fddup < 0)
+ return -errno;
+ fp = fdopen(fddup, "r");
+ if (!fp) {
+ close(fddup);
+ return -EINVAL;
+ }
+
+ while (!feof(fp)) {
+ if (!fgets(buf, MAX_CMDLEN, fp))
+ break;
+ p = strchr(buf, '\n');
+ if (p)
+ *p = '\0';
+ /* #perf_probe_event or %sdt_event */
+ if (buf[0] == '#' || buf[0] == '%') {
+ entry = probe_cache_entry__new(NULL);
+ if (!entry) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (buf[0] == '%')
+ entry->sdt = true;
+ entry->spev = strdup(buf + 1);
+ if (entry->spev)
+ ret = parse_perf_probe_command(buf + 1,
+ &entry->pev);
+ else
+ ret = -ENOMEM;
+ if (ret < 0) {
+ probe_cache_entry__delete(entry);
+ goto out;
+ }
+ list_add_tail(&entry->node, &pcache->entries);
+ } else { /* trace_probe_event */
+ if (!entry) {
+ ret = -EINVAL;
+ goto out;
+ }
+ ret = strlist__add(entry->tevlist, buf);
+ if (ret == -ENOMEM) {
+ pr_err("strlist__add failed with -ENOMEM\n");
+ goto out;
+ }
+ }
+ }
+out:
+ fclose(fp);
+ return ret;
+}
+
+static struct probe_cache *probe_cache__alloc(void)
+{
+ struct probe_cache *pcache = zalloc(sizeof(*pcache));
+
+ if (pcache) {
+ INIT_LIST_HEAD(&pcache->entries);
+ pcache->fd = -EINVAL;
+ }
+ return pcache;
+}
+
+void probe_cache__purge(struct probe_cache *pcache)
+{
+ struct probe_cache_entry *entry, *n;
+
+ list_for_each_entry_safe(entry, n, &pcache->entries, node) {
+ list_del_init(&entry->node);
+ probe_cache_entry__delete(entry);
+ }
+}
+
+void probe_cache__delete(struct probe_cache *pcache)
+{
+ if (!pcache)
+ return;
+
+ probe_cache__purge(pcache);
+ if (pcache->fd > 0)
+ close(pcache->fd);
+ free(pcache);
+}
+
+struct probe_cache *probe_cache__new(const char *target, struct nsinfo *nsi)
+{
+ struct probe_cache *pcache = probe_cache__alloc();
+ int ret;
+
+ if (!pcache)
+ return NULL;
+
+ ret = probe_cache__open(pcache, target, nsi);
+ if (ret < 0) {
+ pr_debug("Cache open error: %d\n", ret);
+ goto out_err;
+ }
+
+ ret = probe_cache__load(pcache);
+ if (ret < 0) {
+ pr_debug("Cache read error: %d\n", ret);
+ goto out_err;
+ }
+
+ return pcache;
+
+out_err:
+ probe_cache__delete(pcache);
+ return NULL;
+}
+
+static bool streql(const char *a, const char *b)
+{
+ if (a == b)
+ return true;
+
+ if (!a || !b)
+ return false;
+
+ return !strcmp(a, b);
+}
+
+struct probe_cache_entry *
+probe_cache__find(struct probe_cache *pcache, struct perf_probe_event *pev)
+{
+ struct probe_cache_entry *entry = NULL;
+ char *cmd = synthesize_perf_probe_command(pev);
+
+ if (!cmd)
+ return NULL;
+
+ for_each_probe_cache_entry(entry, pcache) {
+ if (pev->sdt) {
+ if (entry->pev.event &&
+ streql(entry->pev.event, pev->event) &&
+ (!pev->group ||
+ streql(entry->pev.group, pev->group)))
+ goto found;
+
+ continue;
+ }
+ /* Hit if same event name or same command-string */
+ if ((pev->event &&
+ (streql(entry->pev.group, pev->group) &&
+ streql(entry->pev.event, pev->event))) ||
+ (!strcmp(entry->spev, cmd)))
+ goto found;
+ }
+ entry = NULL;
+
+found:
+ free(cmd);
+ return entry;
+}
+
+struct probe_cache_entry *
+probe_cache__find_by_name(struct probe_cache *pcache,
+ const char *group, const char *event)
+{
+ struct probe_cache_entry *entry = NULL;
+
+ for_each_probe_cache_entry(entry, pcache) {
+ /* Hit if same event name or same command-string */
+ if (streql(entry->pev.group, group) &&
+ streql(entry->pev.event, event))
+ goto found;
+ }
+ entry = NULL;
+
+found:
+ return entry;
+}
+
+int probe_cache__add_entry(struct probe_cache *pcache,
+ struct perf_probe_event *pev,
+ struct probe_trace_event *tevs, int ntevs)
+{
+ struct probe_cache_entry *entry = NULL;
+ char *command;
+ int i, ret = 0;
+
+ if (!pcache || !pev || !tevs || ntevs <= 0) {
+ ret = -EINVAL;
+ goto out_err;
+ }
+
+ /* Remove old cache entry */
+ entry = probe_cache__find(pcache, pev);
+ if (entry) {
+ list_del_init(&entry->node);
+ probe_cache_entry__delete(entry);
+ }
+
+ ret = -ENOMEM;
+ entry = probe_cache_entry__new(pev);
+ if (!entry)
+ goto out_err;
+
+ for (i = 0; i < ntevs; i++) {
+ if (!tevs[i].point.symbol)
+ continue;
+
+ command = synthesize_probe_trace_command(&tevs[i]);
+ if (!command)
+ goto out_err;
+ ret = strlist__add(entry->tevlist, command);
+ if (ret == -ENOMEM) {
+ pr_err("strlist__add failed with -ENOMEM\n");
+ goto out_err;
+ }
+
+ free(command);
+ }
+ list_add_tail(&entry->node, &pcache->entries);
+ pr_debug("Added probe cache: %d\n", ntevs);
+ return 0;
+
+out_err:
+ pr_debug("Failed to add probe caches\n");
+ probe_cache_entry__delete(entry);
+ return ret;
+}
+
+#ifdef HAVE_GELF_GETNOTE_SUPPORT
+static unsigned long long sdt_note__get_addr(struct sdt_note *note)
+{
+ return note->bit32 ?
+ (unsigned long long)note->addr.a32[SDT_NOTE_IDX_LOC] :
+ (unsigned long long)note->addr.a64[SDT_NOTE_IDX_LOC];
+}
+
+static unsigned long long sdt_note__get_ref_ctr_offset(struct sdt_note *note)
+{
+ return note->bit32 ?
+ (unsigned long long)note->addr.a32[SDT_NOTE_IDX_REFCTR] :
+ (unsigned long long)note->addr.a64[SDT_NOTE_IDX_REFCTR];
+}
+
+static const char * const type_to_suffix[] = {
+ ":s64", "", "", "", ":s32", "", ":s16", ":s8",
+ "", ":u8", ":u16", "", ":u32", "", "", "", ":u64"
+};
+
+/*
+ * Isolate the string number and convert it into a decimal value;
+ * this will be an index to get suffix of the uprobe name (defining
+ * the type)
+ */
+static int sdt_arg_parse_size(char *n_ptr, const char **suffix)
+{
+ long type_idx;
+
+ type_idx = strtol(n_ptr, NULL, 10);
+ if (type_idx < -8 || type_idx > 8) {
+ pr_debug4("Failed to get a valid sdt type\n");
+ return -1;
+ }
+
+ *suffix = type_to_suffix[type_idx + 8];
+ return 0;
+}
+
+static int synthesize_sdt_probe_arg(struct strbuf *buf, int i, const char *arg)
+{
+ char *op, *desc = strdup(arg), *new_op = NULL;
+ const char *suffix = "";
+ int ret = -1;
+
+ if (desc == NULL) {
+ pr_debug4("Allocation error\n");
+ return ret;
+ }
+
+ /*
+ * Argument is in N@OP format. N is size of the argument and OP is
+ * the actual assembly operand. N can be omitted; in that case
+ * argument is just OP(without @).
+ */
+ op = strchr(desc, '@');
+ if (op) {
+ op[0] = '\0';
+ op++;
+
+ if (sdt_arg_parse_size(desc, &suffix))
+ goto error;
+ } else {
+ op = desc;
+ }
+
+ ret = arch_sdt_arg_parse_op(op, &new_op);
+
+ if (ret < 0)
+ goto error;
+
+ if (ret == SDT_ARG_VALID) {
+ ret = strbuf_addf(buf, " arg%d=%s%s", i + 1, new_op, suffix);
+ if (ret < 0)
+ goto error;
+ }
+
+ ret = 0;
+error:
+ free(desc);
+ free(new_op);
+ return ret;
+}
+
+static char *synthesize_sdt_probe_command(struct sdt_note *note,
+ const char *pathname,
+ const char *sdtgrp)
+{
+ struct strbuf buf;
+ char *ret = NULL;
+ int i, args_count, err;
+ unsigned long long ref_ctr_offset;
+ char *arg;
+ int arg_idx = 0;
+
+ if (strbuf_init(&buf, 32) < 0)
+ return NULL;
+
+ err = strbuf_addf(&buf, "p:%s/%s %s:0x%llx",
+ sdtgrp, note->name, pathname,
+ sdt_note__get_addr(note));
+
+ ref_ctr_offset = sdt_note__get_ref_ctr_offset(note);
+ if (ref_ctr_offset && err >= 0)
+ err = strbuf_addf(&buf, "(0x%llx)", ref_ctr_offset);
+
+ if (err < 0)
+ goto error;
+
+ if (!note->args)
+ goto out;
+
+ if (note->args) {
+ char **args = argv_split(note->args, &args_count);
+
+ if (args == NULL)
+ goto error;
+
+ for (i = 0; i < args_count; ) {
+ /*
+ * FIXUP: Arm64 ELF section '.note.stapsdt' uses string
+ * format "-4@[sp, NUM]" if a probe is to access data in
+ * the stack, e.g. below is an example for the SDT
+ * Arguments:
+ *
+ * Arguments: -4@[sp, 12] -4@[sp, 8] -4@[sp, 4]
+ *
+ * Since the string introduces an extra space character
+ * in the middle of square brackets, the argument is
+ * divided into two items. Fixup for this case, if an
+ * item contains sub string "[sp,", need to concatenate
+ * the two items.
+ */
+ if (strstr(args[i], "[sp,") && (i+1) < args_count) {
+ err = asprintf(&arg, "%s %s", args[i], args[i+1]);
+ i += 2;
+ } else {
+ err = asprintf(&arg, "%s", args[i]);
+ i += 1;
+ }
+
+ /* Failed to allocate memory */
+ if (err < 0) {
+ argv_free(args);
+ goto error;
+ }
+
+ if (synthesize_sdt_probe_arg(&buf, arg_idx, arg) < 0) {
+ free(arg);
+ argv_free(args);
+ goto error;
+ }
+
+ free(arg);
+ arg_idx++;
+ }
+
+ argv_free(args);
+ }
+
+out:
+ ret = strbuf_detach(&buf, NULL);
+error:
+ strbuf_release(&buf);
+ return ret;
+}
+
+int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname)
+{
+ struct probe_cache_entry *entry = NULL;
+ struct list_head sdtlist;
+ struct sdt_note *note;
+ char *buf;
+ char sdtgrp[64];
+ int ret;
+
+ INIT_LIST_HEAD(&sdtlist);
+ ret = get_sdt_note_list(&sdtlist, pathname);
+ if (ret < 0) {
+ pr_debug4("Failed to get sdt note: %d\n", ret);
+ return ret;
+ }
+ list_for_each_entry(note, &sdtlist, note_list) {
+ ret = snprintf(sdtgrp, 64, "sdt_%s", note->provider);
+ if (ret < 0)
+ break;
+ /* Try to find same-name entry */
+ entry = probe_cache__find_by_name(pcache, sdtgrp, note->name);
+ if (!entry) {
+ entry = probe_cache_entry__new(NULL);
+ if (!entry) {
+ ret = -ENOMEM;
+ break;
+ }
+ entry->sdt = true;
+ ret = asprintf(&entry->spev, "%s:%s=%s", sdtgrp,
+ note->name, note->name);
+ if (ret < 0)
+ break;
+ entry->pev.event = strdup(note->name);
+ entry->pev.group = strdup(sdtgrp);
+ list_add_tail(&entry->node, &pcache->entries);
+ }
+ buf = synthesize_sdt_probe_command(note, pathname, sdtgrp);
+ if (!buf) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ ret = strlist__add(entry->tevlist, buf);
+
+ free(buf);
+ entry = NULL;
+
+ if (ret == -ENOMEM) {
+ pr_err("strlist__add failed with -ENOMEM\n");
+ break;
+ }
+ }
+ if (entry) {
+ list_del_init(&entry->node);
+ probe_cache_entry__delete(entry);
+ }
+ cleanup_sdt_note_list(&sdtlist);
+ return ret;
+}
+#endif
+
+static int probe_cache_entry__write(struct probe_cache_entry *entry, int fd)
+{
+ struct str_node *snode;
+ struct stat st;
+ struct iovec iov[3];
+ const char *prefix = entry->sdt ? "%" : "#";
+ int ret;
+ /* Save stat for rollback */
+ ret = fstat(fd, &st);
+ if (ret < 0)
+ return ret;
+
+ pr_debug("Writing cache: %s%s\n", prefix, entry->spev);
+ iov[0].iov_base = (void *)prefix; iov[0].iov_len = 1;
+ iov[1].iov_base = entry->spev; iov[1].iov_len = strlen(entry->spev);
+ iov[2].iov_base = (void *)"\n"; iov[2].iov_len = 1;
+ ret = writev(fd, iov, 3);
+ if (ret < (int)iov[1].iov_len + 2)
+ goto rollback;
+
+ strlist__for_each_entry(snode, entry->tevlist) {
+ iov[0].iov_base = (void *)snode->s;
+ iov[0].iov_len = strlen(snode->s);
+ iov[1].iov_base = (void *)"\n"; iov[1].iov_len = 1;
+ ret = writev(fd, iov, 2);
+ if (ret < (int)iov[0].iov_len + 1)
+ goto rollback;
+ }
+ return 0;
+
+rollback:
+ /* Rollback to avoid cache file corruption */
+ if (ret > 0)
+ ret = -1;
+ if (ftruncate(fd, st.st_size) < 0)
+ ret = -2;
+
+ return ret;
+}
+
+int probe_cache__commit(struct probe_cache *pcache)
+{
+ struct probe_cache_entry *entry;
+ int ret = 0;
+
+ /* TBD: if we do not update existing entries, skip it */
+ ret = lseek(pcache->fd, 0, SEEK_SET);
+ if (ret < 0)
+ goto out;
+
+ ret = ftruncate(pcache->fd, 0);
+ if (ret < 0)
+ goto out;
+
+ for_each_probe_cache_entry(entry, pcache) {
+ ret = probe_cache_entry__write(entry, pcache->fd);
+ pr_debug("Cache committed: %d\n", ret);
+ if (ret < 0)
+ break;
+ }
+out:
+ return ret;
+}
+
+static bool probe_cache_entry__compare(struct probe_cache_entry *entry,
+ struct strfilter *filter)
+{
+ char buf[128], *ptr = entry->spev;
+
+ if (entry->pev.event) {
+ snprintf(buf, 128, "%s:%s", entry->pev.group, entry->pev.event);
+ ptr = buf;
+ }
+ return strfilter__compare(filter, ptr);
+}
+
+int probe_cache__filter_purge(struct probe_cache *pcache,
+ struct strfilter *filter)
+{
+ struct probe_cache_entry *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &pcache->entries, node) {
+ if (probe_cache_entry__compare(entry, filter)) {
+ pr_info("Removed cached event: %s\n", entry->spev);
+ list_del_init(&entry->node);
+ probe_cache_entry__delete(entry);
+ }
+ }
+ return 0;
+}
+
+static int probe_cache__show_entries(struct probe_cache *pcache,
+ struct strfilter *filter)
+{
+ struct probe_cache_entry *entry;
+
+ for_each_probe_cache_entry(entry, pcache) {
+ if (probe_cache_entry__compare(entry, filter))
+ printf("%s\n", entry->spev);
+ }
+ return 0;
+}
+
+/* Show all cached probes */
+int probe_cache__show_all_caches(struct strfilter *filter)
+{
+ struct probe_cache *pcache;
+ struct strlist *bidlist;
+ struct str_node *nd;
+ char *buf = strfilter__string(filter);
+
+ pr_debug("list cache with filter: %s\n", buf);
+ free(buf);
+
+ bidlist = build_id_cache__list_all(true);
+ if (!bidlist) {
+ pr_debug("Failed to get buildids: %d\n", errno);
+ return -EINVAL;
+ }
+ strlist__for_each_entry(nd, bidlist) {
+ pcache = probe_cache__new(nd->s, NULL);
+ if (!pcache)
+ continue;
+ if (!list_empty(&pcache->entries)) {
+ buf = build_id_cache__origname(nd->s);
+ printf("%s (%s):\n", buf, nd->s);
+ free(buf);
+ probe_cache__show_entries(pcache, filter);
+ }
+ probe_cache__delete(pcache);
+ }
+ strlist__delete(bidlist);
+
+ return 0;
+}
+
+enum ftrace_readme {
+ FTRACE_README_PROBE_TYPE_X = 0,
+ FTRACE_README_KRETPROBE_OFFSET,
+ FTRACE_README_UPROBE_REF_CTR,
+ FTRACE_README_USER_ACCESS,
+ FTRACE_README_MULTIPROBE_EVENT,
+ FTRACE_README_IMMEDIATE_VALUE,
+ FTRACE_README_END,
+};
+
+static struct {
+ const char *pattern;
+ bool avail;
+} ftrace_readme_table[] = {
+#define DEFINE_TYPE(idx, pat) \
+ [idx] = {.pattern = pat, .avail = false}
+ DEFINE_TYPE(FTRACE_README_PROBE_TYPE_X, "*type: * x8/16/32/64,*"),
+ DEFINE_TYPE(FTRACE_README_KRETPROBE_OFFSET, "*place (kretprobe): *"),
+ DEFINE_TYPE(FTRACE_README_UPROBE_REF_CTR, "*ref_ctr_offset*"),
+ DEFINE_TYPE(FTRACE_README_USER_ACCESS, "*u]<offset>*"),
+ DEFINE_TYPE(FTRACE_README_MULTIPROBE_EVENT, "*Create/append/*"),
+ DEFINE_TYPE(FTRACE_README_IMMEDIATE_VALUE, "*\\imm-value,*"),
+};
+
+static bool scan_ftrace_readme(enum ftrace_readme type)
+{
+ int fd;
+ FILE *fp;
+ char *buf = NULL;
+ size_t len = 0;
+ bool ret = false;
+ static bool scanned = false;
+
+ if (scanned)
+ goto result;
+
+ fd = open_trace_file("README", false);
+ if (fd < 0)
+ return ret;
+
+ fp = fdopen(fd, "r");
+ if (!fp) {
+ close(fd);
+ return ret;
+ }
+
+ while (getline(&buf, &len, fp) > 0)
+ for (enum ftrace_readme i = 0; i < FTRACE_README_END; i++)
+ if (!ftrace_readme_table[i].avail)
+ ftrace_readme_table[i].avail =
+ strglobmatch(buf, ftrace_readme_table[i].pattern);
+ scanned = true;
+
+ fclose(fp);
+ free(buf);
+
+result:
+ if (type >= FTRACE_README_END)
+ return false;
+
+ return ftrace_readme_table[type].avail;
+}
+
+bool probe_type_is_available(enum probe_type type)
+{
+ if (type >= PROBE_TYPE_END)
+ return false;
+ else if (type == PROBE_TYPE_X)
+ return scan_ftrace_readme(FTRACE_README_PROBE_TYPE_X);
+
+ return true;
+}
+
+bool kretprobe_offset_is_supported(void)
+{
+ return scan_ftrace_readme(FTRACE_README_KRETPROBE_OFFSET);
+}
+
+bool uprobe_ref_ctr_is_supported(void)
+{
+ return scan_ftrace_readme(FTRACE_README_UPROBE_REF_CTR);
+}
+
+bool user_access_is_supported(void)
+{
+ return scan_ftrace_readme(FTRACE_README_USER_ACCESS);
+}
+
+bool multiprobe_event_is_supported(void)
+{
+ return scan_ftrace_readme(FTRACE_README_MULTIPROBE_EVENT);
+}
+
+bool immediate_value_is_supported(void)
+{
+ return scan_ftrace_readme(FTRACE_README_IMMEDIATE_VALUE);
+}
diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h
new file mode 100644
index 000000000..0dba88c0f
--- /dev/null
+++ b/tools/perf/util/probe-file.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PROBE_FILE_H
+#define __PROBE_FILE_H
+
+#include "probe-event.h"
+
+struct strlist;
+struct strfilter;
+
+/* Cache of probe definitions */
+struct probe_cache_entry {
+ struct list_head node;
+ bool sdt;
+ struct perf_probe_event pev;
+ char *spev;
+ struct strlist *tevlist;
+};
+
+struct probe_cache {
+ int fd;
+ struct list_head entries;
+};
+
+enum probe_type {
+ PROBE_TYPE_U = 0,
+ PROBE_TYPE_S,
+ PROBE_TYPE_X,
+ PROBE_TYPE_STRING,
+ PROBE_TYPE_BITFIELD,
+ PROBE_TYPE_END,
+};
+
+#define PF_FL_UPROBE 1
+#define PF_FL_RW 2
+#define for_each_probe_cache_entry(entry, pcache) \
+ list_for_each_entry(entry, &pcache->entries, node)
+
+/* probe-file.c depends on libelf */
+#ifdef HAVE_LIBELF_SUPPORT
+int open_trace_file(const char *trace_file, bool readwrite);
+int probe_file__open(int flag);
+int probe_file__open_both(int *kfd, int *ufd, int flag);
+struct strlist *probe_file__get_namelist(int fd);
+struct strlist *probe_file__get_rawlist(int fd);
+int probe_file__add_event(int fd, struct probe_trace_event *tev);
+
+int probe_file__del_events(int fd, struct strfilter *filter);
+int probe_file__get_events(int fd, struct strfilter *filter,
+ struct strlist *plist);
+int probe_file__del_strlist(int fd, struct strlist *namelist);
+
+int probe_cache_entry__get_event(struct probe_cache_entry *entry,
+ struct probe_trace_event **tevs);
+
+struct probe_cache *probe_cache__new(const char *target, struct nsinfo *nsi);
+int probe_cache__add_entry(struct probe_cache *pcache,
+ struct perf_probe_event *pev,
+ struct probe_trace_event *tevs, int ntevs);
+int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname);
+int probe_cache__commit(struct probe_cache *pcache);
+void probe_cache__purge(struct probe_cache *pcache);
+void probe_cache__delete(struct probe_cache *pcache);
+int probe_cache__filter_purge(struct probe_cache *pcache,
+ struct strfilter *filter);
+struct probe_cache_entry *probe_cache__find(struct probe_cache *pcache,
+ struct perf_probe_event *pev);
+struct probe_cache_entry *probe_cache__find_by_name(struct probe_cache *pcache,
+ const char *group, const char *event);
+int probe_cache__show_all_caches(struct strfilter *filter);
+bool probe_type_is_available(enum probe_type type);
+bool kretprobe_offset_is_supported(void);
+bool uprobe_ref_ctr_is_supported(void);
+bool user_access_is_supported(void);
+bool multiprobe_event_is_supported(void);
+bool immediate_value_is_supported(void);
+#else /* ! HAVE_LIBELF_SUPPORT */
+static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused, struct nsinfo *nsi __maybe_unused)
+{
+ return NULL;
+}
+#define probe_cache__delete(pcache) do {} while (0)
+#endif
+#endif
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
new file mode 100644
index 000000000..f171360b0
--- /dev/null
+++ b/tools/perf/util/probe-finder.c
@@ -0,0 +1,2109 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * probe-finder.c : C expression to kprobe event converter
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ */
+
+#include <inttypes.h>
+#include <sys/utsname.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <dwarf-regs.h>
+
+#include <linux/bitops.h>
+#include <linux/zalloc.h>
+#include "event.h"
+#include "dso.h"
+#include "debug.h"
+#include "intlist.h"
+#include "strbuf.h"
+#include "strlist.h"
+#include "symbol.h"
+#include "probe-finder.h"
+#include "probe-file.h"
+#include "string2.h"
+
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+#include <elfutils/debuginfod.h>
+#endif
+
+/* Kprobe tracer basic type is up to u64 */
+#define MAX_BASIC_TYPE_BITS 64
+
+/* Dwarf FL wrappers */
+static char *debuginfo_path; /* Currently dummy */
+
+static const Dwfl_Callbacks offline_callbacks = {
+ .find_debuginfo = dwfl_standard_find_debuginfo,
+ .debuginfo_path = &debuginfo_path,
+
+ .section_address = dwfl_offline_section_address,
+
+ /* We use this table for core files too. */
+ .find_elf = dwfl_build_id_find_elf,
+};
+
+/* Get a Dwarf from offline image */
+static int debuginfo__init_offline_dwarf(struct debuginfo *dbg,
+ const char *path)
+{
+ GElf_Addr dummy;
+ int fd;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return fd;
+
+ dbg->dwfl = dwfl_begin(&offline_callbacks);
+ if (!dbg->dwfl)
+ goto error;
+
+ dwfl_report_begin(dbg->dwfl);
+ dbg->mod = dwfl_report_offline(dbg->dwfl, "", "", fd);
+ if (!dbg->mod)
+ goto error;
+
+ dbg->dbg = dwfl_module_getdwarf(dbg->mod, &dbg->bias);
+ if (!dbg->dbg)
+ goto error;
+
+ dwfl_module_build_id(dbg->mod, &dbg->build_id, &dummy);
+
+ dwfl_report_end(dbg->dwfl, NULL, NULL);
+
+ return 0;
+error:
+ if (dbg->dwfl)
+ dwfl_end(dbg->dwfl);
+ else
+ close(fd);
+ memset(dbg, 0, sizeof(*dbg));
+
+ return -ENOENT;
+}
+
+static struct debuginfo *__debuginfo__new(const char *path)
+{
+ struct debuginfo *dbg = zalloc(sizeof(*dbg));
+ if (!dbg)
+ return NULL;
+
+ if (debuginfo__init_offline_dwarf(dbg, path) < 0)
+ zfree(&dbg);
+ if (dbg)
+ pr_debug("Open Debuginfo file: %s\n", path);
+ return dbg;
+}
+
+enum dso_binary_type distro_dwarf_types[] = {
+ DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
+ DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
+ DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
+ DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
+ DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO,
+ DSO_BINARY_TYPE__NOT_FOUND,
+};
+
+struct debuginfo *debuginfo__new(const char *path)
+{
+ enum dso_binary_type *type;
+ char buf[PATH_MAX], nil = '\0';
+ struct dso *dso;
+ struct debuginfo *dinfo = NULL;
+ struct build_id bid;
+
+ /* Try to open distro debuginfo files */
+ dso = dso__new(path);
+ if (!dso)
+ goto out;
+
+ /* Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO */
+ if (is_regular_file(path) && filename__read_build_id(path, &bid) > 0)
+ dso__set_build_id(dso, &bid);
+
+ for (type = distro_dwarf_types;
+ !dinfo && *type != DSO_BINARY_TYPE__NOT_FOUND;
+ type++) {
+ if (dso__read_binary_type_filename(dso, *type, &nil,
+ buf, PATH_MAX) < 0)
+ continue;
+ dinfo = __debuginfo__new(buf);
+ }
+ dso__put(dso);
+
+out:
+ /* if failed to open all distro debuginfo, open given binary */
+ return dinfo ? : __debuginfo__new(path);
+}
+
+void debuginfo__delete(struct debuginfo *dbg)
+{
+ if (dbg) {
+ if (dbg->dwfl)
+ dwfl_end(dbg->dwfl);
+ free(dbg);
+ }
+}
+
+/*
+ * Probe finder related functions
+ */
+
+static struct probe_trace_arg_ref *alloc_trace_arg_ref(long offs)
+{
+ struct probe_trace_arg_ref *ref;
+ ref = zalloc(sizeof(struct probe_trace_arg_ref));
+ if (ref != NULL)
+ ref->offset = offs;
+ return ref;
+}
+
+/*
+ * Convert a location into trace_arg.
+ * If tvar == NULL, this just checks variable can be converted.
+ * If fentry == true and vr_die is a parameter, do heuristic search
+ * for the location fuzzed by function entry mcount.
+ */
+static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
+ Dwarf_Op *fb_ops, Dwarf_Die *sp_die,
+ unsigned int machine,
+ struct probe_trace_arg *tvar)
+{
+ Dwarf_Attribute attr;
+ Dwarf_Addr tmp = 0;
+ Dwarf_Op *op;
+ size_t nops;
+ unsigned int regn;
+ Dwarf_Word offs = 0;
+ bool ref = false;
+ const char *regs;
+ int ret, ret2 = 0;
+
+ if (dwarf_attr(vr_die, DW_AT_external, &attr) != NULL)
+ goto static_var;
+
+ /* Constant value */
+ if (dwarf_attr(vr_die, DW_AT_const_value, &attr) &&
+ immediate_value_is_supported()) {
+ Dwarf_Sword snum;
+
+ if (!tvar)
+ return 0;
+
+ dwarf_formsdata(&attr, &snum);
+ ret = asprintf(&tvar->value, "\\%ld", (long)snum);
+
+ return ret < 0 ? -ENOMEM : 0;
+ }
+
+ /* TODO: handle more than 1 exprs */
+ if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL)
+ return -EINVAL; /* Broken DIE ? */
+ if (dwarf_getlocation_addr(&attr, addr, &op, &nops, 1) <= 0) {
+ ret = dwarf_entrypc(sp_die, &tmp);
+ if (ret)
+ return -ENOENT;
+
+ if (probe_conf.show_location_range &&
+ (dwarf_tag(vr_die) == DW_TAG_variable)) {
+ ret2 = -ERANGE;
+ } else if (addr != tmp ||
+ dwarf_tag(vr_die) != DW_TAG_formal_parameter) {
+ return -ENOENT;
+ }
+
+ ret = dwarf_highpc(sp_die, &tmp);
+ if (ret)
+ return -ENOENT;
+ /*
+ * This is fuzzed by fentry mcount. We try to find the
+ * parameter location at the earliest address.
+ */
+ for (addr += 1; addr <= tmp; addr++) {
+ if (dwarf_getlocation_addr(&attr, addr, &op,
+ &nops, 1) > 0)
+ goto found;
+ }
+ return -ENOENT;
+ }
+found:
+ if (nops == 0)
+ /* TODO: Support const_value */
+ return -ENOENT;
+
+ if (op->atom == DW_OP_addr) {
+static_var:
+ if (!tvar)
+ return ret2;
+ /* Static variables on memory (not stack), make @varname */
+ ret = strlen(dwarf_diename(vr_die));
+ tvar->value = zalloc(ret + 2);
+ if (tvar->value == NULL)
+ return -ENOMEM;
+ snprintf(tvar->value, ret + 2, "@%s", dwarf_diename(vr_die));
+ tvar->ref = alloc_trace_arg_ref((long)offs);
+ if (tvar->ref == NULL)
+ return -ENOMEM;
+ return ret2;
+ }
+
+ /* If this is based on frame buffer, set the offset */
+ if (op->atom == DW_OP_fbreg) {
+ if (fb_ops == NULL)
+ return -ENOTSUP;
+ ref = true;
+ offs = op->number;
+ op = &fb_ops[0];
+ }
+
+ if (op->atom >= DW_OP_breg0 && op->atom <= DW_OP_breg31) {
+ regn = op->atom - DW_OP_breg0;
+ offs += op->number;
+ ref = true;
+ } else if (op->atom >= DW_OP_reg0 && op->atom <= DW_OP_reg31) {
+ regn = op->atom - DW_OP_reg0;
+ } else if (op->atom == DW_OP_bregx) {
+ regn = op->number;
+ offs += op->number2;
+ ref = true;
+ } else if (op->atom == DW_OP_regx) {
+ regn = op->number;
+ } else {
+ pr_debug("DW_OP %x is not supported.\n", op->atom);
+ return -ENOTSUP;
+ }
+
+ if (!tvar)
+ return ret2;
+
+ regs = get_dwarf_regstr(regn, machine);
+ if (!regs) {
+ /* This should be a bug in DWARF or this tool */
+ pr_warning("Mapping for the register number %u "
+ "missing on this architecture.\n", regn);
+ return -ENOTSUP;
+ }
+
+ tvar->value = strdup(regs);
+ if (tvar->value == NULL)
+ return -ENOMEM;
+
+ if (ref) {
+ tvar->ref = alloc_trace_arg_ref((long)offs);
+ if (tvar->ref == NULL)
+ return -ENOMEM;
+ }
+ return ret2;
+}
+
+#define BYTES_TO_BITS(nb) ((nb) * BITS_PER_LONG / sizeof(long))
+
+static int convert_variable_type(Dwarf_Die *vr_die,
+ struct probe_trace_arg *tvar,
+ const char *cast, bool user_access)
+{
+ struct probe_trace_arg_ref **ref_ptr = &tvar->ref;
+ Dwarf_Die type;
+ char buf[16];
+ char sbuf[STRERR_BUFSIZE];
+ int bsize, boffs, total;
+ int ret;
+ char prefix;
+
+ /* TODO: check all types */
+ if (cast && strcmp(cast, "string") != 0 && strcmp(cast, "ustring") &&
+ strcmp(cast, "x") != 0 &&
+ strcmp(cast, "s") != 0 && strcmp(cast, "u") != 0) {
+ /* Non string type is OK */
+ /* and respect signedness/hexadecimal cast */
+ tvar->type = strdup(cast);
+ return (tvar->type == NULL) ? -ENOMEM : 0;
+ }
+
+ bsize = dwarf_bitsize(vr_die);
+ if (bsize > 0) {
+ /* This is a bitfield */
+ boffs = dwarf_bitoffset(vr_die);
+ total = dwarf_bytesize(vr_die);
+ if (boffs < 0 || total < 0)
+ return -ENOENT;
+ ret = snprintf(buf, 16, "b%d@%d/%zd", bsize, boffs,
+ BYTES_TO_BITS(total));
+ goto formatted;
+ }
+
+ if (die_get_real_type(vr_die, &type) == NULL) {
+ pr_warning("Failed to get a type information of %s.\n",
+ dwarf_diename(vr_die));
+ return -ENOENT;
+ }
+
+ pr_debug("%s type is %s.\n",
+ dwarf_diename(vr_die), dwarf_diename(&type));
+
+ if (cast && (!strcmp(cast, "string") || !strcmp(cast, "ustring"))) {
+ /* String type */
+ ret = dwarf_tag(&type);
+ if (ret != DW_TAG_pointer_type &&
+ ret != DW_TAG_array_type) {
+ pr_warning("Failed to cast into string: "
+ "%s(%s) is not a pointer nor array.\n",
+ dwarf_diename(vr_die), dwarf_diename(&type));
+ return -EINVAL;
+ }
+ if (die_get_real_type(&type, &type) == NULL) {
+ pr_warning("Failed to get a type"
+ " information.\n");
+ return -ENOENT;
+ }
+ if (ret == DW_TAG_pointer_type) {
+ while (*ref_ptr)
+ ref_ptr = &(*ref_ptr)->next;
+ /* Add new reference with offset +0 */
+ *ref_ptr = zalloc(sizeof(struct probe_trace_arg_ref));
+ if (*ref_ptr == NULL) {
+ pr_warning("Out of memory error\n");
+ return -ENOMEM;
+ }
+ (*ref_ptr)->user_access = user_access;
+ }
+ if (!die_compare_name(&type, "char") &&
+ !die_compare_name(&type, "unsigned char")) {
+ pr_warning("Failed to cast into string: "
+ "%s is not (unsigned) char *.\n",
+ dwarf_diename(vr_die));
+ return -EINVAL;
+ }
+ tvar->type = strdup(cast);
+ return (tvar->type == NULL) ? -ENOMEM : 0;
+ }
+
+ if (cast && (strcmp(cast, "u") == 0))
+ prefix = 'u';
+ else if (cast && (strcmp(cast, "s") == 0))
+ prefix = 's';
+ else if (cast && (strcmp(cast, "x") == 0) &&
+ probe_type_is_available(PROBE_TYPE_X))
+ prefix = 'x';
+ else
+ prefix = die_is_signed_type(&type) ? 's' :
+ probe_type_is_available(PROBE_TYPE_X) ? 'x' : 'u';
+
+ ret = dwarf_bytesize(&type);
+ if (ret <= 0)
+ /* No size ... try to use default type */
+ return 0;
+ ret = BYTES_TO_BITS(ret);
+
+ /* Check the bitwidth */
+ if (ret > MAX_BASIC_TYPE_BITS) {
+ pr_info("%s exceeds max-bitwidth. Cut down to %d bits.\n",
+ dwarf_diename(&type), MAX_BASIC_TYPE_BITS);
+ ret = MAX_BASIC_TYPE_BITS;
+ }
+ ret = snprintf(buf, 16, "%c%d", prefix, ret);
+
+formatted:
+ if (ret < 0 || ret >= 16) {
+ if (ret >= 16)
+ ret = -E2BIG;
+ pr_warning("Failed to convert variable type: %s\n",
+ str_error_r(-ret, sbuf, sizeof(sbuf)));
+ return ret;
+ }
+ tvar->type = strdup(buf);
+ if (tvar->type == NULL)
+ return -ENOMEM;
+ return 0;
+}
+
+static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
+ struct perf_probe_arg_field *field,
+ struct probe_trace_arg_ref **ref_ptr,
+ Dwarf_Die *die_mem, bool user_access)
+{
+ struct probe_trace_arg_ref *ref = *ref_ptr;
+ Dwarf_Die type;
+ Dwarf_Word offs;
+ int ret, tag;
+
+ pr_debug("converting %s in %s\n", field->name, varname);
+ if (die_get_real_type(vr_die, &type) == NULL) {
+ pr_warning("Failed to get the type of %s.\n", varname);
+ return -ENOENT;
+ }
+ pr_debug2("Var real type: %s (%x)\n", dwarf_diename(&type),
+ (unsigned)dwarf_dieoffset(&type));
+ tag = dwarf_tag(&type);
+
+ if (field->name[0] == '[' &&
+ (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type)) {
+ /* Save original type for next field or type */
+ memcpy(die_mem, &type, sizeof(*die_mem));
+ /* Get the type of this array */
+ if (die_get_real_type(&type, &type) == NULL) {
+ pr_warning("Failed to get the type of %s.\n", varname);
+ return -ENOENT;
+ }
+ pr_debug2("Array real type: %s (%x)\n", dwarf_diename(&type),
+ (unsigned)dwarf_dieoffset(&type));
+ if (tag == DW_TAG_pointer_type) {
+ ref = zalloc(sizeof(struct probe_trace_arg_ref));
+ if (ref == NULL)
+ return -ENOMEM;
+ if (*ref_ptr)
+ (*ref_ptr)->next = ref;
+ else
+ *ref_ptr = ref;
+ }
+ ref->offset += dwarf_bytesize(&type) * field->index;
+ ref->user_access = user_access;
+ goto next;
+ } else if (tag == DW_TAG_pointer_type) {
+ /* Check the pointer and dereference */
+ if (!field->ref) {
+ pr_err("Semantic error: %s must be referred by '->'\n",
+ field->name);
+ return -EINVAL;
+ }
+ /* Get the type pointed by this pointer */
+ if (die_get_real_type(&type, &type) == NULL) {
+ pr_warning("Failed to get the type of %s.\n", varname);
+ return -ENOENT;
+ }
+ /* Verify it is a data structure */
+ tag = dwarf_tag(&type);
+ if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) {
+ pr_warning("%s is not a data structure nor a union.\n",
+ varname);
+ return -EINVAL;
+ }
+
+ ref = zalloc(sizeof(struct probe_trace_arg_ref));
+ if (ref == NULL)
+ return -ENOMEM;
+ if (*ref_ptr)
+ (*ref_ptr)->next = ref;
+ else
+ *ref_ptr = ref;
+ } else {
+ /* Verify it is a data structure */
+ if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) {
+ pr_warning("%s is not a data structure nor a union.\n",
+ varname);
+ return -EINVAL;
+ }
+ if (field->name[0] == '[') {
+ pr_err("Semantic error: %s is not a pointer"
+ " nor array.\n", varname);
+ return -EINVAL;
+ }
+ /* While processing unnamed field, we don't care about this */
+ if (field->ref && dwarf_diename(vr_die)) {
+ pr_err("Semantic error: %s must be referred by '.'\n",
+ field->name);
+ return -EINVAL;
+ }
+ if (!ref) {
+ pr_warning("Structure on a register is not "
+ "supported yet.\n");
+ return -ENOTSUP;
+ }
+ }
+
+ if (die_find_member(&type, field->name, die_mem) == NULL) {
+ pr_warning("%s(type:%s) has no member %s.\n", varname,
+ dwarf_diename(&type), field->name);
+ return -EINVAL;
+ }
+
+ /* Get the offset of the field */
+ if (tag == DW_TAG_union_type) {
+ offs = 0;
+ } else {
+ ret = die_get_data_member_location(die_mem, &offs);
+ if (ret < 0) {
+ pr_warning("Failed to get the offset of %s.\n",
+ field->name);
+ return ret;
+ }
+ }
+ ref->offset += (long)offs;
+ ref->user_access = user_access;
+
+ /* If this member is unnamed, we need to reuse this field */
+ if (!dwarf_diename(die_mem))
+ return convert_variable_fields(die_mem, varname, field,
+ &ref, die_mem, user_access);
+
+next:
+ /* Converting next field */
+ if (field->next)
+ return convert_variable_fields(die_mem, field->name,
+ field->next, &ref, die_mem, user_access);
+ else
+ return 0;
+}
+
+static void print_var_not_found(const char *varname)
+{
+ pr_err("Failed to find the location of the '%s' variable at this address.\n"
+ " Perhaps it has been optimized out.\n"
+ " Use -V with the --range option to show '%s' location range.\n",
+ varname, varname);
+}
+
+/* Show a variables in kprobe event format */
+static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf)
+{
+ Dwarf_Die die_mem;
+ int ret;
+
+ pr_debug("Converting variable %s into trace event.\n",
+ dwarf_diename(vr_die));
+
+ ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops,
+ &pf->sp_die, pf->machine, pf->tvar);
+ if (ret == -ENOENT && pf->skip_empty_arg)
+ /* This can be found in other place. skip it */
+ return 0;
+ if (ret == -ENOENT || ret == -EINVAL) {
+ print_var_not_found(pf->pvar->var);
+ } else if (ret == -ENOTSUP)
+ pr_err("Sorry, we don't support this variable location yet.\n");
+ else if (ret == 0 && pf->pvar->field) {
+ ret = convert_variable_fields(vr_die, pf->pvar->var,
+ pf->pvar->field, &pf->tvar->ref,
+ &die_mem, pf->pvar->user_access);
+ vr_die = &die_mem;
+ }
+ if (ret == 0)
+ ret = convert_variable_type(vr_die, pf->tvar, pf->pvar->type,
+ pf->pvar->user_access);
+ /* *expr will be cached in libdw. Don't free it. */
+ return ret;
+}
+
+/* Find a variable in a scope DIE */
+static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf)
+{
+ Dwarf_Die vr_die;
+ char *buf, *ptr;
+ int ret = 0;
+
+ /* Copy raw parameters */
+ if (!is_c_varname(pf->pvar->var))
+ return copy_to_probe_trace_arg(pf->tvar, pf->pvar);
+
+ if (pf->pvar->name)
+ pf->tvar->name = strdup(pf->pvar->name);
+ else {
+ buf = synthesize_perf_probe_arg(pf->pvar);
+ if (!buf)
+ return -ENOMEM;
+ ptr = strchr(buf, ':'); /* Change type separator to _ */
+ if (ptr)
+ *ptr = '_';
+ pf->tvar->name = buf;
+ }
+ if (pf->tvar->name == NULL)
+ return -ENOMEM;
+
+ pr_debug("Searching '%s' variable in context.\n", pf->pvar->var);
+ /* Search child die for local variables and parameters. */
+ if (!die_find_variable_at(sc_die, pf->pvar->var, pf->addr, &vr_die)) {
+ /* Search again in global variables */
+ if (!die_find_variable_at(&pf->cu_die, pf->pvar->var,
+ 0, &vr_die)) {
+ if (pf->skip_empty_arg)
+ return 0;
+ pr_warning("Failed to find '%s' in this function.\n",
+ pf->pvar->var);
+ ret = -ENOENT;
+ }
+ }
+ if (ret >= 0)
+ ret = convert_variable(&vr_die, pf);
+
+ return ret;
+}
+
+/* Convert subprogram DIE to trace point */
+static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod,
+ Dwarf_Addr paddr, bool retprobe,
+ const char *function,
+ struct probe_trace_point *tp)
+{
+ Dwarf_Addr eaddr;
+ GElf_Sym sym;
+ const char *symbol;
+
+ /* Verify the address is correct */
+ if (!dwarf_haspc(sp_die, paddr)) {
+ pr_warning("Specified offset is out of %s\n",
+ dwarf_diename(sp_die));
+ return -EINVAL;
+ }
+
+ if (dwarf_entrypc(sp_die, &eaddr) == 0) {
+ /* If the DIE has entrypc, use it. */
+ symbol = dwarf_diename(sp_die);
+ } else {
+ /* Try to get actual symbol name and address from symtab */
+ symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL);
+ eaddr = sym.st_value;
+ }
+ if (!symbol) {
+ pr_warning("Failed to find symbol at 0x%lx\n",
+ (unsigned long)paddr);
+ return -ENOENT;
+ }
+
+ tp->offset = (unsigned long)(paddr - eaddr);
+ tp->address = paddr;
+ tp->symbol = strdup(symbol);
+ if (!tp->symbol)
+ return -ENOMEM;
+
+ /* Return probe must be on the head of a subprogram */
+ if (retprobe) {
+ if (eaddr != paddr) {
+ pr_warning("Failed to find \"%s%%return\",\n"
+ " because %s is an inlined function and"
+ " has no return point.\n", function,
+ function);
+ return -EINVAL;
+ }
+ tp->retprobe = true;
+ }
+
+ return 0;
+}
+
+/* Call probe_finder callback with scope DIE */
+static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
+{
+ Dwarf_Attribute fb_attr;
+ Dwarf_Frame *frame = NULL;
+ size_t nops;
+ int ret;
+
+ if (!sc_die) {
+ pr_err("Caller must pass a scope DIE. Program error.\n");
+ return -EINVAL;
+ }
+
+ /* If not a real subprogram, find a real one */
+ if (!die_is_func_def(sc_die)) {
+ if (!die_find_realfunc(&pf->cu_die, pf->addr, &pf->sp_die)) {
+ if (die_find_tailfunc(&pf->cu_die, pf->addr, &pf->sp_die)) {
+ pr_warning("Ignoring tail call from %s\n",
+ dwarf_diename(&pf->sp_die));
+ return 0;
+ } else {
+ pr_warning("Failed to find probe point in any "
+ "functions.\n");
+ return -ENOENT;
+ }
+ }
+ } else
+ memcpy(&pf->sp_die, sc_die, sizeof(Dwarf_Die));
+
+ /* Get the frame base attribute/ops from subprogram */
+ dwarf_attr(&pf->sp_die, DW_AT_frame_base, &fb_attr);
+ ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1);
+ if (ret <= 0 || nops == 0) {
+ pf->fb_ops = NULL;
+#if _ELFUTILS_PREREQ(0, 142)
+ } else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa &&
+ (pf->cfi_eh != NULL || pf->cfi_dbg != NULL)) {
+ if ((dwarf_cfi_addrframe(pf->cfi_eh, pf->addr, &frame) != 0 &&
+ (dwarf_cfi_addrframe(pf->cfi_dbg, pf->addr, &frame) != 0)) ||
+ dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) {
+ pr_warning("Failed to get call frame on 0x%jx\n",
+ (uintmax_t)pf->addr);
+ free(frame);
+ return -ENOENT;
+ }
+#endif
+ }
+
+ /* Call finder's callback handler */
+ ret = pf->callback(sc_die, pf);
+
+ /* Since *pf->fb_ops can be a part of frame. we should free it here. */
+ free(frame);
+ pf->fb_ops = NULL;
+
+ return ret;
+}
+
+struct find_scope_param {
+ const char *function;
+ const char *file;
+ int line;
+ int diff;
+ Dwarf_Die *die_mem;
+ bool found;
+};
+
+static int find_best_scope_cb(Dwarf_Die *fn_die, void *data)
+{
+ struct find_scope_param *fsp = data;
+ const char *file;
+ int lno;
+
+ /* Skip if declared file name does not match */
+ if (fsp->file) {
+ file = die_get_decl_file(fn_die);
+ if (!file || strcmp(fsp->file, file) != 0)
+ return 0;
+ }
+ /* If the function name is given, that's what user expects */
+ if (fsp->function) {
+ if (die_match_name(fn_die, fsp->function)) {
+ memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die));
+ fsp->found = true;
+ return 1;
+ }
+ } else {
+ /* With the line number, find the nearest declared DIE */
+ dwarf_decl_line(fn_die, &lno);
+ if (lno < fsp->line && fsp->diff > fsp->line - lno) {
+ /* Keep a candidate and continue */
+ fsp->diff = fsp->line - lno;
+ memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die));
+ fsp->found = true;
+ }
+ }
+ return 0;
+}
+
+/* Return innermost DIE */
+static int find_inner_scope_cb(Dwarf_Die *fn_die, void *data)
+{
+ struct find_scope_param *fsp = data;
+
+ memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die));
+ fsp->found = true;
+ return 1;
+}
+
+/* Find an appropriate scope fits to given conditions */
+static Dwarf_Die *find_best_scope(struct probe_finder *pf, Dwarf_Die *die_mem)
+{
+ struct find_scope_param fsp = {
+ .function = pf->pev->point.function,
+ .file = pf->fname,
+ .line = pf->lno,
+ .diff = INT_MAX,
+ .die_mem = die_mem,
+ .found = false,
+ };
+ int ret;
+
+ ret = cu_walk_functions_at(&pf->cu_die, pf->addr, find_best_scope_cb,
+ &fsp);
+ if (!ret && !fsp.found)
+ cu_walk_functions_at(&pf->cu_die, pf->addr,
+ find_inner_scope_cb, &fsp);
+
+ return fsp.found ? die_mem : NULL;
+}
+
+static int verify_representive_line(struct probe_finder *pf, const char *fname,
+ int lineno, Dwarf_Addr addr)
+{
+ const char *__fname, *__func = NULL;
+ Dwarf_Die die_mem;
+ int __lineno;
+
+ /* Verify line number and address by reverse search */
+ if (cu_find_lineinfo(&pf->cu_die, addr, &__fname, &__lineno) < 0)
+ return 0;
+
+ pr_debug2("Reversed line: %s:%d\n", __fname, __lineno);
+ if (strcmp(fname, __fname) || lineno == __lineno)
+ return 0;
+
+ pr_warning("This line is sharing the address with other lines.\n");
+
+ if (pf->pev->point.function) {
+ /* Find best match function name and lines */
+ pf->addr = addr;
+ if (find_best_scope(pf, &die_mem)
+ && die_match_name(&die_mem, pf->pev->point.function)
+ && dwarf_decl_line(&die_mem, &lineno) == 0) {
+ __func = dwarf_diename(&die_mem);
+ __lineno -= lineno;
+ }
+ }
+ pr_warning("Please try to probe at %s:%d instead.\n",
+ __func ? : __fname, __lineno);
+
+ return -ENOENT;
+}
+
+static int probe_point_line_walker(const char *fname, int lineno,
+ Dwarf_Addr addr, void *data)
+{
+ struct probe_finder *pf = data;
+ Dwarf_Die *sc_die, die_mem;
+ int ret;
+
+ if (lineno != pf->lno || strtailcmp(fname, pf->fname) != 0)
+ return 0;
+
+ if (verify_representive_line(pf, fname, lineno, addr))
+ return -ENOENT;
+
+ pf->addr = addr;
+ sc_die = find_best_scope(pf, &die_mem);
+ if (!sc_die) {
+ pr_warning("Failed to find scope of probe point.\n");
+ return -ENOENT;
+ }
+
+ ret = call_probe_finder(sc_die, pf);
+
+ /* Continue if no error, because the line will be in inline function */
+ return ret < 0 ? ret : 0;
+}
+
+/* Find probe point from its line number */
+static int find_probe_point_by_line(struct probe_finder *pf)
+{
+ return die_walk_lines(&pf->cu_die, probe_point_line_walker, pf);
+}
+
+/* Find lines which match lazy pattern */
+static int find_lazy_match_lines(struct intlist *list,
+ const char *fname, const char *pat)
+{
+ FILE *fp;
+ char *line = NULL;
+ size_t line_len;
+ ssize_t len;
+ int count = 0, linenum = 1;
+ char sbuf[STRERR_BUFSIZE];
+
+ fp = fopen(fname, "r");
+ if (!fp) {
+ pr_warning("Failed to open %s: %s\n", fname,
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ return -errno;
+ }
+
+ while ((len = getline(&line, &line_len, fp)) > 0) {
+
+ if (line[len - 1] == '\n')
+ line[len - 1] = '\0';
+
+ if (strlazymatch(line, pat)) {
+ intlist__add(list, linenum);
+ count++;
+ }
+ linenum++;
+ }
+
+ if (ferror(fp))
+ count = -errno;
+ free(line);
+ fclose(fp);
+
+ if (count == 0)
+ pr_debug("No matched lines found in %s.\n", fname);
+ return count;
+}
+
+static int probe_point_lazy_walker(const char *fname, int lineno,
+ Dwarf_Addr addr, void *data)
+{
+ struct probe_finder *pf = data;
+ Dwarf_Die *sc_die, die_mem;
+ int ret;
+
+ if (!intlist__has_entry(pf->lcache, lineno) ||
+ strtailcmp(fname, pf->fname) != 0)
+ return 0;
+
+ pr_debug("Probe line found: line:%d addr:0x%llx\n",
+ lineno, (unsigned long long)addr);
+ pf->addr = addr;
+ pf->lno = lineno;
+ sc_die = find_best_scope(pf, &die_mem);
+ if (!sc_die) {
+ pr_warning("Failed to find scope of probe point.\n");
+ return -ENOENT;
+ }
+
+ ret = call_probe_finder(sc_die, pf);
+
+ /*
+ * Continue if no error, because the lazy pattern will match
+ * to other lines
+ */
+ return ret < 0 ? ret : 0;
+}
+
+/* Find probe points from lazy pattern */
+static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
+{
+ struct build_id bid;
+ char sbuild_id[SBUILD_ID_SIZE] = "";
+ int ret = 0;
+ char *fpath;
+
+ if (intlist__empty(pf->lcache)) {
+ const char *comp_dir;
+
+ comp_dir = cu_get_comp_dir(&pf->cu_die);
+ if (pf->dbg->build_id) {
+ build_id__init(&bid, pf->dbg->build_id, BUILD_ID_SIZE);
+ build_id__sprintf(&bid, sbuild_id);
+ }
+ ret = find_source_path(pf->fname, sbuild_id, comp_dir, &fpath);
+ if (ret < 0) {
+ pr_warning("Failed to find source file path.\n");
+ return ret;
+ }
+
+ /* Matching lazy line pattern */
+ ret = find_lazy_match_lines(pf->lcache, fpath,
+ pf->pev->point.lazy_line);
+ free(fpath);
+ if (ret <= 0)
+ return ret;
+ }
+
+ return die_walk_lines(sp_die, probe_point_lazy_walker, pf);
+}
+
+static void skip_prologue(Dwarf_Die *sp_die, struct probe_finder *pf)
+{
+ struct perf_probe_point *pp = &pf->pev->point;
+
+ /* Not uprobe? */
+ if (!pf->pev->uprobes)
+ return;
+
+ /* Compiled with optimization? */
+ if (die_is_optimized_target(&pf->cu_die))
+ return;
+
+ /* Don't know entrypc? */
+ if (!pf->addr)
+ return;
+
+ /* Only FUNC and FUNC@SRC are eligible. */
+ if (!pp->function || pp->line || pp->retprobe || pp->lazy_line ||
+ pp->offset || pp->abs_address)
+ return;
+
+ /* Not interested in func parameter? */
+ if (!perf_probe_with_var(pf->pev))
+ return;
+
+ pr_info("Target program is compiled without optimization. Skipping prologue.\n"
+ "Probe on address 0x%" PRIx64 " to force probing at the function entry.\n\n",
+ pf->addr);
+
+ die_skip_prologue(sp_die, &pf->cu_die, &pf->addr);
+}
+
+static int probe_point_inline_cb(Dwarf_Die *in_die, void *data)
+{
+ struct probe_finder *pf = data;
+ struct perf_probe_point *pp = &pf->pev->point;
+ Dwarf_Addr addr;
+ int ret;
+
+ if (pp->lazy_line)
+ ret = find_probe_point_lazy(in_die, pf);
+ else {
+ /* Get probe address */
+ if (die_entrypc(in_die, &addr) != 0) {
+ pr_warning("Failed to get entry address of %s.\n",
+ dwarf_diename(in_die));
+ return -ENOENT;
+ }
+ if (addr == 0) {
+ pr_debug("%s has no valid entry address. skipped.\n",
+ dwarf_diename(in_die));
+ return -ENOENT;
+ }
+ pf->addr = addr;
+ pf->addr += pp->offset;
+ pr_debug("found inline addr: 0x%jx\n",
+ (uintmax_t)pf->addr);
+
+ ret = call_probe_finder(in_die, pf);
+ }
+
+ return ret;
+}
+
+/* Callback parameter with return value for libdw */
+struct dwarf_callback_param {
+ void *data;
+ int retval;
+};
+
+/* Search function from function name */
+static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
+{
+ struct dwarf_callback_param *param = data;
+ struct probe_finder *pf = param->data;
+ struct perf_probe_point *pp = &pf->pev->point;
+ const char *fname;
+
+ /* Check tag and diename */
+ if (!die_is_func_def(sp_die) ||
+ !die_match_name(sp_die, pp->function))
+ return DWARF_CB_OK;
+
+ /* Check declared file */
+ fname = die_get_decl_file(sp_die);
+ if (!fname) {
+ pr_warning("A function DIE doesn't have decl_line. Maybe broken DWARF?\n");
+ return DWARF_CB_OK;
+ }
+ if (pp->file && fname && strtailcmp(pp->file, fname))
+ return DWARF_CB_OK;
+
+ pr_debug("Matched function: %s [%lx]\n", dwarf_diename(sp_die),
+ (unsigned long)dwarf_dieoffset(sp_die));
+ pf->fname = fname;
+ if (pp->line) { /* Function relative line */
+ dwarf_decl_line(sp_die, &pf->lno);
+ pf->lno += pp->line;
+ param->retval = find_probe_point_by_line(pf);
+ } else if (die_is_func_instance(sp_die)) {
+ /* Instances always have the entry address */
+ die_entrypc(sp_die, &pf->addr);
+ /* But in some case the entry address is 0 */
+ if (pf->addr == 0) {
+ pr_debug("%s has no entry PC. Skipped\n",
+ dwarf_diename(sp_die));
+ param->retval = 0;
+ /* Real function */
+ } else if (pp->lazy_line)
+ param->retval = find_probe_point_lazy(sp_die, pf);
+ else {
+ skip_prologue(sp_die, pf);
+ pf->addr += pp->offset;
+ /* TODO: Check the address in this function */
+ param->retval = call_probe_finder(sp_die, pf);
+ }
+ } else if (!probe_conf.no_inlines) {
+ /* Inlined function: search instances */
+ param->retval = die_walk_instances(sp_die,
+ probe_point_inline_cb, (void *)pf);
+ /* This could be a non-existed inline definition */
+ if (param->retval == -ENOENT)
+ param->retval = 0;
+ }
+
+ /* We need to find other candidates */
+ if (strisglob(pp->function) && param->retval >= 0) {
+ param->retval = 0; /* We have to clear the result */
+ return DWARF_CB_OK;
+ }
+
+ return DWARF_CB_ABORT; /* Exit; no same symbol in this CU. */
+}
+
+static int find_probe_point_by_func(struct probe_finder *pf)
+{
+ struct dwarf_callback_param _param = {.data = (void *)pf,
+ .retval = 0};
+ dwarf_getfuncs(&pf->cu_die, probe_point_search_cb, &_param, 0);
+ return _param.retval;
+}
+
+struct pubname_callback_param {
+ char *function;
+ char *file;
+ Dwarf_Die *cu_die;
+ Dwarf_Die *sp_die;
+ int found;
+};
+
+static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data)
+{
+ struct pubname_callback_param *param = data;
+ const char *fname;
+
+ if (dwarf_offdie(dbg, gl->die_offset, param->sp_die)) {
+ if (dwarf_tag(param->sp_die) != DW_TAG_subprogram)
+ return DWARF_CB_OK;
+
+ if (die_match_name(param->sp_die, param->function)) {
+ if (!dwarf_offdie(dbg, gl->cu_offset, param->cu_die))
+ return DWARF_CB_OK;
+
+ if (param->file) {
+ fname = die_get_decl_file(param->sp_die);
+ if (!fname || strtailcmp(param->file, fname))
+ return DWARF_CB_OK;
+ }
+
+ param->found = 1;
+ return DWARF_CB_ABORT;
+ }
+ }
+
+ return DWARF_CB_OK;
+}
+
+static int debuginfo__find_probe_location(struct debuginfo *dbg,
+ struct probe_finder *pf)
+{
+ struct perf_probe_point *pp = &pf->pev->point;
+ Dwarf_Off off, noff;
+ size_t cuhl;
+ Dwarf_Die *diep;
+ int ret = 0;
+
+ off = 0;
+ pf->lcache = intlist__new(NULL);
+ if (!pf->lcache)
+ return -ENOMEM;
+
+ /* Fastpath: lookup by function name from .debug_pubnames section */
+ if (pp->function && !strisglob(pp->function)) {
+ struct pubname_callback_param pubname_param = {
+ .function = pp->function,
+ .file = pp->file,
+ .cu_die = &pf->cu_die,
+ .sp_die = &pf->sp_die,
+ .found = 0,
+ };
+ struct dwarf_callback_param probe_param = {
+ .data = pf,
+ };
+
+ dwarf_getpubnames(dbg->dbg, pubname_search_cb,
+ &pubname_param, 0);
+ if (pubname_param.found) {
+ ret = probe_point_search_cb(&pf->sp_die, &probe_param);
+ if (ret)
+ goto found;
+ }
+ }
+
+ /* Loop on CUs (Compilation Unit) */
+ while (!dwarf_nextcu(dbg->dbg, off, &noff, &cuhl, NULL, NULL, NULL)) {
+ /* Get the DIE(Debugging Information Entry) of this CU */
+ diep = dwarf_offdie(dbg->dbg, off + cuhl, &pf->cu_die);
+ if (!diep) {
+ off = noff;
+ continue;
+ }
+
+ /* Check if target file is included. */
+ if (pp->file)
+ pf->fname = cu_find_realpath(&pf->cu_die, pp->file);
+ else
+ pf->fname = NULL;
+
+ if (!pp->file || pf->fname) {
+ if (pp->function)
+ ret = find_probe_point_by_func(pf);
+ else if (pp->lazy_line)
+ ret = find_probe_point_lazy(&pf->cu_die, pf);
+ else {
+ pf->lno = pp->line;
+ ret = find_probe_point_by_line(pf);
+ }
+ if (ret < 0)
+ break;
+ }
+ off = noff;
+ }
+
+found:
+ intlist__delete(pf->lcache);
+ pf->lcache = NULL;
+
+ return ret;
+}
+
+/* Find probe points from debuginfo */
+static int debuginfo__find_probes(struct debuginfo *dbg,
+ struct probe_finder *pf)
+{
+ int ret = 0;
+ Elf *elf;
+ GElf_Ehdr ehdr;
+
+ if (pf->cfi_eh || pf->cfi_dbg)
+ return debuginfo__find_probe_location(dbg, pf);
+
+ /* Get the call frame information from this dwarf */
+ elf = dwarf_getelf(dbg->dbg);
+ if (elf == NULL)
+ return -EINVAL;
+
+ if (gelf_getehdr(elf, &ehdr) == NULL)
+ return -EINVAL;
+
+ pf->machine = ehdr.e_machine;
+
+#if _ELFUTILS_PREREQ(0, 142)
+ do {
+ GElf_Shdr shdr;
+
+ if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) &&
+ shdr.sh_type == SHT_PROGBITS)
+ pf->cfi_eh = dwarf_getcfi_elf(elf);
+
+ pf->cfi_dbg = dwarf_getcfi(dbg->dbg);
+ } while (0);
+#endif
+
+ ret = debuginfo__find_probe_location(dbg, pf);
+ return ret;
+}
+
+struct local_vars_finder {
+ struct probe_finder *pf;
+ struct perf_probe_arg *args;
+ bool vars;
+ int max_args;
+ int nargs;
+ int ret;
+};
+
+/* Collect available variables in this scope */
+static int copy_variables_cb(Dwarf_Die *die_mem, void *data)
+{
+ struct local_vars_finder *vf = data;
+ struct probe_finder *pf = vf->pf;
+ int tag;
+
+ tag = dwarf_tag(die_mem);
+ if (tag == DW_TAG_formal_parameter ||
+ (tag == DW_TAG_variable && vf->vars)) {
+ if (convert_variable_location(die_mem, vf->pf->addr,
+ vf->pf->fb_ops, &pf->sp_die,
+ pf->machine, NULL) == 0) {
+ vf->args[vf->nargs].var = (char *)dwarf_diename(die_mem);
+ if (vf->args[vf->nargs].var == NULL) {
+ vf->ret = -ENOMEM;
+ return DIE_FIND_CB_END;
+ }
+ pr_debug(" %s", vf->args[vf->nargs].var);
+ vf->nargs++;
+ }
+ }
+
+ if (dwarf_haspc(die_mem, vf->pf->addr))
+ return DIE_FIND_CB_CONTINUE;
+ else
+ return DIE_FIND_CB_SIBLING;
+}
+
+static int expand_probe_args(Dwarf_Die *sc_die, struct probe_finder *pf,
+ struct perf_probe_arg *args)
+{
+ Dwarf_Die die_mem;
+ int i;
+ int n = 0;
+ struct local_vars_finder vf = {.pf = pf, .args = args, .vars = false,
+ .max_args = MAX_PROBE_ARGS, .ret = 0};
+
+ for (i = 0; i < pf->pev->nargs; i++) {
+ /* var never be NULL */
+ if (strcmp(pf->pev->args[i].var, PROBE_ARG_VARS) == 0)
+ vf.vars = true;
+ else if (strcmp(pf->pev->args[i].var, PROBE_ARG_PARAMS) != 0) {
+ /* Copy normal argument */
+ args[n] = pf->pev->args[i];
+ n++;
+ continue;
+ }
+ pr_debug("Expanding %s into:", pf->pev->args[i].var);
+ vf.nargs = n;
+ /* Special local variables */
+ die_find_child(sc_die, copy_variables_cb, (void *)&vf,
+ &die_mem);
+ pr_debug(" (%d)\n", vf.nargs - n);
+ if (vf.ret < 0)
+ return vf.ret;
+ n = vf.nargs;
+ }
+ return n;
+}
+
+static bool trace_event_finder_overlap(struct trace_event_finder *tf)
+{
+ int i;
+
+ for (i = 0; i < tf->ntevs; i++) {
+ if (tf->pf.addr == tf->tevs[i].point.address)
+ return true;
+ }
+ return false;
+}
+
+/* Add a found probe point into trace event list */
+static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf)
+{
+ struct trace_event_finder *tf =
+ container_of(pf, struct trace_event_finder, pf);
+ struct perf_probe_point *pp = &pf->pev->point;
+ struct probe_trace_event *tev;
+ struct perf_probe_arg *args = NULL;
+ int ret, i;
+
+ /*
+ * For some reason (e.g. different column assigned to same address)
+ * This callback can be called with the address which already passed.
+ * Ignore it first.
+ */
+ if (trace_event_finder_overlap(tf))
+ return 0;
+
+ /* Check number of tevs */
+ if (tf->ntevs == tf->max_tevs) {
+ pr_warning("Too many( > %d) probe point found.\n",
+ tf->max_tevs);
+ return -ERANGE;
+ }
+ tev = &tf->tevs[tf->ntevs++];
+
+ /* Trace point should be converted from subprogram DIE */
+ ret = convert_to_trace_point(&pf->sp_die, tf->mod, pf->addr,
+ pp->retprobe, pp->function, &tev->point);
+ if (ret < 0)
+ goto end;
+
+ tev->point.realname = strdup(dwarf_diename(sc_die));
+ if (!tev->point.realname) {
+ ret = -ENOMEM;
+ goto end;
+ }
+
+ pr_debug("Probe point found: %s+%lu\n", tev->point.symbol,
+ tev->point.offset);
+
+ /* Expand special probe argument if exist */
+ args = zalloc(sizeof(struct perf_probe_arg) * MAX_PROBE_ARGS);
+ if (args == NULL) {
+ ret = -ENOMEM;
+ goto end;
+ }
+
+ ret = expand_probe_args(sc_die, pf, args);
+ if (ret < 0)
+ goto end;
+
+ tev->nargs = ret;
+ tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
+ if (tev->args == NULL) {
+ ret = -ENOMEM;
+ goto end;
+ }
+
+ /* Find each argument */
+ for (i = 0; i < tev->nargs; i++) {
+ pf->pvar = &args[i];
+ pf->tvar = &tev->args[i];
+ /* Variable should be found from scope DIE */
+ ret = find_variable(sc_die, pf);
+ if (ret != 0)
+ break;
+ }
+
+end:
+ if (ret) {
+ clear_probe_trace_event(tev);
+ tf->ntevs--;
+ }
+ free(args);
+ return ret;
+}
+
+static int fill_empty_trace_arg(struct perf_probe_event *pev,
+ struct probe_trace_event *tevs, int ntevs)
+{
+ char **valp;
+ char *type;
+ int i, j, ret;
+
+ if (!ntevs)
+ return -ENOENT;
+
+ for (i = 0; i < pev->nargs; i++) {
+ type = NULL;
+ for (j = 0; j < ntevs; j++) {
+ if (tevs[j].args[i].value) {
+ type = tevs[j].args[i].type;
+ break;
+ }
+ }
+ if (j == ntevs) {
+ print_var_not_found(pev->args[i].var);
+ return -ENOENT;
+ }
+ for (j = 0; j < ntevs; j++) {
+ valp = &tevs[j].args[i].value;
+ if (*valp)
+ continue;
+
+ ret = asprintf(valp, "\\%lx", probe_conf.magic_num);
+ if (ret < 0)
+ return -ENOMEM;
+ /* Note that type can be NULL */
+ if (type) {
+ tevs[j].args[i].type = strdup(type);
+ if (!tevs[j].args[i].type)
+ return -ENOMEM;
+ }
+ }
+ }
+ return 0;
+}
+
+/* Find probe_trace_events specified by perf_probe_event from debuginfo */
+int debuginfo__find_trace_events(struct debuginfo *dbg,
+ struct perf_probe_event *pev,
+ struct probe_trace_event **tevs)
+{
+ struct trace_event_finder tf = {
+ .pf = {.pev = pev, .dbg = dbg, .callback = add_probe_trace_event},
+ .max_tevs = probe_conf.max_probes, .mod = dbg->mod};
+ int ret, i;
+
+ /* Allocate result tevs array */
+ *tevs = zalloc(sizeof(struct probe_trace_event) * tf.max_tevs);
+ if (*tevs == NULL)
+ return -ENOMEM;
+
+ tf.tevs = *tevs;
+ tf.ntevs = 0;
+
+ if (pev->nargs != 0 && immediate_value_is_supported())
+ tf.pf.skip_empty_arg = true;
+
+ ret = debuginfo__find_probes(dbg, &tf.pf);
+ if (ret >= 0 && tf.pf.skip_empty_arg)
+ ret = fill_empty_trace_arg(pev, tf.tevs, tf.ntevs);
+
+ if (ret < 0 || tf.ntevs == 0) {
+ for (i = 0; i < tf.ntevs; i++)
+ clear_probe_trace_event(&tf.tevs[i]);
+ zfree(tevs);
+ return ret;
+ }
+
+ return (ret < 0) ? ret : tf.ntevs;
+}
+
+/* Collect available variables in this scope */
+static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
+{
+ struct available_var_finder *af = data;
+ struct variable_list *vl;
+ struct strbuf buf = STRBUF_INIT;
+ int tag, ret;
+
+ vl = &af->vls[af->nvls - 1];
+
+ tag = dwarf_tag(die_mem);
+ if (tag == DW_TAG_formal_parameter ||
+ tag == DW_TAG_variable) {
+ ret = convert_variable_location(die_mem, af->pf.addr,
+ af->pf.fb_ops, &af->pf.sp_die,
+ af->pf.machine, NULL);
+ if (ret == 0 || ret == -ERANGE) {
+ int ret2;
+ bool externs = !af->child;
+
+ if (strbuf_init(&buf, 64) < 0)
+ goto error;
+
+ if (probe_conf.show_location_range) {
+ if (!externs)
+ ret2 = strbuf_add(&buf,
+ ret ? "[INV]\t" : "[VAL]\t", 6);
+ else
+ ret2 = strbuf_add(&buf, "[EXT]\t", 6);
+ if (ret2)
+ goto error;
+ }
+
+ ret2 = die_get_varname(die_mem, &buf);
+
+ if (!ret2 && probe_conf.show_location_range &&
+ !externs) {
+ if (strbuf_addch(&buf, '\t') < 0)
+ goto error;
+ ret2 = die_get_var_range(&af->pf.sp_die,
+ die_mem, &buf);
+ }
+
+ pr_debug("Add new var: %s\n", buf.buf);
+ if (ret2 == 0) {
+ strlist__add(vl->vars,
+ strbuf_detach(&buf, NULL));
+ }
+ strbuf_release(&buf);
+ }
+ }
+
+ if (af->child && dwarf_haspc(die_mem, af->pf.addr))
+ return DIE_FIND_CB_CONTINUE;
+ else
+ return DIE_FIND_CB_SIBLING;
+error:
+ strbuf_release(&buf);
+ pr_debug("Error in strbuf\n");
+ return DIE_FIND_CB_END;
+}
+
+static bool available_var_finder_overlap(struct available_var_finder *af)
+{
+ int i;
+
+ for (i = 0; i < af->nvls; i++) {
+ if (af->pf.addr == af->vls[i].point.address)
+ return true;
+ }
+ return false;
+
+}
+
+/* Add a found vars into available variables list */
+static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf)
+{
+ struct available_var_finder *af =
+ container_of(pf, struct available_var_finder, pf);
+ struct perf_probe_point *pp = &pf->pev->point;
+ struct variable_list *vl;
+ Dwarf_Die die_mem;
+ int ret;
+
+ /*
+ * For some reason (e.g. different column assigned to same address),
+ * this callback can be called with the address which already passed.
+ * Ignore it first.
+ */
+ if (available_var_finder_overlap(af))
+ return 0;
+
+ /* Check number of tevs */
+ if (af->nvls == af->max_vls) {
+ pr_warning("Too many( > %d) probe point found.\n", af->max_vls);
+ return -ERANGE;
+ }
+ vl = &af->vls[af->nvls++];
+
+ /* Trace point should be converted from subprogram DIE */
+ ret = convert_to_trace_point(&pf->sp_die, af->mod, pf->addr,
+ pp->retprobe, pp->function, &vl->point);
+ if (ret < 0)
+ return ret;
+
+ pr_debug("Probe point found: %s+%lu\n", vl->point.symbol,
+ vl->point.offset);
+
+ /* Find local variables */
+ vl->vars = strlist__new(NULL, NULL);
+ if (vl->vars == NULL)
+ return -ENOMEM;
+ af->child = true;
+ die_find_child(sc_die, collect_variables_cb, (void *)af, &die_mem);
+
+ /* Find external variables */
+ if (!probe_conf.show_ext_vars)
+ goto out;
+ /* Don't need to search child DIE for external vars. */
+ af->child = false;
+ die_find_child(&pf->cu_die, collect_variables_cb, (void *)af, &die_mem);
+
+out:
+ if (strlist__empty(vl->vars)) {
+ strlist__delete(vl->vars);
+ vl->vars = NULL;
+ }
+
+ return ret;
+}
+
+/*
+ * Find available variables at given probe point
+ * Return the number of found probe points. Return 0 if there is no
+ * matched probe point. Return <0 if an error occurs.
+ */
+int debuginfo__find_available_vars_at(struct debuginfo *dbg,
+ struct perf_probe_event *pev,
+ struct variable_list **vls)
+{
+ struct available_var_finder af = {
+ .pf = {.pev = pev, .dbg = dbg, .callback = add_available_vars},
+ .mod = dbg->mod,
+ .max_vls = probe_conf.max_probes};
+ int ret;
+
+ /* Allocate result vls array */
+ *vls = zalloc(sizeof(struct variable_list) * af.max_vls);
+ if (*vls == NULL)
+ return -ENOMEM;
+
+ af.vls = *vls;
+ af.nvls = 0;
+
+ ret = debuginfo__find_probes(dbg, &af.pf);
+ if (ret < 0) {
+ /* Free vlist for error */
+ while (af.nvls--) {
+ zfree(&af.vls[af.nvls].point.symbol);
+ strlist__delete(af.vls[af.nvls].vars);
+ }
+ zfree(vls);
+ return ret;
+ }
+
+ return (ret < 0) ? ret : af.nvls;
+}
+
+/* For the kernel module, we need a special code to get a DIE */
+int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs,
+ bool adjust_offset)
+{
+ int n, i;
+ Elf32_Word shndx;
+ Elf_Scn *scn;
+ Elf *elf;
+ GElf_Shdr mem, *shdr;
+ const char *p;
+
+ elf = dwfl_module_getelf(dbg->mod, &dbg->bias);
+ if (!elf)
+ return -EINVAL;
+
+ /* Get the number of relocations */
+ n = dwfl_module_relocations(dbg->mod);
+ if (n < 0)
+ return -ENOENT;
+ /* Search the relocation related .text section */
+ for (i = 0; i < n; i++) {
+ p = dwfl_module_relocation_info(dbg->mod, i, &shndx);
+ if (strcmp(p, ".text") == 0) {
+ /* OK, get the section header */
+ scn = elf_getscn(elf, shndx);
+ if (!scn)
+ return -ENOENT;
+ shdr = gelf_getshdr(scn, &mem);
+ if (!shdr)
+ return -ENOENT;
+ *offs = shdr->sh_addr;
+ if (adjust_offset)
+ *offs -= shdr->sh_offset;
+ }
+ }
+ return 0;
+}
+
+/* Reverse search */
+int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr,
+ struct perf_probe_point *ppt)
+{
+ Dwarf_Die cudie, spdie, indie;
+ Dwarf_Addr _addr = 0, baseaddr = 0;
+ const char *fname = NULL, *func = NULL, *basefunc = NULL, *tmp;
+ int baseline = 0, lineno = 0, ret = 0;
+
+ /* We always need to relocate the address for aranges */
+ if (debuginfo__get_text_offset(dbg, &baseaddr, false) == 0)
+ addr += baseaddr;
+ /* Find cu die */
+ if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr, &cudie)) {
+ pr_warning("Failed to find debug information for address %#" PRIx64 "\n",
+ addr);
+ ret = -EINVAL;
+ goto end;
+ }
+
+ /* Find a corresponding line (filename and lineno) */
+ cu_find_lineinfo(&cudie, (Dwarf_Addr)addr, &fname, &lineno);
+ /* Don't care whether it failed or not */
+
+ /* Find a corresponding function (name, baseline and baseaddr) */
+ if (die_find_realfunc(&cudie, (Dwarf_Addr)addr, &spdie)) {
+ /* Get function entry information */
+ func = basefunc = dwarf_diename(&spdie);
+ if (!func ||
+ die_entrypc(&spdie, &baseaddr) != 0 ||
+ dwarf_decl_line(&spdie, &baseline) != 0) {
+ lineno = 0;
+ goto post;
+ }
+
+ fname = die_get_decl_file(&spdie);
+ if (addr == baseaddr) {
+ /* Function entry - Relative line number is 0 */
+ lineno = baseline;
+ goto post;
+ }
+
+ /* Track down the inline functions step by step */
+ while (die_find_top_inlinefunc(&spdie, (Dwarf_Addr)addr,
+ &indie)) {
+ /* There is an inline function */
+ if (die_entrypc(&indie, &_addr) == 0 &&
+ _addr == addr) {
+ /*
+ * addr is at an inline function entry.
+ * In this case, lineno should be the call-site
+ * line number. (overwrite lineinfo)
+ */
+ lineno = die_get_call_lineno(&indie);
+ fname = die_get_call_file(&indie);
+ break;
+ } else {
+ /*
+ * addr is in an inline function body.
+ * Since lineno points one of the lines
+ * of the inline function, baseline should
+ * be the entry line of the inline function.
+ */
+ tmp = dwarf_diename(&indie);
+ if (!tmp ||
+ dwarf_decl_line(&indie, &baseline) != 0)
+ break;
+ func = tmp;
+ spdie = indie;
+ }
+ }
+ /* Verify the lineno and baseline are in a same file */
+ tmp = die_get_decl_file(&spdie);
+ if (!tmp || (fname && strcmp(tmp, fname) != 0))
+ lineno = 0;
+ }
+
+post:
+ /* Make a relative line number or an offset */
+ if (lineno)
+ ppt->line = lineno - baseline;
+ else if (basefunc) {
+ ppt->offset = addr - baseaddr;
+ func = basefunc;
+ }
+
+ /* Duplicate strings */
+ if (func) {
+ ppt->function = strdup(func);
+ if (ppt->function == NULL) {
+ ret = -ENOMEM;
+ goto end;
+ }
+ }
+ if (fname) {
+ ppt->file = strdup(fname);
+ if (ppt->file == NULL) {
+ zfree(&ppt->function);
+ ret = -ENOMEM;
+ goto end;
+ }
+ }
+end:
+ if (ret == 0 && (fname || func))
+ ret = 1; /* Found a point */
+ return ret;
+}
+
+/* Add a line and store the src path */
+static int line_range_add_line(const char *src, unsigned int lineno,
+ struct line_range *lr)
+{
+ /* Copy source path */
+ if (!lr->path) {
+ lr->path = strdup(src);
+ if (lr->path == NULL)
+ return -ENOMEM;
+ }
+ return intlist__add(lr->line_list, lineno);
+}
+
+static int line_range_walk_cb(const char *fname, int lineno,
+ Dwarf_Addr addr, void *data)
+{
+ struct line_finder *lf = data;
+ const char *__fname;
+ int __lineno;
+ int err;
+
+ if ((strtailcmp(fname, lf->fname) != 0) ||
+ (lf->lno_s > lineno || lf->lno_e < lineno))
+ return 0;
+
+ /* Make sure this line can be reversible */
+ if (cu_find_lineinfo(&lf->cu_die, addr, &__fname, &__lineno) > 0
+ && (lineno != __lineno || strcmp(fname, __fname)))
+ return 0;
+
+ err = line_range_add_line(fname, lineno, lf->lr);
+ if (err < 0 && err != -EEXIST)
+ return err;
+
+ return 0;
+}
+
+/* Find line range from its line number */
+static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf)
+{
+ int ret;
+
+ ret = die_walk_lines(sp_die ?: &lf->cu_die, line_range_walk_cb, lf);
+
+ /* Update status */
+ if (ret >= 0)
+ if (!intlist__empty(lf->lr->line_list))
+ ret = lf->found = 1;
+ else
+ ret = 0; /* Lines are not found */
+ else {
+ zfree(&lf->lr->path);
+ }
+ return ret;
+}
+
+static int line_range_inline_cb(Dwarf_Die *in_die, void *data)
+{
+ int ret = find_line_range_by_line(in_die, data);
+
+ /*
+ * We have to check all instances of inlined function, because
+ * some execution paths can be optimized out depends on the
+ * function argument of instances. However, if an error occurs,
+ * it should be handled by the caller.
+ */
+ return ret < 0 ? ret : 0;
+}
+
+/* Search function definition from function name */
+static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
+{
+ struct dwarf_callback_param *param = data;
+ struct line_finder *lf = param->data;
+ struct line_range *lr = lf->lr;
+ const char *fname;
+
+ /* Check declared file */
+ if (lr->file) {
+ fname = die_get_decl_file(sp_die);
+ if (!fname || strtailcmp(lr->file, fname))
+ return DWARF_CB_OK;
+ }
+
+ if (die_match_name(sp_die, lr->function) && die_is_func_def(sp_die)) {
+ lf->fname = die_get_decl_file(sp_die);
+ dwarf_decl_line(sp_die, &lr->offset);
+ pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset);
+ lf->lno_s = lr->offset + lr->start;
+ if (lf->lno_s < 0) /* Overflow */
+ lf->lno_s = INT_MAX;
+ lf->lno_e = lr->offset + lr->end;
+ if (lf->lno_e < 0) /* Overflow */
+ lf->lno_e = INT_MAX;
+ pr_debug("New line range: %d to %d\n", lf->lno_s, lf->lno_e);
+ lr->start = lf->lno_s;
+ lr->end = lf->lno_e;
+ if (!die_is_func_instance(sp_die))
+ param->retval = die_walk_instances(sp_die,
+ line_range_inline_cb, lf);
+ else
+ param->retval = find_line_range_by_line(sp_die, lf);
+ return DWARF_CB_ABORT;
+ }
+ return DWARF_CB_OK;
+}
+
+static int find_line_range_by_func(struct line_finder *lf)
+{
+ struct dwarf_callback_param param = {.data = (void *)lf, .retval = 0};
+ dwarf_getfuncs(&lf->cu_die, line_range_search_cb, &param, 0);
+ return param.retval;
+}
+
+int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr)
+{
+ struct line_finder lf = {.lr = lr, .found = 0};
+ int ret = 0;
+ Dwarf_Off off = 0, noff;
+ size_t cuhl;
+ Dwarf_Die *diep;
+ const char *comp_dir;
+
+ /* Fastpath: lookup by function name from .debug_pubnames section */
+ if (lr->function) {
+ struct pubname_callback_param pubname_param = {
+ .function = lr->function, .file = lr->file,
+ .cu_die = &lf.cu_die, .sp_die = &lf.sp_die, .found = 0};
+ struct dwarf_callback_param line_range_param = {
+ .data = (void *)&lf, .retval = 0};
+
+ dwarf_getpubnames(dbg->dbg, pubname_search_cb,
+ &pubname_param, 0);
+ if (pubname_param.found) {
+ line_range_search_cb(&lf.sp_die, &line_range_param);
+ if (lf.found)
+ goto found;
+ }
+ }
+
+ /* Loop on CUs (Compilation Unit) */
+ while (!lf.found && ret >= 0) {
+ if (dwarf_nextcu(dbg->dbg, off, &noff, &cuhl,
+ NULL, NULL, NULL) != 0)
+ break;
+
+ /* Get the DIE(Debugging Information Entry) of this CU */
+ diep = dwarf_offdie(dbg->dbg, off + cuhl, &lf.cu_die);
+ if (!diep) {
+ off = noff;
+ continue;
+ }
+
+ /* Check if target file is included. */
+ if (lr->file)
+ lf.fname = cu_find_realpath(&lf.cu_die, lr->file);
+ else
+ lf.fname = 0;
+
+ if (!lr->file || lf.fname) {
+ if (lr->function)
+ ret = find_line_range_by_func(&lf);
+ else {
+ lf.lno_s = lr->start;
+ lf.lno_e = lr->end;
+ ret = find_line_range_by_line(NULL, &lf);
+ }
+ }
+ off = noff;
+ }
+
+found:
+ /* Store comp_dir */
+ if (lf.found) {
+ comp_dir = cu_get_comp_dir(&lf.cu_die);
+ if (comp_dir) {
+ lr->comp_dir = strdup(comp_dir);
+ if (!lr->comp_dir)
+ ret = -ENOMEM;
+ }
+ }
+
+ pr_debug("path: %s\n", lr->path);
+ return (ret < 0) ? ret : lf.found;
+}
+
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+/* debuginfod doesn't require the comp_dir but buildid is required */
+static int get_source_from_debuginfod(const char *raw_path,
+ const char *sbuild_id, char **new_path)
+{
+ debuginfod_client *c = debuginfod_begin();
+ const char *p = raw_path;
+ int fd;
+
+ if (!c)
+ return -ENOMEM;
+
+ fd = debuginfod_find_source(c, (const unsigned char *)sbuild_id,
+ 0, p, new_path);
+ pr_debug("Search %s from debuginfod -> %d\n", p, fd);
+ if (fd >= 0)
+ close(fd);
+ debuginfod_end(c);
+ if (fd < 0) {
+ pr_debug("Failed to find %s in debuginfod (%s)\n",
+ raw_path, sbuild_id);
+ return -ENOENT;
+ }
+ pr_debug("Got a source %s\n", *new_path);
+
+ return 0;
+}
+#else
+static inline int get_source_from_debuginfod(const char *raw_path __maybe_unused,
+ const char *sbuild_id __maybe_unused,
+ char **new_path __maybe_unused)
+{
+ return -ENOTSUP;
+}
+#endif
+/*
+ * Find a src file from a DWARF tag path. Prepend optional source path prefix
+ * and chop off leading directories that do not exist. Result is passed back as
+ * a newly allocated path on success.
+ * Return 0 if file was found and readable, -errno otherwise.
+ */
+int find_source_path(const char *raw_path, const char *sbuild_id,
+ const char *comp_dir, char **new_path)
+{
+ const char *prefix = symbol_conf.source_prefix;
+
+ if (sbuild_id && !prefix) {
+ if (!get_source_from_debuginfod(raw_path, sbuild_id, new_path))
+ return 0;
+ }
+
+ if (!prefix) {
+ if (raw_path[0] != '/' && comp_dir)
+ /* If not an absolute path, try to use comp_dir */
+ prefix = comp_dir;
+ else {
+ if (access(raw_path, R_OK) == 0) {
+ *new_path = strdup(raw_path);
+ return *new_path ? 0 : -ENOMEM;
+ } else
+ return -errno;
+ }
+ }
+
+ *new_path = malloc((strlen(prefix) + strlen(raw_path) + 2));
+ if (!*new_path)
+ return -ENOMEM;
+
+ for (;;) {
+ sprintf(*new_path, "%s/%s", prefix, raw_path);
+
+ if (access(*new_path, R_OK) == 0)
+ return 0;
+
+ if (!symbol_conf.source_prefix) {
+ /* In case of searching comp_dir, don't retry */
+ zfree(new_path);
+ return -errno;
+ }
+
+ switch (errno) {
+ case ENAMETOOLONG:
+ case ENOENT:
+ case EROFS:
+ case EFAULT:
+ raw_path = strchr(++raw_path, '/');
+ if (!raw_path) {
+ zfree(new_path);
+ return -ENOENT;
+ }
+ continue;
+
+ default:
+ zfree(new_path);
+ return -errno;
+ }
+ }
+}
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
new file mode 100644
index 000000000..8bc1c80d3
--- /dev/null
+++ b/tools/perf/util/probe-finder.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PROBE_FINDER_H
+#define _PROBE_FINDER_H
+
+#include <stdbool.h>
+#include "intlist.h"
+#include "build-id.h"
+#include "probe-event.h"
+#include <linux/ctype.h>
+
+#define MAX_PROBE_BUFFER 1024
+#define MAX_PROBES 128
+#define MAX_PROBE_ARGS 128
+
+#define PROBE_ARG_VARS "$vars"
+#define PROBE_ARG_PARAMS "$params"
+
+static inline int is_c_varname(const char *name)
+{
+ /* TODO */
+ return isalpha(name[0]) || name[0] == '_';
+}
+
+#ifdef HAVE_DWARF_SUPPORT
+
+#include "dwarf-aux.h"
+
+/* TODO: export debuginfo data structure even if no dwarf support */
+
+/* debug information structure */
+struct debuginfo {
+ Dwarf *dbg;
+ Dwfl_Module *mod;
+ Dwfl *dwfl;
+ Dwarf_Addr bias;
+ const unsigned char *build_id;
+};
+
+/* This also tries to open distro debuginfo */
+struct debuginfo *debuginfo__new(const char *path);
+void debuginfo__delete(struct debuginfo *dbg);
+
+/* Find probe_trace_events specified by perf_probe_event from debuginfo */
+int debuginfo__find_trace_events(struct debuginfo *dbg,
+ struct perf_probe_event *pev,
+ struct probe_trace_event **tevs);
+
+/* Find a perf_probe_point from debuginfo */
+int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr,
+ struct perf_probe_point *ppt);
+
+int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs,
+ bool adjust_offset);
+
+/* Find a line range */
+int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr);
+
+/* Find available variables */
+int debuginfo__find_available_vars_at(struct debuginfo *dbg,
+ struct perf_probe_event *pev,
+ struct variable_list **vls);
+
+/* Find a src file from a DWARF tag path */
+int find_source_path(const char *raw_path, const char *sbuild_id,
+ const char *comp_dir, char **new_path);
+
+struct probe_finder {
+ struct perf_probe_event *pev; /* Target probe event */
+ struct debuginfo *dbg;
+
+ /* Callback when a probe point is found */
+ int (*callback)(Dwarf_Die *sc_die, struct probe_finder *pf);
+
+ /* For function searching */
+ int lno; /* Line number */
+ Dwarf_Addr addr; /* Address */
+ const char *fname; /* Real file name */
+ Dwarf_Die cu_die; /* Current CU */
+ Dwarf_Die sp_die;
+ struct intlist *lcache; /* Line cache for lazy match */
+
+ /* For variable searching */
+#if _ELFUTILS_PREREQ(0, 142)
+ /* Call Frame Information from .eh_frame */
+ Dwarf_CFI *cfi_eh;
+ /* Call Frame Information from .debug_frame */
+ Dwarf_CFI *cfi_dbg;
+#endif
+ Dwarf_Op *fb_ops; /* Frame base attribute */
+ unsigned int machine; /* Target machine arch */
+ struct perf_probe_arg *pvar; /* Current target variable */
+ struct probe_trace_arg *tvar; /* Current result variable */
+ bool skip_empty_arg; /* Skip non-exist args */
+};
+
+struct trace_event_finder {
+ struct probe_finder pf;
+ Dwfl_Module *mod; /* For solving symbols */
+ struct probe_trace_event *tevs; /* Found trace events */
+ int ntevs; /* Number of trace events */
+ int max_tevs; /* Max number of trace events */
+};
+
+struct available_var_finder {
+ struct probe_finder pf;
+ Dwfl_Module *mod; /* For solving symbols */
+ struct variable_list *vls; /* Found variable lists */
+ int nvls; /* Number of variable lists */
+ int max_vls; /* Max no. of variable lists */
+ bool child; /* Search child scopes */
+};
+
+struct line_finder {
+ struct line_range *lr; /* Target line range */
+
+ const char *fname; /* File name */
+ int lno_s; /* Start line number */
+ int lno_e; /* End line number */
+ Dwarf_Die cu_die; /* Current CU */
+ Dwarf_Die sp_die;
+ int found;
+};
+
+#endif /* HAVE_DWARF_SUPPORT */
+
+#endif /*_PROBE_FINDER_H */
diff --git a/tools/perf/util/pstack.c b/tools/perf/util/pstack.c
new file mode 100644
index 000000000..a1d1e4ef6
--- /dev/null
+++ b/tools/perf/util/pstack.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Simple pointer stack
+ *
+ * (c) 2010 Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include "pstack.h"
+#include "debug.h"
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+#include <stdlib.h>
+#include <string.h>
+
+struct pstack {
+ unsigned short top;
+ unsigned short max_nr_entries;
+ void *entries[];
+};
+
+struct pstack *pstack__new(unsigned short max_nr_entries)
+{
+ struct pstack *pstack = zalloc((sizeof(*pstack) +
+ max_nr_entries * sizeof(void *)));
+ if (pstack != NULL)
+ pstack->max_nr_entries = max_nr_entries;
+ return pstack;
+}
+
+void pstack__delete(struct pstack *pstack)
+{
+ free(pstack);
+}
+
+bool pstack__empty(const struct pstack *pstack)
+{
+ return pstack->top == 0;
+}
+
+void pstack__remove(struct pstack *pstack, void *key)
+{
+ unsigned short i = pstack->top, last_index = pstack->top - 1;
+
+ while (i-- != 0) {
+ if (pstack->entries[i] == key) {
+ if (i < last_index)
+ memmove(pstack->entries + i,
+ pstack->entries + i + 1,
+ (last_index - i) * sizeof(void *));
+ --pstack->top;
+ return;
+ }
+ }
+ pr_err("%s: %p not on the pstack!\n", __func__, key);
+}
+
+void pstack__push(struct pstack *pstack, void *key)
+{
+ if (pstack->top == pstack->max_nr_entries) {
+ pr_err("%s: top=%d, overflow!\n", __func__, pstack->top);
+ return;
+ }
+ pstack->entries[pstack->top++] = key;
+}
+
+void *pstack__pop(struct pstack *pstack)
+{
+ void *ret;
+
+ if (pstack->top == 0) {
+ pr_err("%s: underflow!\n", __func__);
+ return NULL;
+ }
+
+ ret = pstack->entries[--pstack->top];
+ pstack->entries[pstack->top] = NULL;
+ return ret;
+}
+
+void *pstack__peek(struct pstack *pstack)
+{
+ if (pstack->top == 0)
+ return NULL;
+ return pstack->entries[pstack->top - 1];
+}
diff --git a/tools/perf/util/pstack.h b/tools/perf/util/pstack.h
new file mode 100644
index 000000000..8729b8be0
--- /dev/null
+++ b/tools/perf/util/pstack.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_PSTACK_
+#define _PERF_PSTACK_
+
+#include <stdbool.h>
+
+struct pstack;
+struct pstack *pstack__new(unsigned short max_nr_entries);
+void pstack__delete(struct pstack *pstack);
+bool pstack__empty(const struct pstack *pstack);
+void pstack__remove(struct pstack *pstack, void *key);
+void pstack__push(struct pstack *pstack, void *key);
+void *pstack__pop(struct pstack *pstack);
+void *pstack__peek(struct pstack *pstack);
+
+#endif /* _PERF_PSTACK_ */
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
new file mode 100644
index 000000000..26e1c8d97
--- /dev/null
+++ b/tools/perf/util/python-ext-sources
@@ -0,0 +1,51 @@
+#
+# List of files needed by perf python extension
+#
+# Each source file must be placed on its own line so that it can be
+# processed by Makefile and util/setup.py accordingly.
+#
+
+util/python.c
+../lib/ctype.c
+util/cap.c
+util/evlist.c
+util/evsel.c
+util/evsel_fprintf.c
+util/perf_event_attr_fprintf.c
+util/cpumap.c
+util/memswap.c
+util/mmap.c
+util/namespaces.c
+../lib/bitmap.c
+../lib/find_bit.c
+../lib/list_sort.c
+../lib/hweight.c
+../lib/string.c
+../lib/vsprintf.c
+util/thread_map.c
+util/util.c
+util/cgroup.c
+util/parse-branch-options.c
+util/rblist.c
+util/counts.c
+util/print_binary.c
+util/strlist.c
+util/trace-event.c
+../lib/rbtree.c
+util/string.c
+util/symbol_fprintf.c
+util/units.c
+util/affinity.c
+util/rwsem.c
+util/hashmap.c
+util/perf_regs.c
+util/fncache.c
+util/perf-regs-arch/perf_regs_aarch64.c
+util/perf-regs-arch/perf_regs_arm.c
+util/perf-regs-arch/perf_regs_csky.c
+util/perf-regs-arch/perf_regs_loongarch.c
+util/perf-regs-arch/perf_regs_mips.c
+util/perf-regs-arch/perf_regs_powerpc.c
+util/perf-regs-arch/perf_regs_riscv.c
+util/perf-regs-arch/perf_regs_s390.c
+util/perf-regs-arch/perf_regs_x86.c
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
new file mode 100644
index 000000000..c29f5f0bb
--- /dev/null
+++ b/tools/perf/util/python.c
@@ -0,0 +1,1513 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <Python.h>
+#include <structmember.h>
+#include <inttypes.h>
+#include <poll.h>
+#include <linux/err.h>
+#include <perf/cpumap.h>
+#ifdef HAVE_LIBTRACEEVENT
+#include <traceevent/event-parse.h>
+#endif
+#include <perf/mmap.h>
+#include "evlist.h"
+#include "callchain.h"
+#include "evsel.h"
+#include "event.h"
+#include "print_binary.h"
+#include "thread_map.h"
+#include "trace-event.h"
+#include "mmap.h"
+#include "stat.h"
+#include "metricgroup.h"
+#include "util/bpf-filter.h"
+#include "util/env.h"
+#include "util/pmu.h"
+#include "util/pmus.h"
+#include <internal/lib.h>
+#include "util.h"
+
+#if PY_MAJOR_VERSION < 3
+#define _PyUnicode_FromString(arg) \
+ PyString_FromString(arg)
+#define _PyUnicode_AsString(arg) \
+ PyString_AsString(arg)
+#define _PyUnicode_FromFormat(...) \
+ PyString_FromFormat(__VA_ARGS__)
+#define _PyLong_FromLong(arg) \
+ PyInt_FromLong(arg)
+
+#else
+
+#define _PyUnicode_FromString(arg) \
+ PyUnicode_FromString(arg)
+#define _PyUnicode_FromFormat(...) \
+ PyUnicode_FromFormat(__VA_ARGS__)
+#define _PyLong_FromLong(arg) \
+ PyLong_FromLong(arg)
+#endif
+
+#ifndef Py_TYPE
+#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
+#endif
+
+/*
+ * Avoid bringing in event parsing.
+ */
+int parse_event(struct evlist *evlist __maybe_unused, const char *str __maybe_unused)
+{
+ return 0;
+}
+
+/*
+ * Provide these two so that we don't have to link against callchain.c and
+ * start dragging hist.c, etc.
+ */
+struct callchain_param callchain_param;
+
+int parse_callchain_record(const char *arg __maybe_unused,
+ struct callchain_param *param __maybe_unused)
+{
+ return 0;
+}
+
+/*
+ * Add these not to drag util/env.c
+ */
+struct perf_env perf_env;
+
+const char *perf_env__cpuid(struct perf_env *env __maybe_unused)
+{
+ return NULL;
+}
+
+// This one is a bit easier, wouldn't drag too much, but leave it as a stub we need it here
+const char *perf_env__arch(struct perf_env *env __maybe_unused)
+{
+ return NULL;
+}
+
+/*
+ * These ones are needed not to drag the PMU bandwagon, jevents generated
+ * pmu_sys_event_tables, etc and evsel__find_pmu() is used so far just for
+ * doing per PMU perf_event_attr.exclude_guest handling, not really needed, so
+ * far, for the perf python binding known usecases, revisit if this become
+ * necessary.
+ */
+struct perf_pmu *evsel__find_pmu(const struct evsel *evsel __maybe_unused)
+{
+ return NULL;
+}
+
+int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...)
+{
+ return EOF;
+}
+
+int perf_pmus__num_core_pmus(void)
+{
+ return 1;
+}
+
+bool evsel__is_aux_event(const struct evsel *evsel __maybe_unused)
+{
+ return false;
+}
+
+bool perf_pmus__supports_extended_type(void)
+{
+ return false;
+}
+
+/*
+ * Add this one here not to drag util/metricgroup.c
+ */
+int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,
+ struct rblist *new_metric_events,
+ struct rblist *old_metric_events)
+{
+ return 0;
+}
+
+/*
+ * Add this one here not to drag util/trace-event-info.c
+ */
+char *tracepoint_id_to_name(u64 config)
+{
+ return NULL;
+}
+
+/*
+ * XXX: All these evsel destructors need some better mechanism, like a linked
+ * list of destructors registered when the relevant code indeed is used instead
+ * of having more and more calls in perf_evsel__delete(). -- acme
+ *
+ * For now, add some more:
+ *
+ * Not to drag the BPF bandwagon...
+ */
+void bpf_counter__destroy(struct evsel *evsel);
+int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd);
+int bpf_counter__disable(struct evsel *evsel);
+
+void bpf_counter__destroy(struct evsel *evsel __maybe_unused)
+{
+}
+
+int bpf_counter__install_pe(struct evsel *evsel __maybe_unused, int cpu __maybe_unused, int fd __maybe_unused)
+{
+ return 0;
+}
+
+int bpf_counter__disable(struct evsel *evsel __maybe_unused)
+{
+ return 0;
+}
+
+// not to drag util/bpf-filter.c
+#ifdef HAVE_BPF_SKEL
+int perf_bpf_filter__prepare(struct evsel *evsel __maybe_unused)
+{
+ return 0;
+}
+
+int perf_bpf_filter__destroy(struct evsel *evsel __maybe_unused)
+{
+ return 0;
+}
+#endif
+
+/*
+ * Support debug printing even though util/debug.c is not linked. That means
+ * implementing 'verbose' and 'eprintf'.
+ */
+int verbose;
+int debug_peo_args;
+
+int eprintf(int level, int var, const char *fmt, ...);
+
+int eprintf(int level, int var, const char *fmt, ...)
+{
+ va_list args;
+ int ret = 0;
+
+ if (var >= level) {
+ va_start(args, fmt);
+ ret = vfprintf(stderr, fmt, args);
+ va_end(args);
+ }
+
+ return ret;
+}
+
+/* Define PyVarObject_HEAD_INIT for python 2.5 */
+#ifndef PyVarObject_HEAD_INIT
+# define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
+#endif
+
+#if PY_MAJOR_VERSION < 3
+PyMODINIT_FUNC initperf(void);
+#else
+PyMODINIT_FUNC PyInit_perf(void);
+#endif
+
+#define member_def(type, member, ptype, help) \
+ { #member, ptype, \
+ offsetof(struct pyrf_event, event) + offsetof(struct type, member), \
+ 0, help }
+
+#define sample_member_def(name, member, ptype, help) \
+ { #name, ptype, \
+ offsetof(struct pyrf_event, sample) + offsetof(struct perf_sample, member), \
+ 0, help }
+
+struct pyrf_event {
+ PyObject_HEAD
+ struct evsel *evsel;
+ struct perf_sample sample;
+ union perf_event event;
+};
+
+#define sample_members \
+ sample_member_def(sample_ip, ip, T_ULONGLONG, "event type"), \
+ sample_member_def(sample_pid, pid, T_INT, "event pid"), \
+ sample_member_def(sample_tid, tid, T_INT, "event tid"), \
+ sample_member_def(sample_time, time, T_ULONGLONG, "event timestamp"), \
+ sample_member_def(sample_addr, addr, T_ULONGLONG, "event addr"), \
+ sample_member_def(sample_id, id, T_ULONGLONG, "event id"), \
+ sample_member_def(sample_stream_id, stream_id, T_ULONGLONG, "event stream id"), \
+ sample_member_def(sample_period, period, T_ULONGLONG, "event period"), \
+ sample_member_def(sample_cpu, cpu, T_UINT, "event cpu"),
+
+static char pyrf_mmap_event__doc[] = PyDoc_STR("perf mmap event object.");
+
+static PyMemberDef pyrf_mmap_event__members[] = {
+ sample_members
+ member_def(perf_event_header, type, T_UINT, "event type"),
+ member_def(perf_event_header, misc, T_UINT, "event misc"),
+ member_def(perf_record_mmap, pid, T_UINT, "event pid"),
+ member_def(perf_record_mmap, tid, T_UINT, "event tid"),
+ member_def(perf_record_mmap, start, T_ULONGLONG, "start of the map"),
+ member_def(perf_record_mmap, len, T_ULONGLONG, "map length"),
+ member_def(perf_record_mmap, pgoff, T_ULONGLONG, "page offset"),
+ member_def(perf_record_mmap, filename, T_STRING_INPLACE, "backing store"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_mmap_event__repr(struct pyrf_event *pevent)
+{
+ PyObject *ret;
+ char *s;
+
+ if (asprintf(&s, "{ type: mmap, pid: %u, tid: %u, start: %#" PRI_lx64 ", "
+ "length: %#" PRI_lx64 ", offset: %#" PRI_lx64 ", "
+ "filename: %s }",
+ pevent->event.mmap.pid, pevent->event.mmap.tid,
+ pevent->event.mmap.start, pevent->event.mmap.len,
+ pevent->event.mmap.pgoff, pevent->event.mmap.filename) < 0) {
+ ret = PyErr_NoMemory();
+ } else {
+ ret = _PyUnicode_FromString(s);
+ free(s);
+ }
+ return ret;
+}
+
+static PyTypeObject pyrf_mmap_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.mmap_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_mmap_event__doc,
+ .tp_members = pyrf_mmap_event__members,
+ .tp_repr = (reprfunc)pyrf_mmap_event__repr,
+};
+
+static char pyrf_task_event__doc[] = PyDoc_STR("perf task (fork/exit) event object.");
+
+static PyMemberDef pyrf_task_event__members[] = {
+ sample_members
+ member_def(perf_event_header, type, T_UINT, "event type"),
+ member_def(perf_record_fork, pid, T_UINT, "event pid"),
+ member_def(perf_record_fork, ppid, T_UINT, "event ppid"),
+ member_def(perf_record_fork, tid, T_UINT, "event tid"),
+ member_def(perf_record_fork, ptid, T_UINT, "event ptid"),
+ member_def(perf_record_fork, time, T_ULONGLONG, "timestamp"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_task_event__repr(struct pyrf_event *pevent)
+{
+ return _PyUnicode_FromFormat("{ type: %s, pid: %u, ppid: %u, tid: %u, "
+ "ptid: %u, time: %" PRI_lu64 "}",
+ pevent->event.header.type == PERF_RECORD_FORK ? "fork" : "exit",
+ pevent->event.fork.pid,
+ pevent->event.fork.ppid,
+ pevent->event.fork.tid,
+ pevent->event.fork.ptid,
+ pevent->event.fork.time);
+}
+
+static PyTypeObject pyrf_task_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.task_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_task_event__doc,
+ .tp_members = pyrf_task_event__members,
+ .tp_repr = (reprfunc)pyrf_task_event__repr,
+};
+
+static char pyrf_comm_event__doc[] = PyDoc_STR("perf comm event object.");
+
+static PyMemberDef pyrf_comm_event__members[] = {
+ sample_members
+ member_def(perf_event_header, type, T_UINT, "event type"),
+ member_def(perf_record_comm, pid, T_UINT, "event pid"),
+ member_def(perf_record_comm, tid, T_UINT, "event tid"),
+ member_def(perf_record_comm, comm, T_STRING_INPLACE, "process name"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_comm_event__repr(struct pyrf_event *pevent)
+{
+ return _PyUnicode_FromFormat("{ type: comm, pid: %u, tid: %u, comm: %s }",
+ pevent->event.comm.pid,
+ pevent->event.comm.tid,
+ pevent->event.comm.comm);
+}
+
+static PyTypeObject pyrf_comm_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.comm_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_comm_event__doc,
+ .tp_members = pyrf_comm_event__members,
+ .tp_repr = (reprfunc)pyrf_comm_event__repr,
+};
+
+static char pyrf_throttle_event__doc[] = PyDoc_STR("perf throttle event object.");
+
+static PyMemberDef pyrf_throttle_event__members[] = {
+ sample_members
+ member_def(perf_event_header, type, T_UINT, "event type"),
+ member_def(perf_record_throttle, time, T_ULONGLONG, "timestamp"),
+ member_def(perf_record_throttle, id, T_ULONGLONG, "event id"),
+ member_def(perf_record_throttle, stream_id, T_ULONGLONG, "event stream id"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_throttle_event__repr(struct pyrf_event *pevent)
+{
+ struct perf_record_throttle *te = (struct perf_record_throttle *)(&pevent->event.header + 1);
+
+ return _PyUnicode_FromFormat("{ type: %sthrottle, time: %" PRI_lu64 ", id: %" PRI_lu64
+ ", stream_id: %" PRI_lu64 " }",
+ pevent->event.header.type == PERF_RECORD_THROTTLE ? "" : "un",
+ te->time, te->id, te->stream_id);
+}
+
+static PyTypeObject pyrf_throttle_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.throttle_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_throttle_event__doc,
+ .tp_members = pyrf_throttle_event__members,
+ .tp_repr = (reprfunc)pyrf_throttle_event__repr,
+};
+
+static char pyrf_lost_event__doc[] = PyDoc_STR("perf lost event object.");
+
+static PyMemberDef pyrf_lost_event__members[] = {
+ sample_members
+ member_def(perf_record_lost, id, T_ULONGLONG, "event id"),
+ member_def(perf_record_lost, lost, T_ULONGLONG, "number of lost events"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_lost_event__repr(struct pyrf_event *pevent)
+{
+ PyObject *ret;
+ char *s;
+
+ if (asprintf(&s, "{ type: lost, id: %#" PRI_lx64 ", "
+ "lost: %#" PRI_lx64 " }",
+ pevent->event.lost.id, pevent->event.lost.lost) < 0) {
+ ret = PyErr_NoMemory();
+ } else {
+ ret = _PyUnicode_FromString(s);
+ free(s);
+ }
+ return ret;
+}
+
+static PyTypeObject pyrf_lost_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.lost_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_lost_event__doc,
+ .tp_members = pyrf_lost_event__members,
+ .tp_repr = (reprfunc)pyrf_lost_event__repr,
+};
+
+static char pyrf_read_event__doc[] = PyDoc_STR("perf read event object.");
+
+static PyMemberDef pyrf_read_event__members[] = {
+ sample_members
+ member_def(perf_record_read, pid, T_UINT, "event pid"),
+ member_def(perf_record_read, tid, T_UINT, "event tid"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_read_event__repr(struct pyrf_event *pevent)
+{
+ return _PyUnicode_FromFormat("{ type: read, pid: %u, tid: %u }",
+ pevent->event.read.pid,
+ pevent->event.read.tid);
+ /*
+ * FIXME: return the array of read values,
+ * making this method useful ;-)
+ */
+}
+
+static PyTypeObject pyrf_read_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.read_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_read_event__doc,
+ .tp_members = pyrf_read_event__members,
+ .tp_repr = (reprfunc)pyrf_read_event__repr,
+};
+
+static char pyrf_sample_event__doc[] = PyDoc_STR("perf sample event object.");
+
+static PyMemberDef pyrf_sample_event__members[] = {
+ sample_members
+ member_def(perf_event_header, type, T_UINT, "event type"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_sample_event__repr(struct pyrf_event *pevent)
+{
+ PyObject *ret;
+ char *s;
+
+ if (asprintf(&s, "{ type: sample }") < 0) {
+ ret = PyErr_NoMemory();
+ } else {
+ ret = _PyUnicode_FromString(s);
+ free(s);
+ }
+ return ret;
+}
+
+#ifdef HAVE_LIBTRACEEVENT
+static bool is_tracepoint(struct pyrf_event *pevent)
+{
+ return pevent->evsel->core.attr.type == PERF_TYPE_TRACEPOINT;
+}
+
+static PyObject*
+tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field)
+{
+ struct tep_handle *pevent = field->event->tep;
+ void *data = pe->sample.raw_data;
+ PyObject *ret = NULL;
+ unsigned long long val;
+ unsigned int offset, len;
+
+ if (field->flags & TEP_FIELD_IS_ARRAY) {
+ offset = field->offset;
+ len = field->size;
+ if (field->flags & TEP_FIELD_IS_DYNAMIC) {
+ val = tep_read_number(pevent, data + offset, len);
+ offset = val;
+ len = offset >> 16;
+ offset &= 0xffff;
+ if (tep_field_is_relative(field->flags))
+ offset += field->offset + field->size;
+ }
+ if (field->flags & TEP_FIELD_IS_STRING &&
+ is_printable_array(data + offset, len)) {
+ ret = _PyUnicode_FromString((char *)data + offset);
+ } else {
+ ret = PyByteArray_FromStringAndSize((const char *) data + offset, len);
+ field->flags &= ~TEP_FIELD_IS_STRING;
+ }
+ } else {
+ val = tep_read_number(pevent, data + field->offset,
+ field->size);
+ if (field->flags & TEP_FIELD_IS_POINTER)
+ ret = PyLong_FromUnsignedLong((unsigned long) val);
+ else if (field->flags & TEP_FIELD_IS_SIGNED)
+ ret = PyLong_FromLong((long) val);
+ else
+ ret = PyLong_FromUnsignedLong((unsigned long) val);
+ }
+
+ return ret;
+}
+
+static PyObject*
+get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name)
+{
+ const char *str = _PyUnicode_AsString(PyObject_Str(attr_name));
+ struct evsel *evsel = pevent->evsel;
+ struct tep_format_field *field;
+
+ if (!evsel->tp_format) {
+ struct tep_event *tp_format;
+
+ tp_format = trace_event__tp_format_id(evsel->core.attr.config);
+ if (IS_ERR_OR_NULL(tp_format))
+ return NULL;
+
+ evsel->tp_format = tp_format;
+ }
+
+ field = tep_find_any_field(evsel->tp_format, str);
+ if (!field)
+ return NULL;
+
+ return tracepoint_field(pevent, field);
+}
+#endif /* HAVE_LIBTRACEEVENT */
+
+static PyObject*
+pyrf_sample_event__getattro(struct pyrf_event *pevent, PyObject *attr_name)
+{
+ PyObject *obj = NULL;
+
+#ifdef HAVE_LIBTRACEEVENT
+ if (is_tracepoint(pevent))
+ obj = get_tracepoint_field(pevent, attr_name);
+#endif
+
+ return obj ?: PyObject_GenericGetAttr((PyObject *) pevent, attr_name);
+}
+
+static PyTypeObject pyrf_sample_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.sample_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_sample_event__doc,
+ .tp_members = pyrf_sample_event__members,
+ .tp_repr = (reprfunc)pyrf_sample_event__repr,
+ .tp_getattro = (getattrofunc) pyrf_sample_event__getattro,
+};
+
+static char pyrf_context_switch_event__doc[] = PyDoc_STR("perf context_switch event object.");
+
+static PyMemberDef pyrf_context_switch_event__members[] = {
+ sample_members
+ member_def(perf_event_header, type, T_UINT, "event type"),
+ member_def(perf_record_switch, next_prev_pid, T_UINT, "next/prev pid"),
+ member_def(perf_record_switch, next_prev_tid, T_UINT, "next/prev tid"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_context_switch_event__repr(struct pyrf_event *pevent)
+{
+ PyObject *ret;
+ char *s;
+
+ if (asprintf(&s, "{ type: context_switch, next_prev_pid: %u, next_prev_tid: %u, switch_out: %u }",
+ pevent->event.context_switch.next_prev_pid,
+ pevent->event.context_switch.next_prev_tid,
+ !!(pevent->event.header.misc & PERF_RECORD_MISC_SWITCH_OUT)) < 0) {
+ ret = PyErr_NoMemory();
+ } else {
+ ret = _PyUnicode_FromString(s);
+ free(s);
+ }
+ return ret;
+}
+
+static PyTypeObject pyrf_context_switch_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.context_switch_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_context_switch_event__doc,
+ .tp_members = pyrf_context_switch_event__members,
+ .tp_repr = (reprfunc)pyrf_context_switch_event__repr,
+};
+
+static int pyrf_event__setup_types(void)
+{
+ int err;
+ pyrf_mmap_event__type.tp_new =
+ pyrf_task_event__type.tp_new =
+ pyrf_comm_event__type.tp_new =
+ pyrf_lost_event__type.tp_new =
+ pyrf_read_event__type.tp_new =
+ pyrf_sample_event__type.tp_new =
+ pyrf_context_switch_event__type.tp_new =
+ pyrf_throttle_event__type.tp_new = PyType_GenericNew;
+ err = PyType_Ready(&pyrf_mmap_event__type);
+ if (err < 0)
+ goto out;
+ err = PyType_Ready(&pyrf_lost_event__type);
+ if (err < 0)
+ goto out;
+ err = PyType_Ready(&pyrf_task_event__type);
+ if (err < 0)
+ goto out;
+ err = PyType_Ready(&pyrf_comm_event__type);
+ if (err < 0)
+ goto out;
+ err = PyType_Ready(&pyrf_throttle_event__type);
+ if (err < 0)
+ goto out;
+ err = PyType_Ready(&pyrf_read_event__type);
+ if (err < 0)
+ goto out;
+ err = PyType_Ready(&pyrf_sample_event__type);
+ if (err < 0)
+ goto out;
+ err = PyType_Ready(&pyrf_context_switch_event__type);
+ if (err < 0)
+ goto out;
+out:
+ return err;
+}
+
+static PyTypeObject *pyrf_event__type[] = {
+ [PERF_RECORD_MMAP] = &pyrf_mmap_event__type,
+ [PERF_RECORD_LOST] = &pyrf_lost_event__type,
+ [PERF_RECORD_COMM] = &pyrf_comm_event__type,
+ [PERF_RECORD_EXIT] = &pyrf_task_event__type,
+ [PERF_RECORD_THROTTLE] = &pyrf_throttle_event__type,
+ [PERF_RECORD_UNTHROTTLE] = &pyrf_throttle_event__type,
+ [PERF_RECORD_FORK] = &pyrf_task_event__type,
+ [PERF_RECORD_READ] = &pyrf_read_event__type,
+ [PERF_RECORD_SAMPLE] = &pyrf_sample_event__type,
+ [PERF_RECORD_SWITCH] = &pyrf_context_switch_event__type,
+ [PERF_RECORD_SWITCH_CPU_WIDE] = &pyrf_context_switch_event__type,
+};
+
+static PyObject *pyrf_event__new(union perf_event *event)
+{
+ struct pyrf_event *pevent;
+ PyTypeObject *ptype;
+
+ if ((event->header.type < PERF_RECORD_MMAP ||
+ event->header.type > PERF_RECORD_SAMPLE) &&
+ !(event->header.type == PERF_RECORD_SWITCH ||
+ event->header.type == PERF_RECORD_SWITCH_CPU_WIDE))
+ return NULL;
+
+ ptype = pyrf_event__type[event->header.type];
+ pevent = PyObject_New(struct pyrf_event, ptype);
+ if (pevent != NULL)
+ memcpy(&pevent->event, event, event->header.size);
+ return (PyObject *)pevent;
+}
+
+struct pyrf_cpu_map {
+ PyObject_HEAD
+
+ struct perf_cpu_map *cpus;
+};
+
+static int pyrf_cpu_map__init(struct pyrf_cpu_map *pcpus,
+ PyObject *args, PyObject *kwargs)
+{
+ static char *kwlist[] = { "cpustr", NULL };
+ char *cpustr = NULL;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|s",
+ kwlist, &cpustr))
+ return -1;
+
+ pcpus->cpus = perf_cpu_map__new(cpustr);
+ if (pcpus->cpus == NULL)
+ return -1;
+ return 0;
+}
+
+static void pyrf_cpu_map__delete(struct pyrf_cpu_map *pcpus)
+{
+ perf_cpu_map__put(pcpus->cpus);
+ Py_TYPE(pcpus)->tp_free((PyObject*)pcpus);
+}
+
+static Py_ssize_t pyrf_cpu_map__length(PyObject *obj)
+{
+ struct pyrf_cpu_map *pcpus = (void *)obj;
+
+ return perf_cpu_map__nr(pcpus->cpus);
+}
+
+static PyObject *pyrf_cpu_map__item(PyObject *obj, Py_ssize_t i)
+{
+ struct pyrf_cpu_map *pcpus = (void *)obj;
+
+ if (i >= perf_cpu_map__nr(pcpus->cpus))
+ return NULL;
+
+ return Py_BuildValue("i", perf_cpu_map__cpu(pcpus->cpus, i).cpu);
+}
+
+static PySequenceMethods pyrf_cpu_map__sequence_methods = {
+ .sq_length = pyrf_cpu_map__length,
+ .sq_item = pyrf_cpu_map__item,
+};
+
+static char pyrf_cpu_map__doc[] = PyDoc_STR("cpu map object.");
+
+static PyTypeObject pyrf_cpu_map__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.cpu_map",
+ .tp_basicsize = sizeof(struct pyrf_cpu_map),
+ .tp_dealloc = (destructor)pyrf_cpu_map__delete,
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_cpu_map__doc,
+ .tp_as_sequence = &pyrf_cpu_map__sequence_methods,
+ .tp_init = (initproc)pyrf_cpu_map__init,
+};
+
+static int pyrf_cpu_map__setup_types(void)
+{
+ pyrf_cpu_map__type.tp_new = PyType_GenericNew;
+ return PyType_Ready(&pyrf_cpu_map__type);
+}
+
+struct pyrf_thread_map {
+ PyObject_HEAD
+
+ struct perf_thread_map *threads;
+};
+
+static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads,
+ PyObject *args, PyObject *kwargs)
+{
+ static char *kwlist[] = { "pid", "tid", "uid", NULL };
+ int pid = -1, tid = -1, uid = UINT_MAX;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iii",
+ kwlist, &pid, &tid, &uid))
+ return -1;
+
+ pthreads->threads = thread_map__new(pid, tid, uid);
+ if (pthreads->threads == NULL)
+ return -1;
+ return 0;
+}
+
+static void pyrf_thread_map__delete(struct pyrf_thread_map *pthreads)
+{
+ perf_thread_map__put(pthreads->threads);
+ Py_TYPE(pthreads)->tp_free((PyObject*)pthreads);
+}
+
+static Py_ssize_t pyrf_thread_map__length(PyObject *obj)
+{
+ struct pyrf_thread_map *pthreads = (void *)obj;
+
+ return perf_thread_map__nr(pthreads->threads);
+}
+
+static PyObject *pyrf_thread_map__item(PyObject *obj, Py_ssize_t i)
+{
+ struct pyrf_thread_map *pthreads = (void *)obj;
+
+ if (i >= perf_thread_map__nr(pthreads->threads))
+ return NULL;
+
+ return Py_BuildValue("i", perf_thread_map__pid(pthreads->threads, i));
+}
+
+static PySequenceMethods pyrf_thread_map__sequence_methods = {
+ .sq_length = pyrf_thread_map__length,
+ .sq_item = pyrf_thread_map__item,
+};
+
+static char pyrf_thread_map__doc[] = PyDoc_STR("thread map object.");
+
+static PyTypeObject pyrf_thread_map__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.thread_map",
+ .tp_basicsize = sizeof(struct pyrf_thread_map),
+ .tp_dealloc = (destructor)pyrf_thread_map__delete,
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_thread_map__doc,
+ .tp_as_sequence = &pyrf_thread_map__sequence_methods,
+ .tp_init = (initproc)pyrf_thread_map__init,
+};
+
+static int pyrf_thread_map__setup_types(void)
+{
+ pyrf_thread_map__type.tp_new = PyType_GenericNew;
+ return PyType_Ready(&pyrf_thread_map__type);
+}
+
+struct pyrf_evsel {
+ PyObject_HEAD
+
+ struct evsel evsel;
+};
+
+static int pyrf_evsel__init(struct pyrf_evsel *pevsel,
+ PyObject *args, PyObject *kwargs)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .sample_type = PERF_SAMPLE_PERIOD | PERF_SAMPLE_TID,
+ };
+ static char *kwlist[] = {
+ "type",
+ "config",
+ "sample_freq",
+ "sample_period",
+ "sample_type",
+ "read_format",
+ "disabled",
+ "inherit",
+ "pinned",
+ "exclusive",
+ "exclude_user",
+ "exclude_kernel",
+ "exclude_hv",
+ "exclude_idle",
+ "mmap",
+ "context_switch",
+ "comm",
+ "freq",
+ "inherit_stat",
+ "enable_on_exec",
+ "task",
+ "watermark",
+ "precise_ip",
+ "mmap_data",
+ "sample_id_all",
+ "wakeup_events",
+ "bp_type",
+ "bp_addr",
+ "bp_len",
+ NULL
+ };
+ u64 sample_period = 0;
+ u32 disabled = 0,
+ inherit = 0,
+ pinned = 0,
+ exclusive = 0,
+ exclude_user = 0,
+ exclude_kernel = 0,
+ exclude_hv = 0,
+ exclude_idle = 0,
+ mmap = 0,
+ context_switch = 0,
+ comm = 0,
+ freq = 1,
+ inherit_stat = 0,
+ enable_on_exec = 0,
+ task = 0,
+ watermark = 0,
+ precise_ip = 0,
+ mmap_data = 0,
+ sample_id_all = 1;
+ int idx = 0;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs,
+ "|iKiKKiiiiiiiiiiiiiiiiiiiiiiKK", kwlist,
+ &attr.type, &attr.config, &attr.sample_freq,
+ &sample_period, &attr.sample_type,
+ &attr.read_format, &disabled, &inherit,
+ &pinned, &exclusive, &exclude_user,
+ &exclude_kernel, &exclude_hv, &exclude_idle,
+ &mmap, &context_switch, &comm, &freq, &inherit_stat,
+ &enable_on_exec, &task, &watermark,
+ &precise_ip, &mmap_data, &sample_id_all,
+ &attr.wakeup_events, &attr.bp_type,
+ &attr.bp_addr, &attr.bp_len, &idx))
+ return -1;
+
+ /* union... */
+ if (sample_period != 0) {
+ if (attr.sample_freq != 0)
+ return -1; /* FIXME: throw right exception */
+ attr.sample_period = sample_period;
+ }
+
+ /* Bitfields */
+ attr.disabled = disabled;
+ attr.inherit = inherit;
+ attr.pinned = pinned;
+ attr.exclusive = exclusive;
+ attr.exclude_user = exclude_user;
+ attr.exclude_kernel = exclude_kernel;
+ attr.exclude_hv = exclude_hv;
+ attr.exclude_idle = exclude_idle;
+ attr.mmap = mmap;
+ attr.context_switch = context_switch;
+ attr.comm = comm;
+ attr.freq = freq;
+ attr.inherit_stat = inherit_stat;
+ attr.enable_on_exec = enable_on_exec;
+ attr.task = task;
+ attr.watermark = watermark;
+ attr.precise_ip = precise_ip;
+ attr.mmap_data = mmap_data;
+ attr.sample_id_all = sample_id_all;
+ attr.size = sizeof(attr);
+
+ evsel__init(&pevsel->evsel, &attr, idx);
+ return 0;
+}
+
+static void pyrf_evsel__delete(struct pyrf_evsel *pevsel)
+{
+ evsel__exit(&pevsel->evsel);
+ Py_TYPE(pevsel)->tp_free((PyObject*)pevsel);
+}
+
+static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel,
+ PyObject *args, PyObject *kwargs)
+{
+ struct evsel *evsel = &pevsel->evsel;
+ struct perf_cpu_map *cpus = NULL;
+ struct perf_thread_map *threads = NULL;
+ PyObject *pcpus = NULL, *pthreads = NULL;
+ int group = 0, inherit = 0;
+ static char *kwlist[] = { "cpus", "threads", "group", "inherit", NULL };
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist,
+ &pcpus, &pthreads, &group, &inherit))
+ return NULL;
+
+ if (pthreads != NULL)
+ threads = ((struct pyrf_thread_map *)pthreads)->threads;
+
+ if (pcpus != NULL)
+ cpus = ((struct pyrf_cpu_map *)pcpus)->cpus;
+
+ evsel->core.attr.inherit = inherit;
+ /*
+ * This will group just the fds for this single evsel, to group
+ * multiple events, use evlist.open().
+ */
+ if (evsel__open(evsel, cpus, threads) < 0) {
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static PyMethodDef pyrf_evsel__methods[] = {
+ {
+ .ml_name = "open",
+ .ml_meth = (PyCFunction)pyrf_evsel__open,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("open the event selector file descriptor table.")
+ },
+ { .ml_name = NULL, }
+};
+
+static char pyrf_evsel__doc[] = PyDoc_STR("perf event selector list object.");
+
+static PyTypeObject pyrf_evsel__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.evsel",
+ .tp_basicsize = sizeof(struct pyrf_evsel),
+ .tp_dealloc = (destructor)pyrf_evsel__delete,
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_evsel__doc,
+ .tp_methods = pyrf_evsel__methods,
+ .tp_init = (initproc)pyrf_evsel__init,
+};
+
+static int pyrf_evsel__setup_types(void)
+{
+ pyrf_evsel__type.tp_new = PyType_GenericNew;
+ return PyType_Ready(&pyrf_evsel__type);
+}
+
+struct pyrf_evlist {
+ PyObject_HEAD
+
+ struct evlist evlist;
+};
+
+static int pyrf_evlist__init(struct pyrf_evlist *pevlist,
+ PyObject *args, PyObject *kwargs __maybe_unused)
+{
+ PyObject *pcpus = NULL, *pthreads = NULL;
+ struct perf_cpu_map *cpus;
+ struct perf_thread_map *threads;
+
+ if (!PyArg_ParseTuple(args, "OO", &pcpus, &pthreads))
+ return -1;
+
+ threads = ((struct pyrf_thread_map *)pthreads)->threads;
+ cpus = ((struct pyrf_cpu_map *)pcpus)->cpus;
+ evlist__init(&pevlist->evlist, cpus, threads);
+ return 0;
+}
+
+static void pyrf_evlist__delete(struct pyrf_evlist *pevlist)
+{
+ evlist__exit(&pevlist->evlist);
+ Py_TYPE(pevlist)->tp_free((PyObject*)pevlist);
+}
+
+static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist,
+ PyObject *args, PyObject *kwargs)
+{
+ struct evlist *evlist = &pevlist->evlist;
+ static char *kwlist[] = { "pages", "overwrite", NULL };
+ int pages = 128, overwrite = false;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", kwlist,
+ &pages, &overwrite))
+ return NULL;
+
+ if (evlist__mmap(evlist, pages) < 0) {
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist,
+ PyObject *args, PyObject *kwargs)
+{
+ struct evlist *evlist = &pevlist->evlist;
+ static char *kwlist[] = { "timeout", NULL };
+ int timeout = -1, n;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &timeout))
+ return NULL;
+
+ n = evlist__poll(evlist, timeout);
+ if (n < 0) {
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+
+ return Py_BuildValue("i", n);
+}
+
+static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist,
+ PyObject *args __maybe_unused,
+ PyObject *kwargs __maybe_unused)
+{
+ struct evlist *evlist = &pevlist->evlist;
+ PyObject *list = PyList_New(0);
+ int i;
+
+ for (i = 0; i < evlist->core.pollfd.nr; ++i) {
+ PyObject *file;
+#if PY_MAJOR_VERSION < 3
+ FILE *fp = fdopen(evlist->core.pollfd.entries[i].fd, "r");
+
+ if (fp == NULL)
+ goto free_list;
+
+ file = PyFile_FromFile(fp, "perf", "r", NULL);
+#else
+ file = PyFile_FromFd(evlist->core.pollfd.entries[i].fd, "perf", "r", -1,
+ NULL, NULL, NULL, 0);
+#endif
+ if (file == NULL)
+ goto free_list;
+
+ if (PyList_Append(list, file) != 0) {
+ Py_DECREF(file);
+ goto free_list;
+ }
+
+ Py_DECREF(file);
+ }
+
+ return list;
+free_list:
+ return PyErr_NoMemory();
+}
+
+
+static PyObject *pyrf_evlist__add(struct pyrf_evlist *pevlist,
+ PyObject *args,
+ PyObject *kwargs __maybe_unused)
+{
+ struct evlist *evlist = &pevlist->evlist;
+ PyObject *pevsel;
+ struct evsel *evsel;
+
+ if (!PyArg_ParseTuple(args, "O", &pevsel))
+ return NULL;
+
+ Py_INCREF(pevsel);
+ evsel = &((struct pyrf_evsel *)pevsel)->evsel;
+ evsel->core.idx = evlist->core.nr_entries;
+ evlist__add(evlist, evsel);
+
+ return Py_BuildValue("i", evlist->core.nr_entries);
+}
+
+static struct mmap *get_md(struct evlist *evlist, int cpu)
+{
+ int i;
+
+ for (i = 0; i < evlist->core.nr_mmaps; i++) {
+ struct mmap *md = &evlist->mmap[i];
+
+ if (md->core.cpu.cpu == cpu)
+ return md;
+ }
+
+ return NULL;
+}
+
+static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
+ PyObject *args, PyObject *kwargs)
+{
+ struct evlist *evlist = &pevlist->evlist;
+ union perf_event *event;
+ int sample_id_all = 1, cpu;
+ static char *kwlist[] = { "cpu", "sample_id_all", NULL };
+ struct mmap *md;
+ int err;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist,
+ &cpu, &sample_id_all))
+ return NULL;
+
+ md = get_md(evlist, cpu);
+ if (!md)
+ return NULL;
+
+ if (perf_mmap__read_init(&md->core) < 0)
+ goto end;
+
+ event = perf_mmap__read_event(&md->core);
+ if (event != NULL) {
+ PyObject *pyevent = pyrf_event__new(event);
+ struct pyrf_event *pevent = (struct pyrf_event *)pyevent;
+ struct evsel *evsel;
+
+ if (pyevent == NULL)
+ return PyErr_NoMemory();
+
+ evsel = evlist__event2evsel(evlist, event);
+ if (!evsel) {
+ Py_INCREF(Py_None);
+ return Py_None;
+ }
+
+ pevent->evsel = evsel;
+
+ err = evsel__parse_sample(evsel, event, &pevent->sample);
+
+ /* Consume the even only after we parsed it out. */
+ perf_mmap__consume(&md->core);
+
+ if (err)
+ return PyErr_Format(PyExc_OSError,
+ "perf: can't parse sample, err=%d", err);
+ return pyevent;
+ }
+end:
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist,
+ PyObject *args, PyObject *kwargs)
+{
+ struct evlist *evlist = &pevlist->evlist;
+
+ if (evlist__open(evlist) < 0) {
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static PyMethodDef pyrf_evlist__methods[] = {
+ {
+ .ml_name = "mmap",
+ .ml_meth = (PyCFunction)pyrf_evlist__mmap,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("mmap the file descriptor table.")
+ },
+ {
+ .ml_name = "open",
+ .ml_meth = (PyCFunction)pyrf_evlist__open,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("open the file descriptors.")
+ },
+ {
+ .ml_name = "poll",
+ .ml_meth = (PyCFunction)pyrf_evlist__poll,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("poll the file descriptor table.")
+ },
+ {
+ .ml_name = "get_pollfd",
+ .ml_meth = (PyCFunction)pyrf_evlist__get_pollfd,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("get the poll file descriptor table.")
+ },
+ {
+ .ml_name = "add",
+ .ml_meth = (PyCFunction)pyrf_evlist__add,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("adds an event selector to the list.")
+ },
+ {
+ .ml_name = "read_on_cpu",
+ .ml_meth = (PyCFunction)pyrf_evlist__read_on_cpu,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("reads an event.")
+ },
+ { .ml_name = NULL, }
+};
+
+static Py_ssize_t pyrf_evlist__length(PyObject *obj)
+{
+ struct pyrf_evlist *pevlist = (void *)obj;
+
+ return pevlist->evlist.core.nr_entries;
+}
+
+static PyObject *pyrf_evlist__item(PyObject *obj, Py_ssize_t i)
+{
+ struct pyrf_evlist *pevlist = (void *)obj;
+ struct evsel *pos;
+
+ if (i >= pevlist->evlist.core.nr_entries)
+ return NULL;
+
+ evlist__for_each_entry(&pevlist->evlist, pos) {
+ if (i-- == 0)
+ break;
+ }
+
+ return Py_BuildValue("O", container_of(pos, struct pyrf_evsel, evsel));
+}
+
+static PySequenceMethods pyrf_evlist__sequence_methods = {
+ .sq_length = pyrf_evlist__length,
+ .sq_item = pyrf_evlist__item,
+};
+
+static char pyrf_evlist__doc[] = PyDoc_STR("perf event selector list object.");
+
+static PyTypeObject pyrf_evlist__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.evlist",
+ .tp_basicsize = sizeof(struct pyrf_evlist),
+ .tp_dealloc = (destructor)pyrf_evlist__delete,
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_as_sequence = &pyrf_evlist__sequence_methods,
+ .tp_doc = pyrf_evlist__doc,
+ .tp_methods = pyrf_evlist__methods,
+ .tp_init = (initproc)pyrf_evlist__init,
+};
+
+static int pyrf_evlist__setup_types(void)
+{
+ pyrf_evlist__type.tp_new = PyType_GenericNew;
+ return PyType_Ready(&pyrf_evlist__type);
+}
+
+#define PERF_CONST(name) { #name, PERF_##name }
+
+static struct {
+ const char *name;
+ int value;
+} perf__constants[] = {
+ PERF_CONST(TYPE_HARDWARE),
+ PERF_CONST(TYPE_SOFTWARE),
+ PERF_CONST(TYPE_TRACEPOINT),
+ PERF_CONST(TYPE_HW_CACHE),
+ PERF_CONST(TYPE_RAW),
+ PERF_CONST(TYPE_BREAKPOINT),
+
+ PERF_CONST(COUNT_HW_CPU_CYCLES),
+ PERF_CONST(COUNT_HW_INSTRUCTIONS),
+ PERF_CONST(COUNT_HW_CACHE_REFERENCES),
+ PERF_CONST(COUNT_HW_CACHE_MISSES),
+ PERF_CONST(COUNT_HW_BRANCH_INSTRUCTIONS),
+ PERF_CONST(COUNT_HW_BRANCH_MISSES),
+ PERF_CONST(COUNT_HW_BUS_CYCLES),
+ PERF_CONST(COUNT_HW_CACHE_L1D),
+ PERF_CONST(COUNT_HW_CACHE_L1I),
+ PERF_CONST(COUNT_HW_CACHE_LL),
+ PERF_CONST(COUNT_HW_CACHE_DTLB),
+ PERF_CONST(COUNT_HW_CACHE_ITLB),
+ PERF_CONST(COUNT_HW_CACHE_BPU),
+ PERF_CONST(COUNT_HW_CACHE_OP_READ),
+ PERF_CONST(COUNT_HW_CACHE_OP_WRITE),
+ PERF_CONST(COUNT_HW_CACHE_OP_PREFETCH),
+ PERF_CONST(COUNT_HW_CACHE_RESULT_ACCESS),
+ PERF_CONST(COUNT_HW_CACHE_RESULT_MISS),
+
+ PERF_CONST(COUNT_HW_STALLED_CYCLES_FRONTEND),
+ PERF_CONST(COUNT_HW_STALLED_CYCLES_BACKEND),
+
+ PERF_CONST(COUNT_SW_CPU_CLOCK),
+ PERF_CONST(COUNT_SW_TASK_CLOCK),
+ PERF_CONST(COUNT_SW_PAGE_FAULTS),
+ PERF_CONST(COUNT_SW_CONTEXT_SWITCHES),
+ PERF_CONST(COUNT_SW_CPU_MIGRATIONS),
+ PERF_CONST(COUNT_SW_PAGE_FAULTS_MIN),
+ PERF_CONST(COUNT_SW_PAGE_FAULTS_MAJ),
+ PERF_CONST(COUNT_SW_ALIGNMENT_FAULTS),
+ PERF_CONST(COUNT_SW_EMULATION_FAULTS),
+ PERF_CONST(COUNT_SW_DUMMY),
+
+ PERF_CONST(SAMPLE_IP),
+ PERF_CONST(SAMPLE_TID),
+ PERF_CONST(SAMPLE_TIME),
+ PERF_CONST(SAMPLE_ADDR),
+ PERF_CONST(SAMPLE_READ),
+ PERF_CONST(SAMPLE_CALLCHAIN),
+ PERF_CONST(SAMPLE_ID),
+ PERF_CONST(SAMPLE_CPU),
+ PERF_CONST(SAMPLE_PERIOD),
+ PERF_CONST(SAMPLE_STREAM_ID),
+ PERF_CONST(SAMPLE_RAW),
+
+ PERF_CONST(FORMAT_TOTAL_TIME_ENABLED),
+ PERF_CONST(FORMAT_TOTAL_TIME_RUNNING),
+ PERF_CONST(FORMAT_ID),
+ PERF_CONST(FORMAT_GROUP),
+
+ PERF_CONST(RECORD_MMAP),
+ PERF_CONST(RECORD_LOST),
+ PERF_CONST(RECORD_COMM),
+ PERF_CONST(RECORD_EXIT),
+ PERF_CONST(RECORD_THROTTLE),
+ PERF_CONST(RECORD_UNTHROTTLE),
+ PERF_CONST(RECORD_FORK),
+ PERF_CONST(RECORD_READ),
+ PERF_CONST(RECORD_SAMPLE),
+ PERF_CONST(RECORD_MMAP2),
+ PERF_CONST(RECORD_AUX),
+ PERF_CONST(RECORD_ITRACE_START),
+ PERF_CONST(RECORD_LOST_SAMPLES),
+ PERF_CONST(RECORD_SWITCH),
+ PERF_CONST(RECORD_SWITCH_CPU_WIDE),
+
+ PERF_CONST(RECORD_MISC_SWITCH_OUT),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel,
+ PyObject *args, PyObject *kwargs)
+{
+#ifndef HAVE_LIBTRACEEVENT
+ return NULL;
+#else
+ struct tep_event *tp_format;
+ static char *kwlist[] = { "sys", "name", NULL };
+ char *sys = NULL;
+ char *name = NULL;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss", kwlist,
+ &sys, &name))
+ return NULL;
+
+ tp_format = trace_event__tp_format(sys, name);
+ if (IS_ERR(tp_format))
+ return _PyLong_FromLong(-1);
+
+ return _PyLong_FromLong(tp_format->id);
+#endif // HAVE_LIBTRACEEVENT
+}
+
+static PyMethodDef perf__methods[] = {
+ {
+ .ml_name = "tracepoint",
+ .ml_meth = (PyCFunction) pyrf__tracepoint,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("Get tracepoint config.")
+ },
+ { .ml_name = NULL, }
+};
+
+#if PY_MAJOR_VERSION < 3
+PyMODINIT_FUNC initperf(void)
+#else
+PyMODINIT_FUNC PyInit_perf(void)
+#endif
+{
+ PyObject *obj;
+ int i;
+ PyObject *dict;
+#if PY_MAJOR_VERSION < 3
+ PyObject *module = Py_InitModule("perf", perf__methods);
+#else
+ static struct PyModuleDef moduledef = {
+ PyModuleDef_HEAD_INIT,
+ "perf", /* m_name */
+ "", /* m_doc */
+ -1, /* m_size */
+ perf__methods, /* m_methods */
+ NULL, /* m_reload */
+ NULL, /* m_traverse */
+ NULL, /* m_clear */
+ NULL, /* m_free */
+ };
+ PyObject *module = PyModule_Create(&moduledef);
+#endif
+
+ if (module == NULL ||
+ pyrf_event__setup_types() < 0 ||
+ pyrf_evlist__setup_types() < 0 ||
+ pyrf_evsel__setup_types() < 0 ||
+ pyrf_thread_map__setup_types() < 0 ||
+ pyrf_cpu_map__setup_types() < 0)
+#if PY_MAJOR_VERSION < 3
+ return;
+#else
+ return module;
+#endif
+
+ /* The page_size is placed in util object. */
+ page_size = sysconf(_SC_PAGE_SIZE);
+
+ Py_INCREF(&pyrf_evlist__type);
+ PyModule_AddObject(module, "evlist", (PyObject*)&pyrf_evlist__type);
+
+ Py_INCREF(&pyrf_evsel__type);
+ PyModule_AddObject(module, "evsel", (PyObject*)&pyrf_evsel__type);
+
+ Py_INCREF(&pyrf_mmap_event__type);
+ PyModule_AddObject(module, "mmap_event", (PyObject *)&pyrf_mmap_event__type);
+
+ Py_INCREF(&pyrf_lost_event__type);
+ PyModule_AddObject(module, "lost_event", (PyObject *)&pyrf_lost_event__type);
+
+ Py_INCREF(&pyrf_comm_event__type);
+ PyModule_AddObject(module, "comm_event", (PyObject *)&pyrf_comm_event__type);
+
+ Py_INCREF(&pyrf_task_event__type);
+ PyModule_AddObject(module, "task_event", (PyObject *)&pyrf_task_event__type);
+
+ Py_INCREF(&pyrf_throttle_event__type);
+ PyModule_AddObject(module, "throttle_event", (PyObject *)&pyrf_throttle_event__type);
+
+ Py_INCREF(&pyrf_task_event__type);
+ PyModule_AddObject(module, "task_event", (PyObject *)&pyrf_task_event__type);
+
+ Py_INCREF(&pyrf_read_event__type);
+ PyModule_AddObject(module, "read_event", (PyObject *)&pyrf_read_event__type);
+
+ Py_INCREF(&pyrf_sample_event__type);
+ PyModule_AddObject(module, "sample_event", (PyObject *)&pyrf_sample_event__type);
+
+ Py_INCREF(&pyrf_context_switch_event__type);
+ PyModule_AddObject(module, "switch_event", (PyObject *)&pyrf_context_switch_event__type);
+
+ Py_INCREF(&pyrf_thread_map__type);
+ PyModule_AddObject(module, "thread_map", (PyObject*)&pyrf_thread_map__type);
+
+ Py_INCREF(&pyrf_cpu_map__type);
+ PyModule_AddObject(module, "cpu_map", (PyObject*)&pyrf_cpu_map__type);
+
+ dict = PyModule_GetDict(module);
+ if (dict == NULL)
+ goto error;
+
+ for (i = 0; perf__constants[i].name != NULL; i++) {
+ obj = _PyLong_FromLong(perf__constants[i].value);
+ if (obj == NULL)
+ goto error;
+ PyDict_SetItemString(dict, perf__constants[i].name, obj);
+ Py_DECREF(obj);
+ }
+
+error:
+ if (PyErr_Occurred())
+ PyErr_SetString(PyExc_ImportError, "perf: Init failed!");
+#if PY_MAJOR_VERSION >= 3
+ return module;
+#endif
+}
+
+/*
+ * Dummy, to avoid dragging all the test_attr infrastructure in the python
+ * binding.
+ */
+void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
+ int fd, int group_fd, unsigned long flags)
+{
+}
+
+void evlist__free_stats(struct evlist *evlist)
+{
+}
diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
new file mode 100644
index 000000000..376e86cb4
--- /dev/null
+++ b/tools/perf/util/rb_resort.h
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_RESORT_RB_H_
+#define _PERF_RESORT_RB_H_
+/*
+ * Template for creating a class to resort an existing rb_tree according to
+ * a new sort criteria, that must be present in the entries of the source
+ * rb_tree.
+ *
+ * (c) 2016 Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Quick example, resorting threads by its shortname:
+ *
+ * First define the prefix (threads) to be used for the functions and data
+ * structures created, and provide an expression for the sorting, then the
+ * fields to be present in each of the entries in the new, sorted, rb_tree.
+ *
+ * The body of the init function should collect the fields, maybe
+ * pre-calculating them from multiple entries in the original 'entry' from
+ * the rb_tree used as a source for the entries to be sorted:
+
+DEFINE_RB_RESORT_RB(threads, strcmp(a->thread->shortname,
+ b->thread->shortname) < 0,
+ struct thread *thread;
+)
+{
+ entry->thread = rb_entry(nd, struct thread, rb_node);
+}
+
+ * After this it is just a matter of instantiating it and iterating it,
+ * for a few data structures with existing rb_trees, such as 'struct machine',
+ * helpers are available to get the rb_root and the nr_entries:
+
+ DECLARE_RESORT_RB_MACHINE_THREADS(threads, machine_ptr);
+
+ * This will instantiate the new rb_tree and a cursor for it, that can be used as:
+
+ struct rb_node *nd;
+
+ resort_rb__for_each_entry(nd, threads) {
+ struct thread *t = threads_entry;
+ printf("%s: %d\n", t->shortname, t->tid);
+ }
+
+ * Then delete it:
+
+ resort_rb__delete(threads);
+
+ * The name of the data structures and functions will have a _sorted suffix
+ * right before the method names, i.e. will look like:
+ *
+ * struct threads_sorted_entry {}
+ * threads_sorted__insert()
+ */
+
+#define DEFINE_RESORT_RB(__name, __comp, ...) \
+struct __name##_sorted_entry { \
+ struct rb_node rb_node; \
+ __VA_ARGS__ \
+}; \
+static void __name##_sorted__init_entry(struct rb_node *nd, \
+ struct __name##_sorted_entry *entry); \
+ \
+static int __name##_sorted__cmp(struct rb_node *nda, struct rb_node *ndb) \
+{ \
+ struct __name##_sorted_entry *a, *b; \
+ a = rb_entry(nda, struct __name##_sorted_entry, rb_node); \
+ b = rb_entry(ndb, struct __name##_sorted_entry, rb_node); \
+ return __comp; \
+} \
+ \
+struct __name##_sorted { \
+ struct rb_root entries; \
+ struct __name##_sorted_entry nd[0]; \
+}; \
+ \
+static void __name##_sorted__insert(struct __name##_sorted *sorted, \
+ struct rb_node *sorted_nd) \
+{ \
+ struct rb_node **p = &sorted->entries.rb_node, *parent = NULL; \
+ while (*p != NULL) { \
+ parent = *p; \
+ if (__name##_sorted__cmp(sorted_nd, parent)) \
+ p = &(*p)->rb_left; \
+ else \
+ p = &(*p)->rb_right; \
+ } \
+ rb_link_node(sorted_nd, parent, p); \
+ rb_insert_color(sorted_nd, &sorted->entries); \
+} \
+ \
+static void __name##_sorted__sort(struct __name##_sorted *sorted, \
+ struct rb_root *entries) \
+{ \
+ struct rb_node *nd; \
+ unsigned int i = 0; \
+ for (nd = rb_first(entries); nd; nd = rb_next(nd)) { \
+ struct __name##_sorted_entry *snd = &sorted->nd[i++]; \
+ __name##_sorted__init_entry(nd, snd); \
+ __name##_sorted__insert(sorted, &snd->rb_node); \
+ } \
+} \
+ \
+static struct __name##_sorted *__name##_sorted__new(struct rb_root *entries, \
+ int nr_entries) \
+{ \
+ struct __name##_sorted *sorted; \
+ sorted = malloc(sizeof(*sorted) + sizeof(sorted->nd[0]) * nr_entries); \
+ if (sorted) { \
+ sorted->entries = RB_ROOT; \
+ __name##_sorted__sort(sorted, entries); \
+ } \
+ return sorted; \
+} \
+ \
+static void __name##_sorted__delete(struct __name##_sorted *sorted) \
+{ \
+ free(sorted); \
+} \
+ \
+static void __name##_sorted__init_entry(struct rb_node *nd, \
+ struct __name##_sorted_entry *entry)
+
+#define DECLARE_RESORT_RB(__name) \
+struct __name##_sorted_entry *__name##_entry; \
+struct __name##_sorted *__name = __name##_sorted__new
+
+#define resort_rb__for_each_entry(__nd, __name) \
+ for (__nd = rb_first(&__name->entries); \
+ __name##_entry = rb_entry(__nd, struct __name##_sorted_entry, \
+ rb_node), __nd; \
+ __nd = rb_next(__nd))
+
+#define resort_rb__delete(__name) \
+ __name##_sorted__delete(__name), __name = NULL
+
+/*
+ * Helpers for other classes that contains both an rbtree and the
+ * number of entries in it:
+ */
+
+/* For 'struct intlist' */
+#define DECLARE_RESORT_RB_INTLIST(__name, __ilist) \
+ DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries.rb_root, \
+ __ilist->rblist.nr_entries)
+
+/* For 'struct machine->threads' */
+#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket) \
+ DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries.rb_root, \
+ __machine->threads[hash_bucket].nr)
+
+#endif /* _PERF_RESORT_RB_H_ */
diff --git a/tools/perf/util/rblist.c b/tools/perf/util/rblist.c
new file mode 100644
index 000000000..f399b7ec4
--- /dev/null
+++ b/tools/perf/util/rblist.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Based on strlist.c by:
+ * (c) 2009 Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "rblist.h"
+
+int rblist__add_node(struct rblist *rblist, const void *new_entry)
+{
+ struct rb_node **p = &rblist->entries.rb_root.rb_node;
+ struct rb_node *parent = NULL, *new_node;
+ bool leftmost = true;
+
+ while (*p != NULL) {
+ int rc;
+
+ parent = *p;
+
+ rc = rblist->node_cmp(parent, new_entry);
+ if (rc > 0)
+ p = &(*p)->rb_left;
+ else if (rc < 0) {
+ p = &(*p)->rb_right;
+ leftmost = false;
+ }
+ else
+ return -EEXIST;
+ }
+
+ new_node = rblist->node_new(rblist, new_entry);
+ if (new_node == NULL)
+ return -ENOMEM;
+
+ rb_link_node(new_node, parent, p);
+ rb_insert_color_cached(new_node, &rblist->entries, leftmost);
+ ++rblist->nr_entries;
+
+ return 0;
+}
+
+void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node)
+{
+ rb_erase_cached(rb_node, &rblist->entries);
+ --rblist->nr_entries;
+ rblist->node_delete(rblist, rb_node);
+}
+
+static struct rb_node *__rblist__findnew(struct rblist *rblist,
+ const void *entry,
+ bool create)
+{
+ struct rb_node **p = &rblist->entries.rb_root.rb_node;
+ struct rb_node *parent = NULL, *new_node = NULL;
+ bool leftmost = true;
+
+ while (*p != NULL) {
+ int rc;
+
+ parent = *p;
+
+ rc = rblist->node_cmp(parent, entry);
+ if (rc > 0)
+ p = &(*p)->rb_left;
+ else if (rc < 0) {
+ p = &(*p)->rb_right;
+ leftmost = false;
+ }
+ else
+ return parent;
+ }
+
+ if (create) {
+ new_node = rblist->node_new(rblist, entry);
+ if (new_node) {
+ rb_link_node(new_node, parent, p);
+ rb_insert_color_cached(new_node,
+ &rblist->entries, leftmost);
+ ++rblist->nr_entries;
+ }
+ }
+
+ return new_node;
+}
+
+struct rb_node *rblist__find(struct rblist *rblist, const void *entry)
+{
+ return __rblist__findnew(rblist, entry, false);
+}
+
+struct rb_node *rblist__findnew(struct rblist *rblist, const void *entry)
+{
+ return __rblist__findnew(rblist, entry, true);
+}
+
+void rblist__init(struct rblist *rblist)
+{
+ if (rblist != NULL) {
+ rblist->entries = RB_ROOT_CACHED;
+ rblist->nr_entries = 0;
+ }
+
+ return;
+}
+
+void rblist__exit(struct rblist *rblist)
+{
+ struct rb_node *pos, *next = rb_first_cached(&rblist->entries);
+
+ while (next) {
+ pos = next;
+ next = rb_next(pos);
+ rblist__remove_node(rblist, pos);
+ }
+}
+
+void rblist__delete(struct rblist *rblist)
+{
+ if (rblist != NULL) {
+ rblist__exit(rblist);
+ free(rblist);
+ }
+}
+
+struct rb_node *rblist__entry(const struct rblist *rblist, unsigned int idx)
+{
+ struct rb_node *node;
+
+ for (node = rb_first_cached(&rblist->entries); node;
+ node = rb_next(node)) {
+ if (!idx--)
+ return node;
+ }
+
+ return NULL;
+}
diff --git a/tools/perf/util/rblist.h b/tools/perf/util/rblist.h
new file mode 100644
index 000000000..14b232a4d
--- /dev/null
+++ b/tools/perf/util/rblist.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_RBLIST_H
+#define __PERF_RBLIST_H
+
+#include <linux/rbtree.h>
+#include <stdbool.h>
+
+/*
+ * create node structs of the form:
+ * struct my_node {
+ * struct rb_node rb_node;
+ * ... my data ...
+ * };
+ *
+ * create list structs of the form:
+ * struct mylist {
+ * struct rblist rblist;
+ * ... my data ...
+ * };
+ */
+
+struct rblist {
+ struct rb_root_cached entries;
+ unsigned int nr_entries;
+
+ int (*node_cmp)(struct rb_node *rbn, const void *entry);
+ struct rb_node *(*node_new)(struct rblist *rlist, const void *new_entry);
+ void (*node_delete)(struct rblist *rblist, struct rb_node *rb_node);
+};
+
+void rblist__init(struct rblist *rblist);
+void rblist__exit(struct rblist *rblist);
+void rblist__delete(struct rblist *rblist);
+int rblist__add_node(struct rblist *rblist, const void *new_entry);
+void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node);
+struct rb_node *rblist__find(struct rblist *rblist, const void *entry);
+struct rb_node *rblist__findnew(struct rblist *rblist, const void *entry);
+struct rb_node *rblist__entry(const struct rblist *rblist, unsigned int idx);
+
+static inline bool rblist__empty(const struct rblist *rblist)
+{
+ return rblist->nr_entries == 0;
+}
+
+static inline unsigned int rblist__nr_entries(const struct rblist *rblist)
+{
+ return rblist->nr_entries;
+}
+
+#endif /* __PERF_RBLIST_H */
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
new file mode 100644
index 000000000..9eb5c6a08
--- /dev/null
+++ b/tools/perf/util/record.c
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "debug.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "evsel_config.h"
+#include "parse-events.h"
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <api/fs/fs.h>
+#include <subcmd/parse-options.h>
+#include <perf/cpumap.h>
+#include "cloexec.h"
+#include "util/perf_api_probe.h"
+#include "record.h"
+#include "../perf-sys.h"
+#include "topdown.h"
+#include "map_symbol.h"
+#include "mem-events.h"
+
+/*
+ * evsel__config_leader_sampling() uses special rules for leader sampling.
+ * However, if the leader is an AUX area event, then assume the event to sample
+ * is the next event.
+ */
+static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evlist)
+{
+ struct evsel *leader = evsel__leader(evsel);
+
+ if (evsel__is_aux_event(leader) || arch_topdown_sample_read(leader) ||
+ is_mem_loads_aux_event(leader)) {
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel__leader(evsel) == leader && evsel != evsel__leader(evsel))
+ return evsel;
+ }
+ }
+
+ return leader;
+}
+
+static u64 evsel__config_term_mask(struct evsel *evsel)
+{
+ struct evsel_config_term *term;
+ struct list_head *config_terms = &evsel->config_terms;
+ u64 term_types = 0;
+
+ list_for_each_entry(term, config_terms, list) {
+ term_types |= 1 << term->type;
+ }
+ return term_types;
+}
+
+static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *evlist)
+{
+ struct perf_event_attr *attr = &evsel->core.attr;
+ struct evsel *leader = evsel__leader(evsel);
+ struct evsel *read_sampler;
+ u64 term_types, freq_mask;
+
+ if (!leader->sample_read)
+ return;
+
+ read_sampler = evsel__read_sampler(evsel, evlist);
+
+ if (evsel == read_sampler)
+ return;
+
+ term_types = evsel__config_term_mask(evsel);
+ /*
+ * Disable sampling for all group members except those with explicit
+ * config terms or the leader. In the case of an AUX area event, the 2nd
+ * event in the group is the one that 'leads' the sampling.
+ */
+ freq_mask = (1 << EVSEL__CONFIG_TERM_FREQ) | (1 << EVSEL__CONFIG_TERM_PERIOD);
+ if ((term_types & freq_mask) == 0) {
+ attr->freq = 0;
+ attr->sample_freq = 0;
+ attr->sample_period = 0;
+ }
+ if ((term_types & (1 << EVSEL__CONFIG_TERM_OVERWRITE)) == 0)
+ attr->write_backward = 0;
+
+ /*
+ * We don't get a sample for slave events, we make them when delivering
+ * the group leader sample. Set the slave event to follow the master
+ * sample_type to ease up reporting.
+ * An AUX area event also has sample_type requirements, so also include
+ * the sample type bits from the leader's sample_type to cover that
+ * case.
+ */
+ attr->sample_type = read_sampler->core.attr.sample_type |
+ leader->core.attr.sample_type;
+}
+
+void evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain)
+{
+ struct evsel *evsel;
+ bool use_sample_identifier = false;
+ bool use_comm_exec;
+ bool sample_id = opts->sample_id;
+
+ if (perf_cpu_map__cpu(evlist->core.user_requested_cpus, 0).cpu < 0)
+ opts->no_inherit = true;
+
+ use_comm_exec = perf_can_comm_exec();
+
+ evlist__for_each_entry(evlist, evsel) {
+ evsel__config(evsel, opts, callchain);
+ if (evsel->tracking && use_comm_exec)
+ evsel->core.attr.comm_exec = 1;
+ }
+
+ /* Configure leader sampling here now that the sample type is known */
+ evlist__for_each_entry(evlist, evsel)
+ evsel__config_leader_sampling(evsel, evlist);
+
+ if (opts->full_auxtrace || opts->sample_identifier) {
+ /*
+ * Need to be able to synthesize and parse selected events with
+ * arbitrary sample types, which requires always being able to
+ * match the id.
+ */
+ use_sample_identifier = perf_can_sample_identifier();
+ sample_id = true;
+ } else if (evlist->core.nr_entries > 1) {
+ struct evsel *first = evlist__first(evlist);
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.sample_type == first->core.attr.sample_type)
+ continue;
+ use_sample_identifier = perf_can_sample_identifier();
+ break;
+ }
+ sample_id = true;
+ }
+
+ if (sample_id) {
+ evlist__for_each_entry(evlist, evsel)
+ evsel__set_sample_id(evsel, use_sample_identifier);
+ }
+
+ evlist__set_id_pos(evlist);
+}
+
+static int get_max_rate(unsigned int *rate)
+{
+ return sysctl__read_int("kernel/perf_event_max_sample_rate", (int *)rate);
+}
+
+static int record_opts__config_freq(struct record_opts *opts)
+{
+ bool user_freq = opts->user_freq != UINT_MAX;
+ bool user_interval = opts->user_interval != ULLONG_MAX;
+ unsigned int max_rate;
+
+ if (user_interval && user_freq) {
+ pr_err("cannot set frequency and period at the same time\n");
+ return -1;
+ }
+
+ if (user_interval)
+ opts->default_interval = opts->user_interval;
+ if (user_freq)
+ opts->freq = opts->user_freq;
+
+ /*
+ * User specified count overrides default frequency.
+ */
+ if (opts->default_interval)
+ opts->freq = 0;
+ else if (opts->freq) {
+ opts->default_interval = opts->freq;
+ } else {
+ pr_err("frequency and count are zero, aborting\n");
+ return -1;
+ }
+
+ if (get_max_rate(&max_rate))
+ return 0;
+
+ /*
+ * User specified frequency is over current maximum.
+ */
+ if (user_freq && (max_rate < opts->freq)) {
+ if (opts->strict_freq) {
+ pr_err("error: Maximum frequency rate (%'u Hz) exceeded.\n"
+ " Please use -F freq option with a lower value or consider\n"
+ " tweaking /proc/sys/kernel/perf_event_max_sample_rate.\n",
+ max_rate);
+ return -1;
+ } else {
+ pr_warning("warning: Maximum frequency rate (%'u Hz) exceeded, throttling from %'u Hz to %'u Hz.\n"
+ " The limit can be raised via /proc/sys/kernel/perf_event_max_sample_rate.\n"
+ " The kernel will lower it when perf's interrupts take too long.\n"
+ " Use --strict-freq to disable this throttling, refusing to record.\n",
+ max_rate, opts->freq, max_rate);
+
+ opts->freq = max_rate;
+ }
+ }
+
+ /*
+ * Default frequency is over current maximum.
+ */
+ if (max_rate < opts->freq) {
+ pr_warning("Lowering default frequency rate from %u to %u.\n"
+ "Please consider tweaking "
+ "/proc/sys/kernel/perf_event_max_sample_rate.\n",
+ opts->freq, max_rate);
+ opts->freq = max_rate;
+ }
+
+ return 0;
+}
+
+int record_opts__config(struct record_opts *opts)
+{
+ return record_opts__config_freq(opts);
+}
+
+bool evlist__can_select_event(struct evlist *evlist, const char *str)
+{
+ struct evlist *temp_evlist;
+ struct evsel *evsel;
+ int err, fd;
+ struct perf_cpu cpu = { .cpu = 0 };
+ bool ret = false;
+ pid_t pid = -1;
+
+ temp_evlist = evlist__new();
+ if (!temp_evlist)
+ return false;
+
+ err = parse_event(temp_evlist, str);
+ if (err)
+ goto out_delete;
+
+ evsel = evlist__last(temp_evlist);
+
+ if (!evlist || perf_cpu_map__empty(evlist->core.user_requested_cpus)) {
+ struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);
+
+ if (cpus)
+ cpu = perf_cpu_map__cpu(cpus, 0);
+
+ perf_cpu_map__put(cpus);
+ } else {
+ cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, 0);
+ }
+
+ while (1) {
+ fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1,
+ perf_event_open_cloexec_flag());
+ if (fd < 0) {
+ if (pid == -1 && errno == EACCES) {
+ pid = 0;
+ continue;
+ }
+ goto out_delete;
+ }
+ break;
+ }
+ close(fd);
+ ret = true;
+
+out_delete:
+ evlist__delete(temp_evlist);
+ return ret;
+}
+
+int record__parse_freq(const struct option *opt, const char *str, int unset __maybe_unused)
+{
+ unsigned int freq;
+ struct record_opts *opts = opt->value;
+
+ if (!str)
+ return -EINVAL;
+
+ if (strcasecmp(str, "max") == 0) {
+ if (get_max_rate(&freq)) {
+ pr_err("couldn't read /proc/sys/kernel/perf_event_max_sample_rate\n");
+ return -1;
+ }
+ pr_info("info: Using a maximum frequency rate of %'d Hz\n", freq);
+ } else {
+ freq = atoi(str);
+ }
+
+ opts->user_freq = freq;
+ return 0;
+}
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
new file mode 100644
index 000000000..a6566134e
--- /dev/null
+++ b/tools/perf/util/record.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_RECORD_H
+#define _PERF_RECORD_H
+
+#include <time.h>
+#include <stdbool.h>
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/perf_event.h>
+#include "util/target.h"
+
+struct option;
+
+struct record_opts {
+ struct target target;
+ bool inherit_stat;
+ bool no_buffering;
+ bool no_inherit;
+ bool no_inherit_set;
+ bool no_samples;
+ bool raw_samples;
+ bool sample_address;
+ bool sample_phys_addr;
+ bool sample_data_page_size;
+ bool sample_code_page_size;
+ bool sample_weight;
+ bool sample_time;
+ bool sample_time_set;
+ bool sample_cpu;
+ bool sample_identifier;
+ bool period;
+ bool period_set;
+ bool running_time;
+ bool full_auxtrace;
+ bool auxtrace_snapshot_mode;
+ bool auxtrace_snapshot_on_exit;
+ bool auxtrace_sample_mode;
+ bool record_namespaces;
+ bool record_cgroup;
+ bool record_switch_events;
+ bool record_switch_events_set;
+ bool all_kernel;
+ bool all_user;
+ bool kernel_callchains;
+ bool user_callchains;
+ bool tail_synthesize;
+ bool overwrite;
+ bool ignore_missing_thread;
+ bool strict_freq;
+ bool sample_id;
+ bool no_bpf_event;
+ bool kcore;
+ bool text_poke;
+ bool build_id;
+ unsigned int freq;
+ unsigned int mmap_pages;
+ unsigned int auxtrace_mmap_pages;
+ unsigned int user_freq;
+ u64 branch_stack;
+ u64 sample_intr_regs;
+ u64 sample_user_regs;
+ u64 default_interval;
+ u64 user_interval;
+ size_t auxtrace_snapshot_size;
+ const char *auxtrace_snapshot_opts;
+ const char *auxtrace_sample_opts;
+ bool sample_transaction;
+ bool use_clockid;
+ clockid_t clockid;
+ u64 clockid_res_ns;
+ int nr_cblocks;
+ int affinity;
+ int mmap_flush;
+ unsigned int comp_level;
+ unsigned int nr_threads_synthesize;
+ int ctl_fd;
+ int ctl_fd_ack;
+ bool ctl_fd_close;
+ int synth;
+ int threads_spec;
+ const char *threads_user_spec;
+};
+
+extern const char * const *record_usage;
+extern struct option *record_options;
+
+int record__parse_freq(const struct option *opt, const char *str, int unset);
+
+static inline bool record_opts__no_switch_events(const struct record_opts *opts)
+{
+ return opts->record_switch_events_set && !opts->record_switch_events;
+}
+
+#endif // _PERF_RECORD_H
diff --git a/tools/perf/util/rlimit.c b/tools/perf/util/rlimit.c
new file mode 100644
index 000000000..13521d392
--- /dev/null
+++ b/tools/perf/util/rlimit.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+
+#include "util/debug.h"
+#include "util/rlimit.h"
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/*
+ * Bump the memlock so that we can get bpf maps of a reasonable size,
+ * like the ones used with 'perf trace' and with 'perf test bpf',
+ * improve this to some specific request if needed.
+ */
+void rlimit__bump_memlock(void)
+{
+ struct rlimit rlim;
+
+ if (getrlimit(RLIMIT_MEMLOCK, &rlim) == 0) {
+ rlim.rlim_cur *= 4;
+ rlim.rlim_max *= 4;
+
+ if (setrlimit(RLIMIT_MEMLOCK, &rlim) < 0) {
+ rlim.rlim_cur /= 2;
+ rlim.rlim_max /= 2;
+
+ if (setrlimit(RLIMIT_MEMLOCK, &rlim) < 0)
+ pr_debug("Couldn't bump rlimit(MEMLOCK), failures may take place when creating BPF maps, etc\n");
+ }
+ }
+}
diff --git a/tools/perf/util/rlimit.h b/tools/perf/util/rlimit.h
new file mode 100644
index 000000000..9f59d8e71
--- /dev/null
+++ b/tools/perf/util/rlimit.h
@@ -0,0 +1,6 @@
+#ifndef __PERF_RLIMIT_H_
+#define __PERF_RLIMIT_H_
+/* SPDX-License-Identifier: LGPL-2.1 */
+
+void rlimit__bump_memlock(void);
+#endif // __PERF_RLIMIT_H_
diff --git a/tools/perf/util/rwsem.c b/tools/perf/util/rwsem.c
new file mode 100644
index 000000000..f3d29d8dd
--- /dev/null
+++ b/tools/perf/util/rwsem.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util.h"
+#include "rwsem.h"
+
+int init_rwsem(struct rw_semaphore *sem)
+{
+ return pthread_rwlock_init(&sem->lock, NULL);
+}
+
+int exit_rwsem(struct rw_semaphore *sem)
+{
+ return pthread_rwlock_destroy(&sem->lock);
+}
+
+int down_read(struct rw_semaphore *sem)
+{
+ return perf_singlethreaded ? 0 : pthread_rwlock_rdlock(&sem->lock);
+}
+
+int up_read(struct rw_semaphore *sem)
+{
+ return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock);
+}
+
+int down_write(struct rw_semaphore *sem)
+{
+ return perf_singlethreaded ? 0 : pthread_rwlock_wrlock(&sem->lock);
+}
+
+int up_write(struct rw_semaphore *sem)
+{
+ return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock);
+}
diff --git a/tools/perf/util/rwsem.h b/tools/perf/util/rwsem.h
new file mode 100644
index 000000000..94565ad4d
--- /dev/null
+++ b/tools/perf/util/rwsem.h
@@ -0,0 +1,19 @@
+#ifndef _PERF_RWSEM_H
+#define _PERF_RWSEM_H
+
+#include <pthread.h>
+
+struct rw_semaphore {
+ pthread_rwlock_t lock;
+};
+
+int init_rwsem(struct rw_semaphore *sem);
+int exit_rwsem(struct rw_semaphore *sem);
+
+int down_read(struct rw_semaphore *sem);
+int up_read(struct rw_semaphore *sem);
+
+int down_write(struct rw_semaphore *sem);
+int up_write(struct rw_semaphore *sem);
+
+#endif /* _PERF_RWSEM_H */
diff --git a/tools/perf/util/s390-cpumcf-kernel.h b/tools/perf/util/s390-cpumcf-kernel.h
new file mode 100644
index 000000000..f55ca07f3
--- /dev/null
+++ b/tools/perf/util/s390-cpumcf-kernel.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Support for s390 CPU measurement counter set diagnostic facility
+ *
+ * Copyright IBM Corp. 2019
+ Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
+ * Thomas Richter <tmricht@linux.ibm.com>
+ */
+#ifndef S390_CPUMCF_KERNEL_H
+#define S390_CPUMCF_KERNEL_H
+
+#define S390_CPUMCF_DIAG_DEF 0xfeef /* Counter diagnostic entry ID */
+#define PERF_EVENT_CPUM_CF_DIAG 0xBC000 /* Event: Counter sets */
+#define PERF_EVENT_CPUM_SF_DIAG 0xBD000 /* Event: Combined-sampling */
+
+struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */
+ unsigned int def:16; /* 0-15 Data Entry Format */
+ unsigned int set:16; /* 16-23 Counter set identifier */
+ unsigned int ctr:16; /* 24-39 Number of stored counters */
+ unsigned int res1:16; /* 40-63 Reserved */
+};
+
+struct cf_trailer_entry { /* CPU-M CF trailer for raw traces (64 byte) */
+ /* 0 - 7 */
+ union {
+ struct {
+ unsigned int clock_base:1; /* TOD clock base */
+ unsigned int speed:1; /* CPU speed */
+ /* Measurement alerts */
+ unsigned int mtda:1; /* Loss of MT ctr. data alert */
+ unsigned int caca:1; /* Counter auth. change alert */
+ unsigned int lcda:1; /* Loss of counter data alert */
+ };
+ unsigned long flags; /* 0-63 All indicators */
+ };
+ /* 8 - 15 */
+ unsigned int cfvn:16; /* 64-79 Ctr First Version */
+ unsigned int csvn:16; /* 80-95 Ctr Second Version */
+ unsigned int cpu_speed:32; /* 96-127 CPU speed */
+ /* 16 - 23 */
+ unsigned long timestamp; /* 128-191 Timestamp (TOD) */
+ /* 24 - 55 */
+ union {
+ struct {
+ unsigned long progusage1;
+ unsigned long progusage2;
+ unsigned long progusage3;
+ unsigned long tod_base;
+ };
+ unsigned long progusage[4];
+ };
+ /* 56 - 63 */
+ unsigned int mach_type:16; /* Machine type */
+ unsigned int res1:16; /* Reserved */
+ unsigned int res2:32; /* Reserved */
+};
+
+#define CPUMF_CTR_SET_BASIC 0 /* Basic Counter Set */
+#define CPUMF_CTR_SET_USER 1 /* Problem-State Counter Set */
+#define CPUMF_CTR_SET_CRYPTO 2 /* Crypto-Activity Counter Set */
+#define CPUMF_CTR_SET_EXT 3 /* Extended Counter Set */
+#define CPUMF_CTR_SET_MT_DIAG 4 /* MT-diagnostic Counter Set */
+#endif
diff --git a/tools/perf/util/s390-cpumsf-kernel.h b/tools/perf/util/s390-cpumsf-kernel.h
new file mode 100644
index 000000000..de8c7ad0e
--- /dev/null
+++ b/tools/perf/util/s390-cpumsf-kernel.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Auxtrace support for s390 CPU measurement sampling facility
+ *
+ * Copyright IBM Corp. 2018
+ * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
+ * Thomas Richter <tmricht@linux.ibm.com>
+ */
+#ifndef S390_CPUMSF_KERNEL_H
+#define S390_CPUMSF_KERNEL_H
+
+#define S390_CPUMSF_PAGESZ 4096 /* Size of sample block units */
+#define S390_CPUMSF_DIAG_DEF_FIRST 0x8001 /* Diagnostic entry lowest id */
+
+struct hws_basic_entry {
+ unsigned int def:16; /* 0-15 Data Entry Format */
+ unsigned int R:4; /* 16-19 reserved */
+ unsigned int U:4; /* 20-23 Number of unique instruct. */
+ unsigned int z:2; /* zeros */
+ unsigned int T:1; /* 26 PSW DAT mode */
+ unsigned int W:1; /* 27 PSW wait state */
+ unsigned int P:1; /* 28 PSW Problem state */
+ unsigned int AS:2; /* 29-30 PSW address-space control */
+ unsigned int I:1; /* 31 entry valid or invalid */
+ unsigned int CL:2; /* 32-33 Configuration Level */
+ unsigned int:14;
+ unsigned int prim_asn:16; /* primary ASN */
+ unsigned long long ia; /* Instruction Address */
+ unsigned long long gpp; /* Guest Program Parameter */
+ unsigned long long hpp; /* Host Program Parameter */
+};
+
+struct hws_diag_entry {
+ unsigned int def:16; /* 0-15 Data Entry Format */
+ unsigned int R:15; /* 16-19 and 20-30 reserved */
+ unsigned int I:1; /* 31 entry valid or invalid */
+ u8 data[]; /* Machine-dependent sample data */
+};
+
+struct hws_combined_entry {
+ struct hws_basic_entry basic; /* Basic-sampling data entry */
+ struct hws_diag_entry diag; /* Diagnostic-sampling data entry */
+};
+
+struct hws_trailer_entry {
+ union {
+ struct {
+ unsigned int f:1; /* 0 - Block Full Indicator */
+ unsigned int a:1; /* 1 - Alert request control */
+ unsigned int t:1; /* 2 - Timestamp format */
+ unsigned int:29; /* 3 - 31: Reserved */
+ unsigned int bsdes:16; /* 32-47: size of basic SDE */
+ unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */
+ };
+ unsigned long long flags; /* 0 - 64: All indicators */
+ };
+ unsigned long long overflow; /* 64 - sample Overflow count */
+ unsigned char timestamp[16]; /* 16 - 31 timestamp */
+ unsigned long long reserved1; /* 32 -Reserved */
+ unsigned long long reserved2; /* */
+ union { /* 48 - reserved for programming use */
+ struct {
+ unsigned long long clock_base:1; /* in progusage2 */
+ unsigned long long progusage1:63;
+ unsigned long long progusage2;
+ };
+ unsigned long long progusage[2];
+ };
+};
+
+#endif
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
new file mode 100644
index 000000000..6fe478b0b
--- /dev/null
+++ b/tools/perf/util/s390-cpumsf.c
@@ -0,0 +1,1177 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2018
+ * Auxtrace support for s390 CPU-Measurement Sampling Facility
+ *
+ * Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ *
+ * Auxiliary traces are collected during 'perf record' using rbd000 event.
+ * Several PERF_RECORD_XXX are generated during recording:
+ *
+ * PERF_RECORD_AUX:
+ * Records that new data landed in the AUX buffer part.
+ * PERF_RECORD_AUXTRACE:
+ * Defines auxtrace data. Followed by the actual data. The contents of
+ * the auxtrace data is dependent on the event and the CPU.
+ * This record is generated by perf record command. For details
+ * see Documentation/perf.data-file-format.txt.
+ * PERF_RECORD_AUXTRACE_INFO:
+ * Defines a table of contains for PERF_RECORD_AUXTRACE records. This
+ * record is generated during 'perf record' command. Each record contains
+ * up to 256 entries describing offset and size of the AUXTRACE data in the
+ * perf.data file.
+ * PERF_RECORD_AUXTRACE_ERROR:
+ * Indicates an error during AUXTRACE collection such as buffer overflow.
+ * PERF_RECORD_FINISHED_ROUND:
+ * Perf events are not necessarily in time stamp order, as they can be
+ * collected in parallel on different CPUs. If the events should be
+ * processed in time order they need to be sorted first.
+ * Perf report guarantees that there is no reordering over a
+ * PERF_RECORD_FINISHED_ROUND boundary event. All perf records with a
+ * time stamp lower than this record are processed (and displayed) before
+ * the succeeding perf record are processed.
+ *
+ * These records are evaluated during perf report command.
+ *
+ * 1. PERF_RECORD_AUXTRACE_INFO is used to set up the infrastructure for
+ * auxiliary trace data processing. See s390_cpumsf_process_auxtrace_info()
+ * below.
+ * Auxiliary trace data is collected per CPU. To merge the data into the report
+ * an auxtrace_queue is created for each CPU. It is assumed that the auxtrace
+ * data is in ascending order.
+ *
+ * Each queue has a double linked list of auxtrace_buffers. This list contains
+ * the offset and size of a CPU's auxtrace data. During auxtrace processing
+ * the data portion is mmap()'ed.
+ *
+ * To sort the queues in chronological order, all queue access is controlled
+ * by the auxtrace_heap. This is basically a stack, each stack element has two
+ * entries, the queue number and a time stamp. However the stack is sorted by
+ * the time stamps. The highest time stamp is at the bottom the lowest
+ * (nearest) time stamp is at the top. That sort order is maintained at all
+ * times!
+ *
+ * After the auxtrace infrastructure has been setup, the auxtrace queues are
+ * filled with data (offset/size pairs) and the auxtrace_heap is populated.
+ *
+ * 2. PERF_RECORD_XXX processing triggers access to the auxtrace_queues.
+ * Each record is handled by s390_cpumsf_process_event(). The time stamp of
+ * the perf record is compared with the time stamp located on the auxtrace_heap
+ * top element. If that time stamp is lower than the time stamp from the
+ * record sample, the auxtrace queues will be processed. As auxtrace queues
+ * control many auxtrace_buffers and each buffer can be quite large, the
+ * auxtrace buffer might be processed only partially. In this case the
+ * position in the auxtrace_buffer of that queue is remembered and the time
+ * stamp of the last processed entry of the auxtrace_buffer replaces the
+ * current auxtrace_heap top.
+ *
+ * 3. Auxtrace_queues might run of out data and are fed by the
+ * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event().
+ *
+ * Event Generation
+ * Each sampling-data entry in the auxiliary trace data generates a perf sample.
+ * This sample is filled
+ * with data from the auxtrace such as PID/TID, instruction address, CPU state,
+ * etc. This sample is processed with perf_session__deliver_synth_event() to
+ * be included into the GUI.
+ *
+ * 4. PERF_RECORD_FINISHED_ROUND event is used to process all the remaining
+ * auxiliary traces entries until the time stamp of this record is reached
+ * auxtrace_heap top. This is triggered by ordered_event->deliver().
+ *
+ *
+ * Perf event processing.
+ * Event processing of PERF_RECORD_XXX entries relies on time stamp entries.
+ * This is the function call sequence:
+ *
+ * __cmd_report()
+ * |
+ * perf_session__process_events()
+ * |
+ * __perf_session__process_events()
+ * |
+ * perf_session__process_event()
+ * | This functions splits the PERF_RECORD_XXX records.
+ * | - Those generated by perf record command (type number equal or higher
+ * | than PERF_RECORD_USER_TYPE_START) are handled by
+ * | perf_session__process_user_event(see below)
+ * | - Those generated by the kernel are handled by
+ * | evlist__parse_sample_timestamp()
+ * |
+ * evlist__parse_sample_timestamp()
+ * | Extract time stamp from sample data.
+ * |
+ * perf_session__queue_event()
+ * | If timestamp is positive the sample is entered into an ordered_event
+ * | list, sort order is the timestamp. The event processing is deferred until
+ * | later (see perf_session__process_user_event()).
+ * | Other timestamps (0 or -1) are handled immediately by
+ * | perf_session__deliver_event(). These are events generated at start up
+ * | of command perf record. They create PERF_RECORD_COMM and PERF_RECORD_MMAP*
+ * | records. They are needed to create a list of running processes and its
+ * | memory mappings and layout. They are needed at the beginning to enable
+ * | command perf report to create process trees and memory mappings.
+ * |
+ * perf_session__deliver_event()
+ * | Delivers a PERF_RECORD_XXX entry for handling.
+ * |
+ * auxtrace__process_event()
+ * | The timestamp of the PERF_RECORD_XXX entry is taken to correlate with
+ * | time stamps from the auxiliary trace buffers. This enables
+ * | synchronization between auxiliary trace data and the events on the
+ * | perf.data file.
+ * |
+ * machine__deliver_event()
+ * | Handles the PERF_RECORD_XXX event. This depends on the record type.
+ * It might update the process tree, update a process memory map or enter
+ * a sample with IP and call back chain data into GUI data pool.
+ *
+ *
+ * Deferred processing determined by perf_session__process_user_event() is
+ * finally processed when a PERF_RECORD_FINISHED_ROUND is encountered. These
+ * are generated during command perf record.
+ * The timestamp of PERF_RECORD_FINISHED_ROUND event is taken to process all
+ * PERF_RECORD_XXX entries stored in the ordered_event list. This list was
+ * built up while reading the perf.data file.
+ * Each event is now processed by calling perf_session__deliver_event().
+ * This enables time synchronization between the data in the perf.data file and
+ * the data in the auxiliary trace buffers.
+ */
+
+#include <endian.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <inttypes.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+#include <linux/zalloc.h>
+
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "color.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "machine.h"
+#include "session.h"
+#include "tool.h"
+#include "debug.h"
+#include "auxtrace.h"
+#include "s390-cpumsf.h"
+#include "s390-cpumsf-kernel.h"
+#include "s390-cpumcf-kernel.h"
+#include "config.h"
+#include "util/sample.h"
+
+struct s390_cpumsf {
+ struct auxtrace auxtrace;
+ struct auxtrace_queues queues;
+ struct auxtrace_heap heap;
+ struct perf_session *session;
+ struct machine *machine;
+ u32 auxtrace_type;
+ u32 pmu_type;
+ u16 machine_type;
+ bool data_queued;
+ bool use_logfile;
+ char *logdir;
+};
+
+struct s390_cpumsf_queue {
+ struct s390_cpumsf *sf;
+ unsigned int queue_nr;
+ struct auxtrace_buffer *buffer;
+ int cpu;
+ FILE *logfile;
+ FILE *logfile_ctr;
+};
+
+/* Check if the raw data should be dumped to file. If this is the case and
+ * the file to dump to has not been opened for writing, do so.
+ *
+ * Return 0 on success and greater zero on error so processing continues.
+ */
+static int s390_cpumcf_dumpctr(struct s390_cpumsf *sf,
+ struct perf_sample *sample)
+{
+ struct s390_cpumsf_queue *sfq;
+ struct auxtrace_queue *q;
+ int rc = 0;
+
+ if (!sf->use_logfile || sf->queues.nr_queues <= sample->cpu)
+ return rc;
+
+ q = &sf->queues.queue_array[sample->cpu];
+ sfq = q->priv;
+ if (!sfq) /* Queue not yet allocated */
+ return rc;
+
+ if (!sfq->logfile_ctr) {
+ char *name;
+
+ rc = (sf->logdir)
+ ? asprintf(&name, "%s/aux.ctr.%02x",
+ sf->logdir, sample->cpu)
+ : asprintf(&name, "aux.ctr.%02x", sample->cpu);
+ if (rc > 0)
+ sfq->logfile_ctr = fopen(name, "w");
+ if (sfq->logfile_ctr == NULL) {
+ pr_err("Failed to open counter set log file %s, "
+ "continue...\n", name);
+ rc = 1;
+ }
+ free(name);
+ }
+
+ if (sfq->logfile_ctr) {
+ /* See comment above for -4 */
+ size_t n = fwrite(sample->raw_data, sample->raw_size - 4, 1,
+ sfq->logfile_ctr);
+ if (n != 1) {
+ pr_err("Failed to write counter set data\n");
+ rc = 1;
+ }
+ }
+ return rc;
+}
+
+/* Display s390 CPU measurement facility basic-sampling data entry
+ * Data written on s390 in big endian byte order and contains bit
+ * fields across byte boundaries.
+ */
+static bool s390_cpumsf_basic_show(const char *color, size_t pos,
+ struct hws_basic_entry *basicp)
+{
+ struct hws_basic_entry *basic = basicp;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ struct hws_basic_entry local;
+ unsigned long long word = be64toh(*(unsigned long long *)basicp);
+
+ memset(&local, 0, sizeof(local));
+ local.def = be16toh(basicp->def);
+ local.prim_asn = word & 0xffff;
+ local.CL = word >> 30 & 0x3;
+ local.I = word >> 32 & 0x1;
+ local.AS = word >> 33 & 0x3;
+ local.P = word >> 35 & 0x1;
+ local.W = word >> 36 & 0x1;
+ local.T = word >> 37 & 0x1;
+ local.U = word >> 40 & 0xf;
+ local.ia = be64toh(basicp->ia);
+ local.gpp = be64toh(basicp->gpp);
+ local.hpp = be64toh(basicp->hpp);
+ basic = &local;
+#endif
+ if (basic->def != 1) {
+ pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos);
+ return false;
+ }
+ color_fprintf(stdout, color, " [%#08zx] Basic Def:%04x Inst:%#04x"
+ " %c%c%c%c AS:%d ASN:%#04x IA:%#018llx\n"
+ "\t\tCL:%d HPP:%#018llx GPP:%#018llx\n",
+ pos, basic->def, basic->U,
+ basic->T ? 'T' : ' ',
+ basic->W ? 'W' : ' ',
+ basic->P ? 'P' : ' ',
+ basic->I ? 'I' : ' ',
+ basic->AS, basic->prim_asn, basic->ia, basic->CL,
+ basic->hpp, basic->gpp);
+ return true;
+}
+
+/* Display s390 CPU measurement facility diagnostic-sampling data entry.
+ * Data written on s390 in big endian byte order and contains bit
+ * fields across byte boundaries.
+ */
+static bool s390_cpumsf_diag_show(const char *color, size_t pos,
+ struct hws_diag_entry *diagp)
+{
+ struct hws_diag_entry *diag = diagp;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ struct hws_diag_entry local;
+ unsigned long long word = be64toh(*(unsigned long long *)diagp);
+
+ local.def = be16toh(diagp->def);
+ local.I = word >> 32 & 0x1;
+ diag = &local;
+#endif
+ if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) {
+ pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos);
+ return false;
+ }
+ color_fprintf(stdout, color, " [%#08zx] Diag Def:%04x %c\n",
+ pos, diag->def, diag->I ? 'I' : ' ');
+ return true;
+}
+
+/* Return TOD timestamp contained in an trailer entry */
+static unsigned long long trailer_timestamp(struct hws_trailer_entry *te,
+ int idx)
+{
+ /* te->t set: TOD in STCKE format, bytes 8-15
+ * to->t not set: TOD in STCK format, bytes 0-7
+ */
+ unsigned long long ts;
+
+ memcpy(&ts, &te->timestamp[idx], sizeof(ts));
+ return be64toh(ts);
+}
+
+/* Display s390 CPU measurement facility trailer entry */
+static bool s390_cpumsf_trailer_show(const char *color, size_t pos,
+ struct hws_trailer_entry *te)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ struct hws_trailer_entry local;
+ const unsigned long long flags = be64toh(te->flags);
+
+ memset(&local, 0, sizeof(local));
+ local.f = flags >> 63 & 0x1;
+ local.a = flags >> 62 & 0x1;
+ local.t = flags >> 61 & 0x1;
+ local.bsdes = be16toh((flags >> 16 & 0xffff));
+ local.dsdes = be16toh((flags & 0xffff));
+ memcpy(&local.timestamp, te->timestamp, sizeof(te->timestamp));
+ local.overflow = be64toh(te->overflow);
+ local.clock_base = be64toh(te->progusage[0]) >> 63 & 1;
+ local.progusage2 = be64toh(te->progusage2);
+ te = &local;
+#endif
+ if (te->bsdes != sizeof(struct hws_basic_entry)) {
+ pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos);
+ return false;
+ }
+ color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d"
+ " dsdes:%d Overflow:%lld Time:%#llx\n"
+ "\t\tC:%d TOD:%#lx\n",
+ pos,
+ te->f ? 'F' : ' ',
+ te->a ? 'A' : ' ',
+ te->t ? 'T' : ' ',
+ te->bsdes, te->dsdes, te->overflow,
+ trailer_timestamp(te, te->clock_base),
+ te->clock_base, te->progusage2);
+ return true;
+}
+
+/* Test a sample data block. It must be 4KB or a multiple thereof in size and
+ * 4KB page aligned. Each sample data page has a trailer entry at the
+ * end which contains the sample entry data sizes.
+ *
+ * Return true if the sample data block passes the checks and set the
+ * basic set entry size and diagnostic set entry size.
+ *
+ * Return false on failure.
+ *
+ * Note: Old hardware does not set the basic or diagnostic entry sizes
+ * in the trailer entry. Use the type number instead.
+ */
+static bool s390_cpumsf_validate(int machine_type,
+ unsigned char *buf, size_t len,
+ unsigned short *bsdes,
+ unsigned short *dsdes)
+{
+ struct hws_basic_entry *basic = (struct hws_basic_entry *)buf;
+ struct hws_trailer_entry *te;
+
+ *dsdes = *bsdes = 0;
+ if (len & (S390_CPUMSF_PAGESZ - 1)) /* Illegal size */
+ return false;
+ if (be16toh(basic->def) != 1) /* No basic set entry, must be first */
+ return false;
+ /* Check for trailer entry at end of SDB */
+ te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
+ - sizeof(*te));
+ *bsdes = be16toh(te->bsdes);
+ *dsdes = be16toh(te->dsdes);
+ if (!te->bsdes && !te->dsdes) {
+ /* Very old hardware, use CPUID */
+ switch (machine_type) {
+ case 2097:
+ case 2098:
+ *dsdes = 64;
+ *bsdes = 32;
+ break;
+ case 2817:
+ case 2818:
+ *dsdes = 74;
+ *bsdes = 32;
+ break;
+ case 2827:
+ case 2828:
+ *dsdes = 85;
+ *bsdes = 32;
+ break;
+ case 2964:
+ case 2965:
+ *dsdes = 112;
+ *bsdes = 32;
+ break;
+ default:
+ /* Illegal trailer entry */
+ return false;
+ }
+ }
+ return true;
+}
+
+/* Return true if there is room for another entry */
+static bool s390_cpumsf_reached_trailer(size_t entry_sz, size_t pos)
+{
+ size_t payload = S390_CPUMSF_PAGESZ - sizeof(struct hws_trailer_entry);
+
+ if (payload - (pos & (S390_CPUMSF_PAGESZ - 1)) < entry_sz)
+ return false;
+ return true;
+}
+
+/* Dump an auxiliary buffer. These buffers are multiple of
+ * 4KB SDB pages.
+ */
+static void s390_cpumsf_dump(struct s390_cpumsf *sf,
+ unsigned char *buf, size_t len)
+{
+ const char *color = PERF_COLOR_BLUE;
+ struct hws_basic_entry *basic;
+ struct hws_diag_entry *diag;
+ unsigned short bsdes, dsdes;
+ size_t pos = 0;
+
+ color_fprintf(stdout, color,
+ ". ... s390 AUX data: size %zu bytes\n",
+ len);
+
+ if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
+ &dsdes)) {
+ pr_err("Invalid AUX trace data block size:%zu"
+ " (type:%d bsdes:%hd dsdes:%hd)\n",
+ len, sf->machine_type, bsdes, dsdes);
+ return;
+ }
+
+ /* s390 kernel always returns 4KB blocks fully occupied,
+ * no partially filled SDBs.
+ */
+ while (pos < len) {
+ /* Handle Basic entry */
+ basic = (struct hws_basic_entry *)(buf + pos);
+ if (s390_cpumsf_basic_show(color, pos, basic))
+ pos += bsdes;
+ else
+ return;
+
+ /* Handle Diagnostic entry */
+ diag = (struct hws_diag_entry *)(buf + pos);
+ if (s390_cpumsf_diag_show(color, pos, diag))
+ pos += dsdes;
+ else
+ return;
+
+ /* Check for trailer entry */
+ if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
+ /* Show trailer entry */
+ struct hws_trailer_entry te;
+
+ pos = (pos + S390_CPUMSF_PAGESZ)
+ & ~(S390_CPUMSF_PAGESZ - 1);
+ pos -= sizeof(te);
+ memcpy(&te, buf + pos, sizeof(te));
+ /* Set descriptor sizes in case of old hardware
+ * where these values are not set.
+ */
+ te.bsdes = bsdes;
+ te.dsdes = dsdes;
+ if (s390_cpumsf_trailer_show(color, pos, &te))
+ pos += sizeof(te);
+ else
+ return;
+ }
+ }
+}
+
+static void s390_cpumsf_dump_event(struct s390_cpumsf *sf, unsigned char *buf,
+ size_t len)
+{
+ printf(".\n");
+ s390_cpumsf_dump(sf, buf, len);
+}
+
+#define S390_LPP_PID_MASK 0xffffffff
+
+static bool s390_cpumsf_make_event(size_t pos,
+ struct hws_basic_entry *basic,
+ struct s390_cpumsf_queue *sfq)
+{
+ struct perf_sample sample = {
+ .ip = basic->ia,
+ .pid = basic->hpp & S390_LPP_PID_MASK,
+ .tid = basic->hpp & S390_LPP_PID_MASK,
+ .cpumode = PERF_RECORD_MISC_CPUMODE_UNKNOWN,
+ .cpu = sfq->cpu,
+ .period = 1
+ };
+ union perf_event event;
+
+ memset(&event, 0, sizeof(event));
+ if (basic->CL == 1) /* Native LPAR mode */
+ sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
+ : PERF_RECORD_MISC_KERNEL;
+ else if (basic->CL == 2) /* Guest kernel/user space */
+ sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
+ : PERF_RECORD_MISC_GUEST_KERNEL;
+ else if (basic->gpp || basic->prim_asn != 0xffff)
+ /* Use heuristics on old hardware */
+ sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
+ : PERF_RECORD_MISC_GUEST_KERNEL;
+ else
+ sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
+ : PERF_RECORD_MISC_KERNEL;
+
+ event.sample.header.type = PERF_RECORD_SAMPLE;
+ event.sample.header.misc = sample.cpumode;
+ event.sample.header.size = sizeof(struct perf_event_header);
+
+ pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n",
+ __func__, pos, sample.ip, basic->P, basic->CL, sample.pid,
+ sample.tid, sample.cpumode, sample.cpu);
+ if (perf_session__deliver_synth_event(sfq->sf->session, &event,
+ &sample)) {
+ pr_err("s390 Auxiliary Trace: failed to deliver event\n");
+ return false;
+ }
+ return true;
+}
+
+static unsigned long long get_trailer_time(const unsigned char *buf)
+{
+ struct hws_trailer_entry *te;
+ unsigned long long aux_time, progusage2;
+ bool clock_base;
+
+ te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
+ - sizeof(*te));
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ clock_base = be64toh(te->progusage[0]) >> 63 & 0x1;
+ progusage2 = be64toh(te->progusage[1]);
+#else
+ clock_base = te->clock_base;
+ progusage2 = te->progusage2;
+#endif
+ if (!clock_base) /* TOD_CLOCK_BASE value missing */
+ return 0;
+
+ /* Correct calculation to convert time stamp in trailer entry to
+ * nano seconds (taken from arch/s390 function tod_to_ns()).
+ * TOD_CLOCK_BASE is stored in trailer entry member progusage2.
+ */
+ aux_time = trailer_timestamp(te, clock_base) - progusage2;
+ aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9);
+ return aux_time;
+}
+
+/* Process the data samples of a single queue. The first parameter is a
+ * pointer to the queue, the second parameter is the time stamp. This
+ * is the time stamp:
+ * - of the event that triggered this processing.
+ * - or the time stamp when the last processing of this queue stopped.
+ * In this case it stopped at a 4KB page boundary and record the
+ * position on where to continue processing on the next invocation
+ * (see buffer->use_data and buffer->use_size).
+ *
+ * When this function returns the second parameter is updated to
+ * reflect the time stamp of the last processed auxiliary data entry
+ * (taken from the trailer entry of that page). The caller uses this
+ * returned time stamp to record the last processed entry in this
+ * queue.
+ *
+ * The function returns:
+ * 0: Processing successful. The second parameter returns the
+ * time stamp from the trailer entry until which position
+ * processing took place. Subsequent calls resume from this
+ * position.
+ * <0: An error occurred during processing. The second parameter
+ * returns the maximum time stamp.
+ * >0: Done on this queue. The second parameter returns the
+ * maximum time stamp.
+ */
+static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts)
+{
+ struct s390_cpumsf *sf = sfq->sf;
+ unsigned char *buf = sfq->buffer->use_data;
+ size_t len = sfq->buffer->use_size;
+ struct hws_basic_entry *basic;
+ unsigned short bsdes, dsdes;
+ size_t pos = 0;
+ int err = 1;
+ u64 aux_ts;
+
+ if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
+ &dsdes)) {
+ *ts = ~0ULL;
+ return -1;
+ }
+
+ /* Get trailer entry time stamp and check if entries in
+ * this auxiliary page are ready for processing. If the
+ * time stamp of the first entry is too high, whole buffer
+ * can be skipped. In this case return time stamp.
+ */
+ aux_ts = get_trailer_time(buf);
+ if (!aux_ts) {
+ pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n",
+ (s64)sfq->buffer->data_offset);
+ aux_ts = ~0ULL;
+ goto out;
+ }
+ if (aux_ts > *ts) {
+ *ts = aux_ts;
+ return 0;
+ }
+
+ while (pos < len) {
+ /* Handle Basic entry */
+ basic = (struct hws_basic_entry *)(buf + pos);
+ if (s390_cpumsf_make_event(pos, basic, sfq))
+ pos += bsdes;
+ else {
+ err = -EBADF;
+ goto out;
+ }
+
+ pos += dsdes; /* Skip diagnostic entry */
+
+ /* Check for trailer entry */
+ if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
+ pos = (pos + S390_CPUMSF_PAGESZ)
+ & ~(S390_CPUMSF_PAGESZ - 1);
+ /* Check existence of next page */
+ if (pos >= len)
+ break;
+ aux_ts = get_trailer_time(buf + pos);
+ if (!aux_ts) {
+ aux_ts = ~0ULL;
+ goto out;
+ }
+ if (aux_ts > *ts) {
+ *ts = aux_ts;
+ sfq->buffer->use_data += pos;
+ sfq->buffer->use_size -= pos;
+ return 0;
+ }
+ }
+ }
+out:
+ *ts = aux_ts;
+ sfq->buffer->use_size = 0;
+ sfq->buffer->use_data = NULL;
+ return err; /* Buffer completely scanned or error */
+}
+
+/* Run the s390 auxiliary trace decoder.
+ * Select the queue buffer to operate on, the caller already selected
+ * the proper queue, depending on second parameter 'ts'.
+ * This is the time stamp until which the auxiliary entries should
+ * be processed. This value is updated by called functions and
+ * returned to the caller.
+ *
+ * Resume processing in the current buffer. If there is no buffer
+ * get a new buffer from the queue and setup start position for
+ * processing.
+ * When a buffer is completely processed remove it from the queue
+ * before returning.
+ *
+ * This function returns
+ * 1: When the queue is empty. Second parameter will be set to
+ * maximum time stamp.
+ * 0: Normal processing done.
+ * <0: Error during queue buffer setup. This causes the caller
+ * to stop processing completely.
+ */
+static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq,
+ u64 *ts)
+{
+
+ struct auxtrace_buffer *buffer;
+ struct auxtrace_queue *queue;
+ int err;
+
+ queue = &sfq->sf->queues.queue_array[sfq->queue_nr];
+
+ /* Get buffer and last position in buffer to resume
+ * decoding the auxiliary entries. One buffer might be large
+ * and decoding might stop in between. This depends on the time
+ * stamp of the trailer entry in each page of the auxiliary
+ * data and the time stamp of the event triggering the decoding.
+ */
+ if (sfq->buffer == NULL) {
+ sfq->buffer = buffer = auxtrace_buffer__next(queue,
+ sfq->buffer);
+ if (!buffer) {
+ *ts = ~0ULL;
+ return 1; /* Processing done on this queue */
+ }
+ /* Start with a new buffer on this queue */
+ if (buffer->data) {
+ buffer->use_size = buffer->size;
+ buffer->use_data = buffer->data;
+ }
+ if (sfq->logfile) { /* Write into log file */
+ size_t rc = fwrite(buffer->data, buffer->size, 1,
+ sfq->logfile);
+ if (rc != 1)
+ pr_err("Failed to write auxiliary data\n");
+ }
+ } else
+ buffer = sfq->buffer;
+
+ if (!buffer->data) {
+ int fd = perf_data__fd(sfq->sf->session->data);
+
+ buffer->data = auxtrace_buffer__get_data(buffer, fd);
+ if (!buffer->data)
+ return -ENOMEM;
+ buffer->use_size = buffer->size;
+ buffer->use_data = buffer->data;
+
+ if (sfq->logfile) { /* Write into log file */
+ size_t rc = fwrite(buffer->data, buffer->size, 1,
+ sfq->logfile);
+ if (rc != 1)
+ pr_err("Failed to write auxiliary data\n");
+ }
+ }
+ pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n",
+ __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset,
+ buffer->size, buffer->use_size);
+ err = s390_cpumsf_samples(sfq, ts);
+
+ /* If non-zero, there is either an error (err < 0) or the buffer is
+ * completely done (err > 0). The error is unrecoverable, usually
+ * some descriptors could not be read successfully, so continue with
+ * the next buffer.
+ * In both cases the parameter 'ts' has been updated.
+ */
+ if (err) {
+ sfq->buffer = NULL;
+ list_del_init(&buffer->list);
+ auxtrace_buffer__free(buffer);
+ if (err > 0) /* Buffer done, no error */
+ err = 0;
+ }
+ return err;
+}
+
+static struct s390_cpumsf_queue *
+s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr)
+{
+ struct s390_cpumsf_queue *sfq;
+
+ sfq = zalloc(sizeof(struct s390_cpumsf_queue));
+ if (sfq == NULL)
+ return NULL;
+
+ sfq->sf = sf;
+ sfq->queue_nr = queue_nr;
+ sfq->cpu = -1;
+ if (sf->use_logfile) {
+ char *name;
+ int rc;
+
+ rc = (sf->logdir)
+ ? asprintf(&name, "%s/aux.smp.%02x",
+ sf->logdir, queue_nr)
+ : asprintf(&name, "aux.smp.%02x", queue_nr);
+ if (rc > 0)
+ sfq->logfile = fopen(name, "w");
+ if (sfq->logfile == NULL) {
+ pr_err("Failed to open auxiliary log file %s,"
+ "continue...\n", name);
+ sf->use_logfile = false;
+ }
+ free(name);
+ }
+ return sfq;
+}
+
+static int s390_cpumsf_setup_queue(struct s390_cpumsf *sf,
+ struct auxtrace_queue *queue,
+ unsigned int queue_nr, u64 ts)
+{
+ struct s390_cpumsf_queue *sfq = queue->priv;
+
+ if (list_empty(&queue->head))
+ return 0;
+
+ if (sfq == NULL) {
+ sfq = s390_cpumsf_alloc_queue(sf, queue_nr);
+ if (!sfq)
+ return -ENOMEM;
+ queue->priv = sfq;
+
+ if (queue->cpu != -1)
+ sfq->cpu = queue->cpu;
+ }
+ return auxtrace_heap__add(&sf->heap, queue_nr, ts);
+}
+
+static int s390_cpumsf_setup_queues(struct s390_cpumsf *sf, u64 ts)
+{
+ unsigned int i;
+ int ret = 0;
+
+ for (i = 0; i < sf->queues.nr_queues; i++) {
+ ret = s390_cpumsf_setup_queue(sf, &sf->queues.queue_array[i],
+ i, ts);
+ if (ret)
+ break;
+ }
+ return ret;
+}
+
+static int s390_cpumsf_update_queues(struct s390_cpumsf *sf, u64 ts)
+{
+ if (!sf->queues.new_data)
+ return 0;
+
+ sf->queues.new_data = false;
+ return s390_cpumsf_setup_queues(sf, ts);
+}
+
+static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp)
+{
+ unsigned int queue_nr;
+ u64 ts;
+ int ret;
+
+ while (1) {
+ struct auxtrace_queue *queue;
+ struct s390_cpumsf_queue *sfq;
+
+ if (!sf->heap.heap_cnt)
+ return 0;
+
+ if (sf->heap.heap_array[0].ordinal >= timestamp)
+ return 0;
+
+ queue_nr = sf->heap.heap_array[0].queue_nr;
+ queue = &sf->queues.queue_array[queue_nr];
+ sfq = queue->priv;
+
+ auxtrace_heap__pop(&sf->heap);
+ if (sf->heap.heap_cnt) {
+ ts = sf->heap.heap_array[0].ordinal + 1;
+ if (ts > timestamp)
+ ts = timestamp;
+ } else {
+ ts = timestamp;
+ }
+
+ ret = s390_cpumsf_run_decoder(sfq, &ts);
+ if (ret < 0) {
+ auxtrace_heap__add(&sf->heap, queue_nr, ts);
+ return ret;
+ }
+ if (!ret) {
+ ret = auxtrace_heap__add(&sf->heap, queue_nr, ts);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu,
+ pid_t pid, pid_t tid, u64 ip, u64 timestamp)
+{
+ char msg[MAX_AUXTRACE_ERROR_MSG];
+ union perf_event event;
+ int err;
+
+ strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1);
+ auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
+ code, cpu, pid, tid, ip, msg, timestamp);
+
+ err = perf_session__deliver_synth_event(sf->session, &event, NULL);
+ if (err)
+ pr_err("s390 Auxiliary Trace: failed to deliver error event,"
+ "error %d\n", err);
+ return err;
+}
+
+static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample)
+{
+ return s390_cpumsf_synth_error(sf, 1, sample->cpu,
+ sample->pid, sample->tid, 0,
+ sample->time);
+}
+
+static int
+s390_cpumsf_process_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_tool *tool)
+{
+ struct s390_cpumsf *sf = container_of(session->auxtrace,
+ struct s390_cpumsf,
+ auxtrace);
+ u64 timestamp = sample->time;
+ struct evsel *ev_bc000;
+
+ int err = 0;
+
+ if (dump_trace)
+ return 0;
+
+ if (!tool->ordered_events) {
+ pr_err("s390 Auxiliary Trace requires ordered events\n");
+ return -EINVAL;
+ }
+
+ if (event->header.type == PERF_RECORD_SAMPLE &&
+ sample->raw_size) {
+ /* Handle event with raw data */
+ ev_bc000 = evlist__event2evsel(session->evlist, event);
+ if (ev_bc000 &&
+ ev_bc000->core.attr.config == PERF_EVENT_CPUM_CF_DIAG)
+ err = s390_cpumcf_dumpctr(sf, sample);
+ return err;
+ }
+
+ if (event->header.type == PERF_RECORD_AUX &&
+ event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
+ return s390_cpumsf_lost(sf, sample);
+
+ if (timestamp) {
+ err = s390_cpumsf_update_queues(sf, timestamp);
+ if (!err)
+ err = s390_cpumsf_process_queues(sf, timestamp);
+ }
+ return err;
+}
+
+struct s390_cpumsf_synth {
+ struct perf_tool cpumsf_tool;
+ struct perf_session *session;
+};
+
+static int
+s390_cpumsf_process_auxtrace_event(struct perf_session *session,
+ union perf_event *event __maybe_unused,
+ struct perf_tool *tool __maybe_unused)
+{
+ struct s390_cpumsf *sf = container_of(session->auxtrace,
+ struct s390_cpumsf,
+ auxtrace);
+
+ int fd = perf_data__fd(session->data);
+ struct auxtrace_buffer *buffer;
+ off_t data_offset;
+ int err;
+
+ if (sf->data_queued)
+ return 0;
+
+ if (perf_data__is_pipe(session->data)) {
+ data_offset = 0;
+ } else {
+ data_offset = lseek(fd, 0, SEEK_CUR);
+ if (data_offset == -1)
+ return -errno;
+ }
+
+ err = auxtrace_queues__add_event(&sf->queues, session, event,
+ data_offset, &buffer);
+ if (err)
+ return err;
+
+ /* Dump here after copying piped trace out of the pipe */
+ if (dump_trace) {
+ if (auxtrace_buffer__get_data(buffer, fd)) {
+ s390_cpumsf_dump_event(sf, buffer->data,
+ buffer->size);
+ auxtrace_buffer__put_data(buffer);
+ }
+ }
+ return 0;
+}
+
+static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused)
+{
+}
+
+static int s390_cpumsf_flush(struct perf_session *session __maybe_unused,
+ struct perf_tool *tool __maybe_unused)
+{
+ return 0;
+}
+
+static void s390_cpumsf_free_queues(struct perf_session *session)
+{
+ struct s390_cpumsf *sf = container_of(session->auxtrace,
+ struct s390_cpumsf,
+ auxtrace);
+ struct auxtrace_queues *queues = &sf->queues;
+ unsigned int i;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *)
+ queues->queue_array[i].priv;
+
+ if (sfq != NULL) {
+ if (sfq->logfile) {
+ fclose(sfq->logfile);
+ sfq->logfile = NULL;
+ }
+ if (sfq->logfile_ctr) {
+ fclose(sfq->logfile_ctr);
+ sfq->logfile_ctr = NULL;
+ }
+ }
+ zfree(&queues->queue_array[i].priv);
+ }
+ auxtrace_queues__free(queues);
+}
+
+static void s390_cpumsf_free(struct perf_session *session)
+{
+ struct s390_cpumsf *sf = container_of(session->auxtrace,
+ struct s390_cpumsf,
+ auxtrace);
+
+ auxtrace_heap__free(&sf->heap);
+ s390_cpumsf_free_queues(session);
+ session->auxtrace = NULL;
+ zfree(&sf->logdir);
+ free(sf);
+}
+
+static bool
+s390_cpumsf_evsel_is_auxtrace(struct perf_session *session __maybe_unused,
+ struct evsel *evsel)
+{
+ return evsel->core.attr.type == PERF_TYPE_RAW &&
+ evsel->core.attr.config == PERF_EVENT_CPUM_SF_DIAG;
+}
+
+static int s390_cpumsf_get_type(const char *cpuid)
+{
+ int ret, family = 0;
+
+ ret = sscanf(cpuid, "%*[^,],%u", &family);
+ return (ret == 1) ? family : 0;
+}
+
+/* Check itrace options set on perf report command.
+ * Return true, if none are set or all options specified can be
+ * handled on s390 (currently only option 'd' for logging.
+ * Return false otherwise.
+ */
+static bool check_auxtrace_itrace(struct itrace_synth_opts *itops)
+{
+ bool ison = false;
+
+ if (!itops || !itops->set)
+ return true;
+ ison = itops->inject || itops->instructions || itops->branches ||
+ itops->transactions || itops->ptwrites ||
+ itops->pwr_events || itops->errors ||
+ itops->dont_decode || itops->calls || itops->returns ||
+ itops->callchain || itops->thread_stack ||
+ itops->last_branch || itops->add_callchain ||
+ itops->add_last_branch;
+ if (!ison)
+ return true;
+ pr_err("Unsupported --itrace options specified\n");
+ return false;
+}
+
+/* Check for AUXTRACE dump directory if it is needed.
+ * On failure print an error message but continue.
+ * Return 0 on wrong keyword in config file and 1 otherwise.
+ */
+static int s390_cpumsf__config(const char *var, const char *value, void *cb)
+{
+ struct s390_cpumsf *sf = cb;
+ struct stat stbuf;
+ int rc;
+
+ if (strcmp(var, "auxtrace.dumpdir"))
+ return 0;
+ sf->logdir = strdup(value);
+ if (sf->logdir == NULL) {
+ pr_err("Failed to find auxtrace log directory %s,"
+ " continue with current directory...\n", value);
+ return 1;
+ }
+ rc = stat(sf->logdir, &stbuf);
+ if (rc == -1 || !S_ISDIR(stbuf.st_mode)) {
+ pr_err("Missing auxtrace log directory %s,"
+ " continue with current directory...\n", value);
+ zfree(&sf->logdir);
+ }
+ return 1;
+}
+
+int s390_cpumsf_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session)
+{
+ struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
+ struct s390_cpumsf *sf;
+ int err;
+
+ if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info))
+ return -EINVAL;
+
+ sf = zalloc(sizeof(struct s390_cpumsf));
+ if (sf == NULL)
+ return -ENOMEM;
+
+ if (!check_auxtrace_itrace(session->itrace_synth_opts)) {
+ err = -EINVAL;
+ goto err_free;
+ }
+ sf->use_logfile = session->itrace_synth_opts->log;
+ if (sf->use_logfile)
+ perf_config(s390_cpumsf__config, sf);
+
+ err = auxtrace_queues__init(&sf->queues);
+ if (err)
+ goto err_free;
+
+ sf->session = session;
+ sf->machine = &session->machines.host; /* No kvm support */
+ sf->auxtrace_type = auxtrace_info->type;
+ sf->pmu_type = PERF_TYPE_RAW;
+ sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid);
+
+ sf->auxtrace.process_event = s390_cpumsf_process_event;
+ sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event;
+ sf->auxtrace.flush_events = s390_cpumsf_flush;
+ sf->auxtrace.free_events = s390_cpumsf_free_events;
+ sf->auxtrace.free = s390_cpumsf_free;
+ sf->auxtrace.evsel_is_auxtrace = s390_cpumsf_evsel_is_auxtrace;
+ session->auxtrace = &sf->auxtrace;
+
+ if (dump_trace)
+ return 0;
+
+ err = auxtrace_queues__process_index(&sf->queues, session);
+ if (err)
+ goto err_free_queues;
+
+ if (sf->queues.populated)
+ sf->data_queued = true;
+
+ return 0;
+
+err_free_queues:
+ auxtrace_queues__free(&sf->queues);
+ session->auxtrace = NULL;
+err_free:
+ zfree(&sf->logdir);
+ free(sf);
+ return err;
+}
diff --git a/tools/perf/util/s390-cpumsf.h b/tools/perf/util/s390-cpumsf.h
new file mode 100644
index 000000000..fb64d1005
--- /dev/null
+++ b/tools/perf/util/s390-cpumsf.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2018
+ * Auxtrace support for s390 CPU-Measurement Sampling Facility
+ *
+ * Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ */
+
+#ifndef INCLUDE__PERF_S390_CPUMSF_H
+#define INCLUDE__PERF_S390_CPUMSF_H
+
+union perf_event;
+struct perf_session;
+struct perf_pmu;
+
+struct auxtrace_record *
+s390_cpumsf_recording_init(int *err, struct perf_pmu *s390_cpumsf_pmu);
+
+int s390_cpumsf_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session);
+#endif
diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c
new file mode 100644
index 000000000..115b16edb
--- /dev/null
+++ b/tools/perf/util/s390-sample-raw.c
@@ -0,0 +1,238 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2019
+ * Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Architecture specific trace_event function. Save event's bc000 raw data
+ * to file. File name is aux.ctr.## where ## stands for the CPU number the
+ * sample was taken from.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <sys/stat.h>
+#include <linux/compiler.h>
+#include <asm/byteorder.h>
+
+#include "debug.h"
+#include "session.h"
+#include "evlist.h"
+#include "color.h"
+#include "sample-raw.h"
+#include "s390-cpumcf-kernel.h"
+#include "util/pmu.h"
+#include "util/sample.h"
+
+static size_t ctrset_size(struct cf_ctrset_entry *set)
+{
+ return sizeof(*set) + set->ctr * sizeof(u64);
+}
+
+static bool ctrset_valid(struct cf_ctrset_entry *set)
+{
+ return set->def == S390_CPUMCF_DIAG_DEF;
+}
+
+/* CPU Measurement Counter Facility raw data is a byte stream. It is 8 byte
+ * aligned and might have trailing padding bytes.
+ * Display the raw data on screen.
+ */
+static bool s390_cpumcfdg_testctr(struct perf_sample *sample)
+{
+ size_t len = sample->raw_size, offset = 0;
+ unsigned char *buf = sample->raw_data;
+ struct cf_trailer_entry *te;
+ struct cf_ctrset_entry *cep, ce;
+
+ if (!len)
+ return false;
+ while (offset < len) {
+ cep = (struct cf_ctrset_entry *)(buf + offset);
+ ce.def = be16_to_cpu(cep->def);
+ ce.set = be16_to_cpu(cep->set);
+ ce.ctr = be16_to_cpu(cep->ctr);
+ ce.res1 = be16_to_cpu(cep->res1);
+
+ if (!ctrset_valid(&ce) || offset + ctrset_size(&ce) > len) {
+ /* Raw data for counter sets are always multiple of 8
+ * bytes. Prepending a 4 bytes size field to the
+ * raw data block in the sample causes the perf tool
+ * to append 4 padding bytes to make the raw data part
+ * of the sample a multiple of eight bytes again.
+ *
+ * If the last entry (trailer) is 4 bytes off the raw
+ * area data end, all is good.
+ */
+ if (len - offset - sizeof(*te) == 4)
+ break;
+ pr_err("Invalid counter set entry at %zd\n", offset);
+ return false;
+ }
+ offset += ctrset_size(&ce);
+ }
+ return true;
+}
+
+/* Dump event bc000 on screen, already tested on correctness. */
+static void s390_cpumcfdg_dumptrail(const char *color, size_t offset,
+ struct cf_trailer_entry *tep)
+{
+ struct cf_trailer_entry te;
+
+ te.flags = be64_to_cpu(tep->flags);
+ te.cfvn = be16_to_cpu(tep->cfvn);
+ te.csvn = be16_to_cpu(tep->csvn);
+ te.cpu_speed = be32_to_cpu(tep->cpu_speed);
+ te.timestamp = be64_to_cpu(tep->timestamp);
+ te.progusage1 = be64_to_cpu(tep->progusage1);
+ te.progusage2 = be64_to_cpu(tep->progusage2);
+ te.progusage3 = be64_to_cpu(tep->progusage3);
+ te.tod_base = be64_to_cpu(tep->tod_base);
+ te.mach_type = be16_to_cpu(tep->mach_type);
+ te.res1 = be16_to_cpu(tep->res1);
+ te.res2 = be32_to_cpu(tep->res2);
+
+ color_fprintf(stdout, color, " [%#08zx] Trailer:%c%c%c%c%c"
+ " Cfvn:%d Csvn:%d Speed:%d TOD:%#llx\n",
+ offset, te.clock_base ? 'T' : ' ',
+ te.speed ? 'S' : ' ', te.mtda ? 'M' : ' ',
+ te.caca ? 'C' : ' ', te.lcda ? 'L' : ' ',
+ te.cfvn, te.csvn, te.cpu_speed, te.timestamp);
+ color_fprintf(stdout, color, "\t\t1:%lx 2:%lx 3:%lx TOD-Base:%#llx"
+ " Type:%x\n\n",
+ te.progusage1, te.progusage2, te.progusage3,
+ te.tod_base, te.mach_type);
+}
+
+/* Return starting number of a counter set */
+static int get_counterset_start(int setnr)
+{
+ switch (setnr) {
+ case CPUMF_CTR_SET_BASIC: /* Basic counter set */
+ return 0;
+ case CPUMF_CTR_SET_USER: /* Problem state counter set */
+ return 32;
+ case CPUMF_CTR_SET_CRYPTO: /* Crypto counter set */
+ return 64;
+ case CPUMF_CTR_SET_EXT: /* Extended counter set */
+ return 128;
+ case CPUMF_CTR_SET_MT_DIAG: /* Diagnostic counter set */
+ return 448;
+ default:
+ return -1;
+ }
+}
+
+struct get_counter_name_data {
+ int wanted;
+ char *result;
+};
+
+static int get_counter_name_callback(void *vdata, struct pmu_event_info *info)
+{
+ struct get_counter_name_data *data = vdata;
+ int rc, event_nr;
+ const char *event_str;
+
+ if (info->str == NULL)
+ return 0;
+
+ event_str = strstr(info->str, "event=");
+ if (!event_str)
+ return 0;
+
+ rc = sscanf(event_str, "event=%x", &event_nr);
+ if (rc == 1 && event_nr == data->wanted) {
+ data->result = strdup(info->name);
+ return 1; /* Terminate the search. */
+ }
+ return 0;
+}
+
+/* Scan the PMU and extract the logical name of a counter from the event. Input
+ * is the counter set and counter number with in the set. Construct the event
+ * number and use this as key. If they match return the name of this counter.
+ * If no match is found a NULL pointer is returned.
+ */
+static char *get_counter_name(int set, int nr, struct perf_pmu *pmu)
+{
+ struct get_counter_name_data data = {
+ .wanted = get_counterset_start(set) + nr,
+ .result = NULL,
+ };
+
+ if (!pmu)
+ return NULL;
+
+ perf_pmu__for_each_event(pmu, /*skip_duplicate_pmus=*/ true,
+ &data, get_counter_name_callback);
+ return data.result;
+}
+
+static void s390_cpumcfdg_dump(struct perf_pmu *pmu, struct perf_sample *sample)
+{
+ size_t i, len = sample->raw_size, offset = 0;
+ unsigned char *buf = sample->raw_data;
+ const char *color = PERF_COLOR_BLUE;
+ struct cf_ctrset_entry *cep, ce;
+ u64 *p;
+
+ while (offset < len) {
+ cep = (struct cf_ctrset_entry *)(buf + offset);
+
+ ce.def = be16_to_cpu(cep->def);
+ ce.set = be16_to_cpu(cep->set);
+ ce.ctr = be16_to_cpu(cep->ctr);
+ ce.res1 = be16_to_cpu(cep->res1);
+
+ if (!ctrset_valid(&ce)) { /* Print trailer */
+ s390_cpumcfdg_dumptrail(color, offset,
+ (struct cf_trailer_entry *)cep);
+ return;
+ }
+
+ color_fprintf(stdout, color, " [%#08zx] Counterset:%d"
+ " Counters:%d\n", offset, ce.set, ce.ctr);
+ for (i = 0, p = (u64 *)(cep + 1); i < ce.ctr; ++i, ++p) {
+ char *ev_name = get_counter_name(ce.set, i, pmu);
+
+ color_fprintf(stdout, color,
+ "\tCounter:%03d %s Value:%#018lx\n", i,
+ ev_name ?: "<unknown>", be64_to_cpu(*p));
+ free(ev_name);
+ }
+ offset += ctrset_size(&ce);
+ }
+}
+
+/* S390 specific trace event function. Check for PERF_RECORD_SAMPLE events
+ * and if the event was triggered by a counter set diagnostic event display
+ * its raw data.
+ * The function is only invoked when the dump flag -D is set.
+ */
+void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample)
+{
+ struct evsel *evsel;
+
+ if (event->header.type != PERF_RECORD_SAMPLE)
+ return;
+
+ evsel = evlist__event2evsel(evlist, event);
+ if (evsel == NULL ||
+ evsel->core.attr.config != PERF_EVENT_CPUM_CF_DIAG)
+ return;
+
+ /* Display raw data on screen */
+ if (!s390_cpumcfdg_testctr(sample)) {
+ pr_err("Invalid counter set data encountered\n");
+ return;
+ }
+ s390_cpumcfdg_dump(evsel->pmu, sample);
+}
diff --git a/tools/perf/util/sample-raw.c b/tools/perf/util/sample-raw.c
new file mode 100644
index 000000000..f3f6bd9d2
--- /dev/null
+++ b/tools/perf/util/sample-raw.c
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <string.h>
+#include <linux/string.h>
+#include "evlist.h"
+#include "env.h"
+#include "header.h"
+#include "sample-raw.h"
+
+/*
+ * Check platform the perf data file was created on and perform platform
+ * specific interpretation.
+ */
+void evlist__init_trace_event_sample_raw(struct evlist *evlist)
+{
+ const char *arch_pf = perf_env__arch(evlist->env);
+ const char *cpuid = perf_env__cpuid(evlist->env);
+
+ if (arch_pf && !strcmp("s390", arch_pf))
+ evlist->trace_event_sample_raw = evlist__s390_sample_raw;
+ else if (arch_pf && !strcmp("x86", arch_pf) &&
+ cpuid && strstarts(cpuid, "AuthenticAMD") &&
+ evlist__has_amd_ibs(evlist)) {
+ evlist->trace_event_sample_raw = evlist__amd_sample_raw;
+ }
+}
diff --git a/tools/perf/util/sample-raw.h b/tools/perf/util/sample-raw.h
new file mode 100644
index 000000000..ea01c5811
--- /dev/null
+++ b/tools/perf/util/sample-raw.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SAMPLE_RAW_H
+#define __SAMPLE_RAW_H 1
+
+struct evlist;
+union perf_event;
+struct perf_sample;
+
+void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event,
+ struct perf_sample *sample);
+bool evlist__has_amd_ibs(struct evlist *evlist);
+void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event,
+ struct perf_sample *sample);
+void evlist__init_trace_event_sample_raw(struct evlist *evlist);
+#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
new file mode 100644
index 000000000..c92ad0f51
--- /dev/null
+++ b/tools/perf/util/sample.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SAMPLE_H
+#define __PERF_SAMPLE_H
+
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+/* number of register is bound by the number of bits in regs_dump::mask (64) */
+#define PERF_SAMPLE_REGS_CACHE_SIZE (8 * sizeof(u64))
+
+struct regs_dump {
+ u64 abi;
+ u64 mask;
+ u64 *regs;
+
+ /* Cached values/mask filled by first register access. */
+ u64 cache_regs[PERF_SAMPLE_REGS_CACHE_SIZE];
+ u64 cache_mask;
+};
+
+struct stack_dump {
+ u16 offset;
+ u64 size;
+ char *data;
+};
+
+struct sample_read_value {
+ u64 value;
+ u64 id; /* only if PERF_FORMAT_ID */
+ u64 lost; /* only if PERF_FORMAT_LOST */
+};
+
+struct sample_read {
+ u64 time_enabled;
+ u64 time_running;
+ union {
+ struct {
+ u64 nr;
+ struct sample_read_value *values;
+ } group;
+ struct sample_read_value one;
+ };
+};
+
+static inline size_t sample_read_value_size(u64 read_format)
+{
+ /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+ if (read_format & PERF_FORMAT_LOST)
+ return sizeof(struct sample_read_value);
+ else
+ return offsetof(struct sample_read_value, lost);
+}
+
+static inline struct sample_read_value *next_sample_read_value(struct sample_read_value *v, u64 read_format)
+{
+ return (void *)v + sample_read_value_size(read_format);
+}
+
+#define sample_read_group__for_each(v, nr, rf) \
+ for (int __i = 0; __i < (int)nr; v = next_sample_read_value(v, rf), __i++)
+
+#define MAX_INSN 16
+
+struct aux_sample {
+ u64 size;
+ void *data;
+};
+
+struct simd_flags {
+ u64 arch:1, /* architecture (isa) */
+ pred:2; /* predication */
+};
+
+/* simd architecture flags */
+#define SIMD_OP_FLAGS_ARCH_SVE 0x01 /* ARM SVE */
+
+/* simd predicate flags */
+#define SIMD_OP_FLAGS_PRED_PARTIAL 0x01 /* partial predicate */
+#define SIMD_OP_FLAGS_PRED_EMPTY 0x02 /* empty predicate */
+
+struct perf_sample {
+ u64 ip;
+ u32 pid, tid;
+ u64 time;
+ u64 addr;
+ u64 id;
+ u64 stream_id;
+ u64 period;
+ u64 weight;
+ u64 transaction;
+ u64 insn_cnt;
+ u64 cyc_cnt;
+ u32 cpu;
+ u32 raw_size;
+ u64 data_src;
+ u64 phys_addr;
+ u64 data_page_size;
+ u64 code_page_size;
+ u64 cgroup;
+ u32 flags;
+ u32 machine_pid;
+ u32 vcpu;
+ u16 insn_len;
+ u8 cpumode;
+ u16 misc;
+ u16 ins_lat;
+ union {
+ u16 p_stage_cyc;
+ u16 retire_lat;
+ };
+ bool no_hw_idx; /* No hw_idx collected in branch_stack */
+ char insn[MAX_INSN];
+ void *raw_data;
+ struct ip_callchain *callchain;
+ struct branch_stack *branch_stack;
+ struct regs_dump user_regs;
+ struct regs_dump intr_regs;
+ struct stack_dump user_stack;
+ struct sample_read read;
+ struct aux_sample aux_sample;
+ struct simd_flags simd_flags;
+};
+
+/*
+ * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get
+ * 8-byte alignment.
+ */
+static inline void *perf_sample__synth_ptr(struct perf_sample *sample)
+{
+ return sample->raw_data - 4;
+}
+
+#endif /* __PERF_SAMPLE_H */
diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build
new file mode 100644
index 000000000..586b94e90
--- /dev/null
+++ b/tools/perf/util/scripting-engines/Build
@@ -0,0 +1,9 @@
+ifeq ($(CONFIG_LIBTRACEEVENT),y)
+ perf-$(CONFIG_LIBPERL) += trace-event-perl.o
+endif
+perf-$(CONFIG_LIBPYTHON) += trace-event-python.o
+
+CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default -Wno-bad-function-cast -Wno-declaration-after-statement -Wno-switch-enum
+
+# -Wno-declaration-after-statement: The python headers have mixed code with declarations (decls after asserts, for instance)
+CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-deprecated-declarations -Wno-switch-enum -Wno-declaration-after-statement
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
new file mode 100644
index 000000000..603091317
--- /dev/null
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -0,0 +1,769 @@
+/*
+ * trace-event-perl. Feed perf script events to an embedded Perl interpreter.
+ *
+ * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <linux/bitmap.h>
+#include <linux/time64.h>
+#include <traceevent/event-parse.h>
+
+#include <stdbool.h>
+/* perl needs the following define, right after including stdbool.h */
+#define HAS_BOOL
+#include <EXTERN.h>
+#include <perl.h>
+
+#include "../callchain.h"
+#include "../dso.h"
+#include "../machine.h"
+#include "../map.h"
+#include "../symbol.h"
+#include "../thread.h"
+#include "../event.h"
+#include "../trace-event.h"
+#include "../evsel.h"
+#include "../debug.h"
+
+void boot_Perf__Trace__Context(pTHX_ CV *cv);
+void boot_DynaLoader(pTHX_ CV *cv);
+typedef PerlInterpreter * INTERP;
+
+void xs_init(pTHX);
+
+void xs_init(pTHX)
+{
+ const char *file = __FILE__;
+ dXSUB_SYS;
+
+ newXS("Perf::Trace::Context::bootstrap", boot_Perf__Trace__Context,
+ file);
+ newXS("DynaLoader::boot_DynaLoader", boot_DynaLoader, file);
+}
+
+INTERP my_perl;
+
+#define TRACE_EVENT_TYPE_MAX \
+ ((1 << (sizeof(unsigned short) * 8)) - 1)
+
+extern struct scripting_context *scripting_context;
+
+static char *cur_field_name;
+static int zero_flag_atom;
+
+static void define_symbolic_value(const char *ev_name,
+ const char *field_name,
+ const char *field_value,
+ const char *field_str)
+{
+ unsigned long long value;
+ dSP;
+
+ value = eval_flag(field_value);
+
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(SP);
+
+ XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+ XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+ XPUSHs(sv_2mortal(newSVuv(value)));
+ XPUSHs(sv_2mortal(newSVpv(field_str, 0)));
+
+ PUTBACK;
+ if (get_cv("main::define_symbolic_value", 0))
+ call_pv("main::define_symbolic_value", G_SCALAR);
+ SPAGAIN;
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+}
+
+static void define_symbolic_values(struct tep_print_flag_sym *field,
+ const char *ev_name,
+ const char *field_name)
+{
+ define_symbolic_value(ev_name, field_name, field->value, field->str);
+ if (field->next)
+ define_symbolic_values(field->next, ev_name, field_name);
+}
+
+static void define_symbolic_field(const char *ev_name,
+ const char *field_name)
+{
+ dSP;
+
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(SP);
+
+ XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+ XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+
+ PUTBACK;
+ if (get_cv("main::define_symbolic_field", 0))
+ call_pv("main::define_symbolic_field", G_SCALAR);
+ SPAGAIN;
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+}
+
+static void define_flag_value(const char *ev_name,
+ const char *field_name,
+ const char *field_value,
+ const char *field_str)
+{
+ unsigned long long value;
+ dSP;
+
+ value = eval_flag(field_value);
+
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(SP);
+
+ XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+ XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+ XPUSHs(sv_2mortal(newSVuv(value)));
+ XPUSHs(sv_2mortal(newSVpv(field_str, 0)));
+
+ PUTBACK;
+ if (get_cv("main::define_flag_value", 0))
+ call_pv("main::define_flag_value", G_SCALAR);
+ SPAGAIN;
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+}
+
+static void define_flag_values(struct tep_print_flag_sym *field,
+ const char *ev_name,
+ const char *field_name)
+{
+ define_flag_value(ev_name, field_name, field->value, field->str);
+ if (field->next)
+ define_flag_values(field->next, ev_name, field_name);
+}
+
+static void define_flag_field(const char *ev_name,
+ const char *field_name,
+ const char *delim)
+{
+ dSP;
+
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(SP);
+
+ XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+ XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+ XPUSHs(sv_2mortal(newSVpv(delim, 0)));
+
+ PUTBACK;
+ if (get_cv("main::define_flag_field", 0))
+ call_pv("main::define_flag_field", G_SCALAR);
+ SPAGAIN;
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+}
+
+static void define_event_symbols(struct tep_event *event,
+ const char *ev_name,
+ struct tep_print_arg *args)
+{
+ if (args == NULL)
+ return;
+
+ switch (args->type) {
+ case TEP_PRINT_NULL:
+ break;
+ case TEP_PRINT_ATOM:
+ define_flag_value(ev_name, cur_field_name, "0",
+ args->atom.atom);
+ zero_flag_atom = 0;
+ break;
+ case TEP_PRINT_FIELD:
+ free(cur_field_name);
+ cur_field_name = strdup(args->field.name);
+ break;
+ case TEP_PRINT_FLAGS:
+ define_event_symbols(event, ev_name, args->flags.field);
+ define_flag_field(ev_name, cur_field_name, args->flags.delim);
+ define_flag_values(args->flags.flags, ev_name, cur_field_name);
+ break;
+ case TEP_PRINT_SYMBOL:
+ define_event_symbols(event, ev_name, args->symbol.field);
+ define_symbolic_field(ev_name, cur_field_name);
+ define_symbolic_values(args->symbol.symbols, ev_name,
+ cur_field_name);
+ break;
+ case TEP_PRINT_HEX:
+ case TEP_PRINT_HEX_STR:
+ define_event_symbols(event, ev_name, args->hex.field);
+ define_event_symbols(event, ev_name, args->hex.size);
+ break;
+ case TEP_PRINT_INT_ARRAY:
+ define_event_symbols(event, ev_name, args->int_array.field);
+ define_event_symbols(event, ev_name, args->int_array.count);
+ define_event_symbols(event, ev_name, args->int_array.el_size);
+ break;
+ case TEP_PRINT_BSTRING:
+ case TEP_PRINT_DYNAMIC_ARRAY:
+ case TEP_PRINT_DYNAMIC_ARRAY_LEN:
+ case TEP_PRINT_STRING:
+ case TEP_PRINT_BITMASK:
+ break;
+ case TEP_PRINT_TYPE:
+ define_event_symbols(event, ev_name, args->typecast.item);
+ break;
+ case TEP_PRINT_OP:
+ if (strcmp(args->op.op, ":") == 0)
+ zero_flag_atom = 1;
+ define_event_symbols(event, ev_name, args->op.left);
+ define_event_symbols(event, ev_name, args->op.right);
+ break;
+ case TEP_PRINT_FUNC:
+ default:
+ pr_err("Unsupported print arg type\n");
+ /* we should warn... */
+ return;
+ }
+
+ if (args->next)
+ define_event_symbols(event, ev_name, args->next);
+}
+
+static SV *perl_process_callchain(struct perf_sample *sample,
+ struct evsel *evsel,
+ struct addr_location *al)
+{
+ struct callchain_cursor *cursor;
+ AV *list;
+
+ list = newAV();
+ if (!list)
+ goto exit;
+
+ if (!symbol_conf.use_callchain || !sample->callchain)
+ goto exit;
+
+ cursor = get_tls_callchain_cursor();
+
+ if (thread__resolve_callchain(al->thread, cursor, evsel,
+ sample, NULL, NULL, scripting_max_stack) != 0) {
+ pr_err("Failed to resolve callchain. Skipping\n");
+ goto exit;
+ }
+ callchain_cursor_commit(cursor);
+
+
+ while (1) {
+ HV *elem;
+ struct callchain_cursor_node *node;
+ node = callchain_cursor_current(cursor);
+ if (!node)
+ break;
+
+ elem = newHV();
+ if (!elem)
+ goto exit;
+
+ if (!hv_stores(elem, "ip", newSVuv(node->ip))) {
+ hv_undef(elem);
+ goto exit;
+ }
+
+ if (node->ms.sym) {
+ HV *sym = newHV();
+ if (!sym) {
+ hv_undef(elem);
+ goto exit;
+ }
+ if (!hv_stores(sym, "start", newSVuv(node->ms.sym->start)) ||
+ !hv_stores(sym, "end", newSVuv(node->ms.sym->end)) ||
+ !hv_stores(sym, "binding", newSVuv(node->ms.sym->binding)) ||
+ !hv_stores(sym, "name", newSVpvn(node->ms.sym->name,
+ node->ms.sym->namelen)) ||
+ !hv_stores(elem, "sym", newRV_noinc((SV*)sym))) {
+ hv_undef(sym);
+ hv_undef(elem);
+ goto exit;
+ }
+ }
+
+ if (node->ms.map) {
+ struct map *map = node->ms.map;
+ struct dso *dso = map ? map__dso(map) : NULL;
+ const char *dsoname = "[unknown]";
+
+ if (dso) {
+ if (symbol_conf.show_kernel_path && dso->long_name)
+ dsoname = dso->long_name;
+ else
+ dsoname = dso->name;
+ }
+ if (!hv_stores(elem, "dso", newSVpv(dsoname,0))) {
+ hv_undef(elem);
+ goto exit;
+ }
+ }
+
+ callchain_cursor_advance(cursor);
+ av_push(list, newRV_noinc((SV*)elem));
+ }
+
+exit:
+ return newRV_noinc((SV*)list);
+}
+
+static void perl_process_tracepoint(struct perf_sample *sample,
+ struct evsel *evsel,
+ struct addr_location *al)
+{
+ struct thread *thread = al->thread;
+ struct tep_event *event = evsel->tp_format;
+ struct tep_format_field *field;
+ static char handler[256];
+ unsigned long long val;
+ unsigned long s, ns;
+ int pid;
+ int cpu = sample->cpu;
+ void *data = sample->raw_data;
+ unsigned long long nsecs = sample->time;
+ const char *comm = thread__comm_str(thread);
+ DECLARE_BITMAP(events_defined, TRACE_EVENT_TYPE_MAX);
+
+ bitmap_zero(events_defined, TRACE_EVENT_TYPE_MAX);
+ dSP;
+
+ if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
+ return;
+
+ if (!event) {
+ pr_debug("ug! no event found for type %" PRIu64, (u64)evsel->core.attr.config);
+ return;
+ }
+
+ pid = raw_field_value(event, "common_pid", data);
+
+ sprintf(handler, "%s::%s", event->system, event->name);
+
+ if (!__test_and_set_bit(event->id, events_defined))
+ define_event_symbols(event, handler, event->print_fmt.args);
+
+ s = nsecs / NSEC_PER_SEC;
+ ns = nsecs - s * NSEC_PER_SEC;
+
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(SP);
+
+ XPUSHs(sv_2mortal(newSVpv(handler, 0)));
+ XPUSHs(sv_2mortal(newSViv(PTR2IV(scripting_context))));
+ XPUSHs(sv_2mortal(newSVuv(cpu)));
+ XPUSHs(sv_2mortal(newSVuv(s)));
+ XPUSHs(sv_2mortal(newSVuv(ns)));
+ XPUSHs(sv_2mortal(newSViv(pid)));
+ XPUSHs(sv_2mortal(newSVpv(comm, 0)));
+ XPUSHs(sv_2mortal(perl_process_callchain(sample, evsel, al)));
+
+ /* common fields other than pid can be accessed via xsub fns */
+
+ for (field = event->format.fields; field; field = field->next) {
+ if (field->flags & TEP_FIELD_IS_STRING) {
+ int offset;
+ if (field->flags & TEP_FIELD_IS_DYNAMIC) {
+ offset = *(int *)(data + field->offset);
+ offset &= 0xffff;
+ if (tep_field_is_relative(field->flags))
+ offset += field->offset + field->size;
+ } else
+ offset = field->offset;
+ XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0)));
+ } else { /* FIELD_IS_NUMERIC */
+ val = read_size(event, data + field->offset,
+ field->size);
+ if (field->flags & TEP_FIELD_IS_SIGNED) {
+ XPUSHs(sv_2mortal(newSViv(val)));
+ } else {
+ XPUSHs(sv_2mortal(newSVuv(val)));
+ }
+ }
+ }
+
+ PUTBACK;
+
+ if (get_cv(handler, 0))
+ call_pv(handler, G_SCALAR);
+ else if (get_cv("main::trace_unhandled", 0)) {
+ XPUSHs(sv_2mortal(newSVpv(handler, 0)));
+ XPUSHs(sv_2mortal(newSViv(PTR2IV(scripting_context))));
+ XPUSHs(sv_2mortal(newSVuv(cpu)));
+ XPUSHs(sv_2mortal(newSVuv(nsecs)));
+ XPUSHs(sv_2mortal(newSViv(pid)));
+ XPUSHs(sv_2mortal(newSVpv(comm, 0)));
+ XPUSHs(sv_2mortal(perl_process_callchain(sample, evsel, al)));
+ call_pv("main::trace_unhandled", G_SCALAR);
+ }
+ SPAGAIN;
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+}
+
+static void perl_process_event_generic(union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel)
+{
+ dSP;
+
+ if (!get_cv("process_event", 0))
+ return;
+
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(SP);
+ XPUSHs(sv_2mortal(newSVpvn((const char *)event, event->header.size)));
+ XPUSHs(sv_2mortal(newSVpvn((const char *)&evsel->core.attr, sizeof(evsel->core.attr))));
+ XPUSHs(sv_2mortal(newSVpvn((const char *)sample, sizeof(*sample))));
+ XPUSHs(sv_2mortal(newSVpvn((const char *)sample->raw_data, sample->raw_size)));
+ PUTBACK;
+ call_pv("process_event", G_SCALAR);
+ SPAGAIN;
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+}
+
+static void perl_process_event(union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel,
+ struct addr_location *al,
+ struct addr_location *addr_al)
+{
+ scripting_context__update(scripting_context, event, sample, evsel, al, addr_al);
+ perl_process_tracepoint(sample, evsel, al);
+ perl_process_event_generic(event, sample, evsel);
+}
+
+static void run_start_sub(void)
+{
+ dSP; /* access to Perl stack */
+ PUSHMARK(SP);
+
+ if (get_cv("main::trace_begin", 0))
+ call_pv("main::trace_begin", G_DISCARD | G_NOARGS);
+}
+
+/*
+ * Start trace script
+ */
+static int perl_start_script(const char *script, int argc, const char **argv,
+ struct perf_session *session)
+{
+ const char **command_line;
+ int i, err = 0;
+
+ scripting_context->session = session;
+
+ command_line = malloc((argc + 2) * sizeof(const char *));
+ command_line[0] = "";
+ command_line[1] = script;
+ for (i = 2; i < argc + 2; i++)
+ command_line[i] = argv[i - 2];
+
+ my_perl = perl_alloc();
+ perl_construct(my_perl);
+
+ if (perl_parse(my_perl, xs_init, argc + 2, (char **)command_line,
+ (char **)NULL)) {
+ err = -1;
+ goto error;
+ }
+
+ if (perl_run(my_perl)) {
+ err = -1;
+ goto error;
+ }
+
+ if (SvTRUE(ERRSV)) {
+ err = -1;
+ goto error;
+ }
+
+ run_start_sub();
+
+ free(command_line);
+ return 0;
+error:
+ perl_free(my_perl);
+ free(command_line);
+
+ return err;
+}
+
+static int perl_flush_script(void)
+{
+ return 0;
+}
+
+/*
+ * Stop trace script
+ */
+static int perl_stop_script(void)
+{
+ dSP; /* access to Perl stack */
+ PUSHMARK(SP);
+
+ if (get_cv("main::trace_end", 0))
+ call_pv("main::trace_end", G_DISCARD | G_NOARGS);
+
+ perl_destruct(my_perl);
+ perl_free(my_perl);
+
+ return 0;
+}
+
+static int perl_generate_script(struct tep_handle *pevent, const char *outfile)
+{
+ int i, not_first, count, nr_events;
+ struct tep_event **all_events;
+ struct tep_event *event = NULL;
+ struct tep_format_field *f;
+ char fname[PATH_MAX];
+ FILE *ofp;
+
+ sprintf(fname, "%s.pl", outfile);
+ ofp = fopen(fname, "w");
+ if (ofp == NULL) {
+ fprintf(stderr, "couldn't open %s\n", fname);
+ return -1;
+ }
+
+ fprintf(ofp, "# perf script event handlers, "
+ "generated by perf script -g perl\n");
+
+ fprintf(ofp, "# Licensed under the terms of the GNU GPL"
+ " License version 2\n\n");
+
+ fprintf(ofp, "# The common_* event handler fields are the most useful "
+ "fields common to\n");
+
+ fprintf(ofp, "# all events. They don't necessarily correspond to "
+ "the 'common_*' fields\n");
+
+ fprintf(ofp, "# in the format files. Those fields not available as "
+ "handler params can\n");
+
+ fprintf(ofp, "# be retrieved using Perl functions of the form "
+ "common_*($context).\n");
+
+ fprintf(ofp, "# See Context.pm for the list of available "
+ "functions.\n\n");
+
+ fprintf(ofp, "use lib \"$ENV{'PERF_EXEC_PATH'}/scripts/perl/"
+ "Perf-Trace-Util/lib\";\n");
+
+ fprintf(ofp, "use lib \"./Perf-Trace-Util/lib\";\n");
+ fprintf(ofp, "use Perf::Trace::Core;\n");
+ fprintf(ofp, "use Perf::Trace::Context;\n");
+ fprintf(ofp, "use Perf::Trace::Util;\n\n");
+
+ fprintf(ofp, "sub trace_begin\n{\n\t# optional\n}\n\n");
+ fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n");
+
+
+ fprintf(ofp, "\n\
+sub print_backtrace\n\
+{\n\
+ my $callchain = shift;\n\
+ for my $node (@$callchain)\n\
+ {\n\
+ if(exists $node->{sym})\n\
+ {\n\
+ printf( \"\\t[\\%%x] \\%%s\\n\", $node->{ip}, $node->{sym}{name});\n\
+ }\n\
+ else\n\
+ {\n\
+ printf( \"\\t[\\%%x]\\n\", $node{ip});\n\
+ }\n\
+ }\n\
+}\n\n\
+");
+
+ nr_events = tep_get_events_count(pevent);
+ all_events = tep_list_events(pevent, TEP_EVENT_SORT_ID);
+
+ for (i = 0; all_events && i < nr_events; i++) {
+ event = all_events[i];
+ fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name);
+ fprintf(ofp, "\tmy (");
+
+ fprintf(ofp, "$event_name, ");
+ fprintf(ofp, "$context, ");
+ fprintf(ofp, "$common_cpu, ");
+ fprintf(ofp, "$common_secs, ");
+ fprintf(ofp, "$common_nsecs,\n");
+ fprintf(ofp, "\t $common_pid, ");
+ fprintf(ofp, "$common_comm, ");
+ fprintf(ofp, "$common_callchain,\n\t ");
+
+ not_first = 0;
+ count = 0;
+
+ for (f = event->format.fields; f; f = f->next) {
+ if (not_first++)
+ fprintf(ofp, ", ");
+ if (++count % 5 == 0)
+ fprintf(ofp, "\n\t ");
+
+ fprintf(ofp, "$%s", f->name);
+ }
+ fprintf(ofp, ") = @_;\n\n");
+
+ fprintf(ofp, "\tprint_header($event_name, $common_cpu, "
+ "$common_secs, $common_nsecs,\n\t "
+ "$common_pid, $common_comm, $common_callchain);\n\n");
+
+ fprintf(ofp, "\tprintf(\"");
+
+ not_first = 0;
+ count = 0;
+
+ for (f = event->format.fields; f; f = f->next) {
+ if (not_first++)
+ fprintf(ofp, ", ");
+ if (count && count % 4 == 0) {
+ fprintf(ofp, "\".\n\t \"");
+ }
+ count++;
+
+ fprintf(ofp, "%s=", f->name);
+ if (f->flags & TEP_FIELD_IS_STRING ||
+ f->flags & TEP_FIELD_IS_FLAG ||
+ f->flags & TEP_FIELD_IS_SYMBOLIC)
+ fprintf(ofp, "%%s");
+ else if (f->flags & TEP_FIELD_IS_SIGNED)
+ fprintf(ofp, "%%d");
+ else
+ fprintf(ofp, "%%u");
+ }
+
+ fprintf(ofp, "\\n\",\n\t ");
+
+ not_first = 0;
+ count = 0;
+
+ for (f = event->format.fields; f; f = f->next) {
+ if (not_first++)
+ fprintf(ofp, ", ");
+
+ if (++count % 5 == 0)
+ fprintf(ofp, "\n\t ");
+
+ if (f->flags & TEP_FIELD_IS_FLAG) {
+ if ((count - 1) % 5 != 0) {
+ fprintf(ofp, "\n\t ");
+ count = 4;
+ }
+ fprintf(ofp, "flag_str(\"");
+ fprintf(ofp, "%s::%s\", ", event->system,
+ event->name);
+ fprintf(ofp, "\"%s\", $%s)", f->name,
+ f->name);
+ } else if (f->flags & TEP_FIELD_IS_SYMBOLIC) {
+ if ((count - 1) % 5 != 0) {
+ fprintf(ofp, "\n\t ");
+ count = 4;
+ }
+ fprintf(ofp, "symbol_str(\"");
+ fprintf(ofp, "%s::%s\", ", event->system,
+ event->name);
+ fprintf(ofp, "\"%s\", $%s)", f->name,
+ f->name);
+ } else
+ fprintf(ofp, "$%s", f->name);
+ }
+
+ fprintf(ofp, ");\n\n");
+
+ fprintf(ofp, "\tprint_backtrace($common_callchain);\n");
+
+ fprintf(ofp, "}\n\n");
+ }
+
+ fprintf(ofp, "sub trace_unhandled\n{\n\tmy ($event_name, $context, "
+ "$common_cpu, $common_secs, $common_nsecs,\n\t "
+ "$common_pid, $common_comm, $common_callchain) = @_;\n\n");
+
+ fprintf(ofp, "\tprint_header($event_name, $common_cpu, "
+ "$common_secs, $common_nsecs,\n\t $common_pid, "
+ "$common_comm, $common_callchain);\n");
+ fprintf(ofp, "\tprint_backtrace($common_callchain);\n");
+ fprintf(ofp, "}\n\n");
+
+ fprintf(ofp, "sub print_header\n{\n"
+ "\tmy ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;\n\n"
+ "\tprintf(\"%%-20s %%5u %%05u.%%09u %%8u %%-20s \",\n\t "
+ "$event_name, $cpu, $secs, $nsecs, $pid, $comm);\n}\n");
+
+ fprintf(ofp,
+ "\n# Packed byte string args of process_event():\n"
+ "#\n"
+ "# $event:\tunion perf_event\tutil/event.h\n"
+ "# $attr:\tstruct perf_event_attr\tlinux/perf_event.h\n"
+ "# $sample:\tstruct perf_sample\tutil/event.h\n"
+ "# $raw_data:\tperf_sample->raw_data\tutil/event.h\n"
+ "\n"
+ "sub process_event\n"
+ "{\n"
+ "\tmy ($event, $attr, $sample, $raw_data) = @_;\n"
+ "\n"
+ "\tmy @event\t= unpack(\"LSS\", $event);\n"
+ "\tmy @attr\t= unpack(\"LLQQQQQLLQQ\", $attr);\n"
+ "\tmy @sample\t= unpack(\"QLLQQQQQLL\", $sample);\n"
+ "\tmy @raw_data\t= unpack(\"C*\", $raw_data);\n"
+ "\n"
+ "\tuse Data::Dumper;\n"
+ "\tprint Dumper \\@event, \\@attr, \\@sample, \\@raw_data;\n"
+ "}\n");
+
+ fclose(ofp);
+
+ fprintf(stderr, "generated Perl script: %s\n", fname);
+
+ return 0;
+}
+
+struct scripting_ops perl_scripting_ops = {
+ .name = "Perl",
+ .dirname = "perl",
+ .start_script = perl_start_script,
+ .flush_script = perl_flush_script,
+ .stop_script = perl_stop_script,
+ .process_event = perl_process_event,
+ .generate_script = perl_generate_script,
+};
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
new file mode 100644
index 000000000..943127414
--- /dev/null
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -0,0 +1,2218 @@
+/*
+ * trace-event-python. Feed trace events to an embedded Python interpreter.
+ *
+ * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <Python.h>
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <linux/bitmap.h>
+#include <linux/compiler.h>
+#include <linux/time64.h>
+#ifdef HAVE_LIBTRACEEVENT
+#include <traceevent/event-parse.h>
+#endif
+
+#include "../build-id.h"
+#include "../counts.h"
+#include "../debug.h"
+#include "../dso.h"
+#include "../callchain.h"
+#include "../env.h"
+#include "../evsel.h"
+#include "../event.h"
+#include "../thread.h"
+#include "../comm.h"
+#include "../machine.h"
+#include "../db-export.h"
+#include "../thread-stack.h"
+#include "../trace-event.h"
+#include "../call-path.h"
+#include "map.h"
+#include "symbol.h"
+#include "thread_map.h"
+#include "print_binary.h"
+#include "stat.h"
+#include "mem-events.h"
+#include "util/perf_regs.h"
+
+#if PY_MAJOR_VERSION < 3
+#define _PyUnicode_FromString(arg) \
+ PyString_FromString(arg)
+#define _PyUnicode_FromStringAndSize(arg1, arg2) \
+ PyString_FromStringAndSize((arg1), (arg2))
+#define _PyBytes_FromStringAndSize(arg1, arg2) \
+ PyString_FromStringAndSize((arg1), (arg2))
+#define _PyLong_FromLong(arg) \
+ PyInt_FromLong(arg)
+#define _PyLong_AsLong(arg) \
+ PyInt_AsLong(arg)
+#define _PyCapsule_New(arg1, arg2, arg3) \
+ PyCObject_FromVoidPtr((arg1), (arg2))
+
+PyMODINIT_FUNC initperf_trace_context(void);
+#else
+#define _PyUnicode_FromString(arg) \
+ PyUnicode_FromString(arg)
+#define _PyUnicode_FromStringAndSize(arg1, arg2) \
+ PyUnicode_FromStringAndSize((arg1), (arg2))
+#define _PyBytes_FromStringAndSize(arg1, arg2) \
+ PyBytes_FromStringAndSize((arg1), (arg2))
+#define _PyLong_FromLong(arg) \
+ PyLong_FromLong(arg)
+#define _PyLong_AsLong(arg) \
+ PyLong_AsLong(arg)
+#define _PyCapsule_New(arg1, arg2, arg3) \
+ PyCapsule_New((arg1), (arg2), (arg3))
+
+PyMODINIT_FUNC PyInit_perf_trace_context(void);
+#endif
+
+#ifdef HAVE_LIBTRACEEVENT
+#define TRACE_EVENT_TYPE_MAX \
+ ((1 << (sizeof(unsigned short) * 8)) - 1)
+
+#define N_COMMON_FIELDS 7
+
+static char *cur_field_name;
+static int zero_flag_atom;
+#endif
+
+#define MAX_FIELDS 64
+
+extern struct scripting_context *scripting_context;
+
+static PyObject *main_module, *main_dict;
+
+struct tables {
+ struct db_export dbe;
+ PyObject *evsel_handler;
+ PyObject *machine_handler;
+ PyObject *thread_handler;
+ PyObject *comm_handler;
+ PyObject *comm_thread_handler;
+ PyObject *dso_handler;
+ PyObject *symbol_handler;
+ PyObject *branch_type_handler;
+ PyObject *sample_handler;
+ PyObject *call_path_handler;
+ PyObject *call_return_handler;
+ PyObject *synth_handler;
+ PyObject *context_switch_handler;
+ bool db_export_mode;
+};
+
+static struct tables tables_global;
+
+static void handler_call_die(const char *handler_name) __noreturn;
+static void handler_call_die(const char *handler_name)
+{
+ PyErr_Print();
+ Py_FatalError("problem in Python trace event handler");
+ // Py_FatalError does not return
+ // but we have to make the compiler happy
+ abort();
+}
+
+/*
+ * Insert val into the dictionary and decrement the reference counter.
+ * This is necessary for dictionaries since PyDict_SetItemString() does not
+ * steal a reference, as opposed to PyTuple_SetItem().
+ */
+static void pydict_set_item_string_decref(PyObject *dict, const char *key, PyObject *val)
+{
+ PyDict_SetItemString(dict, key, val);
+ Py_DECREF(val);
+}
+
+static PyObject *get_handler(const char *handler_name)
+{
+ PyObject *handler;
+
+ handler = PyDict_GetItemString(main_dict, handler_name);
+ if (handler && !PyCallable_Check(handler))
+ return NULL;
+ return handler;
+}
+
+static void call_object(PyObject *handler, PyObject *args, const char *die_msg)
+{
+ PyObject *retval;
+
+ retval = PyObject_CallObject(handler, args);
+ if (retval == NULL)
+ handler_call_die(die_msg);
+ Py_DECREF(retval);
+}
+
+static void try_call_object(const char *handler_name, PyObject *args)
+{
+ PyObject *handler;
+
+ handler = get_handler(handler_name);
+ if (handler)
+ call_object(handler, args, handler_name);
+}
+
+#ifdef HAVE_LIBTRACEEVENT
+static int get_argument_count(PyObject *handler)
+{
+ int arg_count = 0;
+
+ /*
+ * The attribute for the code object is func_code in Python 2,
+ * whereas it is __code__ in Python 3.0+.
+ */
+ PyObject *code_obj = PyObject_GetAttrString(handler,
+ "func_code");
+ if (PyErr_Occurred()) {
+ PyErr_Clear();
+ code_obj = PyObject_GetAttrString(handler,
+ "__code__");
+ }
+ PyErr_Clear();
+ if (code_obj) {
+ PyObject *arg_count_obj = PyObject_GetAttrString(code_obj,
+ "co_argcount");
+ if (arg_count_obj) {
+ arg_count = (int) _PyLong_AsLong(arg_count_obj);
+ Py_DECREF(arg_count_obj);
+ }
+ Py_DECREF(code_obj);
+ }
+ return arg_count;
+}
+
+static void define_value(enum tep_print_arg_type field_type,
+ const char *ev_name,
+ const char *field_name,
+ const char *field_value,
+ const char *field_str)
+{
+ const char *handler_name = "define_flag_value";
+ PyObject *t;
+ unsigned long long value;
+ unsigned n = 0;
+
+ if (field_type == TEP_PRINT_SYMBOL)
+ handler_name = "define_symbolic_value";
+
+ t = PyTuple_New(4);
+ if (!t)
+ Py_FatalError("couldn't create Python tuple");
+
+ value = eval_flag(field_value);
+
+ PyTuple_SetItem(t, n++, _PyUnicode_FromString(ev_name));
+ PyTuple_SetItem(t, n++, _PyUnicode_FromString(field_name));
+ PyTuple_SetItem(t, n++, _PyLong_FromLong(value));
+ PyTuple_SetItem(t, n++, _PyUnicode_FromString(field_str));
+
+ try_call_object(handler_name, t);
+
+ Py_DECREF(t);
+}
+
+static void define_values(enum tep_print_arg_type field_type,
+ struct tep_print_flag_sym *field,
+ const char *ev_name,
+ const char *field_name)
+{
+ define_value(field_type, ev_name, field_name, field->value,
+ field->str);
+
+ if (field->next)
+ define_values(field_type, field->next, ev_name, field_name);
+}
+
+static void define_field(enum tep_print_arg_type field_type,
+ const char *ev_name,
+ const char *field_name,
+ const char *delim)
+{
+ const char *handler_name = "define_flag_field";
+ PyObject *t;
+ unsigned n = 0;
+
+ if (field_type == TEP_PRINT_SYMBOL)
+ handler_name = "define_symbolic_field";
+
+ if (field_type == TEP_PRINT_FLAGS)
+ t = PyTuple_New(3);
+ else
+ t = PyTuple_New(2);
+ if (!t)
+ Py_FatalError("couldn't create Python tuple");
+
+ PyTuple_SetItem(t, n++, _PyUnicode_FromString(ev_name));
+ PyTuple_SetItem(t, n++, _PyUnicode_FromString(field_name));
+ if (field_type == TEP_PRINT_FLAGS)
+ PyTuple_SetItem(t, n++, _PyUnicode_FromString(delim));
+
+ try_call_object(handler_name, t);
+
+ Py_DECREF(t);
+}
+
+static void define_event_symbols(struct tep_event *event,
+ const char *ev_name,
+ struct tep_print_arg *args)
+{
+ if (args == NULL)
+ return;
+
+ switch (args->type) {
+ case TEP_PRINT_NULL:
+ break;
+ case TEP_PRINT_ATOM:
+ define_value(TEP_PRINT_FLAGS, ev_name, cur_field_name, "0",
+ args->atom.atom);
+ zero_flag_atom = 0;
+ break;
+ case TEP_PRINT_FIELD:
+ free(cur_field_name);
+ cur_field_name = strdup(args->field.name);
+ break;
+ case TEP_PRINT_FLAGS:
+ define_event_symbols(event, ev_name, args->flags.field);
+ define_field(TEP_PRINT_FLAGS, ev_name, cur_field_name,
+ args->flags.delim);
+ define_values(TEP_PRINT_FLAGS, args->flags.flags, ev_name,
+ cur_field_name);
+ break;
+ case TEP_PRINT_SYMBOL:
+ define_event_symbols(event, ev_name, args->symbol.field);
+ define_field(TEP_PRINT_SYMBOL, ev_name, cur_field_name, NULL);
+ define_values(TEP_PRINT_SYMBOL, args->symbol.symbols, ev_name,
+ cur_field_name);
+ break;
+ case TEP_PRINT_HEX:
+ case TEP_PRINT_HEX_STR:
+ define_event_symbols(event, ev_name, args->hex.field);
+ define_event_symbols(event, ev_name, args->hex.size);
+ break;
+ case TEP_PRINT_INT_ARRAY:
+ define_event_symbols(event, ev_name, args->int_array.field);
+ define_event_symbols(event, ev_name, args->int_array.count);
+ define_event_symbols(event, ev_name, args->int_array.el_size);
+ break;
+ case TEP_PRINT_STRING:
+ break;
+ case TEP_PRINT_TYPE:
+ define_event_symbols(event, ev_name, args->typecast.item);
+ break;
+ case TEP_PRINT_OP:
+ if (strcmp(args->op.op, ":") == 0)
+ zero_flag_atom = 1;
+ define_event_symbols(event, ev_name, args->op.left);
+ define_event_symbols(event, ev_name, args->op.right);
+ break;
+ default:
+ /* gcc warns for these? */
+ case TEP_PRINT_BSTRING:
+ case TEP_PRINT_DYNAMIC_ARRAY:
+ case TEP_PRINT_DYNAMIC_ARRAY_LEN:
+ case TEP_PRINT_FUNC:
+ case TEP_PRINT_BITMASK:
+ /* we should warn... */
+ return;
+ }
+
+ if (args->next)
+ define_event_symbols(event, ev_name, args->next);
+}
+
+static PyObject *get_field_numeric_entry(struct tep_event *event,
+ struct tep_format_field *field, void *data)
+{
+ bool is_array = field->flags & TEP_FIELD_IS_ARRAY;
+ PyObject *obj = NULL, *list = NULL;
+ unsigned long long val;
+ unsigned int item_size, n_items, i;
+
+ if (is_array) {
+ list = PyList_New(field->arraylen);
+ item_size = field->size / field->arraylen;
+ n_items = field->arraylen;
+ } else {
+ item_size = field->size;
+ n_items = 1;
+ }
+
+ for (i = 0; i < n_items; i++) {
+
+ val = read_size(event, data + field->offset + i * item_size,
+ item_size);
+ if (field->flags & TEP_FIELD_IS_SIGNED) {
+ if ((long long)val >= LONG_MIN &&
+ (long long)val <= LONG_MAX)
+ obj = _PyLong_FromLong(val);
+ else
+ obj = PyLong_FromLongLong(val);
+ } else {
+ if (val <= LONG_MAX)
+ obj = _PyLong_FromLong(val);
+ else
+ obj = PyLong_FromUnsignedLongLong(val);
+ }
+ if (is_array)
+ PyList_SET_ITEM(list, i, obj);
+ }
+ if (is_array)
+ obj = list;
+ return obj;
+}
+#endif
+
+static const char *get_dsoname(struct map *map)
+{
+ const char *dsoname = "[unknown]";
+ struct dso *dso = map ? map__dso(map) : NULL;
+
+ if (dso) {
+ if (symbol_conf.show_kernel_path && dso->long_name)
+ dsoname = dso->long_name;
+ else
+ dsoname = dso->name;
+ }
+
+ return dsoname;
+}
+
+static unsigned long get_offset(struct symbol *sym, struct addr_location *al)
+{
+ unsigned long offset;
+
+ if (al->addr < sym->end)
+ offset = al->addr - sym->start;
+ else
+ offset = al->addr - map__start(al->map) - sym->start;
+
+ return offset;
+}
+
+static PyObject *python_process_callchain(struct perf_sample *sample,
+ struct evsel *evsel,
+ struct addr_location *al)
+{
+ PyObject *pylist;
+ struct callchain_cursor *cursor;
+
+ pylist = PyList_New(0);
+ if (!pylist)
+ Py_FatalError("couldn't create Python list");
+
+ if (!symbol_conf.use_callchain || !sample->callchain)
+ goto exit;
+
+ cursor = get_tls_callchain_cursor();
+ if (thread__resolve_callchain(al->thread, cursor, evsel,
+ sample, NULL, NULL,
+ scripting_max_stack) != 0) {
+ pr_err("Failed to resolve callchain. Skipping\n");
+ goto exit;
+ }
+ callchain_cursor_commit(cursor);
+
+
+ while (1) {
+ PyObject *pyelem;
+ struct callchain_cursor_node *node;
+ node = callchain_cursor_current(cursor);
+ if (!node)
+ break;
+
+ pyelem = PyDict_New();
+ if (!pyelem)
+ Py_FatalError("couldn't create Python dictionary");
+
+
+ pydict_set_item_string_decref(pyelem, "ip",
+ PyLong_FromUnsignedLongLong(node->ip));
+
+ if (node->ms.sym) {
+ PyObject *pysym = PyDict_New();
+ if (!pysym)
+ Py_FatalError("couldn't create Python dictionary");
+ pydict_set_item_string_decref(pysym, "start",
+ PyLong_FromUnsignedLongLong(node->ms.sym->start));
+ pydict_set_item_string_decref(pysym, "end",
+ PyLong_FromUnsignedLongLong(node->ms.sym->end));
+ pydict_set_item_string_decref(pysym, "binding",
+ _PyLong_FromLong(node->ms.sym->binding));
+ pydict_set_item_string_decref(pysym, "name",
+ _PyUnicode_FromStringAndSize(node->ms.sym->name,
+ node->ms.sym->namelen));
+ pydict_set_item_string_decref(pyelem, "sym", pysym);
+
+ if (node->ms.map) {
+ struct map *map = node->ms.map;
+ struct addr_location node_al;
+ unsigned long offset;
+
+ addr_location__init(&node_al);
+ node_al.addr = map__map_ip(map, node->ip);
+ node_al.map = map__get(map);
+ offset = get_offset(node->ms.sym, &node_al);
+ addr_location__exit(&node_al);
+
+ pydict_set_item_string_decref(
+ pyelem, "sym_off",
+ PyLong_FromUnsignedLongLong(offset));
+ }
+ if (node->srcline && strcmp(":0", node->srcline)) {
+ pydict_set_item_string_decref(
+ pyelem, "sym_srcline",
+ _PyUnicode_FromString(node->srcline));
+ }
+ }
+
+ if (node->ms.map) {
+ const char *dsoname = get_dsoname(node->ms.map);
+
+ pydict_set_item_string_decref(pyelem, "dso",
+ _PyUnicode_FromString(dsoname));
+ }
+
+ callchain_cursor_advance(cursor);
+ PyList_Append(pylist, pyelem);
+ Py_DECREF(pyelem);
+ }
+
+exit:
+ return pylist;
+}
+
+static PyObject *python_process_brstack(struct perf_sample *sample,
+ struct thread *thread)
+{
+ struct branch_stack *br = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
+ PyObject *pylist;
+ u64 i;
+
+ pylist = PyList_New(0);
+ if (!pylist)
+ Py_FatalError("couldn't create Python list");
+
+ if (!(br && br->nr))
+ goto exit;
+
+ for (i = 0; i < br->nr; i++) {
+ PyObject *pyelem;
+ struct addr_location al;
+ const char *dsoname;
+
+ pyelem = PyDict_New();
+ if (!pyelem)
+ Py_FatalError("couldn't create Python dictionary");
+
+ pydict_set_item_string_decref(pyelem, "from",
+ PyLong_FromUnsignedLongLong(entries[i].from));
+ pydict_set_item_string_decref(pyelem, "to",
+ PyLong_FromUnsignedLongLong(entries[i].to));
+ pydict_set_item_string_decref(pyelem, "mispred",
+ PyBool_FromLong(entries[i].flags.mispred));
+ pydict_set_item_string_decref(pyelem, "predicted",
+ PyBool_FromLong(entries[i].flags.predicted));
+ pydict_set_item_string_decref(pyelem, "in_tx",
+ PyBool_FromLong(entries[i].flags.in_tx));
+ pydict_set_item_string_decref(pyelem, "abort",
+ PyBool_FromLong(entries[i].flags.abort));
+ pydict_set_item_string_decref(pyelem, "cycles",
+ PyLong_FromUnsignedLongLong(entries[i].flags.cycles));
+
+ addr_location__init(&al);
+ thread__find_map_fb(thread, sample->cpumode,
+ entries[i].from, &al);
+ dsoname = get_dsoname(al.map);
+ pydict_set_item_string_decref(pyelem, "from_dsoname",
+ _PyUnicode_FromString(dsoname));
+
+ thread__find_map_fb(thread, sample->cpumode,
+ entries[i].to, &al);
+ dsoname = get_dsoname(al.map);
+ pydict_set_item_string_decref(pyelem, "to_dsoname",
+ _PyUnicode_FromString(dsoname));
+
+ addr_location__exit(&al);
+ PyList_Append(pylist, pyelem);
+ Py_DECREF(pyelem);
+ }
+
+exit:
+ return pylist;
+}
+
+static int get_symoff(struct symbol *sym, struct addr_location *al,
+ bool print_off, char *bf, int size)
+{
+ unsigned long offset;
+
+ if (!sym || !sym->name[0])
+ return scnprintf(bf, size, "%s", "[unknown]");
+
+ if (!print_off)
+ return scnprintf(bf, size, "%s", sym->name);
+
+ offset = get_offset(sym, al);
+
+ return scnprintf(bf, size, "%s+0x%x", sym->name, offset);
+}
+
+static int get_br_mspred(struct branch_flags *flags, char *bf, int size)
+{
+ if (!flags->mispred && !flags->predicted)
+ return scnprintf(bf, size, "%s", "-");
+
+ if (flags->mispred)
+ return scnprintf(bf, size, "%s", "M");
+
+ return scnprintf(bf, size, "%s", "P");
+}
+
+static PyObject *python_process_brstacksym(struct perf_sample *sample,
+ struct thread *thread)
+{
+ struct branch_stack *br = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
+ PyObject *pylist;
+ u64 i;
+ char bf[512];
+
+ pylist = PyList_New(0);
+ if (!pylist)
+ Py_FatalError("couldn't create Python list");
+
+ if (!(br && br->nr))
+ goto exit;
+
+ for (i = 0; i < br->nr; i++) {
+ PyObject *pyelem;
+ struct addr_location al;
+
+ addr_location__init(&al);
+ pyelem = PyDict_New();
+ if (!pyelem)
+ Py_FatalError("couldn't create Python dictionary");
+
+ thread__find_symbol_fb(thread, sample->cpumode,
+ entries[i].from, &al);
+ get_symoff(al.sym, &al, true, bf, sizeof(bf));
+ pydict_set_item_string_decref(pyelem, "from",
+ _PyUnicode_FromString(bf));
+
+ thread__find_symbol_fb(thread, sample->cpumode,
+ entries[i].to, &al);
+ get_symoff(al.sym, &al, true, bf, sizeof(bf));
+ pydict_set_item_string_decref(pyelem, "to",
+ _PyUnicode_FromString(bf));
+
+ get_br_mspred(&entries[i].flags, bf, sizeof(bf));
+ pydict_set_item_string_decref(pyelem, "pred",
+ _PyUnicode_FromString(bf));
+
+ if (entries[i].flags.in_tx) {
+ pydict_set_item_string_decref(pyelem, "in_tx",
+ _PyUnicode_FromString("X"));
+ } else {
+ pydict_set_item_string_decref(pyelem, "in_tx",
+ _PyUnicode_FromString("-"));
+ }
+
+ if (entries[i].flags.abort) {
+ pydict_set_item_string_decref(pyelem, "abort",
+ _PyUnicode_FromString("A"));
+ } else {
+ pydict_set_item_string_decref(pyelem, "abort",
+ _PyUnicode_FromString("-"));
+ }
+
+ PyList_Append(pylist, pyelem);
+ Py_DECREF(pyelem);
+ addr_location__exit(&al);
+ }
+
+exit:
+ return pylist;
+}
+
+static PyObject *get_sample_value_as_tuple(struct sample_read_value *value,
+ u64 read_format)
+{
+ PyObject *t;
+
+ t = PyTuple_New(3);
+ if (!t)
+ Py_FatalError("couldn't create Python tuple");
+ PyTuple_SetItem(t, 0, PyLong_FromUnsignedLongLong(value->id));
+ PyTuple_SetItem(t, 1, PyLong_FromUnsignedLongLong(value->value));
+ if (read_format & PERF_FORMAT_LOST)
+ PyTuple_SetItem(t, 2, PyLong_FromUnsignedLongLong(value->lost));
+
+ return t;
+}
+
+static void set_sample_read_in_dict(PyObject *dict_sample,
+ struct perf_sample *sample,
+ struct evsel *evsel)
+{
+ u64 read_format = evsel->core.attr.read_format;
+ PyObject *values;
+ unsigned int i;
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ pydict_set_item_string_decref(dict_sample, "time_enabled",
+ PyLong_FromUnsignedLongLong(sample->read.time_enabled));
+ }
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ pydict_set_item_string_decref(dict_sample, "time_running",
+ PyLong_FromUnsignedLongLong(sample->read.time_running));
+ }
+
+ if (read_format & PERF_FORMAT_GROUP)
+ values = PyList_New(sample->read.group.nr);
+ else
+ values = PyList_New(1);
+
+ if (!values)
+ Py_FatalError("couldn't create Python list");
+
+ if (read_format & PERF_FORMAT_GROUP) {
+ struct sample_read_value *v = sample->read.group.values;
+
+ i = 0;
+ sample_read_group__for_each(v, sample->read.group.nr, read_format) {
+ PyObject *t = get_sample_value_as_tuple(v, read_format);
+ PyList_SET_ITEM(values, i, t);
+ i++;
+ }
+ } else {
+ PyObject *t = get_sample_value_as_tuple(&sample->read.one,
+ read_format);
+ PyList_SET_ITEM(values, 0, t);
+ }
+ pydict_set_item_string_decref(dict_sample, "values", values);
+}
+
+static void set_sample_datasrc_in_dict(PyObject *dict,
+ struct perf_sample *sample)
+{
+ struct mem_info mi = { .data_src.val = sample->data_src };
+ char decode[100];
+
+ pydict_set_item_string_decref(dict, "datasrc",
+ PyLong_FromUnsignedLongLong(sample->data_src));
+
+ perf_script__meminfo_scnprintf(decode, 100, &mi);
+
+ pydict_set_item_string_decref(dict, "datasrc_decode",
+ _PyUnicode_FromString(decode));
+}
+
+static void regs_map(struct regs_dump *regs, uint64_t mask, const char *arch, char *bf, int size)
+{
+ unsigned int i = 0, r;
+ int printed = 0;
+
+ bf[0] = 0;
+
+ if (size <= 0)
+ return;
+
+ if (!regs || !regs->regs)
+ return;
+
+ for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
+ u64 val = regs->regs[i++];
+
+ printed += scnprintf(bf + printed, size - printed,
+ "%5s:0x%" PRIx64 " ",
+ perf_reg_name(r, arch), val);
+ }
+}
+
+static void set_regs_in_dict(PyObject *dict,
+ struct perf_sample *sample,
+ struct evsel *evsel)
+{
+ struct perf_event_attr *attr = &evsel->core.attr;
+ const char *arch = perf_env__arch(evsel__env(evsel));
+
+ /*
+ * Here value 28 is a constant size which can be used to print
+ * one register value and its corresponds to:
+ * 16 chars is to specify 64 bit register in hexadecimal.
+ * 2 chars is for appending "0x" to the hexadecimal value and
+ * 10 chars is for register name.
+ */
+ int size = __sw_hweight64(attr->sample_regs_intr) * 28;
+ char *bf = malloc(size);
+
+ regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, size);
+
+ pydict_set_item_string_decref(dict, "iregs",
+ _PyUnicode_FromString(bf));
+
+ regs_map(&sample->user_regs, attr->sample_regs_user, arch, bf, size);
+
+ pydict_set_item_string_decref(dict, "uregs",
+ _PyUnicode_FromString(bf));
+ free(bf);
+}
+
+static void set_sym_in_dict(PyObject *dict, struct addr_location *al,
+ const char *dso_field, const char *dso_bid_field,
+ const char *dso_map_start, const char *dso_map_end,
+ const char *sym_field, const char *symoff_field)
+{
+ char sbuild_id[SBUILD_ID_SIZE];
+
+ if (al->map) {
+ struct dso *dso = map__dso(al->map);
+
+ pydict_set_item_string_decref(dict, dso_field, _PyUnicode_FromString(dso->name));
+ build_id__sprintf(&dso->bid, sbuild_id);
+ pydict_set_item_string_decref(dict, dso_bid_field,
+ _PyUnicode_FromString(sbuild_id));
+ pydict_set_item_string_decref(dict, dso_map_start,
+ PyLong_FromUnsignedLong(map__start(al->map)));
+ pydict_set_item_string_decref(dict, dso_map_end,
+ PyLong_FromUnsignedLong(map__end(al->map)));
+ }
+ if (al->sym) {
+ pydict_set_item_string_decref(dict, sym_field,
+ _PyUnicode_FromString(al->sym->name));
+ pydict_set_item_string_decref(dict, symoff_field,
+ PyLong_FromUnsignedLong(get_offset(al->sym, al)));
+ }
+}
+
+static void set_sample_flags(PyObject *dict, u32 flags)
+{
+ const char *ch = PERF_IP_FLAG_CHARS;
+ char *p, str[33];
+
+ for (p = str; *ch; ch++, flags >>= 1) {
+ if (flags & 1)
+ *p++ = *ch;
+ }
+ *p = 0;
+ pydict_set_item_string_decref(dict, "flags", _PyUnicode_FromString(str));
+}
+
+static void python_process_sample_flags(struct perf_sample *sample, PyObject *dict_sample)
+{
+ char flags_disp[SAMPLE_FLAGS_BUF_SIZE];
+
+ set_sample_flags(dict_sample, sample->flags);
+ perf_sample__sprintf_flags(sample->flags, flags_disp, sizeof(flags_disp));
+ pydict_set_item_string_decref(dict_sample, "flags_disp",
+ _PyUnicode_FromString(flags_disp));
+}
+
+static PyObject *get_perf_sample_dict(struct perf_sample *sample,
+ struct evsel *evsel,
+ struct addr_location *al,
+ struct addr_location *addr_al,
+ PyObject *callchain)
+{
+ PyObject *dict, *dict_sample, *brstack, *brstacksym;
+
+ dict = PyDict_New();
+ if (!dict)
+ Py_FatalError("couldn't create Python dictionary");
+
+ dict_sample = PyDict_New();
+ if (!dict_sample)
+ Py_FatalError("couldn't create Python dictionary");
+
+ pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(evsel__name(evsel)));
+ pydict_set_item_string_decref(dict, "attr", _PyBytes_FromStringAndSize((const char *)&evsel->core.attr, sizeof(evsel->core.attr)));
+
+ pydict_set_item_string_decref(dict_sample, "pid",
+ _PyLong_FromLong(sample->pid));
+ pydict_set_item_string_decref(dict_sample, "tid",
+ _PyLong_FromLong(sample->tid));
+ pydict_set_item_string_decref(dict_sample, "cpu",
+ _PyLong_FromLong(sample->cpu));
+ pydict_set_item_string_decref(dict_sample, "ip",
+ PyLong_FromUnsignedLongLong(sample->ip));
+ pydict_set_item_string_decref(dict_sample, "time",
+ PyLong_FromUnsignedLongLong(sample->time));
+ pydict_set_item_string_decref(dict_sample, "period",
+ PyLong_FromUnsignedLongLong(sample->period));
+ pydict_set_item_string_decref(dict_sample, "phys_addr",
+ PyLong_FromUnsignedLongLong(sample->phys_addr));
+ pydict_set_item_string_decref(dict_sample, "addr",
+ PyLong_FromUnsignedLongLong(sample->addr));
+ set_sample_read_in_dict(dict_sample, sample, evsel);
+ pydict_set_item_string_decref(dict_sample, "weight",
+ PyLong_FromUnsignedLongLong(sample->weight));
+ pydict_set_item_string_decref(dict_sample, "transaction",
+ PyLong_FromUnsignedLongLong(sample->transaction));
+ set_sample_datasrc_in_dict(dict_sample, sample);
+ pydict_set_item_string_decref(dict, "sample", dict_sample);
+
+ pydict_set_item_string_decref(dict, "raw_buf", _PyBytes_FromStringAndSize(
+ (const char *)sample->raw_data, sample->raw_size));
+ pydict_set_item_string_decref(dict, "comm",
+ _PyUnicode_FromString(thread__comm_str(al->thread)));
+ set_sym_in_dict(dict, al, "dso", "dso_bid", "dso_map_start", "dso_map_end",
+ "symbol", "symoff");
+
+ pydict_set_item_string_decref(dict, "callchain", callchain);
+
+ brstack = python_process_brstack(sample, al->thread);
+ pydict_set_item_string_decref(dict, "brstack", brstack);
+
+ brstacksym = python_process_brstacksym(sample, al->thread);
+ pydict_set_item_string_decref(dict, "brstacksym", brstacksym);
+
+ if (sample->machine_pid) {
+ pydict_set_item_string_decref(dict_sample, "machine_pid",
+ _PyLong_FromLong(sample->machine_pid));
+ pydict_set_item_string_decref(dict_sample, "vcpu",
+ _PyLong_FromLong(sample->vcpu));
+ }
+
+ pydict_set_item_string_decref(dict_sample, "cpumode",
+ _PyLong_FromLong((unsigned long)sample->cpumode));
+
+ if (addr_al) {
+ pydict_set_item_string_decref(dict_sample, "addr_correlates_sym",
+ PyBool_FromLong(1));
+ set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_dso_bid",
+ "addr_dso_map_start", "addr_dso_map_end",
+ "addr_symbol", "addr_symoff");
+ }
+
+ if (sample->flags)
+ python_process_sample_flags(sample, dict_sample);
+
+ /* Instructions per cycle (IPC) */
+ if (sample->insn_cnt && sample->cyc_cnt) {
+ pydict_set_item_string_decref(dict_sample, "insn_cnt",
+ PyLong_FromUnsignedLongLong(sample->insn_cnt));
+ pydict_set_item_string_decref(dict_sample, "cyc_cnt",
+ PyLong_FromUnsignedLongLong(sample->cyc_cnt));
+ }
+
+ set_regs_in_dict(dict, sample, evsel);
+
+ return dict;
+}
+
+#ifdef HAVE_LIBTRACEEVENT
+static void python_process_tracepoint(struct perf_sample *sample,
+ struct evsel *evsel,
+ struct addr_location *al,
+ struct addr_location *addr_al)
+{
+ struct tep_event *event = evsel->tp_format;
+ PyObject *handler, *context, *t, *obj = NULL, *callchain;
+ PyObject *dict = NULL, *all_entries_dict = NULL;
+ static char handler_name[256];
+ struct tep_format_field *field;
+ unsigned long s, ns;
+ unsigned n = 0;
+ int pid;
+ int cpu = sample->cpu;
+ void *data = sample->raw_data;
+ unsigned long long nsecs = sample->time;
+ const char *comm = thread__comm_str(al->thread);
+ const char *default_handler_name = "trace_unhandled";
+ DECLARE_BITMAP(events_defined, TRACE_EVENT_TYPE_MAX);
+
+ bitmap_zero(events_defined, TRACE_EVENT_TYPE_MAX);
+
+ if (!event) {
+ snprintf(handler_name, sizeof(handler_name),
+ "ug! no event found for type %" PRIu64, (u64)evsel->core.attr.config);
+ Py_FatalError(handler_name);
+ }
+
+ pid = raw_field_value(event, "common_pid", data);
+
+ sprintf(handler_name, "%s__%s", event->system, event->name);
+
+ if (!__test_and_set_bit(event->id, events_defined))
+ define_event_symbols(event, handler_name, event->print_fmt.args);
+
+ handler = get_handler(handler_name);
+ if (!handler) {
+ handler = get_handler(default_handler_name);
+ if (!handler)
+ return;
+ dict = PyDict_New();
+ if (!dict)
+ Py_FatalError("couldn't create Python dict");
+ }
+
+ t = PyTuple_New(MAX_FIELDS);
+ if (!t)
+ Py_FatalError("couldn't create Python tuple");
+
+
+ s = nsecs / NSEC_PER_SEC;
+ ns = nsecs - s * NSEC_PER_SEC;
+
+ context = _PyCapsule_New(scripting_context, NULL, NULL);
+
+ PyTuple_SetItem(t, n++, _PyUnicode_FromString(handler_name));
+ PyTuple_SetItem(t, n++, context);
+
+ /* ip unwinding */
+ callchain = python_process_callchain(sample, evsel, al);
+ /* Need an additional reference for the perf_sample dict */
+ Py_INCREF(callchain);
+
+ if (!dict) {
+ PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu));
+ PyTuple_SetItem(t, n++, _PyLong_FromLong(s));
+ PyTuple_SetItem(t, n++, _PyLong_FromLong(ns));
+ PyTuple_SetItem(t, n++, _PyLong_FromLong(pid));
+ PyTuple_SetItem(t, n++, _PyUnicode_FromString(comm));
+ PyTuple_SetItem(t, n++, callchain);
+ } else {
+ pydict_set_item_string_decref(dict, "common_cpu", _PyLong_FromLong(cpu));
+ pydict_set_item_string_decref(dict, "common_s", _PyLong_FromLong(s));
+ pydict_set_item_string_decref(dict, "common_ns", _PyLong_FromLong(ns));
+ pydict_set_item_string_decref(dict, "common_pid", _PyLong_FromLong(pid));
+ pydict_set_item_string_decref(dict, "common_comm", _PyUnicode_FromString(comm));
+ pydict_set_item_string_decref(dict, "common_callchain", callchain);
+ }
+ for (field = event->format.fields; field; field = field->next) {
+ unsigned int offset, len;
+ unsigned long long val;
+
+ if (field->flags & TEP_FIELD_IS_ARRAY) {
+ offset = field->offset;
+ len = field->size;
+ if (field->flags & TEP_FIELD_IS_DYNAMIC) {
+ val = tep_read_number(scripting_context->pevent,
+ data + offset, len);
+ offset = val;
+ len = offset >> 16;
+ offset &= 0xffff;
+ if (tep_field_is_relative(field->flags))
+ offset += field->offset + field->size;
+ }
+ if (field->flags & TEP_FIELD_IS_STRING &&
+ is_printable_array(data + offset, len)) {
+ obj = _PyUnicode_FromString((char *) data + offset);
+ } else {
+ obj = PyByteArray_FromStringAndSize((const char *) data + offset, len);
+ field->flags &= ~TEP_FIELD_IS_STRING;
+ }
+ } else { /* FIELD_IS_NUMERIC */
+ obj = get_field_numeric_entry(event, field, data);
+ }
+ if (!dict)
+ PyTuple_SetItem(t, n++, obj);
+ else
+ pydict_set_item_string_decref(dict, field->name, obj);
+
+ }
+
+ if (dict)
+ PyTuple_SetItem(t, n++, dict);
+
+ if (get_argument_count(handler) == (int) n + 1) {
+ all_entries_dict = get_perf_sample_dict(sample, evsel, al, addr_al,
+ callchain);
+ PyTuple_SetItem(t, n++, all_entries_dict);
+ } else {
+ Py_DECREF(callchain);
+ }
+
+ if (_PyTuple_Resize(&t, n) == -1)
+ Py_FatalError("error resizing Python tuple");
+
+ if (!dict)
+ call_object(handler, t, handler_name);
+ else
+ call_object(handler, t, default_handler_name);
+
+ Py_DECREF(t);
+}
+#else
+static void python_process_tracepoint(struct perf_sample *sample __maybe_unused,
+ struct evsel *evsel __maybe_unused,
+ struct addr_location *al __maybe_unused,
+ struct addr_location *addr_al __maybe_unused)
+{
+ fprintf(stderr, "Tracepoint events are not supported because "
+ "perf is not linked with libtraceevent.\n");
+}
+#endif
+
+static PyObject *tuple_new(unsigned int sz)
+{
+ PyObject *t;
+
+ t = PyTuple_New(sz);
+ if (!t)
+ Py_FatalError("couldn't create Python tuple");
+ return t;
+}
+
+static int tuple_set_s64(PyObject *t, unsigned int pos, s64 val)
+{
+#if BITS_PER_LONG == 64
+ return PyTuple_SetItem(t, pos, _PyLong_FromLong(val));
+#endif
+#if BITS_PER_LONG == 32
+ return PyTuple_SetItem(t, pos, PyLong_FromLongLong(val));
+#endif
+}
+
+/*
+ * Databases support only signed 64-bit numbers, so even though we are
+ * exporting a u64, it must be as s64.
+ */
+#define tuple_set_d64 tuple_set_s64
+
+static int tuple_set_u64(PyObject *t, unsigned int pos, u64 val)
+{
+#if BITS_PER_LONG == 64
+ return PyTuple_SetItem(t, pos, PyLong_FromUnsignedLong(val));
+#endif
+#if BITS_PER_LONG == 32
+ return PyTuple_SetItem(t, pos, PyLong_FromUnsignedLongLong(val));
+#endif
+}
+
+static int tuple_set_u32(PyObject *t, unsigned int pos, u32 val)
+{
+ return PyTuple_SetItem(t, pos, PyLong_FromUnsignedLong(val));
+}
+
+static int tuple_set_s32(PyObject *t, unsigned int pos, s32 val)
+{
+ return PyTuple_SetItem(t, pos, _PyLong_FromLong(val));
+}
+
+static int tuple_set_bool(PyObject *t, unsigned int pos, bool val)
+{
+ return PyTuple_SetItem(t, pos, PyBool_FromLong(val));
+}
+
+static int tuple_set_string(PyObject *t, unsigned int pos, const char *s)
+{
+ return PyTuple_SetItem(t, pos, _PyUnicode_FromString(s));
+}
+
+static int tuple_set_bytes(PyObject *t, unsigned int pos, void *bytes,
+ unsigned int sz)
+{
+ return PyTuple_SetItem(t, pos, _PyBytes_FromStringAndSize(bytes, sz));
+}
+
+static int python_export_evsel(struct db_export *dbe, struct evsel *evsel)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ PyObject *t;
+
+ t = tuple_new(2);
+
+ tuple_set_d64(t, 0, evsel->db_id);
+ tuple_set_string(t, 1, evsel__name(evsel));
+
+ call_object(tables->evsel_handler, t, "evsel_table");
+
+ Py_DECREF(t);
+
+ return 0;
+}
+
+static int python_export_machine(struct db_export *dbe,
+ struct machine *machine)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ PyObject *t;
+
+ t = tuple_new(3);
+
+ tuple_set_d64(t, 0, machine->db_id);
+ tuple_set_s32(t, 1, machine->pid);
+ tuple_set_string(t, 2, machine->root_dir ? machine->root_dir : "");
+
+ call_object(tables->machine_handler, t, "machine_table");
+
+ Py_DECREF(t);
+
+ return 0;
+}
+
+static int python_export_thread(struct db_export *dbe, struct thread *thread,
+ u64 main_thread_db_id, struct machine *machine)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ PyObject *t;
+
+ t = tuple_new(5);
+
+ tuple_set_d64(t, 0, thread__db_id(thread));
+ tuple_set_d64(t, 1, machine->db_id);
+ tuple_set_d64(t, 2, main_thread_db_id);
+ tuple_set_s32(t, 3, thread__pid(thread));
+ tuple_set_s32(t, 4, thread__tid(thread));
+
+ call_object(tables->thread_handler, t, "thread_table");
+
+ Py_DECREF(t);
+
+ return 0;
+}
+
+static int python_export_comm(struct db_export *dbe, struct comm *comm,
+ struct thread *thread)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ PyObject *t;
+
+ t = tuple_new(5);
+
+ tuple_set_d64(t, 0, comm->db_id);
+ tuple_set_string(t, 1, comm__str(comm));
+ tuple_set_d64(t, 2, thread__db_id(thread));
+ tuple_set_d64(t, 3, comm->start);
+ tuple_set_s32(t, 4, comm->exec);
+
+ call_object(tables->comm_handler, t, "comm_table");
+
+ Py_DECREF(t);
+
+ return 0;
+}
+
+static int python_export_comm_thread(struct db_export *dbe, u64 db_id,
+ struct comm *comm, struct thread *thread)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ PyObject *t;
+
+ t = tuple_new(3);
+
+ tuple_set_d64(t, 0, db_id);
+ tuple_set_d64(t, 1, comm->db_id);
+ tuple_set_d64(t, 2, thread__db_id(thread));
+
+ call_object(tables->comm_thread_handler, t, "comm_thread_table");
+
+ Py_DECREF(t);
+
+ return 0;
+}
+
+static int python_export_dso(struct db_export *dbe, struct dso *dso,
+ struct machine *machine)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ char sbuild_id[SBUILD_ID_SIZE];
+ PyObject *t;
+
+ build_id__sprintf(&dso->bid, sbuild_id);
+
+ t = tuple_new(5);
+
+ tuple_set_d64(t, 0, dso->db_id);
+ tuple_set_d64(t, 1, machine->db_id);
+ tuple_set_string(t, 2, dso->short_name);
+ tuple_set_string(t, 3, dso->long_name);
+ tuple_set_string(t, 4, sbuild_id);
+
+ call_object(tables->dso_handler, t, "dso_table");
+
+ Py_DECREF(t);
+
+ return 0;
+}
+
+static int python_export_symbol(struct db_export *dbe, struct symbol *sym,
+ struct dso *dso)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ u64 *sym_db_id = symbol__priv(sym);
+ PyObject *t;
+
+ t = tuple_new(6);
+
+ tuple_set_d64(t, 0, *sym_db_id);
+ tuple_set_d64(t, 1, dso->db_id);
+ tuple_set_d64(t, 2, sym->start);
+ tuple_set_d64(t, 3, sym->end);
+ tuple_set_s32(t, 4, sym->binding);
+ tuple_set_string(t, 5, sym->name);
+
+ call_object(tables->symbol_handler, t, "symbol_table");
+
+ Py_DECREF(t);
+
+ return 0;
+}
+
+static int python_export_branch_type(struct db_export *dbe, u32 branch_type,
+ const char *name)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ PyObject *t;
+
+ t = tuple_new(2);
+
+ tuple_set_s32(t, 0, branch_type);
+ tuple_set_string(t, 1, name);
+
+ call_object(tables->branch_type_handler, t, "branch_type_table");
+
+ Py_DECREF(t);
+
+ return 0;
+}
+
+static void python_export_sample_table(struct db_export *dbe,
+ struct export_sample *es)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ PyObject *t;
+
+ t = tuple_new(25);
+
+ tuple_set_d64(t, 0, es->db_id);
+ tuple_set_d64(t, 1, es->evsel->db_id);
+ tuple_set_d64(t, 2, maps__machine(es->al->maps)->db_id);
+ tuple_set_d64(t, 3, thread__db_id(es->al->thread));
+ tuple_set_d64(t, 4, es->comm_db_id);
+ tuple_set_d64(t, 5, es->dso_db_id);
+ tuple_set_d64(t, 6, es->sym_db_id);
+ tuple_set_d64(t, 7, es->offset);
+ tuple_set_d64(t, 8, es->sample->ip);
+ tuple_set_d64(t, 9, es->sample->time);
+ tuple_set_s32(t, 10, es->sample->cpu);
+ tuple_set_d64(t, 11, es->addr_dso_db_id);
+ tuple_set_d64(t, 12, es->addr_sym_db_id);
+ tuple_set_d64(t, 13, es->addr_offset);
+ tuple_set_d64(t, 14, es->sample->addr);
+ tuple_set_d64(t, 15, es->sample->period);
+ tuple_set_d64(t, 16, es->sample->weight);
+ tuple_set_d64(t, 17, es->sample->transaction);
+ tuple_set_d64(t, 18, es->sample->data_src);
+ tuple_set_s32(t, 19, es->sample->flags & PERF_BRANCH_MASK);
+ tuple_set_s32(t, 20, !!(es->sample->flags & PERF_IP_FLAG_IN_TX));
+ tuple_set_d64(t, 21, es->call_path_id);
+ tuple_set_d64(t, 22, es->sample->insn_cnt);
+ tuple_set_d64(t, 23, es->sample->cyc_cnt);
+ tuple_set_s32(t, 24, es->sample->flags);
+
+ call_object(tables->sample_handler, t, "sample_table");
+
+ Py_DECREF(t);
+}
+
+static void python_export_synth(struct db_export *dbe, struct export_sample *es)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ PyObject *t;
+
+ t = tuple_new(3);
+
+ tuple_set_d64(t, 0, es->db_id);
+ tuple_set_d64(t, 1, es->evsel->core.attr.config);
+ tuple_set_bytes(t, 2, es->sample->raw_data, es->sample->raw_size);
+
+ call_object(tables->synth_handler, t, "synth_data");
+
+ Py_DECREF(t);
+}
+
+static int python_export_sample(struct db_export *dbe,
+ struct export_sample *es)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+
+ python_export_sample_table(dbe, es);
+
+ if (es->evsel->core.attr.type == PERF_TYPE_SYNTH && tables->synth_handler)
+ python_export_synth(dbe, es);
+
+ return 0;
+}
+
+static int python_export_call_path(struct db_export *dbe, struct call_path *cp)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ PyObject *t;
+ u64 parent_db_id, sym_db_id;
+
+ parent_db_id = cp->parent ? cp->parent->db_id : 0;
+ sym_db_id = cp->sym ? *(u64 *)symbol__priv(cp->sym) : 0;
+
+ t = tuple_new(4);
+
+ tuple_set_d64(t, 0, cp->db_id);
+ tuple_set_d64(t, 1, parent_db_id);
+ tuple_set_d64(t, 2, sym_db_id);
+ tuple_set_d64(t, 3, cp->ip);
+
+ call_object(tables->call_path_handler, t, "call_path_table");
+
+ Py_DECREF(t);
+
+ return 0;
+}
+
+static int python_export_call_return(struct db_export *dbe,
+ struct call_return *cr)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ u64 comm_db_id = cr->comm ? cr->comm->db_id : 0;
+ PyObject *t;
+
+ t = tuple_new(14);
+
+ tuple_set_d64(t, 0, cr->db_id);
+ tuple_set_d64(t, 1, thread__db_id(cr->thread));
+ tuple_set_d64(t, 2, comm_db_id);
+ tuple_set_d64(t, 3, cr->cp->db_id);
+ tuple_set_d64(t, 4, cr->call_time);
+ tuple_set_d64(t, 5, cr->return_time);
+ tuple_set_d64(t, 6, cr->branch_count);
+ tuple_set_d64(t, 7, cr->call_ref);
+ tuple_set_d64(t, 8, cr->return_ref);
+ tuple_set_d64(t, 9, cr->cp->parent->db_id);
+ tuple_set_s32(t, 10, cr->flags);
+ tuple_set_d64(t, 11, cr->parent_db_id);
+ tuple_set_d64(t, 12, cr->insn_count);
+ tuple_set_d64(t, 13, cr->cyc_count);
+
+ call_object(tables->call_return_handler, t, "call_return_table");
+
+ Py_DECREF(t);
+
+ return 0;
+}
+
+static int python_export_context_switch(struct db_export *dbe, u64 db_id,
+ struct machine *machine,
+ struct perf_sample *sample,
+ u64 th_out_id, u64 comm_out_id,
+ u64 th_in_id, u64 comm_in_id, int flags)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ PyObject *t;
+
+ t = tuple_new(9);
+
+ tuple_set_d64(t, 0, db_id);
+ tuple_set_d64(t, 1, machine->db_id);
+ tuple_set_d64(t, 2, sample->time);
+ tuple_set_s32(t, 3, sample->cpu);
+ tuple_set_d64(t, 4, th_out_id);
+ tuple_set_d64(t, 5, comm_out_id);
+ tuple_set_d64(t, 6, th_in_id);
+ tuple_set_d64(t, 7, comm_in_id);
+ tuple_set_s32(t, 8, flags);
+
+ call_object(tables->context_switch_handler, t, "context_switch");
+
+ Py_DECREF(t);
+
+ return 0;
+}
+
+static int python_process_call_return(struct call_return *cr, u64 *parent_db_id,
+ void *data)
+{
+ struct db_export *dbe = data;
+
+ return db_export__call_return(dbe, cr, parent_db_id);
+}
+
+static void python_process_general_event(struct perf_sample *sample,
+ struct evsel *evsel,
+ struct addr_location *al,
+ struct addr_location *addr_al)
+{
+ PyObject *handler, *t, *dict, *callchain;
+ static char handler_name[64];
+ unsigned n = 0;
+
+ snprintf(handler_name, sizeof(handler_name), "%s", "process_event");
+
+ handler = get_handler(handler_name);
+ if (!handler)
+ return;
+
+ /*
+ * Use the MAX_FIELDS to make the function expandable, though
+ * currently there is only one item for the tuple.
+ */
+ t = PyTuple_New(MAX_FIELDS);
+ if (!t)
+ Py_FatalError("couldn't create Python tuple");
+
+ /* ip unwinding */
+ callchain = python_process_callchain(sample, evsel, al);
+ dict = get_perf_sample_dict(sample, evsel, al, addr_al, callchain);
+
+ PyTuple_SetItem(t, n++, dict);
+ if (_PyTuple_Resize(&t, n) == -1)
+ Py_FatalError("error resizing Python tuple");
+
+ call_object(handler, t, handler_name);
+
+ Py_DECREF(t);
+}
+
+static void python_process_event(union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel,
+ struct addr_location *al,
+ struct addr_location *addr_al)
+{
+ struct tables *tables = &tables_global;
+
+ scripting_context__update(scripting_context, event, sample, evsel, al, addr_al);
+
+ switch (evsel->core.attr.type) {
+ case PERF_TYPE_TRACEPOINT:
+ python_process_tracepoint(sample, evsel, al, addr_al);
+ break;
+ /* Reserve for future process_hw/sw/raw APIs */
+ default:
+ if (tables->db_export_mode)
+ db_export__sample(&tables->dbe, event, sample, evsel, al, addr_al);
+ else
+ python_process_general_event(sample, evsel, al, addr_al);
+ }
+}
+
+static void python_process_throttle(union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ const char *handler_name;
+ PyObject *handler, *t;
+
+ if (event->header.type == PERF_RECORD_THROTTLE)
+ handler_name = "throttle";
+ else
+ handler_name = "unthrottle";
+ handler = get_handler(handler_name);
+ if (!handler)
+ return;
+
+ t = tuple_new(6);
+ if (!t)
+ return;
+
+ tuple_set_u64(t, 0, event->throttle.time);
+ tuple_set_u64(t, 1, event->throttle.id);
+ tuple_set_u64(t, 2, event->throttle.stream_id);
+ tuple_set_s32(t, 3, sample->cpu);
+ tuple_set_s32(t, 4, sample->pid);
+ tuple_set_s32(t, 5, sample->tid);
+
+ call_object(handler, t, handler_name);
+
+ Py_DECREF(t);
+}
+
+static void python_do_process_switch(union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ const char *handler_name = "context_switch";
+ bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+ bool out_preempt = out && (event->header.misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT);
+ pid_t np_pid = -1, np_tid = -1;
+ PyObject *handler, *t;
+
+ handler = get_handler(handler_name);
+ if (!handler)
+ return;
+
+ if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) {
+ np_pid = event->context_switch.next_prev_pid;
+ np_tid = event->context_switch.next_prev_tid;
+ }
+
+ t = tuple_new(11);
+ if (!t)
+ return;
+
+ tuple_set_u64(t, 0, sample->time);
+ tuple_set_s32(t, 1, sample->cpu);
+ tuple_set_s32(t, 2, sample->pid);
+ tuple_set_s32(t, 3, sample->tid);
+ tuple_set_s32(t, 4, np_pid);
+ tuple_set_s32(t, 5, np_tid);
+ tuple_set_s32(t, 6, machine->pid);
+ tuple_set_bool(t, 7, out);
+ tuple_set_bool(t, 8, out_preempt);
+ tuple_set_s32(t, 9, sample->machine_pid);
+ tuple_set_s32(t, 10, sample->vcpu);
+
+ call_object(handler, t, handler_name);
+
+ Py_DECREF(t);
+}
+
+static void python_process_switch(union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct tables *tables = &tables_global;
+
+ if (tables->db_export_mode)
+ db_export__switch(&tables->dbe, event, sample, machine);
+ else
+ python_do_process_switch(event, sample, machine);
+}
+
+static void python_process_auxtrace_error(struct perf_session *session __maybe_unused,
+ union perf_event *event)
+{
+ struct perf_record_auxtrace_error *e = &event->auxtrace_error;
+ u8 cpumode = e->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ const char *handler_name = "auxtrace_error";
+ unsigned long long tm = e->time;
+ const char *msg = e->msg;
+ PyObject *handler, *t;
+
+ handler = get_handler(handler_name);
+ if (!handler)
+ return;
+
+ if (!e->fmt) {
+ tm = 0;
+ msg = (const char *)&e->time;
+ }
+
+ t = tuple_new(11);
+
+ tuple_set_u32(t, 0, e->type);
+ tuple_set_u32(t, 1, e->code);
+ tuple_set_s32(t, 2, e->cpu);
+ tuple_set_s32(t, 3, e->pid);
+ tuple_set_s32(t, 4, e->tid);
+ tuple_set_u64(t, 5, e->ip);
+ tuple_set_u64(t, 6, tm);
+ tuple_set_string(t, 7, msg);
+ tuple_set_u32(t, 8, cpumode);
+ tuple_set_s32(t, 9, e->machine_pid);
+ tuple_set_s32(t, 10, e->vcpu);
+
+ call_object(handler, t, handler_name);
+
+ Py_DECREF(t);
+}
+
+static void get_handler_name(char *str, size_t size,
+ struct evsel *evsel)
+{
+ char *p = str;
+
+ scnprintf(str, size, "stat__%s", evsel__name(evsel));
+
+ while ((p = strchr(p, ':'))) {
+ *p = '_';
+ p++;
+ }
+}
+
+static void
+process_stat(struct evsel *counter, struct perf_cpu cpu, int thread, u64 tstamp,
+ struct perf_counts_values *count)
+{
+ PyObject *handler, *t;
+ static char handler_name[256];
+ int n = 0;
+
+ t = PyTuple_New(MAX_FIELDS);
+ if (!t)
+ Py_FatalError("couldn't create Python tuple");
+
+ get_handler_name(handler_name, sizeof(handler_name),
+ counter);
+
+ handler = get_handler(handler_name);
+ if (!handler) {
+ pr_debug("can't find python handler %s\n", handler_name);
+ return;
+ }
+
+ PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu.cpu));
+ PyTuple_SetItem(t, n++, _PyLong_FromLong(thread));
+
+ tuple_set_u64(t, n++, tstamp);
+ tuple_set_u64(t, n++, count->val);
+ tuple_set_u64(t, n++, count->ena);
+ tuple_set_u64(t, n++, count->run);
+
+ if (_PyTuple_Resize(&t, n) == -1)
+ Py_FatalError("error resizing Python tuple");
+
+ call_object(handler, t, handler_name);
+
+ Py_DECREF(t);
+}
+
+static void python_process_stat(struct perf_stat_config *config,
+ struct evsel *counter, u64 tstamp)
+{
+ struct perf_thread_map *threads = counter->core.threads;
+ struct perf_cpu_map *cpus = counter->core.cpus;
+ int cpu, thread;
+
+ for (thread = 0; thread < perf_thread_map__nr(threads); thread++) {
+ for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
+ process_stat(counter, perf_cpu_map__cpu(cpus, cpu),
+ perf_thread_map__pid(threads, thread), tstamp,
+ perf_counts(counter->counts, cpu, thread));
+ }
+ }
+}
+
+static void python_process_stat_interval(u64 tstamp)
+{
+ PyObject *handler, *t;
+ static const char handler_name[] = "stat__interval";
+ int n = 0;
+
+ t = PyTuple_New(MAX_FIELDS);
+ if (!t)
+ Py_FatalError("couldn't create Python tuple");
+
+ handler = get_handler(handler_name);
+ if (!handler) {
+ pr_debug("can't find python handler %s\n", handler_name);
+ return;
+ }
+
+ tuple_set_u64(t, n++, tstamp);
+
+ if (_PyTuple_Resize(&t, n) == -1)
+ Py_FatalError("error resizing Python tuple");
+
+ call_object(handler, t, handler_name);
+
+ Py_DECREF(t);
+}
+
+static int perf_script_context_init(void)
+{
+ PyObject *perf_script_context;
+ PyObject *perf_trace_context;
+ PyObject *dict;
+ int ret;
+
+ perf_trace_context = PyImport_AddModule("perf_trace_context");
+ if (!perf_trace_context)
+ return -1;
+ dict = PyModule_GetDict(perf_trace_context);
+ if (!dict)
+ return -1;
+
+ perf_script_context = _PyCapsule_New(scripting_context, NULL, NULL);
+ if (!perf_script_context)
+ return -1;
+
+ ret = PyDict_SetItemString(dict, "perf_script_context", perf_script_context);
+ if (!ret)
+ ret = PyDict_SetItemString(main_dict, "perf_script_context", perf_script_context);
+ Py_DECREF(perf_script_context);
+ return ret;
+}
+
+static int run_start_sub(void)
+{
+ main_module = PyImport_AddModule("__main__");
+ if (main_module == NULL)
+ return -1;
+ Py_INCREF(main_module);
+
+ main_dict = PyModule_GetDict(main_module);
+ if (main_dict == NULL)
+ goto error;
+ Py_INCREF(main_dict);
+
+ if (perf_script_context_init())
+ goto error;
+
+ try_call_object("trace_begin", NULL);
+
+ return 0;
+
+error:
+ Py_XDECREF(main_dict);
+ Py_XDECREF(main_module);
+ return -1;
+}
+
+#define SET_TABLE_HANDLER_(name, handler_name, table_name) do { \
+ tables->handler_name = get_handler(#table_name); \
+ if (tables->handler_name) \
+ tables->dbe.export_ ## name = python_export_ ## name; \
+} while (0)
+
+#define SET_TABLE_HANDLER(name) \
+ SET_TABLE_HANDLER_(name, name ## _handler, name ## _table)
+
+static void set_table_handlers(struct tables *tables)
+{
+ const char *perf_db_export_mode = "perf_db_export_mode";
+ const char *perf_db_export_calls = "perf_db_export_calls";
+ const char *perf_db_export_callchains = "perf_db_export_callchains";
+ PyObject *db_export_mode, *db_export_calls, *db_export_callchains;
+ bool export_calls = false;
+ bool export_callchains = false;
+ int ret;
+
+ memset(tables, 0, sizeof(struct tables));
+ if (db_export__init(&tables->dbe))
+ Py_FatalError("failed to initialize export");
+
+ db_export_mode = PyDict_GetItemString(main_dict, perf_db_export_mode);
+ if (!db_export_mode)
+ return;
+
+ ret = PyObject_IsTrue(db_export_mode);
+ if (ret == -1)
+ handler_call_die(perf_db_export_mode);
+ if (!ret)
+ return;
+
+ /* handle export calls */
+ tables->dbe.crp = NULL;
+ db_export_calls = PyDict_GetItemString(main_dict, perf_db_export_calls);
+ if (db_export_calls) {
+ ret = PyObject_IsTrue(db_export_calls);
+ if (ret == -1)
+ handler_call_die(perf_db_export_calls);
+ export_calls = !!ret;
+ }
+
+ if (export_calls) {
+ tables->dbe.crp =
+ call_return_processor__new(python_process_call_return,
+ &tables->dbe);
+ if (!tables->dbe.crp)
+ Py_FatalError("failed to create calls processor");
+ }
+
+ /* handle export callchains */
+ tables->dbe.cpr = NULL;
+ db_export_callchains = PyDict_GetItemString(main_dict,
+ perf_db_export_callchains);
+ if (db_export_callchains) {
+ ret = PyObject_IsTrue(db_export_callchains);
+ if (ret == -1)
+ handler_call_die(perf_db_export_callchains);
+ export_callchains = !!ret;
+ }
+
+ if (export_callchains) {
+ /*
+ * Attempt to use the call path root from the call return
+ * processor, if the call return processor is in use. Otherwise,
+ * we allocate a new call path root. This prevents exporting
+ * duplicate call path ids when both are in use simultaneously.
+ */
+ if (tables->dbe.crp)
+ tables->dbe.cpr = tables->dbe.crp->cpr;
+ else
+ tables->dbe.cpr = call_path_root__new();
+
+ if (!tables->dbe.cpr)
+ Py_FatalError("failed to create call path root");
+ }
+
+ tables->db_export_mode = true;
+ /*
+ * Reserve per symbol space for symbol->db_id via symbol__priv()
+ */
+ symbol_conf.priv_size = sizeof(u64);
+
+ SET_TABLE_HANDLER(evsel);
+ SET_TABLE_HANDLER(machine);
+ SET_TABLE_HANDLER(thread);
+ SET_TABLE_HANDLER(comm);
+ SET_TABLE_HANDLER(comm_thread);
+ SET_TABLE_HANDLER(dso);
+ SET_TABLE_HANDLER(symbol);
+ SET_TABLE_HANDLER(branch_type);
+ SET_TABLE_HANDLER(sample);
+ SET_TABLE_HANDLER(call_path);
+ SET_TABLE_HANDLER(call_return);
+ SET_TABLE_HANDLER(context_switch);
+
+ /*
+ * Synthesized events are samples but with architecture-specific data
+ * stored in sample->raw_data. They are exported via
+ * python_export_sample() and consequently do not need a separate export
+ * callback.
+ */
+ tables->synth_handler = get_handler("synth_data");
+}
+
+#if PY_MAJOR_VERSION < 3
+static void _free_command_line(const char **command_line, int num)
+{
+ free(command_line);
+}
+#else
+static void _free_command_line(wchar_t **command_line, int num)
+{
+ int i;
+ for (i = 0; i < num; i++)
+ PyMem_RawFree(command_line[i]);
+ free(command_line);
+}
+#endif
+
+
+/*
+ * Start trace script
+ */
+static int python_start_script(const char *script, int argc, const char **argv,
+ struct perf_session *session)
+{
+ struct tables *tables = &tables_global;
+#if PY_MAJOR_VERSION < 3
+ const char **command_line;
+#else
+ wchar_t **command_line;
+#endif
+ /*
+ * Use a non-const name variable to cope with python 2.6's
+ * PyImport_AppendInittab prototype
+ */
+ char buf[PATH_MAX], name[19] = "perf_trace_context";
+ int i, err = 0;
+ FILE *fp;
+
+ scripting_context->session = session;
+#if PY_MAJOR_VERSION < 3
+ command_line = malloc((argc + 1) * sizeof(const char *));
+ command_line[0] = script;
+ for (i = 1; i < argc + 1; i++)
+ command_line[i] = argv[i - 1];
+ PyImport_AppendInittab(name, initperf_trace_context);
+#else
+ command_line = malloc((argc + 1) * sizeof(wchar_t *));
+ command_line[0] = Py_DecodeLocale(script, NULL);
+ for (i = 1; i < argc + 1; i++)
+ command_line[i] = Py_DecodeLocale(argv[i - 1], NULL);
+ PyImport_AppendInittab(name, PyInit_perf_trace_context);
+#endif
+ Py_Initialize();
+
+#if PY_MAJOR_VERSION < 3
+ PySys_SetArgv(argc + 1, (char **)command_line);
+#else
+ PySys_SetArgv(argc + 1, command_line);
+#endif
+
+ fp = fopen(script, "r");
+ if (!fp) {
+ sprintf(buf, "Can't open python script \"%s\"", script);
+ perror(buf);
+ err = -1;
+ goto error;
+ }
+
+ err = PyRun_SimpleFile(fp, script);
+ if (err) {
+ fprintf(stderr, "Error running python script %s\n", script);
+ goto error;
+ }
+
+ err = run_start_sub();
+ if (err) {
+ fprintf(stderr, "Error starting python script %s\n", script);
+ goto error;
+ }
+
+ set_table_handlers(tables);
+
+ if (tables->db_export_mode) {
+ err = db_export__branch_types(&tables->dbe);
+ if (err)
+ goto error;
+ }
+
+ _free_command_line(command_line, argc + 1);
+
+ return err;
+error:
+ Py_Finalize();
+ _free_command_line(command_line, argc + 1);
+
+ return err;
+}
+
+static int python_flush_script(void)
+{
+ return 0;
+}
+
+/*
+ * Stop trace script
+ */
+static int python_stop_script(void)
+{
+ struct tables *tables = &tables_global;
+
+ try_call_object("trace_end", NULL);
+
+ db_export__exit(&tables->dbe);
+
+ Py_XDECREF(main_dict);
+ Py_XDECREF(main_module);
+ Py_Finalize();
+
+ return 0;
+}
+
+#ifdef HAVE_LIBTRACEEVENT
+static int python_generate_script(struct tep_handle *pevent, const char *outfile)
+{
+ int i, not_first, count, nr_events;
+ struct tep_event **all_events;
+ struct tep_event *event = NULL;
+ struct tep_format_field *f;
+ char fname[PATH_MAX];
+ FILE *ofp;
+
+ sprintf(fname, "%s.py", outfile);
+ ofp = fopen(fname, "w");
+ if (ofp == NULL) {
+ fprintf(stderr, "couldn't open %s\n", fname);
+ return -1;
+ }
+ fprintf(ofp, "# perf script event handlers, "
+ "generated by perf script -g python\n");
+
+ fprintf(ofp, "# Licensed under the terms of the GNU GPL"
+ " License version 2\n\n");
+
+ fprintf(ofp, "# The common_* event handler fields are the most useful "
+ "fields common to\n");
+
+ fprintf(ofp, "# all events. They don't necessarily correspond to "
+ "the 'common_*' fields\n");
+
+ fprintf(ofp, "# in the format files. Those fields not available as "
+ "handler params can\n");
+
+ fprintf(ofp, "# be retrieved using Python functions of the form "
+ "common_*(context).\n");
+
+ fprintf(ofp, "# See the perf-script-python Documentation for the list "
+ "of available functions.\n\n");
+
+ fprintf(ofp, "from __future__ import print_function\n\n");
+ fprintf(ofp, "import os\n");
+ fprintf(ofp, "import sys\n\n");
+
+ fprintf(ofp, "sys.path.append(os.environ['PERF_EXEC_PATH'] + \\\n");
+ fprintf(ofp, "\t'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')\n");
+ fprintf(ofp, "\nfrom perf_trace_context import *\n");
+ fprintf(ofp, "from Core import *\n\n\n");
+
+ fprintf(ofp, "def trace_begin():\n");
+ fprintf(ofp, "\tprint(\"in trace_begin\")\n\n");
+
+ fprintf(ofp, "def trace_end():\n");
+ fprintf(ofp, "\tprint(\"in trace_end\")\n\n");
+
+ nr_events = tep_get_events_count(pevent);
+ all_events = tep_list_events(pevent, TEP_EVENT_SORT_ID);
+
+ for (i = 0; all_events && i < nr_events; i++) {
+ event = all_events[i];
+ fprintf(ofp, "def %s__%s(", event->system, event->name);
+ fprintf(ofp, "event_name, ");
+ fprintf(ofp, "context, ");
+ fprintf(ofp, "common_cpu,\n");
+ fprintf(ofp, "\tcommon_secs, ");
+ fprintf(ofp, "common_nsecs, ");
+ fprintf(ofp, "common_pid, ");
+ fprintf(ofp, "common_comm,\n\t");
+ fprintf(ofp, "common_callchain, ");
+
+ not_first = 0;
+ count = 0;
+
+ for (f = event->format.fields; f; f = f->next) {
+ if (not_first++)
+ fprintf(ofp, ", ");
+ if (++count % 5 == 0)
+ fprintf(ofp, "\n\t");
+
+ fprintf(ofp, "%s", f->name);
+ }
+ if (not_first++)
+ fprintf(ofp, ", ");
+ if (++count % 5 == 0)
+ fprintf(ofp, "\n\t\t");
+ fprintf(ofp, "perf_sample_dict");
+
+ fprintf(ofp, "):\n");
+
+ fprintf(ofp, "\t\tprint_header(event_name, common_cpu, "
+ "common_secs, common_nsecs,\n\t\t\t"
+ "common_pid, common_comm)\n\n");
+
+ fprintf(ofp, "\t\tprint(\"");
+
+ not_first = 0;
+ count = 0;
+
+ for (f = event->format.fields; f; f = f->next) {
+ if (not_first++)
+ fprintf(ofp, ", ");
+ if (count && count % 3 == 0) {
+ fprintf(ofp, "\" \\\n\t\t\"");
+ }
+ count++;
+
+ fprintf(ofp, "%s=", f->name);
+ if (f->flags & TEP_FIELD_IS_STRING ||
+ f->flags & TEP_FIELD_IS_FLAG ||
+ f->flags & TEP_FIELD_IS_ARRAY ||
+ f->flags & TEP_FIELD_IS_SYMBOLIC)
+ fprintf(ofp, "%%s");
+ else if (f->flags & TEP_FIELD_IS_SIGNED)
+ fprintf(ofp, "%%d");
+ else
+ fprintf(ofp, "%%u");
+ }
+
+ fprintf(ofp, "\" %% \\\n\t\t(");
+
+ not_first = 0;
+ count = 0;
+
+ for (f = event->format.fields; f; f = f->next) {
+ if (not_first++)
+ fprintf(ofp, ", ");
+
+ if (++count % 5 == 0)
+ fprintf(ofp, "\n\t\t");
+
+ if (f->flags & TEP_FIELD_IS_FLAG) {
+ if ((count - 1) % 5 != 0) {
+ fprintf(ofp, "\n\t\t");
+ count = 4;
+ }
+ fprintf(ofp, "flag_str(\"");
+ fprintf(ofp, "%s__%s\", ", event->system,
+ event->name);
+ fprintf(ofp, "\"%s\", %s)", f->name,
+ f->name);
+ } else if (f->flags & TEP_FIELD_IS_SYMBOLIC) {
+ if ((count - 1) % 5 != 0) {
+ fprintf(ofp, "\n\t\t");
+ count = 4;
+ }
+ fprintf(ofp, "symbol_str(\"");
+ fprintf(ofp, "%s__%s\", ", event->system,
+ event->name);
+ fprintf(ofp, "\"%s\", %s)", f->name,
+ f->name);
+ } else
+ fprintf(ofp, "%s", f->name);
+ }
+
+ fprintf(ofp, "))\n\n");
+
+ fprintf(ofp, "\t\tprint('Sample: {'+"
+ "get_dict_as_string(perf_sample_dict['sample'], ', ')+'}')\n\n");
+
+ fprintf(ofp, "\t\tfor node in common_callchain:");
+ fprintf(ofp, "\n\t\t\tif 'sym' in node:");
+ fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x] %%s%%s%%s%%s\" %% (");
+ fprintf(ofp, "\n\t\t\t\t\tnode['ip'], node['sym']['name'],");
+ fprintf(ofp, "\n\t\t\t\t\t\"+0x{:x}\".format(node['sym_off']) if 'sym_off' in node else \"\",");
+ fprintf(ofp, "\n\t\t\t\t\t\" ({})\".format(node['dso']) if 'dso' in node else \"\",");
+ fprintf(ofp, "\n\t\t\t\t\t\" \" + node['sym_srcline'] if 'sym_srcline' in node else \"\"))");
+ fprintf(ofp, "\n\t\t\telse:");
+ fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x]\" %% (node['ip']))\n\n");
+ fprintf(ofp, "\t\tprint()\n\n");
+
+ }
+
+ fprintf(ofp, "def trace_unhandled(event_name, context, "
+ "event_fields_dict, perf_sample_dict):\n");
+
+ fprintf(ofp, "\t\tprint(get_dict_as_string(event_fields_dict))\n");
+ fprintf(ofp, "\t\tprint('Sample: {'+"
+ "get_dict_as_string(perf_sample_dict['sample'], ', ')+'}')\n\n");
+
+ fprintf(ofp, "def print_header("
+ "event_name, cpu, secs, nsecs, pid, comm):\n"
+ "\tprint(\"%%-20s %%5u %%05u.%%09u %%8u %%-20s \" %% \\\n\t"
+ "(event_name, cpu, secs, nsecs, pid, comm), end=\"\")\n\n");
+
+ fprintf(ofp, "def get_dict_as_string(a_dict, delimiter=' '):\n"
+ "\treturn delimiter.join"
+ "(['%%s=%%s'%%(k,str(v))for k,v in sorted(a_dict.items())])\n");
+
+ fclose(ofp);
+
+ fprintf(stderr, "generated Python script: %s\n", fname);
+
+ return 0;
+}
+#else
+static int python_generate_script(struct tep_handle *pevent __maybe_unused,
+ const char *outfile __maybe_unused)
+{
+ fprintf(stderr, "Generating Python perf-script is not supported."
+ " Install libtraceevent and rebuild perf to enable it.\n"
+ "For example:\n # apt install libtraceevent-dev (ubuntu)"
+ "\n # yum install libtraceevent-devel (Fedora)"
+ "\n etc.\n");
+ return -1;
+}
+#endif
+
+struct scripting_ops python_scripting_ops = {
+ .name = "Python",
+ .dirname = "python",
+ .start_script = python_start_script,
+ .flush_script = python_flush_script,
+ .stop_script = python_stop_script,
+ .process_event = python_process_event,
+ .process_switch = python_process_switch,
+ .process_auxtrace_error = python_process_auxtrace_error,
+ .process_stat = python_process_stat,
+ .process_stat_interval = python_process_stat_interval,
+ .process_throttle = python_process_throttle,
+ .generate_script = python_generate_script,
+};
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
new file mode 100644
index 000000000..1e9aa8ed1
--- /dev/null
+++ b/tools/perf/util/session.c
@@ -0,0 +1,2892 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <signal.h>
+#include <inttypes.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+#include <api/fs/fs.h>
+
+#include <byteswap.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <perf/cpumap.h>
+
+#include "map_symbol.h"
+#include "branch.h"
+#include "debug.h"
+#include "env.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "memswap.h"
+#include "map.h"
+#include "symbol.h"
+#include "session.h"
+#include "tool.h"
+#include "perf_regs.h"
+#include "asm/bug.h"
+#include "auxtrace.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "sample-raw.h"
+#include "stat.h"
+#include "tsc.h"
+#include "ui/progress.h"
+#include "util.h"
+#include "arch/common.h"
+#include "units.h"
+#include <internal/lib.h>
+
+#ifdef HAVE_ZSTD_SUPPORT
+static int perf_session__process_compressed_event(struct perf_session *session,
+ union perf_event *event, u64 file_offset,
+ const char *file_path)
+{
+ void *src;
+ size_t decomp_size, src_size;
+ u64 decomp_last_rem = 0;
+ size_t mmap_len, decomp_len = session->header.env.comp_mmap_len;
+ struct decomp *decomp, *decomp_last = session->active_decomp->decomp_last;
+
+ if (decomp_last) {
+ decomp_last_rem = decomp_last->size - decomp_last->head;
+ decomp_len += decomp_last_rem;
+ }
+
+ mmap_len = sizeof(struct decomp) + decomp_len;
+ decomp = mmap(NULL, mmap_len, PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ if (decomp == MAP_FAILED) {
+ pr_err("Couldn't allocate memory for decompression\n");
+ return -1;
+ }
+
+ decomp->file_pos = file_offset;
+ decomp->file_path = file_path;
+ decomp->mmap_len = mmap_len;
+ decomp->head = 0;
+
+ if (decomp_last_rem) {
+ memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem);
+ decomp->size = decomp_last_rem;
+ }
+
+ src = (void *)event + sizeof(struct perf_record_compressed);
+ src_size = event->pack.header.size - sizeof(struct perf_record_compressed);
+
+ decomp_size = zstd_decompress_stream(session->active_decomp->zstd_decomp, src, src_size,
+ &(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
+ if (!decomp_size) {
+ munmap(decomp, mmap_len);
+ pr_err("Couldn't decompress data\n");
+ return -1;
+ }
+
+ decomp->size += decomp_size;
+
+ if (session->active_decomp->decomp == NULL)
+ session->active_decomp->decomp = decomp;
+ else
+ session->active_decomp->decomp_last->next = decomp;
+
+ session->active_decomp->decomp_last = decomp;
+
+ pr_debug("decomp (B): %zd to %zd\n", src_size, decomp_size);
+
+ return 0;
+}
+#else /* !HAVE_ZSTD_SUPPORT */
+#define perf_session__process_compressed_event perf_session__process_compressed_event_stub
+#endif
+
+static int perf_session__deliver_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_tool *tool,
+ u64 file_offset,
+ const char *file_path);
+
+static int perf_session__open(struct perf_session *session, int repipe_fd)
+{
+ struct perf_data *data = session->data;
+
+ if (perf_session__read_header(session, repipe_fd) < 0) {
+ pr_err("incompatible file format (rerun with -v to learn more)\n");
+ return -1;
+ }
+
+ if (perf_data__is_pipe(data))
+ return 0;
+
+ if (perf_header__has_feat(&session->header, HEADER_STAT))
+ return 0;
+
+ if (!evlist__valid_sample_type(session->evlist)) {
+ pr_err("non matching sample_type\n");
+ return -1;
+ }
+
+ if (!evlist__valid_sample_id_all(session->evlist)) {
+ pr_err("non matching sample_id_all\n");
+ return -1;
+ }
+
+ if (!evlist__valid_read_format(session->evlist)) {
+ pr_err("non matching read_format\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+void perf_session__set_id_hdr_size(struct perf_session *session)
+{
+ u16 id_hdr_size = evlist__id_hdr_size(session->evlist);
+
+ machines__set_id_hdr_size(&session->machines, id_hdr_size);
+}
+
+int perf_session__create_kernel_maps(struct perf_session *session)
+{
+ int ret = machine__create_kernel_maps(&session->machines.host);
+
+ if (ret >= 0)
+ ret = machines__create_guest_kernel_maps(&session->machines);
+ return ret;
+}
+
+static void perf_session__destroy_kernel_maps(struct perf_session *session)
+{
+ machines__destroy_kernel_maps(&session->machines);
+}
+
+static bool perf_session__has_comm_exec(struct perf_session *session)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (evsel->core.attr.comm_exec)
+ return true;
+ }
+
+ return false;
+}
+
+static void perf_session__set_comm_exec(struct perf_session *session)
+{
+ bool comm_exec = perf_session__has_comm_exec(session);
+
+ machines__set_comm_exec(&session->machines, comm_exec);
+}
+
+static int ordered_events__deliver_event(struct ordered_events *oe,
+ struct ordered_event *event)
+{
+ struct perf_session *session = container_of(oe, struct perf_session,
+ ordered_events);
+
+ return perf_session__deliver_event(session, event->event,
+ session->tool, event->file_offset,
+ event->file_path);
+}
+
+struct perf_session *__perf_session__new(struct perf_data *data,
+ bool repipe, int repipe_fd,
+ struct perf_tool *tool)
+{
+ int ret = -ENOMEM;
+ struct perf_session *session = zalloc(sizeof(*session));
+
+ if (!session)
+ goto out;
+
+ session->repipe = repipe;
+ session->tool = tool;
+ session->decomp_data.zstd_decomp = &session->zstd_data;
+ session->active_decomp = &session->decomp_data;
+ INIT_LIST_HEAD(&session->auxtrace_index);
+ machines__init(&session->machines);
+ ordered_events__init(&session->ordered_events,
+ ordered_events__deliver_event, NULL);
+
+ perf_env__init(&session->header.env);
+ if (data) {
+ ret = perf_data__open(data);
+ if (ret < 0)
+ goto out_delete;
+
+ session->data = data;
+
+ if (perf_data__is_read(data)) {
+ ret = perf_session__open(session, repipe_fd);
+ if (ret < 0)
+ goto out_delete;
+
+ /*
+ * set session attributes that are present in perf.data
+ * but not in pipe-mode.
+ */
+ if (!data->is_pipe) {
+ perf_session__set_id_hdr_size(session);
+ perf_session__set_comm_exec(session);
+ }
+
+ evlist__init_trace_event_sample_raw(session->evlist);
+
+ /* Open the directory data. */
+ if (data->is_dir) {
+ ret = perf_data__open_dir(data);
+ if (ret)
+ goto out_delete;
+ }
+
+ if (!symbol_conf.kallsyms_name &&
+ !symbol_conf.vmlinux_name)
+ symbol_conf.kallsyms_name = perf_data__kallsyms_name(data);
+ }
+ } else {
+ session->machines.host.env = &perf_env;
+ }
+
+ session->machines.host.single_address_space =
+ perf_env__single_address_space(session->machines.host.env);
+
+ if (!data || perf_data__is_write(data)) {
+ /*
+ * In O_RDONLY mode this will be performed when reading the
+ * kernel MMAP event, in perf_event__process_mmap().
+ */
+ if (perf_session__create_kernel_maps(session) < 0)
+ pr_warning("Cannot read kernel map\n");
+ }
+
+ /*
+ * In pipe-mode, evlist is empty until PERF_RECORD_HEADER_ATTR is
+ * processed, so evlist__sample_id_all is not meaningful here.
+ */
+ if ((!data || !data->is_pipe) && tool && tool->ordering_requires_timestamps &&
+ tool->ordered_events && !evlist__sample_id_all(session->evlist)) {
+ dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
+ tool->ordered_events = false;
+ }
+
+ return session;
+
+ out_delete:
+ perf_session__delete(session);
+ out:
+ return ERR_PTR(ret);
+}
+
+static void perf_decomp__release_events(struct decomp *next)
+{
+ struct decomp *decomp;
+ size_t mmap_len;
+
+ do {
+ decomp = next;
+ if (decomp == NULL)
+ break;
+ next = decomp->next;
+ mmap_len = decomp->mmap_len;
+ munmap(decomp, mmap_len);
+ } while (1);
+}
+
+void perf_session__delete(struct perf_session *session)
+{
+ if (session == NULL)
+ return;
+ auxtrace__free(session);
+ auxtrace_index__free(&session->auxtrace_index);
+ perf_session__destroy_kernel_maps(session);
+ perf_decomp__release_events(session->decomp_data.decomp);
+ perf_env__exit(&session->header.env);
+ machines__exit(&session->machines);
+ if (session->data) {
+ if (perf_data__is_read(session->data))
+ evlist__delete(session->evlist);
+ perf_data__close(session->data);
+ }
+#ifdef HAVE_LIBTRACEEVENT
+ trace_event__cleanup(&session->tevent);
+#endif
+ free(session);
+}
+
+static int process_event_synth_tracing_data_stub(struct perf_session *session
+ __maybe_unused,
+ union perf_event *event
+ __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_event_synth_attr_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct evlist **pevlist
+ __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_event_synth_event_update_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct evlist **pevlist
+ __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_event_update(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_event_sample_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct evsel *evsel __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_event_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_finished_round_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct ordered_events *oe __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int skipn(int fd, off_t n)
+{
+ char buf[4096];
+ ssize_t ret;
+
+ while (n > 0) {
+ ret = read(fd, buf, min(n, (off_t)sizeof(buf)));
+ if (ret <= 0)
+ return ret;
+ n -= ret;
+ }
+
+ return 0;
+}
+
+static s64 process_event_auxtrace_stub(struct perf_session *session __maybe_unused,
+ union perf_event *event)
+{
+ dump_printf(": unhandled!\n");
+ if (perf_data__is_pipe(session->data))
+ skipn(perf_data__fd(session->data), event->auxtrace.size);
+ return event->auxtrace.size;
+}
+
+static int process_event_op2_stub(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+
+static
+int process_event_thread_map_stub(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_thread_map(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static
+int process_event_cpu_map_stub(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_cpu_map(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static
+int process_event_stat_config_stub(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_stat_config(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_stat_stub(struct perf_session *perf_session __maybe_unused,
+ union perf_event *event)
+{
+ if (dump_trace)
+ perf_event__fprintf_stat(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_stat_round_stub(struct perf_session *perf_session __maybe_unused,
+ union perf_event *event)
+{
+ if (dump_trace)
+ perf_event__fprintf_stat_round(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_event_time_conv_stub(struct perf_session *perf_session __maybe_unused,
+ union perf_event *event)
+{
+ if (dump_trace)
+ perf_event__fprintf_time_conv(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int perf_session__process_compressed_event_stub(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused,
+ u64 file_offset __maybe_unused,
+ const char *file_path __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+void perf_tool__fill_defaults(struct perf_tool *tool)
+{
+ if (tool->sample == NULL)
+ tool->sample = process_event_sample_stub;
+ if (tool->mmap == NULL)
+ tool->mmap = process_event_stub;
+ if (tool->mmap2 == NULL)
+ tool->mmap2 = process_event_stub;
+ if (tool->comm == NULL)
+ tool->comm = process_event_stub;
+ if (tool->namespaces == NULL)
+ tool->namespaces = process_event_stub;
+ if (tool->cgroup == NULL)
+ tool->cgroup = process_event_stub;
+ if (tool->fork == NULL)
+ tool->fork = process_event_stub;
+ if (tool->exit == NULL)
+ tool->exit = process_event_stub;
+ if (tool->lost == NULL)
+ tool->lost = perf_event__process_lost;
+ if (tool->lost_samples == NULL)
+ tool->lost_samples = perf_event__process_lost_samples;
+ if (tool->aux == NULL)
+ tool->aux = perf_event__process_aux;
+ if (tool->itrace_start == NULL)
+ tool->itrace_start = perf_event__process_itrace_start;
+ if (tool->context_switch == NULL)
+ tool->context_switch = perf_event__process_switch;
+ if (tool->ksymbol == NULL)
+ tool->ksymbol = perf_event__process_ksymbol;
+ if (tool->bpf == NULL)
+ tool->bpf = perf_event__process_bpf;
+ if (tool->text_poke == NULL)
+ tool->text_poke = perf_event__process_text_poke;
+ if (tool->aux_output_hw_id == NULL)
+ tool->aux_output_hw_id = perf_event__process_aux_output_hw_id;
+ if (tool->read == NULL)
+ tool->read = process_event_sample_stub;
+ if (tool->throttle == NULL)
+ tool->throttle = process_event_stub;
+ if (tool->unthrottle == NULL)
+ tool->unthrottle = process_event_stub;
+ if (tool->attr == NULL)
+ tool->attr = process_event_synth_attr_stub;
+ if (tool->event_update == NULL)
+ tool->event_update = process_event_synth_event_update_stub;
+ if (tool->tracing_data == NULL)
+ tool->tracing_data = process_event_synth_tracing_data_stub;
+ if (tool->build_id == NULL)
+ tool->build_id = process_event_op2_stub;
+ if (tool->finished_round == NULL) {
+ if (tool->ordered_events)
+ tool->finished_round = perf_event__process_finished_round;
+ else
+ tool->finished_round = process_finished_round_stub;
+ }
+ if (tool->id_index == NULL)
+ tool->id_index = process_event_op2_stub;
+ if (tool->auxtrace_info == NULL)
+ tool->auxtrace_info = process_event_op2_stub;
+ if (tool->auxtrace == NULL)
+ tool->auxtrace = process_event_auxtrace_stub;
+ if (tool->auxtrace_error == NULL)
+ tool->auxtrace_error = process_event_op2_stub;
+ if (tool->thread_map == NULL)
+ tool->thread_map = process_event_thread_map_stub;
+ if (tool->cpu_map == NULL)
+ tool->cpu_map = process_event_cpu_map_stub;
+ if (tool->stat_config == NULL)
+ tool->stat_config = process_event_stat_config_stub;
+ if (tool->stat == NULL)
+ tool->stat = process_stat_stub;
+ if (tool->stat_round == NULL)
+ tool->stat_round = process_stat_round_stub;
+ if (tool->time_conv == NULL)
+ tool->time_conv = process_event_time_conv_stub;
+ if (tool->feature == NULL)
+ tool->feature = process_event_op2_stub;
+ if (tool->compressed == NULL)
+ tool->compressed = perf_session__process_compressed_event;
+ if (tool->finished_init == NULL)
+ tool->finished_init = process_event_op2_stub;
+}
+
+static void swap_sample_id_all(union perf_event *event, void *data)
+{
+ void *end = (void *) event + event->header.size;
+ int size = end - data;
+
+ BUG_ON(size % sizeof(u64));
+ mem_bswap_64(data, size);
+}
+
+static void perf_event__all64_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ struct perf_event_header *hdr = &event->header;
+ mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr));
+}
+
+static void perf_event__comm_swap(union perf_event *event, bool sample_id_all)
+{
+ event->comm.pid = bswap_32(event->comm.pid);
+ event->comm.tid = bswap_32(event->comm.tid);
+
+ if (sample_id_all) {
+ void *data = &event->comm.comm;
+
+ data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
+ swap_sample_id_all(event, data);
+ }
+}
+
+static void perf_event__mmap_swap(union perf_event *event,
+ bool sample_id_all)
+{
+ event->mmap.pid = bswap_32(event->mmap.pid);
+ event->mmap.tid = bswap_32(event->mmap.tid);
+ event->mmap.start = bswap_64(event->mmap.start);
+ event->mmap.len = bswap_64(event->mmap.len);
+ event->mmap.pgoff = bswap_64(event->mmap.pgoff);
+
+ if (sample_id_all) {
+ void *data = &event->mmap.filename;
+
+ data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
+ swap_sample_id_all(event, data);
+ }
+}
+
+static void perf_event__mmap2_swap(union perf_event *event,
+ bool sample_id_all)
+{
+ event->mmap2.pid = bswap_32(event->mmap2.pid);
+ event->mmap2.tid = bswap_32(event->mmap2.tid);
+ event->mmap2.start = bswap_64(event->mmap2.start);
+ event->mmap2.len = bswap_64(event->mmap2.len);
+ event->mmap2.pgoff = bswap_64(event->mmap2.pgoff);
+
+ if (!(event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID)) {
+ event->mmap2.maj = bswap_32(event->mmap2.maj);
+ event->mmap2.min = bswap_32(event->mmap2.min);
+ event->mmap2.ino = bswap_64(event->mmap2.ino);
+ event->mmap2.ino_generation = bswap_64(event->mmap2.ino_generation);
+ }
+
+ if (sample_id_all) {
+ void *data = &event->mmap2.filename;
+
+ data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
+ swap_sample_id_all(event, data);
+ }
+}
+static void perf_event__task_swap(union perf_event *event, bool sample_id_all)
+{
+ event->fork.pid = bswap_32(event->fork.pid);
+ event->fork.tid = bswap_32(event->fork.tid);
+ event->fork.ppid = bswap_32(event->fork.ppid);
+ event->fork.ptid = bswap_32(event->fork.ptid);
+ event->fork.time = bswap_64(event->fork.time);
+
+ if (sample_id_all)
+ swap_sample_id_all(event, &event->fork + 1);
+}
+
+static void perf_event__read_swap(union perf_event *event, bool sample_id_all)
+{
+ event->read.pid = bswap_32(event->read.pid);
+ event->read.tid = bswap_32(event->read.tid);
+ event->read.value = bswap_64(event->read.value);
+ event->read.time_enabled = bswap_64(event->read.time_enabled);
+ event->read.time_running = bswap_64(event->read.time_running);
+ event->read.id = bswap_64(event->read.id);
+
+ if (sample_id_all)
+ swap_sample_id_all(event, &event->read + 1);
+}
+
+static void perf_event__aux_swap(union perf_event *event, bool sample_id_all)
+{
+ event->aux.aux_offset = bswap_64(event->aux.aux_offset);
+ event->aux.aux_size = bswap_64(event->aux.aux_size);
+ event->aux.flags = bswap_64(event->aux.flags);
+
+ if (sample_id_all)
+ swap_sample_id_all(event, &event->aux + 1);
+}
+
+static void perf_event__itrace_start_swap(union perf_event *event,
+ bool sample_id_all)
+{
+ event->itrace_start.pid = bswap_32(event->itrace_start.pid);
+ event->itrace_start.tid = bswap_32(event->itrace_start.tid);
+
+ if (sample_id_all)
+ swap_sample_id_all(event, &event->itrace_start + 1);
+}
+
+static void perf_event__switch_swap(union perf_event *event, bool sample_id_all)
+{
+ if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) {
+ event->context_switch.next_prev_pid =
+ bswap_32(event->context_switch.next_prev_pid);
+ event->context_switch.next_prev_tid =
+ bswap_32(event->context_switch.next_prev_tid);
+ }
+
+ if (sample_id_all)
+ swap_sample_id_all(event, &event->context_switch + 1);
+}
+
+static void perf_event__text_poke_swap(union perf_event *event, bool sample_id_all)
+{
+ event->text_poke.addr = bswap_64(event->text_poke.addr);
+ event->text_poke.old_len = bswap_16(event->text_poke.old_len);
+ event->text_poke.new_len = bswap_16(event->text_poke.new_len);
+
+ if (sample_id_all) {
+ size_t len = sizeof(event->text_poke.old_len) +
+ sizeof(event->text_poke.new_len) +
+ event->text_poke.old_len +
+ event->text_poke.new_len;
+ void *data = &event->text_poke.old_len;
+
+ data += PERF_ALIGN(len, sizeof(u64));
+ swap_sample_id_all(event, data);
+ }
+}
+
+static void perf_event__throttle_swap(union perf_event *event,
+ bool sample_id_all)
+{
+ event->throttle.time = bswap_64(event->throttle.time);
+ event->throttle.id = bswap_64(event->throttle.id);
+ event->throttle.stream_id = bswap_64(event->throttle.stream_id);
+
+ if (sample_id_all)
+ swap_sample_id_all(event, &event->throttle + 1);
+}
+
+static void perf_event__namespaces_swap(union perf_event *event,
+ bool sample_id_all)
+{
+ u64 i;
+
+ event->namespaces.pid = bswap_32(event->namespaces.pid);
+ event->namespaces.tid = bswap_32(event->namespaces.tid);
+ event->namespaces.nr_namespaces = bswap_64(event->namespaces.nr_namespaces);
+
+ for (i = 0; i < event->namespaces.nr_namespaces; i++) {
+ struct perf_ns_link_info *ns = &event->namespaces.link_info[i];
+
+ ns->dev = bswap_64(ns->dev);
+ ns->ino = bswap_64(ns->ino);
+ }
+
+ if (sample_id_all)
+ swap_sample_id_all(event, &event->namespaces.link_info[i]);
+}
+
+static void perf_event__cgroup_swap(union perf_event *event, bool sample_id_all)
+{
+ event->cgroup.id = bswap_64(event->cgroup.id);
+
+ if (sample_id_all) {
+ void *data = &event->cgroup.path;
+
+ data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
+ swap_sample_id_all(event, data);
+ }
+}
+
+static u8 revbyte(u8 b)
+{
+ int rev = (b >> 4) | ((b & 0xf) << 4);
+ rev = ((rev & 0xcc) >> 2) | ((rev & 0x33) << 2);
+ rev = ((rev & 0xaa) >> 1) | ((rev & 0x55) << 1);
+ return (u8) rev;
+}
+
+/*
+ * XXX this is hack in attempt to carry flags bitfield
+ * through endian village. ABI says:
+ *
+ * Bit-fields are allocated from right to left (least to most significant)
+ * on little-endian implementations and from left to right (most to least
+ * significant) on big-endian implementations.
+ *
+ * The above seems to be byte specific, so we need to reverse each
+ * byte of the bitfield. 'Internet' also says this might be implementation
+ * specific and we probably need proper fix and carry perf_event_attr
+ * bitfield flags in separate data file FEAT_ section. Thought this seems
+ * to work for now.
+ */
+static void swap_bitfield(u8 *p, unsigned len)
+{
+ unsigned i;
+
+ for (i = 0; i < len; i++) {
+ *p = revbyte(*p);
+ p++;
+ }
+}
+
+/* exported for swapping attributes in file header */
+void perf_event__attr_swap(struct perf_event_attr *attr)
+{
+ attr->type = bswap_32(attr->type);
+ attr->size = bswap_32(attr->size);
+
+#define bswap_safe(f, n) \
+ (attr->size > (offsetof(struct perf_event_attr, f) + \
+ sizeof(attr->f) * (n)))
+#define bswap_field(f, sz) \
+do { \
+ if (bswap_safe(f, 0)) \
+ attr->f = bswap_##sz(attr->f); \
+} while(0)
+#define bswap_field_16(f) bswap_field(f, 16)
+#define bswap_field_32(f) bswap_field(f, 32)
+#define bswap_field_64(f) bswap_field(f, 64)
+
+ bswap_field_64(config);
+ bswap_field_64(sample_period);
+ bswap_field_64(sample_type);
+ bswap_field_64(read_format);
+ bswap_field_32(wakeup_events);
+ bswap_field_32(bp_type);
+ bswap_field_64(bp_addr);
+ bswap_field_64(bp_len);
+ bswap_field_64(branch_sample_type);
+ bswap_field_64(sample_regs_user);
+ bswap_field_32(sample_stack_user);
+ bswap_field_32(aux_watermark);
+ bswap_field_16(sample_max_stack);
+ bswap_field_32(aux_sample_size);
+
+ /*
+ * After read_format are bitfields. Check read_format because
+ * we are unable to use offsetof on bitfield.
+ */
+ if (bswap_safe(read_format, 1))
+ swap_bitfield((u8 *) (&attr->read_format + 1),
+ sizeof(u64));
+#undef bswap_field_64
+#undef bswap_field_32
+#undef bswap_field
+#undef bswap_safe
+}
+
+static void perf_event__hdr_attr_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ size_t size;
+
+ perf_event__attr_swap(&event->attr.attr);
+
+ size = event->header.size;
+ size -= perf_record_header_attr_id(event) - (void *)event;
+ mem_bswap_64(perf_record_header_attr_id(event), size);
+}
+
+static void perf_event__event_update_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ event->event_update.type = bswap_64(event->event_update.type);
+ event->event_update.id = bswap_64(event->event_update.id);
+}
+
+static void perf_event__event_type_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ event->event_type.event_type.event_id =
+ bswap_64(event->event_type.event_type.event_id);
+}
+
+static void perf_event__tracing_data_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ event->tracing_data.size = bswap_32(event->tracing_data.size);
+}
+
+static void perf_event__auxtrace_info_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ size_t size;
+
+ event->auxtrace_info.type = bswap_32(event->auxtrace_info.type);
+
+ size = event->header.size;
+ size -= (void *)&event->auxtrace_info.priv - (void *)event;
+ mem_bswap_64(event->auxtrace_info.priv, size);
+}
+
+static void perf_event__auxtrace_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ event->auxtrace.size = bswap_64(event->auxtrace.size);
+ event->auxtrace.offset = bswap_64(event->auxtrace.offset);
+ event->auxtrace.reference = bswap_64(event->auxtrace.reference);
+ event->auxtrace.idx = bswap_32(event->auxtrace.idx);
+ event->auxtrace.tid = bswap_32(event->auxtrace.tid);
+ event->auxtrace.cpu = bswap_32(event->auxtrace.cpu);
+}
+
+static void perf_event__auxtrace_error_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ event->auxtrace_error.type = bswap_32(event->auxtrace_error.type);
+ event->auxtrace_error.code = bswap_32(event->auxtrace_error.code);
+ event->auxtrace_error.cpu = bswap_32(event->auxtrace_error.cpu);
+ event->auxtrace_error.pid = bswap_32(event->auxtrace_error.pid);
+ event->auxtrace_error.tid = bswap_32(event->auxtrace_error.tid);
+ event->auxtrace_error.fmt = bswap_32(event->auxtrace_error.fmt);
+ event->auxtrace_error.ip = bswap_64(event->auxtrace_error.ip);
+ if (event->auxtrace_error.fmt)
+ event->auxtrace_error.time = bswap_64(event->auxtrace_error.time);
+ if (event->auxtrace_error.fmt >= 2) {
+ event->auxtrace_error.machine_pid = bswap_32(event->auxtrace_error.machine_pid);
+ event->auxtrace_error.vcpu = bswap_32(event->auxtrace_error.vcpu);
+ }
+}
+
+static void perf_event__thread_map_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ unsigned i;
+
+ event->thread_map.nr = bswap_64(event->thread_map.nr);
+
+ for (i = 0; i < event->thread_map.nr; i++)
+ event->thread_map.entries[i].pid = bswap_64(event->thread_map.entries[i].pid);
+}
+
+static void perf_event__cpu_map_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ struct perf_record_cpu_map_data *data = &event->cpu_map.data;
+
+ data->type = bswap_16(data->type);
+
+ switch (data->type) {
+ case PERF_CPU_MAP__CPUS:
+ data->cpus_data.nr = bswap_16(data->cpus_data.nr);
+
+ for (unsigned i = 0; i < data->cpus_data.nr; i++)
+ data->cpus_data.cpu[i] = bswap_16(data->cpus_data.cpu[i]);
+ break;
+ case PERF_CPU_MAP__MASK:
+ data->mask32_data.long_size = bswap_16(data->mask32_data.long_size);
+
+ switch (data->mask32_data.long_size) {
+ case 4:
+ data->mask32_data.nr = bswap_16(data->mask32_data.nr);
+ for (unsigned i = 0; i < data->mask32_data.nr; i++)
+ data->mask32_data.mask[i] = bswap_32(data->mask32_data.mask[i]);
+ break;
+ case 8:
+ data->mask64_data.nr = bswap_16(data->mask64_data.nr);
+ for (unsigned i = 0; i < data->mask64_data.nr; i++)
+ data->mask64_data.mask[i] = bswap_64(data->mask64_data.mask[i]);
+ break;
+ default:
+ pr_err("cpu_map swap: unsupported long size\n");
+ }
+ break;
+ case PERF_CPU_MAP__RANGE_CPUS:
+ data->range_cpu_data.start_cpu = bswap_16(data->range_cpu_data.start_cpu);
+ data->range_cpu_data.end_cpu = bswap_16(data->range_cpu_data.end_cpu);
+ break;
+ default:
+ break;
+ }
+}
+
+static void perf_event__stat_config_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ u64 size;
+
+ size = bswap_64(event->stat_config.nr) * sizeof(event->stat_config.data[0]);
+ size += 1; /* nr item itself */
+ mem_bswap_64(&event->stat_config.nr, size);
+}
+
+static void perf_event__stat_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ event->stat.id = bswap_64(event->stat.id);
+ event->stat.thread = bswap_32(event->stat.thread);
+ event->stat.cpu = bswap_32(event->stat.cpu);
+ event->stat.val = bswap_64(event->stat.val);
+ event->stat.ena = bswap_64(event->stat.ena);
+ event->stat.run = bswap_64(event->stat.run);
+}
+
+static void perf_event__stat_round_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ event->stat_round.type = bswap_64(event->stat_round.type);
+ event->stat_round.time = bswap_64(event->stat_round.time);
+}
+
+static void perf_event__time_conv_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ event->time_conv.time_shift = bswap_64(event->time_conv.time_shift);
+ event->time_conv.time_mult = bswap_64(event->time_conv.time_mult);
+ event->time_conv.time_zero = bswap_64(event->time_conv.time_zero);
+
+ if (event_contains(event->time_conv, time_cycles)) {
+ event->time_conv.time_cycles = bswap_64(event->time_conv.time_cycles);
+ event->time_conv.time_mask = bswap_64(event->time_conv.time_mask);
+ }
+}
+
+typedef void (*perf_event__swap_op)(union perf_event *event,
+ bool sample_id_all);
+
+static perf_event__swap_op perf_event__swap_ops[] = {
+ [PERF_RECORD_MMAP] = perf_event__mmap_swap,
+ [PERF_RECORD_MMAP2] = perf_event__mmap2_swap,
+ [PERF_RECORD_COMM] = perf_event__comm_swap,
+ [PERF_RECORD_FORK] = perf_event__task_swap,
+ [PERF_RECORD_EXIT] = perf_event__task_swap,
+ [PERF_RECORD_LOST] = perf_event__all64_swap,
+ [PERF_RECORD_READ] = perf_event__read_swap,
+ [PERF_RECORD_THROTTLE] = perf_event__throttle_swap,
+ [PERF_RECORD_UNTHROTTLE] = perf_event__throttle_swap,
+ [PERF_RECORD_SAMPLE] = perf_event__all64_swap,
+ [PERF_RECORD_AUX] = perf_event__aux_swap,
+ [PERF_RECORD_ITRACE_START] = perf_event__itrace_start_swap,
+ [PERF_RECORD_LOST_SAMPLES] = perf_event__all64_swap,
+ [PERF_RECORD_SWITCH] = perf_event__switch_swap,
+ [PERF_RECORD_SWITCH_CPU_WIDE] = perf_event__switch_swap,
+ [PERF_RECORD_NAMESPACES] = perf_event__namespaces_swap,
+ [PERF_RECORD_CGROUP] = perf_event__cgroup_swap,
+ [PERF_RECORD_TEXT_POKE] = perf_event__text_poke_swap,
+ [PERF_RECORD_AUX_OUTPUT_HW_ID] = perf_event__all64_swap,
+ [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap,
+ [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap,
+ [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
+ [PERF_RECORD_HEADER_BUILD_ID] = NULL,
+ [PERF_RECORD_ID_INDEX] = perf_event__all64_swap,
+ [PERF_RECORD_AUXTRACE_INFO] = perf_event__auxtrace_info_swap,
+ [PERF_RECORD_AUXTRACE] = perf_event__auxtrace_swap,
+ [PERF_RECORD_AUXTRACE_ERROR] = perf_event__auxtrace_error_swap,
+ [PERF_RECORD_THREAD_MAP] = perf_event__thread_map_swap,
+ [PERF_RECORD_CPU_MAP] = perf_event__cpu_map_swap,
+ [PERF_RECORD_STAT_CONFIG] = perf_event__stat_config_swap,
+ [PERF_RECORD_STAT] = perf_event__stat_swap,
+ [PERF_RECORD_STAT_ROUND] = perf_event__stat_round_swap,
+ [PERF_RECORD_EVENT_UPDATE] = perf_event__event_update_swap,
+ [PERF_RECORD_TIME_CONV] = perf_event__time_conv_swap,
+ [PERF_RECORD_HEADER_MAX] = NULL,
+};
+
+/*
+ * When perf record finishes a pass on every buffers, it records this pseudo
+ * event.
+ * We record the max timestamp t found in the pass n.
+ * Assuming these timestamps are monotonic across cpus, we know that if
+ * a buffer still has events with timestamps below t, they will be all
+ * available and then read in the pass n + 1.
+ * Hence when we start to read the pass n + 2, we can safely flush every
+ * events with timestamps below t.
+ *
+ * ============ PASS n =================
+ * CPU 0 | CPU 1
+ * |
+ * cnt1 timestamps | cnt2 timestamps
+ * 1 | 2
+ * 2 | 3
+ * - | 4 <--- max recorded
+ *
+ * ============ PASS n + 1 ==============
+ * CPU 0 | CPU 1
+ * |
+ * cnt1 timestamps | cnt2 timestamps
+ * 3 | 5
+ * 4 | 6
+ * 5 | 7 <---- max recorded
+ *
+ * Flush every events below timestamp 4
+ *
+ * ============ PASS n + 2 ==============
+ * CPU 0 | CPU 1
+ * |
+ * cnt1 timestamps | cnt2 timestamps
+ * 6 | 8
+ * 7 | 9
+ * - | 10
+ *
+ * Flush every events below timestamp 7
+ * etc...
+ */
+int perf_event__process_finished_round(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct ordered_events *oe)
+{
+ if (dump_trace)
+ fprintf(stdout, "\n");
+ return ordered_events__flush(oe, OE_FLUSH__ROUND);
+}
+
+int perf_session__queue_event(struct perf_session *s, union perf_event *event,
+ u64 timestamp, u64 file_offset, const char *file_path)
+{
+ return ordered_events__queue(&s->ordered_events, event, timestamp, file_offset, file_path);
+}
+
+static void callchain__lbr_callstack_printf(struct perf_sample *sample)
+{
+ struct ip_callchain *callchain = sample->callchain;
+ struct branch_stack *lbr_stack = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
+ u64 kernel_callchain_nr = callchain->nr;
+ unsigned int i;
+
+ for (i = 0; i < kernel_callchain_nr; i++) {
+ if (callchain->ips[i] == PERF_CONTEXT_USER)
+ break;
+ }
+
+ if ((i != kernel_callchain_nr) && lbr_stack->nr) {
+ u64 total_nr;
+ /*
+ * LBR callstack can only get user call chain,
+ * i is kernel call chain number,
+ * 1 is PERF_CONTEXT_USER.
+ *
+ * The user call chain is stored in LBR registers.
+ * LBR are pair registers. The caller is stored
+ * in "from" register, while the callee is stored
+ * in "to" register.
+ * For example, there is a call stack
+ * "A"->"B"->"C"->"D".
+ * The LBR registers will be recorded like
+ * "C"->"D", "B"->"C", "A"->"B".
+ * So only the first "to" register and all "from"
+ * registers are needed to construct the whole stack.
+ */
+ total_nr = i + 1 + lbr_stack->nr + 1;
+ kernel_callchain_nr = i + 1;
+
+ printf("... LBR call chain: nr:%" PRIu64 "\n", total_nr);
+
+ for (i = 0; i < kernel_callchain_nr; i++)
+ printf("..... %2d: %016" PRIx64 "\n",
+ i, callchain->ips[i]);
+
+ printf("..... %2d: %016" PRIx64 "\n",
+ (int)(kernel_callchain_nr), entries[0].to);
+ for (i = 0; i < lbr_stack->nr; i++)
+ printf("..... %2d: %016" PRIx64 "\n",
+ (int)(i + kernel_callchain_nr + 1), entries[i].from);
+ }
+}
+
+static void callchain__printf(struct evsel *evsel,
+ struct perf_sample *sample)
+{
+ unsigned int i;
+ struct ip_callchain *callchain = sample->callchain;
+
+ if (evsel__has_branch_callstack(evsel))
+ callchain__lbr_callstack_printf(sample);
+
+ printf("... FP chain: nr:%" PRIu64 "\n", callchain->nr);
+
+ for (i = 0; i < callchain->nr; i++)
+ printf("..... %2d: %016" PRIx64 "\n",
+ i, callchain->ips[i]);
+}
+
+static void branch_stack__printf(struct perf_sample *sample, bool callstack)
+{
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
+ uint64_t i;
+
+ if (!callstack) {
+ printf("%s: nr:%" PRIu64 "\n", "... branch stack", sample->branch_stack->nr);
+ } else {
+ /* the reason of adding 1 to nr is because after expanding
+ * branch stack it generates nr + 1 callstack records. e.g.,
+ * B()->C()
+ * A()->B()
+ * the final callstack should be:
+ * C()
+ * B()
+ * A()
+ */
+ printf("%s: nr:%" PRIu64 "\n", "... branch callstack", sample->branch_stack->nr+1);
+ }
+
+ for (i = 0; i < sample->branch_stack->nr; i++) {
+ struct branch_entry *e = &entries[i];
+
+ if (!callstack) {
+ printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x %s %s\n",
+ i, e->from, e->to,
+ (unsigned short)e->flags.cycles,
+ e->flags.mispred ? "M" : " ",
+ e->flags.predicted ? "P" : " ",
+ e->flags.abort ? "A" : " ",
+ e->flags.in_tx ? "T" : " ",
+ (unsigned)e->flags.reserved,
+ get_branch_type(e),
+ e->flags.spec ? branch_spec_desc(e->flags.spec) : "");
+ } else {
+ if (i == 0) {
+ printf("..... %2"PRIu64": %016" PRIx64 "\n"
+ "..... %2"PRIu64": %016" PRIx64 "\n",
+ i, e->to, i+1, e->from);
+ } else {
+ printf("..... %2"PRIu64": %016" PRIx64 "\n", i+1, e->from);
+ }
+ }
+ }
+}
+
+static void regs_dump__printf(u64 mask, u64 *regs, const char *arch)
+{
+ unsigned rid, i = 0;
+
+ for_each_set_bit(rid, (unsigned long *) &mask, sizeof(mask) * 8) {
+ u64 val = regs[i++];
+
+ printf(".... %-5s 0x%016" PRIx64 "\n",
+ perf_reg_name(rid, arch), val);
+ }
+}
+
+static const char *regs_abi[] = {
+ [PERF_SAMPLE_REGS_ABI_NONE] = "none",
+ [PERF_SAMPLE_REGS_ABI_32] = "32-bit",
+ [PERF_SAMPLE_REGS_ABI_64] = "64-bit",
+};
+
+static inline const char *regs_dump_abi(struct regs_dump *d)
+{
+ if (d->abi > PERF_SAMPLE_REGS_ABI_64)
+ return "unknown";
+
+ return regs_abi[d->abi];
+}
+
+static void regs__printf(const char *type, struct regs_dump *regs, const char *arch)
+{
+ u64 mask = regs->mask;
+
+ printf("... %s regs: mask 0x%" PRIx64 " ABI %s\n",
+ type,
+ mask,
+ regs_dump_abi(regs));
+
+ regs_dump__printf(mask, regs->regs, arch);
+}
+
+static void regs_user__printf(struct perf_sample *sample, const char *arch)
+{
+ struct regs_dump *user_regs = &sample->user_regs;
+
+ if (user_regs->regs)
+ regs__printf("user", user_regs, arch);
+}
+
+static void regs_intr__printf(struct perf_sample *sample, const char *arch)
+{
+ struct regs_dump *intr_regs = &sample->intr_regs;
+
+ if (intr_regs->regs)
+ regs__printf("intr", intr_regs, arch);
+}
+
+static void stack_user__printf(struct stack_dump *dump)
+{
+ printf("... ustack: size %" PRIu64 ", offset 0x%x\n",
+ dump->size, dump->offset);
+}
+
+static void evlist__print_tstamp(struct evlist *evlist, union perf_event *event, struct perf_sample *sample)
+{
+ u64 sample_type = __evlist__combined_sample_type(evlist);
+
+ if (event->header.type != PERF_RECORD_SAMPLE &&
+ !evlist__sample_id_all(evlist)) {
+ fputs("-1 -1 ", stdout);
+ return;
+ }
+
+ if ((sample_type & PERF_SAMPLE_CPU))
+ printf("%u ", sample->cpu);
+
+ if (sample_type & PERF_SAMPLE_TIME)
+ printf("%" PRIu64 " ", sample->time);
+}
+
+static void sample_read__printf(struct perf_sample *sample, u64 read_format)
+{
+ printf("... sample_read:\n");
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ printf("...... time enabled %016" PRIx64 "\n",
+ sample->read.time_enabled);
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ printf("...... time running %016" PRIx64 "\n",
+ sample->read.time_running);
+
+ if (read_format & PERF_FORMAT_GROUP) {
+ struct sample_read_value *value = sample->read.group.values;
+
+ printf(".... group nr %" PRIu64 "\n", sample->read.group.nr);
+
+ sample_read_group__for_each(value, sample->read.group.nr, read_format) {
+ printf("..... id %016" PRIx64
+ ", value %016" PRIx64,
+ value->id, value->value);
+ if (read_format & PERF_FORMAT_LOST)
+ printf(", lost %" PRIu64, value->lost);
+ printf("\n");
+ }
+ } else {
+ printf("..... id %016" PRIx64 ", value %016" PRIx64,
+ sample->read.one.id, sample->read.one.value);
+ if (read_format & PERF_FORMAT_LOST)
+ printf(", lost %" PRIu64, sample->read.one.lost);
+ printf("\n");
+ }
+}
+
+static void dump_event(struct evlist *evlist, union perf_event *event,
+ u64 file_offset, struct perf_sample *sample,
+ const char *file_path)
+{
+ if (!dump_trace)
+ return;
+
+ printf("\n%#" PRIx64 "@%s [%#x]: event: %d\n",
+ file_offset, file_path, event->header.size, event->header.type);
+
+ trace_event(event);
+ if (event->header.type == PERF_RECORD_SAMPLE && evlist->trace_event_sample_raw)
+ evlist->trace_event_sample_raw(evlist, event, sample);
+
+ if (sample)
+ evlist__print_tstamp(evlist, event, sample);
+
+ printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
+ event->header.size, perf_event__name(event->header.type));
+}
+
+char *get_page_size_name(u64 size, char *str)
+{
+ if (!size || !unit_number__scnprintf(str, PAGE_SIZE_NAME_LEN, size))
+ snprintf(str, PAGE_SIZE_NAME_LEN, "%s", "N/A");
+
+ return str;
+}
+
+static void dump_sample(struct evsel *evsel, union perf_event *event,
+ struct perf_sample *sample, const char *arch)
+{
+ u64 sample_type;
+ char str[PAGE_SIZE_NAME_LEN];
+
+ if (!dump_trace)
+ return;
+
+ printf("(IP, 0x%x): %d/%d: %#" PRIx64 " period: %" PRIu64 " addr: %#" PRIx64 "\n",
+ event->header.misc, sample->pid, sample->tid, sample->ip,
+ sample->period, sample->addr);
+
+ sample_type = evsel->core.attr.sample_type;
+
+ if (evsel__has_callchain(evsel))
+ callchain__printf(evsel, sample);
+
+ if (evsel__has_br_stack(evsel))
+ branch_stack__printf(sample, evsel__has_branch_callstack(evsel));
+
+ if (sample_type & PERF_SAMPLE_REGS_USER)
+ regs_user__printf(sample, arch);
+
+ if (sample_type & PERF_SAMPLE_REGS_INTR)
+ regs_intr__printf(sample, arch);
+
+ if (sample_type & PERF_SAMPLE_STACK_USER)
+ stack_user__printf(&sample->user_stack);
+
+ if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
+ printf("... weight: %" PRIu64 "", sample->weight);
+ if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
+ printf(",0x%"PRIx16"", sample->ins_lat);
+ printf(",0x%"PRIx16"", sample->p_stage_cyc);
+ }
+ printf("\n");
+ }
+
+ if (sample_type & PERF_SAMPLE_DATA_SRC)
+ printf(" . data_src: 0x%"PRIx64"\n", sample->data_src);
+
+ if (sample_type & PERF_SAMPLE_PHYS_ADDR)
+ printf(" .. phys_addr: 0x%"PRIx64"\n", sample->phys_addr);
+
+ if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+ printf(" .. data page size: %s\n", get_page_size_name(sample->data_page_size, str));
+
+ if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
+ printf(" .. code page size: %s\n", get_page_size_name(sample->code_page_size, str));
+
+ if (sample_type & PERF_SAMPLE_TRANSACTION)
+ printf("... transaction: %" PRIx64 "\n", sample->transaction);
+
+ if (sample_type & PERF_SAMPLE_READ)
+ sample_read__printf(sample, evsel->core.attr.read_format);
+}
+
+static void dump_read(struct evsel *evsel, union perf_event *event)
+{
+ struct perf_record_read *read_event = &event->read;
+ u64 read_format;
+
+ if (!dump_trace)
+ return;
+
+ printf(": %d %d %s %" PRI_lu64 "\n", event->read.pid, event->read.tid,
+ evsel__name(evsel), event->read.value);
+
+ if (!evsel)
+ return;
+
+ read_format = evsel->core.attr.read_format;
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ printf("... time enabled : %" PRI_lu64 "\n", read_event->time_enabled);
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ printf("... time running : %" PRI_lu64 "\n", read_event->time_running);
+
+ if (read_format & PERF_FORMAT_ID)
+ printf("... id : %" PRI_lu64 "\n", read_event->id);
+
+ if (read_format & PERF_FORMAT_LOST)
+ printf("... lost : %" PRI_lu64 "\n", read_event->lost);
+}
+
+static struct machine *machines__find_for_cpumode(struct machines *machines,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ if (perf_guest &&
+ ((sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ||
+ (sample->cpumode == PERF_RECORD_MISC_GUEST_USER))) {
+ u32 pid;
+
+ if (sample->machine_pid)
+ pid = sample->machine_pid;
+ else if (event->header.type == PERF_RECORD_MMAP
+ || event->header.type == PERF_RECORD_MMAP2)
+ pid = event->mmap.pid;
+ else
+ pid = sample->pid;
+
+ /*
+ * Guest code machine is created as needed and does not use
+ * DEFAULT_GUEST_KERNEL_ID.
+ */
+ if (symbol_conf.guest_code)
+ return machines__findnew(machines, pid);
+
+ return machines__find_guest(machines, pid);
+ }
+
+ return &machines->host;
+}
+
+static int deliver_sample_value(struct evlist *evlist,
+ struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct sample_read_value *v,
+ struct machine *machine)
+{
+ struct perf_sample_id *sid = evlist__id2sid(evlist, v->id);
+ struct evsel *evsel;
+
+ if (sid) {
+ sample->id = v->id;
+ sample->period = v->value - sid->period;
+ sid->period = v->value;
+ }
+
+ if (!sid || sid->evsel == NULL) {
+ ++evlist->stats.nr_unknown_id;
+ return 0;
+ }
+
+ /*
+ * There's no reason to deliver sample
+ * for zero period, bail out.
+ */
+ if (!sample->period)
+ return 0;
+
+ evsel = container_of(sid->evsel, struct evsel, core);
+ return tool->sample(tool, event, sample, evsel, machine);
+}
+
+static int deliver_sample_group(struct evlist *evlist,
+ struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine,
+ u64 read_format)
+{
+ int ret = -EINVAL;
+ struct sample_read_value *v = sample->read.group.values;
+
+ sample_read_group__for_each(v, sample->read.group.nr, read_format) {
+ ret = deliver_sample_value(evlist, tool, event, sample, v,
+ machine);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+static int evlist__deliver_sample(struct evlist *evlist, struct perf_tool *tool,
+ union perf_event *event, struct perf_sample *sample,
+ struct evsel *evsel, struct machine *machine)
+{
+ /* We know evsel != NULL. */
+ u64 sample_type = evsel->core.attr.sample_type;
+ u64 read_format = evsel->core.attr.read_format;
+
+ /* Standard sample delivery. */
+ if (!(sample_type & PERF_SAMPLE_READ))
+ return tool->sample(tool, event, sample, evsel, machine);
+
+ /* For PERF_SAMPLE_READ we have either single or group mode. */
+ if (read_format & PERF_FORMAT_GROUP)
+ return deliver_sample_group(evlist, tool, event, sample,
+ machine, read_format);
+ else
+ return deliver_sample_value(evlist, tool, event, sample,
+ &sample->read.one, machine);
+}
+
+static int machines__deliver_event(struct machines *machines,
+ struct evlist *evlist,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_tool *tool, u64 file_offset,
+ const char *file_path)
+{
+ struct evsel *evsel;
+ struct machine *machine;
+
+ dump_event(evlist, event, file_offset, sample, file_path);
+
+ evsel = evlist__id2evsel(evlist, sample->id);
+
+ machine = machines__find_for_cpumode(machines, event, sample);
+
+ switch (event->header.type) {
+ case PERF_RECORD_SAMPLE:
+ if (evsel == NULL) {
+ ++evlist->stats.nr_unknown_id;
+ return 0;
+ }
+ if (machine == NULL) {
+ ++evlist->stats.nr_unprocessable_samples;
+ dump_sample(evsel, event, sample, perf_env__arch(NULL));
+ return 0;
+ }
+ dump_sample(evsel, event, sample, perf_env__arch(machine->env));
+ return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine);
+ case PERF_RECORD_MMAP:
+ return tool->mmap(tool, event, sample, machine);
+ case PERF_RECORD_MMAP2:
+ if (event->header.misc & PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT)
+ ++evlist->stats.nr_proc_map_timeout;
+ return tool->mmap2(tool, event, sample, machine);
+ case PERF_RECORD_COMM:
+ return tool->comm(tool, event, sample, machine);
+ case PERF_RECORD_NAMESPACES:
+ return tool->namespaces(tool, event, sample, machine);
+ case PERF_RECORD_CGROUP:
+ return tool->cgroup(tool, event, sample, machine);
+ case PERF_RECORD_FORK:
+ return tool->fork(tool, event, sample, machine);
+ case PERF_RECORD_EXIT:
+ return tool->exit(tool, event, sample, machine);
+ case PERF_RECORD_LOST:
+ if (tool->lost == perf_event__process_lost)
+ evlist->stats.total_lost += event->lost.lost;
+ return tool->lost(tool, event, sample, machine);
+ case PERF_RECORD_LOST_SAMPLES:
+ if (tool->lost_samples == perf_event__process_lost_samples &&
+ !(event->header.misc & PERF_RECORD_MISC_LOST_SAMPLES_BPF))
+ evlist->stats.total_lost_samples += event->lost_samples.lost;
+ return tool->lost_samples(tool, event, sample, machine);
+ case PERF_RECORD_READ:
+ dump_read(evsel, event);
+ return tool->read(tool, event, sample, evsel, machine);
+ case PERF_RECORD_THROTTLE:
+ return tool->throttle(tool, event, sample, machine);
+ case PERF_RECORD_UNTHROTTLE:
+ return tool->unthrottle(tool, event, sample, machine);
+ case PERF_RECORD_AUX:
+ if (tool->aux == perf_event__process_aux) {
+ if (event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
+ evlist->stats.total_aux_lost += 1;
+ if (event->aux.flags & PERF_AUX_FLAG_PARTIAL)
+ evlist->stats.total_aux_partial += 1;
+ if (event->aux.flags & PERF_AUX_FLAG_COLLISION)
+ evlist->stats.total_aux_collision += 1;
+ }
+ return tool->aux(tool, event, sample, machine);
+ case PERF_RECORD_ITRACE_START:
+ return tool->itrace_start(tool, event, sample, machine);
+ case PERF_RECORD_SWITCH:
+ case PERF_RECORD_SWITCH_CPU_WIDE:
+ return tool->context_switch(tool, event, sample, machine);
+ case PERF_RECORD_KSYMBOL:
+ return tool->ksymbol(tool, event, sample, machine);
+ case PERF_RECORD_BPF_EVENT:
+ return tool->bpf(tool, event, sample, machine);
+ case PERF_RECORD_TEXT_POKE:
+ return tool->text_poke(tool, event, sample, machine);
+ case PERF_RECORD_AUX_OUTPUT_HW_ID:
+ return tool->aux_output_hw_id(tool, event, sample, machine);
+ default:
+ ++evlist->stats.nr_unknown_events;
+ return -1;
+ }
+}
+
+static int perf_session__deliver_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_tool *tool,
+ u64 file_offset,
+ const char *file_path)
+{
+ struct perf_sample sample;
+ int ret = evlist__parse_sample(session->evlist, event, &sample);
+
+ if (ret) {
+ pr_err("Can't parse sample, err = %d\n", ret);
+ return ret;
+ }
+
+ ret = auxtrace__process_event(session, event, &sample, tool);
+ if (ret < 0)
+ return ret;
+ if (ret > 0)
+ return 0;
+
+ ret = machines__deliver_event(&session->machines, session->evlist,
+ event, &sample, tool, file_offset, file_path);
+
+ if (dump_trace && sample.aux_sample.size)
+ auxtrace__dump_auxtrace_sample(session, &sample);
+
+ return ret;
+}
+
+static s64 perf_session__process_user_event(struct perf_session *session,
+ union perf_event *event,
+ u64 file_offset,
+ const char *file_path)
+{
+ struct ordered_events *oe = &session->ordered_events;
+ struct perf_tool *tool = session->tool;
+ struct perf_sample sample = { .time = 0, };
+ int fd = perf_data__fd(session->data);
+ int err;
+
+ if (event->header.type != PERF_RECORD_COMPRESSED ||
+ tool->compressed == perf_session__process_compressed_event_stub)
+ dump_event(session->evlist, event, file_offset, &sample, file_path);
+
+ /* These events are processed right away */
+ switch (event->header.type) {
+ case PERF_RECORD_HEADER_ATTR:
+ err = tool->attr(tool, event, &session->evlist);
+ if (err == 0) {
+ perf_session__set_id_hdr_size(session);
+ perf_session__set_comm_exec(session);
+ }
+ return err;
+ case PERF_RECORD_EVENT_UPDATE:
+ return tool->event_update(tool, event, &session->evlist);
+ case PERF_RECORD_HEADER_EVENT_TYPE:
+ /*
+ * Deprecated, but we need to handle it for sake
+ * of old data files create in pipe mode.
+ */
+ return 0;
+ case PERF_RECORD_HEADER_TRACING_DATA:
+ /*
+ * Setup for reading amidst mmap, but only when we
+ * are in 'file' mode. The 'pipe' fd is in proper
+ * place already.
+ */
+ if (!perf_data__is_pipe(session->data))
+ lseek(fd, file_offset, SEEK_SET);
+ return tool->tracing_data(session, event);
+ case PERF_RECORD_HEADER_BUILD_ID:
+ return tool->build_id(session, event);
+ case PERF_RECORD_FINISHED_ROUND:
+ return tool->finished_round(tool, event, oe);
+ case PERF_RECORD_ID_INDEX:
+ return tool->id_index(session, event);
+ case PERF_RECORD_AUXTRACE_INFO:
+ return tool->auxtrace_info(session, event);
+ case PERF_RECORD_AUXTRACE:
+ /*
+ * Setup for reading amidst mmap, but only when we
+ * are in 'file' mode. The 'pipe' fd is in proper
+ * place already.
+ */
+ if (!perf_data__is_pipe(session->data))
+ lseek(fd, file_offset + event->header.size, SEEK_SET);
+ return tool->auxtrace(session, event);
+ case PERF_RECORD_AUXTRACE_ERROR:
+ perf_session__auxtrace_error_inc(session, event);
+ return tool->auxtrace_error(session, event);
+ case PERF_RECORD_THREAD_MAP:
+ return tool->thread_map(session, event);
+ case PERF_RECORD_CPU_MAP:
+ return tool->cpu_map(session, event);
+ case PERF_RECORD_STAT_CONFIG:
+ return tool->stat_config(session, event);
+ case PERF_RECORD_STAT:
+ return tool->stat(session, event);
+ case PERF_RECORD_STAT_ROUND:
+ return tool->stat_round(session, event);
+ case PERF_RECORD_TIME_CONV:
+ session->time_conv = event->time_conv;
+ return tool->time_conv(session, event);
+ case PERF_RECORD_HEADER_FEATURE:
+ return tool->feature(session, event);
+ case PERF_RECORD_COMPRESSED:
+ err = tool->compressed(session, event, file_offset, file_path);
+ if (err)
+ dump_event(session->evlist, event, file_offset, &sample, file_path);
+ return err;
+ case PERF_RECORD_FINISHED_INIT:
+ return tool->finished_init(session, event);
+ default:
+ return -EINVAL;
+ }
+}
+
+int perf_session__deliver_synth_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ struct evlist *evlist = session->evlist;
+ struct perf_tool *tool = session->tool;
+
+ events_stats__inc(&evlist->stats, event->header.type);
+
+ if (event->header.type >= PERF_RECORD_USER_TYPE_START)
+ return perf_session__process_user_event(session, event, 0, NULL);
+
+ return machines__deliver_event(&session->machines, evlist, event, sample, tool, 0, NULL);
+}
+
+static void event_swap(union perf_event *event, bool sample_id_all)
+{
+ perf_event__swap_op swap;
+
+ swap = perf_event__swap_ops[event->header.type];
+ if (swap)
+ swap(event, sample_id_all);
+}
+
+int perf_session__peek_event(struct perf_session *session, off_t file_offset,
+ void *buf, size_t buf_sz,
+ union perf_event **event_ptr,
+ struct perf_sample *sample)
+{
+ union perf_event *event;
+ size_t hdr_sz, rest;
+ int fd;
+
+ if (session->one_mmap && !session->header.needs_swap) {
+ event = file_offset - session->one_mmap_offset +
+ session->one_mmap_addr;
+ goto out_parse_sample;
+ }
+
+ if (perf_data__is_pipe(session->data))
+ return -1;
+
+ fd = perf_data__fd(session->data);
+ hdr_sz = sizeof(struct perf_event_header);
+
+ if (buf_sz < hdr_sz)
+ return -1;
+
+ if (lseek(fd, file_offset, SEEK_SET) == (off_t)-1 ||
+ readn(fd, buf, hdr_sz) != (ssize_t)hdr_sz)
+ return -1;
+
+ event = (union perf_event *)buf;
+
+ if (session->header.needs_swap)
+ perf_event_header__bswap(&event->header);
+
+ if (event->header.size < hdr_sz || event->header.size > buf_sz)
+ return -1;
+
+ buf += hdr_sz;
+ rest = event->header.size - hdr_sz;
+
+ if (readn(fd, buf, rest) != (ssize_t)rest)
+ return -1;
+
+ if (session->header.needs_swap)
+ event_swap(event, evlist__sample_id_all(session->evlist));
+
+out_parse_sample:
+
+ if (sample && event->header.type < PERF_RECORD_USER_TYPE_START &&
+ evlist__parse_sample(session->evlist, event, sample))
+ return -1;
+
+ *event_ptr = event;
+
+ return 0;
+}
+
+int perf_session__peek_events(struct perf_session *session, u64 offset,
+ u64 size, peek_events_cb_t cb, void *data)
+{
+ u64 max_offset = offset + size;
+ char buf[PERF_SAMPLE_MAX_SIZE];
+ union perf_event *event;
+ int err;
+
+ do {
+ err = perf_session__peek_event(session, offset, buf,
+ PERF_SAMPLE_MAX_SIZE, &event,
+ NULL);
+ if (err)
+ return err;
+
+ err = cb(session, event, offset, data);
+ if (err)
+ return err;
+
+ offset += event->header.size;
+ if (event->header.type == PERF_RECORD_AUXTRACE)
+ offset += event->auxtrace.size;
+
+ } while (offset < max_offset);
+
+ return err;
+}
+
+static s64 perf_session__process_event(struct perf_session *session,
+ union perf_event *event, u64 file_offset,
+ const char *file_path)
+{
+ struct evlist *evlist = session->evlist;
+ struct perf_tool *tool = session->tool;
+ int ret;
+
+ if (session->header.needs_swap)
+ event_swap(event, evlist__sample_id_all(evlist));
+
+ if (event->header.type >= PERF_RECORD_HEADER_MAX)
+ return -EINVAL;
+
+ events_stats__inc(&evlist->stats, event->header.type);
+
+ if (event->header.type >= PERF_RECORD_USER_TYPE_START)
+ return perf_session__process_user_event(session, event, file_offset, file_path);
+
+ if (tool->ordered_events) {
+ u64 timestamp = -1ULL;
+
+ ret = evlist__parse_sample_timestamp(evlist, event, &timestamp);
+ if (ret && ret != -1)
+ return ret;
+
+ ret = perf_session__queue_event(session, event, timestamp, file_offset, file_path);
+ if (ret != -ETIME)
+ return ret;
+ }
+
+ return perf_session__deliver_event(session, event, tool, file_offset, file_path);
+}
+
+void perf_event_header__bswap(struct perf_event_header *hdr)
+{
+ hdr->type = bswap_32(hdr->type);
+ hdr->misc = bswap_16(hdr->misc);
+ hdr->size = bswap_16(hdr->size);
+}
+
+struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
+{
+ return machine__findnew_thread(&session->machines.host, -1, pid);
+}
+
+int perf_session__register_idle_thread(struct perf_session *session)
+{
+ struct thread *thread = machine__idle_thread(&session->machines.host);
+
+ /* machine__idle_thread() got the thread, so put it */
+ thread__put(thread);
+ return thread ? 0 : -1;
+}
+
+static void
+perf_session__warn_order(const struct perf_session *session)
+{
+ const struct ordered_events *oe = &session->ordered_events;
+ struct evsel *evsel;
+ bool should_warn = true;
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (evsel->core.attr.write_backward)
+ should_warn = false;
+ }
+
+ if (!should_warn)
+ return;
+ if (oe->nr_unordered_events != 0)
+ ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events);
+}
+
+static void perf_session__warn_about_errors(const struct perf_session *session)
+{
+ const struct events_stats *stats = &session->evlist->stats;
+
+ if (session->tool->lost == perf_event__process_lost &&
+ stats->nr_events[PERF_RECORD_LOST] != 0) {
+ ui__warning("Processed %d events and lost %d chunks!\n\n"
+ "Check IO/CPU overload!\n\n",
+ stats->nr_events[0],
+ stats->nr_events[PERF_RECORD_LOST]);
+ }
+
+ if (session->tool->lost_samples == perf_event__process_lost_samples) {
+ double drop_rate;
+
+ drop_rate = (double)stats->total_lost_samples /
+ (double) (stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples);
+ if (drop_rate > 0.05) {
+ ui__warning("Processed %" PRIu64 " samples and lost %3.2f%%!\n\n",
+ stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples,
+ drop_rate * 100.0);
+ }
+ }
+
+ if (session->tool->aux == perf_event__process_aux &&
+ stats->total_aux_lost != 0) {
+ ui__warning("AUX data lost %" PRIu64 " times out of %u!\n\n",
+ stats->total_aux_lost,
+ stats->nr_events[PERF_RECORD_AUX]);
+ }
+
+ if (session->tool->aux == perf_event__process_aux &&
+ stats->total_aux_partial != 0) {
+ bool vmm_exclusive = false;
+
+ (void)sysfs__read_bool("module/kvm_intel/parameters/vmm_exclusive",
+ &vmm_exclusive);
+
+ ui__warning("AUX data had gaps in it %" PRIu64 " times out of %u!\n\n"
+ "Are you running a KVM guest in the background?%s\n\n",
+ stats->total_aux_partial,
+ stats->nr_events[PERF_RECORD_AUX],
+ vmm_exclusive ?
+ "\nReloading kvm_intel module with vmm_exclusive=0\n"
+ "will reduce the gaps to only guest's timeslices." :
+ "");
+ }
+
+ if (session->tool->aux == perf_event__process_aux &&
+ stats->total_aux_collision != 0) {
+ ui__warning("AUX data detected collision %" PRIu64 " times out of %u!\n\n",
+ stats->total_aux_collision,
+ stats->nr_events[PERF_RECORD_AUX]);
+ }
+
+ if (stats->nr_unknown_events != 0) {
+ ui__warning("Found %u unknown events!\n\n"
+ "Is this an older tool processing a perf.data "
+ "file generated by a more recent tool?\n\n"
+ "If that is not the case, consider "
+ "reporting to linux-kernel@vger.kernel.org.\n\n",
+ stats->nr_unknown_events);
+ }
+
+ if (stats->nr_unknown_id != 0) {
+ ui__warning("%u samples with id not present in the header\n",
+ stats->nr_unknown_id);
+ }
+
+ if (stats->nr_invalid_chains != 0) {
+ ui__warning("Found invalid callchains!\n\n"
+ "%u out of %u events were discarded for this reason.\n\n"
+ "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
+ stats->nr_invalid_chains,
+ stats->nr_events[PERF_RECORD_SAMPLE]);
+ }
+
+ if (stats->nr_unprocessable_samples != 0) {
+ ui__warning("%u unprocessable samples recorded.\n"
+ "Do you have a KVM guest running and not using 'perf kvm'?\n",
+ stats->nr_unprocessable_samples);
+ }
+
+ perf_session__warn_order(session);
+
+ events_stats__auxtrace_error_warn(stats);
+
+ if (stats->nr_proc_map_timeout != 0) {
+ ui__warning("%d map information files for pre-existing threads were\n"
+ "not processed, if there are samples for addresses they\n"
+ "will not be resolved, you may find out which are these\n"
+ "threads by running with -v and redirecting the output\n"
+ "to a file.\n"
+ "The time limit to process proc map is too short?\n"
+ "Increase it by --proc-map-timeout\n",
+ stats->nr_proc_map_timeout);
+ }
+}
+
+static int perf_session__flush_thread_stack(struct thread *thread,
+ void *p __maybe_unused)
+{
+ return thread_stack__flush(thread);
+}
+
+static int perf_session__flush_thread_stacks(struct perf_session *session)
+{
+ return machines__for_each_thread(&session->machines,
+ perf_session__flush_thread_stack,
+ NULL);
+}
+
+volatile sig_atomic_t session_done;
+
+static int __perf_session__process_decomp_events(struct perf_session *session);
+
+static int __perf_session__process_pipe_events(struct perf_session *session)
+{
+ struct ordered_events *oe = &session->ordered_events;
+ struct perf_tool *tool = session->tool;
+ union perf_event *event;
+ uint32_t size, cur_size = 0;
+ void *buf = NULL;
+ s64 skip = 0;
+ u64 head;
+ ssize_t err;
+ void *p;
+
+ perf_tool__fill_defaults(tool);
+
+ head = 0;
+ cur_size = sizeof(union perf_event);
+
+ buf = malloc(cur_size);
+ if (!buf)
+ return -errno;
+ ordered_events__set_copy_on_queue(oe, true);
+more:
+ event = buf;
+ err = perf_data__read(session->data, event,
+ sizeof(struct perf_event_header));
+ if (err <= 0) {
+ if (err == 0)
+ goto done;
+
+ pr_err("failed to read event header\n");
+ goto out_err;
+ }
+
+ if (session->header.needs_swap)
+ perf_event_header__bswap(&event->header);
+
+ size = event->header.size;
+ if (size < sizeof(struct perf_event_header)) {
+ pr_err("bad event header size\n");
+ goto out_err;
+ }
+
+ if (size > cur_size) {
+ void *new = realloc(buf, size);
+ if (!new) {
+ pr_err("failed to allocate memory to read event\n");
+ goto out_err;
+ }
+ buf = new;
+ cur_size = size;
+ event = buf;
+ }
+ p = event;
+ p += sizeof(struct perf_event_header);
+
+ if (size - sizeof(struct perf_event_header)) {
+ err = perf_data__read(session->data, p,
+ size - sizeof(struct perf_event_header));
+ if (err <= 0) {
+ if (err == 0) {
+ pr_err("unexpected end of event stream\n");
+ goto done;
+ }
+
+ pr_err("failed to read event data\n");
+ goto out_err;
+ }
+ }
+
+ if ((skip = perf_session__process_event(session, event, head, "pipe")) < 0) {
+ pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
+ head, event->header.size, event->header.type);
+ err = -EINVAL;
+ goto out_err;
+ }
+
+ head += size;
+
+ if (skip > 0)
+ head += skip;
+
+ err = __perf_session__process_decomp_events(session);
+ if (err)
+ goto out_err;
+
+ if (!session_done())
+ goto more;
+done:
+ /* do the final flush for ordered samples */
+ err = ordered_events__flush(oe, OE_FLUSH__FINAL);
+ if (err)
+ goto out_err;
+ err = auxtrace__flush_events(session, tool);
+ if (err)
+ goto out_err;
+ err = perf_session__flush_thread_stacks(session);
+out_err:
+ free(buf);
+ if (!tool->no_warn)
+ perf_session__warn_about_errors(session);
+ ordered_events__free(&session->ordered_events);
+ auxtrace__free_events(session);
+ return err;
+}
+
+static union perf_event *
+prefetch_event(char *buf, u64 head, size_t mmap_size,
+ bool needs_swap, union perf_event *error)
+{
+ union perf_event *event;
+ u16 event_size;
+
+ /*
+ * Ensure we have enough space remaining to read
+ * the size of the event in the headers.
+ */
+ if (head + sizeof(event->header) > mmap_size)
+ return NULL;
+
+ event = (union perf_event *)(buf + head);
+ if (needs_swap)
+ perf_event_header__bswap(&event->header);
+
+ event_size = event->header.size;
+ if (head + event_size <= mmap_size)
+ return event;
+
+ /* We're not fetching the event so swap back again */
+ if (needs_swap)
+ perf_event_header__bswap(&event->header);
+
+ /* Check if the event fits into the next mmapped buf. */
+ if (event_size <= mmap_size - head % page_size) {
+ /* Remap buf and fetch again. */
+ return NULL;
+ }
+
+ /* Invalid input. Event size should never exceed mmap_size. */
+ pr_debug("%s: head=%#" PRIx64 " event->header.size=%#x, mmap_size=%#zx:"
+ " fuzzed or compressed perf.data?\n", __func__, head, event_size, mmap_size);
+
+ return error;
+}
+
+static union perf_event *
+fetch_mmaped_event(u64 head, size_t mmap_size, char *buf, bool needs_swap)
+{
+ return prefetch_event(buf, head, mmap_size, needs_swap, ERR_PTR(-EINVAL));
+}
+
+static union perf_event *
+fetch_decomp_event(u64 head, size_t mmap_size, char *buf, bool needs_swap)
+{
+ return prefetch_event(buf, head, mmap_size, needs_swap, NULL);
+}
+
+static int __perf_session__process_decomp_events(struct perf_session *session)
+{
+ s64 skip;
+ u64 size;
+ struct decomp *decomp = session->active_decomp->decomp_last;
+
+ if (!decomp)
+ return 0;
+
+ while (decomp->head < decomp->size && !session_done()) {
+ union perf_event *event = fetch_decomp_event(decomp->head, decomp->size, decomp->data,
+ session->header.needs_swap);
+
+ if (!event)
+ break;
+
+ size = event->header.size;
+
+ if (size < sizeof(struct perf_event_header) ||
+ (skip = perf_session__process_event(session, event, decomp->file_pos,
+ decomp->file_path)) < 0) {
+ pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
+ decomp->file_pos + decomp->head, event->header.size, event->header.type);
+ return -EINVAL;
+ }
+
+ if (skip)
+ size += skip;
+
+ decomp->head += size;
+ }
+
+ return 0;
+}
+
+/*
+ * On 64bit we can mmap the data file in one go. No need for tiny mmap
+ * slices. On 32bit we use 32MB.
+ */
+#if BITS_PER_LONG == 64
+#define MMAP_SIZE ULLONG_MAX
+#define NUM_MMAPS 1
+#else
+#define MMAP_SIZE (32 * 1024 * 1024ULL)
+#define NUM_MMAPS 128
+#endif
+
+struct reader;
+
+typedef s64 (*reader_cb_t)(struct perf_session *session,
+ union perf_event *event,
+ u64 file_offset,
+ const char *file_path);
+
+struct reader {
+ int fd;
+ const char *path;
+ u64 data_size;
+ u64 data_offset;
+ reader_cb_t process;
+ bool in_place_update;
+ char *mmaps[NUM_MMAPS];
+ size_t mmap_size;
+ int mmap_idx;
+ char *mmap_cur;
+ u64 file_pos;
+ u64 file_offset;
+ u64 head;
+ u64 size;
+ bool done;
+ struct zstd_data zstd_data;
+ struct decomp_data decomp_data;
+};
+
+static int
+reader__init(struct reader *rd, bool *one_mmap)
+{
+ u64 data_size = rd->data_size;
+ char **mmaps = rd->mmaps;
+
+ rd->head = rd->data_offset;
+ data_size += rd->data_offset;
+
+ rd->mmap_size = MMAP_SIZE;
+ if (rd->mmap_size > data_size) {
+ rd->mmap_size = data_size;
+ if (one_mmap)
+ *one_mmap = true;
+ }
+
+ memset(mmaps, 0, sizeof(rd->mmaps));
+
+ if (zstd_init(&rd->zstd_data, 0))
+ return -1;
+ rd->decomp_data.zstd_decomp = &rd->zstd_data;
+
+ return 0;
+}
+
+static void
+reader__release_decomp(struct reader *rd)
+{
+ perf_decomp__release_events(rd->decomp_data.decomp);
+ zstd_fini(&rd->zstd_data);
+}
+
+static int
+reader__mmap(struct reader *rd, struct perf_session *session)
+{
+ int mmap_prot, mmap_flags;
+ char *buf, **mmaps = rd->mmaps;
+ u64 page_offset;
+
+ mmap_prot = PROT_READ;
+ mmap_flags = MAP_SHARED;
+
+ if (rd->in_place_update) {
+ mmap_prot |= PROT_WRITE;
+ } else if (session->header.needs_swap) {
+ mmap_prot |= PROT_WRITE;
+ mmap_flags = MAP_PRIVATE;
+ }
+
+ if (mmaps[rd->mmap_idx]) {
+ munmap(mmaps[rd->mmap_idx], rd->mmap_size);
+ mmaps[rd->mmap_idx] = NULL;
+ }
+
+ page_offset = page_size * (rd->head / page_size);
+ rd->file_offset += page_offset;
+ rd->head -= page_offset;
+
+ buf = mmap(NULL, rd->mmap_size, mmap_prot, mmap_flags, rd->fd,
+ rd->file_offset);
+ if (buf == MAP_FAILED) {
+ pr_err("failed to mmap file\n");
+ return -errno;
+ }
+ mmaps[rd->mmap_idx] = rd->mmap_cur = buf;
+ rd->mmap_idx = (rd->mmap_idx + 1) & (ARRAY_SIZE(rd->mmaps) - 1);
+ rd->file_pos = rd->file_offset + rd->head;
+ if (session->one_mmap) {
+ session->one_mmap_addr = buf;
+ session->one_mmap_offset = rd->file_offset;
+ }
+
+ return 0;
+}
+
+enum {
+ READER_OK,
+ READER_NODATA,
+};
+
+static int
+reader__read_event(struct reader *rd, struct perf_session *session,
+ struct ui_progress *prog)
+{
+ u64 size;
+ int err = READER_OK;
+ union perf_event *event;
+ s64 skip;
+
+ event = fetch_mmaped_event(rd->head, rd->mmap_size, rd->mmap_cur,
+ session->header.needs_swap);
+ if (IS_ERR(event))
+ return PTR_ERR(event);
+
+ if (!event)
+ return READER_NODATA;
+
+ size = event->header.size;
+
+ skip = -EINVAL;
+
+ if (size < sizeof(struct perf_event_header) ||
+ (skip = rd->process(session, event, rd->file_pos, rd->path)) < 0) {
+ pr_err("%#" PRIx64 " [%#x]: failed to process type: %d [%s]\n",
+ rd->file_offset + rd->head, event->header.size,
+ event->header.type, strerror(-skip));
+ err = skip;
+ goto out;
+ }
+
+ if (skip)
+ size += skip;
+
+ rd->size += size;
+ rd->head += size;
+ rd->file_pos += size;
+
+ err = __perf_session__process_decomp_events(session);
+ if (err)
+ goto out;
+
+ ui_progress__update(prog, size);
+
+out:
+ return err;
+}
+
+static inline bool
+reader__eof(struct reader *rd)
+{
+ return (rd->file_pos >= rd->data_size + rd->data_offset);
+}
+
+static int
+reader__process_events(struct reader *rd, struct perf_session *session,
+ struct ui_progress *prog)
+{
+ int err;
+
+ err = reader__init(rd, &session->one_mmap);
+ if (err)
+ goto out;
+
+ session->active_decomp = &rd->decomp_data;
+
+remap:
+ err = reader__mmap(rd, session);
+ if (err)
+ goto out;
+
+more:
+ err = reader__read_event(rd, session, prog);
+ if (err < 0)
+ goto out;
+ else if (err == READER_NODATA)
+ goto remap;
+
+ if (session_done())
+ goto out;
+
+ if (!reader__eof(rd))
+ goto more;
+
+out:
+ session->active_decomp = &session->decomp_data;
+ return err;
+}
+
+static s64 process_simple(struct perf_session *session,
+ union perf_event *event,
+ u64 file_offset,
+ const char *file_path)
+{
+ return perf_session__process_event(session, event, file_offset, file_path);
+}
+
+static int __perf_session__process_events(struct perf_session *session)
+{
+ struct reader rd = {
+ .fd = perf_data__fd(session->data),
+ .path = session->data->file.path,
+ .data_size = session->header.data_size,
+ .data_offset = session->header.data_offset,
+ .process = process_simple,
+ .in_place_update = session->data->in_place_update,
+ };
+ struct ordered_events *oe = &session->ordered_events;
+ struct perf_tool *tool = session->tool;
+ struct ui_progress prog;
+ int err;
+
+ perf_tool__fill_defaults(tool);
+
+ if (rd.data_size == 0)
+ return -1;
+
+ ui_progress__init_size(&prog, rd.data_size, "Processing events...");
+
+ err = reader__process_events(&rd, session, &prog);
+ if (err)
+ goto out_err;
+ /* do the final flush for ordered samples */
+ err = ordered_events__flush(oe, OE_FLUSH__FINAL);
+ if (err)
+ goto out_err;
+ err = auxtrace__flush_events(session, tool);
+ if (err)
+ goto out_err;
+ err = perf_session__flush_thread_stacks(session);
+out_err:
+ ui_progress__finish();
+ if (!tool->no_warn)
+ perf_session__warn_about_errors(session);
+ /*
+ * We may switching perf.data output, make ordered_events
+ * reusable.
+ */
+ ordered_events__reinit(&session->ordered_events);
+ auxtrace__free_events(session);
+ reader__release_decomp(&rd);
+ session->one_mmap = false;
+ return err;
+}
+
+/*
+ * Processing 2 MB of data from each reader in sequence,
+ * because that's the way the ordered events sorting works
+ * most efficiently.
+ */
+#define READER_MAX_SIZE (2 * 1024 * 1024)
+
+/*
+ * This function reads, merge and process directory data.
+ * It assumens the version 1 of directory data, where each
+ * data file holds per-cpu data, already sorted by kernel.
+ */
+static int __perf_session__process_dir_events(struct perf_session *session)
+{
+ struct perf_data *data = session->data;
+ struct perf_tool *tool = session->tool;
+ int i, ret, readers, nr_readers;
+ struct ui_progress prog;
+ u64 total_size = perf_data__size(session->data);
+ struct reader *rd;
+
+ perf_tool__fill_defaults(tool);
+
+ ui_progress__init_size(&prog, total_size, "Sorting events...");
+
+ nr_readers = 1;
+ for (i = 0; i < data->dir.nr; i++) {
+ if (data->dir.files[i].size)
+ nr_readers++;
+ }
+
+ rd = zalloc(nr_readers * sizeof(struct reader));
+ if (!rd)
+ return -ENOMEM;
+
+ rd[0] = (struct reader) {
+ .fd = perf_data__fd(session->data),
+ .path = session->data->file.path,
+ .data_size = session->header.data_size,
+ .data_offset = session->header.data_offset,
+ .process = process_simple,
+ .in_place_update = session->data->in_place_update,
+ };
+ ret = reader__init(&rd[0], NULL);
+ if (ret)
+ goto out_err;
+ ret = reader__mmap(&rd[0], session);
+ if (ret)
+ goto out_err;
+ readers = 1;
+
+ for (i = 0; i < data->dir.nr; i++) {
+ if (!data->dir.files[i].size)
+ continue;
+ rd[readers] = (struct reader) {
+ .fd = data->dir.files[i].fd,
+ .path = data->dir.files[i].path,
+ .data_size = data->dir.files[i].size,
+ .data_offset = 0,
+ .process = process_simple,
+ .in_place_update = session->data->in_place_update,
+ };
+ ret = reader__init(&rd[readers], NULL);
+ if (ret)
+ goto out_err;
+ ret = reader__mmap(&rd[readers], session);
+ if (ret)
+ goto out_err;
+ readers++;
+ }
+
+ i = 0;
+ while (readers) {
+ if (session_done())
+ break;
+
+ if (rd[i].done) {
+ i = (i + 1) % nr_readers;
+ continue;
+ }
+ if (reader__eof(&rd[i])) {
+ rd[i].done = true;
+ readers--;
+ continue;
+ }
+
+ session->active_decomp = &rd[i].decomp_data;
+ ret = reader__read_event(&rd[i], session, &prog);
+ if (ret < 0) {
+ goto out_err;
+ } else if (ret == READER_NODATA) {
+ ret = reader__mmap(&rd[i], session);
+ if (ret)
+ goto out_err;
+ }
+
+ if (rd[i].size >= READER_MAX_SIZE) {
+ rd[i].size = 0;
+ i = (i + 1) % nr_readers;
+ }
+ }
+
+ ret = ordered_events__flush(&session->ordered_events, OE_FLUSH__FINAL);
+ if (ret)
+ goto out_err;
+
+ ret = perf_session__flush_thread_stacks(session);
+out_err:
+ ui_progress__finish();
+
+ if (!tool->no_warn)
+ perf_session__warn_about_errors(session);
+
+ /*
+ * We may switching perf.data output, make ordered_events
+ * reusable.
+ */
+ ordered_events__reinit(&session->ordered_events);
+
+ session->one_mmap = false;
+
+ session->active_decomp = &session->decomp_data;
+ for (i = 0; i < nr_readers; i++)
+ reader__release_decomp(&rd[i]);
+ zfree(&rd);
+
+ return ret;
+}
+
+int perf_session__process_events(struct perf_session *session)
+{
+ if (perf_session__register_idle_thread(session) < 0)
+ return -ENOMEM;
+
+ if (perf_data__is_pipe(session->data))
+ return __perf_session__process_pipe_events(session);
+
+ if (perf_data__is_dir(session->data) && session->data->dir.nr)
+ return __perf_session__process_dir_events(session);
+
+ return __perf_session__process_events(session);
+}
+
+bool perf_session__has_traces(struct perf_session *session, const char *msg)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT)
+ return true;
+ }
+
+ pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
+ return false;
+}
+
+int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr)
+{
+ char *bracket;
+ struct ref_reloc_sym *ref;
+ struct kmap *kmap;
+
+ ref = zalloc(sizeof(struct ref_reloc_sym));
+ if (ref == NULL)
+ return -ENOMEM;
+
+ ref->name = strdup(symbol_name);
+ if (ref->name == NULL) {
+ free(ref);
+ return -ENOMEM;
+ }
+
+ bracket = strchr(ref->name, ']');
+ if (bracket)
+ *bracket = '\0';
+
+ ref->addr = addr;
+
+ kmap = map__kmap(map);
+ if (kmap)
+ kmap->ref_reloc_sym = ref;
+
+ return 0;
+}
+
+size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp)
+{
+ return machines__fprintf_dsos(&session->machines, fp);
+}
+
+size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp,
+ bool (skip)(struct dso *dso, int parm), int parm)
+{
+ return machines__fprintf_dsos_buildid(&session->machines, fp, skip, parm);
+}
+
+size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp,
+ bool skip_empty)
+{
+ size_t ret;
+ const char *msg = "";
+
+ if (perf_header__has_feat(&session->header, HEADER_AUXTRACE))
+ msg = " (excludes AUX area (e.g. instruction trace) decoded / synthesized events)";
+
+ ret = fprintf(fp, "\nAggregated stats:%s\n", msg);
+
+ ret += events_stats__fprintf(&session->evlist->stats, fp, skip_empty);
+ return ret;
+}
+
+size_t perf_session__fprintf(struct perf_session *session, FILE *fp)
+{
+ /*
+ * FIXME: Here we have to actually print all the machines in this
+ * session, not just the host...
+ */
+ return machine__fprintf(&session->machines.host, fp);
+}
+
+struct evsel *perf_session__find_first_evtype(struct perf_session *session,
+ unsigned int type)
+{
+ struct evsel *pos;
+
+ evlist__for_each_entry(session->evlist, pos) {
+ if (pos->core.attr.type == type)
+ return pos;
+ }
+ return NULL;
+}
+
+int perf_session__cpu_bitmap(struct perf_session *session,
+ const char *cpu_list, unsigned long *cpu_bitmap)
+{
+ int i, err = -1;
+ struct perf_cpu_map *map;
+ int nr_cpus = min(session->header.env.nr_cpus_avail, MAX_NR_CPUS);
+
+ for (i = 0; i < PERF_TYPE_MAX; ++i) {
+ struct evsel *evsel;
+
+ evsel = perf_session__find_first_evtype(session, i);
+ if (!evsel)
+ continue;
+
+ if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CPU)) {
+ pr_err("File does not contain CPU events. "
+ "Remove -C option to proceed.\n");
+ return -1;
+ }
+ }
+
+ map = perf_cpu_map__new(cpu_list);
+ if (map == NULL) {
+ pr_err("Invalid cpu_list\n");
+ return -1;
+ }
+
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
+ struct perf_cpu cpu = perf_cpu_map__cpu(map, i);
+
+ if (cpu.cpu >= nr_cpus) {
+ pr_err("Requested CPU %d too large. "
+ "Consider raising MAX_NR_CPUS\n", cpu.cpu);
+ goto out_delete_map;
+ }
+
+ __set_bit(cpu.cpu, cpu_bitmap);
+ }
+
+ err = 0;
+
+out_delete_map:
+ perf_cpu_map__put(map);
+ return err;
+}
+
+void perf_session__fprintf_info(struct perf_session *session, FILE *fp,
+ bool full)
+{
+ if (session == NULL || fp == NULL)
+ return;
+
+ fprintf(fp, "# ========\n");
+ perf_header__fprintf_info(session, fp, full);
+ fprintf(fp, "# ========\n#\n");
+}
+
+static int perf_session__register_guest(struct perf_session *session, pid_t machine_pid)
+{
+ struct machine *machine = machines__findnew(&session->machines, machine_pid);
+ struct thread *thread;
+
+ if (!machine)
+ return -ENOMEM;
+
+ machine->single_address_space = session->machines.host.single_address_space;
+
+ thread = machine__idle_thread(machine);
+ if (!thread)
+ return -ENOMEM;
+ thread__put(thread);
+
+ machine->kallsyms_filename = perf_data__guest_kallsyms_name(session->data, machine_pid);
+
+ return 0;
+}
+
+static int perf_session__set_guest_cpu(struct perf_session *session, pid_t pid,
+ pid_t tid, int guest_cpu)
+{
+ struct machine *machine = &session->machines.host;
+ struct thread *thread = machine__findnew_thread(machine, pid, tid);
+
+ if (!thread)
+ return -ENOMEM;
+ thread__set_guest_cpu(thread, guest_cpu);
+ thread__put(thread);
+
+ return 0;
+}
+
+int perf_event__process_id_index(struct perf_session *session,
+ union perf_event *event)
+{
+ struct evlist *evlist = session->evlist;
+ struct perf_record_id_index *ie = &event->id_index;
+ size_t sz = ie->header.size - sizeof(*ie);
+ size_t i, nr, max_nr;
+ size_t e1_sz = sizeof(struct id_index_entry);
+ size_t e2_sz = sizeof(struct id_index_entry_2);
+ size_t etot_sz = e1_sz + e2_sz;
+ struct id_index_entry_2 *e2;
+ pid_t last_pid = 0;
+
+ max_nr = sz / e1_sz;
+ nr = ie->nr;
+ if (nr > max_nr) {
+ printf("Too big: nr %zu max_nr %zu\n", nr, max_nr);
+ return -EINVAL;
+ }
+
+ if (sz >= nr * etot_sz) {
+ max_nr = sz / etot_sz;
+ if (nr > max_nr) {
+ printf("Too big2: nr %zu max_nr %zu\n", nr, max_nr);
+ return -EINVAL;
+ }
+ e2 = (void *)ie + sizeof(*ie) + nr * e1_sz;
+ } else {
+ e2 = NULL;
+ }
+
+ if (dump_trace)
+ fprintf(stdout, " nr: %zu\n", nr);
+
+ for (i = 0; i < nr; i++, (e2 ? e2++ : 0)) {
+ struct id_index_entry *e = &ie->entries[i];
+ struct perf_sample_id *sid;
+ int ret;
+
+ if (dump_trace) {
+ fprintf(stdout, " ... id: %"PRI_lu64, e->id);
+ fprintf(stdout, " idx: %"PRI_lu64, e->idx);
+ fprintf(stdout, " cpu: %"PRI_ld64, e->cpu);
+ fprintf(stdout, " tid: %"PRI_ld64, e->tid);
+ if (e2) {
+ fprintf(stdout, " machine_pid: %"PRI_ld64, e2->machine_pid);
+ fprintf(stdout, " vcpu: %"PRI_lu64"\n", e2->vcpu);
+ } else {
+ fprintf(stdout, "\n");
+ }
+ }
+
+ sid = evlist__id2sid(evlist, e->id);
+ if (!sid)
+ return -ENOENT;
+
+ sid->idx = e->idx;
+ sid->cpu.cpu = e->cpu;
+ sid->tid = e->tid;
+
+ if (!e2)
+ continue;
+
+ sid->machine_pid = e2->machine_pid;
+ sid->vcpu.cpu = e2->vcpu;
+
+ if (!sid->machine_pid)
+ continue;
+
+ if (sid->machine_pid != last_pid) {
+ ret = perf_session__register_guest(session, sid->machine_pid);
+ if (ret)
+ return ret;
+ last_pid = sid->machine_pid;
+ perf_guest = true;
+ }
+
+ ret = perf_session__set_guest_cpu(session, sid->machine_pid, e->tid, e2->vcpu);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
new file mode 100644
index 000000000..ee3715e85
--- /dev/null
+++ b/tools/perf/util/session.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SESSION_H
+#define __PERF_SESSION_H
+
+#include "trace-event.h"
+#include "event.h"
+#include "header.h"
+#include "machine.h"
+#include "data.h"
+#include "ordered-events.h"
+#include "util/compress.h"
+#include <linux/kernel.h>
+#include <linux/rbtree.h>
+#include <linux/perf_event.h>
+
+struct ip_callchain;
+struct symbol;
+struct thread;
+
+struct auxtrace;
+struct itrace_synth_opts;
+
+struct decomp_data {
+ struct decomp *decomp;
+ struct decomp *decomp_last;
+ struct zstd_data *zstd_decomp;
+};
+
+struct perf_session {
+ struct perf_header header;
+ struct machines machines;
+ struct evlist *evlist;
+ struct auxtrace *auxtrace;
+ struct itrace_synth_opts *itrace_synth_opts;
+ struct list_head auxtrace_index;
+#ifdef HAVE_LIBTRACEEVENT
+ struct trace_event tevent;
+#endif
+ struct perf_record_time_conv time_conv;
+ bool repipe;
+ bool one_mmap;
+ void *one_mmap_addr;
+ u64 one_mmap_offset;
+ struct ordered_events ordered_events;
+ struct perf_data *data;
+ struct perf_tool *tool;
+ u64 bytes_transferred;
+ u64 bytes_compressed;
+ struct zstd_data zstd_data;
+ struct decomp_data decomp_data;
+ struct decomp_data *active_decomp;
+};
+
+struct decomp {
+ struct decomp *next;
+ u64 file_pos;
+ const char *file_path;
+ size_t mmap_len;
+ u64 head;
+ size_t size;
+ char data[];
+};
+
+struct perf_tool;
+
+struct perf_session *__perf_session__new(struct perf_data *data,
+ bool repipe, int repipe_fd,
+ struct perf_tool *tool);
+
+static inline struct perf_session *perf_session__new(struct perf_data *data,
+ struct perf_tool *tool)
+{
+ return __perf_session__new(data, false, -1, tool);
+}
+
+void perf_session__delete(struct perf_session *session);
+
+void perf_event_header__bswap(struct perf_event_header *hdr);
+
+int perf_session__peek_event(struct perf_session *session, off_t file_offset,
+ void *buf, size_t buf_sz,
+ union perf_event **event_ptr,
+ struct perf_sample *sample);
+typedef int (*peek_events_cb_t)(struct perf_session *session,
+ union perf_event *event, u64 offset,
+ void *data);
+int perf_session__peek_events(struct perf_session *session, u64 offset,
+ u64 size, peek_events_cb_t cb, void *data);
+
+int perf_session__process_events(struct perf_session *session);
+
+int perf_session__queue_event(struct perf_session *s, union perf_event *event,
+ u64 timestamp, u64 file_offset, const char *file_path);
+
+void perf_tool__fill_defaults(struct perf_tool *tool);
+
+int perf_session__resolve_callchain(struct perf_session *session,
+ struct evsel *evsel,
+ struct thread *thread,
+ struct ip_callchain *chain,
+ struct symbol **parent);
+
+bool perf_session__has_traces(struct perf_session *session, const char *msg);
+
+void perf_event__attr_swap(struct perf_event_attr *attr);
+
+int perf_session__create_kernel_maps(struct perf_session *session);
+
+void perf_session__set_id_hdr_size(struct perf_session *session);
+
+static inline
+struct machine *perf_session__find_machine(struct perf_session *session, pid_t pid)
+{
+ return machines__find(&session->machines, pid);
+}
+
+static inline
+struct machine *perf_session__findnew_machine(struct perf_session *session, pid_t pid)
+{
+ return machines__findnew(&session->machines, pid);
+}
+
+struct thread *perf_session__findnew(struct perf_session *session, pid_t pid);
+int perf_session__register_idle_thread(struct perf_session *session);
+
+size_t perf_session__fprintf(struct perf_session *session, FILE *fp);
+
+size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp);
+
+size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp,
+ bool (fn)(struct dso *dso, int parm), int parm);
+
+size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp,
+ bool skip_empty);
+
+struct evsel *perf_session__find_first_evtype(struct perf_session *session,
+ unsigned int type);
+
+int perf_session__cpu_bitmap(struct perf_session *session,
+ const char *cpu_list, unsigned long *cpu_bitmap);
+
+void perf_session__fprintf_info(struct perf_session *s, FILE *fp, bool full);
+
+struct evsel_str_handler;
+
+#define perf_session__set_tracepoints_handlers(session, array) \
+ __evlist__set_tracepoints_handlers(session->evlist, array, ARRAY_SIZE(array))
+
+extern volatile int session_done;
+
+#define session_done() READ_ONCE(session_done)
+
+int perf_session__deliver_synth_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample);
+
+int perf_event__process_id_index(struct perf_session *session,
+ union perf_event *event);
+
+int perf_event__process_finished_round(struct perf_tool *tool,
+ union perf_event *event,
+ struct ordered_events *oe);
+
+#endif /* __PERF_SESSION_H */
diff --git a/tools/perf/util/setns.c b/tools/perf/util/setns.c
new file mode 100644
index 000000000..48f9c0af6
--- /dev/null
+++ b/tools/perf/util/setns.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: LGPL-2.1
+
+#include "namespaces.h"
+#include <unistd.h>
+#include <sys/syscall.h>
+
+int setns(int fd, int nstype)
+{
+ return syscall(__NR_setns, fd, nstype);
+}
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
new file mode 100644
index 000000000..79d5e2955
--- /dev/null
+++ b/tools/perf/util/setup.py
@@ -0,0 +1,114 @@
+from os import getenv, path
+from subprocess import Popen, PIPE
+from re import sub
+
+cc = getenv("CC")
+
+# Check if CC has options, as is the case in yocto, where it uses CC="cc --sysroot..."
+cc_tokens = cc.split()
+if len(cc_tokens) > 1:
+ cc = cc_tokens[0]
+ cc_options = " ".join([str(e) for e in cc_tokens[1:]]) + " "
+else:
+ cc_options = ""
+
+cc_is_clang = b"clang version" in Popen([cc, "-v"], stderr=PIPE).stderr.readline()
+src_feature_tests = getenv('srctree') + '/tools/build/feature'
+
+def clang_has_option(option):
+ cc_output = Popen([cc, cc_options + option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines()
+ return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o))] == [ ]
+
+if cc_is_clang:
+ from sysconfig import get_config_vars
+ vars = get_config_vars()
+ for var in ('CFLAGS', 'OPT'):
+ vars[var] = sub("-specs=[^ ]+", "", vars[var])
+ if not clang_has_option("-mcet"):
+ vars[var] = sub("-mcet", "", vars[var])
+ if not clang_has_option("-fcf-protection"):
+ vars[var] = sub("-fcf-protection", "", vars[var])
+ if not clang_has_option("-fstack-clash-protection"):
+ vars[var] = sub("-fstack-clash-protection", "", vars[var])
+ if not clang_has_option("-fstack-protector-strong"):
+ vars[var] = sub("-fstack-protector-strong", "", vars[var])
+ if not clang_has_option("-fno-semantic-interposition"):
+ vars[var] = sub("-fno-semantic-interposition", "", vars[var])
+ if not clang_has_option("-ffat-lto-objects"):
+ vars[var] = sub("-ffat-lto-objects", "", vars[var])
+ if not clang_has_option("-ftree-loop-distribute-patterns"):
+ vars[var] = sub("-ftree-loop-distribute-patterns", "", vars[var])
+ if not clang_has_option("-gno-variable-location-views"):
+ vars[var] = sub("-gno-variable-location-views", "", vars[var])
+
+from setuptools import setup, Extension
+
+from setuptools.command.build_ext import build_ext as _build_ext
+from setuptools.command.install_lib import install_lib as _install_lib
+
+class build_ext(_build_ext):
+ def finalize_options(self):
+ _build_ext.finalize_options(self)
+ self.build_lib = build_lib
+ self.build_temp = build_tmp
+
+class install_lib(_install_lib):
+ def finalize_options(self):
+ _install_lib.finalize_options(self)
+ self.build_dir = build_lib
+
+
+cflags = getenv('CFLAGS', '').split()
+# switch off several checks (need to be at the end of cflags list)
+cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls', '-DPYTHON_PERF' ]
+if cc_is_clang:
+ cflags += ["-Wno-unused-command-line-argument" ]
+else:
+ cflags += ['-Wno-cast-function-type' ]
+
+# The python headers have mixed code with declarations (decls after asserts, for instance)
+cflags += [ "-Wno-declaration-after-statement" ]
+
+src_perf = getenv('srctree') + '/tools/perf'
+build_lib = getenv('PYTHON_EXTBUILD_LIB')
+build_tmp = getenv('PYTHON_EXTBUILD_TMP')
+libtraceevent = getenv('LIBTRACEEVENT')
+libapikfs = getenv('LIBAPI')
+libperf = getenv('LIBPERF')
+
+ext_sources = [f.strip() for f in open('util/python-ext-sources')
+ if len(f.strip()) > 0 and f[0] != '#']
+
+extra_libraries = []
+
+if '-DHAVE_LIBTRACEEVENT' in cflags:
+ extra_libraries += [ 'traceevent' ]
+else:
+ ext_sources.remove('util/trace-event.c')
+
+# use full paths with source files
+ext_sources = list(map(lambda x: '%s/%s' % (src_perf, x) , ext_sources))
+
+if '-DHAVE_LIBNUMA_SUPPORT' in cflags:
+ extra_libraries += [ 'numa' ]
+if '-DHAVE_LIBCAP_SUPPORT' in cflags:
+ extra_libraries += [ 'cap' ]
+
+perf = Extension('perf',
+ sources = ext_sources,
+ include_dirs = ['util/include'],
+ libraries = extra_libraries,
+ extra_compile_args = cflags,
+ extra_objects = [ x for x in [libtraceevent, libapikfs, libperf]
+ if x is not None],
+ )
+
+setup(name='perf',
+ version='0.1',
+ description='Interface with the Linux profiling infrastructure',
+ author='Arnaldo Carvalho de Melo',
+ author_email='acme@redhat.com',
+ license='GPLv2',
+ url='http://perf.wiki.kernel.org',
+ ext_modules=[perf],
+ cmdclass={'build_ext': build_ext, 'install_lib': install_lib})
diff --git a/tools/perf/util/sharded_mutex.c b/tools/perf/util/sharded_mutex.c
new file mode 100644
index 000000000..e11e8d094
--- /dev/null
+++ b/tools/perf/util/sharded_mutex.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "sharded_mutex.h"
+
+#include <stdlib.h>
+
+struct sharded_mutex *sharded_mutex__new(size_t num_shards)
+{
+ struct sharded_mutex *result;
+ size_t size;
+ unsigned int bits;
+
+ for (bits = 0; ((size_t)1 << bits) < num_shards; bits++)
+ ;
+
+ size = sizeof(*result) + sizeof(struct mutex) * (1 << bits);
+ result = malloc(size);
+ if (!result)
+ return NULL;
+
+ result->cap_bits = bits;
+ for (size_t i = 0; i < ((size_t)1 << bits); i++)
+ mutex_init(&result->mutexes[i]);
+
+ return result;
+}
+
+void sharded_mutex__delete(struct sharded_mutex *sm)
+{
+ for (size_t i = 0; i < ((size_t)1 << sm->cap_bits); i++)
+ mutex_destroy(&sm->mutexes[i]);
+
+ free(sm);
+}
diff --git a/tools/perf/util/sharded_mutex.h b/tools/perf/util/sharded_mutex.h
new file mode 100644
index 000000000..7325e969e
--- /dev/null
+++ b/tools/perf/util/sharded_mutex.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_SHARDED_MUTEX_H
+#define PERF_SHARDED_MUTEX_H
+
+#include "mutex.h"
+#include "hashmap.h"
+
+/*
+ * In a situation where a lock is needed per object, having a mutex can be
+ * relatively memory expensive (40 bytes on x86-64). If the object can be
+ * constantly hashed, a sharded mutex is an alternative global pool of mutexes
+ * where the mutex is looked up from a hash value. This can lead to collisions
+ * if the number of shards isn't large enough.
+ */
+struct sharded_mutex {
+ /* mutexes array is 1<<cap_bits in size. */
+ unsigned int cap_bits;
+ struct mutex mutexes[];
+};
+
+struct sharded_mutex *sharded_mutex__new(size_t num_shards);
+void sharded_mutex__delete(struct sharded_mutex *sm);
+
+static inline struct mutex *sharded_mutex__get_mutex(struct sharded_mutex *sm, size_t hash)
+{
+ return &sm->mutexes[hash_bits(hash, sm->cap_bits)];
+}
+
+#endif /* PERF_SHARDED_MUTEX_H */
diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c
new file mode 100644
index 000000000..388846f17
--- /dev/null
+++ b/tools/perf/util/sideband_evlist.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "util/debug.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/mmap.h"
+#include "util/perf_api_probe.h"
+#include <perf/mmap.h>
+#include <linux/perf_event.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdbool.h>
+
+int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr,
+ evsel__sb_cb_t cb, void *data)
+{
+ struct evsel *evsel;
+
+ if (!attr->sample_id_all) {
+ pr_warning("enabling sample_id_all for all side band events\n");
+ attr->sample_id_all = 1;
+ }
+
+ evsel = evsel__new_idx(attr, evlist->core.nr_entries);
+ if (!evsel)
+ return -1;
+
+ evsel->side_band.cb = cb;
+ evsel->side_band.data = data;
+ evlist__add(evlist, evsel);
+ return 0;
+}
+
+static void *perf_evlist__poll_thread(void *arg)
+{
+ struct evlist *evlist = arg;
+ bool draining = false;
+ int i, done = 0;
+ /*
+ * In order to read symbols from other namespaces perf to needs to call
+ * setns(2). This isn't permitted if the struct_fs has multiple users.
+ * unshare(2) the fs so that we may continue to setns into namespaces
+ * that we're observing when, for instance, reading the build-ids at
+ * the end of a 'perf record' session.
+ */
+ unshare(CLONE_FS);
+
+ while (!done) {
+ bool got_data = false;
+
+ if (evlist->thread.done)
+ draining = true;
+
+ if (!draining)
+ evlist__poll(evlist, 1000);
+
+ for (i = 0; i < evlist->core.nr_mmaps; i++) {
+ struct mmap *map = &evlist->mmap[i];
+ union perf_event *event;
+
+ if (perf_mmap__read_init(&map->core))
+ continue;
+ while ((event = perf_mmap__read_event(&map->core)) != NULL) {
+ struct evsel *evsel = evlist__event2evsel(evlist, event);
+
+ if (evsel && evsel->side_band.cb)
+ evsel->side_band.cb(event, evsel->side_band.data);
+ else
+ pr_warning("cannot locate proper evsel for the side band event\n");
+
+ perf_mmap__consume(&map->core);
+ got_data = true;
+ }
+ perf_mmap__read_done(&map->core);
+ }
+
+ if (draining && !got_data)
+ break;
+ }
+ return NULL;
+}
+
+void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ evsel->core.attr.sample_id_all = 1;
+ evsel->core.attr.watermark = 1;
+ evsel->core.attr.wakeup_watermark = 1;
+ evsel->side_band.cb = cb;
+ evsel->side_band.data = data;
+ }
+}
+
+int evlist__start_sb_thread(struct evlist *evlist, struct target *target)
+{
+ struct evsel *counter;
+
+ if (!evlist)
+ return 0;
+
+ if (evlist__create_maps(evlist, target))
+ goto out_delete_evlist;
+
+ if (evlist->core.nr_entries > 1) {
+ bool can_sample_identifier = perf_can_sample_identifier();
+
+ evlist__for_each_entry(evlist, counter)
+ evsel__set_sample_id(counter, can_sample_identifier);
+
+ evlist__set_id_pos(evlist);
+ }
+
+ evlist__for_each_entry(evlist, counter) {
+ if (evsel__open(counter, evlist->core.user_requested_cpus,
+ evlist->core.threads) < 0)
+ goto out_delete_evlist;
+ }
+
+ if (evlist__mmap(evlist, UINT_MAX))
+ goto out_delete_evlist;
+
+ evlist__for_each_entry(evlist, counter) {
+ if (evsel__enable(counter))
+ goto out_delete_evlist;
+ }
+
+ evlist->thread.done = 0;
+ if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
+ goto out_delete_evlist;
+
+ return 0;
+
+out_delete_evlist:
+ evlist__delete(evlist);
+ evlist = NULL;
+ return -1;
+}
+
+void evlist__stop_sb_thread(struct evlist *evlist)
+{
+ if (!evlist)
+ return;
+ evlist->thread.done = 1;
+ pthread_join(evlist->thread.th, NULL);
+ evlist__delete(evlist);
+}
diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c
new file mode 100644
index 000000000..650e804d0
--- /dev/null
+++ b/tools/perf/util/smt.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <string.h>
+#include "api/fs/fs.h"
+#include "cputopo.h"
+#include "smt.h"
+
+bool smt_on(void)
+{
+ static bool cached;
+ static bool cached_result;
+ int fs_value;
+
+ if (cached)
+ return cached_result;
+
+ if (sysfs__read_int("devices/system/cpu/smt/active", &fs_value) >= 0)
+ cached_result = (fs_value == 1);
+ else
+ cached_result = cpu_topology__smt_on(online_topology());
+
+ cached = true;
+ return cached_result;
+}
+
+bool core_wide(bool system_wide, const char *user_requested_cpu_list)
+{
+ /* If not everything running on a core is being recorded then we can't use core_wide. */
+ if (!system_wide)
+ return false;
+
+ /* Cheap case that SMT is disabled and therefore we're inherently core_wide. */
+ if (!smt_on())
+ return true;
+
+ return cpu_topology__core_wide(online_topology(), user_requested_cpu_list);
+}
diff --git a/tools/perf/util/smt.h b/tools/perf/util/smt.h
new file mode 100644
index 000000000..01441fd2c
--- /dev/null
+++ b/tools/perf/util/smt.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SMT_H
+#define __SMT_H 1
+
+/*
+ * Returns true if SMT (aka hyperthreading) is enabled. Determined via sysfs or
+ * the online topology.
+ */
+bool smt_on(void);
+
+/*
+ * Returns true when system wide and all SMT threads for a core are in the
+ * user_requested_cpus map.
+ */
+bool core_wide(bool system_wide, const char *user_requested_cpu_list);
+
+#endif /* __SMT_H */
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
new file mode 100644
index 000000000..6aa1c7f2b
--- /dev/null
+++ b/tools/perf/util/sort.c
@@ -0,0 +1,3772 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <regex.h>
+#include <stdlib.h>
+#include <linux/mman.h>
+#include <linux/time64.h>
+#include "debug.h"
+#include "dso.h"
+#include "sort.h"
+#include "hist.h"
+#include "cacheline.h"
+#include "comm.h"
+#include "map.h"
+#include "maps.h"
+#include "symbol.h"
+#include "map_symbol.h"
+#include "branch.h"
+#include "thread.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "srcline.h"
+#include "strlist.h"
+#include "strbuf.h"
+#include "mem-events.h"
+#include "annotate.h"
+#include "event.h"
+#include "time-utils.h"
+#include "cgroup.h"
+#include "machine.h"
+#include "trace-event.h"
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+#ifdef HAVE_LIBTRACEEVENT
+#include <traceevent/event-parse.h>
+#endif
+
+regex_t parent_regex;
+const char default_parent_pattern[] = "^sys_|^do_page_fault";
+const char *parent_pattern = default_parent_pattern;
+const char *default_sort_order = "comm,dso,symbol";
+const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
+const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc";
+const char default_top_sort_order[] = "dso,symbol";
+const char default_diff_sort_order[] = "dso,symbol";
+const char default_tracepoint_sort_order[] = "trace";
+const char *sort_order;
+const char *field_order;
+regex_t ignore_callees_regex;
+int have_ignore_callees = 0;
+enum sort_mode sort__mode = SORT_MODE__NORMAL;
+static const char *const dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"};
+static const char *const arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"};
+
+/*
+ * Some architectures have Adjacent Cacheline Prefetch feature, which
+ * behaves like the cacheline size is doubled. Enable this flag to
+ * check things in double cacheline granularity.
+ */
+bool chk_double_cl;
+
+/*
+ * Replaces all occurrences of a char used with the:
+ *
+ * -t, --field-separator
+ *
+ * option, that uses a special separator character and don't pad with spaces,
+ * replacing all occurrences of this separator in symbol names (and other
+ * output) with a '.' character, that thus it's the only non valid separator.
+*/
+static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...)
+{
+ int n;
+ va_list ap;
+
+ va_start(ap, fmt);
+ n = vsnprintf(bf, size, fmt, ap);
+ if (symbol_conf.field_sep && n > 0) {
+ char *sep = bf;
+
+ while (1) {
+ sep = strchr(sep, *symbol_conf.field_sep);
+ if (sep == NULL)
+ break;
+ *sep = '.';
+ }
+ }
+ va_end(ap);
+
+ if (n >= (int)size)
+ return size - 1;
+ return n;
+}
+
+static int64_t cmp_null(const void *l, const void *r)
+{
+ if (!l && !r)
+ return 0;
+ else if (!l)
+ return -1;
+ else
+ return 1;
+}
+
+/* --sort pid */
+
+static int64_t
+sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return thread__tid(right->thread) - thread__tid(left->thread);
+}
+
+static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ const char *comm = thread__comm_str(he->thread);
+
+ width = max(7U, width) - 8;
+ return repsep_snprintf(bf, size, "%7d:%-*.*s", thread__tid(he->thread),
+ width, width, comm ?: "");
+}
+
+static int hist_entry__thread_filter(struct hist_entry *he, int type, const void *arg)
+{
+ const struct thread *th = arg;
+
+ if (type != HIST_FILTER__THREAD)
+ return -1;
+
+ return th && RC_CHK_ACCESS(he->thread) != RC_CHK_ACCESS(th);
+}
+
+struct sort_entry sort_thread = {
+ .se_header = " Pid:Command",
+ .se_cmp = sort__thread_cmp,
+ .se_snprintf = hist_entry__thread_snprintf,
+ .se_filter = hist_entry__thread_filter,
+ .se_width_idx = HISTC_THREAD,
+};
+
+/* --sort simd */
+
+static int64_t
+sort__simd_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ if (left->simd_flags.arch != right->simd_flags.arch)
+ return (int64_t) left->simd_flags.arch - right->simd_flags.arch;
+
+ return (int64_t) left->simd_flags.pred - right->simd_flags.pred;
+}
+
+static const char *hist_entry__get_simd_name(struct simd_flags *simd_flags)
+{
+ u64 arch = simd_flags->arch;
+
+ if (arch & SIMD_OP_FLAGS_ARCH_SVE)
+ return "SVE";
+ else
+ return "n/a";
+}
+
+static int hist_entry__simd_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width __maybe_unused)
+{
+ const char *name;
+
+ if (!he->simd_flags.arch)
+ return repsep_snprintf(bf, size, "");
+
+ name = hist_entry__get_simd_name(&he->simd_flags);
+
+ if (he->simd_flags.pred & SIMD_OP_FLAGS_PRED_EMPTY)
+ return repsep_snprintf(bf, size, "[e] %s", name);
+ else if (he->simd_flags.pred & SIMD_OP_FLAGS_PRED_PARTIAL)
+ return repsep_snprintf(bf, size, "[p] %s", name);
+
+ return repsep_snprintf(bf, size, "[.] %s", name);
+}
+
+struct sort_entry sort_simd = {
+ .se_header = "Simd ",
+ .se_cmp = sort__simd_cmp,
+ .se_snprintf = hist_entry__simd_snprintf,
+ .se_width_idx = HISTC_SIMD,
+};
+
+/* --sort comm */
+
+/*
+ * We can't use pointer comparison in functions below,
+ * because it gives different results based on pointer
+ * values, which could break some sorting assumptions.
+ */
+static int64_t
+sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return strcmp(comm__str(right->comm), comm__str(left->comm));
+}
+
+static int64_t
+sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
+{
+ return strcmp(comm__str(right->comm), comm__str(left->comm));
+}
+
+static int64_t
+sort__comm_sort(struct hist_entry *left, struct hist_entry *right)
+{
+ return strcmp(comm__str(right->comm), comm__str(left->comm));
+}
+
+static int hist_entry__comm_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, comm__str(he->comm));
+}
+
+struct sort_entry sort_comm = {
+ .se_header = "Command",
+ .se_cmp = sort__comm_cmp,
+ .se_collapse = sort__comm_collapse,
+ .se_sort = sort__comm_sort,
+ .se_snprintf = hist_entry__comm_snprintf,
+ .se_filter = hist_entry__thread_filter,
+ .se_width_idx = HISTC_COMM,
+};
+
+/* --sort dso */
+
+static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r)
+{
+ struct dso *dso_l = map_l ? map__dso(map_l) : NULL;
+ struct dso *dso_r = map_r ? map__dso(map_r) : NULL;
+ const char *dso_name_l, *dso_name_r;
+
+ if (!dso_l || !dso_r)
+ return cmp_null(dso_r, dso_l);
+
+ if (verbose > 0) {
+ dso_name_l = dso_l->long_name;
+ dso_name_r = dso_r->long_name;
+ } else {
+ dso_name_l = dso_l->short_name;
+ dso_name_r = dso_r->short_name;
+ }
+
+ return strcmp(dso_name_l, dso_name_r);
+}
+
+static int64_t
+sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return _sort__dso_cmp(right->ms.map, left->ms.map);
+}
+
+static int _hist_entry__dso_snprintf(struct map *map, char *bf,
+ size_t size, unsigned int width)
+{
+ const struct dso *dso = map ? map__dso(map) : NULL;
+ const char *dso_name = "[unknown]";
+
+ if (dso)
+ dso_name = verbose > 0 ? dso->long_name : dso->short_name;
+
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, dso_name);
+}
+
+static int hist_entry__dso_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return _hist_entry__dso_snprintf(he->ms.map, bf, size, width);
+}
+
+static int hist_entry__dso_filter(struct hist_entry *he, int type, const void *arg)
+{
+ const struct dso *dso = arg;
+
+ if (type != HIST_FILTER__DSO)
+ return -1;
+
+ return dso && (!he->ms.map || map__dso(he->ms.map) != dso);
+}
+
+struct sort_entry sort_dso = {
+ .se_header = "Shared Object",
+ .se_cmp = sort__dso_cmp,
+ .se_snprintf = hist_entry__dso_snprintf,
+ .se_filter = hist_entry__dso_filter,
+ .se_width_idx = HISTC_DSO,
+};
+
+/* --sort symbol */
+
+static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip)
+{
+ return (int64_t)(right_ip - left_ip);
+}
+
+int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
+{
+ if (!sym_l || !sym_r)
+ return cmp_null(sym_l, sym_r);
+
+ if (sym_l == sym_r)
+ return 0;
+
+ if (sym_l->inlined || sym_r->inlined) {
+ int ret = strcmp(sym_l->name, sym_r->name);
+
+ if (ret)
+ return ret;
+ if ((sym_l->start <= sym_r->end) && (sym_l->end >= sym_r->start))
+ return 0;
+ }
+
+ if (sym_l->start != sym_r->start)
+ return (int64_t)(sym_r->start - sym_l->start);
+
+ return (int64_t)(sym_r->end - sym_l->end);
+}
+
+static int64_t
+sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ int64_t ret;
+
+ if (!left->ms.sym && !right->ms.sym)
+ return _sort__addr_cmp(left->ip, right->ip);
+
+ /*
+ * comparing symbol address alone is not enough since it's a
+ * relative address within a dso.
+ */
+ if (!hists__has(left->hists, dso) || hists__has(right->hists, dso)) {
+ ret = sort__dso_cmp(left, right);
+ if (ret != 0)
+ return ret;
+ }
+
+ return _sort__sym_cmp(left->ms.sym, right->ms.sym);
+}
+
+static int64_t
+sort__sym_sort(struct hist_entry *left, struct hist_entry *right)
+{
+ if (!left->ms.sym || !right->ms.sym)
+ return cmp_null(left->ms.sym, right->ms.sym);
+
+ return strcmp(right->ms.sym->name, left->ms.sym->name);
+}
+
+static int _hist_entry__sym_snprintf(struct map_symbol *ms,
+ u64 ip, char level, char *bf, size_t size,
+ unsigned int width)
+{
+ struct symbol *sym = ms->sym;
+ struct map *map = ms->map;
+ size_t ret = 0;
+
+ if (verbose > 0) {
+ struct dso *dso = map ? map__dso(map) : NULL;
+ char o = dso ? dso__symtab_origin(dso) : '!';
+ u64 rip = ip;
+
+ if (dso && dso->kernel && dso->adjust_symbols)
+ rip = map__unmap_ip(map, ip);
+
+ ret += repsep_snprintf(bf, size, "%-#*llx %c ",
+ BITS_PER_LONG / 4 + 2, rip, o);
+ }
+
+ ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);
+ if (sym && map) {
+ if (sym->type == STT_OBJECT) {
+ ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name);
+ ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx",
+ ip - map__unmap_ip(map, sym->start));
+ } else {
+ ret += repsep_snprintf(bf + ret, size - ret, "%.*s",
+ width - ret,
+ sym->name);
+ if (sym->inlined)
+ ret += repsep_snprintf(bf + ret, size - ret,
+ " (inlined)");
+ }
+ } else {
+ size_t len = BITS_PER_LONG / 4;
+ ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx",
+ len, ip);
+ }
+
+ return ret;
+}
+
+int hist_entry__sym_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width)
+{
+ return _hist_entry__sym_snprintf(&he->ms, he->ip,
+ he->level, bf, size, width);
+}
+
+static int hist_entry__sym_filter(struct hist_entry *he, int type, const void *arg)
+{
+ const char *sym = arg;
+
+ if (type != HIST_FILTER__SYMBOL)
+ return -1;
+
+ return sym && (!he->ms.sym || !strstr(he->ms.sym->name, sym));
+}
+
+struct sort_entry sort_sym = {
+ .se_header = "Symbol",
+ .se_cmp = sort__sym_cmp,
+ .se_sort = sort__sym_sort,
+ .se_snprintf = hist_entry__sym_snprintf,
+ .se_filter = hist_entry__sym_filter,
+ .se_width_idx = HISTC_SYMBOL,
+};
+
+/* --sort srcline */
+
+char *hist_entry__srcline(struct hist_entry *he)
+{
+ return map__srcline(he->ms.map, he->ip, he->ms.sym);
+}
+
+static int64_t
+sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ int64_t ret;
+
+ ret = _sort__addr_cmp(left->ip, right->ip);
+ if (ret)
+ return ret;
+
+ return sort__dso_cmp(left, right);
+}
+
+static int64_t
+sort__srcline_collapse(struct hist_entry *left, struct hist_entry *right)
+{
+ if (!left->srcline)
+ left->srcline = hist_entry__srcline(left);
+ if (!right->srcline)
+ right->srcline = hist_entry__srcline(right);
+
+ return strcmp(right->srcline, left->srcline);
+}
+
+static int64_t
+sort__srcline_sort(struct hist_entry *left, struct hist_entry *right)
+{
+ return sort__srcline_collapse(left, right);
+}
+
+static void
+sort__srcline_init(struct hist_entry *he)
+{
+ if (!he->srcline)
+ he->srcline = hist_entry__srcline(he);
+}
+
+static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-.*s", width, he->srcline);
+}
+
+struct sort_entry sort_srcline = {
+ .se_header = "Source:Line",
+ .se_cmp = sort__srcline_cmp,
+ .se_collapse = sort__srcline_collapse,
+ .se_sort = sort__srcline_sort,
+ .se_init = sort__srcline_init,
+ .se_snprintf = hist_entry__srcline_snprintf,
+ .se_width_idx = HISTC_SRCLINE,
+};
+
+/* --sort srcline_from */
+
+static char *addr_map_symbol__srcline(struct addr_map_symbol *ams)
+{
+ return map__srcline(ams->ms.map, ams->al_addr, ams->ms.sym);
+}
+
+static int64_t
+sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return left->branch_info->from.addr - right->branch_info->from.addr;
+}
+
+static int64_t
+sort__srcline_from_collapse(struct hist_entry *left, struct hist_entry *right)
+{
+ if (!left->branch_info->srcline_from)
+ left->branch_info->srcline_from = addr_map_symbol__srcline(&left->branch_info->from);
+
+ if (!right->branch_info->srcline_from)
+ right->branch_info->srcline_from = addr_map_symbol__srcline(&right->branch_info->from);
+
+ return strcmp(right->branch_info->srcline_from, left->branch_info->srcline_from);
+}
+
+static int64_t
+sort__srcline_from_sort(struct hist_entry *left, struct hist_entry *right)
+{
+ return sort__srcline_from_collapse(left, right);
+}
+
+static void sort__srcline_from_init(struct hist_entry *he)
+{
+ if (!he->branch_info->srcline_from)
+ he->branch_info->srcline_from = addr_map_symbol__srcline(&he->branch_info->from);
+}
+
+static int hist_entry__srcline_from_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, he->branch_info->srcline_from);
+}
+
+struct sort_entry sort_srcline_from = {
+ .se_header = "From Source:Line",
+ .se_cmp = sort__srcline_from_cmp,
+ .se_collapse = sort__srcline_from_collapse,
+ .se_sort = sort__srcline_from_sort,
+ .se_init = sort__srcline_from_init,
+ .se_snprintf = hist_entry__srcline_from_snprintf,
+ .se_width_idx = HISTC_SRCLINE_FROM,
+};
+
+/* --sort srcline_to */
+
+static int64_t
+sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return left->branch_info->to.addr - right->branch_info->to.addr;
+}
+
+static int64_t
+sort__srcline_to_collapse(struct hist_entry *left, struct hist_entry *right)
+{
+ if (!left->branch_info->srcline_to)
+ left->branch_info->srcline_to = addr_map_symbol__srcline(&left->branch_info->to);
+
+ if (!right->branch_info->srcline_to)
+ right->branch_info->srcline_to = addr_map_symbol__srcline(&right->branch_info->to);
+
+ return strcmp(right->branch_info->srcline_to, left->branch_info->srcline_to);
+}
+
+static int64_t
+sort__srcline_to_sort(struct hist_entry *left, struct hist_entry *right)
+{
+ return sort__srcline_to_collapse(left, right);
+}
+
+static void sort__srcline_to_init(struct hist_entry *he)
+{
+ if (!he->branch_info->srcline_to)
+ he->branch_info->srcline_to = addr_map_symbol__srcline(&he->branch_info->to);
+}
+
+static int hist_entry__srcline_to_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, he->branch_info->srcline_to);
+}
+
+struct sort_entry sort_srcline_to = {
+ .se_header = "To Source:Line",
+ .se_cmp = sort__srcline_to_cmp,
+ .se_collapse = sort__srcline_to_collapse,
+ .se_sort = sort__srcline_to_sort,
+ .se_init = sort__srcline_to_init,
+ .se_snprintf = hist_entry__srcline_to_snprintf,
+ .se_width_idx = HISTC_SRCLINE_TO,
+};
+
+static int hist_entry__sym_ipc_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+
+ struct symbol *sym = he->ms.sym;
+ struct annotation *notes;
+ double ipc = 0.0, coverage = 0.0;
+ char tmp[64];
+
+ if (!sym)
+ return repsep_snprintf(bf, size, "%-*s", width, "-");
+
+ notes = symbol__annotation(sym);
+
+ if (notes->hit_cycles)
+ ipc = notes->hit_insn / ((double)notes->hit_cycles);
+
+ if (notes->total_insn) {
+ coverage = notes->cover_insn * 100.0 /
+ ((double)notes->total_insn);
+ }
+
+ snprintf(tmp, sizeof(tmp), "%-5.2f [%5.1f%%]", ipc, coverage);
+ return repsep_snprintf(bf, size, "%-*s", width, tmp);
+}
+
+struct sort_entry sort_sym_ipc = {
+ .se_header = "IPC [IPC Coverage]",
+ .se_cmp = sort__sym_cmp,
+ .se_snprintf = hist_entry__sym_ipc_snprintf,
+ .se_width_idx = HISTC_SYMBOL_IPC,
+};
+
+static int hist_entry__sym_ipc_null_snprintf(struct hist_entry *he
+ __maybe_unused,
+ char *bf, size_t size,
+ unsigned int width)
+{
+ char tmp[64];
+
+ snprintf(tmp, sizeof(tmp), "%-5s %2s", "-", "-");
+ return repsep_snprintf(bf, size, "%-*s", width, tmp);
+}
+
+struct sort_entry sort_sym_ipc_null = {
+ .se_header = "IPC [IPC Coverage]",
+ .se_cmp = sort__sym_cmp,
+ .se_snprintf = hist_entry__sym_ipc_null_snprintf,
+ .se_width_idx = HISTC_SYMBOL_IPC,
+};
+
+/* --sort srcfile */
+
+static char no_srcfile[1];
+
+static char *hist_entry__get_srcfile(struct hist_entry *e)
+{
+ char *sf, *p;
+ struct map *map = e->ms.map;
+
+ if (!map)
+ return no_srcfile;
+
+ sf = __get_srcline(map__dso(map), map__rip_2objdump(map, e->ip),
+ e->ms.sym, false, true, true, e->ip);
+ if (sf == SRCLINE_UNKNOWN)
+ return no_srcfile;
+ p = strchr(sf, ':');
+ if (p && *sf) {
+ *p = 0;
+ return sf;
+ }
+ free(sf);
+ return no_srcfile;
+}
+
+static int64_t
+sort__srcfile_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return sort__srcline_cmp(left, right);
+}
+
+static int64_t
+sort__srcfile_collapse(struct hist_entry *left, struct hist_entry *right)
+{
+ if (!left->srcfile)
+ left->srcfile = hist_entry__get_srcfile(left);
+ if (!right->srcfile)
+ right->srcfile = hist_entry__get_srcfile(right);
+
+ return strcmp(right->srcfile, left->srcfile);
+}
+
+static int64_t
+sort__srcfile_sort(struct hist_entry *left, struct hist_entry *right)
+{
+ return sort__srcfile_collapse(left, right);
+}
+
+static void sort__srcfile_init(struct hist_entry *he)
+{
+ if (!he->srcfile)
+ he->srcfile = hist_entry__get_srcfile(he);
+}
+
+static int hist_entry__srcfile_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-.*s", width, he->srcfile);
+}
+
+struct sort_entry sort_srcfile = {
+ .se_header = "Source File",
+ .se_cmp = sort__srcfile_cmp,
+ .se_collapse = sort__srcfile_collapse,
+ .se_sort = sort__srcfile_sort,
+ .se_init = sort__srcfile_init,
+ .se_snprintf = hist_entry__srcfile_snprintf,
+ .se_width_idx = HISTC_SRCFILE,
+};
+
+/* --sort parent */
+
+static int64_t
+sort__parent_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ struct symbol *sym_l = left->parent;
+ struct symbol *sym_r = right->parent;
+
+ if (!sym_l || !sym_r)
+ return cmp_null(sym_l, sym_r);
+
+ return strcmp(sym_r->name, sym_l->name);
+}
+
+static int hist_entry__parent_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*.*s", width, width,
+ he->parent ? he->parent->name : "[other]");
+}
+
+struct sort_entry sort_parent = {
+ .se_header = "Parent symbol",
+ .se_cmp = sort__parent_cmp,
+ .se_snprintf = hist_entry__parent_snprintf,
+ .se_width_idx = HISTC_PARENT,
+};
+
+/* --sort cpu */
+
+static int64_t
+sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return right->cpu - left->cpu;
+}
+
+static int hist_entry__cpu_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%*.*d", width, width, he->cpu);
+}
+
+struct sort_entry sort_cpu = {
+ .se_header = "CPU",
+ .se_cmp = sort__cpu_cmp,
+ .se_snprintf = hist_entry__cpu_snprintf,
+ .se_width_idx = HISTC_CPU,
+};
+
+/* --sort cgroup_id */
+
+static int64_t _sort__cgroup_dev_cmp(u64 left_dev, u64 right_dev)
+{
+ return (int64_t)(right_dev - left_dev);
+}
+
+static int64_t _sort__cgroup_inode_cmp(u64 left_ino, u64 right_ino)
+{
+ return (int64_t)(right_ino - left_ino);
+}
+
+static int64_t
+sort__cgroup_id_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ int64_t ret;
+
+ ret = _sort__cgroup_dev_cmp(right->cgroup_id.dev, left->cgroup_id.dev);
+ if (ret != 0)
+ return ret;
+
+ return _sort__cgroup_inode_cmp(right->cgroup_id.ino,
+ left->cgroup_id.ino);
+}
+
+static int hist_entry__cgroup_id_snprintf(struct hist_entry *he,
+ char *bf, size_t size,
+ unsigned int width __maybe_unused)
+{
+ return repsep_snprintf(bf, size, "%lu/0x%lx", he->cgroup_id.dev,
+ he->cgroup_id.ino);
+}
+
+struct sort_entry sort_cgroup_id = {
+ .se_header = "cgroup id (dev/inode)",
+ .se_cmp = sort__cgroup_id_cmp,
+ .se_snprintf = hist_entry__cgroup_id_snprintf,
+ .se_width_idx = HISTC_CGROUP_ID,
+};
+
+/* --sort cgroup */
+
+static int64_t
+sort__cgroup_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return right->cgroup - left->cgroup;
+}
+
+static int hist_entry__cgroup_snprintf(struct hist_entry *he,
+ char *bf, size_t size,
+ unsigned int width __maybe_unused)
+{
+ const char *cgrp_name = "N/A";
+
+ if (he->cgroup) {
+ struct cgroup *cgrp = cgroup__find(maps__machine(he->ms.maps)->env,
+ he->cgroup);
+ if (cgrp != NULL)
+ cgrp_name = cgrp->name;
+ else
+ cgrp_name = "unknown";
+ }
+
+ return repsep_snprintf(bf, size, "%s", cgrp_name);
+}
+
+struct sort_entry sort_cgroup = {
+ .se_header = "Cgroup",
+ .se_cmp = sort__cgroup_cmp,
+ .se_snprintf = hist_entry__cgroup_snprintf,
+ .se_width_idx = HISTC_CGROUP,
+};
+
+/* --sort socket */
+
+static int64_t
+sort__socket_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return right->socket - left->socket;
+}
+
+static int hist_entry__socket_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%*.*d", width, width-3, he->socket);
+}
+
+static int hist_entry__socket_filter(struct hist_entry *he, int type, const void *arg)
+{
+ int sk = *(const int *)arg;
+
+ if (type != HIST_FILTER__SOCKET)
+ return -1;
+
+ return sk >= 0 && he->socket != sk;
+}
+
+struct sort_entry sort_socket = {
+ .se_header = "Socket",
+ .se_cmp = sort__socket_cmp,
+ .se_snprintf = hist_entry__socket_snprintf,
+ .se_filter = hist_entry__socket_filter,
+ .se_width_idx = HISTC_SOCKET,
+};
+
+/* --sort time */
+
+static int64_t
+sort__time_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return right->time - left->time;
+}
+
+static int hist_entry__time_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ char he_time[32];
+
+ if (symbol_conf.nanosecs)
+ timestamp__scnprintf_nsec(he->time, he_time,
+ sizeof(he_time));
+ else
+ timestamp__scnprintf_usec(he->time, he_time,
+ sizeof(he_time));
+
+ return repsep_snprintf(bf, size, "%-.*s", width, he_time);
+}
+
+struct sort_entry sort_time = {
+ .se_header = "Time",
+ .se_cmp = sort__time_cmp,
+ .se_snprintf = hist_entry__time_snprintf,
+ .se_width_idx = HISTC_TIME,
+};
+
+/* --sort trace */
+
+#ifdef HAVE_LIBTRACEEVENT
+static char *get_trace_output(struct hist_entry *he)
+{
+ struct trace_seq seq;
+ struct evsel *evsel;
+ struct tep_record rec = {
+ .data = he->raw_data,
+ .size = he->raw_size,
+ };
+
+ evsel = hists_to_evsel(he->hists);
+
+ trace_seq_init(&seq);
+ if (symbol_conf.raw_trace) {
+ tep_print_fields(&seq, he->raw_data, he->raw_size,
+ evsel->tp_format);
+ } else {
+ tep_print_event(evsel->tp_format->tep,
+ &seq, &rec, "%s", TEP_PRINT_INFO);
+ }
+ /*
+ * Trim the buffer, it starts at 4KB and we're not going to
+ * add anything more to this buffer.
+ */
+ return realloc(seq.buffer, seq.len + 1);
+}
+
+static int64_t
+sort__trace_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ struct evsel *evsel;
+
+ evsel = hists_to_evsel(left->hists);
+ if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
+ return 0;
+
+ if (left->trace_output == NULL)
+ left->trace_output = get_trace_output(left);
+ if (right->trace_output == NULL)
+ right->trace_output = get_trace_output(right);
+
+ return strcmp(right->trace_output, left->trace_output);
+}
+
+static int hist_entry__trace_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ struct evsel *evsel;
+
+ evsel = hists_to_evsel(he->hists);
+ if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
+ return scnprintf(bf, size, "%-.*s", width, "N/A");
+
+ if (he->trace_output == NULL)
+ he->trace_output = get_trace_output(he);
+ return repsep_snprintf(bf, size, "%-.*s", width, he->trace_output);
+}
+
+struct sort_entry sort_trace = {
+ .se_header = "Trace output",
+ .se_cmp = sort__trace_cmp,
+ .se_snprintf = hist_entry__trace_snprintf,
+ .se_width_idx = HISTC_TRACE,
+};
+#endif /* HAVE_LIBTRACEEVENT */
+
+/* sort keys for branch stacks */
+
+static int64_t
+sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ if (!left->branch_info || !right->branch_info)
+ return cmp_null(left->branch_info, right->branch_info);
+
+ return _sort__dso_cmp(left->branch_info->from.ms.map,
+ right->branch_info->from.ms.map);
+}
+
+static int hist_entry__dso_from_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ if (he->branch_info)
+ return _hist_entry__dso_snprintf(he->branch_info->from.ms.map,
+ bf, size, width);
+ else
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+}
+
+static int hist_entry__dso_from_filter(struct hist_entry *he, int type,
+ const void *arg)
+{
+ const struct dso *dso = arg;
+
+ if (type != HIST_FILTER__DSO)
+ return -1;
+
+ return dso && (!he->branch_info || !he->branch_info->from.ms.map ||
+ map__dso(he->branch_info->from.ms.map) != dso);
+}
+
+static int64_t
+sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ if (!left->branch_info || !right->branch_info)
+ return cmp_null(left->branch_info, right->branch_info);
+
+ return _sort__dso_cmp(left->branch_info->to.ms.map,
+ right->branch_info->to.ms.map);
+}
+
+static int hist_entry__dso_to_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ if (he->branch_info)
+ return _hist_entry__dso_snprintf(he->branch_info->to.ms.map,
+ bf, size, width);
+ else
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+}
+
+static int hist_entry__dso_to_filter(struct hist_entry *he, int type,
+ const void *arg)
+{
+ const struct dso *dso = arg;
+
+ if (type != HIST_FILTER__DSO)
+ return -1;
+
+ return dso && (!he->branch_info || !he->branch_info->to.ms.map ||
+ map__dso(he->branch_info->to.ms.map) != dso);
+}
+
+static int64_t
+sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ struct addr_map_symbol *from_l, *from_r;
+
+ if (!left->branch_info || !right->branch_info)
+ return cmp_null(left->branch_info, right->branch_info);
+
+ from_l = &left->branch_info->from;
+ from_r = &right->branch_info->from;
+
+ if (!from_l->ms.sym && !from_r->ms.sym)
+ return _sort__addr_cmp(from_l->addr, from_r->addr);
+
+ return _sort__sym_cmp(from_l->ms.sym, from_r->ms.sym);
+}
+
+static int64_t
+sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ struct addr_map_symbol *to_l, *to_r;
+
+ if (!left->branch_info || !right->branch_info)
+ return cmp_null(left->branch_info, right->branch_info);
+
+ to_l = &left->branch_info->to;
+ to_r = &right->branch_info->to;
+
+ if (!to_l->ms.sym && !to_r->ms.sym)
+ return _sort__addr_cmp(to_l->addr, to_r->addr);
+
+ return _sort__sym_cmp(to_l->ms.sym, to_r->ms.sym);
+}
+
+static int hist_entry__sym_from_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ if (he->branch_info) {
+ struct addr_map_symbol *from = &he->branch_info->from;
+
+ return _hist_entry__sym_snprintf(&from->ms, from->al_addr,
+ from->al_level, bf, size, width);
+ }
+
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+}
+
+static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ if (he->branch_info) {
+ struct addr_map_symbol *to = &he->branch_info->to;
+
+ return _hist_entry__sym_snprintf(&to->ms, to->al_addr,
+ to->al_level, bf, size, width);
+ }
+
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+}
+
+static int hist_entry__sym_from_filter(struct hist_entry *he, int type,
+ const void *arg)
+{
+ const char *sym = arg;
+
+ if (type != HIST_FILTER__SYMBOL)
+ return -1;
+
+ return sym && !(he->branch_info && he->branch_info->from.ms.sym &&
+ strstr(he->branch_info->from.ms.sym->name, sym));
+}
+
+static int hist_entry__sym_to_filter(struct hist_entry *he, int type,
+ const void *arg)
+{
+ const char *sym = arg;
+
+ if (type != HIST_FILTER__SYMBOL)
+ return -1;
+
+ return sym && !(he->branch_info && he->branch_info->to.ms.sym &&
+ strstr(he->branch_info->to.ms.sym->name, sym));
+}
+
+struct sort_entry sort_dso_from = {
+ .se_header = "Source Shared Object",
+ .se_cmp = sort__dso_from_cmp,
+ .se_snprintf = hist_entry__dso_from_snprintf,
+ .se_filter = hist_entry__dso_from_filter,
+ .se_width_idx = HISTC_DSO_FROM,
+};
+
+struct sort_entry sort_dso_to = {
+ .se_header = "Target Shared Object",
+ .se_cmp = sort__dso_to_cmp,
+ .se_snprintf = hist_entry__dso_to_snprintf,
+ .se_filter = hist_entry__dso_to_filter,
+ .se_width_idx = HISTC_DSO_TO,
+};
+
+struct sort_entry sort_sym_from = {
+ .se_header = "Source Symbol",
+ .se_cmp = sort__sym_from_cmp,
+ .se_snprintf = hist_entry__sym_from_snprintf,
+ .se_filter = hist_entry__sym_from_filter,
+ .se_width_idx = HISTC_SYMBOL_FROM,
+};
+
+struct sort_entry sort_sym_to = {
+ .se_header = "Target Symbol",
+ .se_cmp = sort__sym_to_cmp,
+ .se_snprintf = hist_entry__sym_to_snprintf,
+ .se_filter = hist_entry__sym_to_filter,
+ .se_width_idx = HISTC_SYMBOL_TO,
+};
+
+static int _hist_entry__addr_snprintf(struct map_symbol *ms,
+ u64 ip, char level, char *bf, size_t size,
+ unsigned int width)
+{
+ struct symbol *sym = ms->sym;
+ struct map *map = ms->map;
+ size_t ret = 0, offs;
+
+ ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);
+ if (sym && map) {
+ if (sym->type == STT_OBJECT) {
+ ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name);
+ ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx",
+ ip - map__unmap_ip(map, sym->start));
+ } else {
+ ret += repsep_snprintf(bf + ret, size - ret, "%.*s",
+ width - ret,
+ sym->name);
+ offs = ip - sym->start;
+ if (offs)
+ ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx", offs);
+ }
+ } else {
+ size_t len = BITS_PER_LONG / 4;
+ ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx",
+ len, ip);
+ }
+
+ return ret;
+}
+
+static int hist_entry__addr_from_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ if (he->branch_info) {
+ struct addr_map_symbol *from = &he->branch_info->from;
+
+ return _hist_entry__addr_snprintf(&from->ms, from->al_addr,
+ he->level, bf, size, width);
+ }
+
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+}
+
+static int hist_entry__addr_to_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ if (he->branch_info) {
+ struct addr_map_symbol *to = &he->branch_info->to;
+
+ return _hist_entry__addr_snprintf(&to->ms, to->al_addr,
+ he->level, bf, size, width);
+ }
+
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+}
+
+static int64_t
+sort__addr_from_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ struct addr_map_symbol *from_l;
+ struct addr_map_symbol *from_r;
+ int64_t ret;
+
+ if (!left->branch_info || !right->branch_info)
+ return cmp_null(left->branch_info, right->branch_info);
+
+ from_l = &left->branch_info->from;
+ from_r = &right->branch_info->from;
+
+ /*
+ * comparing symbol address alone is not enough since it's a
+ * relative address within a dso.
+ */
+ ret = _sort__dso_cmp(from_l->ms.map, from_r->ms.map);
+ if (ret != 0)
+ return ret;
+
+ return _sort__addr_cmp(from_l->addr, from_r->addr);
+}
+
+static int64_t
+sort__addr_to_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ struct addr_map_symbol *to_l;
+ struct addr_map_symbol *to_r;
+ int64_t ret;
+
+ if (!left->branch_info || !right->branch_info)
+ return cmp_null(left->branch_info, right->branch_info);
+
+ to_l = &left->branch_info->to;
+ to_r = &right->branch_info->to;
+
+ /*
+ * comparing symbol address alone is not enough since it's a
+ * relative address within a dso.
+ */
+ ret = _sort__dso_cmp(to_l->ms.map, to_r->ms.map);
+ if (ret != 0)
+ return ret;
+
+ return _sort__addr_cmp(to_l->addr, to_r->addr);
+}
+
+struct sort_entry sort_addr_from = {
+ .se_header = "Source Address",
+ .se_cmp = sort__addr_from_cmp,
+ .se_snprintf = hist_entry__addr_from_snprintf,
+ .se_filter = hist_entry__sym_from_filter, /* shared with sym_from */
+ .se_width_idx = HISTC_ADDR_FROM,
+};
+
+struct sort_entry sort_addr_to = {
+ .se_header = "Target Address",
+ .se_cmp = sort__addr_to_cmp,
+ .se_snprintf = hist_entry__addr_to_snprintf,
+ .se_filter = hist_entry__sym_to_filter, /* shared with sym_to */
+ .se_width_idx = HISTC_ADDR_TO,
+};
+
+
+static int64_t
+sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ unsigned char mp, p;
+
+ if (!left->branch_info || !right->branch_info)
+ return cmp_null(left->branch_info, right->branch_info);
+
+ mp = left->branch_info->flags.mispred != right->branch_info->flags.mispred;
+ p = left->branch_info->flags.predicted != right->branch_info->flags.predicted;
+ return mp || p;
+}
+
+static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width){
+ static const char *out = "N/A";
+
+ if (he->branch_info) {
+ if (he->branch_info->flags.predicted)
+ out = "N";
+ else if (he->branch_info->flags.mispred)
+ out = "Y";
+ }
+
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, out);
+}
+
+static int64_t
+sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ if (!left->branch_info || !right->branch_info)
+ return cmp_null(left->branch_info, right->branch_info);
+
+ return left->branch_info->flags.cycles -
+ right->branch_info->flags.cycles;
+}
+
+static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ if (!he->branch_info)
+ return scnprintf(bf, size, "%-.*s", width, "N/A");
+ if (he->branch_info->flags.cycles == 0)
+ return repsep_snprintf(bf, size, "%-*s", width, "-");
+ return repsep_snprintf(bf, size, "%-*hd", width,
+ he->branch_info->flags.cycles);
+}
+
+struct sort_entry sort_cycles = {
+ .se_header = "Basic Block Cycles",
+ .se_cmp = sort__cycles_cmp,
+ .se_snprintf = hist_entry__cycles_snprintf,
+ .se_width_idx = HISTC_CYCLES,
+};
+
+/* --sort daddr_sym */
+int64_t
+sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ uint64_t l = 0, r = 0;
+
+ if (left->mem_info)
+ l = left->mem_info->daddr.addr;
+ if (right->mem_info)
+ r = right->mem_info->daddr.addr;
+
+ return (int64_t)(r - l);
+}
+
+static int hist_entry__daddr_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ uint64_t addr = 0;
+ struct map_symbol *ms = NULL;
+
+ if (he->mem_info) {
+ addr = he->mem_info->daddr.addr;
+ ms = &he->mem_info->daddr.ms;
+ }
+ return _hist_entry__sym_snprintf(ms, addr, he->level, bf, size, width);
+}
+
+int64_t
+sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ uint64_t l = 0, r = 0;
+
+ if (left->mem_info)
+ l = left->mem_info->iaddr.addr;
+ if (right->mem_info)
+ r = right->mem_info->iaddr.addr;
+
+ return (int64_t)(r - l);
+}
+
+static int hist_entry__iaddr_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ uint64_t addr = 0;
+ struct map_symbol *ms = NULL;
+
+ if (he->mem_info) {
+ addr = he->mem_info->iaddr.addr;
+ ms = &he->mem_info->iaddr.ms;
+ }
+ return _hist_entry__sym_snprintf(ms, addr, he->level, bf, size, width);
+}
+
+static int64_t
+sort__dso_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ struct map *map_l = NULL;
+ struct map *map_r = NULL;
+
+ if (left->mem_info)
+ map_l = left->mem_info->daddr.ms.map;
+ if (right->mem_info)
+ map_r = right->mem_info->daddr.ms.map;
+
+ return _sort__dso_cmp(map_l, map_r);
+}
+
+static int hist_entry__dso_daddr_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ struct map *map = NULL;
+
+ if (he->mem_info)
+ map = he->mem_info->daddr.ms.map;
+
+ return _hist_entry__dso_snprintf(map, bf, size, width);
+}
+
+static int64_t
+sort__locked_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ union perf_mem_data_src data_src_l;
+ union perf_mem_data_src data_src_r;
+
+ if (left->mem_info)
+ data_src_l = left->mem_info->data_src;
+ else
+ data_src_l.mem_lock = PERF_MEM_LOCK_NA;
+
+ if (right->mem_info)
+ data_src_r = right->mem_info->data_src;
+ else
+ data_src_r.mem_lock = PERF_MEM_LOCK_NA;
+
+ return (int64_t)(data_src_r.mem_lock - data_src_l.mem_lock);
+}
+
+static int hist_entry__locked_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ char out[10];
+
+ perf_mem__lck_scnprintf(out, sizeof(out), he->mem_info);
+ return repsep_snprintf(bf, size, "%.*s", width, out);
+}
+
+static int64_t
+sort__tlb_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ union perf_mem_data_src data_src_l;
+ union perf_mem_data_src data_src_r;
+
+ if (left->mem_info)
+ data_src_l = left->mem_info->data_src;
+ else
+ data_src_l.mem_dtlb = PERF_MEM_TLB_NA;
+
+ if (right->mem_info)
+ data_src_r = right->mem_info->data_src;
+ else
+ data_src_r.mem_dtlb = PERF_MEM_TLB_NA;
+
+ return (int64_t)(data_src_r.mem_dtlb - data_src_l.mem_dtlb);
+}
+
+static int hist_entry__tlb_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ char out[64];
+
+ perf_mem__tlb_scnprintf(out, sizeof(out), he->mem_info);
+ return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+static int64_t
+sort__lvl_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ union perf_mem_data_src data_src_l;
+ union perf_mem_data_src data_src_r;
+
+ if (left->mem_info)
+ data_src_l = left->mem_info->data_src;
+ else
+ data_src_l.mem_lvl = PERF_MEM_LVL_NA;
+
+ if (right->mem_info)
+ data_src_r = right->mem_info->data_src;
+ else
+ data_src_r.mem_lvl = PERF_MEM_LVL_NA;
+
+ return (int64_t)(data_src_r.mem_lvl - data_src_l.mem_lvl);
+}
+
+static int hist_entry__lvl_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ char out[64];
+
+ perf_mem__lvl_scnprintf(out, sizeof(out), he->mem_info);
+ return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+static int64_t
+sort__snoop_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ union perf_mem_data_src data_src_l;
+ union perf_mem_data_src data_src_r;
+
+ if (left->mem_info)
+ data_src_l = left->mem_info->data_src;
+ else
+ data_src_l.mem_snoop = PERF_MEM_SNOOP_NA;
+
+ if (right->mem_info)
+ data_src_r = right->mem_info->data_src;
+ else
+ data_src_r.mem_snoop = PERF_MEM_SNOOP_NA;
+
+ return (int64_t)(data_src_r.mem_snoop - data_src_l.mem_snoop);
+}
+
+static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ char out[64];
+
+ perf_mem__snp_scnprintf(out, sizeof(out), he->mem_info);
+ return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+int64_t
+sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ u64 l, r;
+ struct map *l_map, *r_map;
+ struct dso *l_dso, *r_dso;
+ int rc;
+
+ if (!left->mem_info) return -1;
+ if (!right->mem_info) return 1;
+
+ /* group event types together */
+ if (left->cpumode > right->cpumode) return -1;
+ if (left->cpumode < right->cpumode) return 1;
+
+ l_map = left->mem_info->daddr.ms.map;
+ r_map = right->mem_info->daddr.ms.map;
+
+ /* if both are NULL, jump to sort on al_addr instead */
+ if (!l_map && !r_map)
+ goto addr;
+
+ if (!l_map) return -1;
+ if (!r_map) return 1;
+
+ l_dso = map__dso(l_map);
+ r_dso = map__dso(r_map);
+ rc = dso__cmp_id(l_dso, r_dso);
+ if (rc)
+ return rc;
+ /*
+ * Addresses with no major/minor numbers are assumed to be
+ * anonymous in userspace. Sort those on pid then address.
+ *
+ * The kernel and non-zero major/minor mapped areas are
+ * assumed to be unity mapped. Sort those on address.
+ */
+
+ if ((left->cpumode != PERF_RECORD_MISC_KERNEL) &&
+ (!(map__flags(l_map) & MAP_SHARED)) && !l_dso->id.maj && !l_dso->id.min &&
+ !l_dso->id.ino && !l_dso->id.ino_generation) {
+ /* userspace anonymous */
+
+ if (thread__pid(left->thread) > thread__pid(right->thread))
+ return -1;
+ if (thread__pid(left->thread) < thread__pid(right->thread))
+ return 1;
+ }
+
+addr:
+ /* al_addr does all the right addr - start + offset calculations */
+ l = cl_address(left->mem_info->daddr.al_addr, chk_double_cl);
+ r = cl_address(right->mem_info->daddr.al_addr, chk_double_cl);
+
+ if (l > r) return -1;
+ if (l < r) return 1;
+
+ return 0;
+}
+
+static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+
+ uint64_t addr = 0;
+ struct map_symbol *ms = NULL;
+ char level = he->level;
+
+ if (he->mem_info) {
+ struct map *map = he->mem_info->daddr.ms.map;
+ struct dso *dso = map ? map__dso(map) : NULL;
+
+ addr = cl_address(he->mem_info->daddr.al_addr, chk_double_cl);
+ ms = &he->mem_info->daddr.ms;
+
+ /* print [s] for shared data mmaps */
+ if ((he->cpumode != PERF_RECORD_MISC_KERNEL) &&
+ map && !(map__prot(map) & PROT_EXEC) &&
+ (map__flags(map) & MAP_SHARED) &&
+ (dso->id.maj || dso->id.min || dso->id.ino || dso->id.ino_generation))
+ level = 's';
+ else if (!map)
+ level = 'X';
+ }
+ return _hist_entry__sym_snprintf(ms, addr, level, bf, size, width);
+}
+
+struct sort_entry sort_mispredict = {
+ .se_header = "Branch Mispredicted",
+ .se_cmp = sort__mispredict_cmp,
+ .se_snprintf = hist_entry__mispredict_snprintf,
+ .se_width_idx = HISTC_MISPREDICT,
+};
+
+static int64_t
+sort__weight_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return left->weight - right->weight;
+}
+
+static int hist_entry__local_weight_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*llu", width, he->weight);
+}
+
+struct sort_entry sort_local_weight = {
+ .se_header = "Local Weight",
+ .se_cmp = sort__weight_cmp,
+ .se_snprintf = hist_entry__local_weight_snprintf,
+ .se_width_idx = HISTC_LOCAL_WEIGHT,
+};
+
+static int hist_entry__global_weight_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*llu", width,
+ he->weight * he->stat.nr_events);
+}
+
+struct sort_entry sort_global_weight = {
+ .se_header = "Weight",
+ .se_cmp = sort__weight_cmp,
+ .se_snprintf = hist_entry__global_weight_snprintf,
+ .se_width_idx = HISTC_GLOBAL_WEIGHT,
+};
+
+static int64_t
+sort__ins_lat_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return left->ins_lat - right->ins_lat;
+}
+
+static int hist_entry__local_ins_lat_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*u", width, he->ins_lat);
+}
+
+struct sort_entry sort_local_ins_lat = {
+ .se_header = "Local INSTR Latency",
+ .se_cmp = sort__ins_lat_cmp,
+ .se_snprintf = hist_entry__local_ins_lat_snprintf,
+ .se_width_idx = HISTC_LOCAL_INS_LAT,
+};
+
+static int hist_entry__global_ins_lat_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*u", width,
+ he->ins_lat * he->stat.nr_events);
+}
+
+struct sort_entry sort_global_ins_lat = {
+ .se_header = "INSTR Latency",
+ .se_cmp = sort__ins_lat_cmp,
+ .se_snprintf = hist_entry__global_ins_lat_snprintf,
+ .se_width_idx = HISTC_GLOBAL_INS_LAT,
+};
+
+static int64_t
+sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return left->p_stage_cyc - right->p_stage_cyc;
+}
+
+static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*u", width,
+ he->p_stage_cyc * he->stat.nr_events);
+}
+
+
+static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
+}
+
+struct sort_entry sort_local_p_stage_cyc = {
+ .se_header = "Local Pipeline Stage Cycle",
+ .se_cmp = sort__p_stage_cyc_cmp,
+ .se_snprintf = hist_entry__p_stage_cyc_snprintf,
+ .se_width_idx = HISTC_LOCAL_P_STAGE_CYC,
+};
+
+struct sort_entry sort_global_p_stage_cyc = {
+ .se_header = "Pipeline Stage Cycle",
+ .se_cmp = sort__p_stage_cyc_cmp,
+ .se_snprintf = hist_entry__global_p_stage_cyc_snprintf,
+ .se_width_idx = HISTC_GLOBAL_P_STAGE_CYC,
+};
+
+struct sort_entry sort_mem_daddr_sym = {
+ .se_header = "Data Symbol",
+ .se_cmp = sort__daddr_cmp,
+ .se_snprintf = hist_entry__daddr_snprintf,
+ .se_width_idx = HISTC_MEM_DADDR_SYMBOL,
+};
+
+struct sort_entry sort_mem_iaddr_sym = {
+ .se_header = "Code Symbol",
+ .se_cmp = sort__iaddr_cmp,
+ .se_snprintf = hist_entry__iaddr_snprintf,
+ .se_width_idx = HISTC_MEM_IADDR_SYMBOL,
+};
+
+struct sort_entry sort_mem_daddr_dso = {
+ .se_header = "Data Object",
+ .se_cmp = sort__dso_daddr_cmp,
+ .se_snprintf = hist_entry__dso_daddr_snprintf,
+ .se_width_idx = HISTC_MEM_DADDR_DSO,
+};
+
+struct sort_entry sort_mem_locked = {
+ .se_header = "Locked",
+ .se_cmp = sort__locked_cmp,
+ .se_snprintf = hist_entry__locked_snprintf,
+ .se_width_idx = HISTC_MEM_LOCKED,
+};
+
+struct sort_entry sort_mem_tlb = {
+ .se_header = "TLB access",
+ .se_cmp = sort__tlb_cmp,
+ .se_snprintf = hist_entry__tlb_snprintf,
+ .se_width_idx = HISTC_MEM_TLB,
+};
+
+struct sort_entry sort_mem_lvl = {
+ .se_header = "Memory access",
+ .se_cmp = sort__lvl_cmp,
+ .se_snprintf = hist_entry__lvl_snprintf,
+ .se_width_idx = HISTC_MEM_LVL,
+};
+
+struct sort_entry sort_mem_snoop = {
+ .se_header = "Snoop",
+ .se_cmp = sort__snoop_cmp,
+ .se_snprintf = hist_entry__snoop_snprintf,
+ .se_width_idx = HISTC_MEM_SNOOP,
+};
+
+struct sort_entry sort_mem_dcacheline = {
+ .se_header = "Data Cacheline",
+ .se_cmp = sort__dcacheline_cmp,
+ .se_snprintf = hist_entry__dcacheline_snprintf,
+ .se_width_idx = HISTC_MEM_DCACHELINE,
+};
+
+static int64_t
+sort__blocked_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ union perf_mem_data_src data_src_l;
+ union perf_mem_data_src data_src_r;
+
+ if (left->mem_info)
+ data_src_l = left->mem_info->data_src;
+ else
+ data_src_l.mem_blk = PERF_MEM_BLK_NA;
+
+ if (right->mem_info)
+ data_src_r = right->mem_info->data_src;
+ else
+ data_src_r.mem_blk = PERF_MEM_BLK_NA;
+
+ return (int64_t)(data_src_r.mem_blk - data_src_l.mem_blk);
+}
+
+static int hist_entry__blocked_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ char out[16];
+
+ perf_mem__blk_scnprintf(out, sizeof(out), he->mem_info);
+ return repsep_snprintf(bf, size, "%.*s", width, out);
+}
+
+struct sort_entry sort_mem_blocked = {
+ .se_header = "Blocked",
+ .se_cmp = sort__blocked_cmp,
+ .se_snprintf = hist_entry__blocked_snprintf,
+ .se_width_idx = HISTC_MEM_BLOCKED,
+};
+
+static int64_t
+sort__phys_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ uint64_t l = 0, r = 0;
+
+ if (left->mem_info)
+ l = left->mem_info->daddr.phys_addr;
+ if (right->mem_info)
+ r = right->mem_info->daddr.phys_addr;
+
+ return (int64_t)(r - l);
+}
+
+static int hist_entry__phys_daddr_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ uint64_t addr = 0;
+ size_t ret = 0;
+ size_t len = BITS_PER_LONG / 4;
+
+ addr = he->mem_info->daddr.phys_addr;
+
+ ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", he->level);
+
+ ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", len, addr);
+
+ ret += repsep_snprintf(bf + ret, size - ret, "%-*s", width - ret, "");
+
+ if (ret > width)
+ bf[width] = '\0';
+
+ return width;
+}
+
+struct sort_entry sort_mem_phys_daddr = {
+ .se_header = "Data Physical Address",
+ .se_cmp = sort__phys_daddr_cmp,
+ .se_snprintf = hist_entry__phys_daddr_snprintf,
+ .se_width_idx = HISTC_MEM_PHYS_DADDR,
+};
+
+static int64_t
+sort__data_page_size_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ uint64_t l = 0, r = 0;
+
+ if (left->mem_info)
+ l = left->mem_info->daddr.data_page_size;
+ if (right->mem_info)
+ r = right->mem_info->daddr.data_page_size;
+
+ return (int64_t)(r - l);
+}
+
+static int hist_entry__data_page_size_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ char str[PAGE_SIZE_NAME_LEN];
+
+ return repsep_snprintf(bf, size, "%-*s", width,
+ get_page_size_name(he->mem_info->daddr.data_page_size, str));
+}
+
+struct sort_entry sort_mem_data_page_size = {
+ .se_header = "Data Page Size",
+ .se_cmp = sort__data_page_size_cmp,
+ .se_snprintf = hist_entry__data_page_size_snprintf,
+ .se_width_idx = HISTC_MEM_DATA_PAGE_SIZE,
+};
+
+static int64_t
+sort__code_page_size_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ uint64_t l = left->code_page_size;
+ uint64_t r = right->code_page_size;
+
+ return (int64_t)(r - l);
+}
+
+static int hist_entry__code_page_size_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ char str[PAGE_SIZE_NAME_LEN];
+
+ return repsep_snprintf(bf, size, "%-*s", width,
+ get_page_size_name(he->code_page_size, str));
+}
+
+struct sort_entry sort_code_page_size = {
+ .se_header = "Code Page Size",
+ .se_cmp = sort__code_page_size_cmp,
+ .se_snprintf = hist_entry__code_page_size_snprintf,
+ .se_width_idx = HISTC_CODE_PAGE_SIZE,
+};
+
+static int64_t
+sort__abort_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ if (!left->branch_info || !right->branch_info)
+ return cmp_null(left->branch_info, right->branch_info);
+
+ return left->branch_info->flags.abort !=
+ right->branch_info->flags.abort;
+}
+
+static int hist_entry__abort_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ static const char *out = "N/A";
+
+ if (he->branch_info) {
+ if (he->branch_info->flags.abort)
+ out = "A";
+ else
+ out = ".";
+ }
+
+ return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+struct sort_entry sort_abort = {
+ .se_header = "Transaction abort",
+ .se_cmp = sort__abort_cmp,
+ .se_snprintf = hist_entry__abort_snprintf,
+ .se_width_idx = HISTC_ABORT,
+};
+
+static int64_t
+sort__in_tx_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ if (!left->branch_info || !right->branch_info)
+ return cmp_null(left->branch_info, right->branch_info);
+
+ return left->branch_info->flags.in_tx !=
+ right->branch_info->flags.in_tx;
+}
+
+static int hist_entry__in_tx_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ static const char *out = "N/A";
+
+ if (he->branch_info) {
+ if (he->branch_info->flags.in_tx)
+ out = "T";
+ else
+ out = ".";
+ }
+
+ return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+struct sort_entry sort_in_tx = {
+ .se_header = "Branch in transaction",
+ .se_cmp = sort__in_tx_cmp,
+ .se_snprintf = hist_entry__in_tx_snprintf,
+ .se_width_idx = HISTC_IN_TX,
+};
+
+static int64_t
+sort__transaction_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return left->transaction - right->transaction;
+}
+
+static inline char *add_str(char *p, const char *str)
+{
+ strcpy(p, str);
+ return p + strlen(str);
+}
+
+static struct txbit {
+ unsigned flag;
+ const char *name;
+ int skip_for_len;
+} txbits[] = {
+ { PERF_TXN_ELISION, "EL ", 0 },
+ { PERF_TXN_TRANSACTION, "TX ", 1 },
+ { PERF_TXN_SYNC, "SYNC ", 1 },
+ { PERF_TXN_ASYNC, "ASYNC ", 0 },
+ { PERF_TXN_RETRY, "RETRY ", 0 },
+ { PERF_TXN_CONFLICT, "CON ", 0 },
+ { PERF_TXN_CAPACITY_WRITE, "CAP-WRITE ", 1 },
+ { PERF_TXN_CAPACITY_READ, "CAP-READ ", 0 },
+ { 0, NULL, 0 }
+};
+
+int hist_entry__transaction_len(void)
+{
+ int i;
+ int len = 0;
+
+ for (i = 0; txbits[i].name; i++) {
+ if (!txbits[i].skip_for_len)
+ len += strlen(txbits[i].name);
+ }
+ len += 4; /* :XX<space> */
+ return len;
+}
+
+static int hist_entry__transaction_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ u64 t = he->transaction;
+ char buf[128];
+ char *p = buf;
+ int i;
+
+ buf[0] = 0;
+ for (i = 0; txbits[i].name; i++)
+ if (txbits[i].flag & t)
+ p = add_str(p, txbits[i].name);
+ if (t && !(t & (PERF_TXN_SYNC|PERF_TXN_ASYNC)))
+ p = add_str(p, "NEITHER ");
+ if (t & PERF_TXN_ABORT_MASK) {
+ sprintf(p, ":%" PRIx64,
+ (t & PERF_TXN_ABORT_MASK) >>
+ PERF_TXN_ABORT_SHIFT);
+ p += strlen(p);
+ }
+
+ return repsep_snprintf(bf, size, "%-*s", width, buf);
+}
+
+struct sort_entry sort_transaction = {
+ .se_header = "Transaction ",
+ .se_cmp = sort__transaction_cmp,
+ .se_snprintf = hist_entry__transaction_snprintf,
+ .se_width_idx = HISTC_TRANSACTION,
+};
+
+/* --sort symbol_size */
+
+static int64_t _sort__sym_size_cmp(struct symbol *sym_l, struct symbol *sym_r)
+{
+ int64_t size_l = sym_l != NULL ? symbol__size(sym_l) : 0;
+ int64_t size_r = sym_r != NULL ? symbol__size(sym_r) : 0;
+
+ return size_l < size_r ? -1 :
+ size_l == size_r ? 0 : 1;
+}
+
+static int64_t
+sort__sym_size_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return _sort__sym_size_cmp(right->ms.sym, left->ms.sym);
+}
+
+static int _hist_entry__sym_size_snprintf(struct symbol *sym, char *bf,
+ size_t bf_size, unsigned int width)
+{
+ if (sym)
+ return repsep_snprintf(bf, bf_size, "%*d", width, symbol__size(sym));
+
+ return repsep_snprintf(bf, bf_size, "%*s", width, "unknown");
+}
+
+static int hist_entry__sym_size_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return _hist_entry__sym_size_snprintf(he->ms.sym, bf, size, width);
+}
+
+struct sort_entry sort_sym_size = {
+ .se_header = "Symbol size",
+ .se_cmp = sort__sym_size_cmp,
+ .se_snprintf = hist_entry__sym_size_snprintf,
+ .se_width_idx = HISTC_SYM_SIZE,
+};
+
+/* --sort dso_size */
+
+static int64_t _sort__dso_size_cmp(struct map *map_l, struct map *map_r)
+{
+ int64_t size_l = map_l != NULL ? map__size(map_l) : 0;
+ int64_t size_r = map_r != NULL ? map__size(map_r) : 0;
+
+ return size_l < size_r ? -1 :
+ size_l == size_r ? 0 : 1;
+}
+
+static int64_t
+sort__dso_size_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return _sort__dso_size_cmp(right->ms.map, left->ms.map);
+}
+
+static int _hist_entry__dso_size_snprintf(struct map *map, char *bf,
+ size_t bf_size, unsigned int width)
+{
+ if (map && map__dso(map))
+ return repsep_snprintf(bf, bf_size, "%*d", width, map__size(map));
+
+ return repsep_snprintf(bf, bf_size, "%*s", width, "unknown");
+}
+
+static int hist_entry__dso_size_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return _hist_entry__dso_size_snprintf(he->ms.map, bf, size, width);
+}
+
+struct sort_entry sort_dso_size = {
+ .se_header = "DSO size",
+ .se_cmp = sort__dso_size_cmp,
+ .se_snprintf = hist_entry__dso_size_snprintf,
+ .se_width_idx = HISTC_DSO_SIZE,
+};
+
+/* --sort dso_size */
+
+static int64_t
+sort__addr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ u64 left_ip = left->ip;
+ u64 right_ip = right->ip;
+ struct map *left_map = left->ms.map;
+ struct map *right_map = right->ms.map;
+
+ if (left_map)
+ left_ip = map__unmap_ip(left_map, left_ip);
+ if (right_map)
+ right_ip = map__unmap_ip(right_map, right_ip);
+
+ return _sort__addr_cmp(left_ip, right_ip);
+}
+
+static int hist_entry__addr_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ u64 ip = he->ip;
+ struct map *map = he->ms.map;
+
+ if (map)
+ ip = map__unmap_ip(map, ip);
+
+ return repsep_snprintf(bf, size, "%-#*llx", width, ip);
+}
+
+struct sort_entry sort_addr = {
+ .se_header = "Address",
+ .se_cmp = sort__addr_cmp,
+ .se_snprintf = hist_entry__addr_snprintf,
+ .se_width_idx = HISTC_ADDR,
+};
+
+
+struct sort_dimension {
+ const char *name;
+ struct sort_entry *entry;
+ int taken;
+};
+
+int __weak arch_support_sort_key(const char *sort_key __maybe_unused)
+{
+ return 0;
+}
+
+const char * __weak arch_perf_header_entry(const char *se_header)
+{
+ return se_header;
+}
+
+static void sort_dimension_add_dynamic_header(struct sort_dimension *sd)
+{
+ sd->entry->se_header = arch_perf_header_entry(sd->entry->se_header);
+}
+
+#define DIM(d, n, func) [d] = { .name = n, .entry = &(func) }
+
+static struct sort_dimension common_sort_dimensions[] = {
+ DIM(SORT_PID, "pid", sort_thread),
+ DIM(SORT_COMM, "comm", sort_comm),
+ DIM(SORT_DSO, "dso", sort_dso),
+ DIM(SORT_SYM, "symbol", sort_sym),
+ DIM(SORT_PARENT, "parent", sort_parent),
+ DIM(SORT_CPU, "cpu", sort_cpu),
+ DIM(SORT_SOCKET, "socket", sort_socket),
+ DIM(SORT_SRCLINE, "srcline", sort_srcline),
+ DIM(SORT_SRCFILE, "srcfile", sort_srcfile),
+ DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight),
+ DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight),
+ DIM(SORT_TRANSACTION, "transaction", sort_transaction),
+#ifdef HAVE_LIBTRACEEVENT
+ DIM(SORT_TRACE, "trace", sort_trace),
+#endif
+ DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size),
+ DIM(SORT_DSO_SIZE, "dso_size", sort_dso_size),
+ DIM(SORT_CGROUP, "cgroup", sort_cgroup),
+ DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id),
+ DIM(SORT_SYM_IPC_NULL, "ipc_null", sort_sym_ipc_null),
+ DIM(SORT_TIME, "time", sort_time),
+ DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
+ DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
+ DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
+ DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
+ DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
+ DIM(SORT_ADDR, "addr", sort_addr),
+ DIM(SORT_LOCAL_RETIRE_LAT, "local_retire_lat", sort_local_p_stage_cyc),
+ DIM(SORT_GLOBAL_RETIRE_LAT, "retire_lat", sort_global_p_stage_cyc),
+ DIM(SORT_SIMD, "simd", sort_simd)
+};
+
+#undef DIM
+
+#define DIM(d, n, func) [d - __SORT_BRANCH_STACK] = { .name = n, .entry = &(func) }
+
+static struct sort_dimension bstack_sort_dimensions[] = {
+ DIM(SORT_DSO_FROM, "dso_from", sort_dso_from),
+ DIM(SORT_DSO_TO, "dso_to", sort_dso_to),
+ DIM(SORT_SYM_FROM, "symbol_from", sort_sym_from),
+ DIM(SORT_SYM_TO, "symbol_to", sort_sym_to),
+ DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
+ DIM(SORT_IN_TX, "in_tx", sort_in_tx),
+ DIM(SORT_ABORT, "abort", sort_abort),
+ DIM(SORT_CYCLES, "cycles", sort_cycles),
+ DIM(SORT_SRCLINE_FROM, "srcline_from", sort_srcline_from),
+ DIM(SORT_SRCLINE_TO, "srcline_to", sort_srcline_to),
+ DIM(SORT_SYM_IPC, "ipc_lbr", sort_sym_ipc),
+ DIM(SORT_ADDR_FROM, "addr_from", sort_addr_from),
+ DIM(SORT_ADDR_TO, "addr_to", sort_addr_to),
+};
+
+#undef DIM
+
+#define DIM(d, n, func) [d - __SORT_MEMORY_MODE] = { .name = n, .entry = &(func) }
+
+static struct sort_dimension memory_sort_dimensions[] = {
+ DIM(SORT_MEM_DADDR_SYMBOL, "symbol_daddr", sort_mem_daddr_sym),
+ DIM(SORT_MEM_IADDR_SYMBOL, "symbol_iaddr", sort_mem_iaddr_sym),
+ DIM(SORT_MEM_DADDR_DSO, "dso_daddr", sort_mem_daddr_dso),
+ DIM(SORT_MEM_LOCKED, "locked", sort_mem_locked),
+ DIM(SORT_MEM_TLB, "tlb", sort_mem_tlb),
+ DIM(SORT_MEM_LVL, "mem", sort_mem_lvl),
+ DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop),
+ DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline),
+ DIM(SORT_MEM_PHYS_DADDR, "phys_daddr", sort_mem_phys_daddr),
+ DIM(SORT_MEM_DATA_PAGE_SIZE, "data_page_size", sort_mem_data_page_size),
+ DIM(SORT_MEM_BLOCKED, "blocked", sort_mem_blocked),
+};
+
+#undef DIM
+
+struct hpp_dimension {
+ const char *name;
+ struct perf_hpp_fmt *fmt;
+ int taken;
+};
+
+#define DIM(d, n) { .name = n, .fmt = &perf_hpp__format[d], }
+
+static struct hpp_dimension hpp_sort_dimensions[] = {
+ DIM(PERF_HPP__OVERHEAD, "overhead"),
+ DIM(PERF_HPP__OVERHEAD_SYS, "overhead_sys"),
+ DIM(PERF_HPP__OVERHEAD_US, "overhead_us"),
+ DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"),
+ DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"),
+ DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"),
+ DIM(PERF_HPP__SAMPLES, "sample"),
+ DIM(PERF_HPP__PERIOD, "period"),
+};
+
+#undef DIM
+
+struct hpp_sort_entry {
+ struct perf_hpp_fmt hpp;
+ struct sort_entry *se;
+};
+
+void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists)
+{
+ struct hpp_sort_entry *hse;
+
+ if (!perf_hpp__is_sort_entry(fmt))
+ return;
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+ hists__new_col_len(hists, hse->se->se_width_idx, strlen(fmt->name));
+}
+
+static int __sort__hpp_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hists *hists, int line __maybe_unused,
+ int *span __maybe_unused)
+{
+ struct hpp_sort_entry *hse;
+ size_t len = fmt->user_len;
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+
+ if (!len)
+ len = hists__col_len(hists, hse->se->se_width_idx);
+
+ return scnprintf(hpp->buf, hpp->size, "%-*.*s", len, len, fmt->name);
+}
+
+static int __sort__hpp_width(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp __maybe_unused,
+ struct hists *hists)
+{
+ struct hpp_sort_entry *hse;
+ size_t len = fmt->user_len;
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+
+ if (!len)
+ len = hists__col_len(hists, hse->se->se_width_idx);
+
+ return len;
+}
+
+static int __sort__hpp_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct hpp_sort_entry *hse;
+ size_t len = fmt->user_len;
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+
+ if (!len)
+ len = hists__col_len(he->hists, hse->se->se_width_idx);
+
+ return hse->se->se_snprintf(he, hpp->buf, hpp->size, len);
+}
+
+static int64_t __sort__hpp_cmp(struct perf_hpp_fmt *fmt,
+ struct hist_entry *a, struct hist_entry *b)
+{
+ struct hpp_sort_entry *hse;
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+ return hse->se->se_cmp(a, b);
+}
+
+static int64_t __sort__hpp_collapse(struct perf_hpp_fmt *fmt,
+ struct hist_entry *a, struct hist_entry *b)
+{
+ struct hpp_sort_entry *hse;
+ int64_t (*collapse_fn)(struct hist_entry *, struct hist_entry *);
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+ collapse_fn = hse->se->se_collapse ?: hse->se->se_cmp;
+ return collapse_fn(a, b);
+}
+
+static int64_t __sort__hpp_sort(struct perf_hpp_fmt *fmt,
+ struct hist_entry *a, struct hist_entry *b)
+{
+ struct hpp_sort_entry *hse;
+ int64_t (*sort_fn)(struct hist_entry *, struct hist_entry *);
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+ sort_fn = hse->se->se_sort ?: hse->se->se_cmp;
+ return sort_fn(a, b);
+}
+
+bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format)
+{
+ return format->header == __sort__hpp_header;
+}
+
+#define MK_SORT_ENTRY_CHK(key) \
+bool perf_hpp__is_ ## key ## _entry(struct perf_hpp_fmt *fmt) \
+{ \
+ struct hpp_sort_entry *hse; \
+ \
+ if (!perf_hpp__is_sort_entry(fmt)) \
+ return false; \
+ \
+ hse = container_of(fmt, struct hpp_sort_entry, hpp); \
+ return hse->se == &sort_ ## key ; \
+}
+
+#ifdef HAVE_LIBTRACEEVENT
+MK_SORT_ENTRY_CHK(trace)
+#else
+bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt __maybe_unused)
+{
+ return false;
+}
+#endif
+MK_SORT_ENTRY_CHK(srcline)
+MK_SORT_ENTRY_CHK(srcfile)
+MK_SORT_ENTRY_CHK(thread)
+MK_SORT_ENTRY_CHK(comm)
+MK_SORT_ENTRY_CHK(dso)
+MK_SORT_ENTRY_CHK(sym)
+
+
+static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+ struct hpp_sort_entry *hse_a;
+ struct hpp_sort_entry *hse_b;
+
+ if (!perf_hpp__is_sort_entry(a) || !perf_hpp__is_sort_entry(b))
+ return false;
+
+ hse_a = container_of(a, struct hpp_sort_entry, hpp);
+ hse_b = container_of(b, struct hpp_sort_entry, hpp);
+
+ return hse_a->se == hse_b->se;
+}
+
+static void hse_free(struct perf_hpp_fmt *fmt)
+{
+ struct hpp_sort_entry *hse;
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+ free(hse);
+}
+
+static void hse_init(struct perf_hpp_fmt *fmt, struct hist_entry *he)
+{
+ struct hpp_sort_entry *hse;
+
+ if (!perf_hpp__is_sort_entry(fmt))
+ return;
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+
+ if (hse->se->se_init)
+ hse->se->se_init(he);
+}
+
+static struct hpp_sort_entry *
+__sort_dimension__alloc_hpp(struct sort_dimension *sd, int level)
+{
+ struct hpp_sort_entry *hse;
+
+ hse = malloc(sizeof(*hse));
+ if (hse == NULL) {
+ pr_err("Memory allocation failed\n");
+ return NULL;
+ }
+
+ hse->se = sd->entry;
+ hse->hpp.name = sd->entry->se_header;
+ hse->hpp.header = __sort__hpp_header;
+ hse->hpp.width = __sort__hpp_width;
+ hse->hpp.entry = __sort__hpp_entry;
+ hse->hpp.color = NULL;
+
+ hse->hpp.cmp = __sort__hpp_cmp;
+ hse->hpp.collapse = __sort__hpp_collapse;
+ hse->hpp.sort = __sort__hpp_sort;
+ hse->hpp.equal = __sort__hpp_equal;
+ hse->hpp.free = hse_free;
+ hse->hpp.init = hse_init;
+
+ INIT_LIST_HEAD(&hse->hpp.list);
+ INIT_LIST_HEAD(&hse->hpp.sort_list);
+ hse->hpp.elide = false;
+ hse->hpp.len = 0;
+ hse->hpp.user_len = 0;
+ hse->hpp.level = level;
+
+ return hse;
+}
+
+static void hpp_free(struct perf_hpp_fmt *fmt)
+{
+ free(fmt);
+}
+
+static struct perf_hpp_fmt *__hpp_dimension__alloc_hpp(struct hpp_dimension *hd,
+ int level)
+{
+ struct perf_hpp_fmt *fmt;
+
+ fmt = memdup(hd->fmt, sizeof(*fmt));
+ if (fmt) {
+ INIT_LIST_HEAD(&fmt->list);
+ INIT_LIST_HEAD(&fmt->sort_list);
+ fmt->free = hpp_free;
+ fmt->level = level;
+ }
+
+ return fmt;
+}
+
+int hist_entry__filter(struct hist_entry *he, int type, const void *arg)
+{
+ struct perf_hpp_fmt *fmt;
+ struct hpp_sort_entry *hse;
+ int ret = -1;
+ int r;
+
+ perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+ if (!perf_hpp__is_sort_entry(fmt))
+ continue;
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+ if (hse->se->se_filter == NULL)
+ continue;
+
+ /*
+ * hist entry is filtered if any of sort key in the hpp list
+ * is applied. But it should skip non-matched filter types.
+ */
+ r = hse->se->se_filter(he, type, arg);
+ if (r >= 0) {
+ if (ret < 0)
+ ret = 0;
+ ret |= r;
+ }
+ }
+
+ return ret;
+}
+
+static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd,
+ struct perf_hpp_list *list,
+ int level)
+{
+ struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, level);
+
+ if (hse == NULL)
+ return -1;
+
+ perf_hpp_list__register_sort_field(list, &hse->hpp);
+ return 0;
+}
+
+static int __sort_dimension__add_hpp_output(struct sort_dimension *sd,
+ struct perf_hpp_list *list)
+{
+ struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, 0);
+
+ if (hse == NULL)
+ return -1;
+
+ perf_hpp_list__column_register(list, &hse->hpp);
+ return 0;
+}
+
+#ifndef HAVE_LIBTRACEEVENT
+bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *fmt __maybe_unused)
+{
+ return false;
+}
+bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hists *hists __maybe_unused)
+{
+ return false;
+}
+#else
+struct hpp_dynamic_entry {
+ struct perf_hpp_fmt hpp;
+ struct evsel *evsel;
+ struct tep_format_field *field;
+ unsigned dynamic_len;
+ bool raw_trace;
+};
+
+static int hde_width(struct hpp_dynamic_entry *hde)
+{
+ if (!hde->hpp.len) {
+ int len = hde->dynamic_len;
+ int namelen = strlen(hde->field->name);
+ int fieldlen = hde->field->size;
+
+ if (namelen > len)
+ len = namelen;
+
+ if (!(hde->field->flags & TEP_FIELD_IS_STRING)) {
+ /* length for print hex numbers */
+ fieldlen = hde->field->size * 2 + 2;
+ }
+ if (fieldlen > len)
+ len = fieldlen;
+
+ hde->hpp.len = len;
+ }
+ return hde->hpp.len;
+}
+
+static void update_dynamic_len(struct hpp_dynamic_entry *hde,
+ struct hist_entry *he)
+{
+ char *str, *pos;
+ struct tep_format_field *field = hde->field;
+ size_t namelen;
+ bool last = false;
+
+ if (hde->raw_trace)
+ return;
+
+ /* parse pretty print result and update max length */
+ if (!he->trace_output)
+ he->trace_output = get_trace_output(he);
+
+ namelen = strlen(field->name);
+ str = he->trace_output;
+
+ while (str) {
+ pos = strchr(str, ' ');
+ if (pos == NULL) {
+ last = true;
+ pos = str + strlen(str);
+ }
+
+ if (!strncmp(str, field->name, namelen)) {
+ size_t len;
+
+ str += namelen + 1;
+ len = pos - str;
+
+ if (len > hde->dynamic_len)
+ hde->dynamic_len = len;
+ break;
+ }
+
+ if (last)
+ str = NULL;
+ else
+ str = pos + 1;
+ }
+}
+
+static int __sort__hde_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hists *hists __maybe_unused,
+ int line __maybe_unused,
+ int *span __maybe_unused)
+{
+ struct hpp_dynamic_entry *hde;
+ size_t len = fmt->user_len;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+ if (!len)
+ len = hde_width(hde);
+
+ return scnprintf(hpp->buf, hpp->size, "%*.*s", len, len, hde->field->name);
+}
+
+static int __sort__hde_width(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp __maybe_unused,
+ struct hists *hists __maybe_unused)
+{
+ struct hpp_dynamic_entry *hde;
+ size_t len = fmt->user_len;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+ if (!len)
+ len = hde_width(hde);
+
+ return len;
+}
+
+bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists)
+{
+ struct hpp_dynamic_entry *hde;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+ return hists_to_evsel(hists) == hde->evsel;
+}
+
+static int __sort__hde_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct hpp_dynamic_entry *hde;
+ size_t len = fmt->user_len;
+ char *str, *pos;
+ struct tep_format_field *field;
+ size_t namelen;
+ bool last = false;
+ int ret;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+ if (!len)
+ len = hde_width(hde);
+
+ if (hde->raw_trace)
+ goto raw_field;
+
+ if (!he->trace_output)
+ he->trace_output = get_trace_output(he);
+
+ field = hde->field;
+ namelen = strlen(field->name);
+ str = he->trace_output;
+
+ while (str) {
+ pos = strchr(str, ' ');
+ if (pos == NULL) {
+ last = true;
+ pos = str + strlen(str);
+ }
+
+ if (!strncmp(str, field->name, namelen)) {
+ str += namelen + 1;
+ str = strndup(str, pos - str);
+
+ if (str == NULL)
+ return scnprintf(hpp->buf, hpp->size,
+ "%*.*s", len, len, "ERROR");
+ break;
+ }
+
+ if (last)
+ str = NULL;
+ else
+ str = pos + 1;
+ }
+
+ if (str == NULL) {
+ struct trace_seq seq;
+raw_field:
+ trace_seq_init(&seq);
+ tep_print_field(&seq, he->raw_data, hde->field);
+ str = seq.buffer;
+ }
+
+ ret = scnprintf(hpp->buf, hpp->size, "%*.*s", len, len, str);
+ free(str);
+ return ret;
+}
+
+static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
+ struct hist_entry *a, struct hist_entry *b)
+{
+ struct hpp_dynamic_entry *hde;
+ struct tep_format_field *field;
+ unsigned offset, size;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+ field = hde->field;
+ if (field->flags & TEP_FIELD_IS_DYNAMIC) {
+ unsigned long long dyn;
+
+ tep_read_number_field(field, a->raw_data, &dyn);
+ offset = dyn & 0xffff;
+ size = (dyn >> 16) & 0xffff;
+ if (tep_field_is_relative(field->flags))
+ offset += field->offset + field->size;
+ /* record max width for output */
+ if (size > hde->dynamic_len)
+ hde->dynamic_len = size;
+ } else {
+ offset = field->offset;
+ size = field->size;
+ }
+
+ return memcmp(a->raw_data + offset, b->raw_data + offset, size);
+}
+
+bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *fmt)
+{
+ return fmt->cmp == __sort__hde_cmp;
+}
+
+static bool __sort__hde_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+ struct hpp_dynamic_entry *hde_a;
+ struct hpp_dynamic_entry *hde_b;
+
+ if (!perf_hpp__is_dynamic_entry(a) || !perf_hpp__is_dynamic_entry(b))
+ return false;
+
+ hde_a = container_of(a, struct hpp_dynamic_entry, hpp);
+ hde_b = container_of(b, struct hpp_dynamic_entry, hpp);
+
+ return hde_a->field == hde_b->field;
+}
+
+static void hde_free(struct perf_hpp_fmt *fmt)
+{
+ struct hpp_dynamic_entry *hde;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+ free(hde);
+}
+
+static void __sort__hde_init(struct perf_hpp_fmt *fmt, struct hist_entry *he)
+{
+ struct hpp_dynamic_entry *hde;
+
+ if (!perf_hpp__is_dynamic_entry(fmt))
+ return;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+ update_dynamic_len(hde, he);
+}
+
+static struct hpp_dynamic_entry *
+__alloc_dynamic_entry(struct evsel *evsel, struct tep_format_field *field,
+ int level)
+{
+ struct hpp_dynamic_entry *hde;
+
+ hde = malloc(sizeof(*hde));
+ if (hde == NULL) {
+ pr_debug("Memory allocation failed\n");
+ return NULL;
+ }
+
+ hde->evsel = evsel;
+ hde->field = field;
+ hde->dynamic_len = 0;
+
+ hde->hpp.name = field->name;
+ hde->hpp.header = __sort__hde_header;
+ hde->hpp.width = __sort__hde_width;
+ hde->hpp.entry = __sort__hde_entry;
+ hde->hpp.color = NULL;
+
+ hde->hpp.init = __sort__hde_init;
+ hde->hpp.cmp = __sort__hde_cmp;
+ hde->hpp.collapse = __sort__hde_cmp;
+ hde->hpp.sort = __sort__hde_cmp;
+ hde->hpp.equal = __sort__hde_equal;
+ hde->hpp.free = hde_free;
+
+ INIT_LIST_HEAD(&hde->hpp.list);
+ INIT_LIST_HEAD(&hde->hpp.sort_list);
+ hde->hpp.elide = false;
+ hde->hpp.len = 0;
+ hde->hpp.user_len = 0;
+ hde->hpp.level = level;
+
+ return hde;
+}
+#endif /* HAVE_LIBTRACEEVENT */
+
+struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt)
+{
+ struct perf_hpp_fmt *new_fmt = NULL;
+
+ if (perf_hpp__is_sort_entry(fmt)) {
+ struct hpp_sort_entry *hse, *new_hse;
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+ new_hse = memdup(hse, sizeof(*hse));
+ if (new_hse)
+ new_fmt = &new_hse->hpp;
+#ifdef HAVE_LIBTRACEEVENT
+ } else if (perf_hpp__is_dynamic_entry(fmt)) {
+ struct hpp_dynamic_entry *hde, *new_hde;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+ new_hde = memdup(hde, sizeof(*hde));
+ if (new_hde)
+ new_fmt = &new_hde->hpp;
+#endif
+ } else {
+ new_fmt = memdup(fmt, sizeof(*fmt));
+ }
+
+ INIT_LIST_HEAD(&new_fmt->list);
+ INIT_LIST_HEAD(&new_fmt->sort_list);
+
+ return new_fmt;
+}
+
+static int parse_field_name(char *str, char **event, char **field, char **opt)
+{
+ char *event_name, *field_name, *opt_name;
+
+ event_name = str;
+ field_name = strchr(str, '.');
+
+ if (field_name) {
+ *field_name++ = '\0';
+ } else {
+ event_name = NULL;
+ field_name = str;
+ }
+
+ opt_name = strchr(field_name, '/');
+ if (opt_name)
+ *opt_name++ = '\0';
+
+ *event = event_name;
+ *field = field_name;
+ *opt = opt_name;
+
+ return 0;
+}
+
+/* find match evsel using a given event name. The event name can be:
+ * 1. '%' + event index (e.g. '%1' for first event)
+ * 2. full event name (e.g. sched:sched_switch)
+ * 3. partial event name (should not contain ':')
+ */
+static struct evsel *find_evsel(struct evlist *evlist, char *event_name)
+{
+ struct evsel *evsel = NULL;
+ struct evsel *pos;
+ bool full_name;
+
+ /* case 1 */
+ if (event_name[0] == '%') {
+ int nr = strtol(event_name+1, NULL, 0);
+
+ if (nr > evlist->core.nr_entries)
+ return NULL;
+
+ evsel = evlist__first(evlist);
+ while (--nr > 0)
+ evsel = evsel__next(evsel);
+
+ return evsel;
+ }
+
+ full_name = !!strchr(event_name, ':');
+ evlist__for_each_entry(evlist, pos) {
+ /* case 2 */
+ if (full_name && evsel__name_is(pos, event_name))
+ return pos;
+ /* case 3 */
+ if (!full_name && strstr(pos->name, event_name)) {
+ if (evsel) {
+ pr_debug("'%s' event is ambiguous: it can be %s or %s\n",
+ event_name, evsel->name, pos->name);
+ return NULL;
+ }
+ evsel = pos;
+ }
+ }
+
+ return evsel;
+}
+
+#ifdef HAVE_LIBTRACEEVENT
+static int __dynamic_dimension__add(struct evsel *evsel,
+ struct tep_format_field *field,
+ bool raw_trace, int level)
+{
+ struct hpp_dynamic_entry *hde;
+
+ hde = __alloc_dynamic_entry(evsel, field, level);
+ if (hde == NULL)
+ return -ENOMEM;
+
+ hde->raw_trace = raw_trace;
+
+ perf_hpp__register_sort_field(&hde->hpp);
+ return 0;
+}
+
+static int add_evsel_fields(struct evsel *evsel, bool raw_trace, int level)
+{
+ int ret;
+ struct tep_format_field *field;
+
+ field = evsel->tp_format->format.fields;
+ while (field) {
+ ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
+ if (ret < 0)
+ return ret;
+
+ field = field->next;
+ }
+ return 0;
+}
+
+static int add_all_dynamic_fields(struct evlist *evlist, bool raw_trace,
+ int level)
+{
+ int ret;
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
+ continue;
+
+ ret = add_evsel_fields(evsel, raw_trace, level);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+static int add_all_matching_fields(struct evlist *evlist,
+ char *field_name, bool raw_trace, int level)
+{
+ int ret = -ESRCH;
+ struct evsel *evsel;
+ struct tep_format_field *field;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
+ continue;
+
+ field = tep_find_any_field(evsel->tp_format, field_name);
+ if (field == NULL)
+ continue;
+
+ ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
+ if (ret < 0)
+ break;
+ }
+ return ret;
+}
+#endif /* HAVE_LIBTRACEEVENT */
+
+static int add_dynamic_entry(struct evlist *evlist, const char *tok,
+ int level)
+{
+ char *str, *event_name, *field_name, *opt_name;
+ struct evsel *evsel;
+ bool raw_trace = symbol_conf.raw_trace;
+ int ret = 0;
+
+ if (evlist == NULL)
+ return -ENOENT;
+
+ str = strdup(tok);
+ if (str == NULL)
+ return -ENOMEM;
+
+ if (parse_field_name(str, &event_name, &field_name, &opt_name) < 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (opt_name) {
+ if (strcmp(opt_name, "raw")) {
+ pr_debug("unsupported field option %s\n", opt_name);
+ ret = -EINVAL;
+ goto out;
+ }
+ raw_trace = true;
+ }
+
+#ifdef HAVE_LIBTRACEEVENT
+ if (!strcmp(field_name, "trace_fields")) {
+ ret = add_all_dynamic_fields(evlist, raw_trace, level);
+ goto out;
+ }
+
+ if (event_name == NULL) {
+ ret = add_all_matching_fields(evlist, field_name, raw_trace, level);
+ goto out;
+ }
+#else
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT) {
+ pr_err("%s %s", ret ? "," : "This perf binary isn't linked with libtraceevent, can't process", evsel__name(evsel));
+ ret = -ENOTSUP;
+ }
+ }
+
+ if (ret) {
+ pr_err("\n");
+ goto out;
+ }
+#endif
+
+ evsel = find_evsel(evlist, event_name);
+ if (evsel == NULL) {
+ pr_debug("Cannot find event: %s\n", event_name);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) {
+ pr_debug("%s is not a tracepoint event\n", event_name);
+ ret = -EINVAL;
+ goto out;
+ }
+
+#ifdef HAVE_LIBTRACEEVENT
+ if (!strcmp(field_name, "*")) {
+ ret = add_evsel_fields(evsel, raw_trace, level);
+ } else {
+ struct tep_format_field *field = tep_find_any_field(evsel->tp_format, field_name);
+
+ if (field == NULL) {
+ pr_debug("Cannot find event field for %s.%s\n",
+ event_name, field_name);
+ return -ENOENT;
+ }
+
+ ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
+ }
+#else
+ (void)level;
+ (void)raw_trace;
+#endif /* HAVE_LIBTRACEEVENT */
+
+out:
+ free(str);
+ return ret;
+}
+
+static int __sort_dimension__add(struct sort_dimension *sd,
+ struct perf_hpp_list *list,
+ int level)
+{
+ if (sd->taken)
+ return 0;
+
+ if (__sort_dimension__add_hpp_sort(sd, list, level) < 0)
+ return -1;
+
+ if (sd->entry->se_collapse)
+ list->need_collapse = 1;
+
+ sd->taken = 1;
+
+ return 0;
+}
+
+static int __hpp_dimension__add(struct hpp_dimension *hd,
+ struct perf_hpp_list *list,
+ int level)
+{
+ struct perf_hpp_fmt *fmt;
+
+ if (hd->taken)
+ return 0;
+
+ fmt = __hpp_dimension__alloc_hpp(hd, level);
+ if (!fmt)
+ return -1;
+
+ hd->taken = 1;
+ perf_hpp_list__register_sort_field(list, fmt);
+ return 0;
+}
+
+static int __sort_dimension__add_output(struct perf_hpp_list *list,
+ struct sort_dimension *sd)
+{
+ if (sd->taken)
+ return 0;
+
+ if (__sort_dimension__add_hpp_output(sd, list) < 0)
+ return -1;
+
+ sd->taken = 1;
+ return 0;
+}
+
+static int __hpp_dimension__add_output(struct perf_hpp_list *list,
+ struct hpp_dimension *hd)
+{
+ struct perf_hpp_fmt *fmt;
+
+ if (hd->taken)
+ return 0;
+
+ fmt = __hpp_dimension__alloc_hpp(hd, 0);
+ if (!fmt)
+ return -1;
+
+ hd->taken = 1;
+ perf_hpp_list__column_register(list, fmt);
+ return 0;
+}
+
+int hpp_dimension__add_output(unsigned col)
+{
+ BUG_ON(col >= PERF_HPP__MAX_INDEX);
+ return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]);
+}
+
+int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
+ struct evlist *evlist,
+ int level)
+{
+ unsigned int i, j;
+
+ /*
+ * Check to see if there are any arch specific
+ * sort dimensions not applicable for the current
+ * architecture. If so, Skip that sort key since
+ * we don't want to display it in the output fields.
+ */
+ for (j = 0; j < ARRAY_SIZE(arch_specific_sort_keys); j++) {
+ if (!strcmp(arch_specific_sort_keys[j], tok) &&
+ !arch_support_sort_key(tok)) {
+ return 0;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
+ struct sort_dimension *sd = &common_sort_dimensions[i];
+
+ if (!sd->name || strncasecmp(tok, sd->name, strlen(tok)))
+ continue;
+
+ for (j = 0; j < ARRAY_SIZE(dynamic_headers); j++) {
+ if (sd->name && !strcmp(dynamic_headers[j], sd->name))
+ sort_dimension_add_dynamic_header(sd);
+ }
+
+ if (sd->entry == &sort_parent) {
+ int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
+ if (ret) {
+ char err[BUFSIZ];
+
+ regerror(ret, &parent_regex, err, sizeof(err));
+ pr_err("Invalid regex: %s\n%s", parent_pattern, err);
+ return -EINVAL;
+ }
+ list->parent = 1;
+ } else if (sd->entry == &sort_sym) {
+ list->sym = 1;
+ /*
+ * perf diff displays the performance difference amongst
+ * two or more perf.data files. Those files could come
+ * from different binaries. So we should not compare
+ * their ips, but the name of symbol.
+ */
+ if (sort__mode == SORT_MODE__DIFF)
+ sd->entry->se_collapse = sort__sym_sort;
+
+ } else if (sd->entry == &sort_dso) {
+ list->dso = 1;
+ } else if (sd->entry == &sort_socket) {
+ list->socket = 1;
+ } else if (sd->entry == &sort_thread) {
+ list->thread = 1;
+ } else if (sd->entry == &sort_comm) {
+ list->comm = 1;
+ }
+
+ return __sort_dimension__add(sd, list, level);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
+ struct hpp_dimension *hd = &hpp_sort_dimensions[i];
+
+ if (strncasecmp(tok, hd->name, strlen(tok)))
+ continue;
+
+ return __hpp_dimension__add(hd, list, level);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
+ struct sort_dimension *sd = &bstack_sort_dimensions[i];
+
+ if (!sd->name || strncasecmp(tok, sd->name, strlen(tok)))
+ continue;
+
+ if (sort__mode != SORT_MODE__BRANCH)
+ return -EINVAL;
+
+ if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to)
+ list->sym = 1;
+
+ __sort_dimension__add(sd, list, level);
+ return 0;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) {
+ struct sort_dimension *sd = &memory_sort_dimensions[i];
+
+ if (!sd->name || strncasecmp(tok, sd->name, strlen(tok)))
+ continue;
+
+ if (sort__mode != SORT_MODE__MEMORY)
+ return -EINVAL;
+
+ if (sd->entry == &sort_mem_dcacheline && cacheline_size() == 0)
+ return -EINVAL;
+
+ if (sd->entry == &sort_mem_daddr_sym)
+ list->sym = 1;
+
+ __sort_dimension__add(sd, list, level);
+ return 0;
+ }
+
+ if (!add_dynamic_entry(evlist, tok, level))
+ return 0;
+
+ return -ESRCH;
+}
+
+static int setup_sort_list(struct perf_hpp_list *list, char *str,
+ struct evlist *evlist)
+{
+ char *tmp, *tok;
+ int ret = 0;
+ int level = 0;
+ int next_level = 1;
+ bool in_group = false;
+
+ do {
+ tok = str;
+ tmp = strpbrk(str, "{}, ");
+ if (tmp) {
+ if (in_group)
+ next_level = level;
+ else
+ next_level = level + 1;
+
+ if (*tmp == '{')
+ in_group = true;
+ else if (*tmp == '}')
+ in_group = false;
+
+ *tmp = '\0';
+ str = tmp + 1;
+ }
+
+ if (*tok) {
+ ret = sort_dimension__add(list, tok, evlist, level);
+ if (ret == -EINVAL) {
+ if (!cacheline_size() && !strncasecmp(tok, "dcacheline", strlen(tok)))
+ ui__error("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system");
+ else
+ ui__error("Invalid --sort key: `%s'", tok);
+ break;
+ } else if (ret == -ESRCH) {
+ ui__error("Unknown --sort key: `%s'", tok);
+ break;
+ }
+ }
+
+ level = next_level;
+ } while (tmp);
+
+ return ret;
+}
+
+static const char *get_default_sort_order(struct evlist *evlist)
+{
+ const char *default_sort_orders[] = {
+ default_sort_order,
+ default_branch_sort_order,
+ default_mem_sort_order,
+ default_top_sort_order,
+ default_diff_sort_order,
+ default_tracepoint_sort_order,
+ };
+ bool use_trace = true;
+ struct evsel *evsel;
+
+ BUG_ON(sort__mode >= ARRAY_SIZE(default_sort_orders));
+
+ if (evlist == NULL || evlist__empty(evlist))
+ goto out_no_evlist;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) {
+ use_trace = false;
+ break;
+ }
+ }
+
+ if (use_trace) {
+ sort__mode = SORT_MODE__TRACEPOINT;
+ if (symbol_conf.raw_trace)
+ return "trace_fields";
+ }
+out_no_evlist:
+ return default_sort_orders[sort__mode];
+}
+
+static int setup_sort_order(struct evlist *evlist)
+{
+ char *new_sort_order;
+
+ /*
+ * Append '+'-prefixed sort order to the default sort
+ * order string.
+ */
+ if (!sort_order || is_strict_order(sort_order))
+ return 0;
+
+ if (sort_order[1] == '\0') {
+ ui__error("Invalid --sort key: `+'");
+ return -EINVAL;
+ }
+
+ /*
+ * We allocate new sort_order string, but we never free it,
+ * because it's checked over the rest of the code.
+ */
+ if (asprintf(&new_sort_order, "%s,%s",
+ get_default_sort_order(evlist), sort_order + 1) < 0) {
+ pr_err("Not enough memory to set up --sort");
+ return -ENOMEM;
+ }
+
+ sort_order = new_sort_order;
+ return 0;
+}
+
+/*
+ * Adds 'pre,' prefix into 'str' is 'pre' is
+ * not already part of 'str'.
+ */
+static char *prefix_if_not_in(const char *pre, char *str)
+{
+ char *n;
+
+ if (!str || strstr(str, pre))
+ return str;
+
+ if (asprintf(&n, "%s,%s", pre, str) < 0)
+ n = NULL;
+
+ free(str);
+ return n;
+}
+
+static char *setup_overhead(char *keys)
+{
+ if (sort__mode == SORT_MODE__DIFF)
+ return keys;
+
+ keys = prefix_if_not_in("overhead", keys);
+
+ if (symbol_conf.cumulate_callchain)
+ keys = prefix_if_not_in("overhead_children", keys);
+
+ return keys;
+}
+
+static int __setup_sorting(struct evlist *evlist)
+{
+ char *str;
+ const char *sort_keys;
+ int ret = 0;
+
+ ret = setup_sort_order(evlist);
+ if (ret)
+ return ret;
+
+ sort_keys = sort_order;
+ if (sort_keys == NULL) {
+ if (is_strict_order(field_order)) {
+ /*
+ * If user specified field order but no sort order,
+ * we'll honor it and not add default sort orders.
+ */
+ return 0;
+ }
+
+ sort_keys = get_default_sort_order(evlist);
+ }
+
+ str = strdup(sort_keys);
+ if (str == NULL) {
+ pr_err("Not enough memory to setup sort keys");
+ return -ENOMEM;
+ }
+
+ /*
+ * Prepend overhead fields for backward compatibility.
+ */
+ if (!is_strict_order(field_order)) {
+ str = setup_overhead(str);
+ if (str == NULL) {
+ pr_err("Not enough memory to setup overhead keys");
+ return -ENOMEM;
+ }
+ }
+
+ ret = setup_sort_list(&perf_hpp_list, str, evlist);
+
+ free(str);
+ return ret;
+}
+
+void perf_hpp__set_elide(int idx, bool elide)
+{
+ struct perf_hpp_fmt *fmt;
+ struct hpp_sort_entry *hse;
+
+ perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
+ if (!perf_hpp__is_sort_entry(fmt))
+ continue;
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+ if (hse->se->se_width_idx == idx) {
+ fmt->elide = elide;
+ break;
+ }
+ }
+}
+
+static bool __get_elide(struct strlist *list, const char *list_name, FILE *fp)
+{
+ if (list && strlist__nr_entries(list) == 1) {
+ if (fp != NULL)
+ fprintf(fp, "# %s: %s\n", list_name,
+ strlist__entry(list, 0)->s);
+ return true;
+ }
+ return false;
+}
+
+static bool get_elide(int idx, FILE *output)
+{
+ switch (idx) {
+ case HISTC_SYMBOL:
+ return __get_elide(symbol_conf.sym_list, "symbol", output);
+ case HISTC_DSO:
+ return __get_elide(symbol_conf.dso_list, "dso", output);
+ case HISTC_COMM:
+ return __get_elide(symbol_conf.comm_list, "comm", output);
+ default:
+ break;
+ }
+
+ if (sort__mode != SORT_MODE__BRANCH)
+ return false;
+
+ switch (idx) {
+ case HISTC_SYMBOL_FROM:
+ return __get_elide(symbol_conf.sym_from_list, "sym_from", output);
+ case HISTC_SYMBOL_TO:
+ return __get_elide(symbol_conf.sym_to_list, "sym_to", output);
+ case HISTC_DSO_FROM:
+ return __get_elide(symbol_conf.dso_from_list, "dso_from", output);
+ case HISTC_DSO_TO:
+ return __get_elide(symbol_conf.dso_to_list, "dso_to", output);
+ case HISTC_ADDR_FROM:
+ return __get_elide(symbol_conf.sym_from_list, "addr_from", output);
+ case HISTC_ADDR_TO:
+ return __get_elide(symbol_conf.sym_to_list, "addr_to", output);
+ default:
+ break;
+ }
+
+ return false;
+}
+
+void sort__setup_elide(FILE *output)
+{
+ struct perf_hpp_fmt *fmt;
+ struct hpp_sort_entry *hse;
+
+ perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
+ if (!perf_hpp__is_sort_entry(fmt))
+ continue;
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+ fmt->elide = get_elide(hse->se->se_width_idx, output);
+ }
+
+ /*
+ * It makes no sense to elide all of sort entries.
+ * Just revert them to show up again.
+ */
+ perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
+ if (!perf_hpp__is_sort_entry(fmt))
+ continue;
+
+ if (!fmt->elide)
+ return;
+ }
+
+ perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
+ if (!perf_hpp__is_sort_entry(fmt))
+ continue;
+
+ fmt->elide = false;
+ }
+}
+
+int output_field_add(struct perf_hpp_list *list, char *tok)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
+ struct sort_dimension *sd = &common_sort_dimensions[i];
+
+ if (!sd->name || strncasecmp(tok, sd->name, strlen(tok)))
+ continue;
+
+ return __sort_dimension__add_output(list, sd);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
+ struct hpp_dimension *hd = &hpp_sort_dimensions[i];
+
+ if (strncasecmp(tok, hd->name, strlen(tok)))
+ continue;
+
+ return __hpp_dimension__add_output(list, hd);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
+ struct sort_dimension *sd = &bstack_sort_dimensions[i];
+
+ if (!sd->name || strncasecmp(tok, sd->name, strlen(tok)))
+ continue;
+
+ if (sort__mode != SORT_MODE__BRANCH)
+ return -EINVAL;
+
+ return __sort_dimension__add_output(list, sd);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) {
+ struct sort_dimension *sd = &memory_sort_dimensions[i];
+
+ if (!sd->name || strncasecmp(tok, sd->name, strlen(tok)))
+ continue;
+
+ if (sort__mode != SORT_MODE__MEMORY)
+ return -EINVAL;
+
+ return __sort_dimension__add_output(list, sd);
+ }
+
+ return -ESRCH;
+}
+
+static int setup_output_list(struct perf_hpp_list *list, char *str)
+{
+ char *tmp, *tok;
+ int ret = 0;
+
+ for (tok = strtok_r(str, ", ", &tmp);
+ tok; tok = strtok_r(NULL, ", ", &tmp)) {
+ ret = output_field_add(list, tok);
+ if (ret == -EINVAL) {
+ ui__error("Invalid --fields key: `%s'", tok);
+ break;
+ } else if (ret == -ESRCH) {
+ ui__error("Unknown --fields key: `%s'", tok);
+ break;
+ }
+ }
+
+ return ret;
+}
+
+void reset_dimensions(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++)
+ common_sort_dimensions[i].taken = 0;
+
+ for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++)
+ hpp_sort_dimensions[i].taken = 0;
+
+ for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++)
+ bstack_sort_dimensions[i].taken = 0;
+
+ for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++)
+ memory_sort_dimensions[i].taken = 0;
+}
+
+bool is_strict_order(const char *order)
+{
+ return order && (*order != '+');
+}
+
+static int __setup_output_field(void)
+{
+ char *str, *strp;
+ int ret = -EINVAL;
+
+ if (field_order == NULL)
+ return 0;
+
+ strp = str = strdup(field_order);
+ if (str == NULL) {
+ pr_err("Not enough memory to setup output fields");
+ return -ENOMEM;
+ }
+
+ if (!is_strict_order(field_order))
+ strp++;
+
+ if (!strlen(strp)) {
+ ui__error("Invalid --fields key: `+'");
+ goto out;
+ }
+
+ ret = setup_output_list(&perf_hpp_list, strp);
+
+out:
+ free(str);
+ return ret;
+}
+
+int setup_sorting(struct evlist *evlist)
+{
+ int err;
+
+ err = __setup_sorting(evlist);
+ if (err < 0)
+ return err;
+
+ if (parent_pattern != default_parent_pattern) {
+ err = sort_dimension__add(&perf_hpp_list, "parent", evlist, -1);
+ if (err < 0)
+ return err;
+ }
+
+ reset_dimensions();
+
+ /*
+ * perf diff doesn't use default hpp output fields.
+ */
+ if (sort__mode != SORT_MODE__DIFF)
+ perf_hpp__init();
+
+ err = __setup_output_field();
+ if (err < 0)
+ return err;
+
+ /* copy sort keys to output fields */
+ perf_hpp__setup_output_field(&perf_hpp_list);
+ /* and then copy output fields to sort keys */
+ perf_hpp__append_sort_keys(&perf_hpp_list);
+
+ /* setup hists-specific output fields */
+ if (perf_hpp__setup_hists_formats(&perf_hpp_list, evlist) < 0)
+ return -1;
+
+ return 0;
+}
+
+void reset_output_field(void)
+{
+ perf_hpp_list.need_collapse = 0;
+ perf_hpp_list.parent = 0;
+ perf_hpp_list.sym = 0;
+ perf_hpp_list.dso = 0;
+
+ field_order = NULL;
+ sort_order = NULL;
+
+ reset_dimensions();
+ perf_hpp__reset_output_field(&perf_hpp_list);
+}
+
+#define INDENT (3*8 + 1)
+
+static void add_key(struct strbuf *sb, const char *str, int *llen)
+{
+ if (!str)
+ return;
+
+ if (*llen >= 75) {
+ strbuf_addstr(sb, "\n\t\t\t ");
+ *llen = INDENT;
+ }
+ strbuf_addf(sb, " %s", str);
+ *llen += strlen(str) + 1;
+}
+
+static void add_sort_string(struct strbuf *sb, struct sort_dimension *s, int n,
+ int *llen)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ add_key(sb, s[i].name, llen);
+}
+
+static void add_hpp_sort_string(struct strbuf *sb, struct hpp_dimension *s, int n,
+ int *llen)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ add_key(sb, s[i].name, llen);
+}
+
+char *sort_help(const char *prefix)
+{
+ struct strbuf sb;
+ char *s;
+ int len = strlen(prefix) + INDENT;
+
+ strbuf_init(&sb, 300);
+ strbuf_addstr(&sb, prefix);
+ add_hpp_sort_string(&sb, hpp_sort_dimensions,
+ ARRAY_SIZE(hpp_sort_dimensions), &len);
+ add_sort_string(&sb, common_sort_dimensions,
+ ARRAY_SIZE(common_sort_dimensions), &len);
+ add_sort_string(&sb, bstack_sort_dimensions,
+ ARRAY_SIZE(bstack_sort_dimensions), &len);
+ add_sort_string(&sb, memory_sort_dimensions,
+ ARRAY_SIZE(memory_sort_dimensions), &len);
+ s = strbuf_detach(&sb, NULL);
+ strbuf_release(&sb);
+ return s;
+}
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
new file mode 100644
index 000000000..ecfb7f135
--- /dev/null
+++ b/tools/perf/util/sort.h
@@ -0,0 +1,335 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SORT_H
+#define __PERF_SORT_H
+#include <regex.h>
+#include <stdbool.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include "map_symbol.h"
+#include "symbol_conf.h"
+#include "callchain.h"
+#include "values.h"
+#include "hist.h"
+#include "stat.h"
+#include "spark.h"
+
+struct option;
+struct thread;
+
+extern regex_t parent_regex;
+extern const char *sort_order;
+extern const char *field_order;
+extern const char default_parent_pattern[];
+extern const char *parent_pattern;
+extern const char *default_sort_order;
+extern regex_t ignore_callees_regex;
+extern int have_ignore_callees;
+extern enum sort_mode sort__mode;
+extern struct sort_entry sort_comm;
+extern struct sort_entry sort_dso;
+extern struct sort_entry sort_sym;
+extern struct sort_entry sort_parent;
+extern struct sort_entry sort_dso_from;
+extern struct sort_entry sort_dso_to;
+extern struct sort_entry sort_sym_from;
+extern struct sort_entry sort_sym_to;
+extern struct sort_entry sort_srcline;
+extern const char default_mem_sort_order[];
+extern bool chk_double_cl;
+
+struct res_sample {
+ u64 time;
+ int cpu;
+ int tid;
+};
+
+struct he_stat {
+ u64 period;
+ u64 period_sys;
+ u64 period_us;
+ u64 period_guest_sys;
+ u64 period_guest_us;
+ u32 nr_events;
+};
+
+struct namespace_id {
+ u64 dev;
+ u64 ino;
+};
+
+struct hist_entry_diff {
+ bool computed;
+ union {
+ /* PERF_HPP__DELTA */
+ double period_ratio_delta;
+
+ /* PERF_HPP__RATIO */
+ double period_ratio;
+
+ /* HISTC_WEIGHTED_DIFF */
+ s64 wdiff;
+
+ /* PERF_HPP_DIFF__CYCLES */
+ s64 cycles;
+ };
+ struct stats stats;
+ unsigned long svals[NUM_SPARKS];
+};
+
+struct hist_entry_ops {
+ void *(*new)(size_t size);
+ void (*free)(void *ptr);
+};
+
+/**
+ * struct hist_entry - histogram entry
+ *
+ * @row_offset - offset from the first callchain expanded to appear on screen
+ * @nr_rows - rows expanded in callchain, recalculated on folding/unfolding
+ */
+struct hist_entry {
+ struct rb_node rb_node_in;
+ struct rb_node rb_node;
+ union {
+ struct list_head node;
+ struct list_head head;
+ } pairs;
+ struct he_stat stat;
+ struct he_stat *stat_acc;
+ struct map_symbol ms;
+ struct thread *thread;
+ struct comm *comm;
+ struct namespace_id cgroup_id;
+ u64 cgroup;
+ u64 ip;
+ u64 transaction;
+ s32 socket;
+ s32 cpu;
+ u64 code_page_size;
+ u64 weight;
+ u64 ins_lat;
+ u64 p_stage_cyc;
+ u8 cpumode;
+ u8 depth;
+ struct simd_flags simd_flags;
+
+ /* We are added by hists__add_dummy_entry. */
+ bool dummy;
+ bool leaf;
+
+ char level;
+ u8 filtered;
+
+ u16 callchain_size;
+ union {
+ /*
+ * Since perf diff only supports the stdio output, TUI
+ * fields are only accessed from perf report (or perf
+ * top). So make it a union to reduce memory usage.
+ */
+ struct hist_entry_diff diff;
+ struct /* for TUI */ {
+ u16 row_offset;
+ u16 nr_rows;
+ bool init_have_children;
+ bool unfolded;
+ bool has_children;
+ bool has_no_entry;
+ };
+ };
+ char *srcline;
+ char *srcfile;
+ struct symbol *parent;
+ struct branch_info *branch_info;
+ long time;
+ struct hists *hists;
+ struct mem_info *mem_info;
+ struct block_info *block_info;
+ struct kvm_info *kvm_info;
+ void *raw_data;
+ u32 raw_size;
+ int num_res;
+ struct res_sample *res_samples;
+ void *trace_output;
+ struct perf_hpp_list *hpp_list;
+ struct hist_entry *parent_he;
+ struct hist_entry_ops *ops;
+ union {
+ /* this is for hierarchical entry structure */
+ struct {
+ struct rb_root_cached hroot_in;
+ struct rb_root_cached hroot_out;
+ }; /* non-leaf entries */
+ struct rb_root sorted_chain; /* leaf entry has callchains */
+ };
+ struct callchain_root callchain[0]; /* must be last member */
+};
+
+static __pure inline bool hist_entry__has_callchains(struct hist_entry *he)
+{
+ return he->callchain_size != 0;
+}
+
+int hist_entry__sym_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width);
+
+static inline bool hist_entry__has_pairs(struct hist_entry *he)
+{
+ return !list_empty(&he->pairs.node);
+}
+
+static inline struct hist_entry *hist_entry__next_pair(struct hist_entry *he)
+{
+ if (hist_entry__has_pairs(he))
+ return list_entry(he->pairs.node.next, struct hist_entry, pairs.node);
+ return NULL;
+}
+
+static inline void hist_entry__add_pair(struct hist_entry *pair,
+ struct hist_entry *he)
+{
+ list_add_tail(&pair->pairs.node, &he->pairs.head);
+}
+
+static inline float hist_entry__get_percent_limit(struct hist_entry *he)
+{
+ u64 period = he->stat.period;
+ u64 total_period = hists__total_period(he->hists);
+
+ if (unlikely(total_period == 0))
+ return 0;
+
+ if (symbol_conf.cumulate_callchain)
+ period = he->stat_acc->period;
+
+ return period * 100.0 / total_period;
+}
+
+enum sort_mode {
+ SORT_MODE__NORMAL,
+ SORT_MODE__BRANCH,
+ SORT_MODE__MEMORY,
+ SORT_MODE__TOP,
+ SORT_MODE__DIFF,
+ SORT_MODE__TRACEPOINT,
+};
+
+enum sort_type {
+ /* common sort keys */
+ SORT_PID,
+ SORT_COMM,
+ SORT_DSO,
+ SORT_SYM,
+ SORT_PARENT,
+ SORT_CPU,
+ SORT_SOCKET,
+ SORT_SRCLINE,
+ SORT_SRCFILE,
+ SORT_LOCAL_WEIGHT,
+ SORT_GLOBAL_WEIGHT,
+ SORT_TRANSACTION,
+ SORT_TRACE,
+ SORT_SYM_SIZE,
+ SORT_DSO_SIZE,
+ SORT_CGROUP,
+ SORT_CGROUP_ID,
+ SORT_SYM_IPC_NULL,
+ SORT_TIME,
+ SORT_CODE_PAGE_SIZE,
+ SORT_LOCAL_INS_LAT,
+ SORT_GLOBAL_INS_LAT,
+ SORT_LOCAL_PIPELINE_STAGE_CYC,
+ SORT_GLOBAL_PIPELINE_STAGE_CYC,
+ SORT_ADDR,
+ SORT_LOCAL_RETIRE_LAT,
+ SORT_GLOBAL_RETIRE_LAT,
+ SORT_SIMD,
+
+ /* branch stack specific sort keys */
+ __SORT_BRANCH_STACK,
+ SORT_DSO_FROM = __SORT_BRANCH_STACK,
+ SORT_DSO_TO,
+ SORT_SYM_FROM,
+ SORT_SYM_TO,
+ SORT_MISPREDICT,
+ SORT_ABORT,
+ SORT_IN_TX,
+ SORT_CYCLES,
+ SORT_SRCLINE_FROM,
+ SORT_SRCLINE_TO,
+ SORT_SYM_IPC,
+ SORT_ADDR_FROM,
+ SORT_ADDR_TO,
+
+ /* memory mode specific sort keys */
+ __SORT_MEMORY_MODE,
+ SORT_MEM_DADDR_SYMBOL = __SORT_MEMORY_MODE,
+ SORT_MEM_DADDR_DSO,
+ SORT_MEM_LOCKED,
+ SORT_MEM_TLB,
+ SORT_MEM_LVL,
+ SORT_MEM_SNOOP,
+ SORT_MEM_DCACHELINE,
+ SORT_MEM_IADDR_SYMBOL,
+ SORT_MEM_PHYS_DADDR,
+ SORT_MEM_DATA_PAGE_SIZE,
+ SORT_MEM_BLOCKED,
+};
+
+/*
+ * configurable sorting bits
+ */
+
+struct sort_entry {
+ const char *se_header;
+
+ int64_t (*se_cmp)(struct hist_entry *, struct hist_entry *);
+ int64_t (*se_collapse)(struct hist_entry *, struct hist_entry *);
+ int64_t (*se_sort)(struct hist_entry *, struct hist_entry *);
+ int (*se_snprintf)(struct hist_entry *he, char *bf, size_t size,
+ unsigned int width);
+ int (*se_filter)(struct hist_entry *he, int type, const void *arg);
+ void (*se_init)(struct hist_entry *he);
+ u8 se_width_idx;
+};
+
+struct block_hist {
+ struct hists block_hists;
+ struct perf_hpp_list block_list;
+ struct perf_hpp_fmt block_fmt;
+ int block_idx;
+ bool valid;
+ struct hist_entry he;
+};
+
+extern struct sort_entry sort_thread;
+
+struct evlist;
+struct tep_handle;
+int setup_sorting(struct evlist *evlist);
+int setup_output_field(void);
+void reset_output_field(void);
+void sort__setup_elide(FILE *fp);
+void perf_hpp__set_elide(int idx, bool elide);
+
+char *sort_help(const char *prefix);
+
+int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset);
+
+bool is_strict_order(const char *order);
+
+int hpp_dimension__add_output(unsigned col);
+void reset_dimensions(void);
+int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
+ struct evlist *evlist,
+ int level);
+int output_field_add(struct perf_hpp_list *list, char *tok);
+int64_t
+sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t
+sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t
+sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t
+_sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r);
+char *hist_entry__srcline(struct hist_entry *he);
+#endif /* __PERF_SORT_H */
diff --git a/tools/perf/util/spark.c b/tools/perf/util/spark.c
new file mode 100644
index 000000000..70272a8b8
--- /dev/null
+++ b/tools/perf/util/spark.c
@@ -0,0 +1,34 @@
+#include <stdio.h>
+#include <limits.h>
+#include <string.h>
+#include <stdlib.h>
+#include "spark.h"
+#include "stat.h"
+
+#define SPARK_SHIFT 8
+
+/* Print spark lines on outf for numval values in val. */
+int print_spark(char *bf, int size, unsigned long *val, int numval)
+{
+ static const char *ticks[NUM_SPARKS] = {
+ "▁", "▂", "▃", "▄", "▅", "▆", "▇", "█"
+ };
+ int i, printed = 0;
+ unsigned long min = ULONG_MAX, max = 0, f;
+
+ for (i = 0; i < numval; i++) {
+ if (val[i] < min)
+ min = val[i];
+ if (val[i] > max)
+ max = val[i];
+ }
+ f = ((max - min) << SPARK_SHIFT) / (NUM_SPARKS - 1);
+ if (f < 1)
+ f = 1;
+ for (i = 0; i < numval; i++) {
+ printed += scnprintf(bf + printed, size - printed, "%s",
+ ticks[((val[i] - min) << SPARK_SHIFT) / f]);
+ }
+
+ return printed;
+}
diff --git a/tools/perf/util/spark.h b/tools/perf/util/spark.h
new file mode 100644
index 000000000..25402d7d7
--- /dev/null
+++ b/tools/perf/util/spark.h
@@ -0,0 +1,8 @@
+#ifndef SPARK_H
+#define SPARK_H 1
+
+#define NUM_SPARKS 8
+
+int print_spark(char *bf, int size, unsigned long *val, int numval);
+
+#endif
diff --git a/tools/perf/util/srccode.c b/tools/perf/util/srccode.c
new file mode 100644
index 000000000..476e99896
--- /dev/null
+++ b/tools/perf/util/srccode.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Manage printing of source lines
+ * Copyright (c) 2017, Intel Corporation.
+ * Author: Andi Kleen
+ */
+#include <linux/list.h>
+#include <linux/zalloc.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <assert.h>
+#include <string.h>
+#include "srccode.h"
+#include "debug.h"
+#include <internal/lib.h> // page_size
+#include "fncache.h"
+
+#define MAXSRCCACHE (32*1024*1024)
+#define MAXSRCFILES 64
+#define SRC_HTAB_SZ 64
+
+struct srcfile {
+ struct hlist_node hash_nd;
+ struct list_head nd;
+ char *fn;
+ char **lines;
+ char *map;
+ unsigned numlines;
+ size_t maplen;
+};
+
+static struct hlist_head srcfile_htab[SRC_HTAB_SZ];
+static LIST_HEAD(srcfile_list);
+static long map_total_sz;
+static int num_srcfiles;
+
+static int countlines(char *map, int maplen)
+{
+ int numl;
+ char *end = map + maplen;
+ char *p = map;
+
+ if (maplen == 0)
+ return 0;
+ numl = 0;
+ while (p < end && (p = memchr(p, '\n', end - p)) != NULL) {
+ numl++;
+ p++;
+ }
+ if (p < end)
+ numl++;
+ return numl;
+}
+
+static void fill_lines(char **lines, int maxline, char *map, int maplen)
+{
+ int l;
+ char *end = map + maplen;
+ char *p = map;
+
+ if (maplen == 0 || maxline == 0)
+ return;
+ l = 0;
+ lines[l++] = map;
+ while (p < end && (p = memchr(p, '\n', end - p)) != NULL) {
+ if (l >= maxline)
+ return;
+ lines[l++] = ++p;
+ }
+ if (p < end)
+ lines[l] = p;
+}
+
+static void free_srcfile(struct srcfile *sf)
+{
+ list_del_init(&sf->nd);
+ hlist_del(&sf->hash_nd);
+ map_total_sz -= sf->maplen;
+ munmap(sf->map, sf->maplen);
+ zfree(&sf->lines);
+ zfree(&sf->fn);
+ free(sf);
+ num_srcfiles--;
+}
+
+static struct srcfile *find_srcfile(char *fn)
+{
+ struct stat st;
+ struct srcfile *h;
+ int fd;
+ unsigned long sz;
+ unsigned hval = shash((unsigned char *)fn) % SRC_HTAB_SZ;
+
+ hlist_for_each_entry (h, &srcfile_htab[hval], hash_nd) {
+ if (!strcmp(fn, h->fn)) {
+ /* Move to front */
+ list_move(&h->nd, &srcfile_list);
+ return h;
+ }
+ }
+
+ /* Only prune if there is more than one entry */
+ while ((num_srcfiles > MAXSRCFILES || map_total_sz > MAXSRCCACHE) &&
+ srcfile_list.next != &srcfile_list) {
+ assert(!list_empty(&srcfile_list));
+ h = list_entry(srcfile_list.prev, struct srcfile, nd);
+ free_srcfile(h);
+ }
+
+ fd = open(fn, O_RDONLY);
+ if (fd < 0 || fstat(fd, &st) < 0) {
+ pr_debug("cannot open source file %s\n", fn);
+ return NULL;
+ }
+
+ h = malloc(sizeof(struct srcfile));
+ if (!h)
+ return NULL;
+
+ h->fn = strdup(fn);
+ if (!h->fn)
+ goto out_h;
+
+ h->maplen = st.st_size;
+ sz = (h->maplen + page_size - 1) & ~(page_size - 1);
+ h->map = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0);
+ close(fd);
+ if (h->map == (char *)-1) {
+ pr_debug("cannot mmap source file %s\n", fn);
+ goto out_fn;
+ }
+ h->numlines = countlines(h->map, h->maplen);
+ h->lines = calloc(h->numlines, sizeof(char *));
+ if (!h->lines)
+ goto out_map;
+ fill_lines(h->lines, h->numlines, h->map, h->maplen);
+ list_add(&h->nd, &srcfile_list);
+ hlist_add_head(&h->hash_nd, &srcfile_htab[hval]);
+ map_total_sz += h->maplen;
+ num_srcfiles++;
+ return h;
+
+out_map:
+ munmap(h->map, sz);
+out_fn:
+ zfree(&h->fn);
+out_h:
+ free(h);
+ return NULL;
+}
+
+/* Result is not 0 terminated */
+char *find_sourceline(char *fn, unsigned line, int *lenp)
+{
+ char *l, *p;
+ struct srcfile *sf = find_srcfile(fn);
+ if (!sf)
+ return NULL;
+ line--;
+ if (line >= sf->numlines)
+ return NULL;
+ l = sf->lines[line];
+ if (!l)
+ return NULL;
+ p = memchr(l, '\n', sf->map + sf->maplen - l);
+ *lenp = p - l;
+ return l;
+}
diff --git a/tools/perf/util/srccode.h b/tools/perf/util/srccode.h
new file mode 100644
index 000000000..1b5ed7697
--- /dev/null
+++ b/tools/perf/util/srccode.h
@@ -0,0 +1,20 @@
+#ifndef SRCCODE_H
+#define SRCCODE_H 1
+
+struct srccode_state {
+ char *srcfile;
+ unsigned line;
+};
+
+static inline void srccode_state_init(struct srccode_state *state)
+{
+ state->srcfile = NULL;
+ state->line = 0;
+}
+
+void srccode_state_free(struct srccode_state *state);
+
+/* Result is not 0 terminated */
+char *find_sourceline(char *fn, unsigned line, int *lenp);
+
+#endif
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
new file mode 100644
index 000000000..034b496df
--- /dev/null
+++ b/tools/perf/util/srcline.c
@@ -0,0 +1,1059 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+
+#include <api/io.h>
+
+#include "util/dso.h"
+#include "util/debug.h"
+#include "util/callchain.h"
+#include "util/symbol_conf.h"
+#include "srcline.h"
+#include "string2.h"
+#include "symbol.h"
+#include "subcmd/run-command.h"
+
+/* If addr2line doesn't return data for 1 second then timeout. */
+int addr2line_timeout_ms = 1 * 1000;
+bool srcline_full_filename;
+
+char *srcline__unknown = (char *)"??:0";
+
+static const char *dso__name(struct dso *dso)
+{
+ const char *dso_name;
+
+ if (dso->symsrc_filename)
+ dso_name = dso->symsrc_filename;
+ else
+ dso_name = dso->long_name;
+
+ if (dso_name[0] == '[')
+ return NULL;
+
+ if (!strncmp(dso_name, "/tmp/perf-", 10))
+ return NULL;
+
+ return dso_name;
+}
+
+static int inline_list__append(struct symbol *symbol, char *srcline,
+ struct inline_node *node)
+{
+ struct inline_list *ilist;
+
+ ilist = zalloc(sizeof(*ilist));
+ if (ilist == NULL)
+ return -1;
+
+ ilist->symbol = symbol;
+ ilist->srcline = srcline;
+
+ if (callchain_param.order == ORDER_CALLEE)
+ list_add_tail(&ilist->list, &node->val);
+ else
+ list_add(&ilist->list, &node->val);
+
+ return 0;
+}
+
+/* basename version that takes a const input string */
+static const char *gnu_basename(const char *path)
+{
+ const char *base = strrchr(path, '/');
+
+ return base ? base + 1 : path;
+}
+
+static char *srcline_from_fileline(const char *file, unsigned int line)
+{
+ char *srcline;
+
+ if (!file)
+ return NULL;
+
+ if (!srcline_full_filename)
+ file = gnu_basename(file);
+
+ if (asprintf(&srcline, "%s:%u", file, line) < 0)
+ return NULL;
+
+ return srcline;
+}
+
+static struct symbol *new_inline_sym(struct dso *dso,
+ struct symbol *base_sym,
+ const char *funcname)
+{
+ struct symbol *inline_sym;
+ char *demangled = NULL;
+
+ if (!funcname)
+ funcname = "??";
+
+ if (dso) {
+ demangled = dso__demangle_sym(dso, 0, funcname);
+ if (demangled)
+ funcname = demangled;
+ }
+
+ if (base_sym && strcmp(funcname, base_sym->name) == 0) {
+ /* reuse the real, existing symbol */
+ inline_sym = base_sym;
+ /* ensure that we don't alias an inlined symbol, which could
+ * lead to double frees in inline_node__delete
+ */
+ assert(!base_sym->inlined);
+ } else {
+ /* create a fake symbol for the inline frame */
+ inline_sym = symbol__new(base_sym ? base_sym->start : 0,
+ base_sym ? (base_sym->end - base_sym->start) : 0,
+ base_sym ? base_sym->binding : 0,
+ base_sym ? base_sym->type : 0,
+ funcname);
+ if (inline_sym)
+ inline_sym->inlined = 1;
+ }
+
+ free(demangled);
+
+ return inline_sym;
+}
+
+#define MAX_INLINE_NEST 1024
+
+#ifdef HAVE_LIBBFD_SUPPORT
+
+/*
+ * Implement addr2line using libbfd.
+ */
+#define PACKAGE "perf"
+#include <bfd.h>
+
+struct a2l_data {
+ const char *input;
+ u64 addr;
+
+ bool found;
+ const char *filename;
+ const char *funcname;
+ unsigned line;
+
+ bfd *abfd;
+ asymbol **syms;
+};
+
+static int bfd_error(const char *string)
+{
+ const char *errmsg;
+
+ errmsg = bfd_errmsg(bfd_get_error());
+ fflush(stdout);
+
+ if (string)
+ pr_debug("%s: %s\n", string, errmsg);
+ else
+ pr_debug("%s\n", errmsg);
+
+ return -1;
+}
+
+static int slurp_symtab(bfd *abfd, struct a2l_data *a2l)
+{
+ long storage;
+ long symcount;
+ asymbol **syms;
+ bfd_boolean dynamic = FALSE;
+
+ if ((bfd_get_file_flags(abfd) & HAS_SYMS) == 0)
+ return bfd_error(bfd_get_filename(abfd));
+
+ storage = bfd_get_symtab_upper_bound(abfd);
+ if (storage == 0L) {
+ storage = bfd_get_dynamic_symtab_upper_bound(abfd);
+ dynamic = TRUE;
+ }
+ if (storage < 0L)
+ return bfd_error(bfd_get_filename(abfd));
+
+ syms = malloc(storage);
+ if (dynamic)
+ symcount = bfd_canonicalize_dynamic_symtab(abfd, syms);
+ else
+ symcount = bfd_canonicalize_symtab(abfd, syms);
+
+ if (symcount < 0) {
+ free(syms);
+ return bfd_error(bfd_get_filename(abfd));
+ }
+
+ a2l->syms = syms;
+ return 0;
+}
+
+static void find_address_in_section(bfd *abfd, asection *section, void *data)
+{
+ bfd_vma pc, vma;
+ bfd_size_type size;
+ struct a2l_data *a2l = data;
+ flagword flags;
+
+ if (a2l->found)
+ return;
+
+#ifdef bfd_get_section_flags
+ flags = bfd_get_section_flags(abfd, section);
+#else
+ flags = bfd_section_flags(section);
+#endif
+ if ((flags & SEC_ALLOC) == 0)
+ return;
+
+ pc = a2l->addr;
+#ifdef bfd_get_section_vma
+ vma = bfd_get_section_vma(abfd, section);
+#else
+ vma = bfd_section_vma(section);
+#endif
+#ifdef bfd_get_section_size
+ size = bfd_get_section_size(section);
+#else
+ size = bfd_section_size(section);
+#endif
+
+ if (pc < vma || pc >= vma + size)
+ return;
+
+ a2l->found = bfd_find_nearest_line(abfd, section, a2l->syms, pc - vma,
+ &a2l->filename, &a2l->funcname,
+ &a2l->line);
+
+ if (a2l->filename && !strlen(a2l->filename))
+ a2l->filename = NULL;
+}
+
+static struct a2l_data *addr2line_init(const char *path)
+{
+ bfd *abfd;
+ struct a2l_data *a2l = NULL;
+
+ abfd = bfd_openr(path, NULL);
+ if (abfd == NULL)
+ return NULL;
+
+ if (!bfd_check_format(abfd, bfd_object))
+ goto out;
+
+ a2l = zalloc(sizeof(*a2l));
+ if (a2l == NULL)
+ goto out;
+
+ a2l->abfd = abfd;
+ a2l->input = strdup(path);
+ if (a2l->input == NULL)
+ goto out;
+
+ if (slurp_symtab(abfd, a2l))
+ goto out;
+
+ return a2l;
+
+out:
+ if (a2l) {
+ zfree((char **)&a2l->input);
+ free(a2l);
+ }
+ bfd_close(abfd);
+ return NULL;
+}
+
+static void addr2line_cleanup(struct a2l_data *a2l)
+{
+ if (a2l->abfd)
+ bfd_close(a2l->abfd);
+ zfree((char **)&a2l->input);
+ zfree(&a2l->syms);
+ free(a2l);
+}
+
+static int inline_list__append_dso_a2l(struct dso *dso,
+ struct inline_node *node,
+ struct symbol *sym)
+{
+ struct a2l_data *a2l = dso->a2l;
+ struct symbol *inline_sym = new_inline_sym(dso, sym, a2l->funcname);
+ char *srcline = NULL;
+
+ if (a2l->filename)
+ srcline = srcline_from_fileline(a2l->filename, a2l->line);
+
+ return inline_list__append(inline_sym, srcline, node);
+}
+
+static int addr2line(const char *dso_name, u64 addr,
+ char **file, unsigned int *line, struct dso *dso,
+ bool unwind_inlines, struct inline_node *node,
+ struct symbol *sym)
+{
+ int ret = 0;
+ struct a2l_data *a2l = dso->a2l;
+
+ if (!a2l) {
+ dso->a2l = addr2line_init(dso_name);
+ a2l = dso->a2l;
+ }
+
+ if (a2l == NULL) {
+ if (!symbol_conf.disable_add2line_warn)
+ pr_warning("addr2line_init failed for %s\n", dso_name);
+ return 0;
+ }
+
+ a2l->addr = addr;
+ a2l->found = false;
+
+ bfd_map_over_sections(a2l->abfd, find_address_in_section, a2l);
+
+ if (!a2l->found)
+ return 0;
+
+ if (unwind_inlines) {
+ int cnt = 0;
+
+ if (node && inline_list__append_dso_a2l(dso, node, sym))
+ return 0;
+
+ while (bfd_find_inliner_info(a2l->abfd, &a2l->filename,
+ &a2l->funcname, &a2l->line) &&
+ cnt++ < MAX_INLINE_NEST) {
+
+ if (a2l->filename && !strlen(a2l->filename))
+ a2l->filename = NULL;
+
+ if (node != NULL) {
+ if (inline_list__append_dso_a2l(dso, node, sym))
+ return 0;
+ // found at least one inline frame
+ ret = 1;
+ }
+ }
+ }
+
+ if (file) {
+ *file = a2l->filename ? strdup(a2l->filename) : NULL;
+ ret = *file ? 1 : 0;
+ }
+
+ if (line)
+ *line = a2l->line;
+
+ return ret;
+}
+
+void dso__free_a2l(struct dso *dso)
+{
+ struct a2l_data *a2l = dso->a2l;
+
+ if (!a2l)
+ return;
+
+ addr2line_cleanup(a2l);
+
+ dso->a2l = NULL;
+}
+
+#else /* HAVE_LIBBFD_SUPPORT */
+
+static int filename_split(char *filename, unsigned int *line_nr)
+{
+ char *sep;
+
+ sep = strchr(filename, '\n');
+ if (sep)
+ *sep = '\0';
+
+ if (!strcmp(filename, "??:0"))
+ return 0;
+
+ sep = strchr(filename, ':');
+ if (sep) {
+ *sep++ = '\0';
+ *line_nr = strtoul(sep, NULL, 0);
+ return 1;
+ }
+ pr_debug("addr2line missing ':' in filename split\n");
+ return 0;
+}
+
+static void addr2line_subprocess_cleanup(struct child_process *a2l)
+{
+ if (a2l->pid != -1) {
+ kill(a2l->pid, SIGKILL);
+ finish_command(a2l); /* ignore result, we don't care */
+ a2l->pid = -1;
+ }
+
+ free(a2l);
+}
+
+static struct child_process *addr2line_subprocess_init(const char *addr2line_path,
+ const char *binary_path)
+{
+ const char *argv[] = {
+ addr2line_path ?: "addr2line",
+ "-e", binary_path,
+ "-a", "-i", "-f", NULL
+ };
+ struct child_process *a2l = zalloc(sizeof(*a2l));
+ int start_command_status = 0;
+
+ if (a2l == NULL) {
+ pr_err("Failed to allocate memory for addr2line");
+ return NULL;
+ }
+
+ a2l->pid = -1;
+ a2l->in = -1;
+ a2l->out = -1;
+ a2l->no_stderr = 1;
+
+ a2l->argv = argv;
+ start_command_status = start_command(a2l);
+ a2l->argv = NULL; /* it's not used after start_command; avoid dangling pointers */
+
+ if (start_command_status != 0) {
+ pr_warning("could not start addr2line (%s) for %s: start_command return code %d\n",
+ addr2line_path, binary_path, start_command_status);
+ addr2line_subprocess_cleanup(a2l);
+ return NULL;
+ }
+
+ return a2l;
+}
+
+enum a2l_style {
+ BROKEN,
+ GNU_BINUTILS,
+ LLVM,
+};
+
+static enum a2l_style addr2line_configure(struct child_process *a2l, const char *dso_name)
+{
+ static bool cached;
+ static enum a2l_style style;
+
+ if (!cached) {
+ char buf[128];
+ struct io io;
+ int ch;
+ int lines;
+
+ if (write(a2l->in, ",\n", 2) != 2)
+ return BROKEN;
+
+ io__init(&io, a2l->out, buf, sizeof(buf));
+ ch = io__get_char(&io);
+ if (ch == ',') {
+ style = LLVM;
+ cached = true;
+ lines = 1;
+ pr_debug("Detected LLVM addr2line style\n");
+ } else if (ch == '0') {
+ style = GNU_BINUTILS;
+ cached = true;
+ lines = 3;
+ pr_debug("Detected binutils addr2line style\n");
+ } else {
+ if (!symbol_conf.disable_add2line_warn) {
+ char *output = NULL;
+ size_t output_len;
+
+ io__getline(&io, &output, &output_len);
+ pr_warning("%s %s: addr2line configuration failed\n",
+ __func__, dso_name);
+ pr_warning("\t%c%s", ch, output);
+ }
+ pr_debug("Unknown/broken addr2line style\n");
+ return BROKEN;
+ }
+ while (lines) {
+ ch = io__get_char(&io);
+ if (ch <= 0)
+ break;
+ if (ch == '\n')
+ lines--;
+ }
+ /* Ignore SIGPIPE in the event addr2line exits. */
+ signal(SIGPIPE, SIG_IGN);
+ }
+ return style;
+}
+
+static int read_addr2line_record(struct io *io,
+ enum a2l_style style,
+ const char *dso_name,
+ u64 addr,
+ bool first,
+ char **function,
+ char **filename,
+ unsigned int *line_nr)
+{
+ /*
+ * Returns:
+ * -1 ==> error
+ * 0 ==> sentinel (or other ill-formed) record read
+ * 1 ==> a genuine record read
+ */
+ char *line = NULL;
+ size_t line_len = 0;
+ unsigned int dummy_line_nr = 0;
+ int ret = -1;
+
+ if (function != NULL)
+ zfree(function);
+
+ if (filename != NULL)
+ zfree(filename);
+
+ if (line_nr != NULL)
+ *line_nr = 0;
+
+ /*
+ * Read the first line. Without an error this will be:
+ * - for the first line an address like 0x1234,
+ * - the binutils sentinel 0x0000000000000000,
+ * - the llvm-addr2line the sentinel ',' character,
+ * - the function name line for an inlined function.
+ */
+ if (io__getline(io, &line, &line_len) < 0 || !line_len)
+ goto error;
+
+ pr_debug("%s %s: addr2line read address for sentinel: %s", __func__, dso_name, line);
+ if (style == LLVM && line_len == 2 && line[0] == ',') {
+ /* Found the llvm-addr2line sentinel character. */
+ zfree(&line);
+ return 0;
+ } else if (style == GNU_BINUTILS && (!first || addr != 0)) {
+ int zero_count = 0, non_zero_count = 0;
+ /*
+ * Check for binutils sentinel ignoring it for the case the
+ * requested address is 0.
+ */
+
+ /* A given address should always start 0x. */
+ if (line_len >= 2 || line[0] != '0' || line[1] != 'x') {
+ for (size_t i = 2; i < line_len; i++) {
+ if (line[i] == '0')
+ zero_count++;
+ else if (line[i] != '\n')
+ non_zero_count++;
+ }
+ if (!non_zero_count) {
+ int ch;
+
+ if (first && !zero_count) {
+ /* Line was erroneous just '0x'. */
+ goto error;
+ }
+ /*
+ * Line was 0x0..0, the sentinel for binutils. Remove
+ * the function and filename lines.
+ */
+ zfree(&line);
+ do {
+ ch = io__get_char(io);
+ } while (ch > 0 && ch != '\n');
+ do {
+ ch = io__get_char(io);
+ } while (ch > 0 && ch != '\n');
+ return 0;
+ }
+ }
+ }
+ /* Read the second function name line (if inline data then this is the first line). */
+ if (first && (io__getline(io, &line, &line_len) < 0 || !line_len))
+ goto error;
+
+ pr_debug("%s %s: addr2line read line: %s", __func__, dso_name, line);
+ if (function != NULL)
+ *function = strdup(strim(line));
+
+ zfree(&line);
+ line_len = 0;
+
+ /* Read the third filename and line number line. */
+ if (io__getline(io, &line, &line_len) < 0 || !line_len)
+ goto error;
+
+ pr_debug("%s %s: addr2line filename:number : %s", __func__, dso_name, line);
+ if (filename_split(line, line_nr == NULL ? &dummy_line_nr : line_nr) == 0 &&
+ style == GNU_BINUTILS) {
+ ret = 0;
+ goto error;
+ }
+
+ if (filename != NULL)
+ *filename = strdup(line);
+
+ zfree(&line);
+ line_len = 0;
+
+ return 1;
+
+error:
+ free(line);
+ if (function != NULL)
+ zfree(function);
+ if (filename != NULL)
+ zfree(filename);
+ return ret;
+}
+
+static int inline_list__append_record(struct dso *dso,
+ struct inline_node *node,
+ struct symbol *sym,
+ const char *function,
+ const char *filename,
+ unsigned int line_nr)
+{
+ struct symbol *inline_sym = new_inline_sym(dso, sym, function);
+
+ return inline_list__append(inline_sym, srcline_from_fileline(filename, line_nr), node);
+}
+
+static int addr2line(const char *dso_name, u64 addr,
+ char **file, unsigned int *line_nr,
+ struct dso *dso,
+ bool unwind_inlines,
+ struct inline_node *node,
+ struct symbol *sym __maybe_unused)
+{
+ struct child_process *a2l = dso->a2l;
+ char *record_function = NULL;
+ char *record_filename = NULL;
+ unsigned int record_line_nr = 0;
+ int record_status = -1;
+ int ret = 0;
+ size_t inline_count = 0;
+ int len;
+ char buf[128];
+ ssize_t written;
+ struct io io = { .eof = false };
+ enum a2l_style a2l_style;
+
+ if (!a2l) {
+ if (!filename__has_section(dso_name, ".debug_line"))
+ goto out;
+
+ dso->a2l = addr2line_subprocess_init(symbol_conf.addr2line_path, dso_name);
+ a2l = dso->a2l;
+ }
+
+ if (a2l == NULL) {
+ if (!symbol_conf.disable_add2line_warn)
+ pr_warning("%s %s: addr2line_subprocess_init failed\n", __func__, dso_name);
+ goto out;
+ }
+ a2l_style = addr2line_configure(a2l, dso_name);
+ if (a2l_style == BROKEN)
+ goto out;
+
+ /*
+ * Send our request and then *deliberately* send something that can't be
+ * interpreted as a valid address to ask addr2line about (namely,
+ * ","). This causes addr2line to first write out the answer to our
+ * request, in an unbounded/unknown number of records, and then to write
+ * out the lines "0x0...0", "??" and "??:0", for GNU binutils, or ","
+ * for llvm-addr2line, so that we can detect when it has finished giving
+ * us anything useful.
+ */
+ len = snprintf(buf, sizeof(buf), "%016"PRIx64"\n,\n", addr);
+ written = len > 0 ? write(a2l->in, buf, len) : -1;
+ if (written != len) {
+ if (!symbol_conf.disable_add2line_warn)
+ pr_warning("%s %s: could not send request\n", __func__, dso_name);
+ goto out;
+ }
+ io__init(&io, a2l->out, buf, sizeof(buf));
+ io.timeout_ms = addr2line_timeout_ms;
+ switch (read_addr2line_record(&io, a2l_style, dso_name, addr, /*first=*/true,
+ &record_function, &record_filename, &record_line_nr)) {
+ case -1:
+ if (!symbol_conf.disable_add2line_warn)
+ pr_warning("%s %s: could not read first record\n", __func__, dso_name);
+ goto out;
+ case 0:
+ /*
+ * The first record was invalid, so return failure, but first
+ * read another record, since we sent a sentinel ',' for the
+ * sake of detected the last inlined function. Treat this as the
+ * first of a record as the ',' generates a new start with GNU
+ * binutils, also force a non-zero address as we're no longer
+ * reading that record.
+ */
+ switch (read_addr2line_record(&io, a2l_style, dso_name,
+ /*addr=*/1, /*first=*/true,
+ NULL, NULL, NULL)) {
+ case -1:
+ if (!symbol_conf.disable_add2line_warn)
+ pr_warning("%s %s: could not read sentinel record\n",
+ __func__, dso_name);
+ break;
+ case 0:
+ /* The sentinel as expected. */
+ break;
+ default:
+ if (!symbol_conf.disable_add2line_warn)
+ pr_warning("%s %s: unexpected record instead of sentinel",
+ __func__, dso_name);
+ break;
+ }
+ goto out;
+ default:
+ /* First record as expected. */
+ break;
+ }
+
+ if (file) {
+ *file = strdup(record_filename);
+ ret = 1;
+ }
+ if (line_nr)
+ *line_nr = record_line_nr;
+
+ if (unwind_inlines) {
+ if (node && inline_list__append_record(dso, node, sym,
+ record_function,
+ record_filename,
+ record_line_nr)) {
+ ret = 0;
+ goto out;
+ }
+ }
+
+ /*
+ * We have to read the records even if we don't care about the inline
+ * info. This isn't the first record and force the address to non-zero
+ * as we're reading records beyond the first.
+ */
+ while ((record_status = read_addr2line_record(&io,
+ a2l_style,
+ dso_name,
+ /*addr=*/1,
+ /*first=*/false,
+ &record_function,
+ &record_filename,
+ &record_line_nr)) == 1) {
+ if (unwind_inlines && node && inline_count++ < MAX_INLINE_NEST) {
+ if (inline_list__append_record(dso, node, sym,
+ record_function,
+ record_filename,
+ record_line_nr)) {
+ ret = 0;
+ goto out;
+ }
+ ret = 1; /* found at least one inline frame */
+ }
+ }
+
+out:
+ free(record_function);
+ free(record_filename);
+ if (io.eof) {
+ dso->a2l = NULL;
+ addr2line_subprocess_cleanup(a2l);
+ }
+ return ret;
+}
+
+void dso__free_a2l(struct dso *dso)
+{
+ struct child_process *a2l = dso->a2l;
+
+ if (!a2l)
+ return;
+
+ addr2line_subprocess_cleanup(a2l);
+
+ dso->a2l = NULL;
+}
+
+#endif /* HAVE_LIBBFD_SUPPORT */
+
+static struct inline_node *addr2inlines(const char *dso_name, u64 addr,
+ struct dso *dso, struct symbol *sym)
+{
+ struct inline_node *node;
+
+ node = zalloc(sizeof(*node));
+ if (node == NULL) {
+ perror("not enough memory for the inline node");
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&node->val);
+ node->addr = addr;
+
+ addr2line(dso_name, addr, NULL, NULL, dso, true, node, sym);
+ return node;
+}
+
+/*
+ * Number of addr2line failures (without success) before disabling it for that
+ * dso.
+ */
+#define A2L_FAIL_LIMIT 123
+
+char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
+ bool show_sym, bool show_addr, bool unwind_inlines,
+ u64 ip)
+{
+ char *file = NULL;
+ unsigned line = 0;
+ char *srcline;
+ const char *dso_name;
+
+ if (!dso->has_srcline)
+ goto out;
+
+ dso_name = dso__name(dso);
+ if (dso_name == NULL)
+ goto out;
+
+ if (!addr2line(dso_name, addr, &file, &line, dso,
+ unwind_inlines, NULL, sym))
+ goto out;
+
+ srcline = srcline_from_fileline(file, line);
+ free(file);
+
+ if (!srcline)
+ goto out;
+
+ dso->a2l_fails = 0;
+
+ return srcline;
+
+out:
+ if (dso->a2l_fails && ++dso->a2l_fails > A2L_FAIL_LIMIT) {
+ dso->has_srcline = 0;
+ dso__free_a2l(dso);
+ }
+
+ if (!show_addr)
+ return (show_sym && sym) ?
+ strndup(sym->name, sym->namelen) : SRCLINE_UNKNOWN;
+
+ if (sym) {
+ if (asprintf(&srcline, "%s+%" PRIu64, show_sym ? sym->name : "",
+ ip - sym->start) < 0)
+ return SRCLINE_UNKNOWN;
+ } else if (asprintf(&srcline, "%s[%" PRIx64 "]", dso->short_name, addr) < 0)
+ return SRCLINE_UNKNOWN;
+ return srcline;
+}
+
+/* Returns filename and fills in line number in line */
+char *get_srcline_split(struct dso *dso, u64 addr, unsigned *line)
+{
+ char *file = NULL;
+ const char *dso_name;
+
+ if (!dso->has_srcline)
+ goto out;
+
+ dso_name = dso__name(dso);
+ if (dso_name == NULL)
+ goto out;
+
+ if (!addr2line(dso_name, addr, &file, line, dso, true, NULL, NULL))
+ goto out;
+
+ dso->a2l_fails = 0;
+ return file;
+
+out:
+ if (dso->a2l_fails && ++dso->a2l_fails > A2L_FAIL_LIMIT) {
+ dso->has_srcline = 0;
+ dso__free_a2l(dso);
+ }
+
+ return NULL;
+}
+
+void zfree_srcline(char **srcline)
+{
+ if (*srcline == NULL)
+ return;
+
+ if (*srcline != SRCLINE_UNKNOWN)
+ free(*srcline);
+
+ *srcline = NULL;
+}
+
+char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
+ bool show_sym, bool show_addr, u64 ip)
+{
+ return __get_srcline(dso, addr, sym, show_sym, show_addr, false, ip);
+}
+
+struct srcline_node {
+ u64 addr;
+ char *srcline;
+ struct rb_node rb_node;
+};
+
+void srcline__tree_insert(struct rb_root_cached *tree, u64 addr, char *srcline)
+{
+ struct rb_node **p = &tree->rb_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct srcline_node *i, *node;
+ bool leftmost = true;
+
+ node = zalloc(sizeof(struct srcline_node));
+ if (!node) {
+ perror("not enough memory for the srcline node");
+ return;
+ }
+
+ node->addr = addr;
+ node->srcline = srcline;
+
+ while (*p != NULL) {
+ parent = *p;
+ i = rb_entry(parent, struct srcline_node, rb_node);
+ if (addr < i->addr)
+ p = &(*p)->rb_left;
+ else {
+ p = &(*p)->rb_right;
+ leftmost = false;
+ }
+ }
+ rb_link_node(&node->rb_node, parent, p);
+ rb_insert_color_cached(&node->rb_node, tree, leftmost);
+}
+
+char *srcline__tree_find(struct rb_root_cached *tree, u64 addr)
+{
+ struct rb_node *n = tree->rb_root.rb_node;
+
+ while (n) {
+ struct srcline_node *i = rb_entry(n, struct srcline_node,
+ rb_node);
+
+ if (addr < i->addr)
+ n = n->rb_left;
+ else if (addr > i->addr)
+ n = n->rb_right;
+ else
+ return i->srcline;
+ }
+
+ return NULL;
+}
+
+void srcline__tree_delete(struct rb_root_cached *tree)
+{
+ struct srcline_node *pos;
+ struct rb_node *next = rb_first_cached(tree);
+
+ while (next) {
+ pos = rb_entry(next, struct srcline_node, rb_node);
+ next = rb_next(&pos->rb_node);
+ rb_erase_cached(&pos->rb_node, tree);
+ zfree_srcline(&pos->srcline);
+ zfree(&pos);
+ }
+}
+
+struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr,
+ struct symbol *sym)
+{
+ const char *dso_name;
+
+ dso_name = dso__name(dso);
+ if (dso_name == NULL)
+ return NULL;
+
+ return addr2inlines(dso_name, addr, dso, sym);
+}
+
+void inline_node__delete(struct inline_node *node)
+{
+ struct inline_list *ilist, *tmp;
+
+ list_for_each_entry_safe(ilist, tmp, &node->val, list) {
+ list_del_init(&ilist->list);
+ zfree_srcline(&ilist->srcline);
+ /* only the inlined symbols are owned by the list */
+ if (ilist->symbol && ilist->symbol->inlined)
+ symbol__delete(ilist->symbol);
+ free(ilist);
+ }
+
+ free(node);
+}
+
+void inlines__tree_insert(struct rb_root_cached *tree,
+ struct inline_node *inlines)
+{
+ struct rb_node **p = &tree->rb_root.rb_node;
+ struct rb_node *parent = NULL;
+ const u64 addr = inlines->addr;
+ struct inline_node *i;
+ bool leftmost = true;
+
+ while (*p != NULL) {
+ parent = *p;
+ i = rb_entry(parent, struct inline_node, rb_node);
+ if (addr < i->addr)
+ p = &(*p)->rb_left;
+ else {
+ p = &(*p)->rb_right;
+ leftmost = false;
+ }
+ }
+ rb_link_node(&inlines->rb_node, parent, p);
+ rb_insert_color_cached(&inlines->rb_node, tree, leftmost);
+}
+
+struct inline_node *inlines__tree_find(struct rb_root_cached *tree, u64 addr)
+{
+ struct rb_node *n = tree->rb_root.rb_node;
+
+ while (n) {
+ struct inline_node *i = rb_entry(n, struct inline_node,
+ rb_node);
+
+ if (addr < i->addr)
+ n = n->rb_left;
+ else if (addr > i->addr)
+ n = n->rb_right;
+ else
+ return i;
+ }
+
+ return NULL;
+}
+
+void inlines__tree_delete(struct rb_root_cached *tree)
+{
+ struct inline_node *pos;
+ struct rb_node *next = rb_first_cached(tree);
+
+ while (next) {
+ pos = rb_entry(next, struct inline_node, rb_node);
+ next = rb_next(&pos->rb_node);
+ rb_erase_cached(&pos->rb_node, tree);
+ inline_node__delete(pos);
+ }
+}
diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h
new file mode 100644
index 000000000..75010d39e
--- /dev/null
+++ b/tools/perf/util/srcline.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_SRCLINE_H
+#define PERF_SRCLINE_H
+
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+struct dso;
+struct symbol;
+
+extern int addr2line_timeout_ms;
+extern bool srcline_full_filename;
+char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
+ bool show_sym, bool show_addr, u64 ip);
+char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
+ bool show_sym, bool show_addr, bool unwind_inlines,
+ u64 ip);
+void zfree_srcline(char **srcline);
+char *get_srcline_split(struct dso *dso, u64 addr, unsigned *line);
+
+/* insert the srcline into the DSO, which will take ownership */
+void srcline__tree_insert(struct rb_root_cached *tree, u64 addr, char *srcline);
+/* find previously inserted srcline */
+char *srcline__tree_find(struct rb_root_cached *tree, u64 addr);
+/* delete all srclines within the tree */
+void srcline__tree_delete(struct rb_root_cached *tree);
+
+extern char *srcline__unknown;
+#define SRCLINE_UNKNOWN srcline__unknown
+
+struct inline_list {
+ struct symbol *symbol;
+ char *srcline;
+ struct list_head list;
+};
+
+struct inline_node {
+ u64 addr;
+ struct list_head val;
+ struct rb_node rb_node;
+};
+
+/* parse inlined frames for the given address */
+struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr,
+ struct symbol *sym);
+/* free resources associated to the inline node list */
+void inline_node__delete(struct inline_node *node);
+
+/* insert the inline node list into the DSO, which will take ownership */
+void inlines__tree_insert(struct rb_root_cached *tree,
+ struct inline_node *inlines);
+/* find previously inserted inline node list */
+struct inline_node *inlines__tree_find(struct rb_root_cached *tree, u64 addr);
+/* delete all nodes within the tree of inline_node s */
+void inlines__tree_delete(struct rb_root_cached *tree);
+
+#endif /* PERF_SRCLINE_H */
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
new file mode 100644
index 000000000..afe6db8e7
--- /dev/null
+++ b/tools/perf/util/stat-display.c
@@ -0,0 +1,1597 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <linux/string.h>
+#include <linux/time64.h>
+#include <math.h>
+#include <perf/cpumap.h>
+#include "color.h"
+#include "counts.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "stat.h"
+#include "top.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "string2.h"
+#include <linux/ctype.h>
+#include "cgroup.h"
+#include <api/fs/fs.h>
+#include "util.h"
+#include "iostat.h"
+#include "pmu.h"
+#include "pmus.h"
+
+#define CNTR_NOT_SUPPORTED "<not supported>"
+#define CNTR_NOT_COUNTED "<not counted>"
+
+#define MGROUP_LEN 50
+#define METRIC_LEN 38
+#define EVNAME_LEN 32
+#define COUNTS_LEN 18
+#define INTERVAL_LEN 16
+#define CGROUP_LEN 16
+#define COMM_LEN 16
+#define PID_LEN 7
+#define CPUS_LEN 4
+
+static int aggr_header_lens[] = {
+ [AGGR_CORE] = 18,
+ [AGGR_CACHE] = 22,
+ [AGGR_DIE] = 12,
+ [AGGR_SOCKET] = 6,
+ [AGGR_NODE] = 6,
+ [AGGR_NONE] = 6,
+ [AGGR_THREAD] = 16,
+ [AGGR_GLOBAL] = 0,
+};
+
+static const char *aggr_header_csv[] = {
+ [AGGR_CORE] = "core,cpus,",
+ [AGGR_CACHE] = "cache,cpus,",
+ [AGGR_DIE] = "die,cpus,",
+ [AGGR_SOCKET] = "socket,cpus,",
+ [AGGR_NONE] = "cpu,",
+ [AGGR_THREAD] = "comm-pid,",
+ [AGGR_NODE] = "node,",
+ [AGGR_GLOBAL] = ""
+};
+
+static const char *aggr_header_std[] = {
+ [AGGR_CORE] = "core",
+ [AGGR_CACHE] = "cache",
+ [AGGR_DIE] = "die",
+ [AGGR_SOCKET] = "socket",
+ [AGGR_NONE] = "cpu",
+ [AGGR_THREAD] = "comm-pid",
+ [AGGR_NODE] = "node",
+ [AGGR_GLOBAL] = ""
+};
+
+static void print_running_std(struct perf_stat_config *config, u64 run, u64 ena)
+{
+ if (run != ena)
+ fprintf(config->output, " (%.2f%%)", 100.0 * run / ena);
+}
+
+static void print_running_csv(struct perf_stat_config *config, u64 run, u64 ena)
+{
+ double enabled_percent = 100;
+
+ if (run != ena)
+ enabled_percent = 100 * run / ena;
+ fprintf(config->output, "%s%" PRIu64 "%s%.2f",
+ config->csv_sep, run, config->csv_sep, enabled_percent);
+}
+
+static void print_running_json(struct perf_stat_config *config, u64 run, u64 ena)
+{
+ double enabled_percent = 100;
+
+ if (run != ena)
+ enabled_percent = 100 * run / ena;
+ fprintf(config->output, "\"event-runtime\" : %" PRIu64 ", \"pcnt-running\" : %.2f, ",
+ run, enabled_percent);
+}
+
+static void print_running(struct perf_stat_config *config,
+ u64 run, u64 ena, bool before_metric)
+{
+ if (config->json_output) {
+ if (before_metric)
+ print_running_json(config, run, ena);
+ } else if (config->csv_output) {
+ if (before_metric)
+ print_running_csv(config, run, ena);
+ } else {
+ if (!before_metric)
+ print_running_std(config, run, ena);
+ }
+}
+
+static void print_noise_pct_std(struct perf_stat_config *config,
+ double pct)
+{
+ if (pct)
+ fprintf(config->output, " ( +-%6.2f%% )", pct);
+}
+
+static void print_noise_pct_csv(struct perf_stat_config *config,
+ double pct)
+{
+ fprintf(config->output, "%s%.2f%%", config->csv_sep, pct);
+}
+
+static void print_noise_pct_json(struct perf_stat_config *config,
+ double pct)
+{
+ fprintf(config->output, "\"variance\" : %.2f, ", pct);
+}
+
+static void print_noise_pct(struct perf_stat_config *config,
+ double total, double avg, bool before_metric)
+{
+ double pct = rel_stddev_stats(total, avg);
+
+ if (config->json_output) {
+ if (before_metric)
+ print_noise_pct_json(config, pct);
+ } else if (config->csv_output) {
+ if (before_metric)
+ print_noise_pct_csv(config, pct);
+ } else {
+ if (!before_metric)
+ print_noise_pct_std(config, pct);
+ }
+}
+
+static void print_noise(struct perf_stat_config *config,
+ struct evsel *evsel, double avg, bool before_metric)
+{
+ struct perf_stat_evsel *ps;
+
+ if (config->run_count == 1)
+ return;
+
+ ps = evsel->stats;
+ print_noise_pct(config, stddev_stats(&ps->res_stats), avg, before_metric);
+}
+
+static void print_cgroup_std(struct perf_stat_config *config, const char *cgrp_name)
+{
+ fprintf(config->output, " %-*s", CGROUP_LEN, cgrp_name);
+}
+
+static void print_cgroup_csv(struct perf_stat_config *config, const char *cgrp_name)
+{
+ fprintf(config->output, "%s%s", config->csv_sep, cgrp_name);
+}
+
+static void print_cgroup_json(struct perf_stat_config *config, const char *cgrp_name)
+{
+ fprintf(config->output, "\"cgroup\" : \"%s\", ", cgrp_name);
+}
+
+static void print_cgroup(struct perf_stat_config *config, struct cgroup *cgrp)
+{
+ if (nr_cgroups || config->cgroup_list) {
+ const char *cgrp_name = cgrp ? cgrp->name : "";
+
+ if (config->json_output)
+ print_cgroup_json(config, cgrp_name);
+ else if (config->csv_output)
+ print_cgroup_csv(config, cgrp_name);
+ else
+ print_cgroup_std(config, cgrp_name);
+ }
+}
+
+static void print_aggr_id_std(struct perf_stat_config *config,
+ struct evsel *evsel, struct aggr_cpu_id id, int aggr_nr)
+{
+ FILE *output = config->output;
+ int idx = config->aggr_mode;
+ char buf[128];
+
+ switch (config->aggr_mode) {
+ case AGGR_CORE:
+ snprintf(buf, sizeof(buf), "S%d-D%d-C%d", id.socket, id.die, id.core);
+ break;
+ case AGGR_CACHE:
+ snprintf(buf, sizeof(buf), "S%d-D%d-L%d-ID%d",
+ id.socket, id.die, id.cache_lvl, id.cache);
+ break;
+ case AGGR_DIE:
+ snprintf(buf, sizeof(buf), "S%d-D%d", id.socket, id.die);
+ break;
+ case AGGR_SOCKET:
+ snprintf(buf, sizeof(buf), "S%d", id.socket);
+ break;
+ case AGGR_NODE:
+ snprintf(buf, sizeof(buf), "N%d", id.node);
+ break;
+ case AGGR_NONE:
+ if (evsel->percore && !config->percore_show_thread) {
+ snprintf(buf, sizeof(buf), "S%d-D%d-C%d ",
+ id.socket, id.die, id.core);
+ fprintf(output, "%-*s ",
+ aggr_header_lens[AGGR_CORE], buf);
+ } else if (id.cpu.cpu > -1) {
+ fprintf(output, "CPU%-*d ",
+ aggr_header_lens[AGGR_NONE] - 3, id.cpu.cpu);
+ }
+ return;
+ case AGGR_THREAD:
+ fprintf(output, "%*s-%-*d ",
+ COMM_LEN, perf_thread_map__comm(evsel->core.threads, id.thread_idx),
+ PID_LEN, perf_thread_map__pid(evsel->core.threads, id.thread_idx));
+ return;
+ case AGGR_GLOBAL:
+ case AGGR_UNSET:
+ case AGGR_MAX:
+ default:
+ return;
+ }
+
+ fprintf(output, "%-*s %*d ", aggr_header_lens[idx], buf, 4, aggr_nr);
+}
+
+static void print_aggr_id_csv(struct perf_stat_config *config,
+ struct evsel *evsel, struct aggr_cpu_id id, int aggr_nr)
+{
+ FILE *output = config->output;
+ const char *sep = config->csv_sep;
+
+ switch (config->aggr_mode) {
+ case AGGR_CORE:
+ fprintf(output, "S%d-D%d-C%d%s%d%s",
+ id.socket, id.die, id.core, sep, aggr_nr, sep);
+ break;
+ case AGGR_CACHE:
+ fprintf(config->output, "S%d-D%d-L%d-ID%d%s%d%s",
+ id.socket, id.die, id.cache_lvl, id.cache, sep, aggr_nr, sep);
+ break;
+ case AGGR_DIE:
+ fprintf(output, "S%d-D%d%s%d%s",
+ id.socket, id.die, sep, aggr_nr, sep);
+ break;
+ case AGGR_SOCKET:
+ fprintf(output, "S%d%s%d%s",
+ id.socket, sep, aggr_nr, sep);
+ break;
+ case AGGR_NODE:
+ fprintf(output, "N%d%s%d%s",
+ id.node, sep, aggr_nr, sep);
+ break;
+ case AGGR_NONE:
+ if (evsel->percore && !config->percore_show_thread) {
+ fprintf(output, "S%d-D%d-C%d%s",
+ id.socket, id.die, id.core, sep);
+ } else if (id.cpu.cpu > -1) {
+ fprintf(output, "CPU%d%s",
+ id.cpu.cpu, sep);
+ }
+ break;
+ case AGGR_THREAD:
+ fprintf(output, "%s-%d%s",
+ perf_thread_map__comm(evsel->core.threads, id.thread_idx),
+ perf_thread_map__pid(evsel->core.threads, id.thread_idx),
+ sep);
+ break;
+ case AGGR_GLOBAL:
+ case AGGR_UNSET:
+ case AGGR_MAX:
+ default:
+ break;
+ }
+}
+
+static void print_aggr_id_json(struct perf_stat_config *config,
+ struct evsel *evsel, struct aggr_cpu_id id, int aggr_nr)
+{
+ FILE *output = config->output;
+
+ switch (config->aggr_mode) {
+ case AGGR_CORE:
+ fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ",
+ id.socket, id.die, id.core, aggr_nr);
+ break;
+ case AGGR_CACHE:
+ fprintf(output, "\"cache\" : \"S%d-D%d-L%d-ID%d\", \"aggregate-number\" : %d, ",
+ id.socket, id.die, id.cache_lvl, id.cache, aggr_nr);
+ break;
+ case AGGR_DIE:
+ fprintf(output, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ",
+ id.socket, id.die, aggr_nr);
+ break;
+ case AGGR_SOCKET:
+ fprintf(output, "\"socket\" : \"S%d\", \"aggregate-number\" : %d, ",
+ id.socket, aggr_nr);
+ break;
+ case AGGR_NODE:
+ fprintf(output, "\"node\" : \"N%d\", \"aggregate-number\" : %d, ",
+ id.node, aggr_nr);
+ break;
+ case AGGR_NONE:
+ if (evsel->percore && !config->percore_show_thread) {
+ fprintf(output, "\"core\" : \"S%d-D%d-C%d\"",
+ id.socket, id.die, id.core);
+ } else if (id.cpu.cpu > -1) {
+ fprintf(output, "\"cpu\" : \"%d\", ",
+ id.cpu.cpu);
+ }
+ break;
+ case AGGR_THREAD:
+ fprintf(output, "\"thread\" : \"%s-%d\", ",
+ perf_thread_map__comm(evsel->core.threads, id.thread_idx),
+ perf_thread_map__pid(evsel->core.threads, id.thread_idx));
+ break;
+ case AGGR_GLOBAL:
+ case AGGR_UNSET:
+ case AGGR_MAX:
+ default:
+ break;
+ }
+}
+
+static void aggr_printout(struct perf_stat_config *config,
+ struct evsel *evsel, struct aggr_cpu_id id, int aggr_nr)
+{
+ if (config->json_output)
+ print_aggr_id_json(config, evsel, id, aggr_nr);
+ else if (config->csv_output)
+ print_aggr_id_csv(config, evsel, id, aggr_nr);
+ else
+ print_aggr_id_std(config, evsel, id, aggr_nr);
+}
+
+struct outstate {
+ FILE *fh;
+ bool newline;
+ bool first;
+ const char *prefix;
+ int nfields;
+ int aggr_nr;
+ struct aggr_cpu_id id;
+ struct evsel *evsel;
+ struct cgroup *cgrp;
+};
+
+static void new_line_std(struct perf_stat_config *config __maybe_unused,
+ void *ctx)
+{
+ struct outstate *os = ctx;
+
+ os->newline = true;
+}
+
+static inline void __new_line_std_csv(struct perf_stat_config *config,
+ struct outstate *os)
+{
+ fputc('\n', os->fh);
+ if (os->prefix)
+ fputs(os->prefix, os->fh);
+ aggr_printout(config, os->evsel, os->id, os->aggr_nr);
+}
+
+static inline void __new_line_std(struct outstate *os)
+{
+ fprintf(os->fh, " ");
+}
+
+static void do_new_line_std(struct perf_stat_config *config,
+ struct outstate *os)
+{
+ __new_line_std_csv(config, os);
+ if (config->aggr_mode == AGGR_NONE)
+ fprintf(os->fh, " ");
+ __new_line_std(os);
+}
+
+static void print_metric_std(struct perf_stat_config *config,
+ void *ctx, const char *color, const char *fmt,
+ const char *unit, double val)
+{
+ struct outstate *os = ctx;
+ FILE *out = os->fh;
+ int n;
+ bool newline = os->newline;
+
+ os->newline = false;
+
+ if (unit == NULL || fmt == NULL) {
+ fprintf(out, "%-*s", METRIC_LEN, "");
+ return;
+ }
+
+ if (newline)
+ do_new_line_std(config, os);
+
+ n = fprintf(out, " # ");
+ if (color)
+ n += color_fprintf(out, color, fmt, val);
+ else
+ n += fprintf(out, fmt, val);
+ fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
+}
+
+static void new_line_csv(struct perf_stat_config *config, void *ctx)
+{
+ struct outstate *os = ctx;
+ int i;
+
+ __new_line_std_csv(config, os);
+ for (i = 0; i < os->nfields; i++)
+ fputs(config->csv_sep, os->fh);
+}
+
+static void print_metric_csv(struct perf_stat_config *config __maybe_unused,
+ void *ctx,
+ const char *color __maybe_unused,
+ const char *fmt, const char *unit, double val)
+{
+ struct outstate *os = ctx;
+ FILE *out = os->fh;
+ char buf[64], *vals, *ends;
+
+ if (unit == NULL || fmt == NULL) {
+ fprintf(out, "%s%s", config->csv_sep, config->csv_sep);
+ return;
+ }
+ snprintf(buf, sizeof(buf), fmt, val);
+ ends = vals = skip_spaces(buf);
+ while (isdigit(*ends) || *ends == '.')
+ ends++;
+ *ends = 0;
+ fprintf(out, "%s%s%s%s", config->csv_sep, vals, config->csv_sep, skip_spaces(unit));
+}
+
+static void print_metric_json(struct perf_stat_config *config __maybe_unused,
+ void *ctx,
+ const char *color __maybe_unused,
+ const char *fmt __maybe_unused,
+ const char *unit, double val)
+{
+ struct outstate *os = ctx;
+ FILE *out = os->fh;
+
+ fprintf(out, "\"metric-value\" : \"%f\", ", val);
+ fprintf(out, "\"metric-unit\" : \"%s\"", unit);
+ if (!config->metric_only)
+ fprintf(out, "}");
+}
+
+static void new_line_json(struct perf_stat_config *config, void *ctx)
+{
+ struct outstate *os = ctx;
+
+ fputs("\n{", os->fh);
+ if (os->prefix)
+ fprintf(os->fh, "%s", os->prefix);
+ aggr_printout(config, os->evsel, os->id, os->aggr_nr);
+}
+
+static void print_metricgroup_header_json(struct perf_stat_config *config,
+ void *ctx,
+ const char *metricgroup_name)
+{
+ if (!metricgroup_name)
+ return;
+
+ fprintf(config->output, "\"metricgroup\" : \"%s\"}", metricgroup_name);
+ new_line_json(config, ctx);
+}
+
+static void print_metricgroup_header_csv(struct perf_stat_config *config,
+ void *ctx,
+ const char *metricgroup_name)
+{
+ struct outstate *os = ctx;
+ int i;
+
+ if (!metricgroup_name) {
+ /* Leave space for running and enabling */
+ for (i = 0; i < os->nfields - 2; i++)
+ fputs(config->csv_sep, os->fh);
+ return;
+ }
+
+ for (i = 0; i < os->nfields; i++)
+ fputs(config->csv_sep, os->fh);
+ fprintf(config->output, "%s", metricgroup_name);
+ new_line_csv(config, ctx);
+}
+
+static void print_metricgroup_header_std(struct perf_stat_config *config,
+ void *ctx,
+ const char *metricgroup_name)
+{
+ struct outstate *os = ctx;
+ int n;
+
+ if (!metricgroup_name) {
+ __new_line_std(os);
+ return;
+ }
+
+ n = fprintf(config->output, " %*s", EVNAME_LEN, metricgroup_name);
+
+ fprintf(config->output, "%*s", MGROUP_LEN - n - 1, "");
+}
+
+/* Filter out some columns that don't work well in metrics only mode */
+
+static bool valid_only_metric(const char *unit)
+{
+ if (!unit)
+ return false;
+ if (strstr(unit, "/sec") ||
+ strstr(unit, "CPUs utilized"))
+ return false;
+ return true;
+}
+
+static const char *fixunit(char *buf, struct evsel *evsel,
+ const char *unit)
+{
+ if (!strncmp(unit, "of all", 6)) {
+ snprintf(buf, 1024, "%s %s", evsel__name(evsel),
+ unit);
+ return buf;
+ }
+ return unit;
+}
+
+static void print_metric_only(struct perf_stat_config *config,
+ void *ctx, const char *color, const char *fmt,
+ const char *unit, double val)
+{
+ struct outstate *os = ctx;
+ FILE *out = os->fh;
+ char buf[1024], str[1024];
+ unsigned mlen = config->metric_only_len;
+
+ if (!valid_only_metric(unit))
+ return;
+ unit = fixunit(buf, os->evsel, unit);
+ if (mlen < strlen(unit))
+ mlen = strlen(unit) + 1;
+
+ if (color)
+ mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1;
+
+ color_snprintf(str, sizeof(str), color ?: "", fmt, val);
+ fprintf(out, "%*s ", mlen, str);
+ os->first = false;
+}
+
+static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused,
+ void *ctx, const char *color __maybe_unused,
+ const char *fmt,
+ const char *unit, double val)
+{
+ struct outstate *os = ctx;
+ FILE *out = os->fh;
+ char buf[64], *vals, *ends;
+ char tbuf[1024];
+
+ if (!valid_only_metric(unit))
+ return;
+ unit = fixunit(tbuf, os->evsel, unit);
+ snprintf(buf, sizeof(buf), fmt ?: "", val);
+ ends = vals = skip_spaces(buf);
+ while (isdigit(*ends) || *ends == '.')
+ ends++;
+ *ends = 0;
+ fprintf(out, "%s%s", vals, config->csv_sep);
+ os->first = false;
+}
+
+static void print_metric_only_json(struct perf_stat_config *config __maybe_unused,
+ void *ctx, const char *color __maybe_unused,
+ const char *fmt,
+ const char *unit, double val)
+{
+ struct outstate *os = ctx;
+ FILE *out = os->fh;
+ char buf[64], *vals, *ends;
+ char tbuf[1024];
+
+ if (!valid_only_metric(unit))
+ return;
+ unit = fixunit(tbuf, os->evsel, unit);
+ snprintf(buf, sizeof(buf), fmt ?: "", val);
+ ends = vals = skip_spaces(buf);
+ while (isdigit(*ends) || *ends == '.')
+ ends++;
+ *ends = 0;
+ if (!unit[0] || !vals[0])
+ return;
+ fprintf(out, "%s\"%s\" : \"%s\"", os->first ? "" : ", ", unit, vals);
+ os->first = false;
+}
+
+static void new_line_metric(struct perf_stat_config *config __maybe_unused,
+ void *ctx __maybe_unused)
+{
+}
+
+static void print_metric_header(struct perf_stat_config *config,
+ void *ctx, const char *color __maybe_unused,
+ const char *fmt __maybe_unused,
+ const char *unit, double val __maybe_unused)
+{
+ struct outstate *os = ctx;
+ char tbuf[1024];
+
+ /* In case of iostat, print metric header for first root port only */
+ if (config->iostat_run &&
+ os->evsel->priv != os->evsel->evlist->selected->priv)
+ return;
+
+ if (os->evsel->cgrp != os->cgrp)
+ return;
+
+ if (!valid_only_metric(unit))
+ return;
+ unit = fixunit(tbuf, os->evsel, unit);
+
+ if (config->json_output)
+ return;
+ else if (config->csv_output)
+ fprintf(os->fh, "%s%s", unit, config->csv_sep);
+ else
+ fprintf(os->fh, "%*s ", config->metric_only_len, unit);
+}
+
+static void print_counter_value_std(struct perf_stat_config *config,
+ struct evsel *evsel, double avg, bool ok)
+{
+ FILE *output = config->output;
+ double sc = evsel->scale;
+ const char *fmt;
+ const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED;
+
+ if (config->big_num)
+ fmt = floor(sc) != sc ? "%'*.2f " : "%'*.0f ";
+ else
+ fmt = floor(sc) != sc ? "%*.2f " : "%*.0f ";
+
+ if (ok)
+ fprintf(output, fmt, COUNTS_LEN, avg);
+ else
+ fprintf(output, "%*s ", COUNTS_LEN, bad_count);
+
+ if (evsel->unit)
+ fprintf(output, "%-*s ", config->unit_width, evsel->unit);
+
+ fprintf(output, "%-*s", EVNAME_LEN, evsel__name(evsel));
+}
+
+static void print_counter_value_csv(struct perf_stat_config *config,
+ struct evsel *evsel, double avg, bool ok)
+{
+ FILE *output = config->output;
+ double sc = evsel->scale;
+ const char *sep = config->csv_sep;
+ const char *fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s";
+ const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED;
+
+ if (ok)
+ fprintf(output, fmt, avg, sep);
+ else
+ fprintf(output, "%s%s", bad_count, sep);
+
+ if (evsel->unit)
+ fprintf(output, "%s%s", evsel->unit, sep);
+
+ fprintf(output, "%s", evsel__name(evsel));
+}
+
+static void print_counter_value_json(struct perf_stat_config *config,
+ struct evsel *evsel, double avg, bool ok)
+{
+ FILE *output = config->output;
+ const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED;
+
+ if (ok)
+ fprintf(output, "\"counter-value\" : \"%f\", ", avg);
+ else
+ fprintf(output, "\"counter-value\" : \"%s\", ", bad_count);
+
+ if (evsel->unit)
+ fprintf(output, "\"unit\" : \"%s\", ", evsel->unit);
+
+ fprintf(output, "\"event\" : \"%s\", ", evsel__name(evsel));
+}
+
+static void print_counter_value(struct perf_stat_config *config,
+ struct evsel *evsel, double avg, bool ok)
+{
+ if (config->json_output)
+ print_counter_value_json(config, evsel, avg, ok);
+ else if (config->csv_output)
+ print_counter_value_csv(config, evsel, avg, ok);
+ else
+ print_counter_value_std(config, evsel, avg, ok);
+}
+
+static void abs_printout(struct perf_stat_config *config,
+ struct aggr_cpu_id id, int aggr_nr,
+ struct evsel *evsel, double avg, bool ok)
+{
+ aggr_printout(config, evsel, id, aggr_nr);
+ print_counter_value(config, evsel, avg, ok);
+ print_cgroup(config, evsel->cgrp);
+}
+
+static bool is_mixed_hw_group(struct evsel *counter)
+{
+ struct evlist *evlist = counter->evlist;
+ u32 pmu_type = counter->core.attr.type;
+ struct evsel *pos;
+
+ if (counter->core.nr_members < 2)
+ return false;
+
+ evlist__for_each_entry(evlist, pos) {
+ /* software events can be part of any hardware group */
+ if (pos->core.attr.type == PERF_TYPE_SOFTWARE)
+ continue;
+ if (pmu_type == PERF_TYPE_SOFTWARE) {
+ pmu_type = pos->core.attr.type;
+ continue;
+ }
+ if (pmu_type != pos->core.attr.type)
+ return true;
+ }
+
+ return false;
+}
+
+static bool evlist__has_hybrid(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ if (perf_pmus__num_core_pmus() == 1)
+ return false;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.is_pmu_core)
+ return true;
+ }
+
+ return false;
+}
+
+static void printout(struct perf_stat_config *config, struct outstate *os,
+ double uval, u64 run, u64 ena, double noise, int aggr_idx)
+{
+ struct perf_stat_output_ctx out;
+ print_metric_t pm;
+ new_line_t nl;
+ print_metricgroup_header_t pmh;
+ bool ok = true;
+ struct evsel *counter = os->evsel;
+
+ if (config->csv_output) {
+ pm = config->metric_only ? print_metric_only_csv : print_metric_csv;
+ nl = config->metric_only ? new_line_metric : new_line_csv;
+ pmh = print_metricgroup_header_csv;
+ os->nfields = 4 + (counter->cgrp ? 1 : 0);
+ } else if (config->json_output) {
+ pm = config->metric_only ? print_metric_only_json : print_metric_json;
+ nl = config->metric_only ? new_line_metric : new_line_json;
+ pmh = print_metricgroup_header_json;
+ } else {
+ pm = config->metric_only ? print_metric_only : print_metric_std;
+ nl = config->metric_only ? new_line_metric : new_line_std;
+ pmh = print_metricgroup_header_std;
+ }
+
+ if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
+ if (config->metric_only) {
+ pm(config, os, NULL, "", "", 0);
+ return;
+ }
+
+ ok = false;
+
+ if (counter->supported) {
+ if (!evlist__has_hybrid(counter->evlist)) {
+ config->print_free_counters_hint = 1;
+ if (is_mixed_hw_group(counter))
+ config->print_mixed_hw_group_error = 1;
+ }
+ }
+ }
+
+ out.print_metric = pm;
+ out.new_line = nl;
+ out.print_metricgroup_header = pmh;
+ out.ctx = os;
+ out.force_header = false;
+
+ if (!config->metric_only && !counter->default_metricgroup) {
+ abs_printout(config, os->id, os->aggr_nr, counter, uval, ok);
+
+ print_noise(config, counter, noise, /*before_metric=*/true);
+ print_running(config, run, ena, /*before_metric=*/true);
+ }
+
+ if (ok) {
+ if (!config->metric_only && counter->default_metricgroup) {
+ void *from = NULL;
+
+ aggr_printout(config, os->evsel, os->id, os->aggr_nr);
+ /* Print out all the metricgroup with the same metric event. */
+ do {
+ int num = 0;
+
+ /* Print out the new line for the next new metricgroup. */
+ if (from) {
+ if (config->json_output)
+ new_line_json(config, (void *)os);
+ else
+ __new_line_std_csv(config, os);
+ }
+
+ print_noise(config, counter, noise, /*before_metric=*/true);
+ print_running(config, run, ena, /*before_metric=*/true);
+ from = perf_stat__print_shadow_stats_metricgroup(config, counter, aggr_idx,
+ &num, from, &out,
+ &config->metric_events);
+ } while (from != NULL);
+ } else
+ perf_stat__print_shadow_stats(config, counter, uval, aggr_idx,
+ &out, &config->metric_events);
+ } else {
+ pm(config, os, /*color=*/NULL, /*format=*/NULL, /*unit=*/"", /*val=*/0);
+ }
+
+ if (!config->metric_only) {
+ print_noise(config, counter, noise, /*before_metric=*/false);
+ print_running(config, run, ena, /*before_metric=*/false);
+ }
+}
+
+static void uniquify_event_name(struct evsel *counter)
+{
+ char *new_name;
+ char *config;
+ int ret = 0;
+
+ if (counter->uniquified_name || counter->use_config_name ||
+ !counter->pmu_name || !strncmp(evsel__name(counter), counter->pmu_name,
+ strlen(counter->pmu_name)))
+ return;
+
+ config = strchr(counter->name, '/');
+ if (config) {
+ if (asprintf(&new_name,
+ "%s%s", counter->pmu_name, config) > 0) {
+ free(counter->name);
+ counter->name = new_name;
+ }
+ } else {
+ if (evsel__is_hybrid(counter)) {
+ ret = asprintf(&new_name, "%s/%s/",
+ counter->pmu_name, counter->name);
+ } else {
+ ret = asprintf(&new_name, "%s [%s]",
+ counter->name, counter->pmu_name);
+ }
+
+ if (ret) {
+ free(counter->name);
+ counter->name = new_name;
+ }
+ }
+
+ counter->uniquified_name = true;
+}
+
+static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config)
+{
+ return evsel__is_hybrid(evsel) && !config->hybrid_merge;
+}
+
+static void uniquify_counter(struct perf_stat_config *config, struct evsel *counter)
+{
+ if (config->no_merge || hybrid_uniquify(counter, config))
+ uniquify_event_name(counter);
+}
+
+/**
+ * should_skip_zero_count() - Check if the event should print 0 values.
+ * @config: The perf stat configuration (including aggregation mode).
+ * @counter: The evsel with its associated cpumap.
+ * @id: The aggregation id that is being queried.
+ *
+ * Due to mismatch between the event cpumap or thread-map and the
+ * aggregation mode, sometimes it'd iterate the counter with the map
+ * which does not contain any values.
+ *
+ * For example, uncore events have dedicated CPUs to manage them,
+ * result for other CPUs should be zero and skipped.
+ *
+ * Return: %true if the value should NOT be printed, %false if the value
+ * needs to be printed like "<not counted>" or "<not supported>".
+ */
+static bool should_skip_zero_counter(struct perf_stat_config *config,
+ struct evsel *counter,
+ const struct aggr_cpu_id *id)
+{
+ struct perf_cpu cpu;
+ int idx;
+
+ /*
+ * Skip value 0 when enabling --per-thread globally,
+ * otherwise it will have too many 0 output.
+ */
+ if (config->aggr_mode == AGGR_THREAD && config->system_wide)
+ return true;
+
+ /* Tool events have the software PMU but are only gathered on 1. */
+ if (evsel__is_tool(counter))
+ return true;
+
+ /*
+ * Skip value 0 when it's an uncore event and the given aggr id
+ * does not belong to the PMU cpumask.
+ */
+ if (!counter->pmu || !counter->pmu->is_uncore)
+ return false;
+
+ perf_cpu_map__for_each_cpu(cpu, idx, counter->pmu->cpus) {
+ struct aggr_cpu_id own_id = config->aggr_get_id(config, cpu);
+
+ if (aggr_cpu_id__equal(id, &own_id))
+ return false;
+ }
+ return true;
+}
+
+static void print_counter_aggrdata(struct perf_stat_config *config,
+ struct evsel *counter, int aggr_idx,
+ struct outstate *os)
+{
+ FILE *output = config->output;
+ u64 ena, run, val;
+ double uval;
+ struct perf_stat_evsel *ps = counter->stats;
+ struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx];
+ struct aggr_cpu_id id = config->aggr_map->map[aggr_idx];
+ double avg = aggr->counts.val;
+ bool metric_only = config->metric_only;
+
+ os->id = id;
+ os->aggr_nr = aggr->nr;
+ os->evsel = counter;
+
+ /* Skip already merged uncore/hybrid events */
+ if (counter->merged_stat)
+ return;
+
+ uniquify_counter(config, counter);
+
+ val = aggr->counts.val;
+ ena = aggr->counts.ena;
+ run = aggr->counts.run;
+
+ if (perf_stat__skip_metric_event(counter, &config->metric_events, ena, run))
+ return;
+
+ if (val == 0 && should_skip_zero_counter(config, counter, &id))
+ return;
+
+ if (!metric_only) {
+ if (config->json_output)
+ fputc('{', output);
+ if (os->prefix)
+ fprintf(output, "%s", os->prefix);
+ else if (config->summary && config->csv_output &&
+ !config->no_csv_summary && !config->interval)
+ fprintf(output, "%s%s", "summary", config->csv_sep);
+ }
+
+ uval = val * counter->scale;
+
+ printout(config, os, uval, run, ena, avg, aggr_idx);
+
+ if (!metric_only)
+ fputc('\n', output);
+}
+
+static void print_metric_begin(struct perf_stat_config *config,
+ struct evlist *evlist,
+ struct outstate *os, int aggr_idx)
+{
+ struct perf_stat_aggr *aggr;
+ struct aggr_cpu_id id;
+ struct evsel *evsel;
+
+ os->first = true;
+ if (!config->metric_only)
+ return;
+
+ if (config->json_output)
+ fputc('{', config->output);
+ if (os->prefix)
+ fprintf(config->output, "%s", os->prefix);
+
+ evsel = evlist__first(evlist);
+ id = config->aggr_map->map[aggr_idx];
+ aggr = &evsel->stats->aggr[aggr_idx];
+ aggr_printout(config, evsel, id, aggr->nr);
+
+ print_cgroup(config, os->cgrp ? : evsel->cgrp);
+}
+
+static void print_metric_end(struct perf_stat_config *config, struct outstate *os)
+{
+ FILE *output = config->output;
+
+ if (!config->metric_only)
+ return;
+
+ if (config->json_output) {
+ if (os->first)
+ fputs("\"metric-value\" : \"none\"", output);
+ fputc('}', output);
+ }
+ fputc('\n', output);
+}
+
+static void print_aggr(struct perf_stat_config *config,
+ struct evlist *evlist,
+ struct outstate *os)
+{
+ struct evsel *counter;
+ int aggr_idx;
+
+ if (!config->aggr_map || !config->aggr_get_id)
+ return;
+
+ /*
+ * With metric_only everything is on a single line.
+ * Without each counter has its own line.
+ */
+ cpu_aggr_map__for_each_idx(aggr_idx, config->aggr_map) {
+ print_metric_begin(config, evlist, os, aggr_idx);
+
+ evlist__for_each_entry(evlist, counter) {
+ print_counter_aggrdata(config, counter, aggr_idx, os);
+ }
+ print_metric_end(config, os);
+ }
+}
+
+static void print_aggr_cgroup(struct perf_stat_config *config,
+ struct evlist *evlist,
+ struct outstate *os)
+{
+ struct evsel *counter, *evsel;
+ int aggr_idx;
+
+ if (!config->aggr_map || !config->aggr_get_id)
+ return;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (os->cgrp == evsel->cgrp)
+ continue;
+
+ os->cgrp = evsel->cgrp;
+
+ cpu_aggr_map__for_each_idx(aggr_idx, config->aggr_map) {
+ print_metric_begin(config, evlist, os, aggr_idx);
+
+ evlist__for_each_entry(evlist, counter) {
+ if (counter->cgrp != os->cgrp)
+ continue;
+
+ print_counter_aggrdata(config, counter, aggr_idx, os);
+ }
+ print_metric_end(config, os);
+ }
+ }
+}
+
+static void print_counter(struct perf_stat_config *config,
+ struct evsel *counter, struct outstate *os)
+{
+ int aggr_idx;
+
+ /* AGGR_THREAD doesn't have config->aggr_get_id */
+ if (!config->aggr_map)
+ return;
+
+ cpu_aggr_map__for_each_idx(aggr_idx, config->aggr_map) {
+ print_counter_aggrdata(config, counter, aggr_idx, os);
+ }
+}
+
+static void print_no_aggr_metric(struct perf_stat_config *config,
+ struct evlist *evlist,
+ struct outstate *os)
+{
+ int all_idx;
+ struct perf_cpu cpu;
+
+ perf_cpu_map__for_each_cpu(cpu, all_idx, evlist->core.user_requested_cpus) {
+ struct evsel *counter;
+ bool first = true;
+
+ evlist__for_each_entry(evlist, counter) {
+ u64 ena, run, val;
+ double uval;
+ struct perf_stat_evsel *ps = counter->stats;
+ int aggr_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu);
+
+ if (aggr_idx < 0)
+ continue;
+
+ os->evsel = counter;
+ os->id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
+ if (first) {
+ print_metric_begin(config, evlist, os, aggr_idx);
+ first = false;
+ }
+ val = ps->aggr[aggr_idx].counts.val;
+ ena = ps->aggr[aggr_idx].counts.ena;
+ run = ps->aggr[aggr_idx].counts.run;
+
+ uval = val * counter->scale;
+ printout(config, os, uval, run, ena, 1.0, aggr_idx);
+ }
+ if (!first)
+ print_metric_end(config, os);
+ }
+}
+
+static void print_metric_headers_std(struct perf_stat_config *config,
+ bool no_indent)
+{
+ fputc(' ', config->output);
+
+ if (!no_indent) {
+ int len = aggr_header_lens[config->aggr_mode];
+
+ if (nr_cgroups || config->cgroup_list)
+ len += CGROUP_LEN + 1;
+
+ fprintf(config->output, "%*s", len, "");
+ }
+}
+
+static void print_metric_headers_csv(struct perf_stat_config *config,
+ bool no_indent __maybe_unused)
+{
+ if (config->interval)
+ fputs("time,", config->output);
+ if (!config->iostat_run)
+ fputs(aggr_header_csv[config->aggr_mode], config->output);
+}
+
+static void print_metric_headers_json(struct perf_stat_config *config __maybe_unused,
+ bool no_indent __maybe_unused)
+{
+}
+
+static void print_metric_headers(struct perf_stat_config *config,
+ struct evlist *evlist, bool no_indent)
+{
+ struct evsel *counter;
+ struct outstate os = {
+ .fh = config->output
+ };
+ struct perf_stat_output_ctx out = {
+ .ctx = &os,
+ .print_metric = print_metric_header,
+ .new_line = new_line_metric,
+ .force_header = true,
+ };
+
+ if (config->json_output)
+ print_metric_headers_json(config, no_indent);
+ else if (config->csv_output)
+ print_metric_headers_csv(config, no_indent);
+ else
+ print_metric_headers_std(config, no_indent);
+
+ if (config->iostat_run)
+ iostat_print_header_prefix(config);
+
+ if (config->cgroup_list)
+ os.cgrp = evlist__first(evlist)->cgrp;
+
+ /* Print metrics headers only */
+ evlist__for_each_entry(evlist, counter) {
+ os.evsel = counter;
+
+ perf_stat__print_shadow_stats(config, counter, 0,
+ 0,
+ &out,
+ &config->metric_events);
+ }
+
+ if (!config->json_output)
+ fputc('\n', config->output);
+}
+
+static void prepare_interval(struct perf_stat_config *config,
+ char *prefix, size_t len, struct timespec *ts)
+{
+ if (config->iostat_run)
+ return;
+
+ if (config->json_output)
+ scnprintf(prefix, len, "\"interval\" : %lu.%09lu, ",
+ (unsigned long) ts->tv_sec, ts->tv_nsec);
+ else if (config->csv_output)
+ scnprintf(prefix, len, "%lu.%09lu%s",
+ (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep);
+ else
+ scnprintf(prefix, len, "%6lu.%09lu ",
+ (unsigned long) ts->tv_sec, ts->tv_nsec);
+}
+
+static void print_header_interval_std(struct perf_stat_config *config,
+ struct target *_target __maybe_unused,
+ struct evlist *evlist,
+ int argc __maybe_unused,
+ const char **argv __maybe_unused)
+{
+ FILE *output = config->output;
+
+ switch (config->aggr_mode) {
+ case AGGR_NODE:
+ case AGGR_SOCKET:
+ case AGGR_DIE:
+ case AGGR_CACHE:
+ case AGGR_CORE:
+ fprintf(output, "#%*s %-*s cpus",
+ INTERVAL_LEN - 1, "time",
+ aggr_header_lens[config->aggr_mode],
+ aggr_header_std[config->aggr_mode]);
+ break;
+ case AGGR_NONE:
+ fprintf(output, "#%*s %-*s",
+ INTERVAL_LEN - 1, "time",
+ aggr_header_lens[config->aggr_mode],
+ aggr_header_std[config->aggr_mode]);
+ break;
+ case AGGR_THREAD:
+ fprintf(output, "#%*s %*s-%-*s",
+ INTERVAL_LEN - 1, "time",
+ COMM_LEN, "comm", PID_LEN, "pid");
+ break;
+ case AGGR_GLOBAL:
+ default:
+ if (!config->iostat_run)
+ fprintf(output, "#%*s",
+ INTERVAL_LEN - 1, "time");
+ case AGGR_UNSET:
+ case AGGR_MAX:
+ break;
+ }
+
+ if (config->metric_only)
+ print_metric_headers(config, evlist, true);
+ else
+ fprintf(output, " %*s %*s events\n",
+ COUNTS_LEN, "counts", config->unit_width, "unit");
+}
+
+static void print_header_std(struct perf_stat_config *config,
+ struct target *_target, struct evlist *evlist,
+ int argc, const char **argv)
+{
+ FILE *output = config->output;
+ int i;
+
+ fprintf(output, "\n");
+ fprintf(output, " Performance counter stats for ");
+ if (_target->bpf_str)
+ fprintf(output, "\'BPF program(s) %s", _target->bpf_str);
+ else if (_target->system_wide)
+ fprintf(output, "\'system wide");
+ else if (_target->cpu_list)
+ fprintf(output, "\'CPU(s) %s", _target->cpu_list);
+ else if (!target__has_task(_target)) {
+ fprintf(output, "\'%s", argv ? argv[0] : "pipe");
+ for (i = 1; argv && (i < argc); i++)
+ fprintf(output, " %s", argv[i]);
+ } else if (_target->pid)
+ fprintf(output, "process id \'%s", _target->pid);
+ else
+ fprintf(output, "thread id \'%s", _target->tid);
+
+ fprintf(output, "\'");
+ if (config->run_count > 1)
+ fprintf(output, " (%d runs)", config->run_count);
+ fprintf(output, ":\n\n");
+
+ if (config->metric_only)
+ print_metric_headers(config, evlist, false);
+}
+
+static void print_header_csv(struct perf_stat_config *config,
+ struct target *_target __maybe_unused,
+ struct evlist *evlist,
+ int argc __maybe_unused,
+ const char **argv __maybe_unused)
+{
+ if (config->metric_only)
+ print_metric_headers(config, evlist, true);
+}
+static void print_header_json(struct perf_stat_config *config,
+ struct target *_target __maybe_unused,
+ struct evlist *evlist,
+ int argc __maybe_unused,
+ const char **argv __maybe_unused)
+{
+ if (config->metric_only)
+ print_metric_headers(config, evlist, true);
+}
+
+static void print_header(struct perf_stat_config *config,
+ struct target *_target,
+ struct evlist *evlist,
+ int argc, const char **argv)
+{
+ static int num_print_iv;
+
+ fflush(stdout);
+
+ if (config->interval_clear)
+ puts(CONSOLE_CLEAR);
+
+ if (num_print_iv == 0 || config->interval_clear) {
+ if (config->json_output)
+ print_header_json(config, _target, evlist, argc, argv);
+ else if (config->csv_output)
+ print_header_csv(config, _target, evlist, argc, argv);
+ else if (config->interval)
+ print_header_interval_std(config, _target, evlist, argc, argv);
+ else
+ print_header_std(config, _target, evlist, argc, argv);
+ }
+
+ if (num_print_iv++ == 25)
+ num_print_iv = 0;
+}
+
+static int get_precision(double num)
+{
+ if (num > 1)
+ return 0;
+
+ return lround(ceil(-log10(num)));
+}
+
+static void print_table(struct perf_stat_config *config,
+ FILE *output, int precision, double avg)
+{
+ char tmp[64];
+ int idx, indent = 0;
+
+ scnprintf(tmp, 64, " %17.*f", precision, avg);
+ while (tmp[indent] == ' ')
+ indent++;
+
+ fprintf(output, "%*s# Table of individual measurements:\n", indent, "");
+
+ for (idx = 0; idx < config->run_count; idx++) {
+ double run = (double) config->walltime_run[idx] / NSEC_PER_SEC;
+ int h, n = 1 + abs((int) (100.0 * (run - avg)/run) / 5);
+
+ fprintf(output, " %17.*f (%+.*f) ",
+ precision, run, precision, run - avg);
+
+ for (h = 0; h < n; h++)
+ fprintf(output, "#");
+
+ fprintf(output, "\n");
+ }
+
+ fprintf(output, "\n%*s# Final result:\n", indent, "");
+}
+
+static double timeval2double(struct timeval *t)
+{
+ return t->tv_sec + (double) t->tv_usec/USEC_PER_SEC;
+}
+
+static void print_footer(struct perf_stat_config *config)
+{
+ double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC;
+ FILE *output = config->output;
+
+ if (config->interval || config->csv_output || config->json_output)
+ return;
+
+ if (!config->null_run)
+ fprintf(output, "\n");
+
+ if (config->run_count == 1) {
+ fprintf(output, " %17.9f seconds time elapsed", avg);
+
+ if (config->ru_display) {
+ double ru_utime = timeval2double(&config->ru_data.ru_utime);
+ double ru_stime = timeval2double(&config->ru_data.ru_stime);
+
+ fprintf(output, "\n\n");
+ fprintf(output, " %17.9f seconds user\n", ru_utime);
+ fprintf(output, " %17.9f seconds sys\n", ru_stime);
+ }
+ } else {
+ double sd = stddev_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC;
+ /*
+ * Display at most 2 more significant
+ * digits than the stddev inaccuracy.
+ */
+ int precision = get_precision(sd) + 2;
+
+ if (config->walltime_run_table)
+ print_table(config, output, precision, avg);
+
+ fprintf(output, " %17.*f +- %.*f seconds time elapsed",
+ precision, avg, precision, sd);
+
+ print_noise_pct(config, sd, avg, /*before_metric=*/false);
+ }
+ fprintf(output, "\n\n");
+
+ if (config->print_free_counters_hint && sysctl__nmi_watchdog_enabled())
+ fprintf(output,
+"Some events weren't counted. Try disabling the NMI watchdog:\n"
+" echo 0 > /proc/sys/kernel/nmi_watchdog\n"
+" perf stat ...\n"
+" echo 1 > /proc/sys/kernel/nmi_watchdog\n");
+
+ if (config->print_mixed_hw_group_error)
+ fprintf(output,
+ "The events in group usually have to be from "
+ "the same PMU. Try reorganizing the group.\n");
+}
+
+static void print_percore(struct perf_stat_config *config,
+ struct evsel *counter, struct outstate *os)
+{
+ bool metric_only = config->metric_only;
+ FILE *output = config->output;
+ struct cpu_aggr_map *core_map;
+ int aggr_idx, core_map_len = 0;
+
+ if (!config->aggr_map || !config->aggr_get_id)
+ return;
+
+ if (config->percore_show_thread)
+ return print_counter(config, counter, os);
+
+ /*
+ * core_map will hold the aggr_cpu_id for the cores that have been
+ * printed so that each core is printed just once.
+ */
+ core_map = cpu_aggr_map__empty_new(config->aggr_map->nr);
+ if (core_map == NULL) {
+ fprintf(output, "Cannot allocate per-core aggr map for display\n");
+ return;
+ }
+
+ cpu_aggr_map__for_each_idx(aggr_idx, config->aggr_map) {
+ struct perf_cpu curr_cpu = config->aggr_map->map[aggr_idx].cpu;
+ struct aggr_cpu_id core_id = aggr_cpu_id__core(curr_cpu, NULL);
+ bool found = false;
+
+ for (int i = 0; i < core_map_len; i++) {
+ if (aggr_cpu_id__equal(&core_map->map[i], &core_id)) {
+ found = true;
+ break;
+ }
+ }
+ if (found)
+ continue;
+
+ print_counter_aggrdata(config, counter, aggr_idx, os);
+
+ core_map->map[core_map_len++] = core_id;
+ }
+ free(core_map);
+
+ if (metric_only)
+ fputc('\n', output);
+}
+
+static void print_cgroup_counter(struct perf_stat_config *config, struct evlist *evlist,
+ struct outstate *os)
+{
+ struct evsel *counter;
+
+ evlist__for_each_entry(evlist, counter) {
+ if (os->cgrp != counter->cgrp) {
+ if (os->cgrp != NULL)
+ print_metric_end(config, os);
+
+ os->cgrp = counter->cgrp;
+ print_metric_begin(config, evlist, os, /*aggr_idx=*/0);
+ }
+
+ print_counter(config, counter, os);
+ }
+ if (os->cgrp)
+ print_metric_end(config, os);
+}
+
+void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
+ struct target *_target, struct timespec *ts,
+ int argc, const char **argv)
+{
+ bool metric_only = config->metric_only;
+ int interval = config->interval;
+ struct evsel *counter;
+ char buf[64];
+ struct outstate os = {
+ .fh = config->output,
+ .first = true,
+ };
+
+ if (config->iostat_run)
+ evlist->selected = evlist__first(evlist);
+
+ if (interval) {
+ os.prefix = buf;
+ prepare_interval(config, buf, sizeof(buf), ts);
+ }
+
+ print_header(config, _target, evlist, argc, argv);
+
+ switch (config->aggr_mode) {
+ case AGGR_CORE:
+ case AGGR_CACHE:
+ case AGGR_DIE:
+ case AGGR_SOCKET:
+ case AGGR_NODE:
+ if (config->cgroup_list)
+ print_aggr_cgroup(config, evlist, &os);
+ else
+ print_aggr(config, evlist, &os);
+ break;
+ case AGGR_THREAD:
+ case AGGR_GLOBAL:
+ if (config->iostat_run) {
+ iostat_print_counters(evlist, config, ts, buf,
+ (iostat_print_counter_t)print_counter, &os);
+ } else if (config->cgroup_list) {
+ print_cgroup_counter(config, evlist, &os);
+ } else {
+ print_metric_begin(config, evlist, &os, /*aggr_idx=*/0);
+ evlist__for_each_entry(evlist, counter) {
+ print_counter(config, counter, &os);
+ }
+ print_metric_end(config, &os);
+ }
+ break;
+ case AGGR_NONE:
+ if (metric_only)
+ print_no_aggr_metric(config, evlist, &os);
+ else {
+ evlist__for_each_entry(evlist, counter) {
+ if (counter->percore)
+ print_percore(config, counter, &os);
+ else
+ print_counter(config, counter, &os);
+ }
+ }
+ break;
+ case AGGR_MAX:
+ case AGGR_UNSET:
+ default:
+ break;
+ }
+
+ print_footer(config);
+
+ fflush(config->output);
+}
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
new file mode 100644
index 000000000..e31426167
--- /dev/null
+++ b/tools/perf/util/stat-shadow.c
@@ -0,0 +1,716 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <math.h>
+#include <stdio.h>
+#include "evsel.h"
+#include "stat.h"
+#include "color.h"
+#include "debug.h"
+#include "pmu.h"
+#include "rblist.h"
+#include "evlist.h"
+#include "expr.h"
+#include "metricgroup.h"
+#include "cgroup.h"
+#include "units.h"
+#include <linux/zalloc.h>
+#include "iostat.h"
+#include "util/hashmap.h"
+
+struct stats walltime_nsecs_stats;
+struct rusage_stats ru_stats;
+
+enum {
+ CTX_BIT_USER = 1 << 0,
+ CTX_BIT_KERNEL = 1 << 1,
+ CTX_BIT_HV = 1 << 2,
+ CTX_BIT_HOST = 1 << 3,
+ CTX_BIT_IDLE = 1 << 4,
+ CTX_BIT_MAX = 1 << 5,
+};
+
+enum stat_type {
+ STAT_NONE = 0,
+ STAT_NSECS,
+ STAT_CYCLES,
+ STAT_INSTRUCTIONS,
+ STAT_STALLED_CYCLES_FRONT,
+ STAT_STALLED_CYCLES_BACK,
+ STAT_BRANCHES,
+ STAT_BRANCH_MISS,
+ STAT_CACHE_REFS,
+ STAT_CACHE_MISSES,
+ STAT_L1_DCACHE,
+ STAT_L1_ICACHE,
+ STAT_LL_CACHE,
+ STAT_ITLB_CACHE,
+ STAT_DTLB_CACHE,
+ STAT_L1D_MISS,
+ STAT_L1I_MISS,
+ STAT_LL_MISS,
+ STAT_DTLB_MISS,
+ STAT_ITLB_MISS,
+ STAT_MAX
+};
+
+static int evsel_context(const struct evsel *evsel)
+{
+ int ctx = 0;
+
+ if (evsel->core.attr.exclude_kernel)
+ ctx |= CTX_BIT_KERNEL;
+ if (evsel->core.attr.exclude_user)
+ ctx |= CTX_BIT_USER;
+ if (evsel->core.attr.exclude_hv)
+ ctx |= CTX_BIT_HV;
+ if (evsel->core.attr.exclude_host)
+ ctx |= CTX_BIT_HOST;
+ if (evsel->core.attr.exclude_idle)
+ ctx |= CTX_BIT_IDLE;
+
+ return ctx;
+}
+
+void perf_stat__reset_shadow_stats(void)
+{
+ memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
+ memset(&ru_stats, 0, sizeof(ru_stats));
+}
+
+static enum stat_type evsel__stat_type(const struct evsel *evsel)
+{
+ /* Fake perf_hw_cache_op_id values for use with evsel__match. */
+ u64 PERF_COUNT_hw_cache_l1d_miss = PERF_COUNT_HW_CACHE_L1D |
+ ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+ ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16);
+ u64 PERF_COUNT_hw_cache_l1i_miss = PERF_COUNT_HW_CACHE_L1I |
+ ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+ ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16);
+ u64 PERF_COUNT_hw_cache_ll_miss = PERF_COUNT_HW_CACHE_LL |
+ ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+ ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16);
+ u64 PERF_COUNT_hw_cache_dtlb_miss = PERF_COUNT_HW_CACHE_DTLB |
+ ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+ ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16);
+ u64 PERF_COUNT_hw_cache_itlb_miss = PERF_COUNT_HW_CACHE_ITLB |
+ ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+ ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16);
+
+ if (evsel__is_clock(evsel))
+ return STAT_NSECS;
+ else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES))
+ return STAT_CYCLES;
+ else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS))
+ return STAT_INSTRUCTIONS;
+ else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
+ return STAT_STALLED_CYCLES_FRONT;
+ else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND))
+ return STAT_STALLED_CYCLES_BACK;
+ else if (evsel__match(evsel, HARDWARE, HW_BRANCH_INSTRUCTIONS))
+ return STAT_BRANCHES;
+ else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES))
+ return STAT_BRANCH_MISS;
+ else if (evsel__match(evsel, HARDWARE, HW_CACHE_REFERENCES))
+ return STAT_CACHE_REFS;
+ else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES))
+ return STAT_CACHE_MISSES;
+ else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1D))
+ return STAT_L1_DCACHE;
+ else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1I))
+ return STAT_L1_ICACHE;
+ else if (evsel__match(evsel, HW_CACHE, HW_CACHE_LL))
+ return STAT_LL_CACHE;
+ else if (evsel__match(evsel, HW_CACHE, HW_CACHE_DTLB))
+ return STAT_DTLB_CACHE;
+ else if (evsel__match(evsel, HW_CACHE, HW_CACHE_ITLB))
+ return STAT_ITLB_CACHE;
+ else if (evsel__match(evsel, HW_CACHE, hw_cache_l1d_miss))
+ return STAT_L1D_MISS;
+ else if (evsel__match(evsel, HW_CACHE, hw_cache_l1i_miss))
+ return STAT_L1I_MISS;
+ else if (evsel__match(evsel, HW_CACHE, hw_cache_ll_miss))
+ return STAT_LL_MISS;
+ else if (evsel__match(evsel, HW_CACHE, hw_cache_dtlb_miss))
+ return STAT_DTLB_MISS;
+ else if (evsel__match(evsel, HW_CACHE, hw_cache_itlb_miss))
+ return STAT_ITLB_MISS;
+ return STAT_NONE;
+}
+
+static const char *get_ratio_color(const double ratios[3], double val)
+{
+ const char *color = PERF_COLOR_NORMAL;
+
+ if (val > ratios[0])
+ color = PERF_COLOR_RED;
+ else if (val > ratios[1])
+ color = PERF_COLOR_MAGENTA;
+ else if (val > ratios[2])
+ color = PERF_COLOR_YELLOW;
+
+ return color;
+}
+
+static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type type)
+{
+ const struct evsel *cur;
+ int evsel_ctx = evsel_context(evsel);
+
+ evlist__for_each_entry(evsel->evlist, cur) {
+ struct perf_stat_aggr *aggr;
+
+ /* Ignore the evsel that is being searched from. */
+ if (evsel == cur)
+ continue;
+
+ /* Ignore evsels that are part of different groups. */
+ if (evsel->core.leader->nr_members > 1 &&
+ evsel->core.leader != cur->core.leader)
+ continue;
+ /* Ignore evsels with mismatched modifiers. */
+ if (evsel_ctx != evsel_context(cur))
+ continue;
+ /* Ignore if not the cgroup we're looking for. */
+ if (evsel->cgrp != cur->cgrp)
+ continue;
+ /* Ignore if not the stat we're looking for. */
+ if (type != evsel__stat_type(cur))
+ continue;
+
+ aggr = &cur->stats->aggr[aggr_idx];
+ if (type == STAT_NSECS)
+ return aggr->counts.val;
+ return aggr->counts.val * cur->scale;
+ }
+ return 0.0;
+}
+
+static void print_ratio(struct perf_stat_config *config,
+ const struct evsel *evsel, int aggr_idx,
+ double numerator, struct perf_stat_output_ctx *out,
+ enum stat_type denominator_type,
+ const double color_ratios[3], const char *unit)
+{
+ double denominator = find_stat(evsel, aggr_idx, denominator_type);
+
+ if (numerator && denominator) {
+ double ratio = numerator / denominator * 100.0;
+ const char *color = get_ratio_color(color_ratios, ratio);
+
+ out->print_metric(config, out->ctx, color, "%7.2f%%", unit, ratio);
+ } else
+ out->print_metric(config, out->ctx, NULL, NULL, unit, 0);
+}
+
+static void print_stalled_cycles_front(struct perf_stat_config *config,
+ const struct evsel *evsel,
+ int aggr_idx, double stalled,
+ struct perf_stat_output_ctx *out)
+{
+ static const double color_ratios[3] = {50.0, 30.0, 10.0};
+
+ print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, color_ratios,
+ "frontend cycles idle");
+}
+
+static void print_stalled_cycles_back(struct perf_stat_config *config,
+ const struct evsel *evsel,
+ int aggr_idx, double stalled,
+ struct perf_stat_output_ctx *out)
+{
+ static const double color_ratios[3] = {75.0, 50.0, 20.0};
+
+ print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, color_ratios,
+ "backend cycles idle");
+}
+
+static void print_branch_miss(struct perf_stat_config *config,
+ const struct evsel *evsel,
+ int aggr_idx, double misses,
+ struct perf_stat_output_ctx *out)
+{
+ static const double color_ratios[3] = {20.0, 10.0, 5.0};
+
+ print_ratio(config, evsel, aggr_idx, misses, out, STAT_BRANCHES, color_ratios,
+ "of all branches");
+}
+
+static void print_l1d_miss(struct perf_stat_config *config,
+ const struct evsel *evsel,
+ int aggr_idx, double misses,
+ struct perf_stat_output_ctx *out)
+{
+ static const double color_ratios[3] = {20.0, 10.0, 5.0};
+
+ print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_DCACHE, color_ratios,
+ "of all L1-dcache accesses");
+}
+
+static void print_l1i_miss(struct perf_stat_config *config,
+ const struct evsel *evsel,
+ int aggr_idx, double misses,
+ struct perf_stat_output_ctx *out)
+{
+ static const double color_ratios[3] = {20.0, 10.0, 5.0};
+
+ print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_ICACHE, color_ratios,
+ "of all L1-icache accesses");
+}
+
+static void print_ll_miss(struct perf_stat_config *config,
+ const struct evsel *evsel,
+ int aggr_idx, double misses,
+ struct perf_stat_output_ctx *out)
+{
+ static const double color_ratios[3] = {20.0, 10.0, 5.0};
+
+ print_ratio(config, evsel, aggr_idx, misses, out, STAT_LL_CACHE, color_ratios,
+ "of all LL-cache accesses");
+}
+
+static void print_dtlb_miss(struct perf_stat_config *config,
+ const struct evsel *evsel,
+ int aggr_idx, double misses,
+ struct perf_stat_output_ctx *out)
+{
+ static const double color_ratios[3] = {20.0, 10.0, 5.0};
+
+ print_ratio(config, evsel, aggr_idx, misses, out, STAT_DTLB_CACHE, color_ratios,
+ "of all dTLB cache accesses");
+}
+
+static void print_itlb_miss(struct perf_stat_config *config,
+ const struct evsel *evsel,
+ int aggr_idx, double misses,
+ struct perf_stat_output_ctx *out)
+{
+ static const double color_ratios[3] = {20.0, 10.0, 5.0};
+
+ print_ratio(config, evsel, aggr_idx, misses, out, STAT_ITLB_CACHE, color_ratios,
+ "of all iTLB cache accesses");
+}
+
+static void print_cache_miss(struct perf_stat_config *config,
+ const struct evsel *evsel,
+ int aggr_idx, double misses,
+ struct perf_stat_output_ctx *out)
+{
+ static const double color_ratios[3] = {20.0, 10.0, 5.0};
+
+ print_ratio(config, evsel, aggr_idx, misses, out, STAT_CACHE_REFS, color_ratios,
+ "of all cache refs");
+}
+
+static void print_instructions(struct perf_stat_config *config,
+ const struct evsel *evsel,
+ int aggr_idx, double instructions,
+ struct perf_stat_output_ctx *out)
+{
+ print_metric_t print_metric = out->print_metric;
+ void *ctxp = out->ctx;
+ double cycles = find_stat(evsel, aggr_idx, STAT_CYCLES);
+ double max_stalled = max(find_stat(evsel, aggr_idx, STAT_STALLED_CYCLES_FRONT),
+ find_stat(evsel, aggr_idx, STAT_STALLED_CYCLES_BACK));
+
+ if (cycles) {
+ print_metric(config, ctxp, NULL, "%7.2f ", "insn per cycle",
+ instructions / cycles);
+ } else
+ print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
+
+ if (max_stalled && instructions) {
+ out->new_line(config, ctxp);
+ print_metric(config, ctxp, NULL, "%7.2f ", "stalled cycles per insn",
+ max_stalled / instructions);
+ }
+}
+
+static void print_cycles(struct perf_stat_config *config,
+ const struct evsel *evsel,
+ int aggr_idx, double cycles,
+ struct perf_stat_output_ctx *out)
+{
+ double nsecs = find_stat(evsel, aggr_idx, STAT_NSECS);
+
+ if (cycles && nsecs) {
+ double ratio = cycles / nsecs;
+
+ out->print_metric(config, out->ctx, NULL, "%8.3f", "GHz", ratio);
+ } else
+ out->print_metric(config, out->ctx, NULL, NULL, "GHz", 0);
+}
+
+static void print_nsecs(struct perf_stat_config *config,
+ const struct evsel *evsel,
+ int aggr_idx __maybe_unused, double nsecs,
+ struct perf_stat_output_ctx *out)
+{
+ print_metric_t print_metric = out->print_metric;
+ void *ctxp = out->ctx;
+ double wall_time = avg_stats(&walltime_nsecs_stats);
+
+ if (wall_time) {
+ print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
+ nsecs / (wall_time * evsel->scale));
+ } else
+ print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
+}
+
+static int prepare_metric(struct evsel **metric_events,
+ struct metric_ref *metric_refs,
+ struct expr_parse_ctx *pctx,
+ int aggr_idx)
+{
+ int i;
+
+ for (i = 0; metric_events[i]; i++) {
+ char *n;
+ double val;
+ int source_count = 0;
+
+ if (evsel__is_tool(metric_events[i])) {
+ struct stats *stats;
+ double scale;
+
+ switch (metric_events[i]->tool_event) {
+ case PERF_TOOL_DURATION_TIME:
+ stats = &walltime_nsecs_stats;
+ scale = 1e-9;
+ break;
+ case PERF_TOOL_USER_TIME:
+ stats = &ru_stats.ru_utime_usec_stat;
+ scale = 1e-6;
+ break;
+ case PERF_TOOL_SYSTEM_TIME:
+ stats = &ru_stats.ru_stime_usec_stat;
+ scale = 1e-6;
+ break;
+ case PERF_TOOL_NONE:
+ pr_err("Invalid tool event 'none'");
+ abort();
+ case PERF_TOOL_MAX:
+ pr_err("Invalid tool event 'max'");
+ abort();
+ default:
+ pr_err("Unknown tool event '%s'", evsel__name(metric_events[i]));
+ abort();
+ }
+ val = avg_stats(stats) * scale;
+ source_count = 1;
+ } else {
+ struct perf_stat_evsel *ps = metric_events[i]->stats;
+ struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx];
+
+ if (!aggr)
+ break;
+
+ if (!metric_events[i]->supported) {
+ /*
+ * Not supported events will have a count of 0,
+ * which can be confusing in a
+ * metric. Explicitly set the value to NAN. Not
+ * counted events (enable time of 0) are read as
+ * 0.
+ */
+ val = NAN;
+ source_count = 0;
+ } else {
+ /*
+ * If an event was scaled during stat gathering,
+ * reverse the scale before computing the
+ * metric.
+ */
+ val = aggr->counts.val * (1.0 / metric_events[i]->scale);
+ source_count = evsel__source_count(metric_events[i]);
+ }
+ }
+ n = strdup(evsel__metric_id(metric_events[i]));
+ if (!n)
+ return -ENOMEM;
+
+ expr__add_id_val_source_count(pctx, n, val, source_count);
+ }
+
+ for (int j = 0; metric_refs && metric_refs[j].metric_name; j++) {
+ int ret = expr__add_ref(pctx, &metric_refs[j]);
+
+ if (ret)
+ return ret;
+ }
+
+ return i;
+}
+
+static void generic_metric(struct perf_stat_config *config,
+ const char *metric_expr,
+ const char *metric_threshold,
+ struct evsel **metric_events,
+ struct metric_ref *metric_refs,
+ char *name,
+ const char *metric_name,
+ const char *metric_unit,
+ int runtime,
+ int aggr_idx,
+ struct perf_stat_output_ctx *out)
+{
+ print_metric_t print_metric = out->print_metric;
+ struct expr_parse_ctx *pctx;
+ double ratio, scale, threshold;
+ int i;
+ void *ctxp = out->ctx;
+ const char *color = NULL;
+
+ pctx = expr__ctx_new();
+ if (!pctx)
+ return;
+
+ if (config->user_requested_cpu_list)
+ pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
+ pctx->sctx.runtime = runtime;
+ pctx->sctx.system_wide = config->system_wide;
+ i = prepare_metric(metric_events, metric_refs, pctx, aggr_idx);
+ if (i < 0) {
+ expr__ctx_free(pctx);
+ return;
+ }
+ if (!metric_events[i]) {
+ if (expr__parse(&ratio, pctx, metric_expr) == 0) {
+ char *unit;
+ char metric_bf[64];
+
+ if (metric_threshold &&
+ expr__parse(&threshold, pctx, metric_threshold) == 0 &&
+ !isnan(threshold)) {
+ color = fpclassify(threshold) == FP_ZERO
+ ? PERF_COLOR_GREEN : PERF_COLOR_RED;
+ }
+
+ if (metric_unit && metric_name) {
+ if (perf_pmu__convert_scale(metric_unit,
+ &unit, &scale) >= 0) {
+ ratio *= scale;
+ }
+ if (strstr(metric_expr, "?"))
+ scnprintf(metric_bf, sizeof(metric_bf),
+ "%s %s_%d", unit, metric_name, runtime);
+ else
+ scnprintf(metric_bf, sizeof(metric_bf),
+ "%s %s", unit, metric_name);
+
+ print_metric(config, ctxp, color, "%8.1f",
+ metric_bf, ratio);
+ } else {
+ print_metric(config, ctxp, color, "%8.2f",
+ metric_name ?
+ metric_name :
+ out->force_header ? name : "",
+ ratio);
+ }
+ } else {
+ print_metric(config, ctxp, color, /*unit=*/NULL,
+ out->force_header ?
+ (metric_name ? metric_name : name) : "", 0);
+ }
+ } else {
+ print_metric(config, ctxp, color, /*unit=*/NULL,
+ out->force_header ?
+ (metric_name ? metric_name : name) : "", 0);
+ }
+
+ expr__ctx_free(pctx);
+}
+
+double test_generic_metric(struct metric_expr *mexp, int aggr_idx)
+{
+ struct expr_parse_ctx *pctx;
+ double ratio = 0.0;
+
+ pctx = expr__ctx_new();
+ if (!pctx)
+ return NAN;
+
+ if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, aggr_idx) < 0)
+ goto out;
+
+ if (expr__parse(&ratio, pctx, mexp->metric_expr))
+ ratio = 0.0;
+
+out:
+ expr__ctx_free(pctx);
+ return ratio;
+}
+
+static void perf_stat__print_metricgroup_header(struct perf_stat_config *config,
+ struct evsel *evsel,
+ void *ctxp,
+ const char *name,
+ struct perf_stat_output_ctx *out)
+{
+ bool need_full_name = perf_pmus__num_core_pmus() > 1;
+ static const char *last_name;
+ static const char *last_pmu;
+ char full_name[64];
+
+ /*
+ * A metricgroup may have several metric events,
+ * e.g.,TopdownL1 on e-core of ADL.
+ * The name has been output by the first metric
+ * event. Only align with other metics from
+ * different metric events.
+ */
+ if (last_name && !strcmp(last_name, name)) {
+ if (!need_full_name || !strcmp(last_pmu, evsel->pmu_name)) {
+ out->print_metricgroup_header(config, ctxp, NULL);
+ return;
+ }
+ }
+
+ if (need_full_name)
+ scnprintf(full_name, sizeof(full_name), "%s (%s)", name, evsel->pmu_name);
+ else
+ scnprintf(full_name, sizeof(full_name), "%s", name);
+
+ out->print_metricgroup_header(config, ctxp, full_name);
+
+ last_name = name;
+ last_pmu = evsel->pmu_name;
+}
+
+/**
+ * perf_stat__print_shadow_stats_metricgroup - Print out metrics associated with the evsel
+ * For the non-default, all metrics associated
+ * with the evsel are printed.
+ * For the default mode, only the metrics from
+ * the same metricgroup and the name of the
+ * metricgroup are printed. To print the metrics
+ * from the next metricgroup (if available),
+ * invoke the function with correspoinding
+ * metric_expr.
+ */
+void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config,
+ struct evsel *evsel,
+ int aggr_idx,
+ int *num,
+ void *from,
+ struct perf_stat_output_ctx *out,
+ struct rblist *metric_events)
+{
+ struct metric_event *me;
+ struct metric_expr *mexp = from;
+ void *ctxp = out->ctx;
+ bool header_printed = false;
+ const char *name = NULL;
+
+ me = metricgroup__lookup(metric_events, evsel, false);
+ if (me == NULL)
+ return NULL;
+
+ if (!mexp)
+ mexp = list_first_entry(&me->head, typeof(*mexp), nd);
+
+ list_for_each_entry_from(mexp, &me->head, nd) {
+ /* Print the display name of the Default metricgroup */
+ if (!config->metric_only && me->is_default) {
+ if (!name)
+ name = mexp->default_metricgroup_name;
+ /*
+ * Two or more metricgroup may share the same metric
+ * event, e.g., TopdownL1 and TopdownL2 on SPR.
+ * Return and print the prefix, e.g., noise, running
+ * for the next metricgroup.
+ */
+ if (strcmp(name, mexp->default_metricgroup_name))
+ return (void *)mexp;
+ /* Only print the name of the metricgroup once */
+ if (!header_printed) {
+ header_printed = true;
+ perf_stat__print_metricgroup_header(config, evsel, ctxp,
+ name, out);
+ }
+ }
+
+ if ((*num)++ > 0)
+ out->new_line(config, ctxp);
+ generic_metric(config, mexp->metric_expr, mexp->metric_threshold,
+ mexp->metric_events, mexp->metric_refs, evsel->name,
+ mexp->metric_name, mexp->metric_unit, mexp->runtime,
+ aggr_idx, out);
+ }
+
+ return NULL;
+}
+
+void perf_stat__print_shadow_stats(struct perf_stat_config *config,
+ struct evsel *evsel,
+ double avg, int aggr_idx,
+ struct perf_stat_output_ctx *out,
+ struct rblist *metric_events)
+{
+ typedef void (*stat_print_function_t)(struct perf_stat_config *config,
+ const struct evsel *evsel,
+ int aggr_idx, double misses,
+ struct perf_stat_output_ctx *out);
+ static const stat_print_function_t stat_print_function[STAT_MAX] = {
+ [STAT_INSTRUCTIONS] = print_instructions,
+ [STAT_BRANCH_MISS] = print_branch_miss,
+ [STAT_L1D_MISS] = print_l1d_miss,
+ [STAT_L1I_MISS] = print_l1i_miss,
+ [STAT_DTLB_MISS] = print_dtlb_miss,
+ [STAT_ITLB_MISS] = print_itlb_miss,
+ [STAT_LL_MISS] = print_ll_miss,
+ [STAT_CACHE_MISSES] = print_cache_miss,
+ [STAT_STALLED_CYCLES_FRONT] = print_stalled_cycles_front,
+ [STAT_STALLED_CYCLES_BACK] = print_stalled_cycles_back,
+ [STAT_CYCLES] = print_cycles,
+ [STAT_NSECS] = print_nsecs,
+ };
+ print_metric_t print_metric = out->print_metric;
+ void *ctxp = out->ctx;
+ int num = 1;
+
+ if (config->iostat_run) {
+ iostat_print_metric(config, evsel, out);
+ } else {
+ stat_print_function_t fn = stat_print_function[evsel__stat_type(evsel)];
+
+ if (fn)
+ fn(config, evsel, aggr_idx, avg, out);
+ else {
+ double nsecs = find_stat(evsel, aggr_idx, STAT_NSECS);
+
+ if (nsecs) {
+ char unit = ' ';
+ char unit_buf[10] = "/sec";
+ double ratio = convert_unit_double(1000000000.0 * avg / nsecs,
+ &unit);
+
+ if (unit != ' ')
+ snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
+ print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
+ } else
+ num = 0;
+ }
+ }
+
+ perf_stat__print_shadow_stats_metricgroup(config, evsel, aggr_idx,
+ &num, NULL, out, metric_events);
+
+ if (num == 0)
+ print_metric(config, ctxp, NULL, NULL, NULL, 0);
+}
+
+/**
+ * perf_stat__skip_metric_event - Skip the evsel in the Default metricgroup,
+ * if it's not running or not the metric event.
+ */
+bool perf_stat__skip_metric_event(struct evsel *evsel,
+ struct rblist *metric_events,
+ u64 ena, u64 run)
+{
+ if (!evsel->default_metricgroup)
+ return false;
+
+ if (!ena || !run)
+ return true;
+
+ return !metricgroup__lookup(metric_events, evsel, false);
+}
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
new file mode 100644
index 000000000..ec3506042
--- /dev/null
+++ b/tools/perf/util/stat.c
@@ -0,0 +1,801 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <linux/err.h>
+#include <inttypes.h>
+#include <math.h>
+#include <string.h>
+#include "counts.h"
+#include "cpumap.h"
+#include "debug.h"
+#include "header.h"
+#include "stat.h"
+#include "session.h"
+#include "target.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+#include "util/hashmap.h"
+#include <linux/zalloc.h>
+
+void update_stats(struct stats *stats, u64 val)
+{
+ double delta;
+
+ stats->n++;
+ delta = val - stats->mean;
+ stats->mean += delta / stats->n;
+ stats->M2 += delta*(val - stats->mean);
+
+ if (val > stats->max)
+ stats->max = val;
+
+ if (val < stats->min)
+ stats->min = val;
+}
+
+double avg_stats(struct stats *stats)
+{
+ return stats->mean;
+}
+
+/*
+ * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
+ *
+ * (\Sum n_i^2) - ((\Sum n_i)^2)/n
+ * s^2 = -------------------------------
+ * n - 1
+ *
+ * http://en.wikipedia.org/wiki/Stddev
+ *
+ * The std dev of the mean is related to the std dev by:
+ *
+ * s
+ * s_mean = -------
+ * sqrt(n)
+ *
+ */
+double stddev_stats(struct stats *stats)
+{
+ double variance, variance_mean;
+
+ if (stats->n < 2)
+ return 0.0;
+
+ variance = stats->M2 / (stats->n - 1);
+ variance_mean = variance / stats->n;
+
+ return sqrt(variance_mean);
+}
+
+double rel_stddev_stats(double stddev, double avg)
+{
+ double pct = 0.0;
+
+ if (avg)
+ pct = 100.0 * stddev/avg;
+
+ return pct;
+}
+
+static void evsel__reset_aggr_stats(struct evsel *evsel)
+{
+ struct perf_stat_evsel *ps = evsel->stats;
+ struct perf_stat_aggr *aggr = ps->aggr;
+
+ if (aggr)
+ memset(aggr, 0, sizeof(*aggr) * ps->nr_aggr);
+}
+
+static void evsel__reset_stat_priv(struct evsel *evsel)
+{
+ struct perf_stat_evsel *ps = evsel->stats;
+
+ init_stats(&ps->res_stats);
+ evsel__reset_aggr_stats(evsel);
+}
+
+static int evsel__alloc_aggr_stats(struct evsel *evsel, int nr_aggr)
+{
+ struct perf_stat_evsel *ps = evsel->stats;
+
+ if (ps == NULL)
+ return 0;
+
+ ps->nr_aggr = nr_aggr;
+ ps->aggr = calloc(nr_aggr, sizeof(*ps->aggr));
+ if (ps->aggr == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int evlist__alloc_aggr_stats(struct evlist *evlist, int nr_aggr)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel__alloc_aggr_stats(evsel, nr_aggr) < 0)
+ return -1;
+ }
+ return 0;
+}
+
+static int evsel__alloc_stat_priv(struct evsel *evsel, int nr_aggr)
+{
+ struct perf_stat_evsel *ps;
+
+ ps = zalloc(sizeof(*ps));
+ if (ps == NULL)
+ return -ENOMEM;
+
+ evsel->stats = ps;
+
+ if (nr_aggr && evsel__alloc_aggr_stats(evsel, nr_aggr) < 0) {
+ evsel->stats = NULL;
+ free(ps);
+ return -ENOMEM;
+ }
+
+ evsel__reset_stat_priv(evsel);
+ return 0;
+}
+
+static void evsel__free_stat_priv(struct evsel *evsel)
+{
+ struct perf_stat_evsel *ps = evsel->stats;
+
+ if (ps) {
+ zfree(&ps->aggr);
+ zfree(&ps->group_data);
+ }
+ zfree(&evsel->stats);
+}
+
+static int evsel__alloc_prev_raw_counts(struct evsel *evsel)
+{
+ int cpu_map_nr = evsel__nr_cpus(evsel);
+ int nthreads = perf_thread_map__nr(evsel->core.threads);
+ struct perf_counts *counts;
+
+ counts = perf_counts__new(cpu_map_nr, nthreads);
+ if (counts)
+ evsel->prev_raw_counts = counts;
+
+ return counts ? 0 : -ENOMEM;
+}
+
+static void evsel__free_prev_raw_counts(struct evsel *evsel)
+{
+ perf_counts__delete(evsel->prev_raw_counts);
+ evsel->prev_raw_counts = NULL;
+}
+
+static void evsel__reset_prev_raw_counts(struct evsel *evsel)
+{
+ if (evsel->prev_raw_counts)
+ perf_counts__reset(evsel->prev_raw_counts);
+}
+
+static int evsel__alloc_stats(struct evsel *evsel, int nr_aggr, bool alloc_raw)
+{
+ if (evsel__alloc_stat_priv(evsel, nr_aggr) < 0 ||
+ evsel__alloc_counts(evsel) < 0 ||
+ (alloc_raw && evsel__alloc_prev_raw_counts(evsel) < 0))
+ return -ENOMEM;
+
+ return 0;
+}
+
+int evlist__alloc_stats(struct perf_stat_config *config,
+ struct evlist *evlist, bool alloc_raw)
+{
+ struct evsel *evsel;
+ int nr_aggr = 0;
+
+ if (config && config->aggr_map)
+ nr_aggr = config->aggr_map->nr;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel__alloc_stats(evsel, nr_aggr, alloc_raw))
+ goto out_free;
+ }
+
+ return 0;
+
+out_free:
+ evlist__free_stats(evlist);
+ return -1;
+}
+
+void evlist__free_stats(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ evsel__free_stat_priv(evsel);
+ evsel__free_counts(evsel);
+ evsel__free_prev_raw_counts(evsel);
+ }
+}
+
+void evlist__reset_stats(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ evsel__reset_stat_priv(evsel);
+ evsel__reset_counts(evsel);
+ }
+}
+
+void evlist__reset_aggr_stats(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel)
+ evsel__reset_aggr_stats(evsel);
+}
+
+void evlist__reset_prev_raw_counts(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel)
+ evsel__reset_prev_raw_counts(evsel);
+}
+
+static void evsel__copy_prev_raw_counts(struct evsel *evsel)
+{
+ int idx, nthreads = perf_thread_map__nr(evsel->core.threads);
+
+ for (int thread = 0; thread < nthreads; thread++) {
+ perf_cpu_map__for_each_idx(idx, evsel__cpus(evsel)) {
+ *perf_counts(evsel->counts, idx, thread) =
+ *perf_counts(evsel->prev_raw_counts, idx, thread);
+ }
+ }
+}
+
+void evlist__copy_prev_raw_counts(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel)
+ evsel__copy_prev_raw_counts(evsel);
+}
+
+static void evsel__copy_res_stats(struct evsel *evsel)
+{
+ struct perf_stat_evsel *ps = evsel->stats;
+
+ /*
+ * For GLOBAL aggregation mode, it updates the counts for each run
+ * in the evsel->stats.res_stats. See perf_stat_process_counter().
+ */
+ *ps->aggr[0].counts.values = avg_stats(&ps->res_stats);
+}
+
+void evlist__copy_res_stats(struct perf_stat_config *config, struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ if (config->aggr_mode != AGGR_GLOBAL)
+ return;
+
+ evlist__for_each_entry(evlist, evsel)
+ evsel__copy_res_stats(evsel);
+}
+
+static size_t pkg_id_hash(long __key, void *ctx __maybe_unused)
+{
+ uint64_t *key = (uint64_t *) __key;
+
+ return *key & 0xffffffff;
+}
+
+static bool pkg_id_equal(long __key1, long __key2, void *ctx __maybe_unused)
+{
+ uint64_t *key1 = (uint64_t *) __key1;
+ uint64_t *key2 = (uint64_t *) __key2;
+
+ return *key1 == *key2;
+}
+
+static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals,
+ int cpu_map_idx, bool *skip)
+{
+ struct hashmap *mask = counter->per_pkg_mask;
+ struct perf_cpu_map *cpus = evsel__cpus(counter);
+ struct perf_cpu cpu = perf_cpu_map__cpu(cpus, cpu_map_idx);
+ int s, d, ret = 0;
+ uint64_t *key;
+
+ *skip = false;
+
+ if (!counter->per_pkg)
+ return 0;
+
+ if (perf_cpu_map__empty(cpus))
+ return 0;
+
+ if (!mask) {
+ mask = hashmap__new(pkg_id_hash, pkg_id_equal, NULL);
+ if (IS_ERR(mask))
+ return -ENOMEM;
+
+ counter->per_pkg_mask = mask;
+ }
+
+ /*
+ * we do not consider an event that has not run as a good
+ * instance to mark a package as used (skip=1). Otherwise
+ * we may run into a situation where the first CPU in a package
+ * is not running anything, yet the second is, and this function
+ * would mark the package as used after the first CPU and would
+ * not read the values from the second CPU.
+ */
+ if (!(vals->run && vals->ena))
+ return 0;
+
+ s = cpu__get_socket_id(cpu);
+ if (s < 0)
+ return -1;
+
+ /*
+ * On multi-die system, die_id > 0. On no-die system, die_id = 0.
+ * We use hashmap(socket, die) to check the used socket+die pair.
+ */
+ d = cpu__get_die_id(cpu);
+ if (d < 0)
+ return -1;
+
+ key = malloc(sizeof(*key));
+ if (!key)
+ return -ENOMEM;
+
+ *key = (uint64_t)d << 32 | s;
+ if (hashmap__find(mask, key, NULL)) {
+ *skip = true;
+ free(key);
+ } else
+ ret = hashmap__add(mask, key, 1);
+
+ return ret;
+}
+
+static bool evsel__count_has_error(struct evsel *evsel,
+ struct perf_counts_values *count,
+ struct perf_stat_config *config)
+{
+ /* the evsel was failed already */
+ if (evsel->err || evsel->counts->scaled == -1)
+ return true;
+
+ /* this is meaningful for CPU aggregation modes only */
+ if (config->aggr_mode == AGGR_GLOBAL)
+ return false;
+
+ /* it's considered ok when it actually ran */
+ if (count->ena != 0 && count->run != 0)
+ return false;
+
+ return true;
+}
+
+static int
+process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
+ int cpu_map_idx, int thread,
+ struct perf_counts_values *count)
+{
+ struct perf_stat_evsel *ps = evsel->stats;
+ static struct perf_counts_values zero;
+ bool skip = false;
+
+ if (check_per_pkg(evsel, count, cpu_map_idx, &skip)) {
+ pr_err("failed to read per-pkg counter\n");
+ return -1;
+ }
+
+ if (skip)
+ count = &zero;
+
+ if (!evsel->snapshot)
+ evsel__compute_deltas(evsel, cpu_map_idx, thread, count);
+ perf_counts_values__scale(count, config->scale, NULL);
+
+ if (config->aggr_mode == AGGR_THREAD) {
+ struct perf_counts_values *aggr_counts = &ps->aggr[thread].counts;
+
+ /*
+ * Skip value 0 when enabling --per-thread globally,
+ * otherwise too many 0 output.
+ */
+ if (count->val == 0 && config->system_wide)
+ return 0;
+
+ ps->aggr[thread].nr++;
+
+ aggr_counts->val += count->val;
+ aggr_counts->ena += count->ena;
+ aggr_counts->run += count->run;
+ return 0;
+ }
+
+ if (ps->aggr) {
+ struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx);
+ struct aggr_cpu_id aggr_id = config->aggr_get_id(config, cpu);
+ struct perf_stat_aggr *ps_aggr;
+ int i;
+
+ for (i = 0; i < ps->nr_aggr; i++) {
+ if (!aggr_cpu_id__equal(&aggr_id, &config->aggr_map->map[i]))
+ continue;
+
+ ps_aggr = &ps->aggr[i];
+ ps_aggr->nr++;
+
+ /*
+ * When any result is bad, make them all to give consistent output
+ * in interval mode. But per-task counters can have 0 enabled time
+ * when some tasks are idle.
+ */
+ if (evsel__count_has_error(evsel, count, config) && !ps_aggr->failed) {
+ ps_aggr->counts.val = 0;
+ ps_aggr->counts.ena = 0;
+ ps_aggr->counts.run = 0;
+ ps_aggr->failed = true;
+ }
+
+ if (!ps_aggr->failed) {
+ ps_aggr->counts.val += count->val;
+ ps_aggr->counts.ena += count->ena;
+ ps_aggr->counts.run += count->run;
+ }
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int process_counter_maps(struct perf_stat_config *config,
+ struct evsel *counter)
+{
+ int nthreads = perf_thread_map__nr(counter->core.threads);
+ int ncpus = evsel__nr_cpus(counter);
+ int idx, thread;
+
+ for (thread = 0; thread < nthreads; thread++) {
+ for (idx = 0; idx < ncpus; idx++) {
+ if (process_counter_values(config, counter, idx, thread,
+ perf_counts(counter->counts, idx, thread)))
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int perf_stat_process_counter(struct perf_stat_config *config,
+ struct evsel *counter)
+{
+ struct perf_stat_evsel *ps = counter->stats;
+ u64 *count;
+ int ret;
+
+ if (counter->per_pkg)
+ evsel__zero_per_pkg(counter);
+
+ ret = process_counter_maps(config, counter);
+ if (ret)
+ return ret;
+
+ if (config->aggr_mode != AGGR_GLOBAL)
+ return 0;
+
+ /*
+ * GLOBAL aggregation mode only has a single aggr counts,
+ * so we can use ps->aggr[0] as the actual output.
+ */
+ count = ps->aggr[0].counts.values;
+ update_stats(&ps->res_stats, *count);
+
+ if (verbose > 0) {
+ fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+ evsel__name(counter), count[0], count[1], count[2]);
+ }
+
+ return 0;
+}
+
+static int evsel__merge_aggr_counters(struct evsel *evsel, struct evsel *alias)
+{
+ struct perf_stat_evsel *ps_a = evsel->stats;
+ struct perf_stat_evsel *ps_b = alias->stats;
+ int i;
+
+ if (ps_a->aggr == NULL && ps_b->aggr == NULL)
+ return 0;
+
+ if (ps_a->nr_aggr != ps_b->nr_aggr) {
+ pr_err("Unmatched aggregation mode between aliases\n");
+ return -1;
+ }
+
+ for (i = 0; i < ps_a->nr_aggr; i++) {
+ struct perf_counts_values *aggr_counts_a = &ps_a->aggr[i].counts;
+ struct perf_counts_values *aggr_counts_b = &ps_b->aggr[i].counts;
+
+ /* NB: don't increase aggr.nr for aliases */
+
+ aggr_counts_a->val += aggr_counts_b->val;
+ aggr_counts_a->ena += aggr_counts_b->ena;
+ aggr_counts_a->run += aggr_counts_b->run;
+ }
+
+ return 0;
+}
+/* events should have the same name, scale, unit, cgroup but on different PMUs */
+static bool evsel__is_alias(struct evsel *evsel_a, struct evsel *evsel_b)
+{
+ if (strcmp(evsel__name(evsel_a), evsel__name(evsel_b)))
+ return false;
+
+ if (evsel_a->scale != evsel_b->scale)
+ return false;
+
+ if (evsel_a->cgrp != evsel_b->cgrp)
+ return false;
+
+ if (strcmp(evsel_a->unit, evsel_b->unit))
+ return false;
+
+ if (evsel__is_clock(evsel_a) != evsel__is_clock(evsel_b))
+ return false;
+
+ return !!strcmp(evsel_a->pmu_name, evsel_b->pmu_name);
+}
+
+static void evsel__merge_aliases(struct evsel *evsel)
+{
+ struct evlist *evlist = evsel->evlist;
+ struct evsel *alias;
+
+ alias = list_prepare_entry(evsel, &(evlist->core.entries), core.node);
+ list_for_each_entry_continue(alias, &evlist->core.entries, core.node) {
+ /* Merge the same events on different PMUs. */
+ if (evsel__is_alias(evsel, alias)) {
+ evsel__merge_aggr_counters(evsel, alias);
+ alias->merged_stat = true;
+ }
+ }
+}
+
+static bool evsel__should_merge_hybrid(const struct evsel *evsel,
+ const struct perf_stat_config *config)
+{
+ return config->hybrid_merge && evsel__is_hybrid(evsel);
+}
+
+static void evsel__merge_stats(struct evsel *evsel, struct perf_stat_config *config)
+{
+ /* this evsel is already merged */
+ if (evsel->merged_stat)
+ return;
+
+ if (evsel->auto_merge_stats || evsel__should_merge_hybrid(evsel, config))
+ evsel__merge_aliases(evsel);
+}
+
+/* merge the same uncore and hybrid events if requested */
+void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ if (config->no_merge)
+ return;
+
+ evlist__for_each_entry(evlist, evsel)
+ evsel__merge_stats(evsel, config);
+}
+
+static void evsel__update_percore_stats(struct evsel *evsel, struct aggr_cpu_id *core_id)
+{
+ struct perf_stat_evsel *ps = evsel->stats;
+ struct perf_counts_values counts = { 0, };
+ struct aggr_cpu_id id;
+ struct perf_cpu cpu;
+ int idx;
+
+ /* collect per-core counts */
+ perf_cpu_map__for_each_cpu(cpu, idx, evsel->core.cpus) {
+ struct perf_stat_aggr *aggr = &ps->aggr[idx];
+
+ id = aggr_cpu_id__core(cpu, NULL);
+ if (!aggr_cpu_id__equal(core_id, &id))
+ continue;
+
+ counts.val += aggr->counts.val;
+ counts.ena += aggr->counts.ena;
+ counts.run += aggr->counts.run;
+ }
+
+ /* update aggregated per-core counts for each CPU */
+ perf_cpu_map__for_each_cpu(cpu, idx, evsel->core.cpus) {
+ struct perf_stat_aggr *aggr = &ps->aggr[idx];
+
+ id = aggr_cpu_id__core(cpu, NULL);
+ if (!aggr_cpu_id__equal(core_id, &id))
+ continue;
+
+ aggr->counts.val = counts.val;
+ aggr->counts.ena = counts.ena;
+ aggr->counts.run = counts.run;
+
+ aggr->used = true;
+ }
+}
+
+/* we have an aggr_map for cpu, but want to aggregate the counters per-core */
+static void evsel__process_percore(struct evsel *evsel)
+{
+ struct perf_stat_evsel *ps = evsel->stats;
+ struct aggr_cpu_id core_id;
+ struct perf_cpu cpu;
+ int idx;
+
+ if (!evsel->percore)
+ return;
+
+ perf_cpu_map__for_each_cpu(cpu, idx, evsel->core.cpus) {
+ struct perf_stat_aggr *aggr = &ps->aggr[idx];
+
+ if (aggr->used)
+ continue;
+
+ core_id = aggr_cpu_id__core(cpu, NULL);
+ evsel__update_percore_stats(evsel, &core_id);
+ }
+}
+
+/* process cpu stats on per-core events */
+void perf_stat_process_percore(struct perf_stat_config *config, struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ if (config->aggr_mode != AGGR_NONE)
+ return;
+
+ evlist__for_each_entry(evlist, evsel)
+ evsel__process_percore(evsel);
+}
+
+int perf_event__process_stat_event(struct perf_session *session,
+ union perf_event *event)
+{
+ struct perf_counts_values count, *ptr;
+ struct perf_record_stat *st = &event->stat;
+ struct evsel *counter;
+ int cpu_map_idx;
+
+ count.val = st->val;
+ count.ena = st->ena;
+ count.run = st->run;
+
+ counter = evlist__id2evsel(session->evlist, st->id);
+ if (!counter) {
+ pr_err("Failed to resolve counter for stat event.\n");
+ return -EINVAL;
+ }
+ cpu_map_idx = perf_cpu_map__idx(evsel__cpus(counter), (struct perf_cpu){.cpu = st->cpu});
+ if (cpu_map_idx == -1) {
+ pr_err("Invalid CPU %d for event %s.\n", st->cpu, evsel__name(counter));
+ return -EINVAL;
+ }
+ ptr = perf_counts(counter->counts, cpu_map_idx, st->thread);
+ if (ptr == NULL) {
+ pr_err("Failed to find perf count for CPU %d thread %d on event %s.\n",
+ st->cpu, st->thread, evsel__name(counter));
+ return -EINVAL;
+ }
+ *ptr = count;
+ counter->supported = true;
+ return 0;
+}
+
+size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp)
+{
+ struct perf_record_stat *st = (struct perf_record_stat *)event;
+ size_t ret;
+
+ ret = fprintf(fp, "\n... id %" PRI_lu64 ", cpu %d, thread %d\n",
+ st->id, st->cpu, st->thread);
+ ret += fprintf(fp, "... value %" PRI_lu64 ", enabled %" PRI_lu64 ", running %" PRI_lu64 "\n",
+ st->val, st->ena, st->run);
+
+ return ret;
+}
+
+size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp)
+{
+ struct perf_record_stat_round *rd = (struct perf_record_stat_round *)event;
+ size_t ret;
+
+ ret = fprintf(fp, "\n... time %" PRI_lu64 ", type %s\n", rd->time,
+ rd->type == PERF_STAT_ROUND_TYPE__FINAL ? "FINAL" : "INTERVAL");
+
+ return ret;
+}
+
+size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
+{
+ struct perf_stat_config sc = {};
+ size_t ret;
+
+ perf_event__read_stat_config(&sc, &event->stat_config);
+
+ ret = fprintf(fp, "\n");
+ ret += fprintf(fp, "... aggr_mode %d\n", sc.aggr_mode);
+ ret += fprintf(fp, "... scale %d\n", sc.scale);
+ ret += fprintf(fp, "... interval %u\n", sc.interval);
+
+ return ret;
+}
+
+int create_perf_stat_counter(struct evsel *evsel,
+ struct perf_stat_config *config,
+ struct target *target,
+ int cpu_map_idx)
+{
+ struct perf_event_attr *attr = &evsel->core.attr;
+ struct evsel *leader = evsel__leader(evsel);
+
+ attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+ PERF_FORMAT_TOTAL_TIME_RUNNING;
+
+ /*
+ * The event is part of non trivial group, let's enable
+ * the group read (for leader) and ID retrieval for all
+ * members.
+ */
+ if (leader->core.nr_members > 1)
+ attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;
+
+ attr->inherit = !config->no_inherit && list_empty(&evsel->bpf_counter_list);
+
+ /*
+ * Some events get initialized with sample_(period/type) set,
+ * like tracepoints. Clear it up for counting.
+ */
+ attr->sample_period = 0;
+
+ if (config->identifier)
+ attr->sample_type = PERF_SAMPLE_IDENTIFIER;
+
+ if (config->all_user) {
+ attr->exclude_kernel = 1;
+ attr->exclude_user = 0;
+ }
+
+ if (config->all_kernel) {
+ attr->exclude_kernel = 0;
+ attr->exclude_user = 1;
+ }
+
+ /*
+ * Disabling all counters initially, they will be enabled
+ * either manually by us or by kernel via enable_on_exec
+ * set later.
+ */
+ if (evsel__is_group_leader(evsel)) {
+ attr->disabled = 1;
+
+ if (target__enable_on_exec(target))
+ attr->enable_on_exec = 1;
+ }
+
+ if (target__has_cpu(target) && !target__has_per_thread(target))
+ return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu_map_idx);
+
+ return evsel__open_per_thread(evsel, evsel->core.threads);
+}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
new file mode 100644
index 000000000..325d0fad1
--- /dev/null
+++ b/tools/perf/util/stat.h
@@ -0,0 +1,228 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_STATS_H
+#define __PERF_STATS_H
+
+#include <linux/types.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+#include "cpumap.h"
+#include "rblist.h"
+#include "counts.h"
+
+struct perf_cpu_map;
+struct perf_stat_config;
+struct timespec;
+
+struct stats {
+ double n, mean, M2;
+ u64 max, min;
+};
+
+/* hold aggregated event info */
+struct perf_stat_aggr {
+ /* aggregated values */
+ struct perf_counts_values counts;
+ /* number of entries (CPUs) aggregated */
+ int nr;
+ /* whether any entry has failed to read/process event */
+ bool failed;
+ /* to mark this data is processed already */
+ bool used;
+};
+
+/* per-evsel event stats */
+struct perf_stat_evsel {
+ /* used for repeated runs */
+ struct stats res_stats;
+ /* number of allocated 'aggr' */
+ int nr_aggr;
+ /* aggregated event values */
+ struct perf_stat_aggr *aggr;
+ /* used for group read */
+ u64 *group_data;
+};
+
+enum aggr_mode {
+ AGGR_NONE,
+ AGGR_GLOBAL,
+ AGGR_SOCKET,
+ AGGR_DIE,
+ AGGR_CACHE,
+ AGGR_CORE,
+ AGGR_THREAD,
+ AGGR_UNSET,
+ AGGR_NODE,
+ AGGR_MAX
+};
+
+struct rusage_stats {
+ struct stats ru_utime_usec_stat;
+ struct stats ru_stime_usec_stat;
+};
+
+typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, struct perf_cpu cpu);
+
+struct perf_stat_config {
+ enum aggr_mode aggr_mode;
+ u32 aggr_level;
+ bool scale;
+ bool no_inherit;
+ bool identifier;
+ bool csv_output;
+ bool json_output;
+ bool interval_clear;
+ bool metric_only;
+ bool null_run;
+ bool ru_display;
+ bool big_num;
+ bool no_merge;
+ bool hybrid_merge;
+ bool walltime_run_table;
+ bool all_kernel;
+ bool all_user;
+ bool percore_show_thread;
+ bool summary;
+ bool no_csv_summary;
+ bool metric_no_group;
+ bool metric_no_merge;
+ bool metric_no_threshold;
+ bool stop_read_counter;
+ bool iostat_run;
+ char *user_requested_cpu_list;
+ bool system_wide;
+ FILE *output;
+ unsigned int interval;
+ unsigned int timeout;
+ unsigned int unit_width;
+ unsigned int metric_only_len;
+ int times;
+ int run_count;
+ int print_free_counters_hint;
+ int print_mixed_hw_group_error;
+ const char *csv_sep;
+ struct stats *walltime_nsecs_stats;
+ struct rusage ru_data;
+ struct rusage_stats *ru_stats;
+ struct cpu_aggr_map *aggr_map;
+ aggr_get_id_t aggr_get_id;
+ struct cpu_aggr_map *cpus_aggr_map;
+ u64 *walltime_run;
+ struct rblist metric_events;
+ int ctl_fd;
+ int ctl_fd_ack;
+ bool ctl_fd_close;
+ const char *cgroup_list;
+ unsigned int topdown_level;
+};
+
+void perf_stat__set_big_num(int set);
+void perf_stat__set_no_csv_summary(int set);
+
+void update_stats(struct stats *stats, u64 val);
+double avg_stats(struct stats *stats);
+double stddev_stats(struct stats *stats);
+double rel_stddev_stats(double stddev, double avg);
+
+static inline void init_stats(struct stats *stats)
+{
+ stats->n = 0.0;
+ stats->mean = 0.0;
+ stats->M2 = 0.0;
+ stats->min = (u64) -1;
+ stats->max = 0;
+}
+
+static inline void init_rusage_stats(struct rusage_stats *ru_stats) {
+ init_stats(&ru_stats->ru_utime_usec_stat);
+ init_stats(&ru_stats->ru_stime_usec_stat);
+}
+
+static inline void update_rusage_stats(struct rusage_stats *ru_stats, struct rusage* rusage) {
+ const u64 us_to_ns = 1000;
+ const u64 s_to_ns = 1000000000;
+ update_stats(&ru_stats->ru_utime_usec_stat,
+ (rusage->ru_utime.tv_usec * us_to_ns + rusage->ru_utime.tv_sec * s_to_ns));
+ update_stats(&ru_stats->ru_stime_usec_stat,
+ (rusage->ru_stime.tv_usec * us_to_ns + rusage->ru_stime.tv_sec * s_to_ns));
+}
+
+struct evsel;
+struct evlist;
+
+extern struct stats walltime_nsecs_stats;
+extern struct rusage_stats ru_stats;
+
+typedef void (*print_metric_t)(struct perf_stat_config *config,
+ void *ctx, const char *color, const char *unit,
+ const char *fmt, double val);
+typedef void (*new_line_t)(struct perf_stat_config *config, void *ctx);
+
+/* Used to print the display name of the Default metricgroup for now. */
+typedef void (*print_metricgroup_header_t)(struct perf_stat_config *config,
+ void *ctx, const char *metricgroup_name);
+
+void perf_stat__reset_shadow_stats(void);
+struct perf_stat_output_ctx {
+ void *ctx;
+ print_metric_t print_metric;
+ new_line_t new_line;
+ print_metricgroup_header_t print_metricgroup_header;
+ bool force_header;
+};
+
+void perf_stat__print_shadow_stats(struct perf_stat_config *config,
+ struct evsel *evsel,
+ double avg, int aggr_idx,
+ struct perf_stat_output_ctx *out,
+ struct rblist *metric_events);
+bool perf_stat__skip_metric_event(struct evsel *evsel,
+ struct rblist *metric_events,
+ u64 ena, u64 run);
+void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config,
+ struct evsel *evsel,
+ int aggr_idx,
+ int *num,
+ void *from,
+ struct perf_stat_output_ctx *out,
+ struct rblist *metric_events);
+
+int evlist__alloc_stats(struct perf_stat_config *config,
+ struct evlist *evlist, bool alloc_raw);
+void evlist__free_stats(struct evlist *evlist);
+void evlist__reset_stats(struct evlist *evlist);
+void evlist__reset_prev_raw_counts(struct evlist *evlist);
+void evlist__copy_prev_raw_counts(struct evlist *evlist);
+void evlist__save_aggr_prev_raw_counts(struct evlist *evlist);
+
+int evlist__alloc_aggr_stats(struct evlist *evlist, int nr_aggr);
+void evlist__reset_aggr_stats(struct evlist *evlist);
+void evlist__copy_res_stats(struct perf_stat_config *config, struct evlist *evlist);
+
+int perf_stat_process_counter(struct perf_stat_config *config,
+ struct evsel *counter);
+void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *evlist);
+void perf_stat_process_percore(struct perf_stat_config *config, struct evlist *evlist);
+
+struct perf_tool;
+union perf_event;
+struct perf_session;
+struct target;
+
+int perf_event__process_stat_event(struct perf_session *session,
+ union perf_event *event);
+
+size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp);
+
+int create_perf_stat_counter(struct evsel *evsel,
+ struct perf_stat_config *config,
+ struct target *target,
+ int cpu_map_idx);
+void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
+ struct target *_target, struct timespec *ts, int argc, const char **argv);
+
+struct metric_expr;
+double test_generic_metric(struct metric_expr *mexp, int aggr_idx);
+#endif
diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c
new file mode 100644
index 000000000..a64a37628
--- /dev/null
+++ b/tools/perf/util/strbuf.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "cache.h"
+#include "debug.h"
+#include "strbuf.h"
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+/*
+ * Used as the default ->buf value, so that people can always assume
+ * buf is non NULL and ->buf is NUL terminated even for a freshly
+ * initialized strbuf.
+ */
+char strbuf_slopbuf[1];
+
+int strbuf_init(struct strbuf *sb, ssize_t hint)
+{
+ sb->alloc = sb->len = 0;
+ sb->buf = strbuf_slopbuf;
+ if (hint)
+ return strbuf_grow(sb, hint);
+ return 0;
+}
+
+void strbuf_release(struct strbuf *sb)
+{
+ if (sb->alloc) {
+ zfree(&sb->buf);
+ strbuf_init(sb, 0);
+ }
+}
+
+char *strbuf_detach(struct strbuf *sb, size_t *sz)
+{
+ char *res = sb->alloc ? sb->buf : NULL;
+ if (sz)
+ *sz = sb->len;
+ strbuf_init(sb, 0);
+ return res;
+}
+
+int strbuf_grow(struct strbuf *sb, size_t extra)
+{
+ char *buf;
+ size_t nr = sb->len + extra + 1;
+
+ if (nr < sb->alloc)
+ return 0;
+
+ if (nr <= sb->len)
+ return -E2BIG;
+
+ if (alloc_nr(sb->alloc) > nr)
+ nr = alloc_nr(sb->alloc);
+
+ /*
+ * Note that sb->buf == strbuf_slopbuf if sb->alloc == 0, and it is
+ * a static variable. Thus we have to avoid passing it to realloc.
+ */
+ buf = realloc(sb->alloc ? sb->buf : NULL, nr * sizeof(*buf));
+ if (!buf)
+ return -ENOMEM;
+
+ sb->buf = buf;
+ sb->alloc = nr;
+ return 0;
+}
+
+int strbuf_addch(struct strbuf *sb, int c)
+{
+ int ret = strbuf_grow(sb, 1);
+ if (ret)
+ return ret;
+
+ sb->buf[sb->len++] = c;
+ sb->buf[sb->len] = '\0';
+ return 0;
+}
+
+int strbuf_add(struct strbuf *sb, const void *data, size_t len)
+{
+ int ret = strbuf_grow(sb, len);
+ if (ret)
+ return ret;
+
+ memcpy(sb->buf + sb->len, data, len);
+ return strbuf_setlen(sb, sb->len + len);
+}
+
+static int strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap)
+{
+ int len, ret;
+ va_list ap_saved;
+
+ if (!strbuf_avail(sb)) {
+ ret = strbuf_grow(sb, 64);
+ if (ret)
+ return ret;
+ }
+
+ va_copy(ap_saved, ap);
+ len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
+ if (len < 0) {
+ va_end(ap_saved);
+ return len;
+ }
+ if (len > strbuf_avail(sb)) {
+ ret = strbuf_grow(sb, len);
+ if (ret) {
+ va_end(ap_saved);
+ return ret;
+ }
+ len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap_saved);
+ if (len > strbuf_avail(sb)) {
+ pr_debug("this should not happen, your vsnprintf is broken");
+ va_end(ap_saved);
+ return -EINVAL;
+ }
+ }
+ va_end(ap_saved);
+ return strbuf_setlen(sb, sb->len + len);
+}
+
+int strbuf_addf(struct strbuf *sb, const char *fmt, ...)
+{
+ va_list ap;
+ int ret;
+
+ va_start(ap, fmt);
+ ret = strbuf_addv(sb, fmt, ap);
+ va_end(ap);
+ return ret;
+}
+
+ssize_t strbuf_read(struct strbuf *sb, int fd, ssize_t hint)
+{
+ size_t oldlen = sb->len;
+ size_t oldalloc = sb->alloc;
+ int ret;
+
+ ret = strbuf_grow(sb, hint ? hint : 8192);
+ if (ret)
+ return ret;
+
+ for (;;) {
+ ssize_t cnt;
+
+ cnt = read(fd, sb->buf + sb->len, sb->alloc - sb->len - 1);
+ if (cnt < 0) {
+ if (oldalloc == 0)
+ strbuf_release(sb);
+ else
+ strbuf_setlen(sb, oldlen);
+ return cnt;
+ }
+ if (!cnt)
+ break;
+ sb->len += cnt;
+ ret = strbuf_grow(sb, 8192);
+ if (ret)
+ return ret;
+ }
+
+ sb->buf[sb->len] = '\0';
+ return sb->len - oldlen;
+}
diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h
new file mode 100644
index 000000000..be94d7046
--- /dev/null
+++ b/tools/perf/util/strbuf.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_STRBUF_H
+#define __PERF_STRBUF_H
+
+/*
+ * Strbuf's can be use in many ways: as a byte array, or to store arbitrary
+ * long, overflow safe strings.
+ *
+ * Strbufs has some invariants that are very important to keep in mind:
+ *
+ * 1. the ->buf member is always malloc-ed, hence strbuf's can be used to
+ * build complex strings/buffers whose final size isn't easily known.
+ *
+ * It is NOT legal to copy the ->buf pointer away.
+ * `strbuf_detach' is the operation that detaches a buffer from its shell
+ * while keeping the shell valid wrt its invariants.
+ *
+ * 2. the ->buf member is a byte array that has at least ->len + 1 bytes
+ * allocated. The extra byte is used to store a '\0', allowing the ->buf
+ * member to be a valid C-string. Every strbuf function ensure this
+ * invariant is preserved.
+ *
+ * Note that it is OK to "play" with the buffer directly if you work it
+ * that way:
+ *
+ * strbuf_grow(sb, SOME_SIZE);
+ * ... Here, the memory array starting at sb->buf, and of length
+ * ... strbuf_avail(sb) is all yours, and you are sure that
+ * ... strbuf_avail(sb) is at least SOME_SIZE.
+ * strbuf_setlen(sb, sb->len + SOME_OTHER_SIZE);
+ *
+ * Of course, SOME_OTHER_SIZE must be smaller or equal to strbuf_avail(sb).
+ *
+ * Doing so is safe, though if it has to be done in many places, adding the
+ * missing API to the strbuf module is the way to go.
+ *
+ * XXX: do _not_ assume that the area that is yours is of size ->alloc - 1
+ * even if it's true in the current implementation. Alloc is somehow a
+ * "private" member that should not be messed with.
+ */
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <string.h>
+#include <linux/compiler.h>
+#include <sys/types.h>
+
+extern char strbuf_slopbuf[];
+struct strbuf {
+ size_t alloc;
+ size_t len;
+ char *buf;
+};
+
+#define STRBUF_INIT { 0, 0, strbuf_slopbuf }
+
+/*----- strbuf life cycle -----*/
+int strbuf_init(struct strbuf *buf, ssize_t hint);
+void strbuf_release(struct strbuf *buf);
+char *strbuf_detach(struct strbuf *buf, size_t *);
+
+/*----- strbuf size related -----*/
+static inline ssize_t strbuf_avail(const struct strbuf *sb) {
+ return sb->alloc ? sb->alloc - sb->len - 1 : 0;
+}
+
+int strbuf_grow(struct strbuf *buf, size_t);
+
+static inline int strbuf_setlen(struct strbuf *sb, size_t len) {
+ if (!sb->alloc) {
+ int ret = strbuf_grow(sb, 0);
+ if (ret)
+ return ret;
+ }
+ assert(len < sb->alloc);
+ sb->len = len;
+ sb->buf[len] = '\0';
+ return 0;
+}
+
+/*----- add data in your buffer -----*/
+int strbuf_addch(struct strbuf *sb, int c);
+
+int strbuf_add(struct strbuf *buf, const void *, size_t);
+static inline int strbuf_addstr(struct strbuf *sb, const char *s) {
+ return strbuf_add(sb, s, strlen(s));
+}
+
+int strbuf_addf(struct strbuf *sb, const char *fmt, ...) __printf(2, 3);
+
+/* XXX: if read fails, any partial read is undone */
+ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint);
+
+#endif /* __PERF_STRBUF_H */
diff --git a/tools/perf/util/stream.c b/tools/perf/util/stream.c
new file mode 100644
index 000000000..545e44981
--- /dev/null
+++ b/tools/perf/util/stream.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Compare and figure out the top N hottest streams
+ * Copyright (c) 2020, Intel Corporation.
+ * Author: Jin Yao
+ */
+
+#include <inttypes.h>
+#include <stdlib.h>
+#include <linux/zalloc.h>
+#include "debug.h"
+#include "hist.h"
+#include "sort.h"
+#include "stream.h"
+#include "evlist.h"
+
+static void evsel_streams__delete(struct evsel_streams *es, int nr_evsel)
+{
+ for (int i = 0; i < nr_evsel; i++)
+ zfree(&es[i].streams);
+
+ free(es);
+}
+
+void evlist_streams__delete(struct evlist_streams *els)
+{
+ evsel_streams__delete(els->ev_streams, els->nr_evsel);
+ free(els);
+}
+
+static struct evlist_streams *evlist_streams__new(int nr_evsel,
+ int nr_streams_max)
+{
+ struct evlist_streams *els;
+ struct evsel_streams *es;
+
+ els = zalloc(sizeof(*els));
+ if (!els)
+ return NULL;
+
+ es = calloc(nr_evsel, sizeof(struct evsel_streams));
+ if (!es) {
+ free(els);
+ return NULL;
+ }
+
+ for (int i = 0; i < nr_evsel; i++) {
+ struct evsel_streams *s = &es[i];
+
+ s->streams = calloc(nr_streams_max, sizeof(struct stream));
+ if (!s->streams)
+ goto err;
+
+ s->nr_streams_max = nr_streams_max;
+ s->evsel_idx = -1;
+ }
+
+ els->ev_streams = es;
+ els->nr_evsel = nr_evsel;
+ return els;
+
+err:
+ evsel_streams__delete(es, nr_evsel);
+ return NULL;
+}
+
+/*
+ * The cnodes with high hit number are hot callchains.
+ */
+static void evsel_streams__set_hot_cnode(struct evsel_streams *es,
+ struct callchain_node *cnode)
+{
+ int i, idx = 0;
+ u64 hit;
+
+ if (es->nr_streams < es->nr_streams_max) {
+ i = es->nr_streams;
+ es->streams[i].cnode = cnode;
+ es->nr_streams++;
+ return;
+ }
+
+ /*
+ * Considering a few number of hot streams, only use simple
+ * way to find the cnode with smallest hit number and replace.
+ */
+ hit = (es->streams[0].cnode)->hit;
+ for (i = 1; i < es->nr_streams; i++) {
+ if ((es->streams[i].cnode)->hit < hit) {
+ hit = (es->streams[i].cnode)->hit;
+ idx = i;
+ }
+ }
+
+ if (cnode->hit > hit)
+ es->streams[idx].cnode = cnode;
+}
+
+static void update_hot_callchain(struct hist_entry *he,
+ struct evsel_streams *es)
+{
+ struct rb_root *root = &he->sorted_chain;
+ struct rb_node *rb_node = rb_first(root);
+ struct callchain_node *cnode;
+
+ while (rb_node) {
+ cnode = rb_entry(rb_node, struct callchain_node, rb_node);
+ evsel_streams__set_hot_cnode(es, cnode);
+ rb_node = rb_next(rb_node);
+ }
+}
+
+static void init_hot_callchain(struct hists *hists, struct evsel_streams *es)
+{
+ struct rb_node *next = rb_first_cached(&hists->entries);
+
+ while (next) {
+ struct hist_entry *he;
+
+ he = rb_entry(next, struct hist_entry, rb_node);
+ update_hot_callchain(he, es);
+ next = rb_next(&he->rb_node);
+ }
+
+ es->streams_hits = callchain_total_hits(hists);
+}
+
+static int evlist__init_callchain_streams(struct evlist *evlist,
+ struct evlist_streams *els)
+{
+ struct evsel_streams *es = els->ev_streams;
+ struct evsel *pos;
+ int i = 0;
+
+ BUG_ON(els->nr_evsel < evlist->core.nr_entries);
+
+ evlist__for_each_entry(evlist, pos) {
+ struct hists *hists = evsel__hists(pos);
+
+ hists__output_resort(hists, NULL);
+ init_hot_callchain(hists, &es[i]);
+ es[i].evsel_idx = pos->core.idx;
+ i++;
+ }
+
+ return 0;
+}
+
+struct evlist_streams *evlist__create_streams(struct evlist *evlist,
+ int nr_streams_max)
+{
+ int nr_evsel = evlist->core.nr_entries, ret = -1;
+ struct evlist_streams *els = evlist_streams__new(nr_evsel,
+ nr_streams_max);
+
+ if (!els)
+ return NULL;
+
+ ret = evlist__init_callchain_streams(evlist, els);
+ if (ret) {
+ evlist_streams__delete(els);
+ return NULL;
+ }
+
+ return els;
+}
+
+struct evsel_streams *evsel_streams__entry(struct evlist_streams *els,
+ int evsel_idx)
+{
+ struct evsel_streams *es = els->ev_streams;
+
+ for (int i = 0; i < els->nr_evsel; i++) {
+ if (es[i].evsel_idx == evsel_idx)
+ return &es[i];
+ }
+
+ return NULL;
+}
+
+static struct stream *stream__callchain_match(struct stream *base_stream,
+ struct evsel_streams *es_pair)
+{
+ for (int i = 0; i < es_pair->nr_streams; i++) {
+ struct stream *pair_stream = &es_pair->streams[i];
+
+ if (callchain_cnode_matched(base_stream->cnode,
+ pair_stream->cnode)) {
+ return pair_stream;
+ }
+ }
+
+ return NULL;
+}
+
+static struct stream *stream__match(struct stream *base_stream,
+ struct evsel_streams *es_pair)
+{
+ return stream__callchain_match(base_stream, es_pair);
+}
+
+static void stream__link(struct stream *base_stream, struct stream *pair_stream)
+{
+ base_stream->pair_cnode = pair_stream->cnode;
+ pair_stream->pair_cnode = base_stream->cnode;
+}
+
+void evsel_streams__match(struct evsel_streams *es_base,
+ struct evsel_streams *es_pair)
+{
+ for (int i = 0; i < es_base->nr_streams; i++) {
+ struct stream *base_stream = &es_base->streams[i];
+ struct stream *pair_stream;
+
+ pair_stream = stream__match(base_stream, es_pair);
+ if (pair_stream)
+ stream__link(base_stream, pair_stream);
+ }
+}
+
+static void print_callchain_pair(struct stream *base_stream, int idx,
+ struct evsel_streams *es_base,
+ struct evsel_streams *es_pair)
+{
+ struct callchain_node *base_cnode = base_stream->cnode;
+ struct callchain_node *pair_cnode = base_stream->pair_cnode;
+ struct callchain_list *base_chain, *pair_chain;
+ char buf1[512], buf2[512], cbuf1[256], cbuf2[256];
+ char *s1, *s2;
+ double pct;
+
+ printf("\nhot chain pair %d:\n", idx);
+
+ pct = (double)base_cnode->hit / (double)es_base->streams_hits;
+ scnprintf(buf1, sizeof(buf1), "cycles: %ld, hits: %.2f%%",
+ callchain_avg_cycles(base_cnode), pct * 100.0);
+
+ pct = (double)pair_cnode->hit / (double)es_pair->streams_hits;
+ scnprintf(buf2, sizeof(buf2), "cycles: %ld, hits: %.2f%%",
+ callchain_avg_cycles(pair_cnode), pct * 100.0);
+
+ printf("%35s\t%35s\n", buf1, buf2);
+
+ printf("%35s\t%35s\n",
+ "---------------------------",
+ "--------------------------");
+
+ pair_chain = list_first_entry(&pair_cnode->val,
+ struct callchain_list,
+ list);
+
+ list_for_each_entry(base_chain, &base_cnode->val, list) {
+ if (&pair_chain->list == &pair_cnode->val)
+ return;
+
+ s1 = callchain_list__sym_name(base_chain, cbuf1, sizeof(cbuf1),
+ false);
+ s2 = callchain_list__sym_name(pair_chain, cbuf2, sizeof(cbuf2),
+ false);
+
+ scnprintf(buf1, sizeof(buf1), "%35s\t%35s", s1, s2);
+ printf("%s\n", buf1);
+ pair_chain = list_next_entry(pair_chain, list);
+ }
+}
+
+static void print_stream_callchain(struct stream *stream, int idx,
+ struct evsel_streams *es, bool pair)
+{
+ struct callchain_node *cnode = stream->cnode;
+ struct callchain_list *chain;
+ char buf[512], cbuf[256], *s;
+ double pct;
+
+ printf("\nhot chain %d:\n", idx);
+
+ pct = (double)cnode->hit / (double)es->streams_hits;
+ scnprintf(buf, sizeof(buf), "cycles: %ld, hits: %.2f%%",
+ callchain_avg_cycles(cnode), pct * 100.0);
+
+ if (pair) {
+ printf("%35s\t%35s\n", "", buf);
+ printf("%35s\t%35s\n",
+ "", "--------------------------");
+ } else {
+ printf("%35s\n", buf);
+ printf("%35s\n", "--------------------------");
+ }
+
+ list_for_each_entry(chain, &cnode->val, list) {
+ s = callchain_list__sym_name(chain, cbuf, sizeof(cbuf), false);
+
+ if (pair)
+ scnprintf(buf, sizeof(buf), "%35s\t%35s", "", s);
+ else
+ scnprintf(buf, sizeof(buf), "%35s", s);
+
+ printf("%s\n", buf);
+ }
+}
+
+static void callchain_streams_report(struct evsel_streams *es_base,
+ struct evsel_streams *es_pair)
+{
+ struct stream *base_stream;
+ int i, idx = 0;
+
+ printf("[ Matched hot streams ]\n");
+ for (i = 0; i < es_base->nr_streams; i++) {
+ base_stream = &es_base->streams[i];
+ if (base_stream->pair_cnode) {
+ print_callchain_pair(base_stream, ++idx,
+ es_base, es_pair);
+ }
+ }
+
+ idx = 0;
+ printf("\n[ Hot streams in old perf data only ]\n");
+ for (i = 0; i < es_base->nr_streams; i++) {
+ base_stream = &es_base->streams[i];
+ if (!base_stream->pair_cnode) {
+ print_stream_callchain(base_stream, ++idx,
+ es_base, false);
+ }
+ }
+
+ idx = 0;
+ printf("\n[ Hot streams in new perf data only ]\n");
+ for (i = 0; i < es_pair->nr_streams; i++) {
+ base_stream = &es_pair->streams[i];
+ if (!base_stream->pair_cnode) {
+ print_stream_callchain(base_stream, ++idx,
+ es_pair, true);
+ }
+ }
+}
+
+void evsel_streams__report(struct evsel_streams *es_base,
+ struct evsel_streams *es_pair)
+{
+ return callchain_streams_report(es_base, es_pair);
+}
diff --git a/tools/perf/util/stream.h b/tools/perf/util/stream.h
new file mode 100644
index 000000000..bee768874
--- /dev/null
+++ b/tools/perf/util/stream.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_STREAM_H
+#define __PERF_STREAM_H
+
+#include "callchain.h"
+
+struct stream {
+ struct callchain_node *cnode;
+ struct callchain_node *pair_cnode;
+};
+
+struct evsel_streams {
+ struct stream *streams;
+ int nr_streams_max;
+ int nr_streams;
+ int evsel_idx;
+ u64 streams_hits;
+};
+
+struct evlist_streams {
+ struct evsel_streams *ev_streams;
+ int nr_evsel;
+};
+
+struct evlist;
+
+void evlist_streams__delete(struct evlist_streams *els);
+
+struct evlist_streams *evlist__create_streams(struct evlist *evlist,
+ int nr_streams_max);
+
+struct evsel_streams *evsel_streams__entry(struct evlist_streams *els,
+ int evsel_idx);
+
+void evsel_streams__match(struct evsel_streams *es_base,
+ struct evsel_streams *es_pair);
+
+void evsel_streams__report(struct evsel_streams *es_base,
+ struct evsel_streams *es_pair);
+
+#endif /* __PERF_STREAM_H */
diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c
new file mode 100644
index 000000000..02807b9d4
--- /dev/null
+++ b/tools/perf/util/strfilter.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "string2.h"
+#include "strfilter.h"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+
+/* Operators */
+static const char *OP_and = "&"; /* Logical AND */
+static const char *OP_or = "|"; /* Logical OR */
+static const char *OP_not = "!"; /* Logical NOT */
+
+#define is_operator(c) ((c) == '|' || (c) == '&' || (c) == '!')
+#define is_separator(c) (is_operator(c) || (c) == '(' || (c) == ')')
+
+static void strfilter_node__delete(struct strfilter_node *node)
+{
+ if (node) {
+ if (node->p && !is_operator(*node->p))
+ zfree((char **)&node->p);
+ strfilter_node__delete(node->l);
+ strfilter_node__delete(node->r);
+ free(node);
+ }
+}
+
+void strfilter__delete(struct strfilter *filter)
+{
+ if (filter) {
+ strfilter_node__delete(filter->root);
+ free(filter);
+ }
+}
+
+static const char *get_token(const char *s, const char **e)
+{
+ const char *p;
+
+ s = skip_spaces(s);
+
+ if (*s == '\0') {
+ p = s;
+ goto end;
+ }
+
+ p = s + 1;
+ if (!is_separator(*s)) {
+ /* End search */
+retry:
+ while (*p && !is_separator(*p) && !isspace(*p))
+ p++;
+ /* Escape and special case: '!' is also used in glob pattern */
+ if (*(p - 1) == '\\' || (*p == '!' && *(p - 1) == '[')) {
+ p++;
+ goto retry;
+ }
+ }
+end:
+ *e = p;
+ return s;
+}
+
+static struct strfilter_node *strfilter_node__alloc(const char *op,
+ struct strfilter_node *l,
+ struct strfilter_node *r)
+{
+ struct strfilter_node *node = zalloc(sizeof(*node));
+
+ if (node) {
+ node->p = op;
+ node->l = l;
+ node->r = r;
+ }
+
+ return node;
+}
+
+static struct strfilter_node *strfilter_node__new(const char *s,
+ const char **ep)
+{
+ struct strfilter_node root, *cur, *last_op;
+ const char *e;
+
+ if (!s)
+ return NULL;
+
+ memset(&root, 0, sizeof(root));
+ last_op = cur = &root;
+
+ s = get_token(s, &e);
+ while (*s != '\0' && *s != ')') {
+ switch (*s) {
+ case '&': /* Exchg last OP->r with AND */
+ if (!cur->r || !last_op->r)
+ goto error;
+ cur = strfilter_node__alloc(OP_and, last_op->r, NULL);
+ if (!cur)
+ goto nomem;
+ last_op->r = cur;
+ last_op = cur;
+ break;
+ case '|': /* Exchg the root with OR */
+ if (!cur->r || !root.r)
+ goto error;
+ cur = strfilter_node__alloc(OP_or, root.r, NULL);
+ if (!cur)
+ goto nomem;
+ root.r = cur;
+ last_op = cur;
+ break;
+ case '!': /* Add NOT as a leaf node */
+ if (cur->r)
+ goto error;
+ cur->r = strfilter_node__alloc(OP_not, NULL, NULL);
+ if (!cur->r)
+ goto nomem;
+ cur = cur->r;
+ break;
+ case '(': /* Recursively parses inside the parenthesis */
+ if (cur->r)
+ goto error;
+ cur->r = strfilter_node__new(s + 1, &s);
+ if (!s)
+ goto nomem;
+ if (!cur->r || *s != ')')
+ goto error;
+ e = s + 1;
+ break;
+ default:
+ if (cur->r)
+ goto error;
+ cur->r = strfilter_node__alloc(NULL, NULL, NULL);
+ if (!cur->r)
+ goto nomem;
+ cur->r->p = strndup(s, e - s);
+ if (!cur->r->p)
+ goto nomem;
+ }
+ s = get_token(e, &e);
+ }
+ if (!cur->r)
+ goto error;
+ *ep = s;
+ return root.r;
+nomem:
+ s = NULL;
+error:
+ *ep = s;
+ strfilter_node__delete(root.r);
+ return NULL;
+}
+
+/*
+ * Parse filter rule and return new strfilter.
+ * Return NULL if fail, and *ep == NULL if memory allocation failed.
+ */
+struct strfilter *strfilter__new(const char *rules, const char **err)
+{
+ struct strfilter *filter = zalloc(sizeof(*filter));
+ const char *ep = NULL;
+
+ if (filter)
+ filter->root = strfilter_node__new(rules, &ep);
+
+ if (!filter || !filter->root || *ep != '\0') {
+ if (err)
+ *err = ep;
+ strfilter__delete(filter);
+ filter = NULL;
+ }
+
+ return filter;
+}
+
+static int strfilter__append(struct strfilter *filter, bool _or,
+ const char *rules, const char **err)
+{
+ struct strfilter_node *right, *root;
+ const char *ep = NULL;
+
+ if (!filter || !rules)
+ return -EINVAL;
+
+ right = strfilter_node__new(rules, &ep);
+ if (!right || *ep != '\0') {
+ if (err)
+ *err = ep;
+ goto error;
+ }
+ root = strfilter_node__alloc(_or ? OP_or : OP_and, filter->root, right);
+ if (!root) {
+ ep = NULL;
+ goto error;
+ }
+
+ filter->root = root;
+ return 0;
+
+error:
+ strfilter_node__delete(right);
+ return ep ? -EINVAL : -ENOMEM;
+}
+
+int strfilter__or(struct strfilter *filter, const char *rules, const char **err)
+{
+ return strfilter__append(filter, true, rules, err);
+}
+
+int strfilter__and(struct strfilter *filter, const char *rules,
+ const char **err)
+{
+ return strfilter__append(filter, false, rules, err);
+}
+
+static bool strfilter_node__compare(struct strfilter_node *node,
+ const char *str)
+{
+ if (!node || !node->p)
+ return false;
+
+ switch (*node->p) {
+ case '|': /* OR */
+ return strfilter_node__compare(node->l, str) ||
+ strfilter_node__compare(node->r, str);
+ case '&': /* AND */
+ return strfilter_node__compare(node->l, str) &&
+ strfilter_node__compare(node->r, str);
+ case '!': /* NOT */
+ return !strfilter_node__compare(node->r, str);
+ default:
+ return strglobmatch(str, node->p);
+ }
+}
+
+/* Return true if STR matches the filter rules */
+bool strfilter__compare(struct strfilter *filter, const char *str)
+{
+ if (!filter)
+ return false;
+ return strfilter_node__compare(filter->root, str);
+}
+
+static int strfilter_node__sprint(struct strfilter_node *node, char *buf);
+
+/* sprint node in parenthesis if needed */
+static int strfilter_node__sprint_pt(struct strfilter_node *node, char *buf)
+{
+ int len;
+ int pt = node->r ? 2 : 0; /* don't need to check node->l */
+
+ if (buf && pt)
+ *buf++ = '(';
+ len = strfilter_node__sprint(node, buf);
+ if (len < 0)
+ return len;
+ if (buf && pt)
+ *(buf + len) = ')';
+ return len + pt;
+}
+
+static int strfilter_node__sprint(struct strfilter_node *node, char *buf)
+{
+ int len = 0, rlen;
+
+ if (!node || !node->p)
+ return -EINVAL;
+
+ switch (*node->p) {
+ case '|':
+ case '&':
+ len = strfilter_node__sprint_pt(node->l, buf);
+ if (len < 0)
+ return len;
+ fallthrough;
+ case '!':
+ if (buf) {
+ *(buf + len++) = *node->p;
+ buf += len;
+ } else
+ len++;
+ rlen = strfilter_node__sprint_pt(node->r, buf);
+ if (rlen < 0)
+ return rlen;
+ len += rlen;
+ break;
+ default:
+ len = strlen(node->p);
+ if (buf)
+ strcpy(buf, node->p);
+ }
+
+ return len;
+}
+
+char *strfilter__string(struct strfilter *filter)
+{
+ int len;
+ char *ret = NULL;
+
+ len = strfilter_node__sprint(filter->root, NULL);
+ if (len < 0)
+ return NULL;
+
+ ret = malloc(len + 1);
+ if (ret)
+ strfilter_node__sprint(filter->root, ret);
+
+ return ret;
+}
diff --git a/tools/perf/util/strfilter.h b/tools/perf/util/strfilter.h
new file mode 100644
index 000000000..c05aca9ca
--- /dev/null
+++ b/tools/perf/util/strfilter.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_STRFILTER_H
+#define __PERF_STRFILTER_H
+/* General purpose glob matching filter */
+
+#include <linux/list.h>
+#include <stdbool.h>
+
+/* A node of string filter */
+struct strfilter_node {
+ struct strfilter_node *l; /* Tree left branch (for &,|) */
+ struct strfilter_node *r; /* Tree right branch (for !,&,|) */
+ const char *p; /* Operator or rule */
+};
+
+/* String filter */
+struct strfilter {
+ struct strfilter_node *root;
+};
+
+/**
+ * strfilter__new - Create a new string filter
+ * @rules: Filter rule, which is a combination of glob expressions.
+ * @err: Pointer which points an error detected on @rules
+ *
+ * Parse @rules and return new strfilter. Return NULL if an error detected.
+ * In that case, *@err will indicate where it is detected, and *@err is NULL
+ * if a memory allocation is failed.
+ */
+struct strfilter *strfilter__new(const char *rules, const char **err);
+
+/**
+ * strfilter__or - Append an additional rule by logical-or
+ * @filter: Original string filter
+ * @rules: Filter rule to be appended at left of the root of
+ * @filter by using logical-or.
+ * @err: Pointer which points an error detected on @rules
+ *
+ * Parse @rules and join it to the @filter by using logical-or.
+ * Return 0 if success, or return the error code.
+ */
+int strfilter__or(struct strfilter *filter,
+ const char *rules, const char **err);
+
+/**
+ * strfilter__add - Append an additional rule by logical-and
+ * @filter: Original string filter
+ * @rules: Filter rule to be appended at left of the root of
+ * @filter by using logical-and.
+ * @err: Pointer which points an error detected on @rules
+ *
+ * Parse @rules and join it to the @filter by using logical-and.
+ * Return 0 if success, or return the error code.
+ */
+int strfilter__and(struct strfilter *filter,
+ const char *rules, const char **err);
+
+/**
+ * strfilter__compare - compare given string and a string filter
+ * @filter: String filter
+ * @str: target string
+ *
+ * Compare @str and @filter. Return true if the str match the rule
+ */
+bool strfilter__compare(struct strfilter *filter, const char *str);
+
+/**
+ * strfilter__delete - delete a string filter
+ * @filter: String filter to delete
+ *
+ * Delete @filter.
+ */
+void strfilter__delete(struct strfilter *filter);
+
+/**
+ * strfilter__string - Reconstruct a rule string from filter
+ * @filter: String filter to reconstruct
+ *
+ * Reconstruct a rule string from @filter. This will be good for
+ * debug messages. Note that returning string must be freed afterward.
+ */
+char *strfilter__string(struct strfilter *filter);
+
+#endif
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
new file mode 100644
index 000000000..cf05b0b56
--- /dev/null
+++ b/tools/perf/util/string.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "string2.h"
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <stdlib.h>
+
+#include <linux/ctype.h>
+
+const char *graph_dotted_line =
+ "---------------------------------------------------------------------"
+ "---------------------------------------------------------------------"
+ "---------------------------------------------------------------------";
+const char *dots =
+ "....................................................................."
+ "....................................................................."
+ ".....................................................................";
+
+/*
+ * perf_atoll()
+ * Parse (\d+)(b|B|kb|KB|mb|MB|gb|GB|tb|TB) (e.g. "256MB")
+ * and return its numeric value
+ */
+s64 perf_atoll(const char *str)
+{
+ s64 length;
+ char *p;
+ char c;
+
+ if (!isdigit(str[0]))
+ goto out_err;
+
+ length = strtoll(str, &p, 10);
+ switch (c = *p++) {
+ case 'b': case 'B':
+ if (*p)
+ goto out_err;
+
+ fallthrough;
+ case '\0':
+ return length;
+ default:
+ goto out_err;
+ /* two-letter suffices */
+ case 'k': case 'K':
+ length <<= 10;
+ break;
+ case 'm': case 'M':
+ length <<= 20;
+ break;
+ case 'g': case 'G':
+ length <<= 30;
+ break;
+ case 't': case 'T':
+ length <<= 40;
+ break;
+ }
+ /* we want the cases to match */
+ if (islower(c)) {
+ if (strcmp(p, "b") != 0)
+ goto out_err;
+ } else {
+ if (strcmp(p, "B") != 0)
+ goto out_err;
+ }
+ return length;
+
+out_err:
+ return -1;
+}
+
+/* Character class matching */
+static bool __match_charclass(const char *pat, char c, const char **npat)
+{
+ bool complement = false, ret = true;
+
+ if (*pat == '!') {
+ complement = true;
+ pat++;
+ }
+ if (*pat++ == c) /* First character is special */
+ goto end;
+
+ while (*pat && *pat != ']') { /* Matching */
+ if (*pat == '-' && *(pat + 1) != ']') { /* Range */
+ if (*(pat - 1) <= c && c <= *(pat + 1))
+ goto end;
+ if (*(pat - 1) > *(pat + 1))
+ goto error;
+ pat += 2;
+ } else if (*pat++ == c)
+ goto end;
+ }
+ if (!*pat)
+ goto error;
+ ret = false;
+
+end:
+ while (*pat && *pat != ']') /* Searching closing */
+ pat++;
+ if (!*pat)
+ goto error;
+ *npat = pat + 1;
+ return complement ? !ret : ret;
+
+error:
+ return false;
+}
+
+/* Glob/lazy pattern matching */
+static bool __match_glob(const char *str, const char *pat, bool ignore_space,
+ bool case_ins)
+{
+ while (*str && *pat && *pat != '*') {
+ if (ignore_space) {
+ /* Ignore spaces for lazy matching */
+ if (isspace(*str)) {
+ str++;
+ continue;
+ }
+ if (isspace(*pat)) {
+ pat++;
+ continue;
+ }
+ }
+ if (*pat == '?') { /* Matches any single character */
+ str++;
+ pat++;
+ continue;
+ } else if (*pat == '[') /* Character classes/Ranges */
+ if (__match_charclass(pat + 1, *str, &pat)) {
+ str++;
+ continue;
+ } else
+ return false;
+ else if (*pat == '\\') /* Escaped char match as normal char */
+ pat++;
+ if (case_ins) {
+ if (tolower(*str) != tolower(*pat))
+ return false;
+ } else if (*str != *pat)
+ return false;
+ str++;
+ pat++;
+ }
+ /* Check wild card */
+ if (*pat == '*') {
+ while (*pat == '*')
+ pat++;
+ if (!*pat) /* Tail wild card matches all */
+ return true;
+ while (*str)
+ if (__match_glob(str++, pat, ignore_space, case_ins))
+ return true;
+ }
+ return !*str && !*pat;
+}
+
+/**
+ * strglobmatch - glob expression pattern matching
+ * @str: the target string to match
+ * @pat: the pattern string to match
+ *
+ * This returns true if the @str matches @pat. @pat can includes wildcards
+ * ('*','?') and character classes ([CHARS], complementation and ranges are
+ * also supported). Also, this supports escape character ('\') to use special
+ * characters as normal character.
+ *
+ * Note: if @pat syntax is broken, this always returns false.
+ */
+bool strglobmatch(const char *str, const char *pat)
+{
+ return __match_glob(str, pat, false, false);
+}
+
+bool strglobmatch_nocase(const char *str, const char *pat)
+{
+ return __match_glob(str, pat, false, true);
+}
+
+/**
+ * strlazymatch - matching pattern strings lazily with glob pattern
+ * @str: the target string to match
+ * @pat: the pattern string to match
+ *
+ * This is similar to strglobmatch, except this ignores spaces in
+ * the target string.
+ */
+bool strlazymatch(const char *str, const char *pat)
+{
+ return __match_glob(str, pat, true, false);
+}
+
+/**
+ * strtailcmp - Compare the tail of two strings
+ * @s1: 1st string to be compared
+ * @s2: 2nd string to be compared
+ *
+ * Return 0 if whole of either string is same as another's tail part.
+ */
+int strtailcmp(const char *s1, const char *s2)
+{
+ int i1 = strlen(s1);
+ int i2 = strlen(s2);
+ while (--i1 >= 0 && --i2 >= 0) {
+ if (s1[i1] != s2[i2])
+ return s1[i1] - s2[i2];
+ }
+ return 0;
+}
+
+char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints)
+{
+ /*
+ * FIXME: replace this with an expression using log10() when we
+ * find a suitable implementation, maybe the one in the dvb drivers...
+ *
+ * "%s == %d || " = log10(MAXINT) * 2 + 8 chars for the operators
+ */
+ size_t size = nints * 28 + 1; /* \0 */
+ size_t i, printed = 0;
+ char *expr = malloc(size);
+
+ if (expr) {
+ const char *or_and = "||", *eq_neq = "==";
+ char *e = expr;
+
+ if (!in) {
+ or_and = "&&";
+ eq_neq = "!=";
+ }
+
+ for (i = 0; i < nints; ++i) {
+ if (printed == size)
+ goto out_err_overflow;
+
+ if (i > 0)
+ printed += scnprintf(e + printed, size - printed, " %s ", or_and);
+ printed += scnprintf(e + printed, size - printed,
+ "%s %s %d", var, eq_neq, ints[i]);
+ }
+ }
+
+ return expr;
+
+out_err_overflow:
+ free(expr);
+ return NULL;
+}
+
+/* Like strpbrk(), but not break if it is right after a backslash (escaped) */
+char *strpbrk_esc(char *str, const char *stopset)
+{
+ char *ptr;
+
+ do {
+ ptr = strpbrk(str, stopset);
+ if (ptr == str ||
+ (ptr == str + 1 && *(ptr - 1) != '\\'))
+ break;
+ str = ptr + 1;
+ } while (ptr && *(ptr - 1) == '\\' && *(ptr - 2) != '\\');
+
+ return ptr;
+}
+
+/* Like strdup, but do not copy a single backslash */
+char *strdup_esc(const char *str)
+{
+ char *s, *d, *p, *ret = strdup(str);
+
+ if (!ret)
+ return NULL;
+
+ d = strchr(ret, '\\');
+ if (!d)
+ return ret;
+
+ s = d + 1;
+ do {
+ if (*s == '\0') {
+ *d = '\0';
+ break;
+ }
+ p = strchr(s + 1, '\\');
+ if (p) {
+ memmove(d, s, p - s);
+ d += p - s;
+ s = p + 1;
+ } else
+ memmove(d, s, strlen(s) + 1);
+ } while (p);
+
+ return ret;
+}
+
+unsigned int hex(char c)
+{
+ if (c >= '0' && c <= '9')
+ return c - '0';
+ if (c >= 'a' && c <= 'f')
+ return c - 'a' + 10;
+ return c - 'A' + 10;
+}
diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h
new file mode 100644
index 000000000..56c30fef9
--- /dev/null
+++ b/tools/perf/util/string2.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_STRING_H
+#define PERF_STRING_H
+
+#include <linux/string.h>
+#include <linux/types.h>
+#include <sys/types.h> // pid_t
+#include <stddef.h>
+#include <string.h>
+
+extern const char *graph_dotted_line;
+extern const char *dots;
+
+s64 perf_atoll(const char *str);
+bool strglobmatch(const char *str, const char *pat);
+bool strglobmatch_nocase(const char *str, const char *pat);
+bool strlazymatch(const char *str, const char *pat);
+static inline bool strisglob(const char *str)
+{
+ return strpbrk(str, "*?[") != NULL;
+}
+int strtailcmp(const char *s1, const char *s2);
+
+char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints);
+
+static inline char *asprintf_expr_in_ints(const char *var, size_t nints, int *ints)
+{
+ return asprintf_expr_inout_ints(var, true, nints, ints);
+}
+
+static inline char *asprintf_expr_not_in_ints(const char *var, size_t nints, int *ints)
+{
+ return asprintf_expr_inout_ints(var, false, nints, ints);
+}
+
+char *asprintf__tp_filter_pids(size_t npids, pid_t *pids);
+
+char *strpbrk_esc(char *str, const char *stopset);
+char *strdup_esc(const char *str);
+
+unsigned int hex(char c);
+
+#endif /* PERF_STRING_H */
diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c
new file mode 100644
index 000000000..8a868cbef
--- /dev/null
+++ b/tools/perf/util/strlist.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * (c) 2009 Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include "strlist.h"
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/zalloc.h>
+
+static
+struct rb_node *strlist__node_new(struct rblist *rblist, const void *entry)
+{
+ const char *s = entry;
+ struct rb_node *rc = NULL;
+ struct strlist *strlist = container_of(rblist, struct strlist, rblist);
+ struct str_node *snode = malloc(sizeof(*snode));
+
+ if (snode != NULL) {
+ if (strlist->dupstr) {
+ s = strdup(s);
+ if (s == NULL)
+ goto out_delete;
+ }
+ snode->s = s;
+ rc = &snode->rb_node;
+ }
+
+ return rc;
+
+out_delete:
+ free(snode);
+ return NULL;
+}
+
+static void str_node__delete(struct str_node *snode, bool dupstr)
+{
+ if (dupstr)
+ zfree((char **)&snode->s);
+ free(snode);
+}
+
+static
+void strlist__node_delete(struct rblist *rblist, struct rb_node *rb_node)
+{
+ struct strlist *slist = container_of(rblist, struct strlist, rblist);
+ struct str_node *snode = container_of(rb_node, struct str_node, rb_node);
+
+ str_node__delete(snode, slist->dupstr);
+}
+
+static int strlist__node_cmp(struct rb_node *rb_node, const void *entry)
+{
+ const char *str = entry;
+ struct str_node *snode = container_of(rb_node, struct str_node, rb_node);
+
+ return strcmp(snode->s, str);
+}
+
+int strlist__add(struct strlist *slist, const char *new_entry)
+{
+ return rblist__add_node(&slist->rblist, new_entry);
+}
+
+int strlist__load(struct strlist *slist, const char *filename)
+{
+ char entry[1024];
+ int err;
+ FILE *fp = fopen(filename, "r");
+
+ if (fp == NULL)
+ return -errno;
+
+ while (fgets(entry, sizeof(entry), fp) != NULL) {
+ const size_t len = strlen(entry);
+
+ if (len == 0)
+ continue;
+ entry[len - 1] = '\0';
+
+ err = strlist__add(slist, entry);
+ if (err != 0)
+ goto out;
+ }
+
+ err = 0;
+out:
+ fclose(fp);
+ return err;
+}
+
+void strlist__remove(struct strlist *slist, struct str_node *snode)
+{
+ rblist__remove_node(&slist->rblist, &snode->rb_node);
+}
+
+struct str_node *strlist__find(struct strlist *slist, const char *entry)
+{
+ struct str_node *snode = NULL;
+ struct rb_node *rb_node = rblist__find(&slist->rblist, entry);
+
+ if (rb_node)
+ snode = container_of(rb_node, struct str_node, rb_node);
+
+ return snode;
+}
+
+static int strlist__parse_list_entry(struct strlist *slist, const char *s,
+ const char *subst_dir)
+{
+ int err;
+ char *subst = NULL;
+
+ if (strncmp(s, "file://", 7) == 0)
+ return strlist__load(slist, s + 7);
+
+ if (subst_dir) {
+ err = -ENOMEM;
+ if (asprintf(&subst, "%s/%s", subst_dir, s) < 0)
+ goto out;
+
+ if (access(subst, F_OK) == 0) {
+ err = strlist__load(slist, subst);
+ goto out;
+ }
+
+ if (slist->file_only) {
+ err = -ENOENT;
+ goto out;
+ }
+ }
+
+ err = strlist__add(slist, s);
+out:
+ free(subst);
+ return err;
+}
+
+static int strlist__parse_list(struct strlist *slist, const char *s, const char *subst_dir)
+{
+ char *sep;
+ int err;
+
+ while ((sep = strchr(s, ',')) != NULL) {
+ *sep = '\0';
+ err = strlist__parse_list_entry(slist, s, subst_dir);
+ *sep = ',';
+ if (err != 0)
+ return err;
+ s = sep + 1;
+ }
+
+ return *s ? strlist__parse_list_entry(slist, s, subst_dir) : 0;
+}
+
+struct strlist *strlist__new(const char *list, const struct strlist_config *config)
+{
+ struct strlist *slist = malloc(sizeof(*slist));
+
+ if (slist != NULL) {
+ bool dupstr = true;
+ bool file_only = false;
+ const char *dirname = NULL;
+
+ if (config) {
+ dupstr = !config->dont_dupstr;
+ dirname = config->dirname;
+ file_only = config->file_only;
+ }
+
+ rblist__init(&slist->rblist);
+ slist->rblist.node_cmp = strlist__node_cmp;
+ slist->rblist.node_new = strlist__node_new;
+ slist->rblist.node_delete = strlist__node_delete;
+
+ slist->dupstr = dupstr;
+ slist->file_only = file_only;
+
+ if (list && strlist__parse_list(slist, list, dirname) != 0)
+ goto out_error;
+ }
+
+ return slist;
+out_error:
+ free(slist);
+ return NULL;
+}
+
+void strlist__delete(struct strlist *slist)
+{
+ if (slist != NULL)
+ rblist__delete(&slist->rblist);
+}
+
+struct str_node *strlist__entry(const struct strlist *slist, unsigned int idx)
+{
+ struct str_node *snode = NULL;
+ struct rb_node *rb_node;
+
+ rb_node = rblist__entry(&slist->rblist, idx);
+ if (rb_node)
+ snode = container_of(rb_node, struct str_node, rb_node);
+
+ return snode;
+}
diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h
new file mode 100644
index 000000000..7e82c71dc
--- /dev/null
+++ b/tools/perf/util/strlist.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_STRLIST_H
+#define __PERF_STRLIST_H
+
+#include <linux/rbtree.h>
+#include <stdbool.h>
+
+#include "rblist.h"
+
+struct str_node {
+ struct rb_node rb_node;
+ const char *s;
+};
+
+struct strlist {
+ struct rblist rblist;
+ bool dupstr;
+ bool file_only;
+};
+
+/*
+ * @file_only: When dirname is present, only consider entries as filenames,
+ * that should not be added to the list if dirname/entry is not
+ * found
+ */
+struct strlist_config {
+ bool dont_dupstr;
+ bool file_only;
+ const char *dirname;
+};
+
+struct strlist *strlist__new(const char *slist, const struct strlist_config *config);
+void strlist__delete(struct strlist *slist);
+
+void strlist__remove(struct strlist *slist, struct str_node *sn);
+int strlist__load(struct strlist *slist, const char *filename);
+int strlist__add(struct strlist *slist, const char *str);
+
+struct str_node *strlist__entry(const struct strlist *slist, unsigned int idx);
+struct str_node *strlist__find(struct strlist *slist, const char *entry);
+
+static inline bool strlist__has_entry(struct strlist *slist, const char *entry)
+{
+ return strlist__find(slist, entry) != NULL;
+}
+
+static inline bool strlist__empty(const struct strlist *slist)
+{
+ return rblist__empty(&slist->rblist);
+}
+
+static inline unsigned int strlist__nr_entries(const struct strlist *slist)
+{
+ return rblist__nr_entries(&slist->rblist);
+}
+
+/* For strlist iteration */
+static inline struct str_node *strlist__first(struct strlist *slist)
+{
+ struct rb_node *rn = rb_first_cached(&slist->rblist.entries);
+ return rn ? rb_entry(rn, struct str_node, rb_node) : NULL;
+}
+static inline struct str_node *strlist__next(struct str_node *sn)
+{
+ struct rb_node *rn;
+ if (!sn)
+ return NULL;
+ rn = rb_next(&sn->rb_node);
+ return rn ? rb_entry(rn, struct str_node, rb_node) : NULL;
+}
+
+/**
+ * strlist_for_each - iterate over a strlist
+ * @pos: the &struct str_node to use as a loop cursor.
+ * @slist: the &struct strlist for loop.
+ */
+#define strlist__for_each_entry(pos, slist) \
+ for (pos = strlist__first(slist); pos; pos = strlist__next(pos))
+
+/**
+ * strlist_for_each_safe - iterate over a strlist safe against removal of
+ * str_node
+ * @pos: the &struct str_node to use as a loop cursor.
+ * @n: another &struct str_node to use as temporary storage.
+ * @slist: the &struct strlist for loop.
+ */
+#define strlist__for_each_entry_safe(pos, n, slist) \
+ for (pos = strlist__first(slist), n = strlist__next(pos); pos;\
+ pos = n, n = strlist__next(n))
+#endif /* __PERF_STRLIST_H */
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
new file mode 100644
index 000000000..0e4dc31c6
--- /dev/null
+++ b/tools/perf/util/svghelper.c
@@ -0,0 +1,809 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svghelper.c - helper functions for outputting svg
+ *
+ * (C) Copyright 2009 Intel Corporation
+ *
+ * Authors:
+ * Arjan van de Ven <arjan@linux.intel.com>
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <linux/bitmap.h>
+#include <linux/string.h>
+#include <linux/time64.h>
+#include <linux/zalloc.h>
+#include <internal/cpumap.h>
+#include <perf/cpumap.h>
+
+#include "env.h"
+#include "svghelper.h"
+
+static u64 first_time, last_time;
+static u64 turbo_frequency, max_freq;
+
+
+#define SLOT_MULT 30.0
+#define SLOT_HEIGHT 25.0
+#define SLOT_HALF (SLOT_HEIGHT / 2)
+
+int svg_page_width = 1000;
+u64 svg_highlight;
+const char *svg_highlight_name;
+
+#define MIN_TEXT_SIZE 0.01
+
+static u64 total_height;
+static FILE *svgfile;
+
+static double cpu2slot(int cpu)
+{
+ return 2 * cpu + 1;
+}
+
+static int *topology_map;
+
+static double cpu2y(int cpu)
+{
+ if (topology_map)
+ return cpu2slot(topology_map[cpu]) * SLOT_MULT;
+ else
+ return cpu2slot(cpu) * SLOT_MULT;
+}
+
+static double time2pixels(u64 __time)
+{
+ double X;
+
+ X = 1.0 * svg_page_width * (__time - first_time) / (last_time - first_time);
+ return X;
+}
+
+/*
+ * Round text sizes so that the svg viewer only needs a discrete
+ * number of renderings of the font
+ */
+static double round_text_size(double size)
+{
+ int loop = 100;
+ double target = 10.0;
+
+ if (size >= 10.0)
+ return size;
+ while (loop--) {
+ if (size >= target)
+ return target;
+ target = target / 2.0;
+ }
+ return size;
+}
+
+void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end)
+{
+ int new_width;
+
+ svgfile = fopen(filename, "w");
+ if (!svgfile) {
+ fprintf(stderr, "Cannot open %s for output\n", filename);
+ return;
+ }
+ first_time = start;
+ first_time = first_time / 100000000 * 100000000;
+ last_time = end;
+
+ /*
+ * if the recording is short, we default to a width of 1000, but
+ * for longer recordings we want at least 200 units of width per second
+ */
+ new_width = (last_time - first_time) / 5000000;
+
+ if (new_width > svg_page_width)
+ svg_page_width = new_width;
+
+ total_height = (1 + rows + cpu2slot(cpus)) * SLOT_MULT;
+ fprintf(svgfile, "<?xml version=\"1.0\" standalone=\"no\"?> \n");
+ fprintf(svgfile, "<!DOCTYPE svg SYSTEM \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n");
+ fprintf(svgfile, "<svg width=\"%i\" height=\"%" PRIu64 "\" version=\"1.1\" xmlns=\"http://www.w3.org/2000/svg\">\n", svg_page_width, total_height);
+
+ fprintf(svgfile, "<defs>\n <style type=\"text/css\">\n <![CDATA[\n");
+
+ fprintf(svgfile, " rect { stroke-width: 1; }\n");
+ fprintf(svgfile, " rect.process { fill:rgb(180,180,180); fill-opacity:0.9; stroke-width:1; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.process2 { fill:rgb(180,180,180); fill-opacity:0.9; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.process3 { fill:rgb(180,180,180); fill-opacity:0.5; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.sample { fill:rgb( 0, 0,255); fill-opacity:0.8; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.sample_hi{ fill:rgb(255,128, 0); fill-opacity:0.8; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.error { fill:rgb(255, 0, 0); fill-opacity:0.5; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.net { fill:rgb( 0,128, 0); fill-opacity:0.5; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.disk { fill:rgb( 0, 0,255); fill-opacity:0.5; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.sync { fill:rgb(128,128, 0); fill-opacity:0.5; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.poll { fill:rgb( 0,128,128); fill-opacity:0.2; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.blocked { fill:rgb(255, 0, 0); fill-opacity:0.5; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.waiting { fill:rgb(224,214, 0); fill-opacity:0.8; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.WAITING { fill:rgb(255,214, 48); fill-opacity:0.6; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.cpu { fill:rgb(192,192,192); fill-opacity:0.2; stroke-width:0.5; stroke:rgb(128,128,128); } \n");
+ fprintf(svgfile, " rect.pstate { fill:rgb(128,128,128); fill-opacity:0.8; stroke-width:0; } \n");
+ fprintf(svgfile, " rect.c1 { fill:rgb(255,214,214); fill-opacity:0.5; stroke-width:0; } \n");
+ fprintf(svgfile, " rect.c2 { fill:rgb(255,172,172); fill-opacity:0.5; stroke-width:0; } \n");
+ fprintf(svgfile, " rect.c3 { fill:rgb(255,130,130); fill-opacity:0.5; stroke-width:0; } \n");
+ fprintf(svgfile, " rect.c4 { fill:rgb(255, 88, 88); fill-opacity:0.5; stroke-width:0; } \n");
+ fprintf(svgfile, " rect.c5 { fill:rgb(255, 44, 44); fill-opacity:0.5; stroke-width:0; } \n");
+ fprintf(svgfile, " rect.c6 { fill:rgb(255, 0, 0); fill-opacity:0.5; stroke-width:0; } \n");
+ fprintf(svgfile, " line.pstate { stroke:rgb(255,255, 0); stroke-opacity:0.8; stroke-width:2; } \n");
+
+ fprintf(svgfile, " ]]>\n </style>\n</defs>\n");
+}
+
+static double normalize_height(double height)
+{
+ if (height < 0.25)
+ return 0.25;
+ else if (height < 0.50)
+ return 0.50;
+ else if (height < 0.75)
+ return 0.75;
+ else
+ return 0.100;
+}
+
+void svg_ubox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges)
+{
+ double w = time2pixels(end) - time2pixels(start);
+ height = normalize_height(height);
+
+ if (!svgfile)
+ return;
+
+ fprintf(svgfile, "<g>\n");
+ fprintf(svgfile, "<title>fd=%d error=%d merges=%d</title>\n", fd, err, merges);
+ fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"%s\"/>\n",
+ time2pixels(start),
+ w,
+ Yslot * SLOT_MULT,
+ SLOT_HALF * height,
+ type);
+ fprintf(svgfile, "</g>\n");
+}
+
+void svg_lbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges)
+{
+ double w = time2pixels(end) - time2pixels(start);
+ height = normalize_height(height);
+
+ if (!svgfile)
+ return;
+
+ fprintf(svgfile, "<g>\n");
+ fprintf(svgfile, "<title>fd=%d error=%d merges=%d</title>\n", fd, err, merges);
+ fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"%s\"/>\n",
+ time2pixels(start),
+ w,
+ Yslot * SLOT_MULT + SLOT_HEIGHT - SLOT_HALF * height,
+ SLOT_HALF * height,
+ type);
+ fprintf(svgfile, "</g>\n");
+}
+
+void svg_fbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges)
+{
+ double w = time2pixels(end) - time2pixels(start);
+ height = normalize_height(height);
+
+ if (!svgfile)
+ return;
+
+ fprintf(svgfile, "<g>\n");
+ fprintf(svgfile, "<title>fd=%d error=%d merges=%d</title>\n", fd, err, merges);
+ fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"%s\"/>\n",
+ time2pixels(start),
+ w,
+ Yslot * SLOT_MULT + SLOT_HEIGHT - SLOT_HEIGHT * height,
+ SLOT_HEIGHT * height,
+ type);
+ fprintf(svgfile, "</g>\n");
+}
+
+void svg_box(int Yslot, u64 start, u64 end, const char *type)
+{
+ if (!svgfile)
+ return;
+
+ fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"%s\"/>\n",
+ time2pixels(start), time2pixels(end)-time2pixels(start), Yslot * SLOT_MULT, SLOT_HEIGHT, type);
+}
+
+static char *time_to_string(u64 duration);
+void svg_blocked(int Yslot, int cpu, u64 start, u64 end, const char *backtrace)
+{
+ if (!svgfile)
+ return;
+
+ fprintf(svgfile, "<g>\n");
+ fprintf(svgfile, "<title>#%d blocked %s</title>\n", cpu,
+ time_to_string(end - start));
+ if (backtrace)
+ fprintf(svgfile, "<desc>Blocked on:\n%s</desc>\n", backtrace);
+ svg_box(Yslot, start, end, "blocked");
+ fprintf(svgfile, "</g>\n");
+}
+
+void svg_running(int Yslot, int cpu, u64 start, u64 end, const char *backtrace)
+{
+ double text_size;
+ const char *type;
+
+ if (!svgfile)
+ return;
+
+ if (svg_highlight && end - start > svg_highlight)
+ type = "sample_hi";
+ else
+ type = "sample";
+ fprintf(svgfile, "<g>\n");
+
+ fprintf(svgfile, "<title>#%d running %s</title>\n",
+ cpu, time_to_string(end - start));
+ if (backtrace)
+ fprintf(svgfile, "<desc>Switched because:\n%s</desc>\n", backtrace);
+ fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"%s\"/>\n",
+ time2pixels(start), time2pixels(end)-time2pixels(start), Yslot * SLOT_MULT, SLOT_HEIGHT,
+ type);
+
+ text_size = (time2pixels(end)-time2pixels(start));
+ if (cpu > 9)
+ text_size = text_size/2;
+ if (text_size > 1.25)
+ text_size = 1.25;
+ text_size = round_text_size(text_size);
+
+ if (text_size > MIN_TEXT_SIZE)
+ fprintf(svgfile, "<text x=\"%.8f\" y=\"%.8f\" font-size=\"%.8fpt\">%i</text>\n",
+ time2pixels(start), Yslot * SLOT_MULT + SLOT_HEIGHT - 1, text_size, cpu + 1);
+
+ fprintf(svgfile, "</g>\n");
+}
+
+static char *time_to_string(u64 duration)
+{
+ static char text[80];
+
+ text[0] = 0;
+
+ if (duration < NSEC_PER_USEC) /* less than 1 usec */
+ return text;
+
+ if (duration < NSEC_PER_MSEC) { /* less than 1 msec */
+ sprintf(text, "%.1f us", duration / (double)NSEC_PER_USEC);
+ return text;
+ }
+ sprintf(text, "%.1f ms", duration / (double)NSEC_PER_MSEC);
+
+ return text;
+}
+
+void svg_waiting(int Yslot, int cpu, u64 start, u64 end, const char *backtrace)
+{
+ char *text;
+ const char *style;
+ double font_size;
+
+ if (!svgfile)
+ return;
+
+ style = "waiting";
+
+ if (end-start > 10 * NSEC_PER_MSEC) /* 10 msec */
+ style = "WAITING";
+
+ text = time_to_string(end-start);
+
+ font_size = 1.0 * (time2pixels(end)-time2pixels(start));
+
+ if (font_size > 3)
+ font_size = 3;
+
+ font_size = round_text_size(font_size);
+
+ fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\">\n", time2pixels(start), Yslot * SLOT_MULT);
+ fprintf(svgfile, "<title>#%d waiting %s</title>\n", cpu, time_to_string(end - start));
+ if (backtrace)
+ fprintf(svgfile, "<desc>Waiting on:\n%s</desc>\n", backtrace);
+ fprintf(svgfile, "<rect x=\"0\" width=\"%.8f\" y=\"0\" height=\"%.1f\" class=\"%s\"/>\n",
+ time2pixels(end)-time2pixels(start), SLOT_HEIGHT, style);
+ if (font_size > MIN_TEXT_SIZE)
+ fprintf(svgfile, "<text transform=\"rotate(90)\" font-size=\"%.8fpt\"> %s</text>\n",
+ font_size, text);
+ fprintf(svgfile, "</g>\n");
+}
+
+static char *cpu_model(void)
+{
+ static char cpu_m[255];
+ char buf[256];
+ FILE *file;
+
+ cpu_m[0] = 0;
+ /* CPU type */
+ file = fopen("/proc/cpuinfo", "r");
+ if (file) {
+ while (fgets(buf, 255, file)) {
+ if (strcasestr(buf, "model name")) {
+ strlcpy(cpu_m, &buf[13], 255);
+ break;
+ }
+ }
+ fclose(file);
+ }
+
+ /* CPU type */
+ file = fopen("/sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies", "r");
+ if (file) {
+ while (fgets(buf, 255, file)) {
+ unsigned int freq;
+ freq = strtoull(buf, NULL, 10);
+ if (freq > max_freq)
+ max_freq = freq;
+ }
+ fclose(file);
+ }
+ return cpu_m;
+}
+
+void svg_cpu_box(int cpu, u64 __max_freq, u64 __turbo_freq)
+{
+ char cpu_string[80];
+ if (!svgfile)
+ return;
+
+ max_freq = __max_freq;
+ turbo_frequency = __turbo_freq;
+
+ fprintf(svgfile, "<g>\n");
+
+ fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"cpu\"/>\n",
+ time2pixels(first_time),
+ time2pixels(last_time)-time2pixels(first_time),
+ cpu2y(cpu), SLOT_MULT+SLOT_HEIGHT);
+
+ sprintf(cpu_string, "CPU %i", (int)cpu);
+ fprintf(svgfile, "<text x=\"%.8f\" y=\"%.8f\">%s</text>\n",
+ 10+time2pixels(first_time), cpu2y(cpu) + SLOT_HEIGHT/2, cpu_string);
+
+ fprintf(svgfile, "<text transform=\"translate(%.8f,%.8f)\" font-size=\"1.25pt\">%s</text>\n",
+ 10+time2pixels(first_time), cpu2y(cpu) + SLOT_MULT + SLOT_HEIGHT - 4, cpu_model());
+
+ fprintf(svgfile, "</g>\n");
+}
+
+void svg_process(int cpu, u64 start, u64 end, int pid, const char *name, const char *backtrace)
+{
+ double width;
+ const char *type;
+
+ if (!svgfile)
+ return;
+
+ if (svg_highlight && end - start >= svg_highlight)
+ type = "sample_hi";
+ else if (svg_highlight_name && strstr(name, svg_highlight_name))
+ type = "sample_hi";
+ else
+ type = "sample";
+
+ fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\">\n", time2pixels(start), cpu2y(cpu));
+ fprintf(svgfile, "<title>%d %s running %s</title>\n", pid, name, time_to_string(end - start));
+ if (backtrace)
+ fprintf(svgfile, "<desc>Switched because:\n%s</desc>\n", backtrace);
+ fprintf(svgfile, "<rect x=\"0\" width=\"%.8f\" y=\"0\" height=\"%.1f\" class=\"%s\"/>\n",
+ time2pixels(end)-time2pixels(start), SLOT_MULT+SLOT_HEIGHT, type);
+ width = time2pixels(end)-time2pixels(start);
+ if (width > 6)
+ width = 6;
+
+ width = round_text_size(width);
+
+ if (width > MIN_TEXT_SIZE)
+ fprintf(svgfile, "<text transform=\"rotate(90)\" font-size=\"%.8fpt\">%s</text>\n",
+ width, name);
+
+ fprintf(svgfile, "</g>\n");
+}
+
+void svg_cstate(int cpu, u64 start, u64 end, int type)
+{
+ double width;
+ char style[128];
+
+ if (!svgfile)
+ return;
+
+
+ fprintf(svgfile, "<g>\n");
+
+ if (type > 6)
+ type = 6;
+ sprintf(style, "c%i", type);
+
+ fprintf(svgfile, "<rect class=\"%s\" x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\"/>\n",
+ style,
+ time2pixels(start), time2pixels(end)-time2pixels(start),
+ cpu2y(cpu), SLOT_MULT+SLOT_HEIGHT);
+
+ width = (time2pixels(end)-time2pixels(start))/2.0;
+ if (width > 6)
+ width = 6;
+
+ width = round_text_size(width);
+
+ if (width > MIN_TEXT_SIZE)
+ fprintf(svgfile, "<text x=\"%.8f\" y=\"%.8f\" font-size=\"%.8fpt\">C%i</text>\n",
+ time2pixels(start), cpu2y(cpu)+width, width, type);
+
+ fprintf(svgfile, "</g>\n");
+}
+
+static char *HzToHuman(unsigned long hz)
+{
+ static char buffer[1024];
+ unsigned long long Hz;
+
+ memset(buffer, 0, 1024);
+
+ Hz = hz;
+
+ /* default: just put the Number in */
+ sprintf(buffer, "%9lli", Hz);
+
+ if (Hz > 1000)
+ sprintf(buffer, " %6lli Mhz", (Hz+500)/1000);
+
+ if (Hz > 1500000)
+ sprintf(buffer, " %6.2f Ghz", (Hz+5000.0)/1000000);
+
+ if (Hz == turbo_frequency)
+ sprintf(buffer, "Turbo");
+
+ return buffer;
+}
+
+void svg_pstate(int cpu, u64 start, u64 end, u64 freq)
+{
+ double height = 0;
+
+ if (!svgfile)
+ return;
+
+ fprintf(svgfile, "<g>\n");
+
+ if (max_freq)
+ height = freq * 1.0 / max_freq * (SLOT_HEIGHT + SLOT_MULT);
+ height = 1 + cpu2y(cpu) + SLOT_MULT + SLOT_HEIGHT - height;
+ fprintf(svgfile, "<line x1=\"%.8f\" x2=\"%.8f\" y1=\"%.1f\" y2=\"%.1f\" class=\"pstate\"/>\n",
+ time2pixels(start), time2pixels(end), height, height);
+ fprintf(svgfile, "<text x=\"%.8f\" y=\"%.8f\" font-size=\"0.25pt\">%s</text>\n",
+ time2pixels(start), height+0.9, HzToHuman(freq));
+
+ fprintf(svgfile, "</g>\n");
+}
+
+
+void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2, const char *backtrace)
+{
+ double height;
+
+ if (!svgfile)
+ return;
+
+
+ fprintf(svgfile, "<g>\n");
+
+ fprintf(svgfile, "<title>%s wakes up %s</title>\n",
+ desc1 ? desc1 : "?",
+ desc2 ? desc2 : "?");
+
+ if (backtrace)
+ fprintf(svgfile, "<desc>%s</desc>\n", backtrace);
+
+ if (row1 < row2) {
+ if (row1) {
+ fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+ time2pixels(start), row1 * SLOT_MULT + SLOT_HEIGHT, time2pixels(start), row1 * SLOT_MULT + SLOT_HEIGHT + SLOT_MULT/32);
+ if (desc2)
+ fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\"><text transform=\"rotate(90)\" font-size=\"0.02pt\">%s &gt;</text></g>\n",
+ time2pixels(start), row1 * SLOT_MULT + SLOT_HEIGHT + SLOT_HEIGHT/48, desc2);
+ }
+ if (row2) {
+ fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+ time2pixels(start), row2 * SLOT_MULT - SLOT_MULT/32, time2pixels(start), row2 * SLOT_MULT);
+ if (desc1)
+ fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\"><text transform=\"rotate(90)\" font-size=\"0.02pt\">%s &gt;</text></g>\n",
+ time2pixels(start), row2 * SLOT_MULT - SLOT_MULT/32, desc1);
+ }
+ } else {
+ if (row2) {
+ fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+ time2pixels(start), row2 * SLOT_MULT + SLOT_HEIGHT, time2pixels(start), row2 * SLOT_MULT + SLOT_HEIGHT + SLOT_MULT/32);
+ if (desc1)
+ fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\"><text transform=\"rotate(90)\" font-size=\"0.02pt\">%s &lt;</text></g>\n",
+ time2pixels(start), row2 * SLOT_MULT + SLOT_HEIGHT + SLOT_MULT/48, desc1);
+ }
+ if (row1) {
+ fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+ time2pixels(start), row1 * SLOT_MULT - SLOT_MULT/32, time2pixels(start), row1 * SLOT_MULT);
+ if (desc2)
+ fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\"><text transform=\"rotate(90)\" font-size=\"0.02pt\">%s &lt;</text></g>\n",
+ time2pixels(start), row1 * SLOT_MULT - SLOT_HEIGHT/32, desc2);
+ }
+ }
+ height = row1 * SLOT_MULT;
+ if (row2 > row1)
+ height += SLOT_HEIGHT;
+ if (row1)
+ fprintf(svgfile, "<circle cx=\"%.8f\" cy=\"%.2f\" r = \"0.01\" style=\"fill:rgb(32,255,32)\"/>\n",
+ time2pixels(start), height);
+
+ fprintf(svgfile, "</g>\n");
+}
+
+void svg_wakeline(u64 start, int row1, int row2, const char *backtrace)
+{
+ double height;
+
+ if (!svgfile)
+ return;
+
+
+ fprintf(svgfile, "<g>\n");
+
+ if (backtrace)
+ fprintf(svgfile, "<desc>%s</desc>\n", backtrace);
+
+ if (row1 < row2)
+ fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+ time2pixels(start), row1 * SLOT_MULT + SLOT_HEIGHT, time2pixels(start), row2 * SLOT_MULT);
+ else
+ fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+ time2pixels(start), row2 * SLOT_MULT + SLOT_HEIGHT, time2pixels(start), row1 * SLOT_MULT);
+
+ height = row1 * SLOT_MULT;
+ if (row2 > row1)
+ height += SLOT_HEIGHT;
+ fprintf(svgfile, "<circle cx=\"%.8f\" cy=\"%.2f\" r = \"0.01\" style=\"fill:rgb(32,255,32)\"/>\n",
+ time2pixels(start), height);
+
+ fprintf(svgfile, "</g>\n");
+}
+
+void svg_interrupt(u64 start, int row, const char *backtrace)
+{
+ if (!svgfile)
+ return;
+
+ fprintf(svgfile, "<g>\n");
+
+ fprintf(svgfile, "<title>Wakeup from interrupt</title>\n");
+
+ if (backtrace)
+ fprintf(svgfile, "<desc>%s</desc>\n", backtrace);
+
+ fprintf(svgfile, "<circle cx=\"%.8f\" cy=\"%.2f\" r = \"0.01\" style=\"fill:rgb(255,128,128)\"/>\n",
+ time2pixels(start), row * SLOT_MULT);
+ fprintf(svgfile, "<circle cx=\"%.8f\" cy=\"%.2f\" r = \"0.01\" style=\"fill:rgb(255,128,128)\"/>\n",
+ time2pixels(start), row * SLOT_MULT + SLOT_HEIGHT);
+
+ fprintf(svgfile, "</g>\n");
+}
+
+void svg_text(int Yslot, u64 start, const char *text)
+{
+ if (!svgfile)
+ return;
+
+ fprintf(svgfile, "<text x=\"%.8f\" y=\"%.8f\">%s</text>\n",
+ time2pixels(start), Yslot * SLOT_MULT+SLOT_HEIGHT/2, text);
+}
+
+static void svg_legenda_box(int X, const char *text, const char *style)
+{
+ double boxsize;
+ boxsize = SLOT_HEIGHT / 2;
+
+ fprintf(svgfile, "<rect x=\"%i\" width=\"%.8f\" y=\"0\" height=\"%.1f\" class=\"%s\"/>\n",
+ X, boxsize, boxsize, style);
+ fprintf(svgfile, "<text transform=\"translate(%.8f, %.8f)\" font-size=\"%.8fpt\">%s</text>\n",
+ X + boxsize + 5, boxsize, 0.8 * boxsize, text);
+}
+
+void svg_io_legenda(void)
+{
+ if (!svgfile)
+ return;
+
+ fprintf(svgfile, "<g>\n");
+ svg_legenda_box(0, "Disk", "disk");
+ svg_legenda_box(100, "Network", "net");
+ svg_legenda_box(200, "Sync", "sync");
+ svg_legenda_box(300, "Poll", "poll");
+ svg_legenda_box(400, "Error", "error");
+ fprintf(svgfile, "</g>\n");
+}
+
+void svg_legenda(void)
+{
+ if (!svgfile)
+ return;
+
+ fprintf(svgfile, "<g>\n");
+ svg_legenda_box(0, "Running", "sample");
+ svg_legenda_box(100, "Idle","c1");
+ svg_legenda_box(200, "Deeper Idle", "c3");
+ svg_legenda_box(350, "Deepest Idle", "c6");
+ svg_legenda_box(550, "Sleeping", "process2");
+ svg_legenda_box(650, "Waiting for cpu", "waiting");
+ svg_legenda_box(800, "Blocked on IO", "blocked");
+ fprintf(svgfile, "</g>\n");
+}
+
+void svg_time_grid(double min_thickness)
+{
+ u64 i;
+
+ if (!svgfile)
+ return;
+
+ i = first_time;
+ while (i < last_time) {
+ int color = 220;
+ double thickness = 0.075;
+ if ((i % 100000000) == 0) {
+ thickness = 0.5;
+ color = 192;
+ }
+ if ((i % 1000000000) == 0) {
+ thickness = 2.0;
+ color = 128;
+ }
+
+ if (thickness >= min_thickness)
+ fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%" PRIu64 "\" style=\"stroke:rgb(%i,%i,%i);stroke-width:%.3f\"/>\n",
+ time2pixels(i), SLOT_MULT/2, time2pixels(i),
+ total_height, color, color, color, thickness);
+
+ i += 10000000;
+ }
+}
+
+void svg_close(void)
+{
+ if (svgfile) {
+ fprintf(svgfile, "</svg>\n");
+ fclose(svgfile);
+ svgfile = NULL;
+ }
+}
+
+#define cpumask_bits(maskp) ((maskp)->bits)
+typedef struct { DECLARE_BITMAP(bits, MAX_NR_CPUS); } cpumask_t;
+
+struct topology {
+ cpumask_t *sib_core;
+ int sib_core_nr;
+ cpumask_t *sib_thr;
+ int sib_thr_nr;
+};
+
+static void scan_thread_topology(int *map, struct topology *t, int cpu,
+ int *pos, int nr_cpus)
+{
+ int i;
+ int thr;
+
+ for (i = 0; i < t->sib_thr_nr; i++) {
+ if (!test_bit(cpu, cpumask_bits(&t->sib_thr[i])))
+ continue;
+
+ for_each_set_bit(thr, cpumask_bits(&t->sib_thr[i]), nr_cpus)
+ if (map[thr] == -1)
+ map[thr] = (*pos)++;
+ }
+}
+
+static void scan_core_topology(int *map, struct topology *t, int nr_cpus)
+{
+ int pos = 0;
+ int i;
+ int cpu;
+
+ for (i = 0; i < t->sib_core_nr; i++)
+ for_each_set_bit(cpu, cpumask_bits(&t->sib_core[i]), nr_cpus)
+ scan_thread_topology(map, t, cpu, &pos, nr_cpus);
+}
+
+static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus)
+{
+ int i;
+ int ret = 0;
+ struct perf_cpu_map *m;
+ struct perf_cpu c;
+
+ m = perf_cpu_map__new(s);
+ if (!m)
+ return -1;
+
+ for (i = 0; i < perf_cpu_map__nr(m); i++) {
+ c = perf_cpu_map__cpu(m, i);
+ if (c.cpu >= nr_cpus) {
+ ret = -1;
+ break;
+ }
+
+ __set_bit(c.cpu, cpumask_bits(b));
+ }
+
+ perf_cpu_map__put(m);
+
+ return ret;
+}
+
+int svg_build_topology_map(struct perf_env *env)
+{
+ int i, nr_cpus;
+ struct topology t;
+ char *sib_core, *sib_thr;
+
+ nr_cpus = min(env->nr_cpus_online, MAX_NR_CPUS);
+
+ t.sib_core_nr = env->nr_sibling_cores;
+ t.sib_thr_nr = env->nr_sibling_threads;
+ t.sib_core = calloc(env->nr_sibling_cores, sizeof(cpumask_t));
+ t.sib_thr = calloc(env->nr_sibling_threads, sizeof(cpumask_t));
+
+ sib_core = env->sibling_cores;
+ sib_thr = env->sibling_threads;
+
+ if (!t.sib_core || !t.sib_thr) {
+ fprintf(stderr, "topology: no memory\n");
+ goto exit;
+ }
+
+ for (i = 0; i < env->nr_sibling_cores; i++) {
+ if (str_to_bitmap(sib_core, &t.sib_core[i], nr_cpus)) {
+ fprintf(stderr, "topology: can't parse siblings map\n");
+ goto exit;
+ }
+
+ sib_core += strlen(sib_core) + 1;
+ }
+
+ for (i = 0; i < env->nr_sibling_threads; i++) {
+ if (str_to_bitmap(sib_thr, &t.sib_thr[i], nr_cpus)) {
+ fprintf(stderr, "topology: can't parse siblings map\n");
+ goto exit;
+ }
+
+ sib_thr += strlen(sib_thr) + 1;
+ }
+
+ topology_map = malloc(sizeof(int) * nr_cpus);
+ if (!topology_map) {
+ fprintf(stderr, "topology: no memory\n");
+ goto exit;
+ }
+
+ for (i = 0; i < nr_cpus; i++)
+ topology_map[i] = -1;
+
+ scan_core_topology(topology_map, &t, nr_cpus);
+
+ return 0;
+
+exit:
+ zfree(&t.sib_core);
+ zfree(&t.sib_thr);
+
+ return -1;
+}
diff --git a/tools/perf/util/svghelper.h b/tools/perf/util/svghelper.h
new file mode 100644
index 000000000..81823e8ba
--- /dev/null
+++ b/tools/perf/util/svghelper.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SVGHELPER_H
+#define __PERF_SVGHELPER_H
+
+#include <linux/types.h>
+
+struct perf_env;
+
+void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end);
+void svg_ubox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
+void svg_lbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
+void svg_fbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
+void svg_box(int Yslot, u64 start, u64 end, const char *type);
+void svg_blocked(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
+void svg_running(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
+void svg_waiting(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
+void svg_cpu_box(int cpu, u64 max_frequency, u64 turbo_frequency);
+
+
+void svg_process(int cpu, u64 start, u64 end, int pid, const char *name, const char *backtrace);
+void svg_cstate(int cpu, u64 start, u64 end, int type);
+void svg_pstate(int cpu, u64 start, u64 end, u64 freq);
+
+
+void svg_time_grid(double min_thickness);
+void svg_io_legenda(void);
+void svg_legenda(void);
+void svg_wakeline(u64 start, int row1, int row2, const char *backtrace);
+void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2, const char *backtrace);
+void svg_interrupt(u64 start, int row, const char *backtrace);
+void svg_text(int Yslot, u64 start, const char *text);
+void svg_close(void);
+int svg_build_topology_map(struct perf_env *env);
+
+extern int svg_page_width;
+extern u64 svg_highlight;
+extern const char *svg_highlight_name;
+
+#endif /* __PERF_SVGHELPER_H */
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
new file mode 100644
index 000000000..95e99c332
--- /dev/null
+++ b/tools/perf/util/symbol-elf.c
@@ -0,0 +1,2915 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+#include "dso.h"
+#include "map.h"
+#include "maps.h"
+#include "symbol.h"
+#include "symsrc.h"
+#include "demangle-cxx.h"
+#include "demangle-ocaml.h"
+#include "demangle-java.h"
+#include "demangle-rust.h"
+#include "machine.h"
+#include "vdso.h"
+#include "debug.h"
+#include "util/copyfile.h"
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+#include <symbol/kallsyms.h>
+#include <internal/lib.h>
+
+#ifdef HAVE_LIBBFD_SUPPORT
+#define PACKAGE 'perf'
+#include <bfd.h>
+#endif
+
+#if defined(HAVE_LIBBFD_SUPPORT) || defined(HAVE_CPLUS_DEMANGLE_SUPPORT)
+#ifndef DMGL_PARAMS
+#define DMGL_PARAMS (1 << 0) /* Include function args */
+#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
+#endif
+#endif
+
+#ifndef EM_AARCH64
+#define EM_AARCH64 183 /* ARM 64 bit */
+#endif
+
+#ifndef EM_LOONGARCH
+#define EM_LOONGARCH 258
+#endif
+
+#ifndef ELF32_ST_VISIBILITY
+#define ELF32_ST_VISIBILITY(o) ((o) & 0x03)
+#endif
+
+/* For ELF64 the definitions are the same. */
+#ifndef ELF64_ST_VISIBILITY
+#define ELF64_ST_VISIBILITY(o) ELF32_ST_VISIBILITY (o)
+#endif
+
+/* How to extract information held in the st_other field. */
+#ifndef GELF_ST_VISIBILITY
+#define GELF_ST_VISIBILITY(val) ELF64_ST_VISIBILITY (val)
+#endif
+
+typedef Elf64_Nhdr GElf_Nhdr;
+
+
+#ifndef HAVE_ELF_GETPHDRNUM_SUPPORT
+static int elf_getphdrnum(Elf *elf, size_t *dst)
+{
+ GElf_Ehdr gehdr;
+ GElf_Ehdr *ehdr;
+
+ ehdr = gelf_getehdr(elf, &gehdr);
+ if (!ehdr)
+ return -1;
+
+ *dst = ehdr->e_phnum;
+
+ return 0;
+}
+#endif
+
+#ifndef HAVE_ELF_GETSHDRSTRNDX_SUPPORT
+static int elf_getshdrstrndx(Elf *elf __maybe_unused, size_t *dst __maybe_unused)
+{
+ pr_err("%s: update your libelf to > 0.140, this one lacks elf_getshdrstrndx().\n", __func__);
+ return -1;
+}
+#endif
+
+#ifndef NT_GNU_BUILD_ID
+#define NT_GNU_BUILD_ID 3
+#endif
+
+/**
+ * elf_symtab__for_each_symbol - iterate thru all the symbols
+ *
+ * @syms: struct elf_symtab instance to iterate
+ * @idx: uint32_t idx
+ * @sym: GElf_Sym iterator
+ */
+#define elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) \
+ for (idx = 0, gelf_getsym(syms, idx, &sym);\
+ idx < nr_syms; \
+ idx++, gelf_getsym(syms, idx, &sym))
+
+static inline uint8_t elf_sym__type(const GElf_Sym *sym)
+{
+ return GELF_ST_TYPE(sym->st_info);
+}
+
+static inline uint8_t elf_sym__visibility(const GElf_Sym *sym)
+{
+ return GELF_ST_VISIBILITY(sym->st_other);
+}
+
+#ifndef STT_GNU_IFUNC
+#define STT_GNU_IFUNC 10
+#endif
+
+static inline int elf_sym__is_function(const GElf_Sym *sym)
+{
+ return (elf_sym__type(sym) == STT_FUNC ||
+ elf_sym__type(sym) == STT_GNU_IFUNC) &&
+ sym->st_name != 0 &&
+ sym->st_shndx != SHN_UNDEF;
+}
+
+static inline bool elf_sym__is_object(const GElf_Sym *sym)
+{
+ return elf_sym__type(sym) == STT_OBJECT &&
+ sym->st_name != 0 &&
+ sym->st_shndx != SHN_UNDEF;
+}
+
+static inline int elf_sym__is_label(const GElf_Sym *sym)
+{
+ return elf_sym__type(sym) == STT_NOTYPE &&
+ sym->st_name != 0 &&
+ sym->st_shndx != SHN_UNDEF &&
+ sym->st_shndx != SHN_ABS &&
+ elf_sym__visibility(sym) != STV_HIDDEN &&
+ elf_sym__visibility(sym) != STV_INTERNAL;
+}
+
+static bool elf_sym__filter(GElf_Sym *sym)
+{
+ return elf_sym__is_function(sym) || elf_sym__is_object(sym);
+}
+
+static inline const char *elf_sym__name(const GElf_Sym *sym,
+ const Elf_Data *symstrs)
+{
+ return symstrs->d_buf + sym->st_name;
+}
+
+static inline const char *elf_sec__name(const GElf_Shdr *shdr,
+ const Elf_Data *secstrs)
+{
+ return secstrs->d_buf + shdr->sh_name;
+}
+
+static inline int elf_sec__is_text(const GElf_Shdr *shdr,
+ const Elf_Data *secstrs)
+{
+ return strstr(elf_sec__name(shdr, secstrs), "text") != NULL;
+}
+
+static inline bool elf_sec__is_data(const GElf_Shdr *shdr,
+ const Elf_Data *secstrs)
+{
+ return strstr(elf_sec__name(shdr, secstrs), "data") != NULL;
+}
+
+static bool elf_sec__filter(GElf_Shdr *shdr, Elf_Data *secstrs)
+{
+ return elf_sec__is_text(shdr, secstrs) ||
+ elf_sec__is_data(shdr, secstrs);
+}
+
+static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr)
+{
+ Elf_Scn *sec = NULL;
+ GElf_Shdr shdr;
+ size_t cnt = 1;
+
+ while ((sec = elf_nextscn(elf, sec)) != NULL) {
+ gelf_getshdr(sec, &shdr);
+
+ if ((addr >= shdr.sh_addr) &&
+ (addr < (shdr.sh_addr + shdr.sh_size)))
+ return cnt;
+
+ ++cnt;
+ }
+
+ return -1;
+}
+
+Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
+ GElf_Shdr *shp, const char *name, size_t *idx)
+{
+ Elf_Scn *sec = NULL;
+ size_t cnt = 1;
+
+ /* ELF is corrupted/truncated, avoid calling elf_strptr. */
+ if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL))
+ return NULL;
+
+ while ((sec = elf_nextscn(elf, sec)) != NULL) {
+ char *str;
+
+ gelf_getshdr(sec, shp);
+ str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name);
+ if (str && !strcmp(name, str)) {
+ if (idx)
+ *idx = cnt;
+ return sec;
+ }
+ ++cnt;
+ }
+
+ return NULL;
+}
+
+bool filename__has_section(const char *filename, const char *sec)
+{
+ int fd;
+ Elf *elf;
+ GElf_Ehdr ehdr;
+ GElf_Shdr shdr;
+ bool found = false;
+
+ fd = open(filename, O_RDONLY);
+ if (fd < 0)
+ return false;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL)
+ goto out;
+
+ if (gelf_getehdr(elf, &ehdr) == NULL)
+ goto elf_out;
+
+ found = !!elf_section_by_name(elf, &ehdr, &shdr, sec, NULL);
+
+elf_out:
+ elf_end(elf);
+out:
+ close(fd);
+ return found;
+}
+
+static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr)
+{
+ size_t i, phdrnum;
+ u64 sz;
+
+ if (elf_getphdrnum(elf, &phdrnum))
+ return -1;
+
+ for (i = 0; i < phdrnum; i++) {
+ if (gelf_getphdr(elf, i, phdr) == NULL)
+ return -1;
+
+ if (phdr->p_type != PT_LOAD)
+ continue;
+
+ sz = max(phdr->p_memsz, phdr->p_filesz);
+ if (!sz)
+ continue;
+
+ if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz))
+ return 0;
+ }
+
+ /* Not found any valid program header */
+ return -1;
+}
+
+static bool want_demangle(bool is_kernel_sym)
+{
+ return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
+}
+
+/*
+ * Demangle C++ function signature, typically replaced by demangle-cxx.cpp
+ * version.
+ */
+__weak char *cxx_demangle_sym(const char *str __maybe_unused, bool params __maybe_unused,
+ bool modifiers __maybe_unused)
+{
+#ifdef HAVE_LIBBFD_SUPPORT
+ int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0);
+
+ return bfd_demangle(NULL, str, flags);
+#elif defined(HAVE_CPLUS_DEMANGLE_SUPPORT)
+ int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0);
+
+ return cplus_demangle(str, flags);
+#else
+ return NULL;
+#endif
+}
+
+static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
+{
+ char *demangled = NULL;
+
+ /*
+ * We need to figure out if the object was created from C++ sources
+ * DWARF DW_compile_unit has this, but we don't always have access
+ * to it...
+ */
+ if (!want_demangle(dso->kernel || kmodule))
+ return demangled;
+
+ demangled = cxx_demangle_sym(elf_name, verbose > 0, verbose > 0);
+ if (demangled == NULL) {
+ demangled = ocaml_demangle_sym(elf_name);
+ if (demangled == NULL) {
+ demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET);
+ }
+ }
+ else if (rust_is_mangled(demangled))
+ /*
+ * Input to Rust demangling is the BFD-demangled
+ * name which it Rust-demangles in place.
+ */
+ rust_demangle_sym(demangled);
+
+ return demangled;
+}
+
+struct rel_info {
+ u32 nr_entries;
+ u32 *sorted;
+ bool is_rela;
+ Elf_Data *reldata;
+ GElf_Rela rela;
+ GElf_Rel rel;
+};
+
+static u32 get_rel_symidx(struct rel_info *ri, u32 idx)
+{
+ idx = ri->sorted ? ri->sorted[idx] : idx;
+ if (ri->is_rela) {
+ gelf_getrela(ri->reldata, idx, &ri->rela);
+ return GELF_R_SYM(ri->rela.r_info);
+ }
+ gelf_getrel(ri->reldata, idx, &ri->rel);
+ return GELF_R_SYM(ri->rel.r_info);
+}
+
+static u64 get_rel_offset(struct rel_info *ri, u32 x)
+{
+ if (ri->is_rela) {
+ GElf_Rela rela;
+
+ gelf_getrela(ri->reldata, x, &rela);
+ return rela.r_offset;
+ } else {
+ GElf_Rel rel;
+
+ gelf_getrel(ri->reldata, x, &rel);
+ return rel.r_offset;
+ }
+}
+
+static int rel_cmp(const void *a, const void *b, void *r)
+{
+ struct rel_info *ri = r;
+ u64 a_offset = get_rel_offset(ri, *(const u32 *)a);
+ u64 b_offset = get_rel_offset(ri, *(const u32 *)b);
+
+ return a_offset < b_offset ? -1 : (a_offset > b_offset ? 1 : 0);
+}
+
+static int sort_rel(struct rel_info *ri)
+{
+ size_t sz = sizeof(ri->sorted[0]);
+ u32 i;
+
+ ri->sorted = calloc(ri->nr_entries, sz);
+ if (!ri->sorted)
+ return -1;
+ for (i = 0; i < ri->nr_entries; i++)
+ ri->sorted[i] = i;
+ qsort_r(ri->sorted, ri->nr_entries, sz, rel_cmp, ri);
+ return 0;
+}
+
+/*
+ * For x86_64, the GNU linker is putting IFUNC information in the relocation
+ * addend.
+ */
+static bool addend_may_be_ifunc(GElf_Ehdr *ehdr, struct rel_info *ri)
+{
+ return ehdr->e_machine == EM_X86_64 && ri->is_rela &&
+ GELF_R_TYPE(ri->rela.r_info) == R_X86_64_IRELATIVE;
+}
+
+static bool get_ifunc_name(Elf *elf, struct dso *dso, GElf_Ehdr *ehdr,
+ struct rel_info *ri, char *buf, size_t buf_sz)
+{
+ u64 addr = ri->rela.r_addend;
+ struct symbol *sym;
+ GElf_Phdr phdr;
+
+ if (!addend_may_be_ifunc(ehdr, ri))
+ return false;
+
+ if (elf_read_program_header(elf, addr, &phdr))
+ return false;
+
+ addr -= phdr.p_vaddr - phdr.p_offset;
+
+ sym = dso__find_symbol_nocache(dso, addr);
+
+ /* Expecting the address to be an IFUNC or IFUNC alias */
+ if (!sym || sym->start != addr || (sym->type != STT_GNU_IFUNC && !sym->ifunc_alias))
+ return false;
+
+ snprintf(buf, buf_sz, "%s@plt", sym->name);
+
+ return true;
+}
+
+static void exit_rel(struct rel_info *ri)
+{
+ zfree(&ri->sorted);
+}
+
+static bool get_plt_sizes(struct dso *dso, GElf_Ehdr *ehdr, GElf_Shdr *shdr_plt,
+ u64 *plt_header_size, u64 *plt_entry_size)
+{
+ switch (ehdr->e_machine) {
+ case EM_ARM:
+ *plt_header_size = 20;
+ *plt_entry_size = 12;
+ return true;
+ case EM_AARCH64:
+ *plt_header_size = 32;
+ *plt_entry_size = 16;
+ return true;
+ case EM_LOONGARCH:
+ *plt_header_size = 32;
+ *plt_entry_size = 16;
+ return true;
+ case EM_SPARC:
+ *plt_header_size = 48;
+ *plt_entry_size = 12;
+ return true;
+ case EM_SPARCV9:
+ *plt_header_size = 128;
+ *plt_entry_size = 32;
+ return true;
+ case EM_386:
+ case EM_X86_64:
+ *plt_entry_size = shdr_plt->sh_entsize;
+ /* Size is 8 or 16, if not, assume alignment indicates size */
+ if (*plt_entry_size != 8 && *plt_entry_size != 16)
+ *plt_entry_size = shdr_plt->sh_addralign == 8 ? 8 : 16;
+ *plt_header_size = *plt_entry_size;
+ break;
+ default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/xtensa need to be checked */
+ *plt_header_size = shdr_plt->sh_entsize;
+ *plt_entry_size = shdr_plt->sh_entsize;
+ break;
+ }
+ if (*plt_entry_size)
+ return true;
+ pr_debug("Missing PLT entry size for %s\n", dso->long_name);
+ return false;
+}
+
+static bool machine_is_x86(GElf_Half e_machine)
+{
+ return e_machine == EM_386 || e_machine == EM_X86_64;
+}
+
+struct rela_dyn {
+ GElf_Addr offset;
+ u32 sym_idx;
+};
+
+struct rela_dyn_info {
+ struct dso *dso;
+ Elf_Data *plt_got_data;
+ u32 nr_entries;
+ struct rela_dyn *sorted;
+ Elf_Data *dynsym_data;
+ Elf_Data *dynstr_data;
+ Elf_Data *rela_dyn_data;
+};
+
+static void exit_rela_dyn(struct rela_dyn_info *di)
+{
+ zfree(&di->sorted);
+}
+
+static int cmp_offset(const void *a, const void *b)
+{
+ const struct rela_dyn *va = a;
+ const struct rela_dyn *vb = b;
+
+ return va->offset < vb->offset ? -1 : (va->offset > vb->offset ? 1 : 0);
+}
+
+static int sort_rela_dyn(struct rela_dyn_info *di)
+{
+ u32 i, n;
+
+ di->sorted = calloc(di->nr_entries, sizeof(di->sorted[0]));
+ if (!di->sorted)
+ return -1;
+
+ /* Get data for sorting: the offset and symbol index */
+ for (i = 0, n = 0; i < di->nr_entries; i++) {
+ GElf_Rela rela;
+ u32 sym_idx;
+
+ gelf_getrela(di->rela_dyn_data, i, &rela);
+ sym_idx = GELF_R_SYM(rela.r_info);
+ if (sym_idx) {
+ di->sorted[n].sym_idx = sym_idx;
+ di->sorted[n].offset = rela.r_offset;
+ n += 1;
+ }
+ }
+
+ /* Sort by offset */
+ di->nr_entries = n;
+ qsort(di->sorted, n, sizeof(di->sorted[0]), cmp_offset);
+
+ return 0;
+}
+
+static void get_rela_dyn_info(Elf *elf, GElf_Ehdr *ehdr, struct rela_dyn_info *di, Elf_Scn *scn)
+{
+ GElf_Shdr rela_dyn_shdr;
+ GElf_Shdr shdr;
+
+ di->plt_got_data = elf_getdata(scn, NULL);
+
+ scn = elf_section_by_name(elf, ehdr, &rela_dyn_shdr, ".rela.dyn", NULL);
+ if (!scn || !rela_dyn_shdr.sh_link || !rela_dyn_shdr.sh_entsize)
+ return;
+
+ di->nr_entries = rela_dyn_shdr.sh_size / rela_dyn_shdr.sh_entsize;
+ di->rela_dyn_data = elf_getdata(scn, NULL);
+
+ scn = elf_getscn(elf, rela_dyn_shdr.sh_link);
+ if (!scn || !gelf_getshdr(scn, &shdr) || !shdr.sh_link)
+ return;
+
+ di->dynsym_data = elf_getdata(scn, NULL);
+ di->dynstr_data = elf_getdata(elf_getscn(elf, shdr.sh_link), NULL);
+
+ if (!di->plt_got_data || !di->dynstr_data || !di->dynsym_data || !di->rela_dyn_data)
+ return;
+
+ /* Sort into offset order */
+ sort_rela_dyn(di);
+}
+
+/* Get instruction displacement from a plt entry for x86_64 */
+static u32 get_x86_64_plt_disp(const u8 *p)
+{
+ u8 endbr64[] = {0xf3, 0x0f, 0x1e, 0xfa};
+ int n = 0;
+
+ /* Skip endbr64 */
+ if (!memcmp(p, endbr64, sizeof(endbr64)))
+ n += sizeof(endbr64);
+ /* Skip bnd prefix */
+ if (p[n] == 0xf2)
+ n += 1;
+ /* jmp with 4-byte displacement */
+ if (p[n] == 0xff && p[n + 1] == 0x25) {
+ u32 disp;
+
+ n += 2;
+ /* Also add offset from start of entry to end of instruction */
+ memcpy(&disp, p + n, sizeof(disp));
+ return n + 4 + le32toh(disp);
+ }
+ return 0;
+}
+
+static bool get_plt_got_name(GElf_Shdr *shdr, size_t i,
+ struct rela_dyn_info *di,
+ char *buf, size_t buf_sz)
+{
+ struct rela_dyn vi, *vr;
+ const char *sym_name;
+ char *demangled;
+ GElf_Sym sym;
+ bool result;
+ u32 disp;
+
+ if (!di->sorted)
+ return false;
+
+ disp = get_x86_64_plt_disp(di->plt_got_data->d_buf + i);
+ if (!disp)
+ return false;
+
+ /* Compute target offset of the .plt.got entry */
+ vi.offset = shdr->sh_offset + di->plt_got_data->d_off + i + disp;
+
+ /* Find that offset in .rela.dyn (sorted by offset) */
+ vr = bsearch(&vi, di->sorted, di->nr_entries, sizeof(di->sorted[0]), cmp_offset);
+ if (!vr)
+ return false;
+
+ /* Get the associated symbol */
+ gelf_getsym(di->dynsym_data, vr->sym_idx, &sym);
+ sym_name = elf_sym__name(&sym, di->dynstr_data);
+ demangled = demangle_sym(di->dso, 0, sym_name);
+ if (demangled != NULL)
+ sym_name = demangled;
+
+ snprintf(buf, buf_sz, "%s@plt", sym_name);
+
+ result = *sym_name;
+
+ free(demangled);
+
+ return result;
+}
+
+static int dso__synthesize_plt_got_symbols(struct dso *dso, Elf *elf,
+ GElf_Ehdr *ehdr,
+ char *buf, size_t buf_sz)
+{
+ struct rela_dyn_info di = { .dso = dso };
+ struct symbol *sym;
+ GElf_Shdr shdr;
+ Elf_Scn *scn;
+ int err = -1;
+ size_t i;
+
+ scn = elf_section_by_name(elf, ehdr, &shdr, ".plt.got", NULL);
+ if (!scn || !shdr.sh_entsize)
+ return 0;
+
+ if (ehdr->e_machine == EM_X86_64)
+ get_rela_dyn_info(elf, ehdr, &di, scn);
+
+ for (i = 0; i < shdr.sh_size; i += shdr.sh_entsize) {
+ if (!get_plt_got_name(&shdr, i, &di, buf, buf_sz))
+ snprintf(buf, buf_sz, "offset_%#" PRIx64 "@plt", (u64)shdr.sh_offset + i);
+ sym = symbol__new(shdr.sh_offset + i, shdr.sh_entsize, STB_GLOBAL, STT_FUNC, buf);
+ if (!sym)
+ goto out;
+ symbols__insert(&dso->symbols, sym);
+ }
+ err = 0;
+out:
+ exit_rela_dyn(&di);
+ return err;
+}
+
+/*
+ * We need to check if we have a .dynsym, so that we can handle the
+ * .plt, synthesizing its symbols, that aren't on the symtabs (be it
+ * .dynsym or .symtab).
+ * And always look at the original dso, not at debuginfo packages, that
+ * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS).
+ */
+int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss)
+{
+ uint32_t idx;
+ GElf_Sym sym;
+ u64 plt_offset, plt_header_size, plt_entry_size;
+ GElf_Shdr shdr_plt, plt_sec_shdr;
+ struct symbol *f, *plt_sym;
+ GElf_Shdr shdr_rel_plt, shdr_dynsym;
+ Elf_Data *syms, *symstrs;
+ Elf_Scn *scn_plt_rel, *scn_symstrs, *scn_dynsym;
+ GElf_Ehdr ehdr;
+ char sympltname[1024];
+ Elf *elf;
+ int nr = 0, err = -1;
+ struct rel_info ri = { .is_rela = false };
+ bool lazy_plt;
+
+ elf = ss->elf;
+ ehdr = ss->ehdr;
+
+ if (!elf_section_by_name(elf, &ehdr, &shdr_plt, ".plt", NULL))
+ return 0;
+
+ /*
+ * A symbol from a previous section (e.g. .init) can have been expanded
+ * by symbols__fixup_end() to overlap .plt. Truncate it before adding
+ * a symbol for .plt header.
+ */
+ f = dso__find_symbol_nocache(dso, shdr_plt.sh_offset);
+ if (f && f->start < shdr_plt.sh_offset && f->end > shdr_plt.sh_offset)
+ f->end = shdr_plt.sh_offset;
+
+ if (!get_plt_sizes(dso, &ehdr, &shdr_plt, &plt_header_size, &plt_entry_size))
+ return 0;
+
+ /* Add a symbol for .plt header */
+ plt_sym = symbol__new(shdr_plt.sh_offset, plt_header_size, STB_GLOBAL, STT_FUNC, ".plt");
+ if (!plt_sym)
+ goto out_elf_end;
+ symbols__insert(&dso->symbols, plt_sym);
+
+ /* Only x86 has .plt.got */
+ if (machine_is_x86(ehdr.e_machine) &&
+ dso__synthesize_plt_got_symbols(dso, elf, &ehdr, sympltname, sizeof(sympltname)))
+ goto out_elf_end;
+
+ /* Only x86 has .plt.sec */
+ if (machine_is_x86(ehdr.e_machine) &&
+ elf_section_by_name(elf, &ehdr, &plt_sec_shdr, ".plt.sec", NULL)) {
+ if (!get_plt_sizes(dso, &ehdr, &plt_sec_shdr, &plt_header_size, &plt_entry_size))
+ return 0;
+ /* Extend .plt symbol to entire .plt */
+ plt_sym->end = plt_sym->start + shdr_plt.sh_size;
+ /* Use .plt.sec offset */
+ plt_offset = plt_sec_shdr.sh_offset;
+ lazy_plt = false;
+ } else {
+ plt_offset = shdr_plt.sh_offset;
+ lazy_plt = true;
+ }
+
+ scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt,
+ ".rela.plt", NULL);
+ if (scn_plt_rel == NULL) {
+ scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt,
+ ".rel.plt", NULL);
+ if (scn_plt_rel == NULL)
+ return 0;
+ }
+
+ if (shdr_rel_plt.sh_type != SHT_RELA &&
+ shdr_rel_plt.sh_type != SHT_REL)
+ return 0;
+
+ if (!shdr_rel_plt.sh_link)
+ return 0;
+
+ if (shdr_rel_plt.sh_link == ss->dynsym_idx) {
+ scn_dynsym = ss->dynsym;
+ shdr_dynsym = ss->dynshdr;
+ } else if (shdr_rel_plt.sh_link == ss->symtab_idx) {
+ /*
+ * A static executable can have a .plt due to IFUNCs, in which
+ * case .symtab is used not .dynsym.
+ */
+ scn_dynsym = ss->symtab;
+ shdr_dynsym = ss->symshdr;
+ } else {
+ goto out_elf_end;
+ }
+
+ if (!scn_dynsym)
+ return 0;
+
+ /*
+ * Fetch the relocation section to find the idxes to the GOT
+ * and the symbols in the .dynsym they refer to.
+ */
+ ri.reldata = elf_getdata(scn_plt_rel, NULL);
+ if (!ri.reldata)
+ goto out_elf_end;
+
+ syms = elf_getdata(scn_dynsym, NULL);
+ if (syms == NULL)
+ goto out_elf_end;
+
+ scn_symstrs = elf_getscn(elf, shdr_dynsym.sh_link);
+ if (scn_symstrs == NULL)
+ goto out_elf_end;
+
+ symstrs = elf_getdata(scn_symstrs, NULL);
+ if (symstrs == NULL)
+ goto out_elf_end;
+
+ if (symstrs->d_size == 0)
+ goto out_elf_end;
+
+ ri.nr_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize;
+
+ ri.is_rela = shdr_rel_plt.sh_type == SHT_RELA;
+
+ if (lazy_plt) {
+ /*
+ * Assume a .plt with the same number of entries as the number
+ * of relocation entries is not lazy and does not have a header.
+ */
+ if (ri.nr_entries * plt_entry_size == shdr_plt.sh_size)
+ dso__delete_symbol(dso, plt_sym);
+ else
+ plt_offset += plt_header_size;
+ }
+
+ /*
+ * x86 doesn't insert IFUNC relocations in .plt order, so sort to get
+ * back in order.
+ */
+ if (machine_is_x86(ehdr.e_machine) && sort_rel(&ri))
+ goto out_elf_end;
+
+ for (idx = 0; idx < ri.nr_entries; idx++) {
+ const char *elf_name = NULL;
+ char *demangled = NULL;
+
+ gelf_getsym(syms, get_rel_symidx(&ri, idx), &sym);
+
+ elf_name = elf_sym__name(&sym, symstrs);
+ demangled = demangle_sym(dso, 0, elf_name);
+ if (demangled)
+ elf_name = demangled;
+ if (*elf_name)
+ snprintf(sympltname, sizeof(sympltname), "%s@plt", elf_name);
+ else if (!get_ifunc_name(elf, dso, &ehdr, &ri, sympltname, sizeof(sympltname)))
+ snprintf(sympltname, sizeof(sympltname),
+ "offset_%#" PRIx64 "@plt", plt_offset);
+ free(demangled);
+
+ f = symbol__new(plt_offset, plt_entry_size, STB_GLOBAL, STT_FUNC, sympltname);
+ if (!f)
+ goto out_elf_end;
+
+ plt_offset += plt_entry_size;
+ symbols__insert(&dso->symbols, f);
+ ++nr;
+ }
+
+ err = 0;
+out_elf_end:
+ exit_rel(&ri);
+ if (err == 0)
+ return nr;
+ pr_debug("%s: problems reading %s PLT info.\n",
+ __func__, dso->long_name);
+ return 0;
+}
+
+char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
+{
+ return demangle_sym(dso, kmodule, elf_name);
+}
+
+/*
+ * Align offset to 4 bytes as needed for note name and descriptor data.
+ */
+#define NOTE_ALIGN(n) (((n) + 3) & -4U)
+
+static int elf_read_build_id(Elf *elf, void *bf, size_t size)
+{
+ int err = -1;
+ GElf_Ehdr ehdr;
+ GElf_Shdr shdr;
+ Elf_Data *data;
+ Elf_Scn *sec;
+ Elf_Kind ek;
+ void *ptr;
+
+ if (size < BUILD_ID_SIZE)
+ goto out;
+
+ ek = elf_kind(elf);
+ if (ek != ELF_K_ELF)
+ goto out;
+
+ if (gelf_getehdr(elf, &ehdr) == NULL) {
+ pr_err("%s: cannot get elf header.\n", __func__);
+ goto out;
+ }
+
+ /*
+ * Check following sections for notes:
+ * '.note.gnu.build-id'
+ * '.notes'
+ * '.note' (VDSO specific)
+ */
+ do {
+ sec = elf_section_by_name(elf, &ehdr, &shdr,
+ ".note.gnu.build-id", NULL);
+ if (sec)
+ break;
+
+ sec = elf_section_by_name(elf, &ehdr, &shdr,
+ ".notes", NULL);
+ if (sec)
+ break;
+
+ sec = elf_section_by_name(elf, &ehdr, &shdr,
+ ".note", NULL);
+ if (sec)
+ break;
+
+ return err;
+
+ } while (0);
+
+ data = elf_getdata(sec, NULL);
+ if (data == NULL)
+ goto out;
+
+ ptr = data->d_buf;
+ while (ptr < (data->d_buf + data->d_size)) {
+ GElf_Nhdr *nhdr = ptr;
+ size_t namesz = NOTE_ALIGN(nhdr->n_namesz),
+ descsz = NOTE_ALIGN(nhdr->n_descsz);
+ const char *name;
+
+ ptr += sizeof(*nhdr);
+ name = ptr;
+ ptr += namesz;
+ if (nhdr->n_type == NT_GNU_BUILD_ID &&
+ nhdr->n_namesz == sizeof("GNU")) {
+ if (memcmp(name, "GNU", sizeof("GNU")) == 0) {
+ size_t sz = min(size, descsz);
+ memcpy(bf, ptr, sz);
+ memset(bf + sz, 0, size - sz);
+ err = sz;
+ break;
+ }
+ }
+ ptr += descsz;
+ }
+
+out:
+ return err;
+}
+
+#ifdef HAVE_LIBBFD_BUILDID_SUPPORT
+
+static int read_build_id(const char *filename, struct build_id *bid)
+{
+ size_t size = sizeof(bid->data);
+ int err = -1;
+ bfd *abfd;
+
+ abfd = bfd_openr(filename, NULL);
+ if (!abfd)
+ return -1;
+
+ if (!bfd_check_format(abfd, bfd_object)) {
+ pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename);
+ goto out_close;
+ }
+
+ if (!abfd->build_id || abfd->build_id->size > size)
+ goto out_close;
+
+ memcpy(bid->data, abfd->build_id->data, abfd->build_id->size);
+ memset(bid->data + abfd->build_id->size, 0, size - abfd->build_id->size);
+ err = bid->size = abfd->build_id->size;
+
+out_close:
+ bfd_close(abfd);
+ return err;
+}
+
+#else // HAVE_LIBBFD_BUILDID_SUPPORT
+
+static int read_build_id(const char *filename, struct build_id *bid)
+{
+ size_t size = sizeof(bid->data);
+ int fd, err = -1;
+ Elf *elf;
+
+ if (size < BUILD_ID_SIZE)
+ goto out;
+
+ fd = open(filename, O_RDONLY);
+ if (fd < 0)
+ goto out;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL) {
+ pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename);
+ goto out_close;
+ }
+
+ err = elf_read_build_id(elf, bid->data, size);
+ if (err > 0)
+ bid->size = err;
+
+ elf_end(elf);
+out_close:
+ close(fd);
+out:
+ return err;
+}
+
+#endif // HAVE_LIBBFD_BUILDID_SUPPORT
+
+int filename__read_build_id(const char *filename, struct build_id *bid)
+{
+ struct kmod_path m = { .name = NULL, };
+ char path[PATH_MAX];
+ int err;
+
+ if (!filename)
+ return -EFAULT;
+
+ err = kmod_path__parse(&m, filename);
+ if (err)
+ return -1;
+
+ if (m.comp) {
+ int error = 0, fd;
+
+ fd = filename__decompress(filename, path, sizeof(path), m.comp, &error);
+ if (fd < 0) {
+ pr_debug("Failed to decompress (error %d) %s\n",
+ error, filename);
+ return -1;
+ }
+ close(fd);
+ filename = path;
+ }
+
+ err = read_build_id(filename, bid);
+
+ if (m.comp)
+ unlink(filename);
+ return err;
+}
+
+int sysfs__read_build_id(const char *filename, struct build_id *bid)
+{
+ size_t size = sizeof(bid->data);
+ int fd, err = -1;
+
+ fd = open(filename, O_RDONLY);
+ if (fd < 0)
+ goto out;
+
+ while (1) {
+ char bf[BUFSIZ];
+ GElf_Nhdr nhdr;
+ size_t namesz, descsz;
+
+ if (read(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr))
+ break;
+
+ namesz = NOTE_ALIGN(nhdr.n_namesz);
+ descsz = NOTE_ALIGN(nhdr.n_descsz);
+ if (nhdr.n_type == NT_GNU_BUILD_ID &&
+ nhdr.n_namesz == sizeof("GNU")) {
+ if (read(fd, bf, namesz) != (ssize_t)namesz)
+ break;
+ if (memcmp(bf, "GNU", sizeof("GNU")) == 0) {
+ size_t sz = min(descsz, size);
+ if (read(fd, bid->data, sz) == (ssize_t)sz) {
+ memset(bid->data + sz, 0, size - sz);
+ bid->size = sz;
+ err = 0;
+ break;
+ }
+ } else if (read(fd, bf, descsz) != (ssize_t)descsz)
+ break;
+ } else {
+ int n = namesz + descsz;
+
+ if (n > (int)sizeof(bf)) {
+ n = sizeof(bf);
+ pr_debug("%s: truncating reading of build id in sysfs file %s: n_namesz=%u, n_descsz=%u.\n",
+ __func__, filename, nhdr.n_namesz, nhdr.n_descsz);
+ }
+ if (read(fd, bf, n) != n)
+ break;
+ }
+ }
+ close(fd);
+out:
+ return err;
+}
+
+#ifdef HAVE_LIBBFD_SUPPORT
+
+int filename__read_debuglink(const char *filename, char *debuglink,
+ size_t size)
+{
+ int err = -1;
+ asection *section;
+ bfd *abfd;
+
+ abfd = bfd_openr(filename, NULL);
+ if (!abfd)
+ return -1;
+
+ if (!bfd_check_format(abfd, bfd_object)) {
+ pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename);
+ goto out_close;
+ }
+
+ section = bfd_get_section_by_name(abfd, ".gnu_debuglink");
+ if (!section)
+ goto out_close;
+
+ if (section->size > size)
+ goto out_close;
+
+ if (!bfd_get_section_contents(abfd, section, debuglink, 0,
+ section->size))
+ goto out_close;
+
+ err = 0;
+
+out_close:
+ bfd_close(abfd);
+ return err;
+}
+
+#else
+
+int filename__read_debuglink(const char *filename, char *debuglink,
+ size_t size)
+{
+ int fd, err = -1;
+ Elf *elf;
+ GElf_Ehdr ehdr;
+ GElf_Shdr shdr;
+ Elf_Data *data;
+ Elf_Scn *sec;
+ Elf_Kind ek;
+
+ fd = open(filename, O_RDONLY);
+ if (fd < 0)
+ goto out;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL) {
+ pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename);
+ goto out_close;
+ }
+
+ ek = elf_kind(elf);
+ if (ek != ELF_K_ELF)
+ goto out_elf_end;
+
+ if (gelf_getehdr(elf, &ehdr) == NULL) {
+ pr_err("%s: cannot get elf header.\n", __func__);
+ goto out_elf_end;
+ }
+
+ sec = elf_section_by_name(elf, &ehdr, &shdr,
+ ".gnu_debuglink", NULL);
+ if (sec == NULL)
+ goto out_elf_end;
+
+ data = elf_getdata(sec, NULL);
+ if (data == NULL)
+ goto out_elf_end;
+
+ /* the start of this section is a zero-terminated string */
+ strncpy(debuglink, data->d_buf, size);
+
+ err = 0;
+
+out_elf_end:
+ elf_end(elf);
+out_close:
+ close(fd);
+out:
+ return err;
+}
+
+#endif
+
+static int dso__swap_init(struct dso *dso, unsigned char eidata)
+{
+ static unsigned int const endian = 1;
+
+ dso->needs_swap = DSO_SWAP__NO;
+
+ switch (eidata) {
+ case ELFDATA2LSB:
+ /* We are big endian, DSO is little endian. */
+ if (*(unsigned char const *)&endian != 1)
+ dso->needs_swap = DSO_SWAP__YES;
+ break;
+
+ case ELFDATA2MSB:
+ /* We are little endian, DSO is big endian. */
+ if (*(unsigned char const *)&endian != 0)
+ dso->needs_swap = DSO_SWAP__YES;
+ break;
+
+ default:
+ pr_err("unrecognized DSO data encoding %d\n", eidata);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+bool symsrc__possibly_runtime(struct symsrc *ss)
+{
+ return ss->dynsym || ss->opdsec;
+}
+
+bool symsrc__has_symtab(struct symsrc *ss)
+{
+ return ss->symtab != NULL;
+}
+
+void symsrc__destroy(struct symsrc *ss)
+{
+ zfree(&ss->name);
+ elf_end(ss->elf);
+ close(ss->fd);
+}
+
+bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
+{
+ /*
+ * Usually vmlinux is an ELF file with type ET_EXEC for most
+ * architectures; except Arm64 kernel is linked with option
+ * '-share', so need to check type ET_DYN.
+ */
+ return ehdr.e_type == ET_EXEC || ehdr.e_type == ET_REL ||
+ ehdr.e_type == ET_DYN;
+}
+
+int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
+ enum dso_binary_type type)
+{
+ GElf_Ehdr ehdr;
+ Elf *elf;
+ int fd;
+
+ if (dso__needs_decompress(dso)) {
+ fd = dso__decompress_kmodule_fd(dso, name);
+ if (fd < 0)
+ return -1;
+
+ type = dso->symtab_type;
+ } else {
+ fd = open(name, O_RDONLY);
+ if (fd < 0) {
+ dso->load_errno = errno;
+ return -1;
+ }
+ }
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL) {
+ pr_debug("%s: cannot read %s ELF file.\n", __func__, name);
+ dso->load_errno = DSO_LOAD_ERRNO__INVALID_ELF;
+ goto out_close;
+ }
+
+ if (gelf_getehdr(elf, &ehdr) == NULL) {
+ dso->load_errno = DSO_LOAD_ERRNO__INVALID_ELF;
+ pr_debug("%s: cannot get elf header.\n", __func__);
+ goto out_elf_end;
+ }
+
+ if (dso__swap_init(dso, ehdr.e_ident[EI_DATA])) {
+ dso->load_errno = DSO_LOAD_ERRNO__INTERNAL_ERROR;
+ goto out_elf_end;
+ }
+
+ /* Always reject images with a mismatched build-id: */
+ if (dso->has_build_id && !symbol_conf.ignore_vmlinux_buildid) {
+ u8 build_id[BUILD_ID_SIZE];
+ struct build_id bid;
+ int size;
+
+ size = elf_read_build_id(elf, build_id, BUILD_ID_SIZE);
+ if (size <= 0) {
+ dso->load_errno = DSO_LOAD_ERRNO__CANNOT_READ_BUILDID;
+ goto out_elf_end;
+ }
+
+ build_id__init(&bid, build_id, size);
+ if (!dso__build_id_equal(dso, &bid)) {
+ pr_debug("%s: build id mismatch for %s.\n", __func__, name);
+ dso->load_errno = DSO_LOAD_ERRNO__MISMATCHING_BUILDID;
+ goto out_elf_end;
+ }
+ }
+
+ ss->is_64_bit = (gelf_getclass(elf) == ELFCLASS64);
+
+ ss->symtab_idx = 0;
+ ss->symtab = elf_section_by_name(elf, &ehdr, &ss->symshdr, ".symtab",
+ &ss->symtab_idx);
+ if (ss->symshdr.sh_type != SHT_SYMTAB)
+ ss->symtab = NULL;
+
+ ss->dynsym_idx = 0;
+ ss->dynsym = elf_section_by_name(elf, &ehdr, &ss->dynshdr, ".dynsym",
+ &ss->dynsym_idx);
+ if (ss->dynshdr.sh_type != SHT_DYNSYM)
+ ss->dynsym = NULL;
+
+ ss->opdidx = 0;
+ ss->opdsec = elf_section_by_name(elf, &ehdr, &ss->opdshdr, ".opd",
+ &ss->opdidx);
+ if (ss->opdshdr.sh_type != SHT_PROGBITS)
+ ss->opdsec = NULL;
+
+ if (dso->kernel == DSO_SPACE__USER)
+ ss->adjust_symbols = true;
+ else
+ ss->adjust_symbols = elf__needs_adjust_symbols(ehdr);
+
+ ss->name = strdup(name);
+ if (!ss->name) {
+ dso->load_errno = errno;
+ goto out_elf_end;
+ }
+
+ ss->elf = elf;
+ ss->fd = fd;
+ ss->ehdr = ehdr;
+ ss->type = type;
+
+ return 0;
+
+out_elf_end:
+ elf_end(elf);
+out_close:
+ close(fd);
+ return -1;
+}
+
+/**
+ * ref_reloc_sym_not_found - has kernel relocation symbol been found.
+ * @kmap: kernel maps and relocation reference symbol
+ *
+ * This function returns %true if we are dealing with the kernel maps and the
+ * relocation reference symbol has not yet been found. Otherwise %false is
+ * returned.
+ */
+static bool ref_reloc_sym_not_found(struct kmap *kmap)
+{
+ return kmap && kmap->ref_reloc_sym && kmap->ref_reloc_sym->name &&
+ !kmap->ref_reloc_sym->unrelocated_addr;
+}
+
+/**
+ * ref_reloc - kernel relocation offset.
+ * @kmap: kernel maps and relocation reference symbol
+ *
+ * This function returns the offset of kernel addresses as determined by using
+ * the relocation reference symbol i.e. if the kernel has not been relocated
+ * then the return value is zero.
+ */
+static u64 ref_reloc(struct kmap *kmap)
+{
+ if (kmap && kmap->ref_reloc_sym &&
+ kmap->ref_reloc_sym->unrelocated_addr)
+ return kmap->ref_reloc_sym->addr -
+ kmap->ref_reloc_sym->unrelocated_addr;
+ return 0;
+}
+
+void __weak arch__sym_update(struct symbol *s __maybe_unused,
+ GElf_Sym *sym __maybe_unused) { }
+
+static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
+ GElf_Sym *sym, GElf_Shdr *shdr,
+ struct maps *kmaps, struct kmap *kmap,
+ struct dso **curr_dsop, struct map **curr_mapp,
+ const char *section_name,
+ bool adjust_kernel_syms, bool kmodule, bool *remap_kernel)
+{
+ struct dso *curr_dso = *curr_dsop;
+ struct map *curr_map;
+ char dso_name[PATH_MAX];
+
+ /* Adjust symbol to map to file offset */
+ if (adjust_kernel_syms)
+ sym->st_value -= shdr->sh_addr - shdr->sh_offset;
+
+ if (strcmp(section_name, (curr_dso->short_name + dso->short_name_len)) == 0)
+ return 0;
+
+ if (strcmp(section_name, ".text") == 0) {
+ /*
+ * The initial kernel mapping is based on
+ * kallsyms and identity maps. Overwrite it to
+ * map to the kernel dso.
+ */
+ if (*remap_kernel && dso->kernel && !kmodule) {
+ *remap_kernel = false;
+ map__set_start(map, shdr->sh_addr + ref_reloc(kmap));
+ map__set_end(map, map__start(map) + shdr->sh_size);
+ map__set_pgoff(map, shdr->sh_offset);
+ map__set_map_ip(map, map__dso_map_ip);
+ map__set_unmap_ip(map, map__dso_unmap_ip);
+ /* Ensure maps are correctly ordered */
+ if (kmaps) {
+ int err;
+ struct map *tmp = map__get(map);
+
+ maps__remove(kmaps, map);
+ err = maps__insert(kmaps, map);
+ map__put(tmp);
+ if (err)
+ return err;
+ }
+ }
+
+ /*
+ * The initial module mapping is based on
+ * /proc/modules mapped to offset zero.
+ * Overwrite it to map to the module dso.
+ */
+ if (*remap_kernel && kmodule) {
+ *remap_kernel = false;
+ map__set_pgoff(map, shdr->sh_offset);
+ }
+
+ *curr_mapp = map;
+ *curr_dsop = dso;
+ return 0;
+ }
+
+ if (!kmap)
+ return 0;
+
+ snprintf(dso_name, sizeof(dso_name), "%s%s", dso->short_name, section_name);
+
+ curr_map = maps__find_by_name(kmaps, dso_name);
+ if (curr_map == NULL) {
+ u64 start = sym->st_value;
+
+ if (kmodule)
+ start += map__start(map) + shdr->sh_offset;
+
+ curr_dso = dso__new(dso_name);
+ if (curr_dso == NULL)
+ return -1;
+ curr_dso->kernel = dso->kernel;
+ curr_dso->long_name = dso->long_name;
+ curr_dso->long_name_len = dso->long_name_len;
+ curr_dso->binary_type = dso->binary_type;
+ curr_dso->adjust_symbols = dso->adjust_symbols;
+ curr_map = map__new2(start, curr_dso);
+ dso__put(curr_dso);
+ if (curr_map == NULL)
+ return -1;
+
+ if (curr_dso->kernel)
+ map__kmap(curr_map)->kmaps = kmaps;
+
+ if (adjust_kernel_syms) {
+ map__set_start(curr_map, shdr->sh_addr + ref_reloc(kmap));
+ map__set_end(curr_map, map__start(curr_map) + shdr->sh_size);
+ map__set_pgoff(curr_map, shdr->sh_offset);
+ } else {
+ map__set_map_ip(curr_map, identity__map_ip);
+ map__set_unmap_ip(curr_map, identity__map_ip);
+ }
+ curr_dso->symtab_type = dso->symtab_type;
+ if (maps__insert(kmaps, curr_map))
+ return -1;
+ /*
+ * Add it before we drop the reference to curr_map, i.e. while
+ * we still are sure to have a reference to this DSO via
+ * *curr_map->dso.
+ */
+ dsos__add(&maps__machine(kmaps)->dsos, curr_dso);
+ /* kmaps already got it */
+ map__put(curr_map);
+ dso__set_loaded(curr_dso);
+ *curr_mapp = curr_map;
+ *curr_dsop = curr_dso;
+ } else
+ *curr_dsop = map__dso(curr_map);
+
+ return 0;
+}
+
+static int
+dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+ struct symsrc *runtime_ss, int kmodule, int dynsym)
+{
+ struct kmap *kmap = dso->kernel ? map__kmap(map) : NULL;
+ struct maps *kmaps = kmap ? map__kmaps(map) : NULL;
+ struct map *curr_map = map;
+ struct dso *curr_dso = dso;
+ Elf_Data *symstrs, *secstrs, *secstrs_run, *secstrs_sym;
+ uint32_t nr_syms;
+ int err = -1;
+ uint32_t idx;
+ GElf_Ehdr ehdr;
+ GElf_Shdr shdr;
+ GElf_Shdr tshdr;
+ Elf_Data *syms, *opddata = NULL;
+ GElf_Sym sym;
+ Elf_Scn *sec, *sec_strndx;
+ Elf *elf;
+ int nr = 0;
+ bool remap_kernel = false, adjust_kernel_syms = false;
+
+ if (kmap && !kmaps)
+ return -1;
+
+ elf = syms_ss->elf;
+ ehdr = syms_ss->ehdr;
+ if (dynsym) {
+ sec = syms_ss->dynsym;
+ shdr = syms_ss->dynshdr;
+ } else {
+ sec = syms_ss->symtab;
+ shdr = syms_ss->symshdr;
+ }
+
+ if (elf_section_by_name(runtime_ss->elf, &runtime_ss->ehdr, &tshdr,
+ ".text", NULL))
+ dso->text_offset = tshdr.sh_addr - tshdr.sh_offset;
+
+ if (runtime_ss->opdsec)
+ opddata = elf_rawdata(runtime_ss->opdsec, NULL);
+
+ syms = elf_getdata(sec, NULL);
+ if (syms == NULL)
+ goto out_elf_end;
+
+ sec = elf_getscn(elf, shdr.sh_link);
+ if (sec == NULL)
+ goto out_elf_end;
+
+ symstrs = elf_getdata(sec, NULL);
+ if (symstrs == NULL)
+ goto out_elf_end;
+
+ sec_strndx = elf_getscn(runtime_ss->elf, runtime_ss->ehdr.e_shstrndx);
+ if (sec_strndx == NULL)
+ goto out_elf_end;
+
+ secstrs_run = elf_getdata(sec_strndx, NULL);
+ if (secstrs_run == NULL)
+ goto out_elf_end;
+
+ sec_strndx = elf_getscn(elf, ehdr.e_shstrndx);
+ if (sec_strndx == NULL)
+ goto out_elf_end;
+
+ secstrs_sym = elf_getdata(sec_strndx, NULL);
+ if (secstrs_sym == NULL)
+ goto out_elf_end;
+
+ nr_syms = shdr.sh_size / shdr.sh_entsize;
+
+ memset(&sym, 0, sizeof(sym));
+
+ /*
+ * The kernel relocation symbol is needed in advance in order to adjust
+ * kernel maps correctly.
+ */
+ if (ref_reloc_sym_not_found(kmap)) {
+ elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) {
+ const char *elf_name = elf_sym__name(&sym, symstrs);
+
+ if (strcmp(elf_name, kmap->ref_reloc_sym->name))
+ continue;
+ kmap->ref_reloc_sym->unrelocated_addr = sym.st_value;
+ map__set_reloc(map, kmap->ref_reloc_sym->addr - kmap->ref_reloc_sym->unrelocated_addr);
+ break;
+ }
+ }
+
+ /*
+ * Handle any relocation of vdso necessary because older kernels
+ * attempted to prelink vdso to its virtual address.
+ */
+ if (dso__is_vdso(dso))
+ map__set_reloc(map, map__start(map) - dso->text_offset);
+
+ dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap);
+ /*
+ * Initial kernel and module mappings do not map to the dso.
+ * Flag the fixups.
+ */
+ if (dso->kernel) {
+ remap_kernel = true;
+ adjust_kernel_syms = dso->adjust_symbols;
+ }
+ elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) {
+ struct symbol *f;
+ const char *elf_name = elf_sym__name(&sym, symstrs);
+ char *demangled = NULL;
+ int is_label = elf_sym__is_label(&sym);
+ const char *section_name;
+ bool used_opd = false;
+
+ if (!is_label && !elf_sym__filter(&sym))
+ continue;
+
+ /* Reject ARM ELF "mapping symbols": these aren't unique and
+ * don't identify functions, so will confuse the profile
+ * output: */
+ if (ehdr.e_machine == EM_ARM || ehdr.e_machine == EM_AARCH64) {
+ if (elf_name[0] == '$' && strchr("adtx", elf_name[1])
+ && (elf_name[2] == '\0' || elf_name[2] == '.'))
+ continue;
+ }
+
+ if (runtime_ss->opdsec && sym.st_shndx == runtime_ss->opdidx) {
+ u32 offset = sym.st_value - syms_ss->opdshdr.sh_addr;
+ u64 *opd = opddata->d_buf + offset;
+ sym.st_value = DSO__SWAP(dso, u64, *opd);
+ sym.st_shndx = elf_addr_to_index(runtime_ss->elf,
+ sym.st_value);
+ used_opd = true;
+ }
+
+ /*
+ * When loading symbols in a data mapping, ABS symbols (which
+ * has a value of SHN_ABS in its st_shndx) failed at
+ * elf_getscn(). And it marks the loading as a failure so
+ * already loaded symbols cannot be fixed up.
+ *
+ * I'm not sure what should be done. Just ignore them for now.
+ * - Namhyung Kim
+ */
+ if (sym.st_shndx == SHN_ABS)
+ continue;
+
+ sec = elf_getscn(syms_ss->elf, sym.st_shndx);
+ if (!sec)
+ goto out_elf_end;
+
+ gelf_getshdr(sec, &shdr);
+
+ /*
+ * If the attribute bit SHF_ALLOC is not set, the section
+ * doesn't occupy memory during process execution.
+ * E.g. ".gnu.warning.*" section is used by linker to generate
+ * warnings when calling deprecated functions, the symbols in
+ * the section aren't loaded to memory during process execution,
+ * so skip them.
+ */
+ if (!(shdr.sh_flags & SHF_ALLOC))
+ continue;
+
+ secstrs = secstrs_sym;
+
+ /*
+ * We have to fallback to runtime when syms' section header has
+ * NOBITS set. NOBITS results in file offset (sh_offset) not
+ * being incremented. So sh_offset used below has different
+ * values for syms (invalid) and runtime (valid).
+ */
+ if (shdr.sh_type == SHT_NOBITS) {
+ sec = elf_getscn(runtime_ss->elf, sym.st_shndx);
+ if (!sec)
+ goto out_elf_end;
+
+ gelf_getshdr(sec, &shdr);
+ secstrs = secstrs_run;
+ }
+
+ if (is_label && !elf_sec__filter(&shdr, secstrs))
+ continue;
+
+ section_name = elf_sec__name(&shdr, secstrs);
+
+ /* On ARM, symbols for thumb functions have 1 added to
+ * the symbol address as a flag - remove it */
+ if ((ehdr.e_machine == EM_ARM) &&
+ (GELF_ST_TYPE(sym.st_info) == STT_FUNC) &&
+ (sym.st_value & 1))
+ --sym.st_value;
+
+ if (dso->kernel) {
+ if (dso__process_kernel_symbol(dso, map, &sym, &shdr, kmaps, kmap, &curr_dso, &curr_map,
+ section_name, adjust_kernel_syms, kmodule, &remap_kernel))
+ goto out_elf_end;
+ } else if ((used_opd && runtime_ss->adjust_symbols) ||
+ (!used_opd && syms_ss->adjust_symbols)) {
+ GElf_Phdr phdr;
+
+ if (elf_read_program_header(runtime_ss->elf,
+ (u64)sym.st_value, &phdr)) {
+ pr_debug4("%s: failed to find program header for "
+ "symbol: %s st_value: %#" PRIx64 "\n",
+ __func__, elf_name, (u64)sym.st_value);
+ pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
+ "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n",
+ __func__, (u64)sym.st_value, (u64)shdr.sh_addr,
+ (u64)shdr.sh_offset);
+ /*
+ * Fail to find program header, let's rollback
+ * to use shdr.sh_addr and shdr.sh_offset to
+ * calibrate symbol's file address, though this
+ * is not necessary for normal C ELF file, we
+ * still need to handle java JIT symbols in this
+ * case.
+ */
+ sym.st_value -= shdr.sh_addr - shdr.sh_offset;
+ } else {
+ pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
+ "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n",
+ __func__, (u64)sym.st_value, (u64)phdr.p_vaddr,
+ (u64)phdr.p_offset);
+ sym.st_value -= phdr.p_vaddr - phdr.p_offset;
+ }
+ }
+
+ demangled = demangle_sym(dso, kmodule, elf_name);
+ if (demangled != NULL)
+ elf_name = demangled;
+
+ f = symbol__new(sym.st_value, sym.st_size,
+ GELF_ST_BIND(sym.st_info),
+ GELF_ST_TYPE(sym.st_info), elf_name);
+ free(demangled);
+ if (!f)
+ goto out_elf_end;
+
+ arch__sym_update(f, &sym);
+
+ __symbols__insert(&curr_dso->symbols, f, dso->kernel);
+ nr++;
+ }
+
+ /*
+ * For misannotated, zeroed, ASM function sizes.
+ */
+ if (nr > 0) {
+ symbols__fixup_end(&dso->symbols, false);
+ symbols__fixup_duplicate(&dso->symbols);
+ if (kmap) {
+ /*
+ * We need to fixup this here too because we create new
+ * maps here, for things like vsyscall sections.
+ */
+ maps__fixup_end(kmaps);
+ }
+ }
+ err = nr;
+out_elf_end:
+ return err;
+}
+
+int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+ struct symsrc *runtime_ss, int kmodule)
+{
+ int nr = 0;
+ int err = -1;
+
+ dso->symtab_type = syms_ss->type;
+ dso->is_64_bit = syms_ss->is_64_bit;
+ dso->rel = syms_ss->ehdr.e_type == ET_REL;
+
+ /*
+ * Modules may already have symbols from kallsyms, but those symbols
+ * have the wrong values for the dso maps, so remove them.
+ */
+ if (kmodule && syms_ss->symtab)
+ symbols__delete(&dso->symbols);
+
+ if (!syms_ss->symtab) {
+ /*
+ * If the vmlinux is stripped, fail so we will fall back
+ * to using kallsyms. The vmlinux runtime symbols aren't
+ * of much use.
+ */
+ if (dso->kernel)
+ return err;
+ } else {
+ err = dso__load_sym_internal(dso, map, syms_ss, runtime_ss,
+ kmodule, 0);
+ if (err < 0)
+ return err;
+ nr = err;
+ }
+
+ if (syms_ss->dynsym) {
+ err = dso__load_sym_internal(dso, map, syms_ss, runtime_ss,
+ kmodule, 1);
+ if (err < 0)
+ return err;
+ err += nr;
+ }
+
+ return err;
+}
+
+static int elf_read_maps(Elf *elf, bool exe, mapfn_t mapfn, void *data)
+{
+ GElf_Phdr phdr;
+ size_t i, phdrnum;
+ int err;
+ u64 sz;
+
+ if (elf_getphdrnum(elf, &phdrnum))
+ return -1;
+
+ for (i = 0; i < phdrnum; i++) {
+ if (gelf_getphdr(elf, i, &phdr) == NULL)
+ return -1;
+ if (phdr.p_type != PT_LOAD)
+ continue;
+ if (exe) {
+ if (!(phdr.p_flags & PF_X))
+ continue;
+ } else {
+ if (!(phdr.p_flags & PF_R))
+ continue;
+ }
+ sz = min(phdr.p_memsz, phdr.p_filesz);
+ if (!sz)
+ continue;
+ err = mapfn(phdr.p_vaddr, sz, phdr.p_offset, data);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data,
+ bool *is_64_bit)
+{
+ int err;
+ Elf *elf;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL)
+ return -1;
+
+ if (is_64_bit)
+ *is_64_bit = (gelf_getclass(elf) == ELFCLASS64);
+
+ err = elf_read_maps(elf, exe, mapfn, data);
+
+ elf_end(elf);
+ return err;
+}
+
+enum dso_type dso__type_fd(int fd)
+{
+ enum dso_type dso_type = DSO__TYPE_UNKNOWN;
+ GElf_Ehdr ehdr;
+ Elf_Kind ek;
+ Elf *elf;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL)
+ goto out;
+
+ ek = elf_kind(elf);
+ if (ek != ELF_K_ELF)
+ goto out_end;
+
+ if (gelf_getclass(elf) == ELFCLASS64) {
+ dso_type = DSO__TYPE_64BIT;
+ goto out_end;
+ }
+
+ if (gelf_getehdr(elf, &ehdr) == NULL)
+ goto out_end;
+
+ if (ehdr.e_machine == EM_X86_64)
+ dso_type = DSO__TYPE_X32BIT;
+ else
+ dso_type = DSO__TYPE_32BIT;
+out_end:
+ elf_end(elf);
+out:
+ return dso_type;
+}
+
+static int copy_bytes(int from, off_t from_offs, int to, off_t to_offs, u64 len)
+{
+ ssize_t r;
+ size_t n;
+ int err = -1;
+ char *buf = malloc(page_size);
+
+ if (buf == NULL)
+ return -1;
+
+ if (lseek(to, to_offs, SEEK_SET) != to_offs)
+ goto out;
+
+ if (lseek(from, from_offs, SEEK_SET) != from_offs)
+ goto out;
+
+ while (len) {
+ n = page_size;
+ if (len < n)
+ n = len;
+ /* Use read because mmap won't work on proc files */
+ r = read(from, buf, n);
+ if (r < 0)
+ goto out;
+ if (!r)
+ break;
+ n = r;
+ r = write(to, buf, n);
+ if (r < 0)
+ goto out;
+ if ((size_t)r != n)
+ goto out;
+ len -= n;
+ }
+
+ err = 0;
+out:
+ free(buf);
+ return err;
+}
+
+struct kcore {
+ int fd;
+ int elfclass;
+ Elf *elf;
+ GElf_Ehdr ehdr;
+};
+
+static int kcore__open(struct kcore *kcore, const char *filename)
+{
+ GElf_Ehdr *ehdr;
+
+ kcore->fd = open(filename, O_RDONLY);
+ if (kcore->fd == -1)
+ return -1;
+
+ kcore->elf = elf_begin(kcore->fd, ELF_C_READ, NULL);
+ if (!kcore->elf)
+ goto out_close;
+
+ kcore->elfclass = gelf_getclass(kcore->elf);
+ if (kcore->elfclass == ELFCLASSNONE)
+ goto out_end;
+
+ ehdr = gelf_getehdr(kcore->elf, &kcore->ehdr);
+ if (!ehdr)
+ goto out_end;
+
+ return 0;
+
+out_end:
+ elf_end(kcore->elf);
+out_close:
+ close(kcore->fd);
+ return -1;
+}
+
+static int kcore__init(struct kcore *kcore, char *filename, int elfclass,
+ bool temp)
+{
+ kcore->elfclass = elfclass;
+
+ if (temp)
+ kcore->fd = mkstemp(filename);
+ else
+ kcore->fd = open(filename, O_WRONLY | O_CREAT | O_EXCL, 0400);
+ if (kcore->fd == -1)
+ return -1;
+
+ kcore->elf = elf_begin(kcore->fd, ELF_C_WRITE, NULL);
+ if (!kcore->elf)
+ goto out_close;
+
+ if (!gelf_newehdr(kcore->elf, elfclass))
+ goto out_end;
+
+ memset(&kcore->ehdr, 0, sizeof(GElf_Ehdr));
+
+ return 0;
+
+out_end:
+ elf_end(kcore->elf);
+out_close:
+ close(kcore->fd);
+ unlink(filename);
+ return -1;
+}
+
+static void kcore__close(struct kcore *kcore)
+{
+ elf_end(kcore->elf);
+ close(kcore->fd);
+}
+
+static int kcore__copy_hdr(struct kcore *from, struct kcore *to, size_t count)
+{
+ GElf_Ehdr *ehdr = &to->ehdr;
+ GElf_Ehdr *kehdr = &from->ehdr;
+
+ memcpy(ehdr->e_ident, kehdr->e_ident, EI_NIDENT);
+ ehdr->e_type = kehdr->e_type;
+ ehdr->e_machine = kehdr->e_machine;
+ ehdr->e_version = kehdr->e_version;
+ ehdr->e_entry = 0;
+ ehdr->e_shoff = 0;
+ ehdr->e_flags = kehdr->e_flags;
+ ehdr->e_phnum = count;
+ ehdr->e_shentsize = 0;
+ ehdr->e_shnum = 0;
+ ehdr->e_shstrndx = 0;
+
+ if (from->elfclass == ELFCLASS32) {
+ ehdr->e_phoff = sizeof(Elf32_Ehdr);
+ ehdr->e_ehsize = sizeof(Elf32_Ehdr);
+ ehdr->e_phentsize = sizeof(Elf32_Phdr);
+ } else {
+ ehdr->e_phoff = sizeof(Elf64_Ehdr);
+ ehdr->e_ehsize = sizeof(Elf64_Ehdr);
+ ehdr->e_phentsize = sizeof(Elf64_Phdr);
+ }
+
+ if (!gelf_update_ehdr(to->elf, ehdr))
+ return -1;
+
+ if (!gelf_newphdr(to->elf, count))
+ return -1;
+
+ return 0;
+}
+
+static int kcore__add_phdr(struct kcore *kcore, int idx, off_t offset,
+ u64 addr, u64 len)
+{
+ GElf_Phdr phdr = {
+ .p_type = PT_LOAD,
+ .p_flags = PF_R | PF_W | PF_X,
+ .p_offset = offset,
+ .p_vaddr = addr,
+ .p_paddr = 0,
+ .p_filesz = len,
+ .p_memsz = len,
+ .p_align = page_size,
+ };
+
+ if (!gelf_update_phdr(kcore->elf, idx, &phdr))
+ return -1;
+
+ return 0;
+}
+
+static off_t kcore__write(struct kcore *kcore)
+{
+ return elf_update(kcore->elf, ELF_C_WRITE);
+}
+
+struct phdr_data {
+ off_t offset;
+ off_t rel;
+ u64 addr;
+ u64 len;
+ struct list_head node;
+ struct phdr_data *remaps;
+};
+
+struct sym_data {
+ u64 addr;
+ struct list_head node;
+};
+
+struct kcore_copy_info {
+ u64 stext;
+ u64 etext;
+ u64 first_symbol;
+ u64 last_symbol;
+ u64 first_module;
+ u64 first_module_symbol;
+ u64 last_module_symbol;
+ size_t phnum;
+ struct list_head phdrs;
+ struct list_head syms;
+};
+
+#define kcore_copy__for_each_phdr(k, p) \
+ list_for_each_entry((p), &(k)->phdrs, node)
+
+static struct phdr_data *phdr_data__new(u64 addr, u64 len, off_t offset)
+{
+ struct phdr_data *p = zalloc(sizeof(*p));
+
+ if (p) {
+ p->addr = addr;
+ p->len = len;
+ p->offset = offset;
+ }
+
+ return p;
+}
+
+static struct phdr_data *kcore_copy_info__addnew(struct kcore_copy_info *kci,
+ u64 addr, u64 len,
+ off_t offset)
+{
+ struct phdr_data *p = phdr_data__new(addr, len, offset);
+
+ if (p)
+ list_add_tail(&p->node, &kci->phdrs);
+
+ return p;
+}
+
+static void kcore_copy__free_phdrs(struct kcore_copy_info *kci)
+{
+ struct phdr_data *p, *tmp;
+
+ list_for_each_entry_safe(p, tmp, &kci->phdrs, node) {
+ list_del_init(&p->node);
+ free(p);
+ }
+}
+
+static struct sym_data *kcore_copy__new_sym(struct kcore_copy_info *kci,
+ u64 addr)
+{
+ struct sym_data *s = zalloc(sizeof(*s));
+
+ if (s) {
+ s->addr = addr;
+ list_add_tail(&s->node, &kci->syms);
+ }
+
+ return s;
+}
+
+static void kcore_copy__free_syms(struct kcore_copy_info *kci)
+{
+ struct sym_data *s, *tmp;
+
+ list_for_each_entry_safe(s, tmp, &kci->syms, node) {
+ list_del_init(&s->node);
+ free(s);
+ }
+}
+
+static int kcore_copy__process_kallsyms(void *arg, const char *name, char type,
+ u64 start)
+{
+ struct kcore_copy_info *kci = arg;
+
+ if (!kallsyms__is_function(type))
+ return 0;
+
+ if (strchr(name, '[')) {
+ if (!kci->first_module_symbol || start < kci->first_module_symbol)
+ kci->first_module_symbol = start;
+ if (start > kci->last_module_symbol)
+ kci->last_module_symbol = start;
+ return 0;
+ }
+
+ if (!kci->first_symbol || start < kci->first_symbol)
+ kci->first_symbol = start;
+
+ if (!kci->last_symbol || start > kci->last_symbol)
+ kci->last_symbol = start;
+
+ if (!strcmp(name, "_stext")) {
+ kci->stext = start;
+ return 0;
+ }
+
+ if (!strcmp(name, "_etext")) {
+ kci->etext = start;
+ return 0;
+ }
+
+ if (is_entry_trampoline(name) && !kcore_copy__new_sym(kci, start))
+ return -1;
+
+ return 0;
+}
+
+static int kcore_copy__parse_kallsyms(struct kcore_copy_info *kci,
+ const char *dir)
+{
+ char kallsyms_filename[PATH_MAX];
+
+ scnprintf(kallsyms_filename, PATH_MAX, "%s/kallsyms", dir);
+
+ if (symbol__restricted_filename(kallsyms_filename, "/proc/kallsyms"))
+ return -1;
+
+ if (kallsyms__parse(kallsyms_filename, kci,
+ kcore_copy__process_kallsyms) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int kcore_copy__process_modules(void *arg,
+ const char *name __maybe_unused,
+ u64 start, u64 size __maybe_unused)
+{
+ struct kcore_copy_info *kci = arg;
+
+ if (!kci->first_module || start < kci->first_module)
+ kci->first_module = start;
+
+ return 0;
+}
+
+static int kcore_copy__parse_modules(struct kcore_copy_info *kci,
+ const char *dir)
+{
+ char modules_filename[PATH_MAX];
+
+ scnprintf(modules_filename, PATH_MAX, "%s/modules", dir);
+
+ if (symbol__restricted_filename(modules_filename, "/proc/modules"))
+ return -1;
+
+ if (modules__parse(modules_filename, kci,
+ kcore_copy__process_modules) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int kcore_copy__map(struct kcore_copy_info *kci, u64 start, u64 end,
+ u64 pgoff, u64 s, u64 e)
+{
+ u64 len, offset;
+
+ if (s < start || s >= end)
+ return 0;
+
+ offset = (s - start) + pgoff;
+ len = e < end ? e - s : end - s;
+
+ return kcore_copy_info__addnew(kci, s, len, offset) ? 0 : -1;
+}
+
+static int kcore_copy__read_map(u64 start, u64 len, u64 pgoff, void *data)
+{
+ struct kcore_copy_info *kci = data;
+ u64 end = start + len;
+ struct sym_data *sdat;
+
+ if (kcore_copy__map(kci, start, end, pgoff, kci->stext, kci->etext))
+ return -1;
+
+ if (kcore_copy__map(kci, start, end, pgoff, kci->first_module,
+ kci->last_module_symbol))
+ return -1;
+
+ list_for_each_entry(sdat, &kci->syms, node) {
+ u64 s = round_down(sdat->addr, page_size);
+
+ if (kcore_copy__map(kci, start, end, pgoff, s, s + len))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int kcore_copy__read_maps(struct kcore_copy_info *kci, Elf *elf)
+{
+ if (elf_read_maps(elf, true, kcore_copy__read_map, kci) < 0)
+ return -1;
+
+ return 0;
+}
+
+static void kcore_copy__find_remaps(struct kcore_copy_info *kci)
+{
+ struct phdr_data *p, *k = NULL;
+ u64 kend;
+
+ if (!kci->stext)
+ return;
+
+ /* Find phdr that corresponds to the kernel map (contains stext) */
+ kcore_copy__for_each_phdr(kci, p) {
+ u64 pend = p->addr + p->len - 1;
+
+ if (p->addr <= kci->stext && pend >= kci->stext) {
+ k = p;
+ break;
+ }
+ }
+
+ if (!k)
+ return;
+
+ kend = k->offset + k->len;
+
+ /* Find phdrs that remap the kernel */
+ kcore_copy__for_each_phdr(kci, p) {
+ u64 pend = p->offset + p->len;
+
+ if (p == k)
+ continue;
+
+ if (p->offset >= k->offset && pend <= kend)
+ p->remaps = k;
+ }
+}
+
+static void kcore_copy__layout(struct kcore_copy_info *kci)
+{
+ struct phdr_data *p;
+ off_t rel = 0;
+
+ kcore_copy__find_remaps(kci);
+
+ kcore_copy__for_each_phdr(kci, p) {
+ if (!p->remaps) {
+ p->rel = rel;
+ rel += p->len;
+ }
+ kci->phnum += 1;
+ }
+
+ kcore_copy__for_each_phdr(kci, p) {
+ struct phdr_data *k = p->remaps;
+
+ if (k)
+ p->rel = p->offset - k->offset + k->rel;
+ }
+}
+
+static int kcore_copy__calc_maps(struct kcore_copy_info *kci, const char *dir,
+ Elf *elf)
+{
+ if (kcore_copy__parse_kallsyms(kci, dir))
+ return -1;
+
+ if (kcore_copy__parse_modules(kci, dir))
+ return -1;
+
+ if (kci->stext)
+ kci->stext = round_down(kci->stext, page_size);
+ else
+ kci->stext = round_down(kci->first_symbol, page_size);
+
+ if (kci->etext) {
+ kci->etext = round_up(kci->etext, page_size);
+ } else if (kci->last_symbol) {
+ kci->etext = round_up(kci->last_symbol, page_size);
+ kci->etext += page_size;
+ }
+
+ if (kci->first_module_symbol &&
+ (!kci->first_module || kci->first_module_symbol < kci->first_module))
+ kci->first_module = kci->first_module_symbol;
+
+ kci->first_module = round_down(kci->first_module, page_size);
+
+ if (kci->last_module_symbol) {
+ kci->last_module_symbol = round_up(kci->last_module_symbol,
+ page_size);
+ kci->last_module_symbol += page_size;
+ }
+
+ if (!kci->stext || !kci->etext)
+ return -1;
+
+ if (kci->first_module && !kci->last_module_symbol)
+ return -1;
+
+ if (kcore_copy__read_maps(kci, elf))
+ return -1;
+
+ kcore_copy__layout(kci);
+
+ return 0;
+}
+
+static int kcore_copy__copy_file(const char *from_dir, const char *to_dir,
+ const char *name)
+{
+ char from_filename[PATH_MAX];
+ char to_filename[PATH_MAX];
+
+ scnprintf(from_filename, PATH_MAX, "%s/%s", from_dir, name);
+ scnprintf(to_filename, PATH_MAX, "%s/%s", to_dir, name);
+
+ return copyfile_mode(from_filename, to_filename, 0400);
+}
+
+static int kcore_copy__unlink(const char *dir, const char *name)
+{
+ char filename[PATH_MAX];
+
+ scnprintf(filename, PATH_MAX, "%s/%s", dir, name);
+
+ return unlink(filename);
+}
+
+static int kcore_copy__compare_fds(int from, int to)
+{
+ char *buf_from;
+ char *buf_to;
+ ssize_t ret;
+ size_t len;
+ int err = -1;
+
+ buf_from = malloc(page_size);
+ buf_to = malloc(page_size);
+ if (!buf_from || !buf_to)
+ goto out;
+
+ while (1) {
+ /* Use read because mmap won't work on proc files */
+ ret = read(from, buf_from, page_size);
+ if (ret < 0)
+ goto out;
+
+ if (!ret)
+ break;
+
+ len = ret;
+
+ if (readn(to, buf_to, len) != (int)len)
+ goto out;
+
+ if (memcmp(buf_from, buf_to, len))
+ goto out;
+ }
+
+ err = 0;
+out:
+ free(buf_to);
+ free(buf_from);
+ return err;
+}
+
+static int kcore_copy__compare_files(const char *from_filename,
+ const char *to_filename)
+{
+ int from, to, err = -1;
+
+ from = open(from_filename, O_RDONLY);
+ if (from < 0)
+ return -1;
+
+ to = open(to_filename, O_RDONLY);
+ if (to < 0)
+ goto out_close_from;
+
+ err = kcore_copy__compare_fds(from, to);
+
+ close(to);
+out_close_from:
+ close(from);
+ return err;
+}
+
+static int kcore_copy__compare_file(const char *from_dir, const char *to_dir,
+ const char *name)
+{
+ char from_filename[PATH_MAX];
+ char to_filename[PATH_MAX];
+
+ scnprintf(from_filename, PATH_MAX, "%s/%s", from_dir, name);
+ scnprintf(to_filename, PATH_MAX, "%s/%s", to_dir, name);
+
+ return kcore_copy__compare_files(from_filename, to_filename);
+}
+
+/**
+ * kcore_copy - copy kallsyms, modules and kcore from one directory to another.
+ * @from_dir: from directory
+ * @to_dir: to directory
+ *
+ * This function copies kallsyms, modules and kcore files from one directory to
+ * another. kallsyms and modules are copied entirely. Only code segments are
+ * copied from kcore. It is assumed that two segments suffice: one for the
+ * kernel proper and one for all the modules. The code segments are determined
+ * from kallsyms and modules files. The kernel map starts at _stext or the
+ * lowest function symbol, and ends at _etext or the highest function symbol.
+ * The module map starts at the lowest module address and ends at the highest
+ * module symbol. Start addresses are rounded down to the nearest page. End
+ * addresses are rounded up to the nearest page. An extra page is added to the
+ * highest kernel symbol and highest module symbol to, hopefully, encompass that
+ * symbol too. Because it contains only code sections, the resulting kcore is
+ * unusual. One significant peculiarity is that the mapping (start -> pgoff)
+ * is not the same for the kernel map and the modules map. That happens because
+ * the data is copied adjacently whereas the original kcore has gaps. Finally,
+ * kallsyms file is compared with its copy to check that modules have not been
+ * loaded or unloaded while the copies were taking place.
+ *
+ * Return: %0 on success, %-1 on failure.
+ */
+int kcore_copy(const char *from_dir, const char *to_dir)
+{
+ struct kcore kcore;
+ struct kcore extract;
+ int idx = 0, err = -1;
+ off_t offset, sz;
+ struct kcore_copy_info kci = { .stext = 0, };
+ char kcore_filename[PATH_MAX];
+ char extract_filename[PATH_MAX];
+ struct phdr_data *p;
+
+ INIT_LIST_HEAD(&kci.phdrs);
+ INIT_LIST_HEAD(&kci.syms);
+
+ if (kcore_copy__copy_file(from_dir, to_dir, "kallsyms"))
+ return -1;
+
+ if (kcore_copy__copy_file(from_dir, to_dir, "modules"))
+ goto out_unlink_kallsyms;
+
+ scnprintf(kcore_filename, PATH_MAX, "%s/kcore", from_dir);
+ scnprintf(extract_filename, PATH_MAX, "%s/kcore", to_dir);
+
+ if (kcore__open(&kcore, kcore_filename))
+ goto out_unlink_modules;
+
+ if (kcore_copy__calc_maps(&kci, from_dir, kcore.elf))
+ goto out_kcore_close;
+
+ if (kcore__init(&extract, extract_filename, kcore.elfclass, false))
+ goto out_kcore_close;
+
+ if (kcore__copy_hdr(&kcore, &extract, kci.phnum))
+ goto out_extract_close;
+
+ offset = gelf_fsize(extract.elf, ELF_T_EHDR, 1, EV_CURRENT) +
+ gelf_fsize(extract.elf, ELF_T_PHDR, kci.phnum, EV_CURRENT);
+ offset = round_up(offset, page_size);
+
+ kcore_copy__for_each_phdr(&kci, p) {
+ off_t offs = p->rel + offset;
+
+ if (kcore__add_phdr(&extract, idx++, offs, p->addr, p->len))
+ goto out_extract_close;
+ }
+
+ sz = kcore__write(&extract);
+ if (sz < 0 || sz > offset)
+ goto out_extract_close;
+
+ kcore_copy__for_each_phdr(&kci, p) {
+ off_t offs = p->rel + offset;
+
+ if (p->remaps)
+ continue;
+ if (copy_bytes(kcore.fd, p->offset, extract.fd, offs, p->len))
+ goto out_extract_close;
+ }
+
+ if (kcore_copy__compare_file(from_dir, to_dir, "kallsyms"))
+ goto out_extract_close;
+
+ err = 0;
+
+out_extract_close:
+ kcore__close(&extract);
+ if (err)
+ unlink(extract_filename);
+out_kcore_close:
+ kcore__close(&kcore);
+out_unlink_modules:
+ if (err)
+ kcore_copy__unlink(to_dir, "modules");
+out_unlink_kallsyms:
+ if (err)
+ kcore_copy__unlink(to_dir, "kallsyms");
+
+ kcore_copy__free_phdrs(&kci);
+ kcore_copy__free_syms(&kci);
+
+ return err;
+}
+
+int kcore_extract__create(struct kcore_extract *kce)
+{
+ struct kcore kcore;
+ struct kcore extract;
+ size_t count = 1;
+ int idx = 0, err = -1;
+ off_t offset = page_size, sz;
+
+ if (kcore__open(&kcore, kce->kcore_filename))
+ return -1;
+
+ strcpy(kce->extract_filename, PERF_KCORE_EXTRACT);
+ if (kcore__init(&extract, kce->extract_filename, kcore.elfclass, true))
+ goto out_kcore_close;
+
+ if (kcore__copy_hdr(&kcore, &extract, count))
+ goto out_extract_close;
+
+ if (kcore__add_phdr(&extract, idx, offset, kce->addr, kce->len))
+ goto out_extract_close;
+
+ sz = kcore__write(&extract);
+ if (sz < 0 || sz > offset)
+ goto out_extract_close;
+
+ if (copy_bytes(kcore.fd, kce->offs, extract.fd, offset, kce->len))
+ goto out_extract_close;
+
+ err = 0;
+
+out_extract_close:
+ kcore__close(&extract);
+ if (err)
+ unlink(kce->extract_filename);
+out_kcore_close:
+ kcore__close(&kcore);
+
+ return err;
+}
+
+void kcore_extract__delete(struct kcore_extract *kce)
+{
+ unlink(kce->extract_filename);
+}
+
+#ifdef HAVE_GELF_GETNOTE_SUPPORT
+
+static void sdt_adjust_loc(struct sdt_note *tmp, GElf_Addr base_off)
+{
+ if (!base_off)
+ return;
+
+ if (tmp->bit32)
+ tmp->addr.a32[SDT_NOTE_IDX_LOC] =
+ tmp->addr.a32[SDT_NOTE_IDX_LOC] + base_off -
+ tmp->addr.a32[SDT_NOTE_IDX_BASE];
+ else
+ tmp->addr.a64[SDT_NOTE_IDX_LOC] =
+ tmp->addr.a64[SDT_NOTE_IDX_LOC] + base_off -
+ tmp->addr.a64[SDT_NOTE_IDX_BASE];
+}
+
+static void sdt_adjust_refctr(struct sdt_note *tmp, GElf_Addr base_addr,
+ GElf_Addr base_off)
+{
+ if (!base_off)
+ return;
+
+ if (tmp->bit32 && tmp->addr.a32[SDT_NOTE_IDX_REFCTR])
+ tmp->addr.a32[SDT_NOTE_IDX_REFCTR] -= (base_addr - base_off);
+ else if (tmp->addr.a64[SDT_NOTE_IDX_REFCTR])
+ tmp->addr.a64[SDT_NOTE_IDX_REFCTR] -= (base_addr - base_off);
+}
+
+/**
+ * populate_sdt_note : Parse raw data and identify SDT note
+ * @elf: elf of the opened file
+ * @data: raw data of a section with description offset applied
+ * @len: note description size
+ * @type: type of the note
+ * @sdt_notes: List to add the SDT note
+ *
+ * Responsible for parsing the @data in section .note.stapsdt in @elf and
+ * if its an SDT note, it appends to @sdt_notes list.
+ */
+static int populate_sdt_note(Elf **elf, const char *data, size_t len,
+ struct list_head *sdt_notes)
+{
+ const char *provider, *name, *args;
+ struct sdt_note *tmp = NULL;
+ GElf_Ehdr ehdr;
+ GElf_Shdr shdr;
+ int ret = -EINVAL;
+
+ union {
+ Elf64_Addr a64[NR_ADDR];
+ Elf32_Addr a32[NR_ADDR];
+ } buf;
+
+ Elf_Data dst = {
+ .d_buf = &buf, .d_type = ELF_T_ADDR, .d_version = EV_CURRENT,
+ .d_size = gelf_fsize((*elf), ELF_T_ADDR, NR_ADDR, EV_CURRENT),
+ .d_off = 0, .d_align = 0
+ };
+ Elf_Data src = {
+ .d_buf = (void *) data, .d_type = ELF_T_ADDR,
+ .d_version = EV_CURRENT, .d_size = dst.d_size, .d_off = 0,
+ .d_align = 0
+ };
+
+ tmp = (struct sdt_note *)calloc(1, sizeof(struct sdt_note));
+ if (!tmp) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ INIT_LIST_HEAD(&tmp->note_list);
+
+ if (len < dst.d_size + 3)
+ goto out_free_note;
+
+ /* Translation from file representation to memory representation */
+ if (gelf_xlatetom(*elf, &dst, &src,
+ elf_getident(*elf, NULL)[EI_DATA]) == NULL) {
+ pr_err("gelf_xlatetom : %s\n", elf_errmsg(-1));
+ goto out_free_note;
+ }
+
+ /* Populate the fields of sdt_note */
+ provider = data + dst.d_size;
+
+ name = (const char *)memchr(provider, '\0', data + len - provider);
+ if (name++ == NULL)
+ goto out_free_note;
+
+ tmp->provider = strdup(provider);
+ if (!tmp->provider) {
+ ret = -ENOMEM;
+ goto out_free_note;
+ }
+ tmp->name = strdup(name);
+ if (!tmp->name) {
+ ret = -ENOMEM;
+ goto out_free_prov;
+ }
+
+ args = memchr(name, '\0', data + len - name);
+
+ /*
+ * There is no argument if:
+ * - We reached the end of the note;
+ * - There is not enough room to hold a potential string;
+ * - The argument string is empty or just contains ':'.
+ */
+ if (args == NULL || data + len - args < 2 ||
+ args[1] == ':' || args[1] == '\0')
+ tmp->args = NULL;
+ else {
+ tmp->args = strdup(++args);
+ if (!tmp->args) {
+ ret = -ENOMEM;
+ goto out_free_name;
+ }
+ }
+
+ if (gelf_getclass(*elf) == ELFCLASS32) {
+ memcpy(&tmp->addr, &buf, 3 * sizeof(Elf32_Addr));
+ tmp->bit32 = true;
+ } else {
+ memcpy(&tmp->addr, &buf, 3 * sizeof(Elf64_Addr));
+ tmp->bit32 = false;
+ }
+
+ if (!gelf_getehdr(*elf, &ehdr)) {
+ pr_debug("%s : cannot get elf header.\n", __func__);
+ ret = -EBADF;
+ goto out_free_args;
+ }
+
+ /* Adjust the prelink effect :
+ * Find out the .stapsdt.base section.
+ * This scn will help us to handle prelinking (if present).
+ * Compare the retrieved file offset of the base section with the
+ * base address in the description of the SDT note. If its different,
+ * then accordingly, adjust the note location.
+ */
+ if (elf_section_by_name(*elf, &ehdr, &shdr, SDT_BASE_SCN, NULL))
+ sdt_adjust_loc(tmp, shdr.sh_offset);
+
+ /* Adjust reference counter offset */
+ if (elf_section_by_name(*elf, &ehdr, &shdr, SDT_PROBES_SCN, NULL))
+ sdt_adjust_refctr(tmp, shdr.sh_addr, shdr.sh_offset);
+
+ list_add_tail(&tmp->note_list, sdt_notes);
+ return 0;
+
+out_free_args:
+ zfree(&tmp->args);
+out_free_name:
+ zfree(&tmp->name);
+out_free_prov:
+ zfree(&tmp->provider);
+out_free_note:
+ free(tmp);
+out_err:
+ return ret;
+}
+
+/**
+ * construct_sdt_notes_list : constructs a list of SDT notes
+ * @elf : elf to look into
+ * @sdt_notes : empty list_head
+ *
+ * Scans the sections in 'elf' for the section
+ * .note.stapsdt. It, then calls populate_sdt_note to find
+ * out the SDT events and populates the 'sdt_notes'.
+ */
+static int construct_sdt_notes_list(Elf *elf, struct list_head *sdt_notes)
+{
+ GElf_Ehdr ehdr;
+ Elf_Scn *scn = NULL;
+ Elf_Data *data;
+ GElf_Shdr shdr;
+ size_t shstrndx, next;
+ GElf_Nhdr nhdr;
+ size_t name_off, desc_off, offset;
+ int ret = 0;
+
+ if (gelf_getehdr(elf, &ehdr) == NULL) {
+ ret = -EBADF;
+ goto out_ret;
+ }
+ if (elf_getshdrstrndx(elf, &shstrndx) != 0) {
+ ret = -EBADF;
+ goto out_ret;
+ }
+
+ /* Look for the required section */
+ scn = elf_section_by_name(elf, &ehdr, &shdr, SDT_NOTE_SCN, NULL);
+ if (!scn) {
+ ret = -ENOENT;
+ goto out_ret;
+ }
+
+ if ((shdr.sh_type != SHT_NOTE) || (shdr.sh_flags & SHF_ALLOC)) {
+ ret = -ENOENT;
+ goto out_ret;
+ }
+
+ data = elf_getdata(scn, NULL);
+
+ /* Get the SDT notes */
+ for (offset = 0; (next = gelf_getnote(data, offset, &nhdr, &name_off,
+ &desc_off)) > 0; offset = next) {
+ if (nhdr.n_namesz == sizeof(SDT_NOTE_NAME) &&
+ !memcmp(data->d_buf + name_off, SDT_NOTE_NAME,
+ sizeof(SDT_NOTE_NAME))) {
+ /* Check the type of the note */
+ if (nhdr.n_type != SDT_NOTE_TYPE)
+ goto out_ret;
+
+ ret = populate_sdt_note(&elf, ((data->d_buf) + desc_off),
+ nhdr.n_descsz, sdt_notes);
+ if (ret < 0)
+ goto out_ret;
+ }
+ }
+ if (list_empty(sdt_notes))
+ ret = -ENOENT;
+
+out_ret:
+ return ret;
+}
+
+/**
+ * get_sdt_note_list : Wrapper to construct a list of sdt notes
+ * @head : empty list_head
+ * @target : file to find SDT notes from
+ *
+ * This opens the file, initializes
+ * the ELF and then calls construct_sdt_notes_list.
+ */
+int get_sdt_note_list(struct list_head *head, const char *target)
+{
+ Elf *elf;
+ int fd, ret;
+
+ fd = open(target, O_RDONLY);
+ if (fd < 0)
+ return -EBADF;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (!elf) {
+ ret = -EBADF;
+ goto out_close;
+ }
+ ret = construct_sdt_notes_list(elf, head);
+ elf_end(elf);
+out_close:
+ close(fd);
+ return ret;
+}
+
+/**
+ * cleanup_sdt_note_list : free the sdt notes' list
+ * @sdt_notes: sdt notes' list
+ *
+ * Free up the SDT notes in @sdt_notes.
+ * Returns the number of SDT notes free'd.
+ */
+int cleanup_sdt_note_list(struct list_head *sdt_notes)
+{
+ struct sdt_note *tmp, *pos;
+ int nr_free = 0;
+
+ list_for_each_entry_safe(pos, tmp, sdt_notes, note_list) {
+ list_del_init(&pos->note_list);
+ zfree(&pos->args);
+ zfree(&pos->name);
+ zfree(&pos->provider);
+ free(pos);
+ nr_free++;
+ }
+ return nr_free;
+}
+
+/**
+ * sdt_notes__get_count: Counts the number of sdt events
+ * @start: list_head to sdt_notes list
+ *
+ * Returns the number of SDT notes in a list
+ */
+int sdt_notes__get_count(struct list_head *start)
+{
+ struct sdt_note *sdt_ptr;
+ int count = 0;
+
+ list_for_each_entry(sdt_ptr, start, note_list)
+ count++;
+ return count;
+}
+#endif
+
+void symbol__elf_init(void)
+{
+ elf_version(EV_CURRENT);
+}
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
new file mode 100644
index 000000000..a81a14769
--- /dev/null
+++ b/tools/perf/util/symbol-minimal.c
@@ -0,0 +1,392 @@
+#include "dso.h"
+#include "symbol.h"
+#include "symsrc.h"
+
+#include <errno.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdlib.h>
+#include <byteswap.h>
+#include <sys/stat.h>
+#include <linux/zalloc.h>
+#include <internal/lib.h>
+
+static bool check_need_swap(int file_endian)
+{
+ const int data = 1;
+ u8 *check = (u8 *)&data;
+ int host_endian;
+
+ if (check[0] == 1)
+ host_endian = ELFDATA2LSB;
+ else
+ host_endian = ELFDATA2MSB;
+
+ return host_endian != file_endian;
+}
+
+#define NOTE_ALIGN(sz) (((sz) + 3) & ~3)
+
+#define NT_GNU_BUILD_ID 3
+
+static int read_build_id(void *note_data, size_t note_len, struct build_id *bid,
+ bool need_swap)
+{
+ size_t size = sizeof(bid->data);
+ struct {
+ u32 n_namesz;
+ u32 n_descsz;
+ u32 n_type;
+ } *nhdr;
+ void *ptr;
+
+ ptr = note_data;
+ while (ptr < (note_data + note_len)) {
+ const char *name;
+ size_t namesz, descsz;
+
+ nhdr = ptr;
+ if (need_swap) {
+ nhdr->n_namesz = bswap_32(nhdr->n_namesz);
+ nhdr->n_descsz = bswap_32(nhdr->n_descsz);
+ nhdr->n_type = bswap_32(nhdr->n_type);
+ }
+
+ namesz = NOTE_ALIGN(nhdr->n_namesz);
+ descsz = NOTE_ALIGN(nhdr->n_descsz);
+
+ ptr += sizeof(*nhdr);
+ name = ptr;
+ ptr += namesz;
+ if (nhdr->n_type == NT_GNU_BUILD_ID &&
+ nhdr->n_namesz == sizeof("GNU")) {
+ if (memcmp(name, "GNU", sizeof("GNU")) == 0) {
+ size_t sz = min(size, descsz);
+ memcpy(bid->data, ptr, sz);
+ memset(bid->data + sz, 0, size - sz);
+ bid->size = sz;
+ return 0;
+ }
+ }
+ ptr += descsz;
+ }
+
+ return -1;
+}
+
+int filename__read_debuglink(const char *filename __maybe_unused,
+ char *debuglink __maybe_unused,
+ size_t size __maybe_unused)
+{
+ return -1;
+}
+
+/*
+ * Just try PT_NOTE header otherwise fails
+ */
+int filename__read_build_id(const char *filename, struct build_id *bid)
+{
+ FILE *fp;
+ int ret = -1;
+ bool need_swap = false;
+ u8 e_ident[EI_NIDENT];
+ size_t buf_size;
+ void *buf;
+ int i;
+
+ fp = fopen(filename, "r");
+ if (fp == NULL)
+ return -1;
+
+ if (fread(e_ident, sizeof(e_ident), 1, fp) != 1)
+ goto out;
+
+ if (memcmp(e_ident, ELFMAG, SELFMAG) ||
+ e_ident[EI_VERSION] != EV_CURRENT)
+ goto out;
+
+ need_swap = check_need_swap(e_ident[EI_DATA]);
+
+ /* for simplicity */
+ fseek(fp, 0, SEEK_SET);
+
+ if (e_ident[EI_CLASS] == ELFCLASS32) {
+ Elf32_Ehdr ehdr;
+ Elf32_Phdr *phdr;
+
+ if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1)
+ goto out;
+
+ if (need_swap) {
+ ehdr.e_phoff = bswap_32(ehdr.e_phoff);
+ ehdr.e_phentsize = bswap_16(ehdr.e_phentsize);
+ ehdr.e_phnum = bswap_16(ehdr.e_phnum);
+ }
+
+ buf_size = ehdr.e_phentsize * ehdr.e_phnum;
+ buf = malloc(buf_size);
+ if (buf == NULL)
+ goto out;
+
+ fseek(fp, ehdr.e_phoff, SEEK_SET);
+ if (fread(buf, buf_size, 1, fp) != 1)
+ goto out_free;
+
+ for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) {
+ void *tmp;
+ long offset;
+
+ if (need_swap) {
+ phdr->p_type = bswap_32(phdr->p_type);
+ phdr->p_offset = bswap_32(phdr->p_offset);
+ phdr->p_filesz = bswap_32(phdr->p_filesz);
+ }
+
+ if (phdr->p_type != PT_NOTE)
+ continue;
+
+ buf_size = phdr->p_filesz;
+ offset = phdr->p_offset;
+ tmp = realloc(buf, buf_size);
+ if (tmp == NULL)
+ goto out_free;
+
+ buf = tmp;
+ fseek(fp, offset, SEEK_SET);
+ if (fread(buf, buf_size, 1, fp) != 1)
+ goto out_free;
+
+ ret = read_build_id(buf, buf_size, bid, need_swap);
+ if (ret == 0)
+ ret = bid->size;
+ break;
+ }
+ } else {
+ Elf64_Ehdr ehdr;
+ Elf64_Phdr *phdr;
+
+ if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1)
+ goto out;
+
+ if (need_swap) {
+ ehdr.e_phoff = bswap_64(ehdr.e_phoff);
+ ehdr.e_phentsize = bswap_16(ehdr.e_phentsize);
+ ehdr.e_phnum = bswap_16(ehdr.e_phnum);
+ }
+
+ buf_size = ehdr.e_phentsize * ehdr.e_phnum;
+ buf = malloc(buf_size);
+ if (buf == NULL)
+ goto out;
+
+ fseek(fp, ehdr.e_phoff, SEEK_SET);
+ if (fread(buf, buf_size, 1, fp) != 1)
+ goto out_free;
+
+ for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) {
+ void *tmp;
+ long offset;
+
+ if (need_swap) {
+ phdr->p_type = bswap_32(phdr->p_type);
+ phdr->p_offset = bswap_64(phdr->p_offset);
+ phdr->p_filesz = bswap_64(phdr->p_filesz);
+ }
+
+ if (phdr->p_type != PT_NOTE)
+ continue;
+
+ buf_size = phdr->p_filesz;
+ offset = phdr->p_offset;
+ tmp = realloc(buf, buf_size);
+ if (tmp == NULL)
+ goto out_free;
+
+ buf = tmp;
+ fseek(fp, offset, SEEK_SET);
+ if (fread(buf, buf_size, 1, fp) != 1)
+ goto out_free;
+
+ ret = read_build_id(buf, buf_size, bid, need_swap);
+ if (ret == 0)
+ ret = bid->size;
+ break;
+ }
+ }
+out_free:
+ free(buf);
+out:
+ fclose(fp);
+ return ret;
+}
+
+int sysfs__read_build_id(const char *filename, struct build_id *bid)
+{
+ int fd;
+ int ret = -1;
+ struct stat stbuf;
+ size_t buf_size;
+ void *buf;
+
+ fd = open(filename, O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ if (fstat(fd, &stbuf) < 0)
+ goto out;
+
+ buf_size = stbuf.st_size;
+ buf = malloc(buf_size);
+ if (buf == NULL)
+ goto out;
+
+ if (read(fd, buf, buf_size) != (ssize_t) buf_size)
+ goto out_free;
+
+ ret = read_build_id(buf, buf_size, bid, false);
+out_free:
+ free(buf);
+out:
+ close(fd);
+ return ret;
+}
+
+int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
+ enum dso_binary_type type)
+{
+ int fd = open(name, O_RDONLY);
+ if (fd < 0)
+ goto out_errno;
+
+ ss->name = strdup(name);
+ if (!ss->name)
+ goto out_close;
+
+ ss->fd = fd;
+ ss->type = type;
+
+ return 0;
+out_close:
+ close(fd);
+out_errno:
+ dso->load_errno = errno;
+ return -1;
+}
+
+bool symsrc__possibly_runtime(struct symsrc *ss __maybe_unused)
+{
+ /* Assume all sym sources could be a runtime image. */
+ return true;
+}
+
+bool symsrc__has_symtab(struct symsrc *ss __maybe_unused)
+{
+ return false;
+}
+
+void symsrc__destroy(struct symsrc *ss)
+{
+ zfree(&ss->name);
+ close(ss->fd);
+}
+
+int dso__synthesize_plt_symbols(struct dso *dso __maybe_unused,
+ struct symsrc *ss __maybe_unused)
+{
+ return 0;
+}
+
+static int fd__is_64_bit(int fd)
+{
+ u8 e_ident[EI_NIDENT];
+
+ if (lseek(fd, 0, SEEK_SET))
+ return -1;
+
+ if (readn(fd, e_ident, sizeof(e_ident)) != sizeof(e_ident))
+ return -1;
+
+ if (memcmp(e_ident, ELFMAG, SELFMAG) ||
+ e_ident[EI_VERSION] != EV_CURRENT)
+ return -1;
+
+ return e_ident[EI_CLASS] == ELFCLASS64;
+}
+
+enum dso_type dso__type_fd(int fd)
+{
+ Elf64_Ehdr ehdr;
+ int ret;
+
+ ret = fd__is_64_bit(fd);
+ if (ret < 0)
+ return DSO__TYPE_UNKNOWN;
+
+ if (ret)
+ return DSO__TYPE_64BIT;
+
+ if (readn(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr))
+ return DSO__TYPE_UNKNOWN;
+
+ if (ehdr.e_machine == EM_X86_64)
+ return DSO__TYPE_X32BIT;
+
+ return DSO__TYPE_32BIT;
+}
+
+int dso__load_sym(struct dso *dso, struct map *map __maybe_unused,
+ struct symsrc *ss,
+ struct symsrc *runtime_ss __maybe_unused,
+ int kmodule __maybe_unused)
+{
+ struct build_id bid;
+ int ret;
+
+ ret = fd__is_64_bit(ss->fd);
+ if (ret >= 0)
+ dso->is_64_bit = ret;
+
+ if (filename__read_build_id(ss->name, &bid) > 0)
+ dso__set_build_id(dso, &bid);
+ return 0;
+}
+
+int file__read_maps(int fd __maybe_unused, bool exe __maybe_unused,
+ mapfn_t mapfn __maybe_unused, void *data __maybe_unused,
+ bool *is_64_bit __maybe_unused)
+{
+ return -1;
+}
+
+int kcore_extract__create(struct kcore_extract *kce __maybe_unused)
+{
+ return -1;
+}
+
+void kcore_extract__delete(struct kcore_extract *kce __maybe_unused)
+{
+}
+
+int kcore_copy(const char *from_dir __maybe_unused,
+ const char *to_dir __maybe_unused)
+{
+ return -1;
+}
+
+void symbol__elf_init(void)
+{
+}
+
+char *dso__demangle_sym(struct dso *dso __maybe_unused,
+ int kmodule __maybe_unused,
+ const char *elf_name __maybe_unused)
+{
+ return NULL;
+}
+
+bool filename__has_section(const char *filename __maybe_unused, const char *sec __maybe_unused)
+{
+ return false;
+}
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
new file mode 100644
index 000000000..3f36675b7
--- /dev/null
+++ b/tools/perf/util/symbol.c
@@ -0,0 +1,2825 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <dirent.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <linux/capability.h>
+#include <linux/kernel.h>
+#include <linux/mman.h>
+#include <linux/string.h>
+#include <linux/time64.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/param.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include "annotate.h"
+#include "build-id.h"
+#include "cap.h"
+#include "dso.h"
+#include "util.h" // lsdir()
+#include "debug.h"
+#include "event.h"
+#include "machine.h"
+#include "map.h"
+#include "symbol.h"
+#include "map_symbol.h"
+#include "mem-events.h"
+#include "symsrc.h"
+#include "strlist.h"
+#include "intlist.h"
+#include "namespaces.h"
+#include "header.h"
+#include "path.h"
+#include <linux/ctype.h>
+#include <linux/zalloc.h>
+
+#include <elf.h>
+#include <limits.h>
+#include <symbol/kallsyms.h>
+#include <sys/utsname.h>
+
+static int dso__load_kernel_sym(struct dso *dso, struct map *map);
+static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map);
+static bool symbol__is_idle(const char *name);
+
+int vmlinux_path__nr_entries;
+char **vmlinux_path;
+
+struct map_list_node {
+ struct list_head node;
+ struct map *map;
+};
+
+struct symbol_conf symbol_conf = {
+ .nanosecs = false,
+ .use_modules = true,
+ .try_vmlinux_path = true,
+ .demangle = true,
+ .demangle_kernel = false,
+ .cumulate_callchain = true,
+ .time_quantum = 100 * NSEC_PER_MSEC, /* 100ms */
+ .show_hist_headers = true,
+ .symfs = "",
+ .event_group = true,
+ .inline_name = true,
+ .res_sample = 0,
+};
+
+static enum dso_binary_type binary_type_symtab[] = {
+ DSO_BINARY_TYPE__KALLSYMS,
+ DSO_BINARY_TYPE__GUEST_KALLSYMS,
+ DSO_BINARY_TYPE__JAVA_JIT,
+ DSO_BINARY_TYPE__DEBUGLINK,
+ DSO_BINARY_TYPE__BUILD_ID_CACHE,
+ DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO,
+ DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
+ DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
+ DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
+ DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
+ DSO_BINARY_TYPE__GUEST_KMODULE,
+ DSO_BINARY_TYPE__GUEST_KMODULE_COMP,
+ DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE,
+ DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP,
+ DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
+ DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO,
+ DSO_BINARY_TYPE__NOT_FOUND,
+};
+
+#define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab)
+
+static struct map_list_node *map_list_node__new(void)
+{
+ return malloc(sizeof(struct map_list_node));
+}
+
+static bool symbol_type__filter(char symbol_type)
+{
+ symbol_type = toupper(symbol_type);
+ return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D' || symbol_type == 'B';
+}
+
+static int prefix_underscores_count(const char *str)
+{
+ const char *tail = str;
+
+ while (*tail == '_')
+ tail++;
+
+ return tail - str;
+}
+
+const char * __weak arch__normalize_symbol_name(const char *name)
+{
+ return name;
+}
+
+int __weak arch__compare_symbol_names(const char *namea, const char *nameb)
+{
+ return strcmp(namea, nameb);
+}
+
+int __weak arch__compare_symbol_names_n(const char *namea, const char *nameb,
+ unsigned int n)
+{
+ return strncmp(namea, nameb, n);
+}
+
+int __weak arch__choose_best_symbol(struct symbol *syma,
+ struct symbol *symb __maybe_unused)
+{
+ /* Avoid "SyS" kernel syscall aliases */
+ if (strlen(syma->name) >= 3 && !strncmp(syma->name, "SyS", 3))
+ return SYMBOL_B;
+ if (strlen(syma->name) >= 10 && !strncmp(syma->name, "compat_SyS", 10))
+ return SYMBOL_B;
+
+ return SYMBOL_A;
+}
+
+static int choose_best_symbol(struct symbol *syma, struct symbol *symb)
+{
+ s64 a;
+ s64 b;
+ size_t na, nb;
+
+ /* Prefer a symbol with non zero length */
+ a = syma->end - syma->start;
+ b = symb->end - symb->start;
+ if ((b == 0) && (a > 0))
+ return SYMBOL_A;
+ else if ((a == 0) && (b > 0))
+ return SYMBOL_B;
+
+ /* Prefer a non weak symbol over a weak one */
+ a = syma->binding == STB_WEAK;
+ b = symb->binding == STB_WEAK;
+ if (b && !a)
+ return SYMBOL_A;
+ if (a && !b)
+ return SYMBOL_B;
+
+ /* Prefer a global symbol over a non global one */
+ a = syma->binding == STB_GLOBAL;
+ b = symb->binding == STB_GLOBAL;
+ if (a && !b)
+ return SYMBOL_A;
+ if (b && !a)
+ return SYMBOL_B;
+
+ /* Prefer a symbol with less underscores */
+ a = prefix_underscores_count(syma->name);
+ b = prefix_underscores_count(symb->name);
+ if (b > a)
+ return SYMBOL_A;
+ else if (a > b)
+ return SYMBOL_B;
+
+ /* Choose the symbol with the longest name */
+ na = strlen(syma->name);
+ nb = strlen(symb->name);
+ if (na > nb)
+ return SYMBOL_A;
+ else if (na < nb)
+ return SYMBOL_B;
+
+ return arch__choose_best_symbol(syma, symb);
+}
+
+void symbols__fixup_duplicate(struct rb_root_cached *symbols)
+{
+ struct rb_node *nd;
+ struct symbol *curr, *next;
+
+ if (symbol_conf.allow_aliases)
+ return;
+
+ nd = rb_first_cached(symbols);
+
+ while (nd) {
+ curr = rb_entry(nd, struct symbol, rb_node);
+again:
+ nd = rb_next(&curr->rb_node);
+ next = rb_entry(nd, struct symbol, rb_node);
+
+ if (!nd)
+ break;
+
+ if (curr->start != next->start)
+ continue;
+
+ if (choose_best_symbol(curr, next) == SYMBOL_A) {
+ if (next->type == STT_GNU_IFUNC)
+ curr->ifunc_alias = true;
+ rb_erase_cached(&next->rb_node, symbols);
+ symbol__delete(next);
+ goto again;
+ } else {
+ if (curr->type == STT_GNU_IFUNC)
+ next->ifunc_alias = true;
+ nd = rb_next(&curr->rb_node);
+ rb_erase_cached(&curr->rb_node, symbols);
+ symbol__delete(curr);
+ }
+ }
+}
+
+/* Update zero-sized symbols using the address of the next symbol */
+void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms)
+{
+ struct rb_node *nd, *prevnd = rb_first_cached(symbols);
+ struct symbol *curr, *prev;
+
+ if (prevnd == NULL)
+ return;
+
+ curr = rb_entry(prevnd, struct symbol, rb_node);
+
+ for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) {
+ prev = curr;
+ curr = rb_entry(nd, struct symbol, rb_node);
+
+ /*
+ * On some architecture kernel text segment start is located at
+ * some low memory address, while modules are located at high
+ * memory addresses (or vice versa). The gap between end of
+ * kernel text segment and beginning of first module's text
+ * segment is very big. Therefore do not fill this gap and do
+ * not assign it to the kernel dso map (kallsyms).
+ *
+ * In kallsyms, it determines module symbols using '[' character
+ * like in:
+ * ffffffffc1937000 T hdmi_driver_init [snd_hda_codec_hdmi]
+ */
+ if (prev->end == prev->start) {
+ /* Last kernel/module symbol mapped to end of page */
+ if (is_kallsyms && (!strchr(prev->name, '[') !=
+ !strchr(curr->name, '[')))
+ prev->end = roundup(prev->end + 4096, 4096);
+ else
+ prev->end = curr->start;
+
+ pr_debug4("%s sym:%s end:%#" PRIx64 "\n",
+ __func__, prev->name, prev->end);
+ }
+ }
+
+ /* Last entry */
+ if (curr->end == curr->start)
+ curr->end = roundup(curr->start, 4096) + 4096;
+}
+
+void maps__fixup_end(struct maps *maps)
+{
+ struct map_rb_node *prev = NULL, *curr;
+
+ down_write(maps__lock(maps));
+
+ maps__for_each_entry(maps, curr) {
+ if (prev != NULL && !map__end(prev->map))
+ map__set_end(prev->map, map__start(curr->map));
+
+ prev = curr;
+ }
+
+ /*
+ * We still haven't the actual symbols, so guess the
+ * last map final address.
+ */
+ if (curr && !map__end(curr->map))
+ map__set_end(curr->map, ~0ULL);
+
+ up_write(maps__lock(maps));
+}
+
+struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name)
+{
+ size_t namelen = strlen(name) + 1;
+ struct symbol *sym = calloc(1, (symbol_conf.priv_size +
+ sizeof(*sym) + namelen));
+ if (sym == NULL)
+ return NULL;
+
+ if (symbol_conf.priv_size) {
+ if (symbol_conf.init_annotation) {
+ struct annotation *notes = (void *)sym;
+ annotation__init(notes);
+ }
+ sym = ((void *)sym) + symbol_conf.priv_size;
+ }
+
+ sym->start = start;
+ sym->end = len ? start + len : start;
+ sym->type = type;
+ sym->binding = binding;
+ sym->namelen = namelen - 1;
+
+ pr_debug4("%s: %s %#" PRIx64 "-%#" PRIx64 "\n",
+ __func__, name, start, sym->end);
+ memcpy(sym->name, name, namelen);
+
+ return sym;
+}
+
+void symbol__delete(struct symbol *sym)
+{
+ if (symbol_conf.priv_size) {
+ if (symbol_conf.init_annotation) {
+ struct annotation *notes = symbol__annotation(sym);
+
+ annotation__exit(notes);
+ }
+ }
+ free(((void *)sym) - symbol_conf.priv_size);
+}
+
+void symbols__delete(struct rb_root_cached *symbols)
+{
+ struct symbol *pos;
+ struct rb_node *next = rb_first_cached(symbols);
+
+ while (next) {
+ pos = rb_entry(next, struct symbol, rb_node);
+ next = rb_next(&pos->rb_node);
+ rb_erase_cached(&pos->rb_node, symbols);
+ symbol__delete(pos);
+ }
+}
+
+void __symbols__insert(struct rb_root_cached *symbols,
+ struct symbol *sym, bool kernel)
+{
+ struct rb_node **p = &symbols->rb_root.rb_node;
+ struct rb_node *parent = NULL;
+ const u64 ip = sym->start;
+ struct symbol *s;
+ bool leftmost = true;
+
+ if (kernel) {
+ const char *name = sym->name;
+ /*
+ * ppc64 uses function descriptors and appends a '.' to the
+ * start of every instruction address. Remove it.
+ */
+ if (name[0] == '.')
+ name++;
+ sym->idle = symbol__is_idle(name);
+ }
+
+ while (*p != NULL) {
+ parent = *p;
+ s = rb_entry(parent, struct symbol, rb_node);
+ if (ip < s->start)
+ p = &(*p)->rb_left;
+ else {
+ p = &(*p)->rb_right;
+ leftmost = false;
+ }
+ }
+ rb_link_node(&sym->rb_node, parent, p);
+ rb_insert_color_cached(&sym->rb_node, symbols, leftmost);
+}
+
+void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym)
+{
+ __symbols__insert(symbols, sym, false);
+}
+
+static struct symbol *symbols__find(struct rb_root_cached *symbols, u64 ip)
+{
+ struct rb_node *n;
+
+ if (symbols == NULL)
+ return NULL;
+
+ n = symbols->rb_root.rb_node;
+
+ while (n) {
+ struct symbol *s = rb_entry(n, struct symbol, rb_node);
+
+ if (ip < s->start)
+ n = n->rb_left;
+ else if (ip > s->end || (ip == s->end && ip != s->start))
+ n = n->rb_right;
+ else
+ return s;
+ }
+
+ return NULL;
+}
+
+static struct symbol *symbols__first(struct rb_root_cached *symbols)
+{
+ struct rb_node *n = rb_first_cached(symbols);
+
+ if (n)
+ return rb_entry(n, struct symbol, rb_node);
+
+ return NULL;
+}
+
+static struct symbol *symbols__last(struct rb_root_cached *symbols)
+{
+ struct rb_node *n = rb_last(&symbols->rb_root);
+
+ if (n)
+ return rb_entry(n, struct symbol, rb_node);
+
+ return NULL;
+}
+
+static struct symbol *symbols__next(struct symbol *sym)
+{
+ struct rb_node *n = rb_next(&sym->rb_node);
+
+ if (n)
+ return rb_entry(n, struct symbol, rb_node);
+
+ return NULL;
+}
+
+static int symbols__sort_name_cmp(const void *vlhs, const void *vrhs)
+{
+ const struct symbol *lhs = *((const struct symbol **)vlhs);
+ const struct symbol *rhs = *((const struct symbol **)vrhs);
+
+ return strcmp(lhs->name, rhs->name);
+}
+
+static struct symbol **symbols__sort_by_name(struct rb_root_cached *source, size_t *len)
+{
+ struct rb_node *nd;
+ struct symbol **result;
+ size_t i = 0, size = 0;
+
+ for (nd = rb_first_cached(source); nd; nd = rb_next(nd))
+ size++;
+
+ result = malloc(sizeof(*result) * size);
+ if (!result)
+ return NULL;
+
+ for (nd = rb_first_cached(source); nd; nd = rb_next(nd)) {
+ struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
+
+ result[i++] = pos;
+ }
+ qsort(result, size, sizeof(*result), symbols__sort_name_cmp);
+ *len = size;
+ return result;
+}
+
+int symbol__match_symbol_name(const char *name, const char *str,
+ enum symbol_tag_include includes)
+{
+ const char *versioning;
+
+ if (includes == SYMBOL_TAG_INCLUDE__DEFAULT_ONLY &&
+ (versioning = strstr(name, "@@"))) {
+ int len = strlen(str);
+
+ if (len < versioning - name)
+ len = versioning - name;
+
+ return arch__compare_symbol_names_n(name, str, len);
+ } else
+ return arch__compare_symbol_names(name, str);
+}
+
+static struct symbol *symbols__find_by_name(struct symbol *symbols[],
+ size_t symbols_len,
+ const char *name,
+ enum symbol_tag_include includes,
+ size_t *found_idx)
+{
+ size_t i, lower = 0, upper = symbols_len;
+ struct symbol *s = NULL;
+
+ if (found_idx)
+ *found_idx = SIZE_MAX;
+
+ if (!symbols_len)
+ return NULL;
+
+ while (lower < upper) {
+ int cmp;
+
+ i = (lower + upper) / 2;
+ cmp = symbol__match_symbol_name(symbols[i]->name, name, includes);
+
+ if (cmp > 0)
+ upper = i;
+ else if (cmp < 0)
+ lower = i + 1;
+ else {
+ if (found_idx)
+ *found_idx = i;
+ s = symbols[i];
+ break;
+ }
+ }
+ if (s && includes != SYMBOL_TAG_INCLUDE__DEFAULT_ONLY) {
+ /* return first symbol that has same name (if any) */
+ for (; i > 0; i--) {
+ struct symbol *tmp = symbols[i - 1];
+
+ if (!arch__compare_symbol_names(tmp->name, s->name)) {
+ if (found_idx)
+ *found_idx = i - 1;
+ s = tmp;
+ } else
+ break;
+ }
+ }
+ assert(!found_idx || !s || s == symbols[*found_idx]);
+ return s;
+}
+
+void dso__reset_find_symbol_cache(struct dso *dso)
+{
+ dso->last_find_result.addr = 0;
+ dso->last_find_result.symbol = NULL;
+}
+
+void dso__insert_symbol(struct dso *dso, struct symbol *sym)
+{
+ __symbols__insert(&dso->symbols, sym, dso->kernel);
+
+ /* update the symbol cache if necessary */
+ if (dso->last_find_result.addr >= sym->start &&
+ (dso->last_find_result.addr < sym->end ||
+ sym->start == sym->end)) {
+ dso->last_find_result.symbol = sym;
+ }
+}
+
+void dso__delete_symbol(struct dso *dso, struct symbol *sym)
+{
+ rb_erase_cached(&sym->rb_node, &dso->symbols);
+ symbol__delete(sym);
+ dso__reset_find_symbol_cache(dso);
+}
+
+struct symbol *dso__find_symbol(struct dso *dso, u64 addr)
+{
+ if (dso->last_find_result.addr != addr || dso->last_find_result.symbol == NULL) {
+ dso->last_find_result.addr = addr;
+ dso->last_find_result.symbol = symbols__find(&dso->symbols, addr);
+ }
+
+ return dso->last_find_result.symbol;
+}
+
+struct symbol *dso__find_symbol_nocache(struct dso *dso, u64 addr)
+{
+ return symbols__find(&dso->symbols, addr);
+}
+
+struct symbol *dso__first_symbol(struct dso *dso)
+{
+ return symbols__first(&dso->symbols);
+}
+
+struct symbol *dso__last_symbol(struct dso *dso)
+{
+ return symbols__last(&dso->symbols);
+}
+
+struct symbol *dso__next_symbol(struct symbol *sym)
+{
+ return symbols__next(sym);
+}
+
+struct symbol *dso__next_symbol_by_name(struct dso *dso, size_t *idx)
+{
+ if (*idx + 1 >= dso->symbol_names_len)
+ return NULL;
+
+ ++*idx;
+ return dso->symbol_names[*idx];
+}
+
+ /*
+ * Returns first symbol that matched with @name.
+ */
+struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name, size_t *idx)
+{
+ struct symbol *s = symbols__find_by_name(dso->symbol_names, dso->symbol_names_len,
+ name, SYMBOL_TAG_INCLUDE__NONE, idx);
+ if (!s)
+ s = symbols__find_by_name(dso->symbol_names, dso->symbol_names_len,
+ name, SYMBOL_TAG_INCLUDE__DEFAULT_ONLY, idx);
+ return s;
+}
+
+void dso__sort_by_name(struct dso *dso)
+{
+ mutex_lock(&dso->lock);
+ if (!dso__sorted_by_name(dso)) {
+ size_t len;
+
+ dso->symbol_names = symbols__sort_by_name(&dso->symbols, &len);
+ if (dso->symbol_names) {
+ dso->symbol_names_len = len;
+ dso__set_sorted_by_name(dso);
+ }
+ }
+ mutex_unlock(&dso->lock);
+}
+
+/*
+ * While we find nice hex chars, build a long_val.
+ * Return number of chars processed.
+ */
+static int hex2u64(const char *ptr, u64 *long_val)
+{
+ char *p;
+
+ *long_val = strtoull(ptr, &p, 16);
+
+ return p - ptr;
+}
+
+
+int modules__parse(const char *filename, void *arg,
+ int (*process_module)(void *arg, const char *name,
+ u64 start, u64 size))
+{
+ char *line = NULL;
+ size_t n;
+ FILE *file;
+ int err = 0;
+
+ file = fopen(filename, "r");
+ if (file == NULL)
+ return -1;
+
+ while (1) {
+ char name[PATH_MAX];
+ u64 start, size;
+ char *sep, *endptr;
+ ssize_t line_len;
+
+ line_len = getline(&line, &n, file);
+ if (line_len < 0) {
+ if (feof(file))
+ break;
+ err = -1;
+ goto out;
+ }
+
+ if (!line) {
+ err = -1;
+ goto out;
+ }
+
+ line[--line_len] = '\0'; /* \n */
+
+ sep = strrchr(line, 'x');
+ if (sep == NULL)
+ continue;
+
+ hex2u64(sep + 1, &start);
+
+ sep = strchr(line, ' ');
+ if (sep == NULL)
+ continue;
+
+ *sep = '\0';
+
+ scnprintf(name, sizeof(name), "[%s]", line);
+
+ size = strtoul(sep + 1, &endptr, 0);
+ if (*endptr != ' ' && *endptr != '\t')
+ continue;
+
+ err = process_module(arg, name, start, size);
+ if (err)
+ break;
+ }
+out:
+ free(line);
+ fclose(file);
+ return err;
+}
+
+/*
+ * These are symbols in the kernel image, so make sure that
+ * sym is from a kernel DSO.
+ */
+static bool symbol__is_idle(const char *name)
+{
+ const char * const idle_symbols[] = {
+ "acpi_idle_do_entry",
+ "acpi_processor_ffh_cstate_enter",
+ "arch_cpu_idle",
+ "cpu_idle",
+ "cpu_startup_entry",
+ "idle_cpu",
+ "intel_idle",
+ "default_idle",
+ "native_safe_halt",
+ "enter_idle",
+ "exit_idle",
+ "mwait_idle",
+ "mwait_idle_with_hints",
+ "mwait_idle_with_hints.constprop.0",
+ "poll_idle",
+ "ppc64_runlatch_off",
+ "pseries_dedicated_idle_sleep",
+ "psw_idle",
+ "psw_idle_exit",
+ NULL
+ };
+ int i;
+ static struct strlist *idle_symbols_list;
+
+ if (idle_symbols_list)
+ return strlist__has_entry(idle_symbols_list, name);
+
+ idle_symbols_list = strlist__new(NULL, NULL);
+
+ for (i = 0; idle_symbols[i]; i++)
+ strlist__add(idle_symbols_list, idle_symbols[i]);
+
+ return strlist__has_entry(idle_symbols_list, name);
+}
+
+static int map__process_kallsym_symbol(void *arg, const char *name,
+ char type, u64 start)
+{
+ struct symbol *sym;
+ struct dso *dso = arg;
+ struct rb_root_cached *root = &dso->symbols;
+
+ if (!symbol_type__filter(type))
+ return 0;
+
+ /* Ignore local symbols for ARM modules */
+ if (name[0] == '$')
+ return 0;
+
+ /*
+ * module symbols are not sorted so we add all
+ * symbols, setting length to 0, and rely on
+ * symbols__fixup_end() to fix it up.
+ */
+ sym = symbol__new(start, 0, kallsyms2elf_binding(type), kallsyms2elf_type(type), name);
+ if (sym == NULL)
+ return -ENOMEM;
+ /*
+ * We will pass the symbols to the filter later, in
+ * map__split_kallsyms, when we have split the maps per module
+ */
+ __symbols__insert(root, sym, !strchr(name, '['));
+
+ return 0;
+}
+
+/*
+ * Loads the function entries in /proc/kallsyms into kernel_map->dso,
+ * so that we can in the next step set the symbol ->end address and then
+ * call kernel_maps__split_kallsyms.
+ */
+static int dso__load_all_kallsyms(struct dso *dso, const char *filename)
+{
+ return kallsyms__parse(filename, dso, map__process_kallsym_symbol);
+}
+
+static int maps__split_kallsyms_for_kcore(struct maps *kmaps, struct dso *dso)
+{
+ struct map *curr_map;
+ struct symbol *pos;
+ int count = 0;
+ struct rb_root_cached old_root = dso->symbols;
+ struct rb_root_cached *root = &dso->symbols;
+ struct rb_node *next = rb_first_cached(root);
+
+ if (!kmaps)
+ return -1;
+
+ *root = RB_ROOT_CACHED;
+
+ while (next) {
+ struct dso *curr_map_dso;
+ char *module;
+
+ pos = rb_entry(next, struct symbol, rb_node);
+ next = rb_next(&pos->rb_node);
+
+ rb_erase_cached(&pos->rb_node, &old_root);
+ RB_CLEAR_NODE(&pos->rb_node);
+ module = strchr(pos->name, '\t');
+ if (module)
+ *module = '\0';
+
+ curr_map = maps__find(kmaps, pos->start);
+
+ if (!curr_map) {
+ symbol__delete(pos);
+ continue;
+ }
+ curr_map_dso = map__dso(curr_map);
+ pos->start -= map__start(curr_map) - map__pgoff(curr_map);
+ if (pos->end > map__end(curr_map))
+ pos->end = map__end(curr_map);
+ if (pos->end)
+ pos->end -= map__start(curr_map) - map__pgoff(curr_map);
+ symbols__insert(&curr_map_dso->symbols, pos);
+ ++count;
+ }
+
+ /* Symbols have been adjusted */
+ dso->adjust_symbols = 1;
+
+ return count;
+}
+
+/*
+ * Split the symbols into maps, making sure there are no overlaps, i.e. the
+ * kernel range is broken in several maps, named [kernel].N, as we don't have
+ * the original ELF section names vmlinux have.
+ */
+static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta,
+ struct map *initial_map)
+{
+ struct machine *machine;
+ struct map *curr_map = initial_map;
+ struct symbol *pos;
+ int count = 0, moved = 0;
+ struct rb_root_cached *root = &dso->symbols;
+ struct rb_node *next = rb_first_cached(root);
+ int kernel_range = 0;
+ bool x86_64;
+
+ if (!kmaps)
+ return -1;
+
+ machine = maps__machine(kmaps);
+
+ x86_64 = machine__is(machine, "x86_64");
+
+ while (next) {
+ char *module;
+
+ pos = rb_entry(next, struct symbol, rb_node);
+ next = rb_next(&pos->rb_node);
+
+ module = strchr(pos->name, '\t');
+ if (module) {
+ struct dso *curr_map_dso;
+
+ if (!symbol_conf.use_modules)
+ goto discard_symbol;
+
+ *module++ = '\0';
+ curr_map_dso = map__dso(curr_map);
+ if (strcmp(curr_map_dso->short_name, module)) {
+ if (RC_CHK_ACCESS(curr_map) != RC_CHK_ACCESS(initial_map) &&
+ dso->kernel == DSO_SPACE__KERNEL_GUEST &&
+ machine__is_default_guest(machine)) {
+ /*
+ * We assume all symbols of a module are
+ * continuous in * kallsyms, so curr_map
+ * points to a module and all its
+ * symbols are in its kmap. Mark it as
+ * loaded.
+ */
+ dso__set_loaded(curr_map_dso);
+ }
+
+ curr_map = maps__find_by_name(kmaps, module);
+ if (curr_map == NULL) {
+ pr_debug("%s/proc/{kallsyms,modules} "
+ "inconsistency while looking "
+ "for \"%s\" module!\n",
+ machine->root_dir, module);
+ curr_map = initial_map;
+ goto discard_symbol;
+ }
+ curr_map_dso = map__dso(curr_map);
+ if (curr_map_dso->loaded &&
+ !machine__is_default_guest(machine))
+ goto discard_symbol;
+ }
+ /*
+ * So that we look just like we get from .ko files,
+ * i.e. not prelinked, relative to initial_map->start.
+ */
+ pos->start = map__map_ip(curr_map, pos->start);
+ pos->end = map__map_ip(curr_map, pos->end);
+ } else if (x86_64 && is_entry_trampoline(pos->name)) {
+ /*
+ * These symbols are not needed anymore since the
+ * trampoline maps refer to the text section and it's
+ * symbols instead. Avoid having to deal with
+ * relocations, and the assumption that the first symbol
+ * is the start of kernel text, by simply removing the
+ * symbols at this point.
+ */
+ goto discard_symbol;
+ } else if (curr_map != initial_map) {
+ char dso_name[PATH_MAX];
+ struct dso *ndso;
+
+ if (delta) {
+ /* Kernel was relocated at boot time */
+ pos->start -= delta;
+ pos->end -= delta;
+ }
+
+ if (count == 0) {
+ curr_map = initial_map;
+ goto add_symbol;
+ }
+
+ if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
+ snprintf(dso_name, sizeof(dso_name),
+ "[guest.kernel].%d",
+ kernel_range++);
+ else
+ snprintf(dso_name, sizeof(dso_name),
+ "[kernel].%d",
+ kernel_range++);
+
+ ndso = dso__new(dso_name);
+ if (ndso == NULL)
+ return -1;
+
+ ndso->kernel = dso->kernel;
+
+ curr_map = map__new2(pos->start, ndso);
+ if (curr_map == NULL) {
+ dso__put(ndso);
+ return -1;
+ }
+
+ map__set_map_ip(curr_map, identity__map_ip);
+ map__set_unmap_ip(curr_map, identity__map_ip);
+ if (maps__insert(kmaps, curr_map)) {
+ dso__put(ndso);
+ return -1;
+ }
+ ++kernel_range;
+ } else if (delta) {
+ /* Kernel was relocated at boot time */
+ pos->start -= delta;
+ pos->end -= delta;
+ }
+add_symbol:
+ if (curr_map != initial_map) {
+ struct dso *curr_map_dso = map__dso(curr_map);
+
+ rb_erase_cached(&pos->rb_node, root);
+ symbols__insert(&curr_map_dso->symbols, pos);
+ ++moved;
+ } else
+ ++count;
+
+ continue;
+discard_symbol:
+ rb_erase_cached(&pos->rb_node, root);
+ symbol__delete(pos);
+ }
+
+ if (curr_map != initial_map &&
+ dso->kernel == DSO_SPACE__KERNEL_GUEST &&
+ machine__is_default_guest(maps__machine(kmaps))) {
+ dso__set_loaded(map__dso(curr_map));
+ }
+
+ return count + moved;
+}
+
+bool symbol__restricted_filename(const char *filename,
+ const char *restricted_filename)
+{
+ bool restricted = false;
+
+ if (symbol_conf.kptr_restrict) {
+ char *r = realpath(filename, NULL);
+
+ if (r != NULL) {
+ restricted = strcmp(r, restricted_filename) == 0;
+ free(r);
+ return restricted;
+ }
+ }
+
+ return restricted;
+}
+
+struct module_info {
+ struct rb_node rb_node;
+ char *name;
+ u64 start;
+};
+
+static void add_module(struct module_info *mi, struct rb_root *modules)
+{
+ struct rb_node **p = &modules->rb_node;
+ struct rb_node *parent = NULL;
+ struct module_info *m;
+
+ while (*p != NULL) {
+ parent = *p;
+ m = rb_entry(parent, struct module_info, rb_node);
+ if (strcmp(mi->name, m->name) < 0)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+ rb_link_node(&mi->rb_node, parent, p);
+ rb_insert_color(&mi->rb_node, modules);
+}
+
+static void delete_modules(struct rb_root *modules)
+{
+ struct module_info *mi;
+ struct rb_node *next = rb_first(modules);
+
+ while (next) {
+ mi = rb_entry(next, struct module_info, rb_node);
+ next = rb_next(&mi->rb_node);
+ rb_erase(&mi->rb_node, modules);
+ zfree(&mi->name);
+ free(mi);
+ }
+}
+
+static struct module_info *find_module(const char *name,
+ struct rb_root *modules)
+{
+ struct rb_node *n = modules->rb_node;
+
+ while (n) {
+ struct module_info *m;
+ int cmp;
+
+ m = rb_entry(n, struct module_info, rb_node);
+ cmp = strcmp(name, m->name);
+ if (cmp < 0)
+ n = n->rb_left;
+ else if (cmp > 0)
+ n = n->rb_right;
+ else
+ return m;
+ }
+
+ return NULL;
+}
+
+static int __read_proc_modules(void *arg, const char *name, u64 start,
+ u64 size __maybe_unused)
+{
+ struct rb_root *modules = arg;
+ struct module_info *mi;
+
+ mi = zalloc(sizeof(struct module_info));
+ if (!mi)
+ return -ENOMEM;
+
+ mi->name = strdup(name);
+ mi->start = start;
+
+ if (!mi->name) {
+ free(mi);
+ return -ENOMEM;
+ }
+
+ add_module(mi, modules);
+
+ return 0;
+}
+
+static int read_proc_modules(const char *filename, struct rb_root *modules)
+{
+ if (symbol__restricted_filename(filename, "/proc/modules"))
+ return -1;
+
+ if (modules__parse(filename, modules, __read_proc_modules)) {
+ delete_modules(modules);
+ return -1;
+ }
+
+ return 0;
+}
+
+int compare_proc_modules(const char *from, const char *to)
+{
+ struct rb_root from_modules = RB_ROOT;
+ struct rb_root to_modules = RB_ROOT;
+ struct rb_node *from_node, *to_node;
+ struct module_info *from_m, *to_m;
+ int ret = -1;
+
+ if (read_proc_modules(from, &from_modules))
+ return -1;
+
+ if (read_proc_modules(to, &to_modules))
+ goto out_delete_from;
+
+ from_node = rb_first(&from_modules);
+ to_node = rb_first(&to_modules);
+ while (from_node) {
+ if (!to_node)
+ break;
+
+ from_m = rb_entry(from_node, struct module_info, rb_node);
+ to_m = rb_entry(to_node, struct module_info, rb_node);
+
+ if (from_m->start != to_m->start ||
+ strcmp(from_m->name, to_m->name))
+ break;
+
+ from_node = rb_next(from_node);
+ to_node = rb_next(to_node);
+ }
+
+ if (!from_node && !to_node)
+ ret = 0;
+
+ delete_modules(&to_modules);
+out_delete_from:
+ delete_modules(&from_modules);
+
+ return ret;
+}
+
+static int do_validate_kcore_modules(const char *filename, struct maps *kmaps)
+{
+ struct rb_root modules = RB_ROOT;
+ struct map_rb_node *old_node;
+ int err;
+
+ err = read_proc_modules(filename, &modules);
+ if (err)
+ return err;
+
+ maps__for_each_entry(kmaps, old_node) {
+ struct map *old_map = old_node->map;
+ struct module_info *mi;
+ struct dso *dso;
+
+ if (!__map__is_kmodule(old_map)) {
+ continue;
+ }
+ dso = map__dso(old_map);
+ /* Module must be in memory at the same address */
+ mi = find_module(dso->short_name, &modules);
+ if (!mi || mi->start != map__start(old_map)) {
+ err = -EINVAL;
+ goto out;
+ }
+ }
+out:
+ delete_modules(&modules);
+ return err;
+}
+
+/*
+ * If kallsyms is referenced by name then we look for filename in the same
+ * directory.
+ */
+static bool filename_from_kallsyms_filename(char *filename,
+ const char *base_name,
+ const char *kallsyms_filename)
+{
+ char *name;
+
+ strcpy(filename, kallsyms_filename);
+ name = strrchr(filename, '/');
+ if (!name)
+ return false;
+
+ name += 1;
+
+ if (!strcmp(name, "kallsyms")) {
+ strcpy(name, base_name);
+ return true;
+ }
+
+ return false;
+}
+
+static int validate_kcore_modules(const char *kallsyms_filename,
+ struct map *map)
+{
+ struct maps *kmaps = map__kmaps(map);
+ char modules_filename[PATH_MAX];
+
+ if (!kmaps)
+ return -EINVAL;
+
+ if (!filename_from_kallsyms_filename(modules_filename, "modules",
+ kallsyms_filename))
+ return -EINVAL;
+
+ if (do_validate_kcore_modules(modules_filename, kmaps))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int validate_kcore_addresses(const char *kallsyms_filename,
+ struct map *map)
+{
+ struct kmap *kmap = map__kmap(map);
+
+ if (!kmap)
+ return -EINVAL;
+
+ if (kmap->ref_reloc_sym && kmap->ref_reloc_sym->name) {
+ u64 start;
+
+ if (kallsyms__get_function_start(kallsyms_filename,
+ kmap->ref_reloc_sym->name, &start))
+ return -ENOENT;
+ if (start != kmap->ref_reloc_sym->addr)
+ return -EINVAL;
+ }
+
+ return validate_kcore_modules(kallsyms_filename, map);
+}
+
+struct kcore_mapfn_data {
+ struct dso *dso;
+ struct list_head maps;
+};
+
+static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data)
+{
+ struct kcore_mapfn_data *md = data;
+ struct map_list_node *list_node = map_list_node__new();
+
+ if (!list_node)
+ return -ENOMEM;
+
+ list_node->map = map__new2(start, md->dso);
+ if (!list_node->map) {
+ free(list_node);
+ return -ENOMEM;
+ }
+
+ map__set_end(list_node->map, map__start(list_node->map) + len);
+ map__set_pgoff(list_node->map, pgoff);
+
+ list_add(&list_node->node, &md->maps);
+
+ return 0;
+}
+
+/*
+ * Merges map into maps by splitting the new map within the existing map
+ * regions.
+ */
+int maps__merge_in(struct maps *kmaps, struct map *new_map)
+{
+ struct map_rb_node *rb_node;
+ LIST_HEAD(merged);
+ int err = 0;
+
+ maps__for_each_entry(kmaps, rb_node) {
+ struct map *old_map = rb_node->map;
+
+ /* no overload with this one */
+ if (map__end(new_map) < map__start(old_map) ||
+ map__start(new_map) >= map__end(old_map))
+ continue;
+
+ if (map__start(new_map) < map__start(old_map)) {
+ /*
+ * |new......
+ * |old....
+ */
+ if (map__end(new_map) < map__end(old_map)) {
+ /*
+ * |new......| -> |new..|
+ * |old....| -> |old....|
+ */
+ map__set_end(new_map, map__start(old_map));
+ } else {
+ /*
+ * |new.............| -> |new..| |new..|
+ * |old....| -> |old....|
+ */
+ struct map_list_node *m = map_list_node__new();
+
+ if (!m) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ m->map = map__clone(new_map);
+ if (!m->map) {
+ free(m);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ map__set_end(m->map, map__start(old_map));
+ list_add_tail(&m->node, &merged);
+ map__add_pgoff(new_map, map__end(old_map) - map__start(new_map));
+ map__set_start(new_map, map__end(old_map));
+ }
+ } else {
+ /*
+ * |new......
+ * |old....
+ */
+ if (map__end(new_map) < map__end(old_map)) {
+ /*
+ * |new..| -> x
+ * |old.........| -> |old.........|
+ */
+ map__put(new_map);
+ new_map = NULL;
+ break;
+ } else {
+ /*
+ * |new......| -> |new...|
+ * |old....| -> |old....|
+ */
+ map__add_pgoff(new_map, map__end(old_map) - map__start(new_map));
+ map__set_start(new_map, map__end(old_map));
+ }
+ }
+ }
+
+out:
+ while (!list_empty(&merged)) {
+ struct map_list_node *old_node;
+
+ old_node = list_entry(merged.next, struct map_list_node, node);
+ list_del_init(&old_node->node);
+ if (!err)
+ err = maps__insert(kmaps, old_node->map);
+ map__put(old_node->map);
+ free(old_node);
+ }
+
+ if (new_map) {
+ if (!err)
+ err = maps__insert(kmaps, new_map);
+ map__put(new_map);
+ }
+ return err;
+}
+
+static int dso__load_kcore(struct dso *dso, struct map *map,
+ const char *kallsyms_filename)
+{
+ struct maps *kmaps = map__kmaps(map);
+ struct kcore_mapfn_data md;
+ struct map *replacement_map = NULL;
+ struct map_rb_node *old_node, *next;
+ struct machine *machine;
+ bool is_64_bit;
+ int err, fd;
+ char kcore_filename[PATH_MAX];
+ u64 stext;
+
+ if (!kmaps)
+ return -EINVAL;
+
+ machine = maps__machine(kmaps);
+
+ /* This function requires that the map is the kernel map */
+ if (!__map__is_kernel(map))
+ return -EINVAL;
+
+ if (!filename_from_kallsyms_filename(kcore_filename, "kcore",
+ kallsyms_filename))
+ return -EINVAL;
+
+ /* Modules and kernel must be present at their original addresses */
+ if (validate_kcore_addresses(kallsyms_filename, map))
+ return -EINVAL;
+
+ md.dso = dso;
+ INIT_LIST_HEAD(&md.maps);
+
+ fd = open(kcore_filename, O_RDONLY);
+ if (fd < 0) {
+ pr_debug("Failed to open %s. Note /proc/kcore requires CAP_SYS_RAWIO capability to access.\n",
+ kcore_filename);
+ return -EINVAL;
+ }
+
+ /* Read new maps into temporary lists */
+ err = file__read_maps(fd, map__prot(map) & PROT_EXEC, kcore_mapfn, &md,
+ &is_64_bit);
+ if (err)
+ goto out_err;
+ dso->is_64_bit = is_64_bit;
+
+ if (list_empty(&md.maps)) {
+ err = -EINVAL;
+ goto out_err;
+ }
+
+ /* Remove old maps */
+ maps__for_each_entry_safe(kmaps, old_node, next) {
+ struct map *old_map = old_node->map;
+
+ /*
+ * We need to preserve eBPF maps even if they are
+ * covered by kcore, because we need to access
+ * eBPF dso for source data.
+ */
+ if (old_map != map && !__map__is_bpf_prog(old_map))
+ maps__remove(kmaps, old_map);
+ }
+ machine->trampolines_mapped = false;
+
+ /* Find the kernel map using the '_stext' symbol */
+ if (!kallsyms__get_function_start(kallsyms_filename, "_stext", &stext)) {
+ u64 replacement_size = 0;
+ struct map_list_node *new_node;
+
+ list_for_each_entry(new_node, &md.maps, node) {
+ struct map *new_map = new_node->map;
+ u64 new_size = map__size(new_map);
+
+ if (!(stext >= map__start(new_map) && stext < map__end(new_map)))
+ continue;
+
+ /*
+ * On some architectures, ARM64 for example, the kernel
+ * text can get allocated inside of the vmalloc segment.
+ * Select the smallest matching segment, in case stext
+ * falls within more than one in the list.
+ */
+ if (!replacement_map || new_size < replacement_size) {
+ replacement_map = new_map;
+ replacement_size = new_size;
+ }
+ }
+ }
+
+ if (!replacement_map)
+ replacement_map = list_entry(md.maps.next, struct map_list_node, node)->map;
+
+ /* Add new maps */
+ while (!list_empty(&md.maps)) {
+ struct map_list_node *new_node = list_entry(md.maps.next, struct map_list_node, node);
+ struct map *new_map = new_node->map;
+
+ list_del_init(&new_node->node);
+
+ if (RC_CHK_ACCESS(new_map) == RC_CHK_ACCESS(replacement_map)) {
+ struct map *map_ref;
+
+ map__set_start(map, map__start(new_map));
+ map__set_end(map, map__end(new_map));
+ map__set_pgoff(map, map__pgoff(new_map));
+ map__set_map_ip(map, map__map_ip_ptr(new_map));
+ map__set_unmap_ip(map, map__unmap_ip_ptr(new_map));
+ /* Ensure maps are correctly ordered */
+ map_ref = map__get(map);
+ maps__remove(kmaps, map_ref);
+ err = maps__insert(kmaps, map_ref);
+ map__put(map_ref);
+ map__put(new_map);
+ if (err)
+ goto out_err;
+ } else {
+ /*
+ * Merge kcore map into existing maps,
+ * and ensure that current maps (eBPF)
+ * stay intact.
+ */
+ if (maps__merge_in(kmaps, new_map)) {
+ err = -EINVAL;
+ goto out_err;
+ }
+ }
+ free(new_node);
+ }
+
+ if (machine__is(machine, "x86_64")) {
+ u64 addr;
+
+ /*
+ * If one of the corresponding symbols is there, assume the
+ * entry trampoline maps are too.
+ */
+ if (!kallsyms__get_function_start(kallsyms_filename,
+ ENTRY_TRAMPOLINE_NAME,
+ &addr))
+ machine->trampolines_mapped = true;
+ }
+
+ /*
+ * Set the data type and long name so that kcore can be read via
+ * dso__data_read_addr().
+ */
+ if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
+ dso->binary_type = DSO_BINARY_TYPE__GUEST_KCORE;
+ else
+ dso->binary_type = DSO_BINARY_TYPE__KCORE;
+ dso__set_long_name(dso, strdup(kcore_filename), true);
+
+ close(fd);
+
+ if (map__prot(map) & PROT_EXEC)
+ pr_debug("Using %s for kernel object code\n", kcore_filename);
+ else
+ pr_debug("Using %s for kernel data\n", kcore_filename);
+
+ return 0;
+
+out_err:
+ while (!list_empty(&md.maps)) {
+ struct map_list_node *list_node;
+
+ list_node = list_entry(md.maps.next, struct map_list_node, node);
+ list_del_init(&list_node->node);
+ map__zput(list_node->map);
+ free(list_node);
+ }
+ close(fd);
+ return err;
+}
+
+/*
+ * If the kernel is relocated at boot time, kallsyms won't match. Compute the
+ * delta based on the relocation reference symbol.
+ */
+static int kallsyms__delta(struct kmap *kmap, const char *filename, u64 *delta)
+{
+ u64 addr;
+
+ if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->name)
+ return 0;
+
+ if (kallsyms__get_function_start(filename, kmap->ref_reloc_sym->name, &addr))
+ return -1;
+
+ *delta = addr - kmap->ref_reloc_sym->addr;
+ return 0;
+}
+
+int __dso__load_kallsyms(struct dso *dso, const char *filename,
+ struct map *map, bool no_kcore)
+{
+ struct kmap *kmap = map__kmap(map);
+ u64 delta = 0;
+
+ if (symbol__restricted_filename(filename, "/proc/kallsyms"))
+ return -1;
+
+ if (!kmap || !kmap->kmaps)
+ return -1;
+
+ if (dso__load_all_kallsyms(dso, filename) < 0)
+ return -1;
+
+ if (kallsyms__delta(kmap, filename, &delta))
+ return -1;
+
+ symbols__fixup_end(&dso->symbols, true);
+ symbols__fixup_duplicate(&dso->symbols);
+
+ if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
+ dso->symtab_type = DSO_BINARY_TYPE__GUEST_KALLSYMS;
+ else
+ dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS;
+
+ if (!no_kcore && !dso__load_kcore(dso, map, filename))
+ return maps__split_kallsyms_for_kcore(kmap->kmaps, dso);
+ else
+ return maps__split_kallsyms(kmap->kmaps, dso, delta, map);
+}
+
+int dso__load_kallsyms(struct dso *dso, const char *filename,
+ struct map *map)
+{
+ return __dso__load_kallsyms(dso, filename, map, false);
+}
+
+static int dso__load_perf_map(const char *map_path, struct dso *dso)
+{
+ char *line = NULL;
+ size_t n;
+ FILE *file;
+ int nr_syms = 0;
+
+ file = fopen(map_path, "r");
+ if (file == NULL)
+ goto out_failure;
+
+ while (!feof(file)) {
+ u64 start, size;
+ struct symbol *sym;
+ int line_len, len;
+
+ line_len = getline(&line, &n, file);
+ if (line_len < 0)
+ break;
+
+ if (!line)
+ goto out_failure;
+
+ line[--line_len] = '\0'; /* \n */
+
+ len = hex2u64(line, &start);
+
+ len++;
+ if (len + 2 >= line_len)
+ continue;
+
+ len += hex2u64(line + len, &size);
+
+ len++;
+ if (len + 2 >= line_len)
+ continue;
+
+ sym = symbol__new(start, size, STB_GLOBAL, STT_FUNC, line + len);
+
+ if (sym == NULL)
+ goto out_delete_line;
+
+ symbols__insert(&dso->symbols, sym);
+ nr_syms++;
+ }
+
+ free(line);
+ fclose(file);
+
+ return nr_syms;
+
+out_delete_line:
+ free(line);
+out_failure:
+ return -1;
+}
+
+#ifdef HAVE_LIBBFD_SUPPORT
+#define PACKAGE 'perf'
+#include <bfd.h>
+
+static int bfd_symbols__cmpvalue(const void *a, const void *b)
+{
+ const asymbol *as = *(const asymbol **)a, *bs = *(const asymbol **)b;
+
+ if (bfd_asymbol_value(as) != bfd_asymbol_value(bs))
+ return bfd_asymbol_value(as) - bfd_asymbol_value(bs);
+
+ return bfd_asymbol_name(as)[0] - bfd_asymbol_name(bs)[0];
+}
+
+static int bfd2elf_binding(asymbol *symbol)
+{
+ if (symbol->flags & BSF_WEAK)
+ return STB_WEAK;
+ if (symbol->flags & BSF_GLOBAL)
+ return STB_GLOBAL;
+ if (symbol->flags & BSF_LOCAL)
+ return STB_LOCAL;
+ return -1;
+}
+
+int dso__load_bfd_symbols(struct dso *dso, const char *debugfile)
+{
+ int err = -1;
+ long symbols_size, symbols_count, i;
+ asection *section;
+ asymbol **symbols, *sym;
+ struct symbol *symbol;
+ bfd *abfd;
+ u64 start, len;
+
+ abfd = bfd_openr(debugfile, NULL);
+ if (!abfd)
+ return -1;
+
+ if (!bfd_check_format(abfd, bfd_object)) {
+ pr_debug2("%s: cannot read %s bfd file.\n", __func__,
+ dso->long_name);
+ goto out_close;
+ }
+
+ if (bfd_get_flavour(abfd) == bfd_target_elf_flavour)
+ goto out_close;
+
+ symbols_size = bfd_get_symtab_upper_bound(abfd);
+ if (symbols_size == 0) {
+ bfd_close(abfd);
+ return 0;
+ }
+
+ if (symbols_size < 0)
+ goto out_close;
+
+ symbols = malloc(symbols_size);
+ if (!symbols)
+ goto out_close;
+
+ symbols_count = bfd_canonicalize_symtab(abfd, symbols);
+ if (symbols_count < 0)
+ goto out_free;
+
+ section = bfd_get_section_by_name(abfd, ".text");
+ if (section) {
+ for (i = 0; i < symbols_count; ++i) {
+ if (!strcmp(bfd_asymbol_name(symbols[i]), "__ImageBase") ||
+ !strcmp(bfd_asymbol_name(symbols[i]), "__image_base__"))
+ break;
+ }
+ if (i < symbols_count) {
+ /* PE symbols can only have 4 bytes, so use .text high bits */
+ dso->text_offset = section->vma - (u32)section->vma;
+ dso->text_offset += (u32)bfd_asymbol_value(symbols[i]);
+ } else {
+ dso->text_offset = section->vma - section->filepos;
+ }
+ }
+
+ qsort(symbols, symbols_count, sizeof(asymbol *), bfd_symbols__cmpvalue);
+
+#ifdef bfd_get_section
+#define bfd_asymbol_section bfd_get_section
+#endif
+ for (i = 0; i < symbols_count; ++i) {
+ sym = symbols[i];
+ section = bfd_asymbol_section(sym);
+ if (bfd2elf_binding(sym) < 0)
+ continue;
+
+ while (i + 1 < symbols_count &&
+ bfd_asymbol_section(symbols[i + 1]) == section &&
+ bfd2elf_binding(symbols[i + 1]) < 0)
+ i++;
+
+ if (i + 1 < symbols_count &&
+ bfd_asymbol_section(symbols[i + 1]) == section)
+ len = symbols[i + 1]->value - sym->value;
+ else
+ len = section->size - sym->value;
+
+ start = bfd_asymbol_value(sym) - dso->text_offset;
+ symbol = symbol__new(start, len, bfd2elf_binding(sym), STT_FUNC,
+ bfd_asymbol_name(sym));
+ if (!symbol)
+ goto out_free;
+
+ symbols__insert(&dso->symbols, symbol);
+ }
+#ifdef bfd_get_section
+#undef bfd_asymbol_section
+#endif
+
+ symbols__fixup_end(&dso->symbols, false);
+ symbols__fixup_duplicate(&dso->symbols);
+ dso->adjust_symbols = 1;
+
+ err = 0;
+out_free:
+ free(symbols);
+out_close:
+ bfd_close(abfd);
+ return err;
+}
+#endif
+
+static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
+ enum dso_binary_type type)
+{
+ switch (type) {
+ case DSO_BINARY_TYPE__JAVA_JIT:
+ case DSO_BINARY_TYPE__DEBUGLINK:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
+ case DSO_BINARY_TYPE__FEDORA_DEBUGINFO:
+ case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO:
+ case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
+ case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
+ case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
+ return !kmod && dso->kernel == DSO_SPACE__USER;
+
+ case DSO_BINARY_TYPE__KALLSYMS:
+ case DSO_BINARY_TYPE__VMLINUX:
+ case DSO_BINARY_TYPE__KCORE:
+ return dso->kernel == DSO_SPACE__KERNEL;
+
+ case DSO_BINARY_TYPE__GUEST_KALLSYMS:
+ case DSO_BINARY_TYPE__GUEST_VMLINUX:
+ case DSO_BINARY_TYPE__GUEST_KCORE:
+ return dso->kernel == DSO_SPACE__KERNEL_GUEST;
+
+ case DSO_BINARY_TYPE__GUEST_KMODULE:
+ case DSO_BINARY_TYPE__GUEST_KMODULE_COMP:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP:
+ /*
+ * kernel modules know their symtab type - it's set when
+ * creating a module dso in machine__addnew_module_map().
+ */
+ return kmod && dso->symtab_type == type;
+
+ case DSO_BINARY_TYPE__BUILD_ID_CACHE:
+ case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO:
+ return true;
+
+ case DSO_BINARY_TYPE__BPF_PROG_INFO:
+ case DSO_BINARY_TYPE__BPF_IMAGE:
+ case DSO_BINARY_TYPE__OOL:
+ case DSO_BINARY_TYPE__NOT_FOUND:
+ default:
+ return false;
+ }
+}
+
+/* Checks for the existence of the perf-<pid>.map file in two different
+ * locations. First, if the process is a separate mount namespace, check in
+ * that namespace using the pid of the innermost pid namespace. If's not in a
+ * namespace, or the file can't be found there, try in the mount namespace of
+ * the tracing process using our view of its pid.
+ */
+static int dso__find_perf_map(char *filebuf, size_t bufsz,
+ struct nsinfo **nsip)
+{
+ struct nscookie nsc;
+ struct nsinfo *nsi;
+ struct nsinfo *nnsi;
+ int rc = -1;
+
+ nsi = *nsip;
+
+ if (nsinfo__need_setns(nsi)) {
+ snprintf(filebuf, bufsz, "/tmp/perf-%d.map", nsinfo__nstgid(nsi));
+ nsinfo__mountns_enter(nsi, &nsc);
+ rc = access(filebuf, R_OK);
+ nsinfo__mountns_exit(&nsc);
+ if (rc == 0)
+ return rc;
+ }
+
+ nnsi = nsinfo__copy(nsi);
+ if (nnsi) {
+ nsinfo__put(nsi);
+
+ nsinfo__clear_need_setns(nnsi);
+ snprintf(filebuf, bufsz, "/tmp/perf-%d.map", nsinfo__tgid(nnsi));
+ *nsip = nnsi;
+ rc = 0;
+ }
+
+ return rc;
+}
+
+int dso__load(struct dso *dso, struct map *map)
+{
+ char *name;
+ int ret = -1;
+ u_int i;
+ struct machine *machine = NULL;
+ char *root_dir = (char *) "";
+ int ss_pos = 0;
+ struct symsrc ss_[2];
+ struct symsrc *syms_ss = NULL, *runtime_ss = NULL;
+ bool kmod;
+ bool perfmap;
+ struct build_id bid;
+ struct nscookie nsc;
+ char newmapname[PATH_MAX];
+ const char *map_path = dso->long_name;
+
+ mutex_lock(&dso->lock);
+ perfmap = strncmp(dso->name, "/tmp/perf-", 10) == 0;
+ if (perfmap) {
+ if (dso->nsinfo && (dso__find_perf_map(newmapname,
+ sizeof(newmapname), &dso->nsinfo) == 0)) {
+ map_path = newmapname;
+ }
+ }
+
+ nsinfo__mountns_enter(dso->nsinfo, &nsc);
+
+ /* check again under the dso->lock */
+ if (dso__loaded(dso)) {
+ ret = 1;
+ goto out;
+ }
+
+ kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE ||
+ dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP ||
+ dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE ||
+ dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
+
+ if (dso->kernel && !kmod) {
+ if (dso->kernel == DSO_SPACE__KERNEL)
+ ret = dso__load_kernel_sym(dso, map);
+ else if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
+ ret = dso__load_guest_kernel_sym(dso, map);
+
+ machine = maps__machine(map__kmaps(map));
+ if (machine__is(machine, "x86_64"))
+ machine__map_x86_64_entry_trampolines(machine, dso);
+ goto out;
+ }
+
+ dso->adjust_symbols = 0;
+
+ if (perfmap) {
+ ret = dso__load_perf_map(map_path, dso);
+ dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT :
+ DSO_BINARY_TYPE__NOT_FOUND;
+ goto out;
+ }
+
+ if (machine)
+ root_dir = machine->root_dir;
+
+ name = malloc(PATH_MAX);
+ if (!name)
+ goto out;
+
+ /*
+ * Read the build id if possible. This is required for
+ * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work
+ */
+ if (!dso->has_build_id &&
+ is_regular_file(dso->long_name)) {
+ __symbol__join_symfs(name, PATH_MAX, dso->long_name);
+ if (filename__read_build_id(name, &bid) > 0)
+ dso__set_build_id(dso, &bid);
+ }
+
+ /*
+ * Iterate over candidate debug images.
+ * Keep track of "interesting" ones (those which have a symtab, dynsym,
+ * and/or opd section) for processing.
+ */
+ for (i = 0; i < DSO_BINARY_TYPE__SYMTAB_CNT; i++) {
+ struct symsrc *ss = &ss_[ss_pos];
+ bool next_slot = false;
+ bool is_reg;
+ bool nsexit;
+ int bfdrc = -1;
+ int sirc = -1;
+
+ enum dso_binary_type symtab_type = binary_type_symtab[i];
+
+ nsexit = (symtab_type == DSO_BINARY_TYPE__BUILD_ID_CACHE ||
+ symtab_type == DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO);
+
+ if (!dso__is_compatible_symtab_type(dso, kmod, symtab_type))
+ continue;
+
+ if (dso__read_binary_type_filename(dso, symtab_type,
+ root_dir, name, PATH_MAX))
+ continue;
+
+ if (nsexit)
+ nsinfo__mountns_exit(&nsc);
+
+ is_reg = is_regular_file(name);
+ if (!is_reg && errno == ENOENT && dso->nsinfo) {
+ char *new_name = dso__filename_with_chroot(dso, name);
+ if (new_name) {
+ is_reg = is_regular_file(new_name);
+ strlcpy(name, new_name, PATH_MAX);
+ free(new_name);
+ }
+ }
+
+#ifdef HAVE_LIBBFD_SUPPORT
+ if (is_reg)
+ bfdrc = dso__load_bfd_symbols(dso, name);
+#endif
+ if (is_reg && bfdrc < 0)
+ sirc = symsrc__init(ss, dso, name, symtab_type);
+
+ if (nsexit)
+ nsinfo__mountns_enter(dso->nsinfo, &nsc);
+
+ if (bfdrc == 0) {
+ ret = 0;
+ break;
+ }
+
+ if (!is_reg || sirc < 0)
+ continue;
+
+ if (!syms_ss && symsrc__has_symtab(ss)) {
+ syms_ss = ss;
+ next_slot = true;
+ if (!dso->symsrc_filename)
+ dso->symsrc_filename = strdup(name);
+ }
+
+ if (!runtime_ss && symsrc__possibly_runtime(ss)) {
+ runtime_ss = ss;
+ next_slot = true;
+ }
+
+ if (next_slot) {
+ ss_pos++;
+
+ if (syms_ss && runtime_ss)
+ break;
+ } else {
+ symsrc__destroy(ss);
+ }
+
+ }
+
+ if (!runtime_ss && !syms_ss)
+ goto out_free;
+
+ if (runtime_ss && !syms_ss) {
+ syms_ss = runtime_ss;
+ }
+
+ /* We'll have to hope for the best */
+ if (!runtime_ss && syms_ss)
+ runtime_ss = syms_ss;
+
+ if (syms_ss)
+ ret = dso__load_sym(dso, map, syms_ss, runtime_ss, kmod);
+ else
+ ret = -1;
+
+ if (ret > 0) {
+ int nr_plt;
+
+ nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss);
+ if (nr_plt > 0)
+ ret += nr_plt;
+ }
+
+ for (; ss_pos > 0; ss_pos--)
+ symsrc__destroy(&ss_[ss_pos - 1]);
+out_free:
+ free(name);
+ if (ret < 0 && strstr(dso->name, " (deleted)") != NULL)
+ ret = 0;
+out:
+ dso__set_loaded(dso);
+ mutex_unlock(&dso->lock);
+ nsinfo__mountns_exit(&nsc);
+
+ return ret;
+}
+
+static int map__strcmp(const void *a, const void *b)
+{
+ const struct map *map_a = *(const struct map **)a;
+ const struct map *map_b = *(const struct map **)b;
+ const struct dso *dso_a = map__dso(map_a);
+ const struct dso *dso_b = map__dso(map_b);
+ int ret = strcmp(dso_a->short_name, dso_b->short_name);
+
+ if (ret == 0 && map_a != map_b) {
+ /*
+ * Ensure distinct but name equal maps have an order in part to
+ * aid reference counting.
+ */
+ ret = (int)map__start(map_a) - (int)map__start(map_b);
+ if (ret == 0)
+ ret = (int)((intptr_t)map_a - (intptr_t)map_b);
+ }
+
+ return ret;
+}
+
+static int map__strcmp_name(const void *name, const void *b)
+{
+ const struct dso *dso = map__dso(*(const struct map **)b);
+
+ return strcmp(name, dso->short_name);
+}
+
+void __maps__sort_by_name(struct maps *maps)
+{
+ qsort(maps__maps_by_name(maps), maps__nr_maps(maps), sizeof(struct map *), map__strcmp);
+}
+
+static int map__groups__sort_by_name_from_rbtree(struct maps *maps)
+{
+ struct map_rb_node *rb_node;
+ struct map **maps_by_name = realloc(maps__maps_by_name(maps),
+ maps__nr_maps(maps) * sizeof(struct map *));
+ int i = 0;
+
+ if (maps_by_name == NULL)
+ return -1;
+
+ up_read(maps__lock(maps));
+ down_write(maps__lock(maps));
+
+ RC_CHK_ACCESS(maps)->maps_by_name = maps_by_name;
+ RC_CHK_ACCESS(maps)->nr_maps_allocated = maps__nr_maps(maps);
+
+ maps__for_each_entry(maps, rb_node)
+ maps_by_name[i++] = map__get(rb_node->map);
+
+ __maps__sort_by_name(maps);
+
+ up_write(maps__lock(maps));
+ down_read(maps__lock(maps));
+
+ return 0;
+}
+
+static struct map *__maps__find_by_name(struct maps *maps, const char *name)
+{
+ struct map **mapp;
+
+ if (maps__maps_by_name(maps) == NULL &&
+ map__groups__sort_by_name_from_rbtree(maps))
+ return NULL;
+
+ mapp = bsearch(name, maps__maps_by_name(maps), maps__nr_maps(maps),
+ sizeof(*mapp), map__strcmp_name);
+ if (mapp)
+ return *mapp;
+ return NULL;
+}
+
+struct map *maps__find_by_name(struct maps *maps, const char *name)
+{
+ struct map_rb_node *rb_node;
+ struct map *map;
+
+ down_read(maps__lock(maps));
+
+
+ if (RC_CHK_ACCESS(maps)->last_search_by_name) {
+ const struct dso *dso = map__dso(RC_CHK_ACCESS(maps)->last_search_by_name);
+
+ if (strcmp(dso->short_name, name) == 0) {
+ map = RC_CHK_ACCESS(maps)->last_search_by_name;
+ goto out_unlock;
+ }
+ }
+ /*
+ * If we have maps->maps_by_name, then the name isn't in the rbtree,
+ * as maps->maps_by_name mirrors the rbtree when lookups by name are
+ * made.
+ */
+ map = __maps__find_by_name(maps, name);
+ if (map || maps__maps_by_name(maps) != NULL)
+ goto out_unlock;
+
+ /* Fallback to traversing the rbtree... */
+ maps__for_each_entry(maps, rb_node) {
+ struct dso *dso;
+
+ map = rb_node->map;
+ dso = map__dso(map);
+ if (strcmp(dso->short_name, name) == 0) {
+ RC_CHK_ACCESS(maps)->last_search_by_name = map;
+ goto out_unlock;
+ }
+ }
+ map = NULL;
+
+out_unlock:
+ up_read(maps__lock(maps));
+ return map;
+}
+
+int dso__load_vmlinux(struct dso *dso, struct map *map,
+ const char *vmlinux, bool vmlinux_allocated)
+{
+ int err = -1;
+ struct symsrc ss;
+ char symfs_vmlinux[PATH_MAX];
+ enum dso_binary_type symtab_type;
+
+ if (vmlinux[0] == '/')
+ snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s", vmlinux);
+ else
+ symbol__join_symfs(symfs_vmlinux, vmlinux);
+
+ if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
+ symtab_type = DSO_BINARY_TYPE__GUEST_VMLINUX;
+ else
+ symtab_type = DSO_BINARY_TYPE__VMLINUX;
+
+ if (symsrc__init(&ss, dso, symfs_vmlinux, symtab_type))
+ return -1;
+
+ /*
+ * dso__load_sym() may copy 'dso' which will result in the copies having
+ * an incorrect long name unless we set it here first.
+ */
+ dso__set_long_name(dso, vmlinux, vmlinux_allocated);
+ if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
+ dso->binary_type = DSO_BINARY_TYPE__GUEST_VMLINUX;
+ else
+ dso->binary_type = DSO_BINARY_TYPE__VMLINUX;
+
+ err = dso__load_sym(dso, map, &ss, &ss, 0);
+ symsrc__destroy(&ss);
+
+ if (err > 0) {
+ dso__set_loaded(dso);
+ pr_debug("Using %s for symbols\n", symfs_vmlinux);
+ }
+
+ return err;
+}
+
+int dso__load_vmlinux_path(struct dso *dso, struct map *map)
+{
+ int i, err = 0;
+ char *filename = NULL;
+
+ pr_debug("Looking at the vmlinux_path (%d entries long)\n",
+ vmlinux_path__nr_entries + 1);
+
+ for (i = 0; i < vmlinux_path__nr_entries; ++i) {
+ err = dso__load_vmlinux(dso, map, vmlinux_path[i], false);
+ if (err > 0)
+ goto out;
+ }
+
+ if (!symbol_conf.ignore_vmlinux_buildid)
+ filename = dso__build_id_filename(dso, NULL, 0, false);
+ if (filename != NULL) {
+ err = dso__load_vmlinux(dso, map, filename, true);
+ if (err > 0)
+ goto out;
+ free(filename);
+ }
+out:
+ return err;
+}
+
+static bool visible_dir_filter(const char *name, struct dirent *d)
+{
+ if (d->d_type != DT_DIR)
+ return false;
+ return lsdir_no_dot_filter(name, d);
+}
+
+static int find_matching_kcore(struct map *map, char *dir, size_t dir_sz)
+{
+ char kallsyms_filename[PATH_MAX];
+ int ret = -1;
+ struct strlist *dirs;
+ struct str_node *nd;
+
+ dirs = lsdir(dir, visible_dir_filter);
+ if (!dirs)
+ return -1;
+
+ strlist__for_each_entry(nd, dirs) {
+ scnprintf(kallsyms_filename, sizeof(kallsyms_filename),
+ "%s/%s/kallsyms", dir, nd->s);
+ if (!validate_kcore_addresses(kallsyms_filename, map)) {
+ strlcpy(dir, kallsyms_filename, dir_sz);
+ ret = 0;
+ break;
+ }
+ }
+
+ strlist__delete(dirs);
+
+ return ret;
+}
+
+/*
+ * Use open(O_RDONLY) to check readability directly instead of access(R_OK)
+ * since access(R_OK) only checks with real UID/GID but open() use effective
+ * UID/GID and actual capabilities (e.g. /proc/kcore requires CAP_SYS_RAWIO).
+ */
+static bool filename__readable(const char *file)
+{
+ int fd = open(file, O_RDONLY);
+ if (fd < 0)
+ return false;
+ close(fd);
+ return true;
+}
+
+static char *dso__find_kallsyms(struct dso *dso, struct map *map)
+{
+ struct build_id bid;
+ char sbuild_id[SBUILD_ID_SIZE];
+ bool is_host = false;
+ char path[PATH_MAX];
+
+ if (!dso->has_build_id) {
+ /*
+ * Last resort, if we don't have a build-id and couldn't find
+ * any vmlinux file, try the running kernel kallsyms table.
+ */
+ goto proc_kallsyms;
+ }
+
+ if (sysfs__read_build_id("/sys/kernel/notes", &bid) == 0)
+ is_host = dso__build_id_equal(dso, &bid);
+
+ /* Try a fast path for /proc/kallsyms if possible */
+ if (is_host) {
+ /*
+ * Do not check the build-id cache, unless we know we cannot use
+ * /proc/kcore or module maps don't match to /proc/kallsyms.
+ * To check readability of /proc/kcore, do not use access(R_OK)
+ * since /proc/kcore requires CAP_SYS_RAWIO to read and access
+ * can't check it.
+ */
+ if (filename__readable("/proc/kcore") &&
+ !validate_kcore_addresses("/proc/kallsyms", map))
+ goto proc_kallsyms;
+ }
+
+ build_id__sprintf(&dso->bid, sbuild_id);
+
+ /* Find kallsyms in build-id cache with kcore */
+ scnprintf(path, sizeof(path), "%s/%s/%s",
+ buildid_dir, DSO__NAME_KCORE, sbuild_id);
+
+ if (!find_matching_kcore(map, path, sizeof(path)))
+ return strdup(path);
+
+ /* Use current /proc/kallsyms if possible */
+ if (is_host) {
+proc_kallsyms:
+ return strdup("/proc/kallsyms");
+ }
+
+ /* Finally, find a cache of kallsyms */
+ if (!build_id_cache__kallsyms_path(sbuild_id, path, sizeof(path))) {
+ pr_err("No kallsyms or vmlinux with build-id %s was found\n",
+ sbuild_id);
+ return NULL;
+ }
+
+ return strdup(path);
+}
+
+static int dso__load_kernel_sym(struct dso *dso, struct map *map)
+{
+ int err;
+ const char *kallsyms_filename = NULL;
+ char *kallsyms_allocated_filename = NULL;
+ char *filename = NULL;
+
+ /*
+ * Step 1: if the user specified a kallsyms or vmlinux filename, use
+ * it and only it, reporting errors to the user if it cannot be used.
+ *
+ * For instance, try to analyse an ARM perf.data file _without_ a
+ * build-id, or if the user specifies the wrong path to the right
+ * vmlinux file, obviously we can't fallback to another vmlinux (a
+ * x86_86 one, on the machine where analysis is being performed, say),
+ * or worse, /proc/kallsyms.
+ *
+ * If the specified file _has_ a build-id and there is a build-id
+ * section in the perf.data file, we will still do the expected
+ * validation in dso__load_vmlinux and will bail out if they don't
+ * match.
+ */
+ if (symbol_conf.kallsyms_name != NULL) {
+ kallsyms_filename = symbol_conf.kallsyms_name;
+ goto do_kallsyms;
+ }
+
+ if (!symbol_conf.ignore_vmlinux && symbol_conf.vmlinux_name != NULL) {
+ return dso__load_vmlinux(dso, map, symbol_conf.vmlinux_name, false);
+ }
+
+ /*
+ * Before checking on common vmlinux locations, check if it's
+ * stored as standard build id binary (not kallsyms) under
+ * .debug cache.
+ */
+ if (!symbol_conf.ignore_vmlinux_buildid)
+ filename = __dso__build_id_filename(dso, NULL, 0, false, false);
+ if (filename != NULL) {
+ err = dso__load_vmlinux(dso, map, filename, true);
+ if (err > 0)
+ return err;
+ free(filename);
+ }
+
+ if (!symbol_conf.ignore_vmlinux && vmlinux_path != NULL) {
+ err = dso__load_vmlinux_path(dso, map);
+ if (err > 0)
+ return err;
+ }
+
+ /* do not try local files if a symfs was given */
+ if (symbol_conf.symfs[0] != 0)
+ return -1;
+
+ kallsyms_allocated_filename = dso__find_kallsyms(dso, map);
+ if (!kallsyms_allocated_filename)
+ return -1;
+
+ kallsyms_filename = kallsyms_allocated_filename;
+
+do_kallsyms:
+ err = dso__load_kallsyms(dso, kallsyms_filename, map);
+ if (err > 0)
+ pr_debug("Using %s for symbols\n", kallsyms_filename);
+ free(kallsyms_allocated_filename);
+
+ if (err > 0 && !dso__is_kcore(dso)) {
+ dso->binary_type = DSO_BINARY_TYPE__KALLSYMS;
+ dso__set_long_name(dso, DSO__NAME_KALLSYMS, false);
+ map__fixup_start(map);
+ map__fixup_end(map);
+ }
+
+ return err;
+}
+
+static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map)
+{
+ int err;
+ const char *kallsyms_filename;
+ struct machine *machine = maps__machine(map__kmaps(map));
+ char path[PATH_MAX];
+
+ if (machine->kallsyms_filename) {
+ kallsyms_filename = machine->kallsyms_filename;
+ } else if (machine__is_default_guest(machine)) {
+ /*
+ * if the user specified a vmlinux filename, use it and only
+ * it, reporting errors to the user if it cannot be used.
+ * Or use file guest_kallsyms inputted by user on commandline
+ */
+ if (symbol_conf.default_guest_vmlinux_name != NULL) {
+ err = dso__load_vmlinux(dso, map,
+ symbol_conf.default_guest_vmlinux_name,
+ false);
+ return err;
+ }
+
+ kallsyms_filename = symbol_conf.default_guest_kallsyms;
+ if (!kallsyms_filename)
+ return -1;
+ } else {
+ sprintf(path, "%s/proc/kallsyms", machine->root_dir);
+ kallsyms_filename = path;
+ }
+
+ err = dso__load_kallsyms(dso, kallsyms_filename, map);
+ if (err > 0)
+ pr_debug("Using %s for symbols\n", kallsyms_filename);
+ if (err > 0 && !dso__is_kcore(dso)) {
+ dso->binary_type = DSO_BINARY_TYPE__GUEST_KALLSYMS;
+ dso__set_long_name(dso, machine->mmap_name, false);
+ map__fixup_start(map);
+ map__fixup_end(map);
+ }
+
+ return err;
+}
+
+static void vmlinux_path__exit(void)
+{
+ while (--vmlinux_path__nr_entries >= 0)
+ zfree(&vmlinux_path[vmlinux_path__nr_entries]);
+ vmlinux_path__nr_entries = 0;
+
+ zfree(&vmlinux_path);
+}
+
+static const char * const vmlinux_paths[] = {
+ "vmlinux",
+ "/boot/vmlinux"
+};
+
+static const char * const vmlinux_paths_upd[] = {
+ "/boot/vmlinux-%s",
+ "/usr/lib/debug/boot/vmlinux-%s",
+ "/lib/modules/%s/build/vmlinux",
+ "/usr/lib/debug/lib/modules/%s/vmlinux",
+ "/usr/lib/debug/boot/vmlinux-%s.debug"
+};
+
+static int vmlinux_path__add(const char *new_entry)
+{
+ vmlinux_path[vmlinux_path__nr_entries] = strdup(new_entry);
+ if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
+ return -1;
+ ++vmlinux_path__nr_entries;
+
+ return 0;
+}
+
+static int vmlinux_path__init(struct perf_env *env)
+{
+ struct utsname uts;
+ char bf[PATH_MAX];
+ char *kernel_version;
+ unsigned int i;
+
+ vmlinux_path = malloc(sizeof(char *) * (ARRAY_SIZE(vmlinux_paths) +
+ ARRAY_SIZE(vmlinux_paths_upd)));
+ if (vmlinux_path == NULL)
+ return -1;
+
+ for (i = 0; i < ARRAY_SIZE(vmlinux_paths); i++)
+ if (vmlinux_path__add(vmlinux_paths[i]) < 0)
+ goto out_fail;
+
+ /* only try kernel version if no symfs was given */
+ if (symbol_conf.symfs[0] != 0)
+ return 0;
+
+ if (env) {
+ kernel_version = env->os_release;
+ } else {
+ if (uname(&uts) < 0)
+ goto out_fail;
+
+ kernel_version = uts.release;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vmlinux_paths_upd); i++) {
+ snprintf(bf, sizeof(bf), vmlinux_paths_upd[i], kernel_version);
+ if (vmlinux_path__add(bf) < 0)
+ goto out_fail;
+ }
+
+ return 0;
+
+out_fail:
+ vmlinux_path__exit();
+ return -1;
+}
+
+int setup_list(struct strlist **list, const char *list_str,
+ const char *list_name)
+{
+ if (list_str == NULL)
+ return 0;
+
+ *list = strlist__new(list_str, NULL);
+ if (!*list) {
+ pr_err("problems parsing %s list\n", list_name);
+ return -1;
+ }
+
+ symbol_conf.has_filter = true;
+ return 0;
+}
+
+int setup_intlist(struct intlist **list, const char *list_str,
+ const char *list_name)
+{
+ if (list_str == NULL)
+ return 0;
+
+ *list = intlist__new(list_str);
+ if (!*list) {
+ pr_err("problems parsing %s list\n", list_name);
+ return -1;
+ }
+ return 0;
+}
+
+static int setup_addrlist(struct intlist **addr_list, struct strlist *sym_list)
+{
+ struct str_node *pos, *tmp;
+ unsigned long val;
+ char *sep;
+ const char *end;
+ int i = 0, err;
+
+ *addr_list = intlist__new(NULL);
+ if (!*addr_list)
+ return -1;
+
+ strlist__for_each_entry_safe(pos, tmp, sym_list) {
+ errno = 0;
+ val = strtoul(pos->s, &sep, 16);
+ if (errno || (sep == pos->s))
+ continue;
+
+ if (*sep != '\0') {
+ end = pos->s + strlen(pos->s) - 1;
+ while (end >= sep && isspace(*end))
+ end--;
+
+ if (end >= sep)
+ continue;
+ }
+
+ err = intlist__add(*addr_list, val);
+ if (err)
+ break;
+
+ strlist__remove(sym_list, pos);
+ i++;
+ }
+
+ if (i == 0) {
+ intlist__delete(*addr_list);
+ *addr_list = NULL;
+ }
+
+ return 0;
+}
+
+static bool symbol__read_kptr_restrict(void)
+{
+ bool value = false;
+ FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r");
+
+ if (fp != NULL) {
+ char line[8];
+
+ if (fgets(line, sizeof(line), fp) != NULL)
+ value = perf_cap__capable(CAP_SYSLOG) ?
+ (atoi(line) >= 2) :
+ (atoi(line) != 0);
+
+ fclose(fp);
+ }
+
+ /* Per kernel/kallsyms.c:
+ * we also restrict when perf_event_paranoid > 1 w/o CAP_SYSLOG
+ */
+ if (perf_event_paranoid() > 1 && !perf_cap__capable(CAP_SYSLOG))
+ value = true;
+
+ return value;
+}
+
+int symbol__annotation_init(void)
+{
+ if (symbol_conf.init_annotation)
+ return 0;
+
+ if (symbol_conf.initialized) {
+ pr_err("Annotation needs to be init before symbol__init()\n");
+ return -1;
+ }
+
+ symbol_conf.priv_size += sizeof(struct annotation);
+ symbol_conf.init_annotation = true;
+ return 0;
+}
+
+int symbol__init(struct perf_env *env)
+{
+ const char *symfs;
+
+ if (symbol_conf.initialized)
+ return 0;
+
+ symbol_conf.priv_size = PERF_ALIGN(symbol_conf.priv_size, sizeof(u64));
+
+ symbol__elf_init();
+
+ if (symbol_conf.try_vmlinux_path && vmlinux_path__init(env) < 0)
+ return -1;
+
+ if (symbol_conf.field_sep && *symbol_conf.field_sep == '.') {
+ pr_err("'.' is the only non valid --field-separator argument\n");
+ return -1;
+ }
+
+ if (setup_list(&symbol_conf.dso_list,
+ symbol_conf.dso_list_str, "dso") < 0)
+ return -1;
+
+ if (setup_list(&symbol_conf.comm_list,
+ symbol_conf.comm_list_str, "comm") < 0)
+ goto out_free_dso_list;
+
+ if (setup_intlist(&symbol_conf.pid_list,
+ symbol_conf.pid_list_str, "pid") < 0)
+ goto out_free_comm_list;
+
+ if (setup_intlist(&symbol_conf.tid_list,
+ symbol_conf.tid_list_str, "tid") < 0)
+ goto out_free_pid_list;
+
+ if (setup_list(&symbol_conf.sym_list,
+ symbol_conf.sym_list_str, "symbol") < 0)
+ goto out_free_tid_list;
+
+ if (symbol_conf.sym_list &&
+ setup_addrlist(&symbol_conf.addr_list, symbol_conf.sym_list) < 0)
+ goto out_free_sym_list;
+
+ if (setup_list(&symbol_conf.bt_stop_list,
+ symbol_conf.bt_stop_list_str, "symbol") < 0)
+ goto out_free_sym_list;
+
+ /*
+ * A path to symbols of "/" is identical to ""
+ * reset here for simplicity.
+ */
+ symfs = realpath(symbol_conf.symfs, NULL);
+ if (symfs == NULL)
+ symfs = symbol_conf.symfs;
+ if (strcmp(symfs, "/") == 0)
+ symbol_conf.symfs = "";
+ if (symfs != symbol_conf.symfs)
+ free((void *)symfs);
+
+ symbol_conf.kptr_restrict = symbol__read_kptr_restrict();
+
+ symbol_conf.initialized = true;
+ return 0;
+
+out_free_sym_list:
+ strlist__delete(symbol_conf.sym_list);
+ intlist__delete(symbol_conf.addr_list);
+out_free_tid_list:
+ intlist__delete(symbol_conf.tid_list);
+out_free_pid_list:
+ intlist__delete(symbol_conf.pid_list);
+out_free_comm_list:
+ strlist__delete(symbol_conf.comm_list);
+out_free_dso_list:
+ strlist__delete(symbol_conf.dso_list);
+ return -1;
+}
+
+void symbol__exit(void)
+{
+ if (!symbol_conf.initialized)
+ return;
+ strlist__delete(symbol_conf.bt_stop_list);
+ strlist__delete(symbol_conf.sym_list);
+ strlist__delete(symbol_conf.dso_list);
+ strlist__delete(symbol_conf.comm_list);
+ intlist__delete(symbol_conf.tid_list);
+ intlist__delete(symbol_conf.pid_list);
+ intlist__delete(symbol_conf.addr_list);
+ vmlinux_path__exit();
+ symbol_conf.sym_list = symbol_conf.dso_list = symbol_conf.comm_list = NULL;
+ symbol_conf.bt_stop_list = NULL;
+ symbol_conf.initialized = false;
+}
+
+int symbol__config_symfs(const struct option *opt __maybe_unused,
+ const char *dir, int unset __maybe_unused)
+{
+ char *bf = NULL;
+ int ret;
+
+ symbol_conf.symfs = strdup(dir);
+ if (symbol_conf.symfs == NULL)
+ return -ENOMEM;
+
+ /* skip the locally configured cache if a symfs is given, and
+ * config buildid dir to symfs/.debug
+ */
+ ret = asprintf(&bf, "%s/%s", dir, ".debug");
+ if (ret < 0)
+ return -ENOMEM;
+
+ set_buildid_dir(bf);
+
+ free(bf);
+ return 0;
+}
+
+struct mem_info *mem_info__get(struct mem_info *mi)
+{
+ if (mi)
+ refcount_inc(&mi->refcnt);
+ return mi;
+}
+
+void mem_info__put(struct mem_info *mi)
+{
+ if (mi && refcount_dec_and_test(&mi->refcnt))
+ free(mi);
+}
+
+struct mem_info *mem_info__new(void)
+{
+ struct mem_info *mi = zalloc(sizeof(*mi));
+
+ if (mi)
+ refcount_set(&mi->refcnt, 1);
+ return mi;
+}
+
+/*
+ * Checks that user supplied symbol kernel files are accessible because
+ * the default mechanism for accessing elf files fails silently. i.e. if
+ * debug syms for a build ID aren't found perf carries on normally. When
+ * they are user supplied we should assume that the user doesn't want to
+ * silently fail.
+ */
+int symbol__validate_sym_arguments(void)
+{
+ if (symbol_conf.vmlinux_name &&
+ access(symbol_conf.vmlinux_name, R_OK)) {
+ pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name);
+ return -EINVAL;
+ }
+ if (symbol_conf.kallsyms_name &&
+ access(symbol_conf.kallsyms_name, R_OK)) {
+ pr_err("Invalid file: %s\n", symbol_conf.kallsyms_name);
+ return -EINVAL;
+ }
+ return 0;
+}
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
new file mode 100644
index 000000000..af87c46b3
--- /dev/null
+++ b/tools/perf/util/symbol.h
@@ -0,0 +1,286 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SYMBOL
+#define __PERF_SYMBOL 1
+
+#include <linux/types.h>
+#include <linux/refcount.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <stdio.h>
+#include "addr_location.h"
+#include "path.h"
+#include "symbol_conf.h"
+#include "spark.h"
+
+#ifdef HAVE_LIBELF_SUPPORT
+#include <libelf.h>
+#include <gelf.h>
+#endif
+#include <elf.h>
+
+struct dso;
+struct map;
+struct maps;
+struct option;
+struct build_id;
+
+/*
+ * libelf 0.8.x and earlier do not support ELF_C_READ_MMAP;
+ * for newer versions we can use mmap to reduce memory usage:
+ */
+#ifdef ELF_C_READ_MMAP
+# define PERF_ELF_C_READ_MMAP ELF_C_READ_MMAP
+#else
+# define PERF_ELF_C_READ_MMAP ELF_C_READ
+#endif
+
+#ifdef HAVE_LIBELF_SUPPORT
+Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
+ GElf_Shdr *shp, const char *name, size_t *idx);
+#endif
+
+/**
+ * A symtab entry. When allocated this may be preceded by an annotation (see
+ * symbol__annotation) and/or a browser_index (see symbol__browser_index).
+ */
+struct symbol {
+ struct rb_node rb_node;
+ /** Range of symbol [start, end). */
+ u64 start;
+ u64 end;
+ /** Length of the string name. */
+ u16 namelen;
+ /** ELF symbol type as defined for st_info. E.g STT_OBJECT or STT_FUNC. */
+ u8 type:4;
+ /** ELF binding type as defined for st_info. E.g. STB_WEAK or STB_GLOBAL. */
+ u8 binding:4;
+ /** Set true for kernel symbols of idle routines. */
+ u8 idle:1;
+ /** Resolvable but tools ignore it (e.g. idle routines). */
+ u8 ignore:1;
+ /** Symbol for an inlined function. */
+ u8 inlined:1;
+ /** Has symbol__annotate2 been performed. */
+ u8 annotate2:1;
+ /** Symbol is an alias of an STT_GNU_IFUNC */
+ u8 ifunc_alias:1;
+ /** Architecture specific. Unused except on PPC where it holds st_other. */
+ u8 arch_sym;
+ /** The name of length namelen associated with the symbol. */
+ char name[];
+};
+
+void symbol__delete(struct symbol *sym);
+void symbols__delete(struct rb_root_cached *symbols);
+
+/* symbols__for_each_entry - iterate over symbols (rb_root)
+ *
+ * @symbols: the rb_root of symbols
+ * @pos: the 'struct symbol *' to use as a loop cursor
+ * @nd: the 'struct rb_node *' to use as a temporary storage
+ */
+#define symbols__for_each_entry(symbols, pos, nd) \
+ for (nd = rb_first_cached(symbols); \
+ nd && (pos = rb_entry(nd, struct symbol, rb_node)); \
+ nd = rb_next(nd))
+
+static inline size_t symbol__size(const struct symbol *sym)
+{
+ return sym->end - sym->start;
+}
+
+struct strlist;
+struct intlist;
+
+static inline int __symbol__join_symfs(char *bf, size_t size, const char *path)
+{
+ return path__join(bf, size, symbol_conf.symfs, path);
+}
+
+#define symbol__join_symfs(bf, path) __symbol__join_symfs(bf, sizeof(bf), path)
+
+extern int vmlinux_path__nr_entries;
+extern char **vmlinux_path;
+
+static inline void *symbol__priv(struct symbol *sym)
+{
+ return ((void *)sym) - symbol_conf.priv_size;
+}
+
+struct ref_reloc_sym {
+ const char *name;
+ u64 addr;
+ u64 unrelocated_addr;
+};
+
+int dso__load(struct dso *dso, struct map *map);
+int dso__load_vmlinux(struct dso *dso, struct map *map,
+ const char *vmlinux, bool vmlinux_allocated);
+int dso__load_vmlinux_path(struct dso *dso, struct map *map);
+int __dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map,
+ bool no_kcore);
+int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map);
+
+void dso__insert_symbol(struct dso *dso,
+ struct symbol *sym);
+void dso__delete_symbol(struct dso *dso,
+ struct symbol *sym);
+
+struct symbol *dso__find_symbol(struct dso *dso, u64 addr);
+struct symbol *dso__find_symbol_nocache(struct dso *dso, u64 addr);
+
+struct symbol *dso__next_symbol_by_name(struct dso *dso, size_t *idx);
+struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name, size_t *idx);
+
+struct symbol *dso__first_symbol(struct dso *dso);
+struct symbol *dso__last_symbol(struct dso *dso);
+struct symbol *dso__next_symbol(struct symbol *sym);
+
+enum dso_type dso__type_fd(int fd);
+
+int filename__read_build_id(const char *filename, struct build_id *id);
+int sysfs__read_build_id(const char *filename, struct build_id *bid);
+int modules__parse(const char *filename, void *arg,
+ int (*process_module)(void *arg, const char *name,
+ u64 start, u64 size));
+int filename__read_debuglink(const char *filename, char *debuglink,
+ size_t size);
+bool filename__has_section(const char *filename, const char *sec);
+
+struct perf_env;
+int symbol__init(struct perf_env *env);
+void symbol__exit(void);
+void symbol__elf_init(void);
+int symbol__annotation_init(void);
+
+struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name);
+size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
+ const struct addr_location *al,
+ bool unknown_as_addr,
+ bool print_offsets, FILE *fp);
+size_t symbol__fprintf_symname_offs(const struct symbol *sym,
+ const struct addr_location *al, FILE *fp);
+size_t __symbol__fprintf_symname(const struct symbol *sym,
+ const struct addr_location *al,
+ bool unknown_as_addr, FILE *fp);
+size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp);
+size_t symbol__fprintf(struct symbol *sym, FILE *fp);
+bool symbol__restricted_filename(const char *filename,
+ const char *restricted_filename);
+int symbol__config_symfs(const struct option *opt __maybe_unused,
+ const char *dir, int unset __maybe_unused);
+
+struct symsrc;
+
+#ifdef HAVE_LIBBFD_SUPPORT
+int dso__load_bfd_symbols(struct dso *dso, const char *debugfile);
+#endif
+
+int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+ struct symsrc *runtime_ss, int kmodule);
+int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss);
+
+char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name);
+
+void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym,
+ bool kernel);
+void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
+void symbols__fixup_duplicate(struct rb_root_cached *symbols);
+void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms);
+void maps__fixup_end(struct maps *maps);
+
+typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data);
+int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data,
+ bool *is_64_bit);
+
+#define PERF_KCORE_EXTRACT "/tmp/perf-kcore-XXXXXX"
+
+struct kcore_extract {
+ char *kcore_filename;
+ u64 addr;
+ u64 offs;
+ u64 len;
+ char extract_filename[sizeof(PERF_KCORE_EXTRACT)];
+ int fd;
+};
+
+int kcore_extract__create(struct kcore_extract *kce);
+void kcore_extract__delete(struct kcore_extract *kce);
+
+int kcore_copy(const char *from_dir, const char *to_dir);
+int compare_proc_modules(const char *from, const char *to);
+
+int setup_list(struct strlist **list, const char *list_str,
+ const char *list_name);
+int setup_intlist(struct intlist **list, const char *list_str,
+ const char *list_name);
+
+#ifdef HAVE_LIBELF_SUPPORT
+bool elf__needs_adjust_symbols(GElf_Ehdr ehdr);
+void arch__sym_update(struct symbol *s, GElf_Sym *sym);
+#endif
+
+const char *arch__normalize_symbol_name(const char *name);
+#define SYMBOL_A 0
+#define SYMBOL_B 1
+
+int arch__compare_symbol_names(const char *namea, const char *nameb);
+int arch__compare_symbol_names_n(const char *namea, const char *nameb,
+ unsigned int n);
+int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb);
+
+enum symbol_tag_include {
+ SYMBOL_TAG_INCLUDE__NONE = 0,
+ SYMBOL_TAG_INCLUDE__DEFAULT_ONLY
+};
+
+int symbol__match_symbol_name(const char *namea, const char *nameb,
+ enum symbol_tag_include includes);
+
+/* structure containing an SDT note's info */
+struct sdt_note {
+ char *name; /* name of the note*/
+ char *provider; /* provider name */
+ char *args;
+ bool bit32; /* whether the location is 32 bits? */
+ union { /* location, base and semaphore addrs */
+ Elf64_Addr a64[3];
+ Elf32_Addr a32[3];
+ } addr;
+ struct list_head note_list; /* SDT notes' list */
+};
+
+int get_sdt_note_list(struct list_head *head, const char *target);
+int cleanup_sdt_note_list(struct list_head *sdt_notes);
+int sdt_notes__get_count(struct list_head *start);
+
+#define SDT_PROBES_SCN ".probes"
+#define SDT_BASE_SCN ".stapsdt.base"
+#define SDT_NOTE_SCN ".note.stapsdt"
+#define SDT_NOTE_TYPE 3
+#define SDT_NOTE_NAME "stapsdt"
+#define NR_ADDR 3
+
+enum {
+ SDT_NOTE_IDX_LOC = 0,
+ SDT_NOTE_IDX_BASE,
+ SDT_NOTE_IDX_REFCTR,
+};
+
+struct mem_info *mem_info__new(void);
+struct mem_info *mem_info__get(struct mem_info *mi);
+void mem_info__put(struct mem_info *mi);
+
+static inline void __mem_info__zput(struct mem_info **mi)
+{
+ mem_info__put(*mi);
+ *mi = NULL;
+}
+
+#define mem_info__zput(mi) __mem_info__zput(&mi)
+
+int symbol__validate_sym_arguments(void);
+
+#endif /* __PERF_SYMBOL */
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
new file mode 100644
index 000000000..0b589570d
--- /dev/null
+++ b/tools/perf/util/symbol_conf.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SYMBOL_CONF
+#define __PERF_SYMBOL_CONF 1
+
+#include <stdbool.h>
+
+struct strlist;
+struct intlist;
+
+struct symbol_conf {
+ bool nanosecs;
+ unsigned short priv_size;
+ bool try_vmlinux_path,
+ init_annotation,
+ force,
+ ignore_vmlinux,
+ ignore_vmlinux_buildid,
+ show_kernel_path,
+ use_modules,
+ allow_aliases,
+ show_nr_samples,
+ show_total_period,
+ use_callchain,
+ cumulate_callchain,
+ show_branchflag_count,
+ exclude_other,
+ show_cpu_utilization,
+ initialized,
+ kptr_restrict,
+ event_group,
+ demangle,
+ demangle_kernel,
+ filter_relative,
+ show_hist_headers,
+ has_filter,
+ show_ref_callgraph,
+ hide_unresolved,
+ raw_trace,
+ report_hierarchy,
+ report_block,
+ report_individual_block,
+ inline_name,
+ disable_add2line_warn,
+ buildid_mmap2,
+ guest_code;
+ const char *vmlinux_name,
+ *kallsyms_name,
+ *source_prefix,
+ *field_sep,
+ *graph_function;
+ const char *default_guest_vmlinux_name,
+ *default_guest_kallsyms,
+ *default_guest_modules;
+ const char *guestmount;
+ const char *dso_list_str,
+ *comm_list_str,
+ *pid_list_str,
+ *tid_list_str,
+ *sym_list_str,
+ *col_width_list_str,
+ *bt_stop_list_str;
+ char *addr2line_path;
+ unsigned long time_quantum;
+ struct strlist *dso_list,
+ *comm_list,
+ *sym_list,
+ *dso_from_list,
+ *dso_to_list,
+ *sym_from_list,
+ *sym_to_list,
+ *bt_stop_list;
+ struct intlist *pid_list,
+ *tid_list,
+ *addr_list;
+ const char *symfs;
+ int res_sample;
+ int pad_output_len_dso;
+ int group_sort_idx;
+ int addr_range;
+};
+
+extern struct symbol_conf symbol_conf;
+
+#endif // __PERF_SYMBOL_CONF
diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c
new file mode 100644
index 000000000..088f4abf2
--- /dev/null
+++ b/tools/perf/util/symbol_fprintf.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <elf.h>
+#include <inttypes.h>
+#include <stdio.h>
+
+#include "dso.h"
+#include "map.h"
+#include "symbol.h"
+
+size_t symbol__fprintf(struct symbol *sym, FILE *fp)
+{
+ return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %c %s\n",
+ sym->start, sym->end,
+ sym->binding == STB_GLOBAL ? 'g' :
+ sym->binding == STB_LOCAL ? 'l' : 'w',
+ sym->name);
+}
+
+size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
+ const struct addr_location *al,
+ bool unknown_as_addr,
+ bool print_offsets, FILE *fp)
+{
+ unsigned long offset;
+ size_t length;
+
+ if (sym) {
+ length = fprintf(fp, "%s", sym->name);
+ if (al && print_offsets) {
+ if (al->addr < sym->end)
+ offset = al->addr - sym->start;
+ else
+ offset = al->addr - map__start(al->map) - sym->start;
+ length += fprintf(fp, "+0x%lx", offset);
+ }
+ return length;
+ } else if (al && unknown_as_addr)
+ return fprintf(fp, "[%#" PRIx64 "]", al->addr);
+ else
+ return fprintf(fp, "[unknown]");
+}
+
+size_t symbol__fprintf_symname_offs(const struct symbol *sym,
+ const struct addr_location *al,
+ FILE *fp)
+{
+ return __symbol__fprintf_symname_offs(sym, al, false, true, fp);
+}
+
+size_t __symbol__fprintf_symname(const struct symbol *sym,
+ const struct addr_location *al,
+ bool unknown_as_addr, FILE *fp)
+{
+ return __symbol__fprintf_symname_offs(sym, al, unknown_as_addr, false, fp);
+}
+
+size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp)
+{
+ return __symbol__fprintf_symname_offs(sym, NULL, false, false, fp);
+}
+
+size_t dso__fprintf_symbols_by_name(struct dso *dso,
+ FILE *fp)
+{
+ size_t ret = 0;
+
+ for (size_t i = 0; i < dso->symbol_names_len; i++) {
+ struct symbol *pos = dso->symbol_names[i];
+
+ ret += fprintf(fp, "%s\n", pos->name);
+ }
+ return ret;
+}
diff --git a/tools/perf/util/symsrc.h b/tools/perf/util/symsrc.h
new file mode 100644
index 000000000..edf82028c
--- /dev/null
+++ b/tools/perf/util/symsrc.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SYMSRC_
+#define __PERF_SYMSRC_ 1
+
+#include <stdbool.h>
+#include <stddef.h>
+#include "dso.h"
+
+#ifdef HAVE_LIBELF_SUPPORT
+#include <libelf.h>
+#include <gelf.h>
+#endif
+#include <elf.h>
+
+struct symsrc {
+ char *name;
+ int fd;
+ enum dso_binary_type type;
+
+#ifdef HAVE_LIBELF_SUPPORT
+ Elf *elf;
+ GElf_Ehdr ehdr;
+
+ Elf_Scn *opdsec;
+ size_t opdidx;
+ GElf_Shdr opdshdr;
+
+ Elf_Scn *symtab;
+ size_t symtab_idx;
+ GElf_Shdr symshdr;
+
+ Elf_Scn *dynsym;
+ size_t dynsym_idx;
+ GElf_Shdr dynshdr;
+
+ bool adjust_symbols;
+ bool is_64_bit;
+#endif
+};
+
+int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, enum dso_binary_type type);
+void symsrc__destroy(struct symsrc *ss);
+
+bool symsrc__has_symtab(struct symsrc *ss);
+bool symsrc__possibly_runtime(struct symsrc *ss);
+
+#endif /* __PERF_SYMSRC_ */
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
new file mode 100644
index 000000000..a0579c7d7
--- /dev/null
+++ b/tools/perf/util/synthetic-events.c
@@ -0,0 +1,2416 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "util/cgroup.h"
+#include "util/data.h"
+#include "util/debug.h"
+#include "util/dso.h"
+#include "util/event.h"
+#include "util/evlist.h"
+#include "util/machine.h"
+#include "util/map.h"
+#include "util/map_symbol.h"
+#include "util/branch.h"
+#include "util/memswap.h"
+#include "util/namespaces.h"
+#include "util/session.h"
+#include "util/stat.h"
+#include "util/symbol.h"
+#include "util/synthetic-events.h"
+#include "util/target.h"
+#include "util/time-utils.h"
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <linux/perf_event.h>
+#include <asm/bug.h>
+#include <perf/evsel.h>
+#include <perf/cpumap.h>
+#include <internal/lib.h> // page_size
+#include <internal/threadmap.h>
+#include <perf/threadmap.h>
+#include <symbol/kallsyms.h>
+#include <dirent.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
+#include <api/fs/fs.h>
+#include <api/io.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#define DEFAULT_PROC_MAP_PARSE_TIMEOUT 500
+
+unsigned int proc_map_timeout = DEFAULT_PROC_MAP_PARSE_TIMEOUT;
+
+int perf_tool__process_synth_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct machine *machine,
+ perf_event__handler_t process)
+{
+ struct perf_sample synth_sample = {
+ .pid = -1,
+ .tid = -1,
+ .time = -1,
+ .stream_id = -1,
+ .cpu = -1,
+ .period = 1,
+ .cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK,
+ };
+
+ return process(tool, event, &synth_sample, machine);
+};
+
+/*
+ * Assumes that the first 4095 bytes of /proc/pid/stat contains
+ * the comm, tgid and ppid.
+ */
+static int perf_event__get_comm_ids(pid_t pid, pid_t tid, char *comm, size_t len,
+ pid_t *tgid, pid_t *ppid, bool *kernel)
+{
+ char bf[4096];
+ int fd;
+ size_t size = 0;
+ ssize_t n;
+ char *name, *tgids, *ppids, *vmpeak, *threads;
+
+ *tgid = -1;
+ *ppid = -1;
+
+ if (pid)
+ snprintf(bf, sizeof(bf), "/proc/%d/task/%d/status", pid, tid);
+ else
+ snprintf(bf, sizeof(bf), "/proc/%d/status", tid);
+
+ fd = open(bf, O_RDONLY);
+ if (fd < 0) {
+ pr_debug("couldn't open %s\n", bf);
+ return -1;
+ }
+
+ n = read(fd, bf, sizeof(bf) - 1);
+ close(fd);
+ if (n <= 0) {
+ pr_warning("Couldn't get COMM, tigd and ppid for pid %d\n",
+ tid);
+ return -1;
+ }
+ bf[n] = '\0';
+
+ name = strstr(bf, "Name:");
+ tgids = strstr(name ?: bf, "Tgid:");
+ ppids = strstr(tgids ?: bf, "PPid:");
+ vmpeak = strstr(ppids ?: bf, "VmPeak:");
+
+ if (vmpeak)
+ threads = NULL;
+ else
+ threads = strstr(ppids ?: bf, "Threads:");
+
+ if (name) {
+ char *nl;
+
+ name = skip_spaces(name + 5); /* strlen("Name:") */
+ nl = strchr(name, '\n');
+ if (nl)
+ *nl = '\0';
+
+ size = strlen(name);
+ if (size >= len)
+ size = len - 1;
+ memcpy(comm, name, size);
+ comm[size] = '\0';
+ } else {
+ pr_debug("Name: string not found for pid %d\n", tid);
+ }
+
+ if (tgids) {
+ tgids += 5; /* strlen("Tgid:") */
+ *tgid = atoi(tgids);
+ } else {
+ pr_debug("Tgid: string not found for pid %d\n", tid);
+ }
+
+ if (ppids) {
+ ppids += 5; /* strlen("PPid:") */
+ *ppid = atoi(ppids);
+ } else {
+ pr_debug("PPid: string not found for pid %d\n", tid);
+ }
+
+ if (!vmpeak && threads)
+ *kernel = true;
+ else
+ *kernel = false;
+
+ return 0;
+}
+
+static int perf_event__prepare_comm(union perf_event *event, pid_t pid, pid_t tid,
+ struct machine *machine,
+ pid_t *tgid, pid_t *ppid, bool *kernel)
+{
+ size_t size;
+
+ *ppid = -1;
+
+ memset(&event->comm, 0, sizeof(event->comm));
+
+ if (machine__is_host(machine)) {
+ if (perf_event__get_comm_ids(pid, tid, event->comm.comm,
+ sizeof(event->comm.comm),
+ tgid, ppid, kernel) != 0) {
+ return -1;
+ }
+ } else {
+ *tgid = machine->pid;
+ }
+
+ if (*tgid < 0)
+ return -1;
+
+ event->comm.pid = *tgid;
+ event->comm.header.type = PERF_RECORD_COMM;
+
+ size = strlen(event->comm.comm) + 1;
+ size = PERF_ALIGN(size, sizeof(u64));
+ memset(event->comm.comm + size, 0, machine->id_hdr_size);
+ event->comm.header.size = (sizeof(event->comm) -
+ (sizeof(event->comm.comm) - size) +
+ machine->id_hdr_size);
+ event->comm.tid = tid;
+
+ return 0;
+}
+
+pid_t perf_event__synthesize_comm(struct perf_tool *tool,
+ union perf_event *event, pid_t pid,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ pid_t tgid, ppid;
+ bool kernel_thread;
+
+ if (perf_event__prepare_comm(event, 0, pid, machine, &tgid, &ppid,
+ &kernel_thread) != 0)
+ return -1;
+
+ if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
+ return -1;
+
+ return tgid;
+}
+
+static void perf_event__get_ns_link_info(pid_t pid, const char *ns,
+ struct perf_ns_link_info *ns_link_info)
+{
+ struct stat64 st;
+ char proc_ns[128];
+
+ sprintf(proc_ns, "/proc/%u/ns/%s", pid, ns);
+ if (stat64(proc_ns, &st) == 0) {
+ ns_link_info->dev = st.st_dev;
+ ns_link_info->ino = st.st_ino;
+ }
+}
+
+int perf_event__synthesize_namespaces(struct perf_tool *tool,
+ union perf_event *event,
+ pid_t pid, pid_t tgid,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ u32 idx;
+ struct perf_ns_link_info *ns_link_info;
+
+ if (!tool || !tool->namespace_events)
+ return 0;
+
+ memset(&event->namespaces, 0, (sizeof(event->namespaces) +
+ (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+ machine->id_hdr_size));
+
+ event->namespaces.pid = tgid;
+ event->namespaces.tid = pid;
+
+ event->namespaces.nr_namespaces = NR_NAMESPACES;
+
+ ns_link_info = event->namespaces.link_info;
+
+ for (idx = 0; idx < event->namespaces.nr_namespaces; idx++)
+ perf_event__get_ns_link_info(pid, perf_ns__name(idx),
+ &ns_link_info[idx]);
+
+ event->namespaces.header.type = PERF_RECORD_NAMESPACES;
+
+ event->namespaces.header.size = (sizeof(event->namespaces) +
+ (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+ machine->id_hdr_size);
+
+ if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
+ return -1;
+
+ return 0;
+}
+
+static int perf_event__synthesize_fork(struct perf_tool *tool,
+ union perf_event *event,
+ pid_t pid, pid_t tgid, pid_t ppid,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size);
+
+ /*
+ * for main thread set parent to ppid from status file. For other
+ * threads set parent pid to main thread. ie., assume main thread
+ * spawns all threads in a process
+ */
+ if (tgid == pid) {
+ event->fork.ppid = ppid;
+ event->fork.ptid = ppid;
+ } else {
+ event->fork.ppid = tgid;
+ event->fork.ptid = tgid;
+ }
+ event->fork.pid = tgid;
+ event->fork.tid = pid;
+ event->fork.header.type = PERF_RECORD_FORK;
+ event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC;
+
+ event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size);
+
+ if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
+ return -1;
+
+ return 0;
+}
+
+static bool read_proc_maps_line(struct io *io, __u64 *start, __u64 *end,
+ u32 *prot, u32 *flags, __u64 *offset,
+ u32 *maj, u32 *min,
+ __u64 *inode,
+ ssize_t pathname_size, char *pathname)
+{
+ __u64 temp;
+ int ch;
+ char *start_pathname = pathname;
+
+ if (io__get_hex(io, start) != '-')
+ return false;
+ if (io__get_hex(io, end) != ' ')
+ return false;
+
+ /* map protection and flags bits */
+ *prot = 0;
+ ch = io__get_char(io);
+ if (ch == 'r')
+ *prot |= PROT_READ;
+ else if (ch != '-')
+ return false;
+ ch = io__get_char(io);
+ if (ch == 'w')
+ *prot |= PROT_WRITE;
+ else if (ch != '-')
+ return false;
+ ch = io__get_char(io);
+ if (ch == 'x')
+ *prot |= PROT_EXEC;
+ else if (ch != '-')
+ return false;
+ ch = io__get_char(io);
+ if (ch == 's')
+ *flags = MAP_SHARED;
+ else if (ch == 'p')
+ *flags = MAP_PRIVATE;
+ else
+ return false;
+ if (io__get_char(io) != ' ')
+ return false;
+
+ if (io__get_hex(io, offset) != ' ')
+ return false;
+
+ if (io__get_hex(io, &temp) != ':')
+ return false;
+ *maj = temp;
+ if (io__get_hex(io, &temp) != ' ')
+ return false;
+ *min = temp;
+
+ ch = io__get_dec(io, inode);
+ if (ch != ' ') {
+ *pathname = '\0';
+ return ch == '\n';
+ }
+ do {
+ ch = io__get_char(io);
+ } while (ch == ' ');
+ while (true) {
+ if (ch < 0)
+ return false;
+ if (ch == '\0' || ch == '\n' ||
+ (pathname + 1 - start_pathname) >= pathname_size) {
+ *pathname = '\0';
+ return true;
+ }
+ *pathname++ = ch;
+ ch = io__get_char(io);
+ }
+}
+
+static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event,
+ struct machine *machine,
+ bool is_kernel)
+{
+ struct build_id bid;
+ struct nsinfo *nsi;
+ struct nscookie nc;
+ struct dso *dso = NULL;
+ struct dso_id id;
+ int rc;
+
+ if (is_kernel) {
+ rc = sysfs__read_build_id("/sys/kernel/notes", &bid);
+ goto out;
+ }
+
+ id.maj = event->maj;
+ id.min = event->min;
+ id.ino = event->ino;
+ id.ino_generation = event->ino_generation;
+
+ dso = dsos__findnew_id(&machine->dsos, event->filename, &id);
+ if (dso && dso->has_build_id) {
+ bid = dso->bid;
+ rc = 0;
+ goto out;
+ }
+
+ nsi = nsinfo__new(event->pid);
+ nsinfo__mountns_enter(nsi, &nc);
+
+ rc = filename__read_build_id(event->filename, &bid) > 0 ? 0 : -1;
+
+ nsinfo__mountns_exit(&nc);
+ nsinfo__put(nsi);
+
+out:
+ if (rc == 0) {
+ memcpy(event->build_id, bid.data, sizeof(bid.data));
+ event->build_id_size = (u8) bid.size;
+ event->header.misc |= PERF_RECORD_MISC_MMAP_BUILD_ID;
+ event->__reserved_1 = 0;
+ event->__reserved_2 = 0;
+
+ if (dso && !dso->has_build_id)
+ dso__set_build_id(dso, &bid);
+ } else {
+ if (event->filename[0] == '/') {
+ pr_debug2("Failed to read build ID for %s\n",
+ event->filename);
+ }
+ }
+ dso__put(dso);
+}
+
+int perf_event__synthesize_mmap_events(struct perf_tool *tool,
+ union perf_event *event,
+ pid_t pid, pid_t tgid,
+ perf_event__handler_t process,
+ struct machine *machine,
+ bool mmap_data)
+{
+ unsigned long long t;
+ char bf[BUFSIZ];
+ struct io io;
+ bool truncation = false;
+ unsigned long long timeout = proc_map_timeout * 1000000ULL;
+ int rc = 0;
+ const char *hugetlbfs_mnt = hugetlbfs__mountpoint();
+ int hugetlbfs_mnt_len = hugetlbfs_mnt ? strlen(hugetlbfs_mnt) : 0;
+
+ if (machine__is_default_guest(machine))
+ return 0;
+
+ snprintf(bf, sizeof(bf), "%s/proc/%d/task/%d/maps",
+ machine->root_dir, pid, pid);
+
+ io.fd = open(bf, O_RDONLY, 0);
+ if (io.fd < 0) {
+ /*
+ * We raced with a task exiting - just return:
+ */
+ pr_debug("couldn't open %s\n", bf);
+ return -1;
+ }
+ io__init(&io, io.fd, bf, sizeof(bf));
+
+ event->header.type = PERF_RECORD_MMAP2;
+ t = rdclock();
+
+ while (!io.eof) {
+ static const char anonstr[] = "//anon";
+ size_t size, aligned_size;
+
+ /* ensure null termination since stack will be reused. */
+ event->mmap2.filename[0] = '\0';
+
+ /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
+ if (!read_proc_maps_line(&io,
+ &event->mmap2.start,
+ &event->mmap2.len,
+ &event->mmap2.prot,
+ &event->mmap2.flags,
+ &event->mmap2.pgoff,
+ &event->mmap2.maj,
+ &event->mmap2.min,
+ &event->mmap2.ino,
+ sizeof(event->mmap2.filename),
+ event->mmap2.filename))
+ continue;
+
+ if ((rdclock() - t) > timeout) {
+ pr_warning("Reading %s/proc/%d/task/%d/maps time out. "
+ "You may want to increase "
+ "the time limit by --proc-map-timeout\n",
+ machine->root_dir, pid, pid);
+ truncation = true;
+ goto out;
+ }
+
+ event->mmap2.ino_generation = 0;
+
+ /*
+ * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
+ */
+ if (machine__is_host(machine))
+ event->header.misc = PERF_RECORD_MISC_USER;
+ else
+ event->header.misc = PERF_RECORD_MISC_GUEST_USER;
+
+ if ((event->mmap2.prot & PROT_EXEC) == 0) {
+ if (!mmap_data || (event->mmap2.prot & PROT_READ) == 0)
+ continue;
+
+ event->header.misc |= PERF_RECORD_MISC_MMAP_DATA;
+ }
+
+out:
+ if (truncation)
+ event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT;
+
+ if (!strcmp(event->mmap2.filename, ""))
+ strcpy(event->mmap2.filename, anonstr);
+
+ if (hugetlbfs_mnt_len &&
+ !strncmp(event->mmap2.filename, hugetlbfs_mnt,
+ hugetlbfs_mnt_len)) {
+ strcpy(event->mmap2.filename, anonstr);
+ event->mmap2.flags |= MAP_HUGETLB;
+ }
+
+ size = strlen(event->mmap2.filename) + 1;
+ aligned_size = PERF_ALIGN(size, sizeof(u64));
+ event->mmap2.len -= event->mmap.start;
+ event->mmap2.header.size = (sizeof(event->mmap2) -
+ (sizeof(event->mmap2.filename) - aligned_size));
+ memset(event->mmap2.filename + size, 0, machine->id_hdr_size +
+ (aligned_size - size));
+ event->mmap2.header.size += machine->id_hdr_size;
+ event->mmap2.pid = tgid;
+ event->mmap2.tid = pid;
+
+ if (symbol_conf.buildid_mmap2)
+ perf_record_mmap2__read_build_id(&event->mmap2, machine, false);
+
+ if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
+ rc = -1;
+ break;
+ }
+
+ if (truncation)
+ break;
+ }
+
+ close(io.fd);
+ return rc;
+}
+
+#ifdef HAVE_FILE_HANDLE
+static int perf_event__synthesize_cgroup(struct perf_tool *tool,
+ union perf_event *event,
+ char *path, size_t mount_len,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ size_t event_size = sizeof(event->cgroup) - sizeof(event->cgroup.path);
+ size_t path_len = strlen(path) - mount_len + 1;
+ struct {
+ struct file_handle fh;
+ uint64_t cgroup_id;
+ } handle;
+ int mount_id;
+
+ while (path_len % sizeof(u64))
+ path[mount_len + path_len++] = '\0';
+
+ memset(&event->cgroup, 0, event_size);
+
+ event->cgroup.header.type = PERF_RECORD_CGROUP;
+ event->cgroup.header.size = event_size + path_len + machine->id_hdr_size;
+
+ handle.fh.handle_bytes = sizeof(handle.cgroup_id);
+ if (name_to_handle_at(AT_FDCWD, path, &handle.fh, &mount_id, 0) < 0) {
+ pr_debug("stat failed: %s\n", path);
+ return -1;
+ }
+
+ event->cgroup.id = handle.cgroup_id;
+ strncpy(event->cgroup.path, path + mount_len, path_len);
+ memset(event->cgroup.path + path_len, 0, machine->id_hdr_size);
+
+ if (perf_tool__process_synth_event(tool, event, machine, process) < 0) {
+ pr_debug("process synth event failed\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int perf_event__walk_cgroup_tree(struct perf_tool *tool,
+ union perf_event *event,
+ char *path, size_t mount_len,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ size_t pos = strlen(path);
+ DIR *d;
+ struct dirent *dent;
+ int ret = 0;
+
+ if (perf_event__synthesize_cgroup(tool, event, path, mount_len,
+ process, machine) < 0)
+ return -1;
+
+ d = opendir(path);
+ if (d == NULL) {
+ pr_debug("failed to open directory: %s\n", path);
+ return -1;
+ }
+
+ while ((dent = readdir(d)) != NULL) {
+ if (dent->d_type != DT_DIR)
+ continue;
+ if (!strcmp(dent->d_name, ".") ||
+ !strcmp(dent->d_name, ".."))
+ continue;
+
+ /* any sane path should be less than PATH_MAX */
+ if (strlen(path) + strlen(dent->d_name) + 1 >= PATH_MAX)
+ continue;
+
+ if (path[pos - 1] != '/')
+ strcat(path, "/");
+ strcat(path, dent->d_name);
+
+ ret = perf_event__walk_cgroup_tree(tool, event, path,
+ mount_len, process, machine);
+ if (ret < 0)
+ break;
+
+ path[pos] = '\0';
+ }
+
+ closedir(d);
+ return ret;
+}
+
+int perf_event__synthesize_cgroups(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ union perf_event event;
+ char cgrp_root[PATH_MAX];
+ size_t mount_len; /* length of mount point in the path */
+
+ if (!tool || !tool->cgroup_events)
+ return 0;
+
+ if (cgroupfs_find_mountpoint(cgrp_root, PATH_MAX, "perf_event") < 0) {
+ pr_debug("cannot find cgroup mount point\n");
+ return -1;
+ }
+
+ mount_len = strlen(cgrp_root);
+ /* make sure the path starts with a slash (after mount point) */
+ strcat(cgrp_root, "/");
+
+ if (perf_event__walk_cgroup_tree(tool, &event, cgrp_root, mount_len,
+ process, machine) < 0)
+ return -1;
+
+ return 0;
+}
+#else
+int perf_event__synthesize_cgroups(struct perf_tool *tool __maybe_unused,
+ perf_event__handler_t process __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ return -1;
+}
+#endif
+
+int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process,
+ struct machine *machine)
+{
+ int rc = 0;
+ struct map_rb_node *pos;
+ struct maps *maps = machine__kernel_maps(machine);
+ union perf_event *event;
+ size_t size = symbol_conf.buildid_mmap2 ?
+ sizeof(event->mmap2) : sizeof(event->mmap);
+
+ event = zalloc(size + machine->id_hdr_size);
+ if (event == NULL) {
+ pr_debug("Not enough memory synthesizing mmap event "
+ "for kernel modules\n");
+ return -1;
+ }
+
+ /*
+ * kernel uses 0 for user space maps, see kernel/perf_event.c
+ * __perf_event_mmap
+ */
+ if (machine__is_host(machine))
+ event->header.misc = PERF_RECORD_MISC_KERNEL;
+ else
+ event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
+
+ maps__for_each_entry(maps, pos) {
+ struct map *map = pos->map;
+ struct dso *dso;
+
+ if (!__map__is_kmodule(map))
+ continue;
+
+ dso = map__dso(map);
+ if (symbol_conf.buildid_mmap2) {
+ size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64));
+ event->mmap2.header.type = PERF_RECORD_MMAP2;
+ event->mmap2.header.size = (sizeof(event->mmap2) -
+ (sizeof(event->mmap2.filename) - size));
+ memset(event->mmap2.filename + size, 0, machine->id_hdr_size);
+ event->mmap2.header.size += machine->id_hdr_size;
+ event->mmap2.start = map__start(map);
+ event->mmap2.len = map__size(map);
+ event->mmap2.pid = machine->pid;
+
+ memcpy(event->mmap2.filename, dso->long_name, dso->long_name_len + 1);
+
+ perf_record_mmap2__read_build_id(&event->mmap2, machine, false);
+ } else {
+ size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64));
+ event->mmap.header.type = PERF_RECORD_MMAP;
+ event->mmap.header.size = (sizeof(event->mmap) -
+ (sizeof(event->mmap.filename) - size));
+ memset(event->mmap.filename + size, 0, machine->id_hdr_size);
+ event->mmap.header.size += machine->id_hdr_size;
+ event->mmap.start = map__start(map);
+ event->mmap.len = map__size(map);
+ event->mmap.pid = machine->pid;
+
+ memcpy(event->mmap.filename, dso->long_name, dso->long_name_len + 1);
+ }
+
+ if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
+ rc = -1;
+ break;
+ }
+ }
+
+ free(event);
+ return rc;
+}
+
+static int filter_task(const struct dirent *dirent)
+{
+ return isdigit(dirent->d_name[0]);
+}
+
+static int __event__synthesize_thread(union perf_event *comm_event,
+ union perf_event *mmap_event,
+ union perf_event *fork_event,
+ union perf_event *namespaces_event,
+ pid_t pid, int full, perf_event__handler_t process,
+ struct perf_tool *tool, struct machine *machine,
+ bool needs_mmap, bool mmap_data)
+{
+ char filename[PATH_MAX];
+ struct dirent **dirent;
+ pid_t tgid, ppid;
+ int rc = 0;
+ int i, n;
+
+ /* special case: only send one comm event using passed in pid */
+ if (!full) {
+ tgid = perf_event__synthesize_comm(tool, comm_event, pid,
+ process, machine);
+
+ if (tgid == -1)
+ return -1;
+
+ if (perf_event__synthesize_namespaces(tool, namespaces_event, pid,
+ tgid, process, machine) < 0)
+ return -1;
+
+ /*
+ * send mmap only for thread group leader
+ * see thread__init_maps()
+ */
+ if (pid == tgid && needs_mmap &&
+ perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
+ process, machine, mmap_data))
+ return -1;
+
+ return 0;
+ }
+
+ if (machine__is_default_guest(machine))
+ return 0;
+
+ snprintf(filename, sizeof(filename), "%s/proc/%d/task",
+ machine->root_dir, pid);
+
+ n = scandir(filename, &dirent, filter_task, NULL);
+ if (n < 0)
+ return n;
+
+ for (i = 0; i < n; i++) {
+ char *end;
+ pid_t _pid;
+ bool kernel_thread = false;
+
+ _pid = strtol(dirent[i]->d_name, &end, 10);
+ if (*end)
+ continue;
+
+ /* some threads may exit just after scan, ignore it */
+ if (perf_event__prepare_comm(comm_event, pid, _pid, machine,
+ &tgid, &ppid, &kernel_thread) != 0)
+ continue;
+
+ rc = -1;
+ if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
+ ppid, process, machine) < 0)
+ break;
+
+ if (perf_event__synthesize_namespaces(tool, namespaces_event, _pid,
+ tgid, process, machine) < 0)
+ break;
+
+ /*
+ * Send the prepared comm event
+ */
+ if (perf_tool__process_synth_event(tool, comm_event, machine, process) != 0)
+ break;
+
+ rc = 0;
+ if (_pid == pid && !kernel_thread && needs_mmap) {
+ /* process the parent's maps too */
+ rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
+ process, machine, mmap_data);
+ if (rc)
+ break;
+ }
+ }
+
+ for (i = 0; i < n; i++)
+ zfree(&dirent[i]);
+ free(dirent);
+
+ return rc;
+}
+
+int perf_event__synthesize_thread_map(struct perf_tool *tool,
+ struct perf_thread_map *threads,
+ perf_event__handler_t process,
+ struct machine *machine,
+ bool needs_mmap, bool mmap_data)
+{
+ union perf_event *comm_event, *mmap_event, *fork_event;
+ union perf_event *namespaces_event;
+ int err = -1, thread, j;
+
+ comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
+ if (comm_event == NULL)
+ goto out;
+
+ mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size);
+ if (mmap_event == NULL)
+ goto out_free_comm;
+
+ fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size);
+ if (fork_event == NULL)
+ goto out_free_mmap;
+
+ namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
+ (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+ machine->id_hdr_size);
+ if (namespaces_event == NULL)
+ goto out_free_fork;
+
+ err = 0;
+ for (thread = 0; thread < threads->nr; ++thread) {
+ if (__event__synthesize_thread(comm_event, mmap_event,
+ fork_event, namespaces_event,
+ perf_thread_map__pid(threads, thread), 0,
+ process, tool, machine,
+ needs_mmap, mmap_data)) {
+ err = -1;
+ break;
+ }
+
+ /*
+ * comm.pid is set to thread group id by
+ * perf_event__synthesize_comm
+ */
+ if ((int) comm_event->comm.pid != perf_thread_map__pid(threads, thread)) {
+ bool need_leader = true;
+
+ /* is thread group leader in thread_map? */
+ for (j = 0; j < threads->nr; ++j) {
+ if ((int) comm_event->comm.pid == perf_thread_map__pid(threads, j)) {
+ need_leader = false;
+ break;
+ }
+ }
+
+ /* if not, generate events for it */
+ if (need_leader &&
+ __event__synthesize_thread(comm_event, mmap_event,
+ fork_event, namespaces_event,
+ comm_event->comm.pid, 0,
+ process, tool, machine,
+ needs_mmap, mmap_data)) {
+ err = -1;
+ break;
+ }
+ }
+ }
+ free(namespaces_event);
+out_free_fork:
+ free(fork_event);
+out_free_mmap:
+ free(mmap_event);
+out_free_comm:
+ free(comm_event);
+out:
+ return err;
+}
+
+static int __perf_event__synthesize_threads(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine,
+ bool needs_mmap,
+ bool mmap_data,
+ struct dirent **dirent,
+ int start,
+ int num)
+{
+ union perf_event *comm_event, *mmap_event, *fork_event;
+ union perf_event *namespaces_event;
+ int err = -1;
+ char *end;
+ pid_t pid;
+ int i;
+
+ comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
+ if (comm_event == NULL)
+ goto out;
+
+ mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size);
+ if (mmap_event == NULL)
+ goto out_free_comm;
+
+ fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size);
+ if (fork_event == NULL)
+ goto out_free_mmap;
+
+ namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
+ (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+ machine->id_hdr_size);
+ if (namespaces_event == NULL)
+ goto out_free_fork;
+
+ for (i = start; i < start + num; i++) {
+ if (!isdigit(dirent[i]->d_name[0]))
+ continue;
+
+ pid = (pid_t)strtol(dirent[i]->d_name, &end, 10);
+ /* only interested in proper numerical dirents */
+ if (*end)
+ continue;
+ /*
+ * We may race with exiting thread, so don't stop just because
+ * one thread couldn't be synthesized.
+ */
+ __event__synthesize_thread(comm_event, mmap_event, fork_event,
+ namespaces_event, pid, 1, process,
+ tool, machine, needs_mmap, mmap_data);
+ }
+ err = 0;
+
+ free(namespaces_event);
+out_free_fork:
+ free(fork_event);
+out_free_mmap:
+ free(mmap_event);
+out_free_comm:
+ free(comm_event);
+out:
+ return err;
+}
+
+struct synthesize_threads_arg {
+ struct perf_tool *tool;
+ perf_event__handler_t process;
+ struct machine *machine;
+ bool needs_mmap;
+ bool mmap_data;
+ struct dirent **dirent;
+ int num;
+ int start;
+};
+
+static void *synthesize_threads_worker(void *arg)
+{
+ struct synthesize_threads_arg *args = arg;
+
+ __perf_event__synthesize_threads(args->tool, args->process,
+ args->machine,
+ args->needs_mmap, args->mmap_data,
+ args->dirent,
+ args->start, args->num);
+ return NULL;
+}
+
+int perf_event__synthesize_threads(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine,
+ bool needs_mmap, bool mmap_data,
+ unsigned int nr_threads_synthesize)
+{
+ struct synthesize_threads_arg *args = NULL;
+ pthread_t *synthesize_threads = NULL;
+ char proc_path[PATH_MAX];
+ struct dirent **dirent;
+ int num_per_thread;
+ int m, n, i, j;
+ int thread_nr;
+ int base = 0;
+ int err = -1;
+
+
+ if (machine__is_default_guest(machine))
+ return 0;
+
+ snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
+ n = scandir(proc_path, &dirent, filter_task, NULL);
+ if (n < 0)
+ return err;
+
+ if (nr_threads_synthesize == UINT_MAX)
+ thread_nr = sysconf(_SC_NPROCESSORS_ONLN);
+ else
+ thread_nr = nr_threads_synthesize;
+
+ if (thread_nr <= 1) {
+ err = __perf_event__synthesize_threads(tool, process,
+ machine,
+ needs_mmap, mmap_data,
+ dirent, base, n);
+ goto free_dirent;
+ }
+ if (thread_nr > n)
+ thread_nr = n;
+
+ synthesize_threads = calloc(sizeof(pthread_t), thread_nr);
+ if (synthesize_threads == NULL)
+ goto free_dirent;
+
+ args = calloc(sizeof(*args), thread_nr);
+ if (args == NULL)
+ goto free_threads;
+
+ num_per_thread = n / thread_nr;
+ m = n % thread_nr;
+ for (i = 0; i < thread_nr; i++) {
+ args[i].tool = tool;
+ args[i].process = process;
+ args[i].machine = machine;
+ args[i].needs_mmap = needs_mmap;
+ args[i].mmap_data = mmap_data;
+ args[i].dirent = dirent;
+ }
+ for (i = 0; i < m; i++) {
+ args[i].num = num_per_thread + 1;
+ args[i].start = i * args[i].num;
+ }
+ if (i != 0)
+ base = args[i-1].start + args[i-1].num;
+ for (j = i; j < thread_nr; j++) {
+ args[j].num = num_per_thread;
+ args[j].start = base + (j - i) * args[i].num;
+ }
+
+ for (i = 0; i < thread_nr; i++) {
+ if (pthread_create(&synthesize_threads[i], NULL,
+ synthesize_threads_worker, &args[i]))
+ goto out_join;
+ }
+ err = 0;
+out_join:
+ for (i = 0; i < thread_nr; i++)
+ pthread_join(synthesize_threads[i], NULL);
+ free(args);
+free_threads:
+ free(synthesize_threads);
+free_dirent:
+ for (i = 0; i < n; i++)
+ zfree(&dirent[i]);
+ free(dirent);
+
+ return err;
+}
+
+int __weak perf_event__synthesize_extra_kmaps(struct perf_tool *tool __maybe_unused,
+ perf_event__handler_t process __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ return 0;
+}
+
+static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ union perf_event *event;
+ size_t size = symbol_conf.buildid_mmap2 ?
+ sizeof(event->mmap2) : sizeof(event->mmap);
+ struct map *map = machine__kernel_map(machine);
+ struct kmap *kmap;
+ int err;
+
+ if (map == NULL)
+ return -1;
+
+ kmap = map__kmap(map);
+ if (!kmap->ref_reloc_sym)
+ return -1;
+
+ /*
+ * We should get this from /sys/kernel/sections/.text, but till that is
+ * available use this, and after it is use this as a fallback for older
+ * kernels.
+ */
+ event = zalloc(size + machine->id_hdr_size);
+ if (event == NULL) {
+ pr_debug("Not enough memory synthesizing mmap event "
+ "for kernel modules\n");
+ return -1;
+ }
+
+ if (machine__is_host(machine)) {
+ /*
+ * kernel uses PERF_RECORD_MISC_USER for user space maps,
+ * see kernel/perf_event.c __perf_event_mmap
+ */
+ event->header.misc = PERF_RECORD_MISC_KERNEL;
+ } else {
+ event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
+ }
+
+ if (symbol_conf.buildid_mmap2) {
+ size = snprintf(event->mmap2.filename, sizeof(event->mmap2.filename),
+ "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1;
+ size = PERF_ALIGN(size, sizeof(u64));
+ event->mmap2.header.type = PERF_RECORD_MMAP2;
+ event->mmap2.header.size = (sizeof(event->mmap2) -
+ (sizeof(event->mmap2.filename) - size) + machine->id_hdr_size);
+ event->mmap2.pgoff = kmap->ref_reloc_sym->addr;
+ event->mmap2.start = map__start(map);
+ event->mmap2.len = map__end(map) - event->mmap.start;
+ event->mmap2.pid = machine->pid;
+
+ perf_record_mmap2__read_build_id(&event->mmap2, machine, true);
+ } else {
+ size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
+ "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1;
+ size = PERF_ALIGN(size, sizeof(u64));
+ event->mmap.header.type = PERF_RECORD_MMAP;
+ event->mmap.header.size = (sizeof(event->mmap) -
+ (sizeof(event->mmap.filename) - size) + machine->id_hdr_size);
+ event->mmap.pgoff = kmap->ref_reloc_sym->addr;
+ event->mmap.start = map__start(map);
+ event->mmap.len = map__end(map) - event->mmap.start;
+ event->mmap.pid = machine->pid;
+ }
+
+ err = perf_tool__process_synth_event(tool, event, machine, process);
+ free(event);
+
+ return err;
+}
+
+int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ int err;
+
+ err = __perf_event__synthesize_kernel_mmap(tool, process, machine);
+ if (err < 0)
+ return err;
+
+ return perf_event__synthesize_extra_kmaps(tool, process, machine);
+}
+
+int perf_event__synthesize_thread_map2(struct perf_tool *tool,
+ struct perf_thread_map *threads,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ union perf_event *event;
+ int i, err, size;
+
+ size = sizeof(event->thread_map);
+ size += threads->nr * sizeof(event->thread_map.entries[0]);
+
+ event = zalloc(size);
+ if (!event)
+ return -ENOMEM;
+
+ event->header.type = PERF_RECORD_THREAD_MAP;
+ event->header.size = size;
+ event->thread_map.nr = threads->nr;
+
+ for (i = 0; i < threads->nr; i++) {
+ struct perf_record_thread_map_entry *entry = &event->thread_map.entries[i];
+ char *comm = perf_thread_map__comm(threads, i);
+
+ if (!comm)
+ comm = (char *) "";
+
+ entry->pid = perf_thread_map__pid(threads, i);
+ strncpy((char *) &entry->comm, comm, sizeof(entry->comm));
+ }
+
+ err = process(tool, event, NULL, machine);
+
+ free(event);
+ return err;
+}
+
+struct synthesize_cpu_map_data {
+ const struct perf_cpu_map *map;
+ int nr;
+ int min_cpu;
+ int max_cpu;
+ int has_any_cpu;
+ int type;
+ size_t size;
+ struct perf_record_cpu_map_data *data;
+};
+
+static void synthesize_cpus(struct synthesize_cpu_map_data *data)
+{
+ data->data->type = PERF_CPU_MAP__CPUS;
+ data->data->cpus_data.nr = data->nr;
+ for (int i = 0; i < data->nr; i++)
+ data->data->cpus_data.cpu[i] = perf_cpu_map__cpu(data->map, i).cpu;
+}
+
+static void synthesize_mask(struct synthesize_cpu_map_data *data)
+{
+ int idx;
+ struct perf_cpu cpu;
+
+ /* Due to padding, the 4bytes per entry mask variant is always smaller. */
+ data->data->type = PERF_CPU_MAP__MASK;
+ data->data->mask32_data.nr = BITS_TO_U32(data->max_cpu);
+ data->data->mask32_data.long_size = 4;
+
+ perf_cpu_map__for_each_cpu(cpu, idx, data->map) {
+ int bit_word = cpu.cpu / 32;
+ u32 bit_mask = 1U << (cpu.cpu & 31);
+
+ data->data->mask32_data.mask[bit_word] |= bit_mask;
+ }
+}
+
+static void synthesize_range_cpus(struct synthesize_cpu_map_data *data)
+{
+ data->data->type = PERF_CPU_MAP__RANGE_CPUS;
+ data->data->range_cpu_data.any_cpu = data->has_any_cpu;
+ data->data->range_cpu_data.start_cpu = data->min_cpu;
+ data->data->range_cpu_data.end_cpu = data->max_cpu;
+}
+
+static void *cpu_map_data__alloc(struct synthesize_cpu_map_data *syn_data,
+ size_t header_size)
+{
+ size_t size_cpus, size_mask;
+
+ syn_data->nr = perf_cpu_map__nr(syn_data->map);
+ syn_data->has_any_cpu = (perf_cpu_map__cpu(syn_data->map, 0).cpu == -1) ? 1 : 0;
+
+ syn_data->min_cpu = perf_cpu_map__cpu(syn_data->map, syn_data->has_any_cpu).cpu;
+ syn_data->max_cpu = perf_cpu_map__max(syn_data->map).cpu;
+ if (syn_data->max_cpu - syn_data->min_cpu + 1 == syn_data->nr - syn_data->has_any_cpu) {
+ /* A consecutive range of CPUs can be encoded using a range. */
+ assert(sizeof(u16) + sizeof(struct perf_record_range_cpu_map) == sizeof(u64));
+ syn_data->type = PERF_CPU_MAP__RANGE_CPUS;
+ syn_data->size = header_size + sizeof(u64);
+ return zalloc(syn_data->size);
+ }
+
+ size_cpus = sizeof(u16) + sizeof(struct cpu_map_entries) + syn_data->nr * sizeof(u16);
+ /* Due to padding, the 4bytes per entry mask variant is always smaller. */
+ size_mask = sizeof(u16) + sizeof(struct perf_record_mask_cpu_map32) +
+ BITS_TO_U32(syn_data->max_cpu) * sizeof(__u32);
+ if (syn_data->has_any_cpu || size_cpus < size_mask) {
+ /* Follow the CPU map encoding. */
+ syn_data->type = PERF_CPU_MAP__CPUS;
+ syn_data->size = header_size + PERF_ALIGN(size_cpus, sizeof(u64));
+ return zalloc(syn_data->size);
+ }
+ /* Encode using a bitmask. */
+ syn_data->type = PERF_CPU_MAP__MASK;
+ syn_data->size = header_size + PERF_ALIGN(size_mask, sizeof(u64));
+ return zalloc(syn_data->size);
+}
+
+static void cpu_map_data__synthesize(struct synthesize_cpu_map_data *data)
+{
+ switch (data->type) {
+ case PERF_CPU_MAP__CPUS:
+ synthesize_cpus(data);
+ break;
+ case PERF_CPU_MAP__MASK:
+ synthesize_mask(data);
+ break;
+ case PERF_CPU_MAP__RANGE_CPUS:
+ synthesize_range_cpus(data);
+ break;
+ default:
+ break;
+ }
+}
+
+static struct perf_record_cpu_map *cpu_map_event__new(const struct perf_cpu_map *map)
+{
+ struct synthesize_cpu_map_data syn_data = { .map = map };
+ struct perf_record_cpu_map *event;
+
+
+ event = cpu_map_data__alloc(&syn_data, sizeof(struct perf_event_header));
+ if (!event)
+ return NULL;
+
+ syn_data.data = &event->data;
+ event->header.type = PERF_RECORD_CPU_MAP;
+ event->header.size = syn_data.size;
+ cpu_map_data__synthesize(&syn_data);
+ return event;
+}
+
+
+int perf_event__synthesize_cpu_map(struct perf_tool *tool,
+ const struct perf_cpu_map *map,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ struct perf_record_cpu_map *event;
+ int err;
+
+ event = cpu_map_event__new(map);
+ if (!event)
+ return -ENOMEM;
+
+ err = process(tool, (union perf_event *) event, NULL, machine);
+
+ free(event);
+ return err;
+}
+
+int perf_event__synthesize_stat_config(struct perf_tool *tool,
+ struct perf_stat_config *config,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ struct perf_record_stat_config *event;
+ int size, i = 0, err;
+
+ size = sizeof(*event);
+ size += (PERF_STAT_CONFIG_TERM__MAX * sizeof(event->data[0]));
+
+ event = zalloc(size);
+ if (!event)
+ return -ENOMEM;
+
+ event->header.type = PERF_RECORD_STAT_CONFIG;
+ event->header.size = size;
+ event->nr = PERF_STAT_CONFIG_TERM__MAX;
+
+#define ADD(__term, __val) \
+ event->data[i].tag = PERF_STAT_CONFIG_TERM__##__term; \
+ event->data[i].val = __val; \
+ i++;
+
+ ADD(AGGR_MODE, config->aggr_mode)
+ ADD(INTERVAL, config->interval)
+ ADD(SCALE, config->scale)
+ ADD(AGGR_LEVEL, config->aggr_level)
+
+ WARN_ONCE(i != PERF_STAT_CONFIG_TERM__MAX,
+ "stat config terms unbalanced\n");
+#undef ADD
+
+ err = process(tool, (union perf_event *) event, NULL, machine);
+
+ free(event);
+ return err;
+}
+
+int perf_event__synthesize_stat(struct perf_tool *tool,
+ struct perf_cpu cpu, u32 thread, u64 id,
+ struct perf_counts_values *count,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ struct perf_record_stat event;
+
+ event.header.type = PERF_RECORD_STAT;
+ event.header.size = sizeof(event);
+ event.header.misc = 0;
+
+ event.id = id;
+ event.cpu = cpu.cpu;
+ event.thread = thread;
+ event.val = count->val;
+ event.ena = count->ena;
+ event.run = count->run;
+
+ return process(tool, (union perf_event *) &event, NULL, machine);
+}
+
+int perf_event__synthesize_stat_round(struct perf_tool *tool,
+ u64 evtime, u64 type,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ struct perf_record_stat_round event;
+
+ event.header.type = PERF_RECORD_STAT_ROUND;
+ event.header.size = sizeof(event);
+ event.header.misc = 0;
+
+ event.time = evtime;
+ event.type = type;
+
+ return process(tool, (union perf_event *) &event, NULL, machine);
+}
+
+size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format)
+{
+ size_t sz, result = sizeof(struct perf_record_sample);
+
+ if (type & PERF_SAMPLE_IDENTIFIER)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_IP)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_TID)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_TIME)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_ADDR)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_ID)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_STREAM_ID)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_CPU)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_PERIOD)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_READ) {
+ result += sizeof(u64);
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ result += sizeof(u64);
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ result += sizeof(u64);
+ /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+ if (read_format & PERF_FORMAT_GROUP) {
+ sz = sample_read_value_size(read_format);
+ result += sz * sample->read.group.nr;
+ } else {
+ result += sizeof(u64);
+ if (read_format & PERF_FORMAT_LOST)
+ result += sizeof(u64);
+ }
+ }
+
+ if (type & PERF_SAMPLE_CALLCHAIN) {
+ sz = (sample->callchain->nr + 1) * sizeof(u64);
+ result += sz;
+ }
+
+ if (type & PERF_SAMPLE_RAW) {
+ result += sizeof(u32);
+ result += sample->raw_size;
+ }
+
+ if (type & PERF_SAMPLE_BRANCH_STACK) {
+ sz = sample->branch_stack->nr * sizeof(struct branch_entry);
+ /* nr, hw_idx */
+ sz += 2 * sizeof(u64);
+ result += sz;
+ }
+
+ if (type & PERF_SAMPLE_REGS_USER) {
+ if (sample->user_regs.abi) {
+ result += sizeof(u64);
+ sz = hweight64(sample->user_regs.mask) * sizeof(u64);
+ result += sz;
+ } else {
+ result += sizeof(u64);
+ }
+ }
+
+ if (type & PERF_SAMPLE_STACK_USER) {
+ sz = sample->user_stack.size;
+ result += sizeof(u64);
+ if (sz) {
+ result += sz;
+ result += sizeof(u64);
+ }
+ }
+
+ if (type & PERF_SAMPLE_WEIGHT_TYPE)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_DATA_SRC)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_TRANSACTION)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_REGS_INTR) {
+ if (sample->intr_regs.abi) {
+ result += sizeof(u64);
+ sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
+ result += sz;
+ } else {
+ result += sizeof(u64);
+ }
+ }
+
+ if (type & PERF_SAMPLE_PHYS_ADDR)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_CGROUP)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_DATA_PAGE_SIZE)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_CODE_PAGE_SIZE)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_AUX) {
+ result += sizeof(u64);
+ result += sample->aux_sample.size;
+ }
+
+ return result;
+}
+
+void __weak arch_perf_synthesize_sample_weight(const struct perf_sample *data,
+ __u64 *array, u64 type __maybe_unused)
+{
+ *array = data->weight;
+}
+
+static __u64 *copy_read_group_values(__u64 *array, __u64 read_format,
+ const struct perf_sample *sample)
+{
+ size_t sz = sample_read_value_size(read_format);
+ struct sample_read_value *v = sample->read.group.values;
+
+ sample_read_group__for_each(v, sample->read.group.nr, read_format) {
+ /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+ memcpy(array, v, sz);
+ array = (void *)array + sz;
+ }
+ return array;
+}
+
+int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format,
+ const struct perf_sample *sample)
+{
+ __u64 *array;
+ size_t sz;
+ /*
+ * used for cross-endian analysis. See git commit 65014ab3
+ * for why this goofiness is needed.
+ */
+ union u64_swap u;
+
+ array = event->sample.array;
+
+ if (type & PERF_SAMPLE_IDENTIFIER) {
+ *array = sample->id;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_IP) {
+ *array = sample->ip;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_TID) {
+ u.val32[0] = sample->pid;
+ u.val32[1] = sample->tid;
+ *array = u.val64;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_TIME) {
+ *array = sample->time;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_ADDR) {
+ *array = sample->addr;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_ID) {
+ *array = sample->id;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_STREAM_ID) {
+ *array = sample->stream_id;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_CPU) {
+ u.val32[0] = sample->cpu;
+ u.val32[1] = 0;
+ *array = u.val64;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_PERIOD) {
+ *array = sample->period;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_READ) {
+ if (read_format & PERF_FORMAT_GROUP)
+ *array = sample->read.group.nr;
+ else
+ *array = sample->read.one.value;
+ array++;
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ *array = sample->read.time_enabled;
+ array++;
+ }
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ *array = sample->read.time_running;
+ array++;
+ }
+
+ /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+ if (read_format & PERF_FORMAT_GROUP) {
+ array = copy_read_group_values(array, read_format,
+ sample);
+ } else {
+ *array = sample->read.one.id;
+ array++;
+
+ if (read_format & PERF_FORMAT_LOST) {
+ *array = sample->read.one.lost;
+ array++;
+ }
+ }
+ }
+
+ if (type & PERF_SAMPLE_CALLCHAIN) {
+ sz = (sample->callchain->nr + 1) * sizeof(u64);
+ memcpy(array, sample->callchain, sz);
+ array = (void *)array + sz;
+ }
+
+ if (type & PERF_SAMPLE_RAW) {
+ u.val32[0] = sample->raw_size;
+ *array = u.val64;
+ array = (void *)array + sizeof(u32);
+
+ memcpy(array, sample->raw_data, sample->raw_size);
+ array = (void *)array + sample->raw_size;
+ }
+
+ if (type & PERF_SAMPLE_BRANCH_STACK) {
+ sz = sample->branch_stack->nr * sizeof(struct branch_entry);
+ /* nr, hw_idx */
+ sz += 2 * sizeof(u64);
+ memcpy(array, sample->branch_stack, sz);
+ array = (void *)array + sz;
+ }
+
+ if (type & PERF_SAMPLE_REGS_USER) {
+ if (sample->user_regs.abi) {
+ *array++ = sample->user_regs.abi;
+ sz = hweight64(sample->user_regs.mask) * sizeof(u64);
+ memcpy(array, sample->user_regs.regs, sz);
+ array = (void *)array + sz;
+ } else {
+ *array++ = 0;
+ }
+ }
+
+ if (type & PERF_SAMPLE_STACK_USER) {
+ sz = sample->user_stack.size;
+ *array++ = sz;
+ if (sz) {
+ memcpy(array, sample->user_stack.data, sz);
+ array = (void *)array + sz;
+ *array++ = sz;
+ }
+ }
+
+ if (type & PERF_SAMPLE_WEIGHT_TYPE) {
+ arch_perf_synthesize_sample_weight(sample, array, type);
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_DATA_SRC) {
+ *array = sample->data_src;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_TRANSACTION) {
+ *array = sample->transaction;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_REGS_INTR) {
+ if (sample->intr_regs.abi) {
+ *array++ = sample->intr_regs.abi;
+ sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
+ memcpy(array, sample->intr_regs.regs, sz);
+ array = (void *)array + sz;
+ } else {
+ *array++ = 0;
+ }
+ }
+
+ if (type & PERF_SAMPLE_PHYS_ADDR) {
+ *array = sample->phys_addr;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_CGROUP) {
+ *array = sample->cgroup;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_DATA_PAGE_SIZE) {
+ *array = sample->data_page_size;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_CODE_PAGE_SIZE) {
+ *array = sample->code_page_size;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_AUX) {
+ sz = sample->aux_sample.size;
+ *array++ = sz;
+ memcpy(array, sample->aux_sample.data, sz);
+ array = (void *)array + sz;
+ }
+
+ return 0;
+}
+
+int perf_event__synthesize_id_sample(__u64 *array, u64 type, const struct perf_sample *sample)
+{
+ __u64 *start = array;
+
+ /*
+ * used for cross-endian analysis. See git commit 65014ab3
+ * for why this goofiness is needed.
+ */
+ union u64_swap u;
+
+ if (type & PERF_SAMPLE_TID) {
+ u.val32[0] = sample->pid;
+ u.val32[1] = sample->tid;
+ *array = u.val64;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_TIME) {
+ *array = sample->time;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_ID) {
+ *array = sample->id;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_STREAM_ID) {
+ *array = sample->stream_id;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_CPU) {
+ u.val32[0] = sample->cpu;
+ u.val32[1] = 0;
+ *array = u.val64;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_IDENTIFIER) {
+ *array = sample->id;
+ array++;
+ }
+
+ return (void *)array - (void *)start;
+}
+
+int __perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process,
+ struct evlist *evlist, struct machine *machine, size_t from)
+{
+ union perf_event *ev;
+ struct evsel *evsel;
+ size_t nr = 0, i = 0, sz, max_nr, n, pos;
+ size_t e1_sz = sizeof(struct id_index_entry);
+ size_t e2_sz = sizeof(struct id_index_entry_2);
+ size_t etot_sz = e1_sz + e2_sz;
+ bool e2_needed = false;
+ int err;
+
+ max_nr = (UINT16_MAX - sizeof(struct perf_record_id_index)) / etot_sz;
+
+ pos = 0;
+ evlist__for_each_entry(evlist, evsel) {
+ if (pos++ < from)
+ continue;
+ nr += evsel->core.ids;
+ }
+
+ if (!nr)
+ return 0;
+
+ pr_debug2("Synthesizing id index\n");
+
+ n = nr > max_nr ? max_nr : nr;
+ sz = sizeof(struct perf_record_id_index) + n * etot_sz;
+ ev = zalloc(sz);
+ if (!ev)
+ return -ENOMEM;
+
+ sz = sizeof(struct perf_record_id_index) + n * e1_sz;
+
+ ev->id_index.header.type = PERF_RECORD_ID_INDEX;
+ ev->id_index.nr = n;
+
+ pos = 0;
+ evlist__for_each_entry(evlist, evsel) {
+ u32 j;
+
+ if (pos++ < from)
+ continue;
+ for (j = 0; j < evsel->core.ids; j++, i++) {
+ struct id_index_entry *e;
+ struct id_index_entry_2 *e2;
+ struct perf_sample_id *sid;
+
+ if (i >= n) {
+ ev->id_index.header.size = sz + (e2_needed ? n * e2_sz : 0);
+ err = process(tool, ev, NULL, machine);
+ if (err)
+ goto out_err;
+ nr -= n;
+ i = 0;
+ e2_needed = false;
+ }
+
+ e = &ev->id_index.entries[i];
+
+ e->id = evsel->core.id[j];
+
+ sid = evlist__id2sid(evlist, e->id);
+ if (!sid) {
+ free(ev);
+ return -ENOENT;
+ }
+
+ e->idx = sid->idx;
+ e->cpu = sid->cpu.cpu;
+ e->tid = sid->tid;
+
+ if (sid->machine_pid)
+ e2_needed = true;
+
+ e2 = (void *)ev + sz;
+ e2[i].machine_pid = sid->machine_pid;
+ e2[i].vcpu = sid->vcpu.cpu;
+ }
+ }
+
+ sz = sizeof(struct perf_record_id_index) + nr * e1_sz;
+ ev->id_index.header.size = sz + (e2_needed ? nr * e2_sz : 0);
+ ev->id_index.nr = nr;
+
+ err = process(tool, ev, NULL, machine);
+out_err:
+ free(ev);
+
+ return err;
+}
+
+int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process,
+ struct evlist *evlist, struct machine *machine)
+{
+ return __perf_event__synthesize_id_index(tool, process, evlist, machine, 0);
+}
+
+int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
+ struct target *target, struct perf_thread_map *threads,
+ perf_event__handler_t process, bool needs_mmap,
+ bool data_mmap, unsigned int nr_threads_synthesize)
+{
+ /*
+ * When perf runs in non-root PID namespace, and the namespace's proc FS
+ * is not mounted, nsinfo__is_in_root_namespace() returns false.
+ * In this case, the proc FS is coming for the parent namespace, thus
+ * perf tool will wrongly gather process info from its parent PID
+ * namespace.
+ *
+ * To avoid the confusion that the perf tool runs in a child PID
+ * namespace but it synthesizes thread info from its parent PID
+ * namespace, returns failure with warning.
+ */
+ if (!nsinfo__is_in_root_namespace()) {
+ pr_err("Perf runs in non-root PID namespace but it tries to ");
+ pr_err("gather process info from its parent PID namespace.\n");
+ pr_err("Please mount the proc file system properly, e.g. ");
+ pr_err("add the option '--mount-proc' for unshare command.\n");
+ return -EPERM;
+ }
+
+ if (target__has_task(target))
+ return perf_event__synthesize_thread_map(tool, threads, process, machine,
+ needs_mmap, data_mmap);
+ else if (target__has_cpu(target))
+ return perf_event__synthesize_threads(tool, process, machine,
+ needs_mmap, data_mmap,
+ nr_threads_synthesize);
+ /* command specified */
+ return 0;
+}
+
+int machine__synthesize_threads(struct machine *machine, struct target *target,
+ struct perf_thread_map *threads, bool needs_mmap,
+ bool data_mmap, unsigned int nr_threads_synthesize)
+{
+ return __machine__synthesize_threads(machine, NULL, target, threads,
+ perf_event__process, needs_mmap,
+ data_mmap, nr_threads_synthesize);
+}
+
+static struct perf_record_event_update *event_update_event__new(size_t size, u64 type, u64 id)
+{
+ struct perf_record_event_update *ev;
+
+ size += sizeof(*ev);
+ size = PERF_ALIGN(size, sizeof(u64));
+
+ ev = zalloc(size);
+ if (ev) {
+ ev->header.type = PERF_RECORD_EVENT_UPDATE;
+ ev->header.size = (u16)size;
+ ev->type = type;
+ ev->id = id;
+ }
+ return ev;
+}
+
+int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evsel *evsel,
+ perf_event__handler_t process)
+{
+ size_t size = strlen(evsel->unit);
+ struct perf_record_event_update *ev;
+ int err;
+
+ ev = event_update_event__new(size + 1, PERF_EVENT_UPDATE__UNIT, evsel->core.id[0]);
+ if (ev == NULL)
+ return -ENOMEM;
+
+ strlcpy(ev->unit, evsel->unit, size + 1);
+ err = process(tool, (union perf_event *)ev, NULL, NULL);
+ free(ev);
+ return err;
+}
+
+int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evsel *evsel,
+ perf_event__handler_t process)
+{
+ struct perf_record_event_update *ev;
+ struct perf_record_event_update_scale *ev_data;
+ int err;
+
+ ev = event_update_event__new(sizeof(*ev_data), PERF_EVENT_UPDATE__SCALE, evsel->core.id[0]);
+ if (ev == NULL)
+ return -ENOMEM;
+
+ ev->scale.scale = evsel->scale;
+ err = process(tool, (union perf_event *)ev, NULL, NULL);
+ free(ev);
+ return err;
+}
+
+int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evsel *evsel,
+ perf_event__handler_t process)
+{
+ struct perf_record_event_update *ev;
+ size_t len = strlen(evsel__name(evsel));
+ int err;
+
+ ev = event_update_event__new(len + 1, PERF_EVENT_UPDATE__NAME, evsel->core.id[0]);
+ if (ev == NULL)
+ return -ENOMEM;
+
+ strlcpy(ev->name, evsel->name, len + 1);
+ err = process(tool, (union perf_event *)ev, NULL, NULL);
+ free(ev);
+ return err;
+}
+
+int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel,
+ perf_event__handler_t process)
+{
+ struct synthesize_cpu_map_data syn_data = { .map = evsel->core.own_cpus };
+ struct perf_record_event_update *ev;
+ int err;
+
+ ev = cpu_map_data__alloc(&syn_data, sizeof(struct perf_event_header) + 2 * sizeof(u64));
+ if (!ev)
+ return -ENOMEM;
+
+ syn_data.data = &ev->cpus.cpus;
+ ev->header.type = PERF_RECORD_EVENT_UPDATE;
+ ev->header.size = (u16)syn_data.size;
+ ev->type = PERF_EVENT_UPDATE__CPUS;
+ ev->id = evsel->core.id[0];
+ cpu_map_data__synthesize(&syn_data);
+
+ err = process(tool, (union perf_event *)ev, NULL, NULL);
+ free(ev);
+ return err;
+}
+
+int perf_event__synthesize_attrs(struct perf_tool *tool, struct evlist *evlist,
+ perf_event__handler_t process)
+{
+ struct evsel *evsel;
+ int err = 0;
+
+ evlist__for_each_entry(evlist, evsel) {
+ err = perf_event__synthesize_attr(tool, &evsel->core.attr, evsel->core.ids,
+ evsel->core.id, process);
+ if (err) {
+ pr_debug("failed to create perf header attribute\n");
+ return err;
+ }
+ }
+
+ return err;
+}
+
+static bool has_unit(struct evsel *evsel)
+{
+ return evsel->unit && *evsel->unit;
+}
+
+static bool has_scale(struct evsel *evsel)
+{
+ return evsel->scale != 1;
+}
+
+int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evsel_list,
+ perf_event__handler_t process, bool is_pipe)
+{
+ struct evsel *evsel;
+ int err;
+
+ /*
+ * Synthesize other events stuff not carried within
+ * attr event - unit, scale, name
+ */
+ evlist__for_each_entry(evsel_list, evsel) {
+ if (!evsel->supported)
+ continue;
+
+ /*
+ * Synthesize unit and scale only if it's defined.
+ */
+ if (has_unit(evsel)) {
+ err = perf_event__synthesize_event_update_unit(tool, evsel, process);
+ if (err < 0) {
+ pr_err("Couldn't synthesize evsel unit.\n");
+ return err;
+ }
+ }
+
+ if (has_scale(evsel)) {
+ err = perf_event__synthesize_event_update_scale(tool, evsel, process);
+ if (err < 0) {
+ pr_err("Couldn't synthesize evsel evsel.\n");
+ return err;
+ }
+ }
+
+ if (evsel->core.own_cpus) {
+ err = perf_event__synthesize_event_update_cpus(tool, evsel, process);
+ if (err < 0) {
+ pr_err("Couldn't synthesize evsel cpus.\n");
+ return err;
+ }
+ }
+
+ /*
+ * Name is needed only for pipe output,
+ * perf.data carries event names.
+ */
+ if (is_pipe) {
+ err = perf_event__synthesize_event_update_name(tool, evsel, process);
+ if (err < 0) {
+ pr_err("Couldn't synthesize evsel name.\n");
+ return err;
+ }
+ }
+ }
+ return 0;
+}
+
+int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr,
+ u32 ids, u64 *id, perf_event__handler_t process)
+{
+ union perf_event *ev;
+ size_t size;
+ int err;
+
+ size = sizeof(struct perf_event_attr);
+ size = PERF_ALIGN(size, sizeof(u64));
+ size += sizeof(struct perf_event_header);
+ size += ids * sizeof(u64);
+
+ ev = zalloc(size);
+
+ if (ev == NULL)
+ return -ENOMEM;
+
+ ev->attr.attr = *attr;
+ memcpy(perf_record_header_attr_id(ev), id, ids * sizeof(u64));
+
+ ev->attr.header.type = PERF_RECORD_HEADER_ATTR;
+ ev->attr.header.size = (u16)size;
+
+ if (ev->attr.header.size == size)
+ err = process(tool, ev, NULL, NULL);
+ else
+ err = -E2BIG;
+
+ free(ev);
+
+ return err;
+}
+
+#ifdef HAVE_LIBTRACEEVENT
+int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist,
+ perf_event__handler_t process)
+{
+ union perf_event ev;
+ struct tracing_data *tdata;
+ ssize_t size = 0, aligned_size = 0, padding;
+ struct feat_fd ff;
+
+ /*
+ * We are going to store the size of the data followed
+ * by the data contents. Since the fd descriptor is a pipe,
+ * we cannot seek back to store the size of the data once
+ * we know it. Instead we:
+ *
+ * - write the tracing data to the temp file
+ * - get/write the data size to pipe
+ * - write the tracing data from the temp file
+ * to the pipe
+ */
+ tdata = tracing_data_get(&evlist->core.entries, fd, true);
+ if (!tdata)
+ return -1;
+
+ memset(&ev, 0, sizeof(ev));
+
+ ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA;
+ size = tdata->size;
+ aligned_size = PERF_ALIGN(size, sizeof(u64));
+ padding = aligned_size - size;
+ ev.tracing_data.header.size = sizeof(ev.tracing_data);
+ ev.tracing_data.size = aligned_size;
+
+ process(tool, &ev, NULL, NULL);
+
+ /*
+ * The put function will copy all the tracing data
+ * stored in temp file to the pipe.
+ */
+ tracing_data_put(tdata);
+
+ ff = (struct feat_fd){ .fd = fd };
+ if (write_padded(&ff, NULL, 0, padding))
+ return -1;
+
+ return aligned_size;
+}
+#endif
+
+int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc,
+ perf_event__handler_t process, struct machine *machine)
+{
+ union perf_event ev;
+ size_t len;
+
+ if (!pos->hit)
+ return 0;
+
+ memset(&ev, 0, sizeof(ev));
+
+ len = pos->long_name_len + 1;
+ len = PERF_ALIGN(len, NAME_ALIGN);
+ ev.build_id.size = min(pos->bid.size, sizeof(pos->bid.data));
+ memcpy(&ev.build_id.build_id, pos->bid.data, ev.build_id.size);
+ ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID;
+ ev.build_id.header.misc = misc | PERF_RECORD_MISC_BUILD_ID_SIZE;
+ ev.build_id.pid = machine->pid;
+ ev.build_id.header.size = sizeof(ev.build_id) + len;
+ memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len);
+
+ return process(tool, &ev, NULL, machine);
+}
+
+int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool,
+ struct evlist *evlist, perf_event__handler_t process, bool attrs)
+{
+ int err;
+
+ if (attrs) {
+ err = perf_event__synthesize_attrs(tool, evlist, process);
+ if (err < 0) {
+ pr_err("Couldn't synthesize attrs.\n");
+ return err;
+ }
+ }
+
+ err = perf_event__synthesize_extra_attr(tool, evlist, process, attrs);
+ err = perf_event__synthesize_thread_map2(tool, evlist->core.threads, process, NULL);
+ if (err < 0) {
+ pr_err("Couldn't synthesize thread map.\n");
+ return err;
+ }
+
+ err = perf_event__synthesize_cpu_map(tool, evlist->core.user_requested_cpus, process, NULL);
+ if (err < 0) {
+ pr_err("Couldn't synthesize thread map.\n");
+ return err;
+ }
+
+ err = perf_event__synthesize_stat_config(tool, config, process, NULL);
+ if (err < 0) {
+ pr_err("Couldn't synthesize config.\n");
+ return err;
+ }
+
+ return 0;
+}
+
+extern const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE];
+
+int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session,
+ struct evlist *evlist, perf_event__handler_t process)
+{
+ struct perf_header *header = &session->header;
+ struct perf_record_header_feature *fe;
+ struct feat_fd ff;
+ size_t sz, sz_hdr;
+ int feat, ret;
+
+ sz_hdr = sizeof(fe->header);
+ sz = sizeof(union perf_event);
+ /* get a nice alignment */
+ sz = PERF_ALIGN(sz, page_size);
+
+ memset(&ff, 0, sizeof(ff));
+
+ ff.buf = malloc(sz);
+ if (!ff.buf)
+ return -ENOMEM;
+
+ ff.size = sz - sz_hdr;
+ ff.ph = &session->header;
+
+ for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) {
+ if (!feat_ops[feat].synthesize) {
+ pr_debug("No record header feature for header :%d\n", feat);
+ continue;
+ }
+
+ ff.offset = sizeof(*fe);
+
+ ret = feat_ops[feat].write(&ff, evlist);
+ if (ret || ff.offset <= (ssize_t)sizeof(*fe)) {
+ pr_debug("Error writing feature\n");
+ continue;
+ }
+ /* ff.buf may have changed due to realloc in do_write() */
+ fe = ff.buf;
+ memset(fe, 0, sizeof(*fe));
+
+ fe->feat_id = feat;
+ fe->header.type = PERF_RECORD_HEADER_FEATURE;
+ fe->header.size = ff.offset;
+
+ ret = process(tool, ff.buf, NULL, NULL);
+ if (ret) {
+ free(ff.buf);
+ return ret;
+ }
+ }
+
+ /* Send HEADER_LAST_FEATURE mark. */
+ fe = ff.buf;
+ fe->feat_id = HEADER_LAST_FEATURE;
+ fe->header.type = PERF_RECORD_HEADER_FEATURE;
+ fe->header.size = sizeof(*fe);
+
+ ret = process(tool, ff.buf, NULL, NULL);
+
+ free(ff.buf);
+ return ret;
+}
+
+int perf_event__synthesize_for_pipe(struct perf_tool *tool,
+ struct perf_session *session,
+ struct perf_data *data,
+ perf_event__handler_t process)
+{
+ int err;
+ int ret = 0;
+ struct evlist *evlist = session->evlist;
+
+ /*
+ * We need to synthesize events first, because some
+ * features works on top of them (on report side).
+ */
+ err = perf_event__synthesize_attrs(tool, evlist, process);
+ if (err < 0) {
+ pr_err("Couldn't synthesize attrs.\n");
+ return err;
+ }
+ ret += err;
+
+ err = perf_event__synthesize_features(tool, session, evlist, process);
+ if (err < 0) {
+ pr_err("Couldn't synthesize features.\n");
+ return err;
+ }
+ ret += err;
+
+#ifdef HAVE_LIBTRACEEVENT
+ if (have_tracepoints(&evlist->core.entries)) {
+ int fd = perf_data__fd(data);
+
+ /*
+ * FIXME err <= 0 here actually means that
+ * there were no tracepoints so its not really
+ * an error, just that we don't need to
+ * synthesize anything. We really have to
+ * return this more properly and also
+ * propagate errors that now are calling die()
+ */
+ err = perf_event__synthesize_tracing_data(tool, fd, evlist,
+ process);
+ if (err <= 0) {
+ pr_err("Couldn't record tracing data.\n");
+ return err;
+ }
+ ret += err;
+ }
+#else
+ (void)data;
+#endif
+
+ return ret;
+}
+
+int parse_synth_opt(char *synth)
+{
+ char *p, *q;
+ int ret = 0;
+
+ if (synth == NULL)
+ return -1;
+
+ for (q = synth; (p = strsep(&q, ",")); p = q) {
+ if (!strcasecmp(p, "no") || !strcasecmp(p, "none"))
+ return 0;
+
+ if (!strcasecmp(p, "all"))
+ return PERF_SYNTH_ALL;
+
+ if (!strcasecmp(p, "task"))
+ ret |= PERF_SYNTH_TASK;
+ else if (!strcasecmp(p, "mmap"))
+ ret |= PERF_SYNTH_TASK | PERF_SYNTH_MMAP;
+ else if (!strcasecmp(p, "cgroup"))
+ ret |= PERF_SYNTH_CGROUP;
+ else
+ return -1;
+ }
+
+ return ret;
+}
diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h
new file mode 100644
index 000000000..53737d161
--- /dev/null
+++ b/tools/perf/util/synthetic-events.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SYNTHETIC_EVENTS_H
+#define __PERF_SYNTHETIC_EVENTS_H
+
+#include <stdbool.h>
+#include <sys/types.h> // pid_t
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <perf/cpumap.h>
+
+struct auxtrace_record;
+struct dso;
+struct evlist;
+struct evsel;
+struct machine;
+struct perf_counts_values;
+struct perf_cpu_map;
+struct perf_data;
+struct perf_event_attr;
+struct perf_event_mmap_page;
+struct perf_sample;
+struct perf_session;
+struct perf_stat_config;
+struct perf_thread_map;
+struct perf_tool;
+struct record_opts;
+struct target;
+
+union perf_event;
+
+enum perf_record_synth {
+ PERF_SYNTH_TASK = 1 << 0,
+ PERF_SYNTH_MMAP = 1 << 1,
+ PERF_SYNTH_CGROUP = 1 << 2,
+
+ /* last element */
+ PERF_SYNTH_MAX = 1 << 3,
+};
+#define PERF_SYNTH_ALL (PERF_SYNTH_MAX - 1)
+
+int parse_synth_opt(char *str);
+
+typedef int (*perf_event__handler_t)(struct perf_tool *tool, union perf_event *event,
+ struct perf_sample *sample, struct machine *machine);
+
+int perf_event__synthesize_attrs(struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process);
+int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process);
+int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_cpu_map(struct perf_tool *tool, const struct perf_cpu_map *cpus, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evsel_list, perf_event__handler_t process, bool is_pipe);
+int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session, struct evlist *evlist, perf_event__handler_t process);
+int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine);
+int __perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine, size_t from);
+int perf_event__synthesize_id_sample(__u64 *array, u64 type, const struct perf_sample *sample);
+int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, bool mmap_data);
+int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_namespaces(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_cgroups(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, const struct perf_sample *sample);
+int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs);
+int perf_event__synthesize_stat_round(struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_stat(struct perf_tool *tool, struct perf_cpu cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data);
+int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize);
+int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process);
+int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+pid_t perf_event__synthesize_comm(struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine);
+
+int perf_tool__process_synth_event(struct perf_tool *tool, union perf_event *event, struct machine *machine, perf_event__handler_t process);
+
+size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format);
+
+int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
+ struct target *target, struct perf_thread_map *threads,
+ perf_event__handler_t process, bool needs_mmap, bool data_mmap,
+ unsigned int nr_threads_synthesize);
+int machine__synthesize_threads(struct machine *machine, struct target *target,
+ struct perf_thread_map *threads, bool needs_mmap, bool data_mmap,
+ unsigned int nr_threads_synthesize);
+
+#ifdef HAVE_AUXTRACE_SUPPORT
+int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, struct perf_tool *tool,
+ struct perf_session *session, perf_event__handler_t process);
+
+#else // HAVE_AUXTRACE_SUPPORT
+
+#include <errno.h>
+
+static inline int
+perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr __maybe_unused,
+ struct perf_tool *tool __maybe_unused,
+ struct perf_session *session __maybe_unused,
+ perf_event__handler_t process __maybe_unused)
+{
+ return -EINVAL;
+}
+#endif // HAVE_AUXTRACE_SUPPORT
+
+#ifdef HAVE_LIBBPF_SUPPORT
+int perf_event__synthesize_bpf_events(struct perf_session *session, perf_event__handler_t process,
+ struct machine *machine, struct record_opts *opts);
+#else // HAVE_LIBBPF_SUPPORT
+static inline int perf_event__synthesize_bpf_events(struct perf_session *session __maybe_unused,
+ perf_event__handler_t process __maybe_unused,
+ struct machine *machine __maybe_unused,
+ struct record_opts *opts __maybe_unused)
+{
+ return 0;
+}
+#endif // HAVE_LIBBPF_SUPPORT
+
+int perf_event__synthesize_for_pipe(struct perf_tool *tool,
+ struct perf_session *session,
+ struct perf_data *data,
+ perf_event__handler_t process);
+
+#endif // __PERF_SYNTHETIC_EVENTS_H
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
new file mode 100644
index 000000000..63be7b587
--- /dev/null
+++ b/tools/perf/util/syscalltbl.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * System call table mapper
+ *
+ * (C) 2016 Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include "syscalltbl.h"
+#include <stdlib.h>
+#include <linux/compiler.h>
+#include <linux/zalloc.h>
+
+#ifdef HAVE_SYSCALL_TABLE_SUPPORT
+#include <string.h>
+#include "string2.h"
+
+#if defined(__x86_64__)
+#include <asm/syscalls_64.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID;
+static const char *const *syscalltbl_native = syscalltbl_x86_64;
+#elif defined(__s390x__)
+#include <asm/syscalls_64.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID;
+static const char *const *syscalltbl_native = syscalltbl_s390_64;
+#elif defined(__powerpc64__)
+#include <asm/syscalls_64.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_64_MAX_ID;
+static const char *const *syscalltbl_native = syscalltbl_powerpc_64;
+#elif defined(__powerpc__)
+#include <asm/syscalls_32.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_32_MAX_ID;
+static const char *const *syscalltbl_native = syscalltbl_powerpc_32;
+#elif defined(__aarch64__)
+#include <asm/syscalls.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_ARM64_MAX_ID;
+static const char *const *syscalltbl_native = syscalltbl_arm64;
+#elif defined(__mips__)
+#include <asm/syscalls_n64.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_MIPS_N64_MAX_ID;
+static const char *const *syscalltbl_native = syscalltbl_mips_n64;
+#elif defined(__loongarch__)
+#include <asm/syscalls.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_LOONGARCH_MAX_ID;
+static const char *const *syscalltbl_native = syscalltbl_loongarch;
+#endif
+
+struct syscall {
+ int id;
+ const char *name;
+};
+
+static int syscallcmpname(const void *vkey, const void *ventry)
+{
+ const char *key = vkey;
+ const struct syscall *entry = ventry;
+
+ return strcmp(key, entry->name);
+}
+
+static int syscallcmp(const void *va, const void *vb)
+{
+ const struct syscall *a = va, *b = vb;
+
+ return strcmp(a->name, b->name);
+}
+
+static int syscalltbl__init_native(struct syscalltbl *tbl)
+{
+ int nr_entries = 0, i, j;
+ struct syscall *entries;
+
+ for (i = 0; i <= syscalltbl_native_max_id; ++i)
+ if (syscalltbl_native[i])
+ ++nr_entries;
+
+ entries = tbl->syscalls.entries = malloc(sizeof(struct syscall) * nr_entries);
+ if (tbl->syscalls.entries == NULL)
+ return -1;
+
+ for (i = 0, j = 0; i <= syscalltbl_native_max_id; ++i) {
+ if (syscalltbl_native[i]) {
+ entries[j].name = syscalltbl_native[i];
+ entries[j].id = i;
+ ++j;
+ }
+ }
+
+ qsort(tbl->syscalls.entries, nr_entries, sizeof(struct syscall), syscallcmp);
+ tbl->syscalls.nr_entries = nr_entries;
+ tbl->syscalls.max_id = syscalltbl_native_max_id;
+ return 0;
+}
+
+struct syscalltbl *syscalltbl__new(void)
+{
+ struct syscalltbl *tbl = malloc(sizeof(*tbl));
+ if (tbl) {
+ if (syscalltbl__init_native(tbl)) {
+ free(tbl);
+ return NULL;
+ }
+ }
+ return tbl;
+}
+
+void syscalltbl__delete(struct syscalltbl *tbl)
+{
+ zfree(&tbl->syscalls.entries);
+ free(tbl);
+}
+
+const char *syscalltbl__name(const struct syscalltbl *tbl __maybe_unused, int id)
+{
+ return id <= syscalltbl_native_max_id ? syscalltbl_native[id]: NULL;
+}
+
+int syscalltbl__id(struct syscalltbl *tbl, const char *name)
+{
+ struct syscall *sc = bsearch(name, tbl->syscalls.entries,
+ tbl->syscalls.nr_entries, sizeof(*sc),
+ syscallcmpname);
+
+ return sc ? sc->id : -1;
+}
+
+int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+{
+ int i;
+ struct syscall *syscalls = tbl->syscalls.entries;
+
+ for (i = *idx + 1; i < tbl->syscalls.nr_entries; ++i) {
+ if (strglobmatch(syscalls[i].name, syscall_glob)) {
+ *idx = i;
+ return syscalls[i].id;
+ }
+ }
+
+ return -1;
+}
+
+int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+{
+ *idx = -1;
+ return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
+}
+
+#else /* HAVE_SYSCALL_TABLE_SUPPORT */
+
+#include <libaudit.h>
+
+struct syscalltbl *syscalltbl__new(void)
+{
+ struct syscalltbl *tbl = zalloc(sizeof(*tbl));
+ if (tbl)
+ tbl->audit_machine = audit_detect_machine();
+ return tbl;
+}
+
+void syscalltbl__delete(struct syscalltbl *tbl)
+{
+ free(tbl);
+}
+
+const char *syscalltbl__name(const struct syscalltbl *tbl, int id)
+{
+ return audit_syscall_to_name(id, tbl->audit_machine);
+}
+
+int syscalltbl__id(struct syscalltbl *tbl, const char *name)
+{
+ return audit_name_to_syscall(name, tbl->audit_machine);
+}
+
+int syscalltbl__strglobmatch_next(struct syscalltbl *tbl __maybe_unused,
+ const char *syscall_glob __maybe_unused, int *idx __maybe_unused)
+{
+ return -1;
+}
+
+int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+{
+ return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
+}
+#endif /* HAVE_SYSCALL_TABLE_SUPPORT */
diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h
new file mode 100644
index 000000000..a41d2ca9e
--- /dev/null
+++ b/tools/perf/util/syscalltbl.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SYSCALLTBL_H
+#define __PERF_SYSCALLTBL_H
+
+struct syscalltbl {
+ int audit_machine;
+ struct {
+ int max_id;
+ int nr_entries;
+ void *entries;
+ } syscalls;
+};
+
+struct syscalltbl *syscalltbl__new(void);
+void syscalltbl__delete(struct syscalltbl *tbl);
+
+const char *syscalltbl__name(const struct syscalltbl *tbl, int id);
+int syscalltbl__id(struct syscalltbl *tbl, const char *name);
+
+int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
+int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
+
+#endif /* __PERF_SYSCALLTBL_H */
diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c
new file mode 100644
index 000000000..0f383418e
--- /dev/null
+++ b/tools/perf/util/target.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Helper functions for handling target threads/cpus
+ *
+ * Copyright (C) 2012, LG Electronics, Namhyung Kim <namhyung.kim@lge.com>
+ */
+
+#include "target.h"
+
+#include <pwd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+enum target_errno target__validate(struct target *target)
+{
+ enum target_errno ret = TARGET_ERRNO__SUCCESS;
+
+ if (target->pid)
+ target->tid = target->pid;
+
+ /* CPU and PID are mutually exclusive */
+ if (target->tid && target->cpu_list) {
+ target->cpu_list = NULL;
+ if (ret == TARGET_ERRNO__SUCCESS)
+ ret = TARGET_ERRNO__PID_OVERRIDE_CPU;
+ }
+
+ /* UID and PID are mutually exclusive */
+ if (target->tid && target->uid_str) {
+ target->uid_str = NULL;
+ if (ret == TARGET_ERRNO__SUCCESS)
+ ret = TARGET_ERRNO__PID_OVERRIDE_UID;
+ }
+
+ /* UID and CPU are mutually exclusive */
+ if (target->uid_str && target->cpu_list) {
+ target->cpu_list = NULL;
+ if (ret == TARGET_ERRNO__SUCCESS)
+ ret = TARGET_ERRNO__UID_OVERRIDE_CPU;
+ }
+
+ /* PID and SYSTEM are mutually exclusive */
+ if (target->tid && target->system_wide) {
+ target->system_wide = false;
+ if (ret == TARGET_ERRNO__SUCCESS)
+ ret = TARGET_ERRNO__PID_OVERRIDE_SYSTEM;
+ }
+
+ /* UID and SYSTEM are mutually exclusive */
+ if (target->uid_str && target->system_wide) {
+ target->system_wide = false;
+ if (ret == TARGET_ERRNO__SUCCESS)
+ ret = TARGET_ERRNO__UID_OVERRIDE_SYSTEM;
+ }
+
+ /* BPF and CPU are mutually exclusive */
+ if (target->bpf_str && target->cpu_list) {
+ target->cpu_list = NULL;
+ if (ret == TARGET_ERRNO__SUCCESS)
+ ret = TARGET_ERRNO__BPF_OVERRIDE_CPU;
+ }
+
+ /* BPF and PID/TID are mutually exclusive */
+ if (target->bpf_str && target->tid) {
+ target->tid = NULL;
+ if (ret == TARGET_ERRNO__SUCCESS)
+ ret = TARGET_ERRNO__BPF_OVERRIDE_PID;
+ }
+
+ /* BPF and UID are mutually exclusive */
+ if (target->bpf_str && target->uid_str) {
+ target->uid_str = NULL;
+ if (ret == TARGET_ERRNO__SUCCESS)
+ ret = TARGET_ERRNO__BPF_OVERRIDE_UID;
+ }
+
+ /* BPF and THREADS are mutually exclusive */
+ if (target->bpf_str && target->per_thread) {
+ target->per_thread = false;
+ if (ret == TARGET_ERRNO__SUCCESS)
+ ret = TARGET_ERRNO__BPF_OVERRIDE_THREAD;
+ }
+
+ /* THREAD and SYSTEM/CPU are mutually exclusive */
+ if (target->per_thread && (target->system_wide || target->cpu_list)) {
+ target->per_thread = false;
+ if (ret == TARGET_ERRNO__SUCCESS)
+ ret = TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD;
+ }
+
+ return ret;
+}
+
+enum target_errno target__parse_uid(struct target *target)
+{
+ struct passwd pwd, *result;
+ char buf[1024];
+ const char *str = target->uid_str;
+
+ target->uid = UINT_MAX;
+ if (str == NULL)
+ return TARGET_ERRNO__SUCCESS;
+
+ /* Try user name first */
+ getpwnam_r(str, &pwd, buf, sizeof(buf), &result);
+
+ if (result == NULL) {
+ /*
+ * The user name not found. Maybe it's a UID number.
+ */
+ char *endptr;
+ int uid = strtol(str, &endptr, 10);
+
+ if (*endptr != '\0')
+ return TARGET_ERRNO__INVALID_UID;
+
+ getpwuid_r(uid, &pwd, buf, sizeof(buf), &result);
+
+ if (result == NULL)
+ return TARGET_ERRNO__USER_NOT_FOUND;
+ }
+
+ target->uid = result->pw_uid;
+ return TARGET_ERRNO__SUCCESS;
+}
+
+/*
+ * This must have a same ordering as the enum target_errno.
+ */
+static const char *target__error_str[] = {
+ "PID/TID switch overriding CPU",
+ "PID/TID switch overriding UID",
+ "UID switch overriding CPU",
+ "PID/TID switch overriding SYSTEM",
+ "UID switch overriding SYSTEM",
+ "SYSTEM/CPU switch overriding PER-THREAD",
+ "BPF switch overriding CPU",
+ "BPF switch overriding PID/TID",
+ "BPF switch overriding UID",
+ "BPF switch overriding THREAD",
+ "Invalid User: %s",
+ "Problems obtaining information for user %s",
+};
+
+int target__strerror(struct target *target, int errnum,
+ char *buf, size_t buflen)
+{
+ int idx;
+ const char *msg;
+
+ BUG_ON(buflen == 0);
+
+ if (errnum >= 0) {
+ str_error_r(errnum, buf, buflen);
+ return 0;
+ }
+
+ if (errnum < __TARGET_ERRNO__START || errnum >= __TARGET_ERRNO__END)
+ return -1;
+
+ idx = errnum - __TARGET_ERRNO__START;
+ msg = target__error_str[idx];
+
+ switch (errnum) {
+ case TARGET_ERRNO__PID_OVERRIDE_CPU ...
+ TARGET_ERRNO__BPF_OVERRIDE_THREAD:
+ snprintf(buf, buflen, "%s", msg);
+ break;
+
+ case TARGET_ERRNO__INVALID_UID:
+ case TARGET_ERRNO__USER_NOT_FOUND:
+ snprintf(buf, buflen, msg, target->uid_str);
+ break;
+
+ default:
+ /* cannot reach here */
+ break;
+ }
+
+ return 0;
+}
diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
new file mode 100644
index 000000000..d582cae8e
--- /dev/null
+++ b/tools/perf/util/target.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_TARGET_H
+#define _PERF_TARGET_H
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+struct target {
+ const char *pid;
+ const char *tid;
+ const char *cpu_list;
+ const char *uid_str;
+ const char *bpf_str;
+ uid_t uid;
+ bool system_wide;
+ bool uses_mmap;
+ bool default_per_cpu;
+ bool per_thread;
+ bool use_bpf;
+ int initial_delay;
+ const char *attr_map;
+};
+
+enum target_errno {
+ TARGET_ERRNO__SUCCESS = 0,
+
+ /*
+ * Choose an arbitrary negative big number not to clash with standard
+ * errno since SUS requires the errno has distinct positive values.
+ * See 'Issue 6' in the link below.
+ *
+ * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
+ */
+ __TARGET_ERRNO__START = -10000,
+
+ /* for target__validate() */
+ TARGET_ERRNO__PID_OVERRIDE_CPU = __TARGET_ERRNO__START,
+ TARGET_ERRNO__PID_OVERRIDE_UID,
+ TARGET_ERRNO__UID_OVERRIDE_CPU,
+ TARGET_ERRNO__PID_OVERRIDE_SYSTEM,
+ TARGET_ERRNO__UID_OVERRIDE_SYSTEM,
+ TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD,
+ TARGET_ERRNO__BPF_OVERRIDE_CPU,
+ TARGET_ERRNO__BPF_OVERRIDE_PID,
+ TARGET_ERRNO__BPF_OVERRIDE_UID,
+ TARGET_ERRNO__BPF_OVERRIDE_THREAD,
+
+ /* for target__parse_uid() */
+ TARGET_ERRNO__INVALID_UID,
+ TARGET_ERRNO__USER_NOT_FOUND,
+
+ __TARGET_ERRNO__END,
+};
+
+enum target_errno target__validate(struct target *target);
+enum target_errno target__parse_uid(struct target *target);
+
+int target__strerror(struct target *target, int errnum, char *buf, size_t buflen);
+
+static inline bool target__has_task(struct target *target)
+{
+ return target->tid || target->pid || target->uid_str;
+}
+
+static inline bool target__has_cpu(struct target *target)
+{
+ return target->system_wide || target->cpu_list;
+}
+
+static inline bool target__none(struct target *target)
+{
+ return !target__has_task(target) && !target__has_cpu(target);
+}
+
+static inline bool target__enable_on_exec(struct target *target)
+{
+ /*
+ * Normally enable_on_exec should be set if:
+ * 1) The tracee process is forked (not attaching to existed task or cpu).
+ * 2) And initial_delay is not configured.
+ * Otherwise, we enable tracee events manually.
+ */
+ return target__none(target) && !target->initial_delay;
+}
+
+static inline bool target__has_per_thread(struct target *target)
+{
+ return target->system_wide && target->per_thread;
+}
+
+static inline bool target__uses_dummy_map(struct target *target)
+{
+ bool use_dummy = false;
+
+ if (target->default_per_cpu)
+ use_dummy = target->per_thread ? true : false;
+ else if (target__has_task(target) ||
+ (!target__has_cpu(target) && !target->uses_mmap))
+ use_dummy = true;
+ else if (target__has_per_thread(target))
+ use_dummy = true;
+
+ return use_dummy;
+}
+
+#endif /* _PERF_TARGET_H */
diff --git a/tools/perf/util/term.c b/tools/perf/util/term.c
new file mode 100644
index 000000000..e7aa82c06
--- /dev/null
+++ b/tools/perf/util/term.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "term.h"
+#include <stdlib.h>
+#include <termios.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+
+void get_term_dimensions(struct winsize *ws)
+{
+ char *s = getenv("LINES");
+
+ if (s != NULL) {
+ ws->ws_row = atoi(s);
+ s = getenv("COLUMNS");
+ if (s != NULL) {
+ ws->ws_col = atoi(s);
+ if (ws->ws_row && ws->ws_col)
+ return;
+ }
+ }
+#ifdef TIOCGWINSZ
+ if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
+ ws->ws_row && ws->ws_col)
+ return;
+#endif
+ ws->ws_row = 25;
+ ws->ws_col = 80;
+}
+
+void set_term_quiet_input(struct termios *old)
+{
+ struct termios tc;
+
+ tcgetattr(0, old);
+ tc = *old;
+ tc.c_lflag &= ~(ICANON | ECHO);
+ tc.c_cc[VMIN] = 0;
+ tc.c_cc[VTIME] = 0;
+ tcsetattr(0, TCSANOW, &tc);
+}
diff --git a/tools/perf/util/term.h b/tools/perf/util/term.h
new file mode 100644
index 000000000..607b170a9
--- /dev/null
+++ b/tools/perf/util/term.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_TERM_H
+#define __PERF_TERM_H
+
+struct termios;
+struct winsize;
+
+void get_term_dimensions(struct winsize *ws);
+void set_term_quiet_input(struct termios *old);
+
+#endif /* __PERF_TERM_H */
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
new file mode 100644
index 000000000..c6a0a27b1
--- /dev/null
+++ b/tools/perf/util/thread-stack.c
@@ -0,0 +1,1239 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * thread-stack.c: Synthesize a thread's stack using call / return events
+ * Copyright (c) 2014, Intel Corporation.
+ */
+
+#include <linux/rbtree.h>
+#include <linux/list.h>
+#include <linux/log2.h>
+#include <linux/zalloc.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include "thread.h"
+#include "event.h"
+#include "machine.h"
+#include "env.h"
+#include "debug.h"
+#include "symbol.h"
+#include "comm.h"
+#include "call-path.h"
+#include "thread-stack.h"
+
+#define STACK_GROWTH 2048
+
+/*
+ * State of retpoline detection.
+ *
+ * RETPOLINE_NONE: no retpoline detection
+ * X86_RETPOLINE_POSSIBLE: x86 retpoline possible
+ * X86_RETPOLINE_DETECTED: x86 retpoline detected
+ */
+enum retpoline_state_t {
+ RETPOLINE_NONE,
+ X86_RETPOLINE_POSSIBLE,
+ X86_RETPOLINE_DETECTED,
+};
+
+/**
+ * struct thread_stack_entry - thread stack entry.
+ * @ret_addr: return address
+ * @timestamp: timestamp (if known)
+ * @ref: external reference (e.g. db_id of sample)
+ * @branch_count: the branch count when the entry was created
+ * @insn_count: the instruction count when the entry was created
+ * @cyc_count the cycle count when the entry was created
+ * @db_id: id used for db-export
+ * @cp: call path
+ * @no_call: a 'call' was not seen
+ * @trace_end: a 'call' but trace ended
+ * @non_call: a branch but not a 'call' to the start of a different symbol
+ */
+struct thread_stack_entry {
+ u64 ret_addr;
+ u64 timestamp;
+ u64 ref;
+ u64 branch_count;
+ u64 insn_count;
+ u64 cyc_count;
+ u64 db_id;
+ struct call_path *cp;
+ bool no_call;
+ bool trace_end;
+ bool non_call;
+};
+
+/**
+ * struct thread_stack - thread stack constructed from 'call' and 'return'
+ * branch samples.
+ * @stack: array that holds the stack
+ * @cnt: number of entries in the stack
+ * @sz: current maximum stack size
+ * @trace_nr: current trace number
+ * @branch_count: running branch count
+ * @insn_count: running instruction count
+ * @cyc_count running cycle count
+ * @kernel_start: kernel start address
+ * @last_time: last timestamp
+ * @crp: call/return processor
+ * @comm: current comm
+ * @arr_sz: size of array if this is the first element of an array
+ * @rstate: used to detect retpolines
+ * @br_stack_rb: branch stack (ring buffer)
+ * @br_stack_sz: maximum branch stack size
+ * @br_stack_pos: current position in @br_stack_rb
+ * @mispred_all: mark all branches as mispredicted
+ */
+struct thread_stack {
+ struct thread_stack_entry *stack;
+ size_t cnt;
+ size_t sz;
+ u64 trace_nr;
+ u64 branch_count;
+ u64 insn_count;
+ u64 cyc_count;
+ u64 kernel_start;
+ u64 last_time;
+ struct call_return_processor *crp;
+ struct comm *comm;
+ unsigned int arr_sz;
+ enum retpoline_state_t rstate;
+ struct branch_stack *br_stack_rb;
+ unsigned int br_stack_sz;
+ unsigned int br_stack_pos;
+ bool mispred_all;
+};
+
+/*
+ * Assume pid == tid == 0 identifies the idle task as defined by
+ * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
+ * and therefore requires a stack for each cpu.
+ */
+static inline bool thread_stack__per_cpu(struct thread *thread)
+{
+ return !(thread__tid(thread) || thread__pid(thread));
+}
+
+static int thread_stack__grow(struct thread_stack *ts)
+{
+ struct thread_stack_entry *new_stack;
+ size_t sz, new_sz;
+
+ new_sz = ts->sz + STACK_GROWTH;
+ sz = new_sz * sizeof(struct thread_stack_entry);
+
+ new_stack = realloc(ts->stack, sz);
+ if (!new_stack)
+ return -ENOMEM;
+
+ ts->stack = new_stack;
+ ts->sz = new_sz;
+
+ return 0;
+}
+
+static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
+ struct call_return_processor *crp,
+ bool callstack, unsigned int br_stack_sz)
+{
+ int err;
+
+ if (callstack) {
+ err = thread_stack__grow(ts);
+ if (err)
+ return err;
+ }
+
+ if (br_stack_sz) {
+ size_t sz = sizeof(struct branch_stack);
+
+ sz += br_stack_sz * sizeof(struct branch_entry);
+ ts->br_stack_rb = zalloc(sz);
+ if (!ts->br_stack_rb)
+ return -ENOMEM;
+ ts->br_stack_sz = br_stack_sz;
+ }
+
+ if (thread__maps(thread) && maps__machine(thread__maps(thread))) {
+ struct machine *machine = maps__machine(thread__maps(thread));
+ const char *arch = perf_env__arch(machine->env);
+
+ ts->kernel_start = machine__kernel_start(machine);
+ if (!strcmp(arch, "x86"))
+ ts->rstate = X86_RETPOLINE_POSSIBLE;
+ } else {
+ ts->kernel_start = 1ULL << 63;
+ }
+ ts->crp = crp;
+
+ return 0;
+}
+
+static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
+ struct call_return_processor *crp,
+ bool callstack,
+ unsigned int br_stack_sz)
+{
+ struct thread_stack *ts = thread__ts(thread), *new_ts;
+ unsigned int old_sz = ts ? ts->arr_sz : 0;
+ unsigned int new_sz = 1;
+
+ if (thread_stack__per_cpu(thread) && cpu > 0)
+ new_sz = roundup_pow_of_two(cpu + 1);
+
+ if (!ts || new_sz > old_sz) {
+ new_ts = calloc(new_sz, sizeof(*ts));
+ if (!new_ts)
+ return NULL;
+ if (ts)
+ memcpy(new_ts, ts, old_sz * sizeof(*ts));
+ new_ts->arr_sz = new_sz;
+ free(thread__ts(thread));
+ thread__set_ts(thread, new_ts);
+ ts = new_ts;
+ }
+
+ if (thread_stack__per_cpu(thread) && cpu > 0 &&
+ (unsigned int)cpu < ts->arr_sz)
+ ts += cpu;
+
+ if (!ts->stack &&
+ thread_stack__init(ts, thread, crp, callstack, br_stack_sz))
+ return NULL;
+
+ return ts;
+}
+
+static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
+{
+ struct thread_stack *ts = thread__ts(thread);
+
+ if (cpu < 0)
+ cpu = 0;
+
+ if (!ts || (unsigned int)cpu >= ts->arr_sz)
+ return NULL;
+
+ ts += cpu;
+
+ if (!ts->stack)
+ return NULL;
+
+ return ts;
+}
+
+static inline struct thread_stack *thread__stack(struct thread *thread,
+ int cpu)
+{
+ if (!thread)
+ return NULL;
+
+ if (thread_stack__per_cpu(thread))
+ return thread__cpu_stack(thread, cpu);
+
+ return thread__ts(thread);
+}
+
+static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
+ bool trace_end)
+{
+ int err = 0;
+
+ if (ts->cnt == ts->sz) {
+ err = thread_stack__grow(ts);
+ if (err) {
+ pr_warning("Out of memory: discarding thread stack\n");
+ ts->cnt = 0;
+ }
+ }
+
+ ts->stack[ts->cnt].trace_end = trace_end;
+ ts->stack[ts->cnt++].ret_addr = ret_addr;
+
+ return err;
+}
+
+static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
+{
+ size_t i;
+
+ /*
+ * In some cases there may be functions which are not seen to return.
+ * For example when setjmp / longjmp has been used. Or the perf context
+ * switch in the kernel which doesn't stop and start tracing in exactly
+ * the same code path. When that happens the return address will be
+ * further down the stack. If the return address is not found at all,
+ * we assume the opposite (i.e. this is a return for a call that wasn't
+ * seen for some reason) and leave the stack alone.
+ */
+ for (i = ts->cnt; i; ) {
+ if (ts->stack[--i].ret_addr == ret_addr) {
+ ts->cnt = i;
+ return;
+ }
+ }
+}
+
+static void thread_stack__pop_trace_end(struct thread_stack *ts)
+{
+ size_t i;
+
+ for (i = ts->cnt; i; ) {
+ if (ts->stack[--i].trace_end)
+ ts->cnt = i;
+ else
+ return;
+ }
+}
+
+static bool thread_stack__in_kernel(struct thread_stack *ts)
+{
+ if (!ts->cnt)
+ return false;
+
+ return ts->stack[ts->cnt - 1].cp->in_kernel;
+}
+
+static int thread_stack__call_return(struct thread *thread,
+ struct thread_stack *ts, size_t idx,
+ u64 timestamp, u64 ref, bool no_return)
+{
+ struct call_return_processor *crp = ts->crp;
+ struct thread_stack_entry *tse;
+ struct call_return cr = {
+ .thread = thread,
+ .comm = ts->comm,
+ .db_id = 0,
+ };
+ u64 *parent_db_id;
+
+ tse = &ts->stack[idx];
+ cr.cp = tse->cp;
+ cr.call_time = tse->timestamp;
+ cr.return_time = timestamp;
+ cr.branch_count = ts->branch_count - tse->branch_count;
+ cr.insn_count = ts->insn_count - tse->insn_count;
+ cr.cyc_count = ts->cyc_count - tse->cyc_count;
+ cr.db_id = tse->db_id;
+ cr.call_ref = tse->ref;
+ cr.return_ref = ref;
+ if (tse->no_call)
+ cr.flags |= CALL_RETURN_NO_CALL;
+ if (no_return)
+ cr.flags |= CALL_RETURN_NO_RETURN;
+ if (tse->non_call)
+ cr.flags |= CALL_RETURN_NON_CALL;
+
+ /*
+ * The parent db_id must be assigned before exporting the child. Note
+ * it is not possible to export the parent first because its information
+ * is not yet complete because its 'return' has not yet been processed.
+ */
+ parent_db_id = idx ? &(tse - 1)->db_id : NULL;
+
+ return crp->process(&cr, parent_db_id, crp->data);
+}
+
+static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
+{
+ struct call_return_processor *crp = ts->crp;
+ int err;
+
+ if (!crp) {
+ ts->cnt = 0;
+ ts->br_stack_pos = 0;
+ if (ts->br_stack_rb)
+ ts->br_stack_rb->nr = 0;
+ return 0;
+ }
+
+ while (ts->cnt) {
+ err = thread_stack__call_return(thread, ts, --ts->cnt,
+ ts->last_time, 0, true);
+ if (err) {
+ pr_err("Error flushing thread stack!\n");
+ ts->cnt = 0;
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+int thread_stack__flush(struct thread *thread)
+{
+ struct thread_stack *ts = thread__ts(thread);
+ unsigned int pos;
+ int err = 0;
+
+ if (ts) {
+ for (pos = 0; pos < ts->arr_sz; pos++) {
+ int ret = __thread_stack__flush(thread, ts + pos);
+
+ if (ret)
+ err = ret;
+ }
+ }
+
+ return err;
+}
+
+static void thread_stack__update_br_stack(struct thread_stack *ts, u32 flags,
+ u64 from_ip, u64 to_ip)
+{
+ struct branch_stack *bs = ts->br_stack_rb;
+ struct branch_entry *be;
+
+ if (!ts->br_stack_pos)
+ ts->br_stack_pos = ts->br_stack_sz;
+
+ ts->br_stack_pos -= 1;
+
+ be = &bs->entries[ts->br_stack_pos];
+ be->from = from_ip;
+ be->to = to_ip;
+ be->flags.value = 0;
+ be->flags.abort = !!(flags & PERF_IP_FLAG_TX_ABORT);
+ be->flags.in_tx = !!(flags & PERF_IP_FLAG_IN_TX);
+ /* No support for mispredict */
+ be->flags.mispred = ts->mispred_all;
+
+ if (bs->nr < ts->br_stack_sz)
+ bs->nr += 1;
+}
+
+int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
+ u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack,
+ unsigned int br_stack_sz, bool mispred_all)
+{
+ struct thread_stack *ts = thread__stack(thread, cpu);
+
+ if (!thread)
+ return -EINVAL;
+
+ if (!ts) {
+ ts = thread_stack__new(thread, cpu, NULL, callstack, br_stack_sz);
+ if (!ts) {
+ pr_warning("Out of memory: no thread stack\n");
+ return -ENOMEM;
+ }
+ ts->trace_nr = trace_nr;
+ ts->mispred_all = mispred_all;
+ }
+
+ /*
+ * When the trace is discontinuous, the trace_nr changes. In that case
+ * the stack might be completely invalid. Better to report nothing than
+ * to report something misleading, so flush the stack.
+ */
+ if (trace_nr != ts->trace_nr) {
+ if (ts->trace_nr)
+ __thread_stack__flush(thread, ts);
+ ts->trace_nr = trace_nr;
+ }
+
+ if (br_stack_sz)
+ thread_stack__update_br_stack(ts, flags, from_ip, to_ip);
+
+ /*
+ * Stop here if thread_stack__process() is in use, or not recording call
+ * stack.
+ */
+ if (ts->crp || !callstack)
+ return 0;
+
+ if (flags & PERF_IP_FLAG_CALL) {
+ u64 ret_addr;
+
+ if (!to_ip)
+ return 0;
+ ret_addr = from_ip + insn_len;
+ if (ret_addr == to_ip)
+ return 0; /* Zero-length calls are excluded */
+ return thread_stack__push(ts, ret_addr,
+ flags & PERF_IP_FLAG_TRACE_END);
+ } else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
+ /*
+ * If the caller did not change the trace number (which would
+ * have flushed the stack) then try to make sense of the stack.
+ * Possibly, tracing began after returning to the current
+ * address, so try to pop that. Also, do not expect a call made
+ * when the trace ended, to return, so pop that.
+ */
+ thread_stack__pop(ts, to_ip);
+ thread_stack__pop_trace_end(ts);
+ } else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
+ thread_stack__pop(ts, to_ip);
+ }
+
+ return 0;
+}
+
+void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
+{
+ struct thread_stack *ts = thread__stack(thread, cpu);
+
+ if (!ts)
+ return;
+
+ if (trace_nr != ts->trace_nr) {
+ if (ts->trace_nr)
+ __thread_stack__flush(thread, ts);
+ ts->trace_nr = trace_nr;
+ }
+}
+
+static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
+{
+ __thread_stack__flush(thread, ts);
+ zfree(&ts->stack);
+ zfree(&ts->br_stack_rb);
+}
+
+static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
+{
+ unsigned int arr_sz = ts->arr_sz;
+
+ __thread_stack__free(thread, ts);
+ memset(ts, 0, sizeof(*ts));
+ ts->arr_sz = arr_sz;
+}
+
+void thread_stack__free(struct thread *thread)
+{
+ struct thread_stack *ts = thread__ts(thread);
+ unsigned int pos;
+
+ if (ts) {
+ for (pos = 0; pos < ts->arr_sz; pos++)
+ __thread_stack__free(thread, ts + pos);
+ free(thread__ts(thread));
+ thread__set_ts(thread, NULL);
+ }
+}
+
+static inline u64 callchain_context(u64 ip, u64 kernel_start)
+{
+ return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
+}
+
+void thread_stack__sample(struct thread *thread, int cpu,
+ struct ip_callchain *chain,
+ size_t sz, u64 ip, u64 kernel_start)
+{
+ struct thread_stack *ts = thread__stack(thread, cpu);
+ u64 context = callchain_context(ip, kernel_start);
+ u64 last_context;
+ size_t i, j;
+
+ if (sz < 2) {
+ chain->nr = 0;
+ return;
+ }
+
+ chain->ips[0] = context;
+ chain->ips[1] = ip;
+
+ if (!ts) {
+ chain->nr = 2;
+ return;
+ }
+
+ last_context = context;
+
+ for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
+ ip = ts->stack[ts->cnt - j].ret_addr;
+ context = callchain_context(ip, kernel_start);
+ if (context != last_context) {
+ if (i >= sz - 1)
+ break;
+ chain->ips[i++] = context;
+ last_context = context;
+ }
+ chain->ips[i] = ip;
+ }
+
+ chain->nr = i;
+}
+
+/*
+ * Hardware sample records, created some time after the event occurred, need to
+ * have subsequent addresses removed from the call chain.
+ */
+void thread_stack__sample_late(struct thread *thread, int cpu,
+ struct ip_callchain *chain, size_t sz,
+ u64 sample_ip, u64 kernel_start)
+{
+ struct thread_stack *ts = thread__stack(thread, cpu);
+ u64 sample_context = callchain_context(sample_ip, kernel_start);
+ u64 last_context, context, ip;
+ size_t nr = 0, j;
+
+ if (sz < 2) {
+ chain->nr = 0;
+ return;
+ }
+
+ if (!ts)
+ goto out;
+
+ /*
+ * When tracing kernel space, kernel addresses occur at the top of the
+ * call chain after the event occurred but before tracing stopped.
+ * Skip them.
+ */
+ for (j = 1; j <= ts->cnt; j++) {
+ ip = ts->stack[ts->cnt - j].ret_addr;
+ context = callchain_context(ip, kernel_start);
+ if (context == PERF_CONTEXT_USER ||
+ (context == sample_context && ip == sample_ip))
+ break;
+ }
+
+ last_context = sample_ip; /* Use sample_ip as an invalid context */
+
+ for (; nr < sz && j <= ts->cnt; nr++, j++) {
+ ip = ts->stack[ts->cnt - j].ret_addr;
+ context = callchain_context(ip, kernel_start);
+ if (context != last_context) {
+ if (nr >= sz - 1)
+ break;
+ chain->ips[nr++] = context;
+ last_context = context;
+ }
+ chain->ips[nr] = ip;
+ }
+out:
+ if (nr) {
+ chain->nr = nr;
+ } else {
+ chain->ips[0] = sample_context;
+ chain->ips[1] = sample_ip;
+ chain->nr = 2;
+ }
+}
+
+void thread_stack__br_sample(struct thread *thread, int cpu,
+ struct branch_stack *dst, unsigned int sz)
+{
+ struct thread_stack *ts = thread__stack(thread, cpu);
+ const size_t bsz = sizeof(struct branch_entry);
+ struct branch_stack *src;
+ struct branch_entry *be;
+ unsigned int nr;
+
+ dst->nr = 0;
+
+ if (!ts)
+ return;
+
+ src = ts->br_stack_rb;
+ if (!src->nr)
+ return;
+
+ dst->nr = min((unsigned int)src->nr, sz);
+
+ be = &dst->entries[0];
+ nr = min(ts->br_stack_sz - ts->br_stack_pos, (unsigned int)dst->nr);
+ memcpy(be, &src->entries[ts->br_stack_pos], bsz * nr);
+
+ if (src->nr >= ts->br_stack_sz) {
+ sz -= nr;
+ be = &dst->entries[nr];
+ nr = min(ts->br_stack_pos, sz);
+ memcpy(be, &src->entries[0], bsz * ts->br_stack_pos);
+ }
+}
+
+/* Start of user space branch entries */
+static bool us_start(struct branch_entry *be, u64 kernel_start, bool *start)
+{
+ if (!*start)
+ *start = be->to && be->to < kernel_start;
+
+ return *start;
+}
+
+/*
+ * Start of branch entries after the ip fell in between 2 branches, or user
+ * space branch entries.
+ */
+static bool ks_start(struct branch_entry *be, u64 sample_ip, u64 kernel_start,
+ bool *start, struct branch_entry *nb)
+{
+ if (!*start) {
+ *start = (nb && sample_ip >= be->to && sample_ip <= nb->from) ||
+ be->from < kernel_start ||
+ (be->to && be->to < kernel_start);
+ }
+
+ return *start;
+}
+
+/*
+ * Hardware sample records, created some time after the event occurred, need to
+ * have subsequent addresses removed from the branch stack.
+ */
+void thread_stack__br_sample_late(struct thread *thread, int cpu,
+ struct branch_stack *dst, unsigned int sz,
+ u64 ip, u64 kernel_start)
+{
+ struct thread_stack *ts = thread__stack(thread, cpu);
+ struct branch_entry *d, *s, *spos, *ssz;
+ struct branch_stack *src;
+ unsigned int nr = 0;
+ bool start = false;
+
+ dst->nr = 0;
+
+ if (!ts)
+ return;
+
+ src = ts->br_stack_rb;
+ if (!src->nr)
+ return;
+
+ spos = &src->entries[ts->br_stack_pos];
+ ssz = &src->entries[ts->br_stack_sz];
+
+ d = &dst->entries[0];
+ s = spos;
+
+ if (ip < kernel_start) {
+ /*
+ * User space sample: start copying branch entries when the
+ * branch is in user space.
+ */
+ for (s = spos; s < ssz && nr < sz; s++) {
+ if (us_start(s, kernel_start, &start)) {
+ *d++ = *s;
+ nr += 1;
+ }
+ }
+
+ if (src->nr >= ts->br_stack_sz) {
+ for (s = &src->entries[0]; s < spos && nr < sz; s++) {
+ if (us_start(s, kernel_start, &start)) {
+ *d++ = *s;
+ nr += 1;
+ }
+ }
+ }
+ } else {
+ struct branch_entry *nb = NULL;
+
+ /*
+ * Kernel space sample: start copying branch entries when the ip
+ * falls in between 2 branches (or the branch is in user space
+ * because then the start must have been missed).
+ */
+ for (s = spos; s < ssz && nr < sz; s++) {
+ if (ks_start(s, ip, kernel_start, &start, nb)) {
+ *d++ = *s;
+ nr += 1;
+ }
+ nb = s;
+ }
+
+ if (src->nr >= ts->br_stack_sz) {
+ for (s = &src->entries[0]; s < spos && nr < sz; s++) {
+ if (ks_start(s, ip, kernel_start, &start, nb)) {
+ *d++ = *s;
+ nr += 1;
+ }
+ nb = s;
+ }
+ }
+ }
+
+ dst->nr = nr;
+}
+
+struct call_return_processor *
+call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data),
+ void *data)
+{
+ struct call_return_processor *crp;
+
+ crp = zalloc(sizeof(struct call_return_processor));
+ if (!crp)
+ return NULL;
+ crp->cpr = call_path_root__new();
+ if (!crp->cpr)
+ goto out_free;
+ crp->process = process;
+ crp->data = data;
+ return crp;
+
+out_free:
+ free(crp);
+ return NULL;
+}
+
+void call_return_processor__free(struct call_return_processor *crp)
+{
+ if (crp) {
+ call_path_root__free(crp->cpr);
+ free(crp);
+ }
+}
+
+static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
+ u64 timestamp, u64 ref, struct call_path *cp,
+ bool no_call, bool trace_end)
+{
+ struct thread_stack_entry *tse;
+ int err;
+
+ if (!cp)
+ return -ENOMEM;
+
+ if (ts->cnt == ts->sz) {
+ err = thread_stack__grow(ts);
+ if (err)
+ return err;
+ }
+
+ tse = &ts->stack[ts->cnt++];
+ tse->ret_addr = ret_addr;
+ tse->timestamp = timestamp;
+ tse->ref = ref;
+ tse->branch_count = ts->branch_count;
+ tse->insn_count = ts->insn_count;
+ tse->cyc_count = ts->cyc_count;
+ tse->cp = cp;
+ tse->no_call = no_call;
+ tse->trace_end = trace_end;
+ tse->non_call = false;
+ tse->db_id = 0;
+
+ return 0;
+}
+
+static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
+ u64 ret_addr, u64 timestamp, u64 ref,
+ struct symbol *sym)
+{
+ int err;
+
+ if (!ts->cnt)
+ return 1;
+
+ if (ts->cnt == 1) {
+ struct thread_stack_entry *tse = &ts->stack[0];
+
+ if (tse->cp->sym == sym)
+ return thread_stack__call_return(thread, ts, --ts->cnt,
+ timestamp, ref, false);
+ }
+
+ if (ts->stack[ts->cnt - 1].ret_addr == ret_addr &&
+ !ts->stack[ts->cnt - 1].non_call) {
+ return thread_stack__call_return(thread, ts, --ts->cnt,
+ timestamp, ref, false);
+ } else {
+ size_t i = ts->cnt - 1;
+
+ while (i--) {
+ if (ts->stack[i].ret_addr != ret_addr ||
+ ts->stack[i].non_call)
+ continue;
+ i += 1;
+ while (ts->cnt > i) {
+ err = thread_stack__call_return(thread, ts,
+ --ts->cnt,
+ timestamp, ref,
+ true);
+ if (err)
+ return err;
+ }
+ return thread_stack__call_return(thread, ts, --ts->cnt,
+ timestamp, ref, false);
+ }
+ }
+
+ return 1;
+}
+
+static int thread_stack__bottom(struct thread_stack *ts,
+ struct perf_sample *sample,
+ struct addr_location *from_al,
+ struct addr_location *to_al, u64 ref)
+{
+ struct call_path_root *cpr = ts->crp->cpr;
+ struct call_path *cp;
+ struct symbol *sym;
+ u64 ip;
+
+ if (sample->ip) {
+ ip = sample->ip;
+ sym = from_al->sym;
+ } else if (sample->addr) {
+ ip = sample->addr;
+ sym = to_al->sym;
+ } else {
+ return 0;
+ }
+
+ cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
+ ts->kernel_start);
+
+ return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
+ true, false);
+}
+
+static int thread_stack__pop_ks(struct thread *thread, struct thread_stack *ts,
+ struct perf_sample *sample, u64 ref)
+{
+ u64 tm = sample->time;
+ int err;
+
+ /* Return to userspace, so pop all kernel addresses */
+ while (thread_stack__in_kernel(ts)) {
+ err = thread_stack__call_return(thread, ts, --ts->cnt,
+ tm, ref, true);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int thread_stack__no_call_return(struct thread *thread,
+ struct thread_stack *ts,
+ struct perf_sample *sample,
+ struct addr_location *from_al,
+ struct addr_location *to_al, u64 ref)
+{
+ struct call_path_root *cpr = ts->crp->cpr;
+ struct call_path *root = &cpr->call_path;
+ struct symbol *fsym = from_al->sym;
+ struct symbol *tsym = to_al->sym;
+ struct call_path *cp, *parent;
+ u64 ks = ts->kernel_start;
+ u64 addr = sample->addr;
+ u64 tm = sample->time;
+ u64 ip = sample->ip;
+ int err;
+
+ if (ip >= ks && addr < ks) {
+ /* Return to userspace, so pop all kernel addresses */
+ err = thread_stack__pop_ks(thread, ts, sample, ref);
+ if (err)
+ return err;
+
+ /* If the stack is empty, push the userspace address */
+ if (!ts->cnt) {
+ cp = call_path__findnew(cpr, root, tsym, addr, ks);
+ return thread_stack__push_cp(ts, 0, tm, ref, cp, true,
+ false);
+ }
+ } else if (thread_stack__in_kernel(ts) && ip < ks) {
+ /* Return to userspace, so pop all kernel addresses */
+ err = thread_stack__pop_ks(thread, ts, sample, ref);
+ if (err)
+ return err;
+ }
+
+ if (ts->cnt)
+ parent = ts->stack[ts->cnt - 1].cp;
+ else
+ parent = root;
+
+ if (parent->sym == from_al->sym) {
+ /*
+ * At the bottom of the stack, assume the missing 'call' was
+ * before the trace started. So, pop the current symbol and push
+ * the 'to' symbol.
+ */
+ if (ts->cnt == 1) {
+ err = thread_stack__call_return(thread, ts, --ts->cnt,
+ tm, ref, false);
+ if (err)
+ return err;
+ }
+
+ if (!ts->cnt) {
+ cp = call_path__findnew(cpr, root, tsym, addr, ks);
+
+ return thread_stack__push_cp(ts, addr, tm, ref, cp,
+ true, false);
+ }
+
+ /*
+ * Otherwise assume the 'return' is being used as a jump (e.g.
+ * retpoline) and just push the 'to' symbol.
+ */
+ cp = call_path__findnew(cpr, parent, tsym, addr, ks);
+
+ err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false);
+ if (!err)
+ ts->stack[ts->cnt - 1].non_call = true;
+
+ return err;
+ }
+
+ /*
+ * Assume 'parent' has not yet returned, so push 'to', and then push and
+ * pop 'from'.
+ */
+
+ cp = call_path__findnew(cpr, parent, tsym, addr, ks);
+
+ err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
+ if (err)
+ return err;
+
+ cp = call_path__findnew(cpr, cp, fsym, ip, ks);
+
+ err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false);
+ if (err)
+ return err;
+
+ return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false);
+}
+
+static int thread_stack__trace_begin(struct thread *thread,
+ struct thread_stack *ts, u64 timestamp,
+ u64 ref)
+{
+ struct thread_stack_entry *tse;
+ int err;
+
+ if (!ts->cnt)
+ return 0;
+
+ /* Pop trace end */
+ tse = &ts->stack[ts->cnt - 1];
+ if (tse->trace_end) {
+ err = thread_stack__call_return(thread, ts, --ts->cnt,
+ timestamp, ref, false);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int thread_stack__trace_end(struct thread_stack *ts,
+ struct perf_sample *sample, u64 ref)
+{
+ struct call_path_root *cpr = ts->crp->cpr;
+ struct call_path *cp;
+ u64 ret_addr;
+
+ /* No point having 'trace end' on the bottom of the stack */
+ if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
+ return 0;
+
+ cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
+ ts->kernel_start);
+
+ ret_addr = sample->ip + sample->insn_len;
+
+ return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
+ false, true);
+}
+
+static bool is_x86_retpoline(const char *name)
+{
+ return strstr(name, "__x86_indirect_thunk_") == name;
+}
+
+/*
+ * x86 retpoline functions pollute the call graph. This function removes them.
+ * This does not handle function return thunks, nor is there any improvement
+ * for the handling of inline thunks or extern thunks.
+ */
+static int thread_stack__x86_retpoline(struct thread_stack *ts,
+ struct perf_sample *sample,
+ struct addr_location *to_al)
+{
+ struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1];
+ struct call_path_root *cpr = ts->crp->cpr;
+ struct symbol *sym = tse->cp->sym;
+ struct symbol *tsym = to_al->sym;
+ struct call_path *cp;
+
+ if (sym && is_x86_retpoline(sym->name)) {
+ /*
+ * This is a x86 retpoline fn. It pollutes the call graph by
+ * showing up everywhere there is an indirect branch, but does
+ * not itself mean anything. Here the top-of-stack is removed,
+ * by decrementing the stack count, and then further down, the
+ * resulting top-of-stack is replaced with the actual target.
+ * The result is that the retpoline functions will no longer
+ * appear in the call graph. Note this only affects the call
+ * graph, since all the original branches are left unchanged.
+ */
+ ts->cnt -= 1;
+ sym = ts->stack[ts->cnt - 2].cp->sym;
+ if (sym && sym == tsym && to_al->addr != tsym->start) {
+ /*
+ * Target is back to the middle of the symbol we came
+ * from so assume it is an indirect jmp and forget it
+ * altogether.
+ */
+ ts->cnt -= 1;
+ return 0;
+ }
+ } else if (sym && sym == tsym) {
+ /*
+ * Target is back to the symbol we came from so assume it is an
+ * indirect jmp and forget it altogether.
+ */
+ ts->cnt -= 1;
+ return 0;
+ }
+
+ cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym,
+ sample->addr, ts->kernel_start);
+ if (!cp)
+ return -ENOMEM;
+
+ /* Replace the top-of-stack with the actual target */
+ ts->stack[ts->cnt - 1].cp = cp;
+
+ return 0;
+}
+
+int thread_stack__process(struct thread *thread, struct comm *comm,
+ struct perf_sample *sample,
+ struct addr_location *from_al,
+ struct addr_location *to_al, u64 ref,
+ struct call_return_processor *crp)
+{
+ struct thread_stack *ts = thread__stack(thread, sample->cpu);
+ enum retpoline_state_t rstate;
+ int err = 0;
+
+ if (ts && !ts->crp) {
+ /* Supersede thread_stack__event() */
+ thread_stack__reset(thread, ts);
+ ts = NULL;
+ }
+
+ if (!ts) {
+ ts = thread_stack__new(thread, sample->cpu, crp, true, 0);
+ if (!ts)
+ return -ENOMEM;
+ ts->comm = comm;
+ }
+
+ rstate = ts->rstate;
+ if (rstate == X86_RETPOLINE_DETECTED)
+ ts->rstate = X86_RETPOLINE_POSSIBLE;
+
+ /* Flush stack on exec */
+ if (ts->comm != comm && thread__pid(thread) == thread__tid(thread)) {
+ err = __thread_stack__flush(thread, ts);
+ if (err)
+ return err;
+ ts->comm = comm;
+ }
+
+ /* If the stack is empty, put the current symbol on the stack */
+ if (!ts->cnt) {
+ err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
+ if (err)
+ return err;
+ }
+
+ ts->branch_count += 1;
+ ts->insn_count += sample->insn_cnt;
+ ts->cyc_count += sample->cyc_cnt;
+ ts->last_time = sample->time;
+
+ if (sample->flags & PERF_IP_FLAG_CALL) {
+ bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
+ struct call_path_root *cpr = ts->crp->cpr;
+ struct call_path *cp;
+ u64 ret_addr;
+
+ if (!sample->ip || !sample->addr)
+ return 0;
+
+ ret_addr = sample->ip + sample->insn_len;
+ if (ret_addr == sample->addr)
+ return 0; /* Zero-length calls are excluded */
+
+ cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
+ to_al->sym, sample->addr,
+ ts->kernel_start);
+ err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
+ cp, false, trace_end);
+
+ /*
+ * A call to the same symbol but not the start of the symbol,
+ * may be the start of a x86 retpoline.
+ */
+ if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym &&
+ from_al->sym == to_al->sym &&
+ to_al->addr != to_al->sym->start)
+ ts->rstate = X86_RETPOLINE_DETECTED;
+
+ } else if (sample->flags & PERF_IP_FLAG_RETURN) {
+ if (!sample->addr) {
+ u32 return_from_kernel = PERF_IP_FLAG_SYSCALLRET |
+ PERF_IP_FLAG_INTERRUPT;
+
+ if (!(sample->flags & return_from_kernel))
+ return 0;
+
+ /* Pop kernel stack */
+ return thread_stack__pop_ks(thread, ts, sample, ref);
+ }
+
+ if (!sample->ip)
+ return 0;
+
+ /* x86 retpoline 'return' doesn't match the stack */
+ if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 &&
+ ts->stack[ts->cnt - 1].ret_addr != sample->addr)
+ return thread_stack__x86_retpoline(ts, sample, to_al);
+
+ err = thread_stack__pop_cp(thread, ts, sample->addr,
+ sample->time, ref, from_al->sym);
+ if (err) {
+ if (err < 0)
+ return err;
+ err = thread_stack__no_call_return(thread, ts, sample,
+ from_al, to_al, ref);
+ }
+ } else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
+ err = thread_stack__trace_begin(thread, ts, sample->time, ref);
+ } else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
+ err = thread_stack__trace_end(ts, sample, ref);
+ } else if (sample->flags & PERF_IP_FLAG_BRANCH &&
+ from_al->sym != to_al->sym && to_al->sym &&
+ to_al->addr == to_al->sym->start) {
+ struct call_path_root *cpr = ts->crp->cpr;
+ struct call_path *cp;
+
+ /*
+ * The compiler might optimize a call/ret combination by making
+ * it a jmp. Make that visible by recording on the stack a
+ * branch to the start of a different symbol. Note, that means
+ * when a ret pops the stack, all jmps must be popped off first.
+ */
+ cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
+ to_al->sym, sample->addr,
+ ts->kernel_start);
+ err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false,
+ false);
+ if (!err)
+ ts->stack[ts->cnt - 1].non_call = true;
+ }
+
+ return err;
+}
+
+size_t thread_stack__depth(struct thread *thread, int cpu)
+{
+ struct thread_stack *ts = thread__stack(thread, cpu);
+
+ if (!ts)
+ return 0;
+ return ts->cnt;
+}
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
new file mode 100644
index 000000000..b3cd09beb
--- /dev/null
+++ b/tools/perf/util/thread-stack.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * thread-stack.h: Synthesize a thread's stack using call / return events
+ * Copyright (c) 2014, Intel Corporation.
+ */
+
+#ifndef __PERF_THREAD_STACK_H
+#define __PERF_THREAD_STACK_H
+
+#include <sys/types.h>
+
+#include <linux/types.h>
+
+struct thread;
+struct comm;
+struct ip_callchain;
+struct symbol;
+struct dso;
+struct perf_sample;
+struct addr_location;
+struct call_path;
+
+/*
+ * Call/Return flags.
+ *
+ * CALL_RETURN_NO_CALL: 'return' but no matching 'call'
+ * CALL_RETURN_NO_RETURN: 'call' but no matching 'return'
+ * CALL_RETURN_NON_CALL: a branch but not a 'call' to the start of a different
+ * symbol
+ */
+enum {
+ CALL_RETURN_NO_CALL = 1 << 0,
+ CALL_RETURN_NO_RETURN = 1 << 1,
+ CALL_RETURN_NON_CALL = 1 << 2,
+};
+
+/**
+ * struct call_return - paired call/return information.
+ * @thread: thread in which call/return occurred
+ * @comm: comm in which call/return occurred
+ * @cp: call path
+ * @call_time: timestamp of call (if known)
+ * @return_time: timestamp of return (if known)
+ * @branch_count: number of branches seen between call and return
+ * @insn_count: approx. number of instructions between call and return
+ * @cyc_count: approx. number of cycles between call and return
+ * @call_ref: external reference to 'call' sample (e.g. db_id)
+ * @return_ref: external reference to 'return' sample (e.g. db_id)
+ * @db_id: id used for db-export
+ * @parent_db_id: id of parent call used for db-export
+ * @flags: Call/Return flags
+ */
+struct call_return {
+ struct thread *thread;
+ struct comm *comm;
+ struct call_path *cp;
+ u64 call_time;
+ u64 return_time;
+ u64 branch_count;
+ u64 insn_count;
+ u64 cyc_count;
+ u64 call_ref;
+ u64 return_ref;
+ u64 db_id;
+ u64 parent_db_id;
+ u32 flags;
+};
+
+/**
+ * struct call_return_processor - provides a call-back to consume call-return
+ * information.
+ * @cpr: call path root
+ * @process: call-back that accepts call/return information
+ * @data: anonymous data for call-back
+ */
+struct call_return_processor {
+ struct call_path_root *cpr;
+ int (*process)(struct call_return *cr, u64 *parent_db_id, void *data);
+ void *data;
+};
+
+int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
+ u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack,
+ unsigned int br_stack_sz, bool mispred_all);
+void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr);
+void thread_stack__sample(struct thread *thread, int cpu, struct ip_callchain *chain,
+ size_t sz, u64 ip, u64 kernel_start);
+void thread_stack__sample_late(struct thread *thread, int cpu,
+ struct ip_callchain *chain, size_t sz, u64 ip,
+ u64 kernel_start);
+void thread_stack__br_sample(struct thread *thread, int cpu,
+ struct branch_stack *dst, unsigned int sz);
+void thread_stack__br_sample_late(struct thread *thread, int cpu,
+ struct branch_stack *dst, unsigned int sz,
+ u64 sample_ip, u64 kernel_start);
+int thread_stack__flush(struct thread *thread);
+void thread_stack__free(struct thread *thread);
+size_t thread_stack__depth(struct thread *thread, int cpu);
+
+struct call_return_processor *
+call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data),
+ void *data);
+void call_return_processor__free(struct call_return_processor *crp);
+int thread_stack__process(struct thread *thread, struct comm *comm,
+ struct perf_sample *sample,
+ struct addr_location *from_al,
+ struct addr_location *to_al, u64 ref,
+ struct call_return_processor *crp);
+
+#endif
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
new file mode 100644
index 000000000..fe5e6991a
--- /dev/null
+++ b/tools/perf/util/thread.c
@@ -0,0 +1,497 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+#include "dso.h"
+#include "session.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "debug.h"
+#include "namespaces.h"
+#include "comm.h"
+#include "map.h"
+#include "symbol.h"
+#include "unwind.h"
+#include "callchain.h"
+
+#include <api/fs/fs.h>
+
+int thread__init_maps(struct thread *thread, struct machine *machine)
+{
+ pid_t pid = thread__pid(thread);
+
+ if (pid == thread__tid(thread) || pid == -1) {
+ thread__set_maps(thread, maps__new(machine));
+ } else {
+ struct thread *leader = __machine__findnew_thread(machine, pid, pid);
+
+ if (leader) {
+ thread__set_maps(thread, maps__get(thread__maps(leader)));
+ thread__put(leader);
+ }
+ }
+
+ return thread__maps(thread) ? 0 : -1;
+}
+
+struct thread *thread__new(pid_t pid, pid_t tid)
+{
+ char *comm_str;
+ struct comm *comm;
+ RC_STRUCT(thread) *_thread = zalloc(sizeof(*_thread));
+ struct thread *thread;
+
+ if (ADD_RC_CHK(thread, _thread) != NULL) {
+ thread__set_pid(thread, pid);
+ thread__set_tid(thread, tid);
+ thread__set_ppid(thread, -1);
+ thread__set_cpu(thread, -1);
+ thread__set_guest_cpu(thread, -1);
+ thread__set_lbr_stitch_enable(thread, false);
+ INIT_LIST_HEAD(thread__namespaces_list(thread));
+ INIT_LIST_HEAD(thread__comm_list(thread));
+ init_rwsem(thread__namespaces_lock(thread));
+ init_rwsem(thread__comm_lock(thread));
+
+ comm_str = malloc(32);
+ if (!comm_str)
+ goto err_thread;
+
+ snprintf(comm_str, 32, ":%d", tid);
+ comm = comm__new(comm_str, 0, false);
+ free(comm_str);
+ if (!comm)
+ goto err_thread;
+
+ list_add(&comm->list, thread__comm_list(thread));
+ refcount_set(thread__refcnt(thread), 1);
+ /* Thread holds first ref to nsdata. */
+ RC_CHK_ACCESS(thread)->nsinfo = nsinfo__new(pid);
+ srccode_state_init(thread__srccode_state(thread));
+ }
+
+ return thread;
+
+err_thread:
+ free(thread);
+ return NULL;
+}
+
+static void (*thread__priv_destructor)(void *priv);
+
+void thread__set_priv_destructor(void (*destructor)(void *priv))
+{
+ assert(thread__priv_destructor == NULL);
+
+ thread__priv_destructor = destructor;
+}
+
+void thread__delete(struct thread *thread)
+{
+ struct namespaces *namespaces, *tmp_namespaces;
+ struct comm *comm, *tmp_comm;
+
+ thread_stack__free(thread);
+
+ if (thread__maps(thread)) {
+ maps__put(thread__maps(thread));
+ thread__set_maps(thread, NULL);
+ }
+ down_write(thread__namespaces_lock(thread));
+ list_for_each_entry_safe(namespaces, tmp_namespaces,
+ thread__namespaces_list(thread), list) {
+ list_del_init(&namespaces->list);
+ namespaces__free(namespaces);
+ }
+ up_write(thread__namespaces_lock(thread));
+
+ down_write(thread__comm_lock(thread));
+ list_for_each_entry_safe(comm, tmp_comm, thread__comm_list(thread), list) {
+ list_del_init(&comm->list);
+ comm__free(comm);
+ }
+ up_write(thread__comm_lock(thread));
+
+ nsinfo__zput(RC_CHK_ACCESS(thread)->nsinfo);
+ srccode_state_free(thread__srccode_state(thread));
+
+ exit_rwsem(thread__namespaces_lock(thread));
+ exit_rwsem(thread__comm_lock(thread));
+ thread__free_stitch_list(thread);
+
+ if (thread__priv_destructor)
+ thread__priv_destructor(thread__priv(thread));
+
+ RC_CHK_FREE(thread);
+}
+
+struct thread *thread__get(struct thread *thread)
+{
+ struct thread *result;
+
+ if (RC_CHK_GET(result, thread))
+ refcount_inc(thread__refcnt(thread));
+
+ return result;
+}
+
+void thread__put(struct thread *thread)
+{
+ if (thread && refcount_dec_and_test(thread__refcnt(thread)))
+ thread__delete(thread);
+ else
+ RC_CHK_PUT(thread);
+}
+
+static struct namespaces *__thread__namespaces(struct thread *thread)
+{
+ if (list_empty(thread__namespaces_list(thread)))
+ return NULL;
+
+ return list_first_entry(thread__namespaces_list(thread), struct namespaces, list);
+}
+
+struct namespaces *thread__namespaces(struct thread *thread)
+{
+ struct namespaces *ns;
+
+ down_read(thread__namespaces_lock(thread));
+ ns = __thread__namespaces(thread);
+ up_read(thread__namespaces_lock(thread));
+
+ return ns;
+}
+
+static int __thread__set_namespaces(struct thread *thread, u64 timestamp,
+ struct perf_record_namespaces *event)
+{
+ struct namespaces *new, *curr = __thread__namespaces(thread);
+
+ new = namespaces__new(event);
+ if (!new)
+ return -ENOMEM;
+
+ list_add(&new->list, thread__namespaces_list(thread));
+
+ if (timestamp && curr) {
+ /*
+ * setns syscall must have changed few or all the namespaces
+ * of this thread. Update end time for the namespaces
+ * previously used.
+ */
+ curr = list_next_entry(new, list);
+ curr->end_time = timestamp;
+ }
+
+ return 0;
+}
+
+int thread__set_namespaces(struct thread *thread, u64 timestamp,
+ struct perf_record_namespaces *event)
+{
+ int ret;
+
+ down_write(thread__namespaces_lock(thread));
+ ret = __thread__set_namespaces(thread, timestamp, event);
+ up_write(thread__namespaces_lock(thread));
+ return ret;
+}
+
+struct comm *thread__comm(struct thread *thread)
+{
+ if (list_empty(thread__comm_list(thread)))
+ return NULL;
+
+ return list_first_entry(thread__comm_list(thread), struct comm, list);
+}
+
+struct comm *thread__exec_comm(struct thread *thread)
+{
+ struct comm *comm, *last = NULL, *second_last = NULL;
+
+ list_for_each_entry(comm, thread__comm_list(thread), list) {
+ if (comm->exec)
+ return comm;
+ second_last = last;
+ last = comm;
+ }
+
+ /*
+ * 'last' with no start time might be the parent's comm of a synthesized
+ * thread (created by processing a synthesized fork event). For a main
+ * thread, that is very probably wrong. Prefer a later comm to avoid
+ * that case.
+ */
+ if (second_last && !last->start && thread__pid(thread) == thread__tid(thread))
+ return second_last;
+
+ return last;
+}
+
+static int ____thread__set_comm(struct thread *thread, const char *str,
+ u64 timestamp, bool exec)
+{
+ struct comm *new, *curr = thread__comm(thread);
+
+ /* Override the default :tid entry */
+ if (!thread__comm_set(thread)) {
+ int err = comm__override(curr, str, timestamp, exec);
+ if (err)
+ return err;
+ } else {
+ new = comm__new(str, timestamp, exec);
+ if (!new)
+ return -ENOMEM;
+ list_add(&new->list, thread__comm_list(thread));
+
+ if (exec)
+ unwind__flush_access(thread__maps(thread));
+ }
+
+ thread__set_comm_set(thread, true);
+
+ return 0;
+}
+
+int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp,
+ bool exec)
+{
+ int ret;
+
+ down_write(thread__comm_lock(thread));
+ ret = ____thread__set_comm(thread, str, timestamp, exec);
+ up_write(thread__comm_lock(thread));
+ return ret;
+}
+
+int thread__set_comm_from_proc(struct thread *thread)
+{
+ char path[64];
+ char *comm = NULL;
+ size_t sz;
+ int err = -1;
+
+ if (!(snprintf(path, sizeof(path), "%d/task/%d/comm",
+ thread__pid(thread), thread__tid(thread)) >= (int)sizeof(path)) &&
+ procfs__read_str(path, &comm, &sz) == 0) {
+ comm[sz - 1] = '\0';
+ err = thread__set_comm(thread, comm, 0);
+ }
+
+ return err;
+}
+
+static const char *__thread__comm_str(struct thread *thread)
+{
+ const struct comm *comm = thread__comm(thread);
+
+ if (!comm)
+ return NULL;
+
+ return comm__str(comm);
+}
+
+const char *thread__comm_str(struct thread *thread)
+{
+ const char *str;
+
+ down_read(thread__comm_lock(thread));
+ str = __thread__comm_str(thread);
+ up_read(thread__comm_lock(thread));
+
+ return str;
+}
+
+static int __thread__comm_len(struct thread *thread, const char *comm)
+{
+ if (!comm)
+ return 0;
+ thread__set_comm_len(thread, strlen(comm));
+
+ return thread__var_comm_len(thread);
+}
+
+/* CHECKME: it should probably better return the max comm len from its comm list */
+int thread__comm_len(struct thread *thread)
+{
+ int comm_len = thread__var_comm_len(thread);
+
+ if (!comm_len) {
+ const char *comm;
+
+ down_read(thread__comm_lock(thread));
+ comm = __thread__comm_str(thread);
+ comm_len = __thread__comm_len(thread, comm);
+ up_read(thread__comm_lock(thread));
+ }
+
+ return comm_len;
+}
+
+size_t thread__fprintf(struct thread *thread, FILE *fp)
+{
+ return fprintf(fp, "Thread %d %s\n", thread__tid(thread), thread__comm_str(thread)) +
+ maps__fprintf(thread__maps(thread), fp);
+}
+
+int thread__insert_map(struct thread *thread, struct map *map)
+{
+ int ret;
+
+ ret = unwind__prepare_access(thread__maps(thread), map, NULL);
+ if (ret)
+ return ret;
+
+ maps__fixup_overlappings(thread__maps(thread), map, stderr);
+ return maps__insert(thread__maps(thread), map);
+}
+
+static int __thread__prepare_access(struct thread *thread)
+{
+ bool initialized = false;
+ int err = 0;
+ struct maps *maps = thread__maps(thread);
+ struct map_rb_node *rb_node;
+
+ down_read(maps__lock(maps));
+
+ maps__for_each_entry(maps, rb_node) {
+ err = unwind__prepare_access(thread__maps(thread), rb_node->map, &initialized);
+ if (err || initialized)
+ break;
+ }
+
+ up_read(maps__lock(maps));
+
+ return err;
+}
+
+static int thread__prepare_access(struct thread *thread)
+{
+ int err = 0;
+
+ if (dwarf_callchain_users)
+ err = __thread__prepare_access(thread);
+
+ return err;
+}
+
+static int thread__clone_maps(struct thread *thread, struct thread *parent, bool do_maps_clone)
+{
+ /* This is new thread, we share map groups for process. */
+ if (thread__pid(thread) == thread__pid(parent))
+ return thread__prepare_access(thread);
+
+ if (thread__maps(thread) == thread__maps(parent)) {
+ pr_debug("broken map groups on thread %d/%d parent %d/%d\n",
+ thread__pid(thread), thread__tid(thread),
+ thread__pid(parent), thread__tid(parent));
+ return 0;
+ }
+ /* But this one is new process, copy maps. */
+ return do_maps_clone ? maps__clone(thread, thread__maps(parent)) : 0;
+}
+
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone)
+{
+ if (thread__comm_set(parent)) {
+ const char *comm = thread__comm_str(parent);
+ int err;
+ if (!comm)
+ return -ENOMEM;
+ err = thread__set_comm(thread, comm, timestamp);
+ if (err)
+ return err;
+ }
+
+ thread__set_ppid(thread, thread__tid(parent));
+ return thread__clone_maps(thread, parent, do_maps_clone);
+}
+
+void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
+ struct addr_location *al)
+{
+ size_t i;
+ const u8 cpumodes[] = {
+ PERF_RECORD_MISC_USER,
+ PERF_RECORD_MISC_KERNEL,
+ PERF_RECORD_MISC_GUEST_USER,
+ PERF_RECORD_MISC_GUEST_KERNEL
+ };
+
+ for (i = 0; i < ARRAY_SIZE(cpumodes); i++) {
+ thread__find_symbol(thread, cpumodes[i], addr, al);
+ if (al->map)
+ break;
+ }
+}
+
+struct thread *thread__main_thread(struct machine *machine, struct thread *thread)
+{
+ if (thread__pid(thread) == thread__tid(thread))
+ return thread__get(thread);
+
+ if (thread__pid(thread) == -1)
+ return NULL;
+
+ return machine__find_thread(machine, thread__pid(thread), thread__pid(thread));
+}
+
+int thread__memcpy(struct thread *thread, struct machine *machine,
+ void *buf, u64 ip, int len, bool *is64bit)
+{
+ u8 cpumode = PERF_RECORD_MISC_USER;
+ struct addr_location al;
+ struct dso *dso;
+ long offset;
+
+ if (machine__kernel_ip(machine, ip))
+ cpumode = PERF_RECORD_MISC_KERNEL;
+
+ addr_location__init(&al);
+ if (!thread__find_map(thread, cpumode, ip, &al)) {
+ addr_location__exit(&al);
+ return -1;
+ }
+
+ dso = map__dso(al.map);
+
+ if (!dso || dso->data.status == DSO_DATA_STATUS_ERROR || map__load(al.map) < 0) {
+ addr_location__exit(&al);
+ return -1;
+ }
+
+ offset = map__map_ip(al.map, ip);
+ if (is64bit)
+ *is64bit = dso->is_64_bit;
+
+ addr_location__exit(&al);
+
+ return dso__data_read_offset(dso, machine, offset, buf, len);
+}
+
+void thread__free_stitch_list(struct thread *thread)
+{
+ struct lbr_stitch *lbr_stitch = thread__lbr_stitch(thread);
+ struct stitch_list *pos, *tmp;
+
+ if (!lbr_stitch)
+ return;
+
+ list_for_each_entry_safe(pos, tmp, &lbr_stitch->lists, node) {
+ list_del_init(&pos->node);
+ free(pos);
+ }
+
+ list_for_each_entry_safe(pos, tmp, &lbr_stitch->free_lists, node) {
+ list_del_init(&pos->node);
+ free(pos);
+ }
+
+ zfree(&lbr_stitch->prev_lbr_cursor);
+ free(thread__lbr_stitch(thread));
+ thread__set_lbr_stitch(thread, NULL);
+}
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
new file mode 100644
index 000000000..e79225a0e
--- /dev/null
+++ b/tools/perf/util/thread.h
@@ -0,0 +1,342 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_THREAD_H
+#define __PERF_THREAD_H
+
+#include <linux/refcount.h>
+#include <linux/rbtree.h>
+#include <linux/list.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include "srccode.h"
+#include "symbol_conf.h"
+#include <strlist.h>
+#include <intlist.h>
+#include "rwsem.h"
+#include "event.h"
+#include "callchain.h"
+#include <internal/rc_check.h>
+
+struct addr_location;
+struct map;
+struct perf_record_namespaces;
+struct thread_stack;
+struct unwind_libunwind_ops;
+
+struct lbr_stitch {
+ struct list_head lists;
+ struct list_head free_lists;
+ struct perf_sample prev_sample;
+ struct callchain_cursor_node *prev_lbr_cursor;
+};
+
+struct thread_rb_node {
+ struct rb_node rb_node;
+ struct thread *thread;
+};
+
+DECLARE_RC_STRUCT(thread) {
+ struct maps *maps;
+ pid_t pid_; /* Not all tools update this */
+ pid_t tid;
+ pid_t ppid;
+ int cpu;
+ int guest_cpu; /* For QEMU thread */
+ refcount_t refcnt;
+ bool comm_set;
+ int comm_len;
+ struct list_head namespaces_list;
+ struct rw_semaphore namespaces_lock;
+ struct list_head comm_list;
+ struct rw_semaphore comm_lock;
+ u64 db_id;
+
+ void *priv;
+ struct thread_stack *ts;
+ struct nsinfo *nsinfo;
+ struct srccode_state srccode_state;
+ bool filter;
+ int filter_entry_depth;
+
+ /* LBR call stack stitch */
+ bool lbr_stitch_enable;
+ struct lbr_stitch *lbr_stitch;
+};
+
+struct machine;
+struct namespaces;
+struct comm;
+
+struct thread *thread__new(pid_t pid, pid_t tid);
+int thread__init_maps(struct thread *thread, struct machine *machine);
+void thread__delete(struct thread *thread);
+
+void thread__set_priv_destructor(void (*destructor)(void *priv));
+
+struct thread *thread__get(struct thread *thread);
+void thread__put(struct thread *thread);
+
+static inline void __thread__zput(struct thread **thread)
+{
+ thread__put(*thread);
+ *thread = NULL;
+}
+
+#define thread__zput(thread) __thread__zput(&thread)
+
+struct namespaces *thread__namespaces(struct thread *thread);
+int thread__set_namespaces(struct thread *thread, u64 timestamp,
+ struct perf_record_namespaces *event);
+
+int __thread__set_comm(struct thread *thread, const char *comm, u64 timestamp,
+ bool exec);
+static inline int thread__set_comm(struct thread *thread, const char *comm,
+ u64 timestamp)
+{
+ return __thread__set_comm(thread, comm, timestamp, false);
+}
+
+int thread__set_comm_from_proc(struct thread *thread);
+
+int thread__comm_len(struct thread *thread);
+struct comm *thread__comm(struct thread *thread);
+struct comm *thread__exec_comm(struct thread *thread);
+const char *thread__comm_str(struct thread *thread);
+int thread__insert_map(struct thread *thread, struct map *map);
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone);
+size_t thread__fprintf(struct thread *thread, FILE *fp);
+
+struct thread *thread__main_thread(struct machine *machine, struct thread *thread);
+
+struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
+ struct addr_location *al);
+struct map *thread__find_map_fb(struct thread *thread, u8 cpumode, u64 addr,
+ struct addr_location *al);
+
+struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode,
+ u64 addr, struct addr_location *al);
+struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode,
+ u64 addr, struct addr_location *al);
+
+void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
+ struct addr_location *al);
+
+int thread__memcpy(struct thread *thread, struct machine *machine,
+ void *buf, u64 ip, int len, bool *is64bit);
+
+static inline struct maps *thread__maps(struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->maps;
+}
+
+static inline void thread__set_maps(struct thread *thread, struct maps *maps)
+{
+ RC_CHK_ACCESS(thread)->maps = maps;
+}
+
+static inline pid_t thread__pid(const struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->pid_;
+}
+
+static inline void thread__set_pid(struct thread *thread, pid_t pid_)
+{
+ RC_CHK_ACCESS(thread)->pid_ = pid_;
+}
+
+static inline pid_t thread__tid(const struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->tid;
+}
+
+static inline void thread__set_tid(struct thread *thread, pid_t tid)
+{
+ RC_CHK_ACCESS(thread)->tid = tid;
+}
+
+static inline pid_t thread__ppid(const struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->ppid;
+}
+
+static inline void thread__set_ppid(struct thread *thread, pid_t ppid)
+{
+ RC_CHK_ACCESS(thread)->ppid = ppid;
+}
+
+static inline int thread__cpu(const struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->cpu;
+}
+
+static inline void thread__set_cpu(struct thread *thread, int cpu)
+{
+ RC_CHK_ACCESS(thread)->cpu = cpu;
+}
+
+static inline int thread__guest_cpu(const struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->guest_cpu;
+}
+
+static inline void thread__set_guest_cpu(struct thread *thread, int guest_cpu)
+{
+ RC_CHK_ACCESS(thread)->guest_cpu = guest_cpu;
+}
+
+static inline refcount_t *thread__refcnt(struct thread *thread)
+{
+ return &RC_CHK_ACCESS(thread)->refcnt;
+}
+
+static inline bool thread__comm_set(const struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->comm_set;
+}
+
+static inline void thread__set_comm_set(struct thread *thread, bool set)
+{
+ RC_CHK_ACCESS(thread)->comm_set = set;
+}
+
+static inline int thread__var_comm_len(const struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->comm_len;
+}
+
+static inline void thread__set_comm_len(struct thread *thread, int len)
+{
+ RC_CHK_ACCESS(thread)->comm_len = len;
+}
+
+static inline struct list_head *thread__namespaces_list(struct thread *thread)
+{
+ return &RC_CHK_ACCESS(thread)->namespaces_list;
+}
+
+static inline int thread__namespaces_list_empty(const struct thread *thread)
+{
+ return list_empty(&RC_CHK_ACCESS(thread)->namespaces_list);
+}
+
+static inline struct rw_semaphore *thread__namespaces_lock(struct thread *thread)
+{
+ return &RC_CHK_ACCESS(thread)->namespaces_lock;
+}
+
+static inline struct list_head *thread__comm_list(struct thread *thread)
+{
+ return &RC_CHK_ACCESS(thread)->comm_list;
+}
+
+static inline struct rw_semaphore *thread__comm_lock(struct thread *thread)
+{
+ return &RC_CHK_ACCESS(thread)->comm_lock;
+}
+
+static inline u64 thread__db_id(const struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->db_id;
+}
+
+static inline void thread__set_db_id(struct thread *thread, u64 db_id)
+{
+ RC_CHK_ACCESS(thread)->db_id = db_id;
+}
+
+static inline void *thread__priv(struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->priv;
+}
+
+static inline void thread__set_priv(struct thread *thread, void *p)
+{
+ RC_CHK_ACCESS(thread)->priv = p;
+}
+
+static inline struct thread_stack *thread__ts(struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->ts;
+}
+
+static inline void thread__set_ts(struct thread *thread, struct thread_stack *ts)
+{
+ RC_CHK_ACCESS(thread)->ts = ts;
+}
+
+static inline struct nsinfo *thread__nsinfo(struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->nsinfo;
+}
+
+static inline struct srccode_state *thread__srccode_state(struct thread *thread)
+{
+ return &RC_CHK_ACCESS(thread)->srccode_state;
+}
+
+static inline bool thread__filter(const struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->filter;
+}
+
+static inline void thread__set_filter(struct thread *thread, bool filter)
+{
+ RC_CHK_ACCESS(thread)->filter = filter;
+}
+
+static inline int thread__filter_entry_depth(const struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->filter_entry_depth;
+}
+
+static inline void thread__set_filter_entry_depth(struct thread *thread, int depth)
+{
+ RC_CHK_ACCESS(thread)->filter_entry_depth = depth;
+}
+
+static inline bool thread__lbr_stitch_enable(const struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->lbr_stitch_enable;
+}
+
+static inline void thread__set_lbr_stitch_enable(struct thread *thread, bool en)
+{
+ RC_CHK_ACCESS(thread)->lbr_stitch_enable = en;
+}
+
+static inline struct lbr_stitch *thread__lbr_stitch(struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->lbr_stitch;
+}
+
+static inline void thread__set_lbr_stitch(struct thread *thread, struct lbr_stitch *lbrs)
+{
+ RC_CHK_ACCESS(thread)->lbr_stitch = lbrs;
+}
+
+static inline bool thread__is_filtered(struct thread *thread)
+{
+ if (symbol_conf.comm_list &&
+ !strlist__has_entry(symbol_conf.comm_list, thread__comm_str(thread))) {
+ return true;
+ }
+
+ if (symbol_conf.pid_list &&
+ !intlist__has_entry(symbol_conf.pid_list, thread__pid(thread))) {
+ return true;
+ }
+
+ if (symbol_conf.tid_list &&
+ !intlist__has_entry(symbol_conf.tid_list, thread__tid(thread))) {
+ return true;
+ }
+
+ return false;
+}
+
+void thread__free_stitch_list(struct thread *thread);
+
+void thread__resolve(struct thread *thread, struct addr_location *al,
+ struct perf_sample *sample);
+
+#endif /* __PERF_THREAD_H */
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
new file mode 100644
index 000000000..e848579e6
--- /dev/null
+++ b/tools/perf/util/thread_map.c
@@ -0,0 +1,430 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <dirent.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "string2.h"
+#include "strlist.h"
+#include <string.h>
+#include <api/fs/fs.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include "asm/bug.h"
+#include "thread_map.h"
+#include "debug.h"
+#include "event.h"
+#include <internal/threadmap.h>
+
+/* Skip "." and ".." directories */
+static int filter(const struct dirent *dir)
+{
+ if (dir->d_name[0] == '.')
+ return 0;
+ else
+ return 1;
+}
+
+#define thread_map__alloc(__nr) perf_thread_map__realloc(NULL, __nr)
+
+struct perf_thread_map *thread_map__new_by_pid(pid_t pid)
+{
+ struct perf_thread_map *threads;
+ char name[256];
+ int items;
+ struct dirent **namelist = NULL;
+ int i;
+
+ sprintf(name, "/proc/%d/task", pid);
+ items = scandir(name, &namelist, filter, NULL);
+ if (items <= 0)
+ return NULL;
+
+ threads = thread_map__alloc(items);
+ if (threads != NULL) {
+ for (i = 0; i < items; i++)
+ perf_thread_map__set_pid(threads, i, atoi(namelist[i]->d_name));
+ threads->nr = items;
+ refcount_set(&threads->refcnt, 1);
+ }
+
+ for (i=0; i<items; i++)
+ zfree(&namelist[i]);
+ free(namelist);
+
+ return threads;
+}
+
+struct perf_thread_map *thread_map__new_by_tid(pid_t tid)
+{
+ struct perf_thread_map *threads = thread_map__alloc(1);
+
+ if (threads != NULL) {
+ perf_thread_map__set_pid(threads, 0, tid);
+ threads->nr = 1;
+ refcount_set(&threads->refcnt, 1);
+ }
+
+ return threads;
+}
+
+static struct perf_thread_map *__thread_map__new_all_cpus(uid_t uid)
+{
+ DIR *proc;
+ int max_threads = 32, items, i;
+ char path[NAME_MAX + 1 + 6];
+ struct dirent *dirent, **namelist = NULL;
+ struct perf_thread_map *threads = thread_map__alloc(max_threads);
+
+ if (threads == NULL)
+ goto out;
+
+ proc = opendir("/proc");
+ if (proc == NULL)
+ goto out_free_threads;
+
+ threads->nr = 0;
+ refcount_set(&threads->refcnt, 1);
+
+ while ((dirent = readdir(proc)) != NULL) {
+ char *end;
+ bool grow = false;
+ pid_t pid = strtol(dirent->d_name, &end, 10);
+
+ if (*end) /* only interested in proper numerical dirents */
+ continue;
+
+ snprintf(path, sizeof(path), "/proc/%s", dirent->d_name);
+
+ if (uid != UINT_MAX) {
+ struct stat st;
+
+ if (stat(path, &st) != 0 || st.st_uid != uid)
+ continue;
+ }
+
+ snprintf(path, sizeof(path), "/proc/%d/task", pid);
+ items = scandir(path, &namelist, filter, NULL);
+ if (items <= 0)
+ goto out_free_closedir;
+
+ while (threads->nr + items >= max_threads) {
+ max_threads *= 2;
+ grow = true;
+ }
+
+ if (grow) {
+ struct perf_thread_map *tmp;
+
+ tmp = perf_thread_map__realloc(threads, max_threads);
+ if (tmp == NULL)
+ goto out_free_namelist;
+
+ threads = tmp;
+ }
+
+ for (i = 0; i < items; i++) {
+ perf_thread_map__set_pid(threads, threads->nr + i,
+ atoi(namelist[i]->d_name));
+ }
+
+ for (i = 0; i < items; i++)
+ zfree(&namelist[i]);
+ free(namelist);
+
+ threads->nr += items;
+ }
+
+out_closedir:
+ closedir(proc);
+out:
+ return threads;
+
+out_free_threads:
+ free(threads);
+ return NULL;
+
+out_free_namelist:
+ for (i = 0; i < items; i++)
+ zfree(&namelist[i]);
+ free(namelist);
+
+out_free_closedir:
+ zfree(&threads);
+ goto out_closedir;
+}
+
+struct perf_thread_map *thread_map__new_all_cpus(void)
+{
+ return __thread_map__new_all_cpus(UINT_MAX);
+}
+
+struct perf_thread_map *thread_map__new_by_uid(uid_t uid)
+{
+ return __thread_map__new_all_cpus(uid);
+}
+
+struct perf_thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid)
+{
+ if (pid != -1)
+ return thread_map__new_by_pid(pid);
+
+ if (tid == -1 && uid != UINT_MAX)
+ return thread_map__new_by_uid(uid);
+
+ return thread_map__new_by_tid(tid);
+}
+
+static struct perf_thread_map *thread_map__new_by_pid_str(const char *pid_str)
+{
+ struct perf_thread_map *threads = NULL, *nt;
+ char name[256];
+ int items, total_tasks = 0;
+ struct dirent **namelist = NULL;
+ int i, j = 0;
+ pid_t pid, prev_pid = INT_MAX;
+ char *end_ptr;
+ struct str_node *pos;
+ struct strlist_config slist_config = { .dont_dupstr = true, };
+ struct strlist *slist = strlist__new(pid_str, &slist_config);
+
+ if (!slist)
+ return NULL;
+
+ strlist__for_each_entry(pos, slist) {
+ pid = strtol(pos->s, &end_ptr, 10);
+
+ if (pid == INT_MIN || pid == INT_MAX ||
+ (*end_ptr != '\0' && *end_ptr != ','))
+ goto out_free_threads;
+
+ if (pid == prev_pid)
+ continue;
+
+ sprintf(name, "/proc/%d/task", pid);
+ items = scandir(name, &namelist, filter, NULL);
+ if (items <= 0)
+ goto out_free_threads;
+
+ total_tasks += items;
+ nt = perf_thread_map__realloc(threads, total_tasks);
+ if (nt == NULL)
+ goto out_free_namelist;
+
+ threads = nt;
+
+ for (i = 0; i < items; i++) {
+ perf_thread_map__set_pid(threads, j++, atoi(namelist[i]->d_name));
+ zfree(&namelist[i]);
+ }
+ threads->nr = total_tasks;
+ free(namelist);
+ }
+
+out:
+ strlist__delete(slist);
+ if (threads)
+ refcount_set(&threads->refcnt, 1);
+ return threads;
+
+out_free_namelist:
+ for (i = 0; i < items; i++)
+ zfree(&namelist[i]);
+ free(namelist);
+
+out_free_threads:
+ zfree(&threads);
+ goto out;
+}
+
+struct perf_thread_map *thread_map__new_by_tid_str(const char *tid_str)
+{
+ struct perf_thread_map *threads = NULL, *nt;
+ int ntasks = 0;
+ pid_t tid, prev_tid = INT_MAX;
+ char *end_ptr;
+ struct str_node *pos;
+ struct strlist_config slist_config = { .dont_dupstr = true, };
+ struct strlist *slist;
+
+ /* perf-stat expects threads to be generated even if tid not given */
+ if (!tid_str)
+ return perf_thread_map__new_dummy();
+
+ slist = strlist__new(tid_str, &slist_config);
+ if (!slist)
+ return NULL;
+
+ strlist__for_each_entry(pos, slist) {
+ tid = strtol(pos->s, &end_ptr, 10);
+
+ if (tid == INT_MIN || tid == INT_MAX ||
+ (*end_ptr != '\0' && *end_ptr != ','))
+ goto out_free_threads;
+
+ if (tid == prev_tid)
+ continue;
+
+ ntasks++;
+ nt = perf_thread_map__realloc(threads, ntasks);
+
+ if (nt == NULL)
+ goto out_free_threads;
+
+ threads = nt;
+ perf_thread_map__set_pid(threads, ntasks - 1, tid);
+ threads->nr = ntasks;
+ }
+out:
+ if (threads)
+ refcount_set(&threads->refcnt, 1);
+ return threads;
+
+out_free_threads:
+ zfree(&threads);
+ strlist__delete(slist);
+ goto out;
+}
+
+struct perf_thread_map *thread_map__new_str(const char *pid, const char *tid,
+ uid_t uid, bool all_threads)
+{
+ if (pid)
+ return thread_map__new_by_pid_str(pid);
+
+ if (!tid && uid != UINT_MAX)
+ return thread_map__new_by_uid(uid);
+
+ if (all_threads)
+ return thread_map__new_all_cpus();
+
+ return thread_map__new_by_tid_str(tid);
+}
+
+size_t thread_map__fprintf(struct perf_thread_map *threads, FILE *fp)
+{
+ int i;
+ size_t printed = fprintf(fp, "%d thread%s: ",
+ threads->nr, threads->nr > 1 ? "s" : "");
+ for (i = 0; i < threads->nr; ++i)
+ printed += fprintf(fp, "%s%d", i ? ", " : "", perf_thread_map__pid(threads, i));
+
+ return printed + fprintf(fp, "\n");
+}
+
+static int get_comm(char **comm, pid_t pid)
+{
+ char *path;
+ size_t size;
+ int err;
+
+ if (asprintf(&path, "%s/%d/comm", procfs__mountpoint(), pid) == -1)
+ return -ENOMEM;
+
+ err = filename__read_str(path, comm, &size);
+ if (!err) {
+ /*
+ * We're reading 16 bytes, while filename__read_str
+ * allocates data per BUFSIZ bytes, so we can safely
+ * mark the end of the string.
+ */
+ (*comm)[size] = 0;
+ strim(*comm);
+ }
+
+ free(path);
+ return err;
+}
+
+static void comm_init(struct perf_thread_map *map, int i)
+{
+ pid_t pid = perf_thread_map__pid(map, i);
+ char *comm = NULL;
+
+ /* dummy pid comm initialization */
+ if (pid == -1) {
+ map->map[i].comm = strdup("dummy");
+ return;
+ }
+
+ /*
+ * The comm name is like extra bonus ;-),
+ * so just warn if we fail for any reason.
+ */
+ if (get_comm(&comm, pid))
+ pr_warning("Couldn't resolve comm name for pid %d\n", pid);
+
+ map->map[i].comm = comm;
+}
+
+void thread_map__read_comms(struct perf_thread_map *threads)
+{
+ int i;
+
+ for (i = 0; i < threads->nr; ++i)
+ comm_init(threads, i);
+}
+
+static void thread_map__copy_event(struct perf_thread_map *threads,
+ struct perf_record_thread_map *event)
+{
+ unsigned i;
+
+ threads->nr = (int) event->nr;
+
+ for (i = 0; i < event->nr; i++) {
+ perf_thread_map__set_pid(threads, i, (pid_t) event->entries[i].pid);
+ threads->map[i].comm = strndup(event->entries[i].comm, 16);
+ }
+
+ refcount_set(&threads->refcnt, 1);
+}
+
+struct perf_thread_map *thread_map__new_event(struct perf_record_thread_map *event)
+{
+ struct perf_thread_map *threads;
+
+ threads = thread_map__alloc(event->nr);
+ if (threads)
+ thread_map__copy_event(threads, event);
+
+ return threads;
+}
+
+bool thread_map__has(struct perf_thread_map *threads, pid_t pid)
+{
+ int i;
+
+ for (i = 0; i < threads->nr; ++i) {
+ if (threads->map[i].pid == pid)
+ return true;
+ }
+
+ return false;
+}
+
+int thread_map__remove(struct perf_thread_map *threads, int idx)
+{
+ int i;
+
+ if (threads->nr < 1)
+ return -EINVAL;
+
+ if (idx >= threads->nr)
+ return -EINVAL;
+
+ /*
+ * Free the 'idx' item and shift the rest up.
+ */
+ zfree(&threads->map[idx].comm);
+
+ for (i = idx; i < threads->nr - 1; i++)
+ threads->map[i] = threads->map[i + 1];
+
+ threads->nr--;
+ return 0;
+}
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
new file mode 100644
index 000000000..00ec05fc1
--- /dev/null
+++ b/tools/perf/util/thread_map.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_THREAD_MAP_H
+#define __PERF_THREAD_MAP_H
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <perf/threadmap.h>
+
+struct perf_record_thread_map;
+
+struct perf_thread_map *thread_map__new_dummy(void);
+struct perf_thread_map *thread_map__new_by_pid(pid_t pid);
+struct perf_thread_map *thread_map__new_by_tid(pid_t tid);
+struct perf_thread_map *thread_map__new_by_uid(uid_t uid);
+struct perf_thread_map *thread_map__new_all_cpus(void);
+struct perf_thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid);
+struct perf_thread_map *thread_map__new_event(struct perf_record_thread_map *event);
+
+struct perf_thread_map *thread_map__new_str(const char *pid,
+ const char *tid, uid_t uid, bool all_threads);
+
+struct perf_thread_map *thread_map__new_by_tid_str(const char *tid_str);
+
+size_t thread_map__fprintf(struct perf_thread_map *threads, FILE *fp);
+
+void thread_map__read_comms(struct perf_thread_map *threads);
+bool thread_map__has(struct perf_thread_map *threads, pid_t pid);
+int thread_map__remove(struct perf_thread_map *threads, int idx);
+#endif /* __PERF_THREAD_MAP_H */
diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c
new file mode 100644
index 000000000..302443921
--- /dev/null
+++ b/tools/perf/util/time-utils.c
@@ -0,0 +1,559 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <string.h>
+#include <linux/string.h>
+#include <sys/time.h>
+#include <linux/time64.h>
+#include <time.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <math.h>
+#include <linux/ctype.h>
+
+#include "debug.h"
+#include "time-utils.h"
+#include "session.h"
+#include "evlist.h"
+
+int parse_nsec_time(const char *str, u64 *ptime)
+{
+ u64 time_sec, time_nsec;
+ char *end;
+
+ time_sec = strtoul(str, &end, 10);
+ if (*end != '.' && *end != '\0')
+ return -1;
+
+ if (*end == '.') {
+ int i;
+ char nsec_buf[10];
+
+ if (strlen(++end) > 9)
+ return -1;
+
+ strncpy(nsec_buf, end, 9);
+ nsec_buf[9] = '\0';
+
+ /* make it nsec precision */
+ for (i = strlen(nsec_buf); i < 9; i++)
+ nsec_buf[i] = '0';
+
+ time_nsec = strtoul(nsec_buf, &end, 10);
+ if (*end != '\0')
+ return -1;
+ } else
+ time_nsec = 0;
+
+ *ptime = time_sec * NSEC_PER_SEC + time_nsec;
+ return 0;
+}
+
+static int parse_timestr_sec_nsec(struct perf_time_interval *ptime,
+ char *start_str, char *end_str)
+{
+ if (start_str && (*start_str != '\0') &&
+ (parse_nsec_time(start_str, &ptime->start) != 0)) {
+ return -1;
+ }
+
+ if (end_str && (*end_str != '\0') &&
+ (parse_nsec_time(end_str, &ptime->end) != 0)) {
+ return -1;
+ }
+
+ return 0;
+}
+
+static int split_start_end(char **start, char **end, const char *ostr, char ch)
+{
+ char *start_str, *end_str;
+ char *d, *str;
+
+ if (ostr == NULL || *ostr == '\0')
+ return 0;
+
+ /* copy original string because we need to modify it */
+ str = strdup(ostr);
+ if (str == NULL)
+ return -ENOMEM;
+
+ start_str = str;
+ d = strchr(start_str, ch);
+ if (d) {
+ *d = '\0';
+ ++d;
+ }
+ end_str = d;
+
+ *start = start_str;
+ *end = end_str;
+
+ return 0;
+}
+
+int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr)
+{
+ char *start_str = NULL, *end_str;
+ int rc;
+
+ rc = split_start_end(&start_str, &end_str, ostr, ',');
+ if (rc || !start_str)
+ return rc;
+
+ ptime->start = 0;
+ ptime->end = 0;
+
+ rc = parse_timestr_sec_nsec(ptime, start_str, end_str);
+
+ free(start_str);
+
+ /* make sure end time is after start time if it was given */
+ if (rc == 0 && ptime->end && ptime->end < ptime->start)
+ return -EINVAL;
+
+ pr_debug("start time %" PRIu64 ", ", ptime->start);
+ pr_debug("end time %" PRIu64 "\n", ptime->end);
+
+ return rc;
+}
+
+static int perf_time__parse_strs(struct perf_time_interval *ptime,
+ const char *ostr, int size)
+{
+ const char *cp;
+ char *str, *arg, *p;
+ int i, num = 0, rc = 0;
+
+ /* Count the commas */
+ for (cp = ostr; *cp; cp++)
+ num += !!(*cp == ',');
+
+ if (!num)
+ return -EINVAL;
+
+ BUG_ON(num > size);
+
+ str = strdup(ostr);
+ if (!str)
+ return -ENOMEM;
+
+ /* Split the string and parse each piece, except the last */
+ for (i = 0, p = str; i < num - 1; i++) {
+ arg = p;
+ /* Find next comma, there must be one */
+ p = skip_spaces(strchr(p, ',') + 1);
+ /* Skip the value, must not contain space or comma */
+ while (*p && !isspace(*p)) {
+ if (*p++ == ',') {
+ rc = -EINVAL;
+ goto out;
+ }
+ }
+ /* Split and parse */
+ if (*p)
+ *p++ = 0;
+ rc = perf_time__parse_str(ptime + i, arg);
+ if (rc < 0)
+ goto out;
+ }
+
+ /* Parse the last piece */
+ rc = perf_time__parse_str(ptime + i, p);
+ if (rc < 0)
+ goto out;
+
+ /* Check there is no overlap */
+ for (i = 0; i < num - 1; i++) {
+ if (ptime[i].end >= ptime[i + 1].start) {
+ rc = -EINVAL;
+ goto out;
+ }
+ }
+
+ rc = num;
+out:
+ free(str);
+
+ return rc;
+}
+
+static int parse_percent(double *pcnt, char *str)
+{
+ char *c, *endptr;
+ double d;
+
+ c = strchr(str, '%');
+ if (c)
+ *c = '\0';
+ else
+ return -1;
+
+ d = strtod(str, &endptr);
+ if (endptr != str + strlen(str))
+ return -1;
+
+ *pcnt = d / 100.0;
+ return 0;
+}
+
+static int set_percent_time(struct perf_time_interval *ptime, double start_pcnt,
+ double end_pcnt, u64 start, u64 end)
+{
+ u64 total = end - start;
+
+ if (start_pcnt < 0.0 || start_pcnt > 1.0 ||
+ end_pcnt < 0.0 || end_pcnt > 1.0) {
+ return -1;
+ }
+
+ ptime->start = start + round(start_pcnt * total);
+ ptime->end = start + round(end_pcnt * total);
+
+ if (ptime->end > ptime->start && ptime->end != end)
+ ptime->end -= 1;
+
+ return 0;
+}
+
+static int percent_slash_split(char *str, struct perf_time_interval *ptime,
+ u64 start, u64 end)
+{
+ char *p, *end_str;
+ double pcnt, start_pcnt, end_pcnt;
+ int i;
+
+ /*
+ * Example:
+ * 10%/2: select the second 10% slice and the third 10% slice
+ */
+
+ /* We can modify this string since the original one is copied */
+ p = strchr(str, '/');
+ if (!p)
+ return -1;
+
+ *p = '\0';
+ if (parse_percent(&pcnt, str) < 0)
+ return -1;
+
+ p++;
+ i = (int)strtol(p, &end_str, 10);
+ if (*end_str)
+ return -1;
+
+ if (pcnt <= 0.0)
+ return -1;
+
+ start_pcnt = pcnt * (i - 1);
+ end_pcnt = pcnt * i;
+
+ return set_percent_time(ptime, start_pcnt, end_pcnt, start, end);
+}
+
+static int percent_dash_split(char *str, struct perf_time_interval *ptime,
+ u64 start, u64 end)
+{
+ char *start_str = NULL, *end_str;
+ double start_pcnt, end_pcnt;
+ int ret;
+
+ /*
+ * Example: 0%-10%
+ */
+
+ ret = split_start_end(&start_str, &end_str, str, '-');
+ if (ret || !start_str)
+ return ret;
+
+ if ((parse_percent(&start_pcnt, start_str) != 0) ||
+ (parse_percent(&end_pcnt, end_str) != 0)) {
+ free(start_str);
+ return -1;
+ }
+
+ free(start_str);
+
+ return set_percent_time(ptime, start_pcnt, end_pcnt, start, end);
+}
+
+typedef int (*time_pecent_split)(char *, struct perf_time_interval *,
+ u64 start, u64 end);
+
+static int percent_comma_split(struct perf_time_interval *ptime_buf, int num,
+ const char *ostr, u64 start, u64 end,
+ time_pecent_split func)
+{
+ char *str, *p1, *p2;
+ int len, ret, i = 0;
+
+ str = strdup(ostr);
+ if (str == NULL)
+ return -ENOMEM;
+
+ len = strlen(str);
+ p1 = str;
+
+ while (p1 < str + len) {
+ if (i >= num) {
+ free(str);
+ return -1;
+ }
+
+ p2 = strchr(p1, ',');
+ if (p2)
+ *p2 = '\0';
+
+ ret = (func)(p1, &ptime_buf[i], start, end);
+ if (ret < 0) {
+ free(str);
+ return -1;
+ }
+
+ pr_debug("start time %d: %" PRIu64 ", ", i, ptime_buf[i].start);
+ pr_debug("end time %d: %" PRIu64 "\n", i, ptime_buf[i].end);
+
+ i++;
+
+ if (p2)
+ p1 = p2 + 1;
+ else
+ break;
+ }
+
+ free(str);
+ return i;
+}
+
+static int one_percent_convert(struct perf_time_interval *ptime_buf,
+ const char *ostr, u64 start, u64 end, char *c)
+{
+ char *str;
+ int len = strlen(ostr), ret;
+
+ /*
+ * c points to '%'.
+ * '%' should be the last character
+ */
+ if (ostr + len - 1 != c)
+ return -1;
+
+ /*
+ * Construct a string like "xx%/1"
+ */
+ str = malloc(len + 3);
+ if (str == NULL)
+ return -ENOMEM;
+
+ memcpy(str, ostr, len);
+ strcpy(str + len, "/1");
+
+ ret = percent_slash_split(str, ptime_buf, start, end);
+ if (ret == 0)
+ ret = 1;
+
+ free(str);
+ return ret;
+}
+
+int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num,
+ const char *ostr, u64 start, u64 end)
+{
+ char *c;
+
+ /*
+ * ostr example:
+ * 10%/2,10%/3: select the second 10% slice and the third 10% slice
+ * 0%-10%,30%-40%: multiple time range
+ * 50%: just one percent
+ */
+
+ memset(ptime_buf, 0, sizeof(*ptime_buf) * num);
+
+ c = strchr(ostr, '/');
+ if (c) {
+ return percent_comma_split(ptime_buf, num, ostr, start,
+ end, percent_slash_split);
+ }
+
+ c = strchr(ostr, '-');
+ if (c) {
+ return percent_comma_split(ptime_buf, num, ostr, start,
+ end, percent_dash_split);
+ }
+
+ c = strchr(ostr, '%');
+ if (c)
+ return one_percent_convert(ptime_buf, ostr, start, end, c);
+
+ return -1;
+}
+
+struct perf_time_interval *perf_time__range_alloc(const char *ostr, int *size)
+{
+ const char *p1, *p2;
+ int i = 1;
+ struct perf_time_interval *ptime;
+
+ /*
+ * At least allocate one time range.
+ */
+ if (!ostr)
+ goto alloc;
+
+ p1 = ostr;
+ while (p1 < ostr + strlen(ostr)) {
+ p2 = strchr(p1, ',');
+ if (!p2)
+ break;
+
+ p1 = p2 + 1;
+ i++;
+ }
+
+alloc:
+ *size = i;
+ ptime = calloc(i, sizeof(*ptime));
+ return ptime;
+}
+
+bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp)
+{
+ /* if time is not set don't drop sample */
+ if (timestamp == 0)
+ return false;
+
+ /* otherwise compare sample time to time window */
+ if ((ptime->start && timestamp < ptime->start) ||
+ (ptime->end && timestamp > ptime->end)) {
+ return true;
+ }
+
+ return false;
+}
+
+bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf,
+ int num, u64 timestamp)
+{
+ struct perf_time_interval *ptime;
+ int i;
+
+ if ((!ptime_buf) || (timestamp == 0) || (num == 0))
+ return false;
+
+ if (num == 1)
+ return perf_time__skip_sample(&ptime_buf[0], timestamp);
+
+ /*
+ * start/end of multiple time ranges must be valid.
+ */
+ for (i = 0; i < num; i++) {
+ ptime = &ptime_buf[i];
+
+ if (timestamp >= ptime->start &&
+ (timestamp <= ptime->end || !ptime->end)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+int perf_time__parse_for_ranges_reltime(const char *time_str,
+ struct perf_session *session,
+ struct perf_time_interval **ranges,
+ int *range_size, int *range_num,
+ bool reltime)
+{
+ bool has_percent = strchr(time_str, '%');
+ struct perf_time_interval *ptime_range;
+ int size, num, ret = -EINVAL;
+
+ ptime_range = perf_time__range_alloc(time_str, &size);
+ if (!ptime_range)
+ return -ENOMEM;
+
+ if (has_percent || reltime) {
+ if (session->evlist->first_sample_time == 0 &&
+ session->evlist->last_sample_time == 0) {
+ pr_err("HINT: no first/last sample time found in perf data.\n"
+ "Please use latest perf binary to execute 'perf record'\n"
+ "(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n");
+ goto error;
+ }
+ }
+
+ if (has_percent) {
+ num = perf_time__percent_parse_str(
+ ptime_range, size,
+ time_str,
+ session->evlist->first_sample_time,
+ session->evlist->last_sample_time);
+ } else {
+ num = perf_time__parse_strs(ptime_range, time_str, size);
+ }
+
+ if (num < 0)
+ goto error_invalid;
+
+ if (reltime) {
+ int i;
+
+ for (i = 0; i < num; i++) {
+ ptime_range[i].start += session->evlist->first_sample_time;
+ ptime_range[i].end += session->evlist->first_sample_time;
+ }
+ }
+
+ *range_size = size;
+ *range_num = num;
+ *ranges = ptime_range;
+ return 0;
+
+error_invalid:
+ pr_err("Invalid time string\n");
+error:
+ free(ptime_range);
+ return ret;
+}
+
+int perf_time__parse_for_ranges(const char *time_str,
+ struct perf_session *session,
+ struct perf_time_interval **ranges,
+ int *range_size, int *range_num)
+{
+ return perf_time__parse_for_ranges_reltime(time_str, session, ranges,
+ range_size, range_num, false);
+}
+
+int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz)
+{
+ u64 sec = timestamp / NSEC_PER_SEC;
+ u64 usec = (timestamp % NSEC_PER_SEC) / NSEC_PER_USEC;
+
+ return scnprintf(buf, sz, "%"PRIu64".%06"PRIu64, sec, usec);
+}
+
+int timestamp__scnprintf_nsec(u64 timestamp, char *buf, size_t sz)
+{
+ u64 sec = timestamp / NSEC_PER_SEC,
+ nsec = timestamp % NSEC_PER_SEC;
+
+ return scnprintf(buf, sz, "%" PRIu64 ".%09" PRIu64, sec, nsec);
+}
+
+int fetch_current_timestamp(char *buf, size_t sz)
+{
+ struct timeval tv;
+ struct tm tm;
+ char dt[32];
+
+ if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm))
+ return -1;
+
+ if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm))
+ return -1;
+
+ scnprintf(buf, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000);
+
+ return 0;
+}
diff --git a/tools/perf/util/time-utils.h b/tools/perf/util/time-utils.h
new file mode 100644
index 000000000..1142b0bdd
--- /dev/null
+++ b/tools/perf/util/time-utils.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TIME_UTILS_H_
+#define _TIME_UTILS_H_
+
+#include <stddef.h>
+#include <time.h>
+#include <linux/types.h>
+
+struct perf_time_interval {
+ u64 start, end;
+};
+
+int parse_nsec_time(const char *str, u64 *ptime);
+
+int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr);
+
+int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num,
+ const char *ostr, u64 start, u64 end);
+
+struct perf_time_interval *perf_time__range_alloc(const char *ostr, int *size);
+
+bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp);
+
+bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf,
+ int num, u64 timestamp);
+
+struct perf_session;
+
+int perf_time__parse_for_ranges_reltime(const char *str, struct perf_session *session,
+ struct perf_time_interval **ranges,
+ int *range_size, int *range_num,
+ bool reltime);
+
+int perf_time__parse_for_ranges(const char *str, struct perf_session *session,
+ struct perf_time_interval **ranges,
+ int *range_size, int *range_num);
+
+int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz);
+int timestamp__scnprintf_nsec(u64 timestamp, char *buf, size_t sz);
+
+int fetch_current_timestamp(char *buf, size_t sz);
+
+static inline unsigned long long rdclock(void)
+{
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
+}
+
+#endif
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
new file mode 100644
index 000000000..c957fb849
--- /dev/null
+++ b/tools/perf/util/tool.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_TOOL_H
+#define __PERF_TOOL_H
+
+#include <stdbool.h>
+
+#include <linux/types.h>
+
+struct perf_session;
+union perf_event;
+struct evlist;
+struct evsel;
+struct perf_sample;
+struct perf_tool;
+struct machine;
+struct ordered_events;
+
+typedef int (*event_sample)(struct perf_tool *tool, union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel, struct machine *machine);
+
+typedef int (*event_op)(struct perf_tool *tool, union perf_event *event,
+ struct perf_sample *sample, struct machine *machine);
+
+typedef int (*event_attr_op)(struct perf_tool *tool,
+ union perf_event *event,
+ struct evlist **pevlist);
+
+typedef int (*event_op2)(struct perf_session *session, union perf_event *event);
+typedef s64 (*event_op3)(struct perf_session *session, union perf_event *event);
+typedef int (*event_op4)(struct perf_session *session, union perf_event *event, u64 data,
+ const char *str);
+
+typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event,
+ struct ordered_events *oe);
+
+enum show_feature_header {
+ SHOW_FEAT_NO_HEADER = 0,
+ SHOW_FEAT_HEADER,
+ SHOW_FEAT_HEADER_FULL_INFO,
+};
+
+struct perf_tool {
+ event_sample sample,
+ read;
+ event_op mmap,
+ mmap2,
+ comm,
+ namespaces,
+ cgroup,
+ fork,
+ exit,
+ lost,
+ lost_samples,
+ aux,
+ itrace_start,
+ aux_output_hw_id,
+ context_switch,
+ throttle,
+ unthrottle,
+ ksymbol,
+ bpf,
+ text_poke;
+
+ event_attr_op attr;
+ event_attr_op event_update;
+ event_op2 tracing_data;
+ event_oe finished_round;
+ event_op2 build_id,
+ id_index,
+ auxtrace_info,
+ auxtrace_error,
+ time_conv,
+ thread_map,
+ cpu_map,
+ stat_config,
+ stat,
+ stat_round,
+ feature,
+ finished_init;
+ event_op4 compressed;
+ event_op3 auxtrace;
+ bool ordered_events;
+ bool ordering_requires_timestamps;
+ bool namespace_events;
+ bool cgroup_events;
+ bool no_warn;
+ enum show_feature_header show_feat_hdr;
+};
+
+#endif /* __PERF_TOOL_H */
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
new file mode 100644
index 000000000..be7157de0
--- /dev/null
+++ b/tools/perf/util/top.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Refactored from builtin-top.c, see that files for further copyright notes.
+ */
+
+#include "event.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "parse-events.h"
+#include "symbol.h"
+#include "top.h"
+#include "util.h"
+#include <inttypes.h>
+
+#define SNPRINTF(buf, size, fmt, args...) \
+({ \
+ size_t r = snprintf(buf, size, fmt, ## args); \
+ r > size ? size : r; \
+})
+
+size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
+{
+ float samples_per_sec;
+ float ksamples_per_sec;
+ float esamples_percent;
+ struct record_opts *opts = &top->record_opts;
+ struct target *target = &opts->target;
+ size_t ret = 0;
+
+ if (top->samples) {
+ samples_per_sec = top->samples / top->delay_secs;
+ ksamples_per_sec = top->kernel_samples / top->delay_secs;
+ esamples_percent = (100.0 * top->exact_samples) / top->samples;
+ } else {
+ samples_per_sec = ksamples_per_sec = esamples_percent = 0.0;
+ }
+
+ if (!perf_guest) {
+ float ksamples_percent = 0.0;
+
+ if (samples_per_sec)
+ ksamples_percent = (100.0 * ksamples_per_sec) /
+ samples_per_sec;
+ ret = SNPRINTF(bf, size,
+ " PerfTop:%8.0f irqs/sec kernel:%4.1f%%"
+ " exact: %4.1f%% lost: %" PRIu64 "/%" PRIu64 " drop: %" PRIu64 "/%" PRIu64 " [",
+ samples_per_sec, ksamples_percent, esamples_percent,
+ top->lost, top->lost_total, top->drop, top->drop_total);
+ } else {
+ float us_samples_per_sec = top->us_samples / top->delay_secs;
+ float guest_kernel_samples_per_sec = top->guest_kernel_samples / top->delay_secs;
+ float guest_us_samples_per_sec = top->guest_us_samples / top->delay_secs;
+
+ ret = SNPRINTF(bf, size,
+ " PerfTop:%8.0f irqs/sec kernel:%4.1f%% us:%4.1f%%"
+ " guest kernel:%4.1f%% guest us:%4.1f%%"
+ " exact: %4.1f%% [", samples_per_sec,
+ 100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) /
+ samples_per_sec)),
+ 100.0 - (100.0 * ((samples_per_sec - us_samples_per_sec) /
+ samples_per_sec)),
+ 100.0 - (100.0 * ((samples_per_sec -
+ guest_kernel_samples_per_sec) /
+ samples_per_sec)),
+ 100.0 - (100.0 * ((samples_per_sec -
+ guest_us_samples_per_sec) /
+ samples_per_sec)),
+ esamples_percent);
+ }
+
+ if (top->evlist->core.nr_entries == 1) {
+ struct evsel *first = evlist__first(top->evlist);
+ ret += SNPRINTF(bf + ret, size - ret, "%" PRIu64 "%s ",
+ (uint64_t)first->core.attr.sample_period,
+ opts->freq ? "Hz" : "");
+ }
+
+ ret += SNPRINTF(bf + ret, size - ret, "%s", evsel__name(top->sym_evsel));
+
+ ret += SNPRINTF(bf + ret, size - ret, "], ");
+
+ if (target->pid)
+ ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %s",
+ target->pid);
+ else if (target->tid)
+ ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %s",
+ target->tid);
+ else if (target->uid_str != NULL)
+ ret += SNPRINTF(bf + ret, size - ret, " (uid: %s",
+ target->uid_str);
+ else
+ ret += SNPRINTF(bf + ret, size - ret, " (all");
+
+ if (target->cpu_list)
+ ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)",
+ perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1
+ ? "s" : "",
+ target->cpu_list);
+ else {
+ if (target->tid)
+ ret += SNPRINTF(bf + ret, size - ret, ")");
+ else
+ ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)",
+ perf_cpu_map__nr(top->evlist->core.user_requested_cpus),
+ perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1
+ ? "s" : "");
+ }
+
+ perf_top__reset_sample_counters(top);
+ return ret;
+}
+
+void perf_top__reset_sample_counters(struct perf_top *top)
+{
+ top->samples = top->us_samples = top->kernel_samples =
+ top->exact_samples = top->guest_kernel_samples =
+ top->guest_us_samples = top->lost = top->drop = 0;
+}
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
new file mode 100644
index 000000000..a8b0d79bd
--- /dev/null
+++ b/tools/perf/util/top.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_TOP_H
+#define __PERF_TOP_H 1
+
+#include "tool.h"
+#include "evswitch.h"
+#include "annotate.h"
+#include "mutex.h"
+#include "ordered-events.h"
+#include "record.h"
+#include <linux/types.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/ioctl.h>
+
+struct evlist;
+struct evsel;
+struct perf_session;
+
+struct perf_top {
+ struct perf_tool tool;
+ struct evlist *evlist, *sb_evlist;
+ struct record_opts record_opts;
+ struct annotation_options annotation_opts;
+ struct evswitch evswitch;
+ /*
+ * Symbols will be added here in perf_event__process_sample and will
+ * get out after decayed.
+ */
+ u64 samples, lost, lost_total, drop, drop_total;
+ u64 kernel_samples, us_samples;
+ u64 exact_samples;
+ u64 guest_us_samples, guest_kernel_samples;
+ int print_entries, count_filter, delay_secs;
+ int max_stack;
+ bool hide_kernel_symbols, hide_user_symbols, zero;
+#ifdef HAVE_SLANG_SUPPORT
+ bool use_tui;
+#endif
+ bool use_stdio;
+ bool vmlinux_warned;
+ bool dump_symtab;
+ bool stitch_lbr;
+ struct hist_entry *sym_filter_entry;
+ struct evsel *sym_evsel;
+ struct perf_session *session;
+ struct winsize winsize;
+ int realtime_prio;
+ const char *sym_filter;
+ float min_percent;
+ unsigned int nr_threads_synthesize;
+
+ struct {
+ struct ordered_events *in;
+ struct ordered_events data[2];
+ bool rotate;
+ struct mutex mutex;
+ struct cond cond;
+ } qe;
+};
+
+#define CONSOLE_CLEAR ""
+
+size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size);
+void perf_top__reset_sample_counters(struct perf_top *top);
+#endif /* __PERF_TOP_H */
diff --git a/tools/perf/util/topdown.c b/tools/perf/util/topdown.c
new file mode 100644
index 000000000..18fd5fed5
--- /dev/null
+++ b/tools/perf/util/topdown.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "topdown.h"
+#include <linux/kernel.h>
+
+__weak bool arch_topdown_sample_read(struct evsel *leader __maybe_unused)
+{
+ return false;
+}
diff --git a/tools/perf/util/topdown.h b/tools/perf/util/topdown.h
new file mode 100644
index 000000000..1996c5fed
--- /dev/null
+++ b/tools/perf/util/topdown.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef TOPDOWN_H
+#define TOPDOWN_H 1
+
+#include <stdbool.h>
+
+struct evsel;
+
+bool arch_topdown_sample_read(struct evsel *leader);
+
+#endif
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
new file mode 100644
index 000000000..319ccf09a
--- /dev/null
+++ b/tools/perf/util/trace-event-info.c
@@ -0,0 +1,702 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2008,2009, Steven Rostedt <srostedt@redhat.com>
+ */
+#include <dirent.h>
+#include <mntent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+#include <internal/lib.h> // page_size
+#include <sys/param.h>
+
+#include "trace-event.h"
+#include "tracepoint.h"
+#include <api/fs/tracing_path.h>
+#include "evsel.h"
+#include "debug.h"
+#include "util.h"
+
+#define VERSION "0.6"
+#define MAX_EVENT_LENGTH 512
+
+static int output_fd;
+
+struct tracepoint_path {
+ char *system;
+ char *name;
+ struct tracepoint_path *next;
+};
+
+/* unfortunately, you can not stat debugfs or proc files for size */
+static int record_file(const char *file, ssize_t hdr_sz)
+{
+ unsigned long long size = 0;
+ char buf[BUFSIZ], *sizep;
+ off_t hdr_pos = lseek(output_fd, 0, SEEK_CUR);
+ int r, fd;
+ int err = -EIO;
+
+ fd = open(file, O_RDONLY);
+ if (fd < 0) {
+ pr_debug("Can't read '%s'", file);
+ return -errno;
+ }
+
+ /* put in zeros for file size, then fill true size later */
+ if (hdr_sz) {
+ if (write(output_fd, &size, hdr_sz) != hdr_sz)
+ goto out;
+ }
+
+ do {
+ r = read(fd, buf, BUFSIZ);
+ if (r > 0) {
+ size += r;
+ if (write(output_fd, buf, r) != r)
+ goto out;
+ }
+ } while (r > 0);
+
+ /* ugh, handle big-endian hdr_size == 4 */
+ sizep = (char*)&size;
+ if (host_is_bigendian())
+ sizep += sizeof(u64) - hdr_sz;
+
+ if (hdr_sz && pwrite(output_fd, sizep, hdr_sz, hdr_pos) < 0) {
+ pr_debug("writing file size failed\n");
+ goto out;
+ }
+
+ err = 0;
+out:
+ close(fd);
+ return err;
+}
+
+static int record_header_files(void)
+{
+ char *path = get_events_file("header_page");
+ struct stat st;
+ int err = -EIO;
+
+ if (!path) {
+ pr_debug("can't get tracing/events/header_page");
+ return -ENOMEM;
+ }
+
+ if (stat(path, &st) < 0) {
+ pr_debug("can't read '%s'", path);
+ goto out;
+ }
+
+ if (write(output_fd, "header_page", 12) != 12) {
+ pr_debug("can't write header_page\n");
+ goto out;
+ }
+
+ if (record_file(path, 8) < 0) {
+ pr_debug("can't record header_page file\n");
+ goto out;
+ }
+
+ put_events_file(path);
+
+ path = get_events_file("header_event");
+ if (!path) {
+ pr_debug("can't get tracing/events/header_event");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ if (stat(path, &st) < 0) {
+ pr_debug("can't read '%s'", path);
+ goto out;
+ }
+
+ if (write(output_fd, "header_event", 13) != 13) {
+ pr_debug("can't write header_event\n");
+ goto out;
+ }
+
+ if (record_file(path, 8) < 0) {
+ pr_debug("can't record header_event file\n");
+ goto out;
+ }
+
+ err = 0;
+out:
+ put_events_file(path);
+ return err;
+}
+
+static bool name_in_tp_list(char *sys, struct tracepoint_path *tps)
+{
+ while (tps) {
+ if (!strcmp(sys, tps->name))
+ return true;
+ tps = tps->next;
+ }
+
+ return false;
+}
+
+#define for_each_event_tps(dir, dent, tps) \
+ while ((dent = readdir(dir))) \
+ if (dent->d_type == DT_DIR && \
+ (strcmp(dent->d_name, ".")) && \
+ (strcmp(dent->d_name, ".."))) \
+
+static int copy_event_system(const char *sys, struct tracepoint_path *tps)
+{
+ struct dirent *dent;
+ struct stat st;
+ char *format;
+ DIR *dir;
+ int count = 0;
+ int ret;
+ int err;
+
+ dir = opendir(sys);
+ if (!dir) {
+ pr_debug("can't read directory '%s'", sys);
+ return -errno;
+ }
+
+ for_each_event_tps(dir, dent, tps) {
+ if (!name_in_tp_list(dent->d_name, tps))
+ continue;
+
+ if (asprintf(&format, "%s/%s/format", sys, dent->d_name) < 0) {
+ err = -ENOMEM;
+ goto out;
+ }
+ ret = stat(format, &st);
+ free(format);
+ if (ret < 0)
+ continue;
+ count++;
+ }
+
+ if (write(output_fd, &count, 4) != 4) {
+ err = -EIO;
+ pr_debug("can't write count\n");
+ goto out;
+ }
+
+ rewinddir(dir);
+ for_each_event_tps(dir, dent, tps) {
+ if (!name_in_tp_list(dent->d_name, tps))
+ continue;
+
+ if (asprintf(&format, "%s/%s/format", sys, dent->d_name) < 0) {
+ err = -ENOMEM;
+ goto out;
+ }
+ ret = stat(format, &st);
+
+ if (ret >= 0) {
+ err = record_file(format, 8);
+ if (err) {
+ free(format);
+ goto out;
+ }
+ }
+ free(format);
+ }
+ err = 0;
+out:
+ closedir(dir);
+ return err;
+}
+
+static int record_ftrace_files(struct tracepoint_path *tps)
+{
+ char *path;
+ int ret;
+
+ path = get_events_file("ftrace");
+ if (!path) {
+ pr_debug("can't get tracing/events/ftrace");
+ return -ENOMEM;
+ }
+
+ ret = copy_event_system(path, tps);
+
+ put_tracing_file(path);
+
+ return ret;
+}
+
+static bool system_in_tp_list(char *sys, struct tracepoint_path *tps)
+{
+ while (tps) {
+ if (!strcmp(sys, tps->system))
+ return true;
+ tps = tps->next;
+ }
+
+ return false;
+}
+
+static int record_event_files(struct tracepoint_path *tps)
+{
+ struct dirent *dent;
+ struct stat st;
+ char *path;
+ char *sys;
+ DIR *dir;
+ int count = 0;
+ int ret;
+ int err;
+
+ path = get_tracing_file("events");
+ if (!path) {
+ pr_debug("can't get tracing/events");
+ return -ENOMEM;
+ }
+
+ dir = opendir(path);
+ if (!dir) {
+ err = -errno;
+ pr_debug("can't read directory '%s'", path);
+ goto out;
+ }
+
+ for_each_event_tps(dir, dent, tps) {
+ if (strcmp(dent->d_name, "ftrace") == 0 ||
+ !system_in_tp_list(dent->d_name, tps))
+ continue;
+
+ count++;
+ }
+
+ if (write(output_fd, &count, 4) != 4) {
+ err = -EIO;
+ pr_debug("can't write count\n");
+ goto out;
+ }
+
+ rewinddir(dir);
+ for_each_event_tps(dir, dent, tps) {
+ if (strcmp(dent->d_name, "ftrace") == 0 ||
+ !system_in_tp_list(dent->d_name, tps))
+ continue;
+
+ if (asprintf(&sys, "%s/%s", path, dent->d_name) < 0) {
+ err = -ENOMEM;
+ goto out;
+ }
+ ret = stat(sys, &st);
+ if (ret >= 0) {
+ ssize_t size = strlen(dent->d_name) + 1;
+
+ if (write(output_fd, dent->d_name, size) != size ||
+ copy_event_system(sys, tps) < 0) {
+ err = -EIO;
+ free(sys);
+ goto out;
+ }
+ }
+ free(sys);
+ }
+ err = 0;
+out:
+ closedir(dir);
+ put_tracing_file(path);
+
+ return err;
+}
+
+static int record_proc_kallsyms(void)
+{
+ unsigned long long size = 0;
+ /*
+ * Just to keep older perf.data file parsers happy, record a zero
+ * sized kallsyms file, i.e. do the same thing that was done when
+ * /proc/kallsyms (or something specified via --kallsyms, in a
+ * different path) couldn't be read.
+ */
+ return write(output_fd, &size, 4) != 4 ? -EIO : 0;
+}
+
+static int record_ftrace_printk(void)
+{
+ unsigned int size;
+ char *path;
+ struct stat st;
+ int ret, err = 0;
+
+ path = get_tracing_file("printk_formats");
+ if (!path) {
+ pr_debug("can't get tracing/printk_formats");
+ return -ENOMEM;
+ }
+
+ ret = stat(path, &st);
+ if (ret < 0) {
+ /* not found */
+ size = 0;
+ if (write(output_fd, &size, 4) != 4)
+ err = -EIO;
+ goto out;
+ }
+ err = record_file(path, 4);
+
+out:
+ put_tracing_file(path);
+ return err;
+}
+
+static int record_saved_cmdline(void)
+{
+ unsigned long long size;
+ char *path;
+ struct stat st;
+ int ret, err = 0;
+
+ path = get_tracing_file("saved_cmdlines");
+ if (!path) {
+ pr_debug("can't get tracing/saved_cmdline");
+ return -ENOMEM;
+ }
+
+ ret = stat(path, &st);
+ if (ret < 0) {
+ /* not found */
+ size = 0;
+ if (write(output_fd, &size, 8) != 8)
+ err = -EIO;
+ goto out;
+ }
+ err = record_file(path, 8);
+
+out:
+ put_tracing_file(path);
+ return err;
+}
+
+static void
+put_tracepoints_path(struct tracepoint_path *tps)
+{
+ while (tps) {
+ struct tracepoint_path *t = tps;
+
+ tps = tps->next;
+ zfree(&t->name);
+ zfree(&t->system);
+ free(t);
+ }
+}
+
+static struct tracepoint_path *tracepoint_id_to_path(u64 config)
+{
+ struct tracepoint_path *path = NULL;
+ DIR *sys_dir, *evt_dir;
+ struct dirent *sys_dirent, *evt_dirent;
+ char id_buf[24];
+ int fd;
+ u64 id;
+ char evt_path[MAXPATHLEN];
+ char *dir_path;
+
+ sys_dir = tracing_events__opendir();
+ if (!sys_dir)
+ return NULL;
+
+ for_each_subsystem(sys_dir, sys_dirent) {
+ dir_path = get_events_file(sys_dirent->d_name);
+ if (!dir_path)
+ continue;
+ evt_dir = opendir(dir_path);
+ if (!evt_dir)
+ goto next;
+
+ for_each_event(dir_path, evt_dir, evt_dirent) {
+
+ scnprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path,
+ evt_dirent->d_name);
+ fd = open(evt_path, O_RDONLY);
+ if (fd < 0)
+ continue;
+ if (read(fd, id_buf, sizeof(id_buf)) < 0) {
+ close(fd);
+ continue;
+ }
+ close(fd);
+ id = atoll(id_buf);
+ if (id == config) {
+ put_events_file(dir_path);
+ closedir(evt_dir);
+ closedir(sys_dir);
+ path = zalloc(sizeof(*path));
+ if (!path)
+ return NULL;
+ if (asprintf(&path->system, "%.*s",
+ MAX_EVENT_LENGTH, sys_dirent->d_name) < 0) {
+ free(path);
+ return NULL;
+ }
+ if (asprintf(&path->name, "%.*s",
+ MAX_EVENT_LENGTH, evt_dirent->d_name) < 0) {
+ zfree(&path->system);
+ free(path);
+ return NULL;
+ }
+ return path;
+ }
+ }
+ closedir(evt_dir);
+next:
+ put_events_file(dir_path);
+ }
+
+ closedir(sys_dir);
+ return NULL;
+}
+
+char *tracepoint_id_to_name(u64 config)
+{
+ struct tracepoint_path *path = tracepoint_id_to_path(config);
+ char *buf = NULL;
+
+ if (path && asprintf(&buf, "%s:%s", path->system, path->name) < 0)
+ buf = NULL;
+
+ put_tracepoints_path(path);
+ return buf;
+}
+
+static struct tracepoint_path *tracepoint_name_to_path(const char *name)
+{
+ struct tracepoint_path *path = zalloc(sizeof(*path));
+ char *str = strchr(name, ':');
+
+ if (path == NULL || str == NULL) {
+ free(path);
+ return NULL;
+ }
+
+ path->system = strndup(name, str - name);
+ path->name = strdup(str+1);
+
+ if (path->system == NULL || path->name == NULL) {
+ zfree(&path->system);
+ zfree(&path->name);
+ zfree(&path);
+ }
+
+ return path;
+}
+
+static struct tracepoint_path *
+get_tracepoints_path(struct list_head *pattrs)
+{
+ struct tracepoint_path path, *ppath = &path;
+ struct evsel *pos;
+ int nr_tracepoints = 0;
+
+ list_for_each_entry(pos, pattrs, core.node) {
+ if (pos->core.attr.type != PERF_TYPE_TRACEPOINT)
+ continue;
+ ++nr_tracepoints;
+
+ if (pos->name) {
+ ppath->next = tracepoint_name_to_path(pos->name);
+ if (ppath->next)
+ goto next;
+
+ if (strchr(pos->name, ':') == NULL)
+ goto try_id;
+
+ goto error;
+ }
+
+try_id:
+ ppath->next = tracepoint_id_to_path(pos->core.attr.config);
+ if (!ppath->next) {
+error:
+ pr_debug("No memory to alloc tracepoints list\n");
+ put_tracepoints_path(path.next);
+ return NULL;
+ }
+next:
+ ppath = ppath->next;
+ }
+
+ return nr_tracepoints > 0 ? path.next : NULL;
+}
+
+bool have_tracepoints(struct list_head *pattrs)
+{
+ struct evsel *pos;
+
+ list_for_each_entry(pos, pattrs, core.node)
+ if (pos->core.attr.type == PERF_TYPE_TRACEPOINT)
+ return true;
+
+ return false;
+}
+
+static int tracing_data_header(void)
+{
+ char buf[20];
+ ssize_t size;
+
+ /* just guessing this is someone's birthday.. ;) */
+ buf[0] = 23;
+ buf[1] = 8;
+ buf[2] = 68;
+ memcpy(buf + 3, "tracing", 7);
+
+ if (write(output_fd, buf, 10) != 10)
+ return -1;
+
+ size = strlen(VERSION) + 1;
+ if (write(output_fd, VERSION, size) != size)
+ return -1;
+
+ /* save endian */
+ if (host_is_bigendian())
+ buf[0] = 1;
+ else
+ buf[0] = 0;
+
+ if (write(output_fd, buf, 1) != 1)
+ return -1;
+
+ /* save size of long */
+ buf[0] = sizeof(long);
+ if (write(output_fd, buf, 1) != 1)
+ return -1;
+
+ /* save page_size */
+ if (write(output_fd, &page_size, 4) != 4)
+ return -1;
+
+ return 0;
+}
+
+struct tracing_data *tracing_data_get(struct list_head *pattrs,
+ int fd, bool temp)
+{
+ struct tracepoint_path *tps;
+ struct tracing_data *tdata;
+ int err;
+
+ output_fd = fd;
+
+ tps = get_tracepoints_path(pattrs);
+ if (!tps)
+ return NULL;
+
+ tdata = malloc(sizeof(*tdata));
+ if (!tdata)
+ return NULL;
+
+ tdata->temp = temp;
+ tdata->size = 0;
+
+ if (temp) {
+ int temp_fd;
+
+ snprintf(tdata->temp_file, sizeof(tdata->temp_file),
+ "/tmp/perf-XXXXXX");
+ if (!mkstemp(tdata->temp_file)) {
+ pr_debug("Can't make temp file");
+ free(tdata);
+ return NULL;
+ }
+
+ temp_fd = open(tdata->temp_file, O_RDWR);
+ if (temp_fd < 0) {
+ pr_debug("Can't read '%s'", tdata->temp_file);
+ free(tdata);
+ return NULL;
+ }
+
+ /*
+ * Set the temp file the default output, so all the
+ * tracing data are stored into it.
+ */
+ output_fd = temp_fd;
+ }
+
+ err = tracing_data_header();
+ if (err)
+ goto out;
+ err = record_header_files();
+ if (err)
+ goto out;
+ err = record_ftrace_files(tps);
+ if (err)
+ goto out;
+ err = record_event_files(tps);
+ if (err)
+ goto out;
+ err = record_proc_kallsyms();
+ if (err)
+ goto out;
+ err = record_ftrace_printk();
+ if (err)
+ goto out;
+ err = record_saved_cmdline();
+
+out:
+ /*
+ * All tracing data are stored by now, we can restore
+ * the default output file in case we used temp file.
+ */
+ if (temp) {
+ tdata->size = lseek(output_fd, 0, SEEK_CUR);
+ close(output_fd);
+ output_fd = fd;
+ }
+
+ if (err)
+ zfree(&tdata);
+
+ put_tracepoints_path(tps);
+ return tdata;
+}
+
+int tracing_data_put(struct tracing_data *tdata)
+{
+ int err = 0;
+
+ if (tdata->temp) {
+ err = record_file(tdata->temp_file, 0);
+ unlink(tdata->temp_file);
+ }
+
+ free(tdata);
+ return err;
+}
+
+int read_tracing_data(int fd, struct list_head *pattrs)
+{
+ int err;
+ struct tracing_data *tdata;
+
+ /*
+ * We work over the real file, so we can write data
+ * directly, no temp file is needed.
+ */
+ tdata = tracing_data_get(pattrs, fd, false);
+ if (!tdata)
+ return -ENOMEM;
+
+ err = tracing_data_put(tdata);
+ return err;
+}
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
new file mode 100644
index 000000000..2d3c2576b
--- /dev/null
+++ b/tools/perf/util/trace-event-parse.c
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2009, Steven Rostedt <srostedt@redhat.com>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "trace-event.h"
+
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+#include <traceevent/event-parse.h>
+
+static int get_common_field(struct scripting_context *context,
+ int *offset, int *size, const char *type)
+{
+ struct tep_handle *pevent = context->pevent;
+ struct tep_event *event;
+ struct tep_format_field *field;
+
+ if (!*size) {
+
+ event = tep_get_first_event(pevent);
+ if (!event)
+ return 0;
+
+ field = tep_find_common_field(event, type);
+ if (!field)
+ return 0;
+ *offset = field->offset;
+ *size = field->size;
+ }
+
+ return tep_read_number(pevent, context->event_data + *offset, *size);
+}
+
+int common_lock_depth(struct scripting_context *context)
+{
+ static int offset;
+ static int size;
+ int ret;
+
+ ret = get_common_field(context, &size, &offset,
+ "common_lock_depth");
+ if (ret < 0)
+ return -1;
+
+ return ret;
+}
+
+int common_flags(struct scripting_context *context)
+{
+ static int offset;
+ static int size;
+ int ret;
+
+ ret = get_common_field(context, &size, &offset,
+ "common_flags");
+ if (ret < 0)
+ return -1;
+
+ return ret;
+}
+
+int common_pc(struct scripting_context *context)
+{
+ static int offset;
+ static int size;
+ int ret;
+
+ ret = get_common_field(context, &size, &offset,
+ "common_preempt_count");
+ if (ret < 0)
+ return -1;
+
+ return ret;
+}
+
+unsigned long long
+raw_field_value(struct tep_event *event, const char *name, void *data)
+{
+ struct tep_format_field *field;
+ unsigned long long val;
+
+ field = tep_find_any_field(event, name);
+ if (!field)
+ return 0ULL;
+
+ tep_read_number_field(field, data, &val);
+
+ return val;
+}
+
+unsigned long long read_size(struct tep_event *event, void *ptr, int size)
+{
+ return tep_read_number(event->tep, ptr, size);
+}
+
+void event_format__fprintf(struct tep_event *event,
+ int cpu, void *data, int size, FILE *fp)
+{
+ struct tep_record record;
+ struct trace_seq s;
+
+ memset(&record, 0, sizeof(record));
+ record.cpu = cpu;
+ record.size = size;
+ record.data = data;
+
+ trace_seq_init(&s);
+ tep_print_event(event->tep, &s, &record, "%s", TEP_PRINT_INFO);
+ trace_seq_do_fprintf(&s, fp);
+ trace_seq_destroy(&s);
+}
+
+void event_format__print(struct tep_event *event,
+ int cpu, void *data, int size)
+{
+ return event_format__fprintf(event, cpu, data, size, stdout);
+}
+
+void parse_ftrace_printk(struct tep_handle *pevent,
+ char *file, unsigned int size __maybe_unused)
+{
+ unsigned long long addr;
+ char *printk;
+ char *line;
+ char *next = NULL;
+ char *addr_str;
+ char *fmt = NULL;
+
+ line = strtok_r(file, "\n", &next);
+ while (line) {
+ addr_str = strtok_r(line, ":", &fmt);
+ if (!addr_str) {
+ pr_warning("printk format with empty entry");
+ break;
+ }
+ addr = strtoull(addr_str, NULL, 16);
+ /* fmt still has a space, skip it */
+ printk = strdup(fmt+1);
+ line = strtok_r(NULL, "\n", &next);
+ tep_register_print_string(pevent, printk, addr);
+ free(printk);
+ }
+}
+
+void parse_saved_cmdline(struct tep_handle *pevent,
+ char *file, unsigned int size __maybe_unused)
+{
+ char comm[17]; /* Max comm length in the kernel is 16. */
+ char *line;
+ char *next = NULL;
+ int pid;
+
+ line = strtok_r(file, "\n", &next);
+ while (line) {
+ if (sscanf(line, "%d %16s", &pid, comm) == 2)
+ tep_register_comm(pevent, comm, pid);
+ line = strtok_r(NULL, "\n", &next);
+ }
+}
+
+int parse_ftrace_file(struct tep_handle *pevent, char *buf, unsigned long size)
+{
+ return tep_parse_event(pevent, buf, size, "ftrace");
+}
+
+int parse_event_file(struct tep_handle *pevent,
+ char *buf, unsigned long size, char *sys)
+{
+ return tep_parse_event(pevent, buf, size, sys);
+}
+
+struct flag {
+ const char *name;
+ unsigned long long value;
+};
+
+static const struct flag flags[] = {
+ { "HI_SOFTIRQ", 0 },
+ { "TIMER_SOFTIRQ", 1 },
+ { "NET_TX_SOFTIRQ", 2 },
+ { "NET_RX_SOFTIRQ", 3 },
+ { "BLOCK_SOFTIRQ", 4 },
+ { "IRQ_POLL_SOFTIRQ", 5 },
+ { "TASKLET_SOFTIRQ", 6 },
+ { "SCHED_SOFTIRQ", 7 },
+ { "HRTIMER_SOFTIRQ", 8 },
+ { "RCU_SOFTIRQ", 9 },
+
+ { "HRTIMER_NORESTART", 0 },
+ { "HRTIMER_RESTART", 1 },
+};
+
+unsigned long long eval_flag(const char *flag)
+{
+ int i;
+
+ /*
+ * Some flags in the format files do not get converted.
+ * If the flag is not numeric, see if it is something that
+ * we already know about.
+ */
+ if (isdigit(flag[0]))
+ return strtoull(flag, NULL, 0);
+
+ for (i = 0; i < (int)(ARRAY_SIZE(flags)); i++)
+ if (strcmp(flags[i].name, flag) == 0)
+ return flags[i].value;
+
+ return 0;
+}
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
new file mode 100644
index 000000000..1162c49b8
--- /dev/null
+++ b/tools/perf/util/trace-event-read.c
@@ -0,0 +1,480 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2009, Steven Rostedt <srostedt@redhat.com>
+ */
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+#include <traceevent/event-parse.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "trace-event.h"
+#include "debug.h"
+#include "util.h"
+
+static int input_fd;
+
+static ssize_t trace_data_size;
+static bool repipe;
+
+static int __do_read(int fd, void *buf, int size)
+{
+ int rsize = size;
+
+ while (size) {
+ int ret = read(fd, buf, size);
+
+ if (ret <= 0)
+ return -1;
+
+ if (repipe) {
+ int retw = write(STDOUT_FILENO, buf, ret);
+
+ if (retw <= 0 || retw != ret) {
+ pr_debug("repiping input file");
+ return -1;
+ }
+ }
+
+ size -= ret;
+ buf += ret;
+ }
+
+ return rsize;
+}
+
+static int do_read(void *data, int size)
+{
+ int r;
+
+ r = __do_read(input_fd, data, size);
+ if (r <= 0) {
+ pr_debug("reading input file (size expected=%d received=%d)",
+ size, r);
+ return -1;
+ }
+
+ trace_data_size += r;
+
+ return r;
+}
+
+/* If it fails, the next read will report it */
+static void skip(int size)
+{
+ char buf[BUFSIZ];
+ int r;
+
+ while (size) {
+ r = size > BUFSIZ ? BUFSIZ : size;
+ do_read(buf, r);
+ size -= r;
+ }
+}
+
+static unsigned int read4(struct tep_handle *pevent)
+{
+ unsigned int data;
+
+ if (do_read(&data, 4) < 0)
+ return 0;
+ return tep_read_number(pevent, &data, 4);
+}
+
+static unsigned long long read8(struct tep_handle *pevent)
+{
+ unsigned long long data;
+
+ if (do_read(&data, 8) < 0)
+ return 0;
+ return tep_read_number(pevent, &data, 8);
+}
+
+static char *read_string(void)
+{
+ char buf[BUFSIZ];
+ char *str = NULL;
+ int size = 0;
+ off_t r;
+ char c;
+
+ for (;;) {
+ r = read(input_fd, &c, 1);
+ if (r < 0) {
+ pr_debug("reading input file");
+ goto out;
+ }
+
+ if (!r) {
+ pr_debug("no data");
+ goto out;
+ }
+
+ if (repipe) {
+ int retw = write(STDOUT_FILENO, &c, 1);
+
+ if (retw <= 0 || retw != r) {
+ pr_debug("repiping input file string");
+ goto out;
+ }
+ }
+
+ buf[size++] = c;
+
+ if (!c)
+ break;
+ }
+
+ trace_data_size += size;
+
+ str = malloc(size);
+ if (str)
+ memcpy(str, buf, size);
+out:
+ return str;
+}
+
+static int read_proc_kallsyms(struct tep_handle *pevent)
+{
+ unsigned int size;
+
+ size = read4(pevent);
+ if (!size)
+ return 0;
+ /*
+ * Just skip it, now that we configure libtraceevent to use the
+ * tools/perf/ symbol resolver.
+ *
+ * We need to skip it so that we can continue parsing old perf.data
+ * files, that contains this /proc/kallsyms payload.
+ *
+ * Newer perf.data files will have just the 4-bytes zeros "kallsyms
+ * payload", so that older tools can continue reading it and interpret
+ * it as "no kallsyms payload is present".
+ */
+ lseek(input_fd, size, SEEK_CUR);
+ trace_data_size += size;
+ return 0;
+}
+
+static int read_ftrace_printk(struct tep_handle *pevent)
+{
+ unsigned int size;
+ char *buf;
+
+ /* it can have 0 size */
+ size = read4(pevent);
+ if (!size)
+ return 0;
+
+ buf = malloc(size + 1);
+ if (buf == NULL)
+ return -1;
+
+ if (do_read(buf, size) < 0) {
+ free(buf);
+ return -1;
+ }
+
+ buf[size] = '\0';
+
+ parse_ftrace_printk(pevent, buf, size);
+
+ free(buf);
+ return 0;
+}
+
+static int read_header_files(struct tep_handle *pevent)
+{
+ unsigned long long size;
+ char *header_page;
+ char buf[BUFSIZ];
+ int ret = 0;
+
+ if (do_read(buf, 12) < 0)
+ return -1;
+
+ if (memcmp(buf, "header_page", 12) != 0) {
+ pr_debug("did not read header page");
+ return -1;
+ }
+
+ size = read8(pevent);
+
+ header_page = malloc(size);
+ if (header_page == NULL)
+ return -1;
+
+ if (do_read(header_page, size) < 0) {
+ pr_debug("did not read header page");
+ free(header_page);
+ return -1;
+ }
+
+ if (!tep_parse_header_page(pevent, header_page, size,
+ tep_get_long_size(pevent))) {
+ /*
+ * The commit field in the page is of type long,
+ * use that instead, since it represents the kernel.
+ */
+ tep_set_long_size(pevent, tep_get_header_page_size(pevent));
+ }
+ free(header_page);
+
+ if (do_read(buf, 13) < 0)
+ return -1;
+
+ if (memcmp(buf, "header_event", 13) != 0) {
+ pr_debug("did not read header event");
+ return -1;
+ }
+
+ size = read8(pevent);
+ skip(size);
+
+ return ret;
+}
+
+static int read_ftrace_file(struct tep_handle *pevent, unsigned long long size)
+{
+ int ret;
+ char *buf;
+
+ buf = malloc(size);
+ if (buf == NULL) {
+ pr_debug("memory allocation failure\n");
+ return -1;
+ }
+
+ ret = do_read(buf, size);
+ if (ret < 0) {
+ pr_debug("error reading ftrace file.\n");
+ goto out;
+ }
+
+ ret = parse_ftrace_file(pevent, buf, size);
+ if (ret < 0)
+ pr_debug("error parsing ftrace file.\n");
+out:
+ free(buf);
+ return ret;
+}
+
+static int read_event_file(struct tep_handle *pevent, char *sys,
+ unsigned long long size)
+{
+ int ret;
+ char *buf;
+
+ buf = malloc(size);
+ if (buf == NULL) {
+ pr_debug("memory allocation failure\n");
+ return -1;
+ }
+
+ ret = do_read(buf, size);
+ if (ret < 0)
+ goto out;
+
+ ret = parse_event_file(pevent, buf, size, sys);
+ if (ret < 0)
+ pr_debug("error parsing event file.\n");
+out:
+ free(buf);
+ return ret;
+}
+
+static int read_ftrace_files(struct tep_handle *pevent)
+{
+ unsigned long long size;
+ int count;
+ int i;
+ int ret;
+
+ count = read4(pevent);
+
+ for (i = 0; i < count; i++) {
+ size = read8(pevent);
+ ret = read_ftrace_file(pevent, size);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static int read_event_files(struct tep_handle *pevent)
+{
+ unsigned long long size;
+ char *sys;
+ int systems;
+ int count;
+ int i,x;
+ int ret;
+
+ systems = read4(pevent);
+
+ for (i = 0; i < systems; i++) {
+ sys = read_string();
+ if (sys == NULL)
+ return -1;
+
+ count = read4(pevent);
+
+ for (x=0; x < count; x++) {
+ size = read8(pevent);
+ ret = read_event_file(pevent, sys, size);
+ if (ret) {
+ free(sys);
+ return ret;
+ }
+ }
+ free(sys);
+ }
+ return 0;
+}
+
+static int read_saved_cmdline(struct tep_handle *pevent)
+{
+ unsigned long long size;
+ char *buf;
+ int ret;
+
+ /* it can have 0 size */
+ size = read8(pevent);
+ if (!size)
+ return 0;
+
+ buf = malloc(size + 1);
+ if (buf == NULL) {
+ pr_debug("memory allocation failure\n");
+ return -1;
+ }
+
+ ret = do_read(buf, size);
+ if (ret < 0) {
+ pr_debug("error reading saved cmdlines\n");
+ goto out;
+ }
+ buf[ret] = '\0';
+
+ parse_saved_cmdline(pevent, buf, size);
+ ret = 0;
+out:
+ free(buf);
+ return ret;
+}
+
+ssize_t trace_report(int fd, struct trace_event *tevent, bool __repipe)
+{
+ char buf[BUFSIZ];
+ char test[] = { 23, 8, 68 };
+ char *version;
+ int show_version = 0;
+ int show_funcs = 0;
+ int show_printk = 0;
+ ssize_t size = -1;
+ int file_bigendian;
+ int host_bigendian;
+ int file_long_size;
+ int file_page_size;
+ struct tep_handle *pevent = NULL;
+ int err;
+
+ repipe = __repipe;
+ input_fd = fd;
+
+ if (do_read(buf, 3) < 0)
+ return -1;
+ if (memcmp(buf, test, 3) != 0) {
+ pr_debug("no trace data in the file");
+ return -1;
+ }
+
+ if (do_read(buf, 7) < 0)
+ return -1;
+ if (memcmp(buf, "tracing", 7) != 0) {
+ pr_debug("not a trace file (missing 'tracing' tag)");
+ return -1;
+ }
+
+ version = read_string();
+ if (version == NULL)
+ return -1;
+ if (show_version)
+ printf("version = %s\n", version);
+
+ if (do_read(buf, 1) < 0) {
+ free(version);
+ return -1;
+ }
+ file_bigendian = buf[0];
+ host_bigendian = host_is_bigendian() ? 1 : 0;
+
+ if (trace_event__init(tevent)) {
+ pr_debug("trace_event__init failed");
+ goto out;
+ }
+
+ pevent = tevent->pevent;
+
+ tep_set_flag(pevent, TEP_NSEC_OUTPUT);
+ tep_set_file_bigendian(pevent, file_bigendian);
+ tep_set_local_bigendian(pevent, host_bigendian);
+
+ if (do_read(buf, 1) < 0)
+ goto out;
+ file_long_size = buf[0];
+
+ file_page_size = read4(pevent);
+ if (!file_page_size)
+ goto out;
+
+ tep_set_long_size(pevent, file_long_size);
+ tep_set_page_size(pevent, file_page_size);
+
+ err = read_header_files(pevent);
+ if (err)
+ goto out;
+ err = read_ftrace_files(pevent);
+ if (err)
+ goto out;
+ err = read_event_files(pevent);
+ if (err)
+ goto out;
+ err = read_proc_kallsyms(pevent);
+ if (err)
+ goto out;
+ err = read_ftrace_printk(pevent);
+ if (err)
+ goto out;
+ if (atof(version) >= 0.6) {
+ err = read_saved_cmdline(pevent);
+ if (err)
+ goto out;
+ }
+
+ size = trace_data_size;
+ repipe = false;
+
+ if (show_funcs) {
+ tep_print_funcs(pevent);
+ } else if (show_printk) {
+ tep_print_printk(pevent);
+ }
+
+ pevent = NULL;
+
+out:
+ if (pevent)
+ trace_event__cleanup(tevent);
+ free(version);
+ return size;
+}
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
new file mode 100644
index 000000000..bd0000300
--- /dev/null
+++ b/tools/perf/util/trace-event-scripting.c
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * trace-event-scripting. Scripting engine common and initialization code.
+ *
+ * Copyright (C) 2009-2010 Tom Zanussi <tzanussi@gmail.com>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#ifdef HAVE_LIBTRACEEVENT
+#include <traceevent/event-parse.h>
+#endif
+
+#include "debug.h"
+#include "trace-event.h"
+#include "evsel.h"
+#include <linux/zalloc.h>
+#include "util/sample.h"
+
+struct scripting_context *scripting_context;
+
+void scripting_context__update(struct scripting_context *c,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel,
+ struct addr_location *al,
+ struct addr_location *addr_al)
+{
+ c->event_data = sample->raw_data;
+ c->pevent = NULL;
+#ifdef HAVE_LIBTRACEEVENT
+ if (evsel->tp_format)
+ c->pevent = evsel->tp_format->tep;
+#endif
+ c->event = event;
+ c->sample = sample;
+ c->evsel = evsel;
+ c->al = al;
+ c->addr_al = addr_al;
+}
+
+static int flush_script_unsupported(void)
+{
+ return 0;
+}
+
+static int stop_script_unsupported(void)
+{
+ return 0;
+}
+
+static void process_event_unsupported(union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct evsel *evsel __maybe_unused,
+ struct addr_location *al __maybe_unused,
+ struct addr_location *addr_al __maybe_unused)
+{
+}
+
+static void print_python_unsupported_msg(void)
+{
+ fprintf(stderr, "Python scripting not supported."
+ " Install libpython and rebuild perf to enable it.\n"
+ "For example:\n # apt-get install python-dev (ubuntu)"
+ "\n # yum install python-devel (Fedora)"
+ "\n etc.\n");
+}
+
+static int python_start_script_unsupported(const char *script __maybe_unused,
+ int argc __maybe_unused,
+ const char **argv __maybe_unused,
+ struct perf_session *session __maybe_unused)
+{
+ print_python_unsupported_msg();
+
+ return -1;
+}
+
+static int python_generate_script_unsupported(struct tep_handle *pevent
+ __maybe_unused,
+ const char *outfile
+ __maybe_unused)
+{
+ print_python_unsupported_msg();
+
+ return -1;
+}
+
+struct scripting_ops python_scripting_unsupported_ops = {
+ .name = "Python",
+ .dirname = "python",
+ .start_script = python_start_script_unsupported,
+ .flush_script = flush_script_unsupported,
+ .stop_script = stop_script_unsupported,
+ .process_event = process_event_unsupported,
+ .generate_script = python_generate_script_unsupported,
+};
+
+static void register_python_scripting(struct scripting_ops *scripting_ops)
+{
+ if (scripting_context == NULL)
+ scripting_context = malloc(sizeof(*scripting_context));
+
+ if (scripting_context == NULL ||
+ script_spec_register("Python", scripting_ops) ||
+ script_spec_register("py", scripting_ops)) {
+ pr_err("Error registering Python script extension: disabling it\n");
+ zfree(&scripting_context);
+ }
+}
+
+#ifndef HAVE_LIBPYTHON_SUPPORT
+void setup_python_scripting(void)
+{
+ register_python_scripting(&python_scripting_unsupported_ops);
+}
+#else
+extern struct scripting_ops python_scripting_ops;
+
+void setup_python_scripting(void)
+{
+ register_python_scripting(&python_scripting_ops);
+}
+#endif
+
+#ifdef HAVE_LIBTRACEEVENT
+static void print_perl_unsupported_msg(void)
+{
+ fprintf(stderr, "Perl scripting not supported."
+ " Install libperl and rebuild perf to enable it.\n"
+ "For example:\n # apt-get install libperl-dev (ubuntu)"
+ "\n # yum install 'perl(ExtUtils::Embed)' (Fedora)"
+ "\n etc.\n");
+}
+
+static int perl_start_script_unsupported(const char *script __maybe_unused,
+ int argc __maybe_unused,
+ const char **argv __maybe_unused,
+ struct perf_session *session __maybe_unused)
+{
+ print_perl_unsupported_msg();
+
+ return -1;
+}
+
+static int perl_generate_script_unsupported(struct tep_handle *pevent
+ __maybe_unused,
+ const char *outfile __maybe_unused)
+{
+ print_perl_unsupported_msg();
+
+ return -1;
+}
+
+struct scripting_ops perl_scripting_unsupported_ops = {
+ .name = "Perl",
+ .dirname = "perl",
+ .start_script = perl_start_script_unsupported,
+ .flush_script = flush_script_unsupported,
+ .stop_script = stop_script_unsupported,
+ .process_event = process_event_unsupported,
+ .generate_script = perl_generate_script_unsupported,
+};
+
+static void register_perl_scripting(struct scripting_ops *scripting_ops)
+{
+ if (scripting_context == NULL)
+ scripting_context = malloc(sizeof(*scripting_context));
+
+ if (scripting_context == NULL ||
+ script_spec_register("Perl", scripting_ops) ||
+ script_spec_register("pl", scripting_ops)) {
+ pr_err("Error registering Perl script extension: disabling it\n");
+ zfree(&scripting_context);
+ }
+}
+
+#ifndef HAVE_LIBPERL_SUPPORT
+void setup_perl_scripting(void)
+{
+ register_perl_scripting(&perl_scripting_unsupported_ops);
+}
+#else
+extern struct scripting_ops perl_scripting_ops;
+
+void setup_perl_scripting(void)
+{
+ register_perl_scripting(&perl_scripting_ops);
+}
+#endif
+#endif
diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c
new file mode 100644
index 000000000..8ad75b31e
--- /dev/null
+++ b/tools/perf/util/trace-event.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <traceevent/event-parse.h>
+#include <api/fs/tracing_path.h>
+#include <api/fs/fs.h>
+#include "trace-event.h"
+#include "machine.h"
+
+/*
+ * global trace_event object used by trace_event__tp_format
+ *
+ * TODO There's no cleanup call for this. Add some sort of
+ * __exit function support and call trace_event__cleanup
+ * there.
+ */
+static struct trace_event tevent;
+static bool tevent_initialized;
+
+int trace_event__init(struct trace_event *t)
+{
+ struct tep_handle *pevent = tep_alloc();
+
+ if (pevent) {
+ t->plugin_list = tep_load_plugins(pevent);
+ t->pevent = pevent;
+ }
+
+ return pevent ? 0 : -1;
+}
+
+static int trace_event__init2(void)
+{
+ int be = tep_is_bigendian();
+ struct tep_handle *pevent;
+
+ if (trace_event__init(&tevent))
+ return -1;
+
+ pevent = tevent.pevent;
+ tep_set_flag(pevent, TEP_NSEC_OUTPUT);
+ tep_set_file_bigendian(pevent, be);
+ tep_set_local_bigendian(pevent, be);
+ tevent_initialized = true;
+ return 0;
+}
+
+int trace_event__register_resolver(struct machine *machine,
+ tep_func_resolver_t *func)
+{
+ if (!tevent_initialized && trace_event__init2())
+ return -1;
+
+ return tep_set_function_resolver(tevent.pevent, func, machine);
+}
+
+void trace_event__cleanup(struct trace_event *t)
+{
+ tep_unload_plugins(t->plugin_list, t->pevent);
+ tep_free(t->pevent);
+}
+
+/*
+ * Returns pointer with encoded error via <linux/err.h> interface.
+ */
+static struct tep_event*
+tp_format(const char *sys, const char *name)
+{
+ char *tp_dir = get_events_file(sys);
+ struct tep_handle *pevent = tevent.pevent;
+ struct tep_event *event = NULL;
+ char path[PATH_MAX];
+ size_t size;
+ char *data;
+ int err;
+
+ if (!tp_dir)
+ return ERR_PTR(-errno);
+
+ scnprintf(path, PATH_MAX, "%s/%s/format", tp_dir, name);
+ put_events_file(tp_dir);
+
+ err = filename__read_str(path, &data, &size);
+ if (err)
+ return ERR_PTR(err);
+
+ tep_parse_format(pevent, &event, data, size, sys);
+
+ free(data);
+ return event;
+}
+
+/*
+ * Returns pointer with encoded error via <linux/err.h> interface.
+ */
+struct tep_event*
+trace_event__tp_format(const char *sys, const char *name)
+{
+ if (!tevent_initialized && trace_event__init2())
+ return ERR_PTR(-ENOMEM);
+
+ return tp_format(sys, name);
+}
+
+struct tep_event *trace_event__tp_format_id(int id)
+{
+ if (!tevent_initialized && trace_event__init2())
+ return ERR_PTR(-ENOMEM);
+
+ return tep_find_event(tevent.pevent, id);
+}
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
new file mode 100644
index 000000000..a69ee2941
--- /dev/null
+++ b/tools/perf/util/trace-event.h
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_UTIL_TRACE_EVENT_H
+#define _PERF_UTIL_TRACE_EVENT_H
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <linux/types.h>
+
+struct evlist;
+struct machine;
+struct perf_sample;
+union perf_event;
+struct perf_tool;
+struct thread;
+struct tep_plugin_list;
+struct evsel;
+
+struct trace_event {
+ struct tep_handle *pevent;
+ struct tep_plugin_list *plugin_list;
+};
+
+/* Computes a version number comparable with LIBTRACEEVENT_VERSION from Makefile.config. */
+#define MAKE_LIBTRACEEVENT_VERSION(a, b, c) ((a)*255*255+(b)*255+(c))
+
+typedef char *(tep_func_resolver_t)(void *priv,
+ unsigned long long *addrp, char **modp);
+
+bool have_tracepoints(struct list_head *evlist);
+
+int trace_event__init(struct trace_event *t);
+void trace_event__cleanup(struct trace_event *t);
+int trace_event__register_resolver(struct machine *machine,
+ tep_func_resolver_t *func);
+struct tep_event*
+trace_event__tp_format(const char *sys, const char *name);
+
+struct tep_event *trace_event__tp_format_id(int id);
+
+void event_format__fprintf(struct tep_event *event,
+ int cpu, void *data, int size, FILE *fp);
+
+void event_format__print(struct tep_event *event,
+ int cpu, void *data, int size);
+
+int parse_ftrace_file(struct tep_handle *pevent, char *buf, unsigned long size);
+int parse_event_file(struct tep_handle *pevent,
+ char *buf, unsigned long size, char *sys);
+
+unsigned long long
+raw_field_value(struct tep_event *event, const char *name, void *data);
+
+void parse_proc_kallsyms(struct tep_handle *pevent, char *file, unsigned int size);
+void parse_ftrace_printk(struct tep_handle *pevent, char *file, unsigned int size);
+void parse_saved_cmdline(struct tep_handle *pevent, char *file, unsigned int size);
+
+ssize_t trace_report(int fd, struct trace_event *tevent, bool repipe);
+
+unsigned long long read_size(struct tep_event *event, void *ptr, int size);
+unsigned long long eval_flag(const char *flag);
+
+int read_tracing_data(int fd, struct list_head *pattrs);
+
+/*
+ * Return the tracepoint name in the format "subsystem:event_name",
+ * callers should free the returned string.
+ */
+char *tracepoint_id_to_name(u64 config);
+
+struct tracing_data {
+ /* size is only valid if temp is 'true' */
+ ssize_t size;
+ bool temp;
+ char temp_file[50];
+};
+
+struct tracing_data *tracing_data_get(struct list_head *pattrs,
+ int fd, bool temp);
+int tracing_data_put(struct tracing_data *tdata);
+
+
+struct addr_location;
+
+struct perf_session;
+struct perf_stat_config;
+
+struct scripting_ops {
+ const char *name;
+ const char *dirname; /* For script path .../scripts/<dirname>/... */
+ int (*start_script)(const char *script, int argc, const char **argv,
+ struct perf_session *session);
+ int (*flush_script) (void);
+ int (*stop_script) (void);
+ void (*process_event) (union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel,
+ struct addr_location *al,
+ struct addr_location *addr_al);
+ void (*process_switch)(union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+ void (*process_auxtrace_error)(struct perf_session *session,
+ union perf_event *event);
+ void (*process_stat)(struct perf_stat_config *config,
+ struct evsel *evsel, u64 tstamp);
+ void (*process_stat_interval)(u64 tstamp);
+ void (*process_throttle)(union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+ int (*generate_script) (struct tep_handle *pevent, const char *outfile);
+};
+
+extern unsigned int scripting_max_stack;
+
+int script_spec_register(const char *spec, struct scripting_ops *ops);
+
+void script_fetch_insn(struct perf_sample *sample, struct thread *thread,
+ struct machine *machine);
+
+void setup_perl_scripting(void);
+void setup_python_scripting(void);
+
+struct scripting_context {
+ struct tep_handle *pevent;
+ void *event_data;
+ union perf_event *event;
+ struct perf_sample *sample;
+ struct evsel *evsel;
+ struct addr_location *al;
+ struct addr_location *addr_al;
+ struct perf_session *session;
+};
+
+void scripting_context__update(struct scripting_context *scripting_context,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel,
+ struct addr_location *al,
+ struct addr_location *addr_al);
+
+int common_pc(struct scripting_context *context);
+int common_flags(struct scripting_context *context);
+int common_lock_depth(struct scripting_context *context);
+
+#define SAMPLE_FLAGS_BUF_SIZE 64
+int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz);
+
+#if defined(LIBTRACEEVENT_VERSION) && LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0)
+#include <traceevent/event-parse.h>
+
+static inline bool tep_field_is_relative(unsigned long flags)
+{
+ return (flags & TEP_FIELD_IS_RELATIVE) != 0;
+}
+#else
+#include <linux/compiler.h>
+
+static inline bool tep_field_is_relative(unsigned long flags __maybe_unused)
+{
+ return false;
+}
+#endif
+
+#endif /* _PERF_UTIL_TRACE_EVENT_H */
diff --git a/tools/perf/util/tracepoint.c b/tools/perf/util/tracepoint.c
new file mode 100644
index 000000000..92dd8b455
--- /dev/null
+++ b/tools/perf/util/tracepoint.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "tracepoint.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/param.h>
+#include <unistd.h>
+
+#include <api/fs/tracing_path.h>
+
+int tp_event_has_id(const char *dir_path, struct dirent *evt_dir)
+{
+ char evt_path[MAXPATHLEN];
+ int fd;
+
+ snprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path, evt_dir->d_name);
+ fd = open(evt_path, O_RDONLY);
+ if (fd < 0)
+ return -EINVAL;
+ close(fd);
+
+ return 0;
+}
+
+/*
+ * Check whether event is in <debugfs_mount_point>/tracing/events
+ */
+int is_valid_tracepoint(const char *event_string)
+{
+ DIR *sys_dir, *evt_dir;
+ struct dirent *sys_dirent, *evt_dirent;
+ char evt_path[MAXPATHLEN];
+ char *dir_path;
+
+ sys_dir = tracing_events__opendir();
+ if (!sys_dir)
+ return 0;
+
+ for_each_subsystem(sys_dir, sys_dirent) {
+ dir_path = get_events_file(sys_dirent->d_name);
+ if (!dir_path)
+ continue;
+ evt_dir = opendir(dir_path);
+ if (!evt_dir)
+ goto next;
+
+ for_each_event(dir_path, evt_dir, evt_dirent) {
+ snprintf(evt_path, MAXPATHLEN, "%s:%s",
+ sys_dirent->d_name, evt_dirent->d_name);
+ if (!strcmp(evt_path, event_string)) {
+ closedir(evt_dir);
+ put_events_file(dir_path);
+ closedir(sys_dir);
+ return 1;
+ }
+ }
+ closedir(evt_dir);
+next:
+ put_events_file(dir_path);
+ }
+ closedir(sys_dir);
+ return 0;
+}
diff --git a/tools/perf/util/tracepoint.h b/tools/perf/util/tracepoint.h
new file mode 100644
index 000000000..c4a110fe8
--- /dev/null
+++ b/tools/perf/util/tracepoint.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_TRACEPOINT_H
+#define __PERF_TRACEPOINT_H
+
+#include <dirent.h>
+#include <string.h>
+
+int tp_event_has_id(const char *dir_path, struct dirent *evt_dir);
+
+#define for_each_event(dir_path, evt_dir, evt_dirent) \
+ while ((evt_dirent = readdir(evt_dir)) != NULL) \
+ if (evt_dirent->d_type == DT_DIR && \
+ (strcmp(evt_dirent->d_name, ".")) && \
+ (strcmp(evt_dirent->d_name, "..")) && \
+ (!tp_event_has_id(dir_path, evt_dirent)))
+
+#define for_each_subsystem(sys_dir, sys_dirent) \
+ while ((sys_dirent = readdir(sys_dir)) != NULL) \
+ if (sys_dirent->d_type == DT_DIR && \
+ (strcmp(sys_dirent->d_name, ".")) && \
+ (strcmp(sys_dirent->d_name, "..")))
+
+int is_valid_tracepoint(const char *event_string);
+
+#endif /* __PERF_TRACEPOINT_H */
diff --git a/tools/perf/util/trigger.h b/tools/perf/util/trigger.h
new file mode 100644
index 000000000..33e997f9c
--- /dev/null
+++ b/tools/perf/util/trigger.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TRIGGER_H_
+#define __TRIGGER_H_ 1
+
+#include "asm/bug.h"
+
+/*
+ * Use trigger to model operations which need to be executed when
+ * an event (a signal, for example) is observed.
+ *
+ * States and transits:
+ *
+ *
+ * OFF--> ON --> READY --(hit)--> HIT
+ * ^ |
+ * | (ready)
+ * | |
+ * \_____________/
+ *
+ * is_hit and is_ready are two key functions to query the state of
+ * a trigger. is_hit means the event already happen; is_ready means the
+ * trigger is waiting for the event.
+ */
+
+struct trigger {
+ volatile enum {
+ TRIGGER_ERROR = -2,
+ TRIGGER_OFF = -1,
+ TRIGGER_ON = 0,
+ TRIGGER_READY = 1,
+ TRIGGER_HIT = 2,
+ } state;
+ const char *name;
+};
+
+#define TRIGGER_WARN_ONCE(t, exp) \
+ WARN_ONCE(t->state != exp, "trigger '%s' state transist error: %d in %s()\n", \
+ t->name, t->state, __func__)
+
+static inline bool trigger_is_available(struct trigger *t)
+{
+ return t->state >= 0;
+}
+
+static inline bool trigger_is_error(struct trigger *t)
+{
+ return t->state <= TRIGGER_ERROR;
+}
+
+static inline void trigger_on(struct trigger *t)
+{
+ TRIGGER_WARN_ONCE(t, TRIGGER_OFF);
+ t->state = TRIGGER_ON;
+}
+
+static inline void trigger_ready(struct trigger *t)
+{
+ if (!trigger_is_available(t))
+ return;
+ t->state = TRIGGER_READY;
+}
+
+static inline void trigger_hit(struct trigger *t)
+{
+ if (!trigger_is_available(t))
+ return;
+ TRIGGER_WARN_ONCE(t, TRIGGER_READY);
+ t->state = TRIGGER_HIT;
+}
+
+static inline void trigger_off(struct trigger *t)
+{
+ if (!trigger_is_available(t))
+ return;
+ t->state = TRIGGER_OFF;
+}
+
+static inline void trigger_error(struct trigger *t)
+{
+ t->state = TRIGGER_ERROR;
+}
+
+static inline bool trigger_is_ready(struct trigger *t)
+{
+ return t->state == TRIGGER_READY;
+}
+
+static inline bool trigger_is_hit(struct trigger *t)
+{
+ return t->state == TRIGGER_HIT;
+}
+
+#define DEFINE_TRIGGER(n) \
+struct trigger n = {.state = TRIGGER_OFF, .name = #n}
+#endif
diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c
new file mode 100644
index 000000000..f19791d46
--- /dev/null
+++ b/tools/perf/util/tsc.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <string.h>
+
+#include <linux/compiler.h>
+#include <linux/perf_event.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+
+#include <asm/barrier.h>
+
+#include "event.h"
+#include "synthetic-events.h"
+#include "debug.h"
+#include "tsc.h"
+
+u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc)
+{
+ u64 t, quot, rem;
+
+ t = ns - tc->time_zero;
+ quot = t / tc->time_mult;
+ rem = t % tc->time_mult;
+ return (quot << tc->time_shift) +
+ (rem << tc->time_shift) / tc->time_mult;
+}
+
+u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc)
+{
+ u64 quot, rem;
+
+ if (tc->cap_user_time_short)
+ cyc = tc->time_cycles +
+ ((cyc - tc->time_cycles) & tc->time_mask);
+
+ quot = cyc >> tc->time_shift;
+ rem = cyc & (((u64)1 << tc->time_shift) - 1);
+ return tc->time_zero + quot * tc->time_mult +
+ ((rem * tc->time_mult) >> tc->time_shift);
+}
+
+int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
+ struct perf_tsc_conversion *tc)
+{
+ u32 seq;
+ int i = 0;
+
+ while (1) {
+ seq = pc->lock;
+ rmb();
+ tc->time_mult = pc->time_mult;
+ tc->time_shift = pc->time_shift;
+ tc->time_zero = pc->time_zero;
+ tc->time_cycles = pc->time_cycles;
+ tc->time_mask = pc->time_mask;
+ tc->cap_user_time_zero = pc->cap_user_time_zero;
+ tc->cap_user_time_short = pc->cap_user_time_short;
+ rmb();
+ if (pc->lock == seq && !(seq & 1))
+ break;
+ if (++i > 10000) {
+ pr_debug("failed to get perf_event_mmap_page lock\n");
+ return -EINVAL;
+ }
+ }
+
+ if (!tc->cap_user_time_zero)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
+ struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ union perf_event event = {
+ .time_conv = {
+ .header = {
+ .type = PERF_RECORD_TIME_CONV,
+ .size = sizeof(struct perf_record_time_conv),
+ },
+ },
+ };
+ struct perf_tsc_conversion tc;
+ int err;
+
+ if (!pc)
+ return 0;
+ err = perf_read_tsc_conversion(pc, &tc);
+ if (err == -EOPNOTSUPP)
+ return 0;
+ if (err)
+ return err;
+
+ pr_debug2("Synthesizing TSC conversion information\n");
+
+ event.time_conv.time_mult = tc.time_mult;
+ event.time_conv.time_shift = tc.time_shift;
+ event.time_conv.time_zero = tc.time_zero;
+ event.time_conv.time_cycles = tc.time_cycles;
+ event.time_conv.time_mask = tc.time_mask;
+ event.time_conv.cap_user_time_zero = tc.cap_user_time_zero;
+ event.time_conv.cap_user_time_short = tc.cap_user_time_short;
+
+ return process(tool, &event, NULL, machine);
+}
+
+u64 __weak rdtsc(void)
+{
+ return 0;
+}
+
+size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp)
+{
+ struct perf_record_time_conv *tc = (struct perf_record_time_conv *)event;
+ size_t ret;
+
+ ret = fprintf(fp, "\n... Time Shift %" PRI_lu64 "\n", tc->time_shift);
+ ret += fprintf(fp, "... Time Muliplier %" PRI_lu64 "\n", tc->time_mult);
+ ret += fprintf(fp, "... Time Zero %" PRI_lu64 "\n", tc->time_zero);
+
+ /*
+ * The event TIME_CONV was extended for the fields from "time_cycles"
+ * when supported cap_user_time_short, for backward compatibility,
+ * prints the extended fields only if they are contained in the event.
+ */
+ if (event_contains(*tc, time_cycles)) {
+ ret += fprintf(fp, "... Time Cycles %" PRI_lu64 "\n",
+ tc->time_cycles);
+ ret += fprintf(fp, "... Time Mask %#" PRI_lx64 "\n",
+ tc->time_mask);
+ ret += fprintf(fp, "... Cap Time Zero %" PRId32 "\n",
+ tc->cap_user_time_zero);
+ ret += fprintf(fp, "... Cap Time Short %" PRId32 "\n",
+ tc->cap_user_time_short);
+ }
+
+ return ret;
+}
diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h
new file mode 100644
index 000000000..88fd1c4c1
--- /dev/null
+++ b/tools/perf/util/tsc.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_TSC_H
+#define __PERF_TSC_H
+
+#include <linux/types.h>
+
+#include "event.h"
+
+struct perf_tsc_conversion {
+ u16 time_shift;
+ u32 time_mult;
+ u64 time_zero;
+ u64 time_cycles;
+ u64 time_mask;
+
+ bool cap_user_time_zero;
+ bool cap_user_time_short;
+};
+
+struct perf_event_mmap_page;
+
+int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
+ struct perf_tsc_conversion *tc);
+
+u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc);
+u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc);
+u64 rdtsc(void);
+double arch_get_tsc_freq(void);
+
+size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp);
+
+#endif // __PERF_TSC_H
diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c
new file mode 100644
index 000000000..32c39cfe2
--- /dev/null
+++ b/tools/perf/util/units.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "units.h"
+#include <inttypes.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+
+unsigned long parse_tag_value(const char *str, struct parse_tag *tags)
+{
+ struct parse_tag *i = tags;
+
+ while (i->tag) {
+ char *s = strchr(str, i->tag);
+
+ if (s) {
+ unsigned long int value;
+ char *endptr;
+
+ value = strtoul(str, &endptr, 10);
+ if (s != endptr)
+ break;
+
+ if (value > ULONG_MAX / i->mult)
+ break;
+ value *= i->mult;
+ return value;
+ }
+ i++;
+ }
+
+ return (unsigned long) -1;
+}
+
+double convert_unit_double(double value, char *unit)
+{
+ *unit = ' ';
+
+ if (value > 1000.0) {
+ value /= 1000.0;
+ *unit = 'K';
+ }
+
+ if (value > 1000.0) {
+ value /= 1000.0;
+ *unit = 'M';
+ }
+
+ if (value > 1000.0) {
+ value /= 1000.0;
+ *unit = 'G';
+ }
+
+ return value;
+}
+
+unsigned long convert_unit(unsigned long value, char *unit)
+{
+ double v = convert_unit_double((double)value, unit);
+
+ return (unsigned long)v;
+}
+
+int unit_number__scnprintf(char *buf, size_t size, u64 n)
+{
+ char unit[4] = "BKMG";
+ int i = 0;
+
+ while (((n / 1024) > 1) && (i < 3)) {
+ n /= 1024;
+ i++;
+ }
+
+ return scnprintf(buf, size, "%" PRIu64 "%c", n, unit[i]);
+}
diff --git a/tools/perf/util/units.h b/tools/perf/util/units.h
new file mode 100644
index 000000000..ea43e74e3
--- /dev/null
+++ b/tools/perf/util/units.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_UNIT_H
+#define PERF_UNIT_H
+
+#include <stddef.h>
+#include <linux/types.h>
+
+struct parse_tag {
+ char tag;
+ int mult;
+};
+
+unsigned long parse_tag_value(const char *str, struct parse_tag *tags);
+
+double convert_unit_double(double value, char *unit);
+unsigned long convert_unit(unsigned long value, char *unit);
+int unit_number__scnprintf(char *buf, size_t size, u64 n);
+
+#endif /* PERF_UNIT_H */
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
new file mode 100644
index 000000000..6013335a8
--- /dev/null
+++ b/tools/perf/util/unwind-libdw.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <elfutils/libdw.h>
+#include <elfutils/libdwfl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include "debug.h"
+#include "dso.h"
+#include "unwind.h"
+#include "unwind-libdw.h"
+#include "machine.h"
+#include "map.h"
+#include "symbol.h"
+#include "thread.h"
+#include <linux/types.h>
+#include <linux/zalloc.h>
+#include "event.h"
+#include "perf_regs.h"
+#include "callchain.h"
+#include "util/env.h"
+
+static char *debuginfo_path;
+
+static int __find_debuginfo(Dwfl_Module *mod __maybe_unused, void **userdata,
+ const char *modname __maybe_unused, Dwarf_Addr base __maybe_unused,
+ const char *file_name, const char *debuglink_file __maybe_unused,
+ GElf_Word debuglink_crc __maybe_unused, char **debuginfo_file_name)
+{
+ const struct dso *dso = *userdata;
+
+ assert(dso);
+ if (dso->symsrc_filename && strcmp (file_name, dso->symsrc_filename))
+ *debuginfo_file_name = strdup(dso->symsrc_filename);
+ return -1;
+}
+
+static const Dwfl_Callbacks offline_callbacks = {
+ .find_debuginfo = __find_debuginfo,
+ .debuginfo_path = &debuginfo_path,
+ .section_address = dwfl_offline_section_address,
+ // .find_elf is not set as we use dwfl_report_elf() instead.
+};
+
+static int __report_module(struct addr_location *al, u64 ip,
+ struct unwind_info *ui)
+{
+ Dwfl_Module *mod;
+ struct dso *dso = NULL;
+ Dwarf_Addr base;
+ /*
+ * Some callers will use al->sym, so we can't just use the
+ * cheaper thread__find_map() here.
+ */
+ thread__find_symbol(ui->thread, PERF_RECORD_MISC_USER, ip, al);
+
+ if (al->map)
+ dso = map__dso(al->map);
+
+ if (!dso)
+ return 0;
+
+ /*
+ * The generated JIT DSO files only map the code segment without
+ * ELF headers. Since JIT codes used to be packed in a memory
+ * segment, calculating the base address using pgoff falls into
+ * a different code in another DSO. So just use the map->start
+ * directly to pick the correct one.
+ */
+ if (!strncmp(dso->long_name, "/tmp/jitted-", 12))
+ base = map__start(al->map);
+ else
+ base = map__start(al->map) - map__pgoff(al->map);
+
+ mod = dwfl_addrmodule(ui->dwfl, ip);
+ if (mod) {
+ Dwarf_Addr s;
+
+ dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL);
+ if (s != base)
+ mod = NULL;
+ }
+
+ if (!mod) {
+ char filename[PATH_MAX];
+
+ __symbol__join_symfs(filename, sizeof(filename), dso->long_name);
+ mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1,
+ base, false);
+ }
+ if (!mod) {
+ char filename[PATH_MAX];
+
+ if (dso__build_id_filename(dso, filename, sizeof(filename), false))
+ mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1,
+ base, false);
+ }
+
+ if (mod) {
+ void **userdatap;
+
+ dwfl_module_info(mod, &userdatap, NULL, NULL, NULL, NULL, NULL, NULL);
+ *userdatap = dso;
+ }
+
+ return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1;
+}
+
+static int report_module(u64 ip, struct unwind_info *ui)
+{
+ struct addr_location al;
+ int res;
+
+ addr_location__init(&al);
+ res = __report_module(&al, ip, ui);
+ addr_location__exit(&al);
+ return res;
+}
+
+/*
+ * Store all entries within entries array,
+ * we will process it after we finish unwind.
+ */
+static int entry(u64 ip, struct unwind_info *ui)
+
+{
+ struct unwind_entry *e = &ui->entries[ui->idx++];
+ struct addr_location al;
+
+ addr_location__init(&al);
+ if (__report_module(&al, ip, ui)) {
+ addr_location__exit(&al);
+ return -1;
+ }
+
+ e->ip = ip;
+ e->ms.maps = al.maps;
+ e->ms.map = al.map;
+ e->ms.sym = al.sym;
+
+ pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n",
+ al.sym ? al.sym->name : "''",
+ ip,
+ al.map ? map__map_ip(al.map, ip) : (u64) 0);
+ addr_location__exit(&al);
+ return 0;
+}
+
+static pid_t next_thread(Dwfl *dwfl, void *arg, void **thread_argp)
+{
+ /* We want only single thread to be processed. */
+ if (*thread_argp != NULL)
+ return 0;
+
+ *thread_argp = arg;
+ return dwfl_pid(dwfl);
+}
+
+static int access_dso_mem(struct unwind_info *ui, Dwarf_Addr addr,
+ Dwarf_Word *data)
+{
+ struct addr_location al;
+ ssize_t size;
+ struct dso *dso;
+
+ addr_location__init(&al);
+ if (!thread__find_map(ui->thread, PERF_RECORD_MISC_USER, addr, &al)) {
+ pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
+ goto out_fail;
+ }
+ dso = map__dso(al.map);
+ if (!dso)
+ goto out_fail;
+
+ size = dso__data_read_addr(dso, al.map, ui->machine, addr, (u8 *) data, sizeof(*data));
+
+ addr_location__exit(&al);
+ return !(size == sizeof(*data));
+out_fail:
+ addr_location__exit(&al);
+ return -1;
+}
+
+static bool memory_read(Dwfl *dwfl __maybe_unused, Dwarf_Addr addr, Dwarf_Word *result,
+ void *arg)
+{
+ struct unwind_info *ui = arg;
+ const char *arch = perf_env__arch(ui->machine->env);
+ struct stack_dump *stack = &ui->sample->user_stack;
+ u64 start, end;
+ int offset;
+ int ret;
+
+ ret = perf_reg_value(&start, &ui->sample->user_regs,
+ perf_arch_reg_sp(arch));
+ if (ret)
+ return false;
+
+ end = start + stack->size;
+
+ /* Check overflow. */
+ if (addr + sizeof(Dwarf_Word) < addr)
+ return false;
+
+ if (addr < start || addr + sizeof(Dwarf_Word) > end) {
+ ret = access_dso_mem(ui, addr, result);
+ if (ret) {
+ pr_debug("unwind: access_mem 0x%" PRIx64 " not inside range"
+ " 0x%" PRIx64 "-0x%" PRIx64 "\n",
+ addr, start, end);
+ return false;
+ }
+ return true;
+ }
+
+ offset = addr - start;
+ *result = *(Dwarf_Word *)&stack->data[offset];
+ pr_debug("unwind: access_mem addr 0x%" PRIx64 ", val %lx, offset %d\n",
+ addr, (unsigned long)*result, offset);
+ return true;
+}
+
+static const Dwfl_Thread_Callbacks callbacks = {
+ .next_thread = next_thread,
+ .memory_read = memory_read,
+ .set_initial_registers = libdw__arch_set_initial_registers,
+};
+
+static int
+frame_callback(Dwfl_Frame *state, void *arg)
+{
+ struct unwind_info *ui = arg;
+ Dwarf_Addr pc;
+ bool isactivation;
+
+ if (!dwfl_frame_pc(state, &pc, NULL)) {
+ if (!ui->best_effort)
+ pr_err("%s", dwfl_errmsg(-1));
+ return DWARF_CB_ABORT;
+ }
+
+ // report the module before we query for isactivation
+ report_module(pc, ui);
+
+ if (!dwfl_frame_pc(state, &pc, &isactivation)) {
+ if (!ui->best_effort)
+ pr_err("%s", dwfl_errmsg(-1));
+ return DWARF_CB_ABORT;
+ }
+
+ if (!isactivation)
+ --pc;
+
+ return entry(pc, ui) || !(--ui->max_stack) ?
+ DWARF_CB_ABORT : DWARF_CB_OK;
+}
+
+int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
+ struct thread *thread,
+ struct perf_sample *data,
+ int max_stack,
+ bool best_effort)
+{
+ struct unwind_info *ui, ui_buf = {
+ .sample = data,
+ .thread = thread,
+ .machine = RC_CHK_ACCESS(thread__maps(thread))->machine,
+ .cb = cb,
+ .arg = arg,
+ .max_stack = max_stack,
+ .best_effort = best_effort
+ };
+ const char *arch = perf_env__arch(ui_buf.machine->env);
+ Dwarf_Word ip;
+ int err = -EINVAL, i;
+
+ if (!data->user_regs.regs)
+ return -EINVAL;
+
+ ui = zalloc(sizeof(ui_buf) + sizeof(ui_buf.entries[0]) * max_stack);
+ if (!ui)
+ return -ENOMEM;
+
+ *ui = ui_buf;
+
+ ui->dwfl = dwfl_begin(&offline_callbacks);
+ if (!ui->dwfl)
+ goto out;
+
+ err = perf_reg_value(&ip, &data->user_regs, perf_arch_reg_ip(arch));
+ if (err)
+ goto out;
+
+ err = report_module(ip, ui);
+ if (err)
+ goto out;
+
+ err = !dwfl_attach_state(ui->dwfl, EM_NONE, thread__tid(thread), &callbacks, ui);
+ if (err)
+ goto out;
+
+ err = dwfl_getthread_frames(ui->dwfl, thread__tid(thread), frame_callback, ui);
+
+ if (err && ui->max_stack != max_stack)
+ err = 0;
+
+ /*
+ * Display what we got based on the order setup.
+ */
+ for (i = 0; i < ui->idx && !err; i++) {
+ int j = i;
+
+ if (callchain_param.order == ORDER_CALLER)
+ j = ui->idx - i - 1;
+
+ err = ui->entries[j].ip ? ui->cb(&ui->entries[j], ui->arg) : 0;
+ }
+
+ out:
+ if (err)
+ pr_debug("unwind: failed with '%s'\n", dwfl_errmsg(-1));
+
+ dwfl_end(ui->dwfl);
+ free(ui);
+ return 0;
+}
diff --git a/tools/perf/util/unwind-libdw.h b/tools/perf/util/unwind-libdw.h
new file mode 100644
index 000000000..8c88bc4f2
--- /dev/null
+++ b/tools/perf/util/unwind-libdw.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_UNWIND_LIBDW_H
+#define __PERF_UNWIND_LIBDW_H
+
+#include <elfutils/libdwfl.h>
+#include "unwind.h"
+
+struct machine;
+struct perf_sample;
+struct thread;
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg);
+
+struct unwind_info {
+ Dwfl *dwfl;
+ struct perf_sample *sample;
+ struct machine *machine;
+ struct thread *thread;
+ unwind_entry_cb_t cb;
+ void *arg;
+ int max_stack;
+ int idx;
+ bool best_effort;
+ struct unwind_entry entries[];
+};
+
+#endif /* __PERF_UNWIND_LIBDW_H */
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
new file mode 100644
index 000000000..5e5c3395a
--- /dev/null
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -0,0 +1,812 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Post mortem Dwarf CFI based unwinding on top of regs and stack dumps.
+ *
+ * Lots of this code have been borrowed or heavily inspired from parts of
+ * the libunwind 0.99 code which are (amongst other contributors I may have
+ * forgotten):
+ *
+ * Copyright (C) 2002-2007 Hewlett-Packard Co
+ * Contributed by David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * And the bugs have been added by:
+ *
+ * Copyright (C) 2010, Frederic Weisbecker <fweisbec@gmail.com>
+ * Copyright (C) 2012, Jiri Olsa <jolsa@redhat.com>
+ *
+ */
+
+#include <elf.h>
+#include <errno.h>
+#include <gelf.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <linux/list.h>
+#include <linux/zalloc.h>
+#ifndef REMOTE_UNWIND_LIBUNWIND
+#include <libunwind.h>
+#include <libunwind-ptrace.h>
+#endif
+#include "callchain.h"
+#include "thread.h"
+#include "session.h"
+#include "perf_regs.h"
+#include "unwind.h"
+#include "map.h"
+#include "symbol.h"
+#include "debug.h"
+#include "asm/bug.h"
+#include "dso.h"
+
+extern int
+UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+ unw_word_t ip,
+ unw_dyn_info_t *di,
+ unw_proc_info_t *pi,
+ int need_unwind_info, void *arg);
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+extern int
+UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
+ unw_word_t ip,
+ unw_word_t segbase,
+ const char *obj_name, unw_word_t start,
+ unw_word_t end);
+
+#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame)
+
+#define DW_EH_PE_FORMAT_MASK 0x0f /* format of the encoded value */
+#define DW_EH_PE_APPL_MASK 0x70 /* how the value is to be applied */
+
+/* Pointer-encoding formats: */
+#define DW_EH_PE_omit 0xff
+#define DW_EH_PE_ptr 0x00 /* pointer-sized unsigned value */
+#define DW_EH_PE_udata4 0x03 /* unsigned 32-bit value */
+#define DW_EH_PE_udata8 0x04 /* unsigned 64-bit value */
+#define DW_EH_PE_sdata4 0x0b /* signed 32-bit value */
+#define DW_EH_PE_sdata8 0x0c /* signed 64-bit value */
+
+/* Pointer-encoding application: */
+#define DW_EH_PE_absptr 0x00 /* absolute value */
+#define DW_EH_PE_pcrel 0x10 /* rel. to addr. of encoded value */
+
+/*
+ * The following are not documented by LSB v1.3, yet they are used by
+ * GCC, presumably they aren't documented by LSB since they aren't
+ * used on Linux:
+ */
+#define DW_EH_PE_funcrel 0x40 /* start-of-procedure-relative */
+#define DW_EH_PE_aligned 0x50 /* aligned pointer */
+
+/* Flags intentionally not handled, since they're not needed:
+ * #define DW_EH_PE_indirect 0x80
+ * #define DW_EH_PE_uleb128 0x01
+ * #define DW_EH_PE_udata2 0x02
+ * #define DW_EH_PE_sleb128 0x09
+ * #define DW_EH_PE_sdata2 0x0a
+ * #define DW_EH_PE_textrel 0x20
+ * #define DW_EH_PE_datarel 0x30
+ */
+
+struct unwind_info {
+ struct perf_sample *sample;
+ struct machine *machine;
+ struct thread *thread;
+ bool best_effort;
+};
+
+#define dw_read(ptr, type, end) ({ \
+ type *__p = (type *) ptr; \
+ type __v; \
+ if ((__p + 1) > (type *) end) \
+ return -EINVAL; \
+ __v = *__p++; \
+ ptr = (typeof(ptr)) __p; \
+ __v; \
+ })
+
+static int __dw_read_encoded_value(u8 **p, u8 *end, u64 *val,
+ u8 encoding)
+{
+ u8 *cur = *p;
+ *val = 0;
+
+ switch (encoding) {
+ case DW_EH_PE_omit:
+ *val = 0;
+ goto out;
+ case DW_EH_PE_ptr:
+ *val = dw_read(cur, unsigned long, end);
+ goto out;
+ default:
+ break;
+ }
+
+ switch (encoding & DW_EH_PE_APPL_MASK) {
+ case DW_EH_PE_absptr:
+ break;
+ case DW_EH_PE_pcrel:
+ *val = (unsigned long) cur;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if ((encoding & 0x07) == 0x00)
+ encoding |= DW_EH_PE_udata4;
+
+ switch (encoding & DW_EH_PE_FORMAT_MASK) {
+ case DW_EH_PE_sdata4:
+ *val += dw_read(cur, s32, end);
+ break;
+ case DW_EH_PE_udata4:
+ *val += dw_read(cur, u32, end);
+ break;
+ case DW_EH_PE_sdata8:
+ *val += dw_read(cur, s64, end);
+ break;
+ case DW_EH_PE_udata8:
+ *val += dw_read(cur, u64, end);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ out:
+ *p = cur;
+ return 0;
+}
+
+#define dw_read_encoded_value(ptr, end, enc) ({ \
+ u64 __v; \
+ if (__dw_read_encoded_value(&ptr, end, &__v, enc)) { \
+ return -EINVAL; \
+ } \
+ __v; \
+ })
+
+static int elf_section_address_and_offset(int fd, const char *name, u64 *address, u64 *offset)
+{
+ Elf *elf;
+ GElf_Ehdr ehdr;
+ GElf_Shdr shdr;
+ int ret = -1;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL)
+ return -1;
+
+ if (gelf_getehdr(elf, &ehdr) == NULL)
+ goto out_err;
+
+ if (!elf_section_by_name(elf, &ehdr, &shdr, name, NULL))
+ goto out_err;
+
+ *address = shdr.sh_addr;
+ *offset = shdr.sh_offset;
+ ret = 0;
+out_err:
+ elf_end(elf);
+ return ret;
+}
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+static u64 elf_section_offset(int fd, const char *name)
+{
+ u64 address, offset = 0;
+
+ if (elf_section_address_and_offset(fd, name, &address, &offset))
+ return 0;
+
+ return offset;
+}
+#endif
+
+static u64 elf_base_address(int fd)
+{
+ Elf *elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ GElf_Phdr phdr;
+ u64 retval = 0;
+ size_t i, phdrnum = 0;
+
+ if (elf == NULL)
+ return 0;
+ (void)elf_getphdrnum(elf, &phdrnum);
+ /* PT_LOAD segments are sorted by p_vaddr, so the first has the minimum p_vaddr. */
+ for (i = 0; i < phdrnum; i++) {
+ if (gelf_getphdr(elf, i, &phdr) && phdr.p_type == PT_LOAD) {
+ retval = phdr.p_vaddr & -getpagesize();
+ break;
+ }
+ }
+
+ elf_end(elf);
+ return retval;
+}
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+static int elf_is_exec(int fd, const char *name)
+{
+ Elf *elf;
+ GElf_Ehdr ehdr;
+ int retval = 0;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL)
+ return 0;
+ if (gelf_getehdr(elf, &ehdr) == NULL)
+ goto out;
+
+ retval = (ehdr.e_type == ET_EXEC);
+
+out:
+ elf_end(elf);
+ pr_debug("unwind: elf_is_exec(%s): %d\n", name, retval);
+ return retval;
+}
+#endif
+
+struct table_entry {
+ u32 start_ip_offset;
+ u32 fde_offset;
+};
+
+struct eh_frame_hdr {
+ unsigned char version;
+ unsigned char eh_frame_ptr_enc;
+ unsigned char fde_count_enc;
+ unsigned char table_enc;
+
+ /*
+ * The rest of the header is variable-length and consists of the
+ * following members:
+ *
+ * encoded_t eh_frame_ptr;
+ * encoded_t fde_count;
+ */
+
+ /* A single encoded pointer should not be more than 8 bytes. */
+ u64 enc[2];
+
+ /*
+ * struct {
+ * encoded_t start_ip;
+ * encoded_t fde_addr;
+ * } binary_search_table[fde_count];
+ */
+ char data[];
+} __packed;
+
+static int unwind_spec_ehframe(struct dso *dso, struct machine *machine,
+ u64 offset, u64 *table_data_offset, u64 *fde_count)
+{
+ struct eh_frame_hdr hdr;
+ u8 *enc = (u8 *) &hdr.enc;
+ u8 *end = (u8 *) &hdr.data;
+ ssize_t r;
+
+ r = dso__data_read_offset(dso, machine, offset,
+ (u8 *) &hdr, sizeof(hdr));
+ if (r != sizeof(hdr))
+ return -EINVAL;
+
+ /* We dont need eh_frame_ptr, just skip it. */
+ dw_read_encoded_value(enc, end, hdr.eh_frame_ptr_enc);
+
+ *fde_count = dw_read_encoded_value(enc, end, hdr.fde_count_enc);
+ *table_data_offset = enc - (u8 *) &hdr;
+ return 0;
+}
+
+static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui,
+ u64 *table_data, u64 *segbase,
+ u64 *fde_count)
+{
+ struct map_rb_node *map_node;
+ u64 base_addr = UINT64_MAX;
+ int ret, fd;
+
+ if (dso->data.eh_frame_hdr_offset == 0) {
+ fd = dso__data_get_fd(dso, ui->machine);
+ if (fd < 0)
+ return -EINVAL;
+
+ /* Check the .eh_frame section for unwinding info */
+ ret = elf_section_address_and_offset(fd, ".eh_frame_hdr",
+ &dso->data.eh_frame_hdr_addr,
+ &dso->data.eh_frame_hdr_offset);
+ dso->data.elf_base_addr = elf_base_address(fd);
+ dso__data_put_fd(dso);
+ if (ret || dso->data.eh_frame_hdr_offset == 0)
+ return -EINVAL;
+ }
+
+ maps__for_each_entry(thread__maps(ui->thread), map_node) {
+ struct map *map = map_node->map;
+ u64 start = map__start(map) - map__pgoff(map);
+
+ if (map__dso(map) == dso && start < base_addr)
+ base_addr = start;
+ }
+ base_addr -= dso->data.elf_base_addr;
+ /* Address of .eh_frame_hdr */
+ *segbase = base_addr + dso->data.eh_frame_hdr_addr;
+ ret = unwind_spec_ehframe(dso, ui->machine, dso->data.eh_frame_hdr_offset,
+ table_data, fde_count);
+ if (ret)
+ return ret;
+ /* binary_search_table offset plus .eh_frame_hdr address */
+ *table_data += *segbase;
+ return 0;
+}
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+static int read_unwind_spec_debug_frame(struct dso *dso,
+ struct machine *machine, u64 *offset)
+{
+ int fd;
+ u64 ofs = dso->data.debug_frame_offset;
+
+ /* debug_frame can reside in:
+ * - dso
+ * - debug pointed by symsrc_filename
+ * - gnu_debuglink, which doesn't necessary
+ * has to be pointed by symsrc_filename
+ */
+ if (ofs == 0) {
+ fd = dso__data_get_fd(dso, machine);
+ if (fd >= 0) {
+ ofs = elf_section_offset(fd, ".debug_frame");
+ dso__data_put_fd(dso);
+ }
+
+ if (ofs <= 0) {
+ fd = open(dso->symsrc_filename, O_RDONLY);
+ if (fd >= 0) {
+ ofs = elf_section_offset(fd, ".debug_frame");
+ close(fd);
+ }
+ }
+
+ if (ofs <= 0) {
+ char *debuglink = malloc(PATH_MAX);
+ int ret = 0;
+
+ ret = dso__read_binary_type_filename(
+ dso, DSO_BINARY_TYPE__DEBUGLINK,
+ machine->root_dir, debuglink, PATH_MAX);
+ if (!ret) {
+ fd = open(debuglink, O_RDONLY);
+ if (fd >= 0) {
+ ofs = elf_section_offset(fd,
+ ".debug_frame");
+ close(fd);
+ }
+ }
+ if (ofs > 0) {
+ if (dso->symsrc_filename != NULL) {
+ pr_warning(
+ "%s: overwrite symsrc(%s,%s)\n",
+ __func__,
+ dso->symsrc_filename,
+ debuglink);
+ zfree(&dso->symsrc_filename);
+ }
+ dso->symsrc_filename = debuglink;
+ } else {
+ free(debuglink);
+ }
+ }
+
+ dso->data.debug_frame_offset = ofs;
+ }
+
+ *offset = ofs;
+ if (*offset)
+ return 0;
+
+ return -EINVAL;
+}
+#endif
+
+static struct map *find_map(unw_word_t ip, struct unwind_info *ui)
+{
+ struct addr_location al;
+ struct map *ret;
+
+ addr_location__init(&al);
+ thread__find_map(ui->thread, PERF_RECORD_MISC_USER, ip, &al);
+ ret = map__get(al.map);
+ addr_location__exit(&al);
+ return ret;
+}
+
+static int
+find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
+ int need_unwind_info, void *arg)
+{
+ struct unwind_info *ui = arg;
+ struct map *map;
+ struct dso *dso;
+ unw_dyn_info_t di;
+ u64 table_data, segbase, fde_count;
+ int ret = -EINVAL;
+
+ map = find_map(ip, ui);
+ if (!map)
+ return -EINVAL;
+
+ dso = map__dso(map);
+ if (!dso) {
+ map__put(map);
+ return -EINVAL;
+ }
+
+ pr_debug("unwind: find_proc_info dso %s\n", dso->name);
+
+ /* Check the .eh_frame section for unwinding info */
+ if (!read_unwind_spec_eh_frame(dso, ui, &table_data, &segbase, &fde_count)) {
+ memset(&di, 0, sizeof(di));
+ di.format = UNW_INFO_FORMAT_REMOTE_TABLE;
+ di.start_ip = map__start(map);
+ di.end_ip = map__end(map);
+ di.u.rti.segbase = segbase;
+ di.u.rti.table_data = table_data;
+ di.u.rti.table_len = fde_count * sizeof(struct table_entry)
+ / sizeof(unw_word_t);
+ ret = dwarf_search_unwind_table(as, ip, &di, pi,
+ need_unwind_info, arg);
+ }
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+ /* Check the .debug_frame section for unwinding info */
+ if (ret < 0 &&
+ !read_unwind_spec_debug_frame(dso, ui->machine, &segbase)) {
+ int fd = dso__data_get_fd(dso, ui->machine);
+ int is_exec = elf_is_exec(fd, dso->name);
+ u64 start = map__start(map);
+ unw_word_t base = is_exec ? 0 : start;
+ const char *symfile;
+
+ if (fd >= 0)
+ dso__data_put_fd(dso);
+
+ symfile = dso->symsrc_filename ?: dso->name;
+
+ memset(&di, 0, sizeof(di));
+ if (dwarf_find_debug_frame(0, &di, ip, base, symfile, start, map__end(map)))
+ ret = dwarf_search_unwind_table(as, ip, &di, pi,
+ need_unwind_info, arg);
+ }
+#endif
+ map__put(map);
+ return ret;
+}
+
+static int access_fpreg(unw_addr_space_t __maybe_unused as,
+ unw_regnum_t __maybe_unused num,
+ unw_fpreg_t __maybe_unused *val,
+ int __maybe_unused __write,
+ void __maybe_unused *arg)
+{
+ pr_err("unwind: access_fpreg unsupported\n");
+ return -UNW_EINVAL;
+}
+
+static int get_dyn_info_list_addr(unw_addr_space_t __maybe_unused as,
+ unw_word_t __maybe_unused *dil_addr,
+ void __maybe_unused *arg)
+{
+ return -UNW_ENOINFO;
+}
+
+static int resume(unw_addr_space_t __maybe_unused as,
+ unw_cursor_t __maybe_unused *cu,
+ void __maybe_unused *arg)
+{
+ pr_err("unwind: resume unsupported\n");
+ return -UNW_EINVAL;
+}
+
+static int
+get_proc_name(unw_addr_space_t __maybe_unused as,
+ unw_word_t __maybe_unused addr,
+ char __maybe_unused *bufp, size_t __maybe_unused buf_len,
+ unw_word_t __maybe_unused *offp, void __maybe_unused *arg)
+{
+ pr_err("unwind: get_proc_name unsupported\n");
+ return -UNW_EINVAL;
+}
+
+static int access_dso_mem(struct unwind_info *ui, unw_word_t addr,
+ unw_word_t *data)
+{
+ struct map *map;
+ struct dso *dso;
+ ssize_t size;
+
+ map = find_map(addr, ui);
+ if (!map) {
+ pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
+ return -1;
+ }
+
+ dso = map__dso(map);
+
+ if (!dso) {
+ map__put(map);
+ return -1;
+ }
+
+ size = dso__data_read_addr(dso, map, ui->machine,
+ addr, (u8 *) data, sizeof(*data));
+ map__put(map);
+ return !(size == sizeof(*data));
+}
+
+static int access_mem(unw_addr_space_t __maybe_unused as,
+ unw_word_t addr, unw_word_t *valp,
+ int __write, void *arg)
+{
+ struct unwind_info *ui = arg;
+ const char *arch = perf_env__arch(ui->machine->env);
+ struct stack_dump *stack = &ui->sample->user_stack;
+ u64 start, end;
+ int offset;
+ int ret;
+
+ /* Don't support write, probably not needed. */
+ if (__write || !stack || !ui->sample->user_regs.regs) {
+ *valp = 0;
+ return 0;
+ }
+
+ ret = perf_reg_value(&start, &ui->sample->user_regs,
+ perf_arch_reg_sp(arch));
+ if (ret)
+ return ret;
+
+ end = start + stack->size;
+
+ /* Check overflow. */
+ if (addr + sizeof(unw_word_t) < addr)
+ return -EINVAL;
+
+ if (addr < start || addr + sizeof(unw_word_t) >= end) {
+ ret = access_dso_mem(ui, addr, valp);
+ if (ret) {
+ pr_debug("unwind: access_mem %p not inside range"
+ " 0x%" PRIx64 "-0x%" PRIx64 "\n",
+ (void *) (uintptr_t) addr, start, end);
+ *valp = 0;
+ return ret;
+ }
+ return 0;
+ }
+
+ offset = addr - start;
+ *valp = *(unw_word_t *)&stack->data[offset];
+ pr_debug("unwind: access_mem addr %p val %lx, offset %d\n",
+ (void *) (uintptr_t) addr, (unsigned long)*valp, offset);
+ return 0;
+}
+
+static int access_reg(unw_addr_space_t __maybe_unused as,
+ unw_regnum_t regnum, unw_word_t *valp,
+ int __write, void *arg)
+{
+ struct unwind_info *ui = arg;
+ int id, ret;
+ u64 val;
+
+ /* Don't support write, I suspect we don't need it. */
+ if (__write) {
+ pr_err("unwind: access_reg w %d\n", regnum);
+ return 0;
+ }
+
+ if (!ui->sample->user_regs.regs) {
+ *valp = 0;
+ return 0;
+ }
+
+ id = LIBUNWIND__ARCH_REG_ID(regnum);
+ if (id < 0)
+ return -EINVAL;
+
+ ret = perf_reg_value(&val, &ui->sample->user_regs, id);
+ if (ret) {
+ if (!ui->best_effort)
+ pr_err("unwind: can't read reg %d\n", regnum);
+ return ret;
+ }
+
+ *valp = (unw_word_t) val;
+ pr_debug("unwind: reg %d, val %lx\n", regnum, (unsigned long)*valp);
+ return 0;
+}
+
+static void put_unwind_info(unw_addr_space_t __maybe_unused as,
+ unw_proc_info_t *pi __maybe_unused,
+ void *arg __maybe_unused)
+{
+ pr_debug("unwind: put_unwind_info called\n");
+}
+
+static int entry(u64 ip, struct thread *thread,
+ unwind_entry_cb_t cb, void *arg)
+{
+ struct unwind_entry e;
+ struct addr_location al;
+ int ret;
+
+ addr_location__init(&al);
+ e.ms.sym = thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al);
+ e.ip = ip;
+ e.ms.map = al.map;
+ e.ms.maps = al.maps;
+
+ pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n",
+ al.sym ? al.sym->name : "''",
+ ip,
+ al.map ? map__map_ip(al.map, ip) : (u64) 0);
+
+ ret = cb(&e, arg);
+ addr_location__exit(&al);
+ return ret;
+}
+
+static void display_error(int err)
+{
+ switch (err) {
+ case UNW_EINVAL:
+ pr_err("unwind: Only supports local.\n");
+ break;
+ case UNW_EUNSPEC:
+ pr_err("unwind: Unspecified error.\n");
+ break;
+ case UNW_EBADREG:
+ pr_err("unwind: Register unavailable.\n");
+ break;
+ default:
+ break;
+ }
+}
+
+static unw_accessors_t accessors = {
+ .find_proc_info = find_proc_info,
+ .put_unwind_info = put_unwind_info,
+ .get_dyn_info_list_addr = get_dyn_info_list_addr,
+ .access_mem = access_mem,
+ .access_reg = access_reg,
+ .access_fpreg = access_fpreg,
+ .resume = resume,
+ .get_proc_name = get_proc_name,
+};
+
+static int _unwind__prepare_access(struct maps *maps)
+{
+ void *addr_space = unw_create_addr_space(&accessors, 0);
+
+ RC_CHK_ACCESS(maps)->addr_space = addr_space;
+ if (!addr_space) {
+ pr_err("unwind: Can't create unwind address space.\n");
+ return -ENOMEM;
+ }
+
+ unw_set_caching_policy(addr_space, UNW_CACHE_GLOBAL);
+ return 0;
+}
+
+static void _unwind__flush_access(struct maps *maps)
+{
+ unw_flush_cache(maps__addr_space(maps), 0, 0);
+}
+
+static void _unwind__finish_access(struct maps *maps)
+{
+ unw_destroy_addr_space(maps__addr_space(maps));
+}
+
+static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
+ void *arg, int max_stack)
+{
+ const char *arch = perf_env__arch(ui->machine->env);
+ u64 val;
+ unw_word_t ips[max_stack];
+ unw_addr_space_t addr_space;
+ unw_cursor_t c;
+ int ret, i = 0;
+
+ ret = perf_reg_value(&val, &ui->sample->user_regs,
+ perf_arch_reg_ip(arch));
+ if (ret)
+ return ret;
+
+ ips[i++] = (unw_word_t) val;
+
+ /*
+ * If we need more than one entry, do the DWARF
+ * unwind itself.
+ */
+ if (max_stack - 1 > 0) {
+ WARN_ONCE(!ui->thread, "WARNING: ui->thread is NULL");
+ addr_space = maps__addr_space(thread__maps(ui->thread));
+
+ if (addr_space == NULL)
+ return -1;
+
+ ret = unw_init_remote(&c, addr_space, ui);
+ if (ret && !ui->best_effort)
+ display_error(ret);
+
+ while (!ret && (unw_step(&c) > 0) && i < max_stack) {
+ unw_get_reg(&c, UNW_REG_IP, &ips[i]);
+
+ /*
+ * Decrement the IP for any non-activation frames.
+ * this is required to properly find the srcline
+ * for caller frames.
+ * See also the documentation for dwfl_frame_pc(),
+ * which this code tries to replicate.
+ */
+ if (unw_is_signal_frame(&c) <= 0)
+ --ips[i];
+
+ ++i;
+ }
+
+ max_stack = i;
+ }
+
+ /*
+ * Display what we got based on the order setup.
+ */
+ for (i = 0; i < max_stack && !ret; i++) {
+ int j = i;
+
+ if (callchain_param.order == ORDER_CALLER)
+ j = max_stack - i - 1;
+ ret = ips[j] ? entry(ips[j], ui->thread, cb, arg) : 0;
+ }
+
+ return ret;
+}
+
+static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg,
+ struct thread *thread,
+ struct perf_sample *data, int max_stack,
+ bool best_effort)
+{
+ struct unwind_info ui = {
+ .sample = data,
+ .thread = thread,
+ .machine = maps__machine(thread__maps(thread)),
+ .best_effort = best_effort
+ };
+
+ if (!data->user_regs.regs)
+ return -EINVAL;
+
+ if (max_stack <= 0)
+ return -EINVAL;
+
+ return get_entries(&ui, cb, arg, max_stack);
+}
+
+static struct unwind_libunwind_ops
+_unwind_libunwind_ops = {
+ .prepare_access = _unwind__prepare_access,
+ .flush_access = _unwind__flush_access,
+ .finish_access = _unwind__finish_access,
+ .get_entries = _unwind__get_entries,
+};
+
+#ifndef REMOTE_UNWIND_LIBUNWIND
+struct unwind_libunwind_ops *
+local_unwind_libunwind_ops = &_unwind_libunwind_ops;
+#endif
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
new file mode 100644
index 000000000..76cd63de8
--- /dev/null
+++ b/tools/perf/util/unwind-libunwind.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "unwind.h"
+#include "dso.h"
+#include "map.h"
+#include "thread.h"
+#include "session.h"
+#include "debug.h"
+#include "env.h"
+#include "callchain.h"
+
+struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops;
+struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops;
+struct unwind_libunwind_ops __weak *arm64_unwind_libunwind_ops;
+
+static void unwind__register_ops(struct maps *maps, struct unwind_libunwind_ops *ops)
+{
+ RC_CHK_ACCESS(maps)->unwind_libunwind_ops = ops;
+}
+
+int unwind__prepare_access(struct maps *maps, struct map *map, bool *initialized)
+{
+ const char *arch;
+ enum dso_type dso_type;
+ struct unwind_libunwind_ops *ops = local_unwind_libunwind_ops;
+ struct dso *dso = map__dso(map);
+ struct machine *machine;
+ int err;
+
+ if (!dwarf_callchain_users)
+ return 0;
+
+ if (maps__addr_space(maps)) {
+ pr_debug("unwind: thread map already set, dso=%s\n", dso->name);
+ if (initialized)
+ *initialized = true;
+ return 0;
+ }
+
+ machine = maps__machine(maps);
+ /* env->arch is NULL for live-mode (i.e. perf top) */
+ if (!machine->env || !machine->env->arch)
+ goto out_register;
+
+ dso_type = dso__type(dso, machine);
+ if (dso_type == DSO__TYPE_UNKNOWN)
+ return 0;
+
+ arch = perf_env__arch(machine->env);
+
+ if (!strcmp(arch, "x86")) {
+ if (dso_type != DSO__TYPE_64BIT)
+ ops = x86_32_unwind_libunwind_ops;
+ } else if (!strcmp(arch, "arm64") || !strcmp(arch, "arm")) {
+ if (dso_type == DSO__TYPE_64BIT)
+ ops = arm64_unwind_libunwind_ops;
+ }
+
+ if (!ops) {
+ pr_warning_once("unwind: target platform=%s is not supported\n", arch);
+ return 0;
+ }
+out_register:
+ unwind__register_ops(maps, ops);
+
+ err = maps__unwind_libunwind_ops(maps)->prepare_access(maps);
+ if (initialized)
+ *initialized = err ? false : true;
+ return err;
+}
+
+void unwind__flush_access(struct maps *maps)
+{
+ const struct unwind_libunwind_ops *ops = maps__unwind_libunwind_ops(maps);
+
+ if (ops)
+ ops->flush_access(maps);
+}
+
+void unwind__finish_access(struct maps *maps)
+{
+ const struct unwind_libunwind_ops *ops = maps__unwind_libunwind_ops(maps);
+
+ if (ops)
+ ops->finish_access(maps);
+}
+
+int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
+ struct thread *thread,
+ struct perf_sample *data, int max_stack,
+ bool best_effort)
+{
+ const struct unwind_libunwind_ops *ops = maps__unwind_libunwind_ops(thread__maps(thread));
+
+ if (ops)
+ return ops->get_entries(cb, arg, thread, data, max_stack, best_effort);
+ return 0;
+}
diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h
new file mode 100644
index 000000000..9f7164c6d
--- /dev/null
+++ b/tools/perf/util/unwind.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __UNWIND_H
+#define __UNWIND_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include "util/map_symbol.h"
+
+struct maps;
+struct perf_sample;
+struct thread;
+
+struct unwind_entry {
+ struct map_symbol ms;
+ u64 ip;
+};
+
+typedef int (*unwind_entry_cb_t)(struct unwind_entry *entry, void *arg);
+
+struct unwind_libunwind_ops {
+ int (*prepare_access)(struct maps *maps);
+ void (*flush_access)(struct maps *maps);
+ void (*finish_access)(struct maps *maps);
+ int (*get_entries)(unwind_entry_cb_t cb, void *arg,
+ struct thread *thread,
+ struct perf_sample *data, int max_stack, bool best_effort);
+};
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+/*
+ * When best_effort is set, don't report errors and fail silently. This could
+ * be expanded in the future to be more permissive about things other than
+ * error messages.
+ */
+int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
+ struct thread *thread,
+ struct perf_sample *data, int max_stack,
+ bool best_effort);
+/* libunwind specific */
+#ifdef HAVE_LIBUNWIND_SUPPORT
+#ifndef LIBUNWIND__ARCH_REG_ID
+#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__arch_reg_id(regnum)
+#endif
+
+int LIBUNWIND__ARCH_REG_ID(int regnum);
+int unwind__prepare_access(struct maps *maps, struct map *map, bool *initialized);
+void unwind__flush_access(struct maps *maps);
+void unwind__finish_access(struct maps *maps);
+#else
+static inline int unwind__prepare_access(struct maps *maps __maybe_unused,
+ struct map *map __maybe_unused,
+ bool *initialized __maybe_unused)
+{
+ return 0;
+}
+
+static inline void unwind__flush_access(struct maps *maps __maybe_unused) {}
+static inline void unwind__finish_access(struct maps *maps __maybe_unused) {}
+#endif
+#else
+static inline int
+unwind__get_entries(unwind_entry_cb_t cb __maybe_unused,
+ void *arg __maybe_unused,
+ struct thread *thread __maybe_unused,
+ struct perf_sample *data __maybe_unused,
+ int max_stack __maybe_unused,
+ bool best_effort __maybe_unused)
+{
+ return 0;
+}
+
+static inline int unwind__prepare_access(struct maps *maps __maybe_unused,
+ struct map *map __maybe_unused,
+ bool *initialized __maybe_unused)
+{
+ return 0;
+}
+
+static inline void unwind__flush_access(struct maps *maps __maybe_unused) {}
+static inline void unwind__finish_access(struct maps *maps __maybe_unused) {}
+#endif /* HAVE_DWARF_UNWIND_SUPPORT */
+#endif /* __UNWIND_H */
diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c
new file mode 100644
index 000000000..4c8ffbad2
--- /dev/null
+++ b/tools/perf/util/usage.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * usage.c
+ *
+ * Various reporting routines.
+ * Originally copied from GIT source.
+ *
+ * Copyright (C) Linus Torvalds, 2005
+ */
+#include "util.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/compiler.h>
+
+const char perf_usage_string[] =
+ "perf [--version] [--help] [OPTIONS] COMMAND [ARGS]";
+
+const char perf_more_info_string[] =
+ "See 'perf help COMMAND' for more information on a specific command.";
+
+static __noreturn void usage_builtin(const char *err)
+{
+ fprintf(stderr, "\n Usage: %s\n", err);
+ exit(129);
+}
+
+/* If we are in a dlopen()ed .so write to a global variable would segfault
+ * (ugh), so keep things static. */
+static void (*usage_routine)(const char *err) __noreturn = usage_builtin;
+
+void usage(const char *err)
+{
+ usage_routine(err);
+}
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
new file mode 100644
index 000000000..c1fd9ba6d
--- /dev/null
+++ b/tools/perf/util/util.c
@@ -0,0 +1,554 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util.h"
+#include "debug.h"
+#include "event.h"
+#include <api/fs/fs.h>
+#include <sys/stat.h>
+#include <sys/utsname.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/capability.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/time64.h>
+#include <linux/overflow.h>
+#include <unistd.h>
+#include "cap.h"
+#include "strlist.h"
+#include "string2.h"
+
+/*
+ * XXX We need to find a better place for these things...
+ */
+
+const char *input_name;
+
+bool perf_singlethreaded = true;
+
+void perf_set_singlethreaded(void)
+{
+ perf_singlethreaded = true;
+}
+
+void perf_set_multithreaded(void)
+{
+ perf_singlethreaded = false;
+}
+
+int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH;
+int sysctl_perf_event_max_contexts_per_stack = PERF_MAX_CONTEXTS_PER_STACK;
+
+int sysctl__max_stack(void)
+{
+ int value;
+
+ if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0)
+ sysctl_perf_event_max_stack = value;
+
+ if (sysctl__read_int("kernel/perf_event_max_contexts_per_stack", &value) == 0)
+ sysctl_perf_event_max_contexts_per_stack = value;
+
+ return sysctl_perf_event_max_stack;
+}
+
+bool sysctl__nmi_watchdog_enabled(void)
+{
+ static bool cached;
+ static bool nmi_watchdog;
+ int value;
+
+ if (cached)
+ return nmi_watchdog;
+
+ if (sysctl__read_int("kernel/nmi_watchdog", &value) < 0)
+ return false;
+
+ nmi_watchdog = (value > 0) ? true : false;
+ cached = true;
+
+ return nmi_watchdog;
+}
+
+bool test_attr__enabled;
+
+bool perf_host = true;
+bool perf_guest = false;
+
+void event_attr_init(struct perf_event_attr *attr)
+{
+ if (!perf_host)
+ attr->exclude_host = 1;
+ if (!perf_guest)
+ attr->exclude_guest = 1;
+ /* to capture ABI version */
+ attr->size = sizeof(*attr);
+}
+
+int mkdir_p(char *path, mode_t mode)
+{
+ struct stat st;
+ int err;
+ char *d = path;
+
+ if (*d != '/')
+ return -1;
+
+ if (stat(path, &st) == 0)
+ return 0;
+
+ while (*++d == '/');
+
+ while ((d = strchr(d, '/'))) {
+ *d = '\0';
+ err = stat(path, &st) && mkdir(path, mode);
+ *d++ = '/';
+ if (err)
+ return -1;
+ while (*d == '/')
+ ++d;
+ }
+ return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0;
+}
+
+static bool match_pat(char *file, const char **pat)
+{
+ int i = 0;
+
+ if (!pat)
+ return true;
+
+ while (pat[i]) {
+ if (strglobmatch(file, pat[i]))
+ return true;
+
+ i++;
+ }
+
+ return false;
+}
+
+/*
+ * The depth specify how deep the removal will go.
+ * 0 - will remove only files under the 'path' directory
+ * 1 .. x - will dive in x-level deep under the 'path' directory
+ *
+ * If specified the pat is array of string patterns ended with NULL,
+ * which are checked upon every file/directory found. Only matching
+ * ones are removed.
+ *
+ * The function returns:
+ * 0 on success
+ * -1 on removal failure with errno set
+ * -2 on pattern failure
+ */
+static int rm_rf_depth_pat(const char *path, int depth, const char **pat)
+{
+ DIR *dir;
+ int ret;
+ struct dirent *d;
+ char namebuf[PATH_MAX];
+ struct stat statbuf;
+
+ /* Do not fail if there's no file. */
+ ret = lstat(path, &statbuf);
+ if (ret)
+ return 0;
+
+ /* Try to remove any file we get. */
+ if (!(statbuf.st_mode & S_IFDIR))
+ return unlink(path);
+
+ /* We have directory in path. */
+ dir = opendir(path);
+ if (dir == NULL)
+ return -1;
+
+ while ((d = readdir(dir)) != NULL && !ret) {
+
+ if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
+ continue;
+
+ if (!match_pat(d->d_name, pat)) {
+ ret = -2;
+ break;
+ }
+
+ scnprintf(namebuf, sizeof(namebuf), "%s/%s",
+ path, d->d_name);
+
+ /* We have to check symbolic link itself */
+ ret = lstat(namebuf, &statbuf);
+ if (ret < 0) {
+ pr_debug("stat failed: %s\n", namebuf);
+ break;
+ }
+
+ if (S_ISDIR(statbuf.st_mode))
+ ret = depth ? rm_rf_depth_pat(namebuf, depth - 1, pat) : 0;
+ else
+ ret = unlink(namebuf);
+ }
+ closedir(dir);
+
+ if (ret < 0)
+ return ret;
+
+ return rmdir(path);
+}
+
+static int rm_rf_a_kcore_dir(const char *path, const char *name)
+{
+ char kcore_dir_path[PATH_MAX];
+ const char *pat[] = {
+ "kcore",
+ "kallsyms",
+ "modules",
+ NULL,
+ };
+
+ snprintf(kcore_dir_path, sizeof(kcore_dir_path), "%s/%s", path, name);
+
+ return rm_rf_depth_pat(kcore_dir_path, 0, pat);
+}
+
+static bool kcore_dir_filter(const char *name __maybe_unused, struct dirent *d)
+{
+ const char *pat[] = {
+ "kcore_dir",
+ "kcore_dir__[1-9]*",
+ NULL,
+ };
+
+ return match_pat(d->d_name, pat);
+}
+
+static int rm_rf_kcore_dir(const char *path)
+{
+ struct strlist *kcore_dirs;
+ struct str_node *nd;
+ int ret;
+
+ kcore_dirs = lsdir(path, kcore_dir_filter);
+
+ if (!kcore_dirs)
+ return 0;
+
+ strlist__for_each_entry(nd, kcore_dirs) {
+ ret = rm_rf_a_kcore_dir(path, nd->s);
+ if (ret)
+ return ret;
+ }
+
+ strlist__delete(kcore_dirs);
+
+ return 0;
+}
+
+int rm_rf_perf_data(const char *path)
+{
+ const char *pat[] = {
+ "data",
+ "data.*",
+ NULL,
+ };
+
+ rm_rf_kcore_dir(path);
+
+ return rm_rf_depth_pat(path, 0, pat);
+}
+
+int rm_rf(const char *path)
+{
+ return rm_rf_depth_pat(path, INT_MAX, NULL);
+}
+
+/* A filter which removes dot files */
+bool lsdir_no_dot_filter(const char *name __maybe_unused, struct dirent *d)
+{
+ return d->d_name[0] != '.';
+}
+
+/* lsdir reads a directory and store it in strlist */
+struct strlist *lsdir(const char *name,
+ bool (*filter)(const char *, struct dirent *))
+{
+ struct strlist *list = NULL;
+ DIR *dir;
+ struct dirent *d;
+
+ dir = opendir(name);
+ if (!dir)
+ return NULL;
+
+ list = strlist__new(NULL, NULL);
+ if (!list) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ while ((d = readdir(dir)) != NULL) {
+ if (!filter || filter(name, d))
+ strlist__add(list, d->d_name);
+ }
+
+out:
+ closedir(dir);
+ return list;
+}
+
+size_t hex_width(u64 v)
+{
+ size_t n = 1;
+
+ while ((v >>= 4))
+ ++n;
+
+ return n;
+}
+
+int perf_event_paranoid(void)
+{
+ int value;
+
+ if (sysctl__read_int("kernel/perf_event_paranoid", &value))
+ return INT_MAX;
+
+ return value;
+}
+
+bool perf_event_paranoid_check(int max_level)
+{
+ return perf_cap__capable(CAP_SYS_ADMIN) ||
+ perf_cap__capable(CAP_PERFMON) ||
+ perf_event_paranoid() <= max_level;
+}
+
+static int
+fetch_ubuntu_kernel_version(unsigned int *puint)
+{
+ ssize_t len;
+ size_t line_len = 0;
+ char *ptr, *line = NULL;
+ int version, patchlevel, sublevel, err;
+ FILE *vsig;
+
+ if (!puint)
+ return 0;
+
+ vsig = fopen("/proc/version_signature", "r");
+ if (!vsig) {
+ pr_debug("Open /proc/version_signature failed: %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ len = getline(&line, &line_len, vsig);
+ fclose(vsig);
+ err = -1;
+ if (len <= 0) {
+ pr_debug("Reading from /proc/version_signature failed: %s\n",
+ strerror(errno));
+ goto errout;
+ }
+
+ ptr = strrchr(line, ' ');
+ if (!ptr) {
+ pr_debug("Parsing /proc/version_signature failed: %s\n", line);
+ goto errout;
+ }
+
+ err = sscanf(ptr + 1, "%d.%d.%d",
+ &version, &patchlevel, &sublevel);
+ if (err != 3) {
+ pr_debug("Unable to get kernel version from /proc/version_signature '%s'\n",
+ line);
+ goto errout;
+ }
+
+ *puint = (version << 16) + (patchlevel << 8) + sublevel;
+ err = 0;
+errout:
+ free(line);
+ return err;
+}
+
+int
+fetch_kernel_version(unsigned int *puint, char *str,
+ size_t str_size)
+{
+ struct utsname utsname;
+ int version, patchlevel, sublevel, err;
+ bool int_ver_ready = false;
+
+ if (access("/proc/version_signature", R_OK) == 0)
+ if (!fetch_ubuntu_kernel_version(puint))
+ int_ver_ready = true;
+
+ if (uname(&utsname))
+ return -1;
+
+ if (str && str_size) {
+ strncpy(str, utsname.release, str_size);
+ str[str_size - 1] = '\0';
+ }
+
+ if (!puint || int_ver_ready)
+ return 0;
+
+ err = sscanf(utsname.release, "%d.%d.%d",
+ &version, &patchlevel, &sublevel);
+
+ if (err != 3) {
+ pr_debug("Unable to get kernel version from uname '%s'\n",
+ utsname.release);
+ return -1;
+ }
+
+ *puint = (version << 16) + (patchlevel << 8) + sublevel;
+ return 0;
+}
+
+int perf_tip(char **strp, const char *dirpath)
+{
+ struct strlist *tips;
+ struct str_node *node;
+ struct strlist_config conf = {
+ .dirname = dirpath,
+ .file_only = true,
+ };
+ int ret = 0;
+
+ *strp = NULL;
+ tips = strlist__new("tips.txt", &conf);
+ if (tips == NULL)
+ return -errno;
+
+ if (strlist__nr_entries(tips) == 0)
+ goto out;
+
+ node = strlist__entry(tips, random() % strlist__nr_entries(tips));
+ if (asprintf(strp, "Tip: %s", node->s) < 0)
+ ret = -ENOMEM;
+
+out:
+ strlist__delete(tips);
+
+ return ret;
+}
+
+char *perf_exe(char *buf, int len)
+{
+ int n = readlink("/proc/self/exe", buf, len);
+ if (n > 0) {
+ buf[n] = 0;
+ return buf;
+ }
+ return strcpy(buf, "perf");
+}
+
+void perf_debuginfod_setup(struct perf_debuginfod *di)
+{
+ /*
+ * By default '!di->set' we clear DEBUGINFOD_URLS, so debuginfod
+ * processing is not triggered, otherwise we set it to 'di->urls'
+ * value. If 'di->urls' is "system" we keep DEBUGINFOD_URLS value.
+ */
+ if (!di->set)
+ setenv("DEBUGINFOD_URLS", "", 1);
+ else if (di->urls && strcmp(di->urls, "system"))
+ setenv("DEBUGINFOD_URLS", di->urls, 1);
+
+ pr_debug("DEBUGINFOD_URLS=%s\n", getenv("DEBUGINFOD_URLS"));
+
+#ifndef HAVE_DEBUGINFOD_SUPPORT
+ if (di->set)
+ pr_warning("WARNING: debuginfod support requested, but perf is not built with it\n");
+#endif
+}
+
+/*
+ * Return a new filename prepended with task's root directory if it's in
+ * a chroot. Callers should free the returned string.
+ */
+char *filename_with_chroot(int pid, const char *filename)
+{
+ char buf[PATH_MAX];
+ char proc_root[32];
+ char *new_name = NULL;
+ int ret;
+
+ scnprintf(proc_root, sizeof(proc_root), "/proc/%d/root", pid);
+ ret = readlink(proc_root, buf, sizeof(buf) - 1);
+ if (ret <= 0)
+ return NULL;
+
+ /* readlink(2) does not append a null byte to buf */
+ buf[ret] = '\0';
+
+ if (!strcmp(buf, "/"))
+ return NULL;
+
+ if (strstr(buf, "(deleted)"))
+ return NULL;
+
+ if (asprintf(&new_name, "%s/%s", buf, filename) < 0)
+ return NULL;
+
+ return new_name;
+}
+
+/*
+ * Reallocate an array *arr of size *arr_sz so that it is big enough to contain
+ * x elements of size msz, initializing new entries to *init_val or zero if
+ * init_val is NULL
+ */
+int do_realloc_array_as_needed(void **arr, size_t *arr_sz, size_t x, size_t msz, const void *init_val)
+{
+ size_t new_sz = *arr_sz;
+ void *new_arr;
+ size_t i;
+
+ if (!new_sz)
+ new_sz = msz >= 64 ? 1 : roundup(64, msz); /* Start with at least 64 bytes */
+ while (x >= new_sz) {
+ if (check_mul_overflow(new_sz, (size_t)2, &new_sz))
+ return -ENOMEM;
+ }
+ if (new_sz == *arr_sz)
+ return 0;
+ new_arr = calloc(new_sz, msz);
+ if (!new_arr)
+ return -ENOMEM;
+ if (*arr_sz)
+ memcpy(new_arr, *arr, *arr_sz * msz);
+ if (init_val) {
+ for (i = *arr_sz; i < new_sz; i++)
+ memcpy(new_arr + (i * msz), init_val, msz);
+ }
+ *arr = new_arr;
+ *arr_sz = new_sz;
+ return 0;
+}
+
+#ifndef HAVE_SCHED_GETCPU_SUPPORT
+int sched_getcpu(void)
+{
+#ifdef __NR_getcpu
+ unsigned int cpu;
+ int err = syscall(__NR_getcpu, &cpu, NULL, NULL);
+
+ if (!err)
+ return cpu;
+#else
+ errno = ENOSYS;
+#endif
+ return -1;
+}
+#endif
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
new file mode 100644
index 000000000..7c8915d92
--- /dev/null
+++ b/tools/perf/util/util.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_UTIL_H
+#define __PERF_UTIL_H
+
+#define _BSD_SOURCE 1
+/* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */
+#define _DEFAULT_SOURCE 1
+
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/compiler.h>
+#include <sys/types.h>
+#ifndef __cplusplus
+#include <internal/cpumap.h>
+#endif
+
+extern const char perf_usage_string[];
+extern const char perf_more_info_string[];
+
+extern const char *input_name;
+
+extern bool perf_host;
+extern bool perf_guest;
+
+/* General helper functions */
+void usage(const char *err) __noreturn;
+void die(const char *err, ...) __noreturn __printf(1, 2);
+
+struct dirent;
+struct strlist;
+
+int mkdir_p(char *path, mode_t mode);
+int rm_rf(const char *path);
+int rm_rf_perf_data(const char *path);
+struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dirent *));
+bool lsdir_no_dot_filter(const char *name, struct dirent *d);
+
+size_t hex_width(u64 v);
+
+int sysctl__max_stack(void);
+
+bool sysctl__nmi_watchdog_enabled(void);
+
+int fetch_kernel_version(unsigned int *puint,
+ char *str, size_t str_sz);
+#define KVER_VERSION(x) (((x) >> 16) & 0xff)
+#define KVER_PATCHLEVEL(x) (((x) >> 8) & 0xff)
+#define KVER_SUBLEVEL(x) ((x) & 0xff)
+#define KVER_FMT "%d.%d.%d"
+#define KVER_PARAM(x) KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x)
+
+int perf_tip(char **strp, const char *dirpath);
+
+#ifndef HAVE_SCHED_GETCPU_SUPPORT
+int sched_getcpu(void);
+#endif
+
+extern bool perf_singlethreaded;
+
+void perf_set_singlethreaded(void);
+void perf_set_multithreaded(void);
+
+char *perf_exe(char *buf, int len);
+
+#ifndef O_CLOEXEC
+#ifdef __sparc__
+#define O_CLOEXEC 0x400000
+#elif defined(__alpha__) || defined(__hppa__)
+#define O_CLOEXEC 010000000
+#else
+#define O_CLOEXEC 02000000
+#endif
+#endif
+
+extern bool test_attr__enabled;
+void test_attr__ready(void);
+void test_attr__init(void);
+struct perf_event_attr;
+void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
+ int fd, int group_fd, unsigned long flags);
+
+struct perf_debuginfod {
+ const char *urls;
+ bool set;
+};
+void perf_debuginfod_setup(struct perf_debuginfod *di);
+
+char *filename_with_chroot(int pid, const char *filename);
+
+int do_realloc_array_as_needed(void **arr, size_t *arr_sz, size_t x,
+ size_t msz, const void *init_val);
+
+#define realloc_array_as_needed(a, n, x, v) ({ \
+ typeof(x) __x = (x); \
+ __x >= (n) ? \
+ do_realloc_array_as_needed((void **)&(a), \
+ &(n), \
+ __x, \
+ sizeof(*(a)), \
+ (const void *)(v)) : \
+ 0; \
+ })
+
+static inline bool host_is_bigendian(void)
+{
+#ifdef __BYTE_ORDER__
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ return false;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ return true;
+#else
+#error "Unrecognized __BYTE_ORDER__"
+#endif
+#else /* !__BYTE_ORDER__ */
+ unsigned char str[] = { 0x1, 0x2, 0x3, 0x4, 0x0, 0x0, 0x0, 0x0};
+ unsigned int *ptr;
+
+ ptr = (unsigned int *)(void *)str;
+ return *ptr == 0x01020304;
+#endif
+}
+
+#endif /* __PERF_UTIL_H */
diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c
new file mode 100644
index 000000000..b9823f414
--- /dev/null
+++ b/tools/perf/util/values.c
@@ -0,0 +1,305 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <linux/zalloc.h>
+
+#include "values.h"
+#include "debug.h"
+
+int perf_read_values_init(struct perf_read_values *values)
+{
+ values->threads_max = 16;
+ values->pid = malloc(values->threads_max * sizeof(*values->pid));
+ values->tid = malloc(values->threads_max * sizeof(*values->tid));
+ values->value = zalloc(values->threads_max * sizeof(*values->value));
+ if (!values->pid || !values->tid || !values->value) {
+ pr_debug("failed to allocate read_values threads arrays");
+ goto out_free_pid;
+ }
+ values->threads = 0;
+
+ values->counters_max = 16;
+ values->counterrawid = malloc(values->counters_max
+ * sizeof(*values->counterrawid));
+ values->countername = malloc(values->counters_max
+ * sizeof(*values->countername));
+ if (!values->counterrawid || !values->countername) {
+ pr_debug("failed to allocate read_values counters arrays");
+ goto out_free_counter;
+ }
+ values->counters = 0;
+
+ return 0;
+
+out_free_counter:
+ zfree(&values->counterrawid);
+ zfree(&values->countername);
+out_free_pid:
+ zfree(&values->pid);
+ zfree(&values->tid);
+ zfree(&values->value);
+ return -ENOMEM;
+}
+
+void perf_read_values_destroy(struct perf_read_values *values)
+{
+ int i;
+
+ if (!values->threads_max || !values->counters_max)
+ return;
+
+ for (i = 0; i < values->threads; i++)
+ zfree(&values->value[i]);
+ zfree(&values->value);
+ zfree(&values->pid);
+ zfree(&values->tid);
+ zfree(&values->counterrawid);
+ for (i = 0; i < values->counters; i++)
+ zfree(&values->countername[i]);
+ zfree(&values->countername);
+}
+
+static int perf_read_values__enlarge_threads(struct perf_read_values *values)
+{
+ int nthreads_max = values->threads_max * 2;
+ void *npid = realloc(values->pid, nthreads_max * sizeof(*values->pid)),
+ *ntid = realloc(values->tid, nthreads_max * sizeof(*values->tid)),
+ *nvalue = realloc(values->value, nthreads_max * sizeof(*values->value));
+
+ if (!npid || !ntid || !nvalue)
+ goto out_err;
+
+ values->threads_max = nthreads_max;
+ values->pid = npid;
+ values->tid = ntid;
+ values->value = nvalue;
+ return 0;
+out_err:
+ free(npid);
+ free(ntid);
+ free(nvalue);
+ pr_debug("failed to enlarge read_values threads arrays");
+ return -ENOMEM;
+}
+
+static int perf_read_values__findnew_thread(struct perf_read_values *values,
+ u32 pid, u32 tid)
+{
+ int i;
+
+ for (i = 0; i < values->threads; i++)
+ if (values->pid[i] == pid && values->tid[i] == tid)
+ return i;
+
+ if (values->threads == values->threads_max) {
+ i = perf_read_values__enlarge_threads(values);
+ if (i < 0)
+ return i;
+ }
+
+ i = values->threads;
+
+ values->value[i] = zalloc(values->counters_max * sizeof(**values->value));
+ if (!values->value[i]) {
+ pr_debug("failed to allocate read_values counters array");
+ return -ENOMEM;
+ }
+ values->pid[i] = pid;
+ values->tid[i] = tid;
+ values->threads = i + 1;
+
+ return i;
+}
+
+static int perf_read_values__enlarge_counters(struct perf_read_values *values)
+{
+ char **countername;
+ int i, counters_max = values->counters_max * 2;
+ u64 *counterrawid = realloc(values->counterrawid, counters_max * sizeof(*values->counterrawid));
+
+ if (!counterrawid) {
+ pr_debug("failed to enlarge read_values rawid array");
+ goto out_enomem;
+ }
+
+ countername = realloc(values->countername, counters_max * sizeof(*values->countername));
+ if (!countername) {
+ pr_debug("failed to enlarge read_values rawid array");
+ goto out_free_rawid;
+ }
+
+ for (i = 0; i < values->threads; i++) {
+ u64 *value = realloc(values->value[i], counters_max * sizeof(**values->value));
+ int j;
+
+ if (!value) {
+ pr_debug("failed to enlarge read_values ->values array");
+ goto out_free_name;
+ }
+
+ for (j = values->counters_max; j < counters_max; j++)
+ value[j] = 0;
+
+ values->value[i] = value;
+ }
+
+ values->counters_max = counters_max;
+ values->counterrawid = counterrawid;
+ values->countername = countername;
+
+ return 0;
+out_free_name:
+ free(countername);
+out_free_rawid:
+ free(counterrawid);
+out_enomem:
+ return -ENOMEM;
+}
+
+static int perf_read_values__findnew_counter(struct perf_read_values *values,
+ u64 rawid, const char *name)
+{
+ int i;
+
+ for (i = 0; i < values->counters; i++)
+ if (values->counterrawid[i] == rawid)
+ return i;
+
+ if (values->counters == values->counters_max) {
+ i = perf_read_values__enlarge_counters(values);
+ if (i)
+ return i;
+ }
+
+ i = values->counters++;
+ values->counterrawid[i] = rawid;
+ values->countername[i] = strdup(name);
+
+ return i;
+}
+
+int perf_read_values_add_value(struct perf_read_values *values,
+ u32 pid, u32 tid,
+ u64 rawid, const char *name, u64 value)
+{
+ int tindex, cindex;
+
+ tindex = perf_read_values__findnew_thread(values, pid, tid);
+ if (tindex < 0)
+ return tindex;
+ cindex = perf_read_values__findnew_counter(values, rawid, name);
+ if (cindex < 0)
+ return cindex;
+
+ values->value[tindex][cindex] += value;
+ return 0;
+}
+
+static void perf_read_values__display_pretty(FILE *fp,
+ struct perf_read_values *values)
+{
+ int i, j;
+ int pidwidth, tidwidth;
+ int *counterwidth;
+
+ counterwidth = malloc(values->counters * sizeof(*counterwidth));
+ if (!counterwidth) {
+ fprintf(fp, "INTERNAL ERROR: Failed to allocate counterwidth array\n");
+ return;
+ }
+ tidwidth = 3;
+ pidwidth = 3;
+ for (j = 0; j < values->counters; j++)
+ counterwidth[j] = strlen(values->countername[j]);
+ for (i = 0; i < values->threads; i++) {
+ int width;
+
+ width = snprintf(NULL, 0, "%d", values->pid[i]);
+ if (width > pidwidth)
+ pidwidth = width;
+ width = snprintf(NULL, 0, "%d", values->tid[i]);
+ if (width > tidwidth)
+ tidwidth = width;
+ for (j = 0; j < values->counters; j++) {
+ width = snprintf(NULL, 0, "%" PRIu64, values->value[i][j]);
+ if (width > counterwidth[j])
+ counterwidth[j] = width;
+ }
+ }
+
+ fprintf(fp, "# %*s %*s", pidwidth, "PID", tidwidth, "TID");
+ for (j = 0; j < values->counters; j++)
+ fprintf(fp, " %*s", counterwidth[j], values->countername[j]);
+ fprintf(fp, "\n");
+
+ for (i = 0; i < values->threads; i++) {
+ fprintf(fp, " %*d %*d", pidwidth, values->pid[i],
+ tidwidth, values->tid[i]);
+ for (j = 0; j < values->counters; j++)
+ fprintf(fp, " %*" PRIu64,
+ counterwidth[j], values->value[i][j]);
+ fprintf(fp, "\n");
+ }
+ free(counterwidth);
+}
+
+static void perf_read_values__display_raw(FILE *fp,
+ struct perf_read_values *values)
+{
+ int width, pidwidth, tidwidth, namewidth, rawwidth, countwidth;
+ int i, j;
+
+ tidwidth = 3; /* TID */
+ pidwidth = 3; /* PID */
+ namewidth = 4; /* "Name" */
+ rawwidth = 3; /* "Raw" */
+ countwidth = 5; /* "Count" */
+
+ for (i = 0; i < values->threads; i++) {
+ width = snprintf(NULL, 0, "%d", values->pid[i]);
+ if (width > pidwidth)
+ pidwidth = width;
+ width = snprintf(NULL, 0, "%d", values->tid[i]);
+ if (width > tidwidth)
+ tidwidth = width;
+ }
+ for (j = 0; j < values->counters; j++) {
+ width = strlen(values->countername[j]);
+ if (width > namewidth)
+ namewidth = width;
+ width = snprintf(NULL, 0, "%" PRIx64, values->counterrawid[j]);
+ if (width > rawwidth)
+ rawwidth = width;
+ }
+ for (i = 0; i < values->threads; i++) {
+ for (j = 0; j < values->counters; j++) {
+ width = snprintf(NULL, 0, "%" PRIu64, values->value[i][j]);
+ if (width > countwidth)
+ countwidth = width;
+ }
+ }
+
+ fprintf(fp, "# %*s %*s %*s %*s %*s\n",
+ pidwidth, "PID", tidwidth, "TID",
+ namewidth, "Name", rawwidth, "Raw",
+ countwidth, "Count");
+ for (i = 0; i < values->threads; i++)
+ for (j = 0; j < values->counters; j++)
+ fprintf(fp, " %*d %*d %*s %*" PRIx64 " %*" PRIu64,
+ pidwidth, values->pid[i],
+ tidwidth, values->tid[i],
+ namewidth, values->countername[j],
+ rawwidth, values->counterrawid[j],
+ countwidth, values->value[i][j]);
+}
+
+void perf_read_values_display(FILE *fp, struct perf_read_values *values, int raw)
+{
+ if (raw)
+ perf_read_values__display_raw(fp, values);
+ else
+ perf_read_values__display_pretty(fp, values);
+}
diff --git a/tools/perf/util/values.h b/tools/perf/util/values.h
new file mode 100644
index 000000000..8c41f22f4
--- /dev/null
+++ b/tools/perf/util/values.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_VALUES_H
+#define __PERF_VALUES_H
+
+#include <linux/types.h>
+
+struct perf_read_values {
+ int threads;
+ int threads_max;
+ u32 *pid, *tid;
+ int counters;
+ int counters_max;
+ u64 *counterrawid;
+ char **countername;
+ u64 **value;
+};
+
+int perf_read_values_init(struct perf_read_values *values);
+void perf_read_values_destroy(struct perf_read_values *values);
+
+int perf_read_values_add_value(struct perf_read_values *values,
+ u32 pid, u32 tid,
+ u64 rawid, const char *name, u64 value);
+
+void perf_read_values_display(FILE *fp, struct perf_read_values *values,
+ int raw);
+
+#endif /* __PERF_VALUES_H */
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
new file mode 100644
index 000000000..ae3eee69b
--- /dev/null
+++ b/tools/perf/util/vdso.c
@@ -0,0 +1,364 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <linux/kernel.h>
+
+#include "vdso.h"
+#include "dso.h"
+#include <internal/lib.h>
+#include "map.h"
+#include "symbol.h"
+#include "machine.h"
+#include "thread.h"
+#include "linux/string.h"
+#include <linux/zalloc.h>
+#include "debug.h"
+
+/*
+ * Include definition of find_map() also used in perf-read-vdso.c for
+ * building perf-read-vdso32 and perf-read-vdsox32.
+ */
+#include "find-map.c"
+
+#define VDSO__TEMP_FILE_NAME "/tmp/perf-vdso.so-XXXXXX"
+
+struct vdso_file {
+ bool found;
+ bool error;
+ char temp_file_name[sizeof(VDSO__TEMP_FILE_NAME)];
+ const char *dso_name;
+ const char *read_prog;
+};
+
+struct vdso_info {
+ struct vdso_file vdso;
+#if BITS_PER_LONG == 64
+ struct vdso_file vdso32;
+ struct vdso_file vdsox32;
+#endif
+};
+
+static struct vdso_info *vdso_info__new(void)
+{
+ static const struct vdso_info vdso_info_init = {
+ .vdso = {
+ .temp_file_name = VDSO__TEMP_FILE_NAME,
+ .dso_name = DSO__NAME_VDSO,
+ },
+#if BITS_PER_LONG == 64
+ .vdso32 = {
+ .temp_file_name = VDSO__TEMP_FILE_NAME,
+ .dso_name = DSO__NAME_VDSO32,
+ .read_prog = "perf-read-vdso32",
+ },
+ .vdsox32 = {
+ .temp_file_name = VDSO__TEMP_FILE_NAME,
+ .dso_name = DSO__NAME_VDSOX32,
+ .read_prog = "perf-read-vdsox32",
+ },
+#endif
+ };
+
+ return memdup(&vdso_info_init, sizeof(vdso_info_init));
+}
+
+static char *get_file(struct vdso_file *vdso_file)
+{
+ char *vdso = NULL;
+ char *buf = NULL;
+ void *start, *end;
+ size_t size;
+ int fd;
+
+ if (vdso_file->found)
+ return vdso_file->temp_file_name;
+
+ if (vdso_file->error || find_map(&start, &end, VDSO__MAP_NAME))
+ return NULL;
+
+ size = end - start;
+
+ buf = memdup(start, size);
+ if (!buf)
+ return NULL;
+
+ fd = mkstemp(vdso_file->temp_file_name);
+ if (fd < 0)
+ goto out;
+
+ if (size == (size_t) write(fd, buf, size))
+ vdso = vdso_file->temp_file_name;
+
+ close(fd);
+
+ out:
+ free(buf);
+
+ vdso_file->found = (vdso != NULL);
+ vdso_file->error = !vdso_file->found;
+ return vdso;
+}
+
+void machine__exit_vdso(struct machine *machine)
+{
+ struct vdso_info *vdso_info = machine->vdso_info;
+
+ if (!vdso_info)
+ return;
+
+ if (vdso_info->vdso.found)
+ unlink(vdso_info->vdso.temp_file_name);
+#if BITS_PER_LONG == 64
+ if (vdso_info->vdso32.found)
+ unlink(vdso_info->vdso32.temp_file_name);
+ if (vdso_info->vdsox32.found)
+ unlink(vdso_info->vdsox32.temp_file_name);
+#endif
+
+ zfree(&machine->vdso_info);
+}
+
+static struct dso *__machine__addnew_vdso(struct machine *machine, const char *short_name,
+ const char *long_name)
+{
+ struct dso *dso;
+
+ dso = dso__new(short_name);
+ if (dso != NULL) {
+ __dsos__add(&machine->dsos, dso);
+ dso__set_long_name(dso, long_name, false);
+ /* Put dso here because __dsos_add already got it */
+ dso__put(dso);
+ }
+
+ return dso;
+}
+
+static enum dso_type machine__thread_dso_type(struct machine *machine,
+ struct thread *thread)
+{
+ enum dso_type dso_type = DSO__TYPE_UNKNOWN;
+ struct map_rb_node *rb_node;
+
+ maps__for_each_entry(thread__maps(thread), rb_node) {
+ struct dso *dso = map__dso(rb_node->map);
+
+ if (!dso || dso->long_name[0] != '/')
+ continue;
+ dso_type = dso__type(dso, machine);
+ if (dso_type != DSO__TYPE_UNKNOWN)
+ break;
+ }
+
+ return dso_type;
+}
+
+#if BITS_PER_LONG == 64
+
+static int vdso__do_copy_compat(FILE *f, int fd)
+{
+ char buf[4096];
+ size_t count;
+
+ while (1) {
+ count = fread(buf, 1, sizeof(buf), f);
+ if (ferror(f))
+ return -errno;
+ if (feof(f))
+ break;
+ if (count && writen(fd, buf, count) != (ssize_t)count)
+ return -errno;
+ }
+
+ return 0;
+}
+
+static int vdso__copy_compat(const char *prog, int fd)
+{
+ FILE *f;
+ int err;
+
+ f = popen(prog, "r");
+ if (!f)
+ return -errno;
+
+ err = vdso__do_copy_compat(f, fd);
+
+ if (pclose(f) == -1)
+ return -errno;
+
+ return err;
+}
+
+static int vdso__create_compat_file(const char *prog, char *temp_name)
+{
+ int fd, err;
+
+ fd = mkstemp(temp_name);
+ if (fd < 0)
+ return -errno;
+
+ err = vdso__copy_compat(prog, fd);
+
+ if (close(fd) == -1)
+ return -errno;
+
+ return err;
+}
+
+static const char *vdso__get_compat_file(struct vdso_file *vdso_file)
+{
+ int err;
+
+ if (vdso_file->found)
+ return vdso_file->temp_file_name;
+
+ if (vdso_file->error)
+ return NULL;
+
+ err = vdso__create_compat_file(vdso_file->read_prog,
+ vdso_file->temp_file_name);
+ if (err) {
+ pr_err("%s failed, error %d\n", vdso_file->read_prog, err);
+ vdso_file->error = true;
+ return NULL;
+ }
+
+ vdso_file->found = true;
+
+ return vdso_file->temp_file_name;
+}
+
+static struct dso *__machine__findnew_compat(struct machine *machine,
+ struct vdso_file *vdso_file)
+{
+ const char *file_name;
+ struct dso *dso;
+
+ dso = __dsos__find(&machine->dsos, vdso_file->dso_name, true);
+ if (dso)
+ goto out;
+
+ file_name = vdso__get_compat_file(vdso_file);
+ if (!file_name)
+ goto out;
+
+ dso = __machine__addnew_vdso(machine, vdso_file->dso_name, file_name);
+out:
+ return dso;
+}
+
+static int __machine__findnew_vdso_compat(struct machine *machine,
+ struct thread *thread,
+ struct vdso_info *vdso_info,
+ struct dso **dso)
+{
+ enum dso_type dso_type;
+
+ dso_type = machine__thread_dso_type(machine, thread);
+
+#ifndef HAVE_PERF_READ_VDSO32
+ if (dso_type == DSO__TYPE_32BIT)
+ return 0;
+#endif
+#ifndef HAVE_PERF_READ_VDSOX32
+ if (dso_type == DSO__TYPE_X32BIT)
+ return 0;
+#endif
+
+ switch (dso_type) {
+ case DSO__TYPE_32BIT:
+ *dso = __machine__findnew_compat(machine, &vdso_info->vdso32);
+ return 1;
+ case DSO__TYPE_X32BIT:
+ *dso = __machine__findnew_compat(machine, &vdso_info->vdsox32);
+ return 1;
+ case DSO__TYPE_UNKNOWN:
+ case DSO__TYPE_64BIT:
+ default:
+ return 0;
+ }
+}
+
+#endif
+
+static struct dso *machine__find_vdso(struct machine *machine,
+ struct thread *thread)
+{
+ struct dso *dso = NULL;
+ enum dso_type dso_type;
+
+ dso_type = machine__thread_dso_type(machine, thread);
+ switch (dso_type) {
+ case DSO__TYPE_32BIT:
+ dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO32, true);
+ if (!dso) {
+ dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO,
+ true);
+ if (dso && dso_type != dso__type(dso, machine))
+ dso = NULL;
+ }
+ break;
+ case DSO__TYPE_X32BIT:
+ dso = __dsos__find(&machine->dsos, DSO__NAME_VDSOX32, true);
+ break;
+ case DSO__TYPE_64BIT:
+ case DSO__TYPE_UNKNOWN:
+ default:
+ dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
+ break;
+ }
+
+ return dso;
+}
+
+struct dso *machine__findnew_vdso(struct machine *machine,
+ struct thread *thread)
+{
+ struct vdso_info *vdso_info;
+ struct dso *dso = NULL;
+
+ down_write(&machine->dsos.lock);
+ if (!machine->vdso_info)
+ machine->vdso_info = vdso_info__new();
+
+ vdso_info = machine->vdso_info;
+ if (!vdso_info)
+ goto out_unlock;
+
+ dso = machine__find_vdso(machine, thread);
+ if (dso)
+ goto out_unlock;
+
+#if BITS_PER_LONG == 64
+ if (__machine__findnew_vdso_compat(machine, thread, vdso_info, &dso))
+ goto out_unlock;
+#endif
+
+ dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
+ if (!dso) {
+ char *file;
+
+ file = get_file(&vdso_info->vdso);
+ if (file)
+ dso = __machine__addnew_vdso(machine, DSO__NAME_VDSO, file);
+ }
+
+out_unlock:
+ dso__get(dso);
+ up_write(&machine->dsos.lock);
+ return dso;
+}
+
+bool dso__is_vdso(struct dso *dso)
+{
+ return !strcmp(dso->short_name, DSO__NAME_VDSO) ||
+ !strcmp(dso->short_name, DSO__NAME_VDSO32) ||
+ !strcmp(dso->short_name, DSO__NAME_VDSOX32);
+}
diff --git a/tools/perf/util/vdso.h b/tools/perf/util/vdso.h
new file mode 100644
index 000000000..bc74ace60
--- /dev/null
+++ b/tools/perf/util/vdso.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_VDSO__
+#define __PERF_VDSO__
+
+#include <linux/types.h>
+#include <string.h>
+#include <stdbool.h>
+
+#define VDSO__MAP_NAME "[vdso]"
+
+#define DSO__NAME_VDSO "[vdso]"
+#define DSO__NAME_VDSO32 "[vdso32]"
+#define DSO__NAME_VDSOX32 "[vdsox32]"
+
+static inline bool is_vdso_map(const char *filename)
+{
+ return !strcmp(filename, VDSO__MAP_NAME);
+}
+
+struct dso;
+
+bool dso__is_vdso(struct dso *dso);
+
+struct machine;
+struct thread;
+
+struct dso *machine__findnew_vdso(struct machine *machine, struct thread *thread);
+void machine__exit_vdso(struct machine *machine);
+
+#endif /* __PERF_VDSO__ */
diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c
new file mode 100644
index 000000000..78d2297c1
--- /dev/null
+++ b/tools/perf/util/zlib.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <zlib.h>
+#include <linux/compiler.h>
+#include <internal/lib.h>
+
+#include "util/compress.h"
+
+#define CHUNK_SIZE 16384
+
+int gzip_decompress_to_file(const char *input, int output_fd)
+{
+ int ret = Z_STREAM_ERROR;
+ int input_fd;
+ void *ptr;
+ int len;
+ struct stat stbuf;
+ unsigned char buf[CHUNK_SIZE];
+ z_stream zs = {
+ .zalloc = Z_NULL,
+ .zfree = Z_NULL,
+ .opaque = Z_NULL,
+ .avail_in = 0,
+ .next_in = Z_NULL,
+ };
+
+ input_fd = open(input, O_RDONLY);
+ if (input_fd < 0)
+ return -1;
+
+ if (fstat(input_fd, &stbuf) < 0)
+ goto out_close;
+
+ ptr = mmap(NULL, stbuf.st_size, PROT_READ, MAP_PRIVATE, input_fd, 0);
+ if (ptr == MAP_FAILED)
+ goto out_close;
+
+ if (inflateInit2(&zs, 16 + MAX_WBITS) != Z_OK)
+ goto out_unmap;
+
+ zs.next_in = ptr;
+ zs.avail_in = stbuf.st_size;
+
+ do {
+ zs.next_out = buf;
+ zs.avail_out = CHUNK_SIZE;
+
+ ret = inflate(&zs, Z_NO_FLUSH);
+ switch (ret) {
+ case Z_NEED_DICT:
+ ret = Z_DATA_ERROR;
+ /* fall through */
+ case Z_DATA_ERROR:
+ case Z_MEM_ERROR:
+ goto out;
+ default:
+ break;
+ }
+
+ len = CHUNK_SIZE - zs.avail_out;
+ if (writen(output_fd, buf, len) != len) {
+ ret = Z_DATA_ERROR;
+ goto out;
+ }
+
+ } while (ret != Z_STREAM_END);
+
+out:
+ inflateEnd(&zs);
+out_unmap:
+ munmap(ptr, stbuf.st_size);
+out_close:
+ close(input_fd);
+
+ return ret == Z_STREAM_END ? 0 : -1;
+}
+
+bool gzip_is_compressed(const char *input)
+{
+ int fd = open(input, O_RDONLY);
+ const uint8_t magic[2] = { 0x1f, 0x8b };
+ char buf[2] = { 0 };
+ ssize_t rc;
+
+ if (fd < 0)
+ return -1;
+
+ rc = read(fd, buf, sizeof(buf));
+ close(fd);
+ return rc == sizeof(buf) ?
+ memcmp(buf, magic, sizeof(buf)) == 0 : false;
+}
diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c
new file mode 100644
index 000000000..48dd2b018
--- /dev/null
+++ b/tools/perf/util/zstd.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <string.h>
+
+#include "util/compress.h"
+#include "util/debug.h"
+
+int zstd_init(struct zstd_data *data, int level)
+{
+ size_t ret;
+
+ data->dstream = ZSTD_createDStream();
+ if (data->dstream == NULL) {
+ pr_err("Couldn't create decompression stream.\n");
+ return -1;
+ }
+
+ ret = ZSTD_initDStream(data->dstream);
+ if (ZSTD_isError(ret)) {
+ pr_err("Failed to initialize decompression stream: %s\n", ZSTD_getErrorName(ret));
+ return -1;
+ }
+
+ if (!level)
+ return 0;
+
+ data->cstream = ZSTD_createCStream();
+ if (data->cstream == NULL) {
+ pr_err("Couldn't create compression stream.\n");
+ return -1;
+ }
+
+ ret = ZSTD_initCStream(data->cstream, level);
+ if (ZSTD_isError(ret)) {
+ pr_err("Failed to initialize compression stream: %s\n", ZSTD_getErrorName(ret));
+ return -1;
+ }
+
+ return 0;
+}
+
+int zstd_fini(struct zstd_data *data)
+{
+ if (data->dstream) {
+ ZSTD_freeDStream(data->dstream);
+ data->dstream = NULL;
+ }
+
+ if (data->cstream) {
+ ZSTD_freeCStream(data->cstream);
+ data->cstream = NULL;
+ }
+
+ return 0;
+}
+
+size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size,
+ void *src, size_t src_size, size_t max_record_size,
+ size_t process_header(void *record, size_t increment))
+{
+ size_t ret, size, compressed = 0;
+ ZSTD_inBuffer input = { src, src_size, 0 };
+ ZSTD_outBuffer output;
+ void *record;
+
+ while (input.pos < input.size) {
+ record = dst;
+ size = process_header(record, 0);
+ compressed += size;
+ dst += size;
+ dst_size -= size;
+ output = (ZSTD_outBuffer){ dst, (dst_size > max_record_size) ?
+ max_record_size : dst_size, 0 };
+ ret = ZSTD_compressStream(data->cstream, &output, &input);
+ ZSTD_flushStream(data->cstream, &output);
+ if (ZSTD_isError(ret)) {
+ pr_err("failed to compress %ld bytes: %s\n",
+ (long)src_size, ZSTD_getErrorName(ret));
+ memcpy(dst, src, src_size);
+ return src_size;
+ }
+ size = output.pos;
+ size = process_header(record, size);
+ compressed += size;
+ dst += size;
+ dst_size -= size;
+ }
+
+ return compressed;
+}
+
+size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size,
+ void *dst, size_t dst_size)
+{
+ size_t ret;
+ ZSTD_inBuffer input = { src, src_size, 0 };
+ ZSTD_outBuffer output = { dst, dst_size, 0 };
+
+ while (input.pos < input.size) {
+ ret = ZSTD_decompressStream(data->dstream, &output, &input);
+ if (ZSTD_isError(ret)) {
+ pr_err("failed to decompress (B): %zd -> %zd, dst_size %zd : %s\n",
+ src_size, output.size, dst_size, ZSTD_getErrorName(ret));
+ break;
+ }
+ output.dst = dst + output.pos;
+ output.size = dst_size - output.pos;
+ }
+
+ return output.pos;
+}