376 files changed, 143555 insertions, 0 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
new file mode 100644
index 000000000..2c364a908
--- /dev/null
+++ b/tools/perf/util/Build
@@ -0,0 +1,341 @@
+perf-y += arm64-frame-pointer-unwind-support.o
+perf-y += annotate.o
+perf-y += block-info.o
+perf-y += block-range.o
+perf-y += build-id.o
+perf-y += cacheline.o
+perf-y += config.o
+perf-y += copyfile.o
+perf-y += ctype.o
+perf-y += db-export.o
+perf-y += env.o
+perf-y += event.o
+perf-y += evlist.o
+perf-y += evlist-hybrid.o
+perf-y += sideband_evlist.o
+perf-y += evsel.o
+perf-y += evsel_fprintf.o
+perf-y += perf_event_attr_fprintf.o
+perf-y += evswitch.o
+perf-y += find_bit.o
+perf-y += get_current_dir_name.o
+perf-y += kallsyms.o
+perf-y += levenshtein.o
+perf-y += llvm-utils.o
+perf-y += mmap.o
+perf-y += memswap.o
+perf-y += parse-events.o
+perf-y += parse-events-hybrid.o
+perf-y += print-events.o
+perf-y += tracepoint.o
+perf-y += perf_regs.o
+perf-y += path.o
+perf-y += print_binary.o
+perf-y += rlimit.o
+perf-y += argv_split.o
+perf-y += rbtree.o
+perf-y += libstring.o
+perf-y += bitmap.o
+perf-y += hweight.o
+perf-y += smt.o
+perf-y += strbuf.o
+perf-y += string.o
+perf-y += strlist.o
+perf-y += strfilter.o
+perf-y += top.o
+perf-y += usage.o
+perf-y += dso.o
+perf-y += dsos.o
+perf-y += symbol.o
+perf-y += symbol_fprintf.o
+perf-y += color.o
+perf-y += color_config.o
+perf-y += metricgroup.o
+perf-y += header.o
+perf-y += callchain.o
+perf-y += values.o
+perf-y += debug.o
+perf-y += fncache.o
+perf-y += machine.o
+perf-y += map.o
+perf-y += maps.o
+perf-y += pstack.o
+perf-y += session.o
+perf-y += sample-raw.o
+perf-y += s390-sample-raw.o
+perf-y += amd-sample-raw.o
+perf-$(CONFIG_TRACE) += syscalltbl.o
+perf-y += ordered-events.o
+perf-y += namespaces.o
+perf-y += comm.o
+perf-y += thread.o
+perf-y += thread_map.o
+perf-y += trace-event-parse.o
+perf-y += parse-events-flex.o
+perf-y += parse-events-bison.o
+perf-y += pmu.o
+perf-y += pmu-flex.o
+perf-y += pmu-bison.o
+perf-y += pmu-hybrid.o
+perf-y += trace-event-read.o
+perf-y += trace-event-info.o
+perf-y += trace-event-scripting.o
+perf-y += trace-event.o
+perf-y += svghelper.o
+perf-y += sort.o
+perf-y += hist.o
+perf-y += util.o
+perf-y += cpumap.o
+perf-y += affinity.o
+perf-y += cputopo.o
+perf-y += cgroup.o
+perf-y += target.o
+perf-y += rblist.o
+perf-y += intlist.o
+perf-y += vdso.o
+perf-y += counts.o
+perf-y += stat.o
+perf-y += stat-shadow.o
+perf-y += stat-display.o
+perf-y += perf_api_probe.o
+perf-y += record.o
+perf-y += srcline.o
+perf-y += srccode.o
+perf-y += synthetic-events.o
+perf-y += data.o
+perf-y += tsc.o
+perf-y += cloexec.o
+perf-y += call-path.o
+perf-y += rwsem.o
+perf-y += thread-stack.o
+perf-y += spark.o
+perf-y += topdown.o
+perf-y += iostat.o
+perf-y += stream.o
+perf-$(CONFIG_AUXTRACE) += auxtrace.o
+perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
+perf-$(CONFIG_AUXTRACE) += intel-pt.o
+perf-$(CONFIG_AUXTRACE) += intel-bts.o
+perf-$(CONFIG_AUXTRACE) += arm-spe.o
+perf-$(CONFIG_AUXTRACE) += arm-spe-decoder/
+perf-$(CONFIG_AUXTRACE) += hisi-ptt.o
+perf-$(CONFIG_AUXTRACE) += hisi-ptt-decoder/
+perf-$(CONFIG_AUXTRACE) += s390-cpumsf.o
+
+ifdef CONFIG_LIBOPENCSD
+perf-$(CONFIG_AUXTRACE) += cs-etm.o
+perf-$(CONFIG_AUXTRACE) += cs-etm-decoder/
+endif
+
+perf-y += parse-branch-options.o
+perf-y += dump-insn.o
+perf-y += parse-regs-options.o
+perf-y += parse-sublevel-options.o
+perf-y += term.o
+perf-y += help-unknown-cmd.o
+perf-y += dlfilter.o
+perf-y += mem-events.o
+perf-y += vsprintf.o
+perf-y += units.o
+perf-y += time-utils.o
+perf-y += expr-flex.o
+perf-y += expr-bison.o
+perf-y += expr.o
+perf-y += branch.o
+perf-y += mem2node.o
+perf-y += clockid.o
+perf-y += list_sort.o
+perf-y += mutex.o
+
+perf-$(CONFIG_LIBBPF) += bpf-loader.o
+perf-$(CONFIG_LIBBPF) += bpf_map.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o
+perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
+perf-$(CONFIG_LIBELF) += symbol-elf.o
+perf-$(CONFIG_LIBELF) += probe-file.o
+perf-$(CONFIG_LIBELF) += probe-event.o
+
+ifdef CONFIG_LIBBPF_DYNAMIC
+  hashmap := 1
+endif
+ifndef CONFIG_LIBBPF
+  hashmap := 1
+endif
+
+ifdef hashmap
+perf-y += hashmap.o
+endif
+
+ifndef CONFIG_LIBELF
+perf-y += symbol-minimal.o
+endif
+
+ifndef CONFIG_SETNS
+perf-y += setns.o
+endif
+
+perf-$(CONFIG_DWARF) += probe-finder.o
+perf-$(CONFIG_DWARF) += dwarf-aux.o
+perf-$(CONFIG_DWARF) += dwarf-regs.o
+
+perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+perf-$(CONFIG_LOCAL_LIBUNWIND)    += unwind-libunwind-local.o
+perf-$(CONFIG_LIBUNWIND)          += unwind-libunwind.o
+perf-$(CONFIG_LIBUNWIND_X86)      += libunwind/x86_32.o
+perf-$(CONFIG_LIBUNWIND_AARCH64)  += libunwind/arm64.o
+
+perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
+perf-y += data-convert-json.o
+
+perf-y += scripting-engines/
+
+perf-$(CONFIG_ZLIB) += zlib.o
+perf-$(CONFIG_LZMA) += lzma.o
+perf-$(CONFIG_ZSTD) += zstd.o
+
+perf-$(CONFIG_LIBCAP) += cap.o
+
+perf-y += demangle-ocaml.o
+perf-y += demangle-java.o
+perf-y += demangle-rust.o
+
+ifdef CONFIG_JITDUMP
+perf-$(CONFIG_LIBELF) += jitdump.o
+perf-$(CONFIG_LIBELF) += genelf.o
+perf-$(CONFIG_DWARF) += genelf_debug.o
+endif
+
+perf-y += perf-hooks.o
+
+perf-$(CONFIG_LIBBPF) += bpf-event.o
+perf-$(CONFIG_LIBBPF) += bpf-utils.o
+
+perf-$(CONFIG_CXX) += c++/
+
+perf-$(CONFIG_LIBPFM4) += pfm.o
+
+CFLAGS_config.o   += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))"
+
+# avoid compiler warnings in 32-bit mode
+CFLAGS_genelf_debug.o  += -Wno-packed
+
+$(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-flex.h: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
+	$(call rule_mkdir)
+	$(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/parse-events-flex.c \
+		--header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) $<
+
+$(OUTPUT)util/parse-events-bison.c $(OUTPUT)util/parse-events-bison.h: util/parse-events.y
+	$(call rule_mkdir)
+	$(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \
+		-o $(OUTPUT)util/parse-events-bison.c -p parse_events_
+
+$(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-flex.h: util/expr.l $(OUTPUT)util/expr-bison.c
+	$(call rule_mkdir)
+	$(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/expr-flex.c \
+		--header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) $<
+
+$(OUTPUT)util/expr-bison.c $(OUTPUT)util/expr-bison.h: util/expr.y
+	$(call rule_mkdir)
+	$(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \
+		-o $(OUTPUT)util/expr-bison.c -p expr_
+
+$(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-flex.h: util/pmu.l $(OUTPUT)util/pmu-bison.c
+	$(call rule_mkdir)
+	$(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/pmu-flex.c \
+		--header-file=$(OUTPUT)util/pmu-flex.h $(PARSER_DEBUG_FLEX) $<
+
+$(OUTPUT)util/pmu-bison.c $(OUTPUT)util/pmu-bison.h: util/pmu.y
+	$(call rule_mkdir)
+	$(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \
+		-o $(OUTPUT)util/pmu-bison.c -p perf_pmu_
+
+FLEX_GE_26 := $(shell expr $(shell $(FLEX) --version | sed -e  's/flex \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 26)
+ifeq ($(FLEX_GE_26),1)
+  flex_flags := -Wno-switch-enum -Wno-switch-default -Wno-unused-function -Wno-redundant-decls -Wno-sign-compare -Wno-unused-parameter -Wno-missing-prototypes -Wno-missing-declarations
+  CC_HASNT_MISLEADING_INDENTATION := $(shell echo "int main(void) { return 0 }" | $(CC) -Werror -Wno-misleading-indentation -o /dev/null -xc - 2>&1 | grep -q -- -Wno-misleading-indentation ; echo $$?)
+  ifeq ($(CC_HASNT_MISLEADING_INDENTATION), 1)
+    flex_flags += -Wno-misleading-indentation
+  endif
+else
+  flex_flags := -w
+endif
+CFLAGS_parse-events-flex.o  += $(flex_flags)
+CFLAGS_pmu-flex.o           += $(flex_flags)
+CFLAGS_expr-flex.o          += $(flex_flags)
+
+bison_flags := -DYYENABLE_NLS=0
+BISON_GE_35 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 35)
+ifeq ($(BISON_GE_35),1)
+  bison_flags += -Wno-unused-parameter -Wno-nested-externs -Wno-implicit-function-declaration -Wno-switch-enum -Wno-unused-but-set-variable -Wno-unknown-warning-option
+else
+  bison_flags += -w
+endif
+
+BISON_LT_381 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\).\([0-9]\+\)/\1\2\3/g') \< 381)
+ifeq ($(BISON_LT_381),1)
+  bison_flags += -DYYNOMEM=YYABORT
+endif
+
+CFLAGS_parse-events-bison.o += $(bison_flags)
+CFLAGS_pmu-bison.o          += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags)
+CFLAGS_expr-bison.o         += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags)
+
+$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
+$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
+$(OUTPUT)util/expr.o: $(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-bison.c
+
+CFLAGS_bitmap.o        += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_find_bit.o      += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_rbtree.o        += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_libstring.o     += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_hweight.o       += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_parse-events.o  += -Wno-redundant-decls
+CFLAGS_expr.o          += -Wno-redundant-decls
+CFLAGS_header.o        += -include $(OUTPUT)PERF-VERSION-FILE
+CFLAGS_arm-spe.o       += -I$(srctree)/tools/arch/arm64/include/
+
+$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/argv_split.o: ../lib/argv_split.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/bitmap.o: ../lib/bitmap.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/ctype.o: ../lib/ctype.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/find_bit.o: ../lib/find_bit.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/rbtree.o: ../lib/rbtree.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/libstring.o: ../lib/string.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/vsprintf.o: ../lib/vsprintf.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/list_sort.o: ../lib/list_sort.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN
new file mode 100755
index 000000000..3cc42821d
--- /dev/null
+++ b/tools/perf/util/PERF-VERSION-GEN
@@ -0,0 +1,52 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+if [ $# -eq 1 ]  ; then
+	OUTPUT=$1
+fi
+
+GVF=${OUTPUT}PERF-VERSION-FILE
+
+LF='
+'
+
+#
+# Use version from kernel Makefile unless not in a git repository and
+# PERF-VERSION-FILE exists
+#
+CID=
+TAG=
+if test -d ../../.git -o -f ../../.git
+then
+	TAG=$(MAKEFLAGS= make -sC ../.. kernelversion)
+	CID=$(git log -1 --abbrev=12 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID"
+elif test -f ../../PERF-VERSION-FILE
+then
+	TAG=$(cut -d' ' -f3 ../../PERF-VERSION-FILE | sed -e 's/\"//g')
+fi
+if test -z "$TAG"
+then
+	TAG=$(MAKEFLAGS= make -sC ../.. kernelversion)
+fi
+
+VN="$TAG$CID"
+if test -n "$CID"
+then
+	# format version string, strip trailing zero of sublevel:
+	VN=$(echo "$VN" | sed -e 's/-/./g;s/\([0-9]*[.][0-9]*\)[.]0/\1/')
+fi
+
+VN=$(expr "$VN" : v*'\(.*\)')
+
+if test -r $GVF
+then
+	VC=$(sed -e 's/^#define PERF_VERSION "\(.*\)"/\1/' <$GVF)
+else
+	VC=unset
+fi
+test "$VN" = "$VC" || {
+	echo >&2 "  PERF_VERSION = $VN"
+	echo "#define PERF_VERSION \"$VN\"" >$GVF
+}
+
+
diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c
new file mode 100644
index 000000000..4ee96b3c7
--- /dev/null
+++ b/tools/perf/util/affinity.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Manage affinity to optimize IPIs inside the kernel perf API. */
+#define _GNU_SOURCE 1
+#include <sched.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include <linux/zalloc.h>
+#include "perf.h"
+#include "cpumap.h"
+#include "affinity.h"
+
+static int get_cpu_set_size(void)
+{
+	int sz = cpu__max_cpu().cpu + 8 - 1;
+	/*
+	 * sched_getaffinity doesn't like masks smaller than the kernel.
+	 * Hopefully that's big enough.
+	 */
+	if (sz < 4096)
+		sz = 4096;
+	return sz / 8;
+}
+
+int affinity__setup(struct affinity *a)
+{
+	int cpu_set_size = get_cpu_set_size();
+
+	a->orig_cpus = bitmap_zalloc(cpu_set_size * 8);
+	if (!a->orig_cpus)
+		return -1;
+	sched_getaffinity(0, cpu_set_size, (cpu_set_t *)a->orig_cpus);
+	a->sched_cpus = bitmap_zalloc(cpu_set_size * 8);
+	if (!a->sched_cpus) {
+		zfree(&a->orig_cpus);
+		return -1;
+	}
+	bitmap_zero((unsigned long *)a->sched_cpus, cpu_set_size);
+	a->changed = false;
+	return 0;
+}
+
+/*
+ * perf_event_open does an IPI internally to the target CPU.
+ * It is more efficient to change perf's affinity to the target
+ * CPU and then set up all events on that CPU, so we amortize
+ * CPU communication.
+ */
+void affinity__set(struct affinity *a, int cpu)
+{
+	int cpu_set_size = get_cpu_set_size();
+
+	/*
+	 * Return:
+	 * - if cpu is -1
+	 * - restrict out of bound access to sched_cpus
+	 */
+	if (cpu == -1 || ((cpu >= (cpu_set_size * 8))))
+		return;
+
+	a->changed = true;
+	set_bit(cpu, a->sched_cpus);
+	/*
+	 * We ignore errors because affinity is just an optimization.
+	 * This could happen for example with isolated CPUs or cpusets.
+	 * In this case the IPIs inside the kernel's perf API still work.
+	 */
+	sched_setaffinity(0, cpu_set_size, (cpu_set_t *)a->sched_cpus);
+	clear_bit(cpu, a->sched_cpus);
+}
+
+static void __affinity__cleanup(struct affinity *a)
+{
+	int cpu_set_size = get_cpu_set_size();
+
+	if (a->changed)
+		sched_setaffinity(0, cpu_set_size, (cpu_set_t *)a->orig_cpus);
+	zfree(&a->sched_cpus);
+	zfree(&a->orig_cpus);
+}
+
+void affinity__cleanup(struct affinity *a)
+{
+	if (a != NULL)
+		__affinity__cleanup(a);
+}
diff --git a/tools/perf/util/affinity.h b/tools/perf/util/affinity.h
new file mode 100644
index 000000000..0ad6a18ef
--- /dev/null
+++ b/tools/perf/util/affinity.h
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef PERF_AFFINITY_H
+#define PERF_AFFINITY_H 1
+
+#include <stdbool.h>
+
+struct affinity {
+	unsigned long *orig_cpus;
+	unsigned long *sched_cpus;
+	bool changed;
+};
+
+void affinity__cleanup(struct affinity *a);
+void affinity__set(struct affinity *a, int cpu);
+int affinity__setup(struct affinity *a);
+
+#endif // PERF_AFFINITY_H
diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c
new file mode 100644
index 000000000..238305868
--- /dev/null
+++ b/tools/perf/util/amd-sample-raw.c
@@ -0,0 +1,341 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD specific. Provide textual annotation for IBS raw sample data.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <linux/string.h>
+#include "../../arch/x86/include/asm/amd-ibs.h"
+
+#include "debug.h"
+#include "session.h"
+#include "evlist.h"
+#include "sample-raw.h"
+#include "pmu-events/pmu-events.h"
+
+static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type;
+static bool zen4_ibs_extensions;
+
+static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg)
+{
+	const char * const ic_miss_strs[] = {
+		" IcMiss 0",
+		" IcMiss 1",
+	};
+	const char * const l1tlb_pgsz_strs[] = {
+		" L1TlbPgSz 4KB",
+		" L1TlbPgSz 2MB",
+		" L1TlbPgSz 1GB",
+		" L1TlbPgSz RESERVED"
+	};
+	const char * const l1tlb_pgsz_strs_erratum1347[] = {
+		" L1TlbPgSz 4KB",
+		" L1TlbPgSz 16KB",
+		" L1TlbPgSz 2MB",
+		" L1TlbPgSz 1GB"
+	};
+	const char *ic_miss_str = NULL;
+	const char *l1tlb_pgsz_str = NULL;
+	char l3_miss_str[sizeof(" L3MissOnly _ FetchOcMiss _ FetchL3Miss _")] = "";
+
+	if (cpu_family == 0x19 && cpu_model < 0x10) {
+		/*
+		 * Erratum #1238 workaround is to ignore MSRC001_1030[IbsIcMiss]
+		 * Erratum #1347 workaround is to use table provided in erratum
+		 */
+		if (reg.phy_addr_valid)
+			l1tlb_pgsz_str = l1tlb_pgsz_strs_erratum1347[reg.l1tlb_pgsz];
+	} else {
+		if (reg.phy_addr_valid)
+			l1tlb_pgsz_str = l1tlb_pgsz_strs[reg.l1tlb_pgsz];
+		ic_miss_str = ic_miss_strs[reg.ic_miss];
+	}
+
+	if (zen4_ibs_extensions) {
+		snprintf(l3_miss_str, sizeof(l3_miss_str),
+			 " L3MissOnly %d FetchOcMiss %d FetchL3Miss %d",
+			 reg.l3_miss_only, reg.fetch_oc_miss, reg.fetch_l3_miss);
+	}
+
+	printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d Lat %5d En %d Val %d Comp %d%s "
+		"PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s%s\n",
+		reg.val, reg.fetch_maxcnt << 4, reg.fetch_cnt << 4, reg.fetch_lat,
+		reg.fetch_en, reg.fetch_val, reg.fetch_comp, ic_miss_str ? : "",
+		reg.phy_addr_valid, l1tlb_pgsz_str ? : "", reg.l1tlb_miss, reg.l2tlb_miss,
+		reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : "",
+		l3_miss_str);
+}
+
+static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg)
+{
+	printf("ic_ibs_ext_ctl:\t%016llx IbsItlbRefillLat %3d\n", reg.val, reg.itlb_refill_lat);
+}
+
+static void pr_ibs_op_ctl(union ibs_op_ctl reg)
+{
+	char l3_miss_only[sizeof(" L3MissOnly _")] = "";
+
+	if (zen4_ibs_extensions)
+		snprintf(l3_miss_only, sizeof(l3_miss_only), " L3MissOnly %d", reg.l3_miss_only);
+
+	printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d\n",
+		reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, l3_miss_only,
+		reg.op_en, reg.op_val, reg.cnt_ctl,
+		reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt);
+}
+
+static void pr_ibs_op_data(union ibs_op_data reg)
+{
+	printf("ibs_op_data:\t%016llx CompToRetCtr %5d TagToRetCtr %5d%s%s%s BrnRet %d "
+		" RipInvalid %d BrnFuse %d Microcode %d\n",
+		reg.val, reg.comp_to_ret_ctr, reg.tag_to_ret_ctr,
+		reg.op_brn_ret ? (reg.op_return ? " OpReturn 1" : " OpReturn 0") : "",
+		reg.op_brn_ret ? (reg.op_brn_taken ? " OpBrnTaken 1" : " OpBrnTaken 0") : "",
+		reg.op_brn_ret ? (reg.op_brn_misp ? " OpBrnMisp 1" : " OpBrnMisp 0") : "",
+		reg.op_brn_ret, reg.op_rip_invalid, reg.op_brn_fuse, reg.op_microcode);
+}
+
+static void pr_ibs_op_data2_extended(union ibs_op_data2 reg)
+{
+	static const char * const data_src_str[] = {
+		"",
+		" DataSrc 1=Local L3 or other L1/L2 in CCX",
+		" DataSrc 2=A peer cache in a near CCX",
+		" DataSrc 3=Data returned from DRAM",
+		" DataSrc 4=(reserved)",
+		" DataSrc 5=A peer cache in a far CCX",
+		" DataSrc 6=DRAM address map with \"long latency\" bit set",
+		" DataSrc 7=Data returned from MMIO/Config/PCI/APIC",
+		" DataSrc 8=Extension Memory (S-Link, GenZ, etc)",
+		" DataSrc 9=(reserved)",
+		" DataSrc 10=(reserved)",
+		" DataSrc 11=(reserved)",
+		" DataSrc 12=Peer Agent Memory",
+		/* 13 to 31 are reserved. Avoid printing them. */
+	};
+	int data_src = (reg.data_src_hi << 3) | reg.data_src_lo;
+
+	printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val,
+		(data_src == 1 || data_src == 2 || data_src == 5) ?
+			(reg.cache_hit_st ? "CacheHitSt 1=O-State " : "CacheHitSt 0=M-state ") : "",
+		reg.rmt_node,
+		data_src < (int)ARRAY_SIZE(data_src_str) ? data_src_str[data_src] : "");
+}
+
+static void pr_ibs_op_data2_default(union ibs_op_data2 reg)
+{
+	static const char * const data_src_str[] = {
+		"",
+		" DataSrc 1=(reserved)",
+		" DataSrc 2=Local node cache",
+		" DataSrc 3=DRAM",
+		" DataSrc 4=Remote node cache",
+		" DataSrc 5=(reserved)",
+		" DataSrc 6=(reserved)",
+		" DataSrc 7=Other"
+	};
+
+	printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val,
+	       reg.data_src_lo == 2 ? (reg.cache_hit_st ? "CacheHitSt 1=O-State "
+						     : "CacheHitSt 0=M-state ") : "",
+	       reg.rmt_node, data_src_str[reg.data_src_lo]);
+}
+
+static void pr_ibs_op_data2(union ibs_op_data2 reg)
+{
+	if (zen4_ibs_extensions)
+		return pr_ibs_op_data2_extended(reg);
+	pr_ibs_op_data2_default(reg);
+}
+
+static void pr_ibs_op_data3(union ibs_op_data3 reg)
+{
+	char l2_miss_str[sizeof(" L2Miss _")] = "";
+	char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = "";
+	char op_dc_miss_open_mem_reqs_str[sizeof(" OpDcMissOpenMemReqs __")] = "";
+
+	/*
+	 * Erratum #1293
+	 * Ignore L2Miss and OpDcMissOpenMemReqs (and opdata2) if DcMissNoMabAlloc or SwPf set
+	 */
+	if (!(cpu_family == 0x19 && cpu_model < 0x10 && (reg.dc_miss_no_mab_alloc || reg.sw_pf))) {
+		snprintf(l2_miss_str, sizeof(l2_miss_str), " L2Miss %d", reg.l2_miss);
+		snprintf(op_dc_miss_open_mem_reqs_str, sizeof(op_dc_miss_open_mem_reqs_str),
+			 " OpDcMissOpenMemReqs %2d", reg.op_dc_miss_open_mem_reqs);
+	}
+
+	if (reg.op_mem_width)
+		snprintf(op_mem_width_str, sizeof(op_mem_width_str),
+			 " OpMemWidth %2d bytes", 1 << (reg.op_mem_width - 1));
+
+	printf("ibs_op_data3:\t%016llx LdOp %d StOp %d DcL1TlbMiss %d DcL2TlbMiss %d "
+		"DcL1TlbHit2M %d DcL1TlbHit1G %d DcL2TlbHit2M %d DcMiss %d DcMisAcc %d "
+		"DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d DcLinAddrValid %d "
+		"DcPhyAddrValid %d DcL2TlbHit1G %d%s SwPf %d%s%s DcMissLat %5d TlbRefillLat %5d\n",
+		reg.val, reg.ld_op, reg.st_op, reg.dc_l1tlb_miss, reg.dc_l2tlb_miss,
+		reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g, reg.dc_l2tlb_hit_2m, reg.dc_miss,
+		reg.dc_mis_acc, reg.dc_wc_mem_acc, reg.dc_uc_mem_acc, reg.dc_locked_op,
+		reg.dc_miss_no_mab_alloc, reg.dc_lin_addr_valid, reg.dc_phy_addr_valid,
+		reg.dc_l2_tlb_hit_1g, l2_miss_str, reg.sw_pf, op_mem_width_str,
+		op_dc_miss_open_mem_reqs_str, reg.dc_miss_lat, reg.tlb_refill_lat);
+}
+
+/*
+ * IBS Op/Execution MSRs always saved, in order, are:
+ * IBS_OP_CTL, IBS_OP_RIP, IBS_OP_DATA, IBS_OP_DATA2,
+ * IBS_OP_DATA3, IBS_DC_LINADDR, IBS_DC_PHYSADDR, BP_IBSTGT_RIP
+ */
+static void amd_dump_ibs_op(struct perf_sample *sample)
+{
+	struct perf_ibs_data *data = sample->raw_data;
+	union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data;
+	__u64 *rip = (__u64 *)op_ctl + 1;
+	union ibs_op_data *op_data = (union ibs_op_data *)(rip + 1);
+	union ibs_op_data3 *op_data3 = (union ibs_op_data3 *)(rip + 3);
+
+	pr_ibs_op_ctl(*op_ctl);
+	if (!op_data->op_rip_invalid)
+		printf("IbsOpRip:\t%016llx\n", *rip);
+	pr_ibs_op_data(*op_data);
+	/*
+	 * Erratum #1293: ignore op_data2 if DcMissNoMabAlloc or SwPf are set
+	 */
+	if (!(cpu_family == 0x19 && cpu_model < 0x10 &&
+	      (op_data3->dc_miss_no_mab_alloc || op_data3->sw_pf)))
+		pr_ibs_op_data2(*(union ibs_op_data2 *)(rip + 2));
+	pr_ibs_op_data3(*op_data3);
+	if (op_data3->dc_lin_addr_valid)
+		printf("IbsDCLinAd:\t%016llx\n", *(rip + 4));
+	if (op_data3->dc_phy_addr_valid)
+		printf("IbsDCPhysAd:\t%016llx\n", *(rip + 5));
+	if (op_data->op_brn_ret && *(rip + 6))
+		printf("IbsBrTarget:\t%016llx\n", *(rip + 6));
+}
+
+/*
+ * IBS Fetch MSRs always saved, in order, are:
+ * IBS_FETCH_CTL, IBS_FETCH_LINADDR, IBS_FETCH_PHYSADDR, IC_IBS_EXTD_CTL
+ */
+static void amd_dump_ibs_fetch(struct perf_sample *sample)
+{
+	struct perf_ibs_data *data = sample->raw_data;
+	union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data;
+	__u64 *addr = (__u64 *)fetch_ctl + 1;
+	union ic_ibs_extd_ctl *extd_ctl = (union ic_ibs_extd_ctl *)addr + 2;
+
+	pr_ibs_fetch_ctl(*fetch_ctl);
+	printf("IbsFetchLinAd:\t%016llx\n", *addr++);
+	if (fetch_ctl->phy_addr_valid)
+		printf("IbsFetchPhysAd:\t%016llx\n", *addr);
+	pr_ic_ibs_extd_ctl(*extd_ctl);
+}
+
+/*
+ * Test for enable and valid bits in captured control MSRs.
+ */
+static bool is_valid_ibs_fetch_sample(struct perf_sample *sample)
+{
+	struct perf_ibs_data *data = sample->raw_data;
+	union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data;
+
+	if (fetch_ctl->fetch_en && fetch_ctl->fetch_val)
+		return true;
+
+	return false;
+}
+
+static bool is_valid_ibs_op_sample(struct perf_sample *sample)
+{
+	struct perf_ibs_data *data = sample->raw_data;
+	union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data;
+
+	if (op_ctl->op_en && op_ctl->op_val)
+		return true;
+
+	return false;
+}
+
+/* AMD vendor specific raw sample function. Check for PERF_RECORD_SAMPLE events
+ * and if the event was triggered by IBS, display its raw data with decoded text.
+ * The function is only invoked when the dump flag -D is set.
+ */
+void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event,
+			    struct perf_sample *sample)
+{
+	struct evsel *evsel;
+
+	if (event->header.type != PERF_RECORD_SAMPLE || !sample->raw_size)
+		return;
+
+	evsel = evlist__event2evsel(evlist, event);
+	if (!evsel)
+		return;
+
+	if (evsel->core.attr.type == ibs_fetch_type) {
+		if (!is_valid_ibs_fetch_sample(sample)) {
+			pr_debug("Invalid raw IBS Fetch MSR data encountered\n");
+			return;
+		}
+		amd_dump_ibs_fetch(sample);
+	} else if (evsel->core.attr.type == ibs_op_type) {
+		if (!is_valid_ibs_op_sample(sample)) {
+			pr_debug("Invalid raw IBS Op MSR data encountered\n");
+			return;
+		}
+		amd_dump_ibs_op(sample);
+	}
+}
+
+static void parse_cpuid(struct perf_env *env)
+{
+	const char *cpuid;
+	int ret;
+
+	cpuid = perf_env__cpuid(env);
+	/*
+	 * cpuid = "AuthenticAMD,family,model,stepping"
+	 */
+	ret = sscanf(cpuid, "%*[^,],%u,%u", &cpu_family, &cpu_model);
+	if (ret != 2)
+		pr_debug("problem parsing cpuid\n");
+}
+
+/*
+ * Find and assign the type number used for ibs_op or ibs_fetch samples.
+ * Device names can be large - we are only interested in the first 9 characters,
+ * to match "ibs_fetch".
+ */
+bool evlist__has_amd_ibs(struct evlist *evlist)
+{
+	struct perf_env *env = evlist->env;
+	int ret, nr_pmu_mappings = perf_env__nr_pmu_mappings(env);
+	const char *pmu_mapping = perf_env__pmu_mappings(env);
+	char name[sizeof("ibs_fetch")];
+	u32 type;
+
+	while (nr_pmu_mappings--) {
+		ret = sscanf(pmu_mapping, "%u:%9s", &type, name);
+		if (ret == 2) {
+			if (strstarts(name, "ibs_op"))
+				ibs_op_type = type;
+			else if (strstarts(name, "ibs_fetch"))
+				ibs_fetch_type = type;
+		}
+		pmu_mapping += strlen(pmu_mapping) + 1 /* '\0' */;
+	}
+
+	if (perf_env__find_pmu_cap(env, "ibs_op", "zen4_ibs_extensions"))
+		zen4_ibs_extensions = 1;
+
+	if (ibs_fetch_type || ibs_op_type) {
+		if (!cpu_family)
+			parse_cpuid(env);
+		return true;
+	}
+
+	return false;
+}
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
new file mode 100644
index 000000000..a9122ea3b
--- /dev/null
+++ b/tools/perf/util/annotate.c
@@ -0,0 +1,3297 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from builtin-annotate.c, see those files for further
+ * copyright notes.
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <libgen.h>
+#include <stdlib.h>
+#include "util.h" // hex_width()
+#include "ui/ui.h"
+#include "sort.h"
+#include "build-id.h"
+#include "color.h"
+#include "config.h"
+#include "dso.h"
+#include "env.h"
+#include "map.h"
+#include "maps.h"
+#include "symbol.h"
+#include "srcline.h"
+#include "units.h"
+#include "debug.h"
+#include "annotate.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "bpf-event.h"
+#include "bpf-utils.h"
+#include "block-range.h"
+#include "string2.h"
+#include "util/event.h"
+#include "arch/common.h"
+#include "namespaces.h"
+#include <regex.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <subcmd/parse-options.h>
+#include <subcmd/run-command.h>
+
+/* FIXME: For the HE_COLORSET */
+#include "ui/browser.h"
+
+/*
+ * FIXME: Using the same values as slang.h,
+ * but that header may not be available everywhere
+ */
+#define LARROW_CHAR	((unsigned char)',')
+#define RARROW_CHAR	((unsigned char)'+')
+#define DARROW_CHAR	((unsigned char)'.')
+#define UARROW_CHAR	((unsigned char)'-')
+
+#include <linux/ctype.h>
+
+struct annotation_options annotation__default_options = {
+	.use_offset     = true,
+	.jump_arrows    = true,
+	.annotate_src	= true,
+	.offset_level	= ANNOTATION__OFFSET_JUMP_TARGETS,
+	.percent_type	= PERCENT_PERIOD_LOCAL,
+};
+
+static regex_t	 file_lineno;
+
+static struct ins_ops *ins__find(struct arch *arch, const char *name);
+static void ins__sort(struct arch *arch);
+static int disasm_line__parse(char *line, const char **namep, char **rawp);
+
+struct arch {
+	const char	*name;
+	struct ins	*instructions;
+	size_t		nr_instructions;
+	size_t		nr_instructions_allocated;
+	struct ins_ops  *(*associate_instruction_ops)(struct arch *arch, const char *name);
+	bool		sorted_instructions;
+	bool		initialized;
+	void		*priv;
+	unsigned int	model;
+	unsigned int	family;
+	int		(*init)(struct arch *arch, char *cpuid);
+	bool		(*ins_is_fused)(struct arch *arch, const char *ins1,
+					const char *ins2);
+	struct		{
+		char comment_char;
+		char skip_functions_char;
+	} objdump;
+};
+
+static struct ins_ops call_ops;
+static struct ins_ops dec_ops;
+static struct ins_ops jump_ops;
+static struct ins_ops mov_ops;
+static struct ins_ops nop_ops;
+static struct ins_ops lock_ops;
+static struct ins_ops ret_ops;
+
+static int arch__grow_instructions(struct arch *arch)
+{
+	struct ins *new_instructions;
+	size_t new_nr_allocated;
+
+	if (arch->nr_instructions_allocated == 0 && arch->instructions)
+		goto grow_from_non_allocated_table;
+
+	new_nr_allocated = arch->nr_instructions_allocated + 128;
+	new_instructions = realloc(arch->instructions, new_nr_allocated * sizeof(struct ins));
+	if (new_instructions == NULL)
+		return -1;
+
+out_update_instructions:
+	arch->instructions = new_instructions;
+	arch->nr_instructions_allocated = new_nr_allocated;
+	return 0;
+
+grow_from_non_allocated_table:
+	new_nr_allocated = arch->nr_instructions + 128;
+	new_instructions = calloc(new_nr_allocated, sizeof(struct ins));
+	if (new_instructions == NULL)
+		return -1;
+
+	memcpy(new_instructions, arch->instructions, arch->nr_instructions);
+	goto out_update_instructions;
+}
+
+static int arch__associate_ins_ops(struct arch* arch, const char *name, struct ins_ops *ops)
+{
+	struct ins *ins;
+
+	if (arch->nr_instructions == arch->nr_instructions_allocated &&
+	    arch__grow_instructions(arch))
+		return -1;
+
+	ins = &arch->instructions[arch->nr_instructions];
+	ins->name = strdup(name);
+	if (!ins->name)
+		return -1;
+
+	ins->ops  = ops;
+	arch->nr_instructions++;
+
+	ins__sort(arch);
+	return 0;
+}
+
+#include "arch/arc/annotate/instructions.c"
+#include "arch/arm/annotate/instructions.c"
+#include "arch/arm64/annotate/instructions.c"
+#include "arch/csky/annotate/instructions.c"
+#include "arch/mips/annotate/instructions.c"
+#include "arch/x86/annotate/instructions.c"
+#include "arch/powerpc/annotate/instructions.c"
+#include "arch/riscv64/annotate/instructions.c"
+#include "arch/s390/annotate/instructions.c"
+#include "arch/sparc/annotate/instructions.c"
+
+static struct arch architectures[] = {
+	{
+		.name = "arc",
+		.init = arc__annotate_init,
+	},
+	{
+		.name = "arm",
+		.init = arm__annotate_init,
+	},
+	{
+		.name = "arm64",
+		.init = arm64__annotate_init,
+	},
+	{
+		.name = "csky",
+		.init = csky__annotate_init,
+	},
+	{
+		.name = "mips",
+		.init = mips__annotate_init,
+		.objdump = {
+			.comment_char = '#',
+		},
+	},
+	{
+		.name = "x86",
+		.init = x86__annotate_init,
+		.instructions = x86__instructions,
+		.nr_instructions = ARRAY_SIZE(x86__instructions),
+		.objdump =  {
+			.comment_char = '#',
+		},
+	},
+	{
+		.name = "powerpc",
+		.init = powerpc__annotate_init,
+	},
+	{
+		.name = "riscv64",
+		.init = riscv64__annotate_init,
+	},
+	{
+		.name = "s390",
+		.init = s390__annotate_init,
+		.objdump =  {
+			.comment_char = '#',
+		},
+	},
+	{
+		.name = "sparc",
+		.init = sparc__annotate_init,
+		.objdump = {
+			.comment_char = '#',
+		},
+	},
+};
+
+static void ins__delete(struct ins_operands *ops)
+{
+	if (ops == NULL)
+		return;
+	zfree(&ops->source.raw);
+	zfree(&ops->source.name);
+	zfree(&ops->target.raw);
+	zfree(&ops->target.name);
+}
+
+static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size,
+			      struct ins_operands *ops, int max_ins_name)
+{
+	return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->raw);
+}
+
+int ins__scnprintf(struct ins *ins, char *bf, size_t size,
+		   struct ins_operands *ops, int max_ins_name)
+{
+	if (ins->ops->scnprintf)
+		return ins->ops->scnprintf(ins, bf, size, ops, max_ins_name);
+
+	return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
+}
+
+bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2)
+{
+	if (!arch || !arch->ins_is_fused)
+		return false;
+
+	return arch->ins_is_fused(arch, ins1, ins2);
+}
+
+static int call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+{
+	char *endptr, *tok, *name;
+	struct map *map = ms->map;
+	struct addr_map_symbol target = {
+		.ms = { .map = map, },
+	};
+
+	ops->target.addr = strtoull(ops->raw, &endptr, 16);
+
+	name = strchr(endptr, '<');
+	if (name == NULL)
+		goto indirect_call;
+
+	name++;
+
+	if (arch->objdump.skip_functions_char &&
+	    strchr(name, arch->objdump.skip_functions_char))
+		return -1;
+
+	tok = strchr(name, '>');
+	if (tok == NULL)
+		return -1;
+
+	*tok = '\0';
+	ops->target.name = strdup(name);
+	*tok = '>';
+
+	if (ops->target.name == NULL)
+		return -1;
+find_target:
+	target.addr = map__objdump_2mem(map, ops->target.addr);
+
+	if (maps__find_ams(ms->maps, &target) == 0 &&
+	    map__rip_2objdump(target.ms.map, map->map_ip(target.ms.map, target.addr)) == ops->target.addr)
+		ops->target.sym = target.ms.sym;
+
+	return 0;
+
+indirect_call:
+	tok = strchr(endptr, '*');
+	if (tok != NULL) {
+		endptr++;
+
+		/* Indirect call can use a non-rip register and offset: callq  *0x8(%rbx).
+		 * Do not parse such instruction.  */
+		if (strstr(endptr, "(%r") == NULL)
+			ops->target.addr = strtoull(endptr, NULL, 16);
+	}
+	goto find_target;
+}
+
+static int call__scnprintf(struct ins *ins, char *bf, size_t size,
+			   struct ins_operands *ops, int max_ins_name)
+{
+	if (ops->target.sym)
+		return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name);
+
+	if (ops->target.addr == 0)
+		return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
+
+	if (ops->target.name)
+		return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.name);
+
+	return scnprintf(bf, size, "%-*s *%" PRIx64, max_ins_name, ins->name, ops->target.addr);
+}
+
+static struct ins_ops call_ops = {
+	.parse	   = call__parse,
+	.scnprintf = call__scnprintf,
+};
+
+bool ins__is_call(const struct ins *ins)
+{
+	return ins->ops == &call_ops || ins->ops == &s390_call_ops;
+}
+
+/*
+ * Prevents from matching commas in the comment section, e.g.:
+ * ffff200008446e70:       b.cs    ffff2000084470f4 <generic_exec_single+0x314>  // b.hs, b.nlast
+ *
+ * and skip comma as part of function arguments, e.g.:
+ * 1d8b4ac <linemap_lookup(line_maps const*, unsigned int)+0xcc>
+ */
+static inline const char *validate_comma(const char *c, struct ins_operands *ops)
+{
+	if (ops->raw_comment && c > ops->raw_comment)
+		return NULL;
+
+	if (ops->raw_func_start && c > ops->raw_func_start)
+		return NULL;
+
+	return c;
+}
+
+static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+{
+	struct map *map = ms->map;
+	struct symbol *sym = ms->sym;
+	struct addr_map_symbol target = {
+		.ms = { .map = map, },
+	};
+	const char *c = strchr(ops->raw, ',');
+	u64 start, end;
+
+	ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char);
+	ops->raw_func_start = strchr(ops->raw, '<');
+
+	c = validate_comma(c, ops);
+
+	/*
+	 * Examples of lines to parse for the _cpp_lex_token@@Base
+	 * function:
+	 *
+	 * 1159e6c: jne    115aa32 <_cpp_lex_token@@Base+0xf92>
+	 * 1159e8b: jne    c469be <cpp_named_operator2name@@Base+0xa72>
+	 *
+	 * The first is a jump to an offset inside the same function,
+	 * the second is to another function, i.e. that 0xa72 is an
+	 * offset in the cpp_named_operator2name@@base function.
+	 */
+	/*
+	 * skip over possible up to 2 operands to get to address, e.g.:
+	 * tbnz	 w0, #26, ffff0000083cd190 <security_file_permission+0xd0>
+	 */
+	if (c++ != NULL) {
+		ops->target.addr = strtoull(c, NULL, 16);
+		if (!ops->target.addr) {
+			c = strchr(c, ',');
+			c = validate_comma(c, ops);
+			if (c++ != NULL)
+				ops->target.addr = strtoull(c, NULL, 16);
+		}
+	} else {
+		ops->target.addr = strtoull(ops->raw, NULL, 16);
+	}
+
+	target.addr = map__objdump_2mem(map, ops->target.addr);
+	start = map->unmap_ip(map, sym->start),
+	end = map->unmap_ip(map, sym->end);
+
+	ops->target.outside = target.addr < start || target.addr > end;
+
+	/*
+	 * FIXME: things like this in _cpp_lex_token (gcc's cc1 program):
+
+		cpp_named_operator2name@@Base+0xa72
+
+	 * Point to a place that is after the cpp_named_operator2name
+	 * boundaries, i.e.  in the ELF symbol table for cc1
+	 * cpp_named_operator2name is marked as being 32-bytes long, but it in
+	 * fact is much larger than that, so we seem to need a symbols__find()
+	 * routine that looks for >= current->start and  < next_symbol->start,
+	 * possibly just for C++ objects?
+	 *
+	 * For now lets just make some progress by marking jumps to outside the
+	 * current function as call like.
+	 *
+	 * Actual navigation will come next, with further understanding of how
+	 * the symbol searching and disassembly should be done.
+	 */
+	if (maps__find_ams(ms->maps, &target) == 0 &&
+	    map__rip_2objdump(target.ms.map, map->map_ip(target.ms.map, target.addr)) == ops->target.addr)
+		ops->target.sym = target.ms.sym;
+
+	if (!ops->target.outside) {
+		ops->target.offset = target.addr - start;
+		ops->target.offset_avail = true;
+	} else {
+		ops->target.offset_avail = false;
+	}
+
+	return 0;
+}
+
+static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
+			   struct ins_operands *ops, int max_ins_name)
+{
+	const char *c;
+
+	if (!ops->target.addr || ops->target.offset < 0)
+		return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
+
+	if (ops->target.outside && ops->target.sym != NULL)
+		return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name);
+
+	c = strchr(ops->raw, ',');
+	c = validate_comma(c, ops);
+
+	if (c != NULL) {
+		const char *c2 = strchr(c + 1, ',');
+
+		c2 = validate_comma(c2, ops);
+		/* check for 3-op insn */
+		if (c2 != NULL)
+			c = c2;
+		c++;
+
+		/* mirror arch objdump's space-after-comma style */
+		if (*c == ' ')
+			c++;
+	}
+
+	return scnprintf(bf, size, "%-*s %.*s%" PRIx64, max_ins_name,
+			 ins->name, c ? c - ops->raw : 0, ops->raw,
+			 ops->target.offset);
+}
+
+static struct ins_ops jump_ops = {
+	.parse	   = jump__parse,
+	.scnprintf = jump__scnprintf,
+};
+
+bool ins__is_jump(const struct ins *ins)
+{
+	return ins->ops == &jump_ops;
+}
+
+static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep)
+{
+	char *endptr, *name, *t;
+
+	if (strstr(raw, "(%rip)") == NULL)
+		return 0;
+
+	*addrp = strtoull(comment, &endptr, 16);
+	if (endptr == comment)
+		return 0;
+	name = strchr(endptr, '<');
+	if (name == NULL)
+		return -1;
+
+	name++;
+
+	t = strchr(name, '>');
+	if (t == NULL)
+		return 0;
+
+	*t = '\0';
+	*namep = strdup(name);
+	*t = '>';
+
+	return 0;
+}
+
+static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+{
+	ops->locked.ops = zalloc(sizeof(*ops->locked.ops));
+	if (ops->locked.ops == NULL)
+		return 0;
+
+	if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw) < 0)
+		goto out_free_ops;
+
+	ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name);
+
+	if (ops->locked.ins.ops == NULL)
+		goto out_free_ops;
+
+	if (ops->locked.ins.ops->parse &&
+	    ops->locked.ins.ops->parse(arch, ops->locked.ops, ms) < 0)
+		goto out_free_ops;
+
+	return 0;
+
+out_free_ops:
+	zfree(&ops->locked.ops);
+	return 0;
+}
+
+static int lock__scnprintf(struct ins *ins, char *bf, size_t size,
+			   struct ins_operands *ops, int max_ins_name)
+{
+	int printed;
+
+	if (ops->locked.ins.ops == NULL)
+		return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
+
+	printed = scnprintf(bf, size, "%-*s ", max_ins_name, ins->name);
+	return printed + ins__scnprintf(&ops->locked.ins, bf + printed,
+					size - printed, ops->locked.ops, max_ins_name);
+}
+
+static void lock__delete(struct ins_operands *ops)
+{
+	struct ins *ins = &ops->locked.ins;
+
+	if (ins->ops && ins->ops->free)
+		ins->ops->free(ops->locked.ops);
+	else
+		ins__delete(ops->locked.ops);
+
+	zfree(&ops->locked.ops);
+	zfree(&ops->target.raw);
+	zfree(&ops->target.name);
+}
+
+static struct ins_ops lock_ops = {
+	.free	   = lock__delete,
+	.parse	   = lock__parse,
+	.scnprintf = lock__scnprintf,
+};
+
+static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused)
+{
+	char *s = strchr(ops->raw, ','), *target, *comment, prev;
+
+	if (s == NULL)
+		return -1;
+
+	*s = '\0';
+	ops->source.raw = strdup(ops->raw);
+	*s = ',';
+
+	if (ops->source.raw == NULL)
+		return -1;
+
+	target = ++s;
+	comment = strchr(s, arch->objdump.comment_char);
+
+	if (comment != NULL)
+		s = comment - 1;
+	else
+		s = strchr(s, '\0') - 1;
+
+	while (s > target && isspace(s[0]))
+		--s;
+	s++;
+	prev = *s;
+	*s = '\0';
+
+	ops->target.raw = strdup(target);
+	*s = prev;
+
+	if (ops->target.raw == NULL)
+		goto out_free_source;
+
+	if (comment == NULL)
+		return 0;
+
+	comment = skip_spaces(comment);
+	comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name);
+	comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);
+
+	return 0;
+
+out_free_source:
+	zfree(&ops->source.raw);
+	return -1;
+}
+
+static int mov__scnprintf(struct ins *ins, char *bf, size_t size,
+			   struct ins_operands *ops, int max_ins_name)
+{
+	return scnprintf(bf, size, "%-*s %s,%s", max_ins_name, ins->name,
+			 ops->source.name ?: ops->source.raw,
+			 ops->target.name ?: ops->target.raw);
+}
+
+static struct ins_ops mov_ops = {
+	.parse	   = mov__parse,
+	.scnprintf = mov__scnprintf,
+};
+
+static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms __maybe_unused)
+{
+	char *target, *comment, *s, prev;
+
+	target = s = ops->raw;
+
+	while (s[0] != '\0' && !isspace(s[0]))
+		++s;
+	prev = *s;
+	*s = '\0';
+
+	ops->target.raw = strdup(target);
+	*s = prev;
+
+	if (ops->target.raw == NULL)
+		return -1;
+
+	comment = strchr(s, arch->objdump.comment_char);
+	if (comment == NULL)
+		return 0;
+
+	comment = skip_spaces(comment);
+	comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);
+
+	return 0;
+}
+
+static int dec__scnprintf(struct ins *ins, char *bf, size_t size,
+			   struct ins_operands *ops, int max_ins_name)
+{
+	return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name,
+			 ops->target.name ?: ops->target.raw);
+}
+
+static struct ins_ops dec_ops = {
+	.parse	   = dec__parse,
+	.scnprintf = dec__scnprintf,
+};
+
+static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size,
+			  struct ins_operands *ops __maybe_unused, int max_ins_name)
+{
+	return scnprintf(bf, size, "%-*s", max_ins_name, "nop");
+}
+
+static struct ins_ops nop_ops = {
+	.scnprintf = nop__scnprintf,
+};
+
+static struct ins_ops ret_ops = {
+	.scnprintf = ins__raw_scnprintf,
+};
+
+bool ins__is_ret(const struct ins *ins)
+{
+	return ins->ops == &ret_ops;
+}
+
+bool ins__is_lock(const struct ins *ins)
+{
+	return ins->ops == &lock_ops;
+}
+
+static int ins__key_cmp(const void *name, const void *insp)
+{
+	const struct ins *ins = insp;
+
+	return strcmp(name, ins->name);
+}
+
+static int ins__cmp(const void *a, const void *b)
+{
+	const struct ins *ia = a;
+	const struct ins *ib = b;
+
+	return strcmp(ia->name, ib->name);
+}
+
+static void ins__sort(struct arch *arch)
+{
+	const int nmemb = arch->nr_instructions;
+
+	qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp);
+}
+
+static struct ins_ops *__ins__find(struct arch *arch, const char *name)
+{
+	struct ins *ins;
+	const int nmemb = arch->nr_instructions;
+
+	if (!arch->sorted_instructions) {
+		ins__sort(arch);
+		arch->sorted_instructions = true;
+	}
+
+	ins = bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp);
+	return ins ? ins->ops : NULL;
+}
+
+static struct ins_ops *ins__find(struct arch *arch, const char *name)
+{
+	struct ins_ops *ops = __ins__find(arch, name);
+
+	if (!ops && arch->associate_instruction_ops)
+		ops = arch->associate_instruction_ops(arch, name);
+
+	return ops;
+}
+
+static int arch__key_cmp(const void *name, const void *archp)
+{
+	const struct arch *arch = archp;
+
+	return strcmp(name, arch->name);
+}
+
+static int arch__cmp(const void *a, const void *b)
+{
+	const struct arch *aa = a;
+	const struct arch *ab = b;
+
+	return strcmp(aa->name, ab->name);
+}
+
+static void arch__sort(void)
+{
+	const int nmemb = ARRAY_SIZE(architectures);
+
+	qsort(architectures, nmemb, sizeof(struct arch), arch__cmp);
+}
+
+static struct arch *arch__find(const char *name)
+{
+	const int nmemb = ARRAY_SIZE(architectures);
+	static bool sorted;
+
+	if (!sorted) {
+		arch__sort();
+		sorted = true;
+	}
+
+	return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp);
+}
+
+static struct annotated_source *annotated_source__new(void)
+{
+	struct annotated_source *src = zalloc(sizeof(*src));
+
+	if (src != NULL)
+		INIT_LIST_HEAD(&src->source);
+
+	return src;
+}
+
+static __maybe_unused void annotated_source__delete(struct annotated_source *src)
+{
+	if (src == NULL)
+		return;
+	zfree(&src->histograms);
+	zfree(&src->cycles_hist);
+	free(src);
+}
+
+static int annotated_source__alloc_histograms(struct annotated_source *src,
+					      size_t size, int nr_hists)
+{
+	size_t sizeof_sym_hist;
+
+	/*
+	 * Add buffer of one element for zero length symbol.
+	 * When sample is taken from first instruction of
+	 * zero length symbol, perf still resolves it and
+	 * shows symbol name in perf report and allows to
+	 * annotate it.
+	 */
+	if (size == 0)
+		size = 1;
+
+	/* Check for overflow when calculating sizeof_sym_hist */
+	if (size > (SIZE_MAX - sizeof(struct sym_hist)) / sizeof(struct sym_hist_entry))
+		return -1;
+
+	sizeof_sym_hist = (sizeof(struct sym_hist) + size * sizeof(struct sym_hist_entry));
+
+	/* Check for overflow in zalloc argument */
+	if (sizeof_sym_hist > SIZE_MAX / nr_hists)
+		return -1;
+
+	src->sizeof_sym_hist = sizeof_sym_hist;
+	src->nr_histograms   = nr_hists;
+	src->histograms	     = calloc(nr_hists, sizeof_sym_hist) ;
+	return src->histograms ? 0 : -1;
+}
+
+/* The cycles histogram is lazily allocated. */
+static int symbol__alloc_hist_cycles(struct symbol *sym)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	const size_t size = symbol__size(sym);
+
+	notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist));
+	if (notes->src->cycles_hist == NULL)
+		return -1;
+	return 0;
+}
+
+void symbol__annotate_zero_histograms(struct symbol *sym)
+{
+	struct annotation *notes = symbol__annotation(sym);
+
+	mutex_lock(&notes->lock);
+	if (notes->src != NULL) {
+		memset(notes->src->histograms, 0,
+		       notes->src->nr_histograms * notes->src->sizeof_sym_hist);
+		if (notes->src->cycles_hist)
+			memset(notes->src->cycles_hist, 0,
+				symbol__size(sym) * sizeof(struct cyc_hist));
+	}
+	mutex_unlock(&notes->lock);
+}
+
+static int __symbol__account_cycles(struct cyc_hist *ch,
+				    u64 start,
+				    unsigned offset, unsigned cycles,
+				    unsigned have_start)
+{
+	/*
+	 * For now we can only account one basic block per
+	 * final jump. But multiple could be overlapping.
+	 * Always account the longest one. So when
+	 * a shorter one has been already seen throw it away.
+	 *
+	 * We separately always account the full cycles.
+	 */
+	ch[offset].num_aggr++;
+	ch[offset].cycles_aggr += cycles;
+
+	if (cycles > ch[offset].cycles_max)
+		ch[offset].cycles_max = cycles;
+
+	if (ch[offset].cycles_min) {
+		if (cycles && cycles < ch[offset].cycles_min)
+			ch[offset].cycles_min = cycles;
+	} else
+		ch[offset].cycles_min = cycles;
+
+	if (!have_start && ch[offset].have_start)
+		return 0;
+	if (ch[offset].num) {
+		if (have_start && (!ch[offset].have_start ||
+				   ch[offset].start > start)) {
+			ch[offset].have_start = 0;
+			ch[offset].cycles = 0;
+			ch[offset].num = 0;
+			if (ch[offset].reset < 0xffff)
+				ch[offset].reset++;
+		} else if (have_start &&
+			   ch[offset].start < start)
+			return 0;
+	}
+
+	if (ch[offset].num < NUM_SPARKS)
+		ch[offset].cycles_spark[ch[offset].num] = cycles;
+
+	ch[offset].have_start = have_start;
+	ch[offset].start = start;
+	ch[offset].cycles += cycles;
+	ch[offset].num++;
+	return 0;
+}
+
+static int __symbol__inc_addr_samples(struct map_symbol *ms,
+				      struct annotated_source *src, int evidx, u64 addr,
+				      struct perf_sample *sample)
+{
+	struct symbol *sym = ms->sym;
+	unsigned offset;
+	struct sym_hist *h;
+
+	pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, ms->map->unmap_ip(ms->map, addr));
+
+	if ((addr < sym->start || addr >= sym->end) &&
+	    (addr != sym->end || sym->start != sym->end)) {
+		pr_debug("%s(%d): ERANGE! sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 "\n",
+		       __func__, __LINE__, sym->name, sym->start, addr, sym->end);
+		return -ERANGE;
+	}
+
+	offset = addr - sym->start;
+	h = annotated_source__histogram(src, evidx);
+	if (h == NULL) {
+		pr_debug("%s(%d): ENOMEM! sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 ", func: %d\n",
+			 __func__, __LINE__, sym->name, sym->start, addr, sym->end, sym->type == STT_FUNC);
+		return -ENOMEM;
+	}
+	h->nr_samples++;
+	h->addr[offset].nr_samples++;
+	h->period += sample->period;
+	h->addr[offset].period += sample->period;
+
+	pr_debug3("%#" PRIx64 " %s: period++ [addr: %#" PRIx64 ", %#" PRIx64
+		  ", evidx=%d] => nr_samples: %" PRIu64 ", period: %" PRIu64 "\n",
+		  sym->start, sym->name, addr, addr - sym->start, evidx,
+		  h->addr[offset].nr_samples, h->addr[offset].period);
+	return 0;
+}
+
+static struct cyc_hist *symbol__cycles_hist(struct symbol *sym)
+{
+	struct annotation *notes = symbol__annotation(sym);
+
+	if (notes->src == NULL) {
+		notes->src = annotated_source__new();
+		if (notes->src == NULL)
+			return NULL;
+		goto alloc_cycles_hist;
+	}
+
+	if (!notes->src->cycles_hist) {
+alloc_cycles_hist:
+		symbol__alloc_hist_cycles(sym);
+	}
+
+	return notes->src->cycles_hist;
+}
+
+struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists)
+{
+	struct annotation *notes = symbol__annotation(sym);
+
+	if (notes->src == NULL) {
+		notes->src = annotated_source__new();
+		if (notes->src == NULL)
+			return NULL;
+		goto alloc_histograms;
+	}
+
+	if (notes->src->histograms == NULL) {
+alloc_histograms:
+		annotated_source__alloc_histograms(notes->src, symbol__size(sym),
+						   nr_hists);
+	}
+
+	return notes->src;
+}
+
+static int symbol__inc_addr_samples(struct map_symbol *ms,
+				    struct evsel *evsel, u64 addr,
+				    struct perf_sample *sample)
+{
+	struct symbol *sym = ms->sym;
+	struct annotated_source *src;
+
+	if (sym == NULL)
+		return 0;
+	src = symbol__hists(sym, evsel->evlist->core.nr_entries);
+	return src ? __symbol__inc_addr_samples(ms, src, evsel->core.idx, addr, sample) : 0;
+}
+
+static int symbol__account_cycles(u64 addr, u64 start,
+				  struct symbol *sym, unsigned cycles)
+{
+	struct cyc_hist *cycles_hist;
+	unsigned offset;
+
+	if (sym == NULL)
+		return 0;
+	cycles_hist = symbol__cycles_hist(sym);
+	if (cycles_hist == NULL)
+		return -ENOMEM;
+	if (addr < sym->start || addr >= sym->end)
+		return -ERANGE;
+
+	if (start) {
+		if (start < sym->start || start >= sym->end)
+			return -ERANGE;
+		if (start >= addr)
+			start = 0;
+	}
+	offset = addr - sym->start;
+	return __symbol__account_cycles(cycles_hist,
+					start ? start - sym->start : 0,
+					offset, cycles,
+					!!start);
+}
+
+int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
+				    struct addr_map_symbol *start,
+				    unsigned cycles)
+{
+	u64 saddr = 0;
+	int err;
+
+	if (!cycles)
+		return 0;
+
+	/*
+	 * Only set start when IPC can be computed. We can only
+	 * compute it when the basic block is completely in a single
+	 * function.
+	 * Special case the case when the jump is elsewhere, but
+	 * it starts on the function start.
+	 */
+	if (start &&
+		(start->ms.sym == ams->ms.sym ||
+		 (ams->ms.sym &&
+		   start->addr == ams->ms.sym->start + ams->ms.map->start)))
+		saddr = start->al_addr;
+	if (saddr == 0)
+		pr_debug2("BB with bad start: addr %"PRIx64" start %"PRIx64" sym %"PRIx64" saddr %"PRIx64"\n",
+			ams->addr,
+			start ? start->addr : 0,
+			ams->ms.sym ? ams->ms.sym->start + ams->ms.map->start : 0,
+			saddr);
+	err = symbol__account_cycles(ams->al_addr, saddr, ams->ms.sym, cycles);
+	if (err)
+		pr_debug2("account_cycles failed %d\n", err);
+	return err;
+}
+
+static unsigned annotation__count_insn(struct annotation *notes, u64 start, u64 end)
+{
+	unsigned n_insn = 0;
+	u64 offset;
+
+	for (offset = start; offset <= end; offset++) {
+		if (notes->offsets[offset])
+			n_insn++;
+	}
+	return n_insn;
+}
+
+static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 end, struct cyc_hist *ch)
+{
+	unsigned n_insn;
+	unsigned int cover_insn = 0;
+	u64 offset;
+
+	n_insn = annotation__count_insn(notes, start, end);
+	if (n_insn && ch->num && ch->cycles) {
+		float ipc = n_insn / ((double)ch->cycles / (double)ch->num);
+
+		/* Hide data when there are too many overlaps. */
+		if (ch->reset >= 0x7fff)
+			return;
+
+		for (offset = start; offset <= end; offset++) {
+			struct annotation_line *al = notes->offsets[offset];
+
+			if (al && al->ipc == 0.0) {
+				al->ipc = ipc;
+				cover_insn++;
+			}
+		}
+
+		if (cover_insn) {
+			notes->hit_cycles += ch->cycles;
+			notes->hit_insn += n_insn * ch->num;
+			notes->cover_insn += cover_insn;
+		}
+	}
+}
+
+void annotation__compute_ipc(struct annotation *notes, size_t size)
+{
+	s64 offset;
+
+	if (!notes->src || !notes->src->cycles_hist)
+		return;
+
+	notes->total_insn = annotation__count_insn(notes, 0, size - 1);
+	notes->hit_cycles = 0;
+	notes->hit_insn = 0;
+	notes->cover_insn = 0;
+
+	mutex_lock(&notes->lock);
+	for (offset = size - 1; offset >= 0; --offset) {
+		struct cyc_hist *ch;
+
+		ch = &notes->src->cycles_hist[offset];
+		if (ch && ch->cycles) {
+			struct annotation_line *al;
+
+			if (ch->have_start)
+				annotation__count_and_fill(notes, ch->start, offset, ch);
+			al = notes->offsets[offset];
+			if (al && ch->num_aggr) {
+				al->cycles = ch->cycles_aggr / ch->num_aggr;
+				al->cycles_max = ch->cycles_max;
+				al->cycles_min = ch->cycles_min;
+			}
+			notes->have_cycles = true;
+		}
+	}
+	mutex_unlock(&notes->lock);
+}
+
+int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample,
+				 struct evsel *evsel)
+{
+	return symbol__inc_addr_samples(&ams->ms, evsel, ams->al_addr, sample);
+}
+
+int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample,
+				 struct evsel *evsel, u64 ip)
+{
+	return symbol__inc_addr_samples(&he->ms, evsel, ip, sample);
+}
+
+static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map_symbol *ms)
+{
+	dl->ins.ops = ins__find(arch, dl->ins.name);
+
+	if (!dl->ins.ops)
+		return;
+
+	if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms) < 0)
+		dl->ins.ops = NULL;
+}
+
+static int disasm_line__parse(char *line, const char **namep, char **rawp)
+{
+	char tmp, *name = skip_spaces(line);
+
+	if (name[0] == '\0')
+		return -1;
+
+	*rawp = name + 1;
+
+	while ((*rawp)[0] != '\0' && !isspace((*rawp)[0]))
+		++*rawp;
+
+	tmp = (*rawp)[0];
+	(*rawp)[0] = '\0';
+	*namep = strdup(name);
+
+	if (*namep == NULL)
+		goto out;
+
+	(*rawp)[0] = tmp;
+	*rawp = strim(*rawp);
+
+	return 0;
+
+out:
+	return -1;
+}
+
+struct annotate_args {
+	struct arch		  *arch;
+	struct map_symbol	  ms;
+	struct evsel		  *evsel;
+	struct annotation_options *options;
+	s64			  offset;
+	char			  *line;
+	int			  line_nr;
+	char			  *fileloc;
+};
+
+static void annotation_line__init(struct annotation_line *al,
+				  struct annotate_args *args,
+				  int nr)
+{
+	al->offset = args->offset;
+	al->line = strdup(args->line);
+	al->line_nr = args->line_nr;
+	al->fileloc = args->fileloc;
+	al->data_nr = nr;
+}
+
+static void annotation_line__exit(struct annotation_line *al)
+{
+	free_srcline(al->path);
+	zfree(&al->line);
+}
+
+static size_t disasm_line_size(int nr)
+{
+	struct annotation_line *al;
+
+	return (sizeof(struct disasm_line) + (sizeof(al->data[0]) * nr));
+}
+
+/*
+ * Allocating the disasm annotation line data with
+ * following structure:
+ *
+ *    -------------------------------------------
+ *    struct disasm_line | struct annotation_line
+ *    -------------------------------------------
+ *
+ * We have 'struct annotation_line' member as last member
+ * of 'struct disasm_line' to have an easy access.
+ */
+static struct disasm_line *disasm_line__new(struct annotate_args *args)
+{
+	struct disasm_line *dl = NULL;
+	int nr = 1;
+
+	if (evsel__is_group_event(args->evsel))
+		nr = args->evsel->core.nr_members;
+
+	dl = zalloc(disasm_line_size(nr));
+	if (!dl)
+		return NULL;
+
+	annotation_line__init(&dl->al, args, nr);
+	if (dl->al.line == NULL)
+		goto out_delete;
+
+	if (args->offset != -1) {
+		if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
+			goto out_free_line;
+
+		disasm_line__init_ins(dl, args->arch, &args->ms);
+	}
+
+	return dl;
+
+out_free_line:
+	zfree(&dl->al.line);
+out_delete:
+	free(dl);
+	return NULL;
+}
+
+void disasm_line__free(struct disasm_line *dl)
+{
+	if (dl->ins.ops && dl->ins.ops->free)
+		dl->ins.ops->free(&dl->ops);
+	else
+		ins__delete(&dl->ops);
+	zfree(&dl->ins.name);
+	annotation_line__exit(&dl->al);
+	free(dl);
+}
+
+int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name)
+{
+	if (raw || !dl->ins.ops)
+		return scnprintf(bf, size, "%-*s %s", max_ins_name, dl->ins.name, dl->ops.raw);
+
+	return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name);
+}
+
+void annotation__init(struct annotation *notes)
+{
+	mutex_init(&notes->lock);
+}
+
+void annotation__exit(struct annotation *notes)
+{
+	annotated_source__delete(notes->src);
+	mutex_destroy(&notes->lock);
+}
+
+static void annotation_line__add(struct annotation_line *al, struct list_head *head)
+{
+	list_add_tail(&al->node, head);
+}
+
+struct annotation_line *
+annotation_line__next(struct annotation_line *pos, struct list_head *head)
+{
+	list_for_each_entry_continue(pos, head, node)
+		if (pos->offset >= 0)
+			return pos;
+
+	return NULL;
+}
+
+static const char *annotate__address_color(struct block_range *br)
+{
+	double cov = block_range__coverage(br);
+
+	if (cov >= 0) {
+		/* mark red for >75% coverage */
+		if (cov > 0.75)
+			return PERF_COLOR_RED;
+
+		/* mark dull for <1% coverage */
+		if (cov < 0.01)
+			return PERF_COLOR_NORMAL;
+	}
+
+	return PERF_COLOR_MAGENTA;
+}
+
+static const char *annotate__asm_color(struct block_range *br)
+{
+	double cov = block_range__coverage(br);
+
+	if (cov >= 0) {
+		/* mark dull for <1% coverage */
+		if (cov < 0.01)
+			return PERF_COLOR_NORMAL;
+	}
+
+	return PERF_COLOR_BLUE;
+}
+
+static void annotate__branch_printf(struct block_range *br, u64 addr)
+{
+	bool emit_comment = true;
+
+	if (!br)
+		return;
+
+#if 1
+	if (br->is_target && br->start == addr) {
+		struct block_range *branch = br;
+		double p;
+
+		/*
+		 * Find matching branch to our target.
+		 */
+		while (!branch->is_branch)
+			branch = block_range__next(branch);
+
+		p = 100 *(double)br->entry / branch->coverage;
+
+		if (p > 0.1) {
+			if (emit_comment) {
+				emit_comment = false;
+				printf("\t#");
+			}
+
+			/*
+			 * The percentage of coverage joined at this target in relation
+			 * to the next branch.
+			 */
+			printf(" +%.2f%%", p);
+		}
+	}
+#endif
+	if (br->is_branch && br->end == addr) {
+		double p = 100*(double)br->taken / br->coverage;
+
+		if (p > 0.1) {
+			if (emit_comment) {
+				emit_comment = false;
+				printf("\t#");
+			}
+
+			/*
+			 * The percentage of coverage leaving at this branch, and
+			 * its prediction ratio.
+			 */
+			printf(" -%.2f%% (p:%.2f%%)", p, 100*(double)br->pred  / br->taken);
+		}
+	}
+}
+
+static int disasm_line__print(struct disasm_line *dl, u64 start, int addr_fmt_width)
+{
+	s64 offset = dl->al.offset;
+	const u64 addr = start + offset;
+	struct block_range *br;
+
+	br = block_range__find(addr);
+	color_fprintf(stdout, annotate__address_color(br), "  %*" PRIx64 ":", addr_fmt_width, addr);
+	color_fprintf(stdout, annotate__asm_color(br), "%s", dl->al.line);
+	annotate__branch_printf(br, addr);
+	return 0;
+}
+
+static int
+annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start,
+		       struct evsel *evsel, u64 len, int min_pcnt, int printed,
+		       int max_lines, struct annotation_line *queue, int addr_fmt_width,
+		       int percent_type)
+{
+	struct disasm_line *dl = container_of(al, struct disasm_line, al);
+	static const char *prev_line;
+
+	if (al->offset != -1) {
+		double max_percent = 0.0;
+		int i, nr_percent = 1;
+		const char *color;
+		struct annotation *notes = symbol__annotation(sym);
+
+		for (i = 0; i < al->data_nr; i++) {
+			double percent;
+
+			percent = annotation_data__percent(&al->data[i],
+							   percent_type);
+
+			if (percent > max_percent)
+				max_percent = percent;
+		}
+
+		if (al->data_nr > nr_percent)
+			nr_percent = al->data_nr;
+
+		if (max_percent < min_pcnt)
+			return -1;
+
+		if (max_lines && printed >= max_lines)
+			return 1;
+
+		if (queue != NULL) {
+			list_for_each_entry_from(queue, &notes->src->source, node) {
+				if (queue == al)
+					break;
+				annotation_line__print(queue, sym, start, evsel, len,
+						       0, 0, 1, NULL, addr_fmt_width,
+						       percent_type);
+			}
+		}
+
+		color = get_percent_color(max_percent);
+
+		for (i = 0; i < nr_percent; i++) {
+			struct annotation_data *data = &al->data[i];
+			double percent;
+
+			percent = annotation_data__percent(data, percent_type);
+			color = get_percent_color(percent);
+
+			if (symbol_conf.show_total_period)
+				color_fprintf(stdout, color, " %11" PRIu64,
+					      data->he.period);
+			else if (symbol_conf.show_nr_samples)
+				color_fprintf(stdout, color, " %7" PRIu64,
+					      data->he.nr_samples);
+			else
+				color_fprintf(stdout, color, " %7.2f", percent);
+		}
+
+		printf(" : ");
+
+		disasm_line__print(dl, start, addr_fmt_width);
+
+		/*
+		 * Also color the filename and line if needed, with
+		 * the same color than the percentage. Don't print it
+		 * twice for close colored addr with the same filename:line
+		 */
+		if (al->path) {
+			if (!prev_line || strcmp(prev_line, al->path)) {
+				color_fprintf(stdout, color, " // %s", al->path);
+				prev_line = al->path;
+			}
+		}
+
+		printf("\n");
+	} else if (max_lines && printed >= max_lines)
+		return 1;
+	else {
+		int width = symbol_conf.show_total_period ? 12 : 8;
+
+		if (queue)
+			return -1;
+
+		if (evsel__is_group_event(evsel))
+			width *= evsel->core.nr_members;
+
+		if (!*al->line)
+			printf(" %*s:\n", width, " ");
+		else
+			printf(" %*s: %-*d %s\n", width, " ", addr_fmt_width, al->line_nr, al->line);
+	}
+
+	return 0;
+}
+
+/*
+ * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw)
+ * which looks like following
+ *
+ *  0000000000415500 <_init>:
+ *    415500:       sub    $0x8,%rsp
+ *    415504:       mov    0x2f5ad5(%rip),%rax        # 70afe0 <_DYNAMIC+0x2f8>
+ *    41550b:       test   %rax,%rax
+ *    41550e:       je     415515 <_init+0x15>
+ *    415510:       callq  416e70 <__gmon_start__@plt>
+ *    415515:       add    $0x8,%rsp
+ *    415519:       retq
+ *
+ * it will be parsed and saved into struct disasm_line as
+ *  <offset>       <name>  <ops.raw>
+ *
+ * The offset will be a relative offset from the start of the symbol and -1
+ * means that it's not a disassembly line so should be treated differently.
+ * The ops.raw part will be parsed further according to type of the instruction.
+ */
+static int symbol__parse_objdump_line(struct symbol *sym,
+				      struct annotate_args *args,
+				      char *parsed_line, int *line_nr, char **fileloc)
+{
+	struct map *map = args->ms.map;
+	struct annotation *notes = symbol__annotation(sym);
+	struct disasm_line *dl;
+	char *tmp;
+	s64 line_ip, offset = -1;
+	regmatch_t match[2];
+
+	/* /filename:linenr ? Save line number and ignore. */
+	if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) {
+		*line_nr = atoi(parsed_line + match[1].rm_so);
+		*fileloc = strdup(parsed_line);
+		return 0;
+	}
+
+	/* Process hex address followed by ':'. */
+	line_ip = strtoull(parsed_line, &tmp, 16);
+	if (parsed_line != tmp && tmp[0] == ':' && tmp[1] != '\0') {
+		u64 start = map__rip_2objdump(map, sym->start),
+		    end = map__rip_2objdump(map, sym->end);
+
+		offset = line_ip - start;
+		if ((u64)line_ip < start || (u64)line_ip >= end)
+			offset = -1;
+		else
+			parsed_line = tmp + 1;
+	}
+
+	args->offset  = offset;
+	args->line    = parsed_line;
+	args->line_nr = *line_nr;
+	args->fileloc = *fileloc;
+	args->ms.sym  = sym;
+
+	dl = disasm_line__new(args);
+	(*line_nr)++;
+
+	if (dl == NULL)
+		return -1;
+
+	if (!disasm_line__has_local_offset(dl)) {
+		dl->ops.target.offset = dl->ops.target.addr -
+					map__rip_2objdump(map, sym->start);
+		dl->ops.target.offset_avail = true;
+	}
+
+	/* kcore has no symbols, so add the call target symbol */
+	if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.sym) {
+		struct addr_map_symbol target = {
+			.addr = dl->ops.target.addr,
+			.ms = { .map = map, },
+		};
+
+		if (!maps__find_ams(args->ms.maps, &target) &&
+		    target.ms.sym->start == target.al_addr)
+			dl->ops.target.sym = target.ms.sym;
+	}
+
+	annotation_line__add(&dl->al, &notes->src->source);
+
+	return 0;
+}
+
+static __attribute__((constructor)) void symbol__init_regexpr(void)
+{
+	regcomp(&file_lineno, "^/[^:]+:([0-9]+)", REG_EXTENDED);
+}
+
+static void delete_last_nop(struct symbol *sym)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct list_head *list = &notes->src->source;
+	struct disasm_line *dl;
+
+	while (!list_empty(list)) {
+		dl = list_entry(list->prev, struct disasm_line, al.node);
+
+		if (dl->ins.ops) {
+			if (dl->ins.ops != &nop_ops)
+				return;
+		} else {
+			if (!strstr(dl->al.line, " nop ") &&
+			    !strstr(dl->al.line, " nopl ") &&
+			    !strstr(dl->al.line, " nopw "))
+				return;
+		}
+
+		list_del_init(&dl->al.node);
+		disasm_line__free(dl);
+	}
+}
+
+int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen)
+{
+	struct dso *dso = ms->map->dso;
+
+	BUG_ON(buflen == 0);
+
+	if (errnum >= 0) {
+		str_error_r(errnum, buf, buflen);
+		return 0;
+	}
+
+	switch (errnum) {
+	case SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX: {
+		char bf[SBUILD_ID_SIZE + 15] = " with build id ";
+		char *build_id_msg = NULL;
+
+		if (dso->has_build_id) {
+			build_id__sprintf(&dso->bid, bf + 15);
+			build_id_msg = bf;
+		}
+		scnprintf(buf, buflen,
+			  "No vmlinux file%s\nwas found in the path.\n\n"
+			  "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n"
+			  "Please use:\n\n"
+			  "  perf buildid-cache -vu vmlinux\n\n"
+			  "or:\n\n"
+			  "  --vmlinux vmlinux\n", build_id_msg ?: "");
+	}
+		break;
+	case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF:
+		scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation");
+		break;
+	case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP:
+		scnprintf(buf, buflen, "Problems with arch specific instruction name regular expressions.");
+		break;
+	case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING:
+		scnprintf(buf, buflen, "Problems while parsing the CPUID in the arch specific initialization.");
+		break;
+	case SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE:
+		scnprintf(buf, buflen, "Invalid BPF file: %s.", dso->long_name);
+		break;
+	case SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF:
+		scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with -g or use pahole -J.",
+			  dso->long_name);
+		break;
+	default:
+		scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum);
+		break;
+	}
+
+	return 0;
+}
+
+static int dso__disassemble_filename(struct dso *dso, char *filename, size_t filename_size)
+{
+	char linkname[PATH_MAX];
+	char *build_id_filename;
+	char *build_id_path = NULL;
+	char *pos;
+	int len;
+
+	if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
+	    !dso__is_kcore(dso))
+		return SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX;
+
+	build_id_filename = dso__build_id_filename(dso, NULL, 0, false);
+	if (build_id_filename) {
+		__symbol__join_symfs(filename, filename_size, build_id_filename);
+		free(build_id_filename);
+	} else {
+		if (dso->has_build_id)
+			return ENOMEM;
+		goto fallback;
+	}
+
+	build_id_path = strdup(filename);
+	if (!build_id_path)
+		return ENOMEM;
+
+	/*
+	 * old style build-id cache has name of XX/XXXXXXX.. while
+	 * new style has XX/XXXXXXX../{elf,kallsyms,vdso}.
+	 * extract the build-id part of dirname in the new style only.
+	 */
+	pos = strrchr(build_id_path, '/');
+	if (pos && strlen(pos) < SBUILD_ID_SIZE - 2)
+		dirname(build_id_path);
+
+	if (dso__is_kcore(dso))
+		goto fallback;
+
+	len = readlink(build_id_path, linkname, sizeof(linkname) - 1);
+	if (len < 0)
+		goto fallback;
+
+	linkname[len] = '\0';
+	if (strstr(linkname, DSO__NAME_KALLSYMS) ||
+		access(filename, R_OK)) {
+fallback:
+		/*
+		 * If we don't have build-ids or the build-id file isn't in the
+		 * cache, or is just a kallsyms file, well, lets hope that this
+		 * DSO is the same as when 'perf record' ran.
+		 */
+		__symbol__join_symfs(filename, filename_size, dso->long_name);
+
+		mutex_lock(&dso->lock);
+		if (access(filename, R_OK) && errno == ENOENT && dso->nsinfo) {
+			char *new_name = filename_with_chroot(dso->nsinfo->pid,
+							      filename);
+			if (new_name) {
+				strlcpy(filename, new_name, filename_size);
+				free(new_name);
+			}
+		}
+		mutex_unlock(&dso->lock);
+	}
+
+	free(build_id_path);
+	return 0;
+}
+
+#if defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
+#define PACKAGE "perf"
+#include <bfd.h>
+#include <dis-asm.h>
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#include <linux/btf.h>
+#include <tools/dis-asm-compat.h>
+
+static int symbol__disassemble_bpf(struct symbol *sym,
+				   struct annotate_args *args)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct annotation_options *opts = args->options;
+	struct bpf_prog_linfo *prog_linfo = NULL;
+	struct bpf_prog_info_node *info_node;
+	int len = sym->end - sym->start;
+	disassembler_ftype disassemble;
+	struct map *map = args->ms.map;
+	struct perf_bpil *info_linear;
+	struct disassemble_info info;
+	struct dso *dso = map->dso;
+	int pc = 0, count, sub_id;
+	struct btf *btf = NULL;
+	char tpath[PATH_MAX];
+	size_t buf_size;
+	int nr_skip = 0;
+	char *buf;
+	bfd *bfdf;
+	int ret;
+	FILE *s;
+
+	if (dso->binary_type != DSO_BINARY_TYPE__BPF_PROG_INFO)
+		return SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE;
+
+	pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func__,
+		  sym->name, sym->start, sym->end - sym->start);
+
+	memset(tpath, 0, sizeof(tpath));
+	perf_exe(tpath, sizeof(tpath));
+
+	bfdf = bfd_openr(tpath, NULL);
+	if (bfdf == NULL)
+		abort();
+
+	if (!bfd_check_format(bfdf, bfd_object))
+		abort();
+
+	s = open_memstream(&buf, &buf_size);
+	if (!s) {
+		ret = errno;
+		goto out;
+	}
+	init_disassemble_info_compat(&info, s,
+				     (fprintf_ftype) fprintf,
+				     fprintf_styled);
+	info.arch = bfd_get_arch(bfdf);
+	info.mach = bfd_get_mach(bfdf);
+
+	info_node = perf_env__find_bpf_prog_info(dso->bpf_prog.env,
+						 dso->bpf_prog.id);
+	if (!info_node) {
+		ret = SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF;
+		goto out;
+	}
+	info_linear = info_node->info_linear;
+	sub_id = dso->bpf_prog.sub_id;
+
+	info.buffer = (void *)(uintptr_t)(info_linear->info.jited_prog_insns);
+	info.buffer_length = info_linear->info.jited_prog_len;
+
+	if (info_linear->info.nr_line_info)
+		prog_linfo = bpf_prog_linfo__new(&info_linear->info);
+
+	if (info_linear->info.btf_id) {
+		struct btf_node *node;
+
+		node = perf_env__find_btf(dso->bpf_prog.env,
+					  info_linear->info.btf_id);
+		if (node)
+			btf = btf__new((__u8 *)(node->data),
+				       node->data_size);
+	}
+
+	disassemble_init_for_target(&info);
+
+#ifdef DISASM_FOUR_ARGS_SIGNATURE
+	disassemble = disassembler(info.arch,
+				   bfd_big_endian(bfdf),
+				   info.mach,
+				   bfdf);
+#else
+	disassemble = disassembler(bfdf);
+#endif
+	if (disassemble == NULL)
+		abort();
+
+	fflush(s);
+	do {
+		const struct bpf_line_info *linfo = NULL;
+		struct disasm_line *dl;
+		size_t prev_buf_size;
+		const char *srcline;
+		u64 addr;
+
+		addr = pc + ((u64 *)(uintptr_t)(info_linear->info.jited_ksyms))[sub_id];
+		count = disassemble(pc, &info);
+
+		if (prog_linfo)
+			linfo = bpf_prog_linfo__lfind_addr_func(prog_linfo,
+								addr, sub_id,
+								nr_skip);
+
+		if (linfo && btf) {
+			srcline = btf__name_by_offset(btf, linfo->line_off);
+			nr_skip++;
+		} else
+			srcline = NULL;
+
+		fprintf(s, "\n");
+		prev_buf_size = buf_size;
+		fflush(s);
+
+		if (!opts->hide_src_code && srcline) {
+			args->offset = -1;
+			args->line = strdup(srcline);
+			args->line_nr = 0;
+			args->fileloc = NULL;
+			args->ms.sym  = sym;
+			dl = disasm_line__new(args);
+			if (dl) {
+				annotation_line__add(&dl->al,
+						     &notes->src->source);
+			}
+		}
+
+		args->offset = pc;
+		args->line = buf + prev_buf_size;
+		args->line_nr = 0;
+		args->fileloc = NULL;
+		args->ms.sym  = sym;
+		dl = disasm_line__new(args);
+		if (dl)
+			annotation_line__add(&dl->al, &notes->src->source);
+
+		pc += count;
+	} while (count > 0 && pc < len);
+
+	ret = 0;
+out:
+	free(prog_linfo);
+	btf__free(btf);
+	fclose(s);
+	bfd_close(bfdf);
+	return ret;
+}
+#else // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
+static int symbol__disassemble_bpf(struct symbol *sym __maybe_unused,
+				   struct annotate_args *args __maybe_unused)
+{
+	return SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF;
+}
+#endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
+
+static int
+symbol__disassemble_bpf_image(struct symbol *sym,
+			      struct annotate_args *args)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct disasm_line *dl;
+
+	args->offset = -1;
+	args->line = strdup("to be implemented");
+	args->line_nr = 0;
+	args->fileloc = NULL;
+	dl = disasm_line__new(args);
+	if (dl)
+		annotation_line__add(&dl->al, &notes->src->source);
+
+	free(args->line);
+	return 0;
+}
+
+/*
+ * Possibly create a new version of line with tabs expanded. Returns the
+ * existing or new line, storage is updated if a new line is allocated. If
+ * allocation fails then NULL is returned.
+ */
+static char *expand_tabs(char *line, char **storage, size_t *storage_len)
+{
+	size_t i, src, dst, len, new_storage_len, num_tabs;
+	char *new_line;
+	size_t line_len = strlen(line);
+
+	for (num_tabs = 0, i = 0; i < line_len; i++)
+		if (line[i] == '\t')
+			num_tabs++;
+
+	if (num_tabs == 0)
+		return line;
+
+	/*
+	 * Space for the line and '\0', less the leading and trailing
+	 * spaces. Each tab may introduce 7 additional spaces.
+	 */
+	new_storage_len = line_len + 1 + (num_tabs * 7);
+
+	new_line = malloc(new_storage_len);
+	if (new_line == NULL) {
+		pr_err("Failure allocating memory for tab expansion\n");
+		return NULL;
+	}
+
+	/*
+	 * Copy regions starting at src and expand tabs. If there are two
+	 * adjacent tabs then 'src == i', the memcpy is of size 0 and the spaces
+	 * are inserted.
+	 */
+	for (i = 0, src = 0, dst = 0; i < line_len && num_tabs; i++) {
+		if (line[i] == '\t') {
+			len = i - src;
+			memcpy(&new_line[dst], &line[src], len);
+			dst += len;
+			new_line[dst++] = ' ';
+			while (dst % 8 != 0)
+				new_line[dst++] = ' ';
+			src = i + 1;
+			num_tabs--;
+		}
+	}
+
+	/* Expand the last region. */
+	len = line_len - src;
+	memcpy(&new_line[dst], &line[src], len);
+	dst += len;
+	new_line[dst] = '\0';
+
+	free(*storage);
+	*storage = new_line;
+	*storage_len = new_storage_len;
+	return new_line;
+
+}
+
+static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
+{
+	struct annotation_options *opts = args->options;
+	struct map *map = args->ms.map;
+	struct dso *dso = map->dso;
+	char *command;
+	FILE *file;
+	char symfs_filename[PATH_MAX];
+	struct kcore_extract kce;
+	bool delete_extract = false;
+	bool decomp = false;
+	int lineno = 0;
+	char *fileloc = NULL;
+	int nline;
+	char *line;
+	size_t line_len;
+	const char *objdump_argv[] = {
+		"/bin/sh",
+		"-c",
+		NULL, /* Will be the objdump command to run. */
+		"--",
+		NULL, /* Will be the symfs path. */
+		NULL,
+	};
+	struct child_process objdump_process;
+	int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
+
+	if (err)
+		return err;
+
+	pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
+		 symfs_filename, sym->name, map->unmap_ip(map, sym->start),
+		 map->unmap_ip(map, sym->end));
+
+	pr_debug("annotating [%p] %30s : [%p] %30s\n",
+		 dso, dso->long_name, sym, sym->name);
+
+	if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) {
+		return symbol__disassemble_bpf(sym, args);
+	} else if (dso->binary_type == DSO_BINARY_TYPE__BPF_IMAGE) {
+		return symbol__disassemble_bpf_image(sym, args);
+	} else if (dso__is_kcore(dso)) {
+		kce.kcore_filename = symfs_filename;
+		kce.addr = map__rip_2objdump(map, sym->start);
+		kce.offs = sym->start;
+		kce.len = sym->end - sym->start;
+		if (!kcore_extract__create(&kce)) {
+			delete_extract = true;
+			strlcpy(symfs_filename, kce.extract_filename,
+				sizeof(symfs_filename));
+		}
+	} else if (dso__needs_decompress(dso)) {
+		char tmp[KMOD_DECOMP_LEN];
+
+		if (dso__decompress_kmodule_path(dso, symfs_filename,
+						 tmp, sizeof(tmp)) < 0)
+			return -1;
+
+		decomp = true;
+		strcpy(symfs_filename, tmp);
+	}
+
+	err = asprintf(&command,
+		 "%s %s%s --start-address=0x%016" PRIx64
+		 " --stop-address=0x%016" PRIx64
+		 " -l -d %s %s %s %c%s%c %s%s -C \"$1\"",
+		 opts->objdump_path ?: "objdump",
+		 opts->disassembler_style ? "-M " : "",
+		 opts->disassembler_style ?: "",
+		 map__rip_2objdump(map, sym->start),
+		 map__rip_2objdump(map, sym->end),
+		 opts->show_asm_raw ? "" : "--no-show-raw-insn",
+		 opts->annotate_src ? "-S" : "",
+		 opts->prefix ? "--prefix " : "",
+		 opts->prefix ? '"' : ' ',
+		 opts->prefix ?: "",
+		 opts->prefix ? '"' : ' ',
+		 opts->prefix_strip ? "--prefix-strip=" : "",
+		 opts->prefix_strip ?: "");
+
+	if (err < 0) {
+		pr_err("Failure allocating memory for the command to run\n");
+		goto out_remove_tmp;
+	}
+
+	pr_debug("Executing: %s\n", command);
+
+	objdump_argv[2] = command;
+	objdump_argv[4] = symfs_filename;
+
+	/* Create a pipe to read from for stdout */
+	memset(&objdump_process, 0, sizeof(objdump_process));
+	objdump_process.argv = objdump_argv;
+	objdump_process.out = -1;
+	objdump_process.err = -1;
+	objdump_process.no_stderr = 1;
+	if (start_command(&objdump_process)) {
+		pr_err("Failure starting to run %s\n", command);
+		err = -1;
+		goto out_free_command;
+	}
+
+	file = fdopen(objdump_process.out, "r");
+	if (!file) {
+		pr_err("Failure creating FILE stream for %s\n", command);
+		/*
+		 * If we were using debug info should retry with
+		 * original binary.
+		 */
+		err = -1;
+		goto out_close_stdout;
+	}
+
+	/* Storage for getline. */
+	line = NULL;
+	line_len = 0;
+
+	nline = 0;
+	while (!feof(file)) {
+		const char *match;
+		char *expanded_line;
+
+		if (getline(&line, &line_len, file) < 0 || !line)
+			break;
+
+		/* Skip lines containing "filename:" */
+		match = strstr(line, symfs_filename);
+		if (match && match[strlen(symfs_filename)] == ':')
+			continue;
+
+		expanded_line = strim(line);
+		expanded_line = expand_tabs(expanded_line, &line, &line_len);
+		if (!expanded_line)
+			break;
+
+		/*
+		 * The source code line number (lineno) needs to be kept in
+		 * across calls to symbol__parse_objdump_line(), so that it
+		 * can associate it with the instructions till the next one.
+		 * See disasm_line__new() and struct disasm_line::line_nr.
+		 */
+		if (symbol__parse_objdump_line(sym, args, expanded_line,
+					       &lineno, &fileloc) < 0)
+			break;
+		nline++;
+	}
+	free(line);
+
+	err = finish_command(&objdump_process);
+	if (err)
+		pr_err("Error running %s\n", command);
+
+	if (nline == 0) {
+		err = -1;
+		pr_err("No output from %s\n", command);
+	}
+
+	/*
+	 * kallsyms does not have symbol sizes so there may a nop at the end.
+	 * Remove it.
+	 */
+	if (dso__is_kcore(dso))
+		delete_last_nop(sym);
+
+	fclose(file);
+
+out_close_stdout:
+	close(objdump_process.out);
+
+out_free_command:
+	free(command);
+
+out_remove_tmp:
+	if (decomp)
+		unlink(symfs_filename);
+
+	if (delete_extract)
+		kcore_extract__delete(&kce);
+
+	return err;
+}
+
+static void calc_percent(struct sym_hist *sym_hist,
+			 struct hists *hists,
+			 struct annotation_data *data,
+			 s64 offset, s64 end)
+{
+	unsigned int hits = 0;
+	u64 period = 0;
+
+	while (offset < end) {
+		hits   += sym_hist->addr[offset].nr_samples;
+		period += sym_hist->addr[offset].period;
+		++offset;
+	}
+
+	if (sym_hist->nr_samples) {
+		data->he.period     = period;
+		data->he.nr_samples = hits;
+		data->percent[PERCENT_HITS_LOCAL] = 100.0 * hits / sym_hist->nr_samples;
+	}
+
+	if (hists->stats.nr_non_filtered_samples)
+		data->percent[PERCENT_HITS_GLOBAL] = 100.0 * hits / hists->stats.nr_non_filtered_samples;
+
+	if (sym_hist->period)
+		data->percent[PERCENT_PERIOD_LOCAL] = 100.0 * period / sym_hist->period;
+
+	if (hists->stats.total_period)
+		data->percent[PERCENT_PERIOD_GLOBAL] = 100.0 * period / hists->stats.total_period;
+}
+
+static void annotation__calc_percent(struct annotation *notes,
+				     struct evsel *leader, s64 len)
+{
+	struct annotation_line *al, *next;
+	struct evsel *evsel;
+
+	list_for_each_entry(al, &notes->src->source, node) {
+		s64 end;
+		int i = 0;
+
+		if (al->offset == -1)
+			continue;
+
+		next = annotation_line__next(al, &notes->src->source);
+		end  = next ? next->offset : len;
+
+		for_each_group_evsel(evsel, leader) {
+			struct hists *hists = evsel__hists(evsel);
+			struct annotation_data *data;
+			struct sym_hist *sym_hist;
+
+			BUG_ON(i >= al->data_nr);
+
+			sym_hist = annotation__histogram(notes, evsel->core.idx);
+			data = &al->data[i++];
+
+			calc_percent(sym_hist, hists, data, al->offset, end);
+		}
+	}
+}
+
+void symbol__calc_percent(struct symbol *sym, struct evsel *evsel)
+{
+	struct annotation *notes = symbol__annotation(sym);
+
+	annotation__calc_percent(notes, evsel, symbol__size(sym));
+}
+
+int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
+		     struct annotation_options *options, struct arch **parch)
+{
+	struct symbol *sym = ms->sym;
+	struct annotation *notes = symbol__annotation(sym);
+	struct annotate_args args = {
+		.evsel		= evsel,
+		.options	= options,
+	};
+	struct perf_env *env = evsel__env(evsel);
+	const char *arch_name = perf_env__arch(env);
+	struct arch *arch;
+	int err;
+
+	if (!arch_name)
+		return errno;
+
+	args.arch = arch = arch__find(arch_name);
+	if (arch == NULL) {
+		pr_err("%s: unsupported arch %s\n", __func__, arch_name);
+		return ENOTSUP;
+	}
+
+	if (parch)
+		*parch = arch;
+
+	if (arch->init) {
+		err = arch->init(arch, env ? env->cpuid : NULL);
+		if (err) {
+			pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name);
+			return err;
+		}
+	}
+
+	args.ms = *ms;
+	if (notes->options && notes->options->full_addr)
+		notes->start = map__objdump_2mem(ms->map, ms->sym->start);
+	else
+		notes->start = map__rip_2objdump(ms->map, ms->sym->start);
+
+	return symbol__disassemble(sym, &args);
+}
+
+static void insert_source_line(struct rb_root *root, struct annotation_line *al,
+			       struct annotation_options *opts)
+{
+	struct annotation_line *iter;
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	int i, ret;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct annotation_line, rb_node);
+
+		ret = strcmp(iter->path, al->path);
+		if (ret == 0) {
+			for (i = 0; i < al->data_nr; i++) {
+				iter->data[i].percent_sum += annotation_data__percent(&al->data[i],
+										      opts->percent_type);
+			}
+			return;
+		}
+
+		if (ret < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	for (i = 0; i < al->data_nr; i++) {
+		al->data[i].percent_sum = annotation_data__percent(&al->data[i],
+								   opts->percent_type);
+	}
+
+	rb_link_node(&al->rb_node, parent, p);
+	rb_insert_color(&al->rb_node, root);
+}
+
+static int cmp_source_line(struct annotation_line *a, struct annotation_line *b)
+{
+	int i;
+
+	for (i = 0; i < a->data_nr; i++) {
+		if (a->data[i].percent_sum == b->data[i].percent_sum)
+			continue;
+		return a->data[i].percent_sum > b->data[i].percent_sum;
+	}
+
+	return 0;
+}
+
+static void __resort_source_line(struct rb_root *root, struct annotation_line *al)
+{
+	struct annotation_line *iter;
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct annotation_line, rb_node);
+
+		if (cmp_source_line(al, iter))
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&al->rb_node, parent, p);
+	rb_insert_color(&al->rb_node, root);
+}
+
+static void resort_source_line(struct rb_root *dest_root, struct rb_root *src_root)
+{
+	struct annotation_line *al;
+	struct rb_node *node;
+
+	node = rb_first(src_root);
+	while (node) {
+		struct rb_node *next;
+
+		al = rb_entry(node, struct annotation_line, rb_node);
+		next = rb_next(node);
+		rb_erase(node, src_root);
+
+		__resort_source_line(dest_root, al);
+		node = next;
+	}
+}
+
+static void print_summary(struct rb_root *root, const char *filename)
+{
+	struct annotation_line *al;
+	struct rb_node *node;
+
+	printf("\nSorted summary for file %s\n", filename);
+	printf("----------------------------------------------\n\n");
+
+	if (RB_EMPTY_ROOT(root)) {
+		printf(" Nothing higher than %1.1f%%\n", MIN_GREEN);
+		return;
+	}
+
+	node = rb_first(root);
+	while (node) {
+		double percent, percent_max = 0.0;
+		const char *color;
+		char *path;
+		int i;
+
+		al = rb_entry(node, struct annotation_line, rb_node);
+		for (i = 0; i < al->data_nr; i++) {
+			percent = al->data[i].percent_sum;
+			color = get_percent_color(percent);
+			color_fprintf(stdout, color, " %7.2f", percent);
+
+			if (percent > percent_max)
+				percent_max = percent;
+		}
+
+		path = al->path;
+		color = get_percent_color(percent_max);
+		color_fprintf(stdout, color, " %s\n", path);
+
+		node = rb_next(node);
+	}
+}
+
+static void symbol__annotate_hits(struct symbol *sym, struct evsel *evsel)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct sym_hist *h = annotation__histogram(notes, evsel->core.idx);
+	u64 len = symbol__size(sym), offset;
+
+	for (offset = 0; offset < len; ++offset)
+		if (h->addr[offset].nr_samples != 0)
+			printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2,
+			       sym->start + offset, h->addr[offset].nr_samples);
+	printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->nr_samples", h->nr_samples);
+}
+
+static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start)
+{
+	char bf[32];
+	struct annotation_line *line;
+
+	list_for_each_entry_reverse(line, lines, node) {
+		if (line->offset != -1)
+			return scnprintf(bf, sizeof(bf), "%" PRIx64, start + line->offset);
+	}
+
+	return 0;
+}
+
+int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel,
+			    struct annotation_options *opts)
+{
+	struct map *map = ms->map;
+	struct symbol *sym = ms->sym;
+	struct dso *dso = map->dso;
+	char *filename;
+	const char *d_filename;
+	const char *evsel_name = evsel__name(evsel);
+	struct annotation *notes = symbol__annotation(sym);
+	struct sym_hist *h = annotation__histogram(notes, evsel->core.idx);
+	struct annotation_line *pos, *queue = NULL;
+	u64 start = map__rip_2objdump(map, sym->start);
+	int printed = 2, queue_len = 0, addr_fmt_width;
+	int more = 0;
+	bool context = opts->context;
+	u64 len;
+	int width = symbol_conf.show_total_period ? 12 : 8;
+	int graph_dotted_len;
+	char buf[512];
+
+	filename = strdup(dso->long_name);
+	if (!filename)
+		return -ENOMEM;
+
+	if (opts->full_path)
+		d_filename = filename;
+	else
+		d_filename = basename(filename);
+
+	len = symbol__size(sym);
+
+	if (evsel__is_group_event(evsel)) {
+		width *= evsel->core.nr_members;
+		evsel__group_desc(evsel, buf, sizeof(buf));
+		evsel_name = buf;
+	}
+
+	graph_dotted_len = printf(" %-*.*s|	Source code & Disassembly of %s for %s (%" PRIu64 " samples, "
+				  "percent: %s)\n",
+				  width, width, symbol_conf.show_total_period ? "Period" :
+				  symbol_conf.show_nr_samples ? "Samples" : "Percent",
+				  d_filename, evsel_name, h->nr_samples,
+				  percent_type_str(opts->percent_type));
+
+	printf("%-*.*s----\n",
+	       graph_dotted_len, graph_dotted_len, graph_dotted_line);
+
+	if (verbose > 0)
+		symbol__annotate_hits(sym, evsel);
+
+	addr_fmt_width = annotated_source__addr_fmt_width(&notes->src->source, start);
+
+	list_for_each_entry(pos, &notes->src->source, node) {
+		int err;
+
+		if (context && queue == NULL) {
+			queue = pos;
+			queue_len = 0;
+		}
+
+		err = annotation_line__print(pos, sym, start, evsel, len,
+					     opts->min_pcnt, printed, opts->max_lines,
+					     queue, addr_fmt_width, opts->percent_type);
+
+		switch (err) {
+		case 0:
+			++printed;
+			if (context) {
+				printed += queue_len;
+				queue = NULL;
+				queue_len = 0;
+			}
+			break;
+		case 1:
+			/* filtered by max_lines */
+			++more;
+			break;
+		case -1:
+		default:
+			/*
+			 * Filtered by min_pcnt or non IP lines when
+			 * context != 0
+			 */
+			if (!context)
+				break;
+			if (queue_len == context)
+				queue = list_entry(queue->node.next, typeof(*queue), node);
+			else
+				++queue_len;
+			break;
+		}
+	}
+
+	free(filename);
+
+	return more;
+}
+
+static void FILE__set_percent_color(void *fp __maybe_unused,
+				    double percent __maybe_unused,
+				    bool current __maybe_unused)
+{
+}
+
+static int FILE__set_jumps_percent_color(void *fp __maybe_unused,
+					 int nr __maybe_unused, bool current __maybe_unused)
+{
+	return 0;
+}
+
+static int FILE__set_color(void *fp __maybe_unused, int color __maybe_unused)
+{
+	return 0;
+}
+
+static void FILE__printf(void *fp, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vfprintf(fp, fmt, args);
+	va_end(args);
+}
+
+static void FILE__write_graph(void *fp, int graph)
+{
+	const char *s;
+	switch (graph) {
+
+	case DARROW_CHAR: s = "↓"; break;
+	case UARROW_CHAR: s = "↑"; break;
+	case LARROW_CHAR: s = "←"; break;
+	case RARROW_CHAR: s = "→"; break;
+	default:		s = "?"; break;
+	}
+
+	fputs(s, fp);
+}
+
+static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp,
+				     struct annotation_options *opts)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct annotation_write_ops wops = {
+		.first_line		 = true,
+		.obj			 = fp,
+		.set_color		 = FILE__set_color,
+		.set_percent_color	 = FILE__set_percent_color,
+		.set_jumps_percent_color = FILE__set_jumps_percent_color,
+		.printf			 = FILE__printf,
+		.write_graph		 = FILE__write_graph,
+	};
+	struct annotation_line *al;
+
+	list_for_each_entry(al, &notes->src->source, node) {
+		if (annotation_line__filter(al, notes))
+			continue;
+		annotation_line__write(al, notes, &wops, opts);
+		fputc('\n', fp);
+		wops.first_line = false;
+	}
+
+	return 0;
+}
+
+int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel,
+				struct annotation_options *opts)
+{
+	const char *ev_name = evsel__name(evsel);
+	char buf[1024];
+	char *filename;
+	int err = -1;
+	FILE *fp;
+
+	if (asprintf(&filename, "%s.annotation", ms->sym->name) < 0)
+		return -1;
+
+	fp = fopen(filename, "w");
+	if (fp == NULL)
+		goto out_free_filename;
+
+	if (evsel__is_group_event(evsel)) {
+		evsel__group_desc(evsel, buf, sizeof(buf));
+		ev_name = buf;
+	}
+
+	fprintf(fp, "%s() %s\nEvent: %s\n\n",
+		ms->sym->name, ms->map->dso->long_name, ev_name);
+	symbol__annotate_fprintf2(ms->sym, fp, opts);
+
+	fclose(fp);
+	err = 0;
+out_free_filename:
+	free(filename);
+	return err;
+}
+
+void symbol__annotate_zero_histogram(struct symbol *sym, int evidx)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct sym_hist *h = annotation__histogram(notes, evidx);
+
+	memset(h, 0, notes->src->sizeof_sym_hist);
+}
+
+void symbol__annotate_decay_histogram(struct symbol *sym, int evidx)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct sym_hist *h = annotation__histogram(notes, evidx);
+	int len = symbol__size(sym), offset;
+
+	h->nr_samples = 0;
+	for (offset = 0; offset < len; ++offset) {
+		h->addr[offset].nr_samples = h->addr[offset].nr_samples * 7 / 8;
+		h->nr_samples += h->addr[offset].nr_samples;
+	}
+}
+
+void annotated_source__purge(struct annotated_source *as)
+{
+	struct annotation_line *al, *n;
+
+	list_for_each_entry_safe(al, n, &as->source, node) {
+		list_del_init(&al->node);
+		disasm_line__free(disasm_line(al));
+	}
+}
+
+static size_t disasm_line__fprintf(struct disasm_line *dl, FILE *fp)
+{
+	size_t printed;
+
+	if (dl->al.offset == -1)
+		return fprintf(fp, "%s\n", dl->al.line);
+
+	printed = fprintf(fp, "%#" PRIx64 " %s", dl->al.offset, dl->ins.name);
+
+	if (dl->ops.raw[0] != '\0') {
+		printed += fprintf(fp, "%.*s %s\n", 6 - (int)printed, " ",
+				   dl->ops.raw);
+	}
+
+	return printed + fprintf(fp, "\n");
+}
+
+size_t disasm__fprintf(struct list_head *head, FILE *fp)
+{
+	struct disasm_line *pos;
+	size_t printed = 0;
+
+	list_for_each_entry(pos, head, al.node)
+		printed += disasm_line__fprintf(pos, fp);
+
+	return printed;
+}
+
+bool disasm_line__is_valid_local_jump(struct disasm_line *dl, struct symbol *sym)
+{
+	if (!dl || !dl->ins.ops || !ins__is_jump(&dl->ins) ||
+	    !disasm_line__has_local_offset(dl) || dl->ops.target.offset < 0 ||
+	    dl->ops.target.offset >= (s64)symbol__size(sym))
+		return false;
+
+	return true;
+}
+
+void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym)
+{
+	u64 offset, size = symbol__size(sym);
+
+	/* PLT symbols contain external offsets */
+	if (strstr(sym->name, "@plt"))
+		return;
+
+	for (offset = 0; offset < size; ++offset) {
+		struct annotation_line *al = notes->offsets[offset];
+		struct disasm_line *dl;
+
+		dl = disasm_line(al);
+
+		if (!disasm_line__is_valid_local_jump(dl, sym))
+			continue;
+
+		al = notes->offsets[dl->ops.target.offset];
+
+		/*
+		 * FIXME: Oops, no jump target? Buggy disassembler? Or do we
+		 * have to adjust to the previous offset?
+		 */
+		if (al == NULL)
+			continue;
+
+		if (++al->jump_sources > notes->max_jump_sources)
+			notes->max_jump_sources = al->jump_sources;
+	}
+}
+
+void annotation__set_offsets(struct annotation *notes, s64 size)
+{
+	struct annotation_line *al;
+
+	notes->max_line_len = 0;
+	notes->nr_entries = 0;
+	notes->nr_asm_entries = 0;
+
+	list_for_each_entry(al, &notes->src->source, node) {
+		size_t line_len = strlen(al->line);
+
+		if (notes->max_line_len < line_len)
+			notes->max_line_len = line_len;
+		al->idx = notes->nr_entries++;
+		if (al->offset != -1) {
+			al->idx_asm = notes->nr_asm_entries++;
+			/*
+			 * FIXME: short term bandaid to cope with assembly
+			 * routines that comes with labels in the same column
+			 * as the address in objdump, sigh.
+			 *
+			 * E.g. copy_user_generic_unrolled
+ 			 */
+			if (al->offset < size)
+				notes->offsets[al->offset] = al;
+		} else
+			al->idx_asm = -1;
+	}
+}
+
+static inline int width_jumps(int n)
+{
+	if (n >= 100)
+		return 5;
+	if (n / 10)
+		return 2;
+	return 1;
+}
+
+static int annotation__max_ins_name(struct annotation *notes)
+{
+	int max_name = 0, len;
+	struct annotation_line *al;
+
+        list_for_each_entry(al, &notes->src->source, node) {
+		if (al->offset == -1)
+			continue;
+
+		len = strlen(disasm_line(al)->ins.name);
+		if (max_name < len)
+			max_name = len;
+	}
+
+	return max_name;
+}
+
+void annotation__init_column_widths(struct annotation *notes, struct symbol *sym)
+{
+	notes->widths.addr = notes->widths.target =
+		notes->widths.min_addr = hex_width(symbol__size(sym));
+	notes->widths.max_addr = hex_width(sym->end);
+	notes->widths.jumps = width_jumps(notes->max_jump_sources);
+	notes->widths.max_ins_name = annotation__max_ins_name(notes);
+}
+
+void annotation__update_column_widths(struct annotation *notes)
+{
+	if (notes->options->use_offset)
+		notes->widths.target = notes->widths.min_addr;
+	else if (notes->options->full_addr)
+		notes->widths.target = BITS_PER_LONG / 4;
+	else
+		notes->widths.target = notes->widths.max_addr;
+
+	notes->widths.addr = notes->widths.target;
+
+	if (notes->options->show_nr_jumps)
+		notes->widths.addr += notes->widths.jumps + 1;
+}
+
+void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms)
+{
+	notes->options->full_addr = !notes->options->full_addr;
+
+	if (notes->options->full_addr)
+		notes->start = map__objdump_2mem(ms->map, ms->sym->start);
+	else
+		notes->start = map__rip_2objdump(ms->map, ms->sym->start);
+
+	annotation__update_column_widths(notes);
+}
+
+static void annotation__calc_lines(struct annotation *notes, struct map *map,
+				   struct rb_root *root,
+				   struct annotation_options *opts)
+{
+	struct annotation_line *al;
+	struct rb_root tmp_root = RB_ROOT;
+
+	list_for_each_entry(al, &notes->src->source, node) {
+		double percent_max = 0.0;
+		int i;
+
+		for (i = 0; i < al->data_nr; i++) {
+			double percent;
+
+			percent = annotation_data__percent(&al->data[i],
+							   opts->percent_type);
+
+			if (percent > percent_max)
+				percent_max = percent;
+		}
+
+		if (percent_max <= 0.5)
+			continue;
+
+		al->path = get_srcline(map->dso, notes->start + al->offset, NULL,
+				       false, true, notes->start + al->offset);
+		insert_source_line(&tmp_root, al, opts);
+	}
+
+	resort_source_line(root, &tmp_root);
+}
+
+static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root,
+			       struct annotation_options *opts)
+{
+	struct annotation *notes = symbol__annotation(ms->sym);
+
+	annotation__calc_lines(notes, ms->map, root, opts);
+}
+
+int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel,
+			  struct annotation_options *opts)
+{
+	struct dso *dso = ms->map->dso;
+	struct symbol *sym = ms->sym;
+	struct rb_root source_line = RB_ROOT;
+	struct hists *hists = evsel__hists(evsel);
+	char buf[1024];
+	int err;
+
+	err = symbol__annotate2(ms, evsel, opts, NULL);
+	if (err) {
+		char msg[BUFSIZ];
+
+		dso->annotate_warned = true;
+		symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
+		ui__error("Couldn't annotate %s:\n%s", sym->name, msg);
+		return -1;
+	}
+
+	if (opts->print_lines) {
+		srcline_full_filename = opts->full_path;
+		symbol__calc_lines(ms, &source_line, opts);
+		print_summary(&source_line, dso->long_name);
+	}
+
+	hists__scnprintf_title(hists, buf, sizeof(buf));
+	fprintf(stdout, "%s, [percent: %s]\n%s() %s\n",
+		buf, percent_type_str(opts->percent_type), sym->name, dso->long_name);
+	symbol__annotate_fprintf2(sym, stdout, opts);
+
+	annotated_source__purge(symbol__annotation(sym)->src);
+
+	return 0;
+}
+
+int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel,
+			 struct annotation_options *opts)
+{
+	struct dso *dso = ms->map->dso;
+	struct symbol *sym = ms->sym;
+	struct rb_root source_line = RB_ROOT;
+	int err;
+
+	err = symbol__annotate(ms, evsel, opts, NULL);
+	if (err) {
+		char msg[BUFSIZ];
+
+		dso->annotate_warned = true;
+		symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
+		ui__error("Couldn't annotate %s:\n%s", sym->name, msg);
+		return -1;
+	}
+
+	symbol__calc_percent(sym, evsel);
+
+	if (opts->print_lines) {
+		srcline_full_filename = opts->full_path;
+		symbol__calc_lines(ms, &source_line, opts);
+		print_summary(&source_line, dso->long_name);
+	}
+
+	symbol__annotate_printf(ms, evsel, opts);
+
+	annotated_source__purge(symbol__annotation(sym)->src);
+
+	return 0;
+}
+
+bool ui__has_annotation(void)
+{
+	return use_browser == 1 && perf_hpp_list.sym;
+}
+
+
+static double annotation_line__max_percent(struct annotation_line *al,
+					   struct annotation *notes,
+					   unsigned int percent_type)
+{
+	double percent_max = 0.0;
+	int i;
+
+	for (i = 0; i < notes->nr_events; i++) {
+		double percent;
+
+		percent = annotation_data__percent(&al->data[i],
+						   percent_type);
+
+		if (percent > percent_max)
+			percent_max = percent;
+	}
+
+	return percent_max;
+}
+
+static void disasm_line__write(struct disasm_line *dl, struct annotation *notes,
+			       void *obj, char *bf, size_t size,
+			       void (*obj__printf)(void *obj, const char *fmt, ...),
+			       void (*obj__write_graph)(void *obj, int graph))
+{
+	if (dl->ins.ops && dl->ins.ops->scnprintf) {
+		if (ins__is_jump(&dl->ins)) {
+			bool fwd;
+
+			if (dl->ops.target.outside)
+				goto call_like;
+			fwd = dl->ops.target.offset > dl->al.offset;
+			obj__write_graph(obj, fwd ? DARROW_CHAR : UARROW_CHAR);
+			obj__printf(obj, " ");
+		} else if (ins__is_call(&dl->ins)) {
+call_like:
+			obj__write_graph(obj, RARROW_CHAR);
+			obj__printf(obj, " ");
+		} else if (ins__is_ret(&dl->ins)) {
+			obj__write_graph(obj, LARROW_CHAR);
+			obj__printf(obj, " ");
+		} else {
+			obj__printf(obj, "  ");
+		}
+	} else {
+		obj__printf(obj, "  ");
+	}
+
+	disasm_line__scnprintf(dl, bf, size, !notes->options->use_offset, notes->widths.max_ins_name);
+}
+
+static void ipc_coverage_string(char *bf, int size, struct annotation *notes)
+{
+	double ipc = 0.0, coverage = 0.0;
+
+	if (notes->hit_cycles)
+		ipc = notes->hit_insn / ((double)notes->hit_cycles);
+
+	if (notes->total_insn) {
+		coverage = notes->cover_insn * 100.0 /
+			((double)notes->total_insn);
+	}
+
+	scnprintf(bf, size, "(Average IPC: %.2f, IPC Coverage: %.1f%%)",
+		  ipc, coverage);
+}
+
+static void __annotation_line__write(struct annotation_line *al, struct annotation *notes,
+				     bool first_line, bool current_entry, bool change_color, int width,
+				     void *obj, unsigned int percent_type,
+				     int  (*obj__set_color)(void *obj, int color),
+				     void (*obj__set_percent_color)(void *obj, double percent, bool current),
+				     int  (*obj__set_jumps_percent_color)(void *obj, int nr, bool current),
+				     void (*obj__printf)(void *obj, const char *fmt, ...),
+				     void (*obj__write_graph)(void *obj, int graph))
+
+{
+	double percent_max = annotation_line__max_percent(al, notes, percent_type);
+	int pcnt_width = annotation__pcnt_width(notes),
+	    cycles_width = annotation__cycles_width(notes);
+	bool show_title = false;
+	char bf[256];
+	int printed;
+
+	if (first_line && (al->offset == -1 || percent_max == 0.0)) {
+		if (notes->have_cycles) {
+			if (al->ipc == 0.0 && al->cycles == 0)
+				show_title = true;
+		} else
+			show_title = true;
+	}
+
+	if (al->offset != -1 && percent_max != 0.0) {
+		int i;
+
+		for (i = 0; i < notes->nr_events; i++) {
+			double percent;
+
+			percent = annotation_data__percent(&al->data[i], percent_type);
+
+			obj__set_percent_color(obj, percent, current_entry);
+			if (symbol_conf.show_total_period) {
+				obj__printf(obj, "%11" PRIu64 " ", al->data[i].he.period);
+			} else if (symbol_conf.show_nr_samples) {
+				obj__printf(obj, "%6" PRIu64 " ",
+						   al->data[i].he.nr_samples);
+			} else {
+				obj__printf(obj, "%6.2f ", percent);
+			}
+		}
+	} else {
+		obj__set_percent_color(obj, 0, current_entry);
+
+		if (!show_title)
+			obj__printf(obj, "%-*s", pcnt_width, " ");
+		else {
+			obj__printf(obj, "%-*s", pcnt_width,
+					   symbol_conf.show_total_period ? "Period" :
+					   symbol_conf.show_nr_samples ? "Samples" : "Percent");
+		}
+	}
+
+	if (notes->have_cycles) {
+		if (al->ipc)
+			obj__printf(obj, "%*.2f ", ANNOTATION__IPC_WIDTH - 1, al->ipc);
+		else if (!show_title)
+			obj__printf(obj, "%*s", ANNOTATION__IPC_WIDTH, " ");
+		else
+			obj__printf(obj, "%*s ", ANNOTATION__IPC_WIDTH - 1, "IPC");
+
+		if (!notes->options->show_minmax_cycle) {
+			if (al->cycles)
+				obj__printf(obj, "%*" PRIu64 " ",
+					   ANNOTATION__CYCLES_WIDTH - 1, al->cycles);
+			else if (!show_title)
+				obj__printf(obj, "%*s",
+					    ANNOTATION__CYCLES_WIDTH, " ");
+			else
+				obj__printf(obj, "%*s ",
+					    ANNOTATION__CYCLES_WIDTH - 1,
+					    "Cycle");
+		} else {
+			if (al->cycles) {
+				char str[32];
+
+				scnprintf(str, sizeof(str),
+					"%" PRIu64 "(%" PRIu64 "/%" PRIu64 ")",
+					al->cycles, al->cycles_min,
+					al->cycles_max);
+
+				obj__printf(obj, "%*s ",
+					    ANNOTATION__MINMAX_CYCLES_WIDTH - 1,
+					    str);
+			} else if (!show_title)
+				obj__printf(obj, "%*s",
+					    ANNOTATION__MINMAX_CYCLES_WIDTH,
+					    " ");
+			else
+				obj__printf(obj, "%*s ",
+					    ANNOTATION__MINMAX_CYCLES_WIDTH - 1,
+					    "Cycle(min/max)");
+		}
+
+		if (show_title && !*al->line) {
+			ipc_coverage_string(bf, sizeof(bf), notes);
+			obj__printf(obj, "%*s", ANNOTATION__AVG_IPC_WIDTH, bf);
+		}
+	}
+
+	obj__printf(obj, " ");
+
+	if (!*al->line)
+		obj__printf(obj, "%-*s", width - pcnt_width - cycles_width, " ");
+	else if (al->offset == -1) {
+		if (al->line_nr && notes->options->show_linenr)
+			printed = scnprintf(bf, sizeof(bf), "%-*d ", notes->widths.addr + 1, al->line_nr);
+		else
+			printed = scnprintf(bf, sizeof(bf), "%-*s  ", notes->widths.addr, " ");
+		obj__printf(obj, bf);
+		obj__printf(obj, "%-*s", width - printed - pcnt_width - cycles_width + 1, al->line);
+	} else {
+		u64 addr = al->offset;
+		int color = -1;
+
+		if (!notes->options->use_offset)
+			addr += notes->start;
+
+		if (!notes->options->use_offset) {
+			printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr);
+		} else {
+			if (al->jump_sources &&
+			    notes->options->offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) {
+				if (notes->options->show_nr_jumps) {
+					int prev;
+					printed = scnprintf(bf, sizeof(bf), "%*d ",
+							    notes->widths.jumps,
+							    al->jump_sources);
+					prev = obj__set_jumps_percent_color(obj, al->jump_sources,
+									    current_entry);
+					obj__printf(obj, bf);
+					obj__set_color(obj, prev);
+				}
+print_addr:
+				printed = scnprintf(bf, sizeof(bf), "%*" PRIx64 ": ",
+						    notes->widths.target, addr);
+			} else if (ins__is_call(&disasm_line(al)->ins) &&
+				   notes->options->offset_level >= ANNOTATION__OFFSET_CALL) {
+				goto print_addr;
+			} else if (notes->options->offset_level == ANNOTATION__MAX_OFFSET_LEVEL) {
+				goto print_addr;
+			} else {
+				printed = scnprintf(bf, sizeof(bf), "%-*s  ",
+						    notes->widths.addr, " ");
+			}
+		}
+
+		if (change_color)
+			color = obj__set_color(obj, HE_COLORSET_ADDR);
+		obj__printf(obj, bf);
+		if (change_color)
+			obj__set_color(obj, color);
+
+		disasm_line__write(disasm_line(al), notes, obj, bf, sizeof(bf), obj__printf, obj__write_graph);
+
+		obj__printf(obj, "%-*s", width - pcnt_width - cycles_width - 3 - printed, bf);
+	}
+
+}
+
+void annotation_line__write(struct annotation_line *al, struct annotation *notes,
+			    struct annotation_write_ops *wops,
+			    struct annotation_options *opts)
+{
+	__annotation_line__write(al, notes, wops->first_line, wops->current_entry,
+				 wops->change_color, wops->width, wops->obj,
+				 opts->percent_type,
+				 wops->set_color, wops->set_percent_color,
+				 wops->set_jumps_percent_color, wops->printf,
+				 wops->write_graph);
+}
+
+int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
+		      struct annotation_options *options, struct arch **parch)
+{
+	struct symbol *sym = ms->sym;
+	struct annotation *notes = symbol__annotation(sym);
+	size_t size = symbol__size(sym);
+	int nr_pcnt = 1, err;
+
+	notes->offsets = zalloc(size * sizeof(struct annotation_line *));
+	if (notes->offsets == NULL)
+		return ENOMEM;
+
+	if (evsel__is_group_event(evsel))
+		nr_pcnt = evsel->core.nr_members;
+
+	err = symbol__annotate(ms, evsel, options, parch);
+	if (err)
+		goto out_free_offsets;
+
+	notes->options = options;
+
+	symbol__calc_percent(sym, evsel);
+
+	annotation__set_offsets(notes, size);
+	annotation__mark_jump_targets(notes, sym);
+	annotation__compute_ipc(notes, size);
+	annotation__init_column_widths(notes, sym);
+	notes->nr_events = nr_pcnt;
+
+	annotation__update_column_widths(notes);
+	sym->annotate2 = 1;
+
+	return 0;
+
+out_free_offsets:
+	zfree(&notes->offsets);
+	return err;
+}
+
+static int annotation__config(const char *var, const char *value, void *data)
+{
+	struct annotation_options *opt = data;
+
+	if (!strstarts(var, "annotate."))
+		return 0;
+
+	if (!strcmp(var, "annotate.offset_level")) {
+		perf_config_u8(&opt->offset_level, "offset_level", value);
+
+		if (opt->offset_level > ANNOTATION__MAX_OFFSET_LEVEL)
+			opt->offset_level = ANNOTATION__MAX_OFFSET_LEVEL;
+		else if (opt->offset_level < ANNOTATION__MIN_OFFSET_LEVEL)
+			opt->offset_level = ANNOTATION__MIN_OFFSET_LEVEL;
+	} else if (!strcmp(var, "annotate.hide_src_code")) {
+		opt->hide_src_code = perf_config_bool("hide_src_code", value);
+	} else if (!strcmp(var, "annotate.jump_arrows")) {
+		opt->jump_arrows = perf_config_bool("jump_arrows", value);
+	} else if (!strcmp(var, "annotate.show_linenr")) {
+		opt->show_linenr = perf_config_bool("show_linenr", value);
+	} else if (!strcmp(var, "annotate.show_nr_jumps")) {
+		opt->show_nr_jumps = perf_config_bool("show_nr_jumps", value);
+	} else if (!strcmp(var, "annotate.show_nr_samples")) {
+		symbol_conf.show_nr_samples = perf_config_bool("show_nr_samples",
+								value);
+	} else if (!strcmp(var, "annotate.show_total_period")) {
+		symbol_conf.show_total_period = perf_config_bool("show_total_period",
+								value);
+	} else if (!strcmp(var, "annotate.use_offset")) {
+		opt->use_offset = perf_config_bool("use_offset", value);
+	} else if (!strcmp(var, "annotate.disassembler_style")) {
+		opt->disassembler_style = value;
+	} else if (!strcmp(var, "annotate.demangle")) {
+		symbol_conf.demangle = perf_config_bool("demangle", value);
+	} else if (!strcmp(var, "annotate.demangle_kernel")) {
+		symbol_conf.demangle_kernel = perf_config_bool("demangle_kernel", value);
+	} else {
+		pr_debug("%s variable unknown, ignoring...", var);
+	}
+
+	return 0;
+}
+
+void annotation_config__init(struct annotation_options *opt)
+{
+	perf_config(annotation__config, opt);
+}
+
+static unsigned int parse_percent_type(char *str1, char *str2)
+{
+	unsigned int type = (unsigned int) -1;
+
+	if (!strcmp("period", str1)) {
+		if (!strcmp("local", str2))
+			type = PERCENT_PERIOD_LOCAL;
+		else if (!strcmp("global", str2))
+			type = PERCENT_PERIOD_GLOBAL;
+	}
+
+	if (!strcmp("hits", str1)) {
+		if (!strcmp("local", str2))
+			type = PERCENT_HITS_LOCAL;
+		else if (!strcmp("global", str2))
+			type = PERCENT_HITS_GLOBAL;
+	}
+
+	return type;
+}
+
+int annotate_parse_percent_type(const struct option *opt, const char *_str,
+				int unset __maybe_unused)
+{
+	struct annotation_options *opts = opt->value;
+	unsigned int type;
+	char *str1, *str2;
+	int err = -1;
+
+	str1 = strdup(_str);
+	if (!str1)
+		return -ENOMEM;
+
+	str2 = strchr(str1, '-');
+	if (!str2)
+		goto out;
+
+	*str2++ = 0;
+
+	type = parse_percent_type(str1, str2);
+	if (type == (unsigned int) -1)
+		type = parse_percent_type(str2, str1);
+	if (type != (unsigned int) -1) {
+		opts->percent_type = type;
+		err = 0;
+	}
+
+out:
+	free(str1);
+	return err;
+}
+
+int annotate_check_args(struct annotation_options *args)
+{
+	if (args->prefix_strip && !args->prefix) {
+		pr_err("--prefix-strip requires --prefix\n");
+		return -1;
+	}
+	return 0;
+}
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
new file mode 100644
index 000000000..8934072c3
--- /dev/null
+++ b/tools/perf/util/annotate.h
@@ -0,0 +1,428 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_ANNOTATE_H
+#define __PERF_ANNOTATE_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <asm/bug.h>
+#include "symbol_conf.h"
+#include "mutex.h"
+#include "spark.h"
+
+struct hist_browser_timer;
+struct hist_entry;
+struct ins_ops;
+struct map;
+struct map_symbol;
+struct addr_map_symbol;
+struct option;
+struct perf_sample;
+struct evsel;
+struct symbol;
+
+struct ins {
+	const char     *name;
+	struct ins_ops *ops;
+};
+
+struct ins_operands {
+	char	*raw;
+	char	*raw_comment;
+	char	*raw_func_start;
+	struct {
+		char	*raw;
+		char	*name;
+		struct symbol *sym;
+		u64	addr;
+		s64	offset;
+		bool	offset_avail;
+		bool	outside;
+	} target;
+	union {
+		struct {
+			char	*raw;
+			char	*name;
+			u64	addr;
+		} source;
+		struct {
+			struct ins	    ins;
+			struct ins_operands *ops;
+		} locked;
+	};
+};
+
+struct arch;
+
+struct ins_ops {
+	void (*free)(struct ins_operands *ops);
+	int (*parse)(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms);
+	int (*scnprintf)(struct ins *ins, char *bf, size_t size,
+			 struct ins_operands *ops, int max_ins_name);
+};
+
+bool ins__is_jump(const struct ins *ins);
+bool ins__is_call(const struct ins *ins);
+bool ins__is_ret(const struct ins *ins);
+bool ins__is_lock(const struct ins *ins);
+int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops, int max_ins_name);
+bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2);
+
+#define ANNOTATION__IPC_WIDTH 6
+#define ANNOTATION__CYCLES_WIDTH 6
+#define ANNOTATION__MINMAX_CYCLES_WIDTH 19
+#define ANNOTATION__AVG_IPC_WIDTH 36
+#define ANNOTATION_DUMMY_LEN	256
+
+struct annotation_options {
+	bool hide_src_code,
+	     use_offset,
+	     jump_arrows,
+	     print_lines,
+	     full_path,
+	     show_linenr,
+	     show_fileloc,
+	     show_nr_jumps,
+	     show_minmax_cycle,
+	     show_asm_raw,
+	     annotate_src,
+	     full_addr;
+	u8   offset_level;
+	int  min_pcnt;
+	int  max_lines;
+	int  context;
+	const char *objdump_path;
+	const char *disassembler_style;
+	const char *prefix;
+	const char *prefix_strip;
+	unsigned int percent_type;
+};
+
+enum {
+	ANNOTATION__OFFSET_JUMP_TARGETS = 1,
+	ANNOTATION__OFFSET_CALL,
+	ANNOTATION__MAX_OFFSET_LEVEL,
+};
+
+#define ANNOTATION__MIN_OFFSET_LEVEL ANNOTATION__OFFSET_JUMP_TARGETS
+
+extern struct annotation_options annotation__default_options;
+
+struct annotation;
+
+struct sym_hist_entry {
+	u64		nr_samples;
+	u64		period;
+};
+
+enum {
+	PERCENT_HITS_LOCAL,
+	PERCENT_HITS_GLOBAL,
+	PERCENT_PERIOD_LOCAL,
+	PERCENT_PERIOD_GLOBAL,
+	PERCENT_MAX,
+};
+
+struct annotation_data {
+	double			 percent[PERCENT_MAX];
+	double			 percent_sum;
+	struct sym_hist_entry	 he;
+};
+
+struct annotation_line {
+	struct list_head	 node;
+	struct rb_node		 rb_node;
+	s64			 offset;
+	char			*line;
+	int			 line_nr;
+	char			*fileloc;
+	int			 jump_sources;
+	float			 ipc;
+	u64			 cycles;
+	u64			 cycles_max;
+	u64			 cycles_min;
+	char			*path;
+	u32			 idx;
+	int			 idx_asm;
+	int			 data_nr;
+	struct annotation_data	 data[];
+};
+
+struct disasm_line {
+	struct ins		 ins;
+	struct ins_operands	 ops;
+
+	/* This needs to be at the end. */
+	struct annotation_line	 al;
+};
+
+static inline double annotation_data__percent(struct annotation_data *data,
+					      unsigned int which)
+{
+	return which < PERCENT_MAX ? data->percent[which] : -1;
+}
+
+static inline const char *percent_type_str(unsigned int type)
+{
+	static const char *str[PERCENT_MAX] = {
+		"local hits",
+		"global hits",
+		"local period",
+		"global period",
+	};
+
+	if (WARN_ON(type >= PERCENT_MAX))
+		return "N/A";
+
+	return str[type];
+}
+
+static inline struct disasm_line *disasm_line(struct annotation_line *al)
+{
+	return al ? container_of(al, struct disasm_line, al) : NULL;
+}
+
+/*
+ * Is this offset in the same function as the line it is used?
+ * asm functions jump to other functions, for instance.
+ */
+static inline bool disasm_line__has_local_offset(const struct disasm_line *dl)
+{
+	return dl->ops.target.offset_avail && !dl->ops.target.outside;
+}
+
+/*
+ * Can we draw an arrow from the jump to its target, for instance? I.e.
+ * is the jump and its target in the same function?
+ */
+bool disasm_line__is_valid_local_jump(struct disasm_line *dl, struct symbol *sym);
+
+void disasm_line__free(struct disasm_line *dl);
+struct annotation_line *
+annotation_line__next(struct annotation_line *pos, struct list_head *head);
+
+struct annotation_write_ops {
+	bool first_line, current_entry, change_color;
+	int  width;
+	void *obj;
+	int  (*set_color)(void *obj, int color);
+	void (*set_percent_color)(void *obj, double percent, bool current);
+	int  (*set_jumps_percent_color)(void *obj, int nr, bool current);
+	void (*printf)(void *obj, const char *fmt, ...);
+	void (*write_graph)(void *obj, int graph);
+};
+
+void annotation_line__write(struct annotation_line *al, struct annotation *notes,
+			    struct annotation_write_ops *ops,
+			    struct annotation_options *opts);
+
+int __annotation__scnprintf_samples_period(struct annotation *notes,
+					   char *bf, size_t size,
+					   struct evsel *evsel,
+					   bool show_freq);
+
+int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name);
+size_t disasm__fprintf(struct list_head *head, FILE *fp);
+void symbol__calc_percent(struct symbol *sym, struct evsel *evsel);
+
+struct sym_hist {
+	u64		      nr_samples;
+	u64		      period;
+	struct sym_hist_entry addr[];
+};
+
+struct cyc_hist {
+	u64	start;
+	u64	cycles;
+	u64	cycles_aggr;
+	u64	cycles_max;
+	u64	cycles_min;
+	s64	cycles_spark[NUM_SPARKS];
+	u32	num;
+	u32	num_aggr;
+	u8	have_start;
+	/* 1 byte padding */
+	u16	reset;
+};
+
+/** struct annotated_source - symbols with hits have this attached as in sannotation
+ *
+ * @histograms: Array of addr hit histograms per event being monitored
+ * nr_histograms: This may not be the same as evsel->evlist->core.nr_entries if
+ * 		  we have more than a group in a evlist, where we will want
+ * 		  to see each group separately, that is why symbol__annotate2()
+ * 		  sets src->nr_histograms to evsel->nr_members.
+ * @lines: If 'print_lines' is specified, per source code line percentages
+ * @source: source parsed from a disassembler like objdump -dS
+ * @cyc_hist: Average cycles per basic block
+ *
+ * lines is allocated, percentages calculated and all sorted by percentage
+ * when the annotation is about to be presented, so the percentages are for
+ * one of the entries in the histogram array, i.e. for the event/counter being
+ * presented. It is deallocated right after symbol__{tui,tty,etc}_annotate
+ * returns.
+ */
+struct annotated_source {
+	struct list_head   source;
+	int    		   nr_histograms;
+	size_t		   sizeof_sym_hist;
+	struct cyc_hist	   *cycles_hist;
+	struct sym_hist	   *histograms;
+};
+
+struct annotation {
+	struct mutex lock;
+	u64			max_coverage;
+	u64			start;
+	u64			hit_cycles;
+	u64			hit_insn;
+	unsigned int		total_insn;
+	unsigned int		cover_insn;
+	struct annotation_options *options;
+	struct annotation_line	**offsets;
+	int			nr_events;
+	int			max_jump_sources;
+	int			nr_entries;
+	int			nr_asm_entries;
+	u16			max_line_len;
+	struct {
+		u8		addr;
+		u8		jumps;
+		u8		target;
+		u8		min_addr;
+		u8		max_addr;
+		u8		max_ins_name;
+	} widths;
+	bool			have_cycles;
+	struct annotated_source *src;
+};
+
+void annotation__init(struct annotation *notes);
+void annotation__exit(struct annotation *notes);
+
+static inline int annotation__cycles_width(struct annotation *notes)
+{
+	if (notes->have_cycles && notes->options->show_minmax_cycle)
+		return ANNOTATION__IPC_WIDTH + ANNOTATION__MINMAX_CYCLES_WIDTH;
+
+	return notes->have_cycles ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0;
+}
+
+static inline int annotation__pcnt_width(struct annotation *notes)
+{
+	return (symbol_conf.show_total_period ? 12 : 7) * notes->nr_events;
+}
+
+static inline bool annotation_line__filter(struct annotation_line *al, struct annotation *notes)
+{
+	return notes->options->hide_src_code && al->offset == -1;
+}
+
+void annotation__set_offsets(struct annotation *notes, s64 size);
+void annotation__compute_ipc(struct annotation *notes, size_t size);
+void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym);
+void annotation__update_column_widths(struct annotation *notes);
+void annotation__init_column_widths(struct annotation *notes, struct symbol *sym);
+void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms);
+
+static inline struct sym_hist *annotated_source__histogram(struct annotated_source *src, int idx)
+{
+	return ((void *)src->histograms) + (src->sizeof_sym_hist * idx);
+}
+
+static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx)
+{
+	return annotated_source__histogram(notes->src, idx);
+}
+
+static inline struct annotation *symbol__annotation(struct symbol *sym)
+{
+	return (void *)sym - symbol_conf.priv_size;
+}
+
+int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample,
+				 struct evsel *evsel);
+
+int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
+				    struct addr_map_symbol *start,
+				    unsigned cycles);
+
+int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample,
+				 struct evsel *evsel, u64 addr);
+
+struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists);
+void symbol__annotate_zero_histograms(struct symbol *sym);
+
+int symbol__annotate(struct map_symbol *ms,
+		     struct evsel *evsel,
+		     struct annotation_options *options,
+		     struct arch **parch);
+int symbol__annotate2(struct map_symbol *ms,
+		      struct evsel *evsel,
+		      struct annotation_options *options,
+		      struct arch **parch);
+
+enum symbol_disassemble_errno {
+	SYMBOL_ANNOTATE_ERRNO__SUCCESS		= 0,
+
+	/*
+	 * Choose an arbitrary negative big number not to clash with standard
+	 * errno since SUS requires the errno has distinct positive values.
+	 * See 'Issue 6' in the link below.
+	 *
+	 * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
+	 */
+	__SYMBOL_ANNOTATE_ERRNO__START		= -10000,
+
+	SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX	= __SYMBOL_ANNOTATE_ERRNO__START,
+	SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF,
+	SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING,
+	SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP,
+	SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE,
+	SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF,
+
+	__SYMBOL_ANNOTATE_ERRNO__END,
+};
+
+int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen);
+
+int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel,
+			    struct annotation_options *options);
+void symbol__annotate_zero_histogram(struct symbol *sym, int evidx);
+void symbol__annotate_decay_histogram(struct symbol *sym, int evidx);
+void annotated_source__purge(struct annotated_source *as);
+
+int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel,
+				struct annotation_options *opts);
+
+bool ui__has_annotation(void);
+
+int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, struct annotation_options *opts);
+
+int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, struct annotation_options *opts);
+
+#ifdef HAVE_SLANG_SUPPORT
+int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
+			 struct hist_browser_timer *hbt,
+			 struct annotation_options *opts);
+#else
+static inline int symbol__tui_annotate(struct map_symbol *ms __maybe_unused,
+				struct evsel *evsel  __maybe_unused,
+				struct hist_browser_timer *hbt __maybe_unused,
+				struct annotation_options *opts __maybe_unused)
+{
+	return 0;
+}
+#endif
+
+void annotation_config__init(struct annotation_options *opt);
+
+int annotate_parse_percent_type(const struct option *opt, const char *_str,
+				int unset);
+
+int annotate_check_args(struct annotation_options *args);
+
+#endif	/* __PERF_ANNOTATE_H */
diff --git a/tools/perf/util/archinsn.h b/tools/perf/util/archinsn.h
new file mode 100644
index 000000000..448cbb6b8
--- /dev/null
+++ b/tools/perf/util/archinsn.h
@@ -0,0 +1,12 @@
+#ifndef INSN_H
+#define INSN_H 1
+
+struct perf_sample;
+struct machine;
+struct thread;
+
+void arch_fetch_insn(struct perf_sample *sample,
+		     struct thread *thread,
+		     struct machine *machine);
+
+#endif
diff --git a/tools/perf/util/arm-spe-decoder/Build b/tools/perf/util/arm-spe-decoder/Build
new file mode 100644
index 000000000..f8dae13fc
--- /dev/null
+++ b/tools/perf/util/arm-spe-decoder/Build
@@ -0,0 +1 @@
+perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o arm-spe-decoder.o
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
new file mode 100644
index 000000000..091987dd3
--- /dev/null
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * arm_spe_decoder.c: ARM SPE support
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <errno.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <linux/zalloc.h>
+
+#include "../auxtrace.h"
+#include "../debug.h"
+#include "../util.h"
+
+#include "arm-spe-decoder.h"
+
+static u64 arm_spe_calc_ip(int index, u64 payload)
+{
+	u64 ns, el, val;
+
+	/* Instruction virtual address or Branch target address */
+	if (index == SPE_ADDR_PKT_HDR_INDEX_INS ||
+	    index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) {
+		ns = SPE_ADDR_PKT_GET_NS(payload);
+		el = SPE_ADDR_PKT_GET_EL(payload);
+
+		/* Clean highest byte */
+		payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
+
+		/* Fill highest byte for EL1 or EL2 (VHE) mode */
+		if (ns && (el == SPE_ADDR_PKT_EL1 || el == SPE_ADDR_PKT_EL2))
+			payload |= 0xffULL << SPE_ADDR_PKT_ADDR_BYTE7_SHIFT;
+
+	/* Data access virtual address */
+	} else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT) {
+
+		/* Clean tags */
+		payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
+
+		/*
+		 * Armv8 ARM (ARM DDI 0487F.c), chapter "D10.2.1 Address packet"
+		 * defines the data virtual address payload format, the top byte
+		 * (bits [63:56]) is assigned as top-byte tag; so we only can
+		 * retrieve address value from bits [55:0].
+		 *
+		 * According to Documentation/arm64/memory.rst, if detects the
+		 * specific pattern in bits [55:52] of payload which falls in
+		 * the kernel space, should fixup the top byte and this allows
+		 * perf tool to parse DSO symbol for data address correctly.
+		 *
+		 * For this reason, if detects the bits [55:52] is 0xf, will
+		 * fill 0xff into the top byte.
+		 */
+		val = SPE_ADDR_PKT_ADDR_GET_BYTE_6(payload);
+		if ((val & 0xf0ULL) == 0xf0ULL)
+			payload |= 0xffULL << SPE_ADDR_PKT_ADDR_BYTE7_SHIFT;
+
+	/* Data access physical address */
+	} else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) {
+		/* Clean highest byte */
+		payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
+	} else {
+		pr_err("unsupported address packet index: 0x%x\n", index);
+	}
+
+	return payload;
+}
+
+struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params)
+{
+	struct arm_spe_decoder *decoder;
+
+	if (!params->get_trace)
+		return NULL;
+
+	decoder = zalloc(sizeof(struct arm_spe_decoder));
+	if (!decoder)
+		return NULL;
+
+	decoder->get_trace = params->get_trace;
+	decoder->data = params->data;
+
+	return decoder;
+}
+
+void arm_spe_decoder_free(struct arm_spe_decoder *decoder)
+{
+	free(decoder);
+}
+
+static int arm_spe_get_data(struct arm_spe_decoder *decoder)
+{
+	struct arm_spe_buffer buffer = { .buf = 0, };
+	int ret;
+
+	pr_debug("Getting more data\n");
+	ret = decoder->get_trace(&buffer, decoder->data);
+	if (ret < 0)
+		return ret;
+
+	decoder->buf = buffer.buf;
+	decoder->len = buffer.len;
+
+	if (!decoder->len)
+		pr_debug("No more data\n");
+
+	return decoder->len;
+}
+
+static int arm_spe_get_next_packet(struct arm_spe_decoder *decoder)
+{
+	int ret;
+
+	do {
+		if (!decoder->len) {
+			ret = arm_spe_get_data(decoder);
+
+			/* Failed to read out trace data */
+			if (ret <= 0)
+				return ret;
+		}
+
+		ret = arm_spe_get_packet(decoder->buf, decoder->len,
+					 &decoder->packet);
+		if (ret <= 0) {
+			/* Move forward for 1 byte */
+			decoder->buf += 1;
+			decoder->len -= 1;
+			return -EBADMSG;
+		}
+
+		decoder->buf += ret;
+		decoder->len -= ret;
+	} while (decoder->packet.type == ARM_SPE_PAD);
+
+	return 1;
+}
+
+static int arm_spe_read_record(struct arm_spe_decoder *decoder)
+{
+	int err;
+	int idx;
+	u64 payload, ip;
+
+	memset(&decoder->record, 0x0, sizeof(decoder->record));
+	decoder->record.context_id = (u64)-1;
+
+	while (1) {
+		err = arm_spe_get_next_packet(decoder);
+		if (err <= 0)
+			return err;
+
+		idx = decoder->packet.index;
+		payload = decoder->packet.payload;
+
+		switch (decoder->packet.type) {
+		case ARM_SPE_TIMESTAMP:
+			decoder->record.timestamp = payload;
+			return 1;
+		case ARM_SPE_END:
+			return 1;
+		case ARM_SPE_ADDRESS:
+			ip = arm_spe_calc_ip(idx, payload);
+			if (idx == SPE_ADDR_PKT_HDR_INDEX_INS)
+				decoder->record.from_ip = ip;
+			else if (idx == SPE_ADDR_PKT_HDR_INDEX_BRANCH)
+				decoder->record.to_ip = ip;
+			else if (idx == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT)
+				decoder->record.virt_addr = ip;
+			else if (idx == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS)
+				decoder->record.phys_addr = ip;
+			break;
+		case ARM_SPE_COUNTER:
+			if (idx == SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT)
+				decoder->record.latency = payload;
+			break;
+		case ARM_SPE_CONTEXT:
+			decoder->record.context_id = payload;
+			break;
+		case ARM_SPE_OP_TYPE:
+			if (idx == SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC) {
+				if (payload & 0x1)
+					decoder->record.op = ARM_SPE_ST;
+				else
+					decoder->record.op = ARM_SPE_LD;
+			}
+			break;
+		case ARM_SPE_EVENTS:
+			if (payload & BIT(EV_L1D_REFILL))
+				decoder->record.type |= ARM_SPE_L1D_MISS;
+
+			if (payload & BIT(EV_L1D_ACCESS))
+				decoder->record.type |= ARM_SPE_L1D_ACCESS;
+
+			if (payload & BIT(EV_TLB_WALK))
+				decoder->record.type |= ARM_SPE_TLB_MISS;
+
+			if (payload & BIT(EV_TLB_ACCESS))
+				decoder->record.type |= ARM_SPE_TLB_ACCESS;
+
+			if (payload & BIT(EV_LLC_MISS))
+				decoder->record.type |= ARM_SPE_LLC_MISS;
+
+			if (payload & BIT(EV_LLC_ACCESS))
+				decoder->record.type |= ARM_SPE_LLC_ACCESS;
+
+			if (payload & BIT(EV_REMOTE_ACCESS))
+				decoder->record.type |= ARM_SPE_REMOTE_ACCESS;
+
+			if (payload & BIT(EV_MISPRED))
+				decoder->record.type |= ARM_SPE_BRANCH_MISS;
+
+			break;
+		case ARM_SPE_DATA_SOURCE:
+			decoder->record.source = payload;
+			break;
+		case ARM_SPE_BAD:
+			break;
+		case ARM_SPE_PAD:
+			break;
+		default:
+			pr_err("Get packet error!\n");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+int arm_spe_decode(struct arm_spe_decoder *decoder)
+{
+	return arm_spe_read_record(decoder);
+}
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
new file mode 100644
index 000000000..46a61df11
--- /dev/null
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * arm_spe_decoder.h: Arm Statistical Profiling Extensions support
+ * Copyright (c) 2019-2020, Arm Ltd.
+ */
+
+#ifndef INCLUDE__ARM_SPE_DECODER_H__
+#define INCLUDE__ARM_SPE_DECODER_H__
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "arm-spe-pkt-decoder.h"
+
+enum arm_spe_sample_type {
+	ARM_SPE_L1D_ACCESS	= 1 << 0,
+	ARM_SPE_L1D_MISS	= 1 << 1,
+	ARM_SPE_LLC_ACCESS	= 1 << 2,
+	ARM_SPE_LLC_MISS	= 1 << 3,
+	ARM_SPE_TLB_ACCESS	= 1 << 4,
+	ARM_SPE_TLB_MISS	= 1 << 5,
+	ARM_SPE_BRANCH_MISS	= 1 << 6,
+	ARM_SPE_REMOTE_ACCESS	= 1 << 7,
+};
+
+enum arm_spe_op_type {
+	ARM_SPE_LD		= 1 << 0,
+	ARM_SPE_ST		= 1 << 1,
+};
+
+enum arm_spe_neoverse_data_source {
+	ARM_SPE_NV_L1D		 = 0x0,
+	ARM_SPE_NV_L2		 = 0x8,
+	ARM_SPE_NV_PEER_CORE	 = 0x9,
+	ARM_SPE_NV_LOCAL_CLUSTER = 0xa,
+	ARM_SPE_NV_SYS_CACHE	 = 0xb,
+	ARM_SPE_NV_PEER_CLUSTER	 = 0xc,
+	ARM_SPE_NV_REMOTE	 = 0xd,
+	ARM_SPE_NV_DRAM		 = 0xe,
+};
+
+struct arm_spe_record {
+	enum arm_spe_sample_type type;
+	int err;
+	u32 op;
+	u32 latency;
+	u64 from_ip;
+	u64 to_ip;
+	u64 timestamp;
+	u64 virt_addr;
+	u64 phys_addr;
+	u64 context_id;
+	u16 source;
+};
+
+struct arm_spe_insn;
+
+struct arm_spe_buffer {
+	const unsigned char *buf;
+	size_t len;
+	u64 offset;
+	u64 trace_nr;
+};
+
+struct arm_spe_params {
+	int (*get_trace)(struct arm_spe_buffer *buffer, void *data);
+	void *data;
+};
+
+struct arm_spe_decoder {
+	int (*get_trace)(struct arm_spe_buffer *buffer, void *data);
+	void *data;
+	struct arm_spe_record record;
+
+	const unsigned char *buf;
+	size_t len;
+
+	struct arm_spe_pkt packet;
+};
+
+struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params);
+void arm_spe_decoder_free(struct arm_spe_decoder *decoder);
+
+int arm_spe_decode(struct arm_spe_decoder *decoder);
+
+#endif
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
new file mode 100644
index 000000000..2f311189c
--- /dev/null
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -0,0 +1,535 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Arm Statistical Profiling Extensions (SPE) support
+ * Copyright (c) 2017-2018, Arm Ltd.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <endian.h>
+#include <byteswap.h>
+#include <linux/bitops.h>
+#include <stdarg.h>
+
+#include "arm-spe-pkt-decoder.h"
+
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define le16_to_cpu bswap_16
+#define le32_to_cpu bswap_32
+#define le64_to_cpu bswap_64
+#define memcpy_le64(d, s, n) do { \
+	memcpy((d), (s), (n));    \
+	*(d) = le64_to_cpu(*(d)); \
+} while (0)
+#else
+#define le16_to_cpu
+#define le32_to_cpu
+#define le64_to_cpu
+#define memcpy_le64 memcpy
+#endif
+
+static const char * const arm_spe_packet_name[] = {
+	[ARM_SPE_PAD]		= "PAD",
+	[ARM_SPE_END]		= "END",
+	[ARM_SPE_TIMESTAMP]	= "TS",
+	[ARM_SPE_ADDRESS]	= "ADDR",
+	[ARM_SPE_COUNTER]	= "LAT",
+	[ARM_SPE_CONTEXT]	= "CONTEXT",
+	[ARM_SPE_OP_TYPE]	= "OP-TYPE",
+	[ARM_SPE_EVENTS]	= "EVENTS",
+	[ARM_SPE_DATA_SOURCE]	= "DATA-SOURCE",
+};
+
+const char *arm_spe_pkt_name(enum arm_spe_pkt_type type)
+{
+	return arm_spe_packet_name[type];
+}
+
+/*
+ * Extracts the field "sz" from header bits and converts to bytes:
+ *   00 : byte (1)
+ *   01 : halfword (2)
+ *   10 : word (4)
+ *   11 : doubleword (8)
+ */
+static unsigned int arm_spe_payload_len(unsigned char hdr)
+{
+	return 1U << ((hdr & GENMASK_ULL(5, 4)) >> 4);
+}
+
+static int arm_spe_get_payload(const unsigned char *buf, size_t len,
+			       unsigned char ext_hdr,
+			       struct arm_spe_pkt *packet)
+{
+	size_t payload_len = arm_spe_payload_len(buf[ext_hdr]);
+
+	if (len < 1 + ext_hdr + payload_len)
+		return ARM_SPE_NEED_MORE_BYTES;
+
+	buf += 1 + ext_hdr;
+
+	switch (payload_len) {
+	case 1: packet->payload = *(uint8_t *)buf; break;
+	case 2: packet->payload = le16_to_cpu(*(uint16_t *)buf); break;
+	case 4: packet->payload = le32_to_cpu(*(uint32_t *)buf); break;
+	case 8: packet->payload = le64_to_cpu(*(uint64_t *)buf); break;
+	default: return ARM_SPE_BAD_PACKET;
+	}
+
+	return 1 + ext_hdr + payload_len;
+}
+
+static int arm_spe_get_pad(struct arm_spe_pkt *packet)
+{
+	packet->type = ARM_SPE_PAD;
+	return 1;
+}
+
+static int arm_spe_get_alignment(const unsigned char *buf, size_t len,
+				 struct arm_spe_pkt *packet)
+{
+	unsigned int alignment = 1 << ((buf[0] & 0xf) + 1);
+
+	if (len < alignment)
+		return ARM_SPE_NEED_MORE_BYTES;
+
+	packet->type = ARM_SPE_PAD;
+	return alignment - (((uintptr_t)buf) & (alignment - 1));
+}
+
+static int arm_spe_get_end(struct arm_spe_pkt *packet)
+{
+	packet->type = ARM_SPE_END;
+	return 1;
+}
+
+static int arm_spe_get_timestamp(const unsigned char *buf, size_t len,
+				 struct arm_spe_pkt *packet)
+{
+	packet->type = ARM_SPE_TIMESTAMP;
+	return arm_spe_get_payload(buf, len, 0, packet);
+}
+
+static int arm_spe_get_events(const unsigned char *buf, size_t len,
+			      struct arm_spe_pkt *packet)
+{
+	packet->type = ARM_SPE_EVENTS;
+
+	/* we use index to identify Events with a less number of
+	 * comparisons in arm_spe_pkt_desc(): E.g., the LLC-ACCESS,
+	 * LLC-REFILL, and REMOTE-ACCESS events are identified if
+	 * index > 1.
+	 */
+	packet->index = arm_spe_payload_len(buf[0]);
+
+	return arm_spe_get_payload(buf, len, 0, packet);
+}
+
+static int arm_spe_get_data_source(const unsigned char *buf, size_t len,
+				   struct arm_spe_pkt *packet)
+{
+	packet->type = ARM_SPE_DATA_SOURCE;
+	return arm_spe_get_payload(buf, len, 0, packet);
+}
+
+static int arm_spe_get_context(const unsigned char *buf, size_t len,
+			       struct arm_spe_pkt *packet)
+{
+	packet->type = ARM_SPE_CONTEXT;
+	packet->index = SPE_CTX_PKT_HDR_INDEX(buf[0]);
+	return arm_spe_get_payload(buf, len, 0, packet);
+}
+
+static int arm_spe_get_op_type(const unsigned char *buf, size_t len,
+			       struct arm_spe_pkt *packet)
+{
+	packet->type = ARM_SPE_OP_TYPE;
+	packet->index = SPE_OP_PKT_HDR_CLASS(buf[0]);
+	return arm_spe_get_payload(buf, len, 0, packet);
+}
+
+static int arm_spe_get_counter(const unsigned char *buf, size_t len,
+			       const unsigned char ext_hdr, struct arm_spe_pkt *packet)
+{
+	packet->type = ARM_SPE_COUNTER;
+
+	if (ext_hdr)
+		packet->index = SPE_HDR_EXTENDED_INDEX(buf[0], buf[1]);
+	else
+		packet->index = SPE_HDR_SHORT_INDEX(buf[0]);
+
+	return arm_spe_get_payload(buf, len, ext_hdr, packet);
+}
+
+static int arm_spe_get_addr(const unsigned char *buf, size_t len,
+			    const unsigned char ext_hdr, struct arm_spe_pkt *packet)
+{
+	packet->type = ARM_SPE_ADDRESS;
+
+	if (ext_hdr)
+		packet->index = SPE_HDR_EXTENDED_INDEX(buf[0], buf[1]);
+	else
+		packet->index = SPE_HDR_SHORT_INDEX(buf[0]);
+
+	return arm_spe_get_payload(buf, len, ext_hdr, packet);
+}
+
+static int arm_spe_do_get_packet(const unsigned char *buf, size_t len,
+				 struct arm_spe_pkt *packet)
+{
+	unsigned int hdr;
+	unsigned char ext_hdr = 0;
+
+	memset(packet, 0, sizeof(struct arm_spe_pkt));
+
+	if (!len)
+		return ARM_SPE_NEED_MORE_BYTES;
+
+	hdr = buf[0];
+
+	if (hdr == SPE_HEADER0_PAD)
+		return arm_spe_get_pad(packet);
+
+	if (hdr == SPE_HEADER0_END) /* no timestamp at end of record */
+		return arm_spe_get_end(packet);
+
+	if (hdr == SPE_HEADER0_TIMESTAMP)
+		return arm_spe_get_timestamp(buf, len, packet);
+
+	if ((hdr & SPE_HEADER0_MASK1) == SPE_HEADER0_EVENTS)
+		return arm_spe_get_events(buf, len, packet);
+
+	if ((hdr & SPE_HEADER0_MASK1) == SPE_HEADER0_SOURCE)
+		return arm_spe_get_data_source(buf, len, packet);
+
+	if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_CONTEXT)
+		return arm_spe_get_context(buf, len, packet);
+
+	if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_OP_TYPE)
+		return arm_spe_get_op_type(buf, len, packet);
+
+	if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_EXTENDED) {
+		/* 16-bit extended format header */
+		if (len == 1)
+			return ARM_SPE_BAD_PACKET;
+
+		ext_hdr = 1;
+		hdr = buf[1];
+		if (hdr == SPE_HEADER1_ALIGNMENT)
+			return arm_spe_get_alignment(buf, len, packet);
+	}
+
+	/*
+	 * The short format header's byte 0 or the extended format header's
+	 * byte 1 has been assigned to 'hdr', which uses the same encoding for
+	 * address packet and counter packet, so don't need to distinguish if
+	 * it's short format or extended format and handle in once.
+	 */
+	if ((hdr & SPE_HEADER0_MASK3) == SPE_HEADER0_ADDRESS)
+		return arm_spe_get_addr(buf, len, ext_hdr, packet);
+
+	if ((hdr & SPE_HEADER0_MASK3) == SPE_HEADER0_COUNTER)
+		return arm_spe_get_counter(buf, len, ext_hdr, packet);
+
+	return ARM_SPE_BAD_PACKET;
+}
+
+int arm_spe_get_packet(const unsigned char *buf, size_t len,
+		       struct arm_spe_pkt *packet)
+{
+	int ret;
+
+	ret = arm_spe_do_get_packet(buf, len, packet);
+	/* put multiple consecutive PADs on the same line, up to
+	 * the fixed-width output format of 16 bytes per line.
+	 */
+	if (ret > 0 && packet->type == ARM_SPE_PAD) {
+		while (ret < 16 && len > (size_t)ret && !buf[ret])
+			ret += 1;
+	}
+	return ret;
+}
+
+static int arm_spe_pkt_out_string(int *err, char **buf_p, size_t *blen,
+				  const char *fmt, ...)
+{
+	va_list ap;
+	int ret;
+
+	/* Bail out if any error occurred */
+	if (err && *err)
+		return *err;
+
+	va_start(ap, fmt);
+	ret = vsnprintf(*buf_p, *blen, fmt, ap);
+	va_end(ap);
+
+	if (ret < 0) {
+		if (err && !*err)
+			*err = ret;
+
+	/*
+	 * A return value of *blen or more means that the output was
+	 * truncated and the buffer is overrun.
+	 */
+	} else if ((size_t)ret >= *blen) {
+		(*buf_p)[*blen - 1] = '\0';
+
+		/*
+		 * Set *err to 'ret' to avoid overflow if tries to
+		 * fill this buffer sequentially.
+		 */
+		if (err && !*err)
+			*err = ret;
+	} else {
+		*buf_p += ret;
+		*blen -= ret;
+	}
+
+	return ret;
+}
+
+static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet,
+				  char *buf, size_t buf_len)
+{
+	u64 payload = packet->payload;
+	int err = 0;
+
+	arm_spe_pkt_out_string(&err, &buf, &buf_len, "EV");
+
+	if (payload & BIT(EV_EXCEPTION_GEN))
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCEPTION-GEN");
+	if (payload & BIT(EV_RETIRED))
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " RETIRED");
+	if (payload & BIT(EV_L1D_ACCESS))
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-ACCESS");
+	if (payload & BIT(EV_L1D_REFILL))
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-REFILL");
+	if (payload & BIT(EV_TLB_ACCESS))
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-ACCESS");
+	if (payload & BIT(EV_TLB_WALK))
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-REFILL");
+	if (payload & BIT(EV_NOT_TAKEN))
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " NOT-TAKEN");
+	if (payload & BIT(EV_MISPRED))
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " MISPRED");
+	if (payload & BIT(EV_LLC_ACCESS))
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-ACCESS");
+	if (payload & BIT(EV_LLC_MISS))
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL");
+	if (payload & BIT(EV_REMOTE_ACCESS))
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS");
+	if (payload & BIT(EV_ALIGNMENT))
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " ALIGNMENT");
+	if (payload & BIT(EV_PARTIAL_PREDICATE))
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-PARTIAL-PRED");
+	if (payload & BIT(EV_EMPTY_PREDICATE))
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-EMPTY-PRED");
+
+	return err;
+}
+
+static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
+				    char *buf, size_t buf_len)
+{
+	u64 payload = packet->payload;
+	int err = 0;
+
+	switch (packet->index) {
+	case SPE_OP_PKT_HDR_CLASS_OTHER:
+		if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload)) {
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, "SVE-OTHER");
+
+			/* SVE effective vector length */
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, " EVLEN %d",
+					       SPE_OP_PKG_SVE_EVL(payload));
+
+			if (payload & SPE_OP_PKT_SVE_FP)
+				arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP");
+			if (payload & SPE_OP_PKT_SVE_PRED)
+				arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED");
+		} else {
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, "OTHER");
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, " %s",
+					       payload & SPE_OP_PKT_COND ?
+					       "COND-SELECT" : "INSN-OTHER");
+		}
+		break;
+	case SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC:
+		arm_spe_pkt_out_string(&err, &buf, &buf_len,
+				       payload & 0x1 ? "ST" : "LD");
+
+		if (SPE_OP_PKT_IS_LDST_ATOMIC(payload)) {
+			if (payload & SPE_OP_PKT_AT)
+				arm_spe_pkt_out_string(&err, &buf, &buf_len, " AT");
+			if (payload & SPE_OP_PKT_EXCL)
+				arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCL");
+			if (payload & SPE_OP_PKT_AR)
+				arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR");
+		}
+
+		switch (SPE_OP_PKT_LDST_SUBCLASS_GET(payload)) {
+		case SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP:
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP");
+			break;
+		case SPE_OP_PKT_LDST_SUBCLASS_GP_REG:
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, " GP-REG");
+			break;
+		case SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG:
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, " UNSPEC-REG");
+			break;
+		case SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG:
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, " NV-SYSREG");
+			break;
+		default:
+			break;
+		}
+
+		if (SPE_OP_PKT_IS_LDST_SVE(payload)) {
+			/* SVE effective vector length */
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, " EVLEN %d",
+					       SPE_OP_PKG_SVE_EVL(payload));
+
+			if (payload & SPE_OP_PKT_SVE_PRED)
+				arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED");
+			if (payload & SPE_OP_PKT_SVE_SG)
+				arm_spe_pkt_out_string(&err, &buf, &buf_len, " SG");
+		}
+		break;
+	case SPE_OP_PKT_HDR_CLASS_BR_ERET:
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, "B");
+
+		if (payload & SPE_OP_PKT_COND)
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, " COND");
+
+		if (SPE_OP_PKT_IS_INDIRECT_BRANCH(payload))
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, " IND");
+
+		break;
+	default:
+		/* Unknown index */
+		err = -1;
+		break;
+	}
+
+	return err;
+}
+
+static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet,
+				 char *buf, size_t buf_len)
+{
+	int ns, el, idx = packet->index;
+	int ch, pat;
+	u64 payload = packet->payload;
+	int err = 0;
+
+	switch (idx) {
+	case SPE_ADDR_PKT_HDR_INDEX_INS:
+	case SPE_ADDR_PKT_HDR_INDEX_BRANCH:
+		ns = !!SPE_ADDR_PKT_GET_NS(payload);
+		el = SPE_ADDR_PKT_GET_EL(payload);
+		payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
+		arm_spe_pkt_out_string(&err, &buf, &buf_len,
+				"%s 0x%llx el%d ns=%d",
+				(idx == 1) ? "TGT" : "PC", payload, el, ns);
+		break;
+	case SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT:
+		arm_spe_pkt_out_string(&err, &buf, &buf_len,
+				       "VA 0x%llx", payload);
+		break;
+	case SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS:
+		ns = !!SPE_ADDR_PKT_GET_NS(payload);
+		ch = !!SPE_ADDR_PKT_GET_CH(payload);
+		pat = SPE_ADDR_PKT_GET_PAT(payload);
+		payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
+		arm_spe_pkt_out_string(&err, &buf, &buf_len,
+				       "PA 0x%llx ns=%d ch=%d pat=%x",
+				       payload, ns, ch, pat);
+		break;
+	default:
+		/* Unknown index */
+		err = -1;
+		break;
+	}
+
+	return err;
+}
+
+static int arm_spe_pkt_desc_counter(const struct arm_spe_pkt *packet,
+				    char *buf, size_t buf_len)
+{
+	u64 payload = packet->payload;
+	const char *name = arm_spe_pkt_name(packet->type);
+	int err = 0;
+
+	arm_spe_pkt_out_string(&err, &buf, &buf_len, "%s %d ", name,
+			       (unsigned short)payload);
+
+	switch (packet->index) {
+	case SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT:
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, "TOT");
+		break;
+	case SPE_CNT_PKT_HDR_INDEX_ISSUE_LAT:
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, "ISSUE");
+		break;
+	case SPE_CNT_PKT_HDR_INDEX_TRANS_LAT:
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, "XLAT");
+		break;
+	default:
+		break;
+	}
+
+	return err;
+}
+
+int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
+		     size_t buf_len)
+{
+	int idx = packet->index;
+	unsigned long long payload = packet->payload;
+	const char *name = arm_spe_pkt_name(packet->type);
+	char *buf_orig = buf;
+	size_t blen = buf_len;
+	int err = 0;
+
+	switch (packet->type) {
+	case ARM_SPE_BAD:
+	case ARM_SPE_PAD:
+	case ARM_SPE_END:
+		arm_spe_pkt_out_string(&err, &buf, &blen, "%s", name);
+		break;
+	case ARM_SPE_EVENTS:
+		err = arm_spe_pkt_desc_event(packet, buf, buf_len);
+		break;
+	case ARM_SPE_OP_TYPE:
+		err = arm_spe_pkt_desc_op_type(packet, buf, buf_len);
+		break;
+	case ARM_SPE_DATA_SOURCE:
+	case ARM_SPE_TIMESTAMP:
+		arm_spe_pkt_out_string(&err, &buf, &blen, "%s %lld", name, payload);
+		break;
+	case ARM_SPE_ADDRESS:
+		err = arm_spe_pkt_desc_addr(packet, buf, buf_len);
+		break;
+	case ARM_SPE_CONTEXT:
+		arm_spe_pkt_out_string(&err, &buf, &blen, "%s 0x%lx el%d",
+				       name, (unsigned long)payload, idx + 1);
+		break;
+	case ARM_SPE_COUNTER:
+		err = arm_spe_pkt_desc_counter(packet, buf, buf_len);
+		break;
+	default:
+		/* Unknown packet type */
+		err = -1;
+		break;
+	}
+
+	/* Output raw data if detect any error */
+	if (err) {
+		err = 0;
+		arm_spe_pkt_out_string(&err, &buf_orig, &buf_len, "%s 0x%llx (%d)",
+				       name, payload, packet->index);
+	}
+
+	return err;
+}
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
new file mode 100644
index 000000000..9b970e7bf
--- /dev/null
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Arm Statistical Profiling Extensions (SPE) support
+ * Copyright (c) 2017-2018, Arm Ltd.
+ */
+
+#ifndef INCLUDE__ARM_SPE_PKT_DECODER_H__
+#define INCLUDE__ARM_SPE_PKT_DECODER_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define ARM_SPE_PKT_DESC_MAX		256
+
+#define ARM_SPE_NEED_MORE_BYTES		-1
+#define ARM_SPE_BAD_PACKET		-2
+
+#define ARM_SPE_PKT_MAX_SZ		16
+
+enum arm_spe_pkt_type {
+	ARM_SPE_BAD,
+	ARM_SPE_PAD,
+	ARM_SPE_END,
+	ARM_SPE_TIMESTAMP,
+	ARM_SPE_ADDRESS,
+	ARM_SPE_COUNTER,
+	ARM_SPE_CONTEXT,
+	ARM_SPE_OP_TYPE,
+	ARM_SPE_EVENTS,
+	ARM_SPE_DATA_SOURCE,
+};
+
+struct arm_spe_pkt {
+	enum arm_spe_pkt_type	type;
+	unsigned char		index;
+	uint64_t		payload;
+};
+
+/* Short header (HEADER0) and extended header (HEADER1) */
+#define SPE_HEADER0_PAD				0x0
+#define SPE_HEADER0_END				0x1
+#define SPE_HEADER0_TIMESTAMP			0x71
+/* Mask for event & data source */
+#define SPE_HEADER0_MASK1			(GENMASK_ULL(7, 6) | GENMASK_ULL(3, 0))
+#define SPE_HEADER0_EVENTS			0x42
+#define SPE_HEADER0_SOURCE			0x43
+/* Mask for context & operation */
+#define SPE_HEADER0_MASK2			GENMASK_ULL(7, 2)
+#define SPE_HEADER0_CONTEXT			0x64
+#define SPE_HEADER0_OP_TYPE			0x48
+/* Mask for extended format */
+#define SPE_HEADER0_EXTENDED			0x20
+/* Mask for address & counter */
+#define SPE_HEADER0_MASK3			GENMASK_ULL(7, 3)
+#define SPE_HEADER0_ADDRESS			0xb0
+#define SPE_HEADER0_COUNTER			0x98
+#define SPE_HEADER1_ALIGNMENT			0x0
+
+#define SPE_HDR_SHORT_INDEX(h)			((h) & GENMASK_ULL(2, 0))
+#define SPE_HDR_EXTENDED_INDEX(h0, h1)		(((h0) & GENMASK_ULL(1, 0)) << 3 | \
+						 SPE_HDR_SHORT_INDEX(h1))
+
+/* Address packet header */
+#define SPE_ADDR_PKT_HDR_INDEX_INS		0x0
+#define SPE_ADDR_PKT_HDR_INDEX_BRANCH		0x1
+#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT	0x2
+#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS	0x3
+
+/* Address packet payload */
+#define SPE_ADDR_PKT_ADDR_BYTE7_SHIFT		56
+#define SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(v)	((v) & GENMASK_ULL(55, 0))
+#define SPE_ADDR_PKT_ADDR_GET_BYTE_6(v)		(((v) & GENMASK_ULL(55, 48)) >> 48)
+
+#define SPE_ADDR_PKT_GET_NS(v)			(((v) & BIT_ULL(63)) >> 63)
+#define SPE_ADDR_PKT_GET_EL(v)			(((v) & GENMASK_ULL(62, 61)) >> 61)
+#define SPE_ADDR_PKT_GET_CH(v)			(((v) & BIT_ULL(62)) >> 62)
+#define SPE_ADDR_PKT_GET_PAT(v)			(((v) & GENMASK_ULL(59, 56)) >> 56)
+
+#define SPE_ADDR_PKT_EL0			0
+#define SPE_ADDR_PKT_EL1			1
+#define SPE_ADDR_PKT_EL2			2
+#define SPE_ADDR_PKT_EL3			3
+
+/* Context packet header */
+#define SPE_CTX_PKT_HDR_INDEX(h)		((h) & GENMASK_ULL(1, 0))
+
+/* Counter packet header */
+#define SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT		0x0
+#define SPE_CNT_PKT_HDR_INDEX_ISSUE_LAT		0x1
+#define SPE_CNT_PKT_HDR_INDEX_TRANS_LAT		0x2
+
+/* Event packet payload */
+enum arm_spe_events {
+	EV_EXCEPTION_GEN	= 0,
+	EV_RETIRED		= 1,
+	EV_L1D_ACCESS		= 2,
+	EV_L1D_REFILL		= 3,
+	EV_TLB_ACCESS		= 4,
+	EV_TLB_WALK		= 5,
+	EV_NOT_TAKEN		= 6,
+	EV_MISPRED		= 7,
+	EV_LLC_ACCESS		= 8,
+	EV_LLC_MISS		= 9,
+	EV_REMOTE_ACCESS	= 10,
+	EV_ALIGNMENT		= 11,
+	EV_PARTIAL_PREDICATE	= 17,
+	EV_EMPTY_PREDICATE	= 18,
+};
+
+/* Operation packet header */
+#define SPE_OP_PKT_HDR_CLASS(h)			((h) & GENMASK_ULL(1, 0))
+#define SPE_OP_PKT_HDR_CLASS_OTHER		0x0
+#define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC	0x1
+#define SPE_OP_PKT_HDR_CLASS_BR_ERET		0x2
+
+#define SPE_OP_PKT_IS_OTHER_SVE_OP(v)		(((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8)
+
+#define SPE_OP_PKT_COND				BIT(0)
+
+#define SPE_OP_PKT_LDST_SUBCLASS_GET(v)		((v) & GENMASK_ULL(7, 1))
+#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG		0x0
+#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP	0x4
+#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG	0x10
+#define SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG	0x30
+
+#define SPE_OP_PKT_IS_LDST_ATOMIC(v)		(((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2)
+
+#define SPE_OP_PKT_AR				BIT(4)
+#define SPE_OP_PKT_EXCL				BIT(3)
+#define SPE_OP_PKT_AT				BIT(2)
+#define SPE_OP_PKT_ST				BIT(0)
+
+#define SPE_OP_PKT_IS_LDST_SVE(v)		(((v) & (BIT(3) | BIT(1))) == 0x8)
+
+#define SPE_OP_PKT_SVE_SG			BIT(7)
+/*
+ * SVE effective vector length (EVL) is stored in byte 0 bits [6:4];
+ * the length is rounded up to a power of two and use 32 as one step,
+ * so EVL calculation is:
+ *
+ *   32 * (2 ^ bits [6:4]) = 32 << (bits [6:4])
+ */
+#define SPE_OP_PKG_SVE_EVL(v)			(32 << (((v) & GENMASK_ULL(6, 4)) >> 4))
+#define SPE_OP_PKT_SVE_PRED			BIT(2)
+#define SPE_OP_PKT_SVE_FP			BIT(1)
+
+#define SPE_OP_PKT_IS_INDIRECT_BRANCH(v)	(((v) & GENMASK_ULL(7, 1)) == 0x2)
+
+const char *arm_spe_pkt_name(enum arm_spe_pkt_type);
+
+int arm_spe_get_packet(const unsigned char *buf, size_t len,
+		       struct arm_spe_pkt *packet);
+
+int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, size_t len);
+#endif
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
new file mode 100644
index 000000000..906476a83
--- /dev/null
+++ b/tools/perf/util/arm-spe.c
@@ -0,0 +1,1359 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Arm Statistical Profiling Extensions (SPE) support
+ * Copyright (c) 2017-2018, Arm Ltd.
+ */
+
+#include <byteswap.h>
+#include <endian.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/types.h>
+#include <linux/zalloc.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "auxtrace.h"
+#include "color.h"
+#include "debug.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "machine.h"
+#include "session.h"
+#include "symbol.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "tsc.h"
+#include "tool.h"
+#include "util/synthetic-events.h"
+
+#include "arm-spe.h"
+#include "arm-spe-decoder/arm-spe-decoder.h"
+#include "arm-spe-decoder/arm-spe-pkt-decoder.h"
+
+#include "../../arch/arm64/include/asm/cputype.h"
+#define MAX_TIMESTAMP (~0ULL)
+
+struct arm_spe {
+	struct auxtrace			auxtrace;
+	struct auxtrace_queues		queues;
+	struct auxtrace_heap		heap;
+	struct itrace_synth_opts        synth_opts;
+	u32				auxtrace_type;
+	struct perf_session		*session;
+	struct machine			*machine;
+	u32				pmu_type;
+	u64				midr;
+
+	struct perf_tsc_conversion	tc;
+
+	u8				timeless_decoding;
+	u8				data_queued;
+
+	u64				sample_type;
+	u8				sample_flc;
+	u8				sample_llc;
+	u8				sample_tlb;
+	u8				sample_branch;
+	u8				sample_remote_access;
+	u8				sample_memory;
+	u8				sample_instructions;
+	u64				instructions_sample_period;
+
+	u64				l1d_miss_id;
+	u64				l1d_access_id;
+	u64				llc_miss_id;
+	u64				llc_access_id;
+	u64				tlb_miss_id;
+	u64				tlb_access_id;
+	u64				branch_miss_id;
+	u64				remote_access_id;
+	u64				memory_id;
+	u64				instructions_id;
+
+	u64				kernel_start;
+
+	unsigned long			num_events;
+	u8				use_ctx_pkt_for_pid;
+};
+
+struct arm_spe_queue {
+	struct arm_spe			*spe;
+	unsigned int			queue_nr;
+	struct auxtrace_buffer		*buffer;
+	struct auxtrace_buffer		*old_buffer;
+	union perf_event		*event_buf;
+	bool				on_heap;
+	bool				done;
+	pid_t				pid;
+	pid_t				tid;
+	int				cpu;
+	struct arm_spe_decoder		*decoder;
+	u64				time;
+	u64				timestamp;
+	struct thread			*thread;
+	u64				period_instructions;
+};
+
+static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
+			 unsigned char *buf, size_t len)
+{
+	struct arm_spe_pkt packet;
+	size_t pos = 0;
+	int ret, pkt_len, i;
+	char desc[ARM_SPE_PKT_DESC_MAX];
+	const char *color = PERF_COLOR_BLUE;
+
+	color_fprintf(stdout, color,
+		      ". ... ARM SPE data: size %#zx bytes\n",
+		      len);
+
+	while (len) {
+		ret = arm_spe_get_packet(buf, len, &packet);
+		if (ret > 0)
+			pkt_len = ret;
+		else
+			pkt_len = 1;
+		printf(".");
+		color_fprintf(stdout, color, "  %08x: ", pos);
+		for (i = 0; i < pkt_len; i++)
+			color_fprintf(stdout, color, " %02x", buf[i]);
+		for (; i < 16; i++)
+			color_fprintf(stdout, color, "   ");
+		if (ret > 0) {
+			ret = arm_spe_pkt_desc(&packet, desc,
+					       ARM_SPE_PKT_DESC_MAX);
+			if (!ret)
+				color_fprintf(stdout, color, " %s\n", desc);
+		} else {
+			color_fprintf(stdout, color, " Bad packet!\n");
+		}
+		pos += pkt_len;
+		buf += pkt_len;
+		len -= pkt_len;
+	}
+}
+
+static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
+			       size_t len)
+{
+	printf(".\n");
+	arm_spe_dump(spe, buf, len);
+}
+
+static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
+{
+	struct arm_spe_queue *speq = data;
+	struct auxtrace_buffer *buffer = speq->buffer;
+	struct auxtrace_buffer *old_buffer = speq->old_buffer;
+	struct auxtrace_queue *queue;
+
+	queue = &speq->spe->queues.queue_array[speq->queue_nr];
+
+	buffer = auxtrace_buffer__next(queue, buffer);
+	/* If no more data, drop the previous auxtrace_buffer and return */
+	if (!buffer) {
+		if (old_buffer)
+			auxtrace_buffer__drop_data(old_buffer);
+		b->len = 0;
+		return 0;
+	}
+
+	speq->buffer = buffer;
+
+	/* If the aux_buffer doesn't have data associated, try to load it */
+	if (!buffer->data) {
+		/* get the file desc associated with the perf data file */
+		int fd = perf_data__fd(speq->spe->session->data);
+
+		buffer->data = auxtrace_buffer__get_data(buffer, fd);
+		if (!buffer->data)
+			return -ENOMEM;
+	}
+
+	b->len = buffer->size;
+	b->buf = buffer->data;
+
+	if (b->len) {
+		if (old_buffer)
+			auxtrace_buffer__drop_data(old_buffer);
+		speq->old_buffer = buffer;
+	} else {
+		auxtrace_buffer__drop_data(buffer);
+		return arm_spe_get_trace(b, data);
+	}
+
+	return 0;
+}
+
+static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
+		unsigned int queue_nr)
+{
+	struct arm_spe_params params = { .get_trace = 0, };
+	struct arm_spe_queue *speq;
+
+	speq = zalloc(sizeof(*speq));
+	if (!speq)
+		return NULL;
+
+	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
+	if (!speq->event_buf)
+		goto out_free;
+
+	speq->spe = spe;
+	speq->queue_nr = queue_nr;
+	speq->pid = -1;
+	speq->tid = -1;
+	speq->cpu = -1;
+	speq->period_instructions = 0;
+
+	/* params set */
+	params.get_trace = arm_spe_get_trace;
+	params.data = speq;
+
+	/* create new decoder */
+	speq->decoder = arm_spe_decoder_new(&params);
+	if (!speq->decoder)
+		goto out_free;
+
+	return speq;
+
+out_free:
+	zfree(&speq->event_buf);
+	free(speq);
+
+	return NULL;
+}
+
+static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
+{
+	return ip >= spe->kernel_start ?
+		PERF_RECORD_MISC_KERNEL :
+		PERF_RECORD_MISC_USER;
+}
+
+static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
+				    struct auxtrace_queue *queue)
+{
+	struct arm_spe_queue *speq = queue->priv;
+	pid_t tid;
+
+	tid = machine__get_current_tid(spe->machine, speq->cpu);
+	if (tid != -1) {
+		speq->tid = tid;
+		thread__zput(speq->thread);
+	} else
+		speq->tid = queue->tid;
+
+	if ((!speq->thread) && (speq->tid != -1)) {
+		speq->thread = machine__find_thread(spe->machine, -1,
+						    speq->tid);
+	}
+
+	if (speq->thread) {
+		speq->pid = speq->thread->pid_;
+		if (queue->cpu == -1)
+			speq->cpu = speq->thread->cpu;
+	}
+}
+
+static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
+{
+	struct arm_spe *spe = speq->spe;
+	int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);
+
+	if (err)
+		return err;
+
+	arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);
+
+	return 0;
+}
+
+static void arm_spe_prep_sample(struct arm_spe *spe,
+				struct arm_spe_queue *speq,
+				union perf_event *event,
+				struct perf_sample *sample)
+{
+	struct arm_spe_record *record = &speq->decoder->record;
+
+	if (!spe->timeless_decoding)
+		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);
+
+	sample->ip = record->from_ip;
+	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
+	sample->pid = speq->pid;
+	sample->tid = speq->tid;
+	sample->period = 1;
+	sample->cpu = speq->cpu;
+
+	event->sample.header.type = PERF_RECORD_SAMPLE;
+	event->sample.header.misc = sample->cpumode;
+	event->sample.header.size = sizeof(struct perf_event_header);
+}
+
+static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
+{
+	event->header.size = perf_event__sample_event_size(sample, type, 0);
+	return perf_event__synthesize_sample(event, type, 0, sample);
+}
+
+static inline int
+arm_spe_deliver_synth_event(struct arm_spe *spe,
+			    struct arm_spe_queue *speq __maybe_unused,
+			    union perf_event *event,
+			    struct perf_sample *sample)
+{
+	int ret;
+
+	if (spe->synth_opts.inject) {
+		ret = arm_spe__inject_event(event, sample, spe->sample_type);
+		if (ret)
+			return ret;
+	}
+
+	ret = perf_session__deliver_synth_event(spe->session, event, sample);
+	if (ret)
+		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
+
+	return ret;
+}
+
+static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
+				     u64 spe_events_id, u64 data_src)
+{
+	struct arm_spe *spe = speq->spe;
+	struct arm_spe_record *record = &speq->decoder->record;
+	union perf_event *event = speq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+
+	arm_spe_prep_sample(spe, speq, event, &sample);
+
+	sample.id = spe_events_id;
+	sample.stream_id = spe_events_id;
+	sample.addr = record->virt_addr;
+	sample.phys_addr = record->phys_addr;
+	sample.data_src = data_src;
+	sample.weight = record->latency;
+
+	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+}
+
+static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
+					u64 spe_events_id)
+{
+	struct arm_spe *spe = speq->spe;
+	struct arm_spe_record *record = &speq->decoder->record;
+	union perf_event *event = speq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+
+	arm_spe_prep_sample(spe, speq, event, &sample);
+
+	sample.id = spe_events_id;
+	sample.stream_id = spe_events_id;
+	sample.addr = record->to_ip;
+	sample.weight = record->latency;
+
+	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+}
+
+static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
+					     u64 spe_events_id, u64 data_src)
+{
+	struct arm_spe *spe = speq->spe;
+	struct arm_spe_record *record = &speq->decoder->record;
+	union perf_event *event = speq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+
+	/*
+	 * Handles perf instruction sampling period.
+	 */
+	speq->period_instructions++;
+	if (speq->period_instructions < spe->instructions_sample_period)
+		return 0;
+	speq->period_instructions = 0;
+
+	arm_spe_prep_sample(spe, speq, event, &sample);
+
+	sample.id = spe_events_id;
+	sample.stream_id = spe_events_id;
+	sample.addr = record->virt_addr;
+	sample.phys_addr = record->phys_addr;
+	sample.data_src = data_src;
+	sample.period = spe->instructions_sample_period;
+	sample.weight = record->latency;
+
+	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+}
+
+static const struct midr_range neoverse_spe[] = {
+	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
+	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
+	{},
+};
+
+static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record,
+						union perf_mem_data_src *data_src)
+{
+	/*
+	 * Even though four levels of cache hierarchy are possible, no known
+	 * production Neoverse systems currently include more than three levels
+	 * so for the time being we assume three exist. If a production system
+	 * is built with four the this function would have to be changed to
+	 * detect the number of levels for reporting.
+	 */
+
+	/*
+	 * We have no data on the hit level or data source for stores in the
+	 * Neoverse SPE records.
+	 */
+	if (record->op & ARM_SPE_ST) {
+		data_src->mem_lvl = PERF_MEM_LVL_NA;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NA;
+		return;
+	}
+
+	switch (record->source) {
+	case ARM_SPE_NV_L1D:
+		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+		break;
+	case ARM_SPE_NV_L2:
+		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+		break;
+	case ARM_SPE_NV_PEER_CORE:
+		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+		break;
+	/*
+	 * We don't know if this is L1, L2 but we do know it was a cache-2-cache
+	 * transfer, so set SNOOPX_PEER
+	 */
+	case ARM_SPE_NV_LOCAL_CLUSTER:
+	case ARM_SPE_NV_PEER_CLUSTER:
+		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+		break;
+	/*
+	 * System cache is assumed to be L3
+	 */
+	case ARM_SPE_NV_SYS_CACHE:
+		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
+		break;
+	/*
+	 * We don't know what level it hit in, except it came from the other
+	 * socket
+	 */
+	case ARM_SPE_NV_REMOTE:
+		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
+		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+		break;
+	case ARM_SPE_NV_DRAM:
+		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+		break;
+	default:
+		break;
+	}
+}
+
+static void arm_spe__synth_data_source_generic(const struct arm_spe_record *record,
+					       union perf_mem_data_src *data_src)
+{
+	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
+		data_src->mem_lvl = PERF_MEM_LVL_L3;
+
+		if (record->type & ARM_SPE_LLC_MISS)
+			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
+		else
+			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
+	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
+		data_src->mem_lvl = PERF_MEM_LVL_L1;
+
+		if (record->type & ARM_SPE_L1D_MISS)
+			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
+		else
+			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
+	}
+
+	if (record->type & ARM_SPE_REMOTE_ACCESS)
+		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
+}
+
+static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
+{
+	union perf_mem_data_src	data_src = { 0 };
+	bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
+
+	if (record->op == ARM_SPE_LD)
+		data_src.mem_op = PERF_MEM_OP_LOAD;
+	else if (record->op == ARM_SPE_ST)
+		data_src.mem_op = PERF_MEM_OP_STORE;
+	else
+		return 0;
+
+	if (is_neoverse)
+		arm_spe__synth_data_source_neoverse(record, &data_src);
+	else
+		arm_spe__synth_data_source_generic(record, &data_src);
+
+	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
+		data_src.mem_dtlb = PERF_MEM_TLB_WK;
+
+		if (record->type & ARM_SPE_TLB_MISS)
+			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
+		else
+			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
+	}
+
+	return data_src.val;
+}
+
+static int arm_spe_sample(struct arm_spe_queue *speq)
+{
+	const struct arm_spe_record *record = &speq->decoder->record;
+	struct arm_spe *spe = speq->spe;
+	u64 data_src;
+	int err;
+
+	data_src = arm_spe__synth_data_source(record, spe->midr);
+
+	if (spe->sample_flc) {
+		if (record->type & ARM_SPE_L1D_MISS) {
+			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
+							data_src);
+			if (err)
+				return err;
+		}
+
+		if (record->type & ARM_SPE_L1D_ACCESS) {
+			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
+							data_src);
+			if (err)
+				return err;
+		}
+	}
+
+	if (spe->sample_llc) {
+		if (record->type & ARM_SPE_LLC_MISS) {
+			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
+							data_src);
+			if (err)
+				return err;
+		}
+
+		if (record->type & ARM_SPE_LLC_ACCESS) {
+			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
+							data_src);
+			if (err)
+				return err;
+		}
+	}
+
+	if (spe->sample_tlb) {
+		if (record->type & ARM_SPE_TLB_MISS) {
+			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
+							data_src);
+			if (err)
+				return err;
+		}
+
+		if (record->type & ARM_SPE_TLB_ACCESS) {
+			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
+							data_src);
+			if (err)
+				return err;
+		}
+	}
+
+	if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
+		err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
+		if (err)
+			return err;
+	}
+
+	if (spe->sample_remote_access &&
+	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
+		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
+						data_src);
+		if (err)
+			return err;
+	}
+
+	/*
+	 * When data_src is zero it means the record is not a memory operation,
+	 * skip to synthesize memory sample for this case.
+	 */
+	if (spe->sample_memory && data_src) {
+		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
+		if (err)
+			return err;
+	}
+
+	if (spe->sample_instructions) {
+		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
+{
+	struct arm_spe *spe = speq->spe;
+	struct arm_spe_record *record;
+	int ret;
+
+	if (!spe->kernel_start)
+		spe->kernel_start = machine__kernel_start(spe->machine);
+
+	while (1) {
+		/*
+		 * The usual logic is firstly to decode the packets, and then
+		 * based the record to synthesize sample; but here the flow is
+		 * reversed: it calls arm_spe_sample() for synthesizing samples
+		 * prior to arm_spe_decode().
+		 *
+		 * Two reasons for this code logic:
+		 * 1. Firstly, when setup queue in arm_spe__setup_queue(), it
+		 * has decoded trace data and generated a record, but the record
+		 * is left to generate sample until run to here, so it's correct
+		 * to synthesize sample for the left record.
+		 * 2. After decoding trace data, it needs to compare the record
+		 * timestamp with the coming perf event, if the record timestamp
+		 * is later than the perf event, it needs bail out and pushs the
+		 * record into auxtrace heap, thus the record can be deferred to
+		 * synthesize sample until run to here at the next time; so this
+		 * can correlate samples between Arm SPE trace data and other
+		 * perf events with correct time ordering.
+		 */
+
+		/*
+		 * Update pid/tid info.
+		 */
+		record = &speq->decoder->record;
+		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
+			ret = arm_spe_set_tid(speq, record->context_id);
+			if (ret)
+				return ret;
+
+			spe->use_ctx_pkt_for_pid = true;
+		}
+
+		ret = arm_spe_sample(speq);
+		if (ret)
+			return ret;
+
+		ret = arm_spe_decode(speq->decoder);
+		if (!ret) {
+			pr_debug("No data or all data has been processed.\n");
+			return 1;
+		}
+
+		/*
+		 * Error is detected when decode SPE trace data, continue to
+		 * the next trace data and find out more records.
+		 */
+		if (ret < 0)
+			continue;
+
+		record = &speq->decoder->record;
+
+		/* Update timestamp for the last record */
+		if (record->timestamp > speq->timestamp)
+			speq->timestamp = record->timestamp;
+
+		/*
+		 * If the timestamp of the queue is later than timestamp of the
+		 * coming perf event, bail out so can allow the perf event to
+		 * be processed ahead.
+		 */
+		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
+			*timestamp = speq->timestamp;
+			return 0;
+		}
+	}
+
+	return 0;
+}
+
+static int arm_spe__setup_queue(struct arm_spe *spe,
+			       struct auxtrace_queue *queue,
+			       unsigned int queue_nr)
+{
+	struct arm_spe_queue *speq = queue->priv;
+	struct arm_spe_record *record;
+
+	if (list_empty(&queue->head) || speq)
+		return 0;
+
+	speq = arm_spe__alloc_queue(spe, queue_nr);
+
+	if (!speq)
+		return -ENOMEM;
+
+	queue->priv = speq;
+
+	if (queue->cpu != -1)
+		speq->cpu = queue->cpu;
+
+	if (!speq->on_heap) {
+		int ret;
+
+		if (spe->timeless_decoding)
+			return 0;
+
+retry:
+		ret = arm_spe_decode(speq->decoder);
+
+		if (!ret)
+			return 0;
+
+		if (ret < 0)
+			goto retry;
+
+		record = &speq->decoder->record;
+
+		speq->timestamp = record->timestamp;
+		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
+		if (ret)
+			return ret;
+		speq->on_heap = true;
+	}
+
+	return 0;
+}
+
+static int arm_spe__setup_queues(struct arm_spe *spe)
+{
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < spe->queues.nr_queues; i++) {
+		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int arm_spe__update_queues(struct arm_spe *spe)
+{
+	if (spe->queues.new_data) {
+		spe->queues.new_data = false;
+		return arm_spe__setup_queues(spe);
+	}
+
+	return 0;
+}
+
+static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
+{
+	struct evsel *evsel;
+	struct evlist *evlist = spe->session->evlist;
+	bool timeless_decoding = true;
+
+	/*
+	 * Circle through the list of event and complain if we find one
+	 * with the time bit set.
+	 */
+	evlist__for_each_entry(evlist, evsel) {
+		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
+			timeless_decoding = false;
+	}
+
+	return timeless_decoding;
+}
+
+static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
+{
+	unsigned int queue_nr;
+	u64 ts;
+	int ret;
+
+	while (1) {
+		struct auxtrace_queue *queue;
+		struct arm_spe_queue *speq;
+
+		if (!spe->heap.heap_cnt)
+			return 0;
+
+		if (spe->heap.heap_array[0].ordinal >= timestamp)
+			return 0;
+
+		queue_nr = spe->heap.heap_array[0].queue_nr;
+		queue = &spe->queues.queue_array[queue_nr];
+		speq = queue->priv;
+
+		auxtrace_heap__pop(&spe->heap);
+
+		if (spe->heap.heap_cnt) {
+			ts = spe->heap.heap_array[0].ordinal + 1;
+			if (ts > timestamp)
+				ts = timestamp;
+		} else {
+			ts = timestamp;
+		}
+
+		/*
+		 * A previous context-switch event has set pid/tid in the machine's context, so
+		 * here we need to update the pid/tid in the thread and SPE queue.
+		 */
+		if (!spe->use_ctx_pkt_for_pid)
+			arm_spe_set_pid_tid_cpu(spe, queue);
+
+		ret = arm_spe_run_decoder(speq, &ts);
+		if (ret < 0) {
+			auxtrace_heap__add(&spe->heap, queue_nr, ts);
+			return ret;
+		}
+
+		if (!ret) {
+			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
+			if (ret < 0)
+				return ret;
+		} else {
+			speq->on_heap = false;
+		}
+	}
+
+	return 0;
+}
+
+static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
+					    u64 time_)
+{
+	struct auxtrace_queues *queues = &spe->queues;
+	unsigned int i;
+	u64 ts = 0;
+
+	for (i = 0; i < queues->nr_queues; i++) {
+		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
+		struct arm_spe_queue *speq = queue->priv;
+
+		if (speq && (tid == -1 || speq->tid == tid)) {
+			speq->time = time_;
+			arm_spe_set_pid_tid_cpu(spe, queue);
+			arm_spe_run_decoder(speq, &ts);
+		}
+	}
+	return 0;
+}
+
+static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
+				  struct perf_sample *sample)
+{
+	pid_t pid, tid;
+	int cpu;
+
+	if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
+		return 0;
+
+	pid = event->context_switch.next_prev_pid;
+	tid = event->context_switch.next_prev_tid;
+	cpu = sample->cpu;
+
+	if (tid == -1)
+		pr_warning("context_switch event has no tid\n");
+
+	return machine__set_current_tid(spe->machine, cpu, pid, tid);
+}
+
+static int arm_spe_process_event(struct perf_session *session,
+				 union perf_event *event,
+				 struct perf_sample *sample,
+				 struct perf_tool *tool)
+{
+	int err = 0;
+	u64 timestamp;
+	struct arm_spe *spe = container_of(session->auxtrace,
+			struct arm_spe, auxtrace);
+
+	if (dump_trace)
+		return 0;
+
+	if (!tool->ordered_events) {
+		pr_err("SPE trace requires ordered events\n");
+		return -EINVAL;
+	}
+
+	if (sample->time && (sample->time != (u64) -1))
+		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
+	else
+		timestamp = 0;
+
+	if (timestamp || spe->timeless_decoding) {
+		err = arm_spe__update_queues(spe);
+		if (err)
+			return err;
+	}
+
+	if (spe->timeless_decoding) {
+		if (event->header.type == PERF_RECORD_EXIT) {
+			err = arm_spe_process_timeless_queues(spe,
+					event->fork.tid,
+					sample->time);
+		}
+	} else if (timestamp) {
+		err = arm_spe_process_queues(spe, timestamp);
+		if (err)
+			return err;
+
+		if (!spe->use_ctx_pkt_for_pid &&
+		    (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
+		    event->header.type == PERF_RECORD_SWITCH))
+			err = arm_spe_context_switch(spe, event, sample);
+	}
+
+	return err;
+}
+
+static int arm_spe_process_auxtrace_event(struct perf_session *session,
+					  union perf_event *event,
+					  struct perf_tool *tool __maybe_unused)
+{
+	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
+					     auxtrace);
+
+	if (!spe->data_queued) {
+		struct auxtrace_buffer *buffer;
+		off_t data_offset;
+		int fd = perf_data__fd(session->data);
+		int err;
+
+		if (perf_data__is_pipe(session->data)) {
+			data_offset = 0;
+		} else {
+			data_offset = lseek(fd, 0, SEEK_CUR);
+			if (data_offset == -1)
+				return -errno;
+		}
+
+		err = auxtrace_queues__add_event(&spe->queues, session, event,
+				data_offset, &buffer);
+		if (err)
+			return err;
+
+		/* Dump here now we have copied a piped trace out of the pipe */
+		if (dump_trace) {
+			if (auxtrace_buffer__get_data(buffer, fd)) {
+				arm_spe_dump_event(spe, buffer->data,
+						buffer->size);
+				auxtrace_buffer__put_data(buffer);
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int arm_spe_flush(struct perf_session *session __maybe_unused,
+			 struct perf_tool *tool __maybe_unused)
+{
+	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
+			auxtrace);
+	int ret;
+
+	if (dump_trace)
+		return 0;
+
+	if (!tool->ordered_events)
+		return -EINVAL;
+
+	ret = arm_spe__update_queues(spe);
+	if (ret < 0)
+		return ret;
+
+	if (spe->timeless_decoding)
+		return arm_spe_process_timeless_queues(spe, -1,
+				MAX_TIMESTAMP - 1);
+
+	ret = arm_spe_process_queues(spe, MAX_TIMESTAMP);
+	if (ret)
+		return ret;
+
+	if (!spe->use_ctx_pkt_for_pid)
+		ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
+			    "Matching of TIDs to SPE events could be inaccurate.\n");
+
+	return 0;
+}
+
+static void arm_spe_free_queue(void *priv)
+{
+	struct arm_spe_queue *speq = priv;
+
+	if (!speq)
+		return;
+	thread__zput(speq->thread);
+	arm_spe_decoder_free(speq->decoder);
+	zfree(&speq->event_buf);
+	free(speq);
+}
+
+static void arm_spe_free_events(struct perf_session *session)
+{
+	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
+					     auxtrace);
+	struct auxtrace_queues *queues = &spe->queues;
+	unsigned int i;
+
+	for (i = 0; i < queues->nr_queues; i++) {
+		arm_spe_free_queue(queues->queue_array[i].priv);
+		queues->queue_array[i].priv = NULL;
+	}
+	auxtrace_queues__free(queues);
+}
+
+static void arm_spe_free(struct perf_session *session)
+{
+	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
+					     auxtrace);
+
+	auxtrace_heap__free(&spe->heap);
+	arm_spe_free_events(session);
+	session->auxtrace = NULL;
+	free(spe);
+}
+
+static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
+				      struct evsel *evsel)
+{
+	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
+
+	return evsel->core.attr.type == spe->pmu_type;
+}
+
+static const char * const arm_spe_info_fmts[] = {
+	[ARM_SPE_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
+};
+
+static void arm_spe_print_info(__u64 *arr)
+{
+	if (!dump_trace)
+		return;
+
+	fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
+}
+
+struct arm_spe_synth {
+	struct perf_tool dummy_tool;
+	struct perf_session *session;
+};
+
+static int arm_spe_event_synth(struct perf_tool *tool,
+			       union perf_event *event,
+			       struct perf_sample *sample __maybe_unused,
+			       struct machine *machine __maybe_unused)
+{
+	struct arm_spe_synth *arm_spe_synth =
+		      container_of(tool, struct arm_spe_synth, dummy_tool);
+
+	return perf_session__deliver_synth_event(arm_spe_synth->session,
+						 event, NULL);
+}
+
+static int arm_spe_synth_event(struct perf_session *session,
+			       struct perf_event_attr *attr, u64 id)
+{
+	struct arm_spe_synth arm_spe_synth;
+
+	memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
+	arm_spe_synth.session = session;
+
+	return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
+					   &id, arm_spe_event_synth);
+}
+
+static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
+				    const char *name)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->core.id && evsel->core.id[0] == id) {
+			if (evsel->name)
+				zfree(&evsel->name);
+			evsel->name = strdup(name);
+			break;
+		}
+	}
+}
+
+static int
+arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
+{
+	struct evlist *evlist = session->evlist;
+	struct evsel *evsel;
+	struct perf_event_attr attr;
+	bool found = false;
+	u64 id;
+	int err;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->core.attr.type == spe->pmu_type) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		pr_debug("No selected events with SPE trace data\n");
+		return 0;
+	}
+
+	memset(&attr, 0, sizeof(struct perf_event_attr));
+	attr.size = sizeof(struct perf_event_attr);
+	attr.type = PERF_TYPE_HARDWARE;
+	attr.sample_type = evsel->core.attr.sample_type &
+				(PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
+	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
+			    PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
+	if (spe->timeless_decoding)
+		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
+	else
+		attr.sample_type |= PERF_SAMPLE_TIME;
+
+	spe->sample_type = attr.sample_type;
+
+	attr.exclude_user = evsel->core.attr.exclude_user;
+	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
+	attr.exclude_hv = evsel->core.attr.exclude_hv;
+	attr.exclude_host = evsel->core.attr.exclude_host;
+	attr.exclude_guest = evsel->core.attr.exclude_guest;
+	attr.sample_id_all = evsel->core.attr.sample_id_all;
+	attr.read_format = evsel->core.attr.read_format;
+
+	/* create new id val to be a fixed offset from evsel id */
+	id = evsel->core.id[0] + 1000000000;
+
+	if (!id)
+		id = 1;
+
+	if (spe->synth_opts.flc) {
+		spe->sample_flc = true;
+
+		/* Level 1 data cache miss */
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->l1d_miss_id = id;
+		arm_spe_set_event_name(evlist, id, "l1d-miss");
+		id += 1;
+
+		/* Level 1 data cache access */
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->l1d_access_id = id;
+		arm_spe_set_event_name(evlist, id, "l1d-access");
+		id += 1;
+	}
+
+	if (spe->synth_opts.llc) {
+		spe->sample_llc = true;
+
+		/* Last level cache miss */
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->llc_miss_id = id;
+		arm_spe_set_event_name(evlist, id, "llc-miss");
+		id += 1;
+
+		/* Last level cache access */
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->llc_access_id = id;
+		arm_spe_set_event_name(evlist, id, "llc-access");
+		id += 1;
+	}
+
+	if (spe->synth_opts.tlb) {
+		spe->sample_tlb = true;
+
+		/* TLB miss */
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->tlb_miss_id = id;
+		arm_spe_set_event_name(evlist, id, "tlb-miss");
+		id += 1;
+
+		/* TLB access */
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->tlb_access_id = id;
+		arm_spe_set_event_name(evlist, id, "tlb-access");
+		id += 1;
+	}
+
+	if (spe->synth_opts.branches) {
+		spe->sample_branch = true;
+
+		/* Branch miss */
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->branch_miss_id = id;
+		arm_spe_set_event_name(evlist, id, "branch-miss");
+		id += 1;
+	}
+
+	if (spe->synth_opts.remote_access) {
+		spe->sample_remote_access = true;
+
+		/* Remote access */
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->remote_access_id = id;
+		arm_spe_set_event_name(evlist, id, "remote-access");
+		id += 1;
+	}
+
+	if (spe->synth_opts.mem) {
+		spe->sample_memory = true;
+
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->memory_id = id;
+		arm_spe_set_event_name(evlist, id, "memory");
+		id += 1;
+	}
+
+	if (spe->synth_opts.instructions) {
+		if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
+			pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n");
+			goto synth_instructions_out;
+		}
+		if (spe->synth_opts.period > 1)
+			pr_warning("Arm SPE has a hardware-based sample period.\n"
+				   "Additional instruction events will be discarded by --itrace\n");
+
+		spe->sample_instructions = true;
+		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+		attr.sample_period = spe->synth_opts.period;
+		spe->instructions_sample_period = attr.sample_period;
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->instructions_id = id;
+		arm_spe_set_event_name(evlist, id, "instructions");
+	}
+synth_instructions_out:
+
+	return 0;
+}
+
+int arm_spe_process_auxtrace_info(union perf_event *event,
+				  struct perf_session *session)
+{
+	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
+	size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
+	struct perf_record_time_conv *tc = &session->time_conv;
+	const char *cpuid = perf_env__cpuid(session->evlist->env);
+	u64 midr = strtol(cpuid, NULL, 16);
+	struct arm_spe *spe;
+	int err;
+
+	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
+					min_sz)
+		return -EINVAL;
+
+	spe = zalloc(sizeof(struct arm_spe));
+	if (!spe)
+		return -ENOMEM;
+
+	err = auxtrace_queues__init(&spe->queues);
+	if (err)
+		goto err_free;
+
+	spe->session = session;
+	spe->machine = &session->machines.host; /* No kvm support */
+	spe->auxtrace_type = auxtrace_info->type;
+	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
+	spe->midr = midr;
+
+	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
+
+	/*
+	 * The synthesized event PERF_RECORD_TIME_CONV has been handled ahead
+	 * and the parameters for hardware clock are stored in the session
+	 * context.  Passes these parameters to the struct perf_tsc_conversion
+	 * in "spe->tc", which is used for later conversion between clock
+	 * counter and timestamp.
+	 *
+	 * For backward compatibility, copies the fields starting from
+	 * "time_cycles" only if they are contained in the event.
+	 */
+	spe->tc.time_shift = tc->time_shift;
+	spe->tc.time_mult = tc->time_mult;
+	spe->tc.time_zero = tc->time_zero;
+
+	if (event_contains(*tc, time_cycles)) {
+		spe->tc.time_cycles = tc->time_cycles;
+		spe->tc.time_mask = tc->time_mask;
+		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
+		spe->tc.cap_user_time_short = tc->cap_user_time_short;
+	}
+
+	spe->auxtrace.process_event = arm_spe_process_event;
+	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
+	spe->auxtrace.flush_events = arm_spe_flush;
+	spe->auxtrace.free_events = arm_spe_free_events;
+	spe->auxtrace.free = arm_spe_free;
+	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
+	session->auxtrace = &spe->auxtrace;
+
+	arm_spe_print_info(&auxtrace_info->priv[0]);
+
+	if (dump_trace)
+		return 0;
+
+	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
+		spe->synth_opts = *session->itrace_synth_opts;
+	else
+		itrace_synth_opts__set_default(&spe->synth_opts, false);
+
+	err = arm_spe_synth_events(spe, session);
+	if (err)
+		goto err_free_queues;
+
+	err = auxtrace_queues__process_index(&spe->queues, session);
+	if (err)
+		goto err_free_queues;
+
+	if (spe->queues.populated)
+		spe->data_queued = true;
+
+	return 0;
+
+err_free_queues:
+	auxtrace_queues__free(&spe->queues);
+	session->auxtrace = NULL;
+err_free:
+	free(spe);
+	return err;
+}
diff --git a/tools/perf/util/arm-spe.h b/tools/perf/util/arm-spe.h
new file mode 100644
index 000000000..98d323578
--- /dev/null
+++ b/tools/perf/util/arm-spe.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Arm Statistical Profiling Extensions (SPE) support
+ * Copyright (c) 2017-2018, Arm Ltd.
+ */
+
+#ifndef INCLUDE__PERF_ARM_SPE_H__
+#define INCLUDE__PERF_ARM_SPE_H__
+
+#define ARM_SPE_PMU_NAME "arm_spe_"
+
+enum {
+	ARM_SPE_PMU_TYPE,
+	ARM_SPE_PER_CPU_MMAPS,
+	ARM_SPE_AUXTRACE_PRIV_MAX,
+};
+
+#define ARM_SPE_AUXTRACE_PRIV_SIZE (ARM_SPE_AUXTRACE_PRIV_MAX * sizeof(u64))
+
+union perf_event;
+struct perf_session;
+struct perf_pmu;
+
+struct auxtrace_record *arm_spe_recording_init(int *err,
+					       struct perf_pmu *arm_spe_pmu);
+
+int arm_spe_process_auxtrace_info(union perf_event *event,
+				  struct perf_session *session);
+
+struct perf_event_attr *arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu);
+#endif
diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c
new file mode 100644
index 000000000..4940be4a0
--- /dev/null
+++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "arm64-frame-pointer-unwind-support.h"
+#include "callchain.h"
+#include "event.h"
+#include "perf_regs.h" // SMPL_REG_MASK
+#include "unwind.h"
+
+#define perf_event_arm_regs perf_event_arm64_regs
+#include "../../arch/arm64/include/uapi/asm/perf_regs.h"
+#undef perf_event_arm_regs
+
+struct entries {
+	u64 stack[2];
+	size_t length;
+};
+
+static bool get_leaf_frame_caller_enabled(struct perf_sample *sample)
+{
+	return callchain_param.record_mode == CALLCHAIN_FP && sample->user_regs.regs
+		&& sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_LR);
+}
+
+static int add_entry(struct unwind_entry *entry, void *arg)
+{
+	struct entries *entries = arg;
+
+	entries->stack[entries->length++] = entry->ip;
+	return 0;
+}
+
+u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int usr_idx)
+{
+	int ret;
+	struct entries entries = {};
+	struct regs_dump old_regs = sample->user_regs;
+
+	if (!get_leaf_frame_caller_enabled(sample))
+		return 0;
+
+	/*
+	 * If PC and SP are not recorded, get the value of PC from the stack
+	 * and set its mask. SP is not used when doing the unwinding but it
+	 * still needs to be set to prevent failures.
+	 */
+
+	if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_PC))) {
+		sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_PC);
+		sample->user_regs.cache_regs[PERF_REG_ARM64_PC] = sample->callchain->ips[usr_idx+1];
+	}
+
+	if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_SP))) {
+		sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_SP);
+		sample->user_regs.cache_regs[PERF_REG_ARM64_SP] = 0;
+	}
+
+	ret = unwind__get_entries(add_entry, &entries, thread, sample, 2, true);
+	sample->user_regs = old_regs;
+
+	if (ret || entries.length != 2)
+		return ret;
+
+	return callchain_param.order == ORDER_CALLER ? entries.stack[0] : entries.stack[1];
+}
diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.h b/tools/perf/util/arm64-frame-pointer-unwind-support.h
new file mode 100644
index 000000000..32af9ce94
--- /dev/null
+++ b/tools/perf/util/arm64-frame-pointer-unwind-support.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
+#define __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
+
+#include "event.h"
+#include "thread.h"
+
+u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int user_idx);
+
+#endif /* __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H */
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
new file mode 100644
index 000000000..4445c5c2d
--- /dev/null
+++ b/tools/perf/util/auxtrace.c
@@ -0,0 +1,2865 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * auxtrace.c: AUX area trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ */
+
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <stdbool.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+#include <linux/string.h>
+#include <linux/time64.h>
+
+#include <sys/param.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <linux/list.h>
+#include <linux/zalloc.h>
+
+#include "config.h"
+#include "evlist.h"
+#include "dso.h"
+#include "map.h"
+#include "pmu.h"
+#include "evsel.h"
+#include "evsel_config.h"
+#include "symbol.h"
+#include "util/perf_api_probe.h"
+#include "util/synthetic-events.h"
+#include "thread_map.h"
+#include "asm/bug.h"
+#include "auxtrace.h"
+
+#include <linux/hash.h>
+
+#include "event.h"
+#include "record.h"
+#include "session.h"
+#include "debug.h"
+#include <subcmd/parse-options.h>
+
+#include "cs-etm.h"
+#include "intel-pt.h"
+#include "intel-bts.h"
+#include "arm-spe.h"
+#include "hisi-ptt.h"
+#include "s390-cpumsf.h"
+#include "util/mmap.h"
+
+#include <linux/ctype.h>
+#include "symbol/kallsyms.h"
+#include <internal/lib.h>
+
+/*
+ * Make a group from 'leader' to 'last', requiring that the events were not
+ * already grouped to a different leader.
+ */
+static int evlist__regroup(struct evlist *evlist, struct evsel *leader, struct evsel *last)
+{
+	struct evsel *evsel;
+	bool grp;
+
+	if (!evsel__is_group_leader(leader))
+		return -EINVAL;
+
+	grp = false;
+	evlist__for_each_entry(evlist, evsel) {
+		if (grp) {
+			if (!(evsel__leader(evsel) == leader ||
+			     (evsel__leader(evsel) == evsel &&
+			      evsel->core.nr_members <= 1)))
+				return -EINVAL;
+		} else if (evsel == leader) {
+			grp = true;
+		}
+		if (evsel == last)
+			break;
+	}
+
+	grp = false;
+	evlist__for_each_entry(evlist, evsel) {
+		if (grp) {
+			if (!evsel__has_leader(evsel, leader)) {
+				evsel__set_leader(evsel, leader);
+				if (leader->core.nr_members < 1)
+					leader->core.nr_members = 1;
+				leader->core.nr_members += 1;
+			}
+		} else if (evsel == leader) {
+			grp = true;
+		}
+		if (evsel == last)
+			break;
+	}
+
+	return 0;
+}
+
+static bool auxtrace__dont_decode(struct perf_session *session)
+{
+	return !session->itrace_synth_opts ||
+	       session->itrace_synth_opts->dont_decode;
+}
+
+int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
+			struct auxtrace_mmap_params *mp,
+			void *userpg, int fd)
+{
+	struct perf_event_mmap_page *pc = userpg;
+
+	WARN_ONCE(mm->base, "Uninitialized auxtrace_mmap\n");
+
+	mm->userpg = userpg;
+	mm->mask = mp->mask;
+	mm->len = mp->len;
+	mm->prev = 0;
+	mm->idx = mp->idx;
+	mm->tid = mp->tid;
+	mm->cpu = mp->cpu.cpu;
+
+	if (!mp->len || !mp->mmap_needed) {
+		mm->base = NULL;
+		return 0;
+	}
+
+	pc->aux_offset = mp->offset;
+	pc->aux_size = mp->len;
+
+	mm->base = mmap(NULL, mp->len, mp->prot, MAP_SHARED, fd, mp->offset);
+	if (mm->base == MAP_FAILED) {
+		pr_debug2("failed to mmap AUX area\n");
+		mm->base = NULL;
+		return -1;
+	}
+
+	return 0;
+}
+
+void auxtrace_mmap__munmap(struct auxtrace_mmap *mm)
+{
+	if (mm->base) {
+		munmap(mm->base, mm->len);
+		mm->base = NULL;
+	}
+}
+
+void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
+				off_t auxtrace_offset,
+				unsigned int auxtrace_pages,
+				bool auxtrace_overwrite)
+{
+	if (auxtrace_pages) {
+		mp->offset = auxtrace_offset;
+		mp->len = auxtrace_pages * (size_t)page_size;
+		mp->mask = is_power_of_2(mp->len) ? mp->len - 1 : 0;
+		mp->prot = PROT_READ | (auxtrace_overwrite ? 0 : PROT_WRITE);
+		pr_debug2("AUX area mmap length %zu\n", mp->len);
+	} else {
+		mp->len = 0;
+	}
+}
+
+void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
+				   struct evlist *evlist,
+				   struct evsel *evsel, int idx)
+{
+	bool per_cpu = !perf_cpu_map__empty(evlist->core.user_requested_cpus);
+
+	mp->mmap_needed = evsel->needs_auxtrace_mmap;
+
+	if (!mp->mmap_needed)
+		return;
+
+	mp->idx = idx;
+
+	if (per_cpu) {
+		mp->cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx);
+		if (evlist->core.threads)
+			mp->tid = perf_thread_map__pid(evlist->core.threads, 0);
+		else
+			mp->tid = -1;
+	} else {
+		mp->cpu.cpu = -1;
+		mp->tid = perf_thread_map__pid(evlist->core.threads, idx);
+	}
+}
+
+#define AUXTRACE_INIT_NR_QUEUES	32
+
+static struct auxtrace_queue *auxtrace_alloc_queue_array(unsigned int nr_queues)
+{
+	struct auxtrace_queue *queue_array;
+	unsigned int max_nr_queues, i;
+
+	max_nr_queues = UINT_MAX / sizeof(struct auxtrace_queue);
+	if (nr_queues > max_nr_queues)
+		return NULL;
+
+	queue_array = calloc(nr_queues, sizeof(struct auxtrace_queue));
+	if (!queue_array)
+		return NULL;
+
+	for (i = 0; i < nr_queues; i++) {
+		INIT_LIST_HEAD(&queue_array[i].head);
+		queue_array[i].priv = NULL;
+	}
+
+	return queue_array;
+}
+
+int auxtrace_queues__init(struct auxtrace_queues *queues)
+{
+	queues->nr_queues = AUXTRACE_INIT_NR_QUEUES;
+	queues->queue_array = auxtrace_alloc_queue_array(queues->nr_queues);
+	if (!queues->queue_array)
+		return -ENOMEM;
+	return 0;
+}
+
+static int auxtrace_queues__grow(struct auxtrace_queues *queues,
+				 unsigned int new_nr_queues)
+{
+	unsigned int nr_queues = queues->nr_queues;
+	struct auxtrace_queue *queue_array;
+	unsigned int i;
+
+	if (!nr_queues)
+		nr_queues = AUXTRACE_INIT_NR_QUEUES;
+
+	while (nr_queues && nr_queues < new_nr_queues)
+		nr_queues <<= 1;
+
+	if (nr_queues < queues->nr_queues || nr_queues < new_nr_queues)
+		return -EINVAL;
+
+	queue_array = auxtrace_alloc_queue_array(nr_queues);
+	if (!queue_array)
+		return -ENOMEM;
+
+	for (i = 0; i < queues->nr_queues; i++) {
+		list_splice_tail(&queues->queue_array[i].head,
+				 &queue_array[i].head);
+		queue_array[i].tid = queues->queue_array[i].tid;
+		queue_array[i].cpu = queues->queue_array[i].cpu;
+		queue_array[i].set = queues->queue_array[i].set;
+		queue_array[i].priv = queues->queue_array[i].priv;
+	}
+
+	queues->nr_queues = nr_queues;
+	queues->queue_array = queue_array;
+
+	return 0;
+}
+
+static void *auxtrace_copy_data(u64 size, struct perf_session *session)
+{
+	int fd = perf_data__fd(session->data);
+	void *p;
+	ssize_t ret;
+
+	if (size > SSIZE_MAX)
+		return NULL;
+
+	p = malloc(size);
+	if (!p)
+		return NULL;
+
+	ret = readn(fd, p, size);
+	if (ret != (ssize_t)size) {
+		free(p);
+		return NULL;
+	}
+
+	return p;
+}
+
+static int auxtrace_queues__queue_buffer(struct auxtrace_queues *queues,
+					 unsigned int idx,
+					 struct auxtrace_buffer *buffer)
+{
+	struct auxtrace_queue *queue;
+	int err;
+
+	if (idx >= queues->nr_queues) {
+		err = auxtrace_queues__grow(queues, idx + 1);
+		if (err)
+			return err;
+	}
+
+	queue = &queues->queue_array[idx];
+
+	if (!queue->set) {
+		queue->set = true;
+		queue->tid = buffer->tid;
+		queue->cpu = buffer->cpu.cpu;
+	}
+
+	buffer->buffer_nr = queues->next_buffer_nr++;
+
+	list_add_tail(&buffer->list, &queue->head);
+
+	queues->new_data = true;
+	queues->populated = true;
+
+	return 0;
+}
+
+/* Limit buffers to 32MiB on 32-bit */
+#define BUFFER_LIMIT_FOR_32_BIT (32 * 1024 * 1024)
+
+static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues,
+					 unsigned int idx,
+					 struct auxtrace_buffer *buffer)
+{
+	u64 sz = buffer->size;
+	bool consecutive = false;
+	struct auxtrace_buffer *b;
+	int err;
+
+	while (sz > BUFFER_LIMIT_FOR_32_BIT) {
+		b = memdup(buffer, sizeof(struct auxtrace_buffer));
+		if (!b)
+			return -ENOMEM;
+		b->size = BUFFER_LIMIT_FOR_32_BIT;
+		b->consecutive = consecutive;
+		err = auxtrace_queues__queue_buffer(queues, idx, b);
+		if (err) {
+			auxtrace_buffer__free(b);
+			return err;
+		}
+		buffer->data_offset += BUFFER_LIMIT_FOR_32_BIT;
+		sz -= BUFFER_LIMIT_FOR_32_BIT;
+		consecutive = true;
+	}
+
+	buffer->size = sz;
+	buffer->consecutive = consecutive;
+
+	return 0;
+}
+
+static bool filter_cpu(struct perf_session *session, struct perf_cpu cpu)
+{
+	unsigned long *cpu_bitmap = session->itrace_synth_opts->cpu_bitmap;
+
+	return cpu_bitmap && cpu.cpu != -1 && !test_bit(cpu.cpu, cpu_bitmap);
+}
+
+static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues,
+				       struct perf_session *session,
+				       unsigned int idx,
+				       struct auxtrace_buffer *buffer,
+				       struct auxtrace_buffer **buffer_ptr)
+{
+	int err = -ENOMEM;
+
+	if (filter_cpu(session, buffer->cpu))
+		return 0;
+
+	buffer = memdup(buffer, sizeof(*buffer));
+	if (!buffer)
+		return -ENOMEM;
+
+	if (session->one_mmap) {
+		buffer->data = buffer->data_offset - session->one_mmap_offset +
+			       session->one_mmap_addr;
+	} else if (perf_data__is_pipe(session->data)) {
+		buffer->data = auxtrace_copy_data(buffer->size, session);
+		if (!buffer->data)
+			goto out_free;
+		buffer->data_needs_freeing = true;
+	} else if (BITS_PER_LONG == 32 &&
+		   buffer->size > BUFFER_LIMIT_FOR_32_BIT) {
+		err = auxtrace_queues__split_buffer(queues, idx, buffer);
+		if (err)
+			goto out_free;
+	}
+
+	err = auxtrace_queues__queue_buffer(queues, idx, buffer);
+	if (err)
+		goto out_free;
+
+	/* FIXME: Doesn't work for split buffer */
+	if (buffer_ptr)
+		*buffer_ptr = buffer;
+
+	return 0;
+
+out_free:
+	auxtrace_buffer__free(buffer);
+	return err;
+}
+
+int auxtrace_queues__add_event(struct auxtrace_queues *queues,
+			       struct perf_session *session,
+			       union perf_event *event, off_t data_offset,
+			       struct auxtrace_buffer **buffer_ptr)
+{
+	struct auxtrace_buffer buffer = {
+		.pid = -1,
+		.tid = event->auxtrace.tid,
+		.cpu = { event->auxtrace.cpu },
+		.data_offset = data_offset,
+		.offset = event->auxtrace.offset,
+		.reference = event->auxtrace.reference,
+		.size = event->auxtrace.size,
+	};
+	unsigned int idx = event->auxtrace.idx;
+
+	return auxtrace_queues__add_buffer(queues, session, idx, &buffer,
+					   buffer_ptr);
+}
+
+static int auxtrace_queues__add_indexed_event(struct auxtrace_queues *queues,
+					      struct perf_session *session,
+					      off_t file_offset, size_t sz)
+{
+	union perf_event *event;
+	int err;
+	char buf[PERF_SAMPLE_MAX_SIZE];
+
+	err = perf_session__peek_event(session, file_offset, buf,
+				       PERF_SAMPLE_MAX_SIZE, &event, NULL);
+	if (err)
+		return err;
+
+	if (event->header.type == PERF_RECORD_AUXTRACE) {
+		if (event->header.size < sizeof(struct perf_record_auxtrace) ||
+		    event->header.size != sz) {
+			err = -EINVAL;
+			goto out;
+		}
+		file_offset += event->header.size;
+		err = auxtrace_queues__add_event(queues, session, event,
+						 file_offset, NULL);
+	}
+out:
+	return err;
+}
+
+void auxtrace_queues__free(struct auxtrace_queues *queues)
+{
+	unsigned int i;
+
+	for (i = 0; i < queues->nr_queues; i++) {
+		while (!list_empty(&queues->queue_array[i].head)) {
+			struct auxtrace_buffer *buffer;
+
+			buffer = list_entry(queues->queue_array[i].head.next,
+					    struct auxtrace_buffer, list);
+			list_del_init(&buffer->list);
+			auxtrace_buffer__free(buffer);
+		}
+	}
+
+	zfree(&queues->queue_array);
+	queues->nr_queues = 0;
+}
+
+static void auxtrace_heapify(struct auxtrace_heap_item *heap_array,
+			     unsigned int pos, unsigned int queue_nr,
+			     u64 ordinal)
+{
+	unsigned int parent;
+
+	while (pos) {
+		parent = (pos - 1) >> 1;
+		if (heap_array[parent].ordinal <= ordinal)
+			break;
+		heap_array[pos] = heap_array[parent];
+		pos = parent;
+	}
+	heap_array[pos].queue_nr = queue_nr;
+	heap_array[pos].ordinal = ordinal;
+}
+
+int auxtrace_heap__add(struct auxtrace_heap *heap, unsigned int queue_nr,
+		       u64 ordinal)
+{
+	struct auxtrace_heap_item *heap_array;
+
+	if (queue_nr >= heap->heap_sz) {
+		unsigned int heap_sz = AUXTRACE_INIT_NR_QUEUES;
+
+		while (heap_sz <= queue_nr)
+			heap_sz <<= 1;
+		heap_array = realloc(heap->heap_array,
+				     heap_sz * sizeof(struct auxtrace_heap_item));
+		if (!heap_array)
+			return -ENOMEM;
+		heap->heap_array = heap_array;
+		heap->heap_sz = heap_sz;
+	}
+
+	auxtrace_heapify(heap->heap_array, heap->heap_cnt++, queue_nr, ordinal);
+
+	return 0;
+}
+
+void auxtrace_heap__free(struct auxtrace_heap *heap)
+{
+	zfree(&heap->heap_array);
+	heap->heap_cnt = 0;
+	heap->heap_sz = 0;
+}
+
+void auxtrace_heap__pop(struct auxtrace_heap *heap)
+{
+	unsigned int pos, last, heap_cnt = heap->heap_cnt;
+	struct auxtrace_heap_item *heap_array;
+
+	if (!heap_cnt)
+		return;
+
+	heap->heap_cnt -= 1;
+
+	heap_array = heap->heap_array;
+
+	pos = 0;
+	while (1) {
+		unsigned int left, right;
+
+		left = (pos << 1) + 1;
+		if (left >= heap_cnt)
+			break;
+		right = left + 1;
+		if (right >= heap_cnt) {
+			heap_array[pos] = heap_array[left];
+			return;
+		}
+		if (heap_array[left].ordinal < heap_array[right].ordinal) {
+			heap_array[pos] = heap_array[left];
+			pos = left;
+		} else {
+			heap_array[pos] = heap_array[right];
+			pos = right;
+		}
+	}
+
+	last = heap_cnt - 1;
+	auxtrace_heapify(heap_array, pos, heap_array[last].queue_nr,
+			 heap_array[last].ordinal);
+}
+
+size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr,
+				       struct evlist *evlist)
+{
+	if (itr)
+		return itr->info_priv_size(itr, evlist);
+	return 0;
+}
+
+static int auxtrace_not_supported(void)
+{
+	pr_err("AUX area tracing is not supported on this architecture\n");
+	return -EINVAL;
+}
+
+int auxtrace_record__info_fill(struct auxtrace_record *itr,
+			       struct perf_session *session,
+			       struct perf_record_auxtrace_info *auxtrace_info,
+			       size_t priv_size)
+{
+	if (itr)
+		return itr->info_fill(itr, session, auxtrace_info, priv_size);
+	return auxtrace_not_supported();
+}
+
+void auxtrace_record__free(struct auxtrace_record *itr)
+{
+	if (itr)
+		itr->free(itr);
+}
+
+int auxtrace_record__snapshot_start(struct auxtrace_record *itr)
+{
+	if (itr && itr->snapshot_start)
+		return itr->snapshot_start(itr);
+	return 0;
+}
+
+int auxtrace_record__snapshot_finish(struct auxtrace_record *itr, bool on_exit)
+{
+	if (!on_exit && itr && itr->snapshot_finish)
+		return itr->snapshot_finish(itr);
+	return 0;
+}
+
+int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx,
+				   struct auxtrace_mmap *mm,
+				   unsigned char *data, u64 *head, u64 *old)
+{
+	if (itr && itr->find_snapshot)
+		return itr->find_snapshot(itr, idx, mm, data, head, old);
+	return 0;
+}
+
+int auxtrace_record__options(struct auxtrace_record *itr,
+			     struct evlist *evlist,
+			     struct record_opts *opts)
+{
+	if (itr) {
+		itr->evlist = evlist;
+		return itr->recording_options(itr, evlist, opts);
+	}
+	return 0;
+}
+
+u64 auxtrace_record__reference(struct auxtrace_record *itr)
+{
+	if (itr)
+		return itr->reference(itr);
+	return 0;
+}
+
+int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
+				    struct record_opts *opts, const char *str)
+{
+	if (!str)
+		return 0;
+
+	/* PMU-agnostic options */
+	switch (*str) {
+	case 'e':
+		opts->auxtrace_snapshot_on_exit = true;
+		str++;
+		break;
+	default:
+		break;
+	}
+
+	if (itr && itr->parse_snapshot_options)
+		return itr->parse_snapshot_options(itr, opts, str);
+
+	pr_err("No AUX area tracing to snapshot\n");
+	return -EINVAL;
+}
+
+static int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx)
+{
+	bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.user_requested_cpus);
+
+	if (per_cpu_mmaps) {
+		struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx);
+		int cpu_map_idx = perf_cpu_map__idx(evsel->core.cpus, evlist_cpu);
+
+		if (cpu_map_idx == -1)
+			return -EINVAL;
+		return perf_evsel__enable_cpu(&evsel->core, cpu_map_idx);
+	}
+
+	return perf_evsel__enable_thread(&evsel->core, idx);
+}
+
+int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx)
+{
+	struct evsel *evsel;
+
+	if (!itr->evlist || !itr->pmu)
+		return -EINVAL;
+
+	evlist__for_each_entry(itr->evlist, evsel) {
+		if (evsel->core.attr.type == itr->pmu->type) {
+			if (evsel->disabled)
+				return 0;
+			return evlist__enable_event_idx(itr->evlist, evsel, idx);
+		}
+	}
+	return -EINVAL;
+}
+
+/*
+ * Event record size is 16-bit which results in a maximum size of about 64KiB.
+ * Allow about 4KiB for the rest of the sample record, to give a maximum
+ * AUX area sample size of 60KiB.
+ */
+#define MAX_AUX_SAMPLE_SIZE (60 * 1024)
+
+/* Arbitrary default size if no other default provided */
+#define DEFAULT_AUX_SAMPLE_SIZE (4 * 1024)
+
+static int auxtrace_validate_aux_sample_size(struct evlist *evlist,
+					     struct record_opts *opts)
+{
+	struct evsel *evsel;
+	bool has_aux_leader = false;
+	u32 sz;
+
+	evlist__for_each_entry(evlist, evsel) {
+		sz = evsel->core.attr.aux_sample_size;
+		if (evsel__is_group_leader(evsel)) {
+			has_aux_leader = evsel__is_aux_event(evsel);
+			if (sz) {
+				if (has_aux_leader)
+					pr_err("Cannot add AUX area sampling to an AUX area event\n");
+				else
+					pr_err("Cannot add AUX area sampling to a group leader\n");
+				return -EINVAL;
+			}
+		}
+		if (sz > MAX_AUX_SAMPLE_SIZE) {
+			pr_err("AUX area sample size %u too big, max. %d\n",
+			       sz, MAX_AUX_SAMPLE_SIZE);
+			return -EINVAL;
+		}
+		if (sz) {
+			if (!has_aux_leader) {
+				pr_err("Cannot add AUX area sampling because group leader is not an AUX area event\n");
+				return -EINVAL;
+			}
+			evsel__set_sample_bit(evsel, AUX);
+			opts->auxtrace_sample_mode = true;
+		} else {
+			evsel__reset_sample_bit(evsel, AUX);
+		}
+	}
+
+	if (!opts->auxtrace_sample_mode) {
+		pr_err("AUX area sampling requires an AUX area event group leader plus other events to which to add samples\n");
+		return -EINVAL;
+	}
+
+	if (!perf_can_aux_sample()) {
+		pr_err("AUX area sampling is not supported by kernel\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int auxtrace_parse_sample_options(struct auxtrace_record *itr,
+				  struct evlist *evlist,
+				  struct record_opts *opts, const char *str)
+{
+	struct evsel_config_term *term;
+	struct evsel *aux_evsel;
+	bool has_aux_sample_size = false;
+	bool has_aux_leader = false;
+	struct evsel *evsel;
+	char *endptr;
+	unsigned long sz;
+
+	if (!str)
+		goto no_opt;
+
+	if (!itr) {
+		pr_err("No AUX area event to sample\n");
+		return -EINVAL;
+	}
+
+	sz = strtoul(str, &endptr, 0);
+	if (*endptr || sz > UINT_MAX) {
+		pr_err("Bad AUX area sampling option: '%s'\n", str);
+		return -EINVAL;
+	}
+
+	if (!sz)
+		sz = itr->default_aux_sample_size;
+
+	if (!sz)
+		sz = DEFAULT_AUX_SAMPLE_SIZE;
+
+	/* Set aux_sample_size based on --aux-sample option */
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel__is_group_leader(evsel)) {
+			has_aux_leader = evsel__is_aux_event(evsel);
+		} else if (has_aux_leader) {
+			evsel->core.attr.aux_sample_size = sz;
+		}
+	}
+no_opt:
+	aux_evsel = NULL;
+	/* Override with aux_sample_size from config term */
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel__is_aux_event(evsel))
+			aux_evsel = evsel;
+		term = evsel__get_config_term(evsel, AUX_SAMPLE_SIZE);
+		if (term) {
+			has_aux_sample_size = true;
+			evsel->core.attr.aux_sample_size = term->val.aux_sample_size;
+			/* If possible, group with the AUX event */
+			if (aux_evsel && evsel->core.attr.aux_sample_size)
+				evlist__regroup(evlist, aux_evsel, evsel);
+		}
+	}
+
+	if (!str && !has_aux_sample_size)
+		return 0;
+
+	if (!itr) {
+		pr_err("No AUX area event to sample\n");
+		return -EINVAL;
+	}
+
+	return auxtrace_validate_aux_sample_size(evlist, opts);
+}
+
+void auxtrace_regroup_aux_output(struct evlist *evlist)
+{
+	struct evsel *evsel, *aux_evsel = NULL;
+	struct evsel_config_term *term;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel__is_aux_event(evsel))
+			aux_evsel = evsel;
+		term = evsel__get_config_term(evsel, AUX_OUTPUT);
+		/* If possible, group with the AUX event */
+		if (term && aux_evsel)
+			evlist__regroup(evlist, aux_evsel, evsel);
+	}
+}
+
+struct auxtrace_record *__weak
+auxtrace_record__init(struct evlist *evlist __maybe_unused, int *err)
+{
+	*err = 0;
+	return NULL;
+}
+
+static int auxtrace_index__alloc(struct list_head *head)
+{
+	struct auxtrace_index *auxtrace_index;
+
+	auxtrace_index = malloc(sizeof(struct auxtrace_index));
+	if (!auxtrace_index)
+		return -ENOMEM;
+
+	auxtrace_index->nr = 0;
+	INIT_LIST_HEAD(&auxtrace_index->list);
+
+	list_add_tail(&auxtrace_index->list, head);
+
+	return 0;
+}
+
+void auxtrace_index__free(struct list_head *head)
+{
+	struct auxtrace_index *auxtrace_index, *n;
+
+	list_for_each_entry_safe(auxtrace_index, n, head, list) {
+		list_del_init(&auxtrace_index->list);
+		free(auxtrace_index);
+	}
+}
+
+static struct auxtrace_index *auxtrace_index__last(struct list_head *head)
+{
+	struct auxtrace_index *auxtrace_index;
+	int err;
+
+	if (list_empty(head)) {
+		err = auxtrace_index__alloc(head);
+		if (err)
+			return NULL;
+	}
+
+	auxtrace_index = list_entry(head->prev, struct auxtrace_index, list);
+
+	if (auxtrace_index->nr >= PERF_AUXTRACE_INDEX_ENTRY_COUNT) {
+		err = auxtrace_index__alloc(head);
+		if (err)
+			return NULL;
+		auxtrace_index = list_entry(head->prev, struct auxtrace_index,
+					    list);
+	}
+
+	return auxtrace_index;
+}
+
+int auxtrace_index__auxtrace_event(struct list_head *head,
+				   union perf_event *event, off_t file_offset)
+{
+	struct auxtrace_index *auxtrace_index;
+	size_t nr;
+
+	auxtrace_index = auxtrace_index__last(head);
+	if (!auxtrace_index)
+		return -ENOMEM;
+
+	nr = auxtrace_index->nr;
+	auxtrace_index->entries[nr].file_offset = file_offset;
+	auxtrace_index->entries[nr].sz = event->header.size;
+	auxtrace_index->nr += 1;
+
+	return 0;
+}
+
+static int auxtrace_index__do_write(int fd,
+				    struct auxtrace_index *auxtrace_index)
+{
+	struct auxtrace_index_entry ent;
+	size_t i;
+
+	for (i = 0; i < auxtrace_index->nr; i++) {
+		ent.file_offset = auxtrace_index->entries[i].file_offset;
+		ent.sz = auxtrace_index->entries[i].sz;
+		if (writen(fd, &ent, sizeof(ent)) != sizeof(ent))
+			return -errno;
+	}
+	return 0;
+}
+
+int auxtrace_index__write(int fd, struct list_head *head)
+{
+	struct auxtrace_index *auxtrace_index;
+	u64 total = 0;
+	int err;
+
+	list_for_each_entry(auxtrace_index, head, list)
+		total += auxtrace_index->nr;
+
+	if (writen(fd, &total, sizeof(total)) != sizeof(total))
+		return -errno;
+
+	list_for_each_entry(auxtrace_index, head, list) {
+		err = auxtrace_index__do_write(fd, auxtrace_index);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int auxtrace_index__process_entry(int fd, struct list_head *head,
+					 bool needs_swap)
+{
+	struct auxtrace_index *auxtrace_index;
+	struct auxtrace_index_entry ent;
+	size_t nr;
+
+	if (readn(fd, &ent, sizeof(ent)) != sizeof(ent))
+		return -1;
+
+	auxtrace_index = auxtrace_index__last(head);
+	if (!auxtrace_index)
+		return -1;
+
+	nr = auxtrace_index->nr;
+	if (needs_swap) {
+		auxtrace_index->entries[nr].file_offset =
+						bswap_64(ent.file_offset);
+		auxtrace_index->entries[nr].sz = bswap_64(ent.sz);
+	} else {
+		auxtrace_index->entries[nr].file_offset = ent.file_offset;
+		auxtrace_index->entries[nr].sz = ent.sz;
+	}
+
+	auxtrace_index->nr = nr + 1;
+
+	return 0;
+}
+
+int auxtrace_index__process(int fd, u64 size, struct perf_session *session,
+			    bool needs_swap)
+{
+	struct list_head *head = &session->auxtrace_index;
+	u64 nr;
+
+	if (readn(fd, &nr, sizeof(u64)) != sizeof(u64))
+		return -1;
+
+	if (needs_swap)
+		nr = bswap_64(nr);
+
+	if (sizeof(u64) + nr * sizeof(struct auxtrace_index_entry) > size)
+		return -1;
+
+	while (nr--) {
+		int err;
+
+		err = auxtrace_index__process_entry(fd, head, needs_swap);
+		if (err)
+			return -1;
+	}
+
+	return 0;
+}
+
+static int auxtrace_queues__process_index_entry(struct auxtrace_queues *queues,
+						struct perf_session *session,
+						struct auxtrace_index_entry *ent)
+{
+	return auxtrace_queues__add_indexed_event(queues, session,
+						  ent->file_offset, ent->sz);
+}
+
+int auxtrace_queues__process_index(struct auxtrace_queues *queues,
+				   struct perf_session *session)
+{
+	struct auxtrace_index *auxtrace_index;
+	struct auxtrace_index_entry *ent;
+	size_t i;
+	int err;
+
+	if (auxtrace__dont_decode(session))
+		return 0;
+
+	list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
+		for (i = 0; i < auxtrace_index->nr; i++) {
+			ent = &auxtrace_index->entries[i];
+			err = auxtrace_queues__process_index_entry(queues,
+								   session,
+								   ent);
+			if (err)
+				return err;
+		}
+	}
+	return 0;
+}
+
+struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue,
+					      struct auxtrace_buffer *buffer)
+{
+	if (buffer) {
+		if (list_is_last(&buffer->list, &queue->head))
+			return NULL;
+		return list_entry(buffer->list.next, struct auxtrace_buffer,
+				  list);
+	} else {
+		if (list_empty(&queue->head))
+			return NULL;
+		return list_entry(queue->head.next, struct auxtrace_buffer,
+				  list);
+	}
+}
+
+struct auxtrace_queue *auxtrace_queues__sample_queue(struct auxtrace_queues *queues,
+						     struct perf_sample *sample,
+						     struct perf_session *session)
+{
+	struct perf_sample_id *sid;
+	unsigned int idx;
+	u64 id;
+
+	id = sample->id;
+	if (!id)
+		return NULL;
+
+	sid = evlist__id2sid(session->evlist, id);
+	if (!sid)
+		return NULL;
+
+	idx = sid->idx;
+
+	if (idx >= queues->nr_queues)
+		return NULL;
+
+	return &queues->queue_array[idx];
+}
+
+int auxtrace_queues__add_sample(struct auxtrace_queues *queues,
+				struct perf_session *session,
+				struct perf_sample *sample, u64 data_offset,
+				u64 reference)
+{
+	struct auxtrace_buffer buffer = {
+		.pid = -1,
+		.data_offset = data_offset,
+		.reference = reference,
+		.size = sample->aux_sample.size,
+	};
+	struct perf_sample_id *sid;
+	u64 id = sample->id;
+	unsigned int idx;
+
+	if (!id)
+		return -EINVAL;
+
+	sid = evlist__id2sid(session->evlist, id);
+	if (!sid)
+		return -ENOENT;
+
+	idx = sid->idx;
+	buffer.tid = sid->tid;
+	buffer.cpu = sid->cpu;
+
+	return auxtrace_queues__add_buffer(queues, session, idx, &buffer, NULL);
+}
+
+struct queue_data {
+	bool samples;
+	bool events;
+};
+
+static int auxtrace_queue_data_cb(struct perf_session *session,
+				  union perf_event *event, u64 offset,
+				  void *data)
+{
+	struct queue_data *qd = data;
+	struct perf_sample sample;
+	int err;
+
+	if (qd->events && event->header.type == PERF_RECORD_AUXTRACE) {
+		if (event->header.size < sizeof(struct perf_record_auxtrace))
+			return -EINVAL;
+		offset += event->header.size;
+		return session->auxtrace->queue_data(session, NULL, event,
+						     offset);
+	}
+
+	if (!qd->samples || event->header.type != PERF_RECORD_SAMPLE)
+		return 0;
+
+	err = evlist__parse_sample(session->evlist, event, &sample);
+	if (err)
+		return err;
+
+	if (!sample.aux_sample.size)
+		return 0;
+
+	offset += sample.aux_sample.data - (void *)event;
+
+	return session->auxtrace->queue_data(session, &sample, NULL, offset);
+}
+
+int auxtrace_queue_data(struct perf_session *session, bool samples, bool events)
+{
+	struct queue_data qd = {
+		.samples = samples,
+		.events = events,
+	};
+
+	if (auxtrace__dont_decode(session))
+		return 0;
+
+	if (perf_data__is_pipe(session->data))
+		return 0;
+
+	if (!session->auxtrace || !session->auxtrace->queue_data)
+		return -EINVAL;
+
+	return perf_session__peek_events(session, session->header.data_offset,
+					 session->header.data_size,
+					 auxtrace_queue_data_cb, &qd);
+}
+
+void *auxtrace_buffer__get_data_rw(struct auxtrace_buffer *buffer, int fd, bool rw)
+{
+	int prot = rw ? PROT_READ | PROT_WRITE : PROT_READ;
+	size_t adj = buffer->data_offset & (page_size - 1);
+	size_t size = buffer->size + adj;
+	off_t file_offset = buffer->data_offset - adj;
+	void *addr;
+
+	if (buffer->data)
+		return buffer->data;
+
+	addr = mmap(NULL, size, prot, MAP_SHARED, fd, file_offset);
+	if (addr == MAP_FAILED)
+		return NULL;
+
+	buffer->mmap_addr = addr;
+	buffer->mmap_size = size;
+
+	buffer->data = addr + adj;
+
+	return buffer->data;
+}
+
+void auxtrace_buffer__put_data(struct auxtrace_buffer *buffer)
+{
+	if (!buffer->data || !buffer->mmap_addr)
+		return;
+	munmap(buffer->mmap_addr, buffer->mmap_size);
+	buffer->mmap_addr = NULL;
+	buffer->mmap_size = 0;
+	buffer->data = NULL;
+	buffer->use_data = NULL;
+}
+
+void auxtrace_buffer__drop_data(struct auxtrace_buffer *buffer)
+{
+	auxtrace_buffer__put_data(buffer);
+	if (buffer->data_needs_freeing) {
+		buffer->data_needs_freeing = false;
+		zfree(&buffer->data);
+		buffer->use_data = NULL;
+		buffer->size = 0;
+	}
+}
+
+void auxtrace_buffer__free(struct auxtrace_buffer *buffer)
+{
+	auxtrace_buffer__drop_data(buffer);
+	free(buffer);
+}
+
+void auxtrace_synth_guest_error(struct perf_record_auxtrace_error *auxtrace_error, int type,
+				int code, int cpu, pid_t pid, pid_t tid, u64 ip,
+				const char *msg, u64 timestamp,
+				pid_t machine_pid, int vcpu)
+{
+	size_t size;
+
+	memset(auxtrace_error, 0, sizeof(struct perf_record_auxtrace_error));
+
+	auxtrace_error->header.type = PERF_RECORD_AUXTRACE_ERROR;
+	auxtrace_error->type = type;
+	auxtrace_error->code = code;
+	auxtrace_error->cpu = cpu;
+	auxtrace_error->pid = pid;
+	auxtrace_error->tid = tid;
+	auxtrace_error->fmt = 1;
+	auxtrace_error->ip = ip;
+	auxtrace_error->time = timestamp;
+	strlcpy(auxtrace_error->msg, msg, MAX_AUXTRACE_ERROR_MSG);
+	if (machine_pid) {
+		auxtrace_error->fmt = 2;
+		auxtrace_error->machine_pid = machine_pid;
+		auxtrace_error->vcpu = vcpu;
+		size = sizeof(*auxtrace_error);
+	} else {
+		size = (void *)auxtrace_error->msg - (void *)auxtrace_error +
+		       strlen(auxtrace_error->msg) + 1;
+	}
+	auxtrace_error->header.size = PERF_ALIGN(size, sizeof(u64));
+}
+
+void auxtrace_synth_error(struct perf_record_auxtrace_error *auxtrace_error, int type,
+			  int code, int cpu, pid_t pid, pid_t tid, u64 ip,
+			  const char *msg, u64 timestamp)
+{
+	auxtrace_synth_guest_error(auxtrace_error, type, code, cpu, pid, tid,
+				   ip, msg, timestamp, 0, -1);
+}
+
+int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
+					 struct perf_tool *tool,
+					 struct perf_session *session,
+					 perf_event__handler_t process)
+{
+	union perf_event *ev;
+	size_t priv_size;
+	int err;
+
+	pr_debug2("Synthesizing auxtrace information\n");
+	priv_size = auxtrace_record__info_priv_size(itr, session->evlist);
+	ev = zalloc(sizeof(struct perf_record_auxtrace_info) + priv_size);
+	if (!ev)
+		return -ENOMEM;
+
+	ev->auxtrace_info.header.type = PERF_RECORD_AUXTRACE_INFO;
+	ev->auxtrace_info.header.size = sizeof(struct perf_record_auxtrace_info) +
+					priv_size;
+	err = auxtrace_record__info_fill(itr, session, &ev->auxtrace_info,
+					 priv_size);
+	if (err)
+		goto out_free;
+
+	err = process(tool, ev, NULL, NULL);
+out_free:
+	free(ev);
+	return err;
+}
+
+static void unleader_evsel(struct evlist *evlist, struct evsel *leader)
+{
+	struct evsel *new_leader = NULL;
+	struct evsel *evsel;
+
+	/* Find new leader for the group */
+	evlist__for_each_entry(evlist, evsel) {
+		if (!evsel__has_leader(evsel, leader) || evsel == leader)
+			continue;
+		if (!new_leader)
+			new_leader = evsel;
+		evsel__set_leader(evsel, new_leader);
+	}
+
+	/* Update group information */
+	if (new_leader) {
+		zfree(&new_leader->group_name);
+		new_leader->group_name = leader->group_name;
+		leader->group_name = NULL;
+
+		new_leader->core.nr_members = leader->core.nr_members - 1;
+		leader->core.nr_members = 1;
+	}
+}
+
+static void unleader_auxtrace(struct perf_session *session)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(session->evlist, evsel) {
+		if (auxtrace__evsel_is_auxtrace(session, evsel) &&
+		    evsel__is_group_leader(evsel)) {
+			unleader_evsel(session->evlist, evsel);
+		}
+	}
+}
+
+int perf_event__process_auxtrace_info(struct perf_session *session,
+				      union perf_event *event)
+{
+	enum auxtrace_type type = event->auxtrace_info.type;
+	int err;
+
+	if (dump_trace)
+		fprintf(stdout, " type: %u\n", type);
+
+	switch (type) {
+	case PERF_AUXTRACE_INTEL_PT:
+		err = intel_pt_process_auxtrace_info(event, session);
+		break;
+	case PERF_AUXTRACE_INTEL_BTS:
+		err = intel_bts_process_auxtrace_info(event, session);
+		break;
+	case PERF_AUXTRACE_ARM_SPE:
+		err = arm_spe_process_auxtrace_info(event, session);
+		break;
+	case PERF_AUXTRACE_CS_ETM:
+		err = cs_etm__process_auxtrace_info(event, session);
+		break;
+	case PERF_AUXTRACE_S390_CPUMSF:
+		err = s390_cpumsf_process_auxtrace_info(event, session);
+		break;
+	case PERF_AUXTRACE_HISI_PTT:
+		err = hisi_ptt_process_auxtrace_info(event, session);
+		break;
+	case PERF_AUXTRACE_UNKNOWN:
+	default:
+		return -EINVAL;
+	}
+
+	if (err)
+		return err;
+
+	unleader_auxtrace(session);
+
+	return 0;
+}
+
+s64 perf_event__process_auxtrace(struct perf_session *session,
+				 union perf_event *event)
+{
+	s64 err;
+
+	if (dump_trace)
+		fprintf(stdout, " size: %#"PRI_lx64"  offset: %#"PRI_lx64"  ref: %#"PRI_lx64"  idx: %u  tid: %d  cpu: %d\n",
+			event->auxtrace.size, event->auxtrace.offset,
+			event->auxtrace.reference, event->auxtrace.idx,
+			event->auxtrace.tid, event->auxtrace.cpu);
+
+	if (auxtrace__dont_decode(session))
+		return event->auxtrace.size;
+
+	if (!session->auxtrace || event->header.type != PERF_RECORD_AUXTRACE)
+		return -EINVAL;
+
+	err = session->auxtrace->process_auxtrace_event(session, event, session->tool);
+	if (err < 0)
+		return err;
+
+	return event->auxtrace.size;
+}
+
+#define PERF_ITRACE_DEFAULT_PERIOD_TYPE		PERF_ITRACE_PERIOD_NANOSECS
+#define PERF_ITRACE_DEFAULT_PERIOD		100000
+#define PERF_ITRACE_DEFAULT_CALLCHAIN_SZ	16
+#define PERF_ITRACE_MAX_CALLCHAIN_SZ		1024
+#define PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ	64
+#define PERF_ITRACE_MAX_LAST_BRANCH_SZ		1024
+
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
+				    bool no_sample)
+{
+	synth_opts->branches = true;
+	synth_opts->transactions = true;
+	synth_opts->ptwrites = true;
+	synth_opts->pwr_events = true;
+	synth_opts->other_events = true;
+	synth_opts->intr_events = true;
+	synth_opts->errors = true;
+	synth_opts->flc = true;
+	synth_opts->llc = true;
+	synth_opts->tlb = true;
+	synth_opts->mem = true;
+	synth_opts->remote_access = true;
+
+	if (no_sample) {
+		synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS;
+		synth_opts->period = 1;
+		synth_opts->calls = true;
+	} else {
+		synth_opts->instructions = true;
+		synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
+		synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+	}
+	synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
+	synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
+	synth_opts->initial_skip = 0;
+}
+
+static int get_flag(const char **ptr, unsigned int *flags)
+{
+	while (1) {
+		char c = **ptr;
+
+		if (c >= 'a' && c <= 'z') {
+			*flags |= 1 << (c - 'a');
+			++*ptr;
+			return 0;
+		} else if (c == ' ') {
+			++*ptr;
+			continue;
+		} else {
+			return -1;
+		}
+	}
+}
+
+static int get_flags(const char **ptr, unsigned int *plus_flags, unsigned int *minus_flags)
+{
+	while (1) {
+		switch (**ptr) {
+		case '+':
+			++*ptr;
+			if (get_flag(ptr, plus_flags))
+				return -1;
+			break;
+		case '-':
+			++*ptr;
+			if (get_flag(ptr, minus_flags))
+				return -1;
+			break;
+		case ' ':
+			++*ptr;
+			break;
+		default:
+			return 0;
+		}
+	}
+}
+
+#define ITRACE_DFLT_LOG_ON_ERROR_SZ 16384
+
+static unsigned int itrace_log_on_error_size(void)
+{
+	unsigned int sz = 0;
+
+	perf_config_scan("itrace.debug-log-buffer-size", "%u", &sz);
+	return sz ?: ITRACE_DFLT_LOG_ON_ERROR_SZ;
+}
+
+/*
+ * Please check tools/perf/Documentation/perf-script.txt for information
+ * about the options parsed here, which is introduced after this cset,
+ * when support in 'perf script' for these options is introduced.
+ */
+int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts,
+			       const char *str, int unset)
+{
+	const char *p;
+	char *endptr;
+	bool period_type_set = false;
+	bool period_set = false;
+
+	synth_opts->set = true;
+
+	if (unset) {
+		synth_opts->dont_decode = true;
+		return 0;
+	}
+
+	if (!str) {
+		itrace_synth_opts__set_default(synth_opts,
+					       synth_opts->default_no_sample);
+		return 0;
+	}
+
+	for (p = str; *p;) {
+		switch (*p++) {
+		case 'i':
+			synth_opts->instructions = true;
+			while (*p == ' ' || *p == ',')
+				p += 1;
+			if (isdigit(*p)) {
+				synth_opts->period = strtoull(p, &endptr, 10);
+				period_set = true;
+				p = endptr;
+				while (*p == ' ' || *p == ',')
+					p += 1;
+				switch (*p++) {
+				case 'i':
+					synth_opts->period_type =
+						PERF_ITRACE_PERIOD_INSTRUCTIONS;
+					period_type_set = true;
+					break;
+				case 't':
+					synth_opts->period_type =
+						PERF_ITRACE_PERIOD_TICKS;
+					period_type_set = true;
+					break;
+				case 'm':
+					synth_opts->period *= 1000;
+					/* Fall through */
+				case 'u':
+					synth_opts->period *= 1000;
+					/* Fall through */
+				case 'n':
+					if (*p++ != 's')
+						goto out_err;
+					synth_opts->period_type =
+						PERF_ITRACE_PERIOD_NANOSECS;
+					period_type_set = true;
+					break;
+				case '\0':
+					goto out;
+				default:
+					goto out_err;
+				}
+			}
+			break;
+		case 'b':
+			synth_opts->branches = true;
+			break;
+		case 'x':
+			synth_opts->transactions = true;
+			break;
+		case 'w':
+			synth_opts->ptwrites = true;
+			break;
+		case 'p':
+			synth_opts->pwr_events = true;
+			break;
+		case 'o':
+			synth_opts->other_events = true;
+			break;
+		case 'I':
+			synth_opts->intr_events = true;
+			break;
+		case 'e':
+			synth_opts->errors = true;
+			if (get_flags(&p, &synth_opts->error_plus_flags,
+				      &synth_opts->error_minus_flags))
+				goto out_err;
+			break;
+		case 'd':
+			synth_opts->log = true;
+			if (get_flags(&p, &synth_opts->log_plus_flags,
+				      &synth_opts->log_minus_flags))
+				goto out_err;
+			if (synth_opts->log_plus_flags & AUXTRACE_LOG_FLG_ON_ERROR)
+				synth_opts->log_on_error_size = itrace_log_on_error_size();
+			break;
+		case 'c':
+			synth_opts->branches = true;
+			synth_opts->calls = true;
+			break;
+		case 'r':
+			synth_opts->branches = true;
+			synth_opts->returns = true;
+			break;
+		case 'G':
+		case 'g':
+			if (p[-1] == 'G')
+				synth_opts->add_callchain = true;
+			else
+				synth_opts->callchain = true;
+			synth_opts->callchain_sz =
+					PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
+			while (*p == ' ' || *p == ',')
+				p += 1;
+			if (isdigit(*p)) {
+				unsigned int val;
+
+				val = strtoul(p, &endptr, 10);
+				p = endptr;
+				if (!val || val > PERF_ITRACE_MAX_CALLCHAIN_SZ)
+					goto out_err;
+				synth_opts->callchain_sz = val;
+			}
+			break;
+		case 'L':
+		case 'l':
+			if (p[-1] == 'L')
+				synth_opts->add_last_branch = true;
+			else
+				synth_opts->last_branch = true;
+			synth_opts->last_branch_sz =
+					PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
+			while (*p == ' ' || *p == ',')
+				p += 1;
+			if (isdigit(*p)) {
+				unsigned int val;
+
+				val = strtoul(p, &endptr, 10);
+				p = endptr;
+				if (!val ||
+				    val > PERF_ITRACE_MAX_LAST_BRANCH_SZ)
+					goto out_err;
+				synth_opts->last_branch_sz = val;
+			}
+			break;
+		case 's':
+			synth_opts->initial_skip = strtoul(p, &endptr, 10);
+			if (p == endptr)
+				goto out_err;
+			p = endptr;
+			break;
+		case 'f':
+			synth_opts->flc = true;
+			break;
+		case 'm':
+			synth_opts->llc = true;
+			break;
+		case 't':
+			synth_opts->tlb = true;
+			break;
+		case 'a':
+			synth_opts->remote_access = true;
+			break;
+		case 'M':
+			synth_opts->mem = true;
+			break;
+		case 'q':
+			synth_opts->quick += 1;
+			break;
+		case 'A':
+			synth_opts->approx_ipc = true;
+			break;
+		case 'Z':
+			synth_opts->timeless_decoding = true;
+			break;
+		case ' ':
+		case ',':
+			break;
+		default:
+			goto out_err;
+		}
+	}
+out:
+	if (synth_opts->instructions) {
+		if (!period_type_set)
+			synth_opts->period_type =
+					PERF_ITRACE_DEFAULT_PERIOD_TYPE;
+		if (!period_set)
+			synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+	}
+
+	return 0;
+
+out_err:
+	pr_err("Bad Instruction Tracing options '%s'\n", str);
+	return -EINVAL;
+}
+
+int itrace_parse_synth_opts(const struct option *opt, const char *str, int unset)
+{
+	return itrace_do_parse_synth_opts(opt->value, str, unset);
+}
+
+static const char * const auxtrace_error_type_name[] = {
+	[PERF_AUXTRACE_ERROR_ITRACE] = "instruction trace",
+};
+
+static const char *auxtrace_error_name(int type)
+{
+	const char *error_type_name = NULL;
+
+	if (type < PERF_AUXTRACE_ERROR_MAX)
+		error_type_name = auxtrace_error_type_name[type];
+	if (!error_type_name)
+		error_type_name = "unknown AUX";
+	return error_type_name;
+}
+
+size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp)
+{
+	struct perf_record_auxtrace_error *e = &event->auxtrace_error;
+	unsigned long long nsecs = e->time;
+	const char *msg = e->msg;
+	int ret;
+
+	ret = fprintf(fp, " %s error type %u",
+		      auxtrace_error_name(e->type), e->type);
+
+	if (e->fmt && nsecs) {
+		unsigned long secs = nsecs / NSEC_PER_SEC;
+
+		nsecs -= secs * NSEC_PER_SEC;
+		ret += fprintf(fp, " time %lu.%09llu", secs, nsecs);
+	} else {
+		ret += fprintf(fp, " time 0");
+	}
+
+	if (!e->fmt)
+		msg = (const char *)&e->time;
+
+	if (e->fmt >= 2 && e->machine_pid)
+		ret += fprintf(fp, " machine_pid %d vcpu %d", e->machine_pid, e->vcpu);
+
+	ret += fprintf(fp, " cpu %d pid %d tid %d ip %#"PRI_lx64" code %u: %s\n",
+		       e->cpu, e->pid, e->tid, e->ip, e->code, msg);
+	return ret;
+}
+
+void perf_session__auxtrace_error_inc(struct perf_session *session,
+				      union perf_event *event)
+{
+	struct perf_record_auxtrace_error *e = &event->auxtrace_error;
+
+	if (e->type < PERF_AUXTRACE_ERROR_MAX)
+		session->evlist->stats.nr_auxtrace_errors[e->type] += 1;
+}
+
+void events_stats__auxtrace_error_warn(const struct events_stats *stats)
+{
+	int i;
+
+	for (i = 0; i < PERF_AUXTRACE_ERROR_MAX; i++) {
+		if (!stats->nr_auxtrace_errors[i])
+			continue;
+		ui__warning("%u %s errors\n",
+			    stats->nr_auxtrace_errors[i],
+			    auxtrace_error_name(i));
+	}
+}
+
+int perf_event__process_auxtrace_error(struct perf_session *session,
+				       union perf_event *event)
+{
+	if (auxtrace__dont_decode(session))
+		return 0;
+
+	perf_event__fprintf_auxtrace_error(event, stdout);
+	return 0;
+}
+
+/*
+ * In the compat mode kernel runs in 64-bit and perf tool runs in 32-bit mode,
+ * 32-bit perf tool cannot access 64-bit value atomically, which might lead to
+ * the issues caused by the below sequence on multiple CPUs: when perf tool
+ * accesses either the load operation or the store operation for 64-bit value,
+ * on some architectures the operation is divided into two instructions, one
+ * is for accessing the low 32-bit value and another is for the high 32-bit;
+ * thus these two user operations can give the kernel chances to access the
+ * 64-bit value, and thus leads to the unexpected load values.
+ *
+ *   kernel (64-bit)                        user (32-bit)
+ *
+ *   if (LOAD ->aux_tail) { --,             LOAD ->aux_head_lo
+ *       STORE $aux_data      |       ,--->
+ *       FLUSH $aux_data      |       |     LOAD ->aux_head_hi
+ *       STORE ->aux_head   --|-------`     smp_rmb()
+ *   }                        |             LOAD $data
+ *                            |             smp_mb()
+ *                            |             STORE ->aux_tail_lo
+ *                            `----------->
+ *                                          STORE ->aux_tail_hi
+ *
+ * For this reason, it's impossible for the perf tool to work correctly when
+ * the AUX head or tail is bigger than 4GB (more than 32 bits length); and we
+ * can not simply limit the AUX ring buffer to less than 4GB, the reason is
+ * the pointers can be increased monotonically, whatever the buffer size it is,
+ * at the end the head and tail can be bigger than 4GB and carry out to the
+ * high 32-bit.
+ *
+ * To mitigate the issues and improve the user experience, we can allow the
+ * perf tool working in certain conditions and bail out with error if detect
+ * any overflow cannot be handled.
+ *
+ * For reading the AUX head, it reads out the values for three times, and
+ * compares the high 4 bytes of the values between the first time and the last
+ * time, if there has no change for high 4 bytes injected by the kernel during
+ * the user reading sequence, it's safe for use the second value.
+ *
+ * When compat_auxtrace_mmap__write_tail() detects any carrying in the high
+ * 32 bits, it means there have two store operations in user space and it cannot
+ * promise the atomicity for 64-bit write, so return '-1' in this case to tell
+ * the caller an overflow error has happened.
+ */
+u64 __weak compat_auxtrace_mmap__read_head(struct auxtrace_mmap *mm)
+{
+	struct perf_event_mmap_page *pc = mm->userpg;
+	u64 first, second, last;
+	u64 mask = (u64)(UINT32_MAX) << 32;
+
+	do {
+		first = READ_ONCE(pc->aux_head);
+		/* Ensure all reads are done after we read the head */
+		smp_rmb();
+		second = READ_ONCE(pc->aux_head);
+		/* Ensure all reads are done after we read the head */
+		smp_rmb();
+		last = READ_ONCE(pc->aux_head);
+	} while ((first & mask) != (last & mask));
+
+	return second;
+}
+
+int __weak compat_auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail)
+{
+	struct perf_event_mmap_page *pc = mm->userpg;
+	u64 mask = (u64)(UINT32_MAX) << 32;
+
+	if (tail & mask)
+		return -1;
+
+	/* Ensure all reads are done before we write the tail out */
+	smp_mb();
+	WRITE_ONCE(pc->aux_tail, tail);
+	return 0;
+}
+
+static int __auxtrace_mmap__read(struct mmap *map,
+				 struct auxtrace_record *itr,
+				 struct perf_tool *tool, process_auxtrace_t fn,
+				 bool snapshot, size_t snapshot_size)
+{
+	struct auxtrace_mmap *mm = &map->auxtrace_mmap;
+	u64 head, old = mm->prev, offset, ref;
+	unsigned char *data = mm->base;
+	size_t size, head_off, old_off, len1, len2, padding;
+	union perf_event ev;
+	void *data1, *data2;
+	int kernel_is_64_bit = perf_env__kernel_is_64_bit(evsel__env(NULL));
+
+	head = auxtrace_mmap__read_head(mm, kernel_is_64_bit);
+
+	if (snapshot &&
+	    auxtrace_record__find_snapshot(itr, mm->idx, mm, data, &head, &old))
+		return -1;
+
+	if (old == head)
+		return 0;
+
+	pr_debug3("auxtrace idx %d old %#"PRIx64" head %#"PRIx64" diff %#"PRIx64"\n",
+		  mm->idx, old, head, head - old);
+
+	if (mm->mask) {
+		head_off = head & mm->mask;
+		old_off = old & mm->mask;
+	} else {
+		head_off = head % mm->len;
+		old_off = old % mm->len;
+	}
+
+	if (head_off > old_off)
+		size = head_off - old_off;
+	else
+		size = mm->len - (old_off - head_off);
+
+	if (snapshot && size > snapshot_size)
+		size = snapshot_size;
+
+	ref = auxtrace_record__reference(itr);
+
+	if (head > old || size <= head || mm->mask) {
+		offset = head - size;
+	} else {
+		/*
+		 * When the buffer size is not a power of 2, 'head' wraps at the
+		 * highest multiple of the buffer size, so we have to subtract
+		 * the remainder here.
+		 */
+		u64 rem = (0ULL - mm->len) % mm->len;
+
+		offset = head - size - rem;
+	}
+
+	if (size > head_off) {
+		len1 = size - head_off;
+		data1 = &data[mm->len - len1];
+		len2 = head_off;
+		data2 = &data[0];
+	} else {
+		len1 = size;
+		data1 = &data[head_off - len1];
+		len2 = 0;
+		data2 = NULL;
+	}
+
+	if (itr->alignment) {
+		unsigned int unwanted = len1 % itr->alignment;
+
+		len1 -= unwanted;
+		size -= unwanted;
+	}
+
+	/* padding must be written by fn() e.g. record__process_auxtrace() */
+	padding = size & (PERF_AUXTRACE_RECORD_ALIGNMENT - 1);
+	if (padding)
+		padding = PERF_AUXTRACE_RECORD_ALIGNMENT - padding;
+
+	memset(&ev, 0, sizeof(ev));
+	ev.auxtrace.header.type = PERF_RECORD_AUXTRACE;
+	ev.auxtrace.header.size = sizeof(ev.auxtrace);
+	ev.auxtrace.size = size + padding;
+	ev.auxtrace.offset = offset;
+	ev.auxtrace.reference = ref;
+	ev.auxtrace.idx = mm->idx;
+	ev.auxtrace.tid = mm->tid;
+	ev.auxtrace.cpu = mm->cpu;
+
+	if (fn(tool, map, &ev, data1, len1, data2, len2))
+		return -1;
+
+	mm->prev = head;
+
+	if (!snapshot) {
+		int err;
+
+		err = auxtrace_mmap__write_tail(mm, head, kernel_is_64_bit);
+		if (err < 0)
+			return err;
+
+		if (itr->read_finish) {
+			err = itr->read_finish(itr, mm->idx);
+			if (err < 0)
+				return err;
+		}
+	}
+
+	return 1;
+}
+
+int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr,
+			struct perf_tool *tool, process_auxtrace_t fn)
+{
+	return __auxtrace_mmap__read(map, itr, tool, fn, false, 0);
+}
+
+int auxtrace_mmap__read_snapshot(struct mmap *map,
+				 struct auxtrace_record *itr,
+				 struct perf_tool *tool, process_auxtrace_t fn,
+				 size_t snapshot_size)
+{
+	return __auxtrace_mmap__read(map, itr, tool, fn, true, snapshot_size);
+}
+
+/**
+ * struct auxtrace_cache - hash table to implement a cache
+ * @hashtable: the hashtable
+ * @sz: hashtable size (number of hlists)
+ * @entry_size: size of an entry
+ * @limit: limit the number of entries to this maximum, when reached the cache
+ *         is dropped and caching begins again with an empty cache
+ * @cnt: current number of entries
+ * @bits: hashtable size (@sz = 2^@bits)
+ */
+struct auxtrace_cache {
+	struct hlist_head *hashtable;
+	size_t sz;
+	size_t entry_size;
+	size_t limit;
+	size_t cnt;
+	unsigned int bits;
+};
+
+struct auxtrace_cache *auxtrace_cache__new(unsigned int bits, size_t entry_size,
+					   unsigned int limit_percent)
+{
+	struct auxtrace_cache *c;
+	struct hlist_head *ht;
+	size_t sz, i;
+
+	c = zalloc(sizeof(struct auxtrace_cache));
+	if (!c)
+		return NULL;
+
+	sz = 1UL << bits;
+
+	ht = calloc(sz, sizeof(struct hlist_head));
+	if (!ht)
+		goto out_free;
+
+	for (i = 0; i < sz; i++)
+		INIT_HLIST_HEAD(&ht[i]);
+
+	c->hashtable = ht;
+	c->sz = sz;
+	c->entry_size = entry_size;
+	c->limit = (c->sz * limit_percent) / 100;
+	c->bits = bits;
+
+	return c;
+
+out_free:
+	free(c);
+	return NULL;
+}
+
+static void auxtrace_cache__drop(struct auxtrace_cache *c)
+{
+	struct auxtrace_cache_entry *entry;
+	struct hlist_node *tmp;
+	size_t i;
+
+	if (!c)
+		return;
+
+	for (i = 0; i < c->sz; i++) {
+		hlist_for_each_entry_safe(entry, tmp, &c->hashtable[i], hash) {
+			hlist_del(&entry->hash);
+			auxtrace_cache__free_entry(c, entry);
+		}
+	}
+
+	c->cnt = 0;
+}
+
+void auxtrace_cache__free(struct auxtrace_cache *c)
+{
+	if (!c)
+		return;
+
+	auxtrace_cache__drop(c);
+	zfree(&c->hashtable);
+	free(c);
+}
+
+void *auxtrace_cache__alloc_entry(struct auxtrace_cache *c)
+{
+	return malloc(c->entry_size);
+}
+
+void auxtrace_cache__free_entry(struct auxtrace_cache *c __maybe_unused,
+				void *entry)
+{
+	free(entry);
+}
+
+int auxtrace_cache__add(struct auxtrace_cache *c, u32 key,
+			struct auxtrace_cache_entry *entry)
+{
+	if (c->limit && ++c->cnt > c->limit)
+		auxtrace_cache__drop(c);
+
+	entry->key = key;
+	hlist_add_head(&entry->hash, &c->hashtable[hash_32(key, c->bits)]);
+
+	return 0;
+}
+
+static struct auxtrace_cache_entry *auxtrace_cache__rm(struct auxtrace_cache *c,
+						       u32 key)
+{
+	struct auxtrace_cache_entry *entry;
+	struct hlist_head *hlist;
+	struct hlist_node *n;
+
+	if (!c)
+		return NULL;
+
+	hlist = &c->hashtable[hash_32(key, c->bits)];
+	hlist_for_each_entry_safe(entry, n, hlist, hash) {
+		if (entry->key == key) {
+			hlist_del(&entry->hash);
+			return entry;
+		}
+	}
+
+	return NULL;
+}
+
+void auxtrace_cache__remove(struct auxtrace_cache *c, u32 key)
+{
+	struct auxtrace_cache_entry *entry = auxtrace_cache__rm(c, key);
+
+	auxtrace_cache__free_entry(c, entry);
+}
+
+void *auxtrace_cache__lookup(struct auxtrace_cache *c, u32 key)
+{
+	struct auxtrace_cache_entry *entry;
+	struct hlist_head *hlist;
+
+	if (!c)
+		return NULL;
+
+	hlist = &c->hashtable[hash_32(key, c->bits)];
+	hlist_for_each_entry(entry, hlist, hash) {
+		if (entry->key == key)
+			return entry;
+	}
+
+	return NULL;
+}
+
+static void addr_filter__free_str(struct addr_filter *filt)
+{
+	zfree(&filt->str);
+	filt->action   = NULL;
+	filt->sym_from = NULL;
+	filt->sym_to   = NULL;
+	filt->filename = NULL;
+}
+
+static struct addr_filter *addr_filter__new(void)
+{
+	struct addr_filter *filt = zalloc(sizeof(*filt));
+
+	if (filt)
+		INIT_LIST_HEAD(&filt->list);
+
+	return filt;
+}
+
+static void addr_filter__free(struct addr_filter *filt)
+{
+	if (filt)
+		addr_filter__free_str(filt);
+	free(filt);
+}
+
+static void addr_filters__add(struct addr_filters *filts,
+			      struct addr_filter *filt)
+{
+	list_add_tail(&filt->list, &filts->head);
+	filts->cnt += 1;
+}
+
+static void addr_filters__del(struct addr_filters *filts,
+			      struct addr_filter *filt)
+{
+	list_del_init(&filt->list);
+	filts->cnt -= 1;
+}
+
+void addr_filters__init(struct addr_filters *filts)
+{
+	INIT_LIST_HEAD(&filts->head);
+	filts->cnt = 0;
+}
+
+void addr_filters__exit(struct addr_filters *filts)
+{
+	struct addr_filter *filt, *n;
+
+	list_for_each_entry_safe(filt, n, &filts->head, list) {
+		addr_filters__del(filts, filt);
+		addr_filter__free(filt);
+	}
+}
+
+static int parse_num_or_str(char **inp, u64 *num, const char **str,
+			    const char *str_delim)
+{
+	*inp += strspn(*inp, " ");
+
+	if (isdigit(**inp)) {
+		char *endptr;
+
+		if (!num)
+			return -EINVAL;
+		errno = 0;
+		*num = strtoull(*inp, &endptr, 0);
+		if (errno)
+			return -errno;
+		if (endptr == *inp)
+			return -EINVAL;
+		*inp = endptr;
+	} else {
+		size_t n;
+
+		if (!str)
+			return -EINVAL;
+		*inp += strspn(*inp, " ");
+		*str = *inp;
+		n = strcspn(*inp, str_delim);
+		if (!n)
+			return -EINVAL;
+		*inp += n;
+		if (**inp) {
+			**inp = '\0';
+			*inp += 1;
+		}
+	}
+	return 0;
+}
+
+static int parse_action(struct addr_filter *filt)
+{
+	if (!strcmp(filt->action, "filter")) {
+		filt->start = true;
+		filt->range = true;
+	} else if (!strcmp(filt->action, "start")) {
+		filt->start = true;
+	} else if (!strcmp(filt->action, "stop")) {
+		filt->start = false;
+	} else if (!strcmp(filt->action, "tracestop")) {
+		filt->start = false;
+		filt->range = true;
+		filt->action += 5; /* Change 'tracestop' to 'stop' */
+	} else {
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int parse_sym_idx(char **inp, int *idx)
+{
+	*idx = -1;
+
+	*inp += strspn(*inp, " ");
+
+	if (**inp != '#')
+		return 0;
+
+	*inp += 1;
+
+	if (**inp == 'g' || **inp == 'G') {
+		*inp += 1;
+		*idx = 0;
+	} else {
+		unsigned long num;
+		char *endptr;
+
+		errno = 0;
+		num = strtoul(*inp, &endptr, 0);
+		if (errno)
+			return -errno;
+		if (endptr == *inp || num > INT_MAX)
+			return -EINVAL;
+		*inp = endptr;
+		*idx = num;
+	}
+
+	return 0;
+}
+
+static int parse_addr_size(char **inp, u64 *num, const char **str, int *idx)
+{
+	int err = parse_num_or_str(inp, num, str, " ");
+
+	if (!err && *str)
+		err = parse_sym_idx(inp, idx);
+
+	return err;
+}
+
+static int parse_one_filter(struct addr_filter *filt, const char **filter_inp)
+{
+	char *fstr;
+	int err;
+
+	filt->str = fstr = strdup(*filter_inp);
+	if (!fstr)
+		return -ENOMEM;
+
+	err = parse_num_or_str(&fstr, NULL, &filt->action, " ");
+	if (err)
+		goto out_err;
+
+	err = parse_action(filt);
+	if (err)
+		goto out_err;
+
+	err = parse_addr_size(&fstr, &filt->addr, &filt->sym_from,
+			      &filt->sym_from_idx);
+	if (err)
+		goto out_err;
+
+	fstr += strspn(fstr, " ");
+
+	if (*fstr == '/') {
+		fstr += 1;
+		err = parse_addr_size(&fstr, &filt->size, &filt->sym_to,
+				      &filt->sym_to_idx);
+		if (err)
+			goto out_err;
+		filt->range = true;
+	}
+
+	fstr += strspn(fstr, " ");
+
+	if (*fstr == '@') {
+		fstr += 1;
+		err = parse_num_or_str(&fstr, NULL, &filt->filename, " ,");
+		if (err)
+			goto out_err;
+	}
+
+	fstr += strspn(fstr, " ,");
+
+	*filter_inp += fstr - filt->str;
+
+	return 0;
+
+out_err:
+	addr_filter__free_str(filt);
+
+	return err;
+}
+
+int addr_filters__parse_bare_filter(struct addr_filters *filts,
+				    const char *filter)
+{
+	struct addr_filter *filt;
+	const char *fstr = filter;
+	int err;
+
+	while (*fstr) {
+		filt = addr_filter__new();
+		err = parse_one_filter(filt, &fstr);
+		if (err) {
+			addr_filter__free(filt);
+			addr_filters__exit(filts);
+			return err;
+		}
+		addr_filters__add(filts, filt);
+	}
+
+	return 0;
+}
+
+struct sym_args {
+	const char	*name;
+	u64		start;
+	u64		size;
+	int		idx;
+	int		cnt;
+	bool		started;
+	bool		global;
+	bool		selected;
+	bool		duplicate;
+	bool		near;
+};
+
+static bool kern_sym_name_match(const char *kname, const char *name)
+{
+	size_t n = strlen(name);
+
+	return !strcmp(kname, name) ||
+	       (!strncmp(kname, name, n) && kname[n] == '\t');
+}
+
+static bool kern_sym_match(struct sym_args *args, const char *name, char type)
+{
+	/* A function with the same name, and global or the n'th found or any */
+	return kallsyms__is_function(type) &&
+	       kern_sym_name_match(name, args->name) &&
+	       ((args->global && isupper(type)) ||
+		(args->selected && ++(args->cnt) == args->idx) ||
+		(!args->global && !args->selected));
+}
+
+static int find_kern_sym_cb(void *arg, const char *name, char type, u64 start)
+{
+	struct sym_args *args = arg;
+
+	if (args->started) {
+		if (!args->size)
+			args->size = start - args->start;
+		if (args->selected) {
+			if (args->size)
+				return 1;
+		} else if (kern_sym_match(args, name, type)) {
+			args->duplicate = true;
+			return 1;
+		}
+	} else if (kern_sym_match(args, name, type)) {
+		args->started = true;
+		args->start = start;
+	}
+
+	return 0;
+}
+
+static int print_kern_sym_cb(void *arg, const char *name, char type, u64 start)
+{
+	struct sym_args *args = arg;
+
+	if (kern_sym_match(args, name, type)) {
+		pr_err("#%d\t0x%"PRIx64"\t%c\t%s\n",
+		       ++args->cnt, start, type, name);
+		args->near = true;
+	} else if (args->near) {
+		args->near = false;
+		pr_err("\t\twhich is near\t\t%s\n", name);
+	}
+
+	return 0;
+}
+
+static int sym_not_found_error(const char *sym_name, int idx)
+{
+	if (idx > 0) {
+		pr_err("N'th occurrence (N=%d) of symbol '%s' not found.\n",
+		       idx, sym_name);
+	} else if (!idx) {
+		pr_err("Global symbol '%s' not found.\n", sym_name);
+	} else {
+		pr_err("Symbol '%s' not found.\n", sym_name);
+	}
+	pr_err("Note that symbols must be functions.\n");
+
+	return -EINVAL;
+}
+
+static int find_kern_sym(const char *sym_name, u64 *start, u64 *size, int idx)
+{
+	struct sym_args args = {
+		.name = sym_name,
+		.idx = idx,
+		.global = !idx,
+		.selected = idx > 0,
+	};
+	int err;
+
+	*start = 0;
+	*size = 0;
+
+	err = kallsyms__parse("/proc/kallsyms", &args, find_kern_sym_cb);
+	if (err < 0) {
+		pr_err("Failed to parse /proc/kallsyms\n");
+		return err;
+	}
+
+	if (args.duplicate) {
+		pr_err("Multiple kernel symbols with name '%s'\n", sym_name);
+		args.cnt = 0;
+		kallsyms__parse("/proc/kallsyms", &args, print_kern_sym_cb);
+		pr_err("Disambiguate symbol name by inserting #n after the name e.g. %s #2\n",
+		       sym_name);
+		pr_err("Or select a global symbol by inserting #0 or #g or #G\n");
+		return -EINVAL;
+	}
+
+	if (!args.started) {
+		pr_err("Kernel symbol lookup: ");
+		return sym_not_found_error(sym_name, idx);
+	}
+
+	*start = args.start;
+	*size = args.size;
+
+	return 0;
+}
+
+static int find_entire_kern_cb(void *arg, const char *name __maybe_unused,
+			       char type, u64 start)
+{
+	struct sym_args *args = arg;
+	u64 size;
+
+	if (!kallsyms__is_function(type))
+		return 0;
+
+	if (!args->started) {
+		args->started = true;
+		args->start = start;
+	}
+	/* Don't know exactly where the kernel ends, so we add a page */
+	size = round_up(start, page_size) + page_size - args->start;
+	if (size > args->size)
+		args->size = size;
+
+	return 0;
+}
+
+static int addr_filter__entire_kernel(struct addr_filter *filt)
+{
+	struct sym_args args = { .started = false };
+	int err;
+
+	err = kallsyms__parse("/proc/kallsyms", &args, find_entire_kern_cb);
+	if (err < 0 || !args.started) {
+		pr_err("Failed to parse /proc/kallsyms\n");
+		return err;
+	}
+
+	filt->addr = args.start;
+	filt->size = args.size;
+
+	return 0;
+}
+
+static int check_end_after_start(struct addr_filter *filt, u64 start, u64 size)
+{
+	if (start + size >= filt->addr)
+		return 0;
+
+	if (filt->sym_from) {
+		pr_err("Symbol '%s' (0x%"PRIx64") comes before '%s' (0x%"PRIx64")\n",
+		       filt->sym_to, start, filt->sym_from, filt->addr);
+	} else {
+		pr_err("Symbol '%s' (0x%"PRIx64") comes before address 0x%"PRIx64")\n",
+		       filt->sym_to, start, filt->addr);
+	}
+
+	return -EINVAL;
+}
+
+static int addr_filter__resolve_kernel_syms(struct addr_filter *filt)
+{
+	bool no_size = false;
+	u64 start, size;
+	int err;
+
+	if (symbol_conf.kptr_restrict) {
+		pr_err("Kernel addresses are restricted. Unable to resolve kernel symbols.\n");
+		return -EINVAL;
+	}
+
+	if (filt->sym_from && !strcmp(filt->sym_from, "*"))
+		return addr_filter__entire_kernel(filt);
+
+	if (filt->sym_from) {
+		err = find_kern_sym(filt->sym_from, &start, &size,
+				    filt->sym_from_idx);
+		if (err)
+			return err;
+		filt->addr = start;
+		if (filt->range && !filt->size && !filt->sym_to) {
+			filt->size = size;
+			no_size = !size;
+		}
+	}
+
+	if (filt->sym_to) {
+		err = find_kern_sym(filt->sym_to, &start, &size,
+				    filt->sym_to_idx);
+		if (err)
+			return err;
+
+		err = check_end_after_start(filt, start, size);
+		if (err)
+			return err;
+		filt->size = start + size - filt->addr;
+		no_size = !size;
+	}
+
+	/* The very last symbol in kallsyms does not imply a particular size */
+	if (no_size) {
+		pr_err("Cannot determine size of symbol '%s'\n",
+		       filt->sym_to ? filt->sym_to : filt->sym_from);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct dso *load_dso(const char *name)
+{
+	struct map *map;
+	struct dso *dso;
+
+	map = dso__new_map(name);
+	if (!map)
+		return NULL;
+
+	if (map__load(map) < 0)
+		pr_err("File '%s' not found or has no symbols.\n", name);
+
+	dso = dso__get(map->dso);
+
+	map__put(map);
+
+	return dso;
+}
+
+static bool dso_sym_match(struct symbol *sym, const char *name, int *cnt,
+			  int idx)
+{
+	/* Same name, and global or the n'th found or any */
+	return !arch__compare_symbol_names(name, sym->name) &&
+	       ((!idx && sym->binding == STB_GLOBAL) ||
+		(idx > 0 && ++*cnt == idx) ||
+		idx < 0);
+}
+
+static void print_duplicate_syms(struct dso *dso, const char *sym_name)
+{
+	struct symbol *sym;
+	bool near = false;
+	int cnt = 0;
+
+	pr_err("Multiple symbols with name '%s'\n", sym_name);
+
+	sym = dso__first_symbol(dso);
+	while (sym) {
+		if (dso_sym_match(sym, sym_name, &cnt, -1)) {
+			pr_err("#%d\t0x%"PRIx64"\t%c\t%s\n",
+			       ++cnt, sym->start,
+			       sym->binding == STB_GLOBAL ? 'g' :
+			       sym->binding == STB_LOCAL  ? 'l' : 'w',
+			       sym->name);
+			near = true;
+		} else if (near) {
+			near = false;
+			pr_err("\t\twhich is near\t\t%s\n", sym->name);
+		}
+		sym = dso__next_symbol(sym);
+	}
+
+	pr_err("Disambiguate symbol name by inserting #n after the name e.g. %s #2\n",
+	       sym_name);
+	pr_err("Or select a global symbol by inserting #0 or #g or #G\n");
+}
+
+static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start,
+			u64 *size, int idx)
+{
+	struct symbol *sym;
+	int cnt = 0;
+
+	*start = 0;
+	*size = 0;
+
+	sym = dso__first_symbol(dso);
+	while (sym) {
+		if (*start) {
+			if (!*size)
+				*size = sym->start - *start;
+			if (idx > 0) {
+				if (*size)
+					return 0;
+			} else if (dso_sym_match(sym, sym_name, &cnt, idx)) {
+				print_duplicate_syms(dso, sym_name);
+				return -EINVAL;
+			}
+		} else if (dso_sym_match(sym, sym_name, &cnt, idx)) {
+			*start = sym->start;
+			*size = sym->end - sym->start;
+		}
+		sym = dso__next_symbol(sym);
+	}
+
+	if (!*start)
+		return sym_not_found_error(sym_name, idx);
+
+	return 0;
+}
+
+static int addr_filter__entire_dso(struct addr_filter *filt, struct dso *dso)
+{
+	if (dso__data_file_size(dso, NULL)) {
+		pr_err("Failed to determine filter for %s\nCannot determine file size.\n",
+		       filt->filename);
+		return -EINVAL;
+	}
+
+	filt->addr = 0;
+	filt->size = dso->data.file_size;
+
+	return 0;
+}
+
+static int addr_filter__resolve_syms(struct addr_filter *filt)
+{
+	u64 start, size;
+	struct dso *dso;
+	int err = 0;
+
+	if (!filt->sym_from && !filt->sym_to)
+		return 0;
+
+	if (!filt->filename)
+		return addr_filter__resolve_kernel_syms(filt);
+
+	dso = load_dso(filt->filename);
+	if (!dso) {
+		pr_err("Failed to load symbols from: %s\n", filt->filename);
+		return -EINVAL;
+	}
+
+	if (filt->sym_from && !strcmp(filt->sym_from, "*")) {
+		err = addr_filter__entire_dso(filt, dso);
+		goto put_dso;
+	}
+
+	if (filt->sym_from) {
+		err = find_dso_sym(dso, filt->sym_from, &start, &size,
+				   filt->sym_from_idx);
+		if (err)
+			goto put_dso;
+		filt->addr = start;
+		if (filt->range && !filt->size && !filt->sym_to)
+			filt->size = size;
+	}
+
+	if (filt->sym_to) {
+		err = find_dso_sym(dso, filt->sym_to, &start, &size,
+				   filt->sym_to_idx);
+		if (err)
+			goto put_dso;
+
+		err = check_end_after_start(filt, start, size);
+		if (err)
+			return err;
+
+		filt->size = start + size - filt->addr;
+	}
+
+put_dso:
+	dso__put(dso);
+
+	return err;
+}
+
+static char *addr_filter__to_str(struct addr_filter *filt)
+{
+	char filename_buf[PATH_MAX];
+	const char *at = "";
+	const char *fn = "";
+	char *filter;
+	int err;
+
+	if (filt->filename) {
+		at = "@";
+		fn = realpath(filt->filename, filename_buf);
+		if (!fn)
+			return NULL;
+	}
+
+	if (filt->range) {
+		err = asprintf(&filter, "%s 0x%"PRIx64"/0x%"PRIx64"%s%s",
+			       filt->action, filt->addr, filt->size, at, fn);
+	} else {
+		err = asprintf(&filter, "%s 0x%"PRIx64"%s%s",
+			       filt->action, filt->addr, at, fn);
+	}
+
+	return err < 0 ? NULL : filter;
+}
+
+static int parse_addr_filter(struct evsel *evsel, const char *filter,
+			     int max_nr)
+{
+	struct addr_filters filts;
+	struct addr_filter *filt;
+	int err;
+
+	addr_filters__init(&filts);
+
+	err = addr_filters__parse_bare_filter(&filts, filter);
+	if (err)
+		goto out_exit;
+
+	if (filts.cnt > max_nr) {
+		pr_err("Error: number of address filters (%d) exceeds maximum (%d)\n",
+		       filts.cnt, max_nr);
+		err = -EINVAL;
+		goto out_exit;
+	}
+
+	list_for_each_entry(filt, &filts.head, list) {
+		char *new_filter;
+
+		err = addr_filter__resolve_syms(filt);
+		if (err)
+			goto out_exit;
+
+		new_filter = addr_filter__to_str(filt);
+		if (!new_filter) {
+			err = -ENOMEM;
+			goto out_exit;
+		}
+
+		if (evsel__append_addr_filter(evsel, new_filter)) {
+			err = -ENOMEM;
+			goto out_exit;
+		}
+	}
+
+out_exit:
+	addr_filters__exit(&filts);
+
+	if (err) {
+		pr_err("Failed to parse address filter: '%s'\n", filter);
+		pr_err("Filter format is: filter|start|stop|tracestop <start symbol or address> [/ <end symbol or size>] [@<file name>]\n");
+		pr_err("Where multiple filters are separated by space or comma.\n");
+	}
+
+	return err;
+}
+
+static int evsel__nr_addr_filter(struct evsel *evsel)
+{
+	struct perf_pmu *pmu = evsel__find_pmu(evsel);
+	int nr_addr_filters = 0;
+
+	if (!pmu)
+		return 0;
+
+	perf_pmu__scan_file(pmu, "nr_addr_filters", "%d", &nr_addr_filters);
+
+	return nr_addr_filters;
+}
+
+int auxtrace_parse_filters(struct evlist *evlist)
+{
+	struct evsel *evsel;
+	char *filter;
+	int err, max_nr;
+
+	evlist__for_each_entry(evlist, evsel) {
+		filter = evsel->filter;
+		max_nr = evsel__nr_addr_filter(evsel);
+		if (!filter || !max_nr)
+			continue;
+		evsel->filter = NULL;
+		err = parse_addr_filter(evsel, filter, max_nr);
+		free(filter);
+		if (err)
+			return err;
+		pr_debug("Address filter: %s\n", evsel->filter);
+	}
+
+	return 0;
+}
+
+int auxtrace__process_event(struct perf_session *session, union perf_event *event,
+			    struct perf_sample *sample, struct perf_tool *tool)
+{
+	if (!session->auxtrace)
+		return 0;
+
+	return session->auxtrace->process_event(session, event, sample, tool);
+}
+
+void auxtrace__dump_auxtrace_sample(struct perf_session *session,
+				    struct perf_sample *sample)
+{
+	if (!session->auxtrace || !session->auxtrace->dump_auxtrace_sample ||
+	    auxtrace__dont_decode(session))
+		return;
+
+	session->auxtrace->dump_auxtrace_sample(session, sample);
+}
+
+int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool)
+{
+	if (!session->auxtrace)
+		return 0;
+
+	return session->auxtrace->flush_events(session, tool);
+}
+
+void auxtrace__free_events(struct perf_session *session)
+{
+	if (!session->auxtrace)
+		return;
+
+	return session->auxtrace->free_events(session);
+}
+
+void auxtrace__free(struct perf_session *session)
+{
+	if (!session->auxtrace)
+		return;
+
+	return session->auxtrace->free(session);
+}
+
+bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
+				 struct evsel *evsel)
+{
+	if (!session->auxtrace || !session->auxtrace->evsel_is_auxtrace)
+		return false;
+
+	return session->auxtrace->evsel_is_auxtrace(session, evsel);
+}
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
new file mode 100644
index 000000000..6a0f9b98f
--- /dev/null
+++ b/tools/perf/util/auxtrace.h
@@ -0,0 +1,901 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * auxtrace.h: AUX area trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ */
+
+#ifndef __PERF_AUXTRACE_H
+#define __PERF_AUXTRACE_H
+
+#include <sys/types.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h> // FILE
+#include <linux/list.h>
+#include <linux/perf_event.h>
+#include <linux/types.h>
+#include <internal/cpumap.h>
+#include <asm/bitsperlong.h>
+#include <asm/barrier.h>
+
+union perf_event;
+struct perf_session;
+struct evlist;
+struct evsel;
+struct perf_tool;
+struct mmap;
+struct perf_sample;
+struct option;
+struct record_opts;
+struct perf_record_auxtrace_error;
+struct perf_record_auxtrace_info;
+struct events_stats;
+struct perf_pmu;
+
+enum auxtrace_error_type {
+       PERF_AUXTRACE_ERROR_ITRACE  = 1,
+       PERF_AUXTRACE_ERROR_MAX
+};
+
+/* Auxtrace records must have the same alignment as perf event records */
+#define PERF_AUXTRACE_RECORD_ALIGNMENT 8
+
+enum auxtrace_type {
+	PERF_AUXTRACE_UNKNOWN,
+	PERF_AUXTRACE_INTEL_PT,
+	PERF_AUXTRACE_INTEL_BTS,
+	PERF_AUXTRACE_CS_ETM,
+	PERF_AUXTRACE_ARM_SPE,
+	PERF_AUXTRACE_S390_CPUMSF,
+	PERF_AUXTRACE_HISI_PTT,
+};
+
+enum itrace_period_type {
+	PERF_ITRACE_PERIOD_INSTRUCTIONS,
+	PERF_ITRACE_PERIOD_TICKS,
+	PERF_ITRACE_PERIOD_NANOSECS,
+};
+
+#define AUXTRACE_ERR_FLG_OVERFLOW	(1 << ('o' - 'a'))
+#define AUXTRACE_ERR_FLG_DATA_LOST	(1 << ('l' - 'a'))
+
+#define AUXTRACE_LOG_FLG_ALL_PERF_EVTS	(1 << ('a' - 'a'))
+#define AUXTRACE_LOG_FLG_ON_ERROR	(1 << ('e' - 'a'))
+#define AUXTRACE_LOG_FLG_USE_STDOUT	(1 << ('o' - 'a'))
+
+/**
+ * struct itrace_synth_opts - AUX area tracing synthesis options.
+ * @set: indicates whether or not options have been set
+ * @default_no_sample: Default to no sampling.
+ * @inject: indicates the event (not just the sample) must be fully synthesized
+ *          because 'perf inject' will write it out
+ * @instructions: whether to synthesize 'instructions' events
+ * @branches: whether to synthesize 'branches' events
+ *            (branch misses only for Arm SPE)
+ * @transactions: whether to synthesize events for transactions
+ * @ptwrites: whether to synthesize events for ptwrites
+ * @pwr_events: whether to synthesize power events
+ * @other_events: whether to synthesize other events recorded due to the use of
+ *                aux_output
+ * @intr_events: whether to synthesize interrupt events
+ * @errors: whether to synthesize decoder error events
+ * @dont_decode: whether to skip decoding entirely
+ * @log: write a decoding log
+ * @calls: limit branch samples to calls (can be combined with @returns)
+ * @returns: limit branch samples to returns (can be combined with @calls)
+ * @callchain: add callchain to 'instructions' events
+ * @add_callchain: add callchain to existing event records
+ * @thread_stack: feed branches to the thread_stack
+ * @last_branch: add branch context to 'instruction' events
+ * @add_last_branch: add branch context to existing event records
+ * @approx_ipc: approximate IPC
+ * @flc: whether to synthesize first level cache events
+ * @llc: whether to synthesize last level cache events
+ * @tlb: whether to synthesize TLB events
+ * @remote_access: whether to synthesize remote access events
+ * @mem: whether to synthesize memory events
+ * @timeless_decoding: prefer "timeless" decoding i.e. ignore timestamps
+ * @vm_time_correlation: perform VM Time Correlation
+ * @vm_tm_corr_dry_run: VM Time Correlation dry-run
+ * @vm_tm_corr_args:  VM Time Correlation implementation-specific arguments
+ * @callchain_sz: maximum callchain size
+ * @last_branch_sz: branch context size
+ * @period: 'instructions' events period
+ * @period_type: 'instructions' events period type
+ * @initial_skip: skip N events at the beginning.
+ * @cpu_bitmap: CPUs for which to synthesize events, or NULL for all
+ * @ptime_range: time intervals to trace or NULL
+ * @range_num: number of time intervals to trace
+ * @error_plus_flags: flags to affect what errors are reported
+ * @error_minus_flags: flags to affect what errors are reported
+ * @log_plus_flags: flags to affect what is logged
+ * @log_minus_flags: flags to affect what is logged
+ * @quick: quicker (less detailed) decoding
+ * @log_on_error_size: size of log to keep for outputting log only on errors
+ */
+struct itrace_synth_opts {
+	bool			set;
+	bool			default_no_sample;
+	bool			inject;
+	bool			instructions;
+	bool			branches;
+	bool			transactions;
+	bool			ptwrites;
+	bool			pwr_events;
+	bool			other_events;
+	bool			intr_events;
+	bool			errors;
+	bool			dont_decode;
+	bool			log;
+	bool			calls;
+	bool			returns;
+	bool			callchain;
+	bool			add_callchain;
+	bool			thread_stack;
+	bool			last_branch;
+	bool			add_last_branch;
+	bool			approx_ipc;
+	bool			flc;
+	bool			llc;
+	bool			tlb;
+	bool			remote_access;
+	bool			mem;
+	bool			timeless_decoding;
+	bool			vm_time_correlation;
+	bool			vm_tm_corr_dry_run;
+	char			*vm_tm_corr_args;
+	unsigned int		callchain_sz;
+	unsigned int		last_branch_sz;
+	unsigned long long	period;
+	enum itrace_period_type	period_type;
+	unsigned long		initial_skip;
+	unsigned long		*cpu_bitmap;
+	struct perf_time_interval *ptime_range;
+	int			range_num;
+	unsigned int		error_plus_flags;
+	unsigned int		error_minus_flags;
+	unsigned int		log_plus_flags;
+	unsigned int		log_minus_flags;
+	unsigned int		quick;
+	unsigned int		log_on_error_size;
+};
+
+/**
+ * struct auxtrace_index_entry - indexes a AUX area tracing event within a
+ *                               perf.data file.
+ * @file_offset: offset within the perf.data file
+ * @sz: size of the event
+ */
+struct auxtrace_index_entry {
+	u64			file_offset;
+	u64			sz;
+};
+
+#define PERF_AUXTRACE_INDEX_ENTRY_COUNT 256
+
+/**
+ * struct auxtrace_index - index of AUX area tracing events within a perf.data
+ *                         file.
+ * @list: linking a number of arrays of entries
+ * @nr: number of entries
+ * @entries: array of entries
+ */
+struct auxtrace_index {
+	struct list_head	list;
+	size_t			nr;
+	struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT];
+};
+
+/**
+ * struct auxtrace - session callbacks to allow AUX area data decoding.
+ * @process_event: lets the decoder see all session events
+ * @process_auxtrace_event: process a PERF_RECORD_AUXTRACE event
+ * @queue_data: queue an AUX sample or PERF_RECORD_AUXTRACE event for later
+ *              processing
+ * @dump_auxtrace_sample: dump AUX area sample data
+ * @flush_events: process any remaining data
+ * @free_events: free resources associated with event processing
+ * @free: free resources associated with the session
+ */
+struct auxtrace {
+	int (*process_event)(struct perf_session *session,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct perf_tool *tool);
+	int (*process_auxtrace_event)(struct perf_session *session,
+				      union perf_event *event,
+				      struct perf_tool *tool);
+	int (*queue_data)(struct perf_session *session,
+			  struct perf_sample *sample, union perf_event *event,
+			  u64 data_offset);
+	void (*dump_auxtrace_sample)(struct perf_session *session,
+				     struct perf_sample *sample);
+	int (*flush_events)(struct perf_session *session,
+			    struct perf_tool *tool);
+	void (*free_events)(struct perf_session *session);
+	void (*free)(struct perf_session *session);
+	bool (*evsel_is_auxtrace)(struct perf_session *session,
+				  struct evsel *evsel);
+};
+
+/**
+ * struct auxtrace_buffer - a buffer containing AUX area tracing data.
+ * @list: buffers are queued in a list held by struct auxtrace_queue
+ * @size: size of the buffer in bytes
+ * @pid: in per-thread mode, the pid this buffer is associated with
+ * @tid: in per-thread mode, the tid this buffer is associated with
+ * @cpu: in per-cpu mode, the cpu this buffer is associated with
+ * @data: actual buffer data (can be null if the data has not been loaded)
+ * @data_offset: file offset at which the buffer can be read
+ * @mmap_addr: mmap address at which the buffer can be read
+ * @mmap_size: size of the mmap at @mmap_addr
+ * @data_needs_freeing: @data was malloc'd so free it when it is no longer
+ *                      needed
+ * @consecutive: the original data was split up and this buffer is consecutive
+ *               to the previous buffer
+ * @offset: offset as determined by aux_head / aux_tail members of struct
+ *          perf_event_mmap_page
+ * @reference: an implementation-specific reference determined when the data is
+ *             recorded
+ * @buffer_nr: used to number each buffer
+ * @use_size: implementation actually only uses this number of bytes
+ * @use_data: implementation actually only uses data starting at this address
+ */
+struct auxtrace_buffer {
+	struct list_head	list;
+	size_t			size;
+	pid_t			pid;
+	pid_t			tid;
+	struct perf_cpu		cpu;
+	void			*data;
+	off_t			data_offset;
+	void			*mmap_addr;
+	size_t			mmap_size;
+	bool			data_needs_freeing;
+	bool			consecutive;
+	u64			offset;
+	u64			reference;
+	u64			buffer_nr;
+	size_t			use_size;
+	void			*use_data;
+};
+
+/**
+ * struct auxtrace_queue - a queue of AUX area tracing data buffers.
+ * @head: head of buffer list
+ * @tid: in per-thread mode, the tid this queue is associated with
+ * @cpu: in per-cpu mode, the cpu this queue is associated with
+ * @set: %true once this queue has been dedicated to a specific thread or cpu
+ * @priv: implementation-specific data
+ */
+struct auxtrace_queue {
+	struct list_head	head;
+	pid_t			tid;
+	int			cpu;
+	bool			set;
+	void			*priv;
+};
+
+/**
+ * struct auxtrace_queues - an array of AUX area tracing queues.
+ * @queue_array: array of queues
+ * @nr_queues: number of queues
+ * @new_data: set whenever new data is queued
+ * @populated: queues have been fully populated using the auxtrace_index
+ * @next_buffer_nr: used to number each buffer
+ */
+struct auxtrace_queues {
+	struct auxtrace_queue	*queue_array;
+	unsigned int		nr_queues;
+	bool			new_data;
+	bool			populated;
+	u64			next_buffer_nr;
+};
+
+/**
+ * struct auxtrace_heap_item - element of struct auxtrace_heap.
+ * @queue_nr: queue number
+ * @ordinal: value used for sorting (lowest ordinal is top of the heap) expected
+ *           to be a timestamp
+ */
+struct auxtrace_heap_item {
+	unsigned int		queue_nr;
+	u64			ordinal;
+};
+
+/**
+ * struct auxtrace_heap - a heap suitable for sorting AUX area tracing queues.
+ * @heap_array: the heap
+ * @heap_cnt: the number of elements in the heap
+ * @heap_sz: maximum number of elements (grows as needed)
+ */
+struct auxtrace_heap {
+	struct auxtrace_heap_item	*heap_array;
+	unsigned int		heap_cnt;
+	unsigned int		heap_sz;
+};
+
+/**
+ * struct auxtrace_mmap - records an mmap of the auxtrace buffer.
+ * @base: address of mapped area
+ * @userpg: pointer to buffer's perf_event_mmap_page
+ * @mask: %0 if @len is not a power of two, otherwise (@len - %1)
+ * @len: size of mapped area
+ * @prev: previous aux_head
+ * @idx: index of this mmap
+ * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu
+ *       mmap) otherwise %0
+ * @cpu: cpu number for a per-cpu mmap otherwise %-1
+ */
+struct auxtrace_mmap {
+	void		*base;
+	void		*userpg;
+	size_t		mask;
+	size_t		len;
+	u64		prev;
+	int		idx;
+	pid_t		tid;
+	int		cpu;
+};
+
+/**
+ * struct auxtrace_mmap_params - parameters to set up struct auxtrace_mmap.
+ * @mask: %0 if @len is not a power of two, otherwise (@len - %1)
+ * @offset: file offset of mapped area
+ * @len: size of mapped area
+ * @prot: mmap memory protection
+ * @idx: index of this mmap
+ * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu
+ *       mmap) otherwise %0
+ * @mmap_needed: set to %false for non-auxtrace events. This is needed because
+ *               auxtrace mmapping is done in the same code path as non-auxtrace
+ *               mmapping but not every evsel that needs non-auxtrace mmapping
+ *               also needs auxtrace mmapping.
+ * @cpu: cpu number for a per-cpu mmap otherwise %-1
+ */
+struct auxtrace_mmap_params {
+	size_t		mask;
+	off_t		offset;
+	size_t		len;
+	int		prot;
+	int		idx;
+	pid_t		tid;
+	bool		mmap_needed;
+	struct perf_cpu	cpu;
+};
+
+/**
+ * struct auxtrace_record - callbacks for recording AUX area data.
+ * @recording_options: validate and process recording options
+ * @info_priv_size: return the size of the private data in auxtrace_info_event
+ * @info_fill: fill-in the private data in auxtrace_info_event
+ * @free: free this auxtrace record structure
+ * @snapshot_start: starting a snapshot
+ * @snapshot_finish: finishing a snapshot
+ * @find_snapshot: find data to snapshot within auxtrace mmap
+ * @parse_snapshot_options: parse snapshot options
+ * @reference: provide a 64-bit reference number for auxtrace_event
+ * @read_finish: called after reading from an auxtrace mmap
+ * @alignment: alignment (if any) for AUX area data
+ * @default_aux_sample_size: default sample size for --aux sample option
+ * @pmu: associated pmu
+ * @evlist: selected events list
+ */
+struct auxtrace_record {
+	int (*recording_options)(struct auxtrace_record *itr,
+				 struct evlist *evlist,
+				 struct record_opts *opts);
+	size_t (*info_priv_size)(struct auxtrace_record *itr,
+				 struct evlist *evlist);
+	int (*info_fill)(struct auxtrace_record *itr,
+			 struct perf_session *session,
+			 struct perf_record_auxtrace_info *auxtrace_info,
+			 size_t priv_size);
+	void (*free)(struct auxtrace_record *itr);
+	int (*snapshot_start)(struct auxtrace_record *itr);
+	int (*snapshot_finish)(struct auxtrace_record *itr);
+	int (*find_snapshot)(struct auxtrace_record *itr, int idx,
+			     struct auxtrace_mmap *mm, unsigned char *data,
+			     u64 *head, u64 *old);
+	int (*parse_snapshot_options)(struct auxtrace_record *itr,
+				      struct record_opts *opts,
+				      const char *str);
+	u64 (*reference)(struct auxtrace_record *itr);
+	int (*read_finish)(struct auxtrace_record *itr, int idx);
+	unsigned int alignment;
+	unsigned int default_aux_sample_size;
+	struct perf_pmu *pmu;
+	struct evlist *evlist;
+};
+
+/**
+ * struct addr_filter - address filter.
+ * @list: list node
+ * @range: true if it is a range filter
+ * @start: true if action is 'filter' or 'start'
+ * @action: 'filter', 'start' or 'stop' ('tracestop' is accepted but converted
+ *          to 'stop')
+ * @sym_from: symbol name for the filter address
+ * @sym_to: symbol name that determines the filter size
+ * @sym_from_idx: selects n'th from symbols with the same name (0 means global
+ *                and less than 0 means symbol must be unique)
+ * @sym_to_idx: same as @sym_from_idx but for @sym_to
+ * @addr: filter address
+ * @size: filter region size (for range filters)
+ * @filename: DSO file name or NULL for the kernel
+ * @str: allocated string that contains the other string members
+ */
+struct addr_filter {
+	struct list_head	list;
+	bool			range;
+	bool			start;
+	const char		*action;
+	const char		*sym_from;
+	const char		*sym_to;
+	int			sym_from_idx;
+	int			sym_to_idx;
+	u64			addr;
+	u64			size;
+	const char		*filename;
+	char			*str;
+};
+
+/**
+ * struct addr_filters - list of address filters.
+ * @head: list of address filters
+ * @cnt: number of address filters
+ */
+struct addr_filters {
+	struct list_head	head;
+	int			cnt;
+};
+
+struct auxtrace_cache;
+
+#ifdef HAVE_AUXTRACE_SUPPORT
+
+u64 compat_auxtrace_mmap__read_head(struct auxtrace_mmap *mm);
+int compat_auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail);
+
+static inline u64 auxtrace_mmap__read_head(struct auxtrace_mmap *mm,
+					   int kernel_is_64_bit __maybe_unused)
+{
+	struct perf_event_mmap_page *pc = mm->userpg;
+	u64 head;
+
+#if BITS_PER_LONG == 32
+	if (kernel_is_64_bit)
+		return compat_auxtrace_mmap__read_head(mm);
+#endif
+	head = READ_ONCE(pc->aux_head);
+
+	/* Ensure all reads are done after we read the head */
+	smp_rmb();
+	return head;
+}
+
+static inline int auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail,
+					    int kernel_is_64_bit __maybe_unused)
+{
+	struct perf_event_mmap_page *pc = mm->userpg;
+
+#if BITS_PER_LONG == 32
+	if (kernel_is_64_bit)
+		return compat_auxtrace_mmap__write_tail(mm, tail);
+#endif
+	/* Ensure all reads are done before we write the tail out */
+	smp_mb();
+	WRITE_ONCE(pc->aux_tail, tail);
+	return 0;
+}
+
+int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
+			struct auxtrace_mmap_params *mp,
+			void *userpg, int fd);
+void auxtrace_mmap__munmap(struct auxtrace_mmap *mm);
+void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
+				off_t auxtrace_offset,
+				unsigned int auxtrace_pages,
+				bool auxtrace_overwrite);
+void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
+				   struct evlist *evlist,
+				   struct evsel *evsel, int idx);
+
+typedef int (*process_auxtrace_t)(struct perf_tool *tool,
+				  struct mmap *map,
+				  union perf_event *event, void *data1,
+				  size_t len1, void *data2, size_t len2);
+
+int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr,
+			struct perf_tool *tool, process_auxtrace_t fn);
+
+int auxtrace_mmap__read_snapshot(struct mmap *map,
+				 struct auxtrace_record *itr,
+				 struct perf_tool *tool, process_auxtrace_t fn,
+				 size_t snapshot_size);
+
+int auxtrace_queues__init(struct auxtrace_queues *queues);
+int auxtrace_queues__add_event(struct auxtrace_queues *queues,
+			       struct perf_session *session,
+			       union perf_event *event, off_t data_offset,
+			       struct auxtrace_buffer **buffer_ptr);
+struct auxtrace_queue *
+auxtrace_queues__sample_queue(struct auxtrace_queues *queues,
+			      struct perf_sample *sample,
+			      struct perf_session *session);
+int auxtrace_queues__add_sample(struct auxtrace_queues *queues,
+				struct perf_session *session,
+				struct perf_sample *sample, u64 data_offset,
+				u64 reference);
+void auxtrace_queues__free(struct auxtrace_queues *queues);
+int auxtrace_queues__process_index(struct auxtrace_queues *queues,
+				   struct perf_session *session);
+int auxtrace_queue_data(struct perf_session *session, bool samples,
+			bool events);
+struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue,
+					      struct auxtrace_buffer *buffer);
+void *auxtrace_buffer__get_data_rw(struct auxtrace_buffer *buffer, int fd, bool rw);
+static inline void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd)
+{
+	return auxtrace_buffer__get_data_rw(buffer, fd, false);
+}
+void auxtrace_buffer__put_data(struct auxtrace_buffer *buffer);
+void auxtrace_buffer__drop_data(struct auxtrace_buffer *buffer);
+void auxtrace_buffer__free(struct auxtrace_buffer *buffer);
+
+int auxtrace_heap__add(struct auxtrace_heap *heap, unsigned int queue_nr,
+		       u64 ordinal);
+void auxtrace_heap__pop(struct auxtrace_heap *heap);
+void auxtrace_heap__free(struct auxtrace_heap *heap);
+
+struct auxtrace_cache_entry {
+	struct hlist_node hash;
+	u32 key;
+};
+
+struct auxtrace_cache *auxtrace_cache__new(unsigned int bits, size_t entry_size,
+					   unsigned int limit_percent);
+void auxtrace_cache__free(struct auxtrace_cache *auxtrace_cache);
+void *auxtrace_cache__alloc_entry(struct auxtrace_cache *c);
+void auxtrace_cache__free_entry(struct auxtrace_cache *c, void *entry);
+int auxtrace_cache__add(struct auxtrace_cache *c, u32 key,
+			struct auxtrace_cache_entry *entry);
+void auxtrace_cache__remove(struct auxtrace_cache *c, u32 key);
+void *auxtrace_cache__lookup(struct auxtrace_cache *c, u32 key);
+
+struct auxtrace_record *auxtrace_record__init(struct evlist *evlist,
+					      int *err);
+
+int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
+				    struct record_opts *opts,
+				    const char *str);
+int auxtrace_parse_sample_options(struct auxtrace_record *itr,
+				  struct evlist *evlist,
+				  struct record_opts *opts, const char *str);
+void auxtrace_regroup_aux_output(struct evlist *evlist);
+int auxtrace_record__options(struct auxtrace_record *itr,
+			     struct evlist *evlist,
+			     struct record_opts *opts);
+size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr,
+				       struct evlist *evlist);
+int auxtrace_record__info_fill(struct auxtrace_record *itr,
+			       struct perf_session *session,
+			       struct perf_record_auxtrace_info *auxtrace_info,
+			       size_t priv_size);
+void auxtrace_record__free(struct auxtrace_record *itr);
+int auxtrace_record__snapshot_start(struct auxtrace_record *itr);
+int auxtrace_record__snapshot_finish(struct auxtrace_record *itr, bool on_exit);
+int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx,
+				   struct auxtrace_mmap *mm,
+				   unsigned char *data, u64 *head, u64 *old);
+u64 auxtrace_record__reference(struct auxtrace_record *itr);
+int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx);
+
+int auxtrace_index__auxtrace_event(struct list_head *head, union perf_event *event,
+				   off_t file_offset);
+int auxtrace_index__write(int fd, struct list_head *head);
+int auxtrace_index__process(int fd, u64 size, struct perf_session *session,
+			    bool needs_swap);
+void auxtrace_index__free(struct list_head *head);
+
+void auxtrace_synth_guest_error(struct perf_record_auxtrace_error *auxtrace_error, int type,
+				int code, int cpu, pid_t pid, pid_t tid, u64 ip,
+				const char *msg, u64 timestamp,
+				pid_t machine_pid, int vcpu);
+void auxtrace_synth_error(struct perf_record_auxtrace_error *auxtrace_error, int type,
+			  int code, int cpu, pid_t pid, pid_t tid, u64 ip,
+			  const char *msg, u64 timestamp);
+
+int perf_event__process_auxtrace_info(struct perf_session *session,
+				      union perf_event *event);
+s64 perf_event__process_auxtrace(struct perf_session *session,
+				 union perf_event *event);
+int perf_event__process_auxtrace_error(struct perf_session *session,
+				       union perf_event *event);
+int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts,
+			       const char *str, int unset);
+int itrace_parse_synth_opts(const struct option *opt, const char *str,
+			    int unset);
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
+				    bool no_sample);
+
+size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp);
+void perf_session__auxtrace_error_inc(struct perf_session *session,
+				      union perf_event *event);
+void events_stats__auxtrace_error_warn(const struct events_stats *stats);
+
+void addr_filters__init(struct addr_filters *filts);
+void addr_filters__exit(struct addr_filters *filts);
+int addr_filters__parse_bare_filter(struct addr_filters *filts,
+				    const char *filter);
+int auxtrace_parse_filters(struct evlist *evlist);
+
+int auxtrace__process_event(struct perf_session *session, union perf_event *event,
+			    struct perf_sample *sample, struct perf_tool *tool);
+void auxtrace__dump_auxtrace_sample(struct perf_session *session,
+				    struct perf_sample *sample);
+int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool);
+void auxtrace__free_events(struct perf_session *session);
+void auxtrace__free(struct perf_session *session);
+bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
+				 struct evsel *evsel);
+
+#define ITRACE_HELP \
+"				i[period]:    		synthesize instructions events\n" \
+"				b:	    		synthesize branches events (branch misses for Arm SPE)\n" \
+"				c:	    		synthesize branches events (calls only)\n"	\
+"				r:	    		synthesize branches events (returns only)\n" \
+"				x:	    		synthesize transactions events\n"		\
+"				w:	    		synthesize ptwrite events\n"		\
+"				p:	    		synthesize power events\n"			\
+"				o:			synthesize other events recorded due to the use\n" \
+"							of aux-output (refer to perf record)\n"	\
+"				I:			synthesize interrupt or similar (asynchronous) events\n" \
+"							(e.g. Intel PT Event Trace)\n" \
+"				e[flags]:		synthesize error events\n" \
+"							each flag must be preceded by + or -\n" \
+"							error flags are: o (overflow)\n" \
+"									 l (data lost)\n" \
+"				d[flags]:		create a debug log\n" \
+"							each flag must be preceded by + or -\n" \
+"							log flags are: a (all perf events)\n" \
+"							               o (output to stdout)\n" \
+"				f:	    		synthesize first level cache events\n" \
+"				m:	    		synthesize last level cache events\n" \
+"				t:	    		synthesize TLB events\n" \
+"				a:	    		synthesize remote access events\n" \
+"				g[len]:     		synthesize a call chain (use with i or x)\n" \
+"				G[len]:			synthesize a call chain on existing event records\n" \
+"				l[len]:     		synthesize last branch entries (use with i or x)\n" \
+"				L[len]:			synthesize last branch entries on existing event records\n" \
+"				sNUMBER:    		skip initial number of events\n"		\
+"				q:			quicker (less detailed) decoding\n" \
+"				A:			approximate IPC\n" \
+"				Z:			prefer to ignore timestamps (so-called \"timeless\" decoding)\n" \
+"				PERIOD[ns|us|ms|i|t]:   specify period to sample stream\n" \
+"				concatenate multiple options. Default is ibxwpe or cewp\n"
+
+static inline
+void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts,
+				       struct perf_time_interval *ptime_range,
+				       int range_num)
+{
+	opts->ptime_range = ptime_range;
+	opts->range_num = range_num;
+}
+
+static inline
+void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts)
+{
+	opts->ptime_range = NULL;
+	opts->range_num = 0;
+}
+
+#else
+#include "debug.h"
+
+static inline struct auxtrace_record *
+auxtrace_record__init(struct evlist *evlist __maybe_unused,
+		      int *err)
+{
+	*err = 0;
+	return NULL;
+}
+
+static inline
+void auxtrace_record__free(struct auxtrace_record *itr __maybe_unused)
+{
+}
+
+static inline
+int auxtrace_record__options(struct auxtrace_record *itr __maybe_unused,
+			     struct evlist *evlist __maybe_unused,
+			     struct record_opts *opts __maybe_unused)
+{
+	return 0;
+}
+
+static inline
+int perf_event__process_auxtrace_info(struct perf_session *session __maybe_unused,
+				      union perf_event *event __maybe_unused)
+{
+	return 0;
+}
+
+static inline
+s64 perf_event__process_auxtrace(struct perf_session *session __maybe_unused,
+				 union perf_event *event __maybe_unused)
+{
+	return 0;
+}
+
+static inline
+int perf_event__process_auxtrace_error(struct perf_session *session __maybe_unused,
+				       union perf_event *event __maybe_unused)
+{
+	return 0;
+}
+
+static inline
+void perf_session__auxtrace_error_inc(struct perf_session *session
+				      __maybe_unused,
+				      union perf_event *event
+				      __maybe_unused)
+{
+}
+
+static inline
+void events_stats__auxtrace_error_warn(const struct events_stats *stats
+				       __maybe_unused)
+{
+}
+
+static inline
+int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts __maybe_unused,
+			       const char *str __maybe_unused, int unset __maybe_unused)
+{
+	pr_err("AUX area tracing not supported\n");
+	return -EINVAL;
+}
+
+static inline
+int itrace_parse_synth_opts(const struct option *opt __maybe_unused,
+			    const char *str __maybe_unused,
+			    int unset __maybe_unused)
+{
+	pr_err("AUX area tracing not supported\n");
+	return -EINVAL;
+}
+
+static inline
+int auxtrace_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
+				    struct record_opts *opts __maybe_unused,
+				    const char *str)
+{
+	if (!str)
+		return 0;
+	pr_err("AUX area tracing not supported\n");
+	return -EINVAL;
+}
+
+static inline
+int auxtrace_parse_sample_options(struct auxtrace_record *itr __maybe_unused,
+				  struct evlist *evlist __maybe_unused,
+				  struct record_opts *opts __maybe_unused,
+				  const char *str)
+{
+	if (!str)
+		return 0;
+	pr_err("AUX area tracing not supported\n");
+	return -EINVAL;
+}
+
+static inline
+void auxtrace_regroup_aux_output(struct evlist *evlist __maybe_unused)
+{
+}
+
+static inline
+int auxtrace__process_event(struct perf_session *session __maybe_unused,
+			    union perf_event *event __maybe_unused,
+			    struct perf_sample *sample __maybe_unused,
+			    struct perf_tool *tool __maybe_unused)
+{
+	return 0;
+}
+
+static inline
+void auxtrace__dump_auxtrace_sample(struct perf_session *session __maybe_unused,
+				    struct perf_sample *sample __maybe_unused)
+{
+}
+
+static inline
+int auxtrace__flush_events(struct perf_session *session __maybe_unused,
+			   struct perf_tool *tool __maybe_unused)
+{
+	return 0;
+}
+
+static inline
+void auxtrace__free_events(struct perf_session *session __maybe_unused)
+{
+}
+
+static inline
+void auxtrace_cache__free(struct auxtrace_cache *auxtrace_cache __maybe_unused)
+{
+}
+
+static inline
+void auxtrace__free(struct perf_session *session __maybe_unused)
+{
+}
+
+static inline
+int auxtrace_index__write(int fd __maybe_unused,
+			  struct list_head *head __maybe_unused)
+{
+	return -EINVAL;
+}
+
+static inline
+int auxtrace_index__process(int fd __maybe_unused,
+			    u64 size __maybe_unused,
+			    struct perf_session *session __maybe_unused,
+			    bool needs_swap __maybe_unused)
+{
+	return -EINVAL;
+}
+
+static inline
+void auxtrace_index__free(struct list_head *head __maybe_unused)
+{
+}
+
+static inline
+bool auxtrace__evsel_is_auxtrace(struct perf_session *session __maybe_unused,
+				 struct evsel *evsel __maybe_unused)
+{
+	return false;
+}
+
+static inline
+int auxtrace_parse_filters(struct evlist *evlist __maybe_unused)
+{
+	return 0;
+}
+
+int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
+			struct auxtrace_mmap_params *mp,
+			void *userpg, int fd);
+void auxtrace_mmap__munmap(struct auxtrace_mmap *mm);
+void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
+				off_t auxtrace_offset,
+				unsigned int auxtrace_pages,
+				bool auxtrace_overwrite);
+void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
+				   struct evlist *evlist,
+				   struct evsel *evsel, int idx);
+
+#define ITRACE_HELP ""
+
+static inline
+void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts
+				       __maybe_unused,
+				       struct perf_time_interval *ptime_range
+				       __maybe_unused,
+				       int range_num __maybe_unused)
+{
+}
+
+static inline
+void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts
+					 __maybe_unused)
+{
+}
+
+#endif
+
+#endif
diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c
new file mode 100644
index 000000000..5ecd4f401
--- /dev/null
+++ b/tools/perf/util/block-info.c
@@ -0,0 +1,498 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <string.h>
+#include <linux/zalloc.h>
+#include "block-info.h"
+#include "sort.h"
+#include "annotate.h"
+#include "symbol.h"
+#include "dso.h"
+#include "map.h"
+#include "srcline.h"
+#include "evlist.h"
+#include "hist.h"
+#include "ui/browsers/hists.h"
+
+static struct block_header_column {
+	const char *name;
+	int width;
+} block_columns[PERF_HPP_REPORT__BLOCK_MAX_INDEX] = {
+	[PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT] = {
+		.name = "Sampled Cycles%",
+		.width = 15,
+	},
+	[PERF_HPP_REPORT__BLOCK_LBR_CYCLES] = {
+		.name = "Sampled Cycles",
+		.width = 14,
+	},
+	[PERF_HPP_REPORT__BLOCK_CYCLES_PCT] = {
+		.name = "Avg Cycles%",
+		.width = 11,
+	},
+	[PERF_HPP_REPORT__BLOCK_AVG_CYCLES] = {
+		.name = "Avg Cycles",
+		.width = 10,
+	},
+	[PERF_HPP_REPORT__BLOCK_RANGE] = {
+		.name = "[Program Block Range]",
+		.width = 70,
+	},
+	[PERF_HPP_REPORT__BLOCK_DSO] = {
+		.name = "Shared Object",
+		.width = 20,
+	}
+};
+
+struct block_info *block_info__get(struct block_info *bi)
+{
+	if (bi)
+		refcount_inc(&bi->refcnt);
+	return bi;
+}
+
+void block_info__put(struct block_info *bi)
+{
+	if (bi && refcount_dec_and_test(&bi->refcnt))
+		free(bi);
+}
+
+struct block_info *block_info__new(void)
+{
+	struct block_info *bi = zalloc(sizeof(*bi));
+
+	if (bi)
+		refcount_set(&bi->refcnt, 1);
+	return bi;
+}
+
+int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct block_info *bi_l = left->block_info;
+	struct block_info *bi_r = right->block_info;
+	int cmp;
+
+	if (!bi_l->sym || !bi_r->sym) {
+		if (!bi_l->sym && !bi_r->sym)
+			return -1;
+		else if (!bi_l->sym)
+			return -1;
+		else
+			return 1;
+	}
+
+	cmp = strcmp(bi_l->sym->name, bi_r->sym->name);
+	if (cmp)
+		return cmp;
+
+	if (bi_l->start != bi_r->start)
+		return (int64_t)(bi_r->start - bi_l->start);
+
+	return (int64_t)(bi_r->end - bi_l->end);
+}
+
+int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+			struct hist_entry *left, struct hist_entry *right)
+{
+	return __block_info__cmp(left, right);
+}
+
+static void init_block_info(struct block_info *bi, struct symbol *sym,
+			    struct cyc_hist *ch, int offset,
+			    u64 total_cycles)
+{
+	bi->sym = sym;
+	bi->start = ch->start;
+	bi->end = offset;
+	bi->cycles = ch->cycles;
+	bi->cycles_aggr = ch->cycles_aggr;
+	bi->num = ch->num;
+	bi->num_aggr = ch->num_aggr;
+	bi->total_cycles = total_cycles;
+
+	memcpy(bi->cycles_spark, ch->cycles_spark,
+	       NUM_SPARKS * sizeof(u64));
+}
+
+int block_info__process_sym(struct hist_entry *he, struct block_hist *bh,
+			    u64 *block_cycles_aggr, u64 total_cycles)
+{
+	struct annotation *notes;
+	struct cyc_hist *ch;
+	static struct addr_location al;
+	u64 cycles = 0;
+
+	if (!he->ms.map || !he->ms.sym)
+		return 0;
+
+	memset(&al, 0, sizeof(al));
+	al.map = he->ms.map;
+	al.sym = he->ms.sym;
+
+	notes = symbol__annotation(he->ms.sym);
+	if (!notes || !notes->src || !notes->src->cycles_hist)
+		return 0;
+	ch = notes->src->cycles_hist;
+	for (unsigned int i = 0; i < symbol__size(he->ms.sym); i++) {
+		if (ch[i].num_aggr) {
+			struct block_info *bi;
+			struct hist_entry *he_block;
+
+			bi = block_info__new();
+			if (!bi)
+				return -1;
+
+			init_block_info(bi, he->ms.sym, &ch[i], i,
+					total_cycles);
+			cycles += bi->cycles_aggr / bi->num_aggr;
+
+			he_block = hists__add_entry_block(&bh->block_hists,
+							  &al, bi);
+			if (!he_block) {
+				block_info__put(bi);
+				return -1;
+			}
+		}
+	}
+
+	if (block_cycles_aggr)
+		*block_cycles_aggr += cycles;
+
+	return 0;
+}
+
+static int block_column_header(struct perf_hpp_fmt *fmt,
+			       struct perf_hpp *hpp,
+			       struct hists *hists __maybe_unused,
+			       int line __maybe_unused,
+			       int *span __maybe_unused)
+{
+	struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+
+	return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+			 block_fmt->header);
+}
+
+static int block_column_width(struct perf_hpp_fmt *fmt,
+			      struct perf_hpp *hpp __maybe_unused,
+			      struct hists *hists __maybe_unused)
+{
+	struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+
+	return block_fmt->width;
+}
+
+static int color_pct(struct perf_hpp *hpp, int width, double pct)
+{
+#ifdef HAVE_SLANG_SUPPORT
+	if (use_browser) {
+		return __hpp__slsmg_color_printf(hpp, "%*.2f%%",
+						 width - 1, pct);
+	}
+#endif
+	return hpp_color_scnprintf(hpp, "%*.2f%%", width - 1, pct);
+}
+
+static int block_total_cycles_pct_entry(struct perf_hpp_fmt *fmt,
+					struct perf_hpp *hpp,
+					struct hist_entry *he)
+{
+	struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+	struct block_info *bi = he->block_info;
+	double ratio = 0.0;
+
+	if (block_fmt->total_cycles)
+		ratio = (double)bi->cycles_aggr / (double)block_fmt->total_cycles;
+
+	return color_pct(hpp, block_fmt->width, 100.0 * ratio);
+}
+
+static int64_t block_total_cycles_pct_sort(struct perf_hpp_fmt *fmt,
+					   struct hist_entry *left,
+					   struct hist_entry *right)
+{
+	struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+	struct block_info *bi_l = left->block_info;
+	struct block_info *bi_r = right->block_info;
+	double l, r;
+
+	if (block_fmt->total_cycles) {
+		l = ((double)bi_l->cycles_aggr /
+			(double)block_fmt->total_cycles) * 100000.0;
+		r = ((double)bi_r->cycles_aggr /
+			(double)block_fmt->total_cycles) * 100000.0;
+		return (int64_t)l - (int64_t)r;
+	}
+
+	return 0;
+}
+
+static void cycles_string(u64 cycles, char *buf, int size)
+{
+	if (cycles >= 1000000)
+		scnprintf(buf, size, "%.1fM", (double)cycles / 1000000.0);
+	else if (cycles >= 1000)
+		scnprintf(buf, size, "%.1fK", (double)cycles / 1000.0);
+	else
+		scnprintf(buf, size, "%1d", cycles);
+}
+
+static int block_cycles_lbr_entry(struct perf_hpp_fmt *fmt,
+				  struct perf_hpp *hpp, struct hist_entry *he)
+{
+	struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+	struct block_info *bi = he->block_info;
+	char cycles_buf[16];
+
+	cycles_string(bi->cycles_aggr, cycles_buf, sizeof(cycles_buf));
+
+	return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+			 cycles_buf);
+}
+
+static int block_cycles_pct_entry(struct perf_hpp_fmt *fmt,
+				  struct perf_hpp *hpp, struct hist_entry *he)
+{
+	struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+	struct block_info *bi = he->block_info;
+	double ratio = 0.0;
+	u64 avg;
+
+	if (block_fmt->block_cycles && bi->num_aggr) {
+		avg = bi->cycles_aggr / bi->num_aggr;
+		ratio = (double)avg / (double)block_fmt->block_cycles;
+	}
+
+	return color_pct(hpp, block_fmt->width, 100.0 * ratio);
+}
+
+static int block_avg_cycles_entry(struct perf_hpp_fmt *fmt,
+				  struct perf_hpp *hpp,
+				  struct hist_entry *he)
+{
+	struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+	struct block_info *bi = he->block_info;
+	char cycles_buf[16];
+
+	cycles_string(bi->cycles_aggr / bi->num_aggr, cycles_buf,
+		      sizeof(cycles_buf));
+
+	return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+			 cycles_buf);
+}
+
+static int block_range_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+			     struct hist_entry *he)
+{
+	struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+	struct block_info *bi = he->block_info;
+	char buf[128];
+	char *start_line, *end_line;
+
+	symbol_conf.disable_add2line_warn = true;
+
+	start_line = map__srcline(he->ms.map, bi->sym->start + bi->start,
+				  he->ms.sym);
+
+	end_line = map__srcline(he->ms.map, bi->sym->start + bi->end,
+				he->ms.sym);
+
+	if ((strncmp(start_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) &&
+	    (strncmp(end_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)) {
+		scnprintf(buf, sizeof(buf), "[%s -> %s]",
+			  start_line, end_line);
+	} else {
+		scnprintf(buf, sizeof(buf), "[%7lx -> %7lx]",
+			  bi->start, bi->end);
+	}
+
+	free_srcline(start_line);
+	free_srcline(end_line);
+
+	return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
+}
+
+static int block_dso_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+			   struct hist_entry *he)
+{
+	struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+	struct map *map = he->ms.map;
+
+	if (map && map->dso) {
+		return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+				 map->dso->short_name);
+	}
+
+	return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+			 "[unknown]");
+}
+
+static void init_block_header(struct block_fmt *block_fmt)
+{
+	struct perf_hpp_fmt *fmt = &block_fmt->fmt;
+
+	BUG_ON(block_fmt->idx >= PERF_HPP_REPORT__BLOCK_MAX_INDEX);
+
+	block_fmt->header = block_columns[block_fmt->idx].name;
+	block_fmt->width = block_columns[block_fmt->idx].width;
+
+	fmt->header = block_column_header;
+	fmt->width = block_column_width;
+}
+
+static void hpp_register(struct block_fmt *block_fmt, int idx,
+			 struct perf_hpp_list *hpp_list)
+{
+	struct perf_hpp_fmt *fmt = &block_fmt->fmt;
+
+	block_fmt->idx = idx;
+	INIT_LIST_HEAD(&fmt->list);
+	INIT_LIST_HEAD(&fmt->sort_list);
+
+	switch (idx) {
+	case PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT:
+		fmt->color = block_total_cycles_pct_entry;
+		fmt->cmp = block_info__cmp;
+		fmt->sort = block_total_cycles_pct_sort;
+		break;
+	case PERF_HPP_REPORT__BLOCK_LBR_CYCLES:
+		fmt->entry = block_cycles_lbr_entry;
+		break;
+	case PERF_HPP_REPORT__BLOCK_CYCLES_PCT:
+		fmt->color = block_cycles_pct_entry;
+		break;
+	case PERF_HPP_REPORT__BLOCK_AVG_CYCLES:
+		fmt->entry = block_avg_cycles_entry;
+		break;
+	case PERF_HPP_REPORT__BLOCK_RANGE:
+		fmt->entry = block_range_entry;
+		break;
+	case PERF_HPP_REPORT__BLOCK_DSO:
+		fmt->entry = block_dso_entry;
+		break;
+	default:
+		return;
+	}
+
+	init_block_header(block_fmt);
+	perf_hpp_list__column_register(hpp_list, fmt);
+}
+
+static void register_block_columns(struct perf_hpp_list *hpp_list,
+				   struct block_fmt *block_fmts,
+				   int *block_hpps, int nr_hpps)
+{
+	for (int i = 0; i < nr_hpps; i++)
+		hpp_register(&block_fmts[i], block_hpps[i], hpp_list);
+}
+
+static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts,
+			    int *block_hpps, int nr_hpps)
+{
+	__hists__init(&bh->block_hists, &bh->block_list);
+	perf_hpp_list__init(&bh->block_list);
+	bh->block_list.nr_header_lines = 1;
+
+	register_block_columns(&bh->block_list, block_fmts,
+			       block_hpps, nr_hpps);
+
+	/* Sort by the first fmt */
+	perf_hpp_list__register_sort_field(&bh->block_list, &block_fmts[0].fmt);
+}
+
+static int process_block_report(struct hists *hists,
+				struct block_report *block_report,
+				u64 total_cycles, int *block_hpps,
+				int nr_hpps)
+{
+	struct rb_node *next = rb_first_cached(&hists->entries);
+	struct block_hist *bh = &block_report->hist;
+	struct hist_entry *he;
+
+	if (nr_hpps > PERF_HPP_REPORT__BLOCK_MAX_INDEX)
+		return -1;
+
+	block_report->nr_fmts = nr_hpps;
+	init_block_hist(bh, block_report->fmts, block_hpps, nr_hpps);
+
+	while (next) {
+		he = rb_entry(next, struct hist_entry, rb_node);
+		block_info__process_sym(he, bh, &block_report->cycles,
+					total_cycles);
+		next = rb_next(&he->rb_node);
+	}
+
+	for (int i = 0; i < nr_hpps; i++) {
+		block_report->fmts[i].total_cycles = total_cycles;
+		block_report->fmts[i].block_cycles = block_report->cycles;
+	}
+
+	hists__output_resort(&bh->block_hists, NULL);
+	return 0;
+}
+
+struct block_report *block_info__create_report(struct evlist *evlist,
+					       u64 total_cycles,
+					       int *block_hpps, int nr_hpps,
+					       int *nr_reps)
+{
+	struct block_report *block_reports;
+	int nr_hists = evlist->core.nr_entries, i = 0;
+	struct evsel *pos;
+
+	block_reports = calloc(nr_hists, sizeof(struct block_report));
+	if (!block_reports)
+		return NULL;
+
+	evlist__for_each_entry(evlist, pos) {
+		struct hists *hists = evsel__hists(pos);
+
+		process_block_report(hists, &block_reports[i], total_cycles,
+				     block_hpps, nr_hpps);
+		i++;
+	}
+
+	*nr_reps = nr_hists;
+	return block_reports;
+}
+
+void block_info__free_report(struct block_report *reps, int nr_reps)
+{
+	for (int i = 0; i < nr_reps; i++)
+		hists__delete_entries(&reps[i].hist.block_hists);
+
+	free(reps);
+}
+
+int report__browse_block_hists(struct block_hist *bh, float min_percent,
+			       struct evsel *evsel, struct perf_env *env,
+			       struct annotation_options *annotation_opts)
+{
+	int ret;
+
+	switch (use_browser) {
+	case 0:
+		symbol_conf.report_individual_block = true;
+		hists__fprintf(&bh->block_hists, true, 0, 0, min_percent,
+			       stdout, true);
+		return 0;
+	case 1:
+		symbol_conf.report_individual_block = true;
+		ret = block_hists_tui_browse(bh, evsel, min_percent,
+					     env, annotation_opts);
+		return ret;
+	default:
+		return -1;
+	}
+
+	return 0;
+}
+
+float block_info__total_cycles_percent(struct hist_entry *he)
+{
+	struct block_info *bi = he->block_info;
+
+	if (bi->total_cycles)
+		return bi->cycles * 100.0 / bi->total_cycles;
+
+	return 0.0;
+}
diff --git a/tools/perf/util/block-info.h b/tools/perf/util/block-info.h
new file mode 100644
index 000000000..42e9dcc4c
--- /dev/null
+++ b/tools/perf/util/block-info.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_BLOCK_H
+#define __PERF_BLOCK_H
+
+#include <linux/types.h>
+#include <linux/refcount.h>
+#include "hist.h"
+#include "symbol.h"
+#include "sort.h"
+#include "ui/ui.h"
+
+struct block_info {
+	struct symbol		*sym;
+	u64			start;
+	u64			end;
+	u64			cycles;
+	u64			cycles_aggr;
+	s64			cycles_spark[NUM_SPARKS];
+	u64			total_cycles;
+	int			num;
+	int			num_aggr;
+	refcount_t		refcnt;
+};
+
+struct block_fmt {
+	struct perf_hpp_fmt	fmt;
+	int			idx;
+	int			width;
+	const char		*header;
+	u64			total_cycles;
+	u64			block_cycles;
+};
+
+enum {
+	PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT,
+	PERF_HPP_REPORT__BLOCK_LBR_CYCLES,
+	PERF_HPP_REPORT__BLOCK_CYCLES_PCT,
+	PERF_HPP_REPORT__BLOCK_AVG_CYCLES,
+	PERF_HPP_REPORT__BLOCK_RANGE,
+	PERF_HPP_REPORT__BLOCK_DSO,
+	PERF_HPP_REPORT__BLOCK_MAX_INDEX
+};
+
+struct block_report {
+	struct block_hist	hist;
+	u64			cycles;
+	struct block_fmt	fmts[PERF_HPP_REPORT__BLOCK_MAX_INDEX];
+	int			nr_fmts;
+};
+
+struct block_hist;
+
+struct block_info *block_info__new(void);
+struct block_info *block_info__get(struct block_info *bi);
+void   block_info__put(struct block_info *bi);
+
+static inline void __block_info__zput(struct block_info **bi)
+{
+	block_info__put(*bi);
+	*bi = NULL;
+}
+
+#define block_info__zput(bi) __block_info__zput(&bi)
+
+int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right);
+
+int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+			struct hist_entry *left, struct hist_entry *right);
+
+int block_info__process_sym(struct hist_entry *he, struct block_hist *bh,
+			    u64 *block_cycles_aggr, u64 total_cycles);
+
+struct block_report *block_info__create_report(struct evlist *evlist,
+					       u64 total_cycles,
+					       int *block_hpps, int nr_hpps,
+					       int *nr_reps);
+
+void block_info__free_report(struct block_report *reps, int nr_reps);
+
+int report__browse_block_hists(struct block_hist *bh, float min_percent,
+			       struct evsel *evsel, struct perf_env *env,
+			       struct annotation_options *annotation_opts);
+
+float block_info__total_cycles_percent(struct hist_entry *he);
+
+#endif /* __PERF_BLOCK_H */
diff --git a/tools/perf/util/block-range.c b/tools/perf/util/block-range.c
new file mode 100644
index 000000000..1be432657
--- /dev/null
+++ b/tools/perf/util/block-range.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "block-range.h"
+#include "annotate.h"
+#include <assert.h>
+#include <stdlib.h>
+
+struct {
+	struct rb_root root;
+	u64 blocks;
+} block_ranges;
+
+static void block_range__debug(void)
+{
+	/*
+	 * XXX still paranoid for now; see if we can make this depend on
+	 * DEBUG=1 builds.
+	 */
+#if 1
+	struct rb_node *rb;
+	u64 old = 0; /* NULL isn't executable */
+
+	for (rb = rb_first(&block_ranges.root); rb; rb = rb_next(rb)) {
+		struct block_range *entry = rb_entry(rb, struct block_range, node);
+
+		assert(old < entry->start);
+		assert(entry->start <= entry->end); /* single instruction block; jump to a jump */
+
+		old = entry->end;
+	}
+#endif
+}
+
+struct block_range *block_range__find(u64 addr)
+{
+	struct rb_node **p = &block_ranges.root.rb_node;
+	struct rb_node *parent = NULL;
+	struct block_range *entry;
+
+	while (*p != NULL) {
+		parent = *p;
+		entry = rb_entry(parent, struct block_range, node);
+
+		if (addr < entry->start)
+			p = &parent->rb_left;
+		else if (addr > entry->end)
+			p = &parent->rb_right;
+		else
+			return entry;
+	}
+
+	return NULL;
+}
+
+static inline void rb_link_left_of_node(struct rb_node *left, struct rb_node *node)
+{
+	struct rb_node **p = &node->rb_left;
+	while (*p) {
+		node = *p;
+		p = &node->rb_right;
+	}
+	rb_link_node(left, node, p);
+}
+
+static inline void rb_link_right_of_node(struct rb_node *right, struct rb_node *node)
+{
+	struct rb_node **p = &node->rb_right;
+	while (*p) {
+		node = *p;
+		p = &node->rb_left;
+	}
+	rb_link_node(right, node, p);
+}
+
+/**
+ * block_range__create
+ * @start: branch target starting this basic block
+ * @end:   branch ending this basic block
+ *
+ * Create all the required block ranges to precisely span the given range.
+ */
+struct block_range_iter block_range__create(u64 start, u64 end)
+{
+	struct rb_node **p = &block_ranges.root.rb_node;
+	struct rb_node *n, *parent = NULL;
+	struct block_range *next, *entry = NULL;
+	struct block_range_iter iter = { NULL, NULL };
+
+	while (*p != NULL) {
+		parent = *p;
+		entry = rb_entry(parent, struct block_range, node);
+
+		if (start < entry->start)
+			p = &parent->rb_left;
+		else if (start > entry->end)
+			p = &parent->rb_right;
+		else
+			break;
+	}
+
+	/*
+	 * Didn't find anything.. there's a hole at @start, however @end might
+	 * be inside/behind the next range.
+	 */
+	if (!*p) {
+		if (!entry) /* tree empty */
+			goto do_whole;
+
+		/*
+		 * If the last node is before, advance one to find the next.
+		 */
+		n = parent;
+		if (entry->end < start) {
+			n = rb_next(n);
+			if (!n)
+				goto do_whole;
+		}
+		next = rb_entry(n, struct block_range, node);
+
+		if (next->start <= end) { /* add head: [start...][n->start...] */
+			struct block_range *head = malloc(sizeof(struct block_range));
+			if (!head)
+				return iter;
+
+			*head = (struct block_range){
+				.start		= start,
+				.end		= next->start - 1,
+				.is_target	= 1,
+				.is_branch	= 0,
+			};
+
+			rb_link_left_of_node(&head->node, &next->node);
+			rb_insert_color(&head->node, &block_ranges.root);
+			block_range__debug();
+
+			iter.start = head;
+			goto do_tail;
+		}
+
+do_whole:
+		/*
+		 * The whole [start..end] range is non-overlapping.
+		 */
+		entry = malloc(sizeof(struct block_range));
+		if (!entry)
+			return iter;
+
+		*entry = (struct block_range){
+			.start		= start,
+			.end		= end,
+			.is_target	= 1,
+			.is_branch	= 1,
+		};
+
+		rb_link_node(&entry->node, parent, p);
+		rb_insert_color(&entry->node, &block_ranges.root);
+		block_range__debug();
+
+		iter.start = entry;
+		iter.end   = entry;
+		goto done;
+	}
+
+	/*
+	 * We found a range that overlapped with ours, split if needed.
+	 */
+	if (entry->start < start) { /* split: [e->start...][start...] */
+		struct block_range *head = malloc(sizeof(struct block_range));
+		if (!head)
+			return iter;
+
+		*head = (struct block_range){
+			.start		= entry->start,
+			.end		= start - 1,
+			.is_target	= entry->is_target,
+			.is_branch	= 0,
+
+			.coverage	= entry->coverage,
+			.entry		= entry->entry,
+		};
+
+		entry->start		= start;
+		entry->is_target	= 1;
+		entry->entry		= 0;
+
+		rb_link_left_of_node(&head->node, &entry->node);
+		rb_insert_color(&head->node, &block_ranges.root);
+		block_range__debug();
+
+	} else if (entry->start == start)
+		entry->is_target = 1;
+
+	iter.start = entry;
+
+do_tail:
+	/*
+	 * At this point we've got: @iter.start = [@start...] but @end can still be
+	 * inside or beyond it.
+	 */
+	entry = iter.start;
+	for (;;) {
+		/*
+		 * If @end is inside @entry, split.
+		 */
+		if (end < entry->end) { /* split: [...end][...e->end] */
+			struct block_range *tail = malloc(sizeof(struct block_range));
+			if (!tail)
+				return iter;
+
+			*tail = (struct block_range){
+				.start		= end + 1,
+				.end		= entry->end,
+				.is_target	= 0,
+				.is_branch	= entry->is_branch,
+
+				.coverage	= entry->coverage,
+				.taken		= entry->taken,
+				.pred		= entry->pred,
+			};
+
+			entry->end		= end;
+			entry->is_branch	= 1;
+			entry->taken		= 0;
+			entry->pred		= 0;
+
+			rb_link_right_of_node(&tail->node, &entry->node);
+			rb_insert_color(&tail->node, &block_ranges.root);
+			block_range__debug();
+
+			iter.end = entry;
+			goto done;
+		}
+
+		/*
+		 * If @end matches @entry, done
+		 */
+		if (end == entry->end) {
+			entry->is_branch = 1;
+			iter.end = entry;
+			goto done;
+		}
+
+		next = block_range__next(entry);
+		if (!next)
+			goto add_tail;
+
+		/*
+		 * If @end is in beyond @entry but not inside @next, add tail.
+		 */
+		if (end < next->start) { /* add tail: [...e->end][...end] */
+			struct block_range *tail;
+add_tail:
+			tail = malloc(sizeof(struct block_range));
+			if (!tail)
+				return iter;
+
+			*tail = (struct block_range){
+				.start		= entry->end + 1,
+				.end		= end,
+				.is_target	= 0,
+				.is_branch	= 1,
+			};
+
+			rb_link_right_of_node(&tail->node, &entry->node);
+			rb_insert_color(&tail->node, &block_ranges.root);
+			block_range__debug();
+
+			iter.end = tail;
+			goto done;
+		}
+
+		/*
+		 * If there is a hole between @entry and @next, fill it.
+		 */
+		if (entry->end + 1 != next->start) {
+			struct block_range *hole = malloc(sizeof(struct block_range));
+			if (!hole)
+				return iter;
+
+			*hole = (struct block_range){
+				.start		= entry->end + 1,
+				.end		= next->start - 1,
+				.is_target	= 0,
+				.is_branch	= 0,
+			};
+
+			rb_link_left_of_node(&hole->node, &next->node);
+			rb_insert_color(&hole->node, &block_ranges.root);
+			block_range__debug();
+		}
+
+		entry = next;
+	}
+
+done:
+	assert(iter.start->start == start && iter.start->is_target);
+	assert(iter.end->end == end && iter.end->is_branch);
+
+	block_ranges.blocks++;
+
+	return iter;
+}
+
+
+/*
+ * Compute coverage as:
+ *
+ *    br->coverage / br->sym->max_coverage
+ *
+ * This ensures each symbol has a 100% spot, to reflect that each symbol has a
+ * most covered section.
+ *
+ * Returns [0-1] for coverage and -1 if we had no data what so ever or the
+ * symbol does not exist.
+ */
+double block_range__coverage(struct block_range *br)
+{
+	struct symbol *sym;
+
+	if (!br) {
+		if (block_ranges.blocks)
+			return 0;
+
+		return -1;
+	}
+
+	sym = br->sym;
+	if (!sym)
+		return -1;
+
+	return (double)br->coverage / symbol__annotation(sym)->max_coverage;
+}
diff --git a/tools/perf/util/block-range.h b/tools/perf/util/block-range.h
new file mode 100644
index 000000000..ec0fb534b
--- /dev/null
+++ b/tools/perf/util/block-range.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_BLOCK_RANGE_H
+#define __PERF_BLOCK_RANGE_H
+
+#include <stdbool.h>
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+struct symbol;
+
+/*
+ * struct block_range - non-overlapping parts of basic blocks
+ * @node:	treenode
+ * @start:	inclusive start of range
+ * @end:	inclusive end of range
+ * @is_target:	@start is a jump target
+ * @is_branch:	@end is a branch instruction
+ * @coverage:	number of blocks that cover this range
+ * @taken:	number of times the branch is taken (requires @is_branch)
+ * @pred:	number of times the taken branch was predicted
+ */
+struct block_range {
+	struct rb_node node;
+
+	struct symbol *sym;
+
+	u64 start;
+	u64 end;
+
+	int is_target, is_branch;
+
+	u64 coverage;
+	u64 entry;
+	u64 taken;
+	u64 pred;
+};
+
+static inline struct block_range *block_range__next(struct block_range *br)
+{
+	struct rb_node *n = rb_next(&br->node);
+	if (!n)
+		return NULL;
+	return rb_entry(n, struct block_range, node);
+}
+
+struct block_range_iter {
+	struct block_range *start;
+	struct block_range *end;
+};
+
+static inline struct block_range *block_range_iter(struct block_range_iter *iter)
+{
+	return iter->start;
+}
+
+static inline bool block_range_iter__next(struct block_range_iter *iter)
+{
+	if (iter->start == iter->end)
+		return false;
+
+	iter->start = block_range__next(iter->start);
+	return true;
+}
+
+static inline bool block_range_iter__valid(struct block_range_iter *iter)
+{
+	if (!iter->start || !iter->end)
+		return false;
+	return true;
+}
+
+extern struct block_range *block_range__find(u64 addr);
+extern struct block_range_iter block_range__create(u64 start, u64 end);
+extern double block_range__coverage(struct block_range *br);
+
+#endif /* __PERF_BLOCK_RANGE_H */
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
new file mode 100644
index 000000000..91c7bfa82
--- /dev/null
+++ b/tools/perf/util/bpf-event.c
@@ -0,0 +1,649 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdlib.h>
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#include <linux/btf.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <internal/lib.h>
+#include <symbol/kallsyms.h>
+#include "bpf-event.h"
+#include "bpf-utils.h"
+#include "debug.h"
+#include "dso.h"
+#include "symbol.h"
+#include "machine.h"
+#include "env.h"
+#include "session.h"
+#include "map.h"
+#include "evlist.h"
+#include "record.h"
+#include "util/synthetic-events.h"
+
+#ifndef HAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
+struct btf *btf__load_from_kernel_by_id(__u32 id)
+{
+       struct btf *btf;
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+       int err = btf__get_from_id(id, &btf);
+#pragma GCC diagnostic pop
+
+       return err ? ERR_PTR(err) : btf;
+}
+#endif
+
+#ifndef HAVE_LIBBPF_BPF_PROG_LOAD
+LIBBPF_API int bpf_load_program(enum bpf_prog_type type,
+				const struct bpf_insn *insns, size_t insns_cnt,
+				const char *license, __u32 kern_version,
+				char *log_buf, size_t log_buf_sz);
+
+int bpf_prog_load(enum bpf_prog_type prog_type,
+		  const char *prog_name __maybe_unused,
+		  const char *license,
+		  const struct bpf_insn *insns, size_t insn_cnt,
+		  const struct bpf_prog_load_opts *opts)
+{
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+	return bpf_load_program(prog_type, insns, insn_cnt, license,
+				opts->kern_version, opts->log_buf, opts->log_size);
+#pragma GCC diagnostic pop
+}
+#endif
+
+#ifndef HAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM
+struct bpf_program *
+bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
+{
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+	return bpf_program__next(prev, obj);
+#pragma GCC diagnostic pop
+}
+#endif
+
+#ifndef HAVE_LIBBPF_BPF_OBJECT__NEXT_MAP
+struct bpf_map *
+bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
+{
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+	return bpf_map__next(prev, obj);
+#pragma GCC diagnostic pop
+}
+#endif
+
+#ifndef HAVE_LIBBPF_BTF__RAW_DATA
+const void *
+btf__raw_data(const struct btf *btf_ro, __u32 *size)
+{
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+	return btf__get_raw_data(btf_ro, size);
+#pragma GCC diagnostic pop
+}
+#endif
+
+static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len)
+{
+	int ret = 0;
+	size_t i;
+
+	for (i = 0; i < len; i++)
+		ret += snprintf(buf + ret, size - ret, "%02x", data[i]);
+	return ret;
+}
+
+static int machine__process_bpf_event_load(struct machine *machine,
+					   union perf_event *event,
+					   struct perf_sample *sample __maybe_unused)
+{
+	struct bpf_prog_info_node *info_node;
+	struct perf_env *env = machine->env;
+	struct perf_bpil *info_linear;
+	int id = event->bpf.id;
+	unsigned int i;
+
+	/* perf-record, no need to handle bpf-event */
+	if (env == NULL)
+		return 0;
+
+	info_node = perf_env__find_bpf_prog_info(env, id);
+	if (!info_node)
+		return 0;
+	info_linear = info_node->info_linear;
+
+	for (i = 0; i < info_linear->info.nr_jited_ksyms; i++) {
+		u64 *addrs = (u64 *)(uintptr_t)(info_linear->info.jited_ksyms);
+		u64 addr = addrs[i];
+		struct map *map = maps__find(machine__kernel_maps(machine), addr);
+
+		if (map) {
+			map->dso->binary_type = DSO_BINARY_TYPE__BPF_PROG_INFO;
+			map->dso->bpf_prog.id = id;
+			map->dso->bpf_prog.sub_id = i;
+			map->dso->bpf_prog.env = env;
+		}
+	}
+	return 0;
+}
+
+int machine__process_bpf(struct machine *machine, union perf_event *event,
+			 struct perf_sample *sample)
+{
+	if (dump_trace)
+		perf_event__fprintf_bpf(event, stdout);
+
+	switch (event->bpf.type) {
+	case PERF_BPF_EVENT_PROG_LOAD:
+		return machine__process_bpf_event_load(machine, event, sample);
+
+	case PERF_BPF_EVENT_PROG_UNLOAD:
+		/*
+		 * Do not free bpf_prog_info and btf of the program here,
+		 * as annotation still need them. They will be freed at
+		 * the end of the session.
+		 */
+		break;
+	default:
+		pr_debug("unexpected bpf event type of %d\n", event->bpf.type);
+		break;
+	}
+	return 0;
+}
+
+static int perf_env__fetch_btf(struct perf_env *env,
+			       u32 btf_id,
+			       struct btf *btf)
+{
+	struct btf_node *node;
+	u32 data_size;
+	const void *data;
+
+	data = btf__raw_data(btf, &data_size);
+
+	node = malloc(data_size + sizeof(struct btf_node));
+	if (!node)
+		return -1;
+
+	node->id = btf_id;
+	node->data_size = data_size;
+	memcpy(node->data, data, data_size);
+
+	if (!perf_env__insert_btf(env, node)) {
+		/* Insertion failed because of a duplicate. */
+		free(node);
+		return -1;
+	}
+	return 0;
+}
+
+static int synthesize_bpf_prog_name(char *buf, int size,
+				    struct bpf_prog_info *info,
+				    struct btf *btf,
+				    u32 sub_id)
+{
+	u8 (*prog_tags)[BPF_TAG_SIZE] = (void *)(uintptr_t)(info->prog_tags);
+	void *func_infos = (void *)(uintptr_t)(info->func_info);
+	u32 sub_prog_cnt = info->nr_jited_ksyms;
+	const struct bpf_func_info *finfo;
+	const char *short_name = NULL;
+	const struct btf_type *t;
+	int name_len;
+
+	name_len = snprintf(buf, size, "bpf_prog_");
+	name_len += snprintf_hex(buf + name_len, size - name_len,
+				 prog_tags[sub_id], BPF_TAG_SIZE);
+	if (btf) {
+		finfo = func_infos + sub_id * info->func_info_rec_size;
+		t = btf__type_by_id(btf, finfo->type_id);
+		short_name = btf__name_by_offset(btf, t->name_off);
+	} else if (sub_id == 0 && sub_prog_cnt == 1) {
+		/* no subprog */
+		if (info->name[0])
+			short_name = info->name;
+	} else
+		short_name = "F";
+	if (short_name)
+		name_len += snprintf(buf + name_len, size - name_len,
+				     "_%s", short_name);
+	return name_len;
+}
+
+/*
+ * Synthesize PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT for one bpf
+ * program. One PERF_RECORD_BPF_EVENT is generated for the program. And
+ * one PERF_RECORD_KSYMBOL is generated for each sub program.
+ *
+ * Returns:
+ *    0 for success;
+ *   -1 for failures;
+ *   -2 for lack of kernel support.
+ */
+static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
+					       perf_event__handler_t process,
+					       struct machine *machine,
+					       int fd,
+					       union perf_event *event,
+					       struct record_opts *opts)
+{
+	struct perf_record_ksymbol *ksymbol_event = &event->ksymbol;
+	struct perf_record_bpf_event *bpf_event = &event->bpf;
+	struct perf_tool *tool = session->tool;
+	struct bpf_prog_info_node *info_node;
+	struct perf_bpil *info_linear;
+	struct bpf_prog_info *info;
+	struct btf *btf = NULL;
+	struct perf_env *env;
+	u32 sub_prog_cnt, i;
+	int err = 0;
+	u64 arrays;
+
+	/*
+	 * for perf-record and perf-report use header.env;
+	 * otherwise, use global perf_env.
+	 */
+	env = session->data ? &session->header.env : &perf_env;
+
+	arrays = 1UL << PERF_BPIL_JITED_KSYMS;
+	arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS;
+	arrays |= 1UL << PERF_BPIL_FUNC_INFO;
+	arrays |= 1UL << PERF_BPIL_PROG_TAGS;
+	arrays |= 1UL << PERF_BPIL_JITED_INSNS;
+	arrays |= 1UL << PERF_BPIL_LINE_INFO;
+	arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO;
+
+	info_linear = get_bpf_prog_info_linear(fd, arrays);
+	if (IS_ERR_OR_NULL(info_linear)) {
+		info_linear = NULL;
+		pr_debug("%s: failed to get BPF program info. aborting\n", __func__);
+		return -1;
+	}
+
+	if (info_linear->info_len < offsetof(struct bpf_prog_info, prog_tags)) {
+		free(info_linear);
+		pr_debug("%s: the kernel is too old, aborting\n", __func__);
+		return -2;
+	}
+
+	info = &info_linear->info;
+	if (!info->jited_ksyms) {
+		free(info_linear);
+		return -1;
+	}
+
+	/* number of ksyms, func_lengths, and tags should match */
+	sub_prog_cnt = info->nr_jited_ksyms;
+	if (sub_prog_cnt != info->nr_prog_tags ||
+	    sub_prog_cnt != info->nr_jited_func_lens) {
+		free(info_linear);
+		return -1;
+	}
+
+	/* check BTF func info support */
+	if (info->btf_id && info->nr_func_info && info->func_info_rec_size) {
+		/* btf func info number should be same as sub_prog_cnt */
+		if (sub_prog_cnt != info->nr_func_info) {
+			pr_debug("%s: mismatch in BPF sub program count and BTF function info count, aborting\n", __func__);
+			free(info_linear);
+			return -1;
+		}
+		btf = btf__load_from_kernel_by_id(info->btf_id);
+		if (libbpf_get_error(btf)) {
+			pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info->btf_id);
+			err = -1;
+			goto out;
+		}
+		perf_env__fetch_btf(env, info->btf_id, btf);
+	}
+
+	/* Synthesize PERF_RECORD_KSYMBOL */
+	for (i = 0; i < sub_prog_cnt; i++) {
+		__u32 *prog_lens = (__u32 *)(uintptr_t)(info->jited_func_lens);
+		__u64 *prog_addrs = (__u64 *)(uintptr_t)(info->jited_ksyms);
+		int name_len;
+
+		*ksymbol_event = (struct perf_record_ksymbol) {
+			.header = {
+				.type = PERF_RECORD_KSYMBOL,
+				.size = offsetof(struct perf_record_ksymbol, name),
+			},
+			.addr = prog_addrs[i],
+			.len = prog_lens[i],
+			.ksym_type = PERF_RECORD_KSYMBOL_TYPE_BPF,
+			.flags = 0,
+		};
+
+		name_len = synthesize_bpf_prog_name(ksymbol_event->name,
+						    KSYM_NAME_LEN, info, btf, i);
+		ksymbol_event->header.size += PERF_ALIGN(name_len + 1,
+							 sizeof(u64));
+
+		memset((void *)event + event->header.size, 0, machine->id_hdr_size);
+		event->header.size += machine->id_hdr_size;
+		err = perf_tool__process_synth_event(tool, event,
+						     machine, process);
+	}
+
+	if (!opts->no_bpf_event) {
+		/* Synthesize PERF_RECORD_BPF_EVENT */
+		*bpf_event = (struct perf_record_bpf_event) {
+			.header = {
+				.type = PERF_RECORD_BPF_EVENT,
+				.size = sizeof(struct perf_record_bpf_event),
+			},
+			.type = PERF_BPF_EVENT_PROG_LOAD,
+			.flags = 0,
+			.id = info->id,
+		};
+		memcpy(bpf_event->tag, info->tag, BPF_TAG_SIZE);
+		memset((void *)event + event->header.size, 0, machine->id_hdr_size);
+		event->header.size += machine->id_hdr_size;
+
+		/* save bpf_prog_info to env */
+		info_node = malloc(sizeof(struct bpf_prog_info_node));
+		if (!info_node) {
+			err = -1;
+			goto out;
+		}
+
+		info_node->info_linear = info_linear;
+		perf_env__insert_bpf_prog_info(env, info_node);
+		info_linear = NULL;
+
+		/*
+		 * process after saving bpf_prog_info to env, so that
+		 * required information is ready for look up
+		 */
+		err = perf_tool__process_synth_event(tool, event,
+						     machine, process);
+	}
+
+out:
+	free(info_linear);
+	btf__free(btf);
+	return err ? -1 : 0;
+}
+
+struct kallsyms_parse {
+	union perf_event	*event;
+	perf_event__handler_t	 process;
+	struct machine		*machine;
+	struct perf_tool	*tool;
+};
+
+static int
+process_bpf_image(char *name, u64 addr, struct kallsyms_parse *data)
+{
+	struct machine *machine = data->machine;
+	union perf_event *event = data->event;
+	struct perf_record_ksymbol *ksymbol;
+	int len;
+
+	ksymbol = &event->ksymbol;
+
+	*ksymbol = (struct perf_record_ksymbol) {
+		.header = {
+			.type = PERF_RECORD_KSYMBOL,
+			.size = offsetof(struct perf_record_ksymbol, name),
+		},
+		.addr      = addr,
+		.len       = page_size,
+		.ksym_type = PERF_RECORD_KSYMBOL_TYPE_BPF,
+		.flags     = 0,
+	};
+
+	len = scnprintf(ksymbol->name, KSYM_NAME_LEN, "%s", name);
+	ksymbol->header.size += PERF_ALIGN(len + 1, sizeof(u64));
+	memset((void *) event + event->header.size, 0, machine->id_hdr_size);
+	event->header.size += machine->id_hdr_size;
+
+	return perf_tool__process_synth_event(data->tool, event, machine,
+					      data->process);
+}
+
+static int
+kallsyms_process_symbol(void *data, const char *_name,
+			char type __maybe_unused, u64 start)
+{
+	char disp[KSYM_NAME_LEN];
+	char *module, *name;
+	unsigned long id;
+	int err = 0;
+
+	module = strchr(_name, '\t');
+	if (!module)
+		return 0;
+
+	/* We are going after [bpf] module ... */
+	if (strcmp(module + 1, "[bpf]"))
+		return 0;
+
+	name = memdup(_name, (module - _name) + 1);
+	if (!name)
+		return -ENOMEM;
+
+	name[module - _name] = 0;
+
+	/* .. and only for trampolines and dispatchers */
+	if ((sscanf(name, "bpf_trampoline_%lu", &id) == 1) ||
+	    (sscanf(name, "bpf_dispatcher_%s", disp) == 1))
+		err = process_bpf_image(name, start, data);
+
+	free(name);
+	return err;
+}
+
+int perf_event__synthesize_bpf_events(struct perf_session *session,
+				      perf_event__handler_t process,
+				      struct machine *machine,
+				      struct record_opts *opts)
+{
+	const char *kallsyms_filename = "/proc/kallsyms";
+	struct kallsyms_parse arg;
+	union perf_event *event;
+	__u32 id = 0;
+	int err;
+	int fd;
+
+	event = malloc(sizeof(event->bpf) + KSYM_NAME_LEN + machine->id_hdr_size);
+	if (!event)
+		return -1;
+
+	/* Synthesize all the bpf programs in system. */
+	while (true) {
+		err = bpf_prog_get_next_id(id, &id);
+		if (err) {
+			if (errno == ENOENT) {
+				err = 0;
+				break;
+			}
+			pr_debug("%s: can't get next program: %s%s\n",
+				 __func__, strerror(errno),
+				 errno == EINVAL ? " -- kernel too old?" : "");
+			/* don't report error on old kernel or EPERM  */
+			err = (errno == EINVAL || errno == EPERM) ? 0 : -1;
+			break;
+		}
+		fd = bpf_prog_get_fd_by_id(id);
+		if (fd < 0) {
+			pr_debug("%s: failed to get fd for prog_id %u\n",
+				 __func__, id);
+			continue;
+		}
+
+		err = perf_event__synthesize_one_bpf_prog(session, process,
+							  machine, fd,
+							  event, opts);
+		close(fd);
+		if (err) {
+			/* do not return error for old kernel */
+			if (err == -2)
+				err = 0;
+			break;
+		}
+	}
+
+	/* Synthesize all the bpf images - trampolines/dispatchers. */
+	if (symbol_conf.kallsyms_name != NULL)
+		kallsyms_filename = symbol_conf.kallsyms_name;
+
+	arg = (struct kallsyms_parse) {
+		.event   = event,
+		.process = process,
+		.machine = machine,
+		.tool    = session->tool,
+	};
+
+	if (kallsyms__parse(kallsyms_filename, &arg, kallsyms_process_symbol)) {
+		pr_err("%s: failed to synthesize bpf images: %s\n",
+		       __func__, strerror(errno));
+	}
+
+	free(event);
+	return err;
+}
+
+static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
+{
+	struct bpf_prog_info_node *info_node;
+	struct perf_bpil *info_linear;
+	struct btf *btf = NULL;
+	u64 arrays;
+	u32 btf_id;
+	int fd;
+
+	fd = bpf_prog_get_fd_by_id(id);
+	if (fd < 0)
+		return;
+
+	arrays = 1UL << PERF_BPIL_JITED_KSYMS;
+	arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS;
+	arrays |= 1UL << PERF_BPIL_FUNC_INFO;
+	arrays |= 1UL << PERF_BPIL_PROG_TAGS;
+	arrays |= 1UL << PERF_BPIL_JITED_INSNS;
+	arrays |= 1UL << PERF_BPIL_LINE_INFO;
+	arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO;
+
+	info_linear = get_bpf_prog_info_linear(fd, arrays);
+	if (IS_ERR_OR_NULL(info_linear)) {
+		pr_debug("%s: failed to get BPF program info. aborting\n", __func__);
+		goto out;
+	}
+
+	btf_id = info_linear->info.btf_id;
+
+	info_node = malloc(sizeof(struct bpf_prog_info_node));
+	if (info_node) {
+		info_node->info_linear = info_linear;
+		perf_env__insert_bpf_prog_info(env, info_node);
+	} else
+		free(info_linear);
+
+	if (btf_id == 0)
+		goto out;
+
+	btf = btf__load_from_kernel_by_id(btf_id);
+	if (libbpf_get_error(btf)) {
+		pr_debug("%s: failed to get BTF of id %u, aborting\n",
+			 __func__, btf_id);
+		goto out;
+	}
+	perf_env__fetch_btf(env, btf_id, btf);
+
+out:
+	btf__free(btf);
+	close(fd);
+}
+
+static int bpf_event__sb_cb(union perf_event *event, void *data)
+{
+	struct perf_env *env = data;
+
+	if (event->header.type != PERF_RECORD_BPF_EVENT)
+		return -1;
+
+	switch (event->bpf.type) {
+	case PERF_BPF_EVENT_PROG_LOAD:
+		perf_env__add_bpf_info(env, event->bpf.id);
+
+	case PERF_BPF_EVENT_PROG_UNLOAD:
+		/*
+		 * Do not free bpf_prog_info and btf of the program here,
+		 * as annotation still need them. They will be freed at
+		 * the end of the session.
+		 */
+		break;
+	default:
+		pr_debug("unexpected bpf event type of %d\n", event->bpf.type);
+		break;
+	}
+
+	return 0;
+}
+
+int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env)
+{
+	struct perf_event_attr attr = {
+		.type	          = PERF_TYPE_SOFTWARE,
+		.config           = PERF_COUNT_SW_DUMMY,
+		.sample_id_all    = 1,
+		.watermark        = 1,
+		.bpf_event        = 1,
+		.size	   = sizeof(attr), /* to capture ABI version */
+	};
+
+	/*
+	 * Older gcc versions don't support designated initializers, like above,
+	 * for unnamed union members, such as the following:
+	 */
+	attr.wakeup_watermark = 1;
+
+	return evlist__add_sb_event(evlist, &attr, bpf_event__sb_cb, env);
+}
+
+void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info,
+				      struct perf_env *env,
+				      FILE *fp)
+{
+	__u32 *prog_lens = (__u32 *)(uintptr_t)(info->jited_func_lens);
+	__u64 *prog_addrs = (__u64 *)(uintptr_t)(info->jited_ksyms);
+	char name[KSYM_NAME_LEN];
+	struct btf *btf = NULL;
+	u32 sub_prog_cnt, i;
+
+	sub_prog_cnt = info->nr_jited_ksyms;
+	if (sub_prog_cnt != info->nr_prog_tags ||
+	    sub_prog_cnt != info->nr_jited_func_lens)
+		return;
+
+	if (info->btf_id) {
+		struct btf_node *node;
+
+		node = __perf_env__find_btf(env, info->btf_id);
+		if (node)
+			btf = btf__new((__u8 *)(node->data),
+				       node->data_size);
+	}
+
+	if (sub_prog_cnt == 1) {
+		synthesize_bpf_prog_name(name, KSYM_NAME_LEN, info, btf, 0);
+		fprintf(fp, "# bpf_prog_info %u: %s addr 0x%llx size %u\n",
+			info->id, name, prog_addrs[0], prog_lens[0]);
+		goto out;
+	}
+
+	fprintf(fp, "# bpf_prog_info %u:\n", info->id);
+	for (i = 0; i < sub_prog_cnt; i++) {
+		synthesize_bpf_prog_name(name, KSYM_NAME_LEN, info, btf, i);
+
+		fprintf(fp, "# \tsub_prog %u: %s addr 0x%llx size %u\n",
+			i, name, prog_addrs[i], prog_lens[i]);
+	}
+out:
+	btf__free(btf);
+}
diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
new file mode 100644
index 000000000..e2f042090
--- /dev/null
+++ b/tools/perf/util/bpf-event.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_BPF_EVENT_H
+#define __PERF_BPF_EVENT_H
+
+#include <linux/compiler.h>
+#include <linux/rbtree.h>
+#include <api/fd/array.h>
+#include <stdio.h>
+
+struct bpf_prog_info;
+struct machine;
+union perf_event;
+struct perf_env;
+struct perf_sample;
+struct perf_session;
+struct record_opts;
+struct evlist;
+struct target;
+
+struct bpf_prog_info_node {
+	struct perf_bpil		*info_linear;
+	struct rb_node			rb_node;
+};
+
+struct btf_node {
+	struct rb_node	rb_node;
+	u32		id;
+	u32		data_size;
+	char		data[];
+};
+
+#ifdef HAVE_LIBBPF_SUPPORT
+int machine__process_bpf(struct machine *machine, union perf_event *event,
+			 struct perf_sample *sample);
+int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env);
+void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info,
+				      struct perf_env *env,
+				      FILE *fp);
+#else
+static inline int machine__process_bpf(struct machine *machine __maybe_unused,
+				       union perf_event *event __maybe_unused,
+				       struct perf_sample *sample __maybe_unused)
+{
+	return 0;
+}
+
+static inline int evlist__add_bpf_sb_event(struct evlist *evlist __maybe_unused,
+					   struct perf_env *env __maybe_unused)
+{
+	return 0;
+}
+
+static inline void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info __maybe_unused,
+						    struct perf_env *env __maybe_unused,
+						    FILE *fp __maybe_unused)
+{
+
+}
+#endif // HAVE_LIBBPF_SUPPORT
+#endif
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
new file mode 100644
index 000000000..f4adeccdb
--- /dev/null
+++ b/tools/perf/util/bpf-loader.c
@@ -0,0 +1,2133 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * bpf-loader.c
+ *
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ */
+
+#include <linux/bpf.h>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include <linux/filter.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <errno.h>
+#include <stdlib.h>
+#include "debug.h"
+#include "evlist.h"
+#include "bpf-loader.h"
+#include "bpf-prologue.h"
+#include "probe-event.h"
+#include "probe-finder.h" // for MAX_PROBES
+#include "parse-events.h"
+#include "strfilter.h"
+#include "util.h"
+#include "llvm-utils.h"
+#include "c++/clang-c.h"
+#ifdef HAVE_LIBBPF_SUPPORT
+#include <bpf/hashmap.h>
+#else
+#include "util/hashmap.h"
+#endif
+#include "asm/bug.h"
+
+#include <internal/xyarray.h>
+
+#ifndef HAVE_LIBBPF_BPF_PROGRAM__SET_INSNS
+int bpf_program__set_insns(struct bpf_program *prog __maybe_unused,
+			   struct bpf_insn *new_insns __maybe_unused, size_t new_insn_cnt __maybe_unused)
+{
+	pr_err("%s: not support, update libbpf\n", __func__);
+	return -ENOTSUP;
+}
+
+int libbpf_register_prog_handler(const char *sec __maybe_unused,
+                                 enum bpf_prog_type prog_type __maybe_unused,
+                                 enum bpf_attach_type exp_attach_type __maybe_unused,
+                                 const struct libbpf_prog_handler_opts *opts __maybe_unused)
+{
+	pr_err("%s: not support, update libbpf\n", __func__);
+	return -ENOTSUP;
+}
+#endif
+
+/* temporarily disable libbpf deprecation warnings */
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+
+static int libbpf_perf_print(enum libbpf_print_level level __attribute__((unused)),
+			      const char *fmt, va_list args)
+{
+	return veprintf(1, verbose, pr_fmt(fmt), args);
+}
+
+struct bpf_prog_priv {
+	bool is_tp;
+	char *sys_name;
+	char *evt_name;
+	struct perf_probe_event pev;
+	bool need_prologue;
+	struct bpf_insn *insns_buf;
+	int nr_types;
+	int *type_mapping;
+	int *prologue_fds;
+};
+
+struct bpf_perf_object {
+	struct list_head list;
+	struct bpf_object *obj;
+};
+
+struct bpf_preproc_result {
+	struct bpf_insn *new_insn_ptr;
+	int new_insn_cnt;
+};
+
+static LIST_HEAD(bpf_objects_list);
+static struct hashmap *bpf_program_hash;
+static struct hashmap *bpf_map_hash;
+
+static struct bpf_perf_object *
+bpf_perf_object__next(struct bpf_perf_object *prev)
+{
+	if (!prev) {
+		if (list_empty(&bpf_objects_list))
+			return NULL;
+
+		return list_first_entry(&bpf_objects_list, struct bpf_perf_object, list);
+	}
+	if (list_is_last(&prev->list, &bpf_objects_list))
+		return NULL;
+
+	return list_next_entry(prev, list);
+}
+
+#define bpf_perf_object__for_each(perf_obj, tmp)	\
+	for ((perf_obj) = bpf_perf_object__next(NULL),	\
+	     (tmp) = bpf_perf_object__next(perf_obj);	\
+	     (perf_obj) != NULL;			\
+	     (perf_obj) = (tmp), (tmp) = bpf_perf_object__next(tmp))
+
+static bool libbpf_initialized;
+static int libbpf_sec_handler;
+
+static int bpf_perf_object__add(struct bpf_object *obj)
+{
+	struct bpf_perf_object *perf_obj = zalloc(sizeof(*perf_obj));
+
+	if (perf_obj) {
+		INIT_LIST_HEAD(&perf_obj->list);
+		perf_obj->obj = obj;
+		list_add_tail(&perf_obj->list, &bpf_objects_list);
+	}
+	return perf_obj ? 0 : -ENOMEM;
+}
+
+static void *program_priv(const struct bpf_program *prog)
+{
+	void *priv;
+
+	if (IS_ERR_OR_NULL(bpf_program_hash))
+		return NULL;
+	if (!hashmap__find(bpf_program_hash, prog, &priv))
+		return NULL;
+	return priv;
+}
+
+static struct bpf_insn prologue_init_insn[] = {
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_MOV64_IMM(BPF_REG_3, 0),
+	BPF_MOV64_IMM(BPF_REG_4, 0),
+	BPF_MOV64_IMM(BPF_REG_5, 0),
+};
+
+static int libbpf_prog_prepare_load_fn(struct bpf_program *prog,
+				       struct bpf_prog_load_opts *opts __maybe_unused,
+				       long cookie __maybe_unused)
+{
+	size_t init_size_cnt = ARRAY_SIZE(prologue_init_insn);
+	size_t orig_insn_cnt, insn_cnt, init_size, orig_size;
+	struct bpf_prog_priv *priv = program_priv(prog);
+	const struct bpf_insn *orig_insn;
+	struct bpf_insn *insn;
+
+	if (IS_ERR_OR_NULL(priv)) {
+		pr_debug("bpf: failed to get private field\n");
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+
+	if (!priv->need_prologue)
+		return 0;
+
+	/* prepend initialization code to program instructions */
+	orig_insn = bpf_program__insns(prog);
+	orig_insn_cnt = bpf_program__insn_cnt(prog);
+	init_size = init_size_cnt * sizeof(*insn);
+	orig_size = orig_insn_cnt * sizeof(*insn);
+
+	insn_cnt = orig_insn_cnt + init_size_cnt;
+	insn = malloc(insn_cnt * sizeof(*insn));
+	if (!insn)
+		return -ENOMEM;
+
+	memcpy(insn, prologue_init_insn, init_size);
+	memcpy((char *) insn + init_size, orig_insn, orig_size);
+	bpf_program__set_insns(prog, insn, insn_cnt);
+	return 0;
+}
+
+static int libbpf_init(void)
+{
+	LIBBPF_OPTS(libbpf_prog_handler_opts, handler_opts,
+		.prog_prepare_load_fn = libbpf_prog_prepare_load_fn,
+	);
+
+	if (libbpf_initialized)
+		return 0;
+
+	libbpf_set_print(libbpf_perf_print);
+	libbpf_sec_handler = libbpf_register_prog_handler(NULL, BPF_PROG_TYPE_KPROBE,
+							  0, &handler_opts);
+	if (libbpf_sec_handler < 0) {
+		pr_debug("bpf: failed to register libbpf section handler: %d\n",
+			 libbpf_sec_handler);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+	libbpf_initialized = true;
+	return 0;
+}
+
+struct bpf_object *
+bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name)
+{
+	LIBBPF_OPTS(bpf_object_open_opts, opts, .object_name = name);
+	struct bpf_object *obj;
+	int err;
+
+	err = libbpf_init();
+	if (err)
+		return ERR_PTR(err);
+
+	obj = bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
+	if (IS_ERR_OR_NULL(obj)) {
+		pr_debug("bpf: failed to load buffer\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (bpf_perf_object__add(obj)) {
+		bpf_object__close(obj);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return obj;
+}
+
+static void bpf_perf_object__close(struct bpf_perf_object *perf_obj)
+{
+	list_del(&perf_obj->list);
+	bpf_object__close(perf_obj->obj);
+	free(perf_obj);
+}
+
+struct bpf_object *bpf__prepare_load(const char *filename, bool source)
+{
+	LIBBPF_OPTS(bpf_object_open_opts, opts, .object_name = filename);
+	struct bpf_object *obj;
+	int err;
+
+	err = libbpf_init();
+	if (err)
+		return ERR_PTR(err);
+
+	if (source) {
+		void *obj_buf;
+		size_t obj_buf_sz;
+
+		perf_clang__init();
+		err = perf_clang__compile_bpf(filename, &obj_buf, &obj_buf_sz);
+		perf_clang__cleanup();
+		if (err) {
+			pr_debug("bpf: builtin compilation failed: %d, try external compiler\n", err);
+			err = llvm__compile_bpf(filename, &obj_buf, &obj_buf_sz);
+			if (err)
+				return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE);
+		} else
+			pr_debug("bpf: successful builtin compilation\n");
+		obj = bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
+
+		if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj)
+			llvm__dump_obj(filename, obj_buf, obj_buf_sz);
+
+		free(obj_buf);
+	} else {
+		obj = bpf_object__open(filename);
+	}
+
+	if (IS_ERR_OR_NULL(obj)) {
+		pr_debug("bpf: failed to load %s\n", filename);
+		return obj;
+	}
+
+	if (bpf_perf_object__add(obj)) {
+		bpf_object__close(obj);
+		return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE);
+	}
+
+	return obj;
+}
+
+static void close_prologue_programs(struct bpf_prog_priv *priv)
+{
+	struct perf_probe_event *pev;
+	int i, fd;
+
+	if (!priv->need_prologue)
+		return;
+	pev = &priv->pev;
+	for (i = 0; i < pev->ntevs; i++) {
+		fd = priv->prologue_fds[i];
+		if (fd != -1)
+			close(fd);
+	}
+}
+
+static void
+clear_prog_priv(const struct bpf_program *prog __maybe_unused,
+		void *_priv)
+{
+	struct bpf_prog_priv *priv = _priv;
+
+	close_prologue_programs(priv);
+	cleanup_perf_probe_events(&priv->pev, 1);
+	zfree(&priv->insns_buf);
+	zfree(&priv->prologue_fds);
+	zfree(&priv->type_mapping);
+	zfree(&priv->sys_name);
+	zfree(&priv->evt_name);
+	free(priv);
+}
+
+static void bpf_program_hash_free(void)
+{
+	struct hashmap_entry *cur;
+	size_t bkt;
+
+	if (IS_ERR_OR_NULL(bpf_program_hash))
+		return;
+
+	hashmap__for_each_entry(bpf_program_hash, cur, bkt)
+		clear_prog_priv(cur->key, cur->value);
+
+	hashmap__free(bpf_program_hash);
+	bpf_program_hash = NULL;
+}
+
+static void bpf_map_hash_free(void);
+
+void bpf__clear(void)
+{
+	struct bpf_perf_object *perf_obj, *tmp;
+
+	bpf_perf_object__for_each(perf_obj, tmp) {
+		bpf__unprobe(perf_obj->obj);
+		bpf_perf_object__close(perf_obj);
+	}
+
+	bpf_program_hash_free();
+	bpf_map_hash_free();
+}
+
+static size_t ptr_hash(const void *__key, void *ctx __maybe_unused)
+{
+	return (size_t) __key;
+}
+
+static bool ptr_equal(const void *key1, const void *key2,
+			  void *ctx __maybe_unused)
+{
+	return key1 == key2;
+}
+
+static int program_set_priv(struct bpf_program *prog, void *priv)
+{
+	void *old_priv;
+
+	/*
+	 * Should not happen, we warn about it in the
+	 * caller function - config_bpf_program
+	 */
+	if (IS_ERR(bpf_program_hash))
+		return PTR_ERR(bpf_program_hash);
+
+	if (!bpf_program_hash) {
+		bpf_program_hash = hashmap__new(ptr_hash, ptr_equal, NULL);
+		if (IS_ERR(bpf_program_hash))
+			return PTR_ERR(bpf_program_hash);
+	}
+
+	old_priv = program_priv(prog);
+	if (old_priv) {
+		clear_prog_priv(prog, old_priv);
+		return hashmap__set(bpf_program_hash, prog, priv, NULL, NULL);
+	}
+	return hashmap__add(bpf_program_hash, prog, priv);
+}
+
+static int
+prog_config__exec(const char *value, struct perf_probe_event *pev)
+{
+	pev->uprobes = true;
+	pev->target = strdup(value);
+	if (!pev->target)
+		return -ENOMEM;
+	return 0;
+}
+
+static int
+prog_config__module(const char *value, struct perf_probe_event *pev)
+{
+	pev->uprobes = false;
+	pev->target = strdup(value);
+	if (!pev->target)
+		return -ENOMEM;
+	return 0;
+}
+
+static int
+prog_config__bool(const char *value, bool *pbool, bool invert)
+{
+	int err;
+	bool bool_value;
+
+	if (!pbool)
+		return -EINVAL;
+
+	err = strtobool(value, &bool_value);
+	if (err)
+		return err;
+
+	*pbool = invert ? !bool_value : bool_value;
+	return 0;
+}
+
+static int
+prog_config__inlines(const char *value,
+		     struct perf_probe_event *pev __maybe_unused)
+{
+	return prog_config__bool(value, &probe_conf.no_inlines, true);
+}
+
+static int
+prog_config__force(const char *value,
+		   struct perf_probe_event *pev __maybe_unused)
+{
+	return prog_config__bool(value, &probe_conf.force_add, false);
+}
+
+static struct {
+	const char *key;
+	const char *usage;
+	const char *desc;
+	int (*func)(const char *, struct perf_probe_event *);
+} bpf_prog_config_terms[] = {
+	{
+		.key	= "exec",
+		.usage	= "exec=<full path of file>",
+		.desc	= "Set uprobe target",
+		.func	= prog_config__exec,
+	},
+	{
+		.key	= "module",
+		.usage	= "module=<module name>    ",
+		.desc	= "Set kprobe module",
+		.func	= prog_config__module,
+	},
+	{
+		.key	= "inlines",
+		.usage	= "inlines=[yes|no]        ",
+		.desc	= "Probe at inline symbol",
+		.func	= prog_config__inlines,
+	},
+	{
+		.key	= "force",
+		.usage	= "force=[yes|no]          ",
+		.desc	= "Forcibly add events with existing name",
+		.func	= prog_config__force,
+	},
+};
+
+static int
+do_prog_config(const char *key, const char *value,
+	       struct perf_probe_event *pev)
+{
+	unsigned int i;
+
+	pr_debug("config bpf program: %s=%s\n", key, value);
+	for (i = 0; i < ARRAY_SIZE(bpf_prog_config_terms); i++)
+		if (strcmp(key, bpf_prog_config_terms[i].key) == 0)
+			return bpf_prog_config_terms[i].func(value, pev);
+
+	pr_debug("BPF: ERROR: invalid program config option: %s=%s\n",
+		 key, value);
+
+	pr_debug("\nHint: Valid options are:\n");
+	for (i = 0; i < ARRAY_SIZE(bpf_prog_config_terms); i++)
+		pr_debug("\t%s:\t%s\n", bpf_prog_config_terms[i].usage,
+			 bpf_prog_config_terms[i].desc);
+	pr_debug("\n");
+
+	return -BPF_LOADER_ERRNO__PROGCONF_TERM;
+}
+
+static const char *
+parse_prog_config_kvpair(const char *config_str, struct perf_probe_event *pev)
+{
+	char *text = strdup(config_str);
+	char *sep, *line;
+	const char *main_str = NULL;
+	int err = 0;
+
+	if (!text) {
+		pr_debug("Not enough memory: dup config_str failed\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	line = text;
+	while ((sep = strchr(line, ';'))) {
+		char *equ;
+
+		*sep = '\0';
+		equ = strchr(line, '=');
+		if (!equ) {
+			pr_warning("WARNING: invalid config in BPF object: %s\n",
+				   line);
+			pr_warning("\tShould be 'key=value'.\n");
+			goto nextline;
+		}
+		*equ = '\0';
+
+		err = do_prog_config(line, equ + 1, pev);
+		if (err)
+			break;
+nextline:
+		line = sep + 1;
+	}
+
+	if (!err)
+		main_str = config_str + (line - text);
+	free(text);
+
+	return err ? ERR_PTR(err) : main_str;
+}
+
+static int
+parse_prog_config(const char *config_str, const char **p_main_str,
+		  bool *is_tp, struct perf_probe_event *pev)
+{
+	int err;
+	const char *main_str = parse_prog_config_kvpair(config_str, pev);
+
+	if (IS_ERR(main_str))
+		return PTR_ERR(main_str);
+
+	*p_main_str = main_str;
+	if (!strchr(main_str, '=')) {
+		/* Is a tracepoint event? */
+		const char *s = strchr(main_str, ':');
+
+		if (!s) {
+			pr_debug("bpf: '%s' is not a valid tracepoint\n",
+				 config_str);
+			return -BPF_LOADER_ERRNO__CONFIG;
+		}
+
+		*is_tp = true;
+		return 0;
+	}
+
+	*is_tp = false;
+	err = parse_perf_probe_command(main_str, pev);
+	if (err < 0) {
+		pr_debug("bpf: '%s' is not a valid config string\n",
+			 config_str);
+		/* parse failed, don't need clear pev. */
+		return -BPF_LOADER_ERRNO__CONFIG;
+	}
+	return 0;
+}
+
+static int
+config_bpf_program(struct bpf_program *prog)
+{
+	struct perf_probe_event *pev = NULL;
+	struct bpf_prog_priv *priv = NULL;
+	const char *config_str, *main_str;
+	bool is_tp = false;
+	int err;
+
+	/* Initialize per-program probing setting */
+	probe_conf.no_inlines = false;
+	probe_conf.force_add = false;
+
+	priv = calloc(sizeof(*priv), 1);
+	if (!priv) {
+		pr_debug("bpf: failed to alloc priv\n");
+		return -ENOMEM;
+	}
+	pev = &priv->pev;
+
+	config_str = bpf_program__section_name(prog);
+	pr_debug("bpf: config program '%s'\n", config_str);
+	err = parse_prog_config(config_str, &main_str, &is_tp, pev);
+	if (err)
+		goto errout;
+
+	if (is_tp) {
+		char *s = strchr(main_str, ':');
+
+		priv->is_tp = true;
+		priv->sys_name = strndup(main_str, s - main_str);
+		priv->evt_name = strdup(s + 1);
+		goto set_priv;
+	}
+
+	if (pev->group && strcmp(pev->group, PERF_BPF_PROBE_GROUP)) {
+		pr_debug("bpf: '%s': group for event is set and not '%s'.\n",
+			 config_str, PERF_BPF_PROBE_GROUP);
+		err = -BPF_LOADER_ERRNO__GROUP;
+		goto errout;
+	} else if (!pev->group)
+		pev->group = strdup(PERF_BPF_PROBE_GROUP);
+
+	if (!pev->group) {
+		pr_debug("bpf: strdup failed\n");
+		err = -ENOMEM;
+		goto errout;
+	}
+
+	if (!pev->event) {
+		pr_debug("bpf: '%s': event name is missing. Section name should be 'key=value'\n",
+			 config_str);
+		err = -BPF_LOADER_ERRNO__EVENTNAME;
+		goto errout;
+	}
+	pr_debug("bpf: config '%s' is ok\n", config_str);
+
+set_priv:
+	err = program_set_priv(prog, priv);
+	if (err) {
+		pr_debug("Failed to set priv for program '%s'\n", config_str);
+		goto errout;
+	}
+
+	return 0;
+
+errout:
+	if (pev)
+		clear_perf_probe_event(pev);
+	free(priv);
+	return err;
+}
+
+static int bpf__prepare_probe(void)
+{
+	static int err = 0;
+	static bool initialized = false;
+
+	/*
+	 * Make err static, so if init failed the first, bpf__prepare_probe()
+	 * fails each time without calling init_probe_symbol_maps multiple
+	 * times.
+	 */
+	if (initialized)
+		return err;
+
+	initialized = true;
+	err = init_probe_symbol_maps(false);
+	if (err < 0)
+		pr_debug("Failed to init_probe_symbol_maps\n");
+	probe_conf.max_probes = MAX_PROBES;
+	return err;
+}
+
+static int
+preproc_gen_prologue(struct bpf_program *prog, int n,
+		     const struct bpf_insn *orig_insns, int orig_insns_cnt,
+		     struct bpf_preproc_result *res)
+{
+	struct bpf_prog_priv *priv = program_priv(prog);
+	struct probe_trace_event *tev;
+	struct perf_probe_event *pev;
+	struct bpf_insn *buf;
+	size_t prologue_cnt = 0;
+	int i, err;
+
+	if (IS_ERR_OR_NULL(priv) || priv->is_tp)
+		goto errout;
+
+	pev = &priv->pev;
+
+	if (n < 0 || n >= priv->nr_types)
+		goto errout;
+
+	/* Find a tev belongs to that type */
+	for (i = 0; i < pev->ntevs; i++) {
+		if (priv->type_mapping[i] == n)
+			break;
+	}
+
+	if (i >= pev->ntevs) {
+		pr_debug("Internal error: prologue type %d not found\n", n);
+		return -BPF_LOADER_ERRNO__PROLOGUE;
+	}
+
+	tev = &pev->tevs[i];
+
+	buf = priv->insns_buf;
+	err = bpf__gen_prologue(tev->args, tev->nargs,
+				buf, &prologue_cnt,
+				BPF_MAXINSNS - orig_insns_cnt);
+	if (err) {
+		const char *title;
+
+		title = bpf_program__section_name(prog);
+		pr_debug("Failed to generate prologue for program %s\n",
+			 title);
+		return err;
+	}
+
+	memcpy(&buf[prologue_cnt], orig_insns,
+	       sizeof(struct bpf_insn) * orig_insns_cnt);
+
+	res->new_insn_ptr = buf;
+	res->new_insn_cnt = prologue_cnt + orig_insns_cnt;
+	return 0;
+
+errout:
+	pr_debug("Internal error in preproc_gen_prologue\n");
+	return -BPF_LOADER_ERRNO__PROLOGUE;
+}
+
+/*
+ * compare_tev_args is reflexive, transitive and antisymmetric.
+ * I can proof it but this margin is too narrow to contain.
+ */
+static int compare_tev_args(const void *ptev1, const void *ptev2)
+{
+	int i, ret;
+	const struct probe_trace_event *tev1 =
+		*(const struct probe_trace_event **)ptev1;
+	const struct probe_trace_event *tev2 =
+		*(const struct probe_trace_event **)ptev2;
+
+	ret = tev2->nargs - tev1->nargs;
+	if (ret)
+		return ret;
+
+	for (i = 0; i < tev1->nargs; i++) {
+		struct probe_trace_arg *arg1, *arg2;
+		struct probe_trace_arg_ref *ref1, *ref2;
+
+		arg1 = &tev1->args[i];
+		arg2 = &tev2->args[i];
+
+		ret = strcmp(arg1->value, arg2->value);
+		if (ret)
+			return ret;
+
+		ref1 = arg1->ref;
+		ref2 = arg2->ref;
+
+		while (ref1 && ref2) {
+			ret = ref2->offset - ref1->offset;
+			if (ret)
+				return ret;
+
+			ref1 = ref1->next;
+			ref2 = ref2->next;
+		}
+
+		if (ref1 || ref2)
+			return ref2 ? 1 : -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Assign a type number to each tevs in a pev.
+ * mapping is an array with same slots as tevs in that pev.
+ * nr_types will be set to number of types.
+ */
+static int map_prologue(struct perf_probe_event *pev, int *mapping,
+			int *nr_types)
+{
+	int i, type = 0;
+	struct probe_trace_event **ptevs;
+
+	size_t array_sz = sizeof(*ptevs) * pev->ntevs;
+
+	ptevs = malloc(array_sz);
+	if (!ptevs) {
+		pr_debug("Not enough memory: alloc ptevs failed\n");
+		return -ENOMEM;
+	}
+
+	pr_debug("In map_prologue, ntevs=%d\n", pev->ntevs);
+	for (i = 0; i < pev->ntevs; i++)
+		ptevs[i] = &pev->tevs[i];
+
+	qsort(ptevs, pev->ntevs, sizeof(*ptevs),
+	      compare_tev_args);
+
+	for (i = 0; i < pev->ntevs; i++) {
+		int n;
+
+		n = ptevs[i] - pev->tevs;
+		if (i == 0) {
+			mapping[n] = type;
+			pr_debug("mapping[%d]=%d\n", n, type);
+			continue;
+		}
+
+		if (compare_tev_args(ptevs + i, ptevs + i - 1) == 0)
+			mapping[n] = type;
+		else
+			mapping[n] = ++type;
+
+		pr_debug("mapping[%d]=%d\n", n, mapping[n]);
+	}
+	free(ptevs);
+	*nr_types = type + 1;
+
+	return 0;
+}
+
+static int hook_load_preprocessor(struct bpf_program *prog)
+{
+	struct bpf_prog_priv *priv = program_priv(prog);
+	struct perf_probe_event *pev;
+	bool need_prologue = false;
+	int i;
+
+	if (IS_ERR_OR_NULL(priv)) {
+		pr_debug("Internal error when hook preprocessor\n");
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+
+	if (priv->is_tp) {
+		priv->need_prologue = false;
+		return 0;
+	}
+
+	pev = &priv->pev;
+	for (i = 0; i < pev->ntevs; i++) {
+		struct probe_trace_event *tev = &pev->tevs[i];
+
+		if (tev->nargs > 0) {
+			need_prologue = true;
+			break;
+		}
+	}
+
+	/*
+	 * Since all tevs don't have argument, we don't need generate
+	 * prologue.
+	 */
+	if (!need_prologue) {
+		priv->need_prologue = false;
+		return 0;
+	}
+
+	priv->need_prologue = true;
+	priv->insns_buf = malloc(sizeof(struct bpf_insn) * BPF_MAXINSNS);
+	if (!priv->insns_buf) {
+		pr_debug("Not enough memory: alloc insns_buf failed\n");
+		return -ENOMEM;
+	}
+
+	priv->prologue_fds = malloc(sizeof(int) * pev->ntevs);
+	if (!priv->prologue_fds) {
+		pr_debug("Not enough memory: alloc prologue fds failed\n");
+		return -ENOMEM;
+	}
+	memset(priv->prologue_fds, -1, sizeof(int) * pev->ntevs);
+
+	priv->type_mapping = malloc(sizeof(int) * pev->ntevs);
+	if (!priv->type_mapping) {
+		pr_debug("Not enough memory: alloc type_mapping failed\n");
+		return -ENOMEM;
+	}
+	memset(priv->type_mapping, -1,
+	       sizeof(int) * pev->ntevs);
+
+	return map_prologue(pev, priv->type_mapping, &priv->nr_types);
+}
+
+int bpf__probe(struct bpf_object *obj)
+{
+	int err = 0;
+	struct bpf_program *prog;
+	struct bpf_prog_priv *priv;
+	struct perf_probe_event *pev;
+
+	err = bpf__prepare_probe();
+	if (err) {
+		pr_debug("bpf__prepare_probe failed\n");
+		return err;
+	}
+
+	bpf_object__for_each_program(prog, obj) {
+		err = config_bpf_program(prog);
+		if (err)
+			goto out;
+
+		priv = program_priv(prog);
+		if (IS_ERR_OR_NULL(priv)) {
+			if (!priv)
+				err = -BPF_LOADER_ERRNO__INTERNAL;
+			else
+				err = PTR_ERR(priv);
+			goto out;
+		}
+
+		if (priv->is_tp) {
+			bpf_program__set_type(prog, BPF_PROG_TYPE_TRACEPOINT);
+			continue;
+		}
+
+		bpf_program__set_type(prog, BPF_PROG_TYPE_KPROBE);
+		pev = &priv->pev;
+
+		err = convert_perf_probe_events(pev, 1);
+		if (err < 0) {
+			pr_debug("bpf_probe: failed to convert perf probe events\n");
+			goto out;
+		}
+
+		err = apply_perf_probe_events(pev, 1);
+		if (err < 0) {
+			pr_debug("bpf_probe: failed to apply perf probe events\n");
+			goto out;
+		}
+
+		/*
+		 * After probing, let's consider prologue, which
+		 * adds program fetcher to BPF programs.
+		 *
+		 * hook_load_preprocessor() hooks pre-processor
+		 * to bpf_program, let it generate prologue
+		 * dynamically during loading.
+		 */
+		err = hook_load_preprocessor(prog);
+		if (err)
+			goto out;
+	}
+out:
+	return err < 0 ? err : 0;
+}
+
+#define EVENTS_WRITE_BUFSIZE  4096
+int bpf__unprobe(struct bpf_object *obj)
+{
+	int err, ret = 0;
+	struct bpf_program *prog;
+
+	bpf_object__for_each_program(prog, obj) {
+		struct bpf_prog_priv *priv = program_priv(prog);
+		int i;
+
+		if (IS_ERR_OR_NULL(priv) || priv->is_tp)
+			continue;
+
+		for (i = 0; i < priv->pev.ntevs; i++) {
+			struct probe_trace_event *tev = &priv->pev.tevs[i];
+			char name_buf[EVENTS_WRITE_BUFSIZE];
+			struct strfilter *delfilter;
+
+			snprintf(name_buf, EVENTS_WRITE_BUFSIZE,
+				 "%s:%s", tev->group, tev->event);
+			name_buf[EVENTS_WRITE_BUFSIZE - 1] = '\0';
+
+			delfilter = strfilter__new(name_buf, NULL);
+			if (!delfilter) {
+				pr_debug("Failed to create filter for unprobing\n");
+				ret = -ENOMEM;
+				continue;
+			}
+
+			err = del_perf_probe_events(delfilter);
+			strfilter__delete(delfilter);
+			if (err) {
+				pr_debug("Failed to delete %s\n", name_buf);
+				ret = err;
+				continue;
+			}
+		}
+	}
+	return ret;
+}
+
+static int bpf_object__load_prologue(struct bpf_object *obj)
+{
+	int init_cnt = ARRAY_SIZE(prologue_init_insn);
+	const struct bpf_insn *orig_insns;
+	struct bpf_preproc_result res;
+	struct perf_probe_event *pev;
+	struct bpf_program *prog;
+	int orig_insns_cnt;
+
+	bpf_object__for_each_program(prog, obj) {
+		struct bpf_prog_priv *priv = program_priv(prog);
+		int err, i, fd;
+
+		if (IS_ERR_OR_NULL(priv)) {
+			pr_debug("bpf: failed to get private field\n");
+			return -BPF_LOADER_ERRNO__INTERNAL;
+		}
+
+		if (!priv->need_prologue)
+			continue;
+
+		/*
+		 * For each program that needs prologue we do following:
+		 *
+		 * - take its current instructions and use them
+		 *   to generate the new code with prologue
+		 * - load new instructions with bpf_prog_load
+		 *   and keep the fd in prologue_fds
+		 * - new fd will be used in bpf__foreach_event
+		 *   to connect this program with perf evsel
+		 */
+		orig_insns = bpf_program__insns(prog);
+		orig_insns_cnt = bpf_program__insn_cnt(prog);
+
+		pev = &priv->pev;
+		for (i = 0; i < pev->ntevs; i++) {
+			/*
+			 * Skipping artificall prologue_init_insn instructions
+			 * (init_cnt), so the prologue can be generated instead
+			 * of them.
+			 */
+			err = preproc_gen_prologue(prog, i,
+						   orig_insns + init_cnt,
+						   orig_insns_cnt - init_cnt,
+						   &res);
+			if (err)
+				return err;
+
+			fd = bpf_prog_load(bpf_program__get_type(prog),
+					   bpf_program__name(prog), "GPL",
+					   res.new_insn_ptr,
+					   res.new_insn_cnt, NULL);
+			if (fd < 0) {
+				char bf[128];
+
+				libbpf_strerror(-errno, bf, sizeof(bf));
+				pr_debug("bpf: load objects with prologue failed: err=%d: (%s)\n",
+					 -errno, bf);
+				return -errno;
+			}
+			priv->prologue_fds[i] = fd;
+		}
+		/*
+		 * We no longer need the original program,
+		 * we can unload it.
+		 */
+		bpf_program__unload(prog);
+	}
+	return 0;
+}
+
+int bpf__load(struct bpf_object *obj)
+{
+	int err;
+
+	err = bpf_object__load(obj);
+	if (err) {
+		char bf[128];
+		libbpf_strerror(err, bf, sizeof(bf));
+		pr_debug("bpf: load objects failed: err=%d: (%s)\n", err, bf);
+		return err;
+	}
+	return bpf_object__load_prologue(obj);
+}
+
+int bpf__foreach_event(struct bpf_object *obj,
+		       bpf_prog_iter_callback_t func,
+		       void *arg)
+{
+	struct bpf_program *prog;
+	int err;
+
+	bpf_object__for_each_program(prog, obj) {
+		struct bpf_prog_priv *priv = program_priv(prog);
+		struct probe_trace_event *tev;
+		struct perf_probe_event *pev;
+		int i, fd;
+
+		if (IS_ERR_OR_NULL(priv)) {
+			pr_debug("bpf: failed to get private field\n");
+			return -BPF_LOADER_ERRNO__INTERNAL;
+		}
+
+		if (priv->is_tp) {
+			fd = bpf_program__fd(prog);
+			err = (*func)(priv->sys_name, priv->evt_name, fd, obj, arg);
+			if (err) {
+				pr_debug("bpf: tracepoint call back failed, stop iterate\n");
+				return err;
+			}
+			continue;
+		}
+
+		pev = &priv->pev;
+		for (i = 0; i < pev->ntevs; i++) {
+			tev = &pev->tevs[i];
+
+			if (priv->need_prologue)
+				fd = priv->prologue_fds[i];
+			else
+				fd = bpf_program__fd(prog);
+
+			if (fd < 0) {
+				pr_debug("bpf: failed to get file descriptor\n");
+				return fd;
+			}
+
+			err = (*func)(tev->group, tev->event, fd, obj, arg);
+			if (err) {
+				pr_debug("bpf: call back failed, stop iterate\n");
+				return err;
+			}
+		}
+	}
+	return 0;
+}
+
+enum bpf_map_op_type {
+	BPF_MAP_OP_SET_VALUE,
+	BPF_MAP_OP_SET_EVSEL,
+};
+
+enum bpf_map_key_type {
+	BPF_MAP_KEY_ALL,
+	BPF_MAP_KEY_RANGES,
+};
+
+struct bpf_map_op {
+	struct list_head list;
+	enum bpf_map_op_type op_type;
+	enum bpf_map_key_type key_type;
+	union {
+		struct parse_events_array array;
+	} k;
+	union {
+		u64 value;
+		struct evsel *evsel;
+	} v;
+};
+
+struct bpf_map_priv {
+	struct list_head ops_list;
+};
+
+static void
+bpf_map_op__delete(struct bpf_map_op *op)
+{
+	if (!list_empty(&op->list))
+		list_del_init(&op->list);
+	if (op->key_type == BPF_MAP_KEY_RANGES)
+		parse_events__clear_array(&op->k.array);
+	free(op);
+}
+
+static void
+bpf_map_priv__purge(struct bpf_map_priv *priv)
+{
+	struct bpf_map_op *pos, *n;
+
+	list_for_each_entry_safe(pos, n, &priv->ops_list, list) {
+		list_del_init(&pos->list);
+		bpf_map_op__delete(pos);
+	}
+}
+
+static void
+bpf_map_priv__clear(const struct bpf_map *map __maybe_unused,
+		    void *_priv)
+{
+	struct bpf_map_priv *priv = _priv;
+
+	bpf_map_priv__purge(priv);
+	free(priv);
+}
+
+static void *map_priv(const struct bpf_map *map)
+{
+	void *priv;
+
+	if (IS_ERR_OR_NULL(bpf_map_hash))
+		return NULL;
+	if (!hashmap__find(bpf_map_hash, map, &priv))
+		return NULL;
+	return priv;
+}
+
+static void bpf_map_hash_free(void)
+{
+	struct hashmap_entry *cur;
+	size_t bkt;
+
+	if (IS_ERR_OR_NULL(bpf_map_hash))
+		return;
+
+	hashmap__for_each_entry(bpf_map_hash, cur, bkt)
+		bpf_map_priv__clear(cur->key, cur->value);
+
+	hashmap__free(bpf_map_hash);
+	bpf_map_hash = NULL;
+}
+
+static int map_set_priv(struct bpf_map *map, void *priv)
+{
+	void *old_priv;
+
+	if (WARN_ON_ONCE(IS_ERR(bpf_map_hash)))
+		return PTR_ERR(bpf_program_hash);
+
+	if (!bpf_map_hash) {
+		bpf_map_hash = hashmap__new(ptr_hash, ptr_equal, NULL);
+		if (IS_ERR(bpf_map_hash))
+			return PTR_ERR(bpf_map_hash);
+	}
+
+	old_priv = map_priv(map);
+	if (old_priv) {
+		bpf_map_priv__clear(map, old_priv);
+		return hashmap__set(bpf_map_hash, map, priv, NULL, NULL);
+	}
+	return hashmap__add(bpf_map_hash, map, priv);
+}
+
+static int
+bpf_map_op_setkey(struct bpf_map_op *op, struct parse_events_term *term)
+{
+	op->key_type = BPF_MAP_KEY_ALL;
+	if (!term)
+		return 0;
+
+	if (term->array.nr_ranges) {
+		size_t memsz = term->array.nr_ranges *
+				sizeof(op->k.array.ranges[0]);
+
+		op->k.array.ranges = memdup(term->array.ranges, memsz);
+		if (!op->k.array.ranges) {
+			pr_debug("Not enough memory to alloc indices for map\n");
+			return -ENOMEM;
+		}
+		op->key_type = BPF_MAP_KEY_RANGES;
+		op->k.array.nr_ranges = term->array.nr_ranges;
+	}
+	return 0;
+}
+
+static struct bpf_map_op *
+bpf_map_op__new(struct parse_events_term *term)
+{
+	struct bpf_map_op *op;
+	int err;
+
+	op = zalloc(sizeof(*op));
+	if (!op) {
+		pr_debug("Failed to alloc bpf_map_op\n");
+		return ERR_PTR(-ENOMEM);
+	}
+	INIT_LIST_HEAD(&op->list);
+
+	err = bpf_map_op_setkey(op, term);
+	if (err) {
+		free(op);
+		return ERR_PTR(err);
+	}
+	return op;
+}
+
+static struct bpf_map_op *
+bpf_map_op__clone(struct bpf_map_op *op)
+{
+	struct bpf_map_op *newop;
+
+	newop = memdup(op, sizeof(*op));
+	if (!newop) {
+		pr_debug("Failed to alloc bpf_map_op\n");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&newop->list);
+	if (op->key_type == BPF_MAP_KEY_RANGES) {
+		size_t memsz = op->k.array.nr_ranges *
+			       sizeof(op->k.array.ranges[0]);
+
+		newop->k.array.ranges = memdup(op->k.array.ranges, memsz);
+		if (!newop->k.array.ranges) {
+			pr_debug("Failed to alloc indices for map\n");
+			free(newop);
+			return NULL;
+		}
+	}
+
+	return newop;
+}
+
+static struct bpf_map_priv *
+bpf_map_priv__clone(struct bpf_map_priv *priv)
+{
+	struct bpf_map_priv *newpriv;
+	struct bpf_map_op *pos, *newop;
+
+	newpriv = zalloc(sizeof(*newpriv));
+	if (!newpriv) {
+		pr_debug("Not enough memory to alloc map private\n");
+		return NULL;
+	}
+	INIT_LIST_HEAD(&newpriv->ops_list);
+
+	list_for_each_entry(pos, &priv->ops_list, list) {
+		newop = bpf_map_op__clone(pos);
+		if (!newop) {
+			bpf_map_priv__purge(newpriv);
+			return NULL;
+		}
+		list_add_tail(&newop->list, &newpriv->ops_list);
+	}
+
+	return newpriv;
+}
+
+static int
+bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op)
+{
+	const char *map_name = bpf_map__name(map);
+	struct bpf_map_priv *priv = map_priv(map);
+
+	if (IS_ERR(priv)) {
+		pr_debug("Failed to get private from map %s\n", map_name);
+		return PTR_ERR(priv);
+	}
+
+	if (!priv) {
+		priv = zalloc(sizeof(*priv));
+		if (!priv) {
+			pr_debug("Not enough memory to alloc map private\n");
+			return -ENOMEM;
+		}
+		INIT_LIST_HEAD(&priv->ops_list);
+
+		if (map_set_priv(map, priv)) {
+			free(priv);
+			return -BPF_LOADER_ERRNO__INTERNAL;
+		}
+	}
+
+	list_add_tail(&op->list, &priv->ops_list);
+	return 0;
+}
+
+static struct bpf_map_op *
+bpf_map__add_newop(struct bpf_map *map, struct parse_events_term *term)
+{
+	struct bpf_map_op *op;
+	int err;
+
+	op = bpf_map_op__new(term);
+	if (IS_ERR(op))
+		return op;
+
+	err = bpf_map__add_op(map, op);
+	if (err) {
+		bpf_map_op__delete(op);
+		return ERR_PTR(err);
+	}
+	return op;
+}
+
+static int
+__bpf_map__config_value(struct bpf_map *map,
+			struct parse_events_term *term)
+{
+	struct bpf_map_op *op;
+	const char *map_name = bpf_map__name(map);
+
+	if (!map) {
+		pr_debug("Map '%s' is invalid\n", map_name);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+
+	if (bpf_map__type(map) != BPF_MAP_TYPE_ARRAY) {
+		pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n",
+			 map_name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+	}
+	if (bpf_map__key_size(map) < sizeof(unsigned int)) {
+		pr_debug("Map %s has incorrect key size\n", map_name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE;
+	}
+	switch (bpf_map__value_size(map)) {
+	case 1:
+	case 2:
+	case 4:
+	case 8:
+		break;
+	default:
+		pr_debug("Map %s has incorrect value size\n", map_name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE;
+	}
+
+	op = bpf_map__add_newop(map, term);
+	if (IS_ERR(op))
+		return PTR_ERR(op);
+	op->op_type = BPF_MAP_OP_SET_VALUE;
+	op->v.value = term->val.num;
+	return 0;
+}
+
+static int
+bpf_map__config_value(struct bpf_map *map,
+		      struct parse_events_term *term,
+		      struct evlist *evlist __maybe_unused)
+{
+	if (!term->err_val) {
+		pr_debug("Config value not set\n");
+		return -BPF_LOADER_ERRNO__OBJCONF_CONF;
+	}
+
+	if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM) {
+		pr_debug("ERROR: wrong value type for 'value'\n");
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
+	}
+
+	return __bpf_map__config_value(map, term);
+}
+
+static int
+__bpf_map__config_event(struct bpf_map *map,
+			struct parse_events_term *term,
+			struct evlist *evlist)
+{
+	struct bpf_map_op *op;
+	const char *map_name = bpf_map__name(map);
+	struct evsel *evsel = evlist__find_evsel_by_str(evlist, term->val.str);
+
+	if (!evsel) {
+		pr_debug("Event (for '%s') '%s' doesn't exist\n",
+			 map_name, term->val.str);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT;
+	}
+
+	if (!map) {
+		pr_debug("Map '%s' is invalid\n", map_name);
+		return PTR_ERR(map);
+	}
+
+	/*
+	 * No need to check key_size and value_size:
+	 * kernel has already checked them.
+	 */
+	if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+		pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
+			 map_name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+	}
+
+	op = bpf_map__add_newop(map, term);
+	if (IS_ERR(op))
+		return PTR_ERR(op);
+	op->op_type = BPF_MAP_OP_SET_EVSEL;
+	op->v.evsel = evsel;
+	return 0;
+}
+
+static int
+bpf_map__config_event(struct bpf_map *map,
+		      struct parse_events_term *term,
+		      struct evlist *evlist)
+{
+	if (!term->err_val) {
+		pr_debug("Config value not set\n");
+		return -BPF_LOADER_ERRNO__OBJCONF_CONF;
+	}
+
+	if (term->type_val != PARSE_EVENTS__TERM_TYPE_STR) {
+		pr_debug("ERROR: wrong value type for 'event'\n");
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
+	}
+
+	return __bpf_map__config_event(map, term, evlist);
+}
+
+struct bpf_obj_config__map_func {
+	const char *config_opt;
+	int (*config_func)(struct bpf_map *, struct parse_events_term *,
+			   struct evlist *);
+};
+
+struct bpf_obj_config__map_func bpf_obj_config__map_funcs[] = {
+	{"value", bpf_map__config_value},
+	{"event", bpf_map__config_event},
+};
+
+static int
+config_map_indices_range_check(struct parse_events_term *term,
+			       struct bpf_map *map,
+			       const char *map_name)
+{
+	struct parse_events_array *array = &term->array;
+	unsigned int i;
+
+	if (!array->nr_ranges)
+		return 0;
+	if (!array->ranges) {
+		pr_debug("ERROR: map %s: array->nr_ranges is %d but range array is NULL\n",
+			 map_name, (int)array->nr_ranges);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+
+	if (!map) {
+		pr_debug("Map '%s' is invalid\n", map_name);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+
+	for (i = 0; i < array->nr_ranges; i++) {
+		unsigned int start = array->ranges[i].start;
+		size_t length = array->ranges[i].length;
+		unsigned int idx = start + length - 1;
+
+		if (idx >= bpf_map__max_entries(map)) {
+			pr_debug("ERROR: index %d too large\n", idx);
+			return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG;
+		}
+	}
+	return 0;
+}
+
+static int
+bpf__obj_config_map(struct bpf_object *obj,
+		    struct parse_events_term *term,
+		    struct evlist *evlist,
+		    int *key_scan_pos)
+{
+	/* key is "map:<mapname>.<config opt>" */
+	char *map_name = strdup(term->config + sizeof("map:") - 1);
+	struct bpf_map *map;
+	int err = -BPF_LOADER_ERRNO__OBJCONF_OPT;
+	char *map_opt;
+	size_t i;
+
+	if (!map_name)
+		return -ENOMEM;
+
+	map_opt = strchr(map_name, '.');
+	if (!map_opt) {
+		pr_debug("ERROR: Invalid map config: %s\n", map_name);
+		goto out;
+	}
+
+	*map_opt++ = '\0';
+	if (*map_opt == '\0') {
+		pr_debug("ERROR: Invalid map option: %s\n", term->config);
+		goto out;
+	}
+
+	map = bpf_object__find_map_by_name(obj, map_name);
+	if (!map) {
+		pr_debug("ERROR: Map %s doesn't exist\n", map_name);
+		err = -BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST;
+		goto out;
+	}
+
+	*key_scan_pos += strlen(map_opt);
+	err = config_map_indices_range_check(term, map, map_name);
+	if (err)
+		goto out;
+	*key_scan_pos -= strlen(map_opt);
+
+	for (i = 0; i < ARRAY_SIZE(bpf_obj_config__map_funcs); i++) {
+		struct bpf_obj_config__map_func *func =
+				&bpf_obj_config__map_funcs[i];
+
+		if (strcmp(map_opt, func->config_opt) == 0) {
+			err = func->config_func(map, term, evlist);
+			goto out;
+		}
+	}
+
+	pr_debug("ERROR: Invalid map config option '%s'\n", map_opt);
+	err = -BPF_LOADER_ERRNO__OBJCONF_MAP_OPT;
+out:
+	if (!err)
+		*key_scan_pos += strlen(map_opt);
+
+	free(map_name);
+	return err;
+}
+
+int bpf__config_obj(struct bpf_object *obj,
+		    struct parse_events_term *term,
+		    struct evlist *evlist,
+		    int *error_pos)
+{
+	int key_scan_pos = 0;
+	int err;
+
+	if (!obj || !term || !term->config)
+		return -EINVAL;
+
+	if (strstarts(term->config, "map:")) {
+		key_scan_pos = sizeof("map:") - 1;
+		err = bpf__obj_config_map(obj, term, evlist, &key_scan_pos);
+		goto out;
+	}
+	err = -BPF_LOADER_ERRNO__OBJCONF_OPT;
+out:
+	if (error_pos)
+		*error_pos = key_scan_pos;
+	return err;
+
+}
+
+typedef int (*map_config_func_t)(const char *name, int map_fd,
+				 const struct bpf_map *map,
+				 struct bpf_map_op *op,
+				 void *pkey, void *arg);
+
+static int
+foreach_key_array_all(map_config_func_t func,
+		      void *arg, const char *name,
+		      int map_fd, const struct bpf_map *map,
+		      struct bpf_map_op *op)
+{
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < bpf_map__max_entries(map); i++) {
+		err = func(name, map_fd, map, op, &i, arg);
+		if (err) {
+			pr_debug("ERROR: failed to insert value to %s[%u]\n",
+				 name, i);
+			return err;
+		}
+	}
+	return 0;
+}
+
+static int
+foreach_key_array_ranges(map_config_func_t func, void *arg,
+			 const char *name, int map_fd,
+			 const struct bpf_map *map,
+			 struct bpf_map_op *op)
+{
+	unsigned int i, j;
+	int err;
+
+	for (i = 0; i < op->k.array.nr_ranges; i++) {
+		unsigned int start = op->k.array.ranges[i].start;
+		size_t length = op->k.array.ranges[i].length;
+
+		for (j = 0; j < length; j++) {
+			unsigned int idx = start + j;
+
+			err = func(name, map_fd, map, op, &idx, arg);
+			if (err) {
+				pr_debug("ERROR: failed to insert value to %s[%u]\n",
+					 name, idx);
+				return err;
+			}
+		}
+	}
+	return 0;
+}
+
+static int
+bpf_map_config_foreach_key(struct bpf_map *map,
+			   map_config_func_t func,
+			   void *arg)
+{
+	int err, map_fd, type;
+	struct bpf_map_op *op;
+	const char *name = bpf_map__name(map);
+	struct bpf_map_priv *priv = map_priv(map);
+
+	if (IS_ERR(priv)) {
+		pr_debug("ERROR: failed to get private from map %s\n", name);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+	if (!priv || list_empty(&priv->ops_list)) {
+		pr_debug("INFO: nothing to config for map %s\n", name);
+		return 0;
+	}
+
+	if (!map) {
+		pr_debug("Map '%s' is invalid\n", name);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+	map_fd = bpf_map__fd(map);
+	if (map_fd < 0) {
+		pr_debug("ERROR: failed to get fd from map %s\n", name);
+		return map_fd;
+	}
+
+	type = bpf_map__type(map);
+	list_for_each_entry(op, &priv->ops_list, list) {
+		switch (type) {
+		case BPF_MAP_TYPE_ARRAY:
+		case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+			switch (op->key_type) {
+			case BPF_MAP_KEY_ALL:
+				err = foreach_key_array_all(func, arg, name,
+							    map_fd, map, op);
+				break;
+			case BPF_MAP_KEY_RANGES:
+				err = foreach_key_array_ranges(func, arg, name,
+							       map_fd, map, op);
+				break;
+			default:
+				pr_debug("ERROR: keytype for map '%s' invalid\n",
+					 name);
+				return -BPF_LOADER_ERRNO__INTERNAL;
+			}
+			if (err)
+				return err;
+			break;
+		default:
+			pr_debug("ERROR: type of '%s' incorrect\n", name);
+			return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+		}
+	}
+
+	return 0;
+}
+
+static int
+apply_config_value_for_key(int map_fd, void *pkey,
+			   size_t val_size, u64 val)
+{
+	int err = 0;
+
+	switch (val_size) {
+	case 1: {
+		u8 _val = (u8)(val);
+		err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
+		break;
+	}
+	case 2: {
+		u16 _val = (u16)(val);
+		err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
+		break;
+	}
+	case 4: {
+		u32 _val = (u32)(val);
+		err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
+		break;
+	}
+	case 8: {
+		err = bpf_map_update_elem(map_fd, pkey, &val, BPF_ANY);
+		break;
+	}
+	default:
+		pr_debug("ERROR: invalid value size\n");
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE;
+	}
+	if (err && errno)
+		err = -errno;
+	return err;
+}
+
+static int
+apply_config_evsel_for_key(const char *name, int map_fd, void *pkey,
+			   struct evsel *evsel)
+{
+	struct xyarray *xy = evsel->core.fd;
+	struct perf_event_attr *attr;
+	unsigned int key, events;
+	bool check_pass = false;
+	int *evt_fd;
+	int err;
+
+	if (!xy) {
+		pr_debug("ERROR: evsel not ready for map %s\n", name);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+
+	if (xy->row_size / xy->entry_size != 1) {
+		pr_debug("ERROR: Dimension of target event is incorrect for map %s\n",
+			 name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM;
+	}
+
+	attr = &evsel->core.attr;
+	if (attr->inherit) {
+		pr_debug("ERROR: Can't put inherit event into map %s\n", name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH;
+	}
+
+	if (evsel__is_bpf_output(evsel))
+		check_pass = true;
+	if (attr->type == PERF_TYPE_RAW)
+		check_pass = true;
+	if (attr->type == PERF_TYPE_HARDWARE)
+		check_pass = true;
+	if (!check_pass) {
+		pr_debug("ERROR: Event type is wrong for map %s\n", name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE;
+	}
+
+	events = xy->entries / (xy->row_size / xy->entry_size);
+	key = *((unsigned int *)pkey);
+	if (key >= events) {
+		pr_debug("ERROR: there is no event %d for map %s\n",
+			 key, name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE;
+	}
+	evt_fd = xyarray__entry(xy, key, 0);
+	err = bpf_map_update_elem(map_fd, pkey, evt_fd, BPF_ANY);
+	if (err && errno)
+		err = -errno;
+	return err;
+}
+
+static int
+apply_obj_config_map_for_key(const char *name, int map_fd,
+			     const struct bpf_map *map,
+			     struct bpf_map_op *op,
+			     void *pkey, void *arg __maybe_unused)
+{
+	int err;
+
+	switch (op->op_type) {
+	case BPF_MAP_OP_SET_VALUE:
+		err = apply_config_value_for_key(map_fd, pkey,
+						 bpf_map__value_size(map),
+						 op->v.value);
+		break;
+	case BPF_MAP_OP_SET_EVSEL:
+		err = apply_config_evsel_for_key(name, map_fd, pkey,
+						 op->v.evsel);
+		break;
+	default:
+		pr_debug("ERROR: unknown value type for '%s'\n", name);
+		err = -BPF_LOADER_ERRNO__INTERNAL;
+	}
+	return err;
+}
+
+static int
+apply_obj_config_map(struct bpf_map *map)
+{
+	return bpf_map_config_foreach_key(map,
+					  apply_obj_config_map_for_key,
+					  NULL);
+}
+
+static int
+apply_obj_config_object(struct bpf_object *obj)
+{
+	struct bpf_map *map;
+	int err;
+
+	bpf_object__for_each_map(map, obj) {
+		err = apply_obj_config_map(map);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+int bpf__apply_obj_config(void)
+{
+	struct bpf_perf_object *perf_obj, *tmp;
+	int err;
+
+	bpf_perf_object__for_each(perf_obj, tmp) {
+		err = apply_obj_config_object(perf_obj->obj);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+#define bpf__perf_for_each_map(map, pobj, tmp)			\
+	bpf_perf_object__for_each(pobj, tmp)			\
+		bpf_object__for_each_map(map, pobj->obj)
+
+#define bpf__perf_for_each_map_named(map, pobj, pobjtmp, name)	\
+	bpf__perf_for_each_map(map, pobj, pobjtmp)		\
+		if (bpf_map__name(map) && (strcmp(name, bpf_map__name(map)) == 0))
+
+struct evsel *bpf__setup_output_event(struct evlist *evlist, const char *name)
+{
+	struct bpf_map_priv *tmpl_priv = NULL;
+	struct bpf_perf_object *perf_obj, *tmp;
+	struct evsel *evsel = NULL;
+	struct bpf_map *map;
+	int err;
+	bool need_init = false;
+
+	bpf__perf_for_each_map_named(map, perf_obj, tmp, name) {
+		struct bpf_map_priv *priv = map_priv(map);
+
+		if (IS_ERR(priv))
+			return ERR_PTR(-BPF_LOADER_ERRNO__INTERNAL);
+
+		/*
+		 * No need to check map type: type should have been
+		 * verified by kernel.
+		 */
+		if (!need_init && !priv)
+			need_init = !priv;
+		if (!tmpl_priv && priv)
+			tmpl_priv = priv;
+	}
+
+	if (!need_init)
+		return NULL;
+
+	if (!tmpl_priv) {
+		char *event_definition = NULL;
+
+		if (asprintf(&event_definition, "bpf-output/no-inherit=1,name=%s/", name) < 0)
+			return ERR_PTR(-ENOMEM);
+
+		err = parse_event(evlist, event_definition);
+		free(event_definition);
+
+		if (err) {
+			pr_debug("ERROR: failed to create the \"%s\" bpf-output event\n", name);
+			return ERR_PTR(-err);
+		}
+
+		evsel = evlist__last(evlist);
+	}
+
+	bpf__perf_for_each_map_named(map, perf_obj, tmp, name) {
+		struct bpf_map_priv *priv = map_priv(map);
+
+		if (IS_ERR(priv))
+			return ERR_PTR(-BPF_LOADER_ERRNO__INTERNAL);
+		if (priv)
+			continue;
+
+		if (tmpl_priv) {
+			priv = bpf_map_priv__clone(tmpl_priv);
+			if (!priv)
+				return ERR_PTR(-ENOMEM);
+
+			err = map_set_priv(map, priv);
+			if (err) {
+				bpf_map_priv__clear(map, priv);
+				return ERR_PTR(err);
+			}
+		} else if (evsel) {
+			struct bpf_map_op *op;
+
+			op = bpf_map__add_newop(map, NULL);
+			if (IS_ERR(op))
+				return ERR_CAST(op);
+			op->op_type = BPF_MAP_OP_SET_EVSEL;
+			op->v.evsel = evsel;
+		}
+	}
+
+	return evsel;
+}
+
+int bpf__setup_stdout(struct evlist *evlist)
+{
+	struct evsel *evsel = bpf__setup_output_event(evlist, "__bpf_stdout__");
+	return PTR_ERR_OR_ZERO(evsel);
+}
+
+#define ERRNO_OFFSET(e)		((e) - __BPF_LOADER_ERRNO__START)
+#define ERRCODE_OFFSET(c)	ERRNO_OFFSET(BPF_LOADER_ERRNO__##c)
+#define NR_ERRNO	(__BPF_LOADER_ERRNO__END - __BPF_LOADER_ERRNO__START)
+
+static const char *bpf_loader_strerror_table[NR_ERRNO] = {
+	[ERRCODE_OFFSET(CONFIG)]	= "Invalid config string",
+	[ERRCODE_OFFSET(GROUP)]		= "Invalid group name",
+	[ERRCODE_OFFSET(EVENTNAME)]	= "No event name found in config string",
+	[ERRCODE_OFFSET(INTERNAL)]	= "BPF loader internal error",
+	[ERRCODE_OFFSET(COMPILE)]	= "Error when compiling BPF scriptlet",
+	[ERRCODE_OFFSET(PROGCONF_TERM)]	= "Invalid program config term in config string",
+	[ERRCODE_OFFSET(PROLOGUE)]	= "Failed to generate prologue",
+	[ERRCODE_OFFSET(PROLOGUE2BIG)]	= "Prologue too big for program",
+	[ERRCODE_OFFSET(PROLOGUEOOB)]	= "Offset out of bound for prologue",
+	[ERRCODE_OFFSET(OBJCONF_OPT)]	= "Invalid object config option",
+	[ERRCODE_OFFSET(OBJCONF_CONF)]	= "Config value not set (missing '=')",
+	[ERRCODE_OFFSET(OBJCONF_MAP_OPT)]	= "Invalid object map config option",
+	[ERRCODE_OFFSET(OBJCONF_MAP_NOTEXIST)]	= "Target map doesn't exist",
+	[ERRCODE_OFFSET(OBJCONF_MAP_VALUE)]	= "Incorrect value type for map",
+	[ERRCODE_OFFSET(OBJCONF_MAP_TYPE)]	= "Incorrect map type",
+	[ERRCODE_OFFSET(OBJCONF_MAP_KEYSIZE)]	= "Incorrect map key size",
+	[ERRCODE_OFFSET(OBJCONF_MAP_VALUESIZE)]	= "Incorrect map value size",
+	[ERRCODE_OFFSET(OBJCONF_MAP_NOEVT)]	= "Event not found for map setting",
+	[ERRCODE_OFFSET(OBJCONF_MAP_MAPSIZE)]	= "Invalid map size for event setting",
+	[ERRCODE_OFFSET(OBJCONF_MAP_EVTDIM)]	= "Event dimension too large",
+	[ERRCODE_OFFSET(OBJCONF_MAP_EVTINH)]	= "Doesn't support inherit event",
+	[ERRCODE_OFFSET(OBJCONF_MAP_EVTTYPE)]	= "Wrong event type for map",
+	[ERRCODE_OFFSET(OBJCONF_MAP_IDX2BIG)]	= "Index too large",
+};
+
+static int
+bpf_loader_strerror(int err, char *buf, size_t size)
+{
+	char sbuf[STRERR_BUFSIZE];
+	const char *msg;
+
+	if (!buf || !size)
+		return -1;
+
+	err = err > 0 ? err : -err;
+
+	if (err >= __LIBBPF_ERRNO__START)
+		return libbpf_strerror(err, buf, size);
+
+	if (err >= __BPF_LOADER_ERRNO__START && err < __BPF_LOADER_ERRNO__END) {
+		msg = bpf_loader_strerror_table[ERRNO_OFFSET(err)];
+		snprintf(buf, size, "%s", msg);
+		buf[size - 1] = '\0';
+		return 0;
+	}
+
+	if (err >= __BPF_LOADER_ERRNO__END)
+		snprintf(buf, size, "Unknown bpf loader error %d", err);
+	else
+		snprintf(buf, size, "%s",
+			 str_error_r(err, sbuf, sizeof(sbuf)));
+
+	buf[size - 1] = '\0';
+	return -1;
+}
+
+#define bpf__strerror_head(err, buf, size) \
+	char sbuf[STRERR_BUFSIZE], *emsg;\
+	if (!size)\
+		return 0;\
+	if (err < 0)\
+		err = -err;\
+	bpf_loader_strerror(err, sbuf, sizeof(sbuf));\
+	emsg = sbuf;\
+	switch (err) {\
+	default:\
+		scnprintf(buf, size, "%s", emsg);\
+		break;
+
+#define bpf__strerror_entry(val, fmt...)\
+	case val: {\
+		scnprintf(buf, size, fmt);\
+		break;\
+	}
+
+#define bpf__strerror_end(buf, size)\
+	}\
+	buf[size - 1] = '\0';
+
+int bpf__strerror_prepare_load(const char *filename, bool source,
+			       int err, char *buf, size_t size)
+{
+	size_t n;
+	int ret;
+
+	n = snprintf(buf, size, "Failed to load %s%s: ",
+			 filename, source ? " from source" : "");
+	if (n >= size) {
+		buf[size - 1] = '\0';
+		return 0;
+	}
+	buf += n;
+	size -= n;
+
+	ret = bpf_loader_strerror(err, buf, size);
+	buf[size - 1] = '\0';
+	return ret;
+}
+
+int bpf__strerror_probe(struct bpf_object *obj __maybe_unused,
+			int err, char *buf, size_t size)
+{
+	bpf__strerror_head(err, buf, size);
+	case BPF_LOADER_ERRNO__PROGCONF_TERM: {
+		scnprintf(buf, size, "%s (add -v to see detail)", emsg);
+		break;
+	}
+	bpf__strerror_entry(EEXIST, "Probe point exist. Try 'perf probe -d \"*\"' and set 'force=yes'");
+	bpf__strerror_entry(EACCES, "You need to be root");
+	bpf__strerror_entry(EPERM, "You need to be root, and /proc/sys/kernel/kptr_restrict should be 0");
+	bpf__strerror_entry(ENOENT, "You need to check probing points in BPF file");
+	bpf__strerror_end(buf, size);
+	return 0;
+}
+
+int bpf__strerror_load(struct bpf_object *obj,
+		       int err, char *buf, size_t size)
+{
+	bpf__strerror_head(err, buf, size);
+	case LIBBPF_ERRNO__KVER: {
+		unsigned int obj_kver = bpf_object__kversion(obj);
+		unsigned int real_kver;
+
+		if (fetch_kernel_version(&real_kver, NULL, 0)) {
+			scnprintf(buf, size, "Unable to fetch kernel version");
+			break;
+		}
+
+		if (obj_kver != real_kver) {
+			scnprintf(buf, size,
+				  "'version' ("KVER_FMT") doesn't match running kernel ("KVER_FMT")",
+				  KVER_PARAM(obj_kver),
+				  KVER_PARAM(real_kver));
+			break;
+		}
+
+		scnprintf(buf, size, "Failed to load program for unknown reason");
+		break;
+	}
+	bpf__strerror_end(buf, size);
+	return 0;
+}
+
+int bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
+			     struct parse_events_term *term __maybe_unused,
+			     struct evlist *evlist __maybe_unused,
+			     int *error_pos __maybe_unused, int err,
+			     char *buf, size_t size)
+{
+	bpf__strerror_head(err, buf, size);
+	bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE,
+			    "Can't use this config term with this map type");
+	bpf__strerror_end(buf, size);
+	return 0;
+}
+
+int bpf__strerror_apply_obj_config(int err, char *buf, size_t size)
+{
+	bpf__strerror_head(err, buf, size);
+	bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM,
+			    "Cannot set event to BPF map in multi-thread tracing");
+	bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH,
+			    "%s (Hint: use -i to turn off inherit)", emsg);
+	bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE,
+			    "Can only put raw, hardware and BPF output event into a BPF map");
+	bpf__strerror_end(buf, size);
+	return 0;
+}
+
+int bpf__strerror_setup_output_event(struct evlist *evlist __maybe_unused,
+				     int err, char *buf, size_t size)
+{
+	bpf__strerror_head(err, buf, size);
+	bpf__strerror_end(buf, size);
+	return 0;
+}
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
new file mode 100644
index 000000000..5d1c725ce
--- /dev/null
+++ b/tools/perf/util/bpf-loader.h
@@ -0,0 +1,216 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015, Huawei Inc.
+ */
+#ifndef __BPF_LOADER_H
+#define __BPF_LOADER_H
+
+#include <linux/compiler.h>
+#include <linux/err.h>
+
+#ifdef HAVE_LIBBPF_SUPPORT
+#include <bpf/libbpf.h>
+
+enum bpf_loader_errno {
+	__BPF_LOADER_ERRNO__START = __LIBBPF_ERRNO__START - 100,
+	/* Invalid config string */
+	BPF_LOADER_ERRNO__CONFIG = __BPF_LOADER_ERRNO__START,
+	BPF_LOADER_ERRNO__GROUP,	/* Invalid group name */
+	BPF_LOADER_ERRNO__EVENTNAME,	/* Event name is missing */
+	BPF_LOADER_ERRNO__INTERNAL,	/* BPF loader internal error */
+	BPF_LOADER_ERRNO__COMPILE,	/* Error when compiling BPF scriptlet */
+	BPF_LOADER_ERRNO__PROGCONF_TERM,/* Invalid program config term in config string */
+	BPF_LOADER_ERRNO__PROLOGUE,	/* Failed to generate prologue */
+	BPF_LOADER_ERRNO__PROLOGUE2BIG,	/* Prologue too big for program */
+	BPF_LOADER_ERRNO__PROLOGUEOOB,	/* Offset out of bound for prologue */
+	BPF_LOADER_ERRNO__OBJCONF_OPT,	/* Invalid object config option */
+	BPF_LOADER_ERRNO__OBJCONF_CONF,	/* Config value not set (lost '=')) */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_OPT,	/* Invalid object map config option */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST,	/* Target map not exist */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE,	/* Incorrect value type for map */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE,	/* Incorrect map type */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE,	/* Incorrect map key size */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE,/* Incorrect map value size */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT,	/* Event not found for map setting */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE,	/* Invalid map size for event setting */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM,	/* Event dimension too large */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH,	/* Doesn't support inherit event */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE,	/* Wrong event type for map */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG,	/* Index too large */
+	__BPF_LOADER_ERRNO__END,
+};
+#endif // HAVE_LIBBPF_SUPPORT
+
+struct evsel;
+struct evlist;
+struct bpf_object;
+struct parse_events_term;
+#define PERF_BPF_PROBE_GROUP "perf_bpf_probe"
+
+typedef int (*bpf_prog_iter_callback_t)(const char *group, const char *event,
+					int fd, struct bpf_object *obj, void *arg);
+
+#ifdef HAVE_LIBBPF_SUPPORT
+struct bpf_object *bpf__prepare_load(const char *filename, bool source);
+int bpf__strerror_prepare_load(const char *filename, bool source,
+			       int err, char *buf, size_t size);
+
+struct bpf_object *bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz,
+					    const char *name);
+
+void bpf__clear(void);
+
+int bpf__probe(struct bpf_object *obj);
+int bpf__unprobe(struct bpf_object *obj);
+int bpf__strerror_probe(struct bpf_object *obj, int err,
+			char *buf, size_t size);
+
+int bpf__load(struct bpf_object *obj);
+int bpf__strerror_load(struct bpf_object *obj, int err,
+		       char *buf, size_t size);
+int bpf__foreach_event(struct bpf_object *obj,
+		       bpf_prog_iter_callback_t func, void *arg);
+
+int bpf__config_obj(struct bpf_object *obj, struct parse_events_term *term,
+		    struct evlist *evlist, int *error_pos);
+int bpf__strerror_config_obj(struct bpf_object *obj,
+			     struct parse_events_term *term,
+			     struct evlist *evlist,
+			     int *error_pos, int err, char *buf,
+			     size_t size);
+int bpf__apply_obj_config(void);
+int bpf__strerror_apply_obj_config(int err, char *buf, size_t size);
+
+int bpf__setup_stdout(struct evlist *evlist);
+struct evsel *bpf__setup_output_event(struct evlist *evlist, const char *name);
+int bpf__strerror_setup_output_event(struct evlist *evlist, int err, char *buf, size_t size);
+#else
+#include <errno.h>
+#include <string.h>
+#include "debug.h"
+
+static inline struct bpf_object *
+bpf__prepare_load(const char *filename __maybe_unused,
+		  bool source __maybe_unused)
+{
+	pr_debug("ERROR: eBPF object loading is disabled during compiling.\n");
+	return ERR_PTR(-ENOTSUP);
+}
+
+static inline struct bpf_object *
+bpf__prepare_load_buffer(void *obj_buf __maybe_unused,
+					   size_t obj_buf_sz __maybe_unused)
+{
+	return ERR_PTR(-ENOTSUP);
+}
+
+static inline void bpf__clear(void) { }
+
+static inline int bpf__probe(struct bpf_object *obj __maybe_unused) { return 0;}
+static inline int bpf__unprobe(struct bpf_object *obj __maybe_unused) { return 0;}
+static inline int bpf__load(struct bpf_object *obj __maybe_unused) { return 0; }
+
+static inline int
+bpf__foreach_event(struct bpf_object *obj __maybe_unused,
+		   bpf_prog_iter_callback_t func __maybe_unused,
+		   void *arg __maybe_unused)
+{
+	return 0;
+}
+
+static inline int
+bpf__config_obj(struct bpf_object *obj __maybe_unused,
+		struct parse_events_term *term __maybe_unused,
+		struct evlist *evlist __maybe_unused,
+		int *error_pos __maybe_unused)
+{
+	return 0;
+}
+
+static inline int
+bpf__apply_obj_config(void)
+{
+	return 0;
+}
+
+static inline int
+bpf__setup_stdout(struct evlist *evlist __maybe_unused)
+{
+	return 0;
+}
+
+static inline struct evsel *
+bpf__setup_output_event(struct evlist *evlist __maybe_unused, const char *name __maybe_unused)
+{
+	return NULL;
+}
+
+static inline int
+__bpf_strerror(char *buf, size_t size)
+{
+	if (!size)
+		return 0;
+	strncpy(buf,
+		"ERROR: eBPF object loading is disabled during compiling.\n",
+		size);
+	buf[size - 1] = '\0';
+	return 0;
+}
+
+static inline
+int bpf__strerror_prepare_load(const char *filename __maybe_unused,
+			       bool source __maybe_unused,
+			       int err __maybe_unused,
+			       char *buf, size_t size)
+{
+	return __bpf_strerror(buf, size);
+}
+
+static inline int
+bpf__strerror_probe(struct bpf_object *obj __maybe_unused,
+		    int err __maybe_unused,
+		    char *buf, size_t size)
+{
+	return __bpf_strerror(buf, size);
+}
+
+static inline int bpf__strerror_load(struct bpf_object *obj __maybe_unused,
+				     int err __maybe_unused,
+				     char *buf, size_t size)
+{
+	return __bpf_strerror(buf, size);
+}
+
+static inline int
+bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
+			 struct parse_events_term *term __maybe_unused,
+			 struct evlist *evlist __maybe_unused,
+			 int *error_pos __maybe_unused,
+			 int err __maybe_unused,
+			 char *buf, size_t size)
+{
+	return __bpf_strerror(buf, size);
+}
+
+static inline int
+bpf__strerror_apply_obj_config(int err __maybe_unused,
+			       char *buf, size_t size)
+{
+	return __bpf_strerror(buf, size);
+}
+
+static inline int
+bpf__strerror_setup_output_event(struct evlist *evlist __maybe_unused,
+				 int err __maybe_unused, char *buf, size_t size)
+{
+	return __bpf_strerror(buf, size);
+}
+
+#endif
+
+static inline int bpf__strerror_setup_stdout(struct evlist *evlist, int err, char *buf, size_t size)
+{
+	return bpf__strerror_setup_output_event(evlist, err, buf, size);
+}
+#endif
diff --git a/tools/perf/util/bpf-prologue.c b/tools/perf/util/bpf-prologue.c
new file mode 100644
index 000000000..9887ae092
--- /dev/null
+++ b/tools/perf/util/bpf-prologue.c
@@ -0,0 +1,508 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * bpf-prologue.c
+ *
+ * Copyright (C) 2015 He Kuang <hekuang@huawei.com>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ */
+
+#include <bpf/libbpf.h>
+#include "debug.h"
+#include "bpf-loader.h"
+#include "bpf-prologue.h"
+#include "probe-finder.h"
+#include <errno.h>
+#include <stdlib.h>
+#include <dwarf-regs.h>
+#include <linux/filter.h>
+
+#define BPF_REG_SIZE		8
+
+#define JMP_TO_ERROR_CODE	-1
+#define JMP_TO_SUCCESS_CODE	-2
+#define JMP_TO_USER_CODE	-3
+
+struct bpf_insn_pos {
+	struct bpf_insn *begin;
+	struct bpf_insn *end;
+	struct bpf_insn *pos;
+};
+
+static inline int
+pos_get_cnt(struct bpf_insn_pos *pos)
+{
+	return pos->pos - pos->begin;
+}
+
+static int
+append_insn(struct bpf_insn new_insn, struct bpf_insn_pos *pos)
+{
+	if (!pos->pos)
+		return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
+
+	if (pos->pos + 1 >= pos->end) {
+		pr_err("bpf prologue: prologue too long\n");
+		pos->pos = NULL;
+		return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
+	}
+
+	*(pos->pos)++ = new_insn;
+	return 0;
+}
+
+static int
+check_pos(struct bpf_insn_pos *pos)
+{
+	if (!pos->pos || pos->pos >= pos->end)
+		return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
+	return 0;
+}
+
+/*
+ * Convert type string (u8/u16/u32/u64/s8/s16/s32/s64 ..., see
+ * Documentation/trace/kprobetrace.rst) to size field of BPF_LDX_MEM
+ * instruction (BPF_{B,H,W,DW}).
+ */
+static int
+argtype_to_ldx_size(const char *type)
+{
+	int arg_size = type ? atoi(&type[1]) : 64;
+
+	switch (arg_size) {
+	case 8:
+		return BPF_B;
+	case 16:
+		return BPF_H;
+	case 32:
+		return BPF_W;
+	case 64:
+	default:
+		return BPF_DW;
+	}
+}
+
+static const char *
+insn_sz_to_str(int insn_sz)
+{
+	switch (insn_sz) {
+	case BPF_B:
+		return "BPF_B";
+	case BPF_H:
+		return "BPF_H";
+	case BPF_W:
+		return "BPF_W";
+	case BPF_DW:
+		return "BPF_DW";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+/* Give it a shorter name */
+#define ins(i, p) append_insn((i), (p))
+
+/*
+ * Give a register name (in 'reg'), generate instruction to
+ * load register into an eBPF register rd:
+ *   'ldd target_reg, offset(ctx_reg)', where:
+ * ctx_reg is pre initialized to pointer of 'struct pt_regs'.
+ */
+static int
+gen_ldx_reg_from_ctx(struct bpf_insn_pos *pos, int ctx_reg,
+		     const char *reg, int target_reg)
+{
+	int offset = regs_query_register_offset(reg);
+
+	if (offset < 0) {
+		pr_err("bpf: prologue: failed to get register %s\n",
+		       reg);
+		return offset;
+	}
+	ins(BPF_LDX_MEM(BPF_DW, target_reg, ctx_reg, offset), pos);
+
+	return check_pos(pos);
+}
+
+/*
+ * Generate a BPF_FUNC_probe_read function call.
+ *
+ * src_base_addr_reg is a register holding base address,
+ * dst_addr_reg is a register holding dest address (on stack),
+ * result is:
+ *
+ *  *[dst_addr_reg] = *([src_base_addr_reg] + offset)
+ *
+ * Arguments of BPF_FUNC_probe_read:
+ *     ARG1: ptr to stack (dest)
+ *     ARG2: size (8)
+ *     ARG3: unsafe ptr (src)
+ */
+static int
+gen_read_mem(struct bpf_insn_pos *pos,
+	     int src_base_addr_reg,
+	     int dst_addr_reg,
+	     long offset,
+	     int probeid)
+{
+	/* mov arg3, src_base_addr_reg */
+	if (src_base_addr_reg != BPF_REG_ARG3)
+		ins(BPF_MOV64_REG(BPF_REG_ARG3, src_base_addr_reg), pos);
+	/* add arg3, #offset */
+	if (offset)
+		ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG3, offset), pos);
+
+	/* mov arg2, #reg_size */
+	ins(BPF_ALU64_IMM(BPF_MOV, BPF_REG_ARG2, BPF_REG_SIZE), pos);
+
+	/* mov arg1, dst_addr_reg */
+	if (dst_addr_reg != BPF_REG_ARG1)
+		ins(BPF_MOV64_REG(BPF_REG_ARG1, dst_addr_reg), pos);
+
+	/* Call probe_read  */
+	ins(BPF_EMIT_CALL(probeid), pos);
+	/*
+	 * Error processing: if read fail, goto error code,
+	 * will be relocated. Target should be the start of
+	 * error processing code.
+	 */
+	ins(BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, JMP_TO_ERROR_CODE),
+	    pos);
+
+	return check_pos(pos);
+}
+
+/*
+ * Each arg should be bare register. Fetch and save them into argument
+ * registers (r3 - r5).
+ *
+ * BPF_REG_1 should have been initialized with pointer to
+ * 'struct pt_regs'.
+ */
+static int
+gen_prologue_fastpath(struct bpf_insn_pos *pos,
+		      struct probe_trace_arg *args, int nargs)
+{
+	int i, err = 0;
+
+	for (i = 0; i < nargs; i++) {
+		err = gen_ldx_reg_from_ctx(pos, BPF_REG_1, args[i].value,
+					   BPF_PROLOGUE_START_ARG_REG + i);
+		if (err)
+			goto errout;
+	}
+
+	return check_pos(pos);
+errout:
+	return err;
+}
+
+/*
+ * Slow path:
+ *   At least one argument has the form of 'offset($rx)'.
+ *
+ * Following code first stores them into stack, then loads all of then
+ * to r2 - r5.
+ * Before final loading, the final result should be:
+ *
+ * low address
+ * BPF_REG_FP - 24  ARG3
+ * BPF_REG_FP - 16  ARG2
+ * BPF_REG_FP - 8   ARG1
+ * BPF_REG_FP
+ * high address
+ *
+ * For each argument (described as: offn(...off2(off1(reg)))),
+ * generates following code:
+ *
+ *  r7 <- fp
+ *  r7 <- r7 - stack_offset  // Ideal code should initialize r7 using
+ *                           // fp before generating args. However,
+ *                           // eBPF won't regard r7 as stack pointer
+ *                           // if it is generated by minus 8 from
+ *                           // another stack pointer except fp.
+ *                           // This is why we have to set r7
+ *                           // to fp for each variable.
+ *  r3 <- value of 'reg'-> generated using gen_ldx_reg_from_ctx()
+ *  (r7) <- r3       // skip following instructions for bare reg
+ *  r3 <- r3 + off1  . // skip if off1 == 0
+ *  r2 <- 8           \
+ *  r1 <- r7           |-> generated by gen_read_mem()
+ *  call probe_read    /
+ *  jnei r0, 0, err  ./
+ *  r3 <- (r7)
+ *  r3 <- r3 + off2  . // skip if off2 == 0
+ *  r2 <- 8           \  // r2 may be broken by probe_read, so set again
+ *  r1 <- r7           |-> generated by gen_read_mem()
+ *  call probe_read    /
+ *  jnei r0, 0, err  ./
+ *  ...
+ */
+static int
+gen_prologue_slowpath(struct bpf_insn_pos *pos,
+		      struct probe_trace_arg *args, int nargs)
+{
+	int err, i, probeid;
+
+	for (i = 0; i < nargs; i++) {
+		struct probe_trace_arg *arg = &args[i];
+		const char *reg = arg->value;
+		struct probe_trace_arg_ref *ref = NULL;
+		int stack_offset = (i + 1) * -8;
+
+		pr_debug("prologue: fetch arg %d, base reg is %s\n",
+			 i, reg);
+
+		/* value of base register is stored into ARG3 */
+		err = gen_ldx_reg_from_ctx(pos, BPF_REG_CTX, reg,
+					   BPF_REG_ARG3);
+		if (err) {
+			pr_err("prologue: failed to get offset of register %s\n",
+			       reg);
+			goto errout;
+		}
+
+		/* Make r7 the stack pointer. */
+		ins(BPF_MOV64_REG(BPF_REG_7, BPF_REG_FP), pos);
+		/* r7 += -8 */
+		ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, stack_offset), pos);
+		/*
+		 * Store r3 (base register) onto stack
+		 * Ensure fp[offset] is set.
+		 * fp is the only valid base register when storing
+		 * into stack. We are not allowed to use r7 as base
+		 * register here.
+		 */
+		ins(BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_ARG3,
+				stack_offset), pos);
+
+		ref = arg->ref;
+		probeid = BPF_FUNC_probe_read_kernel;
+		while (ref) {
+			pr_debug("prologue: arg %d: offset %ld\n",
+				 i, ref->offset);
+
+			if (ref->user_access)
+				probeid = BPF_FUNC_probe_read_user;
+
+			err = gen_read_mem(pos, BPF_REG_3, BPF_REG_7,
+					   ref->offset, probeid);
+			if (err) {
+				pr_err("prologue: failed to generate probe_read function call\n");
+				goto errout;
+			}
+
+			ref = ref->next;
+			/*
+			 * Load previous result into ARG3. Use
+			 * BPF_REG_FP instead of r7 because verifier
+			 * allows FP based addressing only.
+			 */
+			if (ref)
+				ins(BPF_LDX_MEM(BPF_DW, BPF_REG_ARG3,
+						BPF_REG_FP, stack_offset), pos);
+		}
+	}
+
+	/* Final pass: read to registers */
+	for (i = 0; i < nargs; i++) {
+		int insn_sz = (args[i].ref) ? argtype_to_ldx_size(args[i].type) : BPF_DW;
+
+		pr_debug("prologue: load arg %d, insn_sz is %s\n",
+			 i, insn_sz_to_str(insn_sz));
+		ins(BPF_LDX_MEM(insn_sz, BPF_PROLOGUE_START_ARG_REG + i,
+				BPF_REG_FP, -BPF_REG_SIZE * (i + 1)), pos);
+	}
+
+	ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_SUCCESS_CODE), pos);
+
+	return check_pos(pos);
+errout:
+	return err;
+}
+
+static int
+prologue_relocate(struct bpf_insn_pos *pos, struct bpf_insn *error_code,
+		  struct bpf_insn *success_code, struct bpf_insn *user_code)
+{
+	struct bpf_insn *insn;
+
+	if (check_pos(pos))
+		return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
+
+	for (insn = pos->begin; insn < pos->pos; insn++) {
+		struct bpf_insn *target;
+		u8 class = BPF_CLASS(insn->code);
+		u8 opcode;
+
+		if (class != BPF_JMP)
+			continue;
+		opcode = BPF_OP(insn->code);
+		if (opcode == BPF_CALL)
+			continue;
+
+		switch (insn->off) {
+		case JMP_TO_ERROR_CODE:
+			target = error_code;
+			break;
+		case JMP_TO_SUCCESS_CODE:
+			target = success_code;
+			break;
+		case JMP_TO_USER_CODE:
+			target = user_code;
+			break;
+		default:
+			pr_err("bpf prologue: internal error: relocation failed\n");
+			return -BPF_LOADER_ERRNO__PROLOGUE;
+		}
+
+		insn->off = target - (insn + 1);
+	}
+	return 0;
+}
+
+int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
+		      struct bpf_insn *new_prog, size_t *new_cnt,
+		      size_t cnt_space)
+{
+	struct bpf_insn *success_code = NULL;
+	struct bpf_insn *error_code = NULL;
+	struct bpf_insn *user_code = NULL;
+	struct bpf_insn_pos pos;
+	bool fastpath = true;
+	int err = 0, i;
+
+	if (!new_prog || !new_cnt)
+		return -EINVAL;
+
+	if (cnt_space > BPF_MAXINSNS)
+		cnt_space = BPF_MAXINSNS;
+
+	pos.begin = new_prog;
+	pos.end = new_prog + cnt_space;
+	pos.pos = new_prog;
+
+	if (!nargs) {
+		ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0),
+		    &pos);
+
+		if (check_pos(&pos))
+			goto errout;
+
+		*new_cnt = pos_get_cnt(&pos);
+		return 0;
+	}
+
+	if (nargs > BPF_PROLOGUE_MAX_ARGS) {
+		pr_warning("bpf: prologue: %d arguments are dropped\n",
+			   nargs - BPF_PROLOGUE_MAX_ARGS);
+		nargs = BPF_PROLOGUE_MAX_ARGS;
+	}
+
+	/* First pass: validation */
+	for (i = 0; i < nargs; i++) {
+		struct probe_trace_arg_ref *ref = args[i].ref;
+
+		if (args[i].value[0] == '@') {
+			/* TODO: fetch global variable */
+			pr_err("bpf: prologue: global %s%+ld not support\n",
+				args[i].value, ref ? ref->offset : 0);
+			return -ENOTSUP;
+		}
+
+		while (ref) {
+			/* fastpath is true if all args has ref == NULL */
+			fastpath = false;
+
+			/*
+			 * Instruction encodes immediate value using
+			 * s32, ref->offset is long. On systems which
+			 * can't fill long in s32, refuse to process if
+			 * ref->offset too large (or small).
+			 */
+#ifdef __LP64__
+#define OFFSET_MAX	((1LL << 31) - 1)
+#define OFFSET_MIN	((1LL << 31) * -1)
+			if (ref->offset > OFFSET_MAX ||
+					ref->offset < OFFSET_MIN) {
+				pr_err("bpf: prologue: offset out of bound: %ld\n",
+				       ref->offset);
+				return -BPF_LOADER_ERRNO__PROLOGUEOOB;
+			}
+#endif
+			ref = ref->next;
+		}
+	}
+	pr_debug("prologue: pass validation\n");
+
+	if (fastpath) {
+		/* If all variables are registers... */
+		pr_debug("prologue: fast path\n");
+		err = gen_prologue_fastpath(&pos, args, nargs);
+		if (err)
+			goto errout;
+	} else {
+		pr_debug("prologue: slow path\n");
+
+		/* Initialization: move ctx to a callee saved register. */
+		ins(BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1), &pos);
+
+		err = gen_prologue_slowpath(&pos, args, nargs);
+		if (err)
+			goto errout;
+		/*
+		 * start of ERROR_CODE (only slow pass needs error code)
+		 *   mov r2 <- 1  // r2 is error number
+		 *   mov r3 <- 0  // r3, r4... should be touched or
+		 *                // verifier would complain
+		 *   mov r4 <- 0
+		 *   ...
+		 *   goto usercode
+		 */
+		error_code = pos.pos;
+		ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 1),
+		    &pos);
+
+		for (i = 0; i < nargs; i++)
+			ins(BPF_ALU64_IMM(BPF_MOV,
+					  BPF_PROLOGUE_START_ARG_REG + i,
+					  0),
+			    &pos);
+		ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_USER_CODE),
+				&pos);
+	}
+
+	/*
+	 * start of SUCCESS_CODE:
+	 *   mov r2 <- 0
+	 *   goto usercode  // skip
+	 */
+	success_code = pos.pos;
+	ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0), &pos);
+
+	/*
+	 * start of USER_CODE:
+	 *   Restore ctx to r1
+	 */
+	user_code = pos.pos;
+	if (!fastpath) {
+		/*
+		 * Only slow path needs restoring of ctx. In fast path,
+		 * register are loaded directly from r1.
+		 */
+		ins(BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX), &pos);
+		err = prologue_relocate(&pos, error_code, success_code,
+					user_code);
+		if (err)
+			goto errout;
+	}
+
+	err = check_pos(&pos);
+	if (err)
+		goto errout;
+
+	*new_cnt = pos_get_cnt(&pos);
+	return 0;
+errout:
+	return err;
+}
diff --git a/tools/perf/util/bpf-prologue.h b/tools/perf/util/bpf-prologue.h
new file mode 100644
index 000000000..c50c73580
--- /dev/null
+++ b/tools/perf/util/bpf-prologue.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015, He Kuang <hekuang@huawei.com>
+ * Copyright (C) 2015, Huawei Inc.
+ */
+#ifndef __BPF_PROLOGUE_H
+#define __BPF_PROLOGUE_H
+
+#include <linux/compiler.h>
+#include <linux/filter.h>
+#include "probe-event.h"
+
+#define BPF_PROLOGUE_MAX_ARGS 3
+#define BPF_PROLOGUE_START_ARG_REG BPF_REG_3
+#define BPF_PROLOGUE_FETCH_RESULT_REG BPF_REG_2
+
+#ifdef HAVE_BPF_PROLOGUE
+int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
+		      struct bpf_insn *new_prog, size_t *new_cnt,
+		      size_t cnt_space);
+#else
+#include <errno.h>
+
+static inline int
+bpf__gen_prologue(struct probe_trace_arg *args __maybe_unused,
+		  int nargs __maybe_unused,
+		  struct bpf_insn *new_prog __maybe_unused,
+		  size_t *new_cnt,
+		  size_t cnt_space __maybe_unused)
+{
+	if (!new_cnt)
+		return -EINVAL;
+	*new_cnt = 0;
+	return -ENOTSUP;
+}
+#endif
+#endif /* __BPF_PROLOGUE_H */
diff --git a/tools/perf/util/bpf-utils.c b/tools/perf/util/bpf-utils.c
new file mode 100644
index 000000000..80b1d2b37
--- /dev/null
+++ b/tools/perf/util/bpf-utils.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <errno.h>
+#include <stdlib.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <bpf/bpf.h>
+#include "bpf-utils.h"
+#include "debug.h"
+
+struct bpil_array_desc {
+	int	array_offset;	/* e.g. offset of jited_prog_insns */
+	int	count_offset;	/* e.g. offset of jited_prog_len */
+	int	size_offset;	/* > 0: offset of rec size,
+				 * < 0: fix size of -size_offset
+				 */
+};
+
+static struct bpil_array_desc bpil_array_desc[] = {
+	[PERF_BPIL_JITED_INSNS] = {
+		offsetof(struct bpf_prog_info, jited_prog_insns),
+		offsetof(struct bpf_prog_info, jited_prog_len),
+		-1,
+	},
+	[PERF_BPIL_XLATED_INSNS] = {
+		offsetof(struct bpf_prog_info, xlated_prog_insns),
+		offsetof(struct bpf_prog_info, xlated_prog_len),
+		-1,
+	},
+	[PERF_BPIL_MAP_IDS] = {
+		offsetof(struct bpf_prog_info, map_ids),
+		offsetof(struct bpf_prog_info, nr_map_ids),
+		-(int)sizeof(__u32),
+	},
+	[PERF_BPIL_JITED_KSYMS] = {
+		offsetof(struct bpf_prog_info, jited_ksyms),
+		offsetof(struct bpf_prog_info, nr_jited_ksyms),
+		-(int)sizeof(__u64),
+	},
+	[PERF_BPIL_JITED_FUNC_LENS] = {
+		offsetof(struct bpf_prog_info, jited_func_lens),
+		offsetof(struct bpf_prog_info, nr_jited_func_lens),
+		-(int)sizeof(__u32),
+	},
+	[PERF_BPIL_FUNC_INFO] = {
+		offsetof(struct bpf_prog_info, func_info),
+		offsetof(struct bpf_prog_info, nr_func_info),
+		offsetof(struct bpf_prog_info, func_info_rec_size),
+	},
+	[PERF_BPIL_LINE_INFO] = {
+		offsetof(struct bpf_prog_info, line_info),
+		offsetof(struct bpf_prog_info, nr_line_info),
+		offsetof(struct bpf_prog_info, line_info_rec_size),
+	},
+	[PERF_BPIL_JITED_LINE_INFO] = {
+		offsetof(struct bpf_prog_info, jited_line_info),
+		offsetof(struct bpf_prog_info, nr_jited_line_info),
+		offsetof(struct bpf_prog_info, jited_line_info_rec_size),
+	},
+	[PERF_BPIL_PROG_TAGS] = {
+		offsetof(struct bpf_prog_info, prog_tags),
+		offsetof(struct bpf_prog_info, nr_prog_tags),
+		-(int)sizeof(__u8) * BPF_TAG_SIZE,
+	},
+
+};
+
+static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info,
+					   int offset)
+{
+	__u32 *array = (__u32 *)info;
+
+	if (offset >= 0)
+		return array[offset / sizeof(__u32)];
+	return -(int)offset;
+}
+
+static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info,
+					   int offset)
+{
+	__u64 *array = (__u64 *)info;
+
+	if (offset >= 0)
+		return array[offset / sizeof(__u64)];
+	return -(int)offset;
+}
+
+static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset,
+					 __u32 val)
+{
+	__u32 *array = (__u32 *)info;
+
+	if (offset >= 0)
+		array[offset / sizeof(__u32)] = val;
+}
+
+static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset,
+					 __u64 val)
+{
+	__u64 *array = (__u64 *)info;
+
+	if (offset >= 0)
+		array[offset / sizeof(__u64)] = val;
+}
+
+struct perf_bpil *
+get_bpf_prog_info_linear(int fd, __u64 arrays)
+{
+	struct bpf_prog_info info = {};
+	struct perf_bpil *info_linear;
+	__u32 info_len = sizeof(info);
+	__u32 data_len = 0;
+	int i, err;
+	void *ptr;
+
+	if (arrays >> PERF_BPIL_LAST_ARRAY)
+		return ERR_PTR(-EINVAL);
+
+	/* step 1: get array dimensions */
+	err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
+	if (err) {
+		pr_debug("can't get prog info: %s", strerror(errno));
+		return ERR_PTR(-EFAULT);
+	}
+
+	/* step 2: calculate total size of all arrays */
+	for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) {
+		bool include_array = (arrays & (1UL << i)) > 0;
+		struct bpil_array_desc *desc;
+		__u32 count, size;
+
+		desc = bpil_array_desc + i;
+
+		/* kernel is too old to support this field */
+		if (info_len < desc->array_offset + sizeof(__u32) ||
+		    info_len < desc->count_offset + sizeof(__u32) ||
+		    (desc->size_offset > 0 && info_len < (__u32)desc->size_offset))
+			include_array = false;
+
+		if (!include_array) {
+			arrays &= ~(1UL << i);	/* clear the bit */
+			continue;
+		}
+
+		count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
+		size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
+
+		data_len += roundup(count * size, sizeof(__u64));
+	}
+
+	/* step 3: allocate continuous memory */
+	info_linear = malloc(sizeof(struct perf_bpil) + data_len);
+	if (!info_linear)
+		return ERR_PTR(-ENOMEM);
+
+	/* step 4: fill data to info_linear->info */
+	info_linear->arrays = arrays;
+	memset(&info_linear->info, 0, sizeof(info));
+	ptr = info_linear->data;
+
+	for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) {
+		struct bpil_array_desc *desc;
+		__u32 count, size;
+
+		if ((arrays & (1UL << i)) == 0)
+			continue;
+
+		desc  = bpil_array_desc + i;
+		count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
+		size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
+		bpf_prog_info_set_offset_u32(&info_linear->info,
+					     desc->count_offset, count);
+		bpf_prog_info_set_offset_u32(&info_linear->info,
+					     desc->size_offset, size);
+		bpf_prog_info_set_offset_u64(&info_linear->info,
+					     desc->array_offset,
+					     ptr_to_u64(ptr));
+		ptr += roundup(count * size, sizeof(__u64));
+	}
+
+	/* step 5: call syscall again to get required arrays */
+	err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len);
+	if (err) {
+		pr_debug("can't get prog info: %s", strerror(errno));
+		free(info_linear);
+		return ERR_PTR(-EFAULT);
+	}
+
+	/* step 6: verify the data */
+	for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) {
+		struct bpil_array_desc *desc;
+		__u32 v1, v2;
+
+		if ((arrays & (1UL << i)) == 0)
+			continue;
+
+		desc = bpil_array_desc + i;
+		v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
+		v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
+						   desc->count_offset);
+		if (v1 != v2)
+			pr_warning("%s: mismatch in element count\n", __func__);
+
+		v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
+		v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
+						   desc->size_offset);
+		if (v1 != v2)
+			pr_warning("%s: mismatch in rec size\n", __func__);
+	}
+
+	/* step 7: update info_len and data_len */
+	info_linear->info_len = sizeof(struct bpf_prog_info);
+	info_linear->data_len = data_len;
+
+	return info_linear;
+}
+
+void bpil_addr_to_offs(struct perf_bpil *info_linear)
+{
+	int i;
+
+	for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) {
+		struct bpil_array_desc *desc;
+		__u64 addr, offs;
+
+		if ((info_linear->arrays & (1UL << i)) == 0)
+			continue;
+
+		desc = bpil_array_desc + i;
+		addr = bpf_prog_info_read_offset_u64(&info_linear->info,
+						     desc->array_offset);
+		offs = addr - ptr_to_u64(info_linear->data);
+		bpf_prog_info_set_offset_u64(&info_linear->info,
+					     desc->array_offset, offs);
+	}
+}
+
+void bpil_offs_to_addr(struct perf_bpil *info_linear)
+{
+	int i;
+
+	for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) {
+		struct bpil_array_desc *desc;
+		__u64 addr, offs;
+
+		if ((info_linear->arrays & (1UL << i)) == 0)
+			continue;
+
+		desc = bpil_array_desc + i;
+		offs = bpf_prog_info_read_offset_u64(&info_linear->info,
+						     desc->array_offset);
+		addr = offs + ptr_to_u64(info_linear->data);
+		bpf_prog_info_set_offset_u64(&info_linear->info,
+					     desc->array_offset, addr);
+	}
+}
diff --git a/tools/perf/util/bpf-utils.h b/tools/perf/util/bpf-utils.h
new file mode 100644
index 000000000..86a5055cd
--- /dev/null
+++ b/tools/perf/util/bpf-utils.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __PERF_BPF_UTILS_H
+#define __PERF_BPF_UTILS_H
+
+#define ptr_to_u64(ptr)    ((__u64)(unsigned long)(ptr))
+
+#ifdef HAVE_LIBBPF_SUPPORT
+
+#include <bpf/libbpf.h>
+
+/*
+ * Get bpf_prog_info in continuous memory
+ *
+ * struct bpf_prog_info has multiple arrays. The user has option to choose
+ * arrays to fetch from kernel. The following APIs provide an uniform way to
+ * fetch these data. All arrays in bpf_prog_info are stored in a single
+ * continuous memory region. This makes it easy to store the info in a
+ * file.
+ *
+ * Before writing perf_bpil to files, it is necessary to
+ * translate pointers in bpf_prog_info to offsets. Helper functions
+ * bpil_addr_to_offs() and bpil_offs_to_addr()
+ * are introduced to switch between pointers and offsets.
+ *
+ * Examples:
+ *   # To fetch map_ids and prog_tags:
+ *   __u64 arrays = (1UL << PERF_BPIL_MAP_IDS) |
+ *           (1UL << PERF_BPIL_PROG_TAGS);
+ *   struct perf_bpil *info_linear =
+ *           get_bpf_prog_info_linear(fd, arrays);
+ *
+ *   # To save data in file
+ *   bpil_addr_to_offs(info_linear);
+ *   write(f, info_linear, sizeof(*info_linear) + info_linear->data_len);
+ *
+ *   # To read data from file
+ *   read(f, info_linear, <proper_size>);
+ *   bpil_offs_to_addr(info_linear);
+ */
+enum perf_bpil_array_types {
+	PERF_BPIL_FIRST_ARRAY = 0,
+	PERF_BPIL_JITED_INSNS = 0,
+	PERF_BPIL_XLATED_INSNS,
+	PERF_BPIL_MAP_IDS,
+	PERF_BPIL_JITED_KSYMS,
+	PERF_BPIL_JITED_FUNC_LENS,
+	PERF_BPIL_FUNC_INFO,
+	PERF_BPIL_LINE_INFO,
+	PERF_BPIL_JITED_LINE_INFO,
+	PERF_BPIL_PROG_TAGS,
+	PERF_BPIL_LAST_ARRAY,
+};
+
+struct perf_bpil {
+	/* size of struct bpf_prog_info, when the tool is compiled */
+	__u32			info_len;
+	/* total bytes allocated for data, round up to 8 bytes */
+	__u32			data_len;
+	/* which arrays are included in data */
+	__u64			arrays;
+	struct bpf_prog_info	info;
+	__u8			data[];
+};
+
+struct perf_bpil *
+get_bpf_prog_info_linear(int fd, __u64 arrays);
+
+void
+bpil_addr_to_offs(struct perf_bpil *info_linear);
+
+void
+bpil_offs_to_addr(struct perf_bpil *info_linear);
+
+#endif /* HAVE_LIBBPF_SUPPORT */
+#endif /* __PERF_BPF_UTILS_H */
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
new file mode 100644
index 000000000..ef1c15e4a
--- /dev/null
+++ b/tools/perf/util/bpf_counter.c
@@ -0,0 +1,837 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (c) 2019 Facebook */
+
+#include <assert.h>
+#include <limits.h>
+#include <unistd.h>
+#include <sys/file.h>
+#include <sys/time.h>
+#include <linux/err.h>
+#include <linux/zalloc.h>
+#include <api/fs/fs.h>
+#include <perf/bpf_perf.h>
+
+#include "bpf_counter.h"
+#include "bpf-utils.h"
+#include "counts.h"
+#include "debug.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "target.h"
+#include "cgroup.h"
+#include "cpumap.h"
+#include "thread_map.h"
+
+#include "bpf_skel/bpf_prog_profiler.skel.h"
+#include "bpf_skel/bperf_u.h"
+#include "bpf_skel/bperf_leader.skel.h"
+#include "bpf_skel/bperf_follower.skel.h"
+
+#define ATTR_MAP_SIZE 16
+
+static inline void *u64_to_ptr(__u64 ptr)
+{
+	return (void *)(unsigned long)ptr;
+}
+
+static struct bpf_counter *bpf_counter_alloc(void)
+{
+	struct bpf_counter *counter;
+
+	counter = zalloc(sizeof(*counter));
+	if (counter)
+		INIT_LIST_HEAD(&counter->list);
+	return counter;
+}
+
+static int bpf_program_profiler__destroy(struct evsel *evsel)
+{
+	struct bpf_counter *counter, *tmp;
+
+	list_for_each_entry_safe(counter, tmp,
+				 &evsel->bpf_counter_list, list) {
+		list_del_init(&counter->list);
+		bpf_prog_profiler_bpf__destroy(counter->skel);
+		free(counter);
+	}
+	assert(list_empty(&evsel->bpf_counter_list));
+
+	return 0;
+}
+
+static char *bpf_target_prog_name(int tgt_fd)
+{
+	struct bpf_func_info *func_info;
+	struct perf_bpil *info_linear;
+	const struct btf_type *t;
+	struct btf *btf = NULL;
+	char *name = NULL;
+
+	info_linear = get_bpf_prog_info_linear(tgt_fd, 1UL << PERF_BPIL_FUNC_INFO);
+	if (IS_ERR_OR_NULL(info_linear)) {
+		pr_debug("failed to get info_linear for prog FD %d\n", tgt_fd);
+		return NULL;
+	}
+
+	if (info_linear->info.btf_id == 0) {
+		pr_debug("prog FD %d doesn't have valid btf\n", tgt_fd);
+		goto out;
+	}
+
+	btf = btf__load_from_kernel_by_id(info_linear->info.btf_id);
+	if (libbpf_get_error(btf)) {
+		pr_debug("failed to load btf for prog FD %d\n", tgt_fd);
+		goto out;
+	}
+
+	func_info = u64_to_ptr(info_linear->info.func_info);
+	t = btf__type_by_id(btf, func_info[0].type_id);
+	if (!t) {
+		pr_debug("btf %d doesn't have type %d\n",
+			 info_linear->info.btf_id, func_info[0].type_id);
+		goto out;
+	}
+	name = strdup(btf__name_by_offset(btf, t->name_off));
+out:
+	btf__free(btf);
+	free(info_linear);
+	return name;
+}
+
+static int bpf_program_profiler_load_one(struct evsel *evsel, u32 prog_id)
+{
+	struct bpf_prog_profiler_bpf *skel;
+	struct bpf_counter *counter;
+	struct bpf_program *prog;
+	char *prog_name;
+	int prog_fd;
+	int err;
+
+	prog_fd = bpf_prog_get_fd_by_id(prog_id);
+	if (prog_fd < 0) {
+		pr_err("Failed to open fd for bpf prog %u\n", prog_id);
+		return -1;
+	}
+	counter = bpf_counter_alloc();
+	if (!counter) {
+		close(prog_fd);
+		return -1;
+	}
+
+	skel = bpf_prog_profiler_bpf__open();
+	if (!skel) {
+		pr_err("Failed to open bpf skeleton\n");
+		goto err_out;
+	}
+
+	skel->rodata->num_cpu = evsel__nr_cpus(evsel);
+
+	bpf_map__set_max_entries(skel->maps.events, evsel__nr_cpus(evsel));
+	bpf_map__set_max_entries(skel->maps.fentry_readings, 1);
+	bpf_map__set_max_entries(skel->maps.accum_readings, 1);
+
+	prog_name = bpf_target_prog_name(prog_fd);
+	if (!prog_name) {
+		pr_err("Failed to get program name for bpf prog %u. Does it have BTF?\n", prog_id);
+		goto err_out;
+	}
+
+	bpf_object__for_each_program(prog, skel->obj) {
+		err = bpf_program__set_attach_target(prog, prog_fd, prog_name);
+		if (err) {
+			pr_err("bpf_program__set_attach_target failed.\n"
+			       "Does bpf prog %u have BTF?\n", prog_id);
+			goto err_out;
+		}
+	}
+	set_max_rlimit();
+	err = bpf_prog_profiler_bpf__load(skel);
+	if (err) {
+		pr_err("bpf_prog_profiler_bpf__load failed\n");
+		goto err_out;
+	}
+
+	assert(skel != NULL);
+	counter->skel = skel;
+	list_add(&counter->list, &evsel->bpf_counter_list);
+	close(prog_fd);
+	return 0;
+err_out:
+	bpf_prog_profiler_bpf__destroy(skel);
+	free(counter);
+	close(prog_fd);
+	return -1;
+}
+
+static int bpf_program_profiler__load(struct evsel *evsel, struct target *target)
+{
+	char *bpf_str, *bpf_str_, *tok, *saveptr = NULL, *p;
+	u32 prog_id;
+	int ret;
+
+	bpf_str_ = bpf_str = strdup(target->bpf_str);
+	if (!bpf_str)
+		return -1;
+
+	while ((tok = strtok_r(bpf_str, ",", &saveptr)) != NULL) {
+		prog_id = strtoul(tok, &p, 10);
+		if (prog_id == 0 || prog_id == UINT_MAX ||
+		    (*p != '\0' && *p != ',')) {
+			pr_err("Failed to parse bpf prog ids %s\n",
+			       target->bpf_str);
+			return -1;
+		}
+
+		ret = bpf_program_profiler_load_one(evsel, prog_id);
+		if (ret) {
+			bpf_program_profiler__destroy(evsel);
+			free(bpf_str_);
+			return -1;
+		}
+		bpf_str = NULL;
+	}
+	free(bpf_str_);
+	return 0;
+}
+
+static int bpf_program_profiler__enable(struct evsel *evsel)
+{
+	struct bpf_counter *counter;
+	int ret;
+
+	list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
+		assert(counter->skel != NULL);
+		ret = bpf_prog_profiler_bpf__attach(counter->skel);
+		if (ret) {
+			bpf_program_profiler__destroy(evsel);
+			return ret;
+		}
+	}
+	return 0;
+}
+
+static int bpf_program_profiler__disable(struct evsel *evsel)
+{
+	struct bpf_counter *counter;
+
+	list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
+		assert(counter->skel != NULL);
+		bpf_prog_profiler_bpf__detach(counter->skel);
+	}
+	return 0;
+}
+
+static int bpf_program_profiler__read(struct evsel *evsel)
+{
+	// BPF_MAP_TYPE_PERCPU_ARRAY uses /sys/devices/system/cpu/possible
+	// Sometimes possible > online, like on a Ryzen 3900X that has 24
+	// threads but its possible showed 0-31 -acme
+	int num_cpu_bpf = libbpf_num_possible_cpus();
+	struct bpf_perf_event_value values[num_cpu_bpf];
+	struct bpf_counter *counter;
+	struct perf_counts_values *counts;
+	int reading_map_fd;
+	__u32 key = 0;
+	int err, idx, bpf_cpu;
+
+	if (list_empty(&evsel->bpf_counter_list))
+		return -EAGAIN;
+
+	perf_cpu_map__for_each_idx(idx, evsel__cpus(evsel)) {
+		counts = perf_counts(evsel->counts, idx, 0);
+		counts->val = 0;
+		counts->ena = 0;
+		counts->run = 0;
+	}
+	list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
+		struct bpf_prog_profiler_bpf *skel = counter->skel;
+
+		assert(skel != NULL);
+		reading_map_fd = bpf_map__fd(skel->maps.accum_readings);
+
+		err = bpf_map_lookup_elem(reading_map_fd, &key, values);
+		if (err) {
+			pr_err("failed to read value\n");
+			return err;
+		}
+
+		for (bpf_cpu = 0; bpf_cpu < num_cpu_bpf; bpf_cpu++) {
+			idx = perf_cpu_map__idx(evsel__cpus(evsel),
+						(struct perf_cpu){.cpu = bpf_cpu});
+			if (idx == -1)
+				continue;
+			counts = perf_counts(evsel->counts, idx, 0);
+			counts->val += values[bpf_cpu].counter;
+			counts->ena += values[bpf_cpu].enabled;
+			counts->run += values[bpf_cpu].running;
+		}
+	}
+	return 0;
+}
+
+static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu_map_idx,
+					    int fd)
+{
+	struct bpf_prog_profiler_bpf *skel;
+	struct bpf_counter *counter;
+	int ret;
+
+	list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
+		skel = counter->skel;
+		assert(skel != NULL);
+
+		ret = bpf_map_update_elem(bpf_map__fd(skel->maps.events),
+					  &cpu_map_idx, &fd, BPF_ANY);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+struct bpf_counter_ops bpf_program_profiler_ops = {
+	.load       = bpf_program_profiler__load,
+	.enable	    = bpf_program_profiler__enable,
+	.disable    = bpf_program_profiler__disable,
+	.read       = bpf_program_profiler__read,
+	.destroy    = bpf_program_profiler__destroy,
+	.install_pe = bpf_program_profiler__install_pe,
+};
+
+static bool bperf_attr_map_compatible(int attr_map_fd)
+{
+	struct bpf_map_info map_info = {0};
+	__u32 map_info_len = sizeof(map_info);
+	int err;
+
+	err = bpf_obj_get_info_by_fd(attr_map_fd, &map_info, &map_info_len);
+
+	if (err)
+		return false;
+	return (map_info.key_size == sizeof(struct perf_event_attr)) &&
+		(map_info.value_size == sizeof(struct perf_event_attr_map_entry));
+}
+
+#ifndef HAVE_LIBBPF_BPF_MAP_CREATE
+LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size,
+                              int value_size, int max_entries, __u32 map_flags);
+int
+bpf_map_create(enum bpf_map_type map_type,
+	       const char *map_name __maybe_unused,
+	       __u32 key_size,
+	       __u32 value_size,
+	       __u32 max_entries,
+	       const struct bpf_map_create_opts *opts __maybe_unused)
+{
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+	return bpf_create_map(map_type, key_size, value_size, max_entries, 0);
+#pragma GCC diagnostic pop
+}
+#endif
+
+static int bperf_lock_attr_map(struct target *target)
+{
+	char path[PATH_MAX];
+	int map_fd, err;
+
+	if (target->attr_map) {
+		scnprintf(path, PATH_MAX, "%s", target->attr_map);
+	} else {
+		scnprintf(path, PATH_MAX, "%s/fs/bpf/%s", sysfs__mountpoint(),
+			  BPF_PERF_DEFAULT_ATTR_MAP_PATH);
+	}
+
+	if (access(path, F_OK)) {
+		map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL,
+					sizeof(struct perf_event_attr),
+					sizeof(struct perf_event_attr_map_entry),
+					ATTR_MAP_SIZE, NULL);
+		if (map_fd < 0)
+			return -1;
+
+		err = bpf_obj_pin(map_fd, path);
+		if (err) {
+			/* someone pinned the map in parallel? */
+			close(map_fd);
+			map_fd = bpf_obj_get(path);
+			if (map_fd < 0)
+				return -1;
+		}
+	} else {
+		map_fd = bpf_obj_get(path);
+		if (map_fd < 0)
+			return -1;
+	}
+
+	if (!bperf_attr_map_compatible(map_fd)) {
+		close(map_fd);
+		return -1;
+
+	}
+	err = flock(map_fd, LOCK_EX);
+	if (err) {
+		close(map_fd);
+		return -1;
+	}
+	return map_fd;
+}
+
+static int bperf_check_target(struct evsel *evsel,
+			      struct target *target,
+			      enum bperf_filter_type *filter_type,
+			      __u32 *filter_entry_cnt)
+{
+	if (evsel->core.leader->nr_members > 1) {
+		pr_err("bpf managed perf events do not yet support groups.\n");
+		return -1;
+	}
+
+	/* determine filter type based on target */
+	if (target->system_wide) {
+		*filter_type = BPERF_FILTER_GLOBAL;
+		*filter_entry_cnt = 1;
+	} else if (target->cpu_list) {
+		*filter_type = BPERF_FILTER_CPU;
+		*filter_entry_cnt = perf_cpu_map__nr(evsel__cpus(evsel));
+	} else if (target->tid) {
+		*filter_type = BPERF_FILTER_PID;
+		*filter_entry_cnt = perf_thread_map__nr(evsel->core.threads);
+	} else if (target->pid || evsel->evlist->workload.pid != -1) {
+		*filter_type = BPERF_FILTER_TGID;
+		*filter_entry_cnt = perf_thread_map__nr(evsel->core.threads);
+	} else {
+		pr_err("bpf managed perf events do not yet support these targets.\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static	struct perf_cpu_map *all_cpu_map;
+
+static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
+				       struct perf_event_attr_map_entry *entry)
+{
+	struct bperf_leader_bpf *skel = bperf_leader_bpf__open();
+	int link_fd, diff_map_fd, err;
+	struct bpf_link *link = NULL;
+
+	if (!skel) {
+		pr_err("Failed to open leader skeleton\n");
+		return -1;
+	}
+
+	bpf_map__set_max_entries(skel->maps.events, libbpf_num_possible_cpus());
+	err = bperf_leader_bpf__load(skel);
+	if (err) {
+		pr_err("Failed to load leader skeleton\n");
+		goto out;
+	}
+
+	link = bpf_program__attach(skel->progs.on_switch);
+	if (IS_ERR(link)) {
+		pr_err("Failed to attach leader program\n");
+		err = PTR_ERR(link);
+		goto out;
+	}
+
+	link_fd = bpf_link__fd(link);
+	diff_map_fd = bpf_map__fd(skel->maps.diff_readings);
+	entry->link_id = bpf_link_get_id(link_fd);
+	entry->diff_map_id = bpf_map_get_id(diff_map_fd);
+	err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, entry, BPF_ANY);
+	assert(err == 0);
+
+	evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry->link_id);
+	assert(evsel->bperf_leader_link_fd >= 0);
+
+	/*
+	 * save leader_skel for install_pe, which is called within
+	 * following evsel__open_per_cpu call
+	 */
+	evsel->leader_skel = skel;
+	evsel__open_per_cpu(evsel, all_cpu_map, -1);
+
+out:
+	bperf_leader_bpf__destroy(skel);
+	bpf_link__destroy(link);
+	return err;
+}
+
+static int bperf__load(struct evsel *evsel, struct target *target)
+{
+	struct perf_event_attr_map_entry entry = {0xffffffff, 0xffffffff};
+	int attr_map_fd, diff_map_fd = -1, err;
+	enum bperf_filter_type filter_type;
+	__u32 filter_entry_cnt, i;
+
+	if (bperf_check_target(evsel, target, &filter_type, &filter_entry_cnt))
+		return -1;
+
+	if (!all_cpu_map) {
+		all_cpu_map = perf_cpu_map__new(NULL);
+		if (!all_cpu_map)
+			return -1;
+	}
+
+	evsel->bperf_leader_prog_fd = -1;
+	evsel->bperf_leader_link_fd = -1;
+
+	/*
+	 * Step 1: hold a fd on the leader program and the bpf_link, if
+	 * the program is not already gone, reload the program.
+	 * Use flock() to ensure exclusive access to the perf_event_attr
+	 * map.
+	 */
+	attr_map_fd = bperf_lock_attr_map(target);
+	if (attr_map_fd < 0) {
+		pr_err("Failed to lock perf_event_attr map\n");
+		return -1;
+	}
+
+	err = bpf_map_lookup_elem(attr_map_fd, &evsel->core.attr, &entry);
+	if (err) {
+		err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, &entry, BPF_ANY);
+		if (err)
+			goto out;
+	}
+
+	evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry.link_id);
+	if (evsel->bperf_leader_link_fd < 0 &&
+	    bperf_reload_leader_program(evsel, attr_map_fd, &entry)) {
+		err = -1;
+		goto out;
+	}
+	/*
+	 * The bpf_link holds reference to the leader program, and the
+	 * leader program holds reference to the maps. Therefore, if
+	 * link_id is valid, diff_map_id should also be valid.
+	 */
+	evsel->bperf_leader_prog_fd = bpf_prog_get_fd_by_id(
+		bpf_link_get_prog_id(evsel->bperf_leader_link_fd));
+	assert(evsel->bperf_leader_prog_fd >= 0);
+
+	diff_map_fd = bpf_map_get_fd_by_id(entry.diff_map_id);
+	assert(diff_map_fd >= 0);
+
+	/*
+	 * bperf uses BPF_PROG_TEST_RUN to get accurate reading. Check
+	 * whether the kernel support it
+	 */
+	err = bperf_trigger_reading(evsel->bperf_leader_prog_fd, 0);
+	if (err) {
+		pr_err("The kernel does not support test_run for raw_tp BPF programs.\n"
+		       "Therefore, --use-bpf might show inaccurate readings\n");
+		goto out;
+	}
+
+	/* Step 2: load the follower skeleton */
+	evsel->follower_skel = bperf_follower_bpf__open();
+	if (!evsel->follower_skel) {
+		err = -1;
+		pr_err("Failed to open follower skeleton\n");
+		goto out;
+	}
+
+	/* attach fexit program to the leader program */
+	bpf_program__set_attach_target(evsel->follower_skel->progs.fexit_XXX,
+				       evsel->bperf_leader_prog_fd, "on_switch");
+
+	/* connect to leader diff_reading map */
+	bpf_map__reuse_fd(evsel->follower_skel->maps.diff_readings, diff_map_fd);
+
+	/* set up reading map */
+	bpf_map__set_max_entries(evsel->follower_skel->maps.accum_readings,
+				 filter_entry_cnt);
+	/* set up follower filter based on target */
+	bpf_map__set_max_entries(evsel->follower_skel->maps.filter,
+				 filter_entry_cnt);
+	err = bperf_follower_bpf__load(evsel->follower_skel);
+	if (err) {
+		pr_err("Failed to load follower skeleton\n");
+		bperf_follower_bpf__destroy(evsel->follower_skel);
+		evsel->follower_skel = NULL;
+		goto out;
+	}
+
+	for (i = 0; i < filter_entry_cnt; i++) {
+		int filter_map_fd;
+		__u32 key;
+
+		if (filter_type == BPERF_FILTER_PID ||
+		    filter_type == BPERF_FILTER_TGID)
+			key = evsel->core.threads->map[i].pid;
+		else if (filter_type == BPERF_FILTER_CPU)
+			key = evsel->core.cpus->map[i].cpu;
+		else
+			break;
+
+		filter_map_fd = bpf_map__fd(evsel->follower_skel->maps.filter);
+		bpf_map_update_elem(filter_map_fd, &key, &i, BPF_ANY);
+	}
+
+	evsel->follower_skel->bss->type = filter_type;
+
+	err = bperf_follower_bpf__attach(evsel->follower_skel);
+
+out:
+	if (err && evsel->bperf_leader_link_fd >= 0)
+		close(evsel->bperf_leader_link_fd);
+	if (err && evsel->bperf_leader_prog_fd >= 0)
+		close(evsel->bperf_leader_prog_fd);
+	if (diff_map_fd >= 0)
+		close(diff_map_fd);
+
+	flock(attr_map_fd, LOCK_UN);
+	close(attr_map_fd);
+
+	return err;
+}
+
+static int bperf__install_pe(struct evsel *evsel, int cpu_map_idx, int fd)
+{
+	struct bperf_leader_bpf *skel = evsel->leader_skel;
+
+	return bpf_map_update_elem(bpf_map__fd(skel->maps.events),
+				   &cpu_map_idx, &fd, BPF_ANY);
+}
+
+/*
+ * trigger the leader prog on each cpu, so the accum_reading map could get
+ * the latest readings.
+ */
+static int bperf_sync_counters(struct evsel *evsel)
+{
+	int num_cpu, i, cpu;
+
+	num_cpu = all_cpu_map->nr;
+	for (i = 0; i < num_cpu; i++) {
+		cpu = all_cpu_map->map[i].cpu;
+		bperf_trigger_reading(evsel->bperf_leader_prog_fd, cpu);
+	}
+	return 0;
+}
+
+static int bperf__enable(struct evsel *evsel)
+{
+	evsel->follower_skel->bss->enabled = 1;
+	return 0;
+}
+
+static int bperf__disable(struct evsel *evsel)
+{
+	evsel->follower_skel->bss->enabled = 0;
+	return 0;
+}
+
+static int bperf__read(struct evsel *evsel)
+{
+	struct bperf_follower_bpf *skel = evsel->follower_skel;
+	__u32 num_cpu_bpf = cpu__max_cpu().cpu;
+	struct bpf_perf_event_value values[num_cpu_bpf];
+	struct perf_counts_values *counts;
+	int reading_map_fd, err = 0;
+	__u32 i;
+	int j;
+
+	bperf_sync_counters(evsel);
+	reading_map_fd = bpf_map__fd(skel->maps.accum_readings);
+
+	for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) {
+		struct perf_cpu entry;
+		__u32 cpu;
+
+		err = bpf_map_lookup_elem(reading_map_fd, &i, values);
+		if (err)
+			goto out;
+		switch (evsel->follower_skel->bss->type) {
+		case BPERF_FILTER_GLOBAL:
+			assert(i == 0);
+
+			perf_cpu_map__for_each_cpu(entry, j, evsel__cpus(evsel)) {
+				counts = perf_counts(evsel->counts, j, 0);
+				counts->val = values[entry.cpu].counter;
+				counts->ena = values[entry.cpu].enabled;
+				counts->run = values[entry.cpu].running;
+			}
+			break;
+		case BPERF_FILTER_CPU:
+			cpu = perf_cpu_map__cpu(evsel__cpus(evsel), i).cpu;
+			assert(cpu >= 0);
+			counts = perf_counts(evsel->counts, i, 0);
+			counts->val = values[cpu].counter;
+			counts->ena = values[cpu].enabled;
+			counts->run = values[cpu].running;
+			break;
+		case BPERF_FILTER_PID:
+		case BPERF_FILTER_TGID:
+			counts = perf_counts(evsel->counts, 0, i);
+			counts->val = 0;
+			counts->ena = 0;
+			counts->run = 0;
+
+			for (cpu = 0; cpu < num_cpu_bpf; cpu++) {
+				counts->val += values[cpu].counter;
+				counts->ena += values[cpu].enabled;
+				counts->run += values[cpu].running;
+			}
+			break;
+		default:
+			break;
+		}
+	}
+out:
+	return err;
+}
+
+static int bperf__destroy(struct evsel *evsel)
+{
+	bperf_follower_bpf__destroy(evsel->follower_skel);
+	close(evsel->bperf_leader_prog_fd);
+	close(evsel->bperf_leader_link_fd);
+	return 0;
+}
+
+/*
+ * bperf: share hardware PMCs with BPF
+ *
+ * perf uses performance monitoring counters (PMC) to monitor system
+ * performance. The PMCs are limited hardware resources. For example,
+ * Intel CPUs have 3x fixed PMCs and 4x programmable PMCs per cpu.
+ *
+ * Modern data center systems use these PMCs in many different ways:
+ * system level monitoring, (maybe nested) container level monitoring, per
+ * process monitoring, profiling (in sample mode), etc. In some cases,
+ * there are more active perf_events than available hardware PMCs. To allow
+ * all perf_events to have a chance to run, it is necessary to do expensive
+ * time multiplexing of events.
+ *
+ * On the other hand, many monitoring tools count the common metrics
+ * (cycles, instructions). It is a waste to have multiple tools create
+ * multiple perf_events of "cycles" and occupy multiple PMCs.
+ *
+ * bperf tries to reduce such wastes by allowing multiple perf_events of
+ * "cycles" or "instructions" (at different scopes) to share PMUs. Instead
+ * of having each perf-stat session to read its own perf_events, bperf uses
+ * BPF programs to read the perf_events and aggregate readings to BPF maps.
+ * Then, the perf-stat session(s) reads the values from these BPF maps.
+ *
+ *                                ||
+ *       shared progs and maps <- || -> per session progs and maps
+ *                                ||
+ *   ---------------              ||
+ *   | perf_events |              ||
+ *   ---------------       fexit  ||      -----------------
+ *          |             --------||----> | follower prog |
+ *       --------------- /        || ---  -----------------
+ * cs -> | leader prog |/         ||/        |         |
+ *   --> ---------------         /||  --------------  ------------------
+ *  /       |         |         / ||  | filter map |  | accum_readings |
+ * /  ------------  ------------  ||  --------------  ------------------
+ * |  | prev map |  | diff map |  ||                        |
+ * |  ------------  ------------  ||                        |
+ *  \                             ||                        |
+ * = \ ==================================================== | ============
+ *    \                                                    /   user space
+ *     \                                                  /
+ *      \                                                /
+ *    BPF_PROG_TEST_RUN                    BPF_MAP_LOOKUP_ELEM
+ *        \                                            /
+ *         \                                          /
+ *          \------  perf-stat ----------------------/
+ *
+ * The figure above shows the architecture of bperf. Note that the figure
+ * is divided into 3 regions: shared progs and maps (top left), per session
+ * progs and maps (top right), and user space (bottom).
+ *
+ * The leader prog is triggered on each context switch (cs). The leader
+ * prog reads perf_events and stores the difference (current_reading -
+ * previous_reading) to the diff map. For the same metric, e.g. "cycles",
+ * multiple perf-stat sessions share the same leader prog.
+ *
+ * Each perf-stat session creates a follower prog as fexit program to the
+ * leader prog. It is possible to attach up to BPF_MAX_TRAMP_PROGS (38)
+ * follower progs to the same leader prog. The follower prog checks current
+ * task and processor ID to decide whether to add the value from the diff
+ * map to its accumulated reading map (accum_readings).
+ *
+ * Finally, perf-stat user space reads the value from accum_reading map.
+ *
+ * Besides context switch, it is also necessary to trigger the leader prog
+ * before perf-stat reads the value. Otherwise, the accum_reading map may
+ * not have the latest reading from the perf_events. This is achieved by
+ * triggering the event via sys_bpf(BPF_PROG_TEST_RUN) to each CPU.
+ *
+ * Comment before the definition of struct perf_event_attr_map_entry
+ * describes how different sessions of perf-stat share information about
+ * the leader prog.
+ */
+
+struct bpf_counter_ops bperf_ops = {
+	.load       = bperf__load,
+	.enable     = bperf__enable,
+	.disable    = bperf__disable,
+	.read       = bperf__read,
+	.install_pe = bperf__install_pe,
+	.destroy    = bperf__destroy,
+};
+
+extern struct bpf_counter_ops bperf_cgrp_ops;
+
+static inline bool bpf_counter_skip(struct evsel *evsel)
+{
+	return list_empty(&evsel->bpf_counter_list) &&
+		evsel->follower_skel == NULL;
+}
+
+int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd)
+{
+	if (bpf_counter_skip(evsel))
+		return 0;
+	return evsel->bpf_counter_ops->install_pe(evsel, cpu_map_idx, fd);
+}
+
+int bpf_counter__load(struct evsel *evsel, struct target *target)
+{
+	if (target->bpf_str)
+		evsel->bpf_counter_ops = &bpf_program_profiler_ops;
+	else if (cgrp_event_expanded && target->use_bpf)
+		evsel->bpf_counter_ops = &bperf_cgrp_ops;
+	else if (target->use_bpf || evsel->bpf_counter ||
+		 evsel__match_bpf_counter_events(evsel->name))
+		evsel->bpf_counter_ops = &bperf_ops;
+
+	if (evsel->bpf_counter_ops)
+		return evsel->bpf_counter_ops->load(evsel, target);
+	return 0;
+}
+
+int bpf_counter__enable(struct evsel *evsel)
+{
+	if (bpf_counter_skip(evsel))
+		return 0;
+	return evsel->bpf_counter_ops->enable(evsel);
+}
+
+int bpf_counter__disable(struct evsel *evsel)
+{
+	if (bpf_counter_skip(evsel))
+		return 0;
+	return evsel->bpf_counter_ops->disable(evsel);
+}
+
+int bpf_counter__read(struct evsel *evsel)
+{
+	if (bpf_counter_skip(evsel))
+		return -EAGAIN;
+	return evsel->bpf_counter_ops->read(evsel);
+}
+
+void bpf_counter__destroy(struct evsel *evsel)
+{
+	if (bpf_counter_skip(evsel))
+		return;
+	evsel->bpf_counter_ops->destroy(evsel);
+	evsel->bpf_counter_ops = NULL;
+}
diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h
new file mode 100644
index 000000000..c6d21c07b
--- /dev/null
+++ b/tools/perf/util/bpf_counter.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_BPF_COUNTER_H
+#define __PERF_BPF_COUNTER_H 1
+
+#include <linux/list.h>
+#include <sys/resource.h>
+
+#ifdef HAVE_LIBBPF_SUPPORT
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#endif
+
+struct evsel;
+struct target;
+struct bpf_counter;
+
+typedef int (*bpf_counter_evsel_op)(struct evsel *evsel);
+typedef int (*bpf_counter_evsel_target_op)(struct evsel *evsel,
+					   struct target *target);
+typedef int (*bpf_counter_evsel_install_pe_op)(struct evsel *evsel,
+					       int cpu_map_idx,
+					       int fd);
+
+struct bpf_counter_ops {
+	bpf_counter_evsel_target_op load;
+	bpf_counter_evsel_op enable;
+	bpf_counter_evsel_op disable;
+	bpf_counter_evsel_op read;
+	bpf_counter_evsel_op destroy;
+	bpf_counter_evsel_install_pe_op install_pe;
+};
+
+struct bpf_counter {
+	void *skel;
+	struct list_head list;
+};
+
+#ifdef HAVE_BPF_SKEL
+
+int bpf_counter__load(struct evsel *evsel, struct target *target);
+int bpf_counter__enable(struct evsel *evsel);
+int bpf_counter__disable(struct evsel *evsel);
+int bpf_counter__read(struct evsel *evsel);
+void bpf_counter__destroy(struct evsel *evsel);
+int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd);
+
+#else /* HAVE_BPF_SKEL */
+
+#include <linux/err.h>
+
+static inline int bpf_counter__load(struct evsel *evsel __maybe_unused,
+				    struct target *target __maybe_unused)
+{
+	return 0;
+}
+
+static inline int bpf_counter__enable(struct evsel *evsel __maybe_unused)
+{
+	return 0;
+}
+
+static inline int bpf_counter__disable(struct evsel *evsel __maybe_unused)
+{
+	return 0;
+}
+
+static inline int bpf_counter__read(struct evsel *evsel __maybe_unused)
+{
+	return -EAGAIN;
+}
+
+static inline void bpf_counter__destroy(struct evsel *evsel __maybe_unused)
+{
+}
+
+static inline int bpf_counter__install_pe(struct evsel *evsel __maybe_unused,
+					  int cpu __maybe_unused,
+					  int fd __maybe_unused)
+{
+	return 0;
+}
+
+#endif /* HAVE_BPF_SKEL */
+
+static inline void set_max_rlimit(void)
+{
+	struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
+
+	setrlimit(RLIMIT_MEMLOCK, &rinf);
+}
+
+#ifdef HAVE_BPF_SKEL
+
+static inline __u32 bpf_link_get_id(int fd)
+{
+	struct bpf_link_info link_info = { .id = 0, };
+	__u32 link_info_len = sizeof(link_info);
+
+	bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len);
+	return link_info.id;
+}
+
+static inline __u32 bpf_link_get_prog_id(int fd)
+{
+	struct bpf_link_info link_info = { .id = 0, };
+	__u32 link_info_len = sizeof(link_info);
+
+	bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len);
+	return link_info.prog_id;
+}
+
+static inline __u32 bpf_map_get_id(int fd)
+{
+	struct bpf_map_info map_info = { .id = 0, };
+	__u32 map_info_len = sizeof(map_info);
+
+	bpf_obj_get_info_by_fd(fd, &map_info, &map_info_len);
+	return map_info.id;
+}
+
+/* trigger the leader program on a cpu */
+static inline int bperf_trigger_reading(int prog_fd, int cpu)
+{
+	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+			    .ctx_in = NULL,
+			    .ctx_size_in = 0,
+			    .flags = BPF_F_TEST_RUN_ON_CPU,
+			    .cpu = cpu,
+			    .retval = 0,
+		);
+
+	return bpf_prog_test_run_opts(prog_fd, &opts);
+}
+#endif /* HAVE_BPF_SKEL */
+
+#endif /* __PERF_BPF_COUNTER_H */
diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c
new file mode 100644
index 000000000..1c82377ed
--- /dev/null
+++ b/tools/perf/util/bpf_counter_cgroup.c
@@ -0,0 +1,295 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (c) 2021 Facebook */
+/* Copyright (c) 2021 Google */
+
+#include <assert.h>
+#include <limits.h>
+#include <unistd.h>
+#include <sys/file.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <linux/err.h>
+#include <linux/zalloc.h>
+#include <linux/perf_event.h>
+#include <api/fs/fs.h>
+#include <perf/bpf_perf.h>
+
+#include "affinity.h"
+#include "bpf_counter.h"
+#include "cgroup.h"
+#include "counts.h"
+#include "debug.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "target.h"
+#include "cpumap.h"
+#include "thread_map.h"
+
+#include "bpf_skel/bperf_cgroup.skel.h"
+
+static struct perf_event_attr cgrp_switch_attr = {
+	.type = PERF_TYPE_SOFTWARE,
+	.config = PERF_COUNT_SW_CGROUP_SWITCHES,
+	.size = sizeof(cgrp_switch_attr),
+	.sample_period = 1,
+	.disabled = 1,
+};
+
+static struct evsel *cgrp_switch;
+static struct bperf_cgroup_bpf *skel;
+
+#define FD(evt, cpu) (*(int *)xyarray__entry(evt->core.fd, cpu, 0))
+
+static int bperf_load_program(struct evlist *evlist)
+{
+	struct bpf_link *link;
+	struct evsel *evsel;
+	struct cgroup *cgrp, *leader_cgrp;
+	int i, j;
+	struct perf_cpu cpu;
+	int total_cpus = cpu__max_cpu().cpu;
+	int map_size, map_fd;
+	int prog_fd, err;
+
+	skel = bperf_cgroup_bpf__open();
+	if (!skel) {
+		pr_err("Failed to open cgroup skeleton\n");
+		return -1;
+	}
+
+	skel->rodata->num_cpus = total_cpus;
+	skel->rodata->num_events = evlist->core.nr_entries / nr_cgroups;
+
+	BUG_ON(evlist->core.nr_entries % nr_cgroups != 0);
+
+	/* we need one copy of events per cpu for reading */
+	map_size = total_cpus * evlist->core.nr_entries / nr_cgroups;
+	bpf_map__set_max_entries(skel->maps.events, map_size);
+	bpf_map__set_max_entries(skel->maps.cgrp_idx, nr_cgroups);
+	/* previous result is saved in a per-cpu array */
+	map_size = evlist->core.nr_entries / nr_cgroups;
+	bpf_map__set_max_entries(skel->maps.prev_readings, map_size);
+	/* cgroup result needs all events (per-cpu) */
+	map_size = evlist->core.nr_entries;
+	bpf_map__set_max_entries(skel->maps.cgrp_readings, map_size);
+
+	set_max_rlimit();
+
+	err = bperf_cgroup_bpf__load(skel);
+	if (err) {
+		pr_err("Failed to load cgroup skeleton\n");
+		goto out;
+	}
+
+	if (cgroup_is_v2("perf_event") > 0)
+		skel->bss->use_cgroup_v2 = 1;
+
+	err = -1;
+
+	cgrp_switch = evsel__new(&cgrp_switch_attr);
+	if (evsel__open_per_cpu(cgrp_switch, evlist->core.all_cpus, -1) < 0) {
+		pr_err("Failed to open cgroup switches event\n");
+		goto out;
+	}
+
+	perf_cpu_map__for_each_cpu(cpu, i, evlist->core.all_cpus) {
+		link = bpf_program__attach_perf_event(skel->progs.on_cgrp_switch,
+						      FD(cgrp_switch, i));
+		if (IS_ERR(link)) {
+			pr_err("Failed to attach cgroup program\n");
+			err = PTR_ERR(link);
+			goto out;
+		}
+	}
+
+	/*
+	 * Update cgrp_idx map from cgroup-id to event index.
+	 */
+	cgrp = NULL;
+	i = 0;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (cgrp == NULL || evsel->cgrp == leader_cgrp) {
+			leader_cgrp = evsel->cgrp;
+			evsel->cgrp = NULL;
+
+			/* open single copy of the events w/o cgroup */
+			err = evsel__open_per_cpu(evsel, evsel->core.cpus, -1);
+			if (err == 0)
+				evsel->supported = true;
+
+			map_fd = bpf_map__fd(skel->maps.events);
+			perf_cpu_map__for_each_cpu(cpu, j, evsel->core.cpus) {
+				int fd = FD(evsel, j);
+				__u32 idx = evsel->core.idx * total_cpus + cpu.cpu;
+
+				bpf_map_update_elem(map_fd, &idx, &fd, BPF_ANY);
+			}
+
+			evsel->cgrp = leader_cgrp;
+		}
+
+		if (evsel->cgrp == cgrp)
+			continue;
+
+		cgrp = evsel->cgrp;
+
+		if (read_cgroup_id(cgrp) < 0) {
+			pr_err("Failed to get cgroup id\n");
+			err = -1;
+			goto out;
+		}
+
+		map_fd = bpf_map__fd(skel->maps.cgrp_idx);
+		err = bpf_map_update_elem(map_fd, &cgrp->id, &i, BPF_ANY);
+		if (err < 0) {
+			pr_err("Failed to update cgroup index map\n");
+			goto out;
+		}
+
+		i++;
+	}
+
+	/*
+	 * bperf uses BPF_PROG_TEST_RUN to get accurate reading. Check
+	 * whether the kernel support it
+	 */
+	prog_fd = bpf_program__fd(skel->progs.trigger_read);
+	err = bperf_trigger_reading(prog_fd, 0);
+	if (err) {
+		pr_warning("The kernel does not support test_run for raw_tp BPF programs.\n"
+			   "Therefore, --for-each-cgroup might show inaccurate readings\n");
+		err = 0;
+	}
+
+out:
+	return err;
+}
+
+static int bperf_cgrp__load(struct evsel *evsel,
+			    struct target *target __maybe_unused)
+{
+	static bool bperf_loaded = false;
+
+	evsel->bperf_leader_prog_fd = -1;
+	evsel->bperf_leader_link_fd = -1;
+
+	if (!bperf_loaded && bperf_load_program(evsel->evlist))
+		return -1;
+
+	bperf_loaded = true;
+	/* just to bypass bpf_counter_skip() */
+	evsel->follower_skel = (struct bperf_follower_bpf *)skel;
+
+	return 0;
+}
+
+static int bperf_cgrp__install_pe(struct evsel *evsel __maybe_unused,
+				  int cpu __maybe_unused, int fd __maybe_unused)
+{
+	/* nothing to do */
+	return 0;
+}
+
+/*
+ * trigger the leader prog on each cpu, so the cgrp_reading map could get
+ * the latest results.
+ */
+static int bperf_cgrp__sync_counters(struct evlist *evlist)
+{
+	struct perf_cpu cpu;
+	int idx;
+	int prog_fd = bpf_program__fd(skel->progs.trigger_read);
+
+	perf_cpu_map__for_each_cpu(cpu, idx, evlist->core.all_cpus)
+		bperf_trigger_reading(prog_fd, cpu.cpu);
+
+	return 0;
+}
+
+static int bperf_cgrp__enable(struct evsel *evsel)
+{
+	if (evsel->core.idx)
+		return 0;
+
+	bperf_cgrp__sync_counters(evsel->evlist);
+
+	skel->bss->enabled = 1;
+	return 0;
+}
+
+static int bperf_cgrp__disable(struct evsel *evsel)
+{
+	if (evsel->core.idx)
+		return 0;
+
+	bperf_cgrp__sync_counters(evsel->evlist);
+
+	skel->bss->enabled = 0;
+	return 0;
+}
+
+static int bperf_cgrp__read(struct evsel *evsel)
+{
+	struct evlist *evlist = evsel->evlist;
+	int total_cpus = cpu__max_cpu().cpu;
+	struct perf_counts_values *counts;
+	struct bpf_perf_event_value *values;
+	int reading_map_fd, err = 0;
+
+	if (evsel->core.idx)
+		return 0;
+
+	bperf_cgrp__sync_counters(evsel->evlist);
+
+	values = calloc(total_cpus, sizeof(*values));
+	if (values == NULL)
+		return -ENOMEM;
+
+	reading_map_fd = bpf_map__fd(skel->maps.cgrp_readings);
+
+	evlist__for_each_entry(evlist, evsel) {
+		__u32 idx = evsel->core.idx;
+		int i;
+		struct perf_cpu cpu;
+
+		err = bpf_map_lookup_elem(reading_map_fd, &idx, values);
+		if (err) {
+			pr_err("bpf map lookup failed: idx=%u, event=%s, cgrp=%s\n",
+			       idx, evsel__name(evsel), evsel->cgrp->name);
+			goto out;
+		}
+
+		perf_cpu_map__for_each_cpu(cpu, i, evsel->core.cpus) {
+			counts = perf_counts(evsel->counts, i, 0);
+			counts->val = values[cpu.cpu].counter;
+			counts->ena = values[cpu.cpu].enabled;
+			counts->run = values[cpu.cpu].running;
+		}
+	}
+
+out:
+	free(values);
+	return err;
+}
+
+static int bperf_cgrp__destroy(struct evsel *evsel)
+{
+	if (evsel->core.idx)
+		return 0;
+
+	bperf_cgroup_bpf__destroy(skel);
+	evsel__delete(cgrp_switch);  // it'll destroy on_switch progs too
+
+	return 0;
+}
+
+struct bpf_counter_ops bperf_cgrp_ops = {
+	.load       = bperf_cgrp__load,
+	.enable     = bperf_cgrp__enable,
+	.disable    = bperf_cgrp__disable,
+	.read       = bperf_cgrp__read,
+	.install_pe = bperf_cgrp__install_pe,
+	.destroy    = bperf_cgrp__destroy,
+};
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
new file mode 100644
index 000000000..7a4297d8f
--- /dev/null
+++ b/tools/perf/util/bpf_ftrace.c
@@ -0,0 +1,154 @@
+#include <stdio.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <linux/err.h>
+
+#include "util/ftrace.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
+#include "util/debug.h"
+#include "util/evlist.h"
+#include "util/bpf_counter.h"
+
+#include "util/bpf_skel/func_latency.skel.h"
+
+static struct func_latency_bpf *skel;
+
+int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
+{
+	int fd, err;
+	int i, ncpus = 1, ntasks = 1;
+	struct filter_entry *func;
+
+	if (!list_is_singular(&ftrace->filters)) {
+		pr_err("ERROR: %s target function(s).\n",
+		       list_empty(&ftrace->filters) ? "No" : "Too many");
+		return -1;
+	}
+
+	func = list_first_entry(&ftrace->filters, struct filter_entry, list);
+
+	skel = func_latency_bpf__open();
+	if (!skel) {
+		pr_err("Failed to open func latency skeleton\n");
+		return -1;
+	}
+
+	/* don't need to set cpu filter for system-wide mode */
+	if (ftrace->target.cpu_list) {
+		ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);
+		bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
+	}
+
+	if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
+		ntasks = perf_thread_map__nr(ftrace->evlist->core.threads);
+		bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+	}
+
+	set_max_rlimit();
+
+	err = func_latency_bpf__load(skel);
+	if (err) {
+		pr_err("Failed to load func latency skeleton\n");
+		goto out;
+	}
+
+	if (ftrace->target.cpu_list) {
+		u32 cpu;
+		u8 val = 1;
+
+		skel->bss->has_cpu = 1;
+		fd = bpf_map__fd(skel->maps.cpu_filter);
+
+		for (i = 0; i < ncpus; i++) {
+			cpu = perf_cpu_map__cpu(ftrace->evlist->core.user_requested_cpus, i).cpu;
+			bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
+		}
+	}
+
+	if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
+		u32 pid;
+		u8 val = 1;
+
+		skel->bss->has_task = 1;
+		fd = bpf_map__fd(skel->maps.task_filter);
+
+		for (i = 0; i < ntasks; i++) {
+			pid = perf_thread_map__pid(ftrace->evlist->core.threads, i);
+			bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+		}
+	}
+
+	skel->bss->use_nsec = ftrace->use_nsec;
+
+	skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
+							    false, func->name);
+	if (IS_ERR(skel->links.func_begin)) {
+		pr_err("Failed to attach fentry program\n");
+		err = PTR_ERR(skel->links.func_begin);
+		goto out;
+	}
+
+	skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
+							  true, func->name);
+	if (IS_ERR(skel->links.func_end)) {
+		pr_err("Failed to attach fexit program\n");
+		err = PTR_ERR(skel->links.func_end);
+		goto out;
+	}
+
+	/* XXX: we don't actually use this fd - just for poll() */
+	return open("/dev/null", O_RDONLY);
+
+out:
+	return err;
+}
+
+int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+	skel->bss->enabled = 1;
+	return 0;
+}
+
+int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+	skel->bss->enabled = 0;
+	return 0;
+}
+
+int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
+				  int buckets[])
+{
+	int i, fd, err;
+	u32 idx;
+	u64 *hist;
+	int ncpus = cpu__max_cpu().cpu;
+
+	fd = bpf_map__fd(skel->maps.latency);
+
+	hist = calloc(ncpus, sizeof(*hist));
+	if (hist == NULL)
+		return -ENOMEM;
+
+	for (idx = 0; idx < NUM_BUCKET; idx++) {
+		err = bpf_map_lookup_elem(fd, &idx, hist);
+		if (err) {
+			buckets[idx] = 0;
+			continue;
+		}
+
+		for (i = 0; i < ncpus; i++)
+			buckets[idx] += hist[i];
+	}
+
+	free(hist);
+	return 0;
+}
+
+int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+	func_latency_bpf__destroy(skel);
+	return 0;
+}
diff --git a/tools/perf/util/bpf_kwork.c b/tools/perf/util/bpf_kwork.c
new file mode 100644
index 000000000..b629dd679
--- /dev/null
+++ b/tools/perf/util/bpf_kwork.c
@@ -0,0 +1,346 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * bpf_kwork.c
+ *
+ * Copyright (c) 2022  Huawei Inc,  Yang Jihong <yangjihong1@huawei.com>
+ */
+
+#include <time.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <linux/time64.h>
+
+#include "util/debug.h"
+#include "util/kwork.h"
+
+#include <bpf/bpf.h>
+
+#include "util/bpf_skel/kwork_trace.skel.h"
+
+/*
+ * This should be in sync with "util/kwork_trace.bpf.c"
+ */
+#define MAX_KWORKNAME 128
+
+struct work_key {
+	u32 type;
+	u32 cpu;
+	u64 id;
+};
+
+struct report_data {
+	u64 nr;
+	u64 total_time;
+	u64 max_time;
+	u64 max_time_start;
+	u64 max_time_end;
+};
+
+struct kwork_class_bpf {
+	struct kwork_class *class;
+
+	void (*load_prepare)(struct perf_kwork *kwork);
+	int  (*get_work_name)(struct work_key *key, char **ret_name);
+};
+
+static struct kwork_trace_bpf *skel;
+
+static struct timespec ts_start;
+static struct timespec ts_end;
+
+void perf_kwork__trace_start(void)
+{
+	clock_gettime(CLOCK_MONOTONIC, &ts_start);
+	skel->bss->enabled = 1;
+}
+
+void perf_kwork__trace_finish(void)
+{
+	clock_gettime(CLOCK_MONOTONIC, &ts_end);
+	skel->bss->enabled = 0;
+}
+
+static int get_work_name_from_map(struct work_key *key, char **ret_name)
+{
+	char name[MAX_KWORKNAME] = { 0 };
+	int fd = bpf_map__fd(skel->maps.perf_kwork_names);
+
+	*ret_name = NULL;
+
+	if (fd < 0) {
+		pr_debug("Invalid names map fd\n");
+		return 0;
+	}
+
+	if ((bpf_map_lookup_elem(fd, key, name) == 0) && (strlen(name) != 0)) {
+		*ret_name = strdup(name);
+		if (*ret_name == NULL) {
+			pr_err("Failed to copy work name\n");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+static void irq_load_prepare(struct perf_kwork *kwork)
+{
+	if (kwork->report == KWORK_REPORT_RUNTIME) {
+		bpf_program__set_autoload(skel->progs.report_irq_handler_entry, true);
+		bpf_program__set_autoload(skel->progs.report_irq_handler_exit, true);
+	}
+}
+
+static struct kwork_class_bpf kwork_irq_bpf = {
+	.load_prepare  = irq_load_prepare,
+	.get_work_name = get_work_name_from_map,
+};
+
+static void softirq_load_prepare(struct perf_kwork *kwork)
+{
+	if (kwork->report == KWORK_REPORT_RUNTIME) {
+		bpf_program__set_autoload(skel->progs.report_softirq_entry, true);
+		bpf_program__set_autoload(skel->progs.report_softirq_exit, true);
+	} else if (kwork->report == KWORK_REPORT_LATENCY) {
+		bpf_program__set_autoload(skel->progs.latency_softirq_raise, true);
+		bpf_program__set_autoload(skel->progs.latency_softirq_entry, true);
+	}
+}
+
+static struct kwork_class_bpf kwork_softirq_bpf = {
+	.load_prepare  = softirq_load_prepare,
+	.get_work_name = get_work_name_from_map,
+};
+
+static void workqueue_load_prepare(struct perf_kwork *kwork)
+{
+	if (kwork->report == KWORK_REPORT_RUNTIME) {
+		bpf_program__set_autoload(skel->progs.report_workqueue_execute_start, true);
+		bpf_program__set_autoload(skel->progs.report_workqueue_execute_end, true);
+	} else if (kwork->report == KWORK_REPORT_LATENCY) {
+		bpf_program__set_autoload(skel->progs.latency_workqueue_activate_work, true);
+		bpf_program__set_autoload(skel->progs.latency_workqueue_execute_start, true);
+	}
+}
+
+static struct kwork_class_bpf kwork_workqueue_bpf = {
+	.load_prepare  = workqueue_load_prepare,
+	.get_work_name = get_work_name_from_map,
+};
+
+static struct kwork_class_bpf *
+kwork_class_bpf_supported_list[KWORK_CLASS_MAX] = {
+	[KWORK_CLASS_IRQ]       = &kwork_irq_bpf,
+	[KWORK_CLASS_SOFTIRQ]   = &kwork_softirq_bpf,
+	[KWORK_CLASS_WORKQUEUE] = &kwork_workqueue_bpf,
+};
+
+static bool valid_kwork_class_type(enum kwork_class_type type)
+{
+	return type >= 0 && type < KWORK_CLASS_MAX ? true : false;
+}
+
+static int setup_filters(struct perf_kwork *kwork)
+{
+	u8 val = 1;
+	int i, nr_cpus, key, fd;
+	struct perf_cpu_map *map;
+
+	if (kwork->cpu_list != NULL) {
+		fd = bpf_map__fd(skel->maps.perf_kwork_cpu_filter);
+		if (fd < 0) {
+			pr_debug("Invalid cpu filter fd\n");
+			return -1;
+		}
+
+		map = perf_cpu_map__new(kwork->cpu_list);
+		if (map == NULL) {
+			pr_debug("Invalid cpu_list\n");
+			return -1;
+		}
+
+		nr_cpus = libbpf_num_possible_cpus();
+		for (i = 0; i < perf_cpu_map__nr(map); i++) {
+			struct perf_cpu cpu = perf_cpu_map__cpu(map, i);
+
+			if (cpu.cpu >= nr_cpus) {
+				perf_cpu_map__put(map);
+				pr_err("Requested cpu %d too large\n", cpu.cpu);
+				return -1;
+			}
+			bpf_map_update_elem(fd, &cpu.cpu, &val, BPF_ANY);
+		}
+		perf_cpu_map__put(map);
+
+		skel->bss->has_cpu_filter = 1;
+	}
+
+	if (kwork->profile_name != NULL) {
+		if (strlen(kwork->profile_name) >= MAX_KWORKNAME) {
+			pr_err("Requested name filter %s too large, limit to %d\n",
+			       kwork->profile_name, MAX_KWORKNAME - 1);
+			return -1;
+		}
+
+		fd = bpf_map__fd(skel->maps.perf_kwork_name_filter);
+		if (fd < 0) {
+			pr_debug("Invalid name filter fd\n");
+			return -1;
+		}
+
+		key = 0;
+		bpf_map_update_elem(fd, &key, kwork->profile_name, BPF_ANY);
+
+		skel->bss->has_name_filter = 1;
+	}
+
+	return 0;
+}
+
+int perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork)
+{
+	struct bpf_program *prog;
+	struct kwork_class *class;
+	struct kwork_class_bpf *class_bpf;
+	enum kwork_class_type type;
+
+	skel = kwork_trace_bpf__open();
+	if (!skel) {
+		pr_debug("Failed to open kwork trace skeleton\n");
+		return -1;
+	}
+
+	/*
+	 * set all progs to non-autoload,
+	 * then set corresponding progs according to config
+	 */
+	bpf_object__for_each_program(prog, skel->obj)
+		bpf_program__set_autoload(prog, false);
+
+	list_for_each_entry(class, &kwork->class_list, list) {
+		type = class->type;
+		if (!valid_kwork_class_type(type) ||
+		    (kwork_class_bpf_supported_list[type] == NULL)) {
+			pr_err("Unsupported bpf trace class %s\n", class->name);
+			goto out;
+		}
+
+		class_bpf = kwork_class_bpf_supported_list[type];
+		class_bpf->class = class;
+
+		if (class_bpf->load_prepare != NULL)
+			class_bpf->load_prepare(kwork);
+	}
+
+	if (kwork_trace_bpf__load(skel)) {
+		pr_debug("Failed to load kwork trace skeleton\n");
+		goto out;
+	}
+
+	if (setup_filters(kwork))
+		goto out;
+
+	if (kwork_trace_bpf__attach(skel)) {
+		pr_debug("Failed to attach kwork trace skeleton\n");
+		goto out;
+	}
+
+	return 0;
+
+out:
+	kwork_trace_bpf__destroy(skel);
+	return -1;
+}
+
+static int add_work(struct perf_kwork *kwork,
+		    struct work_key *key,
+		    struct report_data *data)
+{
+	struct kwork_work *work;
+	struct kwork_class_bpf *bpf_trace;
+	struct kwork_work tmp = {
+		.id = key->id,
+		.name = NULL,
+		.cpu = key->cpu,
+	};
+	enum kwork_class_type type = key->type;
+
+	if (!valid_kwork_class_type(type)) {
+		pr_debug("Invalid class type %d to add work\n", type);
+		return -1;
+	}
+
+	bpf_trace = kwork_class_bpf_supported_list[type];
+	tmp.class = bpf_trace->class;
+
+	if ((bpf_trace->get_work_name != NULL) &&
+	    (bpf_trace->get_work_name(key, &tmp.name)))
+		return -1;
+
+	work = perf_kwork_add_work(kwork, tmp.class, &tmp);
+	if (work == NULL)
+		return -1;
+
+	if (kwork->report == KWORK_REPORT_RUNTIME) {
+		work->nr_atoms = data->nr;
+		work->total_runtime = data->total_time;
+		work->max_runtime = data->max_time;
+		work->max_runtime_start = data->max_time_start;
+		work->max_runtime_end = data->max_time_end;
+	} else if (kwork->report == KWORK_REPORT_LATENCY) {
+		work->nr_atoms = data->nr;
+		work->total_latency = data->total_time;
+		work->max_latency = data->max_time;
+		work->max_latency_start = data->max_time_start;
+		work->max_latency_end = data->max_time_end;
+	} else {
+		pr_debug("Invalid bpf report type %d\n", kwork->report);
+		return -1;
+	}
+
+	kwork->timestart = (u64)ts_start.tv_sec * NSEC_PER_SEC + ts_start.tv_nsec;
+	kwork->timeend = (u64)ts_end.tv_sec * NSEC_PER_SEC + ts_end.tv_nsec;
+
+	return 0;
+}
+
+int perf_kwork__report_read_bpf(struct perf_kwork *kwork)
+{
+	struct report_data data;
+	struct work_key key = {
+		.type = 0,
+		.cpu  = 0,
+		.id   = 0,
+	};
+	struct work_key prev = {
+		.type = 0,
+		.cpu  = 0,
+		.id   = 0,
+	};
+	int fd = bpf_map__fd(skel->maps.perf_kwork_report);
+
+	if (fd < 0) {
+		pr_debug("Invalid report fd\n");
+		return -1;
+	}
+
+	while (!bpf_map_get_next_key(fd, &prev, &key)) {
+		if ((bpf_map_lookup_elem(fd, &key, &data)) != 0) {
+			pr_debug("Failed to lookup report elem\n");
+			return -1;
+		}
+
+		if ((data.nr != 0) && (add_work(kwork, &key, &data) != 0))
+			return -1;
+
+		prev = key;
+	}
+	return 0;
+}
+
+void perf_kwork__report_cleanup_bpf(void)
+{
+	kwork_trace_bpf__destroy(skel);
+}
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
new file mode 100644
index 000000000..fc4d613cb
--- /dev/null
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util/debug.h"
+#include "util/evlist.h"
+#include "util/machine.h"
+#include "util/map.h"
+#include "util/symbol.h"
+#include "util/target.h"
+#include "util/thread_map.h"
+#include "util/lock-contention.h"
+#include <linux/zalloc.h>
+#include <linux/string.h>
+#include <bpf/bpf.h>
+
+#include "bpf_skel/lock_contention.skel.h"
+
+static struct lock_contention_bpf *skel;
+
+struct lock_contention_data {
+	u64 total_time;
+	u64 min_time;
+	u64 max_time;
+	u32 count;
+	u32 flags;
+};
+
+int lock_contention_prepare(struct lock_contention *con)
+{
+	int i, fd;
+	int ncpus = 1, ntasks = 1;
+	struct evlist *evlist = con->evlist;
+	struct target *target = con->target;
+
+	skel = lock_contention_bpf__open();
+	if (!skel) {
+		pr_err("Failed to open lock-contention BPF skeleton\n");
+		return -1;
+	}
+
+	bpf_map__set_value_size(skel->maps.stacks, con->max_stack * sizeof(u64));
+	bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries);
+	bpf_map__set_max_entries(skel->maps.lock_stat, con->map_nr_entries);
+
+	if (target__has_cpu(target))
+		ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
+	if (target__has_task(target))
+		ntasks = perf_thread_map__nr(evlist->core.threads);
+
+	bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
+	bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+
+	if (lock_contention_bpf__load(skel) < 0) {
+		pr_err("Failed to load lock-contention BPF skeleton\n");
+		return -1;
+	}
+
+	if (target__has_cpu(target)) {
+		u32 cpu;
+		u8 val = 1;
+
+		skel->bss->has_cpu = 1;
+		fd = bpf_map__fd(skel->maps.cpu_filter);
+
+		for (i = 0; i < ncpus; i++) {
+			cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, i).cpu;
+			bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
+		}
+	}
+
+	if (target__has_task(target)) {
+		u32 pid;
+		u8 val = 1;
+
+		skel->bss->has_task = 1;
+		fd = bpf_map__fd(skel->maps.task_filter);
+
+		for (i = 0; i < ntasks; i++) {
+			pid = perf_thread_map__pid(evlist->core.threads, i);
+			bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+		}
+	}
+
+	if (target__none(target) && evlist->workload.pid > 0) {
+		u32 pid = evlist->workload.pid;
+		u8 val = 1;
+
+		skel->bss->has_task = 1;
+		fd = bpf_map__fd(skel->maps.task_filter);
+		bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+	}
+
+	skel->bss->stack_skip = con->stack_skip;
+
+	lock_contention_bpf__attach(skel);
+	return 0;
+}
+
+int lock_contention_start(void)
+{
+	skel->bss->enabled = 1;
+	return 0;
+}
+
+int lock_contention_stop(void)
+{
+	skel->bss->enabled = 0;
+	return 0;
+}
+
+int lock_contention_read(struct lock_contention *con)
+{
+	int fd, stack;
+	s32 prev_key, key;
+	struct lock_contention_data data;
+	struct lock_stat *st;
+	struct machine *machine = con->machine;
+	u64 stack_trace[con->max_stack];
+
+	fd = bpf_map__fd(skel->maps.lock_stat);
+	stack = bpf_map__fd(skel->maps.stacks);
+
+	con->lost = skel->bss->lost;
+
+	prev_key = 0;
+	while (!bpf_map_get_next_key(fd, &prev_key, &key)) {
+		struct map *kmap;
+		struct symbol *sym;
+		int idx = 0;
+
+		bpf_map_lookup_elem(fd, &key, &data);
+		st = zalloc(sizeof(*st));
+		if (st == NULL)
+			return -1;
+
+		st->nr_contended = data.count;
+		st->wait_time_total = data.total_time;
+		st->wait_time_max = data.max_time;
+		st->wait_time_min = data.min_time;
+
+		if (data.count)
+			st->avg_wait_time = data.total_time / data.count;
+
+		st->flags = data.flags;
+
+		bpf_map_lookup_elem(stack, &key, stack_trace);
+
+		/* skip lock internal functions */
+		while (is_lock_function(machine, stack_trace[idx]) &&
+		       idx < con->max_stack - 1)
+			idx++;
+
+		st->addr = stack_trace[idx];
+		sym = machine__find_kernel_symbol(machine, st->addr, &kmap);
+
+		if (sym) {
+			unsigned long offset;
+			int ret = 0;
+
+			offset = kmap->map_ip(kmap, st->addr) - sym->start;
+
+			if (offset)
+				ret = asprintf(&st->name, "%s+%#lx", sym->name, offset);
+			else
+				st->name = strdup(sym->name);
+
+			if (ret < 0 || st->name == NULL)
+				return -1;
+		} else if (asprintf(&st->name, "%#lx", (unsigned long)st->addr) < 0) {
+			free(st);
+			return -1;
+		}
+
+		if (verbose) {
+			st->callstack = memdup(stack_trace, sizeof(stack_trace));
+			if (st->callstack == NULL) {
+				free(st);
+				return -1;
+			}
+		}
+
+		hlist_add_head(&st->hash_entry, con->result);
+		prev_key = key;
+	}
+
+	return 0;
+}
+
+int lock_contention_finish(void)
+{
+	if (skel) {
+		skel->bss->enabled = 0;
+		lock_contention_bpf__destroy(skel);
+	}
+
+	return 0;
+}
diff --git a/tools/perf/util/bpf_map.c b/tools/perf/util/bpf_map.c
new file mode 100644
index 000000000..c863ae0c5
--- /dev/null
+++ b/tools/perf/util/bpf_map.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+#include "util/bpf_map.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+static bool bpf_map__is_per_cpu(enum bpf_map_type type)
+{
+	return type == BPF_MAP_TYPE_PERCPU_HASH ||
+	       type == BPF_MAP_TYPE_PERCPU_ARRAY ||
+	       type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+	       type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;
+}
+
+static void *bpf_map__alloc_value(const struct bpf_map *map)
+{
+	if (bpf_map__is_per_cpu(bpf_map__type(map)))
+		return malloc(round_up(bpf_map__value_size(map), 8) *
+			      sysconf(_SC_NPROCESSORS_CONF));
+
+	return malloc(bpf_map__value_size(map));
+}
+
+int bpf_map__fprintf(struct bpf_map *map, FILE *fp)
+{
+	void *prev_key = NULL, *key, *value;
+	int fd = bpf_map__fd(map), err;
+	int printed = 0;
+
+	if (fd < 0)
+		return fd;
+
+	if (!map)
+		return PTR_ERR(map);
+
+	err = -ENOMEM;
+	key = malloc(bpf_map__key_size(map));
+	if (key == NULL)
+		goto out;
+
+	value = bpf_map__alloc_value(map);
+	if (value == NULL)
+		goto out_free_key;
+
+	while ((err = bpf_map_get_next_key(fd, prev_key, key) == 0)) {
+		int intkey = *(int *)key;
+
+		if (!bpf_map_lookup_elem(fd, key, value)) {
+			bool boolval = *(bool *)value;
+			if (boolval)
+				printed += fprintf(fp, "[%d] = %d,\n", intkey, boolval);
+		} else {
+			printed += fprintf(fp, "[%d] = ERROR,\n", intkey);
+		}
+
+		prev_key = key;
+	}
+
+	if (err == ENOENT)
+		err = printed;
+
+	free(value);
+out_free_key:
+	free(key);
+out:
+	return err;
+}
diff --git a/tools/perf/util/bpf_map.h b/tools/perf/util/bpf_map.h
new file mode 100644
index 000000000..d6abd5e47
--- /dev/null
+++ b/tools/perf/util/bpf_map.h
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+#ifndef __PERF_BPF_MAP_H
+#define __PERF_BPF_MAP_H 1
+
+#include <stdio.h>
+#include <linux/compiler.h>
+struct bpf_map;
+
+#ifdef HAVE_LIBBPF_SUPPORT
+
+int bpf_map__fprintf(struct bpf_map *map, FILE *fp);
+
+#else
+
+static inline int bpf_map__fprintf(struct bpf_map *map __maybe_unused, FILE *fp __maybe_unused)
+{
+	return 0;
+}
+
+#endif // HAVE_LIBBPF_SUPPORT
+
+#endif // __PERF_BPF_MAP_H
diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
new file mode 100644
index 000000000..21f4d9ba0
--- /dev/null
+++ b/tools/perf/util/bpf_off_cpu.c
@@ -0,0 +1,393 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util/bpf_counter.h"
+#include "util/debug.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/off_cpu.h"
+#include "util/perf-hooks.h"
+#include "util/record.h"
+#include "util/session.h"
+#include "util/target.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
+#include "util/cgroup.h"
+#include "util/strlist.h"
+#include <bpf/bpf.h>
+
+#include "bpf_skel/off_cpu.skel.h"
+
+#define MAX_STACKS  32
+#define MAX_PROC  4096
+/* we don't need actual timestamp, just want to put the samples at last */
+#define OFF_CPU_TIMESTAMP  (~0ull << 32)
+
+static struct off_cpu_bpf *skel;
+
+struct off_cpu_key {
+	u32 pid;
+	u32 tgid;
+	u32 stack_id;
+	u32 state;
+	u64 cgroup_id;
+};
+
+union off_cpu_data {
+	struct perf_event_header hdr;
+	u64 array[1024 / sizeof(u64)];
+};
+
+static int off_cpu_config(struct evlist *evlist)
+{
+	struct evsel *evsel;
+	struct perf_event_attr attr = {
+		.type	= PERF_TYPE_SOFTWARE,
+		.config = PERF_COUNT_SW_BPF_OUTPUT,
+		.size	= sizeof(attr), /* to capture ABI version */
+	};
+	char *evname = strdup(OFFCPU_EVENT);
+
+	if (evname == NULL)
+		return -ENOMEM;
+
+	evsel = evsel__new(&attr);
+	if (!evsel) {
+		free(evname);
+		return -ENOMEM;
+	}
+
+	evsel->core.attr.freq = 1;
+	evsel->core.attr.sample_period = 1;
+	/* off-cpu analysis depends on stack trace */
+	evsel->core.attr.sample_type = PERF_SAMPLE_CALLCHAIN;
+
+	evlist__add(evlist, evsel);
+
+	free(evsel->name);
+	evsel->name = evname;
+
+	return 0;
+}
+
+static void off_cpu_start(void *arg)
+{
+	struct evlist *evlist = arg;
+
+	/* update task filter for the given workload */
+	if (!skel->bss->has_cpu && !skel->bss->has_task &&
+	    perf_thread_map__pid(evlist->core.threads, 0) != -1) {
+		int fd;
+		u32 pid;
+		u8 val = 1;
+
+		skel->bss->has_task = 1;
+		skel->bss->uses_tgid = 1;
+		fd = bpf_map__fd(skel->maps.task_filter);
+		pid = perf_thread_map__pid(evlist->core.threads, 0);
+		bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+	}
+
+	skel->bss->enabled = 1;
+}
+
+static void off_cpu_finish(void *arg __maybe_unused)
+{
+	skel->bss->enabled = 0;
+	off_cpu_bpf__destroy(skel);
+}
+
+/* v5.18 kernel added prev_state arg, so it needs to check the signature */
+static void check_sched_switch_args(void)
+{
+	const struct btf *btf = btf__load_vmlinux_btf();
+	const struct btf_type *t1, *t2, *t3;
+	u32 type_id;
+
+	type_id = btf__find_by_name_kind(btf, "btf_trace_sched_switch",
+					 BTF_KIND_TYPEDEF);
+	if ((s32)type_id < 0)
+		return;
+
+	t1 = btf__type_by_id(btf, type_id);
+	if (t1 == NULL)
+		return;
+
+	t2 = btf__type_by_id(btf, t1->type);
+	if (t2 == NULL || !btf_is_ptr(t2))
+		return;
+
+	t3 = btf__type_by_id(btf, t2->type);
+	/* btf_trace func proto has one more argument for the context */
+	if (t3 && btf_is_func_proto(t3) && btf_vlen(t3) == 5) {
+		/* new format: pass prev_state as 4th arg */
+		skel->rodata->has_prev_state = true;
+	}
+}
+
+int off_cpu_prepare(struct evlist *evlist, struct target *target,
+		    struct record_opts *opts)
+{
+	int err, fd, i;
+	int ncpus = 1, ntasks = 1, ncgrps = 1;
+	struct strlist *pid_slist = NULL;
+	struct str_node *pos;
+
+	if (off_cpu_config(evlist) < 0) {
+		pr_err("Failed to config off-cpu BPF event\n");
+		return -1;
+	}
+
+	skel = off_cpu_bpf__open();
+	if (!skel) {
+		pr_err("Failed to open off-cpu BPF skeleton\n");
+		return -1;
+	}
+
+	/* don't need to set cpu filter for system-wide mode */
+	if (target->cpu_list) {
+		ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
+		bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
+	}
+
+	if (target->pid) {
+		pid_slist = strlist__new(target->pid, NULL);
+		if (!pid_slist) {
+			pr_err("Failed to create a strlist for pid\n");
+			return -1;
+		}
+
+		ntasks = 0;
+		strlist__for_each_entry(pos, pid_slist) {
+			char *end_ptr;
+			int pid = strtol(pos->s, &end_ptr, 10);
+
+			if (pid == INT_MIN || pid == INT_MAX ||
+			    (*end_ptr != '\0' && *end_ptr != ','))
+				continue;
+
+			ntasks++;
+		}
+
+		if (ntasks < MAX_PROC)
+			ntasks = MAX_PROC;
+
+		bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+	} else if (target__has_task(target)) {
+		ntasks = perf_thread_map__nr(evlist->core.threads);
+		bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+	} else if (target__none(target)) {
+		bpf_map__set_max_entries(skel->maps.task_filter, MAX_PROC);
+	}
+
+	if (evlist__first(evlist)->cgrp) {
+		ncgrps = evlist->core.nr_entries - 1; /* excluding a dummy */
+		bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps);
+
+		if (!cgroup_is_v2("perf_event"))
+			skel->rodata->uses_cgroup_v1 = true;
+	}
+
+	if (opts->record_cgroup) {
+		skel->rodata->needs_cgroup = true;
+
+		if (!cgroup_is_v2("perf_event"))
+			skel->rodata->uses_cgroup_v1 = true;
+	}
+
+	set_max_rlimit();
+	check_sched_switch_args();
+
+	err = off_cpu_bpf__load(skel);
+	if (err) {
+		pr_err("Failed to load off-cpu skeleton\n");
+		goto out;
+	}
+
+	if (target->cpu_list) {
+		u32 cpu;
+		u8 val = 1;
+
+		skel->bss->has_cpu = 1;
+		fd = bpf_map__fd(skel->maps.cpu_filter);
+
+		for (i = 0; i < ncpus; i++) {
+			cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, i).cpu;
+			bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
+		}
+	}
+
+	if (target->pid) {
+		u8 val = 1;
+
+		skel->bss->has_task = 1;
+		skel->bss->uses_tgid = 1;
+		fd = bpf_map__fd(skel->maps.task_filter);
+
+		strlist__for_each_entry(pos, pid_slist) {
+			char *end_ptr;
+			u32 tgid;
+			int pid = strtol(pos->s, &end_ptr, 10);
+
+			if (pid == INT_MIN || pid == INT_MAX ||
+			    (*end_ptr != '\0' && *end_ptr != ','))
+				continue;
+
+			tgid = pid;
+			bpf_map_update_elem(fd, &tgid, &val, BPF_ANY);
+		}
+	} else if (target__has_task(target)) {
+		u32 pid;
+		u8 val = 1;
+
+		skel->bss->has_task = 1;
+		fd = bpf_map__fd(skel->maps.task_filter);
+
+		for (i = 0; i < ntasks; i++) {
+			pid = perf_thread_map__pid(evlist->core.threads, i);
+			bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+		}
+	}
+
+	if (evlist__first(evlist)->cgrp) {
+		struct evsel *evsel;
+		u8 val = 1;
+
+		skel->bss->has_cgroup = 1;
+		fd = bpf_map__fd(skel->maps.cgroup_filter);
+
+		evlist__for_each_entry(evlist, evsel) {
+			struct cgroup *cgrp = evsel->cgrp;
+
+			if (cgrp == NULL)
+				continue;
+
+			if (!cgrp->id && read_cgroup_id(cgrp) < 0) {
+				pr_err("Failed to read cgroup id of %s\n",
+				       cgrp->name);
+				goto out;
+			}
+
+			bpf_map_update_elem(fd, &cgrp->id, &val, BPF_ANY);
+		}
+	}
+
+	err = off_cpu_bpf__attach(skel);
+	if (err) {
+		pr_err("Failed to attach off-cpu BPF skeleton\n");
+		goto out;
+	}
+
+	if (perf_hooks__set_hook("record_start", off_cpu_start, evlist) ||
+	    perf_hooks__set_hook("record_end", off_cpu_finish, evlist)) {
+		pr_err("Failed to attach off-cpu skeleton\n");
+		goto out;
+	}
+
+	return 0;
+
+out:
+	off_cpu_bpf__destroy(skel);
+	return -1;
+}
+
+int off_cpu_write(struct perf_session *session)
+{
+	int bytes = 0, size;
+	int fd, stack;
+	u64 sample_type, val, sid = 0;
+	struct evsel *evsel;
+	struct perf_data_file *file = &session->data->file;
+	struct off_cpu_key prev, key;
+	union off_cpu_data data = {
+		.hdr = {
+			.type = PERF_RECORD_SAMPLE,
+			.misc = PERF_RECORD_MISC_USER,
+		},
+	};
+	u64 tstamp = OFF_CPU_TIMESTAMP;
+
+	skel->bss->enabled = 0;
+
+	evsel = evlist__find_evsel_by_str(session->evlist, OFFCPU_EVENT);
+	if (evsel == NULL) {
+		pr_err("%s evsel not found\n", OFFCPU_EVENT);
+		return 0;
+	}
+
+	sample_type = evsel->core.attr.sample_type;
+
+	if (sample_type & ~OFFCPU_SAMPLE_TYPES) {
+		pr_err("not supported sample type: %llx\n",
+		       (unsigned long long)sample_type);
+		return -1;
+	}
+
+	if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) {
+		if (evsel->core.id)
+			sid = evsel->core.id[0];
+	}
+
+	fd = bpf_map__fd(skel->maps.off_cpu);
+	stack = bpf_map__fd(skel->maps.stacks);
+	memset(&prev, 0, sizeof(prev));
+
+	while (!bpf_map_get_next_key(fd, &prev, &key)) {
+		int n = 1;  /* start from perf_event_header */
+		int ip_pos = -1;
+
+		bpf_map_lookup_elem(fd, &key, &val);
+
+		if (sample_type & PERF_SAMPLE_IDENTIFIER)
+			data.array[n++] = sid;
+		if (sample_type & PERF_SAMPLE_IP) {
+			ip_pos = n;
+			data.array[n++] = 0;  /* will be updated */
+		}
+		if (sample_type & PERF_SAMPLE_TID)
+			data.array[n++] = (u64)key.pid << 32 | key.tgid;
+		if (sample_type & PERF_SAMPLE_TIME)
+			data.array[n++] = tstamp;
+		if (sample_type & PERF_SAMPLE_ID)
+			data.array[n++] = sid;
+		if (sample_type & PERF_SAMPLE_CPU)
+			data.array[n++] = 0;
+		if (sample_type & PERF_SAMPLE_PERIOD)
+			data.array[n++] = val;
+		if (sample_type & PERF_SAMPLE_CALLCHAIN) {
+			int len = 0;
+
+			/* data.array[n] is callchain->nr (updated later) */
+			data.array[n + 1] = PERF_CONTEXT_USER;
+			data.array[n + 2] = 0;
+
+			bpf_map_lookup_elem(stack, &key.stack_id, &data.array[n + 2]);
+			while (data.array[n + 2 + len])
+				len++;
+
+			/* update length of callchain */
+			data.array[n] = len + 1;
+
+			/* update sample ip with the first callchain entry */
+			if (ip_pos >= 0)
+				data.array[ip_pos] = data.array[n + 2];
+
+			/* calculate sample callchain data array length */
+			n += len + 2;
+		}
+		if (sample_type & PERF_SAMPLE_CGROUP)
+			data.array[n++] = key.cgroup_id;
+
+		size = n * sizeof(u64);
+		data.hdr.size = size;
+		bytes += size;
+
+		if (perf_data_file__write(file, &data, size) < 0) {
+			pr_err("failed to write perf data, error: %m\n");
+			return bytes;
+		}
+
+		prev = key;
+		/* increase dummy timestamp to sort later samples */
+		tstamp++;
+	}
+	return bytes;
+}
diff --git a/tools/perf/util/bpf_skel/.gitignore b/tools/perf/util/bpf_skel/.gitignore
new file mode 100644
index 000000000..5263e9e6c
--- /dev/null
+++ b/tools/perf/util/bpf_skel/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+.tmp
+*.skel.h
+\ No newline at end of file
diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
new file mode 100644
index 000000000..6a438e010
--- /dev/null
+++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2021 Facebook
+// Copyright (c) 2021 Google
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#define MAX_LEVELS  10  // max cgroup hierarchy level: arbitrary
+#define MAX_EVENTS  32  // max events per cgroup: arbitrary
+
+// NOTE: many of map and global data will be modified before loading
+//       from the userspace (perf tool) using the skeleton helpers.
+
+// single set of global perf events to measure
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(int));
+	__uint(max_entries, 1);
+} events SEC(".maps");
+
+// from cgroup id to event index
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(__u64));
+	__uint(value_size, sizeof(__u32));
+	__uint(max_entries, 1);
+} cgrp_idx SEC(".maps");
+
+// per-cpu event snapshots to calculate delta
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(struct bpf_perf_event_value));
+} prev_readings SEC(".maps");
+
+// aggregated event values for each cgroup (per-cpu)
+// will be read from the user-space
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(struct bpf_perf_event_value));
+} cgrp_readings SEC(".maps");
+
+/* new kernel cgroup definition */
+struct cgroup___new {
+	int level;
+	struct cgroup *ancestors[];
+} __attribute__((preserve_access_index));
+
+/* old kernel cgroup definition */
+struct cgroup___old {
+	int level;
+	u64 ancestor_ids[];
+} __attribute__((preserve_access_index));
+
+const volatile __u32 num_events = 1;
+const volatile __u32 num_cpus = 1;
+
+int enabled = 0;
+int use_cgroup_v2 = 0;
+int perf_subsys_id = -1;
+
+static inline __u64 get_cgroup_v1_ancestor_id(struct cgroup *cgrp, int level)
+{
+	/* recast pointer to capture new type for compiler */
+	struct cgroup___new *cgrp_new = (void *)cgrp;
+
+	if (bpf_core_field_exists(cgrp_new->ancestors)) {
+		return BPF_CORE_READ(cgrp_new, ancestors[level], kn, id);
+	} else {
+		/* recast pointer to capture old type for compiler */
+		struct cgroup___old *cgrp_old = (void *)cgrp;
+
+		return BPF_CORE_READ(cgrp_old, ancestor_ids[level]);
+	}
+}
+
+static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
+{
+	struct task_struct *p = (void *)bpf_get_current_task();
+	struct cgroup *cgrp;
+	register int i = 0;
+	__u32 *elem;
+	int level;
+	int cnt;
+
+	if (perf_subsys_id == -1) {
+#if __has_builtin(__builtin_preserve_enum_value)
+		perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id,
+						     perf_event_cgrp_id);
+#else
+		perf_subsys_id = perf_event_cgrp_id;
+#endif
+	}
+	cgrp = BPF_CORE_READ(p, cgroups, subsys[perf_subsys_id], cgroup);
+	level = BPF_CORE_READ(cgrp, level);
+
+	for (cnt = 0; i < MAX_LEVELS; i++) {
+		__u64 cgrp_id;
+
+		if (i > level)
+			break;
+
+		// convert cgroup-id to a map index
+		cgrp_id = get_cgroup_v1_ancestor_id(cgrp, i);
+		elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id);
+		if (!elem)
+			continue;
+
+		cgrps[cnt++] = *elem;
+		if (cnt == size)
+			break;
+	}
+
+	return cnt;
+}
+
+static inline int get_cgroup_v2_idx(__u32 *cgrps, int size)
+{
+	register int i = 0;
+	__u32 *elem;
+	int cnt;
+
+	for (cnt = 0; i < MAX_LEVELS; i++) {
+		__u64 cgrp_id = bpf_get_current_ancestor_cgroup_id(i);
+
+		if (cgrp_id == 0)
+			break;
+
+		// convert cgroup-id to a map index
+		elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id);
+		if (!elem)
+			continue;
+
+		cgrps[cnt++] = *elem;
+		if (cnt == size)
+			break;
+	}
+
+	return cnt;
+}
+
+static int bperf_cgroup_count(void)
+{
+	register __u32 idx = 0;  // to have it in a register to pass BPF verifier
+	register int c = 0;
+	struct bpf_perf_event_value val, delta, *prev_val, *cgrp_val;
+	__u32 cpu = bpf_get_smp_processor_id();
+	__u32 cgrp_idx[MAX_LEVELS];
+	int cgrp_cnt;
+	__u32 key, cgrp;
+	long err;
+
+	if (use_cgroup_v2)
+		cgrp_cnt = get_cgroup_v2_idx(cgrp_idx, MAX_LEVELS);
+	else
+		cgrp_cnt = get_cgroup_v1_idx(cgrp_idx, MAX_LEVELS);
+
+	for ( ; idx < MAX_EVENTS; idx++) {
+		if (idx == num_events)
+			break;
+
+		// XXX: do not pass idx directly (for verifier)
+		key = idx;
+		// this is per-cpu array for diff
+		prev_val = bpf_map_lookup_elem(&prev_readings, &key);
+		if (!prev_val) {
+			val.counter = val.enabled = val.running = 0;
+			bpf_map_update_elem(&prev_readings, &key, &val, BPF_ANY);
+
+			prev_val = bpf_map_lookup_elem(&prev_readings, &key);
+			if (!prev_val)
+				continue;
+		}
+
+		// read from global perf_event array
+		key = idx * num_cpus + cpu;
+		err = bpf_perf_event_read_value(&events, key, &val, sizeof(val));
+		if (err)
+			continue;
+
+		if (enabled) {
+			delta.counter = val.counter - prev_val->counter;
+			delta.enabled = val.enabled - prev_val->enabled;
+			delta.running = val.running - prev_val->running;
+
+			for (c = 0; c < MAX_LEVELS; c++) {
+				if (c == cgrp_cnt)
+					break;
+
+				cgrp = cgrp_idx[c];
+
+				// aggregate the result by cgroup
+				key = cgrp * num_events + idx;
+				cgrp_val = bpf_map_lookup_elem(&cgrp_readings, &key);
+				if (cgrp_val) {
+					cgrp_val->counter += delta.counter;
+					cgrp_val->enabled += delta.enabled;
+					cgrp_val->running += delta.running;
+				} else {
+					bpf_map_update_elem(&cgrp_readings, &key,
+							    &delta, BPF_ANY);
+				}
+			}
+		}
+
+		*prev_val = val;
+	}
+	return 0;
+}
+
+// This will be attached to cgroup-switches event for each cpu
+SEC("perf_event")
+int BPF_PROG(on_cgrp_switch)
+{
+	return bperf_cgroup_count();
+}
+
+SEC("raw_tp/sched_switch")
+int BPF_PROG(trigger_read)
+{
+	return bperf_cgroup_count();
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/bperf_follower.bpf.c b/tools/perf/util/bpf_skel/bperf_follower.bpf.c
new file mode 100644
index 000000000..f19399853
--- /dev/null
+++ b/tools/perf/util/bpf_skel/bperf_follower.bpf.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2021 Facebook
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bperf_u.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(struct bpf_perf_event_value));
+	__uint(max_entries, 1);
+} diff_readings SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(struct bpf_perf_event_value));
+	__uint(max_entries, 1);
+} accum_readings SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+} filter SEC(".maps");
+
+enum bperf_filter_type type = 0;
+int enabled = 0;
+
+SEC("fexit/XXX")
+int BPF_PROG(fexit_XXX)
+{
+	struct bpf_perf_event_value *diff_val, *accum_val;
+	__u32 filter_key, zero = 0;
+	__u32 *accum_key;
+
+	if (!enabled)
+		return 0;
+
+	switch (type) {
+	case BPERF_FILTER_GLOBAL:
+		accum_key = &zero;
+		goto do_add;
+	case BPERF_FILTER_CPU:
+		filter_key = bpf_get_smp_processor_id();
+		break;
+	case BPERF_FILTER_PID:
+		filter_key = bpf_get_current_pid_tgid() & 0xffffffff;
+		break;
+	case BPERF_FILTER_TGID:
+		filter_key = bpf_get_current_pid_tgid() >> 32;
+		break;
+	default:
+		return 0;
+	}
+
+	accum_key = bpf_map_lookup_elem(&filter, &filter_key);
+	if (!accum_key)
+		return 0;
+
+do_add:
+	diff_val = bpf_map_lookup_elem(&diff_readings, &zero);
+	if (!diff_val)
+		return 0;
+
+	accum_val = bpf_map_lookup_elem(&accum_readings, accum_key);
+	if (!accum_val)
+		return 0;
+
+	accum_val->counter += diff_val->counter;
+	accum_val->enabled += diff_val->enabled;
+	accum_val->running += diff_val->running;
+
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/bperf_leader.bpf.c b/tools/perf/util/bpf_skel/bperf_leader.bpf.c
new file mode 100644
index 000000000..e2a2d4cd7
--- /dev/null
+++ b/tools/perf/util/bpf_skel/bperf_leader.bpf.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2021 Facebook
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(int));
+	__uint(map_flags, BPF_F_PRESERVE_ELEMS);
+} events SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(struct bpf_perf_event_value));
+	__uint(max_entries, 1);
+} prev_readings SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(struct bpf_perf_event_value));
+	__uint(max_entries, 1);
+} diff_readings SEC(".maps");
+
+SEC("raw_tp/sched_switch")
+int BPF_PROG(on_switch)
+{
+	struct bpf_perf_event_value val, *prev_val, *diff_val;
+	__u32 key = bpf_get_smp_processor_id();
+	__u32 zero = 0;
+	long err;
+
+	prev_val = bpf_map_lookup_elem(&prev_readings, &zero);
+	if (!prev_val)
+		return 0;
+
+	diff_val = bpf_map_lookup_elem(&diff_readings, &zero);
+	if (!diff_val)
+		return 0;
+
+	err = bpf_perf_event_read_value(&events, key, &val, sizeof(val));
+	if (err)
+		return 0;
+
+	diff_val->counter = val.counter - prev_val->counter;
+	diff_val->enabled = val.enabled - prev_val->enabled;
+	diff_val->running = val.running - prev_val->running;
+	*prev_val = val;
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/bperf_u.h b/tools/perf/util/bpf_skel/bperf_u.h
new file mode 100644
index 000000000..1ce0c2c90
--- /dev/null
+++ b/tools/perf/util/bpf_skel/bperf_u.h
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2021 Facebook
+
+#ifndef __BPERF_STAT_U_H
+#define __BPERF_STAT_U_H
+
+enum bperf_filter_type {
+	BPERF_FILTER_GLOBAL = 1,
+	BPERF_FILTER_CPU,
+	BPERF_FILTER_PID,
+	BPERF_FILTER_TGID,
+};
+
+#endif /* __BPERF_STAT_U_H */
diff --git a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c
new file mode 100644
index 000000000..97037d3b3
--- /dev/null
+++ b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2020 Facebook
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* map of perf event fds, num_cpu * num_metric entries */
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(int));
+} events SEC(".maps");
+
+/* readings at fentry */
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(struct bpf_perf_event_value));
+	__uint(max_entries, 1);
+} fentry_readings SEC(".maps");
+
+/* accumulated readings */
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(struct bpf_perf_event_value));
+	__uint(max_entries, 1);
+} accum_readings SEC(".maps");
+
+const volatile __u32 num_cpu = 1;
+
+SEC("fentry/XXX")
+int BPF_PROG(fentry_XXX)
+{
+	__u32 key = bpf_get_smp_processor_id();
+	struct bpf_perf_event_value *ptr;
+	__u32 zero = 0;
+	long err;
+
+	/* look up before reading, to reduce error */
+	ptr = bpf_map_lookup_elem(&fentry_readings, &zero);
+	if (!ptr)
+		return 0;
+
+	err = bpf_perf_event_read_value(&events, key, ptr, sizeof(*ptr));
+	if (err)
+		return 0;
+
+	return 0;
+}
+
+static inline void
+fexit_update_maps(struct bpf_perf_event_value *after)
+{
+	struct bpf_perf_event_value *before, diff;
+	__u32 zero = 0;
+
+	before = bpf_map_lookup_elem(&fentry_readings, &zero);
+	/* only account samples with a valid fentry_reading */
+	if (before && before->counter) {
+		struct bpf_perf_event_value *accum;
+
+		diff.counter = after->counter - before->counter;
+		diff.enabled = after->enabled - before->enabled;
+		diff.running = after->running - before->running;
+
+		accum = bpf_map_lookup_elem(&accum_readings, &zero);
+		if (accum) {
+			accum->counter += diff.counter;
+			accum->enabled += diff.enabled;
+			accum->running += diff.running;
+		}
+	}
+}
+
+SEC("fexit/XXX")
+int BPF_PROG(fexit_XXX)
+{
+	struct bpf_perf_event_value reading;
+	__u32 cpu = bpf_get_smp_processor_id();
+	int err;
+
+	/* read all events before updating the maps, to reduce error */
+	err = bpf_perf_event_read_value(&events, cpu, &reading, sizeof(reading));
+	if (err)
+		return 0;
+
+	fexit_update_maps(&reading);
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
new file mode 100644
index 000000000..9d01e3af7
--- /dev/null
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2021 Google
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+// This should be in sync with "util/ftrace.h"
+#define NUM_BUCKET  22
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(__u64));
+	__uint(value_size, sizeof(__u64));
+	__uint(max_entries, 10000);
+} functime SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u8));
+	__uint(max_entries, 1);
+} cpu_filter SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u8));
+	__uint(max_entries, 1);
+} task_filter SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u64));
+	__uint(max_entries, NUM_BUCKET);
+} latency SEC(".maps");
+
+
+int enabled = 0;
+int has_cpu = 0;
+int has_task = 0;
+int use_nsec = 0;
+
+SEC("kprobe/func")
+int BPF_PROG(func_begin)
+{
+	__u64 key, now;
+
+	if (!enabled)
+		return 0;
+
+	key = bpf_get_current_pid_tgid();
+
+	if (has_cpu) {
+		__u32 cpu = bpf_get_smp_processor_id();
+		__u8 *ok;
+
+		ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
+		if (!ok)
+			return 0;
+	}
+
+	if (has_task) {
+		__u32 pid = key & 0xffffffff;
+		__u8 *ok;
+
+		ok = bpf_map_lookup_elem(&task_filter, &pid);
+		if (!ok)
+			return 0;
+	}
+
+	now = bpf_ktime_get_ns();
+
+	// overwrite timestamp for nested functions
+	bpf_map_update_elem(&functime, &key, &now, BPF_ANY);
+	return 0;
+}
+
+SEC("kretprobe/func")
+int BPF_PROG(func_end)
+{
+	__u64 tid;
+	__u64 *start;
+	__u64 cmp_base = use_nsec ? 1 : 1000;
+
+	if (!enabled)
+		return 0;
+
+	tid = bpf_get_current_pid_tgid();
+
+	start = bpf_map_lookup_elem(&functime, &tid);
+	if (start) {
+		__s64 delta = bpf_ktime_get_ns() - *start;
+		__u32 key;
+		__u64 *hist;
+
+		bpf_map_delete_elem(&functime, &tid);
+
+		if (delta < 0)
+			return 0;
+
+		// calculate index using delta
+		for (key = 0; key < (NUM_BUCKET - 1); key++) {
+			if (delta < (cmp_base << key))
+				break;
+		}
+
+		hist = bpf_map_lookup_elem(&latency, &key);
+		if (!hist)
+			return 0;
+
+		*hist += 1;
+	}
+
+	return 0;
+}
diff --git a/tools/perf/util/bpf_skel/kwork_trace.bpf.c b/tools/perf/util/bpf_skel/kwork_trace.bpf.c
new file mode 100644
index 000000000..063c124e0
--- /dev/null
+++ b/tools/perf/util/bpf_skel/kwork_trace.bpf.c
@@ -0,0 +1,383 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2022, Huawei
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define KWORK_COUNT 100
+#define MAX_KWORKNAME 128
+
+/*
+ * This should be in sync with "util/kwork.h"
+ */
+enum kwork_class_type {
+	KWORK_CLASS_IRQ,
+	KWORK_CLASS_SOFTIRQ,
+	KWORK_CLASS_WORKQUEUE,
+	KWORK_CLASS_MAX,
+};
+
+struct work_key {
+	__u32 type;
+	__u32 cpu;
+	__u64 id;
+};
+
+struct report_data {
+	__u64 nr;
+	__u64 total_time;
+	__u64 max_time;
+	__u64 max_time_start;
+	__u64 max_time_end;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(struct work_key));
+	__uint(value_size, MAX_KWORKNAME);
+	__uint(max_entries, KWORK_COUNT);
+} perf_kwork_names SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(struct work_key));
+	__uint(value_size, sizeof(__u64));
+	__uint(max_entries, KWORK_COUNT);
+} perf_kwork_time SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(struct work_key));
+	__uint(value_size, sizeof(struct report_data));
+	__uint(max_entries, KWORK_COUNT);
+} perf_kwork_report SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u8));
+	__uint(max_entries, 1);
+} perf_kwork_cpu_filter SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, MAX_KWORKNAME);
+	__uint(max_entries, 1);
+} perf_kwork_name_filter SEC(".maps");
+
+int enabled = 0;
+int has_cpu_filter = 0;
+int has_name_filter = 0;
+
+static __always_inline int local_strncmp(const char *s1,
+					 unsigned int sz, const char *s2)
+{
+	int ret = 0;
+	unsigned int i;
+
+	for (i = 0; i < sz; i++) {
+		ret = (unsigned char)s1[i] - (unsigned char)s2[i];
+		if (ret || !s1[i] || !s2[i])
+			break;
+	}
+
+	return ret;
+}
+
+static __always_inline int trace_event_match(struct work_key *key, char *name)
+{
+	__u8 *cpu_val;
+	char *name_val;
+	__u32 zero = 0;
+	__u32 cpu = bpf_get_smp_processor_id();
+
+	if (!enabled)
+		return 0;
+
+	if (has_cpu_filter) {
+		cpu_val = bpf_map_lookup_elem(&perf_kwork_cpu_filter, &cpu);
+		if (!cpu_val)
+			return 0;
+	}
+
+	if (has_name_filter && (name != NULL)) {
+		name_val = bpf_map_lookup_elem(&perf_kwork_name_filter, &zero);
+		if (name_val &&
+		    (local_strncmp(name_val, MAX_KWORKNAME, name) != 0)) {
+			return 0;
+		}
+	}
+
+	return 1;
+}
+
+static __always_inline void do_update_time(void *map, struct work_key *key,
+					   __u64 time_start, __u64 time_end)
+{
+	struct report_data zero, *data;
+	__s64 delta = time_end - time_start;
+
+	if (delta < 0)
+		return;
+
+	data = bpf_map_lookup_elem(map, key);
+	if (!data) {
+		__builtin_memset(&zero, 0, sizeof(zero));
+		bpf_map_update_elem(map, key, &zero, BPF_NOEXIST);
+		data = bpf_map_lookup_elem(map, key);
+		if (!data)
+			return;
+	}
+
+	if ((delta > data->max_time) ||
+	    (data->max_time == 0)) {
+		data->max_time       = delta;
+		data->max_time_start = time_start;
+		data->max_time_end   = time_end;
+	}
+
+	data->total_time += delta;
+	data->nr++;
+}
+
+static __always_inline void do_update_timestart(void *map, struct work_key *key)
+{
+	__u64 ts = bpf_ktime_get_ns();
+
+	bpf_map_update_elem(map, key, &ts, BPF_ANY);
+}
+
+static __always_inline void do_update_timeend(void *report_map, void *time_map,
+					      struct work_key *key)
+{
+	__u64 *time = bpf_map_lookup_elem(time_map, key);
+
+	if (time) {
+		bpf_map_delete_elem(time_map, key);
+		do_update_time(report_map, key, *time, bpf_ktime_get_ns());
+	}
+}
+
+static __always_inline void do_update_name(void *map,
+					   struct work_key *key, char *name)
+{
+	if (!bpf_map_lookup_elem(map, key))
+		bpf_map_update_elem(map, key, name, BPF_ANY);
+}
+
+static __always_inline int update_timestart(void *map, struct work_key *key)
+{
+	if (!trace_event_match(key, NULL))
+		return 0;
+
+	do_update_timestart(map, key);
+	return 0;
+}
+
+static __always_inline int update_timestart_and_name(void *time_map,
+						     void *names_map,
+						     struct work_key *key,
+						     char *name)
+{
+	if (!trace_event_match(key, name))
+		return 0;
+
+	do_update_timestart(time_map, key);
+	do_update_name(names_map, key, name);
+
+	return 0;
+}
+
+static __always_inline int update_timeend(void *report_map,
+					  void *time_map, struct work_key *key)
+{
+	if (!trace_event_match(key, NULL))
+		return 0;
+
+	do_update_timeend(report_map, time_map, key);
+
+	return 0;
+}
+
+static __always_inline int update_timeend_and_name(void *report_map,
+						   void *time_map,
+						   void *names_map,
+						   struct work_key *key,
+						   char *name)
+{
+	if (!trace_event_match(key, name))
+		return 0;
+
+	do_update_timeend(report_map, time_map, key);
+	do_update_name(names_map, key, name);
+
+	return 0;
+}
+
+SEC("tracepoint/irq/irq_handler_entry")
+int report_irq_handler_entry(struct trace_event_raw_irq_handler_entry *ctx)
+{
+	char name[MAX_KWORKNAME];
+	struct work_key key = {
+		.type = KWORK_CLASS_IRQ,
+		.cpu  = bpf_get_smp_processor_id(),
+		.id   = (__u64)ctx->irq,
+	};
+	void *name_addr = (void *)ctx + (ctx->__data_loc_name & 0xffff);
+
+	bpf_probe_read_kernel_str(name, sizeof(name), name_addr);
+
+	return update_timestart_and_name(&perf_kwork_time,
+					 &perf_kwork_names, &key, name);
+}
+
+SEC("tracepoint/irq/irq_handler_exit")
+int report_irq_handler_exit(struct trace_event_raw_irq_handler_exit *ctx)
+{
+	struct work_key key = {
+		.type = KWORK_CLASS_IRQ,
+		.cpu  = bpf_get_smp_processor_id(),
+		.id   = (__u64)ctx->irq,
+	};
+
+	return update_timeend(&perf_kwork_report, &perf_kwork_time, &key);
+}
+
+static char softirq_name_list[NR_SOFTIRQS][MAX_KWORKNAME] = {
+	{ "HI"       },
+	{ "TIMER"    },
+	{ "NET_TX"   },
+	{ "NET_RX"   },
+	{ "BLOCK"    },
+	{ "IRQ_POLL" },
+	{ "TASKLET"  },
+	{ "SCHED"    },
+	{ "HRTIMER"  },
+	{ "RCU"      },
+};
+
+SEC("tracepoint/irq/softirq_entry")
+int report_softirq_entry(struct trace_event_raw_softirq *ctx)
+{
+	unsigned int vec = ctx->vec;
+	struct work_key key = {
+		.type = KWORK_CLASS_SOFTIRQ,
+		.cpu  = bpf_get_smp_processor_id(),
+		.id   = (__u64)vec,
+	};
+
+	if (vec < NR_SOFTIRQS) {
+		return update_timestart_and_name(&perf_kwork_time,
+						 &perf_kwork_names, &key,
+						 softirq_name_list[vec]);
+	}
+
+	return 0;
+}
+
+SEC("tracepoint/irq/softirq_exit")
+int report_softirq_exit(struct trace_event_raw_softirq *ctx)
+{
+	struct work_key key = {
+		.type = KWORK_CLASS_SOFTIRQ,
+		.cpu  = bpf_get_smp_processor_id(),
+		.id   = (__u64)ctx->vec,
+	};
+
+	return update_timeend(&perf_kwork_report, &perf_kwork_time, &key);
+}
+
+SEC("tracepoint/irq/softirq_raise")
+int latency_softirq_raise(struct trace_event_raw_softirq *ctx)
+{
+	unsigned int vec = ctx->vec;
+	struct work_key key = {
+		.type = KWORK_CLASS_SOFTIRQ,
+		.cpu  = bpf_get_smp_processor_id(),
+		.id   = (__u64)vec,
+	};
+
+	if (vec < NR_SOFTIRQS) {
+		return update_timestart_and_name(&perf_kwork_time,
+						 &perf_kwork_names, &key,
+						 softirq_name_list[vec]);
+	}
+
+	return 0;
+}
+
+SEC("tracepoint/irq/softirq_entry")
+int latency_softirq_entry(struct trace_event_raw_softirq *ctx)
+{
+	struct work_key key = {
+		.type = KWORK_CLASS_SOFTIRQ,
+		.cpu  = bpf_get_smp_processor_id(),
+		.id   = (__u64)ctx->vec,
+	};
+
+	return update_timeend(&perf_kwork_report, &perf_kwork_time, &key);
+}
+
+SEC("tracepoint/workqueue/workqueue_execute_start")
+int report_workqueue_execute_start(struct trace_event_raw_workqueue_execute_start *ctx)
+{
+	struct work_key key = {
+		.type = KWORK_CLASS_WORKQUEUE,
+		.cpu  = bpf_get_smp_processor_id(),
+		.id   = (__u64)ctx->work,
+	};
+
+	return update_timestart(&perf_kwork_time, &key);
+}
+
+SEC("tracepoint/workqueue/workqueue_execute_end")
+int report_workqueue_execute_end(struct trace_event_raw_workqueue_execute_end *ctx)
+{
+	char name[MAX_KWORKNAME];
+	struct work_key key = {
+		.type = KWORK_CLASS_WORKQUEUE,
+		.cpu  = bpf_get_smp_processor_id(),
+		.id   = (__u64)ctx->work,
+	};
+	unsigned long long func_addr = (unsigned long long)ctx->function;
+
+	__builtin_memset(name, 0, sizeof(name));
+	bpf_snprintf(name, sizeof(name), "%ps", &func_addr, sizeof(func_addr));
+
+	return update_timeend_and_name(&perf_kwork_report, &perf_kwork_time,
+				       &perf_kwork_names, &key, name);
+}
+
+SEC("tracepoint/workqueue/workqueue_activate_work")
+int latency_workqueue_activate_work(struct trace_event_raw_workqueue_activate_work *ctx)
+{
+	struct work_key key = {
+		.type = KWORK_CLASS_WORKQUEUE,
+		.cpu  = bpf_get_smp_processor_id(),
+		.id   = (__u64)ctx->work,
+	};
+
+	return update_timestart(&perf_kwork_time, &key);
+}
+
+SEC("tracepoint/workqueue/workqueue_execute_start")
+int latency_workqueue_execute_start(struct trace_event_raw_workqueue_execute_start *ctx)
+{
+	char name[MAX_KWORKNAME];
+	struct work_key key = {
+		.type = KWORK_CLASS_WORKQUEUE,
+		.cpu  = bpf_get_smp_processor_id(),
+		.id   = (__u64)ctx->work,
+	};
+	unsigned long long func_addr = (unsigned long long)ctx->function;
+
+	__builtin_memset(name, 0, sizeof(name));
+	bpf_snprintf(name, sizeof(name), "%ps", &func_addr, sizeof(func_addr));
+
+	return update_timeend_and_name(&perf_kwork_report, &perf_kwork_time,
+				       &perf_kwork_names, &key, name);
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
new file mode 100644
index 000000000..1bb8628e7
--- /dev/null
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2022 Google
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+/* maximum stack trace depth */
+#define MAX_STACKS   8
+
+/* default buffer size */
+#define MAX_ENTRIES  10240
+
+struct contention_key {
+	__s32 stack_id;
+};
+
+struct contention_data {
+	__u64 total_time;
+	__u64 min_time;
+	__u64 max_time;
+	__u32 count;
+	__u32 flags;
+};
+
+struct tstamp_data {
+	__u64 timestamp;
+	__u64 lock;
+	__u32 flags;
+	__s32 stack_id;
+};
+
+/* callstack storage  */
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, MAX_STACKS * sizeof(__u64));
+	__uint(max_entries, MAX_ENTRIES);
+} stacks SEC(".maps");
+
+/* maintain timestamp at the beginning of contention */
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct tstamp_data);
+} tstamp SEC(".maps");
+
+/* actual lock contention statistics */
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(struct contention_key));
+	__uint(value_size, sizeof(struct contention_data));
+	__uint(max_entries, MAX_ENTRIES);
+} lock_stat SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u8));
+	__uint(max_entries, 1);
+} cpu_filter SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u8));
+	__uint(max_entries, 1);
+} task_filter SEC(".maps");
+
+/* control flags */
+int enabled;
+int has_cpu;
+int has_task;
+int stack_skip;
+
+/* error stat */
+int lost;
+
+static inline int can_record(void)
+{
+	if (has_cpu) {
+		__u32 cpu = bpf_get_smp_processor_id();
+		__u8 *ok;
+
+		ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
+		if (!ok)
+			return 0;
+	}
+
+	if (has_task) {
+		__u8 *ok;
+		__u32 pid = bpf_get_current_pid_tgid();
+
+		ok = bpf_map_lookup_elem(&task_filter, &pid);
+		if (!ok)
+			return 0;
+	}
+
+	return 1;
+}
+
+SEC("tp_btf/contention_begin")
+int contention_begin(u64 *ctx)
+{
+	struct task_struct *curr;
+	struct tstamp_data *pelem;
+
+	if (!enabled || !can_record())
+		return 0;
+
+	curr = bpf_get_current_task_btf();
+	pelem = bpf_task_storage_get(&tstamp, curr, NULL,
+				     BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (!pelem || pelem->lock)
+		return 0;
+
+	pelem->timestamp = bpf_ktime_get_ns();
+	pelem->lock = (__u64)ctx[0];
+	pelem->flags = (__u32)ctx[1];
+	pelem->stack_id = bpf_get_stackid(ctx, &stacks, BPF_F_FAST_STACK_CMP | stack_skip);
+
+	if (pelem->stack_id < 0)
+		lost++;
+	return 0;
+}
+
+SEC("tp_btf/contention_end")
+int contention_end(u64 *ctx)
+{
+	struct task_struct *curr;
+	struct tstamp_data *pelem;
+	struct contention_key key;
+	struct contention_data *data;
+	__u64 duration;
+
+	if (!enabled)
+		return 0;
+
+	curr = bpf_get_current_task_btf();
+	pelem = bpf_task_storage_get(&tstamp, curr, NULL, 0);
+	if (!pelem || pelem->lock != ctx[0])
+		return 0;
+
+	duration = bpf_ktime_get_ns() - pelem->timestamp;
+
+	key.stack_id = pelem->stack_id;
+	data = bpf_map_lookup_elem(&lock_stat, &key);
+	if (!data) {
+		struct contention_data first = {
+			.total_time = duration,
+			.max_time = duration,
+			.min_time = duration,
+			.count = 1,
+			.flags = pelem->flags,
+		};
+
+		bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST);
+		pelem->lock = 0;
+		return 0;
+	}
+
+	__sync_fetch_and_add(&data->total_time, duration);
+	__sync_fetch_and_add(&data->count, 1);
+
+	/* FIXME: need atomic operations */
+	if (data->max_time < duration)
+		data->max_time = duration;
+	if (data->min_time > duration)
+		data->min_time = duration;
+
+	pelem->lock = 0;
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c
new file mode 100644
index 000000000..38e3b287d
--- /dev/null
+++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2022 Google
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+/* task->flags for off-cpu analysis */
+#define PF_KTHREAD   0x00200000  /* I am a kernel thread */
+
+/* task->state for off-cpu analysis */
+#define TASK_INTERRUPTIBLE	0x0001
+#define TASK_UNINTERRUPTIBLE	0x0002
+
+/* create a new thread */
+#define CLONE_THREAD  0x10000
+
+#define MAX_STACKS   32
+#define MAX_ENTRIES  102400
+
+struct tstamp_data {
+	__u32 stack_id;
+	__u32 state;
+	__u64 timestamp;
+};
+
+struct offcpu_key {
+	__u32 pid;
+	__u32 tgid;
+	__u32 stack_id;
+	__u32 state;
+	__u64 cgroup_id;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, MAX_STACKS * sizeof(__u64));
+	__uint(max_entries, MAX_ENTRIES);
+} stacks SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct tstamp_data);
+} tstamp SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(struct offcpu_key));
+	__uint(value_size, sizeof(__u64));
+	__uint(max_entries, MAX_ENTRIES);
+} off_cpu SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u8));
+	__uint(max_entries, 1);
+} cpu_filter SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u8));
+	__uint(max_entries, 1);
+} task_filter SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(__u64));
+	__uint(value_size, sizeof(__u8));
+	__uint(max_entries, 1);
+} cgroup_filter SEC(".maps");
+
+/* new kernel task_struct definition */
+struct task_struct___new {
+	long __state;
+} __attribute__((preserve_access_index));
+
+/* old kernel task_struct definition */
+struct task_struct___old {
+	long state;
+} __attribute__((preserve_access_index));
+
+int enabled = 0;
+int has_cpu = 0;
+int has_task = 0;
+int has_cgroup = 0;
+int uses_tgid = 0;
+
+const volatile bool has_prev_state = false;
+const volatile bool needs_cgroup = false;
+const volatile bool uses_cgroup_v1 = false;
+
+int perf_subsys_id = -1;
+
+/*
+ * Old kernel used to call it task_struct->state and now it's '__state'.
+ * Use BPF CO-RE "ignored suffix rule" to deal with it like below:
+ *
+ * https://nakryiko.com/posts/bpf-core-reference-guide/#handling-incompatible-field-and-type-changes
+ */
+static inline int get_task_state(struct task_struct *t)
+{
+	/* recast pointer to capture new type for compiler */
+	struct task_struct___new *t_new = (void *)t;
+
+	if (bpf_core_field_exists(t_new->__state)) {
+		return BPF_CORE_READ(t_new, __state);
+	} else {
+		/* recast pointer to capture old type for compiler */
+		struct task_struct___old *t_old = (void *)t;
+
+		return BPF_CORE_READ(t_old, state);
+	}
+}
+
+static inline __u64 get_cgroup_id(struct task_struct *t)
+{
+	struct cgroup *cgrp;
+
+	if (!uses_cgroup_v1)
+		return BPF_CORE_READ(t, cgroups, dfl_cgrp, kn, id);
+
+	if (perf_subsys_id == -1) {
+#if __has_builtin(__builtin_preserve_enum_value)
+		perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id,
+						     perf_event_cgrp_id);
+#else
+		perf_subsys_id = perf_event_cgrp_id;
+#endif
+	}
+
+	cgrp = BPF_CORE_READ(t, cgroups, subsys[perf_subsys_id], cgroup);
+	return BPF_CORE_READ(cgrp, kn, id);
+}
+
+static inline int can_record(struct task_struct *t, int state)
+{
+	/* kernel threads don't have user stack */
+	if (t->flags & PF_KTHREAD)
+		return 0;
+
+	if (state != TASK_INTERRUPTIBLE &&
+	    state != TASK_UNINTERRUPTIBLE)
+		return 0;
+
+	if (has_cpu) {
+		__u32 cpu = bpf_get_smp_processor_id();
+		__u8 *ok;
+
+		ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
+		if (!ok)
+			return 0;
+	}
+
+	if (has_task) {
+		__u8 *ok;
+		__u32 pid;
+
+		if (uses_tgid)
+			pid = t->tgid;
+		else
+			pid = t->pid;
+
+		ok = bpf_map_lookup_elem(&task_filter, &pid);
+		if (!ok)
+			return 0;
+	}
+
+	if (has_cgroup) {
+		__u8 *ok;
+		__u64 cgrp_id = get_cgroup_id(t);
+
+		ok = bpf_map_lookup_elem(&cgroup_filter, &cgrp_id);
+		if (!ok)
+			return 0;
+	}
+
+	return 1;
+}
+
+static int off_cpu_stat(u64 *ctx, struct task_struct *prev,
+			struct task_struct *next, int state)
+{
+	__u64 ts;
+	__u32 stack_id;
+	struct tstamp_data *pelem;
+
+	ts = bpf_ktime_get_ns();
+
+	if (!can_record(prev, state))
+		goto next;
+
+	stack_id = bpf_get_stackid(ctx, &stacks,
+				   BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK);
+
+	pelem = bpf_task_storage_get(&tstamp, prev, NULL,
+				     BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (!pelem)
+		goto next;
+
+	pelem->timestamp = ts;
+	pelem->state = state;
+	pelem->stack_id = stack_id;
+
+next:
+	pelem = bpf_task_storage_get(&tstamp, next, NULL, 0);
+
+	if (pelem && pelem->timestamp) {
+		struct offcpu_key key = {
+			.pid = next->pid,
+			.tgid = next->tgid,
+			.stack_id = pelem->stack_id,
+			.state = pelem->state,
+			.cgroup_id = needs_cgroup ? get_cgroup_id(next) : 0,
+		};
+		__u64 delta = ts - pelem->timestamp;
+		__u64 *total;
+
+		total = bpf_map_lookup_elem(&off_cpu, &key);
+		if (total)
+			*total += delta;
+		else
+			bpf_map_update_elem(&off_cpu, &key, &delta, BPF_ANY);
+
+		/* prevent to reuse the timestamp later */
+		pelem->timestamp = 0;
+	}
+
+	return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int on_newtask(u64 *ctx)
+{
+	struct task_struct *task;
+	u64 clone_flags;
+	u32 pid;
+	u8 val = 1;
+
+	if (!uses_tgid)
+		return 0;
+
+	task = (struct task_struct *)bpf_get_current_task();
+
+	pid = BPF_CORE_READ(task, tgid);
+	if (!bpf_map_lookup_elem(&task_filter, &pid))
+		return 0;
+
+	task = (struct task_struct *)ctx[0];
+	clone_flags = ctx[1];
+
+	pid = task->tgid;
+	if (!(clone_flags & CLONE_THREAD))
+		bpf_map_update_elem(&task_filter, &pid, &val, BPF_NOEXIST);
+
+	return 0;
+}
+
+SEC("tp_btf/sched_switch")
+int on_switch(u64 *ctx)
+{
+	struct task_struct *prev, *next;
+	int prev_state;
+
+	if (!enabled)
+		return 0;
+
+	prev = (struct task_struct *)ctx[1];
+	next = (struct task_struct *)ctx[2];
+
+	if (has_prev_state)
+		prev_state = (int)ctx[3];
+	else
+		prev_state = get_task_state(prev);
+
+	return off_cpu_stat(ctx, prev, next, prev_state);
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/branch.c b/tools/perf/util/branch.c
new file mode 100644
index 000000000..6d3823848
--- /dev/null
+++ b/tools/perf/util/branch.c
@@ -0,0 +1,214 @@
+#include "util/map_symbol.h"
+#include "util/branch.h"
+#include <linux/kernel.h>
+
+static bool cross_area(u64 addr1, u64 addr2, int size)
+{
+	u64 align1, align2;
+
+	align1 = addr1 & ~(size - 1);
+	align2 = addr2 & ~(size - 1);
+
+	return (align1 != align2) ? true : false;
+}
+
+#define AREA_4K		4096
+#define AREA_2M		(2 * 1024 * 1024)
+
+void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags,
+		       u64 from, u64 to)
+{
+	if (flags->type == PERF_BR_UNKNOWN || from == 0)
+		return;
+
+	if (flags->type == PERF_BR_EXTEND_ABI)
+		st->new_counts[flags->new_type]++;
+	else
+		st->counts[flags->type]++;
+
+	if (flags->type == PERF_BR_COND) {
+		if (to > from)
+			st->cond_fwd++;
+		else
+			st->cond_bwd++;
+	}
+
+	if (cross_area(from, to, AREA_2M))
+		st->cross_2m++;
+	else if (cross_area(from, to, AREA_4K))
+		st->cross_4k++;
+}
+
+const char *branch_new_type_name(int new_type)
+{
+	const char *branch_new_names[PERF_BR_NEW_MAX] = {
+		"FAULT_ALGN",
+		"FAULT_DATA",
+		"FAULT_INST",
+/*
+ * TODO: This switch should happen on 'session->header.env.arch'
+ * instead, because an arm64 platform perf recording could be
+ * opened for analysis on other platforms as well.
+ */
+#ifdef __aarch64__
+		"ARM64_FIQ",
+		"ARM64_DEBUG_HALT",
+		"ARM64_DEBUG_EXIT",
+		"ARM64_DEBUG_INST",
+		"ARM64_DEBUG_DATA"
+#else
+		"ARCH_1",
+		"ARCH_2",
+		"ARCH_3",
+		"ARCH_4",
+		"ARCH_5"
+#endif
+	};
+
+	if (new_type >= 0 && new_type < PERF_BR_NEW_MAX)
+		return branch_new_names[new_type];
+
+	return NULL;
+}
+
+const char *branch_type_name(int type)
+{
+	const char *branch_names[PERF_BR_MAX] = {
+		"N/A",
+		"COND",
+		"UNCOND",
+		"IND",
+		"CALL",
+		"IND_CALL",
+		"RET",
+		"SYSCALL",
+		"SYSRET",
+		"COND_CALL",
+		"COND_RET",
+		"ERET",
+		"IRQ",
+		"SERROR",
+		"NO_TX",
+		"", // Needed for PERF_BR_EXTEND_ABI that ends up triggering some compiler warnings about NULL deref
+	};
+
+	if (type >= 0 && type < PERF_BR_MAX)
+		return branch_names[type];
+
+	return NULL;
+}
+
+const char *get_branch_type(struct branch_entry *e)
+{
+	if (e->flags.type == PERF_BR_UNKNOWN)
+		return "";
+
+	if (e->flags.type == PERF_BR_EXTEND_ABI)
+		return branch_new_type_name(e->flags.new_type);
+
+	return branch_type_name(e->flags.type);
+}
+
+void branch_type_stat_display(FILE *fp, struct branch_type_stat *st)
+{
+	u64 total = 0;
+	int i;
+
+	for (i = 0; i < PERF_BR_MAX; i++)
+		total += st->counts[i];
+
+	if (total == 0)
+		return;
+
+	fprintf(fp, "\n#");
+	fprintf(fp, "\n# Branch Statistics:");
+	fprintf(fp, "\n#");
+
+	if (st->cond_fwd > 0) {
+		fprintf(fp, "\n%8s: %5.1f%%",
+			"COND_FWD",
+			100.0 * (double)st->cond_fwd / (double)total);
+	}
+
+	if (st->cond_bwd > 0) {
+		fprintf(fp, "\n%8s: %5.1f%%",
+			"COND_BWD",
+			100.0 * (double)st->cond_bwd / (double)total);
+	}
+
+	if (st->cross_4k > 0) {
+		fprintf(fp, "\n%8s: %5.1f%%",
+			"CROSS_4K",
+			100.0 * (double)st->cross_4k / (double)total);
+	}
+
+	if (st->cross_2m > 0) {
+		fprintf(fp, "\n%8s: %5.1f%%",
+			"CROSS_2M",
+			100.0 * (double)st->cross_2m / (double)total);
+	}
+
+	for (i = 0; i < PERF_BR_MAX; i++) {
+		if (st->counts[i] > 0)
+			fprintf(fp, "\n%8s: %5.1f%%",
+				branch_type_name(i),
+				100.0 *
+				(double)st->counts[i] / (double)total);
+	}
+
+	for (i = 0; i < PERF_BR_NEW_MAX; i++) {
+		if (st->new_counts[i] > 0)
+			fprintf(fp, "\n%8s: %5.1f%%",
+				branch_new_type_name(i),
+				100.0 *
+				(double)st->new_counts[i] / (double)total);
+	}
+
+}
+
+static int count_str_scnprintf(int idx, const char *str, char *bf, int size)
+{
+	return scnprintf(bf, size, "%s%s", (idx) ? " " : " (", str);
+}
+
+int branch_type_str(struct branch_type_stat *st, char *bf, int size)
+{
+	int i, j = 0, printed = 0;
+	u64 total = 0;
+
+	for (i = 0; i < PERF_BR_MAX; i++)
+		total += st->counts[i];
+
+	for (i = 0; i < PERF_BR_NEW_MAX; i++)
+		total += st->new_counts[i];
+
+	if (total == 0)
+		return 0;
+
+	if (st->cond_fwd > 0)
+		printed += count_str_scnprintf(j++, "COND_FWD", bf + printed, size - printed);
+
+	if (st->cond_bwd > 0)
+		printed += count_str_scnprintf(j++, "COND_BWD", bf + printed, size - printed);
+
+	for (i = 0; i < PERF_BR_MAX; i++) {
+		if (i == PERF_BR_COND)
+			continue;
+
+		if (st->counts[i] > 0)
+			printed += count_str_scnprintf(j++, branch_type_name(i), bf + printed, size - printed);
+	}
+
+	for (i = 0; i < PERF_BR_NEW_MAX; i++) {
+		if (st->new_counts[i] > 0)
+			printed += count_str_scnprintf(j++, branch_new_type_name(i), bf + printed, size - printed);
+	}
+
+	if (st->cross_4k > 0)
+		printed += count_str_scnprintf(j++, "CROSS_4K", bf + printed, size - printed);
+
+	if (st->cross_2m > 0)
+		printed += count_str_scnprintf(j++, "CROSS_2M", bf + printed, size - printed);
+
+	return printed;
+}
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
new file mode 100644
index 000000000..dca75cad9
--- /dev/null
+++ b/tools/perf/util/branch.h
@@ -0,0 +1,94 @@
+#ifndef _PERF_BRANCH_H
+#define _PERF_BRANCH_H 1
+/*
+ * The linux/stddef.h isn't need here, but is needed for __always_inline used
+ * in files included from uapi/linux/perf_event.h such as
+ * /usr/include/linux/swab.h and /usr/include/linux/byteorder/little_endian.h,
+ * detected in at least musl libc, used in Alpine Linux. -acme
+ */
+#include <stdio.h>
+#include <stdint.h>
+#include <linux/compiler.h>
+#include <linux/stddef.h>
+#include <linux/perf_event.h>
+#include <linux/types.h>
+#include "event.h"
+
+struct branch_flags {
+	union {
+		u64 value;
+		struct {
+			u64 mispred:1;
+			u64 predicted:1;
+			u64 in_tx:1;
+			u64 abort:1;
+			u64 cycles:16;
+			u64 type:4;
+			u64 spec:2;
+			u64 new_type:4;
+			u64 priv:3;
+			u64 reserved:31;
+		};
+	};
+};
+
+struct branch_info {
+	struct addr_map_symbol from;
+	struct addr_map_symbol to;
+	struct branch_flags    flags;
+	char		       *srcline_from;
+	char		       *srcline_to;
+};
+
+struct branch_entry {
+	u64			from;
+	u64			to;
+	struct branch_flags	flags;
+};
+
+struct branch_stack {
+	u64			nr;
+	u64			hw_idx;
+	struct branch_entry	entries[];
+};
+
+/*
+ * The hw_idx is only available when PERF_SAMPLE_BRANCH_HW_INDEX is applied.
+ * Otherwise, the output format of a sample with branch stack is
+ * struct branch_stack {
+ *	u64			nr;
+ *	struct branch_entry	entries[0];
+ * }
+ * Check whether the hw_idx is available,
+ * and return the corresponding pointer of entries[0].
+ */
+static inline struct branch_entry *perf_sample__branch_entries(struct perf_sample *sample)
+{
+	u64 *entry = (u64 *)sample->branch_stack;
+
+	entry++;
+	if (sample->no_hw_idx)
+		return (struct branch_entry *)entry;
+	return (struct branch_entry *)(++entry);
+}
+
+struct branch_type_stat {
+	bool	branch_to;
+	u64	counts[PERF_BR_MAX];
+	u64	new_counts[PERF_BR_NEW_MAX];
+	u64	cond_fwd;
+	u64	cond_bwd;
+	u64	cross_4k;
+	u64	cross_2m;
+};
+
+void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags,
+		       u64 from, u64 to);
+
+const char *branch_type_name(int type);
+const char *branch_new_type_name(int new_type);
+const char *get_branch_type(struct branch_entry *e);
+void branch_type_stat_display(FILE *fp, struct branch_type_stat *st);
+int branch_type_str(struct branch_type_stat *st, char *bf, int bfsize);
+
+#endif /* _PERF_BRANCH_H */
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
new file mode 100644
index 000000000..a839b30c9
--- /dev/null
+++ b/tools/perf/util/build-id.c
@@ -0,0 +1,1018 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * build-id.c
+ *
+ * build-id support
+ *
+ * Copyright (C) 2009, 2010 Red Hat Inc.
+ * Copyright (C) 2009, 2010 Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+#include "util.h" // lsdir(), mkdir_p(), rm_rf()
+#include <dirent.h>
+#include <errno.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include "util/copyfile.h"
+#include "dso.h"
+#include "build-id.h"
+#include "event.h"
+#include "namespaces.h"
+#include "map.h"
+#include "symbol.h"
+#include "thread.h"
+#include <linux/kernel.h>
+#include "debug.h"
+#include "session.h"
+#include "tool.h"
+#include "header.h"
+#include "vdso.h"
+#include "path.h"
+#include "probe-file.h"
+#include "strlist.h"
+
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+#include <elfutils/debuginfod.h>
+#endif
+
+#include <linux/ctype.h>
+#include <linux/zalloc.h>
+#include <linux/string.h>
+#include <asm/bug.h>
+
+static bool no_buildid_cache;
+
+int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
+			   union perf_event *event,
+			   struct perf_sample *sample,
+			   struct evsel *evsel __maybe_unused,
+			   struct machine *machine)
+{
+	struct addr_location al;
+	struct thread *thread = machine__findnew_thread(machine, sample->pid,
+							sample->tid);
+
+	if (thread == NULL) {
+		pr_err("problem processing %d event, skipping it.\n",
+			event->header.type);
+		return -1;
+	}
+
+	if (thread__find_map(thread, sample->cpumode, sample->ip, &al))
+		al.map->dso->hit = 1;
+
+	thread__put(thread);
+	return 0;
+}
+
+static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused,
+				       union perf_event *event,
+				       struct perf_sample *sample
+				       __maybe_unused,
+				       struct machine *machine)
+{
+	struct thread *thread = machine__findnew_thread(machine,
+							event->fork.pid,
+							event->fork.tid);
+
+	dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
+		    event->fork.ppid, event->fork.ptid);
+
+	if (thread) {
+		machine__remove_thread(machine, thread);
+		thread__put(thread);
+	}
+
+	return 0;
+}
+
+struct perf_tool build_id__mark_dso_hit_ops = {
+	.sample	= build_id__mark_dso_hit,
+	.mmap	= perf_event__process_mmap,
+	.mmap2	= perf_event__process_mmap2,
+	.fork	= perf_event__process_fork,
+	.exit	= perf_event__exit_del_thread,
+	.attr		 = perf_event__process_attr,
+	.build_id	 = perf_event__process_build_id,
+	.ordered_events	 = true,
+};
+
+int build_id__sprintf(const struct build_id *build_id, char *bf)
+{
+	char *bid = bf;
+	const u8 *raw = build_id->data;
+	size_t i;
+
+	bf[0] = 0x0;
+
+	for (i = 0; i < build_id->size; ++i) {
+		sprintf(bid, "%02x", *raw);
+		++raw;
+		bid += 2;
+	}
+
+	return (bid - bf) + 1;
+}
+
+int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id)
+{
+	char notes[PATH_MAX];
+	struct build_id bid;
+	int ret;
+
+	if (!root_dir)
+		root_dir = "";
+
+	scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir);
+
+	ret = sysfs__read_build_id(notes, &bid);
+	if (ret < 0)
+		return ret;
+
+	return build_id__sprintf(&bid, sbuild_id);
+}
+
+int filename__sprintf_build_id(const char *pathname, char *sbuild_id)
+{
+	struct build_id bid;
+	int ret;
+
+	ret = filename__read_build_id(pathname, &bid);
+	if (ret < 0)
+		return ret;
+
+	return build_id__sprintf(&bid, sbuild_id);
+}
+
+/* asnprintf consolidates asprintf and snprintf */
+static int asnprintf(char **strp, size_t size, const char *fmt, ...)
+{
+	va_list ap;
+	int ret;
+
+	if (!strp)
+		return -EINVAL;
+
+	va_start(ap, fmt);
+	if (*strp)
+		ret = vsnprintf(*strp, size, fmt, ap);
+	else
+		ret = vasprintf(strp, fmt, ap);
+	va_end(ap);
+
+	return ret;
+}
+
+char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
+				    size_t size)
+{
+	bool retry_old = true;
+
+	snprintf(bf, size, "%s/%s/%s/kallsyms",
+		 buildid_dir, DSO__NAME_KALLSYMS, sbuild_id);
+retry:
+	if (!access(bf, F_OK))
+		return bf;
+	if (retry_old) {
+		/* Try old style kallsyms cache */
+		snprintf(bf, size, "%s/%s/%s",
+			 buildid_dir, DSO__NAME_KALLSYMS, sbuild_id);
+		retry_old = false;
+		goto retry;
+	}
+
+	return NULL;
+}
+
+char *build_id_cache__linkname(const char *sbuild_id, char *bf, size_t size)
+{
+	char *tmp = bf;
+	int ret = asnprintf(&bf, size, "%s/.build-id/%.2s/%s", buildid_dir,
+			    sbuild_id, sbuild_id + 2);
+	if (ret < 0 || (tmp && size < (unsigned int)ret))
+		return NULL;
+	return bf;
+}
+
+/* The caller is responsible to free the returned buffer. */
+char *build_id_cache__origname(const char *sbuild_id)
+{
+	char *linkname;
+	char buf[PATH_MAX];
+	char *ret = NULL, *p;
+	size_t offs = 5;	/* == strlen("../..") */
+	ssize_t len;
+
+	linkname = build_id_cache__linkname(sbuild_id, NULL, 0);
+	if (!linkname)
+		return NULL;
+
+	len = readlink(linkname, buf, sizeof(buf) - 1);
+	if (len <= 0)
+		goto out;
+	buf[len] = '\0';
+
+	/* The link should be "../..<origpath>/<sbuild_id>" */
+	p = strrchr(buf, '/');	/* Cut off the "/<sbuild_id>" */
+	if (p && (p > buf + offs)) {
+		*p = '\0';
+		if (buf[offs + 1] == '[')
+			offs++;	/*
+				 * This is a DSO name, like [kernel.kallsyms].
+				 * Skip the first '/', since this is not the
+				 * cache of a regular file.
+				 */
+		ret = strdup(buf + offs);	/* Skip "../..[/]" */
+	}
+out:
+	free(linkname);
+	return ret;
+}
+
+/* Check if the given build_id cache is valid on current running system */
+static bool build_id_cache__valid_id(char *sbuild_id)
+{
+	char real_sbuild_id[SBUILD_ID_SIZE] = "";
+	char *pathname;
+	int ret = 0;
+	bool result = false;
+
+	pathname = build_id_cache__origname(sbuild_id);
+	if (!pathname)
+		return false;
+
+	if (!strcmp(pathname, DSO__NAME_KALLSYMS))
+		ret = sysfs__sprintf_build_id("/", real_sbuild_id);
+	else if (pathname[0] == '/')
+		ret = filename__sprintf_build_id(pathname, real_sbuild_id);
+	else
+		ret = -EINVAL;	/* Should we support other special DSO cache? */
+	if (ret >= 0)
+		result = (strcmp(sbuild_id, real_sbuild_id) == 0);
+	free(pathname);
+
+	return result;
+}
+
+static const char *build_id_cache__basename(bool is_kallsyms, bool is_vdso,
+					    bool is_debug)
+{
+	return is_kallsyms ? "kallsyms" : (is_vdso ? "vdso" : (is_debug ?
+	    "debug" : "elf"));
+}
+
+char *__dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
+			       bool is_debug, bool is_kallsyms)
+{
+	bool is_vdso = dso__is_vdso((struct dso *)dso);
+	char sbuild_id[SBUILD_ID_SIZE];
+	char *linkname;
+	bool alloc = (bf == NULL);
+	int ret;
+
+	if (!dso->has_build_id)
+		return NULL;
+
+	build_id__sprintf(&dso->bid, sbuild_id);
+	linkname = build_id_cache__linkname(sbuild_id, NULL, 0);
+	if (!linkname)
+		return NULL;
+
+	/* Check if old style build_id cache */
+	if (is_regular_file(linkname))
+		ret = asnprintf(&bf, size, "%s", linkname);
+	else
+		ret = asnprintf(&bf, size, "%s/%s", linkname,
+			 build_id_cache__basename(is_kallsyms, is_vdso,
+						  is_debug));
+	if (ret < 0 || (!alloc && size < (unsigned int)ret))
+		bf = NULL;
+	free(linkname);
+
+	return bf;
+}
+
+char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
+			     bool is_debug)
+{
+	bool is_kallsyms = dso__is_kallsyms((struct dso *)dso);
+
+	return __dso__build_id_filename(dso, bf, size, is_debug, is_kallsyms);
+}
+
+static int write_buildid(const char *name, size_t name_len, struct build_id *bid,
+			 pid_t pid, u16 misc, struct feat_fd *fd)
+{
+	int err;
+	struct perf_record_header_build_id b;
+	size_t len;
+
+	len = name_len + 1;
+	len = PERF_ALIGN(len, NAME_ALIGN);
+
+	memset(&b, 0, sizeof(b));
+	memcpy(&b.data, bid->data, bid->size);
+	b.size = (u8) bid->size;
+	misc |= PERF_RECORD_MISC_BUILD_ID_SIZE;
+	b.pid = pid;
+	b.header.misc = misc;
+	b.header.size = sizeof(b) + len;
+
+	err = do_write(fd, &b, sizeof(b));
+	if (err < 0)
+		return err;
+
+	return write_padded(fd, name, name_len + 1, len);
+}
+
+static int machine__write_buildid_table(struct machine *machine,
+					struct feat_fd *fd)
+{
+	int err = 0;
+	struct dso *pos;
+	u16 kmisc = PERF_RECORD_MISC_KERNEL,
+	    umisc = PERF_RECORD_MISC_USER;
+
+	if (!machine__is_host(machine)) {
+		kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
+		umisc = PERF_RECORD_MISC_GUEST_USER;
+	}
+
+	dsos__for_each_with_build_id(pos, &machine->dsos.head) {
+		const char *name;
+		size_t name_len;
+		bool in_kernel = false;
+
+		if (!pos->hit && !dso__is_vdso(pos))
+			continue;
+
+		if (dso__is_vdso(pos)) {
+			name = pos->short_name;
+			name_len = pos->short_name_len;
+		} else if (dso__is_kcore(pos)) {
+			name = machine->mmap_name;
+			name_len = strlen(name);
+		} else {
+			name = pos->long_name;
+			name_len = pos->long_name_len;
+		}
+
+		in_kernel = pos->kernel ||
+				is_kernel_module(name,
+					PERF_RECORD_MISC_CPUMODE_UNKNOWN);
+		err = write_buildid(name, name_len, &pos->bid, machine->pid,
+				    in_kernel ? kmisc : umisc, fd);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+int perf_session__write_buildid_table(struct perf_session *session,
+				      struct feat_fd *fd)
+{
+	struct rb_node *nd;
+	int err = machine__write_buildid_table(&session->machines.host, fd);
+
+	if (err)
+		return err;
+
+	for (nd = rb_first_cached(&session->machines.guests); nd;
+	     nd = rb_next(nd)) {
+		struct machine *pos = rb_entry(nd, struct machine, rb_node);
+		err = machine__write_buildid_table(pos, fd);
+		if (err)
+			break;
+	}
+	return err;
+}
+
+static int __dsos__hit_all(struct list_head *head)
+{
+	struct dso *pos;
+
+	list_for_each_entry(pos, head, node)
+		pos->hit = true;
+
+	return 0;
+}
+
+static int machine__hit_all_dsos(struct machine *machine)
+{
+	return __dsos__hit_all(&machine->dsos.head);
+}
+
+int dsos__hit_all(struct perf_session *session)
+{
+	struct rb_node *nd;
+	int err;
+
+	err = machine__hit_all_dsos(&session->machines.host);
+	if (err)
+		return err;
+
+	for (nd = rb_first_cached(&session->machines.guests); nd;
+	     nd = rb_next(nd)) {
+		struct machine *pos = rb_entry(nd, struct machine, rb_node);
+
+		err = machine__hit_all_dsos(pos);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+void disable_buildid_cache(void)
+{
+	no_buildid_cache = true;
+}
+
+static bool lsdir_bid_head_filter(const char *name __maybe_unused,
+				  struct dirent *d)
+{
+	return (strlen(d->d_name) == 2) &&
+		isxdigit(d->d_name[0]) && isxdigit(d->d_name[1]);
+}
+
+static bool lsdir_bid_tail_filter(const char *name __maybe_unused,
+				  struct dirent *d)
+{
+	int i = 0;
+	while (isxdigit(d->d_name[i]) && i < SBUILD_ID_SIZE - 3)
+		i++;
+	return (i >= SBUILD_ID_MIN_SIZE - 3) && (i <= SBUILD_ID_SIZE - 3) &&
+		(d->d_name[i] == '\0');
+}
+
+struct strlist *build_id_cache__list_all(bool validonly)
+{
+	struct strlist *toplist, *linklist = NULL, *bidlist;
+	struct str_node *nd, *nd2;
+	char *topdir, *linkdir = NULL;
+	char sbuild_id[SBUILD_ID_SIZE];
+
+	/* for filename__ functions */
+	if (validonly)
+		symbol__init(NULL);
+
+	/* Open the top-level directory */
+	if (asprintf(&topdir, "%s/.build-id/", buildid_dir) < 0)
+		return NULL;
+
+	bidlist = strlist__new(NULL, NULL);
+	if (!bidlist)
+		goto out;
+
+	toplist = lsdir(topdir, lsdir_bid_head_filter);
+	if (!toplist) {
+		pr_debug("Error in lsdir(%s): %d\n", topdir, errno);
+		/* If there is no buildid cache, return an empty list */
+		if (errno == ENOENT)
+			goto out;
+		goto err_out;
+	}
+
+	strlist__for_each_entry(nd, toplist) {
+		if (asprintf(&linkdir, "%s/%s", topdir, nd->s) < 0)
+			goto err_out;
+		/* Open the lower-level directory */
+		linklist = lsdir(linkdir, lsdir_bid_tail_filter);
+		if (!linklist) {
+			pr_debug("Error in lsdir(%s): %d\n", linkdir, errno);
+			goto err_out;
+		}
+		strlist__for_each_entry(nd2, linklist) {
+			if (snprintf(sbuild_id, SBUILD_ID_SIZE, "%s%s",
+				     nd->s, nd2->s) > SBUILD_ID_SIZE - 1)
+				goto err_out;
+			if (validonly && !build_id_cache__valid_id(sbuild_id))
+				continue;
+			if (strlist__add(bidlist, sbuild_id) < 0)
+				goto err_out;
+		}
+		strlist__delete(linklist);
+		zfree(&linkdir);
+	}
+
+out_free:
+	strlist__delete(toplist);
+out:
+	free(topdir);
+
+	return bidlist;
+
+err_out:
+	strlist__delete(linklist);
+	zfree(&linkdir);
+	strlist__delete(bidlist);
+	bidlist = NULL;
+	goto out_free;
+}
+
+static bool str_is_build_id(const char *maybe_sbuild_id, size_t len)
+{
+	size_t i;
+
+	for (i = 0; i < len; i++) {
+		if (!isxdigit(maybe_sbuild_id[i]))
+			return false;
+	}
+	return true;
+}
+
+/* Return the valid complete build-id */
+char *build_id_cache__complement(const char *incomplete_sbuild_id)
+{
+	struct strlist *bidlist;
+	struct str_node *nd, *cand = NULL;
+	char *sbuild_id = NULL;
+	size_t len = strlen(incomplete_sbuild_id);
+
+	if (len >= SBUILD_ID_SIZE ||
+	    !str_is_build_id(incomplete_sbuild_id, len))
+		return NULL;
+
+	bidlist = build_id_cache__list_all(true);
+	if (!bidlist)
+		return NULL;
+
+	strlist__for_each_entry(nd, bidlist) {
+		if (strncmp(nd->s, incomplete_sbuild_id, len) != 0)
+			continue;
+		if (cand) {	/* Error: There are more than 2 candidates. */
+			cand = NULL;
+			break;
+		}
+		cand = nd;
+	}
+	if (cand)
+		sbuild_id = strdup(cand->s);
+	strlist__delete(bidlist);
+
+	return sbuild_id;
+}
+
+char *build_id_cache__cachedir(const char *sbuild_id, const char *name,
+			       struct nsinfo *nsi, bool is_kallsyms,
+			       bool is_vdso)
+{
+	char *realname = (char *)name, *filename;
+	bool slash = is_kallsyms || is_vdso;
+
+	if (!slash)
+		realname = nsinfo__realpath(name, nsi);
+
+	if (asprintf(&filename, "%s%s%s%s%s", buildid_dir, slash ? "/" : "",
+		     is_vdso ? DSO__NAME_VDSO : (realname ? realname : name),
+		     sbuild_id ? "/" : "", sbuild_id ?: "") < 0)
+		filename = NULL;
+
+	if (!slash)
+		free(realname);
+
+	return filename;
+}
+
+int build_id_cache__list_build_ids(const char *pathname, struct nsinfo *nsi,
+				   struct strlist **result)
+{
+	char *dir_name;
+	int ret = 0;
+
+	dir_name = build_id_cache__cachedir(NULL, pathname, nsi, false, false);
+	if (!dir_name)
+		return -ENOMEM;
+
+	*result = lsdir(dir_name, lsdir_no_dot_filter);
+	if (!*result)
+		ret = -errno;
+	free(dir_name);
+
+	return ret;
+}
+
+#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_GELF_GETNOTE_SUPPORT)
+static int build_id_cache__add_sdt_cache(const char *sbuild_id,
+					  const char *realname,
+					  struct nsinfo *nsi)
+{
+	struct probe_cache *cache;
+	int ret;
+	struct nscookie nsc;
+
+	cache = probe_cache__new(sbuild_id, nsi);
+	if (!cache)
+		return -1;
+
+	nsinfo__mountns_enter(nsi, &nsc);
+	ret = probe_cache__scan_sdt(cache, realname);
+	nsinfo__mountns_exit(&nsc);
+	if (ret >= 0) {
+		pr_debug4("Found %d SDTs in %s\n", ret, realname);
+		if (probe_cache__commit(cache) < 0)
+			ret = -1;
+	}
+	probe_cache__delete(cache);
+	return ret;
+}
+#else
+#define build_id_cache__add_sdt_cache(sbuild_id, realname, nsi) (0)
+#endif
+
+static char *build_id_cache__find_debug(const char *sbuild_id,
+					struct nsinfo *nsi,
+					const char *root_dir)
+{
+	const char *dirname = "/usr/lib/debug/.build-id/";
+	char *realname = NULL;
+	char dirbuf[PATH_MAX];
+	char *debugfile;
+	struct nscookie nsc;
+	size_t len = 0;
+
+	debugfile = calloc(1, PATH_MAX);
+	if (!debugfile)
+		goto out;
+
+	if (root_dir) {
+		path__join(dirbuf, PATH_MAX, root_dir, dirname);
+		dirname = dirbuf;
+	}
+
+	len = __symbol__join_symfs(debugfile, PATH_MAX, dirname);
+	snprintf(debugfile + len, PATH_MAX - len, "%.2s/%s.debug", sbuild_id,
+		 sbuild_id + 2);
+
+	nsinfo__mountns_enter(nsi, &nsc);
+	realname = realpath(debugfile, NULL);
+	if (realname && access(realname, R_OK))
+		zfree(&realname);
+	nsinfo__mountns_exit(&nsc);
+
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+	if (realname == NULL) {
+		debuginfod_client* c;
+
+		pr_debug("Downloading debug info with build id %s\n", sbuild_id);
+
+		c = debuginfod_begin();
+		if (c != NULL) {
+			int fd = debuginfod_find_debuginfo(c,
+					(const unsigned char*)sbuild_id, 0,
+					&realname);
+			if (fd >= 0)
+				close(fd); /* retaining reference by realname */
+			debuginfod_end(c);
+		}
+	}
+#endif
+
+out:
+	free(debugfile);
+	return realname;
+}
+
+int
+build_id_cache__add(const char *sbuild_id, const char *name, const char *realname,
+		    struct nsinfo *nsi, bool is_kallsyms, bool is_vdso,
+		    const char *proper_name, const char *root_dir)
+{
+	const size_t size = PATH_MAX;
+	char *filename = NULL, *dir_name = NULL, *linkname = zalloc(size), *tmp;
+	char *debugfile = NULL;
+	int err = -1;
+
+	if (!proper_name)
+		proper_name = name;
+
+	dir_name = build_id_cache__cachedir(sbuild_id, proper_name, nsi, is_kallsyms,
+					    is_vdso);
+	if (!dir_name)
+		goto out_free;
+
+	/* Remove old style build-id cache */
+	if (is_regular_file(dir_name))
+		if (unlink(dir_name))
+			goto out_free;
+
+	if (mkdir_p(dir_name, 0755))
+		goto out_free;
+
+	/* Save the allocated buildid dirname */
+	if (asprintf(&filename, "%s/%s", dir_name,
+		     build_id_cache__basename(is_kallsyms, is_vdso,
+		     false)) < 0) {
+		filename = NULL;
+		goto out_free;
+	}
+
+	if (access(filename, F_OK)) {
+		if (is_kallsyms) {
+			if (copyfile("/proc/kallsyms", filename))
+				goto out_free;
+		} else if (nsi && nsinfo__need_setns(nsi)) {
+			if (copyfile_ns(name, filename, nsi))
+				goto out_free;
+		} else if (link(realname, filename) && errno != EEXIST &&
+				copyfile(name, filename))
+			goto out_free;
+	}
+
+	/* Some binaries are stripped, but have .debug files with their symbol
+	 * table.  Check to see if we can locate one of those, since the elf
+	 * file itself may not be very useful to users of our tools without a
+	 * symtab.
+	 */
+	if (!is_kallsyms && !is_vdso &&
+	    strncmp(".ko", name + strlen(name) - 3, 3)) {
+		debugfile = build_id_cache__find_debug(sbuild_id, nsi, root_dir);
+		if (debugfile) {
+			zfree(&filename);
+			if (asprintf(&filename, "%s/%s", dir_name,
+			    build_id_cache__basename(false, false, true)) < 0) {
+				filename = NULL;
+				goto out_free;
+			}
+			if (access(filename, F_OK)) {
+				if (nsi && nsinfo__need_setns(nsi)) {
+					if (copyfile_ns(debugfile, filename,
+							nsi))
+						goto out_free;
+				} else if (link(debugfile, filename) &&
+						errno != EEXIST &&
+						copyfile(debugfile, filename))
+					goto out_free;
+			}
+		}
+	}
+
+	if (!build_id_cache__linkname(sbuild_id, linkname, size))
+		goto out_free;
+	tmp = strrchr(linkname, '/');
+	*tmp = '\0';
+
+	if (access(linkname, X_OK) && mkdir_p(linkname, 0755))
+		goto out_free;
+
+	*tmp = '/';
+	tmp = dir_name + strlen(buildid_dir) - 5;
+	memcpy(tmp, "../..", 5);
+
+	if (symlink(tmp, linkname) == 0) {
+		err = 0;
+	} else if (errno == EEXIST) {
+		char path[PATH_MAX];
+		ssize_t len;
+
+		len = readlink(linkname, path, sizeof(path) - 1);
+		if (len <= 0) {
+			pr_err("Can't read link: %s\n", linkname);
+			goto out_free;
+		}
+		path[len] = '\0';
+
+		if (strcmp(tmp, path)) {
+			pr_debug("build <%s> already linked to %s\n",
+				 sbuild_id, linkname);
+		}
+		err = 0;
+	}
+
+	/* Update SDT cache : error is just warned */
+	if (realname &&
+	    build_id_cache__add_sdt_cache(sbuild_id, realname, nsi) < 0)
+		pr_debug4("Failed to update/scan SDT cache for %s\n", realname);
+
+out_free:
+	free(filename);
+	free(debugfile);
+	free(dir_name);
+	free(linkname);
+	return err;
+}
+
+int __build_id_cache__add_s(const char *sbuild_id, const char *name,
+			    struct nsinfo *nsi, bool is_kallsyms, bool is_vdso,
+			    const char *proper_name, const char *root_dir)
+{
+	char *realname = NULL;
+	int err = -1;
+
+	if (!is_kallsyms) {
+		if (!is_vdso)
+			realname = nsinfo__realpath(name, nsi);
+		else
+			realname = realpath(name, NULL);
+		if (!realname)
+			goto out_free;
+	}
+
+	err = build_id_cache__add(sbuild_id, name, realname, nsi,
+				  is_kallsyms, is_vdso, proper_name, root_dir);
+out_free:
+	if (!is_kallsyms)
+		free(realname);
+	return err;
+}
+
+static int build_id_cache__add_b(const struct build_id *bid,
+				 const char *name, struct nsinfo *nsi,
+				 bool is_kallsyms, bool is_vdso,
+				 const char *proper_name,
+				 const char *root_dir)
+{
+	char sbuild_id[SBUILD_ID_SIZE];
+
+	build_id__sprintf(bid, sbuild_id);
+
+	return __build_id_cache__add_s(sbuild_id, name, nsi, is_kallsyms,
+				       is_vdso, proper_name, root_dir);
+}
+
+bool build_id_cache__cached(const char *sbuild_id)
+{
+	bool ret = false;
+	char *filename = build_id_cache__linkname(sbuild_id, NULL, 0);
+
+	if (filename && !access(filename, F_OK))
+		ret = true;
+	free(filename);
+
+	return ret;
+}
+
+int build_id_cache__remove_s(const char *sbuild_id)
+{
+	const size_t size = PATH_MAX;
+	char *filename = zalloc(size),
+	     *linkname = zalloc(size), *tmp;
+	int err = -1;
+
+	if (filename == NULL || linkname == NULL)
+		goto out_free;
+
+	if (!build_id_cache__linkname(sbuild_id, linkname, size))
+		goto out_free;
+
+	if (access(linkname, F_OK))
+		goto out_free;
+
+	if (readlink(linkname, filename, size - 1) < 0)
+		goto out_free;
+
+	if (unlink(linkname))
+		goto out_free;
+
+	/*
+	 * Since the link is relative, we must make it absolute:
+	 */
+	tmp = strrchr(linkname, '/') + 1;
+	snprintf(tmp, size - (tmp - linkname), "%s", filename);
+
+	if (rm_rf(linkname))
+		goto out_free;
+
+	err = 0;
+out_free:
+	free(filename);
+	free(linkname);
+	return err;
+}
+
+static int filename__read_build_id_ns(const char *filename,
+				      struct build_id *bid,
+				      struct nsinfo *nsi)
+{
+	struct nscookie nsc;
+	int ret;
+
+	nsinfo__mountns_enter(nsi, &nsc);
+	ret = filename__read_build_id(filename, bid);
+	nsinfo__mountns_exit(&nsc);
+
+	return ret;
+}
+
+static bool dso__build_id_mismatch(struct dso *dso, const char *name)
+{
+	struct build_id bid;
+	bool ret = false;
+
+	mutex_lock(&dso->lock);
+	if (filename__read_build_id_ns(name, &bid, dso->nsinfo) >= 0)
+		ret = !dso__build_id_equal(dso, &bid);
+
+	mutex_unlock(&dso->lock);
+
+	return ret;
+}
+
+static int dso__cache_build_id(struct dso *dso, struct machine *machine,
+			       void *priv __maybe_unused)
+{
+	bool is_kallsyms = dso__is_kallsyms(dso);
+	bool is_vdso = dso__is_vdso(dso);
+	const char *name = dso->long_name;
+	const char *proper_name = NULL;
+	const char *root_dir = NULL;
+	char *allocated_name = NULL;
+	int ret = 0;
+
+	if (!dso->has_build_id)
+		return 0;
+
+	if (dso__is_kcore(dso)) {
+		is_kallsyms = true;
+		name = machine->mmap_name;
+	}
+
+	if (!machine__is_host(machine)) {
+		if (*machine->root_dir) {
+			root_dir = machine->root_dir;
+			ret = asprintf(&allocated_name, "%s/%s", root_dir, name);
+			if (ret < 0)
+				return ret;
+			proper_name = name;
+			name = allocated_name;
+		} else if (is_kallsyms) {
+			/* Cannot get guest kallsyms */
+			return 0;
+		}
+	}
+
+	if (!is_kallsyms && dso__build_id_mismatch(dso, name))
+		goto out_free;
+
+	mutex_lock(&dso->lock);
+	ret = build_id_cache__add_b(&dso->bid, name, dso->nsinfo,
+				    is_kallsyms, is_vdso, proper_name, root_dir);
+	mutex_unlock(&dso->lock);
+out_free:
+	free(allocated_name);
+	return ret;
+}
+
+static int
+machines__for_each_dso(struct machines *machines, machine__dso_t fn, void *priv)
+{
+	int ret = machine__for_each_dso(&machines->host, fn, priv);
+	struct rb_node *nd;
+
+	for (nd = rb_first_cached(&machines->guests); nd;
+	     nd = rb_next(nd)) {
+		struct machine *pos = rb_entry(nd, struct machine, rb_node);
+
+		ret |= machine__for_each_dso(pos, fn, priv);
+	}
+	return ret ? -1 : 0;
+}
+
+int __perf_session__cache_build_ids(struct perf_session *session,
+				    machine__dso_t fn, void *priv)
+{
+	if (no_buildid_cache)
+		return 0;
+
+	if (mkdir(buildid_dir, 0755) != 0 && errno != EEXIST)
+		return -1;
+
+	return machines__for_each_dso(&session->machines, fn, priv) ?  -1 : 0;
+}
+
+int perf_session__cache_build_ids(struct perf_session *session)
+{
+	return __perf_session__cache_build_ids(session, dso__cache_build_id, NULL);
+}
+
+static bool machine__read_build_ids(struct machine *machine, bool with_hits)
+{
+	return __dsos__read_build_ids(&machine->dsos.head, with_hits);
+}
+
+bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
+{
+	struct rb_node *nd;
+	bool ret = machine__read_build_ids(&session->machines.host, with_hits);
+
+	for (nd = rb_first_cached(&session->machines.guests); nd;
+	     nd = rb_next(nd)) {
+		struct machine *pos = rb_entry(nd, struct machine, rb_node);
+		ret |= machine__read_build_ids(pos, with_hits);
+	}
+
+	return ret;
+}
+
+void build_id__init(struct build_id *bid, const u8 *data, size_t size)
+{
+	WARN_ON(size > BUILD_ID_SIZE);
+	memcpy(bid->data, data, size);
+	bid->size = size;
+}
+
+bool build_id__is_defined(const struct build_id *bid)
+{
+	return bid && bid->size ? !!memchr_inv(bid->data, 0, bid->size) : false;
+}
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
new file mode 100644
index 000000000..4e3a11693
--- /dev/null
+++ b/tools/perf/util/build-id.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_BUILD_ID_H_
+#define PERF_BUILD_ID_H_ 1
+
+#define BUILD_ID_SIZE	20 /* SHA-1 length in bytes */
+#define BUILD_ID_MIN_SIZE	16 /* MD5/UUID/GUID length in bytes */
+#define SBUILD_ID_SIZE	(BUILD_ID_SIZE * 2 + 1)
+#define SBUILD_ID_MIN_SIZE	(BUILD_ID_MIN_SIZE * 2 + 1)
+
+#include "machine.h"
+#include "tool.h"
+#include <linux/types.h>
+
+struct build_id {
+	u8	data[BUILD_ID_SIZE];
+	size_t	size;
+};
+
+struct nsinfo;
+
+extern struct perf_tool build_id__mark_dso_hit_ops;
+struct dso;
+struct feat_fd;
+
+void build_id__init(struct build_id *bid, const u8 *data, size_t size);
+int build_id__sprintf(const struct build_id *build_id, char *bf);
+bool build_id__is_defined(const struct build_id *bid);
+int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id);
+int filename__sprintf_build_id(const char *pathname, char *sbuild_id);
+char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
+				    size_t size);
+
+char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
+			     bool is_debug);
+char *__dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
+			       bool is_debug, bool is_kallsyms);
+
+int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event,
+			   struct perf_sample *sample, struct evsel *evsel,
+			   struct machine *machine);
+
+int dsos__hit_all(struct perf_session *session);
+
+int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event,
+			       struct perf_sample *sample, struct evsel *evsel,
+			       struct machine *machine);
+
+bool perf_session__read_build_ids(struct perf_session *session, bool with_hits);
+int perf_session__write_buildid_table(struct perf_session *session,
+				      struct feat_fd *fd);
+int perf_session__cache_build_ids(struct perf_session *session);
+int __perf_session__cache_build_ids(struct perf_session *session,
+				    machine__dso_t fn, void *priv);
+
+char *build_id_cache__origname(const char *sbuild_id);
+char *build_id_cache__linkname(const char *sbuild_id, char *bf, size_t size);
+char *build_id_cache__cachedir(const char *sbuild_id, const char *name,
+			       struct nsinfo *nsi, bool is_kallsyms,
+			       bool is_vdso);
+
+struct strlist;
+
+struct strlist *build_id_cache__list_all(bool validonly);
+char *build_id_cache__complement(const char *incomplete_sbuild_id);
+int build_id_cache__list_build_ids(const char *pathname, struct nsinfo *nsi,
+				   struct strlist **result);
+bool build_id_cache__cached(const char *sbuild_id);
+int build_id_cache__add(const char *sbuild_id, const char *name, const char *realname,
+			struct nsinfo *nsi, bool is_kallsyms, bool is_vdso,
+			const char *proper_name, const char *root_dir);
+int __build_id_cache__add_s(const char *sbuild_id,
+			    const char *name, struct nsinfo *nsi,
+			    bool is_kallsyms, bool is_vdso,
+			    const char *proper_name, const char *root_dir);
+static inline int build_id_cache__add_s(const char *sbuild_id,
+					const char *name, struct nsinfo *nsi,
+					bool is_kallsyms, bool is_vdso)
+{
+	return __build_id_cache__add_s(sbuild_id, name, nsi, is_kallsyms, is_vdso, NULL, NULL);
+}
+int build_id_cache__remove_s(const char *sbuild_id);
+
+extern char buildid_dir[];
+
+void set_buildid_dir(const char *dir);
+void disable_buildid_cache(void);
+
+#endif
diff --git a/tools/perf/util/c++/Build b/tools/perf/util/c++/Build
new file mode 100644
index 000000000..613ecfd76
--- /dev/null
+++ b/tools/perf/util/c++/Build
@@ -0,0 +1,2 @@
+perf-$(CONFIG_CLANGLLVM) += clang.o
+perf-$(CONFIG_CLANGLLVM) += clang-test.o
diff --git a/tools/perf/util/c++/clang-c.h b/tools/perf/util/c++/clang-c.h
new file mode 100644
index 000000000..d3731a876
--- /dev/null
+++ b/tools/perf/util/c++/clang-c.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_UTIL_CLANG_C_H
+#define PERF_UTIL_CLANG_C_H
+
+#include <stddef.h>	/* for size_t */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef HAVE_LIBCLANGLLVM_SUPPORT
+extern void perf_clang__init(void);
+extern void perf_clang__cleanup(void);
+
+struct test_suite;
+extern int test__clang_to_IR(struct test_suite *test, int subtest);
+extern int test__clang_to_obj(struct test_suite *test, int subtest);
+
+extern int perf_clang__compile_bpf(const char *filename,
+				   void **p_obj_buf,
+				   size_t *p_obj_buf_sz);
+#else
+
+#include <errno.h>
+#include <linux/compiler.h>	/* for __maybe_unused */
+
+static inline void perf_clang__init(void) { }
+static inline void perf_clang__cleanup(void) { }
+
+static inline int
+perf_clang__compile_bpf(const char *filename __maybe_unused,
+			void **p_obj_buf __maybe_unused,
+			size_t *p_obj_buf_sz __maybe_unused)
+{
+	return -ENOTSUP;
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/tools/perf/util/c++/clang-test.cpp b/tools/perf/util/c++/clang-test.cpp
new file mode 100644
index 000000000..a4683ca53
--- /dev/null
+++ b/tools/perf/util/c++/clang-test.cpp
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "clang.h"
+#include "clang-c.h"
+extern "C" {
+#include "../util.h"
+}
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+
+#include <tests/llvm.h>
+#include <string>
+
+class perf_clang_scope {
+public:
+	explicit perf_clang_scope() {perf_clang__init();}
+	~perf_clang_scope() {perf_clang__cleanup();}
+};
+
+static std::unique_ptr<llvm::Module>
+__test__clang_to_IR(void)
+{
+	unsigned int kernel_version;
+
+	if (fetch_kernel_version(&kernel_version, NULL, 0))
+		return std::unique_ptr<llvm::Module>(nullptr);
+
+	std::string cflag_kver("-DLINUX_VERSION_CODE=" +
+				std::to_string(kernel_version));
+
+	std::unique_ptr<llvm::Module> M =
+		perf::getModuleFromSource({cflag_kver.c_str()},
+					  "perf-test.c",
+					  test_llvm__bpf_base_prog);
+	return M;
+}
+
+extern "C" {
+int test__clang_to_IR(struct test_suite *test __maybe_unused,
+                      int subtest __maybe_unused)
+{
+	perf_clang_scope _scope;
+
+	auto M = __test__clang_to_IR();
+	if (!M)
+		return -1;
+	for (llvm::Function& F : *M)
+		if (F.getName() == "bpf_func__SyS_epoll_pwait")
+			return 0;
+	return -1;
+}
+
+int test__clang_to_obj(struct test_suite *test __maybe_unused,
+                       int subtest __maybe_unused)
+{
+	perf_clang_scope _scope;
+
+	auto M = __test__clang_to_IR();
+	if (!M)
+		return -1;
+
+	auto Buffer = perf::getBPFObjectFromModule(&*M);
+	if (!Buffer)
+		return -1;
+	return 0;
+}
+
+}
diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp
new file mode 100644
index 000000000..1aad7d6d3
--- /dev/null
+++ b/tools/perf/util/c++/clang.cpp
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * llvm C frontend for perf. Support dynamically compile C file
+ *
+ * Inspired by clang example code:
+ * http://llvm.org/svn/llvm-project/cfe/trunk/examples/clang-interpreter/main.cpp
+ *
+ * Copyright (C) 2016 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2016 Huawei Inc.
+ */
+
+#include "clang/Basic/Version.h"
+#include "clang/CodeGen/CodeGenAction.h"
+#include "clang/Frontend/CompilerInvocation.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/TextDiagnosticPrinter.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/ManagedStatic.h"
+#if CLANG_VERSION_MAJOR >= 14
+#include "llvm/MC/TargetRegistry.h"
+#else
+#include "llvm/Support/TargetRegistry.h"
+#endif
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <memory>
+
+#include "clang.h"
+#include "clang-c.h"
+
+namespace perf {
+
+static std::unique_ptr<llvm::LLVMContext> LLVMCtx;
+
+using namespace clang;
+
+static CompilerInvocation *
+createCompilerInvocation(llvm::opt::ArgStringList CFlags, StringRef& Path,
+			 DiagnosticsEngine& Diags)
+{
+	llvm::opt::ArgStringList CCArgs {
+		"-cc1",
+		"-triple", "bpf-pc-linux",
+		"-fsyntax-only",
+		"-O2",
+		"-nostdsysteminc",
+		"-nobuiltininc",
+		"-vectorize-loops",
+		"-vectorize-slp",
+		"-Wno-unused-value",
+		"-Wno-pointer-sign",
+		"-x", "c"};
+
+	CCArgs.append(CFlags.begin(), CFlags.end());
+	CompilerInvocation *CI = tooling::newInvocation(&Diags, CCArgs
+#if CLANG_VERSION_MAJOR >= 11
+                                                        ,/*BinaryName=*/nullptr
+#endif
+                                                        );
+
+	FrontendOptions& Opts = CI->getFrontendOpts();
+	Opts.Inputs.clear();
+	Opts.Inputs.emplace_back(Path,
+			FrontendOptions::getInputKindForExtension("c"));
+	return CI;
+}
+
+static std::unique_ptr<llvm::Module>
+getModuleFromSource(llvm::opt::ArgStringList CFlags,
+		    StringRef Path, IntrusiveRefCntPtr<vfs::FileSystem> VFS)
+{
+	CompilerInstance Clang;
+	Clang.createDiagnostics();
+
+#if CLANG_VERSION_MAJOR < 9
+	Clang.setVirtualFileSystem(&*VFS);
+#else
+	Clang.createFileManager(&*VFS);
+#endif
+
+#if CLANG_VERSION_MAJOR < 4
+	IntrusiveRefCntPtr<CompilerInvocation> CI =
+		createCompilerInvocation(std::move(CFlags), Path,
+					 Clang.getDiagnostics());
+	Clang.setInvocation(&*CI);
+#else
+	std::shared_ptr<CompilerInvocation> CI(
+		createCompilerInvocation(std::move(CFlags), Path,
+					 Clang.getDiagnostics()));
+	Clang.setInvocation(CI);
+#endif
+
+	std::unique_ptr<CodeGenAction> Act(new EmitLLVMOnlyAction(&*LLVMCtx));
+	if (!Clang.ExecuteAction(*Act))
+		return std::unique_ptr<llvm::Module>(nullptr);
+
+	return Act->takeModule();
+}
+
+std::unique_ptr<llvm::Module>
+getModuleFromSource(llvm::opt::ArgStringList CFlags,
+		    StringRef Name, StringRef Content)
+{
+	using namespace vfs;
+
+	llvm::IntrusiveRefCntPtr<OverlayFileSystem> OverlayFS(
+			new OverlayFileSystem(getRealFileSystem()));
+	llvm::IntrusiveRefCntPtr<InMemoryFileSystem> MemFS(
+			new InMemoryFileSystem(true));
+
+	/*
+	 * pushOverlay helps setting working dir for MemFS. Must call
+	 * before addFile.
+	 */
+	OverlayFS->pushOverlay(MemFS);
+	MemFS->addFile(Twine(Name), 0, llvm::MemoryBuffer::getMemBuffer(Content));
+
+	return getModuleFromSource(std::move(CFlags), Name, OverlayFS);
+}
+
+std::unique_ptr<llvm::Module>
+getModuleFromSource(llvm::opt::ArgStringList CFlags, StringRef Path)
+{
+	IntrusiveRefCntPtr<vfs::FileSystem> VFS(vfs::getRealFileSystem());
+	return getModuleFromSource(std::move(CFlags), Path, VFS);
+}
+
+std::unique_ptr<llvm::SmallVectorImpl<char>>
+getBPFObjectFromModule(llvm::Module *Module)
+{
+	using namespace llvm;
+
+	std::string TargetTriple("bpf-pc-linux");
+	std::string Error;
+	const Target* Target = TargetRegistry::lookupTarget(TargetTriple, Error);
+	if (!Target) {
+		llvm::errs() << Error;
+		return std::unique_ptr<llvm::SmallVectorImpl<char>>(nullptr);
+	}
+
+	llvm::TargetOptions Opt;
+	TargetMachine *TargetMachine =
+		Target->createTargetMachine(TargetTriple,
+					    "generic", "",
+					    Opt, Reloc::Static);
+
+	Module->setDataLayout(TargetMachine->createDataLayout());
+	Module->setTargetTriple(TargetTriple);
+
+	std::unique_ptr<SmallVectorImpl<char>> Buffer(new SmallVector<char, 0>());
+	raw_svector_ostream ostream(*Buffer);
+
+	legacy::PassManager PM;
+	bool NotAdded;
+	NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream
+#if CLANG_VERSION_MAJOR >= 7
+                                                      , /*DwoOut=*/nullptr
+#endif
+#if CLANG_VERSION_MAJOR < 10
+                                                      , TargetMachine::CGFT_ObjectFile
+#else
+                                                      , llvm::CGFT_ObjectFile
+#endif
+                                                      );
+	if (NotAdded) {
+		llvm::errs() << "TargetMachine can't emit a file of this type\n";
+		return std::unique_ptr<llvm::SmallVectorImpl<char>>(nullptr);
+	}
+	PM.run(*Module);
+
+	return Buffer;
+}
+
+}
+
+extern "C" {
+void perf_clang__init(void)
+{
+	perf::LLVMCtx.reset(new llvm::LLVMContext());
+	LLVMInitializeBPFTargetInfo();
+	LLVMInitializeBPFTarget();
+	LLVMInitializeBPFTargetMC();
+	LLVMInitializeBPFAsmPrinter();
+}
+
+void perf_clang__cleanup(void)
+{
+	perf::LLVMCtx.reset(nullptr);
+	llvm::llvm_shutdown();
+}
+
+int perf_clang__compile_bpf(const char *filename,
+			    void **p_obj_buf,
+			    size_t *p_obj_buf_sz)
+{
+	using namespace perf;
+
+	if (!p_obj_buf || !p_obj_buf_sz)
+		return -EINVAL;
+
+	llvm::opt::ArgStringList CFlags;
+	auto M = getModuleFromSource(std::move(CFlags), filename);
+	if (!M)
+		return  -EINVAL;
+	auto O = getBPFObjectFromModule(&*M);
+	if (!O)
+		return -EINVAL;
+
+	size_t size = O->size_in_bytes();
+	void *buffer;
+
+	buffer = malloc(size);
+	if (!buffer)
+		return -ENOMEM;
+	memcpy(buffer, O->data(), size);
+	*p_obj_buf = buffer;
+	*p_obj_buf_sz = size;
+	return 0;
+}
+}
diff --git a/tools/perf/util/c++/clang.h b/tools/perf/util/c++/clang.h
new file mode 100644
index 000000000..6ce33e22f
--- /dev/null
+++ b/tools/perf/util/c++/clang.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_UTIL_CLANG_H
+#define PERF_UTIL_CLANG_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Option/Option.h"
+#include <memory>
+
+namespace perf {
+
+using namespace llvm;
+
+std::unique_ptr<Module>
+getModuleFromSource(opt::ArgStringList CFlags,
+		    StringRef Name, StringRef Content);
+
+std::unique_ptr<Module>
+getModuleFromSource(opt::ArgStringList CFlags,
+		    StringRef Path);
+
+std::unique_ptr<llvm::SmallVectorImpl<char>>
+getBPFObjectFromModule(llvm::Module *Module);
+
+}
+#endif
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
new file mode 100644
index 000000000..9f2e36ef5
--- /dev/null
+++ b/tools/perf/util/cache.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CACHE_H
+#define __PERF_CACHE_H
+
+#include "strbuf.h"
+#include <subcmd/pager.h>
+#include "../ui/ui.h"
+
+#include <linux/compiler.h>
+#include <linux/string.h>
+
+#define CMD_EXEC_PATH "--exec-path"
+#define CMD_DEBUGFS_DIR "--debugfs-dir="
+
+#define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH"
+#define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR"
+#define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR"
+#define PERF_PAGER_ENVIRONMENT "PERF_PAGER"
+
+int split_cmdline(char *cmdline, const char ***argv);
+
+#define alloc_nr(x) (((x)+16)*3/2)
+
+static inline int is_absolute_path(const char *path)
+{
+	return path[0] == '/';
+}
+
+char *mkpath(const char *fmt, ...) __printf(1, 2);
+
+#endif /* __PERF_CACHE_H */
diff --git a/tools/perf/util/cacheline.c b/tools/perf/util/cacheline.c
new file mode 100644
index 000000000..e98b5250a
--- /dev/null
+++ b/tools/perf/util/cacheline.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "cacheline.h"
+#include <unistd.h>
+
+#ifdef _SC_LEVEL1_DCACHE_LINESIZE
+#define cache_line_size(cacheline_sizep) *cacheline_sizep = sysconf(_SC_LEVEL1_DCACHE_LINESIZE)
+#else
+#include <api/fs/fs.h>
+#include "debug.h"
+static void cache_line_size(int *cacheline_sizep)
+{
+	if (sysfs__read_int("devices/system/cpu/cpu0/cache/index0/coherency_line_size", cacheline_sizep))
+		pr_debug("cannot determine cache line size");
+}
+#endif
+
+int cacheline_size(void)
+{
+	static int size;
+
+	if (!size)
+		cache_line_size(&size);
+
+	return size;
+}
diff --git a/tools/perf/util/cacheline.h b/tools/perf/util/cacheline.h
new file mode 100644
index 000000000..dec8c0fb1
--- /dev/null
+++ b/tools/perf/util/cacheline.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_CACHELINE_H
+#define PERF_CACHELINE_H
+
+#include <linux/compiler.h>
+
+int __pure cacheline_size(void);
+
+static inline u64 cl_address(u64 address)
+{
+	/* return the cacheline of the address */
+	return (address & ~(cacheline_size() - 1));
+}
+
+static inline u64 cl_offset(u64 address)
+{
+	/* return the cacheline of the address */
+	return (address & (cacheline_size() - 1));
+}
+
+#endif // PERF_CACHELINE_H
diff --git a/tools/perf/util/call-path.c b/tools/perf/util/call-path.c
new file mode 100644
index 000000000..5c60b8be1
--- /dev/null
+++ b/tools/perf/util/call-path.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * call-path.h: Manipulate a tree data structure containing function call paths
+ * Copyright (c) 2014, Intel Corporation.
+ */
+
+#include <linux/rbtree.h>
+#include <linux/list.h>
+#include <linux/zalloc.h>
+#include <stdlib.h>
+
+#include "call-path.h"
+
+static void call_path__init(struct call_path *cp, struct call_path *parent,
+			    struct symbol *sym, u64 ip, bool in_kernel)
+{
+	cp->parent = parent;
+	cp->sym = sym;
+	cp->ip = sym ? 0 : ip;
+	cp->db_id = 0;
+	cp->in_kernel = in_kernel;
+	RB_CLEAR_NODE(&cp->rb_node);
+	cp->children = RB_ROOT;
+}
+
+struct call_path_root *call_path_root__new(void)
+{
+	struct call_path_root *cpr;
+
+	cpr = zalloc(sizeof(struct call_path_root));
+	if (!cpr)
+		return NULL;
+	call_path__init(&cpr->call_path, NULL, NULL, 0, false);
+	INIT_LIST_HEAD(&cpr->blocks);
+	return cpr;
+}
+
+void call_path_root__free(struct call_path_root *cpr)
+{
+	struct call_path_block *pos, *n;
+
+	list_for_each_entry_safe(pos, n, &cpr->blocks, node) {
+		list_del_init(&pos->node);
+		free(pos);
+	}
+	free(cpr);
+}
+
+static struct call_path *call_path__new(struct call_path_root *cpr,
+					struct call_path *parent,
+					struct symbol *sym, u64 ip,
+					bool in_kernel)
+{
+	struct call_path_block *cpb;
+	struct call_path *cp;
+	size_t n;
+
+	if (cpr->next < cpr->sz) {
+		cpb = list_last_entry(&cpr->blocks, struct call_path_block,
+				      node);
+	} else {
+		cpb = zalloc(sizeof(struct call_path_block));
+		if (!cpb)
+			return NULL;
+		list_add_tail(&cpb->node, &cpr->blocks);
+		cpr->sz += CALL_PATH_BLOCK_SIZE;
+	}
+
+	n = cpr->next++ & CALL_PATH_BLOCK_MASK;
+	cp = &cpb->cp[n];
+
+	call_path__init(cp, parent, sym, ip, in_kernel);
+
+	return cp;
+}
+
+struct call_path *call_path__findnew(struct call_path_root *cpr,
+				     struct call_path *parent,
+				     struct symbol *sym, u64 ip, u64 ks)
+{
+	struct rb_node **p;
+	struct rb_node *node_parent = NULL;
+	struct call_path *cp;
+	bool in_kernel = ip >= ks;
+
+	if (sym)
+		ip = 0;
+
+	if (!parent)
+		return call_path__new(cpr, parent, sym, ip, in_kernel);
+
+	p = &parent->children.rb_node;
+	while (*p != NULL) {
+		node_parent = *p;
+		cp = rb_entry(node_parent, struct call_path, rb_node);
+
+		if (cp->sym == sym && cp->ip == ip)
+			return cp;
+
+		if (sym < cp->sym || (sym == cp->sym && ip < cp->ip))
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	cp = call_path__new(cpr, parent, sym, ip, in_kernel);
+	if (!cp)
+		return NULL;
+
+	rb_link_node(&cp->rb_node, node_parent, p);
+	rb_insert_color(&cp->rb_node, &parent->children);
+
+	return cp;
+}
diff --git a/tools/perf/util/call-path.h b/tools/perf/util/call-path.h
new file mode 100644
index 000000000..5875cfc81
--- /dev/null
+++ b/tools/perf/util/call-path.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * call-path.h: Manipulate a tree data structure containing function call paths
+ * Copyright (c) 2014, Intel Corporation.
+ */
+
+#ifndef __PERF_CALL_PATH_H
+#define __PERF_CALL_PATH_H
+
+#include <sys/types.h>
+
+#include <linux/types.h>
+#include <linux/rbtree.h>
+
+/**
+ * struct call_path - node in list of calls leading to a function call.
+ * @parent: call path to the parent function call
+ * @sym: symbol of function called
+ * @ip: only if sym is null, the ip of the function
+ * @db_id: id used for db-export
+ * @in_kernel: whether function is a in the kernel
+ * @rb_node: node in parent's tree of called functions
+ * @children: tree of call paths of functions called
+ *
+ * In combination with the call_return structure, the call_path structure
+ * defines a context-sensitive call-graph.
+ */
+struct call_path {
+	struct call_path *parent;
+	struct symbol *sym;
+	u64 ip;
+	u64 db_id;
+	bool in_kernel;
+	struct rb_node rb_node;
+	struct rb_root children;
+};
+
+#define CALL_PATH_BLOCK_SHIFT 8
+#define CALL_PATH_BLOCK_SIZE (1 << CALL_PATH_BLOCK_SHIFT)
+#define CALL_PATH_BLOCK_MASK (CALL_PATH_BLOCK_SIZE - 1)
+
+struct call_path_block {
+	struct call_path cp[CALL_PATH_BLOCK_SIZE];
+	struct list_head node;
+};
+
+/**
+ * struct call_path_root - root of all call paths.
+ * @call_path: root call path
+ * @blocks: list of blocks to store call paths
+ * @next: next free space
+ * @sz: number of spaces
+ */
+struct call_path_root {
+	struct call_path call_path;
+	struct list_head blocks;
+	size_t next;
+	size_t sz;
+};
+
+struct call_path_root *call_path_root__new(void);
+void call_path_root__free(struct call_path_root *cpr);
+
+struct call_path *call_path__findnew(struct call_path_root *cpr,
+				     struct call_path *parent,
+				     struct symbol *sym, u64 ip, u64 ks);
+
+#endif
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
new file mode 100644
index 000000000..a093a15f0
--- /dev/null
+++ b/tools/perf/util/callchain.c
@@ -0,0 +1,1724 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2009-2011, Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ * Handle the callchains from the stream in an ad-hoc radix tree and then
+ * sort them in an rbtree.
+ *
+ * Using a radix for code path provides a fast retrieval and factorizes
+ * memory use. Also that lets us use the paths in a hierarchical graph view.
+ *
+ */
+
+#include <inttypes.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <math.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+
+#include "asm/bug.h"
+
+#include "debug.h"
+#include "dso.h"
+#include "event.h"
+#include "hist.h"
+#include "sort.h"
+#include "machine.h"
+#include "map.h"
+#include "callchain.h"
+#include "branch.h"
+#include "symbol.h"
+#include "util.h"
+#include "../perf.h"
+
+#define CALLCHAIN_PARAM_DEFAULT			\
+	.mode		= CHAIN_GRAPH_ABS,	\
+	.min_percent	= 0.5,			\
+	.order		= ORDER_CALLEE,		\
+	.key		= CCKEY_FUNCTION,	\
+	.value		= CCVAL_PERCENT,	\
+
+struct callchain_param callchain_param = {
+	CALLCHAIN_PARAM_DEFAULT
+};
+
+/*
+ * Are there any events usind DWARF callchains?
+ *
+ * I.e.
+ *
+ * -e cycles/call-graph=dwarf/
+ */
+bool dwarf_callchain_users;
+
+struct callchain_param callchain_param_default = {
+	CALLCHAIN_PARAM_DEFAULT
+};
+
+__thread struct callchain_cursor callchain_cursor;
+
+int parse_callchain_record_opt(const char *arg, struct callchain_param *param)
+{
+	return parse_callchain_record(arg, param);
+}
+
+static int parse_callchain_mode(const char *value)
+{
+	if (!strncmp(value, "graph", strlen(value))) {
+		callchain_param.mode = CHAIN_GRAPH_ABS;
+		return 0;
+	}
+	if (!strncmp(value, "flat", strlen(value))) {
+		callchain_param.mode = CHAIN_FLAT;
+		return 0;
+	}
+	if (!strncmp(value, "fractal", strlen(value))) {
+		callchain_param.mode = CHAIN_GRAPH_REL;
+		return 0;
+	}
+	if (!strncmp(value, "folded", strlen(value))) {
+		callchain_param.mode = CHAIN_FOLDED;
+		return 0;
+	}
+	return -1;
+}
+
+static int parse_callchain_order(const char *value)
+{
+	if (!strncmp(value, "caller", strlen(value))) {
+		callchain_param.order = ORDER_CALLER;
+		callchain_param.order_set = true;
+		return 0;
+	}
+	if (!strncmp(value, "callee", strlen(value))) {
+		callchain_param.order = ORDER_CALLEE;
+		callchain_param.order_set = true;
+		return 0;
+	}
+	return -1;
+}
+
+static int parse_callchain_sort_key(const char *value)
+{
+	if (!strncmp(value, "function", strlen(value))) {
+		callchain_param.key = CCKEY_FUNCTION;
+		return 0;
+	}
+	if (!strncmp(value, "address", strlen(value))) {
+		callchain_param.key = CCKEY_ADDRESS;
+		return 0;
+	}
+	if (!strncmp(value, "srcline", strlen(value))) {
+		callchain_param.key = CCKEY_SRCLINE;
+		return 0;
+	}
+	if (!strncmp(value, "branch", strlen(value))) {
+		callchain_param.branch_callstack = 1;
+		return 0;
+	}
+	return -1;
+}
+
+static int parse_callchain_value(const char *value)
+{
+	if (!strncmp(value, "percent", strlen(value))) {
+		callchain_param.value = CCVAL_PERCENT;
+		return 0;
+	}
+	if (!strncmp(value, "period", strlen(value))) {
+		callchain_param.value = CCVAL_PERIOD;
+		return 0;
+	}
+	if (!strncmp(value, "count", strlen(value))) {
+		callchain_param.value = CCVAL_COUNT;
+		return 0;
+	}
+	return -1;
+}
+
+static int get_stack_size(const char *str, unsigned long *_size)
+{
+	char *endptr;
+	unsigned long size;
+	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
+
+	size = strtoul(str, &endptr, 0);
+
+	do {
+		if (*endptr)
+			break;
+
+		size = round_up(size, sizeof(u64));
+		if (!size || size > max_size)
+			break;
+
+		*_size = size;
+		return 0;
+
+	} while (0);
+
+	pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
+	       max_size, str);
+	return -1;
+}
+
+static int
+__parse_callchain_report_opt(const char *arg, bool allow_record_opt)
+{
+	char *tok;
+	char *endptr, *saveptr = NULL;
+	bool minpcnt_set = false;
+	bool record_opt_set = false;
+	bool try_stack_size = false;
+
+	callchain_param.enabled = true;
+	symbol_conf.use_callchain = true;
+
+	if (!arg)
+		return 0;
+
+	while ((tok = strtok_r((char *)arg, ",", &saveptr)) != NULL) {
+		if (!strncmp(tok, "none", strlen(tok))) {
+			callchain_param.mode = CHAIN_NONE;
+			callchain_param.enabled = false;
+			symbol_conf.use_callchain = false;
+			return 0;
+		}
+
+		if (!parse_callchain_mode(tok) ||
+		    !parse_callchain_order(tok) ||
+		    !parse_callchain_sort_key(tok) ||
+		    !parse_callchain_value(tok)) {
+			/* parsing ok - move on to the next */
+			try_stack_size = false;
+			goto next;
+		} else if (allow_record_opt && !record_opt_set) {
+			if (parse_callchain_record(tok, &callchain_param))
+				goto try_numbers;
+
+			/* assume that number followed by 'dwarf' is stack size */
+			if (callchain_param.record_mode == CALLCHAIN_DWARF)
+				try_stack_size = true;
+
+			record_opt_set = true;
+			goto next;
+		}
+
+try_numbers:
+		if (try_stack_size) {
+			unsigned long size = 0;
+
+			if (get_stack_size(tok, &size) < 0)
+				return -1;
+			callchain_param.dump_size = size;
+			try_stack_size = false;
+		} else if (!minpcnt_set) {
+			/* try to get the min percent */
+			callchain_param.min_percent = strtod(tok, &endptr);
+			if (tok == endptr)
+				return -1;
+			minpcnt_set = true;
+		} else {
+			/* try print limit at last */
+			callchain_param.print_limit = strtoul(tok, &endptr, 0);
+			if (tok == endptr)
+				return -1;
+		}
+next:
+		arg = NULL;
+	}
+
+	if (callchain_register_param(&callchain_param) < 0) {
+		pr_err("Can't register callchain params\n");
+		return -1;
+	}
+	return 0;
+}
+
+int parse_callchain_report_opt(const char *arg)
+{
+	return __parse_callchain_report_opt(arg, false);
+}
+
+int parse_callchain_top_opt(const char *arg)
+{
+	return __parse_callchain_report_opt(arg, true);
+}
+
+int parse_callchain_record(const char *arg, struct callchain_param *param)
+{
+	char *tok, *name, *saveptr = NULL;
+	char *buf;
+	int ret = -1;
+
+	/* We need buffer that we know we can write to. */
+	buf = malloc(strlen(arg) + 1);
+	if (!buf)
+		return -ENOMEM;
+
+	strcpy(buf, arg);
+
+	tok = strtok_r((char *)buf, ",", &saveptr);
+	name = tok ? : (char *)buf;
+
+	do {
+		/* Framepointer style */
+		if (!strncmp(name, "fp", sizeof("fp"))) {
+			ret = 0;
+			param->record_mode = CALLCHAIN_FP;
+
+			tok = strtok_r(NULL, ",", &saveptr);
+			if (tok) {
+				unsigned long size;
+
+				size = strtoul(tok, &name, 0);
+				if (size < (unsigned) sysctl__max_stack())
+					param->max_stack = size;
+			}
+			break;
+
+		/* Dwarf style */
+		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
+			const unsigned long default_stack_dump_size = 8192;
+
+			ret = 0;
+			param->record_mode = CALLCHAIN_DWARF;
+			param->dump_size = default_stack_dump_size;
+			dwarf_callchain_users = true;
+
+			tok = strtok_r(NULL, ",", &saveptr);
+			if (tok) {
+				unsigned long size = 0;
+
+				ret = get_stack_size(tok, &size);
+				param->dump_size = size;
+			}
+		} else if (!strncmp(name, "lbr", sizeof("lbr"))) {
+			if (!strtok_r(NULL, ",", &saveptr)) {
+				param->record_mode = CALLCHAIN_LBR;
+				ret = 0;
+			} else
+				pr_err("callchain: No more arguments "
+					"needed for --call-graph lbr\n");
+			break;
+		} else {
+			pr_err("callchain: Unknown --call-graph option "
+			       "value: %s\n", arg);
+			break;
+		}
+
+	} while (0);
+
+	free(buf);
+	return ret;
+}
+
+int perf_callchain_config(const char *var, const char *value)
+{
+	char *endptr;
+
+	if (!strstarts(var, "call-graph."))
+		return 0;
+	var += sizeof("call-graph.") - 1;
+
+	if (!strcmp(var, "record-mode"))
+		return parse_callchain_record_opt(value, &callchain_param);
+	if (!strcmp(var, "dump-size")) {
+		unsigned long size = 0;
+		int ret;
+
+		ret = get_stack_size(value, &size);
+		callchain_param.dump_size = size;
+
+		return ret;
+	}
+	if (!strcmp(var, "print-type")){
+		int ret;
+		ret = parse_callchain_mode(value);
+		if (ret == -1)
+			pr_err("Invalid callchain mode: %s\n", value);
+		return ret;
+	}
+	if (!strcmp(var, "order")){
+		int ret;
+		ret = parse_callchain_order(value);
+		if (ret == -1)
+			pr_err("Invalid callchain order: %s\n", value);
+		return ret;
+	}
+	if (!strcmp(var, "sort-key")){
+		int ret;
+		ret = parse_callchain_sort_key(value);
+		if (ret == -1)
+			pr_err("Invalid callchain sort key: %s\n", value);
+		return ret;
+	}
+	if (!strcmp(var, "threshold")) {
+		callchain_param.min_percent = strtod(value, &endptr);
+		if (value == endptr) {
+			pr_err("Invalid callchain threshold: %s\n", value);
+			return -1;
+		}
+	}
+	if (!strcmp(var, "print-limit")) {
+		callchain_param.print_limit = strtod(value, &endptr);
+		if (value == endptr) {
+			pr_err("Invalid callchain print limit: %s\n", value);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+static void
+rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
+		    enum chain_mode mode)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct callchain_node *rnode;
+	u64 chain_cumul = callchain_cumul_hits(chain);
+
+	while (*p) {
+		u64 rnode_cumul;
+
+		parent = *p;
+		rnode = rb_entry(parent, struct callchain_node, rb_node);
+		rnode_cumul = callchain_cumul_hits(rnode);
+
+		switch (mode) {
+		case CHAIN_FLAT:
+		case CHAIN_FOLDED:
+			if (rnode->hit < chain->hit)
+				p = &(*p)->rb_left;
+			else
+				p = &(*p)->rb_right;
+			break;
+		case CHAIN_GRAPH_ABS: /* Falldown */
+		case CHAIN_GRAPH_REL:
+			if (rnode_cumul < chain_cumul)
+				p = &(*p)->rb_left;
+			else
+				p = &(*p)->rb_right;
+			break;
+		case CHAIN_NONE:
+		default:
+			break;
+		}
+	}
+
+	rb_link_node(&chain->rb_node, parent, p);
+	rb_insert_color(&chain->rb_node, root);
+}
+
+static void
+__sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node,
+		  u64 min_hit)
+{
+	struct rb_node *n;
+	struct callchain_node *child;
+
+	n = rb_first(&node->rb_root_in);
+	while (n) {
+		child = rb_entry(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+
+		__sort_chain_flat(rb_root, child, min_hit);
+	}
+
+	if (node->hit && node->hit >= min_hit)
+		rb_insert_callchain(rb_root, node, CHAIN_FLAT);
+}
+
+/*
+ * Once we get every callchains from the stream, we can now
+ * sort them by hit
+ */
+static void
+sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root,
+		u64 min_hit, struct callchain_param *param __maybe_unused)
+{
+	*rb_root = RB_ROOT;
+	__sort_chain_flat(rb_root, &root->node, min_hit);
+}
+
+static void __sort_chain_graph_abs(struct callchain_node *node,
+				   u64 min_hit)
+{
+	struct rb_node *n;
+	struct callchain_node *child;
+
+	node->rb_root = RB_ROOT;
+	n = rb_first(&node->rb_root_in);
+
+	while (n) {
+		child = rb_entry(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+
+		__sort_chain_graph_abs(child, min_hit);
+		if (callchain_cumul_hits(child) >= min_hit)
+			rb_insert_callchain(&node->rb_root, child,
+					    CHAIN_GRAPH_ABS);
+	}
+}
+
+static void
+sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root,
+		     u64 min_hit, struct callchain_param *param __maybe_unused)
+{
+	__sort_chain_graph_abs(&chain_root->node, min_hit);
+	rb_root->rb_node = chain_root->node.rb_root.rb_node;
+}
+
+static void __sort_chain_graph_rel(struct callchain_node *node,
+				   double min_percent)
+{
+	struct rb_node *n;
+	struct callchain_node *child;
+	u64 min_hit;
+
+	node->rb_root = RB_ROOT;
+	min_hit = ceil(node->children_hit * min_percent);
+
+	n = rb_first(&node->rb_root_in);
+	while (n) {
+		child = rb_entry(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+
+		__sort_chain_graph_rel(child, min_percent);
+		if (callchain_cumul_hits(child) >= min_hit)
+			rb_insert_callchain(&node->rb_root, child,
+					    CHAIN_GRAPH_REL);
+	}
+}
+
+static void
+sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_root *chain_root,
+		     u64 min_hit __maybe_unused, struct callchain_param *param)
+{
+	__sort_chain_graph_rel(&chain_root->node, param->min_percent / 100.0);
+	rb_root->rb_node = chain_root->node.rb_root.rb_node;
+}
+
+int callchain_register_param(struct callchain_param *param)
+{
+	switch (param->mode) {
+	case CHAIN_GRAPH_ABS:
+		param->sort = sort_chain_graph_abs;
+		break;
+	case CHAIN_GRAPH_REL:
+		param->sort = sort_chain_graph_rel;
+		break;
+	case CHAIN_FLAT:
+	case CHAIN_FOLDED:
+		param->sort = sort_chain_flat;
+		break;
+	case CHAIN_NONE:
+	default:
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Create a child for a parent. If inherit_children, then the new child
+ * will become the new parent of it's parent children
+ */
+static struct callchain_node *
+create_child(struct callchain_node *parent, bool inherit_children)
+{
+	struct callchain_node *new;
+
+	new = zalloc(sizeof(*new));
+	if (!new) {
+		perror("not enough memory to create child for code path tree");
+		return NULL;
+	}
+	new->parent = parent;
+	INIT_LIST_HEAD(&new->val);
+	INIT_LIST_HEAD(&new->parent_val);
+
+	if (inherit_children) {
+		struct rb_node *n;
+		struct callchain_node *child;
+
+		new->rb_root_in = parent->rb_root_in;
+		parent->rb_root_in = RB_ROOT;
+
+		n = rb_first(&new->rb_root_in);
+		while (n) {
+			child = rb_entry(n, struct callchain_node, rb_node_in);
+			child->parent = new;
+			n = rb_next(n);
+		}
+
+		/* make it the first child */
+		rb_link_node(&new->rb_node_in, NULL, &parent->rb_root_in.rb_node);
+		rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
+	}
+
+	return new;
+}
+
+
+/*
+ * Fill the node with callchain values
+ */
+static int
+fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
+{
+	struct callchain_cursor_node *cursor_node;
+
+	node->val_nr = cursor->nr - cursor->pos;
+	if (!node->val_nr)
+		pr_warning("Warning: empty node in callchain tree\n");
+
+	cursor_node = callchain_cursor_current(cursor);
+
+	while (cursor_node) {
+		struct callchain_list *call;
+
+		call = zalloc(sizeof(*call));
+		if (!call) {
+			perror("not enough memory for the code path tree");
+			return -1;
+		}
+		call->ip = cursor_node->ip;
+		call->ms = cursor_node->ms;
+		map__get(call->ms.map);
+		call->srcline = cursor_node->srcline;
+
+		if (cursor_node->branch) {
+			call->branch_count = 1;
+
+			if (cursor_node->branch_from) {
+				/*
+				 * branch_from is set with value somewhere else
+				 * to imply it's "to" of a branch.
+				 */
+				call->brtype_stat.branch_to = true;
+
+				if (cursor_node->branch_flags.predicted)
+					call->predicted_count = 1;
+
+				if (cursor_node->branch_flags.abort)
+					call->abort_count = 1;
+
+				branch_type_count(&call->brtype_stat,
+						  &cursor_node->branch_flags,
+						  cursor_node->branch_from,
+						  cursor_node->ip);
+			} else {
+				/*
+				 * It's "from" of a branch
+				 */
+				call->brtype_stat.branch_to = false;
+				call->cycles_count =
+					cursor_node->branch_flags.cycles;
+				call->iter_count = cursor_node->nr_loop_iter;
+				call->iter_cycles = cursor_node->iter_cycles;
+			}
+		}
+
+		list_add_tail(&call->list, &node->val);
+
+		callchain_cursor_advance(cursor);
+		cursor_node = callchain_cursor_current(cursor);
+	}
+	return 0;
+}
+
+static struct callchain_node *
+add_child(struct callchain_node *parent,
+	  struct callchain_cursor *cursor,
+	  u64 period)
+{
+	struct callchain_node *new;
+
+	new = create_child(parent, false);
+	if (new == NULL)
+		return NULL;
+
+	if (fill_node(new, cursor) < 0) {
+		struct callchain_list *call, *tmp;
+
+		list_for_each_entry_safe(call, tmp, &new->val, list) {
+			list_del_init(&call->list);
+			map__zput(call->ms.map);
+			free(call);
+		}
+		free(new);
+		return NULL;
+	}
+
+	new->children_hit = 0;
+	new->hit = period;
+	new->children_count = 0;
+	new->count = 1;
+	return new;
+}
+
+enum match_result {
+	MATCH_ERROR  = -1,
+	MATCH_EQ,
+	MATCH_LT,
+	MATCH_GT,
+};
+
+static enum match_result match_chain_strings(const char *left,
+					     const char *right)
+{
+	enum match_result ret = MATCH_EQ;
+	int cmp;
+
+	if (left && right)
+		cmp = strcmp(left, right);
+	else if (!left && right)
+		cmp = 1;
+	else if (left && !right)
+		cmp = -1;
+	else
+		return MATCH_ERROR;
+
+	if (cmp != 0)
+		ret = cmp < 0 ? MATCH_LT : MATCH_GT;
+
+	return ret;
+}
+
+/*
+ * We need to always use relative addresses because we're aggregating
+ * callchains from multiple threads, i.e. different address spaces, so
+ * comparing absolute addresses make no sense as a symbol in a DSO may end up
+ * in a different address when used in a different binary or even the same
+ * binary but with some sort of address randomization technique, thus we need
+ * to compare just relative addresses. -acme
+ */
+static enum match_result match_chain_dso_addresses(struct map *left_map, u64 left_ip,
+						   struct map *right_map, u64 right_ip)
+{
+	struct dso *left_dso = left_map ? left_map->dso : NULL;
+	struct dso *right_dso = right_map ? right_map->dso : NULL;
+
+	if (left_dso != right_dso)
+		return left_dso < right_dso ? MATCH_LT : MATCH_GT;
+
+	if (left_ip != right_ip)
+ 		return left_ip < right_ip ? MATCH_LT : MATCH_GT;
+
+	return MATCH_EQ;
+}
+
+static enum match_result match_chain(struct callchain_cursor_node *node,
+				     struct callchain_list *cnode)
+{
+	enum match_result match = MATCH_ERROR;
+
+	switch (callchain_param.key) {
+	case CCKEY_SRCLINE:
+		match = match_chain_strings(cnode->srcline, node->srcline);
+		if (match != MATCH_ERROR)
+			break;
+		/* otherwise fall-back to symbol-based comparison below */
+		__fallthrough;
+	case CCKEY_FUNCTION:
+		if (node->ms.sym && cnode->ms.sym) {
+			/*
+			 * Compare inlined frames based on their symbol name
+			 * because different inlined frames will have the same
+			 * symbol start. Otherwise do a faster comparison based
+			 * on the symbol start address.
+			 */
+			if (cnode->ms.sym->inlined || node->ms.sym->inlined) {
+				match = match_chain_strings(cnode->ms.sym->name,
+							    node->ms.sym->name);
+				if (match != MATCH_ERROR)
+					break;
+			} else {
+				match = match_chain_dso_addresses(cnode->ms.map, cnode->ms.sym->start,
+								  node->ms.map, node->ms.sym->start);
+				break;
+			}
+		}
+		/* otherwise fall-back to IP-based comparison below */
+		__fallthrough;
+	case CCKEY_ADDRESS:
+	default:
+		match = match_chain_dso_addresses(cnode->ms.map, cnode->ip, node->ms.map, node->ip);
+		break;
+	}
+
+	if (match == MATCH_EQ && node->branch) {
+		cnode->branch_count++;
+
+		if (node->branch_from) {
+			/*
+			 * It's "to" of a branch
+			 */
+			cnode->brtype_stat.branch_to = true;
+
+			if (node->branch_flags.predicted)
+				cnode->predicted_count++;
+
+			if (node->branch_flags.abort)
+				cnode->abort_count++;
+
+			branch_type_count(&cnode->brtype_stat,
+					  &node->branch_flags,
+					  node->branch_from,
+					  node->ip);
+		} else {
+			/*
+			 * It's "from" of a branch
+			 */
+			cnode->brtype_stat.branch_to = false;
+			cnode->cycles_count += node->branch_flags.cycles;
+			cnode->iter_count += node->nr_loop_iter;
+			cnode->iter_cycles += node->iter_cycles;
+			cnode->from_count++;
+		}
+	}
+
+	return match;
+}
+
+/*
+ * Split the parent in two parts (a new child is created) and
+ * give a part of its callchain to the created child.
+ * Then create another child to host the given callchain of new branch
+ */
+static int
+split_add_child(struct callchain_node *parent,
+		struct callchain_cursor *cursor,
+		struct callchain_list *to_split,
+		u64 idx_parents, u64 idx_local, u64 period)
+{
+	struct callchain_node *new;
+	struct list_head *old_tail;
+	unsigned int idx_total = idx_parents + idx_local;
+
+	/* split */
+	new = create_child(parent, true);
+	if (new == NULL)
+		return -1;
+
+	/* split the callchain and move a part to the new child */
+	old_tail = parent->val.prev;
+	list_del_range(&to_split->list, old_tail);
+	new->val.next = &to_split->list;
+	new->val.prev = old_tail;
+	to_split->list.prev = &new->val;
+	old_tail->next = &new->val;
+
+	/* split the hits */
+	new->hit = parent->hit;
+	new->children_hit = parent->children_hit;
+	parent->children_hit = callchain_cumul_hits(new);
+	new->val_nr = parent->val_nr - idx_local;
+	parent->val_nr = idx_local;
+	new->count = parent->count;
+	new->children_count = parent->children_count;
+	parent->children_count = callchain_cumul_counts(new);
+
+	/* create a new child for the new branch if any */
+	if (idx_total < cursor->nr) {
+		struct callchain_node *first;
+		struct callchain_list *cnode;
+		struct callchain_cursor_node *node;
+		struct rb_node *p, **pp;
+
+		parent->hit = 0;
+		parent->children_hit += period;
+		parent->count = 0;
+		parent->children_count += 1;
+
+		node = callchain_cursor_current(cursor);
+		new = add_child(parent, cursor, period);
+		if (new == NULL)
+			return -1;
+
+		/*
+		 * This is second child since we moved parent's children
+		 * to new (first) child above.
+		 */
+		p = parent->rb_root_in.rb_node;
+		first = rb_entry(p, struct callchain_node, rb_node_in);
+		cnode = list_first_entry(&first->val, struct callchain_list,
+					 list);
+
+		if (match_chain(node, cnode) == MATCH_LT)
+			pp = &p->rb_left;
+		else
+			pp = &p->rb_right;
+
+		rb_link_node(&new->rb_node_in, p, pp);
+		rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
+	} else {
+		parent->hit = period;
+		parent->count = 1;
+	}
+	return 0;
+}
+
+static enum match_result
+append_chain(struct callchain_node *root,
+	     struct callchain_cursor *cursor,
+	     u64 period);
+
+static int
+append_chain_children(struct callchain_node *root,
+		      struct callchain_cursor *cursor,
+		      u64 period)
+{
+	struct callchain_node *rnode;
+	struct callchain_cursor_node *node;
+	struct rb_node **p = &root->rb_root_in.rb_node;
+	struct rb_node *parent = NULL;
+
+	node = callchain_cursor_current(cursor);
+	if (!node)
+		return -1;
+
+	/* lookup in children */
+	while (*p) {
+		enum match_result ret;
+
+		parent = *p;
+		rnode = rb_entry(parent, struct callchain_node, rb_node_in);
+
+		/* If at least first entry matches, rely to children */
+		ret = append_chain(rnode, cursor, period);
+		if (ret == MATCH_EQ)
+			goto inc_children_hit;
+		if (ret == MATCH_ERROR)
+			return -1;
+
+		if (ret == MATCH_LT)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
+	}
+	/* nothing in children, add to the current node */
+	rnode = add_child(root, cursor, period);
+	if (rnode == NULL)
+		return -1;
+
+	rb_link_node(&rnode->rb_node_in, parent, p);
+	rb_insert_color(&rnode->rb_node_in, &root->rb_root_in);
+
+inc_children_hit:
+	root->children_hit += period;
+	root->children_count++;
+	return 0;
+}
+
+static enum match_result
+append_chain(struct callchain_node *root,
+	     struct callchain_cursor *cursor,
+	     u64 period)
+{
+	struct callchain_list *cnode;
+	u64 start = cursor->pos;
+	bool found = false;
+	u64 matches;
+	enum match_result cmp = MATCH_ERROR;
+
+	/*
+	 * Lookup in the current node
+	 * If we have a symbol, then compare the start to match
+	 * anywhere inside a function, unless function
+	 * mode is disabled.
+	 */
+	list_for_each_entry(cnode, &root->val, list) {
+		struct callchain_cursor_node *node;
+
+		node = callchain_cursor_current(cursor);
+		if (!node)
+			break;
+
+		cmp = match_chain(node, cnode);
+		if (cmp != MATCH_EQ)
+			break;
+
+		found = true;
+
+		callchain_cursor_advance(cursor);
+	}
+
+	/* matches not, relay no the parent */
+	if (!found) {
+		WARN_ONCE(cmp == MATCH_ERROR, "Chain comparison error\n");
+		return cmp;
+	}
+
+	matches = cursor->pos - start;
+
+	/* we match only a part of the node. Split it and add the new chain */
+	if (matches < root->val_nr) {
+		if (split_add_child(root, cursor, cnode, start, matches,
+				    period) < 0)
+			return MATCH_ERROR;
+
+		return MATCH_EQ;
+	}
+
+	/* we match 100% of the path, increment the hit */
+	if (matches == root->val_nr && cursor->pos == cursor->nr) {
+		root->hit += period;
+		root->count++;
+		return MATCH_EQ;
+	}
+
+	/* We match the node and still have a part remaining */
+	if (append_chain_children(root, cursor, period) < 0)
+		return MATCH_ERROR;
+
+	return MATCH_EQ;
+}
+
+int callchain_append(struct callchain_root *root,
+		     struct callchain_cursor *cursor,
+		     u64 period)
+{
+	if (!cursor->nr)
+		return 0;
+
+	callchain_cursor_commit(cursor);
+
+	if (append_chain_children(&root->node, cursor, period) < 0)
+		return -1;
+
+	if (cursor->nr > root->max_depth)
+		root->max_depth = cursor->nr;
+
+	return 0;
+}
+
+static int
+merge_chain_branch(struct callchain_cursor *cursor,
+		   struct callchain_node *dst, struct callchain_node *src)
+{
+	struct callchain_cursor_node **old_last = cursor->last;
+	struct callchain_node *child;
+	struct callchain_list *list, *next_list;
+	struct rb_node *n;
+	int old_pos = cursor->nr;
+	int err = 0;
+
+	list_for_each_entry_safe(list, next_list, &src->val, list) {
+		callchain_cursor_append(cursor, list->ip, &list->ms,
+					false, NULL, 0, 0, 0, list->srcline);
+		list_del_init(&list->list);
+		map__zput(list->ms.map);
+		free(list);
+	}
+
+	if (src->hit) {
+		callchain_cursor_commit(cursor);
+		if (append_chain_children(dst, cursor, src->hit) < 0)
+			return -1;
+	}
+
+	n = rb_first(&src->rb_root_in);
+	while (n) {
+		child = container_of(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+		rb_erase(&child->rb_node_in, &src->rb_root_in);
+
+		err = merge_chain_branch(cursor, dst, child);
+		if (err)
+			break;
+
+		free(child);
+	}
+
+	cursor->nr = old_pos;
+	cursor->last = old_last;
+
+	return err;
+}
+
+int callchain_merge(struct callchain_cursor *cursor,
+		    struct callchain_root *dst, struct callchain_root *src)
+{
+	return merge_chain_branch(cursor, &dst->node, &src->node);
+}
+
+int callchain_cursor_append(struct callchain_cursor *cursor,
+			    u64 ip, struct map_symbol *ms,
+			    bool branch, struct branch_flags *flags,
+			    int nr_loop_iter, u64 iter_cycles, u64 branch_from,
+			    const char *srcline)
+{
+	struct callchain_cursor_node *node = *cursor->last;
+
+	if (!node) {
+		node = calloc(1, sizeof(*node));
+		if (!node)
+			return -ENOMEM;
+
+		*cursor->last = node;
+	}
+
+	node->ip = ip;
+	map__zput(node->ms.map);
+	node->ms = *ms;
+	map__get(node->ms.map);
+	node->branch = branch;
+	node->nr_loop_iter = nr_loop_iter;
+	node->iter_cycles = iter_cycles;
+	node->srcline = srcline;
+
+	if (flags)
+		memcpy(&node->branch_flags, flags,
+			sizeof(struct branch_flags));
+
+	node->branch_from = branch_from;
+	cursor->nr++;
+
+	cursor->last = &node->next;
+
+	return 0;
+}
+
+int sample__resolve_callchain(struct perf_sample *sample,
+			      struct callchain_cursor *cursor, struct symbol **parent,
+			      struct evsel *evsel, struct addr_location *al,
+			      int max_stack)
+{
+	if (sample->callchain == NULL && !symbol_conf.show_branchflag_count)
+		return 0;
+
+	if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain ||
+	    perf_hpp_list.parent || symbol_conf.show_branchflag_count) {
+		return thread__resolve_callchain(al->thread, cursor, evsel, sample,
+						 parent, al, max_stack);
+	}
+	return 0;
+}
+
+int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample)
+{
+	if ((!symbol_conf.use_callchain || sample->callchain == NULL) &&
+		!symbol_conf.show_branchflag_count)
+		return 0;
+	return callchain_append(he->callchain, &callchain_cursor, sample->period);
+}
+
+int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
+			bool hide_unresolved)
+{
+	al->maps = node->ms.maps;
+	al->map = node->ms.map;
+	al->sym = node->ms.sym;
+	al->srcline = node->srcline;
+	al->addr = node->ip;
+
+	if (al->sym == NULL) {
+		if (hide_unresolved)
+			return 0;
+		if (al->map == NULL)
+			goto out;
+	}
+
+	if (al->maps == machine__kernel_maps(al->maps->machine)) {
+		if (machine__is_host(al->maps->machine)) {
+			al->cpumode = PERF_RECORD_MISC_KERNEL;
+			al->level = 'k';
+		} else {
+			al->cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
+			al->level = 'g';
+		}
+	} else {
+		if (machine__is_host(al->maps->machine)) {
+			al->cpumode = PERF_RECORD_MISC_USER;
+			al->level = '.';
+		} else if (perf_guest) {
+			al->cpumode = PERF_RECORD_MISC_GUEST_USER;
+			al->level = 'u';
+		} else {
+			al->cpumode = PERF_RECORD_MISC_HYPERVISOR;
+			al->level = 'H';
+		}
+	}
+
+out:
+	return 1;
+}
+
+char *callchain_list__sym_name(struct callchain_list *cl,
+			       char *bf, size_t bfsize, bool show_dso)
+{
+	bool show_addr = callchain_param.key == CCKEY_ADDRESS;
+	bool show_srcline = show_addr || callchain_param.key == CCKEY_SRCLINE;
+	int printed;
+
+	if (cl->ms.sym) {
+		const char *inlined = cl->ms.sym->inlined ? " (inlined)" : "";
+
+		if (show_srcline && cl->srcline)
+			printed = scnprintf(bf, bfsize, "%s %s%s",
+					    cl->ms.sym->name, cl->srcline,
+					    inlined);
+		else
+			printed = scnprintf(bf, bfsize, "%s%s",
+					    cl->ms.sym->name, inlined);
+	} else
+		printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip);
+
+	if (show_dso)
+		scnprintf(bf + printed, bfsize - printed, " %s",
+			  cl->ms.map ?
+			  cl->ms.map->dso->short_name :
+			  "unknown");
+
+	return bf;
+}
+
+char *callchain_node__scnprintf_value(struct callchain_node *node,
+				      char *bf, size_t bfsize, u64 total)
+{
+	double percent = 0.0;
+	u64 period = callchain_cumul_hits(node);
+	unsigned count = callchain_cumul_counts(node);
+
+	if (callchain_param.mode == CHAIN_FOLDED) {
+		period = node->hit;
+		count = node->count;
+	}
+
+	switch (callchain_param.value) {
+	case CCVAL_PERIOD:
+		scnprintf(bf, bfsize, "%"PRIu64, period);
+		break;
+	case CCVAL_COUNT:
+		scnprintf(bf, bfsize, "%u", count);
+		break;
+	case CCVAL_PERCENT:
+	default:
+		if (total)
+			percent = period * 100.0 / total;
+		scnprintf(bf, bfsize, "%.2f%%", percent);
+		break;
+	}
+	return bf;
+}
+
+int callchain_node__fprintf_value(struct callchain_node *node,
+				 FILE *fp, u64 total)
+{
+	double percent = 0.0;
+	u64 period = callchain_cumul_hits(node);
+	unsigned count = callchain_cumul_counts(node);
+
+	if (callchain_param.mode == CHAIN_FOLDED) {
+		period = node->hit;
+		count = node->count;
+	}
+
+	switch (callchain_param.value) {
+	case CCVAL_PERIOD:
+		return fprintf(fp, "%"PRIu64, period);
+	case CCVAL_COUNT:
+		return fprintf(fp, "%u", count);
+	case CCVAL_PERCENT:
+	default:
+		if (total)
+			percent = period * 100.0 / total;
+		return percent_color_fprintf(fp, "%.2f%%", percent);
+	}
+	return 0;
+}
+
+static void callchain_counts_value(struct callchain_node *node,
+				   u64 *branch_count, u64 *predicted_count,
+				   u64 *abort_count, u64 *cycles_count)
+{
+	struct callchain_list *clist;
+
+	list_for_each_entry(clist, &node->val, list) {
+		if (branch_count)
+			*branch_count += clist->branch_count;
+
+		if (predicted_count)
+			*predicted_count += clist->predicted_count;
+
+		if (abort_count)
+			*abort_count += clist->abort_count;
+
+		if (cycles_count)
+			*cycles_count += clist->cycles_count;
+	}
+}
+
+static int callchain_node_branch_counts_cumul(struct callchain_node *node,
+					      u64 *branch_count,
+					      u64 *predicted_count,
+					      u64 *abort_count,
+					      u64 *cycles_count)
+{
+	struct callchain_node *child;
+	struct rb_node *n;
+
+	n = rb_first(&node->rb_root_in);
+	while (n) {
+		child = rb_entry(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+
+		callchain_node_branch_counts_cumul(child, branch_count,
+						   predicted_count,
+						   abort_count,
+						   cycles_count);
+
+		callchain_counts_value(child, branch_count,
+				       predicted_count, abort_count,
+				       cycles_count);
+	}
+
+	return 0;
+}
+
+int callchain_branch_counts(struct callchain_root *root,
+			    u64 *branch_count, u64 *predicted_count,
+			    u64 *abort_count, u64 *cycles_count)
+{
+	if (branch_count)
+		*branch_count = 0;
+
+	if (predicted_count)
+		*predicted_count = 0;
+
+	if (abort_count)
+		*abort_count = 0;
+
+	if (cycles_count)
+		*cycles_count = 0;
+
+	return callchain_node_branch_counts_cumul(&root->node,
+						  branch_count,
+						  predicted_count,
+						  abort_count,
+						  cycles_count);
+}
+
+static int count_pri64_printf(int idx, const char *str, u64 value, char *bf, int bfsize)
+{
+	return scnprintf(bf, bfsize, "%s%s:%" PRId64 "", (idx) ? " " : " (", str, value);
+}
+
+static int count_float_printf(int idx, const char *str, float value,
+			      char *bf, int bfsize, float threshold)
+{
+	if (threshold != 0.0 && value < threshold)
+		return 0;
+
+	return scnprintf(bf, bfsize, "%s%s:%.1f%%", (idx) ? " " : " (", str, value);
+}
+
+static int branch_to_str(char *bf, int bfsize,
+			 u64 branch_count, u64 predicted_count,
+			 u64 abort_count,
+			 struct branch_type_stat *brtype_stat)
+{
+	int printed, i = 0;
+
+	printed = branch_type_str(brtype_stat, bf, bfsize);
+	if (printed)
+		i++;
+
+	if (predicted_count < branch_count) {
+		printed += count_float_printf(i++, "predicted",
+				predicted_count * 100.0 / branch_count,
+				bf + printed, bfsize - printed, 0.0);
+	}
+
+	if (abort_count) {
+		printed += count_float_printf(i++, "abort",
+				abort_count * 100.0 / branch_count,
+				bf + printed, bfsize - printed, 0.1);
+	}
+
+	if (i)
+		printed += scnprintf(bf + printed, bfsize - printed, ")");
+
+	return printed;
+}
+
+static int branch_from_str(char *bf, int bfsize,
+			   u64 branch_count,
+			   u64 cycles_count, u64 iter_count,
+			   u64 iter_cycles, u64 from_count)
+{
+	int printed = 0, i = 0;
+	u64 cycles, v = 0;
+
+	cycles = cycles_count / branch_count;
+	if (cycles) {
+		printed += count_pri64_printf(i++, "cycles",
+				cycles,
+				bf + printed, bfsize - printed);
+	}
+
+	if (iter_count && from_count) {
+		v = iter_count / from_count;
+		if (v) {
+			printed += count_pri64_printf(i++, "iter",
+					v, bf + printed, bfsize - printed);
+
+			printed += count_pri64_printf(i++, "avg_cycles",
+					iter_cycles / iter_count,
+					bf + printed, bfsize - printed);
+		}
+	}
+
+	if (i)
+		printed += scnprintf(bf + printed, bfsize - printed, ")");
+
+	return printed;
+}
+
+static int counts_str_build(char *bf, int bfsize,
+			     u64 branch_count, u64 predicted_count,
+			     u64 abort_count, u64 cycles_count,
+			     u64 iter_count, u64 iter_cycles,
+			     u64 from_count,
+			     struct branch_type_stat *brtype_stat)
+{
+	int printed;
+
+	if (branch_count == 0)
+		return scnprintf(bf, bfsize, " (calltrace)");
+
+	if (brtype_stat->branch_to) {
+		printed = branch_to_str(bf, bfsize, branch_count,
+				predicted_count, abort_count, brtype_stat);
+	} else {
+		printed = branch_from_str(bf, bfsize, branch_count,
+				cycles_count, iter_count, iter_cycles,
+				from_count);
+	}
+
+	if (!printed)
+		bf[0] = 0;
+
+	return printed;
+}
+
+static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
+				   u64 branch_count, u64 predicted_count,
+				   u64 abort_count, u64 cycles_count,
+				   u64 iter_count, u64 iter_cycles,
+				   u64 from_count,
+				   struct branch_type_stat *brtype_stat)
+{
+	char str[256];
+
+	counts_str_build(str, sizeof(str), branch_count,
+			 predicted_count, abort_count, cycles_count,
+			 iter_count, iter_cycles, from_count, brtype_stat);
+
+	if (fp)
+		return fprintf(fp, "%s", str);
+
+	return scnprintf(bf, bfsize, "%s", str);
+}
+
+int callchain_list_counts__printf_value(struct callchain_list *clist,
+					FILE *fp, char *bf, int bfsize)
+{
+	u64 branch_count, predicted_count;
+	u64 abort_count, cycles_count;
+	u64 iter_count, iter_cycles;
+	u64 from_count;
+
+	branch_count = clist->branch_count;
+	predicted_count = clist->predicted_count;
+	abort_count = clist->abort_count;
+	cycles_count = clist->cycles_count;
+	iter_count = clist->iter_count;
+	iter_cycles = clist->iter_cycles;
+	from_count = clist->from_count;
+
+	return callchain_counts_printf(fp, bf, bfsize, branch_count,
+				       predicted_count, abort_count,
+				       cycles_count, iter_count, iter_cycles,
+				       from_count, &clist->brtype_stat);
+}
+
+static void free_callchain_node(struct callchain_node *node)
+{
+	struct callchain_list *list, *tmp;
+	struct callchain_node *child;
+	struct rb_node *n;
+
+	list_for_each_entry_safe(list, tmp, &node->parent_val, list) {
+		list_del_init(&list->list);
+		map__zput(list->ms.map);
+		free(list);
+	}
+
+	list_for_each_entry_safe(list, tmp, &node->val, list) {
+		list_del_init(&list->list);
+		map__zput(list->ms.map);
+		free(list);
+	}
+
+	n = rb_first(&node->rb_root_in);
+	while (n) {
+		child = container_of(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+		rb_erase(&child->rb_node_in, &node->rb_root_in);
+
+		free_callchain_node(child);
+		free(child);
+	}
+}
+
+void free_callchain(struct callchain_root *root)
+{
+	if (!symbol_conf.use_callchain)
+		return;
+
+	free_callchain_node(&root->node);
+}
+
+static u64 decay_callchain_node(struct callchain_node *node)
+{
+	struct callchain_node *child;
+	struct rb_node *n;
+	u64 child_hits = 0;
+
+	n = rb_first(&node->rb_root_in);
+	while (n) {
+		child = container_of(n, struct callchain_node, rb_node_in);
+
+		child_hits += decay_callchain_node(child);
+		n = rb_next(n);
+	}
+
+	node->hit = (node->hit * 7) / 8;
+	node->children_hit = child_hits;
+
+	return node->hit;
+}
+
+void decay_callchain(struct callchain_root *root)
+{
+	if (!symbol_conf.use_callchain)
+		return;
+
+	decay_callchain_node(&root->node);
+}
+
+int callchain_node__make_parent_list(struct callchain_node *node)
+{
+	struct callchain_node *parent = node->parent;
+	struct callchain_list *chain, *new;
+	LIST_HEAD(head);
+
+	while (parent) {
+		list_for_each_entry_reverse(chain, &parent->val, list) {
+			new = malloc(sizeof(*new));
+			if (new == NULL)
+				goto out;
+			*new = *chain;
+			new->has_children = false;
+			map__get(new->ms.map);
+			list_add_tail(&new->list, &head);
+		}
+		parent = parent->parent;
+	}
+
+	list_for_each_entry_safe_reverse(chain, new, &head, list)
+		list_move_tail(&chain->list, &node->parent_val);
+
+	if (!list_empty(&node->parent_val)) {
+		chain = list_first_entry(&node->parent_val, struct callchain_list, list);
+		chain->has_children = rb_prev(&node->rb_node) || rb_next(&node->rb_node);
+
+		chain = list_first_entry(&node->val, struct callchain_list, list);
+		chain->has_children = false;
+	}
+	return 0;
+
+out:
+	list_for_each_entry_safe(chain, new, &head, list) {
+		list_del_init(&chain->list);
+		map__zput(chain->ms.map);
+		free(chain);
+	}
+	return -ENOMEM;
+}
+
+int callchain_cursor__copy(struct callchain_cursor *dst,
+			   struct callchain_cursor *src)
+{
+	int rc = 0;
+
+	callchain_cursor_reset(dst);
+	callchain_cursor_commit(src);
+
+	while (true) {
+		struct callchain_cursor_node *node;
+
+		node = callchain_cursor_current(src);
+		if (node == NULL)
+			break;
+
+		rc = callchain_cursor_append(dst, node->ip, &node->ms,
+					     node->branch, &node->branch_flags,
+					     node->nr_loop_iter,
+					     node->iter_cycles,
+					     node->branch_from, node->srcline);
+		if (rc)
+			break;
+
+		callchain_cursor_advance(src);
+	}
+
+	return rc;
+}
+
+/*
+ * Initialize a cursor before adding entries inside, but keep
+ * the previously allocated entries as a cache.
+ */
+void callchain_cursor_reset(struct callchain_cursor *cursor)
+{
+	struct callchain_cursor_node *node;
+
+	cursor->nr = 0;
+	cursor->last = &cursor->first;
+
+	for (node = cursor->first; node != NULL; node = node->next)
+		map__zput(node->ms.map);
+}
+
+void callchain_param_setup(u64 sample_type, const char *arch)
+{
+	if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
+		if ((sample_type & PERF_SAMPLE_REGS_USER) &&
+		    (sample_type & PERF_SAMPLE_STACK_USER)) {
+			callchain_param.record_mode = CALLCHAIN_DWARF;
+			dwarf_callchain_users = true;
+		} else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+			callchain_param.record_mode = CALLCHAIN_LBR;
+		else
+			callchain_param.record_mode = CALLCHAIN_FP;
+	}
+
+	/*
+	 * It's necessary to use libunwind to reliably determine the caller of
+	 * a leaf function on aarch64, as otherwise we cannot know whether to
+	 * start from the LR or FP.
+	 *
+	 * Always starting from the LR can result in duplicate or entirely
+	 * erroneous entries. Always skipping the LR and starting from the FP
+	 * can result in missing entries.
+	 */
+	if (callchain_param.record_mode == CALLCHAIN_FP && !strcmp(arch, "arm64"))
+		dwarf_callchain_users = true;
+}
+
+static bool chain_match(struct callchain_list *base_chain,
+			struct callchain_list *pair_chain)
+{
+	enum match_result match;
+
+	match = match_chain_strings(base_chain->srcline,
+				    pair_chain->srcline);
+	if (match != MATCH_ERROR)
+		return match == MATCH_EQ;
+
+	match = match_chain_dso_addresses(base_chain->ms.map,
+					  base_chain->ip,
+					  pair_chain->ms.map,
+					  pair_chain->ip);
+
+	return match == MATCH_EQ;
+}
+
+bool callchain_cnode_matched(struct callchain_node *base_cnode,
+			     struct callchain_node *pair_cnode)
+{
+	struct callchain_list *base_chain, *pair_chain;
+	bool match = false;
+
+	pair_chain = list_first_entry(&pair_cnode->val,
+				      struct callchain_list,
+				      list);
+
+	list_for_each_entry(base_chain, &base_cnode->val, list) {
+		if (&pair_chain->list == &pair_cnode->val)
+			return false;
+
+		if (!base_chain->srcline || !pair_chain->srcline) {
+			pair_chain = list_next_entry(pair_chain, list);
+			continue;
+		}
+
+		match = chain_match(base_chain, pair_chain);
+		if (!match)
+			return false;
+
+		pair_chain = list_next_entry(pair_chain, list);
+	}
+
+	/*
+	 * Say chain1 is ABC, chain2 is ABCD, we consider they are
+	 * not fully matched.
+	 */
+	if (pair_chain && (&pair_chain->list != &pair_cnode->val))
+		return false;
+
+	return match;
+}
+
+static u64 count_callchain_hits(struct hist_entry *he)
+{
+	struct rb_root *root = &he->sorted_chain;
+	struct rb_node *rb_node = rb_first(root);
+	struct callchain_node *node;
+	u64 chain_hits = 0;
+
+	while (rb_node) {
+		node = rb_entry(rb_node, struct callchain_node, rb_node);
+		chain_hits += node->hit;
+		rb_node = rb_next(rb_node);
+	}
+
+	return chain_hits;
+}
+
+u64 callchain_total_hits(struct hists *hists)
+{
+	struct rb_node *next = rb_first_cached(&hists->entries);
+	u64 chain_hits = 0;
+
+	while (next) {
+		struct hist_entry *he = rb_entry(next, struct hist_entry,
+						 rb_node);
+
+		chain_hits += count_callchain_hits(he);
+		next = rb_next(&he->rb_node);
+	}
+
+	return chain_hits;
+}
+
+s64 callchain_avg_cycles(struct callchain_node *cnode)
+{
+	struct callchain_list *chain;
+	s64 cycles = 0;
+
+	list_for_each_entry(chain, &cnode->val, list) {
+		if (chain->srcline && chain->branch_count)
+			cycles += chain->cycles_count / chain->branch_count;
+	}
+
+	return cycles;
+}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
new file mode 100644
index 000000000..d95615dae
--- /dev/null
+++ b/tools/perf/util/callchain.h
@@ -0,0 +1,312 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CALLCHAIN_H
+#define __PERF_CALLCHAIN_H
+
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include "map_symbol.h"
+#include "branch.h"
+
+struct addr_location;
+struct evsel;
+struct ip_callchain;
+struct map;
+struct perf_sample;
+struct thread;
+struct hists;
+
+#define HELP_PAD "\t\t\t\t"
+
+#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace):\n\n"
+
+# define RECORD_MODE_HELP  HELP_PAD "record_mode:\tcall graph recording mode (fp|dwarf|lbr)\n"
+
+#define RECORD_SIZE_HELP						\
+	HELP_PAD "record_size:\tif record_mode is 'dwarf', max size of stack recording (<bytes>)\n" \
+	HELP_PAD "\t\tdefault: 8192 (bytes)\n"
+
+#define CALLCHAIN_RECORD_HELP  CALLCHAIN_HELP RECORD_MODE_HELP RECORD_SIZE_HELP
+
+#define CALLCHAIN_REPORT_HELP						\
+	HELP_PAD "print_type:\tcall graph printing style (graph|flat|fractal|folded|none)\n" \
+	HELP_PAD "threshold:\tminimum call graph inclusion threshold (<percent>)\n" \
+	HELP_PAD "print_limit:\tmaximum number of call graph entry (<number>)\n" \
+	HELP_PAD "order:\t\tcall graph order (caller|callee)\n" \
+	HELP_PAD "sort_key:\tcall graph sort key (function|address)\n"	\
+	HELP_PAD "branch:\t\tinclude last branch info to call graph (branch)\n" \
+	HELP_PAD "value:\t\tcall graph value (percent|period|count)\n"
+
+enum perf_call_graph_mode {
+	CALLCHAIN_NONE,
+	CALLCHAIN_FP,
+	CALLCHAIN_DWARF,
+	CALLCHAIN_LBR,
+	CALLCHAIN_MAX
+};
+
+enum chain_mode {
+	CHAIN_NONE,
+	CHAIN_FLAT,
+	CHAIN_GRAPH_ABS,
+	CHAIN_GRAPH_REL,
+	CHAIN_FOLDED,
+};
+
+enum chain_order {
+	ORDER_CALLER,
+	ORDER_CALLEE
+};
+
+struct callchain_node {
+	struct callchain_node	*parent;
+	struct list_head	val;
+	struct list_head	parent_val;
+	struct rb_node		rb_node_in; /* to insert nodes in an rbtree */
+	struct rb_node		rb_node;    /* to sort nodes in an output tree */
+	struct rb_root		rb_root_in; /* input tree of children */
+	struct rb_root		rb_root;    /* sorted output tree of children */
+	unsigned int		val_nr;
+	unsigned int		count;
+	unsigned int		children_count;
+	u64			hit;
+	u64			children_hit;
+};
+
+struct callchain_root {
+	u64			max_depth;
+	struct callchain_node	node;
+};
+
+struct callchain_param;
+
+typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_root *,
+				 u64, struct callchain_param *);
+
+enum chain_key {
+	CCKEY_FUNCTION,
+	CCKEY_ADDRESS,
+	CCKEY_SRCLINE
+};
+
+enum chain_value {
+	CCVAL_PERCENT,
+	CCVAL_PERIOD,
+	CCVAL_COUNT,
+};
+
+extern bool dwarf_callchain_users;
+
+struct callchain_param {
+	bool			enabled;
+	enum perf_call_graph_mode record_mode;
+	u32			dump_size;
+	enum chain_mode 	mode;
+	u16			max_stack;
+	u32			print_limit;
+	double			min_percent;
+	sort_chain_func_t	sort;
+	enum chain_order	order;
+	bool			order_set;
+	enum chain_key		key;
+	bool			branch_callstack;
+	enum chain_value	value;
+};
+
+extern struct callchain_param callchain_param;
+extern struct callchain_param callchain_param_default;
+
+struct callchain_list {
+	u64			ip;
+	struct map_symbol	ms;
+	struct /* for TUI */ {
+		bool		unfolded;
+		bool		has_children;
+	};
+	u64			branch_count;
+	u64			from_count;
+	u64			predicted_count;
+	u64			abort_count;
+	u64			cycles_count;
+	u64			iter_count;
+	u64			iter_cycles;
+	struct branch_type_stat brtype_stat;
+	const char		*srcline;
+	struct list_head	list;
+};
+
+/*
+ * A callchain cursor is a single linked list that
+ * let one feed a callchain progressively.
+ * It keeps persistent allocated entries to minimize
+ * allocations.
+ */
+struct callchain_cursor_node {
+	u64				ip;
+	struct map_symbol		ms;
+	const char			*srcline;
+	/* Indicate valid cursor node for LBR stitch */
+	bool				valid;
+
+	bool				branch;
+	struct branch_flags		branch_flags;
+	u64				branch_from;
+	int				nr_loop_iter;
+	u64				iter_cycles;
+	struct callchain_cursor_node	*next;
+};
+
+struct stitch_list {
+	struct list_head		node;
+	struct callchain_cursor_node	cursor;
+};
+
+struct callchain_cursor {
+	u64				nr;
+	struct callchain_cursor_node	*first;
+	struct callchain_cursor_node	**last;
+	u64				pos;
+	struct callchain_cursor_node	*curr;
+};
+
+extern __thread struct callchain_cursor callchain_cursor;
+
+static inline void callchain_init(struct callchain_root *root)
+{
+	INIT_LIST_HEAD(&root->node.val);
+	INIT_LIST_HEAD(&root->node.parent_val);
+
+	root->node.parent = NULL;
+	root->node.hit = 0;
+	root->node.children_hit = 0;
+	root->node.rb_root_in = RB_ROOT;
+	root->max_depth = 0;
+}
+
+static inline u64 callchain_cumul_hits(struct callchain_node *node)
+{
+	return node->hit + node->children_hit;
+}
+
+static inline unsigned callchain_cumul_counts(struct callchain_node *node)
+{
+	return node->count + node->children_count;
+}
+
+int callchain_register_param(struct callchain_param *param);
+int callchain_append(struct callchain_root *root,
+		     struct callchain_cursor *cursor,
+		     u64 period);
+
+int callchain_merge(struct callchain_cursor *cursor,
+		    struct callchain_root *dst, struct callchain_root *src);
+
+void callchain_cursor_reset(struct callchain_cursor *cursor);
+
+int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
+			    struct map_symbol *ms,
+			    bool branch, struct branch_flags *flags,
+			    int nr_loop_iter, u64 iter_cycles, u64 branch_from,
+			    const char *srcline);
+
+/* Close a cursor writing session. Initialize for the reader */
+static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
+{
+	cursor->curr = cursor->first;
+	cursor->pos = 0;
+}
+
+/* Cursor reading iteration helpers */
+static inline struct callchain_cursor_node *
+callchain_cursor_current(struct callchain_cursor *cursor)
+{
+	if (cursor->pos == cursor->nr)
+		return NULL;
+
+	return cursor->curr;
+}
+
+static inline void callchain_cursor_advance(struct callchain_cursor *cursor)
+{
+	cursor->curr = cursor->curr->next;
+	cursor->pos++;
+}
+
+int callchain_cursor__copy(struct callchain_cursor *dst,
+			   struct callchain_cursor *src);
+
+struct option;
+struct hist_entry;
+
+int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset);
+int record_callchain_opt(const struct option *opt, const char *arg, int unset);
+
+struct record_opts;
+
+int record_opts__parse_callchain(struct record_opts *record,
+				 struct callchain_param *callchain,
+				 const char *arg, bool unset);
+
+int sample__resolve_callchain(struct perf_sample *sample,
+			      struct callchain_cursor *cursor, struct symbol **parent,
+			      struct evsel *evsel, struct addr_location *al,
+			      int max_stack);
+int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample);
+int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
+			bool hide_unresolved);
+
+extern const char record_callchain_help[];
+int parse_callchain_record(const char *arg, struct callchain_param *param);
+int parse_callchain_record_opt(const char *arg, struct callchain_param *param);
+int parse_callchain_report_opt(const char *arg);
+int parse_callchain_top_opt(const char *arg);
+int perf_callchain_config(const char *var, const char *value);
+
+static inline void callchain_cursor_snapshot(struct callchain_cursor *dest,
+					     struct callchain_cursor *src)
+{
+	*dest = *src;
+
+	dest->first = src->curr;
+	dest->nr -= src->pos;
+}
+
+#ifdef HAVE_SKIP_CALLCHAIN_IDX
+int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain);
+#else
+static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
+			struct ip_callchain *chain __maybe_unused)
+{
+	return -1;
+}
+#endif
+
+void arch__add_leaf_frame_record_opts(struct record_opts *opts);
+
+char *callchain_list__sym_name(struct callchain_list *cl,
+			       char *bf, size_t bfsize, bool show_dso);
+char *callchain_node__scnprintf_value(struct callchain_node *node,
+				      char *bf, size_t bfsize, u64 total);
+int callchain_node__fprintf_value(struct callchain_node *node,
+				  FILE *fp, u64 total);
+
+int callchain_list_counts__printf_value(struct callchain_list *clist,
+					FILE *fp, char *bf, int bfsize);
+
+void free_callchain(struct callchain_root *root);
+void decay_callchain(struct callchain_root *root);
+int callchain_node__make_parent_list(struct callchain_node *node);
+
+int callchain_branch_counts(struct callchain_root *root,
+			    u64 *branch_count, u64 *predicted_count,
+			    u64 *abort_count, u64 *cycles_count);
+
+void callchain_param_setup(u64 sample_type, const char *arch);
+
+bool callchain_cnode_matched(struct callchain_node *base_cnode,
+			     struct callchain_node *pair_cnode);
+
+u64 callchain_total_hits(struct hists *hists);
+
+s64 callchain_avg_cycles(struct callchain_node *cnode);
+
+#endif	/* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/cap.c b/tools/perf/util/cap.c
new file mode 100644
index 000000000..c3ba841bb
--- /dev/null
+++ b/tools/perf/util/cap.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Capability utilities
+ */
+
+#ifdef HAVE_LIBCAP_SUPPORT
+
+#include "cap.h"
+#include <stdbool.h>
+#include <sys/capability.h>
+
+bool perf_cap__capable(cap_value_t cap)
+{
+	cap_flag_value_t val;
+	cap_t caps = cap_get_proc();
+
+	if (!caps)
+		return false;
+
+	if (cap_get_flag(caps, cap, CAP_EFFECTIVE, &val) != 0)
+		val = CAP_CLEAR;
+
+	if (cap_free(caps) != 0)
+		return false;
+
+	return val == CAP_SET;
+}
+
+#endif  /* HAVE_LIBCAP_SUPPORT */
diff --git a/tools/perf/util/cap.h b/tools/perf/util/cap.h
new file mode 100644
index 000000000..ae52878c0
--- /dev/null
+++ b/tools/perf/util/cap.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CAP_H
+#define __PERF_CAP_H
+
+#include <stdbool.h>
+#include <linux/capability.h>
+#include <linux/compiler.h>
+
+#ifdef HAVE_LIBCAP_SUPPORT
+
+#include <sys/capability.h>
+
+bool perf_cap__capable(cap_value_t cap);
+
+#else
+
+#include <unistd.h>
+#include <sys/types.h>
+
+static inline bool perf_cap__capable(int cap __maybe_unused)
+{
+	return geteuid() == 0;
+}
+
+#endif /* HAVE_LIBCAP_SUPPORT */
+
+/* For older systems */
+#ifndef CAP_SYSLOG
+#define CAP_SYSLOG	34
+#endif
+
+#ifndef CAP_PERFMON
+#define CAP_PERFMON	38
+#endif
+
+#endif /* __PERF_CAP_H */
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
new file mode 100644
index 000000000..cd978c240
--- /dev/null
+++ b/tools/perf/util/cgroup.c
@@ -0,0 +1,590 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <subcmd/parse-options.h>
+#include "evsel.h"
+#include "cgroup.h"
+#include "evlist.h"
+#include "rblist.h"
+#include "metricgroup.h"
+#include "stat.h"
+#include <linux/zalloc.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <api/fs/fs.h>
+#include <ftw.h>
+#include <regex.h>
+
+int nr_cgroups;
+bool cgrp_event_expanded;
+
+/* used to match cgroup name with patterns */
+struct cgroup_name {
+	struct list_head list;
+	bool used;
+	char name[];
+};
+static LIST_HEAD(cgroup_list);
+
+static int open_cgroup(const char *name)
+{
+	char path[PATH_MAX + 1];
+	char mnt[PATH_MAX + 1];
+	int fd;
+
+
+	if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1, "perf_event"))
+		return -1;
+
+	scnprintf(path, PATH_MAX, "%s/%s", mnt, name);
+
+	fd = open(path, O_RDONLY);
+	if (fd == -1)
+		fprintf(stderr, "no access to cgroup %s\n", path);
+
+	return fd;
+}
+
+#ifdef HAVE_FILE_HANDLE
+int read_cgroup_id(struct cgroup *cgrp)
+{
+	char path[PATH_MAX + 1];
+	char mnt[PATH_MAX + 1];
+	struct {
+		struct file_handle fh;
+		uint64_t cgroup_id;
+	} handle;
+	int mount_id;
+
+	if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1, "perf_event"))
+		return -1;
+
+	scnprintf(path, PATH_MAX, "%s/%s", mnt, cgrp->name);
+
+	handle.fh.handle_bytes = sizeof(handle.cgroup_id);
+	if (name_to_handle_at(AT_FDCWD, path, &handle.fh, &mount_id, 0) < 0)
+		return -1;
+
+	cgrp->id = handle.cgroup_id;
+	return 0;
+}
+#endif  /* HAVE_FILE_HANDLE */
+
+#ifndef CGROUP2_SUPER_MAGIC
+#define CGROUP2_SUPER_MAGIC  0x63677270
+#endif
+
+int cgroup_is_v2(const char *subsys)
+{
+	char mnt[PATH_MAX + 1];
+	struct statfs stbuf;
+
+	if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1, subsys))
+		return -1;
+
+	if (statfs(mnt, &stbuf) < 0)
+		return -1;
+
+	return (stbuf.f_type == CGROUP2_SUPER_MAGIC);
+}
+
+static struct cgroup *evlist__find_cgroup(struct evlist *evlist, const char *str)
+{
+	struct evsel *counter;
+	/*
+	 * check if cgrp is already defined, if so we reuse it
+	 */
+	evlist__for_each_entry(evlist, counter) {
+		if (!counter->cgrp)
+			continue;
+		if (!strcmp(counter->cgrp->name, str))
+			return cgroup__get(counter->cgrp);
+	}
+
+	return NULL;
+}
+
+static struct cgroup *cgroup__new(const char *name, bool do_open)
+{
+	struct cgroup *cgroup = zalloc(sizeof(*cgroup));
+
+	if (cgroup != NULL) {
+		refcount_set(&cgroup->refcnt, 1);
+
+		cgroup->name = strdup(name);
+		if (!cgroup->name)
+			goto out_err;
+
+		if (do_open) {
+			cgroup->fd = open_cgroup(name);
+			if (cgroup->fd == -1)
+				goto out_free_name;
+		} else {
+			cgroup->fd = -1;
+		}
+	}
+
+	return cgroup;
+
+out_free_name:
+	zfree(&cgroup->name);
+out_err:
+	free(cgroup);
+	return NULL;
+}
+
+struct cgroup *evlist__findnew_cgroup(struct evlist *evlist, const char *name)
+{
+	struct cgroup *cgroup = evlist__find_cgroup(evlist, name);
+
+	return cgroup ?: cgroup__new(name, true);
+}
+
+static int add_cgroup(struct evlist *evlist, const char *str)
+{
+	struct evsel *counter;
+	struct cgroup *cgrp = evlist__findnew_cgroup(evlist, str);
+	int n;
+
+	if (!cgrp)
+		return -1;
+	/*
+	 * find corresponding event
+	 * if add cgroup N, then need to find event N
+	 */
+	n = 0;
+	evlist__for_each_entry(evlist, counter) {
+		if (n == nr_cgroups)
+			goto found;
+		n++;
+	}
+
+	cgroup__put(cgrp);
+	return -1;
+found:
+	counter->cgrp = cgrp;
+	return 0;
+}
+
+static void cgroup__delete(struct cgroup *cgroup)
+{
+	if (cgroup->fd >= 0)
+		close(cgroup->fd);
+	zfree(&cgroup->name);
+	free(cgroup);
+}
+
+void cgroup__put(struct cgroup *cgrp)
+{
+	if (cgrp && refcount_dec_and_test(&cgrp->refcnt)) {
+		cgroup__delete(cgrp);
+	}
+}
+
+struct cgroup *cgroup__get(struct cgroup *cgroup)
+{
+       if (cgroup)
+		refcount_inc(&cgroup->refcnt);
+       return cgroup;
+}
+
+static void evsel__set_default_cgroup(struct evsel *evsel, struct cgroup *cgroup)
+{
+	if (evsel->cgrp == NULL)
+		evsel->cgrp = cgroup__get(cgroup);
+}
+
+void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel)
+		evsel__set_default_cgroup(evsel, cgroup);
+}
+
+/* helper function for ftw() in match_cgroups and list_cgroups */
+static int add_cgroup_name(const char *fpath, const struct stat *sb __maybe_unused,
+			   int typeflag, struct FTW *ftwbuf __maybe_unused)
+{
+	struct cgroup_name *cn;
+
+	if (typeflag != FTW_D)
+		return 0;
+
+	cn = malloc(sizeof(*cn) + strlen(fpath) + 1);
+	if (cn == NULL)
+		return -1;
+
+	cn->used = false;
+	strcpy(cn->name, fpath);
+
+	list_add_tail(&cn->list, &cgroup_list);
+	return 0;
+}
+
+static int check_and_add_cgroup_name(const char *fpath)
+{
+	struct cgroup_name *cn;
+
+	list_for_each_entry(cn, &cgroup_list, list) {
+		if (!strcmp(cn->name, fpath))
+			return 0;
+	}
+
+	/* pretend if it's added by ftw() */
+	return add_cgroup_name(fpath, NULL, FTW_D, NULL);
+}
+
+static void release_cgroup_list(void)
+{
+	struct cgroup_name *cn;
+
+	while (!list_empty(&cgroup_list)) {
+		cn = list_first_entry(&cgroup_list, struct cgroup_name, list);
+		list_del(&cn->list);
+		free(cn);
+	}
+}
+
+/* collect given cgroups only */
+static int list_cgroups(const char *str)
+{
+	const char *p, *e, *eos = str + strlen(str);
+	struct cgroup_name *cn;
+	char *s;
+
+	/* use given name as is when no regex is given */
+	for (;;) {
+		p = strchr(str, ',');
+		e = p ? p : eos;
+
+		if (e - str) {
+			int ret;
+
+			s = strndup(str, e - str);
+			if (!s)
+				return -1;
+
+			ret = check_and_add_cgroup_name(s);
+			free(s);
+			if (ret < 0)
+				return -1;
+		} else {
+			if (check_and_add_cgroup_name("/") < 0)
+				return -1;
+		}
+
+		if (!p)
+			break;
+		str = p+1;
+	}
+
+	/* these groups will be used */
+	list_for_each_entry(cn, &cgroup_list, list)
+		cn->used = true;
+
+	return 0;
+}
+
+/* collect all cgroups first and then match with the pattern */
+static int match_cgroups(const char *str)
+{
+	char mnt[PATH_MAX];
+	const char *p, *e, *eos = str + strlen(str);
+	struct cgroup_name *cn;
+	regex_t reg;
+	int prefix_len;
+	char *s;
+
+	if (cgroupfs_find_mountpoint(mnt, sizeof(mnt), "perf_event"))
+		return -1;
+
+	/* cgroup_name will have a full path, skip the root directory */
+	prefix_len = strlen(mnt);
+
+	/* collect all cgroups in the cgroup_list */
+	if (nftw(mnt, add_cgroup_name, 20, 0) < 0)
+		return -1;
+
+	for (;;) {
+		p = strchr(str, ',');
+		e = p ? p : eos;
+
+		/* allow empty cgroups, i.e., skip */
+		if (e - str) {
+			/* termination added */
+			s = strndup(str, e - str);
+			if (!s)
+				return -1;
+			if (regcomp(&reg, s, REG_NOSUB)) {
+				free(s);
+				return -1;
+			}
+
+			/* check cgroup name with the pattern */
+			list_for_each_entry(cn, &cgroup_list, list) {
+				char *name = cn->name + prefix_len;
+
+				if (name[0] == '/' && name[1])
+					name++;
+				if (!regexec(&reg, name, 0, NULL, 0))
+					cn->used = true;
+			}
+			regfree(&reg);
+			free(s);
+		} else {
+			/* first entry to root cgroup */
+			cn = list_first_entry(&cgroup_list, struct cgroup_name,
+					      list);
+			cn->used = true;
+		}
+
+		if (!p)
+			break;
+		str = p+1;
+	}
+	return prefix_len;
+}
+
+int parse_cgroups(const struct option *opt, const char *str,
+		  int unset __maybe_unused)
+{
+	struct evlist *evlist = *(struct evlist **)opt->value;
+	struct evsel *counter;
+	struct cgroup *cgrp = NULL;
+	const char *p, *e, *eos = str + strlen(str);
+	char *s;
+	int ret, i;
+
+	if (list_empty(&evlist->core.entries)) {
+		fprintf(stderr, "must define events before cgroups\n");
+		return -1;
+	}
+
+	for (;;) {
+		p = strchr(str, ',');
+		e = p ? p : eos;
+
+		/* allow empty cgroups, i.e., skip */
+		if (e - str) {
+			/* termination added */
+			s = strndup(str, e - str);
+			if (!s)
+				return -1;
+			ret = add_cgroup(evlist, s);
+			free(s);
+			if (ret)
+				return -1;
+		}
+		/* nr_cgroups is increased een for empty cgroups */
+		nr_cgroups++;
+		if (!p)
+			break;
+		str = p+1;
+	}
+	/* for the case one cgroup combine to multiple events */
+	i = 0;
+	if (nr_cgroups == 1) {
+		evlist__for_each_entry(evlist, counter) {
+			if (i == 0)
+				cgrp = counter->cgrp;
+			else {
+				counter->cgrp = cgrp;
+				refcount_inc(&cgrp->refcnt);
+			}
+			i++;
+		}
+	}
+	return 0;
+}
+
+static bool has_pattern_string(const char *str)
+{
+	return !!strpbrk(str, "{}[]()|*+?^$");
+}
+
+int evlist__expand_cgroup(struct evlist *evlist, const char *str,
+			  struct rblist *metric_events, bool open_cgroup)
+{
+	struct evlist *orig_list, *tmp_list;
+	struct evsel *pos, *evsel, *leader;
+	struct rblist orig_metric_events;
+	struct cgroup *cgrp = NULL;
+	struct cgroup_name *cn;
+	int ret = -1;
+	int prefix_len;
+
+	if (evlist->core.nr_entries == 0) {
+		fprintf(stderr, "must define events before cgroups\n");
+		return -EINVAL;
+	}
+
+	orig_list = evlist__new();
+	tmp_list = evlist__new();
+	if (orig_list == NULL || tmp_list == NULL) {
+		fprintf(stderr, "memory allocation failed\n");
+		return -ENOMEM;
+	}
+
+	/* save original events and init evlist */
+	evlist__splice_list_tail(orig_list, &evlist->core.entries);
+	evlist->core.nr_entries = 0;
+
+	if (metric_events) {
+		orig_metric_events = *metric_events;
+		rblist__init(metric_events);
+	} else {
+		rblist__init(&orig_metric_events);
+	}
+
+	if (has_pattern_string(str))
+		prefix_len = match_cgroups(str);
+	else
+		prefix_len = list_cgroups(str);
+
+	if (prefix_len < 0)
+		goto out_err;
+
+	list_for_each_entry(cn, &cgroup_list, list) {
+		char *name;
+
+		if (!cn->used)
+			continue;
+
+		/* cgroup_name might have a full path, skip the prefix */
+		name = cn->name + prefix_len;
+		if (name[0] == '/' && name[1])
+			name++;
+		cgrp = cgroup__new(name, open_cgroup);
+		if (cgrp == NULL)
+			goto out_err;
+
+		leader = NULL;
+		evlist__for_each_entry(orig_list, pos) {
+			evsel = evsel__clone(pos);
+			if (evsel == NULL)
+				goto out_err;
+
+			cgroup__put(evsel->cgrp);
+			evsel->cgrp = cgroup__get(cgrp);
+
+			if (evsel__is_group_leader(pos))
+				leader = evsel;
+			evsel__set_leader(evsel, leader);
+
+			evlist__add(tmp_list, evsel);
+		}
+		/* cgroup__new() has a refcount, release it here */
+		cgroup__put(cgrp);
+		nr_cgroups++;
+
+		if (metric_events) {
+			perf_stat__collect_metric_expr(tmp_list);
+			if (metricgroup__copy_metric_events(tmp_list, cgrp,
+							    metric_events,
+							    &orig_metric_events) < 0)
+				goto out_err;
+		}
+
+		evlist__splice_list_tail(evlist, &tmp_list->core.entries);
+		tmp_list->core.nr_entries = 0;
+	}
+
+	if (list_empty(&evlist->core.entries)) {
+		fprintf(stderr, "no cgroup matched: %s\n", str);
+		goto out_err;
+	}
+
+	ret = 0;
+	cgrp_event_expanded = true;
+
+out_err:
+	evlist__delete(orig_list);
+	evlist__delete(tmp_list);
+	rblist__exit(&orig_metric_events);
+	release_cgroup_list();
+
+	return ret;
+}
+
+static struct cgroup *__cgroup__findnew(struct rb_root *root, uint64_t id,
+					bool create, const char *path)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct cgroup *cgrp;
+
+	while (*p != NULL) {
+		parent = *p;
+		cgrp = rb_entry(parent, struct cgroup, node);
+
+		if (cgrp->id == id)
+			return cgrp;
+
+		if (cgrp->id < id)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	if (!create)
+		return NULL;
+
+	cgrp = malloc(sizeof(*cgrp));
+	if (cgrp == NULL)
+		return NULL;
+
+	cgrp->name = strdup(path);
+	if (cgrp->name == NULL) {
+		free(cgrp);
+		return NULL;
+	}
+
+	cgrp->fd = -1;
+	cgrp->id = id;
+	refcount_set(&cgrp->refcnt, 1);
+
+	rb_link_node(&cgrp->node, parent, p);
+	rb_insert_color(&cgrp->node, root);
+
+	return cgrp;
+}
+
+struct cgroup *cgroup__findnew(struct perf_env *env, uint64_t id,
+			       const char *path)
+{
+	struct cgroup *cgrp;
+
+	down_write(&env->cgroups.lock);
+	cgrp = __cgroup__findnew(&env->cgroups.tree, id, true, path);
+	up_write(&env->cgroups.lock);
+	return cgrp;
+}
+
+struct cgroup *cgroup__find(struct perf_env *env, uint64_t id)
+{
+	struct cgroup *cgrp;
+
+	down_read(&env->cgroups.lock);
+	cgrp = __cgroup__findnew(&env->cgroups.tree, id, false, NULL);
+	up_read(&env->cgroups.lock);
+	return cgrp;
+}
+
+void perf_env__purge_cgroups(struct perf_env *env)
+{
+	struct rb_node *node;
+	struct cgroup *cgrp;
+
+	down_write(&env->cgroups.lock);
+	while (!RB_EMPTY_ROOT(&env->cgroups.tree)) {
+		node = rb_first(&env->cgroups.tree);
+		cgrp = rb_entry(node, struct cgroup, node);
+
+		rb_erase(node, &env->cgroups.tree);
+		cgroup__put(cgrp);
+	}
+	up_write(&env->cgroups.lock);
+}
diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
new file mode 100644
index 000000000..12256b786
--- /dev/null
+++ b/tools/perf/util/cgroup.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __CGROUP_H__
+#define __CGROUP_H__
+
+#include <linux/compiler.h>
+#include <linux/refcount.h>
+#include <linux/rbtree.h>
+#include "util/env.h"
+
+struct option;
+
+struct cgroup {
+	struct rb_node		node;
+	u64			id;
+	char			*name;
+	int			fd;
+	refcount_t		refcnt;
+};
+
+extern int nr_cgroups; /* number of explicit cgroups defined */
+extern bool cgrp_event_expanded;
+
+struct cgroup *cgroup__get(struct cgroup *cgroup);
+void cgroup__put(struct cgroup *cgroup);
+
+struct evlist;
+struct rblist;
+
+struct cgroup *evlist__findnew_cgroup(struct evlist *evlist, const char *name);
+int evlist__expand_cgroup(struct evlist *evlist, const char *cgroups,
+			  struct rblist *metric_events, bool open_cgroup);
+
+void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup);
+
+int parse_cgroups(const struct option *opt, const char *str, int unset);
+
+struct cgroup *cgroup__findnew(struct perf_env *env, uint64_t id,
+			       const char *path);
+struct cgroup *cgroup__find(struct perf_env *env, uint64_t id);
+
+void perf_env__purge_cgroups(struct perf_env *env);
+
+#ifdef HAVE_FILE_HANDLE
+int read_cgroup_id(struct cgroup *cgrp);
+#else
+static inline int read_cgroup_id(struct cgroup *cgrp __maybe_unused)
+{
+	return -1;
+}
+#endif  /* HAVE_FILE_HANDLE */
+
+int cgroup_is_v2(const char *subsys);
+
+#endif /* __CGROUP_H__ */
diff --git a/tools/perf/util/clockid.c b/tools/perf/util/clockid.c
new file mode 100644
index 000000000..74365a5d9
--- /dev/null
+++ b/tools/perf/util/clockid.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <subcmd/parse-options.h>
+#include <stdio.h>
+#include <time.h>
+#include <strings.h>
+#include <linux/time64.h>
+#include "debug.h"
+#include "clockid.h"
+#include "record.h"
+
+struct clockid_map {
+	const char *name;
+	int clockid;
+};
+
+#define CLOCKID_MAP(n, c)	\
+	{ .name = n, .clockid = (c), }
+
+#define CLOCKID_END	{ .name = NULL, }
+
+
+/*
+ * Add the missing ones, we need to build on many distros...
+ */
+#ifndef CLOCK_MONOTONIC_RAW
+#define CLOCK_MONOTONIC_RAW 4
+#endif
+#ifndef CLOCK_BOOTTIME
+#define CLOCK_BOOTTIME 7
+#endif
+#ifndef CLOCK_TAI
+#define CLOCK_TAI 11
+#endif
+
+static const struct clockid_map clockids[] = {
+	/* available for all events, NMI safe */
+	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
+	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
+
+	/* available for some events */
+	CLOCKID_MAP("realtime", CLOCK_REALTIME),
+	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
+	CLOCKID_MAP("tai", CLOCK_TAI),
+
+	/* available for the lazy */
+	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
+	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
+	CLOCKID_MAP("real", CLOCK_REALTIME),
+	CLOCKID_MAP("boot", CLOCK_BOOTTIME),
+
+	CLOCKID_END,
+};
+
+static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
+{
+	struct timespec res;
+
+	*res_ns = 0;
+	if (!clock_getres(clk_id, &res))
+		*res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
+	else
+		pr_warning("WARNING: Failed to determine specified clock resolution.\n");
+
+	return 0;
+}
+
+int parse_clockid(const struct option *opt, const char *str, int unset)
+{
+	struct record_opts *opts = (struct record_opts *)opt->value;
+	const struct clockid_map *cm;
+	const char *ostr = str;
+
+	if (unset) {
+		opts->use_clockid = 0;
+		return 0;
+	}
+
+	/* no arg passed */
+	if (!str)
+		return 0;
+
+	/* no setting it twice */
+	if (opts->use_clockid)
+		return -1;
+
+	opts->use_clockid = true;
+
+	/* if its a number, we're done */
+	if (sscanf(str, "%d", &opts->clockid) == 1)
+		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
+
+	/* allow a "CLOCK_" prefix to the name */
+	if (!strncasecmp(str, "CLOCK_", 6))
+		str += 6;
+
+	for (cm = clockids; cm->name; cm++) {
+		if (!strcasecmp(str, cm->name)) {
+			opts->clockid = cm->clockid;
+			return get_clockid_res(opts->clockid,
+					       &opts->clockid_res_ns);
+		}
+	}
+
+	opts->use_clockid = false;
+	ui__warning("unknown clockid %s, check man page\n", ostr);
+	return -1;
+}
+
+const char *clockid_name(clockid_t clk_id)
+{
+	const struct clockid_map *cm;
+
+	for (cm = clockids; cm->name; cm++) {
+		if (cm->clockid == clk_id)
+			return cm->name;
+	}
+	return "(not found)";
+}
diff --git a/tools/perf/util/clockid.h b/tools/perf/util/clockid.h
new file mode 100644
index 000000000..9b49b4711
--- /dev/null
+++ b/tools/perf/util/clockid.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __PERF_CLOCKID_H
+#define __PERF_CLOCKID_H
+
+struct option;
+int parse_clockid(const struct option *opt, const char *str, int unset);
+
+const char *clockid_name(clockid_t clk_id);
+
+#endif
diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c
new file mode 100644
index 000000000..fa8248aad
--- /dev/null
+++ b/tools/perf/util/cloexec.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <sched.h>
+#include "util.h" // for sched_getcpu()
+#include "../perf-sys.h"
+#include "cloexec.h"
+#include "event.h"
+#include "asm/bug.h"
+#include "debug.h"
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <linux/string.h>
+
+static unsigned long flag = PERF_FLAG_FD_CLOEXEC;
+
+int __weak sched_getcpu(void)
+{
+#ifdef __NR_getcpu
+	unsigned cpu;
+	int err = syscall(__NR_getcpu, &cpu, NULL, NULL);
+	if (!err)
+		return cpu;
+#else
+	errno = ENOSYS;
+#endif
+	return -1;
+}
+
+static int perf_flag_probe(void)
+{
+	/* use 'safest' configuration as used in evsel__fallback() */
+	struct perf_event_attr attr = {
+		.type = PERF_TYPE_SOFTWARE,
+		.config = PERF_COUNT_SW_CPU_CLOCK,
+		.exclude_kernel = 1,
+	};
+	int fd;
+	int err;
+	int cpu;
+	pid_t pid = -1;
+	char sbuf[STRERR_BUFSIZE];
+
+	cpu = sched_getcpu();
+	if (cpu < 0)
+		cpu = 0;
+
+	/*
+	 * Using -1 for the pid is a workaround to avoid gratuitous jump label
+	 * changes.
+	 */
+	while (1) {
+		/* check cloexec flag */
+		fd = sys_perf_event_open(&attr, pid, cpu, -1,
+					 PERF_FLAG_FD_CLOEXEC);
+		if (fd < 0 && pid == -1 && errno == EACCES) {
+			pid = 0;
+			continue;
+		}
+		break;
+	}
+	err = errno;
+
+	if (fd >= 0) {
+		close(fd);
+		return 1;
+	}
+
+	WARN_ONCE(err != EINVAL && err != EBUSY && err != EACCES,
+		  "perf_event_open(..., PERF_FLAG_FD_CLOEXEC) failed with unexpected error %d (%s)\n",
+		  err, str_error_r(err, sbuf, sizeof(sbuf)));
+
+	/* not supported, confirm error related to PERF_FLAG_FD_CLOEXEC */
+	while (1) {
+		fd = sys_perf_event_open(&attr, pid, cpu, -1, 0);
+		if (fd < 0 && pid == -1 && errno == EACCES) {
+			pid = 0;
+			continue;
+		}
+		break;
+	}
+	err = errno;
+
+	if (fd >= 0)
+		close(fd);
+
+	if (WARN_ONCE(fd < 0 && err != EBUSY && err != EACCES,
+		      "perf_event_open(..., 0) failed unexpectedly with error %d (%s)\n",
+		      err, str_error_r(err, sbuf, sizeof(sbuf))))
+		return -1;
+
+	return 0;
+}
+
+unsigned long perf_event_open_cloexec_flag(void)
+{
+	static bool probed;
+
+	if (!probed) {
+		if (perf_flag_probe() <= 0)
+			flag = 0;
+		probed = true;
+	}
+
+	return flag;
+}
diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h
new file mode 100644
index 000000000..78216b101
--- /dev/null
+++ b/tools/perf/util/cloexec.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CLOEXEC_H
+#define __PERF_CLOEXEC_H
+
+unsigned long perf_event_open_cloexec_flag(void);
+
+#endif /* __PERF_CLOEXEC_H */
diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c
new file mode 100644
index 000000000..bffbdd216
--- /dev/null
+++ b/tools/perf/util/color.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <subcmd/pager.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "color.h"
+#include <math.h>
+#include <unistd.h>
+
+int perf_use_color_default = -1;
+
+static int __color_vsnprintf(char *bf, size_t size, const char *color,
+			     const char *fmt, va_list args, const char *trail)
+{
+	int r = 0;
+
+	/*
+	 * Auto-detect:
+	 */
+	if (perf_use_color_default < 0) {
+		if (isatty(1) || pager_in_use())
+			perf_use_color_default = 1;
+		else
+			perf_use_color_default = 0;
+	}
+
+	if (perf_use_color_default && *color)
+		r += scnprintf(bf, size, "%s", color);
+	r += vscnprintf(bf + r, size - r, fmt, args);
+	if (perf_use_color_default && *color)
+		r += scnprintf(bf + r, size - r, "%s", PERF_COLOR_RESET);
+	if (trail)
+		r += scnprintf(bf + r, size - r, "%s", trail);
+	return r;
+}
+
+/* Colors are not included in return value */
+static int __color_vfprintf(FILE *fp, const char *color, const char *fmt,
+		va_list args)
+{
+	int r = 0;
+
+	/*
+	 * Auto-detect:
+	 */
+	if (perf_use_color_default < 0) {
+		if (isatty(fileno(fp)) || pager_in_use())
+			perf_use_color_default = 1;
+		else
+			perf_use_color_default = 0;
+	}
+
+	if (perf_use_color_default && *color)
+		fprintf(fp, "%s", color);
+	r += vfprintf(fp, fmt, args);
+	if (perf_use_color_default && *color)
+		fprintf(fp, "%s", PERF_COLOR_RESET);
+	return r;
+}
+
+int color_vsnprintf(char *bf, size_t size, const char *color,
+		    const char *fmt, va_list args)
+{
+	return __color_vsnprintf(bf, size, color, fmt, args, NULL);
+}
+
+int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args)
+{
+	return __color_vfprintf(fp, color, fmt, args);
+}
+
+int color_snprintf(char *bf, size_t size, const char *color,
+		   const char *fmt, ...)
+{
+	va_list args;
+	int r;
+
+	va_start(args, fmt);
+	r = color_vsnprintf(bf, size, color, fmt, args);
+	va_end(args);
+	return r;
+}
+
+int color_fprintf(FILE *fp, const char *color, const char *fmt, ...)
+{
+	va_list args;
+	int r;
+
+	va_start(args, fmt);
+	r = color_vfprintf(fp, color, fmt, args);
+	va_end(args);
+	return r;
+}
+
+/*
+ * This function splits the buffer by newlines and colors the lines individually.
+ *
+ * Returns 0 on success.
+ */
+int color_fwrite_lines(FILE *fp, const char *color,
+		size_t count, const char *buf)
+{
+	if (!*color)
+		return fwrite(buf, count, 1, fp) != 1;
+
+	while (count) {
+		char *p = memchr(buf, '\n', count);
+
+		if (p != buf && (fputs(color, fp) < 0 ||
+				fwrite(buf, p ? (size_t)(p - buf) : count, 1, fp) != 1 ||
+				fputs(PERF_COLOR_RESET, fp) < 0))
+			return -1;
+		if (!p)
+			return 0;
+		if (fputc('\n', fp) < 0)
+			return -1;
+		count -= p + 1 - buf;
+		buf = p + 1;
+	}
+	return 0;
+}
+
+const char *get_percent_color(double percent)
+{
+	const char *color = PERF_COLOR_NORMAL;
+
+	/*
+	 * We color high-overhead entries in red, mid-overhead
+	 * entries in green - and keep the low overhead places
+	 * normal:
+	 */
+	if (fabs(percent) >= MIN_RED)
+		color = PERF_COLOR_RED;
+	else {
+		if (fabs(percent) > MIN_GREEN)
+			color = PERF_COLOR_GREEN;
+	}
+	return color;
+}
+
+int percent_color_fprintf(FILE *fp, const char *fmt, double percent)
+{
+	int r;
+	const char *color;
+
+	color = get_percent_color(percent);
+	r = color_fprintf(fp, color, fmt, percent);
+
+	return r;
+}
+
+int value_color_snprintf(char *bf, size_t size, const char *fmt, double value)
+{
+	const char *color = get_percent_color(value);
+	return color_snprintf(bf, size, color, fmt, value);
+}
+
+int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...)
+{
+	va_list args;
+	double percent;
+
+	va_start(args, fmt);
+	percent = va_arg(args, double);
+	va_end(args);
+	return value_color_snprintf(bf, size, fmt, percent);
+}
+
+int percent_color_len_snprintf(char *bf, size_t size, const char *fmt, ...)
+{
+	va_list args;
+	int len;
+	double percent;
+	const char *color;
+
+	va_start(args, fmt);
+	len = va_arg(args, int);
+	percent = va_arg(args, double);
+	va_end(args);
+
+	color = get_percent_color(percent);
+	return color_snprintf(bf, size, color, fmt, len, percent);
+}
diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h
new file mode 100644
index 000000000..01f7bed21
--- /dev/null
+++ b/tools/perf/util/color.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_COLOR_H
+#define __PERF_COLOR_H
+
+#include <stdio.h>
+#include <stdarg.h>
+
+/* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */
+#define COLOR_MAXLEN 24
+
+#define PERF_COLOR_NORMAL	""
+#define PERF_COLOR_RESET	"\033[m"
+#define PERF_COLOR_BOLD		"\033[1m"
+#define PERF_COLOR_RED		"\033[31m"
+#define PERF_COLOR_GREEN	"\033[32m"
+#define PERF_COLOR_YELLOW	"\033[33m"
+#define PERF_COLOR_BLUE		"\033[34m"
+#define PERF_COLOR_MAGENTA	"\033[35m"
+#define PERF_COLOR_CYAN		"\033[36m"
+#define PERF_COLOR_BG_RED	"\033[41m"
+
+#define MIN_GREEN	0.5
+#define MIN_RED		5.0
+
+/*
+ * This variable stores the value of color.ui
+ */
+extern int perf_use_color_default;
+
+
+/*
+ * Use this instead of perf_default_config if you need the value of color.ui.
+ */
+int perf_color_default_config(const char *var, const char *value, void *cb);
+
+int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty);
+int color_vsnprintf(char *bf, size_t size, const char *color,
+		    const char *fmt, va_list args);
+int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args);
+int color_fprintf(FILE *fp, const char *color, const char *fmt, ...);
+int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...);
+int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf);
+int value_color_snprintf(char *bf, size_t size, const char *fmt, double value);
+int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...);
+int percent_color_len_snprintf(char *bf, size_t size, const char *fmt, ...);
+int percent_color_fprintf(FILE *fp, const char *fmt, double percent);
+const char *get_percent_color(double percent);
+
+#endif /* __PERF_COLOR_H */
diff --git a/tools/perf/util/color_config.c b/tools/perf/util/color_config.c
new file mode 100644
index 000000000..dc09ba7cb
--- /dev/null
+++ b/tools/perf/util/color_config.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <subcmd/pager.h>
+#include <string.h>
+#include "config.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include "color.h"
+#include <math.h>
+#include <unistd.h>
+
+int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty)
+{
+	if (value) {
+		if (!strcasecmp(value, "never"))
+			return 0;
+		if (!strcasecmp(value, "always"))
+			return 1;
+		if (!strcasecmp(value, "auto"))
+			goto auto_color;
+	}
+
+	/* Missing or explicit false to turn off colorization */
+	if (!perf_config_bool(var, value))
+		return 0;
+
+	/* any normal truth value defaults to 'auto' */
+ auto_color:
+	if (stdout_is_tty < 0)
+		stdout_is_tty = isatty(1);
+	if (stdout_is_tty || pager_in_use()) {
+		char *term = getenv("TERM");
+		if (term && strcmp(term, "dumb"))
+			return 1;
+	}
+	return 0;
+}
+
+int perf_color_default_config(const char *var, const char *value,
+			      void *cb __maybe_unused)
+{
+	if (!strcmp(var, "color.ui")) {
+		perf_use_color_default = perf_config_colorbool(var, value, -1);
+		return 0;
+	}
+
+	return 0;
+}
diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
new file mode 100644
index 000000000..afb8d4fd2
--- /dev/null
+++ b/tools/perf/util/comm.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "comm.h"
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <linux/refcount.h>
+#include <linux/rbtree.h>
+#include <linux/zalloc.h>
+#include "rwsem.h"
+
+struct comm_str {
+	char *str;
+	struct rb_node rb_node;
+	refcount_t refcnt;
+};
+
+/* Should perhaps be moved to struct machine */
+static struct rb_root comm_str_root;
+static struct rw_semaphore comm_str_lock = {.lock = PTHREAD_RWLOCK_INITIALIZER,};
+
+static struct comm_str *comm_str__get(struct comm_str *cs)
+{
+	if (cs && refcount_inc_not_zero(&cs->refcnt))
+		return cs;
+
+	return NULL;
+}
+
+static void comm_str__put(struct comm_str *cs)
+{
+	if (cs && refcount_dec_and_test(&cs->refcnt)) {
+		down_write(&comm_str_lock);
+		rb_erase(&cs->rb_node, &comm_str_root);
+		up_write(&comm_str_lock);
+		zfree(&cs->str);
+		free(cs);
+	}
+}
+
+static struct comm_str *comm_str__alloc(const char *str)
+{
+	struct comm_str *cs;
+
+	cs = zalloc(sizeof(*cs));
+	if (!cs)
+		return NULL;
+
+	cs->str = strdup(str);
+	if (!cs->str) {
+		free(cs);
+		return NULL;
+	}
+
+	refcount_set(&cs->refcnt, 1);
+
+	return cs;
+}
+
+static
+struct comm_str *__comm_str__findnew(const char *str, struct rb_root *root)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct comm_str *iter, *new;
+	int cmp;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct comm_str, rb_node);
+
+		/*
+		 * If we race with comm_str__put, iter->refcnt is 0
+		 * and it will be removed within comm_str__put call
+		 * shortly, ignore it in this search.
+		 */
+		cmp = strcmp(str, iter->str);
+		if (!cmp && comm_str__get(iter))
+			return iter;
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	new = comm_str__alloc(str);
+	if (!new)
+		return NULL;
+
+	rb_link_node(&new->rb_node, parent, p);
+	rb_insert_color(&new->rb_node, root);
+
+	return new;
+}
+
+static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root)
+{
+	struct comm_str *cs;
+
+	down_write(&comm_str_lock);
+	cs = __comm_str__findnew(str, root);
+	up_write(&comm_str_lock);
+
+	return cs;
+}
+
+struct comm *comm__new(const char *str, u64 timestamp, bool exec)
+{
+	struct comm *comm = zalloc(sizeof(*comm));
+
+	if (!comm)
+		return NULL;
+
+	comm->start = timestamp;
+	comm->exec = exec;
+
+	comm->comm_str = comm_str__findnew(str, &comm_str_root);
+	if (!comm->comm_str) {
+		free(comm);
+		return NULL;
+	}
+
+	return comm;
+}
+
+int comm__override(struct comm *comm, const char *str, u64 timestamp, bool exec)
+{
+	struct comm_str *new, *old = comm->comm_str;
+
+	new = comm_str__findnew(str, &comm_str_root);
+	if (!new)
+		return -ENOMEM;
+
+	comm_str__put(old);
+	comm->comm_str = new;
+	comm->start = timestamp;
+	if (exec)
+		comm->exec = true;
+
+	return 0;
+}
+
+void comm__free(struct comm *comm)
+{
+	comm_str__put(comm->comm_str);
+	free(comm);
+}
+
+const char *comm__str(const struct comm *comm)
+{
+	return comm->comm_str->str;
+}
diff --git a/tools/perf/util/comm.h b/tools/perf/util/comm.h
new file mode 100644
index 000000000..f35d8fbfa
--- /dev/null
+++ b/tools/perf/util/comm.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_COMM_H
+#define __PERF_COMM_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <stdbool.h>
+
+struct comm_str;
+
+struct comm {
+	struct comm_str *comm_str;
+	u64 start;
+	struct list_head list;
+	bool exec;
+	union { /* Tool specific area */
+		void	*priv;
+		u64	db_id;
+	};
+};
+
+void comm__free(struct comm *comm);
+struct comm *comm__new(const char *str, u64 timestamp, bool exec);
+const char *comm__str(const struct comm *comm);
+int comm__override(struct comm *comm, const char *str, u64 timestamp,
+		   bool exec);
+
+#endif  /* __PERF_COMM_H */
diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h
new file mode 100644
index 000000000..0cd3369af
--- /dev/null
+++ b/tools/perf/util/compress.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_COMPRESS_H
+#define PERF_COMPRESS_H
+
+#include <stdbool.h>
+#ifdef HAVE_ZSTD_SUPPORT
+#include <zstd.h>
+#endif
+
+#ifdef HAVE_ZLIB_SUPPORT
+int gzip_decompress_to_file(const char *input, int output_fd);
+bool gzip_is_compressed(const char *input);
+#endif
+
+#ifdef HAVE_LZMA_SUPPORT
+int lzma_decompress_to_file(const char *input, int output_fd);
+bool lzma_is_compressed(const char *input);
+#endif
+
+struct zstd_data {
+#ifdef HAVE_ZSTD_SUPPORT
+	ZSTD_CStream	*cstream;
+	ZSTD_DStream	*dstream;
+#endif
+};
+
+#ifdef HAVE_ZSTD_SUPPORT
+
+int zstd_init(struct zstd_data *data, int level);
+int zstd_fini(struct zstd_data *data);
+
+size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size,
+				       void *src, size_t src_size, size_t max_record_size,
+				       size_t process_header(void *record, size_t increment));
+
+size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size,
+			      void *dst, size_t dst_size);
+#else /* !HAVE_ZSTD_SUPPORT */
+
+static inline int zstd_init(struct zstd_data *data __maybe_unused, int level __maybe_unused)
+{
+	return 0;
+}
+
+static inline int zstd_fini(struct zstd_data *data __maybe_unused)
+{
+	return 0;
+}
+
+static inline
+size_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused,
+				       void *dst __maybe_unused, size_t dst_size __maybe_unused,
+				       void *src __maybe_unused, size_t src_size __maybe_unused,
+				       size_t max_record_size __maybe_unused,
+				       size_t process_header(void *record, size_t increment) __maybe_unused)
+{
+	return 0;
+}
+
+static inline size_t zstd_decompress_stream(struct zstd_data *data __maybe_unused, void *src __maybe_unused,
+					    size_t src_size __maybe_unused, void *dst __maybe_unused,
+					    size_t dst_size __maybe_unused)
+{
+	return 0;
+}
+#endif
+
+#endif /* PERF_COMPRESS_H */
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
new file mode 100644
index 000000000..3f2ae19a1
--- /dev/null
+++ b/tools/perf/util/config.c
@@ -0,0 +1,941 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * config.c
+ *
+ * Helper functions for parsing config items.
+ * Originally copied from GIT source.
+ *
+ * Copyright (C) Linus Torvalds, 2005
+ * Copyright (C) Johannes Schindelin, 2005
+ *
+ */
+#include <errno.h>
+#include <sys/param.h>
+#include "cache.h"
+#include "callchain.h"
+#include <subcmd/exec-cmd.h>
+#include "util/event.h"  /* proc_map_timeout */
+#include "util/hist.h"  /* perf_hist_config */
+#include "util/llvm-utils.h"   /* perf_llvm_config */
+#include "util/stat.h"  /* perf_stat__set_big_num */
+#include "util/evsel.h"  /* evsel__hw_names, evsel__use_bpf_counters */
+#include "build-id.h"
+#include "debug.h"
+#include "config.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <linux/ctype.h>
+
+#define MAXNAME (256)
+
+#define DEBUG_CACHE_DIR ".debug"
+
+
+char buildid_dir[MAXPATHLEN]; /* root dir for buildid, binary cache */
+
+static FILE *config_file;
+static const char *config_file_name;
+static int config_linenr;
+static int config_file_eof;
+static struct perf_config_set *config_set;
+
+const char *config_exclusive_filename;
+
+static int get_next_char(void)
+{
+	int c;
+	FILE *f;
+
+	c = '\n';
+	if ((f = config_file) != NULL) {
+		c = fgetc(f);
+		if (c == '\r') {
+			/* DOS like systems */
+			c = fgetc(f);
+			if (c != '\n') {
+				ungetc(c, f);
+				c = '\r';
+			}
+		}
+		if (c == '\n')
+			config_linenr++;
+		if (c == EOF) {
+			config_file_eof = 1;
+			c = '\n';
+		}
+	}
+	return c;
+}
+
+static char *parse_value(void)
+{
+	static char value[1024];
+	int quote = 0, comment = 0, space = 0;
+	size_t len = 0;
+
+	for (;;) {
+		int c = get_next_char();
+
+		if (len >= sizeof(value) - 1)
+			return NULL;
+		if (c == '\n') {
+			if (quote)
+				return NULL;
+			value[len] = 0;
+			return value;
+		}
+		if (comment)
+			continue;
+		if (isspace(c) && !quote) {
+			space = 1;
+			continue;
+		}
+		if (!quote) {
+			if (c == ';' || c == '#') {
+				comment = 1;
+				continue;
+			}
+		}
+		if (space) {
+			if (len)
+				value[len++] = ' ';
+			space = 0;
+		}
+		if (c == '\\') {
+			c = get_next_char();
+			switch (c) {
+			case '\n':
+				continue;
+			case 't':
+				c = '\t';
+				break;
+			case 'b':
+				c = '\b';
+				break;
+			case 'n':
+				c = '\n';
+				break;
+			/* Some characters escape as themselves */
+			case '\\': case '"':
+				break;
+			/* Reject unknown escape sequences */
+			default:
+				return NULL;
+			}
+			value[len++] = c;
+			continue;
+		}
+		if (c == '"') {
+			quote = 1-quote;
+			continue;
+		}
+		value[len++] = c;
+	}
+}
+
+static inline int iskeychar(int c)
+{
+	return isalnum(c) || c == '-' || c == '_';
+}
+
+static int get_value(config_fn_t fn, void *data, char *name, unsigned int len)
+{
+	int c;
+	char *value;
+
+	/* Get the full name */
+	for (;;) {
+		c = get_next_char();
+		if (config_file_eof)
+			break;
+		if (!iskeychar(c))
+			break;
+		name[len++] = c;
+		if (len >= MAXNAME)
+			return -1;
+	}
+	name[len] = 0;
+	while (c == ' ' || c == '\t')
+		c = get_next_char();
+
+	value = NULL;
+	if (c != '\n') {
+		if (c != '=')
+			return -1;
+		value = parse_value();
+		if (!value)
+			return -1;
+	}
+	return fn(name, value, data);
+}
+
+static int get_extended_base_var(char *name, int baselen, int c)
+{
+	do {
+		if (c == '\n')
+			return -1;
+		c = get_next_char();
+	} while (isspace(c));
+
+	/* We require the format to be '[base "extension"]' */
+	if (c != '"')
+		return -1;
+	name[baselen++] = '.';
+
+	for (;;) {
+		int ch = get_next_char();
+
+		if (ch == '\n')
+			return -1;
+		if (ch == '"')
+			break;
+		if (ch == '\\') {
+			ch = get_next_char();
+			if (ch == '\n')
+				return -1;
+		}
+		name[baselen++] = ch;
+		if (baselen > MAXNAME / 2)
+			return -1;
+	}
+
+	/* Final ']' */
+	if (get_next_char() != ']')
+		return -1;
+	return baselen;
+}
+
+static int get_base_var(char *name)
+{
+	int baselen = 0;
+
+	for (;;) {
+		int c = get_next_char();
+		if (config_file_eof)
+			return -1;
+		if (c == ']')
+			return baselen;
+		if (isspace(c))
+			return get_extended_base_var(name, baselen, c);
+		if (!iskeychar(c) && c != '.')
+			return -1;
+		if (baselen > MAXNAME / 2)
+			return -1;
+		name[baselen++] = tolower(c);
+	}
+}
+
+static int perf_parse_file(config_fn_t fn, void *data)
+{
+	int comment = 0;
+	int baselen = 0;
+	static char var[MAXNAME];
+
+	/* U+FEFF Byte Order Mark in UTF8 */
+	static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf";
+	const unsigned char *bomptr = utf8_bom;
+
+	for (;;) {
+		int line, c = get_next_char();
+
+		if (bomptr && *bomptr) {
+			/* We are at the file beginning; skip UTF8-encoded BOM
+			 * if present. Sane editors won't put this in on their
+			 * own, but e.g. Windows Notepad will do it happily. */
+			if ((unsigned char) c == *bomptr) {
+				bomptr++;
+				continue;
+			} else {
+				/* Do not tolerate partial BOM. */
+				if (bomptr != utf8_bom)
+					break;
+				/* No BOM at file beginning. Cool. */
+				bomptr = NULL;
+			}
+		}
+		if (c == '\n') {
+			if (config_file_eof)
+				return 0;
+			comment = 0;
+			continue;
+		}
+		if (comment || isspace(c))
+			continue;
+		if (c == '#' || c == ';') {
+			comment = 1;
+			continue;
+		}
+		if (c == '[') {
+			baselen = get_base_var(var);
+			if (baselen <= 0)
+				break;
+			var[baselen++] = '.';
+			var[baselen] = 0;
+			continue;
+		}
+		if (!isalpha(c))
+			break;
+		var[baselen] = tolower(c);
+
+		/*
+		 * The get_value function might or might not reach the '\n',
+		 * so saving the current line number for error reporting.
+		 */
+		line = config_linenr;
+		if (get_value(fn, data, var, baselen+1) < 0) {
+			config_linenr = line;
+			break;
+		}
+	}
+	pr_err("bad config file line %d in %s\n", config_linenr, config_file_name);
+	return -1;
+}
+
+static int parse_unit_factor(const char *end, unsigned long *val)
+{
+	if (!*end)
+		return 1;
+	else if (!strcasecmp(end, "k")) {
+		*val *= 1024;
+		return 1;
+	}
+	else if (!strcasecmp(end, "m")) {
+		*val *= 1024 * 1024;
+		return 1;
+	}
+	else if (!strcasecmp(end, "g")) {
+		*val *= 1024 * 1024 * 1024;
+		return 1;
+	}
+	return 0;
+}
+
+static int perf_parse_llong(const char *value, long long *ret)
+{
+	if (value && *value) {
+		char *end;
+		long long val = strtoll(value, &end, 0);
+		unsigned long factor = 1;
+
+		if (!parse_unit_factor(end, &factor))
+			return 0;
+		*ret = val * factor;
+		return 1;
+	}
+	return 0;
+}
+
+static int perf_parse_long(const char *value, long *ret)
+{
+	if (value && *value) {
+		char *end;
+		long val = strtol(value, &end, 0);
+		unsigned long factor = 1;
+		if (!parse_unit_factor(end, &factor))
+			return 0;
+		*ret = val * factor;
+		return 1;
+	}
+	return 0;
+}
+
+static void bad_config(const char *name)
+{
+	if (config_file_name)
+		pr_warning("bad config value for '%s' in %s, ignoring...\n", name, config_file_name);
+	else
+		pr_warning("bad config value for '%s', ignoring...\n", name);
+}
+
+int perf_config_u64(u64 *dest, const char *name, const char *value)
+{
+	long long ret = 0;
+
+	if (!perf_parse_llong(value, &ret)) {
+		bad_config(name);
+		return -1;
+	}
+
+	*dest = ret;
+	return 0;
+}
+
+int perf_config_int(int *dest, const char *name, const char *value)
+{
+	long ret = 0;
+	if (!perf_parse_long(value, &ret)) {
+		bad_config(name);
+		return -1;
+	}
+	*dest = ret;
+	return 0;
+}
+
+int perf_config_u8(u8 *dest, const char *name, const char *value)
+{
+	long ret = 0;
+
+	if (!perf_parse_long(value, &ret)) {
+		bad_config(name);
+		return -1;
+	}
+	*dest = ret;
+	return 0;
+}
+
+static int perf_config_bool_or_int(const char *name, const char *value, int *is_bool)
+{
+	int ret;
+
+	*is_bool = 1;
+	if (!value)
+		return 1;
+	if (!*value)
+		return 0;
+	if (!strcasecmp(value, "true") || !strcasecmp(value, "yes") || !strcasecmp(value, "on"))
+		return 1;
+	if (!strcasecmp(value, "false") || !strcasecmp(value, "no") || !strcasecmp(value, "off"))
+		return 0;
+	*is_bool = 0;
+	return perf_config_int(&ret, name, value) < 0 ? -1 : ret;
+}
+
+int perf_config_bool(const char *name, const char *value)
+{
+	int discard;
+	return !!perf_config_bool_or_int(name, value, &discard);
+}
+
+static const char *perf_config_dirname(const char *name, const char *value)
+{
+	if (!name)
+		return NULL;
+	return value;
+}
+
+static int perf_buildid_config(const char *var, const char *value)
+{
+	/* same dir for all commands */
+	if (!strcmp(var, "buildid.dir")) {
+		const char *dir = perf_config_dirname(var, value);
+
+		if (!dir) {
+			pr_err("Invalid buildid directory!\n");
+			return -1;
+		}
+		strncpy(buildid_dir, dir, MAXPATHLEN-1);
+		buildid_dir[MAXPATHLEN-1] = '\0';
+	}
+
+	return 0;
+}
+
+static int perf_default_core_config(const char *var __maybe_unused,
+				    const char *value __maybe_unused)
+{
+	if (!strcmp(var, "core.proc-map-timeout"))
+		proc_map_timeout = strtoul(value, NULL, 10);
+
+	/* Add other config variables here. */
+	return 0;
+}
+
+static int perf_ui_config(const char *var, const char *value)
+{
+	/* Add other config variables here. */
+	if (!strcmp(var, "ui.show-headers"))
+		symbol_conf.show_hist_headers = perf_config_bool(var, value);
+
+	return 0;
+}
+
+static int perf_stat_config(const char *var, const char *value)
+{
+	if (!strcmp(var, "stat.big-num"))
+		perf_stat__set_big_num(perf_config_bool(var, value));
+
+	if (!strcmp(var, "stat.no-csv-summary"))
+		perf_stat__set_no_csv_summary(perf_config_bool(var, value));
+
+	if (!strcmp(var, "stat.bpf-counter-events"))
+		evsel__bpf_counter_events = strdup(value);
+
+	/* Add other config variables here. */
+	return 0;
+}
+
+int perf_default_config(const char *var, const char *value,
+			void *dummy __maybe_unused)
+{
+	if (strstarts(var, "core."))
+		return perf_default_core_config(var, value);
+
+	if (strstarts(var, "hist."))
+		return perf_hist_config(var, value);
+
+	if (strstarts(var, "ui."))
+		return perf_ui_config(var, value);
+
+	if (strstarts(var, "call-graph."))
+		return perf_callchain_config(var, value);
+
+	if (strstarts(var, "llvm."))
+		return perf_llvm_config(var, value);
+
+	if (strstarts(var, "buildid."))
+		return perf_buildid_config(var, value);
+
+	if (strstarts(var, "stat."))
+		return perf_stat_config(var, value);
+
+	/* Add other config variables here. */
+	return 0;
+}
+
+static int perf_config_from_file(config_fn_t fn, const char *filename, void *data)
+{
+	int ret;
+	FILE *f = fopen(filename, "r");
+
+	ret = -1;
+	if (f) {
+		config_file = f;
+		config_file_name = filename;
+		config_linenr = 1;
+		config_file_eof = 0;
+		ret = perf_parse_file(fn, data);
+		fclose(f);
+		config_file_name = NULL;
+	}
+	return ret;
+}
+
+const char *perf_etc_perfconfig(void)
+{
+	static const char *system_wide;
+	if (!system_wide)
+		system_wide = system_path(ETC_PERFCONFIG);
+	return system_wide;
+}
+
+static int perf_env_bool(const char *k, int def)
+{
+	const char *v = getenv(k);
+	return v ? perf_config_bool(k, v) : def;
+}
+
+int perf_config_system(void)
+{
+	return !perf_env_bool("PERF_CONFIG_NOSYSTEM", 0);
+}
+
+int perf_config_global(void)
+{
+	return !perf_env_bool("PERF_CONFIG_NOGLOBAL", 0);
+}
+
+static char *home_perfconfig(void)
+{
+	const char *home = NULL;
+	char *config;
+	struct stat st;
+
+	home = getenv("HOME");
+
+	/*
+	 * Skip reading user config if:
+	 *   - there is no place to read it from (HOME)
+	 *   - we are asked not to (PERF_CONFIG_NOGLOBAL=1)
+	 */
+	if (!home || !*home || !perf_config_global())
+		return NULL;
+
+	config = strdup(mkpath("%s/.perfconfig", home));
+	if (config == NULL) {
+		pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.", home);
+		return NULL;
+	}
+
+	if (stat(config, &st) < 0)
+		goto out_free;
+
+	if (st.st_uid && (st.st_uid != geteuid())) {
+		pr_warning("File %s not owned by current user or root, ignoring it.", config);
+		goto out_free;
+	}
+
+	if (st.st_size)
+		return config;
+
+out_free:
+	free(config);
+	return NULL;
+}
+
+const char *perf_home_perfconfig(void)
+{
+	static const char *config;
+	static bool failed;
+
+	if (failed || config)
+		return config;
+
+	config = home_perfconfig();
+	if (!config)
+		failed = true;
+
+	return config;
+}
+
+static struct perf_config_section *find_section(struct list_head *sections,
+						const char *section_name)
+{
+	struct perf_config_section *section;
+
+	list_for_each_entry(section, sections, node)
+		if (!strcmp(section->name, section_name))
+			return section;
+
+	return NULL;
+}
+
+static struct perf_config_item *find_config_item(const char *name,
+						 struct perf_config_section *section)
+{
+	struct perf_config_item *item;
+
+	list_for_each_entry(item, &section->items, node)
+		if (!strcmp(item->name, name))
+			return item;
+
+	return NULL;
+}
+
+static struct perf_config_section *add_section(struct list_head *sections,
+					       const char *section_name)
+{
+	struct perf_config_section *section = zalloc(sizeof(*section));
+
+	if (!section)
+		return NULL;
+
+	INIT_LIST_HEAD(&section->items);
+	section->name = strdup(section_name);
+	if (!section->name) {
+		pr_debug("%s: strdup failed\n", __func__);
+		free(section);
+		return NULL;
+	}
+
+	list_add_tail(&section->node, sections);
+	return section;
+}
+
+static struct perf_config_item *add_config_item(struct perf_config_section *section,
+						const char *name)
+{
+	struct perf_config_item *item = zalloc(sizeof(*item));
+
+	if (!item)
+		return NULL;
+
+	item->name = strdup(name);
+	if (!item->name) {
+		pr_debug("%s: strdup failed\n", __func__);
+		free(item);
+		return NULL;
+	}
+
+	list_add_tail(&item->node, &section->items);
+	return item;
+}
+
+static int set_value(struct perf_config_item *item, const char *value)
+{
+	char *val = strdup(value);
+
+	if (!val)
+		return -1;
+
+	zfree(&item->value);
+	item->value = val;
+	return 0;
+}
+
+static int collect_config(const char *var, const char *value,
+			  void *perf_config_set)
+{
+	int ret = -1;
+	char *ptr, *key;
+	char *section_name, *name;
+	struct perf_config_section *section = NULL;
+	struct perf_config_item *item = NULL;
+	struct perf_config_set *set = perf_config_set;
+	struct list_head *sections;
+
+	if (set == NULL)
+		return -1;
+
+	sections = &set->sections;
+	key = ptr = strdup(var);
+	if (!key) {
+		pr_debug("%s: strdup failed\n", __func__);
+		return -1;
+	}
+
+	section_name = strsep(&ptr, ".");
+	name = ptr;
+	if (name == NULL || value == NULL)
+		goto out_free;
+
+	section = find_section(sections, section_name);
+	if (!section) {
+		section = add_section(sections, section_name);
+		if (!section)
+			goto out_free;
+	}
+
+	item = find_config_item(name, section);
+	if (!item) {
+		item = add_config_item(section, name);
+		if (!item)
+			goto out_free;
+	}
+
+	/* perf_config_set can contain both user and system config items.
+	 * So we should know where each value is from.
+	 * The classification would be needed when a particular config file
+	 * is overwritten by setting feature i.e. set_config().
+	 */
+	if (strcmp(config_file_name, perf_etc_perfconfig()) == 0) {
+		section->from_system_config = true;
+		item->from_system_config = true;
+	} else {
+		section->from_system_config = false;
+		item->from_system_config = false;
+	}
+
+	ret = set_value(item, value);
+
+out_free:
+	free(key);
+	return ret;
+}
+
+int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
+			     const char *var, const char *value)
+{
+	config_file_name = file_name;
+	return collect_config(var, value, set);
+}
+
+static int perf_config_set__init(struct perf_config_set *set)
+{
+	int ret = -1;
+
+	/* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
+	if (config_exclusive_filename)
+		return perf_config_from_file(collect_config, config_exclusive_filename, set);
+	if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) {
+		if (perf_config_from_file(collect_config, perf_etc_perfconfig(), set) < 0)
+			goto out;
+	}
+	if (perf_config_global() && perf_home_perfconfig()) {
+		if (perf_config_from_file(collect_config, perf_home_perfconfig(), set) < 0)
+			goto out;
+	}
+
+out:
+	return ret;
+}
+
+struct perf_config_set *perf_config_set__new(void)
+{
+	struct perf_config_set *set = zalloc(sizeof(*set));
+
+	if (set) {
+		INIT_LIST_HEAD(&set->sections);
+		perf_config_set__init(set);
+	}
+
+	return set;
+}
+
+struct perf_config_set *perf_config_set__load_file(const char *file)
+{
+	struct perf_config_set *set = zalloc(sizeof(*set));
+
+	if (set) {
+		INIT_LIST_HEAD(&set->sections);
+		perf_config_from_file(collect_config, file, set);
+	}
+
+	return set;
+}
+
+static int perf_config__init(void)
+{
+	if (config_set == NULL)
+		config_set = perf_config_set__new();
+
+	return config_set == NULL;
+}
+
+int perf_config_set(struct perf_config_set *set,
+		    config_fn_t fn, void *data)
+{
+	int ret = 0;
+	char key[BUFSIZ];
+	struct perf_config_section *section;
+	struct perf_config_item *item;
+
+	perf_config_set__for_each_entry(set, section, item) {
+		char *value = item->value;
+
+		if (value) {
+			scnprintf(key, sizeof(key), "%s.%s",
+				  section->name, item->name);
+			ret = fn(key, value, data);
+			if (ret < 0) {
+				pr_err("Error in the given config file: wrong config key-value pair %s=%s\n",
+				       key, value);
+				/*
+				 * Can't be just a 'break', as perf_config_set__for_each_entry()
+				 * expands to two nested for() loops.
+				 */
+				goto out;
+			}
+		}
+	}
+out:
+	return ret;
+}
+
+int perf_config(config_fn_t fn, void *data)
+{
+	if (config_set == NULL && perf_config__init())
+		return -1;
+
+	return perf_config_set(config_set, fn, data);
+}
+
+void perf_config__exit(void)
+{
+	perf_config_set__delete(config_set);
+	config_set = NULL;
+}
+
+void perf_config__refresh(void)
+{
+	perf_config__exit();
+	perf_config__init();
+}
+
+static void perf_config_item__delete(struct perf_config_item *item)
+{
+	zfree(&item->name);
+	zfree(&item->value);
+	free(item);
+}
+
+static void perf_config_section__purge(struct perf_config_section *section)
+{
+	struct perf_config_item *item, *tmp;
+
+	list_for_each_entry_safe(item, tmp, &section->items, node) {
+		list_del_init(&item->node);
+		perf_config_item__delete(item);
+	}
+}
+
+static void perf_config_section__delete(struct perf_config_section *section)
+{
+	perf_config_section__purge(section);
+	zfree(&section->name);
+	free(section);
+}
+
+static void perf_config_set__purge(struct perf_config_set *set)
+{
+	struct perf_config_section *section, *tmp;
+
+	list_for_each_entry_safe(section, tmp, &set->sections, node) {
+		list_del_init(&section->node);
+		perf_config_section__delete(section);
+	}
+}
+
+void perf_config_set__delete(struct perf_config_set *set)
+{
+	if (set == NULL)
+		return;
+
+	perf_config_set__purge(set);
+	free(set);
+}
+
+/*
+ * Call this to report error for your variable that should not
+ * get a boolean value (i.e. "[my] var" means "true").
+ */
+int config_error_nonbool(const char *var)
+{
+	pr_err("Missing value for '%s'", var);
+	return -1;
+}
+
+void set_buildid_dir(const char *dir)
+{
+	if (dir)
+		scnprintf(buildid_dir, MAXPATHLEN, "%s", dir);
+
+	/* default to $HOME/.debug */
+	if (buildid_dir[0] == '\0') {
+		char *home = getenv("HOME");
+
+		if (home) {
+			snprintf(buildid_dir, MAXPATHLEN, "%s/%s",
+				 home, DEBUG_CACHE_DIR);
+		} else {
+			strncpy(buildid_dir, DEBUG_CACHE_DIR, MAXPATHLEN-1);
+		}
+		buildid_dir[MAXPATHLEN-1] = '\0';
+	}
+	/* for communicating with external commands */
+	setenv("PERF_BUILDID_DIR", buildid_dir, 1);
+}
+
+struct perf_config_scan_data {
+	const char *name;
+	const char *fmt;
+	va_list args;
+	int ret;
+};
+
+static int perf_config_scan_cb(const char *var, const char *value, void *data)
+{
+	struct perf_config_scan_data *d = data;
+
+	if (!strcmp(var, d->name))
+		d->ret = vsscanf(value, d->fmt, d->args);
+
+	return 0;
+}
+
+int perf_config_scan(const char *name, const char *fmt, ...)
+{
+	struct perf_config_scan_data d = {
+		.name = name,
+		.fmt = fmt,
+	};
+
+	va_start(d.args, fmt);
+	perf_config(perf_config_scan_cb, &d);
+	va_end(d.args);
+
+	return d.ret;
+}
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
new file mode 100644
index 000000000..2e5e80892
--- /dev/null
+++ b/tools/perf/util/config.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CONFIG_H
+#define __PERF_CONFIG_H
+
+#include <stdbool.h>
+#include <linux/list.h>
+
+struct perf_config_item {
+	char *name;
+	char *value;
+	bool from_system_config;
+	struct list_head node;
+};
+
+struct perf_config_section {
+	char *name;
+	struct list_head items;
+	bool from_system_config;
+	struct list_head node;
+};
+
+struct perf_config_set {
+	struct list_head sections;
+};
+
+extern const char *config_exclusive_filename;
+
+typedef int (*config_fn_t)(const char *, const char *, void *);
+
+int perf_default_config(const char *, const char *, void *);
+int perf_config(config_fn_t fn, void *);
+int perf_config_scan(const char *name, const char *fmt, ...) __scanf(2, 3);
+int perf_config_set(struct perf_config_set *set,
+		    config_fn_t fn, void *data);
+int perf_config_int(int *dest, const char *, const char *);
+int perf_config_u8(u8 *dest, const char *name, const char *value);
+int perf_config_u64(u64 *dest, const char *, const char *);
+int perf_config_bool(const char *, const char *);
+int config_error_nonbool(const char *);
+const char *perf_etc_perfconfig(void);
+const char *perf_home_perfconfig(void);
+int perf_config_system(void);
+int perf_config_global(void);
+
+struct perf_config_set *perf_config_set__new(void);
+struct perf_config_set *perf_config_set__load_file(const char *file);
+void perf_config_set__delete(struct perf_config_set *set);
+int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
+			     const char *var, const char *value);
+void perf_config__exit(void);
+void perf_config__refresh(void);
+
+/**
+ * perf_config_sections__for_each - iterate thru all the sections
+ * @list: list_head instance to iterate
+ * @section: struct perf_config_section iterator
+ */
+#define perf_config_sections__for_each_entry(list, section)	\
+        list_for_each_entry(section, list, node)
+
+/**
+ * perf_config_items__for_each - iterate thru all the items
+ * @list: list_head instance to iterate
+ * @item: struct perf_config_item iterator
+ */
+#define perf_config_items__for_each_entry(list, item)	\
+        list_for_each_entry(item, list, node)
+
+/**
+ * perf_config_set__for_each - iterate thru all the config section-item pairs
+ * @set: evlist instance to iterate
+ * @section: struct perf_config_section iterator
+ * @item: struct perf_config_item iterator
+ */
+#define perf_config_set__for_each_entry(set, section, item)			\
+	perf_config_sections__for_each_entry(&set->sections, section)		\
+	perf_config_items__for_each_entry(&section->items, item)
+
+#endif /* __PERF_CONFIG_H */
diff --git a/tools/perf/util/copyfile.c b/tools/perf/util/copyfile.c
new file mode 100644
index 000000000..47e03de7c
--- /dev/null
+++ b/tools/perf/util/copyfile.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util/copyfile.h"
+#include "util/namespaces.h"
+#include <internal/lib.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+static int slow_copyfile(const char *from, const char *to, struct nsinfo *nsi)
+{
+	int err = -1;
+	char *line = NULL;
+	size_t n;
+	FILE *from_fp, *to_fp;
+	struct nscookie nsc;
+
+	nsinfo__mountns_enter(nsi, &nsc);
+	from_fp = fopen(from, "r");
+	nsinfo__mountns_exit(&nsc);
+	if (from_fp == NULL)
+		goto out;
+
+	to_fp = fopen(to, "w");
+	if (to_fp == NULL)
+		goto out_fclose_from;
+
+	while (getline(&line, &n, from_fp) > 0)
+		if (fputs(line, to_fp) == EOF)
+			goto out_fclose_to;
+	err = 0;
+out_fclose_to:
+	fclose(to_fp);
+	free(line);
+out_fclose_from:
+	fclose(from_fp);
+out:
+	return err;
+}
+
+int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size)
+{
+	void *ptr;
+	loff_t pgoff;
+
+	pgoff = off_in & ~(page_size - 1);
+	off_in -= pgoff;
+
+	ptr = mmap(NULL, off_in + size, PROT_READ, MAP_PRIVATE, ifd, pgoff);
+	if (ptr == MAP_FAILED)
+		return -1;
+
+	while (size) {
+		ssize_t ret = pwrite(ofd, ptr + off_in, size, off_out);
+		if (ret < 0 && errno == EINTR)
+			continue;
+		if (ret <= 0)
+			break;
+
+		size -= ret;
+		off_in += ret;
+		off_out += ret;
+	}
+	munmap(ptr, off_in + size);
+
+	return size ? -1 : 0;
+}
+
+static int copyfile_mode_ns(const char *from, const char *to, mode_t mode,
+			    struct nsinfo *nsi)
+{
+	int fromfd, tofd;
+	struct stat st;
+	int err;
+	char *tmp = NULL, *ptr = NULL;
+	struct nscookie nsc;
+
+	nsinfo__mountns_enter(nsi, &nsc);
+	err = stat(from, &st);
+	nsinfo__mountns_exit(&nsc);
+	if (err)
+		goto out;
+	err = -1;
+
+	/* extra 'x' at the end is to reserve space for '.' */
+	if (asprintf(&tmp, "%s.XXXXXXx", to) < 0) {
+		tmp = NULL;
+		goto out;
+	}
+	ptr = strrchr(tmp, '/');
+	if (!ptr)
+		goto out;
+	ptr = memmove(ptr + 1, ptr, strlen(ptr) - 1);
+	*ptr = '.';
+
+	tofd = mkstemp(tmp);
+	if (tofd < 0)
+		goto out;
+
+	if (st.st_size == 0) { /* /proc? do it slowly... */
+		err = slow_copyfile(from, tmp, nsi);
+		if (!err && fchmod(tofd, mode))
+			err = -1;
+		goto out_close_to;
+	}
+
+	if (fchmod(tofd, mode))
+		goto out_close_to;
+
+	nsinfo__mountns_enter(nsi, &nsc);
+	fromfd = open(from, O_RDONLY);
+	nsinfo__mountns_exit(&nsc);
+	if (fromfd < 0)
+		goto out_close_to;
+
+	err = copyfile_offset(fromfd, 0, tofd, 0, st.st_size);
+
+	close(fromfd);
+out_close_to:
+	close(tofd);
+	if (!err)
+		err = link(tmp, to);
+	unlink(tmp);
+out:
+	free(tmp);
+	return err;
+}
+
+int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi)
+{
+	return copyfile_mode_ns(from, to, 0755, nsi);
+}
+
+int copyfile_mode(const char *from, const char *to, mode_t mode)
+{
+	return copyfile_mode_ns(from, to, mode, NULL);
+}
+
+int copyfile(const char *from, const char *to)
+{
+	return copyfile_mode(from, to, 0755);
+}
diff --git a/tools/perf/util/copyfile.h b/tools/perf/util/copyfile.h
new file mode 100644
index 000000000..e85d2f22f
--- /dev/null
+++ b/tools/perf/util/copyfile.h
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef PERF_COPYFILE_H_
+#define PERF_COPYFILE_H_
+
+#include <linux/types.h>
+#include <sys/types.h>
+#include <fcntl.h>
+
+struct nsinfo;
+
+int copyfile(const char *from, const char *to);
+int copyfile_mode(const char *from, const char *to, mode_t mode);
+int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi);
+int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size);
+
+#endif // PERF_COPYFILE_H_
diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c
new file mode 100644
index 000000000..7a447d918
--- /dev/null
+++ b/tools/perf/util/counts.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include "evsel.h"
+#include "counts.h"
+#include <perf/threadmap.h>
+#include <linux/zalloc.h>
+
+struct perf_counts *perf_counts__new(int ncpus, int nthreads)
+{
+	struct perf_counts *counts = zalloc(sizeof(*counts));
+
+	if (counts) {
+		struct xyarray *values;
+
+		values = xyarray__new(ncpus, nthreads, sizeof(struct perf_counts_values));
+		if (!values) {
+			free(counts);
+			return NULL;
+		}
+
+		counts->values = values;
+
+		values = xyarray__new(ncpus, nthreads, sizeof(bool));
+		if (!values) {
+			xyarray__delete(counts->values);
+			free(counts);
+			return NULL;
+		}
+
+		counts->loaded = values;
+	}
+
+	return counts;
+}
+
+void perf_counts__delete(struct perf_counts *counts)
+{
+	if (counts) {
+		xyarray__delete(counts->loaded);
+		xyarray__delete(counts->values);
+		free(counts);
+	}
+}
+
+void perf_counts__reset(struct perf_counts *counts)
+{
+	xyarray__reset(counts->loaded);
+	xyarray__reset(counts->values);
+	memset(&counts->aggr, 0, sizeof(struct perf_counts_values));
+}
+
+void evsel__reset_counts(struct evsel *evsel)
+{
+	perf_counts__reset(evsel->counts);
+}
+
+int evsel__alloc_counts(struct evsel *evsel)
+{
+	struct perf_cpu_map *cpus = evsel__cpus(evsel);
+	int nthreads = perf_thread_map__nr(evsel->core.threads);
+
+	evsel->counts = perf_counts__new(perf_cpu_map__nr(cpus), nthreads);
+	return evsel->counts != NULL ? 0 : -ENOMEM;
+}
+
+void evsel__free_counts(struct evsel *evsel)
+{
+	perf_counts__delete(evsel->counts);
+	evsel->counts = NULL;
+}
diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
new file mode 100644
index 000000000..5de275194
--- /dev/null
+++ b/tools/perf/util/counts.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_COUNTS_H
+#define __PERF_COUNTS_H
+
+#include <linux/types.h>
+#include <internal/xyarray.h>
+#include <perf/evsel.h>
+#include <stdbool.h>
+
+struct evsel;
+
+struct perf_counts {
+	s8			  scaled;
+	struct perf_counts_values aggr;
+	struct xyarray		  *values;
+	struct xyarray		  *loaded;
+};
+
+
+static inline struct perf_counts_values*
+perf_counts(struct perf_counts *counts, int cpu_map_idx, int thread)
+{
+	return xyarray__entry(counts->values, cpu_map_idx, thread);
+}
+
+static inline bool
+perf_counts__is_loaded(struct perf_counts *counts, int cpu_map_idx, int thread)
+{
+	return *((bool *) xyarray__entry(counts->loaded, cpu_map_idx, thread));
+}
+
+static inline void
+perf_counts__set_loaded(struct perf_counts *counts, int cpu_map_idx, int thread, bool loaded)
+{
+	*((bool *) xyarray__entry(counts->loaded, cpu_map_idx, thread)) = loaded;
+}
+
+struct perf_counts *perf_counts__new(int ncpus, int nthreads);
+void perf_counts__delete(struct perf_counts *counts);
+void perf_counts__reset(struct perf_counts *counts);
+
+void evsel__reset_counts(struct evsel *evsel);
+int evsel__alloc_counts(struct evsel *evsel);
+void evsel__free_counts(struct evsel *evsel);
+
+#endif /* __PERF_COUNTS_H */
diff --git a/tools/perf/util/cpu-set-sched.h b/tools/perf/util/cpu-set-sched.h
new file mode 100644
index 000000000..8cf4e40d3
--- /dev/null
+++ b/tools/perf/util/cpu-set-sched.h
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: LGPL-2.1
+// Definitions taken from glibc for use with older systems, same licensing.
+#ifndef _CPU_SET_SCHED_PERF_H
+#define _CPU_SET_SCHED_PERF_H
+
+#include <features.h>
+#include <sched.h>
+
+#ifndef CPU_EQUAL
+#ifndef __CPU_EQUAL_S
+#if __GNUC_PREREQ (2, 91)
+# define __CPU_EQUAL_S(setsize, cpusetp1, cpusetp2) \
+  (__builtin_memcmp (cpusetp1, cpusetp2, setsize) == 0)
+#else
+# define __CPU_EQUAL_S(setsize, cpusetp1, cpusetp2) \
+  (__extension__							      \
+   ({ const __cpu_mask *__arr1 = (cpusetp1)->__bits;			      \
+      const __cpu_mask *__arr2 = (cpusetp2)->__bits;			      \
+      size_t __imax = (setsize) / sizeof (__cpu_mask);			      \
+      size_t __i;							      \
+      for (__i = 0; __i < __imax; ++__i)				      \
+	if (__arr1[__i] != __arr2[__i])					      \
+	  break;							      \
+      __i == __imax; }))
+#endif
+#endif // __CPU_EQUAL_S
+
+#define CPU_EQUAL(cpusetp1, cpusetp2) \
+  __CPU_EQUAL_S (sizeof (cpu_set_t), cpusetp1, cpusetp2)
+#endif // CPU_EQUAL
+
+#ifndef CPU_OR
+#ifndef __CPU_OP_S
+#define __CPU_OP_S(setsize, destset, srcset1, srcset2, op) \
+  (__extension__							      \
+   ({ cpu_set_t *__dest = (destset);					      \
+      const __cpu_mask *__arr1 = (srcset1)->__bits;			      \
+      const __cpu_mask *__arr2 = (srcset2)->__bits;			      \
+      size_t __imax = (setsize) / sizeof (__cpu_mask);			      \
+      size_t __i;							      \
+      for (__i = 0; __i < __imax; ++__i)				      \
+	((__cpu_mask *) __dest->__bits)[__i] = __arr1[__i] op __arr2[__i];    \
+      __dest; }))
+#endif // __CPU_OP_S
+
+#define CPU_OR(destset, srcset1, srcset2) \
+  __CPU_OP_S (sizeof (cpu_set_t), destset, srcset1, srcset2, |)
+#endif // CPU_OR
+
+#endif // _CPU_SET_SCHED_PERF_H
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
new file mode 100644
index 000000000..8486ca3be
--- /dev/null
+++ b/tools/perf/util/cpumap.c
@@ -0,0 +1,699 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <api/fs/fs.h>
+#include "cpumap.h"
+#include "debug.h"
+#include "event.h"
+#include <assert.h>
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include "asm/bug.h"
+
+#include <linux/ctype.h>
+#include <linux/zalloc.h>
+
+static struct perf_cpu max_cpu_num;
+static struct perf_cpu max_present_cpu_num;
+static int max_node_num;
+/**
+ * The numa node X as read from /sys/devices/system/node/nodeX indexed by the
+ * CPU number.
+ */
+static int *cpunode_map;
+
+bool perf_record_cpu_map_data__test_bit(int i,
+					const struct perf_record_cpu_map_data *data)
+{
+	int bit_word32 = i / 32;
+	__u32 bit_mask32 = 1U << (i & 31);
+	int bit_word64 = i / 64;
+	__u64 bit_mask64 = ((__u64)1) << (i & 63);
+
+	return (data->mask32_data.long_size == 4)
+		? (bit_word32 < data->mask32_data.nr) &&
+		(data->mask32_data.mask[bit_word32] & bit_mask32) != 0
+		: (bit_word64 < data->mask64_data.nr) &&
+		(data->mask64_data.mask[bit_word64] & bit_mask64) != 0;
+}
+
+/* Read ith mask value from data into the given 64-bit sized bitmap */
+static void perf_record_cpu_map_data__read_one_mask(const struct perf_record_cpu_map_data *data,
+						    int i, unsigned long *bitmap)
+{
+#if __SIZEOF_LONG__ == 8
+	if (data->mask32_data.long_size == 4)
+		bitmap[0] = data->mask32_data.mask[i];
+	else
+		bitmap[0] = data->mask64_data.mask[i];
+#else
+	if (data->mask32_data.long_size == 4) {
+		bitmap[0] = data->mask32_data.mask[i];
+		bitmap[1] = 0;
+	} else {
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+		bitmap[0] = (unsigned long)(data->mask64_data.mask[i] >> 32);
+		bitmap[1] = (unsigned long)data->mask64_data.mask[i];
+#else
+		bitmap[0] = (unsigned long)data->mask64_data.mask[i];
+		bitmap[1] = (unsigned long)(data->mask64_data.mask[i] >> 32);
+#endif
+	}
+#endif
+}
+static struct perf_cpu_map *cpu_map__from_entries(const struct perf_record_cpu_map_data *data)
+{
+	struct perf_cpu_map *map;
+
+	map = perf_cpu_map__empty_new(data->cpus_data.nr);
+	if (map) {
+		unsigned i;
+
+		for (i = 0; i < data->cpus_data.nr; i++) {
+			/*
+			 * Special treatment for -1, which is not real cpu number,
+			 * and we need to use (int) -1 to initialize map[i],
+			 * otherwise it would become 65535.
+			 */
+			if (data->cpus_data.cpu[i] == (u16) -1)
+				map->map[i].cpu = -1;
+			else
+				map->map[i].cpu = (int) data->cpus_data.cpu[i];
+		}
+	}
+
+	return map;
+}
+
+static struct perf_cpu_map *cpu_map__from_mask(const struct perf_record_cpu_map_data *data)
+{
+	DECLARE_BITMAP(local_copy, 64);
+	int weight = 0, mask_nr = data->mask32_data.nr;
+	struct perf_cpu_map *map;
+
+	for (int i = 0; i < mask_nr; i++) {
+		perf_record_cpu_map_data__read_one_mask(data, i, local_copy);
+		weight += bitmap_weight(local_copy, 64);
+	}
+
+	map = perf_cpu_map__empty_new(weight);
+	if (!map)
+		return NULL;
+
+	for (int i = 0, j = 0; i < mask_nr; i++) {
+		int cpus_per_i = (i * data->mask32_data.long_size  * BITS_PER_BYTE);
+		int cpu;
+
+		perf_record_cpu_map_data__read_one_mask(data, i, local_copy);
+		for_each_set_bit(cpu, local_copy, 64)
+			map->map[j++].cpu = cpu + cpus_per_i;
+	}
+	return map;
+
+}
+
+static struct perf_cpu_map *cpu_map__from_range(const struct perf_record_cpu_map_data *data)
+{
+	struct perf_cpu_map *map;
+	unsigned int i = 0;
+
+	map = perf_cpu_map__empty_new(data->range_cpu_data.end_cpu -
+				data->range_cpu_data.start_cpu + 1 + data->range_cpu_data.any_cpu);
+	if (!map)
+		return NULL;
+
+	if (data->range_cpu_data.any_cpu)
+		map->map[i++].cpu = -1;
+
+	for (int cpu = data->range_cpu_data.start_cpu; cpu <= data->range_cpu_data.end_cpu;
+	     i++, cpu++)
+		map->map[i].cpu = cpu;
+
+	return map;
+}
+
+struct perf_cpu_map *cpu_map__new_data(const struct perf_record_cpu_map_data *data)
+{
+	switch (data->type) {
+	case PERF_CPU_MAP__CPUS:
+		return cpu_map__from_entries(data);
+	case PERF_CPU_MAP__MASK:
+		return cpu_map__from_mask(data);
+	case PERF_CPU_MAP__RANGE_CPUS:
+		return cpu_map__from_range(data);
+	default:
+		pr_err("cpu_map__new_data unknown type %d\n", data->type);
+		return NULL;
+	}
+}
+
+size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp)
+{
+#define BUFSIZE 1024
+	char buf[BUFSIZE];
+
+	cpu_map__snprint(map, buf, sizeof(buf));
+	return fprintf(fp, "%s\n", buf);
+#undef BUFSIZE
+}
+
+struct perf_cpu_map *perf_cpu_map__empty_new(int nr)
+{
+	struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int) * nr);
+
+	if (cpus != NULL) {
+		int i;
+
+		cpus->nr = nr;
+		for (i = 0; i < nr; i++)
+			cpus->map[i].cpu = -1;
+
+		refcount_set(&cpus->refcnt, 1);
+	}
+
+	return cpus;
+}
+
+struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr)
+{
+	struct cpu_aggr_map *cpus = malloc(sizeof(*cpus) + sizeof(struct aggr_cpu_id) * nr);
+
+	if (cpus != NULL) {
+		int i;
+
+		cpus->nr = nr;
+		for (i = 0; i < nr; i++)
+			cpus->map[i] = aggr_cpu_id__empty();
+
+		refcount_set(&cpus->refcnt, 1);
+	}
+
+	return cpus;
+}
+
+static int cpu__get_topology_int(int cpu, const char *name, int *value)
+{
+	char path[PATH_MAX];
+
+	snprintf(path, PATH_MAX,
+		"devices/system/cpu/cpu%d/topology/%s", cpu, name);
+
+	return sysfs__read_int(path, value);
+}
+
+int cpu__get_socket_id(struct perf_cpu cpu)
+{
+	int value, ret = cpu__get_topology_int(cpu.cpu, "physical_package_id", &value);
+	return ret ?: value;
+}
+
+struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data __maybe_unused)
+{
+	struct aggr_cpu_id id = aggr_cpu_id__empty();
+
+	id.socket = cpu__get_socket_id(cpu);
+	return id;
+}
+
+static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer)
+{
+	struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer;
+	struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer;
+
+	if (a->node != b->node)
+		return a->node - b->node;
+	else if (a->socket != b->socket)
+		return a->socket - b->socket;
+	else if (a->die != b->die)
+		return a->die - b->die;
+	else if (a->core != b->core)
+		return a->core - b->core;
+	else
+		return a->thread_idx - b->thread_idx;
+}
+
+struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus,
+				       aggr_cpu_id_get_t get_id,
+				       void *data)
+{
+	int idx;
+	struct perf_cpu cpu;
+	struct cpu_aggr_map *c = cpu_aggr_map__empty_new(cpus->nr);
+
+	if (!c)
+		return NULL;
+
+	/* Reset size as it may only be partially filled */
+	c->nr = 0;
+
+	perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+		bool duplicate = false;
+		struct aggr_cpu_id cpu_id = get_id(cpu, data);
+
+		for (int j = 0; j < c->nr; j++) {
+			if (aggr_cpu_id__equal(&cpu_id, &c->map[j])) {
+				duplicate = true;
+				break;
+			}
+		}
+		if (!duplicate) {
+			c->map[c->nr] = cpu_id;
+			c->nr++;
+		}
+	}
+	/* Trim. */
+	if (c->nr != cpus->nr) {
+		struct cpu_aggr_map *trimmed_c =
+			realloc(c,
+				sizeof(struct cpu_aggr_map) + sizeof(struct aggr_cpu_id) * c->nr);
+
+		if (trimmed_c)
+			c = trimmed_c;
+	}
+	/* ensure we process id in increasing order */
+	qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp);
+
+	return c;
+
+}
+
+int cpu__get_die_id(struct perf_cpu cpu)
+{
+	int value, ret = cpu__get_topology_int(cpu.cpu, "die_id", &value);
+
+	return ret ?: value;
+}
+
+struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data)
+{
+	struct aggr_cpu_id id;
+	int die;
+
+	die = cpu__get_die_id(cpu);
+	/* There is no die_id on legacy system. */
+	if (die == -1)
+		die = 0;
+
+	/*
+	 * die_id is relative to socket, so start
+	 * with the socket ID and then add die to
+	 * make a unique ID.
+	 */
+	id = aggr_cpu_id__socket(cpu, data);
+	if (aggr_cpu_id__is_empty(&id))
+		return id;
+
+	id.die = die;
+	return id;
+}
+
+int cpu__get_core_id(struct perf_cpu cpu)
+{
+	int value, ret = cpu__get_topology_int(cpu.cpu, "core_id", &value);
+	return ret ?: value;
+}
+
+struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data)
+{
+	struct aggr_cpu_id id;
+	int core = cpu__get_core_id(cpu);
+
+	/* aggr_cpu_id__die returns a struct with socket and die set. */
+	id = aggr_cpu_id__die(cpu, data);
+	if (aggr_cpu_id__is_empty(&id))
+		return id;
+
+	/*
+	 * core_id is relative to socket and die, we need a global id.
+	 * So we combine the result from cpu_map__get_die with the core id
+	 */
+	id.core = core;
+	return id;
+
+}
+
+struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data)
+{
+	struct aggr_cpu_id id;
+
+	/* aggr_cpu_id__core returns a struct with socket, die and core set. */
+	id = aggr_cpu_id__core(cpu, data);
+	if (aggr_cpu_id__is_empty(&id))
+		return id;
+
+	id.cpu = cpu;
+	return id;
+
+}
+
+struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data __maybe_unused)
+{
+	struct aggr_cpu_id id = aggr_cpu_id__empty();
+
+	id.node = cpu__get_node(cpu);
+	return id;
+}
+
+/* setup simple routines to easily access node numbers given a cpu number */
+static int get_max_num(char *path, int *max)
+{
+	size_t num;
+	char *buf;
+	int err = 0;
+
+	if (filename__read_str(path, &buf, &num))
+		return -1;
+
+	buf[num] = '\0';
+
+	/* start on the right, to find highest node num */
+	while (--num) {
+		if ((buf[num] == ',') || (buf[num] == '-')) {
+			num++;
+			break;
+		}
+	}
+	if (sscanf(&buf[num], "%d", max) < 1) {
+		err = -1;
+		goto out;
+	}
+
+	/* convert from 0-based to 1-based */
+	(*max)++;
+
+out:
+	free(buf);
+	return err;
+}
+
+/* Determine highest possible cpu in the system for sparse allocation */
+static void set_max_cpu_num(void)
+{
+	const char *mnt;
+	char path[PATH_MAX];
+	int ret = -1;
+
+	/* set up default */
+	max_cpu_num.cpu = 4096;
+	max_present_cpu_num.cpu = 4096;
+
+	mnt = sysfs__mountpoint();
+	if (!mnt)
+		goto out;
+
+	/* get the highest possible cpu number for a sparse allocation */
+	ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/possible", mnt);
+	if (ret >= PATH_MAX) {
+		pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
+		goto out;
+	}
+
+	ret = get_max_num(path, &max_cpu_num.cpu);
+	if (ret)
+		goto out;
+
+	/* get the highest present cpu number for a sparse allocation */
+	ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
+	if (ret >= PATH_MAX) {
+		pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
+		goto out;
+	}
+
+	ret = get_max_num(path, &max_present_cpu_num.cpu);
+
+out:
+	if (ret)
+		pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu);
+}
+
+/* Determine highest possible node in the system for sparse allocation */
+static void set_max_node_num(void)
+{
+	const char *mnt;
+	char path[PATH_MAX];
+	int ret = -1;
+
+	/* set up default */
+	max_node_num = 8;
+
+	mnt = sysfs__mountpoint();
+	if (!mnt)
+		goto out;
+
+	/* get the highest possible cpu number for a sparse allocation */
+	ret = snprintf(path, PATH_MAX, "%s/devices/system/node/possible", mnt);
+	if (ret >= PATH_MAX) {
+		pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
+		goto out;
+	}
+
+	ret = get_max_num(path, &max_node_num);
+
+out:
+	if (ret)
+		pr_err("Failed to read max nodes, using default of %d\n", max_node_num);
+}
+
+int cpu__max_node(void)
+{
+	if (unlikely(!max_node_num))
+		set_max_node_num();
+
+	return max_node_num;
+}
+
+struct perf_cpu cpu__max_cpu(void)
+{
+	if (unlikely(!max_cpu_num.cpu))
+		set_max_cpu_num();
+
+	return max_cpu_num;
+}
+
+struct perf_cpu cpu__max_present_cpu(void)
+{
+	if (unlikely(!max_present_cpu_num.cpu))
+		set_max_cpu_num();
+
+	return max_present_cpu_num;
+}
+
+
+int cpu__get_node(struct perf_cpu cpu)
+{
+	if (unlikely(cpunode_map == NULL)) {
+		pr_debug("cpu_map not initialized\n");
+		return -1;
+	}
+
+	return cpunode_map[cpu.cpu];
+}
+
+static int init_cpunode_map(void)
+{
+	int i;
+
+	set_max_cpu_num();
+	set_max_node_num();
+
+	cpunode_map = calloc(max_cpu_num.cpu, sizeof(int));
+	if (!cpunode_map) {
+		pr_err("%s: calloc failed\n", __func__);
+		return -1;
+	}
+
+	for (i = 0; i < max_cpu_num.cpu; i++)
+		cpunode_map[i] = -1;
+
+	return 0;
+}
+
+int cpu__setup_cpunode_map(void)
+{
+	struct dirent *dent1, *dent2;
+	DIR *dir1, *dir2;
+	unsigned int cpu, mem;
+	char buf[PATH_MAX];
+	char path[PATH_MAX];
+	const char *mnt;
+	int n;
+
+	/* initialize globals */
+	if (init_cpunode_map())
+		return -1;
+
+	mnt = sysfs__mountpoint();
+	if (!mnt)
+		return 0;
+
+	n = snprintf(path, PATH_MAX, "%s/devices/system/node", mnt);
+	if (n >= PATH_MAX) {
+		pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
+		return -1;
+	}
+
+	dir1 = opendir(path);
+	if (!dir1)
+		return 0;
+
+	/* walk tree and setup map */
+	while ((dent1 = readdir(dir1)) != NULL) {
+		if (dent1->d_type != DT_DIR || sscanf(dent1->d_name, "node%u", &mem) < 1)
+			continue;
+
+		n = snprintf(buf, PATH_MAX, "%s/%s", path, dent1->d_name);
+		if (n >= PATH_MAX) {
+			pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
+			continue;
+		}
+
+		dir2 = opendir(buf);
+		if (!dir2)
+			continue;
+		while ((dent2 = readdir(dir2)) != NULL) {
+			if (dent2->d_type != DT_LNK || sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
+				continue;
+			cpunode_map[cpu] = mem;
+		}
+		closedir(dir2);
+	}
+	closedir(dir1);
+	return 0;
+}
+
+size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size)
+{
+	int i, start = -1;
+	bool first = true;
+	size_t ret = 0;
+
+#define COMMA first ? "" : ","
+
+	for (i = 0; i < map->nr + 1; i++) {
+		struct perf_cpu cpu = { .cpu = INT_MAX };
+		bool last = i == map->nr;
+
+		if (!last)
+			cpu = map->map[i];
+
+		if (start == -1) {
+			start = i;
+			if (last) {
+				ret += snprintf(buf + ret, size - ret,
+						"%s%d", COMMA,
+						map->map[i].cpu);
+			}
+		} else if (((i - start) != (cpu.cpu - map->map[start].cpu)) || last) {
+			int end = i - 1;
+
+			if (start == end) {
+				ret += snprintf(buf + ret, size - ret,
+						"%s%d", COMMA,
+						map->map[start].cpu);
+			} else {
+				ret += snprintf(buf + ret, size - ret,
+						"%s%d-%d", COMMA,
+						map->map[start].cpu, map->map[end].cpu);
+			}
+			first = false;
+			start = i;
+		}
+	}
+
+#undef COMMA
+
+	pr_debug2("cpumask list: %s\n", buf);
+	return ret;
+}
+
+static char hex_char(unsigned char val)
+{
+	if (val < 10)
+		return val + '0';
+	if (val < 16)
+		return val - 10 + 'a';
+	return '?';
+}
+
+size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size)
+{
+	int i, cpu;
+	char *ptr = buf;
+	unsigned char *bitmap;
+	struct perf_cpu last_cpu = perf_cpu_map__cpu(map, map->nr - 1);
+
+	if (buf == NULL)
+		return 0;
+
+	bitmap = zalloc(last_cpu.cpu / 8 + 1);
+	if (bitmap == NULL) {
+		buf[0] = '\0';
+		return 0;
+	}
+
+	for (i = 0; i < map->nr; i++) {
+		cpu = perf_cpu_map__cpu(map, i).cpu;
+		bitmap[cpu / 8] |= 1 << (cpu % 8);
+	}
+
+	for (cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) {
+		unsigned char bits = bitmap[cpu / 8];
+
+		if (cpu % 8)
+			bits >>= 4;
+		else
+			bits &= 0xf;
+
+		*ptr++ = hex_char(bits);
+		if ((cpu % 32) == 0 && cpu > 0)
+			*ptr++ = ',';
+	}
+	*ptr = '\0';
+	free(bitmap);
+
+	buf[size - 1] = '\0';
+	return ptr - buf;
+}
+
+const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */
+{
+	static const struct perf_cpu_map *online = NULL;
+
+	if (!online)
+		online = perf_cpu_map__new(NULL); /* from /sys/devices/system/cpu/online */
+
+	return online;
+}
+
+bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b)
+{
+	return a->thread_idx == b->thread_idx &&
+		a->node == b->node &&
+		a->socket == b->socket &&
+		a->die == b->die &&
+		a->core == b->core &&
+		a->cpu.cpu == b->cpu.cpu;
+}
+
+bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a)
+{
+	return a->thread_idx == -1 &&
+		a->node == -1 &&
+		a->socket == -1 &&
+		a->die == -1 &&
+		a->core == -1 &&
+		a->cpu.cpu == -1;
+}
+
+struct aggr_cpu_id aggr_cpu_id__empty(void)
+{
+	struct aggr_cpu_id ret = {
+		.thread_idx = -1,
+		.node = -1,
+		.socket = -1,
+		.die = -1,
+		.core = -1,
+		.cpu = (struct perf_cpu){ .cpu = -1 },
+	};
+	return ret;
+}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
new file mode 100644
index 000000000..4a6d02957
--- /dev/null
+++ b/tools/perf/util/cpumap.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CPUMAP_H
+#define __PERF_CPUMAP_H
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <internal/cpumap.h>
+#include <perf/cpumap.h>
+
+/** Identify where counts are aggregated, -1 implies not to aggregate. */
+struct aggr_cpu_id {
+	/** A value in the range 0 to number of threads. */
+	int thread_idx;
+	/** The numa node X as read from /sys/devices/system/node/nodeX. */
+	int node;
+	/**
+	 * The socket number as read from
+	 * /sys/devices/system/cpu/cpuX/topology/physical_package_id.
+	 */
+	int socket;
+	/** The die id as read from /sys/devices/system/cpu/cpuX/topology/die_id. */
+	int die;
+	/** The core id as read from /sys/devices/system/cpu/cpuX/topology/core_id. */
+	int core;
+	/** CPU aggregation, note there is one CPU for each SMT thread. */
+	struct perf_cpu cpu;
+};
+
+/** A collection of aggr_cpu_id values, the "built" version is sorted and uniqued. */
+struct cpu_aggr_map {
+	refcount_t refcnt;
+	/** Number of valid entries. */
+	int nr;
+	/** The entries. */
+	struct aggr_cpu_id map[];
+};
+
+struct perf_record_cpu_map_data;
+
+bool perf_record_cpu_map_data__test_bit(int i, const struct perf_record_cpu_map_data *data);
+
+struct perf_cpu_map *perf_cpu_map__empty_new(int nr);
+
+struct perf_cpu_map *cpu_map__new_data(const struct perf_record_cpu_map_data *data);
+size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size);
+size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size);
+size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp);
+const struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */
+
+int cpu__setup_cpunode_map(void);
+
+int cpu__max_node(void);
+struct perf_cpu cpu__max_cpu(void);
+struct perf_cpu cpu__max_present_cpu(void);
+
+/**
+ * cpu_map__is_dummy - Events associated with a pid, rather than a CPU, use a single dummy map with an entry of -1.
+ */
+static inline bool cpu_map__is_dummy(struct perf_cpu_map *cpus)
+{
+	return perf_cpu_map__nr(cpus) == 1 && perf_cpu_map__cpu(cpus, 0).cpu == -1;
+}
+
+/**
+ * cpu__get_node - Returns the numa node X as read from
+ * /sys/devices/system/node/nodeX for the given CPU.
+ */
+int cpu__get_node(struct perf_cpu cpu);
+/**
+ * cpu__get_socket_id - Returns the socket number as read from
+ * /sys/devices/system/cpu/cpuX/topology/physical_package_id for the given CPU.
+ */
+int cpu__get_socket_id(struct perf_cpu cpu);
+/**
+ * cpu__get_die_id - Returns the die id as read from
+ * /sys/devices/system/cpu/cpuX/topology/die_id for the given CPU.
+ */
+int cpu__get_die_id(struct perf_cpu cpu);
+/**
+ * cpu__get_core_id - Returns the core id as read from
+ * /sys/devices/system/cpu/cpuX/topology/core_id for the given CPU.
+ */
+int cpu__get_core_id(struct perf_cpu cpu);
+
+/**
+ * cpu_aggr_map__empty_new - Create a cpu_aggr_map of size nr with every entry
+ * being empty.
+ */
+struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr);
+
+typedef struct aggr_cpu_id (*aggr_cpu_id_get_t)(struct perf_cpu cpu, void *data);
+
+/**
+ * cpu_aggr_map__new - Create a cpu_aggr_map with an aggr_cpu_id for each cpu in
+ * cpus. The aggr_cpu_id is created with 'get_id' that may have a data value
+ * passed to it. The cpu_aggr_map is sorted with duplicate values removed.
+ */
+struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus,
+				       aggr_cpu_id_get_t get_id,
+				       void *data);
+
+bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b);
+bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a);
+struct aggr_cpu_id aggr_cpu_id__empty(void);
+
+
+/**
+ * aggr_cpu_id__socket - Create an aggr_cpu_id with the socket populated with
+ * the socket for cpu. The function signature is compatible with
+ * aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__die - Create an aggr_cpu_id with the die and socket populated
+ * with the die and socket for cpu. The function signature is compatible with
+ * aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__core - Create an aggr_cpu_id with the core, die and socket
+ * populated with the core, die and socket for cpu. The function signature is
+ * compatible with aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__core - Create an aggr_cpu_id with the cpu, core, die and socket
+ * populated with the cpu, core, die and socket for cpu. The function signature
+ * is compatible with aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__node - Create an aggr_cpu_id with the numa node populated for
+ * cpu. The function signature is compatible with aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data);
+
+#endif /* __PERF_CPUMAP_H */
diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c
new file mode 100644
index 000000000..1a3ff6449
--- /dev/null
+++ b/tools/perf/util/cputopo.c
@@ -0,0 +1,499 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/param.h>
+#include <sys/utsname.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+#include <api/fs/fs.h>
+#include <linux/zalloc.h>
+#include <perf/cpumap.h>
+
+#include "cputopo.h"
+#include "cpumap.h"
+#include "debug.h"
+#include "env.h"
+#include "pmu-hybrid.h"
+
+#define PACKAGE_CPUS_FMT \
+	"%s/devices/system/cpu/cpu%d/topology/package_cpus_list"
+#define PACKAGE_CPUS_FMT_OLD \
+	"%s/devices/system/cpu/cpu%d/topology/core_siblings_list"
+#define DIE_CPUS_FMT \
+	"%s/devices/system/cpu/cpu%d/topology/die_cpus_list"
+#define CORE_CPUS_FMT \
+	"%s/devices/system/cpu/cpu%d/topology/core_cpus_list"
+#define CORE_CPUS_FMT_OLD \
+	"%s/devices/system/cpu/cpu%d/topology/thread_siblings_list"
+#define NODE_ONLINE_FMT \
+	"%s/devices/system/node/online"
+#define NODE_MEMINFO_FMT \
+	"%s/devices/system/node/node%d/meminfo"
+#define NODE_CPULIST_FMT \
+	"%s/devices/system/node/node%d/cpulist"
+
+static int build_cpu_topology(struct cpu_topology *tp, int cpu)
+{
+	FILE *fp;
+	char filename[MAXPATHLEN];
+	char *buf = NULL, *p;
+	size_t len = 0;
+	ssize_t sret;
+	u32 i = 0;
+	int ret = -1;
+
+	scnprintf(filename, MAXPATHLEN, PACKAGE_CPUS_FMT,
+		  sysfs__mountpoint(), cpu);
+	if (access(filename, F_OK) == -1) {
+		scnprintf(filename, MAXPATHLEN, PACKAGE_CPUS_FMT_OLD,
+			sysfs__mountpoint(), cpu);
+	}
+	fp = fopen(filename, "r");
+	if (!fp)
+		goto try_dies;
+
+	sret = getline(&buf, &len, fp);
+	fclose(fp);
+	if (sret <= 0)
+		goto try_dies;
+
+	p = strchr(buf, '\n');
+	if (p)
+		*p = '\0';
+
+	for (i = 0; i < tp->package_cpus_lists; i++) {
+		if (!strcmp(buf, tp->package_cpus_list[i]))
+			break;
+	}
+	if (i == tp->package_cpus_lists) {
+		tp->package_cpus_list[i] = buf;
+		tp->package_cpus_lists++;
+		buf = NULL;
+		len = 0;
+	}
+	ret = 0;
+
+try_dies:
+	if (!tp->die_cpus_list)
+		goto try_threads;
+
+	scnprintf(filename, MAXPATHLEN, DIE_CPUS_FMT,
+		  sysfs__mountpoint(), cpu);
+	fp = fopen(filename, "r");
+	if (!fp)
+		goto try_threads;
+
+	sret = getline(&buf, &len, fp);
+	fclose(fp);
+	if (sret <= 0)
+		goto try_threads;
+
+	p = strchr(buf, '\n');
+	if (p)
+		*p = '\0';
+
+	for (i = 0; i < tp->die_cpus_lists; i++) {
+		if (!strcmp(buf, tp->die_cpus_list[i]))
+			break;
+	}
+	if (i == tp->die_cpus_lists) {
+		tp->die_cpus_list[i] = buf;
+		tp->die_cpus_lists++;
+		buf = NULL;
+		len = 0;
+	}
+	ret = 0;
+
+try_threads:
+	scnprintf(filename, MAXPATHLEN, CORE_CPUS_FMT,
+		  sysfs__mountpoint(), cpu);
+	if (access(filename, F_OK) == -1) {
+		scnprintf(filename, MAXPATHLEN, CORE_CPUS_FMT_OLD,
+			  sysfs__mountpoint(), cpu);
+	}
+	fp = fopen(filename, "r");
+	if (!fp)
+		goto done;
+
+	if (getline(&buf, &len, fp) <= 0)
+		goto done;
+
+	p = strchr(buf, '\n');
+	if (p)
+		*p = '\0';
+
+	for (i = 0; i < tp->core_cpus_lists; i++) {
+		if (!strcmp(buf, tp->core_cpus_list[i]))
+			break;
+	}
+	if (i == tp->core_cpus_lists) {
+		tp->core_cpus_list[i] = buf;
+		tp->core_cpus_lists++;
+		buf = NULL;
+	}
+	ret = 0;
+done:
+	if (fp)
+		fclose(fp);
+	free(buf);
+	return ret;
+}
+
+void cpu_topology__delete(struct cpu_topology *tp)
+{
+	u32 i;
+
+	if (!tp)
+		return;
+
+	for (i = 0 ; i < tp->package_cpus_lists; i++)
+		zfree(&tp->package_cpus_list[i]);
+
+	for (i = 0 ; i < tp->die_cpus_lists; i++)
+		zfree(&tp->die_cpus_list[i]);
+
+	for (i = 0 ; i < tp->core_cpus_lists; i++)
+		zfree(&tp->core_cpus_list[i]);
+
+	free(tp);
+}
+
+bool cpu_topology__smt_on(const struct cpu_topology *topology)
+{
+	for (u32 i = 0; i < topology->core_cpus_lists; i++) {
+		const char *cpu_list = topology->core_cpus_list[i];
+
+		/*
+		 * If there is a need to separate siblings in a core then SMT is
+		 * enabled.
+		 */
+		if (strchr(cpu_list, ',') || strchr(cpu_list, '-'))
+			return true;
+	}
+	return false;
+}
+
+bool cpu_topology__core_wide(const struct cpu_topology *topology,
+			     const char *user_requested_cpu_list)
+{
+	struct perf_cpu_map *user_requested_cpus;
+
+	/*
+	 * If user_requested_cpu_list is empty then all CPUs are recorded and so
+	 * core_wide is true.
+	 */
+	if (!user_requested_cpu_list)
+		return true;
+
+	user_requested_cpus = perf_cpu_map__new(user_requested_cpu_list);
+	/* Check that every user requested CPU is the complete set of SMT threads on a core. */
+	for (u32 i = 0; i < topology->core_cpus_lists; i++) {
+		const char *core_cpu_list = topology->core_cpus_list[i];
+		struct perf_cpu_map *core_cpus = perf_cpu_map__new(core_cpu_list);
+		struct perf_cpu cpu;
+		int idx;
+		bool has_first, first = true;
+
+		perf_cpu_map__for_each_cpu(cpu, idx, core_cpus) {
+			if (first) {
+				has_first = perf_cpu_map__has(user_requested_cpus, cpu);
+				first = false;
+			} else {
+				/*
+				 * If the first core CPU is user requested then
+				 * all subsequent CPUs in the core must be user
+				 * requested too. If the first CPU isn't user
+				 * requested then none of the others must be
+				 * too.
+				 */
+				if (perf_cpu_map__has(user_requested_cpus, cpu) != has_first) {
+					perf_cpu_map__put(core_cpus);
+					perf_cpu_map__put(user_requested_cpus);
+					return false;
+				}
+			}
+		}
+		perf_cpu_map__put(core_cpus);
+	}
+	perf_cpu_map__put(user_requested_cpus);
+	return true;
+}
+
+static bool has_die_topology(void)
+{
+	char filename[MAXPATHLEN];
+	struct utsname uts;
+
+	if (uname(&uts) < 0)
+		return false;
+
+	if (strncmp(uts.machine, "x86_64", 6) &&
+	    strncmp(uts.machine, "s390x", 5))
+		return false;
+
+	scnprintf(filename, MAXPATHLEN, DIE_CPUS_FMT,
+		  sysfs__mountpoint(), 0);
+	if (access(filename, F_OK) == -1)
+		return false;
+
+	return true;
+}
+
+struct cpu_topology *cpu_topology__new(void)
+{
+	struct cpu_topology *tp = NULL;
+	void *addr;
+	u32 nr, i, nr_addr;
+	size_t sz;
+	long ncpus;
+	int ret = -1;
+	struct perf_cpu_map *map;
+	bool has_die = has_die_topology();
+
+	ncpus = cpu__max_present_cpu().cpu;
+
+	/* build online CPU map */
+	map = perf_cpu_map__new(NULL);
+	if (map == NULL) {
+		pr_debug("failed to get system cpumap\n");
+		return NULL;
+	}
+
+	nr = (u32)(ncpus & UINT_MAX);
+
+	sz = nr * sizeof(char *);
+	if (has_die)
+		nr_addr = 3;
+	else
+		nr_addr = 2;
+	addr = calloc(1, sizeof(*tp) + nr_addr * sz);
+	if (!addr)
+		goto out_free;
+
+	tp = addr;
+	addr += sizeof(*tp);
+	tp->package_cpus_list = addr;
+	addr += sz;
+	if (has_die) {
+		tp->die_cpus_list = addr;
+		addr += sz;
+	}
+	tp->core_cpus_list = addr;
+
+	for (i = 0; i < nr; i++) {
+		if (!perf_cpu_map__has(map, (struct perf_cpu){ .cpu = i }))
+			continue;
+
+		ret = build_cpu_topology(tp, i);
+		if (ret < 0)
+			break;
+	}
+
+out_free:
+	perf_cpu_map__put(map);
+	if (ret) {
+		cpu_topology__delete(tp);
+		tp = NULL;
+	}
+	return tp;
+}
+
+static int load_numa_node(struct numa_topology_node *node, int nr)
+{
+	char str[MAXPATHLEN];
+	char field[32];
+	char *buf = NULL, *p;
+	size_t len = 0;
+	int ret = -1;
+	FILE *fp;
+	u64 mem;
+
+	node->node = (u32) nr;
+
+	scnprintf(str, MAXPATHLEN, NODE_MEMINFO_FMT,
+		  sysfs__mountpoint(), nr);
+	fp = fopen(str, "r");
+	if (!fp)
+		return -1;
+
+	while (getline(&buf, &len, fp) > 0) {
+		/* skip over invalid lines */
+		if (!strchr(buf, ':'))
+			continue;
+		if (sscanf(buf, "%*s %*d %31s %"PRIu64, field, &mem) != 2)
+			goto err;
+		if (!strcmp(field, "MemTotal:"))
+			node->mem_total = mem;
+		if (!strcmp(field, "MemFree:"))
+			node->mem_free = mem;
+		if (node->mem_total && node->mem_free)
+			break;
+	}
+
+	fclose(fp);
+	fp = NULL;
+
+	scnprintf(str, MAXPATHLEN, NODE_CPULIST_FMT,
+		  sysfs__mountpoint(), nr);
+
+	fp = fopen(str, "r");
+	if (!fp)
+		return -1;
+
+	if (getline(&buf, &len, fp) <= 0)
+		goto err;
+
+	p = strchr(buf, '\n');
+	if (p)
+		*p = '\0';
+
+	node->cpus = buf;
+	fclose(fp);
+	return 0;
+
+err:
+	free(buf);
+	if (fp)
+		fclose(fp);
+	return ret;
+}
+
+struct numa_topology *numa_topology__new(void)
+{
+	struct perf_cpu_map *node_map = NULL;
+	struct numa_topology *tp = NULL;
+	char path[MAXPATHLEN];
+	char *buf = NULL;
+	size_t len = 0;
+	u32 nr, i;
+	FILE *fp;
+	char *c;
+
+	scnprintf(path, MAXPATHLEN, NODE_ONLINE_FMT,
+		  sysfs__mountpoint());
+
+	fp = fopen(path, "r");
+	if (!fp)
+		return NULL;
+
+	if (getline(&buf, &len, fp) <= 0)
+		goto out;
+
+	c = strchr(buf, '\n');
+	if (c)
+		*c = '\0';
+
+	node_map = perf_cpu_map__new(buf);
+	if (!node_map)
+		goto out;
+
+	nr = (u32) perf_cpu_map__nr(node_map);
+
+	tp = zalloc(sizeof(*tp) + sizeof(tp->nodes[0])*nr);
+	if (!tp)
+		goto out;
+
+	tp->nr = nr;
+
+	for (i = 0; i < nr; i++) {
+		if (load_numa_node(&tp->nodes[i], perf_cpu_map__cpu(node_map, i).cpu)) {
+			numa_topology__delete(tp);
+			tp = NULL;
+			break;
+		}
+	}
+
+out:
+	free(buf);
+	fclose(fp);
+	perf_cpu_map__put(node_map);
+	return tp;
+}
+
+void numa_topology__delete(struct numa_topology *tp)
+{
+	u32 i;
+
+	for (i = 0; i < tp->nr; i++)
+		zfree(&tp->nodes[i].cpus);
+
+	free(tp);
+}
+
+static int load_hybrid_node(struct hybrid_topology_node *node,
+			    struct perf_pmu *pmu)
+{
+	const char *sysfs;
+	char path[PATH_MAX];
+	char *buf = NULL, *p;
+	FILE *fp;
+	size_t len = 0;
+
+	node->pmu_name = strdup(pmu->name);
+	if (!node->pmu_name)
+		return -1;
+
+	sysfs = sysfs__mountpoint();
+	if (!sysfs)
+		goto err;
+
+	snprintf(path, PATH_MAX, CPUS_TEMPLATE_CPU, sysfs, pmu->name);
+	fp = fopen(path, "r");
+	if (!fp)
+		goto err;
+
+	if (getline(&buf, &len, fp) <= 0) {
+		fclose(fp);
+		goto err;
+	}
+
+	p = strchr(buf, '\n');
+	if (p)
+		*p = '\0';
+
+	fclose(fp);
+	node->cpus = buf;
+	return 0;
+
+err:
+	zfree(&node->pmu_name);
+	free(buf);
+	return -1;
+}
+
+struct hybrid_topology *hybrid_topology__new(void)
+{
+	struct perf_pmu *pmu;
+	struct hybrid_topology *tp = NULL;
+	u32 nr, i = 0;
+
+	nr = perf_pmu__hybrid_pmu_num();
+	if (nr == 0)
+		return NULL;
+
+	tp = zalloc(sizeof(*tp) + sizeof(tp->nodes[0]) * nr);
+	if (!tp)
+		return NULL;
+
+	tp->nr = nr;
+	perf_pmu__for_each_hybrid_pmu(pmu) {
+		if (load_hybrid_node(&tp->nodes[i], pmu)) {
+			hybrid_topology__delete(tp);
+			return NULL;
+		}
+		i++;
+	}
+
+	return tp;
+}
+
+void hybrid_topology__delete(struct hybrid_topology *tp)
+{
+	u32 i;
+
+	for (i = 0; i < tp->nr; i++) {
+		zfree(&tp->nodes[i].pmu_name);
+		zfree(&tp->nodes[i].cpus);
+	}
+
+	free(tp);
+}
diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h
new file mode 100644
index 000000000..969e5920a
--- /dev/null
+++ b/tools/perf/util/cputopo.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CPUTOPO_H
+#define __PERF_CPUTOPO_H
+
+#include <linux/types.h>
+
+struct cpu_topology {
+	/* The number of unique package_cpus_lists below. */
+	u32	  package_cpus_lists;
+	/* The number of unique die_cpu_lists below. */
+	u32	  die_cpus_lists;
+	/* The number of unique core_cpu_lists below. */
+	u32	  core_cpus_lists;
+	/*
+	 * An array of strings where each string is unique and read from
+	 * /sys/devices/system/cpu/cpuX/topology/package_cpus_list. From the ABI
+	 * each of these is a human-readable list of CPUs sharing the same
+	 * physical_package_id. The format is like 0-3, 8-11, 14,17.
+	 */
+	const char **package_cpus_list;
+	/*
+	 * An array of string where each string is unique and from
+	 * /sys/devices/system/cpu/cpuX/topology/die_cpus_list. From the ABI
+	 * each of these is a human-readable list of CPUs within the same die.
+	 * The format is like 0-3, 8-11, 14,17.
+	 */
+	const char **die_cpus_list;
+	/*
+	 * An array of string where each string is unique and from
+	 * /sys/devices/system/cpu/cpuX/topology/core_cpus_list. From the ABI
+	 * each of these is a human-readable list of CPUs within the same
+	 * core. The format is like 0-3, 8-11, 14,17.
+	 */
+	const char **core_cpus_list;
+};
+
+struct numa_topology_node {
+	char		*cpus;
+	u32		 node;
+	u64		 mem_total;
+	u64		 mem_free;
+};
+
+struct numa_topology {
+	u32				nr;
+	struct numa_topology_node	nodes[];
+};
+
+struct hybrid_topology_node {
+	char		*pmu_name;
+	char		*cpus;
+};
+
+struct hybrid_topology {
+	u32				nr;
+	struct hybrid_topology_node	nodes[];
+};
+
+struct cpu_topology *cpu_topology__new(void);
+void cpu_topology__delete(struct cpu_topology *tp);
+/* Determine from the core list whether SMT was enabled. */
+bool cpu_topology__smt_on(const struct cpu_topology *topology);
+/* Are the sets of SMT siblings all enabled or all disabled in user_requested_cpus. */
+bool cpu_topology__core_wide(const struct cpu_topology *topology,
+			     const char *user_requested_cpu_list);
+
+struct numa_topology *numa_topology__new(void);
+void numa_topology__delete(struct numa_topology *tp);
+
+struct hybrid_topology *hybrid_topology__new(void);
+void hybrid_topology__delete(struct hybrid_topology *tp);
+
+#endif /* __PERF_CPUTOPO_H */
diff --git a/tools/perf/util/cs-etm-decoder/Build b/tools/perf/util/cs-etm-decoder/Build
new file mode 100644
index 000000000..216cb17a3
--- /dev/null
+++ b/tools/perf/util/cs-etm-decoder/Build
@@ -0,0 +1 @@
+perf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
new file mode 100644
index 000000000..31fa3b451
--- /dev/null
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -0,0 +1,798 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2015-2018 Linaro Limited.
+ *
+ * Author: Tor Jeremiassen <tor@ti.com>
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <asm/bug.h>
+#include <linux/coresight-pmu.h>
+#include <linux/err.h>
+#include <linux/list.h>
+#include <linux/zalloc.h>
+#include <stdlib.h>
+#include <opencsd/c_api/opencsd_c_api.h>
+
+#include "cs-etm.h"
+#include "cs-etm-decoder.h"
+#include "debug.h"
+#include "intlist.h"
+
+/* use raw logging */
+#ifdef CS_DEBUG_RAW
+#define CS_LOG_RAW_FRAMES
+#ifdef CS_RAW_PACKED
+#define CS_RAW_DEBUG_FLAGS (OCSD_DFRMTR_UNPACKED_RAW_OUT | \
+			    OCSD_DFRMTR_PACKED_RAW_OUT)
+#else
+#define CS_RAW_DEBUG_FLAGS (OCSD_DFRMTR_UNPACKED_RAW_OUT)
+#endif
+#endif
+
+struct cs_etm_decoder {
+	void *data;
+	void (*packet_printer)(const char *msg);
+	bool suppress_printing;
+	dcd_tree_handle_t dcd_tree;
+	cs_etm_mem_cb_type mem_access;
+	ocsd_datapath_resp_t prev_return;
+	const char *decoder_name;
+};
+
+static u32
+cs_etm_decoder__mem_access(const void *context,
+			   const ocsd_vaddr_t address,
+			   const ocsd_mem_space_acc_t mem_space __maybe_unused,
+			   const u8 trace_chan_id,
+			   const u32 req_size,
+			   u8 *buffer)
+{
+	struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context;
+
+	return decoder->mem_access(decoder->data, trace_chan_id,
+				   address, req_size, buffer);
+}
+
+int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder,
+				      u64 start, u64 end,
+				      cs_etm_mem_cb_type cb_func)
+{
+	decoder->mem_access = cb_func;
+
+	if (ocsd_dt_add_callback_trcid_mem_acc(decoder->dcd_tree, start, end,
+					       OCSD_MEM_SPACE_ANY,
+					       cs_etm_decoder__mem_access,
+					       decoder))
+		return -1;
+
+	return 0;
+}
+
+int cs_etm_decoder__reset(struct cs_etm_decoder *decoder)
+{
+	ocsd_datapath_resp_t dp_ret;
+
+	decoder->prev_return = OCSD_RESP_CONT;
+	decoder->suppress_printing = true;
+	dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET,
+				      0, 0, NULL, NULL);
+	decoder->suppress_printing = false;
+	if (OCSD_DATA_RESP_IS_FATAL(dp_ret))
+		return -1;
+
+	return 0;
+}
+
+int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue,
+			       struct cs_etm_packet *packet)
+{
+	if (!packet_queue || !packet)
+		return -EINVAL;
+
+	/* Nothing to do, might as well just return */
+	if (packet_queue->packet_count == 0)
+		return 0;
+	/*
+	 * The queueing process in function cs_etm_decoder__buffer_packet()
+	 * increments the tail *before* using it.  This is somewhat counter
+	 * intuitive but it has the advantage of centralizing tail management
+	 * at a single location.  Because of that we need to follow the same
+	 * heuristic with the head, i.e we increment it before using its
+	 * value.  Otherwise the first element of the packet queue is not
+	 * used.
+	 */
+	packet_queue->head = (packet_queue->head + 1) &
+			     (CS_ETM_PACKET_MAX_BUFFER - 1);
+
+	*packet = packet_queue->packet_buffer[packet_queue->head];
+
+	packet_queue->packet_count--;
+
+	return 1;
+}
+
+static int cs_etm_decoder__gen_etmv3_config(struct cs_etm_trace_params *params,
+					    ocsd_etmv3_cfg *config)
+{
+	config->reg_idr = params->etmv3.reg_idr;
+	config->reg_ctrl = params->etmv3.reg_ctrl;
+	config->reg_ccer = params->etmv3.reg_ccer;
+	config->reg_trc_id = params->etmv3.reg_trc_id;
+	config->arch_ver = ARCH_V7;
+	config->core_prof = profile_CortexA;
+
+	return 0;
+}
+
+#define TRCIDR1_TRCARCHMIN_SHIFT 4
+#define TRCIDR1_TRCARCHMIN_MASK  GENMASK(7, 4)
+#define TRCIDR1_TRCARCHMIN(x)    (((x) & TRCIDR1_TRCARCHMIN_MASK) >> TRCIDR1_TRCARCHMIN_SHIFT)
+
+static enum _ocsd_arch_version cs_etm_decoder__get_etmv4_arch_ver(u32 reg_idr1)
+{
+	/*
+	 * For ETMv4 if the trace minor version is 4 or more then we can assume
+	 * the architecture is ARCH_AA64 rather than just V8.
+	 * ARCH_V8 = V8 architecture
+	 * ARCH_AA64 = Min v8r3 plus additional AA64 PE features
+	 */
+	return TRCIDR1_TRCARCHMIN(reg_idr1) >= 4 ? ARCH_AA64 : ARCH_V8;
+}
+
+static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params,
+					     ocsd_etmv4_cfg *config)
+{
+	config->reg_configr = params->etmv4.reg_configr;
+	config->reg_traceidr = params->etmv4.reg_traceidr;
+	config->reg_idr0 = params->etmv4.reg_idr0;
+	config->reg_idr1 = params->etmv4.reg_idr1;
+	config->reg_idr2 = params->etmv4.reg_idr2;
+	config->reg_idr8 = params->etmv4.reg_idr8;
+	config->reg_idr9 = 0;
+	config->reg_idr10 = 0;
+	config->reg_idr11 = 0;
+	config->reg_idr12 = 0;
+	config->reg_idr13 = 0;
+	config->arch_ver = cs_etm_decoder__get_etmv4_arch_ver(params->etmv4.reg_idr1);
+	config->core_prof = profile_CortexA;
+}
+
+static void cs_etm_decoder__gen_ete_config(struct cs_etm_trace_params *params,
+					   ocsd_ete_cfg *config)
+{
+	config->reg_configr = params->ete.reg_configr;
+	config->reg_traceidr = params->ete.reg_traceidr;
+	config->reg_idr0 = params->ete.reg_idr0;
+	config->reg_idr1 = params->ete.reg_idr1;
+	config->reg_idr2 = params->ete.reg_idr2;
+	config->reg_idr8 = params->ete.reg_idr8;
+	config->reg_devarch = params->ete.reg_devarch;
+	config->arch_ver = ARCH_AA64;
+	config->core_prof = profile_CortexA;
+}
+
+static void cs_etm_decoder__print_str_cb(const void *p_context,
+					 const char *msg,
+					 const int str_len)
+{
+	const struct cs_etm_decoder *decoder = p_context;
+
+	if (p_context && str_len && !decoder->suppress_printing)
+		decoder->packet_printer(msg);
+}
+
+static int
+cs_etm_decoder__init_def_logger_printing(struct cs_etm_decoder_params *d_params,
+					 struct cs_etm_decoder *decoder)
+{
+	int ret = 0;
+
+	if (d_params->packet_printer == NULL)
+		return -1;
+
+	decoder->packet_printer = d_params->packet_printer;
+
+	/*
+	 * Set up a library default logger to process any printers
+	 * (packet/raw frame) we add later.
+	 */
+	ret = ocsd_def_errlog_init(OCSD_ERR_SEV_ERROR, 1);
+	if (ret != 0)
+		return -1;
+
+	/* no stdout / err / file output */
+	ret = ocsd_def_errlog_config_output(C_API_MSGLOGOUT_FLG_NONE, NULL);
+	if (ret != 0)
+		return -1;
+
+	/*
+	 * Set the string CB for the default logger, passes strings to
+	 * perf print logger.
+	 */
+	ret = ocsd_def_errlog_set_strprint_cb(decoder->dcd_tree,
+					      (void *)decoder,
+					      cs_etm_decoder__print_str_cb);
+	if (ret != 0)
+		ret = -1;
+
+	return 0;
+}
+
+#ifdef CS_LOG_RAW_FRAMES
+static void
+cs_etm_decoder__init_raw_frame_logging(struct cs_etm_decoder_params *d_params,
+				       struct cs_etm_decoder *decoder)
+{
+	/* Only log these during a --dump operation */
+	if (d_params->operation == CS_ETM_OPERATION_PRINT) {
+		/* set up a library default logger to process the
+		 *  raw frame printer we add later
+		 */
+		ocsd_def_errlog_init(OCSD_ERR_SEV_ERROR, 1);
+
+		/* no stdout / err / file output */
+		ocsd_def_errlog_config_output(C_API_MSGLOGOUT_FLG_NONE, NULL);
+
+		/* set the string CB for the default logger,
+		 * passes strings to perf print logger.
+		 */
+		ocsd_def_errlog_set_strprint_cb(decoder->dcd_tree,
+						(void *)decoder,
+						cs_etm_decoder__print_str_cb);
+
+		/* use the built in library printer for the raw frames */
+		ocsd_dt_set_raw_frame_printer(decoder->dcd_tree,
+					      CS_RAW_DEBUG_FLAGS);
+	}
+}
+#else
+static void
+cs_etm_decoder__init_raw_frame_logging(
+		struct cs_etm_decoder_params *d_params __maybe_unused,
+		struct cs_etm_decoder *decoder __maybe_unused)
+{
+}
+#endif
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq,
+				  struct cs_etm_packet_queue *packet_queue,
+				  const uint8_t trace_chan_id)
+{
+	/* No timestamp packet has been received, nothing to do */
+	if (!packet_queue->cs_timestamp)
+		return OCSD_RESP_CONT;
+
+	packet_queue->cs_timestamp = packet_queue->next_cs_timestamp;
+
+	/* Estimate the timestamp for the next range packet */
+	packet_queue->next_cs_timestamp += packet_queue->instr_count;
+	packet_queue->instr_count = 0;
+
+	/* Tell the front end which traceid_queue needs attention */
+	cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id);
+
+	return OCSD_RESP_WAIT;
+}
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
+				  const ocsd_generic_trace_elem *elem,
+				  const uint8_t trace_chan_id,
+				  const ocsd_trc_index_t indx)
+{
+	struct cs_etm_packet_queue *packet_queue;
+
+	/* First get the packet queue for this traceID */
+	packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id);
+	if (!packet_queue)
+		return OCSD_RESP_FATAL_SYS_ERR;
+
+	/*
+	 * We've seen a timestamp packet before - simply record the new value.
+	 * Function do_soft_timestamp() will report the value to the front end,
+	 * hence asking the decoder to keep decoding rather than stopping.
+	 */
+	if (packet_queue->cs_timestamp) {
+		packet_queue->next_cs_timestamp = elem->timestamp;
+		return OCSD_RESP_CONT;
+	}
+
+
+	if (!elem->timestamp) {
+		/*
+		 * Zero timestamps can be seen due to misconfiguration or hardware bugs.
+		 * Warn once, and don't try to subtract instr_count as it would result in an
+		 * underflow.
+		 */
+		packet_queue->cs_timestamp = 0;
+		if (!cs_etm__etmq_is_timeless(etmq))
+			pr_warning_once("Zero Coresight timestamp found at Idx:%" OCSD_TRC_IDX_STR
+					". Decoding may be improved by prepending 'Z' to your current --itrace arguments.\n",
+					indx);
+
+	} else if (packet_queue->instr_count > elem->timestamp) {
+		/*
+		 * Sanity check that the elem->timestamp - packet_queue->instr_count would not
+		 * result in an underflow. Warn and clamp at 0 if it would.
+		 */
+		packet_queue->cs_timestamp = 0;
+		pr_err("Timestamp calculation underflow at Idx:%" OCSD_TRC_IDX_STR "\n", indx);
+	} else {
+		/*
+		 * This is the first timestamp we've seen since the beginning of traces
+		 * or a discontinuity.  Since timestamps packets are generated *after*
+		 * range packets have been generated, we need to estimate the time at
+		 * which instructions started by subtracting the number of instructions
+		 * executed to the timestamp.
+		 */
+		packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count;
+	}
+	packet_queue->next_cs_timestamp = elem->timestamp;
+	packet_queue->instr_count = 0;
+
+	/* Tell the front end which traceid_queue needs attention */
+	cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id);
+
+	/* Halt processing until we are being told to proceed */
+	return OCSD_RESP_WAIT;
+}
+
+static void
+cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue)
+{
+	packet_queue->cs_timestamp = 0;
+	packet_queue->next_cs_timestamp = 0;
+	packet_queue->instr_count = 0;
+}
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue,
+			      const u8 trace_chan_id,
+			      enum cs_etm_sample_type sample_type)
+{
+	u32 et = 0;
+	int cpu;
+
+	if (packet_queue->packet_count >= CS_ETM_PACKET_MAX_BUFFER - 1)
+		return OCSD_RESP_FATAL_SYS_ERR;
+
+	if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0)
+		return OCSD_RESP_FATAL_SYS_ERR;
+
+	et = packet_queue->tail;
+	et = (et + 1) & (CS_ETM_PACKET_MAX_BUFFER - 1);
+	packet_queue->tail = et;
+	packet_queue->packet_count++;
+
+	packet_queue->packet_buffer[et].sample_type = sample_type;
+	packet_queue->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN;
+	packet_queue->packet_buffer[et].cpu = cpu;
+	packet_queue->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR;
+	packet_queue->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
+	packet_queue->packet_buffer[et].instr_count = 0;
+	packet_queue->packet_buffer[et].last_instr_taken_branch = false;
+	packet_queue->packet_buffer[et].last_instr_size = 0;
+	packet_queue->packet_buffer[et].last_instr_type = 0;
+	packet_queue->packet_buffer[et].last_instr_subtype = 0;
+	packet_queue->packet_buffer[et].last_instr_cond = 0;
+	packet_queue->packet_buffer[et].flags = 0;
+	packet_queue->packet_buffer[et].exception_number = UINT32_MAX;
+	packet_queue->packet_buffer[et].trace_chan_id = trace_chan_id;
+
+	if (packet_queue->packet_count == CS_ETM_PACKET_MAX_BUFFER - 1)
+		return OCSD_RESP_WAIT;
+
+	return OCSD_RESP_CONT;
+}
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__buffer_range(struct cs_etm_queue *etmq,
+			     struct cs_etm_packet_queue *packet_queue,
+			     const ocsd_generic_trace_elem *elem,
+			     const uint8_t trace_chan_id)
+{
+	int ret = 0;
+	struct cs_etm_packet *packet;
+
+	ret = cs_etm_decoder__buffer_packet(packet_queue, trace_chan_id,
+					    CS_ETM_RANGE);
+	if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
+		return ret;
+
+	packet = &packet_queue->packet_buffer[packet_queue->tail];
+
+	switch (elem->isa) {
+	case ocsd_isa_aarch64:
+		packet->isa = CS_ETM_ISA_A64;
+		break;
+	case ocsd_isa_arm:
+		packet->isa = CS_ETM_ISA_A32;
+		break;
+	case ocsd_isa_thumb2:
+		packet->isa = CS_ETM_ISA_T32;
+		break;
+	case ocsd_isa_tee:
+	case ocsd_isa_jazelle:
+	case ocsd_isa_custom:
+	case ocsd_isa_unknown:
+	default:
+		packet->isa = CS_ETM_ISA_UNKNOWN;
+	}
+
+	packet->start_addr = elem->st_addr;
+	packet->end_addr = elem->en_addr;
+	packet->instr_count = elem->num_instr_range;
+	packet->last_instr_type = elem->last_i_type;
+	packet->last_instr_subtype = elem->last_i_subtype;
+	packet->last_instr_cond = elem->last_instr_cond;
+
+	if (elem->last_i_type == OCSD_INSTR_BR || elem->last_i_type == OCSD_INSTR_BR_INDIRECT)
+		packet->last_instr_taken_branch = elem->last_instr_exec;
+	else
+		packet->last_instr_taken_branch = false;
+
+	packet->last_instr_size = elem->last_instr_sz;
+
+	/* per-thread scenario, no need to generate a timestamp */
+	if (cs_etm__etmq_is_timeless(etmq))
+		goto out;
+
+	/*
+	 * The packet queue is full and we haven't seen a timestamp (had we
+	 * seen one the packet queue wouldn't be full).  Let the front end
+	 * deal with it.
+	 */
+	if (ret == OCSD_RESP_WAIT)
+		goto out;
+
+	packet_queue->instr_count += elem->num_instr_range;
+	/* Tell the front end we have a new timestamp to process */
+	ret = cs_etm_decoder__do_soft_timestamp(etmq, packet_queue,
+						trace_chan_id);
+out:
+	return ret;
+}
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__buffer_discontinuity(struct cs_etm_packet_queue *queue,
+				     const uint8_t trace_chan_id)
+{
+	/*
+	 * Something happened and who knows when we'll get new traces so
+	 * reset time statistics.
+	 */
+	cs_etm_decoder__reset_timestamp(queue);
+	return cs_etm_decoder__buffer_packet(queue, trace_chan_id,
+					     CS_ETM_DISCONTINUITY);
+}
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__buffer_exception(struct cs_etm_packet_queue *queue,
+				 const ocsd_generic_trace_elem *elem,
+				 const uint8_t trace_chan_id)
+{	int ret = 0;
+	struct cs_etm_packet *packet;
+
+	ret = cs_etm_decoder__buffer_packet(queue, trace_chan_id,
+					    CS_ETM_EXCEPTION);
+	if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
+		return ret;
+
+	packet = &queue->packet_buffer[queue->tail];
+	packet->exception_number = elem->exception_number;
+
+	return ret;
+}
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__buffer_exception_ret(struct cs_etm_packet_queue *queue,
+				     const uint8_t trace_chan_id)
+{
+	return cs_etm_decoder__buffer_packet(queue, trace_chan_id,
+					     CS_ETM_EXCEPTION_RET);
+}
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__set_tid(struct cs_etm_queue *etmq,
+			struct cs_etm_packet_queue *packet_queue,
+			const ocsd_generic_trace_elem *elem,
+			const uint8_t trace_chan_id)
+{
+	pid_t tid = -1;
+	static u64 pid_fmt;
+	int ret;
+
+	/*
+	 * As all the ETMs run at the same exception level, the system should
+	 * have the same PID format crossing CPUs.  So cache the PID format
+	 * and reuse it for sequential decoding.
+	 */
+	if (!pid_fmt) {
+		ret = cs_etm__get_pid_fmt(trace_chan_id, &pid_fmt);
+		if (ret)
+			return OCSD_RESP_FATAL_SYS_ERR;
+	}
+
+	/*
+	 * Process the PE_CONTEXT packets if we have a valid contextID or VMID.
+	 * If the kernel is running at EL2, the PID is traced in CONTEXTIDR_EL2
+	 * as VMID, Bit ETM_OPT_CTXTID2 is set in this case.
+	 */
+	switch (pid_fmt) {
+	case BIT(ETM_OPT_CTXTID):
+		if (elem->context.ctxt_id_valid)
+			tid = elem->context.context_id;
+		break;
+	case BIT(ETM_OPT_CTXTID2):
+		if (elem->context.vmid_valid)
+			tid = elem->context.vmid;
+		break;
+	default:
+		break;
+	}
+
+	if (tid == -1)
+		return OCSD_RESP_CONT;
+
+	if (cs_etm__etmq_set_tid(etmq, tid, trace_chan_id))
+		return OCSD_RESP_FATAL_SYS_ERR;
+
+	/*
+	 * A timestamp is generated after a PE_CONTEXT element so make sure
+	 * to rely on that coming one.
+	 */
+	cs_etm_decoder__reset_timestamp(packet_queue);
+
+	return OCSD_RESP_CONT;
+}
+
+static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
+				const void *context,
+				const ocsd_trc_index_t indx,
+				const u8 trace_chan_id __maybe_unused,
+				const ocsd_generic_trace_elem *elem)
+{
+	ocsd_datapath_resp_t resp = OCSD_RESP_CONT;
+	struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context;
+	struct cs_etm_queue *etmq = decoder->data;
+	struct cs_etm_packet_queue *packet_queue;
+
+	/* First get the packet queue for this traceID */
+	packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id);
+	if (!packet_queue)
+		return OCSD_RESP_FATAL_SYS_ERR;
+
+	switch (elem->elem_type) {
+	case OCSD_GEN_TRC_ELEM_UNKNOWN:
+		break;
+	case OCSD_GEN_TRC_ELEM_EO_TRACE:
+	case OCSD_GEN_TRC_ELEM_NO_SYNC:
+	case OCSD_GEN_TRC_ELEM_TRACE_ON:
+		resp = cs_etm_decoder__buffer_discontinuity(packet_queue,
+							    trace_chan_id);
+		break;
+	case OCSD_GEN_TRC_ELEM_INSTR_RANGE:
+		resp = cs_etm_decoder__buffer_range(etmq, packet_queue, elem,
+						    trace_chan_id);
+		break;
+	case OCSD_GEN_TRC_ELEM_EXCEPTION:
+		resp = cs_etm_decoder__buffer_exception(packet_queue, elem,
+							trace_chan_id);
+		break;
+	case OCSD_GEN_TRC_ELEM_EXCEPTION_RET:
+		resp = cs_etm_decoder__buffer_exception_ret(packet_queue,
+							    trace_chan_id);
+		break;
+	case OCSD_GEN_TRC_ELEM_TIMESTAMP:
+		resp = cs_etm_decoder__do_hard_timestamp(etmq, elem,
+							 trace_chan_id,
+							 indx);
+		break;
+	case OCSD_GEN_TRC_ELEM_PE_CONTEXT:
+		resp = cs_etm_decoder__set_tid(etmq, packet_queue,
+					       elem, trace_chan_id);
+		break;
+	/* Unused packet types */
+	case OCSD_GEN_TRC_ELEM_I_RANGE_NOPATH:
+	case OCSD_GEN_TRC_ELEM_ADDR_NACC:
+	case OCSD_GEN_TRC_ELEM_CYCLE_COUNT:
+	case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN:
+	case OCSD_GEN_TRC_ELEM_EVENT:
+	case OCSD_GEN_TRC_ELEM_SWTRACE:
+	case OCSD_GEN_TRC_ELEM_CUSTOM:
+	case OCSD_GEN_TRC_ELEM_SYNC_MARKER:
+	case OCSD_GEN_TRC_ELEM_MEMTRANS:
+	default:
+		break;
+	}
+
+	return resp;
+}
+
+static int
+cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params,
+				   struct cs_etm_trace_params *t_params,
+				   struct cs_etm_decoder *decoder)
+{
+	ocsd_etmv3_cfg config_etmv3;
+	ocsd_etmv4_cfg trace_config_etmv4;
+	ocsd_ete_cfg trace_config_ete;
+	void *trace_config;
+	u8 csid;
+
+	switch (t_params->protocol) {
+	case CS_ETM_PROTO_ETMV3:
+	case CS_ETM_PROTO_PTM:
+		cs_etm_decoder__gen_etmv3_config(t_params, &config_etmv3);
+		decoder->decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ?
+							OCSD_BUILTIN_DCD_ETMV3 :
+							OCSD_BUILTIN_DCD_PTM;
+		trace_config = &config_etmv3;
+		break;
+	case CS_ETM_PROTO_ETMV4i:
+		cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4);
+		decoder->decoder_name = OCSD_BUILTIN_DCD_ETMV4I;
+		trace_config = &trace_config_etmv4;
+		break;
+	case CS_ETM_PROTO_ETE:
+		cs_etm_decoder__gen_ete_config(t_params, &trace_config_ete);
+		decoder->decoder_name = OCSD_BUILTIN_DCD_ETE;
+		trace_config = &trace_config_ete;
+		break;
+	default:
+		return -1;
+	}
+
+	if (d_params->operation == CS_ETM_OPERATION_DECODE) {
+		if (ocsd_dt_create_decoder(decoder->dcd_tree,
+					   decoder->decoder_name,
+					   OCSD_CREATE_FLG_FULL_DECODER,
+					   trace_config, &csid))
+			return -1;
+
+		if (ocsd_dt_set_gen_elem_outfn(decoder->dcd_tree,
+					       cs_etm_decoder__gen_trace_elem_printer,
+					       decoder))
+			return -1;
+
+		return 0;
+	} else if (d_params->operation == CS_ETM_OPERATION_PRINT) {
+		if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder->decoder_name,
+					   OCSD_CREATE_FLG_PACKET_PROC,
+					   trace_config, &csid))
+			return -1;
+
+		if (ocsd_dt_set_pkt_protocol_printer(decoder->dcd_tree, csid, 0))
+			return -1;
+
+		return 0;
+	}
+
+	return -1;
+}
+
+struct cs_etm_decoder *
+cs_etm_decoder__new(int decoders, struct cs_etm_decoder_params *d_params,
+		    struct cs_etm_trace_params t_params[])
+{
+	struct cs_etm_decoder *decoder;
+	ocsd_dcd_tree_src_t format;
+	u32 flags;
+	int i, ret;
+
+	if ((!t_params) || (!d_params))
+		return NULL;
+
+	decoder = zalloc(sizeof(*decoder));
+
+	if (!decoder)
+		return NULL;
+
+	decoder->data = d_params->data;
+	decoder->prev_return = OCSD_RESP_CONT;
+	format = (d_params->formatted ? OCSD_TRC_SRC_FRAME_FORMATTED :
+					 OCSD_TRC_SRC_SINGLE);
+	flags = 0;
+	flags |= (d_params->fsyncs ? OCSD_DFRMTR_HAS_FSYNCS : 0);
+	flags |= (d_params->hsyncs ? OCSD_DFRMTR_HAS_HSYNCS : 0);
+	flags |= (d_params->frame_aligned ? OCSD_DFRMTR_FRAME_MEM_ALIGN : 0);
+
+	/*
+	 * Drivers may add barrier frames when used with perf, set up to
+	 * handle this. Barriers const of FSYNC packet repeated 4 times.
+	 */
+	flags |= OCSD_DFRMTR_RESET_ON_4X_FSYNC;
+
+	/* Create decode tree for the data source */
+	decoder->dcd_tree = ocsd_create_dcd_tree(format, flags);
+
+	if (decoder->dcd_tree == 0)
+		goto err_free_decoder;
+
+	/* init library print logging support */
+	ret = cs_etm_decoder__init_def_logger_printing(d_params, decoder);
+	if (ret != 0)
+		goto err_free_decoder;
+
+	/* init raw frame logging if required */
+	cs_etm_decoder__init_raw_frame_logging(d_params, decoder);
+
+	for (i = 0; i < decoders; i++) {
+		ret = cs_etm_decoder__create_etm_decoder(d_params,
+							 &t_params[i],
+							 decoder);
+		if (ret != 0)
+			goto err_free_decoder;
+	}
+
+	return decoder;
+
+err_free_decoder:
+	cs_etm_decoder__free(decoder);
+	return NULL;
+}
+
+int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder,
+				       u64 indx, const u8 *buf,
+				       size_t len, size_t *consumed)
+{
+	int ret = 0;
+	ocsd_datapath_resp_t cur = OCSD_RESP_CONT;
+	ocsd_datapath_resp_t prev_return = decoder->prev_return;
+	size_t processed = 0;
+	u32 count;
+
+	while (processed < len) {
+		if (OCSD_DATA_RESP_IS_WAIT(prev_return)) {
+			cur = ocsd_dt_process_data(decoder->dcd_tree,
+						   OCSD_OP_FLUSH,
+						   0,
+						   0,
+						   NULL,
+						   NULL);
+		} else if (OCSD_DATA_RESP_IS_CONT(prev_return)) {
+			cur = ocsd_dt_process_data(decoder->dcd_tree,
+						   OCSD_OP_DATA,
+						   indx + processed,
+						   len - processed,
+						   &buf[processed],
+						   &count);
+			processed += count;
+		} else {
+			ret = -EINVAL;
+			break;
+		}
+
+		/*
+		 * Return to the input code if the packet buffer is full.
+		 * Flushing will get done once the packet buffer has been
+		 * processed.
+		 */
+		if (OCSD_DATA_RESP_IS_WAIT(cur))
+			break;
+
+		prev_return = cur;
+	}
+
+	decoder->prev_return = cur;
+	*consumed = processed;
+
+	return ret;
+}
+
+void cs_etm_decoder__free(struct cs_etm_decoder *decoder)
+{
+	if (!decoder)
+		return;
+
+	ocsd_destroy_dcd_tree(decoder->dcd_tree);
+	decoder->dcd_tree = NULL;
+	free(decoder);
+}
+
+const char *cs_etm_decoder__get_name(struct cs_etm_decoder *decoder)
+{
+	return decoder->decoder_name;
+}
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
new file mode 100644
index 000000000..92a855fbe
--- /dev/null
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -0,0 +1,109 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright(C) 2015-2018 Linaro Limited.
+ *
+ * Author: Tor Jeremiassen <tor@ti.com>
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#ifndef INCLUDE__CS_ETM_DECODER_H__
+#define INCLUDE__CS_ETM_DECODER_H__
+
+#include <linux/types.h>
+#include <stdio.h>
+
+struct cs_etm_decoder;
+struct cs_etm_packet;
+struct cs_etm_packet_queue;
+
+struct cs_etm_queue;
+
+typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u8, u64, size_t, u8 *);
+
+struct cs_etmv3_trace_params {
+	u32 reg_ctrl;
+	u32 reg_trc_id;
+	u32 reg_ccer;
+	u32 reg_idr;
+};
+
+struct cs_etmv4_trace_params {
+	u32 reg_idr0;
+	u32 reg_idr1;
+	u32 reg_idr2;
+	u32 reg_idr8;
+	u32 reg_configr;
+	u32 reg_traceidr;
+};
+
+struct cs_ete_trace_params {
+	u32 reg_idr0;
+	u32 reg_idr1;
+	u32 reg_idr2;
+	u32 reg_idr8;
+	u32 reg_configr;
+	u32 reg_traceidr;
+	u32 reg_devarch;
+};
+
+struct cs_etm_trace_params {
+	int protocol;
+	union {
+		struct cs_etmv3_trace_params etmv3;
+		struct cs_etmv4_trace_params etmv4;
+		struct cs_ete_trace_params ete;
+	};
+};
+
+struct cs_etm_decoder_params {
+	int operation;
+	void (*packet_printer)(const char *msg);
+	cs_etm_mem_cb_type mem_acc_cb;
+	u8 formatted;
+	u8 fsyncs;
+	u8 hsyncs;
+	u8 frame_aligned;
+	void *data;
+};
+
+/*
+ * The following enums are indexed starting with 1 to align with the
+ * open source coresight trace decoder library.
+ */
+enum {
+	CS_ETM_PROTO_ETMV3 = 1,
+	CS_ETM_PROTO_ETMV4i,
+	CS_ETM_PROTO_ETMV4d,
+	CS_ETM_PROTO_PTM,
+	CS_ETM_PROTO_ETE
+};
+
+enum cs_etm_decoder_operation {
+	CS_ETM_OPERATION_PRINT = 1,
+	CS_ETM_OPERATION_DECODE,
+	CS_ETM_OPERATION_MAX,
+};
+
+int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder,
+				       u64 indx, const u8 *buf,
+				       size_t len, size_t *consumed);
+
+struct cs_etm_decoder *
+cs_etm_decoder__new(int num_cpu,
+		    struct cs_etm_decoder_params *d_params,
+		    struct cs_etm_trace_params t_params[]);
+
+void cs_etm_decoder__free(struct cs_etm_decoder *decoder);
+
+int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder,
+				      u64 start, u64 end,
+				      cs_etm_mem_cb_type cb_func);
+
+int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue,
+			       struct cs_etm_packet *packet);
+
+int cs_etm_decoder__reset(struct cs_etm_decoder *decoder);
+const char *cs_etm_decoder__get_name(struct cs_etm_decoder *decoder);
+
+#endif /* INCLUDE__CS_ETM_DECODER_H__ */
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
new file mode 100644
index 000000000..09e240e44
--- /dev/null
+++ b/tools/perf/util/cs-etm.c
@@ -0,0 +1,3138 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2015-2018 Linaro Limited.
+ *
+ * Author: Tor Jeremiassen <tor@ti.com>
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <linux/bitops.h>
+#include <linux/coresight-pmu.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/types.h>
+#include <linux/zalloc.h>
+
+#include <opencsd/ocsd_if_types.h>
+#include <stdlib.h>
+
+#include "auxtrace.h"
+#include "color.h"
+#include "cs-etm.h"
+#include "cs-etm-decoder/cs-etm-decoder.h"
+#include "debug.h"
+#include "dso.h"
+#include "evlist.h"
+#include "intlist.h"
+#include "machine.h"
+#include "map.h"
+#include "perf.h"
+#include "session.h"
+#include "map_symbol.h"
+#include "branch.h"
+#include "symbol.h"
+#include "tool.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include <tools/libc_compat.h>
+#include "util/synthetic-events.h"
+
+struct cs_etm_auxtrace {
+	struct auxtrace auxtrace;
+	struct auxtrace_queues queues;
+	struct auxtrace_heap heap;
+	struct itrace_synth_opts synth_opts;
+	struct perf_session *session;
+	struct machine *machine;
+	struct thread *unknown_thread;
+
+	u8 timeless_decoding;
+	u8 snapshot_mode;
+	u8 data_queued;
+
+	int num_cpu;
+	u64 latest_kernel_timestamp;
+	u32 auxtrace_type;
+	u64 branches_sample_type;
+	u64 branches_id;
+	u64 instructions_sample_type;
+	u64 instructions_sample_period;
+	u64 instructions_id;
+	u64 **metadata;
+	unsigned int pmu_type;
+};
+
+struct cs_etm_traceid_queue {
+	u8 trace_chan_id;
+	pid_t pid, tid;
+	u64 period_instructions;
+	size_t last_branch_pos;
+	union perf_event *event_buf;
+	struct thread *thread;
+	struct branch_stack *last_branch;
+	struct branch_stack *last_branch_rb;
+	struct cs_etm_packet *prev_packet;
+	struct cs_etm_packet *packet;
+	struct cs_etm_packet_queue packet_queue;
+};
+
+struct cs_etm_queue {
+	struct cs_etm_auxtrace *etm;
+	struct cs_etm_decoder *decoder;
+	struct auxtrace_buffer *buffer;
+	unsigned int queue_nr;
+	u8 pending_timestamp_chan_id;
+	u64 offset;
+	const unsigned char *buf;
+	size_t buf_len, buf_used;
+	/* Conversion between traceID and index in traceid_queues array */
+	struct intlist *traceid_queues_list;
+	struct cs_etm_traceid_queue **traceid_queues;
+};
+
+/* RB tree for quick conversion between traceID and metadata pointers */
+static struct intlist *traceid_list;
+
+static int cs_etm__process_queues(struct cs_etm_auxtrace *etm);
+static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
+					   pid_t tid);
+static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
+static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
+
+/* PTMs ETMIDR [11:8] set to b0011 */
+#define ETMIDR_PTM_VERSION 0x00000300
+
+/*
+ * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
+ * work with.  One option is to modify to auxtrace_heap_XYZ() API or simply
+ * encode the etm queue number as the upper 16 bit and the channel as
+ * the lower 16 bit.
+ */
+#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id)	\
+		      (queue_nr << 16 | trace_chan_id)
+#define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
+#define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
+
+static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
+{
+	etmidr &= ETMIDR_PTM_VERSION;
+
+	if (etmidr == ETMIDR_PTM_VERSION)
+		return CS_ETM_PROTO_PTM;
+
+	return CS_ETM_PROTO_ETMV3;
+}
+
+static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
+{
+	struct int_node *inode;
+	u64 *metadata;
+
+	inode = intlist__find(traceid_list, trace_chan_id);
+	if (!inode)
+		return -EINVAL;
+
+	metadata = inode->priv;
+	*magic = metadata[CS_ETM_MAGIC];
+	return 0;
+}
+
+int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
+{
+	struct int_node *inode;
+	u64 *metadata;
+
+	inode = intlist__find(traceid_list, trace_chan_id);
+	if (!inode)
+		return -EINVAL;
+
+	metadata = inode->priv;
+	*cpu = (int)metadata[CS_ETM_CPU];
+	return 0;
+}
+
+/*
+ * The returned PID format is presented by two bits:
+ *
+ *   Bit ETM_OPT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced;
+ *   Bit ETM_OPT_CTXTID2: CONTEXTIDR_EL2 is traced.
+ *
+ * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
+ * are enabled at the same time when the session runs on an EL2 kernel.
+ * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be
+ * recorded in the trace data, the tool will selectively use
+ * CONTEXTIDR_EL2 as PID.
+ */
+int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt)
+{
+	struct int_node *inode;
+	u64 *metadata, val;
+
+	inode = intlist__find(traceid_list, trace_chan_id);
+	if (!inode)
+		return -EINVAL;
+
+	metadata = inode->priv;
+
+	if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
+		val = metadata[CS_ETM_ETMCR];
+		/* CONTEXTIDR is traced */
+		if (val & BIT(ETM_OPT_CTXTID))
+			*pid_fmt = BIT(ETM_OPT_CTXTID);
+	} else {
+		val = metadata[CS_ETMV4_TRCCONFIGR];
+		/* CONTEXTIDR_EL2 is traced */
+		if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
+			*pid_fmt = BIT(ETM_OPT_CTXTID2);
+		/* CONTEXTIDR_EL1 is traced */
+		else if (val & BIT(ETM4_CFG_BIT_CTXTID))
+			*pid_fmt = BIT(ETM_OPT_CTXTID);
+	}
+
+	return 0;
+}
+
+void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
+					      u8 trace_chan_id)
+{
+	/*
+	 * When a timestamp packet is encountered the backend code
+	 * is stopped so that the front end has time to process packets
+	 * that were accumulated in the traceID queue.  Since there can
+	 * be more than one channel per cs_etm_queue, we need to specify
+	 * what traceID queue needs servicing.
+	 */
+	etmq->pending_timestamp_chan_id = trace_chan_id;
+}
+
+static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
+				      u8 *trace_chan_id)
+{
+	struct cs_etm_packet_queue *packet_queue;
+
+	if (!etmq->pending_timestamp_chan_id)
+		return 0;
+
+	if (trace_chan_id)
+		*trace_chan_id = etmq->pending_timestamp_chan_id;
+
+	packet_queue = cs_etm__etmq_get_packet_queue(etmq,
+						     etmq->pending_timestamp_chan_id);
+	if (!packet_queue)
+		return 0;
+
+	/* Acknowledge pending status */
+	etmq->pending_timestamp_chan_id = 0;
+
+	/* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
+	return packet_queue->cs_timestamp;
+}
+
+static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
+{
+	int i;
+
+	queue->head = 0;
+	queue->tail = 0;
+	queue->packet_count = 0;
+	for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
+		queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
+		queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
+		queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
+		queue->packet_buffer[i].instr_count = 0;
+		queue->packet_buffer[i].last_instr_taken_branch = false;
+		queue->packet_buffer[i].last_instr_size = 0;
+		queue->packet_buffer[i].last_instr_type = 0;
+		queue->packet_buffer[i].last_instr_subtype = 0;
+		queue->packet_buffer[i].last_instr_cond = 0;
+		queue->packet_buffer[i].flags = 0;
+		queue->packet_buffer[i].exception_number = UINT32_MAX;
+		queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
+		queue->packet_buffer[i].cpu = INT_MIN;
+	}
+}
+
+static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
+{
+	int idx;
+	struct int_node *inode;
+	struct cs_etm_traceid_queue *tidq;
+	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
+
+	intlist__for_each_entry(inode, traceid_queues_list) {
+		idx = (int)(intptr_t)inode->priv;
+		tidq = etmq->traceid_queues[idx];
+		cs_etm__clear_packet_queue(&tidq->packet_queue);
+	}
+}
+
+static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
+				      struct cs_etm_traceid_queue *tidq,
+				      u8 trace_chan_id)
+{
+	int rc = -ENOMEM;
+	struct auxtrace_queue *queue;
+	struct cs_etm_auxtrace *etm = etmq->etm;
+
+	cs_etm__clear_packet_queue(&tidq->packet_queue);
+
+	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
+	tidq->tid = queue->tid;
+	tidq->pid = -1;
+	tidq->trace_chan_id = trace_chan_id;
+
+	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
+	if (!tidq->packet)
+		goto out;
+
+	tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
+	if (!tidq->prev_packet)
+		goto out_free;
+
+	if (etm->synth_opts.last_branch) {
+		size_t sz = sizeof(struct branch_stack);
+
+		sz += etm->synth_opts.last_branch_sz *
+		      sizeof(struct branch_entry);
+		tidq->last_branch = zalloc(sz);
+		if (!tidq->last_branch)
+			goto out_free;
+		tidq->last_branch_rb = zalloc(sz);
+		if (!tidq->last_branch_rb)
+			goto out_free;
+	}
+
+	tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
+	if (!tidq->event_buf)
+		goto out_free;
+
+	return 0;
+
+out_free:
+	zfree(&tidq->last_branch_rb);
+	zfree(&tidq->last_branch);
+	zfree(&tidq->prev_packet);
+	zfree(&tidq->packet);
+out:
+	return rc;
+}
+
+static struct cs_etm_traceid_queue
+*cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
+{
+	int idx;
+	struct int_node *inode;
+	struct intlist *traceid_queues_list;
+	struct cs_etm_traceid_queue *tidq, **traceid_queues;
+	struct cs_etm_auxtrace *etm = etmq->etm;
+
+	if (etm->timeless_decoding)
+		trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
+
+	traceid_queues_list = etmq->traceid_queues_list;
+
+	/*
+	 * Check if the traceid_queue exist for this traceID by looking
+	 * in the queue list.
+	 */
+	inode = intlist__find(traceid_queues_list, trace_chan_id);
+	if (inode) {
+		idx = (int)(intptr_t)inode->priv;
+		return etmq->traceid_queues[idx];
+	}
+
+	/* We couldn't find a traceid_queue for this traceID, allocate one */
+	tidq = malloc(sizeof(*tidq));
+	if (!tidq)
+		return NULL;
+
+	memset(tidq, 0, sizeof(*tidq));
+
+	/* Get a valid index for the new traceid_queue */
+	idx = intlist__nr_entries(traceid_queues_list);
+	/* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */
+	inode = intlist__findnew(traceid_queues_list, trace_chan_id);
+	if (!inode)
+		goto out_free;
+
+	/* Associate this traceID with this index */
+	inode->priv = (void *)(intptr_t)idx;
+
+	if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
+		goto out_free;
+
+	/* Grow the traceid_queues array by one unit */
+	traceid_queues = etmq->traceid_queues;
+	traceid_queues = reallocarray(traceid_queues,
+				      idx + 1,
+				      sizeof(*traceid_queues));
+
+	/*
+	 * On failure reallocarray() returns NULL and the original block of
+	 * memory is left untouched.
+	 */
+	if (!traceid_queues)
+		goto out_free;
+
+	traceid_queues[idx] = tidq;
+	etmq->traceid_queues = traceid_queues;
+
+	return etmq->traceid_queues[idx];
+
+out_free:
+	/*
+	 * Function intlist__remove() removes the inode from the list
+	 * and delete the memory associated to it.
+	 */
+	intlist__remove(traceid_queues_list, inode);
+	free(tidq);
+
+	return NULL;
+}
+
+struct cs_etm_packet_queue
+*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
+{
+	struct cs_etm_traceid_queue *tidq;
+
+	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
+	if (tidq)
+		return &tidq->packet_queue;
+
+	return NULL;
+}
+
+static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
+				struct cs_etm_traceid_queue *tidq)
+{
+	struct cs_etm_packet *tmp;
+
+	if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
+	    etm->synth_opts.instructions) {
+		/*
+		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
+		 * the next incoming packet.
+		 */
+		tmp = tidq->packet;
+		tidq->packet = tidq->prev_packet;
+		tidq->prev_packet = tmp;
+	}
+}
+
+static void cs_etm__packet_dump(const char *pkt_string)
+{
+	const char *color = PERF_COLOR_BLUE;
+	int len = strlen(pkt_string);
+
+	if (len && (pkt_string[len-1] == '\n'))
+		color_fprintf(stdout, color, "	%s", pkt_string);
+	else
+		color_fprintf(stdout, color, "	%s\n", pkt_string);
+
+	fflush(stdout);
+}
+
+static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
+					  struct cs_etm_auxtrace *etm, int idx,
+					  u32 etmidr)
+{
+	u64 **metadata = etm->metadata;
+
+	t_params[idx].protocol = cs_etm__get_v7_protocol_version(etmidr);
+	t_params[idx].etmv3.reg_ctrl = metadata[idx][CS_ETM_ETMCR];
+	t_params[idx].etmv3.reg_trc_id = metadata[idx][CS_ETM_ETMTRACEIDR];
+}
+
+static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
+					  struct cs_etm_auxtrace *etm, int idx)
+{
+	u64 **metadata = etm->metadata;
+
+	t_params[idx].protocol = CS_ETM_PROTO_ETMV4i;
+	t_params[idx].etmv4.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0];
+	t_params[idx].etmv4.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1];
+	t_params[idx].etmv4.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2];
+	t_params[idx].etmv4.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8];
+	t_params[idx].etmv4.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR];
+	t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR];
+}
+
+static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
+					  struct cs_etm_auxtrace *etm, int idx)
+{
+	u64 **metadata = etm->metadata;
+
+	t_params[idx].protocol = CS_ETM_PROTO_ETE;
+	t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0];
+	t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1];
+	t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2];
+	t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8];
+	t_params[idx].ete.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR];
+	t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR];
+	t_params[idx].ete.reg_devarch = metadata[idx][CS_ETE_TRCDEVARCH];
+}
+
+static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
+				     struct cs_etm_auxtrace *etm,
+				     int decoders)
+{
+	int i;
+	u32 etmidr;
+	u64 architecture;
+
+	for (i = 0; i < decoders; i++) {
+		architecture = etm->metadata[i][CS_ETM_MAGIC];
+
+		switch (architecture) {
+		case __perf_cs_etmv3_magic:
+			etmidr = etm->metadata[i][CS_ETM_ETMIDR];
+			cs_etm__set_trace_param_etmv3(t_params, etm, i, etmidr);
+			break;
+		case __perf_cs_etmv4_magic:
+			cs_etm__set_trace_param_etmv4(t_params, etm, i);
+			break;
+		case __perf_cs_ete_magic:
+			cs_etm__set_trace_param_ete(t_params, etm, i);
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
+				       struct cs_etm_queue *etmq,
+				       enum cs_etm_decoder_operation mode,
+				       bool formatted)
+{
+	int ret = -EINVAL;
+
+	if (!(mode < CS_ETM_OPERATION_MAX))
+		goto out;
+
+	d_params->packet_printer = cs_etm__packet_dump;
+	d_params->operation = mode;
+	d_params->data = etmq;
+	d_params->formatted = formatted;
+	d_params->fsyncs = false;
+	d_params->hsyncs = false;
+	d_params->frame_aligned = true;
+
+	ret = 0;
+out:
+	return ret;
+}
+
+static void cs_etm__dump_event(struct cs_etm_queue *etmq,
+			       struct auxtrace_buffer *buffer)
+{
+	int ret;
+	const char *color = PERF_COLOR_BLUE;
+	size_t buffer_used = 0;
+
+	fprintf(stdout, "\n");
+	color_fprintf(stdout, color,
+		     ". ... CoreSight %s Trace data: size %#zx bytes\n",
+		     cs_etm_decoder__get_name(etmq->decoder), buffer->size);
+
+	do {
+		size_t consumed;
+
+		ret = cs_etm_decoder__process_data_block(
+				etmq->decoder, buffer->offset,
+				&((u8 *)buffer->data)[buffer_used],
+				buffer->size - buffer_used, &consumed);
+		if (ret)
+			break;
+
+		buffer_used += consumed;
+	} while (buffer_used < buffer->size);
+
+	cs_etm_decoder__reset(etmq->decoder);
+}
+
+static int cs_etm__flush_events(struct perf_session *session,
+				struct perf_tool *tool)
+{
+	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
+						   struct cs_etm_auxtrace,
+						   auxtrace);
+	if (dump_trace)
+		return 0;
+
+	if (!tool->ordered_events)
+		return -EINVAL;
+
+	if (etm->timeless_decoding)
+		return cs_etm__process_timeless_queues(etm, -1);
+
+	return cs_etm__process_queues(etm);
+}
+
+static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
+{
+	int idx;
+	uintptr_t priv;
+	struct int_node *inode, *tmp;
+	struct cs_etm_traceid_queue *tidq;
+	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
+
+	intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
+		priv = (uintptr_t)inode->priv;
+		idx = priv;
+
+		/* Free this traceid_queue from the array */
+		tidq = etmq->traceid_queues[idx];
+		thread__zput(tidq->thread);
+		zfree(&tidq->event_buf);
+		zfree(&tidq->last_branch);
+		zfree(&tidq->last_branch_rb);
+		zfree(&tidq->prev_packet);
+		zfree(&tidq->packet);
+		zfree(&tidq);
+
+		/*
+		 * Function intlist__remove() removes the inode from the list
+		 * and delete the memory associated to it.
+		 */
+		intlist__remove(traceid_queues_list, inode);
+	}
+
+	/* Then the RB tree itself */
+	intlist__delete(traceid_queues_list);
+	etmq->traceid_queues_list = NULL;
+
+	/* finally free the traceid_queues array */
+	zfree(&etmq->traceid_queues);
+}
+
+static void cs_etm__free_queue(void *priv)
+{
+	struct cs_etm_queue *etmq = priv;
+
+	if (!etmq)
+		return;
+
+	cs_etm_decoder__free(etmq->decoder);
+	cs_etm__free_traceid_queues(etmq);
+	free(etmq);
+}
+
+static void cs_etm__free_events(struct perf_session *session)
+{
+	unsigned int i;
+	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
+						   struct cs_etm_auxtrace,
+						   auxtrace);
+	struct auxtrace_queues *queues = &aux->queues;
+
+	for (i = 0; i < queues->nr_queues; i++) {
+		cs_etm__free_queue(queues->queue_array[i].priv);
+		queues->queue_array[i].priv = NULL;
+	}
+
+	auxtrace_queues__free(queues);
+}
+
+static void cs_etm__free(struct perf_session *session)
+{
+	int i;
+	struct int_node *inode, *tmp;
+	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
+						   struct cs_etm_auxtrace,
+						   auxtrace);
+	cs_etm__free_events(session);
+	session->auxtrace = NULL;
+
+	/* First remove all traceID/metadata nodes for the RB tree */
+	intlist__for_each_entry_safe(inode, tmp, traceid_list)
+		intlist__remove(traceid_list, inode);
+	/* Then the RB tree itself */
+	intlist__delete(traceid_list);
+
+	for (i = 0; i < aux->num_cpu; i++)
+		zfree(&aux->metadata[i]);
+
+	thread__zput(aux->unknown_thread);
+	zfree(&aux->metadata);
+	zfree(&aux);
+}
+
+static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
+				      struct evsel *evsel)
+{
+	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
+						   struct cs_etm_auxtrace,
+						   auxtrace);
+
+	return evsel->core.attr.type == aux->pmu_type;
+}
+
+static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
+{
+	struct machine *machine;
+
+	machine = etmq->etm->machine;
+
+	if (address >= machine__kernel_start(machine)) {
+		if (machine__is_host(machine))
+			return PERF_RECORD_MISC_KERNEL;
+		else
+			return PERF_RECORD_MISC_GUEST_KERNEL;
+	} else {
+		if (machine__is_host(machine))
+			return PERF_RECORD_MISC_USER;
+		else if (perf_guest)
+			return PERF_RECORD_MISC_GUEST_USER;
+		else
+			return PERF_RECORD_MISC_HYPERVISOR;
+	}
+}
+
+static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
+			      u64 address, size_t size, u8 *buffer)
+{
+	u8  cpumode;
+	u64 offset;
+	int len;
+	struct thread *thread;
+	struct machine *machine;
+	struct addr_location al;
+	struct cs_etm_traceid_queue *tidq;
+
+	if (!etmq)
+		return 0;
+
+	machine = etmq->etm->machine;
+	cpumode = cs_etm__cpu_mode(etmq, address);
+	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
+	if (!tidq)
+		return 0;
+
+	thread = tidq->thread;
+	if (!thread) {
+		if (cpumode != PERF_RECORD_MISC_KERNEL)
+			return 0;
+		thread = etmq->etm->unknown_thread;
+	}
+
+	if (!thread__find_map(thread, cpumode, address, &al) || !al.map->dso)
+		return 0;
+
+	if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
+	    dso__data_status_seen(al.map->dso, DSO_DATA_STATUS_SEEN_ITRACE))
+		return 0;
+
+	offset = al.map->map_ip(al.map, address);
+
+	map__load(al.map);
+
+	len = dso__data_read_offset(al.map->dso, machine, offset, buffer, size);
+
+	if (len <= 0) {
+		ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
+				 "              Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
+		if (!al.map->dso->auxtrace_warned) {
+			pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
+				    address,
+				    al.map->dso->long_name ? al.map->dso->long_name : "Unknown");
+			al.map->dso->auxtrace_warned = true;
+		}
+		return 0;
+	}
+
+	return len;
+}
+
+static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
+						bool formatted)
+{
+	struct cs_etm_decoder_params d_params;
+	struct cs_etm_trace_params  *t_params = NULL;
+	struct cs_etm_queue *etmq;
+	/*
+	 * Each queue can only contain data from one CPU when unformatted, so only one decoder is
+	 * needed.
+	 */
+	int decoders = formatted ? etm->num_cpu : 1;
+
+	etmq = zalloc(sizeof(*etmq));
+	if (!etmq)
+		return NULL;
+
+	etmq->traceid_queues_list = intlist__new(NULL);
+	if (!etmq->traceid_queues_list)
+		goto out_free;
+
+	/* Use metadata to fill in trace parameters for trace decoder */
+	t_params = zalloc(sizeof(*t_params) * decoders);
+
+	if (!t_params)
+		goto out_free;
+
+	if (cs_etm__init_trace_params(t_params, etm, decoders))
+		goto out_free;
+
+	/* Set decoder parameters to decode trace packets */
+	if (cs_etm__init_decoder_params(&d_params, etmq,
+					dump_trace ? CS_ETM_OPERATION_PRINT :
+						     CS_ETM_OPERATION_DECODE,
+					formatted))
+		goto out_free;
+
+	etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
+					    t_params);
+
+	if (!etmq->decoder)
+		goto out_free;
+
+	/*
+	 * Register a function to handle all memory accesses required by
+	 * the trace decoder library.
+	 */
+	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
+					      0x0L, ((u64) -1L),
+					      cs_etm__mem_access))
+		goto out_free_decoder;
+
+	zfree(&t_params);
+	return etmq;
+
+out_free_decoder:
+	cs_etm_decoder__free(etmq->decoder);
+out_free:
+	intlist__delete(etmq->traceid_queues_list);
+	free(etmq);
+
+	return NULL;
+}
+
+static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
+			       struct auxtrace_queue *queue,
+			       unsigned int queue_nr,
+			       bool formatted)
+{
+	struct cs_etm_queue *etmq = queue->priv;
+
+	if (list_empty(&queue->head) || etmq)
+		return 0;
+
+	etmq = cs_etm__alloc_queue(etm, formatted);
+
+	if (!etmq)
+		return -ENOMEM;
+
+	queue->priv = etmq;
+	etmq->etm = etm;
+	etmq->queue_nr = queue_nr;
+	etmq->offset = 0;
+
+	return 0;
+}
+
+static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
+					    struct cs_etm_queue *etmq,
+					    unsigned int queue_nr)
+{
+	int ret = 0;
+	unsigned int cs_queue_nr;
+	u8 trace_chan_id;
+	u64 cs_timestamp;
+
+	/*
+	 * We are under a CPU-wide trace scenario.  As such we need to know
+	 * when the code that generated the traces started to execute so that
+	 * it can be correlated with execution on other CPUs.  So we get a
+	 * handle on the beginning of traces and decode until we find a
+	 * timestamp.  The timestamp is then added to the auxtrace min heap
+	 * in order to know what nibble (of all the etmqs) to decode first.
+	 */
+	while (1) {
+		/*
+		 * Fetch an aux_buffer from this etmq.  Bail if no more
+		 * blocks or an error has been encountered.
+		 */
+		ret = cs_etm__get_data_block(etmq);
+		if (ret <= 0)
+			goto out;
+
+		/*
+		 * Run decoder on the trace block.  The decoder will stop when
+		 * encountering a CS timestamp, a full packet queue or the end of
+		 * trace for that block.
+		 */
+		ret = cs_etm__decode_data_block(etmq);
+		if (ret)
+			goto out;
+
+		/*
+		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
+		 * the timestamp calculation for us.
+		 */
+		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
+
+		/* We found a timestamp, no need to continue. */
+		if (cs_timestamp)
+			break;
+
+		/*
+		 * We didn't find a timestamp so empty all the traceid packet
+		 * queues before looking for another timestamp packet, either
+		 * in the current data block or a new one.  Packets that were
+		 * just decoded are useless since no timestamp has been
+		 * associated with them.  As such simply discard them.
+		 */
+		cs_etm__clear_all_packet_queues(etmq);
+	}
+
+	/*
+	 * We have a timestamp.  Add it to the min heap to reflect when
+	 * instructions conveyed by the range packets of this traceID queue
+	 * started to execute.  Once the same has been done for all the traceID
+	 * queues of each etmq, redenring and decoding can start in
+	 * chronological order.
+	 *
+	 * Note that packets decoded above are still in the traceID's packet
+	 * queue and will be processed in cs_etm__process_queues().
+	 */
+	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
+	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
+out:
+	return ret;
+}
+
+static inline
+void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
+				 struct cs_etm_traceid_queue *tidq)
+{
+	struct branch_stack *bs_src = tidq->last_branch_rb;
+	struct branch_stack *bs_dst = tidq->last_branch;
+	size_t nr = 0;
+
+	/*
+	 * Set the number of records before early exit: ->nr is used to
+	 * determine how many branches to copy from ->entries.
+	 */
+	bs_dst->nr = bs_src->nr;
+
+	/*
+	 * Early exit when there is nothing to copy.
+	 */
+	if (!bs_src->nr)
+		return;
+
+	/*
+	 * As bs_src->entries is a circular buffer, we need to copy from it in
+	 * two steps.  First, copy the branches from the most recently inserted
+	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
+	 */
+	nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
+	memcpy(&bs_dst->entries[0],
+	       &bs_src->entries[tidq->last_branch_pos],
+	       sizeof(struct branch_entry) * nr);
+
+	/*
+	 * If we wrapped around at least once, the branches from the beginning
+	 * of the bs_src->entries buffer and until the ->last_branch_pos element
+	 * are older valid branches: copy them over.  The total number of
+	 * branches copied over will be equal to the number of branches asked by
+	 * the user in last_branch_sz.
+	 */
+	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
+		memcpy(&bs_dst->entries[nr],
+		       &bs_src->entries[0],
+		       sizeof(struct branch_entry) * tidq->last_branch_pos);
+	}
+}
+
+static inline
+void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
+{
+	tidq->last_branch_pos = 0;
+	tidq->last_branch_rb->nr = 0;
+}
+
+static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
+					 u8 trace_chan_id, u64 addr)
+{
+	u8 instrBytes[2];
+
+	cs_etm__mem_access(etmq, trace_chan_id, addr,
+			   ARRAY_SIZE(instrBytes), instrBytes);
+	/*
+	 * T32 instruction size is indicated by bits[15:11] of the first
+	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
+	 * denote a 32-bit instruction.
+	 */
+	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
+}
+
+static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
+{
+	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
+	if (packet->sample_type == CS_ETM_DISCONTINUITY)
+		return 0;
+
+	return packet->start_addr;
+}
+
+static inline
+u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
+{
+	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
+	if (packet->sample_type == CS_ETM_DISCONTINUITY)
+		return 0;
+
+	return packet->end_addr - packet->last_instr_size;
+}
+
+static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
+				     u64 trace_chan_id,
+				     const struct cs_etm_packet *packet,
+				     u64 offset)
+{
+	if (packet->isa == CS_ETM_ISA_T32) {
+		u64 addr = packet->start_addr;
+
+		while (offset) {
+			addr += cs_etm__t32_instr_size(etmq,
+						       trace_chan_id, addr);
+			offset--;
+		}
+		return addr;
+	}
+
+	/* Assume a 4 byte instruction size (A32/A64) */
+	return packet->start_addr + offset * 4;
+}
+
+static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
+					  struct cs_etm_traceid_queue *tidq)
+{
+	struct branch_stack *bs = tidq->last_branch_rb;
+	struct branch_entry *be;
+
+	/*
+	 * The branches are recorded in a circular buffer in reverse
+	 * chronological order: we start recording from the last element of the
+	 * buffer down.  After writing the first element of the stack, move the
+	 * insert position back to the end of the buffer.
+	 */
+	if (!tidq->last_branch_pos)
+		tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
+
+	tidq->last_branch_pos -= 1;
+
+	be       = &bs->entries[tidq->last_branch_pos];
+	be->from = cs_etm__last_executed_instr(tidq->prev_packet);
+	be->to	 = cs_etm__first_executed_instr(tidq->packet);
+	/* No support for mispredict */
+	be->flags.mispred = 0;
+	be->flags.predicted = 1;
+
+	/*
+	 * Increment bs->nr until reaching the number of last branches asked by
+	 * the user on the command line.
+	 */
+	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
+		bs->nr += 1;
+}
+
+static int cs_etm__inject_event(union perf_event *event,
+			       struct perf_sample *sample, u64 type)
+{
+	event->header.size = perf_event__sample_event_size(sample, type, 0);
+	return perf_event__synthesize_sample(event, type, 0, sample);
+}
+
+
+static int
+cs_etm__get_trace(struct cs_etm_queue *etmq)
+{
+	struct auxtrace_buffer *aux_buffer = etmq->buffer;
+	struct auxtrace_buffer *old_buffer = aux_buffer;
+	struct auxtrace_queue *queue;
+
+	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
+
+	aux_buffer = auxtrace_buffer__next(queue, aux_buffer);
+
+	/* If no more data, drop the previous auxtrace_buffer and return */
+	if (!aux_buffer) {
+		if (old_buffer)
+			auxtrace_buffer__drop_data(old_buffer);
+		etmq->buf_len = 0;
+		return 0;
+	}
+
+	etmq->buffer = aux_buffer;
+
+	/* If the aux_buffer doesn't have data associated, try to load it */
+	if (!aux_buffer->data) {
+		/* get the file desc associated with the perf data file */
+		int fd = perf_data__fd(etmq->etm->session->data);
+
+		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
+		if (!aux_buffer->data)
+			return -ENOMEM;
+	}
+
+	/* If valid, drop the previous buffer */
+	if (old_buffer)
+		auxtrace_buffer__drop_data(old_buffer);
+
+	etmq->buf_used = 0;
+	etmq->buf_len = aux_buffer->size;
+	etmq->buf = aux_buffer->data;
+
+	return etmq->buf_len;
+}
+
+static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
+				    struct cs_etm_traceid_queue *tidq)
+{
+	if ((!tidq->thread) && (tidq->tid != -1))
+		tidq->thread = machine__find_thread(etm->machine, -1,
+						    tidq->tid);
+
+	if (tidq->thread)
+		tidq->pid = tidq->thread->pid_;
+}
+
+int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq,
+			 pid_t tid, u8 trace_chan_id)
+{
+	int cpu, err = -EINVAL;
+	struct cs_etm_auxtrace *etm = etmq->etm;
+	struct cs_etm_traceid_queue *tidq;
+
+	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
+	if (!tidq)
+		return err;
+
+	if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0)
+		return err;
+
+	err = machine__set_current_tid(etm->machine, cpu, tid, tid);
+	if (err)
+		return err;
+
+	tidq->tid = tid;
+	thread__zput(tidq->thread);
+
+	cs_etm__set_pid_tid_cpu(etm, tidq);
+	return 0;
+}
+
+bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
+{
+	return !!etmq->etm->timeless_decoding;
+}
+
+static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
+			      u64 trace_chan_id,
+			      const struct cs_etm_packet *packet,
+			      struct perf_sample *sample)
+{
+	/*
+	 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
+	 * packet, so directly bail out with 'insn_len' = 0.
+	 */
+	if (packet->sample_type == CS_ETM_DISCONTINUITY) {
+		sample->insn_len = 0;
+		return;
+	}
+
+	/*
+	 * T32 instruction size might be 32-bit or 16-bit, decide by calling
+	 * cs_etm__t32_instr_size().
+	 */
+	if (packet->isa == CS_ETM_ISA_T32)
+		sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
+							  sample->ip);
+	/* Otherwise, A64 and A32 instruction size are always 32-bit. */
+	else
+		sample->insn_len = 4;
+
+	cs_etm__mem_access(etmq, trace_chan_id, sample->ip,
+			   sample->insn_len, (void *)sample->insn);
+}
+
+static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
+					    struct cs_etm_traceid_queue *tidq,
+					    u64 addr, u64 period)
+{
+	int ret = 0;
+	struct cs_etm_auxtrace *etm = etmq->etm;
+	union perf_event *event = tidq->event_buf;
+	struct perf_sample sample = {.ip = 0,};
+
+	event->sample.header.type = PERF_RECORD_SAMPLE;
+	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
+	event->sample.header.size = sizeof(struct perf_event_header);
+
+	if (!etm->timeless_decoding)
+		sample.time = etm->latest_kernel_timestamp;
+	sample.ip = addr;
+	sample.pid = tidq->pid;
+	sample.tid = tidq->tid;
+	sample.id = etmq->etm->instructions_id;
+	sample.stream_id = etmq->etm->instructions_id;
+	sample.period = period;
+	sample.cpu = tidq->packet->cpu;
+	sample.flags = tidq->prev_packet->flags;
+	sample.cpumode = event->sample.header.misc;
+
+	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
+
+	if (etm->synth_opts.last_branch)
+		sample.branch_stack = tidq->last_branch;
+
+	if (etm->synth_opts.inject) {
+		ret = cs_etm__inject_event(event, &sample,
+					   etm->instructions_sample_type);
+		if (ret)
+			return ret;
+	}
+
+	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
+
+	if (ret)
+		pr_err(
+			"CS ETM Trace: failed to deliver instruction event, error %d\n",
+			ret);
+
+	return ret;
+}
+
+/*
+ * The cs etm packet encodes an instruction range between a branch target
+ * and the next taken branch. Generate sample accordingly.
+ */
+static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
+				       struct cs_etm_traceid_queue *tidq)
+{
+	int ret = 0;
+	struct cs_etm_auxtrace *etm = etmq->etm;
+	struct perf_sample sample = {.ip = 0,};
+	union perf_event *event = tidq->event_buf;
+	struct dummy_branch_stack {
+		u64			nr;
+		u64			hw_idx;
+		struct branch_entry	entries;
+	} dummy_bs;
+	u64 ip;
+
+	ip = cs_etm__last_executed_instr(tidq->prev_packet);
+
+	event->sample.header.type = PERF_RECORD_SAMPLE;
+	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
+	event->sample.header.size = sizeof(struct perf_event_header);
+
+	if (!etm->timeless_decoding)
+		sample.time = etm->latest_kernel_timestamp;
+	sample.ip = ip;
+	sample.pid = tidq->pid;
+	sample.tid = tidq->tid;
+	sample.addr = cs_etm__first_executed_instr(tidq->packet);
+	sample.id = etmq->etm->branches_id;
+	sample.stream_id = etmq->etm->branches_id;
+	sample.period = 1;
+	sample.cpu = tidq->packet->cpu;
+	sample.flags = tidq->prev_packet->flags;
+	sample.cpumode = event->sample.header.misc;
+
+	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet,
+			  &sample);
+
+	/*
+	 * perf report cannot handle events without a branch stack
+	 */
+	if (etm->synth_opts.last_branch) {
+		dummy_bs = (struct dummy_branch_stack){
+			.nr = 1,
+			.hw_idx = -1ULL,
+			.entries = {
+				.from = sample.ip,
+				.to = sample.addr,
+			},
+		};
+		sample.branch_stack = (struct branch_stack *)&dummy_bs;
+	}
+
+	if (etm->synth_opts.inject) {
+		ret = cs_etm__inject_event(event, &sample,
+					   etm->branches_sample_type);
+		if (ret)
+			return ret;
+	}
+
+	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
+
+	if (ret)
+		pr_err(
+		"CS ETM Trace: failed to deliver instruction event, error %d\n",
+		ret);
+
+	return ret;
+}
+
+struct cs_etm_synth {
+	struct perf_tool dummy_tool;
+	struct perf_session *session;
+};
+
+static int cs_etm__event_synth(struct perf_tool *tool,
+			       union perf_event *event,
+			       struct perf_sample *sample __maybe_unused,
+			       struct machine *machine __maybe_unused)
+{
+	struct cs_etm_synth *cs_etm_synth =
+		      container_of(tool, struct cs_etm_synth, dummy_tool);
+
+	return perf_session__deliver_synth_event(cs_etm_synth->session,
+						 event, NULL);
+}
+
+static int cs_etm__synth_event(struct perf_session *session,
+			       struct perf_event_attr *attr, u64 id)
+{
+	struct cs_etm_synth cs_etm_synth;
+
+	memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth));
+	cs_etm_synth.session = session;
+
+	return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1,
+					   &id, cs_etm__event_synth);
+}
+
+static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
+				struct perf_session *session)
+{
+	struct evlist *evlist = session->evlist;
+	struct evsel *evsel;
+	struct perf_event_attr attr;
+	bool found = false;
+	u64 id;
+	int err;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->core.attr.type == etm->pmu_type) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		pr_debug("No selected events with CoreSight Trace data\n");
+		return 0;
+	}
+
+	memset(&attr, 0, sizeof(struct perf_event_attr));
+	attr.size = sizeof(struct perf_event_attr);
+	attr.type = PERF_TYPE_HARDWARE;
+	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
+	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+			    PERF_SAMPLE_PERIOD;
+	if (etm->timeless_decoding)
+		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
+	else
+		attr.sample_type |= PERF_SAMPLE_TIME;
+
+	attr.exclude_user = evsel->core.attr.exclude_user;
+	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
+	attr.exclude_hv = evsel->core.attr.exclude_hv;
+	attr.exclude_host = evsel->core.attr.exclude_host;
+	attr.exclude_guest = evsel->core.attr.exclude_guest;
+	attr.sample_id_all = evsel->core.attr.sample_id_all;
+	attr.read_format = evsel->core.attr.read_format;
+
+	/* create new id val to be a fixed offset from evsel id */
+	id = evsel->core.id[0] + 1000000000;
+
+	if (!id)
+		id = 1;
+
+	if (etm->synth_opts.branches) {
+		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+		attr.sample_period = 1;
+		attr.sample_type |= PERF_SAMPLE_ADDR;
+		err = cs_etm__synth_event(session, &attr, id);
+		if (err)
+			return err;
+		etm->branches_sample_type = attr.sample_type;
+		etm->branches_id = id;
+		id += 1;
+		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
+	}
+
+	if (etm->synth_opts.last_branch) {
+		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+		/*
+		 * We don't use the hardware index, but the sample generation
+		 * code uses the new format branch_stack with this field,
+		 * so the event attributes must indicate that it's present.
+		 */
+		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
+	}
+
+	if (etm->synth_opts.instructions) {
+		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+		attr.sample_period = etm->synth_opts.period;
+		etm->instructions_sample_period = attr.sample_period;
+		err = cs_etm__synth_event(session, &attr, id);
+		if (err)
+			return err;
+		etm->instructions_sample_type = attr.sample_type;
+		etm->instructions_id = id;
+		id += 1;
+	}
+
+	return 0;
+}
+
+static int cs_etm__sample(struct cs_etm_queue *etmq,
+			  struct cs_etm_traceid_queue *tidq)
+{
+	struct cs_etm_auxtrace *etm = etmq->etm;
+	int ret;
+	u8 trace_chan_id = tidq->trace_chan_id;
+	u64 instrs_prev;
+
+	/* Get instructions remainder from previous packet */
+	instrs_prev = tidq->period_instructions;
+
+	tidq->period_instructions += tidq->packet->instr_count;
+
+	/*
+	 * Record a branch when the last instruction in
+	 * PREV_PACKET is a branch.
+	 */
+	if (etm->synth_opts.last_branch &&
+	    tidq->prev_packet->sample_type == CS_ETM_RANGE &&
+	    tidq->prev_packet->last_instr_taken_branch)
+		cs_etm__update_last_branch_rb(etmq, tidq);
+
+	if (etm->synth_opts.instructions &&
+	    tidq->period_instructions >= etm->instructions_sample_period) {
+		/*
+		 * Emit instruction sample periodically
+		 * TODO: allow period to be defined in cycles and clock time
+		 */
+
+		/*
+		 * Below diagram demonstrates the instruction samples
+		 * generation flows:
+		 *
+		 *    Instrs     Instrs       Instrs       Instrs
+		 *   Sample(n)  Sample(n+1)  Sample(n+2)  Sample(n+3)
+		 *    |            |            |            |
+		 *    V            V            V            V
+		 *   --------------------------------------------------
+		 *            ^                                  ^
+		 *            |                                  |
+		 *         Period                             Period
+		 *    instructions(Pi)                   instructions(Pi')
+		 *
+		 *            |                                  |
+		 *            \---------------- -----------------/
+		 *                             V
+		 *                 tidq->packet->instr_count
+		 *
+		 * Instrs Sample(n...) are the synthesised samples occurring
+		 * every etm->instructions_sample_period instructions - as
+		 * defined on the perf command line.  Sample(n) is being the
+		 * last sample before the current etm packet, n+1 to n+3
+		 * samples are generated from the current etm packet.
+		 *
+		 * tidq->packet->instr_count represents the number of
+		 * instructions in the current etm packet.
+		 *
+		 * Period instructions (Pi) contains the number of
+		 * instructions executed after the sample point(n) from the
+		 * previous etm packet.  This will always be less than
+		 * etm->instructions_sample_period.
+		 *
+		 * When generate new samples, it combines with two parts
+		 * instructions, one is the tail of the old packet and another
+		 * is the head of the new coming packet, to generate
+		 * sample(n+1); sample(n+2) and sample(n+3) consume the
+		 * instructions with sample period.  After sample(n+3), the rest
+		 * instructions will be used by later packet and it is assigned
+		 * to tidq->period_instructions for next round calculation.
+		 */
+
+		/*
+		 * Get the initial offset into the current packet instructions;
+		 * entry conditions ensure that instrs_prev is less than
+		 * etm->instructions_sample_period.
+		 */
+		u64 offset = etm->instructions_sample_period - instrs_prev;
+		u64 addr;
+
+		/* Prepare last branches for instruction sample */
+		if (etm->synth_opts.last_branch)
+			cs_etm__copy_last_branch_rb(etmq, tidq);
+
+		while (tidq->period_instructions >=
+				etm->instructions_sample_period) {
+			/*
+			 * Calculate the address of the sampled instruction (-1
+			 * as sample is reported as though instruction has just
+			 * been executed, but PC has not advanced to next
+			 * instruction)
+			 */
+			addr = cs_etm__instr_addr(etmq, trace_chan_id,
+						  tidq->packet, offset - 1);
+			ret = cs_etm__synth_instruction_sample(
+				etmq, tidq, addr,
+				etm->instructions_sample_period);
+			if (ret)
+				return ret;
+
+			offset += etm->instructions_sample_period;
+			tidq->period_instructions -=
+				etm->instructions_sample_period;
+		}
+	}
+
+	if (etm->synth_opts.branches) {
+		bool generate_sample = false;
+
+		/* Generate sample for tracing on packet */
+		if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
+			generate_sample = true;
+
+		/* Generate sample for branch taken packet */
+		if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
+		    tidq->prev_packet->last_instr_taken_branch)
+			generate_sample = true;
+
+		if (generate_sample) {
+			ret = cs_etm__synth_branch_sample(etmq, tidq);
+			if (ret)
+				return ret;
+		}
+	}
+
+	cs_etm__packet_swap(etm, tidq);
+
+	return 0;
+}
+
+static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
+{
+	/*
+	 * When the exception packet is inserted, whether the last instruction
+	 * in previous range packet is taken branch or not, we need to force
+	 * to set 'prev_packet->last_instr_taken_branch' to true.  This ensures
+	 * to generate branch sample for the instruction range before the
+	 * exception is trapped to kernel or before the exception returning.
+	 *
+	 * The exception packet includes the dummy address values, so don't
+	 * swap PACKET with PREV_PACKET.  This keeps PREV_PACKET to be useful
+	 * for generating instruction and branch samples.
+	 */
+	if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
+		tidq->prev_packet->last_instr_taken_branch = true;
+
+	return 0;
+}
+
+static int cs_etm__flush(struct cs_etm_queue *etmq,
+			 struct cs_etm_traceid_queue *tidq)
+{
+	int err = 0;
+	struct cs_etm_auxtrace *etm = etmq->etm;
+
+	/* Handle start tracing packet */
+	if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
+		goto swap_packet;
+
+	if (etmq->etm->synth_opts.last_branch &&
+	    etmq->etm->synth_opts.instructions &&
+	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
+		u64 addr;
+
+		/* Prepare last branches for instruction sample */
+		cs_etm__copy_last_branch_rb(etmq, tidq);
+
+		/*
+		 * Generate a last branch event for the branches left in the
+		 * circular buffer at the end of the trace.
+		 *
+		 * Use the address of the end of the last reported execution
+		 * range
+		 */
+		addr = cs_etm__last_executed_instr(tidq->prev_packet);
+
+		err = cs_etm__synth_instruction_sample(
+			etmq, tidq, addr,
+			tidq->period_instructions);
+		if (err)
+			return err;
+
+		tidq->period_instructions = 0;
+
+	}
+
+	if (etm->synth_opts.branches &&
+	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
+		err = cs_etm__synth_branch_sample(etmq, tidq);
+		if (err)
+			return err;
+	}
+
+swap_packet:
+	cs_etm__packet_swap(etm, tidq);
+
+	/* Reset last branches after flush the trace */
+	if (etm->synth_opts.last_branch)
+		cs_etm__reset_last_branch_rb(tidq);
+
+	return err;
+}
+
+static int cs_etm__end_block(struct cs_etm_queue *etmq,
+			     struct cs_etm_traceid_queue *tidq)
+{
+	int err;
+
+	/*
+	 * It has no new packet coming and 'etmq->packet' contains the stale
+	 * packet which was set at the previous time with packets swapping;
+	 * so skip to generate branch sample to avoid stale packet.
+	 *
+	 * For this case only flush branch stack and generate a last branch
+	 * event for the branches left in the circular buffer at the end of
+	 * the trace.
+	 */
+	if (etmq->etm->synth_opts.last_branch &&
+	    etmq->etm->synth_opts.instructions &&
+	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
+		u64 addr;
+
+		/* Prepare last branches for instruction sample */
+		cs_etm__copy_last_branch_rb(etmq, tidq);
+
+		/*
+		 * Use the address of the end of the last reported execution
+		 * range.
+		 */
+		addr = cs_etm__last_executed_instr(tidq->prev_packet);
+
+		err = cs_etm__synth_instruction_sample(
+			etmq, tidq, addr,
+			tidq->period_instructions);
+		if (err)
+			return err;
+
+		tidq->period_instructions = 0;
+	}
+
+	return 0;
+}
+/*
+ * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
+ *			   if need be.
+ * Returns:	< 0	if error
+ *		= 0	if no more auxtrace_buffer to read
+ *		> 0	if the current buffer isn't empty yet
+ */
+static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
+{
+	int ret;
+
+	if (!etmq->buf_len) {
+		ret = cs_etm__get_trace(etmq);
+		if (ret <= 0)
+			return ret;
+		/*
+		 * We cannot assume consecutive blocks in the data file
+		 * are contiguous, reset the decoder to force re-sync.
+		 */
+		ret = cs_etm_decoder__reset(etmq->decoder);
+		if (ret)
+			return ret;
+	}
+
+	return etmq->buf_len;
+}
+
+static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
+				 struct cs_etm_packet *packet,
+				 u64 end_addr)
+{
+	/* Initialise to keep compiler happy */
+	u16 instr16 = 0;
+	u32 instr32 = 0;
+	u64 addr;
+
+	switch (packet->isa) {
+	case CS_ETM_ISA_T32:
+		/*
+		 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
+		 *
+		 *  b'15         b'8
+		 * +-----------------+--------+
+		 * | 1 1 0 1 1 1 1 1 |  imm8  |
+		 * +-----------------+--------+
+		 *
+		 * According to the specification, it only defines SVC for T32
+		 * with 16 bits instruction and has no definition for 32bits;
+		 * so below only read 2 bytes as instruction size for T32.
+		 */
+		addr = end_addr - 2;
+		cs_etm__mem_access(etmq, trace_chan_id, addr,
+				   sizeof(instr16), (u8 *)&instr16);
+		if ((instr16 & 0xFF00) == 0xDF00)
+			return true;
+
+		break;
+	case CS_ETM_ISA_A32:
+		/*
+		 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
+		 *
+		 *  b'31 b'28 b'27 b'24
+		 * +---------+---------+-------------------------+
+		 * |  !1111  | 1 1 1 1 |        imm24            |
+		 * +---------+---------+-------------------------+
+		 */
+		addr = end_addr - 4;
+		cs_etm__mem_access(etmq, trace_chan_id, addr,
+				   sizeof(instr32), (u8 *)&instr32);
+		if ((instr32 & 0x0F000000) == 0x0F000000 &&
+		    (instr32 & 0xF0000000) != 0xF0000000)
+			return true;
+
+		break;
+	case CS_ETM_ISA_A64:
+		/*
+		 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
+		 *
+		 *  b'31               b'21           b'4     b'0
+		 * +-----------------------+---------+-----------+
+		 * | 1 1 0 1 0 1 0 0 0 0 0 |  imm16  | 0 0 0 0 1 |
+		 * +-----------------------+---------+-----------+
+		 */
+		addr = end_addr - 4;
+		cs_etm__mem_access(etmq, trace_chan_id, addr,
+				   sizeof(instr32), (u8 *)&instr32);
+		if ((instr32 & 0xFFE0001F) == 0xd4000001)
+			return true;
+
+		break;
+	case CS_ETM_ISA_UNKNOWN:
+	default:
+		break;
+	}
+
+	return false;
+}
+
+static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
+			       struct cs_etm_traceid_queue *tidq, u64 magic)
+{
+	u8 trace_chan_id = tidq->trace_chan_id;
+	struct cs_etm_packet *packet = tidq->packet;
+	struct cs_etm_packet *prev_packet = tidq->prev_packet;
+
+	if (magic == __perf_cs_etmv3_magic)
+		if (packet->exception_number == CS_ETMV3_EXC_SVC)
+			return true;
+
+	/*
+	 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
+	 * HVC cases; need to check if it's SVC instruction based on
+	 * packet address.
+	 */
+	if (magic == __perf_cs_etmv4_magic) {
+		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
+		    cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
+					 prev_packet->end_addr))
+			return true;
+	}
+
+	return false;
+}
+
+static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
+				       u64 magic)
+{
+	struct cs_etm_packet *packet = tidq->packet;
+
+	if (magic == __perf_cs_etmv3_magic)
+		if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
+		    packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
+		    packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
+		    packet->exception_number == CS_ETMV3_EXC_IRQ ||
+		    packet->exception_number == CS_ETMV3_EXC_FIQ)
+			return true;
+
+	if (magic == __perf_cs_etmv4_magic)
+		if (packet->exception_number == CS_ETMV4_EXC_RESET ||
+		    packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
+		    packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
+		    packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
+		    packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
+		    packet->exception_number == CS_ETMV4_EXC_IRQ ||
+		    packet->exception_number == CS_ETMV4_EXC_FIQ)
+			return true;
+
+	return false;
+}
+
+static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
+				      struct cs_etm_traceid_queue *tidq,
+				      u64 magic)
+{
+	u8 trace_chan_id = tidq->trace_chan_id;
+	struct cs_etm_packet *packet = tidq->packet;
+	struct cs_etm_packet *prev_packet = tidq->prev_packet;
+
+	if (magic == __perf_cs_etmv3_magic)
+		if (packet->exception_number == CS_ETMV3_EXC_SMC ||
+		    packet->exception_number == CS_ETMV3_EXC_HYP ||
+		    packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
+		    packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
+		    packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
+		    packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
+		    packet->exception_number == CS_ETMV3_EXC_GENERIC)
+			return true;
+
+	if (magic == __perf_cs_etmv4_magic) {
+		if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
+		    packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
+		    packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
+		    packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
+			return true;
+
+		/*
+		 * For CS_ETMV4_EXC_CALL, except SVC other instructions
+		 * (SMC, HVC) are taken as sync exceptions.
+		 */
+		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
+		    !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
+					  prev_packet->end_addr))
+			return true;
+
+		/*
+		 * ETMv4 has 5 bits for exception number; if the numbers
+		 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ]
+		 * they are implementation defined exceptions.
+		 *
+		 * For this case, simply take it as sync exception.
+		 */
+		if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
+		    packet->exception_number <= CS_ETMV4_EXC_END)
+			return true;
+	}
+
+	return false;
+}
+
+static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
+				    struct cs_etm_traceid_queue *tidq)
+{
+	struct cs_etm_packet *packet = tidq->packet;
+	struct cs_etm_packet *prev_packet = tidq->prev_packet;
+	u8 trace_chan_id = tidq->trace_chan_id;
+	u64 magic;
+	int ret;
+
+	switch (packet->sample_type) {
+	case CS_ETM_RANGE:
+		/*
+		 * Immediate branch instruction without neither link nor
+		 * return flag, it's normal branch instruction within
+		 * the function.
+		 */
+		if (packet->last_instr_type == OCSD_INSTR_BR &&
+		    packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
+			packet->flags = PERF_IP_FLAG_BRANCH;
+
+			if (packet->last_instr_cond)
+				packet->flags |= PERF_IP_FLAG_CONDITIONAL;
+		}
+
+		/*
+		 * Immediate branch instruction with link (e.g. BL), this is
+		 * branch instruction for function call.
+		 */
+		if (packet->last_instr_type == OCSD_INSTR_BR &&
+		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
+			packet->flags = PERF_IP_FLAG_BRANCH |
+					PERF_IP_FLAG_CALL;
+
+		/*
+		 * Indirect branch instruction with link (e.g. BLR), this is
+		 * branch instruction for function call.
+		 */
+		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
+		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
+			packet->flags = PERF_IP_FLAG_BRANCH |
+					PERF_IP_FLAG_CALL;
+
+		/*
+		 * Indirect branch instruction with subtype of
+		 * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for
+		 * function return for A32/T32.
+		 */
+		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
+		    packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
+			packet->flags = PERF_IP_FLAG_BRANCH |
+					PERF_IP_FLAG_RETURN;
+
+		/*
+		 * Indirect branch instruction without link (e.g. BR), usually
+		 * this is used for function return, especially for functions
+		 * within dynamic link lib.
+		 */
+		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
+		    packet->last_instr_subtype == OCSD_S_INSTR_NONE)
+			packet->flags = PERF_IP_FLAG_BRANCH |
+					PERF_IP_FLAG_RETURN;
+
+		/* Return instruction for function return. */
+		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
+		    packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
+			packet->flags = PERF_IP_FLAG_BRANCH |
+					PERF_IP_FLAG_RETURN;
+
+		/*
+		 * Decoder might insert a discontinuity in the middle of
+		 * instruction packets, fixup prev_packet with flag
+		 * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace.
+		 */
+		if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
+			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
+					      PERF_IP_FLAG_TRACE_BEGIN;
+
+		/*
+		 * If the previous packet is an exception return packet
+		 * and the return address just follows SVC instruction,
+		 * it needs to calibrate the previous packet sample flags
+		 * as PERF_IP_FLAG_SYSCALLRET.
+		 */
+		if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
+					   PERF_IP_FLAG_RETURN |
+					   PERF_IP_FLAG_INTERRUPT) &&
+		    cs_etm__is_svc_instr(etmq, trace_chan_id,
+					 packet, packet->start_addr))
+			prev_packet->flags = PERF_IP_FLAG_BRANCH |
+					     PERF_IP_FLAG_RETURN |
+					     PERF_IP_FLAG_SYSCALLRET;
+		break;
+	case CS_ETM_DISCONTINUITY:
+		/*
+		 * The trace is discontinuous, if the previous packet is
+		 * instruction packet, set flag PERF_IP_FLAG_TRACE_END
+		 * for previous packet.
+		 */
+		if (prev_packet->sample_type == CS_ETM_RANGE)
+			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
+					      PERF_IP_FLAG_TRACE_END;
+		break;
+	case CS_ETM_EXCEPTION:
+		ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
+		if (ret)
+			return ret;
+
+		/* The exception is for system call. */
+		if (cs_etm__is_syscall(etmq, tidq, magic))
+			packet->flags = PERF_IP_FLAG_BRANCH |
+					PERF_IP_FLAG_CALL |
+					PERF_IP_FLAG_SYSCALLRET;
+		/*
+		 * The exceptions are triggered by external signals from bus,
+		 * interrupt controller, debug module, PE reset or halt.
+		 */
+		else if (cs_etm__is_async_exception(tidq, magic))
+			packet->flags = PERF_IP_FLAG_BRANCH |
+					PERF_IP_FLAG_CALL |
+					PERF_IP_FLAG_ASYNC |
+					PERF_IP_FLAG_INTERRUPT;
+		/*
+		 * Otherwise, exception is caused by trap, instruction &
+		 * data fault, or alignment errors.
+		 */
+		else if (cs_etm__is_sync_exception(etmq, tidq, magic))
+			packet->flags = PERF_IP_FLAG_BRANCH |
+					PERF_IP_FLAG_CALL |
+					PERF_IP_FLAG_INTERRUPT;
+
+		/*
+		 * When the exception packet is inserted, since exception
+		 * packet is not used standalone for generating samples
+		 * and it's affiliation to the previous instruction range
+		 * packet; so set previous range packet flags to tell perf
+		 * it is an exception taken branch.
+		 */
+		if (prev_packet->sample_type == CS_ETM_RANGE)
+			prev_packet->flags = packet->flags;
+		break;
+	case CS_ETM_EXCEPTION_RET:
+		/*
+		 * When the exception return packet is inserted, since
+		 * exception return packet is not used standalone for
+		 * generating samples and it's affiliation to the previous
+		 * instruction range packet; so set previous range packet
+		 * flags to tell perf it is an exception return branch.
+		 *
+		 * The exception return can be for either system call or
+		 * other exception types; unfortunately the packet doesn't
+		 * contain exception type related info so we cannot decide
+		 * the exception type purely based on exception return packet.
+		 * If we record the exception number from exception packet and
+		 * reuse it for exception return packet, this is not reliable
+		 * due the trace can be discontinuity or the interrupt can
+		 * be nested, thus the recorded exception number cannot be
+		 * used for exception return packet for these two cases.
+		 *
+		 * For exception return packet, we only need to distinguish the
+		 * packet is for system call or for other types.  Thus the
+		 * decision can be deferred when receive the next packet which
+		 * contains the return address, based on the return address we
+		 * can read out the previous instruction and check if it's a
+		 * system call instruction and then calibrate the sample flag
+		 * as needed.
+		 */
+		if (prev_packet->sample_type == CS_ETM_RANGE)
+			prev_packet->flags = PERF_IP_FLAG_BRANCH |
+					     PERF_IP_FLAG_RETURN |
+					     PERF_IP_FLAG_INTERRUPT;
+		break;
+	case CS_ETM_EMPTY:
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
+{
+	int ret = 0;
+	size_t processed = 0;
+
+	/*
+	 * Packets are decoded and added to the decoder's packet queue
+	 * until the decoder packet processing callback has requested that
+	 * processing stops or there is nothing left in the buffer.  Normal
+	 * operations that stop processing are a timestamp packet or a full
+	 * decoder buffer queue.
+	 */
+	ret = cs_etm_decoder__process_data_block(etmq->decoder,
+						 etmq->offset,
+						 &etmq->buf[etmq->buf_used],
+						 etmq->buf_len,
+						 &processed);
+	if (ret)
+		goto out;
+
+	etmq->offset += processed;
+	etmq->buf_used += processed;
+	etmq->buf_len -= processed;
+
+out:
+	return ret;
+}
+
+static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
+					 struct cs_etm_traceid_queue *tidq)
+{
+	int ret;
+	struct cs_etm_packet_queue *packet_queue;
+
+	packet_queue = &tidq->packet_queue;
+
+	/* Process each packet in this chunk */
+	while (1) {
+		ret = cs_etm_decoder__get_packet(packet_queue,
+						 tidq->packet);
+		if (ret <= 0)
+			/*
+			 * Stop processing this chunk on
+			 * end of data or error
+			 */
+			break;
+
+		/*
+		 * Since packet addresses are swapped in packet
+		 * handling within below switch() statements,
+		 * thus setting sample flags must be called
+		 * prior to switch() statement to use address
+		 * information before packets swapping.
+		 */
+		ret = cs_etm__set_sample_flags(etmq, tidq);
+		if (ret < 0)
+			break;
+
+		switch (tidq->packet->sample_type) {
+		case CS_ETM_RANGE:
+			/*
+			 * If the packet contains an instruction
+			 * range, generate instruction sequence
+			 * events.
+			 */
+			cs_etm__sample(etmq, tidq);
+			break;
+		case CS_ETM_EXCEPTION:
+		case CS_ETM_EXCEPTION_RET:
+			/*
+			 * If the exception packet is coming,
+			 * make sure the previous instruction
+			 * range packet to be handled properly.
+			 */
+			cs_etm__exception(tidq);
+			break;
+		case CS_ETM_DISCONTINUITY:
+			/*
+			 * Discontinuity in trace, flush
+			 * previous branch stack
+			 */
+			cs_etm__flush(etmq, tidq);
+			break;
+		case CS_ETM_EMPTY:
+			/*
+			 * Should not receive empty packet,
+			 * report error.
+			 */
+			pr_err("CS ETM Trace: empty packet\n");
+			return -EINVAL;
+		default:
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
+{
+	int idx;
+	struct int_node *inode;
+	struct cs_etm_traceid_queue *tidq;
+	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
+
+	intlist__for_each_entry(inode, traceid_queues_list) {
+		idx = (int)(intptr_t)inode->priv;
+		tidq = etmq->traceid_queues[idx];
+
+		/* Ignore return value */
+		cs_etm__process_traceid_queue(etmq, tidq);
+
+		/*
+		 * Generate an instruction sample with the remaining
+		 * branchstack entries.
+		 */
+		cs_etm__flush(etmq, tidq);
+	}
+}
+
+static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
+{
+	int err = 0;
+	struct cs_etm_traceid_queue *tidq;
+
+	tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
+	if (!tidq)
+		return -EINVAL;
+
+	/* Go through each buffer in the queue and decode them one by one */
+	while (1) {
+		err = cs_etm__get_data_block(etmq);
+		if (err <= 0)
+			return err;
+
+		/* Run trace decoder until buffer consumed or end of trace */
+		do {
+			err = cs_etm__decode_data_block(etmq);
+			if (err)
+				return err;
+
+			/*
+			 * Process each packet in this chunk, nothing to do if
+			 * an error occurs other than hoping the next one will
+			 * be better.
+			 */
+			err = cs_etm__process_traceid_queue(etmq, tidq);
+
+		} while (etmq->buf_len);
+
+		if (err == 0)
+			/* Flush any remaining branch stack entries */
+			err = cs_etm__end_block(etmq, tidq);
+	}
+
+	return err;
+}
+
+static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
+					   pid_t tid)
+{
+	unsigned int i;
+	struct auxtrace_queues *queues = &etm->queues;
+
+	for (i = 0; i < queues->nr_queues; i++) {
+		struct auxtrace_queue *queue = &etm->queues.queue_array[i];
+		struct cs_etm_queue *etmq = queue->priv;
+		struct cs_etm_traceid_queue *tidq;
+
+		if (!etmq)
+			continue;
+
+		tidq = cs_etm__etmq_get_traceid_queue(etmq,
+						CS_ETM_PER_THREAD_TRACEID);
+
+		if (!tidq)
+			continue;
+
+		if ((tid == -1) || (tidq->tid == tid)) {
+			cs_etm__set_pid_tid_cpu(etm, tidq);
+			cs_etm__run_decoder(etmq);
+		}
+	}
+
+	return 0;
+}
+
+static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
+{
+	int ret = 0;
+	unsigned int cs_queue_nr, queue_nr, i;
+	u8 trace_chan_id;
+	u64 cs_timestamp;
+	struct auxtrace_queue *queue;
+	struct cs_etm_queue *etmq;
+	struct cs_etm_traceid_queue *tidq;
+
+	/*
+	 * Pre-populate the heap with one entry from each queue so that we can
+	 * start processing in time order across all queues.
+	 */
+	for (i = 0; i < etm->queues.nr_queues; i++) {
+		etmq = etm->queues.queue_array[i].priv;
+		if (!etmq)
+			continue;
+
+		ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
+		if (ret)
+			return ret;
+	}
+
+	while (1) {
+		if (!etm->heap.heap_cnt)
+			goto out;
+
+		/* Take the entry at the top of the min heap */
+		cs_queue_nr = etm->heap.heap_array[0].queue_nr;
+		queue_nr = TO_QUEUE_NR(cs_queue_nr);
+		trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
+		queue = &etm->queues.queue_array[queue_nr];
+		etmq = queue->priv;
+
+		/*
+		 * Remove the top entry from the heap since we are about
+		 * to process it.
+		 */
+		auxtrace_heap__pop(&etm->heap);
+
+		tidq  = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
+		if (!tidq) {
+			/*
+			 * No traceID queue has been allocated for this traceID,
+			 * which means something somewhere went very wrong.  No
+			 * other choice than simply exit.
+			 */
+			ret = -EINVAL;
+			goto out;
+		}
+
+		/*
+		 * Packets associated with this timestamp are already in
+		 * the etmq's traceID queue, so process them.
+		 */
+		ret = cs_etm__process_traceid_queue(etmq, tidq);
+		if (ret < 0)
+			goto out;
+
+		/*
+		 * Packets for this timestamp have been processed, time to
+		 * move on to the next timestamp, fetching a new auxtrace_buffer
+		 * if need be.
+		 */
+refetch:
+		ret = cs_etm__get_data_block(etmq);
+		if (ret < 0)
+			goto out;
+
+		/*
+		 * No more auxtrace_buffers to process in this etmq, simply
+		 * move on to another entry in the auxtrace_heap.
+		 */
+		if (!ret)
+			continue;
+
+		ret = cs_etm__decode_data_block(etmq);
+		if (ret)
+			goto out;
+
+		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
+
+		if (!cs_timestamp) {
+			/*
+			 * Function cs_etm__decode_data_block() returns when
+			 * there is no more traces to decode in the current
+			 * auxtrace_buffer OR when a timestamp has been
+			 * encountered on any of the traceID queues.  Since we
+			 * did not get a timestamp, there is no more traces to
+			 * process in this auxtrace_buffer.  As such empty and
+			 * flush all traceID queues.
+			 */
+			cs_etm__clear_all_traceid_queues(etmq);
+
+			/* Fetch another auxtrace_buffer for this etmq */
+			goto refetch;
+		}
+
+		/*
+		 * Add to the min heap the timestamp for packets that have
+		 * just been decoded.  They will be processed and synthesized
+		 * during the next call to cs_etm__process_traceid_queue() for
+		 * this queue/traceID.
+		 */
+		cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
+		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
+	}
+
+out:
+	return ret;
+}
+
+static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
+					union perf_event *event)
+{
+	struct thread *th;
+
+	if (etm->timeless_decoding)
+		return 0;
+
+	/*
+	 * Add the tid/pid to the log so that we can get a match when
+	 * we get a contextID from the decoder.
+	 */
+	th = machine__findnew_thread(etm->machine,
+				     event->itrace_start.pid,
+				     event->itrace_start.tid);
+	if (!th)
+		return -ENOMEM;
+
+	thread__put(th);
+
+	return 0;
+}
+
+static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
+					   union perf_event *event)
+{
+	struct thread *th;
+	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+
+	/*
+	 * Context switch in per-thread mode are irrelevant since perf
+	 * will start/stop tracing as the process is scheduled.
+	 */
+	if (etm->timeless_decoding)
+		return 0;
+
+	/*
+	 * SWITCH_IN events carry the next process to be switched out while
+	 * SWITCH_OUT events carry the process to be switched in.  As such
+	 * we don't care about IN events.
+	 */
+	if (!out)
+		return 0;
+
+	/*
+	 * Add the tid/pid to the log so that we can get a match when
+	 * we get a contextID from the decoder.
+	 */
+	th = machine__findnew_thread(etm->machine,
+				     event->context_switch.next_prev_pid,
+				     event->context_switch.next_prev_tid);
+	if (!th)
+		return -ENOMEM;
+
+	thread__put(th);
+
+	return 0;
+}
+
+static int cs_etm__process_event(struct perf_session *session,
+				 union perf_event *event,
+				 struct perf_sample *sample,
+				 struct perf_tool *tool)
+{
+	u64 sample_kernel_timestamp;
+	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
+						   struct cs_etm_auxtrace,
+						   auxtrace);
+
+	if (dump_trace)
+		return 0;
+
+	if (!tool->ordered_events) {
+		pr_err("CoreSight ETM Trace requires ordered events\n");
+		return -EINVAL;
+	}
+
+	if (sample->time && (sample->time != (u64) -1))
+		sample_kernel_timestamp = sample->time;
+	else
+		sample_kernel_timestamp = 0;
+
+	/*
+	 * Don't wait for cs_etm__flush_events() in per-thread/timeless mode to start the decode. We
+	 * need the tid of the PERF_RECORD_EXIT event to assign to the synthesised samples because
+	 * ETM_OPT_CTXTID is not enabled.
+	 */
+	if (etm->timeless_decoding &&
+	    event->header.type == PERF_RECORD_EXIT)
+		return cs_etm__process_timeless_queues(etm,
+						       event->fork.tid);
+
+	if (event->header.type == PERF_RECORD_ITRACE_START)
+		return cs_etm__process_itrace_start(etm, event);
+	else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
+		return cs_etm__process_switch_cpu_wide(etm, event);
+
+	if (!etm->timeless_decoding && event->header.type == PERF_RECORD_AUX) {
+		/*
+		 * Record the latest kernel timestamp available in the header
+		 * for samples so that synthesised samples occur from this point
+		 * onwards.
+		 */
+		etm->latest_kernel_timestamp = sample_kernel_timestamp;
+	}
+
+	return 0;
+}
+
+static void dump_queued_data(struct cs_etm_auxtrace *etm,
+			     struct perf_record_auxtrace *event)
+{
+	struct auxtrace_buffer *buf;
+	unsigned int i;
+	/*
+	 * Find all buffers with same reference in the queues and dump them.
+	 * This is because the queues can contain multiple entries of the same
+	 * buffer that were split on aux records.
+	 */
+	for (i = 0; i < etm->queues.nr_queues; ++i)
+		list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
+			if (buf->reference == event->reference)
+				cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
+}
+
+static int cs_etm__process_auxtrace_event(struct perf_session *session,
+					  union perf_event *event,
+					  struct perf_tool *tool __maybe_unused)
+{
+	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
+						   struct cs_etm_auxtrace,
+						   auxtrace);
+	if (!etm->data_queued) {
+		struct auxtrace_buffer *buffer;
+		off_t  data_offset;
+		int fd = perf_data__fd(session->data);
+		bool is_pipe = perf_data__is_pipe(session->data);
+		int err;
+		int idx = event->auxtrace.idx;
+
+		if (is_pipe)
+			data_offset = 0;
+		else {
+			data_offset = lseek(fd, 0, SEEK_CUR);
+			if (data_offset == -1)
+				return -errno;
+		}
+
+		err = auxtrace_queues__add_event(&etm->queues, session,
+						 event, data_offset, &buffer);
+		if (err)
+			return err;
+
+		/*
+		 * Knowing if the trace is formatted or not requires a lookup of
+		 * the aux record so only works in non-piped mode where data is
+		 * queued in cs_etm__queue_aux_records(). Always assume
+		 * formatted in piped mode (true).
+		 */
+		err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
+					  idx, true);
+		if (err)
+			return err;
+
+		if (dump_trace)
+			if (auxtrace_buffer__get_data(buffer, fd)) {
+				cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
+				auxtrace_buffer__put_data(buffer);
+			}
+	} else if (dump_trace)
+		dump_queued_data(etm, &event->auxtrace);
+
+	return 0;
+}
+
+static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm)
+{
+	struct evsel *evsel;
+	struct evlist *evlist = etm->session->evlist;
+
+	/* Override timeless mode with user input from --itrace=Z */
+	if (etm->synth_opts.timeless_decoding) {
+		etm->timeless_decoding = true;
+		return 0;
+	}
+
+	/*
+	 * Find the cs_etm evsel and look at what its timestamp setting was
+	 */
+	evlist__for_each_entry(evlist, evsel)
+		if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) {
+			etm->timeless_decoding =
+				!(evsel->core.attr.config & BIT(ETM_OPT_TS));
+			return 0;
+		}
+
+	pr_err("CS ETM: Couldn't find ETM evsel\n");
+	return -EINVAL;
+}
+
+static const char * const cs_etm_global_header_fmts[] = {
+	[CS_HEADER_VERSION]	= "	Header version		       %llx\n",
+	[CS_PMU_TYPE_CPUS]	= "	PMU type/num cpus	       %llx\n",
+	[CS_ETM_SNAPSHOT]	= "	Snapshot		       %llx\n",
+};
+
+static const char * const cs_etm_priv_fmts[] = {
+	[CS_ETM_MAGIC]		= "	Magic number		       %llx\n",
+	[CS_ETM_CPU]		= "	CPU			       %lld\n",
+	[CS_ETM_NR_TRC_PARAMS]	= "	NR_TRC_PARAMS		       %llx\n",
+	[CS_ETM_ETMCR]		= "	ETMCR			       %llx\n",
+	[CS_ETM_ETMTRACEIDR]	= "	ETMTRACEIDR		       %llx\n",
+	[CS_ETM_ETMCCER]	= "	ETMCCER			       %llx\n",
+	[CS_ETM_ETMIDR]		= "	ETMIDR			       %llx\n",
+};
+
+static const char * const cs_etmv4_priv_fmts[] = {
+	[CS_ETM_MAGIC]		= "	Magic number		       %llx\n",
+	[CS_ETM_CPU]		= "	CPU			       %lld\n",
+	[CS_ETM_NR_TRC_PARAMS]	= "	NR_TRC_PARAMS		       %llx\n",
+	[CS_ETMV4_TRCCONFIGR]	= "	TRCCONFIGR		       %llx\n",
+	[CS_ETMV4_TRCTRACEIDR]	= "	TRCTRACEIDR		       %llx\n",
+	[CS_ETMV4_TRCIDR0]	= "	TRCIDR0			       %llx\n",
+	[CS_ETMV4_TRCIDR1]	= "	TRCIDR1			       %llx\n",
+	[CS_ETMV4_TRCIDR2]	= "	TRCIDR2			       %llx\n",
+	[CS_ETMV4_TRCIDR8]	= "	TRCIDR8			       %llx\n",
+	[CS_ETMV4_TRCAUTHSTATUS] = "	TRCAUTHSTATUS		       %llx\n",
+	[CS_ETE_TRCDEVARCH]	= "	TRCDEVARCH                     %llx\n"
+};
+
+static const char * const param_unk_fmt =
+	"	Unknown parameter [%d]	       %llx\n";
+static const char * const magic_unk_fmt =
+	"	Magic number Unknown	       %llx\n";
+
+static int cs_etm__print_cpu_metadata_v0(__u64 *val, int *offset)
+{
+	int i = *offset, j, nr_params = 0, fmt_offset;
+	__u64 magic;
+
+	/* check magic value */
+	magic = val[i + CS_ETM_MAGIC];
+	if ((magic != __perf_cs_etmv3_magic) &&
+	    (magic != __perf_cs_etmv4_magic)) {
+		/* failure - note bad magic value */
+		fprintf(stdout, magic_unk_fmt, magic);
+		return -EINVAL;
+	}
+
+	/* print common header block */
+	fprintf(stdout, cs_etm_priv_fmts[CS_ETM_MAGIC], val[i++]);
+	fprintf(stdout, cs_etm_priv_fmts[CS_ETM_CPU], val[i++]);
+
+	if (magic == __perf_cs_etmv3_magic) {
+		nr_params = CS_ETM_NR_TRC_PARAMS_V0;
+		fmt_offset = CS_ETM_ETMCR;
+		/* after common block, offset format index past NR_PARAMS */
+		for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++)
+			fprintf(stdout, cs_etm_priv_fmts[j], val[i]);
+	} else if (magic == __perf_cs_etmv4_magic) {
+		nr_params = CS_ETMV4_NR_TRC_PARAMS_V0;
+		fmt_offset = CS_ETMV4_TRCCONFIGR;
+		/* after common block, offset format index past NR_PARAMS */
+		for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++)
+			fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
+	}
+	*offset = i;
+	return 0;
+}
+
+static int cs_etm__print_cpu_metadata_v1(__u64 *val, int *offset)
+{
+	int i = *offset, j, total_params = 0;
+	__u64 magic;
+
+	magic = val[i + CS_ETM_MAGIC];
+	/* total params to print is NR_PARAMS + common block size for v1 */
+	total_params = val[i + CS_ETM_NR_TRC_PARAMS] + CS_ETM_COMMON_BLK_MAX_V1;
+
+	if (magic == __perf_cs_etmv3_magic) {
+		for (j = 0; j < total_params; j++, i++) {
+			/* if newer record - could be excess params */
+			if (j >= CS_ETM_PRIV_MAX)
+				fprintf(stdout, param_unk_fmt, j, val[i]);
+			else
+				fprintf(stdout, cs_etm_priv_fmts[j], val[i]);
+		}
+	} else if (magic == __perf_cs_etmv4_magic || magic == __perf_cs_ete_magic) {
+		/*
+		 * ETE and ETMv4 can be printed in the same block because the number of parameters
+		 * is saved and they share the list of parameter names. ETE is also only supported
+		 * in V1 files.
+		 */
+		for (j = 0; j < total_params; j++, i++) {
+			/* if newer record - could be excess params */
+			if (j >= CS_ETE_PRIV_MAX)
+				fprintf(stdout, param_unk_fmt, j, val[i]);
+			else
+				fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
+		}
+	} else {
+		/* failure - note bad magic value and error out */
+		fprintf(stdout, magic_unk_fmt, magic);
+		return -EINVAL;
+	}
+	*offset = i;
+	return 0;
+}
+
+static void cs_etm__print_auxtrace_info(__u64 *val, int num)
+{
+	int i, cpu = 0, version, err;
+
+	/* bail out early on bad header version */
+	version = val[0];
+	if (version > CS_HEADER_CURRENT_VERSION) {
+		/* failure.. return */
+		fprintf(stdout, "	Unknown Header Version = %x, ", version);
+		fprintf(stdout, "Version supported <= %x\n", CS_HEADER_CURRENT_VERSION);
+		return;
+	}
+
+	for (i = 0; i < CS_HEADER_VERSION_MAX; i++)
+		fprintf(stdout, cs_etm_global_header_fmts[i], val[i]);
+
+	for (i = CS_HEADER_VERSION_MAX; cpu < num; cpu++) {
+		if (version == 0)
+			err = cs_etm__print_cpu_metadata_v0(val, &i);
+		else if (version == 1)
+			err = cs_etm__print_cpu_metadata_v1(val, &i);
+		if (err)
+			return;
+	}
+}
+
+/*
+ * Read a single cpu parameter block from the auxtrace_info priv block.
+ *
+ * For version 1 there is a per cpu nr_params entry. If we are handling
+ * version 1 file, then there may be less, the same, or more params
+ * indicated by this value than the compile time number we understand.
+ *
+ * For a version 0 info block, there are a fixed number, and we need to
+ * fill out the nr_param value in the metadata we create.
+ */
+static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
+				    int out_blk_size, int nr_params_v0)
+{
+	u64 *metadata = NULL;
+	int hdr_version;
+	int nr_in_params, nr_out_params, nr_cmn_params;
+	int i, k;
+
+	metadata = zalloc(sizeof(*metadata) * out_blk_size);
+	if (!metadata)
+		return NULL;
+
+	/* read block current index & version */
+	i = *buff_in_offset;
+	hdr_version = buff_in[CS_HEADER_VERSION];
+
+	if (!hdr_version) {
+	/* read version 0 info block into a version 1 metadata block  */
+		nr_in_params = nr_params_v0;
+		metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
+		metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
+		metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
+		/* remaining block params at offset +1 from source */
+		for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
+			metadata[k + 1] = buff_in[i + k];
+		/* version 0 has 2 common params */
+		nr_cmn_params = 2;
+	} else {
+	/* read version 1 info block - input and output nr_params may differ */
+		/* version 1 has 3 common params */
+		nr_cmn_params = 3;
+		nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];
+
+		/* if input has more params than output - skip excess */
+		nr_out_params = nr_in_params + nr_cmn_params;
+		if (nr_out_params > out_blk_size)
+			nr_out_params = out_blk_size;
+
+		for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
+			metadata[k] = buff_in[i + k];
+
+		/* record the actual nr params we copied */
+		metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
+	}
+
+	/* adjust in offset by number of in params used */
+	i += nr_in_params + nr_cmn_params;
+	*buff_in_offset = i;
+	return metadata;
+}
+
+/**
+ * Puts a fragment of an auxtrace buffer into the auxtrace queues based
+ * on the bounds of aux_event, if it matches with the buffer that's at
+ * file_offset.
+ *
+ * Normally, whole auxtrace buffers would be added to the queue. But we
+ * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
+ * is reset across each buffer, so splitting the buffers up in advance has
+ * the same effect.
+ */
+static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
+				      struct perf_record_aux *aux_event, struct perf_sample *sample)
+{
+	int err;
+	char buf[PERF_SAMPLE_MAX_SIZE];
+	union perf_event *auxtrace_event_union;
+	struct perf_record_auxtrace *auxtrace_event;
+	union perf_event auxtrace_fragment;
+	__u64 aux_offset, aux_size;
+	__u32 idx;
+	bool formatted;
+
+	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
+						   struct cs_etm_auxtrace,
+						   auxtrace);
+
+	/*
+	 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
+	 * from looping through the auxtrace index.
+	 */
+	err = perf_session__peek_event(session, file_offset, buf,
+				       PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL);
+	if (err)
+		return err;
+	auxtrace_event = &auxtrace_event_union->auxtrace;
+	if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
+		return -EINVAL;
+
+	if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) ||
+		auxtrace_event->header.size != sz) {
+		return -EINVAL;
+	}
+
+	/*
+	 * In per-thread mode, CPU is set to -1, but TID will be set instead. See
+	 * auxtrace_mmap_params__set_idx(). Return 'not found' if neither CPU nor TID match.
+	 */
+	if ((auxtrace_event->cpu == (__u32) -1 && auxtrace_event->tid != sample->tid) ||
+			auxtrace_event->cpu != sample->cpu)
+		return 1;
+
+	if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
+		/*
+		 * Clamp size in snapshot mode. The buffer size is clamped in
+		 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
+		 * the buffer size.
+		 */
+		aux_size = min(aux_event->aux_size, auxtrace_event->size);
+
+		/*
+		 * In this mode, the head also points to the end of the buffer so aux_offset
+		 * needs to have the size subtracted so it points to the beginning as in normal mode
+		 */
+		aux_offset = aux_event->aux_offset - aux_size;
+	} else {
+		aux_size = aux_event->aux_size;
+		aux_offset = aux_event->aux_offset;
+	}
+
+	if (aux_offset >= auxtrace_event->offset &&
+	    aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
+		/*
+		 * If this AUX event was inside this buffer somewhere, create a new auxtrace event
+		 * based on the sizes of the aux event, and queue that fragment.
+		 */
+		auxtrace_fragment.auxtrace = *auxtrace_event;
+		auxtrace_fragment.auxtrace.size = aux_size;
+		auxtrace_fragment.auxtrace.offset = aux_offset;
+		file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;
+
+		pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
+			  " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
+		err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
+						 file_offset, NULL);
+		if (err)
+			return err;
+
+		idx = auxtrace_event->idx;
+		formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
+		return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
+					   idx, formatted);
+	}
+
+	/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
+	return 1;
+}
+
+static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
+					u64 offset __maybe_unused, void *data __maybe_unused)
+{
+	struct perf_sample sample;
+	int ret;
+	struct auxtrace_index_entry *ent;
+	struct auxtrace_index *auxtrace_index;
+	struct evsel *evsel;
+	size_t i;
+
+	/* Don't care about any other events, we're only queuing buffers for AUX events */
+	if (event->header.type != PERF_RECORD_AUX)
+		return 0;
+
+	if (event->header.size < sizeof(struct perf_record_aux))
+		return -EINVAL;
+
+	/* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
+	if (!event->aux.aux_size)
+		return 0;
+
+	/*
+	 * Parse the sample, we need the sample_id_all data that comes after the event so that the
+	 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
+	 */
+	evsel = evlist__event2evsel(session->evlist, event);
+	if (!evsel)
+		return -EINVAL;
+	ret = evsel__parse_sample(evsel, event, &sample);
+	if (ret)
+		return ret;
+
+	/*
+	 * Loop through the auxtrace index to find the buffer that matches up with this aux event.
+	 */
+	list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
+		for (i = 0; i < auxtrace_index->nr; i++) {
+			ent = &auxtrace_index->entries[i];
+			ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
+							 ent->sz, &event->aux, &sample);
+			/*
+			 * Stop search on error or successful values. Continue search on
+			 * 1 ('not found')
+			 */
+			if (ret != 1)
+				return ret;
+		}
+	}
+
+	/*
+	 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
+	 * don't exit with an error because it will still be possible to decode other aux records.
+	 */
+	pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
+	       " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
+	return 0;
+}
+
+static int cs_etm__queue_aux_records(struct perf_session *session)
+{
+	struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
+								struct auxtrace_index, list);
+	if (index && index->nr > 0)
+		return perf_session__peek_events(session, session->header.data_offset,
+						 session->header.data_size,
+						 cs_etm__queue_aux_records_cb, NULL);
+
+	/*
+	 * We would get here if there are no entries in the index (either no auxtrace
+	 * buffers or no index at all). Fail silently as there is the possibility of
+	 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
+	 * false.
+	 *
+	 * In that scenario, buffers will not be split by AUX records.
+	 */
+	return 0;
+}
+
+int cs_etm__process_auxtrace_info(union perf_event *event,
+				  struct perf_session *session)
+{
+	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
+	struct cs_etm_auxtrace *etm = NULL;
+	struct int_node *inode;
+	unsigned int pmu_type;
+	int event_header_size = sizeof(struct perf_event_header);
+	int info_header_size;
+	int total_size = auxtrace_info->header.size;
+	int priv_size = 0;
+	int num_cpu, trcidr_idx;
+	int err = 0;
+	int i, j;
+	u64 *ptr, *hdr = NULL;
+	u64 **metadata = NULL;
+	u64 hdr_version;
+
+	/*
+	 * sizeof(auxtrace_info_event::type) +
+	 * sizeof(auxtrace_info_event::reserved) == 8
+	 */
+	info_header_size = 8;
+
+	if (total_size < (event_header_size + info_header_size))
+		return -EINVAL;
+
+	priv_size = total_size - event_header_size - info_header_size;
+
+	/* First the global part */
+	ptr = (u64 *) auxtrace_info->priv;
+
+	/* Look for version of the header */
+	hdr_version = ptr[0];
+	if (hdr_version > CS_HEADER_CURRENT_VERSION) {
+		/* print routine will print an error on bad version */
+		if (dump_trace)
+			cs_etm__print_auxtrace_info(auxtrace_info->priv, 0);
+		return -EINVAL;
+	}
+
+	hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_MAX);
+	if (!hdr)
+		return -ENOMEM;
+
+	/* Extract header information - see cs-etm.h for format */
+	for (i = 0; i < CS_HEADER_VERSION_MAX; i++)
+		hdr[i] = ptr[i];
+	num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff;
+	pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) &
+				    0xffffffff);
+
+	/*
+	 * Create an RB tree for traceID-metadata tuple.  Since the conversion
+	 * has to be made for each packet that gets decoded, optimizing access
+	 * in anything other than a sequential array is worth doing.
+	 */
+	traceid_list = intlist__new(NULL);
+	if (!traceid_list) {
+		err = -ENOMEM;
+		goto err_free_hdr;
+	}
+
+	metadata = zalloc(sizeof(*metadata) * num_cpu);
+	if (!metadata) {
+		err = -ENOMEM;
+		goto err_free_traceid_list;
+	}
+
+	/*
+	 * The metadata is stored in the auxtrace_info section and encodes
+	 * the configuration of the ARM embedded trace macrocell which is
+	 * required by the trace decoder to properly decode the trace due
+	 * to its highly compressed nature.
+	 */
+	for (j = 0; j < num_cpu; j++) {
+		if (ptr[i] == __perf_cs_etmv3_magic) {
+			metadata[j] =
+				cs_etm__create_meta_blk(ptr, &i,
+							CS_ETM_PRIV_MAX,
+							CS_ETM_NR_TRC_PARAMS_V0);
+
+			/* The traceID is our handle */
+			trcidr_idx = CS_ETM_ETMTRACEIDR;
+
+		} else if (ptr[i] == __perf_cs_etmv4_magic) {
+			metadata[j] =
+				cs_etm__create_meta_blk(ptr, &i,
+							CS_ETMV4_PRIV_MAX,
+							CS_ETMV4_NR_TRC_PARAMS_V0);
+
+			/* The traceID is our handle */
+			trcidr_idx = CS_ETMV4_TRCTRACEIDR;
+		} else if (ptr[i] == __perf_cs_ete_magic) {
+			metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
+
+			/* ETE shares first part of metadata with ETMv4 */
+			trcidr_idx = CS_ETMV4_TRCTRACEIDR;
+		} else {
+			ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
+				  ptr[i]);
+			err = -EINVAL;
+			goto err_free_metadata;
+		}
+
+		if (!metadata[j]) {
+			err = -ENOMEM;
+			goto err_free_metadata;
+		}
+
+		/* Get an RB node for this CPU */
+		inode = intlist__findnew(traceid_list, metadata[j][trcidr_idx]);
+
+		/* Something went wrong, no need to continue */
+		if (!inode) {
+			err = -ENOMEM;
+			goto err_free_metadata;
+		}
+
+		/*
+		 * The node for that CPU should not be taken.
+		 * Back out if that's the case.
+		 */
+		if (inode->priv) {
+			err = -EINVAL;
+			goto err_free_metadata;
+		}
+		/* All good, associate the traceID with the metadata pointer */
+		inode->priv = metadata[j];
+	}
+
+	/*
+	 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
+	 * CS_ETMV4_PRIV_MAX mark how many double words are in the
+	 * global metadata, and each cpu's metadata respectively.
+	 * The following tests if the correct number of double words was
+	 * present in the auxtrace info section.
+	 */
+	if (i * 8 != priv_size) {
+		err = -EINVAL;
+		goto err_free_metadata;
+	}
+
+	etm = zalloc(sizeof(*etm));
+
+	if (!etm) {
+		err = -ENOMEM;
+		goto err_free_metadata;
+	}
+
+	err = auxtrace_queues__init(&etm->queues);
+	if (err)
+		goto err_free_etm;
+
+	if (session->itrace_synth_opts->set) {
+		etm->synth_opts = *session->itrace_synth_opts;
+	} else {
+		itrace_synth_opts__set_default(&etm->synth_opts,
+				session->itrace_synth_opts->default_no_sample);
+		etm->synth_opts.callchain = false;
+	}
+
+	etm->session = session;
+	etm->machine = &session->machines.host;
+
+	etm->num_cpu = num_cpu;
+	etm->pmu_type = pmu_type;
+	etm->snapshot_mode = (hdr[CS_ETM_SNAPSHOT] != 0);
+	etm->metadata = metadata;
+	etm->auxtrace_type = auxtrace_info->type;
+
+	etm->auxtrace.process_event = cs_etm__process_event;
+	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
+	etm->auxtrace.flush_events = cs_etm__flush_events;
+	etm->auxtrace.free_events = cs_etm__free_events;
+	etm->auxtrace.free = cs_etm__free;
+	etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
+	session->auxtrace = &etm->auxtrace;
+
+	err = cs_etm__setup_timeless_decoding(etm);
+	if (err)
+		return err;
+
+	etm->unknown_thread = thread__new(999999999, 999999999);
+	if (!etm->unknown_thread) {
+		err = -ENOMEM;
+		goto err_free_queues;
+	}
+
+	/*
+	 * Initialize list node so that at thread__zput() we can avoid
+	 * segmentation fault at list_del_init().
+	 */
+	INIT_LIST_HEAD(&etm->unknown_thread->node);
+
+	err = thread__set_comm(etm->unknown_thread, "unknown", 0);
+	if (err)
+		goto err_delete_thread;
+
+	if (thread__init_maps(etm->unknown_thread, etm->machine)) {
+		err = -ENOMEM;
+		goto err_delete_thread;
+	}
+
+	if (dump_trace) {
+		cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
+	}
+
+	err = cs_etm__synth_events(etm, session);
+	if (err)
+		goto err_delete_thread;
+
+	err = cs_etm__queue_aux_records(session);
+	if (err)
+		goto err_delete_thread;
+
+	etm->data_queued = etm->queues.populated;
+	/*
+	 * Print warning in pipe mode, see cs_etm__process_auxtrace_event() and
+	 * cs_etm__queue_aux_fragment() for details relating to limitations.
+	 */
+	if (!etm->data_queued)
+		pr_warning("CS ETM warning: Coresight decode and TRBE support requires random file access.\n"
+			   "Continuing with best effort decoding in piped mode.\n\n");
+
+	return 0;
+
+err_delete_thread:
+	thread__zput(etm->unknown_thread);
+err_free_queues:
+	auxtrace_queues__free(&etm->queues);
+	session->auxtrace = NULL;
+err_free_etm:
+	zfree(&etm);
+err_free_metadata:
+	/* No need to check @metadata[j], free(NULL) is supported */
+	for (j = 0; j < num_cpu; j++)
+		zfree(&metadata[j]);
+	zfree(&metadata);
+err_free_traceid_list:
+	intlist__delete(traceid_list);
+err_free_hdr:
+	zfree(&hdr);
+	/*
+	 * At this point, as a minimum we have valid header. Dump the rest of
+	 * the info section - the print routines will error out on structural
+	 * issues.
+	 */
+	if (dump_trace)
+		cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
+	return err;
+}
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
new file mode 100644
index 000000000..90c83f932
--- /dev/null
+++ b/tools/perf/util/cs-etm.h
@@ -0,0 +1,257 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#ifndef INCLUDE__UTIL_PERF_CS_ETM_H__
+#define INCLUDE__UTIL_PERF_CS_ETM_H__
+
+#include "util/event.h"
+#include <linux/bits.h>
+
+struct perf_session;
+
+/*
+ * Versioning header in case things need to change in the future.  That way
+ * decoding of old snapshot is still possible.
+ */
+enum {
+	/* Starting with 0x0 */
+	CS_HEADER_VERSION,
+	/* PMU->type (32 bit), total # of CPUs (32 bit) */
+	CS_PMU_TYPE_CPUS,
+	CS_ETM_SNAPSHOT,
+	CS_HEADER_VERSION_MAX,
+};
+
+/*
+ * Update the version for new format.
+ *
+ * New version 1 format adds a param count to the per cpu metadata.
+ * This allows easy adding of new metadata parameters.
+ * Requires that new params always added after current ones.
+ * Also allows client reader to handle file versions that are different by
+ * checking the number of params in the file vs the number expected.
+ */
+#define CS_HEADER_CURRENT_VERSION 1
+
+/* Beginning of header common to both ETMv3 and V4 */
+enum {
+	CS_ETM_MAGIC,
+	CS_ETM_CPU,
+	/* Number of trace config params in following ETM specific block */
+	CS_ETM_NR_TRC_PARAMS,
+	CS_ETM_COMMON_BLK_MAX_V1,
+};
+
+/* ETMv3/PTM metadata */
+enum {
+	/* Dynamic, configurable parameters */
+	CS_ETM_ETMCR = CS_ETM_COMMON_BLK_MAX_V1,
+	CS_ETM_ETMTRACEIDR,
+	/* RO, taken from sysFS */
+	CS_ETM_ETMCCER,
+	CS_ETM_ETMIDR,
+	CS_ETM_PRIV_MAX,
+};
+
+/* define fixed version 0 length - allow new format reader to read old files. */
+#define CS_ETM_NR_TRC_PARAMS_V0 (CS_ETM_ETMIDR - CS_ETM_ETMCR + 1)
+
+/* ETMv4 metadata */
+enum {
+	/* Dynamic, configurable parameters */
+	CS_ETMV4_TRCCONFIGR = CS_ETM_COMMON_BLK_MAX_V1,
+	CS_ETMV4_TRCTRACEIDR,
+	/* RO, taken from sysFS */
+	CS_ETMV4_TRCIDR0,
+	CS_ETMV4_TRCIDR1,
+	CS_ETMV4_TRCIDR2,
+	CS_ETMV4_TRCIDR8,
+	CS_ETMV4_TRCAUTHSTATUS,
+	CS_ETMV4_PRIV_MAX,
+};
+
+/* define fixed version 0 length - allow new format reader to read old files. */
+#define CS_ETMV4_NR_TRC_PARAMS_V0 (CS_ETMV4_TRCAUTHSTATUS - CS_ETMV4_TRCCONFIGR + 1)
+
+/*
+ * ETE metadata is ETMv4 plus TRCDEVARCH register and doesn't support header V0 since it was
+ * added in header V1
+ */
+enum {
+	CS_ETE_TRCDEVARCH = CS_ETMV4_PRIV_MAX,
+	CS_ETE_PRIV_MAX
+};
+
+/*
+ * ETMv3 exception encoding number:
+ * See Embedded Trace Macrocell specification (ARM IHI 0014Q)
+ * table 7-12 Encoding of Exception[3:0] for non-ARMv7-M processors.
+ */
+enum {
+	CS_ETMV3_EXC_NONE = 0,
+	CS_ETMV3_EXC_DEBUG_HALT = 1,
+	CS_ETMV3_EXC_SMC = 2,
+	CS_ETMV3_EXC_HYP = 3,
+	CS_ETMV3_EXC_ASYNC_DATA_ABORT = 4,
+	CS_ETMV3_EXC_JAZELLE_THUMBEE = 5,
+	CS_ETMV3_EXC_PE_RESET = 8,
+	CS_ETMV3_EXC_UNDEFINED_INSTR = 9,
+	CS_ETMV3_EXC_SVC = 10,
+	CS_ETMV3_EXC_PREFETCH_ABORT = 11,
+	CS_ETMV3_EXC_DATA_FAULT = 12,
+	CS_ETMV3_EXC_GENERIC = 13,
+	CS_ETMV3_EXC_IRQ = 14,
+	CS_ETMV3_EXC_FIQ = 15,
+};
+
+/*
+ * ETMv4 exception encoding number:
+ * See ARM Embedded Trace Macrocell Architecture Specification (ARM IHI 0064D)
+ * table 6-12 Possible values for the TYPE field in an Exception instruction
+ * trace packet, for ARMv7-A/R and ARMv8-A/R PEs.
+ */
+enum {
+	CS_ETMV4_EXC_RESET = 0,
+	CS_ETMV4_EXC_DEBUG_HALT = 1,
+	CS_ETMV4_EXC_CALL = 2,
+	CS_ETMV4_EXC_TRAP = 3,
+	CS_ETMV4_EXC_SYSTEM_ERROR = 4,
+	CS_ETMV4_EXC_INST_DEBUG = 6,
+	CS_ETMV4_EXC_DATA_DEBUG = 7,
+	CS_ETMV4_EXC_ALIGNMENT = 10,
+	CS_ETMV4_EXC_INST_FAULT = 11,
+	CS_ETMV4_EXC_DATA_FAULT = 12,
+	CS_ETMV4_EXC_IRQ = 14,
+	CS_ETMV4_EXC_FIQ = 15,
+	CS_ETMV4_EXC_END = 31,
+};
+
+enum cs_etm_sample_type {
+	CS_ETM_EMPTY,
+	CS_ETM_RANGE,
+	CS_ETM_DISCONTINUITY,
+	CS_ETM_EXCEPTION,
+	CS_ETM_EXCEPTION_RET,
+};
+
+enum cs_etm_isa {
+	CS_ETM_ISA_UNKNOWN,
+	CS_ETM_ISA_A64,
+	CS_ETM_ISA_A32,
+	CS_ETM_ISA_T32,
+};
+
+struct cs_etm_queue;
+
+struct cs_etm_packet {
+	enum cs_etm_sample_type sample_type;
+	enum cs_etm_isa isa;
+	u64 start_addr;
+	u64 end_addr;
+	u32 instr_count;
+	u32 last_instr_type;
+	u32 last_instr_subtype;
+	u32 flags;
+	u32 exception_number;
+	u8 last_instr_cond;
+	u8 last_instr_taken_branch;
+	u8 last_instr_size;
+	u8 trace_chan_id;
+	int cpu;
+};
+
+#define CS_ETM_PACKET_MAX_BUFFER 1024
+
+/*
+ * When working with per-thread scenarios the process under trace can
+ * be scheduled on any CPU and as such, more than one traceID may be
+ * associated with the same process.  Since a traceID of '0' is illegal
+ * as per the CoreSight architecture, use that specific value to
+ * identify the queue where all packets (with any traceID) are
+ * aggregated.
+ */
+#define CS_ETM_PER_THREAD_TRACEID 0
+
+struct cs_etm_packet_queue {
+	u32 packet_count;
+	u32 head;
+	u32 tail;
+	u32 instr_count;
+	u64 cs_timestamp;
+	u64 next_cs_timestamp;
+	struct cs_etm_packet packet_buffer[CS_ETM_PACKET_MAX_BUFFER];
+};
+
+#define KiB(x) ((x) * 1024)
+#define MiB(x) ((x) * 1024 * 1024)
+
+#define CS_ETM_INVAL_ADDR 0xdeadbeefdeadbeefUL
+
+#define BMVAL(val, lsb, msb)	((val & GENMASK(msb, lsb)) >> lsb)
+
+#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_MAX * sizeof(u64))
+
+#define __perf_cs_etmv3_magic 0x3030303030303030ULL
+#define __perf_cs_etmv4_magic 0x4040404040404040ULL
+#define __perf_cs_ete_magic   0x5050505050505050ULL
+#define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64))
+#define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64))
+#define CS_ETE_PRIV_SIZE (CS_ETE_PRIV_MAX * sizeof(u64))
+
+#ifdef HAVE_CSTRACE_SUPPORT
+int cs_etm__process_auxtrace_info(union perf_event *event,
+				  struct perf_session *session);
+int cs_etm__get_cpu(u8 trace_chan_id, int *cpu);
+int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt);
+int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq,
+			 pid_t tid, u8 trace_chan_id);
+bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq);
+void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
+					      u8 trace_chan_id);
+struct cs_etm_packet_queue
+*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id);
+#else
+static inline int
+cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused,
+			      struct perf_session *session __maybe_unused)
+{
+	return -1;
+}
+
+static inline int cs_etm__get_cpu(u8 trace_chan_id __maybe_unused,
+				  int *cpu __maybe_unused)
+{
+	return -1;
+}
+
+static inline int cs_etm__etmq_set_tid(
+				struct cs_etm_queue *etmq __maybe_unused,
+				pid_t tid __maybe_unused,
+				u8 trace_chan_id __maybe_unused)
+{
+	return -1;
+}
+
+static inline bool cs_etm__etmq_is_timeless(
+				struct cs_etm_queue *etmq __maybe_unused)
+{
+	/* What else to return? */
+	return true;
+}
+
+static inline void cs_etm__etmq_set_traceid_queue_timestamp(
+				struct cs_etm_queue *etmq __maybe_unused,
+				u8 trace_chan_id __maybe_unused) {}
+
+static inline struct cs_etm_packet_queue *cs_etm__etmq_get_packet_queue(
+				struct cs_etm_queue *etmq __maybe_unused,
+				u8 trace_chan_id __maybe_unused)
+{
+	return NULL;
+}
+#endif
+
+#endif
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
new file mode 100644
index 000000000..9e0aee276
--- /dev/null
+++ b/tools/perf/util/data-convert-bt.c
@@ -0,0 +1,1698 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * CTF writing support via babeltrace.
+ *
+ * Copyright (C) 2014, Jiri Olsa <jolsa@redhat.com>
+ * Copyright (C) 2014, Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+#include <babeltrace/ctf-writer/writer.h>
+#include <babeltrace/ctf-writer/clock.h>
+#include <babeltrace/ctf-writer/stream.h>
+#include <babeltrace/ctf-writer/event.h>
+#include <babeltrace/ctf-writer/event-types.h>
+#include <babeltrace/ctf-writer/event-fields.h>
+#include <babeltrace/ctf-ir/utils.h>
+#include <babeltrace/ctf/events.h>
+#include <traceevent/event-parse.h>
+#include "asm/bug.h"
+#include "data-convert.h"
+#include "session.h"
+#include "debug.h"
+#include "tool.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "machine.h"
+#include "config.h"
+#include <linux/ctype.h>
+#include <linux/err.h>
+#include <linux/time64.h>
+#include "util.h"
+#include "clockid.h"
+
+#define pr_N(n, fmt, ...) \
+	eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__)
+
+#define pr(fmt, ...)  pr_N(1, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr2(fmt, ...) pr_N(2, pr_fmt(fmt), ##__VA_ARGS__)
+
+#define pr_time2(t, fmt, ...) pr_time_N(2, debug_data_convert, t, pr_fmt(fmt), ##__VA_ARGS__)
+
+struct evsel_priv {
+	struct bt_ctf_event_class *event_class;
+};
+
+#define MAX_CPUS	4096
+
+struct ctf_stream {
+	struct bt_ctf_stream *stream;
+	int cpu;
+	u32 count;
+};
+
+struct ctf_writer {
+	/* writer primitives */
+	struct bt_ctf_writer		 *writer;
+	struct ctf_stream		**stream;
+	int				  stream_cnt;
+	struct bt_ctf_stream_class	 *stream_class;
+	struct bt_ctf_clock		 *clock;
+
+	/* data types */
+	union {
+		struct {
+			struct bt_ctf_field_type	*s64;
+			struct bt_ctf_field_type	*u64;
+			struct bt_ctf_field_type	*s32;
+			struct bt_ctf_field_type	*u32;
+			struct bt_ctf_field_type	*string;
+			struct bt_ctf_field_type	*u32_hex;
+			struct bt_ctf_field_type	*u64_hex;
+		};
+		struct bt_ctf_field_type *array[6];
+	} data;
+	struct bt_ctf_event_class	*comm_class;
+	struct bt_ctf_event_class	*exit_class;
+	struct bt_ctf_event_class	*fork_class;
+	struct bt_ctf_event_class	*mmap_class;
+	struct bt_ctf_event_class	*mmap2_class;
+};
+
+struct convert {
+	struct perf_tool	tool;
+	struct ctf_writer	writer;
+
+	u64			events_size;
+	u64			events_count;
+	u64			non_sample_count;
+
+	/* Ordered events configured queue size. */
+	u64			queue_size;
+};
+
+static int value_set(struct bt_ctf_field_type *type,
+		     struct bt_ctf_event *event,
+		     const char *name, u64 val)
+{
+	struct bt_ctf_field *field;
+	bool sign = bt_ctf_field_type_integer_get_signed(type);
+	int ret;
+
+	field = bt_ctf_field_create(type);
+	if (!field) {
+		pr_err("failed to create a field %s\n", name);
+		return -1;
+	}
+
+	if (sign) {
+		ret = bt_ctf_field_signed_integer_set_value(field, val);
+		if (ret) {
+			pr_err("failed to set field value %s\n", name);
+			goto err;
+		}
+	} else {
+		ret = bt_ctf_field_unsigned_integer_set_value(field, val);
+		if (ret) {
+			pr_err("failed to set field value %s\n", name);
+			goto err;
+		}
+	}
+
+	ret = bt_ctf_event_set_payload(event, name, field);
+	if (ret) {
+		pr_err("failed to set payload %s\n", name);
+		goto err;
+	}
+
+	pr2("  SET [%s = %" PRIu64 "]\n", name, val);
+
+err:
+	bt_ctf_field_put(field);
+	return ret;
+}
+
+#define __FUNC_VALUE_SET(_name, _val_type)				\
+static __maybe_unused int value_set_##_name(struct ctf_writer *cw,	\
+			     struct bt_ctf_event *event,		\
+			     const char *name,				\
+			     _val_type val)				\
+{									\
+	struct bt_ctf_field_type *type = cw->data._name;		\
+	return value_set(type, event, name, (u64) val);			\
+}
+
+#define FUNC_VALUE_SET(_name) __FUNC_VALUE_SET(_name, _name)
+
+FUNC_VALUE_SET(s32)
+FUNC_VALUE_SET(u32)
+FUNC_VALUE_SET(s64)
+FUNC_VALUE_SET(u64)
+__FUNC_VALUE_SET(u64_hex, u64)
+
+static int string_set_value(struct bt_ctf_field *field, const char *string);
+static __maybe_unused int
+value_set_string(struct ctf_writer *cw, struct bt_ctf_event *event,
+		 const char *name, const char *string)
+{
+	struct bt_ctf_field_type *type = cw->data.string;
+	struct bt_ctf_field *field;
+	int ret = 0;
+
+	field = bt_ctf_field_create(type);
+	if (!field) {
+		pr_err("failed to create a field %s\n", name);
+		return -1;
+	}
+
+	ret = string_set_value(field, string);
+	if (ret) {
+		pr_err("failed to set value %s\n", name);
+		goto err_put_field;
+	}
+
+	ret = bt_ctf_event_set_payload(event, name, field);
+	if (ret)
+		pr_err("failed to set payload %s\n", name);
+
+err_put_field:
+	bt_ctf_field_put(field);
+	return ret;
+}
+
+static struct bt_ctf_field_type*
+get_tracepoint_field_type(struct ctf_writer *cw, struct tep_format_field *field)
+{
+	unsigned long flags = field->flags;
+
+	if (flags & TEP_FIELD_IS_STRING)
+		return cw->data.string;
+
+	if (!(flags & TEP_FIELD_IS_SIGNED)) {
+		/* unsigned long are mostly pointers */
+		if (flags & TEP_FIELD_IS_LONG || flags & TEP_FIELD_IS_POINTER)
+			return cw->data.u64_hex;
+	}
+
+	if (flags & TEP_FIELD_IS_SIGNED) {
+		if (field->size == 8)
+			return cw->data.s64;
+		else
+			return cw->data.s32;
+	}
+
+	if (field->size == 8)
+		return cw->data.u64;
+	else
+		return cw->data.u32;
+}
+
+static unsigned long long adjust_signedness(unsigned long long value_int, int size)
+{
+	unsigned long long value_mask;
+
+	/*
+	 * value_mask = (1 << (size * 8 - 1)) - 1.
+	 * Directly set value_mask for code readers.
+	 */
+	switch (size) {
+	case 1:
+		value_mask = 0x7fULL;
+		break;
+	case 2:
+		value_mask = 0x7fffULL;
+		break;
+	case 4:
+		value_mask = 0x7fffffffULL;
+		break;
+	case 8:
+		/*
+		 * For 64 bit value, return it self. There is no need
+		 * to fill high bit.
+		 */
+		/* Fall through */
+	default:
+		/* BUG! */
+		return value_int;
+	}
+
+	/* If it is a positive value, don't adjust. */
+	if ((value_int & (~0ULL - value_mask)) == 0)
+		return value_int;
+
+	/* Fill upper part of value_int with 1 to make it a negative long long. */
+	return (value_int & value_mask) | ~value_mask;
+}
+
+static int string_set_value(struct bt_ctf_field *field, const char *string)
+{
+	char *buffer = NULL;
+	size_t len = strlen(string), i, p;
+	int err;
+
+	for (i = p = 0; i < len; i++, p++) {
+		if (isprint(string[i])) {
+			if (!buffer)
+				continue;
+			buffer[p] = string[i];
+		} else {
+			char numstr[5];
+
+			snprintf(numstr, sizeof(numstr), "\\x%02x",
+				 (unsigned int)(string[i]) & 0xff);
+
+			if (!buffer) {
+				buffer = zalloc(i + (len - i) * 4 + 2);
+				if (!buffer) {
+					pr_err("failed to set unprintable string '%s'\n", string);
+					return bt_ctf_field_string_set_value(field, "UNPRINTABLE-STRING");
+				}
+				if (i > 0)
+					strncpy(buffer, string, i);
+			}
+			memcpy(buffer + p, numstr, 4);
+			p += 3;
+		}
+	}
+
+	if (!buffer)
+		return bt_ctf_field_string_set_value(field, string);
+	err = bt_ctf_field_string_set_value(field, buffer);
+	free(buffer);
+	return err;
+}
+
+static int add_tracepoint_field_value(struct ctf_writer *cw,
+				      struct bt_ctf_event_class *event_class,
+				      struct bt_ctf_event *event,
+				      struct perf_sample *sample,
+				      struct tep_format_field *fmtf)
+{
+	struct bt_ctf_field_type *type;
+	struct bt_ctf_field *array_field;
+	struct bt_ctf_field *field;
+	const char *name = fmtf->name;
+	void *data = sample->raw_data;
+	unsigned long flags = fmtf->flags;
+	unsigned int n_items;
+	unsigned int i;
+	unsigned int offset;
+	unsigned int len;
+	int ret;
+
+	name = fmtf->alias;
+	offset = fmtf->offset;
+	len = fmtf->size;
+	if (flags & TEP_FIELD_IS_STRING)
+		flags &= ~TEP_FIELD_IS_ARRAY;
+
+	if (flags & TEP_FIELD_IS_DYNAMIC) {
+		unsigned long long tmp_val;
+
+		tmp_val = tep_read_number(fmtf->event->tep,
+					  data + offset, len);
+		offset = tmp_val;
+		len = offset >> 16;
+		offset &= 0xffff;
+		if (flags & TEP_FIELD_IS_RELATIVE)
+			offset += fmtf->offset + fmtf->size;
+	}
+
+	if (flags & TEP_FIELD_IS_ARRAY) {
+
+		type = bt_ctf_event_class_get_field_by_name(
+				event_class, name);
+		array_field = bt_ctf_field_create(type);
+		bt_ctf_field_type_put(type);
+		if (!array_field) {
+			pr_err("Failed to create array type %s\n", name);
+			return -1;
+		}
+
+		len = fmtf->size / fmtf->arraylen;
+		n_items = fmtf->arraylen;
+	} else {
+		n_items = 1;
+		array_field = NULL;
+	}
+
+	type = get_tracepoint_field_type(cw, fmtf);
+
+	for (i = 0; i < n_items; i++) {
+		if (flags & TEP_FIELD_IS_ARRAY)
+			field = bt_ctf_field_array_get_field(array_field, i);
+		else
+			field = bt_ctf_field_create(type);
+
+		if (!field) {
+			pr_err("failed to create a field %s\n", name);
+			return -1;
+		}
+
+		if (flags & TEP_FIELD_IS_STRING)
+			ret = string_set_value(field, data + offset + i * len);
+		else {
+			unsigned long long value_int;
+
+			value_int = tep_read_number(
+					fmtf->event->tep,
+					data + offset + i * len, len);
+
+			if (!(flags & TEP_FIELD_IS_SIGNED))
+				ret = bt_ctf_field_unsigned_integer_set_value(
+						field, value_int);
+			else
+				ret = bt_ctf_field_signed_integer_set_value(
+						field, adjust_signedness(value_int, len));
+		}
+
+		if (ret) {
+			pr_err("failed to set file value %s\n", name);
+			goto err_put_field;
+		}
+		if (!(flags & TEP_FIELD_IS_ARRAY)) {
+			ret = bt_ctf_event_set_payload(event, name, field);
+			if (ret) {
+				pr_err("failed to set payload %s\n", name);
+				goto err_put_field;
+			}
+		}
+		bt_ctf_field_put(field);
+	}
+	if (flags & TEP_FIELD_IS_ARRAY) {
+		ret = bt_ctf_event_set_payload(event, name, array_field);
+		if (ret) {
+			pr_err("Failed add payload array %s\n", name);
+			return -1;
+		}
+		bt_ctf_field_put(array_field);
+	}
+	return 0;
+
+err_put_field:
+	bt_ctf_field_put(field);
+	return -1;
+}
+
+static int add_tracepoint_fields_values(struct ctf_writer *cw,
+					struct bt_ctf_event_class *event_class,
+					struct bt_ctf_event *event,
+					struct tep_format_field *fields,
+					struct perf_sample *sample)
+{
+	struct tep_format_field *field;
+	int ret;
+
+	for (field = fields; field; field = field->next) {
+		ret = add_tracepoint_field_value(cw, event_class, event, sample,
+				field);
+		if (ret)
+			return -1;
+	}
+	return 0;
+}
+
+static int add_tracepoint_values(struct ctf_writer *cw,
+				 struct bt_ctf_event_class *event_class,
+				 struct bt_ctf_event *event,
+				 struct evsel *evsel,
+				 struct perf_sample *sample)
+{
+	struct tep_format_field *common_fields = evsel->tp_format->format.common_fields;
+	struct tep_format_field *fields        = evsel->tp_format->format.fields;
+	int ret;
+
+	ret = add_tracepoint_fields_values(cw, event_class, event,
+					   common_fields, sample);
+	if (!ret)
+		ret = add_tracepoint_fields_values(cw, event_class, event,
+						   fields, sample);
+
+	return ret;
+}
+
+static int
+add_bpf_output_values(struct bt_ctf_event_class *event_class,
+		      struct bt_ctf_event *event,
+		      struct perf_sample *sample)
+{
+	struct bt_ctf_field_type *len_type, *seq_type;
+	struct bt_ctf_field *len_field, *seq_field;
+	unsigned int raw_size = sample->raw_size;
+	unsigned int nr_elements = raw_size / sizeof(u32);
+	unsigned int i;
+	int ret;
+
+	if (nr_elements * sizeof(u32) != raw_size)
+		pr_warning("Incorrect raw_size (%u) in bpf output event, skip %zu bytes\n",
+			   raw_size, nr_elements * sizeof(u32) - raw_size);
+
+	len_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_len");
+	len_field = bt_ctf_field_create(len_type);
+	if (!len_field) {
+		pr_err("failed to create 'raw_len' for bpf output event\n");
+		ret = -1;
+		goto put_len_type;
+	}
+
+	ret = bt_ctf_field_unsigned_integer_set_value(len_field, nr_elements);
+	if (ret) {
+		pr_err("failed to set field value for raw_len\n");
+		goto put_len_field;
+	}
+	ret = bt_ctf_event_set_payload(event, "raw_len", len_field);
+	if (ret) {
+		pr_err("failed to set payload to raw_len\n");
+		goto put_len_field;
+	}
+
+	seq_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_data");
+	seq_field = bt_ctf_field_create(seq_type);
+	if (!seq_field) {
+		pr_err("failed to create 'raw_data' for bpf output event\n");
+		ret = -1;
+		goto put_seq_type;
+	}
+
+	ret = bt_ctf_field_sequence_set_length(seq_field, len_field);
+	if (ret) {
+		pr_err("failed to set length of 'raw_data'\n");
+		goto put_seq_field;
+	}
+
+	for (i = 0; i < nr_elements; i++) {
+		struct bt_ctf_field *elem_field =
+			bt_ctf_field_sequence_get_field(seq_field, i);
+
+		ret = bt_ctf_field_unsigned_integer_set_value(elem_field,
+				((u32 *)(sample->raw_data))[i]);
+
+		bt_ctf_field_put(elem_field);
+		if (ret) {
+			pr_err("failed to set raw_data[%d]\n", i);
+			goto put_seq_field;
+		}
+	}
+
+	ret = bt_ctf_event_set_payload(event, "raw_data", seq_field);
+	if (ret)
+		pr_err("failed to set payload for raw_data\n");
+
+put_seq_field:
+	bt_ctf_field_put(seq_field);
+put_seq_type:
+	bt_ctf_field_type_put(seq_type);
+put_len_field:
+	bt_ctf_field_put(len_field);
+put_len_type:
+	bt_ctf_field_type_put(len_type);
+	return ret;
+}
+
+static int
+add_callchain_output_values(struct bt_ctf_event_class *event_class,
+		      struct bt_ctf_event *event,
+		      struct ip_callchain *callchain)
+{
+	struct bt_ctf_field_type *len_type, *seq_type;
+	struct bt_ctf_field *len_field, *seq_field;
+	unsigned int nr_elements = callchain->nr;
+	unsigned int i;
+	int ret;
+
+	len_type = bt_ctf_event_class_get_field_by_name(
+			event_class, "perf_callchain_size");
+	len_field = bt_ctf_field_create(len_type);
+	if (!len_field) {
+		pr_err("failed to create 'perf_callchain_size' for callchain output event\n");
+		ret = -1;
+		goto put_len_type;
+	}
+
+	ret = bt_ctf_field_unsigned_integer_set_value(len_field, nr_elements);
+	if (ret) {
+		pr_err("failed to set field value for perf_callchain_size\n");
+		goto put_len_field;
+	}
+	ret = bt_ctf_event_set_payload(event, "perf_callchain_size", len_field);
+	if (ret) {
+		pr_err("failed to set payload to perf_callchain_size\n");
+		goto put_len_field;
+	}
+
+	seq_type = bt_ctf_event_class_get_field_by_name(
+			event_class, "perf_callchain");
+	seq_field = bt_ctf_field_create(seq_type);
+	if (!seq_field) {
+		pr_err("failed to create 'perf_callchain' for callchain output event\n");
+		ret = -1;
+		goto put_seq_type;
+	}
+
+	ret = bt_ctf_field_sequence_set_length(seq_field, len_field);
+	if (ret) {
+		pr_err("failed to set length of 'perf_callchain'\n");
+		goto put_seq_field;
+	}
+
+	for (i = 0; i < nr_elements; i++) {
+		struct bt_ctf_field *elem_field =
+			bt_ctf_field_sequence_get_field(seq_field, i);
+
+		ret = bt_ctf_field_unsigned_integer_set_value(elem_field,
+				((u64 *)(callchain->ips))[i]);
+
+		bt_ctf_field_put(elem_field);
+		if (ret) {
+			pr_err("failed to set callchain[%d]\n", i);
+			goto put_seq_field;
+		}
+	}
+
+	ret = bt_ctf_event_set_payload(event, "perf_callchain", seq_field);
+	if (ret)
+		pr_err("failed to set payload for raw_data\n");
+
+put_seq_field:
+	bt_ctf_field_put(seq_field);
+put_seq_type:
+	bt_ctf_field_type_put(seq_type);
+put_len_field:
+	bt_ctf_field_put(len_field);
+put_len_type:
+	bt_ctf_field_type_put(len_type);
+	return ret;
+}
+
+static int add_generic_values(struct ctf_writer *cw,
+			      struct bt_ctf_event *event,
+			      struct evsel *evsel,
+			      struct perf_sample *sample)
+{
+	u64 type = evsel->core.attr.sample_type;
+	int ret;
+
+	/*
+	 * missing:
+	 *   PERF_SAMPLE_TIME         - not needed as we have it in
+	 *                              ctf event header
+	 *   PERF_SAMPLE_READ         - TODO
+	 *   PERF_SAMPLE_RAW          - tracepoint fields are handled separately
+	 *   PERF_SAMPLE_BRANCH_STACK - TODO
+	 *   PERF_SAMPLE_REGS_USER    - TODO
+	 *   PERF_SAMPLE_STACK_USER   - TODO
+	 */
+
+	if (type & PERF_SAMPLE_IP) {
+		ret = value_set_u64_hex(cw, event, "perf_ip", sample->ip);
+		if (ret)
+			return -1;
+	}
+
+	if (type & PERF_SAMPLE_TID) {
+		ret = value_set_s32(cw, event, "perf_tid", sample->tid);
+		if (ret)
+			return -1;
+
+		ret = value_set_s32(cw, event, "perf_pid", sample->pid);
+		if (ret)
+			return -1;
+	}
+
+	if ((type & PERF_SAMPLE_ID) ||
+	    (type & PERF_SAMPLE_IDENTIFIER)) {
+		ret = value_set_u64(cw, event, "perf_id", sample->id);
+		if (ret)
+			return -1;
+	}
+
+	if (type & PERF_SAMPLE_STREAM_ID) {
+		ret = value_set_u64(cw, event, "perf_stream_id", sample->stream_id);
+		if (ret)
+			return -1;
+	}
+
+	if (type & PERF_SAMPLE_PERIOD) {
+		ret = value_set_u64(cw, event, "perf_period", sample->period);
+		if (ret)
+			return -1;
+	}
+
+	if (type & PERF_SAMPLE_WEIGHT) {
+		ret = value_set_u64(cw, event, "perf_weight", sample->weight);
+		if (ret)
+			return -1;
+	}
+
+	if (type & PERF_SAMPLE_DATA_SRC) {
+		ret = value_set_u64(cw, event, "perf_data_src",
+				sample->data_src);
+		if (ret)
+			return -1;
+	}
+
+	if (type & PERF_SAMPLE_TRANSACTION) {
+		ret = value_set_u64(cw, event, "perf_transaction",
+				sample->transaction);
+		if (ret)
+			return -1;
+	}
+
+	return 0;
+}
+
+static int ctf_stream__flush(struct ctf_stream *cs)
+{
+	int err = 0;
+
+	if (cs) {
+		err = bt_ctf_stream_flush(cs->stream);
+		if (err)
+			pr_err("CTF stream %d flush failed\n", cs->cpu);
+
+		pr("Flush stream for cpu %d (%u samples)\n",
+		   cs->cpu, cs->count);
+
+		cs->count = 0;
+	}
+
+	return err;
+}
+
+static struct ctf_stream *ctf_stream__create(struct ctf_writer *cw, int cpu)
+{
+	struct ctf_stream *cs;
+	struct bt_ctf_field *pkt_ctx   = NULL;
+	struct bt_ctf_field *cpu_field = NULL;
+	struct bt_ctf_stream *stream   = NULL;
+	int ret;
+
+	cs = zalloc(sizeof(*cs));
+	if (!cs) {
+		pr_err("Failed to allocate ctf stream\n");
+		return NULL;
+	}
+
+	stream = bt_ctf_writer_create_stream(cw->writer, cw->stream_class);
+	if (!stream) {
+		pr_err("Failed to create CTF stream\n");
+		goto out;
+	}
+
+	pkt_ctx = bt_ctf_stream_get_packet_context(stream);
+	if (!pkt_ctx) {
+		pr_err("Failed to obtain packet context\n");
+		goto out;
+	}
+
+	cpu_field = bt_ctf_field_structure_get_field(pkt_ctx, "cpu_id");
+	bt_ctf_field_put(pkt_ctx);
+	if (!cpu_field) {
+		pr_err("Failed to obtain cpu field\n");
+		goto out;
+	}
+
+	ret = bt_ctf_field_unsigned_integer_set_value(cpu_field, (u32) cpu);
+	if (ret) {
+		pr_err("Failed to update CPU number\n");
+		goto out;
+	}
+
+	bt_ctf_field_put(cpu_field);
+
+	cs->cpu    = cpu;
+	cs->stream = stream;
+	return cs;
+
+out:
+	if (cpu_field)
+		bt_ctf_field_put(cpu_field);
+	if (stream)
+		bt_ctf_stream_put(stream);
+
+	free(cs);
+	return NULL;
+}
+
+static void ctf_stream__delete(struct ctf_stream *cs)
+{
+	if (cs) {
+		bt_ctf_stream_put(cs->stream);
+		free(cs);
+	}
+}
+
+static struct ctf_stream *ctf_stream(struct ctf_writer *cw, int cpu)
+{
+	struct ctf_stream *cs = cw->stream[cpu];
+
+	if (!cs) {
+		cs = ctf_stream__create(cw, cpu);
+		cw->stream[cpu] = cs;
+	}
+
+	return cs;
+}
+
+static int get_sample_cpu(struct ctf_writer *cw, struct perf_sample *sample,
+			  struct evsel *evsel)
+{
+	int cpu = 0;
+
+	if (evsel->core.attr.sample_type & PERF_SAMPLE_CPU)
+		cpu = sample->cpu;
+
+	if (cpu > cw->stream_cnt) {
+		pr_err("Event was recorded for CPU %d, limit is at %d.\n",
+			cpu, cw->stream_cnt);
+		cpu = 0;
+	}
+
+	return cpu;
+}
+
+#define STREAM_FLUSH_COUNT 100000
+
+/*
+ * Currently we have no other way to determine the
+ * time for the stream flush other than keep track
+ * of the number of events and check it against
+ * threshold.
+ */
+static bool is_flush_needed(struct ctf_stream *cs)
+{
+	return cs->count >= STREAM_FLUSH_COUNT;
+}
+
+static int process_sample_event(struct perf_tool *tool,
+				union perf_event *_event,
+				struct perf_sample *sample,
+				struct evsel *evsel,
+				struct machine *machine __maybe_unused)
+{
+	struct convert *c = container_of(tool, struct convert, tool);
+	struct evsel_priv *priv = evsel->priv;
+	struct ctf_writer *cw = &c->writer;
+	struct ctf_stream *cs;
+	struct bt_ctf_event_class *event_class;
+	struct bt_ctf_event *event;
+	int ret;
+	unsigned long type = evsel->core.attr.sample_type;
+
+	if (WARN_ONCE(!priv, "Failed to setup all events.\n"))
+		return 0;
+
+	event_class = priv->event_class;
+
+	/* update stats */
+	c->events_count++;
+	c->events_size += _event->header.size;
+
+	pr_time2(sample->time, "sample %" PRIu64 "\n", c->events_count);
+
+	event = bt_ctf_event_create(event_class);
+	if (!event) {
+		pr_err("Failed to create an CTF event\n");
+		return -1;
+	}
+
+	bt_ctf_clock_set_time(cw->clock, sample->time);
+
+	ret = add_generic_values(cw, event, evsel, sample);
+	if (ret)
+		return -1;
+
+	if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT) {
+		ret = add_tracepoint_values(cw, event_class, event,
+					    evsel, sample);
+		if (ret)
+			return -1;
+	}
+
+	if (type & PERF_SAMPLE_CALLCHAIN) {
+		ret = add_callchain_output_values(event_class,
+				event, sample->callchain);
+		if (ret)
+			return -1;
+	}
+
+	if (evsel__is_bpf_output(evsel)) {
+		ret = add_bpf_output_values(event_class, event, sample);
+		if (ret)
+			return -1;
+	}
+
+	cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel));
+	if (cs) {
+		if (is_flush_needed(cs))
+			ctf_stream__flush(cs);
+
+		cs->count++;
+		bt_ctf_stream_append_event(cs->stream, event);
+	}
+
+	bt_ctf_event_put(event);
+	return cs ? 0 : -1;
+}
+
+#define __NON_SAMPLE_SET_FIELD(_name, _type, _field) 	\
+do {							\
+	ret = value_set_##_type(cw, event, #_field, _event->_name._field);\
+	if (ret)					\
+		return -1;				\
+} while(0)
+
+#define __FUNC_PROCESS_NON_SAMPLE(_name, body) 	\
+static int process_##_name##_event(struct perf_tool *tool,	\
+				   union perf_event *_event,	\
+				   struct perf_sample *sample,	\
+				   struct machine *machine)	\
+{								\
+	struct convert *c = container_of(tool, struct convert, tool);\
+	struct ctf_writer *cw = &c->writer;			\
+	struct bt_ctf_event_class *event_class = cw->_name##_class;\
+	struct bt_ctf_event *event;				\
+	struct ctf_stream *cs;					\
+	int ret;						\
+								\
+	c->non_sample_count++;					\
+	c->events_size += _event->header.size;			\
+	event = bt_ctf_event_create(event_class);		\
+	if (!event) {						\
+		pr_err("Failed to create an CTF event\n");	\
+		return -1;					\
+	}							\
+								\
+	bt_ctf_clock_set_time(cw->clock, sample->time);		\
+	body							\
+	cs = ctf_stream(cw, 0);					\
+	if (cs) {						\
+		if (is_flush_needed(cs))			\
+			ctf_stream__flush(cs);			\
+								\
+		cs->count++;					\
+		bt_ctf_stream_append_event(cs->stream, event);	\
+	}							\
+	bt_ctf_event_put(event);				\
+								\
+	return perf_event__process_##_name(tool, _event, sample, machine);\
+}
+
+__FUNC_PROCESS_NON_SAMPLE(comm,
+	__NON_SAMPLE_SET_FIELD(comm, u32, pid);
+	__NON_SAMPLE_SET_FIELD(comm, u32, tid);
+	__NON_SAMPLE_SET_FIELD(comm, string, comm);
+)
+__FUNC_PROCESS_NON_SAMPLE(fork,
+	__NON_SAMPLE_SET_FIELD(fork, u32, pid);
+	__NON_SAMPLE_SET_FIELD(fork, u32, ppid);
+	__NON_SAMPLE_SET_FIELD(fork, u32, tid);
+	__NON_SAMPLE_SET_FIELD(fork, u32, ptid);
+	__NON_SAMPLE_SET_FIELD(fork, u64, time);
+)
+
+__FUNC_PROCESS_NON_SAMPLE(exit,
+	__NON_SAMPLE_SET_FIELD(fork, u32, pid);
+	__NON_SAMPLE_SET_FIELD(fork, u32, ppid);
+	__NON_SAMPLE_SET_FIELD(fork, u32, tid);
+	__NON_SAMPLE_SET_FIELD(fork, u32, ptid);
+	__NON_SAMPLE_SET_FIELD(fork, u64, time);
+)
+__FUNC_PROCESS_NON_SAMPLE(mmap,
+	__NON_SAMPLE_SET_FIELD(mmap, u32, pid);
+	__NON_SAMPLE_SET_FIELD(mmap, u32, tid);
+	__NON_SAMPLE_SET_FIELD(mmap, u64_hex, start);
+	__NON_SAMPLE_SET_FIELD(mmap, string, filename);
+)
+__FUNC_PROCESS_NON_SAMPLE(mmap2,
+	__NON_SAMPLE_SET_FIELD(mmap2, u32, pid);
+	__NON_SAMPLE_SET_FIELD(mmap2, u32, tid);
+	__NON_SAMPLE_SET_FIELD(mmap2, u64_hex, start);
+	__NON_SAMPLE_SET_FIELD(mmap2, string, filename);
+)
+#undef __NON_SAMPLE_SET_FIELD
+#undef __FUNC_PROCESS_NON_SAMPLE
+
+/* If dup < 0, add a prefix. Else, add _dupl_X suffix. */
+static char *change_name(char *name, char *orig_name, int dup)
+{
+	char *new_name = NULL;
+	size_t len;
+
+	if (!name)
+		name = orig_name;
+
+	if (dup >= 10)
+		goto out;
+	/*
+	 * Add '_' prefix to potential keywork.  According to
+	 * Mathieu Desnoyers (https://lore.kernel.org/lkml/1074266107.40857.1422045946295.JavaMail.zimbra@efficios.com),
+	 * further CTF spec updating may require us to use '$'.
+	 */
+	if (dup < 0)
+		len = strlen(name) + sizeof("_");
+	else
+		len = strlen(orig_name) + sizeof("_dupl_X");
+
+	new_name = malloc(len);
+	if (!new_name)
+		goto out;
+
+	if (dup < 0)
+		snprintf(new_name, len, "_%s", name);
+	else
+		snprintf(new_name, len, "%s_dupl_%d", orig_name, dup);
+
+out:
+	if (name != orig_name)
+		free(name);
+	return new_name;
+}
+
+static int event_class_add_field(struct bt_ctf_event_class *event_class,
+		struct bt_ctf_field_type *type,
+		struct tep_format_field *field)
+{
+	struct bt_ctf_field_type *t = NULL;
+	char *name;
+	int dup = 1;
+	int ret;
+
+	/* alias was already assigned */
+	if (field->alias != field->name)
+		return bt_ctf_event_class_add_field(event_class, type,
+				(char *)field->alias);
+
+	name = field->name;
+
+	/* If 'name' is a keywork, add prefix. */
+	if (bt_ctf_validate_identifier(name))
+		name = change_name(name, field->name, -1);
+
+	if (!name) {
+		pr_err("Failed to fix invalid identifier.");
+		return -1;
+	}
+	while ((t = bt_ctf_event_class_get_field_by_name(event_class, name))) {
+		bt_ctf_field_type_put(t);
+		name = change_name(name, field->name, dup++);
+		if (!name) {
+			pr_err("Failed to create dup name for '%s'\n", field->name);
+			return -1;
+		}
+	}
+
+	ret = bt_ctf_event_class_add_field(event_class, type, name);
+	if (!ret)
+		field->alias = name;
+
+	return ret;
+}
+
+static int add_tracepoint_fields_types(struct ctf_writer *cw,
+				       struct tep_format_field *fields,
+				       struct bt_ctf_event_class *event_class)
+{
+	struct tep_format_field *field;
+	int ret;
+
+	for (field = fields; field; field = field->next) {
+		struct bt_ctf_field_type *type;
+		unsigned long flags = field->flags;
+
+		pr2("  field '%s'\n", field->name);
+
+		type = get_tracepoint_field_type(cw, field);
+		if (!type)
+			return -1;
+
+		/*
+		 * A string is an array of chars. For this we use the string
+		 * type and don't care that it is an array. What we don't
+		 * support is an array of strings.
+		 */
+		if (flags & TEP_FIELD_IS_STRING)
+			flags &= ~TEP_FIELD_IS_ARRAY;
+
+		if (flags & TEP_FIELD_IS_ARRAY)
+			type = bt_ctf_field_type_array_create(type, field->arraylen);
+
+		ret = event_class_add_field(event_class, type, field);
+
+		if (flags & TEP_FIELD_IS_ARRAY)
+			bt_ctf_field_type_put(type);
+
+		if (ret) {
+			pr_err("Failed to add field '%s': %d\n",
+					field->name, ret);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+static int add_tracepoint_types(struct ctf_writer *cw,
+				struct evsel *evsel,
+				struct bt_ctf_event_class *class)
+{
+	struct tep_format_field *common_fields = evsel->tp_format->format.common_fields;
+	struct tep_format_field *fields        = evsel->tp_format->format.fields;
+	int ret;
+
+	ret = add_tracepoint_fields_types(cw, common_fields, class);
+	if (!ret)
+		ret = add_tracepoint_fields_types(cw, fields, class);
+
+	return ret;
+}
+
+static int add_bpf_output_types(struct ctf_writer *cw,
+				struct bt_ctf_event_class *class)
+{
+	struct bt_ctf_field_type *len_type = cw->data.u32;
+	struct bt_ctf_field_type *seq_base_type = cw->data.u32_hex;
+	struct bt_ctf_field_type *seq_type;
+	int ret;
+
+	ret = bt_ctf_event_class_add_field(class, len_type, "raw_len");
+	if (ret)
+		return ret;
+
+	seq_type = bt_ctf_field_type_sequence_create(seq_base_type, "raw_len");
+	if (!seq_type)
+		return -1;
+
+	return bt_ctf_event_class_add_field(class, seq_type, "raw_data");
+}
+
+static int add_generic_types(struct ctf_writer *cw, struct evsel *evsel,
+			     struct bt_ctf_event_class *event_class)
+{
+	u64 type = evsel->core.attr.sample_type;
+
+	/*
+	 * missing:
+	 *   PERF_SAMPLE_TIME         - not needed as we have it in
+	 *                              ctf event header
+	 *   PERF_SAMPLE_READ         - TODO
+	 *   PERF_SAMPLE_CALLCHAIN    - TODO
+	 *   PERF_SAMPLE_RAW          - tracepoint fields and BPF output
+	 *                              are handled separately
+	 *   PERF_SAMPLE_BRANCH_STACK - TODO
+	 *   PERF_SAMPLE_REGS_USER    - TODO
+	 *   PERF_SAMPLE_STACK_USER   - TODO
+	 */
+
+#define ADD_FIELD(cl, t, n)						\
+	do {								\
+		pr2("  field '%s'\n", n);				\
+		if (bt_ctf_event_class_add_field(cl, t, n)) {		\
+			pr_err("Failed to add field '%s';\n", n);	\
+			return -1;					\
+		}							\
+	} while (0)
+
+	if (type & PERF_SAMPLE_IP)
+		ADD_FIELD(event_class, cw->data.u64_hex, "perf_ip");
+
+	if (type & PERF_SAMPLE_TID) {
+		ADD_FIELD(event_class, cw->data.s32, "perf_tid");
+		ADD_FIELD(event_class, cw->data.s32, "perf_pid");
+	}
+
+	if ((type & PERF_SAMPLE_ID) ||
+	    (type & PERF_SAMPLE_IDENTIFIER))
+		ADD_FIELD(event_class, cw->data.u64, "perf_id");
+
+	if (type & PERF_SAMPLE_STREAM_ID)
+		ADD_FIELD(event_class, cw->data.u64, "perf_stream_id");
+
+	if (type & PERF_SAMPLE_PERIOD)
+		ADD_FIELD(event_class, cw->data.u64, "perf_period");
+
+	if (type & PERF_SAMPLE_WEIGHT)
+		ADD_FIELD(event_class, cw->data.u64, "perf_weight");
+
+	if (type & PERF_SAMPLE_DATA_SRC)
+		ADD_FIELD(event_class, cw->data.u64, "perf_data_src");
+
+	if (type & PERF_SAMPLE_TRANSACTION)
+		ADD_FIELD(event_class, cw->data.u64, "perf_transaction");
+
+	if (type & PERF_SAMPLE_CALLCHAIN) {
+		ADD_FIELD(event_class, cw->data.u32, "perf_callchain_size");
+		ADD_FIELD(event_class,
+			bt_ctf_field_type_sequence_create(
+				cw->data.u64_hex, "perf_callchain_size"),
+			"perf_callchain");
+	}
+
+#undef ADD_FIELD
+	return 0;
+}
+
+static int add_event(struct ctf_writer *cw, struct evsel *evsel)
+{
+	struct bt_ctf_event_class *event_class;
+	struct evsel_priv *priv;
+	const char *name = evsel__name(evsel);
+	int ret;
+
+	pr("Adding event '%s' (type %d)\n", name, evsel->core.attr.type);
+
+	event_class = bt_ctf_event_class_create(name);
+	if (!event_class)
+		return -1;
+
+	ret = add_generic_types(cw, evsel, event_class);
+	if (ret)
+		goto err;
+
+	if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT) {
+		ret = add_tracepoint_types(cw, evsel, event_class);
+		if (ret)
+			goto err;
+	}
+
+	if (evsel__is_bpf_output(evsel)) {
+		ret = add_bpf_output_types(cw, event_class);
+		if (ret)
+			goto err;
+	}
+
+	ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class);
+	if (ret) {
+		pr("Failed to add event class into stream.\n");
+		goto err;
+	}
+
+	priv = malloc(sizeof(*priv));
+	if (!priv)
+		goto err;
+
+	priv->event_class = event_class;
+	evsel->priv       = priv;
+	return 0;
+
+err:
+	bt_ctf_event_class_put(event_class);
+	pr_err("Failed to add event '%s'.\n", name);
+	return -1;
+}
+
+static int setup_events(struct ctf_writer *cw, struct perf_session *session)
+{
+	struct evlist *evlist = session->evlist;
+	struct evsel *evsel;
+	int ret;
+
+	evlist__for_each_entry(evlist, evsel) {
+		ret = add_event(cw, evsel);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+#define __NON_SAMPLE_ADD_FIELD(t, n)						\
+	do {							\
+		pr2("  field '%s'\n", #n);			\
+		if (bt_ctf_event_class_add_field(event_class, cw->data.t, #n)) {\
+			pr_err("Failed to add field '%s';\n", #n);\
+			return -1;				\
+		}						\
+	} while(0)
+
+#define __FUNC_ADD_NON_SAMPLE_EVENT_CLASS(_name, body) 		\
+static int add_##_name##_event(struct ctf_writer *cw)		\
+{								\
+	struct bt_ctf_event_class *event_class;			\
+	int ret;						\
+								\
+	pr("Adding "#_name" event\n");				\
+	event_class = bt_ctf_event_class_create("perf_" #_name);\
+	if (!event_class)					\
+		return -1;					\
+	body							\
+								\
+	ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class);\
+	if (ret) {						\
+		pr("Failed to add event class '"#_name"' into stream.\n");\
+		return ret;					\
+	}							\
+								\
+	cw->_name##_class = event_class;			\
+	bt_ctf_event_class_put(event_class);			\
+	return 0;						\
+}
+
+__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(comm,
+	__NON_SAMPLE_ADD_FIELD(u32, pid);
+	__NON_SAMPLE_ADD_FIELD(u32, tid);
+	__NON_SAMPLE_ADD_FIELD(string, comm);
+)
+
+__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(fork,
+	__NON_SAMPLE_ADD_FIELD(u32, pid);
+	__NON_SAMPLE_ADD_FIELD(u32, ppid);
+	__NON_SAMPLE_ADD_FIELD(u32, tid);
+	__NON_SAMPLE_ADD_FIELD(u32, ptid);
+	__NON_SAMPLE_ADD_FIELD(u64, time);
+)
+
+__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(exit,
+	__NON_SAMPLE_ADD_FIELD(u32, pid);
+	__NON_SAMPLE_ADD_FIELD(u32, ppid);
+	__NON_SAMPLE_ADD_FIELD(u32, tid);
+	__NON_SAMPLE_ADD_FIELD(u32, ptid);
+	__NON_SAMPLE_ADD_FIELD(u64, time);
+)
+
+__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(mmap,
+	__NON_SAMPLE_ADD_FIELD(u32, pid);
+	__NON_SAMPLE_ADD_FIELD(u32, tid);
+	__NON_SAMPLE_ADD_FIELD(u64_hex, start);
+	__NON_SAMPLE_ADD_FIELD(string, filename);
+)
+
+__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(mmap2,
+	__NON_SAMPLE_ADD_FIELD(u32, pid);
+	__NON_SAMPLE_ADD_FIELD(u32, tid);
+	__NON_SAMPLE_ADD_FIELD(u64_hex, start);
+	__NON_SAMPLE_ADD_FIELD(string, filename);
+)
+#undef __NON_SAMPLE_ADD_FIELD
+#undef __FUNC_ADD_NON_SAMPLE_EVENT_CLASS
+
+static int setup_non_sample_events(struct ctf_writer *cw,
+				   struct perf_session *session __maybe_unused)
+{
+	int ret;
+
+	ret = add_comm_event(cw);
+	if (ret)
+		return ret;
+	ret = add_exit_event(cw);
+	if (ret)
+		return ret;
+	ret = add_fork_event(cw);
+	if (ret)
+		return ret;
+	ret = add_mmap_event(cw);
+	if (ret)
+		return ret;
+	ret = add_mmap2_event(cw);
+	if (ret)
+		return ret;
+	return 0;
+}
+
+static void cleanup_events(struct perf_session *session)
+{
+	struct evlist *evlist = session->evlist;
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		struct evsel_priv *priv;
+
+		priv = evsel->priv;
+		bt_ctf_event_class_put(priv->event_class);
+		zfree(&evsel->priv);
+	}
+
+	evlist__delete(evlist);
+	session->evlist = NULL;
+}
+
+static int setup_streams(struct ctf_writer *cw, struct perf_session *session)
+{
+	struct ctf_stream **stream;
+	struct perf_header *ph = &session->header;
+	int ncpus;
+
+	/*
+	 * Try to get the number of cpus used in the data file,
+	 * if not present fallback to the MAX_CPUS.
+	 */
+	ncpus = ph->env.nr_cpus_avail ?: MAX_CPUS;
+
+	stream = zalloc(sizeof(*stream) * ncpus);
+	if (!stream) {
+		pr_err("Failed to allocate streams.\n");
+		return -ENOMEM;
+	}
+
+	cw->stream     = stream;
+	cw->stream_cnt = ncpus;
+	return 0;
+}
+
+static void free_streams(struct ctf_writer *cw)
+{
+	int cpu;
+
+	for (cpu = 0; cpu < cw->stream_cnt; cpu++)
+		ctf_stream__delete(cw->stream[cpu]);
+
+	zfree(&cw->stream);
+}
+
+static int ctf_writer__setup_env(struct ctf_writer *cw,
+				 struct perf_session *session)
+{
+	struct perf_header *header = &session->header;
+	struct bt_ctf_writer *writer = cw->writer;
+
+#define ADD(__n, __v)							\
+do {									\
+	if (bt_ctf_writer_add_environment_field(writer, __n, __v))	\
+		return -1;						\
+} while (0)
+
+	ADD("host",    header->env.hostname);
+	ADD("sysname", "Linux");
+	ADD("release", header->env.os_release);
+	ADD("version", header->env.version);
+	ADD("machine", header->env.arch);
+	ADD("domain", "kernel");
+	ADD("tracer_name", "perf");
+
+#undef ADD
+	return 0;
+}
+
+static int ctf_writer__setup_clock(struct ctf_writer *cw,
+				   struct perf_session *session,
+				   bool tod)
+{
+	struct bt_ctf_clock *clock = cw->clock;
+	const char *desc = "perf clock";
+	int64_t offset = 0;
+
+	if (tod) {
+		struct perf_env *env = &session->header.env;
+
+		if (!env->clock.enabled) {
+			pr_err("Can't provide --tod time, missing clock data. "
+			       "Please record with -k/--clockid option.\n");
+			return -1;
+		}
+
+		desc   = clockid_name(env->clock.clockid);
+		offset = env->clock.tod_ns - env->clock.clockid_ns;
+	}
+
+#define SET(__n, __v)				\
+do {						\
+	if (bt_ctf_clock_set_##__n(clock, __v))	\
+		return -1;			\
+} while (0)
+
+	SET(frequency,   1000000000);
+	SET(offset,      offset);
+	SET(description, desc);
+	SET(precision,   10);
+	SET(is_absolute, 0);
+
+#undef SET
+	return 0;
+}
+
+static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex)
+{
+	struct bt_ctf_field_type *type;
+
+	type = bt_ctf_field_type_integer_create(size);
+	if (!type)
+		return NULL;
+
+	if (sign &&
+	    bt_ctf_field_type_integer_set_signed(type, 1))
+		goto err;
+
+	if (hex &&
+	    bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL))
+		goto err;
+
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+	bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_BIG_ENDIAN);
+#else
+	bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_LITTLE_ENDIAN);
+#endif
+
+	pr2("Created type: INTEGER %d-bit %ssigned %s\n",
+	    size, sign ? "un" : "", hex ? "hex" : "");
+	return type;
+
+err:
+	bt_ctf_field_type_put(type);
+	return NULL;
+}
+
+static void ctf_writer__cleanup_data(struct ctf_writer *cw)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(cw->data.array); i++)
+		bt_ctf_field_type_put(cw->data.array[i]);
+}
+
+static int ctf_writer__init_data(struct ctf_writer *cw)
+{
+#define CREATE_INT_TYPE(type, size, sign, hex)		\
+do {							\
+	(type) = create_int_type(size, sign, hex);	\
+	if (!(type))					\
+		goto err;				\
+} while (0)
+
+	CREATE_INT_TYPE(cw->data.s64, 64, true,  false);
+	CREATE_INT_TYPE(cw->data.u64, 64, false, false);
+	CREATE_INT_TYPE(cw->data.s32, 32, true,  false);
+	CREATE_INT_TYPE(cw->data.u32, 32, false, false);
+	CREATE_INT_TYPE(cw->data.u32_hex, 32, false, true);
+	CREATE_INT_TYPE(cw->data.u64_hex, 64, false, true);
+
+	cw->data.string  = bt_ctf_field_type_string_create();
+	if (cw->data.string)
+		return 0;
+
+err:
+	ctf_writer__cleanup_data(cw);
+	pr_err("Failed to create data types.\n");
+	return -1;
+}
+
+static void ctf_writer__cleanup(struct ctf_writer *cw)
+{
+	ctf_writer__cleanup_data(cw);
+
+	bt_ctf_clock_put(cw->clock);
+	free_streams(cw);
+	bt_ctf_stream_class_put(cw->stream_class);
+	bt_ctf_writer_put(cw->writer);
+
+	/* and NULL all the pointers */
+	memset(cw, 0, sizeof(*cw));
+}
+
+static int ctf_writer__init(struct ctf_writer *cw, const char *path,
+			    struct perf_session *session, bool tod)
+{
+	struct bt_ctf_writer		*writer;
+	struct bt_ctf_stream_class	*stream_class;
+	struct bt_ctf_clock		*clock;
+	struct bt_ctf_field_type	*pkt_ctx_type;
+	int				ret;
+
+	/* CTF writer */
+	writer = bt_ctf_writer_create(path);
+	if (!writer)
+		goto err;
+
+	cw->writer = writer;
+
+	/* CTF clock */
+	clock = bt_ctf_clock_create("perf_clock");
+	if (!clock) {
+		pr("Failed to create CTF clock.\n");
+		goto err_cleanup;
+	}
+
+	cw->clock = clock;
+
+	if (ctf_writer__setup_clock(cw, session, tod)) {
+		pr("Failed to setup CTF clock.\n");
+		goto err_cleanup;
+	}
+
+	/* CTF stream class */
+	stream_class = bt_ctf_stream_class_create("perf_stream");
+	if (!stream_class) {
+		pr("Failed to create CTF stream class.\n");
+		goto err_cleanup;
+	}
+
+	cw->stream_class = stream_class;
+
+	/* CTF clock stream setup */
+	if (bt_ctf_stream_class_set_clock(stream_class, clock)) {
+		pr("Failed to assign CTF clock to stream class.\n");
+		goto err_cleanup;
+	}
+
+	if (ctf_writer__init_data(cw))
+		goto err_cleanup;
+
+	/* Add cpu_id for packet context */
+	pkt_ctx_type = bt_ctf_stream_class_get_packet_context_type(stream_class);
+	if (!pkt_ctx_type)
+		goto err_cleanup;
+
+	ret = bt_ctf_field_type_structure_add_field(pkt_ctx_type, cw->data.u32, "cpu_id");
+	bt_ctf_field_type_put(pkt_ctx_type);
+	if (ret)
+		goto err_cleanup;
+
+	/* CTF clock writer setup */
+	if (bt_ctf_writer_add_clock(writer, clock)) {
+		pr("Failed to assign CTF clock to writer.\n");
+		goto err_cleanup;
+	}
+
+	return 0;
+
+err_cleanup:
+	ctf_writer__cleanup(cw);
+err:
+	pr_err("Failed to setup CTF writer.\n");
+	return -1;
+}
+
+static int ctf_writer__flush_streams(struct ctf_writer *cw)
+{
+	int cpu, ret = 0;
+
+	for (cpu = 0; cpu < cw->stream_cnt && !ret; cpu++)
+		ret = ctf_stream__flush(cw->stream[cpu]);
+
+	return ret;
+}
+
+static int convert__config(const char *var, const char *value, void *cb)
+{
+	struct convert *c = cb;
+
+	if (!strcmp(var, "convert.queue-size"))
+		return perf_config_u64(&c->queue_size, var, value);
+
+	return 0;
+}
+
+int bt_convert__perf2ctf(const char *input, const char *path,
+			 struct perf_data_convert_opts *opts)
+{
+	struct perf_session *session;
+	struct perf_data data = {
+		.path	   = input,
+		.mode      = PERF_DATA_MODE_READ,
+		.force     = opts->force,
+	};
+	struct convert c = {
+		.tool = {
+			.sample          = process_sample_event,
+			.mmap            = perf_event__process_mmap,
+			.mmap2           = perf_event__process_mmap2,
+			.comm            = perf_event__process_comm,
+			.exit            = perf_event__process_exit,
+			.fork            = perf_event__process_fork,
+			.lost            = perf_event__process_lost,
+			.tracing_data    = perf_event__process_tracing_data,
+			.build_id        = perf_event__process_build_id,
+			.namespaces      = perf_event__process_namespaces,
+			.ordered_events  = true,
+			.ordering_requires_timestamps = true,
+		},
+	};
+	struct ctf_writer *cw = &c.writer;
+	int err;
+
+	if (opts->all) {
+		c.tool.comm = process_comm_event;
+		c.tool.exit = process_exit_event;
+		c.tool.fork = process_fork_event;
+		c.tool.mmap = process_mmap_event;
+		c.tool.mmap2 = process_mmap2_event;
+	}
+
+	err = perf_config(convert__config, &c);
+	if (err)
+		return err;
+
+	err = -1;
+	/* perf.data session */
+	session = perf_session__new(&data, &c.tool);
+	if (IS_ERR(session))
+		return PTR_ERR(session);
+
+	/* CTF writer */
+	if (ctf_writer__init(cw, path, session, opts->tod))
+		goto free_session;
+
+	if (c.queue_size) {
+		ordered_events__set_alloc_size(&session->ordered_events,
+					       c.queue_size);
+	}
+
+	/* CTF writer env/clock setup  */
+	if (ctf_writer__setup_env(cw, session))
+		goto free_writer;
+
+	/* CTF events setup */
+	if (setup_events(cw, session))
+		goto free_writer;
+
+	if (opts->all && setup_non_sample_events(cw, session))
+		goto free_writer;
+
+	if (setup_streams(cw, session))
+		goto free_writer;
+
+	err = perf_session__process_events(session);
+	if (!err)
+		err = ctf_writer__flush_streams(cw);
+	else
+		pr_err("Error during conversion.\n");
+
+	fprintf(stderr,
+		"[ perf data convert: Converted '%s' into CTF data '%s' ]\n",
+		data.path, path);
+
+	fprintf(stderr,
+		"[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples",
+		(double) c.events_size / 1024.0 / 1024.0,
+		c.events_count);
+
+	if (!c.non_sample_count)
+		fprintf(stderr, ") ]\n");
+	else
+		fprintf(stderr, ", %" PRIu64 " non-samples) ]\n", c.non_sample_count);
+
+	cleanup_events(session);
+	perf_session__delete(session);
+	ctf_writer__cleanup(cw);
+
+	return err;
+
+free_writer:
+	ctf_writer__cleanup(cw);
+free_session:
+	perf_session__delete(session);
+	pr_err("Error during conversion setup.\n");
+	return err;
+}
diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c
new file mode 100644
index 000000000..613d6ae82
--- /dev/null
+++ b/tools/perf/util/data-convert-json.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * JSON export.
+ *
+ * Copyright (C) 2021, CodeWeavers Inc. <nfraser@codeweavers.com>
+ */
+
+#include "data-convert.h"
+
+#include <fcntl.h>
+#include <inttypes.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "linux/compiler.h"
+#include "linux/err.h"
+#include "util/auxtrace.h"
+#include "util/debug.h"
+#include "util/dso.h"
+#include "util/event.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/header.h"
+#include "util/map.h"
+#include "util/session.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/tool.h"
+
+struct convert_json {
+	struct perf_tool tool;
+	FILE *out;
+	bool first;
+	u64 events_count;
+};
+
+// Outputs a JSON-encoded string surrounded by quotes with characters escaped.
+static void output_json_string(FILE *out, const char *s)
+{
+	fputc('"', out);
+	while (*s) {
+		switch (*s) {
+
+		// required escapes with special forms as per RFC 8259
+		case '"':  fputs("\\\"", out); break;
+		case '\\': fputs("\\\\", out); break;
+		case '\b': fputs("\\b", out);  break;
+		case '\f': fputs("\\f", out);  break;
+		case '\n': fputs("\\n", out);  break;
+		case '\r': fputs("\\r", out);  break;
+		case '\t': fputs("\\t", out);  break;
+
+		default:
+			// all other control characters must be escaped by hex code
+			if (*s <= 0x1f)
+				fprintf(out, "\\u%04x", *s);
+			else
+				fputc(*s, out);
+			break;
+		}
+
+		++s;
+	}
+	fputc('"', out);
+}
+
+// Outputs an optional comma, newline and indentation to delimit a new value
+// from the previous one in a JSON object or array.
+static void output_json_delimiters(FILE *out, bool comma, int depth)
+{
+	int i;
+
+	if (comma)
+		fputc(',', out);
+	fputc('\n', out);
+	for (i = 0; i < depth; ++i)
+		fputc('\t', out);
+}
+
+// Outputs a printf format string (with delimiter) as a JSON value.
+__printf(4, 5)
+static void output_json_format(FILE *out, bool comma, int depth, const char *format, ...)
+{
+	va_list args;
+
+	output_json_delimiters(out, comma, depth);
+	va_start(args, format);
+	vfprintf(out,  format, args);
+	va_end(args);
+}
+
+// Outputs a JSON key-value pair where the value is a string.
+static void output_json_key_string(FILE *out, bool comma, int depth,
+		const char *key, const char *value)
+{
+	output_json_delimiters(out, comma, depth);
+	output_json_string(out, key);
+	fputs(": ", out);
+	output_json_string(out, value);
+}
+
+// Outputs a JSON key-value pair where the value is a printf format string.
+__printf(5, 6)
+static void output_json_key_format(FILE *out, bool comma, int depth,
+		const char *key, const char *format, ...)
+{
+	va_list args;
+
+	output_json_delimiters(out, comma, depth);
+	output_json_string(out, key);
+	fputs(": ", out);
+	va_start(args, format);
+	vfprintf(out,  format, args);
+	va_end(args);
+}
+
+static void output_sample_callchain_entry(struct perf_tool *tool,
+		u64 ip, struct addr_location *al)
+{
+	struct convert_json *c = container_of(tool, struct convert_json, tool);
+	FILE *out = c->out;
+
+	output_json_format(out, false, 4, "{");
+	output_json_key_format(out, false, 5, "ip", "\"0x%" PRIx64 "\"", ip);
+
+	if (al && al->sym && al->sym->namelen) {
+		fputc(',', out);
+		output_json_key_string(out, false, 5, "symbol", al->sym->name);
+
+		if (al->map && al->map->dso) {
+			const char *dso = al->map->dso->short_name;
+
+			if (dso && strlen(dso) > 0) {
+				fputc(',', out);
+				output_json_key_string(out, false, 5, "dso", dso);
+			}
+		}
+	}
+
+	output_json_format(out, false, 4, "}");
+}
+
+static int process_sample_event(struct perf_tool *tool,
+				union perf_event *event __maybe_unused,
+				struct perf_sample *sample,
+				struct evsel *evsel __maybe_unused,
+				struct machine *machine)
+{
+	struct convert_json *c = container_of(tool, struct convert_json, tool);
+	FILE *out = c->out;
+	struct addr_location al, tal;
+	u64 sample_type = __evlist__combined_sample_type(evsel->evlist);
+	u8 cpumode = PERF_RECORD_MISC_USER;
+
+	if (machine__resolve(machine, &al, sample) < 0) {
+		pr_err("Sample resolution failed!\n");
+		return -1;
+	}
+
+	++c->events_count;
+
+	if (c->first)
+		c->first = false;
+	else
+		fputc(',', out);
+	output_json_format(out, false, 2, "{");
+
+	output_json_key_format(out, false, 3, "timestamp", "%" PRIi64, sample->time);
+	output_json_key_format(out, true, 3, "pid", "%i", al.thread->pid_);
+	output_json_key_format(out, true, 3, "tid", "%i", al.thread->tid);
+
+	if ((sample_type & PERF_SAMPLE_CPU))
+		output_json_key_format(out, true, 3, "cpu", "%i", sample->cpu);
+	else if (al.thread->cpu >= 0)
+		output_json_key_format(out, true, 3, "cpu", "%i", al.thread->cpu);
+
+	output_json_key_string(out, true, 3, "comm", thread__comm_str(al.thread));
+
+	output_json_key_format(out, true, 3, "callchain", "[");
+	if (sample->callchain) {
+		unsigned int i;
+		bool ok;
+		bool first_callchain = true;
+
+		for (i = 0; i < sample->callchain->nr; ++i) {
+			u64 ip = sample->callchain->ips[i];
+
+			if (ip >= PERF_CONTEXT_MAX) {
+				switch (ip) {
+				case PERF_CONTEXT_HV:
+					cpumode = PERF_RECORD_MISC_HYPERVISOR;
+					break;
+				case PERF_CONTEXT_KERNEL:
+					cpumode = PERF_RECORD_MISC_KERNEL;
+					break;
+				case PERF_CONTEXT_USER:
+					cpumode = PERF_RECORD_MISC_USER;
+					break;
+				default:
+					pr_debug("invalid callchain context: %"
+							PRId64 "\n", (s64) ip);
+					break;
+				}
+				continue;
+			}
+
+			if (first_callchain)
+				first_callchain = false;
+			else
+				fputc(',', out);
+
+			ok = thread__find_symbol(al.thread, cpumode, ip, &tal);
+			output_sample_callchain_entry(tool, ip, ok ? &tal : NULL);
+		}
+	} else {
+		output_sample_callchain_entry(tool, sample->ip, &al);
+	}
+	output_json_format(out, false, 3, "]");
+
+	output_json_format(out, false, 2, "}");
+	return 0;
+}
+
+static void output_headers(struct perf_session *session, struct convert_json *c)
+{
+	struct stat st;
+	struct perf_header *header = &session->header;
+	int ret;
+	int fd = perf_data__fd(session->data);
+	int i;
+	FILE *out = c->out;
+
+	output_json_key_format(out, false, 2, "header-version", "%u", header->version);
+
+	ret = fstat(fd, &st);
+	if (ret >= 0) {
+		time_t stctime = st.st_mtime;
+		char buf[256];
+
+		strftime(buf, sizeof(buf), "%FT%TZ", gmtime(&stctime));
+		output_json_key_string(out, true, 2, "captured-on", buf);
+	} else {
+		pr_debug("Failed to get mtime of source file, not writing captured-on");
+	}
+
+	output_json_key_format(out, true, 2, "data-offset", "%" PRIu64, header->data_offset);
+	output_json_key_format(out, true, 2, "data-size", "%" PRIu64, header->data_size);
+	output_json_key_format(out, true, 2, "feat-offset", "%" PRIu64, header->feat_offset);
+
+	output_json_key_string(out, true, 2, "hostname", header->env.hostname);
+	output_json_key_string(out, true, 2, "os-release", header->env.os_release);
+	output_json_key_string(out, true, 2, "arch", header->env.arch);
+
+	output_json_key_string(out, true, 2, "cpu-desc", header->env.cpu_desc);
+	output_json_key_string(out, true, 2, "cpuid", header->env.cpuid);
+	output_json_key_format(out, true, 2, "nrcpus-online", "%u", header->env.nr_cpus_online);
+	output_json_key_format(out, true, 2, "nrcpus-avail", "%u", header->env.nr_cpus_avail);
+
+	if (header->env.clock.enabled) {
+		output_json_key_format(out, true, 2, "clockid",
+				"%u", header->env.clock.clockid);
+		output_json_key_format(out, true, 2, "clock-time",
+				"%" PRIu64, header->env.clock.clockid_ns);
+		output_json_key_format(out, true, 2, "real-time",
+				"%" PRIu64, header->env.clock.tod_ns);
+	}
+
+	output_json_key_string(out, true, 2, "perf-version", header->env.version);
+
+	output_json_key_format(out, true, 2, "cmdline", "[");
+	for (i = 0; i < header->env.nr_cmdline; i++) {
+		output_json_delimiters(out, i != 0, 3);
+		output_json_string(c->out, header->env.cmdline_argv[i]);
+	}
+	output_json_format(out, false, 2, "]");
+}
+
+int bt_convert__perf2json(const char *input_name, const char *output_name,
+		struct perf_data_convert_opts *opts __maybe_unused)
+{
+	struct perf_session *session;
+	int fd;
+	int ret = -1;
+
+	struct convert_json c = {
+		.tool = {
+			.sample         = process_sample_event,
+			.mmap           = perf_event__process_mmap,
+			.mmap2          = perf_event__process_mmap2,
+			.comm           = perf_event__process_comm,
+			.namespaces     = perf_event__process_namespaces,
+			.cgroup         = perf_event__process_cgroup,
+			.exit           = perf_event__process_exit,
+			.fork           = perf_event__process_fork,
+			.lost           = perf_event__process_lost,
+			.tracing_data   = perf_event__process_tracing_data,
+			.build_id       = perf_event__process_build_id,
+			.id_index       = perf_event__process_id_index,
+			.auxtrace_info  = perf_event__process_auxtrace_info,
+			.auxtrace       = perf_event__process_auxtrace,
+			.event_update   = perf_event__process_event_update,
+			.ordered_events = true,
+			.ordering_requires_timestamps = true,
+		},
+		.first = true,
+		.events_count = 0,
+	};
+
+	struct perf_data data = {
+		.mode = PERF_DATA_MODE_READ,
+		.path = input_name,
+		.force = opts->force,
+	};
+
+	if (opts->all) {
+		pr_err("--all is currently unsupported for JSON output.\n");
+		goto err;
+	}
+	if (opts->tod) {
+		pr_err("--tod is currently unsupported for JSON output.\n");
+		goto err;
+	}
+
+	fd = open(output_name, O_CREAT | O_WRONLY | (opts->force ? O_TRUNC : O_EXCL), 0666);
+	if (fd == -1) {
+		if (errno == EEXIST)
+			pr_err("Output file exists. Use --force to overwrite it.\n");
+		else
+			pr_err("Error opening output file!\n");
+		goto err;
+	}
+
+	c.out = fdopen(fd, "w");
+	if (!c.out) {
+		fprintf(stderr, "Error opening output file!\n");
+		close(fd);
+		goto err;
+	}
+
+	session = perf_session__new(&data, &c.tool);
+	if (IS_ERR(session)) {
+		fprintf(stderr, "Error creating perf session!\n");
+		goto err_fclose;
+	}
+
+	if (symbol__init(&session->header.env) < 0) {
+		fprintf(stderr, "Symbol init error!\n");
+		goto err_session_delete;
+	}
+
+	// The opening brace is printed manually because it isn't delimited from a
+	// previous value (i.e. we don't want a leading newline)
+	fputc('{', c.out);
+
+	// Version number for future-proofing. Most additions should be able to be
+	// done in a backwards-compatible way so this should only need to be bumped
+	// if some major breaking change must be made.
+	output_json_format(c.out, false, 1, "\"linux-perf-json-version\": 1");
+
+	// Output headers
+	output_json_format(c.out, true, 1, "\"headers\": {");
+	output_headers(session, &c);
+	output_json_format(c.out, false, 1, "}");
+
+	// Output samples
+	output_json_format(c.out, true, 1, "\"samples\": [");
+	perf_session__process_events(session);
+	output_json_format(c.out, false, 1, "]");
+	output_json_format(c.out, false, 0, "}");
+	fputc('\n', c.out);
+
+	fprintf(stderr,
+			"[ perf data convert: Converted '%s' into JSON data '%s' ]\n",
+			data.path, output_name);
+
+	fprintf(stderr,
+			"[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples) ]\n",
+			(ftell(c.out)) / 1024.0 / 1024.0, c.events_count);
+
+	ret = 0;
+err_session_delete:
+	perf_session__delete(session);
+err_fclose:
+	fclose(c.out);
+err:
+	return ret;
+}
diff --git a/tools/perf/util/data-convert.h b/tools/perf/util/data-convert.h
new file mode 100644
index 000000000..1b4c5f598
--- /dev/null
+++ b/tools/perf/util/data-convert.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __DATA_CONVERT_H
+#define __DATA_CONVERT_H
+
+#include <stdbool.h>
+
+struct perf_data_convert_opts {
+	bool force;
+	bool all;
+	bool tod;
+};
+
+#ifdef HAVE_LIBBABELTRACE_SUPPORT
+int bt_convert__perf2ctf(const char *input_name, const char *to_ctf,
+			 struct perf_data_convert_opts *opts);
+#endif /* HAVE_LIBBABELTRACE_SUPPORT */
+
+int bt_convert__perf2json(const char *input_name, const char *to_ctf,
+			 struct perf_data_convert_opts *opts);
+
+#endif /* __DATA_CONVERT_H */
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
new file mode 100644
index 000000000..fc16299c9
--- /dev/null
+++ b/tools/perf/util/data.c
@@ -0,0 +1,559 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <linux/err.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <asm/bug.h>
+#include <dirent.h>
+
+#include "data.h"
+#include "util.h" // rm_rf_perf_data()
+#include "debug.h"
+#include "header.h"
+#include <internal/lib.h>
+
+static void close_dir(struct perf_data_file *files, int nr)
+{
+	while (--nr >= 0) {
+		close(files[nr].fd);
+		zfree(&files[nr].path);
+	}
+	free(files);
+}
+
+void perf_data__close_dir(struct perf_data *data)
+{
+	close_dir(data->dir.files, data->dir.nr);
+}
+
+int perf_data__create_dir(struct perf_data *data, int nr)
+{
+	struct perf_data_file *files = NULL;
+	int i, ret;
+
+	if (WARN_ON(!data->is_dir))
+		return -EINVAL;
+
+	files = zalloc(nr * sizeof(*files));
+	if (!files)
+		return -ENOMEM;
+
+	for (i = 0; i < nr; i++) {
+		struct perf_data_file *file = &files[i];
+
+		ret = asprintf(&file->path, "%s/data.%d", data->path, i);
+		if (ret < 0) {
+			ret = -ENOMEM;
+			goto out_err;
+		}
+
+		ret = open(file->path, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR);
+		if (ret < 0) {
+			ret = -errno;
+			goto out_err;
+		}
+
+		file->fd = ret;
+	}
+
+	data->dir.version = PERF_DIR_VERSION;
+	data->dir.files   = files;
+	data->dir.nr      = nr;
+	return 0;
+
+out_err:
+	close_dir(files, i);
+	return ret;
+}
+
+int perf_data__open_dir(struct perf_data *data)
+{
+	struct perf_data_file *files = NULL;
+	struct dirent *dent;
+	int ret = -1;
+	DIR *dir;
+	int nr = 0;
+
+	/*
+	 * Directory containing a single regular perf data file which is already
+	 * open, means there is nothing more to do here.
+	 */
+	if (perf_data__is_single_file(data))
+		return 0;
+
+	if (WARN_ON(!data->is_dir))
+		return -EINVAL;
+
+	/* The version is provided by DIR_FORMAT feature. */
+	if (WARN_ON(data->dir.version != PERF_DIR_VERSION))
+		return -1;
+
+	dir = opendir(data->path);
+	if (!dir)
+		return -EINVAL;
+
+	while ((dent = readdir(dir)) != NULL) {
+		struct perf_data_file *file;
+		char path[PATH_MAX];
+		struct stat st;
+
+		snprintf(path, sizeof(path), "%s/%s", data->path, dent->d_name);
+		if (stat(path, &st))
+			continue;
+
+		if (!S_ISREG(st.st_mode) || strncmp(dent->d_name, "data.", 5))
+			continue;
+
+		ret = -ENOMEM;
+
+		file = realloc(files, (nr + 1) * sizeof(*files));
+		if (!file)
+			goto out_err;
+
+		files = file;
+		file = &files[nr++];
+
+		file->path = strdup(path);
+		if (!file->path)
+			goto out_err;
+
+		ret = open(file->path, O_RDONLY);
+		if (ret < 0)
+			goto out_err;
+
+		file->fd = ret;
+		file->size = st.st_size;
+	}
+
+	closedir(dir);
+	if (!files)
+		return -EINVAL;
+
+	data->dir.files = files;
+	data->dir.nr    = nr;
+	return 0;
+
+out_err:
+	closedir(dir);
+	close_dir(files, nr);
+	return ret;
+}
+
+int perf_data__update_dir(struct perf_data *data)
+{
+	int i;
+
+	if (WARN_ON(!data->is_dir))
+		return -EINVAL;
+
+	for (i = 0; i < data->dir.nr; i++) {
+		struct perf_data_file *file = &data->dir.files[i];
+		struct stat st;
+
+		if (fstat(file->fd, &st))
+			return -1;
+
+		file->size = st.st_size;
+	}
+
+	return 0;
+}
+
+static bool check_pipe(struct perf_data *data)
+{
+	struct stat st;
+	bool is_pipe = false;
+	int fd = perf_data__is_read(data) ?
+		 STDIN_FILENO : STDOUT_FILENO;
+
+	if (!data->path) {
+		if (!fstat(fd, &st) && S_ISFIFO(st.st_mode))
+			is_pipe = true;
+	} else {
+		if (!strcmp(data->path, "-"))
+			is_pipe = true;
+	}
+
+	if (is_pipe) {
+		if (data->use_stdio) {
+			const char *mode;
+
+			mode = perf_data__is_read(data) ? "r" : "w";
+			data->file.fptr = fdopen(fd, mode);
+
+			if (data->file.fptr == NULL) {
+				data->file.fd = fd;
+				data->use_stdio = false;
+			}
+		} else {
+			data->file.fd = fd;
+		}
+	}
+
+	return data->is_pipe = is_pipe;
+}
+
+static int check_backup(struct perf_data *data)
+{
+	struct stat st;
+
+	if (perf_data__is_read(data))
+		return 0;
+
+	if (!stat(data->path, &st) && st.st_size) {
+		char oldname[PATH_MAX];
+		int ret;
+
+		snprintf(oldname, sizeof(oldname), "%s.old",
+			 data->path);
+
+		ret = rm_rf_perf_data(oldname);
+		if (ret) {
+			pr_err("Can't remove old data: %s (%s)\n",
+			       ret == -2 ?
+			       "Unknown file found" : strerror(errno),
+			       oldname);
+			return -1;
+		}
+
+		if (rename(data->path, oldname)) {
+			pr_err("Can't move data: %s (%s to %s)\n",
+			       strerror(errno),
+			       data->path, oldname);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+static bool is_dir(struct perf_data *data)
+{
+	struct stat st;
+
+	if (stat(data->path, &st))
+		return false;
+
+	return (st.st_mode & S_IFMT) == S_IFDIR;
+}
+
+static int open_file_read(struct perf_data *data)
+{
+	int flags = data->in_place_update ? O_RDWR : O_RDONLY;
+	struct stat st;
+	int fd;
+	char sbuf[STRERR_BUFSIZE];
+
+	fd = open(data->file.path, flags);
+	if (fd < 0) {
+		int err = errno;
+
+		pr_err("failed to open %s: %s", data->file.path,
+			str_error_r(err, sbuf, sizeof(sbuf)));
+		if (err == ENOENT && !strcmp(data->file.path, "perf.data"))
+			pr_err("  (try 'perf record' first)");
+		pr_err("\n");
+		return -err;
+	}
+
+	if (fstat(fd, &st) < 0)
+		goto out_close;
+
+	if (!data->force && st.st_uid && (st.st_uid != geteuid())) {
+		pr_err("File %s not owned by current user or root (use -f to override)\n",
+		       data->file.path);
+		goto out_close;
+	}
+
+	if (!st.st_size) {
+		pr_info("zero-sized data (%s), nothing to do!\n",
+			data->file.path);
+		goto out_close;
+	}
+
+	data->file.size = st.st_size;
+	return fd;
+
+ out_close:
+	close(fd);
+	return -1;
+}
+
+static int open_file_write(struct perf_data *data)
+{
+	int fd;
+	char sbuf[STRERR_BUFSIZE];
+
+	fd = open(data->file.path, O_CREAT|O_RDWR|O_TRUNC|O_CLOEXEC,
+		  S_IRUSR|S_IWUSR);
+
+	if (fd < 0)
+		pr_err("failed to open %s : %s\n", data->file.path,
+			str_error_r(errno, sbuf, sizeof(sbuf)));
+
+	return fd;
+}
+
+static int open_file(struct perf_data *data)
+{
+	int fd;
+
+	fd = perf_data__is_read(data) ?
+	     open_file_read(data) : open_file_write(data);
+
+	if (fd < 0) {
+		zfree(&data->file.path);
+		return -1;
+	}
+
+	data->file.fd = fd;
+	return 0;
+}
+
+static int open_file_dup(struct perf_data *data)
+{
+	data->file.path = strdup(data->path);
+	if (!data->file.path)
+		return -ENOMEM;
+
+	return open_file(data);
+}
+
+static int open_dir(struct perf_data *data)
+{
+	int ret;
+
+	/*
+	 * So far we open only the header, so we can read the data version and
+	 * layout.
+	 */
+	if (asprintf(&data->file.path, "%s/data", data->path) < 0)
+		return -1;
+
+	if (perf_data__is_write(data) &&
+	    mkdir(data->path, S_IRWXU) < 0)
+		return -1;
+
+	ret = open_file(data);
+
+	/* Cleanup whatever we managed to create so far. */
+	if (ret && perf_data__is_write(data))
+		rm_rf_perf_data(data->path);
+
+	return ret;
+}
+
+int perf_data__open(struct perf_data *data)
+{
+	if (check_pipe(data))
+		return 0;
+
+	/* currently it allows stdio for pipe only */
+	data->use_stdio = false;
+
+	if (!data->path)
+		data->path = "perf.data";
+
+	if (check_backup(data))
+		return -1;
+
+	if (perf_data__is_read(data))
+		data->is_dir = is_dir(data);
+
+	return perf_data__is_dir(data) ?
+	       open_dir(data) : open_file_dup(data);
+}
+
+void perf_data__close(struct perf_data *data)
+{
+	if (perf_data__is_dir(data))
+		perf_data__close_dir(data);
+
+	zfree(&data->file.path);
+
+	if (data->use_stdio)
+		fclose(data->file.fptr);
+	else
+		close(data->file.fd);
+}
+
+ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size)
+{
+	if (data->use_stdio) {
+		if (fread(buf, size, 1, data->file.fptr) == 1)
+			return size;
+		return feof(data->file.fptr) ? 0 : -1;
+	}
+	return readn(data->file.fd, buf, size);
+}
+
+ssize_t perf_data_file__write(struct perf_data_file *file,
+			      void *buf, size_t size)
+{
+	return writen(file->fd, buf, size);
+}
+
+ssize_t perf_data__write(struct perf_data *data,
+			      void *buf, size_t size)
+{
+	if (data->use_stdio) {
+		if (fwrite(buf, size, 1, data->file.fptr) == 1)
+			return size;
+		return -1;
+	}
+	return perf_data_file__write(&data->file, buf, size);
+}
+
+int perf_data__switch(struct perf_data *data,
+			   const char *postfix,
+			   size_t pos, bool at_exit,
+			   char **new_filepath)
+{
+	int ret;
+
+	if (check_pipe(data))
+		return -EINVAL;
+	if (perf_data__is_read(data))
+		return -EINVAL;
+
+	if (asprintf(new_filepath, "%s.%s", data->path, postfix) < 0)
+		return -ENOMEM;
+
+	/*
+	 * Only fire a warning, don't return error, continue fill
+	 * original file.
+	 */
+	if (rename(data->path, *new_filepath))
+		pr_warning("Failed to rename %s to %s\n", data->path, *new_filepath);
+
+	if (!at_exit) {
+		close(data->file.fd);
+		ret = perf_data__open(data);
+		if (ret < 0)
+			goto out;
+
+		if (lseek(data->file.fd, pos, SEEK_SET) == (off_t)-1) {
+			ret = -errno;
+			pr_debug("Failed to lseek to %zu: %s",
+				 pos, strerror(errno));
+			goto out;
+		}
+	}
+	ret = data->file.fd;
+out:
+	return ret;
+}
+
+unsigned long perf_data__size(struct perf_data *data)
+{
+	u64 size = data->file.size;
+	int i;
+
+	if (perf_data__is_single_file(data))
+		return size;
+
+	for (i = 0; i < data->dir.nr; i++) {
+		struct perf_data_file *file = &data->dir.files[i];
+
+		size += file->size;
+	}
+
+	return size;
+}
+
+int perf_data__make_kcore_dir(struct perf_data *data, char *buf, size_t buf_sz)
+{
+	int ret;
+
+	if (!data->is_dir)
+		return -1;
+
+	ret = snprintf(buf, buf_sz, "%s/kcore_dir", data->path);
+	if (ret < 0 || (size_t)ret >= buf_sz)
+		return -1;
+
+	return mkdir(buf, S_IRWXU);
+}
+
+bool has_kcore_dir(const char *path)
+{
+	struct dirent *d = ERR_PTR(-EINVAL);
+	const char *name = "kcore_dir";
+	DIR *dir = opendir(path);
+	size_t n = strlen(name);
+	bool result = false;
+
+	if (dir) {
+		while (d && !result) {
+			d = readdir(dir);
+			result = d ? strncmp(d->d_name, name, n) : false;
+		}
+		closedir(dir);
+	}
+
+	return result;
+}
+
+char *perf_data__kallsyms_name(struct perf_data *data)
+{
+	char *kallsyms_name;
+	struct stat st;
+
+	if (!data->is_dir)
+		return NULL;
+
+	if (asprintf(&kallsyms_name, "%s/kcore_dir/kallsyms", data->path) < 0)
+		return NULL;
+
+	if (stat(kallsyms_name, &st)) {
+		free(kallsyms_name);
+		return NULL;
+	}
+
+	return kallsyms_name;
+}
+
+char *perf_data__guest_kallsyms_name(struct perf_data *data, pid_t machine_pid)
+{
+	char *kallsyms_name;
+	struct stat st;
+
+	if (!data->is_dir)
+		return NULL;
+
+	if (asprintf(&kallsyms_name, "%s/kcore_dir__%d/kallsyms", data->path, machine_pid) < 0)
+		return NULL;
+
+	if (stat(kallsyms_name, &st)) {
+		free(kallsyms_name);
+		return NULL;
+	}
+
+	return kallsyms_name;
+}
+
+bool is_perf_data(const char *path)
+{
+	bool ret = false;
+	FILE *file;
+	u64 magic;
+
+	file = fopen(path, "r");
+	if (!file)
+		return false;
+
+	if (fread(&magic, 1, 8, file) < 8)
+		goto out;
+
+	ret = is_perf_magic(magic);
+out:
+	fclose(file);
+	return ret;
+}
diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h
new file mode 100644
index 000000000..effcc195d
--- /dev/null
+++ b/tools/perf/util/data.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DATA_H
+#define __PERF_DATA_H
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <linux/types.h>
+
+enum perf_data_mode {
+	PERF_DATA_MODE_WRITE,
+	PERF_DATA_MODE_READ,
+};
+
+enum perf_dir_version {
+	PERF_DIR_SINGLE_FILE	= 0,
+	PERF_DIR_VERSION	= 1,
+};
+
+struct perf_data_file {
+	char		*path;
+	union {
+		int	 fd;
+		FILE	*fptr;
+	};
+	unsigned long	 size;
+};
+
+struct perf_data {
+	const char		*path;
+	struct perf_data_file	 file;
+	bool			 is_pipe;
+	bool			 is_dir;
+	bool			 force;
+	bool			 use_stdio;
+	bool			 in_place_update;
+	enum perf_data_mode	 mode;
+
+	struct {
+		u64			 version;
+		struct perf_data_file	*files;
+		int			 nr;
+	} dir;
+};
+
+static inline bool perf_data__is_read(struct perf_data *data)
+{
+	return data->mode == PERF_DATA_MODE_READ;
+}
+
+static inline bool perf_data__is_write(struct perf_data *data)
+{
+	return data->mode == PERF_DATA_MODE_WRITE;
+}
+
+static inline int perf_data__is_pipe(struct perf_data *data)
+{
+	return data->is_pipe;
+}
+
+static inline bool perf_data__is_dir(struct perf_data *data)
+{
+	return data->is_dir;
+}
+
+static inline bool perf_data__is_single_file(struct perf_data *data)
+{
+	return data->dir.version == PERF_DIR_SINGLE_FILE;
+}
+
+static inline int perf_data__fd(struct perf_data *data)
+{
+	if (data->use_stdio)
+		return fileno(data->file.fptr);
+
+	return data->file.fd;
+}
+
+int perf_data__open(struct perf_data *data);
+void perf_data__close(struct perf_data *data);
+ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size);
+ssize_t perf_data__write(struct perf_data *data,
+			      void *buf, size_t size);
+ssize_t perf_data_file__write(struct perf_data_file *file,
+			      void *buf, size_t size);
+/*
+ * If at_exit is set, only rename current perf.data to
+ * perf.data.<postfix>, continue write on original data.
+ * Set at_exit when flushing the last output.
+ *
+ * Return value is fd of new output.
+ */
+int perf_data__switch(struct perf_data *data,
+			   const char *postfix,
+			   size_t pos, bool at_exit, char **new_filepath);
+
+int perf_data__create_dir(struct perf_data *data, int nr);
+int perf_data__open_dir(struct perf_data *data);
+void perf_data__close_dir(struct perf_data *data);
+int perf_data__update_dir(struct perf_data *data);
+unsigned long perf_data__size(struct perf_data *data);
+int perf_data__make_kcore_dir(struct perf_data *data, char *buf, size_t buf_sz);
+bool has_kcore_dir(const char *path);
+char *perf_data__kallsyms_name(struct perf_data *data);
+char *perf_data__guest_kallsyms_name(struct perf_data *data, pid_t machine_pid);
+bool is_perf_data(const char *path);
+#endif /* __PERF_DATA_H */
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
new file mode 100644
index 000000000..e0d4f0883
--- /dev/null
+++ b/tools/perf/util/db-export.c
@@ -0,0 +1,609 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * db-export.c: Support for exporting data suitable for import to a database
+ * Copyright (c) 2014, Intel Corporation.
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+
+#include "dso.h"
+#include "evsel.h"
+#include "machine.h"
+#include "thread.h"
+#include "comm.h"
+#include "symbol.h"
+#include "map.h"
+#include "event.h"
+#include "thread-stack.h"
+#include "callchain.h"
+#include "call-path.h"
+#include "db-export.h"
+#include <linux/zalloc.h>
+
+int db_export__init(struct db_export *dbe)
+{
+	memset(dbe, 0, sizeof(struct db_export));
+	return 0;
+}
+
+void db_export__exit(struct db_export *dbe)
+{
+	call_return_processor__free(dbe->crp);
+	dbe->crp = NULL;
+}
+
+int db_export__evsel(struct db_export *dbe, struct evsel *evsel)
+{
+	if (evsel->db_id)
+		return 0;
+
+	evsel->db_id = ++dbe->evsel_last_db_id;
+
+	if (dbe->export_evsel)
+		return dbe->export_evsel(dbe, evsel);
+
+	return 0;
+}
+
+int db_export__machine(struct db_export *dbe, struct machine *machine)
+{
+	if (machine->db_id)
+		return 0;
+
+	machine->db_id = ++dbe->machine_last_db_id;
+
+	if (dbe->export_machine)
+		return dbe->export_machine(dbe, machine);
+
+	return 0;
+}
+
+int db_export__thread(struct db_export *dbe, struct thread *thread,
+		      struct machine *machine, struct thread *main_thread)
+{
+	u64 main_thread_db_id = 0;
+
+	if (thread->db_id)
+		return 0;
+
+	thread->db_id = ++dbe->thread_last_db_id;
+
+	if (main_thread)
+		main_thread_db_id = main_thread->db_id;
+
+	if (dbe->export_thread)
+		return dbe->export_thread(dbe, thread, main_thread_db_id,
+					  machine);
+
+	return 0;
+}
+
+static int __db_export__comm(struct db_export *dbe, struct comm *comm,
+			     struct thread *thread)
+{
+	comm->db_id = ++dbe->comm_last_db_id;
+
+	if (dbe->export_comm)
+		return dbe->export_comm(dbe, comm, thread);
+
+	return 0;
+}
+
+int db_export__comm(struct db_export *dbe, struct comm *comm,
+		    struct thread *thread)
+{
+	if (comm->db_id)
+		return 0;
+
+	return __db_export__comm(dbe, comm, thread);
+}
+
+/*
+ * Export the "exec" comm. The "exec" comm is the program / application command
+ * name at the time it first executes. It is used to group threads for the same
+ * program. Note that the main thread pid (or thread group id tgid) cannot be
+ * used because it does not change when a new program is exec'ed.
+ */
+int db_export__exec_comm(struct db_export *dbe, struct comm *comm,
+			 struct thread *main_thread)
+{
+	int err;
+
+	if (comm->db_id)
+		return 0;
+
+	err = __db_export__comm(dbe, comm, main_thread);
+	if (err)
+		return err;
+
+	/*
+	 * Record the main thread for this comm. Note that the main thread can
+	 * have many "exec" comms because there will be a new one every time it
+	 * exec's. An "exec" comm however will only ever have 1 main thread.
+	 * That is different to any other threads for that same program because
+	 * exec() will effectively kill them, so the relationship between the
+	 * "exec" comm and non-main threads is 1-to-1. That is why
+	 * db_export__comm_thread() is called here for the main thread, but it
+	 * is called for non-main threads when they are exported.
+	 */
+	return db_export__comm_thread(dbe, comm, main_thread);
+}
+
+int db_export__comm_thread(struct db_export *dbe, struct comm *comm,
+			   struct thread *thread)
+{
+	u64 db_id;
+
+	db_id = ++dbe->comm_thread_last_db_id;
+
+	if (dbe->export_comm_thread)
+		return dbe->export_comm_thread(dbe, db_id, comm, thread);
+
+	return 0;
+}
+
+int db_export__dso(struct db_export *dbe, struct dso *dso,
+		   struct machine *machine)
+{
+	if (dso->db_id)
+		return 0;
+
+	dso->db_id = ++dbe->dso_last_db_id;
+
+	if (dbe->export_dso)
+		return dbe->export_dso(dbe, dso, machine);
+
+	return 0;
+}
+
+int db_export__symbol(struct db_export *dbe, struct symbol *sym,
+		      struct dso *dso)
+{
+	u64 *sym_db_id = symbol__priv(sym);
+
+	if (*sym_db_id)
+		return 0;
+
+	*sym_db_id = ++dbe->symbol_last_db_id;
+
+	if (dbe->export_symbol)
+		return dbe->export_symbol(dbe, sym, dso);
+
+	return 0;
+}
+
+static int db_ids_from_al(struct db_export *dbe, struct addr_location *al,
+			  u64 *dso_db_id, u64 *sym_db_id, u64 *offset)
+{
+	int err;
+
+	if (al->map) {
+		struct dso *dso = al->map->dso;
+
+		err = db_export__dso(dbe, dso, al->maps->machine);
+		if (err)
+			return err;
+		*dso_db_id = dso->db_id;
+
+		if (!al->sym) {
+			al->sym = symbol__new(al->addr, 0, 0, 0, "unknown");
+			if (al->sym)
+				dso__insert_symbol(dso, al->sym);
+		}
+
+		if (al->sym) {
+			u64 *db_id = symbol__priv(al->sym);
+
+			err = db_export__symbol(dbe, al->sym, dso);
+			if (err)
+				return err;
+			*sym_db_id = *db_id;
+			*offset = al->addr - al->sym->start;
+		}
+	}
+
+	return 0;
+}
+
+static struct call_path *call_path_from_sample(struct db_export *dbe,
+					       struct machine *machine,
+					       struct thread *thread,
+					       struct perf_sample *sample,
+					       struct evsel *evsel)
+{
+	u64 kernel_start = machine__kernel_start(machine);
+	struct call_path *current = &dbe->cpr->call_path;
+	enum chain_order saved_order = callchain_param.order;
+	int err;
+
+	if (!symbol_conf.use_callchain || !sample->callchain)
+		return NULL;
+
+	/*
+	 * Since the call path tree must be built starting with the root, we
+	 * must use ORDER_CALL for call chain resolution, in order to process
+	 * the callchain starting with the root node and ending with the leaf.
+	 */
+	callchain_param.order = ORDER_CALLER;
+	err = thread__resolve_callchain(thread, &callchain_cursor, evsel,
+					sample, NULL, NULL, PERF_MAX_STACK_DEPTH);
+	if (err) {
+		callchain_param.order = saved_order;
+		return NULL;
+	}
+	callchain_cursor_commit(&callchain_cursor);
+
+	while (1) {
+		struct callchain_cursor_node *node;
+		struct addr_location al;
+		u64 dso_db_id = 0, sym_db_id = 0, offset = 0;
+
+		memset(&al, 0, sizeof(al));
+
+		node = callchain_cursor_current(&callchain_cursor);
+		if (!node)
+			break;
+		/*
+		 * Handle export of symbol and dso for this node by
+		 * constructing an addr_location struct and then passing it to
+		 * db_ids_from_al() to perform the export.
+		 */
+		al.sym = node->ms.sym;
+		al.map = node->ms.map;
+		al.maps = thread->maps;
+		al.addr = node->ip;
+
+		if (al.map && !al.sym)
+			al.sym = dso__find_symbol(al.map->dso, al.addr);
+
+		db_ids_from_al(dbe, &al, &dso_db_id, &sym_db_id, &offset);
+
+		/* add node to the call path tree if it doesn't exist */
+		current = call_path__findnew(dbe->cpr, current,
+					     al.sym, node->ip,
+					     kernel_start);
+
+		callchain_cursor_advance(&callchain_cursor);
+	}
+
+	/* Reset the callchain order to its prior value. */
+	callchain_param.order = saved_order;
+
+	if (current == &dbe->cpr->call_path) {
+		/* Bail because the callchain was empty. */
+		return NULL;
+	}
+
+	return current;
+}
+
+int db_export__branch_type(struct db_export *dbe, u32 branch_type,
+			   const char *name)
+{
+	if (dbe->export_branch_type)
+		return dbe->export_branch_type(dbe, branch_type, name);
+
+	return 0;
+}
+
+static int db_export__threads(struct db_export *dbe, struct thread *thread,
+			      struct thread *main_thread,
+			      struct machine *machine, struct comm **comm_ptr)
+{
+	struct comm *comm = NULL;
+	struct comm *curr_comm;
+	int err;
+
+	if (main_thread) {
+		/*
+		 * A thread has a reference to the main thread, so export the
+		 * main thread first.
+		 */
+		err = db_export__thread(dbe, main_thread, machine, main_thread);
+		if (err)
+			return err;
+		/*
+		 * Export comm before exporting the non-main thread because
+		 * db_export__comm_thread() can be called further below.
+		 */
+		comm = machine__thread_exec_comm(machine, main_thread);
+		if (comm) {
+			err = db_export__exec_comm(dbe, comm, main_thread);
+			if (err)
+				return err;
+			*comm_ptr = comm;
+		}
+	}
+
+	if (thread != main_thread) {
+		/*
+		 * For a non-main thread, db_export__comm_thread() must be
+		 * called only if thread has not previously been exported.
+		 */
+		bool export_comm_thread = comm && !thread->db_id;
+
+		err = db_export__thread(dbe, thread, machine, main_thread);
+		if (err)
+			return err;
+
+		if (export_comm_thread) {
+			err = db_export__comm_thread(dbe, comm, thread);
+			if (err)
+				return err;
+		}
+	}
+
+	curr_comm = thread__comm(thread);
+	if (curr_comm)
+		return db_export__comm(dbe, curr_comm, thread);
+
+	return 0;
+}
+
+int db_export__sample(struct db_export *dbe, union perf_event *event,
+		      struct perf_sample *sample, struct evsel *evsel,
+		      struct addr_location *al, struct addr_location *addr_al)
+{
+	struct thread *thread = al->thread;
+	struct export_sample es = {
+		.event = event,
+		.sample = sample,
+		.evsel = evsel,
+		.al = al,
+	};
+	struct thread *main_thread;
+	struct comm *comm = NULL;
+	int err;
+
+	err = db_export__evsel(dbe, evsel);
+	if (err)
+		return err;
+
+	err = db_export__machine(dbe, al->maps->machine);
+	if (err)
+		return err;
+
+	main_thread = thread__main_thread(al->maps->machine, thread);
+
+	err = db_export__threads(dbe, thread, main_thread, al->maps->machine, &comm);
+	if (err)
+		goto out_put;
+
+	if (comm)
+		es.comm_db_id = comm->db_id;
+
+	es.db_id = ++dbe->sample_last_db_id;
+
+	err = db_ids_from_al(dbe, al, &es.dso_db_id, &es.sym_db_id, &es.offset);
+	if (err)
+		goto out_put;
+
+	if (dbe->cpr) {
+		struct call_path *cp = call_path_from_sample(dbe, al->maps->machine,
+							     thread, sample,
+							     evsel);
+		if (cp) {
+			db_export__call_path(dbe, cp);
+			es.call_path_id = cp->db_id;
+		}
+	}
+
+	if (addr_al) {
+		err = db_ids_from_al(dbe, addr_al, &es.addr_dso_db_id,
+				     &es.addr_sym_db_id, &es.addr_offset);
+		if (err)
+			goto out_put;
+		if (dbe->crp) {
+			err = thread_stack__process(thread, comm, sample, al,
+						    addr_al, es.db_id,
+						    dbe->crp);
+			if (err)
+				goto out_put;
+		}
+	}
+
+	if (dbe->export_sample)
+		err = dbe->export_sample(dbe, &es);
+
+out_put:
+	thread__put(main_thread);
+	return err;
+}
+
+static struct {
+	u32 branch_type;
+	const char *name;
+} branch_types[] = {
+	{0, "no branch"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL, "call"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN, "return"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL, "conditional jump"},
+	{PERF_IP_FLAG_BRANCH, "unconditional jump"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_INTERRUPT,
+	 "software interrupt"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_INTERRUPT,
+	 "return from interrupt"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_SYSCALLRET,
+	 "system call"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_SYSCALLRET,
+	 "return from system call"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_ASYNC, "asynchronous branch"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
+	 PERF_IP_FLAG_INTERRUPT, "hardware interrupt"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT, "transaction abort"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_BEGIN, "trace begin"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "trace end"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMENTRY, "vm entry"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMEXIT, "vm exit"},
+	{0, NULL}
+};
+
+int db_export__branch_types(struct db_export *dbe)
+{
+	int i, err = 0;
+
+	for (i = 0; branch_types[i].name ; i++) {
+		err = db_export__branch_type(dbe, branch_types[i].branch_type,
+					     branch_types[i].name);
+		if (err)
+			break;
+	}
+
+	/* Add trace begin / end variants */
+	for (i = 0; branch_types[i].name ; i++) {
+		const char *name = branch_types[i].name;
+		u32 type = branch_types[i].branch_type;
+		char buf[64];
+
+		if (type == PERF_IP_FLAG_BRANCH ||
+		    (type & (PERF_IP_FLAG_TRACE_BEGIN | PERF_IP_FLAG_TRACE_END)))
+			continue;
+
+		snprintf(buf, sizeof(buf), "trace begin / %s", name);
+		err = db_export__branch_type(dbe, type | PERF_IP_FLAG_TRACE_BEGIN, buf);
+		if (err)
+			break;
+
+		snprintf(buf, sizeof(buf), "%s / trace end", name);
+		err = db_export__branch_type(dbe, type | PERF_IP_FLAG_TRACE_END, buf);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+int db_export__call_path(struct db_export *dbe, struct call_path *cp)
+{
+	int err;
+
+	if (cp->db_id)
+		return 0;
+
+	if (cp->parent) {
+		err = db_export__call_path(dbe, cp->parent);
+		if (err)
+			return err;
+	}
+
+	cp->db_id = ++dbe->call_path_last_db_id;
+
+	if (dbe->export_call_path)
+		return dbe->export_call_path(dbe, cp);
+
+	return 0;
+}
+
+int db_export__call_return(struct db_export *dbe, struct call_return *cr,
+			   u64 *parent_db_id)
+{
+	int err;
+
+	err = db_export__call_path(dbe, cr->cp);
+	if (err)
+		return err;
+
+	if (!cr->db_id)
+		cr->db_id = ++dbe->call_return_last_db_id;
+
+	if (parent_db_id) {
+		if (!*parent_db_id)
+			*parent_db_id = ++dbe->call_return_last_db_id;
+		cr->parent_db_id = *parent_db_id;
+	}
+
+	if (dbe->export_call_return)
+		return dbe->export_call_return(dbe, cr);
+
+	return 0;
+}
+
+static int db_export__pid_tid(struct db_export *dbe, struct machine *machine,
+			      pid_t pid, pid_t tid, u64 *db_id,
+			      struct comm **comm_ptr, bool *is_idle)
+{
+	struct thread *thread = machine__find_thread(machine, pid, tid);
+	struct thread *main_thread;
+	int err = 0;
+
+	if (!thread || !thread->comm_set)
+		goto out_put;
+
+	*is_idle = !thread->pid_ && !thread->tid;
+
+	main_thread = thread__main_thread(machine, thread);
+
+	err = db_export__threads(dbe, thread, main_thread, machine, comm_ptr);
+
+	*db_id = thread->db_id;
+
+	thread__put(main_thread);
+out_put:
+	thread__put(thread);
+
+	return err;
+}
+
+int db_export__switch(struct db_export *dbe, union perf_event *event,
+		      struct perf_sample *sample, struct machine *machine)
+{
+	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+	bool out_preempt = out &&
+		(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT);
+	int flags = out | (out_preempt << 1);
+	bool is_idle_a = false, is_idle_b = false;
+	u64 th_a_id = 0, th_b_id = 0;
+	u64 comm_out_id, comm_in_id;
+	struct comm *comm_a = NULL;
+	struct comm *comm_b = NULL;
+	u64 th_out_id, th_in_id;
+	u64 db_id;
+	int err;
+
+	err = db_export__machine(dbe, machine);
+	if (err)
+		return err;
+
+	err = db_export__pid_tid(dbe, machine, sample->pid, sample->tid,
+				 &th_a_id, &comm_a, &is_idle_a);
+	if (err)
+		return err;
+
+	if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) {
+		pid_t pid = event->context_switch.next_prev_pid;
+		pid_t tid = event->context_switch.next_prev_tid;
+
+		err = db_export__pid_tid(dbe, machine, pid, tid, &th_b_id,
+					 &comm_b, &is_idle_b);
+		if (err)
+			return err;
+	}
+
+	/*
+	 * Do not export if both threads are unknown (i.e. not being traced),
+	 * or one is unknown and the other is the idle task.
+	 */
+	if ((!th_a_id || is_idle_a) && (!th_b_id || is_idle_b))
+		return 0;
+
+	db_id = ++dbe->context_switch_last_db_id;
+
+	if (out) {
+		th_out_id   = th_a_id;
+		th_in_id    = th_b_id;
+		comm_out_id = comm_a ? comm_a->db_id : 0;
+		comm_in_id  = comm_b ? comm_b->db_id : 0;
+	} else {
+		th_out_id   = th_b_id;
+		th_in_id    = th_a_id;
+		comm_out_id = comm_b ? comm_b->db_id : 0;
+		comm_in_id  = comm_a ? comm_a->db_id : 0;
+	}
+
+	if (dbe->export_context_switch)
+		return dbe->export_context_switch(dbe, db_id, machine, sample,
+						  th_out_id, comm_out_id,
+						  th_in_id, comm_in_id, flags);
+	return 0;
+}
diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h
new file mode 100644
index 000000000..23983cb35
--- /dev/null
+++ b/tools/perf/util/db-export.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * db-export.h: Support for exporting data suitable for import to a database
+ * Copyright (c) 2014, Intel Corporation.
+ */
+
+#ifndef __PERF_DB_EXPORT_H
+#define __PERF_DB_EXPORT_H
+
+#include <linux/types.h>
+#include <linux/list.h>
+
+struct evsel;
+struct machine;
+struct thread;
+struct comm;
+struct dso;
+struct perf_sample;
+struct addr_location;
+struct call_return_processor;
+struct call_path_root;
+struct call_path;
+struct call_return;
+
+struct export_sample {
+	union perf_event	*event;
+	struct perf_sample	*sample;
+	struct evsel		*evsel;
+	struct addr_location	*al;
+	u64			db_id;
+	u64			comm_db_id;
+	u64			dso_db_id;
+	u64			sym_db_id;
+	u64			offset; /* ip offset from symbol start */
+	u64			addr_dso_db_id;
+	u64			addr_sym_db_id;
+	u64			addr_offset; /* addr offset from symbol start */
+	u64			call_path_id;
+};
+
+struct db_export {
+	int (*export_evsel)(struct db_export *dbe, struct evsel *evsel);
+	int (*export_machine)(struct db_export *dbe, struct machine *machine);
+	int (*export_thread)(struct db_export *dbe, struct thread *thread,
+			     u64 main_thread_db_id, struct machine *machine);
+	int (*export_comm)(struct db_export *dbe, struct comm *comm,
+			   struct thread *thread);
+	int (*export_comm_thread)(struct db_export *dbe, u64 db_id,
+				  struct comm *comm, struct thread *thread);
+	int (*export_dso)(struct db_export *dbe, struct dso *dso,
+			  struct machine *machine);
+	int (*export_symbol)(struct db_export *dbe, struct symbol *sym,
+			     struct dso *dso);
+	int (*export_branch_type)(struct db_export *dbe, u32 branch_type,
+				  const char *name);
+	int (*export_sample)(struct db_export *dbe, struct export_sample *es);
+	int (*export_call_path)(struct db_export *dbe, struct call_path *cp);
+	int (*export_call_return)(struct db_export *dbe,
+				  struct call_return *cr);
+	int (*export_context_switch)(struct db_export *dbe, u64 db_id,
+				     struct machine *machine,
+				     struct perf_sample *sample,
+				     u64 th_out_id, u64 comm_out_id,
+				     u64 th_in_id, u64 comm_in_id, int flags);
+	struct call_return_processor *crp;
+	struct call_path_root *cpr;
+	u64 evsel_last_db_id;
+	u64 machine_last_db_id;
+	u64 thread_last_db_id;
+	u64 comm_last_db_id;
+	u64 comm_thread_last_db_id;
+	u64 dso_last_db_id;
+	u64 symbol_last_db_id;
+	u64 sample_last_db_id;
+	u64 call_path_last_db_id;
+	u64 call_return_last_db_id;
+	u64 context_switch_last_db_id;
+};
+
+int db_export__init(struct db_export *dbe);
+void db_export__exit(struct db_export *dbe);
+int db_export__evsel(struct db_export *dbe, struct evsel *evsel);
+int db_export__machine(struct db_export *dbe, struct machine *machine);
+int db_export__thread(struct db_export *dbe, struct thread *thread,
+		      struct machine *machine, struct thread *main_thread);
+int db_export__comm(struct db_export *dbe, struct comm *comm,
+		    struct thread *thread);
+int db_export__exec_comm(struct db_export *dbe, struct comm *comm,
+			 struct thread *main_thread);
+int db_export__comm_thread(struct db_export *dbe, struct comm *comm,
+			   struct thread *thread);
+int db_export__dso(struct db_export *dbe, struct dso *dso,
+		   struct machine *machine);
+int db_export__symbol(struct db_export *dbe, struct symbol *sym,
+		      struct dso *dso);
+int db_export__branch_type(struct db_export *dbe, u32 branch_type,
+			   const char *name);
+int db_export__sample(struct db_export *dbe, union perf_event *event,
+		      struct perf_sample *sample, struct evsel *evsel,
+		      struct addr_location *al, struct addr_location *addr_al);
+
+int db_export__branch_types(struct db_export *dbe);
+
+int db_export__call_path(struct db_export *dbe, struct call_path *cp);
+int db_export__call_return(struct db_export *dbe, struct call_return *cr,
+			   u64 *parent_db_id);
+int db_export__switch(struct db_export *dbe, union perf_event *event,
+		      struct perf_sample *sample, struct machine *machine);
+
+#endif
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
new file mode 100644
index 000000000..190e818a0
--- /dev/null
+++ b/tools/perf/util/debug.c
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: GPL-2.0
+/* For general debugging purposes */
+
+#include <inttypes.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+#include <api/debug.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+#include <sys/time.h>
+#ifdef HAVE_BACKTRACE_SUPPORT
+#include <execinfo.h>
+#endif
+#include "color.h"
+#include "event.h"
+#include "debug.h"
+#include "print_binary.h"
+#include "target.h"
+#include "ui/helpline.h"
+#include "ui/ui.h"
+#include "util/parse-sublevel-options.h"
+
+#include <linux/ctype.h>
+
+int verbose;
+int debug_peo_args;
+bool dump_trace = false, quiet = false;
+int debug_ordered_events;
+static int redirect_to_stderr;
+int debug_data_convert;
+static FILE *debug_file;
+bool debug_display_time;
+
+void debug_set_file(FILE *file)
+{
+	debug_file = file;
+}
+
+void debug_set_display_time(bool set)
+{
+	debug_display_time = set;
+}
+
+static int fprintf_time(FILE *file)
+{
+	struct timeval tod;
+	struct tm ltime;
+	char date[64];
+
+	if (!debug_display_time)
+		return 0;
+
+	if (gettimeofday(&tod, NULL) != 0)
+		return 0;
+
+	if (localtime_r(&tod.tv_sec, &ltime) == NULL)
+		return 0;
+
+	strftime(date, sizeof(date),  "%F %H:%M:%S", &ltime);
+	return fprintf(file, "[%s.%06lu] ", date, (long)tod.tv_usec);
+}
+
+int veprintf(int level, int var, const char *fmt, va_list args)
+{
+	int ret = 0;
+
+	if (var >= level) {
+		if (use_browser >= 1 && !redirect_to_stderr) {
+			ui_helpline__vshow(fmt, args);
+		} else {
+			ret = fprintf_time(debug_file);
+			ret += vfprintf(debug_file, fmt, args);
+		}
+	}
+
+	return ret;
+}
+
+int eprintf(int level, int var, const char *fmt, ...)
+{
+	va_list args;
+	int ret;
+
+	va_start(args, fmt);
+	ret = veprintf(level, var, fmt, args);
+	va_end(args);
+
+	return ret;
+}
+
+static int veprintf_time(u64 t, const char *fmt, va_list args)
+{
+	int ret = 0;
+	u64 secs, usecs, nsecs = t;
+
+	secs   = nsecs / NSEC_PER_SEC;
+	nsecs -= secs  * NSEC_PER_SEC;
+	usecs  = nsecs / NSEC_PER_USEC;
+
+	ret = fprintf(stderr, "[%13" PRIu64 ".%06" PRIu64 "] ",
+		      secs, usecs);
+	ret += vfprintf(stderr, fmt, args);
+	return ret;
+}
+
+int eprintf_time(int level, int var, u64 t, const char *fmt, ...)
+{
+	int ret = 0;
+	va_list args;
+
+	if (var >= level) {
+		va_start(args, fmt);
+		ret = veprintf_time(t, fmt, args);
+		va_end(args);
+	}
+
+	return ret;
+}
+
+/*
+ * Overloading libtraceevent standard info print
+ * function, display with -v in perf.
+ */
+void pr_stat(const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	veprintf(1, verbose, fmt, args);
+	va_end(args);
+	eprintf(1, verbose, "\n");
+}
+
+int dump_printf(const char *fmt, ...)
+{
+	va_list args;
+	int ret = 0;
+
+	if (dump_trace) {
+		va_start(args, fmt);
+		ret = vprintf(fmt, args);
+		va_end(args);
+	}
+
+	return ret;
+}
+
+static int trace_event_printer(enum binary_printer_ops op,
+			       unsigned int val, void *extra, FILE *fp)
+{
+	const char *color = PERF_COLOR_BLUE;
+	union perf_event *event = (union perf_event *)extra;
+	unsigned char ch = (unsigned char)val;
+	int printed = 0;
+
+	switch (op) {
+	case BINARY_PRINT_DATA_BEGIN:
+		printed += fprintf(fp, ".");
+		printed += color_fprintf(fp, color, "\n. ... raw event: size %d bytes\n",
+					 event->header.size);
+		break;
+	case BINARY_PRINT_LINE_BEGIN:
+		printed += fprintf(fp, ".");
+		break;
+	case BINARY_PRINT_ADDR:
+		printed += color_fprintf(fp, color, "  %04x: ", val);
+		break;
+	case BINARY_PRINT_NUM_DATA:
+		printed += color_fprintf(fp, color, " %02x", val);
+		break;
+	case BINARY_PRINT_NUM_PAD:
+		printed += color_fprintf(fp, color, "   ");
+		break;
+	case BINARY_PRINT_SEP:
+		printed += color_fprintf(fp, color, "  ");
+		break;
+	case BINARY_PRINT_CHAR_DATA:
+		printed += color_fprintf(fp, color, "%c",
+			      isprint(ch) && isascii(ch) ? ch : '.');
+		break;
+	case BINARY_PRINT_CHAR_PAD:
+		printed += color_fprintf(fp, color, " ");
+		break;
+	case BINARY_PRINT_LINE_END:
+		printed += color_fprintf(fp, color, "\n");
+		break;
+	case BINARY_PRINT_DATA_END:
+		printed += fprintf(fp, "\n");
+		break;
+	default:
+		break;
+	}
+
+	return printed;
+}
+
+void trace_event(union perf_event *event)
+{
+	unsigned char *raw_event = (void *)event;
+
+	if (!dump_trace)
+		return;
+
+	print_binary(raw_event, event->header.size, 16,
+		     trace_event_printer, event);
+}
+
+static struct sublevel_option debug_opts[] = {
+	{ .name = "verbose",		.value_ptr = &verbose },
+	{ .name = "ordered-events",	.value_ptr = &debug_ordered_events},
+	{ .name = "stderr",		.value_ptr = &redirect_to_stderr},
+	{ .name = "data-convert",	.value_ptr = &debug_data_convert },
+	{ .name = "perf-event-open",	.value_ptr = &debug_peo_args },
+	{ .name = NULL, }
+};
+
+int perf_debug_option(const char *str)
+{
+	int ret;
+
+	ret = perf_parse_sublevel_options(str, debug_opts);
+	if (ret)
+		return ret;
+
+	/* Allow only verbose value in range (0, 10), otherwise set 0. */
+	verbose = (verbose < 0) || (verbose > 10) ? 0 : verbose;
+
+	return 0;
+}
+
+int perf_quiet_option(void)
+{
+	struct sublevel_option *opt = &debug_opts[0];
+
+	/* disable all debug messages */
+	while (opt->name) {
+		*opt->value_ptr = -1;
+		opt++;
+	}
+
+	/* For debug variables that are used as bool types, set to 0. */
+	redirect_to_stderr = 0;
+	debug_peo_args = 0;
+
+	return 0;
+}
+
+#define DEBUG_WRAPPER(__n, __l)				\
+static int pr_ ## __n ## _wrapper(const char *fmt, ...)	\
+{							\
+	va_list args;					\
+	int ret;					\
+							\
+	va_start(args, fmt);				\
+	ret = veprintf(__l, verbose, fmt, args);	\
+	va_end(args);					\
+	return ret;					\
+}
+
+DEBUG_WRAPPER(warning, 0);
+DEBUG_WRAPPER(debug, 1);
+
+void perf_debug_setup(void)
+{
+	debug_set_file(stderr);
+	libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper);
+}
+
+/* Obtain a backtrace and print it to stdout. */
+#ifdef HAVE_BACKTRACE_SUPPORT
+void dump_stack(void)
+{
+	void *array[16];
+	size_t size = backtrace(array, ARRAY_SIZE(array));
+	char **strings = backtrace_symbols(array, size);
+	size_t i;
+
+	printf("Obtained %zd stack frames.\n", size);
+
+	for (i = 0; i < size; i++)
+		printf("%s\n", strings[i]);
+
+	free(strings);
+}
+#else
+void dump_stack(void) {}
+#endif
+
+void sighandler_dump_stack(int sig)
+{
+	psignal(sig, "perf");
+	dump_stack();
+	signal(sig, SIG_DFL);
+	raise(sig);
+}
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
new file mode 100644
index 000000000..f99468a7f
--- /dev/null
+++ b/tools/perf/util/debug.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* For debugging general purposes */
+#ifndef __PERF_DEBUG_H
+#define __PERF_DEBUG_H
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <linux/compiler.h>
+
+extern int verbose;
+extern int debug_peo_args;
+extern bool quiet, dump_trace;
+extern int debug_ordered_events;
+extern int debug_data_convert;
+
+#ifndef pr_fmt
+#define pr_fmt(fmt) fmt
+#endif
+
+#define pr_err(fmt, ...) \
+	eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warning(fmt, ...) \
+	eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warning_once(fmt, ...) ({		\
+	static int __warned;			\
+	if (unlikely(!__warned)) {		\
+		pr_warning(fmt, ##__VA_ARGS__); \
+		__warned = 1;			\
+	}					\
+})
+#define pr_info(fmt, ...) \
+	eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debug(fmt, ...) \
+	eprintf(1, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debugN(n, fmt, ...) \
+	eprintf(n, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debug2(fmt, ...) pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debug3(fmt, ...) pr_debugN(3, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debug4(fmt, ...) pr_debugN(4, pr_fmt(fmt), ##__VA_ARGS__)
+
+/* Special macro to print perf_event_open arguments/return value. */
+#define pr_debug2_peo(fmt, ...) {				\
+	if (debug_peo_args)						\
+		pr_debugN(0, pr_fmt(fmt), ##__VA_ARGS__);	\
+	else							\
+		pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__);	\
+}
+
+#define pr_time_N(n, var, t, fmt, ...) \
+	eprintf_time(n, var, t, fmt, ##__VA_ARGS__)
+
+#define pr_oe_time(t, fmt, ...)  pr_time_N(1, debug_ordered_events, t, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_oe_time2(t, fmt, ...) pr_time_N(2, debug_ordered_events, t, pr_fmt(fmt), ##__VA_ARGS__)
+
+#define STRERR_BUFSIZE	128	/* For the buffer size of str_error_r */
+
+union perf_event;
+
+int dump_printf(const char *fmt, ...) __printf(1, 2);
+void trace_event(union perf_event *event);
+
+int ui__error(const char *format, ...) __printf(1, 2);
+int ui__warning(const char *format, ...) __printf(1, 2);
+#define ui__warning_once(format, ...) ({		\
+	static int __warned;				\
+	if (unlikely(!__warned)) {			\
+		ui__warning(format, ##__VA_ARGS__);	\
+		__warned = 1;				\
+	}						\
+})
+
+void pr_stat(const char *fmt, ...);
+
+int eprintf(int level, int var, const char *fmt, ...) __printf(3, 4);
+int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __printf(4, 5);
+int veprintf(int level, int var, const char *fmt, va_list args);
+
+int perf_debug_option(const char *str);
+void debug_set_file(FILE *file);
+void debug_set_display_time(bool set);
+void perf_debug_setup(void);
+int perf_quiet_option(void);
+
+void dump_stack(void);
+void sighandler_dump_stack(int sig);
+
+#endif	/* __PERF_DEBUG_H */
diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c
new file mode 100644
index 000000000..ddf33d58b
--- /dev/null
+++ b/tools/perf/util/demangle-java.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "symbol.h"
+
+#include "demangle-java.h"
+
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+
+enum {
+	MODE_PREFIX = 0,
+	MODE_CLASS  = 1,
+	MODE_FUNC   = 2,
+	MODE_TYPE   = 3,
+	MODE_CTYPE  = 4, /* class arg */
+};
+
+#define BASE_ENT(c, n)	[c - 'A']=n
+static const char *base_types['Z' - 'A' + 1] = {
+	BASE_ENT('B', "byte" ),
+	BASE_ENT('C', "char" ),
+	BASE_ENT('D', "double" ),
+	BASE_ENT('F', "float" ),
+	BASE_ENT('I', "int" ),
+	BASE_ENT('J', "long" ),
+	BASE_ENT('S', "short" ),
+	BASE_ENT('Z', "boolean" ),
+};
+
+/*
+ * demangle Java symbol between str and end positions and stores
+ * up to maxlen characters into buf. The parser starts in mode.
+ *
+ * Use MODE_PREFIX to process entire prototype till end position
+ * Use MODE_TYPE to process return type if str starts on return type char
+ *
+ *  Return:
+ *	success: buf
+ *	error  : NULL
+ */
+static char *
+__demangle_java_sym(const char *str, const char *end, char *buf, int maxlen, int mode)
+{
+	int rlen = 0;
+	int array = 0;
+	int narg = 0;
+	const char *q;
+
+	if (!end)
+		end = str + strlen(str);
+
+	for (q = str; q != end; q++) {
+
+		if (rlen == (maxlen - 1))
+			break;
+
+		switch (*q) {
+		case 'L':
+			if (mode == MODE_PREFIX || mode == MODE_TYPE) {
+				if (mode == MODE_TYPE) {
+					if (narg)
+						rlen += scnprintf(buf + rlen, maxlen - rlen, ", ");
+					narg++;
+				}
+				if (mode == MODE_PREFIX)
+					mode = MODE_CLASS;
+				else
+					mode = MODE_CTYPE;
+			} else
+				buf[rlen++] = *q;
+			break;
+		case 'B':
+		case 'C':
+		case 'D':
+		case 'F':
+		case 'I':
+		case 'J':
+		case 'S':
+		case 'Z':
+			if (mode == MODE_TYPE) {
+				if (narg)
+					rlen += scnprintf(buf + rlen, maxlen - rlen, ", ");
+				rlen += scnprintf(buf + rlen, maxlen - rlen, "%s", base_types[*q - 'A']);
+				while (array--)
+					rlen += scnprintf(buf + rlen, maxlen - rlen, "[]");
+				array = 0;
+				narg++;
+			} else
+				buf[rlen++] = *q;
+			break;
+		case 'V':
+			if (mode == MODE_TYPE) {
+				rlen += scnprintf(buf + rlen, maxlen - rlen, "void");
+				while (array--)
+					rlen += scnprintf(buf + rlen, maxlen - rlen, "[]");
+				array = 0;
+			} else
+				buf[rlen++] = *q;
+			break;
+		case '[':
+			if (mode != MODE_TYPE)
+				goto error;
+			array++;
+			break;
+		case '(':
+			if (mode != MODE_FUNC)
+				goto error;
+			buf[rlen++] = *q;
+			mode = MODE_TYPE;
+			break;
+		case ')':
+			if (mode != MODE_TYPE)
+				goto error;
+			buf[rlen++] = *q;
+			narg = 0;
+			break;
+		case ';':
+			if (mode != MODE_CLASS && mode != MODE_CTYPE)
+				goto error;
+			/* safe because at least one other char to process */
+			if (isalpha(*(q + 1)) && mode == MODE_CLASS)
+				rlen += scnprintf(buf + rlen, maxlen - rlen, ".");
+			if (mode == MODE_CLASS)
+				mode = MODE_FUNC;
+			else if (mode == MODE_CTYPE)
+				mode = MODE_TYPE;
+			break;
+		case '/':
+			if (mode != MODE_CLASS && mode != MODE_CTYPE)
+				goto error;
+			rlen += scnprintf(buf + rlen, maxlen - rlen, ".");
+			break;
+		default :
+			buf[rlen++] = *q;
+		}
+	}
+	buf[rlen] = '\0';
+	return buf;
+error:
+	return NULL;
+}
+
+/*
+ * Demangle Java function signature (openJDK, not GCJ)
+ * input:
+ * 	str: string to parse. String is not modified
+ *    flags: combination of JAVA_DEMANGLE_* flags to modify demangling
+ * return:
+ *	if input can be demangled, then a newly allocated string is returned.
+ *	if input cannot be demangled, then NULL is returned
+ *
+ * Note: caller is responsible for freeing demangled string
+ */
+char *
+java_demangle_sym(const char *str, int flags)
+{
+	char *buf, *ptr;
+	char *p;
+	size_t len, l1 = 0;
+
+	if (!str)
+		return NULL;
+
+	/* find start of return type */
+	p = strrchr(str, ')');
+	if (!p)
+		return NULL;
+
+	/*
+	 * expansion factor estimated to 3x
+	 */
+	len = strlen(str) * 3 + 1;
+	buf = malloc(len);
+	if (!buf)
+		return NULL;
+
+	buf[0] = '\0';
+	if (!(flags & JAVA_DEMANGLE_NORET)) {
+		/*
+		 * get return type first
+		 */
+		ptr = __demangle_java_sym(p + 1, NULL, buf, len, MODE_TYPE);
+		if (!ptr)
+			goto error;
+
+		/* add space between return type and function prototype */
+		l1 = strlen(buf);
+		buf[l1++] = ' ';
+	}
+
+	/* process function up to return type */
+	ptr = __demangle_java_sym(str, p + 1, buf + l1, len - l1, MODE_PREFIX);
+	if (!ptr)
+		goto error;
+
+	return buf;
+error:
+	free(buf);
+	return NULL;
+}
diff --git a/tools/perf/util/demangle-java.h b/tools/perf/util/demangle-java.h
new file mode 100644
index 000000000..f936c8eab
--- /dev/null
+++ b/tools/perf/util/demangle-java.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DEMANGLE_JAVA
+#define __PERF_DEMANGLE_JAVA 1
+/*
+ * demangle function flags
+ */
+#define JAVA_DEMANGLE_NORET	0x1 /* do not process return type */
+
+char * java_demangle_sym(const char *str, int flags);
+
+#endif /* __PERF_DEMANGLE_JAVA */
diff --git a/tools/perf/util/demangle-ocaml.c b/tools/perf/util/demangle-ocaml.c
new file mode 100644
index 000000000..9d707bb60
--- /dev/null
+++ b/tools/perf/util/demangle-ocaml.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include <stdlib.h>
+#include "util/string2.h"
+
+#include "demangle-ocaml.h"
+
+#include <linux/ctype.h>
+
+static const char *caml_prefix = "caml";
+static const size_t caml_prefix_len = 4;
+
+/* mangled OCaml symbols start with "caml" followed by an upper-case letter */
+static bool
+ocaml_is_mangled(const char *sym)
+{
+	return 0 == strncmp(sym, caml_prefix, caml_prefix_len)
+		&& isupper(sym[caml_prefix_len]);
+}
+
+/*
+ * input:
+ *     sym: a symbol which may have been mangled by the OCaml compiler
+ * return:
+ *     if the input doesn't look like a mangled OCaml symbol, NULL is returned
+ *     otherwise, a newly allocated string containing the demangled symbol is returned
+ */
+char *
+ocaml_demangle_sym(const char *sym)
+{
+	char *result;
+	int j = 0;
+	int i;
+	int len;
+
+	if (!ocaml_is_mangled(sym)) {
+		return NULL;
+	}
+
+	len = strlen(sym);
+
+	/* the demangled symbol is always smaller than the mangled symbol */
+	result = malloc(len + 1);
+	if (!result)
+		return NULL;
+
+	/* skip "caml" prefix */
+	i = caml_prefix_len;
+
+	while (i < len) {
+		if (sym[i] == '_' && sym[i + 1] == '_') {
+			/* "__" -> "." */
+			result[j++] = '.';
+			i += 2;
+		}
+		else if (sym[i] == '$' && isxdigit(sym[i + 1]) && isxdigit(sym[i + 2])) {
+			/* "$xx" is a hex-encoded character */
+			result[j++] = (hex(sym[i + 1]) << 4) | hex(sym[i + 2]);
+			i += 3;
+		}
+		else {
+			result[j++] = sym[i++];
+		}
+	}
+	result[j] = '\0';
+
+	return result;
+}
diff --git a/tools/perf/util/demangle-ocaml.h b/tools/perf/util/demangle-ocaml.h
new file mode 100644
index 000000000..843cc4fa1
--- /dev/null
+++ b/tools/perf/util/demangle-ocaml.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DEMANGLE_OCAML
+#define __PERF_DEMANGLE_OCAML 1
+
+char * ocaml_demangle_sym(const char *str);
+
+#endif /* __PERF_DEMANGLE_OCAML */
diff --git a/tools/perf/util/demangle-rust.c b/tools/perf/util/demangle-rust.c
new file mode 100644
index 000000000..a659fc69f
--- /dev/null
+++ b/tools/perf/util/demangle-rust.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include "debug.h"
+
+#include "demangle-rust.h"
+
+/*
+ * Mangled Rust symbols look like this:
+ *
+ *     _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
+ *
+ * The original symbol is:
+ *
+ *     <std::sys::fd::FileDesc as core::ops::Drop>::drop
+ *
+ * The last component of the path is a 64-bit hash in lowercase hex, prefixed
+ * with "h". Rust does not have a global namespace between crates, an illusion
+ * which Rust maintains by using the hash to distinguish things that would
+ * otherwise have the same symbol.
+ *
+ * Any path component not starting with a XID_Start character is prefixed with
+ * "_".
+ *
+ * The following escape sequences are used:
+ *
+ *     ","  =>  $C$
+ *     "@"  =>  $SP$
+ *     "*"  =>  $BP$
+ *     "&"  =>  $RF$
+ *     "<"  =>  $LT$
+ *     ">"  =>  $GT$
+ *     "("  =>  $LP$
+ *     ")"  =>  $RP$
+ *     " "  =>  $u20$
+ *     "'"  =>  $u27$
+ *     "["  =>  $u5b$
+ *     "]"  =>  $u5d$
+ *     "~"  =>  $u7e$
+ *
+ * A double ".." means "::" and a single "." means "-".
+ *
+ * The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$
+ */
+
+static const char *hash_prefix = "::h";
+static const size_t hash_prefix_len = 3;
+static const size_t hash_len = 16;
+
+static bool is_prefixed_hash(const char *start);
+static bool looks_like_rust(const char *sym, size_t len);
+static bool unescape(const char **in, char **out, const char *seq, char value);
+
+/*
+ * INPUT:
+ *     sym: symbol that has been through BFD-demangling
+ *
+ * This function looks for the following indicators:
+ *
+ *  1. The hash must consist of "h" followed by 16 lowercase hex digits.
+ *
+ *  2. As a sanity check, the hash must use between 5 and 15 of the 16 possible
+ *     hex digits. This is true of 99.9998% of hashes so once in your life you
+ *     may see a false negative. The point is to notice path components that
+ *     could be Rust hashes but are probably not, like "haaaaaaaaaaaaaaaa". In
+ *     this case a false positive (non-Rust symbol has an important path
+ *     component removed because it looks like a Rust hash) is worse than a
+ *     false negative (the rare Rust symbol is not demangled) so this sets the
+ *     balance in favor of false negatives.
+ *
+ *  3. There must be no characters other than a-zA-Z0-9 and _.:$
+ *
+ *  4. There must be no unrecognized $-sign sequences.
+ *
+ *  5. There must be no sequence of three or more dots in a row ("...").
+ */
+bool
+rust_is_mangled(const char *sym)
+{
+	size_t len, len_without_hash;
+
+	if (!sym)
+		return false;
+
+	len = strlen(sym);
+	if (len <= hash_prefix_len + hash_len)
+		/* Not long enough to contain "::h" + hash + something else */
+		return false;
+
+	len_without_hash = len - (hash_prefix_len + hash_len);
+	if (!is_prefixed_hash(sym + len_without_hash))
+		return false;
+
+	return looks_like_rust(sym, len_without_hash);
+}
+
+/*
+ * A hash is the prefix "::h" followed by 16 lowercase hex digits. The hex
+ * digits must comprise between 5 and 15 (inclusive) distinct digits.
+ */
+static bool is_prefixed_hash(const char *str)
+{
+	const char *end;
+	bool seen[16];
+	size_t i;
+	int count;
+
+	if (strncmp(str, hash_prefix, hash_prefix_len))
+		return false;
+	str += hash_prefix_len;
+
+	memset(seen, false, sizeof(seen));
+	for (end = str + hash_len; str < end; str++)
+		if (*str >= '0' && *str <= '9')
+			seen[*str - '0'] = true;
+		else if (*str >= 'a' && *str <= 'f')
+			seen[*str - 'a' + 10] = true;
+		else
+			return false;
+
+	/* Count how many distinct digits seen */
+	count = 0;
+	for (i = 0; i < 16; i++)
+		if (seen[i])
+			count++;
+
+	return count >= 5 && count <= 15;
+}
+
+static bool looks_like_rust(const char *str, size_t len)
+{
+	const char *end = str + len;
+
+	while (str < end)
+		switch (*str) {
+		case '$':
+			if (!strncmp(str, "$C$", 3))
+				str += 3;
+			else if (!strncmp(str, "$SP$", 4)
+					|| !strncmp(str, "$BP$", 4)
+					|| !strncmp(str, "$RF$", 4)
+					|| !strncmp(str, "$LT$", 4)
+					|| !strncmp(str, "$GT$", 4)
+					|| !strncmp(str, "$LP$", 4)
+					|| !strncmp(str, "$RP$", 4))
+				str += 4;
+			else if (!strncmp(str, "$u20$", 5)
+					|| !strncmp(str, "$u27$", 5)
+					|| !strncmp(str, "$u5b$", 5)
+					|| !strncmp(str, "$u5d$", 5)
+					|| !strncmp(str, "$u7e$", 5))
+				str += 5;
+			else
+				return false;
+			break;
+		case '.':
+			/* Do not allow three or more consecutive dots */
+			if (!strncmp(str, "...", 3))
+				return false;
+			/* Fall through */
+		case 'a' ... 'z':
+		case 'A' ... 'Z':
+		case '0' ... '9':
+		case '_':
+		case ':':
+			str++;
+			break;
+		default:
+			return false;
+		}
+
+	return true;
+}
+
+/*
+ * INPUT:
+ *     sym: symbol for which rust_is_mangled(sym) returns true
+ *
+ * The input is demangled in-place because the mangled name is always longer
+ * than the demangled one.
+ */
+void
+rust_demangle_sym(char *sym)
+{
+	const char *in;
+	char *out;
+	const char *end;
+
+	if (!sym)
+		return;
+
+	in = sym;
+	out = sym;
+	end = sym + strlen(sym) - (hash_prefix_len + hash_len);
+
+	while (in < end)
+		switch (*in) {
+		case '$':
+			if (!(unescape(&in, &out, "$C$", ',')
+					|| unescape(&in, &out, "$SP$", '@')
+					|| unescape(&in, &out, "$BP$", '*')
+					|| unescape(&in, &out, "$RF$", '&')
+					|| unescape(&in, &out, "$LT$", '<')
+					|| unescape(&in, &out, "$GT$", '>')
+					|| unescape(&in, &out, "$LP$", '(')
+					|| unescape(&in, &out, "$RP$", ')')
+					|| unescape(&in, &out, "$u20$", ' ')
+					|| unescape(&in, &out, "$u27$", '\'')
+					|| unescape(&in, &out, "$u5b$", '[')
+					|| unescape(&in, &out, "$u5d$", ']')
+					|| unescape(&in, &out, "$u7e$", '~'))) {
+				pr_err("demangle-rust: unexpected escape sequence");
+				goto done;
+			}
+			break;
+		case '_':
+			/*
+			 * If this is the start of a path component and the next
+			 * character is an escape sequence, ignore the
+			 * underscore. The mangler inserts an underscore to make
+			 * sure the path component begins with a XID_Start
+			 * character.
+			 */
+			if ((in == sym || in[-1] == ':') && in[1] == '$')
+				in++;
+			else
+				*out++ = *in++;
+			break;
+		case '.':
+			if (in[1] == '.') {
+				/* ".." becomes "::" */
+				*out++ = ':';
+				*out++ = ':';
+				in += 2;
+			} else {
+				/* "." becomes "-" */
+				*out++ = '-';
+				in++;
+			}
+			break;
+		case 'a' ... 'z':
+		case 'A' ... 'Z':
+		case '0' ... '9':
+		case ':':
+			*out++ = *in++;
+			break;
+		default:
+			pr_err("demangle-rust: unexpected character '%c' in symbol\n",
+				*in);
+			goto done;
+		}
+
+done:
+	*out = '\0';
+}
+
+static bool unescape(const char **in, char **out, const char *seq, char value)
+{
+	size_t len = strlen(seq);
+
+	if (strncmp(*in, seq, len))
+		return false;
+
+	**out = value;
+
+	*in += len;
+	*out += 1;
+
+	return true;
+}
diff --git a/tools/perf/util/demangle-rust.h b/tools/perf/util/demangle-rust.h
new file mode 100644
index 000000000..2fca618b1
--- /dev/null
+++ b/tools/perf/util/demangle-rust.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DEMANGLE_RUST
+#define __PERF_DEMANGLE_RUST 1
+
+bool rust_is_mangled(const char *str);
+void rust_demangle_sym(char *str);
+
+#endif /* __PERF_DEMANGLE_RUST */
diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c
new file mode 100644
index 000000000..54e4d4495
--- /dev/null
+++ b/tools/perf/util/dlfilter.c
@@ -0,0 +1,617 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dlfilter.c: Interface to perf script --dlfilter shared object
+ * Copyright (c) 2021, Intel Corporation.
+ */
+#include <dlfcn.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dirent.h>
+#include <subcmd/exec-cmd.h>
+#include <linux/zalloc.h>
+#include <linux/build_bug.h>
+
+#include "debug.h"
+#include "event.h"
+#include "evsel.h"
+#include "dso.h"
+#include "map.h"
+#include "thread.h"
+#include "trace-event.h"
+#include "symbol.h"
+#include "srcline.h"
+#include "dlfilter.h"
+#include "../include/perf/perf_dlfilter.h"
+
+static void al_to_d_al(struct addr_location *al, struct perf_dlfilter_al *d_al)
+{
+	struct symbol *sym = al->sym;
+
+	d_al->size = sizeof(*d_al);
+	if (al->map) {
+		struct dso *dso = al->map->dso;
+
+		if (symbol_conf.show_kernel_path && dso->long_name)
+			d_al->dso = dso->long_name;
+		else
+			d_al->dso = dso->name;
+		d_al->is_64_bit = dso->is_64_bit;
+		d_al->buildid_size = dso->bid.size;
+		d_al->buildid = dso->bid.data;
+	} else {
+		d_al->dso = NULL;
+		d_al->is_64_bit = 0;
+		d_al->buildid_size = 0;
+		d_al->buildid = NULL;
+	}
+	if (sym) {
+		d_al->sym = sym->name;
+		d_al->sym_start = sym->start;
+		d_al->sym_end = sym->end;
+		if (al->addr < sym->end)
+			d_al->symoff = al->addr - sym->start;
+		else
+			d_al->symoff = al->addr - al->map->start - sym->start;
+		d_al->sym_binding = sym->binding;
+	} else {
+		d_al->sym = NULL;
+		d_al->sym_start = 0;
+		d_al->sym_end = 0;
+		d_al->symoff = 0;
+		d_al->sym_binding = 0;
+	}
+	d_al->addr = al->addr;
+	d_al->comm = NULL;
+	d_al->filtered = 0;
+}
+
+static struct addr_location *get_al(struct dlfilter *d)
+{
+	struct addr_location *al = d->al;
+
+	if (!al->thread && machine__resolve(d->machine, al, d->sample) < 0)
+		return NULL;
+	return al;
+}
+
+static struct thread *get_thread(struct dlfilter *d)
+{
+	struct addr_location *al = get_al(d);
+
+	return al ? al->thread : NULL;
+}
+
+static const struct perf_dlfilter_al *dlfilter__resolve_ip(void *ctx)
+{
+	struct dlfilter *d = (struct dlfilter *)ctx;
+	struct perf_dlfilter_al *d_al = d->d_ip_al;
+	struct addr_location *al;
+
+	if (!d->ctx_valid)
+		return NULL;
+
+	/* 'size' is also used to indicate already initialized */
+	if (d_al->size)
+		return d_al;
+
+	al = get_al(d);
+	if (!al)
+		return NULL;
+
+	al_to_d_al(al, d_al);
+
+	d_al->is_kernel_ip = machine__kernel_ip(d->machine, d->sample->ip);
+	d_al->comm = al->thread ? thread__comm_str(al->thread) : ":-1";
+	d_al->filtered = al->filtered;
+
+	return d_al;
+}
+
+static const struct perf_dlfilter_al *dlfilter__resolve_addr(void *ctx)
+{
+	struct dlfilter *d = (struct dlfilter *)ctx;
+	struct perf_dlfilter_al *d_addr_al = d->d_addr_al;
+	struct addr_location *addr_al = d->addr_al;
+
+	if (!d->ctx_valid || !d->d_sample->addr_correlates_sym)
+		return NULL;
+
+	/* 'size' is also used to indicate already initialized */
+	if (d_addr_al->size)
+		return d_addr_al;
+
+	if (!addr_al->thread) {
+		struct thread *thread = get_thread(d);
+
+		if (!thread)
+			return NULL;
+		thread__resolve(thread, addr_al, d->sample);
+	}
+
+	al_to_d_al(addr_al, d_addr_al);
+
+	d_addr_al->is_kernel_ip = machine__kernel_ip(d->machine, d->sample->addr);
+
+	return d_addr_al;
+}
+
+static char **dlfilter__args(void *ctx, int *dlargc)
+{
+	struct dlfilter *d = (struct dlfilter *)ctx;
+
+	if (dlargc)
+		*dlargc = 0;
+	else
+		return NULL;
+
+	if (!d->ctx_valid && !d->in_start && !d->in_stop)
+		return NULL;
+
+	*dlargc = d->dlargc;
+	return d->dlargv;
+}
+
+static __s32 dlfilter__resolve_address(void *ctx, __u64 address, struct perf_dlfilter_al *d_al_p)
+{
+	struct dlfilter *d = (struct dlfilter *)ctx;
+	struct perf_dlfilter_al d_al;
+	struct addr_location al;
+	struct thread *thread;
+	__u32 sz;
+
+	if (!d->ctx_valid || !d_al_p)
+		return -1;
+
+	thread = get_thread(d);
+	if (!thread)
+		return -1;
+
+	thread__find_symbol_fb(thread, d->sample->cpumode, address, &al);
+
+	al_to_d_al(&al, &d_al);
+
+	d_al.is_kernel_ip = machine__kernel_ip(d->machine, address);
+
+	sz = d_al_p->size;
+	memcpy(d_al_p, &d_al, min((size_t)sz, sizeof(d_al)));
+	d_al_p->size = sz;
+
+	return 0;
+}
+
+static const __u8 *dlfilter__insn(void *ctx, __u32 *len)
+{
+	struct dlfilter *d = (struct dlfilter *)ctx;
+
+	if (!len)
+		return NULL;
+
+	*len = 0;
+
+	if (!d->ctx_valid)
+		return NULL;
+
+	if (d->sample->ip && !d->sample->insn_len) {
+		struct addr_location *al = d->al;
+
+		if (!al->thread && machine__resolve(d->machine, al, d->sample) < 0)
+			return NULL;
+
+		if (al->thread->maps && al->thread->maps->machine)
+			script_fetch_insn(d->sample, al->thread, al->thread->maps->machine);
+	}
+
+	if (!d->sample->insn_len)
+		return NULL;
+
+	*len = d->sample->insn_len;
+
+	return (__u8 *)d->sample->insn;
+}
+
+static const char *dlfilter__srcline(void *ctx, __u32 *line_no)
+{
+	struct dlfilter *d = (struct dlfilter *)ctx;
+	struct addr_location *al;
+	unsigned int line = 0;
+	char *srcfile = NULL;
+	struct map *map;
+	u64 addr;
+
+	if (!d->ctx_valid || !line_no)
+		return NULL;
+
+	al = get_al(d);
+	if (!al)
+		return NULL;
+
+	map = al->map;
+	addr = al->addr;
+
+	if (map && map->dso)
+		srcfile = get_srcline_split(map->dso, map__rip_2objdump(map, addr), &line);
+
+	*line_no = line;
+	return srcfile;
+}
+
+static struct perf_event_attr *dlfilter__attr(void *ctx)
+{
+	struct dlfilter *d = (struct dlfilter *)ctx;
+
+	if (!d->ctx_valid)
+		return NULL;
+
+	return &d->evsel->core.attr;
+}
+
+static __s32 dlfilter__object_code(void *ctx, __u64 ip, void *buf, __u32 len)
+{
+	struct dlfilter *d = (struct dlfilter *)ctx;
+	struct addr_location *al;
+	struct addr_location a;
+	struct map *map;
+	u64 offset;
+
+	if (!d->ctx_valid)
+		return -1;
+
+	al = get_al(d);
+	if (!al)
+		return -1;
+
+	map = al->map;
+
+	if (map && ip >= map->start && ip < map->end &&
+	    machine__kernel_ip(d->machine, ip) == machine__kernel_ip(d->machine, d->sample->ip))
+		goto have_map;
+
+	thread__find_map_fb(al->thread, d->sample->cpumode, ip, &a);
+	if (!a.map)
+		return -1;
+
+	map = a.map;
+have_map:
+	offset = map->map_ip(map, ip);
+	if (ip + len >= map->end)
+		len = map->end - ip;
+	return dso__data_read_offset(map->dso, d->machine, offset, buf, len);
+}
+
+static const struct perf_dlfilter_fns perf_dlfilter_fns = {
+	.resolve_ip      = dlfilter__resolve_ip,
+	.resolve_addr    = dlfilter__resolve_addr,
+	.args            = dlfilter__args,
+	.resolve_address = dlfilter__resolve_address,
+	.insn            = dlfilter__insn,
+	.srcline         = dlfilter__srcline,
+	.attr            = dlfilter__attr,
+	.object_code     = dlfilter__object_code,
+};
+
+static char *find_dlfilter(const char *file)
+{
+	char path[PATH_MAX];
+	char *exec_path;
+
+	if (strchr(file, '/'))
+		goto out;
+
+	if (!access(file, R_OK)) {
+		/*
+		 * Prepend "./" so that dlopen will find the file in the
+		 * current directory.
+		 */
+		snprintf(path, sizeof(path), "./%s", file);
+		file = path;
+		goto out;
+	}
+
+	exec_path = get_argv_exec_path();
+	if (!exec_path)
+		goto out;
+	snprintf(path, sizeof(path), "%s/dlfilters/%s", exec_path, file);
+	free(exec_path);
+	if (!access(path, R_OK))
+		file = path;
+out:
+	return strdup(file);
+}
+
+#define CHECK_FLAG(x) BUILD_BUG_ON((u64)PERF_DLFILTER_FLAG_ ## x != (u64)PERF_IP_FLAG_ ## x)
+
+static int dlfilter__init(struct dlfilter *d, const char *file, int dlargc, char **dlargv)
+{
+	CHECK_FLAG(BRANCH);
+	CHECK_FLAG(CALL);
+	CHECK_FLAG(RETURN);
+	CHECK_FLAG(CONDITIONAL);
+	CHECK_FLAG(SYSCALLRET);
+	CHECK_FLAG(ASYNC);
+	CHECK_FLAG(INTERRUPT);
+	CHECK_FLAG(TX_ABORT);
+	CHECK_FLAG(TRACE_BEGIN);
+	CHECK_FLAG(TRACE_END);
+	CHECK_FLAG(IN_TX);
+	CHECK_FLAG(VMENTRY);
+	CHECK_FLAG(VMEXIT);
+
+	memset(d, 0, sizeof(*d));
+	d->file = find_dlfilter(file);
+	if (!d->file)
+		return -1;
+	d->dlargc = dlargc;
+	d->dlargv = dlargv;
+	return 0;
+}
+
+static void dlfilter__exit(struct dlfilter *d)
+{
+	zfree(&d->file);
+}
+
+static int dlfilter__open(struct dlfilter *d)
+{
+	d->handle = dlopen(d->file, RTLD_NOW);
+	if (!d->handle) {
+		pr_err("dlopen failed for: '%s'\n", d->file);
+		return -1;
+	}
+	d->start = dlsym(d->handle, "start");
+	d->filter_event = dlsym(d->handle, "filter_event");
+	d->filter_event_early = dlsym(d->handle, "filter_event_early");
+	d->stop = dlsym(d->handle, "stop");
+	d->fns = dlsym(d->handle, "perf_dlfilter_fns");
+	if (d->fns)
+		memcpy(d->fns, &perf_dlfilter_fns, sizeof(struct perf_dlfilter_fns));
+	return 0;
+}
+
+static int dlfilter__close(struct dlfilter *d)
+{
+	return dlclose(d->handle);
+}
+
+struct dlfilter *dlfilter__new(const char *file, int dlargc, char **dlargv)
+{
+	struct dlfilter *d = malloc(sizeof(*d));
+
+	if (!d)
+		return NULL;
+
+	if (dlfilter__init(d, file, dlargc, dlargv))
+		goto err_free;
+
+	if (dlfilter__open(d))
+		goto err_exit;
+
+	return d;
+
+err_exit:
+	dlfilter__exit(d);
+err_free:
+	free(d);
+	return NULL;
+}
+
+static void dlfilter__free(struct dlfilter *d)
+{
+	if (d) {
+		dlfilter__exit(d);
+		free(d);
+	}
+}
+
+int dlfilter__start(struct dlfilter *d, struct perf_session *session)
+{
+	if (d) {
+		d->session = session;
+		if (d->start) {
+			int ret;
+
+			d->in_start = true;
+			ret = d->start(&d->data, d);
+			d->in_start = false;
+			return ret;
+		}
+	}
+	return 0;
+}
+
+static int dlfilter__stop(struct dlfilter *d)
+{
+	if (d && d->stop) {
+		int ret;
+
+		d->in_stop = true;
+		ret = d->stop(d->data, d);
+		d->in_stop = false;
+		return ret;
+	}
+	return 0;
+}
+
+void dlfilter__cleanup(struct dlfilter *d)
+{
+	if (d) {
+		dlfilter__stop(d);
+		dlfilter__close(d);
+		dlfilter__free(d);
+	}
+}
+
+#define ASSIGN(x) d_sample.x = sample->x
+
+int dlfilter__do_filter_event(struct dlfilter *d,
+			      union perf_event *event,
+			      struct perf_sample *sample,
+			      struct evsel *evsel,
+			      struct machine *machine,
+			      struct addr_location *al,
+			      struct addr_location *addr_al,
+			      bool early)
+{
+	struct perf_dlfilter_sample d_sample;
+	struct perf_dlfilter_al d_ip_al;
+	struct perf_dlfilter_al d_addr_al;
+	int ret;
+
+	d->event       = event;
+	d->sample      = sample;
+	d->evsel       = evsel;
+	d->machine     = machine;
+	d->al          = al;
+	d->addr_al     = addr_al;
+	d->d_sample    = &d_sample;
+	d->d_ip_al     = &d_ip_al;
+	d->d_addr_al   = &d_addr_al;
+
+	d_sample.size  = sizeof(d_sample);
+	d_ip_al.size   = 0; /* To indicate d_ip_al is not initialized */
+	d_addr_al.size = 0; /* To indicate d_addr_al is not initialized */
+
+	ASSIGN(ip);
+	ASSIGN(pid);
+	ASSIGN(tid);
+	ASSIGN(time);
+	ASSIGN(addr);
+	ASSIGN(id);
+	ASSIGN(stream_id);
+	ASSIGN(period);
+	ASSIGN(weight);
+	ASSIGN(ins_lat);
+	ASSIGN(p_stage_cyc);
+	ASSIGN(transaction);
+	ASSIGN(insn_cnt);
+	ASSIGN(cyc_cnt);
+	ASSIGN(cpu);
+	ASSIGN(flags);
+	ASSIGN(data_src);
+	ASSIGN(phys_addr);
+	ASSIGN(data_page_size);
+	ASSIGN(code_page_size);
+	ASSIGN(cgroup);
+	ASSIGN(cpumode);
+	ASSIGN(misc);
+	ASSIGN(raw_size);
+	ASSIGN(raw_data);
+	ASSIGN(machine_pid);
+	ASSIGN(vcpu);
+
+	if (sample->branch_stack) {
+		d_sample.brstack_nr = sample->branch_stack->nr;
+		d_sample.brstack = (struct perf_branch_entry *)perf_sample__branch_entries(sample);
+	} else {
+		d_sample.brstack_nr = 0;
+		d_sample.brstack = NULL;
+	}
+
+	if (sample->callchain) {
+		d_sample.raw_callchain_nr = sample->callchain->nr;
+		d_sample.raw_callchain = (__u64 *)sample->callchain->ips;
+	} else {
+		d_sample.raw_callchain_nr = 0;
+		d_sample.raw_callchain = NULL;
+	}
+
+	d_sample.addr_correlates_sym =
+		(evsel->core.attr.sample_type & PERF_SAMPLE_ADDR) &&
+		sample_addr_correlates_sym(&evsel->core.attr);
+
+	d_sample.event = evsel__name(evsel);
+
+	d->ctx_valid = true;
+
+	if (early)
+		ret = d->filter_event_early(d->data, &d_sample, d);
+	else
+		ret = d->filter_event(d->data, &d_sample, d);
+
+	d->ctx_valid = false;
+
+	return ret;
+}
+
+bool get_filter_desc(const char *dirname, const char *name, char **desc,
+		     char **long_desc)
+{
+	char path[PATH_MAX];
+	void *handle;
+	const char *(*desc_fn)(const char **long_description);
+
+	snprintf(path, sizeof(path), "%s/%s", dirname, name);
+	handle = dlopen(path, RTLD_NOW);
+	if (!handle || !(dlsym(handle, "filter_event") || dlsym(handle, "filter_event_early")))
+		return false;
+	desc_fn = dlsym(handle, "filter_description");
+	if (desc_fn) {
+		const char *dsc;
+		const char *long_dsc;
+
+		dsc = desc_fn(&long_dsc);
+		if (dsc)
+			*desc = strdup(dsc);
+		if (long_dsc)
+			*long_desc = strdup(long_dsc);
+	}
+	dlclose(handle);
+	return true;
+}
+
+static void list_filters(const char *dirname)
+{
+	struct dirent *entry;
+	DIR *dir;
+
+	dir = opendir(dirname);
+	if (!dir)
+		return;
+
+	while ((entry = readdir(dir)) != NULL)
+	{
+		size_t n = strlen(entry->d_name);
+		char *long_desc = NULL;
+		char *desc = NULL;
+
+		if (entry->d_type == DT_DIR || n < 4 ||
+		    strcmp(".so", entry->d_name + n - 3))
+			continue;
+		if (!get_filter_desc(dirname, entry->d_name, &desc, &long_desc))
+			continue;
+		printf("  %-36s %s\n", entry->d_name, desc ? desc : "");
+		if (verbose) {
+			char *p = long_desc;
+			char *line;
+
+			while ((line = strsep(&p, "\n")) != NULL)
+				printf("%39s%s\n", "", line);
+		}
+		free(long_desc);
+		free(desc);
+	}
+
+	closedir(dir);
+}
+
+int list_available_dlfilters(const struct option *opt __maybe_unused,
+			     const char *s __maybe_unused,
+			     int unset __maybe_unused)
+{
+	char path[PATH_MAX];
+	char *exec_path;
+
+	printf("List of available dlfilters:\n");
+
+	list_filters(".");
+
+	exec_path = get_argv_exec_path();
+	if (!exec_path)
+		goto out;
+	snprintf(path, sizeof(path), "%s/dlfilters", exec_path);
+
+	list_filters(path);
+
+	free(exec_path);
+out:
+	exit(0);
+}
diff --git a/tools/perf/util/dlfilter.h b/tools/perf/util/dlfilter.h
new file mode 100644
index 000000000..cc4bb9657
--- /dev/null
+++ b/tools/perf/util/dlfilter.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * dlfilter.h: Interface to perf script --dlfilter shared object
+ * Copyright (c) 2021, Intel Corporation.
+ */
+
+#ifndef PERF_UTIL_DLFILTER_H
+#define PERF_UTIL_DLFILTER_H
+
+struct perf_session;
+union  perf_event;
+struct perf_sample;
+struct evsel;
+struct machine;
+struct addr_location;
+struct perf_dlfilter_fns;
+struct perf_dlfilter_sample;
+struct perf_dlfilter_al;
+
+struct dlfilter {
+	char				*file;
+	void				*handle;
+	void				*data;
+	struct perf_session		*session;
+	bool				ctx_valid;
+	bool				in_start;
+	bool				in_stop;
+	int				dlargc;
+	char				**dlargv;
+
+	union perf_event		*event;
+	struct perf_sample		*sample;
+	struct evsel			*evsel;
+	struct machine			*machine;
+	struct addr_location		*al;
+	struct addr_location		*addr_al;
+	struct perf_dlfilter_sample	*d_sample;
+	struct perf_dlfilter_al		*d_ip_al;
+	struct perf_dlfilter_al		*d_addr_al;
+
+	int (*start)(void **data, void *ctx);
+	int (*stop)(void *data, void *ctx);
+
+	int (*filter_event)(void *data,
+			    const struct perf_dlfilter_sample *sample,
+			    void *ctx);
+	int (*filter_event_early)(void *data,
+				  const struct perf_dlfilter_sample *sample,
+				  void *ctx);
+
+	struct perf_dlfilter_fns *fns;
+};
+
+struct dlfilter *dlfilter__new(const char *file, int dlargc, char **dlargv);
+
+int dlfilter__start(struct dlfilter *d, struct perf_session *session);
+
+int dlfilter__do_filter_event(struct dlfilter *d,
+			      union perf_event *event,
+			      struct perf_sample *sample,
+			      struct evsel *evsel,
+			      struct machine *machine,
+			      struct addr_location *al,
+			      struct addr_location *addr_al,
+			      bool early);
+
+void dlfilter__cleanup(struct dlfilter *d);
+
+static inline int dlfilter__filter_event(struct dlfilter *d,
+					 union perf_event *event,
+					 struct perf_sample *sample,
+					 struct evsel *evsel,
+					 struct machine *machine,
+					 struct addr_location *al,
+					 struct addr_location *addr_al)
+{
+	if (!d || !d->filter_event)
+		return 0;
+	return dlfilter__do_filter_event(d, event, sample, evsel, machine, al, addr_al, false);
+}
+
+static inline int dlfilter__filter_event_early(struct dlfilter *d,
+					       union perf_event *event,
+					       struct perf_sample *sample,
+					       struct evsel *evsel,
+					       struct machine *machine,
+					       struct addr_location *al,
+					       struct addr_location *addr_al)
+{
+	if (!d || !d->filter_event_early)
+		return 0;
+	return dlfilter__do_filter_event(d, event, sample, evsel, machine, al, addr_al, true);
+}
+
+int list_available_dlfilters(const struct option *opt, const char *s, int unset);
+bool get_filter_desc(const char *dirname, const char *name, char **desc,
+		     char **long_desc);
+
+#endif
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
new file mode 100644
index 000000000..f1a14c0ad
--- /dev/null
+++ b/tools/perf/util/dso.c
@@ -0,0 +1,1485 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <asm/bug.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#ifdef HAVE_LIBBPF_SUPPORT
+#include <bpf/libbpf.h>
+#include "bpf-event.h"
+#include "bpf-utils.h"
+#endif
+#include "compress.h"
+#include "env.h"
+#include "namespaces.h"
+#include "path.h"
+#include "map.h"
+#include "symbol.h"
+#include "srcline.h"
+#include "dso.h"
+#include "dsos.h"
+#include "machine.h"
+#include "auxtrace.h"
+#include "util.h" /* O_CLOEXEC for older systems */
+#include "debug.h"
+#include "string2.h"
+#include "vdso.h"
+
+static const char * const debuglink_paths[] = {
+	"%.0s%s",
+	"%s/%s",
+	"%s/.debug/%s",
+	"/usr/lib/debug%s/%s"
+};
+
+char dso__symtab_origin(const struct dso *dso)
+{
+	static const char origin[] = {
+		[DSO_BINARY_TYPE__KALLSYMS]			= 'k',
+		[DSO_BINARY_TYPE__VMLINUX]			= 'v',
+		[DSO_BINARY_TYPE__JAVA_JIT]			= 'j',
+		[DSO_BINARY_TYPE__DEBUGLINK]			= 'l',
+		[DSO_BINARY_TYPE__BUILD_ID_CACHE]		= 'B',
+		[DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO]	= 'D',
+		[DSO_BINARY_TYPE__FEDORA_DEBUGINFO]		= 'f',
+		[DSO_BINARY_TYPE__UBUNTU_DEBUGINFO]		= 'u',
+		[DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO]	= 'x',
+		[DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO]	= 'o',
+		[DSO_BINARY_TYPE__BUILDID_DEBUGINFO]		= 'b',
+		[DSO_BINARY_TYPE__SYSTEM_PATH_DSO]		= 'd',
+		[DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE]		= 'K',
+		[DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP]	= 'm',
+		[DSO_BINARY_TYPE__GUEST_KALLSYMS]		= 'g',
+		[DSO_BINARY_TYPE__GUEST_KMODULE]		= 'G',
+		[DSO_BINARY_TYPE__GUEST_KMODULE_COMP]		= 'M',
+		[DSO_BINARY_TYPE__GUEST_VMLINUX]		= 'V',
+	};
+
+	if (dso == NULL || dso->symtab_type == DSO_BINARY_TYPE__NOT_FOUND)
+		return '!';
+	return origin[dso->symtab_type];
+}
+
+int dso__read_binary_type_filename(const struct dso *dso,
+				   enum dso_binary_type type,
+				   char *root_dir, char *filename, size_t size)
+{
+	char build_id_hex[SBUILD_ID_SIZE];
+	int ret = 0;
+	size_t len;
+
+	switch (type) {
+	case DSO_BINARY_TYPE__DEBUGLINK:
+	{
+		const char *last_slash;
+		char dso_dir[PATH_MAX];
+		char symfile[PATH_MAX];
+		unsigned int i;
+
+		len = __symbol__join_symfs(filename, size, dso->long_name);
+		last_slash = filename + len;
+		while (last_slash != filename && *last_slash != '/')
+			last_slash--;
+
+		strncpy(dso_dir, filename, last_slash - filename);
+		dso_dir[last_slash-filename] = '\0';
+
+		if (!is_regular_file(filename)) {
+			ret = -1;
+			break;
+		}
+
+		ret = filename__read_debuglink(filename, symfile, PATH_MAX);
+		if (ret)
+			break;
+
+		/* Check predefined locations where debug file might reside */
+		ret = -1;
+		for (i = 0; i < ARRAY_SIZE(debuglink_paths); i++) {
+			snprintf(filename, size,
+					debuglink_paths[i], dso_dir, symfile);
+			if (is_regular_file(filename)) {
+				ret = 0;
+				break;
+			}
+		}
+
+		break;
+	}
+	case DSO_BINARY_TYPE__BUILD_ID_CACHE:
+		if (dso__build_id_filename(dso, filename, size, false) == NULL)
+			ret = -1;
+		break;
+
+	case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO:
+		if (dso__build_id_filename(dso, filename, size, true) == NULL)
+			ret = -1;
+		break;
+
+	case DSO_BINARY_TYPE__FEDORA_DEBUGINFO:
+		len = __symbol__join_symfs(filename, size, "/usr/lib/debug");
+		snprintf(filename + len, size - len, "%s.debug", dso->long_name);
+		break;
+
+	case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO:
+		len = __symbol__join_symfs(filename, size, "/usr/lib/debug");
+		snprintf(filename + len, size - len, "%s", dso->long_name);
+		break;
+
+	case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
+		/*
+		 * Ubuntu can mixup /usr/lib with /lib, putting debuginfo in
+		 * /usr/lib/debug/lib when it is expected to be in
+		 * /usr/lib/debug/usr/lib
+		 */
+		if (strlen(dso->long_name) < 9 ||
+		    strncmp(dso->long_name, "/usr/lib/", 9)) {
+			ret = -1;
+			break;
+		}
+		len = __symbol__join_symfs(filename, size, "/usr/lib/debug");
+		snprintf(filename + len, size - len, "%s", dso->long_name + 4);
+		break;
+
+	case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
+	{
+		const char *last_slash;
+		size_t dir_size;
+
+		last_slash = dso->long_name + dso->long_name_len;
+		while (last_slash != dso->long_name && *last_slash != '/')
+			last_slash--;
+
+		len = __symbol__join_symfs(filename, size, "");
+		dir_size = last_slash - dso->long_name + 2;
+		if (dir_size > (size - len)) {
+			ret = -1;
+			break;
+		}
+		len += scnprintf(filename + len, dir_size, "%s",  dso->long_name);
+		len += scnprintf(filename + len , size - len, ".debug%s",
+								last_slash);
+		break;
+	}
+
+	case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
+		if (!dso->has_build_id) {
+			ret = -1;
+			break;
+		}
+
+		build_id__sprintf(&dso->bid, build_id_hex);
+		len = __symbol__join_symfs(filename, size, "/usr/lib/debug/.build-id/");
+		snprintf(filename + len, size - len, "%.2s/%s.debug",
+			 build_id_hex, build_id_hex + 2);
+		break;
+
+	case DSO_BINARY_TYPE__VMLINUX:
+	case DSO_BINARY_TYPE__GUEST_VMLINUX:
+	case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
+		__symbol__join_symfs(filename, size, dso->long_name);
+		break;
+
+	case DSO_BINARY_TYPE__GUEST_KMODULE:
+	case DSO_BINARY_TYPE__GUEST_KMODULE_COMP:
+		path__join3(filename, size, symbol_conf.symfs,
+			    root_dir, dso->long_name);
+		break;
+
+	case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE:
+	case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP:
+		__symbol__join_symfs(filename, size, dso->long_name);
+		break;
+
+	case DSO_BINARY_TYPE__KCORE:
+	case DSO_BINARY_TYPE__GUEST_KCORE:
+		snprintf(filename, size, "%s", dso->long_name);
+		break;
+
+	default:
+	case DSO_BINARY_TYPE__KALLSYMS:
+	case DSO_BINARY_TYPE__GUEST_KALLSYMS:
+	case DSO_BINARY_TYPE__JAVA_JIT:
+	case DSO_BINARY_TYPE__BPF_PROG_INFO:
+	case DSO_BINARY_TYPE__BPF_IMAGE:
+	case DSO_BINARY_TYPE__OOL:
+	case DSO_BINARY_TYPE__NOT_FOUND:
+		ret = -1;
+		break;
+	}
+
+	return ret;
+}
+
+enum {
+	COMP_ID__NONE = 0,
+};
+
+static const struct {
+	const char *fmt;
+	int (*decompress)(const char *input, int output);
+	bool (*is_compressed)(const char *input);
+} compressions[] = {
+	[COMP_ID__NONE] = { .fmt = NULL, },
+#ifdef HAVE_ZLIB_SUPPORT
+	{ "gz", gzip_decompress_to_file, gzip_is_compressed },
+#endif
+#ifdef HAVE_LZMA_SUPPORT
+	{ "xz", lzma_decompress_to_file, lzma_is_compressed },
+#endif
+	{ NULL, NULL, NULL },
+};
+
+static int is_supported_compression(const char *ext)
+{
+	unsigned i;
+
+	for (i = 1; compressions[i].fmt; i++) {
+		if (!strcmp(ext, compressions[i].fmt))
+			return i;
+	}
+	return COMP_ID__NONE;
+}
+
+bool is_kernel_module(const char *pathname, int cpumode)
+{
+	struct kmod_path m;
+	int mode = cpumode & PERF_RECORD_MISC_CPUMODE_MASK;
+
+	WARN_ONCE(mode != cpumode,
+		  "Internal error: passing unmasked cpumode (%x) to is_kernel_module",
+		  cpumode);
+
+	switch (mode) {
+	case PERF_RECORD_MISC_USER:
+	case PERF_RECORD_MISC_HYPERVISOR:
+	case PERF_RECORD_MISC_GUEST_USER:
+		return false;
+	/* Treat PERF_RECORD_MISC_CPUMODE_UNKNOWN as kernel */
+	default:
+		if (kmod_path__parse(&m, pathname)) {
+			pr_err("Failed to check whether %s is a kernel module or not. Assume it is.",
+					pathname);
+			return true;
+		}
+	}
+
+	return m.kmod;
+}
+
+bool dso__needs_decompress(struct dso *dso)
+{
+	return dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP ||
+		dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
+}
+
+int filename__decompress(const char *name, char *pathname,
+			 size_t len, int comp, int *err)
+{
+	char tmpbuf[] = KMOD_DECOMP_NAME;
+	int fd = -1;
+
+	/*
+	 * We have proper compression id for DSO and yet the file
+	 * behind the 'name' can still be plain uncompressed object.
+	 *
+	 * The reason is behind the logic we open the DSO object files,
+	 * when we try all possible 'debug' objects until we find the
+	 * data. So even if the DSO is represented by 'krava.xz' module,
+	 * we can end up here opening ~/.debug/....23432432/debug' file
+	 * which is not compressed.
+	 *
+	 * To keep this transparent, we detect this and return the file
+	 * descriptor to the uncompressed file.
+	 */
+	if (!compressions[comp].is_compressed(name))
+		return open(name, O_RDONLY);
+
+	fd = mkstemp(tmpbuf);
+	if (fd < 0) {
+		*err = errno;
+		return -1;
+	}
+
+	if (compressions[comp].decompress(name, fd)) {
+		*err = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE;
+		close(fd);
+		fd = -1;
+	}
+
+	if (!pathname || (fd < 0))
+		unlink(tmpbuf);
+
+	if (pathname && (fd >= 0))
+		strlcpy(pathname, tmpbuf, len);
+
+	return fd;
+}
+
+static int decompress_kmodule(struct dso *dso, const char *name,
+			      char *pathname, size_t len)
+{
+	if (!dso__needs_decompress(dso))
+		return -1;
+
+	if (dso->comp == COMP_ID__NONE)
+		return -1;
+
+	return filename__decompress(name, pathname, len, dso->comp,
+				    &dso->load_errno);
+}
+
+int dso__decompress_kmodule_fd(struct dso *dso, const char *name)
+{
+	return decompress_kmodule(dso, name, NULL, 0);
+}
+
+int dso__decompress_kmodule_path(struct dso *dso, const char *name,
+				 char *pathname, size_t len)
+{
+	int fd = decompress_kmodule(dso, name, pathname, len);
+
+	close(fd);
+	return fd >= 0 ? 0 : -1;
+}
+
+/*
+ * Parses kernel module specified in @path and updates
+ * @m argument like:
+ *
+ *    @comp - true if @path contains supported compression suffix,
+ *            false otherwise
+ *    @kmod - true if @path contains '.ko' suffix in right position,
+ *            false otherwise
+ *    @name - if (@alloc_name && @kmod) is true, it contains strdup-ed base name
+ *            of the kernel module without suffixes, otherwise strudup-ed
+ *            base name of @path
+ *    @ext  - if (@alloc_ext && @comp) is true, it contains strdup-ed string
+ *            the compression suffix
+ *
+ * Returns 0 if there's no strdup error, -ENOMEM otherwise.
+ */
+int __kmod_path__parse(struct kmod_path *m, const char *path,
+		       bool alloc_name)
+{
+	const char *name = strrchr(path, '/');
+	const char *ext  = strrchr(path, '.');
+	bool is_simple_name = false;
+
+	memset(m, 0x0, sizeof(*m));
+	name = name ? name + 1 : path;
+
+	/*
+	 * '.' is also a valid character for module name. For example:
+	 * [aaa.bbb] is a valid module name. '[' should have higher
+	 * priority than '.ko' suffix.
+	 *
+	 * The kernel names are from machine__mmap_name. Such
+	 * name should belong to kernel itself, not kernel module.
+	 */
+	if (name[0] == '[') {
+		is_simple_name = true;
+		if ((strncmp(name, "[kernel.kallsyms]", 17) == 0) ||
+		    (strncmp(name, "[guest.kernel.kallsyms", 22) == 0) ||
+		    (strncmp(name, "[vdso]", 6) == 0) ||
+		    (strncmp(name, "[vdso32]", 8) == 0) ||
+		    (strncmp(name, "[vdsox32]", 9) == 0) ||
+		    (strncmp(name, "[vsyscall]", 10) == 0)) {
+			m->kmod = false;
+
+		} else
+			m->kmod = true;
+	}
+
+	/* No extension, just return name. */
+	if ((ext == NULL) || is_simple_name) {
+		if (alloc_name) {
+			m->name = strdup(name);
+			return m->name ? 0 : -ENOMEM;
+		}
+		return 0;
+	}
+
+	m->comp = is_supported_compression(ext + 1);
+	if (m->comp > COMP_ID__NONE)
+		ext -= 3;
+
+	/* Check .ko extension only if there's enough name left. */
+	if (ext > name)
+		m->kmod = !strncmp(ext, ".ko", 3);
+
+	if (alloc_name) {
+		if (m->kmod) {
+			if (asprintf(&m->name, "[%.*s]", (int) (ext - name), name) == -1)
+				return -ENOMEM;
+		} else {
+			if (asprintf(&m->name, "%s", name) == -1)
+				return -ENOMEM;
+		}
+
+		strreplace(m->name, '-', '_');
+	}
+
+	return 0;
+}
+
+void dso__set_module_info(struct dso *dso, struct kmod_path *m,
+			  struct machine *machine)
+{
+	if (machine__is_host(machine))
+		dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE;
+	else
+		dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE;
+
+	/* _KMODULE_COMP should be next to _KMODULE */
+	if (m->kmod && m->comp) {
+		dso->symtab_type++;
+		dso->comp = m->comp;
+	}
+
+	dso__set_short_name(dso, strdup(m->name), true);
+}
+
+/*
+ * Global list of open DSOs and the counter.
+ */
+static LIST_HEAD(dso__data_open);
+static long dso__data_open_cnt;
+static pthread_mutex_t dso__data_open_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static void dso__list_add(struct dso *dso)
+{
+	list_add_tail(&dso->data.open_entry, &dso__data_open);
+	dso__data_open_cnt++;
+}
+
+static void dso__list_del(struct dso *dso)
+{
+	list_del_init(&dso->data.open_entry);
+	WARN_ONCE(dso__data_open_cnt <= 0,
+		  "DSO data fd counter out of bounds.");
+	dso__data_open_cnt--;
+}
+
+static void close_first_dso(void);
+
+static int do_open(char *name)
+{
+	int fd;
+	char sbuf[STRERR_BUFSIZE];
+
+	do {
+		fd = open(name, O_RDONLY|O_CLOEXEC);
+		if (fd >= 0)
+			return fd;
+
+		pr_debug("dso open failed: %s\n",
+			 str_error_r(errno, sbuf, sizeof(sbuf)));
+		if (!dso__data_open_cnt || errno != EMFILE)
+			break;
+
+		close_first_dso();
+	} while (1);
+
+	return -1;
+}
+
+static int __open_dso(struct dso *dso, struct machine *machine)
+{
+	int fd = -EINVAL;
+	char *root_dir = (char *)"";
+	char *name = malloc(PATH_MAX);
+	bool decomp = false;
+
+	if (!name)
+		return -ENOMEM;
+
+	mutex_lock(&dso->lock);
+	if (machine)
+		root_dir = machine->root_dir;
+
+	if (dso__read_binary_type_filename(dso, dso->binary_type,
+					    root_dir, name, PATH_MAX))
+		goto out;
+
+	if (!is_regular_file(name)) {
+		char *new_name;
+
+		if (errno != ENOENT || dso->nsinfo == NULL)
+			goto out;
+
+		new_name = filename_with_chroot(dso->nsinfo->pid, name);
+		if (!new_name)
+			goto out;
+
+		free(name);
+		name = new_name;
+	}
+
+	if (dso__needs_decompress(dso)) {
+		char newpath[KMOD_DECOMP_LEN];
+		size_t len = sizeof(newpath);
+
+		if (dso__decompress_kmodule_path(dso, name, newpath, len) < 0) {
+			fd = -dso->load_errno;
+			goto out;
+		}
+
+		decomp = true;
+		strcpy(name, newpath);
+	}
+
+	fd = do_open(name);
+
+	if (decomp)
+		unlink(name);
+
+out:
+	mutex_unlock(&dso->lock);
+	free(name);
+	return fd;
+}
+
+static void check_data_close(void);
+
+/**
+ * dso_close - Open DSO data file
+ * @dso: dso object
+ *
+ * Open @dso's data file descriptor and updates
+ * list/count of open DSO objects.
+ */
+static int open_dso(struct dso *dso, struct machine *machine)
+{
+	int fd;
+	struct nscookie nsc;
+
+	if (dso->binary_type != DSO_BINARY_TYPE__BUILD_ID_CACHE) {
+		mutex_lock(&dso->lock);
+		nsinfo__mountns_enter(dso->nsinfo, &nsc);
+		mutex_unlock(&dso->lock);
+	}
+	fd = __open_dso(dso, machine);
+	if (dso->binary_type != DSO_BINARY_TYPE__BUILD_ID_CACHE)
+		nsinfo__mountns_exit(&nsc);
+
+	if (fd >= 0) {
+		dso__list_add(dso);
+		/*
+		 * Check if we crossed the allowed number
+		 * of opened DSOs and close one if needed.
+		 */
+		check_data_close();
+	}
+
+	return fd;
+}
+
+static void close_data_fd(struct dso *dso)
+{
+	if (dso->data.fd >= 0) {
+		close(dso->data.fd);
+		dso->data.fd = -1;
+		dso->data.file_size = 0;
+		dso__list_del(dso);
+	}
+}
+
+/**
+ * dso_close - Close DSO data file
+ * @dso: dso object
+ *
+ * Close @dso's data file descriptor and updates
+ * list/count of open DSO objects.
+ */
+static void close_dso(struct dso *dso)
+{
+	close_data_fd(dso);
+}
+
+static void close_first_dso(void)
+{
+	struct dso *dso;
+
+	dso = list_first_entry(&dso__data_open, struct dso, data.open_entry);
+	close_dso(dso);
+}
+
+static rlim_t get_fd_limit(void)
+{
+	struct rlimit l;
+	rlim_t limit = 0;
+
+	/* Allow half of the current open fd limit. */
+	if (getrlimit(RLIMIT_NOFILE, &l) == 0) {
+		if (l.rlim_cur == RLIM_INFINITY)
+			limit = l.rlim_cur;
+		else
+			limit = l.rlim_cur / 2;
+	} else {
+		pr_err("failed to get fd limit\n");
+		limit = 1;
+	}
+
+	return limit;
+}
+
+static rlim_t fd_limit;
+
+/*
+ * Used only by tests/dso-data.c to reset the environment
+ * for tests. I dont expect we should change this during
+ * standard runtime.
+ */
+void reset_fd_limit(void)
+{
+	fd_limit = 0;
+}
+
+static bool may_cache_fd(void)
+{
+	if (!fd_limit)
+		fd_limit = get_fd_limit();
+
+	if (fd_limit == RLIM_INFINITY)
+		return true;
+
+	return fd_limit > (rlim_t) dso__data_open_cnt;
+}
+
+/*
+ * Check and close LRU dso if we crossed allowed limit
+ * for opened dso file descriptors. The limit is half
+ * of the RLIMIT_NOFILE files opened.
+*/
+static void check_data_close(void)
+{
+	bool cache_fd = may_cache_fd();
+
+	if (!cache_fd)
+		close_first_dso();
+}
+
+/**
+ * dso__data_close - Close DSO data file
+ * @dso: dso object
+ *
+ * External interface to close @dso's data file descriptor.
+ */
+void dso__data_close(struct dso *dso)
+{
+	pthread_mutex_lock(&dso__data_open_lock);
+	close_dso(dso);
+	pthread_mutex_unlock(&dso__data_open_lock);
+}
+
+static void try_to_open_dso(struct dso *dso, struct machine *machine)
+{
+	enum dso_binary_type binary_type_data[] = {
+		DSO_BINARY_TYPE__BUILD_ID_CACHE,
+		DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
+		DSO_BINARY_TYPE__NOT_FOUND,
+	};
+	int i = 0;
+
+	if (dso->data.fd >= 0)
+		return;
+
+	if (dso->binary_type != DSO_BINARY_TYPE__NOT_FOUND) {
+		dso->data.fd = open_dso(dso, machine);
+		goto out;
+	}
+
+	do {
+		dso->binary_type = binary_type_data[i++];
+
+		dso->data.fd = open_dso(dso, machine);
+		if (dso->data.fd >= 0)
+			goto out;
+
+	} while (dso->binary_type != DSO_BINARY_TYPE__NOT_FOUND);
+out:
+	if (dso->data.fd >= 0)
+		dso->data.status = DSO_DATA_STATUS_OK;
+	else
+		dso->data.status = DSO_DATA_STATUS_ERROR;
+}
+
+/**
+ * dso__data_get_fd - Get dso's data file descriptor
+ * @dso: dso object
+ * @machine: machine object
+ *
+ * External interface to find dso's file, open it and
+ * returns file descriptor.  It should be paired with
+ * dso__data_put_fd() if it returns non-negative value.
+ */
+int dso__data_get_fd(struct dso *dso, struct machine *machine)
+{
+	if (dso->data.status == DSO_DATA_STATUS_ERROR)
+		return -1;
+
+	if (pthread_mutex_lock(&dso__data_open_lock) < 0)
+		return -1;
+
+	try_to_open_dso(dso, machine);
+
+	if (dso->data.fd < 0)
+		pthread_mutex_unlock(&dso__data_open_lock);
+
+	return dso->data.fd;
+}
+
+void dso__data_put_fd(struct dso *dso __maybe_unused)
+{
+	pthread_mutex_unlock(&dso__data_open_lock);
+}
+
+bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by)
+{
+	u32 flag = 1 << by;
+
+	if (dso->data.status_seen & flag)
+		return true;
+
+	dso->data.status_seen |= flag;
+
+	return false;
+}
+
+#ifdef HAVE_LIBBPF_SUPPORT
+static ssize_t bpf_read(struct dso *dso, u64 offset, char *data)
+{
+	struct bpf_prog_info_node *node;
+	ssize_t size = DSO__DATA_CACHE_SIZE;
+	u64 len;
+	u8 *buf;
+
+	node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, dso->bpf_prog.id);
+	if (!node || !node->info_linear) {
+		dso->data.status = DSO_DATA_STATUS_ERROR;
+		return -1;
+	}
+
+	len = node->info_linear->info.jited_prog_len;
+	buf = (u8 *)(uintptr_t)node->info_linear->info.jited_prog_insns;
+
+	if (offset >= len)
+		return -1;
+
+	size = (ssize_t)min(len - offset, (u64)size);
+	memcpy(data, buf + offset, size);
+	return size;
+}
+
+static int bpf_size(struct dso *dso)
+{
+	struct bpf_prog_info_node *node;
+
+	node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, dso->bpf_prog.id);
+	if (!node || !node->info_linear) {
+		dso->data.status = DSO_DATA_STATUS_ERROR;
+		return -1;
+	}
+
+	dso->data.file_size = node->info_linear->info.jited_prog_len;
+	return 0;
+}
+#endif // HAVE_LIBBPF_SUPPORT
+
+static void
+dso_cache__free(struct dso *dso)
+{
+	struct rb_root *root = &dso->data.cache;
+	struct rb_node *next = rb_first(root);
+
+	mutex_lock(&dso->lock);
+	while (next) {
+		struct dso_cache *cache;
+
+		cache = rb_entry(next, struct dso_cache, rb_node);
+		next = rb_next(&cache->rb_node);
+		rb_erase(&cache->rb_node, root);
+		free(cache);
+	}
+	mutex_unlock(&dso->lock);
+}
+
+static struct dso_cache *__dso_cache__find(struct dso *dso, u64 offset)
+{
+	const struct rb_root *root = &dso->data.cache;
+	struct rb_node * const *p = &root->rb_node;
+	const struct rb_node *parent = NULL;
+	struct dso_cache *cache;
+
+	while (*p != NULL) {
+		u64 end;
+
+		parent = *p;
+		cache = rb_entry(parent, struct dso_cache, rb_node);
+		end = cache->offset + DSO__DATA_CACHE_SIZE;
+
+		if (offset < cache->offset)
+			p = &(*p)->rb_left;
+		else if (offset >= end)
+			p = &(*p)->rb_right;
+		else
+			return cache;
+	}
+
+	return NULL;
+}
+
+static struct dso_cache *
+dso_cache__insert(struct dso *dso, struct dso_cache *new)
+{
+	struct rb_root *root = &dso->data.cache;
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct dso_cache *cache;
+	u64 offset = new->offset;
+
+	mutex_lock(&dso->lock);
+	while (*p != NULL) {
+		u64 end;
+
+		parent = *p;
+		cache = rb_entry(parent, struct dso_cache, rb_node);
+		end = cache->offset + DSO__DATA_CACHE_SIZE;
+
+		if (offset < cache->offset)
+			p = &(*p)->rb_left;
+		else if (offset >= end)
+			p = &(*p)->rb_right;
+		else
+			goto out;
+	}
+
+	rb_link_node(&new->rb_node, parent, p);
+	rb_insert_color(&new->rb_node, root);
+
+	cache = NULL;
+out:
+	mutex_unlock(&dso->lock);
+	return cache;
+}
+
+static ssize_t dso_cache__memcpy(struct dso_cache *cache, u64 offset, u8 *data,
+				 u64 size, bool out)
+{
+	u64 cache_offset = offset - cache->offset;
+	u64 cache_size   = min(cache->size - cache_offset, size);
+
+	if (out)
+		memcpy(data, cache->data + cache_offset, cache_size);
+	else
+		memcpy(cache->data + cache_offset, data, cache_size);
+	return cache_size;
+}
+
+static ssize_t file_read(struct dso *dso, struct machine *machine,
+			 u64 offset, char *data)
+{
+	ssize_t ret;
+
+	pthread_mutex_lock(&dso__data_open_lock);
+
+	/*
+	 * dso->data.fd might be closed if other thread opened another
+	 * file (dso) due to open file limit (RLIMIT_NOFILE).
+	 */
+	try_to_open_dso(dso, machine);
+
+	if (dso->data.fd < 0) {
+		dso->data.status = DSO_DATA_STATUS_ERROR;
+		ret = -errno;
+		goto out;
+	}
+
+	ret = pread(dso->data.fd, data, DSO__DATA_CACHE_SIZE, offset);
+out:
+	pthread_mutex_unlock(&dso__data_open_lock);
+	return ret;
+}
+
+static struct dso_cache *dso_cache__populate(struct dso *dso,
+					     struct machine *machine,
+					     u64 offset, ssize_t *ret)
+{
+	u64 cache_offset = offset & DSO__DATA_CACHE_MASK;
+	struct dso_cache *cache;
+	struct dso_cache *old;
+
+	cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE);
+	if (!cache) {
+		*ret = -ENOMEM;
+		return NULL;
+	}
+#ifdef HAVE_LIBBPF_SUPPORT
+	if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
+		*ret = bpf_read(dso, cache_offset, cache->data);
+	else
+#endif
+	if (dso->binary_type == DSO_BINARY_TYPE__OOL)
+		*ret = DSO__DATA_CACHE_SIZE;
+	else
+		*ret = file_read(dso, machine, cache_offset, cache->data);
+
+	if (*ret <= 0) {
+		free(cache);
+		return NULL;
+	}
+
+	cache->offset = cache_offset;
+	cache->size   = *ret;
+
+	old = dso_cache__insert(dso, cache);
+	if (old) {
+		/* we lose the race */
+		free(cache);
+		cache = old;
+	}
+
+	return cache;
+}
+
+static struct dso_cache *dso_cache__find(struct dso *dso,
+					 struct machine *machine,
+					 u64 offset,
+					 ssize_t *ret)
+{
+	struct dso_cache *cache = __dso_cache__find(dso, offset);
+
+	return cache ? cache : dso_cache__populate(dso, machine, offset, ret);
+}
+
+static ssize_t dso_cache_io(struct dso *dso, struct machine *machine,
+			    u64 offset, u8 *data, ssize_t size, bool out)
+{
+	struct dso_cache *cache;
+	ssize_t ret = 0;
+
+	cache = dso_cache__find(dso, machine, offset, &ret);
+	if (!cache)
+		return ret;
+
+	return dso_cache__memcpy(cache, offset, data, size, out);
+}
+
+/*
+ * Reads and caches dso data DSO__DATA_CACHE_SIZE size chunks
+ * in the rb_tree. Any read to already cached data is served
+ * by cached data. Writes update the cache only, not the backing file.
+ */
+static ssize_t cached_io(struct dso *dso, struct machine *machine,
+			 u64 offset, u8 *data, ssize_t size, bool out)
+{
+	ssize_t r = 0;
+	u8 *p = data;
+
+	do {
+		ssize_t ret;
+
+		ret = dso_cache_io(dso, machine, offset, p, size, out);
+		if (ret < 0)
+			return ret;
+
+		/* Reached EOF, return what we have. */
+		if (!ret)
+			break;
+
+		BUG_ON(ret > size);
+
+		r      += ret;
+		p      += ret;
+		offset += ret;
+		size   -= ret;
+
+	} while (size);
+
+	return r;
+}
+
+static int file_size(struct dso *dso, struct machine *machine)
+{
+	int ret = 0;
+	struct stat st;
+	char sbuf[STRERR_BUFSIZE];
+
+	pthread_mutex_lock(&dso__data_open_lock);
+
+	/*
+	 * dso->data.fd might be closed if other thread opened another
+	 * file (dso) due to open file limit (RLIMIT_NOFILE).
+	 */
+	try_to_open_dso(dso, machine);
+
+	if (dso->data.fd < 0) {
+		ret = -errno;
+		dso->data.status = DSO_DATA_STATUS_ERROR;
+		goto out;
+	}
+
+	if (fstat(dso->data.fd, &st) < 0) {
+		ret = -errno;
+		pr_err("dso cache fstat failed: %s\n",
+		       str_error_r(errno, sbuf, sizeof(sbuf)));
+		dso->data.status = DSO_DATA_STATUS_ERROR;
+		goto out;
+	}
+	dso->data.file_size = st.st_size;
+
+out:
+	pthread_mutex_unlock(&dso__data_open_lock);
+	return ret;
+}
+
+int dso__data_file_size(struct dso *dso, struct machine *machine)
+{
+	if (dso->data.file_size)
+		return 0;
+
+	if (dso->data.status == DSO_DATA_STATUS_ERROR)
+		return -1;
+#ifdef HAVE_LIBBPF_SUPPORT
+	if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
+		return bpf_size(dso);
+#endif
+	return file_size(dso, machine);
+}
+
+/**
+ * dso__data_size - Return dso data size
+ * @dso: dso object
+ * @machine: machine object
+ *
+ * Return: dso data size
+ */
+off_t dso__data_size(struct dso *dso, struct machine *machine)
+{
+	if (dso__data_file_size(dso, machine))
+		return -1;
+
+	/* For now just estimate dso data size is close to file size */
+	return dso->data.file_size;
+}
+
+static ssize_t data_read_write_offset(struct dso *dso, struct machine *machine,
+				      u64 offset, u8 *data, ssize_t size,
+				      bool out)
+{
+	if (dso__data_file_size(dso, machine))
+		return -1;
+
+	/* Check the offset sanity. */
+	if (offset > dso->data.file_size)
+		return -1;
+
+	if (offset + size < offset)
+		return -1;
+
+	return cached_io(dso, machine, offset, data, size, out);
+}
+
+/**
+ * dso__data_read_offset - Read data from dso file offset
+ * @dso: dso object
+ * @machine: machine object
+ * @offset: file offset
+ * @data: buffer to store data
+ * @size: size of the @data buffer
+ *
+ * External interface to read data from dso file offset. Open
+ * dso data file and use cached_read to get the data.
+ */
+ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
+			      u64 offset, u8 *data, ssize_t size)
+{
+	if (dso->data.status == DSO_DATA_STATUS_ERROR)
+		return -1;
+
+	return data_read_write_offset(dso, machine, offset, data, size, true);
+}
+
+/**
+ * dso__data_read_addr - Read data from dso address
+ * @dso: dso object
+ * @machine: machine object
+ * @add: virtual memory address
+ * @data: buffer to store data
+ * @size: size of the @data buffer
+ *
+ * External interface to read data from dso address.
+ */
+ssize_t dso__data_read_addr(struct dso *dso, struct map *map,
+			    struct machine *machine, u64 addr,
+			    u8 *data, ssize_t size)
+{
+	u64 offset = map->map_ip(map, addr);
+	return dso__data_read_offset(dso, machine, offset, data, size);
+}
+
+/**
+ * dso__data_write_cache_offs - Write data to dso data cache at file offset
+ * @dso: dso object
+ * @machine: machine object
+ * @offset: file offset
+ * @data: buffer to write
+ * @size: size of the @data buffer
+ *
+ * Write into the dso file data cache, but do not change the file itself.
+ */
+ssize_t dso__data_write_cache_offs(struct dso *dso, struct machine *machine,
+				   u64 offset, const u8 *data_in, ssize_t size)
+{
+	u8 *data = (u8 *)data_in; /* cast away const to use same fns for r/w */
+
+	if (dso->data.status == DSO_DATA_STATUS_ERROR)
+		return -1;
+
+	return data_read_write_offset(dso, machine, offset, data, size, false);
+}
+
+/**
+ * dso__data_write_cache_addr - Write data to dso data cache at dso address
+ * @dso: dso object
+ * @machine: machine object
+ * @add: virtual memory address
+ * @data: buffer to write
+ * @size: size of the @data buffer
+ *
+ * External interface to write into the dso file data cache, but do not change
+ * the file itself.
+ */
+ssize_t dso__data_write_cache_addr(struct dso *dso, struct map *map,
+				   struct machine *machine, u64 addr,
+				   const u8 *data, ssize_t size)
+{
+	u64 offset = map->map_ip(map, addr);
+	return dso__data_write_cache_offs(dso, machine, offset, data, size);
+}
+
+struct map *dso__new_map(const char *name)
+{
+	struct map *map = NULL;
+	struct dso *dso = dso__new(name);
+
+	if (dso) {
+		map = map__new2(0, dso);
+		dso__put(dso);
+	}
+
+	return map;
+}
+
+struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
+				    const char *short_name, int dso_type)
+{
+	/*
+	 * The kernel dso could be created by build_id processing.
+	 */
+	struct dso *dso = machine__findnew_dso(machine, name);
+
+	/*
+	 * We need to run this in all cases, since during the build_id
+	 * processing we had no idea this was the kernel dso.
+	 */
+	if (dso != NULL) {
+		dso__set_short_name(dso, short_name, false);
+		dso->kernel = dso_type;
+	}
+
+	return dso;
+}
+
+static void dso__set_long_name_id(struct dso *dso, const char *name, struct dso_id *id, bool name_allocated)
+{
+	struct rb_root *root = dso->root;
+
+	if (name == NULL)
+		return;
+
+	if (dso->long_name_allocated)
+		free((char *)dso->long_name);
+
+	if (root) {
+		rb_erase(&dso->rb_node, root);
+		/*
+		 * __dsos__findnew_link_by_longname_id() isn't guaranteed to
+		 * add it back, so a clean removal is required here.
+		 */
+		RB_CLEAR_NODE(&dso->rb_node);
+		dso->root = NULL;
+	}
+
+	dso->long_name		 = name;
+	dso->long_name_len	 = strlen(name);
+	dso->long_name_allocated = name_allocated;
+
+	if (root)
+		__dsos__findnew_link_by_longname_id(root, dso, NULL, id);
+}
+
+void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated)
+{
+	dso__set_long_name_id(dso, name, NULL, name_allocated);
+}
+
+void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated)
+{
+	if (name == NULL)
+		return;
+
+	if (dso->short_name_allocated)
+		free((char *)dso->short_name);
+
+	dso->short_name		  = name;
+	dso->short_name_len	  = strlen(name);
+	dso->short_name_allocated = name_allocated;
+}
+
+int dso__name_len(const struct dso *dso)
+{
+	if (!dso)
+		return strlen("[unknown]");
+	if (verbose > 0)
+		return dso->long_name_len;
+
+	return dso->short_name_len;
+}
+
+bool dso__loaded(const struct dso *dso)
+{
+	return dso->loaded;
+}
+
+bool dso__sorted_by_name(const struct dso *dso)
+{
+	return dso->sorted_by_name;
+}
+
+void dso__set_sorted_by_name(struct dso *dso)
+{
+	dso->sorted_by_name = true;
+}
+
+struct dso *dso__new_id(const char *name, struct dso_id *id)
+{
+	struct dso *dso = calloc(1, sizeof(*dso) + strlen(name) + 1);
+
+	if (dso != NULL) {
+		strcpy(dso->name, name);
+		if (id)
+			dso->id = *id;
+		dso__set_long_name_id(dso, dso->name, id, false);
+		dso__set_short_name(dso, dso->name, false);
+		dso->symbols = dso->symbol_names = RB_ROOT_CACHED;
+		dso->data.cache = RB_ROOT;
+		dso->inlined_nodes = RB_ROOT_CACHED;
+		dso->srclines = RB_ROOT_CACHED;
+		dso->data.fd = -1;
+		dso->data.status = DSO_DATA_STATUS_UNKNOWN;
+		dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND;
+		dso->binary_type = DSO_BINARY_TYPE__NOT_FOUND;
+		dso->is_64_bit = (sizeof(void *) == 8);
+		dso->loaded = 0;
+		dso->rel = 0;
+		dso->sorted_by_name = 0;
+		dso->has_build_id = 0;
+		dso->has_srcline = 1;
+		dso->a2l_fails = 1;
+		dso->kernel = DSO_SPACE__USER;
+		dso->needs_swap = DSO_SWAP__UNSET;
+		dso->comp = COMP_ID__NONE;
+		RB_CLEAR_NODE(&dso->rb_node);
+		dso->root = NULL;
+		INIT_LIST_HEAD(&dso->node);
+		INIT_LIST_HEAD(&dso->data.open_entry);
+		mutex_init(&dso->lock);
+		refcount_set(&dso->refcnt, 1);
+	}
+
+	return dso;
+}
+
+struct dso *dso__new(const char *name)
+{
+	return dso__new_id(name, NULL);
+}
+
+void dso__delete(struct dso *dso)
+{
+	if (!RB_EMPTY_NODE(&dso->rb_node))
+		pr_err("DSO %s is still in rbtree when being deleted!\n",
+		       dso->long_name);
+
+	/* free inlines first, as they reference symbols */
+	inlines__tree_delete(&dso->inlined_nodes);
+	srcline__tree_delete(&dso->srclines);
+	symbols__delete(&dso->symbols);
+
+	if (dso->short_name_allocated) {
+		zfree((char **)&dso->short_name);
+		dso->short_name_allocated = false;
+	}
+
+	if (dso->long_name_allocated) {
+		zfree((char **)&dso->long_name);
+		dso->long_name_allocated = false;
+	}
+
+	dso__data_close(dso);
+	auxtrace_cache__free(dso->auxtrace_cache);
+	dso_cache__free(dso);
+	dso__free_a2l(dso);
+	zfree(&dso->symsrc_filename);
+	nsinfo__zput(dso->nsinfo);
+	mutex_destroy(&dso->lock);
+	free(dso);
+}
+
+struct dso *dso__get(struct dso *dso)
+{
+	if (dso)
+		refcount_inc(&dso->refcnt);
+	return dso;
+}
+
+void dso__put(struct dso *dso)
+{
+	if (dso && refcount_dec_and_test(&dso->refcnt))
+		dso__delete(dso);
+}
+
+void dso__set_build_id(struct dso *dso, struct build_id *bid)
+{
+	dso->bid = *bid;
+	dso->has_build_id = 1;
+}
+
+bool dso__build_id_equal(const struct dso *dso, struct build_id *bid)
+{
+	if (dso->bid.size > bid->size && dso->bid.size == BUILD_ID_SIZE) {
+		/*
+		 * For the backward compatibility, it allows a build-id has
+		 * trailing zeros.
+		 */
+		return !memcmp(dso->bid.data, bid->data, bid->size) &&
+			!memchr_inv(&dso->bid.data[bid->size], 0,
+				    dso->bid.size - bid->size);
+	}
+
+	return dso->bid.size == bid->size &&
+	       memcmp(dso->bid.data, bid->data, dso->bid.size) == 0;
+}
+
+void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine)
+{
+	char path[PATH_MAX];
+
+	if (machine__is_default_guest(machine))
+		return;
+	sprintf(path, "%s/sys/kernel/notes", machine->root_dir);
+	if (sysfs__read_build_id(path, &dso->bid) == 0)
+		dso->has_build_id = true;
+}
+
+int dso__kernel_module_get_build_id(struct dso *dso,
+				    const char *root_dir)
+{
+	char filename[PATH_MAX];
+	/*
+	 * kernel module short names are of the form "[module]" and
+	 * we need just "module" here.
+	 */
+	const char *name = dso->short_name + 1;
+
+	snprintf(filename, sizeof(filename),
+		 "%s/sys/module/%.*s/notes/.note.gnu.build-id",
+		 root_dir, (int)strlen(name) - 1, name);
+
+	if (sysfs__read_build_id(filename, &dso->bid) == 0)
+		dso->has_build_id = true;
+
+	return 0;
+}
+
+static size_t dso__fprintf_buildid(struct dso *dso, FILE *fp)
+{
+	char sbuild_id[SBUILD_ID_SIZE];
+
+	build_id__sprintf(&dso->bid, sbuild_id);
+	return fprintf(fp, "%s", sbuild_id);
+}
+
+size_t dso__fprintf(struct dso *dso, FILE *fp)
+{
+	struct rb_node *nd;
+	size_t ret = fprintf(fp, "dso: %s (", dso->short_name);
+
+	if (dso->short_name != dso->long_name)
+		ret += fprintf(fp, "%s, ", dso->long_name);
+	ret += fprintf(fp, "%sloaded, ", dso__loaded(dso) ? "" : "NOT ");
+	ret += dso__fprintf_buildid(dso, fp);
+	ret += fprintf(fp, ")\n");
+	for (nd = rb_first_cached(&dso->symbols); nd; nd = rb_next(nd)) {
+		struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
+		ret += symbol__fprintf(pos, fp);
+	}
+
+	return ret;
+}
+
+enum dso_type dso__type(struct dso *dso, struct machine *machine)
+{
+	int fd;
+	enum dso_type type = DSO__TYPE_UNKNOWN;
+
+	fd = dso__data_get_fd(dso, machine);
+	if (fd >= 0) {
+		type = dso__type_fd(fd);
+		dso__data_put_fd(dso);
+	}
+
+	return type;
+}
+
+int dso__strerror_load(struct dso *dso, char *buf, size_t buflen)
+{
+	int idx, errnum = dso->load_errno;
+	/*
+	 * This must have a same ordering as the enum dso_load_errno.
+	 */
+	static const char *dso_load__error_str[] = {
+	"Internal tools/perf/ library error",
+	"Invalid ELF file",
+	"Can not read build id",
+	"Mismatching build id",
+	"Decompression failure",
+	};
+
+	BUG_ON(buflen == 0);
+
+	if (errnum >= 0) {
+		const char *err = str_error_r(errnum, buf, buflen);
+
+		if (err != buf)
+			scnprintf(buf, buflen, "%s", err);
+
+		return 0;
+	}
+
+	if (errnum <  __DSO_LOAD_ERRNO__START || errnum >= __DSO_LOAD_ERRNO__END)
+		return -1;
+
+	idx = errnum - __DSO_LOAD_ERRNO__START;
+	scnprintf(buf, buflen, "%s", dso_load__error_str[idx]);
+	return 0;
+}
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
new file mode 100644
index 000000000..58d94175e
--- /dev/null
+++ b/tools/perf/util/dso.h
@@ -0,0 +1,405 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DSO
+#define __PERF_DSO
+
+#include <linux/refcount.h>
+#include <linux/types.h>
+#include <linux/rbtree.h>
+#include <sys/types.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <linux/bitops.h>
+#include "build-id.h"
+#include "mutex.h"
+
+struct machine;
+struct map;
+struct perf_env;
+
+#define DSO__NAME_KALLSYMS	"[kernel.kallsyms]"
+#define DSO__NAME_KCORE		"[kernel.kcore]"
+
+enum dso_binary_type {
+	DSO_BINARY_TYPE__KALLSYMS = 0,
+	DSO_BINARY_TYPE__GUEST_KALLSYMS,
+	DSO_BINARY_TYPE__VMLINUX,
+	DSO_BINARY_TYPE__GUEST_VMLINUX,
+	DSO_BINARY_TYPE__JAVA_JIT,
+	DSO_BINARY_TYPE__DEBUGLINK,
+	DSO_BINARY_TYPE__BUILD_ID_CACHE,
+	DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO,
+	DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
+	DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
+	DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO,
+	DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
+	DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
+	DSO_BINARY_TYPE__GUEST_KMODULE,
+	DSO_BINARY_TYPE__GUEST_KMODULE_COMP,
+	DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE,
+	DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP,
+	DSO_BINARY_TYPE__KCORE,
+	DSO_BINARY_TYPE__GUEST_KCORE,
+	DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
+	DSO_BINARY_TYPE__BPF_PROG_INFO,
+	DSO_BINARY_TYPE__BPF_IMAGE,
+	DSO_BINARY_TYPE__OOL,
+	DSO_BINARY_TYPE__NOT_FOUND,
+};
+
+enum dso_space_type {
+	DSO_SPACE__USER = 0,
+	DSO_SPACE__KERNEL,
+	DSO_SPACE__KERNEL_GUEST
+};
+
+enum dso_swap_type {
+	DSO_SWAP__UNSET,
+	DSO_SWAP__NO,
+	DSO_SWAP__YES,
+};
+
+enum dso_data_status {
+	DSO_DATA_STATUS_ERROR	= -1,
+	DSO_DATA_STATUS_UNKNOWN	= 0,
+	DSO_DATA_STATUS_OK	= 1,
+};
+
+enum dso_data_status_seen {
+	DSO_DATA_STATUS_SEEN_ITRACE,
+};
+
+enum dso_type {
+	DSO__TYPE_UNKNOWN,
+	DSO__TYPE_64BIT,
+	DSO__TYPE_32BIT,
+	DSO__TYPE_X32BIT,
+};
+
+enum dso_load_errno {
+	DSO_LOAD_ERRNO__SUCCESS		= 0,
+
+	/*
+	 * Choose an arbitrary negative big number not to clash with standard
+	 * errno since SUS requires the errno has distinct positive values.
+	 * See 'Issue 6' in the link below.
+	 *
+	 * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
+	 */
+	__DSO_LOAD_ERRNO__START		= -10000,
+
+	DSO_LOAD_ERRNO__INTERNAL_ERROR	= __DSO_LOAD_ERRNO__START,
+
+	/* for symsrc__init() */
+	DSO_LOAD_ERRNO__INVALID_ELF,
+	DSO_LOAD_ERRNO__CANNOT_READ_BUILDID,
+	DSO_LOAD_ERRNO__MISMATCHING_BUILDID,
+
+	/* for decompress_kmodule */
+	DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE,
+
+	__DSO_LOAD_ERRNO__END,
+};
+
+#define DSO__SWAP(dso, type, val)			\
+({							\
+	type ____r = val;				\
+	BUG_ON(dso->needs_swap == DSO_SWAP__UNSET);	\
+	if (dso->needs_swap == DSO_SWAP__YES) {		\
+		switch (sizeof(____r)) {		\
+		case 2:					\
+			____r = bswap_16(val);		\
+			break;				\
+		case 4:					\
+			____r = bswap_32(val);		\
+			break;				\
+		case 8:					\
+			____r = bswap_64(val);		\
+			break;				\
+		default:				\
+			BUG_ON(1);			\
+		}					\
+	}						\
+	____r;						\
+})
+
+#define DSO__DATA_CACHE_SIZE 4096
+#define DSO__DATA_CACHE_MASK ~(DSO__DATA_CACHE_SIZE - 1)
+
+/*
+ * Data about backing storage DSO, comes from PERF_RECORD_MMAP2 meta events
+ */
+struct dso_id {
+	u32	maj;
+	u32	min;
+	u64	ino;
+	u64	ino_generation;
+};
+
+struct dso_cache {
+	struct rb_node	rb_node;
+	u64 offset;
+	u64 size;
+	char data[];
+};
+
+struct auxtrace_cache;
+
+struct dso {
+	struct mutex	 lock;
+	struct list_head node;
+	struct rb_node	 rb_node;	/* rbtree node sorted by long name */
+	struct rb_root	 *root;		/* root of rbtree that rb_node is in */
+	struct rb_root_cached symbols;
+	struct rb_root_cached symbol_names;
+	struct rb_root_cached inlined_nodes;
+	struct rb_root_cached srclines;
+	struct {
+		u64		addr;
+		struct symbol	*symbol;
+	} last_find_result;
+	void		 *a2l;
+	char		 *symsrc_filename;
+	unsigned int	 a2l_fails;
+	enum dso_space_type	kernel;
+	enum dso_swap_type	needs_swap;
+	enum dso_binary_type	symtab_type;
+	enum dso_binary_type	binary_type;
+	enum dso_load_errno	load_errno;
+	u8		 adjust_symbols:1;
+	u8		 has_build_id:1;
+	u8		 header_build_id:1;
+	u8		 has_srcline:1;
+	u8		 hit:1;
+	u8		 annotate_warned:1;
+	u8		 auxtrace_warned:1;
+	u8		 short_name_allocated:1;
+	u8		 long_name_allocated:1;
+	u8		 is_64_bit:1;
+	bool		 sorted_by_name;
+	bool		 loaded;
+	u8		 rel;
+	struct build_id	 bid;
+	u64		 text_offset;
+	const char	 *short_name;
+	const char	 *long_name;
+	u16		 long_name_len;
+	u16		 short_name_len;
+	void		*dwfl;			/* DWARF debug info */
+	struct auxtrace_cache *auxtrace_cache;
+	int		 comp;
+
+	/* dso data file */
+	struct {
+		struct rb_root	 cache;
+		int		 fd;
+		int		 status;
+		u32		 status_seen;
+		u64		 file_size;
+		struct list_head open_entry;
+		u64		 elf_base_addr;
+		u64		 debug_frame_offset;
+		u64		 eh_frame_hdr_addr;
+		u64		 eh_frame_hdr_offset;
+	} data;
+	/* bpf prog information */
+	struct {
+		u32		id;
+		u32		sub_id;
+		struct perf_env	*env;
+	} bpf_prog;
+
+	union { /* Tool specific area */
+		void	 *priv;
+		u64	 db_id;
+	};
+	struct nsinfo	*nsinfo;
+	struct dso_id	 id;
+	refcount_t	 refcnt;
+	char		 name[];
+};
+
+/* dso__for_each_symbol - iterate over the symbols of given type
+ *
+ * @dso: the 'struct dso *' in which symbols are iterated
+ * @pos: the 'struct symbol *' to use as a loop cursor
+ * @n: the 'struct rb_node *' to use as a temporary storage
+ */
+#define dso__for_each_symbol(dso, pos, n)	\
+	symbols__for_each_entry(&(dso)->symbols, pos, n)
+
+#define dsos__for_each_with_build_id(pos, head)	\
+	list_for_each_entry(pos, head, node)	\
+		if (!pos->has_build_id)		\
+			continue;		\
+		else
+
+static inline void dso__set_loaded(struct dso *dso)
+{
+	dso->loaded = true;
+}
+
+struct dso *dso__new_id(const char *name, struct dso_id *id);
+struct dso *dso__new(const char *name);
+void dso__delete(struct dso *dso);
+
+int dso__cmp_id(struct dso *a, struct dso *b);
+void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated);
+void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated);
+
+int dso__name_len(const struct dso *dso);
+
+struct dso *dso__get(struct dso *dso);
+void dso__put(struct dso *dso);
+
+static inline void __dso__zput(struct dso **dso)
+{
+	dso__put(*dso);
+	*dso = NULL;
+}
+
+#define dso__zput(dso) __dso__zput(&dso)
+
+bool dso__loaded(const struct dso *dso);
+
+static inline bool dso__has_symbols(const struct dso *dso)
+{
+	return !RB_EMPTY_ROOT(&dso->symbols.rb_root);
+}
+
+bool dso__sorted_by_name(const struct dso *dso);
+void dso__set_sorted_by_name(struct dso *dso);
+void dso__sort_by_name(struct dso *dso);
+
+void dso__set_build_id(struct dso *dso, struct build_id *bid);
+bool dso__build_id_equal(const struct dso *dso, struct build_id *bid);
+void dso__read_running_kernel_build_id(struct dso *dso,
+				       struct machine *machine);
+int dso__kernel_module_get_build_id(struct dso *dso, const char *root_dir);
+
+char dso__symtab_origin(const struct dso *dso);
+int dso__read_binary_type_filename(const struct dso *dso, enum dso_binary_type type,
+				   char *root_dir, char *filename, size_t size);
+bool is_kernel_module(const char *pathname, int cpumode);
+bool dso__needs_decompress(struct dso *dso);
+int dso__decompress_kmodule_fd(struct dso *dso, const char *name);
+int dso__decompress_kmodule_path(struct dso *dso, const char *name,
+				 char *pathname, size_t len);
+int filename__decompress(const char *name, char *pathname,
+			 size_t len, int comp, int *err);
+
+#define KMOD_DECOMP_NAME  "/tmp/perf-kmod-XXXXXX"
+#define KMOD_DECOMP_LEN   sizeof(KMOD_DECOMP_NAME)
+
+struct kmod_path {
+	char *name;
+	int   comp;
+	bool  kmod;
+};
+
+int __kmod_path__parse(struct kmod_path *m, const char *path,
+		     bool alloc_name);
+
+#define kmod_path__parse(__m, __p)      __kmod_path__parse(__m, __p, false)
+#define kmod_path__parse_name(__m, __p) __kmod_path__parse(__m, __p, true)
+
+void dso__set_module_info(struct dso *dso, struct kmod_path *m,
+			  struct machine *machine);
+
+/*
+ * The dso__data_* external interface provides following functions:
+ *   dso__data_get_fd
+ *   dso__data_put_fd
+ *   dso__data_close
+ *   dso__data_size
+ *   dso__data_read_offset
+ *   dso__data_read_addr
+ *   dso__data_write_cache_offs
+ *   dso__data_write_cache_addr
+ *
+ * Please refer to the dso.c object code for each function and
+ * arguments documentation. Following text tries to explain the
+ * dso file descriptor caching.
+ *
+ * The dso__data* interface allows caching of opened file descriptors
+ * to speed up the dso data accesses. The idea is to leave the file
+ * descriptor opened ideally for the whole life of the dso object.
+ *
+ * The current usage of the dso__data_* interface is as follows:
+ *
+ * Get DSO's fd:
+ *   int fd = dso__data_get_fd(dso, machine);
+ *   if (fd >= 0) {
+ *       USE 'fd' SOMEHOW
+ *       dso__data_put_fd(dso);
+ *   }
+ *
+ * Read DSO's data:
+ *   n = dso__data_read_offset(dso_0, &machine, 0, buf, BUFSIZE);
+ *   n = dso__data_read_addr(dso_0, &machine, 0, buf, BUFSIZE);
+ *
+ * Eventually close DSO's fd:
+ *   dso__data_close(dso);
+ *
+ * It is not necessary to close the DSO object data file. Each time new
+ * DSO data file is opened, the limit (RLIMIT_NOFILE/2) is checked. Once
+ * it is crossed, the oldest opened DSO object is closed.
+ *
+ * The dso__delete function calls close_dso function to ensure the
+ * data file descriptor gets closed/unmapped before the dso object
+ * is freed.
+ *
+ * TODO
+*/
+int dso__data_get_fd(struct dso *dso, struct machine *machine);
+void dso__data_put_fd(struct dso *dso);
+void dso__data_close(struct dso *dso);
+
+int dso__data_file_size(struct dso *dso, struct machine *machine);
+off_t dso__data_size(struct dso *dso, struct machine *machine);
+ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
+			      u64 offset, u8 *data, ssize_t size);
+ssize_t dso__data_read_addr(struct dso *dso, struct map *map,
+			    struct machine *machine, u64 addr,
+			    u8 *data, ssize_t size);
+bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by);
+ssize_t dso__data_write_cache_offs(struct dso *dso, struct machine *machine,
+				   u64 offset, const u8 *data, ssize_t size);
+ssize_t dso__data_write_cache_addr(struct dso *dso, struct map *map,
+				   struct machine *machine, u64 addr,
+				   const u8 *data, ssize_t size);
+
+struct map *dso__new_map(const char *name);
+struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
+				    const char *short_name, int dso_type);
+
+void dso__reset_find_symbol_cache(struct dso *dso);
+
+size_t dso__fprintf_symbols_by_name(struct dso *dso, FILE *fp);
+size_t dso__fprintf(struct dso *dso, FILE *fp);
+
+static inline bool dso__is_vmlinux(struct dso *dso)
+{
+	return dso->binary_type == DSO_BINARY_TYPE__VMLINUX ||
+	       dso->binary_type == DSO_BINARY_TYPE__GUEST_VMLINUX;
+}
+
+static inline bool dso__is_kcore(struct dso *dso)
+{
+	return dso->binary_type == DSO_BINARY_TYPE__KCORE ||
+	       dso->binary_type == DSO_BINARY_TYPE__GUEST_KCORE;
+}
+
+static inline bool dso__is_kallsyms(struct dso *dso)
+{
+	return dso->kernel && dso->long_name[0] != '/';
+}
+
+void dso__free_a2l(struct dso *dso);
+
+enum dso_type dso__type(struct dso *dso, struct machine *machine);
+
+int dso__strerror_load(struct dso *dso, char *buf, size_t buflen);
+
+void reset_fd_limit(void);
+
+#endif /* __PERF_DSO */
diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c
new file mode 100644
index 000000000..2bd23e4cf
--- /dev/null
+++ b/tools/perf/util/dsos.c
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "debug.h"
+#include "dsos.h"
+#include "dso.h"
+#include "util.h"
+#include "vdso.h"
+#include "namespaces.h"
+#include <errno.h>
+#include <libgen.h>
+#include <stdlib.h>
+#include <string.h>
+#include <symbol.h> // filename__read_build_id
+#include <unistd.h>
+
+static int __dso_id__cmp(struct dso_id *a, struct dso_id *b)
+{
+	if (a->maj > b->maj) return -1;
+	if (a->maj < b->maj) return 1;
+
+	if (a->min > b->min) return -1;
+	if (a->min < b->min) return 1;
+
+	if (a->ino > b->ino) return -1;
+	if (a->ino < b->ino) return 1;
+
+	/*
+	 * Synthesized MMAP events have zero ino_generation, avoid comparing
+	 * them with MMAP events with actual ino_generation.
+	 *
+	 * I found it harmful because the mismatch resulted in a new
+	 * dso that did not have a build ID whereas the original dso did have a
+	 * build ID. The build ID was essential because the object was not found
+	 * otherwise. - Adrian
+	 */
+	if (a->ino_generation && b->ino_generation) {
+		if (a->ino_generation > b->ino_generation) return -1;
+		if (a->ino_generation < b->ino_generation) return 1;
+	}
+
+	return 0;
+}
+
+static bool dso_id__empty(struct dso_id *id)
+{
+	if (!id)
+		return true;
+
+	return !id->maj && !id->min && !id->ino && !id->ino_generation;
+}
+
+static void dso__inject_id(struct dso *dso, struct dso_id *id)
+{
+	dso->id.maj = id->maj;
+	dso->id.min = id->min;
+	dso->id.ino = id->ino;
+	dso->id.ino_generation = id->ino_generation;
+}
+
+static int dso_id__cmp(struct dso_id *a, struct dso_id *b)
+{
+	/*
+	 * The second is always dso->id, so zeroes if not set, assume passing
+	 * NULL for a means a zeroed id
+	 */
+	if (dso_id__empty(a) || dso_id__empty(b))
+		return 0;
+
+	return __dso_id__cmp(a, b);
+}
+
+int dso__cmp_id(struct dso *a, struct dso *b)
+{
+	return __dso_id__cmp(&a->id, &b->id);
+}
+
+bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
+{
+	bool have_build_id = false;
+	struct dso *pos;
+	struct nscookie nsc;
+
+	list_for_each_entry(pos, head, node) {
+		if (with_hits && !pos->hit && !dso__is_vdso(pos))
+			continue;
+		if (pos->has_build_id) {
+			have_build_id = true;
+			continue;
+		}
+		nsinfo__mountns_enter(pos->nsinfo, &nsc);
+		if (filename__read_build_id(pos->long_name, &pos->bid) > 0) {
+			have_build_id	  = true;
+			pos->has_build_id = true;
+		} else if (errno == ENOENT && pos->nsinfo) {
+			char *new_name = filename_with_chroot(pos->nsinfo->pid,
+							      pos->long_name);
+
+			if (new_name && filename__read_build_id(new_name,
+								&pos->bid) > 0) {
+				have_build_id = true;
+				pos->has_build_id = true;
+			}
+			free(new_name);
+		}
+		nsinfo__mountns_exit(&nsc);
+	}
+
+	return have_build_id;
+}
+
+static int __dso__cmp_long_name(const char *long_name, struct dso_id *id, struct dso *b)
+{
+	int rc = strcmp(long_name, b->long_name);
+	return rc ?: dso_id__cmp(id, &b->id);
+}
+
+static int __dso__cmp_short_name(const char *short_name, struct dso_id *id, struct dso *b)
+{
+	int rc = strcmp(short_name, b->short_name);
+	return rc ?: dso_id__cmp(id, &b->id);
+}
+
+static int dso__cmp_short_name(struct dso *a, struct dso *b)
+{
+	return __dso__cmp_short_name(a->short_name, &a->id, b);
+}
+
+/*
+ * Find a matching entry and/or link current entry to RB tree.
+ * Either one of the dso or name parameter must be non-NULL or the
+ * function will not work.
+ */
+struct dso *__dsos__findnew_link_by_longname_id(struct rb_root *root, struct dso *dso,
+						const char *name, struct dso_id *id)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node  *parent = NULL;
+
+	if (!name)
+		name = dso->long_name;
+	/*
+	 * Find node with the matching name
+	 */
+	while (*p) {
+		struct dso *this = rb_entry(*p, struct dso, rb_node);
+		int rc = __dso__cmp_long_name(name, id, this);
+
+		parent = *p;
+		if (rc == 0) {
+			/*
+			 * In case the new DSO is a duplicate of an existing
+			 * one, print a one-time warning & put the new entry
+			 * at the end of the list of duplicates.
+			 */
+			if (!dso || (dso == this))
+				return this;	/* Find matching dso */
+			/*
+			 * The core kernel DSOs may have duplicated long name.
+			 * In this case, the short name should be different.
+			 * Comparing the short names to differentiate the DSOs.
+			 */
+			rc = dso__cmp_short_name(dso, this);
+			if (rc == 0) {
+				pr_err("Duplicated dso name: %s\n", name);
+				return NULL;
+			}
+		}
+		if (rc < 0)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
+	}
+	if (dso) {
+		/* Add new node and rebalance tree */
+		rb_link_node(&dso->rb_node, parent, p);
+		rb_insert_color(&dso->rb_node, root);
+		dso->root = root;
+	}
+	return NULL;
+}
+
+void __dsos__add(struct dsos *dsos, struct dso *dso)
+{
+	list_add_tail(&dso->node, &dsos->head);
+	__dsos__findnew_link_by_longname_id(&dsos->root, dso, NULL, &dso->id);
+	/*
+	 * It is now in the linked list, grab a reference, then garbage collect
+	 * this when needing memory, by looking at LRU dso instances in the
+	 * list with atomic_read(&dso->refcnt) == 1, i.e. no references
+	 * anywhere besides the one for the list, do, under a lock for the
+	 * list: remove it from the list, then a dso__put(), that probably will
+	 * be the last and will then call dso__delete(), end of life.
+	 *
+	 * That, or at the end of the 'struct machine' lifetime, when all
+	 * 'struct dso' instances will be removed from the list, in
+	 * dsos__exit(), if they have no other reference from some other data
+	 * structure.
+	 *
+	 * E.g.: after processing a 'perf.data' file and storing references
+	 * to objects instantiated while processing events, we will have
+	 * references to the 'thread', 'map', 'dso' structs all from 'struct
+	 * hist_entry' instances, but we may not need anything not referenced,
+	 * so we might as well call machines__exit()/machines__delete() and
+	 * garbage collect it.
+	 */
+	dso__get(dso);
+}
+
+void dsos__add(struct dsos *dsos, struct dso *dso)
+{
+	down_write(&dsos->lock);
+	__dsos__add(dsos, dso);
+	up_write(&dsos->lock);
+}
+
+static struct dso *__dsos__findnew_by_longname_id(struct rb_root *root, const char *name, struct dso_id *id)
+{
+	return __dsos__findnew_link_by_longname_id(root, NULL, name, id);
+}
+
+static struct dso *__dsos__find_id(struct dsos *dsos, const char *name, struct dso_id *id, bool cmp_short)
+{
+	struct dso *pos;
+
+	if (cmp_short) {
+		list_for_each_entry(pos, &dsos->head, node)
+			if (__dso__cmp_short_name(name, id, pos) == 0)
+				return pos;
+		return NULL;
+	}
+	return __dsos__findnew_by_longname_id(&dsos->root, name, id);
+}
+
+struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
+{
+	return __dsos__find_id(dsos, name, NULL, cmp_short);
+}
+
+static void dso__set_basename(struct dso *dso)
+{
+	char *base, *lname;
+	int tid;
+
+	if (sscanf(dso->long_name, "/tmp/perf-%d.map", &tid) == 1) {
+		if (asprintf(&base, "[JIT] tid %d", tid) < 0)
+			return;
+	} else {
+	      /*
+	       * basename() may modify path buffer, so we must pass
+               * a copy.
+               */
+		lname = strdup(dso->long_name);
+		if (!lname)
+			return;
+
+		/*
+		 * basename() may return a pointer to internal
+		 * storage which is reused in subsequent calls
+		 * so copy the result.
+		 */
+		base = strdup(basename(lname));
+
+		free(lname);
+
+		if (!base)
+			return;
+	}
+	dso__set_short_name(dso, base, true);
+}
+
+static struct dso *__dsos__addnew_id(struct dsos *dsos, const char *name, struct dso_id *id)
+{
+	struct dso *dso = dso__new_id(name, id);
+
+	if (dso != NULL) {
+		__dsos__add(dsos, dso);
+		dso__set_basename(dso);
+		/* Put dso here because __dsos_add already got it */
+		dso__put(dso);
+	}
+	return dso;
+}
+
+struct dso *__dsos__addnew(struct dsos *dsos, const char *name)
+{
+	return __dsos__addnew_id(dsos, name, NULL);
+}
+
+static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id)
+{
+	struct dso *dso = __dsos__find_id(dsos, name, id, false);
+
+	if (dso && dso_id__empty(&dso->id) && !dso_id__empty(id))
+		dso__inject_id(dso, id);
+
+	return dso ? dso : __dsos__addnew_id(dsos, name, id);
+}
+
+struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id)
+{
+	struct dso *dso;
+	down_write(&dsos->lock);
+	dso = dso__get(__dsos__findnew_id(dsos, name, id));
+	up_write(&dsos->lock);
+	return dso;
+}
+
+size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
+			       bool (skip)(struct dso *dso, int parm), int parm)
+{
+	struct dso *pos;
+	size_t ret = 0;
+
+	list_for_each_entry(pos, head, node) {
+		char sbuild_id[SBUILD_ID_SIZE];
+
+		if (skip && skip(pos, parm))
+			continue;
+		build_id__sprintf(&pos->bid, sbuild_id);
+		ret += fprintf(fp, "%-40s %s\n", sbuild_id, pos->long_name);
+	}
+	return ret;
+}
+
+size_t __dsos__fprintf(struct list_head *head, FILE *fp)
+{
+	struct dso *pos;
+	size_t ret = 0;
+
+	list_for_each_entry(pos, head, node) {
+		ret += dso__fprintf(pos, fp);
+	}
+
+	return ret;
+}
diff --git a/tools/perf/util/dsos.h b/tools/perf/util/dsos.h
new file mode 100644
index 000000000..5dbec2bc6
--- /dev/null
+++ b/tools/perf/util/dsos.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DSOS
+#define __PERF_DSOS
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include "rwsem.h"
+
+struct dso;
+struct dso_id;
+
+/*
+ * DSOs are put into both a list for fast iteration and rbtree for fast
+ * long name lookup.
+ */
+struct dsos {
+	struct list_head    head;
+	struct rb_root	    root;	/* rbtree root sorted by long name */
+	struct rw_semaphore lock;
+};
+
+void __dsos__add(struct dsos *dsos, struct dso *dso);
+void dsos__add(struct dsos *dsos, struct dso *dso);
+struct dso *__dsos__addnew(struct dsos *dsos, const char *name);
+struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short);
+
+struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id);
+ 
+struct dso *__dsos__findnew_link_by_longname_id(struct rb_root *root, struct dso *dso,
+						const char *name, struct dso_id *id);
+
+bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
+
+size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
+			       bool (skip)(struct dso *dso, int parm), int parm);
+size_t __dsos__fprintf(struct list_head *head, FILE *fp);
+
+#endif /* __PERF_DSOS */
diff --git a/tools/perf/util/dump-insn.c b/tools/perf/util/dump-insn.c
new file mode 100644
index 000000000..2bd8585db
--- /dev/null
+++ b/tools/perf/util/dump-insn.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include "dump-insn.h"
+
+/* Fallback code */
+
+__weak
+const char *dump_insn(struct perf_insn *x __maybe_unused,
+		      u64 ip __maybe_unused, u8 *inbuf __maybe_unused,
+		      int inlen __maybe_unused, int *lenp)
+{
+	if (lenp)
+		*lenp = 0;
+	return "?";
+}
+
+__weak
+int arch_is_branch(const unsigned char *buf __maybe_unused,
+		   size_t len __maybe_unused,
+		   int x86_64 __maybe_unused)
+{
+	return 0;
+}
diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h
new file mode 100644
index 000000000..650125061
--- /dev/null
+++ b/tools/perf/util/dump-insn.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DUMP_INSN_H
+#define __PERF_DUMP_INSN_H 1
+
+#define MAXINSN 15
+
+#include <linux/types.h>
+
+struct thread;
+
+struct perf_insn {
+	/* Initialized by callers: */
+	struct thread *thread;
+	u8	      cpumode;
+	bool	      is64bit;
+	int	      cpu;
+	/* Temporary */
+	char	      out[256];
+};
+
+const char *dump_insn(struct perf_insn *x, u64 ip,
+		      u8 *inbuf, int inlen, int *lenp);
+int arch_is_branch(const unsigned char *buf, size_t len, int x86_64);
+
+#endif
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
new file mode 100644
index 000000000..b125eaadc
--- /dev/null
+++ b/tools/perf/util/dwarf-aux.c
@@ -0,0 +1,1401 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * dwarf-aux.c : libdw auxiliary interfaces
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include "debug.h"
+#include "dwarf-aux.h"
+#include "strbuf.h"
+#include "string2.h"
+
+/**
+ * cu_find_realpath - Find the realpath of the target file
+ * @cu_die: A DIE(dwarf information entry) of CU(compilation Unit)
+ * @fname:  The tail filename of the target file
+ *
+ * Find the real(long) path of @fname in @cu_die.
+ */
+const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname)
+{
+	Dwarf_Files *files;
+	size_t nfiles, i;
+	const char *src = NULL;
+	int ret;
+
+	if (!fname)
+		return NULL;
+
+	ret = dwarf_getsrcfiles(cu_die, &files, &nfiles);
+	if (ret != 0)
+		return NULL;
+
+	for (i = 0; i < nfiles; i++) {
+		src = dwarf_filesrc(files, i, NULL, NULL);
+		if (strtailcmp(src, fname) == 0)
+			break;
+	}
+	if (i == nfiles)
+		return NULL;
+	return src;
+}
+
+/**
+ * cu_get_comp_dir - Get the path of compilation directory
+ * @cu_die: a CU DIE
+ *
+ * Get the path of compilation directory of given @cu_die.
+ * Since this depends on DW_AT_comp_dir, older gcc will not
+ * embedded it. In that case, this returns NULL.
+ */
+const char *cu_get_comp_dir(Dwarf_Die *cu_die)
+{
+	Dwarf_Attribute attr;
+	if (dwarf_attr(cu_die, DW_AT_comp_dir, &attr) == NULL)
+		return NULL;
+	return dwarf_formstring(&attr);
+}
+
+/* Unlike dwarf_getsrc_die(), cu_getsrc_die() only returns statement line */
+static Dwarf_Line *cu_getsrc_die(Dwarf_Die *cu_die, Dwarf_Addr addr)
+{
+	Dwarf_Addr laddr;
+	Dwarf_Lines *lines;
+	Dwarf_Line *line;
+	size_t nlines, l, u, n;
+	bool flag;
+
+	if (dwarf_getsrclines(cu_die, &lines, &nlines) != 0 ||
+	    nlines == 0)
+		return NULL;
+
+	/* Lines are sorted by address, use binary search */
+	l = 0; u = nlines - 1;
+	while (l < u) {
+		n = u - (u - l) / 2;
+		line = dwarf_onesrcline(lines, n);
+		if (!line || dwarf_lineaddr(line, &laddr) != 0)
+			return NULL;
+		if (addr < laddr)
+			u = n - 1;
+		else
+			l = n;
+	}
+	/* Going backward to find the lowest line */
+	do {
+		line = dwarf_onesrcline(lines, --l);
+		if (!line || dwarf_lineaddr(line, &laddr) != 0)
+			return NULL;
+	} while (laddr == addr);
+	l++;
+	/* Going forward to find the statement line */
+	do {
+		line = dwarf_onesrcline(lines, l++);
+		if (!line || dwarf_lineaddr(line, &laddr) != 0 ||
+		    dwarf_linebeginstatement(line, &flag) != 0)
+			return NULL;
+		if (laddr > addr)
+			return NULL;
+	} while (!flag);
+
+	return line;
+}
+
+/**
+ * cu_find_lineinfo - Get a line number and file name for given address
+ * @cu_die: a CU DIE
+ * @addr: An address
+ * @fname: a pointer which returns the file name string
+ * @lineno: a pointer which returns the line number
+ *
+ * Find a line number and file name for @addr in @cu_die.
+ */
+int cu_find_lineinfo(Dwarf_Die *cu_die, Dwarf_Addr addr,
+		     const char **fname, int *lineno)
+{
+	Dwarf_Line *line;
+	Dwarf_Die die_mem;
+	Dwarf_Addr faddr;
+
+	if (die_find_realfunc(cu_die, addr, &die_mem)
+	    && die_entrypc(&die_mem, &faddr) == 0 &&
+	    faddr == addr) {
+		*fname = dwarf_decl_file(&die_mem);
+		dwarf_decl_line(&die_mem, lineno);
+		goto out;
+	}
+
+	line = cu_getsrc_die(cu_die, addr);
+	if (line && dwarf_lineno(line, lineno) == 0) {
+		*fname = dwarf_linesrc(line, NULL, NULL);
+		if (!*fname)
+			/* line number is useless without filename */
+			*lineno = 0;
+	}
+
+out:
+	return *lineno ?: -ENOENT;
+}
+
+static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data);
+
+/**
+ * cu_walk_functions_at - Walk on function DIEs at given address
+ * @cu_die: A CU DIE
+ * @addr: An address
+ * @callback: A callback which called with found DIEs
+ * @data: A user data
+ *
+ * Walk on function DIEs at given @addr in @cu_die. Passed DIEs
+ * should be subprogram or inlined-subroutines.
+ */
+int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
+		    int (*callback)(Dwarf_Die *, void *), void *data)
+{
+	Dwarf_Die die_mem;
+	Dwarf_Die *sc_die;
+	int ret = -ENOENT;
+
+	/* Inlined function could be recursive. Trace it until fail */
+	for (sc_die = die_find_realfunc(cu_die, addr, &die_mem);
+	     sc_die != NULL;
+	     sc_die = die_find_child(sc_die, __die_find_inline_cb, &addr,
+				     &die_mem)) {
+		ret = callback(sc_die, data);
+		if (ret)
+			break;
+	}
+
+	return ret;
+
+}
+
+/**
+ * die_get_linkage_name - Get the linkage name of the object
+ * @dw_die: A DIE of the object
+ *
+ * Get the linkage name attribute of given @dw_die.
+ * For C++ binary, the linkage name will be the mangled symbol.
+ */
+const char *die_get_linkage_name(Dwarf_Die *dw_die)
+{
+	Dwarf_Attribute attr;
+
+	if (dwarf_attr_integrate(dw_die, DW_AT_linkage_name, &attr) == NULL)
+		return NULL;
+	return dwarf_formstring(&attr);
+}
+
+/**
+ * die_compare_name - Compare diename and tname
+ * @dw_die: a DIE
+ * @tname: a string of target name
+ *
+ * Compare the name of @dw_die and @tname. Return false if @dw_die has no name.
+ */
+bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
+{
+	const char *name;
+
+	name = dwarf_diename(dw_die);
+	return name ? (strcmp(tname, name) == 0) : false;
+}
+
+/**
+ * die_match_name - Match diename/linkage name and glob
+ * @dw_die: a DIE
+ * @glob: a string of target glob pattern
+ *
+ * Glob matching the name of @dw_die and @glob. Return false if matching fail.
+ * This also match linkage name.
+ */
+bool die_match_name(Dwarf_Die *dw_die, const char *glob)
+{
+	const char *name;
+
+	name = dwarf_diename(dw_die);
+	if (name && strglobmatch(name, glob))
+		return true;
+	/* fall back to check linkage name */
+	name = die_get_linkage_name(dw_die);
+	if (name && strglobmatch(name, glob))
+		return true;
+
+	return false;
+}
+
+/**
+ * die_get_call_lineno - Get callsite line number of inline-function instance
+ * @in_die: a DIE of an inlined function instance
+ *
+ * Get call-site line number of @in_die. This means from where the inline
+ * function is called.
+ */
+int die_get_call_lineno(Dwarf_Die *in_die)
+{
+	Dwarf_Attribute attr;
+	Dwarf_Word ret;
+
+	if (!dwarf_attr(in_die, DW_AT_call_line, &attr))
+		return -ENOENT;
+
+	dwarf_formudata(&attr, &ret);
+	return (int)ret;
+}
+
+/**
+ * die_get_type - Get type DIE
+ * @vr_die: a DIE of a variable
+ * @die_mem: where to store a type DIE
+ *
+ * Get a DIE of the type of given variable (@vr_die), and store
+ * it to die_mem. Return NULL if fails to get a type DIE.
+ */
+Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
+{
+	Dwarf_Attribute attr;
+
+	if (dwarf_attr_integrate(vr_die, DW_AT_type, &attr) &&
+	    dwarf_formref_die(&attr, die_mem))
+		return die_mem;
+	else
+		return NULL;
+}
+
+/* Get a type die, but skip qualifiers */
+static Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
+{
+	int tag;
+
+	do {
+		vr_die = die_get_type(vr_die, die_mem);
+		if (!vr_die)
+			break;
+		tag = dwarf_tag(vr_die);
+	} while (tag == DW_TAG_const_type ||
+		 tag == DW_TAG_restrict_type ||
+		 tag == DW_TAG_volatile_type ||
+		 tag == DW_TAG_shared_type);
+
+	return vr_die;
+}
+
+/**
+ * die_get_real_type - Get a type die, but skip qualifiers and typedef
+ * @vr_die: a DIE of a variable
+ * @die_mem: where to store a type DIE
+ *
+ * Get a DIE of the type of given variable (@vr_die), and store
+ * it to die_mem. Return NULL if fails to get a type DIE.
+ * If the type is qualifiers (e.g. const) or typedef, this skips it
+ * and tries to find real type (structure or basic types, e.g. int).
+ */
+Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
+{
+	do {
+		vr_die = __die_get_real_type(vr_die, die_mem);
+	} while (vr_die && dwarf_tag(vr_die) == DW_TAG_typedef);
+
+	return vr_die;
+}
+
+/* Get attribute and translate it as a udata */
+static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
+			      Dwarf_Word *result)
+{
+	Dwarf_Attribute attr;
+
+	if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL ||
+	    dwarf_formudata(&attr, result) != 0)
+		return -ENOENT;
+
+	return 0;
+}
+
+/**
+ * die_is_signed_type - Check whether a type DIE is signed or not
+ * @tp_die: a DIE of a type
+ *
+ * Get the encoding of @tp_die and return true if the encoding
+ * is signed.
+ */
+bool die_is_signed_type(Dwarf_Die *tp_die)
+{
+	Dwarf_Word ret;
+
+	if (die_get_attr_udata(tp_die, DW_AT_encoding, &ret))
+		return false;
+
+	return (ret == DW_ATE_signed_char || ret == DW_ATE_signed ||
+		ret == DW_ATE_signed_fixed);
+}
+
+/**
+ * die_is_func_def - Ensure that this DIE is a subprogram and definition
+ * @dw_die: a DIE
+ *
+ * Ensure that this DIE is a subprogram and NOT a declaration. This
+ * returns true if @dw_die is a function definition.
+ **/
+bool die_is_func_def(Dwarf_Die *dw_die)
+{
+	Dwarf_Attribute attr;
+	Dwarf_Addr addr = 0;
+
+	if (dwarf_tag(dw_die) != DW_TAG_subprogram)
+		return false;
+
+	if (dwarf_attr(dw_die, DW_AT_declaration, &attr))
+		return false;
+
+	/*
+	 * DW_AT_declaration can be lost from function declaration
+	 * by gcc's bug #97060.
+	 * So we need to check this subprogram DIE has DW_AT_inline
+	 * or an entry address.
+	 */
+	if (!dwarf_attr(dw_die, DW_AT_inline, &attr) &&
+	    die_entrypc(dw_die, &addr) < 0)
+		return false;
+
+	return true;
+}
+
+/**
+ * die_entrypc - Returns entry PC (the lowest address) of a DIE
+ * @dw_die: a DIE
+ * @addr: where to store entry PC
+ *
+ * Since dwarf_entrypc() does not return entry PC if the DIE has only address
+ * range, we have to use this to retrieve the lowest address from the address
+ * range attribute.
+ */
+int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr)
+{
+	Dwarf_Addr base, end;
+	Dwarf_Attribute attr;
+
+	if (!addr)
+		return -EINVAL;
+
+	if (dwarf_entrypc(dw_die, addr) == 0)
+		return 0;
+
+	/*
+	 *  Since the dwarf_ranges() will return 0 if there is no
+	 * DW_AT_ranges attribute, we should check it first.
+	 */
+	if (!dwarf_attr(dw_die, DW_AT_ranges, &attr))
+		return -ENOENT;
+
+	return dwarf_ranges(dw_die, 0, &base, addr, &end) < 0 ? -ENOENT : 0;
+}
+
+/**
+ * die_is_func_instance - Ensure that this DIE is an instance of a subprogram
+ * @dw_die: a DIE
+ *
+ * Ensure that this DIE is an instance (which has an entry address).
+ * This returns true if @dw_die is a function instance. If not, the @dw_die
+ * must be a prototype. You can use die_walk_instances() to find actual
+ * instances.
+ **/
+bool die_is_func_instance(Dwarf_Die *dw_die)
+{
+	Dwarf_Addr tmp;
+	Dwarf_Attribute attr_mem;
+	int tag = dwarf_tag(dw_die);
+
+	if (tag != DW_TAG_subprogram &&
+	    tag != DW_TAG_inlined_subroutine)
+		return false;
+
+	return dwarf_entrypc(dw_die, &tmp) == 0 ||
+		dwarf_attr(dw_die, DW_AT_ranges, &attr_mem) != NULL;
+}
+
+/**
+ * die_get_data_member_location - Get the data-member offset
+ * @mb_die: a DIE of a member of a data structure
+ * @offs: The offset of the member in the data structure
+ *
+ * Get the offset of @mb_die in the data structure including @mb_die, and
+ * stores result offset to @offs. If any error occurs this returns errno.
+ */
+int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs)
+{
+	Dwarf_Attribute attr;
+	Dwarf_Op *expr;
+	size_t nexpr;
+	int ret;
+
+	if (dwarf_attr(mb_die, DW_AT_data_member_location, &attr) == NULL)
+		return -ENOENT;
+
+	if (dwarf_formudata(&attr, offs) != 0) {
+		/* DW_AT_data_member_location should be DW_OP_plus_uconst */
+		ret = dwarf_getlocation(&attr, &expr, &nexpr);
+		if (ret < 0 || nexpr == 0)
+			return -ENOENT;
+
+		if (expr[0].atom != DW_OP_plus_uconst || nexpr != 1) {
+			pr_debug("Unable to get offset:Unexpected OP %x (%zd)\n",
+				 expr[0].atom, nexpr);
+			return -ENOTSUP;
+		}
+		*offs = (Dwarf_Word)expr[0].number;
+	}
+	return 0;
+}
+
+/* Get the call file index number in CU DIE */
+static int die_get_call_fileno(Dwarf_Die *in_die)
+{
+	Dwarf_Word idx;
+
+	if (die_get_attr_udata(in_die, DW_AT_call_file, &idx) == 0)
+		return (int)idx;
+	else
+		return -ENOENT;
+}
+
+/* Get the declared file index number in CU DIE */
+static int die_get_decl_fileno(Dwarf_Die *pdie)
+{
+	Dwarf_Word idx;
+
+	if (die_get_attr_udata(pdie, DW_AT_decl_file, &idx) == 0)
+		return (int)idx;
+	else
+		return -ENOENT;
+}
+
+/**
+ * die_get_call_file - Get callsite file name of inlined function instance
+ * @in_die: a DIE of an inlined function instance
+ *
+ * Get call-site file name of @in_die. This means from which file the inline
+ * function is called.
+ */
+const char *die_get_call_file(Dwarf_Die *in_die)
+{
+	Dwarf_Die cu_die;
+	Dwarf_Files *files;
+	int idx;
+
+	idx = die_get_call_fileno(in_die);
+	if (idx < 0 || !dwarf_diecu(in_die, &cu_die, NULL, NULL) ||
+	    dwarf_getsrcfiles(&cu_die, &files, NULL) != 0)
+		return NULL;
+
+	return dwarf_filesrc(files, idx, NULL, NULL);
+}
+
+
+/**
+ * die_find_child - Generic DIE search function in DIE tree
+ * @rt_die: a root DIE
+ * @callback: a callback function
+ * @data: a user data passed to the callback function
+ * @die_mem: a buffer for result DIE
+ *
+ * Trace DIE tree from @rt_die and call @callback for each child DIE.
+ * If @callback returns DIE_FIND_CB_END, this stores the DIE into
+ * @die_mem and returns it. If @callback returns DIE_FIND_CB_CONTINUE,
+ * this continues to trace the tree. Optionally, @callback can return
+ * DIE_FIND_CB_CHILD and DIE_FIND_CB_SIBLING, those means trace only
+ * the children and trace only the siblings respectively.
+ * Returns NULL if @callback can't find any appropriate DIE.
+ */
+Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
+			  int (*callback)(Dwarf_Die *, void *),
+			  void *data, Dwarf_Die *die_mem)
+{
+	Dwarf_Die child_die;
+	int ret;
+
+	ret = dwarf_child(rt_die, die_mem);
+	if (ret != 0)
+		return NULL;
+
+	do {
+		ret = callback(die_mem, data);
+		if (ret == DIE_FIND_CB_END)
+			return die_mem;
+
+		if ((ret & DIE_FIND_CB_CHILD) &&
+		    die_find_child(die_mem, callback, data, &child_die)) {
+			memcpy(die_mem, &child_die, sizeof(Dwarf_Die));
+			return die_mem;
+		}
+	} while ((ret & DIE_FIND_CB_SIBLING) &&
+		 dwarf_siblingof(die_mem, die_mem) == 0);
+
+	return NULL;
+}
+
+struct __addr_die_search_param {
+	Dwarf_Addr	addr;
+	Dwarf_Die	*die_mem;
+};
+
+static int __die_search_func_tail_cb(Dwarf_Die *fn_die, void *data)
+{
+	struct __addr_die_search_param *ad = data;
+	Dwarf_Addr addr = 0;
+
+	if (dwarf_tag(fn_die) == DW_TAG_subprogram &&
+	    !dwarf_highpc(fn_die, &addr) &&
+	    addr == ad->addr) {
+		memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die));
+		return DWARF_CB_ABORT;
+	}
+	return DWARF_CB_OK;
+}
+
+/**
+ * die_find_tailfunc - Search for a non-inlined function with tail call at
+ * given address
+ * @cu_die: a CU DIE which including @addr
+ * @addr: target address
+ * @die_mem: a buffer for result DIE
+ *
+ * Search for a non-inlined function DIE with tail call at @addr. Stores the
+ * DIE to @die_mem and returns it if found. Returns NULL if failed.
+ */
+Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+				    Dwarf_Die *die_mem)
+{
+	struct __addr_die_search_param ad;
+	ad.addr = addr;
+	ad.die_mem = die_mem;
+	/* dwarf_getscopes can't find subprogram. */
+	if (!dwarf_getfuncs(cu_die, __die_search_func_tail_cb, &ad, 0))
+		return NULL;
+	else
+		return die_mem;
+}
+
+/* die_find callback for non-inlined function search */
+static int __die_search_func_cb(Dwarf_Die *fn_die, void *data)
+{
+	struct __addr_die_search_param *ad = data;
+
+	/*
+	 * Since a declaration entry doesn't has given pc, this always returns
+	 * function definition entry.
+	 */
+	if (dwarf_tag(fn_die) == DW_TAG_subprogram &&
+	    dwarf_haspc(fn_die, ad->addr)) {
+		memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die));
+		return DWARF_CB_ABORT;
+	}
+	return DWARF_CB_OK;
+}
+
+/**
+ * die_find_realfunc - Search a non-inlined function at given address
+ * @cu_die: a CU DIE which including @addr
+ * @addr: target address
+ * @die_mem: a buffer for result DIE
+ *
+ * Search a non-inlined function DIE which includes @addr. Stores the
+ * DIE to @die_mem and returns it if found. Returns NULL if failed.
+ */
+Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+				    Dwarf_Die *die_mem)
+{
+	struct __addr_die_search_param ad;
+	ad.addr = addr;
+	ad.die_mem = die_mem;
+	/* dwarf_getscopes can't find subprogram. */
+	if (!dwarf_getfuncs(cu_die, __die_search_func_cb, &ad, 0))
+		return NULL;
+	else
+		return die_mem;
+}
+
+/* die_find callback for inline function search */
+static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data)
+{
+	Dwarf_Addr *addr = data;
+
+	if (dwarf_tag(die_mem) == DW_TAG_inlined_subroutine &&
+	    dwarf_haspc(die_mem, *addr))
+		return DIE_FIND_CB_END;
+
+	return DIE_FIND_CB_CONTINUE;
+}
+
+/**
+ * die_find_top_inlinefunc - Search the top inlined function at given address
+ * @sp_die: a subprogram DIE which including @addr
+ * @addr: target address
+ * @die_mem: a buffer for result DIE
+ *
+ * Search an inlined function DIE which includes @addr. Stores the
+ * DIE to @die_mem and returns it if found. Returns NULL if failed.
+ * Even if several inlined functions are expanded recursively, this
+ * doesn't trace it down, and returns the topmost one.
+ */
+Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+				   Dwarf_Die *die_mem)
+{
+	return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem);
+}
+
+/**
+ * die_find_inlinefunc - Search an inlined function at given address
+ * @sp_die: a subprogram DIE which including @addr
+ * @addr: target address
+ * @die_mem: a buffer for result DIE
+ *
+ * Search an inlined function DIE which includes @addr. Stores the
+ * DIE to @die_mem and returns it if found. Returns NULL if failed.
+ * If several inlined functions are expanded recursively, this trace
+ * it down and returns deepest one.
+ */
+Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+			       Dwarf_Die *die_mem)
+{
+	Dwarf_Die tmp_die;
+
+	sp_die = die_find_child(sp_die, __die_find_inline_cb, &addr, &tmp_die);
+	if (!sp_die)
+		return NULL;
+
+	/* Inlined function could be recursive. Trace it until fail */
+	while (sp_die) {
+		memcpy(die_mem, sp_die, sizeof(Dwarf_Die));
+		sp_die = die_find_child(sp_die, __die_find_inline_cb, &addr,
+					&tmp_die);
+	}
+
+	return die_mem;
+}
+
+struct __instance_walk_param {
+	void    *addr;
+	int	(*callback)(Dwarf_Die *, void *);
+	void    *data;
+	int	retval;
+};
+
+static int __die_walk_instances_cb(Dwarf_Die *inst, void *data)
+{
+	struct __instance_walk_param *iwp = data;
+	Dwarf_Attribute attr_mem;
+	Dwarf_Die origin_mem;
+	Dwarf_Attribute *attr;
+	Dwarf_Die *origin;
+	int tmp;
+
+	if (!die_is_func_instance(inst))
+		return DIE_FIND_CB_CONTINUE;
+
+	attr = dwarf_attr(inst, DW_AT_abstract_origin, &attr_mem);
+	if (attr == NULL)
+		return DIE_FIND_CB_CONTINUE;
+
+	origin = dwarf_formref_die(attr, &origin_mem);
+	if (origin == NULL || origin->addr != iwp->addr)
+		return DIE_FIND_CB_CONTINUE;
+
+	/* Ignore redundant instances */
+	if (dwarf_tag(inst) == DW_TAG_inlined_subroutine) {
+		dwarf_decl_line(origin, &tmp);
+		if (die_get_call_lineno(inst) == tmp) {
+			tmp = die_get_decl_fileno(origin);
+			if (die_get_call_fileno(inst) == tmp)
+				return DIE_FIND_CB_CONTINUE;
+		}
+	}
+
+	iwp->retval = iwp->callback(inst, iwp->data);
+
+	return (iwp->retval) ? DIE_FIND_CB_END : DIE_FIND_CB_CONTINUE;
+}
+
+/**
+ * die_walk_instances - Walk on instances of given DIE
+ * @or_die: an abstract original DIE
+ * @callback: a callback function which is called with instance DIE
+ * @data: user data
+ *
+ * Walk on the instances of give @in_die. @in_die must be an inlined function
+ * declaration. This returns the return value of @callback if it returns
+ * non-zero value, or -ENOENT if there is no instance.
+ */
+int die_walk_instances(Dwarf_Die *or_die, int (*callback)(Dwarf_Die *, void *),
+		       void *data)
+{
+	Dwarf_Die cu_die;
+	Dwarf_Die die_mem;
+	struct __instance_walk_param iwp = {
+		.addr = or_die->addr,
+		.callback = callback,
+		.data = data,
+		.retval = -ENOENT,
+	};
+
+	if (dwarf_diecu(or_die, &cu_die, NULL, NULL) == NULL)
+		return -ENOENT;
+
+	die_find_child(&cu_die, __die_walk_instances_cb, &iwp, &die_mem);
+
+	return iwp.retval;
+}
+
+/* Line walker internal parameters */
+struct __line_walk_param {
+	bool recursive;
+	line_walk_callback_t callback;
+	void *data;
+	int retval;
+};
+
+static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data)
+{
+	struct __line_walk_param *lw = data;
+	Dwarf_Addr addr = 0;
+	const char *fname;
+	int lineno;
+
+	if (dwarf_tag(in_die) == DW_TAG_inlined_subroutine) {
+		fname = die_get_call_file(in_die);
+		lineno = die_get_call_lineno(in_die);
+		if (fname && lineno > 0 && die_entrypc(in_die, &addr) == 0) {
+			lw->retval = lw->callback(fname, lineno, addr, lw->data);
+			if (lw->retval != 0)
+				return DIE_FIND_CB_END;
+		}
+		if (!lw->recursive)
+			return DIE_FIND_CB_SIBLING;
+	}
+
+	if (addr) {
+		fname = dwarf_decl_file(in_die);
+		if (fname && dwarf_decl_line(in_die, &lineno) == 0) {
+			lw->retval = lw->callback(fname, lineno, addr, lw->data);
+			if (lw->retval != 0)
+				return DIE_FIND_CB_END;
+		}
+	}
+
+	/* Continue to search nested inlined function call-sites */
+	return DIE_FIND_CB_CONTINUE;
+}
+
+/* Walk on lines of blocks included in given DIE */
+static int __die_walk_funclines(Dwarf_Die *sp_die, bool recursive,
+				line_walk_callback_t callback, void *data)
+{
+	struct __line_walk_param lw = {
+		.recursive = recursive,
+		.callback = callback,
+		.data = data,
+		.retval = 0,
+	};
+	Dwarf_Die die_mem;
+	Dwarf_Addr addr;
+	const char *fname;
+	int lineno;
+
+	/* Handle function declaration line */
+	fname = dwarf_decl_file(sp_die);
+	if (fname && dwarf_decl_line(sp_die, &lineno) == 0 &&
+	    die_entrypc(sp_die, &addr) == 0) {
+		lw.retval = callback(fname, lineno, addr, data);
+		if (lw.retval != 0)
+			goto done;
+	}
+	die_find_child(sp_die, __die_walk_funclines_cb, &lw, &die_mem);
+done:
+	return lw.retval;
+}
+
+static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data)
+{
+	struct __line_walk_param *lw = data;
+
+	/*
+	 * Since inlined function can include another inlined function in
+	 * the same file, we need to walk in it recursively.
+	 */
+	lw->retval = __die_walk_funclines(sp_die, true, lw->callback, lw->data);
+	if (lw->retval != 0)
+		return DWARF_CB_ABORT;
+
+	return DWARF_CB_OK;
+}
+
+/**
+ * die_walk_lines - Walk on lines inside given DIE
+ * @rt_die: a root DIE (CU, subprogram or inlined_subroutine)
+ * @callback: callback routine
+ * @data: user data
+ *
+ * Walk on all lines inside given @rt_die and call @callback on each line.
+ * If the @rt_die is a function, walk only on the lines inside the function,
+ * otherwise @rt_die must be a CU DIE.
+ * Note that this walks not only dwarf line list, but also function entries
+ * and inline call-site.
+ */
+int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
+{
+	Dwarf_Lines *lines;
+	Dwarf_Line *line;
+	Dwarf_Addr addr;
+	const char *fname, *decf = NULL, *inf = NULL;
+	int lineno, ret = 0;
+	int decl = 0, inl;
+	Dwarf_Die die_mem, *cu_die;
+	size_t nlines, i;
+	bool flag;
+
+	/* Get the CU die */
+	if (dwarf_tag(rt_die) != DW_TAG_compile_unit) {
+		cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL);
+		dwarf_decl_line(rt_die, &decl);
+		decf = dwarf_decl_file(rt_die);
+	} else
+		cu_die = rt_die;
+	if (!cu_die) {
+		pr_debug2("Failed to get CU from given DIE.\n");
+		return -EINVAL;
+	}
+
+	/* Get lines list in the CU */
+	if (dwarf_getsrclines(cu_die, &lines, &nlines) != 0) {
+		pr_debug2("Failed to get source lines on this CU.\n");
+		return -ENOENT;
+	}
+	pr_debug2("Get %zd lines from this CU\n", nlines);
+
+	/* Walk on the lines on lines list */
+	for (i = 0; i < nlines; i++) {
+		line = dwarf_onesrcline(lines, i);
+		if (line == NULL ||
+		    dwarf_lineno(line, &lineno) != 0 ||
+		    dwarf_lineaddr(line, &addr) != 0) {
+			pr_debug2("Failed to get line info. "
+				  "Possible error in debuginfo.\n");
+			continue;
+		}
+		/* Skip end-of-sequence */
+		if (dwarf_lineendsequence(line, &flag) != 0 || flag)
+			continue;
+		/* Skip Non statement line-info */
+		if (dwarf_linebeginstatement(line, &flag) != 0 || !flag)
+			continue;
+		/* Filter lines based on address */
+		if (rt_die != cu_die) {
+			/*
+			 * Address filtering
+			 * The line is included in given function, and
+			 * no inline block includes it.
+			 */
+			if (!dwarf_haspc(rt_die, addr))
+				continue;
+
+			if (die_find_inlinefunc(rt_die, addr, &die_mem)) {
+				/* Call-site check */
+				inf = die_get_call_file(&die_mem);
+				if ((inf && !strcmp(inf, decf)) &&
+				    die_get_call_lineno(&die_mem) == lineno)
+					goto found;
+
+				dwarf_decl_line(&die_mem, &inl);
+				if (inl != decl ||
+				    decf != dwarf_decl_file(&die_mem))
+					continue;
+			}
+		}
+found:
+		/* Get source line */
+		fname = dwarf_linesrc(line, NULL, NULL);
+
+		ret = callback(fname, lineno, addr, data);
+		if (ret != 0)
+			return ret;
+	}
+
+	/*
+	 * Dwarf lines doesn't include function declarations and inlined
+	 * subroutines. We have to check functions list or given function.
+	 */
+	if (rt_die != cu_die)
+		/*
+		 * Don't need walk inlined functions recursively, because
+		 * inner inlined functions don't have the lines of the
+		 * specified function.
+		 */
+		ret = __die_walk_funclines(rt_die, false, callback, data);
+	else {
+		struct __line_walk_param param = {
+			.callback = callback,
+			.data = data,
+			.retval = 0,
+		};
+		dwarf_getfuncs(cu_die, __die_walk_culines_cb, &param, 0);
+		ret = param.retval;
+	}
+
+	return ret;
+}
+
+struct __find_variable_param {
+	const char *name;
+	Dwarf_Addr addr;
+};
+
+static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data)
+{
+	struct __find_variable_param *fvp = data;
+	Dwarf_Attribute attr;
+	int tag;
+
+	tag = dwarf_tag(die_mem);
+	if ((tag == DW_TAG_formal_parameter ||
+	     tag == DW_TAG_variable) &&
+	    die_compare_name(die_mem, fvp->name) &&
+	/*
+	 * Does the DIE have location information or const value
+	 * or external instance?
+	 */
+	    (dwarf_attr(die_mem, DW_AT_external, &attr) ||
+	     dwarf_attr(die_mem, DW_AT_location, &attr) ||
+	     dwarf_attr(die_mem, DW_AT_const_value, &attr)))
+		return DIE_FIND_CB_END;
+	if (dwarf_haspc(die_mem, fvp->addr))
+		return DIE_FIND_CB_CONTINUE;
+	else
+		return DIE_FIND_CB_SIBLING;
+}
+
+/**
+ * die_find_variable_at - Find a given name variable at given address
+ * @sp_die: a function DIE
+ * @name: variable name
+ * @addr: address
+ * @die_mem: a buffer for result DIE
+ *
+ * Find a variable DIE called @name at @addr in @sp_die.
+ */
+Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
+				Dwarf_Addr addr, Dwarf_Die *die_mem)
+{
+	struct __find_variable_param fvp = { .name = name, .addr = addr};
+
+	return die_find_child(sp_die, __die_find_variable_cb, (void *)&fvp,
+			      die_mem);
+}
+
+static int __die_find_member_cb(Dwarf_Die *die_mem, void *data)
+{
+	const char *name = data;
+
+	if (dwarf_tag(die_mem) == DW_TAG_member) {
+		if (die_compare_name(die_mem, name))
+			return DIE_FIND_CB_END;
+		else if (!dwarf_diename(die_mem)) {	/* Unnamed structure */
+			Dwarf_Die type_die, tmp_die;
+			if (die_get_type(die_mem, &type_die) &&
+			    die_find_member(&type_die, name, &tmp_die))
+				return DIE_FIND_CB_END;
+		}
+	}
+	return DIE_FIND_CB_SIBLING;
+}
+
+/**
+ * die_find_member - Find a given name member in a data structure
+ * @st_die: a data structure type DIE
+ * @name: member name
+ * @die_mem: a buffer for result DIE
+ *
+ * Find a member DIE called @name in @st_die.
+ */
+Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
+			   Dwarf_Die *die_mem)
+{
+	return die_find_child(st_die, __die_find_member_cb, (void *)name,
+			      die_mem);
+}
+
+/**
+ * die_get_typename - Get the name of given variable DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for result type name
+ *
+ * Get the name of @vr_die and stores it to @buf. Return 0 if succeeded.
+ * and Return -ENOENT if failed to find type name.
+ * Note that the result will stores typedef name if possible, and stores
+ * "*(function_type)" if the type is a function pointer.
+ */
+int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf)
+{
+	Dwarf_Die type;
+	int tag, ret;
+	const char *tmp = "";
+
+	if (__die_get_real_type(vr_die, &type) == NULL)
+		return -ENOENT;
+
+	tag = dwarf_tag(&type);
+	if (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type)
+		tmp = "*";
+	else if (tag == DW_TAG_subroutine_type) {
+		/* Function pointer */
+		return strbuf_add(buf, "(function_type)", 15);
+	} else {
+		if (!dwarf_diename(&type))
+			return -ENOENT;
+		if (tag == DW_TAG_union_type)
+			tmp = "union ";
+		else if (tag == DW_TAG_structure_type)
+			tmp = "struct ";
+		else if (tag == DW_TAG_enumeration_type)
+			tmp = "enum ";
+		/* Write a base name */
+		return strbuf_addf(buf, "%s%s", tmp, dwarf_diename(&type));
+	}
+	ret = die_get_typename(&type, buf);
+	return ret ? ret : strbuf_addstr(buf, tmp);
+}
+
+/**
+ * die_get_varname - Get the name and type of given variable DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for type and variable name
+ *
+ * Get the name and type of @vr_die and stores it in @buf as "type\tname".
+ */
+int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf)
+{
+	int ret;
+
+	ret = die_get_typename(vr_die, buf);
+	if (ret < 0) {
+		pr_debug("Failed to get type, make it unknown.\n");
+		ret = strbuf_add(buf, "(unknown_type)", 14);
+	}
+
+	return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die));
+}
+
+#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT
+/**
+ * die_get_var_innermost_scope - Get innermost scope range of given variable DIE
+ * @sp_die: a subprogram DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for variable byte offset range
+ *
+ * Get the innermost scope range of @vr_die and stores it in @buf as
+ * "@<function_name+[NN-NN,NN-NN]>".
+ */
+static int die_get_var_innermost_scope(Dwarf_Die *sp_die, Dwarf_Die *vr_die,
+				struct strbuf *buf)
+{
+	Dwarf_Die *scopes;
+	int count;
+	size_t offset = 0;
+	Dwarf_Addr base;
+	Dwarf_Addr start, end;
+	Dwarf_Addr entry;
+	int ret;
+	bool first = true;
+	const char *name;
+
+	ret = die_entrypc(sp_die, &entry);
+	if (ret)
+		return ret;
+
+	name = dwarf_diename(sp_die);
+	if (!name)
+		return -ENOENT;
+
+	count = dwarf_getscopes_die(vr_die, &scopes);
+
+	/* (*SCOPES)[1] is the DIE for the scope containing that scope */
+	if (count <= 1) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	while ((offset = dwarf_ranges(&scopes[1], offset, &base,
+					&start, &end)) > 0) {
+		start -= entry;
+		end -= entry;
+
+		if (first) {
+			ret = strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
+					  name, start, end);
+			first = false;
+		} else {
+			ret = strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
+					  start, end);
+		}
+		if (ret < 0)
+			goto out;
+	}
+
+	if (!first)
+		ret = strbuf_add(buf, "]>", 2);
+
+out:
+	free(scopes);
+	return ret;
+}
+
+/**
+ * die_get_var_range - Get byte offset range of given variable DIE
+ * @sp_die: a subprogram DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for type and variable name and byte offset range
+ *
+ * Get the byte offset range of @vr_die and stores it in @buf as
+ * "@<function_name+[NN-NN,NN-NN]>".
+ */
+int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf)
+{
+	int ret = 0;
+	Dwarf_Addr base;
+	Dwarf_Addr start, end;
+	Dwarf_Addr entry;
+	Dwarf_Op *op;
+	size_t nops;
+	size_t offset = 0;
+	Dwarf_Attribute attr;
+	bool first = true;
+	const char *name;
+
+	ret = die_entrypc(sp_die, &entry);
+	if (ret)
+		return ret;
+
+	name = dwarf_diename(sp_die);
+	if (!name)
+		return -ENOENT;
+
+	if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL)
+		return -EINVAL;
+
+	while ((offset = dwarf_getlocations(&attr, offset, &base,
+					&start, &end, &op, &nops)) > 0) {
+		if (start == 0) {
+			/* Single Location Descriptions */
+			ret = die_get_var_innermost_scope(sp_die, vr_die, buf);
+			goto out;
+		}
+
+		/* Location Lists */
+		start -= entry;
+		end -= entry;
+		if (first) {
+			ret = strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
+					  name, start, end);
+			first = false;
+		} else {
+			ret = strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
+					  start, end);
+		}
+		if (ret < 0)
+			goto out;
+	}
+
+	if (!first)
+		ret = strbuf_add(buf, "]>", 2);
+out:
+	return ret;
+}
+#else
+int die_get_var_range(Dwarf_Die *sp_die __maybe_unused,
+		      Dwarf_Die *vr_die __maybe_unused,
+		      struct strbuf *buf __maybe_unused)
+{
+	return -ENOTSUP;
+}
+#endif
+
+/*
+ * die_has_loclist - Check if DW_AT_location of @vr_die is a location list
+ * @vr_die: a variable DIE
+ */
+static bool die_has_loclist(Dwarf_Die *vr_die)
+{
+	Dwarf_Attribute loc;
+	int tag = dwarf_tag(vr_die);
+
+	if (tag != DW_TAG_formal_parameter &&
+	    tag != DW_TAG_variable)
+		return false;
+
+	return (dwarf_attr_integrate(vr_die, DW_AT_location, &loc) &&
+		dwarf_whatform(&loc) == DW_FORM_sec_offset);
+}
+
+/*
+ * die_is_optimized_target - Check if target program is compiled with
+ * optimization
+ * @cu_die: a CU DIE
+ *
+ * For any object in given CU whose DW_AT_location is a location list,
+ * target program is compiled with optimization. This is applicable to
+ * clang as well.
+ */
+bool die_is_optimized_target(Dwarf_Die *cu_die)
+{
+	Dwarf_Die tmp_die;
+
+	if (die_has_loclist(cu_die))
+		return true;
+
+	if (!dwarf_child(cu_die, &tmp_die) &&
+	    die_is_optimized_target(&tmp_die))
+		return true;
+
+	if (!dwarf_siblingof(cu_die, &tmp_die) &&
+	    die_is_optimized_target(&tmp_die))
+		return true;
+
+	return false;
+}
+
+/*
+ * die_search_idx - Search index of given line address
+ * @lines: Line records of single CU
+ * @nr_lines: Number of @lines
+ * @addr: address we are looking for
+ * @idx: index to be set by this function (return value)
+ *
+ * Search for @addr by looping over every lines of CU. If address
+ * matches, set index of that line in @idx. Note that single source
+ * line can have multiple line records. i.e. single source line can
+ * have multiple index.
+ */
+static bool die_search_idx(Dwarf_Lines *lines, unsigned long nr_lines,
+			   Dwarf_Addr addr, unsigned long *idx)
+{
+	unsigned long i;
+	Dwarf_Addr tmp;
+
+	for (i = 0; i < nr_lines; i++) {
+		if (dwarf_lineaddr(dwarf_onesrcline(lines, i), &tmp))
+			return false;
+
+		if (tmp == addr) {
+			*idx = i;
+			return true;
+		}
+	}
+	return false;
+}
+
+/*
+ * die_get_postprologue_addr - Search next address after function prologue
+ * @entrypc_idx: entrypc index
+ * @lines: Line records of single CU
+ * @nr_lines: Number of @lines
+ * @hignpc: high PC address of function
+ * @postprologue_addr: Next address after function prologue (return value)
+ *
+ * Look for prologue-end marker. If there is no explicit marker, return
+ * address of next line record or next source line.
+ */
+static bool die_get_postprologue_addr(unsigned long entrypc_idx,
+				      Dwarf_Lines *lines,
+				      unsigned long nr_lines,
+				      Dwarf_Addr highpc,
+				      Dwarf_Addr *postprologue_addr)
+{
+	unsigned long i;
+	int entrypc_lno, lno;
+	Dwarf_Line *line;
+	Dwarf_Addr addr;
+	bool p_end;
+
+	/* entrypc_lno is actual source line number */
+	line = dwarf_onesrcline(lines, entrypc_idx);
+	if (dwarf_lineno(line, &entrypc_lno))
+		return false;
+
+	for (i = entrypc_idx; i < nr_lines; i++) {
+		line = dwarf_onesrcline(lines, i);
+
+		if (dwarf_lineaddr(line, &addr) ||
+		    dwarf_lineno(line, &lno)    ||
+		    dwarf_lineprologueend(line, &p_end))
+			return false;
+
+		/* highpc is exclusive. [entrypc,highpc) */
+		if (addr >= highpc)
+			break;
+
+		/* clang supports prologue-end marker */
+		if (p_end)
+			break;
+
+		/* Actual next line in source */
+		if (lno != entrypc_lno)
+			break;
+
+		/*
+		 * Single source line can have multiple line records.
+		 * For Example,
+		 *     void foo() { printf("hello\n"); }
+		 * contains two line records. One points to declaration and
+		 * other points to printf() line. Variable 'lno' won't get
+		 * incremented in this case but 'i' will.
+		 */
+		if (i != entrypc_idx)
+			break;
+	}
+
+	dwarf_lineaddr(line, postprologue_addr);
+	if (*postprologue_addr >= highpc)
+		dwarf_lineaddr(dwarf_onesrcline(lines, i - 1),
+			       postprologue_addr);
+
+	return true;
+}
+
+/*
+ * die_skip_prologue - Use next address after prologue as probe location
+ * @sp_die: a subprogram DIE
+ * @cu_die: a CU DIE
+ * @entrypc: entrypc of the function
+ *
+ * Function prologue prepares stack and registers before executing function
+ * logic. When target program is compiled without optimization, function
+ * parameter information is only valid after prologue. When we probe entrypc
+ * of the function, and try to record function parameter, it contains
+ * garbage value.
+ */
+void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die,
+		       Dwarf_Addr *entrypc)
+{
+	size_t nr_lines = 0;
+	unsigned long entrypc_idx = 0;
+	Dwarf_Lines *lines = NULL;
+	Dwarf_Addr postprologue_addr;
+	Dwarf_Addr highpc;
+
+	if (dwarf_highpc(sp_die, &highpc))
+		return;
+
+	if (dwarf_getsrclines(cu_die, &lines, &nr_lines))
+		return;
+
+	if (!die_search_idx(lines, nr_lines, *entrypc, &entrypc_idx))
+		return;
+
+	if (!die_get_postprologue_addr(entrypc_idx, lines, nr_lines,
+				       highpc, &postprologue_addr))
+		return;
+
+	*entrypc = postprologue_addr;
+}
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
new file mode 100644
index 000000000..7ee0fa19b
--- /dev/null
+++ b/tools/perf/util/dwarf-aux.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _DWARF_AUX_H
+#define _DWARF_AUX_H
+/*
+ * dwarf-aux.h : libdw auxiliary interfaces
+ */
+
+#include <dwarf.h>
+#include <elfutils/libdw.h>
+#include <elfutils/libdwfl.h>
+#include <elfutils/version.h>
+
+struct strbuf;
+
+/* Find the realpath of the target file */
+const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname);
+
+/* Get DW_AT_comp_dir (should be NULL with older gcc) */
+const char *cu_get_comp_dir(Dwarf_Die *cu_die);
+
+/* Get a line number and file name for given address */
+int cu_find_lineinfo(Dwarf_Die *cudie, Dwarf_Addr addr,
+		     const char **fname, int *lineno);
+
+/* Walk on functions at given address */
+int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
+			 int (*callback)(Dwarf_Die *, void *), void *data);
+
+/* Get DW_AT_linkage_name (should be NULL for C binary) */
+const char *die_get_linkage_name(Dwarf_Die *dw_die);
+
+/* Get the lowest PC in DIE (including range list) */
+int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr);
+
+/* Ensure that this DIE is a subprogram and definition (not declaration) */
+bool die_is_func_def(Dwarf_Die *dw_die);
+
+/* Ensure that this DIE is an instance of a subprogram */
+bool die_is_func_instance(Dwarf_Die *dw_die);
+
+/* Compare diename and tname */
+bool die_compare_name(Dwarf_Die *dw_die, const char *tname);
+
+/* Matching diename with glob pattern */
+bool die_match_name(Dwarf_Die *dw_die, const char *glob);
+
+/* Get callsite line number of inline-function instance */
+int die_get_call_lineno(Dwarf_Die *in_die);
+
+/* Get callsite file name of inlined function instance */
+const char *die_get_call_file(Dwarf_Die *in_die);
+
+/* Get type die */
+Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
+
+/* Get a type die, but skip qualifiers and typedef */
+Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
+
+/* Check whether the DIE is signed or not */
+bool die_is_signed_type(Dwarf_Die *tp_die);
+
+/* Get data_member_location offset */
+int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs);
+
+/* Return values for die_find_child() callbacks */
+enum {
+	DIE_FIND_CB_END = 0,		/* End of Search */
+	DIE_FIND_CB_CHILD = 1,		/* Search only children */
+	DIE_FIND_CB_SIBLING = 2,	/* Search only siblings */
+	DIE_FIND_CB_CONTINUE = 3,	/* Search children and siblings */
+};
+
+/* Search child DIEs */
+Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
+			 int (*callback)(Dwarf_Die *, void *),
+			 void *data, Dwarf_Die *die_mem);
+
+/* Search a non-inlined function including given address */
+Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+			     Dwarf_Die *die_mem);
+
+/* Search a non-inlined function with tail call at given address */
+Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+				    Dwarf_Die *die_mem);
+
+/* Search the top inlined function including given address */
+Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+				   Dwarf_Die *die_mem);
+
+/* Search the deepest inlined function including given address */
+Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+			       Dwarf_Die *die_mem);
+
+/* Walk on the instances of given DIE */
+int die_walk_instances(Dwarf_Die *in_die,
+		       int (*callback)(Dwarf_Die *, void *), void *data);
+
+/* Walker on lines (Note: line number will not be sorted) */
+typedef int (* line_walk_callback_t) (const char *fname, int lineno,
+				      Dwarf_Addr addr, void *data);
+
+/*
+ * Walk on lines inside given DIE. If the DIE is a subprogram, walk only on
+ * the lines inside the subprogram, otherwise the DIE must be a CU DIE.
+ */
+int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data);
+
+/* Find a variable called 'name' at given address */
+Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
+				Dwarf_Addr addr, Dwarf_Die *die_mem);
+
+/* Find a member called 'name' */
+Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
+			   Dwarf_Die *die_mem);
+
+/* Get the name of given variable DIE */
+int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf);
+
+/* Get the name and type of given variable DIE, stored as "type\tname" */
+int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf);
+int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf);
+
+/* Check if target program is compiled with optimization */
+bool die_is_optimized_target(Dwarf_Die *cu_die);
+
+/* Use next address after prologue as probe location */
+void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die,
+		       Dwarf_Addr *entrypc);
+
+#endif
diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c
new file mode 100644
index 000000000..3fa448674
--- /dev/null
+++ b/tools/perf/util/dwarf-regs.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dwarf-regs.c : Mapping of DWARF debug register numbers into register names.
+ *
+ * Written by: Masami Hiramatsu <mhiramat@kernel.org>
+ */
+
+#include <debug.h>
+#include <dwarf-regs.h>
+#include <elf.h>
+#include <linux/kernel.h>
+
+#ifndef EM_AARCH64
+#define EM_AARCH64	183  /* ARM 64 bit */
+#endif
+
+/* Define const char * {arch}_register_tbl[] */
+#define DEFINE_DWARF_REGSTR_TABLE
+#include "../arch/x86/include/dwarf-regs-table.h"
+#include "../arch/arm/include/dwarf-regs-table.h"
+#include "../arch/arm64/include/dwarf-regs-table.h"
+#include "../arch/sh/include/dwarf-regs-table.h"
+#include "../arch/powerpc/include/dwarf-regs-table.h"
+#include "../arch/s390/include/dwarf-regs-table.h"
+#include "../arch/sparc/include/dwarf-regs-table.h"
+#include "../arch/xtensa/include/dwarf-regs-table.h"
+#include "../arch/mips/include/dwarf-regs-table.h"
+
+#define __get_dwarf_regstr(tbl, n) (((n) < ARRAY_SIZE(tbl)) ? (tbl)[(n)] : NULL)
+
+/* Return architecture dependent register string (for kprobe-tracer) */
+const char *get_dwarf_regstr(unsigned int n, unsigned int machine)
+{
+	switch (machine) {
+	case EM_NONE:	/* Generic arch - use host arch */
+		return get_arch_regstr(n);
+	case EM_386:
+		return __get_dwarf_regstr(x86_32_regstr_tbl, n);
+	case EM_X86_64:
+		return __get_dwarf_regstr(x86_64_regstr_tbl, n);
+	case EM_ARM:
+		return __get_dwarf_regstr(arm_regstr_tbl, n);
+	case EM_AARCH64:
+		return __get_dwarf_regstr(aarch64_regstr_tbl, n);
+	case EM_SH:
+		return __get_dwarf_regstr(sh_regstr_tbl, n);
+	case EM_S390:
+		return __get_dwarf_regstr(s390_regstr_tbl, n);
+	case EM_PPC:
+	case EM_PPC64:
+		return __get_dwarf_regstr(powerpc_regstr_tbl, n);
+	case EM_SPARC:
+	case EM_SPARCV9:
+		return __get_dwarf_regstr(sparc_regstr_tbl, n);
+	case EM_XTENSA:
+		return __get_dwarf_regstr(xtensa_regstr_tbl, n);
+	case EM_MIPS:
+		return __get_dwarf_regstr(mips_regstr_tbl, n);
+	default:
+		pr_err("ELF MACHINE %x is not supported.\n", machine);
+	}
+	return NULL;
+}
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
new file mode 100644
index 000000000..5d878bae7
--- /dev/null
+++ b/tools/perf/util/env.c
@@ -0,0 +1,595 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "cpumap.h"
+#include "debug.h"
+#include "env.h"
+#include "util/header.h"
+#include <linux/ctype.h>
+#include <linux/zalloc.h>
+#include "cgroup.h"
+#include <errno.h>
+#include <sys/utsname.h>
+#include <stdlib.h>
+#include <string.h>
+#include "strbuf.h"
+
+struct perf_env perf_env;
+
+#ifdef HAVE_LIBBPF_SUPPORT
+#include "bpf-event.h"
+#include "bpf-utils.h"
+#include <bpf/libbpf.h>
+
+void perf_env__insert_bpf_prog_info(struct perf_env *env,
+				    struct bpf_prog_info_node *info_node)
+{
+	down_write(&env->bpf_progs.lock);
+	__perf_env__insert_bpf_prog_info(env, info_node);
+	up_write(&env->bpf_progs.lock);
+}
+
+void __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node)
+{
+	__u32 prog_id = info_node->info_linear->info.id;
+	struct bpf_prog_info_node *node;
+	struct rb_node *parent = NULL;
+	struct rb_node **p;
+
+	p = &env->bpf_progs.infos.rb_node;
+
+	while (*p != NULL) {
+		parent = *p;
+		node = rb_entry(parent, struct bpf_prog_info_node, rb_node);
+		if (prog_id < node->info_linear->info.id) {
+			p = &(*p)->rb_left;
+		} else if (prog_id > node->info_linear->info.id) {
+			p = &(*p)->rb_right;
+		} else {
+			pr_debug("duplicated bpf prog info %u\n", prog_id);
+			return;
+		}
+	}
+
+	rb_link_node(&info_node->rb_node, parent, p);
+	rb_insert_color(&info_node->rb_node, &env->bpf_progs.infos);
+	env->bpf_progs.infos_cnt++;
+}
+
+struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
+							__u32 prog_id)
+{
+	struct bpf_prog_info_node *node = NULL;
+	struct rb_node *n;
+
+	down_read(&env->bpf_progs.lock);
+	n = env->bpf_progs.infos.rb_node;
+
+	while (n) {
+		node = rb_entry(n, struct bpf_prog_info_node, rb_node);
+		if (prog_id < node->info_linear->info.id)
+			n = n->rb_left;
+		else if (prog_id > node->info_linear->info.id)
+			n = n->rb_right;
+		else
+			goto out;
+	}
+	node = NULL;
+
+out:
+	up_read(&env->bpf_progs.lock);
+	return node;
+}
+
+bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node)
+{
+	bool ret;
+
+	down_write(&env->bpf_progs.lock);
+	ret = __perf_env__insert_btf(env, btf_node);
+	up_write(&env->bpf_progs.lock);
+	return ret;
+}
+
+bool __perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node)
+{
+	struct rb_node *parent = NULL;
+	__u32 btf_id = btf_node->id;
+	struct btf_node *node;
+	struct rb_node **p;
+
+	p = &env->bpf_progs.btfs.rb_node;
+
+	while (*p != NULL) {
+		parent = *p;
+		node = rb_entry(parent, struct btf_node, rb_node);
+		if (btf_id < node->id) {
+			p = &(*p)->rb_left;
+		} else if (btf_id > node->id) {
+			p = &(*p)->rb_right;
+		} else {
+			pr_debug("duplicated btf %u\n", btf_id);
+			return false;
+		}
+	}
+
+	rb_link_node(&btf_node->rb_node, parent, p);
+	rb_insert_color(&btf_node->rb_node, &env->bpf_progs.btfs);
+	env->bpf_progs.btfs_cnt++;
+	return true;
+}
+
+struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id)
+{
+	struct btf_node *res;
+
+	down_read(&env->bpf_progs.lock);
+	res = __perf_env__find_btf(env, btf_id);
+	up_read(&env->bpf_progs.lock);
+	return res;
+}
+
+struct btf_node *__perf_env__find_btf(struct perf_env *env, __u32 btf_id)
+{
+	struct btf_node *node = NULL;
+	struct rb_node *n;
+
+	n = env->bpf_progs.btfs.rb_node;
+
+	while (n) {
+		node = rb_entry(n, struct btf_node, rb_node);
+		if (btf_id < node->id)
+			n = n->rb_left;
+		else if (btf_id > node->id)
+			n = n->rb_right;
+		else
+			return node;
+	}
+	return NULL;
+}
+
+/* purge data in bpf_progs.infos tree */
+static void perf_env__purge_bpf(struct perf_env *env)
+{
+	struct rb_root *root;
+	struct rb_node *next;
+
+	down_write(&env->bpf_progs.lock);
+
+	root = &env->bpf_progs.infos;
+	next = rb_first(root);
+
+	while (next) {
+		struct bpf_prog_info_node *node;
+
+		node = rb_entry(next, struct bpf_prog_info_node, rb_node);
+		next = rb_next(&node->rb_node);
+		rb_erase(&node->rb_node, root);
+		free(node->info_linear);
+		free(node);
+	}
+
+	env->bpf_progs.infos_cnt = 0;
+
+	root = &env->bpf_progs.btfs;
+	next = rb_first(root);
+
+	while (next) {
+		struct btf_node *node;
+
+		node = rb_entry(next, struct btf_node, rb_node);
+		next = rb_next(&node->rb_node);
+		rb_erase(&node->rb_node, root);
+		free(node);
+	}
+
+	env->bpf_progs.btfs_cnt = 0;
+
+	up_write(&env->bpf_progs.lock);
+}
+#else // HAVE_LIBBPF_SUPPORT
+static void perf_env__purge_bpf(struct perf_env *env __maybe_unused)
+{
+}
+#endif // HAVE_LIBBPF_SUPPORT
+
+void perf_env__exit(struct perf_env *env)
+{
+	int i, j;
+
+	perf_env__purge_bpf(env);
+	perf_env__purge_cgroups(env);
+	zfree(&env->hostname);
+	zfree(&env->os_release);
+	zfree(&env->version);
+	zfree(&env->arch);
+	zfree(&env->cpu_desc);
+	zfree(&env->cpuid);
+	zfree(&env->cmdline);
+	zfree(&env->cmdline_argv);
+	zfree(&env->sibling_dies);
+	zfree(&env->sibling_cores);
+	zfree(&env->sibling_threads);
+	zfree(&env->pmu_mappings);
+	zfree(&env->cpu);
+	for (i = 0; i < env->nr_cpu_pmu_caps; i++)
+		zfree(&env->cpu_pmu_caps[i]);
+	zfree(&env->cpu_pmu_caps);
+	zfree(&env->numa_map);
+
+	for (i = 0; i < env->nr_numa_nodes; i++)
+		perf_cpu_map__put(env->numa_nodes[i].map);
+	zfree(&env->numa_nodes);
+
+	for (i = 0; i < env->caches_cnt; i++)
+		cpu_cache_level__free(&env->caches[i]);
+	zfree(&env->caches);
+
+	for (i = 0; i < env->nr_memory_nodes; i++)
+		zfree(&env->memory_nodes[i].set);
+	zfree(&env->memory_nodes);
+
+	for (i = 0; i < env->nr_hybrid_nodes; i++) {
+		zfree(&env->hybrid_nodes[i].pmu_name);
+		zfree(&env->hybrid_nodes[i].cpus);
+	}
+	zfree(&env->hybrid_nodes);
+
+	for (i = 0; i < env->nr_pmus_with_caps; i++) {
+		for (j = 0; j < env->pmu_caps[i].nr_caps; j++)
+			zfree(&env->pmu_caps[i].caps[j]);
+		zfree(&env->pmu_caps[i].caps);
+		zfree(&env->pmu_caps[i].pmu_name);
+	}
+	zfree(&env->pmu_caps);
+}
+
+void perf_env__init(struct perf_env *env)
+{
+#ifdef HAVE_LIBBPF_SUPPORT
+	env->bpf_progs.infos = RB_ROOT;
+	env->bpf_progs.btfs = RB_ROOT;
+	init_rwsem(&env->bpf_progs.lock);
+#endif
+	env->kernel_is_64_bit = -1;
+}
+
+static void perf_env__init_kernel_mode(struct perf_env *env)
+{
+	const char *arch = perf_env__raw_arch(env);
+
+	if (!strncmp(arch, "x86_64", 6) || !strncmp(arch, "aarch64", 7) ||
+	    !strncmp(arch, "arm64", 5) || !strncmp(arch, "mips64", 6) ||
+	    !strncmp(arch, "parisc64", 8) || !strncmp(arch, "riscv64", 7) ||
+	    !strncmp(arch, "s390x", 5) || !strncmp(arch, "sparc64", 7))
+		env->kernel_is_64_bit = 1;
+	else
+		env->kernel_is_64_bit = 0;
+}
+
+int perf_env__kernel_is_64_bit(struct perf_env *env)
+{
+	if (env->kernel_is_64_bit == -1)
+		perf_env__init_kernel_mode(env);
+
+	return env->kernel_is_64_bit;
+}
+
+int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[])
+{
+	int i;
+
+	/* do not include NULL termination */
+	env->cmdline_argv = calloc(argc, sizeof(char *));
+	if (env->cmdline_argv == NULL)
+		goto out_enomem;
+
+	/*
+	 * Must copy argv contents because it gets moved around during option
+	 * parsing:
+	 */
+	for (i = 0; i < argc ; i++) {
+		env->cmdline_argv[i] = argv[i];
+		if (env->cmdline_argv[i] == NULL)
+			goto out_free;
+	}
+
+	env->nr_cmdline = argc;
+
+	return 0;
+out_free:
+	zfree(&env->cmdline_argv);
+out_enomem:
+	return -ENOMEM;
+}
+
+int perf_env__read_cpu_topology_map(struct perf_env *env)
+{
+	int idx, nr_cpus;
+
+	if (env->cpu != NULL)
+		return 0;
+
+	if (env->nr_cpus_avail == 0)
+		env->nr_cpus_avail = cpu__max_present_cpu().cpu;
+
+	nr_cpus = env->nr_cpus_avail;
+	if (nr_cpus == -1)
+		return -EINVAL;
+
+	env->cpu = calloc(nr_cpus, sizeof(env->cpu[0]));
+	if (env->cpu == NULL)
+		return -ENOMEM;
+
+	for (idx = 0; idx < nr_cpus; ++idx) {
+		struct perf_cpu cpu = { .cpu = idx };
+
+		env->cpu[idx].core_id	= cpu__get_core_id(cpu);
+		env->cpu[idx].socket_id	= cpu__get_socket_id(cpu);
+		env->cpu[idx].die_id	= cpu__get_die_id(cpu);
+	}
+
+	env->nr_cpus_avail = nr_cpus;
+	return 0;
+}
+
+int perf_env__read_pmu_mappings(struct perf_env *env)
+{
+	struct perf_pmu *pmu = NULL;
+	u32 pmu_num = 0;
+	struct strbuf sb;
+
+	while ((pmu = perf_pmu__scan(pmu))) {
+		if (!pmu->name)
+			continue;
+		pmu_num++;
+	}
+	if (!pmu_num) {
+		pr_debug("pmu mappings not available\n");
+		return -ENOENT;
+	}
+	env->nr_pmu_mappings = pmu_num;
+
+	if (strbuf_init(&sb, 128 * pmu_num) < 0)
+		return -ENOMEM;
+
+	while ((pmu = perf_pmu__scan(pmu))) {
+		if (!pmu->name)
+			continue;
+		if (strbuf_addf(&sb, "%u:%s", pmu->type, pmu->name) < 0)
+			goto error;
+		/* include a NULL character at the end */
+		if (strbuf_add(&sb, "", 1) < 0)
+			goto error;
+	}
+
+	env->pmu_mappings = strbuf_detach(&sb, NULL);
+
+	return 0;
+
+error:
+	strbuf_release(&sb);
+	return -1;
+}
+
+int perf_env__read_cpuid(struct perf_env *env)
+{
+	char cpuid[128];
+	int err = get_cpuid(cpuid, sizeof(cpuid));
+
+	if (err)
+		return err;
+
+	free(env->cpuid);
+	env->cpuid = strdup(cpuid);
+	if (env->cpuid == NULL)
+		return ENOMEM;
+	return 0;
+}
+
+static int perf_env__read_arch(struct perf_env *env)
+{
+	struct utsname uts;
+
+	if (env->arch)
+		return 0;
+
+	if (!uname(&uts))
+		env->arch = strdup(uts.machine);
+
+	return env->arch ? 0 : -ENOMEM;
+}
+
+static int perf_env__read_nr_cpus_avail(struct perf_env *env)
+{
+	if (env->nr_cpus_avail == 0)
+		env->nr_cpus_avail = cpu__max_present_cpu().cpu;
+
+	return env->nr_cpus_avail ? 0 : -ENOENT;
+}
+
+const char *perf_env__raw_arch(struct perf_env *env)
+{
+	return env && !perf_env__read_arch(env) ? env->arch : "unknown";
+}
+
+int perf_env__nr_cpus_avail(struct perf_env *env)
+{
+	return env && !perf_env__read_nr_cpus_avail(env) ? env->nr_cpus_avail : 0;
+}
+
+void cpu_cache_level__free(struct cpu_cache_level *cache)
+{
+	zfree(&cache->type);
+	zfree(&cache->map);
+	zfree(&cache->size);
+}
+
+/*
+ * Return architecture name in a normalized form.
+ * The conversion logic comes from the Makefile.
+ */
+static const char *normalize_arch(char *arch)
+{
+	if (!strcmp(arch, "x86_64"))
+		return "x86";
+	if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6')
+		return "x86";
+	if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5))
+		return "sparc";
+	if (!strncmp(arch, "aarch64", 7) || !strncmp(arch, "arm64", 5))
+		return "arm64";
+	if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110"))
+		return "arm";
+	if (!strncmp(arch, "s390", 4))
+		return "s390";
+	if (!strncmp(arch, "parisc", 6))
+		return "parisc";
+	if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3))
+		return "powerpc";
+	if (!strncmp(arch, "mips", 4))
+		return "mips";
+	if (!strncmp(arch, "sh", 2) && isdigit(arch[2]))
+		return "sh";
+
+	return arch;
+}
+
+const char *perf_env__arch(struct perf_env *env)
+{
+	char *arch_name;
+
+	if (!env || !env->arch) { /* Assume local operation */
+		static struct utsname uts = { .machine[0] = '\0', };
+		if (uts.machine[0] == '\0' && uname(&uts) < 0)
+			return NULL;
+		arch_name = uts.machine;
+	} else
+		arch_name = env->arch;
+
+	return normalize_arch(arch_name);
+}
+
+const char *perf_env__cpuid(struct perf_env *env)
+{
+	int status;
+
+	if (!env || !env->cpuid) { /* Assume local operation */
+		status = perf_env__read_cpuid(env);
+		if (status)
+			return NULL;
+	}
+
+	return env->cpuid;
+}
+
+int perf_env__nr_pmu_mappings(struct perf_env *env)
+{
+	int status;
+
+	if (!env || !env->nr_pmu_mappings) { /* Assume local operation */
+		status = perf_env__read_pmu_mappings(env);
+		if (status)
+			return 0;
+	}
+
+	return env->nr_pmu_mappings;
+}
+
+const char *perf_env__pmu_mappings(struct perf_env *env)
+{
+	int status;
+
+	if (!env || !env->pmu_mappings) { /* Assume local operation */
+		status = perf_env__read_pmu_mappings(env);
+		if (status)
+			return NULL;
+	}
+
+	return env->pmu_mappings;
+}
+
+int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu)
+{
+	if (!env->nr_numa_map) {
+		struct numa_node *nn;
+		int i, nr = 0;
+
+		for (i = 0; i < env->nr_numa_nodes; i++) {
+			nn = &env->numa_nodes[i];
+			nr = max(nr, perf_cpu_map__max(nn->map).cpu);
+		}
+
+		nr++;
+
+		/*
+		 * We initialize the numa_map array to prepare
+		 * it for missing cpus, which return node -1
+		 */
+		env->numa_map = malloc(nr * sizeof(int));
+		if (!env->numa_map)
+			return -1;
+
+		for (i = 0; i < nr; i++)
+			env->numa_map[i] = -1;
+
+		env->nr_numa_map = nr;
+
+		for (i = 0; i < env->nr_numa_nodes; i++) {
+			struct perf_cpu tmp;
+			int j;
+
+			nn = &env->numa_nodes[i];
+			perf_cpu_map__for_each_cpu(tmp, j, nn->map)
+				env->numa_map[tmp.cpu] = i;
+		}
+	}
+
+	return cpu.cpu >= 0 && cpu.cpu < env->nr_numa_map ? env->numa_map[cpu.cpu] : -1;
+}
+
+char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name,
+			     const char *cap)
+{
+	char *cap_eq;
+	int cap_size;
+	char **ptr;
+	int i, j;
+
+	if (!pmu_name || !cap)
+		return NULL;
+
+	cap_size = strlen(cap);
+	cap_eq = zalloc(cap_size + 2);
+	if (!cap_eq)
+		return NULL;
+
+	memcpy(cap_eq, cap, cap_size);
+	cap_eq[cap_size] = '=';
+
+	if (!strcmp(pmu_name, "cpu")) {
+		for (i = 0; i < env->nr_cpu_pmu_caps; i++) {
+			if (!strncmp(env->cpu_pmu_caps[i], cap_eq, cap_size + 1)) {
+				free(cap_eq);
+				return &env->cpu_pmu_caps[i][cap_size + 1];
+			}
+		}
+		goto out;
+	}
+
+	for (i = 0; i < env->nr_pmus_with_caps; i++) {
+		if (strcmp(env->pmu_caps[i].pmu_name, pmu_name))
+			continue;
+
+		ptr = env->pmu_caps[i].caps;
+
+		for (j = 0; j < env->pmu_caps[i].nr_caps; j++) {
+			if (!strncmp(ptr[j], cap_eq, cap_size + 1)) {
+				free(cap_eq);
+				return &ptr[j][cap_size + 1];
+			}
+		}
+	}
+
+out:
+	free(cap_eq);
+	return NULL;
+}
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
new file mode 100644
index 000000000..359eff51c
--- /dev/null
+++ b/tools/perf/util/env.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_ENV_H
+#define __PERF_ENV_H
+
+#include <linux/types.h>
+#include <linux/rbtree.h>
+#include "cpumap.h"
+#include "rwsem.h"
+
+struct perf_cpu_map;
+
+struct cpu_topology_map {
+	int	socket_id;
+	int	die_id;
+	int	core_id;
+};
+
+struct cpu_cache_level {
+	u32	level;
+	u32	line_size;
+	u32	sets;
+	u32	ways;
+	char	*type;
+	char	*size;
+	char	*map;
+};
+
+struct numa_node {
+	u32		 node;
+	u64		 mem_total;
+	u64		 mem_free;
+	struct perf_cpu_map	*map;
+};
+
+struct memory_node {
+	u64		 node;
+	u64		 size;
+	unsigned long	*set;
+};
+
+struct hybrid_node {
+	char	*pmu_name;
+	char	*cpus;
+};
+
+struct pmu_caps {
+	int		nr_caps;
+	unsigned int    max_branches;
+	char            **caps;
+	char            *pmu_name;
+};
+
+struct perf_env {
+	char			*hostname;
+	char			*os_release;
+	char			*version;
+	char			*arch;
+	int			nr_cpus_online;
+	int			nr_cpus_avail;
+	char			*cpu_desc;
+	char			*cpuid;
+	unsigned long long	total_mem;
+	unsigned int		msr_pmu_type;
+	unsigned int		max_branches;
+	int			kernel_is_64_bit;
+
+	int			nr_cmdline;
+	int			nr_sibling_cores;
+	int			nr_sibling_dies;
+	int			nr_sibling_threads;
+	int			nr_numa_nodes;
+	int			nr_memory_nodes;
+	int			nr_pmu_mappings;
+	int			nr_groups;
+	int			nr_cpu_pmu_caps;
+	int			nr_hybrid_nodes;
+	int			nr_pmus_with_caps;
+	char			*cmdline;
+	const char		**cmdline_argv;
+	char			*sibling_cores;
+	char			*sibling_dies;
+	char			*sibling_threads;
+	char			*pmu_mappings;
+	char			**cpu_pmu_caps;
+	struct cpu_topology_map	*cpu;
+	struct cpu_cache_level	*caches;
+	int			 caches_cnt;
+	u32			comp_ratio;
+	u32			comp_ver;
+	u32			comp_type;
+	u32			comp_level;
+	u32			comp_mmap_len;
+	struct numa_node	*numa_nodes;
+	struct memory_node	*memory_nodes;
+	unsigned long long	 memory_bsize;
+	struct hybrid_node	*hybrid_nodes;
+	struct pmu_caps		*pmu_caps;
+#ifdef HAVE_LIBBPF_SUPPORT
+	/*
+	 * bpf_info_lock protects bpf rbtrees. This is needed because the
+	 * trees are accessed by different threads in perf-top
+	 */
+	struct {
+		struct rw_semaphore	lock;
+		struct rb_root		infos;
+		u32			infos_cnt;
+		struct rb_root		btfs;
+		u32			btfs_cnt;
+	} bpf_progs;
+#endif // HAVE_LIBBPF_SUPPORT
+	/* same reason as above (for perf-top) */
+	struct {
+		struct rw_semaphore	lock;
+		struct rb_root		tree;
+	} cgroups;
+
+	/* For fast cpu to numa node lookup via perf_env__numa_node */
+	int			*numa_map;
+	int			 nr_numa_map;
+
+	/* For real clock time reference. */
+	struct {
+		u64	tod_ns;
+		u64	clockid_ns;
+		u64     clockid_res_ns;
+		int	clockid;
+		/*
+		 * enabled is valid for report mode, and is true if above
+		 * values are set, it's set in process_clock_data
+		 */
+		bool	enabled;
+	} clock;
+};
+
+enum perf_compress_type {
+	PERF_COMP_NONE = 0,
+	PERF_COMP_ZSTD,
+	PERF_COMP_MAX
+};
+
+struct bpf_prog_info_node;
+struct btf_node;
+
+extern struct perf_env perf_env;
+
+void perf_env__exit(struct perf_env *env);
+
+int perf_env__kernel_is_64_bit(struct perf_env *env);
+
+int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]);
+
+int perf_env__read_cpuid(struct perf_env *env);
+int perf_env__read_pmu_mappings(struct perf_env *env);
+int perf_env__nr_pmu_mappings(struct perf_env *env);
+const char *perf_env__pmu_mappings(struct perf_env *env);
+
+int perf_env__read_cpu_topology_map(struct perf_env *env);
+
+void cpu_cache_level__free(struct cpu_cache_level *cache);
+
+const char *perf_env__arch(struct perf_env *env);
+const char *perf_env__cpuid(struct perf_env *env);
+const char *perf_env__raw_arch(struct perf_env *env);
+int perf_env__nr_cpus_avail(struct perf_env *env);
+
+void perf_env__init(struct perf_env *env);
+void __perf_env__insert_bpf_prog_info(struct perf_env *env,
+				      struct bpf_prog_info_node *info_node);
+void perf_env__insert_bpf_prog_info(struct perf_env *env,
+				    struct bpf_prog_info_node *info_node);
+struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
+							__u32 prog_id);
+bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
+bool __perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
+struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id);
+struct btf_node *__perf_env__find_btf(struct perf_env *env, __u32 btf_id);
+
+int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu);
+char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name,
+			     const char *cap);
+#endif /* __PERF_ENV_H */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
new file mode 100644
index 000000000..1fa14598b
--- /dev/null
+++ b/tools/perf/util/event.c
@@ -0,0 +1,809 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <perf/cpumap.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
+#include <linux/perf_event.h>
+#include <linux/zalloc.h>
+#include "cpumap.h"
+#include "dso.h"
+#include "event.h"
+#include "debug.h"
+#include "hist.h"
+#include "machine.h"
+#include "sort.h"
+#include "string2.h"
+#include "strlist.h"
+#include "thread.h"
+#include "thread_map.h"
+#include "time-utils.h"
+#include <linux/ctype.h>
+#include "map.h"
+#include "util/namespaces.h"
+#include "symbol.h"
+#include "symbol/kallsyms.h"
+#include "asm/bug.h"
+#include "stat.h"
+#include "session.h"
+#include "bpf-event.h"
+#include "print_binary.h"
+#include "tool.h"
+#include "../perf.h"
+
+static const char *perf_event__names[] = {
+	[0]					= "TOTAL",
+	[PERF_RECORD_MMAP]			= "MMAP",
+	[PERF_RECORD_MMAP2]			= "MMAP2",
+	[PERF_RECORD_LOST]			= "LOST",
+	[PERF_RECORD_COMM]			= "COMM",
+	[PERF_RECORD_EXIT]			= "EXIT",
+	[PERF_RECORD_THROTTLE]			= "THROTTLE",
+	[PERF_RECORD_UNTHROTTLE]		= "UNTHROTTLE",
+	[PERF_RECORD_FORK]			= "FORK",
+	[PERF_RECORD_READ]			= "READ",
+	[PERF_RECORD_SAMPLE]			= "SAMPLE",
+	[PERF_RECORD_AUX]			= "AUX",
+	[PERF_RECORD_ITRACE_START]		= "ITRACE_START",
+	[PERF_RECORD_LOST_SAMPLES]		= "LOST_SAMPLES",
+	[PERF_RECORD_SWITCH]			= "SWITCH",
+	[PERF_RECORD_SWITCH_CPU_WIDE]		= "SWITCH_CPU_WIDE",
+	[PERF_RECORD_NAMESPACES]		= "NAMESPACES",
+	[PERF_RECORD_KSYMBOL]			= "KSYMBOL",
+	[PERF_RECORD_BPF_EVENT]			= "BPF_EVENT",
+	[PERF_RECORD_CGROUP]			= "CGROUP",
+	[PERF_RECORD_TEXT_POKE]			= "TEXT_POKE",
+	[PERF_RECORD_AUX_OUTPUT_HW_ID]		= "AUX_OUTPUT_HW_ID",
+	[PERF_RECORD_HEADER_ATTR]		= "ATTR",
+	[PERF_RECORD_HEADER_EVENT_TYPE]		= "EVENT_TYPE",
+	[PERF_RECORD_HEADER_TRACING_DATA]	= "TRACING_DATA",
+	[PERF_RECORD_HEADER_BUILD_ID]		= "BUILD_ID",
+	[PERF_RECORD_FINISHED_ROUND]		= "FINISHED_ROUND",
+	[PERF_RECORD_ID_INDEX]			= "ID_INDEX",
+	[PERF_RECORD_AUXTRACE_INFO]		= "AUXTRACE_INFO",
+	[PERF_RECORD_AUXTRACE]			= "AUXTRACE",
+	[PERF_RECORD_AUXTRACE_ERROR]		= "AUXTRACE_ERROR",
+	[PERF_RECORD_THREAD_MAP]		= "THREAD_MAP",
+	[PERF_RECORD_CPU_MAP]			= "CPU_MAP",
+	[PERF_RECORD_STAT_CONFIG]		= "STAT_CONFIG",
+	[PERF_RECORD_STAT]			= "STAT",
+	[PERF_RECORD_STAT_ROUND]		= "STAT_ROUND",
+	[PERF_RECORD_EVENT_UPDATE]		= "EVENT_UPDATE",
+	[PERF_RECORD_TIME_CONV]			= "TIME_CONV",
+	[PERF_RECORD_HEADER_FEATURE]		= "FEATURE",
+	[PERF_RECORD_COMPRESSED]		= "COMPRESSED",
+	[PERF_RECORD_FINISHED_INIT]		= "FINISHED_INIT",
+};
+
+const char *perf_event__name(unsigned int id)
+{
+	if (id >= ARRAY_SIZE(perf_event__names))
+		return "INVALID";
+	if (!perf_event__names[id])
+		return "UNKNOWN";
+	return perf_event__names[id];
+}
+
+struct process_symbol_args {
+	const char *name;
+	u64	   start;
+};
+
+static int find_symbol_cb(void *arg, const char *name, char type,
+			  u64 start)
+{
+	struct process_symbol_args *args = arg;
+
+	/*
+	 * Must be a function or at least an alias, as in PARISC64, where "_text" is
+	 * an 'A' to the same address as "_stext".
+	 */
+	if (!(kallsyms__is_function(type) ||
+	      type == 'A') || strcmp(name, args->name))
+		return 0;
+
+	args->start = start;
+	return 1;
+}
+
+int kallsyms__get_function_start(const char *kallsyms_filename,
+				 const char *symbol_name, u64 *addr)
+{
+	struct process_symbol_args args = { .name = symbol_name, };
+
+	if (kallsyms__parse(kallsyms_filename, &args, find_symbol_cb) <= 0)
+		return -1;
+
+	*addr = args.start;
+	return 0;
+}
+
+void perf_event__read_stat_config(struct perf_stat_config *config,
+				  struct perf_record_stat_config *event)
+{
+	unsigned i;
+
+	for (i = 0; i < event->nr; i++) {
+
+		switch (event->data[i].tag) {
+#define CASE(__term, __val)					\
+		case PERF_STAT_CONFIG_TERM__##__term:		\
+			config->__val = event->data[i].val;	\
+			break;
+
+		CASE(AGGR_MODE, aggr_mode)
+		CASE(SCALE,     scale)
+		CASE(INTERVAL,  interval)
+#undef CASE
+		default:
+			pr_warning("unknown stat config term %" PRI_lu64 "\n",
+				   event->data[i].tag);
+		}
+	}
+}
+
+size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp)
+{
+	const char *s;
+
+	if (event->header.misc & PERF_RECORD_MISC_COMM_EXEC)
+		s = " exec";
+	else
+		s = "";
+
+	return fprintf(fp, "%s: %s:%d/%d\n", s, event->comm.comm, event->comm.pid, event->comm.tid);
+}
+
+size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp)
+{
+	size_t ret = 0;
+	struct perf_ns_link_info *ns_link_info;
+	u32 nr_namespaces, idx;
+
+	ns_link_info = event->namespaces.link_info;
+	nr_namespaces = event->namespaces.nr_namespaces;
+
+	ret += fprintf(fp, " %d/%d - nr_namespaces: %u\n\t\t[",
+		       event->namespaces.pid,
+		       event->namespaces.tid,
+		       nr_namespaces);
+
+	for (idx = 0; idx < nr_namespaces; idx++) {
+		if (idx && (idx % 4 == 0))
+			ret += fprintf(fp, "\n\t\t ");
+
+		ret  += fprintf(fp, "%u/%s: %" PRIu64 "/%#" PRIx64 "%s", idx,
+				perf_ns__name(idx), (u64)ns_link_info[idx].dev,
+				(u64)ns_link_info[idx].ino,
+				((idx + 1) != nr_namespaces) ? ", " : "]\n");
+	}
+
+	return ret;
+}
+
+size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp)
+{
+	return fprintf(fp, " cgroup: %" PRI_lu64 " %s\n",
+		       event->cgroup.id, event->cgroup.path);
+}
+
+int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine)
+{
+	return machine__process_comm_event(machine, event, sample);
+}
+
+int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct machine *machine)
+{
+	return machine__process_namespaces_event(machine, event, sample);
+}
+
+int perf_event__process_cgroup(struct perf_tool *tool __maybe_unused,
+			       union perf_event *event,
+			       struct perf_sample *sample,
+			       struct machine *machine)
+{
+	return machine__process_cgroup_event(machine, event, sample);
+}
+
+int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine)
+{
+	return machine__process_lost_event(machine, event, sample);
+}
+
+int perf_event__process_aux(struct perf_tool *tool __maybe_unused,
+			    union perf_event *event,
+			    struct perf_sample *sample __maybe_unused,
+			    struct machine *machine)
+{
+	return machine__process_aux_event(machine, event);
+}
+
+int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event,
+				     struct perf_sample *sample __maybe_unused,
+				     struct machine *machine)
+{
+	return machine__process_itrace_start_event(machine, event);
+}
+
+int perf_event__process_aux_output_hw_id(struct perf_tool *tool __maybe_unused,
+					 union perf_event *event,
+					 struct perf_sample *sample __maybe_unused,
+					 struct machine *machine)
+{
+	return machine__process_aux_output_hw_id_event(machine, event);
+}
+
+int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event,
+				     struct perf_sample *sample,
+				     struct machine *machine)
+{
+	return machine__process_lost_samples_event(machine, event, sample);
+}
+
+int perf_event__process_switch(struct perf_tool *tool __maybe_unused,
+			       union perf_event *event,
+			       struct perf_sample *sample __maybe_unused,
+			       struct machine *machine)
+{
+	return machine__process_switch_event(machine, event);
+}
+
+int perf_event__process_ksymbol(struct perf_tool *tool __maybe_unused,
+				union perf_event *event,
+				struct perf_sample *sample __maybe_unused,
+				struct machine *machine)
+{
+	return machine__process_ksymbol(machine, event, sample);
+}
+
+int perf_event__process_bpf(struct perf_tool *tool __maybe_unused,
+			    union perf_event *event,
+			    struct perf_sample *sample,
+			    struct machine *machine)
+{
+	return machine__process_bpf(machine, event, sample);
+}
+
+int perf_event__process_text_poke(struct perf_tool *tool __maybe_unused,
+				  union perf_event *event,
+				  struct perf_sample *sample,
+				  struct machine *machine)
+{
+	return machine__process_text_poke(machine, event, sample);
+}
+
+size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
+{
+	return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64 "]: %c %s\n",
+		       event->mmap.pid, event->mmap.tid, event->mmap.start,
+		       event->mmap.len, event->mmap.pgoff,
+		       (event->header.misc & PERF_RECORD_MISC_MMAP_DATA) ? 'r' : 'x',
+		       event->mmap.filename);
+}
+
+size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp)
+{
+	if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
+		char sbuild_id[SBUILD_ID_SIZE];
+		struct build_id bid;
+
+		build_id__init(&bid, event->mmap2.build_id,
+			       event->mmap2.build_id_size);
+		build_id__sprintf(&bid, sbuild_id);
+
+		return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64
+				   " <%s>]: %c%c%c%c %s\n",
+			       event->mmap2.pid, event->mmap2.tid, event->mmap2.start,
+			       event->mmap2.len, event->mmap2.pgoff, sbuild_id,
+			       (event->mmap2.prot & PROT_READ) ? 'r' : '-',
+			       (event->mmap2.prot & PROT_WRITE) ? 'w' : '-',
+			       (event->mmap2.prot & PROT_EXEC) ? 'x' : '-',
+			       (event->mmap2.flags & MAP_SHARED) ? 's' : 'p',
+			       event->mmap2.filename);
+	} else {
+		return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64
+				   " %02x:%02x %"PRI_lu64" %"PRI_lu64"]: %c%c%c%c %s\n",
+			       event->mmap2.pid, event->mmap2.tid, event->mmap2.start,
+			       event->mmap2.len, event->mmap2.pgoff, event->mmap2.maj,
+			       event->mmap2.min, event->mmap2.ino,
+			       event->mmap2.ino_generation,
+			       (event->mmap2.prot & PROT_READ) ? 'r' : '-',
+			       (event->mmap2.prot & PROT_WRITE) ? 'w' : '-',
+			       (event->mmap2.prot & PROT_EXEC) ? 'x' : '-',
+			       (event->mmap2.flags & MAP_SHARED) ? 's' : 'p',
+			       event->mmap2.filename);
+	}
+}
+
+size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp)
+{
+	struct perf_thread_map *threads = thread_map__new_event(&event->thread_map);
+	size_t ret;
+
+	ret = fprintf(fp, " nr: ");
+
+	if (threads)
+		ret += thread_map__fprintf(threads, fp);
+	else
+		ret += fprintf(fp, "failed to get threads from event\n");
+
+	perf_thread_map__put(threads);
+	return ret;
+}
+
+size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp)
+{
+	struct perf_cpu_map *cpus = cpu_map__new_data(&event->cpu_map.data);
+	size_t ret;
+
+	ret = fprintf(fp, ": ");
+
+	if (cpus)
+		ret += cpu_map__fprintf(cpus, fp);
+	else
+		ret += fprintf(fp, "failed to get cpumap from event\n");
+
+	perf_cpu_map__put(cpus);
+	return ret;
+}
+
+int perf_event__process_mmap(struct perf_tool *tool __maybe_unused,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine)
+{
+	return machine__process_mmap_event(machine, event, sample);
+}
+
+int perf_event__process_mmap2(struct perf_tool *tool __maybe_unused,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine)
+{
+	return machine__process_mmap2_event(machine, event, sample);
+}
+
+size_t perf_event__fprintf_task(union perf_event *event, FILE *fp)
+{
+	return fprintf(fp, "(%d:%d):(%d:%d)\n",
+		       event->fork.pid, event->fork.tid,
+		       event->fork.ppid, event->fork.ptid);
+}
+
+int perf_event__process_fork(struct perf_tool *tool __maybe_unused,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine)
+{
+	return machine__process_fork_event(machine, event, sample);
+}
+
+int perf_event__process_exit(struct perf_tool *tool __maybe_unused,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine)
+{
+	return machine__process_exit_event(machine, event, sample);
+}
+
+size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp)
+{
+	return fprintf(fp, " offset: %#"PRI_lx64" size: %#"PRI_lx64" flags: %#"PRI_lx64" [%s%s%s]\n",
+		       event->aux.aux_offset, event->aux.aux_size,
+		       event->aux.flags,
+		       event->aux.flags & PERF_AUX_FLAG_TRUNCATED ? "T" : "",
+		       event->aux.flags & PERF_AUX_FLAG_OVERWRITE ? "O" : "",
+		       event->aux.flags & PERF_AUX_FLAG_PARTIAL   ? "P" : "");
+}
+
+size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp)
+{
+	return fprintf(fp, " pid: %u tid: %u\n",
+		       event->itrace_start.pid, event->itrace_start.tid);
+}
+
+size_t perf_event__fprintf_aux_output_hw_id(union perf_event *event, FILE *fp)
+{
+	return fprintf(fp, " hw_id: %#"PRI_lx64"\n",
+		       event->aux_output_hw_id.hw_id);
+}
+
+size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp)
+{
+	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+	const char *in_out = !out ? "IN         " :
+		!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT) ?
+				    "OUT        " : "OUT preempt";
+
+	if (event->header.type == PERF_RECORD_SWITCH)
+		return fprintf(fp, " %s\n", in_out);
+
+	return fprintf(fp, " %s  %s pid/tid: %5d/%-5d\n",
+		       in_out, out ? "next" : "prev",
+		       event->context_switch.next_prev_pid,
+		       event->context_switch.next_prev_tid);
+}
+
+static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp)
+{
+	return fprintf(fp, " lost %" PRI_lu64 "\n", event->lost.lost);
+}
+
+size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp)
+{
+	return fprintf(fp, " addr %" PRI_lx64 " len %u type %u flags 0x%x name %s\n",
+		       event->ksymbol.addr, event->ksymbol.len,
+		       event->ksymbol.ksym_type,
+		       event->ksymbol.flags, event->ksymbol.name);
+}
+
+size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp)
+{
+	return fprintf(fp, " type %u, flags %u, id %u\n",
+		       event->bpf.type, event->bpf.flags, event->bpf.id);
+}
+
+static int text_poke_printer(enum binary_printer_ops op, unsigned int val,
+			     void *extra, FILE *fp)
+{
+	bool old = *(bool *)extra;
+
+	switch ((int)op) {
+	case BINARY_PRINT_LINE_BEGIN:
+		return fprintf(fp, "            %s bytes:", old ? "Old" : "New");
+	case BINARY_PRINT_NUM_DATA:
+		return fprintf(fp, " %02x", val);
+	case BINARY_PRINT_LINE_END:
+		return fprintf(fp, "\n");
+	default:
+		return 0;
+	}
+}
+
+size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *machine, FILE *fp)
+{
+	struct perf_record_text_poke_event *tp = &event->text_poke;
+	size_t ret;
+	bool old;
+
+	ret = fprintf(fp, " %" PRI_lx64 " ", tp->addr);
+	if (machine) {
+		struct addr_location al;
+
+		al.map = maps__find(machine__kernel_maps(machine), tp->addr);
+		if (al.map && map__load(al.map) >= 0) {
+			al.addr = al.map->map_ip(al.map, tp->addr);
+			al.sym = map__find_symbol(al.map, al.addr);
+			if (al.sym)
+				ret += symbol__fprintf_symname_offs(al.sym, &al, fp);
+		}
+	}
+	ret += fprintf(fp, " old len %u new len %u\n", tp->old_len, tp->new_len);
+	old = true;
+	ret += binary__fprintf(tp->bytes, tp->old_len, 16, text_poke_printer,
+			       &old, fp);
+	old = false;
+	ret += binary__fprintf(tp->bytes + tp->old_len, tp->new_len, 16,
+			       text_poke_printer, &old, fp);
+	return ret;
+}
+
+size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FILE *fp)
+{
+	size_t ret = fprintf(fp, "PERF_RECORD_%s",
+			     perf_event__name(event->header.type));
+
+	switch (event->header.type) {
+	case PERF_RECORD_COMM:
+		ret += perf_event__fprintf_comm(event, fp);
+		break;
+	case PERF_RECORD_FORK:
+	case PERF_RECORD_EXIT:
+		ret += perf_event__fprintf_task(event, fp);
+		break;
+	case PERF_RECORD_MMAP:
+		ret += perf_event__fprintf_mmap(event, fp);
+		break;
+	case PERF_RECORD_NAMESPACES:
+		ret += perf_event__fprintf_namespaces(event, fp);
+		break;
+	case PERF_RECORD_CGROUP:
+		ret += perf_event__fprintf_cgroup(event, fp);
+		break;
+	case PERF_RECORD_MMAP2:
+		ret += perf_event__fprintf_mmap2(event, fp);
+		break;
+	case PERF_RECORD_AUX:
+		ret += perf_event__fprintf_aux(event, fp);
+		break;
+	case PERF_RECORD_ITRACE_START:
+		ret += perf_event__fprintf_itrace_start(event, fp);
+		break;
+	case PERF_RECORD_SWITCH:
+	case PERF_RECORD_SWITCH_CPU_WIDE:
+		ret += perf_event__fprintf_switch(event, fp);
+		break;
+	case PERF_RECORD_LOST:
+		ret += perf_event__fprintf_lost(event, fp);
+		break;
+	case PERF_RECORD_KSYMBOL:
+		ret += perf_event__fprintf_ksymbol(event, fp);
+		break;
+	case PERF_RECORD_BPF_EVENT:
+		ret += perf_event__fprintf_bpf(event, fp);
+		break;
+	case PERF_RECORD_TEXT_POKE:
+		ret += perf_event__fprintf_text_poke(event, machine, fp);
+		break;
+	case PERF_RECORD_AUX_OUTPUT_HW_ID:
+		ret += perf_event__fprintf_aux_output_hw_id(event, fp);
+		break;
+	default:
+		ret += fprintf(fp, "\n");
+	}
+
+	return ret;
+}
+
+int perf_event__process(struct perf_tool *tool __maybe_unused,
+			union perf_event *event,
+			struct perf_sample *sample,
+			struct machine *machine)
+{
+	return machine__process_event(machine, event, sample);
+}
+
+struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
+			     struct addr_location *al)
+{
+	struct maps *maps = thread->maps;
+	struct machine *machine = maps->machine;
+	bool load_map = false;
+
+	al->maps = maps;
+	al->thread = thread;
+	al->addr = addr;
+	al->cpumode = cpumode;
+	al->filtered = 0;
+
+	if (machine == NULL) {
+		al->map = NULL;
+		return NULL;
+	}
+
+	if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) {
+		al->level = 'k';
+		al->maps = maps = machine__kernel_maps(machine);
+		load_map = true;
+	} else if (cpumode == PERF_RECORD_MISC_USER && perf_host) {
+		al->level = '.';
+	} else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) {
+		al->level = 'g';
+		al->maps = maps = machine__kernel_maps(machine);
+		load_map = true;
+	} else if (cpumode == PERF_RECORD_MISC_GUEST_USER && perf_guest) {
+		al->level = 'u';
+	} else {
+		al->level = 'H';
+		al->map = NULL;
+
+		if ((cpumode == PERF_RECORD_MISC_GUEST_USER ||
+			cpumode == PERF_RECORD_MISC_GUEST_KERNEL) &&
+			!perf_guest)
+			al->filtered |= (1 << HIST_FILTER__GUEST);
+		if ((cpumode == PERF_RECORD_MISC_USER ||
+			cpumode == PERF_RECORD_MISC_KERNEL) &&
+			!perf_host)
+			al->filtered |= (1 << HIST_FILTER__HOST);
+
+		return NULL;
+	}
+
+	al->map = maps__find(maps, al->addr);
+	if (al->map != NULL) {
+		/*
+		 * Kernel maps might be changed when loading symbols so loading
+		 * must be done prior to using kernel maps.
+		 */
+		if (load_map)
+			map__load(al->map);
+		al->addr = al->map->map_ip(al->map, al->addr);
+	}
+
+	return al->map;
+}
+
+/*
+ * For branch stacks or branch samples, the sample cpumode might not be correct
+ * because it applies only to the sample 'ip' and not necessary to 'addr' or
+ * branch stack addresses. If possible, use a fallback to deal with those cases.
+ */
+struct map *thread__find_map_fb(struct thread *thread, u8 cpumode, u64 addr,
+				struct addr_location *al)
+{
+	struct map *map = thread__find_map(thread, cpumode, addr, al);
+	struct machine *machine = thread->maps->machine;
+	u8 addr_cpumode = machine__addr_cpumode(machine, cpumode, addr);
+
+	if (map || addr_cpumode == cpumode)
+		return map;
+
+	return thread__find_map(thread, addr_cpumode, addr, al);
+}
+
+struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode,
+				   u64 addr, struct addr_location *al)
+{
+	al->sym = NULL;
+	if (thread__find_map(thread, cpumode, addr, al))
+		al->sym = map__find_symbol(al->map, al->addr);
+	return al->sym;
+}
+
+struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode,
+				      u64 addr, struct addr_location *al)
+{
+	al->sym = NULL;
+	if (thread__find_map_fb(thread, cpumode, addr, al))
+		al->sym = map__find_symbol(al->map, al->addr);
+	return al->sym;
+}
+
+static bool check_address_range(struct intlist *addr_list, int addr_range,
+				unsigned long addr)
+{
+	struct int_node *pos;
+
+	intlist__for_each_entry(pos, addr_list) {
+		if (addr >= pos->i && addr < pos->i + addr_range)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Callers need to drop the reference to al->thread, obtained in
+ * machine__findnew_thread()
+ */
+int machine__resolve(struct machine *machine, struct addr_location *al,
+		     struct perf_sample *sample)
+{
+	struct thread *thread;
+
+	if (symbol_conf.guest_code && !machine__is_host(machine))
+		thread = machine__findnew_guest_code(machine, sample->pid);
+	else
+		thread = machine__findnew_thread(machine, sample->pid, sample->tid);
+	if (thread == NULL)
+		return -1;
+
+	dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
+	thread__find_map(thread, sample->cpumode, sample->ip, al);
+	dump_printf(" ...... dso: %s\n",
+		    al->map ? al->map->dso->long_name :
+			al->level == 'H' ? "[hypervisor]" : "<not found>");
+
+	if (thread__is_filtered(thread))
+		al->filtered |= (1 << HIST_FILTER__THREAD);
+
+	al->sym = NULL;
+	al->cpu = sample->cpu;
+	al->socket = -1;
+	al->srcline = NULL;
+
+	if (al->cpu >= 0) {
+		struct perf_env *env = machine->env;
+
+		if (env && env->cpu)
+			al->socket = env->cpu[al->cpu].socket_id;
+	}
+
+	if (al->map) {
+		struct dso *dso = al->map->dso;
+
+		if (symbol_conf.dso_list &&
+		    (!dso || !(strlist__has_entry(symbol_conf.dso_list,
+						  dso->short_name) ||
+			       (dso->short_name != dso->long_name &&
+				strlist__has_entry(symbol_conf.dso_list,
+						   dso->long_name))))) {
+			al->filtered |= (1 << HIST_FILTER__DSO);
+		}
+
+		al->sym = map__find_symbol(al->map, al->addr);
+	} else if (symbol_conf.dso_list) {
+		al->filtered |= (1 << HIST_FILTER__DSO);
+	}
+
+	if (symbol_conf.sym_list) {
+		int ret = 0;
+		char al_addr_str[32];
+		size_t sz = sizeof(al_addr_str);
+
+		if (al->sym) {
+			ret = strlist__has_entry(symbol_conf.sym_list,
+						al->sym->name);
+		}
+		if (!ret && al->sym) {
+			snprintf(al_addr_str, sz, "0x%"PRIx64,
+				al->map->unmap_ip(al->map, al->sym->start));
+			ret = strlist__has_entry(symbol_conf.sym_list,
+						al_addr_str);
+		}
+		if (!ret && symbol_conf.addr_list && al->map) {
+			unsigned long addr = al->map->unmap_ip(al->map, al->addr);
+
+			ret = intlist__has_entry(symbol_conf.addr_list, addr);
+			if (!ret && symbol_conf.addr_range) {
+				ret = check_address_range(symbol_conf.addr_list,
+							  symbol_conf.addr_range,
+							  addr);
+			}
+		}
+
+		if (!ret)
+			al->filtered |= (1 << HIST_FILTER__SYMBOL);
+	}
+
+	return 0;
+}
+
+/*
+ * The preprocess_sample method will return with reference counts for the
+ * in it, when done using (and perhaps getting ref counts if needing to
+ * keep a pointer to one of those entries) it must be paired with
+ * addr_location__put(), so that the refcounts can be decremented.
+ */
+void addr_location__put(struct addr_location *al)
+{
+	thread__zput(al->thread);
+}
+
+bool is_bts_event(struct perf_event_attr *attr)
+{
+	return attr->type == PERF_TYPE_HARDWARE &&
+	       (attr->config & PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
+	       attr->sample_period == 1;
+}
+
+bool sample_addr_correlates_sym(struct perf_event_attr *attr)
+{
+	if (attr->type == PERF_TYPE_SOFTWARE &&
+	    (attr->config == PERF_COUNT_SW_PAGE_FAULTS ||
+	     attr->config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
+	     attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ))
+		return true;
+
+	if (is_bts_event(attr))
+		return true;
+
+	return false;
+}
+
+void thread__resolve(struct thread *thread, struct addr_location *al,
+		     struct perf_sample *sample)
+{
+	thread__find_map_fb(thread, sample->cpumode, sample->addr, al);
+
+	al->cpu = sample->cpu;
+	al->sym = NULL;
+
+	if (al->map)
+		al->sym = map__find_symbol(al->map, al->addr);
+}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
new file mode 100644
index 000000000..12eae6917
--- /dev/null
+++ b/tools/perf/util/event.h
@@ -0,0 +1,523 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_RECORD_H
+#define __PERF_RECORD_H
+/*
+ * The linux/stddef.h isn't need here, but is needed for __always_inline used
+ * in files included from uapi/linux/perf_event.h such as
+ * /usr/include/linux/swab.h and /usr/include/linux/byteorder/little_endian.h,
+ * detected in at least musl libc, used in Alpine Linux. -acme
+ */
+#include <stdio.h>
+#include <linux/stddef.h>
+#include <perf/event.h>
+#include <linux/types.h>
+
+#include "perf_regs.h"
+
+struct dso;
+struct machine;
+struct perf_event_attr;
+
+#ifdef __LP64__
+/*
+ * /usr/include/inttypes.h uses just 'lu' for PRIu64, but we end up defining
+ * __u64 as long long unsigned int, and then -Werror=format= kicks in and
+ * complains of the mismatched types, so use these two special extra PRI
+ * macros to overcome that.
+ */
+#define PRI_lu64 "l" PRIu64
+#define PRI_lx64 "l" PRIx64
+#define PRI_ld64 "l" PRId64
+#else
+#define PRI_lu64 PRIu64
+#define PRI_lx64 PRIx64
+#define PRI_ld64 PRId64
+#endif
+
+#define PERF_SAMPLE_MASK				\
+	(PERF_SAMPLE_IP | PERF_SAMPLE_TID |		\
+	 PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR |		\
+	PERF_SAMPLE_ID | PERF_SAMPLE_STREAM_ID |	\
+	 PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD |		\
+	 PERF_SAMPLE_IDENTIFIER)
+
+/* perf sample has 16 bits size limit */
+#define PERF_SAMPLE_MAX_SIZE (1 << 16)
+
+/* number of register is bound by the number of bits in regs_dump::mask (64) */
+#define PERF_SAMPLE_REGS_CACHE_SIZE (8 * sizeof(u64))
+
+struct regs_dump {
+	u64 abi;
+	u64 mask;
+	u64 *regs;
+
+	/* Cached values/mask filled by first register access. */
+	u64 cache_regs[PERF_SAMPLE_REGS_CACHE_SIZE];
+	u64 cache_mask;
+};
+
+struct stack_dump {
+	u16 offset;
+	u64 size;
+	char *data;
+};
+
+struct sample_read_value {
+	u64 value;
+	u64 id;   /* only if PERF_FORMAT_ID */
+	u64 lost; /* only if PERF_FORMAT_LOST */
+};
+
+struct sample_read {
+	u64 time_enabled;
+	u64 time_running;
+	union {
+		struct {
+			u64 nr;
+			struct sample_read_value *values;
+		} group;
+		struct sample_read_value one;
+	};
+};
+
+static inline size_t sample_read_value_size(u64 read_format)
+{
+	/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+	if (read_format & PERF_FORMAT_LOST)
+		return sizeof(struct sample_read_value);
+	else
+		return offsetof(struct sample_read_value, lost);
+}
+
+static inline struct sample_read_value *
+next_sample_read_value(struct sample_read_value *v, u64 read_format)
+{
+	return (void *)v + sample_read_value_size(read_format);
+}
+
+#define sample_read_group__for_each(v, nr, rf)		\
+	for (int __i = 0; __i < (int)nr; v = next_sample_read_value(v, rf), __i++)
+
+struct ip_callchain {
+	u64 nr;
+	u64 ips[];
+};
+
+struct branch_stack;
+
+enum {
+	PERF_IP_FLAG_BRANCH		= 1ULL << 0,
+	PERF_IP_FLAG_CALL		= 1ULL << 1,
+	PERF_IP_FLAG_RETURN		= 1ULL << 2,
+	PERF_IP_FLAG_CONDITIONAL	= 1ULL << 3,
+	PERF_IP_FLAG_SYSCALLRET		= 1ULL << 4,
+	PERF_IP_FLAG_ASYNC		= 1ULL << 5,
+	PERF_IP_FLAG_INTERRUPT		= 1ULL << 6,
+	PERF_IP_FLAG_TX_ABORT		= 1ULL << 7,
+	PERF_IP_FLAG_TRACE_BEGIN	= 1ULL << 8,
+	PERF_IP_FLAG_TRACE_END		= 1ULL << 9,
+	PERF_IP_FLAG_IN_TX		= 1ULL << 10,
+	PERF_IP_FLAG_VMENTRY		= 1ULL << 11,
+	PERF_IP_FLAG_VMEXIT		= 1ULL << 12,
+	PERF_IP_FLAG_INTR_DISABLE	= 1ULL << 13,
+	PERF_IP_FLAG_INTR_TOGGLE	= 1ULL << 14,
+};
+
+#define PERF_IP_FLAG_CHARS "bcrosyiABExghDt"
+
+#define PERF_BRANCH_MASK		(\
+	PERF_IP_FLAG_BRANCH		|\
+	PERF_IP_FLAG_CALL		|\
+	PERF_IP_FLAG_RETURN		|\
+	PERF_IP_FLAG_CONDITIONAL	|\
+	PERF_IP_FLAG_SYSCALLRET		|\
+	PERF_IP_FLAG_ASYNC		|\
+	PERF_IP_FLAG_INTERRUPT		|\
+	PERF_IP_FLAG_TX_ABORT		|\
+	PERF_IP_FLAG_TRACE_BEGIN	|\
+	PERF_IP_FLAG_TRACE_END		|\
+	PERF_IP_FLAG_VMENTRY		|\
+	PERF_IP_FLAG_VMEXIT)
+
+#define MAX_INSN 16
+
+struct aux_sample {
+	u64 size;
+	void *data;
+};
+
+struct perf_sample {
+	u64 ip;
+	u32 pid, tid;
+	u64 time;
+	u64 addr;
+	u64 id;
+	u64 stream_id;
+	u64 period;
+	u64 weight;
+	u64 transaction;
+	u64 insn_cnt;
+	u64 cyc_cnt;
+	u32 cpu;
+	u32 raw_size;
+	u64 data_src;
+	u64 phys_addr;
+	u64 data_page_size;
+	u64 code_page_size;
+	u64 cgroup;
+	u32 flags;
+	u32 machine_pid;
+	u32 vcpu;
+	u16 insn_len;
+	u8  cpumode;
+	u16 misc;
+	u16 ins_lat;
+	u16 p_stage_cyc;
+	bool no_hw_idx;		/* No hw_idx collected in branch_stack */
+	char insn[MAX_INSN];
+	void *raw_data;
+	struct ip_callchain *callchain;
+	struct branch_stack *branch_stack;
+	struct regs_dump  user_regs;
+	struct regs_dump  intr_regs;
+	struct stack_dump user_stack;
+	struct sample_read read;
+	struct aux_sample aux_sample;
+};
+
+#define PERF_MEM_DATA_SRC_NONE \
+	(PERF_MEM_S(OP, NA) |\
+	 PERF_MEM_S(LVL, NA) |\
+	 PERF_MEM_S(SNOOP, NA) |\
+	 PERF_MEM_S(LOCK, NA) |\
+	 PERF_MEM_S(TLB, NA))
+
+/* Attribute type for custom synthesized events */
+#define PERF_TYPE_SYNTH		(INT_MAX + 1U)
+
+/* Attribute config for custom synthesized events */
+enum perf_synth_id {
+	PERF_SYNTH_INTEL_PTWRITE,
+	PERF_SYNTH_INTEL_MWAIT,
+	PERF_SYNTH_INTEL_PWRE,
+	PERF_SYNTH_INTEL_EXSTOP,
+	PERF_SYNTH_INTEL_PWRX,
+	PERF_SYNTH_INTEL_CBR,
+	PERF_SYNTH_INTEL_PSB,
+	PERF_SYNTH_INTEL_EVT,
+	PERF_SYNTH_INTEL_IFLAG_CHG,
+};
+
+/*
+ * Raw data formats for synthesized events. Note that 4 bytes of padding are
+ * present to match the 'size' member of PERF_SAMPLE_RAW data which is always
+ * 8-byte aligned. That means we must dereference raw_data with an offset of 4.
+ * Refer perf_sample__synth_ptr() and perf_synth__raw_data().  It also means the
+ * structure sizes are 4 bytes bigger than the raw_size, refer
+ * perf_synth__raw_size().
+ */
+
+struct perf_synth_intel_ptwrite {
+	u32 padding;
+	union {
+		struct {
+			u32	ip		:  1,
+				reserved	: 31;
+		};
+		u32	flags;
+	};
+	u64	payload;
+};
+
+struct perf_synth_intel_mwait {
+	u32 padding;
+	u32 reserved;
+	union {
+		struct {
+			u64	hints		:  8,
+				reserved1	: 24,
+				extensions	:  2,
+				reserved2	: 30;
+		};
+		u64	payload;
+	};
+};
+
+struct perf_synth_intel_pwre {
+	u32 padding;
+	u32 reserved;
+	union {
+		struct {
+			u64	reserved1	:  7,
+				hw		:  1,
+				subcstate	:  4,
+				cstate		:  4,
+				reserved2	: 48;
+		};
+		u64	payload;
+	};
+};
+
+struct perf_synth_intel_exstop {
+	u32 padding;
+	union {
+		struct {
+			u32	ip		:  1,
+				reserved	: 31;
+		};
+		u32	flags;
+	};
+};
+
+struct perf_synth_intel_pwrx {
+	u32 padding;
+	u32 reserved;
+	union {
+		struct {
+			u64	deepest_cstate	:  4,
+				last_cstate	:  4,
+				wake_reason	:  4,
+				reserved1	: 52;
+		};
+		u64	payload;
+	};
+};
+
+struct perf_synth_intel_cbr {
+	u32 padding;
+	union {
+		struct {
+			u32	cbr		:  8,
+				reserved1	:  8,
+				max_nonturbo	:  8,
+				reserved2	:  8;
+		};
+		u32	flags;
+	};
+	u32 freq;
+	u32 reserved3;
+};
+
+struct perf_synth_intel_psb {
+	u32 padding;
+	u32 reserved;
+	u64 offset;
+};
+
+struct perf_synth_intel_evd {
+	union {
+		struct {
+			u8	evd_type;
+			u8	reserved[7];
+		};
+		u64	et;
+	};
+	u64	payload;
+};
+
+/* Intel PT Event Trace */
+struct perf_synth_intel_evt {
+	u32 padding;
+	union {
+		struct {
+			u32	type		:  5,
+				reserved	:  2,
+				ip		:  1,
+				vector		:  8,
+				evd_cnt		: 16;
+		};
+		u32	cfe;
+	};
+	struct perf_synth_intel_evd evd[0];
+};
+
+struct perf_synth_intel_iflag_chg {
+	u32 padding;
+	union {
+		struct {
+			u32	iflag		:  1,
+				via_branch	:  1;
+		};
+		u32	flags;
+	};
+	u64	branch_ip; /* If via_branch */
+};
+
+/*
+ * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get
+ * 8-byte alignment.
+ */
+static inline void *perf_sample__synth_ptr(struct perf_sample *sample)
+{
+	return sample->raw_data - 4;
+}
+
+static inline void *perf_synth__raw_data(void *p)
+{
+	return p + 4;
+}
+
+#define perf_synth__raw_size(d) (sizeof(d) - 4)
+
+#define perf_sample__bad_synth_size(s, d) ((s)->raw_size < sizeof(d) - 4)
+
+enum {
+	PERF_STAT_ROUND_TYPE__INTERVAL	= 0,
+	PERF_STAT_ROUND_TYPE__FINAL	= 1,
+};
+
+void perf_event__print_totals(void);
+
+struct perf_cpu_map;
+struct perf_record_stat_config;
+struct perf_stat_config;
+struct perf_tool;
+
+void perf_event__read_stat_config(struct perf_stat_config *config,
+				  struct perf_record_stat_config *event);
+
+int perf_event__process_comm(struct perf_tool *tool,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine);
+int perf_event__process_lost(struct perf_tool *tool,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine);
+int perf_event__process_lost_samples(struct perf_tool *tool,
+				     union perf_event *event,
+				     struct perf_sample *sample,
+				     struct machine *machine);
+int perf_event__process_aux(struct perf_tool *tool,
+			    union perf_event *event,
+			    struct perf_sample *sample,
+			    struct machine *machine);
+int perf_event__process_itrace_start(struct perf_tool *tool,
+				     union perf_event *event,
+				     struct perf_sample *sample,
+				     struct machine *machine);
+int perf_event__process_aux_output_hw_id(struct perf_tool *tool,
+					 union perf_event *event,
+					 struct perf_sample *sample,
+					 struct machine *machine);
+int perf_event__process_switch(struct perf_tool *tool,
+			       union perf_event *event,
+			       struct perf_sample *sample,
+			       struct machine *machine);
+int perf_event__process_namespaces(struct perf_tool *tool,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct machine *machine);
+int perf_event__process_cgroup(struct perf_tool *tool,
+			       union perf_event *event,
+			       struct perf_sample *sample,
+			       struct machine *machine);
+int perf_event__process_mmap(struct perf_tool *tool,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine);
+int perf_event__process_mmap2(struct perf_tool *tool,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine);
+int perf_event__process_fork(struct perf_tool *tool,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine);
+int perf_event__process_exit(struct perf_tool *tool,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine);
+int perf_event__process_ksymbol(struct perf_tool *tool,
+				union perf_event *event,
+				struct perf_sample *sample,
+				struct machine *machine);
+int perf_event__process_bpf(struct perf_tool *tool,
+			    union perf_event *event,
+			    struct perf_sample *sample,
+			    struct machine *machine);
+int perf_event__process_text_poke(struct perf_tool *tool,
+				  union perf_event *event,
+				  struct perf_sample *sample,
+				  struct machine *machine);
+int perf_event__process(struct perf_tool *tool,
+			union perf_event *event,
+			struct perf_sample *sample,
+			struct machine *machine);
+
+struct addr_location;
+
+int machine__resolve(struct machine *machine, struct addr_location *al,
+		     struct perf_sample *sample);
+
+void addr_location__put(struct addr_location *al);
+
+struct thread;
+
+bool is_bts_event(struct perf_event_attr *attr);
+bool sample_addr_correlates_sym(struct perf_event_attr *attr);
+void thread__resolve(struct thread *thread, struct addr_location *al,
+		     struct perf_sample *sample);
+
+const char *perf_event__name(unsigned int id);
+
+size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_task(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_aux_output_hw_id(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *machine,FILE *fp);
+size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FILE *fp);
+
+int kallsyms__get_function_start(const char *kallsyms_filename,
+				 const char *symbol_name, u64 *addr);
+
+void event_attr_init(struct perf_event_attr *attr);
+
+int perf_event_paranoid(void);
+bool perf_event_paranoid_check(int max_level);
+
+extern int sysctl_perf_event_max_stack;
+extern int sysctl_perf_event_max_contexts_per_stack;
+extern unsigned int proc_map_timeout;
+
+#define PAGE_SIZE_NAME_LEN	32
+char *get_page_size_name(u64 size, char *str);
+
+void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type);
+void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type);
+const char *arch_perf_header_entry(const char *se_header);
+int arch_support_sort_key(const char *sort_key);
+
+static inline bool perf_event_header__cpumode_is_guest(u8 cpumode)
+{
+	return cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
+	       cpumode == PERF_RECORD_MISC_GUEST_USER;
+}
+
+static inline bool perf_event_header__misc_is_guest(u16 misc)
+{
+	return perf_event_header__cpumode_is_guest(misc & PERF_RECORD_MISC_CPUMODE_MASK);
+}
+
+static inline bool perf_event_header__is_guest(const struct perf_event_header *header)
+{
+	return perf_event_header__misc_is_guest(header->misc);
+}
+
+static inline bool perf_event__is_guest(const union perf_event *event)
+{
+	return perf_event_header__is_guest(&event->header);
+}
+
+#endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/events_stats.h b/tools/perf/util/events_stats.h
new file mode 100644
index 000000000..8fecc9fba
--- /dev/null
+++ b/tools/perf/util/events_stats.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_EVENTS_STATS_
+#define __PERF_EVENTS_STATS_
+
+#include <stdio.h>
+#include <perf/event.h>
+#include <linux/types.h>
+#include "auxtrace.h"
+
+/*
+ * The kernel collects the number of events it couldn't send in a stretch and
+ * when possible sends this number in a PERF_RECORD_LOST event. The number of
+ * such "chunks" of lost events is stored in .nr_events[PERF_EVENT_LOST] while
+ * total_lost tells exactly how many events the kernel in fact lost, i.e. it is
+ * the sum of all struct perf_record_lost.lost fields reported.
+ *
+ * The kernel discards mixed up samples and sends the number in a
+ * PERF_RECORD_LOST_SAMPLES event. The number of lost-samples events is stored
+ * in .nr_events[PERF_RECORD_LOST_SAMPLES] while total_lost_samples tells
+ * exactly how many samples the kernel in fact dropped, i.e. it is the sum of
+ * all struct perf_record_lost_samples.lost fields reported.
+ *
+ * The total_period is needed because by default auto-freq is used, so
+ * multiplying nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get
+ * the total number of low level events, it is necessary to sum all struct
+ * perf_record_sample.period and stash the result in total_period.
+ */
+struct events_stats {
+	u64 total_lost;
+	u64 total_lost_samples;
+	u64 total_aux_lost;
+	u64 total_aux_partial;
+	u64 total_aux_collision;
+	u64 total_invalid_chains;
+	u32 nr_events[PERF_RECORD_HEADER_MAX];
+	u32 nr_lost_warned;
+	u32 nr_unknown_events;
+	u32 nr_invalid_chains;
+	u32 nr_unknown_id;
+	u32 nr_unprocessable_samples;
+	u32 nr_auxtrace_errors[PERF_AUXTRACE_ERROR_MAX];
+	u32 nr_proc_map_timeout;
+};
+
+struct hists_stats {
+	u64 total_period;
+	u64 total_non_filtered_period;
+	u32 nr_samples;
+	u32 nr_non_filtered_samples;
+	u32 nr_lost_samples;
+};
+
+void events_stats__inc(struct events_stats *stats, u32 type);
+
+size_t events_stats__fprintf(struct events_stats *stats, FILE *fp,
+			     bool skip_empty);
+
+#endif /* __PERF_EVENTS_STATS_ */
diff --git a/tools/perf/util/evlist-hybrid.c b/tools/perf/util/evlist-hybrid.c
new file mode 100644
index 000000000..57f02beef
--- /dev/null
+++ b/tools/perf/util/evlist-hybrid.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <errno.h>
+#include <inttypes.h>
+#include "cpumap.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "../perf.h"
+#include "util/pmu-hybrid.h"
+#include "util/evlist-hybrid.h"
+#include "debug.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <perf/evlist.h>
+#include <perf/evsel.h>
+#include <perf/cpumap.h>
+
+int evlist__add_default_hybrid(struct evlist *evlist, bool precise)
+{
+	struct evsel *evsel;
+	struct perf_pmu *pmu;
+	__u64 config;
+	struct perf_cpu_map *cpus;
+
+	perf_pmu__for_each_hybrid_pmu(pmu) {
+		config = PERF_COUNT_HW_CPU_CYCLES |
+			 ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT);
+		evsel = evsel__new_cycles(precise, PERF_TYPE_HARDWARE,
+					  config);
+		if (!evsel)
+			return -ENOMEM;
+
+		cpus = perf_cpu_map__get(pmu->cpus);
+		evsel->core.cpus = cpus;
+		evsel->core.own_cpus = perf_cpu_map__get(cpus);
+		evsel->pmu_name = strdup(pmu->name);
+		evlist__add(evlist, evsel);
+	}
+
+	return 0;
+}
+
+static bool group_hybrid_conflict(struct evsel *leader)
+{
+	struct evsel *pos, *prev = NULL;
+
+	for_each_group_evsel(pos, leader) {
+		if (!evsel__is_hybrid(pos))
+			continue;
+
+		if (prev && strcmp(prev->pmu_name, pos->pmu_name))
+			return true;
+
+		prev = pos;
+	}
+
+	return false;
+}
+
+void evlist__warn_hybrid_group(struct evlist *evlist)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel__is_group_leader(evsel) &&
+		    evsel->core.nr_members > 1 &&
+		    group_hybrid_conflict(evsel)) {
+			pr_warning("WARNING: events in group from "
+				   "different hybrid PMUs!\n");
+			return;
+		}
+	}
+}
+
+bool evlist__has_hybrid(struct evlist *evlist)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->pmu_name &&
+		    perf_pmu__is_hybrid(evsel->pmu_name)) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list)
+{
+	struct perf_cpu_map *cpus;
+	struct evsel *evsel, *tmp;
+	struct perf_pmu *pmu;
+	int ret, unmatched_count = 0, events_nr = 0;
+
+	if (!perf_pmu__has_hybrid() || !cpu_list)
+		return 0;
+
+	cpus = perf_cpu_map__new(cpu_list);
+	if (!cpus)
+		return -1;
+
+	/*
+	 * The evsels are created with hybrid pmu's cpus. But now we
+	 * need to check and adjust the cpus of evsel by cpu_list because
+	 * cpu_list may cause conflicts with cpus of evsel. For example,
+	 * cpus of evsel is cpu0-7, but the cpu_list is cpu6-8, we need
+	 * to adjust the cpus of evsel to cpu6-7. And then propatate maps
+	 * in evlist__create_maps().
+	 */
+	evlist__for_each_entry_safe(evlist, tmp, evsel) {
+		struct perf_cpu_map *matched_cpus, *unmatched_cpus;
+		char buf1[128], buf2[128];
+
+		pmu = perf_pmu__find_hybrid_pmu(evsel->pmu_name);
+		if (!pmu)
+			continue;
+
+		ret = perf_pmu__cpus_match(pmu, cpus, &matched_cpus,
+					   &unmatched_cpus);
+		if (ret)
+			goto out;
+
+		events_nr++;
+
+		if (perf_cpu_map__nr(matched_cpus) > 0 &&
+		    (perf_cpu_map__nr(unmatched_cpus) > 0 ||
+		     perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(cpus) ||
+		     perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(pmu->cpus))) {
+			perf_cpu_map__put(evsel->core.cpus);
+			perf_cpu_map__put(evsel->core.own_cpus);
+			evsel->core.cpus = perf_cpu_map__get(matched_cpus);
+			evsel->core.own_cpus = perf_cpu_map__get(matched_cpus);
+
+			if (perf_cpu_map__nr(unmatched_cpus) > 0) {
+				cpu_map__snprint(matched_cpus, buf1, sizeof(buf1));
+				pr_warning("WARNING: use %s in '%s' for '%s', skip other cpus in list.\n",
+					   buf1, pmu->name, evsel->name);
+			}
+		}
+
+		if (perf_cpu_map__nr(matched_cpus) == 0) {
+			evlist__remove(evlist, evsel);
+			evsel__delete(evsel);
+
+			cpu_map__snprint(cpus, buf1, sizeof(buf1));
+			cpu_map__snprint(pmu->cpus, buf2, sizeof(buf2));
+			pr_warning("WARNING: %s isn't a '%s', please use a CPU list in the '%s' range (%s)\n",
+				   buf1, pmu->name, pmu->name, buf2);
+			unmatched_count++;
+		}
+
+		perf_cpu_map__put(matched_cpus);
+		perf_cpu_map__put(unmatched_cpus);
+	}
+	if (events_nr)
+		ret = (unmatched_count == events_nr) ? -1 : 0;
+out:
+	perf_cpu_map__put(cpus);
+	return ret;
+}
diff --git a/tools/perf/util/evlist-hybrid.h b/tools/perf/util/evlist-hybrid.h
new file mode 100644
index 000000000..aacdb1b0f
--- /dev/null
+++ b/tools/perf/util/evlist-hybrid.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_EVLIST_HYBRID_H
+#define __PERF_EVLIST_HYBRID_H
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include "evlist.h"
+#include <unistd.h>
+
+int evlist__add_default_hybrid(struct evlist *evlist, bool precise);
+void evlist__warn_hybrid_group(struct evlist *evlist);
+bool evlist__has_hybrid(struct evlist *evlist);
+int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list);
+
+#endif /* __PERF_EVLIST_HYBRID_H */
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
new file mode 100644
index 000000000..ca08e6dc8
--- /dev/null
+++ b/tools/perf/util/evlist.c
@@ -0,0 +1,2450 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from builtin-{top,stat,record}.c, see those files for further
+ * copyright notes.
+ */
+#include <api/fs/fs.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <poll.h>
+#include "cpumap.h"
+#include "util/mmap.h"
+#include "thread_map.h"
+#include "target.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "record.h"
+#include "debug.h"
+#include "units.h"
+#include "bpf_counter.h"
+#include <internal/lib.h> // page_size
+#include "affinity.h"
+#include "../perf.h"
+#include "asm/bug.h"
+#include "bpf-event.h"
+#include "util/string2.h"
+#include "util/perf_api_probe.h"
+#include "util/evsel_fprintf.h"
+#include "util/evlist-hybrid.h"
+#include "util/pmu.h"
+#include <signal.h>
+#include <unistd.h>
+#include <sched.h>
+#include <stdlib.h>
+
+#include "parse-events.h"
+#include <subcmd/parse-options.h>
+
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/timerfd.h>
+
+#include <linux/bitops.h>
+#include <linux/hash.h>
+#include <linux/log2.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/time64.h>
+#include <linux/zalloc.h>
+#include <perf/evlist.h>
+#include <perf/evsel.h>
+#include <perf/cpumap.h>
+#include <perf/mmap.h>
+
+#include <internal/xyarray.h>
+
+#ifdef LACKS_SIGQUEUE_PROTOTYPE
+int sigqueue(pid_t pid, int sig, const union sigval value);
+#endif
+
+#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
+#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
+
+void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
+		  struct perf_thread_map *threads)
+{
+	perf_evlist__init(&evlist->core);
+	perf_evlist__set_maps(&evlist->core, cpus, threads);
+	evlist->workload.pid = -1;
+	evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
+	evlist->ctl_fd.fd = -1;
+	evlist->ctl_fd.ack = -1;
+	evlist->ctl_fd.pos = -1;
+}
+
+struct evlist *evlist__new(void)
+{
+	struct evlist *evlist = zalloc(sizeof(*evlist));
+
+	if (evlist != NULL)
+		evlist__init(evlist, NULL, NULL);
+
+	return evlist;
+}
+
+struct evlist *evlist__new_default(void)
+{
+	struct evlist *evlist = evlist__new();
+
+	if (evlist && evlist__add_default(evlist)) {
+		evlist__delete(evlist);
+		evlist = NULL;
+	}
+
+	return evlist;
+}
+
+struct evlist *evlist__new_dummy(void)
+{
+	struct evlist *evlist = evlist__new();
+
+	if (evlist && evlist__add_dummy(evlist)) {
+		evlist__delete(evlist);
+		evlist = NULL;
+	}
+
+	return evlist;
+}
+
+/**
+ * evlist__set_id_pos - set the positions of event ids.
+ * @evlist: selected event list
+ *
+ * Events with compatible sample types all have the same id_pos
+ * and is_pos.  For convenience, put a copy on evlist.
+ */
+void evlist__set_id_pos(struct evlist *evlist)
+{
+	struct evsel *first = evlist__first(evlist);
+
+	evlist->id_pos = first->id_pos;
+	evlist->is_pos = first->is_pos;
+}
+
+static void evlist__update_id_pos(struct evlist *evlist)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel)
+		evsel__calc_id_pos(evsel);
+
+	evlist__set_id_pos(evlist);
+}
+
+static void evlist__purge(struct evlist *evlist)
+{
+	struct evsel *pos, *n;
+
+	evlist__for_each_entry_safe(evlist, n, pos) {
+		list_del_init(&pos->core.node);
+		pos->evlist = NULL;
+		evsel__delete(pos);
+	}
+
+	evlist->core.nr_entries = 0;
+}
+
+void evlist__exit(struct evlist *evlist)
+{
+	event_enable_timer__exit(&evlist->eet);
+	zfree(&evlist->mmap);
+	zfree(&evlist->overwrite_mmap);
+	perf_evlist__exit(&evlist->core);
+}
+
+void evlist__delete(struct evlist *evlist)
+{
+	if (evlist == NULL)
+		return;
+
+	evlist__munmap(evlist);
+	evlist__close(evlist);
+	evlist__purge(evlist);
+	evlist__exit(evlist);
+	free(evlist);
+}
+
+void evlist__add(struct evlist *evlist, struct evsel *entry)
+{
+	perf_evlist__add(&evlist->core, &entry->core);
+	entry->evlist = evlist;
+	entry->tracking = !entry->core.idx;
+
+	if (evlist->core.nr_entries == 1)
+		evlist__set_id_pos(evlist);
+}
+
+void evlist__remove(struct evlist *evlist, struct evsel *evsel)
+{
+	evsel->evlist = NULL;
+	perf_evlist__remove(&evlist->core, &evsel->core);
+}
+
+void evlist__splice_list_tail(struct evlist *evlist, struct list_head *list)
+{
+	while (!list_empty(list)) {
+		struct evsel *evsel, *temp, *leader = NULL;
+
+		__evlist__for_each_entry_safe(list, temp, evsel) {
+			list_del_init(&evsel->core.node);
+			evlist__add(evlist, evsel);
+			leader = evsel;
+			break;
+		}
+
+		__evlist__for_each_entry_safe(list, temp, evsel) {
+			if (evsel__has_leader(evsel, leader)) {
+				list_del_init(&evsel->core.node);
+				evlist__add(evlist, evsel);
+			}
+		}
+	}
+}
+
+int __evlist__set_tracepoints_handlers(struct evlist *evlist,
+				       const struct evsel_str_handler *assocs, size_t nr_assocs)
+{
+	size_t i;
+	int err;
+
+	for (i = 0; i < nr_assocs; i++) {
+		// Adding a handler for an event not in this evlist, just ignore it.
+		struct evsel *evsel = evlist__find_tracepoint_by_name(evlist, assocs[i].name);
+		if (evsel == NULL)
+			continue;
+
+		err = -EEXIST;
+		if (evsel->handler != NULL)
+			goto out;
+		evsel->handler = assocs[i].handler;
+	}
+
+	err = 0;
+out:
+	return err;
+}
+
+void evlist__set_leader(struct evlist *evlist)
+{
+	perf_evlist__set_leader(&evlist->core);
+}
+
+int __evlist__add_default(struct evlist *evlist, bool precise)
+{
+	struct evsel *evsel;
+
+	evsel = evsel__new_cycles(precise, PERF_TYPE_HARDWARE,
+				  PERF_COUNT_HW_CPU_CYCLES);
+	if (evsel == NULL)
+		return -ENOMEM;
+
+	evlist__add(evlist, evsel);
+	return 0;
+}
+
+static struct evsel *evlist__dummy_event(struct evlist *evlist)
+{
+	struct perf_event_attr attr = {
+		.type	= PERF_TYPE_SOFTWARE,
+		.config = PERF_COUNT_SW_DUMMY,
+		.size	= sizeof(attr), /* to capture ABI version */
+		/* Avoid frequency mode for dummy events to avoid associated timers. */
+		.freq = 0,
+		.sample_period = 1,
+	};
+
+	return evsel__new_idx(&attr, evlist->core.nr_entries);
+}
+
+int evlist__add_dummy(struct evlist *evlist)
+{
+	struct evsel *evsel = evlist__dummy_event(evlist);
+
+	if (evsel == NULL)
+		return -ENOMEM;
+
+	evlist__add(evlist, evsel);
+	return 0;
+}
+
+struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide)
+{
+	struct evsel *evsel = evlist__dummy_event(evlist);
+
+	if (!evsel)
+		return NULL;
+
+	evsel->core.attr.exclude_kernel = 1;
+	evsel->core.attr.exclude_guest = 1;
+	evsel->core.attr.exclude_hv = 1;
+	evsel->core.system_wide = system_wide;
+	evsel->no_aux_samples = true;
+	evsel->name = strdup("dummy:u");
+
+	evlist__add(evlist, evsel);
+	return evsel;
+}
+
+struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide)
+{
+	struct evsel *evsel = evsel__newtp_idx("sched", "sched_switch", 0);
+
+	if (IS_ERR(evsel))
+		return evsel;
+
+	evsel__set_sample_bit(evsel, CPU);
+	evsel__set_sample_bit(evsel, TIME);
+
+	evsel->core.system_wide = system_wide;
+	evsel->no_aux_samples = true;
+
+	evlist__add(evlist, evsel);
+	return evsel;
+};
+
+int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
+{
+	struct evsel *evsel, *n;
+	LIST_HEAD(head);
+	size_t i;
+
+	for (i = 0; i < nr_attrs; i++) {
+		evsel = evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
+		if (evsel == NULL)
+			goto out_delete_partial_list;
+		list_add_tail(&evsel->core.node, &head);
+	}
+
+	evlist__splice_list_tail(evlist, &head);
+
+	return 0;
+
+out_delete_partial_list:
+	__evlist__for_each_entry_safe(&head, n, evsel)
+		evsel__delete(evsel);
+	return -1;
+}
+
+int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
+{
+	size_t i;
+
+	for (i = 0; i < nr_attrs; i++)
+		event_attr_init(attrs + i);
+
+	return evlist__add_attrs(evlist, attrs, nr_attrs);
+}
+
+__weak int arch_evlist__add_default_attrs(struct evlist *evlist,
+					  struct perf_event_attr *attrs,
+					  size_t nr_attrs)
+{
+	if (!nr_attrs)
+		return 0;
+
+	return __evlist__add_default_attrs(evlist, attrs, nr_attrs);
+}
+
+struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->core.attr.type   == PERF_TYPE_TRACEPOINT &&
+		    (int)evsel->core.attr.config == id)
+			return evsel;
+	}
+
+	return NULL;
+}
+
+struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if ((evsel->core.attr.type == PERF_TYPE_TRACEPOINT) &&
+		    (strcmp(evsel->name, name) == 0))
+			return evsel;
+	}
+
+	return NULL;
+}
+
+int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler)
+{
+	struct evsel *evsel = evsel__newtp(sys, name);
+
+	if (IS_ERR(evsel))
+		return -1;
+
+	evsel->handler = handler;
+	evlist__add(evlist, evsel);
+	return 0;
+}
+
+struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity)
+{
+	struct evlist_cpu_iterator itr = {
+		.container = evlist,
+		.evsel = NULL,
+		.cpu_map_idx = 0,
+		.evlist_cpu_map_idx = 0,
+		.evlist_cpu_map_nr = perf_cpu_map__nr(evlist->core.all_cpus),
+		.cpu = (struct perf_cpu){ .cpu = -1},
+		.affinity = affinity,
+	};
+
+	if (evlist__empty(evlist)) {
+		/* Ensure the empty list doesn't iterate. */
+		itr.evlist_cpu_map_idx = itr.evlist_cpu_map_nr;
+	} else {
+		itr.evsel = evlist__first(evlist);
+		if (itr.affinity) {
+			itr.cpu = perf_cpu_map__cpu(evlist->core.all_cpus, 0);
+			affinity__set(itr.affinity, itr.cpu.cpu);
+			itr.cpu_map_idx = perf_cpu_map__idx(itr.evsel->core.cpus, itr.cpu);
+			/*
+			 * If this CPU isn't in the evsel's cpu map then advance
+			 * through the list.
+			 */
+			if (itr.cpu_map_idx == -1)
+				evlist_cpu_iterator__next(&itr);
+		}
+	}
+	return itr;
+}
+
+void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr)
+{
+	while (evlist_cpu_itr->evsel != evlist__last(evlist_cpu_itr->container)) {
+		evlist_cpu_itr->evsel = evsel__next(evlist_cpu_itr->evsel);
+		evlist_cpu_itr->cpu_map_idx =
+			perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus,
+					  evlist_cpu_itr->cpu);
+		if (evlist_cpu_itr->cpu_map_idx != -1)
+			return;
+	}
+	evlist_cpu_itr->evlist_cpu_map_idx++;
+	if (evlist_cpu_itr->evlist_cpu_map_idx < evlist_cpu_itr->evlist_cpu_map_nr) {
+		evlist_cpu_itr->evsel = evlist__first(evlist_cpu_itr->container);
+		evlist_cpu_itr->cpu =
+			perf_cpu_map__cpu(evlist_cpu_itr->container->core.all_cpus,
+					  evlist_cpu_itr->evlist_cpu_map_idx);
+		if (evlist_cpu_itr->affinity)
+			affinity__set(evlist_cpu_itr->affinity, evlist_cpu_itr->cpu.cpu);
+		evlist_cpu_itr->cpu_map_idx =
+			perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus,
+					  evlist_cpu_itr->cpu);
+		/*
+		 * If this CPU isn't in the evsel's cpu map then advance through
+		 * the list.
+		 */
+		if (evlist_cpu_itr->cpu_map_idx == -1)
+			evlist_cpu_iterator__next(evlist_cpu_itr);
+	}
+}
+
+bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr)
+{
+	return evlist_cpu_itr->evlist_cpu_map_idx >= evlist_cpu_itr->evlist_cpu_map_nr;
+}
+
+static int evsel__strcmp(struct evsel *pos, char *evsel_name)
+{
+	if (!evsel_name)
+		return 0;
+	if (evsel__is_dummy_event(pos))
+		return 1;
+	return strcmp(pos->name, evsel_name);
+}
+
+static int evlist__is_enabled(struct evlist *evlist)
+{
+	struct evsel *pos;
+
+	evlist__for_each_entry(evlist, pos) {
+		if (!evsel__is_group_leader(pos) || !pos->core.fd)
+			continue;
+		/* If at least one event is enabled, evlist is enabled. */
+		if (!pos->disabled)
+			return true;
+	}
+	return false;
+}
+
+static void __evlist__disable(struct evlist *evlist, char *evsel_name, bool excl_dummy)
+{
+	struct evsel *pos;
+	struct evlist_cpu_iterator evlist_cpu_itr;
+	struct affinity saved_affinity, *affinity = NULL;
+	bool has_imm = false;
+
+	// See explanation in evlist__close()
+	if (!cpu_map__is_dummy(evlist->core.user_requested_cpus)) {
+		if (affinity__setup(&saved_affinity) < 0)
+			return;
+		affinity = &saved_affinity;
+	}
+
+	/* Disable 'immediate' events last */
+	for (int imm = 0; imm <= 1; imm++) {
+		evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) {
+			pos = evlist_cpu_itr.evsel;
+			if (evsel__strcmp(pos, evsel_name))
+				continue;
+			if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
+				continue;
+			if (excl_dummy && evsel__is_dummy_event(pos))
+				continue;
+			if (pos->immediate)
+				has_imm = true;
+			if (pos->immediate != imm)
+				continue;
+			evsel__disable_cpu(pos, evlist_cpu_itr.cpu_map_idx);
+		}
+		if (!has_imm)
+			break;
+	}
+
+	affinity__cleanup(affinity);
+	evlist__for_each_entry(evlist, pos) {
+		if (evsel__strcmp(pos, evsel_name))
+			continue;
+		if (!evsel__is_group_leader(pos) || !pos->core.fd)
+			continue;
+		if (excl_dummy && evsel__is_dummy_event(pos))
+			continue;
+		pos->disabled = true;
+	}
+
+	/*
+	 * If we disabled only single event, we need to check
+	 * the enabled state of the evlist manually.
+	 */
+	if (evsel_name)
+		evlist->enabled = evlist__is_enabled(evlist);
+	else
+		evlist->enabled = false;
+}
+
+void evlist__disable(struct evlist *evlist)
+{
+	__evlist__disable(evlist, NULL, false);
+}
+
+void evlist__disable_non_dummy(struct evlist *evlist)
+{
+	__evlist__disable(evlist, NULL, true);
+}
+
+void evlist__disable_evsel(struct evlist *evlist, char *evsel_name)
+{
+	__evlist__disable(evlist, evsel_name, false);
+}
+
+static void __evlist__enable(struct evlist *evlist, char *evsel_name, bool excl_dummy)
+{
+	struct evsel *pos;
+	struct evlist_cpu_iterator evlist_cpu_itr;
+	struct affinity saved_affinity, *affinity = NULL;
+
+	// See explanation in evlist__close()
+	if (!cpu_map__is_dummy(evlist->core.user_requested_cpus)) {
+		if (affinity__setup(&saved_affinity) < 0)
+			return;
+		affinity = &saved_affinity;
+	}
+
+	evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) {
+		pos = evlist_cpu_itr.evsel;
+		if (evsel__strcmp(pos, evsel_name))
+			continue;
+		if (!evsel__is_group_leader(pos) || !pos->core.fd)
+			continue;
+		if (excl_dummy && evsel__is_dummy_event(pos))
+			continue;
+		evsel__enable_cpu(pos, evlist_cpu_itr.cpu_map_idx);
+	}
+	affinity__cleanup(affinity);
+	evlist__for_each_entry(evlist, pos) {
+		if (evsel__strcmp(pos, evsel_name))
+			continue;
+		if (!evsel__is_group_leader(pos) || !pos->core.fd)
+			continue;
+		if (excl_dummy && evsel__is_dummy_event(pos))
+			continue;
+		pos->disabled = false;
+	}
+
+	/*
+	 * Even single event sets the 'enabled' for evlist,
+	 * so the toggle can work properly and toggle to
+	 * 'disabled' state.
+	 */
+	evlist->enabled = true;
+}
+
+void evlist__enable(struct evlist *evlist)
+{
+	__evlist__enable(evlist, NULL, false);
+}
+
+void evlist__enable_non_dummy(struct evlist *evlist)
+{
+	__evlist__enable(evlist, NULL, true);
+}
+
+void evlist__enable_evsel(struct evlist *evlist, char *evsel_name)
+{
+	__evlist__enable(evlist, evsel_name, false);
+}
+
+void evlist__toggle_enable(struct evlist *evlist)
+{
+	(evlist->enabled ? evlist__disable : evlist__enable)(evlist);
+}
+
+int evlist__add_pollfd(struct evlist *evlist, int fd)
+{
+	return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN, fdarray_flag__default);
+}
+
+int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
+{
+	return perf_evlist__filter_pollfd(&evlist->core, revents_and_mask);
+}
+
+#ifdef HAVE_EVENTFD_SUPPORT
+int evlist__add_wakeup_eventfd(struct evlist *evlist, int fd)
+{
+	return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN,
+				       fdarray_flag__nonfilterable |
+				       fdarray_flag__non_perf_event);
+}
+#endif
+
+int evlist__poll(struct evlist *evlist, int timeout)
+{
+	return perf_evlist__poll(&evlist->core, timeout);
+}
+
+struct perf_sample_id *evlist__id2sid(struct evlist *evlist, u64 id)
+{
+	struct hlist_head *head;
+	struct perf_sample_id *sid;
+	int hash;
+
+	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
+	head = &evlist->core.heads[hash];
+
+	hlist_for_each_entry(sid, head, node)
+		if (sid->id == id)
+			return sid;
+
+	return NULL;
+}
+
+struct evsel *evlist__id2evsel(struct evlist *evlist, u64 id)
+{
+	struct perf_sample_id *sid;
+
+	if (evlist->core.nr_entries == 1 || !id)
+		return evlist__first(evlist);
+
+	sid = evlist__id2sid(evlist, id);
+	if (sid)
+		return container_of(sid->evsel, struct evsel, core);
+
+	if (!evlist__sample_id_all(evlist))
+		return evlist__first(evlist);
+
+	return NULL;
+}
+
+struct evsel *evlist__id2evsel_strict(struct evlist *evlist, u64 id)
+{
+	struct perf_sample_id *sid;
+
+	if (!id)
+		return NULL;
+
+	sid = evlist__id2sid(evlist, id);
+	if (sid)
+		return container_of(sid->evsel, struct evsel, core);
+
+	return NULL;
+}
+
+static int evlist__event2id(struct evlist *evlist, union perf_event *event, u64 *id)
+{
+	const __u64 *array = event->sample.array;
+	ssize_t n;
+
+	n = (event->header.size - sizeof(event->header)) >> 3;
+
+	if (event->header.type == PERF_RECORD_SAMPLE) {
+		if (evlist->id_pos >= n)
+			return -1;
+		*id = array[evlist->id_pos];
+	} else {
+		if (evlist->is_pos > n)
+			return -1;
+		n -= evlist->is_pos;
+		*id = array[n];
+	}
+	return 0;
+}
+
+struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event)
+{
+	struct evsel *first = evlist__first(evlist);
+	struct hlist_head *head;
+	struct perf_sample_id *sid;
+	int hash;
+	u64 id;
+
+	if (evlist->core.nr_entries == 1)
+		return first;
+
+	if (!first->core.attr.sample_id_all &&
+	    event->header.type != PERF_RECORD_SAMPLE)
+		return first;
+
+	if (evlist__event2id(evlist, event, &id))
+		return NULL;
+
+	/* Synthesized events have an id of zero */
+	if (!id)
+		return first;
+
+	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
+	head = &evlist->core.heads[hash];
+
+	hlist_for_each_entry(sid, head, node) {
+		if (sid->id == id)
+			return container_of(sid->evsel, struct evsel, core);
+	}
+	return NULL;
+}
+
+static int evlist__set_paused(struct evlist *evlist, bool value)
+{
+	int i;
+
+	if (!evlist->overwrite_mmap)
+		return 0;
+
+	for (i = 0; i < evlist->core.nr_mmaps; i++) {
+		int fd = evlist->overwrite_mmap[i].core.fd;
+		int err;
+
+		if (fd < 0)
+			continue;
+		err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static int evlist__pause(struct evlist *evlist)
+{
+	return evlist__set_paused(evlist, true);
+}
+
+static int evlist__resume(struct evlist *evlist)
+{
+	return evlist__set_paused(evlist, false);
+}
+
+static void evlist__munmap_nofree(struct evlist *evlist)
+{
+	int i;
+
+	if (evlist->mmap)
+		for (i = 0; i < evlist->core.nr_mmaps; i++)
+			perf_mmap__munmap(&evlist->mmap[i].core);
+
+	if (evlist->overwrite_mmap)
+		for (i = 0; i < evlist->core.nr_mmaps; i++)
+			perf_mmap__munmap(&evlist->overwrite_mmap[i].core);
+}
+
+void evlist__munmap(struct evlist *evlist)
+{
+	evlist__munmap_nofree(evlist);
+	zfree(&evlist->mmap);
+	zfree(&evlist->overwrite_mmap);
+}
+
+static void perf_mmap__unmap_cb(struct perf_mmap *map)
+{
+	struct mmap *m = container_of(map, struct mmap, core);
+
+	mmap__munmap(m);
+}
+
+static struct mmap *evlist__alloc_mmap(struct evlist *evlist,
+				       bool overwrite)
+{
+	int i;
+	struct mmap *map;
+
+	map = zalloc(evlist->core.nr_mmaps * sizeof(struct mmap));
+	if (!map)
+		return NULL;
+
+	for (i = 0; i < evlist->core.nr_mmaps; i++) {
+		struct perf_mmap *prev = i ? &map[i - 1].core : NULL;
+
+		/*
+		 * When the perf_mmap() call is made we grab one refcount, plus
+		 * one extra to let perf_mmap__consume() get the last
+		 * events after all real references (perf_mmap__get()) are
+		 * dropped.
+		 *
+		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
+		 * thus does perf_mmap__get() on it.
+		 */
+		perf_mmap__init(&map[i].core, prev, overwrite, perf_mmap__unmap_cb);
+	}
+
+	return map;
+}
+
+static void
+perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist,
+			 struct perf_evsel *_evsel,
+			 struct perf_mmap_param *_mp,
+			 int idx)
+{
+	struct evlist *evlist = container_of(_evlist, struct evlist, core);
+	struct mmap_params *mp = container_of(_mp, struct mmap_params, core);
+	struct evsel *evsel = container_of(_evsel, struct evsel, core);
+
+	auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, evsel, idx);
+}
+
+static struct perf_mmap*
+perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx)
+{
+	struct evlist *evlist = container_of(_evlist, struct evlist, core);
+	struct mmap *maps;
+
+	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
+
+	if (!maps) {
+		maps = evlist__alloc_mmap(evlist, overwrite);
+		if (!maps)
+			return NULL;
+
+		if (overwrite) {
+			evlist->overwrite_mmap = maps;
+			if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
+				evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
+		} else {
+			evlist->mmap = maps;
+		}
+	}
+
+	return &maps[idx].core;
+}
+
+static int
+perf_evlist__mmap_cb_mmap(struct perf_mmap *_map, struct perf_mmap_param *_mp,
+			  int output, struct perf_cpu cpu)
+{
+	struct mmap *map = container_of(_map, struct mmap, core);
+	struct mmap_params *mp = container_of(_mp, struct mmap_params, core);
+
+	return mmap__mmap(map, mp, output, cpu);
+}
+
+unsigned long perf_event_mlock_kb_in_pages(void)
+{
+	unsigned long pages;
+	int max;
+
+	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
+		/*
+		 * Pick a once upon a time good value, i.e. things look
+		 * strange since we can't read a sysctl value, but lets not
+		 * die yet...
+		 */
+		max = 512;
+	} else {
+		max -= (page_size / 1024);
+	}
+
+	pages = (max * 1024) / page_size;
+	if (!is_power_of_2(pages))
+		pages = rounddown_pow_of_two(pages);
+
+	return pages;
+}
+
+size_t evlist__mmap_size(unsigned long pages)
+{
+	if (pages == UINT_MAX)
+		pages = perf_event_mlock_kb_in_pages();
+	else if (!is_power_of_2(pages))
+		return 0;
+
+	return (pages + 1) * page_size;
+}
+
+static long parse_pages_arg(const char *str, unsigned long min,
+			    unsigned long max)
+{
+	unsigned long pages, val;
+	static struct parse_tag tags[] = {
+		{ .tag  = 'B', .mult = 1       },
+		{ .tag  = 'K', .mult = 1 << 10 },
+		{ .tag  = 'M', .mult = 1 << 20 },
+		{ .tag  = 'G', .mult = 1 << 30 },
+		{ .tag  = 0 },
+	};
+
+	if (str == NULL)
+		return -EINVAL;
+
+	val = parse_tag_value(str, tags);
+	if (val != (unsigned long) -1) {
+		/* we got file size value */
+		pages = PERF_ALIGN(val, page_size) / page_size;
+	} else {
+		/* we got pages count value */
+		char *eptr;
+		pages = strtoul(str, &eptr, 10);
+		if (*eptr != '\0')
+			return -EINVAL;
+	}
+
+	if (pages == 0 && min == 0) {
+		/* leave number of pages at 0 */
+	} else if (!is_power_of_2(pages)) {
+		char buf[100];
+
+		/* round pages up to next power of 2 */
+		pages = roundup_pow_of_two(pages);
+		if (!pages)
+			return -EINVAL;
+
+		unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
+		pr_info("rounding mmap pages size to %s (%lu pages)\n",
+			buf, pages);
+	}
+
+	if (pages > max)
+		return -EINVAL;
+
+	return pages;
+}
+
+int __evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
+{
+	unsigned long max = UINT_MAX;
+	long pages;
+
+	if (max > SIZE_MAX / page_size)
+		max = SIZE_MAX / page_size;
+
+	pages = parse_pages_arg(str, 1, max);
+	if (pages < 0) {
+		pr_err("Invalid argument for --mmap_pages/-m\n");
+		return -1;
+	}
+
+	*mmap_pages = pages;
+	return 0;
+}
+
+int evlist__parse_mmap_pages(const struct option *opt, const char *str, int unset __maybe_unused)
+{
+	return __evlist__parse_mmap_pages(opt->value, str);
+}
+
+/**
+ * evlist__mmap_ex - Create mmaps to receive events.
+ * @evlist: list of events
+ * @pages: map length in pages
+ * @overwrite: overwrite older events?
+ * @auxtrace_pages - auxtrace map length in pages
+ * @auxtrace_overwrite - overwrite older auxtrace data?
+ *
+ * If @overwrite is %false the user needs to signal event consumption using
+ * perf_mmap__write_tail().  Using evlist__mmap_read() does this
+ * automatically.
+ *
+ * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
+ * consumption using auxtrace_mmap__write_tail().
+ *
+ * Return: %0 on success, negative error code otherwise.
+ */
+int evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
+			 unsigned int auxtrace_pages,
+			 bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
+			 int comp_level)
+{
+	/*
+	 * Delay setting mp.prot: set it before calling perf_mmap__mmap.
+	 * Its value is decided by evsel's write_backward.
+	 * So &mp should not be passed through const pointer.
+	 */
+	struct mmap_params mp = {
+		.nr_cblocks	= nr_cblocks,
+		.affinity	= affinity,
+		.flush		= flush,
+		.comp_level	= comp_level
+	};
+	struct perf_evlist_mmap_ops ops = {
+		.idx  = perf_evlist__mmap_cb_idx,
+		.get  = perf_evlist__mmap_cb_get,
+		.mmap = perf_evlist__mmap_cb_mmap,
+	};
+
+	evlist->core.mmap_len = evlist__mmap_size(pages);
+	pr_debug("mmap size %zuB\n", evlist->core.mmap_len);
+
+	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->core.mmap_len,
+				   auxtrace_pages, auxtrace_overwrite);
+
+	return perf_evlist__mmap_ops(&evlist->core, &ops, &mp.core);
+}
+
+int evlist__mmap(struct evlist *evlist, unsigned int pages)
+{
+	return evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
+}
+
+int evlist__create_maps(struct evlist *evlist, struct target *target)
+{
+	bool all_threads = (target->per_thread && target->system_wide);
+	struct perf_cpu_map *cpus;
+	struct perf_thread_map *threads;
+
+	/*
+	 * If specify '-a' and '--per-thread' to perf record, perf record
+	 * will override '--per-thread'. target->per_thread = false and
+	 * target->system_wide = true.
+	 *
+	 * If specify '--per-thread' only to perf record,
+	 * target->per_thread = true and target->system_wide = false.
+	 *
+	 * So target->per_thread && target->system_wide is false.
+	 * For perf record, thread_map__new_str doesn't call
+	 * thread_map__new_all_cpus. That will keep perf record's
+	 * current behavior.
+	 *
+	 * For perf stat, it allows the case that target->per_thread and
+	 * target->system_wide are all true. It means to collect system-wide
+	 * per-thread data. thread_map__new_str will call
+	 * thread_map__new_all_cpus to enumerate all threads.
+	 */
+	threads = thread_map__new_str(target->pid, target->tid, target->uid,
+				      all_threads);
+
+	if (!threads)
+		return -1;
+
+	if (target__uses_dummy_map(target))
+		cpus = perf_cpu_map__dummy_new();
+	else
+		cpus = perf_cpu_map__new(target->cpu_list);
+
+	if (!cpus)
+		goto out_delete_threads;
+
+	evlist->core.has_user_cpus = !!target->cpu_list && !target->hybrid;
+
+	perf_evlist__set_maps(&evlist->core, cpus, threads);
+
+	/* as evlist now has references, put count here */
+	perf_cpu_map__put(cpus);
+	perf_thread_map__put(threads);
+
+	return 0;
+
+out_delete_threads:
+	perf_thread_map__put(threads);
+	return -1;
+}
+
+int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
+{
+	struct evsel *evsel;
+	int err = 0;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->filter == NULL)
+			continue;
+
+		/*
+		 * filters only work for tracepoint event, which doesn't have cpu limit.
+		 * So evlist and evsel should always be same.
+		 */
+		err = perf_evsel__apply_filter(&evsel->core, evsel->filter);
+		if (err) {
+			*err_evsel = evsel;
+			break;
+		}
+	}
+
+	return err;
+}
+
+int evlist__set_tp_filter(struct evlist *evlist, const char *filter)
+{
+	struct evsel *evsel;
+	int err = 0;
+
+	if (filter == NULL)
+		return -1;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
+			continue;
+
+		err = evsel__set_filter(evsel, filter);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+int evlist__append_tp_filter(struct evlist *evlist, const char *filter)
+{
+	struct evsel *evsel;
+	int err = 0;
+
+	if (filter == NULL)
+		return -1;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
+			continue;
+
+		err = evsel__append_tp_filter(evsel, filter);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+char *asprintf__tp_filter_pids(size_t npids, pid_t *pids)
+{
+	char *filter;
+	size_t i;
+
+	for (i = 0; i < npids; ++i) {
+		if (i == 0) {
+			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
+				return NULL;
+		} else {
+			char *tmp;
+
+			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
+				goto out_free;
+
+			free(filter);
+			filter = tmp;
+		}
+	}
+
+	return filter;
+out_free:
+	free(filter);
+	return NULL;
+}
+
+int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
+{
+	char *filter = asprintf__tp_filter_pids(npids, pids);
+	int ret = evlist__set_tp_filter(evlist, filter);
+
+	free(filter);
+	return ret;
+}
+
+int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid)
+{
+	return evlist__set_tp_filter_pids(evlist, 1, &pid);
+}
+
+int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
+{
+	char *filter = asprintf__tp_filter_pids(npids, pids);
+	int ret = evlist__append_tp_filter(evlist, filter);
+
+	free(filter);
+	return ret;
+}
+
+int evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid)
+{
+	return evlist__append_tp_filter_pids(evlist, 1, &pid);
+}
+
+bool evlist__valid_sample_type(struct evlist *evlist)
+{
+	struct evsel *pos;
+
+	if (evlist->core.nr_entries == 1)
+		return true;
+
+	if (evlist->id_pos < 0 || evlist->is_pos < 0)
+		return false;
+
+	evlist__for_each_entry(evlist, pos) {
+		if (pos->id_pos != evlist->id_pos ||
+		    pos->is_pos != evlist->is_pos)
+			return false;
+	}
+
+	return true;
+}
+
+u64 __evlist__combined_sample_type(struct evlist *evlist)
+{
+	struct evsel *evsel;
+
+	if (evlist->combined_sample_type)
+		return evlist->combined_sample_type;
+
+	evlist__for_each_entry(evlist, evsel)
+		evlist->combined_sample_type |= evsel->core.attr.sample_type;
+
+	return evlist->combined_sample_type;
+}
+
+u64 evlist__combined_sample_type(struct evlist *evlist)
+{
+	evlist->combined_sample_type = 0;
+	return __evlist__combined_sample_type(evlist);
+}
+
+u64 evlist__combined_branch_type(struct evlist *evlist)
+{
+	struct evsel *evsel;
+	u64 branch_type = 0;
+
+	evlist__for_each_entry(evlist, evsel)
+		branch_type |= evsel->core.attr.branch_sample_type;
+	return branch_type;
+}
+
+bool evlist__valid_read_format(struct evlist *evlist)
+{
+	struct evsel *first = evlist__first(evlist), *pos = first;
+	u64 read_format = first->core.attr.read_format;
+	u64 sample_type = first->core.attr.sample_type;
+
+	evlist__for_each_entry(evlist, pos) {
+		if (read_format != pos->core.attr.read_format) {
+			pr_debug("Read format differs %#" PRIx64 " vs %#" PRIx64 "\n",
+				 read_format, (u64)pos->core.attr.read_format);
+		}
+	}
+
+	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
+	if ((sample_type & PERF_SAMPLE_READ) &&
+	    !(read_format & PERF_FORMAT_ID)) {
+		return false;
+	}
+
+	return true;
+}
+
+u16 evlist__id_hdr_size(struct evlist *evlist)
+{
+	struct evsel *first = evlist__first(evlist);
+
+	return first->core.attr.sample_id_all ? evsel__id_hdr_size(first) : 0;
+}
+
+bool evlist__valid_sample_id_all(struct evlist *evlist)
+{
+	struct evsel *first = evlist__first(evlist), *pos = first;
+
+	evlist__for_each_entry_continue(evlist, pos) {
+		if (first->core.attr.sample_id_all != pos->core.attr.sample_id_all)
+			return false;
+	}
+
+	return true;
+}
+
+bool evlist__sample_id_all(struct evlist *evlist)
+{
+	struct evsel *first = evlist__first(evlist);
+	return first->core.attr.sample_id_all;
+}
+
+void evlist__set_selected(struct evlist *evlist, struct evsel *evsel)
+{
+	evlist->selected = evsel;
+}
+
+void evlist__close(struct evlist *evlist)
+{
+	struct evsel *evsel;
+	struct evlist_cpu_iterator evlist_cpu_itr;
+	struct affinity affinity;
+
+	/*
+	 * With perf record core.user_requested_cpus is usually NULL.
+	 * Use the old method to handle this for now.
+	 */
+	if (!evlist->core.user_requested_cpus ||
+	    cpu_map__is_dummy(evlist->core.user_requested_cpus)) {
+		evlist__for_each_entry_reverse(evlist, evsel)
+			evsel__close(evsel);
+		return;
+	}
+
+	if (affinity__setup(&affinity) < 0)
+		return;
+
+	evlist__for_each_cpu(evlist_cpu_itr, evlist, &affinity) {
+		perf_evsel__close_cpu(&evlist_cpu_itr.evsel->core,
+				      evlist_cpu_itr.cpu_map_idx);
+	}
+
+	affinity__cleanup(&affinity);
+	evlist__for_each_entry_reverse(evlist, evsel) {
+		perf_evsel__free_fd(&evsel->core);
+		perf_evsel__free_id(&evsel->core);
+	}
+	perf_evlist__reset_id_hash(&evlist->core);
+}
+
+static int evlist__create_syswide_maps(struct evlist *evlist)
+{
+	struct perf_cpu_map *cpus;
+	struct perf_thread_map *threads;
+
+	/*
+	 * Try reading /sys/devices/system/cpu/online to get
+	 * an all cpus map.
+	 *
+	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
+	 * code needs an overhaul to properly forward the
+	 * error, and we may not want to do that fallback to a
+	 * default cpu identity map :-\
+	 */
+	cpus = perf_cpu_map__new(NULL);
+	if (!cpus)
+		goto out;
+
+	threads = perf_thread_map__new_dummy();
+	if (!threads)
+		goto out_put;
+
+	perf_evlist__set_maps(&evlist->core, cpus, threads);
+
+	perf_thread_map__put(threads);
+out_put:
+	perf_cpu_map__put(cpus);
+out:
+	return -ENOMEM;
+}
+
+int evlist__open(struct evlist *evlist)
+{
+	struct evsel *evsel;
+	int err;
+
+	/*
+	 * Default: one fd per CPU, all threads, aka systemwide
+	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
+	 */
+	if (evlist->core.threads == NULL && evlist->core.user_requested_cpus == NULL) {
+		err = evlist__create_syswide_maps(evlist);
+		if (err < 0)
+			goto out_err;
+	}
+
+	evlist__update_id_pos(evlist);
+
+	evlist__for_each_entry(evlist, evsel) {
+		err = evsel__open(evsel, evsel->core.cpus, evsel->core.threads);
+		if (err < 0)
+			goto out_err;
+	}
+
+	return 0;
+out_err:
+	evlist__close(evlist);
+	errno = -err;
+	return err;
+}
+
+int evlist__prepare_workload(struct evlist *evlist, struct target *target, const char *argv[],
+			     bool pipe_output, void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
+{
+	int child_ready_pipe[2], go_pipe[2];
+	char bf;
+
+	if (pipe(child_ready_pipe) < 0) {
+		perror("failed to create 'ready' pipe");
+		return -1;
+	}
+
+	if (pipe(go_pipe) < 0) {
+		perror("failed to create 'go' pipe");
+		goto out_close_ready_pipe;
+	}
+
+	evlist->workload.pid = fork();
+	if (evlist->workload.pid < 0) {
+		perror("failed to fork");
+		goto out_close_pipes;
+	}
+
+	if (!evlist->workload.pid) {
+		int ret;
+
+		if (pipe_output)
+			dup2(2, 1);
+
+		signal(SIGTERM, SIG_DFL);
+
+		close(child_ready_pipe[0]);
+		close(go_pipe[1]);
+		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
+
+		/*
+		 * Change the name of this process not to confuse --exclude-perf users
+		 * that sees 'perf' in the window up to the execvp() and thinks that
+		 * perf samples are not being excluded.
+		 */
+		prctl(PR_SET_NAME, "perf-exec");
+
+		/*
+		 * Tell the parent we're ready to go
+		 */
+		close(child_ready_pipe[1]);
+
+		/*
+		 * Wait until the parent tells us to go.
+		 */
+		ret = read(go_pipe[0], &bf, 1);
+		/*
+		 * The parent will ask for the execvp() to be performed by
+		 * writing exactly one byte, in workload.cork_fd, usually via
+		 * evlist__start_workload().
+		 *
+		 * For cancelling the workload without actually running it,
+		 * the parent will just close workload.cork_fd, without writing
+		 * anything, i.e. read will return zero and we just exit()
+		 * here.
+		 */
+		if (ret != 1) {
+			if (ret == -1)
+				perror("unable to read pipe");
+			exit(ret);
+		}
+
+		execvp(argv[0], (char **)argv);
+
+		if (exec_error) {
+			union sigval val;
+
+			val.sival_int = errno;
+			if (sigqueue(getppid(), SIGUSR1, val))
+				perror(argv[0]);
+		} else
+			perror(argv[0]);
+		exit(-1);
+	}
+
+	if (exec_error) {
+		struct sigaction act = {
+			.sa_flags     = SA_SIGINFO,
+			.sa_sigaction = exec_error,
+		};
+		sigaction(SIGUSR1, &act, NULL);
+	}
+
+	if (target__none(target)) {
+		if (evlist->core.threads == NULL) {
+			fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
+				__func__, __LINE__);
+			goto out_close_pipes;
+		}
+		perf_thread_map__set_pid(evlist->core.threads, 0, evlist->workload.pid);
+	}
+
+	close(child_ready_pipe[1]);
+	close(go_pipe[0]);
+	/*
+	 * wait for child to settle
+	 */
+	if (read(child_ready_pipe[0], &bf, 1) == -1) {
+		perror("unable to read pipe");
+		goto out_close_pipes;
+	}
+
+	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
+	evlist->workload.cork_fd = go_pipe[1];
+	close(child_ready_pipe[0]);
+	return 0;
+
+out_close_pipes:
+	close(go_pipe[0]);
+	close(go_pipe[1]);
+out_close_ready_pipe:
+	close(child_ready_pipe[0]);
+	close(child_ready_pipe[1]);
+	return -1;
+}
+
+int evlist__start_workload(struct evlist *evlist)
+{
+	if (evlist->workload.cork_fd > 0) {
+		char bf = 0;
+		int ret;
+		/*
+		 * Remove the cork, let it rip!
+		 */
+		ret = write(evlist->workload.cork_fd, &bf, 1);
+		if (ret < 0)
+			perror("unable to write to pipe");
+
+		close(evlist->workload.cork_fd);
+		return ret;
+	}
+
+	return 0;
+}
+
+int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample)
+{
+	struct evsel *evsel = evlist__event2evsel(evlist, event);
+	int ret;
+
+	if (!evsel)
+		return -EFAULT;
+	ret = evsel__parse_sample(evsel, event, sample);
+	if (ret)
+		return ret;
+	if (perf_guest && sample->id) {
+		struct perf_sample_id *sid = evlist__id2sid(evlist, sample->id);
+
+		if (sid) {
+			sample->machine_pid = sid->machine_pid;
+			sample->vcpu = sid->vcpu.cpu;
+		}
+	}
+	return 0;
+}
+
+int evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *event, u64 *timestamp)
+{
+	struct evsel *evsel = evlist__event2evsel(evlist, event);
+
+	if (!evsel)
+		return -EFAULT;
+	return evsel__parse_sample_timestamp(evsel, event, timestamp);
+}
+
+int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size)
+{
+	int printed, value;
+	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
+
+	switch (err) {
+	case EACCES:
+	case EPERM:
+		printed = scnprintf(buf, size,
+				    "Error:\t%s.\n"
+				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);
+
+		value = perf_event_paranoid();
+
+		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");
+
+		if (value >= 2) {
+			printed += scnprintf(buf + printed, size - printed,
+					     "For your workloads it needs to be <= 1\nHint:\t");
+		}
+		printed += scnprintf(buf + printed, size - printed,
+				     "For system wide tracing it needs to be set to -1.\n");
+
+		printed += scnprintf(buf + printed, size - printed,
+				    "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
+				    "Hint:\tThe current value is %d.", value);
+		break;
+	case EINVAL: {
+		struct evsel *first = evlist__first(evlist);
+		int max_freq;
+
+		if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
+			goto out_default;
+
+		if (first->core.attr.sample_freq < (u64)max_freq)
+			goto out_default;
+
+		printed = scnprintf(buf, size,
+				    "Error:\t%s.\n"
+				    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
+				    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
+				    emsg, max_freq, first->core.attr.sample_freq);
+		break;
+	}
+	default:
+out_default:
+		scnprintf(buf, size, "%s", emsg);
+		break;
+	}
+
+	return 0;
+}
+
+int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
+{
+	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
+	int pages_attempted = evlist->core.mmap_len / 1024, pages_max_per_user, printed = 0;
+
+	switch (err) {
+	case EPERM:
+		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
+		printed += scnprintf(buf + printed, size - printed,
+				     "Error:\t%s.\n"
+				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
+				     "Hint:\tTried using %zd kB.\n",
+				     emsg, pages_max_per_user, pages_attempted);
+
+		if (pages_attempted >= pages_max_per_user) {
+			printed += scnprintf(buf + printed, size - printed,
+					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
+					     pages_max_per_user + pages_attempted);
+		}
+
+		printed += scnprintf(buf + printed, size - printed,
+				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
+		break;
+	default:
+		scnprintf(buf, size, "%s", emsg);
+		break;
+	}
+
+	return 0;
+}
+
+void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel)
+{
+	struct evsel *evsel, *n;
+	LIST_HEAD(move);
+
+	if (move_evsel == evlist__first(evlist))
+		return;
+
+	evlist__for_each_entry_safe(evlist, n, evsel) {
+		if (evsel__leader(evsel) == evsel__leader(move_evsel))
+			list_move_tail(&evsel->core.node, &move);
+	}
+
+	list_splice(&move, &evlist->core.entries);
+}
+
+struct evsel *evlist__get_tracking_event(struct evlist *evlist)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->tracking)
+			return evsel;
+	}
+
+	return evlist__first(evlist);
+}
+
+void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel)
+{
+	struct evsel *evsel;
+
+	if (tracking_evsel->tracking)
+		return;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel != tracking_evsel)
+			evsel->tracking = false;
+	}
+
+	tracking_evsel->tracking = true;
+}
+
+struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (!evsel->name)
+			continue;
+		if (strcmp(str, evsel->name) == 0)
+			return evsel;
+	}
+
+	return NULL;
+}
+
+void evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state)
+{
+	enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
+	enum action {
+		NONE,
+		PAUSE,
+		RESUME,
+	} action = NONE;
+
+	if (!evlist->overwrite_mmap)
+		return;
+
+	switch (old_state) {
+	case BKW_MMAP_NOTREADY: {
+		if (state != BKW_MMAP_RUNNING)
+			goto state_err;
+		break;
+	}
+	case BKW_MMAP_RUNNING: {
+		if (state != BKW_MMAP_DATA_PENDING)
+			goto state_err;
+		action = PAUSE;
+		break;
+	}
+	case BKW_MMAP_DATA_PENDING: {
+		if (state != BKW_MMAP_EMPTY)
+			goto state_err;
+		break;
+	}
+	case BKW_MMAP_EMPTY: {
+		if (state != BKW_MMAP_RUNNING)
+			goto state_err;
+		action = RESUME;
+		break;
+	}
+	default:
+		WARN_ONCE(1, "Shouldn't get there\n");
+	}
+
+	evlist->bkw_mmap_state = state;
+
+	switch (action) {
+	case PAUSE:
+		evlist__pause(evlist);
+		break;
+	case RESUME:
+		evlist__resume(evlist);
+		break;
+	case NONE:
+	default:
+		break;
+	}
+
+state_err:
+	return;
+}
+
+bool evlist__exclude_kernel(struct evlist *evlist)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (!evsel->core.attr.exclude_kernel)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Events in data file are not collect in groups, but we still want
+ * the group display. Set the artificial group and set the leader's
+ * forced_leader flag to notify the display code.
+ */
+void evlist__force_leader(struct evlist *evlist)
+{
+	if (!evlist->core.nr_groups) {
+		struct evsel *leader = evlist__first(evlist);
+
+		evlist__set_leader(evlist);
+		leader->forced_leader = true;
+	}
+}
+
+struct evsel *evlist__reset_weak_group(struct evlist *evsel_list, struct evsel *evsel, bool close)
+{
+	struct evsel *c2, *leader;
+	bool is_open = true;
+
+	leader = evsel__leader(evsel);
+
+	pr_debug("Weak group for %s/%d failed\n",
+			leader->name, leader->core.nr_members);
+
+	/*
+	 * for_each_group_member doesn't work here because it doesn't
+	 * include the first entry.
+	 */
+	evlist__for_each_entry(evsel_list, c2) {
+		if (c2 == evsel)
+			is_open = false;
+		if (evsel__has_leader(c2, leader)) {
+			if (is_open && close)
+				perf_evsel__close(&c2->core);
+			/*
+			 * We want to close all members of the group and reopen
+			 * them. Some events, like Intel topdown, require being
+			 * in a group and so keep these in the group.
+			 */
+			evsel__remove_from_group(c2, leader);
+
+			/*
+			 * Set this for all former members of the group
+			 * to indicate they get reopened.
+			 */
+			c2->reset_group = true;
+		}
+	}
+	/* Reset the leader count if all entries were removed. */
+	if (leader->core.nr_members == 1)
+		leader->core.nr_members = 0;
+	return leader;
+}
+
+static int evlist__parse_control_fifo(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close)
+{
+	char *s, *p;
+	int ret = 0, fd;
+
+	if (strncmp(str, "fifo:", 5))
+		return -EINVAL;
+
+	str += 5;
+	if (!*str || *str == ',')
+		return -EINVAL;
+
+	s = strdup(str);
+	if (!s)
+		return -ENOMEM;
+
+	p = strchr(s, ',');
+	if (p)
+		*p = '\0';
+
+	/*
+	 * O_RDWR avoids POLLHUPs which is necessary to allow the other
+	 * end of a FIFO to be repeatedly opened and closed.
+	 */
+	fd = open(s, O_RDWR | O_NONBLOCK | O_CLOEXEC);
+	if (fd < 0) {
+		pr_err("Failed to open '%s'\n", s);
+		ret = -errno;
+		goto out_free;
+	}
+	*ctl_fd = fd;
+	*ctl_fd_close = true;
+
+	if (p && *++p) {
+		/* O_RDWR | O_NONBLOCK means the other end need not be open */
+		fd = open(p, O_RDWR | O_NONBLOCK | O_CLOEXEC);
+		if (fd < 0) {
+			pr_err("Failed to open '%s'\n", p);
+			ret = -errno;
+			goto out_free;
+		}
+		*ctl_fd_ack = fd;
+	}
+
+out_free:
+	free(s);
+	return ret;
+}
+
+int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close)
+{
+	char *comma = NULL, *endptr = NULL;
+
+	*ctl_fd_close = false;
+
+	if (strncmp(str, "fd:", 3))
+		return evlist__parse_control_fifo(str, ctl_fd, ctl_fd_ack, ctl_fd_close);
+
+	*ctl_fd = strtoul(&str[3], &endptr, 0);
+	if (endptr == &str[3])
+		return -EINVAL;
+
+	comma = strchr(str, ',');
+	if (comma) {
+		if (endptr != comma)
+			return -EINVAL;
+
+		*ctl_fd_ack = strtoul(comma + 1, &endptr, 0);
+		if (endptr == comma + 1 || *endptr != '\0')
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+void evlist__close_control(int ctl_fd, int ctl_fd_ack, bool *ctl_fd_close)
+{
+	if (*ctl_fd_close) {
+		*ctl_fd_close = false;
+		close(ctl_fd);
+		if (ctl_fd_ack >= 0)
+			close(ctl_fd_ack);
+	}
+}
+
+int evlist__initialize_ctlfd(struct evlist *evlist, int fd, int ack)
+{
+	if (fd == -1) {
+		pr_debug("Control descriptor is not initialized\n");
+		return 0;
+	}
+
+	evlist->ctl_fd.pos = perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN,
+						     fdarray_flag__nonfilterable |
+						     fdarray_flag__non_perf_event);
+	if (evlist->ctl_fd.pos < 0) {
+		evlist->ctl_fd.pos = -1;
+		pr_err("Failed to add ctl fd entry: %m\n");
+		return -1;
+	}
+
+	evlist->ctl_fd.fd = fd;
+	evlist->ctl_fd.ack = ack;
+
+	return 0;
+}
+
+bool evlist__ctlfd_initialized(struct evlist *evlist)
+{
+	return evlist->ctl_fd.pos >= 0;
+}
+
+int evlist__finalize_ctlfd(struct evlist *evlist)
+{
+	struct pollfd *entries = evlist->core.pollfd.entries;
+
+	if (!evlist__ctlfd_initialized(evlist))
+		return 0;
+
+	entries[evlist->ctl_fd.pos].fd = -1;
+	entries[evlist->ctl_fd.pos].events = 0;
+	entries[evlist->ctl_fd.pos].revents = 0;
+
+	evlist->ctl_fd.pos = -1;
+	evlist->ctl_fd.ack = -1;
+	evlist->ctl_fd.fd = -1;
+
+	return 0;
+}
+
+static int evlist__ctlfd_recv(struct evlist *evlist, enum evlist_ctl_cmd *cmd,
+			      char *cmd_data, size_t data_size)
+{
+	int err;
+	char c;
+	size_t bytes_read = 0;
+
+	*cmd = EVLIST_CTL_CMD_UNSUPPORTED;
+	memset(cmd_data, 0, data_size);
+	data_size--;
+
+	do {
+		err = read(evlist->ctl_fd.fd, &c, 1);
+		if (err > 0) {
+			if (c == '\n' || c == '\0')
+				break;
+			cmd_data[bytes_read++] = c;
+			if (bytes_read == data_size)
+				break;
+			continue;
+		} else if (err == -1) {
+			if (errno == EINTR)
+				continue;
+			if (errno == EAGAIN || errno == EWOULDBLOCK)
+				err = 0;
+			else
+				pr_err("Failed to read from ctlfd %d: %m\n", evlist->ctl_fd.fd);
+		}
+		break;
+	} while (1);
+
+	pr_debug("Message from ctl_fd: \"%s%s\"\n", cmd_data,
+		 bytes_read == data_size ? "" : c == '\n' ? "\\n" : "\\0");
+
+	if (bytes_read > 0) {
+		if (!strncmp(cmd_data, EVLIST_CTL_CMD_ENABLE_TAG,
+			     (sizeof(EVLIST_CTL_CMD_ENABLE_TAG)-1))) {
+			*cmd = EVLIST_CTL_CMD_ENABLE;
+		} else if (!strncmp(cmd_data, EVLIST_CTL_CMD_DISABLE_TAG,
+				    (sizeof(EVLIST_CTL_CMD_DISABLE_TAG)-1))) {
+			*cmd = EVLIST_CTL_CMD_DISABLE;
+		} else if (!strncmp(cmd_data, EVLIST_CTL_CMD_SNAPSHOT_TAG,
+				    (sizeof(EVLIST_CTL_CMD_SNAPSHOT_TAG)-1))) {
+			*cmd = EVLIST_CTL_CMD_SNAPSHOT;
+			pr_debug("is snapshot\n");
+		} else if (!strncmp(cmd_data, EVLIST_CTL_CMD_EVLIST_TAG,
+				    (sizeof(EVLIST_CTL_CMD_EVLIST_TAG)-1))) {
+			*cmd = EVLIST_CTL_CMD_EVLIST;
+		} else if (!strncmp(cmd_data, EVLIST_CTL_CMD_STOP_TAG,
+				    (sizeof(EVLIST_CTL_CMD_STOP_TAG)-1))) {
+			*cmd = EVLIST_CTL_CMD_STOP;
+		} else if (!strncmp(cmd_data, EVLIST_CTL_CMD_PING_TAG,
+				    (sizeof(EVLIST_CTL_CMD_PING_TAG)-1))) {
+			*cmd = EVLIST_CTL_CMD_PING;
+		}
+	}
+
+	return bytes_read ? (int)bytes_read : err;
+}
+
+int evlist__ctlfd_ack(struct evlist *evlist)
+{
+	int err;
+
+	if (evlist->ctl_fd.ack == -1)
+		return 0;
+
+	err = write(evlist->ctl_fd.ack, EVLIST_CTL_CMD_ACK_TAG,
+		    sizeof(EVLIST_CTL_CMD_ACK_TAG));
+	if (err == -1)
+		pr_err("failed to write to ctl_ack_fd %d: %m\n", evlist->ctl_fd.ack);
+
+	return err;
+}
+
+static int get_cmd_arg(char *cmd_data, size_t cmd_size, char **arg)
+{
+	char *data = cmd_data + cmd_size;
+
+	/* no argument */
+	if (!*data)
+		return 0;
+
+	/* there's argument */
+	if (*data == ' ') {
+		*arg = data + 1;
+		return 1;
+	}
+
+	/* malformed */
+	return -1;
+}
+
+static int evlist__ctlfd_enable(struct evlist *evlist, char *cmd_data, bool enable)
+{
+	struct evsel *evsel;
+	char *name;
+	int err;
+
+	err = get_cmd_arg(cmd_data,
+			  enable ? sizeof(EVLIST_CTL_CMD_ENABLE_TAG) - 1 :
+				   sizeof(EVLIST_CTL_CMD_DISABLE_TAG) - 1,
+			  &name);
+	if (err < 0) {
+		pr_info("failed: wrong command\n");
+		return -1;
+	}
+
+	if (err) {
+		evsel = evlist__find_evsel_by_str(evlist, name);
+		if (evsel) {
+			if (enable)
+				evlist__enable_evsel(evlist, name);
+			else
+				evlist__disable_evsel(evlist, name);
+			pr_info("Event %s %s\n", evsel->name,
+				enable ? "enabled" : "disabled");
+		} else {
+			pr_info("failed: can't find '%s' event\n", name);
+		}
+	} else {
+		if (enable) {
+			evlist__enable(evlist);
+			pr_info(EVLIST_ENABLED_MSG);
+		} else {
+			evlist__disable(evlist);
+			pr_info(EVLIST_DISABLED_MSG);
+		}
+	}
+
+	return 0;
+}
+
+static int evlist__ctlfd_list(struct evlist *evlist, char *cmd_data)
+{
+	struct perf_attr_details details = { .verbose = false, };
+	struct evsel *evsel;
+	char *arg;
+	int err;
+
+	err = get_cmd_arg(cmd_data,
+			  sizeof(EVLIST_CTL_CMD_EVLIST_TAG) - 1,
+			  &arg);
+	if (err < 0) {
+		pr_info("failed: wrong command\n");
+		return -1;
+	}
+
+	if (err) {
+		if (!strcmp(arg, "-v")) {
+			details.verbose = true;
+		} else if (!strcmp(arg, "-g")) {
+			details.event_group = true;
+		} else if (!strcmp(arg, "-F")) {
+			details.freq = true;
+		} else {
+			pr_info("failed: wrong command\n");
+			return -1;
+		}
+	}
+
+	evlist__for_each_entry(evlist, evsel)
+		evsel__fprintf(evsel, &details, stderr);
+
+	return 0;
+}
+
+int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd)
+{
+	int err = 0;
+	char cmd_data[EVLIST_CTL_CMD_MAX_LEN];
+	int ctlfd_pos = evlist->ctl_fd.pos;
+	struct pollfd *entries = evlist->core.pollfd.entries;
+
+	if (!evlist__ctlfd_initialized(evlist) || !entries[ctlfd_pos].revents)
+		return 0;
+
+	if (entries[ctlfd_pos].revents & POLLIN) {
+		err = evlist__ctlfd_recv(evlist, cmd, cmd_data,
+					 EVLIST_CTL_CMD_MAX_LEN);
+		if (err > 0) {
+			switch (*cmd) {
+			case EVLIST_CTL_CMD_ENABLE:
+			case EVLIST_CTL_CMD_DISABLE:
+				err = evlist__ctlfd_enable(evlist, cmd_data,
+							   *cmd == EVLIST_CTL_CMD_ENABLE);
+				break;
+			case EVLIST_CTL_CMD_EVLIST:
+				err = evlist__ctlfd_list(evlist, cmd_data);
+				break;
+			case EVLIST_CTL_CMD_SNAPSHOT:
+			case EVLIST_CTL_CMD_STOP:
+			case EVLIST_CTL_CMD_PING:
+				break;
+			case EVLIST_CTL_CMD_ACK:
+			case EVLIST_CTL_CMD_UNSUPPORTED:
+			default:
+				pr_debug("ctlfd: unsupported %d\n", *cmd);
+				break;
+			}
+			if (!(*cmd == EVLIST_CTL_CMD_ACK || *cmd == EVLIST_CTL_CMD_UNSUPPORTED ||
+			      *cmd == EVLIST_CTL_CMD_SNAPSHOT))
+				evlist__ctlfd_ack(evlist);
+		}
+	}
+
+	if (entries[ctlfd_pos].revents & (POLLHUP | POLLERR))
+		evlist__finalize_ctlfd(evlist);
+	else
+		entries[ctlfd_pos].revents = 0;
+
+	return err;
+}
+
+/**
+ * struct event_enable_time - perf record -D/--delay single time range.
+ * @start: start of time range to enable events in milliseconds
+ * @end: end of time range to enable events in milliseconds
+ *
+ * N.B. this structure is also accessed as an array of int.
+ */
+struct event_enable_time {
+	int	start;
+	int	end;
+};
+
+static int parse_event_enable_time(const char *str, struct event_enable_time *range, bool first)
+{
+	const char *fmt = first ? "%u - %u %n" : " , %u - %u %n";
+	int ret, start, end, n;
+
+	ret = sscanf(str, fmt, &start, &end, &n);
+	if (ret != 2 || end <= start)
+		return -EINVAL;
+	if (range) {
+		range->start = start;
+		range->end = end;
+	}
+	return n;
+}
+
+static ssize_t parse_event_enable_times(const char *str, struct event_enable_time *range)
+{
+	int incr = !!range;
+	bool first = true;
+	ssize_t ret, cnt;
+
+	for (cnt = 0; *str; cnt++) {
+		ret = parse_event_enable_time(str, range, first);
+		if (ret < 0)
+			return ret;
+		/* Check no overlap */
+		if (!first && range && range->start <= range[-1].end)
+			return -EINVAL;
+		str += ret;
+		range += incr;
+		first = false;
+	}
+	return cnt;
+}
+
+/**
+ * struct event_enable_timer - control structure for perf record -D/--delay.
+ * @evlist: event list
+ * @times: time ranges that events are enabled (N.B. this is also accessed as an
+ *         array of int)
+ * @times_cnt: number of time ranges
+ * @timerfd: timer file descriptor
+ * @pollfd_pos: position in @evlist array of file descriptors to poll (fdarray)
+ * @times_step: current position in (int *)@times)[],
+ *              refer event_enable_timer__process()
+ *
+ * Note, this structure is only used when there are time ranges, not when there
+ * is only an initial delay.
+ */
+struct event_enable_timer {
+	struct evlist *evlist;
+	struct event_enable_time *times;
+	size_t	times_cnt;
+	int	timerfd;
+	int	pollfd_pos;
+	size_t	times_step;
+};
+
+static int str_to_delay(const char *str)
+{
+	char *endptr;
+	long d;
+
+	d = strtol(str, &endptr, 10);
+	if (*endptr || d > INT_MAX || d < -1)
+		return 0;
+	return d;
+}
+
+int evlist__parse_event_enable_time(struct evlist *evlist, struct record_opts *opts,
+				    const char *str, int unset)
+{
+	enum fdarray_flags flags = fdarray_flag__nonfilterable | fdarray_flag__non_perf_event;
+	struct event_enable_timer *eet;
+	ssize_t times_cnt;
+	ssize_t ret;
+	int err;
+
+	if (unset)
+		return 0;
+
+	opts->initial_delay = str_to_delay(str);
+	if (opts->initial_delay)
+		return 0;
+
+	ret = parse_event_enable_times(str, NULL);
+	if (ret < 0)
+		return ret;
+
+	times_cnt = ret;
+	if (times_cnt == 0)
+		return -EINVAL;
+
+	eet = zalloc(sizeof(*eet));
+	if (!eet)
+		return -ENOMEM;
+
+	eet->times = calloc(times_cnt, sizeof(*eet->times));
+	if (!eet->times) {
+		err = -ENOMEM;
+		goto free_eet;
+	}
+
+	if (parse_event_enable_times(str, eet->times) != times_cnt) {
+		err = -EINVAL;
+		goto free_eet_times;
+	}
+
+	eet->times_cnt = times_cnt;
+
+	eet->timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
+	if (eet->timerfd == -1) {
+		err = -errno;
+		pr_err("timerfd_create failed: %s\n", strerror(errno));
+		goto free_eet_times;
+	}
+
+	eet->pollfd_pos = perf_evlist__add_pollfd(&evlist->core, eet->timerfd, NULL, POLLIN, flags);
+	if (eet->pollfd_pos < 0) {
+		err = eet->pollfd_pos;
+		goto close_timerfd;
+	}
+
+	eet->evlist = evlist;
+	evlist->eet = eet;
+	opts->initial_delay = eet->times[0].start;
+
+	return 0;
+
+close_timerfd:
+	close(eet->timerfd);
+free_eet_times:
+	free(eet->times);
+free_eet:
+	free(eet);
+	return err;
+}
+
+static int event_enable_timer__set_timer(struct event_enable_timer *eet, int ms)
+{
+	struct itimerspec its = {
+		.it_value.tv_sec = ms / MSEC_PER_SEC,
+		.it_value.tv_nsec = (ms % MSEC_PER_SEC) * NSEC_PER_MSEC,
+	};
+	int err = 0;
+
+	if (timerfd_settime(eet->timerfd, 0, &its, NULL) < 0) {
+		err = -errno;
+		pr_err("timerfd_settime failed: %s\n", strerror(errno));
+	}
+	return err;
+}
+
+int event_enable_timer__start(struct event_enable_timer *eet)
+{
+	int ms;
+
+	if (!eet)
+		return 0;
+
+	ms = eet->times[0].end - eet->times[0].start;
+	eet->times_step = 1;
+
+	return event_enable_timer__set_timer(eet, ms);
+}
+
+int event_enable_timer__process(struct event_enable_timer *eet)
+{
+	struct pollfd *entries;
+	short revents;
+
+	if (!eet)
+		return 0;
+
+	entries = eet->evlist->core.pollfd.entries;
+	revents = entries[eet->pollfd_pos].revents;
+	entries[eet->pollfd_pos].revents = 0;
+
+	if (revents & POLLIN) {
+		size_t step = eet->times_step;
+		size_t pos = step / 2;
+
+		if (step & 1) {
+			evlist__disable_non_dummy(eet->evlist);
+			pr_info(EVLIST_DISABLED_MSG);
+			if (pos >= eet->times_cnt - 1) {
+				/* Disarm timer */
+				event_enable_timer__set_timer(eet, 0);
+				return 1; /* Stop */
+			}
+		} else {
+			evlist__enable_non_dummy(eet->evlist);
+			pr_info(EVLIST_ENABLED_MSG);
+		}
+
+		step += 1;
+		pos = step / 2;
+
+		if (pos < eet->times_cnt) {
+			int *times = (int *)eet->times; /* Accessing 'times' as array of int */
+			int ms = times[step] - times[step - 1];
+
+			eet->times_step = step;
+			return event_enable_timer__set_timer(eet, ms);
+		}
+	}
+
+	return 0;
+}
+
+void event_enable_timer__exit(struct event_enable_timer **ep)
+{
+	if (!ep || !*ep)
+		return;
+	free((*ep)->times);
+	zfree(ep);
+}
+
+struct evsel *evlist__find_evsel(struct evlist *evlist, int idx)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->core.idx == idx)
+			return evsel;
+	}
+	return NULL;
+}
+
+int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf)
+{
+	struct evsel *evsel;
+	int printed = 0;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel__is_dummy_event(evsel))
+			continue;
+		if (size > (strlen(evsel__name(evsel)) + (printed ? 2 : 1))) {
+			printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "," : "", evsel__name(evsel));
+		} else {
+			printed += scnprintf(bf + printed, size - printed, "%s...", printed ? "," : "");
+			break;
+		}
+	}
+
+	return printed;
+}
+
+void evlist__check_mem_load_aux(struct evlist *evlist)
+{
+	struct evsel *leader, *evsel, *pos;
+
+	/*
+	 * For some platforms, the 'mem-loads' event is required to use
+	 * together with 'mem-loads-aux' within a group and 'mem-loads-aux'
+	 * must be the group leader. Now we disable this group before reporting
+	 * because 'mem-loads-aux' is just an auxiliary event. It doesn't carry
+	 * any valid memory load information.
+	 */
+	evlist__for_each_entry(evlist, evsel) {
+		leader = evsel__leader(evsel);
+		if (leader == evsel)
+			continue;
+
+		if (leader->name && strstr(leader->name, "mem-loads-aux")) {
+			for_each_group_evsel(pos, leader) {
+				evsel__set_leader(pos, pos);
+				pos->core.nr_members = 0;
+			}
+		}
+	}
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
new file mode 100644
index 000000000..16734c675
--- /dev/null
+++ b/tools/perf/util/evlist.h
@@ -0,0 +1,443 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_EVLIST_H
+#define __PERF_EVLIST_H 1
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/refcount.h>
+#include <linux/list.h>
+#include <api/fd/array.h>
+#include <internal/evlist.h>
+#include <internal/evsel.h>
+#include "events_stats.h"
+#include "evsel.h"
+#include <pthread.h>
+#include <signal.h>
+#include <unistd.h>
+
+struct pollfd;
+struct thread_map;
+struct perf_cpu_map;
+struct record_opts;
+
+/*
+ * State machine of bkw_mmap_state:
+ *
+ *                     .________________(forbid)_____________.
+ *                     |                                     V
+ * NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY
+ *                     ^  ^              |   ^               |
+ *                     |  |__(forbid)____/   |___(forbid)___/|
+ *                     |                                     |
+ *                      \_________________(3)_______________/
+ *
+ * NOTREADY     : Backward ring buffers are not ready
+ * RUNNING      : Backward ring buffers are recording
+ * DATA_PENDING : We are required to collect data from backward ring buffers
+ * EMPTY        : We have collected data from backward ring buffers.
+ *
+ * (0): Setup backward ring buffer
+ * (1): Pause ring buffers for reading
+ * (2): Read from ring buffers
+ * (3): Resume ring buffers for recording
+ */
+enum bkw_mmap_state {
+	BKW_MMAP_NOTREADY,
+	BKW_MMAP_RUNNING,
+	BKW_MMAP_DATA_PENDING,
+	BKW_MMAP_EMPTY,
+};
+
+struct event_enable_timer;
+
+struct evlist {
+	struct perf_evlist core;
+	bool		 enabled;
+	int		 id_pos;
+	int		 is_pos;
+	u64		 combined_sample_type;
+	enum bkw_mmap_state bkw_mmap_state;
+	struct {
+		int	cork_fd;
+		pid_t	pid;
+	} workload;
+	struct mmap *mmap;
+	struct mmap *overwrite_mmap;
+	struct evsel *selected;
+	struct events_stats stats;
+	struct perf_env	*env;
+	const char *hybrid_pmu_name;
+	void (*trace_event_sample_raw)(struct evlist *evlist,
+				       union perf_event *event,
+				       struct perf_sample *sample);
+	u64		first_sample_time;
+	u64		last_sample_time;
+	struct {
+		pthread_t		th;
+		volatile int		done;
+	} thread;
+	struct {
+		int	fd;	/* control file descriptor */
+		int	ack;	/* ack file descriptor for control commands */
+		int	pos;	/* index at evlist core object to check signals */
+	} ctl_fd;
+	struct event_enable_timer *eet;
+};
+
+struct evsel_str_handler {
+	const char *name;
+	void	   *handler;
+};
+
+struct evlist *evlist__new(void);
+struct evlist *evlist__new_default(void);
+struct evlist *evlist__new_dummy(void);
+void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
+		  struct perf_thread_map *threads);
+void evlist__exit(struct evlist *evlist);
+void evlist__delete(struct evlist *evlist);
+
+void evlist__add(struct evlist *evlist, struct evsel *entry);
+void evlist__remove(struct evlist *evlist, struct evsel *evsel);
+
+int __evlist__add_default(struct evlist *evlist, bool precise);
+
+static inline int evlist__add_default(struct evlist *evlist)
+{
+	return __evlist__add_default(evlist, true);
+}
+
+int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs);
+
+int __evlist__add_default_attrs(struct evlist *evlist,
+				     struct perf_event_attr *attrs, size_t nr_attrs);
+
+int arch_evlist__add_default_attrs(struct evlist *evlist,
+				   struct perf_event_attr *attrs,
+				   size_t nr_attrs);
+
+#define evlist__add_default_attrs(evlist, array) \
+	arch_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
+
+struct evsel *arch_evlist__leader(struct list_head *list);
+
+int evlist__add_dummy(struct evlist *evlist);
+struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide);
+static inline struct evsel *evlist__add_dummy_on_all_cpus(struct evlist *evlist)
+{
+	return evlist__add_aux_dummy(evlist, true);
+}
+struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide);
+
+int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr,
+			 evsel__sb_cb_t cb, void *data);
+void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data);
+int evlist__start_sb_thread(struct evlist *evlist, struct target *target);
+void evlist__stop_sb_thread(struct evlist *evlist);
+
+int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler);
+
+int __evlist__set_tracepoints_handlers(struct evlist *evlist,
+				       const struct evsel_str_handler *assocs,
+				       size_t nr_assocs);
+
+#define evlist__set_tracepoints_handlers(evlist, array) \
+	__evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array))
+
+int evlist__set_tp_filter(struct evlist *evlist, const char *filter);
+int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid);
+int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids);
+
+int evlist__append_tp_filter(struct evlist *evlist, const char *filter);
+
+int evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid);
+int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids);
+
+struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id);
+struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name);
+
+int evlist__add_pollfd(struct evlist *evlist, int fd);
+int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask);
+
+#ifdef HAVE_EVENTFD_SUPPORT
+int evlist__add_wakeup_eventfd(struct evlist *evlist, int fd);
+#endif
+
+int evlist__poll(struct evlist *evlist, int timeout);
+
+struct evsel *evlist__id2evsel(struct evlist *evlist, u64 id);
+struct evsel *evlist__id2evsel_strict(struct evlist *evlist, u64 id);
+
+struct perf_sample_id *evlist__id2sid(struct evlist *evlist, u64 id);
+
+void evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state);
+
+void evlist__mmap_consume(struct evlist *evlist, int idx);
+
+int evlist__open(struct evlist *evlist);
+void evlist__close(struct evlist *evlist);
+
+struct callchain_param;
+
+void evlist__set_id_pos(struct evlist *evlist);
+void evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain);
+int record_opts__config(struct record_opts *opts);
+
+int evlist__prepare_workload(struct evlist *evlist, struct target *target,
+			     const char *argv[], bool pipe_output,
+			     void (*exec_error)(int signo, siginfo_t *info, void *ucontext));
+int evlist__start_workload(struct evlist *evlist);
+
+struct option;
+
+int __evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str);
+int evlist__parse_mmap_pages(const struct option *opt, const char *str, int unset);
+
+unsigned long perf_event_mlock_kb_in_pages(void);
+
+int evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
+			 unsigned int auxtrace_pages,
+			 bool auxtrace_overwrite, int nr_cblocks,
+			 int affinity, int flush, int comp_level);
+int evlist__mmap(struct evlist *evlist, unsigned int pages);
+void evlist__munmap(struct evlist *evlist);
+
+size_t evlist__mmap_size(unsigned long pages);
+
+void evlist__disable(struct evlist *evlist);
+void evlist__enable(struct evlist *evlist);
+void evlist__toggle_enable(struct evlist *evlist);
+void evlist__disable_evsel(struct evlist *evlist, char *evsel_name);
+void evlist__enable_evsel(struct evlist *evlist, char *evsel_name);
+void evlist__disable_non_dummy(struct evlist *evlist);
+void evlist__enable_non_dummy(struct evlist *evlist);
+
+void evlist__set_selected(struct evlist *evlist, struct evsel *evsel);
+
+int evlist__create_maps(struct evlist *evlist, struct target *target);
+int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel);
+
+void evlist__set_leader(struct evlist *evlist);
+
+u64 __evlist__combined_sample_type(struct evlist *evlist);
+u64 evlist__combined_sample_type(struct evlist *evlist);
+u64 evlist__combined_branch_type(struct evlist *evlist);
+bool evlist__sample_id_all(struct evlist *evlist);
+u16 evlist__id_hdr_size(struct evlist *evlist);
+
+int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample);
+int evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *event, u64 *timestamp);
+
+bool evlist__valid_sample_type(struct evlist *evlist);
+bool evlist__valid_sample_id_all(struct evlist *evlist);
+bool evlist__valid_read_format(struct evlist *evlist);
+
+void evlist__splice_list_tail(struct evlist *evlist, struct list_head *list);
+
+static inline bool evlist__empty(struct evlist *evlist)
+{
+	return list_empty(&evlist->core.entries);
+}
+
+static inline struct evsel *evlist__first(struct evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(&evlist->core);
+
+	return container_of(evsel, struct evsel, core);
+}
+
+static inline struct evsel *evlist__last(struct evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__last(&evlist->core);
+
+	return container_of(evsel, struct evsel, core);
+}
+
+int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size);
+int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size);
+
+bool evlist__can_select_event(struct evlist *evlist, const char *str);
+void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel);
+
+/**
+ * __evlist__for_each_entry - iterate thru all the evsels
+ * @list: list_head instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define __evlist__for_each_entry(list, evsel) \
+        list_for_each_entry(evsel, list, core.node)
+
+/**
+ * evlist__for_each_entry - iterate thru all the evsels
+ * @evlist: evlist instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define evlist__for_each_entry(evlist, evsel) \
+	__evlist__for_each_entry(&(evlist)->core.entries, evsel)
+
+/**
+ * __evlist__for_each_entry_continue - continue iteration thru all the evsels
+ * @list: list_head instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define __evlist__for_each_entry_continue(list, evsel) \
+        list_for_each_entry_continue(evsel, list, core.node)
+
+/**
+ * evlist__for_each_entry_continue - continue iteration thru all the evsels
+ * @evlist: evlist instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define evlist__for_each_entry_continue(evlist, evsel) \
+	__evlist__for_each_entry_continue(&(evlist)->core.entries, evsel)
+
+/**
+ * __evlist__for_each_entry_from - continue iteration from @evsel (included)
+ * @list: list_head instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define __evlist__for_each_entry_from(list, evsel) \
+	list_for_each_entry_from(evsel, list, core.node)
+
+/**
+ * evlist__for_each_entry_from - continue iteration from @evsel (included)
+ * @evlist: evlist instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define evlist__for_each_entry_from(evlist, evsel) \
+	__evlist__for_each_entry_from(&(evlist)->core.entries, evsel)
+
+/**
+ * __evlist__for_each_entry_reverse - iterate thru all the evsels in reverse order
+ * @list: list_head instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define __evlist__for_each_entry_reverse(list, evsel) \
+        list_for_each_entry_reverse(evsel, list, core.node)
+
+/**
+ * evlist__for_each_entry_reverse - iterate thru all the evsels in reverse order
+ * @evlist: evlist instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define evlist__for_each_entry_reverse(evlist, evsel) \
+	__evlist__for_each_entry_reverse(&(evlist)->core.entries, evsel)
+
+/**
+ * __evlist__for_each_entry_safe - safely iterate thru all the evsels
+ * @list: list_head instance to iterate
+ * @tmp: struct evsel temp iterator
+ * @evsel: struct evsel iterator
+ */
+#define __evlist__for_each_entry_safe(list, tmp, evsel) \
+        list_for_each_entry_safe(evsel, tmp, list, core.node)
+
+/**
+ * evlist__for_each_entry_safe - safely iterate thru all the evsels
+ * @evlist: evlist instance to iterate
+ * @evsel: struct evsel iterator
+ * @tmp: struct evsel temp iterator
+ */
+#define evlist__for_each_entry_safe(evlist, tmp, evsel) \
+	__evlist__for_each_entry_safe(&(evlist)->core.entries, tmp, evsel)
+
+/** Iterator state for evlist__for_each_cpu */
+struct evlist_cpu_iterator {
+	/** The list being iterated through. */
+	struct evlist *container;
+	/** The current evsel of the iterator. */
+	struct evsel *evsel;
+	/** The CPU map index corresponding to the evsel->core.cpus for the current CPU. */
+	int cpu_map_idx;
+	/**
+	 * The CPU map index corresponding to evlist->core.all_cpus for the
+	 * current CPU.  Distinct from cpu_map_idx as the evsel's cpu map may
+	 * contain fewer entries.
+	 */
+	int evlist_cpu_map_idx;
+	/** The number of CPU map entries in evlist->core.all_cpus. */
+	int evlist_cpu_map_nr;
+	/** The current CPU of the iterator. */
+	struct perf_cpu cpu;
+	/** If present, used to set the affinity when switching between CPUs. */
+	struct affinity *affinity;
+};
+
+/**
+ * evlist__for_each_cpu - without affinity, iterate over the evlist. With
+ *                        affinity, iterate over all CPUs and then the evlist
+ *                        for each evsel on that CPU. When switching between
+ *                        CPUs the affinity is set to the CPU to avoid IPIs
+ *                        during syscalls.
+ * @evlist_cpu_itr: the iterator instance.
+ * @evlist: evlist instance to iterate.
+ * @affinity: NULL or used to set the affinity to the current CPU.
+ */
+#define evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity)		\
+	for ((evlist_cpu_itr) = evlist__cpu_begin(evlist, affinity);	\
+	     !evlist_cpu_iterator__end(&evlist_cpu_itr);		\
+	     evlist_cpu_iterator__next(&evlist_cpu_itr))
+
+/** Returns an iterator set to the first CPU/evsel of evlist. */
+struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity);
+/** Move to next element in iterator, updating CPU, evsel and the affinity. */
+void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr);
+/** Returns true when iterator is at the end of the CPUs and evlist. */
+bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr);
+
+struct evsel *evlist__get_tracking_event(struct evlist *evlist);
+void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel);
+
+struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str);
+
+struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event);
+
+bool evlist__exclude_kernel(struct evlist *evlist);
+
+void evlist__force_leader(struct evlist *evlist);
+
+struct evsel *evlist__reset_weak_group(struct evlist *evlist, struct evsel *evsel, bool close);
+
+#define EVLIST_CTL_CMD_ENABLE_TAG  "enable"
+#define EVLIST_CTL_CMD_DISABLE_TAG "disable"
+#define EVLIST_CTL_CMD_ACK_TAG     "ack\n"
+#define EVLIST_CTL_CMD_SNAPSHOT_TAG "snapshot"
+#define EVLIST_CTL_CMD_EVLIST_TAG "evlist"
+#define EVLIST_CTL_CMD_STOP_TAG "stop"
+#define EVLIST_CTL_CMD_PING_TAG "ping"
+
+#define EVLIST_CTL_CMD_MAX_LEN 64
+
+enum evlist_ctl_cmd {
+	EVLIST_CTL_CMD_UNSUPPORTED = 0,
+	EVLIST_CTL_CMD_ENABLE,
+	EVLIST_CTL_CMD_DISABLE,
+	EVLIST_CTL_CMD_ACK,
+	EVLIST_CTL_CMD_SNAPSHOT,
+	EVLIST_CTL_CMD_EVLIST,
+	EVLIST_CTL_CMD_STOP,
+	EVLIST_CTL_CMD_PING,
+};
+
+int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close);
+void evlist__close_control(int ctl_fd, int ctl_fd_ack, bool *ctl_fd_close);
+int evlist__initialize_ctlfd(struct evlist *evlist, int ctl_fd, int ctl_fd_ack);
+int evlist__finalize_ctlfd(struct evlist *evlist);
+bool evlist__ctlfd_initialized(struct evlist *evlist);
+int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd);
+int evlist__ctlfd_ack(struct evlist *evlist);
+
+#define EVLIST_ENABLED_MSG "Events enabled\n"
+#define EVLIST_DISABLED_MSG "Events disabled\n"
+
+int evlist__parse_event_enable_time(struct evlist *evlist, struct record_opts *opts,
+				    const char *str, int unset);
+int event_enable_timer__start(struct event_enable_timer *eet);
+void event_enable_timer__exit(struct event_enable_timer **ep);
+int event_enable_timer__process(struct event_enable_timer *eet);
+
+struct evsel *evlist__find_evsel(struct evlist *evlist, int idx);
+
+int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf);
+void evlist__check_mem_load_aux(struct evlist *evlist);
+#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
new file mode 100644
index 000000000..76605fde3
--- /dev/null
+++ b/tools/perf/util/evsel.c
@@ -0,0 +1,3186 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from builtin-{top,stat,record}.c, see those files for further
+ * copyright notes.
+ */
+
+#include <byteswap.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/bitops.h>
+#include <api/fs/fs.h>
+#include <api/fs/tracing_path.h>
+#include <traceevent/event-parse.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/perf_event.h>
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/zalloc.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <stdlib.h>
+#include <perf/evsel.h>
+#include "asm/bug.h"
+#include "bpf_counter.h"
+#include "callchain.h"
+#include "cgroup.h"
+#include "counts.h"
+#include "event.h"
+#include "evsel.h"
+#include "util/env.h"
+#include "util/evsel_config.h"
+#include "util/evsel_fprintf.h"
+#include "evlist.h"
+#include <perf/cpumap.h>
+#include "thread_map.h"
+#include "target.h"
+#include "perf_regs.h"
+#include "record.h"
+#include "debug.h"
+#include "trace-event.h"
+#include "stat.h"
+#include "string2.h"
+#include "memswap.h"
+#include "util.h"
+#ifdef HAVE_LIBBPF_SUPPORT
+#include <bpf/hashmap.h>
+#else
+#include "util/hashmap.h"
+#endif
+#include "pmu-hybrid.h"
+#include "off_cpu.h"
+#include "../perf-sys.h"
+#include "util/parse-branch-options.h"
+#include <internal/xyarray.h>
+#include <internal/lib.h>
+
+#include <linux/ctype.h>
+
+struct perf_missing_features perf_missing_features;
+
+static clockid_t clockid;
+
+static const char *const perf_tool_event__tool_names[PERF_TOOL_MAX] = {
+	NULL,
+	"duration_time",
+	"user_time",
+	"system_time",
+};
+
+const char *perf_tool_event__to_str(enum perf_tool_event ev)
+{
+	if (ev > PERF_TOOL_NONE && ev < PERF_TOOL_MAX)
+		return perf_tool_event__tool_names[ev];
+
+	return NULL;
+}
+
+enum perf_tool_event perf_tool_event__from_str(const char *str)
+{
+	int i;
+
+	perf_tool_event__for_each_event(i) {
+		if (!strcmp(str, perf_tool_event__tool_names[i]))
+			return i;
+	}
+	return PERF_TOOL_NONE;
+}
+
+
+static int evsel__no_extra_init(struct evsel *evsel __maybe_unused)
+{
+	return 0;
+}
+
+void __weak test_attr__ready(void) { }
+
+static void evsel__no_extra_fini(struct evsel *evsel __maybe_unused)
+{
+}
+
+static struct {
+	size_t	size;
+	int	(*init)(struct evsel *evsel);
+	void	(*fini)(struct evsel *evsel);
+} perf_evsel__object = {
+	.size = sizeof(struct evsel),
+	.init = evsel__no_extra_init,
+	.fini = evsel__no_extra_fini,
+};
+
+int evsel__object_config(size_t object_size, int (*init)(struct evsel *evsel),
+			 void (*fini)(struct evsel *evsel))
+{
+
+	if (object_size == 0)
+		goto set_methods;
+
+	if (perf_evsel__object.size > object_size)
+		return -EINVAL;
+
+	perf_evsel__object.size = object_size;
+
+set_methods:
+	if (init != NULL)
+		perf_evsel__object.init = init;
+
+	if (fini != NULL)
+		perf_evsel__object.fini = fini;
+
+	return 0;
+}
+
+#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
+
+int __evsel__sample_size(u64 sample_type)
+{
+	u64 mask = sample_type & PERF_SAMPLE_MASK;
+	int size = 0;
+	int i;
+
+	for (i = 0; i < 64; i++) {
+		if (mask & (1ULL << i))
+			size++;
+	}
+
+	size *= sizeof(u64);
+
+	return size;
+}
+
+/**
+ * __perf_evsel__calc_id_pos - calculate id_pos.
+ * @sample_type: sample type
+ *
+ * This function returns the position of the event id (PERF_SAMPLE_ID or
+ * PERF_SAMPLE_IDENTIFIER) in a sample event i.e. in the array of struct
+ * perf_record_sample.
+ */
+static int __perf_evsel__calc_id_pos(u64 sample_type)
+{
+	int idx = 0;
+
+	if (sample_type & PERF_SAMPLE_IDENTIFIER)
+		return 0;
+
+	if (!(sample_type & PERF_SAMPLE_ID))
+		return -1;
+
+	if (sample_type & PERF_SAMPLE_IP)
+		idx += 1;
+
+	if (sample_type & PERF_SAMPLE_TID)
+		idx += 1;
+
+	if (sample_type & PERF_SAMPLE_TIME)
+		idx += 1;
+
+	if (sample_type & PERF_SAMPLE_ADDR)
+		idx += 1;
+
+	return idx;
+}
+
+/**
+ * __perf_evsel__calc_is_pos - calculate is_pos.
+ * @sample_type: sample type
+ *
+ * This function returns the position (counting backwards) of the event id
+ * (PERF_SAMPLE_ID or PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if
+ * sample_id_all is used there is an id sample appended to non-sample events.
+ */
+static int __perf_evsel__calc_is_pos(u64 sample_type)
+{
+	int idx = 1;
+
+	if (sample_type & PERF_SAMPLE_IDENTIFIER)
+		return 1;
+
+	if (!(sample_type & PERF_SAMPLE_ID))
+		return -1;
+
+	if (sample_type & PERF_SAMPLE_CPU)
+		idx += 1;
+
+	if (sample_type & PERF_SAMPLE_STREAM_ID)
+		idx += 1;
+
+	return idx;
+}
+
+void evsel__calc_id_pos(struct evsel *evsel)
+{
+	evsel->id_pos = __perf_evsel__calc_id_pos(evsel->core.attr.sample_type);
+	evsel->is_pos = __perf_evsel__calc_is_pos(evsel->core.attr.sample_type);
+}
+
+void __evsel__set_sample_bit(struct evsel *evsel,
+				  enum perf_event_sample_format bit)
+{
+	if (!(evsel->core.attr.sample_type & bit)) {
+		evsel->core.attr.sample_type |= bit;
+		evsel->sample_size += sizeof(u64);
+		evsel__calc_id_pos(evsel);
+	}
+}
+
+void __evsel__reset_sample_bit(struct evsel *evsel,
+				    enum perf_event_sample_format bit)
+{
+	if (evsel->core.attr.sample_type & bit) {
+		evsel->core.attr.sample_type &= ~bit;
+		evsel->sample_size -= sizeof(u64);
+		evsel__calc_id_pos(evsel);
+	}
+}
+
+void evsel__set_sample_id(struct evsel *evsel,
+			       bool can_sample_identifier)
+{
+	if (can_sample_identifier) {
+		evsel__reset_sample_bit(evsel, ID);
+		evsel__set_sample_bit(evsel, IDENTIFIER);
+	} else {
+		evsel__set_sample_bit(evsel, ID);
+	}
+	evsel->core.attr.read_format |= PERF_FORMAT_ID;
+}
+
+/**
+ * evsel__is_function_event - Return whether given evsel is a function
+ * trace event
+ *
+ * @evsel - evsel selector to be tested
+ *
+ * Return %true if event is function trace event
+ */
+bool evsel__is_function_event(struct evsel *evsel)
+{
+#define FUNCTION_EVENT "ftrace:function"
+
+	return evsel->name &&
+	       !strncmp(FUNCTION_EVENT, evsel->name, sizeof(FUNCTION_EVENT));
+
+#undef FUNCTION_EVENT
+}
+
+void evsel__init(struct evsel *evsel,
+		 struct perf_event_attr *attr, int idx)
+{
+	perf_evsel__init(&evsel->core, attr, idx);
+	evsel->tracking	   = !idx;
+	evsel->unit	   = strdup("");
+	evsel->scale	   = 1.0;
+	evsel->max_events  = ULONG_MAX;
+	evsel->evlist	   = NULL;
+	evsel->bpf_obj	   = NULL;
+	evsel->bpf_fd	   = -1;
+	INIT_LIST_HEAD(&evsel->config_terms);
+	INIT_LIST_HEAD(&evsel->bpf_counter_list);
+	perf_evsel__object.init(evsel);
+	evsel->sample_size = __evsel__sample_size(attr->sample_type);
+	evsel__calc_id_pos(evsel);
+	evsel->cmdline_group_boundary = false;
+	evsel->metric_expr   = NULL;
+	evsel->metric_name   = NULL;
+	evsel->metric_events = NULL;
+	evsel->per_pkg_mask  = NULL;
+	evsel->collect_stat  = false;
+	evsel->pmu_name      = NULL;
+}
+
+struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx)
+{
+	struct evsel *evsel = zalloc(perf_evsel__object.size);
+
+	if (!evsel)
+		return NULL;
+	evsel__init(evsel, attr, idx);
+
+	if (evsel__is_bpf_output(evsel) && !attr->sample_type) {
+		evsel->core.attr.sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
+					    PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
+		evsel->core.attr.sample_period = 1;
+	}
+
+	if (evsel__is_clock(evsel)) {
+		free((char *)evsel->unit);
+		evsel->unit = strdup("msec");
+		evsel->scale = 1e-6;
+	}
+
+	return evsel;
+}
+
+static bool perf_event_can_profile_kernel(void)
+{
+	return perf_event_paranoid_check(1);
+}
+
+struct evsel *evsel__new_cycles(bool precise __maybe_unused, __u32 type, __u64 config)
+{
+	struct perf_event_attr attr = {
+		.type	= type,
+		.config	= config,
+		.exclude_kernel	= !perf_event_can_profile_kernel(),
+	};
+	struct evsel *evsel;
+
+	event_attr_init(&attr);
+
+	/*
+	 * Now let the usual logic to set up the perf_event_attr defaults
+	 * to kick in when we return and before perf_evsel__open() is called.
+	 */
+	evsel = evsel__new(&attr);
+	if (evsel == NULL)
+		goto out;
+
+	arch_evsel__fixup_new_cycles(&evsel->core.attr);
+
+	evsel->precise_max = true;
+
+	/* use asprintf() because free(evsel) assumes name is allocated */
+	if (asprintf(&evsel->name, "cycles%s%s%.*s",
+		     (attr.precise_ip || attr.exclude_kernel) ? ":" : "",
+		     attr.exclude_kernel ? "u" : "",
+		     attr.precise_ip ? attr.precise_ip + 1 : 0, "ppp") < 0)
+		goto error_free;
+out:
+	return evsel;
+error_free:
+	evsel__delete(evsel);
+	evsel = NULL;
+	goto out;
+}
+
+int copy_config_terms(struct list_head *dst, struct list_head *src)
+{
+	struct evsel_config_term *pos, *tmp;
+
+	list_for_each_entry(pos, src, list) {
+		tmp = malloc(sizeof(*tmp));
+		if (tmp == NULL)
+			return -ENOMEM;
+
+		*tmp = *pos;
+		if (tmp->free_str) {
+			tmp->val.str = strdup(pos->val.str);
+			if (tmp->val.str == NULL) {
+				free(tmp);
+				return -ENOMEM;
+			}
+		}
+		list_add_tail(&tmp->list, dst);
+	}
+	return 0;
+}
+
+static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src)
+{
+	return copy_config_terms(&dst->config_terms, &src->config_terms);
+}
+
+/**
+ * evsel__clone - create a new evsel copied from @orig
+ * @orig: original evsel
+ *
+ * The assumption is that @orig is not configured nor opened yet.
+ * So we only care about the attributes that can be set while it's parsed.
+ */
+struct evsel *evsel__clone(struct evsel *orig)
+{
+	struct evsel *evsel;
+
+	BUG_ON(orig->core.fd);
+	BUG_ON(orig->counts);
+	BUG_ON(orig->priv);
+	BUG_ON(orig->per_pkg_mask);
+
+	/* cannot handle BPF objects for now */
+	if (orig->bpf_obj)
+		return NULL;
+
+	evsel = evsel__new(&orig->core.attr);
+	if (evsel == NULL)
+		return NULL;
+
+	evsel->core.cpus = perf_cpu_map__get(orig->core.cpus);
+	evsel->core.own_cpus = perf_cpu_map__get(orig->core.own_cpus);
+	evsel->core.threads = perf_thread_map__get(orig->core.threads);
+	evsel->core.nr_members = orig->core.nr_members;
+	evsel->core.system_wide = orig->core.system_wide;
+	evsel->core.requires_cpu = orig->core.requires_cpu;
+
+	if (orig->name) {
+		evsel->name = strdup(orig->name);
+		if (evsel->name == NULL)
+			goto out_err;
+	}
+	if (orig->group_name) {
+		evsel->group_name = strdup(orig->group_name);
+		if (evsel->group_name == NULL)
+			goto out_err;
+	}
+	if (orig->pmu_name) {
+		evsel->pmu_name = strdup(orig->pmu_name);
+		if (evsel->pmu_name == NULL)
+			goto out_err;
+	}
+	if (orig->filter) {
+		evsel->filter = strdup(orig->filter);
+		if (evsel->filter == NULL)
+			goto out_err;
+	}
+	if (orig->metric_id) {
+		evsel->metric_id = strdup(orig->metric_id);
+		if (evsel->metric_id == NULL)
+			goto out_err;
+	}
+	evsel->cgrp = cgroup__get(orig->cgrp);
+	evsel->tp_format = orig->tp_format;
+	evsel->handler = orig->handler;
+	evsel->core.leader = orig->core.leader;
+
+	evsel->max_events = orig->max_events;
+	evsel->tool_event = orig->tool_event;
+	free((char *)evsel->unit);
+	evsel->unit = strdup(orig->unit);
+	if (evsel->unit == NULL)
+		goto out_err;
+
+	evsel->scale = orig->scale;
+	evsel->snapshot = orig->snapshot;
+	evsel->per_pkg = orig->per_pkg;
+	evsel->percore = orig->percore;
+	evsel->precise_max = orig->precise_max;
+	evsel->use_uncore_alias = orig->use_uncore_alias;
+	evsel->is_libpfm_event = orig->is_libpfm_event;
+
+	evsel->exclude_GH = orig->exclude_GH;
+	evsel->sample_read = orig->sample_read;
+	evsel->auto_merge_stats = orig->auto_merge_stats;
+	evsel->collect_stat = orig->collect_stat;
+	evsel->weak_group = orig->weak_group;
+	evsel->use_config_name = orig->use_config_name;
+
+	if (evsel__copy_config_terms(evsel, orig) < 0)
+		goto out_err;
+
+	return evsel;
+
+out_err:
+	evsel__delete(evsel);
+	return NULL;
+}
+
+/*
+ * Returns pointer with encoded error via <linux/err.h> interface.
+ */
+struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx)
+{
+	struct evsel *evsel = zalloc(perf_evsel__object.size);
+	int err = -ENOMEM;
+
+	if (evsel == NULL) {
+		goto out_err;
+	} else {
+		struct perf_event_attr attr = {
+			.type	       = PERF_TYPE_TRACEPOINT,
+			.sample_type   = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
+					  PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
+		};
+
+		if (asprintf(&evsel->name, "%s:%s", sys, name) < 0)
+			goto out_free;
+
+		evsel->tp_format = trace_event__tp_format(sys, name);
+		if (IS_ERR(evsel->tp_format)) {
+			err = PTR_ERR(evsel->tp_format);
+			goto out_free;
+		}
+
+		event_attr_init(&attr);
+		attr.config = evsel->tp_format->id;
+		attr.sample_period = 1;
+		evsel__init(evsel, &attr, idx);
+	}
+
+	return evsel;
+
+out_free:
+	zfree(&evsel->name);
+	free(evsel);
+out_err:
+	return ERR_PTR(err);
+}
+
+const char *const evsel__hw_names[PERF_COUNT_HW_MAX] = {
+	"cycles",
+	"instructions",
+	"cache-references",
+	"cache-misses",
+	"branches",
+	"branch-misses",
+	"bus-cycles",
+	"stalled-cycles-frontend",
+	"stalled-cycles-backend",
+	"ref-cycles",
+};
+
+char *evsel__bpf_counter_events;
+
+bool evsel__match_bpf_counter_events(const char *name)
+{
+	int name_len;
+	bool match;
+	char *ptr;
+
+	if (!evsel__bpf_counter_events)
+		return false;
+
+	ptr = strstr(evsel__bpf_counter_events, name);
+	name_len = strlen(name);
+
+	/* check name matches a full token in evsel__bpf_counter_events */
+	match = (ptr != NULL) &&
+		((ptr == evsel__bpf_counter_events) || (*(ptr - 1) == ',')) &&
+		((*(ptr + name_len) == ',') || (*(ptr + name_len) == '\0'));
+
+	return match;
+}
+
+static const char *__evsel__hw_name(u64 config)
+{
+	if (config < PERF_COUNT_HW_MAX && evsel__hw_names[config])
+		return evsel__hw_names[config];
+
+	return "unknown-hardware";
+}
+
+static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size)
+{
+	int colon = 0, r = 0;
+	struct perf_event_attr *attr = &evsel->core.attr;
+	bool exclude_guest_default = false;
+
+#define MOD_PRINT(context, mod)	do {					\
+		if (!attr->exclude_##context) {				\
+			if (!colon) colon = ++r;			\
+			r += scnprintf(bf + r, size - r, "%c", mod);	\
+		} } while(0)
+
+	if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv) {
+		MOD_PRINT(kernel, 'k');
+		MOD_PRINT(user, 'u');
+		MOD_PRINT(hv, 'h');
+		exclude_guest_default = true;
+	}
+
+	if (attr->precise_ip) {
+		if (!colon)
+			colon = ++r;
+		r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp");
+		exclude_guest_default = true;
+	}
+
+	if (attr->exclude_host || attr->exclude_guest == exclude_guest_default) {
+		MOD_PRINT(host, 'H');
+		MOD_PRINT(guest, 'G');
+	}
+#undef MOD_PRINT
+	if (colon)
+		bf[colon - 1] = ':';
+	return r;
+}
+
+int __weak arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
+{
+	return scnprintf(bf, size, "%s", __evsel__hw_name(evsel->core.attr.config));
+}
+
+static int evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
+{
+	int r = arch_evsel__hw_name(evsel, bf, size);
+	return r + evsel__add_modifiers(evsel, bf + r, size - r);
+}
+
+const char *const evsel__sw_names[PERF_COUNT_SW_MAX] = {
+	"cpu-clock",
+	"task-clock",
+	"page-faults",
+	"context-switches",
+	"cpu-migrations",
+	"minor-faults",
+	"major-faults",
+	"alignment-faults",
+	"emulation-faults",
+	"dummy",
+};
+
+static const char *__evsel__sw_name(u64 config)
+{
+	if (config < PERF_COUNT_SW_MAX && evsel__sw_names[config])
+		return evsel__sw_names[config];
+	return "unknown-software";
+}
+
+static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size)
+{
+	int r = scnprintf(bf, size, "%s", __evsel__sw_name(evsel->core.attr.config));
+	return r + evsel__add_modifiers(evsel, bf + r, size - r);
+}
+
+static int evsel__tool_name(enum perf_tool_event ev, char *bf, size_t size)
+{
+	return scnprintf(bf, size, "%s", perf_tool_event__to_str(ev));
+}
+
+static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
+{
+	int r;
+
+	r = scnprintf(bf, size, "mem:0x%" PRIx64 ":", addr);
+
+	if (type & HW_BREAKPOINT_R)
+		r += scnprintf(bf + r, size - r, "r");
+
+	if (type & HW_BREAKPOINT_W)
+		r += scnprintf(bf + r, size - r, "w");
+
+	if (type & HW_BREAKPOINT_X)
+		r += scnprintf(bf + r, size - r, "x");
+
+	return r;
+}
+
+static int evsel__bp_name(struct evsel *evsel, char *bf, size_t size)
+{
+	struct perf_event_attr *attr = &evsel->core.attr;
+	int r = __evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
+	return r + evsel__add_modifiers(evsel, bf + r, size - r);
+}
+
+const char *const evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = {
+ { "L1-dcache",	"l1-d",		"l1d",		"L1-data",		},
+ { "L1-icache",	"l1-i",		"l1i",		"L1-instruction",	},
+ { "LLC",	"L2",							},
+ { "dTLB",	"d-tlb",	"Data-TLB",				},
+ { "iTLB",	"i-tlb",	"Instruction-TLB",			},
+ { "branch",	"branches",	"bpu",		"btb",		"bpc",	},
+ { "node",								},
+};
+
+const char *const evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES] = {
+ { "load",	"loads",	"read",					},
+ { "store",	"stores",	"write",				},
+ { "prefetch",	"prefetches",	"speculative-read", "speculative-load",	},
+};
+
+const char *const evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES] = {
+ { "refs",	"Reference",	"ops",		"access",		},
+ { "misses",	"miss",							},
+};
+
+#define C(x)		PERF_COUNT_HW_CACHE_##x
+#define CACHE_READ	(1 << C(OP_READ))
+#define CACHE_WRITE	(1 << C(OP_WRITE))
+#define CACHE_PREFETCH	(1 << C(OP_PREFETCH))
+#define COP(x)		(1 << x)
+
+/*
+ * cache operation stat
+ * L1I : Read and prefetch only
+ * ITLB and BPU : Read-only
+ */
+static const unsigned long evsel__hw_cache_stat[C(MAX)] = {
+ [C(L1D)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(L1I)]	= (CACHE_READ | CACHE_PREFETCH),
+ [C(LL)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(DTLB)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(ITLB)]	= (CACHE_READ),
+ [C(BPU)]	= (CACHE_READ),
+ [C(NODE)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+};
+
+bool evsel__is_cache_op_valid(u8 type, u8 op)
+{
+	if (evsel__hw_cache_stat[type] & COP(op))
+		return true;	/* valid */
+	else
+		return false;	/* invalid */
+}
+
+int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size)
+{
+	if (result) {
+		return scnprintf(bf, size, "%s-%s-%s", evsel__hw_cache[type][0],
+				 evsel__hw_cache_op[op][0],
+				 evsel__hw_cache_result[result][0]);
+	}
+
+	return scnprintf(bf, size, "%s-%s", evsel__hw_cache[type][0],
+			 evsel__hw_cache_op[op][1]);
+}
+
+static int __evsel__hw_cache_name(u64 config, char *bf, size_t size)
+{
+	u8 op, result, type = (config >>  0) & 0xff;
+	const char *err = "unknown-ext-hardware-cache-type";
+
+	if (type >= PERF_COUNT_HW_CACHE_MAX)
+		goto out_err;
+
+	op = (config >>  8) & 0xff;
+	err = "unknown-ext-hardware-cache-op";
+	if (op >= PERF_COUNT_HW_CACHE_OP_MAX)
+		goto out_err;
+
+	result = (config >> 16) & 0xff;
+	err = "unknown-ext-hardware-cache-result";
+	if (result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		goto out_err;
+
+	err = "invalid-cache";
+	if (!evsel__is_cache_op_valid(type, op))
+		goto out_err;
+
+	return __evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
+out_err:
+	return scnprintf(bf, size, "%s", err);
+}
+
+static int evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size)
+{
+	int ret = __evsel__hw_cache_name(evsel->core.attr.config, bf, size);
+	return ret + evsel__add_modifiers(evsel, bf + ret, size - ret);
+}
+
+static int evsel__raw_name(struct evsel *evsel, char *bf, size_t size)
+{
+	int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->core.attr.config);
+	return ret + evsel__add_modifiers(evsel, bf + ret, size - ret);
+}
+
+const char *evsel__name(struct evsel *evsel)
+{
+	char bf[128];
+
+	if (!evsel)
+		goto out_unknown;
+
+	if (evsel->name)
+		return evsel->name;
+
+	switch (evsel->core.attr.type) {
+	case PERF_TYPE_RAW:
+		evsel__raw_name(evsel, bf, sizeof(bf));
+		break;
+
+	case PERF_TYPE_HARDWARE:
+		evsel__hw_name(evsel, bf, sizeof(bf));
+		break;
+
+	case PERF_TYPE_HW_CACHE:
+		evsel__hw_cache_name(evsel, bf, sizeof(bf));
+		break;
+
+	case PERF_TYPE_SOFTWARE:
+		if (evsel__is_tool(evsel))
+			evsel__tool_name(evsel->tool_event, bf, sizeof(bf));
+		else
+			evsel__sw_name(evsel, bf, sizeof(bf));
+		break;
+
+	case PERF_TYPE_TRACEPOINT:
+		scnprintf(bf, sizeof(bf), "%s", "unknown tracepoint");
+		break;
+
+	case PERF_TYPE_BREAKPOINT:
+		evsel__bp_name(evsel, bf, sizeof(bf));
+		break;
+
+	default:
+		scnprintf(bf, sizeof(bf), "unknown attr type: %d",
+			  evsel->core.attr.type);
+		break;
+	}
+
+	evsel->name = strdup(bf);
+
+	if (evsel->name)
+		return evsel->name;
+out_unknown:
+	return "unknown";
+}
+
+const char *evsel__metric_id(const struct evsel *evsel)
+{
+	if (evsel->metric_id)
+		return evsel->metric_id;
+
+	if (evsel__is_tool(evsel))
+		return perf_tool_event__to_str(evsel->tool_event);
+
+	return "unknown";
+}
+
+const char *evsel__group_name(struct evsel *evsel)
+{
+	return evsel->group_name ?: "anon group";
+}
+
+/*
+ * Returns the group details for the specified leader,
+ * with following rules.
+ *
+ *  For record -e '{cycles,instructions}'
+ *    'anon group { cycles:u, instructions:u }'
+ *
+ *  For record -e 'cycles,instructions' and report --group
+ *    'cycles:u, instructions:u'
+ */
+int evsel__group_desc(struct evsel *evsel, char *buf, size_t size)
+{
+	int ret = 0;
+	struct evsel *pos;
+	const char *group_name = evsel__group_name(evsel);
+
+	if (!evsel->forced_leader)
+		ret = scnprintf(buf, size, "%s { ", group_name);
+
+	ret += scnprintf(buf + ret, size - ret, "%s", evsel__name(evsel));
+
+	for_each_group_member(pos, evsel)
+		ret += scnprintf(buf + ret, size - ret, ", %s", evsel__name(pos));
+
+	if (!evsel->forced_leader)
+		ret += scnprintf(buf + ret, size - ret, " }");
+
+	return ret;
+}
+
+static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
+				      struct callchain_param *param)
+{
+	bool function = evsel__is_function_event(evsel);
+	struct perf_event_attr *attr = &evsel->core.attr;
+
+	evsel__set_sample_bit(evsel, CALLCHAIN);
+
+	attr->sample_max_stack = param->max_stack;
+
+	if (opts->kernel_callchains)
+		attr->exclude_callchain_user = 1;
+	if (opts->user_callchains)
+		attr->exclude_callchain_kernel = 1;
+	if (param->record_mode == CALLCHAIN_LBR) {
+		if (!opts->branch_stack) {
+			if (attr->exclude_user) {
+				pr_warning("LBR callstack option is only available "
+					   "to get user callchain information. "
+					   "Falling back to framepointers.\n");
+			} else {
+				evsel__set_sample_bit(evsel, BRANCH_STACK);
+				attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
+							PERF_SAMPLE_BRANCH_CALL_STACK |
+							PERF_SAMPLE_BRANCH_NO_CYCLES |
+							PERF_SAMPLE_BRANCH_NO_FLAGS |
+							PERF_SAMPLE_BRANCH_HW_INDEX;
+			}
+		} else
+			 pr_warning("Cannot use LBR callstack with branch stack. "
+				    "Falling back to framepointers.\n");
+	}
+
+	if (param->record_mode == CALLCHAIN_DWARF) {
+		if (!function) {
+			evsel__set_sample_bit(evsel, REGS_USER);
+			evsel__set_sample_bit(evsel, STACK_USER);
+			if (opts->sample_user_regs && DWARF_MINIMAL_REGS != PERF_REGS_MASK) {
+				attr->sample_regs_user |= DWARF_MINIMAL_REGS;
+				pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, "
+					   "specifying a subset with --user-regs may render DWARF unwinding unreliable, "
+					   "so the minimal registers set (IP, SP) is explicitly forced.\n");
+			} else {
+				attr->sample_regs_user |= arch__user_reg_mask();
+			}
+			attr->sample_stack_user = param->dump_size;
+			attr->exclude_callchain_user = 1;
+		} else {
+			pr_info("Cannot use DWARF unwind for function trace event,"
+				" falling back to framepointers.\n");
+		}
+	}
+
+	if (function) {
+		pr_info("Disabling user space callchains for function trace event.\n");
+		attr->exclude_callchain_user = 1;
+	}
+}
+
+void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
+			     struct callchain_param *param)
+{
+	if (param->enabled)
+		return __evsel__config_callchain(evsel, opts, param);
+}
+
+static void evsel__reset_callgraph(struct evsel *evsel, struct callchain_param *param)
+{
+	struct perf_event_attr *attr = &evsel->core.attr;
+
+	evsel__reset_sample_bit(evsel, CALLCHAIN);
+	if (param->record_mode == CALLCHAIN_LBR) {
+		evsel__reset_sample_bit(evsel, BRANCH_STACK);
+		attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
+					      PERF_SAMPLE_BRANCH_CALL_STACK |
+					      PERF_SAMPLE_BRANCH_HW_INDEX);
+	}
+	if (param->record_mode == CALLCHAIN_DWARF) {
+		evsel__reset_sample_bit(evsel, REGS_USER);
+		evsel__reset_sample_bit(evsel, STACK_USER);
+	}
+}
+
+static void evsel__apply_config_terms(struct evsel *evsel,
+				      struct record_opts *opts, bool track)
+{
+	struct evsel_config_term *term;
+	struct list_head *config_terms = &evsel->config_terms;
+	struct perf_event_attr *attr = &evsel->core.attr;
+	/* callgraph default */
+	struct callchain_param param = {
+		.record_mode = callchain_param.record_mode,
+	};
+	u32 dump_size = 0;
+	int max_stack = 0;
+	const char *callgraph_buf = NULL;
+
+	list_for_each_entry(term, config_terms, list) {
+		switch (term->type) {
+		case EVSEL__CONFIG_TERM_PERIOD:
+			if (!(term->weak && opts->user_interval != ULLONG_MAX)) {
+				attr->sample_period = term->val.period;
+				attr->freq = 0;
+				evsel__reset_sample_bit(evsel, PERIOD);
+			}
+			break;
+		case EVSEL__CONFIG_TERM_FREQ:
+			if (!(term->weak && opts->user_freq != UINT_MAX)) {
+				attr->sample_freq = term->val.freq;
+				attr->freq = 1;
+				evsel__set_sample_bit(evsel, PERIOD);
+			}
+			break;
+		case EVSEL__CONFIG_TERM_TIME:
+			if (term->val.time)
+				evsel__set_sample_bit(evsel, TIME);
+			else
+				evsel__reset_sample_bit(evsel, TIME);
+			break;
+		case EVSEL__CONFIG_TERM_CALLGRAPH:
+			callgraph_buf = term->val.str;
+			break;
+		case EVSEL__CONFIG_TERM_BRANCH:
+			if (term->val.str && strcmp(term->val.str, "no")) {
+				evsel__set_sample_bit(evsel, BRANCH_STACK);
+				parse_branch_str(term->val.str,
+						 &attr->branch_sample_type);
+			} else
+				evsel__reset_sample_bit(evsel, BRANCH_STACK);
+			break;
+		case EVSEL__CONFIG_TERM_STACK_USER:
+			dump_size = term->val.stack_user;
+			break;
+		case EVSEL__CONFIG_TERM_MAX_STACK:
+			max_stack = term->val.max_stack;
+			break;
+		case EVSEL__CONFIG_TERM_MAX_EVENTS:
+			evsel->max_events = term->val.max_events;
+			break;
+		case EVSEL__CONFIG_TERM_INHERIT:
+			/*
+			 * attr->inherit should has already been set by
+			 * evsel__config. If user explicitly set
+			 * inherit using config terms, override global
+			 * opt->no_inherit setting.
+			 */
+			attr->inherit = term->val.inherit ? 1 : 0;
+			break;
+		case EVSEL__CONFIG_TERM_OVERWRITE:
+			attr->write_backward = term->val.overwrite ? 1 : 0;
+			break;
+		case EVSEL__CONFIG_TERM_DRV_CFG:
+			break;
+		case EVSEL__CONFIG_TERM_PERCORE:
+			break;
+		case EVSEL__CONFIG_TERM_AUX_OUTPUT:
+			attr->aux_output = term->val.aux_output ? 1 : 0;
+			break;
+		case EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE:
+			/* Already applied by auxtrace */
+			break;
+		case EVSEL__CONFIG_TERM_CFG_CHG:
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* User explicitly set per-event callgraph, clear the old setting and reset. */
+	if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) {
+		bool sample_address = false;
+
+		if (max_stack) {
+			param.max_stack = max_stack;
+			if (callgraph_buf == NULL)
+				callgraph_buf = "fp";
+		}
+
+		/* parse callgraph parameters */
+		if (callgraph_buf != NULL) {
+			if (!strcmp(callgraph_buf, "no")) {
+				param.enabled = false;
+				param.record_mode = CALLCHAIN_NONE;
+			} else {
+				param.enabled = true;
+				if (parse_callchain_record(callgraph_buf, &param)) {
+					pr_err("per-event callgraph setting for %s failed. "
+					       "Apply callgraph global setting for it\n",
+					       evsel->name);
+					return;
+				}
+				if (param.record_mode == CALLCHAIN_DWARF)
+					sample_address = true;
+			}
+		}
+		if (dump_size > 0) {
+			dump_size = round_up(dump_size, sizeof(u64));
+			param.dump_size = dump_size;
+		}
+
+		/* If global callgraph set, clear it */
+		if (callchain_param.enabled)
+			evsel__reset_callgraph(evsel, &callchain_param);
+
+		/* set perf-event callgraph */
+		if (param.enabled) {
+			if (sample_address) {
+				evsel__set_sample_bit(evsel, ADDR);
+				evsel__set_sample_bit(evsel, DATA_SRC);
+				evsel->core.attr.mmap_data = track;
+			}
+			evsel__config_callchain(evsel, opts, &param);
+		}
+	}
+}
+
+struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type)
+{
+	struct evsel_config_term *term, *found_term = NULL;
+
+	list_for_each_entry(term, &evsel->config_terms, list) {
+		if (term->type == type)
+			found_term = term;
+	}
+
+	return found_term;
+}
+
+void __weak arch_evsel__set_sample_weight(struct evsel *evsel)
+{
+	evsel__set_sample_bit(evsel, WEIGHT);
+}
+
+void __weak arch_evsel__fixup_new_cycles(struct perf_event_attr *attr __maybe_unused)
+{
+}
+
+void __weak arch__post_evsel_config(struct evsel *evsel __maybe_unused,
+				    struct perf_event_attr *attr __maybe_unused)
+{
+}
+
+static void evsel__set_default_freq_period(struct record_opts *opts,
+					   struct perf_event_attr *attr)
+{
+	if (opts->freq) {
+		attr->freq = 1;
+		attr->sample_freq = opts->freq;
+	} else {
+		attr->sample_period = opts->default_interval;
+	}
+}
+
+static bool evsel__is_offcpu_event(struct evsel *evsel)
+{
+	return evsel__is_bpf_output(evsel) && !strcmp(evsel->name, OFFCPU_EVENT);
+}
+
+/*
+ * The enable_on_exec/disabled value strategy:
+ *
+ *  1) For any type of traced program:
+ *    - all independent events and group leaders are disabled
+ *    - all group members are enabled
+ *
+ *     Group members are ruled by group leaders. They need to
+ *     be enabled, because the group scheduling relies on that.
+ *
+ *  2) For traced programs executed by perf:
+ *     - all independent events and group leaders have
+ *       enable_on_exec set
+ *     - we don't specifically enable or disable any event during
+ *       the record command
+ *
+ *     Independent events and group leaders are initially disabled
+ *     and get enabled by exec. Group members are ruled by group
+ *     leaders as stated in 1).
+ *
+ *  3) For traced programs attached by perf (pid/tid):
+ *     - we specifically enable or disable all events during
+ *       the record command
+ *
+ *     When attaching events to already running traced we
+ *     enable/disable events specifically, as there's no
+ *     initial traced exec call.
+ */
+void evsel__config(struct evsel *evsel, struct record_opts *opts,
+		   struct callchain_param *callchain)
+{
+	struct evsel *leader = evsel__leader(evsel);
+	struct perf_event_attr *attr = &evsel->core.attr;
+	int track = evsel->tracking;
+	bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread;
+
+	attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1;
+	attr->inherit	    = !opts->no_inherit;
+	attr->write_backward = opts->overwrite ? 1 : 0;
+	attr->read_format   = PERF_FORMAT_LOST;
+
+	evsel__set_sample_bit(evsel, IP);
+	evsel__set_sample_bit(evsel, TID);
+
+	if (evsel->sample_read) {
+		evsel__set_sample_bit(evsel, READ);
+
+		/*
+		 * We need ID even in case of single event, because
+		 * PERF_SAMPLE_READ process ID specific data.
+		 */
+		evsel__set_sample_id(evsel, false);
+
+		/*
+		 * Apply group format only if we belong to group
+		 * with more than one members.
+		 */
+		if (leader->core.nr_members > 1) {
+			attr->read_format |= PERF_FORMAT_GROUP;
+			attr->inherit = 0;
+		}
+	}
+
+	/*
+	 * We default some events to have a default interval. But keep
+	 * it a weak assumption overridable by the user.
+	 */
+	if ((evsel->is_libpfm_event && !attr->sample_period) ||
+	    (!evsel->is_libpfm_event && (!attr->sample_period ||
+					 opts->user_freq != UINT_MAX ||
+					 opts->user_interval != ULLONG_MAX)))
+		evsel__set_default_freq_period(opts, attr);
+
+	/*
+	 * If attr->freq was set (here or earlier), ask for period
+	 * to be sampled.
+	 */
+	if (attr->freq)
+		evsel__set_sample_bit(evsel, PERIOD);
+
+	if (opts->no_samples)
+		attr->sample_freq = 0;
+
+	if (opts->inherit_stat) {
+		evsel->core.attr.read_format |=
+			PERF_FORMAT_TOTAL_TIME_ENABLED |
+			PERF_FORMAT_TOTAL_TIME_RUNNING |
+			PERF_FORMAT_ID;
+		attr->inherit_stat = 1;
+	}
+
+	if (opts->sample_address) {
+		evsel__set_sample_bit(evsel, ADDR);
+		attr->mmap_data = track;
+	}
+
+	/*
+	 * We don't allow user space callchains for  function trace
+	 * event, due to issues with page faults while tracing page
+	 * fault handler and its overall trickiness nature.
+	 */
+	if (evsel__is_function_event(evsel))
+		evsel->core.attr.exclude_callchain_user = 1;
+
+	if (callchain && callchain->enabled && !evsel->no_aux_samples)
+		evsel__config_callchain(evsel, opts, callchain);
+
+	if (opts->sample_intr_regs && !evsel->no_aux_samples &&
+	    !evsel__is_dummy_event(evsel)) {
+		attr->sample_regs_intr = opts->sample_intr_regs;
+		evsel__set_sample_bit(evsel, REGS_INTR);
+	}
+
+	if (opts->sample_user_regs && !evsel->no_aux_samples &&
+	    !evsel__is_dummy_event(evsel)) {
+		attr->sample_regs_user |= opts->sample_user_regs;
+		evsel__set_sample_bit(evsel, REGS_USER);
+	}
+
+	if (target__has_cpu(&opts->target) || opts->sample_cpu)
+		evsel__set_sample_bit(evsel, CPU);
+
+	/*
+	 * When the user explicitly disabled time don't force it here.
+	 */
+	if (opts->sample_time &&
+	    (!perf_missing_features.sample_id_all &&
+	    (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu ||
+	     opts->sample_time_set)))
+		evsel__set_sample_bit(evsel, TIME);
+
+	if (opts->raw_samples && !evsel->no_aux_samples) {
+		evsel__set_sample_bit(evsel, TIME);
+		evsel__set_sample_bit(evsel, RAW);
+		evsel__set_sample_bit(evsel, CPU);
+	}
+
+	if (opts->sample_address)
+		evsel__set_sample_bit(evsel, DATA_SRC);
+
+	if (opts->sample_phys_addr)
+		evsel__set_sample_bit(evsel, PHYS_ADDR);
+
+	if (opts->no_buffering) {
+		attr->watermark = 0;
+		attr->wakeup_events = 1;
+	}
+	if (opts->branch_stack && !evsel->no_aux_samples) {
+		evsel__set_sample_bit(evsel, BRANCH_STACK);
+		attr->branch_sample_type = opts->branch_stack;
+	}
+
+	if (opts->sample_weight)
+		arch_evsel__set_sample_weight(evsel);
+
+	attr->task     = track;
+	attr->mmap     = track;
+	attr->mmap2    = track && !perf_missing_features.mmap2;
+	attr->comm     = track;
+	attr->build_id = track && opts->build_id;
+
+	/*
+	 * ksymbol is tracked separately with text poke because it needs to be
+	 * system wide and enabled immediately.
+	 */
+	if (!opts->text_poke)
+		attr->ksymbol = track && !perf_missing_features.ksymbol;
+	attr->bpf_event = track && !opts->no_bpf_event && !perf_missing_features.bpf;
+
+	if (opts->record_namespaces)
+		attr->namespaces  = track;
+
+	if (opts->record_cgroup) {
+		attr->cgroup = track && !perf_missing_features.cgroup;
+		evsel__set_sample_bit(evsel, CGROUP);
+	}
+
+	if (opts->sample_data_page_size)
+		evsel__set_sample_bit(evsel, DATA_PAGE_SIZE);
+
+	if (opts->sample_code_page_size)
+		evsel__set_sample_bit(evsel, CODE_PAGE_SIZE);
+
+	if (opts->record_switch_events)
+		attr->context_switch = track;
+
+	if (opts->sample_transaction)
+		evsel__set_sample_bit(evsel, TRANSACTION);
+
+	if (opts->running_time) {
+		evsel->core.attr.read_format |=
+			PERF_FORMAT_TOTAL_TIME_ENABLED |
+			PERF_FORMAT_TOTAL_TIME_RUNNING;
+	}
+
+	/*
+	 * XXX see the function comment above
+	 *
+	 * Disabling only independent events or group leaders,
+	 * keeping group members enabled.
+	 */
+	if (evsel__is_group_leader(evsel))
+		attr->disabled = 1;
+
+	/*
+	 * Setting enable_on_exec for independent events and
+	 * group leaders for traced executed by perf.
+	 */
+	if (target__none(&opts->target) && evsel__is_group_leader(evsel) &&
+	    !opts->initial_delay)
+		attr->enable_on_exec = 1;
+
+	if (evsel->immediate) {
+		attr->disabled = 0;
+		attr->enable_on_exec = 0;
+	}
+
+	clockid = opts->clockid;
+	if (opts->use_clockid) {
+		attr->use_clockid = 1;
+		attr->clockid = opts->clockid;
+	}
+
+	if (evsel->precise_max)
+		attr->precise_ip = 3;
+
+	if (opts->all_user) {
+		attr->exclude_kernel = 1;
+		attr->exclude_user   = 0;
+	}
+
+	if (opts->all_kernel) {
+		attr->exclude_kernel = 0;
+		attr->exclude_user   = 1;
+	}
+
+	if (evsel->core.own_cpus || evsel->unit)
+		evsel->core.attr.read_format |= PERF_FORMAT_ID;
+
+	/*
+	 * Apply event specific term settings,
+	 * it overloads any global configuration.
+	 */
+	evsel__apply_config_terms(evsel, opts, track);
+
+	evsel->ignore_missing_thread = opts->ignore_missing_thread;
+
+	/* The --period option takes the precedence. */
+	if (opts->period_set) {
+		if (opts->period)
+			evsel__set_sample_bit(evsel, PERIOD);
+		else
+			evsel__reset_sample_bit(evsel, PERIOD);
+	}
+
+	/*
+	 * A dummy event never triggers any actual counter and therefore
+	 * cannot be used with branch_stack.
+	 *
+	 * For initial_delay, a dummy event is added implicitly.
+	 * The software event will trigger -EOPNOTSUPP error out,
+	 * if BRANCH_STACK bit is set.
+	 */
+	if (evsel__is_dummy_event(evsel))
+		evsel__reset_sample_bit(evsel, BRANCH_STACK);
+
+	if (evsel__is_offcpu_event(evsel))
+		evsel->core.attr.sample_type &= OFFCPU_SAMPLE_TYPES;
+
+	arch__post_evsel_config(evsel, attr);
+}
+
+int evsel__set_filter(struct evsel *evsel, const char *filter)
+{
+	char *new_filter = strdup(filter);
+
+	if (new_filter != NULL) {
+		free(evsel->filter);
+		evsel->filter = new_filter;
+		return 0;
+	}
+
+	return -1;
+}
+
+static int evsel__append_filter(struct evsel *evsel, const char *fmt, const char *filter)
+{
+	char *new_filter;
+
+	if (evsel->filter == NULL)
+		return evsel__set_filter(evsel, filter);
+
+	if (asprintf(&new_filter, fmt, evsel->filter, filter) > 0) {
+		free(evsel->filter);
+		evsel->filter = new_filter;
+		return 0;
+	}
+
+	return -1;
+}
+
+int evsel__append_tp_filter(struct evsel *evsel, const char *filter)
+{
+	return evsel__append_filter(evsel, "(%s) && (%s)", filter);
+}
+
+int evsel__append_addr_filter(struct evsel *evsel, const char *filter)
+{
+	return evsel__append_filter(evsel, "%s,%s", filter);
+}
+
+/* Caller has to clear disabled after going through all CPUs. */
+int evsel__enable_cpu(struct evsel *evsel, int cpu_map_idx)
+{
+	return perf_evsel__enable_cpu(&evsel->core, cpu_map_idx);
+}
+
+int evsel__enable(struct evsel *evsel)
+{
+	int err = perf_evsel__enable(&evsel->core);
+
+	if (!err)
+		evsel->disabled = false;
+	return err;
+}
+
+/* Caller has to set disabled after going through all CPUs. */
+int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx)
+{
+	return perf_evsel__disable_cpu(&evsel->core, cpu_map_idx);
+}
+
+int evsel__disable(struct evsel *evsel)
+{
+	int err = perf_evsel__disable(&evsel->core);
+	/*
+	 * We mark it disabled here so that tools that disable a event can
+	 * ignore events after they disable it. I.e. the ring buffer may have
+	 * already a few more events queued up before the kernel got the stop
+	 * request.
+	 */
+	if (!err)
+		evsel->disabled = true;
+
+	return err;
+}
+
+void free_config_terms(struct list_head *config_terms)
+{
+	struct evsel_config_term *term, *h;
+
+	list_for_each_entry_safe(term, h, config_terms, list) {
+		list_del_init(&term->list);
+		if (term->free_str)
+			zfree(&term->val.str);
+		free(term);
+	}
+}
+
+static void evsel__free_config_terms(struct evsel *evsel)
+{
+	free_config_terms(&evsel->config_terms);
+}
+
+void evsel__exit(struct evsel *evsel)
+{
+	assert(list_empty(&evsel->core.node));
+	assert(evsel->evlist == NULL);
+	bpf_counter__destroy(evsel);
+	evsel__free_counts(evsel);
+	perf_evsel__free_fd(&evsel->core);
+	perf_evsel__free_id(&evsel->core);
+	evsel__free_config_terms(evsel);
+	cgroup__put(evsel->cgrp);
+	perf_cpu_map__put(evsel->core.cpus);
+	perf_cpu_map__put(evsel->core.own_cpus);
+	perf_thread_map__put(evsel->core.threads);
+	zfree(&evsel->group_name);
+	zfree(&evsel->name);
+	zfree(&evsel->pmu_name);
+	zfree(&evsel->unit);
+	zfree(&evsel->metric_id);
+	evsel__zero_per_pkg(evsel);
+	hashmap__free(evsel->per_pkg_mask);
+	evsel->per_pkg_mask = NULL;
+	zfree(&evsel->metric_events);
+	perf_evsel__object.fini(evsel);
+}
+
+void evsel__delete(struct evsel *evsel)
+{
+	evsel__exit(evsel);
+	free(evsel);
+}
+
+void evsel__compute_deltas(struct evsel *evsel, int cpu_map_idx, int thread,
+			   struct perf_counts_values *count)
+{
+	struct perf_counts_values tmp;
+
+	if (!evsel->prev_raw_counts)
+		return;
+
+	if (cpu_map_idx == -1) {
+		tmp = evsel->prev_raw_counts->aggr;
+		evsel->prev_raw_counts->aggr = *count;
+	} else {
+		tmp = *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
+		*perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread) = *count;
+	}
+
+	count->val = count->val - tmp.val;
+	count->ena = count->ena - tmp.ena;
+	count->run = count->run - tmp.run;
+}
+
+static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread)
+{
+	struct perf_counts_values *count = perf_counts(evsel->counts, cpu_map_idx, thread);
+
+	return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count);
+}
+
+static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
+			     u64 val, u64 ena, u64 run, u64 lost)
+{
+	struct perf_counts_values *count;
+
+	count = perf_counts(counter->counts, cpu_map_idx, thread);
+
+	count->val    = val;
+	count->ena    = ena;
+	count->run    = run;
+	count->lost   = lost;
+
+	perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
+}
+
+static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int thread, u64 *data)
+{
+	u64 read_format = leader->core.attr.read_format;
+	struct sample_read_value *v;
+	u64 nr, ena = 0, run = 0, lost = 0;
+
+	nr = *data++;
+
+	if (nr != (u64) leader->core.nr_members)
+		return -EINVAL;
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		ena = *data++;
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		run = *data++;
+
+	v = (void *)data;
+	sample_read_group__for_each(v, nr, read_format) {
+		struct evsel *counter;
+
+		counter = evlist__id2evsel(leader->evlist, v->id);
+		if (!counter)
+			return -EINVAL;
+
+		if (read_format & PERF_FORMAT_LOST)
+			lost = v->lost;
+
+		evsel__set_count(counter, cpu_map_idx, thread, v->value, ena, run, lost);
+	}
+
+	return 0;
+}
+
+static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread)
+{
+	struct perf_stat_evsel *ps = leader->stats;
+	u64 read_format = leader->core.attr.read_format;
+	int size = perf_evsel__read_size(&leader->core);
+	u64 *data = ps->group_data;
+
+	if (!(read_format & PERF_FORMAT_ID))
+		return -EINVAL;
+
+	if (!evsel__is_group_leader(leader))
+		return -EINVAL;
+
+	if (!data) {
+		data = zalloc(size);
+		if (!data)
+			return -ENOMEM;
+
+		ps->group_data = data;
+	}
+
+	if (FD(leader, cpu_map_idx, thread) < 0)
+		return -EINVAL;
+
+	if (readn(FD(leader, cpu_map_idx, thread), data, size) <= 0)
+		return -errno;
+
+	return evsel__process_group_data(leader, cpu_map_idx, thread, data);
+}
+
+int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread)
+{
+	u64 read_format = evsel->core.attr.read_format;
+
+	if (read_format & PERF_FORMAT_GROUP)
+		return evsel__read_group(evsel, cpu_map_idx, thread);
+
+	return evsel__read_one(evsel, cpu_map_idx, thread);
+}
+
+int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale)
+{
+	struct perf_counts_values count;
+	size_t nv = scale ? 3 : 1;
+
+	if (FD(evsel, cpu_map_idx, thread) < 0)
+		return -EINVAL;
+
+	if (evsel->counts == NULL && evsel__alloc_counts(evsel) < 0)
+		return -ENOMEM;
+
+	if (readn(FD(evsel, cpu_map_idx, thread), &count, nv * sizeof(u64)) <= 0)
+		return -errno;
+
+	evsel__compute_deltas(evsel, cpu_map_idx, thread, &count);
+	perf_counts_values__scale(&count, scale, NULL);
+	*perf_counts(evsel->counts, cpu_map_idx, thread) = count;
+	return 0;
+}
+
+static int evsel__match_other_cpu(struct evsel *evsel, struct evsel *other,
+				  int cpu_map_idx)
+{
+	struct perf_cpu cpu;
+
+	cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx);
+	return perf_cpu_map__idx(other->core.cpus, cpu);
+}
+
+static int evsel__hybrid_group_cpu_map_idx(struct evsel *evsel, int cpu_map_idx)
+{
+	struct evsel *leader = evsel__leader(evsel);
+
+	if ((evsel__is_hybrid(evsel) && !evsel__is_hybrid(leader)) ||
+	    (!evsel__is_hybrid(evsel) && evsel__is_hybrid(leader))) {
+		return evsel__match_other_cpu(evsel, leader, cpu_map_idx);
+	}
+
+	return cpu_map_idx;
+}
+
+static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread)
+{
+	struct evsel *leader = evsel__leader(evsel);
+	int fd;
+
+	if (evsel__is_group_leader(evsel))
+		return -1;
+
+	/*
+	 * Leader must be already processed/open,
+	 * if not it's a bug.
+	 */
+	BUG_ON(!leader->core.fd);
+
+	cpu_map_idx = evsel__hybrid_group_cpu_map_idx(evsel, cpu_map_idx);
+	if (cpu_map_idx == -1)
+		return -1;
+
+	fd = FD(leader, cpu_map_idx, thread);
+	BUG_ON(fd == -1);
+
+	return fd;
+}
+
+static void evsel__remove_fd(struct evsel *pos, int nr_cpus, int nr_threads, int thread_idx)
+{
+	for (int cpu = 0; cpu < nr_cpus; cpu++)
+		for (int thread = thread_idx; thread < nr_threads - 1; thread++)
+			FD(pos, cpu, thread) = FD(pos, cpu, thread + 1);
+}
+
+static int update_fds(struct evsel *evsel,
+		      int nr_cpus, int cpu_map_idx,
+		      int nr_threads, int thread_idx)
+{
+	struct evsel *pos;
+
+	if (cpu_map_idx >= nr_cpus || thread_idx >= nr_threads)
+		return -EINVAL;
+
+	evlist__for_each_entry(evsel->evlist, pos) {
+		nr_cpus = pos != evsel ? nr_cpus : cpu_map_idx;
+
+		evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx);
+
+		/*
+		 * Since fds for next evsel has not been created,
+		 * there is no need to iterate whole event list.
+		 */
+		if (pos == evsel)
+			break;
+	}
+	return 0;
+}
+
+static bool evsel__ignore_missing_thread(struct evsel *evsel,
+					 int nr_cpus, int cpu_map_idx,
+					 struct perf_thread_map *threads,
+					 int thread, int err)
+{
+	pid_t ignore_pid = perf_thread_map__pid(threads, thread);
+
+	if (!evsel->ignore_missing_thread)
+		return false;
+
+	/* The system wide setup does not work with threads. */
+	if (evsel->core.system_wide)
+		return false;
+
+	/* The -ESRCH is perf event syscall errno for pid's not found. */
+	if (err != -ESRCH)
+		return false;
+
+	/* If there's only one thread, let it fail. */
+	if (threads->nr == 1)
+		return false;
+
+	/*
+	 * We should remove fd for missing_thread first
+	 * because thread_map__remove() will decrease threads->nr.
+	 */
+	if (update_fds(evsel, nr_cpus, cpu_map_idx, threads->nr, thread))
+		return false;
+
+	if (thread_map__remove(threads, thread))
+		return false;
+
+	pr_warning("WARNING: Ignored open failure for pid %d\n",
+		   ignore_pid);
+	return true;
+}
+
+static int __open_attr__fprintf(FILE *fp, const char *name, const char *val,
+				void *priv __maybe_unused)
+{
+	return fprintf(fp, "  %-32s %s\n", name, val);
+}
+
+static void display_attr(struct perf_event_attr *attr)
+{
+	if (verbose >= 2 || debug_peo_args) {
+		fprintf(stderr, "%.60s\n", graph_dotted_line);
+		fprintf(stderr, "perf_event_attr:\n");
+		perf_event_attr__fprintf(stderr, attr, __open_attr__fprintf, NULL);
+		fprintf(stderr, "%.60s\n", graph_dotted_line);
+	}
+}
+
+bool evsel__precise_ip_fallback(struct evsel *evsel)
+{
+	/* Do not try less precise if not requested. */
+	if (!evsel->precise_max)
+		return false;
+
+	/*
+	 * We tried all the precise_ip values, and it's
+	 * still failing, so leave it to standard fallback.
+	 */
+	if (!evsel->core.attr.precise_ip) {
+		evsel->core.attr.precise_ip = evsel->precise_ip_original;
+		return false;
+	}
+
+	if (!evsel->precise_ip_original)
+		evsel->precise_ip_original = evsel->core.attr.precise_ip;
+
+	evsel->core.attr.precise_ip--;
+	pr_debug2_peo("decreasing precise_ip by one (%d)\n", evsel->core.attr.precise_ip);
+	display_attr(&evsel->core.attr);
+	return true;
+}
+
+static struct perf_cpu_map *empty_cpu_map;
+static struct perf_thread_map *empty_thread_map;
+
+static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
+		struct perf_thread_map *threads)
+{
+	int nthreads = perf_thread_map__nr(threads);
+
+	if ((perf_missing_features.write_backward && evsel->core.attr.write_backward) ||
+	    (perf_missing_features.aux_output     && evsel->core.attr.aux_output))
+		return -EINVAL;
+
+	if (cpus == NULL) {
+		if (empty_cpu_map == NULL) {
+			empty_cpu_map = perf_cpu_map__dummy_new();
+			if (empty_cpu_map == NULL)
+				return -ENOMEM;
+		}
+
+		cpus = empty_cpu_map;
+	}
+
+	if (threads == NULL) {
+		if (empty_thread_map == NULL) {
+			empty_thread_map = thread_map__new_by_tid(-1);
+			if (empty_thread_map == NULL)
+				return -ENOMEM;
+		}
+
+		threads = empty_thread_map;
+	}
+
+	if (evsel->core.fd == NULL &&
+	    perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0)
+		return -ENOMEM;
+
+	evsel->open_flags = PERF_FLAG_FD_CLOEXEC;
+	if (evsel->cgrp)
+		evsel->open_flags |= PERF_FLAG_PID_CGROUP;
+
+	return 0;
+}
+
+static void evsel__disable_missing_features(struct evsel *evsel)
+{
+	if (perf_missing_features.read_lost)
+		evsel->core.attr.read_format &= ~PERF_FORMAT_LOST;
+	if (perf_missing_features.weight_struct) {
+		evsel__set_sample_bit(evsel, WEIGHT);
+		evsel__reset_sample_bit(evsel, WEIGHT_STRUCT);
+	}
+	if (perf_missing_features.clockid_wrong)
+		evsel->core.attr.clockid = CLOCK_MONOTONIC; /* should always work */
+	if (perf_missing_features.clockid) {
+		evsel->core.attr.use_clockid = 0;
+		evsel->core.attr.clockid = 0;
+	}
+	if (perf_missing_features.cloexec)
+		evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
+	if (perf_missing_features.mmap2)
+		evsel->core.attr.mmap2 = 0;
+	if (evsel->pmu && evsel->pmu->missing_features.exclude_guest)
+		evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0;
+	if (perf_missing_features.lbr_flags)
+		evsel->core.attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
+				     PERF_SAMPLE_BRANCH_NO_CYCLES);
+	if (perf_missing_features.group_read && evsel->core.attr.inherit)
+		evsel->core.attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID);
+	if (perf_missing_features.ksymbol)
+		evsel->core.attr.ksymbol = 0;
+	if (perf_missing_features.bpf)
+		evsel->core.attr.bpf_event = 0;
+	if (perf_missing_features.branch_hw_idx)
+		evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_HW_INDEX;
+	if (perf_missing_features.sample_id_all)
+		evsel->core.attr.sample_id_all = 0;
+}
+
+int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
+			struct perf_thread_map *threads)
+{
+	int err;
+
+	err = __evsel__prepare_open(evsel, cpus, threads);
+	if (err)
+		return err;
+
+	evsel__disable_missing_features(evsel);
+
+	return err;
+}
+
+bool evsel__detect_missing_features(struct evsel *evsel)
+{
+	/*
+	 * Must probe features in the order they were added to the
+	 * perf_event_attr interface.
+	 */
+	if (!perf_missing_features.read_lost &&
+	    (evsel->core.attr.read_format & PERF_FORMAT_LOST)) {
+		perf_missing_features.read_lost = true;
+		pr_debug2("switching off PERF_FORMAT_LOST support\n");
+		return true;
+	} else if (!perf_missing_features.weight_struct &&
+	    (evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) {
+		perf_missing_features.weight_struct = true;
+		pr_debug2("switching off weight struct support\n");
+		return true;
+	} else if (!perf_missing_features.code_page_size &&
+	    (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)) {
+		perf_missing_features.code_page_size = true;
+		pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support, bailing out\n");
+		return false;
+	} else if (!perf_missing_features.data_page_size &&
+	    (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)) {
+		perf_missing_features.data_page_size = true;
+		pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support, bailing out\n");
+		return false;
+	} else if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) {
+		perf_missing_features.cgroup = true;
+		pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n");
+		return false;
+	} else if (!perf_missing_features.branch_hw_idx &&
+	    (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) {
+		perf_missing_features.branch_hw_idx = true;
+		pr_debug2("switching off branch HW index support\n");
+		return true;
+	} else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) {
+		perf_missing_features.aux_output = true;
+		pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n");
+		return false;
+	} else if (!perf_missing_features.bpf && evsel->core.attr.bpf_event) {
+		perf_missing_features.bpf = true;
+		pr_debug2_peo("switching off bpf_event\n");
+		return true;
+	} else if (!perf_missing_features.ksymbol && evsel->core.attr.ksymbol) {
+		perf_missing_features.ksymbol = true;
+		pr_debug2_peo("switching off ksymbol\n");
+		return true;
+	} else if (!perf_missing_features.write_backward && evsel->core.attr.write_backward) {
+		perf_missing_features.write_backward = true;
+		pr_debug2_peo("switching off write_backward\n");
+		return false;
+	} else if (!perf_missing_features.clockid_wrong && evsel->core.attr.use_clockid) {
+		perf_missing_features.clockid_wrong = true;
+		pr_debug2_peo("switching off clockid\n");
+		return true;
+	} else if (!perf_missing_features.clockid && evsel->core.attr.use_clockid) {
+		perf_missing_features.clockid = true;
+		pr_debug2_peo("switching off use_clockid\n");
+		return true;
+	} else if (!perf_missing_features.cloexec && (evsel->open_flags & PERF_FLAG_FD_CLOEXEC)) {
+		perf_missing_features.cloexec = true;
+		pr_debug2_peo("switching off cloexec flag\n");
+		return true;
+	} else if (!perf_missing_features.mmap2 && evsel->core.attr.mmap2) {
+		perf_missing_features.mmap2 = true;
+		pr_debug2_peo("switching off mmap2\n");
+		return true;
+	} else if ((evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) &&
+		   (evsel->pmu == NULL || evsel->pmu->missing_features.exclude_guest)) {
+		if (evsel->pmu == NULL) {
+			evsel->pmu = evsel__find_pmu(evsel);
+			if (evsel->pmu)
+				evsel->pmu->missing_features.exclude_guest = true;
+			else {
+				/* we cannot find PMU, disable attrs now */
+				evsel->core.attr.exclude_host = false;
+				evsel->core.attr.exclude_guest = false;
+			}
+		}
+
+		if (evsel->exclude_GH) {
+			pr_debug2_peo("PMU has no exclude_host/guest support, bailing out\n");
+			return false;
+		}
+		if (!perf_missing_features.exclude_guest) {
+			perf_missing_features.exclude_guest = true;
+			pr_debug2_peo("switching off exclude_guest, exclude_host\n");
+		}
+		return true;
+	} else if (!perf_missing_features.sample_id_all) {
+		perf_missing_features.sample_id_all = true;
+		pr_debug2_peo("switching off sample_id_all\n");
+		return true;
+	} else if (!perf_missing_features.lbr_flags &&
+			(evsel->core.attr.branch_sample_type &
+			 (PERF_SAMPLE_BRANCH_NO_CYCLES |
+			  PERF_SAMPLE_BRANCH_NO_FLAGS))) {
+		perf_missing_features.lbr_flags = true;
+		pr_debug2_peo("switching off branch sample type no (cycles/flags)\n");
+		return true;
+	} else if (!perf_missing_features.group_read &&
+		    evsel->core.attr.inherit &&
+		   (evsel->core.attr.read_format & PERF_FORMAT_GROUP) &&
+		   evsel__is_group_leader(evsel)) {
+		perf_missing_features.group_read = true;
+		pr_debug2_peo("switching off group read\n");
+		return true;
+	} else {
+		return false;
+	}
+}
+
+bool evsel__increase_rlimit(enum rlimit_action *set_rlimit)
+{
+	int old_errno;
+	struct rlimit l;
+
+	if (*set_rlimit < INCREASED_MAX) {
+		old_errno = errno;
+
+		if (getrlimit(RLIMIT_NOFILE, &l) == 0) {
+			if (*set_rlimit == NO_CHANGE) {
+				l.rlim_cur = l.rlim_max;
+			} else {
+				l.rlim_cur = l.rlim_max + 1000;
+				l.rlim_max = l.rlim_cur;
+			}
+			if (setrlimit(RLIMIT_NOFILE, &l) == 0) {
+				(*set_rlimit) += 1;
+				errno = old_errno;
+				return true;
+			}
+		}
+		errno = old_errno;
+	}
+
+	return false;
+}
+
+static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
+		struct perf_thread_map *threads,
+		int start_cpu_map_idx, int end_cpu_map_idx)
+{
+	int idx, thread, nthreads;
+	int pid = -1, err, old_errno;
+	enum rlimit_action set_rlimit = NO_CHANGE;
+
+	err = __evsel__prepare_open(evsel, cpus, threads);
+	if (err)
+		return err;
+
+	if (cpus == NULL)
+		cpus = empty_cpu_map;
+
+	if (threads == NULL)
+		threads = empty_thread_map;
+
+	nthreads = perf_thread_map__nr(threads);
+
+	if (evsel->cgrp)
+		pid = evsel->cgrp->fd;
+
+fallback_missing_features:
+	evsel__disable_missing_features(evsel);
+
+	display_attr(&evsel->core.attr);
+
+	for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
+
+		for (thread = 0; thread < nthreads; thread++) {
+			int fd, group_fd;
+retry_open:
+			if (thread >= nthreads)
+				break;
+
+			if (!evsel->cgrp && !evsel->core.system_wide)
+				pid = perf_thread_map__pid(threads, thread);
+
+			group_fd = get_group_fd(evsel, idx, thread);
+
+			test_attr__ready();
+
+			/* Debug message used by test scripts */
+			pr_debug2_peo("sys_perf_event_open: pid %d  cpu %d  group_fd %d  flags %#lx",
+				pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags);
+
+			fd = sys_perf_event_open(&evsel->core.attr, pid,
+						perf_cpu_map__cpu(cpus, idx).cpu,
+						group_fd, evsel->open_flags);
+
+			FD(evsel, idx, thread) = fd;
+
+			if (fd < 0) {
+				err = -errno;
+
+				pr_debug2_peo("\nsys_perf_event_open failed, error %d\n",
+					  err);
+				goto try_fallback;
+			}
+
+			bpf_counter__install_pe(evsel, idx, fd);
+
+			if (unlikely(test_attr__enabled)) {
+				test_attr__open(&evsel->core.attr, pid,
+						perf_cpu_map__cpu(cpus, idx),
+						fd, group_fd, evsel->open_flags);
+			}
+
+			/* Debug message used by test scripts */
+			pr_debug2_peo(" = %d\n", fd);
+
+			if (evsel->bpf_fd >= 0) {
+				int evt_fd = fd;
+				int bpf_fd = evsel->bpf_fd;
+
+				err = ioctl(evt_fd,
+					    PERF_EVENT_IOC_SET_BPF,
+					    bpf_fd);
+				if (err && errno != EEXIST) {
+					pr_err("failed to attach bpf fd %d: %s\n",
+					       bpf_fd, strerror(errno));
+					err = -EINVAL;
+					goto out_close;
+				}
+			}
+
+			set_rlimit = NO_CHANGE;
+
+			/*
+			 * If we succeeded but had to kill clockid, fail and
+			 * have evsel__open_strerror() print us a nice error.
+			 */
+			if (perf_missing_features.clockid ||
+			    perf_missing_features.clockid_wrong) {
+				err = -EINVAL;
+				goto out_close;
+			}
+		}
+	}
+
+	return 0;
+
+try_fallback:
+	if (evsel__precise_ip_fallback(evsel))
+		goto retry_open;
+
+	if (evsel__ignore_missing_thread(evsel, perf_cpu_map__nr(cpus),
+					 idx, threads, thread, err)) {
+		/* We just removed 1 thread, so lower the upper nthreads limit. */
+		nthreads--;
+
+		/* ... and pretend like nothing have happened. */
+		err = 0;
+		goto retry_open;
+	}
+	/*
+	 * perf stat needs between 5 and 22 fds per CPU. When we run out
+	 * of them try to increase the limits.
+	 */
+	if (err == -EMFILE && evsel__increase_rlimit(&set_rlimit))
+		goto retry_open;
+
+	if (err != -EINVAL || idx > 0 || thread > 0)
+		goto out_close;
+
+	if (evsel__detect_missing_features(evsel))
+		goto fallback_missing_features;
+out_close:
+	if (err)
+		threads->err_thread = thread;
+
+	old_errno = errno;
+	do {
+		while (--thread >= 0) {
+			if (FD(evsel, idx, thread) >= 0)
+				close(FD(evsel, idx, thread));
+			FD(evsel, idx, thread) = -1;
+		}
+		thread = nthreads;
+	} while (--idx >= 0);
+	errno = old_errno;
+	return err;
+}
+
+int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
+		struct perf_thread_map *threads)
+{
+	return evsel__open_cpu(evsel, cpus, threads, 0, perf_cpu_map__nr(cpus));
+}
+
+void evsel__close(struct evsel *evsel)
+{
+	perf_evsel__close(&evsel->core);
+	perf_evsel__free_id(&evsel->core);
+}
+
+int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx)
+{
+	if (cpu_map_idx == -1)
+		return evsel__open_cpu(evsel, cpus, NULL, 0, perf_cpu_map__nr(cpus));
+
+	return evsel__open_cpu(evsel, cpus, NULL, cpu_map_idx, cpu_map_idx + 1);
+}
+
+int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads)
+{
+	return evsel__open(evsel, NULL, threads);
+}
+
+static int perf_evsel__parse_id_sample(const struct evsel *evsel,
+				       const union perf_event *event,
+				       struct perf_sample *sample)
+{
+	u64 type = evsel->core.attr.sample_type;
+	const __u64 *array = event->sample.array;
+	bool swapped = evsel->needs_swap;
+	union u64_swap u;
+
+	array += ((event->header.size -
+		   sizeof(event->header)) / sizeof(u64)) - 1;
+
+	if (type & PERF_SAMPLE_IDENTIFIER) {
+		sample->id = *array;
+		array--;
+	}
+
+	if (type & PERF_SAMPLE_CPU) {
+		u.val64 = *array;
+		if (swapped) {
+			/* undo swap of u64, then swap on individual u32s */
+			u.val64 = bswap_64(u.val64);
+			u.val32[0] = bswap_32(u.val32[0]);
+		}
+
+		sample->cpu = u.val32[0];
+		array--;
+	}
+
+	if (type & PERF_SAMPLE_STREAM_ID) {
+		sample->stream_id = *array;
+		array--;
+	}
+
+	if (type & PERF_SAMPLE_ID) {
+		sample->id = *array;
+		array--;
+	}
+
+	if (type & PERF_SAMPLE_TIME) {
+		sample->time = *array;
+		array--;
+	}
+
+	if (type & PERF_SAMPLE_TID) {
+		u.val64 = *array;
+		if (swapped) {
+			/* undo swap of u64, then swap on individual u32s */
+			u.val64 = bswap_64(u.val64);
+			u.val32[0] = bswap_32(u.val32[0]);
+			u.val32[1] = bswap_32(u.val32[1]);
+		}
+
+		sample->pid = u.val32[0];
+		sample->tid = u.val32[1];
+		array--;
+	}
+
+	return 0;
+}
+
+static inline bool overflow(const void *endp, u16 max_size, const void *offset,
+			    u64 size)
+{
+	return size > max_size || offset + size > endp;
+}
+
+#define OVERFLOW_CHECK(offset, size, max_size)				\
+	do {								\
+		if (overflow(endp, (max_size), (offset), (size)))	\
+			return -EFAULT;					\
+	} while (0)
+
+#define OVERFLOW_CHECK_u64(offset) \
+	OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64))
+
+static int
+perf_event__check_size(union perf_event *event, unsigned int sample_size)
+{
+	/*
+	 * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes
+	 * up to PERF_SAMPLE_PERIOD.  After that overflow() must be used to
+	 * check the format does not go past the end of the event.
+	 */
+	if (sample_size + sizeof(event->header) > event->header.size)
+		return -EFAULT;
+
+	return 0;
+}
+
+void __weak arch_perf_parse_sample_weight(struct perf_sample *data,
+					  const __u64 *array,
+					  u64 type __maybe_unused)
+{
+	data->weight = *array;
+}
+
+u64 evsel__bitfield_swap_branch_flags(u64 value)
+{
+	u64 new_val = 0;
+
+	/*
+	 * branch_flags
+	 * union {
+	 * 	u64 values;
+	 * 	struct {
+	 * 		mispred:1	//target mispredicted
+	 * 		predicted:1	//target predicted
+	 * 		in_tx:1		//in transaction
+	 * 		abort:1		//transaction abort
+	 * 		cycles:16	//cycle count to last branch
+	 * 		type:4		//branch type
+	 * 		reserved:40
+	 * 	}
+	 * }
+	 *
+	 * Avoid bswap64() the entire branch_flag.value,
+	 * as it has variable bit-field sizes. Instead the
+	 * macro takes the bit-field position/size,
+	 * swaps it based on the host endianness.
+	 *
+	 * tep_is_bigendian() is used here instead of
+	 * bigendian() to avoid python test fails.
+	 */
+	if (tep_is_bigendian()) {
+		new_val = bitfield_swap(value, 0, 1);
+		new_val |= bitfield_swap(value, 1, 1);
+		new_val |= bitfield_swap(value, 2, 1);
+		new_val |= bitfield_swap(value, 3, 1);
+		new_val |= bitfield_swap(value, 4, 16);
+		new_val |= bitfield_swap(value, 20, 4);
+		new_val |= bitfield_swap(value, 24, 40);
+	} else {
+		new_val = bitfield_swap(value, 63, 1);
+		new_val |= bitfield_swap(value, 62, 1);
+		new_val |= bitfield_swap(value, 61, 1);
+		new_val |= bitfield_swap(value, 60, 1);
+		new_val |= bitfield_swap(value, 44, 16);
+		new_val |= bitfield_swap(value, 40, 4);
+		new_val |= bitfield_swap(value, 0, 40);
+	}
+
+	return new_val;
+}
+
+int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
+			struct perf_sample *data)
+{
+	u64 type = evsel->core.attr.sample_type;
+	bool swapped = evsel->needs_swap;
+	const __u64 *array;
+	u16 max_size = event->header.size;
+	const void *endp = (void *)event + max_size;
+	u64 sz;
+
+	/*
+	 * used for cross-endian analysis. See git commit 65014ab3
+	 * for why this goofiness is needed.
+	 */
+	union u64_swap u;
+
+	memset(data, 0, sizeof(*data));
+	data->cpu = data->pid = data->tid = -1;
+	data->stream_id = data->id = data->time = -1ULL;
+	data->period = evsel->core.attr.sample_period;
+	data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+	data->misc    = event->header.misc;
+	data->id = -1ULL;
+	data->data_src = PERF_MEM_DATA_SRC_NONE;
+	data->vcpu = -1;
+
+	if (event->header.type != PERF_RECORD_SAMPLE) {
+		if (!evsel->core.attr.sample_id_all)
+			return 0;
+		return perf_evsel__parse_id_sample(evsel, event, data);
+	}
+
+	array = event->sample.array;
+
+	if (perf_event__check_size(event, evsel->sample_size))
+		return -EFAULT;
+
+	if (type & PERF_SAMPLE_IDENTIFIER) {
+		data->id = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_IP) {
+		data->ip = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_TID) {
+		u.val64 = *array;
+		if (swapped) {
+			/* undo swap of u64, then swap on individual u32s */
+			u.val64 = bswap_64(u.val64);
+			u.val32[0] = bswap_32(u.val32[0]);
+			u.val32[1] = bswap_32(u.val32[1]);
+		}
+
+		data->pid = u.val32[0];
+		data->tid = u.val32[1];
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_TIME) {
+		data->time = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_ADDR) {
+		data->addr = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_ID) {
+		data->id = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_STREAM_ID) {
+		data->stream_id = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_CPU) {
+
+		u.val64 = *array;
+		if (swapped) {
+			/* undo swap of u64, then swap on individual u32s */
+			u.val64 = bswap_64(u.val64);
+			u.val32[0] = bswap_32(u.val32[0]);
+		}
+
+		data->cpu = u.val32[0];
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_PERIOD) {
+		data->period = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_READ) {
+		u64 read_format = evsel->core.attr.read_format;
+
+		OVERFLOW_CHECK_u64(array);
+		if (read_format & PERF_FORMAT_GROUP)
+			data->read.group.nr = *array;
+		else
+			data->read.one.value = *array;
+
+		array++;
+
+		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+			OVERFLOW_CHECK_u64(array);
+			data->read.time_enabled = *array;
+			array++;
+		}
+
+		if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+			OVERFLOW_CHECK_u64(array);
+			data->read.time_running = *array;
+			array++;
+		}
+
+		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+		if (read_format & PERF_FORMAT_GROUP) {
+			const u64 max_group_nr = UINT64_MAX /
+					sizeof(struct sample_read_value);
+
+			if (data->read.group.nr > max_group_nr)
+				return -EFAULT;
+
+			sz = data->read.group.nr * sample_read_value_size(read_format);
+			OVERFLOW_CHECK(array, sz, max_size);
+			data->read.group.values =
+					(struct sample_read_value *)array;
+			array = (void *)array + sz;
+		} else {
+			OVERFLOW_CHECK_u64(array);
+			data->read.one.id = *array;
+			array++;
+
+			if (read_format & PERF_FORMAT_LOST) {
+				OVERFLOW_CHECK_u64(array);
+				data->read.one.lost = *array;
+				array++;
+			}
+		}
+	}
+
+	if (type & PERF_SAMPLE_CALLCHAIN) {
+		const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
+
+		OVERFLOW_CHECK_u64(array);
+		data->callchain = (struct ip_callchain *)array++;
+		if (data->callchain->nr > max_callchain_nr)
+			return -EFAULT;
+		sz = data->callchain->nr * sizeof(u64);
+		OVERFLOW_CHECK(array, sz, max_size);
+		array = (void *)array + sz;
+	}
+
+	if (type & PERF_SAMPLE_RAW) {
+		OVERFLOW_CHECK_u64(array);
+		u.val64 = *array;
+
+		/*
+		 * Undo swap of u64, then swap on individual u32s,
+		 * get the size of the raw area and undo all of the
+		 * swap. The pevent interface handles endianness by
+		 * itself.
+		 */
+		if (swapped) {
+			u.val64 = bswap_64(u.val64);
+			u.val32[0] = bswap_32(u.val32[0]);
+			u.val32[1] = bswap_32(u.val32[1]);
+		}
+		data->raw_size = u.val32[0];
+
+		/*
+		 * The raw data is aligned on 64bits including the
+		 * u32 size, so it's safe to use mem_bswap_64.
+		 */
+		if (swapped)
+			mem_bswap_64((void *) array, data->raw_size);
+
+		array = (void *)array + sizeof(u32);
+
+		OVERFLOW_CHECK(array, data->raw_size, max_size);
+		data->raw_data = (void *)array;
+		array = (void *)array + data->raw_size;
+	}
+
+	if (type & PERF_SAMPLE_BRANCH_STACK) {
+		const u64 max_branch_nr = UINT64_MAX /
+					  sizeof(struct branch_entry);
+		struct branch_entry *e;
+		unsigned int i;
+
+		OVERFLOW_CHECK_u64(array);
+		data->branch_stack = (struct branch_stack *)array++;
+
+		if (data->branch_stack->nr > max_branch_nr)
+			return -EFAULT;
+
+		sz = data->branch_stack->nr * sizeof(struct branch_entry);
+		if (evsel__has_branch_hw_idx(evsel)) {
+			sz += sizeof(u64);
+			e = &data->branch_stack->entries[0];
+		} else {
+			data->no_hw_idx = true;
+			/*
+			 * if the PERF_SAMPLE_BRANCH_HW_INDEX is not applied,
+			 * only nr and entries[] will be output by kernel.
+			 */
+			e = (struct branch_entry *)&data->branch_stack->hw_idx;
+		}
+
+		if (swapped) {
+			/*
+			 * struct branch_flag does not have endian
+			 * specific bit field definition. And bswap
+			 * will not resolve the issue, since these
+			 * are bit fields.
+			 *
+			 * evsel__bitfield_swap_branch_flags() uses a
+			 * bitfield_swap macro to swap the bit position
+			 * based on the host endians.
+			 */
+			for (i = 0; i < data->branch_stack->nr; i++, e++)
+				e->flags.value = evsel__bitfield_swap_branch_flags(e->flags.value);
+		}
+
+		OVERFLOW_CHECK(array, sz, max_size);
+		array = (void *)array + sz;
+	}
+
+	if (type & PERF_SAMPLE_REGS_USER) {
+		OVERFLOW_CHECK_u64(array);
+		data->user_regs.abi = *array;
+		array++;
+
+		if (data->user_regs.abi) {
+			u64 mask = evsel->core.attr.sample_regs_user;
+
+			sz = hweight64(mask) * sizeof(u64);
+			OVERFLOW_CHECK(array, sz, max_size);
+			data->user_regs.mask = mask;
+			data->user_regs.regs = (u64 *)array;
+			array = (void *)array + sz;
+		}
+	}
+
+	if (type & PERF_SAMPLE_STACK_USER) {
+		OVERFLOW_CHECK_u64(array);
+		sz = *array++;
+
+		data->user_stack.offset = ((char *)(array - 1)
+					  - (char *) event);
+
+		if (!sz) {
+			data->user_stack.size = 0;
+		} else {
+			OVERFLOW_CHECK(array, sz, max_size);
+			data->user_stack.data = (char *)array;
+			array = (void *)array + sz;
+			OVERFLOW_CHECK_u64(array);
+			data->user_stack.size = *array++;
+			if (WARN_ONCE(data->user_stack.size > sz,
+				      "user stack dump failure\n"))
+				return -EFAULT;
+		}
+	}
+
+	if (type & PERF_SAMPLE_WEIGHT_TYPE) {
+		OVERFLOW_CHECK_u64(array);
+		arch_perf_parse_sample_weight(data, array, type);
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_DATA_SRC) {
+		OVERFLOW_CHECK_u64(array);
+		data->data_src = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_TRANSACTION) {
+		OVERFLOW_CHECK_u64(array);
+		data->transaction = *array;
+		array++;
+	}
+
+	data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE;
+	if (type & PERF_SAMPLE_REGS_INTR) {
+		OVERFLOW_CHECK_u64(array);
+		data->intr_regs.abi = *array;
+		array++;
+
+		if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) {
+			u64 mask = evsel->core.attr.sample_regs_intr;
+
+			sz = hweight64(mask) * sizeof(u64);
+			OVERFLOW_CHECK(array, sz, max_size);
+			data->intr_regs.mask = mask;
+			data->intr_regs.regs = (u64 *)array;
+			array = (void *)array + sz;
+		}
+	}
+
+	data->phys_addr = 0;
+	if (type & PERF_SAMPLE_PHYS_ADDR) {
+		data->phys_addr = *array;
+		array++;
+	}
+
+	data->cgroup = 0;
+	if (type & PERF_SAMPLE_CGROUP) {
+		data->cgroup = *array;
+		array++;
+	}
+
+	data->data_page_size = 0;
+	if (type & PERF_SAMPLE_DATA_PAGE_SIZE) {
+		data->data_page_size = *array;
+		array++;
+	}
+
+	data->code_page_size = 0;
+	if (type & PERF_SAMPLE_CODE_PAGE_SIZE) {
+		data->code_page_size = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_AUX) {
+		OVERFLOW_CHECK_u64(array);
+		sz = *array++;
+
+		OVERFLOW_CHECK(array, sz, max_size);
+		/* Undo swap of data */
+		if (swapped)
+			mem_bswap_64((char *)array, sz);
+		data->aux_sample.size = sz;
+		data->aux_sample.data = (char *)array;
+		array = (void *)array + sz;
+	}
+
+	return 0;
+}
+
+int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event,
+				  u64 *timestamp)
+{
+	u64 type = evsel->core.attr.sample_type;
+	const __u64 *array;
+
+	if (!(type & PERF_SAMPLE_TIME))
+		return -1;
+
+	if (event->header.type != PERF_RECORD_SAMPLE) {
+		struct perf_sample data = {
+			.time = -1ULL,
+		};
+
+		if (!evsel->core.attr.sample_id_all)
+			return -1;
+		if (perf_evsel__parse_id_sample(evsel, event, &data))
+			return -1;
+
+		*timestamp = data.time;
+		return 0;
+	}
+
+	array = event->sample.array;
+
+	if (perf_event__check_size(event, evsel->sample_size))
+		return -EFAULT;
+
+	if (type & PERF_SAMPLE_IDENTIFIER)
+		array++;
+
+	if (type & PERF_SAMPLE_IP)
+		array++;
+
+	if (type & PERF_SAMPLE_TID)
+		array++;
+
+	if (type & PERF_SAMPLE_TIME)
+		*timestamp = *array;
+
+	return 0;
+}
+
+u16 evsel__id_hdr_size(struct evsel *evsel)
+{
+	u64 sample_type = evsel->core.attr.sample_type;
+	u16 size = 0;
+
+	if (sample_type & PERF_SAMPLE_TID)
+		size += sizeof(u64);
+
+	if (sample_type & PERF_SAMPLE_TIME)
+		size += sizeof(u64);
+
+	if (sample_type & PERF_SAMPLE_ID)
+		size += sizeof(u64);
+
+	if (sample_type & PERF_SAMPLE_STREAM_ID)
+		size += sizeof(u64);
+
+	if (sample_type & PERF_SAMPLE_CPU)
+		size += sizeof(u64);
+
+	if (sample_type & PERF_SAMPLE_IDENTIFIER)
+		size += sizeof(u64);
+
+	return size;
+}
+
+struct tep_format_field *evsel__field(struct evsel *evsel, const char *name)
+{
+	return tep_find_field(evsel->tp_format, name);
+}
+
+void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name)
+{
+	struct tep_format_field *field = evsel__field(evsel, name);
+	int offset;
+
+	if (!field)
+		return NULL;
+
+	offset = field->offset;
+
+	if (field->flags & TEP_FIELD_IS_DYNAMIC) {
+		offset = *(int *)(sample->raw_data + field->offset);
+		offset &= 0xffff;
+		if (field->flags & TEP_FIELD_IS_RELATIVE)
+			offset += field->offset + field->size;
+	}
+
+	return sample->raw_data + offset;
+}
+
+u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample,
+			 bool needs_swap)
+{
+	u64 value;
+	void *ptr = sample->raw_data + field->offset;
+
+	switch (field->size) {
+	case 1:
+		return *(u8 *)ptr;
+	case 2:
+		value = *(u16 *)ptr;
+		break;
+	case 4:
+		value = *(u32 *)ptr;
+		break;
+	case 8:
+		memcpy(&value, ptr, sizeof(u64));
+		break;
+	default:
+		return 0;
+	}
+
+	if (!needs_swap)
+		return value;
+
+	switch (field->size) {
+	case 2:
+		return bswap_16(value);
+	case 4:
+		return bswap_32(value);
+	case 8:
+		return bswap_64(value);
+	default:
+		return 0;
+	}
+
+	return 0;
+}
+
+u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name)
+{
+	struct tep_format_field *field = evsel__field(evsel, name);
+
+	if (!field)
+		return 0;
+
+	return field ? format_field__intval(field, sample, evsel->needs_swap) : 0;
+}
+
+bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize)
+{
+	int paranoid;
+
+	if ((err == ENOENT || err == ENXIO || err == ENODEV) &&
+	    evsel->core.attr.type   == PERF_TYPE_HARDWARE &&
+	    evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES) {
+		/*
+		 * If it's cycles then fall back to hrtimer based
+		 * cpu-clock-tick sw counter, which is always available even if
+		 * no PMU support.
+		 *
+		 * PPC returns ENXIO until 2.6.37 (behavior changed with commit
+		 * b0a873e).
+		 */
+		scnprintf(msg, msgsize, "%s",
+"The cycles event is not supported, trying to fall back to cpu-clock-ticks");
+
+		evsel->core.attr.type   = PERF_TYPE_SOFTWARE;
+		evsel->core.attr.config = PERF_COUNT_SW_CPU_CLOCK;
+
+		zfree(&evsel->name);
+		return true;
+	} else if (err == EACCES && !evsel->core.attr.exclude_kernel &&
+		   (paranoid = perf_event_paranoid()) > 1) {
+		const char *name = evsel__name(evsel);
+		char *new_name;
+		const char *sep = ":";
+
+		/* If event has exclude user then don't exclude kernel. */
+		if (evsel->core.attr.exclude_user)
+			return false;
+
+		/* Is there already the separator in the name. */
+		if (strchr(name, '/') ||
+		    (strchr(name, ':') && !evsel->is_libpfm_event))
+			sep = "";
+
+		if (asprintf(&new_name, "%s%su", name, sep) < 0)
+			return false;
+
+		if (evsel->name)
+			free(evsel->name);
+		evsel->name = new_name;
+		scnprintf(msg, msgsize, "kernel.perf_event_paranoid=%d, trying "
+			  "to fall back to excluding kernel and hypervisor "
+			  " samples", paranoid);
+		evsel->core.attr.exclude_kernel = 1;
+		evsel->core.attr.exclude_hv     = 1;
+
+		return true;
+	}
+
+	return false;
+}
+
+static bool find_process(const char *name)
+{
+	size_t len = strlen(name);
+	DIR *dir;
+	struct dirent *d;
+	int ret = -1;
+
+	dir = opendir(procfs__mountpoint());
+	if (!dir)
+		return false;
+
+	/* Walk through the directory. */
+	while (ret && (d = readdir(dir)) != NULL) {
+		char path[PATH_MAX];
+		char *data;
+		size_t size;
+
+		if ((d->d_type != DT_DIR) ||
+		     !strcmp(".", d->d_name) ||
+		     !strcmp("..", d->d_name))
+			continue;
+
+		scnprintf(path, sizeof(path), "%s/%s/comm",
+			  procfs__mountpoint(), d->d_name);
+
+		if (filename__read_str(path, &data, &size))
+			continue;
+
+		ret = strncmp(name, data, len);
+		free(data);
+	}
+
+	closedir(dir);
+	return ret ? false : true;
+}
+
+static bool is_amd(const char *arch, const char *cpuid)
+{
+	return arch && !strcmp("x86", arch) && cpuid && strstarts(cpuid, "AuthenticAMD");
+}
+
+static bool is_amd_ibs(struct evsel *evsel)
+{
+	return evsel->core.attr.precise_ip
+	    || (evsel->pmu_name && !strncmp(evsel->pmu_name, "ibs", 3));
+}
+
+int evsel__open_strerror(struct evsel *evsel, struct target *target,
+			 int err, char *msg, size_t size)
+{
+	struct perf_env *env = evsel__env(evsel);
+	const char *arch = perf_env__arch(env);
+	const char *cpuid = perf_env__cpuid(env);
+	char sbuf[STRERR_BUFSIZE];
+	int printed = 0, enforced = 0;
+
+	switch (err) {
+	case EPERM:
+	case EACCES:
+		printed += scnprintf(msg + printed, size - printed,
+			"Access to performance monitoring and observability operations is limited.\n");
+
+		if (!sysfs__read_int("fs/selinux/enforce", &enforced)) {
+			if (enforced) {
+				printed += scnprintf(msg + printed, size - printed,
+					"Enforced MAC policy settings (SELinux) can limit access to performance\n"
+					"monitoring and observability operations. Inspect system audit records for\n"
+					"more perf_event access control information and adjusting the policy.\n");
+			}
+		}
+
+		if (err == EPERM)
+			printed += scnprintf(msg, size,
+				"No permission to enable %s event.\n\n", evsel__name(evsel));
+
+		return scnprintf(msg + printed, size - printed,
+		 "Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n"
+		 "access to performance monitoring and observability operations for processes\n"
+		 "without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability.\n"
+		 "More information can be found at 'Perf events and tool security' document:\n"
+		 "https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html\n"
+		 "perf_event_paranoid setting is %d:\n"
+		 "  -1: Allow use of (almost) all events by all users\n"
+		 "      Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n"
+		 ">= 0: Disallow raw and ftrace function tracepoint access\n"
+		 ">= 1: Disallow CPU event access\n"
+		 ">= 2: Disallow kernel profiling\n"
+		 "To make the adjusted perf_event_paranoid setting permanent preserve it\n"
+		 "in /etc/sysctl.conf (e.g. kernel.perf_event_paranoid = <setting>)",
+		 perf_event_paranoid());
+	case ENOENT:
+		return scnprintf(msg, size, "The %s event is not supported.", evsel__name(evsel));
+	case EMFILE:
+		return scnprintf(msg, size, "%s",
+			 "Too many events are opened.\n"
+			 "Probably the maximum number of open file descriptors has been reached.\n"
+			 "Hint: Try again after reducing the number of events.\n"
+			 "Hint: Try increasing the limit with 'ulimit -n <limit>'");
+	case ENOMEM:
+		if (evsel__has_callchain(evsel) &&
+		    access("/proc/sys/kernel/perf_event_max_stack", F_OK) == 0)
+			return scnprintf(msg, size,
+					 "Not enough memory to setup event with callchain.\n"
+					 "Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack\n"
+					 "Hint: Current value: %d", sysctl__max_stack());
+		break;
+	case ENODEV:
+		if (target->cpu_list)
+			return scnprintf(msg, size, "%s",
+	 "No such device - did you specify an out-of-range profile CPU?");
+		break;
+	case EOPNOTSUPP:
+		if (evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK)
+			return scnprintf(msg, size,
+	"%s: PMU Hardware or event type doesn't support branch stack sampling.",
+					 evsel__name(evsel));
+		if (evsel->core.attr.aux_output)
+			return scnprintf(msg, size,
+	"%s: PMU Hardware doesn't support 'aux_output' feature",
+					 evsel__name(evsel));
+		if (evsel->core.attr.sample_period != 0)
+			return scnprintf(msg, size,
+	"%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'",
+					 evsel__name(evsel));
+		if (evsel->core.attr.precise_ip)
+			return scnprintf(msg, size, "%s",
+	"\'precise\' request may not be supported. Try removing 'p' modifier.");
+#if defined(__i386__) || defined(__x86_64__)
+		if (evsel->core.attr.type == PERF_TYPE_HARDWARE)
+			return scnprintf(msg, size, "%s",
+	"No hardware sampling interrupt available.\n");
+#endif
+		break;
+	case EBUSY:
+		if (find_process("oprofiled"))
+			return scnprintf(msg, size,
+	"The PMU counters are busy/taken by another profiler.\n"
+	"We found oprofile daemon running, please stop it and try again.");
+		break;
+	case EINVAL:
+		if (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE && perf_missing_features.code_page_size)
+			return scnprintf(msg, size, "Asking for the code page size isn't supported by this kernel.");
+		if (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE && perf_missing_features.data_page_size)
+			return scnprintf(msg, size, "Asking for the data page size isn't supported by this kernel.");
+		if (evsel->core.attr.write_backward && perf_missing_features.write_backward)
+			return scnprintf(msg, size, "Reading from overwrite event is not supported by this kernel.");
+		if (perf_missing_features.clockid)
+			return scnprintf(msg, size, "clockid feature not supported.");
+		if (perf_missing_features.clockid_wrong)
+			return scnprintf(msg, size, "wrong clockid (%d).", clockid);
+		if (perf_missing_features.aux_output)
+			return scnprintf(msg, size, "The 'aux_output' feature is not supported, update the kernel.");
+		if (!target__has_cpu(target))
+			return scnprintf(msg, size,
+	"Invalid event (%s) in per-thread mode, enable system wide with '-a'.",
+					evsel__name(evsel));
+		if (is_amd(arch, cpuid)) {
+			if (is_amd_ibs(evsel)) {
+				if (evsel->core.attr.exclude_kernel)
+					return scnprintf(msg, size,
+	"AMD IBS can't exclude kernel events.  Try running at a higher privilege level.");
+				if (!evsel->core.system_wide)
+					return scnprintf(msg, size,
+	"AMD IBS may only be available in system-wide/per-cpu mode.  Try using -a, or -C and workload affinity");
+			}
+		}
+
+		break;
+	case ENODATA:
+		return scnprintf(msg, size, "Cannot collect data source with the load latency event alone. "
+				 "Please add an auxiliary event in front of the load latency event.");
+	default:
+		break;
+	}
+
+	return scnprintf(msg, size,
+	"The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n"
+	"/bin/dmesg | grep -i perf may provide additional information.\n",
+			 err, str_error_r(err, sbuf, sizeof(sbuf)), evsel__name(evsel));
+}
+
+struct perf_env *evsel__env(struct evsel *evsel)
+{
+	if (evsel && evsel->evlist && evsel->evlist->env)
+		return evsel->evlist->env;
+	return &perf_env;
+}
+
+static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
+{
+	int cpu_map_idx, thread;
+
+	for (cpu_map_idx = 0; cpu_map_idx < xyarray__max_x(evsel->core.fd); cpu_map_idx++) {
+		for (thread = 0; thread < xyarray__max_y(evsel->core.fd);
+		     thread++) {
+			int fd = FD(evsel, cpu_map_idx, thread);
+
+			if (perf_evlist__id_add_fd(&evlist->core, &evsel->core,
+						   cpu_map_idx, thread, fd) < 0)
+				return -1;
+		}
+	}
+
+	return 0;
+}
+
+int evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
+{
+	struct perf_cpu_map *cpus = evsel->core.cpus;
+	struct perf_thread_map *threads = evsel->core.threads;
+
+	if (perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr))
+		return -ENOMEM;
+
+	return store_evsel_ids(evsel, evlist);
+}
+
+void evsel__zero_per_pkg(struct evsel *evsel)
+{
+	struct hashmap_entry *cur;
+	size_t bkt;
+
+	if (evsel->per_pkg_mask) {
+		hashmap__for_each_entry(evsel->per_pkg_mask, cur, bkt)
+			free((char *)cur->key);
+
+		hashmap__clear(evsel->per_pkg_mask);
+	}
+}
+
+bool evsel__is_hybrid(struct evsel *evsel)
+{
+	return evsel->pmu_name && perf_pmu__is_hybrid(evsel->pmu_name);
+}
+
+struct evsel *evsel__leader(struct evsel *evsel)
+{
+	return container_of(evsel->core.leader, struct evsel, core);
+}
+
+bool evsel__has_leader(struct evsel *evsel, struct evsel *leader)
+{
+	return evsel->core.leader == &leader->core;
+}
+
+bool evsel__is_leader(struct evsel *evsel)
+{
+	return evsel__has_leader(evsel, evsel);
+}
+
+void evsel__set_leader(struct evsel *evsel, struct evsel *leader)
+{
+	evsel->core.leader = &leader->core;
+}
+
+int evsel__source_count(const struct evsel *evsel)
+{
+	struct evsel *pos;
+	int count = 0;
+
+	evlist__for_each_entry(evsel->evlist, pos) {
+		if (pos->metric_leader == evsel)
+			count++;
+	}
+	return count;
+}
+
+bool __weak arch_evsel__must_be_in_group(const struct evsel *evsel __maybe_unused)
+{
+	return false;
+}
+
+/*
+ * Remove an event from a given group (leader).
+ * Some events, e.g., perf metrics Topdown events,
+ * must always be grouped. Ignore the events.
+ */
+void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader)
+{
+	if (!arch_evsel__must_be_in_group(evsel) && evsel != leader) {
+		evsel__set_leader(evsel, evsel);
+		evsel->core.nr_members = 0;
+		leader->core.nr_members--;
+	}
+}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
new file mode 100644
index 000000000..8ce30329a
--- /dev/null
+++ b/tools/perf/util/evsel.h
@@ -0,0 +1,528 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_EVSEL_H
+#define __PERF_EVSEL_H 1
+
+#include <linux/list.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <linux/perf_event.h>
+#include <linux/types.h>
+#include <internal/evsel.h>
+#include <perf/evsel.h>
+#include "symbol_conf.h"
+#include <internal/cpumap.h>
+#include <perf/cpumap.h>
+
+struct bpf_object;
+struct cgroup;
+struct perf_counts;
+struct perf_stat_evsel;
+union perf_event;
+struct bpf_counter_ops;
+struct target;
+struct hashmap;
+struct bperf_leader_bpf;
+struct bperf_follower_bpf;
+struct perf_pmu;
+
+typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
+
+enum perf_tool_event {
+	PERF_TOOL_NONE		= 0,
+	PERF_TOOL_DURATION_TIME = 1,
+	PERF_TOOL_USER_TIME = 2,
+	PERF_TOOL_SYSTEM_TIME = 3,
+
+	PERF_TOOL_MAX,
+};
+
+const char *perf_tool_event__to_str(enum perf_tool_event ev);
+enum perf_tool_event perf_tool_event__from_str(const char *str);
+
+#define perf_tool_event__for_each_event(ev)		\
+	for ((ev) = PERF_TOOL_DURATION_TIME; (ev) < PERF_TOOL_MAX; ev++)
+
+/** struct evsel - event selector
+ *
+ * @evlist - evlist this evsel is in, if it is in one.
+ * @core - libperf evsel object
+ * @name - Can be set to retain the original event name passed by the user,
+ *         so that when showing results in tools such as 'perf stat', we
+ *         show the name used, not some alias.
+ * @id_pos: the position of the event id (PERF_SAMPLE_ID or
+ *          PERF_SAMPLE_IDENTIFIER) in a sample event i.e. in the array of
+ *          struct perf_record_sample
+ * @is_pos: the position (counting backwards) of the event id (PERF_SAMPLE_ID or
+ *          PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if sample_id_all
+ *          is used there is an id sample appended to non-sample events
+ * @priv:   And what is in its containing unnamed union are tool specific
+ */
+struct evsel {
+	struct perf_evsel	core;
+	struct evlist		*evlist;
+	off_t			id_offset;
+	int			id_pos;
+	int			is_pos;
+	unsigned int		sample_size;
+
+	/*
+	 * These fields can be set in the parse-events code or similar.
+	 * Please check evsel__clone() to copy them properly so that
+	 * they can be released properly.
+	 */
+	struct {
+		char			*name;
+		char			*group_name;
+		const char		*pmu_name;
+		struct tep_event	*tp_format;
+		char			*filter;
+		unsigned long		max_events;
+		double			scale;
+		const char		*unit;
+		struct cgroup		*cgrp;
+		const char		*metric_id;
+		enum perf_tool_event	tool_event;
+		/* parse modifier helper */
+		int			exclude_GH;
+		int			sample_read;
+		bool			snapshot;
+		bool			per_pkg;
+		bool			percore;
+		bool			precise_max;
+		bool			use_uncore_alias;
+		bool			is_libpfm_event;
+		bool			auto_merge_stats;
+		bool			collect_stat;
+		bool			weak_group;
+		bool			bpf_counter;
+		bool			use_config_name;
+		int			bpf_fd;
+		struct bpf_object	*bpf_obj;
+		struct list_head	config_terms;
+	};
+
+	/*
+	 * metric fields are similar, but needs more care as they can have
+	 * references to other metric (evsel).
+	 */
+	const char *		metric_expr;
+	const char *		metric_name;
+	struct evsel		**metric_events;
+	struct evsel		*metric_leader;
+
+	void			*handler;
+	struct perf_counts	*counts;
+	struct perf_counts	*prev_raw_counts;
+	unsigned long		nr_events_printed;
+	struct perf_stat_evsel  *stats;
+	void			*priv;
+	u64			db_id;
+	bool			uniquified_name;
+	bool 			supported;
+	bool 			needs_swap;
+	bool 			disabled;
+	bool			no_aux_samples;
+	bool			immediate;
+	bool			tracking;
+	bool			ignore_missing_thread;
+	bool			forced_leader;
+	bool			cmdline_group_boundary;
+	bool			merged_stat;
+	bool			reset_group;
+	bool			errored;
+	bool			needs_auxtrace_mmap;
+	struct hashmap		*per_pkg_mask;
+	int			err;
+	struct {
+		evsel__sb_cb_t	*cb;
+		void		*data;
+	} side_band;
+	/*
+	 * For reporting purposes, an evsel sample can have a callchain
+	 * synthesized from AUX area data. Keep track of synthesized sample
+	 * types here. Note, the recorded sample_type cannot be changed because
+	 * it is needed to continue to parse events.
+	 * See also evsel__has_callchain().
+	 */
+	__u64			synth_sample_type;
+
+	/*
+	 * bpf_counter_ops serves two use cases:
+	 *   1. perf-stat -b          counting events used byBPF programs
+	 *   2. perf-stat --use-bpf   use BPF programs to aggregate counts
+	 */
+	struct bpf_counter_ops	*bpf_counter_ops;
+
+	/* for perf-stat -b */
+	struct list_head	bpf_counter_list;
+
+	/* for perf-stat --use-bpf */
+	int			bperf_leader_prog_fd;
+	int			bperf_leader_link_fd;
+	union {
+		struct bperf_leader_bpf *leader_skel;
+		struct bperf_follower_bpf *follower_skel;
+	};
+	unsigned long		open_flags;
+	int			precise_ip_original;
+
+	/* for missing_features */
+	struct perf_pmu		*pmu;
+};
+
+struct perf_missing_features {
+	bool sample_id_all;
+	bool exclude_guest;
+	bool mmap2;
+	bool cloexec;
+	bool clockid;
+	bool clockid_wrong;
+	bool lbr_flags;
+	bool write_backward;
+	bool group_read;
+	bool ksymbol;
+	bool bpf;
+	bool aux_output;
+	bool branch_hw_idx;
+	bool cgroup;
+	bool data_page_size;
+	bool code_page_size;
+	bool weight_struct;
+	bool read_lost;
+};
+
+extern struct perf_missing_features perf_missing_features;
+
+struct perf_cpu_map;
+struct thread_map;
+struct record_opts;
+
+static inline struct perf_cpu_map *evsel__cpus(struct evsel *evsel)
+{
+	return perf_evsel__cpus(&evsel->core);
+}
+
+static inline int evsel__nr_cpus(struct evsel *evsel)
+{
+	return perf_cpu_map__nr(evsel__cpus(evsel));
+}
+
+void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
+			   struct perf_counts_values *count);
+
+int evsel__object_config(size_t object_size,
+			 int (*init)(struct evsel *evsel),
+			 void (*fini)(struct evsel *evsel));
+
+struct perf_pmu *evsel__find_pmu(struct evsel *evsel);
+bool evsel__is_aux_event(struct evsel *evsel);
+
+struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx);
+
+static inline struct evsel *evsel__new(struct perf_event_attr *attr)
+{
+	return evsel__new_idx(attr, 0);
+}
+
+struct evsel *evsel__clone(struct evsel *orig);
+struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx);
+
+int copy_config_terms(struct list_head *dst, struct list_head *src);
+void free_config_terms(struct list_head *config_terms);
+
+/*
+ * Returns pointer with encoded error via <linux/err.h> interface.
+ */
+static inline struct evsel *evsel__newtp(const char *sys, const char *name)
+{
+	return evsel__newtp_idx(sys, name, 0);
+}
+
+struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config);
+
+struct tep_event *event_format__new(const char *sys, const char *name);
+
+void evsel__init(struct evsel *evsel, struct perf_event_attr *attr, int idx);
+void evsel__exit(struct evsel *evsel);
+void evsel__delete(struct evsel *evsel);
+
+struct callchain_param;
+
+void evsel__config(struct evsel *evsel, struct record_opts *opts,
+		   struct callchain_param *callchain);
+void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
+			     struct callchain_param *callchain);
+
+int __evsel__sample_size(u64 sample_type);
+void evsel__calc_id_pos(struct evsel *evsel);
+
+bool evsel__is_cache_op_valid(u8 type, u8 op);
+
+static inline bool evsel__is_bpf(struct evsel *evsel)
+{
+	return evsel->bpf_counter_ops != NULL;
+}
+
+static inline bool evsel__is_bperf(struct evsel *evsel)
+{
+	return evsel->bpf_counter_ops != NULL && list_empty(&evsel->bpf_counter_list);
+}
+
+#define EVSEL__MAX_ALIASES 8
+
+extern const char *const evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES];
+extern const char *const evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES];
+extern const char *const evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES];
+extern const char *const evsel__hw_names[PERF_COUNT_HW_MAX];
+extern const char *const evsel__sw_names[PERF_COUNT_SW_MAX];
+extern char *evsel__bpf_counter_events;
+bool evsel__match_bpf_counter_events(const char *name);
+int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size);
+
+int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size);
+const char *evsel__name(struct evsel *evsel);
+const char *evsel__metric_id(const struct evsel *evsel);
+
+static inline bool evsel__is_tool(const struct evsel *evsel)
+{
+	return evsel->tool_event != PERF_TOOL_NONE;
+}
+
+const char *evsel__group_name(struct evsel *evsel);
+int evsel__group_desc(struct evsel *evsel, char *buf, size_t size);
+
+void __evsel__set_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit);
+void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit);
+
+#define evsel__set_sample_bit(evsel, bit) \
+	__evsel__set_sample_bit(evsel, PERF_SAMPLE_##bit)
+
+#define evsel__reset_sample_bit(evsel, bit) \
+	__evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit)
+
+void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier);
+
+void arch_evsel__set_sample_weight(struct evsel *evsel);
+void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr);
+void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr);
+
+int evsel__set_filter(struct evsel *evsel, const char *filter);
+int evsel__append_tp_filter(struct evsel *evsel, const char *filter);
+int evsel__append_addr_filter(struct evsel *evsel, const char *filter);
+int evsel__enable_cpu(struct evsel *evsel, int cpu_map_idx);
+int evsel__enable(struct evsel *evsel);
+int evsel__disable(struct evsel *evsel);
+int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx);
+
+int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx);
+int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads);
+int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
+		struct perf_thread_map *threads);
+void evsel__close(struct evsel *evsel);
+int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
+		struct perf_thread_map *threads);
+bool evsel__detect_missing_features(struct evsel *evsel);
+
+enum rlimit_action { NO_CHANGE, SET_TO_MAX, INCREASED_MAX };
+bool evsel__increase_rlimit(enum rlimit_action *set_rlimit);
+
+bool evsel__precise_ip_fallback(struct evsel *evsel);
+
+struct perf_sample;
+
+void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name);
+u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name);
+
+static inline char *evsel__strval(struct evsel *evsel, struct perf_sample *sample, const char *name)
+{
+	return evsel__rawptr(evsel, sample, name);
+}
+
+struct tep_format_field;
+
+u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample, bool needs_swap);
+
+struct tep_format_field *evsel__field(struct evsel *evsel, const char *name);
+
+#define evsel__match(evsel, t, c)		\
+	(evsel->core.attr.type == PERF_TYPE_##t &&	\
+	 evsel->core.attr.config == PERF_COUNT_##c)
+
+static inline bool evsel__match2(struct evsel *e1, struct evsel *e2)
+{
+	return (e1->core.attr.type == e2->core.attr.type) &&
+	       (e1->core.attr.config == e2->core.attr.config);
+}
+
+int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread);
+
+int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale);
+
+/**
+ * evsel__read_on_cpu - Read out the results on a CPU and thread
+ *
+ * @evsel - event selector to read value
+ * @cpu_map_idx - CPU of interest
+ * @thread - thread of interest
+ */
+static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread)
+{
+	return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, false);
+}
+
+/**
+ * evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
+ *
+ * @evsel - event selector to read value
+ * @cpu_map_idx - CPU of interest
+ * @thread - thread of interest
+ */
+static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu_map_idx, int thread)
+{
+	return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, true);
+}
+
+int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
+			struct perf_sample *sample);
+
+int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event,
+				  u64 *timestamp);
+
+u16 evsel__id_hdr_size(struct evsel *evsel);
+
+static inline struct evsel *evsel__next(struct evsel *evsel)
+{
+	return list_entry(evsel->core.node.next, struct evsel, core.node);
+}
+
+static inline struct evsel *evsel__prev(struct evsel *evsel)
+{
+	return list_entry(evsel->core.node.prev, struct evsel, core.node);
+}
+
+/**
+ * evsel__is_group_leader - Return whether given evsel is a leader event
+ *
+ * @evsel - evsel selector to be tested
+ *
+ * Return %true if @evsel is a group leader or a stand-alone event
+ */
+static inline bool evsel__is_group_leader(const struct evsel *evsel)
+{
+	return evsel->core.leader == &evsel->core;
+}
+
+/**
+ * evsel__is_group_event - Return whether given evsel is a group event
+ *
+ * @evsel - evsel selector to be tested
+ *
+ * Return %true iff event group view is enabled and @evsel is a actual group
+ * leader which has other members in the group
+ */
+static inline bool evsel__is_group_event(struct evsel *evsel)
+{
+	if (!symbol_conf.event_group)
+		return false;
+
+	return evsel__is_group_leader(evsel) && evsel->core.nr_members > 1;
+}
+
+bool evsel__is_function_event(struct evsel *evsel);
+
+static inline bool evsel__is_bpf_output(struct evsel *evsel)
+{
+	return evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT);
+}
+
+static inline bool evsel__is_clock(struct evsel *evsel)
+{
+	return evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
+	       evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK);
+}
+
+bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize);
+int evsel__open_strerror(struct evsel *evsel, struct target *target,
+			 int err, char *msg, size_t size);
+
+static inline int evsel__group_idx(struct evsel *evsel)
+{
+	return evsel->core.idx - evsel->core.leader->idx;
+}
+
+/* Iterates group WITHOUT the leader. */
+#define for_each_group_member(_evsel, _leader) 					\
+for ((_evsel) = list_entry((_leader)->core.node.next, struct evsel, core.node); \
+     (_evsel) && (_evsel)->core.leader == (&_leader->core);					\
+     (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node))
+
+/* Iterates group WITH the leader. */
+#define for_each_group_evsel(_evsel, _leader) 					\
+for ((_evsel) = _leader; 							\
+     (_evsel) && (_evsel)->core.leader == (&_leader->core);					\
+     (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node))
+
+static inline bool evsel__has_branch_callstack(const struct evsel *evsel)
+{
+	return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
+}
+
+static inline bool evsel__has_branch_hw_idx(const struct evsel *evsel)
+{
+	return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
+}
+
+static inline bool evsel__has_callchain(const struct evsel *evsel)
+{
+	/*
+	 * For reporting purposes, an evsel sample can have a recorded callchain
+	 * or a callchain synthesized from AUX area data.
+	 */
+	return evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN ||
+	       evsel->synth_sample_type & PERF_SAMPLE_CALLCHAIN;
+}
+
+static inline bool evsel__has_br_stack(const struct evsel *evsel)
+{
+	/*
+	 * For reporting purposes, an evsel sample can have a recorded branch
+	 * stack or a branch stack synthesized from AUX area data.
+	 */
+	return evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK ||
+	       evsel->synth_sample_type & PERF_SAMPLE_BRANCH_STACK;
+}
+
+static inline bool evsel__is_dummy_event(struct evsel *evsel)
+{
+	return (evsel->core.attr.type == PERF_TYPE_SOFTWARE) &&
+	       (evsel->core.attr.config == PERF_COUNT_SW_DUMMY);
+}
+
+struct perf_env *evsel__env(struct evsel *evsel);
+
+int evsel__store_ids(struct evsel *evsel, struct evlist *evlist);
+
+void evsel__zero_per_pkg(struct evsel *evsel);
+bool evsel__is_hybrid(struct evsel *evsel);
+struct evsel *evsel__leader(struct evsel *evsel);
+bool evsel__has_leader(struct evsel *evsel, struct evsel *leader);
+bool evsel__is_leader(struct evsel *evsel);
+void evsel__set_leader(struct evsel *evsel, struct evsel *leader);
+int evsel__source_count(const struct evsel *evsel);
+void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader);
+
+bool arch_evsel__must_be_in_group(const struct evsel *evsel);
+
+/*
+ * Macro to swap the bit-field postition and size.
+ * Used when,
+ * - dont need to swap the entire u64 &&
+ * - when u64 has variable bit-field sizes &&
+ * - when presented in a host endian which is different
+ *   than the source endian of the perf.data file
+ */
+#define bitfield_swap(src, pos, size)	\
+	((((src) >> (pos)) & ((1ull << (size)) - 1)) << (63 - ((pos) + (size) - 1)))
+
+u64 evsel__bitfield_swap_branch_flags(u64 value);
+#endif /* __PERF_EVSEL_H */
diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h
new file mode 100644
index 000000000..aee6f808b
--- /dev/null
+++ b/tools/perf/util/evsel_config.h
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __PERF_EVSEL_CONFIG_H
+#define __PERF_EVSEL_CONFIG_H 1
+
+#include <linux/types.h>
+#include <stdbool.h>
+
+/*
+ * The 'struct evsel_config_term' is used to pass event
+ * specific configuration data to evsel__config routine.
+ * It is allocated within event parsing and attached to
+ * evsel::config_terms list head.
+*/
+enum evsel_term_type {
+	EVSEL__CONFIG_TERM_PERIOD,
+	EVSEL__CONFIG_TERM_FREQ,
+	EVSEL__CONFIG_TERM_TIME,
+	EVSEL__CONFIG_TERM_CALLGRAPH,
+	EVSEL__CONFIG_TERM_STACK_USER,
+	EVSEL__CONFIG_TERM_INHERIT,
+	EVSEL__CONFIG_TERM_MAX_STACK,
+	EVSEL__CONFIG_TERM_MAX_EVENTS,
+	EVSEL__CONFIG_TERM_OVERWRITE,
+	EVSEL__CONFIG_TERM_DRV_CFG,
+	EVSEL__CONFIG_TERM_BRANCH,
+	EVSEL__CONFIG_TERM_PERCORE,
+	EVSEL__CONFIG_TERM_AUX_OUTPUT,
+	EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE,
+	EVSEL__CONFIG_TERM_CFG_CHG,
+};
+
+struct evsel_config_term {
+	struct list_head      list;
+	enum evsel_term_type  type;
+	bool		      free_str;
+	union {
+		u64	      period;
+		u64	      freq;
+		bool	      time;
+		u64	      stack_user;
+		int	      max_stack;
+		bool	      inherit;
+		bool	      overwrite;
+		unsigned long max_events;
+		bool	      percore;
+		bool	      aux_output;
+		u32	      aux_sample_size;
+		u64	      cfg_chg;
+		char	      *str;
+	} val;
+	bool weak;
+};
+
+struct evsel;
+
+struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type);
+
+#define evsel__get_config_term(evsel, type) \
+	__evsel__get_config_term(evsel, EVSEL__CONFIG_TERM_ ## type)
+
+#endif // __PERF_EVSEL_CONFIG_H
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
new file mode 100644
index 000000000..8c2ea8001
--- /dev/null
+++ b/tools/perf/util/evsel_fprintf.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <traceevent/event-parse.h>
+#include "evsel.h"
+#include "util/evsel_fprintf.h"
+#include "util/event.h"
+#include "callchain.h"
+#include "map.h"
+#include "strlist.h"
+#include "symbol.h"
+#include "srcline.h"
+#include "dso.h"
+
+static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...)
+{
+	va_list args;
+	int ret = 0;
+
+	if (!*first) {
+		ret += fprintf(fp, ",");
+	} else {
+		ret += fprintf(fp, ":");
+		*first = false;
+	}
+
+	va_start(args, fmt);
+	ret += vfprintf(fp, fmt, args);
+	va_end(args);
+	return ret;
+}
+
+static int __print_attr__fprintf(FILE *fp, const char *name, const char *val, void *priv)
+{
+	return comma_fprintf(fp, (bool *)priv, " %s: %s", name, val);
+}
+
+int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE *fp)
+{
+	bool first = true;
+	int printed = 0;
+
+	if (details->event_group) {
+		struct evsel *pos;
+
+		if (!evsel__is_group_leader(evsel))
+			return 0;
+
+		if (evsel->core.nr_members > 1)
+			printed += fprintf(fp, "%s{", evsel->group_name ?: "");
+
+		printed += fprintf(fp, "%s", evsel__name(evsel));
+		for_each_group_member(pos, evsel)
+			printed += fprintf(fp, ",%s", evsel__name(pos));
+
+		if (evsel->core.nr_members > 1)
+			printed += fprintf(fp, "}");
+		goto out;
+	}
+
+	printed += fprintf(fp, "%s", evsel__name(evsel));
+
+	if (details->verbose) {
+		printed += perf_event_attr__fprintf(fp, &evsel->core.attr,
+						    __print_attr__fprintf, &first);
+	} else if (details->freq) {
+		const char *term = "sample_freq";
+
+		if (!evsel->core.attr.freq)
+			term = "sample_period";
+
+		printed += comma_fprintf(fp, &first, " %s=%" PRIu64,
+					 term, (u64)evsel->core.attr.sample_freq);
+	}
+
+	if (details->trace_fields) {
+		struct tep_format_field *field;
+
+		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) {
+			printed += comma_fprintf(fp, &first, " (not a tracepoint)");
+			goto out;
+		}
+
+		field = evsel->tp_format->format.fields;
+		if (field == NULL) {
+			printed += comma_fprintf(fp, &first, " (no trace field)");
+			goto out;
+		}
+
+		printed += comma_fprintf(fp, &first, " trace_fields: %s", field->name);
+
+		field = field->next;
+		while (field) {
+			printed += comma_fprintf(fp, &first, "%s", field->name);
+			field = field->next;
+		}
+	}
+out:
+	fputc('\n', fp);
+	return ++printed;
+}
+
+#ifndef PYTHON_PERF
+int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
+			      unsigned int print_opts, struct callchain_cursor *cursor,
+			      struct strlist *bt_stop_list, FILE *fp)
+{
+	int printed = 0;
+	struct callchain_cursor_node *node;
+	int print_ip = print_opts & EVSEL__PRINT_IP;
+	int print_sym = print_opts & EVSEL__PRINT_SYM;
+	int print_dso = print_opts & EVSEL__PRINT_DSO;
+	int print_symoffset = print_opts & EVSEL__PRINT_SYMOFFSET;
+	int print_oneline = print_opts & EVSEL__PRINT_ONELINE;
+	int print_srcline = print_opts & EVSEL__PRINT_SRCLINE;
+	int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR;
+	int print_arrow = print_opts & EVSEL__PRINT_CALLCHAIN_ARROW;
+	int print_skip_ignored = print_opts & EVSEL__PRINT_SKIP_IGNORED;
+	char s = print_oneline ? ' ' : '\t';
+	bool first = true;
+
+	if (sample->callchain) {
+		struct addr_location node_al;
+
+		callchain_cursor_commit(cursor);
+
+		while (1) {
+			struct symbol *sym;
+			struct map *map;
+			u64 addr = 0;
+
+			node = callchain_cursor_current(cursor);
+			if (!node)
+				break;
+
+			sym = node->ms.sym;
+			map = node->ms.map;
+
+			if (sym && sym->ignore && print_skip_ignored)
+				goto next;
+
+			printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");
+
+			if (print_arrow && !first)
+				printed += fprintf(fp, " <-");
+
+			if (map)
+				addr = map->map_ip(map, node->ip);
+
+			if (print_ip) {
+				/* Show binary offset for userspace addr */
+				if (map && !map->dso->kernel)
+					printed += fprintf(fp, "%c%16" PRIx64, s, addr);
+				else
+					printed += fprintf(fp, "%c%16" PRIx64, s, node->ip);
+			}
+
+			if (print_sym) {
+				printed += fprintf(fp, " ");
+				node_al.addr = addr;
+				node_al.map  = map;
+
+				if (print_symoffset) {
+					printed += __symbol__fprintf_symname_offs(sym, &node_al,
+										  print_unknown_as_addr,
+										  true, fp);
+				} else {
+					printed += __symbol__fprintf_symname(sym, &node_al,
+									     print_unknown_as_addr, fp);
+				}
+			}
+
+			if (print_dso && (!sym || !sym->inlined)) {
+				printed += fprintf(fp, " (");
+				printed += map__fprintf_dsoname(map, fp);
+				printed += fprintf(fp, ")");
+			}
+
+			if (print_srcline)
+				printed += map__fprintf_srcline(map, addr, "\n  ", fp);
+
+			if (sym && sym->inlined)
+				printed += fprintf(fp, " (inlined)");
+
+			if (!print_oneline)
+				printed += fprintf(fp, "\n");
+
+			/* Add srccode here too? */
+			if (bt_stop_list && sym &&
+			    strlist__has_entry(bt_stop_list, sym->name)) {
+				break;
+			}
+
+			first = false;
+next:
+			callchain_cursor_advance(cursor);
+		}
+	}
+
+	return printed;
+}
+
+int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
+			int left_alignment, unsigned int print_opts,
+			struct callchain_cursor *cursor, struct strlist *bt_stop_list, FILE *fp)
+{
+	int printed = 0;
+	int print_ip = print_opts & EVSEL__PRINT_IP;
+	int print_sym = print_opts & EVSEL__PRINT_SYM;
+	int print_dso = print_opts & EVSEL__PRINT_DSO;
+	int print_symoffset = print_opts & EVSEL__PRINT_SYMOFFSET;
+	int print_srcline = print_opts & EVSEL__PRINT_SRCLINE;
+	int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR;
+
+	if (cursor != NULL) {
+		printed += sample__fprintf_callchain(sample, left_alignment, print_opts,
+						     cursor, bt_stop_list, fp);
+	} else {
+		printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");
+
+		if (print_ip)
+			printed += fprintf(fp, "%16" PRIx64, sample->ip);
+
+		if (print_sym) {
+			printed += fprintf(fp, " ");
+			if (print_symoffset) {
+				printed += __symbol__fprintf_symname_offs(al->sym, al,
+									  print_unknown_as_addr,
+									  true, fp);
+			} else {
+				printed += __symbol__fprintf_symname(al->sym, al,
+								     print_unknown_as_addr, fp);
+			}
+		}
+
+		if (print_dso) {
+			printed += fprintf(fp, " (");
+			printed += map__fprintf_dsoname(al->map, fp);
+			printed += fprintf(fp, ")");
+		}
+
+		if (print_srcline)
+			printed += map__fprintf_srcline(al->map, al->addr, "\n  ", fp);
+	}
+
+	return printed;
+}
+#endif /* PYTHON_PERF */
diff --git a/tools/perf/util/evsel_fprintf.h b/tools/perf/util/evsel_fprintf.h
new file mode 100644
index 000000000..3093d096c
--- /dev/null
+++ b/tools/perf/util/evsel_fprintf.h
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __PERF_EVSEL_FPRINTF_H
+#define __PERF_EVSEL_FPRINTF_H 1
+
+#include <stdio.h>
+#include <stdbool.h>
+
+struct evsel;
+
+struct perf_attr_details {
+	bool freq;
+	bool verbose;
+	bool event_group;
+	bool force;
+	bool trace_fields;
+};
+
+int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE *fp);
+
+#define EVSEL__PRINT_IP			(1<<0)
+#define EVSEL__PRINT_SYM		(1<<1)
+#define EVSEL__PRINT_DSO		(1<<2)
+#define EVSEL__PRINT_SYMOFFSET		(1<<3)
+#define EVSEL__PRINT_ONELINE		(1<<4)
+#define EVSEL__PRINT_SRCLINE		(1<<5)
+#define EVSEL__PRINT_UNKNOWN_AS_ADDR	(1<<6)
+#define EVSEL__PRINT_CALLCHAIN_ARROW	(1<<7)
+#define EVSEL__PRINT_SKIP_IGNORED	(1<<8)
+
+struct addr_location;
+struct perf_event_attr;
+struct perf_sample;
+struct callchain_cursor;
+struct strlist;
+
+int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
+			      unsigned int print_opts, struct callchain_cursor *cursor,
+			      struct strlist *bt_stop_list, FILE *fp);
+
+int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
+			int left_alignment, unsigned int print_opts,
+			struct callchain_cursor *cursor,
+			struct strlist *bt_stop_list, FILE *fp);
+
+typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *);
+
+int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
+			     attr__fprintf_f attr__fprintf, void *priv);
+#endif // __PERF_EVSEL_H
diff --git a/tools/perf/util/evswitch.c b/tools/perf/util/evswitch.c
new file mode 100644
index 000000000..40cb56a93
--- /dev/null
+++ b/tools/perf/util/evswitch.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+
+#include "evswitch.h"
+#include "evlist.h"
+
+bool evswitch__discard(struct evswitch *evswitch, struct evsel *evsel)
+{
+	if (evswitch->on && evswitch->discarding) {
+		if (evswitch->on != evsel)
+			return true;
+
+		evswitch->discarding = false;
+
+		if (!evswitch->show_on_off_events)
+			return true;
+
+		return false;
+	}
+
+	if (evswitch->off && !evswitch->discarding) {
+		if (evswitch->off != evsel)
+			return false;
+
+		evswitch->discarding = true;
+
+		if (!evswitch->show_on_off_events)
+			return true;
+	}
+
+	return false;
+}
+
+static int evswitch__fprintf_enoent(FILE *fp, const char *evtype, const char *evname)
+{
+	int printed = fprintf(fp, "ERROR: switch-%s event not found (%s)\n", evtype, evname);
+
+	return printed += fprintf(fp, "HINT:  use 'perf evlist' to see the available event names\n");
+}
+
+int evswitch__init(struct evswitch *evswitch, struct evlist *evlist, FILE *fp)
+{
+	if (evswitch->on_name) {
+		evswitch->on = evlist__find_evsel_by_str(evlist, evswitch->on_name);
+		if (evswitch->on == NULL) {
+			evswitch__fprintf_enoent(fp, "on", evswitch->on_name);
+			return -ENOENT;
+		}
+		evswitch->discarding = true;
+	}
+
+	if (evswitch->off_name) {
+		evswitch->off = evlist__find_evsel_by_str(evlist, evswitch->off_name);
+		if (evswitch->off == NULL) {
+			evswitch__fprintf_enoent(fp, "off", evswitch->off_name);
+			return -ENOENT;
+		}
+	}
+
+	return 0;
+}
diff --git a/tools/perf/util/evswitch.h b/tools/perf/util/evswitch.h
new file mode 100644
index 000000000..fd30460b6
--- /dev/null
+++ b/tools/perf/util/evswitch.h
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+#ifndef __PERF_EVSWITCH_H
+#define __PERF_EVSWITCH_H 1
+
+#include <stdbool.h>
+#include <stdio.h>
+
+struct evsel;
+struct evlist;
+
+struct evswitch {
+	struct evsel *on, *off;
+	const char   *on_name, *off_name;
+	bool	     discarding;
+	bool	     show_on_off_events;
+};
+
+int evswitch__init(struct evswitch *evswitch, struct evlist *evlist, FILE *fp);
+
+bool evswitch__discard(struct evswitch *evswitch, struct evsel *evsel);
+
+#define OPTS_EVSWITCH(evswitch)								  \
+	OPT_STRING(0, "switch-on", &(evswitch)->on_name,				  \
+		   "event", "Consider events after the ocurrence of this event"),	  \
+	OPT_STRING(0, "switch-off", &(evswitch)->off_name,				  \
+		   "event", "Stop considering events after the ocurrence of this event"), \
+	OPT_BOOLEAN(0, "show-on-off-events", &(evswitch)->show_on_off_events,		  \
+		    "Show the on/off switch events, used with --switch-on and --switch-off")
+
+#endif /* __PERF_EVSWITCH_H */
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
new file mode 100644
index 000000000..aaacf514d
--- /dev/null
+++ b/tools/perf/util/expr.c
@@ -0,0 +1,461 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdbool.h>
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include "metricgroup.h"
+#include "cpumap.h"
+#include "cputopo.h"
+#include "debug.h"
+#include "expr.h"
+#include "expr-bison.h"
+#include "expr-flex.h"
+#include "smt.h"
+#include "tsc.h"
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+#include <ctype.h>
+#include <math.h>
+
+#ifdef PARSER_DEBUG
+extern int expr_debug;
+#endif
+
+struct expr_id_data {
+	union {
+		struct {
+			double val;
+			int source_count;
+		} val;
+		struct {
+			double val;
+			const char *metric_name;
+			const char *metric_expr;
+		} ref;
+	};
+
+	enum {
+		/* Holding a double value. */
+		EXPR_ID_DATA__VALUE,
+		/* Reference to another metric. */
+		EXPR_ID_DATA__REF,
+		/* A reference but the value has been computed. */
+		EXPR_ID_DATA__REF_VALUE,
+	} kind;
+};
+
+static size_t key_hash(const void *key, void *ctx __maybe_unused)
+{
+	const char *str = (const char *)key;
+	size_t hash = 0;
+
+	while (*str != '\0') {
+		hash *= 31;
+		hash += *str;
+		str++;
+	}
+	return hash;
+}
+
+static bool key_equal(const void *key1, const void *key2,
+		    void *ctx __maybe_unused)
+{
+	return !strcmp((const char *)key1, (const char *)key2);
+}
+
+struct hashmap *ids__new(void)
+{
+	struct hashmap *hash;
+
+	hash = hashmap__new(key_hash, key_equal, NULL);
+	if (IS_ERR(hash))
+		return NULL;
+	return hash;
+}
+
+void ids__free(struct hashmap *ids)
+{
+	struct hashmap_entry *cur;
+	size_t bkt;
+
+	if (ids == NULL)
+		return;
+
+	hashmap__for_each_entry(ids, cur, bkt) {
+		free((char *)cur->key);
+		free(cur->value);
+	}
+
+	hashmap__free(ids);
+}
+
+int ids__insert(struct hashmap *ids, const char *id)
+{
+	struct expr_id_data *data_ptr = NULL, *old_data = NULL;
+	char *old_key = NULL;
+	int ret;
+
+	ret = hashmap__set(ids, id, data_ptr,
+			   (const void **)&old_key, (void **)&old_data);
+	if (ret)
+		free(data_ptr);
+	free(old_key);
+	free(old_data);
+	return ret;
+}
+
+struct hashmap *ids__union(struct hashmap *ids1, struct hashmap *ids2)
+{
+	size_t bkt;
+	struct hashmap_entry *cur;
+	int ret;
+	struct expr_id_data *old_data = NULL;
+	char *old_key = NULL;
+
+	if (!ids1)
+		return ids2;
+
+	if (!ids2)
+		return ids1;
+
+	if (hashmap__size(ids1) <  hashmap__size(ids2)) {
+		struct hashmap *tmp = ids1;
+
+		ids1 = ids2;
+		ids2 = tmp;
+	}
+	hashmap__for_each_entry(ids2, cur, bkt) {
+		ret = hashmap__set(ids1, cur->key, cur->value,
+				(const void **)&old_key, (void **)&old_data);
+		free(old_key);
+		free(old_data);
+
+		if (ret) {
+			hashmap__free(ids1);
+			hashmap__free(ids2);
+			return NULL;
+		}
+	}
+	hashmap__free(ids2);
+	return ids1;
+}
+
+/* Caller must make sure id is allocated */
+int expr__add_id(struct expr_parse_ctx *ctx, const char *id)
+{
+	return ids__insert(ctx->ids, id);
+}
+
+/* Caller must make sure id is allocated */
+int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val)
+{
+	return expr__add_id_val_source_count(ctx, id, val, /*source_count=*/1);
+}
+
+/* Caller must make sure id is allocated */
+int expr__add_id_val_source_count(struct expr_parse_ctx *ctx, const char *id,
+				  double val, int source_count)
+{
+	struct expr_id_data *data_ptr = NULL, *old_data = NULL;
+	char *old_key = NULL;
+	int ret;
+
+	data_ptr = malloc(sizeof(*data_ptr));
+	if (!data_ptr)
+		return -ENOMEM;
+	data_ptr->val.val = val;
+	data_ptr->val.source_count = source_count;
+	data_ptr->kind = EXPR_ID_DATA__VALUE;
+
+	ret = hashmap__set(ctx->ids, id, data_ptr,
+			   (const void **)&old_key, (void **)&old_data);
+	if (ret)
+		free(data_ptr);
+	free(old_key);
+	free(old_data);
+	return ret;
+}
+
+int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref)
+{
+	struct expr_id_data *data_ptr = NULL, *old_data = NULL;
+	char *old_key = NULL;
+	char *name;
+	int ret;
+
+	data_ptr = zalloc(sizeof(*data_ptr));
+	if (!data_ptr)
+		return -ENOMEM;
+
+	name = strdup(ref->metric_name);
+	if (!name) {
+		free(data_ptr);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Intentionally passing just const char pointers,
+	 * originally from 'struct pmu_event' object.
+	 * We don't need to change them, so there's no
+	 * need to create our own copy.
+	 */
+	data_ptr->ref.metric_name = ref->metric_name;
+	data_ptr->ref.metric_expr = ref->metric_expr;
+	data_ptr->kind = EXPR_ID_DATA__REF;
+
+	ret = hashmap__set(ctx->ids, name, data_ptr,
+			   (const void **)&old_key, (void **)&old_data);
+	if (ret)
+		free(data_ptr);
+
+	pr_debug2("adding ref metric %s: %s\n",
+		  ref->metric_name, ref->metric_expr);
+
+	free(old_key);
+	free(old_data);
+	return ret;
+}
+
+int expr__get_id(struct expr_parse_ctx *ctx, const char *id,
+		 struct expr_id_data **data)
+{
+	return hashmap__find(ctx->ids, id, (void **)data) ? 0 : -1;
+}
+
+bool expr__subset_of_ids(struct expr_parse_ctx *haystack,
+			 struct expr_parse_ctx *needles)
+{
+	struct hashmap_entry *cur;
+	size_t bkt;
+	struct expr_id_data *data;
+
+	hashmap__for_each_entry(needles->ids, cur, bkt) {
+		if (expr__get_id(haystack, cur->key, &data))
+			return false;
+	}
+	return true;
+}
+
+
+int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id,
+		     struct expr_id_data **datap)
+{
+	struct expr_id_data *data;
+
+	if (expr__get_id(ctx, id, datap) || !*datap) {
+		pr_debug("%s not found\n", id);
+		return -1;
+	}
+
+	data = *datap;
+
+	switch (data->kind) {
+	case EXPR_ID_DATA__VALUE:
+		pr_debug2("lookup(%s): val %f\n", id, data->val.val);
+		break;
+	case EXPR_ID_DATA__REF:
+		pr_debug2("lookup(%s): ref metric name %s\n", id,
+			data->ref.metric_name);
+		pr_debug("processing metric: %s ENTRY\n", id);
+		data->kind = EXPR_ID_DATA__REF_VALUE;
+		if (expr__parse(&data->ref.val, ctx, data->ref.metric_expr)) {
+			pr_debug("%s failed to count\n", id);
+			return -1;
+		}
+		pr_debug("processing metric: %s EXIT: %f\n", id, data->ref.val);
+		break;
+	case EXPR_ID_DATA__REF_VALUE:
+		pr_debug2("lookup(%s): ref val %f metric name %s\n", id,
+			data->ref.val, data->ref.metric_name);
+		break;
+	default:
+		assert(0);  /* Unreachable. */
+	}
+
+	return 0;
+}
+
+void expr__del_id(struct expr_parse_ctx *ctx, const char *id)
+{
+	struct expr_id_data *old_val = NULL;
+	char *old_key = NULL;
+
+	hashmap__delete(ctx->ids, id,
+			(const void **)&old_key, (void **)&old_val);
+	free(old_key);
+	free(old_val);
+}
+
+struct expr_parse_ctx *expr__ctx_new(void)
+{
+	struct expr_parse_ctx *ctx;
+
+	ctx = malloc(sizeof(struct expr_parse_ctx));
+	if (!ctx)
+		return NULL;
+
+	ctx->ids = hashmap__new(key_hash, key_equal, NULL);
+	if (IS_ERR(ctx->ids)) {
+		free(ctx);
+		return NULL;
+	}
+	ctx->sctx.user_requested_cpu_list = NULL;
+	ctx->sctx.runtime = 0;
+	ctx->sctx.system_wide = false;
+
+	return ctx;
+}
+
+void expr__ctx_clear(struct expr_parse_ctx *ctx)
+{
+	struct hashmap_entry *cur;
+	size_t bkt;
+
+	hashmap__for_each_entry(ctx->ids, cur, bkt) {
+		free((char *)cur->key);
+		free(cur->value);
+	}
+	hashmap__clear(ctx->ids);
+}
+
+void expr__ctx_free(struct expr_parse_ctx *ctx)
+{
+	struct hashmap_entry *cur;
+	size_t bkt;
+
+	if (!ctx)
+		return;
+
+	free(ctx->sctx.user_requested_cpu_list);
+	hashmap__for_each_entry(ctx->ids, cur, bkt) {
+		free((char *)cur->key);
+		free(cur->value);
+	}
+	hashmap__free(ctx->ids);
+	free(ctx);
+}
+
+static int
+__expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr,
+	      bool compute_ids)
+{
+	YY_BUFFER_STATE buffer;
+	void *scanner;
+	int ret;
+
+	pr_debug2("parsing metric: %s\n", expr);
+
+	ret = expr_lex_init_extra(&ctx->sctx, &scanner);
+	if (ret)
+		return ret;
+
+	buffer = expr__scan_string(expr, scanner);
+
+#ifdef PARSER_DEBUG
+	expr_debug = 1;
+	expr_set_debug(1, scanner);
+#endif
+
+	ret = expr_parse(val, ctx, compute_ids, scanner);
+
+	expr__flush_buffer(buffer, scanner);
+	expr__delete_buffer(buffer, scanner);
+	expr_lex_destroy(scanner);
+	return ret;
+}
+
+int expr__parse(double *final_val, struct expr_parse_ctx *ctx,
+		const char *expr)
+{
+	return __expr__parse(final_val, ctx, expr, /*compute_ids=*/false) ? -1 : 0;
+}
+
+int expr__find_ids(const char *expr, const char *one,
+		   struct expr_parse_ctx *ctx)
+{
+	int ret = __expr__parse(NULL, ctx, expr, /*compute_ids=*/true);
+
+	if (one)
+		expr__del_id(ctx, one);
+
+	return ret;
+}
+
+double expr_id_data__value(const struct expr_id_data *data)
+{
+	if (data->kind == EXPR_ID_DATA__VALUE)
+		return data->val.val;
+	assert(data->kind == EXPR_ID_DATA__REF_VALUE);
+	return data->ref.val;
+}
+
+double expr_id_data__source_count(const struct expr_id_data *data)
+{
+	assert(data->kind == EXPR_ID_DATA__VALUE);
+	return data->val.source_count;
+}
+
+#if !defined(__i386__) && !defined(__x86_64__)
+double arch_get_tsc_freq(void)
+{
+	return 0.0;
+}
+#endif
+
+double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx)
+{
+	static struct cpu_topology *topology;
+	double result = NAN;
+
+	if (!strcmp("#num_cpus", literal)) {
+		result = cpu__max_present_cpu().cpu;
+		goto out;
+	}
+
+	if (!strcasecmp("#system_tsc_freq", literal)) {
+		result = arch_get_tsc_freq();
+		goto out;
+	}
+
+	/*
+	 * Assume that topology strings are consistent, such as CPUs "0-1"
+	 * wouldn't be listed as "0,1", and so after deduplication the number of
+	 * these strings gives an indication of the number of packages, dies,
+	 * etc.
+	 */
+	if (!topology) {
+		topology = cpu_topology__new();
+		if (!topology) {
+			pr_err("Error creating CPU topology");
+			goto out;
+		}
+	}
+	if (!strcasecmp("#smt_on", literal)) {
+		result = smt_on(topology) ? 1.0 : 0.0;
+		goto out;
+	}
+	if (!strcmp("#core_wide", literal)) {
+		result = core_wide(ctx->system_wide, ctx->user_requested_cpu_list, topology)
+			? 1.0 : 0.0;
+		goto out;
+	}
+	if (!strcmp("#num_packages", literal)) {
+		result = topology->package_cpus_lists;
+		goto out;
+	}
+	if (!strcmp("#num_dies", literal)) {
+		result = topology->die_cpus_lists;
+		goto out;
+	}
+	if (!strcmp("#num_cores", literal)) {
+		result = topology->core_cpus_lists;
+		goto out;
+	}
+
+	pr_err("Unrecognized literal '%s'", literal);
+out:
+	pr_debug2("literal: %s = %f\n", literal, result);
+	return result;
+}
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
new file mode 100644
index 000000000..d6c1668dc
--- /dev/null
+++ b/tools/perf/util/expr.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PARSE_CTX_H
+#define PARSE_CTX_H 1
+
+#ifdef HAVE_LIBBPF_SUPPORT
+#include <bpf/hashmap.h>
+#else
+#include "util/hashmap.h"
+#endif
+
+struct metric_ref;
+
+struct expr_scanner_ctx {
+	char *user_requested_cpu_list;
+	int runtime;
+	bool system_wide;
+};
+
+struct expr_parse_ctx {
+	struct hashmap	*ids;
+	struct expr_scanner_ctx sctx;
+};
+
+struct expr_id_data;
+
+struct hashmap *ids__new(void);
+void ids__free(struct hashmap *ids);
+int ids__insert(struct hashmap *ids, const char *id);
+/*
+ * Union two sets of ids (hashmaps) and construct a third, freeing ids1 and
+ * ids2.
+ */
+struct hashmap *ids__union(struct hashmap *ids1, struct hashmap *ids2);
+
+struct expr_parse_ctx *expr__ctx_new(void);
+void expr__ctx_clear(struct expr_parse_ctx *ctx);
+void expr__ctx_free(struct expr_parse_ctx *ctx);
+
+void expr__del_id(struct expr_parse_ctx *ctx, const char *id);
+int expr__add_id(struct expr_parse_ctx *ctx, const char *id);
+int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val);
+int expr__add_id_val_source_count(struct expr_parse_ctx *ctx, const char *id,
+				double val, int source_count);
+int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref);
+int expr__get_id(struct expr_parse_ctx *ctx, const char *id,
+		 struct expr_id_data **data);
+bool expr__subset_of_ids(struct expr_parse_ctx *haystack,
+			 struct expr_parse_ctx *needles);
+int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id,
+		     struct expr_id_data **datap);
+
+int expr__parse(double *final_val, struct expr_parse_ctx *ctx,
+		const char *expr);
+
+int expr__find_ids(const char *expr, const char *one,
+		   struct expr_parse_ctx *ids);
+
+double expr_id_data__value(const struct expr_id_data *data);
+double expr_id_data__source_count(const struct expr_id_data *data);
+double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx);
+
+#endif
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
new file mode 100644
index 000000000..0168a9637
--- /dev/null
+++ b/tools/perf/util/expr.l
@@ -0,0 +1,133 @@
+%option prefix="expr_"
+%option reentrant
+%option bison-bridge
+
+%{
+#include <linux/compiler.h>
+#include "expr.h"
+#include "expr-bison.h"
+#include <math.h>
+
+char *expr_get_text(yyscan_t yyscanner);
+YYSTYPE *expr_get_lval(yyscan_t yyscanner);
+
+static double __value(YYSTYPE *yylval, char *str, int token)
+{
+	double num;
+
+	errno = 0;
+	num = strtod(str, NULL);
+	if (errno)
+		return EXPR_ERROR;
+
+	yylval->num = num;
+	return token;
+}
+
+static int value(yyscan_t scanner)
+{
+	YYSTYPE *yylval = expr_get_lval(scanner);
+	char *text = expr_get_text(scanner);
+
+	return __value(yylval, text, NUMBER);
+}
+
+/*
+ * Allow @ instead of / to be able to specify pmu/event/ without
+ * conflicts with normal division.
+ */
+static char *normalize(char *str, int runtime)
+{
+	char *ret = str;
+	char *dst = str;
+
+	while (*str) {
+		if (*str == '\\')
+			*dst++ = *++str;
+		else if (*str == '?') {
+			char *paramval;
+			int i = 0;
+			int size = asprintf(&paramval, "%d", runtime);
+
+			if (size < 0)
+				*dst++ = '0';
+			else {
+				while (i < size)
+					*dst++ = paramval[i++];
+				free(paramval);
+			}
+		}
+		else
+			*dst++ = *str;
+		str++;
+	}
+
+	*dst = 0x0;
+	return ret;
+}
+
+static int str(yyscan_t scanner, int token, int runtime)
+{
+	YYSTYPE *yylval = expr_get_lval(scanner);
+	char *text = expr_get_text(scanner);
+
+	yylval->str = normalize(strdup(text), runtime);
+	if (!yylval->str)
+		return EXPR_ERROR;
+
+	yylval->str = normalize(yylval->str, runtime);
+	return token;
+}
+
+static int literal(yyscan_t scanner, const struct expr_scanner_ctx *sctx)
+{
+	YYSTYPE *yylval = expr_get_lval(scanner);
+
+	yylval->num = expr__get_literal(expr_get_text(scanner), sctx);
+	if (isnan(yylval->num))
+		return EXPR_ERROR;
+
+	return LITERAL;
+}
+%}
+
+number		([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+)(e-?[0-9]+)?
+
+sch		[-,=]
+spec		\\{sch}
+sym		[0-9a-zA-Z_\.:@?]+
+symbol		({spec}|{sym})+
+literal		#[0-9a-zA-Z_\.\-]+
+
+%%
+	struct expr_scanner_ctx *sctx = expr_get_extra(yyscanner);
+
+d_ratio		{ return D_RATIO; }
+max		{ return MAX; }
+min		{ return MIN; }
+if		{ return IF; }
+else		{ return ELSE; }
+source_count	{ return SOURCE_COUNT; }
+{literal}	{ return literal(yyscanner, sctx); }
+{number}	{ return value(yyscanner); }
+{symbol}	{ return str(yyscanner, ID, sctx->runtime); }
+"|"		{ return '|'; }
+"^"		{ return '^'; }
+"&"		{ return '&'; }
+"<"		{ return '<'; }
+">"		{ return '>'; }
+"-"		{ return '-'; }
+"+"		{ return '+'; }
+"*"		{ return '*'; }
+"/"		{ return '/'; }
+"%"		{ return '%'; }
+"("		{ return '('; }
+")"		{ return ')'; }
+","		{ return ','; }
+.		{ }
+%%
+
+int expr_wrap(void *scanner __maybe_unused)
+{
+	return 1;
+}
diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y
new file mode 100644
index 000000000..635e56235
--- /dev/null
+++ b/tools/perf/util/expr.y
@@ -0,0 +1,301 @@
+/* Simple expression parser */
+%{
+#define YYDEBUG 1
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>
+#include "util/debug.h"
+#define IN_EXPR_Y 1
+#include "expr.h"
+%}
+
+%define api.pure full
+
+%parse-param { double *final_val }
+%parse-param { struct expr_parse_ctx *ctx }
+%parse-param { bool compute_ids }
+%parse-param {void *scanner}
+%lex-param {void* scanner}
+
+%union {
+	double	 num;
+	char	*str;
+	struct ids {
+		/*
+		 * When creating ids, holds the working set of event ids. NULL
+		 * implies the set is empty.
+		 */
+		struct hashmap *ids;
+		/*
+		 * The metric value. When not creating ids this is the value
+		 * read from a counter, a constant or some computed value. When
+		 * creating ids the value is either a constant or BOTTOM. NAN is
+		 * used as the special BOTTOM value, representing a "set of all
+		 * values" case.
+		 */
+		double val;
+	} ids;
+}
+
+%token ID NUMBER MIN MAX IF ELSE LITERAL D_RATIO SOURCE_COUNT EXPR_ERROR
+%left MIN MAX IF
+%left '|'
+%left '^'
+%left '&'
+%left '<' '>'
+%left '-' '+'
+%left '*' '/' '%'
+%left NEG NOT
+%type <num> NUMBER LITERAL
+%type <str> ID
+%destructor { free ($$); } <str>
+%type <ids> expr if_expr
+%destructor { ids__free($$.ids); } <ids>
+
+%{
+static void expr_error(double *final_val __maybe_unused,
+		       struct expr_parse_ctx *ctx __maybe_unused,
+		       bool compute_ids __maybe_unused,
+		       void *scanner,
+		       const char *s)
+{
+	pr_debug("%s\n", s);
+}
+
+/*
+ * During compute ids, the special "bottom" value uses NAN to represent the set
+ * of all values. NAN is selected as it isn't a useful constant value.
+ */
+#define BOTTOM NAN
+
+/* During computing ids, does val represent a constant (non-BOTTOM) value? */
+static bool is_const(double val)
+{
+	return isfinite(val);
+}
+
+static struct ids union_expr(struct ids ids1, struct ids ids2)
+{
+	struct ids result = {
+		.val = BOTTOM,
+		.ids = ids__union(ids1.ids, ids2.ids),
+	};
+	return result;
+}
+
+static struct ids handle_id(struct expr_parse_ctx *ctx, char *id,
+			    bool compute_ids, bool source_count)
+{
+	struct ids result;
+
+	if (!compute_ids) {
+		/*
+		 * Compute the event's value from ID. If the ID isn't known then
+		 * it isn't used to compute the formula so set to NAN.
+		 */
+		struct expr_id_data *data;
+
+		result.val = NAN;
+		if (expr__resolve_id(ctx, id, &data) == 0) {
+			result.val = source_count
+				? expr_id_data__source_count(data)
+				: expr_id_data__value(data);
+		}
+		result.ids = NULL;
+		free(id);
+	} else {
+		/*
+		 * Set the value to BOTTOM to show that any value is possible
+		 * when the event is computed. Create a set of just the ID.
+		 */
+		result.val = BOTTOM;
+		result.ids = ids__new();
+		if (!result.ids || ids__insert(result.ids, id)) {
+			pr_err("Error creating IDs for '%s'", id);
+			free(id);
+		}
+	}
+	return result;
+}
+
+/*
+ * If we're not computing ids or $1 and $3 are constants, compute the new
+ * constant value using OP. Its invariant that there are no ids.  If computing
+ * ids for non-constants union the set of IDs that must be computed.
+ */
+#define BINARY_LONG_OP(RESULT, OP, LHS, RHS)				\
+	if (!compute_ids || (is_const(LHS.val) && is_const(RHS.val))) { \
+		assert(LHS.ids == NULL);				\
+		assert(RHS.ids == NULL);				\
+		RESULT.val = (long)LHS.val OP (long)RHS.val;		\
+		RESULT.ids = NULL;					\
+	} else {							\
+	        RESULT = union_expr(LHS, RHS);				\
+	}
+
+#define BINARY_OP(RESULT, OP, LHS, RHS)					\
+	if (!compute_ids || (is_const(LHS.val) && is_const(RHS.val))) { \
+		assert(LHS.ids == NULL);				\
+		assert(RHS.ids == NULL);				\
+		RESULT.val = LHS.val OP RHS.val;			\
+		RESULT.ids = NULL;					\
+	} else {							\
+	        RESULT = union_expr(LHS, RHS);				\
+	}
+
+%}
+%%
+
+start: if_expr
+{
+	if (compute_ids)
+		ctx->ids = ids__union($1.ids, ctx->ids);
+
+	if (final_val)
+		*final_val = $1.val;
+}
+;
+
+if_expr: expr IF expr ELSE if_expr
+{
+	if (fpclassify($3.val) == FP_ZERO) {
+		/*
+		 * The IF expression evaluated to 0 so treat as false, take the
+		 * ELSE and discard everything else.
+		 */
+		$$.val = $5.val;
+		$$.ids = $5.ids;
+		ids__free($1.ids);
+		ids__free($3.ids);
+	} else if (!compute_ids || is_const($3.val)) {
+		/*
+		 * If ids aren't computed then treat the expression as true. If
+		 * ids are being computed and the IF expr is a non-zero
+		 * constant, then also evaluate the true case.
+		 */
+		$$.val = $1.val;
+		$$.ids = $1.ids;
+		ids__free($3.ids);
+		ids__free($5.ids);
+	} else if ($1.val == $5.val) {
+		/*
+		 * LHS == RHS, so both are an identical constant. No need to
+		 * evaluate any events.
+		 */
+		$$.val = $1.val;
+		$$.ids = NULL;
+		ids__free($1.ids);
+		ids__free($3.ids);
+		ids__free($5.ids);
+	} else {
+		/*
+		 * Value is either the LHS or RHS and we need the IF expression
+		 * to compute it.
+		 */
+		$$ = union_expr($1, union_expr($3, $5));
+	}
+}
+| expr
+;
+
+expr: NUMBER
+{
+	$$.val = $1;
+	$$.ids = NULL;
+}
+| ID				{ $$ = handle_id(ctx, $1, compute_ids, /*source_count=*/false); }
+| SOURCE_COUNT '(' ID ')'	{ $$ = handle_id(ctx, $3, compute_ids, /*source_count=*/true); }
+| expr '|' expr { BINARY_LONG_OP($$, |, $1, $3); }
+| expr '&' expr { BINARY_LONG_OP($$, &, $1, $3); }
+| expr '^' expr { BINARY_LONG_OP($$, ^, $1, $3); }
+| expr '<' expr { BINARY_OP($$, <, $1, $3); }
+| expr '>' expr { BINARY_OP($$, >, $1, $3); }
+| expr '+' expr { BINARY_OP($$, +, $1, $3); }
+| expr '-' expr { BINARY_OP($$, -, $1, $3); }
+| expr '*' expr { BINARY_OP($$, *, $1, $3); }
+| expr '/' expr
+{
+	if (fpclassify($3.val) == FP_ZERO) {
+		pr_debug("division by zero\n");
+		YYABORT;
+	} else if (!compute_ids || (is_const($1.val) && is_const($3.val))) {
+		assert($1.ids == NULL);
+		assert($3.ids == NULL);
+		$$.val = $1.val / $3.val;
+		$$.ids = NULL;
+	} else {
+		/* LHS and/or RHS need computing from event IDs so union. */
+		$$ = union_expr($1, $3);
+	}
+}
+| expr '%' expr
+{
+	if (fpclassify($3.val) == FP_ZERO) {
+		pr_debug("division by zero\n");
+		YYABORT;
+	} else if (!compute_ids || (is_const($1.val) && is_const($3.val))) {
+		assert($1.ids == NULL);
+		assert($3.ids == NULL);
+		$$.val = (long)$1.val % (long)$3.val;
+		$$.ids = NULL;
+	} else {
+		/* LHS and/or RHS need computing from event IDs so union. */
+		$$ = union_expr($1, $3);
+	}
+}
+| D_RATIO '(' expr ',' expr ')'
+{
+	if (fpclassify($5.val) == FP_ZERO) {
+		/*
+		 * Division by constant zero always yields zero and no events
+		 * are necessary.
+		 */
+		assert($5.ids == NULL);
+		$$.val = 0.0;
+		$$.ids = NULL;
+		ids__free($3.ids);
+	} else if (!compute_ids || (is_const($3.val) && is_const($5.val))) {
+		assert($3.ids == NULL);
+		assert($5.ids == NULL);
+		$$.val = $3.val / $5.val;
+		$$.ids = NULL;
+	} else {
+		/* LHS and/or RHS need computing from event IDs so union. */
+		$$ = union_expr($3, $5);
+	}
+}
+| '-' expr %prec NEG
+{
+	$$.val = -$2.val;
+	$$.ids = $2.ids;
+}
+| '(' if_expr ')'
+{
+	$$ = $2;
+}
+| MIN '(' expr ',' expr ')'
+{
+	if (!compute_ids) {
+		$$.val = $3.val < $5.val ? $3.val : $5.val;
+		$$.ids = NULL;
+	} else {
+		$$ = union_expr($3, $5);
+	}
+}
+| MAX '(' expr ',' expr ')'
+{
+	if (!compute_ids) {
+		$$.val = $3.val > $5.val ? $3.val : $5.val;
+		$$.ids = NULL;
+	} else {
+		$$ = union_expr($3, $5);
+	}
+}
+| LITERAL
+{
+	$$.val = $1;
+	$$.ids = NULL;
+}
+;
+
+%%
diff --git a/tools/perf/util/find-map.c b/tools/perf/util/find-map.c
new file mode 100644
index 000000000..7b2300588
--- /dev/null
+++ b/tools/perf/util/find-map.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+static int find_map(void **start, void **end, const char *name)
+{
+	FILE *maps;
+	char line[128];
+	int found = 0;
+
+	maps = fopen("/proc/self/maps", "r");
+	if (!maps) {
+		fprintf(stderr, "cannot open maps\n");
+		return -1;
+	}
+
+	while (!found && fgets(line, sizeof(line), maps)) {
+		int m = -1;
+
+		/* We care only about private r-x mappings. */
+		if (2 != sscanf(line, "%p-%p r-xp %*x %*x:%*x %*u %n",
+				start, end, &m))
+			continue;
+		if (m < 0)
+			continue;
+
+		if (!strncmp(&line[m], name, strlen(name)))
+			found = 1;
+	}
+
+	fclose(maps);
+	return !found;
+}
diff --git a/tools/perf/util/fncache.c b/tools/perf/util/fncache.c
new file mode 100644
index 000000000..6225cbc52
--- /dev/null
+++ b/tools/perf/util/fncache.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Manage a cache of file names' existence */
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <linux/list.h>
+#include "fncache.h"
+
+struct fncache {
+	struct hlist_node nd;
+	bool res;
+	char name[];
+};
+
+#define FNHSIZE 61
+
+static struct hlist_head fncache_hash[FNHSIZE];
+
+unsigned shash(const unsigned char *s)
+{
+	unsigned h = 0;
+	while (*s)
+		h = 65599 * h + *s++;
+	return h ^ (h >> 16);
+}
+
+static bool lookup_fncache(const char *name, bool *res)
+{
+	int h = shash((const unsigned char *)name) % FNHSIZE;
+	struct fncache *n;
+
+	hlist_for_each_entry(n, &fncache_hash[h], nd) {
+		if (!strcmp(n->name, name)) {
+			*res = n->res;
+			return true;
+		}
+	}
+	return false;
+}
+
+static void update_fncache(const char *name, bool res)
+{
+	struct fncache *n = malloc(sizeof(struct fncache) + strlen(name) + 1);
+	int h = shash((const unsigned char *)name) % FNHSIZE;
+
+	if (!n)
+		return;
+	strcpy(n->name, name);
+	n->res = res;
+	hlist_add_head(&n->nd, &fncache_hash[h]);
+}
+
+/* No LRU, only use when bounded in some other way. */
+bool file_available(const char *name)
+{
+	bool res;
+
+	if (lookup_fncache(name, &res))
+		return res;
+	res = access(name, R_OK) == 0;
+	update_fncache(name, res);
+	return res;
+}
diff --git a/tools/perf/util/fncache.h b/tools/perf/util/fncache.h
new file mode 100644
index 000000000..fe020beae
--- /dev/null
+++ b/tools/perf/util/fncache.h
@@ -0,0 +1,7 @@
+#ifndef _FCACHE_H
+#define _FCACHE_H 1
+
+unsigned shash(const unsigned char *s);
+bool file_available(const char *name);
+
+#endif
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
new file mode 100644
index 000000000..a34cd1573
--- /dev/null
+++ b/tools/perf/util/ftrace.h
@@ -0,0 +1,82 @@
+#ifndef __PERF_FTRACE_H__
+#define __PERF_FTRACE_H__
+
+#include <linux/list.h>
+
+#include "target.h"
+
+struct evlist;
+
+struct perf_ftrace {
+	struct evlist		*evlist;
+	struct target		target;
+	const char		*tracer;
+	struct list_head	filters;
+	struct list_head	notrace;
+	struct list_head	graph_funcs;
+	struct list_head	nograph_funcs;
+	unsigned long		percpu_buffer_size;
+	bool			inherit;
+	bool			use_nsec;
+	int			graph_depth;
+	int			func_stack_trace;
+	int			func_irq_info;
+	int			graph_nosleep_time;
+	int			graph_noirqs;
+	int			graph_verbose;
+	int			graph_thresh;
+	unsigned int		initial_delay;
+};
+
+struct filter_entry {
+	struct list_head	list;
+	char			name[];
+};
+
+#define NUM_BUCKET  22  /* 20 + 2 (for outliers in both direction) */
+
+#ifdef HAVE_BPF_SKEL
+
+int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace);
+int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace);
+int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace);
+int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
+				  int buckets[]);
+int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace);
+
+#else  /* !HAVE_BPF_SKEL */
+
+static inline int
+perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+	return -1;
+}
+
+static inline int
+perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+	return -1;
+}
+
+static inline int
+perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+	return -1;
+}
+
+static inline int
+perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
+			      int buckets[] __maybe_unused)
+{
+	return -1;
+}
+
+static inline int
+perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+	return -1;
+}
+
+#endif  /* HAVE_BPF_SKEL */
+
+#endif  /* __PERF_FTRACE_H__ */
diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
new file mode 100644
index 000000000..ac17a3cb5
--- /dev/null
+++ b/tools/perf/util/genelf.c
@@ -0,0 +1,529 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * genelf.c
+ * Copyright (C) 2014, Google, Inc
+ *
+ * Contributed by:
+ * 	Stephane Eranian <eranian@gmail.com>
+ */
+
+#include <sys/types.h>
+#include <stddef.h>
+#include <libelf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <fcntl.h>
+#include <err.h>
+#ifdef HAVE_DWARF_SUPPORT
+#include <dwarf.h>
+#endif
+
+#include "genelf.h"
+#include "../util/jitdump.h"
+#include <linux/compiler.h>
+
+#ifndef NT_GNU_BUILD_ID
+#define NT_GNU_BUILD_ID 3
+#endif
+
+#define BUILD_ID_URANDOM /* different uuid for each run */
+
+#ifdef HAVE_LIBCRYPTO_SUPPORT
+
+#define BUILD_ID_MD5
+#undef BUILD_ID_SHA	/* does not seem to work well when linked with Java */
+#undef BUILD_ID_URANDOM /* different uuid for each run */
+
+#ifdef BUILD_ID_SHA
+#include <openssl/sha.h>
+#endif
+
+#ifdef BUILD_ID_MD5
+#include <openssl/evp.h>
+#include <openssl/md5.h>
+#endif
+#endif
+
+
+typedef struct {
+  unsigned int namesz;  /* Size of entry's owner string */
+  unsigned int descsz;  /* Size of the note descriptor */
+  unsigned int type;    /* Interpretation of the descriptor */
+  char         name[0]; /* Start of the name+desc data */
+} Elf_Note;
+
+struct options {
+	char *output;
+	int fd;
+};
+
+static char shd_string_table[] = {
+	0,
+	'.', 't', 'e', 'x', 't', 0,			/*  1 */
+	'.', 's', 'h', 's', 't', 'r', 't', 'a', 'b', 0, /*  7 */
+	'.', 's', 'y', 'm', 't', 'a', 'b', 0,		/* 17 */
+	'.', 's', 't', 'r', 't', 'a', 'b', 0,		/* 25 */
+	'.', 'n', 'o', 't', 'e', '.', 'g', 'n', 'u', '.', 'b', 'u', 'i', 'l', 'd', '-', 'i', 'd', 0, /* 33 */
+	'.', 'd', 'e', 'b', 'u', 'g', '_', 'l', 'i', 'n', 'e', 0, /* 52 */
+	'.', 'd', 'e', 'b', 'u', 'g', '_', 'i', 'n', 'f', 'o', 0, /* 64 */
+	'.', 'd', 'e', 'b', 'u', 'g', '_', 'a', 'b', 'b', 'r', 'e', 'v', 0, /* 76 */
+	'.', 'e', 'h', '_', 'f', 'r', 'a', 'm', 'e', '_', 'h', 'd', 'r', 0, /* 90 */
+	'.', 'e', 'h', '_', 'f', 'r', 'a', 'm', 'e', 0, /* 104 */
+};
+
+static struct buildid_note {
+	Elf_Note desc;		/* descsz: size of build-id, must be multiple of 4 */
+	char	 name[4];	/* GNU\0 */
+	char	 build_id[20];
+} bnote;
+
+static Elf_Sym symtab[]={
+	/* symbol 0 MUST be the undefined symbol */
+	{ .st_name  = 0, /* index in sym_string table */
+	  .st_info  = ELF_ST_TYPE(STT_NOTYPE),
+	  .st_shndx = 0, /* for now */
+	  .st_value = 0x0,
+	  .st_other = ELF_ST_VIS(STV_DEFAULT),
+	  .st_size  = 0,
+	},
+	{ .st_name  = 1, /* index in sym_string table */
+	  .st_info  = ELF_ST_BIND(STB_LOCAL) | ELF_ST_TYPE(STT_FUNC),
+	  .st_shndx = 1,
+	  .st_value = 0, /* for now */
+	  .st_other = ELF_ST_VIS(STV_DEFAULT),
+	  .st_size  = 0, /* for now */
+	}
+};
+
+#ifdef BUILD_ID_URANDOM
+static void
+gen_build_id(struct buildid_note *note,
+	     unsigned long load_addr __maybe_unused,
+	     const void *code __maybe_unused,
+	     size_t csize __maybe_unused)
+{
+	int fd;
+	size_t sz = sizeof(note->build_id);
+	ssize_t sret;
+
+	fd = open("/dev/urandom", O_RDONLY);
+	if (fd == -1)
+		err(1, "cannot access /dev/urandom for buildid");
+
+	sret = read(fd, note->build_id, sz);
+
+	close(fd);
+
+	if (sret != (ssize_t)sz)
+		memset(note->build_id, 0, sz);
+}
+#endif
+
+#ifdef BUILD_ID_SHA
+static void
+gen_build_id(struct buildid_note *note,
+	     unsigned long load_addr __maybe_unused,
+	     const void *code,
+	     size_t csize)
+{
+	if (sizeof(note->build_id) < SHA_DIGEST_LENGTH)
+		errx(1, "build_id too small for SHA1");
+
+	SHA1(code, csize, (unsigned char *)note->build_id);
+}
+#endif
+
+#ifdef BUILD_ID_MD5
+static void
+gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *code, size_t csize)
+{
+	EVP_MD_CTX *mdctx;
+
+	if (sizeof(note->build_id) < 16)
+		errx(1, "build_id too small for MD5");
+
+	mdctx = EVP_MD_CTX_new();
+	if (!mdctx)
+		errx(2, "failed to create EVP_MD_CTX");
+
+	EVP_DigestInit_ex(mdctx, EVP_md5(), NULL);
+	EVP_DigestUpdate(mdctx, &load_addr, sizeof(load_addr));
+	EVP_DigestUpdate(mdctx, code, csize);
+	EVP_DigestFinal_ex(mdctx, (unsigned char *)note->build_id, NULL);
+	EVP_MD_CTX_free(mdctx);
+}
+#endif
+
+static int
+jit_add_eh_frame_info(Elf *e, void* unwinding, uint64_t unwinding_header_size,
+		      uint64_t unwinding_size, uint64_t base_offset)
+{
+	Elf_Data *d;
+	Elf_Scn *scn;
+	Elf_Shdr *shdr;
+	uint64_t unwinding_table_size = unwinding_size - unwinding_header_size;
+
+	/*
+	 * setup eh_frame section
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		return -1;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		return -1;
+	}
+
+	d->d_align = 8;
+	d->d_off = 0LL;
+	d->d_buf = unwinding;
+	d->d_type = ELF_T_BYTE;
+	d->d_size = unwinding_table_size;
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		return -1;
+	}
+
+	shdr->sh_name = 104;
+	shdr->sh_type = SHT_PROGBITS;
+	shdr->sh_addr = base_offset;
+	shdr->sh_flags = SHF_ALLOC;
+	shdr->sh_entsize = 0;
+
+	/*
+	 * setup eh_frame_hdr section
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		return -1;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		return -1;
+	}
+
+	d->d_align = 4;
+	d->d_off = 0LL;
+	d->d_buf = unwinding + unwinding_table_size;
+	d->d_type = ELF_T_BYTE;
+	d->d_size = unwinding_header_size;
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		return -1;
+	}
+
+	shdr->sh_name = 90;
+	shdr->sh_type = SHT_PROGBITS;
+	shdr->sh_addr = base_offset + unwinding_table_size;
+	shdr->sh_flags = SHF_ALLOC;
+	shdr->sh_entsize = 0;
+
+	return 0;
+}
+
+/*
+ * fd: file descriptor open for writing for the output file
+ * load_addr: code load address (could be zero, just used for buildid)
+ * sym: function name (for native code - used as the symbol)
+ * code: the native code
+ * csize: the code size in bytes
+ */
+int
+jit_write_elf(int fd, uint64_t load_addr, const char *sym,
+	      const void *code, int csize,
+	      void *debug __maybe_unused, int nr_debug_entries __maybe_unused,
+	      void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size)
+{
+	Elf *e;
+	Elf_Data *d;
+	Elf_Scn *scn;
+	Elf_Ehdr *ehdr;
+	Elf_Phdr *phdr;
+	Elf_Shdr *shdr;
+	uint64_t eh_frame_base_offset;
+	char *strsym = NULL;
+	int symlen;
+	int retval = -1;
+
+	if (elf_version(EV_CURRENT) == EV_NONE) {
+		warnx("ELF initialization failed");
+		return -1;
+	}
+
+	e = elf_begin(fd, ELF_C_WRITE, NULL);
+	if (!e) {
+		warnx("elf_begin failed");
+		goto error;
+	}
+
+	/*
+	 * setup ELF header
+	 */
+	ehdr = elf_newehdr(e);
+	if (!ehdr) {
+		warnx("cannot get ehdr");
+		goto error;
+	}
+
+	ehdr->e_ident[EI_DATA] = GEN_ELF_ENDIAN;
+	ehdr->e_ident[EI_CLASS] = GEN_ELF_CLASS;
+	ehdr->e_machine = GEN_ELF_ARCH;
+	ehdr->e_type = ET_DYN;
+	ehdr->e_entry = GEN_ELF_TEXT_OFFSET;
+	ehdr->e_version = EV_CURRENT;
+	ehdr->e_shstrndx= unwinding ? 4 : 2; /* shdr index for section name */
+
+	/*
+	 * setup program header
+	 */
+	phdr = elf_newphdr(e, 1);
+	phdr[0].p_type = PT_LOAD;
+	phdr[0].p_offset = GEN_ELF_TEXT_OFFSET;
+	phdr[0].p_vaddr = GEN_ELF_TEXT_OFFSET;
+	phdr[0].p_paddr = GEN_ELF_TEXT_OFFSET;
+	phdr[0].p_filesz = csize;
+	phdr[0].p_memsz = csize;
+	phdr[0].p_flags = PF_X | PF_R;
+	phdr[0].p_align = 8;
+
+	/*
+	 * setup text section
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		goto error;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		goto error;
+	}
+
+	d->d_align = 16;
+	d->d_off = 0LL;
+	d->d_buf = (void *)code;
+	d->d_type = ELF_T_BYTE;
+	d->d_size = csize;
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		goto error;
+	}
+
+	shdr->sh_name = 1;
+	shdr->sh_type = SHT_PROGBITS;
+	shdr->sh_addr = GEN_ELF_TEXT_OFFSET;
+	shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	shdr->sh_entsize = 0;
+
+	/*
+	 * Setup .eh_frame_hdr and .eh_frame
+	 */
+	if (unwinding) {
+		eh_frame_base_offset = ALIGN_8(GEN_ELF_TEXT_OFFSET + csize);
+		retval = jit_add_eh_frame_info(e, unwinding,
+					       unwinding_header_size, unwinding_size,
+					       eh_frame_base_offset);
+		if (retval)
+			goto error;
+		retval = -1;
+	}
+
+	/*
+	 * setup section headers string table
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		goto error;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		goto error;
+	}
+
+	d->d_align = 1;
+	d->d_off = 0LL;
+	d->d_buf = shd_string_table;
+	d->d_type = ELF_T_BYTE;
+	d->d_size = sizeof(shd_string_table);
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		goto error;
+	}
+
+	shdr->sh_name = 7; /* offset of '.shstrtab' in shd_string_table */
+	shdr->sh_type = SHT_STRTAB;
+	shdr->sh_flags = 0;
+	shdr->sh_entsize = 0;
+
+	/*
+	 * setup symtab section
+	 */
+	symtab[1].st_size  = csize;
+	symtab[1].st_value = GEN_ELF_TEXT_OFFSET;
+
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		goto error;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		goto error;
+	}
+
+	d->d_align = 8;
+	d->d_off = 0LL;
+	d->d_buf = symtab;
+	d->d_type = ELF_T_SYM;
+	d->d_size = sizeof(symtab);
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		goto error;
+	}
+
+	shdr->sh_name = 17; /* offset of '.symtab' in shd_string_table */
+	shdr->sh_type = SHT_SYMTAB;
+	shdr->sh_flags = 0;
+	shdr->sh_entsize = sizeof(Elf_Sym);
+	shdr->sh_link = unwinding ? 6 : 4; /* index of .strtab section */
+
+	/*
+	 * setup symbols string table
+	 * 2 = 1 for 0 in 1st entry, 1 for the 0 at end of symbol for 2nd entry
+	 */
+	symlen = 2 + strlen(sym);
+	strsym = calloc(1, symlen);
+	if (!strsym) {
+		warnx("cannot allocate strsym");
+		goto error;
+	}
+	strcpy(strsym + 1, sym);
+
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		goto error;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		goto error;
+	}
+
+	d->d_align = 1;
+	d->d_off = 0LL;
+	d->d_buf = strsym;
+	d->d_type = ELF_T_BYTE;
+	d->d_size = symlen;
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		goto error;
+	}
+
+	shdr->sh_name = 25; /* offset in shd_string_table */
+	shdr->sh_type = SHT_STRTAB;
+	shdr->sh_flags = 0;
+	shdr->sh_entsize = 0;
+
+	/*
+	 * setup build-id section
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		goto error;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		goto error;
+	}
+
+	/*
+	 * build-id generation
+	 */
+	gen_build_id(&bnote, load_addr, code, csize);
+	bnote.desc.namesz = sizeof(bnote.name); /* must include 0 termination */
+	bnote.desc.descsz = sizeof(bnote.build_id);
+	bnote.desc.type   = NT_GNU_BUILD_ID;
+	strcpy(bnote.name, "GNU");
+
+	d->d_align = 4;
+	d->d_off = 0LL;
+	d->d_buf = &bnote;
+	d->d_type = ELF_T_BYTE;
+	d->d_size = sizeof(bnote);
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		goto error;
+	}
+
+	shdr->sh_name = 33; /* offset in shd_string_table */
+	shdr->sh_type = SHT_NOTE;
+	shdr->sh_addr = 0x0;
+	shdr->sh_flags = SHF_ALLOC;
+	shdr->sh_size = sizeof(bnote);
+	shdr->sh_entsize = 0;
+
+#ifdef HAVE_DWARF_SUPPORT
+	if (debug && nr_debug_entries) {
+		retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries);
+		if (retval)
+			goto error;
+	} else
+#endif
+	{
+		if (elf_update(e, ELF_C_WRITE) < 0) {
+			warnx("elf_update 4 failed");
+			goto error;
+		}
+	}
+
+	retval = 0;
+error:
+	(void)elf_end(e);
+
+	free(strsym);
+
+
+	return retval;
+}
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
new file mode 100644
index 000000000..6af062d1c
--- /dev/null
+++ b/tools/perf/util/genelf.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __GENELF_H__
+#define __GENELF_H__
+
+#include <linux/math.h>
+
+/* genelf.c */
+int jit_write_elf(int fd, uint64_t code_addr, const char *sym,
+		  const void *code, int csize, void *debug, int nr_debug_entries,
+		  void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size);
+#ifdef HAVE_DWARF_SUPPORT
+/* genelf_debug.c */
+int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries);
+#endif
+
+#if   defined(__arm__)
+#define GEN_ELF_ARCH	EM_ARM
+#define GEN_ELF_CLASS	ELFCLASS32
+#elif defined(__aarch64__)
+#define GEN_ELF_ARCH	EM_AARCH64
+#define GEN_ELF_CLASS	ELFCLASS64
+#elif defined(__x86_64__)
+#define GEN_ELF_ARCH	EM_X86_64
+#define GEN_ELF_CLASS	ELFCLASS64
+#elif defined(__i386__)
+#define GEN_ELF_ARCH	EM_386
+#define GEN_ELF_CLASS	ELFCLASS32
+#elif defined(__powerpc64__)
+#define GEN_ELF_ARCH	EM_PPC64
+#define GEN_ELF_CLASS	ELFCLASS64
+#elif defined(__powerpc__)
+#define GEN_ELF_ARCH	EM_PPC
+#define GEN_ELF_CLASS	ELFCLASS32
+#elif defined(__sparc__) && defined(__arch64__)
+#define GEN_ELF_ARCH	EM_SPARCV9
+#define GEN_ELF_CLASS	ELFCLASS64
+#elif defined(__sparc__)
+#define GEN_ELF_ARCH	EM_SPARC
+#define GEN_ELF_CLASS	ELFCLASS32
+#elif defined(__s390x__)
+#define GEN_ELF_ARCH	EM_S390
+#define GEN_ELF_CLASS	ELFCLASS64
+#elif defined(__riscv) && __riscv_xlen == 64
+#define GEN_ELF_ARCH	EM_RISCV
+#define GEN_ELF_CLASS	ELFCLASS64
+#else
+#error "unsupported architecture"
+#endif
+
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define GEN_ELF_ENDIAN	ELFDATA2MSB
+#else
+#define GEN_ELF_ENDIAN	ELFDATA2LSB
+#endif
+
+#if GEN_ELF_CLASS == ELFCLASS64
+#define elf_newehdr	elf64_newehdr
+#define elf_newphdr	elf64_newphdr
+#define elf_getshdr	elf64_getshdr
+#define Elf_Ehdr	Elf64_Ehdr
+#define Elf_Phdr	Elf64_Phdr
+#define Elf_Shdr	Elf64_Shdr
+#define Elf_Sym		Elf64_Sym
+#define ELF_ST_TYPE(a)	ELF64_ST_TYPE(a)
+#define ELF_ST_BIND(a)	ELF64_ST_BIND(a)
+#define ELF_ST_VIS(a)	ELF64_ST_VISIBILITY(a)
+#else
+#define elf_newehdr	elf32_newehdr
+#define elf_newphdr	elf32_newphdr
+#define elf_getshdr	elf32_getshdr
+#define Elf_Ehdr	Elf32_Ehdr
+#define Elf_Phdr	Elf32_Phdr
+#define Elf_Shdr	Elf32_Shdr
+#define Elf_Sym		Elf32_Sym
+#define ELF_ST_TYPE(a)	ELF32_ST_TYPE(a)
+#define ELF_ST_BIND(a)	ELF32_ST_BIND(a)
+#define ELF_ST_VIS(a)	ELF32_ST_VISIBILITY(a)
+#endif
+
+/* The .text section is directly after the ELF header */
+#define GEN_ELF_TEXT_OFFSET round_up(sizeof(Elf_Ehdr) + sizeof(Elf_Phdr), 16)
+
+#endif
diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c
new file mode 100644
index 000000000..dd40683bd
--- /dev/null
+++ b/tools/perf/util/genelf_debug.c
@@ -0,0 +1,609 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * genelf_debug.c
+ * Copyright (C) 2015, Google, Inc
+ *
+ * Contributed by:
+ * 	Stephane Eranian <eranian@google.com>
+ *
+ * based on GPLv2 source code from Oprofile
+ * @remark Copyright 2007 OProfile authors
+ * @author Philippe Elie
+ */
+#include <linux/compiler.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <libelf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <err.h>
+#include <dwarf.h>
+
+#include "genelf.h"
+#include "../util/jitdump.h"
+
+#define BUFFER_EXT_DFL_SIZE	(4 * 1024)
+
+typedef uint32_t uword;
+typedef uint16_t uhalf;
+typedef int32_t  sword;
+typedef int16_t  shalf;
+typedef uint8_t  ubyte;
+typedef int8_t   sbyte;
+
+struct buffer_ext {
+	size_t cur_pos;
+	size_t max_sz;
+	void *data;
+};
+
+static void
+buffer_ext_dump(struct buffer_ext *be, const char *msg)
+{
+	size_t i;
+	warnx("DUMP for %s", msg);
+	for (i = 0 ; i < be->cur_pos; i++)
+		warnx("%4zu 0x%02x", i, (((char *)be->data)[i]) & 0xff);
+}
+
+static inline int
+buffer_ext_add(struct buffer_ext *be, void *addr, size_t sz)
+{
+	void *tmp;
+	size_t be_sz = be->max_sz;
+
+retry:
+	if ((be->cur_pos + sz) < be_sz) {
+		memcpy(be->data + be->cur_pos, addr, sz);
+		be->cur_pos += sz;
+		return 0;
+	}
+
+	if (!be_sz)
+		be_sz = BUFFER_EXT_DFL_SIZE;
+	else
+		be_sz <<= 1;
+
+	tmp = realloc(be->data, be_sz);
+	if (!tmp)
+		return -1;
+
+	be->data   = tmp;
+	be->max_sz = be_sz;
+
+	goto retry;
+}
+
+static void
+buffer_ext_init(struct buffer_ext *be)
+{
+	be->data = NULL;
+	be->cur_pos = 0;
+	be->max_sz = 0;
+}
+
+static inline size_t
+buffer_ext_size(struct buffer_ext *be)
+{
+	return be->cur_pos;
+}
+
+static inline void *
+buffer_ext_addr(struct buffer_ext *be)
+{
+	return be->data;
+}
+
+struct debug_line_header {
+	// Not counting this field
+	uword total_length;
+	// version number (2 currently)
+	uhalf version;
+	// relative offset from next field to
+	// program statement
+	uword prolog_length;
+	ubyte minimum_instruction_length;
+	ubyte default_is_stmt;
+	// line_base - see DWARF 2 specs
+	sbyte line_base;
+	// line_range - see DWARF 2 specs
+	ubyte line_range;
+	// number of opcode + 1
+	ubyte opcode_base;
+	/* follow the array of opcode args nr: ubytes [nr_opcode_base] */
+	/* follow the search directories index, zero terminated string
+	 * terminated by an empty string.
+	 */
+	/* follow an array of { filename, LEB128, LEB128, LEB128 }, first is
+	 * the directory index entry, 0 means current directory, then mtime
+	 * and filesize, last entry is followed by en empty string.
+	 */
+	/* follow the first program statement */
+} __packed;
+
+/* DWARF 2 spec talk only about one possible compilation unit header while
+ * binutils can handle two flavours of dwarf 2, 32 and 64 bits, this is not
+ * related to the used arch, an ELF 32 can hold more than 4 Go of debug
+ * information. For now we handle only DWARF 2 32 bits comp unit. It'll only
+ * become a problem if we generate more than 4GB of debug information.
+ */
+struct compilation_unit_header {
+	uword total_length;
+	uhalf version;
+	uword debug_abbrev_offset;
+	ubyte pointer_size;
+} __packed;
+
+#define DW_LNS_num_opcode (DW_LNS_set_isa + 1)
+
+/* field filled at run time are marked with -1 */
+static struct debug_line_header const default_debug_line_header = {
+	.total_length = -1,
+	.version = 2,
+	.prolog_length = -1,
+	.minimum_instruction_length = 1,	/* could be better when min instruction size != 1 */
+	.default_is_stmt = 1,	/* we don't take care about basic block */
+	.line_base = -5,	/* sensible value for line base ... */
+	.line_range = -14,     /* ... and line range are guessed statically */
+	.opcode_base = DW_LNS_num_opcode
+};
+
+static ubyte standard_opcode_length[] =
+{
+	0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1
+};
+#if 0
+{
+	[DW_LNS_advance_pc]   = 1,
+	[DW_LNS_advance_line] = 1,
+	[DW_LNS_set_file] =  1,
+	[DW_LNS_set_column] = 1,
+	[DW_LNS_fixed_advance_pc] = 1,
+	[DW_LNS_set_isa] = 1,
+};
+#endif
+
+/* field filled at run time are marked with -1 */
+static struct compilation_unit_header default_comp_unit_header = {
+	.total_length = -1,
+	.version = 2,
+	.debug_abbrev_offset = 0,     /* we reuse the same abbrev entries for all comp unit */
+	.pointer_size = sizeof(void *)
+};
+
+static void emit_uword(struct buffer_ext *be, uword data)
+{
+	buffer_ext_add(be, &data, sizeof(uword));
+}
+
+static void emit_string(struct buffer_ext *be, const char *s)
+{
+	buffer_ext_add(be, (void *)s, strlen(s) + 1);
+}
+
+static void emit_unsigned_LEB128(struct buffer_ext *be,
+				 unsigned long data)
+{
+	do {
+		ubyte cur = data & 0x7F;
+		data >>= 7;
+		if (data)
+			cur |= 0x80;
+		buffer_ext_add(be, &cur, 1);
+	} while (data);
+}
+
+static void emit_signed_LEB128(struct buffer_ext *be, long data)
+{
+	int more = 1;
+	int negative = data < 0;
+	int size = sizeof(long) * CHAR_BIT;
+	while (more) {
+		ubyte cur = data & 0x7F;
+		data >>= 7;
+		if (negative)
+			data |= - (1 << (size - 7));
+		if ((data == 0 && !(cur & 0x40)) ||
+		    (data == -1l && (cur & 0x40)))
+			more = 0;
+		else
+			cur |= 0x80;
+		buffer_ext_add(be, &cur, 1);
+	}
+}
+
+static void emit_extended_opcode(struct buffer_ext *be, ubyte opcode,
+				 void *data, size_t data_len)
+{
+	buffer_ext_add(be, (char *)"", 1);
+
+	emit_unsigned_LEB128(be, data_len + 1);
+
+	buffer_ext_add(be, &opcode, 1);
+	buffer_ext_add(be, data, data_len);
+}
+
+static void emit_opcode(struct buffer_ext *be, ubyte opcode)
+{
+	buffer_ext_add(be, &opcode, 1);
+}
+
+static void emit_opcode_signed(struct buffer_ext  *be,
+			       ubyte opcode, long data)
+{
+	buffer_ext_add(be, &opcode, 1);
+	emit_signed_LEB128(be, data);
+}
+
+static void emit_opcode_unsigned(struct buffer_ext *be, ubyte opcode,
+				 unsigned long data)
+{
+	buffer_ext_add(be, &opcode, 1);
+	emit_unsigned_LEB128(be, data);
+}
+
+static void emit_advance_pc(struct buffer_ext *be, unsigned long delta_pc)
+{
+	emit_opcode_unsigned(be, DW_LNS_advance_pc, delta_pc);
+}
+
+static void emit_advance_lineno(struct buffer_ext  *be, long delta_lineno)
+{
+	emit_opcode_signed(be, DW_LNS_advance_line, delta_lineno);
+}
+
+static void emit_lne_end_of_sequence(struct buffer_ext *be)
+{
+	emit_extended_opcode(be, DW_LNE_end_sequence, NULL, 0);
+}
+
+static void emit_set_file(struct buffer_ext *be, unsigned long idx)
+{
+	emit_opcode_unsigned(be, DW_LNS_set_file, idx);
+}
+
+static void emit_lne_define_filename(struct buffer_ext *be,
+				     const char *filename)
+{
+	buffer_ext_add(be, (void *)"", 1);
+
+	/* LNE field, strlen(filename) + zero termination, 3 bytes for: the dir entry, timestamp, filesize */
+	emit_unsigned_LEB128(be, strlen(filename) + 5);
+	emit_opcode(be, DW_LNE_define_file);
+	emit_string(be, filename);
+	/* directory index 0=do not know */
+        emit_unsigned_LEB128(be, 0);
+	/* last modification date on file 0=do not know */
+        emit_unsigned_LEB128(be, 0);
+	/* filesize 0=do not know */
+        emit_unsigned_LEB128(be, 0);
+}
+
+static void emit_lne_set_address(struct buffer_ext *be,
+				 void *address)
+{
+	emit_extended_opcode(be, DW_LNE_set_address, &address, sizeof(unsigned long));
+}
+
+static ubyte get_special_opcode(struct debug_entry *ent,
+				unsigned int last_line,
+				unsigned long last_vma)
+{
+	unsigned int temp;
+	unsigned long delta_addr;
+
+	/*
+	 * delta from line_base
+	 */
+	temp = (ent->lineno - last_line) - default_debug_line_header.line_base;
+
+	if (temp >= default_debug_line_header.line_range)
+		return 0;
+
+	/*
+	 * delta of addresses
+	 */
+	delta_addr = (ent->addr - last_vma) / default_debug_line_header.minimum_instruction_length;
+
+	/* This is not sufficient to ensure opcode will be in [0-256] but
+	 * sufficient to ensure when summing with the delta lineno we will
+	 * not overflow the unsigned long opcode */
+
+	if (delta_addr <= 256 / default_debug_line_header.line_range) {
+		unsigned long opcode = temp +
+			(delta_addr * default_debug_line_header.line_range) +
+			default_debug_line_header.opcode_base;
+
+		return opcode <= 255 ? opcode : 0;
+	}
+	return 0;
+}
+
+static void emit_lineno_info(struct buffer_ext *be,
+			     struct debug_entry *ent, size_t nr_entry,
+			     unsigned long code_addr)
+{
+	size_t i;
+
+	/*
+	 * Machine state at start of a statement program
+	 * address = 0
+	 * file    = 1
+	 * line    = 1
+	 * column  = 0
+	 * is_stmt = default_is_stmt as given in the debug_line_header
+	 * basic block = 0
+	 * end sequence = 0
+	 */
+
+	/* start state of the state machine we take care of */
+	unsigned long last_vma = 0;
+	char const  *cur_filename = NULL;
+	unsigned long cur_file_idx = 0;
+	int last_line = 1;
+
+	emit_lne_set_address(be, (void *)code_addr);
+
+	for (i = 0; i < nr_entry; i++, ent = debug_entry_next(ent)) {
+		int need_copy = 0;
+		ubyte special_opcode;
+
+		/*
+		 * check if filename changed, if so add it
+		 */
+		if (!cur_filename || strcmp(cur_filename, ent->name)) {
+			emit_lne_define_filename(be, ent->name);
+			cur_filename = ent->name;
+			emit_set_file(be, ++cur_file_idx);
+			need_copy = 1;
+		}
+
+		special_opcode = get_special_opcode(ent, last_line, last_vma);
+		if (special_opcode != 0) {
+			last_line = ent->lineno;
+			last_vma  = ent->addr;
+			emit_opcode(be, special_opcode);
+		} else {
+			/*
+			 * lines differ, emit line delta
+			 */
+			if (last_line != ent->lineno) {
+				emit_advance_lineno(be, ent->lineno - last_line);
+				last_line = ent->lineno;
+				need_copy = 1;
+			}
+			/*
+			 * addresses differ, emit address delta
+			 */
+			if (last_vma != ent->addr) {
+				emit_advance_pc(be, ent->addr - last_vma);
+				last_vma = ent->addr;
+				need_copy = 1;
+			}
+			/*
+			 * add new row to matrix
+			 */
+			if (need_copy)
+				emit_opcode(be, DW_LNS_copy);
+		}
+	}
+}
+
+static void add_debug_line(struct buffer_ext *be,
+	struct debug_entry *ent, size_t nr_entry,
+	unsigned long code_addr)
+{
+	struct debug_line_header * dbg_header;
+	size_t old_size;
+
+	old_size = buffer_ext_size(be);
+
+	buffer_ext_add(be, (void *)&default_debug_line_header,
+		 sizeof(default_debug_line_header));
+
+	buffer_ext_add(be, &standard_opcode_length,  sizeof(standard_opcode_length));
+
+	// empty directory entry
+	buffer_ext_add(be, (void *)"", 1);
+
+	// empty filename directory
+	buffer_ext_add(be, (void *)"", 1);
+
+	dbg_header = buffer_ext_addr(be) + old_size;
+	dbg_header->prolog_length = (buffer_ext_size(be) - old_size) -
+		offsetof(struct debug_line_header, minimum_instruction_length);
+
+	emit_lineno_info(be, ent, nr_entry, code_addr);
+
+	emit_lne_end_of_sequence(be);
+
+	dbg_header = buffer_ext_addr(be) + old_size;
+	dbg_header->total_length = (buffer_ext_size(be) - old_size) -
+		offsetof(struct debug_line_header, version);
+}
+
+static void
+add_debug_abbrev(struct buffer_ext *be)
+{
+        emit_unsigned_LEB128(be, 1);
+        emit_unsigned_LEB128(be, DW_TAG_compile_unit);
+        emit_unsigned_LEB128(be, DW_CHILDREN_yes);
+        emit_unsigned_LEB128(be, DW_AT_stmt_list);
+        emit_unsigned_LEB128(be, DW_FORM_data4);
+        emit_unsigned_LEB128(be, 0);
+        emit_unsigned_LEB128(be, 0);
+        emit_unsigned_LEB128(be, 0);
+}
+
+static void
+add_compilation_unit(struct buffer_ext *be,
+		     size_t offset_debug_line)
+{
+	struct compilation_unit_header *comp_unit_header;
+	size_t old_size = buffer_ext_size(be);
+
+	buffer_ext_add(be, &default_comp_unit_header,
+		       sizeof(default_comp_unit_header));
+
+	emit_unsigned_LEB128(be, 1);
+	emit_uword(be, offset_debug_line);
+
+	comp_unit_header = buffer_ext_addr(be) + old_size;
+	comp_unit_header->total_length = (buffer_ext_size(be) - old_size) -
+		offsetof(struct compilation_unit_header, version);
+}
+
+static int
+jit_process_debug_info(uint64_t code_addr,
+		       void *debug, int nr_debug_entries,
+		       struct buffer_ext *dl,
+		       struct buffer_ext *da,
+		       struct buffer_ext *di)
+{
+	struct debug_entry *ent = debug;
+	int i;
+
+	for (i = 0; i < nr_debug_entries; i++) {
+		ent->addr = ent->addr - code_addr;
+		ent = debug_entry_next(ent);
+	}
+	add_compilation_unit(di, buffer_ext_size(dl));
+	add_debug_line(dl, debug, nr_debug_entries, GEN_ELF_TEXT_OFFSET);
+	add_debug_abbrev(da);
+	if (0) buffer_ext_dump(da, "abbrev");
+
+	return 0;
+}
+
+int
+jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries)
+{
+	Elf_Data *d;
+	Elf_Scn *scn;
+	Elf_Shdr *shdr;
+	struct buffer_ext dl, di, da;
+	int ret;
+
+	buffer_ext_init(&dl);
+	buffer_ext_init(&di);
+	buffer_ext_init(&da);
+
+	ret = jit_process_debug_info(code_addr, debug, nr_debug_entries, &dl, &da, &di);
+	if (ret)
+		return -1;
+	/*
+	 * setup .debug_line section
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		return -1;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		return -1;
+	}
+
+	d->d_align = 1;
+	d->d_off = 0LL;
+	d->d_buf = buffer_ext_addr(&dl);
+	d->d_type = ELF_T_BYTE;
+	d->d_size = buffer_ext_size(&dl);
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		return -1;
+	}
+
+	shdr->sh_name = 52; /* .debug_line */
+	shdr->sh_type = SHT_PROGBITS;
+	shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */
+	shdr->sh_flags = 0;
+	shdr->sh_entsize = 0;
+
+	/*
+	 * setup .debug_info section
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		return -1;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		return -1;
+	}
+
+	d->d_align = 1;
+	d->d_off = 0LL;
+	d->d_buf = buffer_ext_addr(&di);
+	d->d_type = ELF_T_BYTE;
+	d->d_size = buffer_ext_size(&di);
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		return -1;
+	}
+
+	shdr->sh_name = 64; /* .debug_info */
+	shdr->sh_type = SHT_PROGBITS;
+	shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */
+	shdr->sh_flags = 0;
+	shdr->sh_entsize = 0;
+
+	/*
+	 * setup .debug_abbrev section
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		return -1;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		return -1;
+	}
+
+	d->d_align = 1;
+	d->d_off = 0LL;
+	d->d_buf = buffer_ext_addr(&da);
+	d->d_type = ELF_T_BYTE;
+	d->d_size = buffer_ext_size(&da);
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		return -1;
+	}
+
+	shdr->sh_name = 76; /* .debug_info */
+	shdr->sh_type = SHT_PROGBITS;
+	shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */
+	shdr->sh_flags = 0;
+	shdr->sh_entsize = 0;
+
+	/*
+	 * now we update the ELF image with all the sections
+	 */
+	if (elf_update(e, ELF_C_WRITE) < 0) {
+		warnx("elf_update debug failed");
+		return -1;
+	}
+	return 0;
+}
diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh
new file mode 100755
index 000000000..c3cef36d4
--- /dev/null
+++ b/tools/perf/util/generate-cmdlist.sh
@@ -0,0 +1,55 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+echo "/* Automatically generated by $0 */
+struct cmdname_help
+{
+    char name[16];
+    char help[80];
+};
+
+static struct cmdname_help common_cmds[] = {"
+
+sed -n -e 's/^perf-\([^ 	]*\)[ 	].* common.*/\1/p' command-list.txt |
+sort |
+while read cmd
+do
+     sed -n '
+     /^NAME/,/perf-'"$cmd"'/H
+     ${
+            x
+            s/.*perf-'"$cmd"' - \(.*\)/  {"'"$cmd"'", "\1"},/
+	    p
+     }' "Documentation/perf-$cmd.txt"
+done
+
+echo "#ifdef HAVE_LIBELF_SUPPORT"
+sed -n -e 's/^perf-\([^ 	]*\)[ 	].* full.*/\1/p' command-list.txt |
+sort |
+while read cmd
+do
+     sed -n '
+     /^NAME/,/perf-'"$cmd"'/H
+     ${
+            x
+            s/.*perf-'"$cmd"' - \(.*\)/  {"'"$cmd"'", "\1"},/
+	    p
+     }' "Documentation/perf-$cmd.txt"
+done
+echo "#endif /* HAVE_LIBELF_SUPPORT */"
+
+echo "#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)"
+sed -n -e 's/^perf-\([^ 	]*\)[ 	].* audit*/\1/p' command-list.txt |
+sort |
+while read cmd
+do
+     sed -n '
+     /^NAME/,/perf-'"$cmd"'/H
+     ${
+            x
+            s/.*perf-'"$cmd"' - \(.*\)/  {"'"$cmd"'", "\1"},/
+	    p
+     }' "Documentation/perf-$cmd.txt"
+done
+echo "#endif /* HAVE_LIBELF_SUPPORT */"
+echo "};"
diff --git a/tools/perf/util/get_current_dir_name.c b/tools/perf/util/get_current_dir_name.c
new file mode 100644
index 000000000..e68935e9a
--- /dev/null
+++ b/tools/perf/util/get_current_dir_name.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: LGPL-2.1
+// Copyright (C) 2018, 2019 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+//
+#ifndef HAVE_GET_CURRENT_DIR_NAME
+#include "get_current_dir_name.h"
+#include <limits.h>
+#include <string.h>
+#include <unistd.h>
+
+/* Android's 'bionic' library, for one, doesn't have this */
+
+char *get_current_dir_name(void)
+{
+	char pwd[PATH_MAX];
+
+	return getcwd(pwd, sizeof(pwd)) == NULL ? NULL : strdup(pwd);
+}
+#endif // HAVE_GET_CURRENT_DIR_NAME
diff --git a/tools/perf/util/get_current_dir_name.h b/tools/perf/util/get_current_dir_name.h
new file mode 100644
index 000000000..69f7d5537
--- /dev/null
+++ b/tools/perf/util/get_current_dir_name.h
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
+// Copyright (C) 2018, 2019 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+//
+#ifndef __PERF_GET_CURRENT_DIR_NAME_H
+#ifndef HAVE_GET_CURRENT_DIR_NAME
+char *get_current_dir_name(void);
+#endif // HAVE_GET_CURRENT_DIR_NAME
+#endif // __PERF_GET_CURRENT_DIR_NAME_H
diff --git a/tools/perf/util/hashmap.c b/tools/perf/util/hashmap.c
new file mode 100644
index 000000000..aeb09c288
--- /dev/null
+++ b/tools/perf/util/hashmap.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * Generic non-thread safe hash map implementation.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <linux/err.h>
+#include "hashmap.h"
+
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
+/* prevent accidental re-addition of reallocarray() */
+#pragma GCC poison reallocarray
+
+/* start with 4 buckets */
+#define HASHMAP_MIN_CAP_BITS 2
+
+static void hashmap_add_entry(struct hashmap_entry **pprev,
+			      struct hashmap_entry *entry)
+{
+	entry->next = *pprev;
+	*pprev = entry;
+}
+
+static void hashmap_del_entry(struct hashmap_entry **pprev,
+			      struct hashmap_entry *entry)
+{
+	*pprev = entry->next;
+	entry->next = NULL;
+}
+
+void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
+		   hashmap_equal_fn equal_fn, void *ctx)
+{
+	map->hash_fn = hash_fn;
+	map->equal_fn = equal_fn;
+	map->ctx = ctx;
+
+	map->buckets = NULL;
+	map->cap = 0;
+	map->cap_bits = 0;
+	map->sz = 0;
+}
+
+struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
+			     hashmap_equal_fn equal_fn,
+			     void *ctx)
+{
+	struct hashmap *map = malloc(sizeof(struct hashmap));
+
+	if (!map)
+		return ERR_PTR(-ENOMEM);
+	hashmap__init(map, hash_fn, equal_fn, ctx);
+	return map;
+}
+
+void hashmap__clear(struct hashmap *map)
+{
+	struct hashmap_entry *cur, *tmp;
+	size_t bkt;
+
+	hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
+		free(cur);
+	}
+	free(map->buckets);
+	map->buckets = NULL;
+	map->cap = map->cap_bits = map->sz = 0;
+}
+
+void hashmap__free(struct hashmap *map)
+{
+	if (IS_ERR_OR_NULL(map))
+		return;
+
+	hashmap__clear(map);
+	free(map);
+}
+
+size_t hashmap__size(const struct hashmap *map)
+{
+	return map->sz;
+}
+
+size_t hashmap__capacity(const struct hashmap *map)
+{
+	return map->cap;
+}
+
+static bool hashmap_needs_to_grow(struct hashmap *map)
+{
+	/* grow if empty or more than 75% filled */
+	return (map->cap == 0) || ((map->sz + 1) * 4 / 3 > map->cap);
+}
+
+static int hashmap_grow(struct hashmap *map)
+{
+	struct hashmap_entry **new_buckets;
+	struct hashmap_entry *cur, *tmp;
+	size_t new_cap_bits, new_cap;
+	size_t h, bkt;
+
+	new_cap_bits = map->cap_bits + 1;
+	if (new_cap_bits < HASHMAP_MIN_CAP_BITS)
+		new_cap_bits = HASHMAP_MIN_CAP_BITS;
+
+	new_cap = 1UL << new_cap_bits;
+	new_buckets = calloc(new_cap, sizeof(new_buckets[0]));
+	if (!new_buckets)
+		return -ENOMEM;
+
+	hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
+		h = hash_bits(map->hash_fn(cur->key, map->ctx), new_cap_bits);
+		hashmap_add_entry(&new_buckets[h], cur);
+	}
+
+	map->cap = new_cap;
+	map->cap_bits = new_cap_bits;
+	free(map->buckets);
+	map->buckets = new_buckets;
+
+	return 0;
+}
+
+static bool hashmap_find_entry(const struct hashmap *map,
+			       const void *key, size_t hash,
+			       struct hashmap_entry ***pprev,
+			       struct hashmap_entry **entry)
+{
+	struct hashmap_entry *cur, **prev_ptr;
+
+	if (!map->buckets)
+		return false;
+
+	for (prev_ptr = &map->buckets[hash], cur = *prev_ptr;
+	     cur;
+	     prev_ptr = &cur->next, cur = cur->next) {
+		if (map->equal_fn(cur->key, key, map->ctx)) {
+			if (pprev)
+				*pprev = prev_ptr;
+			*entry = cur;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+int hashmap__insert(struct hashmap *map, const void *key, void *value,
+		    enum hashmap_insert_strategy strategy,
+		    const void **old_key, void **old_value)
+{
+	struct hashmap_entry *entry;
+	size_t h;
+	int err;
+
+	if (old_key)
+		*old_key = NULL;
+	if (old_value)
+		*old_value = NULL;
+
+	h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+	if (strategy != HASHMAP_APPEND &&
+	    hashmap_find_entry(map, key, h, NULL, &entry)) {
+		if (old_key)
+			*old_key = entry->key;
+		if (old_value)
+			*old_value = entry->value;
+
+		if (strategy == HASHMAP_SET || strategy == HASHMAP_UPDATE) {
+			entry->key = key;
+			entry->value = value;
+			return 0;
+		} else if (strategy == HASHMAP_ADD) {
+			return -EEXIST;
+		}
+	}
+
+	if (strategy == HASHMAP_UPDATE)
+		return -ENOENT;
+
+	if (hashmap_needs_to_grow(map)) {
+		err = hashmap_grow(map);
+		if (err)
+			return err;
+		h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+	}
+
+	entry = malloc(sizeof(struct hashmap_entry));
+	if (!entry)
+		return -ENOMEM;
+
+	entry->key = key;
+	entry->value = value;
+	hashmap_add_entry(&map->buckets[h], entry);
+	map->sz++;
+
+	return 0;
+}
+
+bool hashmap__find(const struct hashmap *map, const void *key, void **value)
+{
+	struct hashmap_entry *entry;
+	size_t h;
+
+	h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+	if (!hashmap_find_entry(map, key, h, NULL, &entry))
+		return false;
+
+	if (value)
+		*value = entry->value;
+	return true;
+}
+
+bool hashmap__delete(struct hashmap *map, const void *key,
+		     const void **old_key, void **old_value)
+{
+	struct hashmap_entry **pprev, *entry;
+	size_t h;
+
+	h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+	if (!hashmap_find_entry(map, key, h, &pprev, &entry))
+		return false;
+
+	if (old_key)
+		*old_key = entry->key;
+	if (old_value)
+		*old_value = entry->value;
+
+	hashmap_del_entry(pprev, entry);
+	free(entry);
+	map->sz--;
+
+	return true;
+}
diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h
new file mode 100644
index 000000000..10a4c4cd1
--- /dev/null
+++ b/tools/perf/util/hashmap.h
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Generic non-thread safe hash map implementation.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#ifndef __LIBBPF_HASHMAP_H
+#define __LIBBPF_HASHMAP_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <limits.h>
+
+static inline size_t hash_bits(size_t h, int bits)
+{
+	/* shuffle bits and return requested number of upper bits */
+	if (bits == 0)
+		return 0;
+
+#if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__)
+	/* LP64 case */
+	return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits);
+#elif (__SIZEOF_SIZE_T__ <= __SIZEOF_LONG__)
+	return (h * 2654435769lu) >> (__SIZEOF_LONG__ * 8 - bits);
+#else
+#	error "Unsupported size_t size"
+#endif
+}
+
+/* generic C-string hashing function */
+static inline size_t str_hash(const char *s)
+{
+	size_t h = 0;
+
+	while (*s) {
+		h = h * 31 + *s;
+		s++;
+	}
+	return h;
+}
+
+typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx);
+typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx);
+
+struct hashmap_entry {
+	const void *key;
+	void *value;
+	struct hashmap_entry *next;
+};
+
+struct hashmap {
+	hashmap_hash_fn hash_fn;
+	hashmap_equal_fn equal_fn;
+	void *ctx;
+
+	struct hashmap_entry **buckets;
+	size_t cap;
+	size_t cap_bits;
+	size_t sz;
+};
+
+#define HASHMAP_INIT(hash_fn, equal_fn, ctx) {	\
+	.hash_fn = (hash_fn),			\
+	.equal_fn = (equal_fn),			\
+	.ctx = (ctx),				\
+	.buckets = NULL,			\
+	.cap = 0,				\
+	.cap_bits = 0,				\
+	.sz = 0,				\
+}
+
+void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
+		   hashmap_equal_fn equal_fn, void *ctx);
+struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
+			     hashmap_equal_fn equal_fn,
+			     void *ctx);
+void hashmap__clear(struct hashmap *map);
+void hashmap__free(struct hashmap *map);
+
+size_t hashmap__size(const struct hashmap *map);
+size_t hashmap__capacity(const struct hashmap *map);
+
+/*
+ * Hashmap insertion strategy:
+ * - HASHMAP_ADD - only add key/value if key doesn't exist yet;
+ * - HASHMAP_SET - add key/value pair if key doesn't exist yet; otherwise,
+ *   update value;
+ * - HASHMAP_UPDATE - update value, if key already exists; otherwise, do
+ *   nothing and return -ENOENT;
+ * - HASHMAP_APPEND - always add key/value pair, even if key already exists.
+ *   This turns hashmap into a multimap by allowing multiple values to be
+ *   associated with the same key. Most useful read API for such hashmap is
+ *   hashmap__for_each_key_entry() iteration. If hashmap__find() is still
+ *   used, it will return last inserted key/value entry (first in a bucket
+ *   chain).
+ */
+enum hashmap_insert_strategy {
+	HASHMAP_ADD,
+	HASHMAP_SET,
+	HASHMAP_UPDATE,
+	HASHMAP_APPEND,
+};
+
+/*
+ * hashmap__insert() adds key/value entry w/ various semantics, depending on
+ * provided strategy value. If a given key/value pair replaced already
+ * existing key/value pair, both old key and old value will be returned
+ * through old_key and old_value to allow calling code do proper memory
+ * management.
+ */
+int hashmap__insert(struct hashmap *map, const void *key, void *value,
+		    enum hashmap_insert_strategy strategy,
+		    const void **old_key, void **old_value);
+
+static inline int hashmap__add(struct hashmap *map,
+			       const void *key, void *value)
+{
+	return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL);
+}
+
+static inline int hashmap__set(struct hashmap *map,
+			       const void *key, void *value,
+			       const void **old_key, void **old_value)
+{
+	return hashmap__insert(map, key, value, HASHMAP_SET,
+			       old_key, old_value);
+}
+
+static inline int hashmap__update(struct hashmap *map,
+				  const void *key, void *value,
+				  const void **old_key, void **old_value)
+{
+	return hashmap__insert(map, key, value, HASHMAP_UPDATE,
+			       old_key, old_value);
+}
+
+static inline int hashmap__append(struct hashmap *map,
+				  const void *key, void *value)
+{
+	return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL);
+}
+
+bool hashmap__delete(struct hashmap *map, const void *key,
+		     const void **old_key, void **old_value);
+
+bool hashmap__find(const struct hashmap *map, const void *key, void **value);
+
+/*
+ * hashmap__for_each_entry - iterate over all entries in hashmap
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @bkt: integer used as a bucket loop cursor
+ */
+#define hashmap__for_each_entry(map, cur, bkt)				    \
+	for (bkt = 0; bkt < map->cap; bkt++)				    \
+		for (cur = map->buckets[bkt]; cur; cur = cur->next)
+
+/*
+ * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe
+ * against removals
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @tmp: struct hashmap_entry * used as a temporary next cursor storage
+ * @bkt: integer used as a bucket loop cursor
+ */
+#define hashmap__for_each_entry_safe(map, cur, tmp, bkt)		    \
+	for (bkt = 0; bkt < map->cap; bkt++)				    \
+		for (cur = map->buckets[bkt];				    \
+		     cur && ({tmp = cur->next; true; });		    \
+		     cur = tmp)
+
+/*
+ * hashmap__for_each_key_entry - iterate over entries associated with given key
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @key: key to iterate entries for
+ */
+#define hashmap__for_each_key_entry(map, cur, _key)			    \
+	for (cur = map->buckets						    \
+		     ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \
+		     : NULL;						    \
+	     cur;							    \
+	     cur = cur->next)						    \
+		if (map->equal_fn(cur->key, (_key), map->ctx))
+
+#define hashmap__for_each_key_entry_safe(map, cur, tmp, _key)		    \
+	for (cur = map->buckets						    \
+		     ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \
+		     : NULL;						    \
+	     cur && ({ tmp = cur->next; true; });			    \
+	     cur = tmp)							    \
+		if (map->equal_fn(cur->key, (_key), map->ctx))
+
+#endif /* __LIBBPF_HASHMAP_H */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
new file mode 100644
index 000000000..b2b029356
--- /dev/null
+++ b/tools/perf/util/header.c
@@ -0,0 +1,4472 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include "string2.h"
+#include <sys/param.h>
+#include <sys/types.h>
+#include <byteswap.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/string.h>
+#include <linux/stringify.h>
+#include <linux/zalloc.h>
+#include <sys/stat.h>
+#include <sys/utsname.h>
+#include <linux/time64.h>
+#include <dirent.h>
+#ifdef HAVE_LIBBPF_SUPPORT
+#include <bpf/libbpf.h>
+#endif
+#include <perf/cpumap.h>
+
+#include "dso.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "util/evsel_fprintf.h"
+#include "header.h"
+#include "memswap.h"
+#include "trace-event.h"
+#include "session.h"
+#include "symbol.h"
+#include "debug.h"
+#include "cpumap.h"
+#include "pmu.h"
+#include "vdso.h"
+#include "strbuf.h"
+#include "build-id.h"
+#include "data.h"
+#include <api/fs/fs.h>
+#include "asm/bug.h"
+#include "tool.h"
+#include "time-utils.h"
+#include "units.h"
+#include "util/util.h" // perf_exe()
+#include "cputopo.h"
+#include "bpf-event.h"
+#include "bpf-utils.h"
+#include "clockid.h"
+#include "pmu-hybrid.h"
+
+#include <linux/ctype.h>
+#include <internal/lib.h>
+
+/*
+ * magic2 = "PERFILE2"
+ * must be a numerical value to let the endianness
+ * determine the memory layout. That way we are able
+ * to detect endianness when reading the perf.data file
+ * back.
+ *
+ * we check for legacy (PERFFILE) format.
+ */
+static const char *__perf_magic1 = "PERFFILE";
+static const u64 __perf_magic2    = 0x32454c4946524550ULL;
+static const u64 __perf_magic2_sw = 0x50455246494c4532ULL;
+
+#define PERF_MAGIC	__perf_magic2
+
+const char perf_version_string[] = PERF_VERSION;
+
+struct perf_file_attr {
+	struct perf_event_attr	attr;
+	struct perf_file_section	ids;
+};
+
+void perf_header__set_feat(struct perf_header *header, int feat)
+{
+	set_bit(feat, header->adds_features);
+}
+
+void perf_header__clear_feat(struct perf_header *header, int feat)
+{
+	clear_bit(feat, header->adds_features);
+}
+
+bool perf_header__has_feat(const struct perf_header *header, int feat)
+{
+	return test_bit(feat, header->adds_features);
+}
+
+static int __do_write_fd(struct feat_fd *ff, const void *buf, size_t size)
+{
+	ssize_t ret = writen(ff->fd, buf, size);
+
+	if (ret != (ssize_t)size)
+		return ret < 0 ? (int)ret : -1;
+	return 0;
+}
+
+static int __do_write_buf(struct feat_fd *ff,  const void *buf, size_t size)
+{
+	/* struct perf_event_header::size is u16 */
+	const size_t max_size = 0xffff - sizeof(struct perf_event_header);
+	size_t new_size = ff->size;
+	void *addr;
+
+	if (size + ff->offset > max_size)
+		return -E2BIG;
+
+	while (size > (new_size - ff->offset))
+		new_size <<= 1;
+	new_size = min(max_size, new_size);
+
+	if (ff->size < new_size) {
+		addr = realloc(ff->buf, new_size);
+		if (!addr)
+			return -ENOMEM;
+		ff->buf = addr;
+		ff->size = new_size;
+	}
+
+	memcpy(ff->buf + ff->offset, buf, size);
+	ff->offset += size;
+
+	return 0;
+}
+
+/* Return: 0 if succeeded, -ERR if failed. */
+int do_write(struct feat_fd *ff, const void *buf, size_t size)
+{
+	if (!ff->buf)
+		return __do_write_fd(ff, buf, size);
+	return __do_write_buf(ff, buf, size);
+}
+
+/* Return: 0 if succeeded, -ERR if failed. */
+static int do_write_bitmap(struct feat_fd *ff, unsigned long *set, u64 size)
+{
+	u64 *p = (u64 *) set;
+	int i, ret;
+
+	ret = do_write(ff, &size, sizeof(size));
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; (u64) i < BITS_TO_U64(size); i++) {
+		ret = do_write(ff, p + i, sizeof(*p));
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/* Return: 0 if succeeded, -ERR if failed. */
+int write_padded(struct feat_fd *ff, const void *bf,
+		 size_t count, size_t count_aligned)
+{
+	static const char zero_buf[NAME_ALIGN];
+	int err = do_write(ff, bf, count);
+
+	if (!err)
+		err = do_write(ff, zero_buf, count_aligned - count);
+
+	return err;
+}
+
+#define string_size(str)						\
+	(PERF_ALIGN((strlen(str) + 1), NAME_ALIGN) + sizeof(u32))
+
+/* Return: 0 if succeeded, -ERR if failed. */
+static int do_write_string(struct feat_fd *ff, const char *str)
+{
+	u32 len, olen;
+	int ret;
+
+	olen = strlen(str) + 1;
+	len = PERF_ALIGN(olen, NAME_ALIGN);
+
+	/* write len, incl. \0 */
+	ret = do_write(ff, &len, sizeof(len));
+	if (ret < 0)
+		return ret;
+
+	return write_padded(ff, str, olen, len);
+}
+
+static int __do_read_fd(struct feat_fd *ff, void *addr, ssize_t size)
+{
+	ssize_t ret = readn(ff->fd, addr, size);
+
+	if (ret != size)
+		return ret < 0 ? (int)ret : -1;
+	return 0;
+}
+
+static int __do_read_buf(struct feat_fd *ff, void *addr, ssize_t size)
+{
+	if (size > (ssize_t)ff->size - ff->offset)
+		return -1;
+
+	memcpy(addr, ff->buf + ff->offset, size);
+	ff->offset += size;
+
+	return 0;
+
+}
+
+static int __do_read(struct feat_fd *ff, void *addr, ssize_t size)
+{
+	if (!ff->buf)
+		return __do_read_fd(ff, addr, size);
+	return __do_read_buf(ff, addr, size);
+}
+
+static int do_read_u32(struct feat_fd *ff, u32 *addr)
+{
+	int ret;
+
+	ret = __do_read(ff, addr, sizeof(*addr));
+	if (ret)
+		return ret;
+
+	if (ff->ph->needs_swap)
+		*addr = bswap_32(*addr);
+	return 0;
+}
+
+static int do_read_u64(struct feat_fd *ff, u64 *addr)
+{
+	int ret;
+
+	ret = __do_read(ff, addr, sizeof(*addr));
+	if (ret)
+		return ret;
+
+	if (ff->ph->needs_swap)
+		*addr = bswap_64(*addr);
+	return 0;
+}
+
+static char *do_read_string(struct feat_fd *ff)
+{
+	u32 len;
+	char *buf;
+
+	if (do_read_u32(ff, &len))
+		return NULL;
+
+	buf = malloc(len);
+	if (!buf)
+		return NULL;
+
+	if (!__do_read(ff, buf, len)) {
+		/*
+		 * strings are padded by zeroes
+		 * thus the actual strlen of buf
+		 * may be less than len
+		 */
+		return buf;
+	}
+
+	free(buf);
+	return NULL;
+}
+
+/* Return: 0 if succeeded, -ERR if failed. */
+static int do_read_bitmap(struct feat_fd *ff, unsigned long **pset, u64 *psize)
+{
+	unsigned long *set;
+	u64 size, *p;
+	int i, ret;
+
+	ret = do_read_u64(ff, &size);
+	if (ret)
+		return ret;
+
+	set = bitmap_zalloc(size);
+	if (!set)
+		return -ENOMEM;
+
+	p = (u64 *) set;
+
+	for (i = 0; (u64) i < BITS_TO_U64(size); i++) {
+		ret = do_read_u64(ff, p + i);
+		if (ret < 0) {
+			free(set);
+			return ret;
+		}
+	}
+
+	*pset  = set;
+	*psize = size;
+	return 0;
+}
+
+static int write_tracing_data(struct feat_fd *ff,
+			      struct evlist *evlist)
+{
+	if (WARN(ff->buf, "Error: calling %s in pipe-mode.\n", __func__))
+		return -1;
+
+	return read_tracing_data(ff->fd, &evlist->core.entries);
+}
+
+static int write_build_id(struct feat_fd *ff,
+			  struct evlist *evlist __maybe_unused)
+{
+	struct perf_session *session;
+	int err;
+
+	session = container_of(ff->ph, struct perf_session, header);
+
+	if (!perf_session__read_build_ids(session, true))
+		return -1;
+
+	if (WARN(ff->buf, "Error: calling %s in pipe-mode.\n", __func__))
+		return -1;
+
+	err = perf_session__write_buildid_table(session, ff);
+	if (err < 0) {
+		pr_debug("failed to write buildid table\n");
+		return err;
+	}
+	perf_session__cache_build_ids(session);
+
+	return 0;
+}
+
+static int write_hostname(struct feat_fd *ff,
+			  struct evlist *evlist __maybe_unused)
+{
+	struct utsname uts;
+	int ret;
+
+	ret = uname(&uts);
+	if (ret < 0)
+		return -1;
+
+	return do_write_string(ff, uts.nodename);
+}
+
+static int write_osrelease(struct feat_fd *ff,
+			   struct evlist *evlist __maybe_unused)
+{
+	struct utsname uts;
+	int ret;
+
+	ret = uname(&uts);
+	if (ret < 0)
+		return -1;
+
+	return do_write_string(ff, uts.release);
+}
+
+static int write_arch(struct feat_fd *ff,
+		      struct evlist *evlist __maybe_unused)
+{
+	struct utsname uts;
+	int ret;
+
+	ret = uname(&uts);
+	if (ret < 0)
+		return -1;
+
+	return do_write_string(ff, uts.machine);
+}
+
+static int write_version(struct feat_fd *ff,
+			 struct evlist *evlist __maybe_unused)
+{
+	return do_write_string(ff, perf_version_string);
+}
+
+static int __write_cpudesc(struct feat_fd *ff, const char *cpuinfo_proc)
+{
+	FILE *file;
+	char *buf = NULL;
+	char *s, *p;
+	const char *search = cpuinfo_proc;
+	size_t len = 0;
+	int ret = -1;
+
+	if (!search)
+		return -1;
+
+	file = fopen("/proc/cpuinfo", "r");
+	if (!file)
+		return -1;
+
+	while (getline(&buf, &len, file) > 0) {
+		ret = strncmp(buf, search, strlen(search));
+		if (!ret)
+			break;
+	}
+
+	if (ret) {
+		ret = -1;
+		goto done;
+	}
+
+	s = buf;
+
+	p = strchr(buf, ':');
+	if (p && *(p+1) == ' ' && *(p+2))
+		s = p + 2;
+	p = strchr(s, '\n');
+	if (p)
+		*p = '\0';
+
+	/* squash extra space characters (branding string) */
+	p = s;
+	while (*p) {
+		if (isspace(*p)) {
+			char *r = p + 1;
+			char *q = skip_spaces(r);
+			*p = ' ';
+			if (q != (p+1))
+				while ((*r++ = *q++));
+		}
+		p++;
+	}
+	ret = do_write_string(ff, s);
+done:
+	free(buf);
+	fclose(file);
+	return ret;
+}
+
+static int write_cpudesc(struct feat_fd *ff,
+		       struct evlist *evlist __maybe_unused)
+{
+#if defined(__powerpc__) || defined(__hppa__) || defined(__sparc__)
+#define CPUINFO_PROC	{ "cpu", }
+#elif defined(__s390__)
+#define CPUINFO_PROC	{ "vendor_id", }
+#elif defined(__sh__)
+#define CPUINFO_PROC	{ "cpu type", }
+#elif defined(__alpha__) || defined(__mips__)
+#define CPUINFO_PROC	{ "cpu model", }
+#elif defined(__arm__)
+#define CPUINFO_PROC	{ "model name", "Processor", }
+#elif defined(__arc__)
+#define CPUINFO_PROC	{ "Processor", }
+#elif defined(__xtensa__)
+#define CPUINFO_PROC	{ "core ID", }
+#else
+#define CPUINFO_PROC	{ "model name", }
+#endif
+	const char *cpuinfo_procs[] = CPUINFO_PROC;
+#undef CPUINFO_PROC
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(cpuinfo_procs); i++) {
+		int ret;
+		ret = __write_cpudesc(ff, cpuinfo_procs[i]);
+		if (ret >= 0)
+			return ret;
+	}
+	return -1;
+}
+
+
+static int write_nrcpus(struct feat_fd *ff,
+			struct evlist *evlist __maybe_unused)
+{
+	long nr;
+	u32 nrc, nra;
+	int ret;
+
+	nrc = cpu__max_present_cpu().cpu;
+
+	nr = sysconf(_SC_NPROCESSORS_ONLN);
+	if (nr < 0)
+		return -1;
+
+	nra = (u32)(nr & UINT_MAX);
+
+	ret = do_write(ff, &nrc, sizeof(nrc));
+	if (ret < 0)
+		return ret;
+
+	return do_write(ff, &nra, sizeof(nra));
+}
+
+static int write_event_desc(struct feat_fd *ff,
+			    struct evlist *evlist)
+{
+	struct evsel *evsel;
+	u32 nre, nri, sz;
+	int ret;
+
+	nre = evlist->core.nr_entries;
+
+	/*
+	 * write number of events
+	 */
+	ret = do_write(ff, &nre, sizeof(nre));
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * size of perf_event_attr struct
+	 */
+	sz = (u32)sizeof(evsel->core.attr);
+	ret = do_write(ff, &sz, sizeof(sz));
+	if (ret < 0)
+		return ret;
+
+	evlist__for_each_entry(evlist, evsel) {
+		ret = do_write(ff, &evsel->core.attr, sz);
+		if (ret < 0)
+			return ret;
+		/*
+		 * write number of unique id per event
+		 * there is one id per instance of an event
+		 *
+		 * copy into an nri to be independent of the
+		 * type of ids,
+		 */
+		nri = evsel->core.ids;
+		ret = do_write(ff, &nri, sizeof(nri));
+		if (ret < 0)
+			return ret;
+
+		/*
+		 * write event string as passed on cmdline
+		 */
+		ret = do_write_string(ff, evsel__name(evsel));
+		if (ret < 0)
+			return ret;
+		/*
+		 * write unique ids for this event
+		 */
+		ret = do_write(ff, evsel->core.id, evsel->core.ids * sizeof(u64));
+		if (ret < 0)
+			return ret;
+	}
+	return 0;
+}
+
+static int write_cmdline(struct feat_fd *ff,
+			 struct evlist *evlist __maybe_unused)
+{
+	char pbuf[MAXPATHLEN], *buf;
+	int i, ret, n;
+
+	/* actual path to perf binary */
+	buf = perf_exe(pbuf, MAXPATHLEN);
+
+	/* account for binary path */
+	n = perf_env.nr_cmdline + 1;
+
+	ret = do_write(ff, &n, sizeof(n));
+	if (ret < 0)
+		return ret;
+
+	ret = do_write_string(ff, buf);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0 ; i < perf_env.nr_cmdline; i++) {
+		ret = do_write_string(ff, perf_env.cmdline_argv[i]);
+		if (ret < 0)
+			return ret;
+	}
+	return 0;
+}
+
+
+static int write_cpu_topology(struct feat_fd *ff,
+			      struct evlist *evlist __maybe_unused)
+{
+	struct cpu_topology *tp;
+	u32 i;
+	int ret, j;
+
+	tp = cpu_topology__new();
+	if (!tp)
+		return -1;
+
+	ret = do_write(ff, &tp->package_cpus_lists, sizeof(tp->package_cpus_lists));
+	if (ret < 0)
+		goto done;
+
+	for (i = 0; i < tp->package_cpus_lists; i++) {
+		ret = do_write_string(ff, tp->package_cpus_list[i]);
+		if (ret < 0)
+			goto done;
+	}
+	ret = do_write(ff, &tp->core_cpus_lists, sizeof(tp->core_cpus_lists));
+	if (ret < 0)
+		goto done;
+
+	for (i = 0; i < tp->core_cpus_lists; i++) {
+		ret = do_write_string(ff, tp->core_cpus_list[i]);
+		if (ret < 0)
+			break;
+	}
+
+	ret = perf_env__read_cpu_topology_map(&perf_env);
+	if (ret < 0)
+		goto done;
+
+	for (j = 0; j < perf_env.nr_cpus_avail; j++) {
+		ret = do_write(ff, &perf_env.cpu[j].core_id,
+			       sizeof(perf_env.cpu[j].core_id));
+		if (ret < 0)
+			return ret;
+		ret = do_write(ff, &perf_env.cpu[j].socket_id,
+			       sizeof(perf_env.cpu[j].socket_id));
+		if (ret < 0)
+			return ret;
+	}
+
+	if (!tp->die_cpus_lists)
+		goto done;
+
+	ret = do_write(ff, &tp->die_cpus_lists, sizeof(tp->die_cpus_lists));
+	if (ret < 0)
+		goto done;
+
+	for (i = 0; i < tp->die_cpus_lists; i++) {
+		ret = do_write_string(ff, tp->die_cpus_list[i]);
+		if (ret < 0)
+			goto done;
+	}
+
+	for (j = 0; j < perf_env.nr_cpus_avail; j++) {
+		ret = do_write(ff, &perf_env.cpu[j].die_id,
+			       sizeof(perf_env.cpu[j].die_id));
+		if (ret < 0)
+			return ret;
+	}
+
+done:
+	cpu_topology__delete(tp);
+	return ret;
+}
+
+
+
+static int write_total_mem(struct feat_fd *ff,
+			   struct evlist *evlist __maybe_unused)
+{
+	char *buf = NULL;
+	FILE *fp;
+	size_t len = 0;
+	int ret = -1, n;
+	uint64_t mem;
+
+	fp = fopen("/proc/meminfo", "r");
+	if (!fp)
+		return -1;
+
+	while (getline(&buf, &len, fp) > 0) {
+		ret = strncmp(buf, "MemTotal:", 9);
+		if (!ret)
+			break;
+	}
+	if (!ret) {
+		n = sscanf(buf, "%*s %"PRIu64, &mem);
+		if (n == 1)
+			ret = do_write(ff, &mem, sizeof(mem));
+	} else
+		ret = -1;
+	free(buf);
+	fclose(fp);
+	return ret;
+}
+
+static int write_numa_topology(struct feat_fd *ff,
+			       struct evlist *evlist __maybe_unused)
+{
+	struct numa_topology *tp;
+	int ret = -1;
+	u32 i;
+
+	tp = numa_topology__new();
+	if (!tp)
+		return -ENOMEM;
+
+	ret = do_write(ff, &tp->nr, sizeof(u32));
+	if (ret < 0)
+		goto err;
+
+	for (i = 0; i < tp->nr; i++) {
+		struct numa_topology_node *n = &tp->nodes[i];
+
+		ret = do_write(ff, &n->node, sizeof(u32));
+		if (ret < 0)
+			goto err;
+
+		ret = do_write(ff, &n->mem_total, sizeof(u64));
+		if (ret)
+			goto err;
+
+		ret = do_write(ff, &n->mem_free, sizeof(u64));
+		if (ret)
+			goto err;
+
+		ret = do_write_string(ff, n->cpus);
+		if (ret < 0)
+			goto err;
+	}
+
+	ret = 0;
+
+err:
+	numa_topology__delete(tp);
+	return ret;
+}
+
+/*
+ * File format:
+ *
+ * struct pmu_mappings {
+ *	u32	pmu_num;
+ *	struct pmu_map {
+ *		u32	type;
+ *		char	name[];
+ *	}[pmu_num];
+ * };
+ */
+
+static int write_pmu_mappings(struct feat_fd *ff,
+			      struct evlist *evlist __maybe_unused)
+{
+	struct perf_pmu *pmu = NULL;
+	u32 pmu_num = 0;
+	int ret;
+
+	/*
+	 * Do a first pass to count number of pmu to avoid lseek so this
+	 * works in pipe mode as well.
+	 */
+	while ((pmu = perf_pmu__scan(pmu))) {
+		if (!pmu->name)
+			continue;
+		pmu_num++;
+	}
+
+	ret = do_write(ff, &pmu_num, sizeof(pmu_num));
+	if (ret < 0)
+		return ret;
+
+	while ((pmu = perf_pmu__scan(pmu))) {
+		if (!pmu->name)
+			continue;
+
+		ret = do_write(ff, &pmu->type, sizeof(pmu->type));
+		if (ret < 0)
+			return ret;
+
+		ret = do_write_string(ff, pmu->name);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * File format:
+ *
+ * struct group_descs {
+ *	u32	nr_groups;
+ *	struct group_desc {
+ *		char	name[];
+ *		u32	leader_idx;
+ *		u32	nr_members;
+ *	}[nr_groups];
+ * };
+ */
+static int write_group_desc(struct feat_fd *ff,
+			    struct evlist *evlist)
+{
+	u32 nr_groups = evlist->core.nr_groups;
+	struct evsel *evsel;
+	int ret;
+
+	ret = do_write(ff, &nr_groups, sizeof(nr_groups));
+	if (ret < 0)
+		return ret;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel__is_group_leader(evsel) && evsel->core.nr_members > 1) {
+			const char *name = evsel->group_name ?: "{anon_group}";
+			u32 leader_idx = evsel->core.idx;
+			u32 nr_members = evsel->core.nr_members;
+
+			ret = do_write_string(ff, name);
+			if (ret < 0)
+				return ret;
+
+			ret = do_write(ff, &leader_idx, sizeof(leader_idx));
+			if (ret < 0)
+				return ret;
+
+			ret = do_write(ff, &nr_members, sizeof(nr_members));
+			if (ret < 0)
+				return ret;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Return the CPU id as a raw string.
+ *
+ * Each architecture should provide a more precise id string that
+ * can be use to match the architecture's "mapfile".
+ */
+char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
+{
+	return NULL;
+}
+
+/* Return zero when the cpuid from the mapfile.csv matches the
+ * cpuid string generated on this platform.
+ * Otherwise return non-zero.
+ */
+int __weak strcmp_cpuid_str(const char *mapcpuid, const char *cpuid)
+{
+	regex_t re;
+	regmatch_t pmatch[1];
+	int match;
+
+	if (regcomp(&re, mapcpuid, REG_EXTENDED) != 0) {
+		/* Warn unable to generate match particular string. */
+		pr_info("Invalid regular expression %s\n", mapcpuid);
+		return 1;
+	}
+
+	match = !regexec(&re, cpuid, 1, pmatch, 0);
+	regfree(&re);
+	if (match) {
+		size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so);
+
+		/* Verify the entire string matched. */
+		if (match_len == strlen(cpuid))
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * default get_cpuid(): nothing gets recorded
+ * actual implementation must be in arch/$(SRCARCH)/util/header.c
+ */
+int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused)
+{
+	return ENOSYS; /* Not implemented */
+}
+
+static int write_cpuid(struct feat_fd *ff,
+		       struct evlist *evlist __maybe_unused)
+{
+	char buffer[64];
+	int ret;
+
+	ret = get_cpuid(buffer, sizeof(buffer));
+	if (ret)
+		return -1;
+
+	return do_write_string(ff, buffer);
+}
+
+static int write_branch_stack(struct feat_fd *ff __maybe_unused,
+			      struct evlist *evlist __maybe_unused)
+{
+	return 0;
+}
+
+static int write_auxtrace(struct feat_fd *ff,
+			  struct evlist *evlist __maybe_unused)
+{
+	struct perf_session *session;
+	int err;
+
+	if (WARN(ff->buf, "Error: calling %s in pipe-mode.\n", __func__))
+		return -1;
+
+	session = container_of(ff->ph, struct perf_session, header);
+
+	err = auxtrace_index__write(ff->fd, &session->auxtrace_index);
+	if (err < 0)
+		pr_err("Failed to write auxtrace index\n");
+	return err;
+}
+
+static int write_clockid(struct feat_fd *ff,
+			 struct evlist *evlist __maybe_unused)
+{
+	return do_write(ff, &ff->ph->env.clock.clockid_res_ns,
+			sizeof(ff->ph->env.clock.clockid_res_ns));
+}
+
+static int write_clock_data(struct feat_fd *ff,
+			    struct evlist *evlist __maybe_unused)
+{
+	u64 *data64;
+	u32 data32;
+	int ret;
+
+	/* version */
+	data32 = 1;
+
+	ret = do_write(ff, &data32, sizeof(data32));
+	if (ret < 0)
+		return ret;
+
+	/* clockid */
+	data32 = ff->ph->env.clock.clockid;
+
+	ret = do_write(ff, &data32, sizeof(data32));
+	if (ret < 0)
+		return ret;
+
+	/* TOD ref time */
+	data64 = &ff->ph->env.clock.tod_ns;
+
+	ret = do_write(ff, data64, sizeof(*data64));
+	if (ret < 0)
+		return ret;
+
+	/* clockid ref time */
+	data64 = &ff->ph->env.clock.clockid_ns;
+
+	return do_write(ff, data64, sizeof(*data64));
+}
+
+static int write_hybrid_topology(struct feat_fd *ff,
+				 struct evlist *evlist __maybe_unused)
+{
+	struct hybrid_topology *tp;
+	int ret;
+	u32 i;
+
+	tp = hybrid_topology__new();
+	if (!tp)
+		return -ENOENT;
+
+	ret = do_write(ff, &tp->nr, sizeof(u32));
+	if (ret < 0)
+		goto err;
+
+	for (i = 0; i < tp->nr; i++) {
+		struct hybrid_topology_node *n = &tp->nodes[i];
+
+		ret = do_write_string(ff, n->pmu_name);
+		if (ret < 0)
+			goto err;
+
+		ret = do_write_string(ff, n->cpus);
+		if (ret < 0)
+			goto err;
+	}
+
+	ret = 0;
+
+err:
+	hybrid_topology__delete(tp);
+	return ret;
+}
+
+static int write_dir_format(struct feat_fd *ff,
+			    struct evlist *evlist __maybe_unused)
+{
+	struct perf_session *session;
+	struct perf_data *data;
+
+	session = container_of(ff->ph, struct perf_session, header);
+	data = session->data;
+
+	if (WARN_ON(!perf_data__is_dir(data)))
+		return -1;
+
+	return do_write(ff, &data->dir.version, sizeof(data->dir.version));
+}
+
+/*
+ * Check whether a CPU is online
+ *
+ * Returns:
+ *     1 -> if CPU is online
+ *     0 -> if CPU is offline
+ *    -1 -> error case
+ */
+int is_cpu_online(unsigned int cpu)
+{
+	char *str;
+	size_t strlen;
+	char buf[256];
+	int status = -1;
+	struct stat statbuf;
+
+	snprintf(buf, sizeof(buf),
+		"/sys/devices/system/cpu/cpu%d", cpu);
+	if (stat(buf, &statbuf) != 0)
+		return 0;
+
+	/*
+	 * Check if /sys/devices/system/cpu/cpux/online file
+	 * exists. Some cases cpu0 won't have online file since
+	 * it is not expected to be turned off generally.
+	 * In kernels without CONFIG_HOTPLUG_CPU, this
+	 * file won't exist
+	 */
+	snprintf(buf, sizeof(buf),
+		"/sys/devices/system/cpu/cpu%d/online", cpu);
+	if (stat(buf, &statbuf) != 0)
+		return 1;
+
+	/*
+	 * Read online file using sysfs__read_str.
+	 * If read or open fails, return -1.
+	 * If read succeeds, return value from file
+	 * which gets stored in "str"
+	 */
+	snprintf(buf, sizeof(buf),
+		"devices/system/cpu/cpu%d/online", cpu);
+
+	if (sysfs__read_str(buf, &str, &strlen) < 0)
+		return status;
+
+	status = atoi(str);
+
+	free(str);
+	return status;
+}
+
+#ifdef HAVE_LIBBPF_SUPPORT
+static int write_bpf_prog_info(struct feat_fd *ff,
+			       struct evlist *evlist __maybe_unused)
+{
+	struct perf_env *env = &ff->ph->env;
+	struct rb_root *root;
+	struct rb_node *next;
+	int ret;
+
+	down_read(&env->bpf_progs.lock);
+
+	ret = do_write(ff, &env->bpf_progs.infos_cnt,
+		       sizeof(env->bpf_progs.infos_cnt));
+	if (ret < 0)
+		goto out;
+
+	root = &env->bpf_progs.infos;
+	next = rb_first(root);
+	while (next) {
+		struct bpf_prog_info_node *node;
+		size_t len;
+
+		node = rb_entry(next, struct bpf_prog_info_node, rb_node);
+		next = rb_next(&node->rb_node);
+		len = sizeof(struct perf_bpil) +
+			node->info_linear->data_len;
+
+		/* before writing to file, translate address to offset */
+		bpil_addr_to_offs(node->info_linear);
+		ret = do_write(ff, node->info_linear, len);
+		/*
+		 * translate back to address even when do_write() fails,
+		 * so that this function never changes the data.
+		 */
+		bpil_offs_to_addr(node->info_linear);
+		if (ret < 0)
+			goto out;
+	}
+out:
+	up_read(&env->bpf_progs.lock);
+	return ret;
+}
+
+static int write_bpf_btf(struct feat_fd *ff,
+			 struct evlist *evlist __maybe_unused)
+{
+	struct perf_env *env = &ff->ph->env;
+	struct rb_root *root;
+	struct rb_node *next;
+	int ret;
+
+	down_read(&env->bpf_progs.lock);
+
+	ret = do_write(ff, &env->bpf_progs.btfs_cnt,
+		       sizeof(env->bpf_progs.btfs_cnt));
+
+	if (ret < 0)
+		goto out;
+
+	root = &env->bpf_progs.btfs;
+	next = rb_first(root);
+	while (next) {
+		struct btf_node *node;
+
+		node = rb_entry(next, struct btf_node, rb_node);
+		next = rb_next(&node->rb_node);
+		ret = do_write(ff, &node->id,
+			       sizeof(u32) * 2 + node->data_size);
+		if (ret < 0)
+			goto out;
+	}
+out:
+	up_read(&env->bpf_progs.lock);
+	return ret;
+}
+#endif // HAVE_LIBBPF_SUPPORT
+
+static int cpu_cache_level__sort(const void *a, const void *b)
+{
+	struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
+	struct cpu_cache_level *cache_b = (struct cpu_cache_level *)b;
+
+	return cache_a->level - cache_b->level;
+}
+
+static bool cpu_cache_level__cmp(struct cpu_cache_level *a, struct cpu_cache_level *b)
+{
+	if (a->level != b->level)
+		return false;
+
+	if (a->line_size != b->line_size)
+		return false;
+
+	if (a->sets != b->sets)
+		return false;
+
+	if (a->ways != b->ways)
+		return false;
+
+	if (strcmp(a->type, b->type))
+		return false;
+
+	if (strcmp(a->size, b->size))
+		return false;
+
+	if (strcmp(a->map, b->map))
+		return false;
+
+	return true;
+}
+
+static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 level)
+{
+	char path[PATH_MAX], file[PATH_MAX];
+	struct stat st;
+	size_t len;
+
+	scnprintf(path, PATH_MAX, "devices/system/cpu/cpu%d/cache/index%d/", cpu, level);
+	scnprintf(file, PATH_MAX, "%s/%s", sysfs__mountpoint(), path);
+
+	if (stat(file, &st))
+		return 1;
+
+	scnprintf(file, PATH_MAX, "%s/level", path);
+	if (sysfs__read_int(file, (int *) &cache->level))
+		return -1;
+
+	scnprintf(file, PATH_MAX, "%s/coherency_line_size", path);
+	if (sysfs__read_int(file, (int *) &cache->line_size))
+		return -1;
+
+	scnprintf(file, PATH_MAX, "%s/number_of_sets", path);
+	if (sysfs__read_int(file, (int *) &cache->sets))
+		return -1;
+
+	scnprintf(file, PATH_MAX, "%s/ways_of_associativity", path);
+	if (sysfs__read_int(file, (int *) &cache->ways))
+		return -1;
+
+	scnprintf(file, PATH_MAX, "%s/type", path);
+	if (sysfs__read_str(file, &cache->type, &len))
+		return -1;
+
+	cache->type[len] = 0;
+	cache->type = strim(cache->type);
+
+	scnprintf(file, PATH_MAX, "%s/size", path);
+	if (sysfs__read_str(file, &cache->size, &len)) {
+		zfree(&cache->type);
+		return -1;
+	}
+
+	cache->size[len] = 0;
+	cache->size = strim(cache->size);
+
+	scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path);
+	if (sysfs__read_str(file, &cache->map, &len)) {
+		zfree(&cache->size);
+		zfree(&cache->type);
+		return -1;
+	}
+
+	cache->map[len] = 0;
+	cache->map = strim(cache->map);
+	return 0;
+}
+
+static void cpu_cache_level__fprintf(FILE *out, struct cpu_cache_level *c)
+{
+	fprintf(out, "L%d %-15s %8s [%s]\n", c->level, c->type, c->size, c->map);
+}
+
+#define MAX_CACHE_LVL 4
+
+static int build_caches(struct cpu_cache_level caches[], u32 *cntp)
+{
+	u32 i, cnt = 0;
+	u32 nr, cpu;
+	u16 level;
+
+	nr = cpu__max_cpu().cpu;
+
+	for (cpu = 0; cpu < nr; cpu++) {
+		for (level = 0; level < MAX_CACHE_LVL; level++) {
+			struct cpu_cache_level c;
+			int err;
+
+			err = cpu_cache_level__read(&c, cpu, level);
+			if (err < 0)
+				return err;
+
+			if (err == 1)
+				break;
+
+			for (i = 0; i < cnt; i++) {
+				if (cpu_cache_level__cmp(&c, &caches[i]))
+					break;
+			}
+
+			if (i == cnt)
+				caches[cnt++] = c;
+			else
+				cpu_cache_level__free(&c);
+		}
+	}
+	*cntp = cnt;
+	return 0;
+}
+
+static int write_cache(struct feat_fd *ff,
+		       struct evlist *evlist __maybe_unused)
+{
+	u32 max_caches = cpu__max_cpu().cpu * MAX_CACHE_LVL;
+	struct cpu_cache_level caches[max_caches];
+	u32 cnt = 0, i, version = 1;
+	int ret;
+
+	ret = build_caches(caches, &cnt);
+	if (ret)
+		goto out;
+
+	qsort(&caches, cnt, sizeof(struct cpu_cache_level), cpu_cache_level__sort);
+
+	ret = do_write(ff, &version, sizeof(u32));
+	if (ret < 0)
+		goto out;
+
+	ret = do_write(ff, &cnt, sizeof(u32));
+	if (ret < 0)
+		goto out;
+
+	for (i = 0; i < cnt; i++) {
+		struct cpu_cache_level *c = &caches[i];
+
+		#define _W(v)					\
+			ret = do_write(ff, &c->v, sizeof(u32));	\
+			if (ret < 0)				\
+				goto out;
+
+		_W(level)
+		_W(line_size)
+		_W(sets)
+		_W(ways)
+		#undef _W
+
+		#define _W(v)						\
+			ret = do_write_string(ff, (const char *) c->v);	\
+			if (ret < 0)					\
+				goto out;
+
+		_W(type)
+		_W(size)
+		_W(map)
+		#undef _W
+	}
+
+out:
+	for (i = 0; i < cnt; i++)
+		cpu_cache_level__free(&caches[i]);
+	return ret;
+}
+
+static int write_stat(struct feat_fd *ff __maybe_unused,
+		      struct evlist *evlist __maybe_unused)
+{
+	return 0;
+}
+
+static int write_sample_time(struct feat_fd *ff,
+			     struct evlist *evlist)
+{
+	int ret;
+
+	ret = do_write(ff, &evlist->first_sample_time,
+		       sizeof(evlist->first_sample_time));
+	if (ret < 0)
+		return ret;
+
+	return do_write(ff, &evlist->last_sample_time,
+			sizeof(evlist->last_sample_time));
+}
+
+
+static int memory_node__read(struct memory_node *n, unsigned long idx)
+{
+	unsigned int phys, size = 0;
+	char path[PATH_MAX];
+	struct dirent *ent;
+	DIR *dir;
+
+#define for_each_memory(mem, dir)					\
+	while ((ent = readdir(dir)))					\
+		if (strcmp(ent->d_name, ".") &&				\
+		    strcmp(ent->d_name, "..") &&			\
+		    sscanf(ent->d_name, "memory%u", &mem) == 1)
+
+	scnprintf(path, PATH_MAX,
+		  "%s/devices/system/node/node%lu",
+		  sysfs__mountpoint(), idx);
+
+	dir = opendir(path);
+	if (!dir) {
+		pr_warning("failed: can't open memory sysfs data\n");
+		return -1;
+	}
+
+	for_each_memory(phys, dir) {
+		size = max(phys, size);
+	}
+
+	size++;
+
+	n->set = bitmap_zalloc(size);
+	if (!n->set) {
+		closedir(dir);
+		return -ENOMEM;
+	}
+
+	n->node = idx;
+	n->size = size;
+
+	rewinddir(dir);
+
+	for_each_memory(phys, dir) {
+		set_bit(phys, n->set);
+	}
+
+	closedir(dir);
+	return 0;
+}
+
+static int memory_node__sort(const void *a, const void *b)
+{
+	const struct memory_node *na = a;
+	const struct memory_node *nb = b;
+
+	return na->node - nb->node;
+}
+
+static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp)
+{
+	char path[PATH_MAX];
+	struct dirent *ent;
+	DIR *dir;
+	u64 cnt = 0;
+	int ret = 0;
+
+	scnprintf(path, PATH_MAX, "%s/devices/system/node/",
+		  sysfs__mountpoint());
+
+	dir = opendir(path);
+	if (!dir) {
+		pr_debug2("%s: couldn't read %s, does this arch have topology information?\n",
+			  __func__, path);
+		return -1;
+	}
+
+	while (!ret && (ent = readdir(dir))) {
+		unsigned int idx;
+		int r;
+
+		if (!strcmp(ent->d_name, ".") ||
+		    !strcmp(ent->d_name, ".."))
+			continue;
+
+		r = sscanf(ent->d_name, "node%u", &idx);
+		if (r != 1)
+			continue;
+
+		if (WARN_ONCE(cnt >= size,
+			"failed to write MEM_TOPOLOGY, way too many nodes\n")) {
+			closedir(dir);
+			return -1;
+		}
+
+		ret = memory_node__read(&nodes[cnt++], idx);
+	}
+
+	*cntp = cnt;
+	closedir(dir);
+
+	if (!ret)
+		qsort(nodes, cnt, sizeof(nodes[0]), memory_node__sort);
+
+	return ret;
+}
+
+#define MAX_MEMORY_NODES 2000
+
+/*
+ * The MEM_TOPOLOGY holds physical memory map for every
+ * node in system. The format of data is as follows:
+ *
+ *  0 - version          | for future changes
+ *  8 - block_size_bytes | /sys/devices/system/memory/block_size_bytes
+ * 16 - count            | number of nodes
+ *
+ * For each node we store map of physical indexes for
+ * each node:
+ *
+ * 32 - node id          | node index
+ * 40 - size             | size of bitmap
+ * 48 - bitmap           | bitmap of memory indexes that belongs to node
+ */
+static int write_mem_topology(struct feat_fd *ff __maybe_unused,
+			      struct evlist *evlist __maybe_unused)
+{
+	static struct memory_node nodes[MAX_MEMORY_NODES];
+	u64 bsize, version = 1, i, nr;
+	int ret;
+
+	ret = sysfs__read_xll("devices/system/memory/block_size_bytes",
+			      (unsigned long long *) &bsize);
+	if (ret)
+		return ret;
+
+	ret = build_mem_topology(&nodes[0], MAX_MEMORY_NODES, &nr);
+	if (ret)
+		return ret;
+
+	ret = do_write(ff, &version, sizeof(version));
+	if (ret < 0)
+		goto out;
+
+	ret = do_write(ff, &bsize, sizeof(bsize));
+	if (ret < 0)
+		goto out;
+
+	ret = do_write(ff, &nr, sizeof(nr));
+	if (ret < 0)
+		goto out;
+
+	for (i = 0; i < nr; i++) {
+		struct memory_node *n = &nodes[i];
+
+		#define _W(v)						\
+			ret = do_write(ff, &n->v, sizeof(n->v));	\
+			if (ret < 0)					\
+				goto out;
+
+		_W(node)
+		_W(size)
+
+		#undef _W
+
+		ret = do_write_bitmap(ff, n->set, n->size);
+		if (ret < 0)
+			goto out;
+	}
+
+out:
+	return ret;
+}
+
+static int write_compressed(struct feat_fd *ff __maybe_unused,
+			    struct evlist *evlist __maybe_unused)
+{
+	int ret;
+
+	ret = do_write(ff, &(ff->ph->env.comp_ver), sizeof(ff->ph->env.comp_ver));
+	if (ret)
+		return ret;
+
+	ret = do_write(ff, &(ff->ph->env.comp_type), sizeof(ff->ph->env.comp_type));
+	if (ret)
+		return ret;
+
+	ret = do_write(ff, &(ff->ph->env.comp_level), sizeof(ff->ph->env.comp_level));
+	if (ret)
+		return ret;
+
+	ret = do_write(ff, &(ff->ph->env.comp_ratio), sizeof(ff->ph->env.comp_ratio));
+	if (ret)
+		return ret;
+
+	return do_write(ff, &(ff->ph->env.comp_mmap_len), sizeof(ff->ph->env.comp_mmap_len));
+}
+
+static int __write_pmu_caps(struct feat_fd *ff, struct perf_pmu *pmu,
+			    bool write_pmu)
+{
+	struct perf_pmu_caps *caps = NULL;
+	int ret;
+
+	ret = do_write(ff, &pmu->nr_caps, sizeof(pmu->nr_caps));
+	if (ret < 0)
+		return ret;
+
+	list_for_each_entry(caps, &pmu->caps, list) {
+		ret = do_write_string(ff, caps->name);
+		if (ret < 0)
+			return ret;
+
+		ret = do_write_string(ff, caps->value);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (write_pmu) {
+		ret = do_write_string(ff, pmu->name);
+		if (ret < 0)
+			return ret;
+	}
+
+	return ret;
+}
+
+static int write_cpu_pmu_caps(struct feat_fd *ff,
+			      struct evlist *evlist __maybe_unused)
+{
+	struct perf_pmu *cpu_pmu = perf_pmu__find("cpu");
+	int ret;
+
+	if (!cpu_pmu)
+		return -ENOENT;
+
+	ret = perf_pmu__caps_parse(cpu_pmu);
+	if (ret < 0)
+		return ret;
+
+	return __write_pmu_caps(ff, cpu_pmu, false);
+}
+
+static int write_pmu_caps(struct feat_fd *ff,
+			  struct evlist *evlist __maybe_unused)
+{
+	struct perf_pmu *pmu = NULL;
+	int nr_pmu = 0;
+	int ret;
+
+	while ((pmu = perf_pmu__scan(pmu))) {
+		if (!pmu->name || !strcmp(pmu->name, "cpu") ||
+		    perf_pmu__caps_parse(pmu) <= 0)
+			continue;
+		nr_pmu++;
+	}
+
+	ret = do_write(ff, &nr_pmu, sizeof(nr_pmu));
+	if (ret < 0)
+		return ret;
+
+	if (!nr_pmu)
+		return 0;
+
+	/*
+	 * Write hybrid pmu caps first to maintain compatibility with
+	 * older perf tool.
+	 */
+	pmu = NULL;
+	perf_pmu__for_each_hybrid_pmu(pmu) {
+		ret = __write_pmu_caps(ff, pmu, true);
+		if (ret < 0)
+			return ret;
+	}
+
+	pmu = NULL;
+	while ((pmu = perf_pmu__scan(pmu))) {
+		if (!pmu->name || !strcmp(pmu->name, "cpu") ||
+		    !pmu->nr_caps || perf_pmu__is_hybrid(pmu->name))
+			continue;
+
+		ret = __write_pmu_caps(ff, pmu, true);
+		if (ret < 0)
+			return ret;
+	}
+	return 0;
+}
+
+static void print_hostname(struct feat_fd *ff, FILE *fp)
+{
+	fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname);
+}
+
+static void print_osrelease(struct feat_fd *ff, FILE *fp)
+{
+	fprintf(fp, "# os release : %s\n", ff->ph->env.os_release);
+}
+
+static void print_arch(struct feat_fd *ff, FILE *fp)
+{
+	fprintf(fp, "# arch : %s\n", ff->ph->env.arch);
+}
+
+static void print_cpudesc(struct feat_fd *ff, FILE *fp)
+{
+	fprintf(fp, "# cpudesc : %s\n", ff->ph->env.cpu_desc);
+}
+
+static void print_nrcpus(struct feat_fd *ff, FILE *fp)
+{
+	fprintf(fp, "# nrcpus online : %u\n", ff->ph->env.nr_cpus_online);
+	fprintf(fp, "# nrcpus avail : %u\n", ff->ph->env.nr_cpus_avail);
+}
+
+static void print_version(struct feat_fd *ff, FILE *fp)
+{
+	fprintf(fp, "# perf version : %s\n", ff->ph->env.version);
+}
+
+static void print_cmdline(struct feat_fd *ff, FILE *fp)
+{
+	int nr, i;
+
+	nr = ff->ph->env.nr_cmdline;
+
+	fprintf(fp, "# cmdline : ");
+
+	for (i = 0; i < nr; i++) {
+		char *argv_i = strdup(ff->ph->env.cmdline_argv[i]);
+		if (!argv_i) {
+			fprintf(fp, "%s ", ff->ph->env.cmdline_argv[i]);
+		} else {
+			char *mem = argv_i;
+			do {
+				char *quote = strchr(argv_i, '\'');
+				if (!quote)
+					break;
+				*quote++ = '\0';
+				fprintf(fp, "%s\\\'", argv_i);
+				argv_i = quote;
+			} while (1);
+			fprintf(fp, "%s ", argv_i);
+			free(mem);
+		}
+	}
+	fputc('\n', fp);
+}
+
+static void print_cpu_topology(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_header *ph = ff->ph;
+	int cpu_nr = ph->env.nr_cpus_avail;
+	int nr, i;
+	char *str;
+
+	nr = ph->env.nr_sibling_cores;
+	str = ph->env.sibling_cores;
+
+	for (i = 0; i < nr; i++) {
+		fprintf(fp, "# sibling sockets : %s\n", str);
+		str += strlen(str) + 1;
+	}
+
+	if (ph->env.nr_sibling_dies) {
+		nr = ph->env.nr_sibling_dies;
+		str = ph->env.sibling_dies;
+
+		for (i = 0; i < nr; i++) {
+			fprintf(fp, "# sibling dies    : %s\n", str);
+			str += strlen(str) + 1;
+		}
+	}
+
+	nr = ph->env.nr_sibling_threads;
+	str = ph->env.sibling_threads;
+
+	for (i = 0; i < nr; i++) {
+		fprintf(fp, "# sibling threads : %s\n", str);
+		str += strlen(str) + 1;
+	}
+
+	if (ph->env.nr_sibling_dies) {
+		if (ph->env.cpu != NULL) {
+			for (i = 0; i < cpu_nr; i++)
+				fprintf(fp, "# CPU %d: Core ID %d, "
+					    "Die ID %d, Socket ID %d\n",
+					    i, ph->env.cpu[i].core_id,
+					    ph->env.cpu[i].die_id,
+					    ph->env.cpu[i].socket_id);
+		} else
+			fprintf(fp, "# Core ID, Die ID and Socket ID "
+				    "information is not available\n");
+	} else {
+		if (ph->env.cpu != NULL) {
+			for (i = 0; i < cpu_nr; i++)
+				fprintf(fp, "# CPU %d: Core ID %d, "
+					    "Socket ID %d\n",
+					    i, ph->env.cpu[i].core_id,
+					    ph->env.cpu[i].socket_id);
+		} else
+			fprintf(fp, "# Core ID and Socket ID "
+				    "information is not available\n");
+	}
+}
+
+static void print_clockid(struct feat_fd *ff, FILE *fp)
+{
+	fprintf(fp, "# clockid frequency: %"PRIu64" MHz\n",
+		ff->ph->env.clock.clockid_res_ns * 1000);
+}
+
+static void print_clock_data(struct feat_fd *ff, FILE *fp)
+{
+	struct timespec clockid_ns;
+	char tstr[64], date[64];
+	struct timeval tod_ns;
+	clockid_t clockid;
+	struct tm ltime;
+	u64 ref;
+
+	if (!ff->ph->env.clock.enabled) {
+		fprintf(fp, "# reference time disabled\n");
+		return;
+	}
+
+	/* Compute TOD time. */
+	ref = ff->ph->env.clock.tod_ns;
+	tod_ns.tv_sec = ref / NSEC_PER_SEC;
+	ref -= tod_ns.tv_sec * NSEC_PER_SEC;
+	tod_ns.tv_usec = ref / NSEC_PER_USEC;
+
+	/* Compute clockid time. */
+	ref = ff->ph->env.clock.clockid_ns;
+	clockid_ns.tv_sec = ref / NSEC_PER_SEC;
+	ref -= clockid_ns.tv_sec * NSEC_PER_SEC;
+	clockid_ns.tv_nsec = ref;
+
+	clockid = ff->ph->env.clock.clockid;
+
+	if (localtime_r(&tod_ns.tv_sec, &ltime) == NULL)
+		snprintf(tstr, sizeof(tstr), "<error>");
+	else {
+		strftime(date, sizeof(date), "%F %T", &ltime);
+		scnprintf(tstr, sizeof(tstr), "%s.%06d",
+			  date, (int) tod_ns.tv_usec);
+	}
+
+	fprintf(fp, "# clockid: %s (%u)\n", clockid_name(clockid), clockid);
+	fprintf(fp, "# reference time: %s = %ld.%06d (TOD) = %ld.%09ld (%s)\n",
+		    tstr, (long) tod_ns.tv_sec, (int) tod_ns.tv_usec,
+		    (long) clockid_ns.tv_sec, clockid_ns.tv_nsec,
+		    clockid_name(clockid));
+}
+
+static void print_hybrid_topology(struct feat_fd *ff, FILE *fp)
+{
+	int i;
+	struct hybrid_node *n;
+
+	fprintf(fp, "# hybrid cpu system:\n");
+	for (i = 0; i < ff->ph->env.nr_hybrid_nodes; i++) {
+		n = &ff->ph->env.hybrid_nodes[i];
+		fprintf(fp, "# %s cpu list : %s\n", n->pmu_name, n->cpus);
+	}
+}
+
+static void print_dir_format(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_session *session;
+	struct perf_data *data;
+
+	session = container_of(ff->ph, struct perf_session, header);
+	data = session->data;
+
+	fprintf(fp, "# directory data version : %"PRIu64"\n", data->dir.version);
+}
+
+#ifdef HAVE_LIBBPF_SUPPORT
+static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_env *env = &ff->ph->env;
+	struct rb_root *root;
+	struct rb_node *next;
+
+	down_read(&env->bpf_progs.lock);
+
+	root = &env->bpf_progs.infos;
+	next = rb_first(root);
+
+	while (next) {
+		struct bpf_prog_info_node *node;
+
+		node = rb_entry(next, struct bpf_prog_info_node, rb_node);
+		next = rb_next(&node->rb_node);
+
+		__bpf_event__print_bpf_prog_info(&node->info_linear->info,
+						 env, fp);
+	}
+
+	up_read(&env->bpf_progs.lock);
+}
+
+static void print_bpf_btf(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_env *env = &ff->ph->env;
+	struct rb_root *root;
+	struct rb_node *next;
+
+	down_read(&env->bpf_progs.lock);
+
+	root = &env->bpf_progs.btfs;
+	next = rb_first(root);
+
+	while (next) {
+		struct btf_node *node;
+
+		node = rb_entry(next, struct btf_node, rb_node);
+		next = rb_next(&node->rb_node);
+		fprintf(fp, "# btf info of id %u\n", node->id);
+	}
+
+	up_read(&env->bpf_progs.lock);
+}
+#endif // HAVE_LIBBPF_SUPPORT
+
+static void free_event_desc(struct evsel *events)
+{
+	struct evsel *evsel;
+
+	if (!events)
+		return;
+
+	for (evsel = events; evsel->core.attr.size; evsel++) {
+		zfree(&evsel->name);
+		zfree(&evsel->core.id);
+	}
+
+	free(events);
+}
+
+static bool perf_attr_check(struct perf_event_attr *attr)
+{
+	if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) {
+		pr_warning("Reserved bits are set unexpectedly. "
+			   "Please update perf tool.\n");
+		return false;
+	}
+
+	if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) {
+		pr_warning("Unknown sample type (0x%llx) is detected. "
+			   "Please update perf tool.\n",
+			   attr->sample_type);
+		return false;
+	}
+
+	if (attr->read_format & ~(PERF_FORMAT_MAX-1)) {
+		pr_warning("Unknown read format (0x%llx) is detected. "
+			   "Please update perf tool.\n",
+			   attr->read_format);
+		return false;
+	}
+
+	if ((attr->sample_type & PERF_SAMPLE_BRANCH_STACK) &&
+	    (attr->branch_sample_type & ~(PERF_SAMPLE_BRANCH_MAX-1))) {
+		pr_warning("Unknown branch sample type (0x%llx) is detected. "
+			   "Please update perf tool.\n",
+			   attr->branch_sample_type);
+
+		return false;
+	}
+
+	return true;
+}
+
+static struct evsel *read_event_desc(struct feat_fd *ff)
+{
+	struct evsel *evsel, *events = NULL;
+	u64 *id;
+	void *buf = NULL;
+	u32 nre, sz, nr, i, j;
+	size_t msz;
+
+	/* number of events */
+	if (do_read_u32(ff, &nre))
+		goto error;
+
+	if (do_read_u32(ff, &sz))
+		goto error;
+
+	/* buffer to hold on file attr struct */
+	buf = malloc(sz);
+	if (!buf)
+		goto error;
+
+	/* the last event terminates with evsel->core.attr.size == 0: */
+	events = calloc(nre + 1, sizeof(*events));
+	if (!events)
+		goto error;
+
+	msz = sizeof(evsel->core.attr);
+	if (sz < msz)
+		msz = sz;
+
+	for (i = 0, evsel = events; i < nre; evsel++, i++) {
+		evsel->core.idx = i;
+
+		/*
+		 * must read entire on-file attr struct to
+		 * sync up with layout.
+		 */
+		if (__do_read(ff, buf, sz))
+			goto error;
+
+		if (ff->ph->needs_swap)
+			perf_event__attr_swap(buf);
+
+		memcpy(&evsel->core.attr, buf, msz);
+
+		if (!perf_attr_check(&evsel->core.attr))
+			goto error;
+
+		if (do_read_u32(ff, &nr))
+			goto error;
+
+		if (ff->ph->needs_swap)
+			evsel->needs_swap = true;
+
+		evsel->name = do_read_string(ff);
+		if (!evsel->name)
+			goto error;
+
+		if (!nr)
+			continue;
+
+		id = calloc(nr, sizeof(*id));
+		if (!id)
+			goto error;
+		evsel->core.ids = nr;
+		evsel->core.id = id;
+
+		for (j = 0 ; j < nr; j++) {
+			if (do_read_u64(ff, id))
+				goto error;
+			id++;
+		}
+	}
+out:
+	free(buf);
+	return events;
+error:
+	free_event_desc(events);
+	events = NULL;
+	goto out;
+}
+
+static int __desc_attr__fprintf(FILE *fp, const char *name, const char *val,
+				void *priv __maybe_unused)
+{
+	return fprintf(fp, ", %s = %s", name, val);
+}
+
+static void print_event_desc(struct feat_fd *ff, FILE *fp)
+{
+	struct evsel *evsel, *events;
+	u32 j;
+	u64 *id;
+
+	if (ff->events)
+		events = ff->events;
+	else
+		events = read_event_desc(ff);
+
+	if (!events) {
+		fprintf(fp, "# event desc: not available or unable to read\n");
+		return;
+	}
+
+	for (evsel = events; evsel->core.attr.size; evsel++) {
+		fprintf(fp, "# event : name = %s, ", evsel->name);
+
+		if (evsel->core.ids) {
+			fprintf(fp, ", id = {");
+			for (j = 0, id = evsel->core.id; j < evsel->core.ids; j++, id++) {
+				if (j)
+					fputc(',', fp);
+				fprintf(fp, " %"PRIu64, *id);
+			}
+			fprintf(fp, " }");
+		}
+
+		perf_event_attr__fprintf(fp, &evsel->core.attr, __desc_attr__fprintf, NULL);
+
+		fputc('\n', fp);
+	}
+
+	free_event_desc(events);
+	ff->events = NULL;
+}
+
+static void print_total_mem(struct feat_fd *ff, FILE *fp)
+{
+	fprintf(fp, "# total memory : %llu kB\n", ff->ph->env.total_mem);
+}
+
+static void print_numa_topology(struct feat_fd *ff, FILE *fp)
+{
+	int i;
+	struct numa_node *n;
+
+	for (i = 0; i < ff->ph->env.nr_numa_nodes; i++) {
+		n = &ff->ph->env.numa_nodes[i];
+
+		fprintf(fp, "# node%u meminfo  : total = %"PRIu64" kB,"
+			    " free = %"PRIu64" kB\n",
+			n->node, n->mem_total, n->mem_free);
+
+		fprintf(fp, "# node%u cpu list : ", n->node);
+		cpu_map__fprintf(n->map, fp);
+	}
+}
+
+static void print_cpuid(struct feat_fd *ff, FILE *fp)
+{
+	fprintf(fp, "# cpuid : %s\n", ff->ph->env.cpuid);
+}
+
+static void print_branch_stack(struct feat_fd *ff __maybe_unused, FILE *fp)
+{
+	fprintf(fp, "# contains samples with branch stack\n");
+}
+
+static void print_auxtrace(struct feat_fd *ff __maybe_unused, FILE *fp)
+{
+	fprintf(fp, "# contains AUX area data (e.g. instruction trace)\n");
+}
+
+static void print_stat(struct feat_fd *ff __maybe_unused, FILE *fp)
+{
+	fprintf(fp, "# contains stat data\n");
+}
+
+static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused)
+{
+	int i;
+
+	fprintf(fp, "# CPU cache info:\n");
+	for (i = 0; i < ff->ph->env.caches_cnt; i++) {
+		fprintf(fp, "#  ");
+		cpu_cache_level__fprintf(fp, &ff->ph->env.caches[i]);
+	}
+}
+
+static void print_compressed(struct feat_fd *ff, FILE *fp)
+{
+	fprintf(fp, "# compressed : %s, level = %d, ratio = %d\n",
+		ff->ph->env.comp_type == PERF_COMP_ZSTD ? "Zstd" : "Unknown",
+		ff->ph->env.comp_level, ff->ph->env.comp_ratio);
+}
+
+static void __print_pmu_caps(FILE *fp, int nr_caps, char **caps, char *pmu_name)
+{
+	const char *delimiter = "";
+	int i;
+
+	if (!nr_caps) {
+		fprintf(fp, "# %s pmu capabilities: not available\n", pmu_name);
+		return;
+	}
+
+	fprintf(fp, "# %s pmu capabilities: ", pmu_name);
+	for (i = 0; i < nr_caps; i++) {
+		fprintf(fp, "%s%s", delimiter, caps[i]);
+		delimiter = ", ";
+	}
+
+	fprintf(fp, "\n");
+}
+
+static void print_cpu_pmu_caps(struct feat_fd *ff, FILE *fp)
+{
+	__print_pmu_caps(fp, ff->ph->env.nr_cpu_pmu_caps,
+			 ff->ph->env.cpu_pmu_caps, (char *)"cpu");
+}
+
+static void print_pmu_caps(struct feat_fd *ff, FILE *fp)
+{
+	struct pmu_caps *pmu_caps;
+
+	for (int i = 0; i < ff->ph->env.nr_pmus_with_caps; i++) {
+		pmu_caps = &ff->ph->env.pmu_caps[i];
+		__print_pmu_caps(fp, pmu_caps->nr_caps, pmu_caps->caps,
+				 pmu_caps->pmu_name);
+	}
+}
+
+static void print_pmu_mappings(struct feat_fd *ff, FILE *fp)
+{
+	const char *delimiter = "# pmu mappings: ";
+	char *str, *tmp;
+	u32 pmu_num;
+	u32 type;
+
+	pmu_num = ff->ph->env.nr_pmu_mappings;
+	if (!pmu_num) {
+		fprintf(fp, "# pmu mappings: not available\n");
+		return;
+	}
+
+	str = ff->ph->env.pmu_mappings;
+
+	while (pmu_num) {
+		type = strtoul(str, &tmp, 0);
+		if (*tmp != ':')
+			goto error;
+
+		str = tmp + 1;
+		fprintf(fp, "%s%s = %" PRIu32, delimiter, str, type);
+
+		delimiter = ", ";
+		str += strlen(str) + 1;
+		pmu_num--;
+	}
+
+	fprintf(fp, "\n");
+
+	if (!pmu_num)
+		return;
+error:
+	fprintf(fp, "# pmu mappings: unable to read\n");
+}
+
+static void print_group_desc(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_session *session;
+	struct evsel *evsel;
+	u32 nr = 0;
+
+	session = container_of(ff->ph, struct perf_session, header);
+
+	evlist__for_each_entry(session->evlist, evsel) {
+		if (evsel__is_group_leader(evsel) && evsel->core.nr_members > 1) {
+			fprintf(fp, "# group: %s{%s", evsel->group_name ?: "", evsel__name(evsel));
+
+			nr = evsel->core.nr_members - 1;
+		} else if (nr) {
+			fprintf(fp, ",%s", evsel__name(evsel));
+
+			if (--nr == 0)
+				fprintf(fp, "}\n");
+		}
+	}
+}
+
+static void print_sample_time(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_session *session;
+	char time_buf[32];
+	double d;
+
+	session = container_of(ff->ph, struct perf_session, header);
+
+	timestamp__scnprintf_usec(session->evlist->first_sample_time,
+				  time_buf, sizeof(time_buf));
+	fprintf(fp, "# time of first sample : %s\n", time_buf);
+
+	timestamp__scnprintf_usec(session->evlist->last_sample_time,
+				  time_buf, sizeof(time_buf));
+	fprintf(fp, "# time of last sample : %s\n", time_buf);
+
+	d = (double)(session->evlist->last_sample_time -
+		session->evlist->first_sample_time) / NSEC_PER_MSEC;
+
+	fprintf(fp, "# sample duration : %10.3f ms\n", d);
+}
+
+static void memory_node__fprintf(struct memory_node *n,
+				 unsigned long long bsize, FILE *fp)
+{
+	char buf_map[100], buf_size[50];
+	unsigned long long size;
+
+	size = bsize * bitmap_weight(n->set, n->size);
+	unit_number__scnprintf(buf_size, 50, size);
+
+	bitmap_scnprintf(n->set, n->size, buf_map, 100);
+	fprintf(fp, "#  %3" PRIu64 " [%s]: %s\n", n->node, buf_size, buf_map);
+}
+
+static void print_mem_topology(struct feat_fd *ff, FILE *fp)
+{
+	struct memory_node *nodes;
+	int i, nr;
+
+	nodes = ff->ph->env.memory_nodes;
+	nr    = ff->ph->env.nr_memory_nodes;
+
+	fprintf(fp, "# memory nodes (nr %d, block size 0x%llx):\n",
+		nr, ff->ph->env.memory_bsize);
+
+	for (i = 0; i < nr; i++) {
+		memory_node__fprintf(&nodes[i], ff->ph->env.memory_bsize, fp);
+	}
+}
+
+static int __event_process_build_id(struct perf_record_header_build_id *bev,
+				    char *filename,
+				    struct perf_session *session)
+{
+	int err = -1;
+	struct machine *machine;
+	u16 cpumode;
+	struct dso *dso;
+	enum dso_space_type dso_space;
+
+	machine = perf_session__findnew_machine(session, bev->pid);
+	if (!machine)
+		goto out;
+
+	cpumode = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+
+	switch (cpumode) {
+	case PERF_RECORD_MISC_KERNEL:
+		dso_space = DSO_SPACE__KERNEL;
+		break;
+	case PERF_RECORD_MISC_GUEST_KERNEL:
+		dso_space = DSO_SPACE__KERNEL_GUEST;
+		break;
+	case PERF_RECORD_MISC_USER:
+	case PERF_RECORD_MISC_GUEST_USER:
+		dso_space = DSO_SPACE__USER;
+		break;
+	default:
+		goto out;
+	}
+
+	dso = machine__findnew_dso(machine, filename);
+	if (dso != NULL) {
+		char sbuild_id[SBUILD_ID_SIZE];
+		struct build_id bid;
+		size_t size = BUILD_ID_SIZE;
+
+		if (bev->header.misc & PERF_RECORD_MISC_BUILD_ID_SIZE)
+			size = bev->size;
+
+		build_id__init(&bid, bev->data, size);
+		dso__set_build_id(dso, &bid);
+		dso->header_build_id = 1;
+
+		if (dso_space != DSO_SPACE__USER) {
+			struct kmod_path m = { .name = NULL, };
+
+			if (!kmod_path__parse_name(&m, filename) && m.kmod)
+				dso__set_module_info(dso, &m, machine);
+
+			dso->kernel = dso_space;
+			free(m.name);
+		}
+
+		build_id__sprintf(&dso->bid, sbuild_id);
+		pr_debug("build id event received for %s: %s [%zu]\n",
+			 dso->long_name, sbuild_id, size);
+		dso__put(dso);
+	}
+
+	err = 0;
+out:
+	return err;
+}
+
+static int perf_header__read_build_ids_abi_quirk(struct perf_header *header,
+						 int input, u64 offset, u64 size)
+{
+	struct perf_session *session = container_of(header, struct perf_session, header);
+	struct {
+		struct perf_event_header   header;
+		u8			   build_id[PERF_ALIGN(BUILD_ID_SIZE, sizeof(u64))];
+		char			   filename[0];
+	} old_bev;
+	struct perf_record_header_build_id bev;
+	char filename[PATH_MAX];
+	u64 limit = offset + size;
+
+	while (offset < limit) {
+		ssize_t len;
+
+		if (readn(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev))
+			return -1;
+
+		if (header->needs_swap)
+			perf_event_header__bswap(&old_bev.header);
+
+		len = old_bev.header.size - sizeof(old_bev);
+		if (readn(input, filename, len) != len)
+			return -1;
+
+		bev.header = old_bev.header;
+
+		/*
+		 * As the pid is the missing value, we need to fill
+		 * it properly. The header.misc value give us nice hint.
+		 */
+		bev.pid	= HOST_KERNEL_ID;
+		if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER ||
+		    bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL)
+			bev.pid	= DEFAULT_GUEST_KERNEL_ID;
+
+		memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id));
+		__event_process_build_id(&bev, filename, session);
+
+		offset += bev.header.size;
+	}
+
+	return 0;
+}
+
+static int perf_header__read_build_ids(struct perf_header *header,
+				       int input, u64 offset, u64 size)
+{
+	struct perf_session *session = container_of(header, struct perf_session, header);
+	struct perf_record_header_build_id bev;
+	char filename[PATH_MAX];
+	u64 limit = offset + size, orig_offset = offset;
+	int err = -1;
+
+	while (offset < limit) {
+		ssize_t len;
+
+		if (readn(input, &bev, sizeof(bev)) != sizeof(bev))
+			goto out;
+
+		if (header->needs_swap)
+			perf_event_header__bswap(&bev.header);
+
+		len = bev.header.size - sizeof(bev);
+		if (readn(input, filename, len) != len)
+			goto out;
+		/*
+		 * The a1645ce1 changeset:
+		 *
+		 * "perf: 'perf kvm' tool for monitoring guest performance from host"
+		 *
+		 * Added a field to struct perf_record_header_build_id that broke the file
+		 * format.
+		 *
+		 * Since the kernel build-id is the first entry, process the
+		 * table using the old format if the well known
+		 * '[kernel.kallsyms]' string for the kernel build-id has the
+		 * first 4 characters chopped off (where the pid_t sits).
+		 */
+		if (memcmp(filename, "nel.kallsyms]", 13) == 0) {
+			if (lseek(input, orig_offset, SEEK_SET) == (off_t)-1)
+				return -1;
+			return perf_header__read_build_ids_abi_quirk(header, input, offset, size);
+		}
+
+		__event_process_build_id(&bev, filename, session);
+
+		offset += bev.header.size;
+	}
+	err = 0;
+out:
+	return err;
+}
+
+/* Macro for features that simply need to read and store a string. */
+#define FEAT_PROCESS_STR_FUN(__feat, __feat_env) \
+static int process_##__feat(struct feat_fd *ff, void *data __maybe_unused) \
+{\
+	free(ff->ph->env.__feat_env);		     \
+	ff->ph->env.__feat_env = do_read_string(ff); \
+	return ff->ph->env.__feat_env ? 0 : -ENOMEM; \
+}
+
+FEAT_PROCESS_STR_FUN(hostname, hostname);
+FEAT_PROCESS_STR_FUN(osrelease, os_release);
+FEAT_PROCESS_STR_FUN(version, version);
+FEAT_PROCESS_STR_FUN(arch, arch);
+FEAT_PROCESS_STR_FUN(cpudesc, cpu_desc);
+FEAT_PROCESS_STR_FUN(cpuid, cpuid);
+
+static int process_tracing_data(struct feat_fd *ff, void *data)
+{
+	ssize_t ret = trace_report(ff->fd, data, false);
+
+	return ret < 0 ? -1 : 0;
+}
+
+static int process_build_id(struct feat_fd *ff, void *data __maybe_unused)
+{
+	if (perf_header__read_build_ids(ff->ph, ff->fd, ff->offset, ff->size))
+		pr_debug("Failed to read buildids, continuing...\n");
+	return 0;
+}
+
+static int process_nrcpus(struct feat_fd *ff, void *data __maybe_unused)
+{
+	int ret;
+	u32 nr_cpus_avail, nr_cpus_online;
+
+	ret = do_read_u32(ff, &nr_cpus_avail);
+	if (ret)
+		return ret;
+
+	ret = do_read_u32(ff, &nr_cpus_online);
+	if (ret)
+		return ret;
+	ff->ph->env.nr_cpus_avail = (int)nr_cpus_avail;
+	ff->ph->env.nr_cpus_online = (int)nr_cpus_online;
+	return 0;
+}
+
+static int process_total_mem(struct feat_fd *ff, void *data __maybe_unused)
+{
+	u64 total_mem;
+	int ret;
+
+	ret = do_read_u64(ff, &total_mem);
+	if (ret)
+		return -1;
+	ff->ph->env.total_mem = (unsigned long long)total_mem;
+	return 0;
+}
+
+static struct evsel *evlist__find_by_index(struct evlist *evlist, int idx)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->core.idx == idx)
+			return evsel;
+	}
+
+	return NULL;
+}
+
+static void evlist__set_event_name(struct evlist *evlist, struct evsel *event)
+{
+	struct evsel *evsel;
+
+	if (!event->name)
+		return;
+
+	evsel = evlist__find_by_index(evlist, event->core.idx);
+	if (!evsel)
+		return;
+
+	if (evsel->name)
+		return;
+
+	evsel->name = strdup(event->name);
+}
+
+static int
+process_event_desc(struct feat_fd *ff, void *data __maybe_unused)
+{
+	struct perf_session *session;
+	struct evsel *evsel, *events = read_event_desc(ff);
+
+	if (!events)
+		return 0;
+
+	session = container_of(ff->ph, struct perf_session, header);
+
+	if (session->data->is_pipe) {
+		/* Save events for reading later by print_event_desc,
+		 * since they can't be read again in pipe mode. */
+		ff->events = events;
+	}
+
+	for (evsel = events; evsel->core.attr.size; evsel++)
+		evlist__set_event_name(session->evlist, evsel);
+
+	if (!session->data->is_pipe)
+		free_event_desc(events);
+
+	return 0;
+}
+
+static int process_cmdline(struct feat_fd *ff, void *data __maybe_unused)
+{
+	char *str, *cmdline = NULL, **argv = NULL;
+	u32 nr, i, len = 0;
+
+	if (do_read_u32(ff, &nr))
+		return -1;
+
+	ff->ph->env.nr_cmdline = nr;
+
+	cmdline = zalloc(ff->size + nr + 1);
+	if (!cmdline)
+		return -1;
+
+	argv = zalloc(sizeof(char *) * (nr + 1));
+	if (!argv)
+		goto error;
+
+	for (i = 0; i < nr; i++) {
+		str = do_read_string(ff);
+		if (!str)
+			goto error;
+
+		argv[i] = cmdline + len;
+		memcpy(argv[i], str, strlen(str) + 1);
+		len += strlen(str) + 1;
+		free(str);
+	}
+	ff->ph->env.cmdline = cmdline;
+	ff->ph->env.cmdline_argv = (const char **) argv;
+	return 0;
+
+error:
+	free(argv);
+	free(cmdline);
+	return -1;
+}
+
+static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused)
+{
+	u32 nr, i;
+	char *str;
+	struct strbuf sb;
+	int cpu_nr = ff->ph->env.nr_cpus_avail;
+	u64 size = 0;
+	struct perf_header *ph = ff->ph;
+	bool do_core_id_test = true;
+
+	ph->env.cpu = calloc(cpu_nr, sizeof(*ph->env.cpu));
+	if (!ph->env.cpu)
+		return -1;
+
+	if (do_read_u32(ff, &nr))
+		goto free_cpu;
+
+	ph->env.nr_sibling_cores = nr;
+	size += sizeof(u32);
+	if (strbuf_init(&sb, 128) < 0)
+		goto free_cpu;
+
+	for (i = 0; i < nr; i++) {
+		str = do_read_string(ff);
+		if (!str)
+			goto error;
+
+		/* include a NULL character at the end */
+		if (strbuf_add(&sb, str, strlen(str) + 1) < 0)
+			goto error;
+		size += string_size(str);
+		free(str);
+	}
+	ph->env.sibling_cores = strbuf_detach(&sb, NULL);
+
+	if (do_read_u32(ff, &nr))
+		return -1;
+
+	ph->env.nr_sibling_threads = nr;
+	size += sizeof(u32);
+
+	for (i = 0; i < nr; i++) {
+		str = do_read_string(ff);
+		if (!str)
+			goto error;
+
+		/* include a NULL character at the end */
+		if (strbuf_add(&sb, str, strlen(str) + 1) < 0)
+			goto error;
+		size += string_size(str);
+		free(str);
+	}
+	ph->env.sibling_threads = strbuf_detach(&sb, NULL);
+
+	/*
+	 * The header may be from old perf,
+	 * which doesn't include core id and socket id information.
+	 */
+	if (ff->size <= size) {
+		zfree(&ph->env.cpu);
+		return 0;
+	}
+
+	/* On s390 the socket_id number is not related to the numbers of cpus.
+	 * The socket_id number might be higher than the numbers of cpus.
+	 * This depends on the configuration.
+	 * AArch64 is the same.
+	 */
+	if (ph->env.arch && (!strncmp(ph->env.arch, "s390", 4)
+			  || !strncmp(ph->env.arch, "aarch64", 7)))
+		do_core_id_test = false;
+
+	for (i = 0; i < (u32)cpu_nr; i++) {
+		if (do_read_u32(ff, &nr))
+			goto free_cpu;
+
+		ph->env.cpu[i].core_id = nr;
+		size += sizeof(u32);
+
+		if (do_read_u32(ff, &nr))
+			goto free_cpu;
+
+		if (do_core_id_test && nr != (u32)-1 && nr > (u32)cpu_nr) {
+			pr_debug("socket_id number is too big."
+				 "You may need to upgrade the perf tool.\n");
+			goto free_cpu;
+		}
+
+		ph->env.cpu[i].socket_id = nr;
+		size += sizeof(u32);
+	}
+
+	/*
+	 * The header may be from old perf,
+	 * which doesn't include die information.
+	 */
+	if (ff->size <= size)
+		return 0;
+
+	if (do_read_u32(ff, &nr))
+		return -1;
+
+	ph->env.nr_sibling_dies = nr;
+	size += sizeof(u32);
+
+	for (i = 0; i < nr; i++) {
+		str = do_read_string(ff);
+		if (!str)
+			goto error;
+
+		/* include a NULL character at the end */
+		if (strbuf_add(&sb, str, strlen(str) + 1) < 0)
+			goto error;
+		size += string_size(str);
+		free(str);
+	}
+	ph->env.sibling_dies = strbuf_detach(&sb, NULL);
+
+	for (i = 0; i < (u32)cpu_nr; i++) {
+		if (do_read_u32(ff, &nr))
+			goto free_cpu;
+
+		ph->env.cpu[i].die_id = nr;
+	}
+
+	return 0;
+
+error:
+	strbuf_release(&sb);
+free_cpu:
+	zfree(&ph->env.cpu);
+	return -1;
+}
+
+static int process_numa_topology(struct feat_fd *ff, void *data __maybe_unused)
+{
+	struct numa_node *nodes, *n;
+	u32 nr, i;
+	char *str;
+
+	/* nr nodes */
+	if (do_read_u32(ff, &nr))
+		return -1;
+
+	nodes = zalloc(sizeof(*nodes) * nr);
+	if (!nodes)
+		return -ENOMEM;
+
+	for (i = 0; i < nr; i++) {
+		n = &nodes[i];
+
+		/* node number */
+		if (do_read_u32(ff, &n->node))
+			goto error;
+
+		if (do_read_u64(ff, &n->mem_total))
+			goto error;
+
+		if (do_read_u64(ff, &n->mem_free))
+			goto error;
+
+		str = do_read_string(ff);
+		if (!str)
+			goto error;
+
+		n->map = perf_cpu_map__new(str);
+		if (!n->map)
+			goto error;
+
+		free(str);
+	}
+	ff->ph->env.nr_numa_nodes = nr;
+	ff->ph->env.numa_nodes = nodes;
+	return 0;
+
+error:
+	free(nodes);
+	return -1;
+}
+
+static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused)
+{
+	char *name;
+	u32 pmu_num;
+	u32 type;
+	struct strbuf sb;
+
+	if (do_read_u32(ff, &pmu_num))
+		return -1;
+
+	if (!pmu_num) {
+		pr_debug("pmu mappings not available\n");
+		return 0;
+	}
+
+	ff->ph->env.nr_pmu_mappings = pmu_num;
+	if (strbuf_init(&sb, 128) < 0)
+		return -1;
+
+	while (pmu_num) {
+		if (do_read_u32(ff, &type))
+			goto error;
+
+		name = do_read_string(ff);
+		if (!name)
+			goto error;
+
+		if (strbuf_addf(&sb, "%u:%s", type, name) < 0)
+			goto error;
+		/* include a NULL character at the end */
+		if (strbuf_add(&sb, "", 1) < 0)
+			goto error;
+
+		if (!strcmp(name, "msr"))
+			ff->ph->env.msr_pmu_type = type;
+
+		free(name);
+		pmu_num--;
+	}
+	ff->ph->env.pmu_mappings = strbuf_detach(&sb, NULL);
+	return 0;
+
+error:
+	strbuf_release(&sb);
+	return -1;
+}
+
+static int process_group_desc(struct feat_fd *ff, void *data __maybe_unused)
+{
+	size_t ret = -1;
+	u32 i, nr, nr_groups;
+	struct perf_session *session;
+	struct evsel *evsel, *leader = NULL;
+	struct group_desc {
+		char *name;
+		u32 leader_idx;
+		u32 nr_members;
+	} *desc;
+
+	if (do_read_u32(ff, &nr_groups))
+		return -1;
+
+	ff->ph->env.nr_groups = nr_groups;
+	if (!nr_groups) {
+		pr_debug("group desc not available\n");
+		return 0;
+	}
+
+	desc = calloc(nr_groups, sizeof(*desc));
+	if (!desc)
+		return -1;
+
+	for (i = 0; i < nr_groups; i++) {
+		desc[i].name = do_read_string(ff);
+		if (!desc[i].name)
+			goto out_free;
+
+		if (do_read_u32(ff, &desc[i].leader_idx))
+			goto out_free;
+
+		if (do_read_u32(ff, &desc[i].nr_members))
+			goto out_free;
+	}
+
+	/*
+	 * Rebuild group relationship based on the group_desc
+	 */
+	session = container_of(ff->ph, struct perf_session, header);
+	session->evlist->core.nr_groups = nr_groups;
+
+	i = nr = 0;
+	evlist__for_each_entry(session->evlist, evsel) {
+		if (evsel->core.idx == (int) desc[i].leader_idx) {
+			evsel__set_leader(evsel, evsel);
+			/* {anon_group} is a dummy name */
+			if (strcmp(desc[i].name, "{anon_group}")) {
+				evsel->group_name = desc[i].name;
+				desc[i].name = NULL;
+			}
+			evsel->core.nr_members = desc[i].nr_members;
+
+			if (i >= nr_groups || nr > 0) {
+				pr_debug("invalid group desc\n");
+				goto out_free;
+			}
+
+			leader = evsel;
+			nr = evsel->core.nr_members - 1;
+			i++;
+		} else if (nr) {
+			/* This is a group member */
+			evsel__set_leader(evsel, leader);
+
+			nr--;
+		}
+	}
+
+	if (i != nr_groups || nr != 0) {
+		pr_debug("invalid group desc\n");
+		goto out_free;
+	}
+
+	ret = 0;
+out_free:
+	for (i = 0; i < nr_groups; i++)
+		zfree(&desc[i].name);
+	free(desc);
+
+	return ret;
+}
+
+static int process_auxtrace(struct feat_fd *ff, void *data __maybe_unused)
+{
+	struct perf_session *session;
+	int err;
+
+	session = container_of(ff->ph, struct perf_session, header);
+
+	err = auxtrace_index__process(ff->fd, ff->size, session,
+				      ff->ph->needs_swap);
+	if (err < 0)
+		pr_err("Failed to process auxtrace index\n");
+	return err;
+}
+
+static int process_cache(struct feat_fd *ff, void *data __maybe_unused)
+{
+	struct cpu_cache_level *caches;
+	u32 cnt, i, version;
+
+	if (do_read_u32(ff, &version))
+		return -1;
+
+	if (version != 1)
+		return -1;
+
+	if (do_read_u32(ff, &cnt))
+		return -1;
+
+	caches = zalloc(sizeof(*caches) * cnt);
+	if (!caches)
+		return -1;
+
+	for (i = 0; i < cnt; i++) {
+		struct cpu_cache_level c;
+
+		#define _R(v)						\
+			if (do_read_u32(ff, &c.v))\
+				goto out_free_caches;			\
+
+		_R(level)
+		_R(line_size)
+		_R(sets)
+		_R(ways)
+		#undef _R
+
+		#define _R(v)					\
+			c.v = do_read_string(ff);		\
+			if (!c.v)				\
+				goto out_free_caches;
+
+		_R(type)
+		_R(size)
+		_R(map)
+		#undef _R
+
+		caches[i] = c;
+	}
+
+	ff->ph->env.caches = caches;
+	ff->ph->env.caches_cnt = cnt;
+	return 0;
+out_free_caches:
+	free(caches);
+	return -1;
+}
+
+static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused)
+{
+	struct perf_session *session;
+	u64 first_sample_time, last_sample_time;
+	int ret;
+
+	session = container_of(ff->ph, struct perf_session, header);
+
+	ret = do_read_u64(ff, &first_sample_time);
+	if (ret)
+		return -1;
+
+	ret = do_read_u64(ff, &last_sample_time);
+	if (ret)
+		return -1;
+
+	session->evlist->first_sample_time = first_sample_time;
+	session->evlist->last_sample_time = last_sample_time;
+	return 0;
+}
+
+static int process_mem_topology(struct feat_fd *ff,
+				void *data __maybe_unused)
+{
+	struct memory_node *nodes;
+	u64 version, i, nr, bsize;
+	int ret = -1;
+
+	if (do_read_u64(ff, &version))
+		return -1;
+
+	if (version != 1)
+		return -1;
+
+	if (do_read_u64(ff, &bsize))
+		return -1;
+
+	if (do_read_u64(ff, &nr))
+		return -1;
+
+	nodes = zalloc(sizeof(*nodes) * nr);
+	if (!nodes)
+		return -1;
+
+	for (i = 0; i < nr; i++) {
+		struct memory_node n;
+
+		#define _R(v)				\
+			if (do_read_u64(ff, &n.v))	\
+				goto out;		\
+
+		_R(node)
+		_R(size)
+
+		#undef _R
+
+		if (do_read_bitmap(ff, &n.set, &n.size))
+			goto out;
+
+		nodes[i] = n;
+	}
+
+	ff->ph->env.memory_bsize    = bsize;
+	ff->ph->env.memory_nodes    = nodes;
+	ff->ph->env.nr_memory_nodes = nr;
+	ret = 0;
+
+out:
+	if (ret)
+		free(nodes);
+	return ret;
+}
+
+static int process_clockid(struct feat_fd *ff,
+			   void *data __maybe_unused)
+{
+	if (do_read_u64(ff, &ff->ph->env.clock.clockid_res_ns))
+		return -1;
+
+	return 0;
+}
+
+static int process_clock_data(struct feat_fd *ff,
+			      void *_data __maybe_unused)
+{
+	u32 data32;
+	u64 data64;
+
+	/* version */
+	if (do_read_u32(ff, &data32))
+		return -1;
+
+	if (data32 != 1)
+		return -1;
+
+	/* clockid */
+	if (do_read_u32(ff, &data32))
+		return -1;
+
+	ff->ph->env.clock.clockid = data32;
+
+	/* TOD ref time */
+	if (do_read_u64(ff, &data64))
+		return -1;
+
+	ff->ph->env.clock.tod_ns = data64;
+
+	/* clockid ref time */
+	if (do_read_u64(ff, &data64))
+		return -1;
+
+	ff->ph->env.clock.clockid_ns = data64;
+	ff->ph->env.clock.enabled = true;
+	return 0;
+}
+
+static int process_hybrid_topology(struct feat_fd *ff,
+				   void *data __maybe_unused)
+{
+	struct hybrid_node *nodes, *n;
+	u32 nr, i;
+
+	/* nr nodes */
+	if (do_read_u32(ff, &nr))
+		return -1;
+
+	nodes = zalloc(sizeof(*nodes) * nr);
+	if (!nodes)
+		return -ENOMEM;
+
+	for (i = 0; i < nr; i++) {
+		n = &nodes[i];
+
+		n->pmu_name = do_read_string(ff);
+		if (!n->pmu_name)
+			goto error;
+
+		n->cpus = do_read_string(ff);
+		if (!n->cpus)
+			goto error;
+	}
+
+	ff->ph->env.nr_hybrid_nodes = nr;
+	ff->ph->env.hybrid_nodes = nodes;
+	return 0;
+
+error:
+	for (i = 0; i < nr; i++) {
+		free(nodes[i].pmu_name);
+		free(nodes[i].cpus);
+	}
+
+	free(nodes);
+	return -1;
+}
+
+static int process_dir_format(struct feat_fd *ff,
+			      void *_data __maybe_unused)
+{
+	struct perf_session *session;
+	struct perf_data *data;
+
+	session = container_of(ff->ph, struct perf_session, header);
+	data = session->data;
+
+	if (WARN_ON(!perf_data__is_dir(data)))
+		return -1;
+
+	return do_read_u64(ff, &data->dir.version);
+}
+
+#ifdef HAVE_LIBBPF_SUPPORT
+static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused)
+{
+	struct bpf_prog_info_node *info_node;
+	struct perf_env *env = &ff->ph->env;
+	struct perf_bpil *info_linear;
+	u32 count, i;
+	int err = -1;
+
+	if (ff->ph->needs_swap) {
+		pr_warning("interpreting bpf_prog_info from systems with endianness is not yet supported\n");
+		return 0;
+	}
+
+	if (do_read_u32(ff, &count))
+		return -1;
+
+	down_write(&env->bpf_progs.lock);
+
+	for (i = 0; i < count; ++i) {
+		u32 info_len, data_len;
+
+		info_linear = NULL;
+		info_node = NULL;
+		if (do_read_u32(ff, &info_len))
+			goto out;
+		if (do_read_u32(ff, &data_len))
+			goto out;
+
+		if (info_len > sizeof(struct bpf_prog_info)) {
+			pr_warning("detected invalid bpf_prog_info\n");
+			goto out;
+		}
+
+		info_linear = malloc(sizeof(struct perf_bpil) +
+				     data_len);
+		if (!info_linear)
+			goto out;
+		info_linear->info_len = sizeof(struct bpf_prog_info);
+		info_linear->data_len = data_len;
+		if (do_read_u64(ff, (u64 *)(&info_linear->arrays)))
+			goto out;
+		if (__do_read(ff, &info_linear->info, info_len))
+			goto out;
+		if (info_len < sizeof(struct bpf_prog_info))
+			memset(((void *)(&info_linear->info)) + info_len, 0,
+			       sizeof(struct bpf_prog_info) - info_len);
+
+		if (__do_read(ff, info_linear->data, data_len))
+			goto out;
+
+		info_node = malloc(sizeof(struct bpf_prog_info_node));
+		if (!info_node)
+			goto out;
+
+		/* after reading from file, translate offset to address */
+		bpil_offs_to_addr(info_linear);
+		info_node->info_linear = info_linear;
+		__perf_env__insert_bpf_prog_info(env, info_node);
+	}
+
+	up_write(&env->bpf_progs.lock);
+	return 0;
+out:
+	free(info_linear);
+	free(info_node);
+	up_write(&env->bpf_progs.lock);
+	return err;
+}
+
+static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused)
+{
+	struct perf_env *env = &ff->ph->env;
+	struct btf_node *node = NULL;
+	u32 count, i;
+	int err = -1;
+
+	if (ff->ph->needs_swap) {
+		pr_warning("interpreting btf from systems with endianness is not yet supported\n");
+		return 0;
+	}
+
+	if (do_read_u32(ff, &count))
+		return -1;
+
+	down_write(&env->bpf_progs.lock);
+
+	for (i = 0; i < count; ++i) {
+		u32 id, data_size;
+
+		if (do_read_u32(ff, &id))
+			goto out;
+		if (do_read_u32(ff, &data_size))
+			goto out;
+
+		node = malloc(sizeof(struct btf_node) + data_size);
+		if (!node)
+			goto out;
+
+		node->id = id;
+		node->data_size = data_size;
+
+		if (__do_read(ff, node->data, data_size))
+			goto out;
+
+		__perf_env__insert_btf(env, node);
+		node = NULL;
+	}
+
+	err = 0;
+out:
+	up_write(&env->bpf_progs.lock);
+	free(node);
+	return err;
+}
+#endif // HAVE_LIBBPF_SUPPORT
+
+static int process_compressed(struct feat_fd *ff,
+			      void *data __maybe_unused)
+{
+	if (do_read_u32(ff, &(ff->ph->env.comp_ver)))
+		return -1;
+
+	if (do_read_u32(ff, &(ff->ph->env.comp_type)))
+		return -1;
+
+	if (do_read_u32(ff, &(ff->ph->env.comp_level)))
+		return -1;
+
+	if (do_read_u32(ff, &(ff->ph->env.comp_ratio)))
+		return -1;
+
+	if (do_read_u32(ff, &(ff->ph->env.comp_mmap_len)))
+		return -1;
+
+	return 0;
+}
+
+static int __process_pmu_caps(struct feat_fd *ff, int *nr_caps,
+			      char ***caps, unsigned int *max_branches)
+{
+	char *name, *value, *ptr;
+	u32 nr_pmu_caps, i;
+
+	*nr_caps = 0;
+	*caps = NULL;
+
+	if (do_read_u32(ff, &nr_pmu_caps))
+		return -1;
+
+	if (!nr_pmu_caps)
+		return 0;
+
+	*caps = zalloc(sizeof(char *) * nr_pmu_caps);
+	if (!*caps)
+		return -1;
+
+	for (i = 0; i < nr_pmu_caps; i++) {
+		name = do_read_string(ff);
+		if (!name)
+			goto error;
+
+		value = do_read_string(ff);
+		if (!value)
+			goto free_name;
+
+		if (asprintf(&ptr, "%s=%s", name, value) < 0)
+			goto free_value;
+
+		(*caps)[i] = ptr;
+
+		if (!strcmp(name, "branches"))
+			*max_branches = atoi(value);
+
+		free(value);
+		free(name);
+	}
+	*nr_caps = nr_pmu_caps;
+	return 0;
+
+free_value:
+	free(value);
+free_name:
+	free(name);
+error:
+	for (; i > 0; i--)
+		free((*caps)[i - 1]);
+	free(*caps);
+	*caps = NULL;
+	*nr_caps = 0;
+	return -1;
+}
+
+static int process_cpu_pmu_caps(struct feat_fd *ff,
+				void *data __maybe_unused)
+{
+	int ret = __process_pmu_caps(ff, &ff->ph->env.nr_cpu_pmu_caps,
+				     &ff->ph->env.cpu_pmu_caps,
+				     &ff->ph->env.max_branches);
+
+	if (!ret && !ff->ph->env.cpu_pmu_caps)
+		pr_debug("cpu pmu capabilities not available\n");
+	return ret;
+}
+
+static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused)
+{
+	struct pmu_caps *pmu_caps;
+	u32 nr_pmu, i;
+	int ret;
+	int j;
+
+	if (do_read_u32(ff, &nr_pmu))
+		return -1;
+
+	if (!nr_pmu) {
+		pr_debug("pmu capabilities not available\n");
+		return 0;
+	}
+
+	pmu_caps = zalloc(sizeof(*pmu_caps) * nr_pmu);
+	if (!pmu_caps)
+		return -ENOMEM;
+
+	for (i = 0; i < nr_pmu; i++) {
+		ret = __process_pmu_caps(ff, &pmu_caps[i].nr_caps,
+					 &pmu_caps[i].caps,
+					 &pmu_caps[i].max_branches);
+		if (ret)
+			goto err;
+
+		pmu_caps[i].pmu_name = do_read_string(ff);
+		if (!pmu_caps[i].pmu_name) {
+			ret = -1;
+			goto err;
+		}
+		if (!pmu_caps[i].nr_caps) {
+			pr_debug("%s pmu capabilities not available\n",
+				 pmu_caps[i].pmu_name);
+		}
+	}
+
+	ff->ph->env.nr_pmus_with_caps = nr_pmu;
+	ff->ph->env.pmu_caps = pmu_caps;
+	return 0;
+
+err:
+	for (i = 0; i < nr_pmu; i++) {
+		for (j = 0; j < pmu_caps[i].nr_caps; j++)
+			free(pmu_caps[i].caps[j]);
+		free(pmu_caps[i].caps);
+		free(pmu_caps[i].pmu_name);
+	}
+
+	free(pmu_caps);
+	return ret;
+}
+
+#define FEAT_OPR(n, func, __full_only) \
+	[HEADER_##n] = {					\
+		.name	    = __stringify(n),			\
+		.write	    = write_##func,			\
+		.print	    = print_##func,			\
+		.full_only  = __full_only,			\
+		.process    = process_##func,			\
+		.synthesize = true				\
+	}
+
+#define FEAT_OPN(n, func, __full_only) \
+	[HEADER_##n] = {					\
+		.name	    = __stringify(n),			\
+		.write	    = write_##func,			\
+		.print	    = print_##func,			\
+		.full_only  = __full_only,			\
+		.process    = process_##func			\
+	}
+
+/* feature_ops not implemented: */
+#define print_tracing_data	NULL
+#define print_build_id		NULL
+
+#define process_branch_stack	NULL
+#define process_stat		NULL
+
+// Only used in util/synthetic-events.c
+const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE];
+
+const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
+	FEAT_OPN(TRACING_DATA,	tracing_data,	false),
+	FEAT_OPN(BUILD_ID,	build_id,	false),
+	FEAT_OPR(HOSTNAME,	hostname,	false),
+	FEAT_OPR(OSRELEASE,	osrelease,	false),
+	FEAT_OPR(VERSION,	version,	false),
+	FEAT_OPR(ARCH,		arch,		false),
+	FEAT_OPR(NRCPUS,	nrcpus,		false),
+	FEAT_OPR(CPUDESC,	cpudesc,	false),
+	FEAT_OPR(CPUID,		cpuid,		false),
+	FEAT_OPR(TOTAL_MEM,	total_mem,	false),
+	FEAT_OPR(EVENT_DESC,	event_desc,	false),
+	FEAT_OPR(CMDLINE,	cmdline,	false),
+	FEAT_OPR(CPU_TOPOLOGY,	cpu_topology,	true),
+	FEAT_OPR(NUMA_TOPOLOGY,	numa_topology,	true),
+	FEAT_OPN(BRANCH_STACK,	branch_stack,	false),
+	FEAT_OPR(PMU_MAPPINGS,	pmu_mappings,	false),
+	FEAT_OPR(GROUP_DESC,	group_desc,	false),
+	FEAT_OPN(AUXTRACE,	auxtrace,	false),
+	FEAT_OPN(STAT,		stat,		false),
+	FEAT_OPN(CACHE,		cache,		true),
+	FEAT_OPR(SAMPLE_TIME,	sample_time,	false),
+	FEAT_OPR(MEM_TOPOLOGY,	mem_topology,	true),
+	FEAT_OPR(CLOCKID,	clockid,	false),
+	FEAT_OPN(DIR_FORMAT,	dir_format,	false),
+#ifdef HAVE_LIBBPF_SUPPORT
+	FEAT_OPR(BPF_PROG_INFO, bpf_prog_info,  false),
+	FEAT_OPR(BPF_BTF,       bpf_btf,        false),
+#endif
+	FEAT_OPR(COMPRESSED,	compressed,	false),
+	FEAT_OPR(CPU_PMU_CAPS,	cpu_pmu_caps,	false),
+	FEAT_OPR(CLOCK_DATA,	clock_data,	false),
+	FEAT_OPN(HYBRID_TOPOLOGY,	hybrid_topology,	true),
+	FEAT_OPR(PMU_CAPS,	pmu_caps,	false),
+};
+
+struct header_print_data {
+	FILE *fp;
+	bool full; /* extended list of headers */
+};
+
+static int perf_file_section__fprintf_info(struct perf_file_section *section,
+					   struct perf_header *ph,
+					   int feat, int fd, void *data)
+{
+	struct header_print_data *hd = data;
+	struct feat_fd ff;
+
+	if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) {
+		pr_debug("Failed to lseek to %" PRIu64 " offset for feature "
+				"%d, continuing...\n", section->offset, feat);
+		return 0;
+	}
+	if (feat >= HEADER_LAST_FEATURE) {
+		pr_warning("unknown feature %d\n", feat);
+		return 0;
+	}
+	if (!feat_ops[feat].print)
+		return 0;
+
+	ff = (struct  feat_fd) {
+		.fd = fd,
+		.ph = ph,
+	};
+
+	if (!feat_ops[feat].full_only || hd->full)
+		feat_ops[feat].print(&ff, hd->fp);
+	else
+		fprintf(hd->fp, "# %s info available, use -I to display\n",
+			feat_ops[feat].name);
+
+	return 0;
+}
+
+int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full)
+{
+	struct header_print_data hd;
+	struct perf_header *header = &session->header;
+	int fd = perf_data__fd(session->data);
+	struct stat st;
+	time_t stctime;
+	int ret, bit;
+
+	hd.fp = fp;
+	hd.full = full;
+
+	ret = fstat(fd, &st);
+	if (ret == -1)
+		return -1;
+
+	stctime = st.st_mtime;
+	fprintf(fp, "# captured on    : %s", ctime(&stctime));
+
+	fprintf(fp, "# header version : %u\n", header->version);
+	fprintf(fp, "# data offset    : %" PRIu64 "\n", header->data_offset);
+	fprintf(fp, "# data size      : %" PRIu64 "\n", header->data_size);
+	fprintf(fp, "# feat offset    : %" PRIu64 "\n", header->feat_offset);
+
+	perf_header__process_sections(header, fd, &hd,
+				      perf_file_section__fprintf_info);
+
+	if (session->data->is_pipe)
+		return 0;
+
+	fprintf(fp, "# missing features: ");
+	for_each_clear_bit(bit, header->adds_features, HEADER_LAST_FEATURE) {
+		if (bit)
+			fprintf(fp, "%s ", feat_ops[bit].name);
+	}
+
+	fprintf(fp, "\n");
+	return 0;
+}
+
+struct header_fw {
+	struct feat_writer	fw;
+	struct feat_fd		*ff;
+};
+
+static int feat_writer_cb(struct feat_writer *fw, void *buf, size_t sz)
+{
+	struct header_fw *h = container_of(fw, struct header_fw, fw);
+
+	return do_write(h->ff, buf, sz);
+}
+
+static int do_write_feat(struct feat_fd *ff, int type,
+			 struct perf_file_section **p,
+			 struct evlist *evlist,
+			 struct feat_copier *fc)
+{
+	int err;
+	int ret = 0;
+
+	if (perf_header__has_feat(ff->ph, type)) {
+		if (!feat_ops[type].write)
+			return -1;
+
+		if (WARN(ff->buf, "Error: calling %s in pipe-mode.\n", __func__))
+			return -1;
+
+		(*p)->offset = lseek(ff->fd, 0, SEEK_CUR);
+
+		/*
+		 * Hook to let perf inject copy features sections from the input
+		 * file.
+		 */
+		if (fc && fc->copy) {
+			struct header_fw h = {
+				.fw.write = feat_writer_cb,
+				.ff = ff,
+			};
+
+			/* ->copy() returns 0 if the feature was not copied */
+			err = fc->copy(fc, type, &h.fw);
+		} else {
+			err = 0;
+		}
+		if (!err)
+			err = feat_ops[type].write(ff, evlist);
+		if (err < 0) {
+			pr_debug("failed to write feature %s\n", feat_ops[type].name);
+
+			/* undo anything written */
+			lseek(ff->fd, (*p)->offset, SEEK_SET);
+
+			return -1;
+		}
+		(*p)->size = lseek(ff->fd, 0, SEEK_CUR) - (*p)->offset;
+		(*p)++;
+	}
+	return ret;
+}
+
+static int perf_header__adds_write(struct perf_header *header,
+				   struct evlist *evlist, int fd,
+				   struct feat_copier *fc)
+{
+	int nr_sections;
+	struct feat_fd ff;
+	struct perf_file_section *feat_sec, *p;
+	int sec_size;
+	u64 sec_start;
+	int feat;
+	int err;
+
+	ff = (struct feat_fd){
+		.fd  = fd,
+		.ph = header,
+	};
+
+	nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS);
+	if (!nr_sections)
+		return 0;
+
+	feat_sec = p = calloc(nr_sections, sizeof(*feat_sec));
+	if (feat_sec == NULL)
+		return -ENOMEM;
+
+	sec_size = sizeof(*feat_sec) * nr_sections;
+
+	sec_start = header->feat_offset;
+	lseek(fd, sec_start + sec_size, SEEK_SET);
+
+	for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) {
+		if (do_write_feat(&ff, feat, &p, evlist, fc))
+			perf_header__clear_feat(header, feat);
+	}
+
+	lseek(fd, sec_start, SEEK_SET);
+	/*
+	 * may write more than needed due to dropped feature, but
+	 * this is okay, reader will skip the missing entries
+	 */
+	err = do_write(&ff, feat_sec, sec_size);
+	if (err < 0)
+		pr_debug("failed to write feature section\n");
+	free(feat_sec);
+	return err;
+}
+
+int perf_header__write_pipe(int fd)
+{
+	struct perf_pipe_file_header f_header;
+	struct feat_fd ff;
+	int err;
+
+	ff = (struct feat_fd){ .fd = fd };
+
+	f_header = (struct perf_pipe_file_header){
+		.magic	   = PERF_MAGIC,
+		.size	   = sizeof(f_header),
+	};
+
+	err = do_write(&ff, &f_header, sizeof(f_header));
+	if (err < 0) {
+		pr_debug("failed to write perf pipe header\n");
+		return err;
+	}
+
+	return 0;
+}
+
+static int perf_session__do_write_header(struct perf_session *session,
+					 struct evlist *evlist,
+					 int fd, bool at_exit,
+					 struct feat_copier *fc)
+{
+	struct perf_file_header f_header;
+	struct perf_file_attr   f_attr;
+	struct perf_header *header = &session->header;
+	struct evsel *evsel;
+	struct feat_fd ff;
+	u64 attr_offset;
+	int err;
+
+	ff = (struct feat_fd){ .fd = fd};
+	lseek(fd, sizeof(f_header), SEEK_SET);
+
+	evlist__for_each_entry(session->evlist, evsel) {
+		evsel->id_offset = lseek(fd, 0, SEEK_CUR);
+		err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64));
+		if (err < 0) {
+			pr_debug("failed to write perf header\n");
+			return err;
+		}
+	}
+
+	attr_offset = lseek(ff.fd, 0, SEEK_CUR);
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->core.attr.size < sizeof(evsel->core.attr)) {
+			/*
+			 * We are likely in "perf inject" and have read
+			 * from an older file. Update attr size so that
+			 * reader gets the right offset to the ids.
+			 */
+			evsel->core.attr.size = sizeof(evsel->core.attr);
+		}
+		f_attr = (struct perf_file_attr){
+			.attr = evsel->core.attr,
+			.ids  = {
+				.offset = evsel->id_offset,
+				.size   = evsel->core.ids * sizeof(u64),
+			}
+		};
+		err = do_write(&ff, &f_attr, sizeof(f_attr));
+		if (err < 0) {
+			pr_debug("failed to write perf header attribute\n");
+			return err;
+		}
+	}
+
+	if (!header->data_offset)
+		header->data_offset = lseek(fd, 0, SEEK_CUR);
+	header->feat_offset = header->data_offset + header->data_size;
+
+	if (at_exit) {
+		err = perf_header__adds_write(header, evlist, fd, fc);
+		if (err < 0)
+			return err;
+	}
+
+	f_header = (struct perf_file_header){
+		.magic	   = PERF_MAGIC,
+		.size	   = sizeof(f_header),
+		.attr_size = sizeof(f_attr),
+		.attrs = {
+			.offset = attr_offset,
+			.size   = evlist->core.nr_entries * sizeof(f_attr),
+		},
+		.data = {
+			.offset = header->data_offset,
+			.size	= header->data_size,
+		},
+		/* event_types is ignored, store zeros */
+	};
+
+	memcpy(&f_header.adds_features, &header->adds_features, sizeof(header->adds_features));
+
+	lseek(fd, 0, SEEK_SET);
+	err = do_write(&ff, &f_header, sizeof(f_header));
+	if (err < 0) {
+		pr_debug("failed to write perf header\n");
+		return err;
+	}
+	lseek(fd, header->data_offset + header->data_size, SEEK_SET);
+
+	return 0;
+}
+
+int perf_session__write_header(struct perf_session *session,
+			       struct evlist *evlist,
+			       int fd, bool at_exit)
+{
+	return perf_session__do_write_header(session, evlist, fd, at_exit, NULL);
+}
+
+size_t perf_session__data_offset(const struct evlist *evlist)
+{
+	struct evsel *evsel;
+	size_t data_offset;
+
+	data_offset = sizeof(struct perf_file_header);
+	evlist__for_each_entry(evlist, evsel) {
+		data_offset += evsel->core.ids * sizeof(u64);
+	}
+	data_offset += evlist->core.nr_entries * sizeof(struct perf_file_attr);
+
+	return data_offset;
+}
+
+int perf_session__inject_header(struct perf_session *session,
+				struct evlist *evlist,
+				int fd,
+				struct feat_copier *fc)
+{
+	return perf_session__do_write_header(session, evlist, fd, true, fc);
+}
+
+static int perf_header__getbuffer64(struct perf_header *header,
+				    int fd, void *buf, size_t size)
+{
+	if (readn(fd, buf, size) <= 0)
+		return -1;
+
+	if (header->needs_swap)
+		mem_bswap_64(buf, size);
+
+	return 0;
+}
+
+int perf_header__process_sections(struct perf_header *header, int fd,
+				  void *data,
+				  int (*process)(struct perf_file_section *section,
+						 struct perf_header *ph,
+						 int feat, int fd, void *data))
+{
+	struct perf_file_section *feat_sec, *sec;
+	int nr_sections;
+	int sec_size;
+	int feat;
+	int err;
+
+	nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS);
+	if (!nr_sections)
+		return 0;
+
+	feat_sec = sec = calloc(nr_sections, sizeof(*feat_sec));
+	if (!feat_sec)
+		return -1;
+
+	sec_size = sizeof(*feat_sec) * nr_sections;
+
+	lseek(fd, header->feat_offset, SEEK_SET);
+
+	err = perf_header__getbuffer64(header, fd, feat_sec, sec_size);
+	if (err < 0)
+		goto out_free;
+
+	for_each_set_bit(feat, header->adds_features, HEADER_LAST_FEATURE) {
+		err = process(sec++, header, feat, fd, data);
+		if (err < 0)
+			goto out_free;
+	}
+	err = 0;
+out_free:
+	free(feat_sec);
+	return err;
+}
+
+static const int attr_file_abi_sizes[] = {
+	[0] = PERF_ATTR_SIZE_VER0,
+	[1] = PERF_ATTR_SIZE_VER1,
+	[2] = PERF_ATTR_SIZE_VER2,
+	[3] = PERF_ATTR_SIZE_VER3,
+	[4] = PERF_ATTR_SIZE_VER4,
+	0,
+};
+
+/*
+ * In the legacy file format, the magic number is not used to encode endianness.
+ * hdr_sz was used to encode endianness. But given that hdr_sz can vary based
+ * on ABI revisions, we need to try all combinations for all endianness to
+ * detect the endianness.
+ */
+static int try_all_file_abis(uint64_t hdr_sz, struct perf_header *ph)
+{
+	uint64_t ref_size, attr_size;
+	int i;
+
+	for (i = 0 ; attr_file_abi_sizes[i]; i++) {
+		ref_size = attr_file_abi_sizes[i]
+			 + sizeof(struct perf_file_section);
+		if (hdr_sz != ref_size) {
+			attr_size = bswap_64(hdr_sz);
+			if (attr_size != ref_size)
+				continue;
+
+			ph->needs_swap = true;
+		}
+		pr_debug("ABI%d perf.data file detected, need_swap=%d\n",
+			 i,
+			 ph->needs_swap);
+		return 0;
+	}
+	/* could not determine endianness */
+	return -1;
+}
+
+#define PERF_PIPE_HDR_VER0	16
+
+static const size_t attr_pipe_abi_sizes[] = {
+	[0] = PERF_PIPE_HDR_VER0,
+	0,
+};
+
+/*
+ * In the legacy pipe format, there is an implicit assumption that endianness
+ * between host recording the samples, and host parsing the samples is the
+ * same. This is not always the case given that the pipe output may always be
+ * redirected into a file and analyzed on a different machine with possibly a
+ * different endianness and perf_event ABI revisions in the perf tool itself.
+ */
+static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph)
+{
+	u64 attr_size;
+	int i;
+
+	for (i = 0 ; attr_pipe_abi_sizes[i]; i++) {
+		if (hdr_sz != attr_pipe_abi_sizes[i]) {
+			attr_size = bswap_64(hdr_sz);
+			if (attr_size != hdr_sz)
+				continue;
+
+			ph->needs_swap = true;
+		}
+		pr_debug("Pipe ABI%d perf.data file detected\n", i);
+		return 0;
+	}
+	return -1;
+}
+
+bool is_perf_magic(u64 magic)
+{
+	if (!memcmp(&magic, __perf_magic1, sizeof(magic))
+		|| magic == __perf_magic2
+		|| magic == __perf_magic2_sw)
+		return true;
+
+	return false;
+}
+
+static int check_magic_endian(u64 magic, uint64_t hdr_sz,
+			      bool is_pipe, struct perf_header *ph)
+{
+	int ret;
+
+	/* check for legacy format */
+	ret = memcmp(&magic, __perf_magic1, sizeof(magic));
+	if (ret == 0) {
+		ph->version = PERF_HEADER_VERSION_1;
+		pr_debug("legacy perf.data format\n");
+		if (is_pipe)
+			return try_all_pipe_abis(hdr_sz, ph);
+
+		return try_all_file_abis(hdr_sz, ph);
+	}
+	/*
+	 * the new magic number serves two purposes:
+	 * - unique number to identify actual perf.data files
+	 * - encode endianness of file
+	 */
+	ph->version = PERF_HEADER_VERSION_2;
+
+	/* check magic number with one endianness */
+	if (magic == __perf_magic2)
+		return 0;
+
+	/* check magic number with opposite endianness */
+	if (magic != __perf_magic2_sw)
+		return -1;
+
+	ph->needs_swap = true;
+
+	return 0;
+}
+
+int perf_file_header__read(struct perf_file_header *header,
+			   struct perf_header *ph, int fd)
+{
+	ssize_t ret;
+
+	lseek(fd, 0, SEEK_SET);
+
+	ret = readn(fd, header, sizeof(*header));
+	if (ret <= 0)
+		return -1;
+
+	if (check_magic_endian(header->magic,
+			       header->attr_size, false, ph) < 0) {
+		pr_debug("magic/endian check failed\n");
+		return -1;
+	}
+
+	if (ph->needs_swap) {
+		mem_bswap_64(header, offsetof(struct perf_file_header,
+			     adds_features));
+	}
+
+	if (header->size != sizeof(*header)) {
+		/* Support the previous format */
+		if (header->size == offsetof(typeof(*header), adds_features))
+			bitmap_zero(header->adds_features, HEADER_FEAT_BITS);
+		else
+			return -1;
+	} else if (ph->needs_swap) {
+		/*
+		 * feature bitmap is declared as an array of unsigned longs --
+		 * not good since its size can differ between the host that
+		 * generated the data file and the host analyzing the file.
+		 *
+		 * We need to handle endianness, but we don't know the size of
+		 * the unsigned long where the file was generated. Take a best
+		 * guess at determining it: try 64-bit swap first (ie., file
+		 * created on a 64-bit host), and check if the hostname feature
+		 * bit is set (this feature bit is forced on as of fbe96f2).
+		 * If the bit is not, undo the 64-bit swap and try a 32-bit
+		 * swap. If the hostname bit is still not set (e.g., older data
+		 * file), punt and fallback to the original behavior --
+		 * clearing all feature bits and setting buildid.
+		 */
+		mem_bswap_64(&header->adds_features,
+			    BITS_TO_U64(HEADER_FEAT_BITS));
+
+		if (!test_bit(HEADER_HOSTNAME, header->adds_features)) {
+			/* unswap as u64 */
+			mem_bswap_64(&header->adds_features,
+				    BITS_TO_U64(HEADER_FEAT_BITS));
+
+			/* unswap as u32 */
+			mem_bswap_32(&header->adds_features,
+				    BITS_TO_U32(HEADER_FEAT_BITS));
+		}
+
+		if (!test_bit(HEADER_HOSTNAME, header->adds_features)) {
+			bitmap_zero(header->adds_features, HEADER_FEAT_BITS);
+			set_bit(HEADER_BUILD_ID, header->adds_features);
+		}
+	}
+
+	memcpy(&ph->adds_features, &header->adds_features,
+	       sizeof(ph->adds_features));
+
+	ph->data_offset  = header->data.offset;
+	ph->data_size	 = header->data.size;
+	ph->feat_offset  = header->data.offset + header->data.size;
+	return 0;
+}
+
+static int perf_file_section__process(struct perf_file_section *section,
+				      struct perf_header *ph,
+				      int feat, int fd, void *data)
+{
+	struct feat_fd fdd = {
+		.fd	= fd,
+		.ph	= ph,
+		.size	= section->size,
+		.offset	= section->offset,
+	};
+
+	if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) {
+		pr_debug("Failed to lseek to %" PRIu64 " offset for feature "
+			  "%d, continuing...\n", section->offset, feat);
+		return 0;
+	}
+
+	if (feat >= HEADER_LAST_FEATURE) {
+		pr_debug("unknown feature %d, continuing...\n", feat);
+		return 0;
+	}
+
+	if (!feat_ops[feat].process)
+		return 0;
+
+	return feat_ops[feat].process(&fdd, data);
+}
+
+static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
+				       struct perf_header *ph,
+				       struct perf_data* data,
+				       bool repipe, int repipe_fd)
+{
+	struct feat_fd ff = {
+		.fd = repipe_fd,
+		.ph = ph,
+	};
+	ssize_t ret;
+
+	ret = perf_data__read(data, header, sizeof(*header));
+	if (ret <= 0)
+		return -1;
+
+	if (check_magic_endian(header->magic, header->size, true, ph) < 0) {
+		pr_debug("endian/magic failed\n");
+		return -1;
+	}
+
+	if (ph->needs_swap)
+		header->size = bswap_64(header->size);
+
+	if (repipe && do_write(&ff, header, sizeof(*header)) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int perf_header__read_pipe(struct perf_session *session, int repipe_fd)
+{
+	struct perf_header *header = &session->header;
+	struct perf_pipe_file_header f_header;
+
+	if (perf_file_header__read_pipe(&f_header, header, session->data,
+					session->repipe, repipe_fd) < 0) {
+		pr_debug("incompatible file format\n");
+		return -EINVAL;
+	}
+
+	return f_header.size == sizeof(f_header) ? 0 : -1;
+}
+
+static int read_attr(int fd, struct perf_header *ph,
+		     struct perf_file_attr *f_attr)
+{
+	struct perf_event_attr *attr = &f_attr->attr;
+	size_t sz, left;
+	size_t our_sz = sizeof(f_attr->attr);
+	ssize_t ret;
+
+	memset(f_attr, 0, sizeof(*f_attr));
+
+	/* read minimal guaranteed structure */
+	ret = readn(fd, attr, PERF_ATTR_SIZE_VER0);
+	if (ret <= 0) {
+		pr_debug("cannot read %d bytes of header attr\n",
+			 PERF_ATTR_SIZE_VER0);
+		return -1;
+	}
+
+	/* on file perf_event_attr size */
+	sz = attr->size;
+
+	if (ph->needs_swap)
+		sz = bswap_32(sz);
+
+	if (sz == 0) {
+		/* assume ABI0 */
+		sz =  PERF_ATTR_SIZE_VER0;
+	} else if (sz > our_sz) {
+		pr_debug("file uses a more recent and unsupported ABI"
+			 " (%zu bytes extra)\n", sz - our_sz);
+		return -1;
+	}
+	/* what we have not yet read and that we know about */
+	left = sz - PERF_ATTR_SIZE_VER0;
+	if (left) {
+		void *ptr = attr;
+		ptr += PERF_ATTR_SIZE_VER0;
+
+		ret = readn(fd, ptr, left);
+	}
+	/* read perf_file_section, ids are read in caller */
+	ret = readn(fd, &f_attr->ids, sizeof(f_attr->ids));
+
+	return ret <= 0 ? -1 : 0;
+}
+
+static int evsel__prepare_tracepoint_event(struct evsel *evsel, struct tep_handle *pevent)
+{
+	struct tep_event *event;
+	char bf[128];
+
+	/* already prepared */
+	if (evsel->tp_format)
+		return 0;
+
+	if (pevent == NULL) {
+		pr_debug("broken or missing trace data\n");
+		return -1;
+	}
+
+	event = tep_find_event(pevent, evsel->core.attr.config);
+	if (event == NULL) {
+		pr_debug("cannot find event format for %d\n", (int)evsel->core.attr.config);
+		return -1;
+	}
+
+	if (!evsel->name) {
+		snprintf(bf, sizeof(bf), "%s:%s", event->system, event->name);
+		evsel->name = strdup(bf);
+		if (evsel->name == NULL)
+			return -1;
+	}
+
+	evsel->tp_format = event;
+	return 0;
+}
+
+static int evlist__prepare_tracepoint_events(struct evlist *evlist, struct tep_handle *pevent)
+{
+	struct evsel *pos;
+
+	evlist__for_each_entry(evlist, pos) {
+		if (pos->core.attr.type == PERF_TYPE_TRACEPOINT &&
+		    evsel__prepare_tracepoint_event(pos, pevent))
+			return -1;
+	}
+
+	return 0;
+}
+
+int perf_session__read_header(struct perf_session *session, int repipe_fd)
+{
+	struct perf_data *data = session->data;
+	struct perf_header *header = &session->header;
+	struct perf_file_header	f_header;
+	struct perf_file_attr	f_attr;
+	u64			f_id;
+	int nr_attrs, nr_ids, i, j, err;
+	int fd = perf_data__fd(data);
+
+	session->evlist = evlist__new();
+	if (session->evlist == NULL)
+		return -ENOMEM;
+
+	session->evlist->env = &header->env;
+	session->machines.host.env = &header->env;
+
+	/*
+	 * We can read 'pipe' data event from regular file,
+	 * check for the pipe header regardless of source.
+	 */
+	err = perf_header__read_pipe(session, repipe_fd);
+	if (!err || perf_data__is_pipe(data)) {
+		data->is_pipe = true;
+		return err;
+	}
+
+	if (perf_file_header__read(&f_header, header, fd) < 0)
+		return -EINVAL;
+
+	if (header->needs_swap && data->in_place_update) {
+		pr_err("In-place update not supported when byte-swapping is required\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Sanity check that perf.data was written cleanly; data size is
+	 * initialized to 0 and updated only if the on_exit function is run.
+	 * If data size is still 0 then the file contains only partial
+	 * information.  Just warn user and process it as much as it can.
+	 */
+	if (f_header.data.size == 0) {
+		pr_warning("WARNING: The %s file's data size field is 0 which is unexpected.\n"
+			   "Was the 'perf record' command properly terminated?\n",
+			   data->file.path);
+	}
+
+	if (f_header.attr_size == 0) {
+		pr_err("ERROR: The %s file's attr size field is 0 which is unexpected.\n"
+		       "Was the 'perf record' command properly terminated?\n",
+		       data->file.path);
+		return -EINVAL;
+	}
+
+	nr_attrs = f_header.attrs.size / f_header.attr_size;
+	lseek(fd, f_header.attrs.offset, SEEK_SET);
+
+	for (i = 0; i < nr_attrs; i++) {
+		struct evsel *evsel;
+		off_t tmp;
+
+		if (read_attr(fd, header, &f_attr) < 0)
+			goto out_errno;
+
+		if (header->needs_swap) {
+			f_attr.ids.size   = bswap_64(f_attr.ids.size);
+			f_attr.ids.offset = bswap_64(f_attr.ids.offset);
+			perf_event__attr_swap(&f_attr.attr);
+		}
+
+		tmp = lseek(fd, 0, SEEK_CUR);
+		evsel = evsel__new(&f_attr.attr);
+
+		if (evsel == NULL)
+			goto out_delete_evlist;
+
+		evsel->needs_swap = header->needs_swap;
+		/*
+		 * Do it before so that if perf_evsel__alloc_id fails, this
+		 * entry gets purged too at evlist__delete().
+		 */
+		evlist__add(session->evlist, evsel);
+
+		nr_ids = f_attr.ids.size / sizeof(u64);
+		/*
+		 * We don't have the cpu and thread maps on the header, so
+		 * for allocating the perf_sample_id table we fake 1 cpu and
+		 * hattr->ids threads.
+		 */
+		if (perf_evsel__alloc_id(&evsel->core, 1, nr_ids))
+			goto out_delete_evlist;
+
+		lseek(fd, f_attr.ids.offset, SEEK_SET);
+
+		for (j = 0; j < nr_ids; j++) {
+			if (perf_header__getbuffer64(header, fd, &f_id, sizeof(f_id)))
+				goto out_errno;
+
+			perf_evlist__id_add(&session->evlist->core, &evsel->core, 0, j, f_id);
+		}
+
+		lseek(fd, tmp, SEEK_SET);
+	}
+
+	perf_header__process_sections(header, fd, &session->tevent,
+				      perf_file_section__process);
+
+	if (evlist__prepare_tracepoint_events(session->evlist, session->tevent.pevent))
+		goto out_delete_evlist;
+
+	return 0;
+out_errno:
+	return -errno;
+
+out_delete_evlist:
+	evlist__delete(session->evlist);
+	session->evlist = NULL;
+	return -ENOMEM;
+}
+
+int perf_event__process_feature(struct perf_session *session,
+				union perf_event *event)
+{
+	struct perf_tool *tool = session->tool;
+	struct feat_fd ff = { .fd = 0 };
+	struct perf_record_header_feature *fe = (struct perf_record_header_feature *)event;
+	int type = fe->header.type;
+	u64 feat = fe->feat_id;
+	int ret = 0;
+
+	if (type < 0 || type >= PERF_RECORD_HEADER_MAX) {
+		pr_warning("invalid record type %d in pipe-mode\n", type);
+		return 0;
+	}
+	if (feat == HEADER_RESERVED || feat >= HEADER_LAST_FEATURE) {
+		pr_warning("invalid record type %d in pipe-mode\n", type);
+		return -1;
+	}
+
+	if (!feat_ops[feat].process)
+		return 0;
+
+	ff.buf  = (void *)fe->data;
+	ff.size = event->header.size - sizeof(*fe);
+	ff.ph = &session->header;
+
+	if (feat_ops[feat].process(&ff, NULL)) {
+		ret = -1;
+		goto out;
+	}
+
+	if (!feat_ops[feat].print || !tool->show_feat_hdr)
+		goto out;
+
+	if (!feat_ops[feat].full_only ||
+	    tool->show_feat_hdr >= SHOW_FEAT_HEADER_FULL_INFO) {
+		feat_ops[feat].print(&ff, stdout);
+	} else {
+		fprintf(stdout, "# %s info available, use -I to display\n",
+			feat_ops[feat].name);
+	}
+out:
+	free_event_desc(ff.events);
+	return ret;
+}
+
+size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp)
+{
+	struct perf_record_event_update *ev = &event->event_update;
+	struct perf_cpu_map *map;
+	size_t ret;
+
+	ret = fprintf(fp, "\n... id:    %" PRI_lu64 "\n", ev->id);
+
+	switch (ev->type) {
+	case PERF_EVENT_UPDATE__SCALE:
+		ret += fprintf(fp, "... scale: %f\n", ev->scale.scale);
+		break;
+	case PERF_EVENT_UPDATE__UNIT:
+		ret += fprintf(fp, "... unit:  %s\n", ev->unit);
+		break;
+	case PERF_EVENT_UPDATE__NAME:
+		ret += fprintf(fp, "... name:  %s\n", ev->name);
+		break;
+	case PERF_EVENT_UPDATE__CPUS:
+		ret += fprintf(fp, "... ");
+
+		map = cpu_map__new_data(&ev->cpus.cpus);
+		if (map) {
+			ret += cpu_map__fprintf(map, fp);
+			perf_cpu_map__put(map);
+		} else
+			ret += fprintf(fp, "failed to get cpus\n");
+		break;
+	default:
+		ret += fprintf(fp, "... unknown type\n");
+		break;
+	}
+
+	return ret;
+}
+
+int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
+			     union perf_event *event,
+			     struct evlist **pevlist)
+{
+	u32 i, n_ids;
+	u64 *ids;
+	struct evsel *evsel;
+	struct evlist *evlist = *pevlist;
+
+	if (evlist == NULL) {
+		*pevlist = evlist = evlist__new();
+		if (evlist == NULL)
+			return -ENOMEM;
+	}
+
+	evsel = evsel__new(&event->attr.attr);
+	if (evsel == NULL)
+		return -ENOMEM;
+
+	evlist__add(evlist, evsel);
+
+	n_ids = event->header.size - sizeof(event->header) - event->attr.attr.size;
+	n_ids = n_ids / sizeof(u64);
+	/*
+	 * We don't have the cpu and thread maps on the header, so
+	 * for allocating the perf_sample_id table we fake 1 cpu and
+	 * hattr->ids threads.
+	 */
+	if (perf_evsel__alloc_id(&evsel->core, 1, n_ids))
+		return -ENOMEM;
+
+	ids = (void *)&event->attr.attr + event->attr.attr.size;
+	for (i = 0; i < n_ids; i++) {
+		perf_evlist__id_add(&evlist->core, &evsel->core, 0, i, ids[i]);
+	}
+
+	return 0;
+}
+
+int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event,
+				     struct evlist **pevlist)
+{
+	struct perf_record_event_update *ev = &event->event_update;
+	struct evlist *evlist;
+	struct evsel *evsel;
+	struct perf_cpu_map *map;
+
+	if (dump_trace)
+		perf_event__fprintf_event_update(event, stdout);
+
+	if (!pevlist || *pevlist == NULL)
+		return -EINVAL;
+
+	evlist = *pevlist;
+
+	evsel = evlist__id2evsel(evlist, ev->id);
+	if (evsel == NULL)
+		return -EINVAL;
+
+	switch (ev->type) {
+	case PERF_EVENT_UPDATE__UNIT:
+		free((char *)evsel->unit);
+		evsel->unit = strdup(ev->unit);
+		break;
+	case PERF_EVENT_UPDATE__NAME:
+		free(evsel->name);
+		evsel->name = strdup(ev->name);
+		break;
+	case PERF_EVENT_UPDATE__SCALE:
+		evsel->scale = ev->scale.scale;
+		break;
+	case PERF_EVENT_UPDATE__CPUS:
+		map = cpu_map__new_data(&ev->cpus.cpus);
+		if (map) {
+			perf_cpu_map__put(evsel->core.own_cpus);
+			evsel->core.own_cpus = map;
+		} else
+			pr_err("failed to get event_update cpus\n");
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+int perf_event__process_tracing_data(struct perf_session *session,
+				     union perf_event *event)
+{
+	ssize_t size_read, padding, size = event->tracing_data.size;
+	int fd = perf_data__fd(session->data);
+	char buf[BUFSIZ];
+
+	/*
+	 * The pipe fd is already in proper place and in any case
+	 * we can't move it, and we'd screw the case where we read
+	 * 'pipe' data from regular file. The trace_report reads
+	 * data from 'fd' so we need to set it directly behind the
+	 * event, where the tracing data starts.
+	 */
+	if (!perf_data__is_pipe(session->data)) {
+		off_t offset = lseek(fd, 0, SEEK_CUR);
+
+		/* setup for reading amidst mmap */
+		lseek(fd, offset + sizeof(struct perf_record_header_tracing_data),
+		      SEEK_SET);
+	}
+
+	size_read = trace_report(fd, &session->tevent,
+				 session->repipe);
+	padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read;
+
+	if (readn(fd, buf, padding) < 0) {
+		pr_err("%s: reading input file", __func__);
+		return -1;
+	}
+	if (session->repipe) {
+		int retw = write(STDOUT_FILENO, buf, padding);
+		if (retw <= 0 || retw != padding) {
+			pr_err("%s: repiping tracing data padding", __func__);
+			return -1;
+		}
+	}
+
+	if (size_read + padding != size) {
+		pr_err("%s: tracing data size mismatch", __func__);
+		return -1;
+	}
+
+	evlist__prepare_tracepoint_events(session->evlist, session->tevent.pevent);
+
+	return size_read + padding;
+}
+
+int perf_event__process_build_id(struct perf_session *session,
+				 union perf_event *event)
+{
+	__event_process_build_id(&event->build_id,
+				 event->build_id.filename,
+				 session);
+	return 0;
+}
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
new file mode 100644
index 000000000..2d5e601ba
--- /dev/null
+++ b/tools/perf/util/header.h
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_HEADER_H
+#define __PERF_HEADER_H
+
+#include <linux/stddef.h>
+#include <linux/perf_event.h>
+#include <sys/types.h>
+#include <stdio.h> // FILE
+#include <stdbool.h>
+#include <linux/bitmap.h>
+#include <linux/types.h>
+#include "env.h"
+#include "pmu.h"
+
+enum {
+	HEADER_RESERVED		= 0,	/* always cleared */
+	HEADER_FIRST_FEATURE	= 1,
+	HEADER_TRACING_DATA	= 1,
+	HEADER_BUILD_ID,
+
+	HEADER_HOSTNAME,
+	HEADER_OSRELEASE,
+	HEADER_VERSION,
+	HEADER_ARCH,
+	HEADER_NRCPUS,
+	HEADER_CPUDESC,
+	HEADER_CPUID,
+	HEADER_TOTAL_MEM,
+	HEADER_CMDLINE,
+	HEADER_EVENT_DESC,
+	HEADER_CPU_TOPOLOGY,
+	HEADER_NUMA_TOPOLOGY,
+	HEADER_BRANCH_STACK,
+	HEADER_PMU_MAPPINGS,
+	HEADER_GROUP_DESC,
+	HEADER_AUXTRACE,
+	HEADER_STAT,
+	HEADER_CACHE,
+	HEADER_SAMPLE_TIME,
+	HEADER_MEM_TOPOLOGY,
+	HEADER_CLOCKID,
+	HEADER_DIR_FORMAT,
+	HEADER_BPF_PROG_INFO,
+	HEADER_BPF_BTF,
+	HEADER_COMPRESSED,
+	HEADER_CPU_PMU_CAPS,
+	HEADER_CLOCK_DATA,
+	HEADER_HYBRID_TOPOLOGY,
+	HEADER_PMU_CAPS,
+	HEADER_LAST_FEATURE,
+	HEADER_FEAT_BITS	= 256,
+};
+
+enum perf_header_version {
+	PERF_HEADER_VERSION_1,
+	PERF_HEADER_VERSION_2,
+};
+
+struct perf_file_section {
+	u64 offset;
+	u64 size;
+};
+
+struct perf_file_header {
+	u64				magic;
+	u64				size;
+	u64				attr_size;
+	struct perf_file_section	attrs;
+	struct perf_file_section	data;
+	/* event_types is ignored */
+	struct perf_file_section	event_types;
+	DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
+};
+
+struct perf_pipe_file_header {
+	u64				magic;
+	u64				size;
+};
+
+struct perf_header;
+
+int perf_file_header__read(struct perf_file_header *header,
+			   struct perf_header *ph, int fd);
+
+struct perf_header {
+	enum perf_header_version	version;
+	bool				needs_swap;
+	u64				data_offset;
+	u64				data_size;
+	u64				feat_offset;
+	DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
+	struct perf_env 	env;
+};
+
+struct feat_fd {
+	struct perf_header *ph;
+	int		   fd;
+	void		   *buf;	/* Either buf != NULL or fd >= 0 */
+	ssize_t		   offset;
+	size_t		   size;
+	struct evsel	   *events;
+};
+
+struct perf_header_feature_ops {
+	int	   (*write)(struct feat_fd *ff, struct evlist *evlist);
+	void	   (*print)(struct feat_fd *ff, FILE *fp);
+	int	   (*process)(struct feat_fd *ff, void *data);
+	const char *name;
+	bool	   full_only;
+	bool	   synthesize;
+};
+
+struct evlist;
+struct perf_session;
+struct perf_tool;
+union perf_event;
+
+int perf_session__read_header(struct perf_session *session, int repipe_fd);
+int perf_session__write_header(struct perf_session *session,
+			       struct evlist *evlist,
+			       int fd, bool at_exit);
+int perf_header__write_pipe(int fd);
+
+/* feat_writer writes a feature section to output */
+struct feat_writer {
+	int (*write)(struct feat_writer *fw, void *buf, size_t sz);
+};
+
+/* feat_copier copies a feature section using feat_writer to output */
+struct feat_copier {
+	int (*copy)(struct feat_copier *fc, int feat, struct feat_writer *fw);
+};
+
+int perf_session__inject_header(struct perf_session *session,
+				struct evlist *evlist,
+				int fd,
+				struct feat_copier *fc);
+
+size_t perf_session__data_offset(const struct evlist *evlist);
+
+void perf_header__set_feat(struct perf_header *header, int feat);
+void perf_header__clear_feat(struct perf_header *header, int feat);
+bool perf_header__has_feat(const struct perf_header *header, int feat);
+
+int perf_header__set_cmdline(int argc, const char **argv);
+
+int perf_header__process_sections(struct perf_header *header, int fd,
+				  void *data,
+				  int (*process)(struct perf_file_section *section,
+				  struct perf_header *ph,
+				  int feat, int fd, void *data));
+
+int perf_header__fprintf_info(struct perf_session *s, FILE *fp, bool full);
+
+int perf_event__process_feature(struct perf_session *session,
+				union perf_event *event);
+int perf_event__process_attr(struct perf_tool *tool, union perf_event *event,
+			     struct evlist **pevlist);
+int perf_event__process_event_update(struct perf_tool *tool,
+				     union perf_event *event,
+				     struct evlist **pevlist);
+size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp);
+int perf_event__process_tracing_data(struct perf_session *session,
+				     union perf_event *event);
+int perf_event__process_build_id(struct perf_session *session,
+				 union perf_event *event);
+bool is_perf_magic(u64 magic);
+
+#define NAME_ALIGN 64
+
+struct feat_fd;
+
+int do_write(struct feat_fd *fd, const void *buf, size_t size);
+
+int write_padded(struct feat_fd *fd, const void *bf,
+		 size_t count, size_t count_aligned);
+
+int is_cpu_online(unsigned int cpu);
+/*
+ * arch specific callback
+ */
+int get_cpuid(char *buffer, size_t sz);
+
+char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused);
+int strcmp_cpuid_str(const char *s1, const char *s2);
+#endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c
new file mode 100644
index 000000000..ab9e16123
--- /dev/null
+++ b/tools/perf/util/help-unknown-cmd.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "cache.h"
+#include "config.h"
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <subcmd/help.h>
+#include "../builtin.h"
+#include "levenshtein.h"
+#include <linux/zalloc.h>
+
+static int autocorrect;
+
+static int perf_unknown_cmd_config(const char *var, const char *value,
+				   void *cb __maybe_unused)
+{
+	if (!strcmp(var, "help.autocorrect"))
+		return perf_config_int(&autocorrect, var,value);
+
+	return 0;
+}
+
+static int levenshtein_compare(const void *p1, const void *p2)
+{
+	const struct cmdname *const *c1 = p1, *const *c2 = p2;
+	const char *s1 = (*c1)->name, *s2 = (*c2)->name;
+	int l1 = (*c1)->len;
+	int l2 = (*c2)->len;
+	return l1 != l2 ? l1 - l2 : strcmp(s1, s2);
+}
+
+static int add_cmd_list(struct cmdnames *cmds, struct cmdnames *old)
+{
+	unsigned int i, nr = cmds->cnt + old->cnt;
+	void *tmp;
+
+	if (nr > cmds->alloc) {
+		/* Choose bigger one to alloc */
+		if (alloc_nr(cmds->alloc) < nr)
+			cmds->alloc = nr;
+		else
+			cmds->alloc = alloc_nr(cmds->alloc);
+		tmp = realloc(cmds->names, cmds->alloc * sizeof(*cmds->names));
+		if (!tmp)
+			return -1;
+		cmds->names = tmp;
+	}
+	for (i = 0; i < old->cnt; i++)
+		cmds->names[cmds->cnt++] = old->names[i];
+	zfree(&old->names);
+	old->cnt = 0;
+	return 0;
+}
+
+const char *help_unknown_cmd(const char *cmd)
+{
+	unsigned int i, n = 0, best_similarity = 0;
+	struct cmdnames main_cmds, other_cmds;
+
+	memset(&main_cmds, 0, sizeof(main_cmds));
+	memset(&other_cmds, 0, sizeof(main_cmds));
+
+	perf_config(perf_unknown_cmd_config, NULL);
+
+	load_command_list("perf-", &main_cmds, &other_cmds);
+
+	if (add_cmd_list(&main_cmds, &other_cmds) < 0) {
+		fprintf(stderr, "ERROR: Failed to allocate command list for unknown command.\n");
+		goto end;
+	}
+	qsort(main_cmds.names, main_cmds.cnt,
+	      sizeof(main_cmds.names), cmdname_compare);
+	uniq(&main_cmds);
+
+	if (main_cmds.cnt) {
+		/* This reuses cmdname->len for similarity index */
+		for (i = 0; i < main_cmds.cnt; ++i)
+			main_cmds.names[i]->len =
+				levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4);
+
+		qsort(main_cmds.names, main_cmds.cnt,
+		      sizeof(*main_cmds.names), levenshtein_compare);
+
+		best_similarity = main_cmds.names[0]->len;
+		n = 1;
+		while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len)
+			++n;
+	}
+
+	if (autocorrect && n == 1) {
+		const char *assumed = main_cmds.names[0]->name;
+
+		main_cmds.names[0] = NULL;
+		clean_cmdnames(&main_cmds);
+		fprintf(stderr, "WARNING: You called a perf program named '%s', "
+			"which does not exist.\n"
+			"Continuing under the assumption that you meant '%s'\n",
+			cmd, assumed);
+		if (autocorrect > 0) {
+			fprintf(stderr, "in %0.1f seconds automatically...\n",
+				(float)autocorrect/10.0);
+			poll(NULL, 0, autocorrect * 100);
+		}
+		return assumed;
+	}
+
+	fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd);
+
+	if (main_cmds.cnt && best_similarity < 6) {
+		fprintf(stderr, "\nDid you mean %s?\n",
+			n < 2 ? "this": "one of these");
+
+		for (i = 0; i < n; i++)
+			fprintf(stderr, "\t%s\n", main_cmds.names[i]->name);
+	}
+end:
+	exit(1);
+}
diff --git a/tools/perf/util/help-unknown-cmd.h b/tools/perf/util/help-unknown-cmd.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/perf/util/help-unknown-cmd.h
diff --git a/tools/perf/util/hisi-ptt-decoder/Build b/tools/perf/util/hisi-ptt-decoder/Build
new file mode 100644
index 000000000..db3db8b75
--- /dev/null
+++ b/tools/perf/util/hisi-ptt-decoder/Build
@@ -0,0 +1 @@
+perf-$(CONFIG_AUXTRACE) += hisi-ptt-pkt-decoder.o
diff --git a/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.c b/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.c
new file mode 100644
index 000000000..a17c423a5
--- /dev/null
+++ b/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * HiSilicon PCIe Trace and Tuning (PTT) support
+ * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <endian.h>
+#include <byteswap.h>
+#include <linux/bitops.h>
+#include <stdarg.h>
+
+#include "../color.h"
+#include "hisi-ptt-pkt-decoder.h"
+
+/*
+ * For 8DW format, the bit[31:11] of DW0 is always 0x1fffff, which can be
+ * used to distinguish the data format.
+ * 8DW format is like:
+ *   bits [                 31:11                 ][       10:0       ]
+ *        |---------------------------------------|-------------------|
+ *    DW0 [                0x1fffff               ][ Reserved (0x7ff) ]
+ *    DW1 [                       Prefix                              ]
+ *    DW2 [                     Header DW0                            ]
+ *    DW3 [                     Header DW1                            ]
+ *    DW4 [                     Header DW2                            ]
+ *    DW5 [                     Header DW3                            ]
+ *    DW6 [                   Reserved (0x0)                          ]
+ *    DW7 [                        Time                               ]
+ *
+ * 4DW format is like:
+ *   bits [31:30] [ 29:25 ][24][23][22][21][    20:11   ][    10:0    ]
+ *        |-----|---------|---|---|---|---|-------------|-------------|
+ *    DW0 [ Fmt ][  Type  ][T9][T8][TH][SO][   Length   ][    Time    ]
+ *    DW1 [                     Header DW1                            ]
+ *    DW2 [                     Header DW2                            ]
+ *    DW3 [                     Header DW3                            ]
+ */
+
+enum hisi_ptt_8dw_pkt_field_type {
+	HISI_PTT_8DW_CHK_AND_RSV0,
+	HISI_PTT_8DW_PREFIX,
+	HISI_PTT_8DW_HEAD0,
+	HISI_PTT_8DW_HEAD1,
+	HISI_PTT_8DW_HEAD2,
+	HISI_PTT_8DW_HEAD3,
+	HISI_PTT_8DW_RSV1,
+	HISI_PTT_8DW_TIME,
+	HISI_PTT_8DW_TYPE_MAX
+};
+
+enum hisi_ptt_4dw_pkt_field_type {
+	HISI_PTT_4DW_HEAD1,
+	HISI_PTT_4DW_HEAD2,
+	HISI_PTT_4DW_HEAD3,
+	HISI_PTT_4DW_TYPE_MAX
+};
+
+static const char * const hisi_ptt_8dw_pkt_field_name[] = {
+	[HISI_PTT_8DW_PREFIX]	= "Prefix",
+	[HISI_PTT_8DW_HEAD0]	= "Header DW0",
+	[HISI_PTT_8DW_HEAD1]	= "Header DW1",
+	[HISI_PTT_8DW_HEAD2]	= "Header DW2",
+	[HISI_PTT_8DW_HEAD3]	= "Header DW3",
+	[HISI_PTT_8DW_TIME]	= "Time"
+};
+
+static const char * const hisi_ptt_4dw_pkt_field_name[] = {
+	[HISI_PTT_4DW_HEAD1]	= "Header DW1",
+	[HISI_PTT_4DW_HEAD2]	= "Header DW2",
+	[HISI_PTT_4DW_HEAD3]	= "Header DW3",
+};
+
+union hisi_ptt_4dw {
+	struct {
+		uint32_t format : 2;
+		uint32_t type : 5;
+		uint32_t t9 : 1;
+		uint32_t t8 : 1;
+		uint32_t th : 1;
+		uint32_t so : 1;
+		uint32_t len : 10;
+		uint32_t time : 11;
+	};
+	uint32_t value;
+};
+
+static void hisi_ptt_print_pkt(const unsigned char *buf, int pos, const char *desc)
+{
+	const char *color = PERF_COLOR_BLUE;
+	int i;
+
+	printf(".");
+	color_fprintf(stdout, color, "  %08x: ", pos);
+	for (i = 0; i < HISI_PTT_FIELD_LENTH; i++)
+		color_fprintf(stdout, color, "%02x ", buf[pos + i]);
+	for (i = 0; i < HISI_PTT_MAX_SPACE_LEN; i++)
+		color_fprintf(stdout, color, "   ");
+	color_fprintf(stdout, color, "  %s\n", desc);
+}
+
+static int hisi_ptt_8dw_kpt_desc(const unsigned char *buf, int pos)
+{
+	int i;
+
+	for (i = 0; i < HISI_PTT_8DW_TYPE_MAX; i++) {
+		/* Do not show 8DW check field and reserved fields */
+		if (i == HISI_PTT_8DW_CHK_AND_RSV0 || i == HISI_PTT_8DW_RSV1) {
+			pos += HISI_PTT_FIELD_LENTH;
+			continue;
+		}
+
+		hisi_ptt_print_pkt(buf, pos, hisi_ptt_8dw_pkt_field_name[i]);
+		pos += HISI_PTT_FIELD_LENTH;
+	}
+
+	return hisi_ptt_pkt_size[HISI_PTT_8DW_PKT];
+}
+
+static void hisi_ptt_4dw_print_dw0(const unsigned char *buf, int pos)
+{
+	const char *color = PERF_COLOR_BLUE;
+	union hisi_ptt_4dw dw0;
+	int i;
+
+	dw0.value = *(uint32_t *)(buf + pos);
+	printf(".");
+	color_fprintf(stdout, color, "  %08x: ", pos);
+	for (i = 0; i < HISI_PTT_FIELD_LENTH; i++)
+		color_fprintf(stdout, color, "%02x ", buf[pos + i]);
+	for (i = 0; i < HISI_PTT_MAX_SPACE_LEN; i++)
+		color_fprintf(stdout, color, "   ");
+
+	color_fprintf(stdout, color,
+		      "  %s %x %s %x %s %x %s %x %s %x %s %x %s %x %s %x\n",
+		      "Format", dw0.format, "Type", dw0.type, "T9", dw0.t9,
+		      "T8", dw0.t8, "TH", dw0.th, "SO", dw0.so, "Length",
+		      dw0.len, "Time", dw0.time);
+}
+
+static int hisi_ptt_4dw_kpt_desc(const unsigned char *buf, int pos)
+{
+	int i;
+
+	hisi_ptt_4dw_print_dw0(buf, pos);
+	pos += HISI_PTT_FIELD_LENTH;
+
+	for (i = 0; i < HISI_PTT_4DW_TYPE_MAX; i++) {
+		hisi_ptt_print_pkt(buf, pos, hisi_ptt_4dw_pkt_field_name[i]);
+		pos += HISI_PTT_FIELD_LENTH;
+	}
+
+	return hisi_ptt_pkt_size[HISI_PTT_4DW_PKT];
+}
+
+int hisi_ptt_pkt_desc(const unsigned char *buf, int pos, enum hisi_ptt_pkt_type type)
+{
+	if (type == HISI_PTT_8DW_PKT)
+		return hisi_ptt_8dw_kpt_desc(buf, pos);
+
+	return hisi_ptt_4dw_kpt_desc(buf, pos);
+}
diff --git a/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.h b/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.h
new file mode 100644
index 000000000..e78f1b5bc
--- /dev/null
+++ b/tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * HiSilicon PCIe Trace and Tuning (PTT) support
+ * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
+ */
+
+#ifndef INCLUDE__HISI_PTT_PKT_DECODER_H__
+#define INCLUDE__HISI_PTT_PKT_DECODER_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define HISI_PTT_8DW_CHECK_MASK		GENMASK(31, 11)
+#define HISI_PTT_IS_8DW_PKT		GENMASK(31, 11)
+#define HISI_PTT_MAX_SPACE_LEN		10
+#define HISI_PTT_FIELD_LENTH		4
+
+enum hisi_ptt_pkt_type {
+	HISI_PTT_4DW_PKT,
+	HISI_PTT_8DW_PKT,
+	HISI_PTT_PKT_MAX
+};
+
+static int hisi_ptt_pkt_size[] = {
+	[HISI_PTT_4DW_PKT]	= 16,
+	[HISI_PTT_8DW_PKT]	= 32,
+};
+
+int hisi_ptt_pkt_desc(const unsigned char *buf, int pos, enum hisi_ptt_pkt_type type);
+
+#endif
diff --git a/tools/perf/util/hisi-ptt.c b/tools/perf/util/hisi-ptt.c
new file mode 100644
index 000000000..764d660d3
--- /dev/null
+++ b/tools/perf/util/hisi-ptt.c
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * HiSilicon PCIe Trace and Tuning (PTT) support
+ * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
+ */
+
+#include <byteswap.h>
+#include <endian.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/types.h>
+#include <linux/zalloc.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "auxtrace.h"
+#include "color.h"
+#include "debug.h"
+#include "evsel.h"
+#include "hisi-ptt.h"
+#include "hisi-ptt-decoder/hisi-ptt-pkt-decoder.h"
+#include "machine.h"
+#include "session.h"
+#include "tool.h"
+#include <internal/lib.h>
+
+struct hisi_ptt {
+	struct auxtrace auxtrace;
+	u32 auxtrace_type;
+	struct perf_session *session;
+	struct machine *machine;
+	u32 pmu_type;
+};
+
+struct hisi_ptt_queue {
+	struct hisi_ptt *ptt;
+	struct auxtrace_buffer *buffer;
+};
+
+static enum hisi_ptt_pkt_type hisi_ptt_check_packet_type(unsigned char *buf)
+{
+	uint32_t head = *(uint32_t *)buf;
+
+	if ((HISI_PTT_8DW_CHECK_MASK & head) == HISI_PTT_IS_8DW_PKT)
+		return HISI_PTT_8DW_PKT;
+
+	return HISI_PTT_4DW_PKT;
+}
+
+static void hisi_ptt_dump(struct hisi_ptt *ptt __maybe_unused,
+			  unsigned char *buf, size_t len)
+{
+	const char *color = PERF_COLOR_BLUE;
+	enum hisi_ptt_pkt_type type;
+	size_t pos = 0;
+	int pkt_len;
+
+	type = hisi_ptt_check_packet_type(buf);
+	len = round_down(len, hisi_ptt_pkt_size[type]);
+	color_fprintf(stdout, color, ". ... HISI PTT data: size %zu bytes\n",
+		      len);
+
+	while (len > 0) {
+		pkt_len = hisi_ptt_pkt_desc(buf, pos, type);
+		if (!pkt_len)
+			color_fprintf(stdout, color, " Bad packet!\n");
+
+		pos += pkt_len;
+		len -= pkt_len;
+	}
+}
+
+static void hisi_ptt_dump_event(struct hisi_ptt *ptt, unsigned char *buf,
+				size_t len)
+{
+	printf(".\n");
+
+	hisi_ptt_dump(ptt, buf, len);
+}
+
+static int hisi_ptt_process_event(struct perf_session *session __maybe_unused,
+				  union perf_event *event __maybe_unused,
+				  struct perf_sample *sample __maybe_unused,
+				  struct perf_tool *tool __maybe_unused)
+{
+	return 0;
+}
+
+static int hisi_ptt_process_auxtrace_event(struct perf_session *session,
+					   union perf_event *event,
+					   struct perf_tool *tool __maybe_unused)
+{
+	struct hisi_ptt *ptt = container_of(session->auxtrace, struct hisi_ptt,
+					    auxtrace);
+	int fd = perf_data__fd(session->data);
+	int size = event->auxtrace.size;
+	void *data = malloc(size);
+	off_t data_offset;
+	int err;
+
+	if (!data)
+		return -errno;
+
+	if (perf_data__is_pipe(session->data)) {
+		data_offset = 0;
+	} else {
+		data_offset = lseek(fd, 0, SEEK_CUR);
+		if (data_offset == -1)
+			return -errno;
+	}
+
+	err = readn(fd, data, size);
+	if (err != (ssize_t)size) {
+		free(data);
+		return -errno;
+	}
+
+	if (dump_trace)
+		hisi_ptt_dump_event(ptt, data, size);
+
+	free(data);
+	return 0;
+}
+
+static int hisi_ptt_flush(struct perf_session *session __maybe_unused,
+			  struct perf_tool *tool __maybe_unused)
+{
+	return 0;
+}
+
+static void hisi_ptt_free_events(struct perf_session *session __maybe_unused)
+{
+}
+
+static void hisi_ptt_free(struct perf_session *session)
+{
+	struct hisi_ptt *ptt = container_of(session->auxtrace, struct hisi_ptt,
+					    auxtrace);
+
+	session->auxtrace = NULL;
+	free(ptt);
+}
+
+static bool hisi_ptt_evsel_is_auxtrace(struct perf_session *session,
+				       struct evsel *evsel)
+{
+	struct hisi_ptt *ptt = container_of(session->auxtrace, struct hisi_ptt, auxtrace);
+
+	return evsel->core.attr.type == ptt->pmu_type;
+}
+
+static void hisi_ptt_print_info(__u64 type)
+{
+	if (!dump_trace)
+		return;
+
+	fprintf(stdout, "  PMU Type           %" PRId64 "\n", (s64) type);
+}
+
+int hisi_ptt_process_auxtrace_info(union perf_event *event,
+				   struct perf_session *session)
+{
+	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
+	struct hisi_ptt *ptt;
+
+	if (auxtrace_info->header.size < HISI_PTT_AUXTRACE_PRIV_SIZE +
+				sizeof(struct perf_record_auxtrace_info))
+		return -EINVAL;
+
+	ptt = zalloc(sizeof(*ptt));
+	if (!ptt)
+		return -ENOMEM;
+
+	ptt->session = session;
+	ptt->machine = &session->machines.host; /* No kvm support */
+	ptt->auxtrace_type = auxtrace_info->type;
+	ptt->pmu_type = auxtrace_info->priv[0];
+
+	ptt->auxtrace.process_event = hisi_ptt_process_event;
+	ptt->auxtrace.process_auxtrace_event = hisi_ptt_process_auxtrace_event;
+	ptt->auxtrace.flush_events = hisi_ptt_flush;
+	ptt->auxtrace.free_events = hisi_ptt_free_events;
+	ptt->auxtrace.free = hisi_ptt_free;
+	ptt->auxtrace.evsel_is_auxtrace = hisi_ptt_evsel_is_auxtrace;
+	session->auxtrace = &ptt->auxtrace;
+
+	hisi_ptt_print_info(auxtrace_info->priv[0]);
+
+	return 0;
+}
diff --git a/tools/perf/util/hisi-ptt.h b/tools/perf/util/hisi-ptt.h
new file mode 100644
index 000000000..2db9b4056
--- /dev/null
+++ b/tools/perf/util/hisi-ptt.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * HiSilicon PCIe Trace and Tuning (PTT) support
+ * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
+ */
+
+#ifndef INCLUDE__PERF_HISI_PTT_H__
+#define INCLUDE__PERF_HISI_PTT_H__
+
+#define HISI_PTT_PMU_NAME		"hisi_ptt"
+#define HISI_PTT_AUXTRACE_PRIV_SIZE	sizeof(u64)
+
+struct auxtrace_record *hisi_ptt_recording_init(int *err,
+						struct perf_pmu *hisi_ptt_pmu);
+
+int hisi_ptt_process_auxtrace_info(union perf_event *event,
+				   struct perf_session *session);
+
+#endif
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
new file mode 100644
index 000000000..bffd058cb
--- /dev/null
+++ b/tools/perf/util/hist.c
@@ -0,0 +1,2896 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "callchain.h"
+#include "debug.h"
+#include "dso.h"
+#include "build-id.h"
+#include "hist.h"
+#include "map.h"
+#include "map_symbol.h"
+#include "branch.h"
+#include "mem-events.h"
+#include "session.h"
+#include "namespaces.h"
+#include "cgroup.h"
+#include "sort.h"
+#include "units.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "annotate.h"
+#include "srcline.h"
+#include "symbol.h"
+#include "thread.h"
+#include "block-info.h"
+#include "ui/progress.h"
+#include <errno.h>
+#include <math.h>
+#include <inttypes.h>
+#include <sys/param.h>
+#include <linux/rbtree.h>
+#include <linux/string.h>
+#include <linux/time64.h>
+#include <linux/zalloc.h>
+
+static bool hists__filter_entry_by_dso(struct hists *hists,
+				       struct hist_entry *he);
+static bool hists__filter_entry_by_thread(struct hists *hists,
+					  struct hist_entry *he);
+static bool hists__filter_entry_by_symbol(struct hists *hists,
+					  struct hist_entry *he);
+static bool hists__filter_entry_by_socket(struct hists *hists,
+					  struct hist_entry *he);
+
+u16 hists__col_len(struct hists *hists, enum hist_column col)
+{
+	return hists->col_len[col];
+}
+
+void hists__set_col_len(struct hists *hists, enum hist_column col, u16 len)
+{
+	hists->col_len[col] = len;
+}
+
+bool hists__new_col_len(struct hists *hists, enum hist_column col, u16 len)
+{
+	if (len > hists__col_len(hists, col)) {
+		hists__set_col_len(hists, col, len);
+		return true;
+	}
+	return false;
+}
+
+void hists__reset_col_len(struct hists *hists)
+{
+	enum hist_column col;
+
+	for (col = 0; col < HISTC_NR_COLS; ++col)
+		hists__set_col_len(hists, col, 0);
+}
+
+static void hists__set_unres_dso_col_len(struct hists *hists, int dso)
+{
+	const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
+
+	if (hists__col_len(hists, dso) < unresolved_col_width &&
+	    !symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
+	    !symbol_conf.dso_list)
+		hists__set_col_len(hists, dso, unresolved_col_width);
+}
+
+void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
+{
+	const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
+	int symlen;
+	u16 len;
+
+	if (h->block_info)
+		return;
+	/*
+	 * +4 accounts for '[x] ' priv level info
+	 * +2 accounts for 0x prefix on raw addresses
+	 * +3 accounts for ' y ' symtab origin info
+	 */
+	if (h->ms.sym) {
+		symlen = h->ms.sym->namelen + 4;
+		if (verbose > 0)
+			symlen += BITS_PER_LONG / 4 + 2 + 3;
+		hists__new_col_len(hists, HISTC_SYMBOL, symlen);
+	} else {
+		symlen = unresolved_col_width + 4 + 2;
+		hists__new_col_len(hists, HISTC_SYMBOL, symlen);
+		hists__set_unres_dso_col_len(hists, HISTC_DSO);
+	}
+
+	len = thread__comm_len(h->thread);
+	if (hists__new_col_len(hists, HISTC_COMM, len))
+		hists__set_col_len(hists, HISTC_THREAD, len + 8);
+
+	if (h->ms.map) {
+		len = dso__name_len(h->ms.map->dso);
+		hists__new_col_len(hists, HISTC_DSO, len);
+	}
+
+	if (h->parent)
+		hists__new_col_len(hists, HISTC_PARENT, h->parent->namelen);
+
+	if (h->branch_info) {
+		if (h->branch_info->from.ms.sym) {
+			symlen = (int)h->branch_info->from.ms.sym->namelen + 4;
+			if (verbose > 0)
+				symlen += BITS_PER_LONG / 4 + 2 + 3;
+			hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
+
+			symlen = dso__name_len(h->branch_info->from.ms.map->dso);
+			hists__new_col_len(hists, HISTC_DSO_FROM, symlen);
+		} else {
+			symlen = unresolved_col_width + 4 + 2;
+			hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
+			hists__new_col_len(hists, HISTC_ADDR_FROM, symlen);
+			hists__set_unres_dso_col_len(hists, HISTC_DSO_FROM);
+		}
+
+		if (h->branch_info->to.ms.sym) {
+			symlen = (int)h->branch_info->to.ms.sym->namelen + 4;
+			if (verbose > 0)
+				symlen += BITS_PER_LONG / 4 + 2 + 3;
+			hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
+
+			symlen = dso__name_len(h->branch_info->to.ms.map->dso);
+			hists__new_col_len(hists, HISTC_DSO_TO, symlen);
+		} else {
+			symlen = unresolved_col_width + 4 + 2;
+			hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
+			hists__new_col_len(hists, HISTC_ADDR_TO, symlen);
+			hists__set_unres_dso_col_len(hists, HISTC_DSO_TO);
+		}
+
+		if (h->branch_info->srcline_from)
+			hists__new_col_len(hists, HISTC_SRCLINE_FROM,
+					strlen(h->branch_info->srcline_from));
+		if (h->branch_info->srcline_to)
+			hists__new_col_len(hists, HISTC_SRCLINE_TO,
+					strlen(h->branch_info->srcline_to));
+	}
+
+	if (h->mem_info) {
+		if (h->mem_info->daddr.ms.sym) {
+			symlen = (int)h->mem_info->daddr.ms.sym->namelen + 4
+			       + unresolved_col_width + 2;
+			hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL,
+					   symlen);
+			hists__new_col_len(hists, HISTC_MEM_DCACHELINE,
+					   symlen + 1);
+		} else {
+			symlen = unresolved_col_width + 4 + 2;
+			hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL,
+					   symlen);
+			hists__new_col_len(hists, HISTC_MEM_DCACHELINE,
+					   symlen);
+		}
+
+		if (h->mem_info->iaddr.ms.sym) {
+			symlen = (int)h->mem_info->iaddr.ms.sym->namelen + 4
+			       + unresolved_col_width + 2;
+			hists__new_col_len(hists, HISTC_MEM_IADDR_SYMBOL,
+					   symlen);
+		} else {
+			symlen = unresolved_col_width + 4 + 2;
+			hists__new_col_len(hists, HISTC_MEM_IADDR_SYMBOL,
+					   symlen);
+		}
+
+		if (h->mem_info->daddr.ms.map) {
+			symlen = dso__name_len(h->mem_info->daddr.ms.map->dso);
+			hists__new_col_len(hists, HISTC_MEM_DADDR_DSO,
+					   symlen);
+		} else {
+			symlen = unresolved_col_width + 4 + 2;
+			hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
+		}
+
+		hists__new_col_len(hists, HISTC_MEM_PHYS_DADDR,
+				   unresolved_col_width + 4 + 2);
+
+		hists__new_col_len(hists, HISTC_MEM_DATA_PAGE_SIZE,
+				   unresolved_col_width + 4 + 2);
+
+	} else {
+		symlen = unresolved_col_width + 4 + 2;
+		hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen);
+		hists__new_col_len(hists, HISTC_MEM_IADDR_SYMBOL, symlen);
+		hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
+	}
+
+	hists__new_col_len(hists, HISTC_CGROUP, 6);
+	hists__new_col_len(hists, HISTC_CGROUP_ID, 20);
+	hists__new_col_len(hists, HISTC_CPU, 3);
+	hists__new_col_len(hists, HISTC_SOCKET, 6);
+	hists__new_col_len(hists, HISTC_MEM_LOCKED, 6);
+	hists__new_col_len(hists, HISTC_MEM_TLB, 22);
+	hists__new_col_len(hists, HISTC_MEM_SNOOP, 12);
+	hists__new_col_len(hists, HISTC_MEM_LVL, 21 + 3);
+	hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12);
+	hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12);
+	hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
+	hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
+	hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
+	hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
+	hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
+	hists__new_col_len(hists, HISTC_ADDR, BITS_PER_LONG / 4 + 2);
+
+	if (symbol_conf.nanosecs)
+		hists__new_col_len(hists, HISTC_TIME, 16);
+	else
+		hists__new_col_len(hists, HISTC_TIME, 12);
+	hists__new_col_len(hists, HISTC_CODE_PAGE_SIZE, 6);
+
+	if (h->srcline) {
+		len = MAX(strlen(h->srcline), strlen(sort_srcline.se_header));
+		hists__new_col_len(hists, HISTC_SRCLINE, len);
+	}
+
+	if (h->srcfile)
+		hists__new_col_len(hists, HISTC_SRCFILE, strlen(h->srcfile));
+
+	if (h->transaction)
+		hists__new_col_len(hists, HISTC_TRANSACTION,
+				   hist_entry__transaction_len());
+
+	if (h->trace_output)
+		hists__new_col_len(hists, HISTC_TRACE, strlen(h->trace_output));
+
+	if (h->cgroup) {
+		const char *cgrp_name = "unknown";
+		struct cgroup *cgrp = cgroup__find(h->ms.maps->machine->env,
+						   h->cgroup);
+		if (cgrp != NULL)
+			cgrp_name = cgrp->name;
+
+		hists__new_col_len(hists, HISTC_CGROUP, strlen(cgrp_name));
+	}
+}
+
+void hists__output_recalc_col_len(struct hists *hists, int max_rows)
+{
+	struct rb_node *next = rb_first_cached(&hists->entries);
+	struct hist_entry *n;
+	int row = 0;
+
+	hists__reset_col_len(hists);
+
+	while (next && row++ < max_rows) {
+		n = rb_entry(next, struct hist_entry, rb_node);
+		if (!n->filtered)
+			hists__calc_col_len(hists, n);
+		next = rb_next(&n->rb_node);
+	}
+}
+
+static void he_stat__add_cpumode_period(struct he_stat *he_stat,
+					unsigned int cpumode, u64 period)
+{
+	switch (cpumode) {
+	case PERF_RECORD_MISC_KERNEL:
+		he_stat->period_sys += period;
+		break;
+	case PERF_RECORD_MISC_USER:
+		he_stat->period_us += period;
+		break;
+	case PERF_RECORD_MISC_GUEST_KERNEL:
+		he_stat->period_guest_sys += period;
+		break;
+	case PERF_RECORD_MISC_GUEST_USER:
+		he_stat->period_guest_us += period;
+		break;
+	default:
+		break;
+	}
+}
+
+static long hist_time(unsigned long htime)
+{
+	unsigned long time_quantum = symbol_conf.time_quantum;
+	if (time_quantum)
+		return (htime / time_quantum) * time_quantum;
+	return htime;
+}
+
+static void he_stat__add_period(struct he_stat *he_stat, u64 period)
+{
+	he_stat->period		+= period;
+	he_stat->nr_events	+= 1;
+}
+
+static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
+{
+	dest->period		+= src->period;
+	dest->period_sys	+= src->period_sys;
+	dest->period_us		+= src->period_us;
+	dest->period_guest_sys	+= src->period_guest_sys;
+	dest->period_guest_us	+= src->period_guest_us;
+	dest->nr_events		+= src->nr_events;
+}
+
+static void he_stat__decay(struct he_stat *he_stat)
+{
+	he_stat->period = (he_stat->period * 7) / 8;
+	he_stat->nr_events = (he_stat->nr_events * 7) / 8;
+	/* XXX need decay for weight too? */
+}
+
+static void hists__delete_entry(struct hists *hists, struct hist_entry *he);
+
+static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
+{
+	u64 prev_period = he->stat.period;
+	u64 diff;
+
+	if (prev_period == 0)
+		return true;
+
+	he_stat__decay(&he->stat);
+	if (symbol_conf.cumulate_callchain)
+		he_stat__decay(he->stat_acc);
+	decay_callchain(he->callchain);
+
+	diff = prev_period - he->stat.period;
+
+	if (!he->depth) {
+		hists->stats.total_period -= diff;
+		if (!he->filtered)
+			hists->stats.total_non_filtered_period -= diff;
+	}
+
+	if (!he->leaf) {
+		struct hist_entry *child;
+		struct rb_node *node = rb_first_cached(&he->hroot_out);
+		while (node) {
+			child = rb_entry(node, struct hist_entry, rb_node);
+			node = rb_next(node);
+
+			if (hists__decay_entry(hists, child))
+				hists__delete_entry(hists, child);
+		}
+	}
+
+	return he->stat.period == 0;
+}
+
+static void hists__delete_entry(struct hists *hists, struct hist_entry *he)
+{
+	struct rb_root_cached *root_in;
+	struct rb_root_cached *root_out;
+
+	if (he->parent_he) {
+		root_in  = &he->parent_he->hroot_in;
+		root_out = &he->parent_he->hroot_out;
+	} else {
+		if (hists__has(hists, need_collapse))
+			root_in = &hists->entries_collapsed;
+		else
+			root_in = hists->entries_in;
+		root_out = &hists->entries;
+	}
+
+	rb_erase_cached(&he->rb_node_in, root_in);
+	rb_erase_cached(&he->rb_node, root_out);
+
+	--hists->nr_entries;
+	if (!he->filtered)
+		--hists->nr_non_filtered_entries;
+
+	hist_entry__delete(he);
+}
+
+void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel)
+{
+	struct rb_node *next = rb_first_cached(&hists->entries);
+	struct hist_entry *n;
+
+	while (next) {
+		n = rb_entry(next, struct hist_entry, rb_node);
+		next = rb_next(&n->rb_node);
+		if (((zap_user && n->level == '.') ||
+		     (zap_kernel && n->level != '.') ||
+		     hists__decay_entry(hists, n))) {
+			hists__delete_entry(hists, n);
+		}
+	}
+}
+
+void hists__delete_entries(struct hists *hists)
+{
+	struct rb_node *next = rb_first_cached(&hists->entries);
+	struct hist_entry *n;
+
+	while (next) {
+		n = rb_entry(next, struct hist_entry, rb_node);
+		next = rb_next(&n->rb_node);
+
+		hists__delete_entry(hists, n);
+	}
+}
+
+struct hist_entry *hists__get_entry(struct hists *hists, int idx)
+{
+	struct rb_node *next = rb_first_cached(&hists->entries);
+	struct hist_entry *n;
+	int i = 0;
+
+	while (next) {
+		n = rb_entry(next, struct hist_entry, rb_node);
+		if (i == idx)
+			return n;
+
+		next = rb_next(&n->rb_node);
+		i++;
+	}
+
+	return NULL;
+}
+
+/*
+ * histogram, sorted on item, collects periods
+ */
+
+static int hist_entry__init(struct hist_entry *he,
+			    struct hist_entry *template,
+			    bool sample_self,
+			    size_t callchain_size)
+{
+	*he = *template;
+	he->callchain_size = callchain_size;
+
+	if (symbol_conf.cumulate_callchain) {
+		he->stat_acc = malloc(sizeof(he->stat));
+		if (he->stat_acc == NULL)
+			return -ENOMEM;
+		memcpy(he->stat_acc, &he->stat, sizeof(he->stat));
+		if (!sample_self)
+			memset(&he->stat, 0, sizeof(he->stat));
+	}
+
+	map__get(he->ms.map);
+
+	if (he->branch_info) {
+		/*
+		 * This branch info is (a part of) allocated from
+		 * sample__resolve_bstack() and will be freed after
+		 * adding new entries.  So we need to save a copy.
+		 */
+		he->branch_info = malloc(sizeof(*he->branch_info));
+		if (he->branch_info == NULL)
+			goto err;
+
+		memcpy(he->branch_info, template->branch_info,
+		       sizeof(*he->branch_info));
+
+		map__get(he->branch_info->from.ms.map);
+		map__get(he->branch_info->to.ms.map);
+	}
+
+	if (he->mem_info) {
+		map__get(he->mem_info->iaddr.ms.map);
+		map__get(he->mem_info->daddr.ms.map);
+	}
+
+	if (hist_entry__has_callchains(he) && symbol_conf.use_callchain)
+		callchain_init(he->callchain);
+
+	if (he->raw_data) {
+		he->raw_data = memdup(he->raw_data, he->raw_size);
+		if (he->raw_data == NULL)
+			goto err_infos;
+	}
+
+	if (he->srcline) {
+		he->srcline = strdup(he->srcline);
+		if (he->srcline == NULL)
+			goto err_rawdata;
+	}
+
+	if (symbol_conf.res_sample) {
+		he->res_samples = calloc(sizeof(struct res_sample),
+					symbol_conf.res_sample);
+		if (!he->res_samples)
+			goto err_srcline;
+	}
+
+	INIT_LIST_HEAD(&he->pairs.node);
+	thread__get(he->thread);
+	he->hroot_in  = RB_ROOT_CACHED;
+	he->hroot_out = RB_ROOT_CACHED;
+
+	if (!symbol_conf.report_hierarchy)
+		he->leaf = true;
+
+	return 0;
+
+err_srcline:
+	zfree(&he->srcline);
+
+err_rawdata:
+	zfree(&he->raw_data);
+
+err_infos:
+	if (he->branch_info) {
+		map__put(he->branch_info->from.ms.map);
+		map__put(he->branch_info->to.ms.map);
+		zfree(&he->branch_info);
+	}
+	if (he->mem_info) {
+		map__put(he->mem_info->iaddr.ms.map);
+		map__put(he->mem_info->daddr.ms.map);
+	}
+err:
+	map__zput(he->ms.map);
+	zfree(&he->stat_acc);
+	return -ENOMEM;
+}
+
+static void *hist_entry__zalloc(size_t size)
+{
+	return zalloc(size + sizeof(struct hist_entry));
+}
+
+static void hist_entry__free(void *ptr)
+{
+	free(ptr);
+}
+
+static struct hist_entry_ops default_ops = {
+	.new	= hist_entry__zalloc,
+	.free	= hist_entry__free,
+};
+
+static struct hist_entry *hist_entry__new(struct hist_entry *template,
+					  bool sample_self)
+{
+	struct hist_entry_ops *ops = template->ops;
+	size_t callchain_size = 0;
+	struct hist_entry *he;
+	int err = 0;
+
+	if (!ops)
+		ops = template->ops = &default_ops;
+
+	if (symbol_conf.use_callchain)
+		callchain_size = sizeof(struct callchain_root);
+
+	he = ops->new(callchain_size);
+	if (he) {
+		err = hist_entry__init(he, template, sample_self, callchain_size);
+		if (err) {
+			ops->free(he);
+			he = NULL;
+		}
+	}
+
+	return he;
+}
+
+static u8 symbol__parent_filter(const struct symbol *parent)
+{
+	if (symbol_conf.exclude_other && parent == NULL)
+		return 1 << HIST_FILTER__PARENT;
+	return 0;
+}
+
+static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period)
+{
+	if (!hist_entry__has_callchains(he) || !symbol_conf.use_callchain)
+		return;
+
+	he->hists->callchain_period += period;
+	if (!he->filtered)
+		he->hists->callchain_non_filtered_period += period;
+}
+
+static struct hist_entry *hists__findnew_entry(struct hists *hists,
+					       struct hist_entry *entry,
+					       struct addr_location *al,
+					       bool sample_self)
+{
+	struct rb_node **p;
+	struct rb_node *parent = NULL;
+	struct hist_entry *he;
+	int64_t cmp;
+	u64 period = entry->stat.period;
+	bool leftmost = true;
+
+	p = &hists->entries_in->rb_root.rb_node;
+
+	while (*p != NULL) {
+		parent = *p;
+		he = rb_entry(parent, struct hist_entry, rb_node_in);
+
+		/*
+		 * Make sure that it receives arguments in a same order as
+		 * hist_entry__collapse() so that we can use an appropriate
+		 * function when searching an entry regardless which sort
+		 * keys were used.
+		 */
+		cmp = hist_entry__cmp(he, entry);
+
+		if (!cmp) {
+			if (sample_self) {
+				he_stat__add_period(&he->stat, period);
+				hist_entry__add_callchain_period(he, period);
+			}
+			if (symbol_conf.cumulate_callchain)
+				he_stat__add_period(he->stat_acc, period);
+
+			/*
+			 * This mem info was allocated from sample__resolve_mem
+			 * and will not be used anymore.
+			 */
+			mem_info__zput(entry->mem_info);
+
+			block_info__zput(entry->block_info);
+
+			/* If the map of an existing hist_entry has
+			 * become out-of-date due to an exec() or
+			 * similar, update it.  Otherwise we will
+			 * mis-adjust symbol addresses when computing
+			 * the history counter to increment.
+			 */
+			if (he->ms.map != entry->ms.map) {
+				map__put(he->ms.map);
+				he->ms.map = map__get(entry->ms.map);
+			}
+			goto out;
+		}
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else {
+			p = &(*p)->rb_right;
+			leftmost = false;
+		}
+	}
+
+	he = hist_entry__new(entry, sample_self);
+	if (!he)
+		return NULL;
+
+	if (sample_self)
+		hist_entry__add_callchain_period(he, period);
+	hists->nr_entries++;
+
+	rb_link_node(&he->rb_node_in, parent, p);
+	rb_insert_color_cached(&he->rb_node_in, hists->entries_in, leftmost);
+out:
+	if (sample_self)
+		he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
+	if (symbol_conf.cumulate_callchain)
+		he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period);
+	return he;
+}
+
+static unsigned random_max(unsigned high)
+{
+	unsigned thresh = -high % high;
+	for (;;) {
+		unsigned r = random();
+		if (r >= thresh)
+			return r % high;
+	}
+}
+
+static void hists__res_sample(struct hist_entry *he, struct perf_sample *sample)
+{
+	struct res_sample *r;
+	int j;
+
+	if (he->num_res < symbol_conf.res_sample) {
+		j = he->num_res++;
+	} else {
+		j = random_max(symbol_conf.res_sample);
+	}
+	r = &he->res_samples[j];
+	r->time = sample->time;
+	r->cpu = sample->cpu;
+	r->tid = sample->tid;
+}
+
+static struct hist_entry*
+__hists__add_entry(struct hists *hists,
+		   struct addr_location *al,
+		   struct symbol *sym_parent,
+		   struct branch_info *bi,
+		   struct mem_info *mi,
+		   struct block_info *block_info,
+		   struct perf_sample *sample,
+		   bool sample_self,
+		   struct hist_entry_ops *ops)
+{
+	struct namespaces *ns = thread__namespaces(al->thread);
+	struct hist_entry entry = {
+		.thread	= al->thread,
+		.comm = thread__comm(al->thread),
+		.cgroup_id = {
+			.dev = ns ? ns->link_info[CGROUP_NS_INDEX].dev : 0,
+			.ino = ns ? ns->link_info[CGROUP_NS_INDEX].ino : 0,
+		},
+		.cgroup = sample->cgroup,
+		.ms = {
+			.maps	= al->maps,
+			.map	= al->map,
+			.sym	= al->sym,
+		},
+		.srcline = (char *) al->srcline,
+		.socket	 = al->socket,
+		.cpu	 = al->cpu,
+		.cpumode = al->cpumode,
+		.ip	 = al->addr,
+		.level	 = al->level,
+		.code_page_size = sample->code_page_size,
+		.stat = {
+			.nr_events = 1,
+			.period	= sample->period,
+		},
+		.parent = sym_parent,
+		.filtered = symbol__parent_filter(sym_parent) | al->filtered,
+		.hists	= hists,
+		.branch_info = bi,
+		.mem_info = mi,
+		.block_info = block_info,
+		.transaction = sample->transaction,
+		.raw_data = sample->raw_data,
+		.raw_size = sample->raw_size,
+		.ops = ops,
+		.time = hist_time(sample->time),
+		.weight = sample->weight,
+		.ins_lat = sample->ins_lat,
+		.p_stage_cyc = sample->p_stage_cyc,
+	}, *he = hists__findnew_entry(hists, &entry, al, sample_self);
+
+	if (!hists->has_callchains && he && he->callchain_size != 0)
+		hists->has_callchains = true;
+	if (he && symbol_conf.res_sample)
+		hists__res_sample(he, sample);
+	return he;
+}
+
+struct hist_entry *hists__add_entry(struct hists *hists,
+				    struct addr_location *al,
+				    struct symbol *sym_parent,
+				    struct branch_info *bi,
+				    struct mem_info *mi,
+				    struct perf_sample *sample,
+				    bool sample_self)
+{
+	return __hists__add_entry(hists, al, sym_parent, bi, mi, NULL,
+				  sample, sample_self, NULL);
+}
+
+struct hist_entry *hists__add_entry_ops(struct hists *hists,
+					struct hist_entry_ops *ops,
+					struct addr_location *al,
+					struct symbol *sym_parent,
+					struct branch_info *bi,
+					struct mem_info *mi,
+					struct perf_sample *sample,
+					bool sample_self)
+{
+	return __hists__add_entry(hists, al, sym_parent, bi, mi, NULL,
+				  sample, sample_self, ops);
+}
+
+struct hist_entry *hists__add_entry_block(struct hists *hists,
+					  struct addr_location *al,
+					  struct block_info *block_info)
+{
+	struct hist_entry entry = {
+		.block_info = block_info,
+		.hists = hists,
+		.ms = {
+			.maps = al->maps,
+			.map = al->map,
+			.sym = al->sym,
+		},
+	}, *he = hists__findnew_entry(hists, &entry, al, false);
+
+	return he;
+}
+
+static int
+iter_next_nop_entry(struct hist_entry_iter *iter __maybe_unused,
+		    struct addr_location *al __maybe_unused)
+{
+	return 0;
+}
+
+static int
+iter_add_next_nop_entry(struct hist_entry_iter *iter __maybe_unused,
+			struct addr_location *al __maybe_unused)
+{
+	return 0;
+}
+
+static int
+iter_prepare_mem_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	struct perf_sample *sample = iter->sample;
+	struct mem_info *mi;
+
+	mi = sample__resolve_mem(sample, al);
+	if (mi == NULL)
+		return -ENOMEM;
+
+	iter->priv = mi;
+	return 0;
+}
+
+static int
+iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	u64 cost;
+	struct mem_info *mi = iter->priv;
+	struct hists *hists = evsel__hists(iter->evsel);
+	struct perf_sample *sample = iter->sample;
+	struct hist_entry *he;
+
+	if (mi == NULL)
+		return -EINVAL;
+
+	cost = sample->weight;
+	if (!cost)
+		cost = 1;
+
+	/*
+	 * must pass period=weight in order to get the correct
+	 * sorting from hists__collapse_resort() which is solely
+	 * based on periods. We want sorting be done on nr_events * weight
+	 * and this is indirectly achieved by passing period=weight here
+	 * and the he_stat__add_period() function.
+	 */
+	sample->period = cost;
+
+	he = hists__add_entry(hists, al, iter->parent, NULL, mi,
+			      sample, true);
+	if (!he)
+		return -ENOMEM;
+
+	iter->he = he;
+	return 0;
+}
+
+static int
+iter_finish_mem_entry(struct hist_entry_iter *iter,
+		      struct addr_location *al __maybe_unused)
+{
+	struct evsel *evsel = iter->evsel;
+	struct hists *hists = evsel__hists(evsel);
+	struct hist_entry *he = iter->he;
+	int err = -EINVAL;
+
+	if (he == NULL)
+		goto out;
+
+	hists__inc_nr_samples(hists, he->filtered);
+
+	err = hist_entry__append_callchain(he, iter->sample);
+
+out:
+	/*
+	 * We don't need to free iter->priv (mem_info) here since the mem info
+	 * was either already freed in hists__findnew_entry() or passed to a
+	 * new hist entry by hist_entry__new().
+	 */
+	iter->priv = NULL;
+
+	iter->he = NULL;
+	return err;
+}
+
+static int
+iter_prepare_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	struct branch_info *bi;
+	struct perf_sample *sample = iter->sample;
+
+	bi = sample__resolve_bstack(sample, al);
+	if (!bi)
+		return -ENOMEM;
+
+	iter->curr = 0;
+	iter->total = sample->branch_stack->nr;
+
+	iter->priv = bi;
+	return 0;
+}
+
+static int
+iter_add_single_branch_entry(struct hist_entry_iter *iter __maybe_unused,
+			     struct addr_location *al __maybe_unused)
+{
+	return 0;
+}
+
+static int
+iter_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	struct branch_info *bi = iter->priv;
+	int i = iter->curr;
+
+	if (bi == NULL)
+		return 0;
+
+	if (iter->curr >= iter->total)
+		return 0;
+
+	al->maps = bi[i].to.ms.maps;
+	al->map = bi[i].to.ms.map;
+	al->sym = bi[i].to.ms.sym;
+	al->addr = bi[i].to.addr;
+	return 1;
+}
+
+static int
+iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	struct branch_info *bi;
+	struct evsel *evsel = iter->evsel;
+	struct hists *hists = evsel__hists(evsel);
+	struct perf_sample *sample = iter->sample;
+	struct hist_entry *he = NULL;
+	int i = iter->curr;
+	int err = 0;
+
+	bi = iter->priv;
+
+	if (iter->hide_unresolved && !(bi[i].from.ms.sym && bi[i].to.ms.sym))
+		goto out;
+
+	/*
+	 * The report shows the percentage of total branches captured
+	 * and not events sampled. Thus we use a pseudo period of 1.
+	 */
+	sample->period = 1;
+	sample->weight = bi->flags.cycles ? bi->flags.cycles : 1;
+
+	he = hists__add_entry(hists, al, iter->parent, &bi[i], NULL,
+			      sample, true);
+	if (he == NULL)
+		return -ENOMEM;
+
+	hists__inc_nr_samples(hists, he->filtered);
+
+out:
+	iter->he = he;
+	iter->curr++;
+	return err;
+}
+
+static int
+iter_finish_branch_entry(struct hist_entry_iter *iter,
+			 struct addr_location *al __maybe_unused)
+{
+	zfree(&iter->priv);
+	iter->he = NULL;
+
+	return iter->curr >= iter->total ? 0 : -1;
+}
+
+static int
+iter_prepare_normal_entry(struct hist_entry_iter *iter __maybe_unused,
+			  struct addr_location *al __maybe_unused)
+{
+	return 0;
+}
+
+static int
+iter_add_single_normal_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	struct evsel *evsel = iter->evsel;
+	struct perf_sample *sample = iter->sample;
+	struct hist_entry *he;
+
+	he = hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL,
+			      sample, true);
+	if (he == NULL)
+		return -ENOMEM;
+
+	iter->he = he;
+	return 0;
+}
+
+static int
+iter_finish_normal_entry(struct hist_entry_iter *iter,
+			 struct addr_location *al __maybe_unused)
+{
+	struct hist_entry *he = iter->he;
+	struct evsel *evsel = iter->evsel;
+	struct perf_sample *sample = iter->sample;
+
+	if (he == NULL)
+		return 0;
+
+	iter->he = NULL;
+
+	hists__inc_nr_samples(evsel__hists(evsel), he->filtered);
+
+	return hist_entry__append_callchain(he, sample);
+}
+
+static int
+iter_prepare_cumulative_entry(struct hist_entry_iter *iter,
+			      struct addr_location *al __maybe_unused)
+{
+	struct hist_entry **he_cache;
+
+	callchain_cursor_commit(&callchain_cursor);
+
+	/*
+	 * This is for detecting cycles or recursions so that they're
+	 * cumulated only one time to prevent entries more than 100%
+	 * overhead.
+	 */
+	he_cache = malloc(sizeof(*he_cache) * (callchain_cursor.nr + 1));
+	if (he_cache == NULL)
+		return -ENOMEM;
+
+	iter->priv = he_cache;
+	iter->curr = 0;
+
+	return 0;
+}
+
+static int
+iter_add_single_cumulative_entry(struct hist_entry_iter *iter,
+				 struct addr_location *al)
+{
+	struct evsel *evsel = iter->evsel;
+	struct hists *hists = evsel__hists(evsel);
+	struct perf_sample *sample = iter->sample;
+	struct hist_entry **he_cache = iter->priv;
+	struct hist_entry *he;
+	int err = 0;
+
+	he = hists__add_entry(hists, al, iter->parent, NULL, NULL,
+			      sample, true);
+	if (he == NULL)
+		return -ENOMEM;
+
+	iter->he = he;
+	he_cache[iter->curr++] = he;
+
+	hist_entry__append_callchain(he, sample);
+
+	/*
+	 * We need to re-initialize the cursor since callchain_append()
+	 * advanced the cursor to the end.
+	 */
+	callchain_cursor_commit(&callchain_cursor);
+
+	hists__inc_nr_samples(hists, he->filtered);
+
+	return err;
+}
+
+static int
+iter_next_cumulative_entry(struct hist_entry_iter *iter,
+			   struct addr_location *al)
+{
+	struct callchain_cursor_node *node;
+
+	node = callchain_cursor_current(&callchain_cursor);
+	if (node == NULL)
+		return 0;
+
+	return fill_callchain_info(al, node, iter->hide_unresolved);
+}
+
+static bool
+hist_entry__fast__sym_diff(struct hist_entry *left,
+			   struct hist_entry *right)
+{
+	struct symbol *sym_l = left->ms.sym;
+	struct symbol *sym_r = right->ms.sym;
+
+	if (!sym_l && !sym_r)
+		return left->ip != right->ip;
+
+	return !!_sort__sym_cmp(sym_l, sym_r);
+}
+
+
+static int
+iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
+			       struct addr_location *al)
+{
+	struct evsel *evsel = iter->evsel;
+	struct perf_sample *sample = iter->sample;
+	struct hist_entry **he_cache = iter->priv;
+	struct hist_entry *he;
+	struct hist_entry he_tmp = {
+		.hists = evsel__hists(evsel),
+		.cpu = al->cpu,
+		.thread = al->thread,
+		.comm = thread__comm(al->thread),
+		.ip = al->addr,
+		.ms = {
+			.maps = al->maps,
+			.map = al->map,
+			.sym = al->sym,
+		},
+		.srcline = (char *) al->srcline,
+		.parent = iter->parent,
+		.raw_data = sample->raw_data,
+		.raw_size = sample->raw_size,
+	};
+	int i;
+	struct callchain_cursor cursor;
+	bool fast = hists__has(he_tmp.hists, sym);
+
+	callchain_cursor_snapshot(&cursor, &callchain_cursor);
+
+	callchain_cursor_advance(&callchain_cursor);
+
+	/*
+	 * Check if there's duplicate entries in the callchain.
+	 * It's possible that it has cycles or recursive calls.
+	 */
+	for (i = 0; i < iter->curr; i++) {
+		/*
+		 * For most cases, there are no duplicate entries in callchain.
+		 * The symbols are usually different. Do a quick check for
+		 * symbols first.
+		 */
+		if (fast && hist_entry__fast__sym_diff(he_cache[i], &he_tmp))
+			continue;
+
+		if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) {
+			/* to avoid calling callback function */
+			iter->he = NULL;
+			return 0;
+		}
+	}
+
+	he = hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL,
+			      sample, false);
+	if (he == NULL)
+		return -ENOMEM;
+
+	iter->he = he;
+	he_cache[iter->curr++] = he;
+
+	if (hist_entry__has_callchains(he) && symbol_conf.use_callchain)
+		callchain_append(he->callchain, &cursor, sample->period);
+	return 0;
+}
+
+static int
+iter_finish_cumulative_entry(struct hist_entry_iter *iter,
+			     struct addr_location *al __maybe_unused)
+{
+	zfree(&iter->priv);
+	iter->he = NULL;
+
+	return 0;
+}
+
+const struct hist_iter_ops hist_iter_mem = {
+	.prepare_entry 		= iter_prepare_mem_entry,
+	.add_single_entry 	= iter_add_single_mem_entry,
+	.next_entry 		= iter_next_nop_entry,
+	.add_next_entry 	= iter_add_next_nop_entry,
+	.finish_entry 		= iter_finish_mem_entry,
+};
+
+const struct hist_iter_ops hist_iter_branch = {
+	.prepare_entry 		= iter_prepare_branch_entry,
+	.add_single_entry 	= iter_add_single_branch_entry,
+	.next_entry 		= iter_next_branch_entry,
+	.add_next_entry 	= iter_add_next_branch_entry,
+	.finish_entry 		= iter_finish_branch_entry,
+};
+
+const struct hist_iter_ops hist_iter_normal = {
+	.prepare_entry 		= iter_prepare_normal_entry,
+	.add_single_entry 	= iter_add_single_normal_entry,
+	.next_entry 		= iter_next_nop_entry,
+	.add_next_entry 	= iter_add_next_nop_entry,
+	.finish_entry 		= iter_finish_normal_entry,
+};
+
+const struct hist_iter_ops hist_iter_cumulative = {
+	.prepare_entry 		= iter_prepare_cumulative_entry,
+	.add_single_entry 	= iter_add_single_cumulative_entry,
+	.next_entry 		= iter_next_cumulative_entry,
+	.add_next_entry 	= iter_add_next_cumulative_entry,
+	.finish_entry 		= iter_finish_cumulative_entry,
+};
+
+int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
+			 int max_stack_depth, void *arg)
+{
+	int err, err2;
+	struct map *alm = NULL;
+
+	if (al)
+		alm = map__get(al->map);
+
+	err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent,
+					iter->evsel, al, max_stack_depth);
+	if (err) {
+		map__put(alm);
+		return err;
+	}
+
+	err = iter->ops->prepare_entry(iter, al);
+	if (err)
+		goto out;
+
+	err = iter->ops->add_single_entry(iter, al);
+	if (err)
+		goto out;
+
+	if (iter->he && iter->add_entry_cb) {
+		err = iter->add_entry_cb(iter, al, true, arg);
+		if (err)
+			goto out;
+	}
+
+	while (iter->ops->next_entry(iter, al)) {
+		err = iter->ops->add_next_entry(iter, al);
+		if (err)
+			break;
+
+		if (iter->he && iter->add_entry_cb) {
+			err = iter->add_entry_cb(iter, al, false, arg);
+			if (err)
+				goto out;
+		}
+	}
+
+out:
+	err2 = iter->ops->finish_entry(iter, al);
+	if (!err)
+		err = err2;
+
+	map__put(alm);
+
+	return err;
+}
+
+int64_t
+hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct hists *hists = left->hists;
+	struct perf_hpp_fmt *fmt;
+	int64_t cmp = 0;
+
+	hists__for_each_sort_list(hists, fmt) {
+		if (perf_hpp__is_dynamic_entry(fmt) &&
+		    !perf_hpp__defined_dynamic_entry(fmt, hists))
+			continue;
+
+		cmp = fmt->cmp(fmt, left, right);
+		if (cmp)
+			break;
+	}
+
+	return cmp;
+}
+
+int64_t
+hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
+{
+	struct hists *hists = left->hists;
+	struct perf_hpp_fmt *fmt;
+	int64_t cmp = 0;
+
+	hists__for_each_sort_list(hists, fmt) {
+		if (perf_hpp__is_dynamic_entry(fmt) &&
+		    !perf_hpp__defined_dynamic_entry(fmt, hists))
+			continue;
+
+		cmp = fmt->collapse(fmt, left, right);
+		if (cmp)
+			break;
+	}
+
+	return cmp;
+}
+
+void hist_entry__delete(struct hist_entry *he)
+{
+	struct hist_entry_ops *ops = he->ops;
+
+	thread__zput(he->thread);
+	map__zput(he->ms.map);
+
+	if (he->branch_info) {
+		map__zput(he->branch_info->from.ms.map);
+		map__zput(he->branch_info->to.ms.map);
+		free_srcline(he->branch_info->srcline_from);
+		free_srcline(he->branch_info->srcline_to);
+		zfree(&he->branch_info);
+	}
+
+	if (he->mem_info) {
+		map__zput(he->mem_info->iaddr.ms.map);
+		map__zput(he->mem_info->daddr.ms.map);
+		mem_info__zput(he->mem_info);
+	}
+
+	if (he->block_info)
+		block_info__zput(he->block_info);
+
+	zfree(&he->res_samples);
+	zfree(&he->stat_acc);
+	free_srcline(he->srcline);
+	if (he->srcfile && he->srcfile[0])
+		zfree(&he->srcfile);
+	free_callchain(he->callchain);
+	zfree(&he->trace_output);
+	zfree(&he->raw_data);
+	ops->free(he);
+}
+
+/*
+ * If this is not the last column, then we need to pad it according to the
+ * pre-calculated max length for this column, otherwise don't bother adding
+ * spaces because that would break viewing this with, for instance, 'less',
+ * that would show tons of trailing spaces when a long C++ demangled method
+ * names is sampled.
+*/
+int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp,
+				   struct perf_hpp_fmt *fmt, int printed)
+{
+	if (!list_is_last(&fmt->list, &he->hists->hpp_list->fields)) {
+		const int width = fmt->width(fmt, hpp, he->hists);
+		if (printed < width) {
+			advance_hpp(hpp, printed);
+			printed = scnprintf(hpp->buf, hpp->size, "%-*s", width - printed, " ");
+		}
+	}
+
+	return printed;
+}
+
+/*
+ * collapse the histogram
+ */
+
+static void hists__apply_filters(struct hists *hists, struct hist_entry *he);
+static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *he,
+				       enum hist_filter type);
+
+typedef bool (*fmt_chk_fn)(struct perf_hpp_fmt *fmt);
+
+static bool check_thread_entry(struct perf_hpp_fmt *fmt)
+{
+	return perf_hpp__is_thread_entry(fmt) || perf_hpp__is_comm_entry(fmt);
+}
+
+static void hist_entry__check_and_remove_filter(struct hist_entry *he,
+						enum hist_filter type,
+						fmt_chk_fn check)
+{
+	struct perf_hpp_fmt *fmt;
+	bool type_match = false;
+	struct hist_entry *parent = he->parent_he;
+
+	switch (type) {
+	case HIST_FILTER__THREAD:
+		if (symbol_conf.comm_list == NULL &&
+		    symbol_conf.pid_list == NULL &&
+		    symbol_conf.tid_list == NULL)
+			return;
+		break;
+	case HIST_FILTER__DSO:
+		if (symbol_conf.dso_list == NULL)
+			return;
+		break;
+	case HIST_FILTER__SYMBOL:
+		if (symbol_conf.sym_list == NULL)
+			return;
+		break;
+	case HIST_FILTER__PARENT:
+	case HIST_FILTER__GUEST:
+	case HIST_FILTER__HOST:
+	case HIST_FILTER__SOCKET:
+	case HIST_FILTER__C2C:
+	default:
+		return;
+	}
+
+	/* if it's filtered by own fmt, it has to have filter bits */
+	perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+		if (check(fmt)) {
+			type_match = true;
+			break;
+		}
+	}
+
+	if (type_match) {
+		/*
+		 * If the filter is for current level entry, propagate
+		 * filter marker to parents.  The marker bit was
+		 * already set by default so it only needs to clear
+		 * non-filtered entries.
+		 */
+		if (!(he->filtered & (1 << type))) {
+			while (parent) {
+				parent->filtered &= ~(1 << type);
+				parent = parent->parent_he;
+			}
+		}
+	} else {
+		/*
+		 * If current entry doesn't have matching formats, set
+		 * filter marker for upper level entries.  it will be
+		 * cleared if its lower level entries is not filtered.
+		 *
+		 * For lower-level entries, it inherits parent's
+		 * filter bit so that lower level entries of a
+		 * non-filtered entry won't set the filter marker.
+		 */
+		if (parent == NULL)
+			he->filtered |= (1 << type);
+		else
+			he->filtered |= (parent->filtered & (1 << type));
+	}
+}
+
+static void hist_entry__apply_hierarchy_filters(struct hist_entry *he)
+{
+	hist_entry__check_and_remove_filter(he, HIST_FILTER__THREAD,
+					    check_thread_entry);
+
+	hist_entry__check_and_remove_filter(he, HIST_FILTER__DSO,
+					    perf_hpp__is_dso_entry);
+
+	hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL,
+					    perf_hpp__is_sym_entry);
+
+	hists__apply_filters(he->hists, he);
+}
+
+static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
+						 struct rb_root_cached *root,
+						 struct hist_entry *he,
+						 struct hist_entry *parent_he,
+						 struct perf_hpp_list *hpp_list)
+{
+	struct rb_node **p = &root->rb_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter, *new;
+	struct perf_hpp_fmt *fmt;
+	int64_t cmp;
+	bool leftmost = true;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node_in);
+
+		cmp = 0;
+		perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
+			cmp = fmt->collapse(fmt, iter, he);
+			if (cmp)
+				break;
+		}
+
+		if (!cmp) {
+			he_stat__add_stat(&iter->stat, &he->stat);
+			return iter;
+		}
+
+		if (cmp < 0)
+			p = &parent->rb_left;
+		else {
+			p = &parent->rb_right;
+			leftmost = false;
+		}
+	}
+
+	new = hist_entry__new(he, true);
+	if (new == NULL)
+		return NULL;
+
+	hists->nr_entries++;
+
+	/* save related format list for output */
+	new->hpp_list = hpp_list;
+	new->parent_he = parent_he;
+
+	hist_entry__apply_hierarchy_filters(new);
+
+	/* some fields are now passed to 'new' */
+	perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
+		if (perf_hpp__is_trace_entry(fmt) || perf_hpp__is_dynamic_entry(fmt))
+			he->trace_output = NULL;
+		else
+			new->trace_output = NULL;
+
+		if (perf_hpp__is_srcline_entry(fmt))
+			he->srcline = NULL;
+		else
+			new->srcline = NULL;
+
+		if (perf_hpp__is_srcfile_entry(fmt))
+			he->srcfile = NULL;
+		else
+			new->srcfile = NULL;
+	}
+
+	rb_link_node(&new->rb_node_in, parent, p);
+	rb_insert_color_cached(&new->rb_node_in, root, leftmost);
+	return new;
+}
+
+static int hists__hierarchy_insert_entry(struct hists *hists,
+					 struct rb_root_cached *root,
+					 struct hist_entry *he)
+{
+	struct perf_hpp_list_node *node;
+	struct hist_entry *new_he = NULL;
+	struct hist_entry *parent = NULL;
+	int depth = 0;
+	int ret = 0;
+
+	list_for_each_entry(node, &hists->hpp_formats, list) {
+		/* skip period (overhead) and elided columns */
+		if (node->level == 0 || node->skip)
+			continue;
+
+		/* insert copy of 'he' for each fmt into the hierarchy */
+		new_he = hierarchy_insert_entry(hists, root, he, parent, &node->hpp);
+		if (new_he == NULL) {
+			ret = -1;
+			break;
+		}
+
+		root = &new_he->hroot_in;
+		new_he->depth = depth++;
+		parent = new_he;
+	}
+
+	if (new_he) {
+		new_he->leaf = true;
+
+		if (hist_entry__has_callchains(new_he) &&
+		    symbol_conf.use_callchain) {
+			callchain_cursor_reset(&callchain_cursor);
+			if (callchain_merge(&callchain_cursor,
+					    new_he->callchain,
+					    he->callchain) < 0)
+				ret = -1;
+		}
+	}
+
+	/* 'he' is no longer used */
+	hist_entry__delete(he);
+
+	/* return 0 (or -1) since it already applied filters */
+	return ret;
+}
+
+static int hists__collapse_insert_entry(struct hists *hists,
+					struct rb_root_cached *root,
+					struct hist_entry *he)
+{
+	struct rb_node **p = &root->rb_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+	int64_t cmp;
+	bool leftmost = true;
+
+	if (symbol_conf.report_hierarchy)
+		return hists__hierarchy_insert_entry(hists, root, he);
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node_in);
+
+		cmp = hist_entry__collapse(iter, he);
+
+		if (!cmp) {
+			int ret = 0;
+
+			he_stat__add_stat(&iter->stat, &he->stat);
+			if (symbol_conf.cumulate_callchain)
+				he_stat__add_stat(iter->stat_acc, he->stat_acc);
+
+			if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) {
+				callchain_cursor_reset(&callchain_cursor);
+				if (callchain_merge(&callchain_cursor,
+						    iter->callchain,
+						    he->callchain) < 0)
+					ret = -1;
+			}
+			hist_entry__delete(he);
+			return ret;
+		}
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else {
+			p = &(*p)->rb_right;
+			leftmost = false;
+		}
+	}
+	hists->nr_entries++;
+
+	rb_link_node(&he->rb_node_in, parent, p);
+	rb_insert_color_cached(&he->rb_node_in, root, leftmost);
+	return 1;
+}
+
+struct rb_root_cached *hists__get_rotate_entries_in(struct hists *hists)
+{
+	struct rb_root_cached *root;
+
+	mutex_lock(&hists->lock);
+
+	root = hists->entries_in;
+	if (++hists->entries_in > &hists->entries_in_array[1])
+		hists->entries_in = &hists->entries_in_array[0];
+
+	mutex_unlock(&hists->lock);
+
+	return root;
+}
+
+static void hists__apply_filters(struct hists *hists, struct hist_entry *he)
+{
+	hists__filter_entry_by_dso(hists, he);
+	hists__filter_entry_by_thread(hists, he);
+	hists__filter_entry_by_symbol(hists, he);
+	hists__filter_entry_by_socket(hists, he);
+}
+
+int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
+{
+	struct rb_root_cached *root;
+	struct rb_node *next;
+	struct hist_entry *n;
+	int ret;
+
+	if (!hists__has(hists, need_collapse))
+		return 0;
+
+	hists->nr_entries = 0;
+
+	root = hists__get_rotate_entries_in(hists);
+
+	next = rb_first_cached(root);
+
+	while (next) {
+		if (session_done())
+			break;
+		n = rb_entry(next, struct hist_entry, rb_node_in);
+		next = rb_next(&n->rb_node_in);
+
+		rb_erase_cached(&n->rb_node_in, root);
+		ret = hists__collapse_insert_entry(hists, &hists->entries_collapsed, n);
+		if (ret < 0)
+			return -1;
+
+		if (ret) {
+			/*
+			 * If it wasn't combined with one of the entries already
+			 * collapsed, we need to apply the filters that may have
+			 * been set by, say, the hist_browser.
+			 */
+			hists__apply_filters(hists, n);
+		}
+		if (prog)
+			ui_progress__update(prog, 1);
+	}
+	return 0;
+}
+
+static int64_t hist_entry__sort(struct hist_entry *a, struct hist_entry *b)
+{
+	struct hists *hists = a->hists;
+	struct perf_hpp_fmt *fmt;
+	int64_t cmp = 0;
+
+	hists__for_each_sort_list(hists, fmt) {
+		if (perf_hpp__should_skip(fmt, a->hists))
+			continue;
+
+		cmp = fmt->sort(fmt, a, b);
+		if (cmp)
+			break;
+	}
+
+	return cmp;
+}
+
+static void hists__reset_filter_stats(struct hists *hists)
+{
+	hists->nr_non_filtered_entries = 0;
+	hists->stats.total_non_filtered_period = 0;
+}
+
+void hists__reset_stats(struct hists *hists)
+{
+	hists->nr_entries = 0;
+	hists->stats.total_period = 0;
+
+	hists__reset_filter_stats(hists);
+}
+
+static void hists__inc_filter_stats(struct hists *hists, struct hist_entry *h)
+{
+	hists->nr_non_filtered_entries++;
+	hists->stats.total_non_filtered_period += h->stat.period;
+}
+
+void hists__inc_stats(struct hists *hists, struct hist_entry *h)
+{
+	if (!h->filtered)
+		hists__inc_filter_stats(hists, h);
+
+	hists->nr_entries++;
+	hists->stats.total_period += h->stat.period;
+}
+
+static void hierarchy_recalc_total_periods(struct hists *hists)
+{
+	struct rb_node *node;
+	struct hist_entry *he;
+
+	node = rb_first_cached(&hists->entries);
+
+	hists->stats.total_period = 0;
+	hists->stats.total_non_filtered_period = 0;
+
+	/*
+	 * recalculate total period using top-level entries only
+	 * since lower level entries only see non-filtered entries
+	 * but upper level entries have sum of both entries.
+	 */
+	while (node) {
+		he = rb_entry(node, struct hist_entry, rb_node);
+		node = rb_next(node);
+
+		hists->stats.total_period += he->stat.period;
+		if (!he->filtered)
+			hists->stats.total_non_filtered_period += he->stat.period;
+	}
+}
+
+static void hierarchy_insert_output_entry(struct rb_root_cached *root,
+					  struct hist_entry *he)
+{
+	struct rb_node **p = &root->rb_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+	struct perf_hpp_fmt *fmt;
+	bool leftmost = true;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		if (hist_entry__sort(he, iter) > 0)
+			p = &parent->rb_left;
+		else {
+			p = &parent->rb_right;
+			leftmost = false;
+		}
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color_cached(&he->rb_node, root, leftmost);
+
+	/* update column width of dynamic entry */
+	perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
+		if (perf_hpp__is_dynamic_entry(fmt))
+			fmt->sort(fmt, he, NULL);
+	}
+}
+
+static void hists__hierarchy_output_resort(struct hists *hists,
+					   struct ui_progress *prog,
+					   struct rb_root_cached *root_in,
+					   struct rb_root_cached *root_out,
+					   u64 min_callchain_hits,
+					   bool use_callchain)
+{
+	struct rb_node *node;
+	struct hist_entry *he;
+
+	*root_out = RB_ROOT_CACHED;
+	node = rb_first_cached(root_in);
+
+	while (node) {
+		he = rb_entry(node, struct hist_entry, rb_node_in);
+		node = rb_next(node);
+
+		hierarchy_insert_output_entry(root_out, he);
+
+		if (prog)
+			ui_progress__update(prog, 1);
+
+		hists->nr_entries++;
+		if (!he->filtered) {
+			hists->nr_non_filtered_entries++;
+			hists__calc_col_len(hists, he);
+		}
+
+		if (!he->leaf) {
+			hists__hierarchy_output_resort(hists, prog,
+						       &he->hroot_in,
+						       &he->hroot_out,
+						       min_callchain_hits,
+						       use_callchain);
+			continue;
+		}
+
+		if (!use_callchain)
+			continue;
+
+		if (callchain_param.mode == CHAIN_GRAPH_REL) {
+			u64 total = he->stat.period;
+
+			if (symbol_conf.cumulate_callchain)
+				total = he->stat_acc->period;
+
+			min_callchain_hits = total * (callchain_param.min_percent / 100);
+		}
+
+		callchain_param.sort(&he->sorted_chain, he->callchain,
+				     min_callchain_hits, &callchain_param);
+	}
+}
+
+static void __hists__insert_output_entry(struct rb_root_cached *entries,
+					 struct hist_entry *he,
+					 u64 min_callchain_hits,
+					 bool use_callchain)
+{
+	struct rb_node **p = &entries->rb_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+	struct perf_hpp_fmt *fmt;
+	bool leftmost = true;
+
+	if (use_callchain) {
+		if (callchain_param.mode == CHAIN_GRAPH_REL) {
+			u64 total = he->stat.period;
+
+			if (symbol_conf.cumulate_callchain)
+				total = he->stat_acc->period;
+
+			min_callchain_hits = total * (callchain_param.min_percent / 100);
+		}
+		callchain_param.sort(&he->sorted_chain, he->callchain,
+				      min_callchain_hits, &callchain_param);
+	}
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		if (hist_entry__sort(he, iter) > 0)
+			p = &(*p)->rb_left;
+		else {
+			p = &(*p)->rb_right;
+			leftmost = false;
+		}
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color_cached(&he->rb_node, entries, leftmost);
+
+	perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) {
+		if (perf_hpp__is_dynamic_entry(fmt) &&
+		    perf_hpp__defined_dynamic_entry(fmt, he->hists))
+			fmt->sort(fmt, he, NULL);  /* update column width */
+	}
+}
+
+static void output_resort(struct hists *hists, struct ui_progress *prog,
+			  bool use_callchain, hists__resort_cb_t cb,
+			  void *cb_arg)
+{
+	struct rb_root_cached *root;
+	struct rb_node *next;
+	struct hist_entry *n;
+	u64 callchain_total;
+	u64 min_callchain_hits;
+
+	callchain_total = hists->callchain_period;
+	if (symbol_conf.filter_relative)
+		callchain_total = hists->callchain_non_filtered_period;
+
+	min_callchain_hits = callchain_total * (callchain_param.min_percent / 100);
+
+	hists__reset_stats(hists);
+	hists__reset_col_len(hists);
+
+	if (symbol_conf.report_hierarchy) {
+		hists__hierarchy_output_resort(hists, prog,
+					       &hists->entries_collapsed,
+					       &hists->entries,
+					       min_callchain_hits,
+					       use_callchain);
+		hierarchy_recalc_total_periods(hists);
+		return;
+	}
+
+	if (hists__has(hists, need_collapse))
+		root = &hists->entries_collapsed;
+	else
+		root = hists->entries_in;
+
+	next = rb_first_cached(root);
+	hists->entries = RB_ROOT_CACHED;
+
+	while (next) {
+		n = rb_entry(next, struct hist_entry, rb_node_in);
+		next = rb_next(&n->rb_node_in);
+
+		if (cb && cb(n, cb_arg))
+			continue;
+
+		__hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain);
+		hists__inc_stats(hists, n);
+
+		if (!n->filtered)
+			hists__calc_col_len(hists, n);
+
+		if (prog)
+			ui_progress__update(prog, 1);
+	}
+}
+
+void evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog,
+			     hists__resort_cb_t cb, void *cb_arg)
+{
+	bool use_callchain;
+
+	if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph)
+		use_callchain = evsel__has_callchain(evsel);
+	else
+		use_callchain = symbol_conf.use_callchain;
+
+	use_callchain |= symbol_conf.show_branchflag_count;
+
+	output_resort(evsel__hists(evsel), prog, use_callchain, cb, cb_arg);
+}
+
+void evsel__output_resort(struct evsel *evsel, struct ui_progress *prog)
+{
+	return evsel__output_resort_cb(evsel, prog, NULL, NULL);
+}
+
+void hists__output_resort(struct hists *hists, struct ui_progress *prog)
+{
+	output_resort(hists, prog, symbol_conf.use_callchain, NULL, NULL);
+}
+
+void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog,
+			     hists__resort_cb_t cb)
+{
+	output_resort(hists, prog, symbol_conf.use_callchain, cb, NULL);
+}
+
+static bool can_goto_child(struct hist_entry *he, enum hierarchy_move_dir hmd)
+{
+	if (he->leaf || hmd == HMD_FORCE_SIBLING)
+		return false;
+
+	if (he->unfolded || hmd == HMD_FORCE_CHILD)
+		return true;
+
+	return false;
+}
+
+struct rb_node *rb_hierarchy_last(struct rb_node *node)
+{
+	struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+	while (can_goto_child(he, HMD_NORMAL)) {
+		node = rb_last(&he->hroot_out.rb_root);
+		he = rb_entry(node, struct hist_entry, rb_node);
+	}
+	return node;
+}
+
+struct rb_node *__rb_hierarchy_next(struct rb_node *node, enum hierarchy_move_dir hmd)
+{
+	struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+	if (can_goto_child(he, hmd))
+		node = rb_first_cached(&he->hroot_out);
+	else
+		node = rb_next(node);
+
+	while (node == NULL) {
+		he = he->parent_he;
+		if (he == NULL)
+			break;
+
+		node = rb_next(&he->rb_node);
+	}
+	return node;
+}
+
+struct rb_node *rb_hierarchy_prev(struct rb_node *node)
+{
+	struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+	node = rb_prev(node);
+	if (node)
+		return rb_hierarchy_last(node);
+
+	he = he->parent_he;
+	if (he == NULL)
+		return NULL;
+
+	return &he->rb_node;
+}
+
+bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit)
+{
+	struct rb_node *node;
+	struct hist_entry *child;
+	float percent;
+
+	if (he->leaf)
+		return false;
+
+	node = rb_first_cached(&he->hroot_out);
+	child = rb_entry(node, struct hist_entry, rb_node);
+
+	while (node && child->filtered) {
+		node = rb_next(node);
+		child = rb_entry(node, struct hist_entry, rb_node);
+	}
+
+	if (node)
+		percent = hist_entry__get_percent_limit(child);
+	else
+		percent = 0;
+
+	return node && percent >= limit;
+}
+
+static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h,
+				       enum hist_filter filter)
+{
+	h->filtered &= ~(1 << filter);
+
+	if (symbol_conf.report_hierarchy) {
+		struct hist_entry *parent = h->parent_he;
+
+		while (parent) {
+			he_stat__add_stat(&parent->stat, &h->stat);
+
+			parent->filtered &= ~(1 << filter);
+
+			if (parent->filtered)
+				goto next;
+
+			/* force fold unfiltered entry for simplicity */
+			parent->unfolded = false;
+			parent->has_no_entry = false;
+			parent->row_offset = 0;
+			parent->nr_rows = 0;
+next:
+			parent = parent->parent_he;
+		}
+	}
+
+	if (h->filtered)
+		return;
+
+	/* force fold unfiltered entry for simplicity */
+	h->unfolded = false;
+	h->has_no_entry = false;
+	h->row_offset = 0;
+	h->nr_rows = 0;
+
+	hists->stats.nr_non_filtered_samples += h->stat.nr_events;
+
+	hists__inc_filter_stats(hists, h);
+	hists__calc_col_len(hists, h);
+}
+
+
+static bool hists__filter_entry_by_dso(struct hists *hists,
+				       struct hist_entry *he)
+{
+	if (hists->dso_filter != NULL &&
+	    (he->ms.map == NULL || he->ms.map->dso != hists->dso_filter)) {
+		he->filtered |= (1 << HIST_FILTER__DSO);
+		return true;
+	}
+
+	return false;
+}
+
+static bool hists__filter_entry_by_thread(struct hists *hists,
+					  struct hist_entry *he)
+{
+	if (hists->thread_filter != NULL &&
+	    he->thread != hists->thread_filter) {
+		he->filtered |= (1 << HIST_FILTER__THREAD);
+		return true;
+	}
+
+	return false;
+}
+
+static bool hists__filter_entry_by_symbol(struct hists *hists,
+					  struct hist_entry *he)
+{
+	if (hists->symbol_filter_str != NULL &&
+	    (!he->ms.sym || strstr(he->ms.sym->name,
+				   hists->symbol_filter_str) == NULL)) {
+		he->filtered |= (1 << HIST_FILTER__SYMBOL);
+		return true;
+	}
+
+	return false;
+}
+
+static bool hists__filter_entry_by_socket(struct hists *hists,
+					  struct hist_entry *he)
+{
+	if ((hists->socket_filter > -1) &&
+	    (he->socket != hists->socket_filter)) {
+		he->filtered |= (1 << HIST_FILTER__SOCKET);
+		return true;
+	}
+
+	return false;
+}
+
+typedef bool (*filter_fn_t)(struct hists *hists, struct hist_entry *he);
+
+static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t filter)
+{
+	struct rb_node *nd;
+
+	hists->stats.nr_non_filtered_samples = 0;
+
+	hists__reset_filter_stats(hists);
+	hists__reset_col_len(hists);
+
+	for (nd = rb_first_cached(&hists->entries); nd; nd = rb_next(nd)) {
+		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+		if (filter(hists, h))
+			continue;
+
+		hists__remove_entry_filter(hists, h, type);
+	}
+}
+
+static void resort_filtered_entry(struct rb_root_cached *root,
+				  struct hist_entry *he)
+{
+	struct rb_node **p = &root->rb_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+	struct rb_root_cached new_root = RB_ROOT_CACHED;
+	struct rb_node *nd;
+	bool leftmost = true;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		if (hist_entry__sort(he, iter) > 0)
+			p = &(*p)->rb_left;
+		else {
+			p = &(*p)->rb_right;
+			leftmost = false;
+		}
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color_cached(&he->rb_node, root, leftmost);
+
+	if (he->leaf || he->filtered)
+		return;
+
+	nd = rb_first_cached(&he->hroot_out);
+	while (nd) {
+		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+		nd = rb_next(nd);
+		rb_erase_cached(&h->rb_node, &he->hroot_out);
+
+		resort_filtered_entry(&new_root, h);
+	}
+
+	he->hroot_out = new_root;
+}
+
+static void hists__filter_hierarchy(struct hists *hists, int type, const void *arg)
+{
+	struct rb_node *nd;
+	struct rb_root_cached new_root = RB_ROOT_CACHED;
+
+	hists->stats.nr_non_filtered_samples = 0;
+
+	hists__reset_filter_stats(hists);
+	hists__reset_col_len(hists);
+
+	nd = rb_first_cached(&hists->entries);
+	while (nd) {
+		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+		int ret;
+
+		ret = hist_entry__filter(h, type, arg);
+
+		/*
+		 * case 1. non-matching type
+		 * zero out the period, set filter marker and move to child
+		 */
+		if (ret < 0) {
+			memset(&h->stat, 0, sizeof(h->stat));
+			h->filtered |= (1 << type);
+
+			nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_CHILD);
+		}
+		/*
+		 * case 2. matched type (filter out)
+		 * set filter marker and move to next
+		 */
+		else if (ret == 1) {
+			h->filtered |= (1 << type);
+
+			nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING);
+		}
+		/*
+		 * case 3. ok (not filtered)
+		 * add period to hists and parents, erase the filter marker
+		 * and move to next sibling
+		 */
+		else {
+			hists__remove_entry_filter(hists, h, type);
+
+			nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING);
+		}
+	}
+
+	hierarchy_recalc_total_periods(hists);
+
+	/*
+	 * resort output after applying a new filter since filter in a lower
+	 * hierarchy can change periods in a upper hierarchy.
+	 */
+	nd = rb_first_cached(&hists->entries);
+	while (nd) {
+		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+		nd = rb_next(nd);
+		rb_erase_cached(&h->rb_node, &hists->entries);
+
+		resort_filtered_entry(&new_root, h);
+	}
+
+	hists->entries = new_root;
+}
+
+void hists__filter_by_thread(struct hists *hists)
+{
+	if (symbol_conf.report_hierarchy)
+		hists__filter_hierarchy(hists, HIST_FILTER__THREAD,
+					hists->thread_filter);
+	else
+		hists__filter_by_type(hists, HIST_FILTER__THREAD,
+				      hists__filter_entry_by_thread);
+}
+
+void hists__filter_by_dso(struct hists *hists)
+{
+	if (symbol_conf.report_hierarchy)
+		hists__filter_hierarchy(hists, HIST_FILTER__DSO,
+					hists->dso_filter);
+	else
+		hists__filter_by_type(hists, HIST_FILTER__DSO,
+				      hists__filter_entry_by_dso);
+}
+
+void hists__filter_by_symbol(struct hists *hists)
+{
+	if (symbol_conf.report_hierarchy)
+		hists__filter_hierarchy(hists, HIST_FILTER__SYMBOL,
+					hists->symbol_filter_str);
+	else
+		hists__filter_by_type(hists, HIST_FILTER__SYMBOL,
+				      hists__filter_entry_by_symbol);
+}
+
+void hists__filter_by_socket(struct hists *hists)
+{
+	if (symbol_conf.report_hierarchy)
+		hists__filter_hierarchy(hists, HIST_FILTER__SOCKET,
+					&hists->socket_filter);
+	else
+		hists__filter_by_type(hists, HIST_FILTER__SOCKET,
+				      hists__filter_entry_by_socket);
+}
+
+void events_stats__inc(struct events_stats *stats, u32 type)
+{
+	++stats->nr_events[0];
+	++stats->nr_events[type];
+}
+
+static void hists_stats__inc(struct hists_stats *stats)
+{
+	++stats->nr_samples;
+}
+
+void hists__inc_nr_events(struct hists *hists)
+{
+	hists_stats__inc(&hists->stats);
+}
+
+void hists__inc_nr_samples(struct hists *hists, bool filtered)
+{
+	hists_stats__inc(&hists->stats);
+	if (!filtered)
+		hists->stats.nr_non_filtered_samples++;
+}
+
+void hists__inc_nr_lost_samples(struct hists *hists, u32 lost)
+{
+	hists->stats.nr_lost_samples += lost;
+}
+
+static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
+						 struct hist_entry *pair)
+{
+	struct rb_root_cached *root;
+	struct rb_node **p;
+	struct rb_node *parent = NULL;
+	struct hist_entry *he;
+	int64_t cmp;
+	bool leftmost = true;
+
+	if (hists__has(hists, need_collapse))
+		root = &hists->entries_collapsed;
+	else
+		root = hists->entries_in;
+
+	p = &root->rb_root.rb_node;
+
+	while (*p != NULL) {
+		parent = *p;
+		he = rb_entry(parent, struct hist_entry, rb_node_in);
+
+		cmp = hist_entry__collapse(he, pair);
+
+		if (!cmp)
+			goto out;
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else {
+			p = &(*p)->rb_right;
+			leftmost = false;
+		}
+	}
+
+	he = hist_entry__new(pair, true);
+	if (he) {
+		memset(&he->stat, 0, sizeof(he->stat));
+		he->hists = hists;
+		if (symbol_conf.cumulate_callchain)
+			memset(he->stat_acc, 0, sizeof(he->stat));
+		rb_link_node(&he->rb_node_in, parent, p);
+		rb_insert_color_cached(&he->rb_node_in, root, leftmost);
+		hists__inc_stats(hists, he);
+		he->dummy = true;
+	}
+out:
+	return he;
+}
+
+static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists,
+						    struct rb_root_cached *root,
+						    struct hist_entry *pair)
+{
+	struct rb_node **p;
+	struct rb_node *parent = NULL;
+	struct hist_entry *he;
+	struct perf_hpp_fmt *fmt;
+	bool leftmost = true;
+
+	p = &root->rb_root.rb_node;
+	while (*p != NULL) {
+		int64_t cmp = 0;
+
+		parent = *p;
+		he = rb_entry(parent, struct hist_entry, rb_node_in);
+
+		perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
+			cmp = fmt->collapse(fmt, he, pair);
+			if (cmp)
+				break;
+		}
+		if (!cmp)
+			goto out;
+
+		if (cmp < 0)
+			p = &parent->rb_left;
+		else {
+			p = &parent->rb_right;
+			leftmost = false;
+		}
+	}
+
+	he = hist_entry__new(pair, true);
+	if (he) {
+		rb_link_node(&he->rb_node_in, parent, p);
+		rb_insert_color_cached(&he->rb_node_in, root, leftmost);
+
+		he->dummy = true;
+		he->hists = hists;
+		memset(&he->stat, 0, sizeof(he->stat));
+		hists__inc_stats(hists, he);
+	}
+out:
+	return he;
+}
+
+static struct hist_entry *hists__find_entry(struct hists *hists,
+					    struct hist_entry *he)
+{
+	struct rb_node *n;
+
+	if (hists__has(hists, need_collapse))
+		n = hists->entries_collapsed.rb_root.rb_node;
+	else
+		n = hists->entries_in->rb_root.rb_node;
+
+	while (n) {
+		struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node_in);
+		int64_t cmp = hist_entry__collapse(iter, he);
+
+		if (cmp < 0)
+			n = n->rb_left;
+		else if (cmp > 0)
+			n = n->rb_right;
+		else
+			return iter;
+	}
+
+	return NULL;
+}
+
+static struct hist_entry *hists__find_hierarchy_entry(struct rb_root_cached *root,
+						      struct hist_entry *he)
+{
+	struct rb_node *n = root->rb_root.rb_node;
+
+	while (n) {
+		struct hist_entry *iter;
+		struct perf_hpp_fmt *fmt;
+		int64_t cmp = 0;
+
+		iter = rb_entry(n, struct hist_entry, rb_node_in);
+		perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
+			cmp = fmt->collapse(fmt, iter, he);
+			if (cmp)
+				break;
+		}
+
+		if (cmp < 0)
+			n = n->rb_left;
+		else if (cmp > 0)
+			n = n->rb_right;
+		else
+			return iter;
+	}
+
+	return NULL;
+}
+
+static void hists__match_hierarchy(struct rb_root_cached *leader_root,
+				   struct rb_root_cached *other_root)
+{
+	struct rb_node *nd;
+	struct hist_entry *pos, *pair;
+
+	for (nd = rb_first_cached(leader_root); nd; nd = rb_next(nd)) {
+		pos  = rb_entry(nd, struct hist_entry, rb_node_in);
+		pair = hists__find_hierarchy_entry(other_root, pos);
+
+		if (pair) {
+			hist_entry__add_pair(pair, pos);
+			hists__match_hierarchy(&pos->hroot_in, &pair->hroot_in);
+		}
+	}
+}
+
+/*
+ * Look for pairs to link to the leader buckets (hist_entries):
+ */
+void hists__match(struct hists *leader, struct hists *other)
+{
+	struct rb_root_cached *root;
+	struct rb_node *nd;
+	struct hist_entry *pos, *pair;
+
+	if (symbol_conf.report_hierarchy) {
+		/* hierarchy report always collapses entries */
+		return hists__match_hierarchy(&leader->entries_collapsed,
+					      &other->entries_collapsed);
+	}
+
+	if (hists__has(leader, need_collapse))
+		root = &leader->entries_collapsed;
+	else
+		root = leader->entries_in;
+
+	for (nd = rb_first_cached(root); nd; nd = rb_next(nd)) {
+		pos  = rb_entry(nd, struct hist_entry, rb_node_in);
+		pair = hists__find_entry(other, pos);
+
+		if (pair)
+			hist_entry__add_pair(pair, pos);
+	}
+}
+
+static int hists__link_hierarchy(struct hists *leader_hists,
+				 struct hist_entry *parent,
+				 struct rb_root_cached *leader_root,
+				 struct rb_root_cached *other_root)
+{
+	struct rb_node *nd;
+	struct hist_entry *pos, *leader;
+
+	for (nd = rb_first_cached(other_root); nd; nd = rb_next(nd)) {
+		pos = rb_entry(nd, struct hist_entry, rb_node_in);
+
+		if (hist_entry__has_pairs(pos)) {
+			bool found = false;
+
+			list_for_each_entry(leader, &pos->pairs.head, pairs.node) {
+				if (leader->hists == leader_hists) {
+					found = true;
+					break;
+				}
+			}
+			if (!found)
+				return -1;
+		} else {
+			leader = add_dummy_hierarchy_entry(leader_hists,
+							   leader_root, pos);
+			if (leader == NULL)
+				return -1;
+
+			/* do not point parent in the pos */
+			leader->parent_he = parent;
+
+			hist_entry__add_pair(pos, leader);
+		}
+
+		if (!pos->leaf) {
+			if (hists__link_hierarchy(leader_hists, leader,
+						  &leader->hroot_in,
+						  &pos->hroot_in) < 0)
+				return -1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Look for entries in the other hists that are not present in the leader, if
+ * we find them, just add a dummy entry on the leader hists, with period=0,
+ * nr_events=0, to serve as the list header.
+ */
+int hists__link(struct hists *leader, struct hists *other)
+{
+	struct rb_root_cached *root;
+	struct rb_node *nd;
+	struct hist_entry *pos, *pair;
+
+	if (symbol_conf.report_hierarchy) {
+		/* hierarchy report always collapses entries */
+		return hists__link_hierarchy(leader, NULL,
+					     &leader->entries_collapsed,
+					     &other->entries_collapsed);
+	}
+
+	if (hists__has(other, need_collapse))
+		root = &other->entries_collapsed;
+	else
+		root = other->entries_in;
+
+	for (nd = rb_first_cached(root); nd; nd = rb_next(nd)) {
+		pos = rb_entry(nd, struct hist_entry, rb_node_in);
+
+		if (!hist_entry__has_pairs(pos)) {
+			pair = hists__add_dummy_entry(leader, pos);
+			if (pair == NULL)
+				return -1;
+			hist_entry__add_pair(pos, pair);
+		}
+	}
+
+	return 0;
+}
+
+int hists__unlink(struct hists *hists)
+{
+	struct rb_root_cached *root;
+	struct rb_node *nd;
+	struct hist_entry *pos;
+
+	if (hists__has(hists, need_collapse))
+		root = &hists->entries_collapsed;
+	else
+		root = hists->entries_in;
+
+	for (nd = rb_first_cached(root); nd; nd = rb_next(nd)) {
+		pos = rb_entry(nd, struct hist_entry, rb_node_in);
+		list_del_init(&pos->pairs.node);
+	}
+
+	return 0;
+}
+
+void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
+			  struct perf_sample *sample, bool nonany_branch_mode,
+			  u64 *total_cycles)
+{
+	struct branch_info *bi;
+	struct branch_entry *entries = perf_sample__branch_entries(sample);
+
+	/* If we have branch cycles always annotate them. */
+	if (bs && bs->nr && entries[0].flags.cycles) {
+		bi = sample__resolve_bstack(sample, al);
+		if (bi) {
+			struct addr_map_symbol *prev = NULL;
+
+			/*
+			 * Ignore errors, still want to process the
+			 * other entries.
+			 *
+			 * For non standard branch modes always
+			 * force no IPC (prev == NULL)
+			 *
+			 * Note that perf stores branches reversed from
+			 * program order!
+			 */
+			for (int i = bs->nr - 1; i >= 0; i--) {
+				addr_map_symbol__account_cycles(&bi[i].from,
+					nonany_branch_mode ? NULL : prev,
+					bi[i].flags.cycles);
+				prev = &bi[i].to;
+
+				if (total_cycles)
+					*total_cycles += bi[i].flags.cycles;
+			}
+			for (unsigned int i = 0; i < bs->nr; i++) {
+				map__put(bi[i].to.ms.map);
+				maps__put(bi[i].to.ms.maps);
+				map__put(bi[i].from.ms.map);
+				maps__put(bi[i].from.ms.maps);
+			}
+			free(bi);
+		}
+	}
+}
+
+size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp,
+				 bool skip_empty)
+{
+	struct evsel *pos;
+	size_t ret = 0;
+
+	evlist__for_each_entry(evlist, pos) {
+		struct hists *hists = evsel__hists(pos);
+
+		if (skip_empty && !hists->stats.nr_samples && !hists->stats.nr_lost_samples)
+			continue;
+
+		ret += fprintf(fp, "%s stats:\n", evsel__name(pos));
+		if (hists->stats.nr_samples)
+			ret += fprintf(fp, "%16s events: %10d\n",
+				       "SAMPLE", hists->stats.nr_samples);
+		if (hists->stats.nr_lost_samples)
+			ret += fprintf(fp, "%16s events: %10d\n",
+				       "LOST_SAMPLES", hists->stats.nr_lost_samples);
+	}
+
+	return ret;
+}
+
+
+u64 hists__total_period(struct hists *hists)
+{
+	return symbol_conf.filter_relative ? hists->stats.total_non_filtered_period :
+		hists->stats.total_period;
+}
+
+int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool show_freq)
+{
+	char unit;
+	int printed;
+	const struct dso *dso = hists->dso_filter;
+	struct thread *thread = hists->thread_filter;
+	int socket_id = hists->socket_filter;
+	unsigned long nr_samples = hists->stats.nr_samples;
+	u64 nr_events = hists->stats.total_period;
+	struct evsel *evsel = hists_to_evsel(hists);
+	const char *ev_name = evsel__name(evsel);
+	char buf[512], sample_freq_str[64] = "";
+	size_t buflen = sizeof(buf);
+	char ref[30] = " show reference callgraph, ";
+	bool enable_ref = false;
+
+	if (symbol_conf.filter_relative) {
+		nr_samples = hists->stats.nr_non_filtered_samples;
+		nr_events = hists->stats.total_non_filtered_period;
+	}
+
+	if (evsel__is_group_event(evsel)) {
+		struct evsel *pos;
+
+		evsel__group_desc(evsel, buf, buflen);
+		ev_name = buf;
+
+		for_each_group_member(pos, evsel) {
+			struct hists *pos_hists = evsel__hists(pos);
+
+			if (symbol_conf.filter_relative) {
+				nr_samples += pos_hists->stats.nr_non_filtered_samples;
+				nr_events += pos_hists->stats.total_non_filtered_period;
+			} else {
+				nr_samples += pos_hists->stats.nr_samples;
+				nr_events += pos_hists->stats.total_period;
+			}
+		}
+	}
+
+	if (symbol_conf.show_ref_callgraph &&
+	    strstr(ev_name, "call-graph=no"))
+		enable_ref = true;
+
+	if (show_freq)
+		scnprintf(sample_freq_str, sizeof(sample_freq_str), " %d Hz,", evsel->core.attr.sample_freq);
+
+	nr_samples = convert_unit(nr_samples, &unit);
+	printed = scnprintf(bf, size,
+			   "Samples: %lu%c of event%s '%s',%s%sEvent count (approx.): %" PRIu64,
+			   nr_samples, unit, evsel->core.nr_members > 1 ? "s" : "",
+			   ev_name, sample_freq_str, enable_ref ? ref : " ", nr_events);
+
+
+	if (hists->uid_filter_str)
+		printed += snprintf(bf + printed, size - printed,
+				    ", UID: %s", hists->uid_filter_str);
+	if (thread) {
+		if (hists__has(hists, thread)) {
+			printed += scnprintf(bf + printed, size - printed,
+				    ", Thread: %s(%d)",
+				     (thread->comm_set ? thread__comm_str(thread) : ""),
+				    thread->tid);
+		} else {
+			printed += scnprintf(bf + printed, size - printed,
+				    ", Thread: %s",
+				     (thread->comm_set ? thread__comm_str(thread) : ""));
+		}
+	}
+	if (dso)
+		printed += scnprintf(bf + printed, size - printed,
+				    ", DSO: %s", dso->short_name);
+	if (socket_id > -1)
+		printed += scnprintf(bf + printed, size - printed,
+				    ", Processor Socket: %d", socket_id);
+
+	return printed;
+}
+
+int parse_filter_percentage(const struct option *opt __maybe_unused,
+			    const char *arg, int unset __maybe_unused)
+{
+	if (!strcmp(arg, "relative"))
+		symbol_conf.filter_relative = true;
+	else if (!strcmp(arg, "absolute"))
+		symbol_conf.filter_relative = false;
+	else {
+		pr_debug("Invalid percentage: %s\n", arg);
+		return -1;
+	}
+
+	return 0;
+}
+
+int perf_hist_config(const char *var, const char *value)
+{
+	if (!strcmp(var, "hist.percentage"))
+		return parse_filter_percentage(NULL, value, 0);
+
+	return 0;
+}
+
+int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list)
+{
+	memset(hists, 0, sizeof(*hists));
+	hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT_CACHED;
+	hists->entries_in = &hists->entries_in_array[0];
+	hists->entries_collapsed = RB_ROOT_CACHED;
+	hists->entries = RB_ROOT_CACHED;
+	mutex_init(&hists->lock);
+	hists->socket_filter = -1;
+	hists->hpp_list = hpp_list;
+	INIT_LIST_HEAD(&hists->hpp_formats);
+	return 0;
+}
+
+static void hists__delete_remaining_entries(struct rb_root_cached *root)
+{
+	struct rb_node *node;
+	struct hist_entry *he;
+
+	while (!RB_EMPTY_ROOT(&root->rb_root)) {
+		node = rb_first_cached(root);
+		rb_erase_cached(node, root);
+
+		he = rb_entry(node, struct hist_entry, rb_node_in);
+		hist_entry__delete(he);
+	}
+}
+
+static void hists__delete_all_entries(struct hists *hists)
+{
+	hists__delete_entries(hists);
+	hists__delete_remaining_entries(&hists->entries_in_array[0]);
+	hists__delete_remaining_entries(&hists->entries_in_array[1]);
+	hists__delete_remaining_entries(&hists->entries_collapsed);
+}
+
+static void hists_evsel__exit(struct evsel *evsel)
+{
+	struct hists *hists = evsel__hists(evsel);
+	struct perf_hpp_fmt *fmt, *pos;
+	struct perf_hpp_list_node *node, *tmp;
+
+	hists__delete_all_entries(hists);
+
+	list_for_each_entry_safe(node, tmp, &hists->hpp_formats, list) {
+		perf_hpp_list__for_each_format_safe(&node->hpp, fmt, pos) {
+			list_del_init(&fmt->list);
+			free(fmt);
+		}
+		list_del_init(&node->list);
+		free(node);
+	}
+}
+
+static int hists_evsel__init(struct evsel *evsel)
+{
+	struct hists *hists = evsel__hists(evsel);
+
+	__hists__init(hists, &perf_hpp_list);
+	return 0;
+}
+
+/*
+ * XXX We probably need a hists_evsel__exit() to free the hist_entries
+ * stored in the rbtree...
+ */
+
+int hists__init(void)
+{
+	int err = evsel__object_config(sizeof(struct hists_evsel),
+				       hists_evsel__init, hists_evsel__exit);
+	if (err)
+		fputs("FATAL ERROR: Couldn't setup hists class\n", stderr);
+
+	return err;
+}
+
+void perf_hpp_list__init(struct perf_hpp_list *list)
+{
+	INIT_LIST_HEAD(&list->fields);
+	INIT_LIST_HEAD(&list->sorts);
+}
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
new file mode 100644
index 000000000..ebd8a8f78
--- /dev/null
+++ b/tools/perf/util/hist.h
@@ -0,0 +1,595 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_HIST_H
+#define __PERF_HIST_H
+
+#include <linux/rbtree.h>
+#include <linux/types.h>
+#include "evsel.h"
+#include "color.h"
+#include "events_stats.h"
+#include "mutex.h"
+
+struct hist_entry;
+struct hist_entry_ops;
+struct addr_location;
+struct map_symbol;
+struct mem_info;
+struct branch_info;
+struct branch_stack;
+struct block_info;
+struct symbol;
+struct ui_progress;
+
+enum hist_filter {
+	HIST_FILTER__DSO,
+	HIST_FILTER__THREAD,
+	HIST_FILTER__PARENT,
+	HIST_FILTER__SYMBOL,
+	HIST_FILTER__GUEST,
+	HIST_FILTER__HOST,
+	HIST_FILTER__SOCKET,
+	HIST_FILTER__C2C,
+};
+
+enum hist_column {
+	HISTC_SYMBOL,
+	HISTC_TIME,
+	HISTC_DSO,
+	HISTC_THREAD,
+	HISTC_COMM,
+	HISTC_CGROUP_ID,
+	HISTC_CGROUP,
+	HISTC_PARENT,
+	HISTC_CPU,
+	HISTC_SOCKET,
+	HISTC_SRCLINE,
+	HISTC_SRCFILE,
+	HISTC_MISPREDICT,
+	HISTC_IN_TX,
+	HISTC_ABORT,
+	HISTC_SYMBOL_FROM,
+	HISTC_SYMBOL_TO,
+	HISTC_DSO_FROM,
+	HISTC_DSO_TO,
+	HISTC_LOCAL_WEIGHT,
+	HISTC_GLOBAL_WEIGHT,
+	HISTC_CODE_PAGE_SIZE,
+	HISTC_MEM_DADDR_SYMBOL,
+	HISTC_MEM_DADDR_DSO,
+	HISTC_MEM_PHYS_DADDR,
+	HISTC_MEM_DATA_PAGE_SIZE,
+	HISTC_MEM_LOCKED,
+	HISTC_MEM_TLB,
+	HISTC_MEM_LVL,
+	HISTC_MEM_SNOOP,
+	HISTC_MEM_DCACHELINE,
+	HISTC_MEM_IADDR_SYMBOL,
+	HISTC_TRANSACTION,
+	HISTC_CYCLES,
+	HISTC_SRCLINE_FROM,
+	HISTC_SRCLINE_TO,
+	HISTC_TRACE,
+	HISTC_SYM_SIZE,
+	HISTC_DSO_SIZE,
+	HISTC_SYMBOL_IPC,
+	HISTC_MEM_BLOCKED,
+	HISTC_LOCAL_INS_LAT,
+	HISTC_GLOBAL_INS_LAT,
+	HISTC_LOCAL_P_STAGE_CYC,
+	HISTC_GLOBAL_P_STAGE_CYC,
+	HISTC_ADDR_FROM,
+	HISTC_ADDR_TO,
+	HISTC_ADDR,
+	HISTC_NR_COLS, /* Last entry */
+};
+
+struct thread;
+struct dso;
+
+struct hists {
+	struct rb_root_cached	entries_in_array[2];
+	struct rb_root_cached	*entries_in;
+	struct rb_root_cached	entries;
+	struct rb_root_cached	entries_collapsed;
+	u64			nr_entries;
+	u64			nr_non_filtered_entries;
+	u64			callchain_period;
+	u64			callchain_non_filtered_period;
+	struct thread		*thread_filter;
+	const struct dso	*dso_filter;
+	const char		*uid_filter_str;
+	const char		*symbol_filter_str;
+	struct mutex		lock;
+	struct hists_stats	stats;
+	u64			event_stream;
+	u16			col_len[HISTC_NR_COLS];
+	bool			has_callchains;
+	int			socket_filter;
+	struct perf_hpp_list	*hpp_list;
+	struct list_head	hpp_formats;
+	int			nr_hpp_node;
+};
+
+#define hists__has(__h, __f) (__h)->hpp_list->__f
+
+struct hist_entry_iter;
+
+struct hist_iter_ops {
+	int (*prepare_entry)(struct hist_entry_iter *, struct addr_location *);
+	int (*add_single_entry)(struct hist_entry_iter *, struct addr_location *);
+	int (*next_entry)(struct hist_entry_iter *, struct addr_location *);
+	int (*add_next_entry)(struct hist_entry_iter *, struct addr_location *);
+	int (*finish_entry)(struct hist_entry_iter *, struct addr_location *);
+};
+
+struct hist_entry_iter {
+	int total;
+	int curr;
+
+	bool hide_unresolved;
+
+	struct evsel *evsel;
+	struct perf_sample *sample;
+	struct hist_entry *he;
+	struct symbol *parent;
+	void *priv;
+
+	const struct hist_iter_ops *ops;
+	/* user-defined callback function (optional) */
+	int (*add_entry_cb)(struct hist_entry_iter *iter,
+			    struct addr_location *al, bool single, void *arg);
+};
+
+extern const struct hist_iter_ops hist_iter_normal;
+extern const struct hist_iter_ops hist_iter_branch;
+extern const struct hist_iter_ops hist_iter_mem;
+extern const struct hist_iter_ops hist_iter_cumulative;
+
+struct hist_entry *hists__add_entry(struct hists *hists,
+				    struct addr_location *al,
+				    struct symbol *parent,
+				    struct branch_info *bi,
+				    struct mem_info *mi,
+				    struct perf_sample *sample,
+				    bool sample_self);
+
+struct hist_entry *hists__add_entry_ops(struct hists *hists,
+					struct hist_entry_ops *ops,
+					struct addr_location *al,
+					struct symbol *sym_parent,
+					struct branch_info *bi,
+					struct mem_info *mi,
+					struct perf_sample *sample,
+					bool sample_self);
+
+struct hist_entry *hists__add_entry_block(struct hists *hists,
+					  struct addr_location *al,
+					  struct block_info *bi);
+
+int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
+			 int max_stack_depth, void *arg);
+
+struct perf_hpp;
+struct perf_hpp_fmt;
+
+int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
+int hist_entry__transaction_len(void);
+int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size,
+			      struct hists *hists);
+int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp,
+				   struct perf_hpp_fmt *fmt, int printed);
+void hist_entry__delete(struct hist_entry *he);
+
+typedef int (*hists__resort_cb_t)(struct hist_entry *he, void *arg);
+
+void evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog,
+			     hists__resort_cb_t cb, void *cb_arg);
+void evsel__output_resort(struct evsel *evsel, struct ui_progress *prog);
+void hists__output_resort(struct hists *hists, struct ui_progress *prog);
+void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog,
+			     hists__resort_cb_t cb);
+int hists__collapse_resort(struct hists *hists, struct ui_progress *prog);
+
+void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel);
+void hists__delete_entries(struct hists *hists);
+void hists__output_recalc_col_len(struct hists *hists, int max_rows);
+
+struct hist_entry *hists__get_entry(struct hists *hists, int idx);
+
+u64 hists__total_period(struct hists *hists);
+void hists__reset_stats(struct hists *hists);
+void hists__inc_stats(struct hists *hists, struct hist_entry *h);
+void hists__inc_nr_events(struct hists *hists);
+void hists__inc_nr_samples(struct hists *hists, bool filtered);
+void hists__inc_nr_lost_samples(struct hists *hists, u32 lost);
+
+size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
+		      int max_cols, float min_pcnt, FILE *fp,
+		      bool ignore_callchains);
+size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp,
+				 bool skip_empty);
+
+void hists__filter_by_dso(struct hists *hists);
+void hists__filter_by_thread(struct hists *hists);
+void hists__filter_by_symbol(struct hists *hists);
+void hists__filter_by_socket(struct hists *hists);
+
+static inline bool hists__has_filter(struct hists *hists)
+{
+	return hists->thread_filter || hists->dso_filter ||
+		hists->symbol_filter_str || (hists->socket_filter > -1);
+}
+
+u16 hists__col_len(struct hists *hists, enum hist_column col);
+void hists__set_col_len(struct hists *hists, enum hist_column col, u16 len);
+bool hists__new_col_len(struct hists *hists, enum hist_column col, u16 len);
+void hists__reset_col_len(struct hists *hists);
+void hists__calc_col_len(struct hists *hists, struct hist_entry *he);
+
+void hists__match(struct hists *leader, struct hists *other);
+int hists__link(struct hists *leader, struct hists *other);
+int hists__unlink(struct hists *hists);
+
+struct hists_evsel {
+	struct evsel evsel;
+	struct hists	  hists;
+};
+
+static inline struct evsel *hists_to_evsel(struct hists *hists)
+{
+	struct hists_evsel *hevsel = container_of(hists, struct hists_evsel, hists);
+	return &hevsel->evsel;
+}
+
+static inline struct hists *evsel__hists(struct evsel *evsel)
+{
+	struct hists_evsel *hevsel = (struct hists_evsel *)evsel;
+	return &hevsel->hists;
+}
+
+static __pure inline bool hists__has_callchains(struct hists *hists)
+{
+	return hists->has_callchains;
+}
+
+int hists__init(void);
+int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list);
+
+struct rb_root_cached *hists__get_rotate_entries_in(struct hists *hists);
+
+struct perf_hpp {
+	char *buf;
+	size_t size;
+	const char *sep;
+	void *ptr;
+	bool skip;
+};
+
+struct perf_hpp_fmt {
+	const char *name;
+	int (*header)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		      struct hists *hists, int line, int *span);
+	int (*width)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		     struct hists *hists);
+	int (*color)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		     struct hist_entry *he);
+	int (*entry)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		     struct hist_entry *he);
+	int64_t (*cmp)(struct perf_hpp_fmt *fmt,
+		       struct hist_entry *a, struct hist_entry *b);
+	int64_t (*collapse)(struct perf_hpp_fmt *fmt,
+			    struct hist_entry *a, struct hist_entry *b);
+	int64_t (*sort)(struct perf_hpp_fmt *fmt,
+			struct hist_entry *a, struct hist_entry *b);
+	bool (*equal)(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b);
+	void (*free)(struct perf_hpp_fmt *fmt);
+
+	struct list_head list;
+	struct list_head sort_list;
+	bool elide;
+	int len;
+	int user_len;
+	int idx;
+	int level;
+};
+
+struct perf_hpp_list {
+	struct list_head fields;
+	struct list_head sorts;
+
+	int nr_header_lines;
+	int need_collapse;
+	int parent;
+	int sym;
+	int dso;
+	int socket;
+	int thread;
+	int comm;
+};
+
+extern struct perf_hpp_list perf_hpp_list;
+
+struct perf_hpp_list_node {
+	struct list_head	list;
+	struct perf_hpp_list	hpp;
+	int			level;
+	bool			skip;
+};
+
+void perf_hpp_list__column_register(struct perf_hpp_list *list,
+				    struct perf_hpp_fmt *format);
+void perf_hpp_list__register_sort_field(struct perf_hpp_list *list,
+					struct perf_hpp_fmt *format);
+void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list,
+				       struct perf_hpp_fmt *format);
+
+static inline void perf_hpp__column_register(struct perf_hpp_fmt *format)
+{
+	perf_hpp_list__column_register(&perf_hpp_list, format);
+}
+
+static inline void perf_hpp__register_sort_field(struct perf_hpp_fmt *format)
+{
+	perf_hpp_list__register_sort_field(&perf_hpp_list, format);
+}
+
+static inline void perf_hpp__prepend_sort_field(struct perf_hpp_fmt *format)
+{
+	perf_hpp_list__prepend_sort_field(&perf_hpp_list, format);
+}
+
+#define perf_hpp_list__for_each_format(_list, format) \
+	list_for_each_entry(format, &(_list)->fields, list)
+
+#define perf_hpp_list__for_each_format_safe(_list, format, tmp)	\
+	list_for_each_entry_safe(format, tmp, &(_list)->fields, list)
+
+#define perf_hpp_list__for_each_sort_list(_list, format) \
+	list_for_each_entry(format, &(_list)->sorts, sort_list)
+
+#define perf_hpp_list__for_each_sort_list_safe(_list, format, tmp)	\
+	list_for_each_entry_safe(format, tmp, &(_list)->sorts, sort_list)
+
+#define hists__for_each_format(hists, format) \
+	perf_hpp_list__for_each_format((hists)->hpp_list, format)
+
+#define hists__for_each_sort_list(hists, format) \
+	perf_hpp_list__for_each_sort_list((hists)->hpp_list, format)
+
+extern struct perf_hpp_fmt perf_hpp__format[];
+
+enum {
+	/* Matches perf_hpp__format array. */
+	PERF_HPP__OVERHEAD,
+	PERF_HPP__OVERHEAD_SYS,
+	PERF_HPP__OVERHEAD_US,
+	PERF_HPP__OVERHEAD_GUEST_SYS,
+	PERF_HPP__OVERHEAD_GUEST_US,
+	PERF_HPP__OVERHEAD_ACC,
+	PERF_HPP__SAMPLES,
+	PERF_HPP__PERIOD,
+
+	PERF_HPP__MAX_INDEX
+};
+
+void perf_hpp__init(void);
+void perf_hpp__cancel_cumulate(void);
+void perf_hpp__setup_output_field(struct perf_hpp_list *list);
+void perf_hpp__reset_output_field(struct perf_hpp_list *list);
+void perf_hpp__append_sort_keys(struct perf_hpp_list *list);
+int perf_hpp__setup_hists_formats(struct perf_hpp_list *list,
+				  struct evlist *evlist);
+
+
+bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format);
+bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *format);
+bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists);
+bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt);
+
+struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt);
+
+int hist_entry__filter(struct hist_entry *he, int type, const void *arg);
+
+static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format,
+					 struct hists *hists)
+{
+	if (format->elide)
+		return true;
+
+	if (perf_hpp__is_dynamic_entry(format) &&
+	    !perf_hpp__defined_dynamic_entry(format, hists))
+		return true;
+
+	return false;
+}
+
+void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists);
+void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists);
+void perf_hpp__set_user_width(const char *width_list_str);
+void hists__reset_column_width(struct hists *hists);
+
+typedef u64 (*hpp_field_fn)(struct hist_entry *he);
+typedef int (*hpp_callback_fn)(struct perf_hpp *hpp, bool front);
+typedef int (*hpp_snprint_fn)(struct perf_hpp *hpp, const char *fmt, ...);
+
+int hpp__fmt(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+	     struct hist_entry *he, hpp_field_fn get_field,
+	     const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent);
+int hpp__fmt_acc(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		 struct hist_entry *he, hpp_field_fn get_field,
+		 const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent);
+
+static inline void advance_hpp(struct perf_hpp *hpp, int inc)
+{
+	hpp->buf  += inc;
+	hpp->size -= inc;
+}
+
+static inline size_t perf_hpp__use_color(void)
+{
+	return !symbol_conf.field_sep;
+}
+
+static inline size_t perf_hpp__color_overhead(void)
+{
+	return perf_hpp__use_color() ?
+	       (COLOR_MAXLEN + sizeof(PERF_COLOR_RESET)) * PERF_HPP__MAX_INDEX
+	       : 0;
+}
+
+struct evlist;
+
+struct hist_browser_timer {
+	void (*timer)(void *arg);
+	void *arg;
+	int refresh;
+};
+
+struct annotation_options;
+struct res_sample;
+
+enum rstype {
+	A_NORMAL,
+	A_ASM,
+	A_SOURCE
+};
+
+struct block_hist;
+
+#ifdef HAVE_SLANG_SUPPORT
+#include "../ui/keysyms.h"
+void attr_to_script(char *buf, struct perf_event_attr *attr);
+
+int map_symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
+			     struct hist_browser_timer *hbt,
+			     struct annotation_options *annotation_opts);
+
+int hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel,
+			     struct hist_browser_timer *hbt,
+			     struct annotation_options *annotation_opts);
+
+int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt,
+			     float min_pcnt, struct perf_env *env, bool warn_lost_event,
+			     struct annotation_options *annotation_options);
+
+int script_browse(const char *script_opt, struct evsel *evsel);
+
+void run_script(char *cmd);
+int res_sample_browse(struct res_sample *res_samples, int num_res,
+		      struct evsel *evsel, enum rstype rstype);
+void res_sample_init(void);
+
+int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel,
+			   float min_percent, struct perf_env *env,
+			   struct annotation_options *annotation_opts);
+#else
+static inline
+int evlist__tui_browse_hists(struct evlist *evlist __maybe_unused,
+			     const char *help __maybe_unused,
+			     struct hist_browser_timer *hbt __maybe_unused,
+			     float min_pcnt __maybe_unused,
+			     struct perf_env *env __maybe_unused,
+			     bool warn_lost_event __maybe_unused,
+			     struct annotation_options *annotation_options __maybe_unused)
+{
+	return 0;
+}
+static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused,
+					   struct evsel *evsel __maybe_unused,
+					   struct hist_browser_timer *hbt __maybe_unused,
+					   struct annotation_options *annotation_options __maybe_unused)
+{
+	return 0;
+}
+
+static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused,
+					   struct evsel *evsel __maybe_unused,
+					   struct hist_browser_timer *hbt __maybe_unused,
+					   struct annotation_options *annotation_opts __maybe_unused)
+{
+	return 0;
+}
+
+static inline int script_browse(const char *script_opt __maybe_unused,
+				struct evsel *evsel __maybe_unused)
+{
+	return 0;
+}
+
+static inline int res_sample_browse(struct res_sample *res_samples __maybe_unused,
+				    int num_res __maybe_unused,
+				    struct evsel *evsel __maybe_unused,
+				    enum rstype rstype __maybe_unused)
+{
+	return 0;
+}
+
+static inline void res_sample_init(void) {}
+
+static inline int block_hists_tui_browse(struct block_hist *bh __maybe_unused,
+					 struct evsel *evsel __maybe_unused,
+					 float min_percent __maybe_unused,
+					 struct perf_env *env __maybe_unused,
+					 struct annotation_options *annotation_opts __maybe_unused)
+{
+	return 0;
+}
+
+#define K_LEFT  -1000
+#define K_RIGHT -2000
+#define K_SWITCH_INPUT_DATA -3000
+#define K_RELOAD -4000
+#endif
+
+unsigned int hists__sort_list_width(struct hists *hists);
+unsigned int hists__overhead_width(struct hists *hists);
+
+void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
+			  struct perf_sample *sample, bool nonany_branch_mode,
+			  u64 *total_cycles);
+
+struct option;
+int parse_filter_percentage(const struct option *opt, const char *arg, int unset);
+int perf_hist_config(const char *var, const char *value);
+
+void perf_hpp_list__init(struct perf_hpp_list *list);
+
+enum hierarchy_move_dir {
+	HMD_NORMAL,
+	HMD_FORCE_SIBLING,
+	HMD_FORCE_CHILD,
+};
+
+struct rb_node *rb_hierarchy_last(struct rb_node *node);
+struct rb_node *__rb_hierarchy_next(struct rb_node *node,
+				    enum hierarchy_move_dir hmd);
+struct rb_node *rb_hierarchy_prev(struct rb_node *node);
+
+static inline struct rb_node *rb_hierarchy_next(struct rb_node *node)
+{
+	return __rb_hierarchy_next(node, HMD_NORMAL);
+}
+
+#define HIERARCHY_INDENT  3
+
+bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit);
+int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...);
+int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...);
+int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
+			   struct perf_hpp_list *hpp_list);
+int hists__fprintf_headers(struct hists *hists, FILE *fp);
+int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool show_freq);
+
+static inline int hists__scnprintf_title(struct hists *hists, char *bf, size_t size)
+{
+	return __hists__scnprintf_title(hists, bf, size, true);
+}
+
+#endif	/* __PERF_HIST_H */
diff --git a/tools/perf/util/include/asm/asm-offsets.h b/tools/perf/util/include/asm/asm-offsets.h
new file mode 100644
index 000000000..3aff4cf44
--- /dev/null
+++ b/tools/perf/util/include/asm/asm-offsets.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* stub */
diff --git a/tools/perf/util/include/asm/cpufeature.h b/tools/perf/util/include/asm/cpufeature.h
new file mode 100644
index 000000000..2270481c7
--- /dev/null
+++ b/tools/perf/util/include/asm/cpufeature.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef PERF_CPUFEATURE_H
+#define PERF_CPUFEATURE_H
+
+/* cpufeature.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
+
+#define X86_FEATURE_REP_GOOD 0
+
+#endif	/* PERF_CPUFEATURE_H */
diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h
new file mode 100644
index 000000000..e9876be63
--- /dev/null
+++ b/tools/perf/util/include/asm/dwarf2.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef PERF_DWARF2_H
+#define PERF_DWARF2_H
+
+/* dwarf2.h ... dummy header file for including arch/x86/lib/mem{cpy,set}_64.S */
+
+#define CFI_STARTPROC
+#define CFI_ENDPROC
+#define CFI_REMEMBER_STATE
+#define CFI_RESTORE_STATE
+
+#endif	/* PERF_DWARF2_H */
+
diff --git a/tools/perf/util/include/asm/swab.h b/tools/perf/util/include/asm/swab.h
new file mode 100644
index 000000000..ed5389425
--- /dev/null
+++ b/tools/perf/util/include/asm/swab.h
@@ -0,0 +1 @@
+/* stub */
diff --git a/tools/perf/util/include/asm/system.h b/tools/perf/util/include/asm/system.h
new file mode 100644
index 000000000..710cecca9
--- /dev/null
+++ b/tools/perf/util/include/asm/system.h
@@ -0,0 +1 @@
+/* Empty */
diff --git a/tools/perf/util/include/asm/uaccess.h b/tools/perf/util/include/asm/uaccess.h
new file mode 100644
index 000000000..548100315
--- /dev/null
+++ b/tools/perf/util/include/asm/uaccess.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_ASM_UACCESS_H_
+#define _PERF_ASM_UACCESS_H_
+
+#define __get_user(src, dest)						\
+({									\
+	(src) = *dest;							\
+	0;								\
+})
+
+#define get_user	__get_user
+
+#define access_ok(addr, size)	1
+
+#endif
diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h
new file mode 100644
index 000000000..7d99a084e
--- /dev/null
+++ b/tools/perf/util/include/dwarf-regs.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_DWARF_REGS_H_
+#define _PERF_DWARF_REGS_H_
+
+#ifdef HAVE_DWARF_SUPPORT
+const char *get_arch_regstr(unsigned int n);
+/*
+ * get_dwarf_regstr - Returns ftrace register string from DWARF regnum
+ * n: DWARF register number
+ * machine: ELF machine signature (EM_*)
+ */
+const char *get_dwarf_regstr(unsigned int n, unsigned int machine);
+#endif
+
+#ifdef HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
+/*
+ * Arch should support fetching the offset of a register in pt_regs
+ * by its name. See kernel's regs_query_register_offset in
+ * arch/xxx/kernel/ptrace.c.
+ */
+int regs_query_register_offset(const char *name);
+#endif
+#endif
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
new file mode 100644
index 000000000..75e224841
--- /dev/null
+++ b/tools/perf/util/include/linux/linkage.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef PERF_LINUX_LINKAGE_H_
+#define PERF_LINUX_LINKAGE_H_
+
+/* linkage.h ... for including arch/x86/lib/memcpy_64.S */
+
+/* Some toolchains use other characters (e.g. '`') to mark new line in macro */
+#ifndef ASM_NL
+#define ASM_NL		 ;
+#endif
+
+#ifndef __ALIGN
+#define __ALIGN		.align 4,0x90
+#define __ALIGN_STR	".align 4,0x90"
+#endif
+
+/* SYM_T_FUNC -- type used by assembler to mark functions */
+#ifndef SYM_T_FUNC
+#define SYM_T_FUNC				STT_FUNC
+#endif
+
+/* SYM_A_* -- align the symbol? */
+#define SYM_A_ALIGN				ALIGN
+
+/* SYM_L_* -- linkage of symbols */
+#define SYM_L_GLOBAL(name)			.globl name
+#define SYM_L_WEAK(name)			.weak name
+#define SYM_L_LOCAL(name)			/* nothing */
+
+#define ALIGN __ALIGN
+
+/* === generic annotations === */
+
+/* SYM_ENTRY -- use only if you have to for non-paired symbols */
+#ifndef SYM_ENTRY
+#define SYM_ENTRY(name, linkage, align...)		\
+	linkage(name) ASM_NL				\
+	align ASM_NL					\
+	name:
+#endif
+
+/* SYM_START -- use only if you have to */
+#ifndef SYM_START
+#define SYM_START(name, linkage, align...)		\
+	SYM_ENTRY(name, linkage, align)
+#endif
+
+/* SYM_END -- use only if you have to */
+#ifndef SYM_END
+#define SYM_END(name, sym_type)				\
+	.type name sym_type ASM_NL			\
+	.set .L__sym_size_##name, .-name ASM_NL		\
+	.size name, .-name
+#endif
+
+/* SYM_ALIAS -- use only if you have to */
+#ifndef SYM_ALIAS
+#define SYM_ALIAS(alias, name, sym_type, linkage)			\
+	linkage(alias) ASM_NL						\
+	.set alias, name ASM_NL						\
+	.type alias sym_type ASM_NL					\
+	.set .L__sym_size_##alias, .L__sym_size_##name ASM_NL		\
+	.size alias, .L__sym_size_##alias
+#endif
+
+/* SYM_FUNC_START -- use for global functions */
+#ifndef SYM_FUNC_START
+#define SYM_FUNC_START(name)				\
+	SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
+#endif
+
+/* SYM_FUNC_START_LOCAL -- use for local functions */
+#ifndef SYM_FUNC_START_LOCAL
+#define SYM_FUNC_START_LOCAL(name)			\
+	SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN)
+#endif
+
+/* SYM_FUNC_START_WEAK -- use for weak functions */
+#ifndef SYM_FUNC_START_WEAK
+#define SYM_FUNC_START_WEAK(name)			\
+	SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN)
+#endif
+
+/*
+ * SYM_FUNC_END -- the end of SYM_FUNC_START_LOCAL, SYM_FUNC_START,
+ * SYM_FUNC_START_WEAK, ...
+ */
+#ifndef SYM_FUNC_END
+#define SYM_FUNC_END(name)				\
+	SYM_END(name, SYM_T_FUNC)
+#endif
+
+/*
+ * SYM_FUNC_ALIAS -- define a global alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS
+#define SYM_FUNC_ALIAS(alias, name)					\
+	SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_GLOBAL)
+#endif
+
+/*
+ * SYM_FUNC_ALIAS_LOCAL -- define a local alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS_LOCAL
+#define SYM_FUNC_ALIAS_LOCAL(alias, name)				\
+	SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_LOCAL)
+#endif
+
+/*
+ * SYM_FUNC_ALIAS_WEAK -- define a weak global alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS_WEAK
+#define SYM_FUNC_ALIAS_WEAK(alias, name)				\
+	SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK)
+#endif
+
+// In the kernel sources (include/linux/cfi_types.h), this has a different
+// definition when CONFIG_CFI_CLANG is used, for tools/ just use the !clang
+// definition:
+#ifndef SYM_TYPED_START
+#define SYM_TYPED_START(name, linkage, align...)        \
+        SYM_START(name, linkage, align)
+#endif
+
+#ifndef SYM_TYPED_FUNC_START
+#define SYM_TYPED_FUNC_START(name)                      \
+        SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
+#endif
+
+#endif	/* PERF_LINUX_LINKAGE_H_ */
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
new file mode 100644
index 000000000..2c8147a62
--- /dev/null
+++ b/tools/perf/util/intel-bts.c
@@ -0,0 +1,939 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * intel-bts.c: Intel Processor Trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ */
+
+#include <endian.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <inttypes.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+#include <linux/zalloc.h>
+
+#include "color.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "machine.h"
+#include "symbol.h"
+#include "session.h"
+#include "tool.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "debug.h"
+#include "tsc.h"
+#include "auxtrace.h"
+#include "intel-pt-decoder/intel-pt-insn-decoder.h"
+#include "intel-bts.h"
+#include "util/synthetic-events.h"
+
+#define MAX_TIMESTAMP (~0ULL)
+
+#define INTEL_BTS_ERR_NOINSN  5
+#define INTEL_BTS_ERR_LOST    9
+
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define le64_to_cpu bswap_64
+#else
+#define le64_to_cpu
+#endif
+
+struct intel_bts {
+	struct auxtrace			auxtrace;
+	struct auxtrace_queues		queues;
+	struct auxtrace_heap		heap;
+	u32				auxtrace_type;
+	struct perf_session		*session;
+	struct machine			*machine;
+	bool				sampling_mode;
+	bool				snapshot_mode;
+	bool				data_queued;
+	u32				pmu_type;
+	struct perf_tsc_conversion	tc;
+	bool				cap_user_time_zero;
+	struct itrace_synth_opts	synth_opts;
+	bool				sample_branches;
+	u32				branches_filter;
+	u64				branches_sample_type;
+	u64				branches_id;
+	size_t				branches_event_size;
+	unsigned long			num_events;
+};
+
+struct intel_bts_queue {
+	struct intel_bts	*bts;
+	unsigned int		queue_nr;
+	struct auxtrace_buffer	*buffer;
+	bool			on_heap;
+	bool			done;
+	pid_t			pid;
+	pid_t			tid;
+	int			cpu;
+	u64			time;
+	struct intel_pt_insn	intel_pt_insn;
+	u32			sample_flags;
+};
+
+struct branch {
+	u64 from;
+	u64 to;
+	u64 misc;
+};
+
+static void intel_bts_dump(struct intel_bts *bts __maybe_unused,
+			   unsigned char *buf, size_t len)
+{
+	struct branch *branch;
+	size_t i, pos = 0, br_sz = sizeof(struct branch), sz;
+	const char *color = PERF_COLOR_BLUE;
+
+	color_fprintf(stdout, color,
+		      ". ... Intel BTS data: size %zu bytes\n",
+		      len);
+
+	while (len) {
+		if (len >= br_sz)
+			sz = br_sz;
+		else
+			sz = len;
+		printf(".");
+		color_fprintf(stdout, color, "  %08x: ", pos);
+		for (i = 0; i < sz; i++)
+			color_fprintf(stdout, color, " %02x", buf[i]);
+		for (; i < br_sz; i++)
+			color_fprintf(stdout, color, "   ");
+		if (len >= br_sz) {
+			branch = (struct branch *)buf;
+			color_fprintf(stdout, color, " %"PRIx64" -> %"PRIx64" %s\n",
+				      le64_to_cpu(branch->from),
+				      le64_to_cpu(branch->to),
+				      le64_to_cpu(branch->misc) & 0x10 ?
+							"pred" : "miss");
+		} else {
+			color_fprintf(stdout, color, " Bad record!\n");
+		}
+		pos += sz;
+		buf += sz;
+		len -= sz;
+	}
+}
+
+static void intel_bts_dump_event(struct intel_bts *bts, unsigned char *buf,
+				 size_t len)
+{
+	printf(".\n");
+	intel_bts_dump(bts, buf, len);
+}
+
+static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample)
+{
+	union perf_event event;
+	int err;
+
+	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
+			     INTEL_BTS_ERR_LOST, sample->cpu, sample->pid,
+			     sample->tid, 0, "Lost trace data", sample->time);
+
+	err = perf_session__deliver_synth_event(bts->session, &event, NULL);
+	if (err)
+		pr_err("Intel BTS: failed to deliver error event, error %d\n",
+		       err);
+
+	return err;
+}
+
+static struct intel_bts_queue *intel_bts_alloc_queue(struct intel_bts *bts,
+						     unsigned int queue_nr)
+{
+	struct intel_bts_queue *btsq;
+
+	btsq = zalloc(sizeof(struct intel_bts_queue));
+	if (!btsq)
+		return NULL;
+
+	btsq->bts = bts;
+	btsq->queue_nr = queue_nr;
+	btsq->pid = -1;
+	btsq->tid = -1;
+	btsq->cpu = -1;
+
+	return btsq;
+}
+
+static int intel_bts_setup_queue(struct intel_bts *bts,
+				 struct auxtrace_queue *queue,
+				 unsigned int queue_nr)
+{
+	struct intel_bts_queue *btsq = queue->priv;
+
+	if (list_empty(&queue->head))
+		return 0;
+
+	if (!btsq) {
+		btsq = intel_bts_alloc_queue(bts, queue_nr);
+		if (!btsq)
+			return -ENOMEM;
+		queue->priv = btsq;
+
+		if (queue->cpu != -1)
+			btsq->cpu = queue->cpu;
+		btsq->tid = queue->tid;
+	}
+
+	if (bts->sampling_mode)
+		return 0;
+
+	if (!btsq->on_heap && !btsq->buffer) {
+		int ret;
+
+		btsq->buffer = auxtrace_buffer__next(queue, NULL);
+		if (!btsq->buffer)
+			return 0;
+
+		ret = auxtrace_heap__add(&bts->heap, queue_nr,
+					 btsq->buffer->reference);
+		if (ret)
+			return ret;
+		btsq->on_heap = true;
+	}
+
+	return 0;
+}
+
+static int intel_bts_setup_queues(struct intel_bts *bts)
+{
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < bts->queues.nr_queues; i++) {
+		ret = intel_bts_setup_queue(bts, &bts->queues.queue_array[i],
+					    i);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+static inline int intel_bts_update_queues(struct intel_bts *bts)
+{
+	if (bts->queues.new_data) {
+		bts->queues.new_data = false;
+		return intel_bts_setup_queues(bts);
+	}
+	return 0;
+}
+
+static unsigned char *intel_bts_find_overlap(unsigned char *buf_a, size_t len_a,
+					     unsigned char *buf_b, size_t len_b)
+{
+	size_t offs, len;
+
+	if (len_a > len_b)
+		offs = len_a - len_b;
+	else
+		offs = 0;
+
+	for (; offs < len_a; offs += sizeof(struct branch)) {
+		len = len_a - offs;
+		if (!memcmp(buf_a + offs, buf_b, len))
+			return buf_b + len;
+	}
+
+	return buf_b;
+}
+
+static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue,
+				    struct auxtrace_buffer *b)
+{
+	struct auxtrace_buffer *a;
+	void *start;
+
+	if (b->list.prev == &queue->head)
+		return 0;
+	a = list_entry(b->list.prev, struct auxtrace_buffer, list);
+	start = intel_bts_find_overlap(a->data, a->size, b->data, b->size);
+	if (!start)
+		return -EINVAL;
+	b->use_size = b->data + b->size - start;
+	b->use_data = start;
+	return 0;
+}
+
+static inline u8 intel_bts_cpumode(struct intel_bts *bts, uint64_t ip)
+{
+	return machine__kernel_ip(bts->machine, ip) ?
+	       PERF_RECORD_MISC_KERNEL :
+	       PERF_RECORD_MISC_USER;
+}
+
+static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
+					 struct branch *branch)
+{
+	int ret;
+	struct intel_bts *bts = btsq->bts;
+	union perf_event event;
+	struct perf_sample sample = { .ip = 0, };
+
+	if (bts->synth_opts.initial_skip &&
+	    bts->num_events++ <= bts->synth_opts.initial_skip)
+		return 0;
+
+	sample.ip = le64_to_cpu(branch->from);
+	sample.cpumode = intel_bts_cpumode(bts, sample.ip);
+	sample.pid = btsq->pid;
+	sample.tid = btsq->tid;
+	sample.addr = le64_to_cpu(branch->to);
+	sample.id = btsq->bts->branches_id;
+	sample.stream_id = btsq->bts->branches_id;
+	sample.period = 1;
+	sample.cpu = btsq->cpu;
+	sample.flags = btsq->sample_flags;
+	sample.insn_len = btsq->intel_pt_insn.length;
+	memcpy(sample.insn, btsq->intel_pt_insn.buf, INTEL_PT_INSN_BUF_SZ);
+
+	event.sample.header.type = PERF_RECORD_SAMPLE;
+	event.sample.header.misc = sample.cpumode;
+	event.sample.header.size = sizeof(struct perf_event_header);
+
+	if (bts->synth_opts.inject) {
+		event.sample.header.size = bts->branches_event_size;
+		ret = perf_event__synthesize_sample(&event,
+						    bts->branches_sample_type,
+						    0, &sample);
+		if (ret)
+			return ret;
+	}
+
+	ret = perf_session__deliver_synth_event(bts->session, &event, &sample);
+	if (ret)
+		pr_err("Intel BTS: failed to deliver branch event, error %d\n",
+		       ret);
+
+	return ret;
+}
+
+static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip)
+{
+	struct machine *machine = btsq->bts->machine;
+	struct thread *thread;
+	unsigned char buf[INTEL_PT_INSN_BUF_SZ];
+	ssize_t len;
+	bool x86_64;
+	int err = -1;
+
+	thread = machine__find_thread(machine, -1, btsq->tid);
+	if (!thread)
+		return -1;
+
+	len = thread__memcpy(thread, machine, buf, ip, INTEL_PT_INSN_BUF_SZ, &x86_64);
+	if (len <= 0)
+		goto out_put;
+
+	if (intel_pt_get_insn(buf, len, x86_64, &btsq->intel_pt_insn))
+		goto out_put;
+
+	err = 0;
+out_put:
+	thread__put(thread);
+	return err;
+}
+
+static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid,
+				 pid_t tid, u64 ip)
+{
+	union perf_event event;
+	int err;
+
+	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
+			     INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip,
+			     "Failed to get instruction", 0);
+
+	err = perf_session__deliver_synth_event(bts->session, &event, NULL);
+	if (err)
+		pr_err("Intel BTS: failed to deliver error event, error %d\n",
+		       err);
+
+	return err;
+}
+
+static int intel_bts_get_branch_type(struct intel_bts_queue *btsq,
+				     struct branch *branch)
+{
+	int err;
+
+	if (!branch->from) {
+		if (branch->to)
+			btsq->sample_flags = PERF_IP_FLAG_BRANCH |
+					     PERF_IP_FLAG_TRACE_BEGIN;
+		else
+			btsq->sample_flags = 0;
+		btsq->intel_pt_insn.length = 0;
+	} else if (!branch->to) {
+		btsq->sample_flags = PERF_IP_FLAG_BRANCH |
+				     PERF_IP_FLAG_TRACE_END;
+		btsq->intel_pt_insn.length = 0;
+	} else {
+		err = intel_bts_get_next_insn(btsq, branch->from);
+		if (err) {
+			btsq->sample_flags = 0;
+			btsq->intel_pt_insn.length = 0;
+			if (!btsq->bts->synth_opts.errors)
+				return 0;
+			err = intel_bts_synth_error(btsq->bts, btsq->cpu,
+						    btsq->pid, btsq->tid,
+						    branch->from);
+			return err;
+		}
+		btsq->sample_flags = intel_pt_insn_type(btsq->intel_pt_insn.op);
+		/* Check for an async branch into the kernel */
+		if (!machine__kernel_ip(btsq->bts->machine, branch->from) &&
+		    machine__kernel_ip(btsq->bts->machine, branch->to) &&
+		    btsq->sample_flags != (PERF_IP_FLAG_BRANCH |
+					   PERF_IP_FLAG_CALL |
+					   PERF_IP_FLAG_SYSCALLRET))
+			btsq->sample_flags = PERF_IP_FLAG_BRANCH |
+					     PERF_IP_FLAG_CALL |
+					     PERF_IP_FLAG_ASYNC |
+					     PERF_IP_FLAG_INTERRUPT;
+	}
+
+	return 0;
+}
+
+static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
+				    struct auxtrace_buffer *buffer,
+				    struct thread *thread)
+{
+	struct branch *branch;
+	size_t sz, bsz = sizeof(struct branch);
+	u32 filter = btsq->bts->branches_filter;
+	int err = 0;
+
+	if (buffer->use_data) {
+		sz = buffer->use_size;
+		branch = buffer->use_data;
+	} else {
+		sz = buffer->size;
+		branch = buffer->data;
+	}
+
+	if (!btsq->bts->sample_branches)
+		return 0;
+
+	for (; sz > bsz; branch += 1, sz -= bsz) {
+		if (!branch->from && !branch->to)
+			continue;
+		intel_bts_get_branch_type(btsq, branch);
+		if (btsq->bts->synth_opts.thread_stack)
+			thread_stack__event(thread, btsq->cpu, btsq->sample_flags,
+					    le64_to_cpu(branch->from),
+					    le64_to_cpu(branch->to),
+					    btsq->intel_pt_insn.length,
+					    buffer->buffer_nr + 1, true, 0, 0);
+		if (filter && !(filter & btsq->sample_flags))
+			continue;
+		err = intel_bts_synth_branch_sample(btsq, branch);
+		if (err)
+			break;
+	}
+	return err;
+}
+
+static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
+{
+	struct auxtrace_buffer *buffer = btsq->buffer, *old_buffer = buffer;
+	struct auxtrace_queue *queue;
+	struct thread *thread;
+	int err;
+
+	if (btsq->done)
+		return 1;
+
+	if (btsq->pid == -1) {
+		thread = machine__find_thread(btsq->bts->machine, -1,
+					      btsq->tid);
+		if (thread)
+			btsq->pid = thread->pid_;
+	} else {
+		thread = machine__findnew_thread(btsq->bts->machine, btsq->pid,
+						 btsq->tid);
+	}
+
+	queue = &btsq->bts->queues.queue_array[btsq->queue_nr];
+
+	if (!buffer)
+		buffer = auxtrace_buffer__next(queue, NULL);
+
+	if (!buffer) {
+		if (!btsq->bts->sampling_mode)
+			btsq->done = 1;
+		err = 1;
+		goto out_put;
+	}
+
+	/* Currently there is no support for split buffers */
+	if (buffer->consecutive) {
+		err = -EINVAL;
+		goto out_put;
+	}
+
+	if (!buffer->data) {
+		int fd = perf_data__fd(btsq->bts->session->data);
+
+		buffer->data = auxtrace_buffer__get_data(buffer, fd);
+		if (!buffer->data) {
+			err = -ENOMEM;
+			goto out_put;
+		}
+	}
+
+	if (btsq->bts->snapshot_mode && !buffer->consecutive &&
+	    intel_bts_do_fix_overlap(queue, buffer)) {
+		err = -ENOMEM;
+		goto out_put;
+	}
+
+	if (!btsq->bts->synth_opts.callchain &&
+	    !btsq->bts->synth_opts.thread_stack && thread &&
+	    (!old_buffer || btsq->bts->sampling_mode ||
+	     (btsq->bts->snapshot_mode && !buffer->consecutive)))
+		thread_stack__set_trace_nr(thread, btsq->cpu, buffer->buffer_nr + 1);
+
+	err = intel_bts_process_buffer(btsq, buffer, thread);
+
+	auxtrace_buffer__drop_data(buffer);
+
+	btsq->buffer = auxtrace_buffer__next(queue, buffer);
+	if (btsq->buffer) {
+		if (timestamp)
+			*timestamp = btsq->buffer->reference;
+	} else {
+		if (!btsq->bts->sampling_mode)
+			btsq->done = 1;
+	}
+out_put:
+	thread__put(thread);
+	return err;
+}
+
+static int intel_bts_flush_queue(struct intel_bts_queue *btsq)
+{
+	u64 ts = 0;
+	int ret;
+
+	while (1) {
+		ret = intel_bts_process_queue(btsq, &ts);
+		if (ret < 0)
+			return ret;
+		if (ret)
+			break;
+	}
+	return 0;
+}
+
+static int intel_bts_process_tid_exit(struct intel_bts *bts, pid_t tid)
+{
+	struct auxtrace_queues *queues = &bts->queues;
+	unsigned int i;
+
+	for (i = 0; i < queues->nr_queues; i++) {
+		struct auxtrace_queue *queue = &bts->queues.queue_array[i];
+		struct intel_bts_queue *btsq = queue->priv;
+
+		if (btsq && btsq->tid == tid)
+			return intel_bts_flush_queue(btsq);
+	}
+	return 0;
+}
+
+static int intel_bts_process_queues(struct intel_bts *bts, u64 timestamp)
+{
+	while (1) {
+		unsigned int queue_nr;
+		struct auxtrace_queue *queue;
+		struct intel_bts_queue *btsq;
+		u64 ts = 0;
+		int ret;
+
+		if (!bts->heap.heap_cnt)
+			return 0;
+
+		if (bts->heap.heap_array[0].ordinal > timestamp)
+			return 0;
+
+		queue_nr = bts->heap.heap_array[0].queue_nr;
+		queue = &bts->queues.queue_array[queue_nr];
+		btsq = queue->priv;
+
+		auxtrace_heap__pop(&bts->heap);
+
+		ret = intel_bts_process_queue(btsq, &ts);
+		if (ret < 0) {
+			auxtrace_heap__add(&bts->heap, queue_nr, ts);
+			return ret;
+		}
+
+		if (!ret) {
+			ret = auxtrace_heap__add(&bts->heap, queue_nr, ts);
+			if (ret < 0)
+				return ret;
+		} else {
+			btsq->on_heap = false;
+		}
+	}
+
+	return 0;
+}
+
+static int intel_bts_process_event(struct perf_session *session,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct perf_tool *tool)
+{
+	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+					     auxtrace);
+	u64 timestamp;
+	int err;
+
+	if (dump_trace)
+		return 0;
+
+	if (!tool->ordered_events) {
+		pr_err("Intel BTS requires ordered events\n");
+		return -EINVAL;
+	}
+
+	if (sample->time && sample->time != (u64)-1)
+		timestamp = perf_time_to_tsc(sample->time, &bts->tc);
+	else
+		timestamp = 0;
+
+	err = intel_bts_update_queues(bts);
+	if (err)
+		return err;
+
+	err = intel_bts_process_queues(bts, timestamp);
+	if (err)
+		return err;
+	if (event->header.type == PERF_RECORD_EXIT) {
+		err = intel_bts_process_tid_exit(bts, event->fork.tid);
+		if (err)
+			return err;
+	}
+
+	if (event->header.type == PERF_RECORD_AUX &&
+	    (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
+	    bts->synth_opts.errors)
+		err = intel_bts_lost(bts, sample);
+
+	return err;
+}
+
+static int intel_bts_process_auxtrace_event(struct perf_session *session,
+					    union perf_event *event,
+					    struct perf_tool *tool __maybe_unused)
+{
+	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+					     auxtrace);
+
+	if (bts->sampling_mode)
+		return 0;
+
+	if (!bts->data_queued) {
+		struct auxtrace_buffer *buffer;
+		off_t data_offset;
+		int fd = perf_data__fd(session->data);
+		int err;
+
+		if (perf_data__is_pipe(session->data)) {
+			data_offset = 0;
+		} else {
+			data_offset = lseek(fd, 0, SEEK_CUR);
+			if (data_offset == -1)
+				return -errno;
+		}
+
+		err = auxtrace_queues__add_event(&bts->queues, session, event,
+						 data_offset, &buffer);
+		if (err)
+			return err;
+
+		/* Dump here now we have copied a piped trace out of the pipe */
+		if (dump_trace) {
+			if (auxtrace_buffer__get_data(buffer, fd)) {
+				intel_bts_dump_event(bts, buffer->data,
+						     buffer->size);
+				auxtrace_buffer__put_data(buffer);
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int intel_bts_flush(struct perf_session *session,
+			   struct perf_tool *tool __maybe_unused)
+{
+	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+					     auxtrace);
+	int ret;
+
+	if (dump_trace || bts->sampling_mode)
+		return 0;
+
+	if (!tool->ordered_events)
+		return -EINVAL;
+
+	ret = intel_bts_update_queues(bts);
+	if (ret < 0)
+		return ret;
+
+	return intel_bts_process_queues(bts, MAX_TIMESTAMP);
+}
+
+static void intel_bts_free_queue(void *priv)
+{
+	struct intel_bts_queue *btsq = priv;
+
+	if (!btsq)
+		return;
+	free(btsq);
+}
+
+static void intel_bts_free_events(struct perf_session *session)
+{
+	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+					     auxtrace);
+	struct auxtrace_queues *queues = &bts->queues;
+	unsigned int i;
+
+	for (i = 0; i < queues->nr_queues; i++) {
+		intel_bts_free_queue(queues->queue_array[i].priv);
+		queues->queue_array[i].priv = NULL;
+	}
+	auxtrace_queues__free(queues);
+}
+
+static void intel_bts_free(struct perf_session *session)
+{
+	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+					     auxtrace);
+
+	auxtrace_heap__free(&bts->heap);
+	intel_bts_free_events(session);
+	session->auxtrace = NULL;
+	free(bts);
+}
+
+static bool intel_bts_evsel_is_auxtrace(struct perf_session *session,
+					struct evsel *evsel)
+{
+	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+					     auxtrace);
+
+	return evsel->core.attr.type == bts->pmu_type;
+}
+
+struct intel_bts_synth {
+	struct perf_tool dummy_tool;
+	struct perf_session *session;
+};
+
+static int intel_bts_event_synth(struct perf_tool *tool,
+				 union perf_event *event,
+				 struct perf_sample *sample __maybe_unused,
+				 struct machine *machine __maybe_unused)
+{
+	struct intel_bts_synth *intel_bts_synth =
+			container_of(tool, struct intel_bts_synth, dummy_tool);
+
+	return perf_session__deliver_synth_event(intel_bts_synth->session,
+						 event, NULL);
+}
+
+static int intel_bts_synth_event(struct perf_session *session,
+				 struct perf_event_attr *attr, u64 id)
+{
+	struct intel_bts_synth intel_bts_synth;
+
+	memset(&intel_bts_synth, 0, sizeof(struct intel_bts_synth));
+	intel_bts_synth.session = session;
+
+	return perf_event__synthesize_attr(&intel_bts_synth.dummy_tool, attr, 1,
+					   &id, intel_bts_event_synth);
+}
+
+static int intel_bts_synth_events(struct intel_bts *bts,
+				  struct perf_session *session)
+{
+	struct evlist *evlist = session->evlist;
+	struct evsel *evsel;
+	struct perf_event_attr attr;
+	bool found = false;
+	u64 id;
+	int err;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->core.attr.type == bts->pmu_type && evsel->core.ids) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		pr_debug("There are no selected events with Intel BTS data\n");
+		return 0;
+	}
+
+	memset(&attr, 0, sizeof(struct perf_event_attr));
+	attr.size = sizeof(struct perf_event_attr);
+	attr.type = PERF_TYPE_HARDWARE;
+	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
+	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+			    PERF_SAMPLE_PERIOD;
+	attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
+	attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
+	attr.exclude_user = evsel->core.attr.exclude_user;
+	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
+	attr.exclude_hv = evsel->core.attr.exclude_hv;
+	attr.exclude_host = evsel->core.attr.exclude_host;
+	attr.exclude_guest = evsel->core.attr.exclude_guest;
+	attr.sample_id_all = evsel->core.attr.sample_id_all;
+	attr.read_format = evsel->core.attr.read_format;
+
+	id = evsel->core.id[0] + 1000000000;
+	if (!id)
+		id = 1;
+
+	if (bts->synth_opts.branches) {
+		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+		attr.sample_period = 1;
+		attr.sample_type |= PERF_SAMPLE_ADDR;
+		pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
+			 id, (u64)attr.sample_type);
+		err = intel_bts_synth_event(session, &attr, id);
+		if (err) {
+			pr_err("%s: failed to synthesize 'branches' event type\n",
+			       __func__);
+			return err;
+		}
+		bts->sample_branches = true;
+		bts->branches_sample_type = attr.sample_type;
+		bts->branches_id = id;
+		/*
+		 * We only use sample types from PERF_SAMPLE_MASK so we can use
+		 * __evsel__sample_size() here.
+		 */
+		bts->branches_event_size = sizeof(struct perf_record_sample) +
+					   __evsel__sample_size(attr.sample_type);
+	}
+
+	return 0;
+}
+
+static const char * const intel_bts_info_fmts[] = {
+	[INTEL_BTS_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
+	[INTEL_BTS_TIME_SHIFT]		= "  Time Shift         %"PRIu64"\n",
+	[INTEL_BTS_TIME_MULT]		= "  Time Muliplier     %"PRIu64"\n",
+	[INTEL_BTS_TIME_ZERO]		= "  Time Zero          %"PRIu64"\n",
+	[INTEL_BTS_CAP_USER_TIME_ZERO]	= "  Cap Time Zero      %"PRId64"\n",
+	[INTEL_BTS_SNAPSHOT_MODE]	= "  Snapshot mode      %"PRId64"\n",
+};
+
+static void intel_bts_print_info(__u64 *arr, int start, int finish)
+{
+	int i;
+
+	if (!dump_trace)
+		return;
+
+	for (i = start; i <= finish; i++)
+		fprintf(stdout, intel_bts_info_fmts[i], arr[i]);
+}
+
+int intel_bts_process_auxtrace_info(union perf_event *event,
+				    struct perf_session *session)
+{
+	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
+	size_t min_sz = sizeof(u64) * INTEL_BTS_SNAPSHOT_MODE;
+	struct intel_bts *bts;
+	int err;
+
+	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
+					min_sz)
+		return -EINVAL;
+
+	bts = zalloc(sizeof(struct intel_bts));
+	if (!bts)
+		return -ENOMEM;
+
+	err = auxtrace_queues__init(&bts->queues);
+	if (err)
+		goto err_free;
+
+	bts->session = session;
+	bts->machine = &session->machines.host; /* No kvm support */
+	bts->auxtrace_type = auxtrace_info->type;
+	bts->pmu_type = auxtrace_info->priv[INTEL_BTS_PMU_TYPE];
+	bts->tc.time_shift = auxtrace_info->priv[INTEL_BTS_TIME_SHIFT];
+	bts->tc.time_mult = auxtrace_info->priv[INTEL_BTS_TIME_MULT];
+	bts->tc.time_zero = auxtrace_info->priv[INTEL_BTS_TIME_ZERO];
+	bts->cap_user_time_zero =
+			auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO];
+	bts->snapshot_mode = auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE];
+
+	bts->sampling_mode = false;
+
+	bts->auxtrace.process_event = intel_bts_process_event;
+	bts->auxtrace.process_auxtrace_event = intel_bts_process_auxtrace_event;
+	bts->auxtrace.flush_events = intel_bts_flush;
+	bts->auxtrace.free_events = intel_bts_free_events;
+	bts->auxtrace.free = intel_bts_free;
+	bts->auxtrace.evsel_is_auxtrace = intel_bts_evsel_is_auxtrace;
+	session->auxtrace = &bts->auxtrace;
+
+	intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE,
+			     INTEL_BTS_SNAPSHOT_MODE);
+
+	if (dump_trace)
+		return 0;
+
+	if (session->itrace_synth_opts->set) {
+		bts->synth_opts = *session->itrace_synth_opts;
+	} else {
+		itrace_synth_opts__set_default(&bts->synth_opts,
+				session->itrace_synth_opts->default_no_sample);
+		bts->synth_opts.thread_stack =
+				session->itrace_synth_opts->thread_stack;
+	}
+
+	if (bts->synth_opts.calls)
+		bts->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
+					PERF_IP_FLAG_TRACE_END;
+	if (bts->synth_opts.returns)
+		bts->branches_filter |= PERF_IP_FLAG_RETURN |
+					PERF_IP_FLAG_TRACE_BEGIN;
+
+	err = intel_bts_synth_events(bts, session);
+	if (err)
+		goto err_free_queues;
+
+	err = auxtrace_queues__process_index(&bts->queues, session);
+	if (err)
+		goto err_free_queues;
+
+	if (bts->queues.populated)
+		bts->data_queued = true;
+
+	return 0;
+
+err_free_queues:
+	auxtrace_queues__free(&bts->queues);
+	session->auxtrace = NULL;
+err_free:
+	free(bts);
+	return err;
+}
diff --git a/tools/perf/util/intel-bts.h b/tools/perf/util/intel-bts.h
new file mode 100644
index 000000000..53d5aa027
--- /dev/null
+++ b/tools/perf/util/intel-bts.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * intel-bts.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ */
+
+#ifndef INCLUDE__PERF_INTEL_BTS_H__
+#define INCLUDE__PERF_INTEL_BTS_H__
+
+#define INTEL_BTS_PMU_NAME "intel_bts"
+
+enum {
+	INTEL_BTS_PMU_TYPE,
+	INTEL_BTS_TIME_SHIFT,
+	INTEL_BTS_TIME_MULT,
+	INTEL_BTS_TIME_ZERO,
+	INTEL_BTS_CAP_USER_TIME_ZERO,
+	INTEL_BTS_SNAPSHOT_MODE,
+	INTEL_BTS_AUXTRACE_PRIV_MAX,
+};
+
+#define INTEL_BTS_AUXTRACE_PRIV_SIZE (INTEL_BTS_AUXTRACE_PRIV_MAX * sizeof(u64))
+
+struct auxtrace_record;
+struct perf_tool;
+union perf_event;
+struct perf_session;
+
+struct auxtrace_record *intel_bts_recording_init(int *err);
+
+int intel_bts_process_auxtrace_info(union perf_event *event,
+				    struct perf_session *session);
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build
new file mode 100644
index 000000000..b41c2e9c6
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/Build
@@ -0,0 +1,22 @@
+perf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o
+
+inat_tables_script = $(srctree)/tools/arch/x86/tools/gen-insn-attr-x86.awk
+inat_tables_maps = $(srctree)/tools/arch/x86/lib/x86-opcode-map.txt
+
+$(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
+	$(call rule_mkdir)
+	@$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@
+
+# Busybox's diff doesn't have -I, avoid warning in the case
+
+$(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/intel-pt-insn-decoder.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
+
+CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder
+
+ifeq ($(CC_NO_CLANG), 1)
+  CFLAGS_intel-pt-insn-decoder.o += -Wno-override-init
+endif
+
+CFLAGS_intel-pt-insn-decoder.o += -Wno-packed
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
new file mode 100644
index 000000000..7145c5890
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -0,0 +1,4513 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * intel_pt_decoder.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <linux/compiler.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+
+#include "../auxtrace.h"
+
+#include "intel-pt-insn-decoder.h"
+#include "intel-pt-pkt-decoder.h"
+#include "intel-pt-decoder.h"
+#include "intel-pt-log.h"
+
+#define BITULL(x) (1ULL << (x))
+
+/* IA32_RTIT_CTL MSR bits */
+#define INTEL_PT_CYC_ENABLE		BITULL(1)
+#define INTEL_PT_CYC_THRESHOLD		(BITULL(22) | BITULL(21) | BITULL(20) | BITULL(19))
+#define INTEL_PT_CYC_THRESHOLD_SHIFT	19
+
+#define INTEL_PT_BLK_SIZE 1024
+
+#define BIT63 (((uint64_t)1 << 63))
+
+#define SEVEN_BYTES 0xffffffffffffffULL
+
+#define NO_VMCS 0xffffffffffULL
+
+#define INTEL_PT_RETURN 1
+
+/*
+ * Default maximum number of loops with no packets consumed i.e. stuck in a
+ * loop.
+ */
+#define INTEL_PT_MAX_LOOPS 100000
+
+struct intel_pt_blk {
+	struct intel_pt_blk *prev;
+	uint64_t ip[INTEL_PT_BLK_SIZE];
+};
+
+struct intel_pt_stack {
+	struct intel_pt_blk *blk;
+	struct intel_pt_blk *spare;
+	int pos;
+};
+
+enum intel_pt_p_once {
+	INTEL_PT_PRT_ONCE_UNK_VMCS,
+	INTEL_PT_PRT_ONCE_ERANGE,
+};
+
+enum intel_pt_pkt_state {
+	INTEL_PT_STATE_NO_PSB,
+	INTEL_PT_STATE_NO_IP,
+	INTEL_PT_STATE_ERR_RESYNC,
+	INTEL_PT_STATE_IN_SYNC,
+	INTEL_PT_STATE_TNT_CONT,
+	INTEL_PT_STATE_TNT,
+	INTEL_PT_STATE_TIP,
+	INTEL_PT_STATE_TIP_PGD,
+	INTEL_PT_STATE_FUP,
+	INTEL_PT_STATE_FUP_NO_TIP,
+	INTEL_PT_STATE_FUP_IN_PSB,
+	INTEL_PT_STATE_RESAMPLE,
+	INTEL_PT_STATE_VM_TIME_CORRELATION,
+};
+
+static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
+{
+	switch (pkt_state) {
+	case INTEL_PT_STATE_NO_PSB:
+	case INTEL_PT_STATE_NO_IP:
+	case INTEL_PT_STATE_ERR_RESYNC:
+	case INTEL_PT_STATE_IN_SYNC:
+	case INTEL_PT_STATE_TNT_CONT:
+	case INTEL_PT_STATE_RESAMPLE:
+	case INTEL_PT_STATE_VM_TIME_CORRELATION:
+		return true;
+	case INTEL_PT_STATE_TNT:
+	case INTEL_PT_STATE_TIP:
+	case INTEL_PT_STATE_TIP_PGD:
+	case INTEL_PT_STATE_FUP:
+	case INTEL_PT_STATE_FUP_NO_TIP:
+	case INTEL_PT_STATE_FUP_IN_PSB:
+		return false;
+	default:
+		return true;
+	};
+}
+
+#ifdef INTEL_PT_STRICT
+#define INTEL_PT_STATE_ERR1	INTEL_PT_STATE_NO_PSB
+#define INTEL_PT_STATE_ERR2	INTEL_PT_STATE_NO_PSB
+#define INTEL_PT_STATE_ERR3	INTEL_PT_STATE_NO_PSB
+#define INTEL_PT_STATE_ERR4	INTEL_PT_STATE_NO_PSB
+#else
+#define INTEL_PT_STATE_ERR1	(decoder->pkt_state)
+#define INTEL_PT_STATE_ERR2	INTEL_PT_STATE_NO_IP
+#define INTEL_PT_STATE_ERR3	INTEL_PT_STATE_ERR_RESYNC
+#define INTEL_PT_STATE_ERR4	INTEL_PT_STATE_IN_SYNC
+#endif
+
+struct intel_pt_decoder {
+	int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
+	int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
+			 uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
+			 uint64_t max_insn_cnt, void *data);
+	bool (*pgd_ip)(uint64_t ip, void *data);
+	int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data);
+	struct intel_pt_vmcs_info *(*findnew_vmcs_info)(void *data, uint64_t vmcs);
+	void *data;
+	struct intel_pt_state state;
+	const unsigned char *buf;
+	size_t len;
+	bool return_compression;
+	bool branch_enable;
+	bool mtc_insn;
+	bool pge;
+	bool have_tma;
+	bool have_cyc;
+	bool fixup_last_mtc;
+	bool have_last_ip;
+	bool in_psb;
+	bool hop;
+	bool leap;
+	bool emulated_ptwrite;
+	bool vm_time_correlation;
+	bool vm_tm_corr_dry_run;
+	bool vm_tm_corr_reliable;
+	bool vm_tm_corr_same_buf;
+	bool vm_tm_corr_continuous;
+	bool nr;
+	bool next_nr;
+	bool iflag;
+	bool next_iflag;
+	enum intel_pt_param_flags flags;
+	uint64_t pos;
+	uint64_t last_ip;
+	uint64_t ip;
+	uint64_t pip_payload;
+	uint64_t timestamp;
+	uint64_t tsc_timestamp;
+	uint64_t ref_timestamp;
+	uint64_t buf_timestamp;
+	uint64_t sample_timestamp;
+	uint64_t ret_addr;
+	uint64_t ctc_timestamp;
+	uint64_t ctc_delta;
+	uint64_t cycle_cnt;
+	uint64_t cyc_ref_timestamp;
+	uint64_t first_timestamp;
+	uint64_t last_reliable_timestamp;
+	uint64_t vmcs;
+	uint64_t print_once;
+	uint64_t last_ctc;
+	uint32_t last_mtc;
+	uint32_t tsc_ctc_ratio_n;
+	uint32_t tsc_ctc_ratio_d;
+	uint32_t tsc_ctc_mult;
+	uint32_t tsc_slip;
+	uint32_t ctc_rem_mask;
+	int mtc_shift;
+	struct intel_pt_stack stack;
+	enum intel_pt_pkt_state pkt_state;
+	enum intel_pt_pkt_ctx pkt_ctx;
+	enum intel_pt_pkt_ctx prev_pkt_ctx;
+	enum intel_pt_blk_type blk_type;
+	int blk_type_pos;
+	struct intel_pt_pkt packet;
+	struct intel_pt_pkt tnt;
+	int pkt_step;
+	int pkt_len;
+	int last_packet_type;
+	unsigned int cbr;
+	unsigned int cbr_seen;
+	unsigned int max_non_turbo_ratio;
+	double max_non_turbo_ratio_fp;
+	double cbr_cyc_to_tsc;
+	double calc_cyc_to_tsc;
+	bool have_calc_cyc_to_tsc;
+	int exec_mode;
+	unsigned int insn_bytes;
+	uint64_t period;
+	enum intel_pt_period_type period_type;
+	uint64_t tot_insn_cnt;
+	uint64_t period_insn_cnt;
+	uint64_t period_mask;
+	uint64_t period_ticks;
+	uint64_t last_masked_timestamp;
+	uint64_t tot_cyc_cnt;
+	uint64_t sample_tot_cyc_cnt;
+	uint64_t base_cyc_cnt;
+	uint64_t cyc_cnt_timestamp;
+	uint64_t ctl;
+	uint64_t cyc_threshold;
+	double tsc_to_cyc;
+	bool continuous_period;
+	bool overflow;
+	bool set_fup_tx_flags;
+	bool set_fup_ptw;
+	bool set_fup_mwait;
+	bool set_fup_pwre;
+	bool set_fup_exstop;
+	bool set_fup_bep;
+	bool set_fup_cfe_ip;
+	bool set_fup_cfe;
+	bool set_fup_mode_exec;
+	bool sample_cyc;
+	unsigned int fup_tx_flags;
+	unsigned int tx_flags;
+	uint64_t fup_ptw_payload;
+	uint64_t fup_mwait_payload;
+	uint64_t fup_pwre_payload;
+	uint64_t cbr_payload;
+	uint64_t timestamp_insn_cnt;
+	uint64_t sample_insn_cnt;
+	uint64_t stuck_ip;
+	struct intel_pt_pkt fup_cfe_pkt;
+	int max_loops;
+	int no_progress;
+	int stuck_ip_prd;
+	int stuck_ip_cnt;
+	uint64_t psb_ip;
+	const unsigned char *next_buf;
+	size_t next_len;
+	unsigned char temp_buf[INTEL_PT_PKT_MAX_SZ];
+	int evd_cnt;
+	struct intel_pt_evd evd[INTEL_PT_MAX_EVDS];
+};
+
+static uint64_t intel_pt_lower_power_of_2(uint64_t x)
+{
+	int i;
+
+	for (i = 0; x != 1; i++)
+		x >>= 1;
+
+	return x << i;
+}
+
+__printf(1, 2)
+static void p_log(const char *fmt, ...)
+{
+	char buf[512];
+	va_list args;
+
+	va_start(args, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, args);
+	va_end(args);
+
+	fprintf(stderr, "%s\n", buf);
+	intel_pt_log("%s\n", buf);
+}
+
+static bool intel_pt_print_once(struct intel_pt_decoder *decoder,
+				enum intel_pt_p_once id)
+{
+	uint64_t bit = 1ULL << id;
+
+	if (decoder->print_once & bit)
+		return false;
+	decoder->print_once |= bit;
+	return true;
+}
+
+static uint64_t intel_pt_cyc_threshold(uint64_t ctl)
+{
+	if (!(ctl & INTEL_PT_CYC_ENABLE))
+		return 0;
+
+	return (ctl & INTEL_PT_CYC_THRESHOLD) >> INTEL_PT_CYC_THRESHOLD_SHIFT;
+}
+
+static void intel_pt_setup_period(struct intel_pt_decoder *decoder)
+{
+	if (decoder->period_type == INTEL_PT_PERIOD_TICKS) {
+		uint64_t period;
+
+		period = intel_pt_lower_power_of_2(decoder->period);
+		decoder->period_mask  = ~(period - 1);
+		decoder->period_ticks = period;
+	}
+}
+
+static uint64_t multdiv(uint64_t t, uint32_t n, uint32_t d)
+{
+	if (!d)
+		return 0;
+	return (t / d) * n + ((t % d) * n) / d;
+}
+
+struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
+{
+	struct intel_pt_decoder *decoder;
+
+	if (!params->get_trace || !params->walk_insn)
+		return NULL;
+
+	decoder = zalloc(sizeof(struct intel_pt_decoder));
+	if (!decoder)
+		return NULL;
+
+	decoder->get_trace          = params->get_trace;
+	decoder->walk_insn          = params->walk_insn;
+	decoder->pgd_ip             = params->pgd_ip;
+	decoder->lookahead          = params->lookahead;
+	decoder->findnew_vmcs_info  = params->findnew_vmcs_info;
+	decoder->data               = params->data;
+	decoder->return_compression = params->return_compression;
+	decoder->branch_enable      = params->branch_enable;
+	decoder->hop                = params->quick >= 1;
+	decoder->leap               = params->quick >= 2;
+	decoder->vm_time_correlation = params->vm_time_correlation;
+	decoder->vm_tm_corr_dry_run = params->vm_tm_corr_dry_run;
+	decoder->first_timestamp    = params->first_timestamp;
+	decoder->last_reliable_timestamp = params->first_timestamp;
+	decoder->max_loops          = params->max_loops ? params->max_loops : INTEL_PT_MAX_LOOPS;
+
+	decoder->flags              = params->flags;
+
+	decoder->ctl                = params->ctl;
+	decoder->period             = params->period;
+	decoder->period_type        = params->period_type;
+
+	decoder->max_non_turbo_ratio    = params->max_non_turbo_ratio;
+	decoder->max_non_turbo_ratio_fp = params->max_non_turbo_ratio;
+
+	decoder->cyc_threshold = intel_pt_cyc_threshold(decoder->ctl);
+
+	intel_pt_setup_period(decoder);
+
+	decoder->mtc_shift = params->mtc_period;
+	decoder->ctc_rem_mask = (1 << decoder->mtc_shift) - 1;
+
+	decoder->tsc_ctc_ratio_n = params->tsc_ctc_ratio_n;
+	decoder->tsc_ctc_ratio_d = params->tsc_ctc_ratio_d;
+
+	if (!decoder->tsc_ctc_ratio_n)
+		decoder->tsc_ctc_ratio_d = 0;
+
+	if (decoder->tsc_ctc_ratio_d) {
+		if (!(decoder->tsc_ctc_ratio_n % decoder->tsc_ctc_ratio_d))
+			decoder->tsc_ctc_mult = decoder->tsc_ctc_ratio_n /
+						decoder->tsc_ctc_ratio_d;
+	}
+
+	/*
+	 * A TSC packet can slip past MTC packets so that the timestamp appears
+	 * to go backwards. One estimate is that can be up to about 40 CPU
+	 * cycles, which is certainly less than 0x1000 TSC ticks, but accept
+	 * slippage an order of magnitude more to be on the safe side.
+	 */
+	decoder->tsc_slip = 0x10000;
+
+	intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift);
+	intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n);
+	intel_pt_log("timestamp: tsc_ctc_ratio_d %u\n", decoder->tsc_ctc_ratio_d);
+	intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult);
+	intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip);
+
+	if (decoder->hop)
+		intel_pt_log("Hop mode: decoding FUP and TIPs, but not TNT\n");
+
+	return decoder;
+}
+
+void intel_pt_set_first_timestamp(struct intel_pt_decoder *decoder,
+				  uint64_t first_timestamp)
+{
+	decoder->first_timestamp = first_timestamp;
+}
+
+static void intel_pt_pop_blk(struct intel_pt_stack *stack)
+{
+	struct intel_pt_blk *blk = stack->blk;
+
+	stack->blk = blk->prev;
+	if (!stack->spare)
+		stack->spare = blk;
+	else
+		free(blk);
+}
+
+static uint64_t intel_pt_pop(struct intel_pt_stack *stack)
+{
+	if (!stack->pos) {
+		if (!stack->blk)
+			return 0;
+		intel_pt_pop_blk(stack);
+		if (!stack->blk)
+			return 0;
+		stack->pos = INTEL_PT_BLK_SIZE;
+	}
+	return stack->blk->ip[--stack->pos];
+}
+
+static int intel_pt_alloc_blk(struct intel_pt_stack *stack)
+{
+	struct intel_pt_blk *blk;
+
+	if (stack->spare) {
+		blk = stack->spare;
+		stack->spare = NULL;
+	} else {
+		blk = malloc(sizeof(struct intel_pt_blk));
+		if (!blk)
+			return -ENOMEM;
+	}
+
+	blk->prev = stack->blk;
+	stack->blk = blk;
+	stack->pos = 0;
+	return 0;
+}
+
+static int intel_pt_push(struct intel_pt_stack *stack, uint64_t ip)
+{
+	int err;
+
+	if (!stack->blk || stack->pos == INTEL_PT_BLK_SIZE) {
+		err = intel_pt_alloc_blk(stack);
+		if (err)
+			return err;
+	}
+
+	stack->blk->ip[stack->pos++] = ip;
+	return 0;
+}
+
+static void intel_pt_clear_stack(struct intel_pt_stack *stack)
+{
+	while (stack->blk)
+		intel_pt_pop_blk(stack);
+	stack->pos = 0;
+}
+
+static void intel_pt_free_stack(struct intel_pt_stack *stack)
+{
+	intel_pt_clear_stack(stack);
+	zfree(&stack->blk);
+	zfree(&stack->spare);
+}
+
+void intel_pt_decoder_free(struct intel_pt_decoder *decoder)
+{
+	intel_pt_free_stack(&decoder->stack);
+	free(decoder);
+}
+
+static int intel_pt_ext_err(int code)
+{
+	switch (code) {
+	case -ENOMEM:
+		return INTEL_PT_ERR_NOMEM;
+	case -ENOSYS:
+		return INTEL_PT_ERR_INTERN;
+	case -EBADMSG:
+		return INTEL_PT_ERR_BADPKT;
+	case -ENODATA:
+		return INTEL_PT_ERR_NODATA;
+	case -EILSEQ:
+		return INTEL_PT_ERR_NOINSN;
+	case -ENOENT:
+		return INTEL_PT_ERR_MISMAT;
+	case -EOVERFLOW:
+		return INTEL_PT_ERR_OVR;
+	case -ENOSPC:
+		return INTEL_PT_ERR_LOST;
+	case -ELOOP:
+		return INTEL_PT_ERR_NELOOP;
+	case -ECONNRESET:
+		return INTEL_PT_ERR_EPTW;
+	default:
+		return INTEL_PT_ERR_UNK;
+	}
+}
+
+static const char *intel_pt_err_msgs[] = {
+	[INTEL_PT_ERR_NOMEM]  = "Memory allocation failed",
+	[INTEL_PT_ERR_INTERN] = "Internal error",
+	[INTEL_PT_ERR_BADPKT] = "Bad packet",
+	[INTEL_PT_ERR_NODATA] = "No more data",
+	[INTEL_PT_ERR_NOINSN] = "Failed to get instruction",
+	[INTEL_PT_ERR_MISMAT] = "Trace doesn't match instruction",
+	[INTEL_PT_ERR_OVR]    = "Overflow packet",
+	[INTEL_PT_ERR_LOST]   = "Lost trace data",
+	[INTEL_PT_ERR_UNK]    = "Unknown error!",
+	[INTEL_PT_ERR_NELOOP] = "Never-ending loop (refer perf config intel-pt.max-loops)",
+	[INTEL_PT_ERR_EPTW]   = "Broken emulated ptwrite",
+};
+
+int intel_pt__strerror(int code, char *buf, size_t buflen)
+{
+	if (code < 1 || code >= INTEL_PT_ERR_MAX)
+		code = INTEL_PT_ERR_UNK;
+	strlcpy(buf, intel_pt_err_msgs[code], buflen);
+	return 0;
+}
+
+static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet,
+				 uint64_t last_ip)
+{
+	uint64_t ip;
+
+	switch (packet->count) {
+	case 1:
+		ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) |
+		     packet->payload;
+		break;
+	case 2:
+		ip = (last_ip & (uint64_t)0xffffffff00000000ULL) |
+		     packet->payload;
+		break;
+	case 3:
+		ip = packet->payload;
+		/* Sign-extend 6-byte ip */
+		if (ip & (uint64_t)0x800000000000ULL)
+			ip |= (uint64_t)0xffff000000000000ULL;
+		break;
+	case 4:
+		ip = (last_ip & (uint64_t)0xffff000000000000ULL) |
+		     packet->payload;
+		break;
+	case 6:
+		ip = packet->payload;
+		break;
+	default:
+		return 0;
+	}
+
+	return ip;
+}
+
+static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
+{
+	decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip);
+	decoder->have_last_ip = true;
+}
+
+static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
+{
+	intel_pt_set_last_ip(decoder);
+	decoder->ip = decoder->last_ip;
+}
+
+static void intel_pt_decoder_log_packet(struct intel_pt_decoder *decoder)
+{
+	intel_pt_log_packet(&decoder->packet, decoder->pkt_len, decoder->pos,
+			    decoder->buf);
+}
+
+static int intel_pt_bug(struct intel_pt_decoder *decoder)
+{
+	intel_pt_log("ERROR: Internal error\n");
+	decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
+	return -ENOSYS;
+}
+
+static inline void intel_pt_clear_tx_flags(struct intel_pt_decoder *decoder)
+{
+	decoder->tx_flags = 0;
+}
+
+static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder)
+{
+	decoder->tx_flags = decoder->packet.payload & INTEL_PT_IN_TX;
+}
+
+static inline void intel_pt_update_pip(struct intel_pt_decoder *decoder)
+{
+	decoder->pip_payload = decoder->packet.payload;
+}
+
+static inline void intel_pt_update_nr(struct intel_pt_decoder *decoder)
+{
+	decoder->next_nr = decoder->pip_payload & 1;
+}
+
+static inline void intel_pt_set_nr(struct intel_pt_decoder *decoder)
+{
+	decoder->nr = decoder->pip_payload & 1;
+	decoder->next_nr = decoder->nr;
+}
+
+static inline void intel_pt_set_pip(struct intel_pt_decoder *decoder)
+{
+	intel_pt_update_pip(decoder);
+	intel_pt_set_nr(decoder);
+}
+
+static int intel_pt_bad_packet(struct intel_pt_decoder *decoder)
+{
+	intel_pt_clear_tx_flags(decoder);
+	decoder->have_tma = false;
+	decoder->pkt_len = 1;
+	decoder->pkt_step = 1;
+	intel_pt_decoder_log_packet(decoder);
+	if (decoder->pkt_state != INTEL_PT_STATE_NO_PSB) {
+		intel_pt_log("ERROR: Bad packet\n");
+		decoder->pkt_state = INTEL_PT_STATE_ERR1;
+	}
+	return -EBADMSG;
+}
+
+static inline void intel_pt_update_sample_time(struct intel_pt_decoder *decoder)
+{
+	decoder->sample_timestamp = decoder->timestamp;
+	decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+	decoder->state.cycles = decoder->tot_cyc_cnt;
+}
+
+static void intel_pt_reposition(struct intel_pt_decoder *decoder)
+{
+	decoder->ip = 0;
+	decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
+	decoder->timestamp = 0;
+	decoder->have_tma = false;
+}
+
+static int intel_pt_get_data(struct intel_pt_decoder *decoder, bool reposition)
+{
+	struct intel_pt_buffer buffer = { .buf = 0, };
+	int ret;
+
+	decoder->pkt_step = 0;
+
+	intel_pt_log("Getting more data\n");
+	ret = decoder->get_trace(&buffer, decoder->data);
+	if (ret)
+		return ret;
+	decoder->buf = buffer.buf;
+	decoder->len = buffer.len;
+	if (!decoder->len) {
+		intel_pt_log("No more data\n");
+		return -ENODATA;
+	}
+	decoder->buf_timestamp = buffer.ref_timestamp;
+	if (!buffer.consecutive || reposition) {
+		intel_pt_reposition(decoder);
+		decoder->ref_timestamp = buffer.ref_timestamp;
+		decoder->state.trace_nr = buffer.trace_nr;
+		decoder->vm_tm_corr_same_buf = false;
+		intel_pt_log("Reference timestamp 0x%" PRIx64 "\n",
+			     decoder->ref_timestamp);
+		return -ENOLINK;
+	}
+
+	return 0;
+}
+
+static int intel_pt_get_next_data(struct intel_pt_decoder *decoder,
+				  bool reposition)
+{
+	if (!decoder->next_buf)
+		return intel_pt_get_data(decoder, reposition);
+
+	decoder->buf = decoder->next_buf;
+	decoder->len = decoder->next_len;
+	decoder->next_buf = 0;
+	decoder->next_len = 0;
+	return 0;
+}
+
+static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
+{
+	unsigned char *buf = decoder->temp_buf;
+	size_t old_len, len, n;
+	int ret;
+
+	old_len = decoder->len;
+	len = decoder->len;
+	memcpy(buf, decoder->buf, len);
+
+	ret = intel_pt_get_data(decoder, false);
+	if (ret) {
+		decoder->pos += old_len;
+		return ret < 0 ? ret : -EINVAL;
+	}
+
+	n = INTEL_PT_PKT_MAX_SZ - len;
+	if (n > decoder->len)
+		n = decoder->len;
+	memcpy(buf + len, decoder->buf, n);
+	len += n;
+
+	decoder->prev_pkt_ctx = decoder->pkt_ctx;
+	ret = intel_pt_get_packet(buf, len, &decoder->packet, &decoder->pkt_ctx);
+	if (ret < (int)old_len) {
+		decoder->next_buf = decoder->buf;
+		decoder->next_len = decoder->len;
+		decoder->buf = buf;
+		decoder->len = old_len;
+		return intel_pt_bad_packet(decoder);
+	}
+
+	decoder->next_buf = decoder->buf + (ret - old_len);
+	decoder->next_len = decoder->len - (ret - old_len);
+
+	decoder->buf = buf;
+	decoder->len = ret;
+
+	return ret;
+}
+
+struct intel_pt_pkt_info {
+	struct intel_pt_decoder	  *decoder;
+	struct intel_pt_pkt       packet;
+	uint64_t                  pos;
+	int                       pkt_len;
+	int                       last_packet_type;
+	void                      *data;
+};
+
+typedef int (*intel_pt_pkt_cb_t)(struct intel_pt_pkt_info *pkt_info);
+
+/* Lookahead packets in current buffer */
+static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder,
+				  intel_pt_pkt_cb_t cb, void *data)
+{
+	struct intel_pt_pkt_info pkt_info;
+	const unsigned char *buf = decoder->buf;
+	enum intel_pt_pkt_ctx pkt_ctx = decoder->pkt_ctx;
+	size_t len = decoder->len;
+	int ret;
+
+	pkt_info.decoder          = decoder;
+	pkt_info.pos              = decoder->pos;
+	pkt_info.pkt_len          = decoder->pkt_step;
+	pkt_info.last_packet_type = decoder->last_packet_type;
+	pkt_info.data             = data;
+
+	while (1) {
+		do {
+			pkt_info.pos += pkt_info.pkt_len;
+			buf          += pkt_info.pkt_len;
+			len          -= pkt_info.pkt_len;
+
+			if (!len)
+				return INTEL_PT_NEED_MORE_BYTES;
+
+			ret = intel_pt_get_packet(buf, len, &pkt_info.packet,
+						  &pkt_ctx);
+			if (!ret)
+				return INTEL_PT_NEED_MORE_BYTES;
+			if (ret < 0)
+				return ret;
+
+			pkt_info.pkt_len = ret;
+		} while (pkt_info.packet.type == INTEL_PT_PAD);
+
+		ret = cb(&pkt_info);
+		if (ret)
+			return 0;
+
+		pkt_info.last_packet_type = pkt_info.packet.type;
+	}
+}
+
+struct intel_pt_calc_cyc_to_tsc_info {
+	uint64_t        cycle_cnt;
+	unsigned int    cbr;
+	uint32_t        last_mtc;
+	uint64_t        ctc_timestamp;
+	uint64_t        ctc_delta;
+	uint64_t        tsc_timestamp;
+	uint64_t        timestamp;
+	bool            have_tma;
+	bool            fixup_last_mtc;
+	bool            from_mtc;
+	double          cbr_cyc_to_tsc;
+};
+
+/*
+ * MTC provides a 8-bit slice of CTC but the TMA packet only provides the lower
+ * 16 bits of CTC. If mtc_shift > 8 then some of the MTC bits are not in the CTC
+ * provided by the TMA packet. Fix-up the last_mtc calculated from the TMA
+ * packet by copying the missing bits from the current MTC assuming the least
+ * difference between the two, and that the current MTC comes after last_mtc.
+ */
+static void intel_pt_fixup_last_mtc(uint32_t mtc, int mtc_shift,
+				    uint32_t *last_mtc)
+{
+	uint32_t first_missing_bit = 1U << (16 - mtc_shift);
+	uint32_t mask = ~(first_missing_bit - 1);
+
+	*last_mtc |= mtc & mask;
+	if (*last_mtc >= mtc) {
+		*last_mtc -= first_missing_bit;
+		*last_mtc &= 0xff;
+	}
+}
+
+static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
+{
+	struct intel_pt_decoder *decoder = pkt_info->decoder;
+	struct intel_pt_calc_cyc_to_tsc_info *data = pkt_info->data;
+	uint64_t timestamp;
+	double cyc_to_tsc;
+	unsigned int cbr;
+	uint32_t mtc, mtc_delta, ctc, fc, ctc_rem;
+
+	switch (pkt_info->packet.type) {
+	case INTEL_PT_TNT:
+	case INTEL_PT_TIP_PGE:
+	case INTEL_PT_TIP:
+	case INTEL_PT_FUP:
+	case INTEL_PT_PSB:
+	case INTEL_PT_PIP:
+	case INTEL_PT_MODE_EXEC:
+	case INTEL_PT_MODE_TSX:
+	case INTEL_PT_PSBEND:
+	case INTEL_PT_PAD:
+	case INTEL_PT_VMCS:
+	case INTEL_PT_MNT:
+	case INTEL_PT_PTWRITE:
+	case INTEL_PT_PTWRITE_IP:
+	case INTEL_PT_BBP:
+	case INTEL_PT_BIP:
+	case INTEL_PT_BEP:
+	case INTEL_PT_BEP_IP:
+	case INTEL_PT_CFE:
+	case INTEL_PT_CFE_IP:
+	case INTEL_PT_EVD:
+		return 0;
+
+	case INTEL_PT_MTC:
+		if (!data->have_tma)
+			return 0;
+
+		mtc = pkt_info->packet.payload;
+		if (decoder->mtc_shift > 8 && data->fixup_last_mtc) {
+			data->fixup_last_mtc = false;
+			intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift,
+						&data->last_mtc);
+		}
+		if (mtc > data->last_mtc)
+			mtc_delta = mtc - data->last_mtc;
+		else
+			mtc_delta = mtc + 256 - data->last_mtc;
+		data->ctc_delta += mtc_delta << decoder->mtc_shift;
+		data->last_mtc = mtc;
+
+		if (decoder->tsc_ctc_mult) {
+			timestamp = data->ctc_timestamp +
+				data->ctc_delta * decoder->tsc_ctc_mult;
+		} else {
+			timestamp = data->ctc_timestamp +
+				multdiv(data->ctc_delta,
+					decoder->tsc_ctc_ratio_n,
+					decoder->tsc_ctc_ratio_d);
+		}
+
+		if (timestamp < data->timestamp)
+			return 1;
+
+		if (pkt_info->last_packet_type != INTEL_PT_CYC) {
+			data->timestamp = timestamp;
+			return 0;
+		}
+
+		break;
+
+	case INTEL_PT_TSC:
+		/*
+		 * For now, do not support using TSC packets - refer
+		 * intel_pt_calc_cyc_to_tsc().
+		 */
+		if (data->from_mtc)
+			return 1;
+		timestamp = pkt_info->packet.payload |
+			    (data->timestamp & (0xffULL << 56));
+		if (data->from_mtc && timestamp < data->timestamp &&
+		    data->timestamp - timestamp < decoder->tsc_slip)
+			return 1;
+		if (timestamp < data->timestamp)
+			timestamp += (1ULL << 56);
+		if (pkt_info->last_packet_type != INTEL_PT_CYC) {
+			if (data->from_mtc)
+				return 1;
+			data->tsc_timestamp = timestamp;
+			data->timestamp = timestamp;
+			return 0;
+		}
+		break;
+
+	case INTEL_PT_TMA:
+		if (data->from_mtc)
+			return 1;
+
+		if (!decoder->tsc_ctc_ratio_d)
+			return 0;
+
+		ctc = pkt_info->packet.payload;
+		fc = pkt_info->packet.count;
+		ctc_rem = ctc & decoder->ctc_rem_mask;
+
+		data->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
+
+		data->ctc_timestamp = data->tsc_timestamp - fc;
+		if (decoder->tsc_ctc_mult) {
+			data->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult;
+		} else {
+			data->ctc_timestamp -=
+				multdiv(ctc_rem, decoder->tsc_ctc_ratio_n,
+					decoder->tsc_ctc_ratio_d);
+		}
+
+		data->ctc_delta = 0;
+		data->have_tma = true;
+		data->fixup_last_mtc = true;
+
+		return 0;
+
+	case INTEL_PT_CYC:
+		data->cycle_cnt += pkt_info->packet.payload;
+		return 0;
+
+	case INTEL_PT_CBR:
+		cbr = pkt_info->packet.payload;
+		if (data->cbr && data->cbr != cbr)
+			return 1;
+		data->cbr = cbr;
+		data->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
+		return 0;
+
+	case INTEL_PT_TIP_PGD:
+	case INTEL_PT_TRACESTOP:
+	case INTEL_PT_EXSTOP:
+	case INTEL_PT_EXSTOP_IP:
+	case INTEL_PT_MWAIT:
+	case INTEL_PT_PWRE:
+	case INTEL_PT_PWRX:
+	case INTEL_PT_OVF:
+	case INTEL_PT_BAD: /* Does not happen */
+	default:
+		return 1;
+	}
+
+	if (!data->cbr && decoder->cbr) {
+		data->cbr = decoder->cbr;
+		data->cbr_cyc_to_tsc = decoder->cbr_cyc_to_tsc;
+	}
+
+	if (!data->cycle_cnt)
+		return 1;
+
+	cyc_to_tsc = (double)(timestamp - decoder->timestamp) / data->cycle_cnt;
+
+	if (data->cbr && cyc_to_tsc > data->cbr_cyc_to_tsc &&
+	    cyc_to_tsc / data->cbr_cyc_to_tsc > 1.25) {
+		intel_pt_log("Timestamp: calculated %g TSC ticks per cycle too big (c.f. CBR-based value %g), pos " x64_fmt "\n",
+			     cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
+		return 1;
+	}
+
+	decoder->calc_cyc_to_tsc = cyc_to_tsc;
+	decoder->have_calc_cyc_to_tsc = true;
+
+	if (data->cbr) {
+		intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. CBR-based value %g, pos " x64_fmt "\n",
+			     cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
+	} else {
+		intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. unknown CBR-based value, pos " x64_fmt "\n",
+			     cyc_to_tsc, pkt_info->pos);
+	}
+
+	return 1;
+}
+
+static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder,
+				     bool from_mtc)
+{
+	struct intel_pt_calc_cyc_to_tsc_info data = {
+		.cycle_cnt      = 0,
+		.cbr            = 0,
+		.last_mtc       = decoder->last_mtc,
+		.ctc_timestamp  = decoder->ctc_timestamp,
+		.ctc_delta      = decoder->ctc_delta,
+		.tsc_timestamp  = decoder->tsc_timestamp,
+		.timestamp      = decoder->timestamp,
+		.have_tma       = decoder->have_tma,
+		.fixup_last_mtc = decoder->fixup_last_mtc,
+		.from_mtc       = from_mtc,
+		.cbr_cyc_to_tsc = 0,
+	};
+
+	/*
+	 * For now, do not support using TSC packets for at least the reasons:
+	 * 1) timing might have stopped
+	 * 2) TSC packets within PSB+ can slip against CYC packets
+	 */
+	if (!from_mtc)
+		return;
+
+	intel_pt_pkt_lookahead(decoder, intel_pt_calc_cyc_cb, &data);
+}
+
+static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
+{
+	int ret;
+
+	decoder->last_packet_type = decoder->packet.type;
+
+	do {
+		decoder->pos += decoder->pkt_step;
+		decoder->buf += decoder->pkt_step;
+		decoder->len -= decoder->pkt_step;
+
+		if (!decoder->len) {
+			ret = intel_pt_get_next_data(decoder, false);
+			if (ret)
+				return ret;
+		}
+
+		decoder->prev_pkt_ctx = decoder->pkt_ctx;
+		ret = intel_pt_get_packet(decoder->buf, decoder->len,
+					  &decoder->packet, &decoder->pkt_ctx);
+		if (ret == INTEL_PT_NEED_MORE_BYTES && BITS_PER_LONG == 32 &&
+		    decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) {
+			ret = intel_pt_get_split_packet(decoder);
+			if (ret < 0)
+				return ret;
+		}
+		if (ret <= 0)
+			return intel_pt_bad_packet(decoder);
+
+		decoder->pkt_len = ret;
+		decoder->pkt_step = ret;
+		intel_pt_decoder_log_packet(decoder);
+	} while (decoder->packet.type == INTEL_PT_PAD);
+
+	return 0;
+}
+
+static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder)
+{
+	uint64_t timestamp, masked_timestamp;
+
+	timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
+	masked_timestamp = timestamp & decoder->period_mask;
+	if (decoder->continuous_period) {
+		if (masked_timestamp > decoder->last_masked_timestamp)
+			return 1;
+	} else {
+		timestamp += 1;
+		masked_timestamp = timestamp & decoder->period_mask;
+		if (masked_timestamp > decoder->last_masked_timestamp) {
+			decoder->last_masked_timestamp = masked_timestamp;
+			decoder->continuous_period = true;
+		}
+	}
+
+	if (masked_timestamp < decoder->last_masked_timestamp)
+		return decoder->period_ticks;
+
+	return decoder->period_ticks - (timestamp - masked_timestamp);
+}
+
+static uint64_t intel_pt_next_sample(struct intel_pt_decoder *decoder)
+{
+	switch (decoder->period_type) {
+	case INTEL_PT_PERIOD_INSTRUCTIONS:
+		return decoder->period - decoder->period_insn_cnt;
+	case INTEL_PT_PERIOD_TICKS:
+		return intel_pt_next_period(decoder);
+	case INTEL_PT_PERIOD_NONE:
+	case INTEL_PT_PERIOD_MTC:
+	default:
+		return 0;
+	}
+}
+
+static void intel_pt_sample_insn(struct intel_pt_decoder *decoder)
+{
+	uint64_t timestamp, masked_timestamp;
+
+	switch (decoder->period_type) {
+	case INTEL_PT_PERIOD_INSTRUCTIONS:
+		decoder->period_insn_cnt = 0;
+		break;
+	case INTEL_PT_PERIOD_TICKS:
+		timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
+		masked_timestamp = timestamp & decoder->period_mask;
+		if (masked_timestamp > decoder->last_masked_timestamp)
+			decoder->last_masked_timestamp = masked_timestamp;
+		else
+			decoder->last_masked_timestamp += decoder->period_ticks;
+		break;
+	case INTEL_PT_PERIOD_NONE:
+	case INTEL_PT_PERIOD_MTC:
+	default:
+		break;
+	}
+
+	decoder->state.type |= INTEL_PT_INSTRUCTION;
+}
+
+/*
+ * Sample FUP instruction at the same time as reporting the FUP event, so the
+ * instruction sample gets the same flags as the FUP event.
+ */
+static void intel_pt_sample_fup_insn(struct intel_pt_decoder *decoder)
+{
+	struct intel_pt_insn intel_pt_insn;
+	uint64_t max_insn_cnt, insn_cnt = 0;
+	int err;
+
+	decoder->state.insn_op = INTEL_PT_OP_OTHER;
+	decoder->state.insn_len = 0;
+
+	if (!decoder->branch_enable || !decoder->pge || decoder->hop ||
+	    decoder->ip != decoder->last_ip)
+		return;
+
+	if (!decoder->mtc_insn)
+		decoder->mtc_insn = true;
+
+	max_insn_cnt = intel_pt_next_sample(decoder);
+	if (max_insn_cnt != 1)
+		return;
+
+	err = decoder->walk_insn(&intel_pt_insn, &insn_cnt, &decoder->ip,
+				 0, max_insn_cnt, decoder->data);
+	/* Ignore error, it will be reported next walk anyway */
+	if (err)
+		return;
+
+	if (intel_pt_insn.branch != INTEL_PT_BR_NO_BRANCH) {
+		intel_pt_log_at("ERROR: Unexpected branch at FUP instruction", decoder->ip);
+		return;
+	}
+
+	decoder->tot_insn_cnt += insn_cnt;
+	decoder->timestamp_insn_cnt += insn_cnt;
+	decoder->sample_insn_cnt += insn_cnt;
+	decoder->period_insn_cnt += insn_cnt;
+
+	intel_pt_sample_insn(decoder);
+
+	decoder->state.type |= INTEL_PT_INSTRUCTION;
+	decoder->ip += intel_pt_insn.length;
+}
+
+static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
+			      struct intel_pt_insn *intel_pt_insn, uint64_t ip)
+{
+	uint64_t max_insn_cnt, insn_cnt = 0;
+	int err;
+
+	if (!decoder->mtc_insn)
+		decoder->mtc_insn = true;
+
+	max_insn_cnt = intel_pt_next_sample(decoder);
+
+	err = decoder->walk_insn(intel_pt_insn, &insn_cnt, &decoder->ip, ip,
+				 max_insn_cnt, decoder->data);
+
+	decoder->tot_insn_cnt += insn_cnt;
+	decoder->timestamp_insn_cnt += insn_cnt;
+	decoder->sample_insn_cnt += insn_cnt;
+	decoder->period_insn_cnt += insn_cnt;
+
+	if (err) {
+		decoder->no_progress = 0;
+		decoder->pkt_state = INTEL_PT_STATE_ERR2;
+		intel_pt_log_at("ERROR: Failed to get instruction",
+				decoder->ip);
+		if (err == -ENOENT)
+			return -ENOLINK;
+		return -EILSEQ;
+	}
+
+	if (ip && decoder->ip == ip) {
+		err = -EAGAIN;
+		goto out;
+	}
+
+	if (max_insn_cnt && insn_cnt >= max_insn_cnt)
+		intel_pt_sample_insn(decoder);
+
+	if (intel_pt_insn->branch == INTEL_PT_BR_NO_BRANCH) {
+		decoder->state.type = INTEL_PT_INSTRUCTION;
+		decoder->state.from_ip = decoder->ip;
+		decoder->state.to_ip = 0;
+		decoder->ip += intel_pt_insn->length;
+		err = INTEL_PT_RETURN;
+		goto out;
+	}
+
+	if (intel_pt_insn->op == INTEL_PT_OP_CALL) {
+		/* Zero-length calls are excluded */
+		if (intel_pt_insn->branch != INTEL_PT_BR_UNCONDITIONAL ||
+		    intel_pt_insn->rel) {
+			err = intel_pt_push(&decoder->stack, decoder->ip +
+					    intel_pt_insn->length);
+			if (err)
+				goto out;
+		}
+	} else if (intel_pt_insn->op == INTEL_PT_OP_RET) {
+		decoder->ret_addr = intel_pt_pop(&decoder->stack);
+	}
+
+	if (intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL) {
+		int cnt = decoder->no_progress++;
+
+		decoder->state.from_ip = decoder->ip;
+		decoder->ip += intel_pt_insn->length +
+				intel_pt_insn->rel;
+		decoder->state.to_ip = decoder->ip;
+		err = INTEL_PT_RETURN;
+
+		/*
+		 * Check for being stuck in a loop.  This can happen if a
+		 * decoder error results in the decoder erroneously setting the
+		 * ip to an address that is itself in an infinite loop that
+		 * consumes no packets.  When that happens, there must be an
+		 * unconditional branch.
+		 */
+		if (cnt) {
+			if (cnt == 1) {
+				decoder->stuck_ip = decoder->state.to_ip;
+				decoder->stuck_ip_prd = 1;
+				decoder->stuck_ip_cnt = 1;
+			} else if (cnt > decoder->max_loops ||
+				   decoder->state.to_ip == decoder->stuck_ip) {
+				intel_pt_log_at("ERROR: Never-ending loop",
+						decoder->state.to_ip);
+				decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+				err = -ELOOP;
+				goto out;
+			} else if (!--decoder->stuck_ip_cnt) {
+				decoder->stuck_ip_prd += 1;
+				decoder->stuck_ip_cnt = decoder->stuck_ip_prd;
+				decoder->stuck_ip = decoder->state.to_ip;
+			}
+		}
+		goto out_no_progress;
+	}
+out:
+	decoder->no_progress = 0;
+out_no_progress:
+	decoder->state.insn_op = intel_pt_insn->op;
+	decoder->state.insn_len = intel_pt_insn->length;
+	memcpy(decoder->state.insn, intel_pt_insn->buf,
+	       INTEL_PT_INSN_BUF_SZ);
+
+	if (decoder->tx_flags & INTEL_PT_IN_TX)
+		decoder->state.flags |= INTEL_PT_IN_TX;
+
+	return err;
+}
+
+static void intel_pt_mode_exec_status(struct intel_pt_decoder *decoder)
+{
+	bool iflag = decoder->packet.count & INTEL_PT_IFLAG;
+
+	decoder->exec_mode = decoder->packet.payload;
+	decoder->iflag = iflag;
+	decoder->next_iflag = iflag;
+	decoder->state.from_iflag = iflag;
+	decoder->state.to_iflag = iflag;
+}
+
+static void intel_pt_mode_exec(struct intel_pt_decoder *decoder)
+{
+	bool iflag = decoder->packet.count & INTEL_PT_IFLAG;
+
+	decoder->exec_mode = decoder->packet.payload;
+	decoder->next_iflag = iflag;
+}
+
+static void intel_pt_sample_iflag(struct intel_pt_decoder *decoder)
+{
+	decoder->state.type |= INTEL_PT_IFLAG_CHG;
+	decoder->state.from_iflag = decoder->iflag;
+	decoder->state.to_iflag = decoder->next_iflag;
+	decoder->iflag = decoder->next_iflag;
+}
+
+static void intel_pt_sample_iflag_chg(struct intel_pt_decoder *decoder)
+{
+	if (decoder->iflag != decoder->next_iflag)
+		intel_pt_sample_iflag(decoder);
+}
+
+static void intel_pt_clear_fup_event(struct intel_pt_decoder *decoder)
+{
+	decoder->set_fup_tx_flags = false;
+	decoder->set_fup_ptw = false;
+	decoder->set_fup_mwait = false;
+	decoder->set_fup_pwre = false;
+	decoder->set_fup_exstop = false;
+	decoder->set_fup_bep = false;
+	decoder->set_fup_cfe_ip = false;
+	decoder->set_fup_cfe = false;
+	decoder->evd_cnt = 0;
+	decoder->set_fup_mode_exec = false;
+	decoder->iflag = decoder->next_iflag;
+}
+
+static bool intel_pt_fup_event(struct intel_pt_decoder *decoder, bool no_tip)
+{
+	enum intel_pt_sample_type type = decoder->state.type;
+	bool sample_fup_insn = false;
+	bool ret = false;
+
+	decoder->state.type &= ~INTEL_PT_BRANCH;
+
+	if (decoder->set_fup_cfe_ip || decoder->set_fup_cfe) {
+		bool ip = decoder->set_fup_cfe_ip;
+
+		decoder->set_fup_cfe_ip = false;
+		decoder->set_fup_cfe = false;
+		decoder->state.type |= INTEL_PT_EVT;
+		if (!ip && decoder->pge)
+			decoder->state.type |= INTEL_PT_BRANCH;
+		decoder->state.cfe_type = decoder->fup_cfe_pkt.count;
+		decoder->state.cfe_vector = decoder->fup_cfe_pkt.payload;
+		decoder->state.evd_cnt = decoder->evd_cnt;
+		decoder->state.evd = decoder->evd;
+		decoder->evd_cnt = 0;
+		if (ip || decoder->pge)
+			decoder->state.flags |= INTEL_PT_FUP_IP;
+		ret = true;
+	}
+	if (decoder->set_fup_mode_exec) {
+		decoder->set_fup_mode_exec = false;
+		intel_pt_sample_iflag(decoder);
+		sample_fup_insn = no_tip;
+		ret = true;
+	}
+	if (decoder->set_fup_tx_flags) {
+		decoder->set_fup_tx_flags = false;
+		decoder->tx_flags = decoder->fup_tx_flags;
+		decoder->state.type |= INTEL_PT_TRANSACTION;
+		if (decoder->fup_tx_flags & INTEL_PT_ABORT_TX)
+			decoder->state.type |= INTEL_PT_BRANCH;
+		decoder->state.flags = decoder->fup_tx_flags;
+		ret = true;
+	}
+	if (decoder->set_fup_ptw) {
+		decoder->set_fup_ptw = false;
+		decoder->state.type |= INTEL_PT_PTW;
+		decoder->state.flags |= INTEL_PT_FUP_IP;
+		decoder->state.ptw_payload = decoder->fup_ptw_payload;
+		ret = true;
+	}
+	if (decoder->set_fup_mwait) {
+		decoder->set_fup_mwait = false;
+		decoder->state.type |= INTEL_PT_MWAIT_OP;
+		decoder->state.mwait_payload = decoder->fup_mwait_payload;
+		ret = true;
+	}
+	if (decoder->set_fup_pwre) {
+		decoder->set_fup_pwre = false;
+		decoder->state.type |= INTEL_PT_PWR_ENTRY;
+		decoder->state.pwre_payload = decoder->fup_pwre_payload;
+		ret = true;
+	}
+	if (decoder->set_fup_exstop) {
+		decoder->set_fup_exstop = false;
+		decoder->state.type |= INTEL_PT_EX_STOP;
+		decoder->state.flags |= INTEL_PT_FUP_IP;
+		ret = true;
+	}
+	if (decoder->set_fup_bep) {
+		decoder->set_fup_bep = false;
+		decoder->state.type |= INTEL_PT_BLK_ITEMS;
+		ret = true;
+	}
+	if (decoder->overflow) {
+		decoder->overflow = false;
+		if (!ret && !decoder->pge) {
+			if (decoder->hop) {
+				decoder->state.type = 0;
+				decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+			}
+			decoder->pge = true;
+			decoder->state.type |= INTEL_PT_BRANCH | INTEL_PT_TRACE_BEGIN;
+			decoder->state.from_ip = 0;
+			decoder->state.to_ip = decoder->ip;
+			return true;
+		}
+	}
+	if (ret) {
+		decoder->state.from_ip = decoder->ip;
+		decoder->state.to_ip = 0;
+		if (sample_fup_insn)
+			intel_pt_sample_fup_insn(decoder);
+	} else {
+		decoder->state.type = type;
+	}
+	return ret;
+}
+
+static inline bool intel_pt_fup_with_nlip(struct intel_pt_decoder *decoder,
+					  struct intel_pt_insn *intel_pt_insn,
+					  uint64_t ip, int err)
+{
+	return decoder->flags & INTEL_PT_FUP_WITH_NLIP && !err &&
+	       intel_pt_insn->branch == INTEL_PT_BR_INDIRECT &&
+	       ip == decoder->ip + intel_pt_insn->length;
+}
+
+static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
+{
+	struct intel_pt_insn intel_pt_insn;
+	uint64_t ip;
+	int err;
+
+	ip = decoder->last_ip;
+
+	while (1) {
+		err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip);
+		if (err == INTEL_PT_RETURN)
+			return 0;
+		if (err == -EAGAIN ||
+		    intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) {
+			bool no_tip = decoder->pkt_state != INTEL_PT_STATE_FUP;
+
+			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+			if (intel_pt_fup_event(decoder, no_tip) && no_tip)
+				return 0;
+			return -EAGAIN;
+		}
+		decoder->set_fup_tx_flags = false;
+		if (err)
+			return err;
+
+		if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
+			intel_pt_log_at("ERROR: Unexpected indirect branch",
+					decoder->ip);
+			decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+			return -ENOENT;
+		}
+
+		if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
+			intel_pt_log_at("ERROR: Unexpected conditional branch",
+					decoder->ip);
+			decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+			return -ENOENT;
+		}
+
+		intel_pt_bug(decoder);
+	}
+}
+
+static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
+{
+	struct intel_pt_insn intel_pt_insn;
+	int err;
+
+	err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
+	if (err == INTEL_PT_RETURN &&
+	    decoder->pgd_ip &&
+	    decoder->pkt_state == INTEL_PT_STATE_TIP_PGD &&
+	    (decoder->state.type & INTEL_PT_BRANCH) &&
+	    decoder->pgd_ip(decoder->state.to_ip, decoder->data)) {
+		/* Unconditional branch leaving filter region */
+		decoder->no_progress = 0;
+		decoder->pge = false;
+		decoder->continuous_period = false;
+		decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+		decoder->state.type |= INTEL_PT_TRACE_END;
+		intel_pt_update_nr(decoder);
+		return 0;
+	}
+	if (err == INTEL_PT_RETURN)
+		return 0;
+	if (err)
+		return err;
+
+	intel_pt_update_nr(decoder);
+	intel_pt_sample_iflag_chg(decoder);
+
+	if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
+		if (decoder->pkt_state == INTEL_PT_STATE_TIP_PGD) {
+			decoder->pge = false;
+			decoder->continuous_period = false;
+			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+			decoder->state.from_ip = decoder->ip;
+			if (decoder->packet.count == 0) {
+				decoder->state.to_ip = 0;
+			} else {
+				decoder->state.to_ip = decoder->last_ip;
+				decoder->ip = decoder->last_ip;
+			}
+			decoder->state.type |= INTEL_PT_TRACE_END;
+		} else {
+			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+			decoder->state.from_ip = decoder->ip;
+			if (decoder->packet.count == 0) {
+				decoder->state.to_ip = 0;
+			} else {
+				decoder->state.to_ip = decoder->last_ip;
+				decoder->ip = decoder->last_ip;
+			}
+		}
+		return 0;
+	}
+
+	if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
+		uint64_t to_ip = decoder->ip + intel_pt_insn.length +
+				 intel_pt_insn.rel;
+
+		if (decoder->pgd_ip &&
+		    decoder->pkt_state == INTEL_PT_STATE_TIP_PGD &&
+		    decoder->pgd_ip(to_ip, decoder->data)) {
+			/* Conditional branch leaving filter region */
+			decoder->pge = false;
+			decoder->continuous_period = false;
+			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+			decoder->ip = to_ip;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = to_ip;
+			decoder->state.type |= INTEL_PT_TRACE_END;
+			return 0;
+		}
+		intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch",
+				decoder->ip);
+		decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+		return -ENOENT;
+	}
+
+	return intel_pt_bug(decoder);
+}
+
+struct eptw_data {
+	int bit_countdown;
+	uint64_t payload;
+};
+
+static int intel_pt_eptw_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
+{
+	struct eptw_data *data = pkt_info->data;
+	int nr_bits;
+
+	switch (pkt_info->packet.type) {
+	case INTEL_PT_PAD:
+	case INTEL_PT_MNT:
+	case INTEL_PT_MODE_EXEC:
+	case INTEL_PT_MODE_TSX:
+	case INTEL_PT_MTC:
+	case INTEL_PT_FUP:
+	case INTEL_PT_CYC:
+	case INTEL_PT_CBR:
+	case INTEL_PT_TSC:
+	case INTEL_PT_TMA:
+	case INTEL_PT_PIP:
+	case INTEL_PT_VMCS:
+	case INTEL_PT_PSB:
+	case INTEL_PT_PSBEND:
+	case INTEL_PT_PTWRITE:
+	case INTEL_PT_PTWRITE_IP:
+	case INTEL_PT_EXSTOP:
+	case INTEL_PT_EXSTOP_IP:
+	case INTEL_PT_MWAIT:
+	case INTEL_PT_PWRE:
+	case INTEL_PT_PWRX:
+	case INTEL_PT_BBP:
+	case INTEL_PT_BIP:
+	case INTEL_PT_BEP:
+	case INTEL_PT_BEP_IP:
+	case INTEL_PT_CFE:
+	case INTEL_PT_CFE_IP:
+	case INTEL_PT_EVD:
+		break;
+
+	case INTEL_PT_TNT:
+		nr_bits = data->bit_countdown;
+		if (nr_bits > pkt_info->packet.count)
+			nr_bits = pkt_info->packet.count;
+		data->payload <<= nr_bits;
+		data->payload |= pkt_info->packet.payload >> (64 - nr_bits);
+		data->bit_countdown -= nr_bits;
+		return !data->bit_countdown;
+
+	case INTEL_PT_TIP_PGE:
+	case INTEL_PT_TIP_PGD:
+	case INTEL_PT_TIP:
+	case INTEL_PT_BAD:
+	case INTEL_PT_OVF:
+	case INTEL_PT_TRACESTOP:
+	default:
+		return 1;
+	}
+
+	return 0;
+}
+
+static int intel_pt_emulated_ptwrite(struct intel_pt_decoder *decoder)
+{
+	int n = 64 - decoder->tnt.count;
+	struct eptw_data data = {
+		.bit_countdown = n,
+		.payload = decoder->tnt.payload >> n,
+	};
+
+	decoder->emulated_ptwrite = false;
+	intel_pt_log("Emulated ptwrite detected\n");
+
+	intel_pt_pkt_lookahead(decoder, intel_pt_eptw_lookahead_cb, &data);
+	if (data.bit_countdown)
+		return -ECONNRESET;
+
+	decoder->state.type = INTEL_PT_PTW;
+	decoder->state.from_ip = decoder->ip;
+	decoder->state.to_ip = 0;
+	decoder->state.ptw_payload = data.payload;
+	return 0;
+}
+
+static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
+{
+	struct intel_pt_insn intel_pt_insn;
+	int err;
+
+	while (1) {
+		if (decoder->emulated_ptwrite)
+			return intel_pt_emulated_ptwrite(decoder);
+		err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
+		if (err == INTEL_PT_RETURN) {
+			decoder->emulated_ptwrite = intel_pt_insn.emulated_ptwrite;
+			return 0;
+		}
+		if (err) {
+			decoder->emulated_ptwrite = false;
+			return err;
+		}
+
+		if (intel_pt_insn.op == INTEL_PT_OP_RET) {
+			if (!decoder->return_compression) {
+				intel_pt_log_at("ERROR: RET when expecting conditional branch",
+						decoder->ip);
+				decoder->pkt_state = INTEL_PT_STATE_ERR3;
+				return -ENOENT;
+			}
+			if (!decoder->ret_addr) {
+				intel_pt_log_at("ERROR: Bad RET compression (stack empty)",
+						decoder->ip);
+				decoder->pkt_state = INTEL_PT_STATE_ERR3;
+				return -ENOENT;
+			}
+			if (!(decoder->tnt.payload & BIT63)) {
+				intel_pt_log_at("ERROR: Bad RET compression (TNT=N)",
+						decoder->ip);
+				decoder->pkt_state = INTEL_PT_STATE_ERR3;
+				return -ENOENT;
+			}
+			decoder->tnt.count -= 1;
+			if (decoder->tnt.count)
+				decoder->pkt_state = INTEL_PT_STATE_TNT_CONT;
+			else
+				decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+			decoder->tnt.payload <<= 1;
+			decoder->state.from_ip = decoder->ip;
+			decoder->ip = decoder->ret_addr;
+			decoder->state.to_ip = decoder->ip;
+			return 0;
+		}
+
+		if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
+			/* Handle deferred TIPs */
+			err = intel_pt_get_next_packet(decoder);
+			if (err)
+				return err;
+			if (decoder->packet.type != INTEL_PT_TIP ||
+			    decoder->packet.count == 0) {
+				intel_pt_log_at("ERROR: Missing deferred TIP for indirect branch",
+						decoder->ip);
+				decoder->pkt_state = INTEL_PT_STATE_ERR3;
+				decoder->pkt_step = 0;
+				return -ENOENT;
+			}
+			intel_pt_set_last_ip(decoder);
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = decoder->last_ip;
+			decoder->ip = decoder->last_ip;
+			intel_pt_update_nr(decoder);
+			intel_pt_sample_iflag_chg(decoder);
+			return 0;
+		}
+
+		if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
+			decoder->tnt.count -= 1;
+			if (decoder->tnt.count)
+				decoder->pkt_state = INTEL_PT_STATE_TNT_CONT;
+			else
+				decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+			if (decoder->tnt.payload & BIT63) {
+				decoder->tnt.payload <<= 1;
+				decoder->state.from_ip = decoder->ip;
+				decoder->ip += intel_pt_insn.length +
+					       intel_pt_insn.rel;
+				decoder->state.to_ip = decoder->ip;
+				return 0;
+			}
+			/* Instruction sample for a non-taken branch */
+			if (decoder->state.type & INTEL_PT_INSTRUCTION) {
+				decoder->tnt.payload <<= 1;
+				decoder->state.type = INTEL_PT_INSTRUCTION;
+				decoder->state.from_ip = decoder->ip;
+				decoder->state.to_ip = 0;
+				decoder->ip += intel_pt_insn.length;
+				return 0;
+			}
+			decoder->sample_cyc = false;
+			decoder->ip += intel_pt_insn.length;
+			if (!decoder->tnt.count) {
+				intel_pt_update_sample_time(decoder);
+				return -EAGAIN;
+			}
+			decoder->tnt.payload <<= 1;
+			continue;
+		}
+
+		return intel_pt_bug(decoder);
+	}
+}
+
+static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip)
+{
+	unsigned int fup_tx_flags;
+	int err;
+
+	fup_tx_flags = decoder->packet.payload &
+		       (INTEL_PT_IN_TX | INTEL_PT_ABORT_TX);
+	err = intel_pt_get_next_packet(decoder);
+	if (err)
+		return err;
+	if (decoder->packet.type == INTEL_PT_FUP) {
+		decoder->fup_tx_flags = fup_tx_flags;
+		decoder->set_fup_tx_flags = true;
+		if (!(decoder->fup_tx_flags & INTEL_PT_ABORT_TX))
+			*no_tip = true;
+	} else {
+		intel_pt_log_at("ERROR: Missing FUP after MODE.TSX",
+				decoder->pos);
+		intel_pt_update_in_tx(decoder);
+	}
+	return 0;
+}
+
+static int intel_pt_evd(struct intel_pt_decoder *decoder)
+{
+	if (decoder->evd_cnt >= INTEL_PT_MAX_EVDS) {
+		intel_pt_log_at("ERROR: Too many EVD packets", decoder->pos);
+		return -ENOSYS;
+	}
+	decoder->evd[decoder->evd_cnt++] = (struct intel_pt_evd){
+		.type = decoder->packet.count,
+		.payload = decoder->packet.payload,
+	};
+	return 0;
+}
+
+static uint64_t intel_pt_8b_tsc(uint64_t timestamp, uint64_t ref_timestamp)
+{
+	timestamp |= (ref_timestamp & (0xffULL << 56));
+
+	if (timestamp < ref_timestamp) {
+		if (ref_timestamp - timestamp > (1ULL << 55))
+			timestamp += (1ULL << 56);
+	} else {
+		if (timestamp - ref_timestamp > (1ULL << 55))
+			timestamp -= (1ULL << 56);
+	}
+
+	return timestamp;
+}
+
+/* For use only when decoder->vm_time_correlation is true */
+static bool intel_pt_time_in_range(struct intel_pt_decoder *decoder,
+				   uint64_t timestamp)
+{
+	uint64_t max_timestamp = decoder->buf_timestamp;
+
+	if (!max_timestamp) {
+		max_timestamp = decoder->last_reliable_timestamp +
+				0x400000000ULL;
+	}
+	return timestamp >= decoder->last_reliable_timestamp &&
+	       timestamp < decoder->buf_timestamp;
+}
+
+static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
+{
+	uint64_t timestamp;
+	bool bad = false;
+
+	decoder->have_tma = false;
+
+	if (decoder->ref_timestamp) {
+		timestamp = intel_pt_8b_tsc(decoder->packet.payload,
+					    decoder->ref_timestamp);
+		decoder->tsc_timestamp = timestamp;
+		decoder->timestamp = timestamp;
+		decoder->ref_timestamp = 0;
+		decoder->timestamp_insn_cnt = 0;
+	} else if (decoder->timestamp) {
+		timestamp = decoder->packet.payload |
+			    (decoder->timestamp & (0xffULL << 56));
+		decoder->tsc_timestamp = timestamp;
+		if (timestamp < decoder->timestamp &&
+		    decoder->timestamp - timestamp < decoder->tsc_slip) {
+			intel_pt_log_to("Suppressing backwards timestamp",
+					timestamp);
+			timestamp = decoder->timestamp;
+		}
+		if (timestamp < decoder->timestamp) {
+			if (!decoder->buf_timestamp ||
+			    (timestamp + (1ULL << 56) < decoder->buf_timestamp)) {
+				intel_pt_log_to("Wraparound timestamp", timestamp);
+				timestamp += (1ULL << 56);
+				decoder->tsc_timestamp = timestamp;
+			} else {
+				intel_pt_log_to("Suppressing bad timestamp", timestamp);
+				timestamp = decoder->timestamp;
+				bad = true;
+			}
+		}
+		if (decoder->vm_time_correlation &&
+		    (bad || !intel_pt_time_in_range(decoder, timestamp)) &&
+		    intel_pt_print_once(decoder, INTEL_PT_PRT_ONCE_ERANGE))
+			p_log("Timestamp out of range");
+		decoder->timestamp = timestamp;
+		decoder->timestamp_insn_cnt = 0;
+	}
+
+	if (decoder->last_packet_type == INTEL_PT_CYC) {
+		decoder->cyc_ref_timestamp = decoder->timestamp;
+		decoder->cycle_cnt = 0;
+		decoder->have_calc_cyc_to_tsc = false;
+		intel_pt_calc_cyc_to_tsc(decoder, false);
+	}
+
+	intel_pt_log_to("Setting timestamp", decoder->timestamp);
+}
+
+static int intel_pt_overflow(struct intel_pt_decoder *decoder)
+{
+	intel_pt_log("ERROR: Buffer overflow\n");
+	intel_pt_clear_tx_flags(decoder);
+	intel_pt_set_nr(decoder);
+	decoder->timestamp_insn_cnt = 0;
+	decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+	decoder->state.from_ip = decoder->ip;
+	decoder->ip = 0;
+	decoder->pge = false;
+	intel_pt_clear_fup_event(decoder);
+	decoder->overflow = true;
+	return -EOVERFLOW;
+}
+
+static inline void intel_pt_mtc_cyc_cnt_pge(struct intel_pt_decoder *decoder)
+{
+	if (decoder->have_cyc)
+		return;
+
+	decoder->cyc_cnt_timestamp = decoder->timestamp;
+	decoder->base_cyc_cnt = decoder->tot_cyc_cnt;
+}
+
+static inline void intel_pt_mtc_cyc_cnt_cbr(struct intel_pt_decoder *decoder)
+{
+	decoder->tsc_to_cyc = decoder->cbr / decoder->max_non_turbo_ratio_fp;
+
+	if (decoder->pge)
+		intel_pt_mtc_cyc_cnt_pge(decoder);
+}
+
+static inline void intel_pt_mtc_cyc_cnt_upd(struct intel_pt_decoder *decoder)
+{
+	uint64_t tot_cyc_cnt, tsc_delta;
+
+	if (decoder->have_cyc)
+		return;
+
+	decoder->sample_cyc = true;
+
+	if (!decoder->pge || decoder->timestamp <= decoder->cyc_cnt_timestamp)
+		return;
+
+	tsc_delta = decoder->timestamp - decoder->cyc_cnt_timestamp;
+	tot_cyc_cnt = tsc_delta * decoder->tsc_to_cyc + decoder->base_cyc_cnt;
+
+	if (tot_cyc_cnt > decoder->tot_cyc_cnt)
+		decoder->tot_cyc_cnt = tot_cyc_cnt;
+}
+
+static void intel_pt_calc_tma(struct intel_pt_decoder *decoder)
+{
+	uint32_t ctc = decoder->packet.payload;
+	uint32_t fc = decoder->packet.count;
+	uint32_t ctc_rem = ctc & decoder->ctc_rem_mask;
+
+	if (!decoder->tsc_ctc_ratio_d)
+		return;
+
+	if (decoder->pge && !decoder->in_psb)
+		intel_pt_mtc_cyc_cnt_pge(decoder);
+	else
+		intel_pt_mtc_cyc_cnt_upd(decoder);
+
+	decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
+	decoder->last_ctc = ctc - ctc_rem;
+	decoder->ctc_timestamp = decoder->tsc_timestamp - fc;
+	if (decoder->tsc_ctc_mult) {
+		decoder->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult;
+	} else {
+		decoder->ctc_timestamp -= multdiv(ctc_rem,
+						  decoder->tsc_ctc_ratio_n,
+						  decoder->tsc_ctc_ratio_d);
+	}
+	decoder->ctc_delta = 0;
+	decoder->have_tma = true;
+	decoder->fixup_last_mtc = true;
+	intel_pt_log("CTC timestamp " x64_fmt " last MTC %#x  CTC rem %#x\n",
+		     decoder->ctc_timestamp, decoder->last_mtc, ctc_rem);
+}
+
+static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
+{
+	uint64_t timestamp;
+	uint32_t mtc, mtc_delta;
+
+	if (!decoder->have_tma)
+		return;
+
+	mtc = decoder->packet.payload;
+
+	if (decoder->mtc_shift > 8 && decoder->fixup_last_mtc) {
+		decoder->fixup_last_mtc = false;
+		intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift,
+					&decoder->last_mtc);
+	}
+
+	if (mtc > decoder->last_mtc)
+		mtc_delta = mtc - decoder->last_mtc;
+	else
+		mtc_delta = mtc + 256 - decoder->last_mtc;
+
+	decoder->ctc_delta += mtc_delta << decoder->mtc_shift;
+
+	if (decoder->tsc_ctc_mult) {
+		timestamp = decoder->ctc_timestamp +
+			    decoder->ctc_delta * decoder->tsc_ctc_mult;
+	} else {
+		timestamp = decoder->ctc_timestamp +
+			    multdiv(decoder->ctc_delta,
+				    decoder->tsc_ctc_ratio_n,
+				    decoder->tsc_ctc_ratio_d);
+	}
+
+	if (timestamp < decoder->timestamp)
+		intel_pt_log("Suppressing MTC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n",
+			     timestamp, decoder->timestamp);
+	else
+		decoder->timestamp = timestamp;
+
+	intel_pt_mtc_cyc_cnt_upd(decoder);
+
+	decoder->timestamp_insn_cnt = 0;
+	decoder->last_mtc = mtc;
+
+	if (decoder->last_packet_type == INTEL_PT_CYC) {
+		decoder->cyc_ref_timestamp = decoder->timestamp;
+		decoder->cycle_cnt = 0;
+		decoder->have_calc_cyc_to_tsc = false;
+		intel_pt_calc_cyc_to_tsc(decoder, true);
+	}
+
+	intel_pt_log_to("Setting timestamp", decoder->timestamp);
+}
+
+static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
+{
+	unsigned int cbr = decoder->packet.payload & 0xff;
+
+	decoder->cbr_payload = decoder->packet.payload;
+
+	if (decoder->cbr == cbr)
+		return;
+
+	decoder->cbr = cbr;
+	decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
+	decoder->cyc_ref_timestamp = decoder->timestamp;
+	decoder->cycle_cnt = 0;
+
+	intel_pt_mtc_cyc_cnt_cbr(decoder);
+}
+
+static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
+{
+	uint64_t timestamp = decoder->cyc_ref_timestamp;
+
+	decoder->have_cyc = true;
+
+	decoder->cycle_cnt += decoder->packet.payload;
+	if (decoder->pge)
+		decoder->tot_cyc_cnt += decoder->packet.payload;
+	decoder->sample_cyc = true;
+
+	if (!decoder->cyc_ref_timestamp)
+		return;
+
+	if (decoder->have_calc_cyc_to_tsc)
+		timestamp += decoder->cycle_cnt * decoder->calc_cyc_to_tsc;
+	else if (decoder->cbr)
+		timestamp += decoder->cycle_cnt * decoder->cbr_cyc_to_tsc;
+	else
+		return;
+
+	if (timestamp < decoder->timestamp)
+		intel_pt_log("Suppressing CYC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n",
+			     timestamp, decoder->timestamp);
+	else
+		decoder->timestamp = timestamp;
+
+	decoder->timestamp_insn_cnt = 0;
+
+	intel_pt_log_to("Setting timestamp", decoder->timestamp);
+}
+
+static void intel_pt_bbp(struct intel_pt_decoder *decoder)
+{
+	if (decoder->prev_pkt_ctx == INTEL_PT_NO_CTX) {
+		memset(decoder->state.items.mask, 0, sizeof(decoder->state.items.mask));
+		decoder->state.items.is_32_bit = false;
+	}
+	decoder->blk_type = decoder->packet.payload;
+	decoder->blk_type_pos = intel_pt_blk_type_pos(decoder->blk_type);
+	if (decoder->blk_type == INTEL_PT_GP_REGS)
+		decoder->state.items.is_32_bit = decoder->packet.count;
+	if (decoder->blk_type_pos < 0) {
+		intel_pt_log("WARNING: Unknown block type %u\n",
+			     decoder->blk_type);
+	} else if (decoder->state.items.mask[decoder->blk_type_pos]) {
+		intel_pt_log("WARNING: Duplicate block type %u\n",
+			     decoder->blk_type);
+	}
+}
+
+static void intel_pt_bip(struct intel_pt_decoder *decoder)
+{
+	uint32_t id = decoder->packet.count;
+	uint32_t bit = 1 << id;
+	int pos = decoder->blk_type_pos;
+
+	if (pos < 0 || id >= INTEL_PT_BLK_ITEM_ID_CNT) {
+		intel_pt_log("WARNING: Unknown block item %u type %d\n",
+			     id, decoder->blk_type);
+		return;
+	}
+
+	if (decoder->state.items.mask[pos] & bit) {
+		intel_pt_log("WARNING: Duplicate block item %u type %d\n",
+			     id, decoder->blk_type);
+	}
+
+	decoder->state.items.mask[pos] |= bit;
+	decoder->state.items.val[pos][id] = decoder->packet.payload;
+}
+
+/* Walk PSB+ packets when already in sync. */
+static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
+{
+	int err;
+
+	decoder->in_psb = true;
+
+	while (1) {
+		err = intel_pt_get_next_packet(decoder);
+		if (err)
+			goto out;
+
+		switch (decoder->packet.type) {
+		case INTEL_PT_PSBEND:
+			err = 0;
+			goto out;
+
+		case INTEL_PT_TIP_PGD:
+		case INTEL_PT_TIP_PGE:
+		case INTEL_PT_TIP:
+		case INTEL_PT_TNT:
+		case INTEL_PT_TRACESTOP:
+		case INTEL_PT_BAD:
+		case INTEL_PT_PSB:
+		case INTEL_PT_PTWRITE:
+		case INTEL_PT_PTWRITE_IP:
+		case INTEL_PT_EXSTOP:
+		case INTEL_PT_EXSTOP_IP:
+		case INTEL_PT_MWAIT:
+		case INTEL_PT_PWRE:
+		case INTEL_PT_PWRX:
+		case INTEL_PT_BBP:
+		case INTEL_PT_BIP:
+		case INTEL_PT_BEP:
+		case INTEL_PT_BEP_IP:
+		case INTEL_PT_CFE:
+		case INTEL_PT_CFE_IP:
+		case INTEL_PT_EVD:
+			decoder->have_tma = false;
+			intel_pt_log("ERROR: Unexpected packet\n");
+			err = -EAGAIN;
+			goto out;
+
+		case INTEL_PT_OVF:
+			err = intel_pt_overflow(decoder);
+			goto out;
+
+		case INTEL_PT_TSC:
+			intel_pt_calc_tsc_timestamp(decoder);
+			break;
+
+		case INTEL_PT_TMA:
+			intel_pt_calc_tma(decoder);
+			break;
+
+		case INTEL_PT_CBR:
+			intel_pt_calc_cbr(decoder);
+			break;
+
+		case INTEL_PT_MODE_EXEC:
+			intel_pt_mode_exec_status(decoder);
+			break;
+
+		case INTEL_PT_PIP:
+			intel_pt_set_pip(decoder);
+			break;
+
+		case INTEL_PT_FUP:
+			decoder->pge = true;
+			if (decoder->packet.count) {
+				intel_pt_set_last_ip(decoder);
+				decoder->psb_ip = decoder->last_ip;
+			}
+			break;
+
+		case INTEL_PT_MODE_TSX:
+			intel_pt_update_in_tx(decoder);
+			break;
+
+		case INTEL_PT_MTC:
+			intel_pt_calc_mtc_timestamp(decoder);
+			if (decoder->period_type == INTEL_PT_PERIOD_MTC)
+				decoder->state.type |= INTEL_PT_INSTRUCTION;
+			break;
+
+		case INTEL_PT_CYC:
+			intel_pt_calc_cyc_timestamp(decoder);
+			break;
+
+		case INTEL_PT_VMCS:
+		case INTEL_PT_MNT:
+		case INTEL_PT_PAD:
+		default:
+			break;
+		}
+	}
+out:
+	decoder->in_psb = false;
+
+	return err;
+}
+
+static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
+{
+	int err;
+
+	if (decoder->tx_flags & INTEL_PT_ABORT_TX) {
+		decoder->tx_flags = 0;
+		decoder->state.flags &= ~INTEL_PT_IN_TX;
+		decoder->state.flags |= INTEL_PT_ABORT_TX;
+	} else {
+		decoder->state.flags |= INTEL_PT_ASYNC;
+	}
+
+	while (1) {
+		err = intel_pt_get_next_packet(decoder);
+		if (err)
+			return err;
+
+		switch (decoder->packet.type) {
+		case INTEL_PT_TNT:
+		case INTEL_PT_FUP:
+		case INTEL_PT_TRACESTOP:
+		case INTEL_PT_PSB:
+		case INTEL_PT_TSC:
+		case INTEL_PT_TMA:
+		case INTEL_PT_MODE_TSX:
+		case INTEL_PT_BAD:
+		case INTEL_PT_PSBEND:
+		case INTEL_PT_PTWRITE:
+		case INTEL_PT_PTWRITE_IP:
+		case INTEL_PT_EXSTOP:
+		case INTEL_PT_EXSTOP_IP:
+		case INTEL_PT_MWAIT:
+		case INTEL_PT_PWRE:
+		case INTEL_PT_PWRX:
+		case INTEL_PT_BBP:
+		case INTEL_PT_BIP:
+		case INTEL_PT_BEP:
+		case INTEL_PT_BEP_IP:
+		case INTEL_PT_CFE:
+		case INTEL_PT_CFE_IP:
+		case INTEL_PT_EVD:
+			intel_pt_log("ERROR: Missing TIP after FUP\n");
+			decoder->pkt_state = INTEL_PT_STATE_ERR3;
+			decoder->pkt_step = 0;
+			return -ENOENT;
+
+		case INTEL_PT_CBR:
+			intel_pt_calc_cbr(decoder);
+			break;
+
+		case INTEL_PT_OVF:
+			return intel_pt_overflow(decoder);
+
+		case INTEL_PT_TIP_PGD:
+			decoder->state.from_ip = decoder->ip;
+			if (decoder->packet.count == 0) {
+				decoder->state.to_ip = 0;
+			} else {
+				intel_pt_set_ip(decoder);
+				decoder->state.to_ip = decoder->ip;
+			}
+			decoder->pge = false;
+			decoder->continuous_period = false;
+			decoder->state.type |= INTEL_PT_TRACE_END;
+			intel_pt_update_nr(decoder);
+			return 0;
+
+		case INTEL_PT_TIP_PGE:
+			decoder->pge = true;
+			intel_pt_log("Omitting PGE ip " x64_fmt "\n",
+				     decoder->ip);
+			decoder->state.from_ip = 0;
+			if (decoder->packet.count == 0) {
+				decoder->state.to_ip = 0;
+			} else {
+				intel_pt_set_ip(decoder);
+				decoder->state.to_ip = decoder->ip;
+			}
+			decoder->state.type |= INTEL_PT_TRACE_BEGIN;
+			intel_pt_mtc_cyc_cnt_pge(decoder);
+			intel_pt_set_nr(decoder);
+			return 0;
+
+		case INTEL_PT_TIP:
+			decoder->state.from_ip = decoder->ip;
+			if (decoder->packet.count == 0) {
+				decoder->state.to_ip = 0;
+			} else {
+				intel_pt_set_ip(decoder);
+				decoder->state.to_ip = decoder->ip;
+			}
+			intel_pt_update_nr(decoder);
+			intel_pt_sample_iflag_chg(decoder);
+			return 0;
+
+		case INTEL_PT_PIP:
+			intel_pt_update_pip(decoder);
+			break;
+
+		case INTEL_PT_MTC:
+			intel_pt_calc_mtc_timestamp(decoder);
+			if (decoder->period_type == INTEL_PT_PERIOD_MTC)
+				decoder->state.type |= INTEL_PT_INSTRUCTION;
+			break;
+
+		case INTEL_PT_CYC:
+			intel_pt_calc_cyc_timestamp(decoder);
+			break;
+
+		case INTEL_PT_MODE_EXEC:
+			intel_pt_mode_exec(decoder);
+			break;
+
+		case INTEL_PT_VMCS:
+		case INTEL_PT_MNT:
+		case INTEL_PT_PAD:
+			break;
+
+		default:
+			return intel_pt_bug(decoder);
+		}
+	}
+}
+
+static int intel_pt_resample(struct intel_pt_decoder *decoder)
+{
+	decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+	decoder->state.type = INTEL_PT_INSTRUCTION;
+	decoder->state.from_ip = decoder->ip;
+	decoder->state.to_ip = 0;
+	return 0;
+}
+
+struct intel_pt_vm_tsc_info {
+	struct intel_pt_pkt pip_packet;
+	struct intel_pt_pkt vmcs_packet;
+	struct intel_pt_pkt tma_packet;
+	bool tsc, pip, vmcs, tma, psbend;
+	uint64_t ctc_delta;
+	uint64_t last_ctc;
+	int max_lookahead;
+};
+
+/* Lookahead and get the PIP, VMCS and TMA packets from PSB+ */
+static int intel_pt_vm_psb_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
+{
+	struct intel_pt_vm_tsc_info *data = pkt_info->data;
+
+	switch (pkt_info->packet.type) {
+	case INTEL_PT_PAD:
+	case INTEL_PT_MNT:
+	case INTEL_PT_MODE_EXEC:
+	case INTEL_PT_MODE_TSX:
+	case INTEL_PT_MTC:
+	case INTEL_PT_FUP:
+	case INTEL_PT_CYC:
+	case INTEL_PT_CBR:
+		break;
+
+	case INTEL_PT_TSC:
+		data->tsc = true;
+		break;
+
+	case INTEL_PT_TMA:
+		data->tma_packet = pkt_info->packet;
+		data->tma = true;
+		break;
+
+	case INTEL_PT_PIP:
+		data->pip_packet = pkt_info->packet;
+		data->pip = true;
+		break;
+
+	case INTEL_PT_VMCS:
+		data->vmcs_packet = pkt_info->packet;
+		data->vmcs = true;
+		break;
+
+	case INTEL_PT_PSBEND:
+		data->psbend = true;
+		return 1;
+
+	case INTEL_PT_TIP_PGE:
+	case INTEL_PT_PTWRITE:
+	case INTEL_PT_PTWRITE_IP:
+	case INTEL_PT_EXSTOP:
+	case INTEL_PT_EXSTOP_IP:
+	case INTEL_PT_MWAIT:
+	case INTEL_PT_PWRE:
+	case INTEL_PT_PWRX:
+	case INTEL_PT_BBP:
+	case INTEL_PT_BIP:
+	case INTEL_PT_BEP:
+	case INTEL_PT_BEP_IP:
+	case INTEL_PT_OVF:
+	case INTEL_PT_BAD:
+	case INTEL_PT_TNT:
+	case INTEL_PT_TIP_PGD:
+	case INTEL_PT_TIP:
+	case INTEL_PT_PSB:
+	case INTEL_PT_TRACESTOP:
+	case INTEL_PT_CFE:
+	case INTEL_PT_CFE_IP:
+	case INTEL_PT_EVD:
+	default:
+		return 1;
+	}
+
+	return 0;
+}
+
+struct intel_pt_ovf_fup_info {
+	int max_lookahead;
+	bool found;
+};
+
+/* Lookahead to detect a FUP packet after OVF */
+static int intel_pt_ovf_fup_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
+{
+	struct intel_pt_ovf_fup_info *data = pkt_info->data;
+
+	if (pkt_info->packet.type == INTEL_PT_CYC ||
+	    pkt_info->packet.type == INTEL_PT_MTC ||
+	    pkt_info->packet.type == INTEL_PT_TSC)
+		return !--(data->max_lookahead);
+	data->found = pkt_info->packet.type == INTEL_PT_FUP;
+	return 1;
+}
+
+static bool intel_pt_ovf_fup_lookahead(struct intel_pt_decoder *decoder)
+{
+	struct intel_pt_ovf_fup_info data = {
+		.max_lookahead = 16,
+		.found = false,
+	};
+
+	intel_pt_pkt_lookahead(decoder, intel_pt_ovf_fup_lookahead_cb, &data);
+	return data.found;
+}
+
+/* Lookahead and get the TMA packet after TSC */
+static int intel_pt_tma_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
+{
+	struct intel_pt_vm_tsc_info *data = pkt_info->data;
+
+	if (pkt_info->packet.type == INTEL_PT_CYC ||
+	    pkt_info->packet.type == INTEL_PT_MTC)
+		return !--(data->max_lookahead);
+
+	if (pkt_info->packet.type == INTEL_PT_TMA) {
+		data->tma_packet = pkt_info->packet;
+		data->tma = true;
+	}
+	return 1;
+}
+
+static uint64_t intel_pt_ctc_to_tsc(struct intel_pt_decoder *decoder, uint64_t ctc)
+{
+	if (decoder->tsc_ctc_mult)
+		return ctc * decoder->tsc_ctc_mult;
+	else
+		return multdiv(ctc, decoder->tsc_ctc_ratio_n, decoder->tsc_ctc_ratio_d);
+}
+
+static uint64_t intel_pt_calc_expected_tsc(struct intel_pt_decoder *decoder,
+					   uint32_t ctc,
+					   uint32_t fc,
+					   uint64_t last_ctc_timestamp,
+					   uint64_t ctc_delta,
+					   uint32_t last_ctc)
+{
+	/* Number of CTC ticks from last_ctc_timestamp to last_mtc */
+	uint64_t last_mtc_ctc = last_ctc + ctc_delta;
+	/*
+	 * Number of CTC ticks from there until current TMA packet. We would
+	 * expect last_mtc_ctc to be before ctc, but the TSC packet can slip
+	 * past an MTC, so a sign-extended value is used.
+	 */
+	uint64_t delta = (int16_t)((uint16_t)ctc - (uint16_t)last_mtc_ctc);
+	/* Total CTC ticks from last_ctc_timestamp to current TMA packet */
+	uint64_t new_ctc_delta = ctc_delta + delta;
+	uint64_t expected_tsc;
+
+	/*
+	 * Convert CTC ticks to TSC ticks, add the starting point
+	 * (last_ctc_timestamp) and the fast counter from the TMA packet.
+	 */
+	expected_tsc = last_ctc_timestamp + intel_pt_ctc_to_tsc(decoder, new_ctc_delta) + fc;
+
+	if (intel_pt_enable_logging) {
+		intel_pt_log_x64(last_mtc_ctc);
+		intel_pt_log_x32(last_ctc);
+		intel_pt_log_x64(ctc_delta);
+		intel_pt_log_x64(delta);
+		intel_pt_log_x32(ctc);
+		intel_pt_log_x64(new_ctc_delta);
+		intel_pt_log_x64(last_ctc_timestamp);
+		intel_pt_log_x32(fc);
+		intel_pt_log_x64(intel_pt_ctc_to_tsc(decoder, new_ctc_delta));
+		intel_pt_log_x64(expected_tsc);
+	}
+
+	return expected_tsc;
+}
+
+static uint64_t intel_pt_expected_tsc(struct intel_pt_decoder *decoder,
+				      struct intel_pt_vm_tsc_info *data)
+{
+	uint32_t ctc = data->tma_packet.payload;
+	uint32_t fc = data->tma_packet.count;
+
+	return intel_pt_calc_expected_tsc(decoder, ctc, fc,
+					  decoder->ctc_timestamp,
+					  data->ctc_delta, data->last_ctc);
+}
+
+static void intel_pt_translate_vm_tsc(struct intel_pt_decoder *decoder,
+				      struct intel_pt_vmcs_info *vmcs_info)
+{
+	uint64_t payload = decoder->packet.payload;
+
+	/* VMX adds the TSC Offset, so subtract to get host TSC */
+	decoder->packet.payload -= vmcs_info->tsc_offset;
+	/* TSC packet has only 7 bytes */
+	decoder->packet.payload &= SEVEN_BYTES;
+
+	/*
+	 * The buffer is mmapped from the data file, so this also updates the
+	 * data file.
+	 */
+	if (!decoder->vm_tm_corr_dry_run)
+		memcpy((void *)decoder->buf + 1, &decoder->packet.payload, 7);
+
+	intel_pt_log("Translated VM TSC %#" PRIx64 " -> %#" PRIx64
+		     "    VMCS %#" PRIx64 "    TSC Offset %#" PRIx64 "\n",
+		     payload, decoder->packet.payload, vmcs_info->vmcs,
+		     vmcs_info->tsc_offset);
+}
+
+static void intel_pt_translate_vm_tsc_offset(struct intel_pt_decoder *decoder,
+					     uint64_t tsc_offset)
+{
+	struct intel_pt_vmcs_info vmcs_info = {
+		.vmcs = NO_VMCS,
+		.tsc_offset = tsc_offset
+	};
+
+	intel_pt_translate_vm_tsc(decoder, &vmcs_info);
+}
+
+static inline bool in_vm(uint64_t pip_payload)
+{
+	return pip_payload & 1;
+}
+
+static inline bool pip_in_vm(struct intel_pt_pkt *pip_packet)
+{
+	return pip_packet->payload & 1;
+}
+
+static void intel_pt_print_vmcs_info(struct intel_pt_vmcs_info *vmcs_info)
+{
+	p_log("VMCS: %#" PRIx64 "  TSC Offset %#" PRIx64,
+	      vmcs_info->vmcs, vmcs_info->tsc_offset);
+}
+
+static void intel_pt_vm_tm_corr_psb(struct intel_pt_decoder *decoder,
+				    struct intel_pt_vm_tsc_info *data)
+{
+	memset(data, 0, sizeof(*data));
+	data->ctc_delta = decoder->ctc_delta;
+	data->last_ctc = decoder->last_ctc;
+	intel_pt_pkt_lookahead(decoder, intel_pt_vm_psb_lookahead_cb, data);
+	if (data->tsc && !data->psbend)
+		p_log("ERROR: PSB without PSBEND");
+	decoder->in_psb = data->psbend;
+}
+
+static void intel_pt_vm_tm_corr_first_tsc(struct intel_pt_decoder *decoder,
+					  struct intel_pt_vm_tsc_info *data,
+					  struct intel_pt_vmcs_info *vmcs_info,
+					  uint64_t host_tsc)
+{
+	if (!decoder->in_psb) {
+		/* Can't happen */
+		p_log("ERROR: First TSC is not in PSB+");
+	}
+
+	if (data->pip) {
+		if (pip_in_vm(&data->pip_packet)) { /* Guest */
+			if (vmcs_info && vmcs_info->tsc_offset) {
+				intel_pt_translate_vm_tsc(decoder, vmcs_info);
+				decoder->vm_tm_corr_reliable = true;
+			} else {
+				p_log("ERROR: First TSC, unknown TSC Offset");
+			}
+		} else { /* Host */
+			decoder->vm_tm_corr_reliable = true;
+		}
+	} else { /* Host or Guest */
+		decoder->vm_tm_corr_reliable = false;
+		if (intel_pt_time_in_range(decoder, host_tsc)) {
+			/* Assume Host */
+		} else {
+			/* Assume Guest */
+			if (vmcs_info && vmcs_info->tsc_offset)
+				intel_pt_translate_vm_tsc(decoder, vmcs_info);
+			else
+				p_log("ERROR: First TSC, no PIP, unknown TSC Offset");
+		}
+	}
+}
+
+static void intel_pt_vm_tm_corr_tsc(struct intel_pt_decoder *decoder,
+				    struct intel_pt_vm_tsc_info *data)
+{
+	struct intel_pt_vmcs_info *vmcs_info;
+	uint64_t tsc_offset = 0;
+	uint64_t vmcs;
+	bool reliable = true;
+	uint64_t expected_tsc;
+	uint64_t host_tsc;
+	uint64_t ref_timestamp;
+
+	bool assign = false;
+	bool assign_reliable = false;
+
+	/* Already have 'data' for the in_psb case */
+	if (!decoder->in_psb) {
+		memset(data, 0, sizeof(*data));
+		data->ctc_delta = decoder->ctc_delta;
+		data->last_ctc = decoder->last_ctc;
+		data->max_lookahead = 16;
+		intel_pt_pkt_lookahead(decoder, intel_pt_tma_lookahead_cb, data);
+		if (decoder->pge) {
+			data->pip = true;
+			data->pip_packet.payload = decoder->pip_payload;
+		}
+	}
+
+	/* Calculations depend on having TMA packets */
+	if (!data->tma) {
+		p_log("ERROR: TSC without TMA");
+		return;
+	}
+
+	vmcs = data->vmcs ? data->vmcs_packet.payload : decoder->vmcs;
+	if (vmcs == NO_VMCS)
+		vmcs = 0;
+
+	vmcs_info = decoder->findnew_vmcs_info(decoder->data, vmcs);
+
+	ref_timestamp = decoder->timestamp ? decoder->timestamp : decoder->buf_timestamp;
+	host_tsc = intel_pt_8b_tsc(decoder->packet.payload, ref_timestamp);
+
+	if (!decoder->ctc_timestamp) {
+		intel_pt_vm_tm_corr_first_tsc(decoder, data, vmcs_info, host_tsc);
+		return;
+	}
+
+	expected_tsc = intel_pt_expected_tsc(decoder, data);
+
+	tsc_offset = host_tsc - expected_tsc;
+
+	/* Determine if TSC is from Host or Guest */
+	if (data->pip) {
+		if (pip_in_vm(&data->pip_packet)) { /* Guest */
+			if (!vmcs_info) {
+				/* PIP NR=1 without VMCS cannot happen */
+				p_log("ERROR: Missing VMCS");
+				intel_pt_translate_vm_tsc_offset(decoder, tsc_offset);
+				decoder->vm_tm_corr_reliable = false;
+				return;
+			}
+		} else { /* Host */
+			decoder->last_reliable_timestamp = host_tsc;
+			decoder->vm_tm_corr_reliable = true;
+			return;
+		}
+	} else { /* Host or Guest */
+		reliable = false; /* Host/Guest is a guess, so not reliable */
+		if (decoder->in_psb) {
+			if (!tsc_offset)
+				return; /* Zero TSC Offset, assume Host */
+			/*
+			 * TSC packet has only 7 bytes of TSC. We have no
+			 * information about the Guest's 8th byte, but it
+			 * doesn't matter because we only need 7 bytes.
+			 * Here, since the 8th byte is unreliable and
+			 * irrelevant, compare only 7 byes.
+			 */
+			if (vmcs_info &&
+			    (tsc_offset & SEVEN_BYTES) ==
+			    (vmcs_info->tsc_offset & SEVEN_BYTES)) {
+				/* Same TSC Offset as last VMCS, assume Guest */
+				goto guest;
+			}
+		}
+		/*
+		 * Check if the host_tsc is within the expected range.
+		 * Note, we could narrow the range more by looking ahead for
+		 * the next host TSC in the same buffer, but we don't bother to
+		 * do that because this is probably good enough.
+		 */
+		if (host_tsc >= expected_tsc && intel_pt_time_in_range(decoder, host_tsc)) {
+			/* Within expected range for Host TSC, assume Host */
+			decoder->vm_tm_corr_reliable = false;
+			return;
+		}
+	}
+
+guest: /* Assuming Guest */
+
+	/* Determine whether to assign TSC Offset */
+	if (vmcs_info && vmcs_info->vmcs) {
+		if (vmcs_info->tsc_offset && vmcs_info->reliable) {
+			assign = false;
+		} else if (decoder->in_psb && data->pip && decoder->vm_tm_corr_reliable &&
+			   decoder->vm_tm_corr_continuous && decoder->vm_tm_corr_same_buf) {
+			/* Continuous tracing, TSC in a PSB is not a time loss */
+			assign = true;
+			assign_reliable = true;
+		} else if (decoder->in_psb && data->pip && decoder->vm_tm_corr_same_buf) {
+			/*
+			 * Unlikely to be a time loss TSC in a PSB which is not
+			 * at the start of a buffer.
+			 */
+			assign = true;
+			assign_reliable = false;
+		}
+	}
+
+	/* Record VMCS TSC Offset */
+	if (assign && (vmcs_info->tsc_offset != tsc_offset ||
+		       vmcs_info->reliable != assign_reliable)) {
+		bool print = vmcs_info->tsc_offset != tsc_offset;
+
+		vmcs_info->tsc_offset = tsc_offset;
+		vmcs_info->reliable = assign_reliable;
+		if (print)
+			intel_pt_print_vmcs_info(vmcs_info);
+	}
+
+	/* Determine what TSC Offset to use */
+	if (vmcs_info && vmcs_info->tsc_offset) {
+		if (!vmcs_info->reliable)
+			reliable = false;
+		intel_pt_translate_vm_tsc(decoder, vmcs_info);
+	} else {
+		reliable = false;
+		if (vmcs_info) {
+			if (!vmcs_info->error_printed) {
+				p_log("ERROR: Unknown TSC Offset for VMCS %#" PRIx64,
+				      vmcs_info->vmcs);
+				vmcs_info->error_printed = true;
+			}
+		} else {
+			if (intel_pt_print_once(decoder, INTEL_PT_PRT_ONCE_UNK_VMCS))
+				p_log("ERROR: Unknown VMCS");
+		}
+		intel_pt_translate_vm_tsc_offset(decoder, tsc_offset);
+	}
+
+	decoder->vm_tm_corr_reliable = reliable;
+}
+
+static void intel_pt_vm_tm_corr_pebs_tsc(struct intel_pt_decoder *decoder)
+{
+	uint64_t host_tsc = decoder->packet.payload;
+	uint64_t guest_tsc = decoder->packet.payload;
+	struct intel_pt_vmcs_info *vmcs_info;
+	uint64_t vmcs;
+
+	vmcs = decoder->vmcs;
+	if (vmcs == NO_VMCS)
+		vmcs = 0;
+
+	vmcs_info = decoder->findnew_vmcs_info(decoder->data, vmcs);
+
+	if (decoder->pge) {
+		if (in_vm(decoder->pip_payload)) { /* Guest */
+			if (!vmcs_info) {
+				/* PIP NR=1 without VMCS cannot happen */
+				p_log("ERROR: Missing VMCS");
+			}
+		} else { /* Host */
+			return;
+		}
+	} else { /* Host or Guest */
+		if (intel_pt_time_in_range(decoder, host_tsc)) {
+			/* Within expected range for Host TSC, assume Host */
+			return;
+		}
+	}
+
+	if (vmcs_info) {
+		/* Translate Guest TSC to Host TSC */
+		host_tsc = ((guest_tsc & SEVEN_BYTES) - vmcs_info->tsc_offset) & SEVEN_BYTES;
+		host_tsc = intel_pt_8b_tsc(host_tsc, decoder->timestamp);
+		intel_pt_log("Translated VM TSC %#" PRIx64 " -> %#" PRIx64
+			     "    VMCS %#" PRIx64 "    TSC Offset %#" PRIx64 "\n",
+			     guest_tsc, host_tsc, vmcs_info->vmcs,
+			     vmcs_info->tsc_offset);
+		if (!intel_pt_time_in_range(decoder, host_tsc) &&
+		    intel_pt_print_once(decoder, INTEL_PT_PRT_ONCE_ERANGE))
+			p_log("Timestamp out of range");
+	} else {
+		if (intel_pt_print_once(decoder, INTEL_PT_PRT_ONCE_UNK_VMCS))
+			p_log("ERROR: Unknown VMCS");
+		host_tsc = decoder->timestamp;
+	}
+
+	decoder->packet.payload = host_tsc;
+
+	if (!decoder->vm_tm_corr_dry_run)
+		memcpy((void *)decoder->buf + 1, &host_tsc, 8);
+}
+
+static int intel_pt_vm_time_correlation(struct intel_pt_decoder *decoder)
+{
+	struct intel_pt_vm_tsc_info data = { .psbend = false };
+	bool pge;
+	int err;
+
+	if (decoder->in_psb)
+		intel_pt_vm_tm_corr_psb(decoder, &data);
+
+	while (1) {
+		err = intel_pt_get_next_packet(decoder);
+		if (err == -ENOLINK)
+			continue;
+		if (err)
+			break;
+
+		switch (decoder->packet.type) {
+		case INTEL_PT_TIP_PGD:
+			decoder->pge = false;
+			decoder->vm_tm_corr_continuous = false;
+			break;
+
+		case INTEL_PT_TNT:
+		case INTEL_PT_TIP:
+		case INTEL_PT_TIP_PGE:
+			decoder->pge = true;
+			break;
+
+		case INTEL_PT_OVF:
+			decoder->in_psb = false;
+			pge = decoder->pge;
+			decoder->pge = intel_pt_ovf_fup_lookahead(decoder);
+			if (pge != decoder->pge)
+				intel_pt_log("Surprising PGE change in OVF!");
+			if (!decoder->pge)
+				decoder->vm_tm_corr_continuous = false;
+			break;
+
+		case INTEL_PT_FUP:
+			if (decoder->in_psb)
+				decoder->pge = true;
+			break;
+
+		case INTEL_PT_TRACESTOP:
+			decoder->pge = false;
+			decoder->vm_tm_corr_continuous = false;
+			decoder->have_tma = false;
+			break;
+
+		case INTEL_PT_PSB:
+			intel_pt_vm_tm_corr_psb(decoder, &data);
+			break;
+
+		case INTEL_PT_PIP:
+			decoder->pip_payload = decoder->packet.payload;
+			break;
+
+		case INTEL_PT_MTC:
+			intel_pt_calc_mtc_timestamp(decoder);
+			break;
+
+		case INTEL_PT_TSC:
+			intel_pt_vm_tm_corr_tsc(decoder, &data);
+			intel_pt_calc_tsc_timestamp(decoder);
+			decoder->vm_tm_corr_same_buf = true;
+			decoder->vm_tm_corr_continuous = decoder->pge;
+			break;
+
+		case INTEL_PT_TMA:
+			intel_pt_calc_tma(decoder);
+			break;
+
+		case INTEL_PT_CYC:
+			intel_pt_calc_cyc_timestamp(decoder);
+			break;
+
+		case INTEL_PT_CBR:
+			intel_pt_calc_cbr(decoder);
+			break;
+
+		case INTEL_PT_PSBEND:
+			decoder->in_psb = false;
+			data.psbend = false;
+			break;
+
+		case INTEL_PT_VMCS:
+			if (decoder->packet.payload != NO_VMCS)
+				decoder->vmcs = decoder->packet.payload;
+			break;
+
+		case INTEL_PT_BBP:
+			decoder->blk_type = decoder->packet.payload;
+			break;
+
+		case INTEL_PT_BIP:
+			if (decoder->blk_type == INTEL_PT_PEBS_BASIC &&
+			    decoder->packet.count == 2)
+				intel_pt_vm_tm_corr_pebs_tsc(decoder);
+			break;
+
+		case INTEL_PT_BEP:
+		case INTEL_PT_BEP_IP:
+			decoder->blk_type = 0;
+			break;
+
+		case INTEL_PT_CFE:
+		case INTEL_PT_CFE_IP:
+		case INTEL_PT_EVD:
+		case INTEL_PT_MODE_EXEC:
+		case INTEL_PT_MODE_TSX:
+		case INTEL_PT_MNT:
+		case INTEL_PT_PAD:
+		case INTEL_PT_PTWRITE_IP:
+		case INTEL_PT_PTWRITE:
+		case INTEL_PT_MWAIT:
+		case INTEL_PT_PWRE:
+		case INTEL_PT_EXSTOP_IP:
+		case INTEL_PT_EXSTOP:
+		case INTEL_PT_PWRX:
+		case INTEL_PT_BAD: /* Does not happen */
+		default:
+			break;
+		}
+	}
+
+	return err;
+}
+
+#define HOP_PROCESS	0
+#define HOP_IGNORE	1
+#define HOP_RETURN	2
+#define HOP_AGAIN	3
+
+static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder);
+
+/* Hop mode: Ignore TNT, do not walk code, but get ip from FUPs and TIPs */
+static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, int *err)
+{
+	*err = 0;
+
+	/* Leap from PSB to PSB, getting ip from FUP within PSB+ */
+	if (decoder->leap && !decoder->in_psb && decoder->packet.type != INTEL_PT_PSB) {
+		*err = intel_pt_scan_for_psb(decoder);
+		if (*err)
+			return HOP_RETURN;
+	}
+
+	switch (decoder->packet.type) {
+	case INTEL_PT_TNT:
+		return HOP_IGNORE;
+
+	case INTEL_PT_TIP_PGD:
+		decoder->pge = false;
+		if (!decoder->packet.count) {
+			intel_pt_set_nr(decoder);
+			return HOP_IGNORE;
+		}
+		intel_pt_set_ip(decoder);
+		decoder->state.type |= INTEL_PT_TRACE_END;
+		decoder->state.from_ip = 0;
+		decoder->state.to_ip = decoder->ip;
+		intel_pt_update_nr(decoder);
+		return HOP_RETURN;
+
+	case INTEL_PT_TIP:
+		if (!decoder->packet.count) {
+			intel_pt_set_nr(decoder);
+			return HOP_IGNORE;
+		}
+		intel_pt_set_ip(decoder);
+		decoder->state.type = INTEL_PT_INSTRUCTION;
+		decoder->state.from_ip = decoder->ip;
+		decoder->state.to_ip = 0;
+		intel_pt_update_nr(decoder);
+		intel_pt_sample_iflag_chg(decoder);
+		return HOP_RETURN;
+
+	case INTEL_PT_FUP:
+		if (!decoder->packet.count)
+			return HOP_IGNORE;
+		intel_pt_set_ip(decoder);
+		if (decoder->set_fup_mwait || decoder->set_fup_pwre)
+			*no_tip = true;
+		if (!decoder->branch_enable || !decoder->pge)
+			*no_tip = true;
+		if (*no_tip) {
+			decoder->state.type = INTEL_PT_INSTRUCTION;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			intel_pt_fup_event(decoder, *no_tip);
+			return HOP_RETURN;
+		}
+		intel_pt_fup_event(decoder, *no_tip);
+		decoder->state.type |= INTEL_PT_INSTRUCTION | INTEL_PT_BRANCH;
+		*err = intel_pt_walk_fup_tip(decoder);
+		if (!*err && decoder->state.to_ip)
+			decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+		return HOP_RETURN;
+
+	case INTEL_PT_PSB:
+		decoder->state.psb_offset = decoder->pos;
+		decoder->psb_ip = 0;
+		decoder->last_ip = 0;
+		decoder->have_last_ip = true;
+		*err = intel_pt_walk_psbend(decoder);
+		if (*err == -EAGAIN)
+			return HOP_AGAIN;
+		if (*err)
+			return HOP_RETURN;
+		decoder->state.type = INTEL_PT_PSB_EVT;
+		if (decoder->psb_ip) {
+			decoder->state.type |= INTEL_PT_INSTRUCTION;
+			decoder->ip = decoder->psb_ip;
+		}
+		decoder->state.from_ip = decoder->psb_ip;
+		decoder->state.to_ip = 0;
+		return HOP_RETURN;
+
+	case INTEL_PT_BAD:
+	case INTEL_PT_PAD:
+	case INTEL_PT_TIP_PGE:
+	case INTEL_PT_TSC:
+	case INTEL_PT_TMA:
+	case INTEL_PT_MODE_EXEC:
+	case INTEL_PT_MODE_TSX:
+	case INTEL_PT_MTC:
+	case INTEL_PT_CYC:
+	case INTEL_PT_VMCS:
+	case INTEL_PT_PSBEND:
+	case INTEL_PT_CBR:
+	case INTEL_PT_TRACESTOP:
+	case INTEL_PT_PIP:
+	case INTEL_PT_OVF:
+	case INTEL_PT_MNT:
+	case INTEL_PT_PTWRITE:
+	case INTEL_PT_PTWRITE_IP:
+	case INTEL_PT_EXSTOP:
+	case INTEL_PT_EXSTOP_IP:
+	case INTEL_PT_MWAIT:
+	case INTEL_PT_PWRE:
+	case INTEL_PT_PWRX:
+	case INTEL_PT_BBP:
+	case INTEL_PT_BIP:
+	case INTEL_PT_BEP:
+	case INTEL_PT_BEP_IP:
+	case INTEL_PT_CFE:
+	case INTEL_PT_CFE_IP:
+	case INTEL_PT_EVD:
+	default:
+		return HOP_PROCESS;
+	}
+}
+
+struct intel_pt_psb_info {
+	struct intel_pt_pkt fup_packet;
+	bool fup;
+	int after_psbend;
+};
+
+/* Lookahead and get the FUP packet from PSB+ */
+static int intel_pt_psb_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
+{
+	struct intel_pt_psb_info *data = pkt_info->data;
+
+	switch (pkt_info->packet.type) {
+	case INTEL_PT_PAD:
+	case INTEL_PT_MNT:
+	case INTEL_PT_TSC:
+	case INTEL_PT_TMA:
+	case INTEL_PT_MODE_EXEC:
+	case INTEL_PT_MODE_TSX:
+	case INTEL_PT_MTC:
+	case INTEL_PT_CYC:
+	case INTEL_PT_VMCS:
+	case INTEL_PT_CBR:
+	case INTEL_PT_PIP:
+		if (data->after_psbend) {
+			data->after_psbend -= 1;
+			if (!data->after_psbend)
+				return 1;
+		}
+		break;
+
+	case INTEL_PT_FUP:
+		if (data->after_psbend)
+			return 1;
+		if (data->fup || pkt_info->packet.count == 0)
+			return 1;
+		data->fup_packet = pkt_info->packet;
+		data->fup = true;
+		break;
+
+	case INTEL_PT_PSBEND:
+		if (!data->fup)
+			return 1;
+		/* Keep going to check for a TIP.PGE */
+		data->after_psbend = 6;
+		break;
+
+	case INTEL_PT_TIP_PGE:
+		/* Ignore FUP in PSB+ if followed by TIP.PGE */
+		if (data->after_psbend)
+			data->fup = false;
+		return 1;
+
+	case INTEL_PT_PTWRITE:
+	case INTEL_PT_PTWRITE_IP:
+	case INTEL_PT_EXSTOP:
+	case INTEL_PT_EXSTOP_IP:
+	case INTEL_PT_MWAIT:
+	case INTEL_PT_PWRE:
+	case INTEL_PT_PWRX:
+	case INTEL_PT_BBP:
+	case INTEL_PT_BIP:
+	case INTEL_PT_BEP:
+	case INTEL_PT_BEP_IP:
+	case INTEL_PT_CFE:
+	case INTEL_PT_CFE_IP:
+	case INTEL_PT_EVD:
+		if (data->after_psbend) {
+			data->after_psbend -= 1;
+			if (!data->after_psbend)
+				return 1;
+			break;
+		}
+		return 1;
+
+	case INTEL_PT_OVF:
+	case INTEL_PT_BAD:
+	case INTEL_PT_TNT:
+	case INTEL_PT_TIP_PGD:
+	case INTEL_PT_TIP:
+	case INTEL_PT_PSB:
+	case INTEL_PT_TRACESTOP:
+	default:
+		return 1;
+	}
+
+	return 0;
+}
+
+static int intel_pt_psb(struct intel_pt_decoder *decoder)
+{
+	int err;
+
+	decoder->last_ip = 0;
+	decoder->psb_ip = 0;
+	decoder->have_last_ip = true;
+	intel_pt_clear_stack(&decoder->stack);
+	err = intel_pt_walk_psbend(decoder);
+	if (err)
+		return err;
+	decoder->state.type = INTEL_PT_PSB_EVT;
+	decoder->state.from_ip = decoder->psb_ip;
+	decoder->state.to_ip = 0;
+	return 0;
+}
+
+static int intel_pt_fup_in_psb(struct intel_pt_decoder *decoder)
+{
+	int err;
+
+	if (decoder->ip != decoder->last_ip) {
+		err = intel_pt_walk_fup(decoder);
+		if (!err || err != -EAGAIN)
+			return err;
+	}
+
+	decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+	err = intel_pt_psb(decoder);
+	if (err) {
+		decoder->pkt_state = INTEL_PT_STATE_ERR3;
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+static bool intel_pt_psb_with_fup(struct intel_pt_decoder *decoder, int *err)
+{
+	struct intel_pt_psb_info data = { .fup = false };
+
+	if (!decoder->branch_enable)
+		return false;
+
+	intel_pt_pkt_lookahead(decoder, intel_pt_psb_lookahead_cb, &data);
+	if (!data.fup)
+		return false;
+
+	decoder->packet = data.fup_packet;
+	intel_pt_set_last_ip(decoder);
+	decoder->pkt_state = INTEL_PT_STATE_FUP_IN_PSB;
+
+	*err = intel_pt_fup_in_psb(decoder);
+
+	return true;
+}
+
+static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
+{
+	int last_packet_type = INTEL_PT_PAD;
+	bool no_tip = false;
+	int err;
+
+	while (1) {
+		err = intel_pt_get_next_packet(decoder);
+		if (err)
+			return err;
+next:
+		err = 0;
+		if (decoder->cyc_threshold) {
+			if (decoder->sample_cyc && last_packet_type != INTEL_PT_CYC)
+				decoder->sample_cyc = false;
+			last_packet_type = decoder->packet.type;
+		}
+
+		if (decoder->hop) {
+			switch (intel_pt_hop_trace(decoder, &no_tip, &err)) {
+			case HOP_IGNORE:
+				continue;
+			case HOP_RETURN:
+				return err;
+			case HOP_AGAIN:
+				goto next;
+			default:
+				break;
+			}
+		}
+
+		switch (decoder->packet.type) {
+		case INTEL_PT_TNT:
+			if (!decoder->packet.count)
+				break;
+			decoder->tnt = decoder->packet;
+			decoder->pkt_state = INTEL_PT_STATE_TNT;
+			err = intel_pt_walk_tnt(decoder);
+			if (err == -EAGAIN)
+				break;
+			return err;
+
+		case INTEL_PT_TIP_PGD:
+			if (decoder->packet.count != 0)
+				intel_pt_set_last_ip(decoder);
+			decoder->pkt_state = INTEL_PT_STATE_TIP_PGD;
+			return intel_pt_walk_tip(decoder);
+
+		case INTEL_PT_TIP_PGE: {
+			decoder->pge = true;
+			decoder->overflow = false;
+			intel_pt_mtc_cyc_cnt_pge(decoder);
+			intel_pt_set_nr(decoder);
+			if (decoder->packet.count == 0) {
+				intel_pt_log_at("Skipping zero TIP.PGE",
+						decoder->pos);
+				break;
+			}
+			intel_pt_sample_iflag_chg(decoder);
+			intel_pt_set_ip(decoder);
+			decoder->state.from_ip = 0;
+			decoder->state.to_ip = decoder->ip;
+			decoder->state.type |= INTEL_PT_TRACE_BEGIN;
+			/*
+			 * In hop mode, resample to get the to_ip as an
+			 * "instruction" sample.
+			 */
+			if (decoder->hop)
+				decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+			return 0;
+		}
+
+		case INTEL_PT_OVF:
+			return intel_pt_overflow(decoder);
+
+		case INTEL_PT_TIP:
+			if (decoder->packet.count != 0)
+				intel_pt_set_last_ip(decoder);
+			decoder->pkt_state = INTEL_PT_STATE_TIP;
+			return intel_pt_walk_tip(decoder);
+
+		case INTEL_PT_FUP:
+			if (decoder->packet.count == 0) {
+				intel_pt_log_at("Skipping zero FUP",
+						decoder->pos);
+				no_tip = false;
+				break;
+			}
+			intel_pt_set_last_ip(decoder);
+			if (!decoder->branch_enable || !decoder->pge) {
+				decoder->ip = decoder->last_ip;
+				if (intel_pt_fup_event(decoder, no_tip))
+					return 0;
+				no_tip = false;
+				break;
+			}
+			if (decoder->set_fup_mwait)
+				no_tip = true;
+			if (no_tip)
+				decoder->pkt_state = INTEL_PT_STATE_FUP_NO_TIP;
+			else
+				decoder->pkt_state = INTEL_PT_STATE_FUP;
+			err = intel_pt_walk_fup(decoder);
+			if (err != -EAGAIN)
+				return err;
+			if (no_tip) {
+				no_tip = false;
+				break;
+			}
+			return intel_pt_walk_fup_tip(decoder);
+
+		case INTEL_PT_TRACESTOP:
+			decoder->pge = false;
+			decoder->continuous_period = false;
+			intel_pt_clear_tx_flags(decoder);
+			decoder->have_tma = false;
+			break;
+
+		case INTEL_PT_PSB:
+			decoder->state.psb_offset = decoder->pos;
+			decoder->psb_ip = 0;
+			if (intel_pt_psb_with_fup(decoder, &err))
+				return err;
+			err = intel_pt_psb(decoder);
+			if (err == -EAGAIN)
+				goto next;
+			return err;
+
+		case INTEL_PT_PIP:
+			intel_pt_update_pip(decoder);
+			break;
+
+		case INTEL_PT_MTC:
+			intel_pt_calc_mtc_timestamp(decoder);
+			if (decoder->period_type != INTEL_PT_PERIOD_MTC)
+				break;
+			/*
+			 * Ensure that there has been an instruction since the
+			 * last MTC.
+			 */
+			if (!decoder->mtc_insn)
+				break;
+			decoder->mtc_insn = false;
+			/* Ensure that there is a timestamp */
+			if (!decoder->timestamp)
+				break;
+			decoder->state.type = INTEL_PT_INSTRUCTION;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			decoder->mtc_insn = false;
+			return 0;
+
+		case INTEL_PT_TSC:
+			intel_pt_calc_tsc_timestamp(decoder);
+			break;
+
+		case INTEL_PT_TMA:
+			intel_pt_calc_tma(decoder);
+			break;
+
+		case INTEL_PT_CYC:
+			intel_pt_calc_cyc_timestamp(decoder);
+			break;
+
+		case INTEL_PT_CBR:
+			intel_pt_calc_cbr(decoder);
+			if (decoder->cbr != decoder->cbr_seen) {
+				decoder->state.type = 0;
+				return 0;
+			}
+			break;
+
+		case INTEL_PT_MODE_EXEC:
+			intel_pt_mode_exec(decoder);
+			err = intel_pt_get_next_packet(decoder);
+			if (err)
+				return err;
+			if (decoder->packet.type == INTEL_PT_FUP) {
+				decoder->set_fup_mode_exec = true;
+				no_tip = true;
+			}
+			goto next;
+
+		case INTEL_PT_MODE_TSX:
+			/* MODE_TSX need not be followed by FUP */
+			if (!decoder->pge || decoder->in_psb) {
+				intel_pt_update_in_tx(decoder);
+				break;
+			}
+			err = intel_pt_mode_tsx(decoder, &no_tip);
+			if (err)
+				return err;
+			goto next;
+
+		case INTEL_PT_BAD: /* Does not happen */
+			return intel_pt_bug(decoder);
+
+		case INTEL_PT_PSBEND:
+		case INTEL_PT_VMCS:
+		case INTEL_PT_MNT:
+		case INTEL_PT_PAD:
+			break;
+
+		case INTEL_PT_PTWRITE_IP:
+			decoder->fup_ptw_payload = decoder->packet.payload;
+			err = intel_pt_get_next_packet(decoder);
+			if (err)
+				return err;
+			if (decoder->packet.type == INTEL_PT_FUP) {
+				decoder->set_fup_ptw = true;
+				no_tip = true;
+			} else {
+				intel_pt_log_at("ERROR: Missing FUP after PTWRITE",
+						decoder->pos);
+			}
+			goto next;
+
+		case INTEL_PT_PTWRITE:
+			decoder->state.type = INTEL_PT_PTW;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			decoder->state.ptw_payload = decoder->packet.payload;
+			return 0;
+
+		case INTEL_PT_MWAIT:
+			decoder->fup_mwait_payload = decoder->packet.payload;
+			decoder->set_fup_mwait = true;
+			break;
+
+		case INTEL_PT_PWRE:
+			if (decoder->set_fup_mwait) {
+				decoder->fup_pwre_payload =
+							decoder->packet.payload;
+				decoder->set_fup_pwre = true;
+				break;
+			}
+			decoder->state.type = INTEL_PT_PWR_ENTRY;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			decoder->state.pwrx_payload = decoder->packet.payload;
+			return 0;
+
+		case INTEL_PT_EXSTOP_IP:
+			err = intel_pt_get_next_packet(decoder);
+			if (err)
+				return err;
+			if (decoder->packet.type == INTEL_PT_FUP) {
+				decoder->set_fup_exstop = true;
+				no_tip = true;
+			} else {
+				intel_pt_log_at("ERROR: Missing FUP after EXSTOP",
+						decoder->pos);
+			}
+			goto next;
+
+		case INTEL_PT_EXSTOP:
+			decoder->state.type = INTEL_PT_EX_STOP;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			return 0;
+
+		case INTEL_PT_PWRX:
+			decoder->state.type = INTEL_PT_PWR_EXIT;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			decoder->state.pwrx_payload = decoder->packet.payload;
+			return 0;
+
+		case INTEL_PT_BBP:
+			intel_pt_bbp(decoder);
+			break;
+
+		case INTEL_PT_BIP:
+			intel_pt_bip(decoder);
+			break;
+
+		case INTEL_PT_BEP:
+			decoder->state.type = INTEL_PT_BLK_ITEMS;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			return 0;
+
+		case INTEL_PT_BEP_IP:
+			err = intel_pt_get_next_packet(decoder);
+			if (err)
+				return err;
+			if (decoder->packet.type == INTEL_PT_FUP) {
+				decoder->set_fup_bep = true;
+				no_tip = true;
+			} else {
+				intel_pt_log_at("ERROR: Missing FUP after BEP",
+						decoder->pos);
+			}
+			goto next;
+
+		case INTEL_PT_CFE:
+			decoder->fup_cfe_pkt = decoder->packet;
+			decoder->set_fup_cfe = true;
+			if (!decoder->pge) {
+				intel_pt_fup_event(decoder, true);
+				return 0;
+			}
+			break;
+
+		case INTEL_PT_CFE_IP:
+			decoder->fup_cfe_pkt = decoder->packet;
+			err = intel_pt_get_next_packet(decoder);
+			if (err)
+				return err;
+			if (decoder->packet.type == INTEL_PT_FUP) {
+				decoder->set_fup_cfe_ip = true;
+				no_tip = true;
+			} else {
+				intel_pt_log_at("ERROR: Missing FUP after CFE",
+						decoder->pos);
+			}
+			goto next;
+
+		case INTEL_PT_EVD:
+			err = intel_pt_evd(decoder);
+			if (err)
+				return err;
+			break;
+
+		default:
+			return intel_pt_bug(decoder);
+		}
+	}
+}
+
+static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder)
+{
+	return decoder->packet.count &&
+	       (decoder->have_last_ip || decoder->packet.count == 3 ||
+		decoder->packet.count == 6);
+}
+
+/* Walk PSB+ packets to get in sync. */
+static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
+{
+	int err;
+
+	decoder->in_psb = true;
+
+	while (1) {
+		err = intel_pt_get_next_packet(decoder);
+		if (err)
+			goto out;
+
+		switch (decoder->packet.type) {
+		case INTEL_PT_TIP_PGD:
+			decoder->continuous_period = false;
+			__fallthrough;
+		case INTEL_PT_TIP_PGE:
+		case INTEL_PT_TIP:
+		case INTEL_PT_PTWRITE:
+		case INTEL_PT_PTWRITE_IP:
+		case INTEL_PT_EXSTOP:
+		case INTEL_PT_EXSTOP_IP:
+		case INTEL_PT_MWAIT:
+		case INTEL_PT_PWRE:
+		case INTEL_PT_PWRX:
+		case INTEL_PT_BBP:
+		case INTEL_PT_BIP:
+		case INTEL_PT_BEP:
+		case INTEL_PT_BEP_IP:
+		case INTEL_PT_CFE:
+		case INTEL_PT_CFE_IP:
+		case INTEL_PT_EVD:
+			intel_pt_log("ERROR: Unexpected packet\n");
+			err = -ENOENT;
+			goto out;
+
+		case INTEL_PT_FUP:
+			decoder->pge = true;
+			if (intel_pt_have_ip(decoder)) {
+				uint64_t current_ip = decoder->ip;
+
+				intel_pt_set_ip(decoder);
+				decoder->psb_ip = decoder->ip;
+				if (current_ip)
+					intel_pt_log_to("Setting IP",
+							decoder->ip);
+			}
+			break;
+
+		case INTEL_PT_MTC:
+			intel_pt_calc_mtc_timestamp(decoder);
+			break;
+
+		case INTEL_PT_TSC:
+			intel_pt_calc_tsc_timestamp(decoder);
+			break;
+
+		case INTEL_PT_TMA:
+			intel_pt_calc_tma(decoder);
+			break;
+
+		case INTEL_PT_CYC:
+			intel_pt_calc_cyc_timestamp(decoder);
+			break;
+
+		case INTEL_PT_CBR:
+			intel_pt_calc_cbr(decoder);
+			break;
+
+		case INTEL_PT_PIP:
+			intel_pt_set_pip(decoder);
+			break;
+
+		case INTEL_PT_MODE_EXEC:
+			intel_pt_mode_exec_status(decoder);
+			break;
+
+		case INTEL_PT_MODE_TSX:
+			intel_pt_update_in_tx(decoder);
+			break;
+
+		case INTEL_PT_TRACESTOP:
+			decoder->pge = false;
+			decoder->continuous_period = false;
+			intel_pt_clear_tx_flags(decoder);
+			__fallthrough;
+
+		case INTEL_PT_TNT:
+			decoder->have_tma = false;
+			intel_pt_log("ERROR: Unexpected packet\n");
+			if (decoder->ip)
+				decoder->pkt_state = INTEL_PT_STATE_ERR4;
+			else
+				decoder->pkt_state = INTEL_PT_STATE_ERR3;
+			err = -ENOENT;
+			goto out;
+
+		case INTEL_PT_BAD: /* Does not happen */
+			err = intel_pt_bug(decoder);
+			goto out;
+
+		case INTEL_PT_OVF:
+			err = intel_pt_overflow(decoder);
+			goto out;
+
+		case INTEL_PT_PSBEND:
+			err = 0;
+			goto out;
+
+		case INTEL_PT_PSB:
+		case INTEL_PT_VMCS:
+		case INTEL_PT_MNT:
+		case INTEL_PT_PAD:
+		default:
+			break;
+		}
+	}
+out:
+	decoder->in_psb = false;
+
+	return err;
+}
+
+static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
+{
+	int err;
+
+	while (1) {
+		err = intel_pt_get_next_packet(decoder);
+		if (err)
+			return err;
+
+		switch (decoder->packet.type) {
+		case INTEL_PT_TIP_PGD:
+			decoder->continuous_period = false;
+			decoder->pge = false;
+			if (intel_pt_have_ip(decoder))
+				intel_pt_set_ip(decoder);
+			if (!decoder->ip)
+				break;
+			decoder->state.type |= INTEL_PT_TRACE_END;
+			return 0;
+
+		case INTEL_PT_TIP_PGE:
+			decoder->pge = true;
+			intel_pt_mtc_cyc_cnt_pge(decoder);
+			if (intel_pt_have_ip(decoder))
+				intel_pt_set_ip(decoder);
+			if (!decoder->ip)
+				break;
+			decoder->state.type |= INTEL_PT_TRACE_BEGIN;
+			return 0;
+
+		case INTEL_PT_TIP:
+			decoder->pge = true;
+			if (intel_pt_have_ip(decoder))
+				intel_pt_set_ip(decoder);
+			if (!decoder->ip)
+				break;
+			return 0;
+
+		case INTEL_PT_FUP:
+			if (intel_pt_have_ip(decoder))
+				intel_pt_set_ip(decoder);
+			if (decoder->ip)
+				return 0;
+			break;
+
+		case INTEL_PT_MTC:
+			intel_pt_calc_mtc_timestamp(decoder);
+			break;
+
+		case INTEL_PT_TSC:
+			intel_pt_calc_tsc_timestamp(decoder);
+			break;
+
+		case INTEL_PT_TMA:
+			intel_pt_calc_tma(decoder);
+			break;
+
+		case INTEL_PT_CYC:
+			intel_pt_calc_cyc_timestamp(decoder);
+			break;
+
+		case INTEL_PT_CBR:
+			intel_pt_calc_cbr(decoder);
+			break;
+
+		case INTEL_PT_PIP:
+			intel_pt_set_pip(decoder);
+			break;
+
+		case INTEL_PT_MODE_EXEC:
+			intel_pt_mode_exec_status(decoder);
+			break;
+
+		case INTEL_PT_MODE_TSX:
+			intel_pt_update_in_tx(decoder);
+			break;
+
+		case INTEL_PT_OVF:
+			return intel_pt_overflow(decoder);
+
+		case INTEL_PT_BAD: /* Does not happen */
+			return intel_pt_bug(decoder);
+
+		case INTEL_PT_TRACESTOP:
+			decoder->pge = false;
+			decoder->continuous_period = false;
+			intel_pt_clear_tx_flags(decoder);
+			decoder->have_tma = false;
+			break;
+
+		case INTEL_PT_PSB:
+			decoder->state.psb_offset = decoder->pos;
+			decoder->psb_ip = 0;
+			decoder->last_ip = 0;
+			decoder->have_last_ip = true;
+			intel_pt_clear_stack(&decoder->stack);
+			err = intel_pt_walk_psb(decoder);
+			if (err)
+				return err;
+			decoder->state.type = INTEL_PT_PSB_EVT;
+			decoder->state.from_ip = decoder->psb_ip;
+			decoder->state.to_ip = 0;
+			return 0;
+
+		case INTEL_PT_TNT:
+		case INTEL_PT_PSBEND:
+		case INTEL_PT_VMCS:
+		case INTEL_PT_MNT:
+		case INTEL_PT_PAD:
+		case INTEL_PT_PTWRITE:
+		case INTEL_PT_PTWRITE_IP:
+		case INTEL_PT_EXSTOP:
+		case INTEL_PT_EXSTOP_IP:
+		case INTEL_PT_MWAIT:
+		case INTEL_PT_PWRE:
+		case INTEL_PT_PWRX:
+		case INTEL_PT_BBP:
+		case INTEL_PT_BIP:
+		case INTEL_PT_BEP:
+		case INTEL_PT_BEP_IP:
+		case INTEL_PT_CFE:
+		case INTEL_PT_CFE_IP:
+		case INTEL_PT_EVD:
+		default:
+			break;
+		}
+	}
+}
+
+static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
+{
+	int err;
+
+	intel_pt_clear_fup_event(decoder);
+	decoder->overflow = false;
+
+	if (!decoder->branch_enable) {
+		decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+		decoder->state.type = 0; /* Do not have a sample */
+		return 0;
+	}
+
+	intel_pt_log("Scanning for full IP\n");
+	err = intel_pt_walk_to_ip(decoder);
+	if (err || ((decoder->state.type & INTEL_PT_PSB_EVT) && !decoder->ip))
+		return err;
+
+	/* In hop mode, resample to get the to_ip as an "instruction" sample */
+	if (decoder->hop)
+		decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+	else
+		decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+
+	decoder->state.from_ip = 0;
+	decoder->state.to_ip = decoder->ip;
+	intel_pt_log_to("Setting IP", decoder->ip);
+
+	return 0;
+}
+
+static int intel_pt_part_psb(struct intel_pt_decoder *decoder)
+{
+	const unsigned char *end = decoder->buf + decoder->len;
+	size_t i;
+
+	for (i = INTEL_PT_PSB_LEN - 1; i; i--) {
+		if (i > decoder->len)
+			continue;
+		if (!memcmp(end - i, INTEL_PT_PSB_STR, i))
+			return i;
+	}
+	return 0;
+}
+
+static int intel_pt_rest_psb(struct intel_pt_decoder *decoder, int part_psb)
+{
+	size_t rest_psb = INTEL_PT_PSB_LEN - part_psb;
+	const char *psb = INTEL_PT_PSB_STR;
+
+	if (rest_psb > decoder->len ||
+	    memcmp(decoder->buf, psb + part_psb, rest_psb))
+		return 0;
+
+	return rest_psb;
+}
+
+static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder,
+				  int part_psb)
+{
+	int rest_psb, ret;
+
+	decoder->pos += decoder->len;
+	decoder->len = 0;
+
+	ret = intel_pt_get_next_data(decoder, false);
+	if (ret)
+		return ret;
+
+	rest_psb = intel_pt_rest_psb(decoder, part_psb);
+	if (!rest_psb)
+		return 0;
+
+	decoder->pos -= part_psb;
+	decoder->next_buf = decoder->buf + rest_psb;
+	decoder->next_len = decoder->len - rest_psb;
+	memcpy(decoder->temp_buf, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
+	decoder->buf = decoder->temp_buf;
+	decoder->len = INTEL_PT_PSB_LEN;
+
+	return 0;
+}
+
+static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder)
+{
+	unsigned char *next;
+	int ret;
+
+	intel_pt_log("Scanning for PSB\n");
+	while (1) {
+		if (!decoder->len) {
+			ret = intel_pt_get_next_data(decoder, false);
+			if (ret)
+				return ret;
+		}
+
+		next = memmem(decoder->buf, decoder->len, INTEL_PT_PSB_STR,
+			      INTEL_PT_PSB_LEN);
+		if (!next) {
+			int part_psb;
+
+			part_psb = intel_pt_part_psb(decoder);
+			if (part_psb) {
+				ret = intel_pt_get_split_psb(decoder, part_psb);
+				if (ret)
+					return ret;
+			} else {
+				decoder->pos += decoder->len;
+				decoder->len = 0;
+			}
+			continue;
+		}
+
+		decoder->pkt_step = next - decoder->buf;
+		return intel_pt_get_next_packet(decoder);
+	}
+}
+
+static int intel_pt_sync(struct intel_pt_decoder *decoder)
+{
+	int err;
+
+	decoder->pge = false;
+	decoder->continuous_period = false;
+	decoder->have_last_ip = false;
+	decoder->last_ip = 0;
+	decoder->psb_ip = 0;
+	decoder->ip = 0;
+	intel_pt_clear_stack(&decoder->stack);
+
+	err = intel_pt_scan_for_psb(decoder);
+	if (err)
+		return err;
+
+	if (decoder->vm_time_correlation) {
+		decoder->in_psb = true;
+		if (!decoder->timestamp)
+			decoder->timestamp = 1;
+		decoder->state.type = 0;
+		decoder->pkt_state = INTEL_PT_STATE_VM_TIME_CORRELATION;
+		return 0;
+	}
+
+	decoder->have_last_ip = true;
+	decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+
+	err = intel_pt_walk_psb(decoder);
+	if (err)
+		return err;
+
+	decoder->state.type = INTEL_PT_PSB_EVT; /* Only PSB sample */
+	decoder->state.from_ip = decoder->psb_ip;
+	decoder->state.to_ip = 0;
+
+	if (decoder->ip) {
+		/*
+		 * In hop mode, resample to get the PSB FUP ip as an
+		 * "instruction" sample.
+		 */
+		if (decoder->hop)
+			decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+		else
+			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+	}
+
+	return 0;
+}
+
+static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder)
+{
+	uint64_t est = decoder->sample_insn_cnt << 1;
+
+	if (!decoder->cbr || !decoder->max_non_turbo_ratio)
+		goto out;
+
+	est *= decoder->max_non_turbo_ratio;
+	est /= decoder->cbr;
+out:
+	return decoder->sample_timestamp + est;
+}
+
+const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
+{
+	int err;
+
+	do {
+		decoder->state.type = INTEL_PT_BRANCH;
+		decoder->state.flags = 0;
+
+		switch (decoder->pkt_state) {
+		case INTEL_PT_STATE_NO_PSB:
+			err = intel_pt_sync(decoder);
+			break;
+		case INTEL_PT_STATE_NO_IP:
+			decoder->have_last_ip = false;
+			decoder->last_ip = 0;
+			decoder->ip = 0;
+			__fallthrough;
+		case INTEL_PT_STATE_ERR_RESYNC:
+			err = intel_pt_sync_ip(decoder);
+			break;
+		case INTEL_PT_STATE_IN_SYNC:
+			err = intel_pt_walk_trace(decoder);
+			break;
+		case INTEL_PT_STATE_TNT:
+		case INTEL_PT_STATE_TNT_CONT:
+			err = intel_pt_walk_tnt(decoder);
+			if (err == -EAGAIN)
+				err = intel_pt_walk_trace(decoder);
+			break;
+		case INTEL_PT_STATE_TIP:
+		case INTEL_PT_STATE_TIP_PGD:
+			err = intel_pt_walk_tip(decoder);
+			break;
+		case INTEL_PT_STATE_FUP:
+			err = intel_pt_walk_fup(decoder);
+			if (err == -EAGAIN)
+				err = intel_pt_walk_fup_tip(decoder);
+			break;
+		case INTEL_PT_STATE_FUP_NO_TIP:
+			err = intel_pt_walk_fup(decoder);
+			if (err == -EAGAIN)
+				err = intel_pt_walk_trace(decoder);
+			break;
+		case INTEL_PT_STATE_FUP_IN_PSB:
+			err = intel_pt_fup_in_psb(decoder);
+			break;
+		case INTEL_PT_STATE_RESAMPLE:
+			err = intel_pt_resample(decoder);
+			break;
+		case INTEL_PT_STATE_VM_TIME_CORRELATION:
+			err = intel_pt_vm_time_correlation(decoder);
+			break;
+		default:
+			err = intel_pt_bug(decoder);
+			break;
+		}
+	} while (err == -ENOLINK);
+
+	if (err) {
+		decoder->state.err = intel_pt_ext_err(err);
+		if (err != -EOVERFLOW)
+			decoder->state.from_ip = decoder->ip;
+		intel_pt_update_sample_time(decoder);
+		decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
+		intel_pt_set_nr(decoder);
+	} else {
+		decoder->state.err = 0;
+		if (decoder->cbr != decoder->cbr_seen) {
+			decoder->cbr_seen = decoder->cbr;
+			if (!decoder->state.type) {
+				decoder->state.from_ip = decoder->ip;
+				decoder->state.to_ip = 0;
+			}
+			decoder->state.type |= INTEL_PT_CBR_CHG;
+			decoder->state.cbr_payload = decoder->cbr_payload;
+			decoder->state.cbr = decoder->cbr;
+		}
+		if (intel_pt_sample_time(decoder->pkt_state)) {
+			intel_pt_update_sample_time(decoder);
+			if (decoder->sample_cyc) {
+				decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
+				decoder->state.flags |= INTEL_PT_SAMPLE_IPC;
+				decoder->sample_cyc = false;
+			}
+		}
+		/*
+		 * When using only TSC/MTC to compute cycles, IPC can be
+		 * sampled as soon as the cycle count changes.
+		 */
+		if (!decoder->have_cyc)
+			decoder->state.flags |= INTEL_PT_SAMPLE_IPC;
+	}
+
+	 /* Let PSB event always have TSC timestamp */
+	if ((decoder->state.type & INTEL_PT_PSB_EVT) && decoder->tsc_timestamp)
+		decoder->sample_timestamp = decoder->tsc_timestamp;
+
+	decoder->state.from_nr = decoder->nr;
+	decoder->state.to_nr = decoder->next_nr;
+	decoder->nr = decoder->next_nr;
+
+	decoder->state.timestamp = decoder->sample_timestamp;
+	decoder->state.est_timestamp = intel_pt_est_timestamp(decoder);
+	decoder->state.tot_insn_cnt = decoder->tot_insn_cnt;
+	decoder->state.tot_cyc_cnt = decoder->sample_tot_cyc_cnt;
+
+	return &decoder->state;
+}
+
+/**
+ * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet.
+ * @buf: pointer to buffer pointer
+ * @len: size of buffer
+ *
+ * Updates the buffer pointer to point to the start of the next PSB packet if
+ * there is one, otherwise the buffer pointer is unchanged.  If @buf is updated,
+ * @len is adjusted accordingly.
+ *
+ * Return: %true if a PSB packet is found, %false otherwise.
+ */
+static bool intel_pt_next_psb(unsigned char **buf, size_t *len)
+{
+	unsigned char *next;
+
+	next = memmem(*buf, *len, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
+	if (next) {
+		*len -= next - *buf;
+		*buf = next;
+		return true;
+	}
+	return false;
+}
+
+/**
+ * intel_pt_step_psb - move buffer pointer to the start of the following PSB
+ *                     packet.
+ * @buf: pointer to buffer pointer
+ * @len: size of buffer
+ *
+ * Updates the buffer pointer to point to the start of the following PSB packet
+ * (skipping the PSB at @buf itself) if there is one, otherwise the buffer
+ * pointer is unchanged.  If @buf is updated, @len is adjusted accordingly.
+ *
+ * Return: %true if a PSB packet is found, %false otherwise.
+ */
+static bool intel_pt_step_psb(unsigned char **buf, size_t *len)
+{
+	unsigned char *next;
+
+	if (!*len)
+		return false;
+
+	next = memmem(*buf + 1, *len - 1, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
+	if (next) {
+		*len -= next - *buf;
+		*buf = next;
+		return true;
+	}
+	return false;
+}
+
+/**
+ * intel_pt_last_psb - find the last PSB packet in a buffer.
+ * @buf: buffer
+ * @len: size of buffer
+ *
+ * This function finds the last PSB in a buffer.
+ *
+ * Return: A pointer to the last PSB in @buf if found, %NULL otherwise.
+ */
+static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
+{
+	const char *n = INTEL_PT_PSB_STR;
+	unsigned char *p;
+	size_t k;
+
+	if (len < INTEL_PT_PSB_LEN)
+		return NULL;
+
+	k = len - INTEL_PT_PSB_LEN + 1;
+	while (1) {
+		p = memrchr(buf, n[0], k);
+		if (!p)
+			return NULL;
+		if (!memcmp(p + 1, n + 1, INTEL_PT_PSB_LEN - 1))
+			return p;
+		k = p - buf;
+		if (!k)
+			return NULL;
+	}
+}
+
+/**
+ * intel_pt_next_tsc - find and return next TSC.
+ * @buf: buffer
+ * @len: size of buffer
+ * @tsc: TSC value returned
+ * @rem: returns remaining size when TSC is found
+ *
+ * Find a TSC packet in @buf and return the TSC value.  This function assumes
+ * that @buf starts at a PSB and that PSB+ will contain TSC and so stops if a
+ * PSBEND packet is found.
+ *
+ * Return: %true if TSC is found, false otherwise.
+ */
+static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc,
+			      size_t *rem)
+{
+	enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX;
+	struct intel_pt_pkt packet;
+	int ret;
+
+	while (len) {
+		ret = intel_pt_get_packet(buf, len, &packet, &ctx);
+		if (ret <= 0)
+			return false;
+		if (packet.type == INTEL_PT_TSC) {
+			*tsc = packet.payload;
+			*rem = len;
+			return true;
+		}
+		if (packet.type == INTEL_PT_PSBEND)
+			return false;
+		buf += ret;
+		len -= ret;
+	}
+	return false;
+}
+
+/**
+ * intel_pt_tsc_cmp - compare 7-byte TSCs.
+ * @tsc1: first TSC to compare
+ * @tsc2: second TSC to compare
+ *
+ * This function compares 7-byte TSC values allowing for the possibility that
+ * TSC wrapped around.  Generally it is not possible to know if TSC has wrapped
+ * around so for that purpose this function assumes the absolute difference is
+ * less than half the maximum difference.
+ *
+ * Return: %-1 if @tsc1 is before @tsc2, %0 if @tsc1 == @tsc2, %1 if @tsc1 is
+ * after @tsc2.
+ */
+static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
+{
+	const uint64_t halfway = (1ULL << 55);
+
+	if (tsc1 == tsc2)
+		return 0;
+
+	if (tsc1 < tsc2) {
+		if (tsc2 - tsc1 < halfway)
+			return -1;
+		else
+			return 1;
+	} else {
+		if (tsc1 - tsc2 < halfway)
+			return 1;
+		else
+			return -1;
+	}
+}
+
+#define MAX_PADDING (PERF_AUXTRACE_RECORD_ALIGNMENT - 1)
+
+/**
+ * adj_for_padding - adjust overlap to account for padding.
+ * @buf_b: second buffer
+ * @buf_a: first buffer
+ * @len_a: size of first buffer
+ *
+ * @buf_a might have up to 7 bytes of padding appended. Adjust the overlap
+ * accordingly.
+ *
+ * Return: A pointer into @buf_b from where non-overlapped data starts
+ */
+static unsigned char *adj_for_padding(unsigned char *buf_b,
+				      unsigned char *buf_a, size_t len_a)
+{
+	unsigned char *p = buf_b - MAX_PADDING;
+	unsigned char *q = buf_a + len_a - MAX_PADDING;
+	int i;
+
+	for (i = MAX_PADDING; i; i--, p++, q++) {
+		if (*p != *q)
+			break;
+	}
+
+	return p;
+}
+
+/**
+ * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data
+ *                             using TSC.
+ * @buf_a: first buffer
+ * @len_a: size of first buffer
+ * @buf_b: second buffer
+ * @len_b: size of second buffer
+ * @consecutive: returns true if there is data in buf_b that is consecutive
+ *               to buf_a
+ * @ooo_tsc: out-of-order TSC due to VM TSC offset / scaling
+ *
+ * If the trace contains TSC we can look at the last TSC of @buf_a and the
+ * first TSC of @buf_b in order to determine if the buffers overlap, and then
+ * walk forward in @buf_b until a later TSC is found.  A precondition is that
+ * @buf_a and @buf_b are positioned at a PSB.
+ *
+ * Return: A pointer into @buf_b from where non-overlapped data starts, or
+ * @buf_b + @len_b if there is no non-overlapped data.
+ */
+static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
+						size_t len_a,
+						unsigned char *buf_b,
+						size_t len_b, bool *consecutive,
+						bool ooo_tsc)
+{
+	uint64_t tsc_a, tsc_b;
+	unsigned char *p;
+	size_t len, rem_a, rem_b;
+
+	p = intel_pt_last_psb(buf_a, len_a);
+	if (!p)
+		return buf_b; /* No PSB in buf_a => no overlap */
+
+	len = len_a - (p - buf_a);
+	if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a)) {
+		/* The last PSB+ in buf_a is incomplete, so go back one more */
+		len_a -= len;
+		p = intel_pt_last_psb(buf_a, len_a);
+		if (!p)
+			return buf_b; /* No full PSB+ => assume no overlap */
+		len = len_a - (p - buf_a);
+		if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a))
+			return buf_b; /* No TSC in buf_a => assume no overlap */
+	}
+
+	while (1) {
+		/* Ignore PSB+ with no TSC */
+		if (intel_pt_next_tsc(buf_b, len_b, &tsc_b, &rem_b)) {
+			int cmp = intel_pt_tsc_cmp(tsc_a, tsc_b);
+
+			/* Same TSC, so buffers are consecutive */
+			if (!cmp && rem_b >= rem_a) {
+				unsigned char *start;
+
+				*consecutive = true;
+				start = buf_b + len_b - (rem_b - rem_a);
+				return adj_for_padding(start, buf_a, len_a);
+			}
+			if (cmp < 0 && !ooo_tsc)
+				return buf_b; /* tsc_a < tsc_b => no overlap */
+		}
+
+		if (!intel_pt_step_psb(&buf_b, &len_b))
+			return buf_b + len_b; /* No PSB in buf_b => no data */
+	}
+}
+
+/**
+ * intel_pt_find_overlap - determine start of non-overlapped trace data.
+ * @buf_a: first buffer
+ * @len_a: size of first buffer
+ * @buf_b: second buffer
+ * @len_b: size of second buffer
+ * @have_tsc: can use TSC packets to detect overlap
+ * @consecutive: returns true if there is data in buf_b that is consecutive
+ *               to buf_a
+ * @ooo_tsc: out-of-order TSC due to VM TSC offset / scaling
+ *
+ * When trace samples or snapshots are recorded there is the possibility that
+ * the data overlaps.  Note that, for the purposes of decoding, data is only
+ * useful if it begins with a PSB packet.
+ *
+ * Return: A pointer into @buf_b from where non-overlapped data starts, or
+ * @buf_b + @len_b if there is no non-overlapped data.
+ */
+unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
+				     unsigned char *buf_b, size_t len_b,
+				     bool have_tsc, bool *consecutive,
+				     bool ooo_tsc)
+{
+	unsigned char *found;
+
+	/* Buffer 'b' must start at PSB so throw away everything before that */
+	if (!intel_pt_next_psb(&buf_b, &len_b))
+		return buf_b + len_b; /* No PSB */
+
+	if (!intel_pt_next_psb(&buf_a, &len_a))
+		return buf_b; /* No overlap */
+
+	if (have_tsc) {
+		found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b,
+						  consecutive, ooo_tsc);
+		if (found)
+			return found;
+	}
+
+	/*
+	 * Buffer 'b' cannot end within buffer 'a' so, for comparison purposes,
+	 * we can ignore the first part of buffer 'a'.
+	 */
+	while (len_b < len_a) {
+		if (!intel_pt_step_psb(&buf_a, &len_a))
+			return buf_b; /* No overlap */
+	}
+
+	/* Now len_b >= len_a */
+	while (1) {
+		/* Potential overlap so check the bytes */
+		found = memmem(buf_a, len_a, buf_b, len_a);
+		if (found) {
+			*consecutive = true;
+			return adj_for_padding(buf_b + len_a, buf_a, len_a);
+		}
+
+		/* Try again at next PSB in buffer 'a' */
+		if (!intel_pt_step_psb(&buf_a, &len_a))
+			return buf_b; /* No overlap */
+	}
+}
+
+/**
+ * struct fast_forward_data - data used by intel_pt_ff_cb().
+ * @timestamp: timestamp to fast forward towards
+ * @buf_timestamp: buffer timestamp of last buffer with trace data earlier than
+ *                 the fast forward timestamp.
+ */
+struct fast_forward_data {
+	uint64_t timestamp;
+	uint64_t buf_timestamp;
+};
+
+/**
+ * intel_pt_ff_cb - fast forward lookahead callback.
+ * @buffer: Intel PT trace buffer
+ * @data: opaque pointer to fast forward data (struct fast_forward_data)
+ *
+ * Determine if @buffer trace is past the fast forward timestamp.
+ *
+ * Return: 1 (stop lookahead) if @buffer trace is past the fast forward
+ *         timestamp, and 0 otherwise.
+ */
+static int intel_pt_ff_cb(struct intel_pt_buffer *buffer, void *data)
+{
+	struct fast_forward_data *d = data;
+	unsigned char *buf;
+	uint64_t tsc;
+	size_t rem;
+	size_t len;
+
+	buf = (unsigned char *)buffer->buf;
+	len = buffer->len;
+
+	if (!intel_pt_next_psb(&buf, &len) ||
+	    !intel_pt_next_tsc(buf, len, &tsc, &rem))
+		return 0;
+
+	tsc = intel_pt_8b_tsc(tsc, buffer->ref_timestamp);
+
+	intel_pt_log("Buffer 1st timestamp " x64_fmt " ref timestamp " x64_fmt "\n",
+		     tsc, buffer->ref_timestamp);
+
+	/*
+	 * If the buffer contains a timestamp earlier that the fast forward
+	 * timestamp, then record it, else stop.
+	 */
+	if (tsc < d->timestamp)
+		d->buf_timestamp = buffer->ref_timestamp;
+	else
+		return 1;
+
+	return 0;
+}
+
+/**
+ * intel_pt_fast_forward - reposition decoder forwards.
+ * @decoder: Intel PT decoder
+ * @timestamp: timestamp to fast forward towards
+ *
+ * Reposition decoder at the last PSB with a timestamp earlier than @timestamp.
+ *
+ * Return: 0 on success or negative error code on failure.
+ */
+int intel_pt_fast_forward(struct intel_pt_decoder *decoder, uint64_t timestamp)
+{
+	struct fast_forward_data d = { .timestamp = timestamp };
+	unsigned char *buf;
+	size_t len;
+	int err;
+
+	intel_pt_log("Fast forward towards timestamp " x64_fmt "\n", timestamp);
+
+	/* Find buffer timestamp of buffer to fast forward to */
+	err = decoder->lookahead(decoder->data, intel_pt_ff_cb, &d);
+	if (err < 0)
+		return err;
+
+	/* Walk to buffer with same buffer timestamp */
+	if (d.buf_timestamp) {
+		do {
+			decoder->pos += decoder->len;
+			decoder->len = 0;
+			err = intel_pt_get_next_data(decoder, true);
+			/* -ENOLINK means non-consecutive trace */
+			if (err && err != -ENOLINK)
+				return err;
+		} while (decoder->buf_timestamp != d.buf_timestamp);
+	}
+
+	if (!decoder->buf)
+		return 0;
+
+	buf = (unsigned char *)decoder->buf;
+	len = decoder->len;
+
+	if (!intel_pt_next_psb(&buf, &len))
+		return 0;
+
+	/*
+	 * Walk PSBs while the PSB timestamp is less than the fast forward
+	 * timestamp.
+	 */
+	do {
+		uint64_t tsc;
+		size_t rem;
+
+		if (!intel_pt_next_tsc(buf, len, &tsc, &rem))
+			break;
+		tsc = intel_pt_8b_tsc(tsc, decoder->buf_timestamp);
+		/*
+		 * A TSC packet can slip past MTC packets but, after fast
+		 * forward, decoding starts at the TSC timestamp. That means
+		 * the timestamps may not be exactly the same as the timestamps
+		 * that would have been decoded without fast forward.
+		 */
+		if (tsc < timestamp) {
+			intel_pt_log("Fast forward to next PSB timestamp " x64_fmt "\n", tsc);
+			decoder->pos += decoder->len - len;
+			decoder->buf = buf;
+			decoder->len = len;
+			intel_pt_reposition(decoder);
+		} else {
+			break;
+		}
+	} while (intel_pt_step_psb(&buf, &len));
+
+	return 0;
+}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
new file mode 100644
index 000000000..c773028df
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -0,0 +1,318 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * intel_pt_decoder.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ */
+
+#ifndef INCLUDE__INTEL_PT_DECODER_H__
+#define INCLUDE__INTEL_PT_DECODER_H__
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+
+#include <linux/rbtree.h>
+
+#include "intel-pt-insn-decoder.h"
+
+#define INTEL_PT_IN_TX		(1 << 0)
+#define INTEL_PT_ABORT_TX	(1 << 1)
+#define INTEL_PT_IFLAG		(1 << 2)
+#define INTEL_PT_ASYNC		(1 << 2)
+#define INTEL_PT_FUP_IP		(1 << 3)
+#define INTEL_PT_SAMPLE_IPC	(1 << 4)
+
+enum intel_pt_sample_type {
+	INTEL_PT_BRANCH		= 1 << 0,
+	INTEL_PT_INSTRUCTION	= 1 << 1,
+	INTEL_PT_TRANSACTION	= 1 << 2,
+	INTEL_PT_PTW		= 1 << 3,
+	INTEL_PT_MWAIT_OP	= 1 << 4,
+	INTEL_PT_PWR_ENTRY	= 1 << 5,
+	INTEL_PT_EX_STOP	= 1 << 6,
+	INTEL_PT_PWR_EXIT	= 1 << 7,
+	INTEL_PT_CBR_CHG	= 1 << 8,
+	INTEL_PT_TRACE_BEGIN	= 1 << 9,
+	INTEL_PT_TRACE_END	= 1 << 10,
+	INTEL_PT_BLK_ITEMS	= 1 << 11,
+	INTEL_PT_PSB_EVT	= 1 << 12,
+	INTEL_PT_EVT		= 1 << 13,
+	INTEL_PT_IFLAG_CHG	= 1 << 14,
+};
+
+enum intel_pt_period_type {
+	INTEL_PT_PERIOD_NONE,
+	INTEL_PT_PERIOD_INSTRUCTIONS,
+	INTEL_PT_PERIOD_TICKS,
+	INTEL_PT_PERIOD_MTC,
+};
+
+enum {
+	INTEL_PT_ERR_NOMEM = 1,
+	INTEL_PT_ERR_INTERN,
+	INTEL_PT_ERR_BADPKT,
+	INTEL_PT_ERR_NODATA,
+	INTEL_PT_ERR_NOINSN,
+	INTEL_PT_ERR_MISMAT,
+	INTEL_PT_ERR_OVR,
+	INTEL_PT_ERR_LOST,
+	INTEL_PT_ERR_UNK,
+	INTEL_PT_ERR_NELOOP,
+	INTEL_PT_ERR_EPTW,
+	INTEL_PT_ERR_MAX,
+};
+
+enum intel_pt_param_flags {
+	/*
+	 * FUP packet can contain next linear instruction pointer instead of
+	 * current linear instruction pointer.
+	 */
+	INTEL_PT_FUP_WITH_NLIP	= 1 << 0,
+};
+
+enum intel_pt_blk_type {
+	INTEL_PT_GP_REGS	= 1,
+	INTEL_PT_PEBS_BASIC	= 4,
+	INTEL_PT_PEBS_MEM	= 5,
+	INTEL_PT_LBR_0		= 8,
+	INTEL_PT_LBR_1		= 9,
+	INTEL_PT_LBR_2		= 10,
+	INTEL_PT_XMM		= 16,
+	INTEL_PT_BLK_TYPE_MAX
+};
+
+/*
+ * The block type numbers are not sequential but here they are given sequential
+ * positions to avoid wasting space for array placement.
+ */
+enum intel_pt_blk_type_pos {
+	INTEL_PT_GP_REGS_POS,
+	INTEL_PT_PEBS_BASIC_POS,
+	INTEL_PT_PEBS_MEM_POS,
+	INTEL_PT_LBR_0_POS,
+	INTEL_PT_LBR_1_POS,
+	INTEL_PT_LBR_2_POS,
+	INTEL_PT_XMM_POS,
+	INTEL_PT_BLK_TYPE_CNT
+};
+
+/* Get the array position for a block type */
+static inline int intel_pt_blk_type_pos(enum intel_pt_blk_type blk_type)
+{
+#define BLK_TYPE(bt) [INTEL_PT_##bt] = INTEL_PT_##bt##_POS + 1
+	const int map[INTEL_PT_BLK_TYPE_MAX] = {
+		BLK_TYPE(GP_REGS),
+		BLK_TYPE(PEBS_BASIC),
+		BLK_TYPE(PEBS_MEM),
+		BLK_TYPE(LBR_0),
+		BLK_TYPE(LBR_1),
+		BLK_TYPE(LBR_2),
+		BLK_TYPE(XMM),
+	};
+#undef BLK_TYPE
+
+	return blk_type < INTEL_PT_BLK_TYPE_MAX ? map[blk_type] - 1 : -1;
+}
+
+#define INTEL_PT_BLK_ITEM_ID_CNT	32
+
+/*
+ * Use unions so that the block items can be accessed by name or by array index.
+ * There is an array of 32-bit masks for each block type, which indicate which
+ * values are present. Then arrays of 32 64-bit values for each block type.
+ */
+struct intel_pt_blk_items {
+	union {
+		uint32_t mask[INTEL_PT_BLK_TYPE_CNT];
+		struct {
+			uint32_t has_rflags:1;
+			uint32_t has_rip:1;
+			uint32_t has_rax:1;
+			uint32_t has_rcx:1;
+			uint32_t has_rdx:1;
+			uint32_t has_rbx:1;
+			uint32_t has_rsp:1;
+			uint32_t has_rbp:1;
+			uint32_t has_rsi:1;
+			uint32_t has_rdi:1;
+			uint32_t has_r8:1;
+			uint32_t has_r9:1;
+			uint32_t has_r10:1;
+			uint32_t has_r11:1;
+			uint32_t has_r12:1;
+			uint32_t has_r13:1;
+			uint32_t has_r14:1;
+			uint32_t has_r15:1;
+			uint32_t has_unused_0:14;
+			uint32_t has_ip:1;
+			uint32_t has_applicable_counters:1;
+			uint32_t has_timestamp:1;
+			uint32_t has_unused_1:29;
+			uint32_t has_mem_access_address:1;
+			uint32_t has_mem_aux_info:1;
+			uint32_t has_mem_access_latency:1;
+			uint32_t has_tsx_aux_info:1;
+			uint32_t has_unused_2:28;
+			uint32_t has_lbr_0;
+			uint32_t has_lbr_1;
+			uint32_t has_lbr_2;
+			uint32_t has_xmm;
+		};
+	};
+	union {
+		uint64_t val[INTEL_PT_BLK_TYPE_CNT][INTEL_PT_BLK_ITEM_ID_CNT];
+		struct {
+			struct {
+				uint64_t rflags;
+				uint64_t rip;
+				uint64_t rax;
+				uint64_t rcx;
+				uint64_t rdx;
+				uint64_t rbx;
+				uint64_t rsp;
+				uint64_t rbp;
+				uint64_t rsi;
+				uint64_t rdi;
+				uint64_t r8;
+				uint64_t r9;
+				uint64_t r10;
+				uint64_t r11;
+				uint64_t r12;
+				uint64_t r13;
+				uint64_t r14;
+				uint64_t r15;
+				uint64_t unused_0[INTEL_PT_BLK_ITEM_ID_CNT - 18];
+			};
+			struct {
+				uint64_t ip;
+				uint64_t applicable_counters;
+				uint64_t timestamp;
+				uint64_t unused_1[INTEL_PT_BLK_ITEM_ID_CNT - 3];
+			};
+			struct {
+				uint64_t mem_access_address;
+				uint64_t mem_aux_info;
+				uint64_t mem_access_latency;
+				uint64_t tsx_aux_info;
+				uint64_t unused_2[INTEL_PT_BLK_ITEM_ID_CNT - 4];
+			};
+			uint64_t lbr_0[INTEL_PT_BLK_ITEM_ID_CNT];
+			uint64_t lbr_1[INTEL_PT_BLK_ITEM_ID_CNT];
+			uint64_t lbr_2[INTEL_PT_BLK_ITEM_ID_CNT];
+			uint64_t xmm[INTEL_PT_BLK_ITEM_ID_CNT];
+		};
+	};
+	bool is_32_bit;
+};
+
+struct intel_pt_vmcs_info {
+	struct rb_node rb_node;
+	uint64_t vmcs;
+	uint64_t tsc_offset;
+	bool reliable;
+	bool error_printed;
+};
+
+/*
+ * Maximum number of event trace data in one go, assuming at most 1 per type
+ * and 6-bits of type in the EVD packet.
+ */
+#define INTEL_PT_MAX_EVDS 64
+
+/* Event trace data from EVD packet */
+struct intel_pt_evd {
+	int type;
+	uint64_t payload;
+};
+
+struct intel_pt_state {
+	enum intel_pt_sample_type type;
+	bool from_nr;
+	bool to_nr;
+	bool from_iflag;
+	bool to_iflag;
+	int err;
+	uint64_t from_ip;
+	uint64_t to_ip;
+	uint64_t tot_insn_cnt;
+	uint64_t tot_cyc_cnt;
+	uint64_t cycles;
+	uint64_t timestamp;
+	uint64_t est_timestamp;
+	uint64_t trace_nr;
+	uint64_t ptw_payload;
+	uint64_t mwait_payload;
+	uint64_t pwre_payload;
+	uint64_t pwrx_payload;
+	uint64_t cbr_payload;
+	uint64_t psb_offset;
+	uint32_t cbr;
+	uint32_t flags;
+	enum intel_pt_insn_op insn_op;
+	int insn_len;
+	char insn[INTEL_PT_INSN_BUF_SZ];
+	struct intel_pt_blk_items items;
+	int cfe_type;
+	int cfe_vector;
+	int evd_cnt;
+	struct intel_pt_evd *evd;
+};
+
+struct intel_pt_insn;
+
+struct intel_pt_buffer {
+	const unsigned char *buf;
+	size_t len;
+	bool consecutive;
+	uint64_t ref_timestamp;
+	uint64_t trace_nr;
+};
+
+typedef int (*intel_pt_lookahead_cb_t)(struct intel_pt_buffer *, void *);
+
+struct intel_pt_params {
+	int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
+	int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
+			 uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
+			 uint64_t max_insn_cnt, void *data);
+	bool (*pgd_ip)(uint64_t ip, void *data);
+	int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data);
+	struct intel_pt_vmcs_info *(*findnew_vmcs_info)(void *data, uint64_t vmcs);
+	void *data;
+	bool return_compression;
+	bool branch_enable;
+	bool vm_time_correlation;
+	bool vm_tm_corr_dry_run;
+	uint64_t first_timestamp;
+	uint64_t ctl;
+	uint64_t period;
+	enum intel_pt_period_type period_type;
+	unsigned max_non_turbo_ratio;
+	unsigned int mtc_period;
+	uint32_t tsc_ctc_ratio_n;
+	uint32_t tsc_ctc_ratio_d;
+	enum intel_pt_param_flags flags;
+	unsigned int quick;
+	int max_loops;
+};
+
+struct intel_pt_decoder;
+
+struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params);
+void intel_pt_decoder_free(struct intel_pt_decoder *decoder);
+
+const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder);
+
+int intel_pt_fast_forward(struct intel_pt_decoder *decoder, uint64_t timestamp);
+
+unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
+				     unsigned char *buf_b, size_t len_b,
+				     bool have_tsc, bool *consecutive,
+				     bool ooo_tsc);
+
+int intel_pt__strerror(int code, char *buf, size_t buflen);
+
+void intel_pt_set_first_timestamp(struct intel_pt_decoder *decoder,
+				  uint64_t first_timestamp);
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
new file mode 100644
index 000000000..137607718
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * intel_pt_insn_decoder.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ */
+
+#include <linux/kernel.h>
+#include <stdio.h>
+#include <string.h>
+#include <endian.h>
+#include <byteswap.h>
+#include "../../../arch/x86/include/asm/insn.h"
+
+#include "../../../arch/x86/lib/inat.c"
+#include "../../../arch/x86/lib/insn.c"
+
+#include "event.h"
+
+#include "intel-pt-insn-decoder.h"
+#include "dump-insn.h"
+
+#if INTEL_PT_INSN_BUF_SZ < MAX_INSN_SIZE || INTEL_PT_INSN_BUF_SZ > MAX_INSN
+#error Instruction buffer size too small
+#endif
+
+/* Based on branch_type() from arch/x86/events/intel/lbr.c */
+static void intel_pt_insn_decoder(struct insn *insn,
+				  struct intel_pt_insn *intel_pt_insn)
+{
+	enum intel_pt_insn_op op = INTEL_PT_OP_OTHER;
+	enum intel_pt_insn_branch branch = INTEL_PT_BR_NO_BRANCH;
+	int ext;
+
+	intel_pt_insn->rel = 0;
+	intel_pt_insn->emulated_ptwrite = false;
+
+	if (insn_is_avx(insn)) {
+		intel_pt_insn->op = INTEL_PT_OP_OTHER;
+		intel_pt_insn->branch = INTEL_PT_BR_NO_BRANCH;
+		intel_pt_insn->length = insn->length;
+		return;
+	}
+
+	switch (insn->opcode.bytes[0]) {
+	case 0xf:
+		switch (insn->opcode.bytes[1]) {
+		case 0x01:
+			switch (insn->modrm.bytes[0]) {
+			case 0xc2: /* vmlaunch */
+			case 0xc3: /* vmresume */
+				op = INTEL_PT_OP_VMENTRY;
+				branch = INTEL_PT_BR_INDIRECT;
+				break;
+			default:
+				break;
+			}
+			break;
+		case 0x05: /* syscall */
+		case 0x34: /* sysenter */
+			op = INTEL_PT_OP_SYSCALL;
+			branch = INTEL_PT_BR_INDIRECT;
+			break;
+		case 0x07: /* sysret */
+		case 0x35: /* sysexit */
+			op = INTEL_PT_OP_SYSRET;
+			branch = INTEL_PT_BR_INDIRECT;
+			break;
+		case 0x80 ... 0x8f: /* jcc */
+			op = INTEL_PT_OP_JCC;
+			branch = INTEL_PT_BR_CONDITIONAL;
+			break;
+		default:
+			break;
+		}
+		break;
+	case 0x70 ... 0x7f: /* jcc */
+		op = INTEL_PT_OP_JCC;
+		branch = INTEL_PT_BR_CONDITIONAL;
+		break;
+	case 0xc2: /* near ret */
+	case 0xc3: /* near ret */
+	case 0xca: /* far ret */
+	case 0xcb: /* far ret */
+		op = INTEL_PT_OP_RET;
+		branch = INTEL_PT_BR_INDIRECT;
+		break;
+	case 0xcf: /* iret */
+		op = INTEL_PT_OP_IRET;
+		branch = INTEL_PT_BR_INDIRECT;
+		break;
+	case 0xcc ... 0xce: /* int */
+		op = INTEL_PT_OP_INT;
+		branch = INTEL_PT_BR_INDIRECT;
+		break;
+	case 0xe8: /* call near rel */
+		op = INTEL_PT_OP_CALL;
+		branch = INTEL_PT_BR_UNCONDITIONAL;
+		break;
+	case 0x9a: /* call far absolute */
+		op = INTEL_PT_OP_CALL;
+		branch = INTEL_PT_BR_INDIRECT;
+		break;
+	case 0xe0 ... 0xe2: /* loop */
+		op = INTEL_PT_OP_LOOP;
+		branch = INTEL_PT_BR_CONDITIONAL;
+		break;
+	case 0xe3: /* jcc */
+		op = INTEL_PT_OP_JCC;
+		branch = INTEL_PT_BR_CONDITIONAL;
+		break;
+	case 0xe9: /* jmp */
+	case 0xeb: /* jmp */
+		op = INTEL_PT_OP_JMP;
+		branch = INTEL_PT_BR_UNCONDITIONAL;
+		break;
+	case 0xea: /* far jmp */
+		op = INTEL_PT_OP_JMP;
+		branch = INTEL_PT_BR_INDIRECT;
+		break;
+	case 0xff: /* call near absolute, call far absolute ind */
+		ext = (insn->modrm.bytes[0] >> 3) & 0x7;
+		switch (ext) {
+		case 2: /* near ind call */
+		case 3: /* far ind call */
+			op = INTEL_PT_OP_CALL;
+			branch = INTEL_PT_BR_INDIRECT;
+			break;
+		case 4:
+		case 5:
+			op = INTEL_PT_OP_JMP;
+			branch = INTEL_PT_BR_INDIRECT;
+			break;
+		default:
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+
+	intel_pt_insn->op = op;
+	intel_pt_insn->branch = branch;
+	intel_pt_insn->length = insn->length;
+
+	if (branch == INTEL_PT_BR_CONDITIONAL ||
+	    branch == INTEL_PT_BR_UNCONDITIONAL) {
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+		switch (insn->immediate.nbytes) {
+		case 1:
+			intel_pt_insn->rel = insn->immediate.value;
+			break;
+		case 2:
+			intel_pt_insn->rel =
+					bswap_16((short)insn->immediate.value);
+			break;
+		case 4:
+			intel_pt_insn->rel = bswap_32(insn->immediate.value);
+			break;
+		default:
+			intel_pt_insn->rel = 0;
+			break;
+		}
+#else
+		intel_pt_insn->rel = insn->immediate.value;
+#endif
+	}
+}
+
+int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
+		      struct intel_pt_insn *intel_pt_insn)
+{
+	struct insn insn;
+	int ret;
+
+	ret = insn_decode(&insn, buf, len,
+			  x86_64 ? INSN_MODE_64 : INSN_MODE_32);
+	if (ret < 0 || insn.length > len)
+		return -1;
+
+	intel_pt_insn_decoder(&insn, intel_pt_insn);
+	if (insn.length < INTEL_PT_INSN_BUF_SZ)
+		memcpy(intel_pt_insn->buf, buf, insn.length);
+	else
+		memcpy(intel_pt_insn->buf, buf, INTEL_PT_INSN_BUF_SZ);
+	return 0;
+}
+
+int arch_is_branch(const unsigned char *buf, size_t len, int x86_64)
+{
+	struct intel_pt_insn in;
+	if (intel_pt_get_insn(buf, len, x86_64, &in) < 0)
+		return -1;
+	return in.branch != INTEL_PT_BR_NO_BRANCH;
+}
+
+const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused,
+		      u8 *inbuf, int inlen, int *lenp)
+{
+	struct insn insn;
+	int n, i, ret;
+	int left;
+
+	ret = insn_decode(&insn, inbuf, inlen,
+			  x->is64bit ? INSN_MODE_64 : INSN_MODE_32);
+
+	if (ret < 0 || insn.length > inlen)
+		return "<bad>";
+	if (lenp)
+		*lenp = insn.length;
+	left = sizeof(x->out);
+	n = snprintf(x->out, left, "insn: ");
+	left -= n;
+	for (i = 0; i < insn.length; i++) {
+		n += snprintf(x->out + n, left, "%02x ", inbuf[i]);
+		left -= n;
+	}
+	return x->out;
+}
+
+const char *branch_name[] = {
+	[INTEL_PT_OP_OTHER]	= "Other",
+	[INTEL_PT_OP_CALL]	= "Call",
+	[INTEL_PT_OP_RET]	= "Ret",
+	[INTEL_PT_OP_JCC]	= "Jcc",
+	[INTEL_PT_OP_JMP]	= "Jmp",
+	[INTEL_PT_OP_LOOP]	= "Loop",
+	[INTEL_PT_OP_IRET]	= "IRet",
+	[INTEL_PT_OP_INT]	= "Int",
+	[INTEL_PT_OP_SYSCALL]	= "Syscall",
+	[INTEL_PT_OP_SYSRET]	= "Sysret",
+	[INTEL_PT_OP_VMENTRY]	= "VMentry",
+};
+
+const char *intel_pt_insn_name(enum intel_pt_insn_op op)
+{
+	return branch_name[op];
+}
+
+int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf,
+		       size_t buf_len)
+{
+	switch (intel_pt_insn->branch) {
+	case INTEL_PT_BR_CONDITIONAL:
+	case INTEL_PT_BR_UNCONDITIONAL:
+		return snprintf(buf, buf_len, "%s %s%d",
+				intel_pt_insn_name(intel_pt_insn->op),
+				intel_pt_insn->rel > 0 ? "+" : "",
+				intel_pt_insn->rel);
+	case INTEL_PT_BR_NO_BRANCH:
+	case INTEL_PT_BR_INDIRECT:
+		return snprintf(buf, buf_len, "%s",
+				intel_pt_insn_name(intel_pt_insn->op));
+	default:
+		break;
+	}
+	return 0;
+}
+
+int intel_pt_insn_type(enum intel_pt_insn_op op)
+{
+	switch (op) {
+	case INTEL_PT_OP_OTHER:
+		return 0;
+	case INTEL_PT_OP_CALL:
+		return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL;
+	case INTEL_PT_OP_RET:
+		return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN;
+	case INTEL_PT_OP_JCC:
+		return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL;
+	case INTEL_PT_OP_JMP:
+		return PERF_IP_FLAG_BRANCH;
+	case INTEL_PT_OP_LOOP:
+		return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL;
+	case INTEL_PT_OP_IRET:
+		return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN |
+		       PERF_IP_FLAG_INTERRUPT;
+	case INTEL_PT_OP_INT:
+		return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+		       PERF_IP_FLAG_INTERRUPT;
+	case INTEL_PT_OP_SYSCALL:
+		return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+		       PERF_IP_FLAG_SYSCALLRET;
+	case INTEL_PT_OP_SYSRET:
+		return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN |
+		       PERF_IP_FLAG_SYSCALLRET;
+	case INTEL_PT_OP_VMENTRY:
+		return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+		       PERF_IP_FLAG_VMENTRY;
+	default:
+		return 0;
+	}
+}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
new file mode 100644
index 000000000..e3338b56a
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * intel_pt_insn_decoder.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ */
+
+#ifndef INCLUDE__INTEL_PT_INSN_DECODER_H__
+#define INCLUDE__INTEL_PT_INSN_DECODER_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define INTEL_PT_INSN_DESC_MAX		32
+#define INTEL_PT_INSN_BUF_SZ		16
+
+enum intel_pt_insn_op {
+	INTEL_PT_OP_OTHER,
+	INTEL_PT_OP_CALL,
+	INTEL_PT_OP_RET,
+	INTEL_PT_OP_JCC,
+	INTEL_PT_OP_JMP,
+	INTEL_PT_OP_LOOP,
+	INTEL_PT_OP_IRET,
+	INTEL_PT_OP_INT,
+	INTEL_PT_OP_SYSCALL,
+	INTEL_PT_OP_SYSRET,
+	INTEL_PT_OP_VMENTRY,
+};
+
+enum intel_pt_insn_branch {
+	INTEL_PT_BR_NO_BRANCH,
+	INTEL_PT_BR_INDIRECT,
+	INTEL_PT_BR_CONDITIONAL,
+	INTEL_PT_BR_UNCONDITIONAL,
+};
+
+struct intel_pt_insn {
+	enum intel_pt_insn_op		op;
+	enum intel_pt_insn_branch	branch;
+	bool				emulated_ptwrite;
+	int				length;
+	int32_t				rel;
+	unsigned char			buf[INTEL_PT_INSN_BUF_SZ];
+};
+
+int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
+		      struct intel_pt_insn *intel_pt_insn);
+
+const char *intel_pt_insn_name(enum intel_pt_insn_op op);
+
+int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf,
+		       size_t buf_len);
+
+int intel_pt_insn_type(enum intel_pt_insn_op op);
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
new file mode 100644
index 000000000..ef55d6232
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * intel_pt_log.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include <linux/zalloc.h>
+#include <linux/kernel.h>
+
+#include "intel-pt-log.h"
+#include "intel-pt-insn-decoder.h"
+
+#include "intel-pt-pkt-decoder.h"
+
+#define MAX_LOG_NAME 256
+
+#define DFLT_BUF_SZ	(16 * 1024)
+
+struct log_buf {
+	char			*buf;
+	size_t			buf_sz;
+	size_t			head;
+	bool			wrapped;
+	FILE			*backend;
+};
+
+static FILE *f;
+static char log_name[MAX_LOG_NAME];
+bool intel_pt_enable_logging;
+static bool intel_pt_dump_log_on_error;
+static unsigned int intel_pt_log_on_error_size;
+static struct log_buf log_buf;
+
+void *intel_pt_log_fp(void)
+{
+	return f;
+}
+
+void intel_pt_log_enable(bool dump_log_on_error, unsigned int log_on_error_size)
+{
+	intel_pt_enable_logging = true;
+	intel_pt_dump_log_on_error = dump_log_on_error;
+	intel_pt_log_on_error_size = log_on_error_size;
+}
+
+void intel_pt_log_disable(void)
+{
+	if (f)
+		fflush(f);
+	intel_pt_enable_logging = false;
+}
+
+void intel_pt_log_set_name(const char *name)
+{
+	strncpy(log_name, name, MAX_LOG_NAME - 5);
+	strcat(log_name, ".log");
+}
+
+static void intel_pt_print_data(const unsigned char *buf, int len, uint64_t pos,
+				int indent)
+{
+	int i;
+
+	for (i = 0; i < indent; i++)
+		fprintf(f, " ");
+
+	fprintf(f, "  %08" PRIx64 ": ", pos);
+	for (i = 0; i < len; i++)
+		fprintf(f, " %02x", buf[i]);
+	for (; i < 16; i++)
+		fprintf(f, "   ");
+	fprintf(f, " ");
+}
+
+static void intel_pt_print_no_data(uint64_t pos, int indent)
+{
+	int i;
+
+	for (i = 0; i < indent; i++)
+		fprintf(f, " ");
+
+	fprintf(f, "  %08" PRIx64 ": ", pos);
+	for (i = 0; i < 16; i++)
+		fprintf(f, "   ");
+	fprintf(f, " ");
+}
+
+static ssize_t log_buf__write(void *cookie, const char *buf, size_t size)
+{
+	struct log_buf *b = cookie;
+	size_t sz = size;
+
+	if (!b->buf)
+		return size;
+
+	while (sz) {
+		size_t space = b->buf_sz - b->head;
+		size_t n = min(space, sz);
+
+		memcpy(b->buf + b->head, buf, n);
+		sz -= n;
+		buf += n;
+		b->head += n;
+		if (sz && b->head >= b->buf_sz) {
+			b->head = 0;
+			b->wrapped = true;
+		}
+	}
+	return size;
+}
+
+static int log_buf__close(void *cookie)
+{
+	struct log_buf *b = cookie;
+
+	zfree(&b->buf);
+	return 0;
+}
+
+static FILE *log_buf__open(struct log_buf *b, FILE *backend, unsigned int sz)
+{
+	cookie_io_functions_t fns = {
+		.write = log_buf__write,
+		.close = log_buf__close,
+	};
+	FILE *file;
+
+	memset(b, 0, sizeof(*b));
+	b->buf_sz = sz;
+	b->buf = malloc(b->buf_sz);
+	b->backend = backend;
+	file = fopencookie(b, "a", fns);
+	if (!file)
+		zfree(&b->buf);
+	return file;
+}
+
+static bool remove_first_line(const char **p, size_t *n)
+{
+	for (; *n && **p != '\n'; ++*p, --*n)
+		;
+	if (*n) {
+		*p += 1;
+		*n -= 1;
+		return true;
+	}
+	return false;
+}
+
+static void write_lines(const char *p, size_t n, FILE *fp, bool *remove_first)
+{
+	if (*remove_first)
+		*remove_first = !remove_first_line(&p, &n);
+	fwrite(p, n, 1, fp);
+}
+
+static void log_buf__dump(struct log_buf *b)
+{
+	bool remove_first = false;
+
+	if (!b->buf)
+		return;
+
+	fflush(f); /* Could update b->head and b->wrapped */
+	fprintf(b->backend, "Dumping debug log buffer\n");
+	if (b->wrapped) {
+		remove_first = true;
+		write_lines(b->buf + b->head, b->buf_sz - b->head, b->backend, &remove_first);
+	}
+	write_lines(b->buf, b->head, b->backend, &remove_first);
+	fprintf(b->backend, "End of debug log buffer dump\n");
+
+	b->head = 0;
+	b->wrapped = false;
+}
+
+void intel_pt_log_dump_buf(void)
+{
+	log_buf__dump(&log_buf);
+}
+
+static int intel_pt_log_open(void)
+{
+	if (!intel_pt_enable_logging)
+		return -1;
+
+	if (f)
+		return 0;
+
+	if (log_name[0])
+		f = fopen(log_name, "w+");
+	else
+		f = stdout;
+	if (f && intel_pt_dump_log_on_error)
+		f = log_buf__open(&log_buf, f, intel_pt_log_on_error_size);
+	if (!f) {
+		intel_pt_enable_logging = false;
+		return -1;
+	}
+
+	return 0;
+}
+
+void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
+			   uint64_t pos, const unsigned char *buf)
+{
+	char desc[INTEL_PT_PKT_DESC_MAX];
+
+	if (intel_pt_log_open())
+		return;
+
+	intel_pt_print_data(buf, pkt_len, pos, 0);
+	intel_pt_pkt_desc(packet, desc, INTEL_PT_PKT_DESC_MAX);
+	fprintf(f, "%s\n", desc);
+}
+
+void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip)
+{
+	char desc[INTEL_PT_INSN_DESC_MAX];
+	size_t len = intel_pt_insn->length;
+
+	if (intel_pt_log_open())
+		return;
+
+	if (len > INTEL_PT_INSN_BUF_SZ)
+		len = INTEL_PT_INSN_BUF_SZ;
+	intel_pt_print_data(intel_pt_insn->buf, len, ip, 8);
+	if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0)
+		fprintf(f, "%s\n", desc);
+	else
+		fprintf(f, "Bad instruction!\n");
+}
+
+void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn,
+				 uint64_t ip)
+{
+	char desc[INTEL_PT_INSN_DESC_MAX];
+
+	if (intel_pt_log_open())
+		return;
+
+	intel_pt_print_no_data(ip, 8);
+	if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0)
+		fprintf(f, "%s\n", desc);
+	else
+		fprintf(f, "Bad instruction!\n");
+}
+
+void __intel_pt_log(const char *fmt, ...)
+{
+	va_list args;
+
+	if (intel_pt_log_open())
+		return;
+
+	va_start(args, fmt);
+	vfprintf(f, fmt, args);
+	va_end(args);
+}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
new file mode 100644
index 000000000..354d7d23f
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * intel_pt_log.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ */
+
+#ifndef INCLUDE__INTEL_PT_LOG_H__
+#define INCLUDE__INTEL_PT_LOG_H__
+
+#include <linux/compiler.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+struct intel_pt_pkt;
+
+void *intel_pt_log_fp(void);
+void intel_pt_log_enable(bool dump_log_on_error, unsigned int log_on_error_size);
+void intel_pt_log_disable(void);
+void intel_pt_log_set_name(const char *name);
+void intel_pt_log_dump_buf(void);
+
+void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
+			   uint64_t pos, const unsigned char *buf);
+
+struct intel_pt_insn;
+
+void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip);
+void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn,
+				 uint64_t ip);
+
+void __intel_pt_log(const char *fmt, ...) __printf(1, 2);
+
+#define intel_pt_log(fmt, ...) \
+	do { \
+		if (intel_pt_enable_logging) \
+			__intel_pt_log(fmt, ##__VA_ARGS__); \
+	} while (0)
+
+#define intel_pt_log_packet(arg, ...) \
+	do { \
+		if (intel_pt_enable_logging) \
+			__intel_pt_log_packet(arg, ##__VA_ARGS__); \
+	} while (0)
+
+#define intel_pt_log_insn(arg, ...) \
+	do { \
+		if (intel_pt_enable_logging) \
+			__intel_pt_log_insn(arg, ##__VA_ARGS__); \
+	} while (0)
+
+#define intel_pt_log_insn_no_data(arg, ...) \
+	do { \
+		if (intel_pt_enable_logging) \
+			__intel_pt_log_insn_no_data(arg, ##__VA_ARGS__); \
+	} while (0)
+
+#define x64_fmt "0x%" PRIx64
+
+extern bool intel_pt_enable_logging;
+
+static inline void intel_pt_log_at(const char *msg, uint64_t u)
+{
+	intel_pt_log("%s at " x64_fmt "\n", msg, u);
+}
+
+static inline void intel_pt_log_to(const char *msg, uint64_t u)
+{
+	intel_pt_log("%s to " x64_fmt "\n", msg, u);
+}
+
+#define intel_pt_log_var(var, fmt) intel_pt_log("%s: " #var " " fmt "\n", __func__, var)
+
+#define intel_pt_log_x32(var) intel_pt_log_var(var, "%#x")
+#define intel_pt_log_x64(var) intel_pt_log_var(var, "%#" PRIx64)
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
new file mode 100644
index 000000000..18f97f43e
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -0,0 +1,800 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * intel_pt_pkt_decoder.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <endian.h>
+#include <byteswap.h>
+#include <linux/compiler.h>
+
+#include "intel-pt-pkt-decoder.h"
+
+#define BIT(n)		(1 << (n))
+
+#define BIT63		((uint64_t)1 << 63)
+
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define le16_to_cpu bswap_16
+#define le32_to_cpu bswap_32
+#define le64_to_cpu bswap_64
+#define memcpy_le64(d, s, n) do { \
+	memcpy((d), (s), (n));    \
+	*(d) = le64_to_cpu(*(d)); \
+} while (0)
+#else
+#define le16_to_cpu
+#define le32_to_cpu
+#define le64_to_cpu
+#define memcpy_le64 memcpy
+#endif
+
+static const char * const packet_name[] = {
+	[INTEL_PT_BAD]		= "Bad Packet!",
+	[INTEL_PT_PAD]		= "PAD",
+	[INTEL_PT_TNT]		= "TNT",
+	[INTEL_PT_TIP_PGD]	= "TIP.PGD",
+	[INTEL_PT_TIP_PGE]	= "TIP.PGE",
+	[INTEL_PT_TSC]		= "TSC",
+	[INTEL_PT_TMA]		= "TMA",
+	[INTEL_PT_MODE_EXEC]	= "MODE.Exec",
+	[INTEL_PT_MODE_TSX]	= "MODE.TSX",
+	[INTEL_PT_MTC]		= "MTC",
+	[INTEL_PT_TIP]		= "TIP",
+	[INTEL_PT_FUP]		= "FUP",
+	[INTEL_PT_CYC]		= "CYC",
+	[INTEL_PT_VMCS]		= "VMCS",
+	[INTEL_PT_PSB]		= "PSB",
+	[INTEL_PT_PSBEND]	= "PSBEND",
+	[INTEL_PT_CBR]		= "CBR",
+	[INTEL_PT_TRACESTOP]	= "TraceSTOP",
+	[INTEL_PT_PIP]		= "PIP",
+	[INTEL_PT_OVF]		= "OVF",
+	[INTEL_PT_MNT]		= "MNT",
+	[INTEL_PT_PTWRITE]	= "PTWRITE",
+	[INTEL_PT_PTWRITE_IP]	= "PTWRITE",
+	[INTEL_PT_EXSTOP]	= "EXSTOP",
+	[INTEL_PT_EXSTOP_IP]	= "EXSTOP",
+	[INTEL_PT_MWAIT]	= "MWAIT",
+	[INTEL_PT_PWRE]		= "PWRE",
+	[INTEL_PT_PWRX]		= "PWRX",
+	[INTEL_PT_BBP]		= "BBP",
+	[INTEL_PT_BIP]		= "BIP",
+	[INTEL_PT_BEP]		= "BEP",
+	[INTEL_PT_BEP_IP]	= "BEP",
+	[INTEL_PT_CFE]		= "CFE",
+	[INTEL_PT_CFE_IP]	= "CFE",
+	[INTEL_PT_EVD]		= "EVD",
+};
+
+const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
+{
+	return packet_name[type];
+}
+
+static int intel_pt_get_long_tnt(const unsigned char *buf, size_t len,
+				 struct intel_pt_pkt *packet)
+{
+	uint64_t payload;
+	int count;
+
+	if (len < 8)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	payload = le64_to_cpu(*(uint64_t *)buf);
+
+	for (count = 47; count; count--) {
+		if (payload & BIT63)
+			break;
+		payload <<= 1;
+	}
+
+	packet->type = INTEL_PT_TNT;
+	packet->count = count;
+	packet->payload = payload << 1;
+	return 8;
+}
+
+static int intel_pt_get_pip(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	uint64_t payload = 0;
+
+	if (len < 8)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	packet->type = INTEL_PT_PIP;
+	memcpy_le64(&payload, buf + 2, 6);
+	packet->payload = payload;
+
+	return 8;
+}
+
+static int intel_pt_get_tracestop(struct intel_pt_pkt *packet)
+{
+	packet->type = INTEL_PT_TRACESTOP;
+	return 2;
+}
+
+static int intel_pt_get_cbr(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	if (len < 4)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = INTEL_PT_CBR;
+	packet->payload = le16_to_cpu(*(uint16_t *)(buf + 2));
+	return 4;
+}
+
+static int intel_pt_get_vmcs(const unsigned char *buf, size_t len,
+			     struct intel_pt_pkt *packet)
+{
+	unsigned int count = (52 - 5) >> 3;
+
+	if (count < 1 || count > 7)
+		return INTEL_PT_BAD_PACKET;
+
+	if (len < count + 2)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	packet->type = INTEL_PT_VMCS;
+	packet->count = count;
+	memcpy_le64(&packet->payload, buf + 2, count);
+
+	return count + 2;
+}
+
+static int intel_pt_get_ovf(struct intel_pt_pkt *packet)
+{
+	packet->type = INTEL_PT_OVF;
+	return 2;
+}
+
+static int intel_pt_get_psb(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	int i;
+
+	if (len < 16)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	for (i = 2; i < 16; i += 2) {
+		if (buf[i] != 2 || buf[i + 1] != 0x82)
+			return INTEL_PT_BAD_PACKET;
+	}
+
+	packet->type = INTEL_PT_PSB;
+	return 16;
+}
+
+static int intel_pt_get_psbend(struct intel_pt_pkt *packet)
+{
+	packet->type = INTEL_PT_PSBEND;
+	return 2;
+}
+
+static int intel_pt_get_tma(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	if (len < 7)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	packet->type = INTEL_PT_TMA;
+	packet->payload = buf[2] | (buf[3] << 8);
+	packet->count = buf[5] | ((buf[6] & BIT(0)) << 8);
+	return 7;
+}
+
+static int intel_pt_get_pad(struct intel_pt_pkt *packet)
+{
+	packet->type = INTEL_PT_PAD;
+	return 1;
+}
+
+static int intel_pt_get_mnt(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	if (len < 11)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = INTEL_PT_MNT;
+	memcpy_le64(&packet->payload, buf + 3, 8);
+	return 11;
+}
+
+static int intel_pt_get_3byte(const unsigned char *buf, size_t len,
+			      struct intel_pt_pkt *packet)
+{
+	if (len < 3)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	switch (buf[2]) {
+	case 0x88: /* MNT */
+		return intel_pt_get_mnt(buf, len, packet);
+	default:
+		return INTEL_PT_BAD_PACKET;
+	}
+}
+
+static int intel_pt_get_ptwrite(const unsigned char *buf, size_t len,
+				struct intel_pt_pkt *packet)
+{
+	packet->count = (buf[1] >> 5) & 0x3;
+	packet->type = buf[1] & BIT(7) ? INTEL_PT_PTWRITE_IP :
+					 INTEL_PT_PTWRITE;
+
+	switch (packet->count) {
+	case 0:
+		if (len < 6)
+			return INTEL_PT_NEED_MORE_BYTES;
+		packet->payload = le32_to_cpu(*(uint32_t *)(buf + 2));
+		return 6;
+	case 1:
+		if (len < 10)
+			return INTEL_PT_NEED_MORE_BYTES;
+		packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2));
+		return 10;
+	default:
+		return INTEL_PT_BAD_PACKET;
+	}
+}
+
+static int intel_pt_get_exstop(struct intel_pt_pkt *packet)
+{
+	packet->type = INTEL_PT_EXSTOP;
+	return 2;
+}
+
+static int intel_pt_get_exstop_ip(struct intel_pt_pkt *packet)
+{
+	packet->type = INTEL_PT_EXSTOP_IP;
+	return 2;
+}
+
+static int intel_pt_get_mwait(const unsigned char *buf, size_t len,
+			      struct intel_pt_pkt *packet)
+{
+	if (len < 10)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = INTEL_PT_MWAIT;
+	packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2));
+	return 10;
+}
+
+static int intel_pt_get_pwre(const unsigned char *buf, size_t len,
+			     struct intel_pt_pkt *packet)
+{
+	if (len < 4)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = INTEL_PT_PWRE;
+	memcpy_le64(&packet->payload, buf + 2, 2);
+	return 4;
+}
+
+static int intel_pt_get_pwrx(const unsigned char *buf, size_t len,
+			     struct intel_pt_pkt *packet)
+{
+	if (len < 7)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = INTEL_PT_PWRX;
+	memcpy_le64(&packet->payload, buf + 2, 5);
+	return 7;
+}
+
+static int intel_pt_get_bbp(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	if (len < 3)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = INTEL_PT_BBP;
+	packet->count = buf[2] >> 7;
+	packet->payload = buf[2] & 0x1f;
+	return 3;
+}
+
+static int intel_pt_get_bip_4(const unsigned char *buf, size_t len,
+			      struct intel_pt_pkt *packet)
+{
+	if (len < 5)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = INTEL_PT_BIP;
+	packet->count = buf[0] >> 3;
+	memcpy_le64(&packet->payload, buf + 1, 4);
+	return 5;
+}
+
+static int intel_pt_get_bip_8(const unsigned char *buf, size_t len,
+			      struct intel_pt_pkt *packet)
+{
+	if (len < 9)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = INTEL_PT_BIP;
+	packet->count = buf[0] >> 3;
+	memcpy_le64(&packet->payload, buf + 1, 8);
+	return 9;
+}
+
+static int intel_pt_get_bep(size_t len, struct intel_pt_pkt *packet)
+{
+	if (len < 2)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = INTEL_PT_BEP;
+	return 2;
+}
+
+static int intel_pt_get_bep_ip(size_t len, struct intel_pt_pkt *packet)
+{
+	if (len < 2)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = INTEL_PT_BEP_IP;
+	return 2;
+}
+
+static int intel_pt_get_cfe(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	if (len < 4)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = buf[2] & 0x80 ? INTEL_PT_CFE_IP : INTEL_PT_CFE;
+	packet->count = buf[2] & 0x1f;
+	packet->payload = buf[3];
+	return 4;
+}
+
+static int intel_pt_get_evd(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	if (len < 11)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = INTEL_PT_EVD;
+	packet->count = buf[2] & 0x3f;
+	packet->payload = buf[3];
+	memcpy_le64(&packet->payload, buf + 3, 8);
+	return 11;
+}
+
+static int intel_pt_get_ext(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	if (len < 2)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	if ((buf[1] & 0x1f) == 0x12)
+		return intel_pt_get_ptwrite(buf, len, packet);
+
+	switch (buf[1]) {
+	case 0xa3: /* Long TNT */
+		return intel_pt_get_long_tnt(buf, len, packet);
+	case 0x43: /* PIP */
+		return intel_pt_get_pip(buf, len, packet);
+	case 0x83: /* TraceStop */
+		return intel_pt_get_tracestop(packet);
+	case 0x03: /* CBR */
+		return intel_pt_get_cbr(buf, len, packet);
+	case 0xc8: /* VMCS */
+		return intel_pt_get_vmcs(buf, len, packet);
+	case 0xf3: /* OVF */
+		return intel_pt_get_ovf(packet);
+	case 0x82: /* PSB */
+		return intel_pt_get_psb(buf, len, packet);
+	case 0x23: /* PSBEND */
+		return intel_pt_get_psbend(packet);
+	case 0x73: /* TMA */
+		return intel_pt_get_tma(buf, len, packet);
+	case 0xC3: /* 3-byte header */
+		return intel_pt_get_3byte(buf, len, packet);
+	case 0x62: /* EXSTOP no IP */
+		return intel_pt_get_exstop(packet);
+	case 0xE2: /* EXSTOP with IP */
+		return intel_pt_get_exstop_ip(packet);
+	case 0xC2: /* MWAIT */
+		return intel_pt_get_mwait(buf, len, packet);
+	case 0x22: /* PWRE */
+		return intel_pt_get_pwre(buf, len, packet);
+	case 0xA2: /* PWRX */
+		return intel_pt_get_pwrx(buf, len, packet);
+	case 0x63: /* BBP */
+		return intel_pt_get_bbp(buf, len, packet);
+	case 0x33: /* BEP no IP */
+		return intel_pt_get_bep(len, packet);
+	case 0xb3: /* BEP with IP */
+		return intel_pt_get_bep_ip(len, packet);
+	case 0x13: /* CFE */
+		return intel_pt_get_cfe(buf, len, packet);
+	case 0x53: /* EVD */
+		return intel_pt_get_evd(buf, len, packet);
+	default:
+		return INTEL_PT_BAD_PACKET;
+	}
+}
+
+static int intel_pt_get_short_tnt(unsigned int byte,
+				  struct intel_pt_pkt *packet)
+{
+	int count;
+
+	for (count = 6; count; count--) {
+		if (byte & BIT(7))
+			break;
+		byte <<= 1;
+	}
+
+	packet->type = INTEL_PT_TNT;
+	packet->count = count;
+	packet->payload = (uint64_t)byte << 57;
+
+	return 1;
+}
+
+static int intel_pt_get_cyc(unsigned int byte, const unsigned char *buf,
+			    size_t len, struct intel_pt_pkt *packet)
+{
+	unsigned int offs = 1, shift;
+	uint64_t payload = byte >> 3;
+
+	byte >>= 2;
+	len -= 1;
+	for (shift = 5; byte & 1; shift += 7) {
+		if (offs > 9)
+			return INTEL_PT_BAD_PACKET;
+		if (len < offs)
+			return INTEL_PT_NEED_MORE_BYTES;
+		byte = buf[offs++];
+		payload |= ((uint64_t)byte >> 1) << shift;
+	}
+
+	packet->type = INTEL_PT_CYC;
+	packet->payload = payload;
+	return offs;
+}
+
+static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte,
+			   const unsigned char *buf, size_t len,
+			   struct intel_pt_pkt *packet)
+{
+	int ip_len;
+
+	packet->count = byte >> 5;
+
+	switch (packet->count) {
+	case 0:
+		ip_len = 0;
+		break;
+	case 1:
+		if (len < 3)
+			return INTEL_PT_NEED_MORE_BYTES;
+		ip_len = 2;
+		packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1));
+		break;
+	case 2:
+		if (len < 5)
+			return INTEL_PT_NEED_MORE_BYTES;
+		ip_len = 4;
+		packet->payload = le32_to_cpu(*(uint32_t *)(buf + 1));
+		break;
+	case 3:
+	case 4:
+		if (len < 7)
+			return INTEL_PT_NEED_MORE_BYTES;
+		ip_len = 6;
+		memcpy_le64(&packet->payload, buf + 1, 6);
+		break;
+	case 6:
+		if (len < 9)
+			return INTEL_PT_NEED_MORE_BYTES;
+		ip_len = 8;
+		packet->payload = le64_to_cpu(*(uint64_t *)(buf + 1));
+		break;
+	default:
+		return INTEL_PT_BAD_PACKET;
+	}
+
+	packet->type = type;
+
+	return ip_len + 1;
+}
+
+static int intel_pt_get_mode(const unsigned char *buf, size_t len,
+			     struct intel_pt_pkt *packet)
+{
+	if (len < 2)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	switch (buf[1] >> 5) {
+	case 0:
+		packet->type = INTEL_PT_MODE_EXEC;
+		packet->count = buf[1];
+		switch (buf[1] & 3) {
+		case 0:
+			packet->payload = 16;
+			break;
+		case 1:
+			packet->payload = 64;
+			break;
+		case 2:
+			packet->payload = 32;
+			break;
+		default:
+			return INTEL_PT_BAD_PACKET;
+		}
+		break;
+	case 1:
+		packet->type = INTEL_PT_MODE_TSX;
+		if ((buf[1] & 3) == 3)
+			return INTEL_PT_BAD_PACKET;
+		packet->payload = buf[1] & 3;
+		break;
+	default:
+		return INTEL_PT_BAD_PACKET;
+	}
+
+	return 2;
+}
+
+static int intel_pt_get_tsc(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	if (len < 8)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = INTEL_PT_TSC;
+	memcpy_le64(&packet->payload, buf + 1, 7);
+	return 8;
+}
+
+static int intel_pt_get_mtc(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	if (len < 2)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = INTEL_PT_MTC;
+	packet->payload = buf[1];
+	return 2;
+}
+
+static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
+				  struct intel_pt_pkt *packet,
+				  enum intel_pt_pkt_ctx ctx)
+{
+	unsigned int byte;
+
+	memset(packet, 0, sizeof(struct intel_pt_pkt));
+
+	if (!len)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	byte = buf[0];
+
+	switch (ctx) {
+	case INTEL_PT_NO_CTX:
+		break;
+	case INTEL_PT_BLK_4_CTX:
+		if ((byte & 0x7) == 4)
+			return intel_pt_get_bip_4(buf, len, packet);
+		break;
+	case INTEL_PT_BLK_8_CTX:
+		if ((byte & 0x7) == 4)
+			return intel_pt_get_bip_8(buf, len, packet);
+		break;
+	default:
+		break;
+	}
+
+	if (!(byte & BIT(0))) {
+		if (byte == 0)
+			return intel_pt_get_pad(packet);
+		if (byte == 2)
+			return intel_pt_get_ext(buf, len, packet);
+		return intel_pt_get_short_tnt(byte, packet);
+	}
+
+	if ((byte & 2))
+		return intel_pt_get_cyc(byte, buf, len, packet);
+
+	switch (byte & 0x1f) {
+	case 0x0D:
+		return intel_pt_get_ip(INTEL_PT_TIP, byte, buf, len, packet);
+	case 0x11:
+		return intel_pt_get_ip(INTEL_PT_TIP_PGE, byte, buf, len,
+				       packet);
+	case 0x01:
+		return intel_pt_get_ip(INTEL_PT_TIP_PGD, byte, buf, len,
+				       packet);
+	case 0x1D:
+		return intel_pt_get_ip(INTEL_PT_FUP, byte, buf, len, packet);
+	case 0x19:
+		switch (byte) {
+		case 0x99:
+			return intel_pt_get_mode(buf, len, packet);
+		case 0x19:
+			return intel_pt_get_tsc(buf, len, packet);
+		case 0x59:
+			return intel_pt_get_mtc(buf, len, packet);
+		default:
+			return INTEL_PT_BAD_PACKET;
+		}
+	default:
+		return INTEL_PT_BAD_PACKET;
+	}
+}
+
+void intel_pt_upd_pkt_ctx(const struct intel_pt_pkt *packet,
+			  enum intel_pt_pkt_ctx *ctx)
+{
+	switch (packet->type) {
+	case INTEL_PT_BAD:
+	case INTEL_PT_PAD:
+	case INTEL_PT_TSC:
+	case INTEL_PT_TMA:
+	case INTEL_PT_MTC:
+	case INTEL_PT_FUP:
+	case INTEL_PT_CYC:
+	case INTEL_PT_CBR:
+	case INTEL_PT_MNT:
+	case INTEL_PT_EXSTOP:
+	case INTEL_PT_EXSTOP_IP:
+	case INTEL_PT_PWRE:
+	case INTEL_PT_PWRX:
+	case INTEL_PT_BIP:
+		break;
+	case INTEL_PT_TNT:
+	case INTEL_PT_TIP:
+	case INTEL_PT_TIP_PGD:
+	case INTEL_PT_TIP_PGE:
+	case INTEL_PT_MODE_EXEC:
+	case INTEL_PT_MODE_TSX:
+	case INTEL_PT_PIP:
+	case INTEL_PT_OVF:
+	case INTEL_PT_VMCS:
+	case INTEL_PT_TRACESTOP:
+	case INTEL_PT_PSB:
+	case INTEL_PT_PSBEND:
+	case INTEL_PT_PTWRITE:
+	case INTEL_PT_PTWRITE_IP:
+	case INTEL_PT_MWAIT:
+	case INTEL_PT_BEP:
+	case INTEL_PT_BEP_IP:
+	case INTEL_PT_CFE:
+	case INTEL_PT_CFE_IP:
+	case INTEL_PT_EVD:
+		*ctx = INTEL_PT_NO_CTX;
+		break;
+	case INTEL_PT_BBP:
+		if (packet->count)
+			*ctx = INTEL_PT_BLK_4_CTX;
+		else
+			*ctx = INTEL_PT_BLK_8_CTX;
+		break;
+	default:
+		break;
+	}
+}
+
+int intel_pt_get_packet(const unsigned char *buf, size_t len,
+			struct intel_pt_pkt *packet, enum intel_pt_pkt_ctx *ctx)
+{
+	int ret;
+
+	ret = intel_pt_do_get_packet(buf, len, packet, *ctx);
+	if (ret > 0) {
+		while (ret < 8 && len > (size_t)ret && !buf[ret])
+			ret += 1;
+		intel_pt_upd_pkt_ctx(packet, ctx);
+	}
+	return ret;
+}
+
+int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
+		      size_t buf_len)
+{
+	int ret, i, nr;
+	unsigned long long payload = packet->payload;
+	const char *name = intel_pt_pkt_name(packet->type);
+
+	switch (packet->type) {
+	case INTEL_PT_BAD:
+	case INTEL_PT_PAD:
+	case INTEL_PT_PSB:
+	case INTEL_PT_PSBEND:
+	case INTEL_PT_TRACESTOP:
+	case INTEL_PT_OVF:
+		return snprintf(buf, buf_len, "%s", name);
+	case INTEL_PT_TNT: {
+		size_t blen = buf_len;
+
+		ret = snprintf(buf, blen, "%s ", name);
+		if (ret < 0)
+			return ret;
+		buf += ret;
+		blen -= ret;
+		for (i = 0; i < packet->count; i++) {
+			if (payload & BIT63)
+				ret = snprintf(buf, blen, "T");
+			else
+				ret = snprintf(buf, blen, "N");
+			if (ret < 0)
+				return ret;
+			buf += ret;
+			blen -= ret;
+			payload <<= 1;
+		}
+		ret = snprintf(buf, blen, " (%d)", packet->count);
+		if (ret < 0)
+			return ret;
+		blen -= ret;
+		return buf_len - blen;
+	}
+	case INTEL_PT_TIP_PGD:
+	case INTEL_PT_TIP_PGE:
+	case INTEL_PT_TIP:
+	case INTEL_PT_FUP:
+		if (!(packet->count))
+			return snprintf(buf, buf_len, "%s no ip", name);
+		__fallthrough;
+	case INTEL_PT_CYC:
+	case INTEL_PT_VMCS:
+	case INTEL_PT_MTC:
+	case INTEL_PT_MNT:
+	case INTEL_PT_CBR:
+	case INTEL_PT_TSC:
+		return snprintf(buf, buf_len, "%s 0x%llx", name, payload);
+	case INTEL_PT_TMA:
+		return snprintf(buf, buf_len, "%s CTC 0x%x FC 0x%x", name,
+				(unsigned)payload, packet->count);
+	case INTEL_PT_MODE_EXEC:
+		return snprintf(buf, buf_len, "%s IF:%d %lld",
+				name, !!(packet->count & 4), payload);
+	case INTEL_PT_MODE_TSX:
+		return snprintf(buf, buf_len, "%s TXAbort:%u InTX:%u",
+				name, (unsigned)(payload >> 1) & 1,
+				(unsigned)payload & 1);
+	case INTEL_PT_PIP:
+		nr = packet->payload & INTEL_PT_VMX_NR_FLAG ? 1 : 0;
+		payload &= ~INTEL_PT_VMX_NR_FLAG;
+		ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)",
+			       name, payload >> 1, nr);
+		return ret;
+	case INTEL_PT_PTWRITE:
+		return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload);
+	case INTEL_PT_PTWRITE_IP:
+		return snprintf(buf, buf_len, "%s 0x%llx IP:1", name, payload);
+	case INTEL_PT_BEP:
+	case INTEL_PT_EXSTOP:
+		return snprintf(buf, buf_len, "%s IP:0", name);
+	case INTEL_PT_BEP_IP:
+	case INTEL_PT_EXSTOP_IP:
+		return snprintf(buf, buf_len, "%s IP:1", name);
+	case INTEL_PT_MWAIT:
+		return snprintf(buf, buf_len, "%s 0x%llx Hints 0x%x Extensions 0x%x",
+				name, payload, (unsigned int)(payload & 0xff),
+				(unsigned int)((payload >> 32) & 0x3));
+	case INTEL_PT_PWRE:
+		return snprintf(buf, buf_len, "%s 0x%llx HW:%u CState:%u Sub-CState:%u",
+				name, payload, !!(payload & 0x80),
+				(unsigned int)((payload >> 12) & 0xf),
+				(unsigned int)((payload >> 8) & 0xf));
+	case INTEL_PT_PWRX:
+		return snprintf(buf, buf_len, "%s 0x%llx Last CState:%u Deepest CState:%u Wake Reason 0x%x",
+				name, payload,
+				(unsigned int)((payload >> 4) & 0xf),
+				(unsigned int)(payload & 0xf),
+				(unsigned int)((payload >> 8) & 0xf));
+	case INTEL_PT_BBP:
+		return snprintf(buf, buf_len, "%s SZ %s-byte Type 0x%llx",
+				name, packet->count ? "4" : "8", payload);
+	case INTEL_PT_BIP:
+		return snprintf(buf, buf_len, "%s ID 0x%02x Value 0x%llx",
+				name, packet->count, payload);
+	case INTEL_PT_CFE:
+	case INTEL_PT_CFE_IP:
+		return snprintf(buf, buf_len, "%s IP:%d Type 0x%02x Vector 0x%llx",
+				name, packet->type == INTEL_PT_CFE_IP, packet->count, payload);
+	case INTEL_PT_EVD:
+		return snprintf(buf, buf_len, "%s Type 0x%02x Payload 0x%llx",
+				name, packet->count, payload);
+	default:
+		break;
+	}
+	return snprintf(buf, buf_len, "%s 0x%llx (%d)",
+			name, payload, packet->count);
+}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
new file mode 100644
index 000000000..496ba4be8
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * intel_pt_pkt_decoder.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ */
+
+#ifndef INCLUDE__INTEL_PT_PKT_DECODER_H__
+#define INCLUDE__INTEL_PT_PKT_DECODER_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define INTEL_PT_PKT_DESC_MAX	256
+
+#define INTEL_PT_NEED_MORE_BYTES	-1
+#define INTEL_PT_BAD_PACKET		-2
+
+#define INTEL_PT_PSB_STR		"\002\202\002\202\002\202\002\202" \
+					"\002\202\002\202\002\202\002\202"
+#define INTEL_PT_PSB_LEN		16
+
+#define INTEL_PT_PKT_MAX_SZ		16
+
+#define INTEL_PT_VMX_NR_FLAG		1
+
+enum intel_pt_pkt_type {
+	INTEL_PT_BAD,
+	INTEL_PT_PAD,
+	INTEL_PT_TNT,
+	INTEL_PT_TIP_PGD,
+	INTEL_PT_TIP_PGE,
+	INTEL_PT_TSC,
+	INTEL_PT_TMA,
+	INTEL_PT_MODE_EXEC,
+	INTEL_PT_MODE_TSX,
+	INTEL_PT_MTC,
+	INTEL_PT_TIP,
+	INTEL_PT_FUP,
+	INTEL_PT_CYC,
+	INTEL_PT_VMCS,
+	INTEL_PT_PSB,
+	INTEL_PT_PSBEND,
+	INTEL_PT_CBR,
+	INTEL_PT_TRACESTOP,
+	INTEL_PT_PIP,
+	INTEL_PT_OVF,
+	INTEL_PT_MNT,
+	INTEL_PT_PTWRITE,
+	INTEL_PT_PTWRITE_IP,
+	INTEL_PT_EXSTOP,
+	INTEL_PT_EXSTOP_IP,
+	INTEL_PT_MWAIT,
+	INTEL_PT_PWRE,
+	INTEL_PT_PWRX,
+	INTEL_PT_BBP,
+	INTEL_PT_BIP,
+	INTEL_PT_BEP,
+	INTEL_PT_BEP_IP,
+	INTEL_PT_CFE,
+	INTEL_PT_CFE_IP,
+	INTEL_PT_EVD,
+};
+
+struct intel_pt_pkt {
+	enum intel_pt_pkt_type	type;
+	int			count;
+	uint64_t		payload;
+};
+
+/*
+ * Decoding of BIP packets conflicts with single-byte TNT packets. Since BIP
+ * packets only occur in the context of a block (i.e. between BBP and BEP), that
+ * context must be recorded and passed to the packet decoder.
+ */
+enum intel_pt_pkt_ctx {
+	INTEL_PT_NO_CTX,	/* BIP packets are invalid */
+	INTEL_PT_BLK_4_CTX,	/* 4-byte BIP packets */
+	INTEL_PT_BLK_8_CTX,	/* 8-byte BIP packets */
+};
+
+const char *intel_pt_pkt_name(enum intel_pt_pkt_type);
+
+int intel_pt_get_packet(const unsigned char *buf, size_t len,
+			struct intel_pt_pkt *packet,
+			enum intel_pt_pkt_ctx *ctx);
+
+void intel_pt_upd_pkt_ctx(const struct intel_pt_pkt *packet,
+			  enum intel_pt_pkt_ctx *ctx);
+
+int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, size_t len);
+
+#endif
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
new file mode 100644
index 000000000..6fb64c58b
--- /dev/null
+++ b/tools/perf/util/intel-pt.c
@@ -0,0 +1,4413 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * intel_pt.c: Intel Processor Trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/zalloc.h>
+
+#include "session.h"
+#include "machine.h"
+#include "memswap.h"
+#include "sort.h"
+#include "tool.h"
+#include "event.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "map.h"
+#include "color.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "symbol.h"
+#include "callchain.h"
+#include "dso.h"
+#include "debug.h"
+#include "auxtrace.h"
+#include "tsc.h"
+#include "intel-pt.h"
+#include "config.h"
+#include "util/perf_api_probe.h"
+#include "util/synthetic-events.h"
+#include "time-utils.h"
+
+#include "../arch/x86/include/uapi/asm/perf_regs.h"
+
+#include "intel-pt-decoder/intel-pt-log.h"
+#include "intel-pt-decoder/intel-pt-decoder.h"
+#include "intel-pt-decoder/intel-pt-insn-decoder.h"
+#include "intel-pt-decoder/intel-pt-pkt-decoder.h"
+
+#define MAX_TIMESTAMP (~0ULL)
+
+#define INTEL_PT_CFG_PASS_THRU	BIT_ULL(0)
+#define INTEL_PT_CFG_PWR_EVT_EN	BIT_ULL(4)
+#define INTEL_PT_CFG_BRANCH_EN	BIT_ULL(13)
+#define INTEL_PT_CFG_EVT_EN	BIT_ULL(31)
+#define INTEL_PT_CFG_TNT_DIS	BIT_ULL(55)
+
+struct range {
+	u64 start;
+	u64 end;
+};
+
+struct intel_pt {
+	struct auxtrace auxtrace;
+	struct auxtrace_queues queues;
+	struct auxtrace_heap heap;
+	u32 auxtrace_type;
+	struct perf_session *session;
+	struct machine *machine;
+	struct evsel *switch_evsel;
+	struct thread *unknown_thread;
+	bool timeless_decoding;
+	bool sampling_mode;
+	bool snapshot_mode;
+	bool per_cpu_mmaps;
+	bool have_tsc;
+	bool data_queued;
+	bool est_tsc;
+	bool sync_switch;
+	bool sync_switch_not_supported;
+	bool mispred_all;
+	bool use_thread_stack;
+	bool callstack;
+	bool cap_event_trace;
+	bool have_guest_sideband;
+	unsigned int br_stack_sz;
+	unsigned int br_stack_sz_plus;
+	int have_sched_switch;
+	u32 pmu_type;
+	u64 kernel_start;
+	u64 switch_ip;
+	u64 ptss_ip;
+	u64 first_timestamp;
+
+	struct perf_tsc_conversion tc;
+	bool cap_user_time_zero;
+
+	struct itrace_synth_opts synth_opts;
+
+	bool sample_instructions;
+	u64 instructions_sample_type;
+	u64 instructions_id;
+
+	bool sample_branches;
+	u32 branches_filter;
+	u64 branches_sample_type;
+	u64 branches_id;
+
+	bool sample_transactions;
+	u64 transactions_sample_type;
+	u64 transactions_id;
+
+	bool sample_ptwrites;
+	u64 ptwrites_sample_type;
+	u64 ptwrites_id;
+
+	bool sample_pwr_events;
+	u64 pwr_events_sample_type;
+	u64 mwait_id;
+	u64 pwre_id;
+	u64 exstop_id;
+	u64 pwrx_id;
+	u64 cbr_id;
+	u64 psb_id;
+
+	bool single_pebs;
+	bool sample_pebs;
+	struct evsel *pebs_evsel;
+
+	u64 evt_sample_type;
+	u64 evt_id;
+
+	u64 iflag_chg_sample_type;
+	u64 iflag_chg_id;
+
+	u64 tsc_bit;
+	u64 mtc_bit;
+	u64 mtc_freq_bits;
+	u32 tsc_ctc_ratio_n;
+	u32 tsc_ctc_ratio_d;
+	u64 cyc_bit;
+	u64 noretcomp_bit;
+	unsigned max_non_turbo_ratio;
+	unsigned cbr2khz;
+	int max_loops;
+
+	unsigned long num_events;
+
+	char *filter;
+	struct addr_filters filts;
+
+	struct range *time_ranges;
+	unsigned int range_cnt;
+
+	struct ip_callchain *chain;
+	struct branch_stack *br_stack;
+
+	u64 dflt_tsc_offset;
+	struct rb_root vmcs_info;
+};
+
+enum switch_state {
+	INTEL_PT_SS_NOT_TRACING,
+	INTEL_PT_SS_UNKNOWN,
+	INTEL_PT_SS_TRACING,
+	INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
+	INTEL_PT_SS_EXPECTING_SWITCH_IP,
+};
+
+/* applicable_counters is 64-bits */
+#define INTEL_PT_MAX_PEBS 64
+
+struct intel_pt_pebs_event {
+	struct evsel *evsel;
+	u64 id;
+};
+
+struct intel_pt_queue {
+	struct intel_pt *pt;
+	unsigned int queue_nr;
+	struct auxtrace_buffer *buffer;
+	struct auxtrace_buffer *old_buffer;
+	void *decoder;
+	const struct intel_pt_state *state;
+	struct ip_callchain *chain;
+	struct branch_stack *last_branch;
+	union perf_event *event_buf;
+	bool on_heap;
+	bool stop;
+	bool step_through_buffers;
+	bool use_buffer_pid_tid;
+	bool sync_switch;
+	bool sample_ipc;
+	pid_t pid, tid;
+	int cpu;
+	int switch_state;
+	pid_t next_tid;
+	struct thread *thread;
+	struct machine *guest_machine;
+	struct thread *guest_thread;
+	struct thread *unknown_guest_thread;
+	pid_t guest_machine_pid;
+	pid_t guest_pid;
+	pid_t guest_tid;
+	int vcpu;
+	bool exclude_kernel;
+	bool have_sample;
+	u64 time;
+	u64 timestamp;
+	u64 sel_timestamp;
+	bool sel_start;
+	unsigned int sel_idx;
+	u32 flags;
+	u16 insn_len;
+	u64 last_insn_cnt;
+	u64 ipc_insn_cnt;
+	u64 ipc_cyc_cnt;
+	u64 last_in_insn_cnt;
+	u64 last_in_cyc_cnt;
+	u64 last_br_insn_cnt;
+	u64 last_br_cyc_cnt;
+	unsigned int cbr_seen;
+	char insn[INTEL_PT_INSN_BUF_SZ];
+	struct intel_pt_pebs_event pebs[INTEL_PT_MAX_PEBS];
+};
+
+static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
+			  unsigned char *buf, size_t len)
+{
+	struct intel_pt_pkt packet;
+	size_t pos = 0;
+	int ret, pkt_len, i;
+	char desc[INTEL_PT_PKT_DESC_MAX];
+	const char *color = PERF_COLOR_BLUE;
+	enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX;
+
+	color_fprintf(stdout, color,
+		      ". ... Intel Processor Trace data: size %zu bytes\n",
+		      len);
+
+	while (len) {
+		ret = intel_pt_get_packet(buf, len, &packet, &ctx);
+		if (ret > 0)
+			pkt_len = ret;
+		else
+			pkt_len = 1;
+		printf(".");
+		color_fprintf(stdout, color, "  %08x: ", pos);
+		for (i = 0; i < pkt_len; i++)
+			color_fprintf(stdout, color, " %02x", buf[i]);
+		for (; i < 16; i++)
+			color_fprintf(stdout, color, "   ");
+		if (ret > 0) {
+			ret = intel_pt_pkt_desc(&packet, desc,
+						INTEL_PT_PKT_DESC_MAX);
+			if (ret > 0)
+				color_fprintf(stdout, color, " %s\n", desc);
+		} else {
+			color_fprintf(stdout, color, " Bad packet!\n");
+		}
+		pos += pkt_len;
+		buf += pkt_len;
+		len -= pkt_len;
+	}
+}
+
+static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
+				size_t len)
+{
+	printf(".\n");
+	intel_pt_dump(pt, buf, len);
+}
+
+static void intel_pt_log_event(union perf_event *event)
+{
+	FILE *f = intel_pt_log_fp();
+
+	if (!intel_pt_enable_logging || !f)
+		return;
+
+	perf_event__fprintf(event, NULL, f);
+}
+
+static void intel_pt_dump_sample(struct perf_session *session,
+				 struct perf_sample *sample)
+{
+	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+					   auxtrace);
+
+	printf("\n");
+	intel_pt_dump(pt, sample->aux_sample.data, sample->aux_sample.size);
+}
+
+static bool intel_pt_log_events(struct intel_pt *pt, u64 tm)
+{
+	struct perf_time_interval *range = pt->synth_opts.ptime_range;
+	int n = pt->synth_opts.range_num;
+
+	if (pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)
+		return true;
+
+	if (pt->synth_opts.log_minus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)
+		return false;
+
+	/* perf_time__ranges_skip_sample does not work if time is zero */
+	if (!tm)
+		tm = 1;
+
+	return !n || !perf_time__ranges_skip_sample(range, n, tm);
+}
+
+static struct intel_pt_vmcs_info *intel_pt_findnew_vmcs(struct rb_root *rb_root,
+							u64 vmcs,
+							u64 dflt_tsc_offset)
+{
+	struct rb_node **p = &rb_root->rb_node;
+	struct rb_node *parent = NULL;
+	struct intel_pt_vmcs_info *v;
+
+	while (*p) {
+		parent = *p;
+		v = rb_entry(parent, struct intel_pt_vmcs_info, rb_node);
+
+		if (v->vmcs == vmcs)
+			return v;
+
+		if (vmcs < v->vmcs)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	v = zalloc(sizeof(*v));
+	if (v) {
+		v->vmcs = vmcs;
+		v->tsc_offset = dflt_tsc_offset;
+		v->reliable = dflt_tsc_offset;
+
+		rb_link_node(&v->rb_node, parent, p);
+		rb_insert_color(&v->rb_node, rb_root);
+	}
+
+	return v;
+}
+
+static struct intel_pt_vmcs_info *intel_pt_findnew_vmcs_info(void *data, uint64_t vmcs)
+{
+	struct intel_pt_queue *ptq = data;
+	struct intel_pt *pt = ptq->pt;
+
+	if (!vmcs && !pt->dflt_tsc_offset)
+		return NULL;
+
+	return intel_pt_findnew_vmcs(&pt->vmcs_info, vmcs, pt->dflt_tsc_offset);
+}
+
+static void intel_pt_free_vmcs_info(struct intel_pt *pt)
+{
+	struct intel_pt_vmcs_info *v;
+	struct rb_node *n;
+
+	n = rb_first(&pt->vmcs_info);
+	while (n) {
+		v = rb_entry(n, struct intel_pt_vmcs_info, rb_node);
+		n = rb_next(n);
+		rb_erase(&v->rb_node, &pt->vmcs_info);
+		free(v);
+	}
+}
+
+static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
+				   struct auxtrace_buffer *b)
+{
+	bool consecutive = false;
+	void *start;
+
+	start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
+				      pt->have_tsc, &consecutive,
+				      pt->synth_opts.vm_time_correlation);
+	if (!start)
+		return -EINVAL;
+	/*
+	 * In the case of vm_time_correlation, the overlap might contain TSC
+	 * packets that will not be fixed, and that will then no longer work for
+	 * overlap detection. Avoid that by zeroing out the overlap.
+	 */
+	if (pt->synth_opts.vm_time_correlation)
+		memset(b->data, 0, start - b->data);
+	b->use_size = b->data + b->size - start;
+	b->use_data = start;
+	if (b->use_size && consecutive)
+		b->consecutive = true;
+	return 0;
+}
+
+static int intel_pt_get_buffer(struct intel_pt_queue *ptq,
+			       struct auxtrace_buffer *buffer,
+			       struct auxtrace_buffer *old_buffer,
+			       struct intel_pt_buffer *b)
+{
+	bool might_overlap;
+
+	if (!buffer->data) {
+		int fd = perf_data__fd(ptq->pt->session->data);
+
+		buffer->data = auxtrace_buffer__get_data(buffer, fd);
+		if (!buffer->data)
+			return -ENOMEM;
+	}
+
+	might_overlap = ptq->pt->snapshot_mode || ptq->pt->sampling_mode;
+	if (might_overlap && !buffer->consecutive && old_buffer &&
+	    intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
+		return -ENOMEM;
+
+	if (buffer->use_data) {
+		b->len = buffer->use_size;
+		b->buf = buffer->use_data;
+	} else {
+		b->len = buffer->size;
+		b->buf = buffer->data;
+	}
+	b->ref_timestamp = buffer->reference;
+
+	if (!old_buffer || (might_overlap && !buffer->consecutive)) {
+		b->consecutive = false;
+		b->trace_nr = buffer->buffer_nr + 1;
+	} else {
+		b->consecutive = true;
+	}
+
+	return 0;
+}
+
+/* Do not drop buffers with references - refer intel_pt_get_trace() */
+static void intel_pt_lookahead_drop_buffer(struct intel_pt_queue *ptq,
+					   struct auxtrace_buffer *buffer)
+{
+	if (!buffer || buffer == ptq->buffer || buffer == ptq->old_buffer)
+		return;
+
+	auxtrace_buffer__drop_data(buffer);
+}
+
+/* Must be serialized with respect to intel_pt_get_trace() */
+static int intel_pt_lookahead(void *data, intel_pt_lookahead_cb_t cb,
+			      void *cb_data)
+{
+	struct intel_pt_queue *ptq = data;
+	struct auxtrace_buffer *buffer = ptq->buffer;
+	struct auxtrace_buffer *old_buffer = ptq->old_buffer;
+	struct auxtrace_queue *queue;
+	int err = 0;
+
+	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
+
+	while (1) {
+		struct intel_pt_buffer b = { .len = 0 };
+
+		buffer = auxtrace_buffer__next(queue, buffer);
+		if (!buffer)
+			break;
+
+		err = intel_pt_get_buffer(ptq, buffer, old_buffer, &b);
+		if (err)
+			break;
+
+		if (b.len) {
+			intel_pt_lookahead_drop_buffer(ptq, old_buffer);
+			old_buffer = buffer;
+		} else {
+			intel_pt_lookahead_drop_buffer(ptq, buffer);
+			continue;
+		}
+
+		err = cb(&b, cb_data);
+		if (err)
+			break;
+	}
+
+	if (buffer != old_buffer)
+		intel_pt_lookahead_drop_buffer(ptq, buffer);
+	intel_pt_lookahead_drop_buffer(ptq, old_buffer);
+
+	return err;
+}
+
+/*
+ * This function assumes data is processed sequentially only.
+ * Must be serialized with respect to intel_pt_lookahead()
+ */
+static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
+{
+	struct intel_pt_queue *ptq = data;
+	struct auxtrace_buffer *buffer = ptq->buffer;
+	struct auxtrace_buffer *old_buffer = ptq->old_buffer;
+	struct auxtrace_queue *queue;
+	int err;
+
+	if (ptq->stop) {
+		b->len = 0;
+		return 0;
+	}
+
+	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
+
+	buffer = auxtrace_buffer__next(queue, buffer);
+	if (!buffer) {
+		if (old_buffer)
+			auxtrace_buffer__drop_data(old_buffer);
+		b->len = 0;
+		return 0;
+	}
+
+	ptq->buffer = buffer;
+
+	err = intel_pt_get_buffer(ptq, buffer, old_buffer, b);
+	if (err)
+		return err;
+
+	if (ptq->step_through_buffers)
+		ptq->stop = true;
+
+	if (b->len) {
+		if (old_buffer)
+			auxtrace_buffer__drop_data(old_buffer);
+		ptq->old_buffer = buffer;
+	} else {
+		auxtrace_buffer__drop_data(buffer);
+		return intel_pt_get_trace(b, data);
+	}
+
+	return 0;
+}
+
+struct intel_pt_cache_entry {
+	struct auxtrace_cache_entry	entry;
+	u64				insn_cnt;
+	u64				byte_cnt;
+	enum intel_pt_insn_op		op;
+	enum intel_pt_insn_branch	branch;
+	bool				emulated_ptwrite;
+	int				length;
+	int32_t				rel;
+	char				insn[INTEL_PT_INSN_BUF_SZ];
+};
+
+static int intel_pt_config_div(const char *var, const char *value, void *data)
+{
+	int *d = data;
+	long val;
+
+	if (!strcmp(var, "intel-pt.cache-divisor")) {
+		val = strtol(value, NULL, 0);
+		if (val > 0 && val <= INT_MAX)
+			*d = val;
+	}
+
+	return 0;
+}
+
+static int intel_pt_cache_divisor(void)
+{
+	static int d;
+
+	if (d)
+		return d;
+
+	perf_config(intel_pt_config_div, &d);
+
+	if (!d)
+		d = 64;
+
+	return d;
+}
+
+static unsigned int intel_pt_cache_size(struct dso *dso,
+					struct machine *machine)
+{
+	off_t size;
+
+	size = dso__data_size(dso, machine);
+	size /= intel_pt_cache_divisor();
+	if (size < 1000)
+		return 10;
+	if (size > (1 << 21))
+		return 21;
+	return 32 - __builtin_clz(size);
+}
+
+static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
+					     struct machine *machine)
+{
+	struct auxtrace_cache *c;
+	unsigned int bits;
+
+	if (dso->auxtrace_cache)
+		return dso->auxtrace_cache;
+
+	bits = intel_pt_cache_size(dso, machine);
+
+	/* Ignoring cache creation failure */
+	c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);
+
+	dso->auxtrace_cache = c;
+
+	return c;
+}
+
+static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
+			      u64 offset, u64 insn_cnt, u64 byte_cnt,
+			      struct intel_pt_insn *intel_pt_insn)
+{
+	struct auxtrace_cache *c = intel_pt_cache(dso, machine);
+	struct intel_pt_cache_entry *e;
+	int err;
+
+	if (!c)
+		return -ENOMEM;
+
+	e = auxtrace_cache__alloc_entry(c);
+	if (!e)
+		return -ENOMEM;
+
+	e->insn_cnt = insn_cnt;
+	e->byte_cnt = byte_cnt;
+	e->op = intel_pt_insn->op;
+	e->branch = intel_pt_insn->branch;
+	e->emulated_ptwrite = intel_pt_insn->emulated_ptwrite;
+	e->length = intel_pt_insn->length;
+	e->rel = intel_pt_insn->rel;
+	memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ);
+
+	err = auxtrace_cache__add(c, offset, &e->entry);
+	if (err)
+		auxtrace_cache__free_entry(c, e);
+
+	return err;
+}
+
+static struct intel_pt_cache_entry *
+intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
+{
+	struct auxtrace_cache *c = intel_pt_cache(dso, machine);
+
+	if (!c)
+		return NULL;
+
+	return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
+}
+
+static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine,
+				      u64 offset)
+{
+	struct auxtrace_cache *c = intel_pt_cache(dso, machine);
+
+	if (!c)
+		return;
+
+	auxtrace_cache__remove(dso->auxtrace_cache, offset);
+}
+
+static inline bool intel_pt_guest_kernel_ip(uint64_t ip)
+{
+	/* Assumes 64-bit kernel */
+	return ip & (1ULL << 63);
+}
+
+static inline u8 intel_pt_nr_cpumode(struct intel_pt_queue *ptq, uint64_t ip, bool nr)
+{
+	if (nr) {
+		return intel_pt_guest_kernel_ip(ip) ?
+		       PERF_RECORD_MISC_GUEST_KERNEL :
+		       PERF_RECORD_MISC_GUEST_USER;
+	}
+
+	return ip >= ptq->pt->kernel_start ?
+	       PERF_RECORD_MISC_KERNEL :
+	       PERF_RECORD_MISC_USER;
+}
+
+static inline u8 intel_pt_cpumode(struct intel_pt_queue *ptq, uint64_t from_ip, uint64_t to_ip)
+{
+	/* No support for non-zero CS base */
+	if (from_ip)
+		return intel_pt_nr_cpumode(ptq, from_ip, ptq->state->from_nr);
+	return intel_pt_nr_cpumode(ptq, to_ip, ptq->state->to_nr);
+}
+
+static int intel_pt_get_guest(struct intel_pt_queue *ptq)
+{
+	struct machines *machines = &ptq->pt->session->machines;
+	struct machine *machine;
+	pid_t pid = ptq->pid <= 0 ? DEFAULT_GUEST_KERNEL_ID : ptq->pid;
+
+	if (ptq->guest_machine && pid == ptq->guest_machine->pid)
+		return 0;
+
+	ptq->guest_machine = NULL;
+	thread__zput(ptq->unknown_guest_thread);
+
+	if (symbol_conf.guest_code) {
+		thread__zput(ptq->guest_thread);
+		ptq->guest_thread = machines__findnew_guest_code(machines, pid);
+	}
+
+	machine = machines__find_guest(machines, pid);
+	if (!machine)
+		return -1;
+
+	ptq->unknown_guest_thread = machine__idle_thread(machine);
+	if (!ptq->unknown_guest_thread)
+		return -1;
+
+	ptq->guest_machine = machine;
+
+	return 0;
+}
+
+static inline bool intel_pt_jmp_16(struct intel_pt_insn *intel_pt_insn)
+{
+	return intel_pt_insn->rel == 16 && intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL;
+}
+
+#define PTWRITE_MAGIC		"\x0f\x0bperf,ptwrite  "
+#define PTWRITE_MAGIC_LEN	16
+
+static bool intel_pt_emulated_ptwrite(struct dso *dso, struct machine *machine, u64 offset)
+{
+	unsigned char buf[PTWRITE_MAGIC_LEN];
+	ssize_t len;
+
+	len = dso__data_read_offset(dso, machine, offset, buf, PTWRITE_MAGIC_LEN);
+	if (len == PTWRITE_MAGIC_LEN && !memcmp(buf, PTWRITE_MAGIC, PTWRITE_MAGIC_LEN)) {
+		intel_pt_log("Emulated ptwrite signature found\n");
+		return true;
+	}
+	intel_pt_log("Emulated ptwrite signature not found\n");
+	return false;
+}
+
+static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
+				   uint64_t *insn_cnt_ptr, uint64_t *ip,
+				   uint64_t to_ip, uint64_t max_insn_cnt,
+				   void *data)
+{
+	struct intel_pt_queue *ptq = data;
+	struct machine *machine = ptq->pt->machine;
+	struct thread *thread;
+	struct addr_location al;
+	unsigned char buf[INTEL_PT_INSN_BUF_SZ];
+	ssize_t len;
+	int x86_64;
+	u8 cpumode;
+	u64 offset, start_offset, start_ip;
+	u64 insn_cnt = 0;
+	bool one_map = true;
+	bool nr;
+
+	intel_pt_insn->length = 0;
+
+	if (to_ip && *ip == to_ip)
+		goto out_no_cache;
+
+	nr = ptq->state->to_nr;
+	cpumode = intel_pt_nr_cpumode(ptq, *ip, nr);
+
+	if (nr) {
+		if (ptq->pt->have_guest_sideband) {
+			if (!ptq->guest_machine || ptq->guest_machine_pid != ptq->pid) {
+				intel_pt_log("ERROR: guest sideband but no guest machine\n");
+				return -EINVAL;
+			}
+		} else if ((!symbol_conf.guest_code && cpumode != PERF_RECORD_MISC_GUEST_KERNEL) ||
+			   intel_pt_get_guest(ptq)) {
+			intel_pt_log("ERROR: no guest machine\n");
+			return -EINVAL;
+		}
+		machine = ptq->guest_machine;
+		thread = ptq->guest_thread;
+		if (!thread) {
+			if (cpumode != PERF_RECORD_MISC_GUEST_KERNEL) {
+				intel_pt_log("ERROR: no guest thread\n");
+				return -EINVAL;
+			}
+			thread = ptq->unknown_guest_thread;
+		}
+	} else {
+		thread = ptq->thread;
+		if (!thread) {
+			if (cpumode != PERF_RECORD_MISC_KERNEL) {
+				intel_pt_log("ERROR: no thread\n");
+				return -EINVAL;
+			}
+			thread = ptq->pt->unknown_thread;
+		}
+	}
+
+	while (1) {
+		if (!thread__find_map(thread, cpumode, *ip, &al) || !al.map->dso) {
+			if (al.map)
+				intel_pt_log("ERROR: thread has no dso for %#" PRIx64 "\n", *ip);
+			else
+				intel_pt_log("ERROR: thread has no map for %#" PRIx64 "\n", *ip);
+			return -EINVAL;
+		}
+
+		if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
+		    dso__data_status_seen(al.map->dso,
+					  DSO_DATA_STATUS_SEEN_ITRACE))
+			return -ENOENT;
+
+		offset = al.map->map_ip(al.map, *ip);
+
+		if (!to_ip && one_map) {
+			struct intel_pt_cache_entry *e;
+
+			e = intel_pt_cache_lookup(al.map->dso, machine, offset);
+			if (e &&
+			    (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
+				*insn_cnt_ptr = e->insn_cnt;
+				*ip += e->byte_cnt;
+				intel_pt_insn->op = e->op;
+				intel_pt_insn->branch = e->branch;
+				intel_pt_insn->emulated_ptwrite = e->emulated_ptwrite;
+				intel_pt_insn->length = e->length;
+				intel_pt_insn->rel = e->rel;
+				memcpy(intel_pt_insn->buf, e->insn,
+				       INTEL_PT_INSN_BUF_SZ);
+				intel_pt_log_insn_no_data(intel_pt_insn, *ip);
+				return 0;
+			}
+		}
+
+		start_offset = offset;
+		start_ip = *ip;
+
+		/* Load maps to ensure dso->is_64_bit has been updated */
+		map__load(al.map);
+
+		x86_64 = al.map->dso->is_64_bit;
+
+		while (1) {
+			len = dso__data_read_offset(al.map->dso, machine,
+						    offset, buf,
+						    INTEL_PT_INSN_BUF_SZ);
+			if (len <= 0) {
+				intel_pt_log("ERROR: failed to read at offset %#" PRIx64 " ",
+					     offset);
+				if (intel_pt_enable_logging)
+					dso__fprintf(al.map->dso, intel_pt_log_fp());
+				return -EINVAL;
+			}
+
+			if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
+				return -EINVAL;
+
+			intel_pt_log_insn(intel_pt_insn, *ip);
+
+			insn_cnt += 1;
+
+			if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) {
+				bool eptw;
+				u64 offs;
+
+				if (!intel_pt_jmp_16(intel_pt_insn))
+					goto out;
+				/* Check for emulated ptwrite */
+				offs = offset + intel_pt_insn->length;
+				eptw = intel_pt_emulated_ptwrite(al.map->dso, machine, offs);
+				intel_pt_insn->emulated_ptwrite = eptw;
+				goto out;
+			}
+
+			if (max_insn_cnt && insn_cnt >= max_insn_cnt)
+				goto out_no_cache;
+
+			*ip += intel_pt_insn->length;
+
+			if (to_ip && *ip == to_ip) {
+				intel_pt_insn->length = 0;
+				goto out_no_cache;
+			}
+
+			if (*ip >= al.map->end)
+				break;
+
+			offset += intel_pt_insn->length;
+		}
+		one_map = false;
+	}
+out:
+	*insn_cnt_ptr = insn_cnt;
+
+	if (!one_map)
+		goto out_no_cache;
+
+	/*
+	 * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate
+	 * entries.
+	 */
+	if (to_ip) {
+		struct intel_pt_cache_entry *e;
+
+		e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
+		if (e)
+			return 0;
+	}
+
+	/* Ignore cache errors */
+	intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
+			   *ip - start_ip, intel_pt_insn);
+
+	return 0;
+
+out_no_cache:
+	*insn_cnt_ptr = insn_cnt;
+	return 0;
+}
+
+static bool intel_pt_match_pgd_ip(struct intel_pt *pt, uint64_t ip,
+				  uint64_t offset, const char *filename)
+{
+	struct addr_filter *filt;
+	bool have_filter   = false;
+	bool hit_tracestop = false;
+	bool hit_filter    = false;
+
+	list_for_each_entry(filt, &pt->filts.head, list) {
+		if (filt->start)
+			have_filter = true;
+
+		if ((filename && !filt->filename) ||
+		    (!filename && filt->filename) ||
+		    (filename && strcmp(filename, filt->filename)))
+			continue;
+
+		if (!(offset >= filt->addr && offset < filt->addr + filt->size))
+			continue;
+
+		intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s hit filter: %s offset %#"PRIx64" size %#"PRIx64"\n",
+			     ip, offset, filename ? filename : "[kernel]",
+			     filt->start ? "filter" : "stop",
+			     filt->addr, filt->size);
+
+		if (filt->start)
+			hit_filter = true;
+		else
+			hit_tracestop = true;
+	}
+
+	if (!hit_tracestop && !hit_filter)
+		intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s is not in a filter region\n",
+			     ip, offset, filename ? filename : "[kernel]");
+
+	return hit_tracestop || (have_filter && !hit_filter);
+}
+
+static int __intel_pt_pgd_ip(uint64_t ip, void *data)
+{
+	struct intel_pt_queue *ptq = data;
+	struct thread *thread;
+	struct addr_location al;
+	u8 cpumode;
+	u64 offset;
+
+	if (ptq->state->to_nr) {
+		if (intel_pt_guest_kernel_ip(ip))
+			return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
+		/* No support for decoding guest user space */
+		return -EINVAL;
+	} else if (ip >= ptq->pt->kernel_start) {
+		return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
+	}
+
+	cpumode = PERF_RECORD_MISC_USER;
+
+	thread = ptq->thread;
+	if (!thread)
+		return -EINVAL;
+
+	if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso)
+		return -EINVAL;
+
+	offset = al.map->map_ip(al.map, ip);
+
+	return intel_pt_match_pgd_ip(ptq->pt, ip, offset,
+				     al.map->dso->long_name);
+}
+
+static bool intel_pt_pgd_ip(uint64_t ip, void *data)
+{
+	return __intel_pt_pgd_ip(ip, data) > 0;
+}
+
+static bool intel_pt_get_config(struct intel_pt *pt,
+				struct perf_event_attr *attr, u64 *config)
+{
+	if (attr->type == pt->pmu_type) {
+		if (config)
+			*config = attr->config;
+		return true;
+	}
+
+	return false;
+}
+
+static bool intel_pt_exclude_kernel(struct intel_pt *pt)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (intel_pt_get_config(pt, &evsel->core.attr, NULL) &&
+		    !evsel->core.attr.exclude_kernel)
+			return false;
+	}
+	return true;
+}
+
+static bool intel_pt_return_compression(struct intel_pt *pt)
+{
+	struct evsel *evsel;
+	u64 config;
+
+	if (!pt->noretcomp_bit)
+		return true;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
+		    (config & pt->noretcomp_bit))
+			return false;
+	}
+	return true;
+}
+
+static bool intel_pt_branch_enable(struct intel_pt *pt)
+{
+	struct evsel *evsel;
+	u64 config;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
+		    (config & INTEL_PT_CFG_PASS_THRU) &&
+		    !(config & INTEL_PT_CFG_BRANCH_EN))
+			return false;
+	}
+	return true;
+}
+
+static bool intel_pt_disabled_tnt(struct intel_pt *pt)
+{
+	struct evsel *evsel;
+	u64 config;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
+		    config & INTEL_PT_CFG_TNT_DIS)
+			return true;
+	}
+	return false;
+}
+
+static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
+{
+	struct evsel *evsel;
+	unsigned int shift;
+	u64 config;
+
+	if (!pt->mtc_freq_bits)
+		return 0;
+
+	for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)
+		config >>= 1;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (intel_pt_get_config(pt, &evsel->core.attr, &config))
+			return (config & pt->mtc_freq_bits) >> shift;
+	}
+	return 0;
+}
+
+static bool intel_pt_timeless_decoding(struct intel_pt *pt)
+{
+	struct evsel *evsel;
+	bool timeless_decoding = true;
+	u64 config;
+
+	if (!pt->tsc_bit || !pt->cap_user_time_zero || pt->synth_opts.timeless_decoding)
+		return true;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
+			return true;
+		if (intel_pt_get_config(pt, &evsel->core.attr, &config)) {
+			if (config & pt->tsc_bit)
+				timeless_decoding = false;
+			else
+				return true;
+		}
+	}
+	return timeless_decoding;
+}
+
+static bool intel_pt_tracing_kernel(struct intel_pt *pt)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (intel_pt_get_config(pt, &evsel->core.attr, NULL) &&
+		    !evsel->core.attr.exclude_kernel)
+			return true;
+	}
+	return false;
+}
+
+static bool intel_pt_have_tsc(struct intel_pt *pt)
+{
+	struct evsel *evsel;
+	bool have_tsc = false;
+	u64 config;
+
+	if (!pt->tsc_bit)
+		return false;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (intel_pt_get_config(pt, &evsel->core.attr, &config)) {
+			if (config & pt->tsc_bit)
+				have_tsc = true;
+			else
+				return false;
+		}
+	}
+	return have_tsc;
+}
+
+static bool intel_pt_have_mtc(struct intel_pt *pt)
+{
+	struct evsel *evsel;
+	u64 config;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
+		    (config & pt->mtc_bit))
+			return true;
+	}
+	return false;
+}
+
+static bool intel_pt_sampling_mode(struct intel_pt *pt)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if ((evsel->core.attr.sample_type & PERF_SAMPLE_AUX) &&
+		    evsel->core.attr.aux_sample_size)
+			return true;
+	}
+	return false;
+}
+
+static u64 intel_pt_ctl(struct intel_pt *pt)
+{
+	struct evsel *evsel;
+	u64 config;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (intel_pt_get_config(pt, &evsel->core.attr, &config))
+			return config;
+	}
+	return 0;
+}
+
+static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
+{
+	u64 quot, rem;
+
+	quot = ns / pt->tc.time_mult;
+	rem  = ns % pt->tc.time_mult;
+	return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
+		pt->tc.time_mult;
+}
+
+static struct ip_callchain *intel_pt_alloc_chain(struct intel_pt *pt)
+{
+	size_t sz = sizeof(struct ip_callchain);
+
+	/* Add 1 to callchain_sz for callchain context */
+	sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
+	return zalloc(sz);
+}
+
+static int intel_pt_callchain_init(struct intel_pt *pt)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN))
+			evsel->synth_sample_type |= PERF_SAMPLE_CALLCHAIN;
+	}
+
+	pt->chain = intel_pt_alloc_chain(pt);
+	if (!pt->chain)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void intel_pt_add_callchain(struct intel_pt *pt,
+				   struct perf_sample *sample)
+{
+	struct thread *thread = machine__findnew_thread(pt->machine,
+							sample->pid,
+							sample->tid);
+
+	thread_stack__sample_late(thread, sample->cpu, pt->chain,
+				  pt->synth_opts.callchain_sz + 1, sample->ip,
+				  pt->kernel_start);
+
+	sample->callchain = pt->chain;
+}
+
+static struct branch_stack *intel_pt_alloc_br_stack(unsigned int entry_cnt)
+{
+	size_t sz = sizeof(struct branch_stack);
+
+	sz += entry_cnt * sizeof(struct branch_entry);
+	return zalloc(sz);
+}
+
+static int intel_pt_br_stack_init(struct intel_pt *pt)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK))
+			evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK;
+	}
+
+	pt->br_stack = intel_pt_alloc_br_stack(pt->br_stack_sz);
+	if (!pt->br_stack)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void intel_pt_add_br_stack(struct intel_pt *pt,
+				  struct perf_sample *sample)
+{
+	struct thread *thread = machine__findnew_thread(pt->machine,
+							sample->pid,
+							sample->tid);
+
+	thread_stack__br_sample_late(thread, sample->cpu, pt->br_stack,
+				     pt->br_stack_sz, sample->ip,
+				     pt->kernel_start);
+
+	sample->branch_stack = pt->br_stack;
+}
+
+/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */
+#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3U)
+
+static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
+						   unsigned int queue_nr)
+{
+	struct intel_pt_params params = { .get_trace = 0, };
+	struct perf_env *env = pt->machine->env;
+	struct intel_pt_queue *ptq;
+
+	ptq = zalloc(sizeof(struct intel_pt_queue));
+	if (!ptq)
+		return NULL;
+
+	if (pt->synth_opts.callchain) {
+		ptq->chain = intel_pt_alloc_chain(pt);
+		if (!ptq->chain)
+			goto out_free;
+	}
+
+	if (pt->synth_opts.last_branch || pt->synth_opts.other_events) {
+		unsigned int entry_cnt = max(LBRS_MAX, pt->br_stack_sz);
+
+		ptq->last_branch = intel_pt_alloc_br_stack(entry_cnt);
+		if (!ptq->last_branch)
+			goto out_free;
+	}
+
+	ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
+	if (!ptq->event_buf)
+		goto out_free;
+
+	ptq->pt = pt;
+	ptq->queue_nr = queue_nr;
+	ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
+	ptq->pid = -1;
+	ptq->tid = -1;
+	ptq->cpu = -1;
+	ptq->next_tid = -1;
+
+	params.get_trace = intel_pt_get_trace;
+	params.walk_insn = intel_pt_walk_next_insn;
+	params.lookahead = intel_pt_lookahead;
+	params.findnew_vmcs_info = intel_pt_findnew_vmcs_info;
+	params.data = ptq;
+	params.return_compression = intel_pt_return_compression(pt);
+	params.branch_enable = intel_pt_branch_enable(pt);
+	params.ctl = intel_pt_ctl(pt);
+	params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
+	params.mtc_period = intel_pt_mtc_period(pt);
+	params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
+	params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
+	params.quick = pt->synth_opts.quick;
+	params.vm_time_correlation = pt->synth_opts.vm_time_correlation;
+	params.vm_tm_corr_dry_run = pt->synth_opts.vm_tm_corr_dry_run;
+	params.first_timestamp = pt->first_timestamp;
+	params.max_loops = pt->max_loops;
+
+	/* Cannot walk code without TNT, so force 'quick' mode */
+	if (params.branch_enable && intel_pt_disabled_tnt(pt) && !params.quick)
+		params.quick = 1;
+
+	if (pt->filts.cnt > 0)
+		params.pgd_ip = intel_pt_pgd_ip;
+
+	if (pt->synth_opts.instructions) {
+		if (pt->synth_opts.period) {
+			switch (pt->synth_opts.period_type) {
+			case PERF_ITRACE_PERIOD_INSTRUCTIONS:
+				params.period_type =
+						INTEL_PT_PERIOD_INSTRUCTIONS;
+				params.period = pt->synth_opts.period;
+				break;
+			case PERF_ITRACE_PERIOD_TICKS:
+				params.period_type = INTEL_PT_PERIOD_TICKS;
+				params.period = pt->synth_opts.period;
+				break;
+			case PERF_ITRACE_PERIOD_NANOSECS:
+				params.period_type = INTEL_PT_PERIOD_TICKS;
+				params.period = intel_pt_ns_to_ticks(pt,
+							pt->synth_opts.period);
+				break;
+			default:
+				break;
+			}
+		}
+
+		if (!params.period) {
+			params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
+			params.period = 1;
+		}
+	}
+
+	if (env->cpuid && !strncmp(env->cpuid, "GenuineIntel,6,92,", 18))
+		params.flags |= INTEL_PT_FUP_WITH_NLIP;
+
+	ptq->decoder = intel_pt_decoder_new(&params);
+	if (!ptq->decoder)
+		goto out_free;
+
+	return ptq;
+
+out_free:
+	zfree(&ptq->event_buf);
+	zfree(&ptq->last_branch);
+	zfree(&ptq->chain);
+	free(ptq);
+	return NULL;
+}
+
+static void intel_pt_free_queue(void *priv)
+{
+	struct intel_pt_queue *ptq = priv;
+
+	if (!ptq)
+		return;
+	thread__zput(ptq->thread);
+	thread__zput(ptq->guest_thread);
+	thread__zput(ptq->unknown_guest_thread);
+	intel_pt_decoder_free(ptq->decoder);
+	zfree(&ptq->event_buf);
+	zfree(&ptq->last_branch);
+	zfree(&ptq->chain);
+	free(ptq);
+}
+
+static void intel_pt_first_timestamp(struct intel_pt *pt, u64 timestamp)
+{
+	unsigned int i;
+
+	pt->first_timestamp = timestamp;
+
+	for (i = 0; i < pt->queues.nr_queues; i++) {
+		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
+		struct intel_pt_queue *ptq = queue->priv;
+
+		if (ptq && ptq->decoder)
+			intel_pt_set_first_timestamp(ptq->decoder, timestamp);
+	}
+}
+
+static int intel_pt_get_guest_from_sideband(struct intel_pt_queue *ptq)
+{
+	struct machines *machines = &ptq->pt->session->machines;
+	struct machine *machine;
+	pid_t machine_pid = ptq->pid;
+	pid_t tid;
+	int vcpu;
+
+	if (machine_pid <= 0)
+		return 0; /* Not a guest machine */
+
+	machine = machines__find(machines, machine_pid);
+	if (!machine)
+		return 0; /* Not a guest machine */
+
+	if (ptq->guest_machine != machine) {
+		ptq->guest_machine = NULL;
+		thread__zput(ptq->guest_thread);
+		thread__zput(ptq->unknown_guest_thread);
+
+		ptq->unknown_guest_thread = machine__find_thread(machine, 0, 0);
+		if (!ptq->unknown_guest_thread)
+			return -1;
+		ptq->guest_machine = machine;
+	}
+
+	vcpu = ptq->thread ? ptq->thread->guest_cpu : -1;
+	if (vcpu < 0)
+		return -1;
+
+	tid = machine__get_current_tid(machine, vcpu);
+
+	if (ptq->guest_thread && ptq->guest_thread->tid != tid)
+		thread__zput(ptq->guest_thread);
+
+	if (!ptq->guest_thread) {
+		ptq->guest_thread = machine__find_thread(machine, -1, tid);
+		if (!ptq->guest_thread)
+			return -1;
+	}
+
+	ptq->guest_machine_pid = machine_pid;
+	ptq->guest_pid = ptq->guest_thread->pid_;
+	ptq->guest_tid = tid;
+	ptq->vcpu = vcpu;
+
+	return 0;
+}
+
+static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
+				     struct auxtrace_queue *queue)
+{
+	struct intel_pt_queue *ptq = queue->priv;
+
+	if (queue->tid == -1 || pt->have_sched_switch) {
+		ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
+		if (ptq->tid == -1)
+			ptq->pid = -1;
+		thread__zput(ptq->thread);
+	}
+
+	if (!ptq->thread && ptq->tid != -1)
+		ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);
+
+	if (ptq->thread) {
+		ptq->pid = ptq->thread->pid_;
+		if (queue->cpu == -1)
+			ptq->cpu = ptq->thread->cpu;
+	}
+
+	if (pt->have_guest_sideband && intel_pt_get_guest_from_sideband(ptq)) {
+		ptq->guest_machine_pid = 0;
+		ptq->guest_pid = -1;
+		ptq->guest_tid = -1;
+		ptq->vcpu = -1;
+	}
+}
+
+static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+
+	ptq->insn_len = 0;
+	if (ptq->state->flags & INTEL_PT_ABORT_TX) {
+		ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
+	} else if (ptq->state->flags & INTEL_PT_ASYNC) {
+		if (!ptq->state->to_ip)
+			ptq->flags = PERF_IP_FLAG_BRANCH |
+				     PERF_IP_FLAG_ASYNC |
+				     PERF_IP_FLAG_TRACE_END;
+		else if (ptq->state->from_nr && !ptq->state->to_nr)
+			ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+				     PERF_IP_FLAG_ASYNC |
+				     PERF_IP_FLAG_VMEXIT;
+		else
+			ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+				     PERF_IP_FLAG_ASYNC |
+				     PERF_IP_FLAG_INTERRUPT;
+	} else {
+		if (ptq->state->from_ip)
+			ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
+		else
+			ptq->flags = PERF_IP_FLAG_BRANCH |
+				     PERF_IP_FLAG_TRACE_BEGIN;
+		if (ptq->state->flags & INTEL_PT_IN_TX)
+			ptq->flags |= PERF_IP_FLAG_IN_TX;
+		ptq->insn_len = ptq->state->insn_len;
+		memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ);
+	}
+
+	if (ptq->state->type & INTEL_PT_TRACE_BEGIN)
+		ptq->flags |= PERF_IP_FLAG_TRACE_BEGIN;
+	if (ptq->state->type & INTEL_PT_TRACE_END)
+		ptq->flags |= PERF_IP_FLAG_TRACE_END;
+
+	if (pt->cap_event_trace) {
+		if (ptq->state->type & INTEL_PT_IFLAG_CHG) {
+			if (!ptq->state->from_iflag)
+				ptq->flags |= PERF_IP_FLAG_INTR_DISABLE;
+			if (ptq->state->from_iflag != ptq->state->to_iflag)
+				ptq->flags |= PERF_IP_FLAG_INTR_TOGGLE;
+		} else if (!ptq->state->to_iflag) {
+			ptq->flags |= PERF_IP_FLAG_INTR_DISABLE;
+		}
+	}
+}
+
+static void intel_pt_setup_time_range(struct intel_pt *pt,
+				      struct intel_pt_queue *ptq)
+{
+	if (!pt->range_cnt)
+		return;
+
+	ptq->sel_timestamp = pt->time_ranges[0].start;
+	ptq->sel_idx = 0;
+
+	if (ptq->sel_timestamp) {
+		ptq->sel_start = true;
+	} else {
+		ptq->sel_timestamp = pt->time_ranges[0].end;
+		ptq->sel_start = false;
+	}
+}
+
+static int intel_pt_setup_queue(struct intel_pt *pt,
+				struct auxtrace_queue *queue,
+				unsigned int queue_nr)
+{
+	struct intel_pt_queue *ptq = queue->priv;
+
+	if (list_empty(&queue->head))
+		return 0;
+
+	if (!ptq) {
+		ptq = intel_pt_alloc_queue(pt, queue_nr);
+		if (!ptq)
+			return -ENOMEM;
+		queue->priv = ptq;
+
+		if (queue->cpu != -1)
+			ptq->cpu = queue->cpu;
+		ptq->tid = queue->tid;
+
+		ptq->cbr_seen = UINT_MAX;
+
+		if (pt->sampling_mode && !pt->snapshot_mode &&
+		    pt->timeless_decoding)
+			ptq->step_through_buffers = true;
+
+		ptq->sync_switch = pt->sync_switch;
+
+		intel_pt_setup_time_range(pt, ptq);
+	}
+
+	if (!ptq->on_heap &&
+	    (!ptq->sync_switch ||
+	     ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
+		const struct intel_pt_state *state;
+		int ret;
+
+		if (pt->timeless_decoding)
+			return 0;
+
+		intel_pt_log("queue %u getting timestamp\n", queue_nr);
+		intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
+			     queue_nr, ptq->cpu, ptq->pid, ptq->tid);
+
+		if (ptq->sel_start && ptq->sel_timestamp) {
+			ret = intel_pt_fast_forward(ptq->decoder,
+						    ptq->sel_timestamp);
+			if (ret)
+				return ret;
+		}
+
+		while (1) {
+			state = intel_pt_decode(ptq->decoder);
+			if (state->err) {
+				if (state->err == INTEL_PT_ERR_NODATA) {
+					intel_pt_log("queue %u has no timestamp\n",
+						     queue_nr);
+					return 0;
+				}
+				continue;
+			}
+			if (state->timestamp)
+				break;
+		}
+
+		ptq->timestamp = state->timestamp;
+		intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
+			     queue_nr, ptq->timestamp);
+		ptq->state = state;
+		ptq->have_sample = true;
+		if (ptq->sel_start && ptq->sel_timestamp &&
+		    ptq->timestamp < ptq->sel_timestamp)
+			ptq->have_sample = false;
+		intel_pt_sample_flags(ptq);
+		ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
+		if (ret)
+			return ret;
+		ptq->on_heap = true;
+	}
+
+	return 0;
+}
+
+static int intel_pt_setup_queues(struct intel_pt *pt)
+{
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < pt->queues.nr_queues; i++) {
+		ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+static inline bool intel_pt_skip_event(struct intel_pt *pt)
+{
+	return pt->synth_opts.initial_skip &&
+	       pt->num_events++ < pt->synth_opts.initial_skip;
+}
+
+/*
+ * Cannot count CBR as skipped because it won't go away until cbr == cbr_seen.
+ * Also ensure CBR is first non-skipped event by allowing for 4 more samples
+ * from this decoder state.
+ */
+static inline bool intel_pt_skip_cbr_event(struct intel_pt *pt)
+{
+	return pt->synth_opts.initial_skip &&
+	       pt->num_events + 4 < pt->synth_opts.initial_skip;
+}
+
+static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq,
+				   union perf_event *event,
+				   struct perf_sample *sample)
+{
+	event->sample.header.type = PERF_RECORD_SAMPLE;
+	event->sample.header.size = sizeof(struct perf_event_header);
+
+	sample->pid = ptq->pid;
+	sample->tid = ptq->tid;
+
+	if (ptq->pt->have_guest_sideband) {
+		if ((ptq->state->from_ip && ptq->state->from_nr) ||
+		    (ptq->state->to_ip && ptq->state->to_nr)) {
+			sample->pid = ptq->guest_pid;
+			sample->tid = ptq->guest_tid;
+			sample->machine_pid = ptq->guest_machine_pid;
+			sample->vcpu = ptq->vcpu;
+		}
+	}
+
+	sample->cpu = ptq->cpu;
+	sample->insn_len = ptq->insn_len;
+	memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
+}
+
+static void intel_pt_prep_b_sample(struct intel_pt *pt,
+				   struct intel_pt_queue *ptq,
+				   union perf_event *event,
+				   struct perf_sample *sample)
+{
+	intel_pt_prep_a_sample(ptq, event, sample);
+
+	if (!pt->timeless_decoding)
+		sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
+
+	sample->ip = ptq->state->from_ip;
+	sample->addr = ptq->state->to_ip;
+	sample->cpumode = intel_pt_cpumode(ptq, sample->ip, sample->addr);
+	sample->period = 1;
+	sample->flags = ptq->flags;
+
+	event->sample.header.misc = sample->cpumode;
+}
+
+static int intel_pt_inject_event(union perf_event *event,
+				 struct perf_sample *sample, u64 type)
+{
+	event->header.size = perf_event__sample_event_size(sample, type, 0);
+	return perf_event__synthesize_sample(event, type, 0, sample);
+}
+
+static inline int intel_pt_opt_inject(struct intel_pt *pt,
+				      union perf_event *event,
+				      struct perf_sample *sample, u64 type)
+{
+	if (!pt->synth_opts.inject)
+		return 0;
+
+	return intel_pt_inject_event(event, sample, type);
+}
+
+static int intel_pt_deliver_synth_event(struct intel_pt *pt,
+					union perf_event *event,
+					struct perf_sample *sample, u64 type)
+{
+	int ret;
+
+	ret = intel_pt_opt_inject(pt, event, sample, type);
+	if (ret)
+		return ret;
+
+	ret = perf_session__deliver_synth_event(pt->session, event, sample);
+	if (ret)
+		pr_err("Intel PT: failed to deliver event, error %d\n", ret);
+
+	return ret;
+}
+
+static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+	struct dummy_branch_stack {
+		u64			nr;
+		u64			hw_idx;
+		struct branch_entry	entries;
+	} dummy_bs;
+
+	if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
+		return 0;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_b_sample(pt, ptq, event, &sample);
+
+	sample.id = ptq->pt->branches_id;
+	sample.stream_id = ptq->pt->branches_id;
+
+	/*
+	 * perf report cannot handle events without a branch stack when using
+	 * SORT_MODE__BRANCH so make a dummy one.
+	 */
+	if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
+		dummy_bs = (struct dummy_branch_stack){
+			.nr = 1,
+			.hw_idx = -1ULL,
+			.entries = {
+				.from = sample.ip,
+				.to = sample.addr,
+			},
+		};
+		sample.branch_stack = (struct branch_stack *)&dummy_bs;
+	}
+
+	if (ptq->sample_ipc)
+		sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
+	if (sample.cyc_cnt) {
+		sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt;
+		ptq->last_br_insn_cnt = ptq->ipc_insn_cnt;
+		ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt;
+	}
+
+	return intel_pt_deliver_synth_event(pt, event, &sample,
+					    pt->branches_sample_type);
+}
+
+static void intel_pt_prep_sample(struct intel_pt *pt,
+				 struct intel_pt_queue *ptq,
+				 union perf_event *event,
+				 struct perf_sample *sample)
+{
+	intel_pt_prep_b_sample(pt, ptq, event, sample);
+
+	if (pt->synth_opts.callchain) {
+		thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
+				     pt->synth_opts.callchain_sz + 1,
+				     sample->ip, pt->kernel_start);
+		sample->callchain = ptq->chain;
+	}
+
+	if (pt->synth_opts.last_branch) {
+		thread_stack__br_sample(ptq->thread, ptq->cpu, ptq->last_branch,
+					pt->br_stack_sz);
+		sample->branch_stack = ptq->last_branch;
+	}
+}
+
+static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_sample(pt, ptq, event, &sample);
+
+	sample.id = ptq->pt->instructions_id;
+	sample.stream_id = ptq->pt->instructions_id;
+	if (pt->synth_opts.quick)
+		sample.period = 1;
+	else
+		sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
+
+	if (ptq->sample_ipc)
+		sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
+	if (sample.cyc_cnt) {
+		sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt;
+		ptq->last_in_insn_cnt = ptq->ipc_insn_cnt;
+		ptq->last_in_cyc_cnt = ptq->ipc_cyc_cnt;
+	}
+
+	ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
+
+	return intel_pt_deliver_synth_event(pt, event, &sample,
+					    pt->instructions_sample_type);
+}
+
+static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_sample(pt, ptq, event, &sample);
+
+	sample.id = ptq->pt->transactions_id;
+	sample.stream_id = ptq->pt->transactions_id;
+
+	return intel_pt_deliver_synth_event(pt, event, &sample,
+					    pt->transactions_sample_type);
+}
+
+static void intel_pt_prep_p_sample(struct intel_pt *pt,
+				   struct intel_pt_queue *ptq,
+				   union perf_event *event,
+				   struct perf_sample *sample)
+{
+	intel_pt_prep_sample(pt, ptq, event, sample);
+
+	/*
+	 * Zero IP is used to mean "trace start" but that is not the case for
+	 * power or PTWRITE events with no IP, so clear the flags.
+	 */
+	if (!sample->ip)
+		sample->flags = 0;
+}
+
+static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+	struct perf_synth_intel_ptwrite raw;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.id = ptq->pt->ptwrites_id;
+	sample.stream_id = ptq->pt->ptwrites_id;
+
+	raw.flags = 0;
+	raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
+	raw.payload = cpu_to_le64(ptq->state->ptw_payload);
+
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
+
+	return intel_pt_deliver_synth_event(pt, event, &sample,
+					    pt->ptwrites_sample_type);
+}
+
+static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+	struct perf_synth_intel_cbr raw;
+	u32 flags;
+
+	if (intel_pt_skip_cbr_event(pt))
+		return 0;
+
+	ptq->cbr_seen = ptq->state->cbr;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.id = ptq->pt->cbr_id;
+	sample.stream_id = ptq->pt->cbr_id;
+
+	flags = (u16)ptq->state->cbr_payload | (pt->max_non_turbo_ratio << 16);
+	raw.flags = cpu_to_le32(flags);
+	raw.freq = cpu_to_le32(raw.cbr * pt->cbr2khz);
+	raw.reserved3 = 0;
+
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
+
+	return intel_pt_deliver_synth_event(pt, event, &sample,
+					    pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+	struct perf_synth_intel_psb raw;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.id = ptq->pt->psb_id;
+	sample.stream_id = ptq->pt->psb_id;
+	sample.flags = 0;
+
+	raw.reserved = 0;
+	raw.offset = ptq->state->psb_offset;
+
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
+
+	return intel_pt_deliver_synth_event(pt, event, &sample,
+					    pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+	struct perf_synth_intel_mwait raw;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.id = ptq->pt->mwait_id;
+	sample.stream_id = ptq->pt->mwait_id;
+
+	raw.reserved = 0;
+	raw.payload = cpu_to_le64(ptq->state->mwait_payload);
+
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
+
+	return intel_pt_deliver_synth_event(pt, event, &sample,
+					    pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+	struct perf_synth_intel_pwre raw;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.id = ptq->pt->pwre_id;
+	sample.stream_id = ptq->pt->pwre_id;
+
+	raw.reserved = 0;
+	raw.payload = cpu_to_le64(ptq->state->pwre_payload);
+
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
+
+	return intel_pt_deliver_synth_event(pt, event, &sample,
+					    pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+	struct perf_synth_intel_exstop raw;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.id = ptq->pt->exstop_id;
+	sample.stream_id = ptq->pt->exstop_id;
+
+	raw.flags = 0;
+	raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
+
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
+
+	return intel_pt_deliver_synth_event(pt, event, &sample,
+					    pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+	struct perf_synth_intel_pwrx raw;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.id = ptq->pt->pwrx_id;
+	sample.stream_id = ptq->pt->pwrx_id;
+
+	raw.reserved = 0;
+	raw.payload = cpu_to_le64(ptq->state->pwrx_payload);
+
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
+
+	return intel_pt_deliver_synth_event(pt, event, &sample,
+					    pt->pwr_events_sample_type);
+}
+
+/*
+ * PEBS gp_regs array indexes plus 1 so that 0 means not present. Refer
+ * intel_pt_add_gp_regs().
+ */
+static const int pebs_gp_regs[] = {
+	[PERF_REG_X86_FLAGS]	= 1,
+	[PERF_REG_X86_IP]	= 2,
+	[PERF_REG_X86_AX]	= 3,
+	[PERF_REG_X86_CX]	= 4,
+	[PERF_REG_X86_DX]	= 5,
+	[PERF_REG_X86_BX]	= 6,
+	[PERF_REG_X86_SP]	= 7,
+	[PERF_REG_X86_BP]	= 8,
+	[PERF_REG_X86_SI]	= 9,
+	[PERF_REG_X86_DI]	= 10,
+	[PERF_REG_X86_R8]	= 11,
+	[PERF_REG_X86_R9]	= 12,
+	[PERF_REG_X86_R10]	= 13,
+	[PERF_REG_X86_R11]	= 14,
+	[PERF_REG_X86_R12]	= 15,
+	[PERF_REG_X86_R13]	= 16,
+	[PERF_REG_X86_R14]	= 17,
+	[PERF_REG_X86_R15]	= 18,
+};
+
+static u64 *intel_pt_add_gp_regs(struct regs_dump *intr_regs, u64 *pos,
+				 const struct intel_pt_blk_items *items,
+				 u64 regs_mask)
+{
+	const u64 *gp_regs = items->val[INTEL_PT_GP_REGS_POS];
+	u32 mask = items->mask[INTEL_PT_GP_REGS_POS];
+	u32 bit;
+	int i;
+
+	for (i = 0, bit = 1; i < PERF_REG_X86_64_MAX; i++, bit <<= 1) {
+		/* Get the PEBS gp_regs array index */
+		int n = pebs_gp_regs[i] - 1;
+
+		if (n < 0)
+			continue;
+		/*
+		 * Add only registers that were requested (i.e. 'regs_mask') and
+		 * that were provided (i.e. 'mask'), and update the resulting
+		 * mask (i.e. 'intr_regs->mask') accordingly.
+		 */
+		if (mask & 1 << n && regs_mask & bit) {
+			intr_regs->mask |= bit;
+			*pos++ = gp_regs[n];
+		}
+	}
+
+	return pos;
+}
+
+#ifndef PERF_REG_X86_XMM0
+#define PERF_REG_X86_XMM0 32
+#endif
+
+static void intel_pt_add_xmm(struct regs_dump *intr_regs, u64 *pos,
+			     const struct intel_pt_blk_items *items,
+			     u64 regs_mask)
+{
+	u32 mask = items->has_xmm & (regs_mask >> PERF_REG_X86_XMM0);
+	const u64 *xmm = items->xmm;
+
+	/*
+	 * If there are any XMM registers, then there should be all of them.
+	 * Nevertheless, follow the logic to add only registers that were
+	 * requested (i.e. 'regs_mask') and that were provided (i.e. 'mask'),
+	 * and update the resulting mask (i.e. 'intr_regs->mask') accordingly.
+	 */
+	intr_regs->mask |= (u64)mask << PERF_REG_X86_XMM0;
+
+	for (; mask; mask >>= 1, xmm++) {
+		if (mask & 1)
+			*pos++ = *xmm;
+	}
+}
+
+#define LBR_INFO_MISPRED	(1ULL << 63)
+#define LBR_INFO_IN_TX		(1ULL << 62)
+#define LBR_INFO_ABORT		(1ULL << 61)
+#define LBR_INFO_CYCLES		0xffff
+
+/* Refer kernel's intel_pmu_store_pebs_lbrs() */
+static u64 intel_pt_lbr_flags(u64 info)
+{
+	union {
+		struct branch_flags flags;
+		u64 result;
+	} u;
+
+	u.result	  = 0;
+	u.flags.mispred	  = !!(info & LBR_INFO_MISPRED);
+	u.flags.predicted = !(info & LBR_INFO_MISPRED);
+	u.flags.in_tx	  = !!(info & LBR_INFO_IN_TX);
+	u.flags.abort	  = !!(info & LBR_INFO_ABORT);
+	u.flags.cycles	  = info & LBR_INFO_CYCLES;
+
+	return u.result;
+}
+
+static void intel_pt_add_lbrs(struct branch_stack *br_stack,
+			      const struct intel_pt_blk_items *items)
+{
+	u64 *to;
+	int i;
+
+	br_stack->nr = 0;
+
+	to = &br_stack->entries[0].from;
+
+	for (i = INTEL_PT_LBR_0_POS; i <= INTEL_PT_LBR_2_POS; i++) {
+		u32 mask = items->mask[i];
+		const u64 *from = items->val[i];
+
+		for (; mask; mask >>= 3, from += 3) {
+			if ((mask & 7) == 7) {
+				*to++ = from[0];
+				*to++ = from[1];
+				*to++ = intel_pt_lbr_flags(from[2]);
+				br_stack->nr += 1;
+			}
+		}
+	}
+}
+
+static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id)
+{
+	const struct intel_pt_blk_items *items = &ptq->state->items;
+	struct perf_sample sample = { .ip = 0, };
+	union perf_event *event = ptq->event_buf;
+	struct intel_pt *pt = ptq->pt;
+	u64 sample_type = evsel->core.attr.sample_type;
+	u8 cpumode;
+	u64 regs[8 * sizeof(sample.intr_regs.mask)];
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_a_sample(ptq, event, &sample);
+
+	sample.id = id;
+	sample.stream_id = id;
+
+	if (!evsel->core.attr.freq)
+		sample.period = evsel->core.attr.sample_period;
+
+	/* No support for non-zero CS base */
+	if (items->has_ip)
+		sample.ip = items->ip;
+	else if (items->has_rip)
+		sample.ip = items->rip;
+	else
+		sample.ip = ptq->state->from_ip;
+
+	cpumode = intel_pt_cpumode(ptq, sample.ip, 0);
+
+	event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP;
+
+	sample.cpumode = cpumode;
+
+	if (sample_type & PERF_SAMPLE_TIME) {
+		u64 timestamp = 0;
+
+		if (items->has_timestamp)
+			timestamp = items->timestamp;
+		else if (!pt->timeless_decoding)
+			timestamp = ptq->timestamp;
+		if (timestamp)
+			sample.time = tsc_to_perf_time(timestamp, &pt->tc);
+	}
+
+	if (sample_type & PERF_SAMPLE_CALLCHAIN &&
+	    pt->synth_opts.callchain) {
+		thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
+				     pt->synth_opts.callchain_sz, sample.ip,
+				     pt->kernel_start);
+		sample.callchain = ptq->chain;
+	}
+
+	if (sample_type & PERF_SAMPLE_REGS_INTR &&
+	    (items->mask[INTEL_PT_GP_REGS_POS] ||
+	     items->mask[INTEL_PT_XMM_POS])) {
+		u64 regs_mask = evsel->core.attr.sample_regs_intr;
+		u64 *pos;
+
+		sample.intr_regs.abi = items->is_32_bit ?
+				       PERF_SAMPLE_REGS_ABI_32 :
+				       PERF_SAMPLE_REGS_ABI_64;
+		sample.intr_regs.regs = regs;
+
+		pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask);
+
+		intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask);
+	}
+
+	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+		if (items->mask[INTEL_PT_LBR_0_POS] ||
+		    items->mask[INTEL_PT_LBR_1_POS] ||
+		    items->mask[INTEL_PT_LBR_2_POS]) {
+			intel_pt_add_lbrs(ptq->last_branch, items);
+		} else if (pt->synth_opts.last_branch) {
+			thread_stack__br_sample(ptq->thread, ptq->cpu,
+						ptq->last_branch,
+						pt->br_stack_sz);
+		} else {
+			ptq->last_branch->nr = 0;
+		}
+		sample.branch_stack = ptq->last_branch;
+	}
+
+	if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address)
+		sample.addr = items->mem_access_address;
+
+	if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
+		/*
+		 * Refer kernel's setup_pebs_adaptive_sample_data() and
+		 * intel_hsw_weight().
+		 */
+		if (items->has_mem_access_latency) {
+			u64 weight = items->mem_access_latency >> 32;
+
+			/*
+			 * Starts from SPR, the mem access latency field
+			 * contains both cache latency [47:32] and instruction
+			 * latency [15:0]. The cache latency is the same as the
+			 * mem access latency on previous platforms.
+			 *
+			 * In practice, no memory access could last than 4G
+			 * cycles. Use latency >> 32 to distinguish the
+			 * different format of the mem access latency field.
+			 */
+			if (weight > 0) {
+				sample.weight = weight & 0xffff;
+				sample.ins_lat = items->mem_access_latency & 0xffff;
+			} else
+				sample.weight = items->mem_access_latency;
+		}
+		if (!sample.weight && items->has_tsx_aux_info) {
+			/* Cycles last block */
+			sample.weight = (u32)items->tsx_aux_info;
+		}
+	}
+
+	if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) {
+		u64 ax = items->has_rax ? items->rax : 0;
+		/* Refer kernel's intel_hsw_transaction() */
+		u64 txn = (u8)(items->tsx_aux_info >> 32);
+
+		/* For RTM XABORTs also log the abort code from AX */
+		if (txn & PERF_TXN_TRANSACTION && ax & 1)
+			txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
+		sample.transaction = txn;
+	}
+
+	return intel_pt_deliver_synth_event(pt, event, &sample, sample_type);
+}
+
+static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	struct evsel *evsel = pt->pebs_evsel;
+	u64 id = evsel->core.id[0];
+
+	return intel_pt_do_synth_pebs_sample(ptq, evsel, id);
+}
+
+static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
+{
+	const struct intel_pt_blk_items *items = &ptq->state->items;
+	struct intel_pt_pebs_event *pe;
+	struct intel_pt *pt = ptq->pt;
+	int err = -EINVAL;
+	int hw_id;
+
+	if (!items->has_applicable_counters || !items->applicable_counters) {
+		if (!pt->single_pebs)
+			pr_err("PEBS-via-PT record with no applicable_counters\n");
+		return intel_pt_synth_single_pebs_sample(ptq);
+	}
+
+	for_each_set_bit(hw_id, (unsigned long *)&items->applicable_counters, INTEL_PT_MAX_PEBS) {
+		pe = &ptq->pebs[hw_id];
+		if (!pe->evsel) {
+			if (!pt->single_pebs)
+				pr_err("PEBS-via-PT record with no matching event, hw_id %d\n",
+				       hw_id);
+			return intel_pt_synth_single_pebs_sample(ptq);
+		}
+		err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id);
+		if (err)
+			return err;
+	}
+
+	return err;
+}
+
+static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+	struct {
+		struct perf_synth_intel_evt cfe;
+		struct perf_synth_intel_evd evd[INTEL_PT_MAX_EVDS];
+	} raw;
+	int i;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.id        = ptq->pt->evt_id;
+	sample.stream_id = ptq->pt->evt_id;
+
+	raw.cfe.type     = ptq->state->cfe_type;
+	raw.cfe.reserved = 0;
+	raw.cfe.ip       = !!(ptq->state->flags & INTEL_PT_FUP_IP);
+	raw.cfe.vector   = ptq->state->cfe_vector;
+	raw.cfe.evd_cnt  = ptq->state->evd_cnt;
+
+	for (i = 0; i < ptq->state->evd_cnt; i++) {
+		raw.evd[i].et       = 0;
+		raw.evd[i].evd_type = ptq->state->evd[i].type;
+		raw.evd[i].payload  = ptq->state->evd[i].payload;
+	}
+
+	sample.raw_size = perf_synth__raw_size(raw) +
+			  ptq->state->evd_cnt * sizeof(struct perf_synth_intel_evd);
+	sample.raw_data = perf_synth__raw_data(&raw);
+
+	return intel_pt_deliver_synth_event(pt, event, &sample,
+					    pt->evt_sample_type);
+}
+
+static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+	struct perf_synth_intel_iflag_chg raw;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.id = ptq->pt->iflag_chg_id;
+	sample.stream_id = ptq->pt->iflag_chg_id;
+
+	raw.flags = 0;
+	raw.iflag = ptq->state->to_iflag;
+
+	if (ptq->state->type & INTEL_PT_BRANCH) {
+		raw.via_branch = 1;
+		raw.branch_ip = ptq->state->to_ip;
+	} else {
+		sample.addr = 0;
+	}
+	sample.flags = ptq->flags;
+
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
+
+	return intel_pt_deliver_synth_event(pt, event, &sample,
+					    pt->iflag_chg_sample_type);
+}
+
+static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
+				pid_t pid, pid_t tid, u64 ip, u64 timestamp,
+				pid_t machine_pid, int vcpu)
+{
+	bool dump_log_on_error = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ON_ERROR;
+	bool log_on_stdout = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_USE_STDOUT;
+	union perf_event event;
+	char msg[MAX_AUXTRACE_ERROR_MSG];
+	int err;
+
+	if (pt->synth_opts.error_minus_flags) {
+		if (code == INTEL_PT_ERR_OVR &&
+		    pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_OVERFLOW)
+			return 0;
+		if (code == INTEL_PT_ERR_LOST &&
+		    pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_DATA_LOST)
+			return 0;
+	}
+
+	intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
+
+	auxtrace_synth_guest_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
+				   code, cpu, pid, tid, ip, msg, timestamp,
+				   machine_pid, vcpu);
+
+	if (intel_pt_enable_logging && !log_on_stdout) {
+		FILE *fp = intel_pt_log_fp();
+
+		if (fp)
+			perf_event__fprintf_auxtrace_error(&event, fp);
+	}
+
+	if (code != INTEL_PT_ERR_LOST && dump_log_on_error)
+		intel_pt_log_dump_buf();
+
+	err = perf_session__deliver_synth_event(pt->session, &event, NULL);
+	if (err)
+		pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
+		       err);
+
+	return err;
+}
+
+static int intel_ptq_synth_error(struct intel_pt_queue *ptq,
+				 const struct intel_pt_state *state)
+{
+	struct intel_pt *pt = ptq->pt;
+	u64 tm = ptq->timestamp;
+	pid_t machine_pid = 0;
+	pid_t pid = ptq->pid;
+	pid_t tid = ptq->tid;
+	int vcpu = -1;
+
+	tm = pt->timeless_decoding ? 0 : tsc_to_perf_time(tm, &pt->tc);
+
+	if (pt->have_guest_sideband && state->from_nr) {
+		machine_pid = ptq->guest_machine_pid;
+		vcpu = ptq->vcpu;
+		pid = ptq->guest_pid;
+		tid = ptq->guest_tid;
+	}
+
+	return intel_pt_synth_error(pt, state->err, ptq->cpu, pid, tid,
+				    state->from_ip, tm, machine_pid, vcpu);
+}
+
+static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
+{
+	struct auxtrace_queue *queue;
+	pid_t tid = ptq->next_tid;
+	int err;
+
+	if (tid == -1)
+		return 0;
+
+	intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);
+
+	err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);
+
+	queue = &pt->queues.queue_array[ptq->queue_nr];
+	intel_pt_set_pid_tid_cpu(pt, queue);
+
+	ptq->next_tid = -1;
+
+	return err;
+}
+
+static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
+{
+	struct intel_pt *pt = ptq->pt;
+
+	return ip == pt->switch_ip &&
+	       (ptq->flags & PERF_IP_FLAG_BRANCH) &&
+	       !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
+			       PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
+}
+
+#define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \
+			  INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT)
+
+static int intel_pt_sample(struct intel_pt_queue *ptq)
+{
+	const struct intel_pt_state *state = ptq->state;
+	struct intel_pt *pt = ptq->pt;
+	int err;
+
+	if (!ptq->have_sample)
+		return 0;
+
+	ptq->have_sample = false;
+
+	if (pt->synth_opts.approx_ipc) {
+		ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
+		ptq->ipc_cyc_cnt = ptq->state->cycles;
+		ptq->sample_ipc = true;
+	} else {
+		ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
+		ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;
+		ptq->sample_ipc = ptq->state->flags & INTEL_PT_SAMPLE_IPC;
+	}
+
+	/* Ensure guest code maps are set up */
+	if (symbol_conf.guest_code && (state->from_nr || state->to_nr))
+		intel_pt_get_guest(ptq);
+
+	/*
+	 * Do PEBS first to allow for the possibility that the PEBS timestamp
+	 * precedes the current timestamp.
+	 */
+	if (pt->sample_pebs && state->type & INTEL_PT_BLK_ITEMS) {
+		err = intel_pt_synth_pebs_sample(ptq);
+		if (err)
+			return err;
+	}
+
+	if (pt->synth_opts.intr_events) {
+		if (state->type & INTEL_PT_EVT) {
+			err = intel_pt_synth_events_sample(ptq);
+			if (err)
+				return err;
+		}
+		if (state->type & INTEL_PT_IFLAG_CHG) {
+			err = intel_pt_synth_iflag_chg_sample(ptq);
+			if (err)
+				return err;
+		}
+	}
+
+	if (pt->sample_pwr_events) {
+		if (state->type & INTEL_PT_PSB_EVT) {
+			err = intel_pt_synth_psb_sample(ptq);
+			if (err)
+				return err;
+		}
+		if (ptq->state->cbr != ptq->cbr_seen) {
+			err = intel_pt_synth_cbr_sample(ptq);
+			if (err)
+				return err;
+		}
+		if (state->type & INTEL_PT_PWR_EVT) {
+			if (state->type & INTEL_PT_MWAIT_OP) {
+				err = intel_pt_synth_mwait_sample(ptq);
+				if (err)
+					return err;
+			}
+			if (state->type & INTEL_PT_PWR_ENTRY) {
+				err = intel_pt_synth_pwre_sample(ptq);
+				if (err)
+					return err;
+			}
+			if (state->type & INTEL_PT_EX_STOP) {
+				err = intel_pt_synth_exstop_sample(ptq);
+				if (err)
+					return err;
+			}
+			if (state->type & INTEL_PT_PWR_EXIT) {
+				err = intel_pt_synth_pwrx_sample(ptq);
+				if (err)
+					return err;
+			}
+		}
+	}
+
+	if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) {
+		err = intel_pt_synth_instruction_sample(ptq);
+		if (err)
+			return err;
+	}
+
+	if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) {
+		err = intel_pt_synth_transaction_sample(ptq);
+		if (err)
+			return err;
+	}
+
+	if (pt->sample_ptwrites && (state->type & INTEL_PT_PTW)) {
+		err = intel_pt_synth_ptwrite_sample(ptq);
+		if (err)
+			return err;
+	}
+
+	if (!(state->type & INTEL_PT_BRANCH))
+		return 0;
+
+	if (pt->use_thread_stack) {
+		thread_stack__event(ptq->thread, ptq->cpu, ptq->flags,
+				    state->from_ip, state->to_ip, ptq->insn_len,
+				    state->trace_nr, pt->callstack,
+				    pt->br_stack_sz_plus,
+				    pt->mispred_all);
+	} else {
+		thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
+	}
+
+	if (pt->sample_branches) {
+		if (state->from_nr != state->to_nr &&
+		    state->from_ip && state->to_ip) {
+			struct intel_pt_state *st = (struct intel_pt_state *)state;
+			u64 to_ip = st->to_ip;
+			u64 from_ip = st->from_ip;
+
+			/*
+			 * perf cannot handle having different machines for ip
+			 * and addr, so create 2 branches.
+			 */
+			st->to_ip = 0;
+			err = intel_pt_synth_branch_sample(ptq);
+			if (err)
+				return err;
+			st->from_ip = 0;
+			st->to_ip = to_ip;
+			err = intel_pt_synth_branch_sample(ptq);
+			st->from_ip = from_ip;
+		} else {
+			err = intel_pt_synth_branch_sample(ptq);
+		}
+		if (err)
+			return err;
+	}
+
+	if (!ptq->sync_switch)
+		return 0;
+
+	if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
+		switch (ptq->switch_state) {
+		case INTEL_PT_SS_NOT_TRACING:
+		case INTEL_PT_SS_UNKNOWN:
+		case INTEL_PT_SS_EXPECTING_SWITCH_IP:
+			err = intel_pt_next_tid(pt, ptq);
+			if (err)
+				return err;
+			ptq->switch_state = INTEL_PT_SS_TRACING;
+			break;
+		default:
+			ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
+			return 1;
+		}
+	} else if (!state->to_ip) {
+		ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
+	} else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
+		ptq->switch_state = INTEL_PT_SS_UNKNOWN;
+	} else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
+		   state->to_ip == pt->ptss_ip &&
+		   (ptq->flags & PERF_IP_FLAG_CALL)) {
+		ptq->switch_state = INTEL_PT_SS_TRACING;
+	}
+
+	return 0;
+}
+
+static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
+{
+	struct machine *machine = pt->machine;
+	struct map *map;
+	struct symbol *sym, *start;
+	u64 ip, switch_ip = 0;
+	const char *ptss;
+
+	if (ptss_ip)
+		*ptss_ip = 0;
+
+	map = machine__kernel_map(machine);
+	if (!map)
+		return 0;
+
+	if (map__load(map))
+		return 0;
+
+	start = dso__first_symbol(map->dso);
+
+	for (sym = start; sym; sym = dso__next_symbol(sym)) {
+		if (sym->binding == STB_GLOBAL &&
+		    !strcmp(sym->name, "__switch_to")) {
+			ip = map->unmap_ip(map, sym->start);
+			if (ip >= map->start && ip < map->end) {
+				switch_ip = ip;
+				break;
+			}
+		}
+	}
+
+	if (!switch_ip || !ptss_ip)
+		return 0;
+
+	if (pt->have_sched_switch == 1)
+		ptss = "perf_trace_sched_switch";
+	else
+		ptss = "__perf_event_task_sched_out";
+
+	for (sym = start; sym; sym = dso__next_symbol(sym)) {
+		if (!strcmp(sym->name, ptss)) {
+			ip = map->unmap_ip(map, sym->start);
+			if (ip >= map->start && ip < map->end) {
+				*ptss_ip = ip;
+				break;
+			}
+		}
+	}
+
+	return switch_ip;
+}
+
+static void intel_pt_enable_sync_switch(struct intel_pt *pt)
+{
+	unsigned int i;
+
+	if (pt->sync_switch_not_supported)
+		return;
+
+	pt->sync_switch = true;
+
+	for (i = 0; i < pt->queues.nr_queues; i++) {
+		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
+		struct intel_pt_queue *ptq = queue->priv;
+
+		if (ptq)
+			ptq->sync_switch = true;
+	}
+}
+
+static void intel_pt_disable_sync_switch(struct intel_pt *pt)
+{
+	unsigned int i;
+
+	pt->sync_switch = false;
+
+	for (i = 0; i < pt->queues.nr_queues; i++) {
+		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
+		struct intel_pt_queue *ptq = queue->priv;
+
+		if (ptq) {
+			ptq->sync_switch = false;
+			intel_pt_next_tid(pt, ptq);
+		}
+	}
+}
+
+/*
+ * To filter against time ranges, it is only necessary to look at the next start
+ * or end time.
+ */
+static bool intel_pt_next_time(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+
+	if (ptq->sel_start) {
+		/* Next time is an end time */
+		ptq->sel_start = false;
+		ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end;
+		return true;
+	} else if (ptq->sel_idx + 1 < pt->range_cnt) {
+		/* Next time is a start time */
+		ptq->sel_start = true;
+		ptq->sel_idx += 1;
+		ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].start;
+		return true;
+	}
+
+	/* No next time */
+	return false;
+}
+
+static int intel_pt_time_filter(struct intel_pt_queue *ptq, u64 *ff_timestamp)
+{
+	int err;
+
+	while (1) {
+		if (ptq->sel_start) {
+			if (ptq->timestamp >= ptq->sel_timestamp) {
+				/* After start time, so consider next time */
+				intel_pt_next_time(ptq);
+				if (!ptq->sel_timestamp) {
+					/* No end time */
+					return 0;
+				}
+				/* Check against end time */
+				continue;
+			}
+			/* Before start time, so fast forward */
+			ptq->have_sample = false;
+			if (ptq->sel_timestamp > *ff_timestamp) {
+				if (ptq->sync_switch) {
+					intel_pt_next_tid(ptq->pt, ptq);
+					ptq->switch_state = INTEL_PT_SS_UNKNOWN;
+				}
+				*ff_timestamp = ptq->sel_timestamp;
+				err = intel_pt_fast_forward(ptq->decoder,
+							    ptq->sel_timestamp);
+				if (err)
+					return err;
+			}
+			return 0;
+		} else if (ptq->timestamp > ptq->sel_timestamp) {
+			/* After end time, so consider next time */
+			if (!intel_pt_next_time(ptq)) {
+				/* No next time range, so stop decoding */
+				ptq->have_sample = false;
+				ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
+				return 1;
+			}
+			/* Check against next start time */
+			continue;
+		} else {
+			/* Before end time */
+			return 0;
+		}
+	}
+}
+
+static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
+{
+	const struct intel_pt_state *state = ptq->state;
+	struct intel_pt *pt = ptq->pt;
+	u64 ff_timestamp = 0;
+	int err;
+
+	if (!pt->kernel_start) {
+		pt->kernel_start = machine__kernel_start(pt->machine);
+		if (pt->per_cpu_mmaps &&
+		    (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
+		    !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
+		    !pt->sampling_mode && !pt->synth_opts.vm_time_correlation) {
+			pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
+			if (pt->switch_ip) {
+				intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
+					     pt->switch_ip, pt->ptss_ip);
+				intel_pt_enable_sync_switch(pt);
+			}
+		}
+	}
+
+	intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
+		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
+	while (1) {
+		err = intel_pt_sample(ptq);
+		if (err)
+			return err;
+
+		state = intel_pt_decode(ptq->decoder);
+		if (state->err) {
+			if (state->err == INTEL_PT_ERR_NODATA)
+				return 1;
+			if (ptq->sync_switch &&
+			    state->from_ip >= pt->kernel_start) {
+				ptq->sync_switch = false;
+				intel_pt_next_tid(pt, ptq);
+			}
+			ptq->timestamp = state->est_timestamp;
+			if (pt->synth_opts.errors) {
+				err = intel_ptq_synth_error(ptq, state);
+				if (err)
+					return err;
+			}
+			continue;
+		}
+
+		ptq->state = state;
+		ptq->have_sample = true;
+		intel_pt_sample_flags(ptq);
+
+		/* Use estimated TSC upon return to user space */
+		if (pt->est_tsc &&
+		    (state->from_ip >= pt->kernel_start || !state->from_ip) &&
+		    state->to_ip && state->to_ip < pt->kernel_start) {
+			intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
+				     state->timestamp, state->est_timestamp);
+			ptq->timestamp = state->est_timestamp;
+		/* Use estimated TSC in unknown switch state */
+		} else if (ptq->sync_switch &&
+			   ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
+			   intel_pt_is_switch_ip(ptq, state->to_ip) &&
+			   ptq->next_tid == -1) {
+			intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
+				     state->timestamp, state->est_timestamp);
+			ptq->timestamp = state->est_timestamp;
+		} else if (state->timestamp > ptq->timestamp) {
+			ptq->timestamp = state->timestamp;
+		}
+
+		if (ptq->sel_timestamp) {
+			err = intel_pt_time_filter(ptq, &ff_timestamp);
+			if (err)
+				return err;
+		}
+
+		if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
+			*timestamp = ptq->timestamp;
+			return 0;
+		}
+	}
+	return 0;
+}
+
+static inline int intel_pt_update_queues(struct intel_pt *pt)
+{
+	if (pt->queues.new_data) {
+		pt->queues.new_data = false;
+		return intel_pt_setup_queues(pt);
+	}
+	return 0;
+}
+
+static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
+{
+	unsigned int queue_nr;
+	u64 ts;
+	int ret;
+
+	while (1) {
+		struct auxtrace_queue *queue;
+		struct intel_pt_queue *ptq;
+
+		if (!pt->heap.heap_cnt)
+			return 0;
+
+		if (pt->heap.heap_array[0].ordinal >= timestamp)
+			return 0;
+
+		queue_nr = pt->heap.heap_array[0].queue_nr;
+		queue = &pt->queues.queue_array[queue_nr];
+		ptq = queue->priv;
+
+		intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
+			     queue_nr, pt->heap.heap_array[0].ordinal,
+			     timestamp);
+
+		auxtrace_heap__pop(&pt->heap);
+
+		if (pt->heap.heap_cnt) {
+			ts = pt->heap.heap_array[0].ordinal + 1;
+			if (ts > timestamp)
+				ts = timestamp;
+		} else {
+			ts = timestamp;
+		}
+
+		intel_pt_set_pid_tid_cpu(pt, queue);
+
+		ret = intel_pt_run_decoder(ptq, &ts);
+
+		if (ret < 0) {
+			auxtrace_heap__add(&pt->heap, queue_nr, ts);
+			return ret;
+		}
+
+		if (!ret) {
+			ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
+			if (ret < 0)
+				return ret;
+		} else {
+			ptq->on_heap = false;
+		}
+	}
+
+	return 0;
+}
+
+static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
+					    u64 time_)
+{
+	struct auxtrace_queues *queues = &pt->queues;
+	unsigned int i;
+	u64 ts = 0;
+
+	for (i = 0; i < queues->nr_queues; i++) {
+		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
+		struct intel_pt_queue *ptq = queue->priv;
+
+		if (ptq && (tid == -1 || ptq->tid == tid)) {
+			ptq->time = time_;
+			intel_pt_set_pid_tid_cpu(pt, queue);
+			intel_pt_run_decoder(ptq, &ts);
+		}
+	}
+	return 0;
+}
+
+static void intel_pt_sample_set_pid_tid_cpu(struct intel_pt_queue *ptq,
+					    struct auxtrace_queue *queue,
+					    struct perf_sample *sample)
+{
+	struct machine *m = ptq->pt->machine;
+
+	ptq->pid = sample->pid;
+	ptq->tid = sample->tid;
+	ptq->cpu = queue->cpu;
+
+	intel_pt_log("queue %u cpu %d pid %d tid %d\n",
+		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
+
+	thread__zput(ptq->thread);
+
+	if (ptq->tid == -1)
+		return;
+
+	if (ptq->pid == -1) {
+		ptq->thread = machine__find_thread(m, -1, ptq->tid);
+		if (ptq->thread)
+			ptq->pid = ptq->thread->pid_;
+		return;
+	}
+
+	ptq->thread = machine__findnew_thread(m, ptq->pid, ptq->tid);
+}
+
+static int intel_pt_process_timeless_sample(struct intel_pt *pt,
+					    struct perf_sample *sample)
+{
+	struct auxtrace_queue *queue;
+	struct intel_pt_queue *ptq;
+	u64 ts = 0;
+
+	queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session);
+	if (!queue)
+		return -EINVAL;
+
+	ptq = queue->priv;
+	if (!ptq)
+		return 0;
+
+	ptq->stop = false;
+	ptq->time = sample->time;
+	intel_pt_sample_set_pid_tid_cpu(ptq, queue, sample);
+	intel_pt_run_decoder(ptq, &ts);
+	return 0;
+}
+
+static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
+{
+	return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
+				    sample->pid, sample->tid, 0, sample->time,
+				    sample->machine_pid, sample->vcpu);
+}
+
+static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
+{
+	unsigned i, j;
+
+	if (cpu < 0 || !pt->queues.nr_queues)
+		return NULL;
+
+	if ((unsigned)cpu >= pt->queues.nr_queues)
+		i = pt->queues.nr_queues - 1;
+	else
+		i = cpu;
+
+	if (pt->queues.queue_array[i].cpu == cpu)
+		return pt->queues.queue_array[i].priv;
+
+	for (j = 0; i > 0; j++) {
+		if (pt->queues.queue_array[--i].cpu == cpu)
+			return pt->queues.queue_array[i].priv;
+	}
+
+	for (; j < pt->queues.nr_queues; j++) {
+		if (pt->queues.queue_array[j].cpu == cpu)
+			return pt->queues.queue_array[j].priv;
+	}
+
+	return NULL;
+}
+
+static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
+				u64 timestamp)
+{
+	struct intel_pt_queue *ptq;
+	int err;
+
+	if (!pt->sync_switch)
+		return 1;
+
+	ptq = intel_pt_cpu_to_ptq(pt, cpu);
+	if (!ptq || !ptq->sync_switch)
+		return 1;
+
+	switch (ptq->switch_state) {
+	case INTEL_PT_SS_NOT_TRACING:
+		break;
+	case INTEL_PT_SS_UNKNOWN:
+	case INTEL_PT_SS_TRACING:
+		ptq->next_tid = tid;
+		ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
+		return 0;
+	case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
+		if (!ptq->on_heap) {
+			ptq->timestamp = perf_time_to_tsc(timestamp,
+							  &pt->tc);
+			err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
+						 ptq->timestamp);
+			if (err)
+				return err;
+			ptq->on_heap = true;
+		}
+		ptq->switch_state = INTEL_PT_SS_TRACING;
+		break;
+	case INTEL_PT_SS_EXPECTING_SWITCH_IP:
+		intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
+		break;
+	default:
+		break;
+	}
+
+	ptq->next_tid = -1;
+
+	return 1;
+}
+
+static int intel_pt_process_switch(struct intel_pt *pt,
+				   struct perf_sample *sample)
+{
+	pid_t tid;
+	int cpu, ret;
+	struct evsel *evsel = evlist__id2evsel(pt->session->evlist, sample->id);
+
+	if (evsel != pt->switch_evsel)
+		return 0;
+
+	tid = evsel__intval(evsel, sample, "next_pid");
+	cpu = sample->cpu;
+
+	intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
+		     cpu, tid, sample->time, perf_time_to_tsc(sample->time,
+		     &pt->tc));
+
+	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
+	if (ret <= 0)
+		return ret;
+
+	return machine__set_current_tid(pt->machine, cpu, -1, tid);
+}
+
+static int intel_pt_context_switch_in(struct intel_pt *pt,
+				      struct perf_sample *sample)
+{
+	pid_t pid = sample->pid;
+	pid_t tid = sample->tid;
+	int cpu = sample->cpu;
+
+	if (pt->sync_switch) {
+		struct intel_pt_queue *ptq;
+
+		ptq = intel_pt_cpu_to_ptq(pt, cpu);
+		if (ptq && ptq->sync_switch) {
+			ptq->next_tid = -1;
+			switch (ptq->switch_state) {
+			case INTEL_PT_SS_NOT_TRACING:
+			case INTEL_PT_SS_UNKNOWN:
+			case INTEL_PT_SS_TRACING:
+				break;
+			case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
+			case INTEL_PT_SS_EXPECTING_SWITCH_IP:
+				ptq->switch_state = INTEL_PT_SS_TRACING;
+				break;
+			default:
+				break;
+			}
+		}
+	}
+
+	/*
+	 * If the current tid has not been updated yet, ensure it is now that
+	 * a "switch in" event has occurred.
+	 */
+	if (machine__get_current_tid(pt->machine, cpu) == tid)
+		return 0;
+
+	return machine__set_current_tid(pt->machine, cpu, pid, tid);
+}
+
+static int intel_pt_guest_context_switch(struct intel_pt *pt,
+					 union perf_event *event,
+					 struct perf_sample *sample)
+{
+	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+	struct machines *machines = &pt->session->machines;
+	struct machine *machine = machines__find(machines, sample->machine_pid);
+
+	pt->have_guest_sideband = true;
+
+	/*
+	 * sync_switch cannot handle guest machines at present, so just disable
+	 * it.
+	 */
+	pt->sync_switch_not_supported = true;
+	if (pt->sync_switch)
+		intel_pt_disable_sync_switch(pt);
+
+	if (out)
+		return 0;
+
+	if (!machine)
+		return -EINVAL;
+
+	return machine__set_current_tid(machine, sample->vcpu, sample->pid, sample->tid);
+}
+
+static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
+				   struct perf_sample *sample)
+{
+	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+	pid_t pid, tid;
+	int cpu, ret;
+
+	if (perf_event__is_guest(event))
+		return intel_pt_guest_context_switch(pt, event, sample);
+
+	cpu = sample->cpu;
+
+	if (pt->have_sched_switch == 3) {
+		if (!out)
+			return intel_pt_context_switch_in(pt, sample);
+		if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
+			pr_err("Expecting CPU-wide context switch event\n");
+			return -EINVAL;
+		}
+		pid = event->context_switch.next_prev_pid;
+		tid = event->context_switch.next_prev_tid;
+	} else {
+		if (out)
+			return 0;
+		pid = sample->pid;
+		tid = sample->tid;
+	}
+
+	if (tid == -1)
+		intel_pt_log("context_switch event has no tid\n");
+
+	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
+	if (ret <= 0)
+		return ret;
+
+	return machine__set_current_tid(pt->machine, cpu, pid, tid);
+}
+
+static int intel_pt_process_itrace_start(struct intel_pt *pt,
+					 union perf_event *event,
+					 struct perf_sample *sample)
+{
+	if (!pt->per_cpu_mmaps)
+		return 0;
+
+	intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
+		     sample->cpu, event->itrace_start.pid,
+		     event->itrace_start.tid, sample->time,
+		     perf_time_to_tsc(sample->time, &pt->tc));
+
+	return machine__set_current_tid(pt->machine, sample->cpu,
+					event->itrace_start.pid,
+					event->itrace_start.tid);
+}
+
+static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt,
+					     union perf_event *event,
+					     struct perf_sample *sample)
+{
+	u64 hw_id = event->aux_output_hw_id.hw_id;
+	struct auxtrace_queue *queue;
+	struct intel_pt_queue *ptq;
+	struct evsel *evsel;
+
+	queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session);
+	evsel = evlist__id2evsel_strict(pt->session->evlist, sample->id);
+	if (!queue || !queue->priv || !evsel || hw_id > INTEL_PT_MAX_PEBS) {
+		pr_err("Bad AUX output hardware ID\n");
+		return -EINVAL;
+	}
+
+	ptq = queue->priv;
+
+	ptq->pebs[hw_id].evsel = evsel;
+	ptq->pebs[hw_id].id = sample->id;
+
+	return 0;
+}
+
+static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr,
+			     struct addr_location *al)
+{
+	if (!al->map || addr < al->map->start || addr >= al->map->end) {
+		if (!thread__find_map(thread, cpumode, addr, al))
+			return -1;
+	}
+
+	return 0;
+}
+
+/* Invalidate all instruction cache entries that overlap the text poke */
+static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event)
+{
+	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+	u64 addr = event->text_poke.addr + event->text_poke.new_len - 1;
+	/* Assume text poke begins in a basic block no more than 4096 bytes */
+	int cnt = 4096 + event->text_poke.new_len;
+	struct thread *thread = pt->unknown_thread;
+	struct addr_location al = { .map = NULL };
+	struct machine *machine = pt->machine;
+	struct intel_pt_cache_entry *e;
+	u64 offset;
+
+	if (!event->text_poke.new_len)
+		return 0;
+
+	for (; cnt; cnt--, addr--) {
+		if (intel_pt_find_map(thread, cpumode, addr, &al)) {
+			if (addr < event->text_poke.addr)
+				return 0;
+			continue;
+		}
+
+		if (!al.map->dso || !al.map->dso->auxtrace_cache)
+			continue;
+
+		offset = al.map->map_ip(al.map, addr);
+
+		e = intel_pt_cache_lookup(al.map->dso, machine, offset);
+		if (!e)
+			continue;
+
+		if (addr + e->byte_cnt + e->length <= event->text_poke.addr) {
+			/*
+			 * No overlap. Working backwards there cannot be another
+			 * basic block that overlaps the text poke if there is a
+			 * branch instruction before the text poke address.
+			 */
+			if (e->branch != INTEL_PT_BR_NO_BRANCH)
+				return 0;
+		} else {
+			intel_pt_cache_invalidate(al.map->dso, machine, offset);
+			intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n",
+				     al.map->dso->long_name, addr);
+		}
+	}
+
+	return 0;
+}
+
+static int intel_pt_process_event(struct perf_session *session,
+				  union perf_event *event,
+				  struct perf_sample *sample,
+				  struct perf_tool *tool)
+{
+	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+					   auxtrace);
+	u64 timestamp;
+	int err = 0;
+
+	if (dump_trace)
+		return 0;
+
+	if (!tool->ordered_events) {
+		pr_err("Intel Processor Trace requires ordered events\n");
+		return -EINVAL;
+	}
+
+	if (sample->time && sample->time != (u64)-1)
+		timestamp = perf_time_to_tsc(sample->time, &pt->tc);
+	else
+		timestamp = 0;
+
+	if (timestamp || pt->timeless_decoding) {
+		err = intel_pt_update_queues(pt);
+		if (err)
+			return err;
+	}
+
+	if (pt->timeless_decoding) {
+		if (pt->sampling_mode) {
+			if (sample->aux_sample.size)
+				err = intel_pt_process_timeless_sample(pt,
+								       sample);
+		} else if (event->header.type == PERF_RECORD_EXIT) {
+			err = intel_pt_process_timeless_queues(pt,
+							       event->fork.tid,
+							       sample->time);
+		}
+	} else if (timestamp) {
+		if (!pt->first_timestamp)
+			intel_pt_first_timestamp(pt, timestamp);
+		err = intel_pt_process_queues(pt, timestamp);
+	}
+	if (err)
+		return err;
+
+	if (event->header.type == PERF_RECORD_SAMPLE) {
+		if (pt->synth_opts.add_callchain && !sample->callchain)
+			intel_pt_add_callchain(pt, sample);
+		if (pt->synth_opts.add_last_branch && !sample->branch_stack)
+			intel_pt_add_br_stack(pt, sample);
+	}
+
+	if (event->header.type == PERF_RECORD_AUX &&
+	    (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
+	    pt->synth_opts.errors) {
+		err = intel_pt_lost(pt, sample);
+		if (err)
+			return err;
+	}
+
+	if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
+		err = intel_pt_process_switch(pt, sample);
+	else if (event->header.type == PERF_RECORD_ITRACE_START)
+		err = intel_pt_process_itrace_start(pt, event, sample);
+	else if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID)
+		err = intel_pt_process_aux_output_hw_id(pt, event, sample);
+	else if (event->header.type == PERF_RECORD_SWITCH ||
+		 event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
+		err = intel_pt_context_switch(pt, event, sample);
+
+	if (!err && event->header.type == PERF_RECORD_TEXT_POKE)
+		err = intel_pt_text_poke(pt, event);
+
+	if (intel_pt_enable_logging && intel_pt_log_events(pt, sample->time)) {
+		intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ",
+			     event->header.type, sample->cpu, sample->time, timestamp);
+		intel_pt_log_event(event);
+	}
+
+	return err;
+}
+
+static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
+{
+	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+					   auxtrace);
+	int ret;
+
+	if (dump_trace)
+		return 0;
+
+	if (!tool->ordered_events)
+		return -EINVAL;
+
+	ret = intel_pt_update_queues(pt);
+	if (ret < 0)
+		return ret;
+
+	if (pt->timeless_decoding)
+		return intel_pt_process_timeless_queues(pt, -1,
+							MAX_TIMESTAMP - 1);
+
+	return intel_pt_process_queues(pt, MAX_TIMESTAMP);
+}
+
+static void intel_pt_free_events(struct perf_session *session)
+{
+	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+					   auxtrace);
+	struct auxtrace_queues *queues = &pt->queues;
+	unsigned int i;
+
+	for (i = 0; i < queues->nr_queues; i++) {
+		intel_pt_free_queue(queues->queue_array[i].priv);
+		queues->queue_array[i].priv = NULL;
+	}
+	intel_pt_log_disable();
+	auxtrace_queues__free(queues);
+}
+
+static void intel_pt_free(struct perf_session *session)
+{
+	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+					   auxtrace);
+
+	auxtrace_heap__free(&pt->heap);
+	intel_pt_free_events(session);
+	session->auxtrace = NULL;
+	intel_pt_free_vmcs_info(pt);
+	thread__put(pt->unknown_thread);
+	addr_filters__exit(&pt->filts);
+	zfree(&pt->chain);
+	zfree(&pt->filter);
+	zfree(&pt->time_ranges);
+	free(pt);
+}
+
+static bool intel_pt_evsel_is_auxtrace(struct perf_session *session,
+				       struct evsel *evsel)
+{
+	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+					   auxtrace);
+
+	return evsel->core.attr.type == pt->pmu_type;
+}
+
+static int intel_pt_process_auxtrace_event(struct perf_session *session,
+					   union perf_event *event,
+					   struct perf_tool *tool __maybe_unused)
+{
+	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+					   auxtrace);
+
+	if (!pt->data_queued) {
+		struct auxtrace_buffer *buffer;
+		off_t data_offset;
+		int fd = perf_data__fd(session->data);
+		int err;
+
+		if (perf_data__is_pipe(session->data)) {
+			data_offset = 0;
+		} else {
+			data_offset = lseek(fd, 0, SEEK_CUR);
+			if (data_offset == -1)
+				return -errno;
+		}
+
+		err = auxtrace_queues__add_event(&pt->queues, session, event,
+						 data_offset, &buffer);
+		if (err)
+			return err;
+
+		/* Dump here now we have copied a piped trace out of the pipe */
+		if (dump_trace) {
+			if (auxtrace_buffer__get_data(buffer, fd)) {
+				intel_pt_dump_event(pt, buffer->data,
+						    buffer->size);
+				auxtrace_buffer__put_data(buffer);
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int intel_pt_queue_data(struct perf_session *session,
+			       struct perf_sample *sample,
+			       union perf_event *event, u64 data_offset)
+{
+	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+					   auxtrace);
+	u64 timestamp;
+
+	if (event) {
+		return auxtrace_queues__add_event(&pt->queues, session, event,
+						  data_offset, NULL);
+	}
+
+	if (sample->time && sample->time != (u64)-1)
+		timestamp = perf_time_to_tsc(sample->time, &pt->tc);
+	else
+		timestamp = 0;
+
+	return auxtrace_queues__add_sample(&pt->queues, session, sample,
+					   data_offset, timestamp);
+}
+
+struct intel_pt_synth {
+	struct perf_tool dummy_tool;
+	struct perf_session *session;
+};
+
+static int intel_pt_event_synth(struct perf_tool *tool,
+				union perf_event *event,
+				struct perf_sample *sample __maybe_unused,
+				struct machine *machine __maybe_unused)
+{
+	struct intel_pt_synth *intel_pt_synth =
+			container_of(tool, struct intel_pt_synth, dummy_tool);
+
+	return perf_session__deliver_synth_event(intel_pt_synth->session, event,
+						 NULL);
+}
+
+static int intel_pt_synth_event(struct perf_session *session, const char *name,
+				struct perf_event_attr *attr, u64 id)
+{
+	struct intel_pt_synth intel_pt_synth;
+	int err;
+
+	pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
+		 name, id, (u64)attr->sample_type);
+
+	memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
+	intel_pt_synth.session = session;
+
+	err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
+					  &id, intel_pt_event_synth);
+	if (err)
+		pr_err("%s: failed to synthesize '%s' event type\n",
+		       __func__, name);
+
+	return err;
+}
+
+static void intel_pt_set_event_name(struct evlist *evlist, u64 id,
+				    const char *name)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->core.id && evsel->core.id[0] == id) {
+			if (evsel->name)
+				zfree(&evsel->name);
+			evsel->name = strdup(name);
+			break;
+		}
+	}
+}
+
+static struct evsel *intel_pt_evsel(struct intel_pt *pt,
+					 struct evlist *evlist)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->core.attr.type == pt->pmu_type && evsel->core.ids)
+			return evsel;
+	}
+
+	return NULL;
+}
+
+static int intel_pt_synth_events(struct intel_pt *pt,
+				 struct perf_session *session)
+{
+	struct evlist *evlist = session->evlist;
+	struct evsel *evsel = intel_pt_evsel(pt, evlist);
+	struct perf_event_attr attr;
+	u64 id;
+	int err;
+
+	if (!evsel) {
+		pr_debug("There are no selected events with Intel Processor Trace data\n");
+		return 0;
+	}
+
+	memset(&attr, 0, sizeof(struct perf_event_attr));
+	attr.size = sizeof(struct perf_event_attr);
+	attr.type = PERF_TYPE_HARDWARE;
+	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
+	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+			    PERF_SAMPLE_PERIOD;
+	if (pt->timeless_decoding)
+		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
+	else
+		attr.sample_type |= PERF_SAMPLE_TIME;
+	if (!pt->per_cpu_mmaps)
+		attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
+	attr.exclude_user = evsel->core.attr.exclude_user;
+	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
+	attr.exclude_hv = evsel->core.attr.exclude_hv;
+	attr.exclude_host = evsel->core.attr.exclude_host;
+	attr.exclude_guest = evsel->core.attr.exclude_guest;
+	attr.sample_id_all = evsel->core.attr.sample_id_all;
+	attr.read_format = evsel->core.attr.read_format;
+
+	id = evsel->core.id[0] + 1000000000;
+	if (!id)
+		id = 1;
+
+	if (pt->synth_opts.branches) {
+		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+		attr.sample_period = 1;
+		attr.sample_type |= PERF_SAMPLE_ADDR;
+		err = intel_pt_synth_event(session, "branches", &attr, id);
+		if (err)
+			return err;
+		pt->sample_branches = true;
+		pt->branches_sample_type = attr.sample_type;
+		pt->branches_id = id;
+		id += 1;
+		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
+	}
+
+	if (pt->synth_opts.callchain)
+		attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+	if (pt->synth_opts.last_branch) {
+		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+		/*
+		 * We don't use the hardware index, but the sample generation
+		 * code uses the new format branch_stack with this field,
+		 * so the event attributes must indicate that it's present.
+		 */
+		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
+	}
+
+	if (pt->synth_opts.instructions) {
+		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+		if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
+			attr.sample_period =
+				intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
+		else
+			attr.sample_period = pt->synth_opts.period;
+		err = intel_pt_synth_event(session, "instructions", &attr, id);
+		if (err)
+			return err;
+		pt->sample_instructions = true;
+		pt->instructions_sample_type = attr.sample_type;
+		pt->instructions_id = id;
+		id += 1;
+	}
+
+	attr.sample_type &= ~(u64)PERF_SAMPLE_PERIOD;
+	attr.sample_period = 1;
+
+	if (pt->synth_opts.transactions) {
+		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+		err = intel_pt_synth_event(session, "transactions", &attr, id);
+		if (err)
+			return err;
+		pt->sample_transactions = true;
+		pt->transactions_sample_type = attr.sample_type;
+		pt->transactions_id = id;
+		intel_pt_set_event_name(evlist, id, "transactions");
+		id += 1;
+	}
+
+	attr.type = PERF_TYPE_SYNTH;
+	attr.sample_type |= PERF_SAMPLE_RAW;
+
+	if (pt->synth_opts.ptwrites) {
+		attr.config = PERF_SYNTH_INTEL_PTWRITE;
+		err = intel_pt_synth_event(session, "ptwrite", &attr, id);
+		if (err)
+			return err;
+		pt->sample_ptwrites = true;
+		pt->ptwrites_sample_type = attr.sample_type;
+		pt->ptwrites_id = id;
+		intel_pt_set_event_name(evlist, id, "ptwrite");
+		id += 1;
+	}
+
+	if (pt->synth_opts.pwr_events) {
+		pt->sample_pwr_events = true;
+		pt->pwr_events_sample_type = attr.sample_type;
+
+		attr.config = PERF_SYNTH_INTEL_CBR;
+		err = intel_pt_synth_event(session, "cbr", &attr, id);
+		if (err)
+			return err;
+		pt->cbr_id = id;
+		intel_pt_set_event_name(evlist, id, "cbr");
+		id += 1;
+
+		attr.config = PERF_SYNTH_INTEL_PSB;
+		err = intel_pt_synth_event(session, "psb", &attr, id);
+		if (err)
+			return err;
+		pt->psb_id = id;
+		intel_pt_set_event_name(evlist, id, "psb");
+		id += 1;
+	}
+
+	if (pt->synth_opts.pwr_events && (evsel->core.attr.config & INTEL_PT_CFG_PWR_EVT_EN)) {
+		attr.config = PERF_SYNTH_INTEL_MWAIT;
+		err = intel_pt_synth_event(session, "mwait", &attr, id);
+		if (err)
+			return err;
+		pt->mwait_id = id;
+		intel_pt_set_event_name(evlist, id, "mwait");
+		id += 1;
+
+		attr.config = PERF_SYNTH_INTEL_PWRE;
+		err = intel_pt_synth_event(session, "pwre", &attr, id);
+		if (err)
+			return err;
+		pt->pwre_id = id;
+		intel_pt_set_event_name(evlist, id, "pwre");
+		id += 1;
+
+		attr.config = PERF_SYNTH_INTEL_EXSTOP;
+		err = intel_pt_synth_event(session, "exstop", &attr, id);
+		if (err)
+			return err;
+		pt->exstop_id = id;
+		intel_pt_set_event_name(evlist, id, "exstop");
+		id += 1;
+
+		attr.config = PERF_SYNTH_INTEL_PWRX;
+		err = intel_pt_synth_event(session, "pwrx", &attr, id);
+		if (err)
+			return err;
+		pt->pwrx_id = id;
+		intel_pt_set_event_name(evlist, id, "pwrx");
+		id += 1;
+	}
+
+	if (pt->synth_opts.intr_events && (evsel->core.attr.config & INTEL_PT_CFG_EVT_EN)) {
+		attr.config = PERF_SYNTH_INTEL_EVT;
+		err = intel_pt_synth_event(session, "evt", &attr, id);
+		if (err)
+			return err;
+		pt->evt_sample_type = attr.sample_type;
+		pt->evt_id = id;
+		intel_pt_set_event_name(evlist, id, "evt");
+		id += 1;
+	}
+
+	if (pt->synth_opts.intr_events && pt->cap_event_trace) {
+		attr.config = PERF_SYNTH_INTEL_IFLAG_CHG;
+		err = intel_pt_synth_event(session, "iflag", &attr, id);
+		if (err)
+			return err;
+		pt->iflag_chg_sample_type = attr.sample_type;
+		pt->iflag_chg_id = id;
+		intel_pt_set_event_name(evlist, id, "iflag");
+		id += 1;
+	}
+
+	return 0;
+}
+
+static void intel_pt_setup_pebs_events(struct intel_pt *pt)
+{
+	struct evsel *evsel;
+
+	if (!pt->synth_opts.other_events)
+		return;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (evsel->core.attr.aux_output && evsel->core.id) {
+			if (pt->single_pebs) {
+				pt->single_pebs = false;
+				return;
+			}
+			pt->single_pebs = true;
+			pt->sample_pebs = true;
+			pt->pebs_evsel = evsel;
+		}
+	}
+}
+
+static struct evsel *intel_pt_find_sched_switch(struct evlist *evlist)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry_reverse(evlist, evsel) {
+		const char *name = evsel__name(evsel);
+
+		if (!strcmp(name, "sched:sched_switch"))
+			return evsel;
+	}
+
+	return NULL;
+}
+
+static bool intel_pt_find_switch(struct evlist *evlist)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->core.attr.context_switch)
+			return true;
+	}
+
+	return false;
+}
+
+static int intel_pt_perf_config(const char *var, const char *value, void *data)
+{
+	struct intel_pt *pt = data;
+
+	if (!strcmp(var, "intel-pt.mispred-all"))
+		pt->mispred_all = perf_config_bool(var, value);
+
+	if (!strcmp(var, "intel-pt.max-loops"))
+		perf_config_int(&pt->max_loops, var, value);
+
+	return 0;
+}
+
+/* Find least TSC which converts to ns or later */
+static u64 intel_pt_tsc_start(u64 ns, struct intel_pt *pt)
+{
+	u64 tsc, tm;
+
+	tsc = perf_time_to_tsc(ns, &pt->tc);
+
+	while (1) {
+		tm = tsc_to_perf_time(tsc, &pt->tc);
+		if (tm < ns)
+			break;
+		tsc -= 1;
+	}
+
+	while (tm < ns)
+		tm = tsc_to_perf_time(++tsc, &pt->tc);
+
+	return tsc;
+}
+
+/* Find greatest TSC which converts to ns or earlier */
+static u64 intel_pt_tsc_end(u64 ns, struct intel_pt *pt)
+{
+	u64 tsc, tm;
+
+	tsc = perf_time_to_tsc(ns, &pt->tc);
+
+	while (1) {
+		tm = tsc_to_perf_time(tsc, &pt->tc);
+		if (tm > ns)
+			break;
+		tsc += 1;
+	}
+
+	while (tm > ns)
+		tm = tsc_to_perf_time(--tsc, &pt->tc);
+
+	return tsc;
+}
+
+static int intel_pt_setup_time_ranges(struct intel_pt *pt,
+				      struct itrace_synth_opts *opts)
+{
+	struct perf_time_interval *p = opts->ptime_range;
+	int n = opts->range_num;
+	int i;
+
+	if (!n || !p || pt->timeless_decoding)
+		return 0;
+
+	pt->time_ranges = calloc(n, sizeof(struct range));
+	if (!pt->time_ranges)
+		return -ENOMEM;
+
+	pt->range_cnt = n;
+
+	intel_pt_log("%s: %u range(s)\n", __func__, n);
+
+	for (i = 0; i < n; i++) {
+		struct range *r = &pt->time_ranges[i];
+		u64 ts = p[i].start;
+		u64 te = p[i].end;
+
+		/*
+		 * Take care to ensure the TSC range matches the perf-time range
+		 * when converted back to perf-time.
+		 */
+		r->start = ts ? intel_pt_tsc_start(ts, pt) : 0;
+		r->end   = te ? intel_pt_tsc_end(te, pt) : 0;
+
+		intel_pt_log("range %d: perf time interval: %"PRIu64" to %"PRIu64"\n",
+			     i, ts, te);
+		intel_pt_log("range %d: TSC time interval: %#"PRIx64" to %#"PRIx64"\n",
+			     i, r->start, r->end);
+	}
+
+	return 0;
+}
+
+static int intel_pt_parse_vm_tm_corr_arg(struct intel_pt *pt, char **args)
+{
+	struct intel_pt_vmcs_info *vmcs_info;
+	u64 tsc_offset, vmcs;
+	char *p = *args;
+
+	errno = 0;
+
+	p = skip_spaces(p);
+	if (!*p)
+		return 1;
+
+	tsc_offset = strtoull(p, &p, 0);
+	if (errno)
+		return -errno;
+	p = skip_spaces(p);
+	if (*p != ':') {
+		pt->dflt_tsc_offset = tsc_offset;
+		*args = p;
+		return 0;
+	}
+	p += 1;
+	while (1) {
+		vmcs = strtoull(p, &p, 0);
+		if (errno)
+			return -errno;
+		if (!vmcs)
+			return -EINVAL;
+		vmcs_info = intel_pt_findnew_vmcs(&pt->vmcs_info, vmcs, tsc_offset);
+		if (!vmcs_info)
+			return -ENOMEM;
+		p = skip_spaces(p);
+		if (*p != ',')
+			break;
+		p += 1;
+	}
+	*args = p;
+	return 0;
+}
+
+static int intel_pt_parse_vm_tm_corr_args(struct intel_pt *pt)
+{
+	char *args = pt->synth_opts.vm_tm_corr_args;
+	int ret;
+
+	if (!args)
+		return 0;
+
+	do {
+		ret = intel_pt_parse_vm_tm_corr_arg(pt, &args);
+	} while (!ret);
+
+	if (ret < 0) {
+		pr_err("Failed to parse VM Time Correlation options\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static const char * const intel_pt_info_fmts[] = {
+	[INTEL_PT_PMU_TYPE]		= "  PMU Type            %"PRId64"\n",
+	[INTEL_PT_TIME_SHIFT]		= "  Time Shift          %"PRIu64"\n",
+	[INTEL_PT_TIME_MULT]		= "  Time Muliplier      %"PRIu64"\n",
+	[INTEL_PT_TIME_ZERO]		= "  Time Zero           %"PRIu64"\n",
+	[INTEL_PT_CAP_USER_TIME_ZERO]	= "  Cap Time Zero       %"PRId64"\n",
+	[INTEL_PT_TSC_BIT]		= "  TSC bit             %#"PRIx64"\n",
+	[INTEL_PT_NORETCOMP_BIT]	= "  NoRETComp bit       %#"PRIx64"\n",
+	[INTEL_PT_HAVE_SCHED_SWITCH]	= "  Have sched_switch   %"PRId64"\n",
+	[INTEL_PT_SNAPSHOT_MODE]	= "  Snapshot mode       %"PRId64"\n",
+	[INTEL_PT_PER_CPU_MMAPS]	= "  Per-cpu maps        %"PRId64"\n",
+	[INTEL_PT_MTC_BIT]		= "  MTC bit             %#"PRIx64"\n",
+	[INTEL_PT_MTC_FREQ_BITS]	= "  MTC freq bits       %#"PRIx64"\n",
+	[INTEL_PT_TSC_CTC_N]		= "  TSC:CTC numerator   %"PRIu64"\n",
+	[INTEL_PT_TSC_CTC_D]		= "  TSC:CTC denominator %"PRIu64"\n",
+	[INTEL_PT_CYC_BIT]		= "  CYC bit             %#"PRIx64"\n",
+	[INTEL_PT_MAX_NONTURBO_RATIO]	= "  Max non-turbo ratio %"PRIu64"\n",
+	[INTEL_PT_FILTER_STR_LEN]	= "  Filter string len.  %"PRIu64"\n",
+};
+
+static void intel_pt_print_info(__u64 *arr, int start, int finish)
+{
+	int i;
+
+	if (!dump_trace)
+		return;
+
+	for (i = start; i <= finish; i++) {
+		const char *fmt = intel_pt_info_fmts[i];
+
+		if (fmt)
+			fprintf(stdout, fmt, arr[i]);
+	}
+}
+
+static void intel_pt_print_info_str(const char *name, const char *str)
+{
+	if (!dump_trace)
+		return;
+
+	fprintf(stdout, "  %-20s%s\n", name, str ? str : "");
+}
+
+static bool intel_pt_has(struct perf_record_auxtrace_info *auxtrace_info, int pos)
+{
+	return auxtrace_info->header.size >=
+		sizeof(struct perf_record_auxtrace_info) + (sizeof(u64) * (pos + 1));
+}
+
+int intel_pt_process_auxtrace_info(union perf_event *event,
+				   struct perf_session *session)
+{
+	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
+	size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
+	struct intel_pt *pt;
+	void *info_end;
+	__u64 *info;
+	int err;
+
+	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
+					min_sz)
+		return -EINVAL;
+
+	pt = zalloc(sizeof(struct intel_pt));
+	if (!pt)
+		return -ENOMEM;
+
+	pt->vmcs_info = RB_ROOT;
+
+	addr_filters__init(&pt->filts);
+
+	err = perf_config(intel_pt_perf_config, pt);
+	if (err)
+		goto err_free;
+
+	err = auxtrace_queues__init(&pt->queues);
+	if (err)
+		goto err_free;
+
+	if (session->itrace_synth_opts->set) {
+		pt->synth_opts = *session->itrace_synth_opts;
+	} else {
+		struct itrace_synth_opts *opts = session->itrace_synth_opts;
+
+		itrace_synth_opts__set_default(&pt->synth_opts, opts->default_no_sample);
+		if (!opts->default_no_sample && !opts->inject) {
+			pt->synth_opts.branches = false;
+			pt->synth_opts.callchain = true;
+			pt->synth_opts.add_callchain = true;
+		}
+		pt->synth_opts.thread_stack = opts->thread_stack;
+	}
+
+	if (!(pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_USE_STDOUT))
+		intel_pt_log_set_name(INTEL_PT_PMU_NAME);
+
+	pt->session = session;
+	pt->machine = &session->machines.host; /* No kvm support */
+	pt->auxtrace_type = auxtrace_info->type;
+	pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
+	pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
+	pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
+	pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
+	pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
+	pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
+	pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
+	pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
+	pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
+	pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
+	intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
+			    INTEL_PT_PER_CPU_MMAPS);
+
+	if (intel_pt_has(auxtrace_info, INTEL_PT_CYC_BIT)) {
+		pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
+		pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
+		pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
+		pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
+		pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
+		intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
+				    INTEL_PT_CYC_BIT);
+	}
+
+	if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO)) {
+		pt->max_non_turbo_ratio =
+			auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO];
+		intel_pt_print_info(&auxtrace_info->priv[0],
+				    INTEL_PT_MAX_NONTURBO_RATIO,
+				    INTEL_PT_MAX_NONTURBO_RATIO);
+	}
+
+	info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
+	info_end = (void *)auxtrace_info + auxtrace_info->header.size;
+
+	if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) {
+		size_t len;
+
+		len = auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN];
+		intel_pt_print_info(&auxtrace_info->priv[0],
+				    INTEL_PT_FILTER_STR_LEN,
+				    INTEL_PT_FILTER_STR_LEN);
+		if (len) {
+			const char *filter = (const char *)info;
+
+			len = roundup(len + 1, 8);
+			info += len >> 3;
+			if ((void *)info > info_end) {
+				pr_err("%s: bad filter string length\n", __func__);
+				err = -EINVAL;
+				goto err_free_queues;
+			}
+			pt->filter = memdup(filter, len);
+			if (!pt->filter) {
+				err = -ENOMEM;
+				goto err_free_queues;
+			}
+			if (session->header.needs_swap)
+				mem_bswap_64(pt->filter, len);
+			if (pt->filter[len - 1]) {
+				pr_err("%s: filter string not null terminated\n", __func__);
+				err = -EINVAL;
+				goto err_free_queues;
+			}
+			err = addr_filters__parse_bare_filter(&pt->filts,
+							      filter);
+			if (err)
+				goto err_free_queues;
+		}
+		intel_pt_print_info_str("Filter string", pt->filter);
+	}
+
+	if ((void *)info < info_end) {
+		pt->cap_event_trace = *info++;
+		if (dump_trace)
+			fprintf(stdout, "  Cap Event Trace     %d\n",
+				pt->cap_event_trace);
+	}
+
+	pt->timeless_decoding = intel_pt_timeless_decoding(pt);
+	if (pt->timeless_decoding && !pt->tc.time_mult)
+		pt->tc.time_mult = 1;
+	pt->have_tsc = intel_pt_have_tsc(pt);
+	pt->sampling_mode = intel_pt_sampling_mode(pt);
+	pt->est_tsc = !pt->timeless_decoding;
+
+	if (pt->synth_opts.vm_time_correlation) {
+		if (pt->timeless_decoding) {
+			pr_err("Intel PT has no time information for VM Time Correlation\n");
+			err = -EINVAL;
+			goto err_free_queues;
+		}
+		if (session->itrace_synth_opts->ptime_range) {
+			pr_err("Time ranges cannot be specified with VM Time Correlation\n");
+			err = -EINVAL;
+			goto err_free_queues;
+		}
+		/* Currently TSC Offset is calculated using MTC packets */
+		if (!intel_pt_have_mtc(pt)) {
+			pr_err("MTC packets must have been enabled for VM Time Correlation\n");
+			err = -EINVAL;
+			goto err_free_queues;
+		}
+		err = intel_pt_parse_vm_tm_corr_args(pt);
+		if (err)
+			goto err_free_queues;
+	}
+
+	pt->unknown_thread = thread__new(999999999, 999999999);
+	if (!pt->unknown_thread) {
+		err = -ENOMEM;
+		goto err_free_queues;
+	}
+
+	/*
+	 * Since this thread will not be kept in any rbtree not in a
+	 * list, initialize its list node so that at thread__put() the
+	 * current thread lifetime assumption is kept and we don't segfault
+	 * at list_del_init().
+	 */
+	INIT_LIST_HEAD(&pt->unknown_thread->node);
+
+	err = thread__set_comm(pt->unknown_thread, "unknown", 0);
+	if (err)
+		goto err_delete_thread;
+	if (thread__init_maps(pt->unknown_thread, pt->machine)) {
+		err = -ENOMEM;
+		goto err_delete_thread;
+	}
+
+	pt->auxtrace.process_event = intel_pt_process_event;
+	pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
+	pt->auxtrace.queue_data = intel_pt_queue_data;
+	pt->auxtrace.dump_auxtrace_sample = intel_pt_dump_sample;
+	pt->auxtrace.flush_events = intel_pt_flush;
+	pt->auxtrace.free_events = intel_pt_free_events;
+	pt->auxtrace.free = intel_pt_free;
+	pt->auxtrace.evsel_is_auxtrace = intel_pt_evsel_is_auxtrace;
+	session->auxtrace = &pt->auxtrace;
+
+	if (dump_trace)
+		return 0;
+
+	if (pt->have_sched_switch == 1) {
+		pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
+		if (!pt->switch_evsel) {
+			pr_err("%s: missing sched_switch event\n", __func__);
+			err = -EINVAL;
+			goto err_delete_thread;
+		}
+	} else if (pt->have_sched_switch == 2 &&
+		   !intel_pt_find_switch(session->evlist)) {
+		pr_err("%s: missing context_switch attribute flag\n", __func__);
+		err = -EINVAL;
+		goto err_delete_thread;
+	}
+
+	if (pt->synth_opts.log) {
+		bool log_on_error = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ON_ERROR;
+		unsigned int log_on_error_size = pt->synth_opts.log_on_error_size;
+
+		intel_pt_log_enable(log_on_error, log_on_error_size);
+	}
+
+	/* Maximum non-turbo ratio is TSC freq / 100 MHz */
+	if (pt->tc.time_mult) {
+		u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);
+
+		if (!pt->max_non_turbo_ratio)
+			pt->max_non_turbo_ratio =
+					(tsc_freq + 50000000) / 100000000;
+		intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
+		intel_pt_log("Maximum non-turbo ratio %u\n",
+			     pt->max_non_turbo_ratio);
+		pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000;
+	}
+
+	err = intel_pt_setup_time_ranges(pt, session->itrace_synth_opts);
+	if (err)
+		goto err_delete_thread;
+
+	if (pt->synth_opts.calls)
+		pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
+				       PERF_IP_FLAG_TRACE_END;
+	if (pt->synth_opts.returns)
+		pt->branches_filter |= PERF_IP_FLAG_RETURN |
+				       PERF_IP_FLAG_TRACE_BEGIN;
+
+	if ((pt->synth_opts.callchain || pt->synth_opts.add_callchain) &&
+	    !symbol_conf.use_callchain) {
+		symbol_conf.use_callchain = true;
+		if (callchain_register_param(&callchain_param) < 0) {
+			symbol_conf.use_callchain = false;
+			pt->synth_opts.callchain = false;
+			pt->synth_opts.add_callchain = false;
+		}
+	}
+
+	if (pt->synth_opts.add_callchain) {
+		err = intel_pt_callchain_init(pt);
+		if (err)
+			goto err_delete_thread;
+	}
+
+	if (pt->synth_opts.last_branch || pt->synth_opts.add_last_branch) {
+		pt->br_stack_sz = pt->synth_opts.last_branch_sz;
+		pt->br_stack_sz_plus = pt->br_stack_sz;
+	}
+
+	if (pt->synth_opts.add_last_branch) {
+		err = intel_pt_br_stack_init(pt);
+		if (err)
+			goto err_delete_thread;
+		/*
+		 * Additional branch stack size to cater for tracing from the
+		 * actual sample ip to where the sample time is recorded.
+		 * Measured at about 200 branches, but generously set to 1024.
+		 * If kernel space is not being traced, then add just 1 for the
+		 * branch to kernel space.
+		 */
+		if (intel_pt_tracing_kernel(pt))
+			pt->br_stack_sz_plus += 1024;
+		else
+			pt->br_stack_sz_plus += 1;
+	}
+
+	pt->use_thread_stack = pt->synth_opts.callchain ||
+			       pt->synth_opts.add_callchain ||
+			       pt->synth_opts.thread_stack ||
+			       pt->synth_opts.last_branch ||
+			       pt->synth_opts.add_last_branch;
+
+	pt->callstack = pt->synth_opts.callchain ||
+			pt->synth_opts.add_callchain ||
+			pt->synth_opts.thread_stack;
+
+	err = intel_pt_synth_events(pt, session);
+	if (err)
+		goto err_delete_thread;
+
+	intel_pt_setup_pebs_events(pt);
+
+	if (perf_data__is_pipe(session->data)) {
+		pr_warning("WARNING: Intel PT with pipe mode is not recommended.\n"
+			   "         The output cannot relied upon.  In particular,\n"
+			   "         timestamps and the order of events may be incorrect.\n");
+	}
+
+	if (pt->sampling_mode || list_empty(&session->auxtrace_index))
+		err = auxtrace_queue_data(session, true, true);
+	else
+		err = auxtrace_queues__process_index(&pt->queues, session);
+	if (err)
+		goto err_delete_thread;
+
+	if (pt->queues.populated)
+		pt->data_queued = true;
+
+	if (pt->timeless_decoding)
+		pr_debug2("Intel PT decoding without timestamps\n");
+
+	return 0;
+
+err_delete_thread:
+	zfree(&pt->chain);
+	thread__zput(pt->unknown_thread);
+err_free_queues:
+	intel_pt_log_disable();
+	auxtrace_queues__free(&pt->queues);
+	session->auxtrace = NULL;
+err_free:
+	addr_filters__exit(&pt->filts);
+	zfree(&pt->filter);
+	zfree(&pt->time_ranges);
+	free(pt);
+	return err;
+}
diff --git a/tools/perf/util/intel-pt.h b/tools/perf/util/intel-pt.h
new file mode 100644
index 000000000..c7d6068e3
--- /dev/null
+++ b/tools/perf/util/intel-pt.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * intel_pt.h: Intel Processor Trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ */
+
+#ifndef INCLUDE__PERF_INTEL_PT_H__
+#define INCLUDE__PERF_INTEL_PT_H__
+
+#define INTEL_PT_PMU_NAME "intel_pt"
+
+enum {
+	INTEL_PT_PMU_TYPE,
+	INTEL_PT_TIME_SHIFT,
+	INTEL_PT_TIME_MULT,
+	INTEL_PT_TIME_ZERO,
+	INTEL_PT_CAP_USER_TIME_ZERO,
+	INTEL_PT_TSC_BIT,
+	INTEL_PT_NORETCOMP_BIT,
+	INTEL_PT_HAVE_SCHED_SWITCH,
+	INTEL_PT_SNAPSHOT_MODE,
+	INTEL_PT_PER_CPU_MMAPS,
+	INTEL_PT_MTC_BIT,
+	INTEL_PT_MTC_FREQ_BITS,
+	INTEL_PT_TSC_CTC_N,
+	INTEL_PT_TSC_CTC_D,
+	INTEL_PT_CYC_BIT,
+	INTEL_PT_MAX_NONTURBO_RATIO,
+	INTEL_PT_FILTER_STR_LEN,
+	INTEL_PT_AUXTRACE_PRIV_MAX,
+};
+
+struct auxtrace_record;
+struct perf_tool;
+union perf_event;
+struct perf_session;
+struct perf_event_attr;
+struct perf_pmu;
+
+struct auxtrace_record *intel_pt_recording_init(int *err);
+
+int intel_pt_process_auxtrace_info(union perf_event *event,
+				   struct perf_session *session);
+
+struct perf_event_attr *intel_pt_pmu_default_config(struct perf_pmu *pmu);
+
+#endif
diff --git a/tools/perf/util/intlist.c b/tools/perf/util/intlist.c
new file mode 100644
index 000000000..934092199
--- /dev/null
+++ b/tools/perf/util/intlist.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Based on intlist.c by:
+ * (c) 2009 Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <linux/compiler.h>
+
+#include "intlist.h"
+
+static struct rb_node *intlist__node_new(struct rblist *rblist __maybe_unused,
+					 const void *entry)
+{
+	unsigned long i = (unsigned long)entry;
+	struct rb_node *rc = NULL;
+	struct int_node *node = malloc(sizeof(*node));
+
+	if (node != NULL) {
+		node->i = i;
+		node->priv = NULL;
+		rc = &node->rb_node;
+	}
+
+	return rc;
+}
+
+static void int_node__delete(struct int_node *ilist)
+{
+	free(ilist);
+}
+
+static void intlist__node_delete(struct rblist *rblist __maybe_unused,
+				 struct rb_node *rb_node)
+{
+	struct int_node *node = container_of(rb_node, struct int_node, rb_node);
+
+	int_node__delete(node);
+}
+
+static int intlist__node_cmp(struct rb_node *rb_node, const void *entry)
+{
+	unsigned long i = (unsigned long)entry;
+	struct int_node *node = container_of(rb_node, struct int_node, rb_node);
+
+	if (node->i > i)
+		return 1;
+	else if (node->i < i)
+		return -1;
+
+	return 0;
+}
+
+int intlist__add(struct intlist *ilist, unsigned long i)
+{
+	return rblist__add_node(&ilist->rblist, (void *)i);
+}
+
+void intlist__remove(struct intlist *ilist, struct int_node *node)
+{
+	rblist__remove_node(&ilist->rblist, &node->rb_node);
+}
+
+static struct int_node *__intlist__findnew(struct intlist *ilist,
+					   unsigned long i, bool create)
+{
+	struct int_node *node = NULL;
+	struct rb_node *rb_node;
+
+	if (ilist == NULL)
+		return NULL;
+
+	if (create)
+		rb_node = rblist__findnew(&ilist->rblist, (void *)i);
+	else
+		rb_node = rblist__find(&ilist->rblist, (void *)i);
+
+	if (rb_node)
+		node = container_of(rb_node, struct int_node, rb_node);
+
+	return node;
+}
+
+struct int_node *intlist__find(struct intlist *ilist, unsigned long i)
+{
+	return __intlist__findnew(ilist, i, false);
+}
+
+struct int_node *intlist__findnew(struct intlist *ilist, unsigned long i)
+{
+	return __intlist__findnew(ilist, i, true);
+}
+
+static int intlist__parse_list(struct intlist *ilist, const char *s)
+{
+	char *sep;
+	int err;
+
+	do {
+		unsigned long value = strtol(s, &sep, 10);
+		err = -EINVAL;
+		if (*sep != ',' && *sep != '\0')
+			break;
+		err = intlist__add(ilist, value);
+		if (err)
+			break;
+		s = sep + 1;
+	} while (*sep != '\0');
+
+	return err;
+}
+
+struct intlist *intlist__new(const char *slist)
+{
+	struct intlist *ilist = malloc(sizeof(*ilist));
+
+	if (ilist != NULL) {
+		rblist__init(&ilist->rblist);
+		ilist->rblist.node_cmp    = intlist__node_cmp;
+		ilist->rblist.node_new    = intlist__node_new;
+		ilist->rblist.node_delete = intlist__node_delete;
+
+		if (slist && intlist__parse_list(ilist, slist))
+			goto out_delete;
+	}
+
+	return ilist;
+out_delete:
+	intlist__delete(ilist);
+	return NULL;
+}
+
+void intlist__delete(struct intlist *ilist)
+{
+	if (ilist != NULL)
+		rblist__delete(&ilist->rblist);
+}
+
+struct int_node *intlist__entry(const struct intlist *ilist, unsigned int idx)
+{
+	struct int_node *node = NULL;
+	struct rb_node *rb_node;
+
+	rb_node = rblist__entry(&ilist->rblist, idx);
+	if (rb_node)
+		node = container_of(rb_node, struct int_node, rb_node);
+
+	return node;
+}
diff --git a/tools/perf/util/intlist.h b/tools/perf/util/intlist.h
new file mode 100644
index 000000000..e336b174d
--- /dev/null
+++ b/tools/perf/util/intlist.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_INTLIST_H
+#define __PERF_INTLIST_H
+
+#include <linux/rbtree.h>
+#include <stdbool.h>
+
+#include "rblist.h"
+
+struct int_node {
+	struct rb_node rb_node;
+	unsigned long i;
+	void *priv;
+};
+
+struct intlist {
+	struct rblist rblist;
+};
+
+struct intlist *intlist__new(const char *slist);
+void intlist__delete(struct intlist *ilist);
+
+void intlist__remove(struct intlist *ilist, struct int_node *in);
+int intlist__add(struct intlist *ilist, unsigned long i);
+
+struct int_node *intlist__entry(const struct intlist *ilist, unsigned int idx);
+struct int_node *intlist__find(struct intlist *ilist, unsigned long i);
+struct int_node *intlist__findnew(struct intlist *ilist, unsigned long i);
+
+static inline bool intlist__has_entry(struct intlist *ilist, unsigned long i)
+{
+	return intlist__find(ilist, i) != NULL;
+}
+
+static inline bool intlist__empty(const struct intlist *ilist)
+{
+	return rblist__empty(&ilist->rblist);
+}
+
+static inline unsigned int intlist__nr_entries(const struct intlist *ilist)
+{
+	return rblist__nr_entries(&ilist->rblist);
+}
+
+/* For intlist iteration */
+static inline struct int_node *intlist__first(struct intlist *ilist)
+{
+	struct rb_node *rn = rb_first_cached(&ilist->rblist.entries);
+	return rn ? rb_entry(rn, struct int_node, rb_node) : NULL;
+}
+static inline struct int_node *intlist__next(struct int_node *in)
+{
+	struct rb_node *rn;
+	if (!in)
+		return NULL;
+	rn = rb_next(&in->rb_node);
+	return rn ? rb_entry(rn, struct int_node, rb_node) : NULL;
+}
+
+/**
+ * intlist__for_each_entry      - iterate over a intlist
+ * @pos:	the &struct int_node to use as a loop cursor.
+ * @ilist:	the &struct intlist for loop.
+ */
+#define intlist__for_each_entry(pos, ilist)	\
+	for (pos = intlist__first(ilist); pos; pos = intlist__next(pos))
+
+/**
+ * intlist__for_each_entry_safe - iterate over a intlist safe against removal of
+ *                         int_node
+ * @pos:	the &struct int_node to use as a loop cursor.
+ * @n:		another &struct int_node to use as temporary storage.
+ * @ilist:	the &struct intlist for loop.
+ */
+#define intlist__for_each_entry_safe(pos, n, ilist)	\
+	for (pos = intlist__first(ilist), n = intlist__next(pos); pos;\
+	     pos = n, n = intlist__next(n))
+#endif /* __PERF_INTLIST_H */
diff --git a/tools/perf/util/iostat.c b/tools/perf/util/iostat.c
new file mode 100644
index 000000000..57dd49da2
--- /dev/null
+++ b/tools/perf/util/iostat.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util/iostat.h"
+#include "util/debug.h"
+
+enum iostat_mode_t iostat_mode = IOSTAT_NONE;
+
+__weak int iostat_prepare(struct evlist *evlist __maybe_unused,
+			  struct perf_stat_config *config __maybe_unused)
+{
+	return -1;
+}
+
+__weak int iostat_parse(const struct option *opt __maybe_unused,
+			 const char *str __maybe_unused,
+			 int unset __maybe_unused)
+{
+	pr_err("iostat mode is not supported on current platform\n");
+	return -1;
+}
+
+__weak void iostat_list(struct evlist *evlist __maybe_unused,
+		       struct perf_stat_config *config __maybe_unused)
+{
+}
+
+__weak void iostat_release(struct evlist *evlist __maybe_unused)
+{
+}
+
+__weak void iostat_print_header_prefix(struct perf_stat_config *config __maybe_unused)
+{
+}
+
+__weak void iostat_print_metric(struct perf_stat_config *config __maybe_unused,
+				struct evsel *evsel __maybe_unused,
+				struct perf_stat_output_ctx *out __maybe_unused)
+{
+}
+
+__weak void iostat_prefix(struct evlist *evlist __maybe_unused,
+			  struct perf_stat_config *config __maybe_unused,
+			  char *prefix __maybe_unused,
+			  struct timespec *ts __maybe_unused)
+{
+}
+
+__weak void iostat_print_counters(struct evlist *evlist __maybe_unused,
+				  struct perf_stat_config *config __maybe_unused,
+				  struct timespec *ts __maybe_unused,
+				  char *prefix __maybe_unused,
+				  iostat_print_counter_t print_cnt_cb __maybe_unused)
+{
+}
diff --git a/tools/perf/util/iostat.h b/tools/perf/util/iostat.h
new file mode 100644
index 000000000..23c1c46a3
--- /dev/null
+++ b/tools/perf/util/iostat.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * perf iostat
+ *
+ * Copyright (C) 2020, Intel Corporation
+ *
+ * Authors: Alexander Antonov <alexander.antonov@linux.intel.com>
+ */
+
+#ifndef _IOSTAT_H
+#define _IOSTAT_H
+
+#include <subcmd/parse-options.h>
+#include "util/stat.h"
+#include "util/parse-events.h"
+#include "util/evlist.h"
+
+struct option;
+struct perf_stat_config;
+struct evlist;
+struct timespec;
+
+enum iostat_mode_t {
+	IOSTAT_NONE		= -1,
+	IOSTAT_RUN		= 0,
+	IOSTAT_LIST		= 1
+};
+
+extern enum iostat_mode_t iostat_mode;
+
+typedef void (*iostat_print_counter_t)(struct perf_stat_config *, struct evsel *, char *);
+
+int iostat_prepare(struct evlist *evlist, struct perf_stat_config *config);
+int iostat_parse(const struct option *opt, const char *str,
+		 int unset __maybe_unused);
+void iostat_list(struct evlist *evlist, struct perf_stat_config *config);
+void iostat_release(struct evlist *evlist);
+void iostat_prefix(struct evlist *evlist, struct perf_stat_config *config,
+		   char *prefix, struct timespec *ts);
+void iostat_print_header_prefix(struct perf_stat_config *config);
+void iostat_print_metric(struct perf_stat_config *config, struct evsel *evsel,
+			 struct perf_stat_output_ctx *out);
+void iostat_print_counters(struct evlist *evlist,
+			   struct perf_stat_config *config, struct timespec *ts,
+			   char *prefix, iostat_print_counter_t print_cnt_cb);
+
+#endif /* _IOSTAT_H */
diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h
new file mode 100644
index 000000000..fb810e1b2
--- /dev/null
+++ b/tools/perf/util/jit.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __JIT_H__
+#define __JIT_H__
+
+#include <data.h>
+
+int jit_process(struct perf_session *session, struct perf_data *output,
+		struct machine *machine, char *filename, pid_t pid, pid_t tid, u64 *nbytes);
+
+int jit_inject_record(const char *filename);
+
+#endif /* __JIT_H__ */
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
new file mode 100644
index 000000000..0e033278f
--- /dev/null
+++ b/tools/perf/util/jitdump.c
@@ -0,0 +1,879 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <errno.h>
+#include <libgen.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <byteswap.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <linux/stringify.h>
+
+#include "build-id.h"
+#include "event.h"
+#include "debug.h"
+#include "evlist.h"
+#include "namespaces.h"
+#include "symbol.h"
+#include <elf.h>
+
+#include "tsc.h"
+#include "session.h"
+#include "jit.h"
+#include "jitdump.h"
+#include "genelf.h"
+#include "thread.h"
+
+#include <linux/ctype.h>
+#include <linux/zalloc.h>
+
+struct jit_buf_desc {
+	struct perf_data *output;
+	struct perf_session *session;
+	struct machine *machine;
+	struct nsinfo  *nsi;
+	union jr_entry   *entry;
+	void             *buf;
+	uint64_t	 sample_type;
+	size_t           bufsize;
+	FILE             *in;
+	bool		 needs_bswap; /* handles cross-endianness */
+	bool		 use_arch_timestamp;
+	void		 *debug_data;
+	void		 *unwinding_data;
+	uint64_t	 unwinding_size;
+	uint64_t	 unwinding_mapped_size;
+	uint64_t         eh_frame_hdr_size;
+	size_t		 nr_debug_entries;
+	uint32_t         code_load_count;
+	u64		 bytes_written;
+	struct rb_root   code_root;
+	char		 dir[PATH_MAX];
+};
+
+struct jit_tool {
+	struct perf_tool tool;
+	struct perf_data	output;
+	struct perf_data	input;
+	u64 bytes_written;
+};
+
+#define hmax(a, b) ((a) > (b) ? (a) : (b))
+#define get_jit_tool(t) (container_of(tool, struct jit_tool, tool))
+
+static int
+jit_emit_elf(struct jit_buf_desc *jd,
+	     char *filename,
+	     const char *sym,
+	     uint64_t code_addr,
+	     const void *code,
+	     int csize,
+	     void *debug,
+	     int nr_debug_entries,
+	     void *unwinding,
+	     uint32_t unwinding_header_size,
+	     uint32_t unwinding_size)
+{
+	int ret, fd, saved_errno;
+	struct nscookie nsc;
+
+	if (verbose > 0)
+		fprintf(stderr, "write ELF image %s\n", filename);
+
+	nsinfo__mountns_enter(jd->nsi, &nsc);
+	fd = open(filename, O_CREAT|O_TRUNC|O_WRONLY, 0644);
+	saved_errno = errno;
+	nsinfo__mountns_exit(&nsc);
+	if (fd == -1) {
+		pr_warning("cannot create jit ELF %s: %s\n", filename, strerror(saved_errno));
+		return -1;
+	}
+
+	ret = jit_write_elf(fd, code_addr, sym, (const void *)code, csize, debug, nr_debug_entries,
+			    unwinding, unwinding_header_size, unwinding_size);
+
+        close(fd);
+
+	if (ret) {
+		nsinfo__mountns_enter(jd->nsi, &nsc);
+		unlink(filename);
+		nsinfo__mountns_exit(&nsc);
+	}
+
+	return ret;
+}
+
+static void
+jit_close(struct jit_buf_desc *jd)
+{
+	if (!(jd && jd->in))
+		return;
+	funlockfile(jd->in);
+	fclose(jd->in);
+	jd->in = NULL;
+}
+
+static int
+jit_validate_events(struct perf_session *session)
+{
+	struct evsel *evsel;
+
+	/*
+	 * check that all events use CLOCK_MONOTONIC
+	 */
+	evlist__for_each_entry(session->evlist, evsel) {
+		if (evsel->core.attr.use_clockid == 0 || evsel->core.attr.clockid != CLOCK_MONOTONIC)
+			return -1;
+	}
+	return 0;
+}
+
+static int
+jit_open(struct jit_buf_desc *jd, const char *name)
+{
+	struct jitheader header;
+	struct nscookie nsc;
+	struct jr_prefix *prefix;
+	ssize_t bs, bsz = 0;
+	void *n, *buf = NULL;
+	int ret, retval = -1;
+
+	nsinfo__mountns_enter(jd->nsi, &nsc);
+	jd->in = fopen(name, "r");
+	nsinfo__mountns_exit(&nsc);
+	if (!jd->in)
+		return -1;
+
+	bsz = hmax(sizeof(header), sizeof(*prefix));
+
+	buf = malloc(bsz);
+	if (!buf)
+		goto error;
+
+	/*
+	 * protect from writer modifying the file while we are reading it
+	 */
+	flockfile(jd->in);
+
+	ret = fread(buf, sizeof(header), 1, jd->in);
+	if (ret != 1)
+		goto error;
+
+	memcpy(&header, buf, sizeof(header));
+
+	if (header.magic != JITHEADER_MAGIC) {
+		if (header.magic != JITHEADER_MAGIC_SW)
+			goto error;
+		jd->needs_bswap = true;
+	}
+
+	if (jd->needs_bswap) {
+		header.version    = bswap_32(header.version);
+		header.total_size = bswap_32(header.total_size);
+		header.pid	  = bswap_32(header.pid);
+		header.elf_mach   = bswap_32(header.elf_mach);
+		header.timestamp  = bswap_64(header.timestamp);
+		header.flags      = bswap_64(header.flags);
+	}
+
+	jd->use_arch_timestamp = header.flags & JITDUMP_FLAGS_ARCH_TIMESTAMP;
+
+	if (verbose > 2)
+		pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\nuse_arch_timestamp=%d\n",
+			header.version,
+			header.total_size,
+			(unsigned long long)header.timestamp,
+			header.pid,
+			header.elf_mach,
+			jd->use_arch_timestamp);
+
+	if (header.version > JITHEADER_VERSION) {
+		pr_err("wrong jitdump version %u, expected " __stringify(JITHEADER_VERSION),
+			header.version);
+		goto error;
+	}
+
+	if (header.flags & JITDUMP_FLAGS_RESERVED) {
+		pr_err("jitdump file contains invalid or unsupported flags 0x%llx\n",
+		       (unsigned long long)header.flags & JITDUMP_FLAGS_RESERVED);
+		goto error;
+	}
+
+	if (jd->use_arch_timestamp && !jd->session->time_conv.time_mult) {
+		pr_err("jitdump file uses arch timestamps but there is no timestamp conversion\n");
+		goto error;
+	}
+
+	/*
+	 * validate event is using the correct clockid
+	 */
+	if (!jd->use_arch_timestamp && jit_validate_events(jd->session)) {
+		pr_err("error, jitted code must be sampled with perf record -k 1\n");
+		goto error;
+	}
+
+	bs = header.total_size - sizeof(header);
+
+	if (bs > bsz) {
+		n = realloc(buf, bs);
+		if (!n)
+			goto error;
+		bsz = bs;
+		buf = n;
+		/* read extra we do not know about */
+		ret = fread(buf, bs - bsz, 1, jd->in);
+		if (ret != 1)
+			goto error;
+	}
+	/*
+	 * keep dirname for generating files and mmap records
+	 */
+	strcpy(jd->dir, name);
+	dirname(jd->dir);
+
+	return 0;
+error:
+	funlockfile(jd->in);
+	fclose(jd->in);
+	return retval;
+}
+
+static union jr_entry *
+jit_get_next_entry(struct jit_buf_desc *jd)
+{
+	struct jr_prefix *prefix;
+	union jr_entry *jr;
+	void *addr;
+	size_t bs, size;
+	int id, ret;
+
+	if (!(jd && jd->in))
+		return NULL;
+
+	if (jd->buf == NULL) {
+		size_t sz = getpagesize();
+		if (sz < sizeof(*prefix))
+			sz = sizeof(*prefix);
+
+		jd->buf = malloc(sz);
+		if (jd->buf == NULL)
+			return NULL;
+
+		jd->bufsize = sz;
+	}
+
+	prefix = jd->buf;
+
+	/*
+	 * file is still locked at this point
+	 */
+	ret = fread(prefix, sizeof(*prefix), 1, jd->in);
+	if (ret  != 1)
+		return NULL;
+
+	if (jd->needs_bswap) {
+		prefix->id   	   = bswap_32(prefix->id);
+		prefix->total_size = bswap_32(prefix->total_size);
+		prefix->timestamp  = bswap_64(prefix->timestamp);
+	}
+	id   = prefix->id;
+	size = prefix->total_size;
+
+	bs = (size_t)size;
+	if (bs < sizeof(*prefix))
+		return NULL;
+
+	if (id >= JIT_CODE_MAX) {
+		pr_warning("next_entry: unknown record type %d, skipping\n", id);
+	}
+	if (bs > jd->bufsize) {
+		void *n;
+		n = realloc(jd->buf, bs);
+		if (!n)
+			return NULL;
+		jd->buf = n;
+		jd->bufsize = bs;
+	}
+
+	addr = ((void *)jd->buf) + sizeof(*prefix);
+
+	ret = fread(addr, bs - sizeof(*prefix), 1, jd->in);
+	if (ret != 1)
+		return NULL;
+
+	jr = (union jr_entry *)jd->buf;
+
+	switch(id) {
+	case JIT_CODE_DEBUG_INFO:
+		if (jd->needs_bswap) {
+			uint64_t n;
+			jr->info.code_addr = bswap_64(jr->info.code_addr);
+			jr->info.nr_entry  = bswap_64(jr->info.nr_entry);
+			for (n = 0 ; n < jr->info.nr_entry; n++) {
+				jr->info.entries[n].addr    = bswap_64(jr->info.entries[n].addr);
+				jr->info.entries[n].lineno  = bswap_32(jr->info.entries[n].lineno);
+				jr->info.entries[n].discrim = bswap_32(jr->info.entries[n].discrim);
+			}
+		}
+		break;
+	case JIT_CODE_UNWINDING_INFO:
+		if (jd->needs_bswap) {
+			jr->unwinding.unwinding_size = bswap_64(jr->unwinding.unwinding_size);
+			jr->unwinding.eh_frame_hdr_size = bswap_64(jr->unwinding.eh_frame_hdr_size);
+			jr->unwinding.mapped_size = bswap_64(jr->unwinding.mapped_size);
+		}
+		break;
+	case JIT_CODE_CLOSE:
+		break;
+	case JIT_CODE_LOAD:
+		if (jd->needs_bswap) {
+			jr->load.pid       = bswap_32(jr->load.pid);
+			jr->load.tid       = bswap_32(jr->load.tid);
+			jr->load.vma       = bswap_64(jr->load.vma);
+			jr->load.code_addr = bswap_64(jr->load.code_addr);
+			jr->load.code_size = bswap_64(jr->load.code_size);
+			jr->load.code_index= bswap_64(jr->load.code_index);
+		}
+		jd->code_load_count++;
+		break;
+	case JIT_CODE_MOVE:
+		if (jd->needs_bswap) {
+			jr->move.pid           = bswap_32(jr->move.pid);
+			jr->move.tid           = bswap_32(jr->move.tid);
+			jr->move.vma           = bswap_64(jr->move.vma);
+			jr->move.old_code_addr = bswap_64(jr->move.old_code_addr);
+			jr->move.new_code_addr = bswap_64(jr->move.new_code_addr);
+			jr->move.code_size     = bswap_64(jr->move.code_size);
+			jr->move.code_index    = bswap_64(jr->move.code_index);
+		}
+		break;
+	case JIT_CODE_MAX:
+	default:
+		/* skip unknown record (we have read them) */
+		break;
+	}
+	return jr;
+}
+
+static int
+jit_inject_event(struct jit_buf_desc *jd, union perf_event *event)
+{
+	ssize_t size;
+
+	size = perf_data__write(jd->output, event, event->header.size);
+	if (size < 0)
+		return -1;
+
+	jd->bytes_written += size;
+	return 0;
+}
+
+static pid_t jr_entry_pid(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+	if (jd->nsi && nsinfo__in_pidns(jd->nsi))
+		return nsinfo__tgid(jd->nsi);
+	return jr->load.pid;
+}
+
+static pid_t jr_entry_tid(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+	if (jd->nsi && nsinfo__in_pidns(jd->nsi))
+		return nsinfo__pid(jd->nsi);
+	return jr->load.tid;
+}
+
+static uint64_t convert_timestamp(struct jit_buf_desc *jd, uint64_t timestamp)
+{
+	struct perf_tsc_conversion tc = { .time_shift = 0, };
+	struct perf_record_time_conv *time_conv = &jd->session->time_conv;
+
+	if (!jd->use_arch_timestamp)
+		return timestamp;
+
+	tc.time_shift = time_conv->time_shift;
+	tc.time_mult  = time_conv->time_mult;
+	tc.time_zero  = time_conv->time_zero;
+
+	/*
+	 * The event TIME_CONV was extended for the fields from "time_cycles"
+	 * when supported cap_user_time_short, for backward compatibility,
+	 * checks the event size and assigns these extended fields if these
+	 * fields are contained in the event.
+	 */
+	if (event_contains(*time_conv, time_cycles)) {
+		tc.time_cycles	       = time_conv->time_cycles;
+		tc.time_mask	       = time_conv->time_mask;
+		tc.cap_user_time_zero  = time_conv->cap_user_time_zero;
+		tc.cap_user_time_short = time_conv->cap_user_time_short;
+
+		if (!tc.cap_user_time_zero)
+			return 0;
+	}
+
+	return tsc_to_perf_time(timestamp, &tc);
+}
+
+static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+	struct perf_sample sample;
+	union perf_event *event;
+	struct perf_tool *tool = jd->session->tool;
+	uint64_t code, addr;
+	uintptr_t uaddr;
+	char *filename;
+	struct stat st;
+	size_t size;
+	u16 idr_size;
+	const char *sym;
+	uint64_t count;
+	int ret, csize, usize;
+	pid_t nspid, pid, tid;
+	struct {
+		u32 pid, tid;
+		u64 time;
+	} *id;
+
+	nspid = jr->load.pid;
+	pid   = jr_entry_pid(jd, jr);
+	tid   = jr_entry_tid(jd, jr);
+	csize = jr->load.code_size;
+	usize = jd->unwinding_mapped_size;
+	addr  = jr->load.code_addr;
+	sym   = (void *)((unsigned long)jr + sizeof(jr->load));
+	code  = (unsigned long)jr + jr->load.p.total_size - csize;
+	count = jr->load.code_index;
+	idr_size = jd->machine->id_hdr_size;
+
+	event = calloc(1, sizeof(*event) + idr_size);
+	if (!event)
+		return -1;
+
+	filename = event->mmap2.filename;
+	size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%" PRIu64 ".so",
+			jd->dir,
+			nspid,
+			count);
+
+	size++; /* for \0 */
+
+	size = PERF_ALIGN(size, sizeof(u64));
+	uaddr = (uintptr_t)code;
+	ret = jit_emit_elf(jd, filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries,
+			   jd->unwinding_data, jd->eh_frame_hdr_size, jd->unwinding_size);
+
+	if (jd->debug_data && jd->nr_debug_entries) {
+		zfree(&jd->debug_data);
+		jd->nr_debug_entries = 0;
+	}
+
+	if (jd->unwinding_data && jd->eh_frame_hdr_size) {
+		zfree(&jd->unwinding_data);
+		jd->eh_frame_hdr_size = 0;
+		jd->unwinding_mapped_size = 0;
+		jd->unwinding_size = 0;
+	}
+
+	if (ret) {
+		free(event);
+		return -1;
+	}
+	if (nsinfo__stat(filename, &st, jd->nsi))
+		memset(&st, 0, sizeof(st));
+
+	event->mmap2.header.type = PERF_RECORD_MMAP2;
+	event->mmap2.header.misc = PERF_RECORD_MISC_USER;
+	event->mmap2.header.size = (sizeof(event->mmap2) -
+			(sizeof(event->mmap2.filename) - size) + idr_size);
+
+	event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET;
+	event->mmap2.start = addr;
+	event->mmap2.len   = usize ? ALIGN_8(csize) + usize : csize;
+	event->mmap2.pid   = pid;
+	event->mmap2.tid   = tid;
+	event->mmap2.ino   = st.st_ino;
+	event->mmap2.maj   = major(st.st_dev);
+	event->mmap2.min   = minor(st.st_dev);
+	event->mmap2.prot  = st.st_mode;
+	event->mmap2.flags = MAP_SHARED;
+	event->mmap2.ino_generation = 1;
+
+	id = (void *)((unsigned long)event + event->mmap.header.size - idr_size);
+	if (jd->sample_type & PERF_SAMPLE_TID) {
+		id->pid  = pid;
+		id->tid  = tid;
+	}
+	if (jd->sample_type & PERF_SAMPLE_TIME)
+		id->time = convert_timestamp(jd, jr->load.p.timestamp);
+
+	/*
+	 * create pseudo sample to induce dso hit increment
+	 * use first address as sample address
+	 */
+	memset(&sample, 0, sizeof(sample));
+	sample.cpumode = PERF_RECORD_MISC_USER;
+	sample.pid  = pid;
+	sample.tid  = tid;
+	sample.time = id->time;
+	sample.ip   = addr;
+
+	ret = perf_event__process_mmap2(tool, event, &sample, jd->machine);
+	if (ret)
+		return ret;
+
+	ret = jit_inject_event(jd, event);
+	/*
+	 * mark dso as use to generate buildid in the header
+	 */
+	if (!ret)
+		build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine);
+
+	return ret;
+}
+
+static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+	struct perf_sample sample;
+	union perf_event *event;
+	struct perf_tool *tool = jd->session->tool;
+	char *filename;
+	size_t size;
+	struct stat st;
+	int usize;
+	u16 idr_size;
+	int ret;
+	pid_t nspid, pid, tid;
+	struct {
+		u32 pid, tid;
+		u64 time;
+	} *id;
+
+	nspid = jr->load.pid;
+	pid   = jr_entry_pid(jd, jr);
+	tid   = jr_entry_tid(jd, jr);
+	usize = jd->unwinding_mapped_size;
+	idr_size = jd->machine->id_hdr_size;
+
+	/*
+	 * +16 to account for sample_id_all (hack)
+	 */
+	event = calloc(1, sizeof(*event) + 16);
+	if (!event)
+		return -1;
+
+	filename = event->mmap2.filename;
+	size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%" PRIu64 ".so",
+	         jd->dir,
+		 nspid,
+		 jr->move.code_index);
+
+	size++; /* for \0 */
+
+	if (nsinfo__stat(filename, &st, jd->nsi))
+		memset(&st, 0, sizeof(st));
+
+	size = PERF_ALIGN(size, sizeof(u64));
+
+	event->mmap2.header.type = PERF_RECORD_MMAP2;
+	event->mmap2.header.misc = PERF_RECORD_MISC_USER;
+	event->mmap2.header.size = (sizeof(event->mmap2) -
+			(sizeof(event->mmap2.filename) - size) + idr_size);
+	event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET;
+	event->mmap2.start = jr->move.new_code_addr;
+	event->mmap2.len   = usize ? ALIGN_8(jr->move.code_size) + usize
+				   : jr->move.code_size;
+	event->mmap2.pid   = pid;
+	event->mmap2.tid   = tid;
+	event->mmap2.ino   = st.st_ino;
+	event->mmap2.maj   = major(st.st_dev);
+	event->mmap2.min   = minor(st.st_dev);
+	event->mmap2.prot  = st.st_mode;
+	event->mmap2.flags = MAP_SHARED;
+	event->mmap2.ino_generation = 1;
+
+	id = (void *)((unsigned long)event + event->mmap.header.size - idr_size);
+	if (jd->sample_type & PERF_SAMPLE_TID) {
+		id->pid  = pid;
+		id->tid  = tid;
+	}
+	if (jd->sample_type & PERF_SAMPLE_TIME)
+		id->time = convert_timestamp(jd, jr->load.p.timestamp);
+
+	/*
+	 * create pseudo sample to induce dso hit increment
+	 * use first address as sample address
+	 */
+	memset(&sample, 0, sizeof(sample));
+	sample.cpumode = PERF_RECORD_MISC_USER;
+	sample.pid  = pid;
+	sample.tid  = tid;
+	sample.time = id->time;
+	sample.ip   = jr->move.new_code_addr;
+
+	ret = perf_event__process_mmap2(tool, event, &sample, jd->machine);
+	if (ret)
+		return ret;
+
+	ret = jit_inject_event(jd, event);
+	if (!ret)
+		build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine);
+
+	return ret;
+}
+
+static int jit_repipe_debug_info(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+	void *data;
+	size_t sz;
+
+	if (!(jd && jr))
+		return -1;
+
+	sz  = jr->prefix.total_size - sizeof(jr->info);
+	data = malloc(sz);
+	if (!data)
+		return -1;
+
+	memcpy(data, &jr->info.entries, sz);
+
+	jd->debug_data       = data;
+
+	/*
+	 * we must use nr_entry instead of size here because
+	 * we cannot distinguish actual entry from padding otherwise
+	 */
+	jd->nr_debug_entries = jr->info.nr_entry;
+
+	return 0;
+}
+
+static int
+jit_repipe_unwinding_info(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+	void *unwinding_data;
+	uint32_t unwinding_data_size;
+
+	if (!(jd && jr))
+		return -1;
+
+	unwinding_data_size  = jr->prefix.total_size - sizeof(jr->unwinding);
+	unwinding_data = malloc(unwinding_data_size);
+	if (!unwinding_data)
+		return -1;
+
+	memcpy(unwinding_data, &jr->unwinding.unwinding_data,
+	       unwinding_data_size);
+
+	jd->eh_frame_hdr_size = jr->unwinding.eh_frame_hdr_size;
+	jd->unwinding_size = jr->unwinding.unwinding_size;
+	jd->unwinding_mapped_size = jr->unwinding.mapped_size;
+	jd->unwinding_data = unwinding_data;
+
+	return 0;
+}
+
+static int
+jit_process_dump(struct jit_buf_desc *jd)
+{
+	union jr_entry *jr;
+	int ret = 0;
+
+	while ((jr = jit_get_next_entry(jd))) {
+		switch(jr->prefix.id) {
+		case JIT_CODE_LOAD:
+			ret = jit_repipe_code_load(jd, jr);
+			break;
+		case JIT_CODE_MOVE:
+			ret = jit_repipe_code_move(jd, jr);
+			break;
+		case JIT_CODE_DEBUG_INFO:
+			ret = jit_repipe_debug_info(jd, jr);
+			break;
+		case JIT_CODE_UNWINDING_INFO:
+			ret = jit_repipe_unwinding_info(jd, jr);
+			break;
+		default:
+			ret = 0;
+			continue;
+		}
+	}
+	return ret;
+}
+
+static int
+jit_inject(struct jit_buf_desc *jd, char *path)
+{
+	int ret;
+
+	if (verbose > 0)
+		fprintf(stderr, "injecting: %s\n", path);
+
+	ret = jit_open(jd, path);
+	if (ret)
+		return -1;
+
+	ret = jit_process_dump(jd);
+
+	jit_close(jd);
+
+	if (verbose > 0)
+		fprintf(stderr, "injected: %s (%d)\n", path, ret);
+
+	return 0;
+}
+
+/*
+ * File must be with pattern .../jit-XXXX.dump
+ * where XXXX is the PID of the process which did the mmap()
+ * as captured in the RECORD_MMAP record
+ */
+static int
+jit_detect(char *mmap_name, pid_t pid, struct nsinfo *nsi)
+ {
+	char *p;
+	char *end = NULL;
+	pid_t pid2;
+
+	if (verbose > 2)
+		fprintf(stderr, "jit marker trying : %s\n", mmap_name);
+	/*
+	 * get file name
+	 */
+	p = strrchr(mmap_name, '/');
+	if (!p)
+		return -1;
+
+	/*
+	 * match prefix
+	 */
+	if (strncmp(p, "/jit-", 5))
+		return -1;
+
+	/*
+	 * skip prefix
+	 */
+	p += 5;
+
+	/*
+	 * must be followed by a pid
+	 */
+	if (!isdigit(*p))
+		return -1;
+
+	pid2 = (int)strtol(p, &end, 10);
+	if (!end)
+		return -1;
+
+	/*
+	 * pid does not match mmap pid
+	 * pid==0 in system-wide mode (synthesized)
+	 */
+	if (pid && pid2 != nsinfo__nstgid(nsi))
+		return -1;
+	/*
+	 * validate suffix
+	 */
+	if (strcmp(end, ".dump"))
+		return -1;
+
+	if (verbose > 0)
+		fprintf(stderr, "jit marker found: %s\n", mmap_name);
+
+	return 0;
+}
+
+static void jit_add_pid(struct machine *machine, pid_t pid)
+{
+	struct thread *thread = machine__findnew_thread(machine, pid, pid);
+
+	if (!thread) {
+		pr_err("%s: thread %d not found or created\n", __func__, pid);
+		return;
+	}
+
+	thread->priv = (void *)1;
+}
+
+static bool jit_has_pid(struct machine *machine, pid_t pid)
+{
+	struct thread *thread = machine__find_thread(machine, pid, pid);
+
+	if (!thread)
+		return 0;
+
+	return (bool)thread->priv;
+}
+
+int
+jit_process(struct perf_session *session,
+	    struct perf_data *output,
+	    struct machine *machine,
+	    char *filename,
+	    pid_t pid,
+	    pid_t tid,
+	    u64 *nbytes)
+{
+	struct thread *thread;
+	struct nsinfo *nsi;
+	struct evsel *first;
+	struct jit_buf_desc jd;
+	int ret;
+
+	thread = machine__findnew_thread(machine, pid, tid);
+	if (thread == NULL) {
+		pr_err("problem processing JIT mmap event, skipping it.\n");
+		return 0;
+	}
+
+	nsi = nsinfo__get(thread->nsinfo);
+	thread__put(thread);
+
+	/*
+	 * first, detect marker mmap (i.e., the jitdump mmap)
+	 */
+	if (jit_detect(filename, pid, nsi)) {
+		nsinfo__put(nsi);
+
+		/*
+		 * Strip //anon*, [anon:* and /memfd:* mmaps if we processed a jitdump for this pid
+		 */
+		if (jit_has_pid(machine, pid) &&
+			((strncmp(filename, "//anon", 6) == 0) ||
+			 (strncmp(filename, "[anon:", 6) == 0) ||
+			 (strncmp(filename, "/memfd:", 7) == 0)))
+			return 1;
+
+		return 0;
+	}
+
+	memset(&jd, 0, sizeof(jd));
+
+	jd.session = session;
+	jd.output  = output;
+	jd.machine = machine;
+	jd.nsi = nsi;
+
+	/*
+	 * track sample_type to compute id_all layout
+	 * perf sets the same sample type to all events as of now
+	 */
+	first = evlist__first(session->evlist);
+	jd.sample_type = first->core.attr.sample_type;
+
+	*nbytes = 0;
+
+	ret = jit_inject(&jd, filename);
+	if (!ret) {
+		jit_add_pid(machine, pid);
+		*nbytes = jd.bytes_written;
+		ret = 1;
+	}
+
+	nsinfo__put(jd.nsi);
+
+	return ret;
+}
diff --git a/tools/perf/util/jitdump.h b/tools/perf/util/jitdump.h
new file mode 100644
index 000000000..ab2842def
--- /dev/null
+++ b/tools/perf/util/jitdump.h
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * jitdump.h: jitted code info encapsulation file format
+ *
+ * Adapted from OProfile GPLv2 support jidump.h:
+ * Copyright 2007 OProfile authors
+ * Jens Wilke
+ * Daniel Hansel
+ * Copyright IBM Corporation 2007
+ */
+#ifndef JITDUMP_H
+#define JITDUMP_H
+
+#include <sys/time.h>
+#include <time.h>
+#include <stdint.h>
+
+/* JiTD */
+#define JITHEADER_MAGIC		0x4A695444
+#define JITHEADER_MAGIC_SW	0x4454694A
+
+#define PADDING_8ALIGNED(x) ((((x) + 7) & 7) ^ 7)
+#define ALIGN_8(x) (((x) + 7) & (~7))
+
+#define JITHEADER_VERSION 1
+
+enum jitdump_flags_bits {
+	JITDUMP_FLAGS_ARCH_TIMESTAMP_BIT,
+	JITDUMP_FLAGS_MAX_BIT,
+};
+
+#define JITDUMP_FLAGS_ARCH_TIMESTAMP	(1ULL << JITDUMP_FLAGS_ARCH_TIMESTAMP_BIT)
+
+#define JITDUMP_FLAGS_RESERVED (JITDUMP_FLAGS_MAX_BIT < 64 ? \
+				(~((1ULL << JITDUMP_FLAGS_MAX_BIT) - 1)) : 0)
+
+struct jitheader {
+	uint32_t magic;		/* characters "jItD" */
+	uint32_t version;	/* header version */
+	uint32_t total_size;	/* total size of header */
+	uint32_t elf_mach;	/* elf mach target */
+	uint32_t pad1;		/* reserved */
+	uint32_t pid;		/* JIT process id */
+	uint64_t timestamp;	/* timestamp */
+	uint64_t flags;		/* flags */
+};
+
+enum jit_record_type {
+	JIT_CODE_LOAD		= 0,
+        JIT_CODE_MOVE           = 1,
+	JIT_CODE_DEBUG_INFO	= 2,
+	JIT_CODE_CLOSE		= 3,
+	JIT_CODE_UNWINDING_INFO	= 4,
+
+	JIT_CODE_MAX,
+};
+
+/* record prefix (mandatory in each record) */
+struct jr_prefix {
+	uint32_t id;
+	uint32_t total_size;
+	uint64_t timestamp;
+};
+
+struct jr_code_load {
+	struct jr_prefix p;
+
+	uint32_t pid;
+	uint32_t tid;
+	uint64_t vma;
+	uint64_t code_addr;
+	uint64_t code_size;
+	uint64_t code_index;
+};
+
+struct jr_code_close {
+	struct jr_prefix p;
+};
+
+struct jr_code_move {
+	struct jr_prefix p;
+
+	uint32_t pid;
+	uint32_t tid;
+	uint64_t vma;
+	uint64_t old_code_addr;
+	uint64_t new_code_addr;
+	uint64_t code_size;
+	uint64_t code_index;
+};
+
+struct debug_entry {
+	uint64_t addr;
+	int lineno;	    /* source line number starting at 1 */
+	int discrim;	    /* column discriminator, 0 is default */
+	const char name[]; /* null terminated filename, \xff\0 if same as previous entry */
+};
+
+struct jr_code_debug_info {
+	struct jr_prefix p;
+
+	uint64_t code_addr;
+	uint64_t nr_entry;
+	struct debug_entry entries[];
+};
+
+struct jr_code_unwinding_info {
+	struct jr_prefix p;
+
+	uint64_t unwinding_size;
+	uint64_t eh_frame_hdr_size;
+	uint64_t mapped_size;
+	const char unwinding_data[];
+};
+
+union jr_entry {
+        struct jr_code_debug_info info;
+        struct jr_code_close close;
+        struct jr_code_load load;
+        struct jr_code_move move;
+        struct jr_prefix prefix;
+        struct jr_code_unwinding_info unwinding;
+};
+
+static inline struct debug_entry *
+debug_entry_next(struct debug_entry *ent)
+{
+	void *a = ent + 1;
+	size_t l = strlen(ent->name) + 1;
+	return a + l;
+}
+
+static inline char *
+debug_entry_file(struct debug_entry *ent)
+{
+	void *a = ent + 1;
+	return a;
+}
+
+#endif /* !JITDUMP_H */
diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h
new file mode 100644
index 000000000..6f0fa05b6
--- /dev/null
+++ b/tools/perf/util/kvm-stat.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_KVM_STAT_H
+#define __PERF_KVM_STAT_H
+
+#ifdef HAVE_KVM_STAT_SUPPORT
+
+#include "tool.h"
+#include "stat.h"
+#include "record.h"
+
+struct evsel;
+struct evlist;
+struct perf_session;
+
+struct event_key {
+	#define INVALID_KEY     (~0ULL)
+	u64 key;
+	int info;
+	struct exit_reasons_table *exit_reasons;
+};
+
+struct kvm_event_stats {
+	u64 time;
+	struct stats stats;
+};
+
+struct kvm_event {
+	struct list_head hash_entry;
+	struct rb_node rb;
+
+	struct event_key key;
+
+	struct kvm_event_stats total;
+
+	#define DEFAULT_VCPU_NUM 8
+	int max_vcpu;
+	struct kvm_event_stats *vcpu;
+};
+
+typedef int (*key_cmp_fun)(struct kvm_event*, struct kvm_event*, int);
+
+struct kvm_event_key {
+	const char *name;
+	key_cmp_fun key;
+};
+
+struct perf_kvm_stat;
+
+struct child_event_ops {
+	void (*get_key)(struct evsel *evsel,
+			struct perf_sample *sample,
+			struct event_key *key);
+	const char *name;
+};
+
+struct kvm_events_ops {
+	bool (*is_begin_event)(struct evsel *evsel,
+			       struct perf_sample *sample,
+			       struct event_key *key);
+	bool (*is_end_event)(struct evsel *evsel,
+			     struct perf_sample *sample, struct event_key *key);
+	struct child_event_ops *child_ops;
+	void (*decode_key)(struct perf_kvm_stat *kvm, struct event_key *key,
+			   char *decode);
+	const char *name;
+};
+
+struct exit_reasons_table {
+	unsigned long exit_code;
+	const char *reason;
+};
+
+#define EVENTS_BITS		12
+#define EVENTS_CACHE_SIZE	(1UL << EVENTS_BITS)
+
+struct perf_kvm_stat {
+	struct perf_tool    tool;
+	struct record_opts  opts;
+	struct evlist  *evlist;
+	struct perf_session *session;
+
+	const char *file_name;
+	const char *report_event;
+	const char *sort_key;
+	int trace_vcpu;
+
+	struct exit_reasons_table *exit_reasons;
+	const char *exit_reasons_isa;
+
+	struct kvm_events_ops *events_ops;
+	key_cmp_fun compare;
+	struct list_head kvm_events_cache[EVENTS_CACHE_SIZE];
+
+	u64 total_time;
+	u64 total_count;
+	u64 lost_events;
+	u64 duration;
+
+	struct intlist *pid_list;
+
+	struct rb_root result;
+
+	int timerfd;
+	unsigned int display_time;
+	bool live;
+	bool force;
+};
+
+struct kvm_reg_events_ops {
+	const char *name;
+	struct kvm_events_ops *ops;
+};
+
+void exit_event_get_key(struct evsel *evsel,
+			struct perf_sample *sample,
+			struct event_key *key);
+bool exit_event_begin(struct evsel *evsel,
+		      struct perf_sample *sample,
+		      struct event_key *key);
+bool exit_event_end(struct evsel *evsel,
+		    struct perf_sample *sample,
+		    struct event_key *key);
+void exit_event_decode_key(struct perf_kvm_stat *kvm,
+			   struct event_key *key,
+			   char *decode);
+
+bool kvm_exit_event(struct evsel *evsel);
+bool kvm_entry_event(struct evsel *evsel);
+int setup_kvm_events_tp(struct perf_kvm_stat *kvm);
+
+#define define_exit_reasons_table(name, symbols)	\
+	static struct exit_reasons_table name[] = {	\
+		symbols, { -1, NULL }			\
+	}
+
+/*
+ * arch specific callbacks and data structures
+ */
+int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid);
+
+extern const char *kvm_events_tp[];
+extern struct kvm_reg_events_ops kvm_reg_events_ops[];
+extern const char * const kvm_skip_events[];
+extern const char *vcpu_id_str;
+extern const int decode_str_len;
+extern const char *kvm_exit_reason;
+extern const char *kvm_entry_trace;
+extern const char *kvm_exit_trace;
+#endif /* HAVE_KVM_STAT_SUPPORT */
+
+extern int kvm_add_default_arch_event(int *argc, const char **argv);
+#endif /* __PERF_KVM_STAT_H */
diff --git a/tools/perf/util/kwork.h b/tools/perf/util/kwork.h
new file mode 100644
index 000000000..320c0a6d2
--- /dev/null
+++ b/tools/perf/util/kwork.h
@@ -0,0 +1,257 @@
+#ifndef PERF_UTIL_KWORK_H
+#define PERF_UTIL_KWORK_H
+
+#include "perf.h"
+
+#include "util/tool.h"
+#include "util/event.h"
+#include "util/evlist.h"
+#include "util/session.h"
+#include "util/time-utils.h"
+
+#include <linux/list.h>
+#include <linux/bitmap.h>
+
+enum kwork_class_type {
+	KWORK_CLASS_IRQ,
+	KWORK_CLASS_SOFTIRQ,
+	KWORK_CLASS_WORKQUEUE,
+	KWORK_CLASS_MAX,
+};
+
+enum kwork_report_type {
+	KWORK_REPORT_RUNTIME,
+	KWORK_REPORT_LATENCY,
+	KWORK_REPORT_TIMEHIST,
+};
+
+enum kwork_trace_type {
+	KWORK_TRACE_RAISE,
+	KWORK_TRACE_ENTRY,
+	KWORK_TRACE_EXIT,
+	KWORK_TRACE_MAX,
+};
+
+/*
+ * data structure:
+ *
+ *                 +==================+ +============+ +======================+
+ *                 |      class       | |    work    | |         atom         |
+ *                 +==================+ +============+ +======================+
+ * +------------+  |  +-----+         | |  +------+  | |  +-------+   +-----+ |
+ * | perf_kwork | +-> | irq | --------|+-> | eth0 | --+-> | raise | - | ... | --+   +-----------+
+ * +-----+------+ ||  +-----+         |||  +------+  |||  +-------+   +-----+ | |   |           |
+ *       |        ||                  |||            |||                      | +-> | atom_page |
+ *       |        ||                  |||            |||  +-------+   +-----+ |     |           |
+ *       |  class_list                |||            |+-> | entry | - | ... | ----> |           |
+ *       |        ||                  |||            |||  +-------+   +-----+ |     |           |
+ *       |        ||                  |||            |||                      | +-> |           |
+ *       |        ||                  |||            |||  +-------+   +-----+ | |   |           |
+ *       |        ||                  |||            |+-> | exit  | - | ... | --+   +-----+-----+
+ *       |        ||                  |||            | |  +-------+   +-----+ |           |
+ *       |        ||                  |||            | |                      |           |
+ *       |        ||                  |||  +-----+   | |                      |           |
+ *       |        ||                  |+-> | ... |   | |                      |           |
+ *       |        ||                  | |  +-----+   | |                      |           |
+ *       |        ||                  | |            | |                      |           |
+ *       |        ||  +---------+     | |  +-----+   | |  +-------+   +-----+ |           |
+ *       |        +-> | softirq | -------> | RCU | ---+-> | raise | - | ... | --+   +-----+-----+
+ *       |        ||  +---------+     | |  +-----+   |||  +-------+   +-----+ | |   |           |
+ *       |        ||                  | |            |||                      | +-> | atom_page |
+ *       |        ||                  | |            |||  +-------+   +-----+ |     |           |
+ *       |        ||                  | |            |+-> | entry | - | ... | ----> |           |
+ *       |        ||                  | |            |||  +-------+   +-----+ |     |           |
+ *       |        ||                  | |            |||                      | +-> |           |
+ *       |        ||                  | |            |||  +-------+   +-----+ | |   |           |
+ *       |        ||                  | |            |+-> | exit  | - | ... | --+   +-----+-----+
+ *       |        ||                  | |            | |  +-------+   +-----+ |           |
+ *       |        ||                  | |            | |                      |           |
+ *       |        ||  +-----------+   | |  +-----+   | |                      |           |
+ *       |        +-> | workqueue | -----> | ... |   | |                      |           |
+ *       |         |  +-----------+   | |  +-----+   | |                      |           |
+ *       |         +==================+ +============+ +======================+           |
+ *       |                                                                                |
+ *       +---->  atom_page_list  ---------------------------------------------------------+
+ *
+ */
+
+struct kwork_atom {
+	struct list_head list;
+	u64 time;
+	struct kwork_atom *prev;
+
+	void *page_addr;
+	unsigned long bit_inpage;
+};
+
+#define NR_ATOM_PER_PAGE 128
+struct kwork_atom_page {
+	struct list_head list;
+	struct kwork_atom atoms[NR_ATOM_PER_PAGE];
+	DECLARE_BITMAP(bitmap, NR_ATOM_PER_PAGE);
+};
+
+struct kwork_class;
+struct kwork_work {
+	/*
+	 * class field
+	 */
+	struct rb_node node;
+	struct kwork_class *class;
+
+	/*
+	 * work field
+	 */
+	u64 id;
+	int cpu;
+	char *name;
+
+	/*
+	 * atom field
+	 */
+	u64 nr_atoms;
+	struct list_head atom_list[KWORK_TRACE_MAX];
+
+	/*
+	 * runtime report
+	 */
+	u64 max_runtime;
+	u64 max_runtime_start;
+	u64 max_runtime_end;
+	u64 total_runtime;
+
+	/*
+	 * latency report
+	 */
+	u64 max_latency;
+	u64 max_latency_start;
+	u64 max_latency_end;
+	u64 total_latency;
+};
+
+struct kwork_class {
+	struct list_head list;
+	const char *name;
+	enum kwork_class_type type;
+
+	unsigned int nr_tracepoints;
+	const struct evsel_str_handler *tp_handlers;
+
+	struct rb_root_cached work_root;
+
+	int (*class_init)(struct kwork_class *class,
+			  struct perf_session *session);
+
+	void (*work_init)(struct kwork_class *class,
+			  struct kwork_work *work,
+			  struct evsel *evsel,
+			  struct perf_sample *sample,
+			  struct machine *machine);
+
+	void (*work_name)(struct kwork_work *work,
+			  char *buf, int len);
+};
+
+struct perf_kwork;
+struct trace_kwork_handler {
+	int (*raise_event)(struct perf_kwork *kwork,
+			   struct kwork_class *class, struct evsel *evsel,
+			   struct perf_sample *sample, struct machine *machine);
+
+	int (*entry_event)(struct perf_kwork *kwork,
+			   struct kwork_class *class, struct evsel *evsel,
+			   struct perf_sample *sample, struct machine *machine);
+
+	int (*exit_event)(struct perf_kwork *kwork,
+			  struct kwork_class *class, struct evsel *evsel,
+			  struct perf_sample *sample, struct machine *machine);
+};
+
+struct perf_kwork {
+	/*
+	 * metadata
+	 */
+	struct perf_tool tool;
+	struct list_head class_list;
+	struct list_head atom_page_list;
+	struct list_head sort_list, cmp_id;
+	struct rb_root_cached sorted_work_root;
+	const struct trace_kwork_handler *tp_handler;
+
+	/*
+	 * profile filters
+	 */
+	const char *profile_name;
+
+	const char *cpu_list;
+	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+
+	const char *time_str;
+	struct perf_time_interval ptime;
+
+	/*
+	 * options for command
+	 */
+	bool force;
+	const char *event_list_str;
+	enum kwork_report_type report;
+
+	/*
+	 * options for subcommand
+	 */
+	bool summary;
+	const char *sort_order;
+	bool show_callchain;
+	unsigned int max_stack;
+	bool use_bpf;
+
+	/*
+	 * statistics
+	 */
+	u64 timestart;
+	u64 timeend;
+
+	unsigned long nr_events;
+	unsigned long nr_lost_chunks;
+	unsigned long nr_lost_events;
+
+	u64 all_runtime;
+	u64 all_count;
+	u64 nr_skipped_events[KWORK_TRACE_MAX + 1];
+};
+
+struct kwork_work *perf_kwork_add_work(struct perf_kwork *kwork,
+				       struct kwork_class *class,
+				       struct kwork_work *key);
+
+#ifdef HAVE_BPF_SKEL
+
+int perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork);
+int perf_kwork__report_read_bpf(struct perf_kwork *kwork);
+void perf_kwork__report_cleanup_bpf(void);
+
+void perf_kwork__trace_start(void);
+void perf_kwork__trace_finish(void);
+
+#else  /* !HAVE_BPF_SKEL */
+
+static inline int
+perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork __maybe_unused)
+{
+	return -1;
+}
+
+static inline int
+perf_kwork__report_read_bpf(struct perf_kwork *kwork __maybe_unused)
+{
+	return -1;
+}
+
+static inline void perf_kwork__report_cleanup_bpf(void) {}
+
+static inline void perf_kwork__trace_start(void) {}
+static inline void perf_kwork__trace_finish(void) {}
+
+#endif  /* HAVE_BPF_SKEL */
+
+#endif  /* PERF_UTIL_KWORK_H */
diff --git a/tools/perf/util/levenshtein.c b/tools/perf/util/levenshtein.c
new file mode 100644
index 000000000..6a6712635
--- /dev/null
+++ b/tools/perf/util/levenshtein.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "levenshtein.h"
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * This function implements the Damerau-Levenshtein algorithm to
+ * calculate a distance between strings.
+ *
+ * Basically, it says how many letters need to be swapped, substituted,
+ * deleted from, or added to string1, at least, to get string2.
+ *
+ * The idea is to build a distance matrix for the substrings of both
+ * strings.  To avoid a large space complexity, only the last three rows
+ * are kept in memory (if swaps had the same or higher cost as one deletion
+ * plus one insertion, only two rows would be needed).
+ *
+ * At any stage, "i + 1" denotes the length of the current substring of
+ * string1 that the distance is calculated for.
+ *
+ * row2 holds the current row, row1 the previous row (i.e. for the substring
+ * of string1 of length "i"), and row0 the row before that.
+ *
+ * In other words, at the start of the big loop, row2[j + 1] contains the
+ * Damerau-Levenshtein distance between the substring of string1 of length
+ * "i" and the substring of string2 of length "j + 1".
+ *
+ * All the big loop does is determine the partial minimum-cost paths.
+ *
+ * It does so by calculating the costs of the path ending in characters
+ * i (in string1) and j (in string2), respectively, given that the last
+ * operation is a substitution, a swap, a deletion, or an insertion.
+ *
+ * This implementation allows the costs to be weighted:
+ *
+ * - w (as in "sWap")
+ * - s (as in "Substitution")
+ * - a (for insertion, AKA "Add")
+ * - d (as in "Deletion")
+ *
+ * Note that this algorithm calculates a distance _iff_ d == a.
+ */
+int levenshtein(const char *string1, const char *string2,
+		int w, int s, int a, int d)
+{
+	int len1 = strlen(string1), len2 = strlen(string2);
+	int *row0 = malloc(sizeof(int) * (len2 + 1));
+	int *row1 = malloc(sizeof(int) * (len2 + 1));
+	int *row2 = malloc(sizeof(int) * (len2 + 1));
+	int i, j;
+
+	for (j = 0; j <= len2; j++)
+		row1[j] = j * a;
+	for (i = 0; i < len1; i++) {
+		int *dummy;
+
+		row2[0] = (i + 1) * d;
+		for (j = 0; j < len2; j++) {
+			/* substitution */
+			row2[j + 1] = row1[j] + s * (string1[i] != string2[j]);
+			/* swap */
+			if (i > 0 && j > 0 && string1[i - 1] == string2[j] &&
+					string1[i] == string2[j - 1] &&
+					row2[j + 1] > row0[j - 1] + w)
+				row2[j + 1] = row0[j - 1] + w;
+			/* deletion */
+			if (row2[j + 1] > row1[j + 1] + d)
+				row2[j + 1] = row1[j + 1] + d;
+			/* insertion */
+			if (row2[j + 1] > row2[j] + a)
+				row2[j + 1] = row2[j] + a;
+		}
+
+		dummy = row0;
+		row0 = row1;
+		row1 = row2;
+		row2 = dummy;
+	}
+
+	i = row1[len2];
+	free(row0);
+	free(row1);
+	free(row2);
+
+	return i;
+}
diff --git a/tools/perf/util/levenshtein.h b/tools/perf/util/levenshtein.h
new file mode 100644
index 000000000..34ca173c5
--- /dev/null
+++ b/tools/perf/util/levenshtein.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_LEVENSHTEIN_H
+#define __PERF_LEVENSHTEIN_H
+
+int levenshtein(const char *string1, const char *string2,
+	int swap_penalty, int substition_penalty,
+	int insertion_penalty, int deletion_penalty);
+
+#endif /* __PERF_LEVENSHTEIN_H */
diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c
new file mode 100644
index 000000000..014d82159
--- /dev/null
+++ b/tools/perf/util/libunwind/arm64.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file setups defines to compile arch specific binary from the
+ * generic one.
+ *
+ * The function 'LIBUNWIND__ARCH_REG_ID' name is set according to arch
+ * name and the definition of this function is included directly from
+ * 'arch/arm64/util/unwind-libunwind.c', to make sure that this function
+ * is defined no matter what arch the host is.
+ *
+ * Finally, the arch specific unwind methods are exported which will
+ * be assigned to each arm64 thread.
+ */
+
+#define REMOTE_UNWIND_LIBUNWIND
+
+/* Define arch specific functions & regs for libunwind, should be
+ * defined before including "unwind.h"
+ */
+#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__arm64_reg_id(regnum)
+#define LIBUNWIND__ARCH_REG_IP PERF_REG_ARM64_PC
+#define LIBUNWIND__ARCH_REG_SP PERF_REG_ARM64_SP
+
+#include "unwind.h"
+#include "libunwind-aarch64.h"
+#define perf_event_arm_regs perf_event_arm64_regs
+#include <../../../arch/arm64/include/uapi/asm/perf_regs.h>
+#undef perf_event_arm_regs
+#include "../../arch/arm64/util/unwind-libunwind.c"
+
+/* NO_LIBUNWIND_DEBUG_FRAME is a feature flag for local libunwind,
+ * assign NO_LIBUNWIND_DEBUG_FRAME_AARCH64 to it for compiling arm64
+ * unwind methods.
+ */
+#undef NO_LIBUNWIND_DEBUG_FRAME
+#ifdef NO_LIBUNWIND_DEBUG_FRAME_AARCH64
+#define NO_LIBUNWIND_DEBUG_FRAME
+#endif
+#include "util/unwind-libunwind-local.c"
+
+struct unwind_libunwind_ops *
+arm64_unwind_libunwind_ops = &_unwind_libunwind_ops;
diff --git a/tools/perf/util/libunwind/x86_32.c b/tools/perf/util/libunwind/x86_32.c
new file mode 100644
index 000000000..b2b92d030
--- /dev/null
+++ b/tools/perf/util/libunwind/x86_32.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file setups defines to compile arch specific binary from the
+ * generic one.
+ *
+ * The function 'LIBUNWIND__ARCH_REG_ID' name is set according to arch
+ * name and the definition of this function is included directly from
+ * 'arch/x86/util/unwind-libunwind.c', to make sure that this function
+ * is defined no matter what arch the host is.
+ *
+ * Finally, the arch specific unwind methods are exported which will
+ * be assigned to each x86 thread.
+ */
+
+#define REMOTE_UNWIND_LIBUNWIND
+
+/* Define arch specific functions & regs for libunwind, should be
+ * defined before including "unwind.h"
+ */
+#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__x86_reg_id(regnum)
+#define LIBUNWIND__ARCH_REG_IP PERF_REG_X86_IP
+#define LIBUNWIND__ARCH_REG_SP PERF_REG_X86_SP
+
+#include "unwind.h"
+#include "libunwind-x86.h"
+#include <../../../../arch/x86/include/uapi/asm/perf_regs.h>
+
+/* HAVE_ARCH_X86_64_SUPPORT is used in'arch/x86/util/unwind-libunwind.c'
+ * for x86_32, we undef it to compile code for x86_32 only.
+ */
+#undef HAVE_ARCH_X86_64_SUPPORT
+#include "../../arch/x86/util/unwind-libunwind.c"
+
+/* Explicitly define NO_LIBUNWIND_DEBUG_FRAME, because non-ARM has no
+ * dwarf_find_debug_frame() function.
+ */
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+#define NO_LIBUNWIND_DEBUG_FRAME
+#endif
+#include "util/unwind-libunwind-local.c"
+
+struct unwind_libunwind_ops *
+x86_32_unwind_libunwind_ops = &_unwind_libunwind_ops;
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
new file mode 100644
index 000000000..a9e18bb16
--- /dev/null
+++ b/tools/perf/util/llvm-utils.c
@@ -0,0 +1,612 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015, Huawei Inc.
+ */
+
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include "debug.h"
+#include "llvm-utils.h"
+#include "config.h"
+#include "util.h"
+#include <sys/wait.h>
+#include <subcmd/exec-cmd.h>
+
+#define CLANG_BPF_CMD_DEFAULT_TEMPLATE				\
+		"$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\
+		"-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE "	\
+		"$CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS " \
+		"-Wno-unused-value -Wno-pointer-sign "		\
+		"-working-directory $WORKING_DIR "		\
+		"-c \"$CLANG_SOURCE\" -target bpf $CLANG_EMIT_LLVM -g -O2 -o - $LLVM_OPTIONS_PIPE"
+
+struct llvm_param llvm_param = {
+	.clang_path = "clang",
+	.llc_path = "llc",
+	.clang_bpf_cmd_template = CLANG_BPF_CMD_DEFAULT_TEMPLATE,
+	.clang_opt = NULL,
+	.opts = NULL,
+	.kbuild_dir = NULL,
+	.kbuild_opts = NULL,
+	.user_set_param = false,
+};
+
+static void version_notice(void);
+
+int perf_llvm_config(const char *var, const char *value)
+{
+	if (!strstarts(var, "llvm."))
+		return 0;
+	var += sizeof("llvm.") - 1;
+
+	if (!strcmp(var, "clang-path"))
+		llvm_param.clang_path = strdup(value);
+	else if (!strcmp(var, "clang-bpf-cmd-template"))
+		llvm_param.clang_bpf_cmd_template = strdup(value);
+	else if (!strcmp(var, "clang-opt"))
+		llvm_param.clang_opt = strdup(value);
+	else if (!strcmp(var, "kbuild-dir"))
+		llvm_param.kbuild_dir = strdup(value);
+	else if (!strcmp(var, "kbuild-opts"))
+		llvm_param.kbuild_opts = strdup(value);
+	else if (!strcmp(var, "dump-obj"))
+		llvm_param.dump_obj = !!perf_config_bool(var, value);
+	else if (!strcmp(var, "opts"))
+		llvm_param.opts = strdup(value);
+	else {
+		pr_debug("Invalid LLVM config option: %s\n", value);
+		return -1;
+	}
+	llvm_param.user_set_param = true;
+	return 0;
+}
+
+static int
+search_program(const char *def, const char *name,
+	       char *output)
+{
+	char *env, *path, *tmp = NULL;
+	char buf[PATH_MAX];
+	int ret;
+
+	output[0] = '\0';
+	if (def && def[0] != '\0') {
+		if (def[0] == '/') {
+			if (access(def, F_OK) == 0) {
+				strlcpy(output, def, PATH_MAX);
+				return 0;
+			}
+		} else if (def[0] != '\0')
+			name = def;
+	}
+
+	env = getenv("PATH");
+	if (!env)
+		return -1;
+	env = strdup(env);
+	if (!env)
+		return -1;
+
+	ret = -ENOENT;
+	path = strtok_r(env, ":",  &tmp);
+	while (path) {
+		scnprintf(buf, sizeof(buf), "%s/%s", path, name);
+		if (access(buf, F_OK) == 0) {
+			strlcpy(output, buf, PATH_MAX);
+			ret = 0;
+			break;
+		}
+		path = strtok_r(NULL, ":", &tmp);
+	}
+
+	free(env);
+	return ret;
+}
+
+static int search_program_and_warn(const char *def, const char *name,
+				   char *output)
+{
+	int ret = search_program(def, name, output);
+
+	if (ret) {
+		pr_err("ERROR:\tunable to find %s.\n"
+		       "Hint:\tTry to install latest clang/llvm to support BPF. Check your $PATH\n"
+		       "     \tand '%s-path' option in [llvm] section of ~/.perfconfig.\n",
+		       name, name);
+		version_notice();
+	}
+	return ret;
+}
+
+#define READ_SIZE	4096
+static int
+read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz)
+{
+	int err = 0;
+	void *buf = NULL;
+	FILE *file = NULL;
+	size_t read_sz = 0, buf_sz = 0;
+	char serr[STRERR_BUFSIZE];
+
+	file = popen(cmd, "r");
+	if (!file) {
+		pr_err("ERROR: unable to popen cmd: %s\n",
+		       str_error_r(errno, serr, sizeof(serr)));
+		return -EINVAL;
+	}
+
+	while (!feof(file) && !ferror(file)) {
+		/*
+		 * Make buf_sz always have obe byte extra space so we
+		 * can put '\0' there.
+		 */
+		if (buf_sz - read_sz < READ_SIZE + 1) {
+			void *new_buf;
+
+			buf_sz = read_sz + READ_SIZE + 1;
+			new_buf = realloc(buf, buf_sz);
+
+			if (!new_buf) {
+				pr_err("ERROR: failed to realloc memory\n");
+				err = -ENOMEM;
+				goto errout;
+			}
+
+			buf = new_buf;
+		}
+		read_sz += fread(buf + read_sz, 1, READ_SIZE, file);
+	}
+
+	if (buf_sz - read_sz < 1) {
+		pr_err("ERROR: internal error\n");
+		err = -EINVAL;
+		goto errout;
+	}
+
+	if (ferror(file)) {
+		pr_err("ERROR: error occurred when reading from pipe: %s\n",
+		       str_error_r(errno, serr, sizeof(serr)));
+		err = -EIO;
+		goto errout;
+	}
+
+	err = WEXITSTATUS(pclose(file));
+	file = NULL;
+	if (err) {
+		err = -EINVAL;
+		goto errout;
+	}
+
+	/*
+	 * If buf is string, give it terminal '\0' to make our life
+	 * easier. If buf is not string, that '\0' is out of space
+	 * indicated by read_sz so caller won't even notice it.
+	 */
+	((char *)buf)[read_sz] = '\0';
+
+	if (!p_buf)
+		free(buf);
+	else
+		*p_buf = buf;
+
+	if (p_read_sz)
+		*p_read_sz = read_sz;
+	return 0;
+
+errout:
+	if (file)
+		pclose(file);
+	free(buf);
+	if (p_buf)
+		*p_buf = NULL;
+	if (p_read_sz)
+		*p_read_sz = 0;
+	return err;
+}
+
+static inline void
+force_set_env(const char *var, const char *value)
+{
+	if (value) {
+		setenv(var, value, 1);
+		pr_debug("set env: %s=%s\n", var, value);
+	} else {
+		unsetenv(var);
+		pr_debug("unset env: %s\n", var);
+	}
+}
+
+static void
+version_notice(void)
+{
+	pr_err(
+"     \tLLVM 3.7 or newer is required. Which can be found from http://llvm.org\n"
+"     \tYou may want to try git trunk:\n"
+"     \t\tgit clone http://llvm.org/git/llvm.git\n"
+"     \t\t     and\n"
+"     \t\tgit clone http://llvm.org/git/clang.git\n\n"
+"     \tOr fetch the latest clang/llvm 3.7 from pre-built llvm packages for\n"
+"     \tdebian/ubuntu:\n"
+"     \t\thttps://apt.llvm.org/\n\n"
+"     \tIf you are using old version of clang, change 'clang-bpf-cmd-template'\n"
+"     \toption in [llvm] section of ~/.perfconfig to:\n\n"
+"     \t  \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS $PERF_BPF_INC_OPTIONS \\\n"
+"     \t     -working-directory $WORKING_DIR -c $CLANG_SOURCE \\\n"
+"     \t     -emit-llvm -o - | /path/to/llc -march=bpf -filetype=obj -o -\"\n"
+"     \t(Replace /path/to/llc with path to your llc)\n\n"
+);
+}
+
+static int detect_kbuild_dir(char **kbuild_dir)
+{
+	const char *test_dir = llvm_param.kbuild_dir;
+	const char *prefix_dir = "";
+	const char *suffix_dir = "";
+
+	/* _UTSNAME_LENGTH is 65 */
+	char release[128];
+
+	char *autoconf_path;
+
+	int err;
+
+	if (!test_dir) {
+		err = fetch_kernel_version(NULL, release,
+					   sizeof(release));
+		if (err)
+			return -EINVAL;
+
+		test_dir = release;
+		prefix_dir = "/lib/modules/";
+		suffix_dir = "/build";
+	}
+
+	err = asprintf(&autoconf_path, "%s%s%s/include/generated/autoconf.h",
+		       prefix_dir, test_dir, suffix_dir);
+	if (err < 0)
+		return -ENOMEM;
+
+	if (access(autoconf_path, R_OK) == 0) {
+		free(autoconf_path);
+
+		err = asprintf(kbuild_dir, "%s%s%s", prefix_dir, test_dir,
+			       suffix_dir);
+		if (err < 0)
+			return -ENOMEM;
+		return 0;
+	}
+	pr_debug("%s: Couldn't find \"%s\", missing kernel-devel package?.\n",
+		 __func__, autoconf_path);
+	free(autoconf_path);
+	return -ENOENT;
+}
+
+static const char *kinc_fetch_script =
+"#!/usr/bin/env sh\n"
+"if ! test -d \"$KBUILD_DIR\"\n"
+"then\n"
+"	exit 1\n"
+"fi\n"
+"if ! test -f \"$KBUILD_DIR/include/generated/autoconf.h\"\n"
+"then\n"
+"	exit 1\n"
+"fi\n"
+"TMPDIR=`mktemp -d`\n"
+"if test -z \"$TMPDIR\"\n"
+"then\n"
+"    exit 1\n"
+"fi\n"
+"cat << EOF > $TMPDIR/Makefile\n"
+"obj-y := dummy.o\n"
+"\\$(obj)/%.o: \\$(src)/%.c\n"
+"\t@echo -n \"\\$(NOSTDINC_FLAGS) \\$(LINUXINCLUDE) \\$(EXTRA_CFLAGS)\"\n"
+"\t\\$(CC) -c -o \\$@ \\$<\n"
+"EOF\n"
+"touch $TMPDIR/dummy.c\n"
+"make -s -C $KBUILD_DIR M=$TMPDIR $KBUILD_OPTS dummy.o 2>/dev/null\n"
+"RET=$?\n"
+"rm -rf $TMPDIR\n"
+"exit $RET\n";
+
+void llvm__get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts)
+{
+	static char *saved_kbuild_dir;
+	static char *saved_kbuild_include_opts;
+	int err;
+
+	if (!kbuild_dir || !kbuild_include_opts)
+		return;
+
+	*kbuild_dir = NULL;
+	*kbuild_include_opts = NULL;
+
+	if (saved_kbuild_dir && saved_kbuild_include_opts &&
+	    !IS_ERR(saved_kbuild_dir) && !IS_ERR(saved_kbuild_include_opts)) {
+		*kbuild_dir = strdup(saved_kbuild_dir);
+		*kbuild_include_opts = strdup(saved_kbuild_include_opts);
+
+		if (*kbuild_dir && *kbuild_include_opts)
+			return;
+
+		zfree(kbuild_dir);
+		zfree(kbuild_include_opts);
+		/*
+		 * Don't fall through: it may breaks saved_kbuild_dir and
+		 * saved_kbuild_include_opts if detect them again when
+		 * memory is low.
+		 */
+		return;
+	}
+
+	if (llvm_param.kbuild_dir && !llvm_param.kbuild_dir[0]) {
+		pr_debug("[llvm.kbuild-dir] is set to \"\" deliberately.\n");
+		pr_debug("Skip kbuild options detection.\n");
+		goto errout;
+	}
+
+	err = detect_kbuild_dir(kbuild_dir);
+	if (err) {
+		pr_warning(
+"WARNING:\tunable to get correct kernel building directory.\n"
+"Hint:\tSet correct kbuild directory using 'kbuild-dir' option in [llvm]\n"
+"     \tsection of ~/.perfconfig or set it to \"\" to suppress kbuild\n"
+"     \tdetection.\n\n");
+		goto errout;
+	}
+
+	pr_debug("Kernel build dir is set to %s\n", *kbuild_dir);
+	force_set_env("KBUILD_DIR", *kbuild_dir);
+	force_set_env("KBUILD_OPTS", llvm_param.kbuild_opts);
+	err = read_from_pipe(kinc_fetch_script,
+			     (void **)kbuild_include_opts,
+			     NULL);
+	if (err) {
+		pr_warning(
+"WARNING:\tunable to get kernel include directories from '%s'\n"
+"Hint:\tTry set clang include options using 'clang-bpf-cmd-template'\n"
+"     \toption in [llvm] section of ~/.perfconfig and set 'kbuild-dir'\n"
+"     \toption in [llvm] to \"\" to suppress this detection.\n\n",
+			*kbuild_dir);
+
+		zfree(kbuild_dir);
+		goto errout;
+	}
+
+	pr_debug("include option is set to %s\n", *kbuild_include_opts);
+
+	saved_kbuild_dir = strdup(*kbuild_dir);
+	saved_kbuild_include_opts = strdup(*kbuild_include_opts);
+
+	if (!saved_kbuild_dir || !saved_kbuild_include_opts) {
+		zfree(&saved_kbuild_dir);
+		zfree(&saved_kbuild_include_opts);
+	}
+	return;
+errout:
+	saved_kbuild_dir = ERR_PTR(-EINVAL);
+	saved_kbuild_include_opts = ERR_PTR(-EINVAL);
+}
+
+int llvm__get_nr_cpus(void)
+{
+	static int nr_cpus_avail = 0;
+	char serr[STRERR_BUFSIZE];
+
+	if (nr_cpus_avail > 0)
+		return nr_cpus_avail;
+
+	nr_cpus_avail = sysconf(_SC_NPROCESSORS_CONF);
+	if (nr_cpus_avail <= 0) {
+		pr_err(
+"WARNING:\tunable to get available CPUs in this system: %s\n"
+"        \tUse 128 instead.\n", str_error_r(errno, serr, sizeof(serr)));
+		nr_cpus_avail = 128;
+	}
+	return nr_cpus_avail;
+}
+
+void llvm__dump_obj(const char *path, void *obj_buf, size_t size)
+{
+	char *obj_path = strdup(path);
+	FILE *fp;
+	char *p;
+
+	if (!obj_path) {
+		pr_warning("WARNING: Not enough memory, skip object dumping\n");
+		return;
+	}
+
+	p = strrchr(obj_path, '.');
+	if (!p || (strcmp(p, ".c") != 0)) {
+		pr_warning("WARNING: invalid llvm source path: '%s', skip object dumping\n",
+			   obj_path);
+		goto out;
+	}
+
+	p[1] = 'o';
+	fp = fopen(obj_path, "wb");
+	if (!fp) {
+		pr_warning("WARNING: failed to open '%s': %s, skip object dumping\n",
+			   obj_path, strerror(errno));
+		goto out;
+	}
+
+	pr_debug("LLVM: dumping %s\n", obj_path);
+	if (fwrite(obj_buf, size, 1, fp) != 1)
+		pr_debug("WARNING: failed to write to file '%s': %s, skip object dumping\n", obj_path, strerror(errno));
+	fclose(fp);
+out:
+	free(obj_path);
+}
+
+int llvm__compile_bpf(const char *path, void **p_obj_buf,
+		      size_t *p_obj_buf_sz)
+{
+	size_t obj_buf_sz;
+	void *obj_buf = NULL;
+	int err, nr_cpus_avail;
+	unsigned int kernel_version;
+	char linux_version_code_str[64];
+	const char *clang_opt = llvm_param.clang_opt;
+	char clang_path[PATH_MAX], llc_path[PATH_MAX], abspath[PATH_MAX], nr_cpus_avail_str[64];
+	char serr[STRERR_BUFSIZE];
+	char *kbuild_dir = NULL, *kbuild_include_opts = NULL,
+	     *perf_bpf_include_opts = NULL;
+	const char *template = llvm_param.clang_bpf_cmd_template;
+	char *pipe_template = NULL;
+	const char *opts = llvm_param.opts;
+	char *command_echo = NULL, *command_out;
+	char *perf_include_dir = system_path(PERF_INCLUDE_DIR);
+
+	if (path[0] != '-' && realpath(path, abspath) == NULL) {
+		err = errno;
+		pr_err("ERROR: problems with path %s: %s\n",
+		       path, str_error_r(err, serr, sizeof(serr)));
+		return -err;
+	}
+
+	if (!template)
+		template = CLANG_BPF_CMD_DEFAULT_TEMPLATE;
+
+	err = search_program_and_warn(llvm_param.clang_path,
+			     "clang", clang_path);
+	if (err)
+		return -ENOENT;
+
+	/*
+	 * This is an optional work. Even it fail we can continue our
+	 * work. Needn't check error return.
+	 */
+	llvm__get_kbuild_opts(&kbuild_dir, &kbuild_include_opts);
+
+	nr_cpus_avail = llvm__get_nr_cpus();
+	snprintf(nr_cpus_avail_str, sizeof(nr_cpus_avail_str), "%d",
+		 nr_cpus_avail);
+
+	if (fetch_kernel_version(&kernel_version, NULL, 0))
+		kernel_version = 0;
+
+	snprintf(linux_version_code_str, sizeof(linux_version_code_str),
+		 "0x%x", kernel_version);
+	if (asprintf(&perf_bpf_include_opts, "-I%s/bpf", perf_include_dir) < 0)
+		goto errout;
+	force_set_env("NR_CPUS", nr_cpus_avail_str);
+	force_set_env("LINUX_VERSION_CODE", linux_version_code_str);
+	force_set_env("CLANG_EXEC", clang_path);
+	force_set_env("CLANG_OPTIONS", clang_opt);
+	force_set_env("KERNEL_INC_OPTIONS", kbuild_include_opts);
+	force_set_env("PERF_BPF_INC_OPTIONS", perf_bpf_include_opts);
+	force_set_env("WORKING_DIR", kbuild_dir ? : ".");
+
+	if (opts) {
+		err = search_program_and_warn(llvm_param.llc_path, "llc", llc_path);
+		if (err)
+			goto errout;
+
+		err = -ENOMEM;
+		if (asprintf(&pipe_template, "%s -emit-llvm | %s -march=bpf %s -filetype=obj -o -",
+			      template, llc_path, opts) < 0) {
+			pr_err("ERROR:\tnot enough memory to setup command line\n");
+			goto errout;
+		}
+
+		template = pipe_template;
+
+	}
+
+	/*
+	 * Since we may reset clang's working dir, path of source file
+	 * should be transferred into absolute path, except we want
+	 * stdin to be source file (testing).
+	 */
+	force_set_env("CLANG_SOURCE",
+		      (path[0] == '-') ? path : abspath);
+
+	pr_debug("llvm compiling command template: %s\n", template);
+
+	/*
+	 * Below, substitute control characters for values that can cause the
+	 * echo to misbehave, then substitute the values back.
+	 */
+	err = -ENOMEM;
+	if (asprintf(&command_echo, "echo -n \a%s\a", template) < 0)
+		goto errout;
+
+#define SWAP_CHAR(a, b) do { if (*p == a) *p = b; } while (0)
+	for (char *p = command_echo; *p; p++) {
+		SWAP_CHAR('<', '\001');
+		SWAP_CHAR('>', '\002');
+		SWAP_CHAR('"', '\003');
+		SWAP_CHAR('\'', '\004');
+		SWAP_CHAR('|', '\005');
+		SWAP_CHAR('&', '\006');
+		SWAP_CHAR('\a', '"');
+	}
+	err = read_from_pipe(command_echo, (void **) &command_out, NULL);
+	if (err)
+		goto errout;
+
+	for (char *p = command_out; *p; p++) {
+		SWAP_CHAR('\001', '<');
+		SWAP_CHAR('\002', '>');
+		SWAP_CHAR('\003', '"');
+		SWAP_CHAR('\004', '\'');
+		SWAP_CHAR('\005', '|');
+		SWAP_CHAR('\006', '&');
+	}
+#undef SWAP_CHAR
+	pr_debug("llvm compiling command : %s\n", command_out);
+
+	err = read_from_pipe(template, &obj_buf, &obj_buf_sz);
+	if (err) {
+		pr_err("ERROR:\tunable to compile %s\n", path);
+		pr_err("Hint:\tCheck error message shown above.\n");
+		pr_err("Hint:\tYou can also pre-compile it into .o using:\n");
+		pr_err("     \t\tclang -target bpf -O2 -c %s\n", path);
+		pr_err("     \twith proper -I and -D options.\n");
+		goto errout;
+	}
+
+	free(command_echo);
+	free(command_out);
+	free(kbuild_dir);
+	free(kbuild_include_opts);
+	free(perf_bpf_include_opts);
+	free(perf_include_dir);
+
+	if (!p_obj_buf)
+		free(obj_buf);
+	else
+		*p_obj_buf = obj_buf;
+
+	if (p_obj_buf_sz)
+		*p_obj_buf_sz = obj_buf_sz;
+	return 0;
+errout:
+	free(command_echo);
+	free(kbuild_dir);
+	free(kbuild_include_opts);
+	free(obj_buf);
+	free(perf_bpf_include_opts);
+	free(perf_include_dir);
+	free(pipe_template);
+	if (p_obj_buf)
+		*p_obj_buf = NULL;
+	if (p_obj_buf_sz)
+		*p_obj_buf_sz = 0;
+	return err;
+}
+
+int llvm__search_clang(void)
+{
+	char clang_path[PATH_MAX];
+
+	return search_program_and_warn(llvm_param.clang_path, "clang", clang_path);
+}
diff --git a/tools/perf/util/llvm-utils.h b/tools/perf/util/llvm-utils.h
new file mode 100644
index 000000000..7878a0e3f
--- /dev/null
+++ b/tools/perf/util/llvm-utils.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015, Huawei Inc.
+ */
+#ifndef __LLVM_UTILS_H
+#define __LLVM_UTILS_H
+
+#include <stdbool.h>
+
+struct llvm_param {
+	/* Path of clang executable */
+	const char *clang_path;
+	/* Path of llc executable */
+	const char *llc_path;
+	/*
+	 * Template of clang bpf compiling. 5 env variables
+	 * can be used:
+	 *   $CLANG_EXEC:		Path to clang.
+	 *   $CLANG_OPTIONS:		Extra options to clang.
+	 *   $KERNEL_INC_OPTIONS:	Kernel include directories.
+	 *   $WORKING_DIR:		Kernel source directory.
+	 *   $CLANG_SOURCE:		Source file to be compiled.
+	 */
+	const char *clang_bpf_cmd_template;
+	/* Will be filled in $CLANG_OPTIONS */
+	const char *clang_opt;
+	/*
+	 * If present it'll add -emit-llvm to $CLANG_OPTIONS to pipe
+	 * the clang output to llc, useful for new llvm options not
+	 * yet selectable via 'clang -mllvm option', such as -mattr=dwarfris
+	 * in clang 6.0/llvm 7
+	 */
+	const char *opts;
+	/* Where to find kbuild system */
+	const char *kbuild_dir;
+	/*
+	 * Arguments passed to make, like 'ARCH=arm' if doing cross
+	 * compiling. Should not be used for dynamic compiling.
+	 */
+	const char *kbuild_opts;
+	/*
+	 * Default is false. If set to true, write compiling result
+	 * to object file.
+	 */
+	bool dump_obj;
+	/*
+	 * Default is false. If one of the above fields is set by user
+	 * explicitly then user_set_llvm is set to true. This is used
+	 * for perf test. If user doesn't set anything in .perfconfig
+	 * and clang is not found, don't trigger llvm test.
+	 */
+	bool user_set_param;
+};
+
+extern struct llvm_param llvm_param;
+int perf_llvm_config(const char *var, const char *value);
+
+int llvm__compile_bpf(const char *path, void **p_obj_buf, size_t *p_obj_buf_sz);
+
+/* This function is for test__llvm() use only */
+int llvm__search_clang(void);
+
+/* Following functions are reused by builtin clang support */
+void llvm__get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts);
+int llvm__get_nr_cpus(void);
+
+void llvm__dump_obj(const char *path, void *obj_buf, size_t size);
+#endif
diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h
new file mode 100644
index 000000000..b8cb8830b
--- /dev/null
+++ b/tools/perf/util/lock-contention.h
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef PERF_LOCK_CONTENTION_H
+#define PERF_LOCK_CONTENTION_H
+
+#include <linux/list.h>
+#include <linux/rbtree.h>
+
+struct lock_stat {
+	struct hlist_node	hash_entry;
+	struct rb_node		rb;		/* used for sorting */
+
+	u64			addr;		/* address of lockdep_map, used as ID */
+	char			*name;		/* for strcpy(), we cannot use const */
+	u64			*callstack;
+
+	unsigned int		nr_acquire;
+	unsigned int		nr_acquired;
+	unsigned int		nr_contended;
+	unsigned int		nr_release;
+
+	union {
+		unsigned int	nr_readlock;
+		unsigned int	flags;
+	};
+	unsigned int		nr_trylock;
+
+	/* these times are in nano sec. */
+	u64                     avg_wait_time;
+	u64			wait_time_total;
+	u64			wait_time_min;
+	u64			wait_time_max;
+
+	int			broken; /* flag of blacklist */
+	int			combined;
+};
+
+/*
+ * States of lock_seq_stat
+ *
+ * UNINITIALIZED is required for detecting first event of acquire.
+ * As the nature of lock events, there is no guarantee
+ * that the first event for the locks are acquire,
+ * it can be acquired, contended or release.
+ */
+#define SEQ_STATE_UNINITIALIZED      0	       /* initial state */
+#define SEQ_STATE_RELEASED	1
+#define SEQ_STATE_ACQUIRING	2
+#define SEQ_STATE_ACQUIRED	3
+#define SEQ_STATE_READ_ACQUIRED	4
+#define SEQ_STATE_CONTENDED	5
+
+/*
+ * MAX_LOCK_DEPTH
+ * Imported from include/linux/sched.h.
+ * Should this be synchronized?
+ */
+#define MAX_LOCK_DEPTH 48
+
+/*
+ * struct lock_seq_stat:
+ * Place to put on state of one lock sequence
+ * 1) acquire -> acquired -> release
+ * 2) acquire -> contended -> acquired -> release
+ * 3) acquire (with read or try) -> release
+ * 4) Are there other patterns?
+ */
+struct lock_seq_stat {
+	struct list_head        list;
+	int			state;
+	u64			prev_event_time;
+	u64                     addr;
+
+	int                     read_count;
+};
+
+struct thread_stat {
+	struct rb_node		rb;
+
+	u32                     tid;
+	struct list_head        seq_list;
+};
+
+/*
+ * CONTENTION_STACK_DEPTH
+ * Number of stack trace entries to find callers
+ */
+#define CONTENTION_STACK_DEPTH  8
+
+/*
+ * CONTENTION_STACK_SKIP
+ * Number of stack trace entries to skip when finding callers.
+ * The first few entries belong to the locking implementation itself.
+ */
+#define CONTENTION_STACK_SKIP  3
+
+/*
+ * flags for lock:contention_begin
+ * Imported from include/trace/events/lock.h.
+ */
+#define LCB_F_SPIN	(1U << 0)
+#define LCB_F_READ	(1U << 1)
+#define LCB_F_WRITE	(1U << 2)
+#define LCB_F_RT	(1U << 3)
+#define LCB_F_PERCPU	(1U << 4)
+#define LCB_F_MUTEX	(1U << 5)
+
+struct evlist;
+struct machine;
+struct target;
+
+struct lock_contention {
+	struct evlist *evlist;
+	struct target *target;
+	struct machine *machine;
+	struct hlist_head *result;
+	unsigned long map_nr_entries;
+	int lost;
+	int max_stack;
+	int stack_skip;
+};
+
+#ifdef HAVE_BPF_SKEL
+
+int lock_contention_prepare(struct lock_contention *con);
+int lock_contention_start(void);
+int lock_contention_stop(void);
+int lock_contention_read(struct lock_contention *con);
+int lock_contention_finish(void);
+
+#else  /* !HAVE_BPF_SKEL */
+
+static inline int lock_contention_prepare(struct lock_contention *con __maybe_unused)
+{
+	return 0;
+}
+
+static inline int lock_contention_start(void) { return 0; }
+static inline int lock_contention_stop(void) { return 0; }
+static inline int lock_contention_finish(void) { return 0; }
+
+static inline int lock_contention_read(struct lock_contention *con __maybe_unused)
+{
+	return 0;
+}
+
+#endif  /* HAVE_BPF_SKEL */
+
+bool is_lock_function(struct machine *machine, u64 addr);
+
+#endif  /* PERF_LOCK_CONTENTION_H */
diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c
new file mode 100644
index 000000000..51424cdc3
--- /dev/null
+++ b/tools/perf/util/lzma.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <lzma.h>
+#include <stdio.h>
+#include <linux/compiler.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "compress.h"
+#include "debug.h"
+#include <string.h>
+#include <unistd.h>
+#include <internal/lib.h>
+
+#define BUFSIZE 8192
+
+static const char *lzma_strerror(lzma_ret ret)
+{
+	switch ((int) ret) {
+	case LZMA_MEM_ERROR:
+		return "Memory allocation failed";
+	case LZMA_OPTIONS_ERROR:
+		return "Unsupported decompressor flags";
+	case LZMA_FORMAT_ERROR:
+		return "The input is not in the .xz format";
+	case LZMA_DATA_ERROR:
+		return "Compressed file is corrupt";
+	case LZMA_BUF_ERROR:
+		return "Compressed file is truncated or otherwise corrupt";
+	default:
+		return "Unknown error, possibly a bug";
+	}
+}
+
+int lzma_decompress_to_file(const char *input, int output_fd)
+{
+	lzma_action action = LZMA_RUN;
+	lzma_stream strm   = LZMA_STREAM_INIT;
+	lzma_ret ret;
+	int err = -1;
+
+	u8 buf_in[BUFSIZE];
+	u8 buf_out[BUFSIZE];
+	FILE *infile;
+
+	infile = fopen(input, "rb");
+	if (!infile) {
+		pr_err("lzma: fopen failed on %s: '%s'\n",
+		       input, strerror(errno));
+		return -1;
+	}
+
+	ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED);
+	if (ret != LZMA_OK) {
+		pr_err("lzma: lzma_stream_decoder failed %s (%d)\n",
+			lzma_strerror(ret), ret);
+		goto err_fclose;
+	}
+
+	strm.next_in   = NULL;
+	strm.avail_in  = 0;
+	strm.next_out  = buf_out;
+	strm.avail_out = sizeof(buf_out);
+
+	while (1) {
+		if (strm.avail_in == 0 && !feof(infile)) {
+			strm.next_in  = buf_in;
+			strm.avail_in = fread(buf_in, 1, sizeof(buf_in), infile);
+
+			if (ferror(infile)) {
+				pr_err("lzma: read error: %s\n", strerror(errno));
+				goto err_lzma_end;
+			}
+
+			if (feof(infile))
+				action = LZMA_FINISH;
+		}
+
+		ret = lzma_code(&strm, action);
+
+		if (strm.avail_out == 0 || ret == LZMA_STREAM_END) {
+			ssize_t write_size = sizeof(buf_out) - strm.avail_out;
+
+			if (writen(output_fd, buf_out, write_size) != write_size) {
+				pr_err("lzma: write error: %s\n", strerror(errno));
+				goto err_lzma_end;
+			}
+
+			strm.next_out  = buf_out;
+			strm.avail_out = sizeof(buf_out);
+		}
+
+		if (ret != LZMA_OK) {
+			if (ret == LZMA_STREAM_END)
+				break;
+
+			pr_err("lzma: failed %s\n", lzma_strerror(ret));
+			goto err_lzma_end;
+		}
+	}
+
+	err = 0;
+err_lzma_end:
+	lzma_end(&strm);
+err_fclose:
+	fclose(infile);
+	return err;
+}
+
+bool lzma_is_compressed(const char *input)
+{
+	int fd = open(input, O_RDONLY);
+	const uint8_t magic[6] = { 0xFD, '7', 'z', 'X', 'Z', 0x00 };
+	char buf[6] = { 0 };
+	ssize_t rc;
+
+	if (fd < 0)
+		return -1;
+
+	rc = read(fd, buf, sizeof(buf));
+	close(fd);
+	return rc == sizeof(buf) ?
+	       memcmp(buf, magic, sizeof(buf)) == 0 : false;
+}
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
new file mode 100644
index 000000000..9cd52f50e
--- /dev/null
+++ b/tools/perf/util/machine.c
@@ -0,0 +1,3340 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <dirent.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <regex.h>
+#include <stdlib.h>
+#include "callchain.h"
+#include "debug.h"
+#include "dso.h"
+#include "env.h"
+#include "event.h"
+#include "evsel.h"
+#include "hist.h"
+#include "machine.h"
+#include "map.h"
+#include "map_symbol.h"
+#include "branch.h"
+#include "mem-events.h"
+#include "path.h"
+#include "srcline.h"
+#include "symbol.h"
+#include "sort.h"
+#include "strlist.h"
+#include "target.h"
+#include "thread.h"
+#include "util.h"
+#include "vdso.h"
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "unwind.h"
+#include "linux/hash.h"
+#include "asm/bug.h"
+#include "bpf-event.h"
+#include <internal/lib.h> // page_size
+#include "cgroup.h"
+#include "arm64-frame-pointer-unwind-support.h"
+
+#include <linux/ctype.h>
+#include <symbol/kallsyms.h>
+#include <linux/mman.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+
+static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock);
+
+static struct dso *machine__kernel_dso(struct machine *machine)
+{
+	return machine->vmlinux_map->dso;
+}
+
+static void dsos__init(struct dsos *dsos)
+{
+	INIT_LIST_HEAD(&dsos->head);
+	dsos->root = RB_ROOT;
+	init_rwsem(&dsos->lock);
+}
+
+static void machine__threads_init(struct machine *machine)
+{
+	int i;
+
+	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+		struct threads *threads = &machine->threads[i];
+		threads->entries = RB_ROOT_CACHED;
+		init_rwsem(&threads->lock);
+		threads->nr = 0;
+		INIT_LIST_HEAD(&threads->dead);
+		threads->last_match = NULL;
+	}
+}
+
+static int machine__set_mmap_name(struct machine *machine)
+{
+	if (machine__is_host(machine))
+		machine->mmap_name = strdup("[kernel.kallsyms]");
+	else if (machine__is_default_guest(machine))
+		machine->mmap_name = strdup("[guest.kernel.kallsyms]");
+	else if (asprintf(&machine->mmap_name, "[guest.kernel.kallsyms.%d]",
+			  machine->pid) < 0)
+		machine->mmap_name = NULL;
+
+	return machine->mmap_name ? 0 : -ENOMEM;
+}
+
+static void thread__set_guest_comm(struct thread *thread, pid_t pid)
+{
+	char comm[64];
+
+	snprintf(comm, sizeof(comm), "[guest/%d]", pid);
+	thread__set_comm(thread, comm, 0);
+}
+
+int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
+{
+	int err = -ENOMEM;
+
+	memset(machine, 0, sizeof(*machine));
+	machine->kmaps = maps__new(machine);
+	if (machine->kmaps == NULL)
+		return -ENOMEM;
+
+	RB_CLEAR_NODE(&machine->rb_node);
+	dsos__init(&machine->dsos);
+
+	machine__threads_init(machine);
+
+	machine->vdso_info = NULL;
+	machine->env = NULL;
+
+	machine->pid = pid;
+
+	machine->id_hdr_size = 0;
+	machine->kptr_restrict_warned = false;
+	machine->comm_exec = false;
+	machine->kernel_start = 0;
+	machine->vmlinux_map = NULL;
+
+	machine->root_dir = strdup(root_dir);
+	if (machine->root_dir == NULL)
+		goto out;
+
+	if (machine__set_mmap_name(machine))
+		goto out;
+
+	if (pid != HOST_KERNEL_ID) {
+		struct thread *thread = machine__findnew_thread(machine, -1,
+								pid);
+
+		if (thread == NULL)
+			goto out;
+
+		thread__set_guest_comm(thread, pid);
+		thread__put(thread);
+	}
+
+	machine->current_tid = NULL;
+	err = 0;
+
+out:
+	if (err) {
+		zfree(&machine->kmaps);
+		zfree(&machine->root_dir);
+		zfree(&machine->mmap_name);
+	}
+	return 0;
+}
+
+struct machine *machine__new_host(void)
+{
+	struct machine *machine = malloc(sizeof(*machine));
+
+	if (machine != NULL) {
+		machine__init(machine, "", HOST_KERNEL_ID);
+
+		if (machine__create_kernel_maps(machine) < 0)
+			goto out_delete;
+	}
+
+	return machine;
+out_delete:
+	free(machine);
+	return NULL;
+}
+
+struct machine *machine__new_kallsyms(void)
+{
+	struct machine *machine = machine__new_host();
+	/*
+	 * FIXME:
+	 * 1) We should switch to machine__load_kallsyms(), i.e. not explicitly
+	 *    ask for not using the kcore parsing code, once this one is fixed
+	 *    to create a map per module.
+	 */
+	if (machine && machine__load_kallsyms(machine, "/proc/kallsyms") <= 0) {
+		machine__delete(machine);
+		machine = NULL;
+	}
+
+	return machine;
+}
+
+static void dsos__purge(struct dsos *dsos)
+{
+	struct dso *pos, *n;
+
+	down_write(&dsos->lock);
+
+	list_for_each_entry_safe(pos, n, &dsos->head, node) {
+		RB_CLEAR_NODE(&pos->rb_node);
+		pos->root = NULL;
+		list_del_init(&pos->node);
+		dso__put(pos);
+	}
+
+	up_write(&dsos->lock);
+}
+
+static void dsos__exit(struct dsos *dsos)
+{
+	dsos__purge(dsos);
+	exit_rwsem(&dsos->lock);
+}
+
+void machine__delete_threads(struct machine *machine)
+{
+	struct rb_node *nd;
+	int i;
+
+	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+		struct threads *threads = &machine->threads[i];
+		down_write(&threads->lock);
+		nd = rb_first_cached(&threads->entries);
+		while (nd) {
+			struct thread *t = rb_entry(nd, struct thread, rb_node);
+
+			nd = rb_next(nd);
+			__machine__remove_thread(machine, t, false);
+		}
+		up_write(&threads->lock);
+	}
+}
+
+void machine__exit(struct machine *machine)
+{
+	int i;
+
+	if (machine == NULL)
+		return;
+
+	machine__destroy_kernel_maps(machine);
+	maps__delete(machine->kmaps);
+	dsos__exit(&machine->dsos);
+	machine__exit_vdso(machine);
+	zfree(&machine->root_dir);
+	zfree(&machine->mmap_name);
+	zfree(&machine->current_tid);
+	zfree(&machine->kallsyms_filename);
+
+	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+		struct threads *threads = &machine->threads[i];
+		struct thread *thread, *n;
+		/*
+		 * Forget about the dead, at this point whatever threads were
+		 * left in the dead lists better have a reference count taken
+		 * by who is using them, and then, when they drop those references
+		 * and it finally hits zero, thread__put() will check and see that
+		 * its not in the dead threads list and will not try to remove it
+		 * from there, just calling thread__delete() straight away.
+		 */
+		list_for_each_entry_safe(thread, n, &threads->dead, node)
+			list_del_init(&thread->node);
+
+		exit_rwsem(&threads->lock);
+	}
+}
+
+void machine__delete(struct machine *machine)
+{
+	if (machine) {
+		machine__exit(machine);
+		free(machine);
+	}
+}
+
+void machines__init(struct machines *machines)
+{
+	machine__init(&machines->host, "", HOST_KERNEL_ID);
+	machines->guests = RB_ROOT_CACHED;
+}
+
+void machines__exit(struct machines *machines)
+{
+	machine__exit(&machines->host);
+	/* XXX exit guest */
+}
+
+struct machine *machines__add(struct machines *machines, pid_t pid,
+			      const char *root_dir)
+{
+	struct rb_node **p = &machines->guests.rb_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct machine *pos, *machine = malloc(sizeof(*machine));
+	bool leftmost = true;
+
+	if (machine == NULL)
+		return NULL;
+
+	if (machine__init(machine, root_dir, pid) != 0) {
+		free(machine);
+		return NULL;
+	}
+
+	while (*p != NULL) {
+		parent = *p;
+		pos = rb_entry(parent, struct machine, rb_node);
+		if (pid < pos->pid)
+			p = &(*p)->rb_left;
+		else {
+			p = &(*p)->rb_right;
+			leftmost = false;
+		}
+	}
+
+	rb_link_node(&machine->rb_node, parent, p);
+	rb_insert_color_cached(&machine->rb_node, &machines->guests, leftmost);
+
+	machine->machines = machines;
+
+	return machine;
+}
+
+void machines__set_comm_exec(struct machines *machines, bool comm_exec)
+{
+	struct rb_node *nd;
+
+	machines->host.comm_exec = comm_exec;
+
+	for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
+		struct machine *machine = rb_entry(nd, struct machine, rb_node);
+
+		machine->comm_exec = comm_exec;
+	}
+}
+
+struct machine *machines__find(struct machines *machines, pid_t pid)
+{
+	struct rb_node **p = &machines->guests.rb_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct machine *machine;
+	struct machine *default_machine = NULL;
+
+	if (pid == HOST_KERNEL_ID)
+		return &machines->host;
+
+	while (*p != NULL) {
+		parent = *p;
+		machine = rb_entry(parent, struct machine, rb_node);
+		if (pid < machine->pid)
+			p = &(*p)->rb_left;
+		else if (pid > machine->pid)
+			p = &(*p)->rb_right;
+		else
+			return machine;
+		if (!machine->pid)
+			default_machine = machine;
+	}
+
+	return default_machine;
+}
+
+struct machine *machines__findnew(struct machines *machines, pid_t pid)
+{
+	char path[PATH_MAX];
+	const char *root_dir = "";
+	struct machine *machine = machines__find(machines, pid);
+
+	if (machine && (machine->pid == pid))
+		goto out;
+
+	if ((pid != HOST_KERNEL_ID) &&
+	    (pid != DEFAULT_GUEST_KERNEL_ID) &&
+	    (symbol_conf.guestmount)) {
+		sprintf(path, "%s/%d", symbol_conf.guestmount, pid);
+		if (access(path, R_OK)) {
+			static struct strlist *seen;
+
+			if (!seen)
+				seen = strlist__new(NULL, NULL);
+
+			if (!strlist__has_entry(seen, path)) {
+				pr_err("Can't access file %s\n", path);
+				strlist__add(seen, path);
+			}
+			machine = NULL;
+			goto out;
+		}
+		root_dir = path;
+	}
+
+	machine = machines__add(machines, pid, root_dir);
+out:
+	return machine;
+}
+
+struct machine *machines__find_guest(struct machines *machines, pid_t pid)
+{
+	struct machine *machine = machines__find(machines, pid);
+
+	if (!machine)
+		machine = machines__findnew(machines, DEFAULT_GUEST_KERNEL_ID);
+	return machine;
+}
+
+/*
+ * A common case for KVM test programs is that the test program acts as the
+ * hypervisor, creating, running and destroying the virtual machine, and
+ * providing the guest object code from its own object code. In this case,
+ * the VM is not running an OS, but only the functions loaded into it by the
+ * hypervisor test program, and conveniently, loaded at the same virtual
+ * addresses.
+ *
+ * Normally to resolve addresses, MMAP events are needed to map addresses
+ * back to the object code and debug symbols for that object code.
+ *
+ * Currently, there is no way to get such mapping information from guests
+ * but, in the scenario described above, the guest has the same mappings
+ * as the hypervisor, so support for that scenario can be achieved.
+ *
+ * To support that, copy the host thread's maps to the guest thread's maps.
+ * Note, we do not discover the guest until we encounter a guest event,
+ * which works well because it is not until then that we know that the host
+ * thread's maps have been set up.
+ *
+ * This function returns the guest thread. Apart from keeping the data
+ * structures sane, using a thread belonging to the guest machine, instead
+ * of the host thread, allows it to have its own comm (refer
+ * thread__set_guest_comm()).
+ */
+static struct thread *findnew_guest_code(struct machine *machine,
+					 struct machine *host_machine,
+					 pid_t pid)
+{
+	struct thread *host_thread;
+	struct thread *thread;
+	int err;
+
+	if (!machine)
+		return NULL;
+
+	thread = machine__findnew_thread(machine, -1, pid);
+	if (!thread)
+		return NULL;
+
+	/* Assume maps are set up if there are any */
+	if (thread->maps->nr_maps)
+		return thread;
+
+	host_thread = machine__find_thread(host_machine, -1, pid);
+	if (!host_thread)
+		goto out_err;
+
+	thread__set_guest_comm(thread, pid);
+
+	/*
+	 * Guest code can be found in hypervisor process at the same address
+	 * so copy host maps.
+	 */
+	err = maps__clone(thread, host_thread->maps);
+	thread__put(host_thread);
+	if (err)
+		goto out_err;
+
+	return thread;
+
+out_err:
+	thread__zput(thread);
+	return NULL;
+}
+
+struct thread *machines__findnew_guest_code(struct machines *machines, pid_t pid)
+{
+	struct machine *host_machine = machines__find(machines, HOST_KERNEL_ID);
+	struct machine *machine = machines__findnew(machines, pid);
+
+	return findnew_guest_code(machine, host_machine, pid);
+}
+
+struct thread *machine__findnew_guest_code(struct machine *machine, pid_t pid)
+{
+	struct machines *machines = machine->machines;
+	struct machine *host_machine;
+
+	if (!machines)
+		return NULL;
+
+	host_machine = machines__find(machines, HOST_KERNEL_ID);
+
+	return findnew_guest_code(machine, host_machine, pid);
+}
+
+void machines__process_guests(struct machines *machines,
+			      machine__process_t process, void *data)
+{
+	struct rb_node *nd;
+
+	for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
+		struct machine *pos = rb_entry(nd, struct machine, rb_node);
+		process(pos, data);
+	}
+}
+
+void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size)
+{
+	struct rb_node *node;
+	struct machine *machine;
+
+	machines->host.id_hdr_size = id_hdr_size;
+
+	for (node = rb_first_cached(&machines->guests); node;
+	     node = rb_next(node)) {
+		machine = rb_entry(node, struct machine, rb_node);
+		machine->id_hdr_size = id_hdr_size;
+	}
+
+	return;
+}
+
+static void machine__update_thread_pid(struct machine *machine,
+				       struct thread *th, pid_t pid)
+{
+	struct thread *leader;
+
+	if (pid == th->pid_ || pid == -1 || th->pid_ != -1)
+		return;
+
+	th->pid_ = pid;
+
+	if (th->pid_ == th->tid)
+		return;
+
+	leader = __machine__findnew_thread(machine, th->pid_, th->pid_);
+	if (!leader)
+		goto out_err;
+
+	if (!leader->maps)
+		leader->maps = maps__new(machine);
+
+	if (!leader->maps)
+		goto out_err;
+
+	if (th->maps == leader->maps)
+		return;
+
+	if (th->maps) {
+		/*
+		 * Maps are created from MMAP events which provide the pid and
+		 * tid.  Consequently there never should be any maps on a thread
+		 * with an unknown pid.  Just print an error if there are.
+		 */
+		if (!maps__empty(th->maps))
+			pr_err("Discarding thread maps for %d:%d\n",
+			       th->pid_, th->tid);
+		maps__put(th->maps);
+	}
+
+	th->maps = maps__get(leader->maps);
+out_put:
+	thread__put(leader);
+	return;
+out_err:
+	pr_err("Failed to join map groups for %d:%d\n", th->pid_, th->tid);
+	goto out_put;
+}
+
+/*
+ * Front-end cache - TID lookups come in blocks,
+ * so most of the time we dont have to look up
+ * the full rbtree:
+ */
+static struct thread*
+__threads__get_last_match(struct threads *threads, struct machine *machine,
+			  int pid, int tid)
+{
+	struct thread *th;
+
+	th = threads->last_match;
+	if (th != NULL) {
+		if (th->tid == tid) {
+			machine__update_thread_pid(machine, th, pid);
+			return thread__get(th);
+		}
+
+		threads->last_match = NULL;
+	}
+
+	return NULL;
+}
+
+static struct thread*
+threads__get_last_match(struct threads *threads, struct machine *machine,
+			int pid, int tid)
+{
+	struct thread *th = NULL;
+
+	if (perf_singlethreaded)
+		th = __threads__get_last_match(threads, machine, pid, tid);
+
+	return th;
+}
+
+static void
+__threads__set_last_match(struct threads *threads, struct thread *th)
+{
+	threads->last_match = th;
+}
+
+static void
+threads__set_last_match(struct threads *threads, struct thread *th)
+{
+	if (perf_singlethreaded)
+		__threads__set_last_match(threads, th);
+}
+
+/*
+ * Caller must eventually drop thread->refcnt returned with a successful
+ * lookup/new thread inserted.
+ */
+static struct thread *____machine__findnew_thread(struct machine *machine,
+						  struct threads *threads,
+						  pid_t pid, pid_t tid,
+						  bool create)
+{
+	struct rb_node **p = &threads->entries.rb_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct thread *th;
+	bool leftmost = true;
+
+	th = threads__get_last_match(threads, machine, pid, tid);
+	if (th)
+		return th;
+
+	while (*p != NULL) {
+		parent = *p;
+		th = rb_entry(parent, struct thread, rb_node);
+
+		if (th->tid == tid) {
+			threads__set_last_match(threads, th);
+			machine__update_thread_pid(machine, th, pid);
+			return thread__get(th);
+		}
+
+		if (tid < th->tid)
+			p = &(*p)->rb_left;
+		else {
+			p = &(*p)->rb_right;
+			leftmost = false;
+		}
+	}
+
+	if (!create)
+		return NULL;
+
+	th = thread__new(pid, tid);
+	if (th != NULL) {
+		rb_link_node(&th->rb_node, parent, p);
+		rb_insert_color_cached(&th->rb_node, &threads->entries, leftmost);
+
+		/*
+		 * We have to initialize maps separately after rb tree is updated.
+		 *
+		 * The reason is that we call machine__findnew_thread
+		 * within thread__init_maps to find the thread
+		 * leader and that would screwed the rb tree.
+		 */
+		if (thread__init_maps(th, machine)) {
+			rb_erase_cached(&th->rb_node, &threads->entries);
+			RB_CLEAR_NODE(&th->rb_node);
+			thread__put(th);
+			return NULL;
+		}
+		/*
+		 * It is now in the rbtree, get a ref
+		 */
+		thread__get(th);
+		threads__set_last_match(threads, th);
+		++threads->nr;
+	}
+
+	return th;
+}
+
+struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid)
+{
+	return ____machine__findnew_thread(machine, machine__threads(machine, tid), pid, tid, true);
+}
+
+struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
+				       pid_t tid)
+{
+	struct threads *threads = machine__threads(machine, tid);
+	struct thread *th;
+
+	down_write(&threads->lock);
+	th = __machine__findnew_thread(machine, pid, tid);
+	up_write(&threads->lock);
+	return th;
+}
+
+struct thread *machine__find_thread(struct machine *machine, pid_t pid,
+				    pid_t tid)
+{
+	struct threads *threads = machine__threads(machine, tid);
+	struct thread *th;
+
+	down_read(&threads->lock);
+	th =  ____machine__findnew_thread(machine, threads, pid, tid, false);
+	up_read(&threads->lock);
+	return th;
+}
+
+/*
+ * Threads are identified by pid and tid, and the idle task has pid == tid == 0.
+ * So here a single thread is created for that, but actually there is a separate
+ * idle task per cpu, so there should be one 'struct thread' per cpu, but there
+ * is only 1. That causes problems for some tools, requiring workarounds. For
+ * example get_idle_thread() in builtin-sched.c, or thread_stack__per_cpu().
+ */
+struct thread *machine__idle_thread(struct machine *machine)
+{
+	struct thread *thread = machine__findnew_thread(machine, 0, 0);
+
+	if (!thread || thread__set_comm(thread, "swapper", 0) ||
+	    thread__set_namespaces(thread, 0, NULL))
+		pr_err("problem inserting idle task for machine pid %d\n", machine->pid);
+
+	return thread;
+}
+
+struct comm *machine__thread_exec_comm(struct machine *machine,
+				       struct thread *thread)
+{
+	if (machine->comm_exec)
+		return thread__exec_comm(thread);
+	else
+		return thread__comm(thread);
+}
+
+int machine__process_comm_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample)
+{
+	struct thread *thread = machine__findnew_thread(machine,
+							event->comm.pid,
+							event->comm.tid);
+	bool exec = event->header.misc & PERF_RECORD_MISC_COMM_EXEC;
+	int err = 0;
+
+	if (exec)
+		machine->comm_exec = true;
+
+	if (dump_trace)
+		perf_event__fprintf_comm(event, stdout);
+
+	if (thread == NULL ||
+	    __thread__set_comm(thread, event->comm.comm, sample->time, exec)) {
+		dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
+		err = -1;
+	}
+
+	thread__put(thread);
+
+	return err;
+}
+
+int machine__process_namespaces_event(struct machine *machine __maybe_unused,
+				      union perf_event *event,
+				      struct perf_sample *sample __maybe_unused)
+{
+	struct thread *thread = machine__findnew_thread(machine,
+							event->namespaces.pid,
+							event->namespaces.tid);
+	int err = 0;
+
+	WARN_ONCE(event->namespaces.nr_namespaces > NR_NAMESPACES,
+		  "\nWARNING: kernel seems to support more namespaces than perf"
+		  " tool.\nTry updating the perf tool..\n\n");
+
+	WARN_ONCE(event->namespaces.nr_namespaces < NR_NAMESPACES,
+		  "\nWARNING: perf tool seems to support more namespaces than"
+		  " the kernel.\nTry updating the kernel..\n\n");
+
+	if (dump_trace)
+		perf_event__fprintf_namespaces(event, stdout);
+
+	if (thread == NULL ||
+	    thread__set_namespaces(thread, sample->time, &event->namespaces)) {
+		dump_printf("problem processing PERF_RECORD_NAMESPACES, skipping event.\n");
+		err = -1;
+	}
+
+	thread__put(thread);
+
+	return err;
+}
+
+int machine__process_cgroup_event(struct machine *machine,
+				  union perf_event *event,
+				  struct perf_sample *sample __maybe_unused)
+{
+	struct cgroup *cgrp;
+
+	if (dump_trace)
+		perf_event__fprintf_cgroup(event, stdout);
+
+	cgrp = cgroup__findnew(machine->env, event->cgroup.id, event->cgroup.path);
+	if (cgrp == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+int machine__process_lost_event(struct machine *machine __maybe_unused,
+				union perf_event *event, struct perf_sample *sample __maybe_unused)
+{
+	dump_printf(": id:%" PRI_lu64 ": lost:%" PRI_lu64 "\n",
+		    event->lost.id, event->lost.lost);
+	return 0;
+}
+
+int machine__process_lost_samples_event(struct machine *machine __maybe_unused,
+					union perf_event *event, struct perf_sample *sample)
+{
+	dump_printf(": id:%" PRIu64 ": lost samples :%" PRI_lu64 "\n",
+		    sample->id, event->lost_samples.lost);
+	return 0;
+}
+
+static struct dso *machine__findnew_module_dso(struct machine *machine,
+					       struct kmod_path *m,
+					       const char *filename)
+{
+	struct dso *dso;
+
+	down_write(&machine->dsos.lock);
+
+	dso = __dsos__find(&machine->dsos, m->name, true);
+	if (!dso) {
+		dso = __dsos__addnew(&machine->dsos, m->name);
+		if (dso == NULL)
+			goto out_unlock;
+
+		dso__set_module_info(dso, m, machine);
+		dso__set_long_name(dso, strdup(filename), true);
+		dso->kernel = DSO_SPACE__KERNEL;
+	}
+
+	dso__get(dso);
+out_unlock:
+	up_write(&machine->dsos.lock);
+	return dso;
+}
+
+int machine__process_aux_event(struct machine *machine __maybe_unused,
+			       union perf_event *event)
+{
+	if (dump_trace)
+		perf_event__fprintf_aux(event, stdout);
+	return 0;
+}
+
+int machine__process_itrace_start_event(struct machine *machine __maybe_unused,
+					union perf_event *event)
+{
+	if (dump_trace)
+		perf_event__fprintf_itrace_start(event, stdout);
+	return 0;
+}
+
+int machine__process_aux_output_hw_id_event(struct machine *machine __maybe_unused,
+					    union perf_event *event)
+{
+	if (dump_trace)
+		perf_event__fprintf_aux_output_hw_id(event, stdout);
+	return 0;
+}
+
+int machine__process_switch_event(struct machine *machine __maybe_unused,
+				  union perf_event *event)
+{
+	if (dump_trace)
+		perf_event__fprintf_switch(event, stdout);
+	return 0;
+}
+
+static int machine__process_ksymbol_register(struct machine *machine,
+					     union perf_event *event,
+					     struct perf_sample *sample __maybe_unused)
+{
+	struct symbol *sym;
+	struct map *map = maps__find(machine__kernel_maps(machine), event->ksymbol.addr);
+
+	if (!map) {
+		struct dso *dso = dso__new(event->ksymbol.name);
+
+		if (dso) {
+			dso->kernel = DSO_SPACE__KERNEL;
+			map = map__new2(0, dso);
+			dso__put(dso);
+		}
+
+		if (!dso || !map) {
+			return -ENOMEM;
+		}
+
+		if (event->ksymbol.ksym_type == PERF_RECORD_KSYMBOL_TYPE_OOL) {
+			map->dso->binary_type = DSO_BINARY_TYPE__OOL;
+			map->dso->data.file_size = event->ksymbol.len;
+			dso__set_loaded(map->dso);
+		}
+
+		map->start = event->ksymbol.addr;
+		map->end = map->start + event->ksymbol.len;
+		maps__insert(machine__kernel_maps(machine), map);
+		map__put(map);
+		dso__set_loaded(dso);
+
+		if (is_bpf_image(event->ksymbol.name)) {
+			dso->binary_type = DSO_BINARY_TYPE__BPF_IMAGE;
+			dso__set_long_name(dso, "", false);
+		}
+	}
+
+	sym = symbol__new(map->map_ip(map, map->start),
+			  event->ksymbol.len,
+			  0, 0, event->ksymbol.name);
+	if (!sym)
+		return -ENOMEM;
+	dso__insert_symbol(map->dso, sym);
+	return 0;
+}
+
+static int machine__process_ksymbol_unregister(struct machine *machine,
+					       union perf_event *event,
+					       struct perf_sample *sample __maybe_unused)
+{
+	struct symbol *sym;
+	struct map *map;
+
+	map = maps__find(machine__kernel_maps(machine), event->ksymbol.addr);
+	if (!map)
+		return 0;
+
+	if (map != machine->vmlinux_map)
+		maps__remove(machine__kernel_maps(machine), map);
+	else {
+		sym = dso__find_symbol(map->dso, map->map_ip(map, map->start));
+		if (sym)
+			dso__delete_symbol(map->dso, sym);
+	}
+
+	return 0;
+}
+
+int machine__process_ksymbol(struct machine *machine __maybe_unused,
+			     union perf_event *event,
+			     struct perf_sample *sample)
+{
+	if (dump_trace)
+		perf_event__fprintf_ksymbol(event, stdout);
+
+	if (event->ksymbol.flags & PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER)
+		return machine__process_ksymbol_unregister(machine, event,
+							   sample);
+	return machine__process_ksymbol_register(machine, event, sample);
+}
+
+int machine__process_text_poke(struct machine *machine, union perf_event *event,
+			       struct perf_sample *sample __maybe_unused)
+{
+	struct map *map = maps__find(machine__kernel_maps(machine), event->text_poke.addr);
+	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+
+	if (dump_trace)
+		perf_event__fprintf_text_poke(event, machine, stdout);
+
+	if (!event->text_poke.new_len)
+		return 0;
+
+	if (cpumode != PERF_RECORD_MISC_KERNEL) {
+		pr_debug("%s: unsupported cpumode - ignoring\n", __func__);
+		return 0;
+	}
+
+	if (map && map->dso) {
+		u8 *new_bytes = event->text_poke.bytes + event->text_poke.old_len;
+		int ret;
+
+		/*
+		 * Kernel maps might be changed when loading symbols so loading
+		 * must be done prior to using kernel maps.
+		 */
+		map__load(map);
+		ret = dso__data_write_cache_addr(map->dso, map, machine,
+						 event->text_poke.addr,
+						 new_bytes,
+						 event->text_poke.new_len);
+		if (ret != event->text_poke.new_len)
+			pr_debug("Failed to write kernel text poke at %#" PRI_lx64 "\n",
+				 event->text_poke.addr);
+	} else {
+		pr_debug("Failed to find kernel text poke address map for %#" PRI_lx64 "\n",
+			 event->text_poke.addr);
+	}
+
+	return 0;
+}
+
+static struct map *machine__addnew_module_map(struct machine *machine, u64 start,
+					      const char *filename)
+{
+	struct map *map = NULL;
+	struct kmod_path m;
+	struct dso *dso;
+
+	if (kmod_path__parse_name(&m, filename))
+		return NULL;
+
+	dso = machine__findnew_module_dso(machine, &m, filename);
+	if (dso == NULL)
+		goto out;
+
+	map = map__new2(start, dso);
+	if (map == NULL)
+		goto out;
+
+	maps__insert(machine__kernel_maps(machine), map);
+
+	/* Put the map here because maps__insert already got it */
+	map__put(map);
+out:
+	/* put the dso here, corresponding to  machine__findnew_module_dso */
+	dso__put(dso);
+	zfree(&m.name);
+	return map;
+}
+
+size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
+{
+	struct rb_node *nd;
+	size_t ret = __dsos__fprintf(&machines->host.dsos.head, fp);
+
+	for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
+		struct machine *pos = rb_entry(nd, struct machine, rb_node);
+		ret += __dsos__fprintf(&pos->dsos.head, fp);
+	}
+
+	return ret;
+}
+
+size_t machine__fprintf_dsos_buildid(struct machine *m, FILE *fp,
+				     bool (skip)(struct dso *dso, int parm), int parm)
+{
+	return __dsos__fprintf_buildid(&m->dsos.head, fp, skip, parm);
+}
+
+size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
+				     bool (skip)(struct dso *dso, int parm), int parm)
+{
+	struct rb_node *nd;
+	size_t ret = machine__fprintf_dsos_buildid(&machines->host, fp, skip, parm);
+
+	for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
+		struct machine *pos = rb_entry(nd, struct machine, rb_node);
+		ret += machine__fprintf_dsos_buildid(pos, fp, skip, parm);
+	}
+	return ret;
+}
+
+size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
+{
+	int i;
+	size_t printed = 0;
+	struct dso *kdso = machine__kernel_dso(machine);
+
+	if (kdso->has_build_id) {
+		char filename[PATH_MAX];
+		if (dso__build_id_filename(kdso, filename, sizeof(filename),
+					   false))
+			printed += fprintf(fp, "[0] %s\n", filename);
+	}
+
+	for (i = 0; i < vmlinux_path__nr_entries; ++i)
+		printed += fprintf(fp, "[%d] %s\n",
+				   i + kdso->has_build_id, vmlinux_path[i]);
+
+	return printed;
+}
+
+size_t machine__fprintf(struct machine *machine, FILE *fp)
+{
+	struct rb_node *nd;
+	size_t ret;
+	int i;
+
+	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+		struct threads *threads = &machine->threads[i];
+
+		down_read(&threads->lock);
+
+		ret = fprintf(fp, "Threads: %u\n", threads->nr);
+
+		for (nd = rb_first_cached(&threads->entries); nd;
+		     nd = rb_next(nd)) {
+			struct thread *pos = rb_entry(nd, struct thread, rb_node);
+
+			ret += thread__fprintf(pos, fp);
+		}
+
+		up_read(&threads->lock);
+	}
+	return ret;
+}
+
+static struct dso *machine__get_kernel(struct machine *machine)
+{
+	const char *vmlinux_name = machine->mmap_name;
+	struct dso *kernel;
+
+	if (machine__is_host(machine)) {
+		if (symbol_conf.vmlinux_name)
+			vmlinux_name = symbol_conf.vmlinux_name;
+
+		kernel = machine__findnew_kernel(machine, vmlinux_name,
+						 "[kernel]", DSO_SPACE__KERNEL);
+	} else {
+		if (symbol_conf.default_guest_vmlinux_name)
+			vmlinux_name = symbol_conf.default_guest_vmlinux_name;
+
+		kernel = machine__findnew_kernel(machine, vmlinux_name,
+						 "[guest.kernel]",
+						 DSO_SPACE__KERNEL_GUEST);
+	}
+
+	if (kernel != NULL && (!kernel->has_build_id))
+		dso__read_running_kernel_build_id(kernel, machine);
+
+	return kernel;
+}
+
+void machine__get_kallsyms_filename(struct machine *machine, char *buf,
+				    size_t bufsz)
+{
+	if (machine__is_default_guest(machine))
+		scnprintf(buf, bufsz, "%s", symbol_conf.default_guest_kallsyms);
+	else
+		scnprintf(buf, bufsz, "%s/proc/kallsyms", machine->root_dir);
+}
+
+const char *ref_reloc_sym_names[] = {"_text", "_stext", NULL};
+
+/* Figure out the start address of kernel map from /proc/kallsyms.
+ * Returns the name of the start symbol in *symbol_name. Pass in NULL as
+ * symbol_name if it's not that important.
+ */
+static int machine__get_running_kernel_start(struct machine *machine,
+					     const char **symbol_name,
+					     u64 *start, u64 *end)
+{
+	char filename[PATH_MAX];
+	int i, err = -1;
+	const char *name;
+	u64 addr = 0;
+
+	machine__get_kallsyms_filename(machine, filename, PATH_MAX);
+
+	if (symbol__restricted_filename(filename, "/proc/kallsyms"))
+		return 0;
+
+	for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) {
+		err = kallsyms__get_function_start(filename, name, &addr);
+		if (!err)
+			break;
+	}
+
+	if (err)
+		return -1;
+
+	if (symbol_name)
+		*symbol_name = name;
+
+	*start = addr;
+
+	err = kallsyms__get_function_start(filename, "_etext", &addr);
+	if (!err)
+		*end = addr;
+
+	return 0;
+}
+
+int machine__create_extra_kernel_map(struct machine *machine,
+				     struct dso *kernel,
+				     struct extra_kernel_map *xm)
+{
+	struct kmap *kmap;
+	struct map *map;
+
+	map = map__new2(xm->start, kernel);
+	if (!map)
+		return -1;
+
+	map->end   = xm->end;
+	map->pgoff = xm->pgoff;
+
+	kmap = map__kmap(map);
+
+	strlcpy(kmap->name, xm->name, KMAP_NAME_LEN);
+
+	maps__insert(machine__kernel_maps(machine), map);
+
+	pr_debug2("Added extra kernel map %s %" PRIx64 "-%" PRIx64 "\n",
+		  kmap->name, map->start, map->end);
+
+	map__put(map);
+
+	return 0;
+}
+
+static u64 find_entry_trampoline(struct dso *dso)
+{
+	/* Duplicates are removed so lookup all aliases */
+	const char *syms[] = {
+		"_entry_trampoline",
+		"__entry_trampoline_start",
+		"entry_SYSCALL_64_trampoline",
+	};
+	struct symbol *sym = dso__first_symbol(dso);
+	unsigned int i;
+
+	for (; sym; sym = dso__next_symbol(sym)) {
+		if (sym->binding != STB_GLOBAL)
+			continue;
+		for (i = 0; i < ARRAY_SIZE(syms); i++) {
+			if (!strcmp(sym->name, syms[i]))
+				return sym->start;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * These values can be used for kernels that do not have symbols for the entry
+ * trampolines in kallsyms.
+ */
+#define X86_64_CPU_ENTRY_AREA_PER_CPU	0xfffffe0000000000ULL
+#define X86_64_CPU_ENTRY_AREA_SIZE	0x2c000
+#define X86_64_ENTRY_TRAMPOLINE		0x6000
+
+/* Map x86_64 PTI entry trampolines */
+int machine__map_x86_64_entry_trampolines(struct machine *machine,
+					  struct dso *kernel)
+{
+	struct maps *kmaps = machine__kernel_maps(machine);
+	int nr_cpus_avail, cpu;
+	bool found = false;
+	struct map *map;
+	u64 pgoff;
+
+	/*
+	 * In the vmlinux case, pgoff is a virtual address which must now be
+	 * mapped to a vmlinux offset.
+	 */
+	maps__for_each_entry(kmaps, map) {
+		struct kmap *kmap = __map__kmap(map);
+		struct map *dest_map;
+
+		if (!kmap || !is_entry_trampoline(kmap->name))
+			continue;
+
+		dest_map = maps__find(kmaps, map->pgoff);
+		if (dest_map != map)
+			map->pgoff = dest_map->map_ip(dest_map, map->pgoff);
+		found = true;
+	}
+	if (found || machine->trampolines_mapped)
+		return 0;
+
+	pgoff = find_entry_trampoline(kernel);
+	if (!pgoff)
+		return 0;
+
+	nr_cpus_avail = machine__nr_cpus_avail(machine);
+
+	/* Add a 1 page map for each CPU's entry trampoline */
+	for (cpu = 0; cpu < nr_cpus_avail; cpu++) {
+		u64 va = X86_64_CPU_ENTRY_AREA_PER_CPU +
+			 cpu * X86_64_CPU_ENTRY_AREA_SIZE +
+			 X86_64_ENTRY_TRAMPOLINE;
+		struct extra_kernel_map xm = {
+			.start = va,
+			.end   = va + page_size,
+			.pgoff = pgoff,
+		};
+
+		strlcpy(xm.name, ENTRY_TRAMPOLINE_NAME, KMAP_NAME_LEN);
+
+		if (machine__create_extra_kernel_map(machine, kernel, &xm) < 0)
+			return -1;
+	}
+
+	machine->trampolines_mapped = nr_cpus_avail;
+
+	return 0;
+}
+
+int __weak machine__create_extra_kernel_maps(struct machine *machine __maybe_unused,
+					     struct dso *kernel __maybe_unused)
+{
+	return 0;
+}
+
+static int
+__machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
+{
+	/* In case of renewal the kernel map, destroy previous one */
+	machine__destroy_kernel_maps(machine);
+
+	machine->vmlinux_map = map__new2(0, kernel);
+	if (machine->vmlinux_map == NULL)
+		return -1;
+
+	machine->vmlinux_map->map_ip = machine->vmlinux_map->unmap_ip = identity__map_ip;
+	maps__insert(machine__kernel_maps(machine), machine->vmlinux_map);
+	return 0;
+}
+
+void machine__destroy_kernel_maps(struct machine *machine)
+{
+	struct kmap *kmap;
+	struct map *map = machine__kernel_map(machine);
+
+	if (map == NULL)
+		return;
+
+	kmap = map__kmap(map);
+	maps__remove(machine__kernel_maps(machine), map);
+	if (kmap && kmap->ref_reloc_sym) {
+		zfree((char **)&kmap->ref_reloc_sym->name);
+		zfree(&kmap->ref_reloc_sym);
+	}
+
+	map__zput(machine->vmlinux_map);
+}
+
+int machines__create_guest_kernel_maps(struct machines *machines)
+{
+	int ret = 0;
+	struct dirent **namelist = NULL;
+	int i, items = 0;
+	char path[PATH_MAX];
+	pid_t pid;
+	char *endp;
+
+	if (symbol_conf.default_guest_vmlinux_name ||
+	    symbol_conf.default_guest_modules ||
+	    symbol_conf.default_guest_kallsyms) {
+		machines__create_kernel_maps(machines, DEFAULT_GUEST_KERNEL_ID);
+	}
+
+	if (symbol_conf.guestmount) {
+		items = scandir(symbol_conf.guestmount, &namelist, NULL, NULL);
+		if (items <= 0)
+			return -ENOENT;
+		for (i = 0; i < items; i++) {
+			if (!isdigit(namelist[i]->d_name[0])) {
+				/* Filter out . and .. */
+				continue;
+			}
+			pid = (pid_t)strtol(namelist[i]->d_name, &endp, 10);
+			if ((*endp != '\0') ||
+			    (endp == namelist[i]->d_name) ||
+			    (errno == ERANGE)) {
+				pr_debug("invalid directory (%s). Skipping.\n",
+					 namelist[i]->d_name);
+				continue;
+			}
+			sprintf(path, "%s/%s/proc/kallsyms",
+				symbol_conf.guestmount,
+				namelist[i]->d_name);
+			ret = access(path, R_OK);
+			if (ret) {
+				pr_debug("Can't access file %s\n", path);
+				goto failure;
+			}
+			machines__create_kernel_maps(machines, pid);
+		}
+failure:
+		free(namelist);
+	}
+
+	return ret;
+}
+
+void machines__destroy_kernel_maps(struct machines *machines)
+{
+	struct rb_node *next = rb_first_cached(&machines->guests);
+
+	machine__destroy_kernel_maps(&machines->host);
+
+	while (next) {
+		struct machine *pos = rb_entry(next, struct machine, rb_node);
+
+		next = rb_next(&pos->rb_node);
+		rb_erase_cached(&pos->rb_node, &machines->guests);
+		machine__delete(pos);
+	}
+}
+
+int machines__create_kernel_maps(struct machines *machines, pid_t pid)
+{
+	struct machine *machine = machines__findnew(machines, pid);
+
+	if (machine == NULL)
+		return -1;
+
+	return machine__create_kernel_maps(machine);
+}
+
+int machine__load_kallsyms(struct machine *machine, const char *filename)
+{
+	struct map *map = machine__kernel_map(machine);
+	int ret = __dso__load_kallsyms(map->dso, filename, map, true);
+
+	if (ret > 0) {
+		dso__set_loaded(map->dso);
+		/*
+		 * Since /proc/kallsyms will have multiple sessions for the
+		 * kernel, with modules between them, fixup the end of all
+		 * sections.
+		 */
+		maps__fixup_end(machine__kernel_maps(machine));
+	}
+
+	return ret;
+}
+
+int machine__load_vmlinux_path(struct machine *machine)
+{
+	struct map *map = machine__kernel_map(machine);
+	int ret = dso__load_vmlinux_path(map->dso, map);
+
+	if (ret > 0)
+		dso__set_loaded(map->dso);
+
+	return ret;
+}
+
+static char *get_kernel_version(const char *root_dir)
+{
+	char version[PATH_MAX];
+	FILE *file;
+	char *name, *tmp;
+	const char *prefix = "Linux version ";
+
+	sprintf(version, "%s/proc/version", root_dir);
+	file = fopen(version, "r");
+	if (!file)
+		return NULL;
+
+	tmp = fgets(version, sizeof(version), file);
+	fclose(file);
+	if (!tmp)
+		return NULL;
+
+	name = strstr(version, prefix);
+	if (!name)
+		return NULL;
+	name += strlen(prefix);
+	tmp = strchr(name, ' ');
+	if (tmp)
+		*tmp = '\0';
+
+	return strdup(name);
+}
+
+static bool is_kmod_dso(struct dso *dso)
+{
+	return dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE ||
+	       dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE;
+}
+
+static int maps__set_module_path(struct maps *maps, const char *path, struct kmod_path *m)
+{
+	char *long_name;
+	struct map *map = maps__find_by_name(maps, m->name);
+
+	if (map == NULL)
+		return 0;
+
+	long_name = strdup(path);
+	if (long_name == NULL)
+		return -ENOMEM;
+
+	dso__set_long_name(map->dso, long_name, true);
+	dso__kernel_module_get_build_id(map->dso, "");
+
+	/*
+	 * Full name could reveal us kmod compression, so
+	 * we need to update the symtab_type if needed.
+	 */
+	if (m->comp && is_kmod_dso(map->dso)) {
+		map->dso->symtab_type++;
+		map->dso->comp = m->comp;
+	}
+
+	return 0;
+}
+
+static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, int depth)
+{
+	struct dirent *dent;
+	DIR *dir = opendir(dir_name);
+	int ret = 0;
+
+	if (!dir) {
+		pr_debug("%s: cannot open %s dir\n", __func__, dir_name);
+		return -1;
+	}
+
+	while ((dent = readdir(dir)) != NULL) {
+		char path[PATH_MAX];
+		struct stat st;
+
+		/*sshfs might return bad dent->d_type, so we have to stat*/
+		path__join(path, sizeof(path), dir_name, dent->d_name);
+		if (stat(path, &st))
+			continue;
+
+		if (S_ISDIR(st.st_mode)) {
+			if (!strcmp(dent->d_name, ".") ||
+			    !strcmp(dent->d_name, ".."))
+				continue;
+
+			/* Do not follow top-level source and build symlinks */
+			if (depth == 0) {
+				if (!strcmp(dent->d_name, "source") ||
+				    !strcmp(dent->d_name, "build"))
+					continue;
+			}
+
+			ret = maps__set_modules_path_dir(maps, path, depth + 1);
+			if (ret < 0)
+				goto out;
+		} else {
+			struct kmod_path m;
+
+			ret = kmod_path__parse_name(&m, dent->d_name);
+			if (ret)
+				goto out;
+
+			if (m.kmod)
+				ret = maps__set_module_path(maps, path, &m);
+
+			zfree(&m.name);
+
+			if (ret)
+				goto out;
+		}
+	}
+
+out:
+	closedir(dir);
+	return ret;
+}
+
+static int machine__set_modules_path(struct machine *machine)
+{
+	char *version;
+	char modules_path[PATH_MAX];
+
+	version = get_kernel_version(machine->root_dir);
+	if (!version)
+		return -1;
+
+	snprintf(modules_path, sizeof(modules_path), "%s/lib/modules/%s",
+		 machine->root_dir, version);
+	free(version);
+
+	return maps__set_modules_path_dir(machine__kernel_maps(machine), modules_path, 0);
+}
+int __weak arch__fix_module_text_start(u64 *start __maybe_unused,
+				u64 *size __maybe_unused,
+				const char *name __maybe_unused)
+{
+	return 0;
+}
+
+static int machine__create_module(void *arg, const char *name, u64 start,
+				  u64 size)
+{
+	struct machine *machine = arg;
+	struct map *map;
+
+	if (arch__fix_module_text_start(&start, &size, name) < 0)
+		return -1;
+
+	map = machine__addnew_module_map(machine, start, name);
+	if (map == NULL)
+		return -1;
+	map->end = start + size;
+
+	dso__kernel_module_get_build_id(map->dso, machine->root_dir);
+
+	return 0;
+}
+
+static int machine__create_modules(struct machine *machine)
+{
+	const char *modules;
+	char path[PATH_MAX];
+
+	if (machine__is_default_guest(machine)) {
+		modules = symbol_conf.default_guest_modules;
+	} else {
+		snprintf(path, PATH_MAX, "%s/proc/modules", machine->root_dir);
+		modules = path;
+	}
+
+	if (symbol__restricted_filename(modules, "/proc/modules"))
+		return -1;
+
+	if (modules__parse(modules, machine, machine__create_module))
+		return -1;
+
+	if (!machine__set_modules_path(machine))
+		return 0;
+
+	pr_debug("Problems setting modules path maps, continuing anyway...\n");
+
+	return 0;
+}
+
+static void machine__set_kernel_mmap(struct machine *machine,
+				     u64 start, u64 end)
+{
+	machine->vmlinux_map->start = start;
+	machine->vmlinux_map->end   = end;
+	/*
+	 * Be a bit paranoid here, some perf.data file came with
+	 * a zero sized synthesized MMAP event for the kernel.
+	 */
+	if (start == 0 && end == 0)
+		machine->vmlinux_map->end = ~0ULL;
+}
+
+static void machine__update_kernel_mmap(struct machine *machine,
+				     u64 start, u64 end)
+{
+	struct map *map = machine__kernel_map(machine);
+
+	map__get(map);
+	maps__remove(machine__kernel_maps(machine), map);
+
+	machine__set_kernel_mmap(machine, start, end);
+
+	maps__insert(machine__kernel_maps(machine), map);
+	map__put(map);
+}
+
+int machine__create_kernel_maps(struct machine *machine)
+{
+	struct dso *kernel = machine__get_kernel(machine);
+	const char *name = NULL;
+	struct map *map;
+	u64 start = 0, end = ~0ULL;
+	int ret;
+
+	if (kernel == NULL)
+		return -1;
+
+	ret = __machine__create_kernel_maps(machine, kernel);
+	if (ret < 0)
+		goto out_put;
+
+	if (symbol_conf.use_modules && machine__create_modules(machine) < 0) {
+		if (machine__is_host(machine))
+			pr_debug("Problems creating module maps, "
+				 "continuing anyway...\n");
+		else
+			pr_debug("Problems creating module maps for guest %d, "
+				 "continuing anyway...\n", machine->pid);
+	}
+
+	if (!machine__get_running_kernel_start(machine, &name, &start, &end)) {
+		if (name &&
+		    map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map, name, start)) {
+			machine__destroy_kernel_maps(machine);
+			ret = -1;
+			goto out_put;
+		}
+
+		/*
+		 * we have a real start address now, so re-order the kmaps
+		 * assume it's the last in the kmaps
+		 */
+		machine__update_kernel_mmap(machine, start, end);
+	}
+
+	if (machine__create_extra_kernel_maps(machine, kernel))
+		pr_debug("Problems creating extra kernel maps, continuing anyway...\n");
+
+	if (end == ~0ULL) {
+		/* update end address of the kernel map using adjacent module address */
+		map = map__next(machine__kernel_map(machine));
+		if (map)
+			machine__set_kernel_mmap(machine, start, map->start);
+	}
+
+out_put:
+	dso__put(kernel);
+	return ret;
+}
+
+static bool machine__uses_kcore(struct machine *machine)
+{
+	struct dso *dso;
+
+	list_for_each_entry(dso, &machine->dsos.head, node) {
+		if (dso__is_kcore(dso))
+			return true;
+	}
+
+	return false;
+}
+
+static bool perf_event__is_extra_kernel_mmap(struct machine *machine,
+					     struct extra_kernel_map *xm)
+{
+	return machine__is(machine, "x86_64") &&
+	       is_entry_trampoline(xm->name);
+}
+
+static int machine__process_extra_kernel_map(struct machine *machine,
+					     struct extra_kernel_map *xm)
+{
+	struct dso *kernel = machine__kernel_dso(machine);
+
+	if (kernel == NULL)
+		return -1;
+
+	return machine__create_extra_kernel_map(machine, kernel, xm);
+}
+
+static int machine__process_kernel_mmap_event(struct machine *machine,
+					      struct extra_kernel_map *xm,
+					      struct build_id *bid)
+{
+	struct map *map;
+	enum dso_space_type dso_space;
+	bool is_kernel_mmap;
+	const char *mmap_name = machine->mmap_name;
+
+	/* If we have maps from kcore then we do not need or want any others */
+	if (machine__uses_kcore(machine))
+		return 0;
+
+	if (machine__is_host(machine))
+		dso_space = DSO_SPACE__KERNEL;
+	else
+		dso_space = DSO_SPACE__KERNEL_GUEST;
+
+	is_kernel_mmap = memcmp(xm->name, mmap_name, strlen(mmap_name) - 1) == 0;
+	if (!is_kernel_mmap && !machine__is_host(machine)) {
+		/*
+		 * If the event was recorded inside the guest and injected into
+		 * the host perf.data file, then it will match a host mmap_name,
+		 * so try that - see machine__set_mmap_name().
+		 */
+		mmap_name = "[kernel.kallsyms]";
+		is_kernel_mmap = memcmp(xm->name, mmap_name, strlen(mmap_name) - 1) == 0;
+	}
+	if (xm->name[0] == '/' ||
+	    (!is_kernel_mmap && xm->name[0] == '[')) {
+		map = machine__addnew_module_map(machine, xm->start,
+						 xm->name);
+		if (map == NULL)
+			goto out_problem;
+
+		map->end = map->start + xm->end - xm->start;
+
+		if (build_id__is_defined(bid))
+			dso__set_build_id(map->dso, bid);
+
+	} else if (is_kernel_mmap) {
+		const char *symbol_name = xm->name + strlen(mmap_name);
+		/*
+		 * Should be there already, from the build-id table in
+		 * the header.
+		 */
+		struct dso *kernel = NULL;
+		struct dso *dso;
+
+		down_read(&machine->dsos.lock);
+
+		list_for_each_entry(dso, &machine->dsos.head, node) {
+
+			/*
+			 * The cpumode passed to is_kernel_module is not the
+			 * cpumode of *this* event. If we insist on passing
+			 * correct cpumode to is_kernel_module, we should
+			 * record the cpumode when we adding this dso to the
+			 * linked list.
+			 *
+			 * However we don't really need passing correct
+			 * cpumode.  We know the correct cpumode must be kernel
+			 * mode (if not, we should not link it onto kernel_dsos
+			 * list).
+			 *
+			 * Therefore, we pass PERF_RECORD_MISC_CPUMODE_UNKNOWN.
+			 * is_kernel_module() treats it as a kernel cpumode.
+			 */
+
+			if (!dso->kernel ||
+			    is_kernel_module(dso->long_name,
+					     PERF_RECORD_MISC_CPUMODE_UNKNOWN))
+				continue;
+
+
+			kernel = dso;
+			break;
+		}
+
+		up_read(&machine->dsos.lock);
+
+		if (kernel == NULL)
+			kernel = machine__findnew_dso(machine, machine->mmap_name);
+		if (kernel == NULL)
+			goto out_problem;
+
+		kernel->kernel = dso_space;
+		if (__machine__create_kernel_maps(machine, kernel) < 0) {
+			dso__put(kernel);
+			goto out_problem;
+		}
+
+		if (strstr(kernel->long_name, "vmlinux"))
+			dso__set_short_name(kernel, "[kernel.vmlinux]", false);
+
+		machine__update_kernel_mmap(machine, xm->start, xm->end);
+
+		if (build_id__is_defined(bid))
+			dso__set_build_id(kernel, bid);
+
+		/*
+		 * Avoid using a zero address (kptr_restrict) for the ref reloc
+		 * symbol. Effectively having zero here means that at record
+		 * time /proc/sys/kernel/kptr_restrict was non zero.
+		 */
+		if (xm->pgoff != 0) {
+			map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map,
+							symbol_name,
+							xm->pgoff);
+		}
+
+		if (machine__is_default_guest(machine)) {
+			/*
+			 * preload dso of guest kernel and modules
+			 */
+			dso__load(kernel, machine__kernel_map(machine));
+		}
+	} else if (perf_event__is_extra_kernel_mmap(machine, xm)) {
+		return machine__process_extra_kernel_map(machine, xm);
+	}
+	return 0;
+out_problem:
+	return -1;
+}
+
+int machine__process_mmap2_event(struct machine *machine,
+				 union perf_event *event,
+				 struct perf_sample *sample)
+{
+	struct thread *thread;
+	struct map *map;
+	struct dso_id dso_id = {
+		.maj = event->mmap2.maj,
+		.min = event->mmap2.min,
+		.ino = event->mmap2.ino,
+		.ino_generation = event->mmap2.ino_generation,
+	};
+	struct build_id __bid, *bid = NULL;
+	int ret = 0;
+
+	if (dump_trace)
+		perf_event__fprintf_mmap2(event, stdout);
+
+	if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
+		bid = &__bid;
+		build_id__init(bid, event->mmap2.build_id, event->mmap2.build_id_size);
+	}
+
+	if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
+	    sample->cpumode == PERF_RECORD_MISC_KERNEL) {
+		struct extra_kernel_map xm = {
+			.start = event->mmap2.start,
+			.end   = event->mmap2.start + event->mmap2.len,
+			.pgoff = event->mmap2.pgoff,
+		};
+
+		strlcpy(xm.name, event->mmap2.filename, KMAP_NAME_LEN);
+		ret = machine__process_kernel_mmap_event(machine, &xm, bid);
+		if (ret < 0)
+			goto out_problem;
+		return 0;
+	}
+
+	thread = machine__findnew_thread(machine, event->mmap2.pid,
+					event->mmap2.tid);
+	if (thread == NULL)
+		goto out_problem;
+
+	map = map__new(machine, event->mmap2.start,
+			event->mmap2.len, event->mmap2.pgoff,
+			&dso_id, event->mmap2.prot,
+			event->mmap2.flags, bid,
+			event->mmap2.filename, thread);
+
+	if (map == NULL)
+		goto out_problem_map;
+
+	ret = thread__insert_map(thread, map);
+	if (ret)
+		goto out_problem_insert;
+
+	thread__put(thread);
+	map__put(map);
+	return 0;
+
+out_problem_insert:
+	map__put(map);
+out_problem_map:
+	thread__put(thread);
+out_problem:
+	dump_printf("problem processing PERF_RECORD_MMAP2, skipping event.\n");
+	return 0;
+}
+
+int machine__process_mmap_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample)
+{
+	struct thread *thread;
+	struct map *map;
+	u32 prot = 0;
+	int ret = 0;
+
+	if (dump_trace)
+		perf_event__fprintf_mmap(event, stdout);
+
+	if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
+	    sample->cpumode == PERF_RECORD_MISC_KERNEL) {
+		struct extra_kernel_map xm = {
+			.start = event->mmap.start,
+			.end   = event->mmap.start + event->mmap.len,
+			.pgoff = event->mmap.pgoff,
+		};
+
+		strlcpy(xm.name, event->mmap.filename, KMAP_NAME_LEN);
+		ret = machine__process_kernel_mmap_event(machine, &xm, NULL);
+		if (ret < 0)
+			goto out_problem;
+		return 0;
+	}
+
+	thread = machine__findnew_thread(machine, event->mmap.pid,
+					 event->mmap.tid);
+	if (thread == NULL)
+		goto out_problem;
+
+	if (!(event->header.misc & PERF_RECORD_MISC_MMAP_DATA))
+		prot = PROT_EXEC;
+
+	map = map__new(machine, event->mmap.start,
+			event->mmap.len, event->mmap.pgoff,
+			NULL, prot, 0, NULL, event->mmap.filename, thread);
+
+	if (map == NULL)
+		goto out_problem_map;
+
+	ret = thread__insert_map(thread, map);
+	if (ret)
+		goto out_problem_insert;
+
+	thread__put(thread);
+	map__put(map);
+	return 0;
+
+out_problem_insert:
+	map__put(map);
+out_problem_map:
+	thread__put(thread);
+out_problem:
+	dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
+	return 0;
+}
+
+static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock)
+{
+	struct threads *threads = machine__threads(machine, th->tid);
+
+	if (threads->last_match == th)
+		threads__set_last_match(threads, NULL);
+
+	if (lock)
+		down_write(&threads->lock);
+
+	BUG_ON(refcount_read(&th->refcnt) == 0);
+
+	rb_erase_cached(&th->rb_node, &threads->entries);
+	RB_CLEAR_NODE(&th->rb_node);
+	--threads->nr;
+	/*
+	 * Move it first to the dead_threads list, then drop the reference,
+	 * if this is the last reference, then the thread__delete destructor
+	 * will be called and we will remove it from the dead_threads list.
+	 */
+	list_add_tail(&th->node, &threads->dead);
+
+	/*
+	 * We need to do the put here because if this is the last refcount,
+	 * then we will be touching the threads->dead head when removing the
+	 * thread.
+	 */
+	thread__put(th);
+
+	if (lock)
+		up_write(&threads->lock);
+}
+
+void machine__remove_thread(struct machine *machine, struct thread *th)
+{
+	return __machine__remove_thread(machine, th, true);
+}
+
+int machine__process_fork_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample)
+{
+	struct thread *thread = machine__find_thread(machine,
+						     event->fork.pid,
+						     event->fork.tid);
+	struct thread *parent = machine__findnew_thread(machine,
+							event->fork.ppid,
+							event->fork.ptid);
+	bool do_maps_clone = true;
+	int err = 0;
+
+	if (dump_trace)
+		perf_event__fprintf_task(event, stdout);
+
+	/*
+	 * There may be an existing thread that is not actually the parent,
+	 * either because we are processing events out of order, or because the
+	 * (fork) event that would have removed the thread was lost. Assume the
+	 * latter case and continue on as best we can.
+	 */
+	if (parent->pid_ != (pid_t)event->fork.ppid) {
+		dump_printf("removing erroneous parent thread %d/%d\n",
+			    parent->pid_, parent->tid);
+		machine__remove_thread(machine, parent);
+		thread__put(parent);
+		parent = machine__findnew_thread(machine, event->fork.ppid,
+						 event->fork.ptid);
+	}
+
+	/* if a thread currently exists for the thread id remove it */
+	if (thread != NULL) {
+		machine__remove_thread(machine, thread);
+		thread__put(thread);
+	}
+
+	thread = machine__findnew_thread(machine, event->fork.pid,
+					 event->fork.tid);
+	/*
+	 * When synthesizing FORK events, we are trying to create thread
+	 * objects for the already running tasks on the machine.
+	 *
+	 * Normally, for a kernel FORK event, we want to clone the parent's
+	 * maps because that is what the kernel just did.
+	 *
+	 * But when synthesizing, this should not be done.  If we do, we end up
+	 * with overlapping maps as we process the synthesized MMAP2 events that
+	 * get delivered shortly thereafter.
+	 *
+	 * Use the FORK event misc flags in an internal way to signal this
+	 * situation, so we can elide the map clone when appropriate.
+	 */
+	if (event->fork.header.misc & PERF_RECORD_MISC_FORK_EXEC)
+		do_maps_clone = false;
+
+	if (thread == NULL || parent == NULL ||
+	    thread__fork(thread, parent, sample->time, do_maps_clone) < 0) {
+		dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
+		err = -1;
+	}
+	thread__put(thread);
+	thread__put(parent);
+
+	return err;
+}
+
+int machine__process_exit_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample __maybe_unused)
+{
+	struct thread *thread = machine__find_thread(machine,
+						     event->fork.pid,
+						     event->fork.tid);
+
+	if (dump_trace)
+		perf_event__fprintf_task(event, stdout);
+
+	if (thread != NULL) {
+		thread__exited(thread);
+		thread__put(thread);
+	}
+
+	return 0;
+}
+
+int machine__process_event(struct machine *machine, union perf_event *event,
+			   struct perf_sample *sample)
+{
+	int ret;
+
+	switch (event->header.type) {
+	case PERF_RECORD_COMM:
+		ret = machine__process_comm_event(machine, event, sample); break;
+	case PERF_RECORD_MMAP:
+		ret = machine__process_mmap_event(machine, event, sample); break;
+	case PERF_RECORD_NAMESPACES:
+		ret = machine__process_namespaces_event(machine, event, sample); break;
+	case PERF_RECORD_CGROUP:
+		ret = machine__process_cgroup_event(machine, event, sample); break;
+	case PERF_RECORD_MMAP2:
+		ret = machine__process_mmap2_event(machine, event, sample); break;
+	case PERF_RECORD_FORK:
+		ret = machine__process_fork_event(machine, event, sample); break;
+	case PERF_RECORD_EXIT:
+		ret = machine__process_exit_event(machine, event, sample); break;
+	case PERF_RECORD_LOST:
+		ret = machine__process_lost_event(machine, event, sample); break;
+	case PERF_RECORD_AUX:
+		ret = machine__process_aux_event(machine, event); break;
+	case PERF_RECORD_ITRACE_START:
+		ret = machine__process_itrace_start_event(machine, event); break;
+	case PERF_RECORD_LOST_SAMPLES:
+		ret = machine__process_lost_samples_event(machine, event, sample); break;
+	case PERF_RECORD_SWITCH:
+	case PERF_RECORD_SWITCH_CPU_WIDE:
+		ret = machine__process_switch_event(machine, event); break;
+	case PERF_RECORD_KSYMBOL:
+		ret = machine__process_ksymbol(machine, event, sample); break;
+	case PERF_RECORD_BPF_EVENT:
+		ret = machine__process_bpf(machine, event, sample); break;
+	case PERF_RECORD_TEXT_POKE:
+		ret = machine__process_text_poke(machine, event, sample); break;
+	case PERF_RECORD_AUX_OUTPUT_HW_ID:
+		ret = machine__process_aux_output_hw_id_event(machine, event); break;
+	default:
+		ret = -1;
+		break;
+	}
+
+	return ret;
+}
+
+static bool symbol__match_regex(struct symbol *sym, regex_t *regex)
+{
+	if (!regexec(regex, sym->name, 0, NULL, 0))
+		return true;
+	return false;
+}
+
+static void ip__resolve_ams(struct thread *thread,
+			    struct addr_map_symbol *ams,
+			    u64 ip)
+{
+	struct addr_location al;
+
+	memset(&al, 0, sizeof(al));
+	/*
+	 * We cannot use the header.misc hint to determine whether a
+	 * branch stack address is user, kernel, guest, hypervisor.
+	 * Branches may straddle the kernel/user/hypervisor boundaries.
+	 * Thus, we have to try consecutively until we find a match
+	 * or else, the symbol is unknown
+	 */
+	thread__find_cpumode_addr_location(thread, ip, &al);
+
+	ams->addr = ip;
+	ams->al_addr = al.addr;
+	ams->al_level = al.level;
+	ams->ms.maps = al.maps;
+	ams->ms.sym = al.sym;
+	ams->ms.map = al.map;
+	ams->phys_addr = 0;
+	ams->data_page_size = 0;
+}
+
+static void ip__resolve_data(struct thread *thread,
+			     u8 m, struct addr_map_symbol *ams,
+			     u64 addr, u64 phys_addr, u64 daddr_page_size)
+{
+	struct addr_location al;
+
+	memset(&al, 0, sizeof(al));
+
+	thread__find_symbol(thread, m, addr, &al);
+
+	ams->addr = addr;
+	ams->al_addr = al.addr;
+	ams->al_level = al.level;
+	ams->ms.maps = al.maps;
+	ams->ms.sym = al.sym;
+	ams->ms.map = al.map;
+	ams->phys_addr = phys_addr;
+	ams->data_page_size = daddr_page_size;
+}
+
+struct mem_info *sample__resolve_mem(struct perf_sample *sample,
+				     struct addr_location *al)
+{
+	struct mem_info *mi = mem_info__new();
+
+	if (!mi)
+		return NULL;
+
+	ip__resolve_ams(al->thread, &mi->iaddr, sample->ip);
+	ip__resolve_data(al->thread, al->cpumode, &mi->daddr,
+			 sample->addr, sample->phys_addr,
+			 sample->data_page_size);
+	mi->data_src.val = sample->data_src;
+
+	return mi;
+}
+
+static char *callchain_srcline(struct map_symbol *ms, u64 ip)
+{
+	struct map *map = ms->map;
+	char *srcline = NULL;
+
+	if (!map || callchain_param.key == CCKEY_FUNCTION)
+		return srcline;
+
+	srcline = srcline__tree_find(&map->dso->srclines, ip);
+	if (!srcline) {
+		bool show_sym = false;
+		bool show_addr = callchain_param.key == CCKEY_ADDRESS;
+
+		srcline = get_srcline(map->dso, map__rip_2objdump(map, ip),
+				      ms->sym, show_sym, show_addr, ip);
+		srcline__tree_insert(&map->dso->srclines, ip, srcline);
+	}
+
+	return srcline;
+}
+
+struct iterations {
+	int nr_loop_iter;
+	u64 cycles;
+};
+
+static int add_callchain_ip(struct thread *thread,
+			    struct callchain_cursor *cursor,
+			    struct symbol **parent,
+			    struct addr_location *root_al,
+			    u8 *cpumode,
+			    u64 ip,
+			    bool branch,
+			    struct branch_flags *flags,
+			    struct iterations *iter,
+			    u64 branch_from)
+{
+	struct map_symbol ms;
+	struct addr_location al;
+	int nr_loop_iter = 0;
+	u64 iter_cycles = 0;
+	const char *srcline = NULL;
+
+	al.filtered = 0;
+	al.sym = NULL;
+	al.srcline = NULL;
+	if (!cpumode) {
+		thread__find_cpumode_addr_location(thread, ip, &al);
+	} else {
+		if (ip >= PERF_CONTEXT_MAX) {
+			switch (ip) {
+			case PERF_CONTEXT_HV:
+				*cpumode = PERF_RECORD_MISC_HYPERVISOR;
+				break;
+			case PERF_CONTEXT_KERNEL:
+				*cpumode = PERF_RECORD_MISC_KERNEL;
+				break;
+			case PERF_CONTEXT_USER:
+				*cpumode = PERF_RECORD_MISC_USER;
+				break;
+			default:
+				pr_debug("invalid callchain context: "
+					 "%"PRId64"\n", (s64) ip);
+				/*
+				 * It seems the callchain is corrupted.
+				 * Discard all.
+				 */
+				callchain_cursor_reset(cursor);
+				return 1;
+			}
+			return 0;
+		}
+		thread__find_symbol(thread, *cpumode, ip, &al);
+	}
+
+	if (al.sym != NULL) {
+		if (perf_hpp_list.parent && !*parent &&
+		    symbol__match_regex(al.sym, &parent_regex))
+			*parent = al.sym;
+		else if (have_ignore_callees && root_al &&
+		  symbol__match_regex(al.sym, &ignore_callees_regex)) {
+			/* Treat this symbol as the root,
+			   forgetting its callees. */
+			*root_al = al;
+			callchain_cursor_reset(cursor);
+		}
+	}
+
+	if (symbol_conf.hide_unresolved && al.sym == NULL)
+		return 0;
+
+	if (iter) {
+		nr_loop_iter = iter->nr_loop_iter;
+		iter_cycles = iter->cycles;
+	}
+
+	ms.maps = al.maps;
+	ms.map = al.map;
+	ms.sym = al.sym;
+	srcline = callchain_srcline(&ms, al.addr);
+	return callchain_cursor_append(cursor, ip, &ms,
+				       branch, flags, nr_loop_iter,
+				       iter_cycles, branch_from, srcline);
+}
+
+struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
+					   struct addr_location *al)
+{
+	unsigned int i;
+	const struct branch_stack *bs = sample->branch_stack;
+	struct branch_entry *entries = perf_sample__branch_entries(sample);
+	struct branch_info *bi = calloc(bs->nr, sizeof(struct branch_info));
+
+	if (!bi)
+		return NULL;
+
+	for (i = 0; i < bs->nr; i++) {
+		ip__resolve_ams(al->thread, &bi[i].to, entries[i].to);
+		ip__resolve_ams(al->thread, &bi[i].from, entries[i].from);
+		bi[i].flags = entries[i].flags;
+	}
+	return bi;
+}
+
+static void save_iterations(struct iterations *iter,
+			    struct branch_entry *be, int nr)
+{
+	int i;
+
+	iter->nr_loop_iter++;
+	iter->cycles = 0;
+
+	for (i = 0; i < nr; i++)
+		iter->cycles += be[i].flags.cycles;
+}
+
+#define CHASHSZ 127
+#define CHASHBITS 7
+#define NO_ENTRY 0xff
+
+#define PERF_MAX_BRANCH_DEPTH 127
+
+/* Remove loops. */
+static int remove_loops(struct branch_entry *l, int nr,
+			struct iterations *iter)
+{
+	int i, j, off;
+	unsigned char chash[CHASHSZ];
+
+	memset(chash, NO_ENTRY, sizeof(chash));
+
+	BUG_ON(PERF_MAX_BRANCH_DEPTH > 255);
+
+	for (i = 0; i < nr; i++) {
+		int h = hash_64(l[i].from, CHASHBITS) % CHASHSZ;
+
+		/* no collision handling for now */
+		if (chash[h] == NO_ENTRY) {
+			chash[h] = i;
+		} else if (l[chash[h]].from == l[i].from) {
+			bool is_loop = true;
+			/* check if it is a real loop */
+			off = 0;
+			for (j = chash[h]; j < i && i + off < nr; j++, off++)
+				if (l[j].from != l[i + off].from) {
+					is_loop = false;
+					break;
+				}
+			if (is_loop) {
+				j = nr - (i + off);
+				if (j > 0) {
+					save_iterations(iter + i + off,
+						l + i, off);
+
+					memmove(iter + i, iter + i + off,
+						j * sizeof(*iter));
+
+					memmove(l + i, l + i + off,
+						j * sizeof(*l));
+				}
+
+				nr -= off;
+			}
+		}
+	}
+	return nr;
+}
+
+static int lbr_callchain_add_kernel_ip(struct thread *thread,
+				       struct callchain_cursor *cursor,
+				       struct perf_sample *sample,
+				       struct symbol **parent,
+				       struct addr_location *root_al,
+				       u64 branch_from,
+				       bool callee, int end)
+{
+	struct ip_callchain *chain = sample->callchain;
+	u8 cpumode = PERF_RECORD_MISC_USER;
+	int err, i;
+
+	if (callee) {
+		for (i = 0; i < end + 1; i++) {
+			err = add_callchain_ip(thread, cursor, parent,
+					       root_al, &cpumode, chain->ips[i],
+					       false, NULL, NULL, branch_from);
+			if (err)
+				return err;
+		}
+		return 0;
+	}
+
+	for (i = end; i >= 0; i--) {
+		err = add_callchain_ip(thread, cursor, parent,
+				       root_al, &cpumode, chain->ips[i],
+				       false, NULL, NULL, branch_from);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static void save_lbr_cursor_node(struct thread *thread,
+				 struct callchain_cursor *cursor,
+				 int idx)
+{
+	struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+
+	if (!lbr_stitch)
+		return;
+
+	if (cursor->pos == cursor->nr) {
+		lbr_stitch->prev_lbr_cursor[idx].valid = false;
+		return;
+	}
+
+	if (!cursor->curr)
+		cursor->curr = cursor->first;
+	else
+		cursor->curr = cursor->curr->next;
+	memcpy(&lbr_stitch->prev_lbr_cursor[idx], cursor->curr,
+	       sizeof(struct callchain_cursor_node));
+
+	lbr_stitch->prev_lbr_cursor[idx].valid = true;
+	cursor->pos++;
+}
+
+static int lbr_callchain_add_lbr_ip(struct thread *thread,
+				    struct callchain_cursor *cursor,
+				    struct perf_sample *sample,
+				    struct symbol **parent,
+				    struct addr_location *root_al,
+				    u64 *branch_from,
+				    bool callee)
+{
+	struct branch_stack *lbr_stack = sample->branch_stack;
+	struct branch_entry *entries = perf_sample__branch_entries(sample);
+	u8 cpumode = PERF_RECORD_MISC_USER;
+	int lbr_nr = lbr_stack->nr;
+	struct branch_flags *flags;
+	int err, i;
+	u64 ip;
+
+	/*
+	 * The curr and pos are not used in writing session. They are cleared
+	 * in callchain_cursor_commit() when the writing session is closed.
+	 * Using curr and pos to track the current cursor node.
+	 */
+	if (thread->lbr_stitch) {
+		cursor->curr = NULL;
+		cursor->pos = cursor->nr;
+		if (cursor->nr) {
+			cursor->curr = cursor->first;
+			for (i = 0; i < (int)(cursor->nr - 1); i++)
+				cursor->curr = cursor->curr->next;
+		}
+	}
+
+	if (callee) {
+		/* Add LBR ip from first entries.to */
+		ip = entries[0].to;
+		flags = &entries[0].flags;
+		*branch_from = entries[0].from;
+		err = add_callchain_ip(thread, cursor, parent,
+				       root_al, &cpumode, ip,
+				       true, flags, NULL,
+				       *branch_from);
+		if (err)
+			return err;
+
+		/*
+		 * The number of cursor node increases.
+		 * Move the current cursor node.
+		 * But does not need to save current cursor node for entry 0.
+		 * It's impossible to stitch the whole LBRs of previous sample.
+		 */
+		if (thread->lbr_stitch && (cursor->pos != cursor->nr)) {
+			if (!cursor->curr)
+				cursor->curr = cursor->first;
+			else
+				cursor->curr = cursor->curr->next;
+			cursor->pos++;
+		}
+
+		/* Add LBR ip from entries.from one by one. */
+		for (i = 0; i < lbr_nr; i++) {
+			ip = entries[i].from;
+			flags = &entries[i].flags;
+			err = add_callchain_ip(thread, cursor, parent,
+					       root_al, &cpumode, ip,
+					       true, flags, NULL,
+					       *branch_from);
+			if (err)
+				return err;
+			save_lbr_cursor_node(thread, cursor, i);
+		}
+		return 0;
+	}
+
+	/* Add LBR ip from entries.from one by one. */
+	for (i = lbr_nr - 1; i >= 0; i--) {
+		ip = entries[i].from;
+		flags = &entries[i].flags;
+		err = add_callchain_ip(thread, cursor, parent,
+				       root_al, &cpumode, ip,
+				       true, flags, NULL,
+				       *branch_from);
+		if (err)
+			return err;
+		save_lbr_cursor_node(thread, cursor, i);
+	}
+
+	if (lbr_nr > 0) {
+		/* Add LBR ip from first entries.to */
+		ip = entries[0].to;
+		flags = &entries[0].flags;
+		*branch_from = entries[0].from;
+		err = add_callchain_ip(thread, cursor, parent,
+				root_al, &cpumode, ip,
+				true, flags, NULL,
+				*branch_from);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int lbr_callchain_add_stitched_lbr_ip(struct thread *thread,
+					     struct callchain_cursor *cursor)
+{
+	struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+	struct callchain_cursor_node *cnode;
+	struct stitch_list *stitch_node;
+	int err;
+
+	list_for_each_entry(stitch_node, &lbr_stitch->lists, node) {
+		cnode = &stitch_node->cursor;
+
+		err = callchain_cursor_append(cursor, cnode->ip,
+					      &cnode->ms,
+					      cnode->branch,
+					      &cnode->branch_flags,
+					      cnode->nr_loop_iter,
+					      cnode->iter_cycles,
+					      cnode->branch_from,
+					      cnode->srcline);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static struct stitch_list *get_stitch_node(struct thread *thread)
+{
+	struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+	struct stitch_list *stitch_node;
+
+	if (!list_empty(&lbr_stitch->free_lists)) {
+		stitch_node = list_first_entry(&lbr_stitch->free_lists,
+					       struct stitch_list, node);
+		list_del(&stitch_node->node);
+
+		return stitch_node;
+	}
+
+	return malloc(sizeof(struct stitch_list));
+}
+
+static bool has_stitched_lbr(struct thread *thread,
+			     struct perf_sample *cur,
+			     struct perf_sample *prev,
+			     unsigned int max_lbr,
+			     bool callee)
+{
+	struct branch_stack *cur_stack = cur->branch_stack;
+	struct branch_entry *cur_entries = perf_sample__branch_entries(cur);
+	struct branch_stack *prev_stack = prev->branch_stack;
+	struct branch_entry *prev_entries = perf_sample__branch_entries(prev);
+	struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+	int i, j, nr_identical_branches = 0;
+	struct stitch_list *stitch_node;
+	u64 cur_base, distance;
+
+	if (!cur_stack || !prev_stack)
+		return false;
+
+	/* Find the physical index of the base-of-stack for current sample. */
+	cur_base = max_lbr - cur_stack->nr + cur_stack->hw_idx + 1;
+
+	distance = (prev_stack->hw_idx > cur_base) ? (prev_stack->hw_idx - cur_base) :
+						     (max_lbr + prev_stack->hw_idx - cur_base);
+	/* Previous sample has shorter stack. Nothing can be stitched. */
+	if (distance + 1 > prev_stack->nr)
+		return false;
+
+	/*
+	 * Check if there are identical LBRs between two samples.
+	 * Identical LBRs must have same from, to and flags values. Also,
+	 * they have to be saved in the same LBR registers (same physical
+	 * index).
+	 *
+	 * Starts from the base-of-stack of current sample.
+	 */
+	for (i = distance, j = cur_stack->nr - 1; (i >= 0) && (j >= 0); i--, j--) {
+		if ((prev_entries[i].from != cur_entries[j].from) ||
+		    (prev_entries[i].to != cur_entries[j].to) ||
+		    (prev_entries[i].flags.value != cur_entries[j].flags.value))
+			break;
+		nr_identical_branches++;
+	}
+
+	if (!nr_identical_branches)
+		return false;
+
+	/*
+	 * Save the LBRs between the base-of-stack of previous sample
+	 * and the base-of-stack of current sample into lbr_stitch->lists.
+	 * These LBRs will be stitched later.
+	 */
+	for (i = prev_stack->nr - 1; i > (int)distance; i--) {
+
+		if (!lbr_stitch->prev_lbr_cursor[i].valid)
+			continue;
+
+		stitch_node = get_stitch_node(thread);
+		if (!stitch_node)
+			return false;
+
+		memcpy(&stitch_node->cursor, &lbr_stitch->prev_lbr_cursor[i],
+		       sizeof(struct callchain_cursor_node));
+
+		if (callee)
+			list_add(&stitch_node->node, &lbr_stitch->lists);
+		else
+			list_add_tail(&stitch_node->node, &lbr_stitch->lists);
+	}
+
+	return true;
+}
+
+static bool alloc_lbr_stitch(struct thread *thread, unsigned int max_lbr)
+{
+	if (thread->lbr_stitch)
+		return true;
+
+	thread->lbr_stitch = zalloc(sizeof(*thread->lbr_stitch));
+	if (!thread->lbr_stitch)
+		goto err;
+
+	thread->lbr_stitch->prev_lbr_cursor = calloc(max_lbr + 1, sizeof(struct callchain_cursor_node));
+	if (!thread->lbr_stitch->prev_lbr_cursor)
+		goto free_lbr_stitch;
+
+	INIT_LIST_HEAD(&thread->lbr_stitch->lists);
+	INIT_LIST_HEAD(&thread->lbr_stitch->free_lists);
+
+	return true;
+
+free_lbr_stitch:
+	zfree(&thread->lbr_stitch);
+err:
+	pr_warning("Failed to allocate space for stitched LBRs. Disable LBR stitch\n");
+	thread->lbr_stitch_enable = false;
+	return false;
+}
+
+/*
+ * Resolve LBR callstack chain sample
+ * Return:
+ * 1 on success get LBR callchain information
+ * 0 no available LBR callchain information, should try fp
+ * negative error code on other errors.
+ */
+static int resolve_lbr_callchain_sample(struct thread *thread,
+					struct callchain_cursor *cursor,
+					struct perf_sample *sample,
+					struct symbol **parent,
+					struct addr_location *root_al,
+					int max_stack,
+					unsigned int max_lbr)
+{
+	bool callee = (callchain_param.order == ORDER_CALLEE);
+	struct ip_callchain *chain = sample->callchain;
+	int chain_nr = min(max_stack, (int)chain->nr), i;
+	struct lbr_stitch *lbr_stitch;
+	bool stitched_lbr = false;
+	u64 branch_from = 0;
+	int err;
+
+	for (i = 0; i < chain_nr; i++) {
+		if (chain->ips[i] == PERF_CONTEXT_USER)
+			break;
+	}
+
+	/* LBR only affects the user callchain */
+	if (i == chain_nr)
+		return 0;
+
+	if (thread->lbr_stitch_enable && !sample->no_hw_idx &&
+	    (max_lbr > 0) && alloc_lbr_stitch(thread, max_lbr)) {
+		lbr_stitch = thread->lbr_stitch;
+
+		stitched_lbr = has_stitched_lbr(thread, sample,
+						&lbr_stitch->prev_sample,
+						max_lbr, callee);
+
+		if (!stitched_lbr && !list_empty(&lbr_stitch->lists)) {
+			list_replace_init(&lbr_stitch->lists,
+					  &lbr_stitch->free_lists);
+		}
+		memcpy(&lbr_stitch->prev_sample, sample, sizeof(*sample));
+	}
+
+	if (callee) {
+		/* Add kernel ip */
+		err = lbr_callchain_add_kernel_ip(thread, cursor, sample,
+						  parent, root_al, branch_from,
+						  true, i);
+		if (err)
+			goto error;
+
+		err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent,
+					       root_al, &branch_from, true);
+		if (err)
+			goto error;
+
+		if (stitched_lbr) {
+			err = lbr_callchain_add_stitched_lbr_ip(thread, cursor);
+			if (err)
+				goto error;
+		}
+
+	} else {
+		if (stitched_lbr) {
+			err = lbr_callchain_add_stitched_lbr_ip(thread, cursor);
+			if (err)
+				goto error;
+		}
+		err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent,
+					       root_al, &branch_from, false);
+		if (err)
+			goto error;
+
+		/* Add kernel ip */
+		err = lbr_callchain_add_kernel_ip(thread, cursor, sample,
+						  parent, root_al, branch_from,
+						  false, i);
+		if (err)
+			goto error;
+	}
+	return 1;
+
+error:
+	return (err < 0) ? err : 0;
+}
+
+static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
+			     struct callchain_cursor *cursor,
+			     struct symbol **parent,
+			     struct addr_location *root_al,
+			     u8 *cpumode, int ent)
+{
+	int err = 0;
+
+	while (--ent >= 0) {
+		u64 ip = chain->ips[ent];
+
+		if (ip >= PERF_CONTEXT_MAX) {
+			err = add_callchain_ip(thread, cursor, parent,
+					       root_al, cpumode, ip,
+					       false, NULL, NULL, 0);
+			break;
+		}
+	}
+	return err;
+}
+
+static u64 get_leaf_frame_caller(struct perf_sample *sample,
+		struct thread *thread, int usr_idx)
+{
+	if (machine__normalized_is(thread->maps->machine, "arm64"))
+		return get_leaf_frame_caller_aarch64(sample, thread, usr_idx);
+	else
+		return 0;
+}
+
+static int thread__resolve_callchain_sample(struct thread *thread,
+					    struct callchain_cursor *cursor,
+					    struct evsel *evsel,
+					    struct perf_sample *sample,
+					    struct symbol **parent,
+					    struct addr_location *root_al,
+					    int max_stack)
+{
+	struct branch_stack *branch = sample->branch_stack;
+	struct branch_entry *entries = perf_sample__branch_entries(sample);
+	struct ip_callchain *chain = sample->callchain;
+	int chain_nr = 0;
+	u8 cpumode = PERF_RECORD_MISC_USER;
+	int i, j, err, nr_entries, usr_idx;
+	int skip_idx = -1;
+	int first_call = 0;
+	u64 leaf_frame_caller;
+
+	if (chain)
+		chain_nr = chain->nr;
+
+	if (evsel__has_branch_callstack(evsel)) {
+		struct perf_env *env = evsel__env(evsel);
+
+		err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
+						   root_al, max_stack,
+						   !env ? 0 : env->max_branches);
+		if (err)
+			return (err < 0) ? err : 0;
+	}
+
+	/*
+	 * Based on DWARF debug information, some architectures skip
+	 * a callchain entry saved by the kernel.
+	 */
+	skip_idx = arch_skip_callchain_idx(thread, chain);
+
+	/*
+	 * Add branches to call stack for easier browsing. This gives
+	 * more context for a sample than just the callers.
+	 *
+	 * This uses individual histograms of paths compared to the
+	 * aggregated histograms the normal LBR mode uses.
+	 *
+	 * Limitations for now:
+	 * - No extra filters
+	 * - No annotations (should annotate somehow)
+	 */
+
+	if (branch && callchain_param.branch_callstack) {
+		int nr = min(max_stack, (int)branch->nr);
+		struct branch_entry be[nr];
+		struct iterations iter[nr];
+
+		if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
+			pr_warning("corrupted branch chain. skipping...\n");
+			goto check_calls;
+		}
+
+		for (i = 0; i < nr; i++) {
+			if (callchain_param.order == ORDER_CALLEE) {
+				be[i] = entries[i];
+
+				if (chain == NULL)
+					continue;
+
+				/*
+				 * Check for overlap into the callchain.
+				 * The return address is one off compared to
+				 * the branch entry. To adjust for this
+				 * assume the calling instruction is not longer
+				 * than 8 bytes.
+				 */
+				if (i == skip_idx ||
+				    chain->ips[first_call] >= PERF_CONTEXT_MAX)
+					first_call++;
+				else if (be[i].from < chain->ips[first_call] &&
+				    be[i].from >= chain->ips[first_call] - 8)
+					first_call++;
+			} else
+				be[i] = entries[branch->nr - i - 1];
+		}
+
+		memset(iter, 0, sizeof(struct iterations) * nr);
+		nr = remove_loops(be, nr, iter);
+
+		for (i = 0; i < nr; i++) {
+			err = add_callchain_ip(thread, cursor, parent,
+					       root_al,
+					       NULL, be[i].to,
+					       true, &be[i].flags,
+					       NULL, be[i].from);
+
+			if (!err)
+				err = add_callchain_ip(thread, cursor, parent, root_al,
+						       NULL, be[i].from,
+						       true, &be[i].flags,
+						       &iter[i], 0);
+			if (err == -EINVAL)
+				break;
+			if (err)
+				return err;
+		}
+
+		if (chain_nr == 0)
+			return 0;
+
+		chain_nr -= nr;
+	}
+
+check_calls:
+	if (chain && callchain_param.order != ORDER_CALLEE) {
+		err = find_prev_cpumode(chain, thread, cursor, parent, root_al,
+					&cpumode, chain->nr - first_call);
+		if (err)
+			return (err < 0) ? err : 0;
+	}
+	for (i = first_call, nr_entries = 0;
+	     i < chain_nr && nr_entries < max_stack; i++) {
+		u64 ip;
+
+		if (callchain_param.order == ORDER_CALLEE)
+			j = i;
+		else
+			j = chain->nr - i - 1;
+
+#ifdef HAVE_SKIP_CALLCHAIN_IDX
+		if (j == skip_idx)
+			continue;
+#endif
+		ip = chain->ips[j];
+		if (ip < PERF_CONTEXT_MAX)
+                       ++nr_entries;
+		else if (callchain_param.order != ORDER_CALLEE) {
+			err = find_prev_cpumode(chain, thread, cursor, parent,
+						root_al, &cpumode, j);
+			if (err)
+				return (err < 0) ? err : 0;
+			continue;
+		}
+
+		/*
+		 * PERF_CONTEXT_USER allows us to locate where the user stack ends.
+		 * Depending on callchain_param.order and the position of PERF_CONTEXT_USER,
+		 * the index will be different in order to add the missing frame
+		 * at the right place.
+		 */
+
+		usr_idx = callchain_param.order == ORDER_CALLEE ? j-2 : j-1;
+
+		if (usr_idx >= 0 && chain->ips[usr_idx] == PERF_CONTEXT_USER) {
+
+			leaf_frame_caller = get_leaf_frame_caller(sample, thread, usr_idx);
+
+			/*
+			 * check if leaf_frame_Caller != ip to not add the same
+			 * value twice.
+			 */
+
+			if (leaf_frame_caller && leaf_frame_caller != ip) {
+
+				err = add_callchain_ip(thread, cursor, parent,
+					       root_al, &cpumode, leaf_frame_caller,
+					       false, NULL, NULL, 0);
+				if (err)
+					return (err < 0) ? err : 0;
+			}
+		}
+
+		err = add_callchain_ip(thread, cursor, parent,
+				       root_al, &cpumode, ip,
+				       false, NULL, NULL, 0);
+
+		if (err)
+			return (err < 0) ? err : 0;
+	}
+
+	return 0;
+}
+
+static int append_inlines(struct callchain_cursor *cursor, struct map_symbol *ms, u64 ip)
+{
+	struct symbol *sym = ms->sym;
+	struct map *map = ms->map;
+	struct inline_node *inline_node;
+	struct inline_list *ilist;
+	u64 addr;
+	int ret = 1;
+
+	if (!symbol_conf.inline_name || !map || !sym)
+		return ret;
+
+	addr = map__map_ip(map, ip);
+	addr = map__rip_2objdump(map, addr);
+
+	inline_node = inlines__tree_find(&map->dso->inlined_nodes, addr);
+	if (!inline_node) {
+		inline_node = dso__parse_addr_inlines(map->dso, addr, sym);
+		if (!inline_node)
+			return ret;
+		inlines__tree_insert(&map->dso->inlined_nodes, inline_node);
+	}
+
+	list_for_each_entry(ilist, &inline_node->val, list) {
+		struct map_symbol ilist_ms = {
+			.maps = ms->maps,
+			.map = map,
+			.sym = ilist->symbol,
+		};
+		ret = callchain_cursor_append(cursor, ip, &ilist_ms, false,
+					      NULL, 0, 0, 0, ilist->srcline);
+
+		if (ret != 0)
+			return ret;
+	}
+
+	return ret;
+}
+
+static int unwind_entry(struct unwind_entry *entry, void *arg)
+{
+	struct callchain_cursor *cursor = arg;
+	const char *srcline = NULL;
+	u64 addr = entry->ip;
+
+	if (symbol_conf.hide_unresolved && entry->ms.sym == NULL)
+		return 0;
+
+	if (append_inlines(cursor, &entry->ms, entry->ip) == 0)
+		return 0;
+
+	/*
+	 * Convert entry->ip from a virtual address to an offset in
+	 * its corresponding binary.
+	 */
+	if (entry->ms.map)
+		addr = map__map_ip(entry->ms.map, entry->ip);
+
+	srcline = callchain_srcline(&entry->ms, addr);
+	return callchain_cursor_append(cursor, entry->ip, &entry->ms,
+				       false, NULL, 0, 0, 0, srcline);
+}
+
+static int thread__resolve_callchain_unwind(struct thread *thread,
+					    struct callchain_cursor *cursor,
+					    struct evsel *evsel,
+					    struct perf_sample *sample,
+					    int max_stack)
+{
+	/* Can we do dwarf post unwind? */
+	if (!((evsel->core.attr.sample_type & PERF_SAMPLE_REGS_USER) &&
+	      (evsel->core.attr.sample_type & PERF_SAMPLE_STACK_USER)))
+		return 0;
+
+	/* Bail out if nothing was captured. */
+	if ((!sample->user_regs.regs) ||
+	    (!sample->user_stack.size))
+		return 0;
+
+	return unwind__get_entries(unwind_entry, cursor,
+				   thread, sample, max_stack, false);
+}
+
+int thread__resolve_callchain(struct thread *thread,
+			      struct callchain_cursor *cursor,
+			      struct evsel *evsel,
+			      struct perf_sample *sample,
+			      struct symbol **parent,
+			      struct addr_location *root_al,
+			      int max_stack)
+{
+	int ret = 0;
+
+	callchain_cursor_reset(cursor);
+
+	if (callchain_param.order == ORDER_CALLEE) {
+		ret = thread__resolve_callchain_sample(thread, cursor,
+						       evsel, sample,
+						       parent, root_al,
+						       max_stack);
+		if (ret)
+			return ret;
+		ret = thread__resolve_callchain_unwind(thread, cursor,
+						       evsel, sample,
+						       max_stack);
+	} else {
+		ret = thread__resolve_callchain_unwind(thread, cursor,
+						       evsel, sample,
+						       max_stack);
+		if (ret)
+			return ret;
+		ret = thread__resolve_callchain_sample(thread, cursor,
+						       evsel, sample,
+						       parent, root_al,
+						       max_stack);
+	}
+
+	return ret;
+}
+
+int machine__for_each_thread(struct machine *machine,
+			     int (*fn)(struct thread *thread, void *p),
+			     void *priv)
+{
+	struct threads *threads;
+	struct rb_node *nd;
+	struct thread *thread;
+	int rc = 0;
+	int i;
+
+	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+		threads = &machine->threads[i];
+		for (nd = rb_first_cached(&threads->entries); nd;
+		     nd = rb_next(nd)) {
+			thread = rb_entry(nd, struct thread, rb_node);
+			rc = fn(thread, priv);
+			if (rc != 0)
+				return rc;
+		}
+
+		list_for_each_entry(thread, &threads->dead, node) {
+			rc = fn(thread, priv);
+			if (rc != 0)
+				return rc;
+		}
+	}
+	return rc;
+}
+
+int machines__for_each_thread(struct machines *machines,
+			      int (*fn)(struct thread *thread, void *p),
+			      void *priv)
+{
+	struct rb_node *nd;
+	int rc = 0;
+
+	rc = machine__for_each_thread(&machines->host, fn, priv);
+	if (rc != 0)
+		return rc;
+
+	for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
+		struct machine *machine = rb_entry(nd, struct machine, rb_node);
+
+		rc = machine__for_each_thread(machine, fn, priv);
+		if (rc != 0)
+			return rc;
+	}
+	return rc;
+}
+
+pid_t machine__get_current_tid(struct machine *machine, int cpu)
+{
+	if (cpu < 0 || (size_t)cpu >= machine->current_tid_sz)
+		return -1;
+
+	return machine->current_tid[cpu];
+}
+
+int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
+			     pid_t tid)
+{
+	struct thread *thread;
+	const pid_t init_val = -1;
+
+	if (cpu < 0)
+		return -EINVAL;
+
+	if (realloc_array_as_needed(machine->current_tid,
+				    machine->current_tid_sz,
+				    (unsigned int)cpu,
+				    &init_val))
+		return -ENOMEM;
+
+	machine->current_tid[cpu] = tid;
+
+	thread = machine__findnew_thread(machine, pid, tid);
+	if (!thread)
+		return -ENOMEM;
+
+	thread->cpu = cpu;
+	thread__put(thread);
+
+	return 0;
+}
+
+/*
+ * Compares the raw arch string. N.B. see instead perf_env__arch() or
+ * machine__normalized_is() if a normalized arch is needed.
+ */
+bool machine__is(struct machine *machine, const char *arch)
+{
+	return machine && !strcmp(perf_env__raw_arch(machine->env), arch);
+}
+
+bool machine__normalized_is(struct machine *machine, const char *arch)
+{
+	return machine && !strcmp(perf_env__arch(machine->env), arch);
+}
+
+int machine__nr_cpus_avail(struct machine *machine)
+{
+	return machine ? perf_env__nr_cpus_avail(machine->env) : 0;
+}
+
+int machine__get_kernel_start(struct machine *machine)
+{
+	struct map *map = machine__kernel_map(machine);
+	int err = 0;
+
+	/*
+	 * The only addresses above 2^63 are kernel addresses of a 64-bit
+	 * kernel.  Note that addresses are unsigned so that on a 32-bit system
+	 * all addresses including kernel addresses are less than 2^32.  In
+	 * that case (32-bit system), if the kernel mapping is unknown, all
+	 * addresses will be assumed to be in user space - see
+	 * machine__kernel_ip().
+	 */
+	machine->kernel_start = 1ULL << 63;
+	if (map) {
+		err = map__load(map);
+		/*
+		 * On x86_64, PTI entry trampolines are less than the
+		 * start of kernel text, but still above 2^63. So leave
+		 * kernel_start = 1ULL << 63 for x86_64.
+		 */
+		if (!err && !machine__is(machine, "x86_64"))
+			machine->kernel_start = map->start;
+	}
+	return err;
+}
+
+u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr)
+{
+	u8 addr_cpumode = cpumode;
+	bool kernel_ip;
+
+	if (!machine->single_address_space)
+		goto out;
+
+	kernel_ip = machine__kernel_ip(machine, addr);
+	switch (cpumode) {
+	case PERF_RECORD_MISC_KERNEL:
+	case PERF_RECORD_MISC_USER:
+		addr_cpumode = kernel_ip ? PERF_RECORD_MISC_KERNEL :
+					   PERF_RECORD_MISC_USER;
+		break;
+	case PERF_RECORD_MISC_GUEST_KERNEL:
+	case PERF_RECORD_MISC_GUEST_USER:
+		addr_cpumode = kernel_ip ? PERF_RECORD_MISC_GUEST_KERNEL :
+					   PERF_RECORD_MISC_GUEST_USER;
+		break;
+	default:
+		break;
+	}
+out:
+	return addr_cpumode;
+}
+
+struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, struct dso_id *id)
+{
+	return dsos__findnew_id(&machine->dsos, filename, id);
+}
+
+struct dso *machine__findnew_dso(struct machine *machine, const char *filename)
+{
+	return machine__findnew_dso_id(machine, filename, NULL);
+}
+
+char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
+{
+	struct machine *machine = vmachine;
+	struct map *map;
+	struct symbol *sym = machine__find_kernel_symbol(machine, *addrp, &map);
+
+	if (sym == NULL)
+		return NULL;
+
+	*modp = __map__is_kmodule(map) ? (char *)map->dso->short_name : NULL;
+	*addrp = map->unmap_ip(map, sym->start);
+	return sym->name;
+}
+
+int machine__for_each_dso(struct machine *machine, machine__dso_t fn, void *priv)
+{
+	struct dso *pos;
+	int err = 0;
+
+	list_for_each_entry(pos, &machine->dsos.head, node) {
+		if (fn(pos, machine, priv))
+			err = -1;
+	}
+	return err;
+}
+
+int machine__for_each_kernel_map(struct machine *machine, machine__map_t fn, void *priv)
+{
+	struct maps *maps = machine__kernel_maps(machine);
+	struct map *map;
+	int err = 0;
+
+	for (map = maps__first(maps); map != NULL; map = map__next(map)) {
+		err = fn(map, priv);
+		if (err != 0) {
+			break;
+		}
+	}
+	return err;
+}
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
new file mode 100644
index 000000000..74935dfaa
--- /dev/null
+++ b/tools/perf/util/machine.h
@@ -0,0 +1,308 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_MACHINE_H
+#define __PERF_MACHINE_H
+
+#include <sys/types.h>
+#include <linux/rbtree.h>
+#include "maps.h"
+#include "dsos.h"
+#include "rwsem.h"
+
+struct addr_location;
+struct branch_stack;
+struct dso;
+struct dso_id;
+struct evsel;
+struct perf_sample;
+struct symbol;
+struct target;
+struct thread;
+union perf_event;
+struct machines;
+
+/* Native host kernel uses -1 as pid index in machine */
+#define	HOST_KERNEL_ID			(-1)
+#define	DEFAULT_GUEST_KERNEL_ID		(0)
+
+extern const char *ref_reloc_sym_names[];
+
+struct vdso_info;
+
+#define THREADS__TABLE_BITS	8
+#define THREADS__TABLE_SIZE	(1 << THREADS__TABLE_BITS)
+
+struct threads {
+	struct rb_root_cached  entries;
+	struct rw_semaphore    lock;
+	unsigned int	       nr;
+	struct list_head       dead;
+	struct thread	       *last_match;
+};
+
+struct machine {
+	struct rb_node	  rb_node;
+	pid_t		  pid;
+	u16		  id_hdr_size;
+	bool		  comm_exec;
+	bool		  kptr_restrict_warned;
+	bool		  single_address_space;
+	char		  *root_dir;
+	char		  *mmap_name;
+	char		  *kallsyms_filename;
+	struct threads    threads[THREADS__TABLE_SIZE];
+	struct vdso_info  *vdso_info;
+	struct perf_env   *env;
+	struct dsos	  dsos;
+	struct maps	  *kmaps;
+	struct map	  *vmlinux_map;
+	u64		  kernel_start;
+	pid_t		  *current_tid;
+	size_t		  current_tid_sz;
+	union { /* Tool specific area */
+		void	  *priv;
+		u64	  db_id;
+	};
+	struct machines   *machines;
+	bool		  trampolines_mapped;
+};
+
+static inline struct threads *machine__threads(struct machine *machine, pid_t tid)
+{
+	/* Cast it to handle tid == -1 */
+	return &machine->threads[(unsigned int)tid % THREADS__TABLE_SIZE];
+}
+
+/*
+ * The main kernel (vmlinux) map
+ */
+static inline
+struct map *machine__kernel_map(struct machine *machine)
+{
+	return machine->vmlinux_map;
+}
+
+/*
+ * kernel (the one returned by machine__kernel_map()) plus kernel modules maps
+ */
+static inline
+struct maps *machine__kernel_maps(struct machine *machine)
+{
+	return machine->kmaps;
+}
+
+int machine__get_kernel_start(struct machine *machine);
+
+static inline u64 machine__kernel_start(struct machine *machine)
+{
+	if (!machine->kernel_start)
+		machine__get_kernel_start(machine);
+	return machine->kernel_start;
+}
+
+static inline bool machine__kernel_ip(struct machine *machine, u64 ip)
+{
+	u64 kernel_start = machine__kernel_start(machine);
+
+	return ip >= kernel_start;
+}
+
+u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr);
+
+struct thread *machine__find_thread(struct machine *machine, pid_t pid,
+				    pid_t tid);
+struct thread *machine__idle_thread(struct machine *machine);
+struct comm *machine__thread_exec_comm(struct machine *machine,
+				       struct thread *thread);
+
+int machine__process_comm_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
+int machine__process_exit_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
+int machine__process_fork_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
+int machine__process_lost_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
+int machine__process_lost_samples_event(struct machine *machine, union perf_event *event,
+					struct perf_sample *sample);
+int machine__process_aux_event(struct machine *machine,
+			       union perf_event *event);
+int machine__process_itrace_start_event(struct machine *machine,
+					union perf_event *event);
+int machine__process_aux_output_hw_id_event(struct machine *machine,
+					    union perf_event *event);
+int machine__process_switch_event(struct machine *machine,
+				  union perf_event *event);
+int machine__process_namespaces_event(struct machine *machine,
+				      union perf_event *event,
+				      struct perf_sample *sample);
+int machine__process_cgroup_event(struct machine *machine,
+				  union perf_event *event,
+				  struct perf_sample *sample);
+int machine__process_mmap_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
+int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
+				 struct perf_sample *sample);
+int machine__process_ksymbol(struct machine *machine,
+			     union perf_event *event,
+			     struct perf_sample *sample);
+int machine__process_text_poke(struct machine *machine,
+			       union perf_event *event,
+			       struct perf_sample *sample);
+int machine__process_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
+
+typedef void (*machine__process_t)(struct machine *machine, void *data);
+
+struct machines {
+	struct machine host;
+	struct rb_root_cached guests;
+};
+
+void machines__init(struct machines *machines);
+void machines__exit(struct machines *machines);
+
+void machines__process_guests(struct machines *machines,
+			      machine__process_t process, void *data);
+
+struct machine *machines__add(struct machines *machines, pid_t pid,
+			      const char *root_dir);
+struct machine *machines__find(struct machines *machines, pid_t pid);
+struct machine *machines__findnew(struct machines *machines, pid_t pid);
+struct machine *machines__find_guest(struct machines *machines, pid_t pid);
+struct thread *machines__findnew_guest_code(struct machines *machines, pid_t pid);
+struct thread *machine__findnew_guest_code(struct machine *machine, pid_t pid);
+
+void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size);
+void machines__set_comm_exec(struct machines *machines, bool comm_exec);
+
+struct machine *machine__new_host(void);
+struct machine *machine__new_kallsyms(void);
+int machine__init(struct machine *machine, const char *root_dir, pid_t pid);
+void machine__exit(struct machine *machine);
+void machine__delete_threads(struct machine *machine);
+void machine__delete(struct machine *machine);
+void machine__remove_thread(struct machine *machine, struct thread *th);
+
+struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
+					   struct addr_location *al);
+struct mem_info *sample__resolve_mem(struct perf_sample *sample,
+				     struct addr_location *al);
+
+struct callchain_cursor;
+
+int thread__resolve_callchain(struct thread *thread,
+			      struct callchain_cursor *cursor,
+			      struct evsel *evsel,
+			      struct perf_sample *sample,
+			      struct symbol **parent,
+			      struct addr_location *root_al,
+			      int max_stack);
+
+/*
+ * Default guest kernel is defined by parameter --guestkallsyms
+ * and --guestmodules
+ */
+static inline bool machine__is_default_guest(struct machine *machine)
+{
+	return machine ? machine->pid == DEFAULT_GUEST_KERNEL_ID : false;
+}
+
+static inline bool machine__is_host(struct machine *machine)
+{
+	return machine ? machine->pid == HOST_KERNEL_ID : false;
+}
+
+bool machine__is(struct machine *machine, const char *arch);
+bool machine__normalized_is(struct machine *machine, const char *arch);
+int machine__nr_cpus_avail(struct machine *machine);
+
+struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
+struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
+
+struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, struct dso_id *id);
+struct dso *machine__findnew_dso(struct machine *machine, const char *filename);
+
+size_t machine__fprintf(struct machine *machine, FILE *fp);
+
+static inline
+struct symbol *machine__find_kernel_symbol(struct machine *machine, u64 addr,
+					   struct map **mapp)
+{
+	return maps__find_symbol(machine->kmaps, addr, mapp);
+}
+
+static inline
+struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine,
+						   const char *name,
+						   struct map **mapp)
+{
+	return maps__find_symbol_by_name(machine->kmaps, name, mapp);
+}
+
+int arch__fix_module_text_start(u64 *start, u64 *size, const char *name);
+
+int machine__load_kallsyms(struct machine *machine, const char *filename);
+
+int machine__load_vmlinux_path(struct machine *machine);
+
+size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
+				     bool (skip)(struct dso *dso, int parm), int parm);
+size_t machines__fprintf_dsos(struct machines *machines, FILE *fp);
+size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
+				     bool (skip)(struct dso *dso, int parm), int parm);
+
+void machine__destroy_kernel_maps(struct machine *machine);
+int machine__create_kernel_maps(struct machine *machine);
+
+int machines__create_kernel_maps(struct machines *machines, pid_t pid);
+int machines__create_guest_kernel_maps(struct machines *machines);
+void machines__destroy_kernel_maps(struct machines *machines);
+
+size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
+
+typedef int (*machine__dso_t)(struct dso *dso, struct machine *machine, void *priv);
+
+int machine__for_each_dso(struct machine *machine, machine__dso_t fn,
+			  void *priv);
+
+typedef int (*machine__map_t)(struct map *map, void *priv);
+int machine__for_each_kernel_map(struct machine *machine, machine__map_t fn,
+				 void *priv);
+
+int machine__for_each_thread(struct machine *machine,
+			     int (*fn)(struct thread *thread, void *p),
+			     void *priv);
+int machines__for_each_thread(struct machines *machines,
+			      int (*fn)(struct thread *thread, void *p),
+			      void *priv);
+
+pid_t machine__get_current_tid(struct machine *machine, int cpu);
+int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
+			     pid_t tid);
+/*
+ * For use with libtraceevent's tep_set_function_resolver()
+ */
+char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp);
+
+void machine__get_kallsyms_filename(struct machine *machine, char *buf,
+				    size_t bufsz);
+
+int machine__create_extra_kernel_maps(struct machine *machine,
+				      struct dso *kernel);
+
+/* Kernel-space maps for symbols that are outside the main kernel map and module maps */
+struct extra_kernel_map {
+	u64 start;
+	u64 end;
+	u64 pgoff;
+	char name[KMAP_NAME_LEN];
+};
+
+int machine__create_extra_kernel_map(struct machine *machine,
+				     struct dso *kernel,
+				     struct extra_kernel_map *xm);
+
+int machine__map_x86_64_entry_trampolines(struct machine *machine,
+					  struct dso *kernel);
+
+#endif /* __PERF_MACHINE_H */
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
new file mode 100644
index 000000000..f3a3d9b3a
--- /dev/null
+++ b/tools/perf/util/map.c
@@ -0,0 +1,594 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
+#include "debug.h"
+#include "dso.h"
+#include "map.h"
+#include "namespaces.h"
+#include "srcline.h"
+#include "symbol.h"
+#include "thread.h"
+#include "vdso.h"
+
+static inline int is_android_lib(const char *filename)
+{
+	return strstarts(filename, "/data/app-lib/") ||
+	       strstarts(filename, "/system/lib/");
+}
+
+static inline bool replace_android_lib(const char *filename, char *newfilename)
+{
+	const char *libname;
+	char *app_abi;
+	size_t app_abi_length, new_length;
+	size_t lib_length = 0;
+
+	libname  = strrchr(filename, '/');
+	if (libname)
+		lib_length = strlen(libname);
+
+	app_abi = getenv("APP_ABI");
+	if (!app_abi)
+		return false;
+
+	app_abi_length = strlen(app_abi);
+
+	if (strstarts(filename, "/data/app-lib/")) {
+		char *apk_path;
+
+		if (!app_abi_length)
+			return false;
+
+		new_length = 7 + app_abi_length + lib_length;
+
+		apk_path = getenv("APK_PATH");
+		if (apk_path) {
+			new_length += strlen(apk_path) + 1;
+			if (new_length > PATH_MAX)
+				return false;
+			snprintf(newfilename, new_length,
+				 "%s/libs/%s/%s", apk_path, app_abi, libname);
+		} else {
+			if (new_length > PATH_MAX)
+				return false;
+			snprintf(newfilename, new_length,
+				 "libs/%s/%s", app_abi, libname);
+		}
+		return true;
+	}
+
+	if (strstarts(filename, "/system/lib/")) {
+		char *ndk, *app;
+		const char *arch;
+		int ndk_length, app_length;
+
+		ndk = getenv("NDK_ROOT");
+		app = getenv("APP_PLATFORM");
+
+		if (!(ndk && app))
+			return false;
+
+		ndk_length = strlen(ndk);
+		app_length = strlen(app);
+
+		if (!(ndk_length && app_length && app_abi_length))
+			return false;
+
+		arch = !strncmp(app_abi, "arm", 3) ? "arm" :
+		       !strncmp(app_abi, "mips", 4) ? "mips" :
+		       !strncmp(app_abi, "x86", 3) ? "x86" : NULL;
+
+		if (!arch)
+			return false;
+
+		new_length = 27 + ndk_length +
+			     app_length + lib_length
+			   + strlen(arch);
+
+		if (new_length > PATH_MAX)
+			return false;
+		snprintf(newfilename, new_length,
+			"%.*s/platforms/%.*s/arch-%s/usr/lib/%s",
+			ndk_length, ndk, app_length, app, arch, libname);
+
+		return true;
+	}
+	return false;
+}
+
+void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso)
+{
+	map->start    = start;
+	map->end      = end;
+	map->pgoff    = pgoff;
+	map->reloc    = 0;
+	map->dso      = dso__get(dso);
+	map->map_ip   = map__map_ip;
+	map->unmap_ip = map__unmap_ip;
+	RB_CLEAR_NODE(&map->rb_node);
+	map->erange_warned = false;
+	refcount_set(&map->refcnt, 1);
+}
+
+struct map *map__new(struct machine *machine, u64 start, u64 len,
+		     u64 pgoff, struct dso_id *id,
+		     u32 prot, u32 flags, struct build_id *bid,
+		     char *filename, struct thread *thread)
+{
+	struct map *map = malloc(sizeof(*map));
+	struct nsinfo *nsi = NULL;
+	struct nsinfo *nnsi;
+
+	if (map != NULL) {
+		char newfilename[PATH_MAX];
+		struct dso *dso, *header_bid_dso;
+		int anon, no_dso, vdso, android;
+
+		android = is_android_lib(filename);
+		anon = is_anon_memory(filename) || flags & MAP_HUGETLB;
+		vdso = is_vdso_map(filename);
+		no_dso = is_no_dso_memory(filename);
+		map->prot = prot;
+		map->flags = flags;
+		nsi = nsinfo__get(thread->nsinfo);
+
+		if ((anon || no_dso) && nsi && (prot & PROT_EXEC)) {
+			snprintf(newfilename, sizeof(newfilename),
+				 "/tmp/perf-%d.map", nsinfo__pid(nsi));
+			filename = newfilename;
+		}
+
+		if (android) {
+			if (replace_android_lib(filename, newfilename))
+				filename = newfilename;
+		}
+
+		if (vdso) {
+			/* The vdso maps are always on the host and not the
+			 * container.  Ensure that we don't use setns to look
+			 * them up.
+			 */
+			nnsi = nsinfo__copy(nsi);
+			if (nnsi) {
+				nsinfo__put(nsi);
+				nsinfo__clear_need_setns(nnsi);
+				nsi = nnsi;
+			}
+			pgoff = 0;
+			dso = machine__findnew_vdso(machine, thread);
+		} else
+			dso = machine__findnew_dso_id(machine, filename, id);
+
+		if (dso == NULL)
+			goto out_delete;
+
+		map__init(map, start, start + len, pgoff, dso);
+
+		if (anon || no_dso) {
+			map->map_ip = map->unmap_ip = identity__map_ip;
+
+			/*
+			 * Set memory without DSO as loaded. All map__find_*
+			 * functions still return NULL, and we avoid the
+			 * unnecessary map__load warning.
+			 */
+			if (!(prot & PROT_EXEC))
+				dso__set_loaded(dso);
+		}
+		mutex_lock(&dso->lock);
+		nsinfo__put(dso->nsinfo);
+		dso->nsinfo = nsi;
+		mutex_unlock(&dso->lock);
+
+		if (build_id__is_defined(bid)) {
+			dso__set_build_id(dso, bid);
+		} else {
+			/*
+			 * If the mmap event had no build ID, search for an existing dso from the
+			 * build ID header by name. Otherwise only the dso loaded at the time of
+			 * reading the header will have the build ID set and all future mmaps will
+			 * have it missing.
+			 */
+			down_read(&machine->dsos.lock);
+			header_bid_dso = __dsos__find(&machine->dsos, filename, false);
+			up_read(&machine->dsos.lock);
+			if (header_bid_dso && header_bid_dso->header_build_id) {
+				dso__set_build_id(dso, &header_bid_dso->bid);
+				dso->header_build_id = 1;
+			}
+		}
+		dso__put(dso);
+	}
+	return map;
+out_delete:
+	nsinfo__put(nsi);
+	free(map);
+	return NULL;
+}
+
+/*
+ * Constructor variant for modules (where we know from /proc/modules where
+ * they are loaded) and for vmlinux, where only after we load all the
+ * symbols we'll know where it starts and ends.
+ */
+struct map *map__new2(u64 start, struct dso *dso)
+{
+	struct map *map = calloc(1, (sizeof(*map) +
+				     (dso->kernel ? sizeof(struct kmap) : 0)));
+	if (map != NULL) {
+		/*
+		 * ->end will be filled after we load all the symbols
+		 */
+		map__init(map, start, 0, 0, dso);
+	}
+
+	return map;
+}
+
+bool __map__is_kernel(const struct map *map)
+{
+	if (!map->dso->kernel)
+		return false;
+	return machine__kernel_map(map__kmaps((struct map *)map)->machine) == map;
+}
+
+bool __map__is_extra_kernel_map(const struct map *map)
+{
+	struct kmap *kmap = __map__kmap((struct map *)map);
+
+	return kmap && kmap->name[0];
+}
+
+bool __map__is_bpf_prog(const struct map *map)
+{
+	const char *name;
+
+	if (map->dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
+		return true;
+
+	/*
+	 * If PERF_RECORD_BPF_EVENT is not included, the dso will not have
+	 * type of DSO_BINARY_TYPE__BPF_PROG_INFO. In such cases, we can
+	 * guess the type based on name.
+	 */
+	name = map->dso->short_name;
+	return name && (strstr(name, "bpf_prog_") == name);
+}
+
+bool __map__is_bpf_image(const struct map *map)
+{
+	const char *name;
+
+	if (map->dso->binary_type == DSO_BINARY_TYPE__BPF_IMAGE)
+		return true;
+
+	/*
+	 * If PERF_RECORD_KSYMBOL is not included, the dso will not have
+	 * type of DSO_BINARY_TYPE__BPF_IMAGE. In such cases, we can
+	 * guess the type based on name.
+	 */
+	name = map->dso->short_name;
+	return name && is_bpf_image(name);
+}
+
+bool __map__is_ool(const struct map *map)
+{
+	return map->dso && map->dso->binary_type == DSO_BINARY_TYPE__OOL;
+}
+
+bool map__has_symbols(const struct map *map)
+{
+	return dso__has_symbols(map->dso);
+}
+
+static void map__exit(struct map *map)
+{
+	BUG_ON(refcount_read(&map->refcnt) != 0);
+	dso__zput(map->dso);
+}
+
+void map__delete(struct map *map)
+{
+	map__exit(map);
+	free(map);
+}
+
+void map__put(struct map *map)
+{
+	if (map && refcount_dec_and_test(&map->refcnt))
+		map__delete(map);
+}
+
+void map__fixup_start(struct map *map)
+{
+	struct rb_root_cached *symbols = &map->dso->symbols;
+	struct rb_node *nd = rb_first_cached(symbols);
+	if (nd != NULL) {
+		struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
+		map->start = sym->start;
+	}
+}
+
+void map__fixup_end(struct map *map)
+{
+	struct rb_root_cached *symbols = &map->dso->symbols;
+	struct rb_node *nd = rb_last(&symbols->rb_root);
+	if (nd != NULL) {
+		struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
+		map->end = sym->end;
+	}
+}
+
+#define DSO__DELETED "(deleted)"
+
+int map__load(struct map *map)
+{
+	const char *name = map->dso->long_name;
+	int nr;
+
+	if (dso__loaded(map->dso))
+		return 0;
+
+	nr = dso__load(map->dso, map);
+	if (nr < 0) {
+		if (map->dso->has_build_id) {
+			char sbuild_id[SBUILD_ID_SIZE];
+
+			build_id__sprintf(&map->dso->bid, sbuild_id);
+			pr_debug("%s with build id %s not found", name, sbuild_id);
+		} else
+			pr_debug("Failed to open %s", name);
+
+		pr_debug(", continuing without symbols\n");
+		return -1;
+	} else if (nr == 0) {
+#ifdef HAVE_LIBELF_SUPPORT
+		const size_t len = strlen(name);
+		const size_t real_len = len - sizeof(DSO__DELETED);
+
+		if (len > sizeof(DSO__DELETED) &&
+		    strcmp(name + real_len + 1, DSO__DELETED) == 0) {
+			pr_debug("%.*s was updated (is prelink enabled?). "
+				"Restart the long running apps that use it!\n",
+				   (int)real_len, name);
+		} else {
+			pr_debug("no symbols found in %s, maybe install a debug package?\n", name);
+		}
+#endif
+		return -1;
+	}
+
+	return 0;
+}
+
+struct symbol *map__find_symbol(struct map *map, u64 addr)
+{
+	if (map__load(map) < 0)
+		return NULL;
+
+	return dso__find_symbol(map->dso, addr);
+}
+
+struct symbol *map__find_symbol_by_name(struct map *map, const char *name)
+{
+	if (map__load(map) < 0)
+		return NULL;
+
+	if (!dso__sorted_by_name(map->dso))
+		dso__sort_by_name(map->dso);
+
+	return dso__find_symbol_by_name(map->dso, name);
+}
+
+struct map *map__clone(struct map *from)
+{
+	size_t size = sizeof(struct map);
+	struct map *map;
+
+	if (from->dso && from->dso->kernel)
+		size += sizeof(struct kmap);
+
+	map = memdup(from, size);
+	if (map != NULL) {
+		refcount_set(&map->refcnt, 1);
+		RB_CLEAR_NODE(&map->rb_node);
+		dso__get(map->dso);
+	}
+
+	return map;
+}
+
+size_t map__fprintf(struct map *map, FILE *fp)
+{
+	return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s\n",
+		       map->start, map->end, map->pgoff, map->dso->name);
+}
+
+size_t map__fprintf_dsoname(struct map *map, FILE *fp)
+{
+	char buf[symbol_conf.pad_output_len_dso + 1];
+	const char *dsoname = "[unknown]";
+
+	if (map && map->dso) {
+		if (symbol_conf.show_kernel_path && map->dso->long_name)
+			dsoname = map->dso->long_name;
+		else
+			dsoname = map->dso->name;
+	}
+
+	if (symbol_conf.pad_output_len_dso) {
+		scnprintf_pad(buf, symbol_conf.pad_output_len_dso, "%s", dsoname);
+		dsoname = buf;
+	}
+
+	return fprintf(fp, "%s", dsoname);
+}
+
+char *map__srcline(struct map *map, u64 addr, struct symbol *sym)
+{
+	if (map == NULL)
+		return SRCLINE_UNKNOWN;
+	return get_srcline(map->dso, map__rip_2objdump(map, addr), sym, true, true, addr);
+}
+
+int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
+			 FILE *fp)
+{
+	int ret = 0;
+
+	if (map && map->dso) {
+		char *srcline = map__srcline(map, addr, NULL);
+		if (strncmp(srcline, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)
+			ret = fprintf(fp, "%s%s", prefix, srcline);
+		free_srcline(srcline);
+	}
+	return ret;
+}
+
+void srccode_state_free(struct srccode_state *state)
+{
+	zfree(&state->srcfile);
+	state->line = 0;
+}
+
+/**
+ * map__rip_2objdump - convert symbol start address to objdump address.
+ * @map: memory map
+ * @rip: symbol start address
+ *
+ * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN.
+ * map->dso->adjust_symbols==1 for ET_EXEC-like cases except ET_REL which is
+ * relative to section start.
+ *
+ * Return: Address suitable for passing to "objdump --start-address="
+ */
+u64 map__rip_2objdump(struct map *map, u64 rip)
+{
+	struct kmap *kmap = __map__kmap(map);
+
+	/*
+	 * vmlinux does not have program headers for PTI entry trampolines and
+	 * kcore may not either. However the trampoline object code is on the
+	 * main kernel map, so just use that instead.
+	 */
+	if (kmap && is_entry_trampoline(kmap->name) && kmap->kmaps && kmap->kmaps->machine) {
+		struct map *kernel_map = machine__kernel_map(kmap->kmaps->machine);
+
+		if (kernel_map)
+			map = kernel_map;
+	}
+
+	if (!map->dso->adjust_symbols)
+		return rip;
+
+	if (map->dso->rel)
+		return rip - map->pgoff;
+
+	/*
+	 * kernel modules also have DSO_TYPE_USER in dso->kernel,
+	 * but all kernel modules are ET_REL, so won't get here.
+	 */
+	if (map->dso->kernel == DSO_SPACE__USER)
+		return rip + map->dso->text_offset;
+
+	return map->unmap_ip(map, rip) - map->reloc;
+}
+
+/**
+ * map__objdump_2mem - convert objdump address to a memory address.
+ * @map: memory map
+ * @ip: objdump address
+ *
+ * Closely related to map__rip_2objdump(), this function takes an address from
+ * objdump and converts it to a memory address.  Note this assumes that @map
+ * contains the address.  To be sure the result is valid, check it forwards
+ * e.g. map__rip_2objdump(map->map_ip(map, map__objdump_2mem(map, ip))) == ip
+ *
+ * Return: Memory address.
+ */
+u64 map__objdump_2mem(struct map *map, u64 ip)
+{
+	if (!map->dso->adjust_symbols)
+		return map->unmap_ip(map, ip);
+
+	if (map->dso->rel)
+		return map->unmap_ip(map, ip + map->pgoff);
+
+	/*
+	 * kernel modules also have DSO_TYPE_USER in dso->kernel,
+	 * but all kernel modules are ET_REL, so won't get here.
+	 */
+	if (map->dso->kernel == DSO_SPACE__USER)
+		return map->unmap_ip(map, ip - map->dso->text_offset);
+
+	return ip + map->reloc;
+}
+
+bool map__contains_symbol(const struct map *map, const struct symbol *sym)
+{
+	u64 ip = map->unmap_ip(map, sym->start);
+
+	return ip >= map->start && ip < map->end;
+}
+
+static struct map *__map__next(struct map *map)
+{
+	struct rb_node *next = rb_next(&map->rb_node);
+
+	if (next)
+		return rb_entry(next, struct map, rb_node);
+	return NULL;
+}
+
+struct map *map__next(struct map *map)
+{
+	return map ? __map__next(map) : NULL;
+}
+
+struct kmap *__map__kmap(struct map *map)
+{
+	if (!map->dso || !map->dso->kernel)
+		return NULL;
+	return (struct kmap *)(map + 1);
+}
+
+struct kmap *map__kmap(struct map *map)
+{
+	struct kmap *kmap = __map__kmap(map);
+
+	if (!kmap)
+		pr_err("Internal error: map__kmap with a non-kernel map\n");
+	return kmap;
+}
+
+struct maps *map__kmaps(struct map *map)
+{
+	struct kmap *kmap = map__kmap(map);
+
+	if (!kmap || !kmap->kmaps) {
+		pr_err("Internal error: map__kmaps with a non-kernel map\n");
+		return NULL;
+	}
+	return kmap->kmaps;
+}
+
+u64 map__map_ip(const struct map *map, u64 ip)
+{
+	return ip - map->start + map->pgoff;
+}
+
+u64 map__unmap_ip(const struct map *map, u64 ip)
+{
+	return ip + map->start - map->pgoff;
+}
+
+u64 identity__map_ip(const struct map *map __maybe_unused, u64 ip)
+{
+	return ip;
+}
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
new file mode 100644
index 000000000..3dcfe06db
--- /dev/null
+++ b/tools/perf/util/map.h
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_MAP_H
+#define __PERF_MAP_H
+
+#include <linux/refcount.h>
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <linux/types.h>
+
+struct dso;
+struct maps;
+struct machine;
+
+struct map {
+	union {
+		struct rb_node	rb_node;
+		struct list_head node;
+	};
+	u64			start;
+	u64			end;
+	bool			erange_warned:1;
+	bool			priv:1;
+	u32			prot;
+	u64			pgoff;
+	u64			reloc;
+
+	/* ip -> dso rip */
+	u64			(*map_ip)(const struct map *, u64);
+	/* dso rip -> ip */
+	u64			(*unmap_ip)(const struct map *, u64);
+
+	struct dso		*dso;
+	refcount_t		refcnt;
+	u32			flags;
+};
+
+struct kmap;
+
+struct kmap *__map__kmap(struct map *map);
+struct kmap *map__kmap(struct map *map);
+struct maps *map__kmaps(struct map *map);
+
+/* ip -> dso rip */
+u64 map__map_ip(const struct map *map, u64 ip);
+/* dso rip -> ip */
+u64 map__unmap_ip(const struct map *map, u64 ip);
+/* Returns ip */
+u64 identity__map_ip(const struct map *map __maybe_unused, u64 ip);
+
+static inline size_t map__size(const struct map *map)
+{
+	return map->end - map->start;
+}
+
+/* rip/ip <-> addr suitable for passing to `objdump --start-address=` */
+u64 map__rip_2objdump(struct map *map, u64 rip);
+
+/* objdump address -> memory address */
+u64 map__objdump_2mem(struct map *map, u64 ip);
+
+struct symbol;
+struct thread;
+
+/* map__for_each_symbol - iterate over the symbols in the given map
+ *
+ * @map: the 'struct map *' in which symbols are iterated
+ * @pos: the 'struct symbol *' to use as a loop cursor
+ * @n: the 'struct rb_node *' to use as a temporary storage
+ * Note: caller must ensure map->dso is not NULL (map is loaded).
+ */
+#define map__for_each_symbol(map, pos, n)	\
+	dso__for_each_symbol(map->dso, pos, n)
+
+/* map__for_each_symbol_with_name - iterate over the symbols in the given map
+ *                                  that have the given name
+ *
+ * @map: the 'struct map *' in which symbols are iterated
+ * @sym_name: the symbol name
+ * @pos: the 'struct symbol *' to use as a loop cursor
+ */
+#define __map__for_each_symbol_by_name(map, sym_name, pos)	\
+	for (pos = map__find_symbol_by_name(map, sym_name);	\
+	     pos &&						\
+	     !symbol__match_symbol_name(pos->name, sym_name,	\
+					SYMBOL_TAG_INCLUDE__DEFAULT_ONLY); \
+	     pos = symbol__next_by_name(pos))
+
+#define map__for_each_symbol_by_name(map, sym_name, pos)		\
+	__map__for_each_symbol_by_name(map, sym_name, (pos))
+
+void map__init(struct map *map,
+	       u64 start, u64 end, u64 pgoff, struct dso *dso);
+
+struct dso_id;
+struct build_id;
+
+struct map *map__new(struct machine *machine, u64 start, u64 len,
+		     u64 pgoff, struct dso_id *id, u32 prot, u32 flags,
+		     struct build_id *bid, char *filename, struct thread *thread);
+struct map *map__new2(u64 start, struct dso *dso);
+void map__delete(struct map *map);
+struct map *map__clone(struct map *map);
+
+static inline struct map *map__get(struct map *map)
+{
+	if (map)
+		refcount_inc(&map->refcnt);
+	return map;
+}
+
+void map__put(struct map *map);
+
+static inline void __map__zput(struct map **map)
+{
+	map__put(*map);
+	*map = NULL;
+}
+
+#define map__zput(map) __map__zput(&map)
+
+size_t map__fprintf(struct map *map, FILE *fp);
+size_t map__fprintf_dsoname(struct map *map, FILE *fp);
+char *map__srcline(struct map *map, u64 addr, struct symbol *sym);
+int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
+			 FILE *fp);
+
+int map__load(struct map *map);
+struct symbol *map__find_symbol(struct map *map, u64 addr);
+struct symbol *map__find_symbol_by_name(struct map *map, const char *name);
+void map__fixup_start(struct map *map);
+void map__fixup_end(struct map *map);
+
+int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name,
+				    u64 addr);
+
+bool __map__is_kernel(const struct map *map);
+bool __map__is_extra_kernel_map(const struct map *map);
+bool __map__is_bpf_prog(const struct map *map);
+bool __map__is_bpf_image(const struct map *map);
+bool __map__is_ool(const struct map *map);
+
+static inline bool __map__is_kmodule(const struct map *map)
+{
+	return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map) &&
+	       !__map__is_bpf_prog(map) && !__map__is_ool(map) &&
+	       !__map__is_bpf_image(map);
+}
+
+bool map__has_symbols(const struct map *map);
+
+bool map__contains_symbol(const struct map *map, const struct symbol *sym);
+
+#define ENTRY_TRAMPOLINE_NAME "__entry_SYSCALL_64_trampoline"
+
+static inline bool is_entry_trampoline(const char *name)
+{
+	return !strcmp(name, ENTRY_TRAMPOLINE_NAME);
+}
+
+static inline bool is_bpf_image(const char *name)
+{
+	return strncmp(name, "bpf_trampoline_", sizeof("bpf_trampoline_") - 1) == 0 ||
+	       strncmp(name, "bpf_dispatcher_", sizeof("bpf_dispatcher_") - 1) == 0;
+}
+
+static inline int is_anon_memory(const char *filename)
+{
+	return !strcmp(filename, "//anon") ||
+	       !strncmp(filename, "/dev/zero", sizeof("/dev/zero") - 1) ||
+	       !strncmp(filename, "/anon_hugepage", sizeof("/anon_hugepage") - 1);
+}
+
+static inline int is_no_dso_memory(const char *filename)
+{
+	return !strncmp(filename, "[stack", 6) ||
+	       !strncmp(filename, "/SYSV", 5)  ||
+	       !strcmp(filename, "[heap]");
+}
+#endif /* __PERF_MAP_H */
diff --git a/tools/perf/util/map_symbol.h b/tools/perf/util/map_symbol.h
new file mode 100644
index 000000000..e08817b0c
--- /dev/null
+++ b/tools/perf/util/map_symbol.h
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __PERF_MAP_SYMBOL
+#define __PERF_MAP_SYMBOL 1
+
+#include <linux/types.h>
+
+struct maps;
+struct map;
+struct symbol;
+
+struct map_symbol {
+	struct maps   *maps;
+	struct map    *map;
+	struct symbol *sym;
+};
+
+struct addr_map_symbol {
+	struct map_symbol ms;
+	u64	      addr;
+	u64	      al_addr;
+	char	      al_level;
+	u64	      phys_addr;
+	u64	      data_page_size;
+};
+#endif // __PERF_MAP_SYMBOL
diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c
new file mode 100644
index 000000000..37bd5b400
--- /dev/null
+++ b/tools/perf/util/maps.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdlib.h>
+#include <linux/zalloc.h>
+#include "debug.h"
+#include "dso.h"
+#include "map.h"
+#include "maps.h"
+#include "thread.h"
+#include "ui/ui.h"
+#include "unwind.h"
+
+static void __maps__insert(struct maps *maps, struct map *map);
+
+static void maps__init(struct maps *maps, struct machine *machine)
+{
+	maps->entries = RB_ROOT;
+	init_rwsem(&maps->lock);
+	maps->machine = machine;
+	maps->last_search_by_name = NULL;
+	maps->nr_maps = 0;
+	maps->maps_by_name = NULL;
+	refcount_set(&maps->refcnt, 1);
+}
+
+static void __maps__free_maps_by_name(struct maps *maps)
+{
+	/*
+	 * Free everything to try to do it from the rbtree in the next search
+	 */
+	zfree(&maps->maps_by_name);
+	maps->nr_maps_allocated = 0;
+}
+
+void maps__insert(struct maps *maps, struct map *map)
+{
+	down_write(&maps->lock);
+	__maps__insert(maps, map);
+	++maps->nr_maps;
+
+	if (map->dso && map->dso->kernel) {
+		struct kmap *kmap = map__kmap(map);
+
+		if (kmap)
+			kmap->kmaps = maps;
+		else
+			pr_err("Internal error: kernel dso with non kernel map\n");
+	}
+
+
+	/*
+	 * If we already performed some search by name, then we need to add the just
+	 * inserted map and resort.
+	 */
+	if (maps->maps_by_name) {
+		if (maps->nr_maps > maps->nr_maps_allocated) {
+			int nr_allocate = maps->nr_maps * 2;
+			struct map **maps_by_name = realloc(maps->maps_by_name, nr_allocate * sizeof(map));
+
+			if (maps_by_name == NULL) {
+				__maps__free_maps_by_name(maps);
+				up_write(&maps->lock);
+				return;
+			}
+
+			maps->maps_by_name = maps_by_name;
+			maps->nr_maps_allocated = nr_allocate;
+		}
+		maps->maps_by_name[maps->nr_maps - 1] = map;
+		__maps__sort_by_name(maps);
+	}
+	up_write(&maps->lock);
+}
+
+static void __maps__remove(struct maps *maps, struct map *map)
+{
+	rb_erase_init(&map->rb_node, &maps->entries);
+	map__put(map);
+}
+
+void maps__remove(struct maps *maps, struct map *map)
+{
+	down_write(&maps->lock);
+	if (maps->last_search_by_name == map)
+		maps->last_search_by_name = NULL;
+
+	__maps__remove(maps, map);
+	--maps->nr_maps;
+	if (maps->maps_by_name)
+		__maps__free_maps_by_name(maps);
+	up_write(&maps->lock);
+}
+
+static void __maps__purge(struct maps *maps)
+{
+	struct map *pos, *next;
+
+	maps__for_each_entry_safe(maps, pos, next) {
+		rb_erase_init(&pos->rb_node,  &maps->entries);
+		map__put(pos);
+	}
+}
+
+static void maps__exit(struct maps *maps)
+{
+	down_write(&maps->lock);
+	__maps__purge(maps);
+	up_write(&maps->lock);
+}
+
+bool maps__empty(struct maps *maps)
+{
+	return !maps__first(maps);
+}
+
+struct maps *maps__new(struct machine *machine)
+{
+	struct maps *maps = zalloc(sizeof(*maps));
+
+	if (maps != NULL)
+		maps__init(maps, machine);
+
+	return maps;
+}
+
+void maps__delete(struct maps *maps)
+{
+	maps__exit(maps);
+	unwind__finish_access(maps);
+	free(maps);
+}
+
+void maps__put(struct maps *maps)
+{
+	if (maps && refcount_dec_and_test(&maps->refcnt))
+		maps__delete(maps);
+}
+
+struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp)
+{
+	struct map *map = maps__find(maps, addr);
+
+	/* Ensure map is loaded before using map->map_ip */
+	if (map != NULL && map__load(map) >= 0) {
+		if (mapp != NULL)
+			*mapp = map;
+		return map__find_symbol(map, map->map_ip(map, addr));
+	}
+
+	return NULL;
+}
+
+struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp)
+{
+	struct symbol *sym;
+	struct map *pos;
+
+	down_read(&maps->lock);
+
+	maps__for_each_entry(maps, pos) {
+		sym = map__find_symbol_by_name(pos, name);
+
+		if (sym == NULL)
+			continue;
+		if (!map__contains_symbol(pos, sym)) {
+			sym = NULL;
+			continue;
+		}
+		if (mapp != NULL)
+			*mapp = pos;
+		goto out;
+	}
+
+	sym = NULL;
+out:
+	up_read(&maps->lock);
+	return sym;
+}
+
+int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams)
+{
+	if (ams->addr < ams->ms.map->start || ams->addr >= ams->ms.map->end) {
+		if (maps == NULL)
+			return -1;
+		ams->ms.map = maps__find(maps, ams->addr);
+		if (ams->ms.map == NULL)
+			return -1;
+	}
+
+	ams->al_addr = ams->ms.map->map_ip(ams->ms.map, ams->addr);
+	ams->ms.sym = map__find_symbol(ams->ms.map, ams->al_addr);
+
+	return ams->ms.sym ? 0 : -1;
+}
+
+size_t maps__fprintf(struct maps *maps, FILE *fp)
+{
+	size_t printed = 0;
+	struct map *pos;
+
+	down_read(&maps->lock);
+
+	maps__for_each_entry(maps, pos) {
+		printed += fprintf(fp, "Map:");
+		printed += map__fprintf(pos, fp);
+		if (verbose > 2) {
+			printed += dso__fprintf(pos->dso, fp);
+			printed += fprintf(fp, "--\n");
+		}
+	}
+
+	up_read(&maps->lock);
+
+	return printed;
+}
+
+int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp)
+{
+	struct rb_root *root;
+	struct rb_node *next, *first;
+	int err = 0;
+
+	down_write(&maps->lock);
+
+	root = &maps->entries;
+
+	/*
+	 * Find first map where end > map->start.
+	 * Same as find_vma() in kernel.
+	 */
+	next = root->rb_node;
+	first = NULL;
+	while (next) {
+		struct map *pos = rb_entry(next, struct map, rb_node);
+
+		if (pos->end > map->start) {
+			first = next;
+			if (pos->start <= map->start)
+				break;
+			next = next->rb_left;
+		} else
+			next = next->rb_right;
+	}
+
+	next = first;
+	while (next) {
+		struct map *pos = rb_entry(next, struct map, rb_node);
+		next = rb_next(&pos->rb_node);
+
+		/*
+		 * Stop if current map starts after map->end.
+		 * Maps are ordered by start: next will not overlap for sure.
+		 */
+		if (pos->start >= map->end)
+			break;
+
+		if (verbose >= 2) {
+
+			if (use_browser) {
+				pr_debug("overlapping maps in %s (disable tui for more info)\n",
+					   map->dso->name);
+			} else {
+				fputs("overlapping maps:\n", fp);
+				map__fprintf(map, fp);
+				map__fprintf(pos, fp);
+			}
+		}
+
+		rb_erase_init(&pos->rb_node, root);
+		/*
+		 * Now check if we need to create new maps for areas not
+		 * overlapped by the new map:
+		 */
+		if (map->start > pos->start) {
+			struct map *before = map__clone(pos);
+
+			if (before == NULL) {
+				err = -ENOMEM;
+				goto put_map;
+			}
+
+			before->end = map->start;
+			__maps__insert(maps, before);
+			if (verbose >= 2 && !use_browser)
+				map__fprintf(before, fp);
+			map__put(before);
+		}
+
+		if (map->end < pos->end) {
+			struct map *after = map__clone(pos);
+
+			if (after == NULL) {
+				err = -ENOMEM;
+				goto put_map;
+			}
+
+			after->start = map->end;
+			after->pgoff += map->end - pos->start;
+			assert(pos->map_ip(pos, map->end) == after->map_ip(after, map->end));
+			__maps__insert(maps, after);
+			if (verbose >= 2 && !use_browser)
+				map__fprintf(after, fp);
+			map__put(after);
+		}
+put_map:
+		map__put(pos);
+
+		if (err)
+			goto out;
+	}
+
+	err = 0;
+out:
+	up_write(&maps->lock);
+	return err;
+}
+
+/*
+ * XXX This should not really _copy_ te maps, but refcount them.
+ */
+int maps__clone(struct thread *thread, struct maps *parent)
+{
+	struct maps *maps = thread->maps;
+	int err;
+	struct map *map;
+
+	down_read(&parent->lock);
+
+	maps__for_each_entry(parent, map) {
+		struct map *new = map__clone(map);
+
+		if (new == NULL) {
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+
+		err = unwind__prepare_access(maps, new, NULL);
+		if (err)
+			goto out_unlock;
+
+		maps__insert(maps, new);
+		map__put(new);
+	}
+
+	err = 0;
+out_unlock:
+	up_read(&parent->lock);
+	return err;
+}
+
+static void __maps__insert(struct maps *maps, struct map *map)
+{
+	struct rb_node **p = &maps->entries.rb_node;
+	struct rb_node *parent = NULL;
+	const u64 ip = map->start;
+	struct map *m;
+
+	while (*p != NULL) {
+		parent = *p;
+		m = rb_entry(parent, struct map, rb_node);
+		if (ip < m->start)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&map->rb_node, parent, p);
+	rb_insert_color(&map->rb_node, &maps->entries);
+	map__get(map);
+}
+
+struct map *maps__find(struct maps *maps, u64 ip)
+{
+	struct rb_node *p;
+	struct map *m;
+
+	down_read(&maps->lock);
+
+	p = maps->entries.rb_node;
+	while (p != NULL) {
+		m = rb_entry(p, struct map, rb_node);
+		if (ip < m->start)
+			p = p->rb_left;
+		else if (ip >= m->end)
+			p = p->rb_right;
+		else
+			goto out;
+	}
+
+	m = NULL;
+out:
+	up_read(&maps->lock);
+	return m;
+}
+
+struct map *maps__first(struct maps *maps)
+{
+	struct rb_node *first = rb_first(&maps->entries);
+
+	if (first)
+		return rb_entry(first, struct map, rb_node);
+	return NULL;
+}
diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h
new file mode 100644
index 000000000..7e729ff42
--- /dev/null
+++ b/tools/perf/util/maps.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_MAPS_H
+#define __PERF_MAPS_H
+
+#include <linux/refcount.h>
+#include <linux/rbtree.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <linux/types.h>
+#include "rwsem.h"
+
+struct ref_reloc_sym;
+struct machine;
+struct map;
+struct maps;
+struct thread;
+
+struct map *maps__find(struct maps *maps, u64 addr);
+struct map *maps__first(struct maps *maps);
+struct map *map__next(struct map *map);
+
+#define maps__for_each_entry(maps, map) \
+	for (map = maps__first(maps); map; map = map__next(map))
+
+#define maps__for_each_entry_safe(maps, map, next) \
+	for (map = maps__first(maps), next = map__next(map); map; map = next, next = map__next(map))
+
+struct maps {
+	struct rb_root      entries;
+	struct rw_semaphore lock;
+	struct machine	 *machine;
+	struct map	 *last_search_by_name;
+	struct map	 **maps_by_name;
+	refcount_t	 refcnt;
+	unsigned int	 nr_maps;
+	unsigned int	 nr_maps_allocated;
+#ifdef HAVE_LIBUNWIND_SUPPORT
+	void				*addr_space;
+	struct unwind_libunwind_ops	*unwind_libunwind_ops;
+#endif
+};
+
+#define KMAP_NAME_LEN 256
+
+struct kmap {
+	struct ref_reloc_sym *ref_reloc_sym;
+	struct maps	     *kmaps;
+	char		     name[KMAP_NAME_LEN];
+};
+
+struct maps *maps__new(struct machine *machine);
+void maps__delete(struct maps *maps);
+bool maps__empty(struct maps *maps);
+
+static inline struct maps *maps__get(struct maps *maps)
+{
+	if (maps)
+		refcount_inc(&maps->refcnt);
+	return maps;
+}
+
+void maps__put(struct maps *maps);
+int maps__clone(struct thread *thread, struct maps *parent);
+size_t maps__fprintf(struct maps *maps, FILE *fp);
+
+void maps__insert(struct maps *maps, struct map *map);
+
+void maps__remove(struct maps *maps, struct map *map);
+
+struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp);
+struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp);
+
+struct addr_map_symbol;
+
+int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams);
+
+int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp);
+
+struct map *maps__find_by_name(struct maps *maps, const char *name);
+
+int maps__merge_in(struct maps *kmaps, struct map *new_map);
+
+void __maps__sort_by_name(struct maps *maps);
+
+#endif // __PERF_MAPS_H
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
new file mode 100644
index 000000000..b3a910930
--- /dev/null
+++ b/tools/perf/util/mem-events.c
@@ -0,0 +1,699 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <api/fs/fs.h>
+#include <linux/kernel.h>
+#include "map_symbol.h"
+#include "mem-events.h"
+#include "debug.h"
+#include "symbol.h"
+#include "pmu.h"
+#include "pmu-hybrid.h"
+
+unsigned int perf_mem_events__loads_ldlat = 30;
+
+#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
+
+static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
+	E("ldlat-loads",	"cpu/mem-loads,ldlat=%u/P",	"cpu/events/mem-loads"),
+	E("ldlat-stores",	"cpu/mem-stores/P",		"cpu/events/mem-stores"),
+	E(NULL,			NULL,				NULL),
+};
+#undef E
+
+static char mem_loads_name[100];
+static bool mem_loads_name__init;
+
+struct perf_mem_event * __weak perf_mem_events__ptr(int i)
+{
+	if (i >= PERF_MEM_EVENTS__MAX)
+		return NULL;
+
+	return &perf_mem_events[i];
+}
+
+char * __weak perf_mem_events__name(int i, char *pmu_name  __maybe_unused)
+{
+	struct perf_mem_event *e = perf_mem_events__ptr(i);
+
+	if (!e)
+		return NULL;
+
+	if (i == PERF_MEM_EVENTS__LOAD) {
+		if (!mem_loads_name__init) {
+			mem_loads_name__init = true;
+			scnprintf(mem_loads_name, sizeof(mem_loads_name),
+				  e->name, perf_mem_events__loads_ldlat);
+		}
+		return mem_loads_name;
+	}
+
+	return (char *)e->name;
+}
+
+__weak bool is_mem_loads_aux_event(struct evsel *leader __maybe_unused)
+{
+	return false;
+}
+
+int perf_mem_events__parse(const char *str)
+{
+	char *tok, *saveptr = NULL;
+	bool found = false;
+	char *buf;
+	int j;
+
+	/* We need buffer that we know we can write to. */
+	buf = malloc(strlen(str) + 1);
+	if (!buf)
+		return -ENOMEM;
+
+	strcpy(buf, str);
+
+	tok = strtok_r((char *)buf, ",", &saveptr);
+
+	while (tok) {
+		for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+			struct perf_mem_event *e = perf_mem_events__ptr(j);
+
+			if (!e->tag)
+				continue;
+
+			if (strstr(e->tag, tok))
+				e->record = found = true;
+		}
+
+		tok = strtok_r(NULL, ",", &saveptr);
+	}
+
+	free(buf);
+
+	if (found)
+		return 0;
+
+	pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str);
+	return -1;
+}
+
+static bool perf_mem_event__supported(const char *mnt, char *sysfs_name)
+{
+	char path[PATH_MAX];
+	struct stat st;
+
+	scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, sysfs_name);
+	return !stat(path, &st);
+}
+
+int perf_mem_events__init(void)
+{
+	const char *mnt = sysfs__mount();
+	bool found = false;
+	int j;
+
+	if (!mnt)
+		return -ENOENT;
+
+	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+		struct perf_mem_event *e = perf_mem_events__ptr(j);
+		struct perf_pmu *pmu;
+		char sysfs_name[100];
+
+		/*
+		 * If the event entry isn't valid, skip initialization
+		 * and "e->supported" will keep false.
+		 */
+		if (!e->tag)
+			continue;
+
+		if (!perf_pmu__has_hybrid()) {
+			scnprintf(sysfs_name, sizeof(sysfs_name),
+				  e->sysfs_name, "cpu");
+			e->supported = perf_mem_event__supported(mnt, sysfs_name);
+		} else {
+			perf_pmu__for_each_hybrid_pmu(pmu) {
+				scnprintf(sysfs_name, sizeof(sysfs_name),
+					  e->sysfs_name, pmu->name);
+				e->supported |= perf_mem_event__supported(mnt, sysfs_name);
+			}
+		}
+
+		if (e->supported)
+			found = true;
+	}
+
+	return found ? 0 : -ENOENT;
+}
+
+void perf_mem_events__list(void)
+{
+	int j;
+
+	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+		struct perf_mem_event *e = perf_mem_events__ptr(j);
+
+		fprintf(stderr, "%-*s%-*s%s",
+			e->tag ? 13 : 0,
+			e->tag ? : "",
+			e->tag && verbose > 0 ? 25 : 0,
+			e->tag && verbose > 0 ? perf_mem_events__name(j, NULL) : "",
+			e->supported ? ": available\n" : "");
+	}
+}
+
+static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e,
+						    int idx)
+{
+	const char *mnt = sysfs__mount();
+	char sysfs_name[100];
+	struct perf_pmu *pmu;
+
+	perf_pmu__for_each_hybrid_pmu(pmu) {
+		scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name,
+			  pmu->name);
+		if (!perf_mem_event__supported(mnt, sysfs_name)) {
+			pr_err("failed: event '%s' not supported\n",
+			       perf_mem_events__name(idx, pmu->name));
+		}
+	}
+}
+
+int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
+				 char **rec_tmp, int *tmp_nr)
+{
+	int i = *argv_nr, k = 0;
+	struct perf_mem_event *e;
+	struct perf_pmu *pmu;
+	char *s;
+
+	for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+		e = perf_mem_events__ptr(j);
+		if (!e->record)
+			continue;
+
+		if (!perf_pmu__has_hybrid()) {
+			if (!e->supported) {
+				pr_err("failed: event '%s' not supported\n",
+				       perf_mem_events__name(j, NULL));
+				return -1;
+			}
+
+			rec_argv[i++] = "-e";
+			rec_argv[i++] = perf_mem_events__name(j, NULL);
+		} else {
+			if (!e->supported) {
+				perf_mem_events__print_unsupport_hybrid(e, j);
+				return -1;
+			}
+
+			perf_pmu__for_each_hybrid_pmu(pmu) {
+				rec_argv[i++] = "-e";
+				s = perf_mem_events__name(j, pmu->name);
+				if (s) {
+					s = strdup(s);
+					if (!s)
+						return -1;
+
+					rec_argv[i++] = s;
+					rec_tmp[k++] = s;
+				}
+			}
+		}
+	}
+
+	*argv_nr = i;
+	*tmp_nr = k;
+	return 0;
+}
+
+static const char * const tlb_access[] = {
+	"N/A",
+	"HIT",
+	"MISS",
+	"L1",
+	"L2",
+	"Walker",
+	"Fault",
+};
+
+int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	size_t l = 0, i;
+	u64 m = PERF_MEM_TLB_NA;
+	u64 hit, miss;
+
+	sz -= 1; /* -1 for null termination */
+	out[0] = '\0';
+
+	if (mem_info)
+		m = mem_info->data_src.mem_dtlb;
+
+	hit = m & PERF_MEM_TLB_HIT;
+	miss = m & PERF_MEM_TLB_MISS;
+
+	/* already taken care of */
+	m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);
+
+	for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) {
+		if (!(m & 0x1))
+			continue;
+		if (l) {
+			strcat(out, " or ");
+			l += 4;
+		}
+		l += scnprintf(out + l, sz - l, tlb_access[i]);
+	}
+	if (*out == '\0')
+		l += scnprintf(out, sz - l, "N/A");
+	if (hit)
+		l += scnprintf(out + l, sz - l, " hit");
+	if (miss)
+		l += scnprintf(out + l, sz - l, " miss");
+
+	return l;
+}
+
+static const char * const mem_lvl[] = {
+	"N/A",
+	"HIT",
+	"MISS",
+	"L1",
+	"LFB/MAB",
+	"L2",
+	"L3",
+	"Local RAM",
+	"Remote RAM (1 hop)",
+	"Remote RAM (2 hops)",
+	"Remote Cache (1 hop)",
+	"Remote Cache (2 hops)",
+	"I/O",
+	"Uncached",
+};
+
+static const char * const mem_lvlnum[] = {
+	[PERF_MEM_LVLNUM_CXL] = "CXL",
+	[PERF_MEM_LVLNUM_IO] = "I/O",
+	[PERF_MEM_LVLNUM_ANY_CACHE] = "Any cache",
+	[PERF_MEM_LVLNUM_LFB] = "LFB/MAB",
+	[PERF_MEM_LVLNUM_RAM] = "RAM",
+	[PERF_MEM_LVLNUM_PMEM] = "PMEM",
+	[PERF_MEM_LVLNUM_NA] = "N/A",
+};
+
+static const char * const mem_hops[] = {
+	"N/A",
+	/*
+	 * While printing, 'Remote' will be added to represent
+	 * 'Remote core, same node' accesses as remote field need
+	 * to be set with mem_hops field.
+	 */
+	"core, same node",
+	"node, same socket",
+	"socket, same board",
+	"board",
+};
+
+static int perf_mem__op_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	u64 op = PERF_MEM_LOCK_NA;
+	int l;
+
+	if (mem_info)
+		op = mem_info->data_src.mem_op;
+
+	if (op & PERF_MEM_OP_NA)
+		l = scnprintf(out, sz, "N/A");
+	else if (op & PERF_MEM_OP_LOAD)
+		l = scnprintf(out, sz, "LOAD");
+	else if (op & PERF_MEM_OP_STORE)
+		l = scnprintf(out, sz, "STORE");
+	else if (op & PERF_MEM_OP_PFETCH)
+		l = scnprintf(out, sz, "PFETCH");
+	else if (op & PERF_MEM_OP_EXEC)
+		l = scnprintf(out, sz, "EXEC");
+	else
+		l = scnprintf(out, sz, "No");
+
+	return l;
+}
+
+int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	size_t i, l = 0;
+	u64 m =  PERF_MEM_LVL_NA;
+	u64 hit, miss;
+	int printed = 0;
+
+	if (mem_info)
+		m  = mem_info->data_src.mem_lvl;
+
+	sz -= 1; /* -1 for null termination */
+	out[0] = '\0';
+
+	hit = m & PERF_MEM_LVL_HIT;
+	miss = m & PERF_MEM_LVL_MISS;
+
+	/* already taken care of */
+	m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);
+
+	if (mem_info && mem_info->data_src.mem_remote) {
+		strcat(out, "Remote ");
+		l += 7;
+	}
+
+	/*
+	 * Incase mem_hops field is set, we can skip printing data source via
+	 * PERF_MEM_LVL namespace.
+	 */
+	if (mem_info && mem_info->data_src.mem_hops) {
+		l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]);
+	} else {
+		for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
+			if (!(m & 0x1))
+				continue;
+			if (printed++) {
+				strcat(out, " or ");
+				l += 4;
+			}
+			l += scnprintf(out + l, sz - l, mem_lvl[i]);
+		}
+	}
+
+	if (mem_info && mem_info->data_src.mem_lvl_num) {
+		int lvl = mem_info->data_src.mem_lvl_num;
+		if (printed++) {
+			strcat(out, " or ");
+			l += 4;
+		}
+		if (mem_lvlnum[lvl])
+			l += scnprintf(out + l, sz - l, mem_lvlnum[lvl]);
+		else
+			l += scnprintf(out + l, sz - l, "L%d", lvl);
+	}
+
+	if (l == 0)
+		l += scnprintf(out + l, sz - l, "N/A");
+	if (hit)
+		l += scnprintf(out + l, sz - l, " hit");
+	if (miss)
+		l += scnprintf(out + l, sz - l, " miss");
+
+	return l;
+}
+
+static const char * const snoop_access[] = {
+	"N/A",
+	"None",
+	"Hit",
+	"Miss",
+	"HitM",
+};
+
+static const char * const snoopx_access[] = {
+	"Fwd",
+	"Peer",
+};
+
+int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	size_t i, l = 0;
+	u64 m = PERF_MEM_SNOOP_NA;
+
+	sz -= 1; /* -1 for null termination */
+	out[0] = '\0';
+
+	if (mem_info)
+		m = mem_info->data_src.mem_snoop;
+
+	for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) {
+		if (!(m & 0x1))
+			continue;
+		if (l) {
+			strcat(out, " or ");
+			l += 4;
+		}
+		l += scnprintf(out + l, sz - l, snoop_access[i]);
+	}
+
+	m = 0;
+	if (mem_info)
+		m = mem_info->data_src.mem_snoopx;
+
+	for (i = 0; m && i < ARRAY_SIZE(snoopx_access); i++, m >>= 1) {
+		if (!(m & 0x1))
+			continue;
+
+		if (l) {
+			strcat(out, " or ");
+			l += 4;
+		}
+		l += scnprintf(out + l, sz - l, snoopx_access[i]);
+	}
+
+	if (*out == '\0')
+		l += scnprintf(out, sz - l, "N/A");
+
+	return l;
+}
+
+int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	u64 mask = PERF_MEM_LOCK_NA;
+	int l;
+
+	if (mem_info)
+		mask = mem_info->data_src.mem_lock;
+
+	if (mask & PERF_MEM_LOCK_NA)
+		l = scnprintf(out, sz, "N/A");
+	else if (mask & PERF_MEM_LOCK_LOCKED)
+		l = scnprintf(out, sz, "Yes");
+	else
+		l = scnprintf(out, sz, "No");
+
+	return l;
+}
+
+int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	size_t l = 0;
+	u64 mask = PERF_MEM_BLK_NA;
+
+	sz -= 1; /* -1 for null termination */
+	out[0] = '\0';
+
+	if (mem_info)
+		mask = mem_info->data_src.mem_blk;
+
+	if (!mask || (mask & PERF_MEM_BLK_NA)) {
+		l += scnprintf(out + l, sz - l, " N/A");
+		return l;
+	}
+	if (mask & PERF_MEM_BLK_DATA)
+		l += scnprintf(out + l, sz - l, " Data");
+	if (mask & PERF_MEM_BLK_ADDR)
+		l += scnprintf(out + l, sz - l, " Addr");
+
+	return l;
+}
+
+int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	int i = 0;
+
+	i += scnprintf(out, sz, "|OP ");
+	i += perf_mem__op_scnprintf(out + i, sz - i, mem_info);
+	i += scnprintf(out + i, sz - i, "|LVL ");
+	i += perf_mem__lvl_scnprintf(out + i, sz, mem_info);
+	i += scnprintf(out + i, sz - i, "|SNP ");
+	i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info);
+	i += scnprintf(out + i, sz - i, "|TLB ");
+	i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info);
+	i += scnprintf(out + i, sz - i, "|LCK ");
+	i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info);
+	i += scnprintf(out + i, sz - i, "|BLK ");
+	i += perf_mem__blk_scnprintf(out + i, sz - i, mem_info);
+
+	return i;
+}
+
+int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
+{
+	union perf_mem_data_src *data_src = &mi->data_src;
+	u64 daddr  = mi->daddr.addr;
+	u64 op     = data_src->mem_op;
+	u64 lvl    = data_src->mem_lvl;
+	u64 snoop  = data_src->mem_snoop;
+	u64 snoopx = data_src->mem_snoopx;
+	u64 lock   = data_src->mem_lock;
+	u64 blk    = data_src->mem_blk;
+	/*
+	 * Skylake might report unknown remote level via this
+	 * bit, consider it when evaluating remote HITMs.
+	 *
+	 * Incase of power, remote field can also be used to denote cache
+	 * accesses from the another core of same node. Hence, setting
+	 * mrem only when HOPS is zero along with set remote field.
+	 */
+	bool mrem  = (data_src->mem_remote && !data_src->mem_hops);
+	int err = 0;
+
+#define HITM_INC(__f)		\
+do {				\
+	stats->__f++;		\
+	stats->tot_hitm++;	\
+} while (0)
+
+#define PEER_INC(__f)		\
+do {				\
+	stats->__f++;		\
+	stats->tot_peer++;	\
+} while (0)
+
+#define P(a, b) PERF_MEM_##a##_##b
+
+	stats->nr_entries++;
+
+	if (lock & P(LOCK, LOCKED)) stats->locks++;
+
+	if (blk & P(BLK, DATA)) stats->blk_data++;
+	if (blk & P(BLK, ADDR)) stats->blk_addr++;
+
+	if (op & P(OP, LOAD)) {
+		/* load */
+		stats->load++;
+
+		if (!daddr) {
+			stats->ld_noadrs++;
+			return -1;
+		}
+
+		if (lvl & P(LVL, HIT)) {
+			if (lvl & P(LVL, UNC)) stats->ld_uncache++;
+			if (lvl & P(LVL, IO))  stats->ld_io++;
+			if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
+			if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
+			if (lvl & P(LVL, L2)) {
+				stats->ld_l2hit++;
+
+				if (snoopx & P(SNOOPX, PEER))
+					PEER_INC(lcl_peer);
+			}
+			if (lvl & P(LVL, L3 )) {
+				if (snoop & P(SNOOP, HITM))
+					HITM_INC(lcl_hitm);
+				else
+					stats->ld_llchit++;
+
+				if (snoopx & P(SNOOPX, PEER))
+					PEER_INC(lcl_peer);
+			}
+
+			if (lvl & P(LVL, LOC_RAM)) {
+				stats->lcl_dram++;
+				if (snoop & P(SNOOP, HIT))
+					stats->ld_shared++;
+				else
+					stats->ld_excl++;
+			}
+
+			if ((lvl & P(LVL, REM_RAM1)) ||
+			    (lvl & P(LVL, REM_RAM2)) ||
+			     mrem) {
+				stats->rmt_dram++;
+				if (snoop & P(SNOOP, HIT))
+					stats->ld_shared++;
+				else
+					stats->ld_excl++;
+			}
+		}
+
+		if ((lvl & P(LVL, REM_CCE1)) ||
+		    (lvl & P(LVL, REM_CCE2)) ||
+		     mrem) {
+			if (snoop & P(SNOOP, HIT)) {
+				stats->rmt_hit++;
+			} else if (snoop & P(SNOOP, HITM)) {
+				HITM_INC(rmt_hitm);
+			} else if (snoopx & P(SNOOPX, PEER)) {
+				stats->rmt_hit++;
+				PEER_INC(rmt_peer);
+			}
+		}
+
+		if ((lvl & P(LVL, MISS)))
+			stats->ld_miss++;
+
+	} else if (op & P(OP, STORE)) {
+		/* store */
+		stats->store++;
+
+		if (!daddr) {
+			stats->st_noadrs++;
+			return -1;
+		}
+
+		if (lvl & P(LVL, HIT)) {
+			if (lvl & P(LVL, UNC)) stats->st_uncache++;
+			if (lvl & P(LVL, L1 )) stats->st_l1hit++;
+		}
+		if (lvl & P(LVL, MISS))
+			if (lvl & P(LVL, L1)) stats->st_l1miss++;
+		if (lvl & P(LVL, NA))
+			stats->st_na++;
+	} else {
+		/* unparsable data_src? */
+		stats->noparse++;
+		return -1;
+	}
+
+	if (!mi->daddr.ms.map || !mi->iaddr.ms.map) {
+		stats->nomap++;
+		return -1;
+	}
+
+#undef P
+#undef HITM_INC
+	return err;
+}
+
+void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
+{
+	stats->nr_entries	+= add->nr_entries;
+
+	stats->locks		+= add->locks;
+	stats->store		+= add->store;
+	stats->st_uncache	+= add->st_uncache;
+	stats->st_noadrs	+= add->st_noadrs;
+	stats->st_l1hit		+= add->st_l1hit;
+	stats->st_l1miss	+= add->st_l1miss;
+	stats->st_na		+= add->st_na;
+	stats->load		+= add->load;
+	stats->ld_excl		+= add->ld_excl;
+	stats->ld_shared	+= add->ld_shared;
+	stats->ld_uncache	+= add->ld_uncache;
+	stats->ld_io		+= add->ld_io;
+	stats->ld_miss		+= add->ld_miss;
+	stats->ld_noadrs	+= add->ld_noadrs;
+	stats->ld_fbhit		+= add->ld_fbhit;
+	stats->ld_l1hit		+= add->ld_l1hit;
+	stats->ld_l2hit		+= add->ld_l2hit;
+	stats->ld_llchit	+= add->ld_llchit;
+	stats->lcl_hitm		+= add->lcl_hitm;
+	stats->rmt_hitm		+= add->rmt_hitm;
+	stats->tot_hitm		+= add->tot_hitm;
+	stats->lcl_peer		+= add->lcl_peer;
+	stats->rmt_peer		+= add->rmt_peer;
+	stats->tot_peer		+= add->tot_peer;
+	stats->rmt_hit		+= add->rmt_hit;
+	stats->lcl_dram		+= add->lcl_dram;
+	stats->rmt_dram		+= add->rmt_dram;
+	stats->blk_data		+= add->blk_data;
+	stats->blk_addr		+= add->blk_addr;
+	stats->nomap		+= add->nomap;
+	stats->noparse		+= add->noparse;
+}
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
new file mode 100644
index 000000000..12372309d
--- /dev/null
+++ b/tools/perf/util/mem-events.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_MEM_EVENTS_H
+#define __PERF_MEM_EVENTS_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <linux/types.h>
+#include <linux/refcount.h>
+#include <linux/perf_event.h>
+#include "stat.h"
+#include "evsel.h"
+
+struct perf_mem_event {
+	bool		record;
+	bool		supported;
+	const char	*tag;
+	const char	*name;
+	const char	*sysfs_name;
+};
+
+struct mem_info {
+	struct addr_map_symbol	iaddr;
+	struct addr_map_symbol	daddr;
+	union perf_mem_data_src	data_src;
+	refcount_t		refcnt;
+};
+
+enum {
+	PERF_MEM_EVENTS__LOAD,
+	PERF_MEM_EVENTS__STORE,
+	PERF_MEM_EVENTS__LOAD_STORE,
+	PERF_MEM_EVENTS__MAX,
+};
+
+extern unsigned int perf_mem_events__loads_ldlat;
+
+int perf_mem_events__parse(const char *str);
+int perf_mem_events__init(void);
+
+char *perf_mem_events__name(int i, char *pmu_name);
+struct perf_mem_event *perf_mem_events__ptr(int i);
+bool is_mem_loads_aux_event(struct evsel *leader);
+
+void perf_mem_events__list(void);
+int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
+				 char **rec_tmp, int *tmp_nr);
+
+int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+
+int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info);
+
+struct c2c_stats {
+	u32	nr_entries;
+
+	u32	locks;               /* count of 'lock' transactions */
+	u32	store;               /* count of all stores in trace */
+	u32	st_uncache;          /* stores to uncacheable address */
+	u32	st_noadrs;           /* cacheable store with no address */
+	u32	st_l1hit;            /* count of stores that hit L1D */
+	u32	st_l1miss;           /* count of stores that miss L1D */
+	u32	st_na;               /* count of stores with memory level is not available */
+	u32	load;                /* count of all loads in trace */
+	u32	ld_excl;             /* exclusive loads, rmt/lcl DRAM - snp none/miss */
+	u32	ld_shared;           /* shared loads, rmt/lcl DRAM - snp hit */
+	u32	ld_uncache;          /* loads to uncacheable address */
+	u32	ld_io;               /* loads to io address */
+	u32	ld_miss;             /* loads miss */
+	u32	ld_noadrs;           /* cacheable load with no address */
+	u32	ld_fbhit;            /* count of loads hitting Fill Buffer */
+	u32	ld_l1hit;            /* count of loads that hit L1D */
+	u32	ld_l2hit;            /* count of loads that hit L2D */
+	u32	ld_llchit;           /* count of loads that hit LLC */
+	u32	lcl_hitm;            /* count of loads with local HITM  */
+	u32	rmt_hitm;            /* count of loads with remote HITM */
+	u32	tot_hitm;            /* count of loads with local and remote HITM */
+	u32	lcl_peer;            /* count of loads with local peer cache */
+	u32	rmt_peer;            /* count of loads with remote peer cache */
+	u32	tot_peer;            /* count of loads with local and remote peer cache */
+	u32	rmt_hit;             /* count of loads with remote hit clean; */
+	u32	lcl_dram;            /* count of loads miss to local DRAM */
+	u32	rmt_dram;            /* count of loads miss to remote DRAM */
+	u32	blk_data;            /* count of loads blocked by data */
+	u32	blk_addr;            /* count of loads blocked by address conflict */
+	u32	nomap;               /* count of load/stores with no phys addrs */
+	u32	noparse;             /* count of unparsable data sources */
+};
+
+struct hist_entry;
+int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi);
+void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add);
+
+#endif /* __PERF_MEM_EVENTS_H */
diff --git a/tools/perf/util/mem2node.c b/tools/perf/util/mem2node.c
new file mode 100644
index 000000000..03a7d7b27
--- /dev/null
+++ b/tools/perf/util/mem2node.c
@@ -0,0 +1,139 @@
+#include <errno.h>
+#include <inttypes.h>
+#include <asm/bug.h>
+#include <linux/bitmap.h>
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+#include "debug.h"
+#include "env.h"
+#include "mem2node.h"
+
+struct phys_entry {
+	struct rb_node	rb_node;
+	u64	start;
+	u64	end;
+	u64	node;
+};
+
+static void phys_entry__insert(struct phys_entry *entry, struct rb_root *root)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct phys_entry *e;
+
+	while (*p != NULL) {
+		parent = *p;
+		e = rb_entry(parent, struct phys_entry, rb_node);
+
+		if (entry->start < e->start)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&entry->rb_node, parent, p);
+	rb_insert_color(&entry->rb_node, root);
+}
+
+static void
+phys_entry__init(struct phys_entry *entry, u64 start, u64 bsize, u64 node)
+{
+	entry->start = start;
+	entry->end   = start + bsize;
+	entry->node  = node;
+	RB_CLEAR_NODE(&entry->rb_node);
+}
+
+int mem2node__init(struct mem2node *map, struct perf_env *env)
+{
+	struct memory_node *n, *nodes = &env->memory_nodes[0];
+	struct phys_entry *entries, *tmp_entries;
+	u64 bsize = env->memory_bsize;
+	int i, j = 0, max = 0;
+
+	memset(map, 0x0, sizeof(*map));
+	map->root = RB_ROOT;
+
+	for (i = 0; i < env->nr_memory_nodes; i++) {
+		n = &nodes[i];
+		max += bitmap_weight(n->set, n->size);
+	}
+
+	entries = zalloc(sizeof(*entries) * max);
+	if (!entries)
+		return -ENOMEM;
+
+	for (i = 0; i < env->nr_memory_nodes; i++) {
+		u64 bit;
+
+		n = &nodes[i];
+
+		for (bit = 0; bit < n->size; bit++) {
+			u64 start;
+
+			if (!test_bit(bit, n->set))
+				continue;
+
+			start = bit * bsize;
+
+			/*
+			 * Merge nearby areas, we walk in order
+			 * through the bitmap, so no need to sort.
+			 */
+			if (j > 0) {
+				struct phys_entry *prev = &entries[j - 1];
+
+				if ((prev->end == start) &&
+				    (prev->node == n->node)) {
+					prev->end += bsize;
+					continue;
+				}
+			}
+
+			phys_entry__init(&entries[j++], start, bsize, n->node);
+		}
+	}
+
+	/* Cut unused entries, due to merging. */
+	tmp_entries = realloc(entries, sizeof(*entries) * j);
+	if (tmp_entries ||
+	    WARN_ONCE(j == 0, "No memory nodes, is CONFIG_MEMORY_HOTPLUG enabled?\n"))
+		entries = tmp_entries;
+
+	for (i = 0; i < j; i++) {
+		pr_debug("mem2node %03" PRIu64 " [0x%016" PRIx64 "-0x%016" PRIx64 "]\n",
+			 entries[i].node, entries[i].start, entries[i].end);
+
+		phys_entry__insert(&entries[i], &map->root);
+	}
+
+	map->entries = entries;
+	return 0;
+}
+
+void mem2node__exit(struct mem2node *map)
+{
+	zfree(&map->entries);
+}
+
+int mem2node__node(struct mem2node *map, u64 addr)
+{
+	struct rb_node **p, *parent = NULL;
+	struct phys_entry *entry;
+
+	p = &map->root.rb_node;
+	while (*p != NULL) {
+		parent = *p;
+		entry = rb_entry(parent, struct phys_entry, rb_node);
+		if (addr < entry->start)
+			p = &(*p)->rb_left;
+		else if (addr >= entry->end)
+			p = &(*p)->rb_right;
+		else
+			goto out;
+	}
+
+	entry = NULL;
+out:
+	return entry ? (int) entry->node : -1;
+}
diff --git a/tools/perf/util/mem2node.h b/tools/perf/util/mem2node.h
new file mode 100644
index 000000000..8dfa2b58d
--- /dev/null
+++ b/tools/perf/util/mem2node.h
@@ -0,0 +1,20 @@
+#ifndef __MEM2NODE_H
+#define __MEM2NODE_H
+
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+struct perf_env;
+struct phys_entry;
+
+struct mem2node {
+	struct rb_root		 root;
+	struct phys_entry	*entries;
+	int			 cnt;
+};
+
+int  mem2node__init(struct mem2node *map, struct perf_env *env);
+void mem2node__exit(struct mem2node *map);
+int  mem2node__node(struct mem2node *map, u64 addr);
+
+#endif /* __MEM2NODE_H */
diff --git a/tools/perf/util/memswap.c b/tools/perf/util/memswap.c
new file mode 100644
index 000000000..c1317e498
--- /dev/null
+++ b/tools/perf/util/memswap.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <byteswap.h>
+#include "memswap.h"
+#include <linux/types.h>
+
+void mem_bswap_32(void *src, int byte_size)
+{
+	u32 *m = src;
+	while (byte_size > 0) {
+		*m = bswap_32(*m);
+		byte_size -= sizeof(u32);
+		++m;
+	}
+}
+
+void mem_bswap_64(void *src, int byte_size)
+{
+	u64 *m = src;
+
+	while (byte_size > 0) {
+		*m = bswap_64(*m);
+		byte_size -= sizeof(u64);
+		++m;
+	}
+}
diff --git a/tools/perf/util/memswap.h b/tools/perf/util/memswap.h
new file mode 100644
index 000000000..2c38e8c2d
--- /dev/null
+++ b/tools/perf/util/memswap.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_MEMSWAP_H_
+#define PERF_MEMSWAP_H_
+
+#include <linux/types.h>
+
+union u64_swap {
+	u64 val64;
+	u32 val32[2];
+};
+
+void mem_bswap_64(void *src, int byte_size);
+void mem_bswap_32(void *src, int byte_size);
+
+#endif /* PERF_MEMSWAP_H_ */
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
new file mode 100644
index 000000000..4c98ac29e
--- /dev/null
+++ b/tools/perf/util/metricgroup.c
@@ -0,0 +1,1830 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2017, Intel Corporation.
+ */
+
+/* Manage metrics and groups of metrics from JSON files */
+
+#include "metricgroup.h"
+#include "debug.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "strbuf.h"
+#include "pmu.h"
+#include "pmu-hybrid.h"
+#include "expr.h"
+#include "rblist.h"
+#include <string.h>
+#include <errno.h>
+#include "strlist.h"
+#include <assert.h>
+#include <linux/ctype.h>
+#include <linux/list_sort.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <perf/cpumap.h>
+#include <subcmd/parse-options.h>
+#include <api/fs/fs.h>
+#include "util.h"
+#include <asm/bug.h>
+#include "cgroup.h"
+
+struct metric_event *metricgroup__lookup(struct rblist *metric_events,
+					 struct evsel *evsel,
+					 bool create)
+{
+	struct rb_node *nd;
+	struct metric_event me = {
+		.evsel = evsel
+	};
+
+	if (!metric_events)
+		return NULL;
+
+	nd = rblist__find(metric_events, &me);
+	if (nd)
+		return container_of(nd, struct metric_event, nd);
+	if (create) {
+		rblist__add_node(metric_events, &me);
+		nd = rblist__find(metric_events, &me);
+		if (nd)
+			return container_of(nd, struct metric_event, nd);
+	}
+	return NULL;
+}
+
+static int metric_event_cmp(struct rb_node *rb_node, const void *entry)
+{
+	struct metric_event *a = container_of(rb_node,
+					      struct metric_event,
+					      nd);
+	const struct metric_event *b = entry;
+
+	if (a->evsel == b->evsel)
+		return 0;
+	if ((char *)a->evsel < (char *)b->evsel)
+		return -1;
+	return +1;
+}
+
+static struct rb_node *metric_event_new(struct rblist *rblist __maybe_unused,
+					const void *entry)
+{
+	struct metric_event *me = malloc(sizeof(struct metric_event));
+
+	if (!me)
+		return NULL;
+	memcpy(me, entry, sizeof(struct metric_event));
+	me->evsel = ((struct metric_event *)entry)->evsel;
+	INIT_LIST_HEAD(&me->head);
+	return &me->nd;
+}
+
+static void metric_event_delete(struct rblist *rblist __maybe_unused,
+				struct rb_node *rb_node)
+{
+	struct metric_event *me = container_of(rb_node, struct metric_event, nd);
+	struct metric_expr *expr, *tmp;
+
+	list_for_each_entry_safe(expr, tmp, &me->head, nd) {
+		free((char *)expr->metric_name);
+		free(expr->metric_refs);
+		free(expr->metric_events);
+		free(expr);
+	}
+
+	free(me);
+}
+
+static void metricgroup__rblist_init(struct rblist *metric_events)
+{
+	rblist__init(metric_events);
+	metric_events->node_cmp = metric_event_cmp;
+	metric_events->node_new = metric_event_new;
+	metric_events->node_delete = metric_event_delete;
+}
+
+void metricgroup__rblist_exit(struct rblist *metric_events)
+{
+	rblist__exit(metric_events);
+}
+
+/**
+ * The metric under construction. The data held here will be placed in a
+ * metric_expr.
+ */
+struct metric {
+	struct list_head nd;
+	/**
+	 * The expression parse context importantly holding the IDs contained
+	 * within the expression.
+	 */
+	struct expr_parse_ctx *pctx;
+	/** The name of the metric such as "IPC". */
+	const char *metric_name;
+	/** Modifier on the metric such as "u" or NULL for none. */
+	const char *modifier;
+	/** The expression to parse, for example, "instructions/cycles". */
+	const char *metric_expr;
+	/**
+	 * The "ScaleUnit" that scales and adds a unit to the metric during
+	 * output.
+	 */
+	const char *metric_unit;
+	/** Optional null terminated array of referenced metrics. */
+	struct metric_ref *metric_refs;
+	/**
+	 * Is there a constraint on the group of events? In which case the
+	 * events won't be grouped.
+	 */
+	bool has_constraint;
+	/**
+	 * Parsed events for the metric. Optional as events may be taken from a
+	 * different metric whose group contains all the IDs necessary for this
+	 * one.
+	 */
+	struct evlist *evlist;
+};
+
+static void metricgroup___watchdog_constraint_hint(const char *name, bool foot)
+{
+	static bool violate_nmi_constraint;
+
+	if (!foot) {
+		pr_warning("Splitting metric group %s into standalone metrics.\n", name);
+		violate_nmi_constraint = true;
+		return;
+	}
+
+	if (!violate_nmi_constraint)
+		return;
+
+	pr_warning("Try disabling the NMI watchdog to comply NO_NMI_WATCHDOG metric constraint:\n"
+		   "    echo 0 > /proc/sys/kernel/nmi_watchdog\n"
+		   "    perf stat ...\n"
+		   "    echo 1 > /proc/sys/kernel/nmi_watchdog\n");
+}
+
+static bool metricgroup__has_constraint(const struct pmu_event *pe)
+{
+	if (!pe->metric_constraint)
+		return false;
+
+	if (!strcmp(pe->metric_constraint, "NO_NMI_WATCHDOG") &&
+	    sysctl__nmi_watchdog_enabled()) {
+		metricgroup___watchdog_constraint_hint(pe->metric_name, false);
+		return true;
+	}
+
+	return false;
+}
+
+static void metric__free(struct metric *m)
+{
+	if (!m)
+		return;
+
+	free(m->metric_refs);
+	expr__ctx_free(m->pctx);
+	free((char *)m->modifier);
+	evlist__delete(m->evlist);
+	free(m);
+}
+
+static struct metric *metric__new(const struct pmu_event *pe,
+				  const char *modifier,
+				  bool metric_no_group,
+				  int runtime,
+				  const char *user_requested_cpu_list,
+				  bool system_wide)
+{
+	struct metric *m;
+
+	m = zalloc(sizeof(*m));
+	if (!m)
+		return NULL;
+
+	m->pctx = expr__ctx_new();
+	if (!m->pctx)
+		goto out_err;
+
+	m->metric_name = pe->metric_name;
+	m->modifier = NULL;
+	if (modifier) {
+		m->modifier = strdup(modifier);
+		if (!m->modifier)
+			goto out_err;
+	}
+	m->metric_expr = pe->metric_expr;
+	m->metric_unit = pe->unit;
+	m->pctx->sctx.user_requested_cpu_list = NULL;
+	if (user_requested_cpu_list) {
+		m->pctx->sctx.user_requested_cpu_list = strdup(user_requested_cpu_list);
+		if (!m->pctx->sctx.user_requested_cpu_list)
+			goto out_err;
+	}
+	m->pctx->sctx.runtime = runtime;
+	m->pctx->sctx.system_wide = system_wide;
+	m->has_constraint = metric_no_group || metricgroup__has_constraint(pe);
+	m->metric_refs = NULL;
+	m->evlist = NULL;
+
+	return m;
+out_err:
+	metric__free(m);
+	return NULL;
+}
+
+static bool contains_metric_id(struct evsel **metric_events, int num_events,
+			       const char *metric_id)
+{
+	int i;
+
+	for (i = 0; i < num_events; i++) {
+		if (!strcmp(evsel__metric_id(metric_events[i]), metric_id))
+			return true;
+	}
+	return false;
+}
+
+/**
+ * setup_metric_events - Find a group of events in metric_evlist that correspond
+ *                       to the IDs from a parsed metric expression.
+ * @ids: the metric IDs to match.
+ * @metric_evlist: the list of perf events.
+ * @out_metric_events: holds the created metric events array.
+ */
+static int setup_metric_events(struct hashmap *ids,
+			       struct evlist *metric_evlist,
+			       struct evsel ***out_metric_events)
+{
+	struct evsel **metric_events;
+	const char *metric_id;
+	struct evsel *ev;
+	size_t ids_size, matched_events, i;
+
+	*out_metric_events = NULL;
+	ids_size = hashmap__size(ids);
+
+	metric_events = calloc(sizeof(void *), ids_size + 1);
+	if (!metric_events)
+		return -ENOMEM;
+
+	matched_events = 0;
+	evlist__for_each_entry(metric_evlist, ev) {
+		struct expr_id_data *val_ptr;
+
+		/*
+		 * Check for duplicate events with the same name. For
+		 * example, uncore_imc/cas_count_read/ will turn into 6
+		 * events per socket on skylakex. Only the first such
+		 * event is placed in metric_events.
+		 */
+		metric_id = evsel__metric_id(ev);
+		if (contains_metric_id(metric_events, matched_events, metric_id))
+			continue;
+		/*
+		 * Does this event belong to the parse context? For
+		 * combined or shared groups, this metric may not care
+		 * about this event.
+		 */
+		if (hashmap__find(ids, metric_id, (void **)&val_ptr)) {
+			metric_events[matched_events++] = ev;
+
+			if (matched_events >= ids_size)
+				break;
+		}
+	}
+	if (matched_events < ids_size) {
+		free(metric_events);
+		return -EINVAL;
+	}
+	for (i = 0; i < ids_size; i++) {
+		ev = metric_events[i];
+		ev->collect_stat = true;
+
+		/*
+		 * The metric leader points to the identically named
+		 * event in metric_events.
+		 */
+		ev->metric_leader = ev;
+		/*
+		 * Mark two events with identical names in the same
+		 * group (or globally) as being in use as uncore events
+		 * may be duplicated for each pmu. Set the metric leader
+		 * of such events to be the event that appears in
+		 * metric_events.
+		 */
+		metric_id = evsel__metric_id(ev);
+		evlist__for_each_entry_continue(metric_evlist, ev) {
+			if (!strcmp(evsel__metric_id(ev), metric_id))
+				ev->metric_leader = metric_events[i];
+		}
+	}
+	*out_metric_events = metric_events;
+	return 0;
+}
+
+static bool match_metric(const char *n, const char *list)
+{
+	int len;
+	char *m;
+
+	if (!list)
+		return false;
+	if (!strcmp(list, "all"))
+		return true;
+	if (!n)
+		return !strcasecmp(list, "No_group");
+	len = strlen(list);
+	m = strcasestr(n, list);
+	if (!m)
+		return false;
+	if ((m == n || m[-1] == ';' || m[-1] == ' ') &&
+	    (m[len] == 0 || m[len] == ';'))
+		return true;
+	return false;
+}
+
+static bool match_pe_metric(const struct pmu_event *pe, const char *metric)
+{
+	return match_metric(pe->metric_group, metric) ||
+	       match_metric(pe->metric_name, metric);
+}
+
+struct mep {
+	struct rb_node nd;
+	const char *name;
+	struct strlist *metrics;
+};
+
+static int mep_cmp(struct rb_node *rb_node, const void *entry)
+{
+	struct mep *a = container_of(rb_node, struct mep, nd);
+	struct mep *b = (struct mep *)entry;
+
+	return strcmp(a->name, b->name);
+}
+
+static struct rb_node *mep_new(struct rblist *rl __maybe_unused,
+					const void *entry)
+{
+	struct mep *me = malloc(sizeof(struct mep));
+
+	if (!me)
+		return NULL;
+	memcpy(me, entry, sizeof(struct mep));
+	me->name = strdup(me->name);
+	if (!me->name)
+		goto out_me;
+	me->metrics = strlist__new(NULL, NULL);
+	if (!me->metrics)
+		goto out_name;
+	return &me->nd;
+out_name:
+	zfree(&me->name);
+out_me:
+	free(me);
+	return NULL;
+}
+
+static struct mep *mep_lookup(struct rblist *groups, const char *name)
+{
+	struct rb_node *nd;
+	struct mep me = {
+		.name = name
+	};
+	nd = rblist__find(groups, &me);
+	if (nd)
+		return container_of(nd, struct mep, nd);
+	rblist__add_node(groups, &me);
+	nd = rblist__find(groups, &me);
+	if (nd)
+		return container_of(nd, struct mep, nd);
+	return NULL;
+}
+
+static void mep_delete(struct rblist *rl __maybe_unused,
+		       struct rb_node *nd)
+{
+	struct mep *me = container_of(nd, struct mep, nd);
+
+	strlist__delete(me->metrics);
+	zfree(&me->name);
+	free(me);
+}
+
+static void metricgroup__print_strlist(struct strlist *metrics, bool raw)
+{
+	struct str_node *sn;
+	int n = 0;
+
+	strlist__for_each_entry (sn, metrics) {
+		if (raw)
+			printf("%s%s", n > 0 ? " " : "", sn->s);
+		else
+			printf("  %s\n", sn->s);
+		n++;
+	}
+	if (raw)
+		putchar('\n');
+}
+
+static int metricgroup__print_pmu_event(const struct pmu_event *pe,
+					bool metricgroups, char *filter,
+					bool raw, bool details,
+					struct rblist *groups,
+					struct strlist *metriclist)
+{
+	const char *g;
+	char *omg, *mg;
+
+	g = pe->metric_group;
+	if (!g && pe->metric_name) {
+		if (pe->name)
+			return 0;
+		g = "No_group";
+	}
+
+	if (!g)
+		return 0;
+
+	mg = strdup(g);
+
+	if (!mg)
+		return -ENOMEM;
+	omg = mg;
+	while ((g = strsep(&mg, ";")) != NULL) {
+		struct mep *me;
+		char *s;
+
+		g = skip_spaces(g);
+		if (*g == 0)
+			g = "No_group";
+		if (filter && !strstr(g, filter))
+			continue;
+		if (raw)
+			s = (char *)pe->metric_name;
+		else {
+			if (asprintf(&s, "%s\n%*s%s]",
+				     pe->metric_name, 8, "[", pe->desc) < 0)
+				return -1;
+			if (details) {
+				if (asprintf(&s, "%s\n%*s%s]",
+					     s, 8, "[", pe->metric_expr) < 0)
+					return -1;
+			}
+		}
+
+		if (!s)
+			continue;
+
+		if (!metricgroups) {
+			strlist__add(metriclist, s);
+		} else {
+			me = mep_lookup(groups, g);
+			if (!me)
+				continue;
+			strlist__add(me->metrics, s);
+		}
+
+		if (!raw)
+			free(s);
+	}
+	free(omg);
+
+	return 0;
+}
+
+struct metricgroup_print_sys_idata {
+	struct strlist *metriclist;
+	char *filter;
+	struct rblist *groups;
+	bool metricgroups;
+	bool raw;
+	bool details;
+};
+
+struct metricgroup_iter_data {
+	pmu_event_iter_fn fn;
+	void *data;
+};
+
+static int metricgroup__sys_event_iter(const struct pmu_event *pe,
+				       const struct pmu_events_table *table,
+				       void *data)
+{
+	struct metricgroup_iter_data *d = data;
+	struct perf_pmu *pmu = NULL;
+
+	if (!pe->metric_expr || !pe->compat)
+		return 0;
+
+	while ((pmu = perf_pmu__scan(pmu))) {
+
+		if (!pmu->id || strcmp(pmu->id, pe->compat))
+			continue;
+
+		return d->fn(pe, table, d->data);
+	}
+
+	return 0;
+}
+
+static int metricgroup__print_sys_event_iter(const struct pmu_event *pe,
+					     const struct pmu_events_table *table __maybe_unused,
+					     void *data)
+{
+	struct metricgroup_print_sys_idata *d = data;
+
+	return metricgroup__print_pmu_event(pe, d->metricgroups, d->filter, d->raw,
+				     d->details, d->groups, d->metriclist);
+}
+
+struct metricgroup_print_data {
+	const char *pmu_name;
+	struct strlist *metriclist;
+	char *filter;
+	struct rblist *groups;
+	bool metricgroups;
+	bool raw;
+	bool details;
+};
+
+static int metricgroup__print_callback(const struct pmu_event *pe,
+				       const struct pmu_events_table *table __maybe_unused,
+				       void *vdata)
+{
+	struct metricgroup_print_data *data = vdata;
+
+	if (!pe->metric_expr)
+		return 0;
+
+	if (data->pmu_name && perf_pmu__is_hybrid(pe->pmu) && strcmp(data->pmu_name, pe->pmu))
+		return 0;
+
+	return metricgroup__print_pmu_event(pe, data->metricgroups, data->filter,
+					    data->raw, data->details, data->groups,
+					    data->metriclist);
+}
+
+void metricgroup__print(bool metrics, bool metricgroups, char *filter,
+			bool raw, bool details, const char *pmu_name)
+{
+	struct rblist groups;
+	struct rb_node *node, *next;
+	struct strlist *metriclist = NULL;
+	const struct pmu_events_table *table;
+
+	if (!metricgroups) {
+		metriclist = strlist__new(NULL, NULL);
+		if (!metriclist)
+			return;
+	}
+
+	rblist__init(&groups);
+	groups.node_new = mep_new;
+	groups.node_cmp = mep_cmp;
+	groups.node_delete = mep_delete;
+	table = pmu_events_table__find();
+	if (table) {
+		struct metricgroup_print_data data = {
+			.pmu_name = pmu_name,
+			.metriclist = metriclist,
+			.metricgroups = metricgroups,
+			.filter = filter,
+			.raw = raw,
+			.details = details,
+			.groups = &groups,
+		};
+
+		pmu_events_table_for_each_event(table,
+						metricgroup__print_callback,
+						&data);
+	}
+	{
+		struct metricgroup_iter_data data = {
+			.fn = metricgroup__print_sys_event_iter,
+			.data = (void *) &(struct metricgroup_print_sys_idata){
+				.metriclist = metriclist,
+				.metricgroups = metricgroups,
+				.filter = filter,
+				.raw = raw,
+				.details = details,
+				.groups = &groups,
+			},
+		};
+
+		pmu_for_each_sys_event(metricgroup__sys_event_iter, &data);
+	}
+
+	if (!filter || !rblist__empty(&groups)) {
+		if (metricgroups && !raw)
+			printf("\nMetric Groups:\n\n");
+		else if (metrics && !raw)
+			printf("\nMetrics:\n\n");
+	}
+
+	for (node = rb_first_cached(&groups.entries); node; node = next) {
+		struct mep *me = container_of(node, struct mep, nd);
+
+		if (metricgroups)
+			printf("%s%s%s", me->name, metrics && !raw ? ":" : "", raw ? " " : "\n");
+		if (metrics)
+			metricgroup__print_strlist(me->metrics, raw);
+		next = rb_next(node);
+		rblist__remove_node(&groups, node);
+	}
+	if (!metricgroups)
+		metricgroup__print_strlist(metriclist, raw);
+	strlist__delete(metriclist);
+}
+
+static const char *code_characters = ",-=@";
+
+static int encode_metric_id(struct strbuf *sb, const char *x)
+{
+	char *c;
+	int ret = 0;
+
+	for (; *x; x++) {
+		c = strchr(code_characters, *x);
+		if (c) {
+			ret = strbuf_addch(sb, '!');
+			if (ret)
+				break;
+
+			ret = strbuf_addch(sb, '0' + (c - code_characters));
+			if (ret)
+				break;
+		} else {
+			ret = strbuf_addch(sb, *x);
+			if (ret)
+				break;
+		}
+	}
+	return ret;
+}
+
+static int decode_metric_id(struct strbuf *sb, const char *x)
+{
+	const char *orig = x;
+	size_t i;
+	char c;
+	int ret;
+
+	for (; *x; x++) {
+		c = *x;
+		if (*x == '!') {
+			x++;
+			i = *x - '0';
+			if (i > strlen(code_characters)) {
+				pr_err("Bad metric-id encoding in: '%s'", orig);
+				return -1;
+			}
+			c = code_characters[i];
+		}
+		ret = strbuf_addch(sb, c);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+static int decode_all_metric_ids(struct evlist *perf_evlist, const char *modifier)
+{
+	struct evsel *ev;
+	struct strbuf sb = STRBUF_INIT;
+	char *cur;
+	int ret = 0;
+
+	evlist__for_each_entry(perf_evlist, ev) {
+		if (!ev->metric_id)
+			continue;
+
+		ret = strbuf_setlen(&sb, 0);
+		if (ret)
+			break;
+
+		ret = decode_metric_id(&sb, ev->metric_id);
+		if (ret)
+			break;
+
+		free((char *)ev->metric_id);
+		ev->metric_id = strdup(sb.buf);
+		if (!ev->metric_id) {
+			ret = -ENOMEM;
+			break;
+		}
+		/*
+		 * If the name is just the parsed event, use the metric-id to
+		 * give a more friendly display version.
+		 */
+		if (strstr(ev->name, "metric-id=")) {
+			bool has_slash = false;
+
+			free(ev->name);
+			for (cur = strchr(sb.buf, '@') ; cur; cur = strchr(++cur, '@')) {
+				*cur = '/';
+				has_slash = true;
+			}
+
+			if (modifier) {
+				if (!has_slash && !strchr(sb.buf, ':')) {
+					ret = strbuf_addch(&sb, ':');
+					if (ret)
+						break;
+				}
+				ret = strbuf_addstr(&sb, modifier);
+				if (ret)
+					break;
+			}
+			ev->name = strdup(sb.buf);
+			if (!ev->name) {
+				ret = -ENOMEM;
+				break;
+			}
+		}
+	}
+	strbuf_release(&sb);
+	return ret;
+}
+
+static int metricgroup__build_event_string(struct strbuf *events,
+					   const struct expr_parse_ctx *ctx,
+					   const char *modifier,
+					   bool has_constraint)
+{
+	struct hashmap_entry *cur;
+	size_t bkt;
+	bool no_group = true, has_tool_events = false;
+	bool tool_events[PERF_TOOL_MAX] = {false};
+	int ret = 0;
+
+#define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0)
+
+	hashmap__for_each_entry(ctx->ids, cur, bkt) {
+		const char *sep, *rsep, *id = cur->key;
+		enum perf_tool_event ev;
+
+		pr_debug("found event %s\n", id);
+
+		/* Always move tool events outside of the group. */
+		ev = perf_tool_event__from_str(id);
+		if (ev != PERF_TOOL_NONE) {
+			has_tool_events = true;
+			tool_events[ev] = true;
+			continue;
+		}
+		/* Separate events with commas and open the group if necessary. */
+		if (no_group) {
+			if (!has_constraint) {
+				ret = strbuf_addch(events, '{');
+				RETURN_IF_NON_ZERO(ret);
+			}
+
+			no_group = false;
+		} else {
+			ret = strbuf_addch(events, ',');
+			RETURN_IF_NON_ZERO(ret);
+		}
+		/*
+		 * Encode the ID as an event string. Add a qualifier for
+		 * metric_id that is the original name except with characters
+		 * that parse-events can't parse replaced. For example,
+		 * 'msr@tsc@' gets added as msr/tsc,metric-id=msr!3tsc!3/
+		 */
+		sep = strchr(id, '@');
+		if (sep != NULL) {
+			ret = strbuf_add(events, id, sep - id);
+			RETURN_IF_NON_ZERO(ret);
+			ret = strbuf_addch(events, '/');
+			RETURN_IF_NON_ZERO(ret);
+			rsep = strrchr(sep, '@');
+			ret = strbuf_add(events, sep + 1, rsep - sep - 1);
+			RETURN_IF_NON_ZERO(ret);
+			ret = strbuf_addstr(events, ",metric-id=");
+			RETURN_IF_NON_ZERO(ret);
+			sep = rsep;
+		} else {
+			sep = strchr(id, ':');
+			if (sep != NULL) {
+				ret = strbuf_add(events, id, sep - id);
+				RETURN_IF_NON_ZERO(ret);
+			} else {
+				ret = strbuf_addstr(events, id);
+				RETURN_IF_NON_ZERO(ret);
+			}
+			ret = strbuf_addstr(events, "/metric-id=");
+			RETURN_IF_NON_ZERO(ret);
+		}
+		ret = encode_metric_id(events, id);
+		RETURN_IF_NON_ZERO(ret);
+		ret = strbuf_addstr(events, "/");
+		RETURN_IF_NON_ZERO(ret);
+
+		if (sep != NULL) {
+			ret = strbuf_addstr(events, sep + 1);
+			RETURN_IF_NON_ZERO(ret);
+		}
+		if (modifier) {
+			ret = strbuf_addstr(events, modifier);
+			RETURN_IF_NON_ZERO(ret);
+		}
+	}
+	if (!no_group && !has_constraint) {
+		ret = strbuf_addf(events, "}:W");
+		RETURN_IF_NON_ZERO(ret);
+	}
+	if (has_tool_events) {
+		int i;
+
+		perf_tool_event__for_each_event(i) {
+			if (tool_events[i]) {
+				if (!no_group) {
+					ret = strbuf_addch(events, ',');
+					RETURN_IF_NON_ZERO(ret);
+				}
+				no_group = false;
+				ret = strbuf_addstr(events, perf_tool_event__to_str(i));
+				RETURN_IF_NON_ZERO(ret);
+			}
+		}
+	}
+
+	return ret;
+#undef RETURN_IF_NON_ZERO
+}
+
+int __weak arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused)
+{
+	return 1;
+}
+
+/*
+ * A singly linked list on the stack of the names of metrics being
+ * processed. Used to identify recursion.
+ */
+struct visited_metric {
+	const char *name;
+	const struct visited_metric *parent;
+};
+
+struct metricgroup_add_iter_data {
+	struct list_head *metric_list;
+	const char *metric_name;
+	const char *modifier;
+	int *ret;
+	bool *has_match;
+	bool metric_no_group;
+	const char *user_requested_cpu_list;
+	bool system_wide;
+	struct metric *root_metric;
+	const struct visited_metric *visited;
+	const struct pmu_events_table *table;
+};
+
+static bool metricgroup__find_metric(const char *metric,
+				     const struct pmu_events_table *table,
+				     struct pmu_event *pe);
+
+static int add_metric(struct list_head *metric_list,
+		      const struct pmu_event *pe,
+		      const char *modifier,
+		      bool metric_no_group,
+		      const char *user_requested_cpu_list,
+		      bool system_wide,
+		      struct metric *root_metric,
+		      const struct visited_metric *visited,
+		      const struct pmu_events_table *table);
+
+/**
+ * resolve_metric - Locate metrics within the root metric and recursively add
+ *                    references to them.
+ * @metric_list: The list the metric is added to.
+ * @modifier: if non-null event modifiers like "u".
+ * @metric_no_group: Should events written to events be grouped "{}" or
+ *                   global. Grouping is the default but due to multiplexing the
+ *                   user may override.
+ * @user_requested_cpu_list: Command line specified CPUs to record on.
+ * @system_wide: Are events for all processes recorded.
+ * @root_metric: Metrics may reference other metrics to form a tree. In this
+ *               case the root_metric holds all the IDs and a list of referenced
+ *               metrics. When adding a root this argument is NULL.
+ * @visited: A singly linked list of metric names being added that is used to
+ *           detect recursion.
+ * @table: The table that is searched for metrics, most commonly the table for the
+ *       architecture perf is running upon.
+ */
+static int resolve_metric(struct list_head *metric_list,
+			  const char *modifier,
+			  bool metric_no_group,
+			  const char *user_requested_cpu_list,
+			  bool system_wide,
+			  struct metric *root_metric,
+			  const struct visited_metric *visited,
+			  const struct pmu_events_table *table)
+{
+	struct hashmap_entry *cur;
+	size_t bkt;
+	struct to_resolve {
+		/* The metric to resolve. */
+		struct pmu_event pe;
+		/*
+		 * The key in the IDs map, this may differ from in case,
+		 * etc. from pe->metric_name.
+		 */
+		const char *key;
+	} *pending = NULL;
+	int i, ret = 0, pending_cnt = 0;
+
+	/*
+	 * Iterate all the parsed IDs and if there's a matching metric and it to
+	 * the pending array.
+	 */
+	hashmap__for_each_entry(root_metric->pctx->ids, cur, bkt) {
+		struct pmu_event pe;
+
+		if (metricgroup__find_metric(cur->key, table, &pe)) {
+			pending = realloc(pending,
+					(pending_cnt + 1) * sizeof(struct to_resolve));
+			if (!pending)
+				return -ENOMEM;
+
+			memcpy(&pending[pending_cnt].pe, &pe, sizeof(pe));
+			pending[pending_cnt].key = cur->key;
+			pending_cnt++;
+		}
+	}
+
+	/* Remove the metric IDs from the context. */
+	for (i = 0; i < pending_cnt; i++)
+		expr__del_id(root_metric->pctx, pending[i].key);
+
+	/*
+	 * Recursively add all the metrics, IDs are added to the root metric's
+	 * context.
+	 */
+	for (i = 0; i < pending_cnt; i++) {
+		ret = add_metric(metric_list, &pending[i].pe, modifier, metric_no_group,
+				 user_requested_cpu_list, system_wide, root_metric, visited,
+				 table);
+		if (ret)
+			break;
+	}
+
+	free(pending);
+	return ret;
+}
+
+/**
+ * __add_metric - Add a metric to metric_list.
+ * @metric_list: The list the metric is added to.
+ * @pe: The pmu_event containing the metric to be added.
+ * @modifier: if non-null event modifiers like "u".
+ * @metric_no_group: Should events written to events be grouped "{}" or
+ *                   global. Grouping is the default but due to multiplexing the
+ *                   user may override.
+ * @runtime: A special argument for the parser only known at runtime.
+ * @user_requested_cpu_list: Command line specified CPUs to record on.
+ * @system_wide: Are events for all processes recorded.
+ * @root_metric: Metrics may reference other metrics to form a tree. In this
+ *               case the root_metric holds all the IDs and a list of referenced
+ *               metrics. When adding a root this argument is NULL.
+ * @visited: A singly linked list of metric names being added that is used to
+ *           detect recursion.
+ * @table: The table that is searched for metrics, most commonly the table for the
+ *       architecture perf is running upon.
+ */
+static int __add_metric(struct list_head *metric_list,
+			const struct pmu_event *pe,
+			const char *modifier,
+			bool metric_no_group,
+			int runtime,
+			const char *user_requested_cpu_list,
+			bool system_wide,
+			struct metric *root_metric,
+			const struct visited_metric *visited,
+			const struct pmu_events_table *table)
+{
+	const struct visited_metric *vm;
+	int ret;
+	bool is_root = !root_metric;
+	struct visited_metric visited_node = {
+		.name = pe->metric_name,
+		.parent = visited,
+	};
+
+	for (vm = visited; vm; vm = vm->parent) {
+		if (!strcmp(pe->metric_name, vm->name)) {
+			pr_err("failed: recursion detected for %s\n", pe->metric_name);
+			return -1;
+		}
+	}
+
+	if (is_root) {
+		/*
+		 * This metric is the root of a tree and may reference other
+		 * metrics that are added recursively.
+		 */
+		root_metric = metric__new(pe, modifier, metric_no_group, runtime,
+					  user_requested_cpu_list, system_wide);
+		if (!root_metric)
+			return -ENOMEM;
+
+	} else {
+		int cnt = 0;
+
+		/*
+		 * This metric was referenced in a metric higher in the
+		 * tree. Check if the same metric is already resolved in the
+		 * metric_refs list.
+		 */
+		if (root_metric->metric_refs) {
+			for (; root_metric->metric_refs[cnt].metric_name; cnt++) {
+				if (!strcmp(pe->metric_name,
+					    root_metric->metric_refs[cnt].metric_name))
+					return 0;
+			}
+		}
+
+		/* Create reference. Need space for the entry and the terminator. */
+		root_metric->metric_refs = realloc(root_metric->metric_refs,
+						(cnt + 2) * sizeof(struct metric_ref));
+		if (!root_metric->metric_refs)
+			return -ENOMEM;
+
+		/*
+		 * Intentionally passing just const char pointers,
+		 * from 'pe' object, so they never go away. We don't
+		 * need to change them, so there's no need to create
+		 * our own copy.
+		 */
+		root_metric->metric_refs[cnt].metric_name = pe->metric_name;
+		root_metric->metric_refs[cnt].metric_expr = pe->metric_expr;
+
+		/* Null terminate array. */
+		root_metric->metric_refs[cnt+1].metric_name = NULL;
+		root_metric->metric_refs[cnt+1].metric_expr = NULL;
+	}
+
+	/*
+	 * For both the parent and referenced metrics, we parse
+	 * all the metric's IDs and add it to the root context.
+	 */
+	if (expr__find_ids(pe->metric_expr, NULL, root_metric->pctx) < 0) {
+		/* Broken metric. */
+		ret = -EINVAL;
+	} else {
+		/* Resolve referenced metrics. */
+		ret = resolve_metric(metric_list, modifier, metric_no_group,
+				     user_requested_cpu_list, system_wide,
+				     root_metric, &visited_node, table);
+	}
+
+	if (ret) {
+		if (is_root)
+			metric__free(root_metric);
+
+	} else if (is_root)
+		list_add(&root_metric->nd, metric_list);
+
+	return ret;
+}
+
+struct metricgroup__find_metric_data {
+	const char *metric;
+	struct pmu_event *pe;
+};
+
+static int metricgroup__find_metric_callback(const struct pmu_event *pe,
+					     const struct pmu_events_table *table  __maybe_unused,
+					     void *vdata)
+{
+	struct metricgroup__find_metric_data *data = vdata;
+
+	if (!match_metric(pe->metric_name, data->metric))
+		return 0;
+
+	memcpy(data->pe, pe, sizeof(*pe));
+	return 1;
+}
+
+static bool metricgroup__find_metric(const char *metric,
+				     const struct pmu_events_table *table,
+				     struct pmu_event *pe)
+{
+	struct metricgroup__find_metric_data data = {
+		.metric = metric,
+		.pe = pe,
+	};
+
+	return pmu_events_table_for_each_event(table, metricgroup__find_metric_callback, &data)
+		? true : false;
+}
+
+static int add_metric(struct list_head *metric_list,
+		      const struct pmu_event *pe,
+		      const char *modifier,
+		      bool metric_no_group,
+		      const char *user_requested_cpu_list,
+		      bool system_wide,
+		      struct metric *root_metric,
+		      const struct visited_metric *visited,
+		      const struct pmu_events_table *table)
+{
+	int ret = 0;
+
+	pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
+
+	if (!strstr(pe->metric_expr, "?")) {
+		ret = __add_metric(metric_list, pe, modifier, metric_no_group, 0,
+				   user_requested_cpu_list, system_wide, root_metric,
+				   visited, table);
+	} else {
+		int j, count;
+
+		count = arch_get_runtimeparam(pe);
+
+		/* This loop is added to create multiple
+		 * events depend on count value and add
+		 * those events to metric_list.
+		 */
+
+		for (j = 0; j < count && !ret; j++)
+			ret = __add_metric(metric_list, pe, modifier, metric_no_group, j,
+					   user_requested_cpu_list, system_wide,
+					   root_metric, visited, table);
+	}
+
+	return ret;
+}
+
+static int metricgroup__add_metric_sys_event_iter(const struct pmu_event *pe,
+						const struct pmu_events_table *table __maybe_unused,
+						void *data)
+{
+	struct metricgroup_add_iter_data *d = data;
+	int ret;
+
+	if (!match_pe_metric(pe, d->metric_name))
+		return 0;
+
+	ret = add_metric(d->metric_list, pe, d->modifier, d->metric_no_group,
+			 d->user_requested_cpu_list, d->system_wide,
+			 d->root_metric, d->visited, d->table);
+	if (ret)
+		goto out;
+
+	*(d->has_match) = true;
+
+out:
+	*(d->ret) = ret;
+	return ret;
+}
+
+/**
+ * metric_list_cmp - list_sort comparator that sorts metrics with more events to
+ *                   the front. tool events are excluded from the count.
+ */
+static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l,
+			   const struct list_head *r)
+{
+	const struct metric *left = container_of(l, struct metric, nd);
+	const struct metric *right = container_of(r, struct metric, nd);
+	struct expr_id_data *data;
+	int i, left_count, right_count;
+
+	left_count = hashmap__size(left->pctx->ids);
+	perf_tool_event__for_each_event(i) {
+		if (!expr__get_id(left->pctx, perf_tool_event__to_str(i), &data))
+			left_count--;
+	}
+
+	right_count = hashmap__size(right->pctx->ids);
+	perf_tool_event__for_each_event(i) {
+		if (!expr__get_id(right->pctx, perf_tool_event__to_str(i), &data))
+			right_count--;
+	}
+
+	return right_count - left_count;
+}
+
+struct metricgroup__add_metric_data {
+	struct list_head *list;
+	const char *metric_name;
+	const char *modifier;
+	const char *user_requested_cpu_list;
+	bool metric_no_group;
+	bool system_wide;
+	bool has_match;
+};
+
+static int metricgroup__add_metric_callback(const struct pmu_event *pe,
+					    const struct pmu_events_table *table,
+					    void *vdata)
+{
+	struct metricgroup__add_metric_data *data = vdata;
+	int ret = 0;
+
+	if (pe->metric_expr &&
+		(match_metric(pe->metric_group, data->metric_name) ||
+		 match_metric(pe->metric_name, data->metric_name))) {
+
+		data->has_match = true;
+		ret = add_metric(data->list, pe, data->modifier, data->metric_no_group,
+				 data->user_requested_cpu_list, data->system_wide,
+				 /*root_metric=*/NULL, /*visited_metrics=*/NULL, table);
+	}
+	return ret;
+}
+
+/**
+ * metricgroup__add_metric - Find and add a metric, or a metric group.
+ * @metric_name: The name of the metric or metric group. For example, "IPC"
+ *               could be the name of a metric and "TopDownL1" the name of a
+ *               metric group.
+ * @modifier: if non-null event modifiers like "u".
+ * @metric_no_group: Should events written to events be grouped "{}" or
+ *                   global. Grouping is the default but due to multiplexing the
+ *                   user may override.
+ * @user_requested_cpu_list: Command line specified CPUs to record on.
+ * @system_wide: Are events for all processes recorded.
+ * @metric_list: The list that the metric or metric group are added to.
+ * @table: The table that is searched for metrics, most commonly the table for the
+ *       architecture perf is running upon.
+ */
+static int metricgroup__add_metric(const char *metric_name, const char *modifier,
+				   bool metric_no_group,
+				   const char *user_requested_cpu_list,
+				   bool system_wide,
+				   struct list_head *metric_list,
+				   const struct pmu_events_table *table)
+{
+	LIST_HEAD(list);
+	int ret;
+	bool has_match = false;
+
+	{
+		struct metricgroup__add_metric_data data = {
+			.list = &list,
+			.metric_name = metric_name,
+			.modifier = modifier,
+			.metric_no_group = metric_no_group,
+			.user_requested_cpu_list = user_requested_cpu_list,
+			.system_wide = system_wide,
+			.has_match = false,
+		};
+		/*
+		 * Iterate over all metrics seeing if metric matches either the
+		 * name or group. When it does add the metric to the list.
+		 */
+		ret = pmu_events_table_for_each_event(table, metricgroup__add_metric_callback,
+						      &data);
+		if (ret)
+			goto out;
+
+		has_match = data.has_match;
+	}
+	{
+		struct metricgroup_iter_data data = {
+			.fn = metricgroup__add_metric_sys_event_iter,
+			.data = (void *) &(struct metricgroup_add_iter_data) {
+				.metric_list = &list,
+				.metric_name = metric_name,
+				.modifier = modifier,
+				.metric_no_group = metric_no_group,
+				.user_requested_cpu_list = user_requested_cpu_list,
+				.system_wide = system_wide,
+				.has_match = &has_match,
+				.ret = &ret,
+				.table = table,
+			},
+		};
+
+		pmu_for_each_sys_event(metricgroup__sys_event_iter, &data);
+	}
+	/* End of pmu events. */
+	if (!has_match)
+		ret = -EINVAL;
+
+out:
+	/*
+	 * add to metric_list so that they can be released
+	 * even if it's failed
+	 */
+	list_splice(&list, metric_list);
+	return ret;
+}
+
+/**
+ * metricgroup__add_metric_list - Find and add metrics, or metric groups,
+ *                                specified in a list.
+ * @list: the list of metrics or metric groups. For example, "IPC,CPI,TopDownL1"
+ *        would match the IPC and CPI metrics, and TopDownL1 would match all
+ *        the metrics in the TopDownL1 group.
+ * @metric_no_group: Should events written to events be grouped "{}" or
+ *                   global. Grouping is the default but due to multiplexing the
+ *                   user may override.
+ * @user_requested_cpu_list: Command line specified CPUs to record on.
+ * @system_wide: Are events for all processes recorded.
+ * @metric_list: The list that metrics are added to.
+ * @table: The table that is searched for metrics, most commonly the table for the
+ *       architecture perf is running upon.
+ */
+static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
+					const char *user_requested_cpu_list,
+					bool system_wide, struct list_head *metric_list,
+					const struct pmu_events_table *table)
+{
+	char *list_itr, *list_copy, *metric_name, *modifier;
+	int ret, count = 0;
+
+	list_copy = strdup(list);
+	if (!list_copy)
+		return -ENOMEM;
+	list_itr = list_copy;
+
+	while ((metric_name = strsep(&list_itr, ",")) != NULL) {
+		modifier = strchr(metric_name, ':');
+		if (modifier)
+			*modifier++ = '\0';
+
+		ret = metricgroup__add_metric(metric_name, modifier,
+					      metric_no_group, user_requested_cpu_list,
+					      system_wide, metric_list, table);
+		if (ret == -EINVAL)
+			pr_err("Cannot find metric or group `%s'\n", metric_name);
+
+		if (ret)
+			break;
+
+		count++;
+	}
+	free(list_copy);
+
+	if (!ret) {
+		/*
+		 * Warn about nmi_watchdog if any parsed metrics had the
+		 * NO_NMI_WATCHDOG constraint.
+		 */
+		metricgroup___watchdog_constraint_hint(NULL, true);
+		/* No metrics. */
+		if (count == 0)
+			return -EINVAL;
+	}
+	return ret;
+}
+
+static void metricgroup__free_metrics(struct list_head *metric_list)
+{
+	struct metric *m, *tmp;
+
+	list_for_each_entry_safe (m, tmp, metric_list, nd) {
+		list_del_init(&m->nd);
+		metric__free(m);
+	}
+}
+
+/**
+ * find_tool_events - Search for the pressence of tool events in metric_list.
+ * @metric_list: List to take metrics from.
+ * @tool_events: Array of false values, indices corresponding to tool events set
+ *               to true if tool event is found.
+ */
+static void find_tool_events(const struct list_head *metric_list,
+			     bool tool_events[PERF_TOOL_MAX])
+{
+	struct metric *m;
+
+	list_for_each_entry(m, metric_list, nd) {
+		int i;
+
+		perf_tool_event__for_each_event(i) {
+			struct expr_id_data *data;
+
+			if (!tool_events[i] &&
+			    !expr__get_id(m->pctx, perf_tool_event__to_str(i), &data))
+				tool_events[i] = true;
+		}
+	}
+}
+
+/**
+ * build_combined_expr_ctx - Make an expr_parse_ctx with all has_constraint
+ *                           metric IDs, as the IDs are held in a set,
+ *                           duplicates will be removed.
+ * @metric_list: List to take metrics from.
+ * @combined: Out argument for result.
+ */
+static int build_combined_expr_ctx(const struct list_head *metric_list,
+				   struct expr_parse_ctx **combined)
+{
+	struct hashmap_entry *cur;
+	size_t bkt;
+	struct metric *m;
+	char *dup;
+	int ret;
+
+	*combined = expr__ctx_new();
+	if (!*combined)
+		return -ENOMEM;
+
+	list_for_each_entry(m, metric_list, nd) {
+		if (m->has_constraint && !m->modifier) {
+			hashmap__for_each_entry(m->pctx->ids, cur, bkt) {
+				dup = strdup(cur->key);
+				if (!dup) {
+					ret = -ENOMEM;
+					goto err_out;
+				}
+				ret = expr__add_id(*combined, dup);
+				if (ret)
+					goto err_out;
+			}
+		}
+	}
+	return 0;
+err_out:
+	expr__ctx_free(*combined);
+	*combined = NULL;
+	return ret;
+}
+
+/**
+ * parse_ids - Build the event string for the ids and parse them creating an
+ *             evlist. The encoded metric_ids are decoded.
+ * @metric_no_merge: is metric sharing explicitly disabled.
+ * @fake_pmu: used when testing metrics not supported by the current CPU.
+ * @ids: the event identifiers parsed from a metric.
+ * @modifier: any modifiers added to the events.
+ * @has_constraint: false if events should be placed in a weak group.
+ * @tool_events: entries set true if the tool event of index could be present in
+ *               the overall list of metrics.
+ * @out_evlist: the created list of events.
+ */
+static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
+		     struct expr_parse_ctx *ids, const char *modifier,
+		     bool has_constraint, const bool tool_events[PERF_TOOL_MAX],
+		     struct evlist **out_evlist)
+{
+	struct parse_events_error parse_error;
+	struct evlist *parsed_evlist;
+	struct strbuf events = STRBUF_INIT;
+	int ret;
+
+	*out_evlist = NULL;
+	if (!metric_no_merge || hashmap__size(ids->ids) == 0) {
+		bool added_event = false;
+		int i;
+		/*
+		 * We may fail to share events between metrics because a tool
+		 * event isn't present in one metric. For example, a ratio of
+		 * cache misses doesn't need duration_time but the same events
+		 * may be used for a misses per second. Events without sharing
+		 * implies multiplexing, that is best avoided, so place
+		 * all tool events in every group.
+		 *
+		 * Also, there may be no ids/events in the expression parsing
+		 * context because of constant evaluation, e.g.:
+		 *    event1 if #smt_on else 0
+		 * Add a tool event to avoid a parse error on an empty string.
+		 */
+		perf_tool_event__for_each_event(i) {
+			if (tool_events[i]) {
+				char *tmp = strdup(perf_tool_event__to_str(i));
+
+				if (!tmp)
+					return -ENOMEM;
+				ids__insert(ids->ids, tmp);
+				added_event = true;
+			}
+		}
+		if (!added_event && hashmap__size(ids->ids) == 0) {
+			char *tmp = strdup("duration_time");
+
+			if (!tmp)
+				return -ENOMEM;
+			ids__insert(ids->ids, tmp);
+		}
+	}
+	ret = metricgroup__build_event_string(&events, ids, modifier,
+					      has_constraint);
+	if (ret)
+		return ret;
+
+	parsed_evlist = evlist__new();
+	if (!parsed_evlist) {
+		ret = -ENOMEM;
+		goto err_out;
+	}
+	pr_debug("Parsing metric events '%s'\n", events.buf);
+	parse_events_error__init(&parse_error);
+	ret = __parse_events(parsed_evlist, events.buf, &parse_error, fake_pmu);
+	if (ret) {
+		parse_events_error__print(&parse_error, events.buf);
+		goto err_out;
+	}
+	ret = decode_all_metric_ids(parsed_evlist, modifier);
+	if (ret)
+		goto err_out;
+
+	*out_evlist = parsed_evlist;
+	parsed_evlist = NULL;
+err_out:
+	parse_events_error__exit(&parse_error);
+	evlist__delete(parsed_evlist);
+	strbuf_release(&events);
+	return ret;
+}
+
+static int parse_groups(struct evlist *perf_evlist, const char *str,
+			bool metric_no_group,
+			bool metric_no_merge,
+			const char *user_requested_cpu_list,
+			bool system_wide,
+			struct perf_pmu *fake_pmu,
+			struct rblist *metric_events_list,
+			const struct pmu_events_table *table)
+{
+	struct evlist *combined_evlist = NULL;
+	LIST_HEAD(metric_list);
+	struct metric *m;
+	bool tool_events[PERF_TOOL_MAX] = {false};
+	int ret;
+
+	if (metric_events_list->nr_entries == 0)
+		metricgroup__rblist_init(metric_events_list);
+	ret = metricgroup__add_metric_list(str, metric_no_group,
+					   user_requested_cpu_list,
+					   system_wide, &metric_list, table);
+	if (ret)
+		goto out;
+
+	/* Sort metrics from largest to smallest. */
+	list_sort(NULL, &metric_list, metric_list_cmp);
+
+	if (!metric_no_merge) {
+		struct expr_parse_ctx *combined = NULL;
+
+		find_tool_events(&metric_list, tool_events);
+
+		ret = build_combined_expr_ctx(&metric_list, &combined);
+
+		if (!ret && combined && hashmap__size(combined->ids)) {
+			ret = parse_ids(metric_no_merge, fake_pmu, combined,
+					/*modifier=*/NULL,
+					/*has_constraint=*/true,
+					tool_events,
+					&combined_evlist);
+		}
+		if (combined)
+			expr__ctx_free(combined);
+
+		if (ret)
+			goto out;
+	}
+
+	list_for_each_entry(m, &metric_list, nd) {
+		struct metric_event *me;
+		struct evsel **metric_events;
+		struct evlist *metric_evlist = NULL;
+		struct metric *n;
+		struct metric_expr *expr;
+
+		if (combined_evlist && m->has_constraint) {
+			metric_evlist = combined_evlist;
+		} else if (!metric_no_merge) {
+			/*
+			 * See if the IDs for this metric are a subset of an
+			 * earlier metric.
+			 */
+			list_for_each_entry(n, &metric_list, nd) {
+				if (m == n)
+					break;
+
+				if (n->evlist == NULL)
+					continue;
+
+				if ((!m->modifier && n->modifier) ||
+				    (m->modifier && !n->modifier) ||
+				    (m->modifier && n->modifier &&
+					    strcmp(m->modifier, n->modifier)))
+					continue;
+
+				if (expr__subset_of_ids(n->pctx, m->pctx)) {
+					pr_debug("Events in '%s' fully contained within '%s'\n",
+						 m->metric_name, n->metric_name);
+					metric_evlist = n->evlist;
+					break;
+				}
+
+			}
+		}
+		if (!metric_evlist) {
+			ret = parse_ids(metric_no_merge, fake_pmu, m->pctx, m->modifier,
+					m->has_constraint, tool_events, &m->evlist);
+			if (ret)
+				goto out;
+
+			metric_evlist = m->evlist;
+		}
+		ret = setup_metric_events(m->pctx->ids, metric_evlist, &metric_events);
+		if (ret) {
+			pr_debug("Cannot resolve IDs for %s: %s\n",
+				m->metric_name, m->metric_expr);
+			goto out;
+		}
+
+		me = metricgroup__lookup(metric_events_list, metric_events[0], true);
+
+		expr = malloc(sizeof(struct metric_expr));
+		if (!expr) {
+			ret = -ENOMEM;
+			free(metric_events);
+			goto out;
+		}
+
+		expr->metric_refs = m->metric_refs;
+		m->metric_refs = NULL;
+		expr->metric_expr = m->metric_expr;
+		if (m->modifier) {
+			char *tmp;
+
+			if (asprintf(&tmp, "%s:%s", m->metric_name, m->modifier) < 0)
+				expr->metric_name = NULL;
+			else
+				expr->metric_name = tmp;
+		} else
+			expr->metric_name = strdup(m->metric_name);
+
+		if (!expr->metric_name) {
+			ret = -ENOMEM;
+			free(metric_events);
+			goto out;
+		}
+		expr->metric_unit = m->metric_unit;
+		expr->metric_events = metric_events;
+		expr->runtime = m->pctx->sctx.runtime;
+		list_add(&expr->nd, &me->head);
+	}
+
+
+	if (combined_evlist) {
+		evlist__splice_list_tail(perf_evlist, &combined_evlist->core.entries);
+		evlist__delete(combined_evlist);
+	}
+
+	list_for_each_entry(m, &metric_list, nd) {
+		if (m->evlist)
+			evlist__splice_list_tail(perf_evlist, &m->evlist->core.entries);
+	}
+
+out:
+	metricgroup__free_metrics(&metric_list);
+	return ret;
+}
+
+int metricgroup__parse_groups(struct evlist *perf_evlist,
+			      const char *str,
+			      bool metric_no_group,
+			      bool metric_no_merge,
+			      const char *user_requested_cpu_list,
+			      bool system_wide,
+			      struct rblist *metric_events)
+{
+	const struct pmu_events_table *table = pmu_events_table__find();
+
+	if (!table)
+		return -EINVAL;
+
+	return parse_groups(perf_evlist, str, metric_no_group, metric_no_merge,
+			    user_requested_cpu_list, system_wide,
+			    /*fake_pmu=*/NULL, metric_events, table);
+}
+
+int metricgroup__parse_groups_test(struct evlist *evlist,
+				   const struct pmu_events_table *table,
+				   const char *str,
+				   bool metric_no_group,
+				   bool metric_no_merge,
+				   struct rblist *metric_events)
+{
+	return parse_groups(evlist, str, metric_no_group, metric_no_merge,
+			    /*user_requested_cpu_list=*/NULL,
+			    /*system_wide=*/false,
+			    &perf_pmu__fake, metric_events, table);
+}
+
+static int metricgroup__has_metric_callback(const struct pmu_event *pe,
+					    const struct pmu_events_table *table __maybe_unused,
+					    void *vdata)
+{
+	const char *metric = vdata;
+
+	if (!pe->metric_expr)
+		return 0;
+
+	if (match_metric(pe->metric_name, metric))
+		return 1;
+
+	return 0;
+}
+
+bool metricgroup__has_metric(const char *metric)
+{
+	const struct pmu_events_table *table = pmu_events_table__find();
+
+	if (!table)
+		return false;
+
+	return pmu_events_table_for_each_event(table, metricgroup__has_metric_callback,
+					       (void *)metric) ? true : false;
+}
+
+int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,
+				    struct rblist *new_metric_events,
+				    struct rblist *old_metric_events)
+{
+	unsigned int i;
+
+	for (i = 0; i < rblist__nr_entries(old_metric_events); i++) {
+		struct rb_node *nd;
+		struct metric_event *old_me, *new_me;
+		struct metric_expr *old_expr, *new_expr;
+		struct evsel *evsel;
+		size_t alloc_size;
+		int idx, nr;
+
+		nd = rblist__entry(old_metric_events, i);
+		old_me = container_of(nd, struct metric_event, nd);
+
+		evsel = evlist__find_evsel(evlist, old_me->evsel->core.idx);
+		if (!evsel)
+			return -EINVAL;
+		new_me = metricgroup__lookup(new_metric_events, evsel, true);
+		if (!new_me)
+			return -ENOMEM;
+
+		pr_debug("copying metric event for cgroup '%s': %s (idx=%d)\n",
+			 cgrp ? cgrp->name : "root", evsel->name, evsel->core.idx);
+
+		list_for_each_entry(old_expr, &old_me->head, nd) {
+			new_expr = malloc(sizeof(*new_expr));
+			if (!new_expr)
+				return -ENOMEM;
+
+			new_expr->metric_expr = old_expr->metric_expr;
+			new_expr->metric_name = strdup(old_expr->metric_name);
+			if (!new_expr->metric_name)
+				return -ENOMEM;
+
+			new_expr->metric_unit = old_expr->metric_unit;
+			new_expr->runtime = old_expr->runtime;
+
+			if (old_expr->metric_refs) {
+				/* calculate number of metric_events */
+				for (nr = 0; old_expr->metric_refs[nr].metric_name; nr++)
+					continue;
+				alloc_size = sizeof(*new_expr->metric_refs);
+				new_expr->metric_refs = calloc(nr + 1, alloc_size);
+				if (!new_expr->metric_refs) {
+					free(new_expr);
+					return -ENOMEM;
+				}
+
+				memcpy(new_expr->metric_refs, old_expr->metric_refs,
+				       nr * alloc_size);
+			} else {
+				new_expr->metric_refs = NULL;
+			}
+
+			/* calculate number of metric_events */
+			for (nr = 0; old_expr->metric_events[nr]; nr++)
+				continue;
+			alloc_size = sizeof(*new_expr->metric_events);
+			new_expr->metric_events = calloc(nr + 1, alloc_size);
+			if (!new_expr->metric_events) {
+				free(new_expr->metric_refs);
+				free(new_expr);
+				return -ENOMEM;
+			}
+
+			/* copy evsel in the same position */
+			for (idx = 0; idx < nr; idx++) {
+				evsel = old_expr->metric_events[idx];
+				evsel = evlist__find_evsel(evlist, evsel->core.idx);
+				if (evsel == NULL) {
+					free(new_expr->metric_events);
+					free(new_expr->metric_refs);
+					free(new_expr);
+					return -EINVAL;
+				}
+				new_expr->metric_events[idx] = evsel;
+			}
+
+			list_add(&new_expr->nd, &new_me->head);
+		}
+	}
+	return 0;
+}
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
new file mode 100644
index 000000000..732d3a0d3
--- /dev/null
+++ b/tools/perf/util/metricgroup.h
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef METRICGROUP_H
+#define METRICGROUP_H 1
+
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <stdbool.h>
+#include "pmu-events/pmu-events.h"
+
+struct evlist;
+struct evsel;
+struct option;
+struct rblist;
+struct cgroup;
+
+/**
+ * A node in a rblist keyed by the evsel. The global rblist of metric events
+ * generally exists in perf_stat_config. The evsel is looked up in the rblist
+ * yielding a list of metric_expr.
+ */
+struct metric_event {
+	struct rb_node nd;
+	struct evsel *evsel;
+	struct list_head head; /* list of metric_expr */
+};
+
+/**
+ * A metric referenced by a metric_expr. When parsing a metric expression IDs
+ * will be looked up, matching either a value (from metric_events) or a
+ * metric_ref. A metric_ref will then be parsed recursively. The metric_refs and
+ * metric_events need to be known before parsing so that their values may be
+ * placed in the parse context for lookup.
+ */
+struct metric_ref {
+	const char *metric_name;
+	const char *metric_expr;
+};
+
+/**
+ * One in a list of metric_expr associated with an evsel. The data is used to
+ * generate a metric value during stat output.
+ */
+struct metric_expr {
+	struct list_head nd;
+	/** The expression to parse, for example, "instructions/cycles". */
+	const char *metric_expr;
+	/** The name of the meric such as "IPC". */
+	const char *metric_name;
+	/**
+	 * The "ScaleUnit" that scales and adds a unit to the metric during
+	 * output. For example, "6.4e-05MiB" means to scale the resulting metric
+	 * by 6.4e-05 (typically converting a unit like cache lines to something
+	 * more human intelligible) and then add "MiB" afterward when displayed.
+	 */
+	const char *metric_unit;
+	/** Null terminated array of events used by the metric. */
+	struct evsel **metric_events;
+	/** Null terminated array of referenced metrics. */
+	struct metric_ref *metric_refs;
+	/** A value substituted for '?' during parsing. */
+	int runtime;
+};
+
+struct metric_event *metricgroup__lookup(struct rblist *metric_events,
+					 struct evsel *evsel,
+					 bool create);
+int metricgroup__parse_groups(struct evlist *perf_evlist,
+			      const char *str,
+			      bool metric_no_group,
+			      bool metric_no_merge,
+			      const char *user_requested_cpu_list,
+			      bool system_wide,
+			      struct rblist *metric_events);
+int metricgroup__parse_groups_test(struct evlist *evlist,
+				   const struct pmu_events_table *table,
+				   const char *str,
+				   bool metric_no_group,
+				   bool metric_no_merge,
+				   struct rblist *metric_events);
+
+void metricgroup__print(bool metrics, bool groups, char *filter,
+			bool raw, bool details, const char *pmu_name);
+bool metricgroup__has_metric(const char *metric);
+int arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused);
+void metricgroup__rblist_exit(struct rblist *metric_events);
+
+int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,
+				    struct rblist *new_metric_events,
+				    struct rblist *old_metric_events);
+#endif
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
new file mode 100644
index 000000000..a4dff881b
--- /dev/null
+++ b/tools/perf/util/mmap.c
@@ -0,0 +1,374 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2011-2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from evlist.c builtin-{top,stat,record}.c, see those files for further
+ * copyright notes.
+ */
+
+#include <sys/mman.h>
+#include <inttypes.h>
+#include <asm/bug.h>
+#include <linux/zalloc.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h> // sysconf()
+#include <perf/mmap.h>
+#ifdef HAVE_LIBNUMA_SUPPORT
+#include <numaif.h>
+#endif
+#include "cpumap.h"
+#include "debug.h"
+#include "event.h"
+#include "mmap.h"
+#include "../perf.h"
+#include <internal/lib.h> /* page_size */
+#include <linux/bitmap.h>
+
+#define MASK_SIZE 1023
+void mmap_cpu_mask__scnprintf(struct mmap_cpu_mask *mask, const char *tag)
+{
+	char buf[MASK_SIZE + 1];
+	size_t len;
+
+	len = bitmap_scnprintf(mask->bits, mask->nbits, buf, MASK_SIZE);
+	buf[len] = '\0';
+	pr_debug("%p: %s mask[%zd]: %s\n", mask, tag, mask->nbits, buf);
+}
+
+size_t mmap__mmap_len(struct mmap *map)
+{
+	return perf_mmap__mmap_len(&map->core);
+}
+
+int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
+			       struct auxtrace_mmap_params *mp __maybe_unused,
+			       void *userpg __maybe_unused,
+			       int fd __maybe_unused)
+{
+	return 0;
+}
+
+void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
+{
+}
+
+void __weak auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp __maybe_unused,
+				       off_t auxtrace_offset __maybe_unused,
+				       unsigned int auxtrace_pages __maybe_unused,
+				       bool auxtrace_overwrite __maybe_unused)
+{
+}
+
+void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __maybe_unused,
+					  struct evlist *evlist __maybe_unused,
+					  struct evsel *evsel __maybe_unused,
+					  int idx __maybe_unused)
+{
+}
+
+#ifdef HAVE_AIO_SUPPORT
+static int perf_mmap__aio_enabled(struct mmap *map)
+{
+	return map->aio.nr_cblocks > 0;
+}
+
+#ifdef HAVE_LIBNUMA_SUPPORT
+static int perf_mmap__aio_alloc(struct mmap *map, int idx)
+{
+	map->aio.data[idx] = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE,
+				  MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+	if (map->aio.data[idx] == MAP_FAILED) {
+		map->aio.data[idx] = NULL;
+		return -1;
+	}
+
+	return 0;
+}
+
+static void perf_mmap__aio_free(struct mmap *map, int idx)
+{
+	if (map->aio.data[idx]) {
+		munmap(map->aio.data[idx], mmap__mmap_len(map));
+		map->aio.data[idx] = NULL;
+	}
+}
+
+static int perf_mmap__aio_bind(struct mmap *map, int idx, struct perf_cpu cpu, int affinity)
+{
+	void *data;
+	size_t mmap_len;
+	unsigned long *node_mask;
+	unsigned long node_index;
+	int err = 0;
+
+	if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) {
+		data = map->aio.data[idx];
+		mmap_len = mmap__mmap_len(map);
+		node_index = cpu__get_node(cpu);
+		node_mask = bitmap_zalloc(node_index + 1);
+		if (!node_mask) {
+			pr_err("Failed to allocate node mask for mbind: error %m\n");
+			return -1;
+		}
+		set_bit(node_index, node_mask);
+		if (mbind(data, mmap_len, MPOL_BIND, node_mask, node_index + 1 + 1, 0)) {
+			pr_err("Failed to bind [%p-%p] AIO buffer to node %lu: error %m\n",
+				data, data + mmap_len, node_index);
+			err = -1;
+		}
+		bitmap_free(node_mask);
+	}
+
+	return err;
+}
+#else /* !HAVE_LIBNUMA_SUPPORT */
+static int perf_mmap__aio_alloc(struct mmap *map, int idx)
+{
+	map->aio.data[idx] = malloc(mmap__mmap_len(map));
+	if (map->aio.data[idx] == NULL)
+		return -1;
+
+	return 0;
+}
+
+static void perf_mmap__aio_free(struct mmap *map, int idx)
+{
+	zfree(&(map->aio.data[idx]));
+}
+
+static int perf_mmap__aio_bind(struct mmap *map __maybe_unused, int idx __maybe_unused,
+		struct perf_cpu cpu __maybe_unused, int affinity __maybe_unused)
+{
+	return 0;
+}
+#endif
+
+static int perf_mmap__aio_mmap(struct mmap *map, struct mmap_params *mp)
+{
+	int delta_max, i, prio, ret;
+
+	map->aio.nr_cblocks = mp->nr_cblocks;
+	if (map->aio.nr_cblocks) {
+		map->aio.aiocb = calloc(map->aio.nr_cblocks, sizeof(struct aiocb *));
+		if (!map->aio.aiocb) {
+			pr_debug2("failed to allocate aiocb for data buffer, error %m\n");
+			return -1;
+		}
+		map->aio.cblocks = calloc(map->aio.nr_cblocks, sizeof(struct aiocb));
+		if (!map->aio.cblocks) {
+			pr_debug2("failed to allocate cblocks for data buffer, error %m\n");
+			return -1;
+		}
+		map->aio.data = calloc(map->aio.nr_cblocks, sizeof(void *));
+		if (!map->aio.data) {
+			pr_debug2("failed to allocate data buffer, error %m\n");
+			return -1;
+		}
+		delta_max = sysconf(_SC_AIO_PRIO_DELTA_MAX);
+		for (i = 0; i < map->aio.nr_cblocks; ++i) {
+			ret = perf_mmap__aio_alloc(map, i);
+			if (ret == -1) {
+				pr_debug2("failed to allocate data buffer area, error %m");
+				return -1;
+			}
+			ret = perf_mmap__aio_bind(map, i, map->core.cpu, mp->affinity);
+			if (ret == -1)
+				return -1;
+			/*
+			 * Use cblock.aio_fildes value different from -1
+			 * to denote started aio write operation on the
+			 * cblock so it requires explicit record__aio_sync()
+			 * call prior the cblock may be reused again.
+			 */
+			map->aio.cblocks[i].aio_fildes = -1;
+			/*
+			 * Allocate cblocks with priority delta to have
+			 * faster aio write system calls because queued requests
+			 * are kept in separate per-prio queues and adding
+			 * a new request will iterate thru shorter per-prio
+			 * list. Blocks with numbers higher than
+			 *  _SC_AIO_PRIO_DELTA_MAX go with priority 0.
+			 */
+			prio = delta_max - i;
+			map->aio.cblocks[i].aio_reqprio = prio >= 0 ? prio : 0;
+		}
+	}
+
+	return 0;
+}
+
+static void perf_mmap__aio_munmap(struct mmap *map)
+{
+	int i;
+
+	for (i = 0; i < map->aio.nr_cblocks; ++i)
+		perf_mmap__aio_free(map, i);
+	if (map->aio.data)
+		zfree(&map->aio.data);
+	zfree(&map->aio.cblocks);
+	zfree(&map->aio.aiocb);
+}
+#else /* !HAVE_AIO_SUPPORT */
+static int perf_mmap__aio_enabled(struct mmap *map __maybe_unused)
+{
+	return 0;
+}
+
+static int perf_mmap__aio_mmap(struct mmap *map __maybe_unused,
+			       struct mmap_params *mp __maybe_unused)
+{
+	return 0;
+}
+
+static void perf_mmap__aio_munmap(struct mmap *map __maybe_unused)
+{
+}
+#endif
+
+void mmap__munmap(struct mmap *map)
+{
+	bitmap_free(map->affinity_mask.bits);
+
+#ifndef PYTHON_PERF
+	zstd_fini(&map->zstd_data);
+#endif
+
+	perf_mmap__aio_munmap(map);
+	if (map->data != NULL) {
+		munmap(map->data, mmap__mmap_len(map));
+		map->data = NULL;
+	}
+	auxtrace_mmap__munmap(&map->auxtrace_mmap);
+}
+
+static void build_node_mask(int node, struct mmap_cpu_mask *mask)
+{
+	int idx, nr_cpus;
+	struct perf_cpu cpu;
+	const struct perf_cpu_map *cpu_map = NULL;
+
+	cpu_map = cpu_map__online();
+	if (!cpu_map)
+		return;
+
+	nr_cpus = perf_cpu_map__nr(cpu_map);
+	for (idx = 0; idx < nr_cpus; idx++) {
+		cpu = perf_cpu_map__cpu(cpu_map, idx); /* map c index to online cpu index */
+		if (cpu__get_node(cpu) == node)
+			set_bit(cpu.cpu, mask->bits);
+	}
+}
+
+static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp)
+{
+	map->affinity_mask.nbits = cpu__max_cpu().cpu;
+	map->affinity_mask.bits = bitmap_zalloc(map->affinity_mask.nbits);
+	if (!map->affinity_mask.bits)
+		return -1;
+
+	if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1)
+		build_node_mask(cpu__get_node(map->core.cpu), &map->affinity_mask);
+	else if (mp->affinity == PERF_AFFINITY_CPU)
+		set_bit(map->core.cpu.cpu, map->affinity_mask.bits);
+
+	return 0;
+}
+
+int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu cpu)
+{
+	if (perf_mmap__mmap(&map->core, &mp->core, fd, cpu)) {
+		pr_debug2("failed to mmap perf event ring buffer, error %d\n",
+			  errno);
+		return -1;
+	}
+
+	if (mp->affinity != PERF_AFFINITY_SYS &&
+		perf_mmap__setup_affinity_mask(map, mp)) {
+		pr_debug2("failed to alloc mmap affinity mask, error %d\n",
+			  errno);
+		return -1;
+	}
+
+	if (verbose == 2)
+		mmap_cpu_mask__scnprintf(&map->affinity_mask, "mmap");
+
+	map->core.flush = mp->flush;
+
+	map->comp_level = mp->comp_level;
+#ifndef PYTHON_PERF
+	if (zstd_init(&map->zstd_data, map->comp_level)) {
+		pr_debug2("failed to init mmap compressor, error %d\n", errno);
+		return -1;
+	}
+#endif
+
+	if (map->comp_level && !perf_mmap__aio_enabled(map)) {
+		map->data = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE,
+				 MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+		if (map->data == MAP_FAILED) {
+			pr_debug2("failed to mmap data buffer, error %d\n",
+					errno);
+			map->data = NULL;
+			return -1;
+		}
+	}
+
+	if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
+				&mp->auxtrace_mp, map->core.base, fd))
+		return -1;
+
+	return perf_mmap__aio_mmap(map, mp);
+}
+
+int perf_mmap__push(struct mmap *md, void *to,
+		    int push(struct mmap *map, void *to, void *buf, size_t size))
+{
+	u64 head = perf_mmap__read_head(&md->core);
+	unsigned char *data = md->core.base + page_size;
+	unsigned long size;
+	void *buf;
+	int rc = 0;
+
+	rc = perf_mmap__read_init(&md->core);
+	if (rc < 0)
+		return (rc == -EAGAIN) ? 1 : -1;
+
+	size = md->core.end - md->core.start;
+
+	if ((md->core.start & md->core.mask) + size != (md->core.end & md->core.mask)) {
+		buf = &data[md->core.start & md->core.mask];
+		size = md->core.mask + 1 - (md->core.start & md->core.mask);
+		md->core.start += size;
+
+		if (push(md, to, buf, size) < 0) {
+			rc = -1;
+			goto out;
+		}
+	}
+
+	buf = &data[md->core.start & md->core.mask];
+	size = md->core.end - md->core.start;
+	md->core.start += size;
+
+	if (push(md, to, buf, size) < 0) {
+		rc = -1;
+		goto out;
+	}
+
+	md->core.prev = head;
+	perf_mmap__consume(&md->core);
+out:
+	return rc;
+}
+
+int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original, struct mmap_cpu_mask *clone)
+{
+	clone->nbits = original->nbits;
+	clone->bits  = bitmap_zalloc(original->nbits);
+	if (!clone->bits)
+		return -ENOMEM;
+
+	memcpy(clone->bits, original->bits, MMAP_CPU_MASK_BYTES(original));
+	return 0;
+}
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
new file mode 100644
index 000000000..cd4ccec7f
--- /dev/null
+++ b/tools/perf/util/mmap.h
@@ -0,0 +1,73 @@
+#ifndef __PERF_MMAP_H
+#define __PERF_MMAP_H 1
+
+#include <internal/mmap.h>
+#include <linux/compiler.h>
+#include <linux/refcount.h>
+#include <linux/types.h>
+#include <linux/ring_buffer.h>
+#include <linux/bitops.h>
+#include <perf/cpumap.h>
+#include <stdbool.h>
+#ifdef HAVE_AIO_SUPPORT
+#include <aio.h>
+#endif
+#include "auxtrace.h"
+#include "event.h"
+#include "util/compress.h"
+
+struct aiocb;
+
+struct mmap_cpu_mask {
+	unsigned long *bits;
+	size_t nbits;
+};
+
+#define MMAP_CPU_MASK_BYTES(m) \
+	(BITS_TO_LONGS(((struct mmap_cpu_mask *)m)->nbits) * sizeof(unsigned long))
+
+/**
+ * struct mmap - perf's ring buffer mmap details
+ *
+ * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this
+ */
+struct mmap {
+	struct perf_mmap	core;
+	struct auxtrace_mmap auxtrace_mmap;
+#ifdef HAVE_AIO_SUPPORT
+	struct {
+		void		 **data;
+		struct aiocb	 *cblocks;
+		struct aiocb	 **aiocb;
+		int		 nr_cblocks;
+	} aio;
+#endif
+	struct mmap_cpu_mask	affinity_mask;
+	void		*data;
+	int		comp_level;
+	struct perf_data_file *file;
+	struct zstd_data      zstd_data;
+};
+
+struct mmap_params {
+	struct perf_mmap_param core;
+	int nr_cblocks, affinity, flush, comp_level;
+	struct auxtrace_mmap_params auxtrace_mp;
+};
+
+int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu cpu);
+void mmap__munmap(struct mmap *map);
+
+union perf_event *perf_mmap__read_forward(struct mmap *map);
+
+int perf_mmap__push(struct mmap *md, void *to,
+		    int push(struct mmap *map, void *to, void *buf, size_t size));
+
+size_t mmap__mmap_len(struct mmap *map);
+
+void mmap_cpu_mask__scnprintf(struct mmap_cpu_mask *mask, const char *tag);
+
+int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original,
+				struct mmap_cpu_mask *clone);
+
+#endif /*__PERF_MMAP_H */
diff --git a/tools/perf/util/mutex.c b/tools/perf/util/mutex.c
new file mode 100644
index 000000000..bca7f0717
--- /dev/null
+++ b/tools/perf/util/mutex.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "mutex.h"
+
+#include "debug.h"
+#include <linux/string.h>
+#include <errno.h>
+
+static void check_err(const char *fn, int err)
+{
+	char sbuf[STRERR_BUFSIZE];
+
+	if (err == 0)
+		return;
+
+	pr_err("%s error: '%s'\n", fn, str_error_r(err, sbuf, sizeof(sbuf)));
+}
+
+#define CHECK_ERR(err) check_err(__func__, err)
+
+static void __mutex_init(struct mutex *mtx, bool pshared)
+{
+	pthread_mutexattr_t attr;
+
+	CHECK_ERR(pthread_mutexattr_init(&attr));
+
+#ifndef NDEBUG
+	/* In normal builds enable error checking, such as recursive usage. */
+	CHECK_ERR(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK));
+#endif
+	if (pshared)
+		CHECK_ERR(pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED));
+
+	CHECK_ERR(pthread_mutex_init(&mtx->lock, &attr));
+	CHECK_ERR(pthread_mutexattr_destroy(&attr));
+}
+
+void mutex_init(struct mutex *mtx)
+{
+	__mutex_init(mtx, /*pshared=*/false);
+}
+
+void mutex_init_pshared(struct mutex *mtx)
+{
+	__mutex_init(mtx, /*pshared=*/true);
+}
+
+void mutex_destroy(struct mutex *mtx)
+{
+	CHECK_ERR(pthread_mutex_destroy(&mtx->lock));
+}
+
+void mutex_lock(struct mutex *mtx)
+	NO_THREAD_SAFETY_ANALYSIS
+{
+	CHECK_ERR(pthread_mutex_lock(&mtx->lock));
+}
+
+void mutex_unlock(struct mutex *mtx)
+	NO_THREAD_SAFETY_ANALYSIS
+{
+	CHECK_ERR(pthread_mutex_unlock(&mtx->lock));
+}
+
+bool mutex_trylock(struct mutex *mtx)
+{
+	int ret = pthread_mutex_trylock(&mtx->lock);
+
+	if (ret == 0)
+		return true; /* Lock acquired. */
+
+	if (ret == EBUSY)
+		return false; /* Lock busy. */
+
+	/* Print error. */
+	CHECK_ERR(ret);
+	return false;
+}
+
+static void __cond_init(struct cond *cnd, bool pshared)
+{
+	pthread_condattr_t attr;
+
+	CHECK_ERR(pthread_condattr_init(&attr));
+	if (pshared)
+		CHECK_ERR(pthread_condattr_setpshared(&attr, PTHREAD_PROCESS_SHARED));
+
+	CHECK_ERR(pthread_cond_init(&cnd->cond, &attr));
+	CHECK_ERR(pthread_condattr_destroy(&attr));
+}
+
+void cond_init(struct cond *cnd)
+{
+	__cond_init(cnd, /*pshared=*/false);
+}
+
+void cond_init_pshared(struct cond *cnd)
+{
+	__cond_init(cnd, /*pshared=*/true);
+}
+
+void cond_destroy(struct cond *cnd)
+{
+	CHECK_ERR(pthread_cond_destroy(&cnd->cond));
+}
+
+void cond_wait(struct cond *cnd, struct mutex *mtx)
+{
+	CHECK_ERR(pthread_cond_wait(&cnd->cond, &mtx->lock));
+}
+
+void cond_signal(struct cond *cnd)
+{
+	CHECK_ERR(pthread_cond_signal(&cnd->cond));
+}
+
+void cond_broadcast(struct cond *cnd)
+{
+	CHECK_ERR(pthread_cond_broadcast(&cnd->cond));
+}
diff --git a/tools/perf/util/mutex.h b/tools/perf/util/mutex.h
new file mode 100644
index 000000000..40661120c
--- /dev/null
+++ b/tools/perf/util/mutex.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_MUTEX_H
+#define __PERF_MUTEX_H
+
+#include <pthread.h>
+#include <stdbool.h>
+
+/*
+ * A function-like feature checking macro that is a wrapper around
+ * `__has_attribute`, which is defined by GCC 5+ and Clang and evaluates to a
+ * nonzero constant integer if the attribute is supported or 0 if not.
+ */
+#ifdef __has_attribute
+#define HAVE_ATTRIBUTE(x) __has_attribute(x)
+#else
+#define HAVE_ATTRIBUTE(x) 0
+#endif
+
+#if HAVE_ATTRIBUTE(guarded_by) && HAVE_ATTRIBUTE(pt_guarded_by) && \
+	HAVE_ATTRIBUTE(lockable) && HAVE_ATTRIBUTE(exclusive_lock_function) && \
+	HAVE_ATTRIBUTE(exclusive_trylock_function) && HAVE_ATTRIBUTE(exclusive_locks_required) && \
+	HAVE_ATTRIBUTE(no_thread_safety_analysis)
+
+/* Documents if a shared field or global variable needs to be protected by a mutex. */
+#define GUARDED_BY(x) __attribute__((guarded_by(x)))
+
+/*
+ * Documents if the memory location pointed to by a pointer should be guarded by
+ * a mutex when dereferencing the pointer.
+ */
+#define PT_GUARDED_BY(x) __attribute__((pt_guarded_by(x)))
+
+/* Documents if a type is a lockable type. */
+#define LOCKABLE __attribute__((lockable))
+
+/* Documents functions that acquire a lock in the body of a function, and do not release it. */
+#define EXCLUSIVE_LOCK_FUNCTION(...)  __attribute__((exclusive_lock_function(__VA_ARGS__)))
+
+/*
+ * Documents functions that expect a lock to be held on entry to the function,
+ * and release it in the body of the function.
+ */
+#define UNLOCK_FUNCTION(...) __attribute__((unlock_function(__VA_ARGS__)))
+
+/* Documents functions that try to acquire a lock, and return success or failure. */
+#define EXCLUSIVE_TRYLOCK_FUNCTION(...) \
+	__attribute__((exclusive_trylock_function(__VA_ARGS__)))
+
+/* Documents a function that expects a mutex to be held prior to entry. */
+#define EXCLUSIVE_LOCKS_REQUIRED(...) __attribute__((exclusive_locks_required(__VA_ARGS__)))
+
+/* Turns off thread safety checking within the body of a particular function. */
+#define NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis))
+
+#else
+
+#define GUARDED_BY(x)
+#define PT_GUARDED_BY(x)
+#define LOCKABLE
+#define EXCLUSIVE_LOCK_FUNCTION(...)
+#define UNLOCK_FUNCTION(...)
+#define EXCLUSIVE_TRYLOCK_FUNCTION(...)
+#define EXCLUSIVE_LOCKS_REQUIRED(...)
+#define NO_THREAD_SAFETY_ANALYSIS
+
+#endif
+
+/*
+ * A wrapper around the mutex implementation that allows perf to error check
+ * usage, etc.
+ */
+struct LOCKABLE mutex {
+	pthread_mutex_t lock;
+};
+
+/* A wrapper around the condition variable implementation. */
+struct cond {
+	pthread_cond_t cond;
+};
+
+/* Default initialize the mtx struct. */
+void mutex_init(struct mutex *mtx);
+/*
+ * Initialize the mtx struct and set the process-shared rather than default
+ * process-private attribute.
+ */
+void mutex_init_pshared(struct mutex *mtx);
+void mutex_destroy(struct mutex *mtx);
+
+void mutex_lock(struct mutex *mtx) EXCLUSIVE_LOCK_FUNCTION(*mtx);
+void mutex_unlock(struct mutex *mtx) UNLOCK_FUNCTION(*mtx);
+/* Tries to acquire the lock and returns true on success. */
+bool mutex_trylock(struct mutex *mtx) EXCLUSIVE_TRYLOCK_FUNCTION(true, *mtx);
+
+/* Default initialize the cond struct. */
+void cond_init(struct cond *cnd);
+/*
+ * Initialize the cond struct and specify the process-shared rather than default
+ * process-private attribute.
+ */
+void cond_init_pshared(struct cond *cnd);
+void cond_destroy(struct cond *cnd);
+
+void cond_wait(struct cond *cnd, struct mutex *mtx) EXCLUSIVE_LOCKS_REQUIRED(mtx);
+void cond_signal(struct cond *cnd);
+void cond_broadcast(struct cond *cnd);
+
+#endif /* __PERF_MUTEX_H */
diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c
new file mode 100644
index 000000000..dd536220c
--- /dev/null
+++ b/tools/perf/util/namespaces.c
@@ -0,0 +1,347 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright (C) 2017 Hari Bathini, IBM Corporation
+ */
+
+#include "namespaces.h"
+#include "event.h"
+#include "get_current_dir_name.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <asm/bug.h>
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+
+static const char *perf_ns__names[] = {
+	[NET_NS_INDEX]		= "net",
+	[UTS_NS_INDEX]		= "uts",
+	[IPC_NS_INDEX]		= "ipc",
+	[PID_NS_INDEX]		= "pid",
+	[USER_NS_INDEX]		= "user",
+	[MNT_NS_INDEX]		= "mnt",
+	[CGROUP_NS_INDEX]	= "cgroup",
+};
+
+const char *perf_ns__name(unsigned int id)
+{
+	if (id >= ARRAY_SIZE(perf_ns__names))
+		return "UNKNOWN";
+	return perf_ns__names[id];
+}
+
+struct namespaces *namespaces__new(struct perf_record_namespaces *event)
+{
+	struct namespaces *namespaces;
+	u64 link_info_size = ((event ? event->nr_namespaces : NR_NAMESPACES) *
+			      sizeof(struct perf_ns_link_info));
+
+	namespaces = zalloc(sizeof(struct namespaces) + link_info_size);
+	if (!namespaces)
+		return NULL;
+
+	namespaces->end_time = -1;
+
+	if (event)
+		memcpy(namespaces->link_info, event->link_info, link_info_size);
+
+	return namespaces;
+}
+
+void namespaces__free(struct namespaces *namespaces)
+{
+	free(namespaces);
+}
+
+static int nsinfo__get_nspid(struct nsinfo *nsi, const char *path)
+{
+	FILE *f = NULL;
+	char *statln = NULL;
+	size_t linesz = 0;
+	char *nspid;
+
+	f = fopen(path, "r");
+	if (f == NULL)
+		return -1;
+
+	while (getline(&statln, &linesz, f) != -1) {
+		/* Use tgid if CONFIG_PID_NS is not defined. */
+		if (strstr(statln, "Tgid:") != NULL) {
+			nsi->tgid = (pid_t)strtol(strrchr(statln, '\t'),
+						     NULL, 10);
+			nsi->nstgid = nsinfo__tgid(nsi);
+		}
+
+		if (strstr(statln, "NStgid:") != NULL) {
+			nspid = strrchr(statln, '\t');
+			nsi->nstgid = (pid_t)strtol(nspid, NULL, 10);
+			/*
+			 * If innermost tgid is not the first, process is in a different
+			 * PID namespace.
+			 */
+			nsi->in_pidns = (statln + sizeof("NStgid:") - 1) != nspid;
+			break;
+		}
+	}
+
+	fclose(f);
+	free(statln);
+	return 0;
+}
+
+int nsinfo__init(struct nsinfo *nsi)
+{
+	char oldns[PATH_MAX];
+	char spath[PATH_MAX];
+	char *newns = NULL;
+	struct stat old_stat;
+	struct stat new_stat;
+	int rv = -1;
+
+	if (snprintf(oldns, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX)
+		return rv;
+
+	if (asprintf(&newns, "/proc/%d/ns/mnt", nsinfo__pid(nsi)) == -1)
+		return rv;
+
+	if (stat(oldns, &old_stat) < 0)
+		goto out;
+
+	if (stat(newns, &new_stat) < 0)
+		goto out;
+
+	/* Check if the mount namespaces differ, if so then indicate that we
+	 * want to switch as part of looking up dso/map data.
+	 */
+	if (old_stat.st_ino != new_stat.st_ino) {
+		nsi->need_setns = true;
+		nsi->mntns_path = newns;
+		newns = NULL;
+	}
+
+	/* If we're dealing with a process that is in a different PID namespace,
+	 * attempt to work out the innermost tgid for the process.
+	 */
+	if (snprintf(spath, PATH_MAX, "/proc/%d/status", nsinfo__pid(nsi)) >= PATH_MAX)
+		goto out;
+
+	rv = nsinfo__get_nspid(nsi, spath);
+
+out:
+	free(newns);
+	return rv;
+}
+
+struct nsinfo *nsinfo__new(pid_t pid)
+{
+	struct nsinfo *nsi;
+
+	if (pid == 0)
+		return NULL;
+
+	nsi = calloc(1, sizeof(*nsi));
+	if (nsi != NULL) {
+		nsi->pid = pid;
+		nsi->tgid = pid;
+		nsi->nstgid = pid;
+		nsi->need_setns = false;
+		nsi->in_pidns = false;
+		/* Init may fail if the process exits while we're trying to look
+		 * at its proc information.  In that case, save the pid but
+		 * don't try to enter the namespace.
+		 */
+		if (nsinfo__init(nsi) == -1)
+			nsi->need_setns = false;
+
+		refcount_set(&nsi->refcnt, 1);
+	}
+
+	return nsi;
+}
+
+struct nsinfo *nsinfo__copy(const struct nsinfo *nsi)
+{
+	struct nsinfo *nnsi;
+
+	if (nsi == NULL)
+		return NULL;
+
+	nnsi = calloc(1, sizeof(*nnsi));
+	if (nnsi != NULL) {
+		nnsi->pid = nsinfo__pid(nsi);
+		nnsi->tgid = nsinfo__tgid(nsi);
+		nnsi->nstgid = nsinfo__nstgid(nsi);
+		nnsi->need_setns = nsinfo__need_setns(nsi);
+		nnsi->in_pidns = nsinfo__in_pidns(nsi);
+		if (nsi->mntns_path) {
+			nnsi->mntns_path = strdup(nsi->mntns_path);
+			if (!nnsi->mntns_path) {
+				free(nnsi);
+				return NULL;
+			}
+		}
+		refcount_set(&nnsi->refcnt, 1);
+	}
+
+	return nnsi;
+}
+
+static void nsinfo__delete(struct nsinfo *nsi)
+{
+	zfree(&nsi->mntns_path);
+	free(nsi);
+}
+
+struct nsinfo *nsinfo__get(struct nsinfo *nsi)
+{
+	if (nsi)
+		refcount_inc(&nsi->refcnt);
+	return nsi;
+}
+
+void nsinfo__put(struct nsinfo *nsi)
+{
+	if (nsi && refcount_dec_and_test(&nsi->refcnt))
+		nsinfo__delete(nsi);
+}
+
+bool nsinfo__need_setns(const struct nsinfo *nsi)
+{
+        return nsi->need_setns;
+}
+
+void nsinfo__clear_need_setns(struct nsinfo *nsi)
+{
+        nsi->need_setns = false;
+}
+
+pid_t nsinfo__tgid(const struct nsinfo  *nsi)
+{
+        return nsi->tgid;
+}
+
+pid_t nsinfo__nstgid(const struct nsinfo  *nsi)
+{
+        return nsi->nstgid;
+}
+
+pid_t nsinfo__pid(const struct nsinfo  *nsi)
+{
+        return nsi->pid;
+}
+
+pid_t nsinfo__in_pidns(const struct nsinfo  *nsi)
+{
+        return nsi->in_pidns;
+}
+
+void nsinfo__mountns_enter(struct nsinfo *nsi,
+				  struct nscookie *nc)
+{
+	char curpath[PATH_MAX];
+	int oldns = -1;
+	int newns = -1;
+	char *oldcwd = NULL;
+
+	if (nc == NULL)
+		return;
+
+	nc->oldns = -1;
+	nc->newns = -1;
+
+	if (!nsi || !nsi->need_setns)
+		return;
+
+	if (snprintf(curpath, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX)
+		return;
+
+	oldcwd = get_current_dir_name();
+	if (!oldcwd)
+		return;
+
+	oldns = open(curpath, O_RDONLY);
+	if (oldns < 0)
+		goto errout;
+
+	newns = open(nsi->mntns_path, O_RDONLY);
+	if (newns < 0)
+		goto errout;
+
+	if (setns(newns, CLONE_NEWNS) < 0)
+		goto errout;
+
+	nc->oldcwd = oldcwd;
+	nc->oldns = oldns;
+	nc->newns = newns;
+	return;
+
+errout:
+	free(oldcwd);
+	if (oldns > -1)
+		close(oldns);
+	if (newns > -1)
+		close(newns);
+}
+
+void nsinfo__mountns_exit(struct nscookie *nc)
+{
+	if (nc == NULL || nc->oldns == -1 || nc->newns == -1 || !nc->oldcwd)
+		return;
+
+	setns(nc->oldns, CLONE_NEWNS);
+
+	if (nc->oldcwd) {
+		WARN_ON_ONCE(chdir(nc->oldcwd));
+		zfree(&nc->oldcwd);
+	}
+
+	if (nc->oldns > -1) {
+		close(nc->oldns);
+		nc->oldns = -1;
+	}
+
+	if (nc->newns > -1) {
+		close(nc->newns);
+		nc->newns = -1;
+	}
+}
+
+char *nsinfo__realpath(const char *path, struct nsinfo *nsi)
+{
+	char *rpath;
+	struct nscookie nsc;
+
+	nsinfo__mountns_enter(nsi, &nsc);
+	rpath = realpath(path, NULL);
+	nsinfo__mountns_exit(&nsc);
+
+	return rpath;
+}
+
+int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi)
+{
+	int ret;
+	struct nscookie nsc;
+
+	nsinfo__mountns_enter(nsi, &nsc);
+	ret = stat(filename, st);
+	nsinfo__mountns_exit(&nsc);
+
+	return ret;
+}
+
+bool nsinfo__is_in_root_namespace(void)
+{
+	struct nsinfo nsi;
+
+	memset(&nsi, 0x0, sizeof(nsi));
+	nsinfo__get_nspid(&nsi, "/proc/self/status");
+	return !nsi.in_pidns;
+}
diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h
new file mode 100644
index 000000000..567829262
--- /dev/null
+++ b/tools/perf/util/namespaces.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright (C) 2017 Hari Bathini, IBM Corporation
+ */
+
+#ifndef __PERF_NAMESPACES_H
+#define __PERF_NAMESPACES_H
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <linux/stddef.h>
+#include <linux/perf_event.h>
+#include <linux/refcount.h>
+#include <linux/types.h>
+
+#ifndef HAVE_SETNS_SUPPORT
+int setns(int fd, int nstype);
+#endif
+
+struct perf_record_namespaces;
+
+struct namespaces {
+	struct list_head list;
+	u64 end_time;
+	struct perf_ns_link_info link_info[];
+};
+
+struct namespaces *namespaces__new(struct perf_record_namespaces *event);
+void namespaces__free(struct namespaces *namespaces);
+
+struct nsinfo {
+	pid_t			pid;
+	pid_t			tgid;
+	pid_t			nstgid;
+	bool			need_setns;
+	bool			in_pidns;
+	char			*mntns_path;
+	refcount_t		refcnt;
+};
+
+struct nscookie {
+	int			oldns;
+	int			newns;
+	char			*oldcwd;
+};
+
+int nsinfo__init(struct nsinfo *nsi);
+struct nsinfo *nsinfo__new(pid_t pid);
+struct nsinfo *nsinfo__copy(const struct nsinfo *nsi);
+
+struct nsinfo *nsinfo__get(struct nsinfo *nsi);
+void nsinfo__put(struct nsinfo *nsi);
+
+bool nsinfo__need_setns(const struct nsinfo *nsi);
+void nsinfo__clear_need_setns(struct nsinfo *nsi);
+pid_t nsinfo__tgid(const struct nsinfo  *nsi);
+pid_t nsinfo__nstgid(const struct nsinfo  *nsi);
+pid_t nsinfo__pid(const struct nsinfo  *nsi);
+pid_t nsinfo__in_pidns(const struct nsinfo  *nsi);
+
+void nsinfo__mountns_enter(struct nsinfo *nsi, struct nscookie *nc);
+void nsinfo__mountns_exit(struct nscookie *nc);
+
+char *nsinfo__realpath(const char *path, struct nsinfo *nsi);
+int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi);
+
+bool nsinfo__is_in_root_namespace(void);
+
+static inline void __nsinfo__zput(struct nsinfo **nsip)
+{
+	if (nsip) {
+		nsinfo__put(*nsip);
+		*nsip = NULL;
+	}
+}
+
+#define nsinfo__zput(nsi) __nsinfo__zput(&nsi)
+
+const char *perf_ns__name(unsigned int id);
+
+#endif  /* __PERF_NAMESPACES_H */
diff --git a/tools/perf/util/off_cpu.h b/tools/perf/util/off_cpu.h
new file mode 100644
index 000000000..2dd67c60f
--- /dev/null
+++ b/tools/perf/util/off_cpu.h
@@ -0,0 +1,38 @@
+#ifndef PERF_UTIL_OFF_CPU_H
+#define PERF_UTIL_OFF_CPU_H
+
+#include <linux/perf_event.h>
+
+struct evlist;
+struct target;
+struct perf_session;
+struct record_opts;
+
+#define OFFCPU_EVENT  "offcpu-time"
+
+#define OFFCPU_SAMPLE_TYPES  (PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP | \
+			      PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \
+			      PERF_SAMPLE_ID | PERF_SAMPLE_CPU | \
+			      PERF_SAMPLE_PERIOD | PERF_SAMPLE_CALLCHAIN | \
+			      PERF_SAMPLE_CGROUP)
+
+
+#ifdef HAVE_BPF_SKEL
+int off_cpu_prepare(struct evlist *evlist, struct target *target,
+		    struct record_opts *opts);
+int off_cpu_write(struct perf_session *session);
+#else
+static inline int off_cpu_prepare(struct evlist *evlist __maybe_unused,
+				  struct target *target __maybe_unused,
+				  struct record_opts *opts __maybe_unused)
+{
+	return -1;
+}
+
+static inline int off_cpu_write(struct perf_session *session __maybe_unused)
+{
+	return -1;
+}
+#endif
+
+#endif  /* PERF_UTIL_OFF_CPU_H */
diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c
new file mode 100644
index 000000000..b887dfeea
--- /dev/null
+++ b/tools/perf/util/ordered-events.c
@@ -0,0 +1,417 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/list.h>
+#include <linux/compiler.h>
+#include <linux/string.h>
+#include "ordered-events.h"
+#include "session.h"
+#include "asm/bug.h"
+#include "debug.h"
+#include "ui/progress.h"
+
+#define pr_N(n, fmt, ...) \
+	eprintf(n, debug_ordered_events, fmt, ##__VA_ARGS__)
+
+#define pr(fmt, ...) pr_N(1, pr_fmt(fmt), ##__VA_ARGS__)
+
+static void queue_event(struct ordered_events *oe, struct ordered_event *new)
+{
+	struct ordered_event *last = oe->last;
+	u64 timestamp = new->timestamp;
+	struct list_head *p;
+
+	++oe->nr_events;
+	oe->last = new;
+
+	pr_oe_time2(timestamp, "queue_event nr_events %u\n", oe->nr_events);
+
+	if (!last) {
+		list_add(&new->list, &oe->events);
+		oe->max_timestamp = timestamp;
+		return;
+	}
+
+	/*
+	 * last event might point to some random place in the list as it's
+	 * the last queued event. We expect that the new event is close to
+	 * this.
+	 */
+	if (last->timestamp <= timestamp) {
+		while (last->timestamp <= timestamp) {
+			p = last->list.next;
+			if (p == &oe->events) {
+				list_add_tail(&new->list, &oe->events);
+				oe->max_timestamp = timestamp;
+				return;
+			}
+			last = list_entry(p, struct ordered_event, list);
+		}
+		list_add_tail(&new->list, &last->list);
+	} else {
+		while (last->timestamp > timestamp) {
+			p = last->list.prev;
+			if (p == &oe->events) {
+				list_add(&new->list, &oe->events);
+				return;
+			}
+			last = list_entry(p, struct ordered_event, list);
+		}
+		list_add(&new->list, &last->list);
+	}
+}
+
+static union perf_event *__dup_event(struct ordered_events *oe,
+				     union perf_event *event)
+{
+	union perf_event *new_event = NULL;
+
+	if (oe->cur_alloc_size < oe->max_alloc_size) {
+		new_event = memdup(event, event->header.size);
+		if (new_event)
+			oe->cur_alloc_size += event->header.size;
+	}
+
+	return new_event;
+}
+
+static union perf_event *dup_event(struct ordered_events *oe,
+				   union perf_event *event)
+{
+	return oe->copy_on_queue ? __dup_event(oe, event) : event;
+}
+
+static void __free_dup_event(struct ordered_events *oe, union perf_event *event)
+{
+	if (event) {
+		oe->cur_alloc_size -= event->header.size;
+		free(event);
+	}
+}
+
+static void free_dup_event(struct ordered_events *oe, union perf_event *event)
+{
+	if (oe->copy_on_queue)
+		__free_dup_event(oe, event);
+}
+
+#define MAX_SAMPLE_BUFFER	(64 * 1024 / sizeof(struct ordered_event))
+static struct ordered_event *alloc_event(struct ordered_events *oe,
+					 union perf_event *event)
+{
+	struct list_head *cache = &oe->cache;
+	struct ordered_event *new = NULL;
+	union perf_event *new_event;
+	size_t size;
+
+	new_event = dup_event(oe, event);
+	if (!new_event)
+		return NULL;
+
+	/*
+	 * We maintain the following scheme of buffers for ordered
+	 * event allocation:
+	 *
+	 *   to_free list -> buffer1 (64K)
+	 *                   buffer2 (64K)
+	 *                   ...
+	 *
+	 * Each buffer keeps an array of ordered events objects:
+	 *    buffer -> event[0]
+	 *              event[1]
+	 *              ...
+	 *
+	 * Each allocated ordered event is linked to one of
+	 * following lists:
+	 *   - time ordered list 'events'
+	 *   - list of currently removed events 'cache'
+	 *
+	 * Allocation of the ordered event uses the following order
+	 * to get the memory:
+	 *   - use recently removed object from 'cache' list
+	 *   - use available object in current allocation buffer
+	 *   - allocate new buffer if the current buffer is full
+	 *
+	 * Removal of ordered event object moves it from events to
+	 * the cache list.
+	 */
+	size = sizeof(*oe->buffer) + MAX_SAMPLE_BUFFER * sizeof(*new);
+
+	if (!list_empty(cache)) {
+		new = list_entry(cache->next, struct ordered_event, list);
+		list_del_init(&new->list);
+	} else if (oe->buffer) {
+		new = &oe->buffer->event[oe->buffer_idx];
+		if (++oe->buffer_idx == MAX_SAMPLE_BUFFER)
+			oe->buffer = NULL;
+	} else if ((oe->cur_alloc_size + size) < oe->max_alloc_size) {
+		oe->buffer = malloc(size);
+		if (!oe->buffer) {
+			free_dup_event(oe, new_event);
+			return NULL;
+		}
+
+		pr("alloc size %" PRIu64 "B (+%zu), max %" PRIu64 "B\n",
+		   oe->cur_alloc_size, size, oe->max_alloc_size);
+
+		oe->cur_alloc_size += size;
+		list_add(&oe->buffer->list, &oe->to_free);
+
+		oe->buffer_idx = 1;
+		new = &oe->buffer->event[0];
+	} else {
+		pr("allocation limit reached %" PRIu64 "B\n", oe->max_alloc_size);
+		return NULL;
+	}
+
+	new->event = new_event;
+	return new;
+}
+
+static struct ordered_event *
+ordered_events__new_event(struct ordered_events *oe, u64 timestamp,
+		    union perf_event *event)
+{
+	struct ordered_event *new;
+
+	new = alloc_event(oe, event);
+	if (new) {
+		new->timestamp = timestamp;
+		queue_event(oe, new);
+	}
+
+	return new;
+}
+
+void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event)
+{
+	list_move(&event->list, &oe->cache);
+	oe->nr_events--;
+	free_dup_event(oe, event->event);
+	event->event = NULL;
+}
+
+int ordered_events__queue(struct ordered_events *oe, union perf_event *event,
+			  u64 timestamp, u64 file_offset, const char *file_path)
+{
+	struct ordered_event *oevent;
+
+	if (!timestamp || timestamp == ~0ULL)
+		return -ETIME;
+
+	if (timestamp < oe->last_flush) {
+		pr_oe_time(timestamp,      "out of order event\n");
+		pr_oe_time(oe->last_flush, "last flush, last_flush_type %d\n",
+			   oe->last_flush_type);
+
+		oe->nr_unordered_events++;
+	}
+
+	oevent = ordered_events__new_event(oe, timestamp, event);
+	if (!oevent) {
+		ordered_events__flush(oe, OE_FLUSH__HALF);
+		oevent = ordered_events__new_event(oe, timestamp, event);
+	}
+
+	if (!oevent)
+		return -ENOMEM;
+
+	oevent->file_offset = file_offset;
+	oevent->file_path = file_path;
+	return 0;
+}
+
+static int do_flush(struct ordered_events *oe, bool show_progress)
+{
+	struct list_head *head = &oe->events;
+	struct ordered_event *tmp, *iter;
+	u64 limit = oe->next_flush;
+	u64 last_ts = oe->last ? oe->last->timestamp : 0ULL;
+	struct ui_progress prog;
+	int ret;
+
+	if (!limit)
+		return 0;
+
+	if (show_progress)
+		ui_progress__init(&prog, oe->nr_events, "Processing time ordered events...");
+
+	list_for_each_entry_safe(iter, tmp, head, list) {
+		if (session_done())
+			return 0;
+
+		if (iter->timestamp > limit)
+			break;
+		ret = oe->deliver(oe, iter);
+		if (ret)
+			return ret;
+
+		ordered_events__delete(oe, iter);
+		oe->last_flush = iter->timestamp;
+
+		if (show_progress)
+			ui_progress__update(&prog, 1);
+	}
+
+	if (list_empty(head))
+		oe->last = NULL;
+	else if (last_ts <= limit)
+		oe->last = list_entry(head->prev, struct ordered_event, list);
+
+	if (show_progress)
+		ui_progress__finish();
+
+	return 0;
+}
+
+static int __ordered_events__flush(struct ordered_events *oe, enum oe_flush how,
+				   u64 timestamp)
+{
+	static const char * const str[] = {
+		"NONE",
+		"FINAL",
+		"ROUND",
+		"HALF ",
+		"TOP  ",
+		"TIME ",
+	};
+	int err;
+	bool show_progress = false;
+
+	if (oe->nr_events == 0)
+		return 0;
+
+	switch (how) {
+	case OE_FLUSH__FINAL:
+		show_progress = true;
+		__fallthrough;
+	case OE_FLUSH__TOP:
+		oe->next_flush = ULLONG_MAX;
+		break;
+
+	case OE_FLUSH__HALF:
+	{
+		struct ordered_event *first, *last;
+		struct list_head *head = &oe->events;
+
+		first = list_entry(head->next, struct ordered_event, list);
+		last = oe->last;
+
+		/* Warn if we are called before any event got allocated. */
+		if (WARN_ONCE(!last || list_empty(head), "empty queue"))
+			return 0;
+
+		oe->next_flush  = first->timestamp;
+		oe->next_flush += (last->timestamp - first->timestamp) / 2;
+		break;
+	}
+
+	case OE_FLUSH__TIME:
+		oe->next_flush = timestamp;
+		show_progress = false;
+		break;
+
+	case OE_FLUSH__ROUND:
+	case OE_FLUSH__NONE:
+	default:
+		break;
+	}
+
+	pr_oe_time(oe->next_flush, "next_flush - ordered_events__flush PRE  %s, nr_events %u\n",
+		   str[how], oe->nr_events);
+	pr_oe_time(oe->max_timestamp, "max_timestamp\n");
+
+	err = do_flush(oe, show_progress);
+
+	if (!err) {
+		if (how == OE_FLUSH__ROUND)
+			oe->next_flush = oe->max_timestamp;
+
+		oe->last_flush_type = how;
+	}
+
+	pr_oe_time(oe->next_flush, "next_flush - ordered_events__flush POST %s, nr_events %u\n",
+		   str[how], oe->nr_events);
+	pr_oe_time(oe->last_flush, "last_flush\n");
+
+	return err;
+}
+
+int ordered_events__flush(struct ordered_events *oe, enum oe_flush how)
+{
+	return __ordered_events__flush(oe, how, 0);
+}
+
+int ordered_events__flush_time(struct ordered_events *oe, u64 timestamp)
+{
+	return __ordered_events__flush(oe, OE_FLUSH__TIME, timestamp);
+}
+
+u64 ordered_events__first_time(struct ordered_events *oe)
+{
+	struct ordered_event *event;
+
+	if (list_empty(&oe->events))
+		return 0;
+
+	event = list_first_entry(&oe->events, struct ordered_event, list);
+	return event->timestamp;
+}
+
+void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver,
+			  void *data)
+{
+	INIT_LIST_HEAD(&oe->events);
+	INIT_LIST_HEAD(&oe->cache);
+	INIT_LIST_HEAD(&oe->to_free);
+	oe->max_alloc_size = (u64) -1;
+	oe->cur_alloc_size = 0;
+	oe->deliver	   = deliver;
+	oe->data	   = data;
+}
+
+static void
+ordered_events_buffer__free(struct ordered_events_buffer *buffer,
+			    unsigned int max, struct ordered_events *oe)
+{
+	if (oe->copy_on_queue) {
+		unsigned int i;
+
+		for (i = 0; i < max; i++)
+			__free_dup_event(oe, buffer->event[i].event);
+	}
+
+	free(buffer);
+}
+
+void ordered_events__free(struct ordered_events *oe)
+{
+	struct ordered_events_buffer *buffer, *tmp;
+
+	if (list_empty(&oe->to_free))
+		return;
+
+	/*
+	 * Current buffer might not have all the events allocated
+	 * yet, we need to free only allocated ones ...
+	 */
+	if (oe->buffer) {
+		list_del_init(&oe->buffer->list);
+		ordered_events_buffer__free(oe->buffer, oe->buffer_idx, oe);
+	}
+
+	/* ... and continue with the rest */
+	list_for_each_entry_safe(buffer, tmp, &oe->to_free, list) {
+		list_del_init(&buffer->list);
+		ordered_events_buffer__free(buffer, MAX_SAMPLE_BUFFER, oe);
+	}
+}
+
+void ordered_events__reinit(struct ordered_events *oe)
+{
+	ordered_events__deliver_t old_deliver = oe->deliver;
+
+	ordered_events__free(oe);
+	memset(oe, '\0', sizeof(*oe));
+	ordered_events__init(oe, old_deliver, oe->data);
+}
diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h
new file mode 100644
index 000000000..8febbd7c9
--- /dev/null
+++ b/tools/perf/util/ordered-events.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ORDERED_EVENTS_H
+#define __ORDERED_EVENTS_H
+
+#include <linux/types.h>
+
+struct perf_sample;
+
+struct ordered_event {
+	u64			timestamp;
+	u64			file_offset;
+	const char		*file_path;
+	union perf_event	*event;
+	struct list_head	list;
+};
+
+enum oe_flush {
+	OE_FLUSH__NONE,
+	OE_FLUSH__FINAL,
+	OE_FLUSH__ROUND,
+	OE_FLUSH__HALF,
+	OE_FLUSH__TOP,
+	OE_FLUSH__TIME,
+};
+
+struct ordered_events;
+
+typedef int (*ordered_events__deliver_t)(struct ordered_events *oe,
+					 struct ordered_event *event);
+
+struct ordered_events_buffer {
+	struct list_head	list;
+	struct ordered_event	event[];
+};
+
+struct ordered_events {
+	u64				 last_flush;
+	u64				 next_flush;
+	u64				 max_timestamp;
+	u64				 max_alloc_size;
+	u64				 cur_alloc_size;
+	struct list_head		 events;
+	struct list_head		 cache;
+	struct list_head		 to_free;
+	struct ordered_events_buffer	*buffer;
+	struct ordered_event		*last;
+	ordered_events__deliver_t	 deliver;
+	int				 buffer_idx;
+	unsigned int			 nr_events;
+	enum oe_flush			 last_flush_type;
+	u32				 nr_unordered_events;
+	bool				 copy_on_queue;
+	void				*data;
+};
+
+int ordered_events__queue(struct ordered_events *oe, union perf_event *event,
+			  u64 timestamp, u64 file_offset, const char *file_path);
+void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event);
+int ordered_events__flush(struct ordered_events *oe, enum oe_flush how);
+int ordered_events__flush_time(struct ordered_events *oe, u64 timestamp);
+void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver,
+			  void *data);
+void ordered_events__free(struct ordered_events *oe);
+void ordered_events__reinit(struct ordered_events *oe);
+u64 ordered_events__first_time(struct ordered_events *oe);
+
+static inline
+void ordered_events__set_alloc_size(struct ordered_events *oe, u64 size)
+{
+	oe->max_alloc_size = size;
+}
+
+static inline
+void ordered_events__set_copy_on_queue(struct ordered_events *oe, bool copy)
+{
+	oe->copy_on_queue = copy;
+}
+
+static inline u64 ordered_events__last_flush_time(struct ordered_events *oe)
+{
+	return oe->last_flush;
+}
+
+#endif /* __ORDERED_EVENTS_H */
diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c
new file mode 100644
index 000000000..31faf2bb4
--- /dev/null
+++ b/tools/perf/util/parse-branch-options.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util/debug.h"
+#include "util/event.h"
+#include <subcmd/parse-options.h>
+#include "util/parse-branch-options.h"
+#include <stdlib.h>
+#include <string.h>
+
+#define BRANCH_OPT(n, m) \
+	{ .name = n, .mode = (m) }
+
+#define BRANCH_END { .name = NULL }
+
+struct branch_mode {
+	const char *name;
+	int mode;
+};
+
+static const struct branch_mode branch_modes[] = {
+	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
+	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
+	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
+	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
+	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
+	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
+	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
+	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
+	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
+	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
+	BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
+	BRANCH_OPT("ind_jmp", PERF_SAMPLE_BRANCH_IND_JUMP),
+	BRANCH_OPT("call", PERF_SAMPLE_BRANCH_CALL),
+	BRANCH_OPT("save_type", PERF_SAMPLE_BRANCH_TYPE_SAVE),
+	BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK),
+	BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE),
+	BRANCH_END
+};
+
+int parse_branch_str(const char *str, __u64 *mode)
+{
+#define ONLY_PLM \
+	(PERF_SAMPLE_BRANCH_USER	|\
+	 PERF_SAMPLE_BRANCH_KERNEL	|\
+	 PERF_SAMPLE_BRANCH_HV)
+
+	int ret = 0;
+	char *p, *s;
+	char *os = NULL;
+	const struct branch_mode *br;
+
+	if (str == NULL) {
+		*mode = PERF_SAMPLE_BRANCH_ANY;
+		return 0;
+	}
+
+	/* because str is read-only */
+	s = os = strdup(str);
+	if (!s)
+		return -1;
+
+	for (;;) {
+		p = strchr(s, ',');
+		if (p)
+			*p = '\0';
+
+		for (br = branch_modes; br->name; br++) {
+			if (!strcasecmp(s, br->name))
+				break;
+		}
+		if (!br->name) {
+			ret = -1;
+			pr_warning("unknown branch filter %s,"
+				    " check man page\n", s);
+			goto error;
+		}
+
+		*mode |= br->mode;
+
+		if (!p)
+			break;
+
+		s = p + 1;
+	}
+
+	/* default to any branch */
+	if ((*mode & ~ONLY_PLM) == 0) {
+		*mode = PERF_SAMPLE_BRANCH_ANY;
+	}
+error:
+	free(os);
+	return ret;
+}
+
+int
+parse_branch_stack(const struct option *opt, const char *str, int unset)
+{
+	__u64 *mode = (__u64 *)opt->value;
+
+	if (unset)
+		return 0;
+
+	/*
+	 * cannot set it twice, -b + --branch-filter for instance
+	 */
+	if (*mode) {
+		pr_err("Error: Can't use --branch-any (-b) with --branch-filter (-j).\n");
+		return -1;
+	}
+
+	return parse_branch_str(str, mode);
+}
diff --git a/tools/perf/util/parse-branch-options.h b/tools/perf/util/parse-branch-options.h
new file mode 100644
index 000000000..11d172273
--- /dev/null
+++ b/tools/perf/util/parse-branch-options.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_PARSE_BRANCH_OPTIONS_H
+#define _PERF_PARSE_BRANCH_OPTIONS_H 1
+#include <stdint.h>
+int parse_branch_stack(const struct option *opt, const char *str, int unset);
+int parse_branch_str(const char *str, __u64 *mode);
+#endif /* _PERF_PARSE_BRANCH_OPTIONS_H */
diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c
new file mode 100644
index 000000000..7c9f9150b
--- /dev/null
+++ b/tools/perf/util/parse-events-hybrid.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/err.h>
+#include <linux/zalloc.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/param.h>
+#include "evlist.h"
+#include "evsel.h"
+#include "parse-events.h"
+#include "parse-events-hybrid.h"
+#include "debug.h"
+#include "pmu.h"
+#include "pmu-hybrid.h"
+#include "perf.h"
+
+static void config_hybrid_attr(struct perf_event_attr *attr,
+			       int type, int pmu_type)
+{
+	/*
+	 * attr.config layout for type PERF_TYPE_HARDWARE and
+	 * PERF_TYPE_HW_CACHE
+	 *
+	 * PERF_TYPE_HARDWARE:                 0xEEEEEEEE000000AA
+	 *                                     AA: hardware event ID
+	 *                                     EEEEEEEE: PMU type ID
+	 * PERF_TYPE_HW_CACHE:                 0xEEEEEEEE00DDCCBB
+	 *                                     BB: hardware cache ID
+	 *                                     CC: hardware cache op ID
+	 *                                     DD: hardware cache op result ID
+	 *                                     EEEEEEEE: PMU type ID
+	 * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied.
+	 */
+	attr->type = type;
+	attr->config = (attr->config & PERF_HW_EVENT_MASK) |
+			((__u64)pmu_type << PERF_PMU_TYPE_SHIFT);
+}
+
+static int create_event_hybrid(__u32 config_type, int *idx,
+			       struct list_head *list,
+			       struct perf_event_attr *attr, const char *name,
+			       const char *metric_id,
+			       struct list_head *config_terms,
+			       struct perf_pmu *pmu)
+{
+	struct evsel *evsel;
+	__u32 type = attr->type;
+	__u64 config = attr->config;
+
+	config_hybrid_attr(attr, config_type, pmu->type);
+
+	/*
+	 * Some hybrid hardware cache events are only available on one CPU
+	 * PMU. For example, the 'L1-dcache-load-misses' is only available
+	 * on cpu_core, while the 'L1-icache-loads' is only available on
+	 * cpu_atom. We need to remove "not supported" hybrid cache events.
+	 */
+	if (attr->type == PERF_TYPE_HW_CACHE
+	    && !is_event_supported(attr->type, attr->config))
+		return 0;
+
+	evsel = parse_events__add_event_hybrid(list, idx, attr, name, metric_id,
+					       pmu, config_terms);
+	if (evsel) {
+		evsel->pmu_name = strdup(pmu->name);
+		if (!evsel->pmu_name)
+			return -ENOMEM;
+	} else
+		return -ENOMEM;
+	attr->type = type;
+	attr->config = config;
+	return 0;
+}
+
+static int pmu_cmp(struct parse_events_state *parse_state,
+		   struct perf_pmu *pmu)
+{
+	if (parse_state->evlist && parse_state->evlist->hybrid_pmu_name)
+		return strcmp(parse_state->evlist->hybrid_pmu_name, pmu->name);
+
+	if (parse_state->hybrid_pmu_name)
+		return strcmp(parse_state->hybrid_pmu_name, pmu->name);
+
+	return 0;
+}
+
+static int add_hw_hybrid(struct parse_events_state *parse_state,
+			 struct list_head *list, struct perf_event_attr *attr,
+			 const char *name, const char *metric_id,
+			 struct list_head *config_terms)
+{
+	struct perf_pmu *pmu;
+	int ret;
+
+	perf_pmu__for_each_hybrid_pmu(pmu) {
+		LIST_HEAD(terms);
+
+		if (pmu_cmp(parse_state, pmu))
+			continue;
+
+		copy_config_terms(&terms, config_terms);
+		ret = create_event_hybrid(PERF_TYPE_HARDWARE,
+					  &parse_state->idx, list, attr, name,
+					  metric_id, &terms, pmu);
+		free_config_terms(&terms);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int create_raw_event_hybrid(int *idx, struct list_head *list,
+				   struct perf_event_attr *attr,
+				   const char *name,
+				   const char *metric_id,
+				   struct list_head *config_terms,
+				   struct perf_pmu *pmu)
+{
+	struct evsel *evsel;
+
+	attr->type = pmu->type;
+	evsel = parse_events__add_event_hybrid(list, idx, attr, name, metric_id,
+					       pmu, config_terms);
+	if (evsel)
+		evsel->pmu_name = strdup(pmu->name);
+	else
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int add_raw_hybrid(struct parse_events_state *parse_state,
+			  struct list_head *list, struct perf_event_attr *attr,
+			  const char *name, const char *metric_id,
+			  struct list_head *config_terms)
+{
+	struct perf_pmu *pmu;
+	int ret;
+
+	perf_pmu__for_each_hybrid_pmu(pmu) {
+		LIST_HEAD(terms);
+
+		if (pmu_cmp(parse_state, pmu))
+			continue;
+
+		copy_config_terms(&terms, config_terms);
+		ret = create_raw_event_hybrid(&parse_state->idx, list, attr,
+					      name, metric_id, &terms, pmu);
+		free_config_terms(&terms);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state,
+				     struct list_head *list,
+				     struct perf_event_attr *attr,
+				     const char *name, const char *metric_id,
+				     struct list_head *config_terms,
+				     bool *hybrid)
+{
+	*hybrid = false;
+	if (attr->type == PERF_TYPE_SOFTWARE)
+		return 0;
+
+	if (!perf_pmu__has_hybrid())
+		return 0;
+
+	*hybrid = true;
+	if (attr->type != PERF_TYPE_RAW) {
+		return add_hw_hybrid(parse_state, list, attr, name, metric_id,
+				     config_terms);
+	}
+
+	return add_raw_hybrid(parse_state, list, attr, name, metric_id,
+			      config_terms);
+}
+
+int parse_events__add_cache_hybrid(struct list_head *list, int *idx,
+				   struct perf_event_attr *attr,
+				   const char *name,
+				   const char *metric_id,
+				   struct list_head *config_terms,
+				   bool *hybrid,
+				   struct parse_events_state *parse_state)
+{
+	struct perf_pmu *pmu;
+	int ret;
+
+	*hybrid = false;
+	if (!perf_pmu__has_hybrid())
+		return 0;
+
+	*hybrid = true;
+	perf_pmu__for_each_hybrid_pmu(pmu) {
+		LIST_HEAD(terms);
+
+		if (pmu_cmp(parse_state, pmu))
+			continue;
+
+		copy_config_terms(&terms, config_terms);
+		ret = create_event_hybrid(PERF_TYPE_HW_CACHE, idx, list,
+					  attr, name, metric_id, &terms, pmu);
+		free_config_terms(&terms);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
diff --git a/tools/perf/util/parse-events-hybrid.h b/tools/perf/util/parse-events-hybrid.h
new file mode 100644
index 000000000..cbc05fec0
--- /dev/null
+++ b/tools/perf/util/parse-events-hybrid.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_PARSE_EVENTS_HYBRID_H
+#define __PERF_PARSE_EVENTS_HYBRID_H
+
+#include <linux/list.h>
+#include <stdbool.h>
+#include <linux/types.h>
+#include <linux/perf_event.h>
+#include <string.h>
+
+int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state,
+				     struct list_head *list,
+				     struct perf_event_attr *attr,
+				     const char *name, const char *metric_id,
+				     struct list_head *config_terms,
+				     bool *hybrid);
+
+int parse_events__add_cache_hybrid(struct list_head *list, int *idx,
+				   struct perf_event_attr *attr,
+				   const char *name, const char *metric_id,
+				   struct list_head *config_terms,
+				   bool *hybrid,
+				   struct parse_events_state *parse_state);
+
+#endif /* __PERF_PARSE_EVENTS_HYBRID_H */
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
new file mode 100644
index 000000000..5973f46c2
--- /dev/null
+++ b/tools/perf/util/parse-events.c
@@ -0,0 +1,2839 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/hw_breakpoint.h>
+#include <linux/err.h>
+#include <linux/zalloc.h>
+#include <dirent.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <sys/param.h>
+#include "term.h"
+#include "evlist.h"
+#include "evsel.h"
+#include <subcmd/parse-options.h>
+#include "parse-events.h"
+#include "string2.h"
+#include "strlist.h"
+#include "bpf-loader.h"
+#include "debug.h"
+#include <api/fs/tracing_path.h>
+#include <perf/cpumap.h>
+#include "parse-events-bison.h"
+#include "parse-events-flex.h"
+#include "pmu.h"
+#include "asm/bug.h"
+#include "util/parse-branch-options.h"
+#include "util/evsel_config.h"
+#include "util/event.h"
+#include "perf.h"
+#include "util/parse-events-hybrid.h"
+#include "util/pmu-hybrid.h"
+#include "tracepoint.h"
+#include "thread_map.h"
+
+#define MAX_NAME_LEN 100
+
+struct perf_pmu_event_symbol {
+	char	*symbol;
+	enum perf_pmu_event_symbol_type	type;
+};
+
+#ifdef PARSER_DEBUG
+extern int parse_events_debug;
+#endif
+int parse_events_parse(void *parse_state, void *scanner);
+static int get_config_terms(struct list_head *head_config,
+			    struct list_head *head_terms __maybe_unused);
+static int parse_events__with_hybrid_pmu(struct parse_events_state *parse_state,
+					 const char *str, char *pmu_name,
+					 struct list_head *list);
+
+static struct perf_pmu_event_symbol *perf_pmu_events_list;
+/*
+ * The variable indicates the number of supported pmu event symbols.
+ * 0 means not initialized and ready to init
+ * -1 means failed to init, don't try anymore
+ * >0 is the number of supported pmu event symbols
+ */
+static int perf_pmu_events_list_num;
+
+struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = {
+	[PERF_COUNT_HW_CPU_CYCLES] = {
+		.symbol = "cpu-cycles",
+		.alias  = "cycles",
+	},
+	[PERF_COUNT_HW_INSTRUCTIONS] = {
+		.symbol = "instructions",
+		.alias  = "",
+	},
+	[PERF_COUNT_HW_CACHE_REFERENCES] = {
+		.symbol = "cache-references",
+		.alias  = "",
+	},
+	[PERF_COUNT_HW_CACHE_MISSES] = {
+		.symbol = "cache-misses",
+		.alias  = "",
+	},
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = {
+		.symbol = "branch-instructions",
+		.alias  = "branches",
+	},
+	[PERF_COUNT_HW_BRANCH_MISSES] = {
+		.symbol = "branch-misses",
+		.alias  = "",
+	},
+	[PERF_COUNT_HW_BUS_CYCLES] = {
+		.symbol = "bus-cycles",
+		.alias  = "",
+	},
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = {
+		.symbol = "stalled-cycles-frontend",
+		.alias  = "idle-cycles-frontend",
+	},
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = {
+		.symbol = "stalled-cycles-backend",
+		.alias  = "idle-cycles-backend",
+	},
+	[PERF_COUNT_HW_REF_CPU_CYCLES] = {
+		.symbol = "ref-cycles",
+		.alias  = "",
+	},
+};
+
+struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
+	[PERF_COUNT_SW_CPU_CLOCK] = {
+		.symbol = "cpu-clock",
+		.alias  = "",
+	},
+	[PERF_COUNT_SW_TASK_CLOCK] = {
+		.symbol = "task-clock",
+		.alias  = "",
+	},
+	[PERF_COUNT_SW_PAGE_FAULTS] = {
+		.symbol = "page-faults",
+		.alias  = "faults",
+	},
+	[PERF_COUNT_SW_CONTEXT_SWITCHES] = {
+		.symbol = "context-switches",
+		.alias  = "cs",
+	},
+	[PERF_COUNT_SW_CPU_MIGRATIONS] = {
+		.symbol = "cpu-migrations",
+		.alias  = "migrations",
+	},
+	[PERF_COUNT_SW_PAGE_FAULTS_MIN] = {
+		.symbol = "minor-faults",
+		.alias  = "",
+	},
+	[PERF_COUNT_SW_PAGE_FAULTS_MAJ] = {
+		.symbol = "major-faults",
+		.alias  = "",
+	},
+	[PERF_COUNT_SW_ALIGNMENT_FAULTS] = {
+		.symbol = "alignment-faults",
+		.alias  = "",
+	},
+	[PERF_COUNT_SW_EMULATION_FAULTS] = {
+		.symbol = "emulation-faults",
+		.alias  = "",
+	},
+	[PERF_COUNT_SW_DUMMY] = {
+		.symbol = "dummy",
+		.alias  = "",
+	},
+	[PERF_COUNT_SW_BPF_OUTPUT] = {
+		.symbol = "bpf-output",
+		.alias  = "",
+	},
+	[PERF_COUNT_SW_CGROUP_SWITCHES] = {
+		.symbol = "cgroup-switches",
+		.alias  = "",
+	},
+};
+
+bool is_event_supported(u8 type, u64 config)
+{
+	bool ret = true;
+	int open_return;
+	struct evsel *evsel;
+	struct perf_event_attr attr = {
+		.type = type,
+		.config = config,
+		.disabled = 1,
+	};
+	struct perf_thread_map *tmap = thread_map__new_by_tid(0);
+
+	if (tmap == NULL)
+		return false;
+
+	evsel = evsel__new(&attr);
+	if (evsel) {
+		open_return = evsel__open(evsel, NULL, tmap);
+		ret = open_return >= 0;
+
+		if (open_return == -EACCES) {
+			/*
+			 * This happens if the paranoid value
+			 * /proc/sys/kernel/perf_event_paranoid is set to 2
+			 * Re-run with exclude_kernel set; we don't do that
+			 * by default as some ARM machines do not support it.
+			 *
+			 */
+			evsel->core.attr.exclude_kernel = 1;
+			ret = evsel__open(evsel, NULL, tmap) >= 0;
+		}
+		evsel__delete(evsel);
+	}
+
+	perf_thread_map__put(tmap);
+	return ret;
+}
+
+const char *event_type(int type)
+{
+	switch (type) {
+	case PERF_TYPE_HARDWARE:
+		return "hardware";
+
+	case PERF_TYPE_SOFTWARE:
+		return "software";
+
+	case PERF_TYPE_TRACEPOINT:
+		return "tracepoint";
+
+	case PERF_TYPE_HW_CACHE:
+		return "hardware-cache";
+
+	default:
+		break;
+	}
+
+	return "unknown";
+}
+
+static char *get_config_str(struct list_head *head_terms, int type_term)
+{
+	struct parse_events_term *term;
+
+	if (!head_terms)
+		return NULL;
+
+	list_for_each_entry(term, head_terms, list)
+		if (term->type_term == type_term)
+			return term->val.str;
+
+	return NULL;
+}
+
+static char *get_config_metric_id(struct list_head *head_terms)
+{
+	return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_METRIC_ID);
+}
+
+static char *get_config_name(struct list_head *head_terms)
+{
+	return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_NAME);
+}
+
+static struct evsel *
+__add_event(struct list_head *list, int *idx,
+	    struct perf_event_attr *attr,
+	    bool init_attr,
+	    const char *name, const char *metric_id, struct perf_pmu *pmu,
+	    struct list_head *config_terms, bool auto_merge_stats,
+	    const char *cpu_list)
+{
+	struct evsel *evsel;
+	struct perf_cpu_map *cpus = pmu ? perf_cpu_map__get(pmu->cpus) :
+			       cpu_list ? perf_cpu_map__new(cpu_list) : NULL;
+
+	if (pmu)
+		perf_pmu__warn_invalid_formats(pmu);
+
+	if (pmu && attr->type == PERF_TYPE_RAW)
+		perf_pmu__warn_invalid_config(pmu, attr->config, name);
+
+	if (init_attr)
+		event_attr_init(attr);
+
+	evsel = evsel__new_idx(attr, *idx);
+	if (!evsel) {
+		perf_cpu_map__put(cpus);
+		return NULL;
+	}
+
+	(*idx)++;
+	evsel->core.cpus = cpus;
+	evsel->core.own_cpus = perf_cpu_map__get(cpus);
+	evsel->core.requires_cpu = pmu ? pmu->is_uncore : false;
+	evsel->auto_merge_stats = auto_merge_stats;
+
+	if (name)
+		evsel->name = strdup(name);
+
+	if (metric_id)
+		evsel->metric_id = strdup(metric_id);
+
+	if (config_terms)
+		list_splice_init(config_terms, &evsel->config_terms);
+
+	if (list)
+		list_add_tail(&evsel->core.node, list);
+
+	return evsel;
+}
+
+struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr,
+				      const char *name, const char *metric_id,
+				      struct perf_pmu *pmu)
+{
+	return __add_event(/*list=*/NULL, &idx, attr, /*init_attr=*/false, name,
+			   metric_id, pmu, /*config_terms=*/NULL,
+			   /*auto_merge_stats=*/false, /*cpu_list=*/NULL);
+}
+
+static int add_event(struct list_head *list, int *idx,
+		     struct perf_event_attr *attr, const char *name,
+		     const char *metric_id, struct list_head *config_terms)
+{
+	return __add_event(list, idx, attr, /*init_attr*/true, name, metric_id,
+			   /*pmu=*/NULL, config_terms,
+			   /*auto_merge_stats=*/false, /*cpu_list=*/NULL) ? 0 : -ENOMEM;
+}
+
+static int add_event_tool(struct list_head *list, int *idx,
+			  enum perf_tool_event tool_event)
+{
+	struct evsel *evsel;
+	struct perf_event_attr attr = {
+		.type = PERF_TYPE_SOFTWARE,
+		.config = PERF_COUNT_SW_DUMMY,
+	};
+
+	evsel = __add_event(list, idx, &attr, /*init_attr=*/true, /*name=*/NULL,
+			    /*metric_id=*/NULL, /*pmu=*/NULL,
+			    /*config_terms=*/NULL, /*auto_merge_stats=*/false,
+			    /*cpu_list=*/"0");
+	if (!evsel)
+		return -ENOMEM;
+	evsel->tool_event = tool_event;
+	if (tool_event == PERF_TOOL_DURATION_TIME
+	    || tool_event == PERF_TOOL_USER_TIME
+	    || tool_event == PERF_TOOL_SYSTEM_TIME) {
+		free((char *)evsel->unit);
+		evsel->unit = strdup("ns");
+	}
+	return 0;
+}
+
+static int parse_aliases(char *str, const char *const names[][EVSEL__MAX_ALIASES], int size)
+{
+	int i, j;
+	int n, longest = -1;
+
+	for (i = 0; i < size; i++) {
+		for (j = 0; j < EVSEL__MAX_ALIASES && names[i][j]; j++) {
+			n = strlen(names[i][j]);
+			if (n > longest && !strncasecmp(str, names[i][j], n))
+				longest = n;
+		}
+		if (longest > 0)
+			return i;
+	}
+
+	return -1;
+}
+
+typedef int config_term_func_t(struct perf_event_attr *attr,
+			       struct parse_events_term *term,
+			       struct parse_events_error *err);
+static int config_term_common(struct perf_event_attr *attr,
+			      struct parse_events_term *term,
+			      struct parse_events_error *err);
+static int config_attr(struct perf_event_attr *attr,
+		       struct list_head *head,
+		       struct parse_events_error *err,
+		       config_term_func_t config_term);
+
+int parse_events_add_cache(struct list_head *list, int *idx,
+			   char *type, char *op_result1, char *op_result2,
+			   struct parse_events_error *err,
+			   struct list_head *head_config,
+			   struct parse_events_state *parse_state)
+{
+	struct perf_event_attr attr;
+	LIST_HEAD(config_terms);
+	char name[MAX_NAME_LEN];
+	const char *config_name, *metric_id;
+	int cache_type = -1, cache_op = -1, cache_result = -1;
+	char *op_result[2] = { op_result1, op_result2 };
+	int i, n, ret;
+	bool hybrid;
+
+	/*
+	 * No fallback - if we cannot get a clear cache type
+	 * then bail out:
+	 */
+	cache_type = parse_aliases(type, evsel__hw_cache, PERF_COUNT_HW_CACHE_MAX);
+	if (cache_type == -1)
+		return -EINVAL;
+
+	config_name = get_config_name(head_config);
+	n = snprintf(name, MAX_NAME_LEN, "%s", type);
+
+	for (i = 0; (i < 2) && (op_result[i]); i++) {
+		char *str = op_result[i];
+
+		n += snprintf(name + n, MAX_NAME_LEN - n, "-%s", str);
+
+		if (cache_op == -1) {
+			cache_op = parse_aliases(str, evsel__hw_cache_op,
+						 PERF_COUNT_HW_CACHE_OP_MAX);
+			if (cache_op >= 0) {
+				if (!evsel__is_cache_op_valid(cache_type, cache_op))
+					return -EINVAL;
+				continue;
+			}
+		}
+
+		if (cache_result == -1) {
+			cache_result = parse_aliases(str, evsel__hw_cache_result,
+						     PERF_COUNT_HW_CACHE_RESULT_MAX);
+			if (cache_result >= 0)
+				continue;
+		}
+	}
+
+	/*
+	 * Fall back to reads:
+	 */
+	if (cache_op == -1)
+		cache_op = PERF_COUNT_HW_CACHE_OP_READ;
+
+	/*
+	 * Fall back to accesses:
+	 */
+	if (cache_result == -1)
+		cache_result = PERF_COUNT_HW_CACHE_RESULT_ACCESS;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.config = cache_type | (cache_op << 8) | (cache_result << 16);
+	attr.type = PERF_TYPE_HW_CACHE;
+
+	if (head_config) {
+		if (config_attr(&attr, head_config, err,
+				config_term_common))
+			return -EINVAL;
+
+		if (get_config_terms(head_config, &config_terms))
+			return -ENOMEM;
+	}
+
+	metric_id = get_config_metric_id(head_config);
+	ret = parse_events__add_cache_hybrid(list, idx, &attr,
+					     config_name ? : name,
+					     metric_id,
+					     &config_terms,
+					     &hybrid, parse_state);
+	if (hybrid)
+		goto out_free_terms;
+
+	ret = add_event(list, idx, &attr, config_name ? : name, metric_id,
+			&config_terms);
+out_free_terms:
+	free_config_terms(&config_terms);
+	return ret;
+}
+
+static void tracepoint_error(struct parse_events_error *e, int err,
+			     const char *sys, const char *name)
+{
+	const char *str;
+	char help[BUFSIZ];
+
+	if (!e)
+		return;
+
+	/*
+	 * We get error directly from syscall errno ( > 0),
+	 * or from encoded pointer's error ( < 0).
+	 */
+	err = abs(err);
+
+	switch (err) {
+	case EACCES:
+		str = "can't access trace events";
+		break;
+	case ENOENT:
+		str = "unknown tracepoint";
+		break;
+	default:
+		str = "failed to add tracepoint";
+		break;
+	}
+
+	tracing_path__strerror_open_tp(err, help, sizeof(help), sys, name);
+	parse_events_error__handle(e, 0, strdup(str), strdup(help));
+}
+
+static int add_tracepoint(struct list_head *list, int *idx,
+			  const char *sys_name, const char *evt_name,
+			  struct parse_events_error *err,
+			  struct list_head *head_config)
+{
+	struct evsel *evsel = evsel__newtp_idx(sys_name, evt_name, (*idx)++);
+
+	if (IS_ERR(evsel)) {
+		tracepoint_error(err, PTR_ERR(evsel), sys_name, evt_name);
+		return PTR_ERR(evsel);
+	}
+
+	if (head_config) {
+		LIST_HEAD(config_terms);
+
+		if (get_config_terms(head_config, &config_terms))
+			return -ENOMEM;
+		list_splice(&config_terms, &evsel->config_terms);
+	}
+
+	list_add_tail(&evsel->core.node, list);
+	return 0;
+}
+
+static int add_tracepoint_multi_event(struct list_head *list, int *idx,
+				      const char *sys_name, const char *evt_name,
+				      struct parse_events_error *err,
+				      struct list_head *head_config)
+{
+	char *evt_path;
+	struct dirent *evt_ent;
+	DIR *evt_dir;
+	int ret = 0, found = 0;
+
+	evt_path = get_events_file(sys_name);
+	if (!evt_path) {
+		tracepoint_error(err, errno, sys_name, evt_name);
+		return -1;
+	}
+	evt_dir = opendir(evt_path);
+	if (!evt_dir) {
+		put_events_file(evt_path);
+		tracepoint_error(err, errno, sys_name, evt_name);
+		return -1;
+	}
+
+	while (!ret && (evt_ent = readdir(evt_dir))) {
+		if (!strcmp(evt_ent->d_name, ".")
+		    || !strcmp(evt_ent->d_name, "..")
+		    || !strcmp(evt_ent->d_name, "enable")
+		    || !strcmp(evt_ent->d_name, "filter"))
+			continue;
+
+		if (!strglobmatch(evt_ent->d_name, evt_name))
+			continue;
+
+		found++;
+
+		ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name,
+				     err, head_config);
+	}
+
+	if (!found) {
+		tracepoint_error(err, ENOENT, sys_name, evt_name);
+		ret = -1;
+	}
+
+	put_events_file(evt_path);
+	closedir(evt_dir);
+	return ret;
+}
+
+static int add_tracepoint_event(struct list_head *list, int *idx,
+				const char *sys_name, const char *evt_name,
+				struct parse_events_error *err,
+				struct list_head *head_config)
+{
+	return strpbrk(evt_name, "*?") ?
+	       add_tracepoint_multi_event(list, idx, sys_name, evt_name,
+					  err, head_config) :
+	       add_tracepoint(list, idx, sys_name, evt_name,
+			      err, head_config);
+}
+
+static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
+				    const char *sys_name, const char *evt_name,
+				    struct parse_events_error *err,
+				    struct list_head *head_config)
+{
+	struct dirent *events_ent;
+	DIR *events_dir;
+	int ret = 0;
+
+	events_dir = tracing_events__opendir();
+	if (!events_dir) {
+		tracepoint_error(err, errno, sys_name, evt_name);
+		return -1;
+	}
+
+	while (!ret && (events_ent = readdir(events_dir))) {
+		if (!strcmp(events_ent->d_name, ".")
+		    || !strcmp(events_ent->d_name, "..")
+		    || !strcmp(events_ent->d_name, "enable")
+		    || !strcmp(events_ent->d_name, "header_event")
+		    || !strcmp(events_ent->d_name, "header_page"))
+			continue;
+
+		if (!strglobmatch(events_ent->d_name, sys_name))
+			continue;
+
+		ret = add_tracepoint_event(list, idx, events_ent->d_name,
+					   evt_name, err, head_config);
+	}
+
+	closedir(events_dir);
+	return ret;
+}
+
+#ifdef HAVE_LIBBPF_SUPPORT
+struct __add_bpf_event_param {
+	struct parse_events_state *parse_state;
+	struct list_head *list;
+	struct list_head *head_config;
+};
+
+static int add_bpf_event(const char *group, const char *event, int fd, struct bpf_object *obj,
+			 void *_param)
+{
+	LIST_HEAD(new_evsels);
+	struct __add_bpf_event_param *param = _param;
+	struct parse_events_state *parse_state = param->parse_state;
+	struct list_head *list = param->list;
+	struct evsel *pos;
+	int err;
+	/*
+	 * Check if we should add the event, i.e. if it is a TP but starts with a '!',
+	 * then don't add the tracepoint, this will be used for something else, like
+	 * adding to a BPF_MAP_TYPE_PROG_ARRAY.
+	 *
+	 * See tools/perf/examples/bpf/augmented_raw_syscalls.c
+	 */
+	if (group[0] == '!')
+		return 0;
+
+	pr_debug("add bpf event %s:%s and attach bpf program %d\n",
+		 group, event, fd);
+
+	err = parse_events_add_tracepoint(&new_evsels, &parse_state->idx, group,
+					  event, parse_state->error,
+					  param->head_config);
+	if (err) {
+		struct evsel *evsel, *tmp;
+
+		pr_debug("Failed to add BPF event %s:%s\n",
+			 group, event);
+		list_for_each_entry_safe(evsel, tmp, &new_evsels, core.node) {
+			list_del_init(&evsel->core.node);
+			evsel__delete(evsel);
+		}
+		return err;
+	}
+	pr_debug("adding %s:%s\n", group, event);
+
+	list_for_each_entry(pos, &new_evsels, core.node) {
+		pr_debug("adding %s:%s to %p\n",
+			 group, event, pos);
+		pos->bpf_fd = fd;
+		pos->bpf_obj = obj;
+	}
+	list_splice(&new_evsels, list);
+	return 0;
+}
+
+int parse_events_load_bpf_obj(struct parse_events_state *parse_state,
+			      struct list_head *list,
+			      struct bpf_object *obj,
+			      struct list_head *head_config)
+{
+	int err;
+	char errbuf[BUFSIZ];
+	struct __add_bpf_event_param param = {parse_state, list, head_config};
+	static bool registered_unprobe_atexit = false;
+
+	if (IS_ERR(obj) || !obj) {
+		snprintf(errbuf, sizeof(errbuf),
+			 "Internal error: load bpf obj with NULL");
+		err = -EINVAL;
+		goto errout;
+	}
+
+	/*
+	 * Register atexit handler before calling bpf__probe() so
+	 * bpf__probe() don't need to unprobe probe points its already
+	 * created when failure.
+	 */
+	if (!registered_unprobe_atexit) {
+		atexit(bpf__clear);
+		registered_unprobe_atexit = true;
+	}
+
+	err = bpf__probe(obj);
+	if (err) {
+		bpf__strerror_probe(obj, err, errbuf, sizeof(errbuf));
+		goto errout;
+	}
+
+	err = bpf__load(obj);
+	if (err) {
+		bpf__strerror_load(obj, err, errbuf, sizeof(errbuf));
+		goto errout;
+	}
+
+	err = bpf__foreach_event(obj, add_bpf_event, &param);
+	if (err) {
+		snprintf(errbuf, sizeof(errbuf),
+			 "Attach events in BPF object failed");
+		goto errout;
+	}
+
+	return 0;
+errout:
+	parse_events_error__handle(parse_state->error, 0,
+				strdup(errbuf), strdup("(add -v to see detail)"));
+	return err;
+}
+
+static int
+parse_events_config_bpf(struct parse_events_state *parse_state,
+			struct bpf_object *obj,
+			struct list_head *head_config)
+{
+	struct parse_events_term *term;
+	int error_pos;
+
+	if (!head_config || list_empty(head_config))
+		return 0;
+
+	list_for_each_entry(term, head_config, list) {
+		int err;
+
+		if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) {
+			parse_events_error__handle(parse_state->error, term->err_term,
+						strdup("Invalid config term for BPF object"),
+						NULL);
+			return -EINVAL;
+		}
+
+		err = bpf__config_obj(obj, term, parse_state->evlist, &error_pos);
+		if (err) {
+			char errbuf[BUFSIZ];
+			int idx;
+
+			bpf__strerror_config_obj(obj, term, parse_state->evlist,
+						 &error_pos, err, errbuf,
+						 sizeof(errbuf));
+
+			if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE)
+				idx = term->err_val;
+			else
+				idx = term->err_term + error_pos;
+
+			parse_events_error__handle(parse_state->error, idx,
+						strdup(errbuf),
+						strdup(
+"Hint:\tValid config terms:\n"
+"     \tmap:[<arraymap>].value<indices>=[value]\n"
+"     \tmap:[<eventmap>].event<indices>=[event]\n"
+"\n"
+"     \twhere <indices> is something like [0,3...5] or [all]\n"
+"     \t(add -v to see detail)"));
+			return err;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Split config terms:
+ * perf record -e bpf.c/call-graph=fp,map:array.value[0]=1/ ...
+ *  'call-graph=fp' is 'evt config', should be applied to each
+ *  events in bpf.c.
+ * 'map:array.value[0]=1' is 'obj config', should be processed
+ * with parse_events_config_bpf.
+ *
+ * Move object config terms from the first list to obj_head_config.
+ */
+static void
+split_bpf_config_terms(struct list_head *evt_head_config,
+		       struct list_head *obj_head_config)
+{
+	struct parse_events_term *term, *temp;
+
+	/*
+	 * Currently, all possible user config term
+	 * belong to bpf object. parse_events__is_hardcoded_term()
+	 * happens to be a good flag.
+	 *
+	 * See parse_events_config_bpf() and
+	 * config_term_tracepoint().
+	 */
+	list_for_each_entry_safe(term, temp, evt_head_config, list)
+		if (!parse_events__is_hardcoded_term(term))
+			list_move_tail(&term->list, obj_head_config);
+}
+
+int parse_events_load_bpf(struct parse_events_state *parse_state,
+			  struct list_head *list,
+			  char *bpf_file_name,
+			  bool source,
+			  struct list_head *head_config)
+{
+	int err;
+	struct bpf_object *obj;
+	LIST_HEAD(obj_head_config);
+
+	if (head_config)
+		split_bpf_config_terms(head_config, &obj_head_config);
+
+	obj = bpf__prepare_load(bpf_file_name, source);
+	if (IS_ERR(obj)) {
+		char errbuf[BUFSIZ];
+
+		err = PTR_ERR(obj);
+
+		if (err == -ENOTSUP)
+			snprintf(errbuf, sizeof(errbuf),
+				 "BPF support is not compiled");
+		else
+			bpf__strerror_prepare_load(bpf_file_name,
+						   source,
+						   -err, errbuf,
+						   sizeof(errbuf));
+
+		parse_events_error__handle(parse_state->error, 0,
+					strdup(errbuf), strdup("(add -v to see detail)"));
+		return err;
+	}
+
+	err = parse_events_load_bpf_obj(parse_state, list, obj, head_config);
+	if (err)
+		return err;
+	err = parse_events_config_bpf(parse_state, obj, &obj_head_config);
+
+	/*
+	 * Caller doesn't know anything about obj_head_config,
+	 * so combine them together again before returning.
+	 */
+	if (head_config)
+		list_splice_tail(&obj_head_config, head_config);
+	return err;
+}
+#else // HAVE_LIBBPF_SUPPORT
+int parse_events_load_bpf_obj(struct parse_events_state *parse_state,
+			      struct list_head *list __maybe_unused,
+			      struct bpf_object *obj __maybe_unused,
+			      struct list_head *head_config __maybe_unused)
+{
+	parse_events_error__handle(parse_state->error, 0,
+				   strdup("BPF support is not compiled"),
+				   strdup("Make sure libbpf-devel is available at build time."));
+	return -ENOTSUP;
+}
+
+int parse_events_load_bpf(struct parse_events_state *parse_state,
+			  struct list_head *list __maybe_unused,
+			  char *bpf_file_name __maybe_unused,
+			  bool source __maybe_unused,
+			  struct list_head *head_config __maybe_unused)
+{
+	parse_events_error__handle(parse_state->error, 0,
+				   strdup("BPF support is not compiled"),
+				   strdup("Make sure libbpf-devel is available at build time."));
+	return -ENOTSUP;
+}
+#endif // HAVE_LIBBPF_SUPPORT
+
+static int
+parse_breakpoint_type(const char *type, struct perf_event_attr *attr)
+{
+	int i;
+
+	for (i = 0; i < 3; i++) {
+		if (!type || !type[i])
+			break;
+
+#define CHECK_SET_TYPE(bit)		\
+do {					\
+	if (attr->bp_type & bit)	\
+		return -EINVAL;		\
+	else				\
+		attr->bp_type |= bit;	\
+} while (0)
+
+		switch (type[i]) {
+		case 'r':
+			CHECK_SET_TYPE(HW_BREAKPOINT_R);
+			break;
+		case 'w':
+			CHECK_SET_TYPE(HW_BREAKPOINT_W);
+			break;
+		case 'x':
+			CHECK_SET_TYPE(HW_BREAKPOINT_X);
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+#undef CHECK_SET_TYPE
+
+	if (!attr->bp_type) /* Default */
+		attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
+
+	return 0;
+}
+
+int parse_events_add_breakpoint(struct list_head *list, int *idx,
+				u64 addr, char *type, u64 len)
+{
+	struct perf_event_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.bp_addr = addr;
+
+	if (parse_breakpoint_type(type, &attr))
+		return -EINVAL;
+
+	/* Provide some defaults if len is not specified */
+	if (!len) {
+		if (attr.bp_type == HW_BREAKPOINT_X)
+			len = sizeof(long);
+		else
+			len = HW_BREAKPOINT_LEN_4;
+	}
+
+	attr.bp_len = len;
+
+	attr.type = PERF_TYPE_BREAKPOINT;
+	attr.sample_period = 1;
+
+	return add_event(list, idx, &attr, /*name=*/NULL, /*mertic_id=*/NULL,
+			 /*config_terms=*/NULL);
+}
+
+static int check_type_val(struct parse_events_term *term,
+			  struct parse_events_error *err,
+			  int type)
+{
+	if (type == term->type_val)
+		return 0;
+
+	if (err) {
+		parse_events_error__handle(err, term->err_val,
+					type == PARSE_EVENTS__TERM_TYPE_NUM
+					? strdup("expected numeric value")
+					: strdup("expected string value"),
+					NULL);
+	}
+	return -EINVAL;
+}
+
+/*
+ * Update according to parse-events.l
+ */
+static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = {
+	[PARSE_EVENTS__TERM_TYPE_USER]			= "<sysfs term>",
+	[PARSE_EVENTS__TERM_TYPE_CONFIG]		= "config",
+	[PARSE_EVENTS__TERM_TYPE_CONFIG1]		= "config1",
+	[PARSE_EVENTS__TERM_TYPE_CONFIG2]		= "config2",
+	[PARSE_EVENTS__TERM_TYPE_NAME]			= "name",
+	[PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD]		= "period",
+	[PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ]		= "freq",
+	[PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE]	= "branch_type",
+	[PARSE_EVENTS__TERM_TYPE_TIME]			= "time",
+	[PARSE_EVENTS__TERM_TYPE_CALLGRAPH]		= "call-graph",
+	[PARSE_EVENTS__TERM_TYPE_STACKSIZE]		= "stack-size",
+	[PARSE_EVENTS__TERM_TYPE_NOINHERIT]		= "no-inherit",
+	[PARSE_EVENTS__TERM_TYPE_INHERIT]		= "inherit",
+	[PARSE_EVENTS__TERM_TYPE_MAX_STACK]		= "max-stack",
+	[PARSE_EVENTS__TERM_TYPE_MAX_EVENTS]		= "nr",
+	[PARSE_EVENTS__TERM_TYPE_OVERWRITE]		= "overwrite",
+	[PARSE_EVENTS__TERM_TYPE_NOOVERWRITE]		= "no-overwrite",
+	[PARSE_EVENTS__TERM_TYPE_DRV_CFG]		= "driver-config",
+	[PARSE_EVENTS__TERM_TYPE_PERCORE]		= "percore",
+	[PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT]		= "aux-output",
+	[PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE]	= "aux-sample-size",
+	[PARSE_EVENTS__TERM_TYPE_METRIC_ID]		= "metric-id",
+};
+
+static bool config_term_shrinked;
+
+static bool
+config_term_avail(int term_type, struct parse_events_error *err)
+{
+	char *err_str;
+
+	if (term_type < 0 || term_type >= __PARSE_EVENTS__TERM_TYPE_NR) {
+		parse_events_error__handle(err, -1,
+					strdup("Invalid term_type"), NULL);
+		return false;
+	}
+	if (!config_term_shrinked)
+		return true;
+
+	switch (term_type) {
+	case PARSE_EVENTS__TERM_TYPE_CONFIG:
+	case PARSE_EVENTS__TERM_TYPE_CONFIG1:
+	case PARSE_EVENTS__TERM_TYPE_CONFIG2:
+	case PARSE_EVENTS__TERM_TYPE_NAME:
+	case PARSE_EVENTS__TERM_TYPE_METRIC_ID:
+	case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
+	case PARSE_EVENTS__TERM_TYPE_PERCORE:
+		return true;
+	default:
+		if (!err)
+			return false;
+
+		/* term_type is validated so indexing is safe */
+		if (asprintf(&err_str, "'%s' is not usable in 'perf stat'",
+				config_term_names[term_type]) >= 0)
+			parse_events_error__handle(err, -1, err_str, NULL);
+		return false;
+	}
+}
+
+void parse_events__shrink_config_terms(void)
+{
+	config_term_shrinked = true;
+}
+
+static int config_term_common(struct perf_event_attr *attr,
+			      struct parse_events_term *term,
+			      struct parse_events_error *err)
+{
+#define CHECK_TYPE_VAL(type)						   \
+do {									   \
+	if (check_type_val(term, err, PARSE_EVENTS__TERM_TYPE_ ## type)) \
+		return -EINVAL;						   \
+} while (0)
+
+	switch (term->type_term) {
+	case PARSE_EVENTS__TERM_TYPE_CONFIG:
+		CHECK_TYPE_VAL(NUM);
+		attr->config = term->val.num;
+		break;
+	case PARSE_EVENTS__TERM_TYPE_CONFIG1:
+		CHECK_TYPE_VAL(NUM);
+		attr->config1 = term->val.num;
+		break;
+	case PARSE_EVENTS__TERM_TYPE_CONFIG2:
+		CHECK_TYPE_VAL(NUM);
+		attr->config2 = term->val.num;
+		break;
+	case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
+		CHECK_TYPE_VAL(NUM);
+		break;
+	case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
+		CHECK_TYPE_VAL(NUM);
+		break;
+	case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
+		CHECK_TYPE_VAL(STR);
+		if (strcmp(term->val.str, "no") &&
+		    parse_branch_str(term->val.str,
+				    &attr->branch_sample_type)) {
+			parse_events_error__handle(err, term->err_val,
+					strdup("invalid branch sample type"),
+					NULL);
+			return -EINVAL;
+		}
+		break;
+	case PARSE_EVENTS__TERM_TYPE_TIME:
+		CHECK_TYPE_VAL(NUM);
+		if (term->val.num > 1) {
+			parse_events_error__handle(err, term->err_val,
+						strdup("expected 0 or 1"),
+						NULL);
+			return -EINVAL;
+		}
+		break;
+	case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+		CHECK_TYPE_VAL(STR);
+		break;
+	case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+		CHECK_TYPE_VAL(NUM);
+		break;
+	case PARSE_EVENTS__TERM_TYPE_INHERIT:
+		CHECK_TYPE_VAL(NUM);
+		break;
+	case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
+		CHECK_TYPE_VAL(NUM);
+		break;
+	case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
+		CHECK_TYPE_VAL(NUM);
+		break;
+	case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
+		CHECK_TYPE_VAL(NUM);
+		break;
+	case PARSE_EVENTS__TERM_TYPE_NAME:
+		CHECK_TYPE_VAL(STR);
+		break;
+	case PARSE_EVENTS__TERM_TYPE_METRIC_ID:
+		CHECK_TYPE_VAL(STR);
+		break;
+	case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+		CHECK_TYPE_VAL(NUM);
+		break;
+	case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+		CHECK_TYPE_VAL(NUM);
+		break;
+	case PARSE_EVENTS__TERM_TYPE_PERCORE:
+		CHECK_TYPE_VAL(NUM);
+		if ((unsigned int)term->val.num > 1) {
+			parse_events_error__handle(err, term->err_val,
+						strdup("expected 0 or 1"),
+						NULL);
+			return -EINVAL;
+		}
+		break;
+	case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
+		CHECK_TYPE_VAL(NUM);
+		break;
+	case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
+		CHECK_TYPE_VAL(NUM);
+		if (term->val.num > UINT_MAX) {
+			parse_events_error__handle(err, term->err_val,
+						strdup("too big"),
+						NULL);
+			return -EINVAL;
+		}
+		break;
+	default:
+		parse_events_error__handle(err, term->err_term,
+				strdup("unknown term"),
+				parse_events_formats_error_string(NULL));
+		return -EINVAL;
+	}
+
+	/*
+	 * Check term availability after basic checking so
+	 * PARSE_EVENTS__TERM_TYPE_USER can be found and filtered.
+	 *
+	 * If check availability at the entry of this function,
+	 * user will see "'<sysfs term>' is not usable in 'perf stat'"
+	 * if an invalid config term is provided for legacy events
+	 * (for example, instructions/badterm/...), which is confusing.
+	 */
+	if (!config_term_avail(term->type_term, err))
+		return -EINVAL;
+	return 0;
+#undef CHECK_TYPE_VAL
+}
+
+static int config_term_pmu(struct perf_event_attr *attr,
+			   struct parse_events_term *term,
+			   struct parse_events_error *err)
+{
+	if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER ||
+	    term->type_term == PARSE_EVENTS__TERM_TYPE_DRV_CFG)
+		/*
+		 * Always succeed for sysfs terms, as we dont know
+		 * at this point what type they need to have.
+		 */
+		return 0;
+	else
+		return config_term_common(attr, term, err);
+}
+
+static int config_term_tracepoint(struct perf_event_attr *attr,
+				  struct parse_events_term *term,
+				  struct parse_events_error *err)
+{
+	switch (term->type_term) {
+	case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+	case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+	case PARSE_EVENTS__TERM_TYPE_INHERIT:
+	case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
+	case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+	case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+	case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
+	case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
+	case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
+	case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
+		return config_term_common(attr, term, err);
+	default:
+		if (err) {
+			parse_events_error__handle(err, term->err_term,
+				strdup("unknown term"),
+				strdup("valid terms: call-graph,stack-size\n"));
+		}
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int config_attr(struct perf_event_attr *attr,
+		       struct list_head *head,
+		       struct parse_events_error *err,
+		       config_term_func_t config_term)
+{
+	struct parse_events_term *term;
+
+	list_for_each_entry(term, head, list)
+		if (config_term(attr, term, err))
+			return -EINVAL;
+
+	return 0;
+}
+
+static int get_config_terms(struct list_head *head_config,
+			    struct list_head *head_terms __maybe_unused)
+{
+#define ADD_CONFIG_TERM(__type, __weak)				\
+	struct evsel_config_term *__t;			\
+								\
+	__t = zalloc(sizeof(*__t));				\
+	if (!__t)						\
+		return -ENOMEM;					\
+								\
+	INIT_LIST_HEAD(&__t->list);				\
+	__t->type       = EVSEL__CONFIG_TERM_ ## __type;	\
+	__t->weak	= __weak;				\
+	list_add_tail(&__t->list, head_terms)
+
+#define ADD_CONFIG_TERM_VAL(__type, __name, __val, __weak)	\
+do {								\
+	ADD_CONFIG_TERM(__type, __weak);			\
+	__t->val.__name = __val;				\
+} while (0)
+
+#define ADD_CONFIG_TERM_STR(__type, __val, __weak)		\
+do {								\
+	ADD_CONFIG_TERM(__type, __weak);			\
+	__t->val.str = strdup(__val);				\
+	if (!__t->val.str) {					\
+		zfree(&__t);					\
+		return -ENOMEM;					\
+	}							\
+	__t->free_str = true;					\
+} while (0)
+
+	struct parse_events_term *term;
+
+	list_for_each_entry(term, head_config, list) {
+		switch (term->type_term) {
+		case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
+			ADD_CONFIG_TERM_VAL(PERIOD, period, term->val.num, term->weak);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
+			ADD_CONFIG_TERM_VAL(FREQ, freq, term->val.num, term->weak);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_TIME:
+			ADD_CONFIG_TERM_VAL(TIME, time, term->val.num, term->weak);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+			ADD_CONFIG_TERM_STR(CALLGRAPH, term->val.str, term->weak);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
+			ADD_CONFIG_TERM_STR(BRANCH, term->val.str, term->weak);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+			ADD_CONFIG_TERM_VAL(STACK_USER, stack_user,
+					    term->val.num, term->weak);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_INHERIT:
+			ADD_CONFIG_TERM_VAL(INHERIT, inherit,
+					    term->val.num ? 1 : 0, term->weak);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
+			ADD_CONFIG_TERM_VAL(INHERIT, inherit,
+					    term->val.num ? 0 : 1, term->weak);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+			ADD_CONFIG_TERM_VAL(MAX_STACK, max_stack,
+					    term->val.num, term->weak);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+			ADD_CONFIG_TERM_VAL(MAX_EVENTS, max_events,
+					    term->val.num, term->weak);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
+			ADD_CONFIG_TERM_VAL(OVERWRITE, overwrite,
+					    term->val.num ? 1 : 0, term->weak);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
+			ADD_CONFIG_TERM_VAL(OVERWRITE, overwrite,
+					    term->val.num ? 0 : 1, term->weak);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
+			ADD_CONFIG_TERM_STR(DRV_CFG, term->val.str, term->weak);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_PERCORE:
+			ADD_CONFIG_TERM_VAL(PERCORE, percore,
+					    term->val.num ? true : false, term->weak);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
+			ADD_CONFIG_TERM_VAL(AUX_OUTPUT, aux_output,
+					    term->val.num ? 1 : 0, term->weak);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
+			ADD_CONFIG_TERM_VAL(AUX_SAMPLE_SIZE, aux_sample_size,
+					    term->val.num, term->weak);
+			break;
+		default:
+			break;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Add EVSEL__CONFIG_TERM_CFG_CHG where cfg_chg will have a bit set for
+ * each bit of attr->config that the user has changed.
+ */
+static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config,
+			   struct list_head *head_terms)
+{
+	struct parse_events_term *term;
+	u64 bits = 0;
+	int type;
+
+	list_for_each_entry(term, head_config, list) {
+		switch (term->type_term) {
+		case PARSE_EVENTS__TERM_TYPE_USER:
+			type = perf_pmu__format_type(&pmu->format, term->config);
+			if (type != PERF_PMU_FORMAT_VALUE_CONFIG)
+				continue;
+			bits |= perf_pmu__format_bits(&pmu->format, term->config);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_CONFIG:
+			bits = ~(u64)0;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (bits)
+		ADD_CONFIG_TERM_VAL(CFG_CHG, cfg_chg, bits, false);
+
+#undef ADD_CONFIG_TERM
+	return 0;
+}
+
+int parse_events_add_tracepoint(struct list_head *list, int *idx,
+				const char *sys, const char *event,
+				struct parse_events_error *err,
+				struct list_head *head_config)
+{
+	if (head_config) {
+		struct perf_event_attr attr;
+
+		if (config_attr(&attr, head_config, err,
+				config_term_tracepoint))
+			return -EINVAL;
+	}
+
+	if (strpbrk(sys, "*?"))
+		return add_tracepoint_multi_sys(list, idx, sys, event,
+						err, head_config);
+	else
+		return add_tracepoint_event(list, idx, sys, event,
+					    err, head_config);
+}
+
+int parse_events_add_numeric(struct parse_events_state *parse_state,
+			     struct list_head *list,
+			     u32 type, u64 config,
+			     struct list_head *head_config)
+{
+	struct perf_event_attr attr;
+	LIST_HEAD(config_terms);
+	const char *name, *metric_id;
+	bool hybrid;
+	int ret;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.type = type;
+	attr.config = config;
+
+	if (head_config) {
+		if (config_attr(&attr, head_config, parse_state->error,
+				config_term_common))
+			return -EINVAL;
+
+		if (get_config_terms(head_config, &config_terms))
+			return -ENOMEM;
+	}
+
+	name = get_config_name(head_config);
+	metric_id = get_config_metric_id(head_config);
+	ret = parse_events__add_numeric_hybrid(parse_state, list, &attr,
+					       name, metric_id,
+					       &config_terms, &hybrid);
+	if (hybrid)
+		goto out_free_terms;
+
+	ret = add_event(list, &parse_state->idx, &attr, name, metric_id,
+			&config_terms);
+out_free_terms:
+	free_config_terms(&config_terms);
+	return ret;
+}
+
+int parse_events_add_tool(struct parse_events_state *parse_state,
+			  struct list_head *list,
+			  int tool_event)
+{
+	return add_event_tool(list, &parse_state->idx, tool_event);
+}
+
+static bool config_term_percore(struct list_head *config_terms)
+{
+	struct evsel_config_term *term;
+
+	list_for_each_entry(term, config_terms, list) {
+		if (term->type == EVSEL__CONFIG_TERM_PERCORE)
+			return term->val.percore;
+	}
+
+	return false;
+}
+
+static int parse_events__inside_hybrid_pmu(struct parse_events_state *parse_state,
+					   struct list_head *list, char *name,
+					   struct list_head *head_config)
+{
+	struct parse_events_term *term;
+	int ret = -1;
+
+	if (parse_state->fake_pmu || !head_config || list_empty(head_config) ||
+	    !perf_pmu__is_hybrid(name)) {
+		return -1;
+	}
+
+	/*
+	 * More than one term in list.
+	 */
+	if (head_config->next && head_config->next->next != head_config)
+		return -1;
+
+	term = list_first_entry(head_config, struct parse_events_term, list);
+	if (term && term->config && strcmp(term->config, "event")) {
+		ret = parse_events__with_hybrid_pmu(parse_state, term->config,
+						    name, list);
+	}
+
+	return ret;
+}
+
+int parse_events_add_pmu(struct parse_events_state *parse_state,
+			 struct list_head *list, char *name,
+			 struct list_head *head_config,
+			 bool auto_merge_stats,
+			 bool use_alias)
+{
+	struct perf_event_attr attr;
+	struct perf_pmu_info info;
+	struct perf_pmu *pmu;
+	struct evsel *evsel;
+	struct parse_events_error *err = parse_state->error;
+	bool use_uncore_alias;
+	LIST_HEAD(config_terms);
+
+	pmu = parse_state->fake_pmu ?: perf_pmu__find(name);
+
+	if (verbose > 1 && !(pmu && pmu->selectable)) {
+		fprintf(stderr, "Attempting to add event pmu '%s' with '",
+			name);
+		if (head_config) {
+			struct parse_events_term *term;
+
+			list_for_each_entry(term, head_config, list) {
+				fprintf(stderr, "%s,", term->config);
+			}
+		}
+		fprintf(stderr, "' that may result in non-fatal errors\n");
+	}
+
+	if (!pmu) {
+		char *err_str;
+
+		if (asprintf(&err_str,
+				"Cannot find PMU `%s'. Missing kernel support?",
+				name) >= 0)
+			parse_events_error__handle(err, 0, err_str, NULL);
+		return -EINVAL;
+	}
+
+	if (pmu->default_config) {
+		memcpy(&attr, pmu->default_config,
+		       sizeof(struct perf_event_attr));
+	} else {
+		memset(&attr, 0, sizeof(attr));
+	}
+
+	use_uncore_alias = (pmu->is_uncore && use_alias);
+
+	if (!head_config) {
+		attr.type = pmu->type;
+		evsel = __add_event(list, &parse_state->idx, &attr,
+				    /*init_attr=*/true, /*name=*/NULL,
+				    /*metric_id=*/NULL, pmu,
+				    /*config_terms=*/NULL, auto_merge_stats,
+				    /*cpu_list=*/NULL);
+		if (evsel) {
+			evsel->pmu_name = name ? strdup(name) : NULL;
+			evsel->use_uncore_alias = use_uncore_alias;
+			return 0;
+		} else {
+			return -ENOMEM;
+		}
+	}
+
+	if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, head_config, &info))
+		return -EINVAL;
+
+	if (verbose > 1) {
+		fprintf(stderr, "After aliases, add event pmu '%s' with '",
+			name);
+		if (head_config) {
+			struct parse_events_term *term;
+
+			list_for_each_entry(term, head_config, list) {
+				fprintf(stderr, "%s,", term->config);
+			}
+		}
+		fprintf(stderr, "' that may result in non-fatal errors\n");
+	}
+
+	/*
+	 * Configure hardcoded terms first, no need to check
+	 * return value when called with fail == 0 ;)
+	 */
+	if (config_attr(&attr, head_config, parse_state->error, config_term_pmu))
+		return -EINVAL;
+
+	if (get_config_terms(head_config, &config_terms))
+		return -ENOMEM;
+
+	/*
+	 * When using default config, record which bits of attr->config were
+	 * changed by the user.
+	 */
+	if (pmu->default_config && get_config_chgs(pmu, head_config, &config_terms))
+		return -ENOMEM;
+
+	if (!parse_events__inside_hybrid_pmu(parse_state, list, name,
+					     head_config)) {
+		return 0;
+	}
+
+	if (!parse_state->fake_pmu && perf_pmu__config(pmu, &attr, head_config, parse_state->error)) {
+		free_config_terms(&config_terms);
+		return -EINVAL;
+	}
+
+	evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true,
+			    get_config_name(head_config),
+			    get_config_metric_id(head_config), pmu,
+			    &config_terms, auto_merge_stats, /*cpu_list=*/NULL);
+	if (!evsel)
+		return -ENOMEM;
+
+	if (evsel->name)
+		evsel->use_config_name = true;
+
+	evsel->pmu_name = name ? strdup(name) : NULL;
+	evsel->use_uncore_alias = use_uncore_alias;
+	evsel->percore = config_term_percore(&evsel->config_terms);
+
+	if (parse_state->fake_pmu)
+		return 0;
+
+	free((char *)evsel->unit);
+	evsel->unit = strdup(info.unit);
+	evsel->scale = info.scale;
+	evsel->per_pkg = info.per_pkg;
+	evsel->snapshot = info.snapshot;
+	evsel->metric_expr = info.metric_expr;
+	evsel->metric_name = info.metric_name;
+	return 0;
+}
+
+int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
+			       char *str, struct list_head *head,
+			       struct list_head **listp)
+{
+	struct parse_events_term *term;
+	struct list_head *list = NULL;
+	struct list_head *orig_head = NULL;
+	struct perf_pmu *pmu = NULL;
+	int ok = 0;
+	char *config;
+
+	*listp = NULL;
+
+	if (!head) {
+		head = malloc(sizeof(struct list_head));
+		if (!head)
+			goto out_err;
+
+		INIT_LIST_HEAD(head);
+	}
+	config = strdup(str);
+	if (!config)
+		goto out_err;
+
+	if (parse_events_term__num(&term,
+				   PARSE_EVENTS__TERM_TYPE_USER,
+				   config, 1, false, &config,
+					NULL) < 0) {
+		free(config);
+		goto out_err;
+	}
+	list_add_tail(&term->list, head);
+
+	/* Add it for all PMUs that support the alias */
+	list = malloc(sizeof(struct list_head));
+	if (!list)
+		goto out_err;
+
+	INIT_LIST_HEAD(list);
+
+	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+		struct perf_pmu_alias *alias;
+
+		list_for_each_entry(alias, &pmu->aliases, list) {
+			if (!strcasecmp(alias->name, str)) {
+				parse_events_copy_term_list(head, &orig_head);
+				if (!parse_events_add_pmu(parse_state, list,
+							  pmu->name, orig_head,
+							  true, true)) {
+					pr_debug("%s -> %s/%s/\n", str,
+						 pmu->name, alias->str);
+					ok++;
+				}
+				parse_events_terms__delete(orig_head);
+			}
+		}
+	}
+
+	if (parse_state->fake_pmu) {
+		if (!parse_events_add_pmu(parse_state, list, str, head,
+					  true, true)) {
+			pr_debug("%s -> %s/%s/\n", str, "fake_pmu", str);
+			ok++;
+		}
+	}
+
+out_err:
+	if (ok)
+		*listp = list;
+	else
+		free(list);
+
+	parse_events_terms__delete(head);
+	return ok ? 0 : -1;
+}
+
+int parse_events__modifier_group(struct list_head *list,
+				 char *event_mod)
+{
+	return parse_events__modifier_event(list, event_mod, true);
+}
+
+/*
+ * Check if the two uncore PMUs are from the same uncore block
+ * The format of the uncore PMU name is uncore_#blockname_#pmuidx
+ */
+static bool is_same_uncore_block(const char *pmu_name_a, const char *pmu_name_b)
+{
+	char *end_a, *end_b;
+
+	end_a = strrchr(pmu_name_a, '_');
+	end_b = strrchr(pmu_name_b, '_');
+
+	if (!end_a || !end_b)
+		return false;
+
+	if ((end_a - pmu_name_a) != (end_b - pmu_name_b))
+		return false;
+
+	return (strncmp(pmu_name_a, pmu_name_b, end_a - pmu_name_a) == 0);
+}
+
+static int
+parse_events__set_leader_for_uncore_aliase(char *name, struct list_head *list,
+					   struct parse_events_state *parse_state)
+{
+	struct evsel *evsel, *leader;
+	uintptr_t *leaders;
+	bool is_leader = true;
+	int i, nr_pmu = 0, total_members, ret = 0;
+
+	leader = list_first_entry(list, struct evsel, core.node);
+	evsel = list_last_entry(list, struct evsel, core.node);
+	total_members = evsel->core.idx - leader->core.idx + 1;
+
+	leaders = calloc(total_members, sizeof(uintptr_t));
+	if (WARN_ON(!leaders))
+		return 0;
+
+	/*
+	 * Going through the whole group and doing sanity check.
+	 * All members must use alias, and be from the same uncore block.
+	 * Also, storing the leader events in an array.
+	 */
+	__evlist__for_each_entry(list, evsel) {
+
+		/* Only split the uncore group which members use alias */
+		if (!evsel->use_uncore_alias)
+			goto out;
+
+		/* The events must be from the same uncore block */
+		if (!is_same_uncore_block(leader->pmu_name, evsel->pmu_name))
+			goto out;
+
+		if (!is_leader)
+			continue;
+		/*
+		 * If the event's PMU name starts to repeat, it must be a new
+		 * event. That can be used to distinguish the leader from
+		 * other members, even they have the same event name.
+		 */
+		if ((leader != evsel) &&
+		    !strcmp(leader->pmu_name, evsel->pmu_name)) {
+			is_leader = false;
+			continue;
+		}
+
+		/* Store the leader event for each PMU */
+		leaders[nr_pmu++] = (uintptr_t) evsel;
+	}
+
+	/* only one event alias */
+	if (nr_pmu == total_members) {
+		parse_state->nr_groups--;
+		goto handled;
+	}
+
+	/*
+	 * An uncore event alias is a joint name which means the same event
+	 * runs on all PMUs of a block.
+	 * Perf doesn't support mixed events from different PMUs in the same
+	 * group. The big group has to be split into multiple small groups
+	 * which only include the events from the same PMU.
+	 *
+	 * Here the uncore event aliases must be from the same uncore block.
+	 * The number of PMUs must be same for each alias. The number of new
+	 * small groups equals to the number of PMUs.
+	 * Setting the leader event for corresponding members in each group.
+	 */
+	i = 0;
+	__evlist__for_each_entry(list, evsel) {
+		if (i >= nr_pmu)
+			i = 0;
+		evsel__set_leader(evsel, (struct evsel *) leaders[i++]);
+	}
+
+	/* The number of members and group name are same for each group */
+	for (i = 0; i < nr_pmu; i++) {
+		evsel = (struct evsel *) leaders[i];
+		evsel->core.nr_members = total_members / nr_pmu;
+		evsel->group_name = name ? strdup(name) : NULL;
+	}
+
+	/* Take the new small groups into account */
+	parse_state->nr_groups += nr_pmu - 1;
+
+handled:
+	ret = 1;
+out:
+	free(leaders);
+	return ret;
+}
+
+__weak struct evsel *arch_evlist__leader(struct list_head *list)
+{
+	return list_first_entry(list, struct evsel, core.node);
+}
+
+void parse_events__set_leader(char *name, struct list_head *list,
+			      struct parse_events_state *parse_state)
+{
+	struct evsel *leader;
+
+	if (list_empty(list)) {
+		WARN_ONCE(true, "WARNING: failed to set leader: empty list");
+		return;
+	}
+
+	if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state))
+		return;
+
+	leader = arch_evlist__leader(list);
+	__perf_evlist__set_leader(list, &leader->core);
+	leader->group_name = name ? strdup(name) : NULL;
+	list_move(&leader->core.node, list);
+}
+
+/* list_event is assumed to point to malloc'ed memory */
+void parse_events_update_lists(struct list_head *list_event,
+			       struct list_head *list_all)
+{
+	/*
+	 * Called for single event definition. Update the
+	 * 'all event' list, and reinit the 'single event'
+	 * list, for next event definition.
+	 */
+	list_splice_tail(list_event, list_all);
+	free(list_event);
+}
+
+struct event_modifier {
+	int eu;
+	int ek;
+	int eh;
+	int eH;
+	int eG;
+	int eI;
+	int precise;
+	int precise_max;
+	int exclude_GH;
+	int sample_read;
+	int pinned;
+	int weak;
+	int exclusive;
+	int bpf_counter;
+};
+
+static int get_event_modifier(struct event_modifier *mod, char *str,
+			       struct evsel *evsel)
+{
+	int eu = evsel ? evsel->core.attr.exclude_user : 0;
+	int ek = evsel ? evsel->core.attr.exclude_kernel : 0;
+	int eh = evsel ? evsel->core.attr.exclude_hv : 0;
+	int eH = evsel ? evsel->core.attr.exclude_host : 0;
+	int eG = evsel ? evsel->core.attr.exclude_guest : 0;
+	int eI = evsel ? evsel->core.attr.exclude_idle : 0;
+	int precise = evsel ? evsel->core.attr.precise_ip : 0;
+	int precise_max = 0;
+	int sample_read = 0;
+	int pinned = evsel ? evsel->core.attr.pinned : 0;
+	int exclusive = evsel ? evsel->core.attr.exclusive : 0;
+
+	int exclude = eu | ek | eh;
+	int exclude_GH = evsel ? evsel->exclude_GH : 0;
+	int weak = 0;
+	int bpf_counter = 0;
+
+	memset(mod, 0, sizeof(*mod));
+
+	while (*str) {
+		if (*str == 'u') {
+			if (!exclude)
+				exclude = eu = ek = eh = 1;
+			if (!exclude_GH && !perf_guest)
+				eG = 1;
+			eu = 0;
+		} else if (*str == 'k') {
+			if (!exclude)
+				exclude = eu = ek = eh = 1;
+			ek = 0;
+		} else if (*str == 'h') {
+			if (!exclude)
+				exclude = eu = ek = eh = 1;
+			eh = 0;
+		} else if (*str == 'G') {
+			if (!exclude_GH)
+				exclude_GH = eG = eH = 1;
+			eG = 0;
+		} else if (*str == 'H') {
+			if (!exclude_GH)
+				exclude_GH = eG = eH = 1;
+			eH = 0;
+		} else if (*str == 'I') {
+			eI = 1;
+		} else if (*str == 'p') {
+			precise++;
+			/* use of precise requires exclude_guest */
+			if (!exclude_GH)
+				eG = 1;
+		} else if (*str == 'P') {
+			precise_max = 1;
+		} else if (*str == 'S') {
+			sample_read = 1;
+		} else if (*str == 'D') {
+			pinned = 1;
+		} else if (*str == 'e') {
+			exclusive = 1;
+		} else if (*str == 'W') {
+			weak = 1;
+		} else if (*str == 'b') {
+			bpf_counter = 1;
+		} else
+			break;
+
+		++str;
+	}
+
+	/*
+	 * precise ip:
+	 *
+	 *  0 - SAMPLE_IP can have arbitrary skid
+	 *  1 - SAMPLE_IP must have constant skid
+	 *  2 - SAMPLE_IP requested to have 0 skid
+	 *  3 - SAMPLE_IP must have 0 skid
+	 *
+	 *  See also PERF_RECORD_MISC_EXACT_IP
+	 */
+	if (precise > 3)
+		return -EINVAL;
+
+	mod->eu = eu;
+	mod->ek = ek;
+	mod->eh = eh;
+	mod->eH = eH;
+	mod->eG = eG;
+	mod->eI = eI;
+	mod->precise = precise;
+	mod->precise_max = precise_max;
+	mod->exclude_GH = exclude_GH;
+	mod->sample_read = sample_read;
+	mod->pinned = pinned;
+	mod->weak = weak;
+	mod->bpf_counter = bpf_counter;
+	mod->exclusive = exclusive;
+
+	return 0;
+}
+
+/*
+ * Basic modifier sanity check to validate it contains only one
+ * instance of any modifier (apart from 'p') present.
+ */
+static int check_modifier(char *str)
+{
+	char *p = str;
+
+	/* The sizeof includes 0 byte as well. */
+	if (strlen(str) > (sizeof("ukhGHpppPSDIWeb") - 1))
+		return -1;
+
+	while (*p) {
+		if (*p != 'p' && strchr(p + 1, *p))
+			return -1;
+		p++;
+	}
+
+	return 0;
+}
+
+int parse_events__modifier_event(struct list_head *list, char *str, bool add)
+{
+	struct evsel *evsel;
+	struct event_modifier mod;
+
+	if (str == NULL)
+		return 0;
+
+	if (check_modifier(str))
+		return -EINVAL;
+
+	if (!add && get_event_modifier(&mod, str, NULL))
+		return -EINVAL;
+
+	__evlist__for_each_entry(list, evsel) {
+		if (add && get_event_modifier(&mod, str, evsel))
+			return -EINVAL;
+
+		evsel->core.attr.exclude_user   = mod.eu;
+		evsel->core.attr.exclude_kernel = mod.ek;
+		evsel->core.attr.exclude_hv     = mod.eh;
+		evsel->core.attr.precise_ip     = mod.precise;
+		evsel->core.attr.exclude_host   = mod.eH;
+		evsel->core.attr.exclude_guest  = mod.eG;
+		evsel->core.attr.exclude_idle   = mod.eI;
+		evsel->exclude_GH          = mod.exclude_GH;
+		evsel->sample_read         = mod.sample_read;
+		evsel->precise_max         = mod.precise_max;
+		evsel->weak_group	   = mod.weak;
+		evsel->bpf_counter	   = mod.bpf_counter;
+
+		if (evsel__is_group_leader(evsel)) {
+			evsel->core.attr.pinned = mod.pinned;
+			evsel->core.attr.exclusive = mod.exclusive;
+		}
+	}
+
+	return 0;
+}
+
+int parse_events_name(struct list_head *list, const char *name)
+{
+	struct evsel *evsel;
+
+	__evlist__for_each_entry(list, evsel) {
+		if (!evsel->name)
+			evsel->name = strdup(name);
+	}
+
+	return 0;
+}
+
+static int
+comp_pmu(const void *p1, const void *p2)
+{
+	struct perf_pmu_event_symbol *pmu1 = (struct perf_pmu_event_symbol *) p1;
+	struct perf_pmu_event_symbol *pmu2 = (struct perf_pmu_event_symbol *) p2;
+
+	return strcasecmp(pmu1->symbol, pmu2->symbol);
+}
+
+static void perf_pmu__parse_cleanup(void)
+{
+	if (perf_pmu_events_list_num > 0) {
+		struct perf_pmu_event_symbol *p;
+		int i;
+
+		for (i = 0; i < perf_pmu_events_list_num; i++) {
+			p = perf_pmu_events_list + i;
+			zfree(&p->symbol);
+		}
+		zfree(&perf_pmu_events_list);
+		perf_pmu_events_list_num = 0;
+	}
+}
+
+#define SET_SYMBOL(str, stype)		\
+do {					\
+	p->symbol = str;		\
+	if (!p->symbol)			\
+		goto err;		\
+	p->type = stype;		\
+} while (0)
+
+/*
+ * Read the pmu events list from sysfs
+ * Save it into perf_pmu_events_list
+ */
+static void perf_pmu__parse_init(void)
+{
+
+	struct perf_pmu *pmu = NULL;
+	struct perf_pmu_alias *alias;
+	int len = 0;
+
+	pmu = NULL;
+	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+		list_for_each_entry(alias, &pmu->aliases, list) {
+			char *tmp = strchr(alias->name, '-');
+
+			if (tmp) {
+				char *tmp2 = NULL;
+
+				tmp2 = strchr(tmp + 1, '-');
+				len++;
+				if (tmp2)
+					len++;
+			}
+
+			len++;
+		}
+	}
+
+	if (len == 0) {
+		perf_pmu_events_list_num = -1;
+		return;
+	}
+	perf_pmu_events_list = malloc(sizeof(struct perf_pmu_event_symbol) * len);
+	if (!perf_pmu_events_list)
+		return;
+	perf_pmu_events_list_num = len;
+
+	len = 0;
+	pmu = NULL;
+	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+		list_for_each_entry(alias, &pmu->aliases, list) {
+			struct perf_pmu_event_symbol *p = perf_pmu_events_list + len;
+			char *tmp = strchr(alias->name, '-');
+			char *tmp2 = NULL;
+
+			if (tmp)
+				tmp2 = strchr(tmp + 1, '-');
+			if (tmp2) {
+				SET_SYMBOL(strndup(alias->name, tmp - alias->name),
+						PMU_EVENT_SYMBOL_PREFIX);
+				p++;
+				tmp++;
+				SET_SYMBOL(strndup(tmp, tmp2 - tmp), PMU_EVENT_SYMBOL_SUFFIX);
+				p++;
+				SET_SYMBOL(strdup(++tmp2), PMU_EVENT_SYMBOL_SUFFIX2);
+				len += 3;
+			} else if (tmp) {
+				SET_SYMBOL(strndup(alias->name, tmp - alias->name),
+						PMU_EVENT_SYMBOL_PREFIX);
+				p++;
+				SET_SYMBOL(strdup(++tmp), PMU_EVENT_SYMBOL_SUFFIX);
+				len += 2;
+			} else {
+				SET_SYMBOL(strdup(alias->name), PMU_EVENT_SYMBOL);
+				len++;
+			}
+		}
+	}
+	qsort(perf_pmu_events_list, len,
+		sizeof(struct perf_pmu_event_symbol), comp_pmu);
+
+	return;
+err:
+	perf_pmu__parse_cleanup();
+}
+
+/*
+ * This function injects special term in
+ * perf_pmu_events_list so the test code
+ * can check on this functionality.
+ */
+int perf_pmu__test_parse_init(void)
+{
+	struct perf_pmu_event_symbol *list, *tmp, symbols[] = {
+		{(char *)"read", PMU_EVENT_SYMBOL},
+		{(char *)"event", PMU_EVENT_SYMBOL_PREFIX},
+		{(char *)"two", PMU_EVENT_SYMBOL_SUFFIX},
+		{(char *)"hyphen", PMU_EVENT_SYMBOL_SUFFIX},
+		{(char *)"hyph", PMU_EVENT_SYMBOL_SUFFIX2},
+	};
+	unsigned long i, j;
+
+	tmp = list = malloc(sizeof(*list) * ARRAY_SIZE(symbols));
+	if (!list)
+		return -ENOMEM;
+
+	for (i = 0; i < ARRAY_SIZE(symbols); i++, tmp++) {
+		tmp->type = symbols[i].type;
+		tmp->symbol = strdup(symbols[i].symbol);
+		if (!tmp->symbol)
+			goto err_free;
+	}
+
+	perf_pmu_events_list = list;
+	perf_pmu_events_list_num = ARRAY_SIZE(symbols);
+
+	qsort(perf_pmu_events_list, ARRAY_SIZE(symbols),
+	      sizeof(struct perf_pmu_event_symbol), comp_pmu);
+	return 0;
+
+err_free:
+	for (j = 0, tmp = list; j < i; j++, tmp++)
+		free(tmp->symbol);
+	free(list);
+	return -ENOMEM;
+}
+
+enum perf_pmu_event_symbol_type
+perf_pmu__parse_check(const char *name)
+{
+	struct perf_pmu_event_symbol p, *r;
+
+	/* scan kernel pmu events from sysfs if needed */
+	if (perf_pmu_events_list_num == 0)
+		perf_pmu__parse_init();
+	/*
+	 * name "cpu" could be prefix of cpu-cycles or cpu// events.
+	 * cpu-cycles has been handled by hardcode.
+	 * So it must be cpu// events, not kernel pmu event.
+	 */
+	if ((perf_pmu_events_list_num <= 0) || !strcmp(name, "cpu"))
+		return PMU_EVENT_SYMBOL_ERR;
+
+	p.symbol = strdup(name);
+	r = bsearch(&p, perf_pmu_events_list,
+			(size_t) perf_pmu_events_list_num,
+			sizeof(struct perf_pmu_event_symbol), comp_pmu);
+	zfree(&p.symbol);
+	return r ? r->type : PMU_EVENT_SYMBOL_ERR;
+}
+
+static int parse_events__scanner(const char *str,
+				 struct parse_events_state *parse_state)
+{
+	YY_BUFFER_STATE buffer;
+	void *scanner;
+	int ret;
+
+	ret = parse_events_lex_init_extra(parse_state, &scanner);
+	if (ret)
+		return ret;
+
+	buffer = parse_events__scan_string(str, scanner);
+
+#ifdef PARSER_DEBUG
+	parse_events_debug = 1;
+	parse_events_set_debug(1, scanner);
+#endif
+	ret = parse_events_parse(parse_state, scanner);
+
+	parse_events__flush_buffer(buffer, scanner);
+	parse_events__delete_buffer(buffer, scanner);
+	parse_events_lex_destroy(scanner);
+	return ret;
+}
+
+/*
+ * parse event config string, return a list of event terms.
+ */
+int parse_events_terms(struct list_head *terms, const char *str)
+{
+	struct parse_events_state parse_state = {
+		.terms  = NULL,
+		.stoken = PE_START_TERMS,
+	};
+	int ret;
+
+	ret = parse_events__scanner(str, &parse_state);
+	perf_pmu__parse_cleanup();
+
+	if (!ret) {
+		list_splice(parse_state.terms, terms);
+		zfree(&parse_state.terms);
+		return 0;
+	}
+
+	parse_events_terms__delete(parse_state.terms);
+	return ret;
+}
+
+static int parse_events__with_hybrid_pmu(struct parse_events_state *parse_state,
+					 const char *str, char *pmu_name,
+					 struct list_head *list)
+{
+	struct parse_events_state ps = {
+		.list            = LIST_HEAD_INIT(ps.list),
+		.stoken          = PE_START_EVENTS,
+		.hybrid_pmu_name = pmu_name,
+		.idx             = parse_state->idx,
+	};
+	int ret;
+
+	ret = parse_events__scanner(str, &ps);
+	perf_pmu__parse_cleanup();
+
+	if (!ret) {
+		if (!list_empty(&ps.list)) {
+			list_splice(&ps.list, list);
+			parse_state->idx = ps.idx;
+			return 0;
+		} else
+			return -1;
+	}
+
+	return ret;
+}
+
+int __parse_events(struct evlist *evlist, const char *str,
+		   struct parse_events_error *err, struct perf_pmu *fake_pmu)
+{
+	struct parse_events_state parse_state = {
+		.list	  = LIST_HEAD_INIT(parse_state.list),
+		.idx	  = evlist->core.nr_entries,
+		.error	  = err,
+		.evlist	  = evlist,
+		.stoken	  = PE_START_EVENTS,
+		.fake_pmu = fake_pmu,
+	};
+	int ret;
+
+	ret = parse_events__scanner(str, &parse_state);
+	perf_pmu__parse_cleanup();
+
+	if (!ret && list_empty(&parse_state.list)) {
+		WARN_ONCE(true, "WARNING: event parser found nothing\n");
+		return -1;
+	}
+
+	/*
+	 * Add list to the evlist even with errors to allow callers to clean up.
+	 */
+	evlist__splice_list_tail(evlist, &parse_state.list);
+
+	if (!ret) {
+		struct evsel *last;
+
+		evlist->core.nr_groups += parse_state.nr_groups;
+		last = evlist__last(evlist);
+		last->cmdline_group_boundary = true;
+
+		return 0;
+	}
+
+	/*
+	 * There are 2 users - builtin-record and builtin-test objects.
+	 * Both call evlist__delete in case of error, so we dont
+	 * need to bother.
+	 */
+	return ret;
+}
+
+int parse_event(struct evlist *evlist, const char *str)
+{
+	struct parse_events_error err;
+	int ret;
+
+	parse_events_error__init(&err);
+	ret = parse_events(evlist, str, &err);
+	parse_events_error__exit(&err);
+	return ret;
+}
+
+void parse_events_error__init(struct parse_events_error *err)
+{
+	bzero(err, sizeof(*err));
+}
+
+void parse_events_error__exit(struct parse_events_error *err)
+{
+	zfree(&err->str);
+	zfree(&err->help);
+	zfree(&err->first_str);
+	zfree(&err->first_help);
+}
+
+void parse_events_error__handle(struct parse_events_error *err, int idx,
+				char *str, char *help)
+{
+	if (WARN(!str || !err, "WARNING: failed to provide error string or struct\n"))
+		goto out_free;
+	switch (err->num_errors) {
+	case 0:
+		err->idx = idx;
+		err->str = str;
+		err->help = help;
+		break;
+	case 1:
+		err->first_idx = err->idx;
+		err->idx = idx;
+		err->first_str = err->str;
+		err->str = str;
+		err->first_help = err->help;
+		err->help = help;
+		break;
+	default:
+		pr_debug("Multiple errors dropping message: %s (%s)\n",
+			err->str, err->help);
+		free(err->str);
+		err->str = str;
+		free(err->help);
+		err->help = help;
+		break;
+	}
+	err->num_errors++;
+	return;
+
+out_free:
+	free(str);
+	free(help);
+}
+
+#define MAX_WIDTH 1000
+static int get_term_width(void)
+{
+	struct winsize ws;
+
+	get_term_dimensions(&ws);
+	return ws.ws_col > MAX_WIDTH ? MAX_WIDTH : ws.ws_col;
+}
+
+static void __parse_events_error__print(int err_idx, const char *err_str,
+					const char *err_help, const char *event)
+{
+	const char *str = "invalid or unsupported event: ";
+	char _buf[MAX_WIDTH];
+	char *buf = (char *) event;
+	int idx = 0;
+	if (err_str) {
+		/* -2 for extra '' in the final fprintf */
+		int width       = get_term_width() - 2;
+		int len_event   = strlen(event);
+		int len_str, max_len, cut = 0;
+
+		/*
+		 * Maximum error index indent, we will cut
+		 * the event string if it's bigger.
+		 */
+		int max_err_idx = 13;
+
+		/*
+		 * Let's be specific with the message when
+		 * we have the precise error.
+		 */
+		str     = "event syntax error: ";
+		len_str = strlen(str);
+		max_len = width - len_str;
+
+		buf = _buf;
+
+		/* We're cutting from the beginning. */
+		if (err_idx > max_err_idx)
+			cut = err_idx - max_err_idx;
+
+		strncpy(buf, event + cut, max_len);
+
+		/* Mark cut parts with '..' on both sides. */
+		if (cut)
+			buf[0] = buf[1] = '.';
+
+		if ((len_event - cut) > max_len) {
+			buf[max_len - 1] = buf[max_len - 2] = '.';
+			buf[max_len] = 0;
+		}
+
+		idx = len_str + err_idx - cut;
+	}
+
+	fprintf(stderr, "%s'%s'\n", str, buf);
+	if (idx) {
+		fprintf(stderr, "%*s\\___ %s\n", idx + 1, "", err_str);
+		if (err_help)
+			fprintf(stderr, "\n%s\n", err_help);
+	}
+}
+
+void parse_events_error__print(struct parse_events_error *err,
+			       const char *event)
+{
+	if (!err->num_errors)
+		return;
+
+	__parse_events_error__print(err->idx, err->str, err->help, event);
+
+	if (err->num_errors > 1) {
+		fputs("\nInitial error:\n", stderr);
+		__parse_events_error__print(err->first_idx, err->first_str,
+					err->first_help, event);
+	}
+}
+
+#undef MAX_WIDTH
+
+int parse_events_option(const struct option *opt, const char *str,
+			int unset __maybe_unused)
+{
+	struct evlist *evlist = *(struct evlist **)opt->value;
+	struct parse_events_error err;
+	int ret;
+
+	parse_events_error__init(&err);
+	ret = parse_events(evlist, str, &err);
+
+	if (ret) {
+		parse_events_error__print(&err, str);
+		fprintf(stderr, "Run 'perf list' for a list of valid events\n");
+	}
+	parse_events_error__exit(&err);
+
+	return ret;
+}
+
+int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset)
+{
+	struct evlist **evlistp = opt->value;
+	int ret;
+
+	if (*evlistp == NULL) {
+		*evlistp = evlist__new();
+
+		if (*evlistp == NULL) {
+			fprintf(stderr, "Not enough memory to create evlist\n");
+			return -1;
+		}
+	}
+
+	ret = parse_events_option(opt, str, unset);
+	if (ret) {
+		evlist__delete(*evlistp);
+		*evlistp = NULL;
+	}
+
+	return ret;
+}
+
+static int
+foreach_evsel_in_last_glob(struct evlist *evlist,
+			   int (*func)(struct evsel *evsel,
+				       const void *arg),
+			   const void *arg)
+{
+	struct evsel *last = NULL;
+	int err;
+
+	/*
+	 * Don't return when list_empty, give func a chance to report
+	 * error when it found last == NULL.
+	 *
+	 * So no need to WARN here, let *func do this.
+	 */
+	if (evlist->core.nr_entries > 0)
+		last = evlist__last(evlist);
+
+	do {
+		err = (*func)(last, arg);
+		if (err)
+			return -1;
+		if (!last)
+			return 0;
+
+		if (last->core.node.prev == &evlist->core.entries)
+			return 0;
+		last = list_entry(last->core.node.prev, struct evsel, core.node);
+	} while (!last->cmdline_group_boundary);
+
+	return 0;
+}
+
+static int set_filter(struct evsel *evsel, const void *arg)
+{
+	const char *str = arg;
+	bool found = false;
+	int nr_addr_filters = 0;
+	struct perf_pmu *pmu = NULL;
+
+	if (evsel == NULL) {
+		fprintf(stderr,
+			"--filter option should follow a -e tracepoint or HW tracer option\n");
+		return -1;
+	}
+
+	if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT) {
+		if (evsel__append_tp_filter(evsel, str) < 0) {
+			fprintf(stderr,
+				"not enough memory to hold filter string\n");
+			return -1;
+		}
+
+		return 0;
+	}
+
+	while ((pmu = perf_pmu__scan(pmu)) != NULL)
+		if (pmu->type == evsel->core.attr.type) {
+			found = true;
+			break;
+		}
+
+	if (found)
+		perf_pmu__scan_file(pmu, "nr_addr_filters",
+				    "%d", &nr_addr_filters);
+
+	if (!nr_addr_filters) {
+		fprintf(stderr,
+			"This CPU does not support address filtering\n");
+		return -1;
+	}
+
+	if (evsel__append_addr_filter(evsel, str) < 0) {
+		fprintf(stderr,
+			"not enough memory to hold filter string\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+int parse_filter(const struct option *opt, const char *str,
+		 int unset __maybe_unused)
+{
+	struct evlist *evlist = *(struct evlist **)opt->value;
+
+	return foreach_evsel_in_last_glob(evlist, set_filter,
+					  (const void *)str);
+}
+
+static int add_exclude_perf_filter(struct evsel *evsel,
+				   const void *arg __maybe_unused)
+{
+	char new_filter[64];
+
+	if (evsel == NULL || evsel->core.attr.type != PERF_TYPE_TRACEPOINT) {
+		fprintf(stderr,
+			"--exclude-perf option should follow a -e tracepoint option\n");
+		return -1;
+	}
+
+	snprintf(new_filter, sizeof(new_filter), "common_pid != %d", getpid());
+
+	if (evsel__append_tp_filter(evsel, new_filter) < 0) {
+		fprintf(stderr,
+			"not enough memory to hold filter string\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+int exclude_perf(const struct option *opt,
+		 const char *arg __maybe_unused,
+		 int unset __maybe_unused)
+{
+	struct evlist *evlist = *(struct evlist **)opt->value;
+
+	return foreach_evsel_in_last_glob(evlist, add_exclude_perf_filter,
+					  NULL);
+}
+
+int parse_events__is_hardcoded_term(struct parse_events_term *term)
+{
+	return term->type_term != PARSE_EVENTS__TERM_TYPE_USER;
+}
+
+static int new_term(struct parse_events_term **_term,
+		    struct parse_events_term *temp,
+		    char *str, u64 num)
+{
+	struct parse_events_term *term;
+
+	term = malloc(sizeof(*term));
+	if (!term)
+		return -ENOMEM;
+
+	*term = *temp;
+	INIT_LIST_HEAD(&term->list);
+	term->weak = false;
+
+	switch (term->type_val) {
+	case PARSE_EVENTS__TERM_TYPE_NUM:
+		term->val.num = num;
+		break;
+	case PARSE_EVENTS__TERM_TYPE_STR:
+		term->val.str = str;
+		break;
+	default:
+		free(term);
+		return -EINVAL;
+	}
+
+	*_term = term;
+	return 0;
+}
+
+int parse_events_term__num(struct parse_events_term **term,
+			   int type_term, char *config, u64 num,
+			   bool no_value,
+			   void *loc_term_, void *loc_val_)
+{
+	YYLTYPE *loc_term = loc_term_;
+	YYLTYPE *loc_val = loc_val_;
+
+	struct parse_events_term temp = {
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = type_term,
+		.config    = config ? : strdup(config_term_names[type_term]),
+		.no_value  = no_value,
+		.err_term  = loc_term ? loc_term->first_column : 0,
+		.err_val   = loc_val  ? loc_val->first_column  : 0,
+	};
+
+	return new_term(term, &temp, NULL, num);
+}
+
+int parse_events_term__str(struct parse_events_term **term,
+			   int type_term, char *config, char *str,
+			   void *loc_term_, void *loc_val_)
+{
+	YYLTYPE *loc_term = loc_term_;
+	YYLTYPE *loc_val = loc_val_;
+
+	struct parse_events_term temp = {
+		.type_val  = PARSE_EVENTS__TERM_TYPE_STR,
+		.type_term = type_term,
+		.config    = config,
+		.err_term  = loc_term ? loc_term->first_column : 0,
+		.err_val   = loc_val  ? loc_val->first_column  : 0,
+	};
+
+	return new_term(term, &temp, str, 0);
+}
+
+int parse_events_term__sym_hw(struct parse_events_term **term,
+			      char *config, unsigned idx)
+{
+	struct event_symbol *sym;
+	char *str;
+	struct parse_events_term temp = {
+		.type_val  = PARSE_EVENTS__TERM_TYPE_STR,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
+		.config    = config,
+	};
+
+	if (!temp.config) {
+		temp.config = strdup("event");
+		if (!temp.config)
+			return -ENOMEM;
+	}
+	BUG_ON(idx >= PERF_COUNT_HW_MAX);
+	sym = &event_symbols_hw[idx];
+
+	str = strdup(sym->symbol);
+	if (!str)
+		return -ENOMEM;
+	return new_term(term, &temp, str, 0);
+}
+
+int parse_events_term__clone(struct parse_events_term **new,
+			     struct parse_events_term *term)
+{
+	char *str;
+	struct parse_events_term temp = {
+		.type_val  = term->type_val,
+		.type_term = term->type_term,
+		.config    = NULL,
+		.err_term  = term->err_term,
+		.err_val   = term->err_val,
+	};
+
+	if (term->config) {
+		temp.config = strdup(term->config);
+		if (!temp.config)
+			return -ENOMEM;
+	}
+	if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM)
+		return new_term(new, &temp, NULL, term->val.num);
+
+	str = strdup(term->val.str);
+	if (!str)
+		return -ENOMEM;
+	return new_term(new, &temp, str, 0);
+}
+
+void parse_events_term__delete(struct parse_events_term *term)
+{
+	if (term->array.nr_ranges)
+		zfree(&term->array.ranges);
+
+	if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM)
+		zfree(&term->val.str);
+
+	zfree(&term->config);
+	free(term);
+}
+
+int parse_events_copy_term_list(struct list_head *old,
+				 struct list_head **new)
+{
+	struct parse_events_term *term, *n;
+	int ret;
+
+	if (!old) {
+		*new = NULL;
+		return 0;
+	}
+
+	*new = malloc(sizeof(struct list_head));
+	if (!*new)
+		return -ENOMEM;
+	INIT_LIST_HEAD(*new);
+
+	list_for_each_entry (term, old, list) {
+		ret = parse_events_term__clone(&n, term);
+		if (ret)
+			return ret;
+		list_add_tail(&n->list, *new);
+	}
+	return 0;
+}
+
+void parse_events_terms__purge(struct list_head *terms)
+{
+	struct parse_events_term *term, *h;
+
+	list_for_each_entry_safe(term, h, terms, list) {
+		list_del_init(&term->list);
+		parse_events_term__delete(term);
+	}
+}
+
+void parse_events_terms__delete(struct list_head *terms)
+{
+	if (!terms)
+		return;
+	parse_events_terms__purge(terms);
+	free(terms);
+}
+
+void parse_events__clear_array(struct parse_events_array *a)
+{
+	zfree(&a->ranges);
+}
+
+void parse_events_evlist_error(struct parse_events_state *parse_state,
+			       int idx, const char *str)
+{
+	if (!parse_state->error)
+		return;
+
+	parse_events_error__handle(parse_state->error, idx, strdup(str), NULL);
+}
+
+static void config_terms_list(char *buf, size_t buf_sz)
+{
+	int i;
+	bool first = true;
+
+	buf[0] = '\0';
+	for (i = 0; i < __PARSE_EVENTS__TERM_TYPE_NR; i++) {
+		const char *name = config_term_names[i];
+
+		if (!config_term_avail(i, NULL))
+			continue;
+		if (!name)
+			continue;
+		if (name[0] == '<')
+			continue;
+
+		if (strlen(buf) + strlen(name) + 2 >= buf_sz)
+			return;
+
+		if (!first)
+			strcat(buf, ",");
+		else
+			first = false;
+		strcat(buf, name);
+	}
+}
+
+/*
+ * Return string contains valid config terms of an event.
+ * @additional_terms: For terms such as PMU sysfs terms.
+ */
+char *parse_events_formats_error_string(char *additional_terms)
+{
+	char *str;
+	/* "no-overwrite" is the longest name */
+	char static_terms[__PARSE_EVENTS__TERM_TYPE_NR *
+			  (sizeof("no-overwrite") - 1)];
+
+	config_terms_list(static_terms, sizeof(static_terms));
+	/* valid terms */
+	if (additional_terms) {
+		if (asprintf(&str, "valid terms: %s,%s",
+			     additional_terms, static_terms) < 0)
+			goto fail;
+	} else {
+		if (asprintf(&str, "valid terms: %s", static_terms) < 0)
+			goto fail;
+	}
+	return str;
+
+fail:
+	return NULL;
+}
+
+struct evsel *parse_events__add_event_hybrid(struct list_head *list, int *idx,
+					     struct perf_event_attr *attr,
+					     const char *name,
+					     const char *metric_id,
+					     struct perf_pmu *pmu,
+					     struct list_head *config_terms)
+{
+	return __add_event(list, idx, attr, /*init_attr=*/true, name, metric_id,
+			   pmu, config_terms, /*auto_merge_stats=*/false,
+			   /*cpu_list=*/NULL);
+}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
new file mode 100644
index 000000000..07df7bb7b
--- /dev/null
+++ b/tools/perf/util/parse-events.h
@@ -0,0 +1,255 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_PARSE_EVENTS_H
+#define __PERF_PARSE_EVENTS_H
+/*
+ * Parse symbolic events/counts passed in as options:
+ */
+
+#include <linux/list.h>
+#include <stdbool.h>
+#include <linux/types.h>
+#include <linux/perf_event.h>
+#include <string.h>
+
+struct evsel;
+struct evlist;
+struct parse_events_error;
+
+struct option;
+struct perf_pmu;
+
+bool have_tracepoints(struct list_head *evlist);
+bool is_event_supported(u8 type, u64 config);
+
+const char *event_type(int type);
+
+int parse_events_option(const struct option *opt, const char *str, int unset);
+int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset);
+__attribute__((nonnull(1, 2, 3)))
+int __parse_events(struct evlist *evlist, const char *str, struct parse_events_error *error,
+		   struct perf_pmu *fake_pmu);
+
+__attribute__((nonnull))
+static inline int parse_events(struct evlist *evlist, const char *str,
+			       struct parse_events_error *err)
+{
+	return __parse_events(evlist, str, err, NULL);
+}
+
+int parse_event(struct evlist *evlist, const char *str);
+
+int parse_events_terms(struct list_head *terms, const char *str);
+int parse_filter(const struct option *opt, const char *str, int unset);
+int exclude_perf(const struct option *opt, const char *arg, int unset);
+
+enum perf_pmu_event_symbol_type {
+	PMU_EVENT_SYMBOL_ERR,		/* not a PMU EVENT */
+	PMU_EVENT_SYMBOL,		/* normal style PMU event */
+	PMU_EVENT_SYMBOL_PREFIX,	/* prefix of pre-suf style event */
+	PMU_EVENT_SYMBOL_SUFFIX,	/* suffix of pre-suf style event */
+	PMU_EVENT_SYMBOL_SUFFIX2,	/* suffix of pre-suf2 style event */
+};
+
+enum {
+	PARSE_EVENTS__TERM_TYPE_NUM,
+	PARSE_EVENTS__TERM_TYPE_STR,
+};
+
+enum {
+	PARSE_EVENTS__TERM_TYPE_USER,
+	PARSE_EVENTS__TERM_TYPE_CONFIG,
+	PARSE_EVENTS__TERM_TYPE_CONFIG1,
+	PARSE_EVENTS__TERM_TYPE_CONFIG2,
+	PARSE_EVENTS__TERM_TYPE_NAME,
+	PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD,
+	PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ,
+	PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
+	PARSE_EVENTS__TERM_TYPE_TIME,
+	PARSE_EVENTS__TERM_TYPE_CALLGRAPH,
+	PARSE_EVENTS__TERM_TYPE_STACKSIZE,
+	PARSE_EVENTS__TERM_TYPE_NOINHERIT,
+	PARSE_EVENTS__TERM_TYPE_INHERIT,
+	PARSE_EVENTS__TERM_TYPE_MAX_STACK,
+	PARSE_EVENTS__TERM_TYPE_MAX_EVENTS,
+	PARSE_EVENTS__TERM_TYPE_NOOVERWRITE,
+	PARSE_EVENTS__TERM_TYPE_OVERWRITE,
+	PARSE_EVENTS__TERM_TYPE_DRV_CFG,
+	PARSE_EVENTS__TERM_TYPE_PERCORE,
+	PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT,
+	PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE,
+	PARSE_EVENTS__TERM_TYPE_METRIC_ID,
+	__PARSE_EVENTS__TERM_TYPE_NR,
+};
+
+struct parse_events_array {
+	size_t nr_ranges;
+	struct {
+		unsigned int start;
+		size_t length;
+	} *ranges;
+};
+
+struct parse_events_term {
+	char *config;
+	struct parse_events_array array;
+	union {
+		char *str;
+		u64  num;
+	} val;
+	int type_val;
+	int type_term;
+	struct list_head list;
+	bool used;
+	bool no_value;
+
+	/* error string indexes for within parsed string */
+	int err_term;
+	int err_val;
+
+	/* Coming from implicit alias */
+	bool weak;
+};
+
+struct parse_events_error {
+	int   num_errors;       /* number of errors encountered */
+	int   idx;	/* index in the parsed string */
+	char *str;      /* string to display at the index */
+	char *help;	/* optional help string */
+	int   first_idx;/* as above, but for the first encountered error */
+	char *first_str;
+	char *first_help;
+};
+
+struct parse_events_state {
+	struct list_head	   list;
+	int			   idx;
+	int			   nr_groups;
+	struct parse_events_error *error;
+	struct evlist		  *evlist;
+	struct list_head	  *terms;
+	int			   stoken;
+	struct perf_pmu		  *fake_pmu;
+	char			  *hybrid_pmu_name;
+};
+
+void parse_events__shrink_config_terms(void);
+int parse_events__is_hardcoded_term(struct parse_events_term *term);
+int parse_events_term__num(struct parse_events_term **term,
+			   int type_term, char *config, u64 num,
+			   bool novalue,
+			   void *loc_term, void *loc_val);
+int parse_events_term__str(struct parse_events_term **term,
+			   int type_term, char *config, char *str,
+			   void *loc_term, void *loc_val);
+int parse_events_term__sym_hw(struct parse_events_term **term,
+			      char *config, unsigned idx);
+int parse_events_term__clone(struct parse_events_term **new,
+			     struct parse_events_term *term);
+void parse_events_term__delete(struct parse_events_term *term);
+void parse_events_terms__delete(struct list_head *terms);
+void parse_events_terms__purge(struct list_head *terms);
+void parse_events__clear_array(struct parse_events_array *a);
+int parse_events__modifier_event(struct list_head *list, char *str, bool add);
+int parse_events__modifier_group(struct list_head *list, char *event_mod);
+int parse_events_name(struct list_head *list, const char *name);
+int parse_events_add_tracepoint(struct list_head *list, int *idx,
+				const char *sys, const char *event,
+				struct parse_events_error *error,
+				struct list_head *head_config);
+int parse_events_load_bpf(struct parse_events_state *parse_state,
+			  struct list_head *list,
+			  char *bpf_file_name,
+			  bool source,
+			  struct list_head *head_config);
+/* Provide this function for perf test */
+struct bpf_object;
+int parse_events_load_bpf_obj(struct parse_events_state *parse_state,
+			      struct list_head *list,
+			      struct bpf_object *obj,
+			      struct list_head *head_config);
+int parse_events_add_numeric(struct parse_events_state *parse_state,
+			     struct list_head *list,
+			     u32 type, u64 config,
+			     struct list_head *head_config);
+int parse_events_add_tool(struct parse_events_state *parse_state,
+			  struct list_head *list,
+			  int tool_event);
+int parse_events_add_cache(struct list_head *list, int *idx,
+			   char *type, char *op_result1, char *op_result2,
+			   struct parse_events_error *error,
+			   struct list_head *head_config,
+			   struct parse_events_state *parse_state);
+int parse_events_add_breakpoint(struct list_head *list, int *idx,
+				u64 addr, char *type, u64 len);
+int parse_events_add_pmu(struct parse_events_state *parse_state,
+			 struct list_head *list, char *name,
+			 struct list_head *head_config,
+			 bool auto_merge_stats,
+			 bool use_alias);
+
+struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr,
+				      const char *name, const char *metric_id,
+				      struct perf_pmu *pmu);
+
+int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
+			       char *str,
+			       struct list_head *head_config,
+			       struct list_head **listp);
+
+int parse_events_copy_term_list(struct list_head *old,
+				 struct list_head **new);
+
+enum perf_pmu_event_symbol_type
+perf_pmu__parse_check(const char *name);
+void parse_events__set_leader(char *name, struct list_head *list,
+			      struct parse_events_state *parse_state);
+void parse_events_update_lists(struct list_head *list_event,
+			       struct list_head *list_all);
+void parse_events_evlist_error(struct parse_events_state *parse_state,
+			       int idx, const char *str);
+
+struct event_symbol {
+	const char	*symbol;
+	const char	*alias;
+};
+extern struct event_symbol event_symbols_hw[];
+extern struct event_symbol event_symbols_sw[];
+
+char *parse_events_formats_error_string(char *additional_terms);
+
+void parse_events_error__init(struct parse_events_error *err);
+void parse_events_error__exit(struct parse_events_error *err);
+void parse_events_error__handle(struct parse_events_error *err, int idx,
+				char *str, char *help);
+void parse_events_error__print(struct parse_events_error *err,
+			       const char *event);
+
+#ifdef HAVE_LIBELF_SUPPORT
+/*
+ * If the probe point starts with '%',
+ * or starts with "sdt_" and has a ':' but no '=',
+ * then it should be a SDT/cached probe point.
+ */
+static inline bool is_sdt_event(char *str)
+{
+	return (str[0] == '%' ||
+		(!strncmp(str, "sdt_", 4) &&
+		 !!strchr(str, ':') && !strchr(str, '=')));
+}
+#else
+static inline bool is_sdt_event(char *str __maybe_unused)
+{
+	return false;
+}
+#endif /* HAVE_LIBELF_SUPPORT */
+
+int perf_pmu__test_parse_init(void);
+
+struct evsel *parse_events__add_event_hybrid(struct list_head *list, int *idx,
+					     struct perf_event_attr *attr,
+					     const char *name,
+					     const char *metric_id,
+					     struct perf_pmu *pmu,
+					     struct list_head *config_terms);
+
+#endif /* __PERF_PARSE_EVENTS_H */
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
new file mode 100644
index 000000000..3a9ce96c8
--- /dev/null
+++ b/tools/perf/util/parse-events.l
@@ -0,0 +1,415 @@
+
+%option reentrant
+%option bison-bridge
+%option prefix="parse_events_"
+%option stack
+%option bison-locations
+%option yylineno
+%option reject
+
+%{
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "parse-events.h"
+#include "parse-events-bison.h"
+#include "evsel.h"
+
+char *parse_events_get_text(yyscan_t yyscanner);
+YYSTYPE *parse_events_get_lval(yyscan_t yyscanner);
+
+static int __value(YYSTYPE *yylval, char *str, int base, int token)
+{
+	u64 num;
+
+	errno = 0;
+	num = strtoull(str, NULL, base);
+	if (errno)
+		return PE_ERROR;
+
+	yylval->num = num;
+	return token;
+}
+
+static int value(yyscan_t scanner, int base)
+{
+	YYSTYPE *yylval = parse_events_get_lval(scanner);
+	char *text = parse_events_get_text(scanner);
+
+	return __value(yylval, text, base, PE_VALUE);
+}
+
+static int str(yyscan_t scanner, int token)
+{
+	YYSTYPE *yylval = parse_events_get_lval(scanner);
+	char *text = parse_events_get_text(scanner);
+
+	if (text[0] != '\'') {
+		yylval->str = strdup(text);
+	} else {
+		/*
+		 * If a text tag specified on the command line
+		 * contains opening single quite ' then it is
+		 * expected that the tag ends with single quote
+		 * as well, like this:
+		 *     name=\'CPU_CLK_UNHALTED.THREAD:cmask=1\'
+		 * quotes need to be escaped to bypass shell
+		 * processing.
+		 */
+		yylval->str = strndup(&text[1], strlen(text) - 2);
+	}
+
+	return token;
+}
+
+static int raw(yyscan_t scanner)
+{
+	YYSTYPE *yylval = parse_events_get_lval(scanner);
+	char *text = parse_events_get_text(scanner);
+
+	if (perf_pmu__parse_check(text) == PMU_EVENT_SYMBOL)
+		return str(scanner, PE_NAME);
+
+	return __value(yylval, text + 1, 16, PE_RAW);
+}
+
+static bool isbpf_suffix(char *text)
+{
+	int len = strlen(text);
+
+	if (len < 2)
+		return false;
+	if ((text[len - 1] == 'c' || text[len - 1] == 'o') &&
+	    text[len - 2] == '.')
+		return true;
+	if (len > 4 && !strcmp(text + len - 4, ".obj"))
+		return true;
+	return false;
+}
+
+static bool isbpf(yyscan_t scanner)
+{
+	char *text = parse_events_get_text(scanner);
+	struct stat st;
+
+	if (!isbpf_suffix(text))
+		return false;
+
+	return stat(text, &st) == 0;
+}
+
+/*
+ * This function is called when the parser gets two kind of input:
+ *
+ * 	@cfg1 or @cfg2=config
+ *
+ * The leading '@' is stripped off before 'cfg1' and 'cfg2=config' are given to
+ * bison.  In the latter case it is necessary to keep the string intact so that
+ * the PMU kernel driver can determine what configurable is associated to
+ * 'config'.
+ */
+static int drv_str(yyscan_t scanner, int token)
+{
+	YYSTYPE *yylval = parse_events_get_lval(scanner);
+	char *text = parse_events_get_text(scanner);
+
+	/* Strip off the '@' */
+	yylval->str = strdup(text + 1);
+	return token;
+}
+
+#define REWIND(__alloc)				\
+do {								\
+	YYSTYPE *__yylval = parse_events_get_lval(yyscanner);	\
+	char *text = parse_events_get_text(yyscanner);		\
+								\
+	if (__alloc)						\
+		__yylval->str = strdup(text);			\
+								\
+	yycolumn -= strlen(text);				\
+	yyless(0);						\
+} while (0)
+
+static int pmu_str_check(yyscan_t scanner, struct parse_events_state *parse_state)
+{
+	YYSTYPE *yylval = parse_events_get_lval(scanner);
+	char *text = parse_events_get_text(scanner);
+
+	yylval->str = strdup(text);
+
+	/*
+	 * If we're not testing then parse check determines the PMU event type
+	 * which if it isn't a PMU returns PE_NAME. When testing the result of
+	 * parse check can't be trusted so we return PE_PMU_EVENT_FAKE unless
+	 * an '!' is present in which case the text can't be a PMU name.
+	 */
+	switch (perf_pmu__parse_check(text)) {
+		case PMU_EVENT_SYMBOL_PREFIX:
+			return PE_PMU_EVENT_PRE;
+		case PMU_EVENT_SYMBOL_SUFFIX:
+			return PE_PMU_EVENT_SUF;
+		case PMU_EVENT_SYMBOL_SUFFIX2:
+			return PE_PMU_EVENT_SUF2;
+		case PMU_EVENT_SYMBOL:
+			return parse_state->fake_pmu
+				? PE_PMU_EVENT_FAKE : PE_KERNEL_PMU_EVENT;
+		default:
+			return parse_state->fake_pmu && !strchr(text,'!')
+				? PE_PMU_EVENT_FAKE : PE_NAME;
+	}
+}
+
+static int sym(yyscan_t scanner, int type, int config)
+{
+	YYSTYPE *yylval = parse_events_get_lval(scanner);
+
+	yylval->num = (type << 16) + config;
+	return type == PERF_TYPE_HARDWARE ? PE_VALUE_SYM_HW : PE_VALUE_SYM_SW;
+}
+
+static int tool(yyscan_t scanner, enum perf_tool_event event)
+{
+	YYSTYPE *yylval = parse_events_get_lval(scanner);
+
+	yylval->num = event;
+	return PE_VALUE_SYM_TOOL;
+}
+
+static int term(yyscan_t scanner, int type)
+{
+	YYSTYPE *yylval = parse_events_get_lval(scanner);
+
+	yylval->num = type;
+	return PE_TERM;
+}
+
+#define YY_USER_ACTION					\
+do {							\
+	yylloc->last_column  = yylloc->first_column;	\
+	yylloc->first_column = yycolumn;		\
+	yycolumn += yyleng;				\
+} while (0);
+
+#define USER_REJECT		\
+	yycolumn -= yyleng;	\
+	REJECT
+
+%}
+
+%x mem
+%s config
+%x event
+%x array
+
+group		[^,{}/]*[{][^}]*[}][^,{}/]*
+event_pmu	[^,{}/]+[/][^/]*[/][^,{}/]*
+event		[^,{}/]+
+bpf_object	[^,{}]+\.(o|bpf)[a-zA-Z0-9._]*
+bpf_source	[^,{}]+\.c[a-zA-Z0-9._]*
+
+num_dec		[0-9]+
+num_hex		0x[a-fA-F0-9]+
+num_raw_hex	[a-fA-F0-9]+
+name		[a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]!]*
+name_tag	[\'][a-zA-Z_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\']
+name_minus	[a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
+drv_cfg_term	[a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
+/* If you add a modifier you need to update check_modifier() */
+modifier_event	[ukhpPGHSDIWeb]+
+modifier_bp	[rwx]{1,3}
+
+%%
+
+%{
+	struct parse_events_state *_parse_state = parse_events_get_extra(yyscanner);
+
+	{
+		int start_token = _parse_state->stoken;
+
+		if (start_token == PE_START_TERMS)
+			BEGIN(config);
+		else if (start_token == PE_START_EVENTS)
+			BEGIN(event);
+
+		if (start_token) {
+			_parse_state->stoken = 0;
+			/*
+			 * The flex parser does not init locations variable
+			 * via the scan_string interface, so we need do the
+			 * init in here.
+			 */
+			yycolumn = 0;
+			return start_token;
+		}
+         }
+%}
+
+<event>{
+
+{group}		{
+			BEGIN(INITIAL);
+			REWIND(0);
+		}
+
+{event_pmu}	|
+{bpf_object}	|
+{bpf_source}	|
+{event}		{
+			BEGIN(INITIAL);
+			REWIND(1);
+			return PE_EVENT_NAME;
+		}
+
+<<EOF>>		{
+			BEGIN(INITIAL);
+			REWIND(0);
+		}
+,		{
+			return ',';
+		}
+}
+
+<array>{
+"]"			{ BEGIN(config); return ']'; }
+{num_dec}		{ return value(yyscanner, 10); }
+{num_hex}		{ return value(yyscanner, 16); }
+,			{ return ','; }
+"\.\.\."		{ return PE_ARRAY_RANGE; }
+}
+
+<config>{
+	/*
+	 * Please update config_term_names when new static term is added.
+	 */
+config			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); }
+config1			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); }
+config2			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); }
+name			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); }
+period			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
+freq			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ); }
+branch_type		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
+time			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
+call-graph		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
+stack-size		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
+max-stack		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); }
+nr			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_EVENTS); }
+inherit			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); }
+no-inherit		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); }
+overwrite		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); }
+no-overwrite		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); }
+percore			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); }
+aux-output		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); }
+aux-sample-size		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); }
+metric-id		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); }
+r{num_raw_hex}		{ return raw(yyscanner); }
+r0x{num_raw_hex}	{ return raw(yyscanner); }
+,			{ return ','; }
+"/"			{ BEGIN(INITIAL); return '/'; }
+{name_minus}		{ return str(yyscanner, PE_NAME); }
+\[all\]			{ return PE_ARRAY_ALL; }
+"["			{ BEGIN(array); return '['; }
+@{drv_cfg_term}		{ return drv_str(yyscanner, PE_DRV_CFG_TERM); }
+}
+
+<mem>{
+{modifier_bp}		{ return str(yyscanner, PE_MODIFIER_BP); }
+:			{ return ':'; }
+"/"			{ return '/'; }
+{num_dec}		{ return value(yyscanner, 10); }
+{num_hex}		{ return value(yyscanner, 16); }
+	/*
+	 * We need to separate 'mem:' scanner part, in order to get specific
+	 * modifier bits parsed out. Otherwise we would need to handle PE_NAME
+	 * and we'd need to parse it manually. During the escape from <mem>
+	 * state we need to put the escaping char back, so we dont miss it.
+	 */
+.			{ unput(*yytext); BEGIN(INITIAL); }
+	/*
+	 * We destroy the scanner after reaching EOF,
+	 * but anyway just to be sure get back to INIT state.
+	 */
+<<EOF>>			{ BEGIN(INITIAL); }
+}
+
+cpu-cycles|cycles				{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); }
+stalled-cycles-frontend|idle-cycles-frontend	{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
+stalled-cycles-backend|idle-cycles-backend	{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
+instructions					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); }
+cache-references				{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES); }
+cache-misses					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); }
+branch-instructions|branches			{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); }
+branch-misses					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES); }
+bus-cycles					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES); }
+ref-cycles					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES); }
+cpu-clock					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK); }
+task-clock					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK); }
+page-faults|faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS); }
+minor-faults					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN); }
+major-faults					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ); }
+context-switches|cs				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES); }
+cpu-migrations|migrations			{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS); }
+alignment-faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); }
+emulation-faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
+dummy						{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
+duration_time					{ return tool(yyscanner, PERF_TOOL_DURATION_TIME); }
+user_time						{ return tool(yyscanner, PERF_TOOL_USER_TIME); }
+system_time						{ return tool(yyscanner, PERF_TOOL_SYSTEM_TIME); }
+bpf-output					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
+cgroup-switches					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); }
+
+	/*
+	 * We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately.
+	 * Because the prefix cycles is mixed up with cpu-cycles.
+	 * loads and stores are mixed up with cache event
+	 */
+cycles-ct				|
+cycles-t				|
+mem-loads				|
+mem-loads-aux				|
+mem-stores				|
+topdown-[a-z-]+				|
+tx-capacity-[a-z-]+			|
+el-capacity-[a-z-]+			{ return str(yyscanner, PE_KERNEL_PMU_EVENT); }
+
+L1-dcache|l1-d|l1d|L1-data		|
+L1-icache|l1-i|l1i|L1-instruction	|
+LLC|L2					|
+dTLB|d-tlb|Data-TLB			|
+iTLB|i-tlb|Instruction-TLB		|
+branch|branches|bpu|btb|bpc		|
+node					{ return str(yyscanner, PE_NAME_CACHE_TYPE); }
+
+load|loads|read				|
+store|stores|write			|
+prefetch|prefetches			|
+speculative-read|speculative-load	|
+refs|Reference|ops|access		|
+misses|miss				{ return str(yyscanner, PE_NAME_CACHE_OP_RESULT); }
+
+mem:			{ BEGIN(mem); return PE_PREFIX_MEM; }
+r{num_raw_hex}		{ return raw(yyscanner); }
+{num_dec}		{ return value(yyscanner, 10); }
+{num_hex}		{ return value(yyscanner, 16); }
+
+{modifier_event}	{ return str(yyscanner, PE_MODIFIER_EVENT); }
+{bpf_object}		{ if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); }
+{bpf_source}		{ if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); }
+{name}			{ return pmu_str_check(yyscanner, _parse_state); }
+{name_tag}		{ return str(yyscanner, PE_NAME); }
+"/"			{ BEGIN(config); return '/'; }
+-			{ return '-'; }
+,			{ BEGIN(event); return ','; }
+:			{ return ':'; }
+"{"			{ BEGIN(event); return '{'; }
+"}"			{ return '}'; }
+=			{ return '='; }
+\n			{ }
+.			{ }
+
+%%
+
+int parse_events_wrap(void *scanner __maybe_unused)
+{
+	return 1;
+}
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
new file mode 100644
index 000000000..be8c51770
--- /dev/null
+++ b/tools/perf/util/parse-events.y
@@ -0,0 +1,996 @@
+%define api.pure full
+%parse-param {void *_parse_state}
+%parse-param {void *scanner}
+%lex-param {void* scanner}
+%locations
+
+%{
+
+#define YYDEBUG 1
+
+#include <fnmatch.h>
+#include <stdio.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/zalloc.h>
+#include "pmu.h"
+#include "evsel.h"
+#include "parse-events.h"
+#include "parse-events-bison.h"
+
+void parse_events_error(YYLTYPE *loc, void *parse_state, void *scanner, char const *msg);
+
+#define ABORT_ON(val) \
+do { \
+	if (val) \
+		YYABORT; \
+} while (0)
+
+static struct list_head* alloc_list(void)
+{
+	struct list_head *list;
+
+	list = malloc(sizeof(*list));
+	if (!list)
+		return NULL;
+
+	INIT_LIST_HEAD(list);
+	return list;
+}
+
+static void free_list_evsel(struct list_head* list_evsel)
+{
+	struct evsel *evsel, *tmp;
+
+	list_for_each_entry_safe(evsel, tmp, list_evsel, core.node) {
+		list_del_init(&evsel->core.node);
+		evsel__delete(evsel);
+	}
+	free(list_evsel);
+}
+
+static void inc_group_count(struct list_head *list,
+		       struct parse_events_state *parse_state)
+{
+	/* Count groups only have more than 1 members */
+	if (!list_is_last(list->next, list))
+		parse_state->nr_groups++;
+}
+
+%}
+
+%token PE_START_EVENTS PE_START_TERMS
+%token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_RAW PE_TERM
+%token PE_VALUE_SYM_TOOL
+%token PE_EVENT_NAME
+%token PE_NAME
+%token PE_BPF_OBJECT PE_BPF_SOURCE
+%token PE_MODIFIER_EVENT PE_MODIFIER_BP
+%token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT
+%token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP
+%token PE_ERROR
+%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_PMU_EVENT_SUF2 PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
+%token PE_ARRAY_ALL PE_ARRAY_RANGE
+%token PE_DRV_CFG_TERM
+%type <num> PE_VALUE
+%type <num> PE_VALUE_SYM_HW
+%type <num> PE_VALUE_SYM_SW
+%type <num> PE_VALUE_SYM_TOOL
+%type <num> PE_RAW
+%type <num> PE_TERM
+%type <num> value_sym
+%type <str> PE_NAME
+%type <str> PE_BPF_OBJECT
+%type <str> PE_BPF_SOURCE
+%type <str> PE_NAME_CACHE_TYPE
+%type <str> PE_NAME_CACHE_OP_RESULT
+%type <str> PE_MODIFIER_EVENT
+%type <str> PE_MODIFIER_BP
+%type <str> PE_EVENT_NAME
+%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_PMU_EVENT_SUF2 PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
+%type <str> PE_DRV_CFG_TERM
+%type <str> event_pmu_name
+%destructor { free ($$); } <str>
+%type <term> event_term
+%destructor { parse_events_term__delete ($$); } <term>
+%type <list_terms> event_config
+%type <list_terms> opt_event_config
+%type <list_terms> opt_pmu_config
+%destructor { parse_events_terms__delete ($$); } <list_terms>
+%type <list_evsel> event_pmu
+%type <list_evsel> event_legacy_symbol
+%type <list_evsel> event_legacy_cache
+%type <list_evsel> event_legacy_mem
+%type <list_evsel> event_legacy_tracepoint
+%type <list_evsel> event_legacy_numeric
+%type <list_evsel> event_legacy_raw
+%type <list_evsel> event_bpf_file
+%type <list_evsel> event_def
+%type <list_evsel> event_mod
+%type <list_evsel> event_name
+%type <list_evsel> event
+%type <list_evsel> events
+%type <list_evsel> group_def
+%type <list_evsel> group
+%type <list_evsel> groups
+%destructor { free_list_evsel ($$); } <list_evsel>
+%type <tracepoint_name> tracepoint_name
+%destructor { free ($$.sys); free ($$.event); } <tracepoint_name>
+%type <array> array
+%type <array> array_term
+%type <array> array_terms
+%destructor { free ($$.ranges); } <array>
+
+%union
+{
+	char *str;
+	u64 num;
+	struct list_head *list_evsel;
+	struct list_head *list_terms;
+	struct parse_events_term *term;
+	struct tracepoint_name {
+		char *sys;
+		char *event;
+	} tracepoint_name;
+	struct parse_events_array array;
+}
+%%
+
+start:
+PE_START_EVENTS start_events
+|
+PE_START_TERMS  start_terms
+
+start_events: groups
+{
+	struct parse_events_state *parse_state = _parse_state;
+
+	/* frees $1 */
+	parse_events_update_lists($1, &parse_state->list);
+}
+
+groups:
+groups ',' group
+{
+	struct list_head *list  = $1;
+	struct list_head *group = $3;
+
+	/* frees $3 */
+	parse_events_update_lists(group, list);
+	$$ = list;
+}
+|
+groups ',' event
+{
+	struct list_head *list  = $1;
+	struct list_head *event = $3;
+
+	/* frees $3 */
+	parse_events_update_lists(event, list);
+	$$ = list;
+}
+|
+group
+|
+event
+
+group:
+group_def ':' PE_MODIFIER_EVENT
+{
+	struct list_head *list = $1;
+	int err;
+
+	err = parse_events__modifier_group(list, $3);
+	free($3);
+	if (err) {
+		struct parse_events_state *parse_state = _parse_state;
+		struct parse_events_error *error = parse_state->error;
+
+		parse_events_error__handle(error, @3.first_column,
+					   strdup("Bad modifier"), NULL);
+		free_list_evsel(list);
+		YYABORT;
+	}
+	$$ = list;
+}
+|
+group_def
+
+group_def:
+PE_NAME '{' events '}'
+{
+	struct list_head *list = $3;
+
+	inc_group_count(list, _parse_state);
+	parse_events__set_leader($1, list, _parse_state);
+	free($1);
+	$$ = list;
+}
+|
+'{' events '}'
+{
+	struct list_head *list = $2;
+
+	inc_group_count(list, _parse_state);
+	parse_events__set_leader(NULL, list, _parse_state);
+	$$ = list;
+}
+
+events:
+events ',' event
+{
+	struct list_head *event = $3;
+	struct list_head *list  = $1;
+
+	/* frees $3 */
+	parse_events_update_lists(event, list);
+	$$ = list;
+}
+|
+event
+
+event: event_mod
+
+event_mod:
+event_name PE_MODIFIER_EVENT
+{
+	struct list_head *list = $1;
+	int err;
+
+	/*
+	 * Apply modifier on all events added by single event definition
+	 * (there could be more events added for multiple tracepoint
+	 * definitions via '*?'.
+	 */
+	err = parse_events__modifier_event(list, $2, false);
+	free($2);
+	if (err) {
+		struct parse_events_state *parse_state = _parse_state;
+		struct parse_events_error *error = parse_state->error;
+
+		parse_events_error__handle(error, @2.first_column,
+					   strdup("Bad modifier"), NULL);
+		free_list_evsel(list);
+		YYABORT;
+	}
+	$$ = list;
+}
+|
+event_name
+
+event_name:
+PE_EVENT_NAME event_def
+{
+	int err;
+
+	err = parse_events_name($2, $1);
+	free($1);
+	if (err) {
+		free_list_evsel($2);
+		YYABORT;
+	}
+	$$ = $2;
+}
+|
+event_def
+
+event_def: event_pmu |
+	   event_legacy_symbol |
+	   event_legacy_cache sep_dc |
+	   event_legacy_mem |
+	   event_legacy_tracepoint sep_dc |
+	   event_legacy_numeric sep_dc |
+	   event_legacy_raw sep_dc |
+	   event_bpf_file
+
+event_pmu_name:
+PE_NAME | PE_PMU_EVENT_PRE
+
+event_pmu:
+event_pmu_name opt_pmu_config
+{
+	struct parse_events_state *parse_state = _parse_state;
+	struct parse_events_error *error = parse_state->error;
+	struct list_head *list = NULL, *orig_terms = NULL, *terms= NULL;
+	char *pattern = NULL;
+
+#define CLEANUP_YYABORT					\
+	do {						\
+		parse_events_terms__delete($2);		\
+		parse_events_terms__delete(orig_terms);	\
+		free(list);				\
+		free($1);				\
+		free(pattern);				\
+		YYABORT;				\
+	} while(0)
+
+	if (parse_events_copy_term_list($2, &orig_terms))
+		CLEANUP_YYABORT;
+
+	if (error)
+		error->idx = @1.first_column;
+
+	list = alloc_list();
+	if (!list)
+		CLEANUP_YYABORT;
+	if (parse_events_add_pmu(_parse_state, list, $1, $2, false, false)) {
+		struct perf_pmu *pmu = NULL;
+		int ok = 0;
+
+		if (asprintf(&pattern, "%s*", $1) < 0)
+			CLEANUP_YYABORT;
+
+		while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+			char *name = pmu->name;
+
+			if (!strncmp(name, "uncore_", 7) &&
+			    strncmp($1, "uncore_", 7))
+				name += 7;
+			if (!perf_pmu__match(pattern, name, $1) ||
+			    !perf_pmu__match(pattern, pmu->alias_name, $1)) {
+				if (parse_events_copy_term_list(orig_terms, &terms))
+					CLEANUP_YYABORT;
+				if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true, false))
+					ok++;
+				parse_events_terms__delete(terms);
+			}
+		}
+
+		if (!ok)
+			CLEANUP_YYABORT;
+	}
+	parse_events_terms__delete($2);
+	parse_events_terms__delete(orig_terms);
+	free(pattern);
+	free($1);
+	$$ = list;
+#undef CLEANUP_YYABORT
+}
+|
+PE_KERNEL_PMU_EVENT sep_dc
+{
+	struct list_head *list;
+	int err;
+
+	err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list);
+	free($1);
+	if (err < 0)
+		YYABORT;
+	$$ = list;
+}
+|
+PE_KERNEL_PMU_EVENT opt_pmu_config
+{
+	struct list_head *list;
+	int err;
+
+	/* frees $2 */
+	err = parse_events_multi_pmu_add(_parse_state, $1, $2, &list);
+	free($1);
+	if (err < 0)
+		YYABORT;
+	$$ = list;
+}
+|
+PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF '-' PE_PMU_EVENT_SUF2 sep_dc
+{
+	struct list_head *list;
+	char pmu_name[128];
+	snprintf(pmu_name, sizeof(pmu_name), "%s-%s-%s", $1, $3, $5);
+	free($1);
+	free($3);
+	free($5);
+	if (parse_events_multi_pmu_add(_parse_state, pmu_name, NULL, &list) < 0)
+		YYABORT;
+	$$ = list;
+}
+|
+PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc
+{
+	struct list_head *list;
+	char pmu_name[128];
+
+	snprintf(pmu_name, sizeof(pmu_name), "%s-%s", $1, $3);
+	free($1);
+	free($3);
+	if (parse_events_multi_pmu_add(_parse_state, pmu_name, NULL, &list) < 0)
+		YYABORT;
+	$$ = list;
+}
+|
+PE_PMU_EVENT_FAKE sep_dc
+{
+	struct list_head *list;
+	int err;
+
+	list = alloc_list();
+	if (!list)
+		YYABORT;
+
+	err = parse_events_add_pmu(_parse_state, list, $1, NULL, false, false);
+	free($1);
+	if (err < 0) {
+		free(list);
+		YYABORT;
+	}
+	$$ = list;
+}
+|
+PE_PMU_EVENT_FAKE opt_pmu_config
+{
+	struct list_head *list;
+	int err;
+
+	list = alloc_list();
+	if (!list)
+		YYABORT;
+
+	err = parse_events_add_pmu(_parse_state, list, $1, $2, false, false);
+	free($1);
+	parse_events_terms__delete($2);
+	if (err < 0) {
+		free(list);
+		YYABORT;
+	}
+	$$ = list;
+}
+
+value_sym:
+PE_VALUE_SYM_HW
+|
+PE_VALUE_SYM_SW
+
+event_legacy_symbol:
+value_sym '/' event_config '/'
+{
+	struct list_head *list;
+	int type = $1 >> 16;
+	int config = $1 & 255;
+	int err;
+
+	list = alloc_list();
+	ABORT_ON(!list);
+	err = parse_events_add_numeric(_parse_state, list, type, config, $3);
+	parse_events_terms__delete($3);
+	if (err) {
+		free_list_evsel(list);
+		YYABORT;
+	}
+	$$ = list;
+}
+|
+value_sym sep_slash_slash_dc
+{
+	struct list_head *list;
+	int type = $1 >> 16;
+	int config = $1 & 255;
+
+	list = alloc_list();
+	ABORT_ON(!list);
+	ABORT_ON(parse_events_add_numeric(_parse_state, list, type, config, NULL));
+	$$ = list;
+}
+|
+PE_VALUE_SYM_TOOL sep_slash_slash_dc
+{
+	struct list_head *list;
+
+	list = alloc_list();
+	ABORT_ON(!list);
+	ABORT_ON(parse_events_add_tool(_parse_state, list, $1));
+	$$ = list;
+}
+
+event_legacy_cache:
+PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT opt_event_config
+{
+	struct parse_events_state *parse_state = _parse_state;
+	struct parse_events_error *error = parse_state->error;
+	struct list_head *list;
+	int err;
+
+	list = alloc_list();
+	ABORT_ON(!list);
+	err = parse_events_add_cache(list, &parse_state->idx, $1, $3, $5, error, $6,
+				     parse_state);
+	parse_events_terms__delete($6);
+	free($1);
+	free($3);
+	free($5);
+	if (err) {
+		free_list_evsel(list);
+		YYABORT;
+	}
+	$$ = list;
+}
+|
+PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT opt_event_config
+{
+	struct parse_events_state *parse_state = _parse_state;
+	struct parse_events_error *error = parse_state->error;
+	struct list_head *list;
+	int err;
+
+	list = alloc_list();
+	ABORT_ON(!list);
+	err = parse_events_add_cache(list, &parse_state->idx, $1, $3, NULL, error, $4,
+				     parse_state);
+	parse_events_terms__delete($4);
+	free($1);
+	free($3);
+	if (err) {
+		free_list_evsel(list);
+		YYABORT;
+	}
+	$$ = list;
+}
+|
+PE_NAME_CACHE_TYPE opt_event_config
+{
+	struct parse_events_state *parse_state = _parse_state;
+	struct parse_events_error *error = parse_state->error;
+	struct list_head *list;
+	int err;
+
+	list = alloc_list();
+	ABORT_ON(!list);
+	err = parse_events_add_cache(list, &parse_state->idx, $1, NULL, NULL, error, $2,
+				     parse_state);
+	parse_events_terms__delete($2);
+	free($1);
+	if (err) {
+		free_list_evsel(list);
+		YYABORT;
+	}
+	$$ = list;
+}
+
+event_legacy_mem:
+PE_PREFIX_MEM PE_VALUE '/' PE_VALUE ':' PE_MODIFIER_BP sep_dc
+{
+	struct parse_events_state *parse_state = _parse_state;
+	struct list_head *list;
+	int err;
+
+	list = alloc_list();
+	ABORT_ON(!list);
+	err = parse_events_add_breakpoint(list, &parse_state->idx,
+					  $2, $6, $4);
+	free($6);
+	if (err) {
+		free(list);
+		YYABORT;
+	}
+	$$ = list;
+}
+|
+PE_PREFIX_MEM PE_VALUE '/' PE_VALUE sep_dc
+{
+	struct parse_events_state *parse_state = _parse_state;
+	struct list_head *list;
+
+	list = alloc_list();
+	ABORT_ON(!list);
+	if (parse_events_add_breakpoint(list, &parse_state->idx,
+					$2, NULL, $4)) {
+		free(list);
+		YYABORT;
+	}
+	$$ = list;
+}
+|
+PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc
+{
+	struct parse_events_state *parse_state = _parse_state;
+	struct list_head *list;
+	int err;
+
+	list = alloc_list();
+	ABORT_ON(!list);
+	err = parse_events_add_breakpoint(list, &parse_state->idx,
+					  $2, $4, 0);
+	free($4);
+	if (err) {
+		free(list);
+		YYABORT;
+	}
+	$$ = list;
+}
+|
+PE_PREFIX_MEM PE_VALUE sep_dc
+{
+	struct parse_events_state *parse_state = _parse_state;
+	struct list_head *list;
+
+	list = alloc_list();
+	ABORT_ON(!list);
+	if (parse_events_add_breakpoint(list, &parse_state->idx,
+					$2, NULL, 0)) {
+		free(list);
+		YYABORT;
+	}
+	$$ = list;
+}
+
+event_legacy_tracepoint:
+tracepoint_name opt_event_config
+{
+	struct parse_events_state *parse_state = _parse_state;
+	struct parse_events_error *error = parse_state->error;
+	struct list_head *list;
+	int err;
+
+	list = alloc_list();
+	ABORT_ON(!list);
+	if (error)
+		error->idx = @1.first_column;
+
+	err = parse_events_add_tracepoint(list, &parse_state->idx, $1.sys, $1.event,
+					error, $2);
+
+	parse_events_terms__delete($2);
+	free($1.sys);
+	free($1.event);
+	if (err) {
+		free(list);
+		YYABORT;
+	}
+	$$ = list;
+}
+
+tracepoint_name:
+PE_NAME '-' PE_NAME ':' PE_NAME
+{
+	struct tracepoint_name tracepoint;
+
+	ABORT_ON(asprintf(&tracepoint.sys, "%s-%s", $1, $3) < 0);
+	tracepoint.event = $5;
+	free($1);
+	free($3);
+	$$ = tracepoint;
+}
+|
+PE_NAME ':' PE_NAME
+{
+	struct tracepoint_name tracepoint = {$1, $3};
+
+	$$ = tracepoint;
+}
+
+event_legacy_numeric:
+PE_VALUE ':' PE_VALUE opt_event_config
+{
+	struct list_head *list;
+	int err;
+
+	list = alloc_list();
+	ABORT_ON(!list);
+	err = parse_events_add_numeric(_parse_state, list, (u32)$1, $3, $4);
+	parse_events_terms__delete($4);
+	if (err) {
+		free(list);
+		YYABORT;
+	}
+	$$ = list;
+}
+
+event_legacy_raw:
+PE_RAW opt_event_config
+{
+	struct list_head *list;
+	int err;
+
+	list = alloc_list();
+	ABORT_ON(!list);
+	err = parse_events_add_numeric(_parse_state, list, PERF_TYPE_RAW, $1, $2);
+	parse_events_terms__delete($2);
+	if (err) {
+		free(list);
+		YYABORT;
+	}
+	$$ = list;
+}
+
+event_bpf_file:
+PE_BPF_OBJECT opt_event_config
+{
+	struct parse_events_state *parse_state = _parse_state;
+	struct list_head *list;
+	int err;
+
+	list = alloc_list();
+	ABORT_ON(!list);
+	err = parse_events_load_bpf(parse_state, list, $1, false, $2);
+	parse_events_terms__delete($2);
+	free($1);
+	if (err) {
+		free(list);
+		YYABORT;
+	}
+	$$ = list;
+}
+|
+PE_BPF_SOURCE opt_event_config
+{
+	struct list_head *list;
+	int err;
+
+	list = alloc_list();
+	ABORT_ON(!list);
+	err = parse_events_load_bpf(_parse_state, list, $1, true, $2);
+	parse_events_terms__delete($2);
+	if (err) {
+		free(list);
+		YYABORT;
+	}
+	$$ = list;
+}
+
+opt_event_config:
+'/' event_config '/'
+{
+	$$ = $2;
+}
+|
+'/' '/'
+{
+	$$ = NULL;
+}
+|
+{
+	$$ = NULL;
+}
+
+opt_pmu_config:
+'/' event_config '/'
+{
+	$$ = $2;
+}
+|
+'/' '/'
+{
+	$$ = NULL;
+}
+
+start_terms: event_config
+{
+	struct parse_events_state *parse_state = _parse_state;
+	if (parse_state->terms) {
+		parse_events_terms__delete ($1);
+		YYABORT;
+	}
+	parse_state->terms = $1;
+}
+
+event_config:
+event_config ',' event_term
+{
+	struct list_head *head = $1;
+	struct parse_events_term *term = $3;
+
+	if (!head) {
+		parse_events_term__delete(term);
+		YYABORT;
+	}
+	list_add_tail(&term->list, head);
+	$$ = $1;
+}
+|
+event_term
+{
+	struct list_head *head = malloc(sizeof(*head));
+	struct parse_events_term *term = $1;
+
+	ABORT_ON(!head);
+	INIT_LIST_HEAD(head);
+	list_add_tail(&term->list, head);
+	$$ = head;
+}
+
+event_term:
+PE_RAW
+{
+	struct parse_events_term *term;
+
+	ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_CONFIG,
+					NULL, $1, false, &@1, NULL));
+	$$ = term;
+}
+|
+PE_NAME '=' PE_NAME
+{
+	struct parse_events_term *term;
+
+	if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
+					$1, $3, &@1, &@3)) {
+		free($1);
+		free($3);
+		YYABORT;
+	}
+	$$ = term;
+}
+|
+PE_NAME '=' PE_VALUE
+{
+	struct parse_events_term *term;
+
+	if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+					$1, $3, false, &@1, &@3)) {
+		free($1);
+		YYABORT;
+	}
+	$$ = term;
+}
+|
+PE_NAME '=' PE_VALUE_SYM_HW
+{
+	struct parse_events_term *term;
+	int config = $3 & 255;
+
+	if (parse_events_term__sym_hw(&term, $1, config)) {
+		free($1);
+		YYABORT;
+	}
+	$$ = term;
+}
+|
+PE_NAME
+{
+	struct parse_events_term *term;
+
+	if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+					$1, 1, true, &@1, NULL)) {
+		free($1);
+		YYABORT;
+	}
+	$$ = term;
+}
+|
+PE_VALUE_SYM_HW
+{
+	struct parse_events_term *term;
+	int config = $1 & 255;
+
+	ABORT_ON(parse_events_term__sym_hw(&term, NULL, config));
+	$$ = term;
+}
+|
+PE_TERM '=' PE_NAME
+{
+	struct parse_events_term *term;
+
+	if (parse_events_term__str(&term, (int)$1, NULL, $3, &@1, &@3)) {
+		free($3);
+		YYABORT;
+	}
+	$$ = term;
+}
+|
+PE_TERM '=' PE_VALUE
+{
+	struct parse_events_term *term;
+
+	ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3, false, &@1, &@3));
+	$$ = term;
+}
+|
+PE_TERM
+{
+	struct parse_events_term *term;
+
+	ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, true, &@1, NULL));
+	$$ = term;
+}
+|
+PE_NAME array '=' PE_NAME
+{
+	struct parse_events_term *term;
+
+	if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
+					$1, $4, &@1, &@4)) {
+		free($1);
+		free($4);
+		free($2.ranges);
+		YYABORT;
+	}
+	term->array = $2;
+	$$ = term;
+}
+|
+PE_NAME array '=' PE_VALUE
+{
+	struct parse_events_term *term;
+
+	if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+					$1, $4, false, &@1, &@4)) {
+		free($1);
+		free($2.ranges);
+		YYABORT;
+	}
+	term->array = $2;
+	$$ = term;
+}
+|
+PE_DRV_CFG_TERM
+{
+	struct parse_events_term *term;
+	char *config = strdup($1);
+
+	ABORT_ON(!config);
+	if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_DRV_CFG,
+					config, $1, &@1, NULL)) {
+		free($1);
+		free(config);
+		YYABORT;
+	}
+	$$ = term;
+}
+
+array:
+'[' array_terms ']'
+{
+	$$ = $2;
+}
+|
+PE_ARRAY_ALL
+{
+	$$.nr_ranges = 0;
+	$$.ranges = NULL;
+}
+
+array_terms:
+array_terms ',' array_term
+{
+	struct parse_events_array new_array;
+
+	new_array.nr_ranges = $1.nr_ranges + $3.nr_ranges;
+	new_array.ranges = realloc($1.ranges,
+				sizeof(new_array.ranges[0]) *
+				new_array.nr_ranges);
+	ABORT_ON(!new_array.ranges);
+	memcpy(&new_array.ranges[$1.nr_ranges], $3.ranges,
+	       $3.nr_ranges * sizeof(new_array.ranges[0]));
+	free($3.ranges);
+	$$ = new_array;
+}
+|
+array_term
+
+array_term:
+PE_VALUE
+{
+	struct parse_events_array array;
+
+	array.nr_ranges = 1;
+	array.ranges = malloc(sizeof(array.ranges[0]));
+	ABORT_ON(!array.ranges);
+	array.ranges[0].start = $1;
+	array.ranges[0].length = 1;
+	$$ = array;
+}
+|
+PE_VALUE PE_ARRAY_RANGE PE_VALUE
+{
+	struct parse_events_array array;
+
+	ABORT_ON($3 < $1);
+	array.nr_ranges = 1;
+	array.ranges = malloc(sizeof(array.ranges[0]));
+	ABORT_ON(!array.ranges);
+	array.ranges[0].start = $1;
+	array.ranges[0].length = $3 - $1 + 1;
+	$$ = array;
+}
+
+sep_dc: ':' |
+
+sep_slash_slash_dc: '/' '/' | ':' |
+
+%%
+
+void parse_events_error(YYLTYPE *loc, void *parse_state,
+			void *scanner __maybe_unused,
+			char const *msg __maybe_unused)
+{
+	parse_events_evlist_error(parse_state, loc->last_column, "parser error");
+}
diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c
new file mode 100644
index 000000000..a4a100425
--- /dev/null
+++ b/tools/perf/util/parse-regs-options.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include "util/debug.h"
+#include <subcmd/parse-options.h>
+#include "util/perf_regs.h"
+#include "util/parse-regs-options.h"
+
+static int
+__parse_regs(const struct option *opt, const char *str, int unset, bool intr)
+{
+	uint64_t *mode = (uint64_t *)opt->value;
+	const struct sample_reg *r = NULL;
+	char *s, *os = NULL, *p;
+	int ret = -1;
+	uint64_t mask;
+
+	if (unset)
+		return 0;
+
+	/*
+	 * cannot set it twice
+	 */
+	if (*mode)
+		return -1;
+
+	if (intr)
+		mask = arch__intr_reg_mask();
+	else
+		mask = arch__user_reg_mask();
+
+	/* str may be NULL in case no arg is passed to -I */
+	if (str) {
+		/* because str is read-only */
+		s = os = strdup(str);
+		if (!s)
+			return -1;
+
+		for (;;) {
+			p = strchr(s, ',');
+			if (p)
+				*p = '\0';
+
+			if (!strcmp(s, "?")) {
+				fprintf(stderr, "available registers: ");
+#ifdef HAVE_PERF_REGS_SUPPORT
+				for (r = sample_reg_masks; r->name; r++) {
+					if (r->mask & mask)
+						fprintf(stderr, "%s ", r->name);
+				}
+#endif
+				fputc('\n', stderr);
+				/* just printing available regs */
+				goto error;
+			}
+#ifdef HAVE_PERF_REGS_SUPPORT
+			for (r = sample_reg_masks; r->name; r++) {
+				if ((r->mask & mask) && !strcasecmp(s, r->name))
+					break;
+			}
+#endif
+			if (!r || !r->name) {
+				ui__warning("Unknown register \"%s\", check man page or run \"perf record %s?\"\n",
+					    s, intr ? "-I" : "--user-regs=");
+				goto error;
+			}
+
+			*mode |= r->mask;
+
+			if (!p)
+				break;
+
+			s = p + 1;
+		}
+	}
+	ret = 0;
+
+	/* default to all possible regs */
+	if (*mode == 0)
+		*mode = mask;
+error:
+	free(os);
+	return ret;
+}
+
+int
+parse_user_regs(const struct option *opt, const char *str, int unset)
+{
+	return __parse_regs(opt, str, unset, false);
+}
+
+int
+parse_intr_regs(const struct option *opt, const char *str, int unset)
+{
+	return __parse_regs(opt, str, unset, true);
+}
diff --git a/tools/perf/util/parse-regs-options.h b/tools/perf/util/parse-regs-options.h
new file mode 100644
index 000000000..2b23d25c6
--- /dev/null
+++ b/tools/perf/util/parse-regs-options.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_PARSE_REGS_OPTIONS_H
+#define _PERF_PARSE_REGS_OPTIONS_H 1
+struct option;
+int parse_user_regs(const struct option *opt, const char *str, int unset);
+int parse_intr_regs(const struct option *opt, const char *str, int unset);
+#endif /* _PERF_PARSE_REGS_OPTIONS_H */
diff --git a/tools/perf/util/parse-sublevel-options.c b/tools/perf/util/parse-sublevel-options.c
new file mode 100644
index 000000000..a841d17ff
--- /dev/null
+++ b/tools/perf/util/parse-sublevel-options.c
@@ -0,0 +1,70 @@
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "util/debug.h"
+#include "util/parse-sublevel-options.h"
+
+static int parse_one_sublevel_option(const char *str,
+				     struct sublevel_option *opts)
+{
+	struct sublevel_option *opt = opts;
+	char *vstr, *s = strdup(str);
+	int v = 1;
+
+	if (!s) {
+		pr_err("no memory\n");
+		return -1;
+	}
+
+	vstr = strchr(s, '=');
+	if (vstr)
+		*vstr++ = 0;
+
+	while (opt->name) {
+		if (!strcmp(s, opt->name))
+			break;
+		opt++;
+	}
+
+	if (!opt->name) {
+		pr_err("Unknown option name '%s'\n", s);
+		free(s);
+		return -1;
+	}
+
+	if (vstr)
+		v = atoi(vstr);
+
+	*opt->value_ptr = v;
+	free(s);
+	return 0;
+}
+
+/* parse options like --foo a=<n>,b,c... */
+int perf_parse_sublevel_options(const char *str, struct sublevel_option *opts)
+{
+	char *s = strdup(str);
+	char *p = NULL;
+	int ret;
+
+	if (!s) {
+		pr_err("no memory\n");
+		return -1;
+	}
+
+	p = strtok(s, ",");
+	while (p) {
+		ret = parse_one_sublevel_option(p, opts);
+		if (ret) {
+			free(s);
+			return ret;
+		}
+
+		p = strtok(NULL, ",");
+	}
+
+	free(s);
+	return 0;
+}
diff --git a/tools/perf/util/parse-sublevel-options.h b/tools/perf/util/parse-sublevel-options.h
new file mode 100644
index 000000000..578b18ef0
--- /dev/null
+++ b/tools/perf/util/parse-sublevel-options.h
@@ -0,0 +1,11 @@
+#ifndef _PERF_PARSE_SUBLEVEL_OPTIONS_H
+#define _PERF_PARSE_SUBLEVEL_OPTIONS_H
+
+struct sublevel_option {
+	const char *name;
+	int *value_ptr;
+};
+
+int perf_parse_sublevel_options(const char *str, struct sublevel_option *opts);
+
+#endif
diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c
new file mode 100644
index 000000000..ce80b79be
--- /dev/null
+++ b/tools/perf/util/path.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * I'm tired of doing "vsnprintf()" etc just to open a
+ * file, so here's a "return static buffer with printf"
+ * interface for paths.
+ *
+ * It's obviously not thread-safe. Sue me. But it's quite
+ * useful for doing things like
+ *
+ *   f = open(mkpath("%s/%s.perf", base, name), O_RDONLY);
+ *
+ * which is what it's designed for.
+ */
+#include "path.h"
+#include "cache.h"
+#include <linux/kernel.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <dirent.h>
+#include <unistd.h>
+
+static char bad_path[] = "/bad-path/";
+/*
+ * One hack:
+ */
+static char *get_pathname(void)
+{
+	static char pathname_array[4][PATH_MAX];
+	static int idx;
+
+	return pathname_array[3 & ++idx];
+}
+
+static char *cleanup_path(char *path)
+{
+	/* Clean it up */
+	if (!memcmp(path, "./", 2)) {
+		path += 2;
+		while (*path == '/')
+			path++;
+	}
+	return path;
+}
+
+char *mkpath(const char *fmt, ...)
+{
+	va_list args;
+	unsigned len;
+	char *pathname = get_pathname();
+
+	va_start(args, fmt);
+	len = vsnprintf(pathname, PATH_MAX, fmt, args);
+	va_end(args);
+	if (len >= PATH_MAX)
+		return bad_path;
+	return cleanup_path(pathname);
+}
+
+int path__join(char *bf, size_t size, const char *path1, const char *path2)
+{
+	return scnprintf(bf, size, "%s%s%s", path1, path1[0] ? "/" : "", path2);
+}
+
+int path__join3(char *bf, size_t size, const char *path1, const char *path2, const char *path3)
+{
+	return scnprintf(bf, size, "%s%s%s%s%s", path1, path1[0] ? "/" : "",
+			 path2, path2[0] ? "/" : "", path3);
+}
+
+bool is_regular_file(const char *file)
+{
+	struct stat st;
+
+	if (stat(file, &st))
+		return false;
+
+	return S_ISREG(st.st_mode);
+}
+
+/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */
+bool is_directory(const char *base_path, const struct dirent *dent)
+{
+	char path[PATH_MAX];
+	struct stat st;
+
+	snprintf(path, sizeof(path), "%s/%s", base_path, dent->d_name);
+	if (stat(path, &st))
+		return false;
+
+	return S_ISDIR(st.st_mode);
+}
+
+bool is_executable_file(const char *base_path, const struct dirent *dent)
+{
+	char path[PATH_MAX];
+	struct stat st;
+
+	snprintf(path, sizeof(path), "%s/%s", base_path, dent->d_name);
+	if (stat(path, &st))
+		return false;
+
+	return !S_ISDIR(st.st_mode) && (st.st_mode & S_IXUSR);
+}
diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h
new file mode 100644
index 000000000..d94902c22
--- /dev/null
+++ b/tools/perf/util/path.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_PATH_H
+#define _PERF_PATH_H
+
+#include <stddef.h>
+#include <stdbool.h>
+
+struct dirent;
+
+int path__join(char *bf, size_t size, const char *path1, const char *path2);
+int path__join3(char *bf, size_t size, const char *path1, const char *path2, const char *path3);
+
+bool is_regular_file(const char *file);
+bool is_directory(const char *base_path, const struct dirent *dent);
+bool is_executable_file(const char *base_path, const struct dirent *dent);
+
+#endif /* _PERF_PATH_H */
diff --git a/tools/perf/util/perf-hooks-list.h b/tools/perf/util/perf-hooks-list.h
new file mode 100644
index 000000000..2867c07ee
--- /dev/null
+++ b/tools/perf/util/perf-hooks-list.h
@@ -0,0 +1,3 @@
+PERF_HOOK(record_start)
+PERF_HOOK(record_end)
+PERF_HOOK(test)
diff --git a/tools/perf/util/perf-hooks.c b/tools/perf/util/perf-hooks.c
new file mode 100644
index 000000000..7a0ab3507
--- /dev/null
+++ b/tools/perf/util/perf-hooks.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * perf_hooks.c
+ *
+ * Copyright (C) 2016 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2016 Huawei Inc.
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <setjmp.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include "util/debug.h"
+#include "util/perf-hooks.h"
+
+static sigjmp_buf jmpbuf;
+static const struct perf_hook_desc *current_perf_hook;
+
+void perf_hooks__invoke(const struct perf_hook_desc *desc)
+{
+	if (!(desc && desc->p_hook_func && *desc->p_hook_func))
+		return;
+
+	if (sigsetjmp(jmpbuf, 1)) {
+		pr_warning("Fatal error (SEGFAULT) in perf hook '%s'\n",
+			   desc->hook_name);
+		*(current_perf_hook->p_hook_func) = NULL;
+	} else {
+		current_perf_hook = desc;
+		(**desc->p_hook_func)(desc->hook_ctx);
+	}
+	current_perf_hook = NULL;
+}
+
+void perf_hooks__recover(void)
+{
+	if (current_perf_hook)
+		siglongjmp(jmpbuf, 1);
+}
+
+#define PERF_HOOK(name)					\
+perf_hook_func_t __perf_hook_func_##name = NULL;	\
+struct perf_hook_desc __perf_hook_desc_##name =		\
+	{.hook_name = #name,				\
+	 .p_hook_func = &__perf_hook_func_##name,	\
+	 .hook_ctx = NULL};
+#include "perf-hooks-list.h"
+#undef PERF_HOOK
+
+#define PERF_HOOK(name)		\
+	&__perf_hook_desc_##name,
+
+static struct perf_hook_desc *perf_hooks[] = {
+#include "perf-hooks-list.h"
+};
+#undef PERF_HOOK
+
+int perf_hooks__set_hook(const char *hook_name,
+			 perf_hook_func_t hook_func,
+			 void *hook_ctx)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(perf_hooks); i++) {
+		if (strcmp(hook_name, perf_hooks[i]->hook_name) != 0)
+			continue;
+
+		if (*(perf_hooks[i]->p_hook_func))
+			pr_warning("Overwrite existing hook: %s\n", hook_name);
+		*(perf_hooks[i]->p_hook_func) = hook_func;
+		perf_hooks[i]->hook_ctx = hook_ctx;
+		return 0;
+	}
+	return -ENOENT;
+}
+
+perf_hook_func_t perf_hooks__get_hook(const char *hook_name)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(perf_hooks); i++) {
+		if (strcmp(hook_name, perf_hooks[i]->hook_name) != 0)
+			continue;
+
+		return *(perf_hooks[i]->p_hook_func);
+	}
+	return ERR_PTR(-ENOENT);
+}
diff --git a/tools/perf/util/perf-hooks.h b/tools/perf/util/perf-hooks.h
new file mode 100644
index 000000000..27fbec62d
--- /dev/null
+++ b/tools/perf/util/perf-hooks.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_UTIL_PERF_HOOKS_H
+#define PERF_UTIL_PERF_HOOKS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void (*perf_hook_func_t)(void *ctx);
+struct perf_hook_desc {
+	const char * const hook_name;
+	perf_hook_func_t * const p_hook_func;
+	void *hook_ctx;
+};
+
+extern void perf_hooks__invoke(const struct perf_hook_desc *);
+extern void perf_hooks__recover(void);
+
+#define PERF_HOOK(name)					\
+extern struct perf_hook_desc __perf_hook_desc_##name;	\
+static inline void perf_hooks__invoke_##name(void)	\
+{ 							\
+	perf_hooks__invoke(&__perf_hook_desc_##name);	\
+}
+
+#include "perf-hooks-list.h"
+#undef PERF_HOOK
+
+extern int
+perf_hooks__set_hook(const char *hook_name,
+		     perf_hook_func_t hook_func,
+		     void *hook_ctx);
+
+extern perf_hook_func_t
+perf_hooks__get_hook(const char *hook_name);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c
new file mode 100644
index 000000000..e1e2d7015
--- /dev/null
+++ b/tools/perf/util/perf_api_probe.c
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "perf-sys.h"
+#include "util/cloexec.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/parse-events.h"
+#include "util/perf_api_probe.h"
+#include <perf/cpumap.h>
+#include <errno.h>
+
+typedef void (*setup_probe_fn_t)(struct evsel *evsel);
+
+static int perf_do_probe_api(setup_probe_fn_t fn, struct perf_cpu cpu, const char *str)
+{
+	struct evlist *evlist;
+	struct evsel *evsel;
+	unsigned long flags = perf_event_open_cloexec_flag();
+	int err = -EAGAIN, fd;
+	static pid_t pid = -1;
+
+	evlist = evlist__new();
+	if (!evlist)
+		return -ENOMEM;
+
+	if (parse_event(evlist, str))
+		goto out_delete;
+
+	evsel = evlist__first(evlist);
+
+	while (1) {
+		fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1, flags);
+		if (fd < 0) {
+			if (pid == -1 && errno == EACCES) {
+				pid = 0;
+				continue;
+			}
+			goto out_delete;
+		}
+		break;
+	}
+	close(fd);
+
+	fn(evsel);
+
+	fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1, flags);
+	if (fd < 0) {
+		if (errno == EINVAL)
+			err = -EINVAL;
+		goto out_delete;
+	}
+	close(fd);
+	err = 0;
+
+out_delete:
+	evlist__delete(evlist);
+	return err;
+}
+
+static bool perf_probe_api(setup_probe_fn_t fn)
+{
+	const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL};
+	struct perf_cpu_map *cpus;
+	struct perf_cpu cpu;
+	int ret, i = 0;
+
+	cpus = perf_cpu_map__new(NULL);
+	if (!cpus)
+		return false;
+	cpu = perf_cpu_map__cpu(cpus, 0);
+	perf_cpu_map__put(cpus);
+
+	do {
+		ret = perf_do_probe_api(fn, cpu, try[i++]);
+		if (!ret)
+			return true;
+	} while (ret == -EAGAIN && try[i]);
+
+	return false;
+}
+
+static void perf_probe_sample_identifier(struct evsel *evsel)
+{
+	evsel->core.attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
+}
+
+static void perf_probe_comm_exec(struct evsel *evsel)
+{
+	evsel->core.attr.comm_exec = 1;
+}
+
+static void perf_probe_context_switch(struct evsel *evsel)
+{
+	evsel->core.attr.context_switch = 1;
+}
+
+static void perf_probe_text_poke(struct evsel *evsel)
+{
+	evsel->core.attr.text_poke = 1;
+}
+
+static void perf_probe_build_id(struct evsel *evsel)
+{
+	evsel->core.attr.build_id = 1;
+}
+
+static void perf_probe_cgroup(struct evsel *evsel)
+{
+	evsel->core.attr.cgroup = 1;
+}
+
+bool perf_can_sample_identifier(void)
+{
+	return perf_probe_api(perf_probe_sample_identifier);
+}
+
+bool perf_can_comm_exec(void)
+{
+	return perf_probe_api(perf_probe_comm_exec);
+}
+
+bool perf_can_record_switch_events(void)
+{
+	return perf_probe_api(perf_probe_context_switch);
+}
+
+bool perf_can_record_text_poke_events(void)
+{
+	return perf_probe_api(perf_probe_text_poke);
+}
+
+bool perf_can_record_cpu_wide(void)
+{
+	struct perf_event_attr attr = {
+		.type = PERF_TYPE_SOFTWARE,
+		.config = PERF_COUNT_SW_CPU_CLOCK,
+		.exclude_kernel = 1,
+	};
+	struct perf_cpu_map *cpus;
+	struct perf_cpu cpu;
+	int fd;
+
+	cpus = perf_cpu_map__new(NULL);
+	if (!cpus)
+		return false;
+
+	cpu = perf_cpu_map__cpu(cpus, 0);
+	perf_cpu_map__put(cpus);
+
+	fd = sys_perf_event_open(&attr, -1, cpu.cpu, -1, 0);
+	if (fd < 0)
+		return false;
+	close(fd);
+
+	return true;
+}
+
+/*
+ * Architectures are expected to know if AUX area sampling is supported by the
+ * hardware. Here we check for kernel support.
+ */
+bool perf_can_aux_sample(void)
+{
+	struct perf_event_attr attr = {
+		.size = sizeof(struct perf_event_attr),
+		.exclude_kernel = 1,
+		/*
+		 * Non-zero value causes the kernel to calculate the effective
+		 * attribute size up to that byte.
+		 */
+		.aux_sample_size = 1,
+	};
+	int fd;
+
+	fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
+	/*
+	 * If the kernel attribute is big enough to contain aux_sample_size
+	 * then we assume that it is supported. We are relying on the kernel to
+	 * validate the attribute size before anything else that could be wrong.
+	 */
+	if (fd < 0 && errno == E2BIG)
+		return false;
+	if (fd >= 0)
+		close(fd);
+
+	return true;
+}
+
+bool perf_can_record_build_id(void)
+{
+	return perf_probe_api(perf_probe_build_id);
+}
+
+bool perf_can_record_cgroup(void)
+{
+	return perf_probe_api(perf_probe_cgroup);
+}
diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h
new file mode 100644
index 000000000..b104168ef
--- /dev/null
+++ b/tools/perf/util/perf_api_probe.h
@@ -0,0 +1,17 @@
+
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_API_PROBE_H
+#define __PERF_API_PROBE_H
+
+#include <stdbool.h>
+
+bool perf_can_aux_sample(void);
+bool perf_can_comm_exec(void);
+bool perf_can_record_cpu_wide(void);
+bool perf_can_record_switch_events(void);
+bool perf_can_record_text_poke_events(void);
+bool perf_can_sample_identifier(void);
+bool perf_can_record_build_id(void);
+bool perf_can_record_cgroup(void);
+
+#endif // __PERF_API_PROBE_H
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
new file mode 100644
index 000000000..7e5e7b305
--- /dev/null
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/perf_event.h>
+#include "util/evsel_fprintf.h"
+
+struct bit_names {
+	int bit;
+	const char *name;
+};
+
+static void __p_bits(char *buf, size_t size, u64 value, struct bit_names *bits)
+{
+	bool first_bit = true;
+	int i = 0;
+
+	do {
+		if (value & bits[i].bit) {
+			buf += scnprintf(buf, size, "%s%s", first_bit ? "" : "|", bits[i].name);
+			first_bit = false;
+		}
+	} while (bits[++i].name != NULL);
+}
+
+static void __p_sample_type(char *buf, size_t size, u64 value)
+{
+#define bit_name(n) { PERF_SAMPLE_##n, #n }
+	struct bit_names bits[] = {
+		bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR),
+		bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU),
+		bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
+		bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
+		bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
+		bit_name(WEIGHT), bit_name(PHYS_ADDR), bit_name(AUX),
+		bit_name(CGROUP), bit_name(DATA_PAGE_SIZE), bit_name(CODE_PAGE_SIZE),
+		bit_name(WEIGHT_STRUCT),
+		{ .name = NULL, }
+	};
+#undef bit_name
+	__p_bits(buf, size, value, bits);
+}
+
+static void __p_branch_sample_type(char *buf, size_t size, u64 value)
+{
+#define bit_name(n) { PERF_SAMPLE_BRANCH_##n, #n }
+	struct bit_names bits[] = {
+		bit_name(USER), bit_name(KERNEL), bit_name(HV), bit_name(ANY),
+		bit_name(ANY_CALL), bit_name(ANY_RETURN), bit_name(IND_CALL),
+		bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
+		bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
+		bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
+		bit_name(TYPE_SAVE), bit_name(HW_INDEX), bit_name(PRIV_SAVE),
+		{ .name = NULL, }
+	};
+#undef bit_name
+	__p_bits(buf, size, value, bits);
+}
+
+static void __p_read_format(char *buf, size_t size, u64 value)
+{
+#define bit_name(n) { PERF_FORMAT_##n, #n }
+	struct bit_names bits[] = {
+		bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING),
+		bit_name(ID), bit_name(GROUP), bit_name(LOST),
+		{ .name = NULL, }
+	};
+#undef bit_name
+	__p_bits(buf, size, value, bits);
+}
+
+#define BUF_SIZE		1024
+
+#define p_hex(val)		snprintf(buf, BUF_SIZE, "%#"PRIx64, (uint64_t)(val))
+#define p_unsigned(val)		snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val))
+#define p_signed(val)		snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val))
+#define p_sample_type(val)	__p_sample_type(buf, BUF_SIZE, val)
+#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val)
+#define p_read_format(val)	__p_read_format(buf, BUF_SIZE, val)
+
+#define PRINT_ATTRn(_n, _f, _p)				\
+do {							\
+	if (attr->_f) {					\
+		_p(attr->_f);				\
+		ret += attr__fprintf(fp, _n, buf, priv);\
+	}						\
+} while (0)
+
+#define PRINT_ATTRf(_f, _p)	PRINT_ATTRn(#_f, _f, _p)
+
+int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
+			     attr__fprintf_f attr__fprintf, void *priv)
+{
+	char buf[BUF_SIZE];
+	int ret = 0;
+
+	PRINT_ATTRf(type, p_unsigned);
+	PRINT_ATTRf(size, p_unsigned);
+	PRINT_ATTRf(config, p_hex);
+	PRINT_ATTRn("{ sample_period, sample_freq }", sample_period, p_unsigned);
+	PRINT_ATTRf(sample_type, p_sample_type);
+	PRINT_ATTRf(read_format, p_read_format);
+
+	PRINT_ATTRf(disabled, p_unsigned);
+	PRINT_ATTRf(inherit, p_unsigned);
+	PRINT_ATTRf(pinned, p_unsigned);
+	PRINT_ATTRf(exclusive, p_unsigned);
+	PRINT_ATTRf(exclude_user, p_unsigned);
+	PRINT_ATTRf(exclude_kernel, p_unsigned);
+	PRINT_ATTRf(exclude_hv, p_unsigned);
+	PRINT_ATTRf(exclude_idle, p_unsigned);
+	PRINT_ATTRf(mmap, p_unsigned);
+	PRINT_ATTRf(comm, p_unsigned);
+	PRINT_ATTRf(freq, p_unsigned);
+	PRINT_ATTRf(inherit_stat, p_unsigned);
+	PRINT_ATTRf(enable_on_exec, p_unsigned);
+	PRINT_ATTRf(task, p_unsigned);
+	PRINT_ATTRf(watermark, p_unsigned);
+	PRINT_ATTRf(precise_ip, p_unsigned);
+	PRINT_ATTRf(mmap_data, p_unsigned);
+	PRINT_ATTRf(sample_id_all, p_unsigned);
+	PRINT_ATTRf(exclude_host, p_unsigned);
+	PRINT_ATTRf(exclude_guest, p_unsigned);
+	PRINT_ATTRf(exclude_callchain_kernel, p_unsigned);
+	PRINT_ATTRf(exclude_callchain_user, p_unsigned);
+	PRINT_ATTRf(mmap2, p_unsigned);
+	PRINT_ATTRf(comm_exec, p_unsigned);
+	PRINT_ATTRf(use_clockid, p_unsigned);
+	PRINT_ATTRf(context_switch, p_unsigned);
+	PRINT_ATTRf(write_backward, p_unsigned);
+	PRINT_ATTRf(namespaces, p_unsigned);
+	PRINT_ATTRf(ksymbol, p_unsigned);
+	PRINT_ATTRf(bpf_event, p_unsigned);
+	PRINT_ATTRf(aux_output, p_unsigned);
+	PRINT_ATTRf(cgroup, p_unsigned);
+	PRINT_ATTRf(text_poke, p_unsigned);
+	PRINT_ATTRf(build_id, p_unsigned);
+	PRINT_ATTRf(inherit_thread, p_unsigned);
+	PRINT_ATTRf(remove_on_exec, p_unsigned);
+	PRINT_ATTRf(sigtrap, p_unsigned);
+
+	PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
+	PRINT_ATTRf(bp_type, p_unsigned);
+	PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex);
+	PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex);
+	PRINT_ATTRf(branch_sample_type, p_branch_sample_type);
+	PRINT_ATTRf(sample_regs_user, p_hex);
+	PRINT_ATTRf(sample_stack_user, p_unsigned);
+	PRINT_ATTRf(clockid, p_signed);
+	PRINT_ATTRf(sample_regs_intr, p_hex);
+	PRINT_ATTRf(aux_watermark, p_unsigned);
+	PRINT_ATTRf(sample_max_stack, p_unsigned);
+	PRINT_ATTRf(aux_sample_size, p_unsigned);
+	PRINT_ATTRf(sig_data, p_unsigned);
+
+	return ret;
+}
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
new file mode 100644
index 000000000..872dd3d38
--- /dev/null
+++ b/tools/perf/util/perf_regs.c
@@ -0,0 +1,717 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <string.h>
+#include "perf_regs.h"
+#include "event.h"
+
+int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused,
+				 char **new_op __maybe_unused)
+{
+	return SDT_ARG_SKIP;
+}
+
+uint64_t __weak arch__intr_reg_mask(void)
+{
+	return PERF_REGS_MASK;
+}
+
+uint64_t __weak arch__user_reg_mask(void)
+{
+	return PERF_REGS_MASK;
+}
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+
+#define perf_event_arm_regs perf_event_arm64_regs
+#include "../../arch/arm64/include/uapi/asm/perf_regs.h"
+#undef perf_event_arm_regs
+
+#include "../../arch/arm/include/uapi/asm/perf_regs.h"
+#include "../../arch/csky/include/uapi/asm/perf_regs.h"
+#include "../../arch/mips/include/uapi/asm/perf_regs.h"
+#include "../../arch/powerpc/include/uapi/asm/perf_regs.h"
+#include "../../arch/riscv/include/uapi/asm/perf_regs.h"
+#include "../../arch/s390/include/uapi/asm/perf_regs.h"
+#include "../../arch/x86/include/uapi/asm/perf_regs.h"
+
+static const char *__perf_reg_name_arm64(int id)
+{
+	switch (id) {
+	case PERF_REG_ARM64_X0:
+		return "x0";
+	case PERF_REG_ARM64_X1:
+		return "x1";
+	case PERF_REG_ARM64_X2:
+		return "x2";
+	case PERF_REG_ARM64_X3:
+		return "x3";
+	case PERF_REG_ARM64_X4:
+		return "x4";
+	case PERF_REG_ARM64_X5:
+		return "x5";
+	case PERF_REG_ARM64_X6:
+		return "x6";
+	case PERF_REG_ARM64_X7:
+		return "x7";
+	case PERF_REG_ARM64_X8:
+		return "x8";
+	case PERF_REG_ARM64_X9:
+		return "x9";
+	case PERF_REG_ARM64_X10:
+		return "x10";
+	case PERF_REG_ARM64_X11:
+		return "x11";
+	case PERF_REG_ARM64_X12:
+		return "x12";
+	case PERF_REG_ARM64_X13:
+		return "x13";
+	case PERF_REG_ARM64_X14:
+		return "x14";
+	case PERF_REG_ARM64_X15:
+		return "x15";
+	case PERF_REG_ARM64_X16:
+		return "x16";
+	case PERF_REG_ARM64_X17:
+		return "x17";
+	case PERF_REG_ARM64_X18:
+		return "x18";
+	case PERF_REG_ARM64_X19:
+		return "x19";
+	case PERF_REG_ARM64_X20:
+		return "x20";
+	case PERF_REG_ARM64_X21:
+		return "x21";
+	case PERF_REG_ARM64_X22:
+		return "x22";
+	case PERF_REG_ARM64_X23:
+		return "x23";
+	case PERF_REG_ARM64_X24:
+		return "x24";
+	case PERF_REG_ARM64_X25:
+		return "x25";
+	case PERF_REG_ARM64_X26:
+		return "x26";
+	case PERF_REG_ARM64_X27:
+		return "x27";
+	case PERF_REG_ARM64_X28:
+		return "x28";
+	case PERF_REG_ARM64_X29:
+		return "x29";
+	case PERF_REG_ARM64_SP:
+		return "sp";
+	case PERF_REG_ARM64_LR:
+		return "lr";
+	case PERF_REG_ARM64_PC:
+		return "pc";
+	case PERF_REG_ARM64_VG:
+		return "vg";
+	default:
+		return NULL;
+	}
+
+	return NULL;
+}
+
+static const char *__perf_reg_name_arm(int id)
+{
+	switch (id) {
+	case PERF_REG_ARM_R0:
+		return "r0";
+	case PERF_REG_ARM_R1:
+		return "r1";
+	case PERF_REG_ARM_R2:
+		return "r2";
+	case PERF_REG_ARM_R3:
+		return "r3";
+	case PERF_REG_ARM_R4:
+		return "r4";
+	case PERF_REG_ARM_R5:
+		return "r5";
+	case PERF_REG_ARM_R6:
+		return "r6";
+	case PERF_REG_ARM_R7:
+		return "r7";
+	case PERF_REG_ARM_R8:
+		return "r8";
+	case PERF_REG_ARM_R9:
+		return "r9";
+	case PERF_REG_ARM_R10:
+		return "r10";
+	case PERF_REG_ARM_FP:
+		return "fp";
+	case PERF_REG_ARM_IP:
+		return "ip";
+	case PERF_REG_ARM_SP:
+		return "sp";
+	case PERF_REG_ARM_LR:
+		return "lr";
+	case PERF_REG_ARM_PC:
+		return "pc";
+	default:
+		return NULL;
+	}
+
+	return NULL;
+}
+
+static const char *__perf_reg_name_csky(int id)
+{
+	switch (id) {
+	case PERF_REG_CSKY_A0:
+		return "a0";
+	case PERF_REG_CSKY_A1:
+		return "a1";
+	case PERF_REG_CSKY_A2:
+		return "a2";
+	case PERF_REG_CSKY_A3:
+		return "a3";
+	case PERF_REG_CSKY_REGS0:
+		return "regs0";
+	case PERF_REG_CSKY_REGS1:
+		return "regs1";
+	case PERF_REG_CSKY_REGS2:
+		return "regs2";
+	case PERF_REG_CSKY_REGS3:
+		return "regs3";
+	case PERF_REG_CSKY_REGS4:
+		return "regs4";
+	case PERF_REG_CSKY_REGS5:
+		return "regs5";
+	case PERF_REG_CSKY_REGS6:
+		return "regs6";
+	case PERF_REG_CSKY_REGS7:
+		return "regs7";
+	case PERF_REG_CSKY_REGS8:
+		return "regs8";
+	case PERF_REG_CSKY_REGS9:
+		return "regs9";
+	case PERF_REG_CSKY_SP:
+		return "sp";
+	case PERF_REG_CSKY_LR:
+		return "lr";
+	case PERF_REG_CSKY_PC:
+		return "pc";
+#if defined(__CSKYABIV2__)
+	case PERF_REG_CSKY_EXREGS0:
+		return "exregs0";
+	case PERF_REG_CSKY_EXREGS1:
+		return "exregs1";
+	case PERF_REG_CSKY_EXREGS2:
+		return "exregs2";
+	case PERF_REG_CSKY_EXREGS3:
+		return "exregs3";
+	case PERF_REG_CSKY_EXREGS4:
+		return "exregs4";
+	case PERF_REG_CSKY_EXREGS5:
+		return "exregs5";
+	case PERF_REG_CSKY_EXREGS6:
+		return "exregs6";
+	case PERF_REG_CSKY_EXREGS7:
+		return "exregs7";
+	case PERF_REG_CSKY_EXREGS8:
+		return "exregs8";
+	case PERF_REG_CSKY_EXREGS9:
+		return "exregs9";
+	case PERF_REG_CSKY_EXREGS10:
+		return "exregs10";
+	case PERF_REG_CSKY_EXREGS11:
+		return "exregs11";
+	case PERF_REG_CSKY_EXREGS12:
+		return "exregs12";
+	case PERF_REG_CSKY_EXREGS13:
+		return "exregs13";
+	case PERF_REG_CSKY_EXREGS14:
+		return "exregs14";
+	case PERF_REG_CSKY_TLS:
+		return "tls";
+	case PERF_REG_CSKY_HI:
+		return "hi";
+	case PERF_REG_CSKY_LO:
+		return "lo";
+#endif
+	default:
+		return NULL;
+	}
+
+	return NULL;
+}
+
+static const char *__perf_reg_name_mips(int id)
+{
+	switch (id) {
+	case PERF_REG_MIPS_PC:
+		return "PC";
+	case PERF_REG_MIPS_R1:
+		return "$1";
+	case PERF_REG_MIPS_R2:
+		return "$2";
+	case PERF_REG_MIPS_R3:
+		return "$3";
+	case PERF_REG_MIPS_R4:
+		return "$4";
+	case PERF_REG_MIPS_R5:
+		return "$5";
+	case PERF_REG_MIPS_R6:
+		return "$6";
+	case PERF_REG_MIPS_R7:
+		return "$7";
+	case PERF_REG_MIPS_R8:
+		return "$8";
+	case PERF_REG_MIPS_R9:
+		return "$9";
+	case PERF_REG_MIPS_R10:
+		return "$10";
+	case PERF_REG_MIPS_R11:
+		return "$11";
+	case PERF_REG_MIPS_R12:
+		return "$12";
+	case PERF_REG_MIPS_R13:
+		return "$13";
+	case PERF_REG_MIPS_R14:
+		return "$14";
+	case PERF_REG_MIPS_R15:
+		return "$15";
+	case PERF_REG_MIPS_R16:
+		return "$16";
+	case PERF_REG_MIPS_R17:
+		return "$17";
+	case PERF_REG_MIPS_R18:
+		return "$18";
+	case PERF_REG_MIPS_R19:
+		return "$19";
+	case PERF_REG_MIPS_R20:
+		return "$20";
+	case PERF_REG_MIPS_R21:
+		return "$21";
+	case PERF_REG_MIPS_R22:
+		return "$22";
+	case PERF_REG_MIPS_R23:
+		return "$23";
+	case PERF_REG_MIPS_R24:
+		return "$24";
+	case PERF_REG_MIPS_R25:
+		return "$25";
+	case PERF_REG_MIPS_R28:
+		return "$28";
+	case PERF_REG_MIPS_R29:
+		return "$29";
+	case PERF_REG_MIPS_R30:
+		return "$30";
+	case PERF_REG_MIPS_R31:
+		return "$31";
+	default:
+		break;
+	}
+	return NULL;
+}
+
+static const char *__perf_reg_name_powerpc(int id)
+{
+	switch (id) {
+	case PERF_REG_POWERPC_R0:
+		return "r0";
+	case PERF_REG_POWERPC_R1:
+		return "r1";
+	case PERF_REG_POWERPC_R2:
+		return "r2";
+	case PERF_REG_POWERPC_R3:
+		return "r3";
+	case PERF_REG_POWERPC_R4:
+		return "r4";
+	case PERF_REG_POWERPC_R5:
+		return "r5";
+	case PERF_REG_POWERPC_R6:
+		return "r6";
+	case PERF_REG_POWERPC_R7:
+		return "r7";
+	case PERF_REG_POWERPC_R8:
+		return "r8";
+	case PERF_REG_POWERPC_R9:
+		return "r9";
+	case PERF_REG_POWERPC_R10:
+		return "r10";
+	case PERF_REG_POWERPC_R11:
+		return "r11";
+	case PERF_REG_POWERPC_R12:
+		return "r12";
+	case PERF_REG_POWERPC_R13:
+		return "r13";
+	case PERF_REG_POWERPC_R14:
+		return "r14";
+	case PERF_REG_POWERPC_R15:
+		return "r15";
+	case PERF_REG_POWERPC_R16:
+		return "r16";
+	case PERF_REG_POWERPC_R17:
+		return "r17";
+	case PERF_REG_POWERPC_R18:
+		return "r18";
+	case PERF_REG_POWERPC_R19:
+		return "r19";
+	case PERF_REG_POWERPC_R20:
+		return "r20";
+	case PERF_REG_POWERPC_R21:
+		return "r21";
+	case PERF_REG_POWERPC_R22:
+		return "r22";
+	case PERF_REG_POWERPC_R23:
+		return "r23";
+	case PERF_REG_POWERPC_R24:
+		return "r24";
+	case PERF_REG_POWERPC_R25:
+		return "r25";
+	case PERF_REG_POWERPC_R26:
+		return "r26";
+	case PERF_REG_POWERPC_R27:
+		return "r27";
+	case PERF_REG_POWERPC_R28:
+		return "r28";
+	case PERF_REG_POWERPC_R29:
+		return "r29";
+	case PERF_REG_POWERPC_R30:
+		return "r30";
+	case PERF_REG_POWERPC_R31:
+		return "r31";
+	case PERF_REG_POWERPC_NIP:
+		return "nip";
+	case PERF_REG_POWERPC_MSR:
+		return "msr";
+	case PERF_REG_POWERPC_ORIG_R3:
+		return "orig_r3";
+	case PERF_REG_POWERPC_CTR:
+		return "ctr";
+	case PERF_REG_POWERPC_LINK:
+		return "link";
+	case PERF_REG_POWERPC_XER:
+		return "xer";
+	case PERF_REG_POWERPC_CCR:
+		return "ccr";
+	case PERF_REG_POWERPC_SOFTE:
+		return "softe";
+	case PERF_REG_POWERPC_TRAP:
+		return "trap";
+	case PERF_REG_POWERPC_DAR:
+		return "dar";
+	case PERF_REG_POWERPC_DSISR:
+		return "dsisr";
+	case PERF_REG_POWERPC_SIER:
+		return "sier";
+	case PERF_REG_POWERPC_MMCRA:
+		return "mmcra";
+	case PERF_REG_POWERPC_MMCR0:
+		return "mmcr0";
+	case PERF_REG_POWERPC_MMCR1:
+		return "mmcr1";
+	case PERF_REG_POWERPC_MMCR2:
+		return "mmcr2";
+	case PERF_REG_POWERPC_MMCR3:
+		return "mmcr3";
+	case PERF_REG_POWERPC_SIER2:
+		return "sier2";
+	case PERF_REG_POWERPC_SIER3:
+		return "sier3";
+	case PERF_REG_POWERPC_PMC1:
+		return "pmc1";
+	case PERF_REG_POWERPC_PMC2:
+		return "pmc2";
+	case PERF_REG_POWERPC_PMC3:
+		return "pmc3";
+	case PERF_REG_POWERPC_PMC4:
+		return "pmc4";
+	case PERF_REG_POWERPC_PMC5:
+		return "pmc5";
+	case PERF_REG_POWERPC_PMC6:
+		return "pmc6";
+	case PERF_REG_POWERPC_SDAR:
+		return "sdar";
+	case PERF_REG_POWERPC_SIAR:
+		return "siar";
+	default:
+		break;
+	}
+	return NULL;
+}
+
+static const char *__perf_reg_name_riscv(int id)
+{
+	switch (id) {
+	case PERF_REG_RISCV_PC:
+		return "pc";
+	case PERF_REG_RISCV_RA:
+		return "ra";
+	case PERF_REG_RISCV_SP:
+		return "sp";
+	case PERF_REG_RISCV_GP:
+		return "gp";
+	case PERF_REG_RISCV_TP:
+		return "tp";
+	case PERF_REG_RISCV_T0:
+		return "t0";
+	case PERF_REG_RISCV_T1:
+		return "t1";
+	case PERF_REG_RISCV_T2:
+		return "t2";
+	case PERF_REG_RISCV_S0:
+		return "s0";
+	case PERF_REG_RISCV_S1:
+		return "s1";
+	case PERF_REG_RISCV_A0:
+		return "a0";
+	case PERF_REG_RISCV_A1:
+		return "a1";
+	case PERF_REG_RISCV_A2:
+		return "a2";
+	case PERF_REG_RISCV_A3:
+		return "a3";
+	case PERF_REG_RISCV_A4:
+		return "a4";
+	case PERF_REG_RISCV_A5:
+		return "a5";
+	case PERF_REG_RISCV_A6:
+		return "a6";
+	case PERF_REG_RISCV_A7:
+		return "a7";
+	case PERF_REG_RISCV_S2:
+		return "s2";
+	case PERF_REG_RISCV_S3:
+		return "s3";
+	case PERF_REG_RISCV_S4:
+		return "s4";
+	case PERF_REG_RISCV_S5:
+		return "s5";
+	case PERF_REG_RISCV_S6:
+		return "s6";
+	case PERF_REG_RISCV_S7:
+		return "s7";
+	case PERF_REG_RISCV_S8:
+		return "s8";
+	case PERF_REG_RISCV_S9:
+		return "s9";
+	case PERF_REG_RISCV_S10:
+		return "s10";
+	case PERF_REG_RISCV_S11:
+		return "s11";
+	case PERF_REG_RISCV_T3:
+		return "t3";
+	case PERF_REG_RISCV_T4:
+		return "t4";
+	case PERF_REG_RISCV_T5:
+		return "t5";
+	case PERF_REG_RISCV_T6:
+		return "t6";
+	default:
+		return NULL;
+	}
+
+	return NULL;
+}
+
+static const char *__perf_reg_name_s390(int id)
+{
+	switch (id) {
+	case PERF_REG_S390_R0:
+		return "R0";
+	case PERF_REG_S390_R1:
+		return "R1";
+	case PERF_REG_S390_R2:
+		return "R2";
+	case PERF_REG_S390_R3:
+		return "R3";
+	case PERF_REG_S390_R4:
+		return "R4";
+	case PERF_REG_S390_R5:
+		return "R5";
+	case PERF_REG_S390_R6:
+		return "R6";
+	case PERF_REG_S390_R7:
+		return "R7";
+	case PERF_REG_S390_R8:
+		return "R8";
+	case PERF_REG_S390_R9:
+		return "R9";
+	case PERF_REG_S390_R10:
+		return "R10";
+	case PERF_REG_S390_R11:
+		return "R11";
+	case PERF_REG_S390_R12:
+		return "R12";
+	case PERF_REG_S390_R13:
+		return "R13";
+	case PERF_REG_S390_R14:
+		return "R14";
+	case PERF_REG_S390_R15:
+		return "R15";
+	case PERF_REG_S390_FP0:
+		return "FP0";
+	case PERF_REG_S390_FP1:
+		return "FP1";
+	case PERF_REG_S390_FP2:
+		return "FP2";
+	case PERF_REG_S390_FP3:
+		return "FP3";
+	case PERF_REG_S390_FP4:
+		return "FP4";
+	case PERF_REG_S390_FP5:
+		return "FP5";
+	case PERF_REG_S390_FP6:
+		return "FP6";
+	case PERF_REG_S390_FP7:
+		return "FP7";
+	case PERF_REG_S390_FP8:
+		return "FP8";
+	case PERF_REG_S390_FP9:
+		return "FP9";
+	case PERF_REG_S390_FP10:
+		return "FP10";
+	case PERF_REG_S390_FP11:
+		return "FP11";
+	case PERF_REG_S390_FP12:
+		return "FP12";
+	case PERF_REG_S390_FP13:
+		return "FP13";
+	case PERF_REG_S390_FP14:
+		return "FP14";
+	case PERF_REG_S390_FP15:
+		return "FP15";
+	case PERF_REG_S390_MASK:
+		return "MASK";
+	case PERF_REG_S390_PC:
+		return "PC";
+	default:
+		return NULL;
+	}
+
+	return NULL;
+}
+
+static const char *__perf_reg_name_x86(int id)
+{
+	switch (id) {
+	case PERF_REG_X86_AX:
+		return "AX";
+	case PERF_REG_X86_BX:
+		return "BX";
+	case PERF_REG_X86_CX:
+		return "CX";
+	case PERF_REG_X86_DX:
+		return "DX";
+	case PERF_REG_X86_SI:
+		return "SI";
+	case PERF_REG_X86_DI:
+		return "DI";
+	case PERF_REG_X86_BP:
+		return "BP";
+	case PERF_REG_X86_SP:
+		return "SP";
+	case PERF_REG_X86_IP:
+		return "IP";
+	case PERF_REG_X86_FLAGS:
+		return "FLAGS";
+	case PERF_REG_X86_CS:
+		return "CS";
+	case PERF_REG_X86_SS:
+		return "SS";
+	case PERF_REG_X86_DS:
+		return "DS";
+	case PERF_REG_X86_ES:
+		return "ES";
+	case PERF_REG_X86_FS:
+		return "FS";
+	case PERF_REG_X86_GS:
+		return "GS";
+	case PERF_REG_X86_R8:
+		return "R8";
+	case PERF_REG_X86_R9:
+		return "R9";
+	case PERF_REG_X86_R10:
+		return "R10";
+	case PERF_REG_X86_R11:
+		return "R11";
+	case PERF_REG_X86_R12:
+		return "R12";
+	case PERF_REG_X86_R13:
+		return "R13";
+	case PERF_REG_X86_R14:
+		return "R14";
+	case PERF_REG_X86_R15:
+		return "R15";
+
+#define XMM(x) \
+	case PERF_REG_X86_XMM ## x:	\
+	case PERF_REG_X86_XMM ## x + 1:	\
+		return "XMM" #x;
+	XMM(0)
+	XMM(1)
+	XMM(2)
+	XMM(3)
+	XMM(4)
+	XMM(5)
+	XMM(6)
+	XMM(7)
+	XMM(8)
+	XMM(9)
+	XMM(10)
+	XMM(11)
+	XMM(12)
+	XMM(13)
+	XMM(14)
+	XMM(15)
+#undef XMM
+	default:
+		return NULL;
+	}
+
+	return NULL;
+}
+
+const char *perf_reg_name(int id, const char *arch)
+{
+	const char *reg_name = NULL;
+
+	if (!strcmp(arch, "csky"))
+		reg_name = __perf_reg_name_csky(id);
+	else if (!strcmp(arch, "mips"))
+		reg_name = __perf_reg_name_mips(id);
+	else if (!strcmp(arch, "powerpc"))
+		reg_name = __perf_reg_name_powerpc(id);
+	else if (!strcmp(arch, "riscv"))
+		reg_name = __perf_reg_name_riscv(id);
+	else if (!strcmp(arch, "s390"))
+		reg_name = __perf_reg_name_s390(id);
+	else if (!strcmp(arch, "x86"))
+		reg_name = __perf_reg_name_x86(id);
+	else if (!strcmp(arch, "arm"))
+		reg_name = __perf_reg_name_arm(id);
+	else if (!strcmp(arch, "arm64"))
+		reg_name = __perf_reg_name_arm64(id);
+
+	return reg_name ?: "unknown";
+}
+
+int perf_reg_value(u64 *valp, struct regs_dump *regs, int id)
+{
+	int i, idx = 0;
+	u64 mask = regs->mask;
+
+	if ((u64)id >= PERF_SAMPLE_REGS_CACHE_SIZE)
+		return -EINVAL;
+
+	if (regs->cache_mask & (1ULL << id))
+		goto out;
+
+	if (!(mask & (1ULL << id)))
+		return -EINVAL;
+
+	for (i = 0; i < id; i++) {
+		if (mask & (1ULL << i))
+			idx++;
+	}
+
+	regs->cache_mask |= (1ULL << id);
+	regs->cache_regs[id] = regs->regs[idx];
+
+out:
+	*valp = regs->cache_regs[id];
+	return 0;
+}
+#endif
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
new file mode 100644
index 000000000..ce1127af0
--- /dev/null
+++ b/tools/perf/util/perf_regs.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_REGS_H
+#define __PERF_REGS_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+
+struct regs_dump;
+
+struct sample_reg {
+	const char *name;
+	uint64_t mask;
+};
+
+#define SMPL_REG_MASK(b) (1ULL << (b))
+#define SMPL_REG(n, b) { .name = #n, .mask = SMPL_REG_MASK(b) }
+#define SMPL_REG2_MASK(b) (3ULL << (b))
+#define SMPL_REG2(n, b) { .name = #n, .mask = SMPL_REG2_MASK(b) }
+#define SMPL_REG_END { .name = NULL }
+
+enum {
+	SDT_ARG_VALID = 0,
+	SDT_ARG_SKIP,
+};
+
+int arch_sdt_arg_parse_op(char *old_op, char **new_op);
+uint64_t arch__intr_reg_mask(void);
+uint64_t arch__user_reg_mask(void);
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+extern const struct sample_reg sample_reg_masks[];
+
+#include <perf_regs.h>
+
+#define DWARF_MINIMAL_REGS ((1ULL << PERF_REG_IP) | (1ULL << PERF_REG_SP))
+
+const char *perf_reg_name(int id, const char *arch);
+int perf_reg_value(u64 *valp, struct regs_dump *regs, int id);
+
+#else
+#define PERF_REGS_MASK	0
+#define PERF_REGS_MAX	0
+
+#define DWARF_MINIMAL_REGS PERF_REGS_MASK
+
+static inline const char *perf_reg_name(int id __maybe_unused, const char *arch __maybe_unused)
+{
+	return "unknown";
+}
+
+static inline int perf_reg_value(u64 *valp __maybe_unused,
+				 struct regs_dump *regs __maybe_unused,
+				 int id __maybe_unused)
+{
+	return 0;
+}
+#endif /* HAVE_PERF_REGS_SUPPORT */
+#endif /* __PERF_REGS_H */
diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c
new file mode 100644
index 000000000..f0bcfcab1
--- /dev/null
+++ b/tools/perf/util/pfm.c
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for libpfm4 event encoding.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#include "util/cpumap.h"
+#include "util/debug.h"
+#include "util/event.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/parse-events.h"
+#include "util/pmu.h"
+#include "util/pfm.h"
+
+#include <string.h>
+#include <linux/kernel.h>
+#include <perfmon/pfmlib_perf_event.h>
+
+static void libpfm_initialize(void)
+{
+	int ret;
+
+	ret = pfm_initialize();
+	if (ret != PFM_SUCCESS) {
+		ui__warning("libpfm failed to initialize: %s\n",
+			pfm_strerror(ret));
+	}
+}
+
+int parse_libpfm_events_option(const struct option *opt, const char *str,
+			int unset __maybe_unused)
+{
+	struct evlist *evlist = *(struct evlist **)opt->value;
+	struct perf_event_attr attr;
+	struct perf_pmu *pmu;
+	struct evsel *evsel, *grp_leader = NULL;
+	char *p, *q, *p_orig;
+	const char *sep;
+	int grp_evt = -1;
+	int ret;
+
+	libpfm_initialize();
+
+	p_orig = p = strdup(str);
+	if (!p)
+		return -1;
+	/*
+	 * force loading of the PMU list
+	 */
+	perf_pmu__scan(NULL);
+
+	for (q = p; strsep(&p, ",{}"); q = p) {
+		sep = p ? str + (p - p_orig - 1) : "";
+		if (*sep == '{') {
+			if (grp_evt > -1) {
+				ui__error(
+					"nested event groups not supported\n");
+				goto error;
+			}
+			grp_evt++;
+		}
+
+		/* no event */
+		if (*q == '\0') {
+			if (*sep == '}') {
+				if (grp_evt < 0) {
+					ui__error("cannot close a non-existing event group\n");
+					goto error;
+				}
+				grp_evt--;
+			}
+			continue;
+		}
+
+		memset(&attr, 0, sizeof(attr));
+		event_attr_init(&attr);
+
+		ret = pfm_get_perf_event_encoding(q, PFM_PLM0|PFM_PLM3,
+						&attr, NULL, NULL);
+
+		if (ret != PFM_SUCCESS) {
+			ui__error("failed to parse event %s : %s\n", str,
+				  pfm_strerror(ret));
+			goto error;
+		}
+
+		pmu = perf_pmu__find_by_type((unsigned int)attr.type);
+		evsel = parse_events__add_event(evlist->core.nr_entries,
+						&attr, q, /*metric_id=*/NULL,
+						pmu);
+		if (evsel == NULL)
+			goto error;
+
+		evsel->is_libpfm_event = true;
+
+		evlist__add(evlist, evsel);
+
+		if (grp_evt == 0)
+			grp_leader = evsel;
+
+		if (grp_evt > -1) {
+			evsel__set_leader(evsel, grp_leader);
+			grp_leader->core.nr_members++;
+			grp_evt++;
+		}
+
+		if (*sep == '}') {
+			if (grp_evt < 0) {
+				ui__error(
+				   "cannot close a non-existing event group\n");
+				goto error;
+			}
+			evlist->core.nr_groups++;
+			grp_leader = NULL;
+			grp_evt = -1;
+		}
+	}
+	free(p_orig);
+	return 0;
+error:
+	free(p_orig);
+	return -1;
+}
+
+static const char *srcs[PFM_ATTR_CTRL_MAX] = {
+	[PFM_ATTR_CTRL_UNKNOWN] = "???",
+	[PFM_ATTR_CTRL_PMU] = "PMU",
+	[PFM_ATTR_CTRL_PERF_EVENT] = "perf_event",
+};
+
+static void
+print_attr_flags(pfm_event_attr_info_t *info)
+{
+	int n = 0;
+
+	if (info->is_dfl) {
+		printf("[default] ");
+		n++;
+	}
+
+	if (info->is_precise) {
+		printf("[precise] ");
+		n++;
+	}
+
+	if (!n)
+		printf("- ");
+}
+
+static void
+print_libpfm_events_detailed(pfm_event_info_t *info, bool long_desc)
+{
+	pfm_event_attr_info_t ainfo;
+	const char *src;
+	int j, ret;
+
+	ainfo.size = sizeof(ainfo);
+
+	printf("  %s\n", info->name);
+	printf("    [%s]\n", info->desc);
+	if (long_desc) {
+		if (info->equiv)
+			printf("      Equiv: %s\n", info->equiv);
+
+		printf("      Code  : 0x%"PRIx64"\n", info->code);
+	}
+	pfm_for_each_event_attr(j, info) {
+		ret = pfm_get_event_attr_info(info->idx, j,
+					      PFM_OS_PERF_EVENT_EXT, &ainfo);
+		if (ret != PFM_SUCCESS)
+			continue;
+
+		if (ainfo.type == PFM_ATTR_UMASK) {
+			printf("      %s:%s\n", info->name, ainfo.name);
+			printf("        [%s]\n", ainfo.desc);
+		}
+
+		if (!long_desc)
+			continue;
+
+		if (ainfo.ctrl >= PFM_ATTR_CTRL_MAX)
+			ainfo.ctrl = PFM_ATTR_CTRL_UNKNOWN;
+
+		src = srcs[ainfo.ctrl];
+		switch (ainfo.type) {
+		case PFM_ATTR_UMASK:
+			printf("        Umask : 0x%02"PRIx64" : %s: ",
+				ainfo.code, src);
+			print_attr_flags(&ainfo);
+			putchar('\n');
+			break;
+		case PFM_ATTR_MOD_BOOL:
+			printf("      Modif : %s: [%s] : %s (boolean)\n", src,
+				ainfo.name, ainfo.desc);
+			break;
+		case PFM_ATTR_MOD_INTEGER:
+			printf("      Modif : %s: [%s] : %s (integer)\n", src,
+				ainfo.name, ainfo.desc);
+			break;
+		case PFM_ATTR_NONE:
+		case PFM_ATTR_RAW_UMASK:
+		case PFM_ATTR_MAX:
+		default:
+			printf("      Attr  : %s: [%s] : %s\n", src,
+				ainfo.name, ainfo.desc);
+		}
+	}
+}
+
+/*
+ * list all pmu::event:umask, pmu::event
+ * printed events may not be all valid combinations of umask for an event
+ */
+static void
+print_libpfm_events_raw(pfm_pmu_info_t *pinfo, pfm_event_info_t *info)
+{
+	pfm_event_attr_info_t ainfo;
+	int j, ret;
+	bool has_umask = false;
+
+	ainfo.size = sizeof(ainfo);
+
+	pfm_for_each_event_attr(j, info) {
+		ret = pfm_get_event_attr_info(info->idx, j,
+					      PFM_OS_PERF_EVENT_EXT, &ainfo);
+		if (ret != PFM_SUCCESS)
+			continue;
+
+		if (ainfo.type != PFM_ATTR_UMASK)
+			continue;
+
+		printf("%s::%s:%s\n", pinfo->name, info->name, ainfo.name);
+		has_umask = true;
+	}
+	if (!has_umask)
+		printf("%s::%s\n", pinfo->name, info->name);
+}
+
+void print_libpfm_events(bool name_only, bool long_desc)
+{
+	pfm_event_info_t info;
+	pfm_pmu_info_t pinfo;
+	int i, p, ret;
+
+	libpfm_initialize();
+
+	/* initialize to zero to indicate ABI version */
+	info.size  = sizeof(info);
+	pinfo.size = sizeof(pinfo);
+
+	if (!name_only)
+		puts("\nList of pre-defined events (to be used in --pfm-events):\n");
+
+	pfm_for_all_pmus(p) {
+		bool printed_pmu = false;
+
+		ret = pfm_get_pmu_info(p, &pinfo);
+		if (ret != PFM_SUCCESS)
+			continue;
+
+		/* only print events that are supported by host HW */
+		if (!pinfo.is_present)
+			continue;
+
+		/* handled by perf directly */
+		if (pinfo.pmu == PFM_PMU_PERF_EVENT)
+			continue;
+
+		for (i = pinfo.first_event; i != -1;
+		     i = pfm_get_event_next(i)) {
+
+			ret = pfm_get_event_info(i, PFM_OS_PERF_EVENT_EXT,
+						&info);
+			if (ret != PFM_SUCCESS)
+				continue;
+
+			if (!name_only && !printed_pmu) {
+				printf("%s:\n", pinfo.name);
+				printed_pmu = true;
+			}
+
+			if (!name_only)
+				print_libpfm_events_detailed(&info, long_desc);
+			else
+				print_libpfm_events_raw(&pinfo, &info);
+		}
+		if (!name_only && printed_pmu)
+			putchar('\n');
+	}
+}
diff --git a/tools/perf/util/pfm.h b/tools/perf/util/pfm.h
new file mode 100644
index 000000000..7d70dda87
--- /dev/null
+++ b/tools/perf/util/pfm.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Support for libpfm4 event encoding.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#ifndef __PERF_PFM_H
+#define __PERF_PFM_H
+
+#include <subcmd/parse-options.h>
+
+#ifdef HAVE_LIBPFM
+int parse_libpfm_events_option(const struct option *opt, const char *str,
+			int unset);
+
+void print_libpfm_events(bool name_only, bool long_desc);
+
+#else
+#include <linux/compiler.h>
+
+static inline int parse_libpfm_events_option(
+	const struct option *opt __maybe_unused,
+	const char *str __maybe_unused,
+	int unset __maybe_unused)
+{
+	return 0;
+}
+
+static inline void print_libpfm_events(bool name_only __maybe_unused,
+				       bool long_desc __maybe_unused)
+{
+}
+
+#endif
+
+
+#endif /* __PERF_PFM_H */
diff --git a/tools/perf/util/pmu-hybrid.c b/tools/perf/util/pmu-hybrid.c
new file mode 100644
index 000000000..f51ccaac6
--- /dev/null
+++ b/tools/perf/util/pmu-hybrid.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/list.h>
+#include <linux/compiler.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <sys/types.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <locale.h>
+#include <api/fs/fs.h>
+#include "fncache.h"
+#include "pmu-hybrid.h"
+
+LIST_HEAD(perf_pmu__hybrid_pmus);
+
+bool perf_pmu__hybrid_mounted(const char *name)
+{
+	char path[PATH_MAX];
+	const char *sysfs;
+	FILE *file;
+	int n, cpu;
+
+	if (strncmp(name, "cpu_", 4))
+		return false;
+
+	sysfs = sysfs__mountpoint();
+	if (!sysfs)
+		return false;
+
+	snprintf(path, PATH_MAX, CPUS_TEMPLATE_CPU, sysfs, name);
+	if (!file_available(path))
+		return false;
+
+	file = fopen(path, "r");
+	if (!file)
+		return false;
+
+	n = fscanf(file, "%u", &cpu);
+	fclose(file);
+	if (n <= 0)
+		return false;
+
+	return true;
+}
+
+struct perf_pmu *perf_pmu__find_hybrid_pmu(const char *name)
+{
+	struct perf_pmu *pmu;
+
+	if (!name)
+		return NULL;
+
+	perf_pmu__for_each_hybrid_pmu(pmu) {
+		if (!strcmp(name, pmu->name))
+			return pmu;
+	}
+
+	return NULL;
+}
+
+bool perf_pmu__is_hybrid(const char *name)
+{
+	return perf_pmu__find_hybrid_pmu(name) != NULL;
+}
+
+char *perf_pmu__hybrid_type_to_pmu(const char *type)
+{
+	char *pmu_name = NULL;
+
+	if (asprintf(&pmu_name, "cpu_%s", type) < 0)
+		return NULL;
+
+	if (perf_pmu__is_hybrid(pmu_name))
+		return pmu_name;
+
+	/*
+	 * pmu may be not scanned, check the sysfs.
+	 */
+	if (perf_pmu__hybrid_mounted(pmu_name))
+		return pmu_name;
+
+	free(pmu_name);
+	return NULL;
+}
diff --git a/tools/perf/util/pmu-hybrid.h b/tools/perf/util/pmu-hybrid.h
new file mode 100644
index 000000000..2b186c26a
--- /dev/null
+++ b/tools/perf/util/pmu-hybrid.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PMU_HYBRID_H
+#define __PMU_HYBRID_H
+
+#include <linux/perf_event.h>
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <stdbool.h>
+#include "pmu.h"
+
+extern struct list_head perf_pmu__hybrid_pmus;
+
+#define perf_pmu__for_each_hybrid_pmu(pmu)	\
+	list_for_each_entry(pmu, &perf_pmu__hybrid_pmus, hybrid_list)
+
+bool perf_pmu__hybrid_mounted(const char *name);
+
+struct perf_pmu *perf_pmu__find_hybrid_pmu(const char *name);
+bool perf_pmu__is_hybrid(const char *name);
+char *perf_pmu__hybrid_type_to_pmu(const char *type);
+
+static inline int perf_pmu__hybrid_pmu_num(void)
+{
+	struct perf_pmu *pmu;
+	int num = 0;
+
+	perf_pmu__for_each_hybrid_pmu(pmu)
+		num++;
+
+	return num;
+}
+
+#endif /* __PMU_HYBRID_H */
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
new file mode 100644
index 000000000..9a762c0cc
--- /dev/null
+++ b/tools/perf/util/pmu.c
@@ -0,0 +1,2007 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/list.h>
+#include <linux/compiler.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <linux/ctype.h>
+#include <subcmd/pager.h>
+#include <sys/types.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <dirent.h>
+#include <api/fs/fs.h>
+#include <locale.h>
+#include <regex.h>
+#include <perf/cpumap.h>
+#include <fnmatch.h>
+#include "debug.h"
+#include "evsel.h"
+#include "pmu.h"
+#include "parse-events.h"
+#include "header.h"
+#include "string2.h"
+#include "strbuf.h"
+#include "fncache.h"
+#include "pmu-hybrid.h"
+
+struct perf_pmu perf_pmu__fake;
+
+struct perf_pmu_format {
+	char *name;
+	int value;
+	DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS);
+	struct list_head list;
+};
+
+int perf_pmu_parse(struct list_head *list, char *name);
+extern FILE *perf_pmu_in;
+
+static LIST_HEAD(pmus);
+static bool hybrid_scanned;
+
+/*
+ * Parse & process all the sysfs attributes located under
+ * the directory specified in 'dir' parameter.
+ */
+int perf_pmu__format_parse(char *dir, struct list_head *head)
+{
+	struct dirent *evt_ent;
+	DIR *format_dir;
+	int ret = 0;
+
+	format_dir = opendir(dir);
+	if (!format_dir)
+		return -EINVAL;
+
+	while (!ret && (evt_ent = readdir(format_dir))) {
+		char path[PATH_MAX];
+		char *name = evt_ent->d_name;
+		FILE *file;
+
+		if (!strcmp(name, ".") || !strcmp(name, ".."))
+			continue;
+
+		snprintf(path, PATH_MAX, "%s/%s", dir, name);
+
+		ret = -EINVAL;
+		file = fopen(path, "r");
+		if (!file)
+			break;
+
+		perf_pmu_in = file;
+		ret = perf_pmu_parse(head, name);
+		fclose(file);
+	}
+
+	closedir(format_dir);
+	return ret;
+}
+
+/*
+ * Reading/parsing the default pmu format definition, which should be
+ * located at:
+ * /sys/bus/event_source/devices/<dev>/format as sysfs group attributes.
+ */
+static int pmu_format(const char *name, struct list_head *format)
+{
+	char path[PATH_MAX];
+	const char *sysfs = sysfs__mountpoint();
+
+	if (!sysfs)
+		return -1;
+
+	snprintf(path, PATH_MAX,
+		 "%s" EVENT_SOURCE_DEVICE_PATH "%s/format", sysfs, name);
+
+	if (!file_available(path))
+		return 0;
+
+	if (perf_pmu__format_parse(path, format))
+		return -1;
+
+	return 0;
+}
+
+int perf_pmu__convert_scale(const char *scale, char **end, double *sval)
+{
+	char *lc;
+	int ret = 0;
+
+	/*
+	 * save current locale
+	 */
+	lc = setlocale(LC_NUMERIC, NULL);
+
+	/*
+	 * The lc string may be allocated in static storage,
+	 * so get a dynamic copy to make it survive setlocale
+	 * call below.
+	 */
+	lc = strdup(lc);
+	if (!lc) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * force to C locale to ensure kernel
+	 * scale string is converted correctly.
+	 * kernel uses default C locale.
+	 */
+	setlocale(LC_NUMERIC, "C");
+
+	*sval = strtod(scale, end);
+
+out:
+	/* restore locale */
+	setlocale(LC_NUMERIC, lc);
+	free(lc);
+	return ret;
+}
+
+static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *name)
+{
+	struct stat st;
+	ssize_t sret;
+	char scale[128];
+	int fd, ret = -1;
+	char path[PATH_MAX];
+
+	scnprintf(path, PATH_MAX, "%s/%s.scale", dir, name);
+
+	fd = open(path, O_RDONLY);
+	if (fd == -1)
+		return -1;
+
+	if (fstat(fd, &st) < 0)
+		goto error;
+
+	sret = read(fd, scale, sizeof(scale)-1);
+	if (sret < 0)
+		goto error;
+
+	if (scale[sret - 1] == '\n')
+		scale[sret - 1] = '\0';
+	else
+		scale[sret] = '\0';
+
+	ret = perf_pmu__convert_scale(scale, NULL, &alias->scale);
+error:
+	close(fd);
+	return ret;
+}
+
+static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, char *dir, char *name)
+{
+	char path[PATH_MAX];
+	ssize_t sret;
+	int fd;
+
+	scnprintf(path, PATH_MAX, "%s/%s.unit", dir, name);
+
+	fd = open(path, O_RDONLY);
+	if (fd == -1)
+		return -1;
+
+	sret = read(fd, alias->unit, UNIT_MAX_LEN);
+	if (sret < 0)
+		goto error;
+
+	close(fd);
+
+	if (alias->unit[sret - 1] == '\n')
+		alias->unit[sret - 1] = '\0';
+	else
+		alias->unit[sret] = '\0';
+
+	return 0;
+error:
+	close(fd);
+	alias->unit[0] = '\0';
+	return -1;
+}
+
+static int
+perf_pmu__parse_per_pkg(struct perf_pmu_alias *alias, char *dir, char *name)
+{
+	char path[PATH_MAX];
+	int fd;
+
+	scnprintf(path, PATH_MAX, "%s/%s.per-pkg", dir, name);
+
+	fd = open(path, O_RDONLY);
+	if (fd == -1)
+		return -1;
+
+	close(fd);
+
+	alias->per_pkg = true;
+	return 0;
+}
+
+static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias,
+				    char *dir, char *name)
+{
+	char path[PATH_MAX];
+	int fd;
+
+	scnprintf(path, PATH_MAX, "%s/%s.snapshot", dir, name);
+
+	fd = open(path, O_RDONLY);
+	if (fd == -1)
+		return -1;
+
+	alias->snapshot = true;
+	close(fd);
+	return 0;
+}
+
+static void perf_pmu_assign_str(char *name, const char *field, char **old_str,
+				char **new_str)
+{
+	if (!*old_str)
+		goto set_new;
+
+	if (*new_str) {	/* Have new string, check with old */
+		if (strcasecmp(*old_str, *new_str))
+			pr_debug("alias %s differs in field '%s'\n",
+				 name, field);
+		zfree(old_str);
+	} else		/* Nothing new --> keep old string */
+		return;
+set_new:
+	*old_str = *new_str;
+	*new_str = NULL;
+}
+
+static void perf_pmu_update_alias(struct perf_pmu_alias *old,
+				  struct perf_pmu_alias *newalias)
+{
+	perf_pmu_assign_str(old->name, "desc", &old->desc, &newalias->desc);
+	perf_pmu_assign_str(old->name, "long_desc", &old->long_desc,
+			    &newalias->long_desc);
+	perf_pmu_assign_str(old->name, "topic", &old->topic, &newalias->topic);
+	perf_pmu_assign_str(old->name, "metric_expr", &old->metric_expr,
+			    &newalias->metric_expr);
+	perf_pmu_assign_str(old->name, "metric_name", &old->metric_name,
+			    &newalias->metric_name);
+	perf_pmu_assign_str(old->name, "value", &old->str, &newalias->str);
+	old->scale = newalias->scale;
+	old->per_pkg = newalias->per_pkg;
+	old->snapshot = newalias->snapshot;
+	memcpy(old->unit, newalias->unit, sizeof(old->unit));
+}
+
+/* Delete an alias entry. */
+void perf_pmu_free_alias(struct perf_pmu_alias *newalias)
+{
+	zfree(&newalias->name);
+	zfree(&newalias->desc);
+	zfree(&newalias->long_desc);
+	zfree(&newalias->topic);
+	zfree(&newalias->str);
+	zfree(&newalias->metric_expr);
+	zfree(&newalias->metric_name);
+	zfree(&newalias->pmu_name);
+	parse_events_terms__purge(&newalias->terms);
+	free(newalias);
+}
+
+/* Merge an alias, search in alias list. If this name is already
+ * present merge both of them to combine all information.
+ */
+static bool perf_pmu_merge_alias(struct perf_pmu_alias *newalias,
+				 struct list_head *alist)
+{
+	struct perf_pmu_alias *a;
+
+	list_for_each_entry(a, alist, list) {
+		if (!strcasecmp(newalias->name, a->name)) {
+			if (newalias->pmu_name && a->pmu_name &&
+			    !strcasecmp(newalias->pmu_name, a->pmu_name)) {
+				continue;
+			}
+			perf_pmu_update_alias(a, newalias);
+			perf_pmu_free_alias(newalias);
+			return true;
+		}
+	}
+	return false;
+}
+
+static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
+				 char *desc, char *val, const struct pmu_event *pe)
+{
+	struct parse_events_term *term;
+	struct perf_pmu_alias *alias;
+	int ret;
+	int num;
+	char newval[256];
+	char *long_desc = NULL, *topic = NULL, *unit = NULL, *perpkg = NULL,
+	     *metric_expr = NULL, *metric_name = NULL, *deprecated = NULL,
+	     *pmu_name = NULL;
+
+	if (pe) {
+		long_desc = (char *)pe->long_desc;
+		topic = (char *)pe->topic;
+		unit = (char *)pe->unit;
+		perpkg = (char *)pe->perpkg;
+		metric_expr = (char *)pe->metric_expr;
+		metric_name = (char *)pe->metric_name;
+		deprecated = (char *)pe->deprecated;
+		pmu_name = (char *)pe->pmu;
+	}
+
+	alias = malloc(sizeof(*alias));
+	if (!alias)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&alias->terms);
+	alias->scale = 1.0;
+	alias->unit[0] = '\0';
+	alias->per_pkg = false;
+	alias->snapshot = false;
+	alias->deprecated = false;
+
+	ret = parse_events_terms(&alias->terms, val);
+	if (ret) {
+		pr_err("Cannot parse alias %s: %d\n", val, ret);
+		free(alias);
+		return ret;
+	}
+
+	/* Scan event and remove leading zeroes, spaces, newlines, some
+	 * platforms have terms specified as
+	 * event=0x0091 (read from files ../<PMU>/events/<FILE>
+	 * and terms specified as event=0x91 (read from JSON files).
+	 *
+	 * Rebuild string to make alias->str member comparable.
+	 */
+	memset(newval, 0, sizeof(newval));
+	ret = 0;
+	list_for_each_entry(term, &alias->terms, list) {
+		if (ret)
+			ret += scnprintf(newval + ret, sizeof(newval) - ret,
+					 ",");
+		if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM)
+			ret += scnprintf(newval + ret, sizeof(newval) - ret,
+					 "%s=%#x", term->config, term->val.num);
+		else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR)
+			ret += scnprintf(newval + ret, sizeof(newval) - ret,
+					 "%s=%s", term->config, term->val.str);
+	}
+
+	alias->name = strdup(name);
+	if (dir) {
+		/*
+		 * load unit name and scale if available
+		 */
+		perf_pmu__parse_unit(alias, dir, name);
+		perf_pmu__parse_scale(alias, dir, name);
+		perf_pmu__parse_per_pkg(alias, dir, name);
+		perf_pmu__parse_snapshot(alias, dir, name);
+	}
+
+	alias->metric_expr = metric_expr ? strdup(metric_expr) : NULL;
+	alias->metric_name = metric_name ? strdup(metric_name): NULL;
+	alias->desc = desc ? strdup(desc) : NULL;
+	alias->long_desc = long_desc ? strdup(long_desc) :
+				desc ? strdup(desc) : NULL;
+	alias->topic = topic ? strdup(topic) : NULL;
+	if (unit) {
+		if (perf_pmu__convert_scale(unit, &unit, &alias->scale) < 0)
+			return -1;
+		snprintf(alias->unit, sizeof(alias->unit), "%s", unit);
+	}
+	alias->per_pkg = perpkg && sscanf(perpkg, "%d", &num) == 1 && num == 1;
+	alias->str = strdup(newval);
+	alias->pmu_name = pmu_name ? strdup(pmu_name) : NULL;
+
+	if (deprecated)
+		alias->deprecated = true;
+
+	if (!perf_pmu_merge_alias(alias, list))
+		list_add_tail(&alias->list, list);
+
+	return 0;
+}
+
+static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FILE *file)
+{
+	char buf[256];
+	int ret;
+
+	ret = fread(buf, 1, sizeof(buf), file);
+	if (ret == 0)
+		return -EINVAL;
+
+	buf[ret] = 0;
+
+	/* Remove trailing newline from sysfs file */
+	strim(buf);
+
+	return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL);
+}
+
+static inline bool pmu_alias_info_file(char *name)
+{
+	size_t len;
+
+	len = strlen(name);
+	if (len > 5 && !strcmp(name + len - 5, ".unit"))
+		return true;
+	if (len > 6 && !strcmp(name + len - 6, ".scale"))
+		return true;
+	if (len > 8 && !strcmp(name + len - 8, ".per-pkg"))
+		return true;
+	if (len > 9 && !strcmp(name + len - 9, ".snapshot"))
+		return true;
+
+	return false;
+}
+
+/*
+ * Process all the sysfs attributes located under the directory
+ * specified in 'dir' parameter.
+ */
+static int pmu_aliases_parse(char *dir, struct list_head *head)
+{
+	struct dirent *evt_ent;
+	DIR *event_dir;
+
+	event_dir = opendir(dir);
+	if (!event_dir)
+		return -EINVAL;
+
+	while ((evt_ent = readdir(event_dir))) {
+		char path[PATH_MAX];
+		char *name = evt_ent->d_name;
+		FILE *file;
+
+		if (!strcmp(name, ".") || !strcmp(name, ".."))
+			continue;
+
+		/*
+		 * skip info files parsed in perf_pmu__new_alias()
+		 */
+		if (pmu_alias_info_file(name))
+			continue;
+
+		scnprintf(path, PATH_MAX, "%s/%s", dir, name);
+
+		file = fopen(path, "r");
+		if (!file) {
+			pr_debug("Cannot open %s\n", path);
+			continue;
+		}
+
+		if (perf_pmu__new_alias(head, dir, name, file) < 0)
+			pr_debug("Cannot set up %s\n", name);
+		fclose(file);
+	}
+
+	closedir(event_dir);
+	return 0;
+}
+
+/*
+ * Reading the pmu event aliases definition, which should be located at:
+ * /sys/bus/event_source/devices/<dev>/events as sysfs group attributes.
+ */
+static int pmu_aliases(const char *name, struct list_head *head)
+{
+	char path[PATH_MAX];
+	const char *sysfs = sysfs__mountpoint();
+
+	if (!sysfs)
+		return -1;
+
+	snprintf(path, PATH_MAX,
+		 "%s/bus/event_source/devices/%s/events", sysfs, name);
+
+	if (!file_available(path))
+		return 0;
+
+	if (pmu_aliases_parse(path, head))
+		return -1;
+
+	return 0;
+}
+
+static int pmu_alias_terms(struct perf_pmu_alias *alias,
+			   struct list_head *terms)
+{
+	struct parse_events_term *term, *cloned;
+	LIST_HEAD(list);
+	int ret;
+
+	list_for_each_entry(term, &alias->terms, list) {
+		ret = parse_events_term__clone(&cloned, term);
+		if (ret) {
+			parse_events_terms__purge(&list);
+			return ret;
+		}
+		/*
+		 * Weak terms don't override command line options,
+		 * which we don't want for implicit terms in aliases.
+		 */
+		cloned->weak = true;
+		list_add_tail(&cloned->list, &list);
+	}
+	list_splice(&list, terms);
+	return 0;
+}
+
+/*
+ * Reading/parsing the default pmu type value, which should be
+ * located at:
+ * /sys/bus/event_source/devices/<dev>/type as sysfs attribute.
+ */
+static int pmu_type(const char *name, __u32 *type)
+{
+	char path[PATH_MAX];
+	FILE *file;
+	int ret = 0;
+	const char *sysfs = sysfs__mountpoint();
+
+	if (!sysfs)
+		return -1;
+
+	snprintf(path, PATH_MAX,
+		 "%s" EVENT_SOURCE_DEVICE_PATH "%s/type", sysfs, name);
+
+	if (access(path, R_OK) < 0)
+		return -1;
+
+	file = fopen(path, "r");
+	if (!file)
+		return -EINVAL;
+
+	if (1 != fscanf(file, "%u", type))
+		ret = -1;
+
+	fclose(file);
+	return ret;
+}
+
+/* Add all pmus in sysfs to pmu list: */
+static void pmu_read_sysfs(void)
+{
+	char path[PATH_MAX];
+	DIR *dir;
+	struct dirent *dent;
+	const char *sysfs = sysfs__mountpoint();
+
+	if (!sysfs)
+		return;
+
+	snprintf(path, PATH_MAX,
+		 "%s" EVENT_SOURCE_DEVICE_PATH, sysfs);
+
+	dir = opendir(path);
+	if (!dir)
+		return;
+
+	while ((dent = readdir(dir))) {
+		if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
+			continue;
+		/* add to static LIST_HEAD(pmus): */
+		perf_pmu__find(dent->d_name);
+	}
+
+	closedir(dir);
+}
+
+static struct perf_cpu_map *__pmu_cpumask(const char *path)
+{
+	FILE *file;
+	struct perf_cpu_map *cpus;
+
+	file = fopen(path, "r");
+	if (!file)
+		return NULL;
+
+	cpus = perf_cpu_map__read(file);
+	fclose(file);
+	return cpus;
+}
+
+/*
+ * Uncore PMUs have a "cpumask" file under sysfs. CPU PMUs (e.g. on arm/arm64)
+ * may have a "cpus" file.
+ */
+#define SYS_TEMPLATE_ID	"./bus/event_source/devices/%s/identifier"
+#define CPUS_TEMPLATE_UNCORE	"%s/bus/event_source/devices/%s/cpumask"
+
+static struct perf_cpu_map *pmu_cpumask(const char *name)
+{
+	char path[PATH_MAX];
+	struct perf_cpu_map *cpus;
+	const char *sysfs = sysfs__mountpoint();
+	const char *templates[] = {
+		CPUS_TEMPLATE_UNCORE,
+		CPUS_TEMPLATE_CPU,
+		NULL
+	};
+	const char **template;
+
+	if (!sysfs)
+		return NULL;
+
+	for (template = templates; *template; template++) {
+		snprintf(path, PATH_MAX, *template, sysfs, name);
+		cpus = __pmu_cpumask(path);
+		if (cpus)
+			return cpus;
+	}
+
+	return NULL;
+}
+
+static bool pmu_is_uncore(const char *name)
+{
+	char path[PATH_MAX];
+	const char *sysfs;
+
+	if (perf_pmu__hybrid_mounted(name))
+		return false;
+
+	sysfs = sysfs__mountpoint();
+	snprintf(path, PATH_MAX, CPUS_TEMPLATE_UNCORE, sysfs, name);
+	return file_available(path);
+}
+
+static char *pmu_id(const char *name)
+{
+	char path[PATH_MAX], *str;
+	size_t len;
+
+	snprintf(path, PATH_MAX, SYS_TEMPLATE_ID, name);
+
+	if (sysfs__read_str(path, &str, &len) < 0)
+		return NULL;
+
+	str[len - 1] = 0; /* remove line feed */
+
+	return str;
+}
+
+/*
+ *  PMU CORE devices have different name other than cpu in sysfs on some
+ *  platforms.
+ *  Looking for possible sysfs files to identify the arm core device.
+ */
+static int is_arm_pmu_core(const char *name)
+{
+	char path[PATH_MAX];
+	const char *sysfs = sysfs__mountpoint();
+
+	if (!sysfs)
+		return 0;
+
+	/* Look for cpu sysfs (specific to arm) */
+	scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus",
+				sysfs, name);
+	return file_available(path);
+}
+
+char *perf_pmu__getcpuid(struct perf_pmu *pmu)
+{
+	char *cpuid;
+	static bool printed;
+
+	cpuid = getenv("PERF_CPUID");
+	if (cpuid)
+		cpuid = strdup(cpuid);
+	if (!cpuid)
+		cpuid = get_cpuid_str(pmu);
+	if (!cpuid)
+		return NULL;
+
+	if (!printed) {
+		pr_debug("Using CPUID %s\n", cpuid);
+		printed = true;
+	}
+	return cpuid;
+}
+
+__weak const struct pmu_events_table *pmu_events_table__find(void)
+{
+	return perf_pmu__find_table(NULL);
+}
+
+/*
+ * Suffix must be in form tok_{digits}, or tok{digits}, or same as pmu_name
+ * to be valid.
+ */
+static bool perf_pmu__valid_suffix(const char *pmu_name, char *tok)
+{
+	const char *p;
+
+	if (strncmp(pmu_name, tok, strlen(tok)))
+		return false;
+
+	p = pmu_name + strlen(tok);
+	if (*p == 0)
+		return true;
+
+	if (*p == '_')
+		++p;
+
+	/* Ensure we end in a number */
+	while (1) {
+		if (!isdigit(*p))
+			return false;
+		if (*(++p) == 0)
+			break;
+	}
+
+	return true;
+}
+
+bool pmu_uncore_alias_match(const char *pmu_name, const char *name)
+{
+	char *tmp = NULL, *tok, *str;
+	bool res;
+
+	str = strdup(pmu_name);
+	if (!str)
+		return false;
+
+	/*
+	 * uncore alias may be from different PMU with common prefix
+	 */
+	tok = strtok_r(str, ",", &tmp);
+	if (strncmp(pmu_name, tok, strlen(tok))) {
+		res = false;
+		goto out;
+	}
+
+	/*
+	 * Match more complex aliases where the alias name is a comma-delimited
+	 * list of tokens, orderly contained in the matching PMU name.
+	 *
+	 * Example: For alias "socket,pmuname" and PMU "socketX_pmunameY", we
+	 *	    match "socket" in "socketX_pmunameY" and then "pmuname" in
+	 *	    "pmunameY".
+	 */
+	while (1) {
+		char *next_tok = strtok_r(NULL, ",", &tmp);
+
+		name = strstr(name, tok);
+		if (!name ||
+		    (!next_tok && !perf_pmu__valid_suffix(name, tok))) {
+			res = false;
+			goto out;
+		}
+		if (!next_tok)
+			break;
+		tok = next_tok;
+		name += strlen(tok);
+	}
+
+	res = true;
+out:
+	free(str);
+	return res;
+}
+
+struct pmu_add_cpu_aliases_map_data {
+	struct list_head *head;
+	const char *name;
+	const char *cpu_name;
+	struct perf_pmu *pmu;
+};
+
+static int pmu_add_cpu_aliases_map_callback(const struct pmu_event *pe,
+					const struct pmu_events_table *table __maybe_unused,
+					void *vdata)
+{
+	struct pmu_add_cpu_aliases_map_data *data = vdata;
+	const char *pname = pe->pmu ? pe->pmu : data->cpu_name;
+
+	if (!pe->name)
+		return 0;
+
+	if (data->pmu->is_uncore && pmu_uncore_alias_match(pname, data->name))
+		goto new_alias;
+
+	if (strcmp(pname, data->name))
+		return 0;
+
+new_alias:
+	/* need type casts to override 'const' */
+	__perf_pmu__new_alias(data->head, NULL, (char *)pe->name, (char *)pe->desc,
+			      (char *)pe->event, pe);
+	return 0;
+}
+
+/*
+ * From the pmu_events_map, find the table of PMU events that corresponds
+ * to the current running CPU. Then, add all PMU events from that table
+ * as aliases.
+ */
+void pmu_add_cpu_aliases_table(struct list_head *head, struct perf_pmu *pmu,
+			       const struct pmu_events_table *table)
+{
+	struct pmu_add_cpu_aliases_map_data data = {
+		.head = head,
+		.name = pmu->name,
+		.cpu_name = is_arm_pmu_core(pmu->name) ? pmu->name : "cpu",
+		.pmu = pmu,
+	};
+
+	pmu_events_table_for_each_event(table, pmu_add_cpu_aliases_map_callback, &data);
+}
+
+static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
+{
+	const struct pmu_events_table *table;
+
+	table = perf_pmu__find_table(pmu);
+	if (!table)
+		return;
+
+	pmu_add_cpu_aliases_table(head, pmu, table);
+}
+
+struct pmu_sys_event_iter_data {
+	struct list_head *head;
+	struct perf_pmu *pmu;
+};
+
+static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe,
+				       const struct pmu_events_table *table __maybe_unused,
+				       void *data)
+{
+	struct pmu_sys_event_iter_data *idata = data;
+	struct perf_pmu *pmu = idata->pmu;
+
+	if (!pe->name) {
+		if (pe->metric_group || pe->metric_name)
+			return 0;
+		return -EINVAL;
+	}
+
+	if (!pe->compat || !pe->pmu)
+		return 0;
+
+	if (!strcmp(pmu->id, pe->compat) &&
+	    pmu_uncore_alias_match(pe->pmu, pmu->name)) {
+		__perf_pmu__new_alias(idata->head, NULL,
+				      (char *)pe->name,
+				      (char *)pe->desc,
+				      (char *)pe->event,
+				      pe);
+	}
+
+	return 0;
+}
+
+void pmu_add_sys_aliases(struct list_head *head, struct perf_pmu *pmu)
+{
+	struct pmu_sys_event_iter_data idata = {
+		.head = head,
+		.pmu = pmu,
+	};
+
+	if (!pmu->id)
+		return;
+
+	pmu_for_each_sys_event(pmu_add_sys_aliases_iter_fn, &idata);
+}
+
+struct perf_event_attr * __weak
+perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
+{
+	return NULL;
+}
+
+char * __weak
+pmu_find_real_name(const char *name)
+{
+	return (char *)name;
+}
+
+char * __weak
+pmu_find_alias_name(const char *name __maybe_unused)
+{
+	return NULL;
+}
+
+static int pmu_max_precise(const char *name)
+{
+	char path[PATH_MAX];
+	int max_precise = -1;
+
+	scnprintf(path, PATH_MAX,
+		 "bus/event_source/devices/%s/caps/max_precise",
+		 name);
+
+	sysfs__read_int(path, &max_precise);
+	return max_precise;
+}
+
+static struct perf_pmu *pmu_lookup(const char *lookup_name)
+{
+	struct perf_pmu *pmu;
+	LIST_HEAD(format);
+	LIST_HEAD(aliases);
+	__u32 type;
+	char *name = pmu_find_real_name(lookup_name);
+	bool is_hybrid = perf_pmu__hybrid_mounted(name);
+	char *alias_name;
+
+	/*
+	 * Check pmu name for hybrid and the pmu may be invalid in sysfs
+	 */
+	if (!strncmp(name, "cpu_", 4) && !is_hybrid)
+		return NULL;
+
+	/*
+	 * The pmu data we store & need consists of the pmu
+	 * type value and format definitions. Load both right
+	 * now.
+	 */
+	if (pmu_format(name, &format))
+		return NULL;
+
+	/*
+	 * Check the type first to avoid unnecessary work.
+	 */
+	if (pmu_type(name, &type))
+		return NULL;
+
+	if (pmu_aliases(name, &aliases))
+		return NULL;
+
+	pmu = zalloc(sizeof(*pmu));
+	if (!pmu)
+		return NULL;
+
+	pmu->cpus = pmu_cpumask(name);
+	pmu->name = strdup(name);
+	if (!pmu->name)
+		goto err;
+
+	alias_name = pmu_find_alias_name(name);
+	if (alias_name) {
+		pmu->alias_name = strdup(alias_name);
+		if (!pmu->alias_name)
+			goto err;
+	}
+
+	pmu->type = type;
+	pmu->is_uncore = pmu_is_uncore(name);
+	if (pmu->is_uncore)
+		pmu->id = pmu_id(name);
+	pmu->is_hybrid = is_hybrid;
+	pmu->max_precise = pmu_max_precise(name);
+	pmu_add_cpu_aliases(&aliases, pmu);
+	pmu_add_sys_aliases(&aliases, pmu);
+
+	INIT_LIST_HEAD(&pmu->format);
+	INIT_LIST_HEAD(&pmu->aliases);
+	INIT_LIST_HEAD(&pmu->caps);
+	list_splice(&format, &pmu->format);
+	list_splice(&aliases, &pmu->aliases);
+	list_add_tail(&pmu->list, &pmus);
+
+	if (pmu->is_hybrid)
+		list_add_tail(&pmu->hybrid_list, &perf_pmu__hybrid_pmus);
+
+	pmu->default_config = perf_pmu__get_default_config(pmu);
+
+	return pmu;
+err:
+	if (pmu->name)
+		free(pmu->name);
+	free(pmu);
+	return NULL;
+}
+
+void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu)
+{
+	struct perf_pmu_format *format;
+
+	/* fake pmu doesn't have format list */
+	if (pmu == &perf_pmu__fake)
+		return;
+
+	list_for_each_entry(format, &pmu->format, list)
+		if (format->value >= PERF_PMU_FORMAT_VALUE_CONFIG_END) {
+			pr_warning("WARNING: '%s' format '%s' requires 'perf_event_attr::config%d'"
+				   "which is not supported by this version of perf!\n",
+				   pmu->name, format->name, format->value);
+			return;
+		}
+}
+
+static struct perf_pmu *pmu_find(const char *name)
+{
+	struct perf_pmu *pmu;
+
+	list_for_each_entry(pmu, &pmus, list) {
+		if (!strcmp(pmu->name, name) ||
+		    (pmu->alias_name && !strcmp(pmu->alias_name, name)))
+			return pmu;
+	}
+
+	return NULL;
+}
+
+struct perf_pmu *perf_pmu__find_by_type(unsigned int type)
+{
+	struct perf_pmu *pmu;
+
+	list_for_each_entry(pmu, &pmus, list)
+		if (pmu->type == type)
+			return pmu;
+
+	return NULL;
+}
+
+struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu)
+{
+	/*
+	 * pmu iterator: If pmu is NULL, we start at the begin,
+	 * otherwise return the next pmu. Returns NULL on end.
+	 */
+	if (!pmu) {
+		pmu_read_sysfs();
+		pmu = list_prepare_entry(pmu, &pmus, list);
+	}
+	list_for_each_entry_continue(pmu, &pmus, list)
+		return pmu;
+	return NULL;
+}
+
+struct perf_pmu *evsel__find_pmu(struct evsel *evsel)
+{
+	struct perf_pmu *pmu = NULL;
+
+	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+		if (pmu->type == evsel->core.attr.type)
+			break;
+	}
+
+	return pmu;
+}
+
+bool evsel__is_aux_event(struct evsel *evsel)
+{
+	struct perf_pmu *pmu = evsel__find_pmu(evsel);
+
+	return pmu && pmu->auxtrace;
+}
+
+struct perf_pmu *perf_pmu__find(const char *name)
+{
+	struct perf_pmu *pmu;
+
+	/*
+	 * Once PMU is loaded it stays in the list,
+	 * so we keep us from multiple reading/parsing
+	 * the pmu format definitions.
+	 */
+	pmu = pmu_find(name);
+	if (pmu)
+		return pmu;
+
+	return pmu_lookup(name);
+}
+
+static struct perf_pmu_format *
+pmu_find_format(struct list_head *formats, const char *name)
+{
+	struct perf_pmu_format *format;
+
+	list_for_each_entry(format, formats, list)
+		if (!strcmp(format->name, name))
+			return format;
+
+	return NULL;
+}
+
+__u64 perf_pmu__format_bits(struct list_head *formats, const char *name)
+{
+	struct perf_pmu_format *format = pmu_find_format(formats, name);
+	__u64 bits = 0;
+	int fbit;
+
+	if (!format)
+		return 0;
+
+	for_each_set_bit(fbit, format->bits, PERF_PMU_FORMAT_BITS)
+		bits |= 1ULL << fbit;
+
+	return bits;
+}
+
+int perf_pmu__format_type(struct list_head *formats, const char *name)
+{
+	struct perf_pmu_format *format = pmu_find_format(formats, name);
+
+	if (!format)
+		return -1;
+
+	return format->value;
+}
+
+/*
+ * Sets value based on the format definition (format parameter)
+ * and unformatted value (value parameter).
+ */
+static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v,
+			     bool zero)
+{
+	unsigned long fbit, vbit;
+
+	for (fbit = 0, vbit = 0; fbit < PERF_PMU_FORMAT_BITS; fbit++) {
+
+		if (!test_bit(fbit, format))
+			continue;
+
+		if (value & (1llu << vbit++))
+			*v |= (1llu << fbit);
+		else if (zero)
+			*v &= ~(1llu << fbit);
+	}
+}
+
+static __u64 pmu_format_max_value(const unsigned long *format)
+{
+	int w;
+
+	w = bitmap_weight(format, PERF_PMU_FORMAT_BITS);
+	if (!w)
+		return 0;
+	if (w < 64)
+		return (1ULL << w) - 1;
+	return -1;
+}
+
+/*
+ * Term is a string term, and might be a param-term. Try to look up it's value
+ * in the remaining terms.
+ * - We have a term like "base-or-format-term=param-term",
+ * - We need to find the value supplied for "param-term" (with param-term named
+ *   in a config string) later on in the term list.
+ */
+static int pmu_resolve_param_term(struct parse_events_term *term,
+				  struct list_head *head_terms,
+				  __u64 *value)
+{
+	struct parse_events_term *t;
+
+	list_for_each_entry(t, head_terms, list) {
+		if (t->type_val == PARSE_EVENTS__TERM_TYPE_NUM &&
+		    t->config && !strcmp(t->config, term->config)) {
+			t->used = true;
+			*value = t->val.num;
+			return 0;
+		}
+	}
+
+	if (verbose > 0)
+		printf("Required parameter '%s' not specified\n", term->config);
+
+	return -1;
+}
+
+static char *pmu_formats_string(struct list_head *formats)
+{
+	struct perf_pmu_format *format;
+	char *str = NULL;
+	struct strbuf buf = STRBUF_INIT;
+	unsigned int i = 0;
+
+	if (!formats)
+		return NULL;
+
+	/* sysfs exported terms */
+	list_for_each_entry(format, formats, list)
+		if (strbuf_addf(&buf, i++ ? ",%s" : "%s", format->name) < 0)
+			goto error;
+
+	str = strbuf_detach(&buf, NULL);
+error:
+	strbuf_release(&buf);
+
+	return str;
+}
+
+/*
+ * Setup one of config[12] attr members based on the
+ * user input data - term parameter.
+ */
+static int pmu_config_term(const char *pmu_name,
+			   struct list_head *formats,
+			   struct perf_event_attr *attr,
+			   struct parse_events_term *term,
+			   struct list_head *head_terms,
+			   bool zero, struct parse_events_error *err)
+{
+	struct perf_pmu_format *format;
+	__u64 *vp;
+	__u64 val, max_val;
+
+	/*
+	 * If this is a parameter we've already used for parameterized-eval,
+	 * skip it in normal eval.
+	 */
+	if (term->used)
+		return 0;
+
+	/*
+	 * Hardcoded terms should be already in, so nothing
+	 * to be done for them.
+	 */
+	if (parse_events__is_hardcoded_term(term))
+		return 0;
+
+	format = pmu_find_format(formats, term->config);
+	if (!format) {
+		char *pmu_term = pmu_formats_string(formats);
+		char *unknown_term;
+		char *help_msg;
+
+		if (asprintf(&unknown_term,
+				"unknown term '%s' for pmu '%s'",
+				term->config, pmu_name) < 0)
+			unknown_term = NULL;
+		help_msg = parse_events_formats_error_string(pmu_term);
+		if (err) {
+			parse_events_error__handle(err, term->err_term,
+						   unknown_term,
+						   help_msg);
+		} else {
+			pr_debug("%s (%s)\n", unknown_term, help_msg);
+			free(unknown_term);
+		}
+		free(pmu_term);
+		return -EINVAL;
+	}
+
+	switch (format->value) {
+	case PERF_PMU_FORMAT_VALUE_CONFIG:
+		vp = &attr->config;
+		break;
+	case PERF_PMU_FORMAT_VALUE_CONFIG1:
+		vp = &attr->config1;
+		break;
+	case PERF_PMU_FORMAT_VALUE_CONFIG2:
+		vp = &attr->config2;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/*
+	 * Either directly use a numeric term, or try to translate string terms
+	 * using event parameters.
+	 */
+	if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
+		if (term->no_value &&
+		    bitmap_weight(format->bits, PERF_PMU_FORMAT_BITS) > 1) {
+			if (err) {
+				parse_events_error__handle(err, term->err_val,
+					   strdup("no value assigned for term"),
+					   NULL);
+			}
+			return -EINVAL;
+		}
+
+		val = term->val.num;
+	} else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) {
+		if (strcmp(term->val.str, "?")) {
+			if (verbose > 0) {
+				pr_info("Invalid sysfs entry %s=%s\n",
+						term->config, term->val.str);
+			}
+			if (err) {
+				parse_events_error__handle(err, term->err_val,
+					strdup("expected numeric value"),
+					NULL);
+			}
+			return -EINVAL;
+		}
+
+		if (pmu_resolve_param_term(term, head_terms, &val))
+			return -EINVAL;
+	} else
+		return -EINVAL;
+
+	max_val = pmu_format_max_value(format->bits);
+	if (val > max_val) {
+		if (err) {
+			char *err_str;
+
+			parse_events_error__handle(err, term->err_val,
+				asprintf(&err_str,
+				    "value too big for format, maximum is %llu",
+				    (unsigned long long)max_val) < 0
+				    ? strdup("value too big for format")
+				    : err_str,
+				    NULL);
+			return -EINVAL;
+		}
+		/*
+		 * Assume we don't care if !err, in which case the value will be
+		 * silently truncated.
+		 */
+	}
+
+	pmu_format_value(format->bits, val, vp, zero);
+	return 0;
+}
+
+int perf_pmu__config_terms(const char *pmu_name, struct list_head *formats,
+			   struct perf_event_attr *attr,
+			   struct list_head *head_terms,
+			   bool zero, struct parse_events_error *err)
+{
+	struct parse_events_term *term;
+
+	list_for_each_entry(term, head_terms, list) {
+		if (pmu_config_term(pmu_name, formats, attr, term, head_terms,
+				    zero, err))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Configures event's 'attr' parameter based on the:
+ * 1) users input - specified in terms parameter
+ * 2) pmu format definitions - specified by pmu parameter
+ */
+int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
+		     struct list_head *head_terms,
+		     struct parse_events_error *err)
+{
+	bool zero = !!pmu->default_config;
+
+	attr->type = pmu->type;
+	return perf_pmu__config_terms(pmu->name, &pmu->format, attr,
+				      head_terms, zero, err);
+}
+
+static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu,
+					     struct parse_events_term *term)
+{
+	struct perf_pmu_alias *alias;
+	char *name;
+
+	if (parse_events__is_hardcoded_term(term))
+		return NULL;
+
+	if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
+		if (term->val.num != 1)
+			return NULL;
+		if (pmu_find_format(&pmu->format, term->config))
+			return NULL;
+		name = term->config;
+	} else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) {
+		if (strcasecmp(term->config, "event"))
+			return NULL;
+		name = term->val.str;
+	} else {
+		return NULL;
+	}
+
+	list_for_each_entry(alias, &pmu->aliases, list) {
+		if (!strcasecmp(alias->name, name))
+			return alias;
+	}
+	return NULL;
+}
+
+
+static int check_info_data(struct perf_pmu_alias *alias,
+			   struct perf_pmu_info *info)
+{
+	/*
+	 * Only one term in event definition can
+	 * define unit, scale and snapshot, fail
+	 * if there's more than one.
+	 */
+	if ((info->unit && alias->unit[0]) ||
+	    (info->scale && alias->scale) ||
+	    (info->snapshot && alias->snapshot))
+		return -EINVAL;
+
+	if (alias->unit[0])
+		info->unit = alias->unit;
+
+	if (alias->scale)
+		info->scale = alias->scale;
+
+	if (alias->snapshot)
+		info->snapshot = alias->snapshot;
+
+	return 0;
+}
+
+/*
+ * Find alias in the terms list and replace it with the terms
+ * defined for the alias
+ */
+int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
+			  struct perf_pmu_info *info)
+{
+	struct parse_events_term *term, *h;
+	struct perf_pmu_alias *alias;
+	int ret;
+
+	info->per_pkg = false;
+
+	/*
+	 * Mark unit and scale as not set
+	 * (different from default values, see below)
+	 */
+	info->unit     = NULL;
+	info->scale    = 0.0;
+	info->snapshot = false;
+	info->metric_expr = NULL;
+	info->metric_name = NULL;
+
+	list_for_each_entry_safe(term, h, head_terms, list) {
+		alias = pmu_find_alias(pmu, term);
+		if (!alias)
+			continue;
+		ret = pmu_alias_terms(alias, &term->list);
+		if (ret)
+			return ret;
+
+		ret = check_info_data(alias, info);
+		if (ret)
+			return ret;
+
+		if (alias->per_pkg)
+			info->per_pkg = true;
+		info->metric_expr = alias->metric_expr;
+		info->metric_name = alias->metric_name;
+
+		list_del_init(&term->list);
+		parse_events_term__delete(term);
+	}
+
+	/*
+	 * if no unit or scale found in aliases, then
+	 * set defaults as for evsel
+	 * unit cannot left to NULL
+	 */
+	if (info->unit == NULL)
+		info->unit   = "";
+
+	if (info->scale == 0.0)
+		info->scale  = 1.0;
+
+	return 0;
+}
+
+int perf_pmu__new_format(struct list_head *list, char *name,
+			 int config, unsigned long *bits)
+{
+	struct perf_pmu_format *format;
+
+	format = zalloc(sizeof(*format));
+	if (!format)
+		return -ENOMEM;
+
+	format->name = strdup(name);
+	format->value = config;
+	memcpy(format->bits, bits, sizeof(format->bits));
+
+	list_add_tail(&format->list, list);
+	return 0;
+}
+
+void perf_pmu__set_format(unsigned long *bits, long from, long to)
+{
+	long b;
+
+	if (!to)
+		to = from;
+
+	memset(bits, 0, BITS_TO_BYTES(PERF_PMU_FORMAT_BITS));
+	for (b = from; b <= to; b++)
+		set_bit(b, bits);
+}
+
+void perf_pmu__del_formats(struct list_head *formats)
+{
+	struct perf_pmu_format *fmt, *tmp;
+
+	list_for_each_entry_safe(fmt, tmp, formats, list) {
+		list_del(&fmt->list);
+		free(fmt->name);
+		free(fmt);
+	}
+}
+
+static int sub_non_neg(int a, int b)
+{
+	if (b > a)
+		return 0;
+	return a - b;
+}
+
+static char *format_alias(char *buf, int len, struct perf_pmu *pmu,
+			  struct perf_pmu_alias *alias)
+{
+	struct parse_events_term *term;
+	int used = snprintf(buf, len, "%s/%s", pmu->name, alias->name);
+
+	list_for_each_entry(term, &alias->terms, list) {
+		if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR)
+			used += snprintf(buf + used, sub_non_neg(len, used),
+					",%s=%s", term->config,
+					term->val.str);
+	}
+
+	if (sub_non_neg(len, used) > 0) {
+		buf[used] = '/';
+		used++;
+	}
+	if (sub_non_neg(len, used) > 0) {
+		buf[used] = '\0';
+		used++;
+	} else
+		buf[len - 1] = '\0';
+
+	return buf;
+}
+
+static char *format_alias_or(char *buf, int len, struct perf_pmu *pmu,
+			     struct perf_pmu_alias *alias)
+{
+	snprintf(buf, len, "%s OR %s/%s/", alias->name, pmu->name, alias->name);
+	return buf;
+}
+
+struct sevent {
+	char *name;
+	char *desc;
+	char *topic;
+	char *str;
+	char *pmu;
+	char *metric_expr;
+	char *metric_name;
+	int is_cpu;
+};
+
+static int cmp_sevent(const void *a, const void *b)
+{
+	const struct sevent *as = a;
+	const struct sevent *bs = b;
+	int ret;
+
+	/* Put extra events last */
+	if (!!as->desc != !!bs->desc)
+		return !!as->desc - !!bs->desc;
+	if (as->topic && bs->topic) {
+		int n = strcmp(as->topic, bs->topic);
+
+		if (n)
+			return n;
+	}
+
+	/* Order CPU core events to be first */
+	if (as->is_cpu != bs->is_cpu)
+		return bs->is_cpu - as->is_cpu;
+
+	ret = strcmp(as->name, bs->name);
+	if (!ret) {
+		if (as->pmu && bs->pmu)
+			return strcmp(as->pmu, bs->pmu);
+	}
+
+	return ret;
+}
+
+static void wordwrap(char *s, int start, int max, int corr)
+{
+	int column = start;
+	int n;
+
+	while (*s) {
+		int wlen = strcspn(s, " \t");
+
+		if (column + wlen >= max && column > start) {
+			printf("\n%*s", start, "");
+			column = start + corr;
+		}
+		n = printf("%s%.*s", column > start ? " " : "", wlen, s);
+		if (n <= 0)
+			break;
+		s += wlen;
+		column += n;
+		s = skip_spaces(s);
+	}
+}
+
+bool is_pmu_core(const char *name)
+{
+	return !strcmp(name, "cpu") || is_arm_pmu_core(name);
+}
+
+static bool pmu_alias_is_duplicate(struct sevent *alias_a,
+				   struct sevent *alias_b)
+{
+	/* Different names -> never duplicates */
+	if (strcmp(alias_a->name, alias_b->name))
+		return false;
+
+	/* Don't remove duplicates for hybrid PMUs */
+	if (perf_pmu__is_hybrid(alias_a->pmu) &&
+	    perf_pmu__is_hybrid(alias_b->pmu))
+		return false;
+
+	return true;
+}
+
+void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
+			bool long_desc, bool details_flag, bool deprecated,
+			const char *pmu_name)
+{
+	struct perf_pmu *pmu;
+	struct perf_pmu_alias *alias;
+	char buf[1024];
+	int printed = 0;
+	int len, j;
+	struct sevent *aliases;
+	int numdesc = 0;
+	int columns = pager_get_columns();
+	char *topic = NULL;
+
+	pmu = NULL;
+	len = 0;
+	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+		list_for_each_entry(alias, &pmu->aliases, list)
+			len++;
+		if (pmu->selectable)
+			len++;
+	}
+	aliases = zalloc(sizeof(struct sevent) * len);
+	if (!aliases)
+		goto out_enomem;
+	pmu = NULL;
+	j = 0;
+	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+		if (pmu_name && perf_pmu__is_hybrid(pmu->name) &&
+		    strcmp(pmu_name, pmu->name)) {
+			continue;
+		}
+
+		list_for_each_entry(alias, &pmu->aliases, list) {
+			char *name = alias->desc ? alias->name :
+				format_alias(buf, sizeof(buf), pmu, alias);
+			bool is_cpu = is_pmu_core(pmu->name) ||
+				      perf_pmu__is_hybrid(pmu->name);
+
+			if (alias->deprecated && !deprecated)
+				continue;
+
+			if (event_glob != NULL &&
+			    !(strglobmatch_nocase(name, event_glob) ||
+			      (!is_cpu && strglobmatch_nocase(alias->name,
+						       event_glob)) ||
+			      (alias->topic &&
+			       strglobmatch_nocase(alias->topic, event_glob))))
+				continue;
+
+			if (is_cpu && !name_only && !alias->desc)
+				name = format_alias_or(buf, sizeof(buf), pmu, alias);
+
+			aliases[j].name = name;
+			if (is_cpu && !name_only && !alias->desc)
+				aliases[j].name = format_alias_or(buf,
+								  sizeof(buf),
+								  pmu, alias);
+			aliases[j].name = strdup(aliases[j].name);
+			if (!aliases[j].name)
+				goto out_enomem;
+
+			aliases[j].desc = long_desc ? alias->long_desc :
+						alias->desc;
+			aliases[j].topic = alias->topic;
+			aliases[j].str = alias->str;
+			aliases[j].pmu = pmu->name;
+			aliases[j].metric_expr = alias->metric_expr;
+			aliases[j].metric_name = alias->metric_name;
+			aliases[j].is_cpu = is_cpu;
+			j++;
+		}
+		if (pmu->selectable &&
+		    (event_glob == NULL || strglobmatch(pmu->name, event_glob))) {
+			char *s;
+			if (asprintf(&s, "%s//", pmu->name) < 0)
+				goto out_enomem;
+			aliases[j].name = s;
+			j++;
+		}
+	}
+	len = j;
+	qsort(aliases, len, sizeof(struct sevent), cmp_sevent);
+	for (j = 0; j < len; j++) {
+		/* Skip duplicates */
+		if (j > 0 && pmu_alias_is_duplicate(&aliases[j], &aliases[j - 1]))
+			continue;
+
+		if (name_only) {
+			printf("%s ", aliases[j].name);
+			continue;
+		}
+		if (aliases[j].desc && !quiet_flag) {
+			if (numdesc++ == 0)
+				printf("\n");
+			if (aliases[j].topic && (!topic ||
+					strcmp(topic, aliases[j].topic))) {
+				printf("%s%s:\n", topic ? "\n" : "",
+						aliases[j].topic);
+				topic = aliases[j].topic;
+			}
+			printf("  %-50s\n", aliases[j].name);
+			printf("%*s", 8, "[");
+			wordwrap(aliases[j].desc, 8, columns, 0);
+			printf("]\n");
+			if (details_flag) {
+				printf("%*s%s/%s/ ", 8, "", aliases[j].pmu, aliases[j].str);
+				if (aliases[j].metric_name)
+					printf(" MetricName: %s", aliases[j].metric_name);
+				if (aliases[j].metric_expr)
+					printf(" MetricExpr: %s", aliases[j].metric_expr);
+				putchar('\n');
+			}
+		} else
+			printf("  %-50s [Kernel PMU event]\n", aliases[j].name);
+		printed++;
+	}
+	if (printed && pager_in_use())
+		printf("\n");
+out_free:
+	for (j = 0; j < len; j++)
+		zfree(&aliases[j].name);
+	zfree(&aliases);
+	return;
+
+out_enomem:
+	printf("FATAL: not enough memory to print PMU events\n");
+	if (aliases)
+		goto out_free;
+}
+
+bool pmu_have_event(const char *pname, const char *name)
+{
+	struct perf_pmu *pmu;
+	struct perf_pmu_alias *alias;
+
+	pmu = NULL;
+	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+		if (strcmp(pname, pmu->name))
+			continue;
+		list_for_each_entry(alias, &pmu->aliases, list)
+			if (!strcmp(alias->name, name))
+				return true;
+	}
+	return false;
+}
+
+static FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name)
+{
+	char path[PATH_MAX];
+	const char *sysfs;
+
+	sysfs = sysfs__mountpoint();
+	if (!sysfs)
+		return NULL;
+
+	snprintf(path, PATH_MAX,
+		 "%s" EVENT_SOURCE_DEVICE_PATH "%s/%s", sysfs, pmu->name, name);
+	if (!file_available(path))
+		return NULL;
+	return fopen(path, "r");
+}
+
+int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt,
+			...)
+{
+	va_list args;
+	FILE *file;
+	int ret = EOF;
+
+	va_start(args, fmt);
+	file = perf_pmu__open_file(pmu, name);
+	if (file) {
+		ret = vfscanf(file, fmt, args);
+		fclose(file);
+	}
+	va_end(args);
+	return ret;
+}
+
+static int perf_pmu__new_caps(struct list_head *list, char *name, char *value)
+{
+	struct perf_pmu_caps *caps = zalloc(sizeof(*caps));
+
+	if (!caps)
+		return -ENOMEM;
+
+	caps->name = strdup(name);
+	if (!caps->name)
+		goto free_caps;
+	caps->value = strndup(value, strlen(value) - 1);
+	if (!caps->value)
+		goto free_name;
+	list_add_tail(&caps->list, list);
+	return 0;
+
+free_name:
+	zfree(&caps->name);
+free_caps:
+	free(caps);
+
+	return -ENOMEM;
+}
+
+/*
+ * Reading/parsing the given pmu capabilities, which should be located at:
+ * /sys/bus/event_source/devices/<dev>/caps as sysfs group attributes.
+ * Return the number of capabilities
+ */
+int perf_pmu__caps_parse(struct perf_pmu *pmu)
+{
+	struct stat st;
+	char caps_path[PATH_MAX];
+	const char *sysfs = sysfs__mountpoint();
+	DIR *caps_dir;
+	struct dirent *evt_ent;
+
+	if (pmu->caps_initialized)
+		return pmu->nr_caps;
+
+	pmu->nr_caps = 0;
+
+	if (!sysfs)
+		return -1;
+
+	snprintf(caps_path, PATH_MAX,
+		 "%s" EVENT_SOURCE_DEVICE_PATH "%s/caps", sysfs, pmu->name);
+
+	if (stat(caps_path, &st) < 0) {
+		pmu->caps_initialized = true;
+		return 0;	/* no error if caps does not exist */
+	}
+
+	caps_dir = opendir(caps_path);
+	if (!caps_dir)
+		return -EINVAL;
+
+	while ((evt_ent = readdir(caps_dir)) != NULL) {
+		char path[PATH_MAX + NAME_MAX + 1];
+		char *name = evt_ent->d_name;
+		char value[128];
+		FILE *file;
+
+		if (!strcmp(name, ".") || !strcmp(name, ".."))
+			continue;
+
+		snprintf(path, sizeof(path), "%s/%s", caps_path, name);
+
+		file = fopen(path, "r");
+		if (!file)
+			continue;
+
+		if (!fgets(value, sizeof(value), file) ||
+		    (perf_pmu__new_caps(&pmu->caps, name, value) < 0)) {
+			fclose(file);
+			continue;
+		}
+
+		pmu->nr_caps++;
+		fclose(file);
+	}
+
+	closedir(caps_dir);
+
+	pmu->caps_initialized = true;
+	return pmu->nr_caps;
+}
+
+void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
+				   const char *name)
+{
+	struct perf_pmu_format *format;
+	__u64 masks = 0, bits;
+	char buf[100];
+	unsigned int i;
+
+	list_for_each_entry(format, &pmu->format, list)	{
+		if (format->value != PERF_PMU_FORMAT_VALUE_CONFIG)
+			continue;
+
+		for_each_set_bit(i, format->bits, PERF_PMU_FORMAT_BITS)
+			masks |= 1ULL << i;
+	}
+
+	/*
+	 * Kernel doesn't export any valid format bits.
+	 */
+	if (masks == 0)
+		return;
+
+	bits = config & ~masks;
+	if (bits == 0)
+		return;
+
+	bitmap_scnprintf((unsigned long *)&bits, sizeof(bits) * 8, buf, sizeof(buf));
+
+	pr_warning("WARNING: event '%s' not valid (bits %s of config "
+		   "'%llx' not supported by kernel)!\n",
+		   name ?: "N/A", buf, config);
+}
+
+bool perf_pmu__has_hybrid(void)
+{
+	if (!hybrid_scanned) {
+		hybrid_scanned = true;
+		perf_pmu__scan(NULL);
+	}
+
+	return !list_empty(&perf_pmu__hybrid_pmus);
+}
+
+int perf_pmu__match(char *pattern, char *name, char *tok)
+{
+	if (!name)
+		return -1;
+
+	if (fnmatch(pattern, name, 0))
+		return -1;
+
+	if (tok && !perf_pmu__valid_suffix(name, tok))
+		return -1;
+
+	return 0;
+}
+
+int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus,
+			 struct perf_cpu_map **mcpus_ptr,
+			 struct perf_cpu_map **ucpus_ptr)
+{
+	struct perf_cpu_map *pmu_cpus = pmu->cpus;
+	struct perf_cpu_map *matched_cpus, *unmatched_cpus;
+	struct perf_cpu cpu;
+	int i, matched_nr = 0, unmatched_nr = 0;
+
+	matched_cpus = perf_cpu_map__default_new();
+	if (!matched_cpus)
+		return -1;
+
+	unmatched_cpus = perf_cpu_map__default_new();
+	if (!unmatched_cpus) {
+		perf_cpu_map__put(matched_cpus);
+		return -1;
+	}
+
+	perf_cpu_map__for_each_cpu(cpu, i, cpus) {
+		if (!perf_cpu_map__has(pmu_cpus, cpu))
+			unmatched_cpus->map[unmatched_nr++] = cpu;
+		else
+			matched_cpus->map[matched_nr++] = cpu;
+	}
+
+	unmatched_cpus->nr = unmatched_nr;
+	matched_cpus->nr = matched_nr;
+	*mcpus_ptr = matched_cpus;
+	*ucpus_ptr = unmatched_cpus;
+	return 0;
+}
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
new file mode 100644
index 000000000..68e15c38a
--- /dev/null
+++ b/tools/perf/util/pmu.h
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PMU_H
+#define __PMU_H
+
+#include <linux/bitmap.h>
+#include <linux/compiler.h>
+#include <linux/perf_event.h>
+#include <linux/list.h>
+#include <stdbool.h>
+#include "parse-events.h"
+#include "pmu-events/pmu-events.h"
+
+struct evsel_config_term;
+struct perf_cpu_map;
+
+enum {
+	PERF_PMU_FORMAT_VALUE_CONFIG,
+	PERF_PMU_FORMAT_VALUE_CONFIG1,
+	PERF_PMU_FORMAT_VALUE_CONFIG2,
+	PERF_PMU_FORMAT_VALUE_CONFIG_END,
+};
+
+#define PERF_PMU_FORMAT_BITS 64
+#define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/"
+#define CPUS_TEMPLATE_CPU	"%s/bus/event_source/devices/%s/cpus"
+#define MAX_PMU_NAME_LEN 128
+
+struct perf_event_attr;
+
+struct perf_pmu_caps {
+	char *name;
+	char *value;
+	struct list_head list;
+};
+
+struct perf_pmu {
+	char *name;
+	char *alias_name;
+	char *id;
+	__u32 type;
+	bool selectable;
+	bool is_uncore;
+	bool is_hybrid;
+	bool auxtrace;
+	int max_precise;
+	struct perf_event_attr *default_config;
+	struct perf_cpu_map *cpus;
+	struct list_head format;  /* HEAD struct perf_pmu_format -> list */
+	struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */
+	bool caps_initialized;
+	u32 nr_caps;
+	struct list_head caps;    /* HEAD struct perf_pmu_caps -> list */
+	struct list_head list;    /* ELEM */
+	struct list_head hybrid_list;
+
+	struct {
+		bool exclude_guest;
+	} missing_features;
+};
+
+extern struct perf_pmu perf_pmu__fake;
+
+struct perf_pmu_info {
+	const char *unit;
+	const char *metric_expr;
+	const char *metric_name;
+	double scale;
+	bool per_pkg;
+	bool snapshot;
+};
+
+#define UNIT_MAX_LEN	31 /* max length for event unit name */
+
+struct perf_pmu_alias {
+	char *name;
+	char *desc;
+	char *long_desc;
+	char *topic;
+	char *str;
+	struct list_head terms; /* HEAD struct parse_events_term -> list */
+	struct list_head list;  /* ELEM */
+	char unit[UNIT_MAX_LEN+1];
+	double scale;
+	bool per_pkg;
+	bool snapshot;
+	bool deprecated;
+	char *metric_expr;
+	char *metric_name;
+	char *pmu_name;
+};
+
+struct perf_pmu *perf_pmu__find(const char *name);
+struct perf_pmu *perf_pmu__find_by_type(unsigned int type);
+void pmu_add_sys_aliases(struct list_head *head, struct perf_pmu *pmu);
+int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
+		     struct list_head *head_terms,
+		     struct parse_events_error *error);
+int perf_pmu__config_terms(const char *pmu_name, struct list_head *formats,
+			   struct perf_event_attr *attr,
+			   struct list_head *head_terms,
+			   bool zero, struct parse_events_error *error);
+__u64 perf_pmu__format_bits(struct list_head *formats, const char *name);
+int perf_pmu__format_type(struct list_head *formats, const char *name);
+int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
+			  struct perf_pmu_info *info);
+struct list_head *perf_pmu__alias(struct perf_pmu *pmu,
+				  struct list_head *head_terms);
+void perf_pmu_error(struct list_head *list, char *name, char const *msg);
+
+int perf_pmu__new_format(struct list_head *list, char *name,
+			 int config, unsigned long *bits);
+void perf_pmu__set_format(unsigned long *bits, long from, long to);
+int perf_pmu__format_parse(char *dir, struct list_head *head);
+void perf_pmu__del_formats(struct list_head *formats);
+
+struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu);
+
+bool is_pmu_core(const char *name);
+void print_pmu_events(const char *event_glob, bool name_only, bool quiet,
+		      bool long_desc, bool details_flag,
+		      bool deprecated, const char *pmu_name);
+bool pmu_have_event(const char *pname, const char *name);
+
+int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) __scanf(3, 4);
+
+int perf_pmu__test(void);
+
+struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu);
+void pmu_add_cpu_aliases_table(struct list_head *head, struct perf_pmu *pmu,
+			       const struct pmu_events_table *table);
+
+char *perf_pmu__getcpuid(struct perf_pmu *pmu);
+const struct pmu_events_table *pmu_events_table__find(void);
+bool pmu_uncore_alias_match(const char *pmu_name, const char *name);
+void perf_pmu_free_alias(struct perf_pmu_alias *alias);
+
+int perf_pmu__convert_scale(const char *scale, char **end, double *sval);
+
+int perf_pmu__caps_parse(struct perf_pmu *pmu);
+
+void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
+				   const char *name);
+void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu);
+
+bool perf_pmu__has_hybrid(void);
+int perf_pmu__match(char *pattern, char *name, char *tok);
+
+int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus,
+			 struct perf_cpu_map **mcpus_ptr,
+			 struct perf_cpu_map **ucpus_ptr);
+
+char *pmu_find_real_name(const char *name);
+char *pmu_find_alias_name(const char *name);
+#endif /* __PMU_H */
diff --git a/tools/perf/util/pmu.l b/tools/perf/util/pmu.l
new file mode 100644
index 000000000..58b4926cf
--- /dev/null
+++ b/tools/perf/util/pmu.l
@@ -0,0 +1,41 @@
+%option prefix="perf_pmu_"
+
+%{
+#include <stdlib.h>
+#include <linux/bitops.h>
+#include "pmu.h"
+#include "pmu-bison.h"
+
+static int value(int base)
+{
+	long num;
+
+	errno = 0;
+	num = strtoul(perf_pmu_text, NULL, base);
+	if (errno)
+		return PP_ERROR;
+
+	perf_pmu_lval.num = num;
+	return PP_VALUE;
+}
+
+%}
+
+num_dec         [0-9]+
+
+%%
+
+{num_dec}	{ return value(10); }
+config		{ return PP_CONFIG; }
+-		{ return '-'; }
+:		{ return ':'; }
+,		{ return ','; }
+.		{ ; }
+\n		{ ; }
+
+%%
+
+int perf_pmu_wrap(void)
+{
+	return 1;
+}
diff --git a/tools/perf/util/pmu.y b/tools/perf/util/pmu.y
new file mode 100644
index 000000000..e675d79a0
--- /dev/null
+++ b/tools/perf/util/pmu.y
@@ -0,0 +1,83 @@
+
+%parse-param {struct list_head *format}
+%parse-param {char *name}
+
+%{
+
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/bitmap.h>
+#include <string.h>
+#include "pmu.h"
+
+#define ABORT_ON(val) \
+do { \
+        if (val) \
+                YYABORT; \
+} while (0)
+
+%}
+
+%token PP_CONFIG
+%token PP_VALUE PP_ERROR
+%type <num> PP_VALUE
+%type <bits> bit_term
+%type <bits> bits
+
+%union
+{
+	unsigned long num;
+	DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS);
+}
+
+%%
+
+format:
+format format_term
+|
+format_term
+
+format_term:
+PP_CONFIG ':' bits
+{
+	ABORT_ON(perf_pmu__new_format(format, name,
+				      PERF_PMU_FORMAT_VALUE_CONFIG,
+				      $3));
+}
+|
+PP_CONFIG PP_VALUE ':' bits
+{
+	ABORT_ON(perf_pmu__new_format(format, name,
+				      $2,
+				      $4));
+}
+
+bits:
+bits ',' bit_term
+{
+	bitmap_or($$, $1, $3, 64);
+}
+|
+bit_term
+{
+	memcpy($$, $1, sizeof($1));
+}
+
+bit_term:
+PP_VALUE '-' PP_VALUE
+{
+	perf_pmu__set_format($$, $1, $3);
+}
+|
+PP_VALUE
+{
+	perf_pmu__set_format($$, $1, 0);
+}
+
+%%
+
+void perf_pmu_error(struct list_head *list __maybe_unused,
+		    char *name __maybe_unused,
+		    char const *msg __maybe_unused)
+{
+}
diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c
new file mode 100644
index 000000000..c4d5d87fa
--- /dev/null
+++ b/tools/perf/util/print-events.c
@@ -0,0 +1,533 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <dirent.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+
+#include <api/fs/tracing_path.h>
+#include <linux/stddef.h>
+#include <linux/perf_event.h>
+#include <linux/zalloc.h>
+#include <subcmd/pager.h>
+
+#include "build-id.h"
+#include "debug.h"
+#include "evsel.h"
+#include "metricgroup.h"
+#include "parse-events.h"
+#include "pmu.h"
+#include "print-events.h"
+#include "probe-file.h"
+#include "string2.h"
+#include "strlist.h"
+#include "tracepoint.h"
+#include "pfm.h"
+#include "pmu-hybrid.h"
+
+#define MAX_NAME_LEN 100
+
+static const char * const event_type_descriptors[] = {
+	"Hardware event",
+	"Software event",
+	"Tracepoint event",
+	"Hardware cache event",
+	"Raw hardware event descriptor",
+	"Hardware breakpoint",
+};
+
+static const struct event_symbol event_symbols_tool[PERF_TOOL_MAX] = {
+	[PERF_TOOL_DURATION_TIME] = {
+		.symbol = "duration_time",
+		.alias  = "",
+	},
+	[PERF_TOOL_USER_TIME] = {
+		.symbol = "user_time",
+		.alias  = "",
+	},
+	[PERF_TOOL_SYSTEM_TIME] = {
+		.symbol = "system_time",
+		.alias  = "",
+	},
+};
+
+static int cmp_string(const void *a, const void *b)
+{
+	const char * const *as = a;
+	const char * const *bs = b;
+
+	return strcmp(*as, *bs);
+}
+
+/*
+ * Print the events from <debugfs_mount_point>/tracing/events
+ */
+void print_tracepoint_events(const char *subsys_glob,
+			     const char *event_glob, bool name_only)
+{
+	DIR *sys_dir, *evt_dir;
+	struct dirent *sys_dirent, *evt_dirent;
+	char evt_path[MAXPATHLEN];
+	char *dir_path;
+	char **evt_list = NULL;
+	unsigned int evt_i = 0, evt_num = 0;
+	bool evt_num_known = false;
+
+restart:
+	sys_dir = tracing_events__opendir();
+	if (!sys_dir)
+		return;
+
+	if (evt_num_known) {
+		evt_list = zalloc(sizeof(char *) * evt_num);
+		if (!evt_list)
+			goto out_close_sys_dir;
+	}
+
+	for_each_subsystem(sys_dir, sys_dirent) {
+		if (subsys_glob != NULL &&
+		    !strglobmatch(sys_dirent->d_name, subsys_glob))
+			continue;
+
+		dir_path = get_events_file(sys_dirent->d_name);
+		if (!dir_path)
+			continue;
+		evt_dir = opendir(dir_path);
+		if (!evt_dir)
+			goto next;
+
+		for_each_event(dir_path, evt_dir, evt_dirent) {
+			if (event_glob != NULL &&
+			    !strglobmatch(evt_dirent->d_name, event_glob))
+				continue;
+
+			if (!evt_num_known) {
+				evt_num++;
+				continue;
+			}
+
+			snprintf(evt_path, MAXPATHLEN, "%s:%s",
+				 sys_dirent->d_name, evt_dirent->d_name);
+
+			evt_list[evt_i] = strdup(evt_path);
+			if (evt_list[evt_i] == NULL) {
+				put_events_file(dir_path);
+				goto out_close_evt_dir;
+			}
+			evt_i++;
+		}
+		closedir(evt_dir);
+next:
+		put_events_file(dir_path);
+	}
+	closedir(sys_dir);
+
+	if (!evt_num_known) {
+		evt_num_known = true;
+		goto restart;
+	}
+	qsort(evt_list, evt_num, sizeof(char *), cmp_string);
+	evt_i = 0;
+	while (evt_i < evt_num) {
+		if (name_only) {
+			printf("%s ", evt_list[evt_i++]);
+			continue;
+		}
+		printf("  %-50s [%s]\n", evt_list[evt_i++],
+				event_type_descriptors[PERF_TYPE_TRACEPOINT]);
+	}
+	if (evt_num && pager_in_use())
+		printf("\n");
+
+out_free:
+	evt_num = evt_i;
+	for (evt_i = 0; evt_i < evt_num; evt_i++)
+		zfree(&evt_list[evt_i]);
+	zfree(&evt_list);
+	return;
+
+out_close_evt_dir:
+	closedir(evt_dir);
+out_close_sys_dir:
+	closedir(sys_dir);
+
+	printf("FATAL: not enough memory to print %s\n",
+			event_type_descriptors[PERF_TYPE_TRACEPOINT]);
+	if (evt_list)
+		goto out_free;
+}
+
+void print_sdt_events(const char *subsys_glob, const char *event_glob,
+		      bool name_only)
+{
+	struct probe_cache *pcache;
+	struct probe_cache_entry *ent;
+	struct strlist *bidlist, *sdtlist;
+	struct strlist_config cfg = {.dont_dupstr = true};
+	struct str_node *nd, *nd2;
+	char *buf, *path, *ptr = NULL;
+	bool show_detail = false;
+	int ret;
+
+	sdtlist = strlist__new(NULL, &cfg);
+	if (!sdtlist) {
+		pr_debug("Failed to allocate new strlist for SDT\n");
+		return;
+	}
+	bidlist = build_id_cache__list_all(true);
+	if (!bidlist) {
+		pr_debug("Failed to get buildids: %d\n", errno);
+		return;
+	}
+	strlist__for_each_entry(nd, bidlist) {
+		pcache = probe_cache__new(nd->s, NULL);
+		if (!pcache)
+			continue;
+		list_for_each_entry(ent, &pcache->entries, node) {
+			if (!ent->sdt)
+				continue;
+			if (subsys_glob &&
+			    !strglobmatch(ent->pev.group, subsys_glob))
+				continue;
+			if (event_glob &&
+			    !strglobmatch(ent->pev.event, event_glob))
+				continue;
+			ret = asprintf(&buf, "%s:%s@%s", ent->pev.group,
+					ent->pev.event, nd->s);
+			if (ret > 0)
+				strlist__add(sdtlist, buf);
+		}
+		probe_cache__delete(pcache);
+	}
+	strlist__delete(bidlist);
+
+	strlist__for_each_entry(nd, sdtlist) {
+		buf = strchr(nd->s, '@');
+		if (buf)
+			*(buf++) = '\0';
+		if (name_only) {
+			printf("%s ", nd->s);
+			continue;
+		}
+		nd2 = strlist__next(nd);
+		if (nd2) {
+			ptr = strchr(nd2->s, '@');
+			if (ptr)
+				*ptr = '\0';
+			if (strcmp(nd->s, nd2->s) == 0)
+				show_detail = true;
+		}
+		if (show_detail) {
+			path = build_id_cache__origname(buf);
+			ret = asprintf(&buf, "%s@%s(%.12s)", nd->s, path, buf);
+			if (ret > 0) {
+				printf("  %-50s [%s]\n", buf, "SDT event");
+				free(buf);
+			}
+			free(path);
+		} else
+			printf("  %-50s [%s]\n", nd->s, "SDT event");
+		if (nd2) {
+			if (strcmp(nd->s, nd2->s) != 0)
+				show_detail = false;
+			if (ptr)
+				*ptr = '@';
+		}
+	}
+	strlist__delete(sdtlist);
+}
+
+int print_hwcache_events(const char *event_glob, bool name_only)
+{
+	unsigned int type, op, i, evt_i = 0, evt_num = 0, npmus = 0;
+	char name[64], new_name[128];
+	char **evt_list = NULL, **evt_pmus = NULL;
+	bool evt_num_known = false;
+	struct perf_pmu *pmu = NULL;
+
+	if (perf_pmu__has_hybrid()) {
+		npmus = perf_pmu__hybrid_pmu_num();
+		evt_pmus = zalloc(sizeof(char *) * npmus);
+		if (!evt_pmus)
+			goto out_enomem;
+	}
+
+restart:
+	if (evt_num_known) {
+		evt_list = zalloc(sizeof(char *) * evt_num);
+		if (!evt_list)
+			goto out_enomem;
+	}
+
+	for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
+		for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
+			/* skip invalid cache type */
+			if (!evsel__is_cache_op_valid(type, op))
+				continue;
+
+			for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
+				unsigned int hybrid_supported = 0, j;
+				bool supported;
+
+				__evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name));
+				if (event_glob != NULL && !strglobmatch(name, event_glob))
+					continue;
+
+				if (!perf_pmu__has_hybrid()) {
+					if (!is_event_supported(PERF_TYPE_HW_CACHE,
+								type | (op << 8) | (i << 16))) {
+						continue;
+					}
+				} else {
+					perf_pmu__for_each_hybrid_pmu(pmu) {
+						if (!evt_num_known) {
+							evt_num++;
+							continue;
+						}
+
+						supported = is_event_supported(
+							PERF_TYPE_HW_CACHE,
+							type | (op << 8) | (i << 16) |
+							((__u64)pmu->type << PERF_PMU_TYPE_SHIFT));
+						if (supported) {
+							snprintf(new_name, sizeof(new_name),
+								 "%s/%s/", pmu->name, name);
+							evt_pmus[hybrid_supported] =
+								strdup(new_name);
+							hybrid_supported++;
+						}
+					}
+
+					if (hybrid_supported == 0)
+						continue;
+				}
+
+				if (!evt_num_known) {
+					evt_num++;
+					continue;
+				}
+
+				if ((hybrid_supported == 0) ||
+				    (hybrid_supported == npmus)) {
+					evt_list[evt_i] = strdup(name);
+					if (npmus > 0) {
+						for (j = 0; j < npmus; j++)
+							zfree(&evt_pmus[j]);
+					}
+				} else {
+					for (j = 0; j < hybrid_supported; j++) {
+						evt_list[evt_i++] = evt_pmus[j];
+						evt_pmus[j] = NULL;
+					}
+					continue;
+				}
+
+				if (evt_list[evt_i] == NULL)
+					goto out_enomem;
+				evt_i++;
+			}
+		}
+	}
+
+	if (!evt_num_known) {
+		evt_num_known = true;
+		goto restart;
+	}
+
+	for (evt_i = 0; evt_i < evt_num; evt_i++) {
+		if (!evt_list[evt_i])
+			break;
+	}
+
+	evt_num = evt_i;
+	qsort(evt_list, evt_num, sizeof(char *), cmp_string);
+	evt_i = 0;
+	while (evt_i < evt_num) {
+		if (name_only) {
+			printf("%s ", evt_list[evt_i++]);
+			continue;
+		}
+		printf("  %-50s [%s]\n", evt_list[evt_i++],
+				event_type_descriptors[PERF_TYPE_HW_CACHE]);
+	}
+	if (evt_num && pager_in_use())
+		printf("\n");
+
+out_free:
+	evt_num = evt_i;
+	for (evt_i = 0; evt_i < evt_num; evt_i++)
+		zfree(&evt_list[evt_i]);
+	zfree(&evt_list);
+
+	for (evt_i = 0; evt_i < npmus; evt_i++)
+		zfree(&evt_pmus[evt_i]);
+	zfree(&evt_pmus);
+	return evt_num;
+
+out_enomem:
+	printf("FATAL: not enough memory to print %s\n",
+		event_type_descriptors[PERF_TYPE_HW_CACHE]);
+	if (evt_list)
+		goto out_free;
+	return evt_num;
+}
+
+static void print_tool_event(const struct event_symbol *syms, const char *event_glob,
+			     bool name_only)
+{
+	if (syms->symbol == NULL)
+		return;
+
+	if (event_glob && !(strglobmatch(syms->symbol, event_glob) ||
+	      (syms->alias && strglobmatch(syms->alias, event_glob))))
+		return;
+
+	if (name_only)
+		printf("%s ", syms->symbol);
+	else {
+		char name[MAX_NAME_LEN];
+
+		if (syms->alias && strlen(syms->alias))
+			snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias);
+		else
+			strlcpy(name, syms->symbol, MAX_NAME_LEN);
+		printf("  %-50s [%s]\n", name, "Tool event");
+	}
+}
+
+void print_tool_events(const char *event_glob, bool name_only)
+{
+	// Start at 1 because the first enum entry means no tool event.
+	for (int i = 1; i < PERF_TOOL_MAX; ++i)
+		print_tool_event(event_symbols_tool + i, event_glob, name_only);
+
+	if (pager_in_use())
+		printf("\n");
+}
+
+void print_symbol_events(const char *event_glob, unsigned int type,
+			 struct event_symbol *syms, unsigned int max,
+			 bool name_only)
+{
+	unsigned int i, evt_i = 0, evt_num = 0;
+	char name[MAX_NAME_LEN];
+	char **evt_list = NULL;
+	bool evt_num_known = false;
+
+restart:
+	if (evt_num_known) {
+		evt_list = zalloc(sizeof(char *) * evt_num);
+		if (!evt_list)
+			goto out_enomem;
+		syms -= max;
+	}
+
+	for (i = 0; i < max; i++, syms++) {
+		/*
+		 * New attr.config still not supported here, the latest
+		 * example was PERF_COUNT_SW_CGROUP_SWITCHES
+		 */
+		if (syms->symbol == NULL)
+			continue;
+
+		if (event_glob != NULL && !(strglobmatch(syms->symbol, event_glob) ||
+		      (syms->alias && strglobmatch(syms->alias, event_glob))))
+			continue;
+
+		if (!is_event_supported(type, i))
+			continue;
+
+		if (!evt_num_known) {
+			evt_num++;
+			continue;
+		}
+
+		if (!name_only && strlen(syms->alias))
+			snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias);
+		else
+			strlcpy(name, syms->symbol, MAX_NAME_LEN);
+
+		evt_list[evt_i] = strdup(name);
+		if (evt_list[evt_i] == NULL)
+			goto out_enomem;
+		evt_i++;
+	}
+
+	if (!evt_num_known) {
+		evt_num_known = true;
+		goto restart;
+	}
+	qsort(evt_list, evt_num, sizeof(char *), cmp_string);
+	evt_i = 0;
+	while (evt_i < evt_num) {
+		if (name_only) {
+			printf("%s ", evt_list[evt_i++]);
+			continue;
+		}
+		printf("  %-50s [%s]\n", evt_list[evt_i++], event_type_descriptors[type]);
+	}
+	if (evt_num && pager_in_use())
+		printf("\n");
+
+out_free:
+	evt_num = evt_i;
+	for (evt_i = 0; evt_i < evt_num; evt_i++)
+		zfree(&evt_list[evt_i]);
+	zfree(&evt_list);
+	return;
+
+out_enomem:
+	printf("FATAL: not enough memory to print %s\n", event_type_descriptors[type]);
+	if (evt_list)
+		goto out_free;
+}
+
+/*
+ * Print the help text for the event symbols:
+ */
+void print_events(const char *event_glob, bool name_only, bool quiet_flag,
+			bool long_desc, bool details_flag, bool deprecated,
+			const char *pmu_name)
+{
+	print_symbol_events(event_glob, PERF_TYPE_HARDWARE,
+			    event_symbols_hw, PERF_COUNT_HW_MAX, name_only);
+
+	print_symbol_events(event_glob, PERF_TYPE_SOFTWARE,
+			    event_symbols_sw, PERF_COUNT_SW_MAX, name_only);
+	print_tool_events(event_glob, name_only);
+
+	print_hwcache_events(event_glob, name_only);
+
+	print_pmu_events(event_glob, name_only, quiet_flag, long_desc,
+			details_flag, deprecated, pmu_name);
+
+	if (event_glob != NULL)
+		return;
+
+	if (!name_only) {
+		printf("  %-50s [%s]\n",
+		       "rNNN",
+		       event_type_descriptors[PERF_TYPE_RAW]);
+		printf("  %-50s [%s]\n",
+		       "cpu/t1=v1[,t2=v2,t3 ...]/modifier",
+		       event_type_descriptors[PERF_TYPE_RAW]);
+		if (pager_in_use())
+			printf("   (see 'man perf-list' on how to encode it)\n\n");
+
+		printf("  %-50s [%s]\n",
+		       "mem:<addr>[/len][:access]",
+			event_type_descriptors[PERF_TYPE_BREAKPOINT]);
+		if (pager_in_use())
+			printf("\n");
+	}
+
+	print_tracepoint_events(NULL, NULL, name_only);
+
+	print_sdt_events(NULL, NULL, name_only);
+
+	metricgroup__print(true, true, NULL, name_only, details_flag,
+			   pmu_name);
+
+	print_libpfm_events(name_only, long_desc);
+}
diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h
new file mode 100644
index 000000000..1da9910d8
--- /dev/null
+++ b/tools/perf/util/print-events.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_PRINT_EVENTS_H
+#define __PERF_PRINT_EVENTS_H
+
+#include <stdbool.h>
+
+struct event_symbol;
+
+void print_events(const char *event_glob, bool name_only, bool quiet_flag,
+		  bool long_desc, bool details_flag, bool deprecated,
+		  const char *pmu_name);
+int print_hwcache_events(const char *event_glob, bool name_only);
+void print_sdt_events(const char *subsys_glob, const char *event_glob,
+		      bool name_only);
+void print_symbol_events(const char *event_glob, unsigned int type,
+			 struct event_symbol *syms, unsigned int max,
+			 bool name_only);
+void print_tool_events(const char *event_glob, bool name_only);
+void print_tracepoint_events(const char *subsys_glob, const char *event_glob,
+			     bool name_only);
+
+#endif /* __PERF_PRINT_EVENTS_H */
diff --git a/tools/perf/util/print_binary.c b/tools/perf/util/print_binary.c
new file mode 100644
index 000000000..13fdc51c6
--- /dev/null
+++ b/tools/perf/util/print_binary.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "print_binary.h"
+#include <linux/log2.h>
+#include <linux/ctype.h>
+
+int binary__fprintf(unsigned char *data, size_t len,
+		    size_t bytes_per_line, binary__fprintf_t printer,
+		    void *extra, FILE *fp)
+{
+	size_t i, j, mask;
+	int printed = 0;
+
+	if (!printer)
+		return 0;
+
+	bytes_per_line = roundup_pow_of_two(bytes_per_line);
+	mask = bytes_per_line - 1;
+
+	printed += printer(BINARY_PRINT_DATA_BEGIN, 0, extra, fp);
+	for (i = 0; i < len; i++) {
+		if ((i & mask) == 0) {
+			printed += printer(BINARY_PRINT_LINE_BEGIN, -1, extra, fp);
+			printed += printer(BINARY_PRINT_ADDR, i, extra, fp);
+		}
+
+		printed += printer(BINARY_PRINT_NUM_DATA, data[i], extra, fp);
+
+		if (((i & mask) == mask) || i == len - 1) {
+			for (j = 0; j < mask-(i & mask); j++)
+				printed += printer(BINARY_PRINT_NUM_PAD, -1, extra, fp);
+
+			printer(BINARY_PRINT_SEP, i, extra, fp);
+			for (j = i & ~mask; j <= i; j++)
+				printed += printer(BINARY_PRINT_CHAR_DATA, data[j], extra, fp);
+			for (j = 0; j < mask-(i & mask); j++)
+				printed += printer(BINARY_PRINT_CHAR_PAD, i, extra, fp);
+			printed += printer(BINARY_PRINT_LINE_END, -1, extra, fp);
+		}
+	}
+	printed += printer(BINARY_PRINT_DATA_END, -1, extra, fp);
+	return printed;
+}
+
+int is_printable_array(char *p, unsigned int len)
+{
+	unsigned int i;
+
+	if (!p || !len || p[len - 1] != 0)
+		return 0;
+
+	len--;
+
+	for (i = 0; i < len && p[i]; i++) {
+		if (!isprint(p[i]) && !isspace(p[i]))
+			return 0;
+	}
+	return 1;
+}
diff --git a/tools/perf/util/print_binary.h b/tools/perf/util/print_binary.h
new file mode 100644
index 000000000..2a1554afc
--- /dev/null
+++ b/tools/perf/util/print_binary.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_PRINT_BINARY_H
+#define PERF_PRINT_BINARY_H
+
+#include <stddef.h>
+#include <stdio.h>
+
+enum binary_printer_ops {
+	BINARY_PRINT_DATA_BEGIN,
+	BINARY_PRINT_LINE_BEGIN,
+	BINARY_PRINT_ADDR,
+	BINARY_PRINT_NUM_DATA,
+	BINARY_PRINT_NUM_PAD,
+	BINARY_PRINT_SEP,
+	BINARY_PRINT_CHAR_DATA,
+	BINARY_PRINT_CHAR_PAD,
+	BINARY_PRINT_LINE_END,
+	BINARY_PRINT_DATA_END,
+};
+
+typedef int (*binary__fprintf_t)(enum binary_printer_ops op,
+				 unsigned int val, void *extra, FILE *fp);
+
+int binary__fprintf(unsigned char *data, size_t len,
+		    size_t bytes_per_line, binary__fprintf_t printer,
+		    void *extra, FILE *fp);
+
+static inline void print_binary(unsigned char *data, size_t len,
+				size_t bytes_per_line, binary__fprintf_t printer,
+				void *extra)
+{
+	binary__fprintf(data, len, bytes_per_line, printer, extra, stdout);
+}
+
+int is_printable_array(char *p, unsigned int len);
+
+#endif /* PERF_PRINT_BINARY_H */
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
new file mode 100644
index 000000000..0c24bc7af
--- /dev/null
+++ b/tools/perf/util/probe-event.c
@@ -0,0 +1,3799 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * probe-event.c : perf-probe definition to probe_events format converter
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ */
+
+#include <inttypes.h>
+#include <sys/utsname.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <limits.h>
+#include <elf.h>
+
+#include "build-id.h"
+#include "event.h"
+#include "namespaces.h"
+#include "strlist.h"
+#include "strfilter.h"
+#include "debug.h"
+#include "dso.h"
+#include "color.h"
+#include "map.h"
+#include "maps.h"
+#include "mutex.h"
+#include "symbol.h"
+#include <api/fs/fs.h>
+#include "trace-event.h"	/* For __maybe_unused */
+#include "probe-event.h"
+#include "probe-finder.h"
+#include "probe-file.h"
+#include "session.h"
+#include "string2.h"
+#include "strbuf.h"
+
+#include <subcmd/pager.h>
+#include <linux/ctype.h>
+#include <linux/zalloc.h>
+
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+#include <elfutils/debuginfod.h>
+#endif
+
+#define PERFPROBE_GROUP "probe"
+
+bool probe_event_dry_run;	/* Dry run flag */
+struct probe_conf probe_conf = { .magic_num = DEFAULT_PROBE_MAGIC_NUM };
+
+#define semantic_error(msg ...) pr_err("Semantic error :" msg)
+
+int e_snprintf(char *str, size_t size, const char *format, ...)
+{
+	int ret;
+	va_list ap;
+	va_start(ap, format);
+	ret = vsnprintf(str, size, format, ap);
+	va_end(ap);
+	if (ret >= (int)size)
+		ret = -E2BIG;
+	return ret;
+}
+
+static struct machine *host_machine;
+
+/* Initialize symbol maps and path of vmlinux/modules */
+int init_probe_symbol_maps(bool user_only)
+{
+	int ret;
+
+	symbol_conf.sort_by_name = true;
+	symbol_conf.allow_aliases = true;
+	ret = symbol__init(NULL);
+	if (ret < 0) {
+		pr_debug("Failed to init symbol map.\n");
+		goto out;
+	}
+
+	if (host_machine || user_only)	/* already initialized */
+		return 0;
+
+	if (symbol_conf.vmlinux_name)
+		pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name);
+
+	host_machine = machine__new_host();
+	if (!host_machine) {
+		pr_debug("machine__new_host() failed.\n");
+		symbol__exit();
+		ret = -1;
+	}
+out:
+	if (ret < 0)
+		pr_warning("Failed to init vmlinux path.\n");
+	return ret;
+}
+
+void exit_probe_symbol_maps(void)
+{
+	machine__delete(host_machine);
+	host_machine = NULL;
+	symbol__exit();
+}
+
+static struct ref_reloc_sym *kernel_get_ref_reloc_sym(struct map **pmap)
+{
+	struct kmap *kmap;
+	struct map *map = machine__kernel_map(host_machine);
+
+	if (map__load(map) < 0)
+		return NULL;
+
+	kmap = map__kmap(map);
+	if (!kmap)
+		return NULL;
+
+	if (pmap)
+		*pmap = map;
+
+	return kmap->ref_reloc_sym;
+}
+
+static int kernel_get_symbol_address_by_name(const char *name, u64 *addr,
+					     bool reloc, bool reladdr)
+{
+	struct ref_reloc_sym *reloc_sym;
+	struct symbol *sym;
+	struct map *map;
+
+	/* ref_reloc_sym is just a label. Need a special fix*/
+	reloc_sym = kernel_get_ref_reloc_sym(&map);
+	if (reloc_sym && strcmp(name, reloc_sym->name) == 0)
+		*addr = (!map->reloc || reloc) ? reloc_sym->addr :
+			reloc_sym->unrelocated_addr;
+	else {
+		sym = machine__find_kernel_symbol_by_name(host_machine, name, &map);
+		if (!sym)
+			return -ENOENT;
+		*addr = map->unmap_ip(map, sym->start) -
+			((reloc) ? 0 : map->reloc) -
+			((reladdr) ? map->start : 0);
+	}
+	return 0;
+}
+
+static struct map *kernel_get_module_map(const char *module)
+{
+	struct maps *maps = machine__kernel_maps(host_machine);
+	struct map *pos;
+
+	/* A file path -- this is an offline module */
+	if (module && strchr(module, '/'))
+		return dso__new_map(module);
+
+	if (!module) {
+		pos = machine__kernel_map(host_machine);
+		return map__get(pos);
+	}
+
+	maps__for_each_entry(maps, pos) {
+		/* short_name is "[module]" */
+		if (strncmp(pos->dso->short_name + 1, module,
+			    pos->dso->short_name_len - 2) == 0 &&
+		    module[pos->dso->short_name_len - 2] == '\0') {
+			return map__get(pos);
+		}
+	}
+	return NULL;
+}
+
+struct map *get_target_map(const char *target, struct nsinfo *nsi, bool user)
+{
+	/* Init maps of given executable or kernel */
+	if (user) {
+		struct map *map;
+
+		map = dso__new_map(target);
+		if (map && map->dso) {
+			mutex_lock(&map->dso->lock);
+			nsinfo__put(map->dso->nsinfo);
+			map->dso->nsinfo = nsinfo__get(nsi);
+			mutex_unlock(&map->dso->lock);
+		}
+		return map;
+	} else {
+		return kernel_get_module_map(target);
+	}
+}
+
+static int convert_exec_to_group(const char *exec, char **result)
+{
+	char *ptr1, *ptr2, *exec_copy;
+	char buf[64];
+	int ret;
+
+	exec_copy = strdup(exec);
+	if (!exec_copy)
+		return -ENOMEM;
+
+	ptr1 = basename(exec_copy);
+	if (!ptr1) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	for (ptr2 = ptr1; *ptr2 != '\0'; ptr2++) {
+		if (!isalnum(*ptr2) && *ptr2 != '_') {
+			*ptr2 = '\0';
+			break;
+		}
+	}
+
+	ret = e_snprintf(buf, 64, "%s_%s", PERFPROBE_GROUP, ptr1);
+	if (ret < 0)
+		goto out;
+
+	*result = strdup(buf);
+	ret = *result ? 0 : -ENOMEM;
+
+out:
+	free(exec_copy);
+	return ret;
+}
+
+static void clear_perf_probe_point(struct perf_probe_point *pp)
+{
+	zfree(&pp->file);
+	zfree(&pp->function);
+	zfree(&pp->lazy_line);
+}
+
+static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs)
+{
+	int i;
+
+	for (i = 0; i < ntevs; i++)
+		clear_probe_trace_event(tevs + i);
+}
+
+static bool kprobe_blacklist__listed(u64 address);
+static bool kprobe_warn_out_range(const char *symbol, u64 address)
+{
+	struct map *map;
+	bool ret = false;
+
+	map = kernel_get_module_map(NULL);
+	if (map) {
+		ret = address <= map->start || map->end < address;
+		if (ret)
+			pr_warning("%s is out of .text, skip it.\n", symbol);
+		map__put(map);
+	}
+	if (!ret && kprobe_blacklist__listed(address)) {
+		pr_warning("%s is blacklisted function, skip it.\n", symbol);
+		ret = true;
+	}
+
+	return ret;
+}
+
+/*
+ * @module can be module name of module file path. In case of path,
+ * inspect elf and find out what is actual module name.
+ * Caller has to free mod_name after using it.
+ */
+static char *find_module_name(const char *module)
+{
+	int fd;
+	Elf *elf;
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr;
+	Elf_Data *data;
+	Elf_Scn *sec;
+	char *mod_name = NULL;
+	int name_offset;
+
+	fd = open(module, O_RDONLY);
+	if (fd < 0)
+		return NULL;
+
+	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	if (elf == NULL)
+		goto elf_err;
+
+	if (gelf_getehdr(elf, &ehdr) == NULL)
+		goto ret_err;
+
+	sec = elf_section_by_name(elf, &ehdr, &shdr,
+			".gnu.linkonce.this_module", NULL);
+	if (!sec)
+		goto ret_err;
+
+	data = elf_getdata(sec, NULL);
+	if (!data || !data->d_buf)
+		goto ret_err;
+
+	/*
+	 * NOTE:
+	 * '.gnu.linkonce.this_module' section of kernel module elf directly
+	 * maps to 'struct module' from linux/module.h. This section contains
+	 * actual module name which will be used by kernel after loading it.
+	 * But, we cannot use 'struct module' here since linux/module.h is not
+	 * exposed to user-space. Offset of 'name' has remained same from long
+	 * time, so hardcoding it here.
+	 */
+	if (ehdr.e_ident[EI_CLASS] == ELFCLASS32)
+		name_offset = 12;
+	else	/* expect ELFCLASS64 by default */
+		name_offset = 24;
+
+	mod_name = strdup((char *)data->d_buf + name_offset);
+
+ret_err:
+	elf_end(elf);
+elf_err:
+	close(fd);
+	return mod_name;
+}
+
+#ifdef HAVE_DWARF_SUPPORT
+
+static int kernel_get_module_dso(const char *module, struct dso **pdso)
+{
+	struct dso *dso;
+	struct map *map;
+	const char *vmlinux_name;
+	int ret = 0;
+
+	if (module) {
+		char module_name[128];
+
+		snprintf(module_name, sizeof(module_name), "[%s]", module);
+		map = maps__find_by_name(machine__kernel_maps(host_machine), module_name);
+		if (map) {
+			dso = map->dso;
+			goto found;
+		}
+		pr_debug("Failed to find module %s.\n", module);
+		return -ENOENT;
+	}
+
+	map = machine__kernel_map(host_machine);
+	dso = map->dso;
+	if (!dso->has_build_id)
+		dso__read_running_kernel_build_id(dso, host_machine);
+
+	vmlinux_name = symbol_conf.vmlinux_name;
+	dso->load_errno = 0;
+	if (vmlinux_name)
+		ret = dso__load_vmlinux(dso, map, vmlinux_name, false);
+	else
+		ret = dso__load_vmlinux_path(dso, map);
+found:
+	*pdso = dso;
+	return ret;
+}
+
+/*
+ * Some binaries like glibc have special symbols which are on the symbol
+ * table, but not in the debuginfo. If we can find the address of the
+ * symbol from map, we can translate the address back to the probe point.
+ */
+static int find_alternative_probe_point(struct debuginfo *dinfo,
+					struct perf_probe_point *pp,
+					struct perf_probe_point *result,
+					const char *target, struct nsinfo *nsi,
+					bool uprobes)
+{
+	struct map *map = NULL;
+	struct symbol *sym;
+	u64 address = 0;
+	int ret = -ENOENT;
+
+	/* This can work only for function-name based one */
+	if (!pp->function || pp->file)
+		return -ENOTSUP;
+
+	map = get_target_map(target, nsi, uprobes);
+	if (!map)
+		return -EINVAL;
+
+	/* Find the address of given function */
+	map__for_each_symbol_by_name(map, pp->function, sym) {
+		if (uprobes) {
+			address = sym->start;
+			if (sym->type == STT_GNU_IFUNC)
+				pr_warning("Warning: The probe function (%s) is a GNU indirect function.\n"
+					   "Consider identifying the final function used at run time and set the probe directly on that.\n",
+					   pp->function);
+		} else
+			address = map->unmap_ip(map, sym->start) - map->reloc;
+		break;
+	}
+	if (!address) {
+		ret = -ENOENT;
+		goto out;
+	}
+	pr_debug("Symbol %s address found : %" PRIx64 "\n",
+			pp->function, address);
+
+	ret = debuginfo__find_probe_point(dinfo, address, result);
+	if (ret <= 0)
+		ret = (!ret) ? -ENOENT : ret;
+	else {
+		result->offset += pp->offset;
+		result->line += pp->line;
+		result->retprobe = pp->retprobe;
+		ret = 0;
+	}
+
+out:
+	map__put(map);
+	return ret;
+
+}
+
+static int get_alternative_probe_event(struct debuginfo *dinfo,
+				       struct perf_probe_event *pev,
+				       struct perf_probe_point *tmp)
+{
+	int ret;
+
+	memcpy(tmp, &pev->point, sizeof(*tmp));
+	memset(&pev->point, 0, sizeof(pev->point));
+	ret = find_alternative_probe_point(dinfo, tmp, &pev->point, pev->target,
+					   pev->nsi, pev->uprobes);
+	if (ret < 0)
+		memcpy(&pev->point, tmp, sizeof(*tmp));
+
+	return ret;
+}
+
+static int get_alternative_line_range(struct debuginfo *dinfo,
+				      struct line_range *lr,
+				      const char *target, bool user)
+{
+	struct perf_probe_point pp = { .function = lr->function,
+				       .file = lr->file,
+				       .line = lr->start };
+	struct perf_probe_point result;
+	int ret, len = 0;
+
+	memset(&result, 0, sizeof(result));
+
+	if (lr->end != INT_MAX)
+		len = lr->end - lr->start;
+	ret = find_alternative_probe_point(dinfo, &pp, &result,
+					   target, NULL, user);
+	if (!ret) {
+		lr->function = result.function;
+		lr->file = result.file;
+		lr->start = result.line;
+		if (lr->end != INT_MAX)
+			lr->end = lr->start + len;
+		clear_perf_probe_point(&pp);
+	}
+	return ret;
+}
+
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+static struct debuginfo *open_from_debuginfod(struct dso *dso, struct nsinfo *nsi,
+					      bool silent)
+{
+	debuginfod_client *c = debuginfod_begin();
+	char sbuild_id[SBUILD_ID_SIZE + 1];
+	struct debuginfo *ret = NULL;
+	struct nscookie nsc;
+	char *path;
+	int fd;
+
+	if (!c)
+		return NULL;
+
+	build_id__sprintf(&dso->bid, sbuild_id);
+	fd = debuginfod_find_debuginfo(c, (const unsigned char *)sbuild_id,
+					0, &path);
+	if (fd >= 0)
+		close(fd);
+	debuginfod_end(c);
+	if (fd < 0) {
+		if (!silent)
+			pr_debug("Failed to find debuginfo in debuginfod.\n");
+		return NULL;
+	}
+	if (!silent)
+		pr_debug("Load debuginfo from debuginfod (%s)\n", path);
+
+	nsinfo__mountns_enter(nsi, &nsc);
+	ret = debuginfo__new((const char *)path);
+	nsinfo__mountns_exit(&nsc);
+	return ret;
+}
+#else
+static inline
+struct debuginfo *open_from_debuginfod(struct dso *dso __maybe_unused,
+				       struct nsinfo *nsi __maybe_unused,
+				       bool silent __maybe_unused)
+{
+	return NULL;
+}
+#endif
+
+/* Open new debuginfo of given module */
+static struct debuginfo *open_debuginfo(const char *module, struct nsinfo *nsi,
+					bool silent)
+{
+	const char *path = module;
+	char reason[STRERR_BUFSIZE];
+	struct debuginfo *ret = NULL;
+	struct dso *dso = NULL;
+	struct nscookie nsc;
+	int err;
+
+	if (!module || !strchr(module, '/')) {
+		err = kernel_get_module_dso(module, &dso);
+		if (err < 0) {
+			if (!dso || dso->load_errno == 0) {
+				if (!str_error_r(-err, reason, STRERR_BUFSIZE))
+					strcpy(reason, "(unknown)");
+			} else
+				dso__strerror_load(dso, reason, STRERR_BUFSIZE);
+			if (dso)
+				ret = open_from_debuginfod(dso, nsi, silent);
+			if (ret)
+				return ret;
+			if (!silent) {
+				if (module)
+					pr_err("Module %s is not loaded, please specify its full path name.\n", module);
+				else
+					pr_err("Failed to find the path for the kernel: %s\n", reason);
+			}
+			return NULL;
+		}
+		path = dso->long_name;
+	}
+	nsinfo__mountns_enter(nsi, &nsc);
+	ret = debuginfo__new(path);
+	if (!ret && !silent) {
+		pr_warning("The %s file has no debug information.\n", path);
+		if (!module || !strtailcmp(path, ".ko"))
+			pr_warning("Rebuild with CONFIG_DEBUG_INFO=y, ");
+		else
+			pr_warning("Rebuild with -g, ");
+		pr_warning("or install an appropriate debuginfo package.\n");
+	}
+	nsinfo__mountns_exit(&nsc);
+	return ret;
+}
+
+/* For caching the last debuginfo */
+static struct debuginfo *debuginfo_cache;
+static char *debuginfo_cache_path;
+
+static struct debuginfo *debuginfo_cache__open(const char *module, bool silent)
+{
+	const char *path = module;
+
+	/* If the module is NULL, it should be the kernel. */
+	if (!module)
+		path = "kernel";
+
+	if (debuginfo_cache_path && !strcmp(debuginfo_cache_path, path))
+		goto out;
+
+	/* Copy module path */
+	free(debuginfo_cache_path);
+	debuginfo_cache_path = strdup(path);
+	if (!debuginfo_cache_path) {
+		debuginfo__delete(debuginfo_cache);
+		debuginfo_cache = NULL;
+		goto out;
+	}
+
+	debuginfo_cache = open_debuginfo(module, NULL, silent);
+	if (!debuginfo_cache)
+		zfree(&debuginfo_cache_path);
+out:
+	return debuginfo_cache;
+}
+
+static void debuginfo_cache__exit(void)
+{
+	debuginfo__delete(debuginfo_cache);
+	debuginfo_cache = NULL;
+	zfree(&debuginfo_cache_path);
+}
+
+
+static int get_text_start_address(const char *exec, u64 *address,
+				  struct nsinfo *nsi)
+{
+	Elf *elf;
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr;
+	int fd, ret = -ENOENT;
+	struct nscookie nsc;
+
+	nsinfo__mountns_enter(nsi, &nsc);
+	fd = open(exec, O_RDONLY);
+	nsinfo__mountns_exit(&nsc);
+	if (fd < 0)
+		return -errno;
+
+	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	if (elf == NULL) {
+		ret = -EINVAL;
+		goto out_close;
+	}
+
+	if (gelf_getehdr(elf, &ehdr) == NULL)
+		goto out;
+
+	if (!elf_section_by_name(elf, &ehdr, &shdr, ".text", NULL))
+		goto out;
+
+	*address = shdr.sh_addr - shdr.sh_offset;
+	ret = 0;
+out:
+	elf_end(elf);
+out_close:
+	close(fd);
+
+	return ret;
+}
+
+/*
+ * Convert trace point to probe point with debuginfo
+ */
+static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp,
+					    struct perf_probe_point *pp,
+					    bool is_kprobe)
+{
+	struct debuginfo *dinfo = NULL;
+	u64 stext = 0;
+	u64 addr = tp->address;
+	int ret = -ENOENT;
+
+	/* convert the address to dwarf address */
+	if (!is_kprobe) {
+		if (!addr) {
+			ret = -EINVAL;
+			goto error;
+		}
+		ret = get_text_start_address(tp->module, &stext, NULL);
+		if (ret < 0)
+			goto error;
+		addr += stext;
+	} else if (tp->symbol) {
+		/* If the module is given, this returns relative address */
+		ret = kernel_get_symbol_address_by_name(tp->symbol, &addr,
+							false, !!tp->module);
+		if (ret != 0)
+			goto error;
+		addr += tp->offset;
+	}
+
+	pr_debug("try to find information at %" PRIx64 " in %s\n", addr,
+		 tp->module ? : "kernel");
+
+	dinfo = debuginfo_cache__open(tp->module, verbose <= 0);
+	if (dinfo)
+		ret = debuginfo__find_probe_point(dinfo, addr, pp);
+	else
+		ret = -ENOENT;
+
+	if (ret > 0) {
+		pp->retprobe = tp->retprobe;
+		return 0;
+	}
+error:
+	pr_debug("Failed to find corresponding probes from debuginfo.\n");
+	return ret ? : -ENOENT;
+}
+
+/* Adjust symbol name and address */
+static int post_process_probe_trace_point(struct probe_trace_point *tp,
+					   struct map *map, u64 offs)
+{
+	struct symbol *sym;
+	u64 addr = tp->address - offs;
+
+	sym = map__find_symbol(map, addr);
+	if (!sym) {
+		/*
+		 * If the address is in the inittext section, map can not
+		 * find it. Ignore it if we are probing offline kernel.
+		 */
+		return (symbol_conf.ignore_vmlinux_buildid) ? 0 : -ENOENT;
+	}
+
+	if (strcmp(sym->name, tp->symbol)) {
+		/* If we have no realname, use symbol for it */
+		if (!tp->realname)
+			tp->realname = tp->symbol;
+		else
+			free(tp->symbol);
+		tp->symbol = strdup(sym->name);
+		if (!tp->symbol)
+			return -ENOMEM;
+	}
+	tp->offset = addr - sym->start;
+	tp->address -= offs;
+
+	return 0;
+}
+
+/*
+ * Rename DWARF symbols to ELF symbols -- gcc sometimes optimizes functions
+ * and generate new symbols with suffixes such as .constprop.N or .isra.N
+ * etc. Since those symbols are not recorded in DWARF, we have to find
+ * correct generated symbols from offline ELF binary.
+ * For online kernel or uprobes we don't need this because those are
+ * rebased on _text, or already a section relative address.
+ */
+static int
+post_process_offline_probe_trace_events(struct probe_trace_event *tevs,
+					int ntevs, const char *pathname)
+{
+	struct map *map;
+	u64 stext = 0;
+	int i, ret = 0;
+
+	/* Prepare a map for offline binary */
+	map = dso__new_map(pathname);
+	if (!map || get_text_start_address(pathname, &stext, NULL) < 0) {
+		pr_warning("Failed to get ELF symbols for %s\n", pathname);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < ntevs; i++) {
+		ret = post_process_probe_trace_point(&tevs[i].point,
+						     map, stext);
+		if (ret < 0)
+			break;
+	}
+	map__put(map);
+
+	return ret;
+}
+
+static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs,
+					  int ntevs, const char *exec,
+					  struct nsinfo *nsi)
+{
+	int i, ret = 0;
+	u64 stext = 0;
+
+	if (!exec)
+		return 0;
+
+	ret = get_text_start_address(exec, &stext, nsi);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; i < ntevs && ret >= 0; i++) {
+		/* point.address is the address of point.symbol + point.offset */
+		tevs[i].point.address -= stext;
+		tevs[i].point.module = strdup(exec);
+		if (!tevs[i].point.module) {
+			ret = -ENOMEM;
+			break;
+		}
+		tevs[i].uprobes = true;
+	}
+
+	return ret;
+}
+
+static int
+post_process_module_probe_trace_events(struct probe_trace_event *tevs,
+				       int ntevs, const char *module,
+				       struct debuginfo *dinfo)
+{
+	Dwarf_Addr text_offs = 0;
+	int i, ret = 0;
+	char *mod_name = NULL;
+	struct map *map;
+
+	if (!module)
+		return 0;
+
+	map = get_target_map(module, NULL, false);
+	if (!map || debuginfo__get_text_offset(dinfo, &text_offs, true) < 0) {
+		pr_warning("Failed to get ELF symbols for %s\n", module);
+		return -EINVAL;
+	}
+
+	mod_name = find_module_name(module);
+	for (i = 0; i < ntevs; i++) {
+		ret = post_process_probe_trace_point(&tevs[i].point,
+						map, text_offs);
+		if (ret < 0)
+			break;
+		tevs[i].point.module =
+			strdup(mod_name ? mod_name : module);
+		if (!tevs[i].point.module) {
+			ret = -ENOMEM;
+			break;
+		}
+	}
+
+	free(mod_name);
+	map__put(map);
+
+	return ret;
+}
+
+static int
+post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
+				       int ntevs)
+{
+	struct ref_reloc_sym *reloc_sym;
+	struct map *map;
+	char *tmp;
+	int i, skipped = 0;
+
+	/* Skip post process if the target is an offline kernel */
+	if (symbol_conf.ignore_vmlinux_buildid)
+		return post_process_offline_probe_trace_events(tevs, ntevs,
+						symbol_conf.vmlinux_name);
+
+	reloc_sym = kernel_get_ref_reloc_sym(&map);
+	if (!reloc_sym) {
+		pr_warning("Relocated base symbol is not found! "
+			   "Check /proc/sys/kernel/kptr_restrict\n"
+			   "and /proc/sys/kernel/perf_event_paranoid. "
+			   "Or run as privileged perf user.\n\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < ntevs; i++) {
+		if (!tevs[i].point.address)
+			continue;
+		if (tevs[i].point.retprobe && !kretprobe_offset_is_supported())
+			continue;
+		/*
+		 * If we found a wrong one, mark it by NULL symbol.
+		 * Since addresses in debuginfo is same as objdump, we need
+		 * to convert it to addresses on memory.
+		 */
+		if (kprobe_warn_out_range(tevs[i].point.symbol,
+			map__objdump_2mem(map, tevs[i].point.address))) {
+			tmp = NULL;
+			skipped++;
+		} else {
+			tmp = strdup(reloc_sym->name);
+			if (!tmp)
+				return -ENOMEM;
+		}
+		/* If we have no realname, use symbol for it */
+		if (!tevs[i].point.realname)
+			tevs[i].point.realname = tevs[i].point.symbol;
+		else
+			free(tevs[i].point.symbol);
+		tevs[i].point.symbol = tmp;
+		tevs[i].point.offset = tevs[i].point.address -
+			(map->reloc ? reloc_sym->unrelocated_addr :
+				      reloc_sym->addr);
+	}
+	return skipped;
+}
+
+void __weak
+arch__post_process_probe_trace_events(struct perf_probe_event *pev __maybe_unused,
+				      int ntevs __maybe_unused)
+{
+}
+
+/* Post processing the probe events */
+static int post_process_probe_trace_events(struct perf_probe_event *pev,
+					   struct probe_trace_event *tevs,
+					   int ntevs, const char *module,
+					   bool uprobe, struct debuginfo *dinfo)
+{
+	int ret;
+
+	if (uprobe)
+		ret = add_exec_to_probe_trace_events(tevs, ntevs, module,
+						     pev->nsi);
+	else if (module)
+		/* Currently ref_reloc_sym based probe is not for drivers */
+		ret = post_process_module_probe_trace_events(tevs, ntevs,
+							     module, dinfo);
+	else
+		ret = post_process_kernel_probe_trace_events(tevs, ntevs);
+
+	if (ret >= 0)
+		arch__post_process_probe_trace_events(pev, ntevs);
+
+	return ret;
+}
+
+/* Try to find perf_probe_event with debuginfo */
+static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
+					  struct probe_trace_event **tevs)
+{
+	bool need_dwarf = perf_probe_event_need_dwarf(pev);
+	struct perf_probe_point tmp;
+	struct debuginfo *dinfo;
+	int ntevs, ret = 0;
+
+	/* Workaround for gcc #98776 issue.
+	 * Perf failed to add kretprobe event with debuginfo of vmlinux which is
+	 * compiled by gcc with -fpatchable-function-entry option enabled. The
+	 * same issue with kernel module. The retprobe doesn`t need debuginfo.
+	 * This workaround solution use map to query the probe function address
+	 * for retprobe event.
+	 */
+	if (pev->point.retprobe)
+		return 0;
+
+	dinfo = open_debuginfo(pev->target, pev->nsi, !need_dwarf);
+	if (!dinfo) {
+		if (need_dwarf)
+			return -ENOENT;
+		pr_debug("Could not open debuginfo. Try to use symbols.\n");
+		return 0;
+	}
+
+	pr_debug("Try to find probe point from debuginfo.\n");
+	/* Searching trace events corresponding to a probe event */
+	ntevs = debuginfo__find_trace_events(dinfo, pev, tevs);
+
+	if (ntevs == 0)	{  /* Not found, retry with an alternative */
+		ret = get_alternative_probe_event(dinfo, pev, &tmp);
+		if (!ret) {
+			ntevs = debuginfo__find_trace_events(dinfo, pev, tevs);
+			/*
+			 * Write back to the original probe_event for
+			 * setting appropriate (user given) event name
+			 */
+			clear_perf_probe_point(&pev->point);
+			memcpy(&pev->point, &tmp, sizeof(tmp));
+		}
+	}
+
+	if (ntevs > 0) {	/* Succeeded to find trace events */
+		pr_debug("Found %d probe_trace_events.\n", ntevs);
+		ret = post_process_probe_trace_events(pev, *tevs, ntevs,
+					pev->target, pev->uprobes, dinfo);
+		if (ret < 0 || ret == ntevs) {
+			pr_debug("Post processing failed or all events are skipped. (%d)\n", ret);
+			clear_probe_trace_events(*tevs, ntevs);
+			zfree(tevs);
+			ntevs = 0;
+		}
+	}
+
+	debuginfo__delete(dinfo);
+
+	if (ntevs == 0)	{	/* No error but failed to find probe point. */
+		pr_warning("Probe point '%s' not found.\n",
+			   synthesize_perf_probe_point(&pev->point));
+		return -ENOENT;
+	} else if (ntevs < 0) {
+		/* Error path : ntevs < 0 */
+		pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs);
+		if (ntevs == -EBADF)
+			pr_warning("Warning: No dwarf info found in the vmlinux - "
+				"please rebuild kernel with CONFIG_DEBUG_INFO=y.\n");
+		if (!need_dwarf) {
+			pr_debug("Trying to use symbols.\n");
+			return 0;
+		}
+	}
+	return ntevs;
+}
+
+#define LINEBUF_SIZE 256
+#define NR_ADDITIONAL_LINES 2
+
+static int __show_one_line(FILE *fp, int l, bool skip, bool show_num)
+{
+	char buf[LINEBUF_SIZE], sbuf[STRERR_BUFSIZE];
+	const char *color = show_num ? "" : PERF_COLOR_BLUE;
+	const char *prefix = NULL;
+
+	do {
+		if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
+			goto error;
+		if (skip)
+			continue;
+		if (!prefix) {
+			prefix = show_num ? "%7d  " : "         ";
+			color_fprintf(stdout, color, prefix, l);
+		}
+		color_fprintf(stdout, color, "%s", buf);
+
+	} while (strchr(buf, '\n') == NULL);
+
+	return 1;
+error:
+	if (ferror(fp)) {
+		pr_warning("File read error: %s\n",
+			   str_error_r(errno, sbuf, sizeof(sbuf)));
+		return -1;
+	}
+	return 0;
+}
+
+static int _show_one_line(FILE *fp, int l, bool skip, bool show_num)
+{
+	int rv = __show_one_line(fp, l, skip, show_num);
+	if (rv == 0) {
+		pr_warning("Source file is shorter than expected.\n");
+		rv = -1;
+	}
+	return rv;
+}
+
+#define show_one_line_with_num(f,l)	_show_one_line(f,l,false,true)
+#define show_one_line(f,l)		_show_one_line(f,l,false,false)
+#define skip_one_line(f,l)		_show_one_line(f,l,true,false)
+#define show_one_line_or_eof(f,l)	__show_one_line(f,l,false,false)
+
+/*
+ * Show line-range always requires debuginfo to find source file and
+ * line number.
+ */
+static int __show_line_range(struct line_range *lr, const char *module,
+			     bool user)
+{
+	struct build_id bid;
+	int l = 1;
+	struct int_node *ln;
+	struct debuginfo *dinfo;
+	FILE *fp;
+	int ret;
+	char *tmp;
+	char sbuf[STRERR_BUFSIZE];
+	char sbuild_id[SBUILD_ID_SIZE] = "";
+
+	/* Search a line range */
+	dinfo = open_debuginfo(module, NULL, false);
+	if (!dinfo)
+		return -ENOENT;
+
+	ret = debuginfo__find_line_range(dinfo, lr);
+	if (!ret) {	/* Not found, retry with an alternative */
+		ret = get_alternative_line_range(dinfo, lr, module, user);
+		if (!ret)
+			ret = debuginfo__find_line_range(dinfo, lr);
+	}
+	if (dinfo->build_id) {
+		build_id__init(&bid, dinfo->build_id, BUILD_ID_SIZE);
+		build_id__sprintf(&bid, sbuild_id);
+	}
+	debuginfo__delete(dinfo);
+	if (ret == 0 || ret == -ENOENT) {
+		pr_warning("Specified source line is not found.\n");
+		return -ENOENT;
+	} else if (ret < 0) {
+		pr_warning("Debuginfo analysis failed.\n");
+		return ret;
+	}
+
+	/* Convert source file path */
+	tmp = lr->path;
+	ret = find_source_path(tmp, sbuild_id, lr->comp_dir, &lr->path);
+
+	/* Free old path when new path is assigned */
+	if (tmp != lr->path)
+		free(tmp);
+
+	if (ret < 0) {
+		pr_warning("Failed to find source file path.\n");
+		return ret;
+	}
+
+	setup_pager();
+
+	if (lr->function)
+		fprintf(stdout, "<%s@%s:%d>\n", lr->function, lr->path,
+			lr->start - lr->offset);
+	else
+		fprintf(stdout, "<%s:%d>\n", lr->path, lr->start);
+
+	fp = fopen(lr->path, "r");
+	if (fp == NULL) {
+		pr_warning("Failed to open %s: %s\n", lr->path,
+			   str_error_r(errno, sbuf, sizeof(sbuf)));
+		return -errno;
+	}
+	/* Skip to starting line number */
+	while (l < lr->start) {
+		ret = skip_one_line(fp, l++);
+		if (ret < 0)
+			goto end;
+	}
+
+	intlist__for_each_entry(ln, lr->line_list) {
+		for (; ln->i > (unsigned long)l; l++) {
+			ret = show_one_line(fp, l - lr->offset);
+			if (ret < 0)
+				goto end;
+		}
+		ret = show_one_line_with_num(fp, l++ - lr->offset);
+		if (ret < 0)
+			goto end;
+	}
+
+	if (lr->end == INT_MAX)
+		lr->end = l + NR_ADDITIONAL_LINES;
+	while (l <= lr->end) {
+		ret = show_one_line_or_eof(fp, l++ - lr->offset);
+		if (ret <= 0)
+			break;
+	}
+end:
+	fclose(fp);
+	return ret;
+}
+
+int show_line_range(struct line_range *lr, const char *module,
+		    struct nsinfo *nsi, bool user)
+{
+	int ret;
+	struct nscookie nsc;
+
+	ret = init_probe_symbol_maps(user);
+	if (ret < 0)
+		return ret;
+	nsinfo__mountns_enter(nsi, &nsc);
+	ret = __show_line_range(lr, module, user);
+	nsinfo__mountns_exit(&nsc);
+	exit_probe_symbol_maps();
+
+	return ret;
+}
+
+static int show_available_vars_at(struct debuginfo *dinfo,
+				  struct perf_probe_event *pev,
+				  struct strfilter *_filter)
+{
+	char *buf;
+	int ret, i, nvars;
+	struct str_node *node;
+	struct variable_list *vls = NULL, *vl;
+	struct perf_probe_point tmp;
+	const char *var;
+
+	buf = synthesize_perf_probe_point(&pev->point);
+	if (!buf)
+		return -EINVAL;
+	pr_debug("Searching variables at %s\n", buf);
+
+	ret = debuginfo__find_available_vars_at(dinfo, pev, &vls);
+	if (!ret) {  /* Not found, retry with an alternative */
+		ret = get_alternative_probe_event(dinfo, pev, &tmp);
+		if (!ret) {
+			ret = debuginfo__find_available_vars_at(dinfo, pev,
+								&vls);
+			/* Release the old probe_point */
+			clear_perf_probe_point(&tmp);
+		}
+	}
+	if (ret <= 0) {
+		if (ret == 0 || ret == -ENOENT) {
+			pr_err("Failed to find the address of %s\n", buf);
+			ret = -ENOENT;
+		} else
+			pr_warning("Debuginfo analysis failed.\n");
+		goto end;
+	}
+
+	/* Some variables are found */
+	fprintf(stdout, "Available variables at %s\n", buf);
+	for (i = 0; i < ret; i++) {
+		vl = &vls[i];
+		/*
+		 * A probe point might be converted to
+		 * several trace points.
+		 */
+		fprintf(stdout, "\t@<%s+%lu>\n", vl->point.symbol,
+			vl->point.offset);
+		zfree(&vl->point.symbol);
+		nvars = 0;
+		if (vl->vars) {
+			strlist__for_each_entry(node, vl->vars) {
+				var = strchr(node->s, '\t') + 1;
+				if (strfilter__compare(_filter, var)) {
+					fprintf(stdout, "\t\t%s\n", node->s);
+					nvars++;
+				}
+			}
+			strlist__delete(vl->vars);
+		}
+		if (nvars == 0)
+			fprintf(stdout, "\t\t(No matched variables)\n");
+	}
+	free(vls);
+end:
+	free(buf);
+	return ret;
+}
+
+/* Show available variables on given probe point */
+int show_available_vars(struct perf_probe_event *pevs, int npevs,
+			struct strfilter *_filter)
+{
+	int i, ret = 0;
+	struct debuginfo *dinfo;
+
+	ret = init_probe_symbol_maps(pevs->uprobes);
+	if (ret < 0)
+		return ret;
+
+	dinfo = open_debuginfo(pevs->target, pevs->nsi, false);
+	if (!dinfo) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	setup_pager();
+
+	for (i = 0; i < npevs && ret >= 0; i++)
+		ret = show_available_vars_at(dinfo, &pevs[i], _filter);
+
+	debuginfo__delete(dinfo);
+out:
+	exit_probe_symbol_maps();
+	return ret;
+}
+
+#else	/* !HAVE_DWARF_SUPPORT */
+
+static void debuginfo_cache__exit(void)
+{
+}
+
+static int
+find_perf_probe_point_from_dwarf(struct probe_trace_point *tp __maybe_unused,
+				 struct perf_probe_point *pp __maybe_unused,
+				 bool is_kprobe __maybe_unused)
+{
+	return -ENOSYS;
+}
+
+static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
+				struct probe_trace_event **tevs __maybe_unused)
+{
+	if (perf_probe_event_need_dwarf(pev)) {
+		pr_warning("Debuginfo-analysis is not supported.\n");
+		return -ENOSYS;
+	}
+
+	return 0;
+}
+
+int show_line_range(struct line_range *lr __maybe_unused,
+		    const char *module __maybe_unused,
+		    struct nsinfo *nsi __maybe_unused,
+		    bool user __maybe_unused)
+{
+	pr_warning("Debuginfo-analysis is not supported.\n");
+	return -ENOSYS;
+}
+
+int show_available_vars(struct perf_probe_event *pevs __maybe_unused,
+			int npevs __maybe_unused,
+			struct strfilter *filter __maybe_unused)
+{
+	pr_warning("Debuginfo-analysis is not supported.\n");
+	return -ENOSYS;
+}
+#endif
+
+void line_range__clear(struct line_range *lr)
+{
+	zfree(&lr->function);
+	zfree(&lr->file);
+	zfree(&lr->path);
+	zfree(&lr->comp_dir);
+	intlist__delete(lr->line_list);
+}
+
+int line_range__init(struct line_range *lr)
+{
+	memset(lr, 0, sizeof(*lr));
+	lr->line_list = intlist__new(NULL);
+	if (!lr->line_list)
+		return -ENOMEM;
+	else
+		return 0;
+}
+
+static int parse_line_num(char **ptr, int *val, const char *what)
+{
+	const char *start = *ptr;
+
+	errno = 0;
+	*val = strtol(*ptr, ptr, 0);
+	if (errno || *ptr == start) {
+		semantic_error("'%s' is not a valid number.\n", what);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* Check the name is good for event, group or function */
+static bool is_c_func_name(const char *name)
+{
+	if (!isalpha(*name) && *name != '_')
+		return false;
+	while (*++name != '\0') {
+		if (!isalpha(*name) && !isdigit(*name) && *name != '_')
+			return false;
+	}
+	return true;
+}
+
+/*
+ * Stuff 'lr' according to the line range described by 'arg'.
+ * The line range syntax is described by:
+ *
+ *         SRC[:SLN[+NUM|-ELN]]
+ *         FNC[@SRC][:SLN[+NUM|-ELN]]
+ */
+int parse_line_range_desc(const char *arg, struct line_range *lr)
+{
+	char *range, *file, *name = strdup(arg);
+	int err;
+
+	if (!name)
+		return -ENOMEM;
+
+	lr->start = 0;
+	lr->end = INT_MAX;
+
+	range = strchr(name, ':');
+	if (range) {
+		*range++ = '\0';
+
+		err = parse_line_num(&range, &lr->start, "start line");
+		if (err)
+			goto err;
+
+		if (*range == '+' || *range == '-') {
+			const char c = *range++;
+
+			err = parse_line_num(&range, &lr->end, "end line");
+			if (err)
+				goto err;
+
+			if (c == '+') {
+				lr->end += lr->start;
+				/*
+				 * Adjust the number of lines here.
+				 * If the number of lines == 1, the
+				 * end of line should be equal to
+				 * the start of line.
+				 */
+				lr->end--;
+			}
+		}
+
+		pr_debug("Line range is %d to %d\n", lr->start, lr->end);
+
+		err = -EINVAL;
+		if (lr->start > lr->end) {
+			semantic_error("Start line must be smaller"
+				       " than end line.\n");
+			goto err;
+		}
+		if (*range != '\0') {
+			semantic_error("Tailing with invalid str '%s'.\n", range);
+			goto err;
+		}
+	}
+
+	file = strchr(name, '@');
+	if (file) {
+		*file = '\0';
+		lr->file = strdup(++file);
+		if (lr->file == NULL) {
+			err = -ENOMEM;
+			goto err;
+		}
+		lr->function = name;
+	} else if (strchr(name, '/') || strchr(name, '.'))
+		lr->file = name;
+	else if (is_c_func_name(name))/* We reuse it for checking funcname */
+		lr->function = name;
+	else {	/* Invalid name */
+		semantic_error("'%s' is not a valid function name.\n", name);
+		err = -EINVAL;
+		goto err;
+	}
+
+	return 0;
+err:
+	free(name);
+	return err;
+}
+
+static int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev)
+{
+	char *ptr;
+
+	ptr = strpbrk_esc(*arg, ":");
+	if (ptr) {
+		*ptr = '\0';
+		if (!pev->sdt && !is_c_func_name(*arg))
+			goto ng_name;
+		pev->group = strdup_esc(*arg);
+		if (!pev->group)
+			return -ENOMEM;
+		*arg = ptr + 1;
+	} else
+		pev->group = NULL;
+
+	pev->event = strdup_esc(*arg);
+	if (pev->event == NULL)
+		return -ENOMEM;
+
+	if (!pev->sdt && !is_c_func_name(pev->event)) {
+		zfree(&pev->event);
+ng_name:
+		zfree(&pev->group);
+		semantic_error("%s is bad for event name -it must "
+			       "follow C symbol-naming rule.\n", *arg);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* Parse probepoint definition. */
+static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
+{
+	struct perf_probe_point *pp = &pev->point;
+	char *ptr, *tmp;
+	char c, nc = 0;
+	bool file_spec = false;
+	int ret;
+
+	/*
+	 * <Syntax>
+	 * perf probe [GRP:][EVENT=]SRC[:LN|;PTN]
+	 * perf probe [GRP:][EVENT=]FUNC[@SRC][+OFFS|%return|:LN|;PAT]
+	 * perf probe %[GRP:]SDT_EVENT
+	 */
+	if (!arg)
+		return -EINVAL;
+
+	if (is_sdt_event(arg)) {
+		pev->sdt = true;
+		if (arg[0] == '%')
+			arg++;
+	}
+
+	ptr = strpbrk_esc(arg, ";=@+%");
+	if (pev->sdt) {
+		if (ptr) {
+			if (*ptr != '@') {
+				semantic_error("%s must be an SDT name.\n",
+					       arg);
+				return -EINVAL;
+			}
+			/* This must be a target file name or build id */
+			tmp = build_id_cache__complement(ptr + 1);
+			if (tmp) {
+				pev->target = build_id_cache__origname(tmp);
+				free(tmp);
+			} else
+				pev->target = strdup_esc(ptr + 1);
+			if (!pev->target)
+				return -ENOMEM;
+			*ptr = '\0';
+		}
+		ret = parse_perf_probe_event_name(&arg, pev);
+		if (ret == 0) {
+			if (asprintf(&pev->point.function, "%%%s", pev->event) < 0)
+				ret = -errno;
+		}
+		return ret;
+	}
+
+	if (ptr && *ptr == '=') {	/* Event name */
+		*ptr = '\0';
+		tmp = ptr + 1;
+		ret = parse_perf_probe_event_name(&arg, pev);
+		if (ret < 0)
+			return ret;
+
+		arg = tmp;
+	}
+
+	/*
+	 * Check arg is function or file name and copy it.
+	 *
+	 * We consider arg to be a file spec if and only if it satisfies
+	 * all of the below criteria::
+	 * - it does not include any of "+@%",
+	 * - it includes one of ":;", and
+	 * - it has a period '.' in the name.
+	 *
+	 * Otherwise, we consider arg to be a function specification.
+	 */
+	if (!strpbrk_esc(arg, "+@%")) {
+		ptr = strpbrk_esc(arg, ";:");
+		/* This is a file spec if it includes a '.' before ; or : */
+		if (ptr && memchr(arg, '.', ptr - arg))
+			file_spec = true;
+	}
+
+	ptr = strpbrk_esc(arg, ";:+@%");
+	if (ptr) {
+		nc = *ptr;
+		*ptr++ = '\0';
+	}
+
+	if (arg[0] == '\0')
+		tmp = NULL;
+	else {
+		tmp = strdup_esc(arg);
+		if (tmp == NULL)
+			return -ENOMEM;
+	}
+
+	if (file_spec)
+		pp->file = tmp;
+	else {
+		pp->function = tmp;
+
+		/*
+		 * Keep pp->function even if this is absolute address,
+		 * so it can mark whether abs_address is valid.
+		 * Which make 'perf probe lib.bin 0x0' possible.
+		 *
+		 * Note that checking length of tmp is not needed
+		 * because when we access tmp[1] we know tmp[0] is '0',
+		 * so tmp[1] should always valid (but could be '\0').
+		 */
+		if (tmp && !strncmp(tmp, "0x", 2)) {
+			pp->abs_address = strtoull(pp->function, &tmp, 0);
+			if (*tmp != '\0') {
+				semantic_error("Invalid absolute address.\n");
+				return -EINVAL;
+			}
+		}
+	}
+
+	/* Parse other options */
+	while (ptr) {
+		arg = ptr;
+		c = nc;
+		if (c == ';') {	/* Lazy pattern must be the last part */
+			pp->lazy_line = strdup(arg); /* let leave escapes */
+			if (pp->lazy_line == NULL)
+				return -ENOMEM;
+			break;
+		}
+		ptr = strpbrk_esc(arg, ";:+@%");
+		if (ptr) {
+			nc = *ptr;
+			*ptr++ = '\0';
+		}
+		switch (c) {
+		case ':':	/* Line number */
+			pp->line = strtoul(arg, &tmp, 0);
+			if (*tmp != '\0') {
+				semantic_error("There is non-digit char"
+					       " in line number.\n");
+				return -EINVAL;
+			}
+			break;
+		case '+':	/* Byte offset from a symbol */
+			pp->offset = strtoul(arg, &tmp, 0);
+			if (*tmp != '\0') {
+				semantic_error("There is non-digit character"
+						" in offset.\n");
+				return -EINVAL;
+			}
+			break;
+		case '@':	/* File name */
+			if (pp->file) {
+				semantic_error("SRC@SRC is not allowed.\n");
+				return -EINVAL;
+			}
+			pp->file = strdup_esc(arg);
+			if (pp->file == NULL)
+				return -ENOMEM;
+			break;
+		case '%':	/* Probe places */
+			if (strcmp(arg, "return") == 0) {
+				pp->retprobe = 1;
+			} else {	/* Others not supported yet */
+				semantic_error("%%%s is not supported.\n", arg);
+				return -ENOTSUP;
+			}
+			break;
+		default:	/* Buggy case */
+			pr_err("This program has a bug at %s:%d.\n",
+				__FILE__, __LINE__);
+			return -ENOTSUP;
+			break;
+		}
+	}
+
+	/* Exclusion check */
+	if (pp->lazy_line && pp->line) {
+		semantic_error("Lazy pattern can't be used with"
+			       " line number.\n");
+		return -EINVAL;
+	}
+
+	if (pp->lazy_line && pp->offset) {
+		semantic_error("Lazy pattern can't be used with offset.\n");
+		return -EINVAL;
+	}
+
+	if (pp->line && pp->offset) {
+		semantic_error("Offset can't be used with line number.\n");
+		return -EINVAL;
+	}
+
+	if (!pp->line && !pp->lazy_line && pp->file && !pp->function) {
+		semantic_error("File always requires line number or "
+			       "lazy pattern.\n");
+		return -EINVAL;
+	}
+
+	if (pp->offset && !pp->function) {
+		semantic_error("Offset requires an entry function.\n");
+		return -EINVAL;
+	}
+
+	if ((pp->offset || pp->line || pp->lazy_line) && pp->retprobe) {
+		semantic_error("Offset/Line/Lazy pattern can't be used with "
+			       "return probe.\n");
+		return -EINVAL;
+	}
+
+	pr_debug("symbol:%s file:%s line:%d offset:%lu return:%d lazy:%s\n",
+		 pp->function, pp->file, pp->line, pp->offset, pp->retprobe,
+		 pp->lazy_line);
+	return 0;
+}
+
+/* Parse perf-probe event argument */
+static int parse_perf_probe_arg(char *str, struct perf_probe_arg *arg)
+{
+	char *tmp, *goodname;
+	struct perf_probe_arg_field **fieldp;
+
+	pr_debug("parsing arg: %s into ", str);
+
+	tmp = strchr(str, '=');
+	if (tmp) {
+		arg->name = strndup(str, tmp - str);
+		if (arg->name == NULL)
+			return -ENOMEM;
+		pr_debug("name:%s ", arg->name);
+		str = tmp + 1;
+	}
+
+	tmp = strchr(str, '@');
+	if (tmp && tmp != str && !strcmp(tmp + 1, "user")) { /* user attr */
+		if (!user_access_is_supported()) {
+			semantic_error("ftrace does not support user access\n");
+			return -EINVAL;
+		}
+		*tmp = '\0';
+		arg->user_access = true;
+		pr_debug("user_access ");
+	}
+
+	tmp = strchr(str, ':');
+	if (tmp) {	/* Type setting */
+		*tmp = '\0';
+		arg->type = strdup(tmp + 1);
+		if (arg->type == NULL)
+			return -ENOMEM;
+		pr_debug("type:%s ", arg->type);
+	}
+
+	tmp = strpbrk(str, "-.[");
+	if (!is_c_varname(str) || !tmp) {
+		/* A variable, register, symbol or special value */
+		arg->var = strdup(str);
+		if (arg->var == NULL)
+			return -ENOMEM;
+		pr_debug("%s\n", arg->var);
+		return 0;
+	}
+
+	/* Structure fields or array element */
+	arg->var = strndup(str, tmp - str);
+	if (arg->var == NULL)
+		return -ENOMEM;
+	goodname = arg->var;
+	pr_debug("%s, ", arg->var);
+	fieldp = &arg->field;
+
+	do {
+		*fieldp = zalloc(sizeof(struct perf_probe_arg_field));
+		if (*fieldp == NULL)
+			return -ENOMEM;
+		if (*tmp == '[') {	/* Array */
+			str = tmp;
+			(*fieldp)->index = strtol(str + 1, &tmp, 0);
+			(*fieldp)->ref = true;
+			if (*tmp != ']' || tmp == str + 1) {
+				semantic_error("Array index must be a"
+						" number.\n");
+				return -EINVAL;
+			}
+			tmp++;
+			if (*tmp == '\0')
+				tmp = NULL;
+		} else {		/* Structure */
+			if (*tmp == '.') {
+				str = tmp + 1;
+				(*fieldp)->ref = false;
+			} else if (tmp[1] == '>') {
+				str = tmp + 2;
+				(*fieldp)->ref = true;
+			} else {
+				semantic_error("Argument parse error: %s\n",
+					       str);
+				return -EINVAL;
+			}
+			tmp = strpbrk(str, "-.[");
+		}
+		if (tmp) {
+			(*fieldp)->name = strndup(str, tmp - str);
+			if ((*fieldp)->name == NULL)
+				return -ENOMEM;
+			if (*str != '[')
+				goodname = (*fieldp)->name;
+			pr_debug("%s(%d), ", (*fieldp)->name, (*fieldp)->ref);
+			fieldp = &(*fieldp)->next;
+		}
+	} while (tmp);
+	(*fieldp)->name = strdup(str);
+	if ((*fieldp)->name == NULL)
+		return -ENOMEM;
+	if (*str != '[')
+		goodname = (*fieldp)->name;
+	pr_debug("%s(%d)\n", (*fieldp)->name, (*fieldp)->ref);
+
+	/* If no name is specified, set the last field name (not array index)*/
+	if (!arg->name) {
+		arg->name = strdup(goodname);
+		if (arg->name == NULL)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+/* Parse perf-probe event command */
+int parse_perf_probe_command(const char *cmd, struct perf_probe_event *pev)
+{
+	char **argv;
+	int argc, i, ret = 0;
+
+	argv = argv_split(cmd, &argc);
+	if (!argv) {
+		pr_debug("Failed to split arguments.\n");
+		return -ENOMEM;
+	}
+	if (argc - 1 > MAX_PROBE_ARGS) {
+		semantic_error("Too many probe arguments (%d).\n", argc - 1);
+		ret = -ERANGE;
+		goto out;
+	}
+	/* Parse probe point */
+	ret = parse_perf_probe_point(argv[0], pev);
+	if (ret < 0)
+		goto out;
+
+	/* Generate event name if needed */
+	if (!pev->event && pev->point.function && pev->point.line
+			&& !pev->point.lazy_line && !pev->point.offset) {
+		if (asprintf(&pev->event, "%s_L%d", pev->point.function,
+			pev->point.line) < 0) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+
+	/* Copy arguments and ensure return probe has no C argument */
+	pev->nargs = argc - 1;
+	pev->args = zalloc(sizeof(struct perf_probe_arg) * pev->nargs);
+	if (pev->args == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	for (i = 0; i < pev->nargs && ret >= 0; i++) {
+		ret = parse_perf_probe_arg(argv[i + 1], &pev->args[i]);
+		if (ret >= 0 &&
+		    is_c_varname(pev->args[i].var) && pev->point.retprobe) {
+			semantic_error("You can't specify local variable for"
+				       " kretprobe.\n");
+			ret = -EINVAL;
+		}
+	}
+out:
+	argv_free(argv);
+
+	return ret;
+}
+
+/* Returns true if *any* ARG is either C variable, $params or $vars. */
+bool perf_probe_with_var(struct perf_probe_event *pev)
+{
+	int i = 0;
+
+	for (i = 0; i < pev->nargs; i++)
+		if (is_c_varname(pev->args[i].var)              ||
+		    !strcmp(pev->args[i].var, PROBE_ARG_PARAMS) ||
+		    !strcmp(pev->args[i].var, PROBE_ARG_VARS))
+			return true;
+	return false;
+}
+
+/* Return true if this perf_probe_event requires debuginfo */
+bool perf_probe_event_need_dwarf(struct perf_probe_event *pev)
+{
+	if (pev->point.file || pev->point.line || pev->point.lazy_line)
+		return true;
+
+	if (perf_probe_with_var(pev))
+		return true;
+
+	return false;
+}
+
+/* Parse probe_events event into struct probe_point */
+int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev)
+{
+	struct probe_trace_point *tp = &tev->point;
+	char pr;
+	char *p;
+	char *argv0_str = NULL, *fmt, *fmt1_str, *fmt2_str, *fmt3_str;
+	int ret, i, argc;
+	char **argv;
+
+	pr_debug("Parsing probe_events: %s\n", cmd);
+	argv = argv_split(cmd, &argc);
+	if (!argv) {
+		pr_debug("Failed to split arguments.\n");
+		return -ENOMEM;
+	}
+	if (argc < 2) {
+		semantic_error("Too few probe arguments.\n");
+		ret = -ERANGE;
+		goto out;
+	}
+
+	/* Scan event and group name. */
+	argv0_str = strdup(argv[0]);
+	if (argv0_str == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	fmt1_str = strtok_r(argv0_str, ":", &fmt);
+	fmt2_str = strtok_r(NULL, "/", &fmt);
+	fmt3_str = strtok_r(NULL, " \t", &fmt);
+	if (fmt1_str == NULL || fmt2_str == NULL || fmt3_str == NULL) {
+		semantic_error("Failed to parse event name: %s\n", argv[0]);
+		ret = -EINVAL;
+		goto out;
+	}
+	pr = fmt1_str[0];
+	tev->group = strdup(fmt2_str);
+	tev->event = strdup(fmt3_str);
+	if (tev->group == NULL || tev->event == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	pr_debug("Group:%s Event:%s probe:%c\n", tev->group, tev->event, pr);
+
+	tp->retprobe = (pr == 'r');
+
+	/* Scan module name(if there), function name and offset */
+	p = strchr(argv[1], ':');
+	if (p) {
+		tp->module = strndup(argv[1], p - argv[1]);
+		if (!tp->module) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		tev->uprobes = (tp->module[0] == '/');
+		p++;
+	} else
+		p = argv[1];
+	fmt1_str = strtok_r(p, "+", &fmt);
+	/* only the address started with 0x */
+	if (fmt1_str[0] == '0')	{
+		/*
+		 * Fix a special case:
+		 * if address == 0, kernel reports something like:
+		 * p:probe_libc/abs_0 /lib/libc-2.18.so:0x          (null) arg1=%ax
+		 * Newer kernel may fix that, but we want to
+		 * support old kernel also.
+		 */
+		if (strcmp(fmt1_str, "0x") == 0) {
+			if (!argv[2] || strcmp(argv[2], "(null)")) {
+				ret = -EINVAL;
+				goto out;
+			}
+			tp->address = 0;
+
+			free(argv[2]);
+			for (i = 2; argv[i + 1] != NULL; i++)
+				argv[i] = argv[i + 1];
+
+			argv[i] = NULL;
+			argc -= 1;
+		} else
+			tp->address = strtoull(fmt1_str, NULL, 0);
+	} else {
+		/* Only the symbol-based probe has offset */
+		tp->symbol = strdup(fmt1_str);
+		if (tp->symbol == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		fmt2_str = strtok_r(NULL, "", &fmt);
+		if (fmt2_str == NULL)
+			tp->offset = 0;
+		else
+			tp->offset = strtoul(fmt2_str, NULL, 10);
+	}
+
+	if (tev->uprobes) {
+		fmt2_str = strchr(p, '(');
+		if (fmt2_str)
+			tp->ref_ctr_offset = strtoul(fmt2_str + 1, NULL, 0);
+	}
+
+	tev->nargs = argc - 2;
+	tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
+	if (tev->args == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	for (i = 0; i < tev->nargs; i++) {
+		p = strchr(argv[i + 2], '=');
+		if (p)	/* We don't need which register is assigned. */
+			*p++ = '\0';
+		else
+			p = argv[i + 2];
+		tev->args[i].name = strdup(argv[i + 2]);
+		/* TODO: parse regs and offset */
+		tev->args[i].value = strdup(p);
+		if (tev->args[i].name == NULL || tev->args[i].value == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+	ret = 0;
+out:
+	free(argv0_str);
+	argv_free(argv);
+	return ret;
+}
+
+/* Compose only probe arg */
+char *synthesize_perf_probe_arg(struct perf_probe_arg *pa)
+{
+	struct perf_probe_arg_field *field = pa->field;
+	struct strbuf buf;
+	char *ret = NULL;
+	int err;
+
+	if (strbuf_init(&buf, 64) < 0)
+		return NULL;
+
+	if (pa->name && pa->var)
+		err = strbuf_addf(&buf, "%s=%s", pa->name, pa->var);
+	else
+		err = strbuf_addstr(&buf, pa->name ?: pa->var);
+	if (err)
+		goto out;
+
+	while (field) {
+		if (field->name[0] == '[')
+			err = strbuf_addstr(&buf, field->name);
+		else
+			err = strbuf_addf(&buf, "%s%s", field->ref ? "->" : ".",
+					  field->name);
+		field = field->next;
+		if (err)
+			goto out;
+	}
+
+	if (pa->type)
+		if (strbuf_addf(&buf, ":%s", pa->type) < 0)
+			goto out;
+
+	ret = strbuf_detach(&buf, NULL);
+out:
+	strbuf_release(&buf);
+	return ret;
+}
+
+/* Compose only probe point (not argument) */
+char *synthesize_perf_probe_point(struct perf_probe_point *pp)
+{
+	struct strbuf buf;
+	char *tmp, *ret = NULL;
+	int len, err = 0;
+
+	if (strbuf_init(&buf, 64) < 0)
+		return NULL;
+
+	if (pp->function) {
+		if (strbuf_addstr(&buf, pp->function) < 0)
+			goto out;
+		if (pp->offset)
+			err = strbuf_addf(&buf, "+%lu", pp->offset);
+		else if (pp->line)
+			err = strbuf_addf(&buf, ":%d", pp->line);
+		else if (pp->retprobe)
+			err = strbuf_addstr(&buf, "%return");
+		if (err)
+			goto out;
+	}
+	if (pp->file) {
+		tmp = pp->file;
+		len = strlen(tmp);
+		if (len > 30) {
+			tmp = strchr(pp->file + len - 30, '/');
+			tmp = tmp ? tmp + 1 : pp->file + len - 30;
+		}
+		err = strbuf_addf(&buf, "@%s", tmp);
+		if (!err && !pp->function && pp->line)
+			err = strbuf_addf(&buf, ":%d", pp->line);
+	}
+	if (!err)
+		ret = strbuf_detach(&buf, NULL);
+out:
+	strbuf_release(&buf);
+	return ret;
+}
+
+char *synthesize_perf_probe_command(struct perf_probe_event *pev)
+{
+	struct strbuf buf;
+	char *tmp, *ret = NULL;
+	int i;
+
+	if (strbuf_init(&buf, 64))
+		return NULL;
+	if (pev->event)
+		if (strbuf_addf(&buf, "%s:%s=", pev->group ?: PERFPROBE_GROUP,
+				pev->event) < 0)
+			goto out;
+
+	tmp = synthesize_perf_probe_point(&pev->point);
+	if (!tmp || strbuf_addstr(&buf, tmp) < 0)
+		goto out;
+	free(tmp);
+
+	for (i = 0; i < pev->nargs; i++) {
+		tmp = synthesize_perf_probe_arg(pev->args + i);
+		if (!tmp || strbuf_addf(&buf, " %s", tmp) < 0)
+			goto out;
+		free(tmp);
+	}
+
+	ret = strbuf_detach(&buf, NULL);
+out:
+	strbuf_release(&buf);
+	return ret;
+}
+
+static int __synthesize_probe_trace_arg_ref(struct probe_trace_arg_ref *ref,
+					    struct strbuf *buf, int depth)
+{
+	int err;
+	if (ref->next) {
+		depth = __synthesize_probe_trace_arg_ref(ref->next, buf,
+							 depth + 1);
+		if (depth < 0)
+			return depth;
+	}
+	if (ref->user_access)
+		err = strbuf_addf(buf, "%s%ld(", "+u", ref->offset);
+	else
+		err = strbuf_addf(buf, "%+ld(", ref->offset);
+	return (err < 0) ? err : depth;
+}
+
+static int synthesize_probe_trace_arg(struct probe_trace_arg *arg,
+				      struct strbuf *buf)
+{
+	struct probe_trace_arg_ref *ref = arg->ref;
+	int depth = 0, err;
+
+	/* Argument name or separator */
+	if (arg->name)
+		err = strbuf_addf(buf, " %s=", arg->name);
+	else
+		err = strbuf_addch(buf, ' ');
+	if (err)
+		return err;
+
+	/* Special case: @XXX */
+	if (arg->value[0] == '@' && arg->ref)
+			ref = ref->next;
+
+	/* Dereferencing arguments */
+	if (ref) {
+		depth = __synthesize_probe_trace_arg_ref(ref, buf, 1);
+		if (depth < 0)
+			return depth;
+	}
+
+	/* Print argument value */
+	if (arg->value[0] == '@' && arg->ref)
+		err = strbuf_addf(buf, "%s%+ld", arg->value, arg->ref->offset);
+	else
+		err = strbuf_addstr(buf, arg->value);
+
+	/* Closing */
+	while (!err && depth--)
+		err = strbuf_addch(buf, ')');
+
+	/* Print argument type */
+	if (!err && arg->type)
+		err = strbuf_addf(buf, ":%s", arg->type);
+
+	return err;
+}
+
+static int
+synthesize_probe_trace_args(struct probe_trace_event *tev, struct strbuf *buf)
+{
+	int i, ret = 0;
+
+	for (i = 0; i < tev->nargs && ret >= 0; i++)
+		ret = synthesize_probe_trace_arg(&tev->args[i], buf);
+
+	return ret;
+}
+
+static int
+synthesize_uprobe_trace_def(struct probe_trace_point *tp, struct strbuf *buf)
+{
+	int err;
+
+	/* Uprobes must have tp->module */
+	if (!tp->module)
+		return -EINVAL;
+	/*
+	 * If tp->address == 0, then this point must be a
+	 * absolute address uprobe.
+	 * try_to_find_absolute_address() should have made
+	 * tp->symbol to "0x0".
+	 */
+	if (!tp->address && (!tp->symbol || strcmp(tp->symbol, "0x0")))
+		return -EINVAL;
+
+	/* Use the tp->address for uprobes */
+	err = strbuf_addf(buf, "%s:0x%" PRIx64, tp->module, tp->address);
+
+	if (err >= 0 && tp->ref_ctr_offset) {
+		if (!uprobe_ref_ctr_is_supported())
+			return -EINVAL;
+		err = strbuf_addf(buf, "(0x%lx)", tp->ref_ctr_offset);
+	}
+	return err >= 0 ? 0 : err;
+}
+
+static int
+synthesize_kprobe_trace_def(struct probe_trace_point *tp, struct strbuf *buf)
+{
+	if (!strncmp(tp->symbol, "0x", 2)) {
+		/* Absolute address. See try_to_find_absolute_address() */
+		return strbuf_addf(buf, "%s%s0x%" PRIx64, tp->module ?: "",
+				  tp->module ? ":" : "", tp->address);
+	} else {
+		return strbuf_addf(buf, "%s%s%s+%lu", tp->module ?: "",
+				tp->module ? ":" : "", tp->symbol, tp->offset);
+	}
+}
+
+char *synthesize_probe_trace_command(struct probe_trace_event *tev)
+{
+	struct probe_trace_point *tp = &tev->point;
+	struct strbuf buf;
+	char *ret = NULL;
+	int err;
+
+	if (strbuf_init(&buf, 32) < 0)
+		return NULL;
+
+	if (strbuf_addf(&buf, "%c:%s/%s ", tp->retprobe ? 'r' : 'p',
+			tev->group, tev->event) < 0)
+		goto error;
+
+	if (tev->uprobes)
+		err = synthesize_uprobe_trace_def(tp, &buf);
+	else
+		err = synthesize_kprobe_trace_def(tp, &buf);
+
+	if (err >= 0)
+		err = synthesize_probe_trace_args(tev, &buf);
+
+	if (err >= 0)
+		ret = strbuf_detach(&buf, NULL);
+error:
+	strbuf_release(&buf);
+	return ret;
+}
+
+static int find_perf_probe_point_from_map(struct probe_trace_point *tp,
+					  struct perf_probe_point *pp,
+					  bool is_kprobe)
+{
+	struct symbol *sym = NULL;
+	struct map *map = NULL;
+	u64 addr = tp->address;
+	int ret = -ENOENT;
+
+	if (!is_kprobe) {
+		map = dso__new_map(tp->module);
+		if (!map)
+			goto out;
+		sym = map__find_symbol(map, addr);
+	} else {
+		if (tp->symbol && !addr) {
+			if (kernel_get_symbol_address_by_name(tp->symbol,
+						&addr, true, false) < 0)
+				goto out;
+		}
+		if (addr) {
+			addr += tp->offset;
+			sym = machine__find_kernel_symbol(host_machine, addr, &map);
+		}
+	}
+
+	if (!sym)
+		goto out;
+
+	pp->retprobe = tp->retprobe;
+	pp->offset = addr - map->unmap_ip(map, sym->start);
+	pp->function = strdup(sym->name);
+	ret = pp->function ? 0 : -ENOMEM;
+
+out:
+	if (map && !is_kprobe) {
+		map__put(map);
+	}
+
+	return ret;
+}
+
+static int convert_to_perf_probe_point(struct probe_trace_point *tp,
+				       struct perf_probe_point *pp,
+				       bool is_kprobe)
+{
+	char buf[128];
+	int ret;
+
+	ret = find_perf_probe_point_from_dwarf(tp, pp, is_kprobe);
+	if (!ret)
+		return 0;
+	ret = find_perf_probe_point_from_map(tp, pp, is_kprobe);
+	if (!ret)
+		return 0;
+
+	pr_debug("Failed to find probe point from both of dwarf and map.\n");
+
+	if (tp->symbol) {
+		pp->function = strdup(tp->symbol);
+		pp->offset = tp->offset;
+	} else {
+		ret = e_snprintf(buf, 128, "0x%" PRIx64, tp->address);
+		if (ret < 0)
+			return ret;
+		pp->function = strdup(buf);
+		pp->offset = 0;
+	}
+	if (pp->function == NULL)
+		return -ENOMEM;
+
+	pp->retprobe = tp->retprobe;
+
+	return 0;
+}
+
+static int convert_to_perf_probe_event(struct probe_trace_event *tev,
+			       struct perf_probe_event *pev, bool is_kprobe)
+{
+	struct strbuf buf = STRBUF_INIT;
+	int i, ret;
+
+	/* Convert event/group name */
+	pev->event = strdup(tev->event);
+	pev->group = strdup(tev->group);
+	if (pev->event == NULL || pev->group == NULL)
+		return -ENOMEM;
+
+	/* Convert trace_point to probe_point */
+	ret = convert_to_perf_probe_point(&tev->point, &pev->point, is_kprobe);
+	if (ret < 0)
+		return ret;
+
+	/* Convert trace_arg to probe_arg */
+	pev->nargs = tev->nargs;
+	pev->args = zalloc(sizeof(struct perf_probe_arg) * pev->nargs);
+	if (pev->args == NULL)
+		return -ENOMEM;
+	for (i = 0; i < tev->nargs && ret >= 0; i++) {
+		if (tev->args[i].name)
+			pev->args[i].name = strdup(tev->args[i].name);
+		else {
+			if ((ret = strbuf_init(&buf, 32)) < 0)
+				goto error;
+			ret = synthesize_probe_trace_arg(&tev->args[i], &buf);
+			pev->args[i].name = strbuf_detach(&buf, NULL);
+		}
+		if (pev->args[i].name == NULL && ret >= 0)
+			ret = -ENOMEM;
+	}
+error:
+	if (ret < 0)
+		clear_perf_probe_event(pev);
+
+	return ret;
+}
+
+void clear_perf_probe_event(struct perf_probe_event *pev)
+{
+	struct perf_probe_arg_field *field, *next;
+	int i;
+
+	zfree(&pev->event);
+	zfree(&pev->group);
+	zfree(&pev->target);
+	clear_perf_probe_point(&pev->point);
+
+	for (i = 0; i < pev->nargs; i++) {
+		zfree(&pev->args[i].name);
+		zfree(&pev->args[i].var);
+		zfree(&pev->args[i].type);
+		field = pev->args[i].field;
+		while (field) {
+			next = field->next;
+			zfree(&field->name);
+			free(field);
+			field = next;
+		}
+	}
+	pev->nargs = 0;
+	zfree(&pev->args);
+}
+
+#define strdup_or_goto(str, label)	\
+({ char *__p = NULL; if (str && !(__p = strdup(str))) goto label; __p; })
+
+static int perf_probe_point__copy(struct perf_probe_point *dst,
+				  struct perf_probe_point *src)
+{
+	dst->file = strdup_or_goto(src->file, out_err);
+	dst->function = strdup_or_goto(src->function, out_err);
+	dst->lazy_line = strdup_or_goto(src->lazy_line, out_err);
+	dst->line = src->line;
+	dst->retprobe = src->retprobe;
+	dst->offset = src->offset;
+	return 0;
+
+out_err:
+	clear_perf_probe_point(dst);
+	return -ENOMEM;
+}
+
+static int perf_probe_arg__copy(struct perf_probe_arg *dst,
+				struct perf_probe_arg *src)
+{
+	struct perf_probe_arg_field *field, **ppfield;
+
+	dst->name = strdup_or_goto(src->name, out_err);
+	dst->var = strdup_or_goto(src->var, out_err);
+	dst->type = strdup_or_goto(src->type, out_err);
+
+	field = src->field;
+	ppfield = &(dst->field);
+	while (field) {
+		*ppfield = zalloc(sizeof(*field));
+		if (!*ppfield)
+			goto out_err;
+		(*ppfield)->name = strdup_or_goto(field->name, out_err);
+		(*ppfield)->index = field->index;
+		(*ppfield)->ref = field->ref;
+		field = field->next;
+		ppfield = &((*ppfield)->next);
+	}
+	return 0;
+out_err:
+	return -ENOMEM;
+}
+
+int perf_probe_event__copy(struct perf_probe_event *dst,
+			   struct perf_probe_event *src)
+{
+	int i;
+
+	dst->event = strdup_or_goto(src->event, out_err);
+	dst->group = strdup_or_goto(src->group, out_err);
+	dst->target = strdup_or_goto(src->target, out_err);
+	dst->uprobes = src->uprobes;
+
+	if (perf_probe_point__copy(&dst->point, &src->point) < 0)
+		goto out_err;
+
+	dst->args = zalloc(sizeof(struct perf_probe_arg) * src->nargs);
+	if (!dst->args)
+		goto out_err;
+	dst->nargs = src->nargs;
+
+	for (i = 0; i < src->nargs; i++)
+		if (perf_probe_arg__copy(&dst->args[i], &src->args[i]) < 0)
+			goto out_err;
+	return 0;
+
+out_err:
+	clear_perf_probe_event(dst);
+	return -ENOMEM;
+}
+
+void clear_probe_trace_event(struct probe_trace_event *tev)
+{
+	struct probe_trace_arg_ref *ref, *next;
+	int i;
+
+	zfree(&tev->event);
+	zfree(&tev->group);
+	zfree(&tev->point.symbol);
+	zfree(&tev->point.realname);
+	zfree(&tev->point.module);
+	for (i = 0; i < tev->nargs; i++) {
+		zfree(&tev->args[i].name);
+		zfree(&tev->args[i].value);
+		zfree(&tev->args[i].type);
+		ref = tev->args[i].ref;
+		while (ref) {
+			next = ref->next;
+			free(ref);
+			ref = next;
+		}
+	}
+	zfree(&tev->args);
+	tev->nargs = 0;
+}
+
+struct kprobe_blacklist_node {
+	struct list_head list;
+	u64 start;
+	u64 end;
+	char *symbol;
+};
+
+static void kprobe_blacklist__delete(struct list_head *blacklist)
+{
+	struct kprobe_blacklist_node *node;
+
+	while (!list_empty(blacklist)) {
+		node = list_first_entry(blacklist,
+					struct kprobe_blacklist_node, list);
+		list_del_init(&node->list);
+		zfree(&node->symbol);
+		free(node);
+	}
+}
+
+static int kprobe_blacklist__load(struct list_head *blacklist)
+{
+	struct kprobe_blacklist_node *node;
+	const char *__debugfs = debugfs__mountpoint();
+	char buf[PATH_MAX], *p;
+	FILE *fp;
+	int ret;
+
+	if (__debugfs == NULL)
+		return -ENOTSUP;
+
+	ret = e_snprintf(buf, PATH_MAX, "%s/kprobes/blacklist", __debugfs);
+	if (ret < 0)
+		return ret;
+
+	fp = fopen(buf, "r");
+	if (!fp)
+		return -errno;
+
+	ret = 0;
+	while (fgets(buf, PATH_MAX, fp)) {
+		node = zalloc(sizeof(*node));
+		if (!node) {
+			ret = -ENOMEM;
+			break;
+		}
+		INIT_LIST_HEAD(&node->list);
+		list_add_tail(&node->list, blacklist);
+		if (sscanf(buf, "0x%" PRIx64 "-0x%" PRIx64, &node->start, &node->end) != 2) {
+			ret = -EINVAL;
+			break;
+		}
+		p = strchr(buf, '\t');
+		if (p) {
+			p++;
+			if (p[strlen(p) - 1] == '\n')
+				p[strlen(p) - 1] = '\0';
+		} else
+			p = (char *)"unknown";
+		node->symbol = strdup(p);
+		if (!node->symbol) {
+			ret = -ENOMEM;
+			break;
+		}
+		pr_debug2("Blacklist: 0x%" PRIx64 "-0x%" PRIx64 ", %s\n",
+			  node->start, node->end, node->symbol);
+		ret++;
+	}
+	if (ret < 0)
+		kprobe_blacklist__delete(blacklist);
+	fclose(fp);
+
+	return ret;
+}
+
+static struct kprobe_blacklist_node *
+kprobe_blacklist__find_by_address(struct list_head *blacklist, u64 address)
+{
+	struct kprobe_blacklist_node *node;
+
+	list_for_each_entry(node, blacklist, list) {
+		if (node->start <= address && address < node->end)
+			return node;
+	}
+
+	return NULL;
+}
+
+static LIST_HEAD(kprobe_blacklist);
+
+static void kprobe_blacklist__init(void)
+{
+	if (!list_empty(&kprobe_blacklist))
+		return;
+
+	if (kprobe_blacklist__load(&kprobe_blacklist) < 0)
+		pr_debug("No kprobe blacklist support, ignored\n");
+}
+
+static void kprobe_blacklist__release(void)
+{
+	kprobe_blacklist__delete(&kprobe_blacklist);
+}
+
+static bool kprobe_blacklist__listed(u64 address)
+{
+	return !!kprobe_blacklist__find_by_address(&kprobe_blacklist, address);
+}
+
+static int perf_probe_event__sprintf(const char *group, const char *event,
+				     struct perf_probe_event *pev,
+				     const char *module,
+				     struct strbuf *result)
+{
+	int i, ret;
+	char *buf;
+
+	if (asprintf(&buf, "%s:%s", group, event) < 0)
+		return -errno;
+	ret = strbuf_addf(result, "  %-20s (on ", buf);
+	free(buf);
+	if (ret)
+		return ret;
+
+	/* Synthesize only event probe point */
+	buf = synthesize_perf_probe_point(&pev->point);
+	if (!buf)
+		return -ENOMEM;
+	ret = strbuf_addstr(result, buf);
+	free(buf);
+
+	if (!ret && module)
+		ret = strbuf_addf(result, " in %s", module);
+
+	if (!ret && pev->nargs > 0) {
+		ret = strbuf_add(result, " with", 5);
+		for (i = 0; !ret && i < pev->nargs; i++) {
+			buf = synthesize_perf_probe_arg(&pev->args[i]);
+			if (!buf)
+				return -ENOMEM;
+			ret = strbuf_addf(result, " %s", buf);
+			free(buf);
+		}
+	}
+	if (!ret)
+		ret = strbuf_addch(result, ')');
+
+	return ret;
+}
+
+/* Show an event */
+int show_perf_probe_event(const char *group, const char *event,
+			  struct perf_probe_event *pev,
+			  const char *module, bool use_stdout)
+{
+	struct strbuf buf = STRBUF_INIT;
+	int ret;
+
+	ret = perf_probe_event__sprintf(group, event, pev, module, &buf);
+	if (ret >= 0) {
+		if (use_stdout)
+			printf("%s\n", buf.buf);
+		else
+			pr_info("%s\n", buf.buf);
+	}
+	strbuf_release(&buf);
+
+	return ret;
+}
+
+static bool filter_probe_trace_event(struct probe_trace_event *tev,
+				     struct strfilter *filter)
+{
+	char tmp[128];
+
+	/* At first, check the event name itself */
+	if (strfilter__compare(filter, tev->event))
+		return true;
+
+	/* Next, check the combination of name and group */
+	if (e_snprintf(tmp, 128, "%s:%s", tev->group, tev->event) < 0)
+		return false;
+	return strfilter__compare(filter, tmp);
+}
+
+static int __show_perf_probe_events(int fd, bool is_kprobe,
+				    struct strfilter *filter)
+{
+	int ret = 0;
+	struct probe_trace_event tev;
+	struct perf_probe_event pev;
+	struct strlist *rawlist;
+	struct str_node *ent;
+
+	memset(&tev, 0, sizeof(tev));
+	memset(&pev, 0, sizeof(pev));
+
+	rawlist = probe_file__get_rawlist(fd);
+	if (!rawlist)
+		return -ENOMEM;
+
+	strlist__for_each_entry(ent, rawlist) {
+		ret = parse_probe_trace_command(ent->s, &tev);
+		if (ret >= 0) {
+			if (!filter_probe_trace_event(&tev, filter))
+				goto next;
+			ret = convert_to_perf_probe_event(&tev, &pev,
+								is_kprobe);
+			if (ret < 0)
+				goto next;
+			ret = show_perf_probe_event(pev.group, pev.event,
+						    &pev, tev.point.module,
+						    true);
+		}
+next:
+		clear_perf_probe_event(&pev);
+		clear_probe_trace_event(&tev);
+		if (ret < 0)
+			break;
+	}
+	strlist__delete(rawlist);
+	/* Cleanup cached debuginfo if needed */
+	debuginfo_cache__exit();
+
+	return ret;
+}
+
+/* List up current perf-probe events */
+int show_perf_probe_events(struct strfilter *filter)
+{
+	int kp_fd, up_fd, ret;
+
+	setup_pager();
+
+	if (probe_conf.cache)
+		return probe_cache__show_all_caches(filter);
+
+	ret = init_probe_symbol_maps(false);
+	if (ret < 0)
+		return ret;
+
+	ret = probe_file__open_both(&kp_fd, &up_fd, 0);
+	if (ret < 0)
+		return ret;
+
+	if (kp_fd >= 0)
+		ret = __show_perf_probe_events(kp_fd, true, filter);
+	if (up_fd >= 0 && ret >= 0)
+		ret = __show_perf_probe_events(up_fd, false, filter);
+	if (kp_fd > 0)
+		close(kp_fd);
+	if (up_fd > 0)
+		close(up_fd);
+	exit_probe_symbol_maps();
+
+	return ret;
+}
+
+static int get_new_event_name(char *buf, size_t len, const char *base,
+			      struct strlist *namelist, bool ret_event,
+			      bool allow_suffix)
+{
+	int i, ret;
+	char *p, *nbase;
+
+	if (*base == '.')
+		base++;
+	nbase = strdup(base);
+	if (!nbase)
+		return -ENOMEM;
+
+	/* Cut off the dot suffixes (e.g. .const, .isra) and version suffixes */
+	p = strpbrk(nbase, ".@");
+	if (p && p != nbase)
+		*p = '\0';
+
+	/* Try no suffix number */
+	ret = e_snprintf(buf, len, "%s%s", nbase, ret_event ? "__return" : "");
+	if (ret < 0) {
+		pr_debug("snprintf() failed: %d\n", ret);
+		goto out;
+	}
+	if (!strlist__has_entry(namelist, buf))
+		goto out;
+
+	if (!allow_suffix) {
+		pr_warning("Error: event \"%s\" already exists.\n"
+			   " Hint: Remove existing event by 'perf probe -d'\n"
+			   "       or force duplicates by 'perf probe -f'\n"
+			   "       or set 'force=yes' in BPF source.\n",
+			   buf);
+		ret = -EEXIST;
+		goto out;
+	}
+
+	/* Try to add suffix */
+	for (i = 1; i < MAX_EVENT_INDEX; i++) {
+		ret = e_snprintf(buf, len, "%s_%d", nbase, i);
+		if (ret < 0) {
+			pr_debug("snprintf() failed: %d\n", ret);
+			goto out;
+		}
+		if (!strlist__has_entry(namelist, buf))
+			break;
+	}
+	if (i == MAX_EVENT_INDEX) {
+		pr_warning("Too many events are on the same function.\n");
+		ret = -ERANGE;
+	}
+
+out:
+	free(nbase);
+
+	/* Final validation */
+	if (ret >= 0 && !is_c_func_name(buf)) {
+		pr_warning("Internal error: \"%s\" is an invalid event name.\n",
+			   buf);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+/* Warn if the current kernel's uprobe implementation is old */
+static void warn_uprobe_event_compat(struct probe_trace_event *tev)
+{
+	int i;
+	char *buf = synthesize_probe_trace_command(tev);
+	struct probe_trace_point *tp = &tev->point;
+
+	if (tp->ref_ctr_offset && !uprobe_ref_ctr_is_supported()) {
+		pr_warning("A semaphore is associated with %s:%s and "
+			   "seems your kernel doesn't support it.\n",
+			   tev->group, tev->event);
+	}
+
+	/* Old uprobe event doesn't support memory dereference */
+	if (!tev->uprobes || tev->nargs == 0 || !buf)
+		goto out;
+
+	for (i = 0; i < tev->nargs; i++)
+		if (strglobmatch(tev->args[i].value, "[$@+-]*")) {
+			pr_warning("Please upgrade your kernel to at least "
+				   "3.14 to have access to feature %s\n",
+				   tev->args[i].value);
+			break;
+		}
+out:
+	free(buf);
+}
+
+/* Set new name from original perf_probe_event and namelist */
+static int probe_trace_event__set_name(struct probe_trace_event *tev,
+				       struct perf_probe_event *pev,
+				       struct strlist *namelist,
+				       bool allow_suffix)
+{
+	const char *event, *group;
+	char buf[64];
+	int ret;
+
+	/* If probe_event or trace_event already have the name, reuse it */
+	if (pev->event && !pev->sdt)
+		event = pev->event;
+	else if (tev->event)
+		event = tev->event;
+	else {
+		/* Or generate new one from probe point */
+		if (pev->point.function &&
+			(strncmp(pev->point.function, "0x", 2) != 0) &&
+			!strisglob(pev->point.function))
+			event = pev->point.function;
+		else
+			event = tev->point.realname;
+	}
+	if (pev->group && !pev->sdt)
+		group = pev->group;
+	else if (tev->group)
+		group = tev->group;
+	else
+		group = PERFPROBE_GROUP;
+
+	/* Get an unused new event name */
+	ret = get_new_event_name(buf, 64, event, namelist,
+				 tev->point.retprobe, allow_suffix);
+	if (ret < 0)
+		return ret;
+
+	event = buf;
+
+	tev->event = strdup(event);
+	tev->group = strdup(group);
+	if (tev->event == NULL || tev->group == NULL)
+		return -ENOMEM;
+
+	/*
+	 * Add new event name to namelist if multiprobe event is NOT
+	 * supported, since we have to use new event name for following
+	 * probes in that case.
+	 */
+	if (!multiprobe_event_is_supported())
+		strlist__add(namelist, event);
+	return 0;
+}
+
+static int __open_probe_file_and_namelist(bool uprobe,
+					  struct strlist **namelist)
+{
+	int fd;
+
+	fd = probe_file__open(PF_FL_RW | (uprobe ? PF_FL_UPROBE : 0));
+	if (fd < 0)
+		return fd;
+
+	/* Get current event names */
+	*namelist = probe_file__get_namelist(fd);
+	if (!(*namelist)) {
+		pr_debug("Failed to get current event list.\n");
+		close(fd);
+		return -ENOMEM;
+	}
+	return fd;
+}
+
+static int __add_probe_trace_events(struct perf_probe_event *pev,
+				     struct probe_trace_event *tevs,
+				     int ntevs, bool allow_suffix)
+{
+	int i, fd[2] = {-1, -1}, up, ret;
+	struct probe_trace_event *tev = NULL;
+	struct probe_cache *cache = NULL;
+	struct strlist *namelist[2] = {NULL, NULL};
+	struct nscookie nsc;
+
+	up = pev->uprobes ? 1 : 0;
+	fd[up] = __open_probe_file_and_namelist(up, &namelist[up]);
+	if (fd[up] < 0)
+		return fd[up];
+
+	ret = 0;
+	for (i = 0; i < ntevs; i++) {
+		tev = &tevs[i];
+		up = tev->uprobes ? 1 : 0;
+		if (fd[up] == -1) {	/* Open the kprobe/uprobe_events */
+			fd[up] = __open_probe_file_and_namelist(up,
+								&namelist[up]);
+			if (fd[up] < 0)
+				goto close_out;
+		}
+		/* Skip if the symbol is out of .text or blacklisted */
+		if (!tev->point.symbol && !pev->uprobes)
+			continue;
+
+		/* Set new name for tev (and update namelist) */
+		ret = probe_trace_event__set_name(tev, pev, namelist[up],
+						  allow_suffix);
+		if (ret < 0)
+			break;
+
+		nsinfo__mountns_enter(pev->nsi, &nsc);
+		ret = probe_file__add_event(fd[up], tev);
+		nsinfo__mountns_exit(&nsc);
+		if (ret < 0)
+			break;
+
+		/*
+		 * Probes after the first probe which comes from same
+		 * user input are always allowed to add suffix, because
+		 * there might be several addresses corresponding to
+		 * one code line.
+		 */
+		allow_suffix = true;
+	}
+	if (ret == -EINVAL && pev->uprobes)
+		warn_uprobe_event_compat(tev);
+	if (ret == 0 && probe_conf.cache) {
+		cache = probe_cache__new(pev->target, pev->nsi);
+		if (!cache ||
+		    probe_cache__add_entry(cache, pev, tevs, ntevs) < 0 ||
+		    probe_cache__commit(cache) < 0)
+			pr_warning("Failed to add event to probe cache\n");
+		probe_cache__delete(cache);
+	}
+
+close_out:
+	for (up = 0; up < 2; up++) {
+		strlist__delete(namelist[up]);
+		if (fd[up] >= 0)
+			close(fd[up]);
+	}
+	return ret;
+}
+
+static int find_probe_functions(struct map *map, char *name,
+				struct symbol **syms)
+{
+	int found = 0;
+	struct symbol *sym;
+	struct rb_node *tmp;
+	const char *norm, *ver;
+	char *buf = NULL;
+	bool cut_version = true;
+
+	if (map__load(map) < 0)
+		return -EACCES;	/* Possible permission error to load symbols */
+
+	/* If user gives a version, don't cut off the version from symbols */
+	if (strchr(name, '@'))
+		cut_version = false;
+
+	map__for_each_symbol(map, sym, tmp) {
+		norm = arch__normalize_symbol_name(sym->name);
+		if (!norm)
+			continue;
+
+		if (cut_version) {
+			/* We don't care about default symbol or not */
+			ver = strchr(norm, '@');
+			if (ver) {
+				buf = strndup(norm, ver - norm);
+				if (!buf)
+					return -ENOMEM;
+				norm = buf;
+			}
+		}
+
+		if (strglobmatch(norm, name)) {
+			found++;
+			if (syms && found < probe_conf.max_probes)
+				syms[found - 1] = sym;
+		}
+		if (buf)
+			zfree(&buf);
+	}
+
+	return found;
+}
+
+void __weak arch__fix_tev_from_maps(struct perf_probe_event *pev __maybe_unused,
+				struct probe_trace_event *tev __maybe_unused,
+				struct map *map __maybe_unused,
+				struct symbol *sym __maybe_unused) { }
+
+
+static void pr_kallsyms_access_error(void)
+{
+	pr_err("Please ensure you can read the /proc/kallsyms symbol addresses.\n"
+	       "If /proc/sys/kernel/kptr_restrict is '2', you can not read\n"
+	       "kernel symbol addresses even if you are a superuser. Please change\n"
+	       "it to '1'. If kptr_restrict is '1', the superuser can read the\n"
+	       "symbol addresses.\n"
+	       "In that case, please run this command again with sudo.\n");
+}
+
+/*
+ * Find probe function addresses from map.
+ * Return an error or the number of found probe_trace_event
+ */
+static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
+					    struct probe_trace_event **tevs)
+{
+	struct map *map = NULL;
+	struct ref_reloc_sym *reloc_sym = NULL;
+	struct symbol *sym;
+	struct symbol **syms = NULL;
+	struct probe_trace_event *tev;
+	struct perf_probe_point *pp = &pev->point;
+	struct probe_trace_point *tp;
+	int num_matched_functions;
+	int ret, i, j, skipped = 0;
+	char *mod_name;
+
+	map = get_target_map(pev->target, pev->nsi, pev->uprobes);
+	if (!map) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	syms = malloc(sizeof(struct symbol *) * probe_conf.max_probes);
+	if (!syms) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Load matched symbols: Since the different local symbols may have
+	 * same name but different addresses, this lists all the symbols.
+	 */
+	num_matched_functions = find_probe_functions(map, pp->function, syms);
+	if (num_matched_functions <= 0) {
+		if (num_matched_functions == -EACCES) {
+			pr_err("Failed to load symbols from %s\n",
+			       pev->target ?: "/proc/kallsyms");
+			if (pev->target)
+				pr_err("Please ensure the file is not stripped.\n");
+			else
+				pr_kallsyms_access_error();
+		} else
+			pr_err("Failed to find symbol %s in %s\n", pp->function,
+				pev->target ? : "kernel");
+		ret = -ENOENT;
+		goto out;
+	} else if (num_matched_functions > probe_conf.max_probes) {
+		pr_err("Too many functions matched in %s\n",
+			pev->target ? : "kernel");
+		ret = -E2BIG;
+		goto out;
+	}
+
+	/* Note that the symbols in the kmodule are not relocated */
+	if (!pev->uprobes && !pev->target &&
+			(!pp->retprobe || kretprobe_offset_is_supported())) {
+		reloc_sym = kernel_get_ref_reloc_sym(NULL);
+		if (!reloc_sym) {
+			pr_warning("Relocated base symbol is not found! "
+				   "Check /proc/sys/kernel/kptr_restrict\n"
+				   "and /proc/sys/kernel/perf_event_paranoid. "
+				   "Or run as privileged perf user.\n\n");
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+	/* Setup result trace-probe-events */
+	*tevs = zalloc(sizeof(*tev) * num_matched_functions);
+	if (!*tevs) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = 0;
+
+	for (j = 0; j < num_matched_functions; j++) {
+		sym = syms[j];
+
+		if (sym->type != STT_FUNC)
+			continue;
+
+		/* There can be duplicated symbols in the map */
+		for (i = 0; i < j; i++)
+			if (sym->start == syms[i]->start) {
+				pr_debug("Found duplicated symbol %s @ %" PRIx64 "\n",
+					 sym->name, sym->start);
+				break;
+			}
+		if (i != j)
+			continue;
+
+		tev = (*tevs) + ret;
+		tp = &tev->point;
+		if (ret == num_matched_functions) {
+			pr_warning("Too many symbols are listed. Skip it.\n");
+			break;
+		}
+		ret++;
+
+		if (pp->offset > sym->end - sym->start) {
+			pr_warning("Offset %ld is bigger than the size of %s\n",
+				   pp->offset, sym->name);
+			ret = -ENOENT;
+			goto err_out;
+		}
+		/* Add one probe point */
+		tp->address = map->unmap_ip(map, sym->start) + pp->offset;
+
+		/* Check the kprobe (not in module) is within .text  */
+		if (!pev->uprobes && !pev->target &&
+		    kprobe_warn_out_range(sym->name, tp->address)) {
+			tp->symbol = NULL;	/* Skip it */
+			skipped++;
+		} else if (reloc_sym) {
+			tp->symbol = strdup_or_goto(reloc_sym->name, nomem_out);
+			tp->offset = tp->address - reloc_sym->addr;
+		} else {
+			tp->symbol = strdup_or_goto(sym->name, nomem_out);
+			tp->offset = pp->offset;
+		}
+		tp->realname = strdup_or_goto(sym->name, nomem_out);
+
+		tp->retprobe = pp->retprobe;
+		if (pev->target) {
+			if (pev->uprobes) {
+				tev->point.module = strdup_or_goto(pev->target,
+								   nomem_out);
+			} else {
+				mod_name = find_module_name(pev->target);
+				tev->point.module =
+					strdup(mod_name ? mod_name : pev->target);
+				free(mod_name);
+				if (!tev->point.module)
+					goto nomem_out;
+			}
+		}
+		tev->uprobes = pev->uprobes;
+		tev->nargs = pev->nargs;
+		if (tev->nargs) {
+			tev->args = zalloc(sizeof(struct probe_trace_arg) *
+					   tev->nargs);
+			if (tev->args == NULL)
+				goto nomem_out;
+		}
+		for (i = 0; i < tev->nargs; i++) {
+			if (pev->args[i].name)
+				tev->args[i].name =
+					strdup_or_goto(pev->args[i].name,
+							nomem_out);
+
+			tev->args[i].value = strdup_or_goto(pev->args[i].var,
+							    nomem_out);
+			if (pev->args[i].type)
+				tev->args[i].type =
+					strdup_or_goto(pev->args[i].type,
+							nomem_out);
+		}
+		arch__fix_tev_from_maps(pev, tev, map, sym);
+	}
+	if (ret == skipped) {
+		ret = -ENOENT;
+		goto err_out;
+	}
+
+out:
+	map__put(map);
+	free(syms);
+	return ret;
+
+nomem_out:
+	ret = -ENOMEM;
+err_out:
+	clear_probe_trace_events(*tevs, num_matched_functions);
+	zfree(tevs);
+	goto out;
+}
+
+static int try_to_find_absolute_address(struct perf_probe_event *pev,
+					struct probe_trace_event **tevs)
+{
+	struct perf_probe_point *pp = &pev->point;
+	struct probe_trace_event *tev;
+	struct probe_trace_point *tp;
+	int i, err;
+
+	if (!(pev->point.function && !strncmp(pev->point.function, "0x", 2)))
+		return -EINVAL;
+	if (perf_probe_event_need_dwarf(pev))
+		return -EINVAL;
+
+	/*
+	 * This is 'perf probe /lib/libc.so 0xabcd'. Try to probe at
+	 * absolute address.
+	 *
+	 * Only one tev can be generated by this.
+	 */
+	*tevs = zalloc(sizeof(*tev));
+	if (!*tevs)
+		return -ENOMEM;
+
+	tev = *tevs;
+	tp = &tev->point;
+
+	/*
+	 * Don't use tp->offset, use address directly, because
+	 * in synthesize_probe_trace_command() address cannot be
+	 * zero.
+	 */
+	tp->address = pev->point.abs_address;
+	tp->retprobe = pp->retprobe;
+	tev->uprobes = pev->uprobes;
+
+	err = -ENOMEM;
+	/*
+	 * Give it a '0x' leading symbol name.
+	 * In __add_probe_trace_events, a NULL symbol is interpreted as
+	 * invalid.
+	 */
+	if (asprintf(&tp->symbol, "0x%" PRIx64, tp->address) < 0)
+		goto errout;
+
+	/* For kprobe, check range */
+	if ((!tev->uprobes) &&
+	    (kprobe_warn_out_range(tev->point.symbol,
+				   tev->point.address))) {
+		err = -EACCES;
+		goto errout;
+	}
+
+	if (asprintf(&tp->realname, "abs_%" PRIx64, tp->address) < 0)
+		goto errout;
+
+	if (pev->target) {
+		tp->module = strdup(pev->target);
+		if (!tp->module)
+			goto errout;
+	}
+
+	if (tev->group) {
+		tev->group = strdup(pev->group);
+		if (!tev->group)
+			goto errout;
+	}
+
+	if (pev->event) {
+		tev->event = strdup(pev->event);
+		if (!tev->event)
+			goto errout;
+	}
+
+	tev->nargs = pev->nargs;
+	tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
+	if (!tev->args)
+		goto errout;
+
+	for (i = 0; i < tev->nargs; i++)
+		copy_to_probe_trace_arg(&tev->args[i], &pev->args[i]);
+
+	return 1;
+
+errout:
+	clear_probe_trace_events(*tevs, 1);
+	*tevs = NULL;
+	return err;
+}
+
+/* Concatenate two arrays */
+static void *memcat(void *a, size_t sz_a, void *b, size_t sz_b)
+{
+	void *ret;
+
+	ret = malloc(sz_a + sz_b);
+	if (ret) {
+		memcpy(ret, a, sz_a);
+		memcpy(ret + sz_a, b, sz_b);
+	}
+	return ret;
+}
+
+static int
+concat_probe_trace_events(struct probe_trace_event **tevs, int *ntevs,
+			  struct probe_trace_event **tevs2, int ntevs2)
+{
+	struct probe_trace_event *new_tevs;
+	int ret = 0;
+
+	if (*ntevs == 0) {
+		*tevs = *tevs2;
+		*ntevs = ntevs2;
+		*tevs2 = NULL;
+		return 0;
+	}
+
+	if (*ntevs + ntevs2 > probe_conf.max_probes)
+		ret = -E2BIG;
+	else {
+		/* Concatenate the array of probe_trace_event */
+		new_tevs = memcat(*tevs, (*ntevs) * sizeof(**tevs),
+				  *tevs2, ntevs2 * sizeof(**tevs2));
+		if (!new_tevs)
+			ret = -ENOMEM;
+		else {
+			free(*tevs);
+			*tevs = new_tevs;
+			*ntevs += ntevs2;
+		}
+	}
+	if (ret < 0)
+		clear_probe_trace_events(*tevs2, ntevs2);
+	zfree(tevs2);
+
+	return ret;
+}
+
+/*
+ * Try to find probe_trace_event from given probe caches. Return the number
+ * of cached events found, if an error occurs return the error.
+ */
+static int find_cached_events(struct perf_probe_event *pev,
+			      struct probe_trace_event **tevs,
+			      const char *target)
+{
+	struct probe_cache *cache;
+	struct probe_cache_entry *entry;
+	struct probe_trace_event *tmp_tevs = NULL;
+	int ntevs = 0;
+	int ret = 0;
+
+	cache = probe_cache__new(target, pev->nsi);
+	/* Return 0 ("not found") if the target has no probe cache. */
+	if (!cache)
+		return 0;
+
+	for_each_probe_cache_entry(entry, cache) {
+		/* Skip the cache entry which has no name */
+		if (!entry->pev.event || !entry->pev.group)
+			continue;
+		if ((!pev->group || strglobmatch(entry->pev.group, pev->group)) &&
+		    strglobmatch(entry->pev.event, pev->event)) {
+			ret = probe_cache_entry__get_event(entry, &tmp_tevs);
+			if (ret > 0)
+				ret = concat_probe_trace_events(tevs, &ntevs,
+								&tmp_tevs, ret);
+			if (ret < 0)
+				break;
+		}
+	}
+	probe_cache__delete(cache);
+	if (ret < 0) {
+		clear_probe_trace_events(*tevs, ntevs);
+		zfree(tevs);
+	} else {
+		ret = ntevs;
+		if (ntevs > 0 && target && target[0] == '/')
+			pev->uprobes = true;
+	}
+
+	return ret;
+}
+
+/* Try to find probe_trace_event from all probe caches */
+static int find_cached_events_all(struct perf_probe_event *pev,
+				   struct probe_trace_event **tevs)
+{
+	struct probe_trace_event *tmp_tevs = NULL;
+	struct strlist *bidlist;
+	struct str_node *nd;
+	char *pathname;
+	int ntevs = 0;
+	int ret;
+
+	/* Get the buildid list of all valid caches */
+	bidlist = build_id_cache__list_all(true);
+	if (!bidlist) {
+		ret = -errno;
+		pr_debug("Failed to get buildids: %d\n", ret);
+		return ret;
+	}
+
+	ret = 0;
+	strlist__for_each_entry(nd, bidlist) {
+		pathname = build_id_cache__origname(nd->s);
+		ret = find_cached_events(pev, &tmp_tevs, pathname);
+		/* In the case of cnt == 0, we just skip it */
+		if (ret > 0)
+			ret = concat_probe_trace_events(tevs, &ntevs,
+							&tmp_tevs, ret);
+		free(pathname);
+		if (ret < 0)
+			break;
+	}
+	strlist__delete(bidlist);
+
+	if (ret < 0) {
+		clear_probe_trace_events(*tevs, ntevs);
+		zfree(tevs);
+	} else
+		ret = ntevs;
+
+	return ret;
+}
+
+static int find_probe_trace_events_from_cache(struct perf_probe_event *pev,
+					      struct probe_trace_event **tevs)
+{
+	struct probe_cache *cache;
+	struct probe_cache_entry *entry;
+	struct probe_trace_event *tev;
+	struct str_node *node;
+	int ret, i;
+
+	if (pev->sdt) {
+		/* For SDT/cached events, we use special search functions */
+		if (!pev->target)
+			return find_cached_events_all(pev, tevs);
+		else
+			return find_cached_events(pev, tevs, pev->target);
+	}
+	cache = probe_cache__new(pev->target, pev->nsi);
+	if (!cache)
+		return 0;
+
+	entry = probe_cache__find(cache, pev);
+	if (!entry) {
+		/* SDT must be in the cache */
+		ret = pev->sdt ? -ENOENT : 0;
+		goto out;
+	}
+
+	ret = strlist__nr_entries(entry->tevlist);
+	if (ret > probe_conf.max_probes) {
+		pr_debug("Too many entries matched in the cache of %s\n",
+			 pev->target ? : "kernel");
+		ret = -E2BIG;
+		goto out;
+	}
+
+	*tevs = zalloc(ret * sizeof(*tev));
+	if (!*tevs) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	i = 0;
+	strlist__for_each_entry(node, entry->tevlist) {
+		tev = &(*tevs)[i++];
+		ret = parse_probe_trace_command(node->s, tev);
+		if (ret < 0)
+			goto out;
+		/* Set the uprobes attribute as same as original */
+		tev->uprobes = pev->uprobes;
+	}
+	ret = i;
+
+out:
+	probe_cache__delete(cache);
+	return ret;
+}
+
+static int convert_to_probe_trace_events(struct perf_probe_event *pev,
+					 struct probe_trace_event **tevs)
+{
+	int ret;
+
+	if (!pev->group && !pev->sdt) {
+		/* Set group name if not given */
+		if (!pev->uprobes) {
+			pev->group = strdup(PERFPROBE_GROUP);
+			ret = pev->group ? 0 : -ENOMEM;
+		} else
+			ret = convert_exec_to_group(pev->target, &pev->group);
+		if (ret != 0) {
+			pr_warning("Failed to make a group name.\n");
+			return ret;
+		}
+	}
+
+	ret = try_to_find_absolute_address(pev, tevs);
+	if (ret > 0)
+		return ret;
+
+	/* At first, we need to lookup cache entry */
+	ret = find_probe_trace_events_from_cache(pev, tevs);
+	if (ret > 0 || pev->sdt)	/* SDT can be found only in the cache */
+		return ret == 0 ? -ENOENT : ret; /* Found in probe cache */
+
+	/* Convert perf_probe_event with debuginfo */
+	ret = try_to_find_probe_trace_events(pev, tevs);
+	if (ret != 0)
+		return ret;	/* Found in debuginfo or got an error */
+
+	return find_probe_trace_events_from_map(pev, tevs);
+}
+
+int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs)
+{
+	int i, ret;
+
+	/* Loop 1: convert all events */
+	for (i = 0; i < npevs; i++) {
+		/* Init kprobe blacklist if needed */
+		if (!pevs[i].uprobes)
+			kprobe_blacklist__init();
+		/* Convert with or without debuginfo */
+		ret  = convert_to_probe_trace_events(&pevs[i], &pevs[i].tevs);
+		if (ret < 0)
+			return ret;
+		pevs[i].ntevs = ret;
+	}
+	/* This just release blacklist only if allocated */
+	kprobe_blacklist__release();
+
+	return 0;
+}
+
+static int show_probe_trace_event(struct probe_trace_event *tev)
+{
+	char *buf = synthesize_probe_trace_command(tev);
+
+	if (!buf) {
+		pr_debug("Failed to synthesize probe trace event.\n");
+		return -EINVAL;
+	}
+
+	/* Showing definition always go stdout */
+	printf("%s\n", buf);
+	free(buf);
+
+	return 0;
+}
+
+int show_probe_trace_events(struct perf_probe_event *pevs, int npevs)
+{
+	struct strlist *namelist = strlist__new(NULL, NULL);
+	struct probe_trace_event *tev;
+	struct perf_probe_event *pev;
+	int i, j, ret = 0;
+
+	if (!namelist)
+		return -ENOMEM;
+
+	for (j = 0; j < npevs && !ret; j++) {
+		pev = &pevs[j];
+		for (i = 0; i < pev->ntevs && !ret; i++) {
+			tev = &pev->tevs[i];
+			/* Skip if the symbol is out of .text or blacklisted */
+			if (!tev->point.symbol && !pev->uprobes)
+				continue;
+
+			/* Set new name for tev (and update namelist) */
+			ret = probe_trace_event__set_name(tev, pev,
+							  namelist, true);
+			if (!ret)
+				ret = show_probe_trace_event(tev);
+		}
+	}
+	strlist__delete(namelist);
+
+	return ret;
+}
+
+static int show_bootconfig_event(struct probe_trace_event *tev)
+{
+	struct probe_trace_point *tp = &tev->point;
+	struct strbuf buf;
+	char *ret = NULL;
+	int err;
+
+	if (strbuf_init(&buf, 32) < 0)
+		return -ENOMEM;
+
+	err = synthesize_kprobe_trace_def(tp, &buf);
+	if (err >= 0)
+		err = synthesize_probe_trace_args(tev, &buf);
+	if (err >= 0)
+		ret = strbuf_detach(&buf, NULL);
+	strbuf_release(&buf);
+
+	if (ret) {
+		printf("'%s'", ret);
+		free(ret);
+	}
+
+	return err;
+}
+
+int show_bootconfig_events(struct perf_probe_event *pevs, int npevs)
+{
+	struct strlist *namelist = strlist__new(NULL, NULL);
+	struct probe_trace_event *tev;
+	struct perf_probe_event *pev;
+	char *cur_name = NULL;
+	int i, j, ret = 0;
+
+	if (!namelist)
+		return -ENOMEM;
+
+	for (j = 0; j < npevs && !ret; j++) {
+		pev = &pevs[j];
+		if (pev->group && strcmp(pev->group, "probe"))
+			pr_warning("WARN: Group name %s is ignored\n", pev->group);
+		if (pev->uprobes) {
+			pr_warning("ERROR: Bootconfig doesn't support uprobes\n");
+			ret = -EINVAL;
+			break;
+		}
+		for (i = 0; i < pev->ntevs && !ret; i++) {
+			tev = &pev->tevs[i];
+			/* Skip if the symbol is out of .text or blacklisted */
+			if (!tev->point.symbol && !pev->uprobes)
+				continue;
+
+			/* Set new name for tev (and update namelist) */
+			ret = probe_trace_event__set_name(tev, pev,
+							  namelist, true);
+			if (ret)
+				break;
+
+			if (!cur_name || strcmp(cur_name, tev->event)) {
+				printf("%sftrace.event.kprobes.%s.probe = ",
+					cur_name ? "\n" : "", tev->event);
+				cur_name = tev->event;
+			} else
+				printf(", ");
+			ret = show_bootconfig_event(tev);
+		}
+	}
+	printf("\n");
+	strlist__delete(namelist);
+
+	return ret;
+}
+
+int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs)
+{
+	int i, ret = 0;
+
+	/* Loop 2: add all events */
+	for (i = 0; i < npevs; i++) {
+		ret = __add_probe_trace_events(&pevs[i], pevs[i].tevs,
+					       pevs[i].ntevs,
+					       probe_conf.force_add);
+		if (ret < 0)
+			break;
+	}
+	return ret;
+}
+
+void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs)
+{
+	int i, j;
+	struct perf_probe_event *pev;
+
+	/* Loop 3: cleanup and free trace events  */
+	for (i = 0; i < npevs; i++) {
+		pev = &pevs[i];
+		for (j = 0; j < pevs[i].ntevs; j++)
+			clear_probe_trace_event(&pevs[i].tevs[j]);
+		zfree(&pevs[i].tevs);
+		pevs[i].ntevs = 0;
+		nsinfo__zput(pev->nsi);
+		clear_perf_probe_event(&pevs[i]);
+	}
+}
+
+int add_perf_probe_events(struct perf_probe_event *pevs, int npevs)
+{
+	int ret;
+
+	ret = init_probe_symbol_maps(pevs->uprobes);
+	if (ret < 0)
+		return ret;
+
+	ret = convert_perf_probe_events(pevs, npevs);
+	if (ret == 0)
+		ret = apply_perf_probe_events(pevs, npevs);
+
+	cleanup_perf_probe_events(pevs, npevs);
+
+	exit_probe_symbol_maps();
+	return ret;
+}
+
+int del_perf_probe_events(struct strfilter *filter)
+{
+	int ret, ret2, ufd = -1, kfd = -1;
+	char *str = strfilter__string(filter);
+
+	if (!str)
+		return -EINVAL;
+
+	/* Get current event names */
+	ret = probe_file__open_both(&kfd, &ufd, PF_FL_RW);
+	if (ret < 0)
+		goto out;
+
+	ret = probe_file__del_events(kfd, filter);
+	if (ret < 0 && ret != -ENOENT)
+		goto error;
+
+	ret2 = probe_file__del_events(ufd, filter);
+	if (ret2 < 0 && ret2 != -ENOENT) {
+		ret = ret2;
+		goto error;
+	}
+	ret = 0;
+
+error:
+	if (kfd >= 0)
+		close(kfd);
+	if (ufd >= 0)
+		close(ufd);
+out:
+	free(str);
+
+	return ret;
+}
+
+int show_available_funcs(const char *target, struct nsinfo *nsi,
+			 struct strfilter *_filter, bool user)
+{
+        struct rb_node *nd;
+	struct map *map;
+	int ret;
+
+	ret = init_probe_symbol_maps(user);
+	if (ret < 0)
+		return ret;
+
+	/* Get a symbol map */
+	map = get_target_map(target, nsi, user);
+	if (!map) {
+		pr_err("Failed to get a map for %s\n", (target) ? : "kernel");
+		return -EINVAL;
+	}
+
+	ret = map__load(map);
+	if (ret) {
+		if (ret == -2) {
+			char *str = strfilter__string(_filter);
+			pr_err("Failed to find symbols matched to \"%s\"\n",
+			       str);
+			free(str);
+		} else
+			pr_err("Failed to load symbols in %s\n",
+			       (target) ? : "kernel");
+		goto end;
+	}
+	if (!dso__sorted_by_name(map->dso))
+		dso__sort_by_name(map->dso);
+
+	/* Show all (filtered) symbols */
+	setup_pager();
+
+	for (nd = rb_first_cached(&map->dso->symbol_names); nd;
+	     nd = rb_next(nd)) {
+		struct symbol_name_rb_node *pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
+
+		if (strfilter__compare(_filter, pos->sym.name))
+			printf("%s\n", pos->sym.name);
+	}
+end:
+	map__put(map);
+	exit_probe_symbol_maps();
+
+	return ret;
+}
+
+int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
+			    struct perf_probe_arg *pvar)
+{
+	tvar->value = strdup(pvar->var);
+	if (tvar->value == NULL)
+		return -ENOMEM;
+	if (pvar->type) {
+		tvar->type = strdup(pvar->type);
+		if (tvar->type == NULL)
+			return -ENOMEM;
+	}
+	if (pvar->name) {
+		tvar->name = strdup(pvar->name);
+		if (tvar->name == NULL)
+			return -ENOMEM;
+	} else
+		tvar->name = NULL;
+	return 0;
+}
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
new file mode 100644
index 000000000..8ad5b1579
--- /dev/null
+++ b/tools/perf/util/probe-event.h
@@ -0,0 +1,202 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PROBE_EVENT_H
+#define _PROBE_EVENT_H
+
+#include <linux/compiler.h>
+#include <stdbool.h>
+
+struct intlist;
+struct nsinfo;
+
+/* Probe related configurations */
+struct probe_conf {
+	bool	show_ext_vars;
+	bool	show_location_range;
+	bool	force_add;
+	bool	no_inlines;
+	bool	cache;
+	bool	bootconfig;
+	int	max_probes;
+	unsigned long	magic_num;
+};
+extern struct probe_conf probe_conf;
+extern bool probe_event_dry_run;
+
+#define DEFAULT_PROBE_MAGIC_NUM	0xdeade12d	/* u32: 3735937325 */
+
+struct symbol;
+
+/* kprobe-tracer and uprobe-tracer tracing point */
+struct probe_trace_point {
+	char		*realname;	/* function real name (if needed) */
+	char		*symbol;	/* Base symbol */
+	char		*module;	/* Module name */
+	unsigned long	offset;		/* Offset from symbol */
+	unsigned long	ref_ctr_offset;	/* SDT reference counter offset */
+	u64		address;	/* Actual address of the trace point */
+	bool		retprobe;	/* Return probe flag */
+};
+
+/* probe-tracer tracing argument referencing offset */
+struct probe_trace_arg_ref {
+	struct probe_trace_arg_ref	*next;	/* Next reference */
+	long				offset;	/* Offset value */
+	bool				user_access;	/* User-memory access */
+};
+
+/* kprobe-tracer and uprobe-tracer tracing argument */
+struct probe_trace_arg {
+	char				*name;	/* Argument name */
+	char				*value;	/* Base value */
+	char				*type;	/* Type name */
+	struct probe_trace_arg_ref	*ref;	/* Referencing offset */
+};
+
+/* kprobe-tracer and uprobe-tracer tracing event (point + arg) */
+struct probe_trace_event {
+	char				*event;	/* Event name */
+	char				*group;	/* Group name */
+	struct probe_trace_point	point;	/* Trace point */
+	int				nargs;	/* Number of args */
+	bool				uprobes;	/* uprobes only */
+	struct probe_trace_arg		*args;	/* Arguments */
+};
+
+/* Perf probe probing point */
+struct perf_probe_point {
+	char		*file;		/* File path */
+	char		*function;	/* Function name */
+	int		line;		/* Line number */
+	bool		retprobe;	/* Return probe flag */
+	char		*lazy_line;	/* Lazy matching pattern */
+	unsigned long	offset;		/* Offset from function entry */
+	u64		abs_address;	/* Absolute address of the point */
+};
+
+/* Perf probe probing argument field chain */
+struct perf_probe_arg_field {
+	struct perf_probe_arg_field	*next;	/* Next field */
+	char				*name;	/* Name of the field */
+	long				index;	/* Array index number */
+	bool				ref;	/* Referencing flag */
+};
+
+/* Perf probe probing argument */
+struct perf_probe_arg {
+	char				*name;	/* Argument name */
+	char				*var;	/* Variable name */
+	char				*type;	/* Type name */
+	struct perf_probe_arg_field	*field;	/* Structure fields */
+	bool				user_access;	/* User-memory access */
+};
+
+/* Perf probe probing event (point + arg) */
+struct perf_probe_event {
+	char			*event;	/* Event name */
+	char			*group;	/* Group name */
+	struct perf_probe_point	point;	/* Probe point */
+	int			nargs;	/* Number of arguments */
+	bool			sdt;	/* SDT/cached event flag */
+	bool			uprobes;	/* Uprobe event flag */
+	char			*target;	/* Target binary */
+	struct perf_probe_arg	*args;	/* Arguments */
+	struct probe_trace_event *tevs;
+	int			ntevs;
+	struct nsinfo		*nsi;	/* Target namespace */
+};
+
+/* Line range */
+struct line_range {
+	char			*file;		/* File name */
+	char			*function;	/* Function name */
+	int			start;		/* Start line number */
+	int			end;		/* End line number */
+	int			offset;		/* Start line offset */
+	char			*path;		/* Real path name */
+	char			*comp_dir;	/* Compile directory */
+	struct intlist		*line_list;	/* Visible lines */
+};
+
+struct strlist;
+
+/* List of variables */
+struct variable_list {
+	struct probe_trace_point	point;	/* Actual probepoint */
+	struct strlist			*vars;	/* Available variables */
+};
+
+struct map;
+int init_probe_symbol_maps(bool user_only);
+void exit_probe_symbol_maps(void);
+
+/* Command string to events */
+int parse_perf_probe_command(const char *cmd, struct perf_probe_event *pev);
+int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev);
+
+/* Events to command string */
+char *synthesize_perf_probe_command(struct perf_probe_event *pev);
+char *synthesize_probe_trace_command(struct probe_trace_event *tev);
+char *synthesize_perf_probe_arg(struct perf_probe_arg *pa);
+char *synthesize_perf_probe_point(struct perf_probe_point *pp);
+
+int perf_probe_event__copy(struct perf_probe_event *dst,
+			   struct perf_probe_event *src);
+
+bool perf_probe_with_var(struct perf_probe_event *pev);
+
+/* Check the perf_probe_event needs debuginfo */
+bool perf_probe_event_need_dwarf(struct perf_probe_event *pev);
+
+/* Release event contents */
+void clear_perf_probe_event(struct perf_probe_event *pev);
+void clear_probe_trace_event(struct probe_trace_event *tev);
+
+/* Command string to line-range */
+int parse_line_range_desc(const char *cmd, struct line_range *lr);
+
+/* Release line range members */
+void line_range__clear(struct line_range *lr);
+
+/* Initialize line range */
+int line_range__init(struct line_range *lr);
+
+int add_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int show_probe_trace_events(struct perf_probe_event *pevs, int npevs);
+int show_bootconfig_events(struct perf_probe_event *pevs, int npevs);
+void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+
+struct strfilter;
+
+int del_perf_probe_events(struct strfilter *filter);
+
+int show_perf_probe_event(const char *group, const char *event,
+			  struct perf_probe_event *pev,
+			  const char *module, bool use_stdout);
+int show_perf_probe_events(struct strfilter *filter);
+int show_line_range(struct line_range *lr, const char *module,
+		    struct nsinfo *nsi, bool user);
+int show_available_vars(struct perf_probe_event *pevs, int npevs,
+			struct strfilter *filter);
+int show_available_funcs(const char *module, struct nsinfo *nsi,
+			 struct strfilter *filter, bool user);
+void arch__fix_tev_from_maps(struct perf_probe_event *pev,
+			     struct probe_trace_event *tev, struct map *map,
+			     struct symbol *sym);
+
+/* If there is no space to write, returns -E2BIG. */
+int e_snprintf(char *str, size_t size, const char *format, ...) __printf(3, 4);
+
+/* Maximum index number of event-name postfix */
+#define MAX_EVENT_INDEX	1024
+
+int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
+			    struct perf_probe_arg *pvar);
+
+struct map *get_target_map(const char *target, struct nsinfo *nsi, bool user);
+
+void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
+					   int ntevs);
+
+#endif /*_PROBE_EVENT_H */
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
new file mode 100644
index 000000000..3d50de321
--- /dev/null
+++ b/tools/perf/util/probe-file.c
@@ -0,0 +1,1200 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * probe-file.c : operate ftrace k/uprobe events files
+ *
+ * Written by Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ */
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include <linux/zalloc.h>
+#include "namespaces.h"
+#include "event.h"
+#include "strlist.h"
+#include "strfilter.h"
+#include "debug.h"
+#include "build-id.h"
+#include "dso.h"
+#include "color.h"
+#include "symbol.h"
+#include "strbuf.h"
+#include <api/fs/tracing_path.h>
+#include <api/fs/fs.h>
+#include "probe-event.h"
+#include "probe-file.h"
+#include "session.h"
+#include "perf_regs.h"
+#include "string2.h"
+
+/* 4096 - 2 ('\n' + '\0') */
+#define MAX_CMDLEN 4094
+
+static bool print_common_warning(int err, bool readwrite)
+{
+	if (err == -EACCES)
+		pr_warning("No permission to %s tracefs.\nPlease %s\n",
+			   readwrite ? "write" : "read",
+			   readwrite ? "run this command again with sudo." :
+				       "try 'sudo mount -o remount,mode=755 /sys/kernel/tracing/'");
+	else
+		return false;
+
+	return true;
+}
+
+static bool print_configure_probe_event(int kerr, int uerr)
+{
+	const char *config, *file;
+
+	if (kerr == -ENOENT && uerr == -ENOENT) {
+		file = "{k,u}probe_events";
+		config = "CONFIG_KPROBE_EVENTS=y and CONFIG_UPROBE_EVENTS=y";
+	} else if (kerr == -ENOENT) {
+		file = "kprobe_events";
+		config = "CONFIG_KPROBE_EVENTS=y";
+	} else if (uerr == -ENOENT) {
+		file = "uprobe_events";
+		config = "CONFIG_UPROBE_EVENTS=y";
+	} else
+		return false;
+
+	if (!debugfs__configured() && !tracefs__configured())
+		pr_warning("Debugfs or tracefs is not mounted\n"
+			   "Please try 'sudo mount -t tracefs nodev /sys/kernel/tracing/'\n");
+	else
+		pr_warning("%s/%s does not exist.\nPlease rebuild kernel with %s.\n",
+			   tracing_path_mount(), file, config);
+
+	return true;
+}
+
+static void print_open_warning(int err, bool uprobe, bool readwrite)
+{
+	char sbuf[STRERR_BUFSIZE];
+
+	if (print_common_warning(err, readwrite))
+		return;
+
+	if (print_configure_probe_event(uprobe ? 0 : err, uprobe ? err : 0))
+		return;
+
+	pr_warning("Failed to open %s/%cprobe_events: %s\n",
+		   tracing_path_mount(), uprobe ? 'u' : 'k',
+		   str_error_r(-err, sbuf, sizeof(sbuf)));
+}
+
+static void print_both_open_warning(int kerr, int uerr, bool readwrite)
+{
+	char sbuf[STRERR_BUFSIZE];
+
+	if (kerr == uerr && print_common_warning(kerr, readwrite))
+		return;
+
+	if (print_configure_probe_event(kerr, uerr))
+		return;
+
+	if (kerr < 0)
+		pr_warning("Failed to open %s/kprobe_events: %s.\n",
+			   tracing_path_mount(),
+			   str_error_r(-kerr, sbuf, sizeof(sbuf)));
+	if (uerr < 0)
+		pr_warning("Failed to open %s/uprobe_events: %s.\n",
+			   tracing_path_mount(),
+			   str_error_r(-uerr, sbuf, sizeof(sbuf)));
+}
+
+int open_trace_file(const char *trace_file, bool readwrite)
+{
+	char buf[PATH_MAX];
+	int ret;
+
+	ret = e_snprintf(buf, PATH_MAX, "%s/%s", tracing_path_mount(), trace_file);
+	if (ret >= 0) {
+		pr_debug("Opening %s write=%d\n", buf, readwrite);
+		if (readwrite && !probe_event_dry_run)
+			ret = open(buf, O_RDWR | O_APPEND, 0);
+		else
+			ret = open(buf, O_RDONLY, 0);
+
+		if (ret < 0)
+			ret = -errno;
+	}
+	return ret;
+}
+
+static int open_kprobe_events(bool readwrite)
+{
+	return open_trace_file("kprobe_events", readwrite);
+}
+
+static int open_uprobe_events(bool readwrite)
+{
+	return open_trace_file("uprobe_events", readwrite);
+}
+
+int probe_file__open(int flag)
+{
+	int fd;
+
+	if (flag & PF_FL_UPROBE)
+		fd = open_uprobe_events(flag & PF_FL_RW);
+	else
+		fd = open_kprobe_events(flag & PF_FL_RW);
+	if (fd < 0)
+		print_open_warning(fd, flag & PF_FL_UPROBE, flag & PF_FL_RW);
+
+	return fd;
+}
+
+int probe_file__open_both(int *kfd, int *ufd, int flag)
+{
+	if (!kfd || !ufd)
+		return -EINVAL;
+
+	*kfd = open_kprobe_events(flag & PF_FL_RW);
+	*ufd = open_uprobe_events(flag & PF_FL_RW);
+	if (*kfd < 0 && *ufd < 0) {
+		print_both_open_warning(*kfd, *ufd, flag & PF_FL_RW);
+		return *kfd;
+	}
+
+	return 0;
+}
+
+/* Get raw string list of current kprobe_events  or uprobe_events */
+struct strlist *probe_file__get_rawlist(int fd)
+{
+	int ret, idx, fddup;
+	FILE *fp;
+	char buf[MAX_CMDLEN];
+	char *p;
+	struct strlist *sl;
+
+	if (fd < 0)
+		return NULL;
+
+	sl = strlist__new(NULL, NULL);
+	if (sl == NULL)
+		return NULL;
+
+	fddup = dup(fd);
+	if (fddup < 0)
+		goto out_free_sl;
+
+	fp = fdopen(fddup, "r");
+	if (!fp)
+		goto out_close_fddup;
+
+	while (!feof(fp)) {
+		p = fgets(buf, MAX_CMDLEN, fp);
+		if (!p)
+			break;
+
+		idx = strlen(p) - 1;
+		if (p[idx] == '\n')
+			p[idx] = '\0';
+		ret = strlist__add(sl, buf);
+		if (ret < 0) {
+			pr_debug("strlist__add failed (%d)\n", ret);
+			goto out_close_fp;
+		}
+	}
+	fclose(fp);
+
+	return sl;
+
+out_close_fp:
+	fclose(fp);
+	goto out_free_sl;
+out_close_fddup:
+	close(fddup);
+out_free_sl:
+	strlist__delete(sl);
+	return NULL;
+}
+
+static struct strlist *__probe_file__get_namelist(int fd, bool include_group)
+{
+	char buf[128];
+	struct strlist *sl, *rawlist;
+	struct str_node *ent;
+	struct probe_trace_event tev;
+	int ret = 0;
+
+	memset(&tev, 0, sizeof(tev));
+	rawlist = probe_file__get_rawlist(fd);
+	if (!rawlist)
+		return NULL;
+	sl = strlist__new(NULL, NULL);
+	strlist__for_each_entry(ent, rawlist) {
+		ret = parse_probe_trace_command(ent->s, &tev);
+		if (ret < 0)
+			break;
+		if (include_group) {
+			ret = e_snprintf(buf, 128, "%s:%s", tev.group,
+					tev.event);
+			if (ret >= 0)
+				ret = strlist__add(sl, buf);
+		} else
+			ret = strlist__add(sl, tev.event);
+		clear_probe_trace_event(&tev);
+		/* Skip if there is same name multi-probe event in the list */
+		if (ret == -EEXIST)
+			ret = 0;
+		if (ret < 0)
+			break;
+	}
+	strlist__delete(rawlist);
+
+	if (ret < 0) {
+		strlist__delete(sl);
+		return NULL;
+	}
+	return sl;
+}
+
+/* Get current perf-probe event names */
+struct strlist *probe_file__get_namelist(int fd)
+{
+	return __probe_file__get_namelist(fd, false);
+}
+
+int probe_file__add_event(int fd, struct probe_trace_event *tev)
+{
+	int ret = 0;
+	char *buf = synthesize_probe_trace_command(tev);
+	char sbuf[STRERR_BUFSIZE];
+
+	if (!buf) {
+		pr_debug("Failed to synthesize probe trace event.\n");
+		return -EINVAL;
+	}
+
+	pr_debug("Writing event: %s\n", buf);
+	if (!probe_event_dry_run) {
+		if (write(fd, buf, strlen(buf)) < (int)strlen(buf)) {
+			ret = -errno;
+			pr_warning("Failed to write event: %s\n",
+				   str_error_r(errno, sbuf, sizeof(sbuf)));
+		}
+	}
+	free(buf);
+
+	return ret;
+}
+
+static int __del_trace_probe_event(int fd, struct str_node *ent)
+{
+	char *p;
+	char buf[128];
+	int ret;
+
+	/* Convert from perf-probe event to trace-probe event */
+	ret = e_snprintf(buf, 128, "-:%s", ent->s);
+	if (ret < 0)
+		goto error;
+
+	p = strchr(buf + 2, ':');
+	if (!p) {
+		pr_debug("Internal error: %s should have ':' but not.\n",
+			 ent->s);
+		ret = -ENOTSUP;
+		goto error;
+	}
+	*p = '/';
+
+	pr_debug("Writing event: %s\n", buf);
+	ret = write(fd, buf, strlen(buf));
+	if (ret < 0) {
+		ret = -errno;
+		goto error;
+	}
+
+	return 0;
+error:
+	pr_warning("Failed to delete event: %s\n",
+		   str_error_r(-ret, buf, sizeof(buf)));
+	return ret;
+}
+
+int probe_file__get_events(int fd, struct strfilter *filter,
+			   struct strlist *plist)
+{
+	struct strlist *namelist;
+	struct str_node *ent;
+	const char *p;
+	int ret = -ENOENT;
+
+	if (!plist)
+		return -EINVAL;
+
+	namelist = __probe_file__get_namelist(fd, true);
+	if (!namelist)
+		return -ENOENT;
+
+	strlist__for_each_entry(ent, namelist) {
+		p = strchr(ent->s, ':');
+		if ((p && strfilter__compare(filter, p + 1)) ||
+		    strfilter__compare(filter, ent->s)) {
+			ret = strlist__add(plist, ent->s);
+			if (ret == -ENOMEM) {
+				pr_err("strlist__add failed with -ENOMEM\n");
+				goto out;
+			}
+			ret = 0;
+		}
+	}
+out:
+	strlist__delete(namelist);
+
+	return ret;
+}
+
+int probe_file__del_strlist(int fd, struct strlist *namelist)
+{
+	int ret = 0;
+	struct str_node *ent;
+
+	strlist__for_each_entry(ent, namelist) {
+		ret = __del_trace_probe_event(fd, ent);
+		if (ret < 0)
+			break;
+	}
+	return ret;
+}
+
+int probe_file__del_events(int fd, struct strfilter *filter)
+{
+	struct strlist *namelist;
+	int ret;
+
+	namelist = strlist__new(NULL, NULL);
+	if (!namelist)
+		return -ENOMEM;
+
+	ret = probe_file__get_events(fd, filter, namelist);
+	if (ret < 0)
+		goto out;
+
+	ret = probe_file__del_strlist(fd, namelist);
+out:
+	strlist__delete(namelist);
+	return ret;
+}
+
+/* Caller must ensure to remove this entry from list */
+static void probe_cache_entry__delete(struct probe_cache_entry *entry)
+{
+	if (entry) {
+		BUG_ON(!list_empty(&entry->node));
+
+		strlist__delete(entry->tevlist);
+		clear_perf_probe_event(&entry->pev);
+		zfree(&entry->spev);
+		free(entry);
+	}
+}
+
+static struct probe_cache_entry *
+probe_cache_entry__new(struct perf_probe_event *pev)
+{
+	struct probe_cache_entry *entry = zalloc(sizeof(*entry));
+
+	if (entry) {
+		INIT_LIST_HEAD(&entry->node);
+		entry->tevlist = strlist__new(NULL, NULL);
+		if (!entry->tevlist)
+			zfree(&entry);
+		else if (pev) {
+			entry->spev = synthesize_perf_probe_command(pev);
+			if (!entry->spev ||
+			    perf_probe_event__copy(&entry->pev, pev) < 0) {
+				probe_cache_entry__delete(entry);
+				return NULL;
+			}
+		}
+	}
+
+	return entry;
+}
+
+int probe_cache_entry__get_event(struct probe_cache_entry *entry,
+				 struct probe_trace_event **tevs)
+{
+	struct probe_trace_event *tev;
+	struct str_node *node;
+	int ret, i;
+
+	ret = strlist__nr_entries(entry->tevlist);
+	if (ret > probe_conf.max_probes)
+		return -E2BIG;
+
+	*tevs = zalloc(ret * sizeof(*tev));
+	if (!*tevs)
+		return -ENOMEM;
+
+	i = 0;
+	strlist__for_each_entry(node, entry->tevlist) {
+		tev = &(*tevs)[i++];
+		ret = parse_probe_trace_command(node->s, tev);
+		if (ret < 0)
+			break;
+	}
+	return i;
+}
+
+/* For the kernel probe caches, pass target = NULL or DSO__NAME_KALLSYMS */
+static int probe_cache__open(struct probe_cache *pcache, const char *target,
+			     struct nsinfo *nsi)
+{
+	char cpath[PATH_MAX];
+	char sbuildid[SBUILD_ID_SIZE];
+	char *dir_name = NULL;
+	bool is_kallsyms = false;
+	int ret, fd;
+	struct nscookie nsc;
+
+	if (target && build_id_cache__cached(target)) {
+		/* This is a cached buildid */
+		strlcpy(sbuildid, target, SBUILD_ID_SIZE);
+		dir_name = build_id_cache__linkname(sbuildid, NULL, 0);
+		goto found;
+	}
+
+	if (!target || !strcmp(target, DSO__NAME_KALLSYMS)) {
+		target = DSO__NAME_KALLSYMS;
+		is_kallsyms = true;
+		ret = sysfs__sprintf_build_id("/", sbuildid);
+	} else {
+		nsinfo__mountns_enter(nsi, &nsc);
+		ret = filename__sprintf_build_id(target, sbuildid);
+		nsinfo__mountns_exit(&nsc);
+	}
+
+	if (ret < 0) {
+		pr_debug("Failed to get build-id from %s.\n", target);
+		return ret;
+	}
+
+	/* If we have no buildid cache, make it */
+	if (!build_id_cache__cached(sbuildid)) {
+		ret = build_id_cache__add_s(sbuildid, target, nsi,
+					    is_kallsyms, NULL);
+		if (ret < 0) {
+			pr_debug("Failed to add build-id cache: %s\n", target);
+			return ret;
+		}
+	}
+
+	dir_name = build_id_cache__cachedir(sbuildid, target, nsi, is_kallsyms,
+					    false);
+found:
+	if (!dir_name) {
+		pr_debug("Failed to get cache from %s\n", target);
+		return -ENOMEM;
+	}
+
+	snprintf(cpath, PATH_MAX, "%s/probes", dir_name);
+	fd = open(cpath, O_CREAT | O_RDWR, 0644);
+	if (fd < 0)
+		pr_debug("Failed to open cache(%d): %s\n", fd, cpath);
+	free(dir_name);
+	pcache->fd = fd;
+
+	return fd;
+}
+
+static int probe_cache__load(struct probe_cache *pcache)
+{
+	struct probe_cache_entry *entry = NULL;
+	char buf[MAX_CMDLEN], *p;
+	int ret = 0, fddup;
+	FILE *fp;
+
+	fddup = dup(pcache->fd);
+	if (fddup < 0)
+		return -errno;
+	fp = fdopen(fddup, "r");
+	if (!fp) {
+		close(fddup);
+		return -EINVAL;
+	}
+
+	while (!feof(fp)) {
+		if (!fgets(buf, MAX_CMDLEN, fp))
+			break;
+		p = strchr(buf, '\n');
+		if (p)
+			*p = '\0';
+		/* #perf_probe_event or %sdt_event */
+		if (buf[0] == '#' || buf[0] == '%') {
+			entry = probe_cache_entry__new(NULL);
+			if (!entry) {
+				ret = -ENOMEM;
+				goto out;
+			}
+			if (buf[0] == '%')
+				entry->sdt = true;
+			entry->spev = strdup(buf + 1);
+			if (entry->spev)
+				ret = parse_perf_probe_command(buf + 1,
+								&entry->pev);
+			else
+				ret = -ENOMEM;
+			if (ret < 0) {
+				probe_cache_entry__delete(entry);
+				goto out;
+			}
+			list_add_tail(&entry->node, &pcache->entries);
+		} else {	/* trace_probe_event */
+			if (!entry) {
+				ret = -EINVAL;
+				goto out;
+			}
+			ret = strlist__add(entry->tevlist, buf);
+			if (ret == -ENOMEM) {
+				pr_err("strlist__add failed with -ENOMEM\n");
+				goto out;
+			}
+		}
+	}
+out:
+	fclose(fp);
+	return ret;
+}
+
+static struct probe_cache *probe_cache__alloc(void)
+{
+	struct probe_cache *pcache = zalloc(sizeof(*pcache));
+
+	if (pcache) {
+		INIT_LIST_HEAD(&pcache->entries);
+		pcache->fd = -EINVAL;
+	}
+	return pcache;
+}
+
+void probe_cache__purge(struct probe_cache *pcache)
+{
+	struct probe_cache_entry *entry, *n;
+
+	list_for_each_entry_safe(entry, n, &pcache->entries, node) {
+		list_del_init(&entry->node);
+		probe_cache_entry__delete(entry);
+	}
+}
+
+void probe_cache__delete(struct probe_cache *pcache)
+{
+	if (!pcache)
+		return;
+
+	probe_cache__purge(pcache);
+	if (pcache->fd > 0)
+		close(pcache->fd);
+	free(pcache);
+}
+
+struct probe_cache *probe_cache__new(const char *target, struct nsinfo *nsi)
+{
+	struct probe_cache *pcache = probe_cache__alloc();
+	int ret;
+
+	if (!pcache)
+		return NULL;
+
+	ret = probe_cache__open(pcache, target, nsi);
+	if (ret < 0) {
+		pr_debug("Cache open error: %d\n", ret);
+		goto out_err;
+	}
+
+	ret = probe_cache__load(pcache);
+	if (ret < 0) {
+		pr_debug("Cache read error: %d\n", ret);
+		goto out_err;
+	}
+
+	return pcache;
+
+out_err:
+	probe_cache__delete(pcache);
+	return NULL;
+}
+
+static bool streql(const char *a, const char *b)
+{
+	if (a == b)
+		return true;
+
+	if (!a || !b)
+		return false;
+
+	return !strcmp(a, b);
+}
+
+struct probe_cache_entry *
+probe_cache__find(struct probe_cache *pcache, struct perf_probe_event *pev)
+{
+	struct probe_cache_entry *entry = NULL;
+	char *cmd = synthesize_perf_probe_command(pev);
+
+	if (!cmd)
+		return NULL;
+
+	for_each_probe_cache_entry(entry, pcache) {
+		if (pev->sdt) {
+			if (entry->pev.event &&
+			    streql(entry->pev.event, pev->event) &&
+			    (!pev->group ||
+			     streql(entry->pev.group, pev->group)))
+				goto found;
+
+			continue;
+		}
+		/* Hit if same event name or same command-string */
+		if ((pev->event &&
+		     (streql(entry->pev.group, pev->group) &&
+		      streql(entry->pev.event, pev->event))) ||
+		    (!strcmp(entry->spev, cmd)))
+			goto found;
+	}
+	entry = NULL;
+
+found:
+	free(cmd);
+	return entry;
+}
+
+struct probe_cache_entry *
+probe_cache__find_by_name(struct probe_cache *pcache,
+			  const char *group, const char *event)
+{
+	struct probe_cache_entry *entry = NULL;
+
+	for_each_probe_cache_entry(entry, pcache) {
+		/* Hit if same event name or same command-string */
+		if (streql(entry->pev.group, group) &&
+		    streql(entry->pev.event, event))
+			goto found;
+	}
+	entry = NULL;
+
+found:
+	return entry;
+}
+
+int probe_cache__add_entry(struct probe_cache *pcache,
+			   struct perf_probe_event *pev,
+			   struct probe_trace_event *tevs, int ntevs)
+{
+	struct probe_cache_entry *entry = NULL;
+	char *command;
+	int i, ret = 0;
+
+	if (!pcache || !pev || !tevs || ntevs <= 0) {
+		ret = -EINVAL;
+		goto out_err;
+	}
+
+	/* Remove old cache entry */
+	entry = probe_cache__find(pcache, pev);
+	if (entry) {
+		list_del_init(&entry->node);
+		probe_cache_entry__delete(entry);
+	}
+
+	ret = -ENOMEM;
+	entry = probe_cache_entry__new(pev);
+	if (!entry)
+		goto out_err;
+
+	for (i = 0; i < ntevs; i++) {
+		if (!tevs[i].point.symbol)
+			continue;
+
+		command = synthesize_probe_trace_command(&tevs[i]);
+		if (!command)
+			goto out_err;
+		ret = strlist__add(entry->tevlist, command);
+		if (ret == -ENOMEM) {
+			pr_err("strlist__add failed with -ENOMEM\n");
+			goto out_err;
+		}
+
+		free(command);
+	}
+	list_add_tail(&entry->node, &pcache->entries);
+	pr_debug("Added probe cache: %d\n", ntevs);
+	return 0;
+
+out_err:
+	pr_debug("Failed to add probe caches\n");
+	probe_cache_entry__delete(entry);
+	return ret;
+}
+
+#ifdef HAVE_GELF_GETNOTE_SUPPORT
+static unsigned long long sdt_note__get_addr(struct sdt_note *note)
+{
+	return note->bit32 ?
+		(unsigned long long)note->addr.a32[SDT_NOTE_IDX_LOC] :
+		(unsigned long long)note->addr.a64[SDT_NOTE_IDX_LOC];
+}
+
+static unsigned long long sdt_note__get_ref_ctr_offset(struct sdt_note *note)
+{
+	return note->bit32 ?
+		(unsigned long long)note->addr.a32[SDT_NOTE_IDX_REFCTR] :
+		(unsigned long long)note->addr.a64[SDT_NOTE_IDX_REFCTR];
+}
+
+static const char * const type_to_suffix[] = {
+	":s64", "", "", "", ":s32", "", ":s16", ":s8",
+	"", ":u8", ":u16", "", ":u32", "", "", "", ":u64"
+};
+
+/*
+ * Isolate the string number and convert it into a decimal value;
+ * this will be an index to get suffix of the uprobe name (defining
+ * the type)
+ */
+static int sdt_arg_parse_size(char *n_ptr, const char **suffix)
+{
+	long type_idx;
+
+	type_idx = strtol(n_ptr, NULL, 10);
+	if (type_idx < -8 || type_idx > 8) {
+		pr_debug4("Failed to get a valid sdt type\n");
+		return -1;
+	}
+
+	*suffix = type_to_suffix[type_idx + 8];
+	return 0;
+}
+
+static int synthesize_sdt_probe_arg(struct strbuf *buf, int i, const char *arg)
+{
+	char *op, *desc = strdup(arg), *new_op = NULL;
+	const char *suffix = "";
+	int ret = -1;
+
+	if (desc == NULL) {
+		pr_debug4("Allocation error\n");
+		return ret;
+	}
+
+	/*
+	 * Argument is in N@OP format. N is size of the argument and OP is
+	 * the actual assembly operand. N can be omitted; in that case
+	 * argument is just OP(without @).
+	 */
+	op = strchr(desc, '@');
+	if (op) {
+		op[0] = '\0';
+		op++;
+
+		if (sdt_arg_parse_size(desc, &suffix))
+			goto error;
+	} else {
+		op = desc;
+	}
+
+	ret = arch_sdt_arg_parse_op(op, &new_op);
+
+	if (ret < 0)
+		goto error;
+
+	if (ret == SDT_ARG_VALID) {
+		ret = strbuf_addf(buf, " arg%d=%s%s", i + 1, new_op, suffix);
+		if (ret < 0)
+			goto error;
+	}
+
+	ret = 0;
+error:
+	free(desc);
+	free(new_op);
+	return ret;
+}
+
+static char *synthesize_sdt_probe_command(struct sdt_note *note,
+					const char *pathname,
+					const char *sdtgrp)
+{
+	struct strbuf buf;
+	char *ret = NULL;
+	int i, args_count, err;
+	unsigned long long ref_ctr_offset;
+	char *arg;
+	int arg_idx = 0;
+
+	if (strbuf_init(&buf, 32) < 0)
+		return NULL;
+
+	err = strbuf_addf(&buf, "p:%s/%s %s:0x%llx",
+			sdtgrp, note->name, pathname,
+			sdt_note__get_addr(note));
+
+	ref_ctr_offset = sdt_note__get_ref_ctr_offset(note);
+	if (ref_ctr_offset && err >= 0)
+		err = strbuf_addf(&buf, "(0x%llx)", ref_ctr_offset);
+
+	if (err < 0)
+		goto error;
+
+	if (!note->args)
+		goto out;
+
+	if (note->args) {
+		char **args = argv_split(note->args, &args_count);
+
+		if (args == NULL)
+			goto error;
+
+		for (i = 0; i < args_count; ) {
+			/*
+			 * FIXUP: Arm64 ELF section '.note.stapsdt' uses string
+			 * format "-4@[sp, NUM]" if a probe is to access data in
+			 * the stack, e.g. below is an example for the SDT
+			 * Arguments:
+			 *
+			 *   Arguments: -4@[sp, 12] -4@[sp, 8] -4@[sp, 4]
+			 *
+			 * Since the string introduces an extra space character
+			 * in the middle of square brackets, the argument is
+			 * divided into two items.  Fixup for this case, if an
+			 * item contains sub string "[sp,", need to concatenate
+			 * the two items.
+			 */
+			if (strstr(args[i], "[sp,") && (i+1) < args_count) {
+				err = asprintf(&arg, "%s %s", args[i], args[i+1]);
+				i += 2;
+			} else {
+				err = asprintf(&arg, "%s", args[i]);
+				i += 1;
+			}
+
+			/* Failed to allocate memory */
+			if (err < 0) {
+				argv_free(args);
+				goto error;
+			}
+
+			if (synthesize_sdt_probe_arg(&buf, arg_idx, arg) < 0) {
+				free(arg);
+				argv_free(args);
+				goto error;
+			}
+
+			free(arg);
+			arg_idx++;
+		}
+
+		argv_free(args);
+	}
+
+out:
+	ret = strbuf_detach(&buf, NULL);
+error:
+	strbuf_release(&buf);
+	return ret;
+}
+
+int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname)
+{
+	struct probe_cache_entry *entry = NULL;
+	struct list_head sdtlist;
+	struct sdt_note *note;
+	char *buf;
+	char sdtgrp[64];
+	int ret;
+
+	INIT_LIST_HEAD(&sdtlist);
+	ret = get_sdt_note_list(&sdtlist, pathname);
+	if (ret < 0) {
+		pr_debug4("Failed to get sdt note: %d\n", ret);
+		return ret;
+	}
+	list_for_each_entry(note, &sdtlist, note_list) {
+		ret = snprintf(sdtgrp, 64, "sdt_%s", note->provider);
+		if (ret < 0)
+			break;
+		/* Try to find same-name entry */
+		entry = probe_cache__find_by_name(pcache, sdtgrp, note->name);
+		if (!entry) {
+			entry = probe_cache_entry__new(NULL);
+			if (!entry) {
+				ret = -ENOMEM;
+				break;
+			}
+			entry->sdt = true;
+			ret = asprintf(&entry->spev, "%s:%s=%s", sdtgrp,
+					note->name, note->name);
+			if (ret < 0)
+				break;
+			entry->pev.event = strdup(note->name);
+			entry->pev.group = strdup(sdtgrp);
+			list_add_tail(&entry->node, &pcache->entries);
+		}
+		buf = synthesize_sdt_probe_command(note, pathname, sdtgrp);
+		if (!buf) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		ret = strlist__add(entry->tevlist, buf);
+
+		free(buf);
+		entry = NULL;
+
+		if (ret == -ENOMEM) {
+			pr_err("strlist__add failed with -ENOMEM\n");
+			break;
+		}
+	}
+	if (entry) {
+		list_del_init(&entry->node);
+		probe_cache_entry__delete(entry);
+	}
+	cleanup_sdt_note_list(&sdtlist);
+	return ret;
+}
+#endif
+
+static int probe_cache_entry__write(struct probe_cache_entry *entry, int fd)
+{
+	struct str_node *snode;
+	struct stat st;
+	struct iovec iov[3];
+	const char *prefix = entry->sdt ? "%" : "#";
+	int ret;
+	/* Save stat for rollback */
+	ret = fstat(fd, &st);
+	if (ret < 0)
+		return ret;
+
+	pr_debug("Writing cache: %s%s\n", prefix, entry->spev);
+	iov[0].iov_base = (void *)prefix; iov[0].iov_len = 1;
+	iov[1].iov_base = entry->spev; iov[1].iov_len = strlen(entry->spev);
+	iov[2].iov_base = (void *)"\n"; iov[2].iov_len = 1;
+	ret = writev(fd, iov, 3);
+	if (ret < (int)iov[1].iov_len + 2)
+		goto rollback;
+
+	strlist__for_each_entry(snode, entry->tevlist) {
+		iov[0].iov_base = (void *)snode->s;
+		iov[0].iov_len = strlen(snode->s);
+		iov[1].iov_base = (void *)"\n"; iov[1].iov_len = 1;
+		ret = writev(fd, iov, 2);
+		if (ret < (int)iov[0].iov_len + 1)
+			goto rollback;
+	}
+	return 0;
+
+rollback:
+	/* Rollback to avoid cache file corruption */
+	if (ret > 0)
+		ret = -1;
+	if (ftruncate(fd, st.st_size) < 0)
+		ret = -2;
+
+	return ret;
+}
+
+int probe_cache__commit(struct probe_cache *pcache)
+{
+	struct probe_cache_entry *entry;
+	int ret = 0;
+
+	/* TBD: if we do not update existing entries, skip it */
+	ret = lseek(pcache->fd, 0, SEEK_SET);
+	if (ret < 0)
+		goto out;
+
+	ret = ftruncate(pcache->fd, 0);
+	if (ret < 0)
+		goto out;
+
+	for_each_probe_cache_entry(entry, pcache) {
+		ret = probe_cache_entry__write(entry, pcache->fd);
+		pr_debug("Cache committed: %d\n", ret);
+		if (ret < 0)
+			break;
+	}
+out:
+	return ret;
+}
+
+static bool probe_cache_entry__compare(struct probe_cache_entry *entry,
+				       struct strfilter *filter)
+{
+	char buf[128], *ptr = entry->spev;
+
+	if (entry->pev.event) {
+		snprintf(buf, 128, "%s:%s", entry->pev.group, entry->pev.event);
+		ptr = buf;
+	}
+	return strfilter__compare(filter, ptr);
+}
+
+int probe_cache__filter_purge(struct probe_cache *pcache,
+			      struct strfilter *filter)
+{
+	struct probe_cache_entry *entry, *tmp;
+
+	list_for_each_entry_safe(entry, tmp, &pcache->entries, node) {
+		if (probe_cache_entry__compare(entry, filter)) {
+			pr_info("Removed cached event: %s\n", entry->spev);
+			list_del_init(&entry->node);
+			probe_cache_entry__delete(entry);
+		}
+	}
+	return 0;
+}
+
+static int probe_cache__show_entries(struct probe_cache *pcache,
+				     struct strfilter *filter)
+{
+	struct probe_cache_entry *entry;
+
+	for_each_probe_cache_entry(entry, pcache) {
+		if (probe_cache_entry__compare(entry, filter))
+			printf("%s\n", entry->spev);
+	}
+	return 0;
+}
+
+/* Show all cached probes */
+int probe_cache__show_all_caches(struct strfilter *filter)
+{
+	struct probe_cache *pcache;
+	struct strlist *bidlist;
+	struct str_node *nd;
+	char *buf = strfilter__string(filter);
+
+	pr_debug("list cache with filter: %s\n", buf);
+	free(buf);
+
+	bidlist = build_id_cache__list_all(true);
+	if (!bidlist) {
+		pr_debug("Failed to get buildids: %d\n", errno);
+		return -EINVAL;
+	}
+	strlist__for_each_entry(nd, bidlist) {
+		pcache = probe_cache__new(nd->s, NULL);
+		if (!pcache)
+			continue;
+		if (!list_empty(&pcache->entries)) {
+			buf = build_id_cache__origname(nd->s);
+			printf("%s (%s):\n", buf, nd->s);
+			free(buf);
+			probe_cache__show_entries(pcache, filter);
+		}
+		probe_cache__delete(pcache);
+	}
+	strlist__delete(bidlist);
+
+	return 0;
+}
+
+enum ftrace_readme {
+	FTRACE_README_PROBE_TYPE_X = 0,
+	FTRACE_README_KRETPROBE_OFFSET,
+	FTRACE_README_UPROBE_REF_CTR,
+	FTRACE_README_USER_ACCESS,
+	FTRACE_README_MULTIPROBE_EVENT,
+	FTRACE_README_IMMEDIATE_VALUE,
+	FTRACE_README_END,
+};
+
+static struct {
+	const char *pattern;
+	bool avail;
+} ftrace_readme_table[] = {
+#define DEFINE_TYPE(idx, pat)			\
+	[idx] = {.pattern = pat, .avail = false}
+	DEFINE_TYPE(FTRACE_README_PROBE_TYPE_X, "*type: * x8/16/32/64,*"),
+	DEFINE_TYPE(FTRACE_README_KRETPROBE_OFFSET, "*place (kretprobe): *"),
+	DEFINE_TYPE(FTRACE_README_UPROBE_REF_CTR, "*ref_ctr_offset*"),
+	DEFINE_TYPE(FTRACE_README_USER_ACCESS, "*u]<offset>*"),
+	DEFINE_TYPE(FTRACE_README_MULTIPROBE_EVENT, "*Create/append/*"),
+	DEFINE_TYPE(FTRACE_README_IMMEDIATE_VALUE, "*\\imm-value,*"),
+};
+
+static bool scan_ftrace_readme(enum ftrace_readme type)
+{
+	int fd;
+	FILE *fp;
+	char *buf = NULL;
+	size_t len = 0;
+	bool ret = false;
+	static bool scanned = false;
+
+	if (scanned)
+		goto result;
+
+	fd = open_trace_file("README", false);
+	if (fd < 0)
+		return ret;
+
+	fp = fdopen(fd, "r");
+	if (!fp) {
+		close(fd);
+		return ret;
+	}
+
+	while (getline(&buf, &len, fp) > 0)
+		for (enum ftrace_readme i = 0; i < FTRACE_README_END; i++)
+			if (!ftrace_readme_table[i].avail)
+				ftrace_readme_table[i].avail =
+					strglobmatch(buf, ftrace_readme_table[i].pattern);
+	scanned = true;
+
+	fclose(fp);
+	free(buf);
+
+result:
+	if (type >= FTRACE_README_END)
+		return false;
+
+	return ftrace_readme_table[type].avail;
+}
+
+bool probe_type_is_available(enum probe_type type)
+{
+	if (type >= PROBE_TYPE_END)
+		return false;
+	else if (type == PROBE_TYPE_X)
+		return scan_ftrace_readme(FTRACE_README_PROBE_TYPE_X);
+
+	return true;
+}
+
+bool kretprobe_offset_is_supported(void)
+{
+	return scan_ftrace_readme(FTRACE_README_KRETPROBE_OFFSET);
+}
+
+bool uprobe_ref_ctr_is_supported(void)
+{
+	return scan_ftrace_readme(FTRACE_README_UPROBE_REF_CTR);
+}
+
+bool user_access_is_supported(void)
+{
+	return scan_ftrace_readme(FTRACE_README_USER_ACCESS);
+}
+
+bool multiprobe_event_is_supported(void)
+{
+	return scan_ftrace_readme(FTRACE_README_MULTIPROBE_EVENT);
+}
+
+bool immediate_value_is_supported(void)
+{
+	return scan_ftrace_readme(FTRACE_README_IMMEDIATE_VALUE);
+}
diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h
new file mode 100644
index 000000000..0dba88c0f
--- /dev/null
+++ b/tools/perf/util/probe-file.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PROBE_FILE_H
+#define __PROBE_FILE_H
+
+#include "probe-event.h"
+
+struct strlist;
+struct strfilter;
+
+/* Cache of probe definitions */
+struct probe_cache_entry {
+	struct list_head	node;
+	bool			sdt;
+	struct perf_probe_event pev;
+	char			*spev;
+	struct strlist		*tevlist;
+};
+
+struct probe_cache {
+	int	fd;
+	struct list_head entries;
+};
+
+enum probe_type {
+	PROBE_TYPE_U = 0,
+	PROBE_TYPE_S,
+	PROBE_TYPE_X,
+	PROBE_TYPE_STRING,
+	PROBE_TYPE_BITFIELD,
+	PROBE_TYPE_END,
+};
+
+#define PF_FL_UPROBE	1
+#define PF_FL_RW	2
+#define for_each_probe_cache_entry(entry, pcache) \
+	list_for_each_entry(entry, &pcache->entries, node)
+
+/* probe-file.c depends on libelf */
+#ifdef HAVE_LIBELF_SUPPORT
+int open_trace_file(const char *trace_file, bool readwrite);
+int probe_file__open(int flag);
+int probe_file__open_both(int *kfd, int *ufd, int flag);
+struct strlist *probe_file__get_namelist(int fd);
+struct strlist *probe_file__get_rawlist(int fd);
+int probe_file__add_event(int fd, struct probe_trace_event *tev);
+
+int probe_file__del_events(int fd, struct strfilter *filter);
+int probe_file__get_events(int fd, struct strfilter *filter,
+				  struct strlist *plist);
+int probe_file__del_strlist(int fd, struct strlist *namelist);
+
+int probe_cache_entry__get_event(struct probe_cache_entry *entry,
+				 struct probe_trace_event **tevs);
+
+struct probe_cache *probe_cache__new(const char *target, struct nsinfo *nsi);
+int probe_cache__add_entry(struct probe_cache *pcache,
+			   struct perf_probe_event *pev,
+			   struct probe_trace_event *tevs, int ntevs);
+int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname);
+int probe_cache__commit(struct probe_cache *pcache);
+void probe_cache__purge(struct probe_cache *pcache);
+void probe_cache__delete(struct probe_cache *pcache);
+int probe_cache__filter_purge(struct probe_cache *pcache,
+			      struct strfilter *filter);
+struct probe_cache_entry *probe_cache__find(struct probe_cache *pcache,
+					    struct perf_probe_event *pev);
+struct probe_cache_entry *probe_cache__find_by_name(struct probe_cache *pcache,
+					const char *group, const char *event);
+int probe_cache__show_all_caches(struct strfilter *filter);
+bool probe_type_is_available(enum probe_type type);
+bool kretprobe_offset_is_supported(void);
+bool uprobe_ref_ctr_is_supported(void);
+bool user_access_is_supported(void);
+bool multiprobe_event_is_supported(void);
+bool immediate_value_is_supported(void);
+#else	/* ! HAVE_LIBELF_SUPPORT */
+static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused, struct nsinfo *nsi __maybe_unused)
+{
+	return NULL;
+}
+#define probe_cache__delete(pcache) do {} while (0)
+#endif
+#endif
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
new file mode 100644
index 000000000..50d861a80
--- /dev/null
+++ b/tools/perf/util/probe-finder.c
@@ -0,0 +1,2096 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * probe-finder.c : C expression to kprobe event converter
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ */
+
+#include <inttypes.h>
+#include <sys/utsname.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <dwarf-regs.h>
+
+#include <linux/bitops.h>
+#include <linux/zalloc.h>
+#include "event.h"
+#include "dso.h"
+#include "debug.h"
+#include "intlist.h"
+#include "strbuf.h"
+#include "strlist.h"
+#include "symbol.h"
+#include "probe-finder.h"
+#include "probe-file.h"
+#include "string2.h"
+
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+#include <elfutils/debuginfod.h>
+#endif
+
+/* Kprobe tracer basic type is up to u64 */
+#define MAX_BASIC_TYPE_BITS	64
+
+/* Dwarf FL wrappers */
+static char *debuginfo_path;	/* Currently dummy */
+
+static const Dwfl_Callbacks offline_callbacks = {
+	.find_debuginfo = dwfl_standard_find_debuginfo,
+	.debuginfo_path = &debuginfo_path,
+
+	.section_address = dwfl_offline_section_address,
+
+	/* We use this table for core files too.  */
+	.find_elf = dwfl_build_id_find_elf,
+};
+
+/* Get a Dwarf from offline image */
+static int debuginfo__init_offline_dwarf(struct debuginfo *dbg,
+					 const char *path)
+{
+	GElf_Addr dummy;
+	int fd;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return fd;
+
+	dbg->dwfl = dwfl_begin(&offline_callbacks);
+	if (!dbg->dwfl)
+		goto error;
+
+	dwfl_report_begin(dbg->dwfl);
+	dbg->mod = dwfl_report_offline(dbg->dwfl, "", "", fd);
+	if (!dbg->mod)
+		goto error;
+
+	dbg->dbg = dwfl_module_getdwarf(dbg->mod, &dbg->bias);
+	if (!dbg->dbg)
+		goto error;
+
+	dwfl_module_build_id(dbg->mod, &dbg->build_id, &dummy);
+
+	dwfl_report_end(dbg->dwfl, NULL, NULL);
+
+	return 0;
+error:
+	if (dbg->dwfl)
+		dwfl_end(dbg->dwfl);
+	else
+		close(fd);
+	memset(dbg, 0, sizeof(*dbg));
+
+	return -ENOENT;
+}
+
+static struct debuginfo *__debuginfo__new(const char *path)
+{
+	struct debuginfo *dbg = zalloc(sizeof(*dbg));
+	if (!dbg)
+		return NULL;
+
+	if (debuginfo__init_offline_dwarf(dbg, path) < 0)
+		zfree(&dbg);
+	if (dbg)
+		pr_debug("Open Debuginfo file: %s\n", path);
+	return dbg;
+}
+
+enum dso_binary_type distro_dwarf_types[] = {
+	DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
+	DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
+	DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
+	DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
+	DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO,
+	DSO_BINARY_TYPE__NOT_FOUND,
+};
+
+struct debuginfo *debuginfo__new(const char *path)
+{
+	enum dso_binary_type *type;
+	char buf[PATH_MAX], nil = '\0';
+	struct dso *dso;
+	struct debuginfo *dinfo = NULL;
+	struct build_id bid;
+
+	/* Try to open distro debuginfo files */
+	dso = dso__new(path);
+	if (!dso)
+		goto out;
+
+	/* Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO */
+	if (is_regular_file(path) && filename__read_build_id(path, &bid) > 0)
+		dso__set_build_id(dso, &bid);
+
+	for (type = distro_dwarf_types;
+	     !dinfo && *type != DSO_BINARY_TYPE__NOT_FOUND;
+	     type++) {
+		if (dso__read_binary_type_filename(dso, *type, &nil,
+						   buf, PATH_MAX) < 0)
+			continue;
+		dinfo = __debuginfo__new(buf);
+	}
+	dso__put(dso);
+
+out:
+	/* if failed to open all distro debuginfo, open given binary */
+	return dinfo ? : __debuginfo__new(path);
+}
+
+void debuginfo__delete(struct debuginfo *dbg)
+{
+	if (dbg) {
+		if (dbg->dwfl)
+			dwfl_end(dbg->dwfl);
+		free(dbg);
+	}
+}
+
+/*
+ * Probe finder related functions
+ */
+
+static struct probe_trace_arg_ref *alloc_trace_arg_ref(long offs)
+{
+	struct probe_trace_arg_ref *ref;
+	ref = zalloc(sizeof(struct probe_trace_arg_ref));
+	if (ref != NULL)
+		ref->offset = offs;
+	return ref;
+}
+
+/*
+ * Convert a location into trace_arg.
+ * If tvar == NULL, this just checks variable can be converted.
+ * If fentry == true and vr_die is a parameter, do heuristic search
+ * for the location fuzzed by function entry mcount.
+ */
+static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
+				     Dwarf_Op *fb_ops, Dwarf_Die *sp_die,
+				     unsigned int machine,
+				     struct probe_trace_arg *tvar)
+{
+	Dwarf_Attribute attr;
+	Dwarf_Addr tmp = 0;
+	Dwarf_Op *op;
+	size_t nops;
+	unsigned int regn;
+	Dwarf_Word offs = 0;
+	bool ref = false;
+	const char *regs;
+	int ret, ret2 = 0;
+
+	if (dwarf_attr(vr_die, DW_AT_external, &attr) != NULL)
+		goto static_var;
+
+	/* Constant value */
+	if (dwarf_attr(vr_die, DW_AT_const_value, &attr) &&
+	    immediate_value_is_supported()) {
+		Dwarf_Sword snum;
+
+		if (!tvar)
+			return 0;
+
+		dwarf_formsdata(&attr, &snum);
+		ret = asprintf(&tvar->value, "\\%ld", (long)snum);
+
+		return ret < 0 ? -ENOMEM : 0;
+	}
+
+	/* TODO: handle more than 1 exprs */
+	if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL)
+		return -EINVAL;	/* Broken DIE ? */
+	if (dwarf_getlocation_addr(&attr, addr, &op, &nops, 1) <= 0) {
+		ret = dwarf_entrypc(sp_die, &tmp);
+		if (ret)
+			return -ENOENT;
+
+		if (probe_conf.show_location_range &&
+			(dwarf_tag(vr_die) == DW_TAG_variable)) {
+			ret2 = -ERANGE;
+		} else if (addr != tmp ||
+			dwarf_tag(vr_die) != DW_TAG_formal_parameter) {
+			return -ENOENT;
+		}
+
+		ret = dwarf_highpc(sp_die, &tmp);
+		if (ret)
+			return -ENOENT;
+		/*
+		 * This is fuzzed by fentry mcount. We try to find the
+		 * parameter location at the earliest address.
+		 */
+		for (addr += 1; addr <= tmp; addr++) {
+			if (dwarf_getlocation_addr(&attr, addr, &op,
+						   &nops, 1) > 0)
+				goto found;
+		}
+		return -ENOENT;
+	}
+found:
+	if (nops == 0)
+		/* TODO: Support const_value */
+		return -ENOENT;
+
+	if (op->atom == DW_OP_addr) {
+static_var:
+		if (!tvar)
+			return ret2;
+		/* Static variables on memory (not stack), make @varname */
+		ret = strlen(dwarf_diename(vr_die));
+		tvar->value = zalloc(ret + 2);
+		if (tvar->value == NULL)
+			return -ENOMEM;
+		snprintf(tvar->value, ret + 2, "@%s", dwarf_diename(vr_die));
+		tvar->ref = alloc_trace_arg_ref((long)offs);
+		if (tvar->ref == NULL)
+			return -ENOMEM;
+		return ret2;
+	}
+
+	/* If this is based on frame buffer, set the offset */
+	if (op->atom == DW_OP_fbreg) {
+		if (fb_ops == NULL)
+			return -ENOTSUP;
+		ref = true;
+		offs = op->number;
+		op = &fb_ops[0];
+	}
+
+	if (op->atom >= DW_OP_breg0 && op->atom <= DW_OP_breg31) {
+		regn = op->atom - DW_OP_breg0;
+		offs += op->number;
+		ref = true;
+	} else if (op->atom >= DW_OP_reg0 && op->atom <= DW_OP_reg31) {
+		regn = op->atom - DW_OP_reg0;
+	} else if (op->atom == DW_OP_bregx) {
+		regn = op->number;
+		offs += op->number2;
+		ref = true;
+	} else if (op->atom == DW_OP_regx) {
+		regn = op->number;
+	} else {
+		pr_debug("DW_OP %x is not supported.\n", op->atom);
+		return -ENOTSUP;
+	}
+
+	if (!tvar)
+		return ret2;
+
+	regs = get_dwarf_regstr(regn, machine);
+	if (!regs) {
+		/* This should be a bug in DWARF or this tool */
+		pr_warning("Mapping for the register number %u "
+			   "missing on this architecture.\n", regn);
+		return -ENOTSUP;
+	}
+
+	tvar->value = strdup(regs);
+	if (tvar->value == NULL)
+		return -ENOMEM;
+
+	if (ref) {
+		tvar->ref = alloc_trace_arg_ref((long)offs);
+		if (tvar->ref == NULL)
+			return -ENOMEM;
+	}
+	return ret2;
+}
+
+#define BYTES_TO_BITS(nb)	((nb) * BITS_PER_LONG / sizeof(long))
+
+static int convert_variable_type(Dwarf_Die *vr_die,
+				 struct probe_trace_arg *tvar,
+				 const char *cast, bool user_access)
+{
+	struct probe_trace_arg_ref **ref_ptr = &tvar->ref;
+	Dwarf_Die type;
+	char buf[16];
+	char sbuf[STRERR_BUFSIZE];
+	int bsize, boffs, total;
+	int ret;
+	char prefix;
+
+	/* TODO: check all types */
+	if (cast && strcmp(cast, "string") != 0 && strcmp(cast, "ustring") &&
+	    strcmp(cast, "x") != 0 &&
+	    strcmp(cast, "s") != 0 && strcmp(cast, "u") != 0) {
+		/* Non string type is OK */
+		/* and respect signedness/hexadecimal cast */
+		tvar->type = strdup(cast);
+		return (tvar->type == NULL) ? -ENOMEM : 0;
+	}
+
+	bsize = dwarf_bitsize(vr_die);
+	if (bsize > 0) {
+		/* This is a bitfield */
+		boffs = dwarf_bitoffset(vr_die);
+		total = dwarf_bytesize(vr_die);
+		if (boffs < 0 || total < 0)
+			return -ENOENT;
+		ret = snprintf(buf, 16, "b%d@%d/%zd", bsize, boffs,
+				BYTES_TO_BITS(total));
+		goto formatted;
+	}
+
+	if (die_get_real_type(vr_die, &type) == NULL) {
+		pr_warning("Failed to get a type information of %s.\n",
+			   dwarf_diename(vr_die));
+		return -ENOENT;
+	}
+
+	pr_debug("%s type is %s.\n",
+		 dwarf_diename(vr_die), dwarf_diename(&type));
+
+	if (cast && (!strcmp(cast, "string") || !strcmp(cast, "ustring"))) {
+		/* String type */
+		ret = dwarf_tag(&type);
+		if (ret != DW_TAG_pointer_type &&
+		    ret != DW_TAG_array_type) {
+			pr_warning("Failed to cast into string: "
+				   "%s(%s) is not a pointer nor array.\n",
+				   dwarf_diename(vr_die), dwarf_diename(&type));
+			return -EINVAL;
+		}
+		if (die_get_real_type(&type, &type) == NULL) {
+			pr_warning("Failed to get a type"
+				   " information.\n");
+			return -ENOENT;
+		}
+		if (ret == DW_TAG_pointer_type) {
+			while (*ref_ptr)
+				ref_ptr = &(*ref_ptr)->next;
+			/* Add new reference with offset +0 */
+			*ref_ptr = zalloc(sizeof(struct probe_trace_arg_ref));
+			if (*ref_ptr == NULL) {
+				pr_warning("Out of memory error\n");
+				return -ENOMEM;
+			}
+			(*ref_ptr)->user_access = user_access;
+		}
+		if (!die_compare_name(&type, "char") &&
+		    !die_compare_name(&type, "unsigned char")) {
+			pr_warning("Failed to cast into string: "
+				   "%s is not (unsigned) char *.\n",
+				   dwarf_diename(vr_die));
+			return -EINVAL;
+		}
+		tvar->type = strdup(cast);
+		return (tvar->type == NULL) ? -ENOMEM : 0;
+	}
+
+	if (cast && (strcmp(cast, "u") == 0))
+		prefix = 'u';
+	else if (cast && (strcmp(cast, "s") == 0))
+		prefix = 's';
+	else if (cast && (strcmp(cast, "x") == 0) &&
+		 probe_type_is_available(PROBE_TYPE_X))
+		prefix = 'x';
+	else
+		prefix = die_is_signed_type(&type) ? 's' :
+			 probe_type_is_available(PROBE_TYPE_X) ? 'x' : 'u';
+
+	ret = dwarf_bytesize(&type);
+	if (ret <= 0)
+		/* No size ... try to use default type */
+		return 0;
+	ret = BYTES_TO_BITS(ret);
+
+	/* Check the bitwidth */
+	if (ret > MAX_BASIC_TYPE_BITS) {
+		pr_info("%s exceeds max-bitwidth. Cut down to %d bits.\n",
+			dwarf_diename(&type), MAX_BASIC_TYPE_BITS);
+		ret = MAX_BASIC_TYPE_BITS;
+	}
+	ret = snprintf(buf, 16, "%c%d", prefix, ret);
+
+formatted:
+	if (ret < 0 || ret >= 16) {
+		if (ret >= 16)
+			ret = -E2BIG;
+		pr_warning("Failed to convert variable type: %s\n",
+			   str_error_r(-ret, sbuf, sizeof(sbuf)));
+		return ret;
+	}
+	tvar->type = strdup(buf);
+	if (tvar->type == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
+				    struct perf_probe_arg_field *field,
+				    struct probe_trace_arg_ref **ref_ptr,
+				    Dwarf_Die *die_mem, bool user_access)
+{
+	struct probe_trace_arg_ref *ref = *ref_ptr;
+	Dwarf_Die type;
+	Dwarf_Word offs;
+	int ret, tag;
+
+	pr_debug("converting %s in %s\n", field->name, varname);
+	if (die_get_real_type(vr_die, &type) == NULL) {
+		pr_warning("Failed to get the type of %s.\n", varname);
+		return -ENOENT;
+	}
+	pr_debug2("Var real type: %s (%x)\n", dwarf_diename(&type),
+		  (unsigned)dwarf_dieoffset(&type));
+	tag = dwarf_tag(&type);
+
+	if (field->name[0] == '[' &&
+	    (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type)) {
+		/* Save original type for next field or type */
+		memcpy(die_mem, &type, sizeof(*die_mem));
+		/* Get the type of this array */
+		if (die_get_real_type(&type, &type) == NULL) {
+			pr_warning("Failed to get the type of %s.\n", varname);
+			return -ENOENT;
+		}
+		pr_debug2("Array real type: %s (%x)\n", dwarf_diename(&type),
+			 (unsigned)dwarf_dieoffset(&type));
+		if (tag == DW_TAG_pointer_type) {
+			ref = zalloc(sizeof(struct probe_trace_arg_ref));
+			if (ref == NULL)
+				return -ENOMEM;
+			if (*ref_ptr)
+				(*ref_ptr)->next = ref;
+			else
+				*ref_ptr = ref;
+		}
+		ref->offset += dwarf_bytesize(&type) * field->index;
+		ref->user_access = user_access;
+		goto next;
+	} else if (tag == DW_TAG_pointer_type) {
+		/* Check the pointer and dereference */
+		if (!field->ref) {
+			pr_err("Semantic error: %s must be referred by '->'\n",
+			       field->name);
+			return -EINVAL;
+		}
+		/* Get the type pointed by this pointer */
+		if (die_get_real_type(&type, &type) == NULL) {
+			pr_warning("Failed to get the type of %s.\n", varname);
+			return -ENOENT;
+		}
+		/* Verify it is a data structure  */
+		tag = dwarf_tag(&type);
+		if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) {
+			pr_warning("%s is not a data structure nor a union.\n",
+				   varname);
+			return -EINVAL;
+		}
+
+		ref = zalloc(sizeof(struct probe_trace_arg_ref));
+		if (ref == NULL)
+			return -ENOMEM;
+		if (*ref_ptr)
+			(*ref_ptr)->next = ref;
+		else
+			*ref_ptr = ref;
+	} else {
+		/* Verify it is a data structure  */
+		if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) {
+			pr_warning("%s is not a data structure nor a union.\n",
+				   varname);
+			return -EINVAL;
+		}
+		if (field->name[0] == '[') {
+			pr_err("Semantic error: %s is not a pointer"
+			       " nor array.\n", varname);
+			return -EINVAL;
+		}
+		/* While processing unnamed field, we don't care about this */
+		if (field->ref && dwarf_diename(vr_die)) {
+			pr_err("Semantic error: %s must be referred by '.'\n",
+			       field->name);
+			return -EINVAL;
+		}
+		if (!ref) {
+			pr_warning("Structure on a register is not "
+				   "supported yet.\n");
+			return -ENOTSUP;
+		}
+	}
+
+	if (die_find_member(&type, field->name, die_mem) == NULL) {
+		pr_warning("%s(type:%s) has no member %s.\n", varname,
+			   dwarf_diename(&type), field->name);
+		return -EINVAL;
+	}
+
+	/* Get the offset of the field */
+	if (tag == DW_TAG_union_type) {
+		offs = 0;
+	} else {
+		ret = die_get_data_member_location(die_mem, &offs);
+		if (ret < 0) {
+			pr_warning("Failed to get the offset of %s.\n",
+				   field->name);
+			return ret;
+		}
+	}
+	ref->offset += (long)offs;
+	ref->user_access = user_access;
+
+	/* If this member is unnamed, we need to reuse this field */
+	if (!dwarf_diename(die_mem))
+		return convert_variable_fields(die_mem, varname, field,
+						&ref, die_mem, user_access);
+
+next:
+	/* Converting next field */
+	if (field->next)
+		return convert_variable_fields(die_mem, field->name,
+				field->next, &ref, die_mem, user_access);
+	else
+		return 0;
+}
+
+static void print_var_not_found(const char *varname)
+{
+	pr_err("Failed to find the location of the '%s' variable at this address.\n"
+	       " Perhaps it has been optimized out.\n"
+	       " Use -V with the --range option to show '%s' location range.\n",
+		varname, varname);
+}
+
+/* Show a variables in kprobe event format */
+static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf)
+{
+	Dwarf_Die die_mem;
+	int ret;
+
+	pr_debug("Converting variable %s into trace event.\n",
+		 dwarf_diename(vr_die));
+
+	ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops,
+					&pf->sp_die, pf->machine, pf->tvar);
+	if (ret == -ENOENT && pf->skip_empty_arg)
+		/* This can be found in other place. skip it */
+		return 0;
+	if (ret == -ENOENT || ret == -EINVAL) {
+		print_var_not_found(pf->pvar->var);
+	} else if (ret == -ENOTSUP)
+		pr_err("Sorry, we don't support this variable location yet.\n");
+	else if (ret == 0 && pf->pvar->field) {
+		ret = convert_variable_fields(vr_die, pf->pvar->var,
+					      pf->pvar->field, &pf->tvar->ref,
+					      &die_mem, pf->pvar->user_access);
+		vr_die = &die_mem;
+	}
+	if (ret == 0)
+		ret = convert_variable_type(vr_die, pf->tvar, pf->pvar->type,
+					    pf->pvar->user_access);
+	/* *expr will be cached in libdw. Don't free it. */
+	return ret;
+}
+
+/* Find a variable in a scope DIE */
+static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf)
+{
+	Dwarf_Die vr_die;
+	char *buf, *ptr;
+	int ret = 0;
+
+	/* Copy raw parameters */
+	if (!is_c_varname(pf->pvar->var))
+		return copy_to_probe_trace_arg(pf->tvar, pf->pvar);
+
+	if (pf->pvar->name)
+		pf->tvar->name = strdup(pf->pvar->name);
+	else {
+		buf = synthesize_perf_probe_arg(pf->pvar);
+		if (!buf)
+			return -ENOMEM;
+		ptr = strchr(buf, ':');	/* Change type separator to _ */
+		if (ptr)
+			*ptr = '_';
+		pf->tvar->name = buf;
+	}
+	if (pf->tvar->name == NULL)
+		return -ENOMEM;
+
+	pr_debug("Searching '%s' variable in context.\n", pf->pvar->var);
+	/* Search child die for local variables and parameters. */
+	if (!die_find_variable_at(sc_die, pf->pvar->var, pf->addr, &vr_die)) {
+		/* Search again in global variables */
+		if (!die_find_variable_at(&pf->cu_die, pf->pvar->var,
+						0, &vr_die)) {
+			if (pf->skip_empty_arg)
+				return 0;
+			pr_warning("Failed to find '%s' in this function.\n",
+				   pf->pvar->var);
+			ret = -ENOENT;
+		}
+	}
+	if (ret >= 0)
+		ret = convert_variable(&vr_die, pf);
+
+	return ret;
+}
+
+/* Convert subprogram DIE to trace point */
+static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod,
+				  Dwarf_Addr paddr, bool retprobe,
+				  const char *function,
+				  struct probe_trace_point *tp)
+{
+	Dwarf_Addr eaddr;
+	GElf_Sym sym;
+	const char *symbol;
+
+	/* Verify the address is correct */
+	if (!dwarf_haspc(sp_die, paddr)) {
+		pr_warning("Specified offset is out of %s\n",
+			   dwarf_diename(sp_die));
+		return -EINVAL;
+	}
+
+	if (dwarf_entrypc(sp_die, &eaddr) == 0) {
+		/* If the DIE has entrypc, use it. */
+		symbol = dwarf_diename(sp_die);
+	} else {
+		/* Try to get actual symbol name and address from symtab */
+		symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL);
+		eaddr = sym.st_value;
+	}
+	if (!symbol) {
+		pr_warning("Failed to find symbol at 0x%lx\n",
+			   (unsigned long)paddr);
+		return -ENOENT;
+	}
+
+	tp->offset = (unsigned long)(paddr - eaddr);
+	tp->address = paddr;
+	tp->symbol = strdup(symbol);
+	if (!tp->symbol)
+		return -ENOMEM;
+
+	/* Return probe must be on the head of a subprogram */
+	if (retprobe) {
+		if (eaddr != paddr) {
+			pr_warning("Failed to find \"%s%%return\",\n"
+				   " because %s is an inlined function and"
+				   " has no return point.\n", function,
+				   function);
+			return -EINVAL;
+		}
+		tp->retprobe = true;
+	}
+
+	return 0;
+}
+
+/* Call probe_finder callback with scope DIE */
+static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
+{
+	Dwarf_Attribute fb_attr;
+	Dwarf_Frame *frame = NULL;
+	size_t nops;
+	int ret;
+
+	if (!sc_die) {
+		pr_err("Caller must pass a scope DIE. Program error.\n");
+		return -EINVAL;
+	}
+
+	/* If not a real subprogram, find a real one */
+	if (!die_is_func_def(sc_die)) {
+		if (!die_find_realfunc(&pf->cu_die, pf->addr, &pf->sp_die)) {
+			if (die_find_tailfunc(&pf->cu_die, pf->addr, &pf->sp_die)) {
+				pr_warning("Ignoring tail call from %s\n",
+						dwarf_diename(&pf->sp_die));
+				return 0;
+			} else {
+				pr_warning("Failed to find probe point in any "
+					   "functions.\n");
+				return -ENOENT;
+			}
+		}
+	} else
+		memcpy(&pf->sp_die, sc_die, sizeof(Dwarf_Die));
+
+	/* Get the frame base attribute/ops from subprogram */
+	dwarf_attr(&pf->sp_die, DW_AT_frame_base, &fb_attr);
+	ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1);
+	if (ret <= 0 || nops == 0) {
+		pf->fb_ops = NULL;
+#if _ELFUTILS_PREREQ(0, 142)
+	} else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa &&
+		   (pf->cfi_eh != NULL || pf->cfi_dbg != NULL)) {
+		if ((dwarf_cfi_addrframe(pf->cfi_eh, pf->addr, &frame) != 0 &&
+		     (dwarf_cfi_addrframe(pf->cfi_dbg, pf->addr, &frame) != 0)) ||
+		    dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) {
+			pr_warning("Failed to get call frame on 0x%jx\n",
+				   (uintmax_t)pf->addr);
+			free(frame);
+			return -ENOENT;
+		}
+#endif
+	}
+
+	/* Call finder's callback handler */
+	ret = pf->callback(sc_die, pf);
+
+	/* Since *pf->fb_ops can be a part of frame. we should free it here. */
+	free(frame);
+	pf->fb_ops = NULL;
+
+	return ret;
+}
+
+struct find_scope_param {
+	const char *function;
+	const char *file;
+	int line;
+	int diff;
+	Dwarf_Die *die_mem;
+	bool found;
+};
+
+static int find_best_scope_cb(Dwarf_Die *fn_die, void *data)
+{
+	struct find_scope_param *fsp = data;
+	const char *file;
+	int lno;
+
+	/* Skip if declared file name does not match */
+	if (fsp->file) {
+		file = dwarf_decl_file(fn_die);
+		if (!file || strcmp(fsp->file, file) != 0)
+			return 0;
+	}
+	/* If the function name is given, that's what user expects */
+	if (fsp->function) {
+		if (die_match_name(fn_die, fsp->function)) {
+			memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die));
+			fsp->found = true;
+			return 1;
+		}
+	} else {
+		/* With the line number, find the nearest declared DIE */
+		dwarf_decl_line(fn_die, &lno);
+		if (lno < fsp->line && fsp->diff > fsp->line - lno) {
+			/* Keep a candidate and continue */
+			fsp->diff = fsp->line - lno;
+			memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die));
+			fsp->found = true;
+		}
+	}
+	return 0;
+}
+
+/* Return innermost DIE */
+static int find_inner_scope_cb(Dwarf_Die *fn_die, void *data)
+{
+	struct find_scope_param *fsp = data;
+
+	memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die));
+	fsp->found = true;
+	return 1;
+}
+
+/* Find an appropriate scope fits to given conditions */
+static Dwarf_Die *find_best_scope(struct probe_finder *pf, Dwarf_Die *die_mem)
+{
+	struct find_scope_param fsp = {
+		.function = pf->pev->point.function,
+		.file = pf->fname,
+		.line = pf->lno,
+		.diff = INT_MAX,
+		.die_mem = die_mem,
+		.found = false,
+	};
+	int ret;
+
+	ret = cu_walk_functions_at(&pf->cu_die, pf->addr, find_best_scope_cb,
+				   &fsp);
+	if (!ret && !fsp.found)
+		cu_walk_functions_at(&pf->cu_die, pf->addr,
+				     find_inner_scope_cb, &fsp);
+
+	return fsp.found ? die_mem : NULL;
+}
+
+static int verify_representive_line(struct probe_finder *pf, const char *fname,
+				int lineno, Dwarf_Addr addr)
+{
+	const char *__fname, *__func = NULL;
+	Dwarf_Die die_mem;
+	int __lineno;
+
+	/* Verify line number and address by reverse search */
+	if (cu_find_lineinfo(&pf->cu_die, addr, &__fname, &__lineno) < 0)
+		return 0;
+
+	pr_debug2("Reversed line: %s:%d\n", __fname, __lineno);
+	if (strcmp(fname, __fname) || lineno == __lineno)
+		return 0;
+
+	pr_warning("This line is sharing the address with other lines.\n");
+
+	if (pf->pev->point.function) {
+		/* Find best match function name and lines */
+		pf->addr = addr;
+		if (find_best_scope(pf, &die_mem)
+		    && die_match_name(&die_mem, pf->pev->point.function)
+		    && dwarf_decl_line(&die_mem, &lineno) == 0) {
+			__func = dwarf_diename(&die_mem);
+			__lineno -= lineno;
+		}
+	}
+	pr_warning("Please try to probe at %s:%d instead.\n",
+		   __func ? : __fname, __lineno);
+
+	return -ENOENT;
+}
+
+static int probe_point_line_walker(const char *fname, int lineno,
+				   Dwarf_Addr addr, void *data)
+{
+	struct probe_finder *pf = data;
+	Dwarf_Die *sc_die, die_mem;
+	int ret;
+
+	if (lineno != pf->lno || strtailcmp(fname, pf->fname) != 0)
+		return 0;
+
+	if (verify_representive_line(pf, fname, lineno, addr))
+		return -ENOENT;
+
+	pf->addr = addr;
+	sc_die = find_best_scope(pf, &die_mem);
+	if (!sc_die) {
+		pr_warning("Failed to find scope of probe point.\n");
+		return -ENOENT;
+	}
+
+	ret = call_probe_finder(sc_die, pf);
+
+	/* Continue if no error, because the line will be in inline function */
+	return ret < 0 ? ret : 0;
+}
+
+/* Find probe point from its line number */
+static int find_probe_point_by_line(struct probe_finder *pf)
+{
+	return die_walk_lines(&pf->cu_die, probe_point_line_walker, pf);
+}
+
+/* Find lines which match lazy pattern */
+static int find_lazy_match_lines(struct intlist *list,
+				 const char *fname, const char *pat)
+{
+	FILE *fp;
+	char *line = NULL;
+	size_t line_len;
+	ssize_t len;
+	int count = 0, linenum = 1;
+	char sbuf[STRERR_BUFSIZE];
+
+	fp = fopen(fname, "r");
+	if (!fp) {
+		pr_warning("Failed to open %s: %s\n", fname,
+			   str_error_r(errno, sbuf, sizeof(sbuf)));
+		return -errno;
+	}
+
+	while ((len = getline(&line, &line_len, fp)) > 0) {
+
+		if (line[len - 1] == '\n')
+			line[len - 1] = '\0';
+
+		if (strlazymatch(line, pat)) {
+			intlist__add(list, linenum);
+			count++;
+		}
+		linenum++;
+	}
+
+	if (ferror(fp))
+		count = -errno;
+	free(line);
+	fclose(fp);
+
+	if (count == 0)
+		pr_debug("No matched lines found in %s.\n", fname);
+	return count;
+}
+
+static int probe_point_lazy_walker(const char *fname, int lineno,
+				   Dwarf_Addr addr, void *data)
+{
+	struct probe_finder *pf = data;
+	Dwarf_Die *sc_die, die_mem;
+	int ret;
+
+	if (!intlist__has_entry(pf->lcache, lineno) ||
+	    strtailcmp(fname, pf->fname) != 0)
+		return 0;
+
+	pr_debug("Probe line found: line:%d addr:0x%llx\n",
+		 lineno, (unsigned long long)addr);
+	pf->addr = addr;
+	pf->lno = lineno;
+	sc_die = find_best_scope(pf, &die_mem);
+	if (!sc_die) {
+		pr_warning("Failed to find scope of probe point.\n");
+		return -ENOENT;
+	}
+
+	ret = call_probe_finder(sc_die, pf);
+
+	/*
+	 * Continue if no error, because the lazy pattern will match
+	 * to other lines
+	 */
+	return ret < 0 ? ret : 0;
+}
+
+/* Find probe points from lazy pattern  */
+static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
+{
+	struct build_id bid;
+	char sbuild_id[SBUILD_ID_SIZE] = "";
+	int ret = 0;
+	char *fpath;
+
+	if (intlist__empty(pf->lcache)) {
+		const char *comp_dir;
+
+		comp_dir = cu_get_comp_dir(&pf->cu_die);
+		if (pf->dbg->build_id) {
+			build_id__init(&bid, pf->dbg->build_id, BUILD_ID_SIZE);
+			build_id__sprintf(&bid, sbuild_id);
+		}
+		ret = find_source_path(pf->fname, sbuild_id, comp_dir, &fpath);
+		if (ret < 0) {
+			pr_warning("Failed to find source file path.\n");
+			return ret;
+		}
+
+		/* Matching lazy line pattern */
+		ret = find_lazy_match_lines(pf->lcache, fpath,
+					    pf->pev->point.lazy_line);
+		free(fpath);
+		if (ret <= 0)
+			return ret;
+	}
+
+	return die_walk_lines(sp_die, probe_point_lazy_walker, pf);
+}
+
+static void skip_prologue(Dwarf_Die *sp_die, struct probe_finder *pf)
+{
+	struct perf_probe_point *pp = &pf->pev->point;
+
+	/* Not uprobe? */
+	if (!pf->pev->uprobes)
+		return;
+
+	/* Compiled with optimization? */
+	if (die_is_optimized_target(&pf->cu_die))
+		return;
+
+	/* Don't know entrypc? */
+	if (!pf->addr)
+		return;
+
+	/* Only FUNC and FUNC@SRC are eligible. */
+	if (!pp->function || pp->line || pp->retprobe || pp->lazy_line ||
+	    pp->offset || pp->abs_address)
+		return;
+
+	/* Not interested in func parameter? */
+	if (!perf_probe_with_var(pf->pev))
+		return;
+
+	pr_info("Target program is compiled without optimization. Skipping prologue.\n"
+		"Probe on address 0x%" PRIx64 " to force probing at the function entry.\n\n",
+		pf->addr);
+
+	die_skip_prologue(sp_die, &pf->cu_die, &pf->addr);
+}
+
+static int probe_point_inline_cb(Dwarf_Die *in_die, void *data)
+{
+	struct probe_finder *pf = data;
+	struct perf_probe_point *pp = &pf->pev->point;
+	Dwarf_Addr addr;
+	int ret;
+
+	if (pp->lazy_line)
+		ret = find_probe_point_lazy(in_die, pf);
+	else {
+		/* Get probe address */
+		if (die_entrypc(in_die, &addr) != 0) {
+			pr_warning("Failed to get entry address of %s.\n",
+				   dwarf_diename(in_die));
+			return -ENOENT;
+		}
+		if (addr == 0) {
+			pr_debug("%s has no valid entry address. skipped.\n",
+				 dwarf_diename(in_die));
+			return -ENOENT;
+		}
+		pf->addr = addr;
+		pf->addr += pp->offset;
+		pr_debug("found inline addr: 0x%jx\n",
+			 (uintmax_t)pf->addr);
+
+		ret = call_probe_finder(in_die, pf);
+	}
+
+	return ret;
+}
+
+/* Callback parameter with return value for libdw */
+struct dwarf_callback_param {
+	void *data;
+	int retval;
+};
+
+/* Search function from function name */
+static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
+{
+	struct dwarf_callback_param *param = data;
+	struct probe_finder *pf = param->data;
+	struct perf_probe_point *pp = &pf->pev->point;
+
+	/* Check tag and diename */
+	if (!die_is_func_def(sp_die) ||
+	    !die_match_name(sp_die, pp->function))
+		return DWARF_CB_OK;
+
+	/* Check declared file */
+	if (pp->file && strtailcmp(pp->file, dwarf_decl_file(sp_die)))
+		return DWARF_CB_OK;
+
+	pr_debug("Matched function: %s [%lx]\n", dwarf_diename(sp_die),
+		 (unsigned long)dwarf_dieoffset(sp_die));
+	pf->fname = dwarf_decl_file(sp_die);
+	if (pp->line) { /* Function relative line */
+		dwarf_decl_line(sp_die, &pf->lno);
+		pf->lno += pp->line;
+		param->retval = find_probe_point_by_line(pf);
+	} else if (die_is_func_instance(sp_die)) {
+		/* Instances always have the entry address */
+		die_entrypc(sp_die, &pf->addr);
+		/* But in some case the entry address is 0 */
+		if (pf->addr == 0) {
+			pr_debug("%s has no entry PC. Skipped\n",
+				 dwarf_diename(sp_die));
+			param->retval = 0;
+		/* Real function */
+		} else if (pp->lazy_line)
+			param->retval = find_probe_point_lazy(sp_die, pf);
+		else {
+			skip_prologue(sp_die, pf);
+			pf->addr += pp->offset;
+			/* TODO: Check the address in this function */
+			param->retval = call_probe_finder(sp_die, pf);
+		}
+	} else if (!probe_conf.no_inlines) {
+		/* Inlined function: search instances */
+		param->retval = die_walk_instances(sp_die,
+					probe_point_inline_cb, (void *)pf);
+		/* This could be a non-existed inline definition */
+		if (param->retval == -ENOENT)
+			param->retval = 0;
+	}
+
+	/* We need to find other candidates */
+	if (strisglob(pp->function) && param->retval >= 0) {
+		param->retval = 0;	/* We have to clear the result */
+		return DWARF_CB_OK;
+	}
+
+	return DWARF_CB_ABORT; /* Exit; no same symbol in this CU. */
+}
+
+static int find_probe_point_by_func(struct probe_finder *pf)
+{
+	struct dwarf_callback_param _param = {.data = (void *)pf,
+					      .retval = 0};
+	dwarf_getfuncs(&pf->cu_die, probe_point_search_cb, &_param, 0);
+	return _param.retval;
+}
+
+struct pubname_callback_param {
+	char *function;
+	char *file;
+	Dwarf_Die *cu_die;
+	Dwarf_Die *sp_die;
+	int found;
+};
+
+static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data)
+{
+	struct pubname_callback_param *param = data;
+
+	if (dwarf_offdie(dbg, gl->die_offset, param->sp_die)) {
+		if (dwarf_tag(param->sp_die) != DW_TAG_subprogram)
+			return DWARF_CB_OK;
+
+		if (die_match_name(param->sp_die, param->function)) {
+			if (!dwarf_offdie(dbg, gl->cu_offset, param->cu_die))
+				return DWARF_CB_OK;
+
+			if (param->file &&
+			    strtailcmp(param->file, dwarf_decl_file(param->sp_die)))
+				return DWARF_CB_OK;
+
+			param->found = 1;
+			return DWARF_CB_ABORT;
+		}
+	}
+
+	return DWARF_CB_OK;
+}
+
+static int debuginfo__find_probe_location(struct debuginfo *dbg,
+				  struct probe_finder *pf)
+{
+	struct perf_probe_point *pp = &pf->pev->point;
+	Dwarf_Off off, noff;
+	size_t cuhl;
+	Dwarf_Die *diep;
+	int ret = 0;
+
+	off = 0;
+	pf->lcache = intlist__new(NULL);
+	if (!pf->lcache)
+		return -ENOMEM;
+
+	/* Fastpath: lookup by function name from .debug_pubnames section */
+	if (pp->function && !strisglob(pp->function)) {
+		struct pubname_callback_param pubname_param = {
+			.function = pp->function,
+			.file	  = pp->file,
+			.cu_die	  = &pf->cu_die,
+			.sp_die	  = &pf->sp_die,
+			.found	  = 0,
+		};
+		struct dwarf_callback_param probe_param = {
+			.data = pf,
+		};
+
+		dwarf_getpubnames(dbg->dbg, pubname_search_cb,
+				  &pubname_param, 0);
+		if (pubname_param.found) {
+			ret = probe_point_search_cb(&pf->sp_die, &probe_param);
+			if (ret)
+				goto found;
+		}
+	}
+
+	/* Loop on CUs (Compilation Unit) */
+	while (!dwarf_nextcu(dbg->dbg, off, &noff, &cuhl, NULL, NULL, NULL)) {
+		/* Get the DIE(Debugging Information Entry) of this CU */
+		diep = dwarf_offdie(dbg->dbg, off + cuhl, &pf->cu_die);
+		if (!diep) {
+			off = noff;
+			continue;
+		}
+
+		/* Check if target file is included. */
+		if (pp->file)
+			pf->fname = cu_find_realpath(&pf->cu_die, pp->file);
+		else
+			pf->fname = NULL;
+
+		if (!pp->file || pf->fname) {
+			if (pp->function)
+				ret = find_probe_point_by_func(pf);
+			else if (pp->lazy_line)
+				ret = find_probe_point_lazy(&pf->cu_die, pf);
+			else {
+				pf->lno = pp->line;
+				ret = find_probe_point_by_line(pf);
+			}
+			if (ret < 0)
+				break;
+		}
+		off = noff;
+	}
+
+found:
+	intlist__delete(pf->lcache);
+	pf->lcache = NULL;
+
+	return ret;
+}
+
+/* Find probe points from debuginfo */
+static int debuginfo__find_probes(struct debuginfo *dbg,
+				  struct probe_finder *pf)
+{
+	int ret = 0;
+	Elf *elf;
+	GElf_Ehdr ehdr;
+
+	if (pf->cfi_eh || pf->cfi_dbg)
+		return debuginfo__find_probe_location(dbg, pf);
+
+	/* Get the call frame information from this dwarf */
+	elf = dwarf_getelf(dbg->dbg);
+	if (elf == NULL)
+		return -EINVAL;
+
+	if (gelf_getehdr(elf, &ehdr) == NULL)
+		return -EINVAL;
+
+	pf->machine = ehdr.e_machine;
+
+#if _ELFUTILS_PREREQ(0, 142)
+	do {
+		GElf_Shdr shdr;
+
+		if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) &&
+		    shdr.sh_type == SHT_PROGBITS)
+			pf->cfi_eh = dwarf_getcfi_elf(elf);
+
+		pf->cfi_dbg = dwarf_getcfi(dbg->dbg);
+	} while (0);
+#endif
+
+	ret = debuginfo__find_probe_location(dbg, pf);
+	return ret;
+}
+
+struct local_vars_finder {
+	struct probe_finder *pf;
+	struct perf_probe_arg *args;
+	bool vars;
+	int max_args;
+	int nargs;
+	int ret;
+};
+
+/* Collect available variables in this scope */
+static int copy_variables_cb(Dwarf_Die *die_mem, void *data)
+{
+	struct local_vars_finder *vf = data;
+	struct probe_finder *pf = vf->pf;
+	int tag;
+
+	tag = dwarf_tag(die_mem);
+	if (tag == DW_TAG_formal_parameter ||
+	    (tag == DW_TAG_variable && vf->vars)) {
+		if (convert_variable_location(die_mem, vf->pf->addr,
+					      vf->pf->fb_ops, &pf->sp_die,
+					      pf->machine, NULL) == 0) {
+			vf->args[vf->nargs].var = (char *)dwarf_diename(die_mem);
+			if (vf->args[vf->nargs].var == NULL) {
+				vf->ret = -ENOMEM;
+				return DIE_FIND_CB_END;
+			}
+			pr_debug(" %s", vf->args[vf->nargs].var);
+			vf->nargs++;
+		}
+	}
+
+	if (dwarf_haspc(die_mem, vf->pf->addr))
+		return DIE_FIND_CB_CONTINUE;
+	else
+		return DIE_FIND_CB_SIBLING;
+}
+
+static int expand_probe_args(Dwarf_Die *sc_die, struct probe_finder *pf,
+			     struct perf_probe_arg *args)
+{
+	Dwarf_Die die_mem;
+	int i;
+	int n = 0;
+	struct local_vars_finder vf = {.pf = pf, .args = args, .vars = false,
+				.max_args = MAX_PROBE_ARGS, .ret = 0};
+
+	for (i = 0; i < pf->pev->nargs; i++) {
+		/* var never be NULL */
+		if (strcmp(pf->pev->args[i].var, PROBE_ARG_VARS) == 0)
+			vf.vars = true;
+		else if (strcmp(pf->pev->args[i].var, PROBE_ARG_PARAMS) != 0) {
+			/* Copy normal argument */
+			args[n] = pf->pev->args[i];
+			n++;
+			continue;
+		}
+		pr_debug("Expanding %s into:", pf->pev->args[i].var);
+		vf.nargs = n;
+		/* Special local variables */
+		die_find_child(sc_die, copy_variables_cb, (void *)&vf,
+			       &die_mem);
+		pr_debug(" (%d)\n", vf.nargs - n);
+		if (vf.ret < 0)
+			return vf.ret;
+		n = vf.nargs;
+	}
+	return n;
+}
+
+static bool trace_event_finder_overlap(struct trace_event_finder *tf)
+{
+	int i;
+
+	for (i = 0; i < tf->ntevs; i++) {
+		if (tf->pf.addr == tf->tevs[i].point.address)
+			return true;
+	}
+	return false;
+}
+
+/* Add a found probe point into trace event list */
+static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf)
+{
+	struct trace_event_finder *tf =
+			container_of(pf, struct trace_event_finder, pf);
+	struct perf_probe_point *pp = &pf->pev->point;
+	struct probe_trace_event *tev;
+	struct perf_probe_arg *args = NULL;
+	int ret, i;
+
+	/*
+	 * For some reason (e.g. different column assigned to same address)
+	 * This callback can be called with the address which already passed.
+	 * Ignore it first.
+	 */
+	if (trace_event_finder_overlap(tf))
+		return 0;
+
+	/* Check number of tevs */
+	if (tf->ntevs == tf->max_tevs) {
+		pr_warning("Too many( > %d) probe point found.\n",
+			   tf->max_tevs);
+		return -ERANGE;
+	}
+	tev = &tf->tevs[tf->ntevs++];
+
+	/* Trace point should be converted from subprogram DIE */
+	ret = convert_to_trace_point(&pf->sp_die, tf->mod, pf->addr,
+				     pp->retprobe, pp->function, &tev->point);
+	if (ret < 0)
+		goto end;
+
+	tev->point.realname = strdup(dwarf_diename(sc_die));
+	if (!tev->point.realname) {
+		ret = -ENOMEM;
+		goto end;
+	}
+
+	pr_debug("Probe point found: %s+%lu\n", tev->point.symbol,
+		 tev->point.offset);
+
+	/* Expand special probe argument if exist */
+	args = zalloc(sizeof(struct perf_probe_arg) * MAX_PROBE_ARGS);
+	if (args == NULL) {
+		ret = -ENOMEM;
+		goto end;
+	}
+
+	ret = expand_probe_args(sc_die, pf, args);
+	if (ret < 0)
+		goto end;
+
+	tev->nargs = ret;
+	tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
+	if (tev->args == NULL) {
+		ret = -ENOMEM;
+		goto end;
+	}
+
+	/* Find each argument */
+	for (i = 0; i < tev->nargs; i++) {
+		pf->pvar = &args[i];
+		pf->tvar = &tev->args[i];
+		/* Variable should be found from scope DIE */
+		ret = find_variable(sc_die, pf);
+		if (ret != 0)
+			break;
+	}
+
+end:
+	if (ret) {
+		clear_probe_trace_event(tev);
+		tf->ntevs--;
+	}
+	free(args);
+	return ret;
+}
+
+static int fill_empty_trace_arg(struct perf_probe_event *pev,
+				struct probe_trace_event *tevs, int ntevs)
+{
+	char **valp;
+	char *type;
+	int i, j, ret;
+
+	if (!ntevs)
+		return -ENOENT;
+
+	for (i = 0; i < pev->nargs; i++) {
+		type = NULL;
+		for (j = 0; j < ntevs; j++) {
+			if (tevs[j].args[i].value) {
+				type = tevs[j].args[i].type;
+				break;
+			}
+		}
+		if (j == ntevs) {
+			print_var_not_found(pev->args[i].var);
+			return -ENOENT;
+		}
+		for (j = 0; j < ntevs; j++) {
+			valp = &tevs[j].args[i].value;
+			if (*valp)
+				continue;
+
+			ret = asprintf(valp, "\\%lx", probe_conf.magic_num);
+			if (ret < 0)
+				return -ENOMEM;
+			/* Note that type can be NULL */
+			if (type) {
+				tevs[j].args[i].type = strdup(type);
+				if (!tevs[j].args[i].type)
+					return -ENOMEM;
+			}
+		}
+	}
+	return 0;
+}
+
+/* Find probe_trace_events specified by perf_probe_event from debuginfo */
+int debuginfo__find_trace_events(struct debuginfo *dbg,
+				 struct perf_probe_event *pev,
+				 struct probe_trace_event **tevs)
+{
+	struct trace_event_finder tf = {
+			.pf = {.pev = pev, .dbg = dbg, .callback = add_probe_trace_event},
+			.max_tevs = probe_conf.max_probes, .mod = dbg->mod};
+	int ret, i;
+
+	/* Allocate result tevs array */
+	*tevs = zalloc(sizeof(struct probe_trace_event) * tf.max_tevs);
+	if (*tevs == NULL)
+		return -ENOMEM;
+
+	tf.tevs = *tevs;
+	tf.ntevs = 0;
+
+	if (pev->nargs != 0 && immediate_value_is_supported())
+		tf.pf.skip_empty_arg = true;
+
+	ret = debuginfo__find_probes(dbg, &tf.pf);
+	if (ret >= 0 && tf.pf.skip_empty_arg)
+		ret = fill_empty_trace_arg(pev, tf.tevs, tf.ntevs);
+
+	if (ret < 0 || tf.ntevs == 0) {
+		for (i = 0; i < tf.ntevs; i++)
+			clear_probe_trace_event(&tf.tevs[i]);
+		zfree(tevs);
+		return ret;
+	}
+
+	return (ret < 0) ? ret : tf.ntevs;
+}
+
+/* Collect available variables in this scope */
+static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
+{
+	struct available_var_finder *af = data;
+	struct variable_list *vl;
+	struct strbuf buf = STRBUF_INIT;
+	int tag, ret;
+
+	vl = &af->vls[af->nvls - 1];
+
+	tag = dwarf_tag(die_mem);
+	if (tag == DW_TAG_formal_parameter ||
+	    tag == DW_TAG_variable) {
+		ret = convert_variable_location(die_mem, af->pf.addr,
+						af->pf.fb_ops, &af->pf.sp_die,
+						af->pf.machine, NULL);
+		if (ret == 0 || ret == -ERANGE) {
+			int ret2;
+			bool externs = !af->child;
+
+			if (strbuf_init(&buf, 64) < 0)
+				goto error;
+
+			if (probe_conf.show_location_range) {
+				if (!externs)
+					ret2 = strbuf_add(&buf,
+						ret ? "[INV]\t" : "[VAL]\t", 6);
+				else
+					ret2 = strbuf_add(&buf, "[EXT]\t", 6);
+				if (ret2)
+					goto error;
+			}
+
+			ret2 = die_get_varname(die_mem, &buf);
+
+			if (!ret2 && probe_conf.show_location_range &&
+				!externs) {
+				if (strbuf_addch(&buf, '\t') < 0)
+					goto error;
+				ret2 = die_get_var_range(&af->pf.sp_die,
+							die_mem, &buf);
+			}
+
+			pr_debug("Add new var: %s\n", buf.buf);
+			if (ret2 == 0) {
+				strlist__add(vl->vars,
+					strbuf_detach(&buf, NULL));
+			}
+			strbuf_release(&buf);
+		}
+	}
+
+	if (af->child && dwarf_haspc(die_mem, af->pf.addr))
+		return DIE_FIND_CB_CONTINUE;
+	else
+		return DIE_FIND_CB_SIBLING;
+error:
+	strbuf_release(&buf);
+	pr_debug("Error in strbuf\n");
+	return DIE_FIND_CB_END;
+}
+
+static bool available_var_finder_overlap(struct available_var_finder *af)
+{
+	int i;
+
+	for (i = 0; i < af->nvls; i++) {
+		if (af->pf.addr == af->vls[i].point.address)
+			return true;
+	}
+	return false;
+
+}
+
+/* Add a found vars into available variables list */
+static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf)
+{
+	struct available_var_finder *af =
+			container_of(pf, struct available_var_finder, pf);
+	struct perf_probe_point *pp = &pf->pev->point;
+	struct variable_list *vl;
+	Dwarf_Die die_mem;
+	int ret;
+
+	/*
+	 * For some reason (e.g. different column assigned to same address),
+	 * this callback can be called with the address which already passed.
+	 * Ignore it first.
+	 */
+	if (available_var_finder_overlap(af))
+		return 0;
+
+	/* Check number of tevs */
+	if (af->nvls == af->max_vls) {
+		pr_warning("Too many( > %d) probe point found.\n", af->max_vls);
+		return -ERANGE;
+	}
+	vl = &af->vls[af->nvls++];
+
+	/* Trace point should be converted from subprogram DIE */
+	ret = convert_to_trace_point(&pf->sp_die, af->mod, pf->addr,
+				     pp->retprobe, pp->function, &vl->point);
+	if (ret < 0)
+		return ret;
+
+	pr_debug("Probe point found: %s+%lu\n", vl->point.symbol,
+		 vl->point.offset);
+
+	/* Find local variables */
+	vl->vars = strlist__new(NULL, NULL);
+	if (vl->vars == NULL)
+		return -ENOMEM;
+	af->child = true;
+	die_find_child(sc_die, collect_variables_cb, (void *)af, &die_mem);
+
+	/* Find external variables */
+	if (!probe_conf.show_ext_vars)
+		goto out;
+	/* Don't need to search child DIE for external vars. */
+	af->child = false;
+	die_find_child(&pf->cu_die, collect_variables_cb, (void *)af, &die_mem);
+
+out:
+	if (strlist__empty(vl->vars)) {
+		strlist__delete(vl->vars);
+		vl->vars = NULL;
+	}
+
+	return ret;
+}
+
+/*
+ * Find available variables at given probe point
+ * Return the number of found probe points. Return 0 if there is no
+ * matched probe point. Return <0 if an error occurs.
+ */
+int debuginfo__find_available_vars_at(struct debuginfo *dbg,
+				      struct perf_probe_event *pev,
+				      struct variable_list **vls)
+{
+	struct available_var_finder af = {
+			.pf = {.pev = pev, .dbg = dbg, .callback = add_available_vars},
+			.mod = dbg->mod,
+			.max_vls = probe_conf.max_probes};
+	int ret;
+
+	/* Allocate result vls array */
+	*vls = zalloc(sizeof(struct variable_list) * af.max_vls);
+	if (*vls == NULL)
+		return -ENOMEM;
+
+	af.vls = *vls;
+	af.nvls = 0;
+
+	ret = debuginfo__find_probes(dbg, &af.pf);
+	if (ret < 0) {
+		/* Free vlist for error */
+		while (af.nvls--) {
+			zfree(&af.vls[af.nvls].point.symbol);
+			strlist__delete(af.vls[af.nvls].vars);
+		}
+		zfree(vls);
+		return ret;
+	}
+
+	return (ret < 0) ? ret : af.nvls;
+}
+
+/* For the kernel module, we need a special code to get a DIE */
+int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs,
+				bool adjust_offset)
+{
+	int n, i;
+	Elf32_Word shndx;
+	Elf_Scn *scn;
+	Elf *elf;
+	GElf_Shdr mem, *shdr;
+	const char *p;
+
+	elf = dwfl_module_getelf(dbg->mod, &dbg->bias);
+	if (!elf)
+		return -EINVAL;
+
+	/* Get the number of relocations */
+	n = dwfl_module_relocations(dbg->mod);
+	if (n < 0)
+		return -ENOENT;
+	/* Search the relocation related .text section */
+	for (i = 0; i < n; i++) {
+		p = dwfl_module_relocation_info(dbg->mod, i, &shndx);
+		if (strcmp(p, ".text") == 0) {
+			/* OK, get the section header */
+			scn = elf_getscn(elf, shndx);
+			if (!scn)
+				return -ENOENT;
+			shdr = gelf_getshdr(scn, &mem);
+			if (!shdr)
+				return -ENOENT;
+			*offs = shdr->sh_addr;
+			if (adjust_offset)
+				*offs -= shdr->sh_offset;
+		}
+	}
+	return 0;
+}
+
+/* Reverse search */
+int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr,
+				struct perf_probe_point *ppt)
+{
+	Dwarf_Die cudie, spdie, indie;
+	Dwarf_Addr _addr = 0, baseaddr = 0;
+	const char *fname = NULL, *func = NULL, *basefunc = NULL, *tmp;
+	int baseline = 0, lineno = 0, ret = 0;
+
+	/* We always need to relocate the address for aranges */
+	if (debuginfo__get_text_offset(dbg, &baseaddr, false) == 0)
+		addr += baseaddr;
+	/* Find cu die */
+	if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr, &cudie)) {
+		pr_warning("Failed to find debug information for address %" PRIx64 "\n",
+			   addr);
+		ret = -EINVAL;
+		goto end;
+	}
+
+	/* Find a corresponding line (filename and lineno) */
+	cu_find_lineinfo(&cudie, (Dwarf_Addr)addr, &fname, &lineno);
+	/* Don't care whether it failed or not */
+
+	/* Find a corresponding function (name, baseline and baseaddr) */
+	if (die_find_realfunc(&cudie, (Dwarf_Addr)addr, &spdie)) {
+		/* Get function entry information */
+		func = basefunc = dwarf_diename(&spdie);
+		if (!func ||
+		    die_entrypc(&spdie, &baseaddr) != 0 ||
+		    dwarf_decl_line(&spdie, &baseline) != 0) {
+			lineno = 0;
+			goto post;
+		}
+
+		fname = dwarf_decl_file(&spdie);
+		if (addr == baseaddr) {
+			/* Function entry - Relative line number is 0 */
+			lineno = baseline;
+			goto post;
+		}
+
+		/* Track down the inline functions step by step */
+		while (die_find_top_inlinefunc(&spdie, (Dwarf_Addr)addr,
+						&indie)) {
+			/* There is an inline function */
+			if (die_entrypc(&indie, &_addr) == 0 &&
+			    _addr == addr) {
+				/*
+				 * addr is at an inline function entry.
+				 * In this case, lineno should be the call-site
+				 * line number. (overwrite lineinfo)
+				 */
+				lineno = die_get_call_lineno(&indie);
+				fname = die_get_call_file(&indie);
+				break;
+			} else {
+				/*
+				 * addr is in an inline function body.
+				 * Since lineno points one of the lines
+				 * of the inline function, baseline should
+				 * be the entry line of the inline function.
+				 */
+				tmp = dwarf_diename(&indie);
+				if (!tmp ||
+				    dwarf_decl_line(&indie, &baseline) != 0)
+					break;
+				func = tmp;
+				spdie = indie;
+			}
+		}
+		/* Verify the lineno and baseline are in a same file */
+		tmp = dwarf_decl_file(&spdie);
+		if (!tmp || strcmp(tmp, fname) != 0)
+			lineno = 0;
+	}
+
+post:
+	/* Make a relative line number or an offset */
+	if (lineno)
+		ppt->line = lineno - baseline;
+	else if (basefunc) {
+		ppt->offset = addr - baseaddr;
+		func = basefunc;
+	}
+
+	/* Duplicate strings */
+	if (func) {
+		ppt->function = strdup(func);
+		if (ppt->function == NULL) {
+			ret = -ENOMEM;
+			goto end;
+		}
+	}
+	if (fname) {
+		ppt->file = strdup(fname);
+		if (ppt->file == NULL) {
+			zfree(&ppt->function);
+			ret = -ENOMEM;
+			goto end;
+		}
+	}
+end:
+	if (ret == 0 && (fname || func))
+		ret = 1;	/* Found a point */
+	return ret;
+}
+
+/* Add a line and store the src path */
+static int line_range_add_line(const char *src, unsigned int lineno,
+			       struct line_range *lr)
+{
+	/* Copy source path */
+	if (!lr->path) {
+		lr->path = strdup(src);
+		if (lr->path == NULL)
+			return -ENOMEM;
+	}
+	return intlist__add(lr->line_list, lineno);
+}
+
+static int line_range_walk_cb(const char *fname, int lineno,
+			      Dwarf_Addr addr, void *data)
+{
+	struct line_finder *lf = data;
+	const char *__fname;
+	int __lineno;
+	int err;
+
+	if ((strtailcmp(fname, lf->fname) != 0) ||
+	    (lf->lno_s > lineno || lf->lno_e < lineno))
+		return 0;
+
+	/* Make sure this line can be reversible */
+	if (cu_find_lineinfo(&lf->cu_die, addr, &__fname, &__lineno) > 0
+	    && (lineno != __lineno || strcmp(fname, __fname)))
+		return 0;
+
+	err = line_range_add_line(fname, lineno, lf->lr);
+	if (err < 0 && err != -EEXIST)
+		return err;
+
+	return 0;
+}
+
+/* Find line range from its line number */
+static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf)
+{
+	int ret;
+
+	ret = die_walk_lines(sp_die ?: &lf->cu_die, line_range_walk_cb, lf);
+
+	/* Update status */
+	if (ret >= 0)
+		if (!intlist__empty(lf->lr->line_list))
+			ret = lf->found = 1;
+		else
+			ret = 0;	/* Lines are not found */
+	else {
+		zfree(&lf->lr->path);
+	}
+	return ret;
+}
+
+static int line_range_inline_cb(Dwarf_Die *in_die, void *data)
+{
+	int ret = find_line_range_by_line(in_die, data);
+
+	/*
+	 * We have to check all instances of inlined function, because
+	 * some execution paths can be optimized out depends on the
+	 * function argument of instances. However, if an error occurs,
+	 * it should be handled by the caller.
+	 */
+	return ret < 0 ? ret : 0;
+}
+
+/* Search function definition from function name */
+static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
+{
+	struct dwarf_callback_param *param = data;
+	struct line_finder *lf = param->data;
+	struct line_range *lr = lf->lr;
+
+	/* Check declared file */
+	if (lr->file && strtailcmp(lr->file, dwarf_decl_file(sp_die)))
+		return DWARF_CB_OK;
+
+	if (die_match_name(sp_die, lr->function) && die_is_func_def(sp_die)) {
+		lf->fname = dwarf_decl_file(sp_die);
+		dwarf_decl_line(sp_die, &lr->offset);
+		pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset);
+		lf->lno_s = lr->offset + lr->start;
+		if (lf->lno_s < 0)	/* Overflow */
+			lf->lno_s = INT_MAX;
+		lf->lno_e = lr->offset + lr->end;
+		if (lf->lno_e < 0)	/* Overflow */
+			lf->lno_e = INT_MAX;
+		pr_debug("New line range: %d to %d\n", lf->lno_s, lf->lno_e);
+		lr->start = lf->lno_s;
+		lr->end = lf->lno_e;
+		if (!die_is_func_instance(sp_die))
+			param->retval = die_walk_instances(sp_die,
+						line_range_inline_cb, lf);
+		else
+			param->retval = find_line_range_by_line(sp_die, lf);
+		return DWARF_CB_ABORT;
+	}
+	return DWARF_CB_OK;
+}
+
+static int find_line_range_by_func(struct line_finder *lf)
+{
+	struct dwarf_callback_param param = {.data = (void *)lf, .retval = 0};
+	dwarf_getfuncs(&lf->cu_die, line_range_search_cb, &param, 0);
+	return param.retval;
+}
+
+int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr)
+{
+	struct line_finder lf = {.lr = lr, .found = 0};
+	int ret = 0;
+	Dwarf_Off off = 0, noff;
+	size_t cuhl;
+	Dwarf_Die *diep;
+	const char *comp_dir;
+
+	/* Fastpath: lookup by function name from .debug_pubnames section */
+	if (lr->function) {
+		struct pubname_callback_param pubname_param = {
+			.function = lr->function, .file = lr->file,
+			.cu_die = &lf.cu_die, .sp_die = &lf.sp_die, .found = 0};
+		struct dwarf_callback_param line_range_param = {
+			.data = (void *)&lf, .retval = 0};
+
+		dwarf_getpubnames(dbg->dbg, pubname_search_cb,
+				  &pubname_param, 0);
+		if (pubname_param.found) {
+			line_range_search_cb(&lf.sp_die, &line_range_param);
+			if (lf.found)
+				goto found;
+		}
+	}
+
+	/* Loop on CUs (Compilation Unit) */
+	while (!lf.found && ret >= 0) {
+		if (dwarf_nextcu(dbg->dbg, off, &noff, &cuhl,
+				 NULL, NULL, NULL) != 0)
+			break;
+
+		/* Get the DIE(Debugging Information Entry) of this CU */
+		diep = dwarf_offdie(dbg->dbg, off + cuhl, &lf.cu_die);
+		if (!diep) {
+			off = noff;
+			continue;
+		}
+
+		/* Check if target file is included. */
+		if (lr->file)
+			lf.fname = cu_find_realpath(&lf.cu_die, lr->file);
+		else
+			lf.fname = 0;
+
+		if (!lr->file || lf.fname) {
+			if (lr->function)
+				ret = find_line_range_by_func(&lf);
+			else {
+				lf.lno_s = lr->start;
+				lf.lno_e = lr->end;
+				ret = find_line_range_by_line(NULL, &lf);
+			}
+		}
+		off = noff;
+	}
+
+found:
+	/* Store comp_dir */
+	if (lf.found) {
+		comp_dir = cu_get_comp_dir(&lf.cu_die);
+		if (comp_dir) {
+			lr->comp_dir = strdup(comp_dir);
+			if (!lr->comp_dir)
+				ret = -ENOMEM;
+		}
+	}
+
+	pr_debug("path: %s\n", lr->path);
+	return (ret < 0) ? ret : lf.found;
+}
+
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+/* debuginfod doesn't require the comp_dir but buildid is required */
+static int get_source_from_debuginfod(const char *raw_path,
+				const char *sbuild_id, char **new_path)
+{
+	debuginfod_client *c = debuginfod_begin();
+	const char *p = raw_path;
+	int fd;
+
+	if (!c)
+		return -ENOMEM;
+
+	fd = debuginfod_find_source(c, (const unsigned char *)sbuild_id,
+				0, p, new_path);
+	pr_debug("Search %s from debuginfod -> %d\n", p, fd);
+	if (fd >= 0)
+		close(fd);
+	debuginfod_end(c);
+	if (fd < 0) {
+		pr_debug("Failed to find %s in debuginfod (%s)\n",
+			raw_path, sbuild_id);
+		return -ENOENT;
+	}
+	pr_debug("Got a source %s\n", *new_path);
+
+	return 0;
+}
+#else
+static inline int get_source_from_debuginfod(const char *raw_path __maybe_unused,
+				const char *sbuild_id __maybe_unused,
+				char **new_path __maybe_unused)
+{
+	return -ENOTSUP;
+}
+#endif
+/*
+ * Find a src file from a DWARF tag path. Prepend optional source path prefix
+ * and chop off leading directories that do not exist. Result is passed back as
+ * a newly allocated path on success.
+ * Return 0 if file was found and readable, -errno otherwise.
+ */
+int find_source_path(const char *raw_path, const char *sbuild_id,
+		const char *comp_dir, char **new_path)
+{
+	const char *prefix = symbol_conf.source_prefix;
+
+	if (sbuild_id && !prefix) {
+		if (!get_source_from_debuginfod(raw_path, sbuild_id, new_path))
+			return 0;
+	}
+
+	if (!prefix) {
+		if (raw_path[0] != '/' && comp_dir)
+			/* If not an absolute path, try to use comp_dir */
+			prefix = comp_dir;
+		else {
+			if (access(raw_path, R_OK) == 0) {
+				*new_path = strdup(raw_path);
+				return *new_path ? 0 : -ENOMEM;
+			} else
+				return -errno;
+		}
+	}
+
+	*new_path = malloc((strlen(prefix) + strlen(raw_path) + 2));
+	if (!*new_path)
+		return -ENOMEM;
+
+	for (;;) {
+		sprintf(*new_path, "%s/%s", prefix, raw_path);
+
+		if (access(*new_path, R_OK) == 0)
+			return 0;
+
+		if (!symbol_conf.source_prefix) {
+			/* In case of searching comp_dir, don't retry */
+			zfree(new_path);
+			return -errno;
+		}
+
+		switch (errno) {
+		case ENAMETOOLONG:
+		case ENOENT:
+		case EROFS:
+		case EFAULT:
+			raw_path = strchr(++raw_path, '/');
+			if (!raw_path) {
+				zfree(new_path);
+				return -ENOENT;
+			}
+			continue;
+
+		default:
+			zfree(new_path);
+			return -errno;
+		}
+	}
+}
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
new file mode 100644
index 000000000..8bc1c80d3
--- /dev/null
+++ b/tools/perf/util/probe-finder.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PROBE_FINDER_H
+#define _PROBE_FINDER_H
+
+#include <stdbool.h>
+#include "intlist.h"
+#include "build-id.h"
+#include "probe-event.h"
+#include <linux/ctype.h>
+
+#define MAX_PROBE_BUFFER	1024
+#define MAX_PROBES		 128
+#define MAX_PROBE_ARGS		 128
+
+#define PROBE_ARG_VARS		"$vars"
+#define PROBE_ARG_PARAMS	"$params"
+
+static inline int is_c_varname(const char *name)
+{
+	/* TODO */
+	return isalpha(name[0]) || name[0] == '_';
+}
+
+#ifdef HAVE_DWARF_SUPPORT
+
+#include "dwarf-aux.h"
+
+/* TODO: export debuginfo data structure even if no dwarf support */
+
+/* debug information structure */
+struct debuginfo {
+	Dwarf		*dbg;
+	Dwfl_Module	*mod;
+	Dwfl		*dwfl;
+	Dwarf_Addr	bias;
+	const unsigned char	*build_id;
+};
+
+/* This also tries to open distro debuginfo */
+struct debuginfo *debuginfo__new(const char *path);
+void debuginfo__delete(struct debuginfo *dbg);
+
+/* Find probe_trace_events specified by perf_probe_event from debuginfo */
+int debuginfo__find_trace_events(struct debuginfo *dbg,
+				 struct perf_probe_event *pev,
+				 struct probe_trace_event **tevs);
+
+/* Find a perf_probe_point from debuginfo */
+int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr,
+				struct perf_probe_point *ppt);
+
+int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs,
+			       bool adjust_offset);
+
+/* Find a line range */
+int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr);
+
+/* Find available variables */
+int debuginfo__find_available_vars_at(struct debuginfo *dbg,
+				      struct perf_probe_event *pev,
+				      struct variable_list **vls);
+
+/* Find a src file from a DWARF tag path */
+int find_source_path(const char *raw_path, const char *sbuild_id,
+		     const char *comp_dir, char **new_path);
+
+struct probe_finder {
+	struct perf_probe_event	*pev;		/* Target probe event */
+	struct debuginfo	*dbg;
+
+	/* Callback when a probe point is found */
+	int (*callback)(Dwarf_Die *sc_die, struct probe_finder *pf);
+
+	/* For function searching */
+	int			lno;		/* Line number */
+	Dwarf_Addr		addr;		/* Address */
+	const char		*fname;		/* Real file name */
+	Dwarf_Die		cu_die;		/* Current CU */
+	Dwarf_Die		sp_die;
+	struct intlist		*lcache;	/* Line cache for lazy match */
+
+	/* For variable searching */
+#if _ELFUTILS_PREREQ(0, 142)
+	/* Call Frame Information from .eh_frame */
+	Dwarf_CFI		*cfi_eh;
+	/* Call Frame Information from .debug_frame */
+	Dwarf_CFI		*cfi_dbg;
+#endif
+	Dwarf_Op		*fb_ops;	/* Frame base attribute */
+	unsigned int		machine;	/* Target machine arch */
+	struct perf_probe_arg	*pvar;		/* Current target variable */
+	struct probe_trace_arg	*tvar;		/* Current result variable */
+	bool			skip_empty_arg;	/* Skip non-exist args */
+};
+
+struct trace_event_finder {
+	struct probe_finder	pf;
+	Dwfl_Module		*mod;		/* For solving symbols */
+	struct probe_trace_event *tevs;		/* Found trace events */
+	int			ntevs;		/* Number of trace events */
+	int			max_tevs;	/* Max number of trace events */
+};
+
+struct available_var_finder {
+	struct probe_finder	pf;
+	Dwfl_Module		*mod;		/* For solving symbols */
+	struct variable_list	*vls;		/* Found variable lists */
+	int			nvls;		/* Number of variable lists */
+	int			max_vls;	/* Max no. of variable lists */
+	bool			child;		/* Search child scopes */
+};
+
+struct line_finder {
+	struct line_range	*lr;		/* Target line range */
+
+	const char		*fname;		/* File name */
+	int			lno_s;		/* Start line number */
+	int			lno_e;		/* End line number */
+	Dwarf_Die		cu_die;		/* Current CU */
+	Dwarf_Die		sp_die;
+	int			found;
+};
+
+#endif /* HAVE_DWARF_SUPPORT */
+
+#endif /*_PROBE_FINDER_H */
diff --git a/tools/perf/util/pstack.c b/tools/perf/util/pstack.c
new file mode 100644
index 000000000..a1d1e4ef6
--- /dev/null
+++ b/tools/perf/util/pstack.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Simple pointer stack
+ *
+ * (c) 2010 Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include "pstack.h"
+#include "debug.h"
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+#include <stdlib.h>
+#include <string.h>
+
+struct pstack {
+	unsigned short	top;
+	unsigned short	max_nr_entries;
+	void		*entries[];
+};
+
+struct pstack *pstack__new(unsigned short max_nr_entries)
+{
+	struct pstack *pstack = zalloc((sizeof(*pstack) +
+				       max_nr_entries * sizeof(void *)));
+	if (pstack != NULL)
+		pstack->max_nr_entries = max_nr_entries;
+	return pstack;
+}
+
+void pstack__delete(struct pstack *pstack)
+{
+	free(pstack);
+}
+
+bool pstack__empty(const struct pstack *pstack)
+{
+	return pstack->top == 0;
+}
+
+void pstack__remove(struct pstack *pstack, void *key)
+{
+	unsigned short i = pstack->top, last_index = pstack->top - 1;
+
+	while (i-- != 0) {
+		if (pstack->entries[i] == key) {
+			if (i < last_index)
+				memmove(pstack->entries + i,
+					pstack->entries + i + 1,
+					(last_index - i) * sizeof(void *));
+			--pstack->top;
+			return;
+		}
+	}
+	pr_err("%s: %p not on the pstack!\n", __func__, key);
+}
+
+void pstack__push(struct pstack *pstack, void *key)
+{
+	if (pstack->top == pstack->max_nr_entries) {
+		pr_err("%s: top=%d, overflow!\n", __func__, pstack->top);
+		return;
+	}
+	pstack->entries[pstack->top++] = key;
+}
+
+void *pstack__pop(struct pstack *pstack)
+{
+	void *ret;
+
+	if (pstack->top == 0) {
+		pr_err("%s: underflow!\n", __func__);
+		return NULL;
+	}
+
+	ret = pstack->entries[--pstack->top];
+	pstack->entries[pstack->top] = NULL;
+	return ret;
+}
+
+void *pstack__peek(struct pstack *pstack)
+{
+	if (pstack->top == 0)
+		return NULL;
+	return pstack->entries[pstack->top - 1];
+}
diff --git a/tools/perf/util/pstack.h b/tools/perf/util/pstack.h
new file mode 100644
index 000000000..8729b8be0
--- /dev/null
+++ b/tools/perf/util/pstack.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_PSTACK_
+#define _PERF_PSTACK_
+
+#include <stdbool.h>
+
+struct pstack;
+struct pstack *pstack__new(unsigned short max_nr_entries);
+void pstack__delete(struct pstack *pstack);
+bool pstack__empty(const struct pstack *pstack);
+void pstack__remove(struct pstack *pstack, void *key);
+void pstack__push(struct pstack *pstack, void *key);
+void *pstack__pop(struct pstack *pstack);
+void *pstack__peek(struct pstack *pstack);
+
+#endif /* _PERF_PSTACK_ */
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
new file mode 100644
index 000000000..aa5156c2b
--- /dev/null
+++ b/tools/perf/util/python-ext-sources
@@ -0,0 +1,43 @@
+#
+# List of files needed by perf python extension
+#
+# Each source file must be placed on its own line so that it can be
+# processed by Makefile and util/setup.py accordingly.
+#
+
+util/python.c
+../lib/ctype.c
+util/cap.c
+util/evlist.c
+util/evsel.c
+util/evsel_fprintf.c
+util/perf_event_attr_fprintf.c
+util/cpumap.c
+util/memswap.c
+util/mmap.c
+util/namespaces.c
+../lib/bitmap.c
+../lib/find_bit.c
+../lib/list_sort.c
+../lib/hweight.c
+../lib/string.c
+../lib/vsprintf.c
+util/thread_map.c
+util/util.c
+util/cgroup.c
+util/parse-branch-options.c
+util/rblist.c
+util/counts.c
+util/print_binary.c
+util/strlist.c
+util/trace-event.c
+../lib/rbtree.c
+util/string.c
+util/symbol_fprintf.c
+util/units.c
+util/affinity.c
+util/rwsem.c
+util/hashmap.c
+util/perf_regs.c
+util/pmu-hybrid.c
+util/fncache.c
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
new file mode 100644
index 000000000..5be5fa239
--- /dev/null
+++ b/tools/perf/util/python.c
@@ -0,0 +1,1462 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <Python.h>
+#include <structmember.h>
+#include <inttypes.h>
+#include <poll.h>
+#include <linux/err.h>
+#include <perf/cpumap.h>
+#include <traceevent/event-parse.h>
+#include <perf/mmap.h>
+#include "evlist.h"
+#include "callchain.h"
+#include "evsel.h"
+#include "event.h"
+#include "print_binary.h"
+#include "thread_map.h"
+#include "trace-event.h"
+#include "mmap.h"
+#include "stat.h"
+#include "metricgroup.h"
+#include "util/env.h"
+#include <internal/lib.h>
+#include "util.h"
+
+#if PY_MAJOR_VERSION < 3
+#define _PyUnicode_FromString(arg) \
+  PyString_FromString(arg)
+#define _PyUnicode_AsString(arg) \
+  PyString_AsString(arg)
+#define _PyUnicode_FromFormat(...) \
+  PyString_FromFormat(__VA_ARGS__)
+#define _PyLong_FromLong(arg) \
+  PyInt_FromLong(arg)
+
+#else
+
+#define _PyUnicode_FromString(arg) \
+  PyUnicode_FromString(arg)
+#define _PyUnicode_FromFormat(...) \
+  PyUnicode_FromFormat(__VA_ARGS__)
+#define _PyLong_FromLong(arg) \
+  PyLong_FromLong(arg)
+#endif
+
+#ifndef Py_TYPE
+#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
+#endif
+
+/*
+ * Provide these two so that we don't have to link against callchain.c and
+ * start dragging hist.c, etc.
+ */
+struct callchain_param callchain_param;
+
+int parse_callchain_record(const char *arg __maybe_unused,
+			   struct callchain_param *param __maybe_unused)
+{
+	return 0;
+}
+
+/*
+ * Add these not to drag util/env.c
+ */
+struct perf_env perf_env;
+
+const char *perf_env__cpuid(struct perf_env *env __maybe_unused)
+{
+	return NULL;
+}
+
+// This one is a bit easier, wouldn't drag too much, but leave it as a stub we need it here
+const char *perf_env__arch(struct perf_env *env __maybe_unused)
+{
+	return NULL;
+}
+
+/*
+ * Add this one here not to drag util/stat-shadow.c
+ */
+void perf_stat__collect_metric_expr(struct evlist *evsel_list)
+{
+}
+
+/*
+ * This one is needed not to drag the PMU bandwagon, jevents generated
+ * pmu_sys_event_tables, etc and evsel__find_pmu() is used so far just for
+ * doing per PMU perf_event_attr.exclude_guest handling, not really needed, so
+ * far, for the perf python binding known usecases, revisit if this become
+ * necessary.
+ */
+struct perf_pmu *evsel__find_pmu(struct evsel *evsel __maybe_unused)
+{
+	return NULL;
+}
+
+/*
+ * Add this one here not to drag util/metricgroup.c
+ */
+int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,
+				    struct rblist *new_metric_events,
+				    struct rblist *old_metric_events)
+{
+	return 0;
+}
+
+/*
+ * XXX: All these evsel destructors need some better mechanism, like a linked
+ * list of destructors registered when the relevant code indeed is used instead
+ * of having more and more calls in perf_evsel__delete(). -- acme
+ *
+ * For now, add some more:
+ *
+ * Not to drag the BPF bandwagon...
+ */
+void bpf_counter__destroy(struct evsel *evsel);
+int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd);
+int bpf_counter__disable(struct evsel *evsel);
+
+void bpf_counter__destroy(struct evsel *evsel __maybe_unused)
+{
+}
+
+int bpf_counter__install_pe(struct evsel *evsel __maybe_unused, int cpu __maybe_unused, int fd __maybe_unused)
+{
+	return 0;
+}
+
+int bpf_counter__disable(struct evsel *evsel __maybe_unused)
+{
+	return 0;
+}
+
+/*
+ * Support debug printing even though util/debug.c is not linked.  That means
+ * implementing 'verbose' and 'eprintf'.
+ */
+int verbose;
+int debug_peo_args;
+
+int eprintf(int level, int var, const char *fmt, ...);
+
+int eprintf(int level, int var, const char *fmt, ...)
+{
+	va_list args;
+	int ret = 0;
+
+	if (var >= level) {
+		va_start(args, fmt);
+		ret = vfprintf(stderr, fmt, args);
+		va_end(args);
+	}
+
+	return ret;
+}
+
+/* Define PyVarObject_HEAD_INIT for python 2.5 */
+#ifndef PyVarObject_HEAD_INIT
+# define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
+#endif
+
+#if PY_MAJOR_VERSION < 3
+PyMODINIT_FUNC initperf(void);
+#else
+PyMODINIT_FUNC PyInit_perf(void);
+#endif
+
+#define member_def(type, member, ptype, help) \
+	{ #member, ptype, \
+	  offsetof(struct pyrf_event, event) + offsetof(struct type, member), \
+	  0, help }
+
+#define sample_member_def(name, member, ptype, help) \
+	{ #name, ptype, \
+	  offsetof(struct pyrf_event, sample) + offsetof(struct perf_sample, member), \
+	  0, help }
+
+struct pyrf_event {
+	PyObject_HEAD
+	struct evsel *evsel;
+	struct perf_sample sample;
+	union perf_event   event;
+};
+
+#define sample_members \
+	sample_member_def(sample_ip, ip, T_ULONGLONG, "event type"),			 \
+	sample_member_def(sample_pid, pid, T_INT, "event pid"),			 \
+	sample_member_def(sample_tid, tid, T_INT, "event tid"),			 \
+	sample_member_def(sample_time, time, T_ULONGLONG, "event timestamp"),		 \
+	sample_member_def(sample_addr, addr, T_ULONGLONG, "event addr"),		 \
+	sample_member_def(sample_id, id, T_ULONGLONG, "event id"),			 \
+	sample_member_def(sample_stream_id, stream_id, T_ULONGLONG, "event stream id"), \
+	sample_member_def(sample_period, period, T_ULONGLONG, "event period"),		 \
+	sample_member_def(sample_cpu, cpu, T_UINT, "event cpu"),
+
+static char pyrf_mmap_event__doc[] = PyDoc_STR("perf mmap event object.");
+
+static PyMemberDef pyrf_mmap_event__members[] = {
+	sample_members
+	member_def(perf_event_header, type, T_UINT, "event type"),
+	member_def(perf_event_header, misc, T_UINT, "event misc"),
+	member_def(perf_record_mmap, pid, T_UINT, "event pid"),
+	member_def(perf_record_mmap, tid, T_UINT, "event tid"),
+	member_def(perf_record_mmap, start, T_ULONGLONG, "start of the map"),
+	member_def(perf_record_mmap, len, T_ULONGLONG, "map length"),
+	member_def(perf_record_mmap, pgoff, T_ULONGLONG, "page offset"),
+	member_def(perf_record_mmap, filename, T_STRING_INPLACE, "backing store"),
+	{ .name = NULL, },
+};
+
+static PyObject *pyrf_mmap_event__repr(struct pyrf_event *pevent)
+{
+	PyObject *ret;
+	char *s;
+
+	if (asprintf(&s, "{ type: mmap, pid: %u, tid: %u, start: %#" PRI_lx64 ", "
+			 "length: %#" PRI_lx64 ", offset: %#" PRI_lx64 ", "
+			 "filename: %s }",
+		     pevent->event.mmap.pid, pevent->event.mmap.tid,
+		     pevent->event.mmap.start, pevent->event.mmap.len,
+		     pevent->event.mmap.pgoff, pevent->event.mmap.filename) < 0) {
+		ret = PyErr_NoMemory();
+	} else {
+		ret = _PyUnicode_FromString(s);
+		free(s);
+	}
+	return ret;
+}
+
+static PyTypeObject pyrf_mmap_event__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.mmap_event",
+	.tp_basicsize	= sizeof(struct pyrf_event),
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc		= pyrf_mmap_event__doc,
+	.tp_members	= pyrf_mmap_event__members,
+	.tp_repr	= (reprfunc)pyrf_mmap_event__repr,
+};
+
+static char pyrf_task_event__doc[] = PyDoc_STR("perf task (fork/exit) event object.");
+
+static PyMemberDef pyrf_task_event__members[] = {
+	sample_members
+	member_def(perf_event_header, type, T_UINT, "event type"),
+	member_def(perf_record_fork, pid, T_UINT, "event pid"),
+	member_def(perf_record_fork, ppid, T_UINT, "event ppid"),
+	member_def(perf_record_fork, tid, T_UINT, "event tid"),
+	member_def(perf_record_fork, ptid, T_UINT, "event ptid"),
+	member_def(perf_record_fork, time, T_ULONGLONG, "timestamp"),
+	{ .name = NULL, },
+};
+
+static PyObject *pyrf_task_event__repr(struct pyrf_event *pevent)
+{
+	return _PyUnicode_FromFormat("{ type: %s, pid: %u, ppid: %u, tid: %u, "
+				   "ptid: %u, time: %" PRI_lu64 "}",
+				   pevent->event.header.type == PERF_RECORD_FORK ? "fork" : "exit",
+				   pevent->event.fork.pid,
+				   pevent->event.fork.ppid,
+				   pevent->event.fork.tid,
+				   pevent->event.fork.ptid,
+				   pevent->event.fork.time);
+}
+
+static PyTypeObject pyrf_task_event__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.task_event",
+	.tp_basicsize	= sizeof(struct pyrf_event),
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc		= pyrf_task_event__doc,
+	.tp_members	= pyrf_task_event__members,
+	.tp_repr	= (reprfunc)pyrf_task_event__repr,
+};
+
+static char pyrf_comm_event__doc[] = PyDoc_STR("perf comm event object.");
+
+static PyMemberDef pyrf_comm_event__members[] = {
+	sample_members
+	member_def(perf_event_header, type, T_UINT, "event type"),
+	member_def(perf_record_comm, pid, T_UINT, "event pid"),
+	member_def(perf_record_comm, tid, T_UINT, "event tid"),
+	member_def(perf_record_comm, comm, T_STRING_INPLACE, "process name"),
+	{ .name = NULL, },
+};
+
+static PyObject *pyrf_comm_event__repr(struct pyrf_event *pevent)
+{
+	return _PyUnicode_FromFormat("{ type: comm, pid: %u, tid: %u, comm: %s }",
+				   pevent->event.comm.pid,
+				   pevent->event.comm.tid,
+				   pevent->event.comm.comm);
+}
+
+static PyTypeObject pyrf_comm_event__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.comm_event",
+	.tp_basicsize	= sizeof(struct pyrf_event),
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc		= pyrf_comm_event__doc,
+	.tp_members	= pyrf_comm_event__members,
+	.tp_repr	= (reprfunc)pyrf_comm_event__repr,
+};
+
+static char pyrf_throttle_event__doc[] = PyDoc_STR("perf throttle event object.");
+
+static PyMemberDef pyrf_throttle_event__members[] = {
+	sample_members
+	member_def(perf_event_header, type, T_UINT, "event type"),
+	member_def(perf_record_throttle, time, T_ULONGLONG, "timestamp"),
+	member_def(perf_record_throttle, id, T_ULONGLONG, "event id"),
+	member_def(perf_record_throttle, stream_id, T_ULONGLONG, "event stream id"),
+	{ .name = NULL, },
+};
+
+static PyObject *pyrf_throttle_event__repr(struct pyrf_event *pevent)
+{
+	struct perf_record_throttle *te = (struct perf_record_throttle *)(&pevent->event.header + 1);
+
+	return _PyUnicode_FromFormat("{ type: %sthrottle, time: %" PRI_lu64 ", id: %" PRI_lu64
+				   ", stream_id: %" PRI_lu64 " }",
+				   pevent->event.header.type == PERF_RECORD_THROTTLE ? "" : "un",
+				   te->time, te->id, te->stream_id);
+}
+
+static PyTypeObject pyrf_throttle_event__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.throttle_event",
+	.tp_basicsize	= sizeof(struct pyrf_event),
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc		= pyrf_throttle_event__doc,
+	.tp_members	= pyrf_throttle_event__members,
+	.tp_repr	= (reprfunc)pyrf_throttle_event__repr,
+};
+
+static char pyrf_lost_event__doc[] = PyDoc_STR("perf lost event object.");
+
+static PyMemberDef pyrf_lost_event__members[] = {
+	sample_members
+	member_def(perf_record_lost, id, T_ULONGLONG, "event id"),
+	member_def(perf_record_lost, lost, T_ULONGLONG, "number of lost events"),
+	{ .name = NULL, },
+};
+
+static PyObject *pyrf_lost_event__repr(struct pyrf_event *pevent)
+{
+	PyObject *ret;
+	char *s;
+
+	if (asprintf(&s, "{ type: lost, id: %#" PRI_lx64 ", "
+			 "lost: %#" PRI_lx64 " }",
+		     pevent->event.lost.id, pevent->event.lost.lost) < 0) {
+		ret = PyErr_NoMemory();
+	} else {
+		ret = _PyUnicode_FromString(s);
+		free(s);
+	}
+	return ret;
+}
+
+static PyTypeObject pyrf_lost_event__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.lost_event",
+	.tp_basicsize	= sizeof(struct pyrf_event),
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc		= pyrf_lost_event__doc,
+	.tp_members	= pyrf_lost_event__members,
+	.tp_repr	= (reprfunc)pyrf_lost_event__repr,
+};
+
+static char pyrf_read_event__doc[] = PyDoc_STR("perf read event object.");
+
+static PyMemberDef pyrf_read_event__members[] = {
+	sample_members
+	member_def(perf_record_read, pid, T_UINT, "event pid"),
+	member_def(perf_record_read, tid, T_UINT, "event tid"),
+	{ .name = NULL, },
+};
+
+static PyObject *pyrf_read_event__repr(struct pyrf_event *pevent)
+{
+	return _PyUnicode_FromFormat("{ type: read, pid: %u, tid: %u }",
+				   pevent->event.read.pid,
+				   pevent->event.read.tid);
+	/*
+ 	 * FIXME: return the array of read values,
+ 	 * making this method useful ;-)
+ 	 */
+}
+
+static PyTypeObject pyrf_read_event__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.read_event",
+	.tp_basicsize	= sizeof(struct pyrf_event),
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc		= pyrf_read_event__doc,
+	.tp_members	= pyrf_read_event__members,
+	.tp_repr	= (reprfunc)pyrf_read_event__repr,
+};
+
+static char pyrf_sample_event__doc[] = PyDoc_STR("perf sample event object.");
+
+static PyMemberDef pyrf_sample_event__members[] = {
+	sample_members
+	member_def(perf_event_header, type, T_UINT, "event type"),
+	{ .name = NULL, },
+};
+
+static PyObject *pyrf_sample_event__repr(struct pyrf_event *pevent)
+{
+	PyObject *ret;
+	char *s;
+
+	if (asprintf(&s, "{ type: sample }") < 0) {
+		ret = PyErr_NoMemory();
+	} else {
+		ret = _PyUnicode_FromString(s);
+		free(s);
+	}
+	return ret;
+}
+
+static bool is_tracepoint(struct pyrf_event *pevent)
+{
+	return pevent->evsel->core.attr.type == PERF_TYPE_TRACEPOINT;
+}
+
+static PyObject*
+tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field)
+{
+	struct tep_handle *pevent = field->event->tep;
+	void *data = pe->sample.raw_data;
+	PyObject *ret = NULL;
+	unsigned long long val;
+	unsigned int offset, len;
+
+	if (field->flags & TEP_FIELD_IS_ARRAY) {
+		offset = field->offset;
+		len    = field->size;
+		if (field->flags & TEP_FIELD_IS_DYNAMIC) {
+			val     = tep_read_number(pevent, data + offset, len);
+			offset  = val;
+			len     = offset >> 16;
+			offset &= 0xffff;
+			if (field->flags & TEP_FIELD_IS_RELATIVE)
+				offset += field->offset + field->size;
+		}
+		if (field->flags & TEP_FIELD_IS_STRING &&
+		    is_printable_array(data + offset, len)) {
+			ret = _PyUnicode_FromString((char *)data + offset);
+		} else {
+			ret = PyByteArray_FromStringAndSize((const char *) data + offset, len);
+			field->flags &= ~TEP_FIELD_IS_STRING;
+		}
+	} else {
+		val = tep_read_number(pevent, data + field->offset,
+				      field->size);
+		if (field->flags & TEP_FIELD_IS_POINTER)
+			ret = PyLong_FromUnsignedLong((unsigned long) val);
+		else if (field->flags & TEP_FIELD_IS_SIGNED)
+			ret = PyLong_FromLong((long) val);
+		else
+			ret = PyLong_FromUnsignedLong((unsigned long) val);
+	}
+
+	return ret;
+}
+
+static PyObject*
+get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name)
+{
+	const char *str = _PyUnicode_AsString(PyObject_Str(attr_name));
+	struct evsel *evsel = pevent->evsel;
+	struct tep_format_field *field;
+
+	if (!evsel->tp_format) {
+		struct tep_event *tp_format;
+
+		tp_format = trace_event__tp_format_id(evsel->core.attr.config);
+		if (IS_ERR_OR_NULL(tp_format))
+			return NULL;
+
+		evsel->tp_format = tp_format;
+	}
+
+	field = tep_find_any_field(evsel->tp_format, str);
+	if (!field)
+		return NULL;
+
+	return tracepoint_field(pevent, field);
+}
+
+static PyObject*
+pyrf_sample_event__getattro(struct pyrf_event *pevent, PyObject *attr_name)
+{
+	PyObject *obj = NULL;
+
+	if (is_tracepoint(pevent))
+		obj = get_tracepoint_field(pevent, attr_name);
+
+	return obj ?: PyObject_GenericGetAttr((PyObject *) pevent, attr_name);
+}
+
+static PyTypeObject pyrf_sample_event__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.sample_event",
+	.tp_basicsize	= sizeof(struct pyrf_event),
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc		= pyrf_sample_event__doc,
+	.tp_members	= pyrf_sample_event__members,
+	.tp_repr	= (reprfunc)pyrf_sample_event__repr,
+	.tp_getattro	= (getattrofunc) pyrf_sample_event__getattro,
+};
+
+static char pyrf_context_switch_event__doc[] = PyDoc_STR("perf context_switch event object.");
+
+static PyMemberDef pyrf_context_switch_event__members[] = {
+	sample_members
+	member_def(perf_event_header, type, T_UINT, "event type"),
+	member_def(perf_record_switch, next_prev_pid, T_UINT, "next/prev pid"),
+	member_def(perf_record_switch, next_prev_tid, T_UINT, "next/prev tid"),
+	{ .name = NULL, },
+};
+
+static PyObject *pyrf_context_switch_event__repr(struct pyrf_event *pevent)
+{
+	PyObject *ret;
+	char *s;
+
+	if (asprintf(&s, "{ type: context_switch, next_prev_pid: %u, next_prev_tid: %u, switch_out: %u }",
+		     pevent->event.context_switch.next_prev_pid,
+		     pevent->event.context_switch.next_prev_tid,
+		     !!(pevent->event.header.misc & PERF_RECORD_MISC_SWITCH_OUT)) < 0) {
+		ret = PyErr_NoMemory();
+	} else {
+		ret = _PyUnicode_FromString(s);
+		free(s);
+	}
+	return ret;
+}
+
+static PyTypeObject pyrf_context_switch_event__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.context_switch_event",
+	.tp_basicsize	= sizeof(struct pyrf_event),
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc		= pyrf_context_switch_event__doc,
+	.tp_members	= pyrf_context_switch_event__members,
+	.tp_repr	= (reprfunc)pyrf_context_switch_event__repr,
+};
+
+static int pyrf_event__setup_types(void)
+{
+	int err;
+	pyrf_mmap_event__type.tp_new =
+	pyrf_task_event__type.tp_new =
+	pyrf_comm_event__type.tp_new =
+	pyrf_lost_event__type.tp_new =
+	pyrf_read_event__type.tp_new =
+	pyrf_sample_event__type.tp_new =
+	pyrf_context_switch_event__type.tp_new =
+	pyrf_throttle_event__type.tp_new = PyType_GenericNew;
+	err = PyType_Ready(&pyrf_mmap_event__type);
+	if (err < 0)
+		goto out;
+	err = PyType_Ready(&pyrf_lost_event__type);
+	if (err < 0)
+		goto out;
+	err = PyType_Ready(&pyrf_task_event__type);
+	if (err < 0)
+		goto out;
+	err = PyType_Ready(&pyrf_comm_event__type);
+	if (err < 0)
+		goto out;
+	err = PyType_Ready(&pyrf_throttle_event__type);
+	if (err < 0)
+		goto out;
+	err = PyType_Ready(&pyrf_read_event__type);
+	if (err < 0)
+		goto out;
+	err = PyType_Ready(&pyrf_sample_event__type);
+	if (err < 0)
+		goto out;
+	err = PyType_Ready(&pyrf_context_switch_event__type);
+	if (err < 0)
+		goto out;
+out:
+	return err;
+}
+
+static PyTypeObject *pyrf_event__type[] = {
+	[PERF_RECORD_MMAP]	 = &pyrf_mmap_event__type,
+	[PERF_RECORD_LOST]	 = &pyrf_lost_event__type,
+	[PERF_RECORD_COMM]	 = &pyrf_comm_event__type,
+	[PERF_RECORD_EXIT]	 = &pyrf_task_event__type,
+	[PERF_RECORD_THROTTLE]	 = &pyrf_throttle_event__type,
+	[PERF_RECORD_UNTHROTTLE] = &pyrf_throttle_event__type,
+	[PERF_RECORD_FORK]	 = &pyrf_task_event__type,
+	[PERF_RECORD_READ]	 = &pyrf_read_event__type,
+	[PERF_RECORD_SAMPLE]	 = &pyrf_sample_event__type,
+	[PERF_RECORD_SWITCH]	 = &pyrf_context_switch_event__type,
+	[PERF_RECORD_SWITCH_CPU_WIDE]  = &pyrf_context_switch_event__type,
+};
+
+static PyObject *pyrf_event__new(union perf_event *event)
+{
+	struct pyrf_event *pevent;
+	PyTypeObject *ptype;
+
+	if ((event->header.type < PERF_RECORD_MMAP ||
+	     event->header.type > PERF_RECORD_SAMPLE) &&
+	    !(event->header.type == PERF_RECORD_SWITCH ||
+	      event->header.type == PERF_RECORD_SWITCH_CPU_WIDE))
+		return NULL;
+
+	ptype = pyrf_event__type[event->header.type];
+	pevent = PyObject_New(struct pyrf_event, ptype);
+	if (pevent != NULL)
+		memcpy(&pevent->event, event, event->header.size);
+	return (PyObject *)pevent;
+}
+
+struct pyrf_cpu_map {
+	PyObject_HEAD
+
+	struct perf_cpu_map *cpus;
+};
+
+static int pyrf_cpu_map__init(struct pyrf_cpu_map *pcpus,
+			      PyObject *args, PyObject *kwargs)
+{
+	static char *kwlist[] = { "cpustr", NULL };
+	char *cpustr = NULL;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|s",
+					 kwlist, &cpustr))
+		return -1;
+
+	pcpus->cpus = perf_cpu_map__new(cpustr);
+	if (pcpus->cpus == NULL)
+		return -1;
+	return 0;
+}
+
+static void pyrf_cpu_map__delete(struct pyrf_cpu_map *pcpus)
+{
+	perf_cpu_map__put(pcpus->cpus);
+	Py_TYPE(pcpus)->tp_free((PyObject*)pcpus);
+}
+
+static Py_ssize_t pyrf_cpu_map__length(PyObject *obj)
+{
+	struct pyrf_cpu_map *pcpus = (void *)obj;
+
+	return perf_cpu_map__nr(pcpus->cpus);
+}
+
+static PyObject *pyrf_cpu_map__item(PyObject *obj, Py_ssize_t i)
+{
+	struct pyrf_cpu_map *pcpus = (void *)obj;
+
+	if (i >= perf_cpu_map__nr(pcpus->cpus))
+		return NULL;
+
+	return Py_BuildValue("i", perf_cpu_map__cpu(pcpus->cpus, i).cpu);
+}
+
+static PySequenceMethods pyrf_cpu_map__sequence_methods = {
+	.sq_length = pyrf_cpu_map__length,
+	.sq_item   = pyrf_cpu_map__item,
+};
+
+static char pyrf_cpu_map__doc[] = PyDoc_STR("cpu map object.");
+
+static PyTypeObject pyrf_cpu_map__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.cpu_map",
+	.tp_basicsize	= sizeof(struct pyrf_cpu_map),
+	.tp_dealloc	= (destructor)pyrf_cpu_map__delete,
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc		= pyrf_cpu_map__doc,
+	.tp_as_sequence	= &pyrf_cpu_map__sequence_methods,
+	.tp_init	= (initproc)pyrf_cpu_map__init,
+};
+
+static int pyrf_cpu_map__setup_types(void)
+{
+	pyrf_cpu_map__type.tp_new = PyType_GenericNew;
+	return PyType_Ready(&pyrf_cpu_map__type);
+}
+
+struct pyrf_thread_map {
+	PyObject_HEAD
+
+	struct perf_thread_map *threads;
+};
+
+static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads,
+				 PyObject *args, PyObject *kwargs)
+{
+	static char *kwlist[] = { "pid", "tid", "uid", NULL };
+	int pid = -1, tid = -1, uid = UINT_MAX;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iii",
+					 kwlist, &pid, &tid, &uid))
+		return -1;
+
+	pthreads->threads = thread_map__new(pid, tid, uid);
+	if (pthreads->threads == NULL)
+		return -1;
+	return 0;
+}
+
+static void pyrf_thread_map__delete(struct pyrf_thread_map *pthreads)
+{
+	perf_thread_map__put(pthreads->threads);
+	Py_TYPE(pthreads)->tp_free((PyObject*)pthreads);
+}
+
+static Py_ssize_t pyrf_thread_map__length(PyObject *obj)
+{
+	struct pyrf_thread_map *pthreads = (void *)obj;
+
+	return pthreads->threads->nr;
+}
+
+static PyObject *pyrf_thread_map__item(PyObject *obj, Py_ssize_t i)
+{
+	struct pyrf_thread_map *pthreads = (void *)obj;
+
+	if (i >= pthreads->threads->nr)
+		return NULL;
+
+	return Py_BuildValue("i", pthreads->threads->map[i]);
+}
+
+static PySequenceMethods pyrf_thread_map__sequence_methods = {
+	.sq_length = pyrf_thread_map__length,
+	.sq_item   = pyrf_thread_map__item,
+};
+
+static char pyrf_thread_map__doc[] = PyDoc_STR("thread map object.");
+
+static PyTypeObject pyrf_thread_map__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.thread_map",
+	.tp_basicsize	= sizeof(struct pyrf_thread_map),
+	.tp_dealloc	= (destructor)pyrf_thread_map__delete,
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc		= pyrf_thread_map__doc,
+	.tp_as_sequence	= &pyrf_thread_map__sequence_methods,
+	.tp_init	= (initproc)pyrf_thread_map__init,
+};
+
+static int pyrf_thread_map__setup_types(void)
+{
+	pyrf_thread_map__type.tp_new = PyType_GenericNew;
+	return PyType_Ready(&pyrf_thread_map__type);
+}
+
+struct pyrf_evsel {
+	PyObject_HEAD
+
+	struct evsel evsel;
+};
+
+static int pyrf_evsel__init(struct pyrf_evsel *pevsel,
+			    PyObject *args, PyObject *kwargs)
+{
+	struct perf_event_attr attr = {
+		.type = PERF_TYPE_HARDWARE,
+		.config = PERF_COUNT_HW_CPU_CYCLES,
+		.sample_type = PERF_SAMPLE_PERIOD | PERF_SAMPLE_TID,
+	};
+	static char *kwlist[] = {
+		"type",
+		"config",
+		"sample_freq",
+		"sample_period",
+		"sample_type",
+		"read_format",
+		"disabled",
+		"inherit",
+		"pinned",
+		"exclusive",
+		"exclude_user",
+		"exclude_kernel",
+		"exclude_hv",
+		"exclude_idle",
+		"mmap",
+		"context_switch",
+		"comm",
+		"freq",
+		"inherit_stat",
+		"enable_on_exec",
+		"task",
+		"watermark",
+		"precise_ip",
+		"mmap_data",
+		"sample_id_all",
+		"wakeup_events",
+		"bp_type",
+		"bp_addr",
+		"bp_len",
+		 NULL
+	};
+	u64 sample_period = 0;
+	u32 disabled = 0,
+	    inherit = 0,
+	    pinned = 0,
+	    exclusive = 0,
+	    exclude_user = 0,
+	    exclude_kernel = 0,
+	    exclude_hv = 0,
+	    exclude_idle = 0,
+	    mmap = 0,
+	    context_switch = 0,
+	    comm = 0,
+	    freq = 1,
+	    inherit_stat = 0,
+	    enable_on_exec = 0,
+	    task = 0,
+	    watermark = 0,
+	    precise_ip = 0,
+	    mmap_data = 0,
+	    sample_id_all = 1;
+	int idx = 0;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs,
+					 "|iKiKKiiiiiiiiiiiiiiiiiiiiiiKK", kwlist,
+					 &attr.type, &attr.config, &attr.sample_freq,
+					 &sample_period, &attr.sample_type,
+					 &attr.read_format, &disabled, &inherit,
+					 &pinned, &exclusive, &exclude_user,
+					 &exclude_kernel, &exclude_hv, &exclude_idle,
+					 &mmap, &context_switch, &comm, &freq, &inherit_stat,
+					 &enable_on_exec, &task, &watermark,
+					 &precise_ip, &mmap_data, &sample_id_all,
+					 &attr.wakeup_events, &attr.bp_type,
+					 &attr.bp_addr, &attr.bp_len, &idx))
+		return -1;
+
+	/* union... */
+	if (sample_period != 0) {
+		if (attr.sample_freq != 0)
+			return -1; /* FIXME: throw right exception */
+		attr.sample_period = sample_period;
+	}
+
+	/* Bitfields */
+	attr.disabled	    = disabled;
+	attr.inherit	    = inherit;
+	attr.pinned	    = pinned;
+	attr.exclusive	    = exclusive;
+	attr.exclude_user   = exclude_user;
+	attr.exclude_kernel = exclude_kernel;
+	attr.exclude_hv	    = exclude_hv;
+	attr.exclude_idle   = exclude_idle;
+	attr.mmap	    = mmap;
+	attr.context_switch = context_switch;
+	attr.comm	    = comm;
+	attr.freq	    = freq;
+	attr.inherit_stat   = inherit_stat;
+	attr.enable_on_exec = enable_on_exec;
+	attr.task	    = task;
+	attr.watermark	    = watermark;
+	attr.precise_ip	    = precise_ip;
+	attr.mmap_data	    = mmap_data;
+	attr.sample_id_all  = sample_id_all;
+	attr.size	    = sizeof(attr);
+
+	evsel__init(&pevsel->evsel, &attr, idx);
+	return 0;
+}
+
+static void pyrf_evsel__delete(struct pyrf_evsel *pevsel)
+{
+	evsel__exit(&pevsel->evsel);
+	Py_TYPE(pevsel)->tp_free((PyObject*)pevsel);
+}
+
+static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel,
+				  PyObject *args, PyObject *kwargs)
+{
+	struct evsel *evsel = &pevsel->evsel;
+	struct perf_cpu_map *cpus = NULL;
+	struct perf_thread_map *threads = NULL;
+	PyObject *pcpus = NULL, *pthreads = NULL;
+	int group = 0, inherit = 0;
+	static char *kwlist[] = { "cpus", "threads", "group", "inherit", NULL };
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist,
+					 &pcpus, &pthreads, &group, &inherit))
+		return NULL;
+
+	if (pthreads != NULL)
+		threads = ((struct pyrf_thread_map *)pthreads)->threads;
+
+	if (pcpus != NULL)
+		cpus = ((struct pyrf_cpu_map *)pcpus)->cpus;
+
+	evsel->core.attr.inherit = inherit;
+	/*
+	 * This will group just the fds for this single evsel, to group
+	 * multiple events, use evlist.open().
+	 */
+	if (evsel__open(evsel, cpus, threads) < 0) {
+		PyErr_SetFromErrno(PyExc_OSError);
+		return NULL;
+	}
+
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+static PyMethodDef pyrf_evsel__methods[] = {
+	{
+		.ml_name  = "open",
+		.ml_meth  = (PyCFunction)pyrf_evsel__open,
+		.ml_flags = METH_VARARGS | METH_KEYWORDS,
+		.ml_doc	  = PyDoc_STR("open the event selector file descriptor table.")
+	},
+	{ .ml_name = NULL, }
+};
+
+static char pyrf_evsel__doc[] = PyDoc_STR("perf event selector list object.");
+
+static PyTypeObject pyrf_evsel__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.evsel",
+	.tp_basicsize	= sizeof(struct pyrf_evsel),
+	.tp_dealloc	= (destructor)pyrf_evsel__delete,
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc		= pyrf_evsel__doc,
+	.tp_methods	= pyrf_evsel__methods,
+	.tp_init	= (initproc)pyrf_evsel__init,
+};
+
+static int pyrf_evsel__setup_types(void)
+{
+	pyrf_evsel__type.tp_new = PyType_GenericNew;
+	return PyType_Ready(&pyrf_evsel__type);
+}
+
+struct pyrf_evlist {
+	PyObject_HEAD
+
+	struct evlist evlist;
+};
+
+static int pyrf_evlist__init(struct pyrf_evlist *pevlist,
+			     PyObject *args, PyObject *kwargs __maybe_unused)
+{
+	PyObject *pcpus = NULL, *pthreads = NULL;
+	struct perf_cpu_map *cpus;
+	struct perf_thread_map *threads;
+
+	if (!PyArg_ParseTuple(args, "OO", &pcpus, &pthreads))
+		return -1;
+
+	threads = ((struct pyrf_thread_map *)pthreads)->threads;
+	cpus = ((struct pyrf_cpu_map *)pcpus)->cpus;
+	evlist__init(&pevlist->evlist, cpus, threads);
+	return 0;
+}
+
+static void pyrf_evlist__delete(struct pyrf_evlist *pevlist)
+{
+	evlist__exit(&pevlist->evlist);
+	Py_TYPE(pevlist)->tp_free((PyObject*)pevlist);
+}
+
+static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist,
+				   PyObject *args, PyObject *kwargs)
+{
+	struct evlist *evlist = &pevlist->evlist;
+	static char *kwlist[] = { "pages", "overwrite", NULL };
+	int pages = 128, overwrite = false;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", kwlist,
+					 &pages, &overwrite))
+		return NULL;
+
+	if (evlist__mmap(evlist, pages) < 0) {
+		PyErr_SetFromErrno(PyExc_OSError);
+		return NULL;
+	}
+
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist,
+				   PyObject *args, PyObject *kwargs)
+{
+	struct evlist *evlist = &pevlist->evlist;
+	static char *kwlist[] = { "timeout", NULL };
+	int timeout = -1, n;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &timeout))
+		return NULL;
+
+	n = evlist__poll(evlist, timeout);
+	if (n < 0) {
+		PyErr_SetFromErrno(PyExc_OSError);
+		return NULL;
+	}
+
+	return Py_BuildValue("i", n);
+}
+
+static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist,
+					 PyObject *args __maybe_unused,
+					 PyObject *kwargs __maybe_unused)
+{
+	struct evlist *evlist = &pevlist->evlist;
+        PyObject *list = PyList_New(0);
+	int i;
+
+	for (i = 0; i < evlist->core.pollfd.nr; ++i) {
+		PyObject *file;
+#if PY_MAJOR_VERSION < 3
+		FILE *fp = fdopen(evlist->core.pollfd.entries[i].fd, "r");
+
+		if (fp == NULL)
+			goto free_list;
+
+		file = PyFile_FromFile(fp, "perf", "r", NULL);
+#else
+		file = PyFile_FromFd(evlist->core.pollfd.entries[i].fd, "perf", "r", -1,
+				     NULL, NULL, NULL, 0);
+#endif
+		if (file == NULL)
+			goto free_list;
+
+		if (PyList_Append(list, file) != 0) {
+			Py_DECREF(file);
+			goto free_list;
+		}
+
+		Py_DECREF(file);
+	}
+
+	return list;
+free_list:
+	return PyErr_NoMemory();
+}
+
+
+static PyObject *pyrf_evlist__add(struct pyrf_evlist *pevlist,
+				  PyObject *args,
+				  PyObject *kwargs __maybe_unused)
+{
+	struct evlist *evlist = &pevlist->evlist;
+	PyObject *pevsel;
+	struct evsel *evsel;
+
+	if (!PyArg_ParseTuple(args, "O", &pevsel))
+		return NULL;
+
+	Py_INCREF(pevsel);
+	evsel = &((struct pyrf_evsel *)pevsel)->evsel;
+	evsel->core.idx = evlist->core.nr_entries;
+	evlist__add(evlist, evsel);
+
+	return Py_BuildValue("i", evlist->core.nr_entries);
+}
+
+static struct mmap *get_md(struct evlist *evlist, int cpu)
+{
+	int i;
+
+	for (i = 0; i < evlist->core.nr_mmaps; i++) {
+		struct mmap *md = &evlist->mmap[i];
+
+		if (md->core.cpu.cpu == cpu)
+			return md;
+	}
+
+	return NULL;
+}
+
+static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
+					  PyObject *args, PyObject *kwargs)
+{
+	struct evlist *evlist = &pevlist->evlist;
+	union perf_event *event;
+	int sample_id_all = 1, cpu;
+	static char *kwlist[] = { "cpu", "sample_id_all", NULL };
+	struct mmap *md;
+	int err;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist,
+					 &cpu, &sample_id_all))
+		return NULL;
+
+	md = get_md(evlist, cpu);
+	if (!md)
+		return NULL;
+
+	if (perf_mmap__read_init(&md->core) < 0)
+		goto end;
+
+	event = perf_mmap__read_event(&md->core);
+	if (event != NULL) {
+		PyObject *pyevent = pyrf_event__new(event);
+		struct pyrf_event *pevent = (struct pyrf_event *)pyevent;
+		struct evsel *evsel;
+
+		if (pyevent == NULL)
+			return PyErr_NoMemory();
+
+		evsel = evlist__event2evsel(evlist, event);
+		if (!evsel) {
+			Py_INCREF(Py_None);
+			return Py_None;
+		}
+
+		pevent->evsel = evsel;
+
+		err = evsel__parse_sample(evsel, event, &pevent->sample);
+
+		/* Consume the even only after we parsed it out. */
+		perf_mmap__consume(&md->core);
+
+		if (err)
+			return PyErr_Format(PyExc_OSError,
+					    "perf: can't parse sample, err=%d", err);
+		return pyevent;
+	}
+end:
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist,
+				   PyObject *args, PyObject *kwargs)
+{
+	struct evlist *evlist = &pevlist->evlist;
+	int group = 0;
+	static char *kwlist[] = { "group", NULL };
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist, &group))
+		return NULL;
+
+	if (group)
+		evlist__set_leader(evlist);
+
+	if (evlist__open(evlist) < 0) {
+		PyErr_SetFromErrno(PyExc_OSError);
+		return NULL;
+	}
+
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+static PyMethodDef pyrf_evlist__methods[] = {
+	{
+		.ml_name  = "mmap",
+		.ml_meth  = (PyCFunction)pyrf_evlist__mmap,
+		.ml_flags = METH_VARARGS | METH_KEYWORDS,
+		.ml_doc	  = PyDoc_STR("mmap the file descriptor table.")
+	},
+	{
+		.ml_name  = "open",
+		.ml_meth  = (PyCFunction)pyrf_evlist__open,
+		.ml_flags = METH_VARARGS | METH_KEYWORDS,
+		.ml_doc	  = PyDoc_STR("open the file descriptors.")
+	},
+	{
+		.ml_name  = "poll",
+		.ml_meth  = (PyCFunction)pyrf_evlist__poll,
+		.ml_flags = METH_VARARGS | METH_KEYWORDS,
+		.ml_doc	  = PyDoc_STR("poll the file descriptor table.")
+	},
+	{
+		.ml_name  = "get_pollfd",
+		.ml_meth  = (PyCFunction)pyrf_evlist__get_pollfd,
+		.ml_flags = METH_VARARGS | METH_KEYWORDS,
+		.ml_doc	  = PyDoc_STR("get the poll file descriptor table.")
+	},
+	{
+		.ml_name  = "add",
+		.ml_meth  = (PyCFunction)pyrf_evlist__add,
+		.ml_flags = METH_VARARGS | METH_KEYWORDS,
+		.ml_doc	  = PyDoc_STR("adds an event selector to the list.")
+	},
+	{
+		.ml_name  = "read_on_cpu",
+		.ml_meth  = (PyCFunction)pyrf_evlist__read_on_cpu,
+		.ml_flags = METH_VARARGS | METH_KEYWORDS,
+		.ml_doc	  = PyDoc_STR("reads an event.")
+	},
+	{ .ml_name = NULL, }
+};
+
+static Py_ssize_t pyrf_evlist__length(PyObject *obj)
+{
+	struct pyrf_evlist *pevlist = (void *)obj;
+
+	return pevlist->evlist.core.nr_entries;
+}
+
+static PyObject *pyrf_evlist__item(PyObject *obj, Py_ssize_t i)
+{
+	struct pyrf_evlist *pevlist = (void *)obj;
+	struct evsel *pos;
+
+	if (i >= pevlist->evlist.core.nr_entries)
+		return NULL;
+
+	evlist__for_each_entry(&pevlist->evlist, pos) {
+		if (i-- == 0)
+			break;
+	}
+
+	return Py_BuildValue("O", container_of(pos, struct pyrf_evsel, evsel));
+}
+
+static PySequenceMethods pyrf_evlist__sequence_methods = {
+	.sq_length = pyrf_evlist__length,
+	.sq_item   = pyrf_evlist__item,
+};
+
+static char pyrf_evlist__doc[] = PyDoc_STR("perf event selector list object.");
+
+static PyTypeObject pyrf_evlist__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.evlist",
+	.tp_basicsize	= sizeof(struct pyrf_evlist),
+	.tp_dealloc	= (destructor)pyrf_evlist__delete,
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_as_sequence	= &pyrf_evlist__sequence_methods,
+	.tp_doc		= pyrf_evlist__doc,
+	.tp_methods	= pyrf_evlist__methods,
+	.tp_init	= (initproc)pyrf_evlist__init,
+};
+
+static int pyrf_evlist__setup_types(void)
+{
+	pyrf_evlist__type.tp_new = PyType_GenericNew;
+	return PyType_Ready(&pyrf_evlist__type);
+}
+
+#define PERF_CONST(name) { #name, PERF_##name }
+
+static struct {
+	const char *name;
+	int	    value;
+} perf__constants[] = {
+	PERF_CONST(TYPE_HARDWARE),
+	PERF_CONST(TYPE_SOFTWARE),
+	PERF_CONST(TYPE_TRACEPOINT),
+	PERF_CONST(TYPE_HW_CACHE),
+	PERF_CONST(TYPE_RAW),
+	PERF_CONST(TYPE_BREAKPOINT),
+
+	PERF_CONST(COUNT_HW_CPU_CYCLES),
+	PERF_CONST(COUNT_HW_INSTRUCTIONS),
+	PERF_CONST(COUNT_HW_CACHE_REFERENCES),
+	PERF_CONST(COUNT_HW_CACHE_MISSES),
+	PERF_CONST(COUNT_HW_BRANCH_INSTRUCTIONS),
+	PERF_CONST(COUNT_HW_BRANCH_MISSES),
+	PERF_CONST(COUNT_HW_BUS_CYCLES),
+	PERF_CONST(COUNT_HW_CACHE_L1D),
+	PERF_CONST(COUNT_HW_CACHE_L1I),
+	PERF_CONST(COUNT_HW_CACHE_LL),
+	PERF_CONST(COUNT_HW_CACHE_DTLB),
+	PERF_CONST(COUNT_HW_CACHE_ITLB),
+	PERF_CONST(COUNT_HW_CACHE_BPU),
+	PERF_CONST(COUNT_HW_CACHE_OP_READ),
+	PERF_CONST(COUNT_HW_CACHE_OP_WRITE),
+	PERF_CONST(COUNT_HW_CACHE_OP_PREFETCH),
+	PERF_CONST(COUNT_HW_CACHE_RESULT_ACCESS),
+	PERF_CONST(COUNT_HW_CACHE_RESULT_MISS),
+
+	PERF_CONST(COUNT_HW_STALLED_CYCLES_FRONTEND),
+	PERF_CONST(COUNT_HW_STALLED_CYCLES_BACKEND),
+
+	PERF_CONST(COUNT_SW_CPU_CLOCK),
+	PERF_CONST(COUNT_SW_TASK_CLOCK),
+	PERF_CONST(COUNT_SW_PAGE_FAULTS),
+	PERF_CONST(COUNT_SW_CONTEXT_SWITCHES),
+	PERF_CONST(COUNT_SW_CPU_MIGRATIONS),
+	PERF_CONST(COUNT_SW_PAGE_FAULTS_MIN),
+	PERF_CONST(COUNT_SW_PAGE_FAULTS_MAJ),
+	PERF_CONST(COUNT_SW_ALIGNMENT_FAULTS),
+	PERF_CONST(COUNT_SW_EMULATION_FAULTS),
+	PERF_CONST(COUNT_SW_DUMMY),
+
+	PERF_CONST(SAMPLE_IP),
+	PERF_CONST(SAMPLE_TID),
+	PERF_CONST(SAMPLE_TIME),
+	PERF_CONST(SAMPLE_ADDR),
+	PERF_CONST(SAMPLE_READ),
+	PERF_CONST(SAMPLE_CALLCHAIN),
+	PERF_CONST(SAMPLE_ID),
+	PERF_CONST(SAMPLE_CPU),
+	PERF_CONST(SAMPLE_PERIOD),
+	PERF_CONST(SAMPLE_STREAM_ID),
+	PERF_CONST(SAMPLE_RAW),
+
+	PERF_CONST(FORMAT_TOTAL_TIME_ENABLED),
+	PERF_CONST(FORMAT_TOTAL_TIME_RUNNING),
+	PERF_CONST(FORMAT_ID),
+	PERF_CONST(FORMAT_GROUP),
+
+	PERF_CONST(RECORD_MMAP),
+	PERF_CONST(RECORD_LOST),
+	PERF_CONST(RECORD_COMM),
+	PERF_CONST(RECORD_EXIT),
+	PERF_CONST(RECORD_THROTTLE),
+	PERF_CONST(RECORD_UNTHROTTLE),
+	PERF_CONST(RECORD_FORK),
+	PERF_CONST(RECORD_READ),
+	PERF_CONST(RECORD_SAMPLE),
+	PERF_CONST(RECORD_MMAP2),
+	PERF_CONST(RECORD_AUX),
+	PERF_CONST(RECORD_ITRACE_START),
+	PERF_CONST(RECORD_LOST_SAMPLES),
+	PERF_CONST(RECORD_SWITCH),
+	PERF_CONST(RECORD_SWITCH_CPU_WIDE),
+
+	PERF_CONST(RECORD_MISC_SWITCH_OUT),
+	{ .name = NULL, },
+};
+
+static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel,
+				  PyObject *args, PyObject *kwargs)
+{
+	struct tep_event *tp_format;
+	static char *kwlist[] = { "sys", "name", NULL };
+	char *sys  = NULL;
+	char *name = NULL;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss", kwlist,
+					 &sys, &name))
+		return NULL;
+
+	tp_format = trace_event__tp_format(sys, name);
+	if (IS_ERR(tp_format))
+		return _PyLong_FromLong(-1);
+
+	return _PyLong_FromLong(tp_format->id);
+}
+
+static PyMethodDef perf__methods[] = {
+	{
+		.ml_name  = "tracepoint",
+		.ml_meth  = (PyCFunction) pyrf__tracepoint,
+		.ml_flags = METH_VARARGS | METH_KEYWORDS,
+		.ml_doc	  = PyDoc_STR("Get tracepoint config.")
+	},
+	{ .ml_name = NULL, }
+};
+
+#if PY_MAJOR_VERSION < 3
+PyMODINIT_FUNC initperf(void)
+#else
+PyMODINIT_FUNC PyInit_perf(void)
+#endif
+{
+	PyObject *obj;
+	int i;
+	PyObject *dict;
+#if PY_MAJOR_VERSION < 3
+	PyObject *module = Py_InitModule("perf", perf__methods);
+#else
+	static struct PyModuleDef moduledef = {
+		PyModuleDef_HEAD_INIT,
+		"perf",			/* m_name */
+		"",			/* m_doc */
+		-1,			/* m_size */
+		perf__methods,		/* m_methods */
+		NULL,			/* m_reload */
+		NULL,			/* m_traverse */
+		NULL,			/* m_clear */
+		NULL,			/* m_free */
+	};
+	PyObject *module = PyModule_Create(&moduledef);
+#endif
+
+	if (module == NULL ||
+	    pyrf_event__setup_types() < 0 ||
+	    pyrf_evlist__setup_types() < 0 ||
+	    pyrf_evsel__setup_types() < 0 ||
+	    pyrf_thread_map__setup_types() < 0 ||
+	    pyrf_cpu_map__setup_types() < 0)
+#if PY_MAJOR_VERSION < 3
+		return;
+#else
+		return module;
+#endif
+
+	/* The page_size is placed in util object. */
+	page_size = sysconf(_SC_PAGE_SIZE);
+
+	Py_INCREF(&pyrf_evlist__type);
+	PyModule_AddObject(module, "evlist", (PyObject*)&pyrf_evlist__type);
+
+	Py_INCREF(&pyrf_evsel__type);
+	PyModule_AddObject(module, "evsel", (PyObject*)&pyrf_evsel__type);
+
+	Py_INCREF(&pyrf_mmap_event__type);
+	PyModule_AddObject(module, "mmap_event", (PyObject *)&pyrf_mmap_event__type);
+
+	Py_INCREF(&pyrf_lost_event__type);
+	PyModule_AddObject(module, "lost_event", (PyObject *)&pyrf_lost_event__type);
+
+	Py_INCREF(&pyrf_comm_event__type);
+	PyModule_AddObject(module, "comm_event", (PyObject *)&pyrf_comm_event__type);
+
+	Py_INCREF(&pyrf_task_event__type);
+	PyModule_AddObject(module, "task_event", (PyObject *)&pyrf_task_event__type);
+
+	Py_INCREF(&pyrf_throttle_event__type);
+	PyModule_AddObject(module, "throttle_event", (PyObject *)&pyrf_throttle_event__type);
+
+	Py_INCREF(&pyrf_task_event__type);
+	PyModule_AddObject(module, "task_event", (PyObject *)&pyrf_task_event__type);
+
+	Py_INCREF(&pyrf_read_event__type);
+	PyModule_AddObject(module, "read_event", (PyObject *)&pyrf_read_event__type);
+
+	Py_INCREF(&pyrf_sample_event__type);
+	PyModule_AddObject(module, "sample_event", (PyObject *)&pyrf_sample_event__type);
+
+	Py_INCREF(&pyrf_context_switch_event__type);
+	PyModule_AddObject(module, "switch_event", (PyObject *)&pyrf_context_switch_event__type);
+
+	Py_INCREF(&pyrf_thread_map__type);
+	PyModule_AddObject(module, "thread_map", (PyObject*)&pyrf_thread_map__type);
+
+	Py_INCREF(&pyrf_cpu_map__type);
+	PyModule_AddObject(module, "cpu_map", (PyObject*)&pyrf_cpu_map__type);
+
+	dict = PyModule_GetDict(module);
+	if (dict == NULL)
+		goto error;
+
+	for (i = 0; perf__constants[i].name != NULL; i++) {
+		obj = _PyLong_FromLong(perf__constants[i].value);
+		if (obj == NULL)
+			goto error;
+		PyDict_SetItemString(dict, perf__constants[i].name, obj);
+		Py_DECREF(obj);
+	}
+
+error:
+	if (PyErr_Occurred())
+		PyErr_SetString(PyExc_ImportError, "perf: Init failed!");
+#if PY_MAJOR_VERSION >= 3
+	return module;
+#endif
+}
+
+/*
+ * Dummy, to avoid dragging all the test_attr infrastructure in the python
+ * binding.
+ */
+void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
+                     int fd, int group_fd, unsigned long flags)
+{
+}
diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
new file mode 100644
index 000000000..376e86cb4
--- /dev/null
+++ b/tools/perf/util/rb_resort.h
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_RESORT_RB_H_
+#define _PERF_RESORT_RB_H_
+/*
+ * Template for creating a class to resort an existing rb_tree according to
+ * a new sort criteria, that must be present in the entries of the source
+ * rb_tree.
+ *
+ * (c) 2016 Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Quick example, resorting threads by its shortname:
+ *
+ * First define the prefix (threads) to be used for the functions and data
+ * structures created, and provide an expression for the sorting, then the
+ * fields to be present in each of the entries in the new, sorted, rb_tree.
+ *
+ * The body of the init function should collect the fields, maybe
+ * pre-calculating them from multiple entries in the original 'entry' from
+ * the rb_tree used as a source for the entries to be sorted:
+
+DEFINE_RB_RESORT_RB(threads, strcmp(a->thread->shortname,
+				    b->thread->shortname) < 0,
+	struct thread *thread;
+)
+{
+	entry->thread = rb_entry(nd, struct thread, rb_node);
+}
+
+ * After this it is just a matter of instantiating it and iterating it,
+ * for a few data structures with existing rb_trees, such as 'struct machine',
+ * helpers are available to get the rb_root and the nr_entries:
+
+	DECLARE_RESORT_RB_MACHINE_THREADS(threads, machine_ptr);
+
+ * This will instantiate the new rb_tree and a cursor for it, that can be used as:
+
+	struct rb_node *nd;
+
+	resort_rb__for_each_entry(nd, threads) {
+		struct thread *t = threads_entry;
+		printf("%s: %d\n", t->shortname, t->tid);
+	}
+
+ * Then delete it:
+
+	resort_rb__delete(threads);
+
+ * The name of the data structures and functions will have a _sorted suffix
+ * right before the method names, i.e. will look like:
+ *
+ * 	struct threads_sorted_entry {}
+ * 	threads_sorted__insert()
+ */
+
+#define DEFINE_RESORT_RB(__name, __comp, ...)					\
+struct __name##_sorted_entry {							\
+	struct rb_node	rb_node;						\
+	__VA_ARGS__								\
+};										\
+static void __name##_sorted__init_entry(struct rb_node *nd,			\
+					struct __name##_sorted_entry *entry);	\
+										\
+static int __name##_sorted__cmp(struct rb_node *nda, struct rb_node *ndb)	\
+{										\
+	struct __name##_sorted_entry *a, *b;					\
+	a = rb_entry(nda, struct __name##_sorted_entry, rb_node);		\
+	b = rb_entry(ndb, struct __name##_sorted_entry, rb_node);		\
+	return __comp;								\
+}										\
+										\
+struct __name##_sorted {							\
+       struct rb_root		    entries;					\
+       struct __name##_sorted_entry nd[0];					\
+};										\
+										\
+static void __name##_sorted__insert(struct __name##_sorted *sorted,		\
+				      struct rb_node *sorted_nd)		\
+{										\
+	struct rb_node **p = &sorted->entries.rb_node, *parent = NULL;		\
+	while (*p != NULL) {							\
+		parent = *p;							\
+		if (__name##_sorted__cmp(sorted_nd, parent))			\
+			p = &(*p)->rb_left;					\
+		else								\
+			p = &(*p)->rb_right;					\
+	}									\
+	rb_link_node(sorted_nd, parent, p);					\
+	rb_insert_color(sorted_nd, &sorted->entries);				\
+}										\
+										\
+static void __name##_sorted__sort(struct __name##_sorted *sorted,		\
+				    struct rb_root *entries)			\
+{										\
+	struct rb_node *nd;							\
+	unsigned int i = 0;							\
+	for (nd = rb_first(entries); nd; nd = rb_next(nd)) {			\
+		struct __name##_sorted_entry *snd = &sorted->nd[i++];		\
+		__name##_sorted__init_entry(nd, snd);				\
+		__name##_sorted__insert(sorted, &snd->rb_node);			\
+	}									\
+}										\
+										\
+static struct __name##_sorted *__name##_sorted__new(struct rb_root *entries,	\
+						    int nr_entries)		\
+{										\
+	struct __name##_sorted *sorted;						\
+	sorted = malloc(sizeof(*sorted) + sizeof(sorted->nd[0]) * nr_entries);	\
+	if (sorted) {								\
+		sorted->entries = RB_ROOT;					\
+		__name##_sorted__sort(sorted, entries);				\
+	}									\
+	return sorted;								\
+}										\
+										\
+static void __name##_sorted__delete(struct __name##_sorted *sorted)		\
+{										\
+	free(sorted);								\
+}										\
+										\
+static void __name##_sorted__init_entry(struct rb_node *nd,			\
+					struct __name##_sorted_entry *entry)
+
+#define DECLARE_RESORT_RB(__name)						\
+struct __name##_sorted_entry *__name##_entry;					\
+struct __name##_sorted *__name = __name##_sorted__new
+
+#define resort_rb__for_each_entry(__nd, __name)					\
+	for (__nd = rb_first(&__name->entries);					\
+	     __name##_entry = rb_entry(__nd, struct __name##_sorted_entry,	\
+				       rb_node), __nd;				\
+	     __nd = rb_next(__nd))
+
+#define resort_rb__delete(__name)						\
+	__name##_sorted__delete(__name), __name = NULL
+
+/*
+ * Helpers for other classes that contains both an rbtree and the
+ * number of entries in it:
+ */
+
+/* For 'struct intlist' */
+#define DECLARE_RESORT_RB_INTLIST(__name, __ilist)				\
+	DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries.rb_root,		\
+				  __ilist->rblist.nr_entries)
+
+/* For 'struct machine->threads' */
+#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket)    \
+ DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries.rb_root, \
+			   __machine->threads[hash_bucket].nr)
+
+#endif /* _PERF_RESORT_RB_H_ */
diff --git a/tools/perf/util/rblist.c b/tools/perf/util/rblist.c
new file mode 100644
index 000000000..f399b7ec4
--- /dev/null
+++ b/tools/perf/util/rblist.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Based on strlist.c by:
+ * (c) 2009 Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "rblist.h"
+
+int rblist__add_node(struct rblist *rblist, const void *new_entry)
+{
+	struct rb_node **p = &rblist->entries.rb_root.rb_node;
+	struct rb_node *parent = NULL, *new_node;
+	bool leftmost = true;
+
+	while (*p != NULL) {
+		int rc;
+
+		parent = *p;
+
+		rc = rblist->node_cmp(parent, new_entry);
+		if (rc > 0)
+			p = &(*p)->rb_left;
+		else if (rc < 0) {
+			p = &(*p)->rb_right;
+			leftmost = false;
+		}
+		else
+			return -EEXIST;
+	}
+
+	new_node = rblist->node_new(rblist, new_entry);
+	if (new_node == NULL)
+		return -ENOMEM;
+
+	rb_link_node(new_node, parent, p);
+	rb_insert_color_cached(new_node, &rblist->entries, leftmost);
+	++rblist->nr_entries;
+
+	return 0;
+}
+
+void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node)
+{
+	rb_erase_cached(rb_node, &rblist->entries);
+	--rblist->nr_entries;
+	rblist->node_delete(rblist, rb_node);
+}
+
+static struct rb_node *__rblist__findnew(struct rblist *rblist,
+					 const void *entry,
+					 bool create)
+{
+	struct rb_node **p = &rblist->entries.rb_root.rb_node;
+	struct rb_node *parent = NULL, *new_node = NULL;
+	bool leftmost = true;
+
+	while (*p != NULL) {
+		int rc;
+
+		parent = *p;
+
+		rc = rblist->node_cmp(parent, entry);
+		if (rc > 0)
+			p = &(*p)->rb_left;
+		else if (rc < 0) {
+			p = &(*p)->rb_right;
+			leftmost = false;
+		}
+		else
+			return parent;
+	}
+
+	if (create) {
+		new_node = rblist->node_new(rblist, entry);
+		if (new_node) {
+			rb_link_node(new_node, parent, p);
+			rb_insert_color_cached(new_node,
+					       &rblist->entries, leftmost);
+			++rblist->nr_entries;
+		}
+	}
+
+	return new_node;
+}
+
+struct rb_node *rblist__find(struct rblist *rblist, const void *entry)
+{
+	return __rblist__findnew(rblist, entry, false);
+}
+
+struct rb_node *rblist__findnew(struct rblist *rblist, const void *entry)
+{
+	return __rblist__findnew(rblist, entry, true);
+}
+
+void rblist__init(struct rblist *rblist)
+{
+	if (rblist != NULL) {
+		rblist->entries	 = RB_ROOT_CACHED;
+		rblist->nr_entries = 0;
+	}
+
+	return;
+}
+
+void rblist__exit(struct rblist *rblist)
+{
+	struct rb_node *pos, *next = rb_first_cached(&rblist->entries);
+
+	while (next) {
+		pos = next;
+		next = rb_next(pos);
+		rblist__remove_node(rblist, pos);
+	}
+}
+
+void rblist__delete(struct rblist *rblist)
+{
+	if (rblist != NULL) {
+		rblist__exit(rblist);
+		free(rblist);
+	}
+}
+
+struct rb_node *rblist__entry(const struct rblist *rblist, unsigned int idx)
+{
+	struct rb_node *node;
+
+	for (node = rb_first_cached(&rblist->entries); node;
+	     node = rb_next(node)) {
+		if (!idx--)
+			return node;
+	}
+
+	return NULL;
+}
diff --git a/tools/perf/util/rblist.h b/tools/perf/util/rblist.h
new file mode 100644
index 000000000..14b232a4d
--- /dev/null
+++ b/tools/perf/util/rblist.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_RBLIST_H
+#define __PERF_RBLIST_H
+
+#include <linux/rbtree.h>
+#include <stdbool.h>
+
+/*
+ * create node structs of the form:
+ * struct my_node {
+ *     struct rb_node rb_node;
+ *     ... my data ...
+ * };
+ *
+ * create list structs of the form:
+ * struct mylist {
+ *     struct rblist rblist;
+ *     ... my data ...
+ * };
+ */
+
+struct rblist {
+	struct rb_root_cached entries;
+	unsigned int   nr_entries;
+
+	int (*node_cmp)(struct rb_node *rbn, const void *entry);
+	struct rb_node *(*node_new)(struct rblist *rlist, const void *new_entry);
+	void (*node_delete)(struct rblist *rblist, struct rb_node *rb_node);
+};
+
+void rblist__init(struct rblist *rblist);
+void rblist__exit(struct rblist *rblist);
+void rblist__delete(struct rblist *rblist);
+int rblist__add_node(struct rblist *rblist, const void *new_entry);
+void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node);
+struct rb_node *rblist__find(struct rblist *rblist, const void *entry);
+struct rb_node *rblist__findnew(struct rblist *rblist, const void *entry);
+struct rb_node *rblist__entry(const struct rblist *rblist, unsigned int idx);
+
+static inline bool rblist__empty(const struct rblist *rblist)
+{
+	return rblist->nr_entries == 0;
+}
+
+static inline unsigned int rblist__nr_entries(const struct rblist *rblist)
+{
+	return rblist->nr_entries;
+}
+
+#endif /* __PERF_RBLIST_H */
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
new file mode 100644
index 000000000..7b58f6c7c
--- /dev/null
+++ b/tools/perf/util/record.c
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "debug.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "evsel_config.h"
+#include "parse-events.h"
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <api/fs/fs.h>
+#include <subcmd/parse-options.h>
+#include <perf/cpumap.h>
+#include "cloexec.h"
+#include "util/perf_api_probe.h"
+#include "record.h"
+#include "../perf-sys.h"
+#include "topdown.h"
+#include "map_symbol.h"
+#include "mem-events.h"
+
+/*
+ * evsel__config_leader_sampling() uses special rules for leader sampling.
+ * However, if the leader is an AUX area event, then assume the event to sample
+ * is the next event.
+ */
+static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evlist)
+{
+	struct evsel *leader = evsel__leader(evsel);
+
+	if (evsel__is_aux_event(leader) || arch_topdown_sample_read(leader) ||
+	    is_mem_loads_aux_event(leader)) {
+		evlist__for_each_entry(evlist, evsel) {
+			if (evsel__leader(evsel) == leader && evsel != evsel__leader(evsel))
+				return evsel;
+		}
+	}
+
+	return leader;
+}
+
+static u64 evsel__config_term_mask(struct evsel *evsel)
+{
+	struct evsel_config_term *term;
+	struct list_head *config_terms = &evsel->config_terms;
+	u64 term_types = 0;
+
+	list_for_each_entry(term, config_terms, list) {
+		term_types |= 1 << term->type;
+	}
+	return term_types;
+}
+
+static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *evlist)
+{
+	struct perf_event_attr *attr = &evsel->core.attr;
+	struct evsel *leader = evsel__leader(evsel);
+	struct evsel *read_sampler;
+	u64 term_types, freq_mask;
+
+	if (!leader->sample_read)
+		return;
+
+	read_sampler = evsel__read_sampler(evsel, evlist);
+
+	if (evsel == read_sampler)
+		return;
+
+	term_types = evsel__config_term_mask(evsel);
+	/*
+	 * Disable sampling for all group members except those with explicit
+	 * config terms or the leader. In the case of an AUX area event, the 2nd
+	 * event in the group is the one that 'leads' the sampling.
+	 */
+	freq_mask = (1 << EVSEL__CONFIG_TERM_FREQ) | (1 << EVSEL__CONFIG_TERM_PERIOD);
+	if ((term_types & freq_mask) == 0) {
+		attr->freq           = 0;
+		attr->sample_freq    = 0;
+		attr->sample_period  = 0;
+	}
+	if ((term_types & (1 << EVSEL__CONFIG_TERM_OVERWRITE)) == 0)
+		attr->write_backward = 0;
+
+	/*
+	 * We don't get a sample for slave events, we make them when delivering
+	 * the group leader sample. Set the slave event to follow the master
+	 * sample_type to ease up reporting.
+	 * An AUX area event also has sample_type requirements, so also include
+	 * the sample type bits from the leader's sample_type to cover that
+	 * case.
+	 */
+	attr->sample_type = read_sampler->core.attr.sample_type |
+			    leader->core.attr.sample_type;
+}
+
+void evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain)
+{
+	struct evsel *evsel;
+	bool use_sample_identifier = false;
+	bool use_comm_exec;
+	bool sample_id = opts->sample_id;
+
+	/*
+	 * Set the evsel leader links before we configure attributes,
+	 * since some might depend on this info.
+	 */
+	if (opts->group)
+		evlist__set_leader(evlist);
+
+	if (perf_cpu_map__cpu(evlist->core.user_requested_cpus, 0).cpu < 0)
+		opts->no_inherit = true;
+
+	use_comm_exec = perf_can_comm_exec();
+
+	evlist__for_each_entry(evlist, evsel) {
+		evsel__config(evsel, opts, callchain);
+		if (evsel->tracking && use_comm_exec)
+			evsel->core.attr.comm_exec = 1;
+	}
+
+	/* Configure leader sampling here now that the sample type is known */
+	evlist__for_each_entry(evlist, evsel)
+		evsel__config_leader_sampling(evsel, evlist);
+
+	if (opts->full_auxtrace || opts->sample_identifier) {
+		/*
+		 * Need to be able to synthesize and parse selected events with
+		 * arbitrary sample types, which requires always being able to
+		 * match the id.
+		 */
+		use_sample_identifier = perf_can_sample_identifier();
+		sample_id = true;
+	} else if (evlist->core.nr_entries > 1) {
+		struct evsel *first = evlist__first(evlist);
+
+		evlist__for_each_entry(evlist, evsel) {
+			if (evsel->core.attr.sample_type == first->core.attr.sample_type)
+				continue;
+			use_sample_identifier = perf_can_sample_identifier();
+			break;
+		}
+		sample_id = true;
+	}
+
+	if (sample_id) {
+		evlist__for_each_entry(evlist, evsel)
+			evsel__set_sample_id(evsel, use_sample_identifier);
+	}
+
+	evlist__set_id_pos(evlist);
+}
+
+static int get_max_rate(unsigned int *rate)
+{
+	return sysctl__read_int("kernel/perf_event_max_sample_rate", (int *)rate);
+}
+
+static int record_opts__config_freq(struct record_opts *opts)
+{
+	bool user_freq = opts->user_freq != UINT_MAX;
+	bool user_interval = opts->user_interval != ULLONG_MAX;
+	unsigned int max_rate;
+
+	if (user_interval && user_freq) {
+		pr_err("cannot set frequency and period at the same time\n");
+		return -1;
+	}
+
+	if (user_interval)
+		opts->default_interval = opts->user_interval;
+	if (user_freq)
+		opts->freq = opts->user_freq;
+
+	/*
+	 * User specified count overrides default frequency.
+	 */
+	if (opts->default_interval)
+		opts->freq = 0;
+	else if (opts->freq) {
+		opts->default_interval = opts->freq;
+	} else {
+		pr_err("frequency and count are zero, aborting\n");
+		return -1;
+	}
+
+	if (get_max_rate(&max_rate))
+		return 0;
+
+	/*
+	 * User specified frequency is over current maximum.
+	 */
+	if (user_freq && (max_rate < opts->freq)) {
+		if (opts->strict_freq) {
+			pr_err("error: Maximum frequency rate (%'u Hz) exceeded.\n"
+			       "       Please use -F freq option with a lower value or consider\n"
+			       "       tweaking /proc/sys/kernel/perf_event_max_sample_rate.\n",
+			       max_rate);
+			return -1;
+		} else {
+			pr_warning("warning: Maximum frequency rate (%'u Hz) exceeded, throttling from %'u Hz to %'u Hz.\n"
+				   "         The limit can be raised via /proc/sys/kernel/perf_event_max_sample_rate.\n"
+				   "         The kernel will lower it when perf's interrupts take too long.\n"
+				   "         Use --strict-freq to disable this throttling, refusing to record.\n",
+				   max_rate, opts->freq, max_rate);
+
+			opts->freq = max_rate;
+		}
+	}
+
+	/*
+	 * Default frequency is over current maximum.
+	 */
+	if (max_rate < opts->freq) {
+		pr_warning("Lowering default frequency rate from %u to %u.\n"
+			   "Please consider tweaking "
+			   "/proc/sys/kernel/perf_event_max_sample_rate.\n",
+			   opts->freq, max_rate);
+		opts->freq = max_rate;
+	}
+
+	return 0;
+}
+
+int record_opts__config(struct record_opts *opts)
+{
+	return record_opts__config_freq(opts);
+}
+
+bool evlist__can_select_event(struct evlist *evlist, const char *str)
+{
+	struct evlist *temp_evlist;
+	struct evsel *evsel;
+	int err, fd;
+	struct perf_cpu cpu = { .cpu = 0 };
+	bool ret = false;
+	pid_t pid = -1;
+
+	temp_evlist = evlist__new();
+	if (!temp_evlist)
+		return false;
+
+	err = parse_event(temp_evlist, str);
+	if (err)
+		goto out_delete;
+
+	evsel = evlist__last(temp_evlist);
+
+	if (!evlist || perf_cpu_map__empty(evlist->core.user_requested_cpus)) {
+		struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);
+
+		if (cpus)
+			cpu =  perf_cpu_map__cpu(cpus, 0);
+
+		perf_cpu_map__put(cpus);
+	} else {
+		cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, 0);
+	}
+
+	while (1) {
+		fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1,
+					 perf_event_open_cloexec_flag());
+		if (fd < 0) {
+			if (pid == -1 && errno == EACCES) {
+				pid = 0;
+				continue;
+			}
+			goto out_delete;
+		}
+		break;
+	}
+	close(fd);
+	ret = true;
+
+out_delete:
+	evlist__delete(temp_evlist);
+	return ret;
+}
+
+int record__parse_freq(const struct option *opt, const char *str, int unset __maybe_unused)
+{
+	unsigned int freq;
+	struct record_opts *opts = opt->value;
+
+	if (!str)
+		return -EINVAL;
+
+	if (strcasecmp(str, "max") == 0) {
+		if (get_max_rate(&freq)) {
+			pr_err("couldn't read /proc/sys/kernel/perf_event_max_sample_rate\n");
+			return -1;
+		}
+		pr_info("info: Using a maximum frequency rate of %'d Hz\n", freq);
+	} else {
+		freq = atoi(str);
+	}
+
+	opts->user_freq = freq;
+	return 0;
+}
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
new file mode 100644
index 000000000..4269e916f
--- /dev/null
+++ b/tools/perf/util/record.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_RECORD_H
+#define _PERF_RECORD_H
+
+#include <time.h>
+#include <stdbool.h>
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/perf_event.h>
+#include "util/target.h"
+
+struct option;
+
+struct record_opts {
+	struct target target;
+	bool	      group;
+	bool	      inherit_stat;
+	bool	      no_buffering;
+	bool	      no_inherit;
+	bool	      no_inherit_set;
+	bool	      no_samples;
+	bool	      raw_samples;
+	bool	      sample_address;
+	bool	      sample_phys_addr;
+	bool	      sample_data_page_size;
+	bool	      sample_code_page_size;
+	bool	      sample_weight;
+	bool	      sample_time;
+	bool	      sample_time_set;
+	bool	      sample_cpu;
+	bool	      sample_identifier;
+	bool	      period;
+	bool	      period_set;
+	bool	      running_time;
+	bool	      full_auxtrace;
+	bool	      auxtrace_snapshot_mode;
+	bool	      auxtrace_snapshot_on_exit;
+	bool	      auxtrace_sample_mode;
+	bool	      record_namespaces;
+	bool	      record_cgroup;
+	bool	      record_switch_events;
+	bool	      record_switch_events_set;
+	bool	      all_kernel;
+	bool	      all_user;
+	bool	      kernel_callchains;
+	bool	      user_callchains;
+	bool	      tail_synthesize;
+	bool	      overwrite;
+	bool	      ignore_missing_thread;
+	bool	      strict_freq;
+	bool	      sample_id;
+	bool	      no_bpf_event;
+	bool	      kcore;
+	bool	      text_poke;
+	bool	      build_id;
+	unsigned int  freq;
+	unsigned int  mmap_pages;
+	unsigned int  auxtrace_mmap_pages;
+	unsigned int  user_freq;
+	u64	      branch_stack;
+	u64	      sample_intr_regs;
+	u64	      sample_user_regs;
+	u64	      default_interval;
+	u64	      user_interval;
+	size_t	      auxtrace_snapshot_size;
+	const char    *auxtrace_snapshot_opts;
+	const char    *auxtrace_sample_opts;
+	bool	      sample_transaction;
+	int	      initial_delay;
+	bool	      use_clockid;
+	clockid_t     clockid;
+	u64	      clockid_res_ns;
+	int	      nr_cblocks;
+	int	      affinity;
+	int	      mmap_flush;
+	unsigned int  comp_level;
+	unsigned int  nr_threads_synthesize;
+	int	      ctl_fd;
+	int	      ctl_fd_ack;
+	bool	      ctl_fd_close;
+	int	      synth;
+	int	      threads_spec;
+	const char    *threads_user_spec;
+};
+
+extern const char * const *record_usage;
+extern struct option *record_options;
+
+int record__parse_freq(const struct option *opt, const char *str, int unset);
+
+static inline bool record_opts__no_switch_events(const struct record_opts *opts)
+{
+	return opts->record_switch_events_set && !opts->record_switch_events;
+}
+
+#endif // _PERF_RECORD_H
diff --git a/tools/perf/util/rlimit.c b/tools/perf/util/rlimit.c
new file mode 100644
index 000000000..13521d392
--- /dev/null
+++ b/tools/perf/util/rlimit.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+
+#include "util/debug.h"
+#include "util/rlimit.h"
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/*
+ * Bump the memlock so that we can get bpf maps of a reasonable size,
+ * like the ones used with 'perf trace' and with 'perf test bpf',
+ * improve this to some specific request if needed.
+ */
+void rlimit__bump_memlock(void)
+{
+	struct rlimit rlim;
+
+	if (getrlimit(RLIMIT_MEMLOCK, &rlim) == 0) {
+		rlim.rlim_cur *= 4;
+		rlim.rlim_max *= 4;
+
+		if (setrlimit(RLIMIT_MEMLOCK, &rlim) < 0) {
+			rlim.rlim_cur /= 2;
+			rlim.rlim_max /= 2;
+
+			if (setrlimit(RLIMIT_MEMLOCK, &rlim) < 0)
+				pr_debug("Couldn't bump rlimit(MEMLOCK), failures may take place when creating BPF maps, etc\n");
+		}
+	}
+}
diff --git a/tools/perf/util/rlimit.h b/tools/perf/util/rlimit.h
new file mode 100644
index 000000000..9f59d8e71
--- /dev/null
+++ b/tools/perf/util/rlimit.h
@@ -0,0 +1,6 @@
+#ifndef __PERF_RLIMIT_H_
+#define __PERF_RLIMIT_H_
+/* SPDX-License-Identifier: LGPL-2.1 */
+
+void rlimit__bump_memlock(void);
+#endif // __PERF_RLIMIT_H_
diff --git a/tools/perf/util/rwsem.c b/tools/perf/util/rwsem.c
new file mode 100644
index 000000000..f3d29d8dd
--- /dev/null
+++ b/tools/perf/util/rwsem.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util.h"
+#include "rwsem.h"
+
+int init_rwsem(struct rw_semaphore *sem)
+{
+	return pthread_rwlock_init(&sem->lock, NULL);
+}
+
+int exit_rwsem(struct rw_semaphore *sem)
+{
+	return pthread_rwlock_destroy(&sem->lock);
+}
+
+int down_read(struct rw_semaphore *sem)
+{
+	return perf_singlethreaded ? 0 : pthread_rwlock_rdlock(&sem->lock);
+}
+
+int up_read(struct rw_semaphore *sem)
+{
+	return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock);
+}
+
+int down_write(struct rw_semaphore *sem)
+{
+	return perf_singlethreaded ? 0 : pthread_rwlock_wrlock(&sem->lock);
+}
+
+int up_write(struct rw_semaphore *sem)
+{
+	return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock);
+}
diff --git a/tools/perf/util/rwsem.h b/tools/perf/util/rwsem.h
new file mode 100644
index 000000000..94565ad4d
--- /dev/null
+++ b/tools/perf/util/rwsem.h
@@ -0,0 +1,19 @@
+#ifndef _PERF_RWSEM_H
+#define _PERF_RWSEM_H
+
+#include <pthread.h>
+
+struct rw_semaphore {
+	pthread_rwlock_t lock;
+};
+
+int init_rwsem(struct rw_semaphore *sem);
+int exit_rwsem(struct rw_semaphore *sem);
+
+int down_read(struct rw_semaphore *sem);
+int up_read(struct rw_semaphore *sem);
+
+int down_write(struct rw_semaphore *sem);
+int up_write(struct rw_semaphore *sem);
+
+#endif /* _PERF_RWSEM_H */
diff --git a/tools/perf/util/s390-cpumcf-kernel.h b/tools/perf/util/s390-cpumcf-kernel.h
new file mode 100644
index 000000000..f55ca07f3
--- /dev/null
+++ b/tools/perf/util/s390-cpumcf-kernel.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Support for s390 CPU measurement counter set diagnostic facility
+ *
+ * Copyright IBM Corp. 2019
+   Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
+ *		Thomas Richter <tmricht@linux.ibm.com>
+ */
+#ifndef S390_CPUMCF_KERNEL_H
+#define S390_CPUMCF_KERNEL_H
+
+#define	S390_CPUMCF_DIAG_DEF	0xfeef	/* Counter diagnostic entry ID */
+#define	PERF_EVENT_CPUM_CF_DIAG	0xBC000	/* Event: Counter sets */
+#define PERF_EVENT_CPUM_SF_DIAG	0xBD000 /* Event: Combined-sampling */
+
+struct cf_ctrset_entry {	/* CPU-M CF counter set entry (8 byte) */
+	unsigned int def:16;	/* 0-15  Data Entry Format */
+	unsigned int set:16;	/* 16-23 Counter set identifier */
+	unsigned int ctr:16;	/* 24-39 Number of stored counters */
+	unsigned int res1:16;	/* 40-63 Reserved */
+};
+
+struct cf_trailer_entry {	/* CPU-M CF trailer for raw traces (64 byte) */
+	/* 0 - 7 */
+	union {
+		struct {
+			unsigned int clock_base:1;	/* TOD clock base */
+			unsigned int speed:1;		/* CPU speed */
+			/* Measurement alerts */
+			unsigned int mtda:1;	/* Loss of MT ctr. data alert */
+			unsigned int caca:1;	/* Counter auth. change alert */
+			unsigned int lcda:1;	/* Loss of counter data alert */
+		};
+		unsigned long flags;		/* 0-63    All indicators */
+	};
+	/* 8 - 15 */
+	unsigned int cfvn:16;			/* 64-79   Ctr First Version */
+	unsigned int csvn:16;			/* 80-95   Ctr Second Version */
+	unsigned int cpu_speed:32;		/* 96-127  CPU speed */
+	/* 16 - 23 */
+	unsigned long timestamp;		/* 128-191 Timestamp (TOD) */
+	/* 24 - 55 */
+	union {
+		struct {
+			unsigned long progusage1;
+			unsigned long progusage2;
+			unsigned long progusage3;
+			unsigned long tod_base;
+		};
+		unsigned long progusage[4];
+	};
+	/* 56 - 63 */
+	unsigned int mach_type:16;		/* Machine type */
+	unsigned int res1:16;			/* Reserved */
+	unsigned int res2:32;			/* Reserved */
+};
+
+#define	CPUMF_CTR_SET_BASIC	0	/* Basic Counter Set */
+#define	CPUMF_CTR_SET_USER	1	/* Problem-State Counter Set */
+#define	CPUMF_CTR_SET_CRYPTO	2	/* Crypto-Activity Counter Set */
+#define	CPUMF_CTR_SET_EXT	3	/* Extended Counter Set */
+#define	CPUMF_CTR_SET_MT_DIAG	4	/* MT-diagnostic Counter Set */
+#endif
diff --git a/tools/perf/util/s390-cpumsf-kernel.h b/tools/perf/util/s390-cpumsf-kernel.h
new file mode 100644
index 000000000..de8c7ad0e
--- /dev/null
+++ b/tools/perf/util/s390-cpumsf-kernel.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Auxtrace support for s390 CPU measurement sampling facility
+ *
+ *  Copyright IBM Corp. 2018
+ *  Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
+ *	       Thomas Richter <tmricht@linux.ibm.com>
+ */
+#ifndef S390_CPUMSF_KERNEL_H
+#define S390_CPUMSF_KERNEL_H
+
+#define	S390_CPUMSF_PAGESZ	4096	/* Size of sample block units */
+#define	S390_CPUMSF_DIAG_DEF_FIRST	0x8001	/* Diagnostic entry lowest id */
+
+struct hws_basic_entry {
+	unsigned int def:16;	    /* 0-15  Data Entry Format		 */
+	unsigned int R:4;	    /* 16-19 reserved			 */
+	unsigned int U:4;	    /* 20-23 Number of unique instruct.  */
+	unsigned int z:2;	    /* zeros				 */
+	unsigned int T:1;	    /* 26 PSW DAT mode			 */
+	unsigned int W:1;	    /* 27 PSW wait state		 */
+	unsigned int P:1;	    /* 28 PSW Problem state		 */
+	unsigned int AS:2;	    /* 29-30 PSW address-space control	 */
+	unsigned int I:1;	    /* 31 entry valid or invalid	 */
+	unsigned int CL:2;	    /* 32-33 Configuration Level	 */
+	unsigned int:14;
+	unsigned int prim_asn:16;   /* primary ASN			 */
+	unsigned long long ia;	    /* Instruction Address		 */
+	unsigned long long gpp;     /* Guest Program Parameter		 */
+	unsigned long long hpp;     /* Host Program Parameter		 */
+};
+
+struct hws_diag_entry {
+	unsigned int def:16;	    /* 0-15  Data Entry Format		 */
+	unsigned int R:15;	    /* 16-19 and 20-30 reserved		 */
+	unsigned int I:1;	    /* 31 entry valid or invalid	 */
+	u8	     data[];	    /* Machine-dependent sample data	 */
+};
+
+struct hws_combined_entry {
+	struct hws_basic_entry	basic;	/* Basic-sampling data entry */
+	struct hws_diag_entry	diag;	/* Diagnostic-sampling data entry */
+};
+
+struct hws_trailer_entry {
+	union {
+		struct {
+			unsigned int f:1;	/* 0 - Block Full Indicator   */
+			unsigned int a:1;	/* 1 - Alert request control  */
+			unsigned int t:1;	/* 2 - Timestamp format	      */
+			unsigned int:29;	/* 3 - 31: Reserved	      */
+			unsigned int bsdes:16;	/* 32-47: size of basic SDE   */
+			unsigned int dsdes:16;	/* 48-63: size of diagnostic SDE */
+		};
+		unsigned long long flags;	/* 0 - 64: All indicators     */
+	};
+	unsigned long long overflow;	 /* 64 - sample Overflow count	      */
+	unsigned char timestamp[16];	 /* 16 - 31 timestamp		      */
+	unsigned long long reserved1;	 /* 32 -Reserved		      */
+	unsigned long long reserved2;	 /*				      */
+	union {				 /* 48 - reserved for programming use */
+		struct {
+			unsigned long long clock_base:1; /* in progusage2 */
+			unsigned long long progusage1:63;
+			unsigned long long progusage2;
+		};
+		unsigned long long progusage[2];
+	};
+};
+
+#endif
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
new file mode 100644
index 000000000..f3fdad28a
--- /dev/null
+++ b/tools/perf/util/s390-cpumsf.c
@@ -0,0 +1,1176 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2018
+ * Auxtrace support for s390 CPU-Measurement Sampling Facility
+ *
+ * Author(s):  Thomas Richter <tmricht@linux.ibm.com>
+ *
+ * Auxiliary traces are collected during 'perf record' using rbd000 event.
+ * Several PERF_RECORD_XXX are generated during recording:
+ *
+ * PERF_RECORD_AUX:
+ *	Records that new data landed in the AUX buffer part.
+ * PERF_RECORD_AUXTRACE:
+ *	Defines auxtrace data. Followed by the actual data. The contents of
+ *	the auxtrace data is dependent on the event and the CPU.
+ *	This record is generated by perf record command. For details
+ *	see Documentation/perf.data-file-format.txt.
+ * PERF_RECORD_AUXTRACE_INFO:
+ *	Defines a table of contains for PERF_RECORD_AUXTRACE records. This
+ *	record is generated during 'perf record' command. Each record contains
+ *	up to 256 entries describing offset and size of the AUXTRACE data in the
+ *	perf.data file.
+ * PERF_RECORD_AUXTRACE_ERROR:
+ *	Indicates an error during AUXTRACE collection such as buffer overflow.
+ * PERF_RECORD_FINISHED_ROUND:
+ *	Perf events are not necessarily in time stamp order, as they can be
+ *	collected in parallel on different CPUs. If the events should be
+ *	processed in time order they need to be sorted first.
+ *	Perf report guarantees that there is no reordering over a
+ *	PERF_RECORD_FINISHED_ROUND boundary event. All perf records with a
+ *	time stamp lower than this record are processed (and displayed) before
+ *	the succeeding perf record are processed.
+ *
+ * These records are evaluated during perf report command.
+ *
+ * 1. PERF_RECORD_AUXTRACE_INFO is used to set up the infrastructure for
+ * auxiliary trace data processing. See s390_cpumsf_process_auxtrace_info()
+ * below.
+ * Auxiliary trace data is collected per CPU. To merge the data into the report
+ * an auxtrace_queue is created for each CPU. It is assumed that the auxtrace
+ * data is in ascending order.
+ *
+ * Each queue has a double linked list of auxtrace_buffers. This list contains
+ * the offset and size of a CPU's auxtrace data. During auxtrace processing
+ * the data portion is mmap()'ed.
+ *
+ * To sort the queues in chronological order, all queue access is controlled
+ * by the auxtrace_heap. This is basically a stack, each stack element has two
+ * entries, the queue number and a time stamp. However the stack is sorted by
+ * the time stamps. The highest time stamp is at the bottom the lowest
+ * (nearest) time stamp is at the top. That sort order is maintained at all
+ * times!
+ *
+ * After the auxtrace infrastructure has been setup, the auxtrace queues are
+ * filled with data (offset/size pairs) and the auxtrace_heap is populated.
+ *
+ * 2. PERF_RECORD_XXX processing triggers access to the auxtrace_queues.
+ * Each record is handled by s390_cpumsf_process_event(). The time stamp of
+ * the perf record is compared with the time stamp located on the auxtrace_heap
+ * top element. If that time stamp is lower than the time stamp from the
+ * record sample, the auxtrace queues will be processed. As auxtrace queues
+ * control many auxtrace_buffers and each buffer can be quite large, the
+ * auxtrace buffer might be processed only partially. In this case the
+ * position in the auxtrace_buffer of that queue is remembered and the time
+ * stamp of the last processed entry of the auxtrace_buffer replaces the
+ * current auxtrace_heap top.
+ *
+ * 3. Auxtrace_queues might run of out data and are fed by the
+ * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event().
+ *
+ * Event Generation
+ * Each sampling-data entry in the auxiliary trace data generates a perf sample.
+ * This sample is filled
+ * with data from the auxtrace such as PID/TID, instruction address, CPU state,
+ * etc. This sample is processed with perf_session__deliver_synth_event() to
+ * be included into the GUI.
+ *
+ * 4. PERF_RECORD_FINISHED_ROUND event is used to process all the remaining
+ * auxiliary traces entries until the time stamp of this record is reached
+ * auxtrace_heap top. This is triggered by ordered_event->deliver().
+ *
+ *
+ * Perf event processing.
+ * Event processing of PERF_RECORD_XXX entries relies on time stamp entries.
+ * This is the function call sequence:
+ *
+ * __cmd_report()
+ * |
+ * perf_session__process_events()
+ * |
+ * __perf_session__process_events()
+ * |
+ * perf_session__process_event()
+ * |  This functions splits the PERF_RECORD_XXX records.
+ * |  - Those generated by perf record command (type number equal or higher
+ * |    than PERF_RECORD_USER_TYPE_START) are handled by
+ * |    perf_session__process_user_event(see below)
+ * |  - Those generated by the kernel are handled by
+ * |    evlist__parse_sample_timestamp()
+ * |
+ * evlist__parse_sample_timestamp()
+ * |  Extract time stamp from sample data.
+ * |
+ * perf_session__queue_event()
+ * |  If timestamp is positive the sample is entered into an ordered_event
+ * |  list, sort order is the timestamp. The event processing is deferred until
+ * |  later (see perf_session__process_user_event()).
+ * |  Other timestamps (0 or -1) are handled immediately by
+ * |  perf_session__deliver_event(). These are events generated at start up
+ * |  of command perf record. They create PERF_RECORD_COMM and PERF_RECORD_MMAP*
+ * |  records. They are needed to create a list of running processes and its
+ * |  memory mappings and layout. They are needed at the beginning to enable
+ * |  command perf report to create process trees and memory mappings.
+ * |
+ * perf_session__deliver_event()
+ * |  Delivers a PERF_RECORD_XXX entry for handling.
+ * |
+ * auxtrace__process_event()
+ * |  The timestamp of the PERF_RECORD_XXX entry is taken to correlate with
+ * |  time stamps from the auxiliary trace buffers. This enables
+ * |  synchronization between auxiliary trace data and the events on the
+ * |  perf.data file.
+ * |
+ * machine__deliver_event()
+ * |  Handles the PERF_RECORD_XXX event. This depends on the record type.
+ *    It might update the process tree, update a process memory map or enter
+ *    a sample with IP and call back chain data into GUI data pool.
+ *
+ *
+ * Deferred processing determined by perf_session__process_user_event() is
+ * finally processed when a PERF_RECORD_FINISHED_ROUND is encountered. These
+ * are generated during command perf record.
+ * The timestamp of PERF_RECORD_FINISHED_ROUND event is taken to process all
+ * PERF_RECORD_XXX entries stored in the ordered_event list. This list was
+ * built up while reading the perf.data file.
+ * Each event is now processed by calling perf_session__deliver_event().
+ * This enables time synchronization between the data in the perf.data file and
+ * the data in the auxiliary trace buffers.
+ */
+
+#include <endian.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <inttypes.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+#include <linux/zalloc.h>
+
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "color.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "machine.h"
+#include "session.h"
+#include "tool.h"
+#include "debug.h"
+#include "auxtrace.h"
+#include "s390-cpumsf.h"
+#include "s390-cpumsf-kernel.h"
+#include "s390-cpumcf-kernel.h"
+#include "config.h"
+
+struct s390_cpumsf {
+	struct auxtrace		auxtrace;
+	struct auxtrace_queues	queues;
+	struct auxtrace_heap	heap;
+	struct perf_session	*session;
+	struct machine		*machine;
+	u32			auxtrace_type;
+	u32			pmu_type;
+	u16			machine_type;
+	bool			data_queued;
+	bool			use_logfile;
+	char			*logdir;
+};
+
+struct s390_cpumsf_queue {
+	struct s390_cpumsf	*sf;
+	unsigned int		queue_nr;
+	struct auxtrace_buffer	*buffer;
+	int			cpu;
+	FILE			*logfile;
+	FILE			*logfile_ctr;
+};
+
+/* Check if the raw data should be dumped to file. If this is the case and
+ * the file to dump to has not been opened for writing, do so.
+ *
+ * Return 0 on success and greater zero on error so processing continues.
+ */
+static int s390_cpumcf_dumpctr(struct s390_cpumsf *sf,
+			       struct perf_sample *sample)
+{
+	struct s390_cpumsf_queue *sfq;
+	struct auxtrace_queue *q;
+	int rc = 0;
+
+	if (!sf->use_logfile || sf->queues.nr_queues <= sample->cpu)
+		return rc;
+
+	q = &sf->queues.queue_array[sample->cpu];
+	sfq = q->priv;
+	if (!sfq)		/* Queue not yet allocated */
+		return rc;
+
+	if (!sfq->logfile_ctr) {
+		char *name;
+
+		rc = (sf->logdir)
+			? asprintf(&name, "%s/aux.ctr.%02x",
+				 sf->logdir, sample->cpu)
+			: asprintf(&name, "aux.ctr.%02x", sample->cpu);
+		if (rc > 0)
+			sfq->logfile_ctr = fopen(name, "w");
+		if (sfq->logfile_ctr == NULL) {
+			pr_err("Failed to open counter set log file %s, "
+			       "continue...\n", name);
+			rc = 1;
+		}
+		free(name);
+	}
+
+	if (sfq->logfile_ctr) {
+		/* See comment above for -4 */
+		size_t n = fwrite(sample->raw_data, sample->raw_size - 4, 1,
+				  sfq->logfile_ctr);
+		if (n != 1) {
+			pr_err("Failed to write counter set data\n");
+			rc = 1;
+		}
+	}
+	return rc;
+}
+
+/* Display s390 CPU measurement facility basic-sampling data entry
+ * Data written on s390 in big endian byte order and contains bit
+ * fields across byte boundaries.
+ */
+static bool s390_cpumsf_basic_show(const char *color, size_t pos,
+				   struct hws_basic_entry *basicp)
+{
+	struct hws_basic_entry *basic = basicp;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	struct hws_basic_entry local;
+	unsigned long long word = be64toh(*(unsigned long long *)basicp);
+
+	memset(&local, 0, sizeof(local));
+	local.def = be16toh(basicp->def);
+	local.prim_asn = word & 0xffff;
+	local.CL = word >> 30 & 0x3;
+	local.I = word >> 32 & 0x1;
+	local.AS = word >> 33 & 0x3;
+	local.P = word >> 35 & 0x1;
+	local.W = word >> 36 & 0x1;
+	local.T = word >> 37 & 0x1;
+	local.U = word >> 40 & 0xf;
+	local.ia = be64toh(basicp->ia);
+	local.gpp = be64toh(basicp->gpp);
+	local.hpp = be64toh(basicp->hpp);
+	basic = &local;
+#endif
+	if (basic->def != 1) {
+		pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos);
+		return false;
+	}
+	color_fprintf(stdout, color, "    [%#08zx] Basic   Def:%04x Inst:%#04x"
+		      " %c%c%c%c AS:%d ASN:%#04x IA:%#018llx\n"
+		      "\t\tCL:%d HPP:%#018llx GPP:%#018llx\n",
+		      pos, basic->def, basic->U,
+		      basic->T ? 'T' : ' ',
+		      basic->W ? 'W' : ' ',
+		      basic->P ? 'P' : ' ',
+		      basic->I ? 'I' : ' ',
+		      basic->AS, basic->prim_asn, basic->ia, basic->CL,
+		      basic->hpp, basic->gpp);
+	return true;
+}
+
+/* Display s390 CPU measurement facility diagnostic-sampling data entry.
+ * Data written on s390 in big endian byte order and contains bit
+ * fields across byte boundaries.
+ */
+static bool s390_cpumsf_diag_show(const char *color, size_t pos,
+				  struct hws_diag_entry *diagp)
+{
+	struct hws_diag_entry *diag = diagp;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	struct hws_diag_entry local;
+	unsigned long long word = be64toh(*(unsigned long long *)diagp);
+
+	local.def = be16toh(diagp->def);
+	local.I = word >> 32 & 0x1;
+	diag = &local;
+#endif
+	if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) {
+		pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos);
+		return false;
+	}
+	color_fprintf(stdout, color, "    [%#08zx] Diag    Def:%04x %c\n",
+		      pos, diag->def, diag->I ? 'I' : ' ');
+	return true;
+}
+
+/* Return TOD timestamp contained in an trailer entry */
+static unsigned long long trailer_timestamp(struct hws_trailer_entry *te,
+					    int idx)
+{
+	/* te->t set: TOD in STCKE format, bytes 8-15
+	 * to->t not set: TOD in STCK format, bytes 0-7
+	 */
+	unsigned long long ts;
+
+	memcpy(&ts, &te->timestamp[idx], sizeof(ts));
+	return be64toh(ts);
+}
+
+/* Display s390 CPU measurement facility trailer entry */
+static bool s390_cpumsf_trailer_show(const char *color, size_t pos,
+				     struct hws_trailer_entry *te)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	struct hws_trailer_entry local;
+	const unsigned long long flags = be64toh(te->flags);
+
+	memset(&local, 0, sizeof(local));
+	local.f = flags >> 63 & 0x1;
+	local.a = flags >> 62 & 0x1;
+	local.t = flags >> 61 & 0x1;
+	local.bsdes = be16toh((flags >> 16 & 0xffff));
+	local.dsdes = be16toh((flags & 0xffff));
+	memcpy(&local.timestamp, te->timestamp, sizeof(te->timestamp));
+	local.overflow = be64toh(te->overflow);
+	local.clock_base = be64toh(te->progusage[0]) >> 63 & 1;
+	local.progusage2 = be64toh(te->progusage2);
+	te = &local;
+#endif
+	if (te->bsdes != sizeof(struct hws_basic_entry)) {
+		pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos);
+		return false;
+	}
+	color_fprintf(stdout, color, "    [%#08zx] Trailer %c%c%c bsdes:%d"
+		      " dsdes:%d Overflow:%lld Time:%#llx\n"
+		      "\t\tC:%d TOD:%#lx\n",
+		      pos,
+		      te->f ? 'F' : ' ',
+		      te->a ? 'A' : ' ',
+		      te->t ? 'T' : ' ',
+		      te->bsdes, te->dsdes, te->overflow,
+		      trailer_timestamp(te, te->clock_base),
+		      te->clock_base, te->progusage2);
+	return true;
+}
+
+/* Test a sample data block. It must be 4KB or a multiple thereof in size and
+ * 4KB page aligned. Each sample data page has a trailer entry at the
+ * end which contains the sample entry data sizes.
+ *
+ * Return true if the sample data block passes the checks and set the
+ * basic set entry size and diagnostic set entry size.
+ *
+ * Return false on failure.
+ *
+ * Note: Old hardware does not set the basic or diagnostic entry sizes
+ * in the trailer entry. Use the type number instead.
+ */
+static bool s390_cpumsf_validate(int machine_type,
+				 unsigned char *buf, size_t len,
+				 unsigned short *bsdes,
+				 unsigned short *dsdes)
+{
+	struct hws_basic_entry *basic = (struct hws_basic_entry *)buf;
+	struct hws_trailer_entry *te;
+
+	*dsdes = *bsdes = 0;
+	if (len & (S390_CPUMSF_PAGESZ - 1))	/* Illegal size */
+		return false;
+	if (be16toh(basic->def) != 1)	/* No basic set entry, must be first */
+		return false;
+	/* Check for trailer entry at end of SDB */
+	te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
+					      - sizeof(*te));
+	*bsdes = be16toh(te->bsdes);
+	*dsdes = be16toh(te->dsdes);
+	if (!te->bsdes && !te->dsdes) {
+		/* Very old hardware, use CPUID */
+		switch (machine_type) {
+		case 2097:
+		case 2098:
+			*dsdes = 64;
+			*bsdes = 32;
+			break;
+		case 2817:
+		case 2818:
+			*dsdes = 74;
+			*bsdes = 32;
+			break;
+		case 2827:
+		case 2828:
+			*dsdes = 85;
+			*bsdes = 32;
+			break;
+		case 2964:
+		case 2965:
+			*dsdes = 112;
+			*bsdes = 32;
+			break;
+		default:
+			/* Illegal trailer entry */
+			return false;
+		}
+	}
+	return true;
+}
+
+/* Return true if there is room for another entry */
+static bool s390_cpumsf_reached_trailer(size_t entry_sz, size_t pos)
+{
+	size_t payload = S390_CPUMSF_PAGESZ - sizeof(struct hws_trailer_entry);
+
+	if (payload - (pos & (S390_CPUMSF_PAGESZ - 1)) < entry_sz)
+		return false;
+	return true;
+}
+
+/* Dump an auxiliary buffer. These buffers are multiple of
+ * 4KB SDB pages.
+ */
+static void s390_cpumsf_dump(struct s390_cpumsf *sf,
+			     unsigned char *buf, size_t len)
+{
+	const char *color = PERF_COLOR_BLUE;
+	struct hws_basic_entry *basic;
+	struct hws_diag_entry *diag;
+	unsigned short bsdes, dsdes;
+	size_t pos = 0;
+
+	color_fprintf(stdout, color,
+		      ". ... s390 AUX data: size %zu bytes\n",
+		      len);
+
+	if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
+				  &dsdes)) {
+		pr_err("Invalid AUX trace data block size:%zu"
+		       " (type:%d bsdes:%hd dsdes:%hd)\n",
+		       len, sf->machine_type, bsdes, dsdes);
+		return;
+	}
+
+	/* s390 kernel always returns 4KB blocks fully occupied,
+	 * no partially filled SDBs.
+	 */
+	while (pos < len) {
+		/* Handle Basic entry */
+		basic = (struct hws_basic_entry *)(buf + pos);
+		if (s390_cpumsf_basic_show(color, pos, basic))
+			pos += bsdes;
+		else
+			return;
+
+		/* Handle Diagnostic entry */
+		diag = (struct hws_diag_entry *)(buf + pos);
+		if (s390_cpumsf_diag_show(color, pos, diag))
+			pos += dsdes;
+		else
+			return;
+
+		/* Check for trailer entry */
+		if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
+			/* Show trailer entry */
+			struct hws_trailer_entry te;
+
+			pos = (pos + S390_CPUMSF_PAGESZ)
+			       & ~(S390_CPUMSF_PAGESZ - 1);
+			pos -= sizeof(te);
+			memcpy(&te, buf + pos, sizeof(te));
+			/* Set descriptor sizes in case of old hardware
+			 * where these values are not set.
+			 */
+			te.bsdes = bsdes;
+			te.dsdes = dsdes;
+			if (s390_cpumsf_trailer_show(color, pos, &te))
+				pos += sizeof(te);
+			else
+				return;
+		}
+	}
+}
+
+static void s390_cpumsf_dump_event(struct s390_cpumsf *sf, unsigned char *buf,
+				   size_t len)
+{
+	printf(".\n");
+	s390_cpumsf_dump(sf, buf, len);
+}
+
+#define	S390_LPP_PID_MASK	0xffffffff
+
+static bool s390_cpumsf_make_event(size_t pos,
+				   struct hws_basic_entry *basic,
+				   struct s390_cpumsf_queue *sfq)
+{
+	struct perf_sample sample = {
+				.ip = basic->ia,
+				.pid = basic->hpp & S390_LPP_PID_MASK,
+				.tid = basic->hpp & S390_LPP_PID_MASK,
+				.cpumode = PERF_RECORD_MISC_CPUMODE_UNKNOWN,
+				.cpu = sfq->cpu,
+				.period = 1
+			    };
+	union perf_event event;
+
+	memset(&event, 0, sizeof(event));
+	if (basic->CL == 1)	/* Native LPAR mode */
+		sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
+					  : PERF_RECORD_MISC_KERNEL;
+	else if (basic->CL == 2)	/* Guest kernel/user space */
+		sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
+					  : PERF_RECORD_MISC_GUEST_KERNEL;
+	else if (basic->gpp || basic->prim_asn != 0xffff)
+		/* Use heuristics on old hardware */
+		sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
+					  : PERF_RECORD_MISC_GUEST_KERNEL;
+	else
+		sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
+					  : PERF_RECORD_MISC_KERNEL;
+
+	event.sample.header.type = PERF_RECORD_SAMPLE;
+	event.sample.header.misc = sample.cpumode;
+	event.sample.header.size = sizeof(struct perf_event_header);
+
+	pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n",
+		 __func__, pos, sample.ip, basic->P, basic->CL, sample.pid,
+		 sample.tid, sample.cpumode, sample.cpu);
+	if (perf_session__deliver_synth_event(sfq->sf->session, &event,
+					      &sample)) {
+		pr_err("s390 Auxiliary Trace: failed to deliver event\n");
+		return false;
+	}
+	return true;
+}
+
+static unsigned long long get_trailer_time(const unsigned char *buf)
+{
+	struct hws_trailer_entry *te;
+	unsigned long long aux_time, progusage2;
+	bool clock_base;
+
+	te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
+					      - sizeof(*te));
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	clock_base = be64toh(te->progusage[0]) >> 63 & 0x1;
+	progusage2 = be64toh(te->progusage[1]);
+#else
+	clock_base = te->clock_base;
+	progusage2 = te->progusage2;
+#endif
+	if (!clock_base)	/* TOD_CLOCK_BASE value missing */
+		return 0;
+
+	/* Correct calculation to convert time stamp in trailer entry to
+	 * nano seconds (taken from arch/s390 function tod_to_ns()).
+	 * TOD_CLOCK_BASE is stored in trailer entry member progusage2.
+	 */
+	aux_time = trailer_timestamp(te, clock_base) - progusage2;
+	aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9);
+	return aux_time;
+}
+
+/* Process the data samples of a single queue. The first parameter is a
+ * pointer to the queue, the second parameter is the time stamp. This
+ * is the time stamp:
+ * - of the event that triggered this processing.
+ * - or the time stamp when the last processing of this queue stopped.
+ *   In this case it stopped at a 4KB page boundary and record the
+ *   position on where to continue processing on the next invocation
+ *   (see buffer->use_data and buffer->use_size).
+ *
+ * When this function returns the second parameter is updated to
+ * reflect the time stamp of the last processed auxiliary data entry
+ * (taken from the trailer entry of that page). The caller uses this
+ * returned time stamp to record the last processed entry in this
+ * queue.
+ *
+ * The function returns:
+ * 0:  Processing successful. The second parameter returns the
+ *     time stamp from the trailer entry until which position
+ *     processing took place. Subsequent calls resume from this
+ *     position.
+ * <0: An error occurred during processing. The second parameter
+ *     returns the maximum time stamp.
+ * >0: Done on this queue. The second parameter returns the
+ *     maximum time stamp.
+ */
+static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts)
+{
+	struct s390_cpumsf *sf = sfq->sf;
+	unsigned char *buf = sfq->buffer->use_data;
+	size_t len = sfq->buffer->use_size;
+	struct hws_basic_entry *basic;
+	unsigned short bsdes, dsdes;
+	size_t pos = 0;
+	int err = 1;
+	u64 aux_ts;
+
+	if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
+				  &dsdes)) {
+		*ts = ~0ULL;
+		return -1;
+	}
+
+	/* Get trailer entry time stamp and check if entries in
+	 * this auxiliary page are ready for processing. If the
+	 * time stamp of the first entry is too high, whole buffer
+	 * can be skipped. In this case return time stamp.
+	 */
+	aux_ts = get_trailer_time(buf);
+	if (!aux_ts) {
+		pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n",
+		       (s64)sfq->buffer->data_offset);
+		aux_ts = ~0ULL;
+		goto out;
+	}
+	if (aux_ts > *ts) {
+		*ts = aux_ts;
+		return 0;
+	}
+
+	while (pos < len) {
+		/* Handle Basic entry */
+		basic = (struct hws_basic_entry *)(buf + pos);
+		if (s390_cpumsf_make_event(pos, basic, sfq))
+			pos += bsdes;
+		else {
+			err = -EBADF;
+			goto out;
+		}
+
+		pos += dsdes;	/* Skip diagnostic entry */
+
+		/* Check for trailer entry */
+		if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
+			pos = (pos + S390_CPUMSF_PAGESZ)
+			       & ~(S390_CPUMSF_PAGESZ - 1);
+			/* Check existence of next page */
+			if (pos >= len)
+				break;
+			aux_ts = get_trailer_time(buf + pos);
+			if (!aux_ts) {
+				aux_ts = ~0ULL;
+				goto out;
+			}
+			if (aux_ts > *ts) {
+				*ts = aux_ts;
+				sfq->buffer->use_data += pos;
+				sfq->buffer->use_size -= pos;
+				return 0;
+			}
+		}
+	}
+out:
+	*ts = aux_ts;
+	sfq->buffer->use_size = 0;
+	sfq->buffer->use_data = NULL;
+	return err;	/* Buffer completely scanned or error */
+}
+
+/* Run the s390 auxiliary trace decoder.
+ * Select the queue buffer to operate on, the caller already selected
+ * the proper queue, depending on second parameter 'ts'.
+ * This is the time stamp until which the auxiliary entries should
+ * be processed. This value is updated by called functions and
+ * returned to the caller.
+ *
+ * Resume processing in the current buffer. If there is no buffer
+ * get a new buffer from the queue and setup start position for
+ * processing.
+ * When a buffer is completely processed remove it from the queue
+ * before returning.
+ *
+ * This function returns
+ * 1: When the queue is empty. Second parameter will be set to
+ *    maximum time stamp.
+ * 0: Normal processing done.
+ * <0: Error during queue buffer setup. This causes the caller
+ *     to stop processing completely.
+ */
+static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq,
+				   u64 *ts)
+{
+
+	struct auxtrace_buffer *buffer;
+	struct auxtrace_queue *queue;
+	int err;
+
+	queue = &sfq->sf->queues.queue_array[sfq->queue_nr];
+
+	/* Get buffer and last position in buffer to resume
+	 * decoding the auxiliary entries. One buffer might be large
+	 * and decoding might stop in between. This depends on the time
+	 * stamp of the trailer entry in each page of the auxiliary
+	 * data and the time stamp of the event triggering the decoding.
+	 */
+	if (sfq->buffer == NULL) {
+		sfq->buffer = buffer = auxtrace_buffer__next(queue,
+							     sfq->buffer);
+		if (!buffer) {
+			*ts = ~0ULL;
+			return 1;	/* Processing done on this queue */
+		}
+		/* Start with a new buffer on this queue */
+		if (buffer->data) {
+			buffer->use_size = buffer->size;
+			buffer->use_data = buffer->data;
+		}
+		if (sfq->logfile) {	/* Write into log file */
+			size_t rc = fwrite(buffer->data, buffer->size, 1,
+					   sfq->logfile);
+			if (rc != 1)
+				pr_err("Failed to write auxiliary data\n");
+		}
+	} else
+		buffer = sfq->buffer;
+
+	if (!buffer->data) {
+		int fd = perf_data__fd(sfq->sf->session->data);
+
+		buffer->data = auxtrace_buffer__get_data(buffer, fd);
+		if (!buffer->data)
+			return -ENOMEM;
+		buffer->use_size = buffer->size;
+		buffer->use_data = buffer->data;
+
+		if (sfq->logfile) {	/* Write into log file */
+			size_t rc = fwrite(buffer->data, buffer->size, 1,
+					   sfq->logfile);
+			if (rc != 1)
+				pr_err("Failed to write auxiliary data\n");
+		}
+	}
+	pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n",
+		  __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset,
+		  buffer->size, buffer->use_size);
+	err = s390_cpumsf_samples(sfq, ts);
+
+	/* If non-zero, there is either an error (err < 0) or the buffer is
+	 * completely done (err > 0). The error is unrecoverable, usually
+	 * some descriptors could not be read successfully, so continue with
+	 * the next buffer.
+	 * In both cases the parameter 'ts' has been updated.
+	 */
+	if (err) {
+		sfq->buffer = NULL;
+		list_del_init(&buffer->list);
+		auxtrace_buffer__free(buffer);
+		if (err > 0)		/* Buffer done, no error */
+			err = 0;
+	}
+	return err;
+}
+
+static struct s390_cpumsf_queue *
+s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr)
+{
+	struct s390_cpumsf_queue *sfq;
+
+	sfq = zalloc(sizeof(struct s390_cpumsf_queue));
+	if (sfq == NULL)
+		return NULL;
+
+	sfq->sf = sf;
+	sfq->queue_nr = queue_nr;
+	sfq->cpu = -1;
+	if (sf->use_logfile) {
+		char *name;
+		int rc;
+
+		rc = (sf->logdir)
+			? asprintf(&name, "%s/aux.smp.%02x",
+				 sf->logdir, queue_nr)
+			: asprintf(&name, "aux.smp.%02x", queue_nr);
+		if (rc > 0)
+			sfq->logfile = fopen(name, "w");
+		if (sfq->logfile == NULL) {
+			pr_err("Failed to open auxiliary log file %s,"
+			       "continue...\n", name);
+			sf->use_logfile = false;
+		}
+		free(name);
+	}
+	return sfq;
+}
+
+static int s390_cpumsf_setup_queue(struct s390_cpumsf *sf,
+				   struct auxtrace_queue *queue,
+				   unsigned int queue_nr, u64 ts)
+{
+	struct s390_cpumsf_queue *sfq = queue->priv;
+
+	if (list_empty(&queue->head))
+		return 0;
+
+	if (sfq == NULL) {
+		sfq = s390_cpumsf_alloc_queue(sf, queue_nr);
+		if (!sfq)
+			return -ENOMEM;
+		queue->priv = sfq;
+
+		if (queue->cpu != -1)
+			sfq->cpu = queue->cpu;
+	}
+	return auxtrace_heap__add(&sf->heap, queue_nr, ts);
+}
+
+static int s390_cpumsf_setup_queues(struct s390_cpumsf *sf, u64 ts)
+{
+	unsigned int i;
+	int ret = 0;
+
+	for (i = 0; i < sf->queues.nr_queues; i++) {
+		ret = s390_cpumsf_setup_queue(sf, &sf->queues.queue_array[i],
+					      i, ts);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
+static int s390_cpumsf_update_queues(struct s390_cpumsf *sf, u64 ts)
+{
+	if (!sf->queues.new_data)
+		return 0;
+
+	sf->queues.new_data = false;
+	return s390_cpumsf_setup_queues(sf, ts);
+}
+
+static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp)
+{
+	unsigned int queue_nr;
+	u64 ts;
+	int ret;
+
+	while (1) {
+		struct auxtrace_queue *queue;
+		struct s390_cpumsf_queue *sfq;
+
+		if (!sf->heap.heap_cnt)
+			return 0;
+
+		if (sf->heap.heap_array[0].ordinal >= timestamp)
+			return 0;
+
+		queue_nr = sf->heap.heap_array[0].queue_nr;
+		queue = &sf->queues.queue_array[queue_nr];
+		sfq = queue->priv;
+
+		auxtrace_heap__pop(&sf->heap);
+		if (sf->heap.heap_cnt) {
+			ts = sf->heap.heap_array[0].ordinal + 1;
+			if (ts > timestamp)
+				ts = timestamp;
+		} else {
+			ts = timestamp;
+		}
+
+		ret = s390_cpumsf_run_decoder(sfq, &ts);
+		if (ret < 0) {
+			auxtrace_heap__add(&sf->heap, queue_nr, ts);
+			return ret;
+		}
+		if (!ret) {
+			ret = auxtrace_heap__add(&sf->heap, queue_nr, ts);
+			if (ret < 0)
+				return ret;
+		}
+	}
+	return 0;
+}
+
+static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu,
+				   pid_t pid, pid_t tid, u64 ip, u64 timestamp)
+{
+	char msg[MAX_AUXTRACE_ERROR_MSG];
+	union perf_event event;
+	int err;
+
+	strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1);
+	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
+			     code, cpu, pid, tid, ip, msg, timestamp);
+
+	err = perf_session__deliver_synth_event(sf->session, &event, NULL);
+	if (err)
+		pr_err("s390 Auxiliary Trace: failed to deliver error event,"
+			"error %d\n", err);
+	return err;
+}
+
+static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample)
+{
+	return s390_cpumsf_synth_error(sf, 1, sample->cpu,
+				       sample->pid, sample->tid, 0,
+				       sample->time);
+}
+
+static int
+s390_cpumsf_process_event(struct perf_session *session,
+			  union perf_event *event,
+			  struct perf_sample *sample,
+			  struct perf_tool *tool)
+{
+	struct s390_cpumsf *sf = container_of(session->auxtrace,
+					      struct s390_cpumsf,
+					      auxtrace);
+	u64 timestamp = sample->time;
+	struct evsel *ev_bc000;
+
+	int err = 0;
+
+	if (dump_trace)
+		return 0;
+
+	if (!tool->ordered_events) {
+		pr_err("s390 Auxiliary Trace requires ordered events\n");
+		return -EINVAL;
+	}
+
+	if (event->header.type == PERF_RECORD_SAMPLE &&
+	    sample->raw_size) {
+		/* Handle event with raw data */
+		ev_bc000 = evlist__event2evsel(session->evlist, event);
+		if (ev_bc000 &&
+		    ev_bc000->core.attr.config == PERF_EVENT_CPUM_CF_DIAG)
+			err = s390_cpumcf_dumpctr(sf, sample);
+		return err;
+	}
+
+	if (event->header.type == PERF_RECORD_AUX &&
+	    event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
+		return s390_cpumsf_lost(sf, sample);
+
+	if (timestamp) {
+		err = s390_cpumsf_update_queues(sf, timestamp);
+		if (!err)
+			err = s390_cpumsf_process_queues(sf, timestamp);
+	}
+	return err;
+}
+
+struct s390_cpumsf_synth {
+	struct perf_tool cpumsf_tool;
+	struct perf_session *session;
+};
+
+static int
+s390_cpumsf_process_auxtrace_event(struct perf_session *session,
+				   union perf_event *event __maybe_unused,
+				   struct perf_tool *tool __maybe_unused)
+{
+	struct s390_cpumsf *sf = container_of(session->auxtrace,
+					      struct s390_cpumsf,
+					      auxtrace);
+
+	int fd = perf_data__fd(session->data);
+	struct auxtrace_buffer *buffer;
+	off_t data_offset;
+	int err;
+
+	if (sf->data_queued)
+		return 0;
+
+	if (perf_data__is_pipe(session->data)) {
+		data_offset = 0;
+	} else {
+		data_offset = lseek(fd, 0, SEEK_CUR);
+		if (data_offset == -1)
+			return -errno;
+	}
+
+	err = auxtrace_queues__add_event(&sf->queues, session, event,
+					 data_offset, &buffer);
+	if (err)
+		return err;
+
+	/* Dump here after copying piped trace out of the pipe */
+	if (dump_trace) {
+		if (auxtrace_buffer__get_data(buffer, fd)) {
+			s390_cpumsf_dump_event(sf, buffer->data,
+					       buffer->size);
+			auxtrace_buffer__put_data(buffer);
+		}
+	}
+	return 0;
+}
+
+static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused)
+{
+}
+
+static int s390_cpumsf_flush(struct perf_session *session __maybe_unused,
+			     struct perf_tool *tool __maybe_unused)
+{
+	return 0;
+}
+
+static void s390_cpumsf_free_queues(struct perf_session *session)
+{
+	struct s390_cpumsf *sf = container_of(session->auxtrace,
+					      struct s390_cpumsf,
+					      auxtrace);
+	struct auxtrace_queues *queues = &sf->queues;
+	unsigned int i;
+
+	for (i = 0; i < queues->nr_queues; i++) {
+		struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *)
+						queues->queue_array[i].priv;
+
+		if (sfq != NULL) {
+			if (sfq->logfile) {
+				fclose(sfq->logfile);
+				sfq->logfile = NULL;
+			}
+			if (sfq->logfile_ctr) {
+				fclose(sfq->logfile_ctr);
+				sfq->logfile_ctr = NULL;
+			}
+		}
+		zfree(&queues->queue_array[i].priv);
+	}
+	auxtrace_queues__free(queues);
+}
+
+static void s390_cpumsf_free(struct perf_session *session)
+{
+	struct s390_cpumsf *sf = container_of(session->auxtrace,
+					      struct s390_cpumsf,
+					      auxtrace);
+
+	auxtrace_heap__free(&sf->heap);
+	s390_cpumsf_free_queues(session);
+	session->auxtrace = NULL;
+	zfree(&sf->logdir);
+	free(sf);
+}
+
+static bool
+s390_cpumsf_evsel_is_auxtrace(struct perf_session *session __maybe_unused,
+			      struct evsel *evsel)
+{
+	return evsel->core.attr.type == PERF_TYPE_RAW &&
+	       evsel->core.attr.config == PERF_EVENT_CPUM_SF_DIAG;
+}
+
+static int s390_cpumsf_get_type(const char *cpuid)
+{
+	int ret, family = 0;
+
+	ret = sscanf(cpuid, "%*[^,],%u", &family);
+	return (ret == 1) ? family : 0;
+}
+
+/* Check itrace options set on perf report command.
+ * Return true, if none are set or all options specified can be
+ * handled on s390 (currently only option 'd' for logging.
+ * Return false otherwise.
+ */
+static bool check_auxtrace_itrace(struct itrace_synth_opts *itops)
+{
+	bool ison = false;
+
+	if (!itops || !itops->set)
+		return true;
+	ison = itops->inject || itops->instructions || itops->branches ||
+		itops->transactions || itops->ptwrites ||
+		itops->pwr_events || itops->errors ||
+		itops->dont_decode || itops->calls || itops->returns ||
+		itops->callchain || itops->thread_stack ||
+		itops->last_branch || itops->add_callchain ||
+		itops->add_last_branch;
+	if (!ison)
+		return true;
+	pr_err("Unsupported --itrace options specified\n");
+	return false;
+}
+
+/* Check for AUXTRACE dump directory if it is needed.
+ * On failure print an error message but continue.
+ * Return 0 on wrong keyword in config file and 1 otherwise.
+ */
+static int s390_cpumsf__config(const char *var, const char *value, void *cb)
+{
+	struct s390_cpumsf *sf = cb;
+	struct stat stbuf;
+	int rc;
+
+	if (strcmp(var, "auxtrace.dumpdir"))
+		return 0;
+	sf->logdir = strdup(value);
+	if (sf->logdir == NULL) {
+		pr_err("Failed to find auxtrace log directory %s,"
+		       " continue with current directory...\n", value);
+		return 1;
+	}
+	rc = stat(sf->logdir, &stbuf);
+	if (rc == -1 || !S_ISDIR(stbuf.st_mode)) {
+		pr_err("Missing auxtrace log directory %s,"
+		       " continue with current directory...\n", value);
+		zfree(&sf->logdir);
+	}
+	return 1;
+}
+
+int s390_cpumsf_process_auxtrace_info(union perf_event *event,
+				      struct perf_session *session)
+{
+	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
+	struct s390_cpumsf *sf;
+	int err;
+
+	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info))
+		return -EINVAL;
+
+	sf = zalloc(sizeof(struct s390_cpumsf));
+	if (sf == NULL)
+		return -ENOMEM;
+
+	if (!check_auxtrace_itrace(session->itrace_synth_opts)) {
+		err = -EINVAL;
+		goto err_free;
+	}
+	sf->use_logfile = session->itrace_synth_opts->log;
+	if (sf->use_logfile)
+		perf_config(s390_cpumsf__config, sf);
+
+	err = auxtrace_queues__init(&sf->queues);
+	if (err)
+		goto err_free;
+
+	sf->session = session;
+	sf->machine = &session->machines.host; /* No kvm support */
+	sf->auxtrace_type = auxtrace_info->type;
+	sf->pmu_type = PERF_TYPE_RAW;
+	sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid);
+
+	sf->auxtrace.process_event = s390_cpumsf_process_event;
+	sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event;
+	sf->auxtrace.flush_events = s390_cpumsf_flush;
+	sf->auxtrace.free_events = s390_cpumsf_free_events;
+	sf->auxtrace.free = s390_cpumsf_free;
+	sf->auxtrace.evsel_is_auxtrace = s390_cpumsf_evsel_is_auxtrace;
+	session->auxtrace = &sf->auxtrace;
+
+	if (dump_trace)
+		return 0;
+
+	err = auxtrace_queues__process_index(&sf->queues, session);
+	if (err)
+		goto err_free_queues;
+
+	if (sf->queues.populated)
+		sf->data_queued = true;
+
+	return 0;
+
+err_free_queues:
+	auxtrace_queues__free(&sf->queues);
+	session->auxtrace = NULL;
+err_free:
+	zfree(&sf->logdir);
+	free(sf);
+	return err;
+}
diff --git a/tools/perf/util/s390-cpumsf.h b/tools/perf/util/s390-cpumsf.h
new file mode 100644
index 000000000..fb64d1005
--- /dev/null
+++ b/tools/perf/util/s390-cpumsf.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2018
+ * Auxtrace support for s390 CPU-Measurement Sampling Facility
+ *
+ * Author(s):  Thomas Richter <tmricht@linux.ibm.com>
+ */
+
+#ifndef INCLUDE__PERF_S390_CPUMSF_H
+#define INCLUDE__PERF_S390_CPUMSF_H
+
+union perf_event;
+struct perf_session;
+struct perf_pmu;
+
+struct auxtrace_record *
+s390_cpumsf_recording_init(int *err, struct perf_pmu *s390_cpumsf_pmu);
+
+int s390_cpumsf_process_auxtrace_info(union perf_event *event,
+				      struct perf_session *session);
+#endif
diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c
new file mode 100644
index 000000000..9a631d974
--- /dev/null
+++ b/tools/perf/util/s390-sample-raw.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2019
+ * Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Architecture specific trace_event function. Save event's bc000 raw data
+ * to file. File name is aux.ctr.## where ## stands for the CPU number the
+ * sample was taken from.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <sys/stat.h>
+#include <linux/compiler.h>
+#include <asm/byteorder.h>
+
+#include "debug.h"
+#include "session.h"
+#include "evlist.h"
+#include "color.h"
+#include "sample-raw.h"
+#include "s390-cpumcf-kernel.h"
+#include "pmu-events/pmu-events.h"
+
+static size_t ctrset_size(struct cf_ctrset_entry *set)
+{
+	return sizeof(*set) + set->ctr * sizeof(u64);
+}
+
+static bool ctrset_valid(struct cf_ctrset_entry *set)
+{
+	return set->def == S390_CPUMCF_DIAG_DEF;
+}
+
+/* CPU Measurement Counter Facility raw data is a byte stream. It is 8 byte
+ * aligned and might have trailing padding bytes.
+ * Display the raw data on screen.
+ */
+static bool s390_cpumcfdg_testctr(struct perf_sample *sample)
+{
+	size_t len = sample->raw_size, offset = 0;
+	unsigned char *buf = sample->raw_data;
+	struct cf_trailer_entry *te;
+	struct cf_ctrset_entry *cep, ce;
+
+	if (!len)
+		return false;
+	while (offset < len) {
+		cep = (struct cf_ctrset_entry *)(buf + offset);
+		ce.def = be16_to_cpu(cep->def);
+		ce.set = be16_to_cpu(cep->set);
+		ce.ctr = be16_to_cpu(cep->ctr);
+		ce.res1 = be16_to_cpu(cep->res1);
+
+		if (!ctrset_valid(&ce) || offset + ctrset_size(&ce) > len) {
+			/* Raw data for counter sets are always multiple of 8
+			 * bytes. Prepending a 4 bytes size field to the
+			 * raw data block in the sample causes the perf tool
+			 * to append 4 padding bytes to make the raw data part
+			 * of the sample a multiple of eight bytes again.
+			 *
+			 * If the last entry (trailer) is 4 bytes off the raw
+			 * area data end, all is good.
+			 */
+			if (len - offset - sizeof(*te) == 4)
+				break;
+			pr_err("Invalid counter set entry at %zd\n", offset);
+			return false;
+		}
+		offset += ctrset_size(&ce);
+	}
+	return true;
+}
+
+/* Dump event bc000 on screen, already tested on correctness. */
+static void s390_cpumcfdg_dumptrail(const char *color, size_t offset,
+				    struct cf_trailer_entry *tep)
+{
+	struct cf_trailer_entry  te;
+
+	te.flags = be64_to_cpu(tep->flags);
+	te.cfvn = be16_to_cpu(tep->cfvn);
+	te.csvn = be16_to_cpu(tep->csvn);
+	te.cpu_speed = be32_to_cpu(tep->cpu_speed);
+	te.timestamp = be64_to_cpu(tep->timestamp);
+	te.progusage1 = be64_to_cpu(tep->progusage1);
+	te.progusage2 = be64_to_cpu(tep->progusage2);
+	te.progusage3 = be64_to_cpu(tep->progusage3);
+	te.tod_base = be64_to_cpu(tep->tod_base);
+	te.mach_type = be16_to_cpu(tep->mach_type);
+	te.res1 = be16_to_cpu(tep->res1);
+	te.res2 = be32_to_cpu(tep->res2);
+
+	color_fprintf(stdout, color, "    [%#08zx] Trailer:%c%c%c%c%c"
+		      " Cfvn:%d Csvn:%d Speed:%d TOD:%#llx\n",
+		      offset, te.clock_base ? 'T' : ' ',
+		      te.speed ? 'S' : ' ', te.mtda ? 'M' : ' ',
+		      te.caca ? 'C' : ' ', te.lcda ? 'L' : ' ',
+		      te.cfvn, te.csvn, te.cpu_speed, te.timestamp);
+	color_fprintf(stdout, color, "\t\t1:%lx 2:%lx 3:%lx TOD-Base:%#llx"
+		      " Type:%x\n\n",
+		      te.progusage1, te.progusage2, te.progusage3,
+		      te.tod_base, te.mach_type);
+}
+
+/* Return starting number of a counter set */
+static int get_counterset_start(int setnr)
+{
+	switch (setnr) {
+	case CPUMF_CTR_SET_BASIC:		/* Basic counter set */
+		return 0;
+	case CPUMF_CTR_SET_USER:		/* Problem state counter set */
+		return 32;
+	case CPUMF_CTR_SET_CRYPTO:		/* Crypto counter set */
+		return 64;
+	case CPUMF_CTR_SET_EXT:			/* Extended counter set */
+		return 128;
+	case CPUMF_CTR_SET_MT_DIAG:		/* Diagnostic counter set */
+		return 448;
+	default:
+		return -1;
+	}
+}
+
+struct get_counter_name_data {
+	int wanted;
+	const char *result;
+};
+
+static int get_counter_name_callback(const struct pmu_event *evp,
+				     const struct pmu_events_table *table __maybe_unused,
+				     void *vdata)
+{
+	struct get_counter_name_data *data = vdata;
+	int rc, event_nr;
+
+	if (evp->name == NULL || evp->event == NULL)
+		return 0;
+	rc = sscanf(evp->event, "event=%x", &event_nr);
+	if (rc == 1 && event_nr == data->wanted) {
+		data->result = evp->name;
+		return 1; /* Terminate the search. */
+	}
+	return 0;
+}
+
+/* Scan the PMU table and extract the logical name of a counter from the
+ * PMU events table. Input is the counter set and counter number with in the
+ * set. Construct the event number and use this as key. If they match return
+ * the name of this counter.
+ * If no match is found a NULL pointer is returned.
+ */
+static const char *get_counter_name(int set, int nr, const struct pmu_events_table *table)
+{
+	struct get_counter_name_data data = {
+		.wanted = get_counterset_start(set) + nr,
+		.result = NULL,
+	};
+
+	if (!table)
+		return NULL;
+
+	pmu_events_table_for_each_event(table, get_counter_name_callback, &data);
+	return data.result;
+}
+
+static void s390_cpumcfdg_dump(struct perf_sample *sample)
+{
+	size_t i, len = sample->raw_size, offset = 0;
+	unsigned char *buf = sample->raw_data;
+	const char *color = PERF_COLOR_BLUE;
+	struct cf_ctrset_entry *cep, ce;
+	const struct pmu_events_table *table;
+	u64 *p;
+
+	table = pmu_events_table__find();
+	while (offset < len) {
+		cep = (struct cf_ctrset_entry *)(buf + offset);
+
+		ce.def = be16_to_cpu(cep->def);
+		ce.set = be16_to_cpu(cep->set);
+		ce.ctr = be16_to_cpu(cep->ctr);
+		ce.res1 = be16_to_cpu(cep->res1);
+
+		if (!ctrset_valid(&ce)) {	/* Print trailer */
+			s390_cpumcfdg_dumptrail(color, offset,
+						(struct cf_trailer_entry *)cep);
+			return;
+		}
+
+		color_fprintf(stdout, color, "    [%#08zx] Counterset:%d"
+			      " Counters:%d\n", offset, ce.set, ce.ctr);
+		for (i = 0, p = (u64 *)(cep + 1); i < ce.ctr; ++i, ++p) {
+			const char *ev_name = get_counter_name(ce.set, i, table);
+
+			color_fprintf(stdout, color,
+				      "\tCounter:%03d %s Value:%#018lx\n", i,
+				      ev_name ?: "<unknown>", be64_to_cpu(*p));
+		}
+		offset += ctrset_size(&ce);
+	}
+}
+
+/* S390 specific trace event function. Check for PERF_RECORD_SAMPLE events
+ * and if the event was triggered by a counter set diagnostic event display
+ * its raw data.
+ * The function is only invoked when the dump flag -D is set.
+ */
+void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample)
+{
+	struct evsel *ev_bc000;
+
+	if (event->header.type != PERF_RECORD_SAMPLE)
+		return;
+
+	ev_bc000 = evlist__event2evsel(evlist, event);
+	if (ev_bc000 == NULL ||
+	    ev_bc000->core.attr.config != PERF_EVENT_CPUM_CF_DIAG)
+		return;
+
+	/* Display raw data on screen */
+	if (!s390_cpumcfdg_testctr(sample)) {
+		pr_err("Invalid counter set data encountered\n");
+		return;
+	}
+	s390_cpumcfdg_dump(sample);
+}
diff --git a/tools/perf/util/sample-raw.c b/tools/perf/util/sample-raw.c
new file mode 100644
index 000000000..f3f6bd9d2
--- /dev/null
+++ b/tools/perf/util/sample-raw.c
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <string.h>
+#include <linux/string.h>
+#include "evlist.h"
+#include "env.h"
+#include "header.h"
+#include "sample-raw.h"
+
+/*
+ * Check platform the perf data file was created on and perform platform
+ * specific interpretation.
+ */
+void evlist__init_trace_event_sample_raw(struct evlist *evlist)
+{
+	const char *arch_pf = perf_env__arch(evlist->env);
+	const char *cpuid = perf_env__cpuid(evlist->env);
+
+	if (arch_pf && !strcmp("s390", arch_pf))
+		evlist->trace_event_sample_raw = evlist__s390_sample_raw;
+	else if (arch_pf && !strcmp("x86", arch_pf) &&
+		 cpuid && strstarts(cpuid, "AuthenticAMD") &&
+		 evlist__has_amd_ibs(evlist)) {
+		evlist->trace_event_sample_raw = evlist__amd_sample_raw;
+	}
+}
diff --git a/tools/perf/util/sample-raw.h b/tools/perf/util/sample-raw.h
new file mode 100644
index 000000000..ea01c5811
--- /dev/null
+++ b/tools/perf/util/sample-raw.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SAMPLE_RAW_H
+#define __SAMPLE_RAW_H 1
+
+struct evlist;
+union perf_event;
+struct perf_sample;
+
+void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event,
+			     struct perf_sample *sample);
+bool evlist__has_amd_ibs(struct evlist *evlist);
+void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event,
+			    struct perf_sample *sample);
+void evlist__init_trace_event_sample_raw(struct evlist *evlist);
+#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build
new file mode 100644
index 000000000..0f5ba2833
--- /dev/null
+++ b/tools/perf/util/scripting-engines/Build
@@ -0,0 +1,6 @@
+perf-$(CONFIG_LIBPERL)   += trace-event-perl.o
+perf-$(CONFIG_LIBPYTHON) += trace-event-python.o
+
+CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default -Wno-bad-function-cast -Wno-declaration-after-statement -Wno-switch-enum
+
+CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-deprecated-declarations
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
new file mode 100644
index 000000000..a5d945415
--- /dev/null
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -0,0 +1,763 @@
+/*
+ * trace-event-perl.  Feed perf script events to an embedded Perl interpreter.
+ *
+ * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <linux/bitmap.h>
+#include <linux/time64.h>
+
+#include <stdbool.h>
+/* perl needs the following define, right after including stdbool.h */
+#define HAS_BOOL
+#include <EXTERN.h>
+#include <perl.h>
+
+#include "../callchain.h"
+#include "../dso.h"
+#include "../machine.h"
+#include "../map.h"
+#include "../symbol.h"
+#include "../thread.h"
+#include "../event.h"
+#include "../trace-event.h"
+#include "../evsel.h"
+#include "../debug.h"
+
+void boot_Perf__Trace__Context(pTHX_ CV *cv);
+void boot_DynaLoader(pTHX_ CV *cv);
+typedef PerlInterpreter * INTERP;
+
+void xs_init(pTHX);
+
+void xs_init(pTHX)
+{
+	const char *file = __FILE__;
+	dXSUB_SYS;
+
+	newXS("Perf::Trace::Context::bootstrap", boot_Perf__Trace__Context,
+	      file);
+	newXS("DynaLoader::boot_DynaLoader", boot_DynaLoader, file);
+}
+
+INTERP my_perl;
+
+#define TRACE_EVENT_TYPE_MAX				\
+	((1 << (sizeof(unsigned short) * 8)) - 1)
+
+static DECLARE_BITMAP(events_defined, TRACE_EVENT_TYPE_MAX);
+
+extern struct scripting_context *scripting_context;
+
+static char *cur_field_name;
+static int zero_flag_atom;
+
+static void define_symbolic_value(const char *ev_name,
+				  const char *field_name,
+				  const char *field_value,
+				  const char *field_str)
+{
+	unsigned long long value;
+	dSP;
+
+	value = eval_flag(field_value);
+
+	ENTER;
+	SAVETMPS;
+	PUSHMARK(SP);
+
+	XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+	XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+	XPUSHs(sv_2mortal(newSVuv(value)));
+	XPUSHs(sv_2mortal(newSVpv(field_str, 0)));
+
+	PUTBACK;
+	if (get_cv("main::define_symbolic_value", 0))
+		call_pv("main::define_symbolic_value", G_SCALAR);
+	SPAGAIN;
+	PUTBACK;
+	FREETMPS;
+	LEAVE;
+}
+
+static void define_symbolic_values(struct tep_print_flag_sym *field,
+				   const char *ev_name,
+				   const char *field_name)
+{
+	define_symbolic_value(ev_name, field_name, field->value, field->str);
+	if (field->next)
+		define_symbolic_values(field->next, ev_name, field_name);
+}
+
+static void define_symbolic_field(const char *ev_name,
+				  const char *field_name)
+{
+	dSP;
+
+	ENTER;
+	SAVETMPS;
+	PUSHMARK(SP);
+
+	XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+	XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+
+	PUTBACK;
+	if (get_cv("main::define_symbolic_field", 0))
+		call_pv("main::define_symbolic_field", G_SCALAR);
+	SPAGAIN;
+	PUTBACK;
+	FREETMPS;
+	LEAVE;
+}
+
+static void define_flag_value(const char *ev_name,
+			      const char *field_name,
+			      const char *field_value,
+			      const char *field_str)
+{
+	unsigned long long value;
+	dSP;
+
+	value = eval_flag(field_value);
+
+	ENTER;
+	SAVETMPS;
+	PUSHMARK(SP);
+
+	XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+	XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+	XPUSHs(sv_2mortal(newSVuv(value)));
+	XPUSHs(sv_2mortal(newSVpv(field_str, 0)));
+
+	PUTBACK;
+	if (get_cv("main::define_flag_value", 0))
+		call_pv("main::define_flag_value", G_SCALAR);
+	SPAGAIN;
+	PUTBACK;
+	FREETMPS;
+	LEAVE;
+}
+
+static void define_flag_values(struct tep_print_flag_sym *field,
+			       const char *ev_name,
+			       const char *field_name)
+{
+	define_flag_value(ev_name, field_name, field->value, field->str);
+	if (field->next)
+		define_flag_values(field->next, ev_name, field_name);
+}
+
+static void define_flag_field(const char *ev_name,
+			      const char *field_name,
+			      const char *delim)
+{
+	dSP;
+
+	ENTER;
+	SAVETMPS;
+	PUSHMARK(SP);
+
+	XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+	XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+	XPUSHs(sv_2mortal(newSVpv(delim, 0)));
+
+	PUTBACK;
+	if (get_cv("main::define_flag_field", 0))
+		call_pv("main::define_flag_field", G_SCALAR);
+	SPAGAIN;
+	PUTBACK;
+	FREETMPS;
+	LEAVE;
+}
+
+static void define_event_symbols(struct tep_event *event,
+				 const char *ev_name,
+				 struct tep_print_arg *args)
+{
+	if (args == NULL)
+		return;
+
+	switch (args->type) {
+	case TEP_PRINT_NULL:
+		break;
+	case TEP_PRINT_ATOM:
+		define_flag_value(ev_name, cur_field_name, "0",
+				  args->atom.atom);
+		zero_flag_atom = 0;
+		break;
+	case TEP_PRINT_FIELD:
+		free(cur_field_name);
+		cur_field_name = strdup(args->field.name);
+		break;
+	case TEP_PRINT_FLAGS:
+		define_event_symbols(event, ev_name, args->flags.field);
+		define_flag_field(ev_name, cur_field_name, args->flags.delim);
+		define_flag_values(args->flags.flags, ev_name, cur_field_name);
+		break;
+	case TEP_PRINT_SYMBOL:
+		define_event_symbols(event, ev_name, args->symbol.field);
+		define_symbolic_field(ev_name, cur_field_name);
+		define_symbolic_values(args->symbol.symbols, ev_name,
+				       cur_field_name);
+		break;
+	case TEP_PRINT_HEX:
+	case TEP_PRINT_HEX_STR:
+		define_event_symbols(event, ev_name, args->hex.field);
+		define_event_symbols(event, ev_name, args->hex.size);
+		break;
+	case TEP_PRINT_INT_ARRAY:
+		define_event_symbols(event, ev_name, args->int_array.field);
+		define_event_symbols(event, ev_name, args->int_array.count);
+		define_event_symbols(event, ev_name, args->int_array.el_size);
+		break;
+	case TEP_PRINT_BSTRING:
+	case TEP_PRINT_DYNAMIC_ARRAY:
+	case TEP_PRINT_DYNAMIC_ARRAY_LEN:
+	case TEP_PRINT_STRING:
+	case TEP_PRINT_BITMASK:
+		break;
+	case TEP_PRINT_TYPE:
+		define_event_symbols(event, ev_name, args->typecast.item);
+		break;
+	case TEP_PRINT_OP:
+		if (strcmp(args->op.op, ":") == 0)
+			zero_flag_atom = 1;
+		define_event_symbols(event, ev_name, args->op.left);
+		define_event_symbols(event, ev_name, args->op.right);
+		break;
+	case TEP_PRINT_FUNC:
+	default:
+		pr_err("Unsupported print arg type\n");
+		/* we should warn... */
+		return;
+	}
+
+	if (args->next)
+		define_event_symbols(event, ev_name, args->next);
+}
+
+static SV *perl_process_callchain(struct perf_sample *sample,
+				  struct evsel *evsel,
+				  struct addr_location *al)
+{
+	AV *list;
+
+	list = newAV();
+	if (!list)
+		goto exit;
+
+	if (!symbol_conf.use_callchain || !sample->callchain)
+		goto exit;
+
+	if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+				      sample, NULL, NULL, scripting_max_stack) != 0) {
+		pr_err("Failed to resolve callchain. Skipping\n");
+		goto exit;
+	}
+	callchain_cursor_commit(&callchain_cursor);
+
+
+	while (1) {
+		HV *elem;
+		struct callchain_cursor_node *node;
+		node = callchain_cursor_current(&callchain_cursor);
+		if (!node)
+			break;
+
+		elem = newHV();
+		if (!elem)
+			goto exit;
+
+		if (!hv_stores(elem, "ip", newSVuv(node->ip))) {
+			hv_undef(elem);
+			goto exit;
+		}
+
+		if (node->ms.sym) {
+			HV *sym = newHV();
+			if (!sym) {
+				hv_undef(elem);
+				goto exit;
+			}
+			if (!hv_stores(sym, "start",   newSVuv(node->ms.sym->start)) ||
+			    !hv_stores(sym, "end",     newSVuv(node->ms.sym->end)) ||
+			    !hv_stores(sym, "binding", newSVuv(node->ms.sym->binding)) ||
+			    !hv_stores(sym, "name",    newSVpvn(node->ms.sym->name,
+								node->ms.sym->namelen)) ||
+			    !hv_stores(elem, "sym",    newRV_noinc((SV*)sym))) {
+				hv_undef(sym);
+				hv_undef(elem);
+				goto exit;
+			}
+		}
+
+		if (node->ms.map) {
+			struct map *map = node->ms.map;
+			const char *dsoname = "[unknown]";
+			if (map && map->dso) {
+				if (symbol_conf.show_kernel_path && map->dso->long_name)
+					dsoname = map->dso->long_name;
+				else
+					dsoname = map->dso->name;
+			}
+			if (!hv_stores(elem, "dso", newSVpv(dsoname,0))) {
+				hv_undef(elem);
+				goto exit;
+			}
+		}
+
+		callchain_cursor_advance(&callchain_cursor);
+		av_push(list, newRV_noinc((SV*)elem));
+	}
+
+exit:
+	return newRV_noinc((SV*)list);
+}
+
+static void perl_process_tracepoint(struct perf_sample *sample,
+				    struct evsel *evsel,
+				    struct addr_location *al)
+{
+	struct thread *thread = al->thread;
+	struct tep_event *event = evsel->tp_format;
+	struct tep_format_field *field;
+	static char handler[256];
+	unsigned long long val;
+	unsigned long s, ns;
+	int pid;
+	int cpu = sample->cpu;
+	void *data = sample->raw_data;
+	unsigned long long nsecs = sample->time;
+	const char *comm = thread__comm_str(thread);
+
+	dSP;
+
+	if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
+		return;
+
+	if (!event) {
+		pr_debug("ug! no event found for type %" PRIu64, (u64)evsel->core.attr.config);
+		return;
+	}
+
+	pid = raw_field_value(event, "common_pid", data);
+
+	sprintf(handler, "%s::%s", event->system, event->name);
+
+	if (!test_and_set_bit(event->id, events_defined))
+		define_event_symbols(event, handler, event->print_fmt.args);
+
+	s = nsecs / NSEC_PER_SEC;
+	ns = nsecs - s * NSEC_PER_SEC;
+
+	ENTER;
+	SAVETMPS;
+	PUSHMARK(SP);
+
+	XPUSHs(sv_2mortal(newSVpv(handler, 0)));
+	XPUSHs(sv_2mortal(newSViv(PTR2IV(scripting_context))));
+	XPUSHs(sv_2mortal(newSVuv(cpu)));
+	XPUSHs(sv_2mortal(newSVuv(s)));
+	XPUSHs(sv_2mortal(newSVuv(ns)));
+	XPUSHs(sv_2mortal(newSViv(pid)));
+	XPUSHs(sv_2mortal(newSVpv(comm, 0)));
+	XPUSHs(sv_2mortal(perl_process_callchain(sample, evsel, al)));
+
+	/* common fields other than pid can be accessed via xsub fns */
+
+	for (field = event->format.fields; field; field = field->next) {
+		if (field->flags & TEP_FIELD_IS_STRING) {
+			int offset;
+			if (field->flags & TEP_FIELD_IS_DYNAMIC) {
+				offset = *(int *)(data + field->offset);
+				offset &= 0xffff;
+				if (field->flags & TEP_FIELD_IS_RELATIVE)
+					offset += field->offset + field->size;
+			} else
+				offset = field->offset;
+			XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0)));
+		} else { /* FIELD_IS_NUMERIC */
+			val = read_size(event, data + field->offset,
+					field->size);
+			if (field->flags & TEP_FIELD_IS_SIGNED) {
+				XPUSHs(sv_2mortal(newSViv(val)));
+			} else {
+				XPUSHs(sv_2mortal(newSVuv(val)));
+			}
+		}
+	}
+
+	PUTBACK;
+
+	if (get_cv(handler, 0))
+		call_pv(handler, G_SCALAR);
+	else if (get_cv("main::trace_unhandled", 0)) {
+		XPUSHs(sv_2mortal(newSVpv(handler, 0)));
+		XPUSHs(sv_2mortal(newSViv(PTR2IV(scripting_context))));
+		XPUSHs(sv_2mortal(newSVuv(cpu)));
+		XPUSHs(sv_2mortal(newSVuv(nsecs)));
+		XPUSHs(sv_2mortal(newSViv(pid)));
+		XPUSHs(sv_2mortal(newSVpv(comm, 0)));
+		XPUSHs(sv_2mortal(perl_process_callchain(sample, evsel, al)));
+		call_pv("main::trace_unhandled", G_SCALAR);
+	}
+	SPAGAIN;
+	PUTBACK;
+	FREETMPS;
+	LEAVE;
+}
+
+static void perl_process_event_generic(union perf_event *event,
+				       struct perf_sample *sample,
+				       struct evsel *evsel)
+{
+	dSP;
+
+	if (!get_cv("process_event", 0))
+		return;
+
+	ENTER;
+	SAVETMPS;
+	PUSHMARK(SP);
+	XPUSHs(sv_2mortal(newSVpvn((const char *)event, event->header.size)));
+	XPUSHs(sv_2mortal(newSVpvn((const char *)&evsel->core.attr, sizeof(evsel->core.attr))));
+	XPUSHs(sv_2mortal(newSVpvn((const char *)sample, sizeof(*sample))));
+	XPUSHs(sv_2mortal(newSVpvn((const char *)sample->raw_data, sample->raw_size)));
+	PUTBACK;
+	call_pv("process_event", G_SCALAR);
+	SPAGAIN;
+	PUTBACK;
+	FREETMPS;
+	LEAVE;
+}
+
+static void perl_process_event(union perf_event *event,
+			       struct perf_sample *sample,
+			       struct evsel *evsel,
+			       struct addr_location *al,
+			       struct addr_location *addr_al)
+{
+	scripting_context__update(scripting_context, event, sample, evsel, al, addr_al);
+	perl_process_tracepoint(sample, evsel, al);
+	perl_process_event_generic(event, sample, evsel);
+}
+
+static void run_start_sub(void)
+{
+	dSP; /* access to Perl stack */
+	PUSHMARK(SP);
+
+	if (get_cv("main::trace_begin", 0))
+		call_pv("main::trace_begin", G_DISCARD | G_NOARGS);
+}
+
+/*
+ * Start trace script
+ */
+static int perl_start_script(const char *script, int argc, const char **argv,
+			     struct perf_session *session)
+{
+	const char **command_line;
+	int i, err = 0;
+
+	scripting_context->session = session;
+
+	command_line = malloc((argc + 2) * sizeof(const char *));
+	command_line[0] = "";
+	command_line[1] = script;
+	for (i = 2; i < argc + 2; i++)
+		command_line[i] = argv[i - 2];
+
+	my_perl = perl_alloc();
+	perl_construct(my_perl);
+
+	if (perl_parse(my_perl, xs_init, argc + 2, (char **)command_line,
+		       (char **)NULL)) {
+		err = -1;
+		goto error;
+	}
+
+	if (perl_run(my_perl)) {
+		err = -1;
+		goto error;
+	}
+
+	if (SvTRUE(ERRSV)) {
+		err = -1;
+		goto error;
+	}
+
+	run_start_sub();
+
+	free(command_line);
+	return 0;
+error:
+	perl_free(my_perl);
+	free(command_line);
+
+	return err;
+}
+
+static int perl_flush_script(void)
+{
+	return 0;
+}
+
+/*
+ * Stop trace script
+ */
+static int perl_stop_script(void)
+{
+	dSP; /* access to Perl stack */
+	PUSHMARK(SP);
+
+	if (get_cv("main::trace_end", 0))
+		call_pv("main::trace_end", G_DISCARD | G_NOARGS);
+
+	perl_destruct(my_perl);
+	perl_free(my_perl);
+
+	return 0;
+}
+
+static int perl_generate_script(struct tep_handle *pevent, const char *outfile)
+{
+	int i, not_first, count, nr_events;
+	struct tep_event **all_events;
+	struct tep_event *event = NULL;
+	struct tep_format_field *f;
+	char fname[PATH_MAX];
+	FILE *ofp;
+
+	sprintf(fname, "%s.pl", outfile);
+	ofp = fopen(fname, "w");
+	if (ofp == NULL) {
+		fprintf(stderr, "couldn't open %s\n", fname);
+		return -1;
+	}
+
+	fprintf(ofp, "# perf script event handlers, "
+		"generated by perf script -g perl\n");
+
+	fprintf(ofp, "# Licensed under the terms of the GNU GPL"
+		" License version 2\n\n");
+
+	fprintf(ofp, "# The common_* event handler fields are the most useful "
+		"fields common to\n");
+
+	fprintf(ofp, "# all events.  They don't necessarily correspond to "
+		"the 'common_*' fields\n");
+
+	fprintf(ofp, "# in the format files.  Those fields not available as "
+		"handler params can\n");
+
+	fprintf(ofp, "# be retrieved using Perl functions of the form "
+		"common_*($context).\n");
+
+	fprintf(ofp, "# See Context.pm for the list of available "
+		"functions.\n\n");
+
+	fprintf(ofp, "use lib \"$ENV{'PERF_EXEC_PATH'}/scripts/perl/"
+		"Perf-Trace-Util/lib\";\n");
+
+	fprintf(ofp, "use lib \"./Perf-Trace-Util/lib\";\n");
+	fprintf(ofp, "use Perf::Trace::Core;\n");
+	fprintf(ofp, "use Perf::Trace::Context;\n");
+	fprintf(ofp, "use Perf::Trace::Util;\n\n");
+
+	fprintf(ofp, "sub trace_begin\n{\n\t# optional\n}\n\n");
+	fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n");
+
+
+	fprintf(ofp, "\n\
+sub print_backtrace\n\
+{\n\
+	my $callchain = shift;\n\
+	for my $node (@$callchain)\n\
+	{\n\
+		if(exists $node->{sym})\n\
+		{\n\
+			printf( \"\\t[\\%%x] \\%%s\\n\", $node->{ip}, $node->{sym}{name});\n\
+		}\n\
+		else\n\
+		{\n\
+			printf( \"\\t[\\%%x]\\n\", $node{ip});\n\
+		}\n\
+	}\n\
+}\n\n\
+");
+
+	nr_events = tep_get_events_count(pevent);
+	all_events = tep_list_events(pevent, TEP_EVENT_SORT_ID);
+
+	for (i = 0; all_events && i < nr_events; i++) {
+		event = all_events[i];
+		fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name);
+		fprintf(ofp, "\tmy (");
+
+		fprintf(ofp, "$event_name, ");
+		fprintf(ofp, "$context, ");
+		fprintf(ofp, "$common_cpu, ");
+		fprintf(ofp, "$common_secs, ");
+		fprintf(ofp, "$common_nsecs,\n");
+		fprintf(ofp, "\t    $common_pid, ");
+		fprintf(ofp, "$common_comm, ");
+		fprintf(ofp, "$common_callchain,\n\t    ");
+
+		not_first = 0;
+		count = 0;
+
+		for (f = event->format.fields; f; f = f->next) {
+			if (not_first++)
+				fprintf(ofp, ", ");
+			if (++count % 5 == 0)
+				fprintf(ofp, "\n\t    ");
+
+			fprintf(ofp, "$%s", f->name);
+		}
+		fprintf(ofp, ") = @_;\n\n");
+
+		fprintf(ofp, "\tprint_header($event_name, $common_cpu, "
+			"$common_secs, $common_nsecs,\n\t             "
+			"$common_pid, $common_comm, $common_callchain);\n\n");
+
+		fprintf(ofp, "\tprintf(\"");
+
+		not_first = 0;
+		count = 0;
+
+		for (f = event->format.fields; f; f = f->next) {
+			if (not_first++)
+				fprintf(ofp, ", ");
+			if (count && count % 4 == 0) {
+				fprintf(ofp, "\".\n\t       \"");
+			}
+			count++;
+
+			fprintf(ofp, "%s=", f->name);
+			if (f->flags & TEP_FIELD_IS_STRING ||
+			    f->flags & TEP_FIELD_IS_FLAG ||
+			    f->flags & TEP_FIELD_IS_SYMBOLIC)
+				fprintf(ofp, "%%s");
+			else if (f->flags & TEP_FIELD_IS_SIGNED)
+				fprintf(ofp, "%%d");
+			else
+				fprintf(ofp, "%%u");
+		}
+
+		fprintf(ofp, "\\n\",\n\t       ");
+
+		not_first = 0;
+		count = 0;
+
+		for (f = event->format.fields; f; f = f->next) {
+			if (not_first++)
+				fprintf(ofp, ", ");
+
+			if (++count % 5 == 0)
+				fprintf(ofp, "\n\t       ");
+
+			if (f->flags & TEP_FIELD_IS_FLAG) {
+				if ((count - 1) % 5 != 0) {
+					fprintf(ofp, "\n\t       ");
+					count = 4;
+				}
+				fprintf(ofp, "flag_str(\"");
+				fprintf(ofp, "%s::%s\", ", event->system,
+					event->name);
+				fprintf(ofp, "\"%s\", $%s)", f->name,
+					f->name);
+			} else if (f->flags & TEP_FIELD_IS_SYMBOLIC) {
+				if ((count - 1) % 5 != 0) {
+					fprintf(ofp, "\n\t       ");
+					count = 4;
+				}
+				fprintf(ofp, "symbol_str(\"");
+				fprintf(ofp, "%s::%s\", ", event->system,
+					event->name);
+				fprintf(ofp, "\"%s\", $%s)", f->name,
+					f->name);
+			} else
+				fprintf(ofp, "$%s", f->name);
+		}
+
+		fprintf(ofp, ");\n\n");
+
+		fprintf(ofp, "\tprint_backtrace($common_callchain);\n");
+
+		fprintf(ofp, "}\n\n");
+	}
+
+	fprintf(ofp, "sub trace_unhandled\n{\n\tmy ($event_name, $context, "
+		"$common_cpu, $common_secs, $common_nsecs,\n\t    "
+		"$common_pid, $common_comm, $common_callchain) = @_;\n\n");
+
+	fprintf(ofp, "\tprint_header($event_name, $common_cpu, "
+		"$common_secs, $common_nsecs,\n\t             $common_pid, "
+		"$common_comm, $common_callchain);\n");
+	fprintf(ofp, "\tprint_backtrace($common_callchain);\n");
+	fprintf(ofp, "}\n\n");
+
+	fprintf(ofp, "sub print_header\n{\n"
+		"\tmy ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;\n\n"
+		"\tprintf(\"%%-20s %%5u %%05u.%%09u %%8u %%-20s \",\n\t       "
+		"$event_name, $cpu, $secs, $nsecs, $pid, $comm);\n}\n");
+
+	fprintf(ofp,
+		"\n# Packed byte string args of process_event():\n"
+		"#\n"
+		"# $event:\tunion perf_event\tutil/event.h\n"
+		"# $attr:\tstruct perf_event_attr\tlinux/perf_event.h\n"
+		"# $sample:\tstruct perf_sample\tutil/event.h\n"
+		"# $raw_data:\tperf_sample->raw_data\tutil/event.h\n"
+		"\n"
+		"sub process_event\n"
+		"{\n"
+		"\tmy ($event, $attr, $sample, $raw_data) = @_;\n"
+		"\n"
+		"\tmy @event\t= unpack(\"LSS\", $event);\n"
+		"\tmy @attr\t= unpack(\"LLQQQQQLLQQ\", $attr);\n"
+		"\tmy @sample\t= unpack(\"QLLQQQQQLL\", $sample);\n"
+		"\tmy @raw_data\t= unpack(\"C*\", $raw_data);\n"
+		"\n"
+		"\tuse Data::Dumper;\n"
+		"\tprint Dumper \\@event, \\@attr, \\@sample, \\@raw_data;\n"
+		"}\n");
+
+	fclose(ofp);
+
+	fprintf(stderr, "generated Perl script: %s\n", fname);
+
+	return 0;
+}
+
+struct scripting_ops perl_scripting_ops = {
+	.name = "Perl",
+	.dirname = "perl",
+	.start_script = perl_start_script,
+	.flush_script = perl_flush_script,
+	.stop_script = perl_stop_script,
+	.process_event = perl_process_event,
+	.generate_script = perl_generate_script,
+};
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
new file mode 100644
index 000000000..1f2040f36
--- /dev/null
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -0,0 +1,2176 @@
+/*
+ * trace-event-python.  Feed trace events to an embedded Python interpreter.
+ *
+ * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <Python.h>
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <linux/bitmap.h>
+#include <linux/compiler.h>
+#include <linux/time64.h>
+
+#include "../build-id.h"
+#include "../counts.h"
+#include "../debug.h"
+#include "../dso.h"
+#include "../callchain.h"
+#include "../env.h"
+#include "../evsel.h"
+#include "../event.h"
+#include "../thread.h"
+#include "../comm.h"
+#include "../machine.h"
+#include "../db-export.h"
+#include "../thread-stack.h"
+#include "../trace-event.h"
+#include "../call-path.h"
+#include "map.h"
+#include "symbol.h"
+#include "thread_map.h"
+#include "print_binary.h"
+#include "stat.h"
+#include "mem-events.h"
+
+#if PY_MAJOR_VERSION < 3
+#define _PyUnicode_FromString(arg) \
+  PyString_FromString(arg)
+#define _PyUnicode_FromStringAndSize(arg1, arg2) \
+  PyString_FromStringAndSize((arg1), (arg2))
+#define _PyBytes_FromStringAndSize(arg1, arg2) \
+  PyString_FromStringAndSize((arg1), (arg2))
+#define _PyLong_FromLong(arg) \
+  PyInt_FromLong(arg)
+#define _PyLong_AsLong(arg) \
+  PyInt_AsLong(arg)
+#define _PyCapsule_New(arg1, arg2, arg3) \
+  PyCObject_FromVoidPtr((arg1), (arg2))
+
+PyMODINIT_FUNC initperf_trace_context(void);
+#else
+#define _PyUnicode_FromString(arg) \
+  PyUnicode_FromString(arg)
+#define _PyUnicode_FromStringAndSize(arg1, arg2) \
+  PyUnicode_FromStringAndSize((arg1), (arg2))
+#define _PyBytes_FromStringAndSize(arg1, arg2) \
+  PyBytes_FromStringAndSize((arg1), (arg2))
+#define _PyLong_FromLong(arg) \
+  PyLong_FromLong(arg)
+#define _PyLong_AsLong(arg) \
+  PyLong_AsLong(arg)
+#define _PyCapsule_New(arg1, arg2, arg3) \
+  PyCapsule_New((arg1), (arg2), (arg3))
+
+PyMODINIT_FUNC PyInit_perf_trace_context(void);
+#endif
+
+#define TRACE_EVENT_TYPE_MAX				\
+	((1 << (sizeof(unsigned short) * 8)) - 1)
+
+static DECLARE_BITMAP(events_defined, TRACE_EVENT_TYPE_MAX);
+
+#define MAX_FIELDS	64
+#define N_COMMON_FIELDS	7
+
+extern struct scripting_context *scripting_context;
+
+static char *cur_field_name;
+static int zero_flag_atom;
+
+static PyObject *main_module, *main_dict;
+
+struct tables {
+	struct db_export	dbe;
+	PyObject		*evsel_handler;
+	PyObject		*machine_handler;
+	PyObject		*thread_handler;
+	PyObject		*comm_handler;
+	PyObject		*comm_thread_handler;
+	PyObject		*dso_handler;
+	PyObject		*symbol_handler;
+	PyObject		*branch_type_handler;
+	PyObject		*sample_handler;
+	PyObject		*call_path_handler;
+	PyObject		*call_return_handler;
+	PyObject		*synth_handler;
+	PyObject		*context_switch_handler;
+	bool			db_export_mode;
+};
+
+static struct tables tables_global;
+
+static void handler_call_die(const char *handler_name) __noreturn;
+static void handler_call_die(const char *handler_name)
+{
+	PyErr_Print();
+	Py_FatalError("problem in Python trace event handler");
+	// Py_FatalError does not return
+	// but we have to make the compiler happy
+	abort();
+}
+
+/*
+ * Insert val into the dictionary and decrement the reference counter.
+ * This is necessary for dictionaries since PyDict_SetItemString() does not
+ * steal a reference, as opposed to PyTuple_SetItem().
+ */
+static void pydict_set_item_string_decref(PyObject *dict, const char *key, PyObject *val)
+{
+	PyDict_SetItemString(dict, key, val);
+	Py_DECREF(val);
+}
+
+static PyObject *get_handler(const char *handler_name)
+{
+	PyObject *handler;
+
+	handler = PyDict_GetItemString(main_dict, handler_name);
+	if (handler && !PyCallable_Check(handler))
+		return NULL;
+	return handler;
+}
+
+static int get_argument_count(PyObject *handler)
+{
+	int arg_count = 0;
+
+	/*
+	 * The attribute for the code object is func_code in Python 2,
+	 * whereas it is __code__ in Python 3.0+.
+	 */
+	PyObject *code_obj = PyObject_GetAttrString(handler,
+		"func_code");
+	if (PyErr_Occurred()) {
+		PyErr_Clear();
+		code_obj = PyObject_GetAttrString(handler,
+			"__code__");
+	}
+	PyErr_Clear();
+	if (code_obj) {
+		PyObject *arg_count_obj = PyObject_GetAttrString(code_obj,
+			"co_argcount");
+		if (arg_count_obj) {
+			arg_count = (int) _PyLong_AsLong(arg_count_obj);
+			Py_DECREF(arg_count_obj);
+		}
+		Py_DECREF(code_obj);
+	}
+	return arg_count;
+}
+
+static void call_object(PyObject *handler, PyObject *args, const char *die_msg)
+{
+	PyObject *retval;
+
+	retval = PyObject_CallObject(handler, args);
+	if (retval == NULL)
+		handler_call_die(die_msg);
+	Py_DECREF(retval);
+}
+
+static void try_call_object(const char *handler_name, PyObject *args)
+{
+	PyObject *handler;
+
+	handler = get_handler(handler_name);
+	if (handler)
+		call_object(handler, args, handler_name);
+}
+
+static void define_value(enum tep_print_arg_type field_type,
+			 const char *ev_name,
+			 const char *field_name,
+			 const char *field_value,
+			 const char *field_str)
+{
+	const char *handler_name = "define_flag_value";
+	PyObject *t;
+	unsigned long long value;
+	unsigned n = 0;
+
+	if (field_type == TEP_PRINT_SYMBOL)
+		handler_name = "define_symbolic_value";
+
+	t = PyTuple_New(4);
+	if (!t)
+		Py_FatalError("couldn't create Python tuple");
+
+	value = eval_flag(field_value);
+
+	PyTuple_SetItem(t, n++, _PyUnicode_FromString(ev_name));
+	PyTuple_SetItem(t, n++, _PyUnicode_FromString(field_name));
+	PyTuple_SetItem(t, n++, _PyLong_FromLong(value));
+	PyTuple_SetItem(t, n++, _PyUnicode_FromString(field_str));
+
+	try_call_object(handler_name, t);
+
+	Py_DECREF(t);
+}
+
+static void define_values(enum tep_print_arg_type field_type,
+			  struct tep_print_flag_sym *field,
+			  const char *ev_name,
+			  const char *field_name)
+{
+	define_value(field_type, ev_name, field_name, field->value,
+		     field->str);
+
+	if (field->next)
+		define_values(field_type, field->next, ev_name, field_name);
+}
+
+static void define_field(enum tep_print_arg_type field_type,
+			 const char *ev_name,
+			 const char *field_name,
+			 const char *delim)
+{
+	const char *handler_name = "define_flag_field";
+	PyObject *t;
+	unsigned n = 0;
+
+	if (field_type == TEP_PRINT_SYMBOL)
+		handler_name = "define_symbolic_field";
+
+	if (field_type == TEP_PRINT_FLAGS)
+		t = PyTuple_New(3);
+	else
+		t = PyTuple_New(2);
+	if (!t)
+		Py_FatalError("couldn't create Python tuple");
+
+	PyTuple_SetItem(t, n++, _PyUnicode_FromString(ev_name));
+	PyTuple_SetItem(t, n++, _PyUnicode_FromString(field_name));
+	if (field_type == TEP_PRINT_FLAGS)
+		PyTuple_SetItem(t, n++, _PyUnicode_FromString(delim));
+
+	try_call_object(handler_name, t);
+
+	Py_DECREF(t);
+}
+
+static void define_event_symbols(struct tep_event *event,
+				 const char *ev_name,
+				 struct tep_print_arg *args)
+{
+	if (args == NULL)
+		return;
+
+	switch (args->type) {
+	case TEP_PRINT_NULL:
+		break;
+	case TEP_PRINT_ATOM:
+		define_value(TEP_PRINT_FLAGS, ev_name, cur_field_name, "0",
+			     args->atom.atom);
+		zero_flag_atom = 0;
+		break;
+	case TEP_PRINT_FIELD:
+		free(cur_field_name);
+		cur_field_name = strdup(args->field.name);
+		break;
+	case TEP_PRINT_FLAGS:
+		define_event_symbols(event, ev_name, args->flags.field);
+		define_field(TEP_PRINT_FLAGS, ev_name, cur_field_name,
+			     args->flags.delim);
+		define_values(TEP_PRINT_FLAGS, args->flags.flags, ev_name,
+			      cur_field_name);
+		break;
+	case TEP_PRINT_SYMBOL:
+		define_event_symbols(event, ev_name, args->symbol.field);
+		define_field(TEP_PRINT_SYMBOL, ev_name, cur_field_name, NULL);
+		define_values(TEP_PRINT_SYMBOL, args->symbol.symbols, ev_name,
+			      cur_field_name);
+		break;
+	case TEP_PRINT_HEX:
+	case TEP_PRINT_HEX_STR:
+		define_event_symbols(event, ev_name, args->hex.field);
+		define_event_symbols(event, ev_name, args->hex.size);
+		break;
+	case TEP_PRINT_INT_ARRAY:
+		define_event_symbols(event, ev_name, args->int_array.field);
+		define_event_symbols(event, ev_name, args->int_array.count);
+		define_event_symbols(event, ev_name, args->int_array.el_size);
+		break;
+	case TEP_PRINT_STRING:
+		break;
+	case TEP_PRINT_TYPE:
+		define_event_symbols(event, ev_name, args->typecast.item);
+		break;
+	case TEP_PRINT_OP:
+		if (strcmp(args->op.op, ":") == 0)
+			zero_flag_atom = 1;
+		define_event_symbols(event, ev_name, args->op.left);
+		define_event_symbols(event, ev_name, args->op.right);
+		break;
+	default:
+		/* gcc warns for these? */
+	case TEP_PRINT_BSTRING:
+	case TEP_PRINT_DYNAMIC_ARRAY:
+	case TEP_PRINT_DYNAMIC_ARRAY_LEN:
+	case TEP_PRINT_FUNC:
+	case TEP_PRINT_BITMASK:
+		/* we should warn... */
+		return;
+	}
+
+	if (args->next)
+		define_event_symbols(event, ev_name, args->next);
+}
+
+static PyObject *get_field_numeric_entry(struct tep_event *event,
+		struct tep_format_field *field, void *data)
+{
+	bool is_array = field->flags & TEP_FIELD_IS_ARRAY;
+	PyObject *obj = NULL, *list = NULL;
+	unsigned long long val;
+	unsigned int item_size, n_items, i;
+
+	if (is_array) {
+		list = PyList_New(field->arraylen);
+		item_size = field->size / field->arraylen;
+		n_items = field->arraylen;
+	} else {
+		item_size = field->size;
+		n_items = 1;
+	}
+
+	for (i = 0; i < n_items; i++) {
+
+		val = read_size(event, data + field->offset + i * item_size,
+				item_size);
+		if (field->flags & TEP_FIELD_IS_SIGNED) {
+			if ((long long)val >= LONG_MIN &&
+					(long long)val <= LONG_MAX)
+				obj = _PyLong_FromLong(val);
+			else
+				obj = PyLong_FromLongLong(val);
+		} else {
+			if (val <= LONG_MAX)
+				obj = _PyLong_FromLong(val);
+			else
+				obj = PyLong_FromUnsignedLongLong(val);
+		}
+		if (is_array)
+			PyList_SET_ITEM(list, i, obj);
+	}
+	if (is_array)
+		obj = list;
+	return obj;
+}
+
+static const char *get_dsoname(struct map *map)
+{
+	const char *dsoname = "[unknown]";
+
+	if (map && map->dso) {
+		if (symbol_conf.show_kernel_path && map->dso->long_name)
+			dsoname = map->dso->long_name;
+		else
+			dsoname = map->dso->name;
+	}
+
+	return dsoname;
+}
+
+static unsigned long get_offset(struct symbol *sym, struct addr_location *al)
+{
+	unsigned long offset;
+
+	if (al->addr < sym->end)
+		offset = al->addr - sym->start;
+	else
+		offset = al->addr - al->map->start - sym->start;
+
+	return offset;
+}
+
+static PyObject *python_process_callchain(struct perf_sample *sample,
+					 struct evsel *evsel,
+					 struct addr_location *al)
+{
+	PyObject *pylist;
+
+	pylist = PyList_New(0);
+	if (!pylist)
+		Py_FatalError("couldn't create Python list");
+
+	if (!symbol_conf.use_callchain || !sample->callchain)
+		goto exit;
+
+	if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+				      sample, NULL, NULL,
+				      scripting_max_stack) != 0) {
+		pr_err("Failed to resolve callchain. Skipping\n");
+		goto exit;
+	}
+	callchain_cursor_commit(&callchain_cursor);
+
+
+	while (1) {
+		PyObject *pyelem;
+		struct callchain_cursor_node *node;
+		node = callchain_cursor_current(&callchain_cursor);
+		if (!node)
+			break;
+
+		pyelem = PyDict_New();
+		if (!pyelem)
+			Py_FatalError("couldn't create Python dictionary");
+
+
+		pydict_set_item_string_decref(pyelem, "ip",
+				PyLong_FromUnsignedLongLong(node->ip));
+
+		if (node->ms.sym) {
+			PyObject *pysym  = PyDict_New();
+			if (!pysym)
+				Py_FatalError("couldn't create Python dictionary");
+			pydict_set_item_string_decref(pysym, "start",
+					PyLong_FromUnsignedLongLong(node->ms.sym->start));
+			pydict_set_item_string_decref(pysym, "end",
+					PyLong_FromUnsignedLongLong(node->ms.sym->end));
+			pydict_set_item_string_decref(pysym, "binding",
+					_PyLong_FromLong(node->ms.sym->binding));
+			pydict_set_item_string_decref(pysym, "name",
+					_PyUnicode_FromStringAndSize(node->ms.sym->name,
+							node->ms.sym->namelen));
+			pydict_set_item_string_decref(pyelem, "sym", pysym);
+
+			if (node->ms.map) {
+				struct map *map = node->ms.map;
+				struct addr_location node_al;
+				unsigned long offset;
+
+				node_al.addr = map->map_ip(map, node->ip);
+				node_al.map  = map;
+				offset = get_offset(node->ms.sym, &node_al);
+
+				pydict_set_item_string_decref(
+					pyelem, "sym_off",
+					PyLong_FromUnsignedLongLong(offset));
+			}
+			if (node->srcline && strcmp(":0", node->srcline)) {
+				pydict_set_item_string_decref(
+					pyelem, "sym_srcline",
+					_PyUnicode_FromString(node->srcline));
+			}
+		}
+
+		if (node->ms.map) {
+			const char *dsoname = get_dsoname(node->ms.map);
+
+			pydict_set_item_string_decref(pyelem, "dso",
+					_PyUnicode_FromString(dsoname));
+		}
+
+		callchain_cursor_advance(&callchain_cursor);
+		PyList_Append(pylist, pyelem);
+		Py_DECREF(pyelem);
+	}
+
+exit:
+	return pylist;
+}
+
+static PyObject *python_process_brstack(struct perf_sample *sample,
+					struct thread *thread)
+{
+	struct branch_stack *br = sample->branch_stack;
+	struct branch_entry *entries = perf_sample__branch_entries(sample);
+	PyObject *pylist;
+	u64 i;
+
+	pylist = PyList_New(0);
+	if (!pylist)
+		Py_FatalError("couldn't create Python list");
+
+	if (!(br && br->nr))
+		goto exit;
+
+	for (i = 0; i < br->nr; i++) {
+		PyObject *pyelem;
+		struct addr_location al;
+		const char *dsoname;
+
+		pyelem = PyDict_New();
+		if (!pyelem)
+			Py_FatalError("couldn't create Python dictionary");
+
+		pydict_set_item_string_decref(pyelem, "from",
+		    PyLong_FromUnsignedLongLong(entries[i].from));
+		pydict_set_item_string_decref(pyelem, "to",
+		    PyLong_FromUnsignedLongLong(entries[i].to));
+		pydict_set_item_string_decref(pyelem, "mispred",
+		    PyBool_FromLong(entries[i].flags.mispred));
+		pydict_set_item_string_decref(pyelem, "predicted",
+		    PyBool_FromLong(entries[i].flags.predicted));
+		pydict_set_item_string_decref(pyelem, "in_tx",
+		    PyBool_FromLong(entries[i].flags.in_tx));
+		pydict_set_item_string_decref(pyelem, "abort",
+		    PyBool_FromLong(entries[i].flags.abort));
+		pydict_set_item_string_decref(pyelem, "cycles",
+		    PyLong_FromUnsignedLongLong(entries[i].flags.cycles));
+
+		thread__find_map_fb(thread, sample->cpumode,
+				    entries[i].from, &al);
+		dsoname = get_dsoname(al.map);
+		pydict_set_item_string_decref(pyelem, "from_dsoname",
+					      _PyUnicode_FromString(dsoname));
+
+		thread__find_map_fb(thread, sample->cpumode,
+				    entries[i].to, &al);
+		dsoname = get_dsoname(al.map);
+		pydict_set_item_string_decref(pyelem, "to_dsoname",
+					      _PyUnicode_FromString(dsoname));
+
+		PyList_Append(pylist, pyelem);
+		Py_DECREF(pyelem);
+	}
+
+exit:
+	return pylist;
+}
+
+static int get_symoff(struct symbol *sym, struct addr_location *al,
+		      bool print_off, char *bf, int size)
+{
+	unsigned long offset;
+
+	if (!sym || !sym->name[0])
+		return scnprintf(bf, size, "%s", "[unknown]");
+
+	if (!print_off)
+		return scnprintf(bf, size, "%s", sym->name);
+
+	offset = get_offset(sym, al);
+
+	return scnprintf(bf, size, "%s+0x%x", sym->name, offset);
+}
+
+static int get_br_mspred(struct branch_flags *flags, char *bf, int size)
+{
+	if (!flags->mispred  && !flags->predicted)
+		return scnprintf(bf, size, "%s", "-");
+
+	if (flags->mispred)
+		return scnprintf(bf, size, "%s", "M");
+
+	return scnprintf(bf, size, "%s", "P");
+}
+
+static PyObject *python_process_brstacksym(struct perf_sample *sample,
+					   struct thread *thread)
+{
+	struct branch_stack *br = sample->branch_stack;
+	struct branch_entry *entries = perf_sample__branch_entries(sample);
+	PyObject *pylist;
+	u64 i;
+	char bf[512];
+	struct addr_location al;
+
+	pylist = PyList_New(0);
+	if (!pylist)
+		Py_FatalError("couldn't create Python list");
+
+	if (!(br && br->nr))
+		goto exit;
+
+	for (i = 0; i < br->nr; i++) {
+		PyObject *pyelem;
+
+		pyelem = PyDict_New();
+		if (!pyelem)
+			Py_FatalError("couldn't create Python dictionary");
+
+		thread__find_symbol_fb(thread, sample->cpumode,
+				       entries[i].from, &al);
+		get_symoff(al.sym, &al, true, bf, sizeof(bf));
+		pydict_set_item_string_decref(pyelem, "from",
+					      _PyUnicode_FromString(bf));
+
+		thread__find_symbol_fb(thread, sample->cpumode,
+				       entries[i].to, &al);
+		get_symoff(al.sym, &al, true, bf, sizeof(bf));
+		pydict_set_item_string_decref(pyelem, "to",
+					      _PyUnicode_FromString(bf));
+
+		get_br_mspred(&entries[i].flags, bf, sizeof(bf));
+		pydict_set_item_string_decref(pyelem, "pred",
+					      _PyUnicode_FromString(bf));
+
+		if (entries[i].flags.in_tx) {
+			pydict_set_item_string_decref(pyelem, "in_tx",
+					      _PyUnicode_FromString("X"));
+		} else {
+			pydict_set_item_string_decref(pyelem, "in_tx",
+					      _PyUnicode_FromString("-"));
+		}
+
+		if (entries[i].flags.abort) {
+			pydict_set_item_string_decref(pyelem, "abort",
+					      _PyUnicode_FromString("A"));
+		} else {
+			pydict_set_item_string_decref(pyelem, "abort",
+					      _PyUnicode_FromString("-"));
+		}
+
+		PyList_Append(pylist, pyelem);
+		Py_DECREF(pyelem);
+	}
+
+exit:
+	return pylist;
+}
+
+static PyObject *get_sample_value_as_tuple(struct sample_read_value *value,
+					   u64 read_format)
+{
+	PyObject *t;
+
+	t = PyTuple_New(3);
+	if (!t)
+		Py_FatalError("couldn't create Python tuple");
+	PyTuple_SetItem(t, 0, PyLong_FromUnsignedLongLong(value->id));
+	PyTuple_SetItem(t, 1, PyLong_FromUnsignedLongLong(value->value));
+	if (read_format & PERF_FORMAT_LOST)
+		PyTuple_SetItem(t, 2, PyLong_FromUnsignedLongLong(value->lost));
+
+	return t;
+}
+
+static void set_sample_read_in_dict(PyObject *dict_sample,
+					 struct perf_sample *sample,
+					 struct evsel *evsel)
+{
+	u64 read_format = evsel->core.attr.read_format;
+	PyObject *values;
+	unsigned int i;
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+		pydict_set_item_string_decref(dict_sample, "time_enabled",
+			PyLong_FromUnsignedLongLong(sample->read.time_enabled));
+	}
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+		pydict_set_item_string_decref(dict_sample, "time_running",
+			PyLong_FromUnsignedLongLong(sample->read.time_running));
+	}
+
+	if (read_format & PERF_FORMAT_GROUP)
+		values = PyList_New(sample->read.group.nr);
+	else
+		values = PyList_New(1);
+
+	if (!values)
+		Py_FatalError("couldn't create Python list");
+
+	if (read_format & PERF_FORMAT_GROUP) {
+		struct sample_read_value *v = sample->read.group.values;
+
+		i = 0;
+		sample_read_group__for_each(v, sample->read.group.nr, read_format) {
+			PyObject *t = get_sample_value_as_tuple(v, read_format);
+			PyList_SET_ITEM(values, i, t);
+			i++;
+		}
+	} else {
+		PyObject *t = get_sample_value_as_tuple(&sample->read.one,
+							read_format);
+		PyList_SET_ITEM(values, 0, t);
+	}
+	pydict_set_item_string_decref(dict_sample, "values", values);
+}
+
+static void set_sample_datasrc_in_dict(PyObject *dict,
+				       struct perf_sample *sample)
+{
+	struct mem_info mi = { .data_src.val = sample->data_src };
+	char decode[100];
+
+	pydict_set_item_string_decref(dict, "datasrc",
+			PyLong_FromUnsignedLongLong(sample->data_src));
+
+	perf_script__meminfo_scnprintf(decode, 100, &mi);
+
+	pydict_set_item_string_decref(dict, "datasrc_decode",
+			_PyUnicode_FromString(decode));
+}
+
+static void regs_map(struct regs_dump *regs, uint64_t mask, const char *arch, char *bf, int size)
+{
+	unsigned int i = 0, r;
+	int printed = 0;
+
+	bf[0] = 0;
+
+	if (!regs || !regs->regs)
+		return;
+
+	for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
+		u64 val = regs->regs[i++];
+
+		printed += scnprintf(bf + printed, size - printed,
+				     "%5s:0x%" PRIx64 " ",
+				     perf_reg_name(r, arch), val);
+	}
+}
+
+static void set_regs_in_dict(PyObject *dict,
+			     struct perf_sample *sample,
+			     struct evsel *evsel)
+{
+	struct perf_event_attr *attr = &evsel->core.attr;
+	const char *arch = perf_env__arch(evsel__env(evsel));
+
+	/*
+	 * Here value 28 is a constant size which can be used to print
+	 * one register value and its corresponds to:
+	 * 16 chars is to specify 64 bit register in hexadecimal.
+	 * 2 chars is for appending "0x" to the hexadecimal value and
+	 * 10 chars is for register name.
+	 */
+	int size = __sw_hweight64(attr->sample_regs_intr) * 28;
+	char bf[size];
+
+	regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, sizeof(bf));
+
+	pydict_set_item_string_decref(dict, "iregs",
+			_PyUnicode_FromString(bf));
+
+	regs_map(&sample->user_regs, attr->sample_regs_user, arch, bf, sizeof(bf));
+
+	pydict_set_item_string_decref(dict, "uregs",
+			_PyUnicode_FromString(bf));
+}
+
+static void set_sym_in_dict(PyObject *dict, struct addr_location *al,
+			    const char *dso_field, const char *dso_bid_field,
+			    const char *dso_map_start, const char *dso_map_end,
+			    const char *sym_field, const char *symoff_field)
+{
+	char sbuild_id[SBUILD_ID_SIZE];
+
+	if (al->map) {
+		pydict_set_item_string_decref(dict, dso_field,
+			_PyUnicode_FromString(al->map->dso->name));
+		build_id__sprintf(&al->map->dso->bid, sbuild_id);
+		pydict_set_item_string_decref(dict, dso_bid_field,
+			_PyUnicode_FromString(sbuild_id));
+		pydict_set_item_string_decref(dict, dso_map_start,
+			PyLong_FromUnsignedLong(al->map->start));
+		pydict_set_item_string_decref(dict, dso_map_end,
+			PyLong_FromUnsignedLong(al->map->end));
+	}
+	if (al->sym) {
+		pydict_set_item_string_decref(dict, sym_field,
+			_PyUnicode_FromString(al->sym->name));
+		pydict_set_item_string_decref(dict, symoff_field,
+			PyLong_FromUnsignedLong(get_offset(al->sym, al)));
+	}
+}
+
+static void set_sample_flags(PyObject *dict, u32 flags)
+{
+	const char *ch = PERF_IP_FLAG_CHARS;
+	char *p, str[33];
+
+	for (p = str; *ch; ch++, flags >>= 1) {
+		if (flags & 1)
+			*p++ = *ch;
+	}
+	*p = 0;
+	pydict_set_item_string_decref(dict, "flags", _PyUnicode_FromString(str));
+}
+
+static void python_process_sample_flags(struct perf_sample *sample, PyObject *dict_sample)
+{
+	char flags_disp[SAMPLE_FLAGS_BUF_SIZE];
+
+	set_sample_flags(dict_sample, sample->flags);
+	perf_sample__sprintf_flags(sample->flags, flags_disp, sizeof(flags_disp));
+	pydict_set_item_string_decref(dict_sample, "flags_disp",
+		_PyUnicode_FromString(flags_disp));
+}
+
+static PyObject *get_perf_sample_dict(struct perf_sample *sample,
+					 struct evsel *evsel,
+					 struct addr_location *al,
+					 struct addr_location *addr_al,
+					 PyObject *callchain)
+{
+	PyObject *dict, *dict_sample, *brstack, *brstacksym;
+
+	dict = PyDict_New();
+	if (!dict)
+		Py_FatalError("couldn't create Python dictionary");
+
+	dict_sample = PyDict_New();
+	if (!dict_sample)
+		Py_FatalError("couldn't create Python dictionary");
+
+	pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(evsel__name(evsel)));
+	pydict_set_item_string_decref(dict, "attr", _PyBytes_FromStringAndSize((const char *)&evsel->core.attr, sizeof(evsel->core.attr)));
+
+	pydict_set_item_string_decref(dict_sample, "pid",
+			_PyLong_FromLong(sample->pid));
+	pydict_set_item_string_decref(dict_sample, "tid",
+			_PyLong_FromLong(sample->tid));
+	pydict_set_item_string_decref(dict_sample, "cpu",
+			_PyLong_FromLong(sample->cpu));
+	pydict_set_item_string_decref(dict_sample, "ip",
+			PyLong_FromUnsignedLongLong(sample->ip));
+	pydict_set_item_string_decref(dict_sample, "time",
+			PyLong_FromUnsignedLongLong(sample->time));
+	pydict_set_item_string_decref(dict_sample, "period",
+			PyLong_FromUnsignedLongLong(sample->period));
+	pydict_set_item_string_decref(dict_sample, "phys_addr",
+			PyLong_FromUnsignedLongLong(sample->phys_addr));
+	pydict_set_item_string_decref(dict_sample, "addr",
+			PyLong_FromUnsignedLongLong(sample->addr));
+	set_sample_read_in_dict(dict_sample, sample, evsel);
+	pydict_set_item_string_decref(dict_sample, "weight",
+			PyLong_FromUnsignedLongLong(sample->weight));
+	pydict_set_item_string_decref(dict_sample, "transaction",
+			PyLong_FromUnsignedLongLong(sample->transaction));
+	set_sample_datasrc_in_dict(dict_sample, sample);
+	pydict_set_item_string_decref(dict, "sample", dict_sample);
+
+	pydict_set_item_string_decref(dict, "raw_buf", _PyBytes_FromStringAndSize(
+			(const char *)sample->raw_data, sample->raw_size));
+	pydict_set_item_string_decref(dict, "comm",
+			_PyUnicode_FromString(thread__comm_str(al->thread)));
+	set_sym_in_dict(dict, al, "dso", "dso_bid", "dso_map_start", "dso_map_end",
+			"symbol", "symoff");
+
+	pydict_set_item_string_decref(dict, "callchain", callchain);
+
+	brstack = python_process_brstack(sample, al->thread);
+	pydict_set_item_string_decref(dict, "brstack", brstack);
+
+	brstacksym = python_process_brstacksym(sample, al->thread);
+	pydict_set_item_string_decref(dict, "brstacksym", brstacksym);
+
+	if (sample->machine_pid) {
+		pydict_set_item_string_decref(dict_sample, "machine_pid",
+				_PyLong_FromLong(sample->machine_pid));
+		pydict_set_item_string_decref(dict_sample, "vcpu",
+				_PyLong_FromLong(sample->vcpu));
+	}
+
+	pydict_set_item_string_decref(dict_sample, "cpumode",
+			_PyLong_FromLong((unsigned long)sample->cpumode));
+
+	if (addr_al) {
+		pydict_set_item_string_decref(dict_sample, "addr_correlates_sym",
+			PyBool_FromLong(1));
+		set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_dso_bid",
+				"addr_dso_map_start", "addr_dso_map_end",
+				"addr_symbol", "addr_symoff");
+	}
+
+	if (sample->flags)
+		python_process_sample_flags(sample, dict_sample);
+
+	/* Instructions per cycle (IPC) */
+	if (sample->insn_cnt && sample->cyc_cnt) {
+		pydict_set_item_string_decref(dict_sample, "insn_cnt",
+			PyLong_FromUnsignedLongLong(sample->insn_cnt));
+		pydict_set_item_string_decref(dict_sample, "cyc_cnt",
+			PyLong_FromUnsignedLongLong(sample->cyc_cnt));
+	}
+
+	set_regs_in_dict(dict, sample, evsel);
+
+	return dict;
+}
+
+static void python_process_tracepoint(struct perf_sample *sample,
+				      struct evsel *evsel,
+				      struct addr_location *al,
+				      struct addr_location *addr_al)
+{
+	struct tep_event *event = evsel->tp_format;
+	PyObject *handler, *context, *t, *obj = NULL, *callchain;
+	PyObject *dict = NULL, *all_entries_dict = NULL;
+	static char handler_name[256];
+	struct tep_format_field *field;
+	unsigned long s, ns;
+	unsigned n = 0;
+	int pid;
+	int cpu = sample->cpu;
+	void *data = sample->raw_data;
+	unsigned long long nsecs = sample->time;
+	const char *comm = thread__comm_str(al->thread);
+	const char *default_handler_name = "trace_unhandled";
+
+	if (!event) {
+		snprintf(handler_name, sizeof(handler_name),
+			 "ug! no event found for type %" PRIu64, (u64)evsel->core.attr.config);
+		Py_FatalError(handler_name);
+	}
+
+	pid = raw_field_value(event, "common_pid", data);
+
+	sprintf(handler_name, "%s__%s", event->system, event->name);
+
+	if (!test_and_set_bit(event->id, events_defined))
+		define_event_symbols(event, handler_name, event->print_fmt.args);
+
+	handler = get_handler(handler_name);
+	if (!handler) {
+		handler = get_handler(default_handler_name);
+		if (!handler)
+			return;
+		dict = PyDict_New();
+		if (!dict)
+			Py_FatalError("couldn't create Python dict");
+	}
+
+	t = PyTuple_New(MAX_FIELDS);
+	if (!t)
+		Py_FatalError("couldn't create Python tuple");
+
+
+	s = nsecs / NSEC_PER_SEC;
+	ns = nsecs - s * NSEC_PER_SEC;
+
+	context = _PyCapsule_New(scripting_context, NULL, NULL);
+
+	PyTuple_SetItem(t, n++, _PyUnicode_FromString(handler_name));
+	PyTuple_SetItem(t, n++, context);
+
+	/* ip unwinding */
+	callchain = python_process_callchain(sample, evsel, al);
+	/* Need an additional reference for the perf_sample dict */
+	Py_INCREF(callchain);
+
+	if (!dict) {
+		PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu));
+		PyTuple_SetItem(t, n++, _PyLong_FromLong(s));
+		PyTuple_SetItem(t, n++, _PyLong_FromLong(ns));
+		PyTuple_SetItem(t, n++, _PyLong_FromLong(pid));
+		PyTuple_SetItem(t, n++, _PyUnicode_FromString(comm));
+		PyTuple_SetItem(t, n++, callchain);
+	} else {
+		pydict_set_item_string_decref(dict, "common_cpu", _PyLong_FromLong(cpu));
+		pydict_set_item_string_decref(dict, "common_s", _PyLong_FromLong(s));
+		pydict_set_item_string_decref(dict, "common_ns", _PyLong_FromLong(ns));
+		pydict_set_item_string_decref(dict, "common_pid", _PyLong_FromLong(pid));
+		pydict_set_item_string_decref(dict, "common_comm", _PyUnicode_FromString(comm));
+		pydict_set_item_string_decref(dict, "common_callchain", callchain);
+	}
+	for (field = event->format.fields; field; field = field->next) {
+		unsigned int offset, len;
+		unsigned long long val;
+
+		if (field->flags & TEP_FIELD_IS_ARRAY) {
+			offset = field->offset;
+			len    = field->size;
+			if (field->flags & TEP_FIELD_IS_DYNAMIC) {
+				val     = tep_read_number(scripting_context->pevent,
+							  data + offset, len);
+				offset  = val;
+				len     = offset >> 16;
+				offset &= 0xffff;
+				if (field->flags & TEP_FIELD_IS_RELATIVE)
+					offset += field->offset + field->size;
+			}
+			if (field->flags & TEP_FIELD_IS_STRING &&
+			    is_printable_array(data + offset, len)) {
+				obj = _PyUnicode_FromString((char *) data + offset);
+			} else {
+				obj = PyByteArray_FromStringAndSize((const char *) data + offset, len);
+				field->flags &= ~TEP_FIELD_IS_STRING;
+			}
+		} else { /* FIELD_IS_NUMERIC */
+			obj = get_field_numeric_entry(event, field, data);
+		}
+		if (!dict)
+			PyTuple_SetItem(t, n++, obj);
+		else
+			pydict_set_item_string_decref(dict, field->name, obj);
+
+	}
+
+	if (dict)
+		PyTuple_SetItem(t, n++, dict);
+
+	if (get_argument_count(handler) == (int) n + 1) {
+		all_entries_dict = get_perf_sample_dict(sample, evsel, al, addr_al,
+			callchain);
+		PyTuple_SetItem(t, n++,	all_entries_dict);
+	} else {
+		Py_DECREF(callchain);
+	}
+
+	if (_PyTuple_Resize(&t, n) == -1)
+		Py_FatalError("error resizing Python tuple");
+
+	if (!dict)
+		call_object(handler, t, handler_name);
+	else
+		call_object(handler, t, default_handler_name);
+
+	Py_DECREF(t);
+}
+
+static PyObject *tuple_new(unsigned int sz)
+{
+	PyObject *t;
+
+	t = PyTuple_New(sz);
+	if (!t)
+		Py_FatalError("couldn't create Python tuple");
+	return t;
+}
+
+static int tuple_set_s64(PyObject *t, unsigned int pos, s64 val)
+{
+#if BITS_PER_LONG == 64
+	return PyTuple_SetItem(t, pos, _PyLong_FromLong(val));
+#endif
+#if BITS_PER_LONG == 32
+	return PyTuple_SetItem(t, pos, PyLong_FromLongLong(val));
+#endif
+}
+
+/*
+ * Databases support only signed 64-bit numbers, so even though we are
+ * exporting a u64, it must be as s64.
+ */
+#define tuple_set_d64 tuple_set_s64
+
+static int tuple_set_u64(PyObject *t, unsigned int pos, u64 val)
+{
+#if BITS_PER_LONG == 64
+	return PyTuple_SetItem(t, pos, PyLong_FromUnsignedLong(val));
+#endif
+#if BITS_PER_LONG == 32
+	return PyTuple_SetItem(t, pos, PyLong_FromUnsignedLongLong(val));
+#endif
+}
+
+static int tuple_set_u32(PyObject *t, unsigned int pos, u32 val)
+{
+	return PyTuple_SetItem(t, pos, PyLong_FromUnsignedLong(val));
+}
+
+static int tuple_set_s32(PyObject *t, unsigned int pos, s32 val)
+{
+	return PyTuple_SetItem(t, pos, _PyLong_FromLong(val));
+}
+
+static int tuple_set_bool(PyObject *t, unsigned int pos, bool val)
+{
+	return PyTuple_SetItem(t, pos, PyBool_FromLong(val));
+}
+
+static int tuple_set_string(PyObject *t, unsigned int pos, const char *s)
+{
+	return PyTuple_SetItem(t, pos, _PyUnicode_FromString(s));
+}
+
+static int tuple_set_bytes(PyObject *t, unsigned int pos, void *bytes,
+			   unsigned int sz)
+{
+	return PyTuple_SetItem(t, pos, _PyBytes_FromStringAndSize(bytes, sz));
+}
+
+static int python_export_evsel(struct db_export *dbe, struct evsel *evsel)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	PyObject *t;
+
+	t = tuple_new(2);
+
+	tuple_set_d64(t, 0, evsel->db_id);
+	tuple_set_string(t, 1, evsel__name(evsel));
+
+	call_object(tables->evsel_handler, t, "evsel_table");
+
+	Py_DECREF(t);
+
+	return 0;
+}
+
+static int python_export_machine(struct db_export *dbe,
+				 struct machine *machine)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	PyObject *t;
+
+	t = tuple_new(3);
+
+	tuple_set_d64(t, 0, machine->db_id);
+	tuple_set_s32(t, 1, machine->pid);
+	tuple_set_string(t, 2, machine->root_dir ? machine->root_dir : "");
+
+	call_object(tables->machine_handler, t, "machine_table");
+
+	Py_DECREF(t);
+
+	return 0;
+}
+
+static int python_export_thread(struct db_export *dbe, struct thread *thread,
+				u64 main_thread_db_id, struct machine *machine)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	PyObject *t;
+
+	t = tuple_new(5);
+
+	tuple_set_d64(t, 0, thread->db_id);
+	tuple_set_d64(t, 1, machine->db_id);
+	tuple_set_d64(t, 2, main_thread_db_id);
+	tuple_set_s32(t, 3, thread->pid_);
+	tuple_set_s32(t, 4, thread->tid);
+
+	call_object(tables->thread_handler, t, "thread_table");
+
+	Py_DECREF(t);
+
+	return 0;
+}
+
+static int python_export_comm(struct db_export *dbe, struct comm *comm,
+			      struct thread *thread)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	PyObject *t;
+
+	t = tuple_new(5);
+
+	tuple_set_d64(t, 0, comm->db_id);
+	tuple_set_string(t, 1, comm__str(comm));
+	tuple_set_d64(t, 2, thread->db_id);
+	tuple_set_d64(t, 3, comm->start);
+	tuple_set_s32(t, 4, comm->exec);
+
+	call_object(tables->comm_handler, t, "comm_table");
+
+	Py_DECREF(t);
+
+	return 0;
+}
+
+static int python_export_comm_thread(struct db_export *dbe, u64 db_id,
+				     struct comm *comm, struct thread *thread)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	PyObject *t;
+
+	t = tuple_new(3);
+
+	tuple_set_d64(t, 0, db_id);
+	tuple_set_d64(t, 1, comm->db_id);
+	tuple_set_d64(t, 2, thread->db_id);
+
+	call_object(tables->comm_thread_handler, t, "comm_thread_table");
+
+	Py_DECREF(t);
+
+	return 0;
+}
+
+static int python_export_dso(struct db_export *dbe, struct dso *dso,
+			     struct machine *machine)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	char sbuild_id[SBUILD_ID_SIZE];
+	PyObject *t;
+
+	build_id__sprintf(&dso->bid, sbuild_id);
+
+	t = tuple_new(5);
+
+	tuple_set_d64(t, 0, dso->db_id);
+	tuple_set_d64(t, 1, machine->db_id);
+	tuple_set_string(t, 2, dso->short_name);
+	tuple_set_string(t, 3, dso->long_name);
+	tuple_set_string(t, 4, sbuild_id);
+
+	call_object(tables->dso_handler, t, "dso_table");
+
+	Py_DECREF(t);
+
+	return 0;
+}
+
+static int python_export_symbol(struct db_export *dbe, struct symbol *sym,
+				struct dso *dso)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	u64 *sym_db_id = symbol__priv(sym);
+	PyObject *t;
+
+	t = tuple_new(6);
+
+	tuple_set_d64(t, 0, *sym_db_id);
+	tuple_set_d64(t, 1, dso->db_id);
+	tuple_set_d64(t, 2, sym->start);
+	tuple_set_d64(t, 3, sym->end);
+	tuple_set_s32(t, 4, sym->binding);
+	tuple_set_string(t, 5, sym->name);
+
+	call_object(tables->symbol_handler, t, "symbol_table");
+
+	Py_DECREF(t);
+
+	return 0;
+}
+
+static int python_export_branch_type(struct db_export *dbe, u32 branch_type,
+				     const char *name)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	PyObject *t;
+
+	t = tuple_new(2);
+
+	tuple_set_s32(t, 0, branch_type);
+	tuple_set_string(t, 1, name);
+
+	call_object(tables->branch_type_handler, t, "branch_type_table");
+
+	Py_DECREF(t);
+
+	return 0;
+}
+
+static void python_export_sample_table(struct db_export *dbe,
+				       struct export_sample *es)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	PyObject *t;
+
+	t = tuple_new(25);
+
+	tuple_set_d64(t, 0, es->db_id);
+	tuple_set_d64(t, 1, es->evsel->db_id);
+	tuple_set_d64(t, 2, es->al->maps->machine->db_id);
+	tuple_set_d64(t, 3, es->al->thread->db_id);
+	tuple_set_d64(t, 4, es->comm_db_id);
+	tuple_set_d64(t, 5, es->dso_db_id);
+	tuple_set_d64(t, 6, es->sym_db_id);
+	tuple_set_d64(t, 7, es->offset);
+	tuple_set_d64(t, 8, es->sample->ip);
+	tuple_set_d64(t, 9, es->sample->time);
+	tuple_set_s32(t, 10, es->sample->cpu);
+	tuple_set_d64(t, 11, es->addr_dso_db_id);
+	tuple_set_d64(t, 12, es->addr_sym_db_id);
+	tuple_set_d64(t, 13, es->addr_offset);
+	tuple_set_d64(t, 14, es->sample->addr);
+	tuple_set_d64(t, 15, es->sample->period);
+	tuple_set_d64(t, 16, es->sample->weight);
+	tuple_set_d64(t, 17, es->sample->transaction);
+	tuple_set_d64(t, 18, es->sample->data_src);
+	tuple_set_s32(t, 19, es->sample->flags & PERF_BRANCH_MASK);
+	tuple_set_s32(t, 20, !!(es->sample->flags & PERF_IP_FLAG_IN_TX));
+	tuple_set_d64(t, 21, es->call_path_id);
+	tuple_set_d64(t, 22, es->sample->insn_cnt);
+	tuple_set_d64(t, 23, es->sample->cyc_cnt);
+	tuple_set_s32(t, 24, es->sample->flags);
+
+	call_object(tables->sample_handler, t, "sample_table");
+
+	Py_DECREF(t);
+}
+
+static void python_export_synth(struct db_export *dbe, struct export_sample *es)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	PyObject *t;
+
+	t = tuple_new(3);
+
+	tuple_set_d64(t, 0, es->db_id);
+	tuple_set_d64(t, 1, es->evsel->core.attr.config);
+	tuple_set_bytes(t, 2, es->sample->raw_data, es->sample->raw_size);
+
+	call_object(tables->synth_handler, t, "synth_data");
+
+	Py_DECREF(t);
+}
+
+static int python_export_sample(struct db_export *dbe,
+				struct export_sample *es)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+
+	python_export_sample_table(dbe, es);
+
+	if (es->evsel->core.attr.type == PERF_TYPE_SYNTH && tables->synth_handler)
+		python_export_synth(dbe, es);
+
+	return 0;
+}
+
+static int python_export_call_path(struct db_export *dbe, struct call_path *cp)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	PyObject *t;
+	u64 parent_db_id, sym_db_id;
+
+	parent_db_id = cp->parent ? cp->parent->db_id : 0;
+	sym_db_id = cp->sym ? *(u64 *)symbol__priv(cp->sym) : 0;
+
+	t = tuple_new(4);
+
+	tuple_set_d64(t, 0, cp->db_id);
+	tuple_set_d64(t, 1, parent_db_id);
+	tuple_set_d64(t, 2, sym_db_id);
+	tuple_set_d64(t, 3, cp->ip);
+
+	call_object(tables->call_path_handler, t, "call_path_table");
+
+	Py_DECREF(t);
+
+	return 0;
+}
+
+static int python_export_call_return(struct db_export *dbe,
+				     struct call_return *cr)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	u64 comm_db_id = cr->comm ? cr->comm->db_id : 0;
+	PyObject *t;
+
+	t = tuple_new(14);
+
+	tuple_set_d64(t, 0, cr->db_id);
+	tuple_set_d64(t, 1, cr->thread->db_id);
+	tuple_set_d64(t, 2, comm_db_id);
+	tuple_set_d64(t, 3, cr->cp->db_id);
+	tuple_set_d64(t, 4, cr->call_time);
+	tuple_set_d64(t, 5, cr->return_time);
+	tuple_set_d64(t, 6, cr->branch_count);
+	tuple_set_d64(t, 7, cr->call_ref);
+	tuple_set_d64(t, 8, cr->return_ref);
+	tuple_set_d64(t, 9, cr->cp->parent->db_id);
+	tuple_set_s32(t, 10, cr->flags);
+	tuple_set_d64(t, 11, cr->parent_db_id);
+	tuple_set_d64(t, 12, cr->insn_count);
+	tuple_set_d64(t, 13, cr->cyc_count);
+
+	call_object(tables->call_return_handler, t, "call_return_table");
+
+	Py_DECREF(t);
+
+	return 0;
+}
+
+static int python_export_context_switch(struct db_export *dbe, u64 db_id,
+					struct machine *machine,
+					struct perf_sample *sample,
+					u64 th_out_id, u64 comm_out_id,
+					u64 th_in_id, u64 comm_in_id, int flags)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	PyObject *t;
+
+	t = tuple_new(9);
+
+	tuple_set_d64(t, 0, db_id);
+	tuple_set_d64(t, 1, machine->db_id);
+	tuple_set_d64(t, 2, sample->time);
+	tuple_set_s32(t, 3, sample->cpu);
+	tuple_set_d64(t, 4, th_out_id);
+	tuple_set_d64(t, 5, comm_out_id);
+	tuple_set_d64(t, 6, th_in_id);
+	tuple_set_d64(t, 7, comm_in_id);
+	tuple_set_s32(t, 8, flags);
+
+	call_object(tables->context_switch_handler, t, "context_switch");
+
+	Py_DECREF(t);
+
+	return 0;
+}
+
+static int python_process_call_return(struct call_return *cr, u64 *parent_db_id,
+				      void *data)
+{
+	struct db_export *dbe = data;
+
+	return db_export__call_return(dbe, cr, parent_db_id);
+}
+
+static void python_process_general_event(struct perf_sample *sample,
+					 struct evsel *evsel,
+					 struct addr_location *al,
+					 struct addr_location *addr_al)
+{
+	PyObject *handler, *t, *dict, *callchain;
+	static char handler_name[64];
+	unsigned n = 0;
+
+	snprintf(handler_name, sizeof(handler_name), "%s", "process_event");
+
+	handler = get_handler(handler_name);
+	if (!handler)
+		return;
+
+	/*
+	 * Use the MAX_FIELDS to make the function expandable, though
+	 * currently there is only one item for the tuple.
+	 */
+	t = PyTuple_New(MAX_FIELDS);
+	if (!t)
+		Py_FatalError("couldn't create Python tuple");
+
+	/* ip unwinding */
+	callchain = python_process_callchain(sample, evsel, al);
+	dict = get_perf_sample_dict(sample, evsel, al, addr_al, callchain);
+
+	PyTuple_SetItem(t, n++, dict);
+	if (_PyTuple_Resize(&t, n) == -1)
+		Py_FatalError("error resizing Python tuple");
+
+	call_object(handler, t, handler_name);
+
+	Py_DECREF(t);
+}
+
+static void python_process_event(union perf_event *event,
+				 struct perf_sample *sample,
+				 struct evsel *evsel,
+				 struct addr_location *al,
+				 struct addr_location *addr_al)
+{
+	struct tables *tables = &tables_global;
+
+	scripting_context__update(scripting_context, event, sample, evsel, al, addr_al);
+
+	switch (evsel->core.attr.type) {
+	case PERF_TYPE_TRACEPOINT:
+		python_process_tracepoint(sample, evsel, al, addr_al);
+		break;
+	/* Reserve for future process_hw/sw/raw APIs */
+	default:
+		if (tables->db_export_mode)
+			db_export__sample(&tables->dbe, event, sample, evsel, al, addr_al);
+		else
+			python_process_general_event(sample, evsel, al, addr_al);
+	}
+}
+
+static void python_process_throttle(union perf_event *event,
+				    struct perf_sample *sample,
+				    struct machine *machine)
+{
+	const char *handler_name;
+	PyObject *handler, *t;
+
+	if (event->header.type == PERF_RECORD_THROTTLE)
+		handler_name = "throttle";
+	else
+		handler_name = "unthrottle";
+	handler = get_handler(handler_name);
+	if (!handler)
+		return;
+
+	t = tuple_new(6);
+	if (!t)
+		return;
+
+	tuple_set_u64(t, 0, event->throttle.time);
+	tuple_set_u64(t, 1, event->throttle.id);
+	tuple_set_u64(t, 2, event->throttle.stream_id);
+	tuple_set_s32(t, 3, sample->cpu);
+	tuple_set_s32(t, 4, sample->pid);
+	tuple_set_s32(t, 5, sample->tid);
+
+	call_object(handler, t, handler_name);
+
+	Py_DECREF(t);
+}
+
+static void python_do_process_switch(union perf_event *event,
+				     struct perf_sample *sample,
+				     struct machine *machine)
+{
+	const char *handler_name = "context_switch";
+	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+	bool out_preempt = out && (event->header.misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT);
+	pid_t np_pid = -1, np_tid = -1;
+	PyObject *handler, *t;
+
+	handler = get_handler(handler_name);
+	if (!handler)
+		return;
+
+	if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) {
+		np_pid = event->context_switch.next_prev_pid;
+		np_tid = event->context_switch.next_prev_tid;
+	}
+
+	t = tuple_new(11);
+	if (!t)
+		return;
+
+	tuple_set_u64(t, 0, sample->time);
+	tuple_set_s32(t, 1, sample->cpu);
+	tuple_set_s32(t, 2, sample->pid);
+	tuple_set_s32(t, 3, sample->tid);
+	tuple_set_s32(t, 4, np_pid);
+	tuple_set_s32(t, 5, np_tid);
+	tuple_set_s32(t, 6, machine->pid);
+	tuple_set_bool(t, 7, out);
+	tuple_set_bool(t, 8, out_preempt);
+	tuple_set_s32(t, 9, sample->machine_pid);
+	tuple_set_s32(t, 10, sample->vcpu);
+
+	call_object(handler, t, handler_name);
+
+	Py_DECREF(t);
+}
+
+static void python_process_switch(union perf_event *event,
+				  struct perf_sample *sample,
+				  struct machine *machine)
+{
+	struct tables *tables = &tables_global;
+
+	if (tables->db_export_mode)
+		db_export__switch(&tables->dbe, event, sample, machine);
+	else
+		python_do_process_switch(event, sample, machine);
+}
+
+static void python_process_auxtrace_error(struct perf_session *session __maybe_unused,
+					  union perf_event *event)
+{
+	struct perf_record_auxtrace_error *e = &event->auxtrace_error;
+	u8 cpumode = e->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+	const char *handler_name = "auxtrace_error";
+	unsigned long long tm = e->time;
+	const char *msg = e->msg;
+	PyObject *handler, *t;
+
+	handler = get_handler(handler_name);
+	if (!handler)
+		return;
+
+	if (!e->fmt) {
+		tm = 0;
+		msg = (const char *)&e->time;
+	}
+
+	t = tuple_new(11);
+
+	tuple_set_u32(t, 0, e->type);
+	tuple_set_u32(t, 1, e->code);
+	tuple_set_s32(t, 2, e->cpu);
+	tuple_set_s32(t, 3, e->pid);
+	tuple_set_s32(t, 4, e->tid);
+	tuple_set_u64(t, 5, e->ip);
+	tuple_set_u64(t, 6, tm);
+	tuple_set_string(t, 7, msg);
+	tuple_set_u32(t, 8, cpumode);
+	tuple_set_s32(t, 9, e->machine_pid);
+	tuple_set_s32(t, 10, e->vcpu);
+
+	call_object(handler, t, handler_name);
+
+	Py_DECREF(t);
+}
+
+static void get_handler_name(char *str, size_t size,
+			     struct evsel *evsel)
+{
+	char *p = str;
+
+	scnprintf(str, size, "stat__%s", evsel__name(evsel));
+
+	while ((p = strchr(p, ':'))) {
+		*p = '_';
+		p++;
+	}
+}
+
+static void
+process_stat(struct evsel *counter, struct perf_cpu cpu, int thread, u64 tstamp,
+	     struct perf_counts_values *count)
+{
+	PyObject *handler, *t;
+	static char handler_name[256];
+	int n = 0;
+
+	t = PyTuple_New(MAX_FIELDS);
+	if (!t)
+		Py_FatalError("couldn't create Python tuple");
+
+	get_handler_name(handler_name, sizeof(handler_name),
+			 counter);
+
+	handler = get_handler(handler_name);
+	if (!handler) {
+		pr_debug("can't find python handler %s\n", handler_name);
+		return;
+	}
+
+	PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu.cpu));
+	PyTuple_SetItem(t, n++, _PyLong_FromLong(thread));
+
+	tuple_set_u64(t, n++, tstamp);
+	tuple_set_u64(t, n++, count->val);
+	tuple_set_u64(t, n++, count->ena);
+	tuple_set_u64(t, n++, count->run);
+
+	if (_PyTuple_Resize(&t, n) == -1)
+		Py_FatalError("error resizing Python tuple");
+
+	call_object(handler, t, handler_name);
+
+	Py_DECREF(t);
+}
+
+static void python_process_stat(struct perf_stat_config *config,
+				struct evsel *counter, u64 tstamp)
+{
+	struct perf_thread_map *threads = counter->core.threads;
+	struct perf_cpu_map *cpus = counter->core.cpus;
+	int cpu, thread;
+
+	if (config->aggr_mode == AGGR_GLOBAL) {
+		process_stat(counter, (struct perf_cpu){ .cpu = -1 }, -1, tstamp,
+			     &counter->counts->aggr);
+		return;
+	}
+
+	for (thread = 0; thread < threads->nr; thread++) {
+		for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
+			process_stat(counter, perf_cpu_map__cpu(cpus, cpu),
+				     perf_thread_map__pid(threads, thread), tstamp,
+				     perf_counts(counter->counts, cpu, thread));
+		}
+	}
+}
+
+static void python_process_stat_interval(u64 tstamp)
+{
+	PyObject *handler, *t;
+	static const char handler_name[] = "stat__interval";
+	int n = 0;
+
+	t = PyTuple_New(MAX_FIELDS);
+	if (!t)
+		Py_FatalError("couldn't create Python tuple");
+
+	handler = get_handler(handler_name);
+	if (!handler) {
+		pr_debug("can't find python handler %s\n", handler_name);
+		return;
+	}
+
+	tuple_set_u64(t, n++, tstamp);
+
+	if (_PyTuple_Resize(&t, n) == -1)
+		Py_FatalError("error resizing Python tuple");
+
+	call_object(handler, t, handler_name);
+
+	Py_DECREF(t);
+}
+
+static int perf_script_context_init(void)
+{
+	PyObject *perf_script_context;
+	PyObject *perf_trace_context;
+	PyObject *dict;
+	int ret;
+
+	perf_trace_context = PyImport_AddModule("perf_trace_context");
+	if (!perf_trace_context)
+		return -1;
+	dict = PyModule_GetDict(perf_trace_context);
+	if (!dict)
+		return -1;
+
+	perf_script_context = _PyCapsule_New(scripting_context, NULL, NULL);
+	if (!perf_script_context)
+		return -1;
+
+	ret = PyDict_SetItemString(dict, "perf_script_context", perf_script_context);
+	if (!ret)
+		ret = PyDict_SetItemString(main_dict, "perf_script_context", perf_script_context);
+	Py_DECREF(perf_script_context);
+	return ret;
+}
+
+static int run_start_sub(void)
+{
+	main_module = PyImport_AddModule("__main__");
+	if (main_module == NULL)
+		return -1;
+	Py_INCREF(main_module);
+
+	main_dict = PyModule_GetDict(main_module);
+	if (main_dict == NULL)
+		goto error;
+	Py_INCREF(main_dict);
+
+	if (perf_script_context_init())
+		goto error;
+
+	try_call_object("trace_begin", NULL);
+
+	return 0;
+
+error:
+	Py_XDECREF(main_dict);
+	Py_XDECREF(main_module);
+	return -1;
+}
+
+#define SET_TABLE_HANDLER_(name, handler_name, table_name) do {		\
+	tables->handler_name = get_handler(#table_name);		\
+	if (tables->handler_name)					\
+		tables->dbe.export_ ## name = python_export_ ## name;	\
+} while (0)
+
+#define SET_TABLE_HANDLER(name) \
+	SET_TABLE_HANDLER_(name, name ## _handler, name ## _table)
+
+static void set_table_handlers(struct tables *tables)
+{
+	const char *perf_db_export_mode = "perf_db_export_mode";
+	const char *perf_db_export_calls = "perf_db_export_calls";
+	const char *perf_db_export_callchains = "perf_db_export_callchains";
+	PyObject *db_export_mode, *db_export_calls, *db_export_callchains;
+	bool export_calls = false;
+	bool export_callchains = false;
+	int ret;
+
+	memset(tables, 0, sizeof(struct tables));
+	if (db_export__init(&tables->dbe))
+		Py_FatalError("failed to initialize export");
+
+	db_export_mode = PyDict_GetItemString(main_dict, perf_db_export_mode);
+	if (!db_export_mode)
+		return;
+
+	ret = PyObject_IsTrue(db_export_mode);
+	if (ret == -1)
+		handler_call_die(perf_db_export_mode);
+	if (!ret)
+		return;
+
+	/* handle export calls */
+	tables->dbe.crp = NULL;
+	db_export_calls = PyDict_GetItemString(main_dict, perf_db_export_calls);
+	if (db_export_calls) {
+		ret = PyObject_IsTrue(db_export_calls);
+		if (ret == -1)
+			handler_call_die(perf_db_export_calls);
+		export_calls = !!ret;
+	}
+
+	if (export_calls) {
+		tables->dbe.crp =
+			call_return_processor__new(python_process_call_return,
+						   &tables->dbe);
+		if (!tables->dbe.crp)
+			Py_FatalError("failed to create calls processor");
+	}
+
+	/* handle export callchains */
+	tables->dbe.cpr = NULL;
+	db_export_callchains = PyDict_GetItemString(main_dict,
+						    perf_db_export_callchains);
+	if (db_export_callchains) {
+		ret = PyObject_IsTrue(db_export_callchains);
+		if (ret == -1)
+			handler_call_die(perf_db_export_callchains);
+		export_callchains = !!ret;
+	}
+
+	if (export_callchains) {
+		/*
+		 * Attempt to use the call path root from the call return
+		 * processor, if the call return processor is in use. Otherwise,
+		 * we allocate a new call path root. This prevents exporting
+		 * duplicate call path ids when both are in use simultaneously.
+		 */
+		if (tables->dbe.crp)
+			tables->dbe.cpr = tables->dbe.crp->cpr;
+		else
+			tables->dbe.cpr = call_path_root__new();
+
+		if (!tables->dbe.cpr)
+			Py_FatalError("failed to create call path root");
+	}
+
+	tables->db_export_mode = true;
+	/*
+	 * Reserve per symbol space for symbol->db_id via symbol__priv()
+	 */
+	symbol_conf.priv_size = sizeof(u64);
+
+	SET_TABLE_HANDLER(evsel);
+	SET_TABLE_HANDLER(machine);
+	SET_TABLE_HANDLER(thread);
+	SET_TABLE_HANDLER(comm);
+	SET_TABLE_HANDLER(comm_thread);
+	SET_TABLE_HANDLER(dso);
+	SET_TABLE_HANDLER(symbol);
+	SET_TABLE_HANDLER(branch_type);
+	SET_TABLE_HANDLER(sample);
+	SET_TABLE_HANDLER(call_path);
+	SET_TABLE_HANDLER(call_return);
+	SET_TABLE_HANDLER(context_switch);
+
+	/*
+	 * Synthesized events are samples but with architecture-specific data
+	 * stored in sample->raw_data. They are exported via
+	 * python_export_sample() and consequently do not need a separate export
+	 * callback.
+	 */
+	tables->synth_handler = get_handler("synth_data");
+}
+
+#if PY_MAJOR_VERSION < 3
+static void _free_command_line(const char **command_line, int num)
+{
+	free(command_line);
+}
+#else
+static void _free_command_line(wchar_t **command_line, int num)
+{
+	int i;
+	for (i = 0; i < num; i++)
+		PyMem_RawFree(command_line[i]);
+	free(command_line);
+}
+#endif
+
+
+/*
+ * Start trace script
+ */
+static int python_start_script(const char *script, int argc, const char **argv,
+			       struct perf_session *session)
+{
+	struct tables *tables = &tables_global;
+#if PY_MAJOR_VERSION < 3
+	const char **command_line;
+#else
+	wchar_t **command_line;
+#endif
+	/*
+	 * Use a non-const name variable to cope with python 2.6's
+	 * PyImport_AppendInittab prototype
+	 */
+	char buf[PATH_MAX], name[19] = "perf_trace_context";
+	int i, err = 0;
+	FILE *fp;
+
+	scripting_context->session = session;
+#if PY_MAJOR_VERSION < 3
+	command_line = malloc((argc + 1) * sizeof(const char *));
+	command_line[0] = script;
+	for (i = 1; i < argc + 1; i++)
+		command_line[i] = argv[i - 1];
+	PyImport_AppendInittab(name, initperf_trace_context);
+#else
+	command_line = malloc((argc + 1) * sizeof(wchar_t *));
+	command_line[0] = Py_DecodeLocale(script, NULL);
+	for (i = 1; i < argc + 1; i++)
+		command_line[i] = Py_DecodeLocale(argv[i - 1], NULL);
+	PyImport_AppendInittab(name, PyInit_perf_trace_context);
+#endif
+	Py_Initialize();
+
+#if PY_MAJOR_VERSION < 3
+	PySys_SetArgv(argc + 1, (char **)command_line);
+#else
+	PySys_SetArgv(argc + 1, command_line);
+#endif
+
+	fp = fopen(script, "r");
+	if (!fp) {
+		sprintf(buf, "Can't open python script \"%s\"", script);
+		perror(buf);
+		err = -1;
+		goto error;
+	}
+
+	err = PyRun_SimpleFile(fp, script);
+	if (err) {
+		fprintf(stderr, "Error running python script %s\n", script);
+		goto error;
+	}
+
+	err = run_start_sub();
+	if (err) {
+		fprintf(stderr, "Error starting python script %s\n", script);
+		goto error;
+	}
+
+	set_table_handlers(tables);
+
+	if (tables->db_export_mode) {
+		err = db_export__branch_types(&tables->dbe);
+		if (err)
+			goto error;
+	}
+
+	_free_command_line(command_line, argc + 1);
+
+	return err;
+error:
+	Py_Finalize();
+	_free_command_line(command_line, argc + 1);
+
+	return err;
+}
+
+static int python_flush_script(void)
+{
+	return 0;
+}
+
+/*
+ * Stop trace script
+ */
+static int python_stop_script(void)
+{
+	struct tables *tables = &tables_global;
+
+	try_call_object("trace_end", NULL);
+
+	db_export__exit(&tables->dbe);
+
+	Py_XDECREF(main_dict);
+	Py_XDECREF(main_module);
+	Py_Finalize();
+
+	return 0;
+}
+
+static int python_generate_script(struct tep_handle *pevent, const char *outfile)
+{
+	int i, not_first, count, nr_events;
+	struct tep_event **all_events;
+	struct tep_event *event = NULL;
+	struct tep_format_field *f;
+	char fname[PATH_MAX];
+	FILE *ofp;
+
+	sprintf(fname, "%s.py", outfile);
+	ofp = fopen(fname, "w");
+	if (ofp == NULL) {
+		fprintf(stderr, "couldn't open %s\n", fname);
+		return -1;
+	}
+	fprintf(ofp, "# perf script event handlers, "
+		"generated by perf script -g python\n");
+
+	fprintf(ofp, "# Licensed under the terms of the GNU GPL"
+		" License version 2\n\n");
+
+	fprintf(ofp, "# The common_* event handler fields are the most useful "
+		"fields common to\n");
+
+	fprintf(ofp, "# all events.  They don't necessarily correspond to "
+		"the 'common_*' fields\n");
+
+	fprintf(ofp, "# in the format files.  Those fields not available as "
+		"handler params can\n");
+
+	fprintf(ofp, "# be retrieved using Python functions of the form "
+		"common_*(context).\n");
+
+	fprintf(ofp, "# See the perf-script-python Documentation for the list "
+		"of available functions.\n\n");
+
+	fprintf(ofp, "from __future__ import print_function\n\n");
+	fprintf(ofp, "import os\n");
+	fprintf(ofp, "import sys\n\n");
+
+	fprintf(ofp, "sys.path.append(os.environ['PERF_EXEC_PATH'] + \\\n");
+	fprintf(ofp, "\t'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')\n");
+	fprintf(ofp, "\nfrom perf_trace_context import *\n");
+	fprintf(ofp, "from Core import *\n\n\n");
+
+	fprintf(ofp, "def trace_begin():\n");
+	fprintf(ofp, "\tprint(\"in trace_begin\")\n\n");
+
+	fprintf(ofp, "def trace_end():\n");
+	fprintf(ofp, "\tprint(\"in trace_end\")\n\n");
+
+	nr_events = tep_get_events_count(pevent);
+	all_events = tep_list_events(pevent, TEP_EVENT_SORT_ID);
+
+	for (i = 0; all_events && i < nr_events; i++) {
+		event = all_events[i];
+		fprintf(ofp, "def %s__%s(", event->system, event->name);
+		fprintf(ofp, "event_name, ");
+		fprintf(ofp, "context, ");
+		fprintf(ofp, "common_cpu,\n");
+		fprintf(ofp, "\tcommon_secs, ");
+		fprintf(ofp, "common_nsecs, ");
+		fprintf(ofp, "common_pid, ");
+		fprintf(ofp, "common_comm,\n\t");
+		fprintf(ofp, "common_callchain, ");
+
+		not_first = 0;
+		count = 0;
+
+		for (f = event->format.fields; f; f = f->next) {
+			if (not_first++)
+				fprintf(ofp, ", ");
+			if (++count % 5 == 0)
+				fprintf(ofp, "\n\t");
+
+			fprintf(ofp, "%s", f->name);
+		}
+		if (not_first++)
+			fprintf(ofp, ", ");
+		if (++count % 5 == 0)
+			fprintf(ofp, "\n\t\t");
+		fprintf(ofp, "perf_sample_dict");
+
+		fprintf(ofp, "):\n");
+
+		fprintf(ofp, "\t\tprint_header(event_name, common_cpu, "
+			"common_secs, common_nsecs,\n\t\t\t"
+			"common_pid, common_comm)\n\n");
+
+		fprintf(ofp, "\t\tprint(\"");
+
+		not_first = 0;
+		count = 0;
+
+		for (f = event->format.fields; f; f = f->next) {
+			if (not_first++)
+				fprintf(ofp, ", ");
+			if (count && count % 3 == 0) {
+				fprintf(ofp, "\" \\\n\t\t\"");
+			}
+			count++;
+
+			fprintf(ofp, "%s=", f->name);
+			if (f->flags & TEP_FIELD_IS_STRING ||
+			    f->flags & TEP_FIELD_IS_FLAG ||
+			    f->flags & TEP_FIELD_IS_ARRAY ||
+			    f->flags & TEP_FIELD_IS_SYMBOLIC)
+				fprintf(ofp, "%%s");
+			else if (f->flags & TEP_FIELD_IS_SIGNED)
+				fprintf(ofp, "%%d");
+			else
+				fprintf(ofp, "%%u");
+		}
+
+		fprintf(ofp, "\" %% \\\n\t\t(");
+
+		not_first = 0;
+		count = 0;
+
+		for (f = event->format.fields; f; f = f->next) {
+			if (not_first++)
+				fprintf(ofp, ", ");
+
+			if (++count % 5 == 0)
+				fprintf(ofp, "\n\t\t");
+
+			if (f->flags & TEP_FIELD_IS_FLAG) {
+				if ((count - 1) % 5 != 0) {
+					fprintf(ofp, "\n\t\t");
+					count = 4;
+				}
+				fprintf(ofp, "flag_str(\"");
+				fprintf(ofp, "%s__%s\", ", event->system,
+					event->name);
+				fprintf(ofp, "\"%s\", %s)", f->name,
+					f->name);
+			} else if (f->flags & TEP_FIELD_IS_SYMBOLIC) {
+				if ((count - 1) % 5 != 0) {
+					fprintf(ofp, "\n\t\t");
+					count = 4;
+				}
+				fprintf(ofp, "symbol_str(\"");
+				fprintf(ofp, "%s__%s\", ", event->system,
+					event->name);
+				fprintf(ofp, "\"%s\", %s)", f->name,
+					f->name);
+			} else
+				fprintf(ofp, "%s", f->name);
+		}
+
+		fprintf(ofp, "))\n\n");
+
+		fprintf(ofp, "\t\tprint('Sample: {'+"
+			"get_dict_as_string(perf_sample_dict['sample'], ', ')+'}')\n\n");
+
+		fprintf(ofp, "\t\tfor node in common_callchain:");
+		fprintf(ofp, "\n\t\t\tif 'sym' in node:");
+		fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x] %%s%%s%%s%%s\" %% (");
+		fprintf(ofp, "\n\t\t\t\t\tnode['ip'], node['sym']['name'],");
+		fprintf(ofp, "\n\t\t\t\t\t\"+0x{:x}\".format(node['sym_off']) if 'sym_off' in node else \"\",");
+		fprintf(ofp, "\n\t\t\t\t\t\" ({})\".format(node['dso'])  if 'dso' in node else \"\",");
+		fprintf(ofp, "\n\t\t\t\t\t\" \" + node['sym_srcline'] if 'sym_srcline' in node else \"\"))");
+		fprintf(ofp, "\n\t\t\telse:");
+		fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x]\" %% (node['ip']))\n\n");
+		fprintf(ofp, "\t\tprint()\n\n");
+
+	}
+
+	fprintf(ofp, "def trace_unhandled(event_name, context, "
+		"event_fields_dict, perf_sample_dict):\n");
+
+	fprintf(ofp, "\t\tprint(get_dict_as_string(event_fields_dict))\n");
+	fprintf(ofp, "\t\tprint('Sample: {'+"
+		"get_dict_as_string(perf_sample_dict['sample'], ', ')+'}')\n\n");
+
+	fprintf(ofp, "def print_header("
+		"event_name, cpu, secs, nsecs, pid, comm):\n"
+		"\tprint(\"%%-20s %%5u %%05u.%%09u %%8u %%-20s \" %% \\\n\t"
+		"(event_name, cpu, secs, nsecs, pid, comm), end=\"\")\n\n");
+
+	fprintf(ofp, "def get_dict_as_string(a_dict, delimiter=' '):\n"
+		"\treturn delimiter.join"
+		"(['%%s=%%s'%%(k,str(v))for k,v in sorted(a_dict.items())])\n");
+
+	fclose(ofp);
+
+	fprintf(stderr, "generated Python script: %s\n", fname);
+
+	return 0;
+}
+
+struct scripting_ops python_scripting_ops = {
+	.name			= "Python",
+	.dirname		= "python",
+	.start_script		= python_start_script,
+	.flush_script		= python_flush_script,
+	.stop_script		= python_stop_script,
+	.process_event		= python_process_event,
+	.process_switch		= python_process_switch,
+	.process_auxtrace_error	= python_process_auxtrace_error,
+	.process_stat		= python_process_stat,
+	.process_stat_interval	= python_process_stat_interval,
+	.process_throttle	= python_process_throttle,
+	.generate_script	= python_generate_script,
+};
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
new file mode 100644
index 000000000..1a4f10de2
--- /dev/null
+++ b/tools/perf/util/session.c
@@ -0,0 +1,2888 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+#include <api/fs/fs.h>
+
+#include <byteswap.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <perf/cpumap.h>
+
+#include "map_symbol.h"
+#include "branch.h"
+#include "debug.h"
+#include "env.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "memswap.h"
+#include "map.h"
+#include "symbol.h"
+#include "session.h"
+#include "tool.h"
+#include "perf_regs.h"
+#include "asm/bug.h"
+#include "auxtrace.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "sample-raw.h"
+#include "stat.h"
+#include "tsc.h"
+#include "ui/progress.h"
+#include "../perf.h"
+#include "arch/common.h"
+#include "units.h"
+#include <internal/lib.h>
+
+#ifdef HAVE_ZSTD_SUPPORT
+static int perf_session__process_compressed_event(struct perf_session *session,
+						  union perf_event *event, u64 file_offset,
+						  const char *file_path)
+{
+	void *src;
+	size_t decomp_size, src_size;
+	u64 decomp_last_rem = 0;
+	size_t mmap_len, decomp_len = session->header.env.comp_mmap_len;
+	struct decomp *decomp, *decomp_last = session->active_decomp->decomp_last;
+
+	if (decomp_last) {
+		decomp_last_rem = decomp_last->size - decomp_last->head;
+		decomp_len += decomp_last_rem;
+	}
+
+	mmap_len = sizeof(struct decomp) + decomp_len;
+	decomp = mmap(NULL, mmap_len, PROT_READ|PROT_WRITE,
+		      MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+	if (decomp == MAP_FAILED) {
+		pr_err("Couldn't allocate memory for decompression\n");
+		return -1;
+	}
+
+	decomp->file_pos = file_offset;
+	decomp->file_path = file_path;
+	decomp->mmap_len = mmap_len;
+	decomp->head = 0;
+
+	if (decomp_last_rem) {
+		memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem);
+		decomp->size = decomp_last_rem;
+	}
+
+	src = (void *)event + sizeof(struct perf_record_compressed);
+	src_size = event->pack.header.size - sizeof(struct perf_record_compressed);
+
+	decomp_size = zstd_decompress_stream(session->active_decomp->zstd_decomp, src, src_size,
+				&(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
+	if (!decomp_size) {
+		munmap(decomp, mmap_len);
+		pr_err("Couldn't decompress data\n");
+		return -1;
+	}
+
+	decomp->size += decomp_size;
+
+	if (session->active_decomp->decomp == NULL)
+		session->active_decomp->decomp = decomp;
+	else
+		session->active_decomp->decomp_last->next = decomp;
+
+	session->active_decomp->decomp_last = decomp;
+
+	pr_debug("decomp (B): %zd to %zd\n", src_size, decomp_size);
+
+	return 0;
+}
+#else /* !HAVE_ZSTD_SUPPORT */
+#define perf_session__process_compressed_event perf_session__process_compressed_event_stub
+#endif
+
+static int perf_session__deliver_event(struct perf_session *session,
+				       union perf_event *event,
+				       struct perf_tool *tool,
+				       u64 file_offset,
+				       const char *file_path);
+
+static int perf_session__open(struct perf_session *session, int repipe_fd)
+{
+	struct perf_data *data = session->data;
+
+	if (perf_session__read_header(session, repipe_fd) < 0) {
+		pr_err("incompatible file format (rerun with -v to learn more)\n");
+		return -1;
+	}
+
+	if (perf_data__is_pipe(data))
+		return 0;
+
+	if (perf_header__has_feat(&session->header, HEADER_STAT))
+		return 0;
+
+	if (!evlist__valid_sample_type(session->evlist)) {
+		pr_err("non matching sample_type\n");
+		return -1;
+	}
+
+	if (!evlist__valid_sample_id_all(session->evlist)) {
+		pr_err("non matching sample_id_all\n");
+		return -1;
+	}
+
+	if (!evlist__valid_read_format(session->evlist)) {
+		pr_err("non matching read_format\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+void perf_session__set_id_hdr_size(struct perf_session *session)
+{
+	u16 id_hdr_size = evlist__id_hdr_size(session->evlist);
+
+	machines__set_id_hdr_size(&session->machines, id_hdr_size);
+}
+
+int perf_session__create_kernel_maps(struct perf_session *session)
+{
+	int ret = machine__create_kernel_maps(&session->machines.host);
+
+	if (ret >= 0)
+		ret = machines__create_guest_kernel_maps(&session->machines);
+	return ret;
+}
+
+static void perf_session__destroy_kernel_maps(struct perf_session *session)
+{
+	machines__destroy_kernel_maps(&session->machines);
+}
+
+static bool perf_session__has_comm_exec(struct perf_session *session)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(session->evlist, evsel) {
+		if (evsel->core.attr.comm_exec)
+			return true;
+	}
+
+	return false;
+}
+
+static void perf_session__set_comm_exec(struct perf_session *session)
+{
+	bool comm_exec = perf_session__has_comm_exec(session);
+
+	machines__set_comm_exec(&session->machines, comm_exec);
+}
+
+static int ordered_events__deliver_event(struct ordered_events *oe,
+					 struct ordered_event *event)
+{
+	struct perf_session *session = container_of(oe, struct perf_session,
+						    ordered_events);
+
+	return perf_session__deliver_event(session, event->event,
+					   session->tool, event->file_offset,
+					   event->file_path);
+}
+
+struct perf_session *__perf_session__new(struct perf_data *data,
+					 bool repipe, int repipe_fd,
+					 struct perf_tool *tool)
+{
+	int ret = -ENOMEM;
+	struct perf_session *session = zalloc(sizeof(*session));
+
+	if (!session)
+		goto out;
+
+	session->repipe = repipe;
+	session->tool   = tool;
+	session->decomp_data.zstd_decomp = &session->zstd_data;
+	session->active_decomp = &session->decomp_data;
+	INIT_LIST_HEAD(&session->auxtrace_index);
+	machines__init(&session->machines);
+	ordered_events__init(&session->ordered_events,
+			     ordered_events__deliver_event, NULL);
+
+	perf_env__init(&session->header.env);
+	if (data) {
+		ret = perf_data__open(data);
+		if (ret < 0)
+			goto out_delete;
+
+		session->data = data;
+
+		if (perf_data__is_read(data)) {
+			ret = perf_session__open(session, repipe_fd);
+			if (ret < 0)
+				goto out_delete;
+
+			/*
+			 * set session attributes that are present in perf.data
+			 * but not in pipe-mode.
+			 */
+			if (!data->is_pipe) {
+				perf_session__set_id_hdr_size(session);
+				perf_session__set_comm_exec(session);
+			}
+
+			evlist__init_trace_event_sample_raw(session->evlist);
+
+			/* Open the directory data. */
+			if (data->is_dir) {
+				ret = perf_data__open_dir(data);
+				if (ret)
+					goto out_delete;
+			}
+
+			if (!symbol_conf.kallsyms_name &&
+			    !symbol_conf.vmlinux_name)
+				symbol_conf.kallsyms_name = perf_data__kallsyms_name(data);
+		}
+	} else  {
+		session->machines.host.env = &perf_env;
+	}
+
+	session->machines.host.single_address_space =
+		perf_env__single_address_space(session->machines.host.env);
+
+	if (!data || perf_data__is_write(data)) {
+		/*
+		 * In O_RDONLY mode this will be performed when reading the
+		 * kernel MMAP event, in perf_event__process_mmap().
+		 */
+		if (perf_session__create_kernel_maps(session) < 0)
+			pr_warning("Cannot read kernel map\n");
+	}
+
+	/*
+	 * In pipe-mode, evlist is empty until PERF_RECORD_HEADER_ATTR is
+	 * processed, so evlist__sample_id_all is not meaningful here.
+	 */
+	if ((!data || !data->is_pipe) && tool && tool->ordering_requires_timestamps &&
+	    tool->ordered_events && !evlist__sample_id_all(session->evlist)) {
+		dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
+		tool->ordered_events = false;
+	}
+
+	return session;
+
+ out_delete:
+	perf_session__delete(session);
+ out:
+	return ERR_PTR(ret);
+}
+
+static void perf_session__delete_threads(struct perf_session *session)
+{
+	machine__delete_threads(&session->machines.host);
+}
+
+static void perf_decomp__release_events(struct decomp *next)
+{
+	struct decomp *decomp;
+	size_t mmap_len;
+
+	do {
+		decomp = next;
+		if (decomp == NULL)
+			break;
+		next = decomp->next;
+		mmap_len = decomp->mmap_len;
+		munmap(decomp, mmap_len);
+	} while (1);
+}
+
+void perf_session__delete(struct perf_session *session)
+{
+	if (session == NULL)
+		return;
+	auxtrace__free(session);
+	auxtrace_index__free(&session->auxtrace_index);
+	perf_session__destroy_kernel_maps(session);
+	perf_session__delete_threads(session);
+	perf_decomp__release_events(session->decomp_data.decomp);
+	perf_env__exit(&session->header.env);
+	machines__exit(&session->machines);
+	if (session->data) {
+		if (perf_data__is_read(session->data))
+			evlist__delete(session->evlist);
+		perf_data__close(session->data);
+	}
+	trace_event__cleanup(&session->tevent);
+	free(session);
+}
+
+static int process_event_synth_tracing_data_stub(struct perf_session *session
+						 __maybe_unused,
+						 union perf_event *event
+						 __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+static int process_event_synth_attr_stub(struct perf_tool *tool __maybe_unused,
+					 union perf_event *event __maybe_unused,
+					 struct evlist **pevlist
+					 __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+static int process_event_synth_event_update_stub(struct perf_tool *tool __maybe_unused,
+						 union perf_event *event __maybe_unused,
+						 struct evlist **pevlist
+						 __maybe_unused)
+{
+	if (dump_trace)
+		perf_event__fprintf_event_update(event, stdout);
+
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+static int process_event_sample_stub(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event __maybe_unused,
+				     struct perf_sample *sample __maybe_unused,
+				     struct evsel *evsel __maybe_unused,
+				     struct machine *machine __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+static int process_event_stub(struct perf_tool *tool __maybe_unused,
+			      union perf_event *event __maybe_unused,
+			      struct perf_sample *sample __maybe_unused,
+			      struct machine *machine __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+static int process_finished_round_stub(struct perf_tool *tool __maybe_unused,
+				       union perf_event *event __maybe_unused,
+				       struct ordered_events *oe __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+static int skipn(int fd, off_t n)
+{
+	char buf[4096];
+	ssize_t ret;
+
+	while (n > 0) {
+		ret = read(fd, buf, min(n, (off_t)sizeof(buf)));
+		if (ret <= 0)
+			return ret;
+		n -= ret;
+	}
+
+	return 0;
+}
+
+static s64 process_event_auxtrace_stub(struct perf_session *session __maybe_unused,
+				       union perf_event *event)
+{
+	dump_printf(": unhandled!\n");
+	if (perf_data__is_pipe(session->data))
+		skipn(perf_data__fd(session->data), event->auxtrace.size);
+	return event->auxtrace.size;
+}
+
+static int process_event_op2_stub(struct perf_session *session __maybe_unused,
+				  union perf_event *event __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+
+static
+int process_event_thread_map_stub(struct perf_session *session __maybe_unused,
+				  union perf_event *event __maybe_unused)
+{
+	if (dump_trace)
+		perf_event__fprintf_thread_map(event, stdout);
+
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+static
+int process_event_cpu_map_stub(struct perf_session *session __maybe_unused,
+			       union perf_event *event __maybe_unused)
+{
+	if (dump_trace)
+		perf_event__fprintf_cpu_map(event, stdout);
+
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+static
+int process_event_stat_config_stub(struct perf_session *session __maybe_unused,
+				   union perf_event *event __maybe_unused)
+{
+	if (dump_trace)
+		perf_event__fprintf_stat_config(event, stdout);
+
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+static int process_stat_stub(struct perf_session *perf_session __maybe_unused,
+			     union perf_event *event)
+{
+	if (dump_trace)
+		perf_event__fprintf_stat(event, stdout);
+
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+static int process_stat_round_stub(struct perf_session *perf_session __maybe_unused,
+				   union perf_event *event)
+{
+	if (dump_trace)
+		perf_event__fprintf_stat_round(event, stdout);
+
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+static int process_event_time_conv_stub(struct perf_session *perf_session __maybe_unused,
+					union perf_event *event)
+{
+	if (dump_trace)
+		perf_event__fprintf_time_conv(event, stdout);
+
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+static int perf_session__process_compressed_event_stub(struct perf_session *session __maybe_unused,
+						       union perf_event *event __maybe_unused,
+						       u64 file_offset __maybe_unused,
+						       const char *file_path __maybe_unused)
+{
+       dump_printf(": unhandled!\n");
+       return 0;
+}
+
+void perf_tool__fill_defaults(struct perf_tool *tool)
+{
+	if (tool->sample == NULL)
+		tool->sample = process_event_sample_stub;
+	if (tool->mmap == NULL)
+		tool->mmap = process_event_stub;
+	if (tool->mmap2 == NULL)
+		tool->mmap2 = process_event_stub;
+	if (tool->comm == NULL)
+		tool->comm = process_event_stub;
+	if (tool->namespaces == NULL)
+		tool->namespaces = process_event_stub;
+	if (tool->cgroup == NULL)
+		tool->cgroup = process_event_stub;
+	if (tool->fork == NULL)
+		tool->fork = process_event_stub;
+	if (tool->exit == NULL)
+		tool->exit = process_event_stub;
+	if (tool->lost == NULL)
+		tool->lost = perf_event__process_lost;
+	if (tool->lost_samples == NULL)
+		tool->lost_samples = perf_event__process_lost_samples;
+	if (tool->aux == NULL)
+		tool->aux = perf_event__process_aux;
+	if (tool->itrace_start == NULL)
+		tool->itrace_start = perf_event__process_itrace_start;
+	if (tool->context_switch == NULL)
+		tool->context_switch = perf_event__process_switch;
+	if (tool->ksymbol == NULL)
+		tool->ksymbol = perf_event__process_ksymbol;
+	if (tool->bpf == NULL)
+		tool->bpf = perf_event__process_bpf;
+	if (tool->text_poke == NULL)
+		tool->text_poke = perf_event__process_text_poke;
+	if (tool->aux_output_hw_id == NULL)
+		tool->aux_output_hw_id = perf_event__process_aux_output_hw_id;
+	if (tool->read == NULL)
+		tool->read = process_event_sample_stub;
+	if (tool->throttle == NULL)
+		tool->throttle = process_event_stub;
+	if (tool->unthrottle == NULL)
+		tool->unthrottle = process_event_stub;
+	if (tool->attr == NULL)
+		tool->attr = process_event_synth_attr_stub;
+	if (tool->event_update == NULL)
+		tool->event_update = process_event_synth_event_update_stub;
+	if (tool->tracing_data == NULL)
+		tool->tracing_data = process_event_synth_tracing_data_stub;
+	if (tool->build_id == NULL)
+		tool->build_id = process_event_op2_stub;
+	if (tool->finished_round == NULL) {
+		if (tool->ordered_events)
+			tool->finished_round = perf_event__process_finished_round;
+		else
+			tool->finished_round = process_finished_round_stub;
+	}
+	if (tool->id_index == NULL)
+		tool->id_index = process_event_op2_stub;
+	if (tool->auxtrace_info == NULL)
+		tool->auxtrace_info = process_event_op2_stub;
+	if (tool->auxtrace == NULL)
+		tool->auxtrace = process_event_auxtrace_stub;
+	if (tool->auxtrace_error == NULL)
+		tool->auxtrace_error = process_event_op2_stub;
+	if (tool->thread_map == NULL)
+		tool->thread_map = process_event_thread_map_stub;
+	if (tool->cpu_map == NULL)
+		tool->cpu_map = process_event_cpu_map_stub;
+	if (tool->stat_config == NULL)
+		tool->stat_config = process_event_stat_config_stub;
+	if (tool->stat == NULL)
+		tool->stat = process_stat_stub;
+	if (tool->stat_round == NULL)
+		tool->stat_round = process_stat_round_stub;
+	if (tool->time_conv == NULL)
+		tool->time_conv = process_event_time_conv_stub;
+	if (tool->feature == NULL)
+		tool->feature = process_event_op2_stub;
+	if (tool->compressed == NULL)
+		tool->compressed = perf_session__process_compressed_event;
+	if (tool->finished_init == NULL)
+		tool->finished_init = process_event_op2_stub;
+}
+
+static void swap_sample_id_all(union perf_event *event, void *data)
+{
+	void *end = (void *) event + event->header.size;
+	int size = end - data;
+
+	BUG_ON(size % sizeof(u64));
+	mem_bswap_64(data, size);
+}
+
+static void perf_event__all64_swap(union perf_event *event,
+				   bool sample_id_all __maybe_unused)
+{
+	struct perf_event_header *hdr = &event->header;
+	mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr));
+}
+
+static void perf_event__comm_swap(union perf_event *event, bool sample_id_all)
+{
+	event->comm.pid = bswap_32(event->comm.pid);
+	event->comm.tid = bswap_32(event->comm.tid);
+
+	if (sample_id_all) {
+		void *data = &event->comm.comm;
+
+		data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
+		swap_sample_id_all(event, data);
+	}
+}
+
+static void perf_event__mmap_swap(union perf_event *event,
+				  bool sample_id_all)
+{
+	event->mmap.pid	  = bswap_32(event->mmap.pid);
+	event->mmap.tid	  = bswap_32(event->mmap.tid);
+	event->mmap.start = bswap_64(event->mmap.start);
+	event->mmap.len	  = bswap_64(event->mmap.len);
+	event->mmap.pgoff = bswap_64(event->mmap.pgoff);
+
+	if (sample_id_all) {
+		void *data = &event->mmap.filename;
+
+		data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
+		swap_sample_id_all(event, data);
+	}
+}
+
+static void perf_event__mmap2_swap(union perf_event *event,
+				  bool sample_id_all)
+{
+	event->mmap2.pid   = bswap_32(event->mmap2.pid);
+	event->mmap2.tid   = bswap_32(event->mmap2.tid);
+	event->mmap2.start = bswap_64(event->mmap2.start);
+	event->mmap2.len   = bswap_64(event->mmap2.len);
+	event->mmap2.pgoff = bswap_64(event->mmap2.pgoff);
+
+	if (!(event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID)) {
+		event->mmap2.maj   = bswap_32(event->mmap2.maj);
+		event->mmap2.min   = bswap_32(event->mmap2.min);
+		event->mmap2.ino   = bswap_64(event->mmap2.ino);
+		event->mmap2.ino_generation = bswap_64(event->mmap2.ino_generation);
+	}
+
+	if (sample_id_all) {
+		void *data = &event->mmap2.filename;
+
+		data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
+		swap_sample_id_all(event, data);
+	}
+}
+static void perf_event__task_swap(union perf_event *event, bool sample_id_all)
+{
+	event->fork.pid	 = bswap_32(event->fork.pid);
+	event->fork.tid	 = bswap_32(event->fork.tid);
+	event->fork.ppid = bswap_32(event->fork.ppid);
+	event->fork.ptid = bswap_32(event->fork.ptid);
+	event->fork.time = bswap_64(event->fork.time);
+
+	if (sample_id_all)
+		swap_sample_id_all(event, &event->fork + 1);
+}
+
+static void perf_event__read_swap(union perf_event *event, bool sample_id_all)
+{
+	event->read.pid		 = bswap_32(event->read.pid);
+	event->read.tid		 = bswap_32(event->read.tid);
+	event->read.value	 = bswap_64(event->read.value);
+	event->read.time_enabled = bswap_64(event->read.time_enabled);
+	event->read.time_running = bswap_64(event->read.time_running);
+	event->read.id		 = bswap_64(event->read.id);
+
+	if (sample_id_all)
+		swap_sample_id_all(event, &event->read + 1);
+}
+
+static void perf_event__aux_swap(union perf_event *event, bool sample_id_all)
+{
+	event->aux.aux_offset = bswap_64(event->aux.aux_offset);
+	event->aux.aux_size   = bswap_64(event->aux.aux_size);
+	event->aux.flags      = bswap_64(event->aux.flags);
+
+	if (sample_id_all)
+		swap_sample_id_all(event, &event->aux + 1);
+}
+
+static void perf_event__itrace_start_swap(union perf_event *event,
+					  bool sample_id_all)
+{
+	event->itrace_start.pid	 = bswap_32(event->itrace_start.pid);
+	event->itrace_start.tid	 = bswap_32(event->itrace_start.tid);
+
+	if (sample_id_all)
+		swap_sample_id_all(event, &event->itrace_start + 1);
+}
+
+static void perf_event__switch_swap(union perf_event *event, bool sample_id_all)
+{
+	if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) {
+		event->context_switch.next_prev_pid =
+				bswap_32(event->context_switch.next_prev_pid);
+		event->context_switch.next_prev_tid =
+				bswap_32(event->context_switch.next_prev_tid);
+	}
+
+	if (sample_id_all)
+		swap_sample_id_all(event, &event->context_switch + 1);
+}
+
+static void perf_event__text_poke_swap(union perf_event *event, bool sample_id_all)
+{
+	event->text_poke.addr    = bswap_64(event->text_poke.addr);
+	event->text_poke.old_len = bswap_16(event->text_poke.old_len);
+	event->text_poke.new_len = bswap_16(event->text_poke.new_len);
+
+	if (sample_id_all) {
+		size_t len = sizeof(event->text_poke.old_len) +
+			     sizeof(event->text_poke.new_len) +
+			     event->text_poke.old_len +
+			     event->text_poke.new_len;
+		void *data = &event->text_poke.old_len;
+
+		data += PERF_ALIGN(len, sizeof(u64));
+		swap_sample_id_all(event, data);
+	}
+}
+
+static void perf_event__throttle_swap(union perf_event *event,
+				      bool sample_id_all)
+{
+	event->throttle.time	  = bswap_64(event->throttle.time);
+	event->throttle.id	  = bswap_64(event->throttle.id);
+	event->throttle.stream_id = bswap_64(event->throttle.stream_id);
+
+	if (sample_id_all)
+		swap_sample_id_all(event, &event->throttle + 1);
+}
+
+static void perf_event__namespaces_swap(union perf_event *event,
+					bool sample_id_all)
+{
+	u64 i;
+
+	event->namespaces.pid		= bswap_32(event->namespaces.pid);
+	event->namespaces.tid		= bswap_32(event->namespaces.tid);
+	event->namespaces.nr_namespaces	= bswap_64(event->namespaces.nr_namespaces);
+
+	for (i = 0; i < event->namespaces.nr_namespaces; i++) {
+		struct perf_ns_link_info *ns = &event->namespaces.link_info[i];
+
+		ns->dev = bswap_64(ns->dev);
+		ns->ino = bswap_64(ns->ino);
+	}
+
+	if (sample_id_all)
+		swap_sample_id_all(event, &event->namespaces.link_info[i]);
+}
+
+static void perf_event__cgroup_swap(union perf_event *event, bool sample_id_all)
+{
+	event->cgroup.id = bswap_64(event->cgroup.id);
+
+	if (sample_id_all) {
+		void *data = &event->cgroup.path;
+
+		data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
+		swap_sample_id_all(event, data);
+	}
+}
+
+static u8 revbyte(u8 b)
+{
+	int rev = (b >> 4) | ((b & 0xf) << 4);
+	rev = ((rev & 0xcc) >> 2) | ((rev & 0x33) << 2);
+	rev = ((rev & 0xaa) >> 1) | ((rev & 0x55) << 1);
+	return (u8) rev;
+}
+
+/*
+ * XXX this is hack in attempt to carry flags bitfield
+ * through endian village. ABI says:
+ *
+ * Bit-fields are allocated from right to left (least to most significant)
+ * on little-endian implementations and from left to right (most to least
+ * significant) on big-endian implementations.
+ *
+ * The above seems to be byte specific, so we need to reverse each
+ * byte of the bitfield. 'Internet' also says this might be implementation
+ * specific and we probably need proper fix and carry perf_event_attr
+ * bitfield flags in separate data file FEAT_ section. Thought this seems
+ * to work for now.
+ */
+static void swap_bitfield(u8 *p, unsigned len)
+{
+	unsigned i;
+
+	for (i = 0; i < len; i++) {
+		*p = revbyte(*p);
+		p++;
+	}
+}
+
+/* exported for swapping attributes in file header */
+void perf_event__attr_swap(struct perf_event_attr *attr)
+{
+	attr->type		= bswap_32(attr->type);
+	attr->size		= bswap_32(attr->size);
+
+#define bswap_safe(f, n) 					\
+	(attr->size > (offsetof(struct perf_event_attr, f) + 	\
+		       sizeof(attr->f) * (n)))
+#define bswap_field(f, sz) 			\
+do { 						\
+	if (bswap_safe(f, 0))			\
+		attr->f = bswap_##sz(attr->f);	\
+} while(0)
+#define bswap_field_16(f) bswap_field(f, 16)
+#define bswap_field_32(f) bswap_field(f, 32)
+#define bswap_field_64(f) bswap_field(f, 64)
+
+	bswap_field_64(config);
+	bswap_field_64(sample_period);
+	bswap_field_64(sample_type);
+	bswap_field_64(read_format);
+	bswap_field_32(wakeup_events);
+	bswap_field_32(bp_type);
+	bswap_field_64(bp_addr);
+	bswap_field_64(bp_len);
+	bswap_field_64(branch_sample_type);
+	bswap_field_64(sample_regs_user);
+	bswap_field_32(sample_stack_user);
+	bswap_field_32(aux_watermark);
+	bswap_field_16(sample_max_stack);
+	bswap_field_32(aux_sample_size);
+
+	/*
+	 * After read_format are bitfields. Check read_format because
+	 * we are unable to use offsetof on bitfield.
+	 */
+	if (bswap_safe(read_format, 1))
+		swap_bitfield((u8 *) (&attr->read_format + 1),
+			      sizeof(u64));
+#undef bswap_field_64
+#undef bswap_field_32
+#undef bswap_field
+#undef bswap_safe
+}
+
+static void perf_event__hdr_attr_swap(union perf_event *event,
+				      bool sample_id_all __maybe_unused)
+{
+	size_t size;
+
+	perf_event__attr_swap(&event->attr.attr);
+
+	size = event->header.size;
+	size -= (void *)&event->attr.id - (void *)event;
+	mem_bswap_64(event->attr.id, size);
+}
+
+static void perf_event__event_update_swap(union perf_event *event,
+					  bool sample_id_all __maybe_unused)
+{
+	event->event_update.type = bswap_64(event->event_update.type);
+	event->event_update.id   = bswap_64(event->event_update.id);
+}
+
+static void perf_event__event_type_swap(union perf_event *event,
+					bool sample_id_all __maybe_unused)
+{
+	event->event_type.event_type.event_id =
+		bswap_64(event->event_type.event_type.event_id);
+}
+
+static void perf_event__tracing_data_swap(union perf_event *event,
+					  bool sample_id_all __maybe_unused)
+{
+	event->tracing_data.size = bswap_32(event->tracing_data.size);
+}
+
+static void perf_event__auxtrace_info_swap(union perf_event *event,
+					   bool sample_id_all __maybe_unused)
+{
+	size_t size;
+
+	event->auxtrace_info.type = bswap_32(event->auxtrace_info.type);
+
+	size = event->header.size;
+	size -= (void *)&event->auxtrace_info.priv - (void *)event;
+	mem_bswap_64(event->auxtrace_info.priv, size);
+}
+
+static void perf_event__auxtrace_swap(union perf_event *event,
+				      bool sample_id_all __maybe_unused)
+{
+	event->auxtrace.size      = bswap_64(event->auxtrace.size);
+	event->auxtrace.offset    = bswap_64(event->auxtrace.offset);
+	event->auxtrace.reference = bswap_64(event->auxtrace.reference);
+	event->auxtrace.idx       = bswap_32(event->auxtrace.idx);
+	event->auxtrace.tid       = bswap_32(event->auxtrace.tid);
+	event->auxtrace.cpu       = bswap_32(event->auxtrace.cpu);
+}
+
+static void perf_event__auxtrace_error_swap(union perf_event *event,
+					    bool sample_id_all __maybe_unused)
+{
+	event->auxtrace_error.type = bswap_32(event->auxtrace_error.type);
+	event->auxtrace_error.code = bswap_32(event->auxtrace_error.code);
+	event->auxtrace_error.cpu  = bswap_32(event->auxtrace_error.cpu);
+	event->auxtrace_error.pid  = bswap_32(event->auxtrace_error.pid);
+	event->auxtrace_error.tid  = bswap_32(event->auxtrace_error.tid);
+	event->auxtrace_error.fmt  = bswap_32(event->auxtrace_error.fmt);
+	event->auxtrace_error.ip   = bswap_64(event->auxtrace_error.ip);
+	if (event->auxtrace_error.fmt)
+		event->auxtrace_error.time = bswap_64(event->auxtrace_error.time);
+	if (event->auxtrace_error.fmt >= 2) {
+		event->auxtrace_error.machine_pid = bswap_32(event->auxtrace_error.machine_pid);
+		event->auxtrace_error.vcpu = bswap_32(event->auxtrace_error.vcpu);
+	}
+}
+
+static void perf_event__thread_map_swap(union perf_event *event,
+					bool sample_id_all __maybe_unused)
+{
+	unsigned i;
+
+	event->thread_map.nr = bswap_64(event->thread_map.nr);
+
+	for (i = 0; i < event->thread_map.nr; i++)
+		event->thread_map.entries[i].pid = bswap_64(event->thread_map.entries[i].pid);
+}
+
+static void perf_event__cpu_map_swap(union perf_event *event,
+				     bool sample_id_all __maybe_unused)
+{
+	struct perf_record_cpu_map_data *data = &event->cpu_map.data;
+
+	data->type = bswap_16(data->type);
+
+	switch (data->type) {
+	case PERF_CPU_MAP__CPUS:
+		data->cpus_data.nr = bswap_16(data->cpus_data.nr);
+
+		for (unsigned i = 0; i < data->cpus_data.nr; i++)
+			data->cpus_data.cpu[i] = bswap_16(data->cpus_data.cpu[i]);
+		break;
+	case PERF_CPU_MAP__MASK:
+		data->mask32_data.long_size = bswap_16(data->mask32_data.long_size);
+
+		switch (data->mask32_data.long_size) {
+		case 4:
+			data->mask32_data.nr = bswap_16(data->mask32_data.nr);
+			for (unsigned i = 0; i < data->mask32_data.nr; i++)
+				data->mask32_data.mask[i] = bswap_32(data->mask32_data.mask[i]);
+			break;
+		case 8:
+			data->mask64_data.nr = bswap_16(data->mask64_data.nr);
+			for (unsigned i = 0; i < data->mask64_data.nr; i++)
+				data->mask64_data.mask[i] = bswap_64(data->mask64_data.mask[i]);
+			break;
+		default:
+			pr_err("cpu_map swap: unsupported long size\n");
+		}
+		break;
+	case PERF_CPU_MAP__RANGE_CPUS:
+		data->range_cpu_data.start_cpu = bswap_16(data->range_cpu_data.start_cpu);
+		data->range_cpu_data.end_cpu = bswap_16(data->range_cpu_data.end_cpu);
+		break;
+	default:
+		break;
+	}
+}
+
+static void perf_event__stat_config_swap(union perf_event *event,
+					 bool sample_id_all __maybe_unused)
+{
+	u64 size;
+
+	size  = bswap_64(event->stat_config.nr) * sizeof(event->stat_config.data[0]);
+	size += 1; /* nr item itself */
+	mem_bswap_64(&event->stat_config.nr, size);
+}
+
+static void perf_event__stat_swap(union perf_event *event,
+				  bool sample_id_all __maybe_unused)
+{
+	event->stat.id     = bswap_64(event->stat.id);
+	event->stat.thread = bswap_32(event->stat.thread);
+	event->stat.cpu    = bswap_32(event->stat.cpu);
+	event->stat.val    = bswap_64(event->stat.val);
+	event->stat.ena    = bswap_64(event->stat.ena);
+	event->stat.run    = bswap_64(event->stat.run);
+}
+
+static void perf_event__stat_round_swap(union perf_event *event,
+					bool sample_id_all __maybe_unused)
+{
+	event->stat_round.type = bswap_64(event->stat_round.type);
+	event->stat_round.time = bswap_64(event->stat_round.time);
+}
+
+static void perf_event__time_conv_swap(union perf_event *event,
+				       bool sample_id_all __maybe_unused)
+{
+	event->time_conv.time_shift = bswap_64(event->time_conv.time_shift);
+	event->time_conv.time_mult  = bswap_64(event->time_conv.time_mult);
+	event->time_conv.time_zero  = bswap_64(event->time_conv.time_zero);
+
+	if (event_contains(event->time_conv, time_cycles)) {
+		event->time_conv.time_cycles = bswap_64(event->time_conv.time_cycles);
+		event->time_conv.time_mask = bswap_64(event->time_conv.time_mask);
+	}
+}
+
+typedef void (*perf_event__swap_op)(union perf_event *event,
+				    bool sample_id_all);
+
+static perf_event__swap_op perf_event__swap_ops[] = {
+	[PERF_RECORD_MMAP]		  = perf_event__mmap_swap,
+	[PERF_RECORD_MMAP2]		  = perf_event__mmap2_swap,
+	[PERF_RECORD_COMM]		  = perf_event__comm_swap,
+	[PERF_RECORD_FORK]		  = perf_event__task_swap,
+	[PERF_RECORD_EXIT]		  = perf_event__task_swap,
+	[PERF_RECORD_LOST]		  = perf_event__all64_swap,
+	[PERF_RECORD_READ]		  = perf_event__read_swap,
+	[PERF_RECORD_THROTTLE]		  = perf_event__throttle_swap,
+	[PERF_RECORD_UNTHROTTLE]	  = perf_event__throttle_swap,
+	[PERF_RECORD_SAMPLE]		  = perf_event__all64_swap,
+	[PERF_RECORD_AUX]		  = perf_event__aux_swap,
+	[PERF_RECORD_ITRACE_START]	  = perf_event__itrace_start_swap,
+	[PERF_RECORD_LOST_SAMPLES]	  = perf_event__all64_swap,
+	[PERF_RECORD_SWITCH]		  = perf_event__switch_swap,
+	[PERF_RECORD_SWITCH_CPU_WIDE]	  = perf_event__switch_swap,
+	[PERF_RECORD_NAMESPACES]	  = perf_event__namespaces_swap,
+	[PERF_RECORD_CGROUP]		  = perf_event__cgroup_swap,
+	[PERF_RECORD_TEXT_POKE]		  = perf_event__text_poke_swap,
+	[PERF_RECORD_AUX_OUTPUT_HW_ID]	  = perf_event__all64_swap,
+	[PERF_RECORD_HEADER_ATTR]	  = perf_event__hdr_attr_swap,
+	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap,
+	[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
+	[PERF_RECORD_HEADER_BUILD_ID]	  = NULL,
+	[PERF_RECORD_ID_INDEX]		  = perf_event__all64_swap,
+	[PERF_RECORD_AUXTRACE_INFO]	  = perf_event__auxtrace_info_swap,
+	[PERF_RECORD_AUXTRACE]		  = perf_event__auxtrace_swap,
+	[PERF_RECORD_AUXTRACE_ERROR]	  = perf_event__auxtrace_error_swap,
+	[PERF_RECORD_THREAD_MAP]	  = perf_event__thread_map_swap,
+	[PERF_RECORD_CPU_MAP]		  = perf_event__cpu_map_swap,
+	[PERF_RECORD_STAT_CONFIG]	  = perf_event__stat_config_swap,
+	[PERF_RECORD_STAT]		  = perf_event__stat_swap,
+	[PERF_RECORD_STAT_ROUND]	  = perf_event__stat_round_swap,
+	[PERF_RECORD_EVENT_UPDATE]	  = perf_event__event_update_swap,
+	[PERF_RECORD_TIME_CONV]		  = perf_event__time_conv_swap,
+	[PERF_RECORD_HEADER_MAX]	  = NULL,
+};
+
+/*
+ * When perf record finishes a pass on every buffers, it records this pseudo
+ * event.
+ * We record the max timestamp t found in the pass n.
+ * Assuming these timestamps are monotonic across cpus, we know that if
+ * a buffer still has events with timestamps below t, they will be all
+ * available and then read in the pass n + 1.
+ * Hence when we start to read the pass n + 2, we can safely flush every
+ * events with timestamps below t.
+ *
+ *    ============ PASS n =================
+ *       CPU 0         |   CPU 1
+ *                     |
+ *    cnt1 timestamps  |   cnt2 timestamps
+ *          1          |         2
+ *          2          |         3
+ *          -          |         4  <--- max recorded
+ *
+ *    ============ PASS n + 1 ==============
+ *       CPU 0         |   CPU 1
+ *                     |
+ *    cnt1 timestamps  |   cnt2 timestamps
+ *          3          |         5
+ *          4          |         6
+ *          5          |         7 <---- max recorded
+ *
+ *      Flush every events below timestamp 4
+ *
+ *    ============ PASS n + 2 ==============
+ *       CPU 0         |   CPU 1
+ *                     |
+ *    cnt1 timestamps  |   cnt2 timestamps
+ *          6          |         8
+ *          7          |         9
+ *          -          |         10
+ *
+ *      Flush every events below timestamp 7
+ *      etc...
+ */
+int perf_event__process_finished_round(struct perf_tool *tool __maybe_unused,
+				       union perf_event *event __maybe_unused,
+				       struct ordered_events *oe)
+{
+	if (dump_trace)
+		fprintf(stdout, "\n");
+	return ordered_events__flush(oe, OE_FLUSH__ROUND);
+}
+
+int perf_session__queue_event(struct perf_session *s, union perf_event *event,
+			      u64 timestamp, u64 file_offset, const char *file_path)
+{
+	return ordered_events__queue(&s->ordered_events, event, timestamp, file_offset, file_path);
+}
+
+static void callchain__lbr_callstack_printf(struct perf_sample *sample)
+{
+	struct ip_callchain *callchain = sample->callchain;
+	struct branch_stack *lbr_stack = sample->branch_stack;
+	struct branch_entry *entries = perf_sample__branch_entries(sample);
+	u64 kernel_callchain_nr = callchain->nr;
+	unsigned int i;
+
+	for (i = 0; i < kernel_callchain_nr; i++) {
+		if (callchain->ips[i] == PERF_CONTEXT_USER)
+			break;
+	}
+
+	if ((i != kernel_callchain_nr) && lbr_stack->nr) {
+		u64 total_nr;
+		/*
+		 * LBR callstack can only get user call chain,
+		 * i is kernel call chain number,
+		 * 1 is PERF_CONTEXT_USER.
+		 *
+		 * The user call chain is stored in LBR registers.
+		 * LBR are pair registers. The caller is stored
+		 * in "from" register, while the callee is stored
+		 * in "to" register.
+		 * For example, there is a call stack
+		 * "A"->"B"->"C"->"D".
+		 * The LBR registers will be recorded like
+		 * "C"->"D", "B"->"C", "A"->"B".
+		 * So only the first "to" register and all "from"
+		 * registers are needed to construct the whole stack.
+		 */
+		total_nr = i + 1 + lbr_stack->nr + 1;
+		kernel_callchain_nr = i + 1;
+
+		printf("... LBR call chain: nr:%" PRIu64 "\n", total_nr);
+
+		for (i = 0; i < kernel_callchain_nr; i++)
+			printf("..... %2d: %016" PRIx64 "\n",
+			       i, callchain->ips[i]);
+
+		printf("..... %2d: %016" PRIx64 "\n",
+		       (int)(kernel_callchain_nr), entries[0].to);
+		for (i = 0; i < lbr_stack->nr; i++)
+			printf("..... %2d: %016" PRIx64 "\n",
+			       (int)(i + kernel_callchain_nr + 1), entries[i].from);
+	}
+}
+
+static void callchain__printf(struct evsel *evsel,
+			      struct perf_sample *sample)
+{
+	unsigned int i;
+	struct ip_callchain *callchain = sample->callchain;
+
+	if (evsel__has_branch_callstack(evsel))
+		callchain__lbr_callstack_printf(sample);
+
+	printf("... FP chain: nr:%" PRIu64 "\n", callchain->nr);
+
+	for (i = 0; i < callchain->nr; i++)
+		printf("..... %2d: %016" PRIx64 "\n",
+		       i, callchain->ips[i]);
+}
+
+static void branch_stack__printf(struct perf_sample *sample, bool callstack)
+{
+	struct branch_entry *entries = perf_sample__branch_entries(sample);
+	uint64_t i;
+
+	if (!callstack) {
+		printf("%s: nr:%" PRIu64 "\n", "... branch stack", sample->branch_stack->nr);
+	} else {
+		/* the reason of adding 1 to nr is because after expanding
+		 * branch stack it generates nr + 1 callstack records. e.g.,
+		 *         B()->C()
+		 *         A()->B()
+		 * the final callstack should be:
+		 *         C()
+		 *         B()
+		 *         A()
+		 */
+		printf("%s: nr:%" PRIu64 "\n", "... branch callstack", sample->branch_stack->nr+1);
+	}
+
+	for (i = 0; i < sample->branch_stack->nr; i++) {
+		struct branch_entry *e = &entries[i];
+
+		if (!callstack) {
+			printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x %s\n",
+				i, e->from, e->to,
+				(unsigned short)e->flags.cycles,
+				e->flags.mispred ? "M" : " ",
+				e->flags.predicted ? "P" : " ",
+				e->flags.abort ? "A" : " ",
+				e->flags.in_tx ? "T" : " ",
+				(unsigned)e->flags.reserved,
+				get_branch_type(e));
+		} else {
+			if (i == 0) {
+				printf("..... %2"PRIu64": %016" PRIx64 "\n"
+				       "..... %2"PRIu64": %016" PRIx64 "\n",
+						i, e->to, i+1, e->from);
+			} else {
+				printf("..... %2"PRIu64": %016" PRIx64 "\n", i+1, e->from);
+			}
+		}
+	}
+}
+
+static void regs_dump__printf(u64 mask, u64 *regs, const char *arch)
+{
+	unsigned rid, i = 0;
+
+	for_each_set_bit(rid, (unsigned long *) &mask, sizeof(mask) * 8) {
+		u64 val = regs[i++];
+
+		printf(".... %-5s 0x%016" PRIx64 "\n",
+		       perf_reg_name(rid, arch), val);
+	}
+}
+
+static const char *regs_abi[] = {
+	[PERF_SAMPLE_REGS_ABI_NONE] = "none",
+	[PERF_SAMPLE_REGS_ABI_32] = "32-bit",
+	[PERF_SAMPLE_REGS_ABI_64] = "64-bit",
+};
+
+static inline const char *regs_dump_abi(struct regs_dump *d)
+{
+	if (d->abi > PERF_SAMPLE_REGS_ABI_64)
+		return "unknown";
+
+	return regs_abi[d->abi];
+}
+
+static void regs__printf(const char *type, struct regs_dump *regs, const char *arch)
+{
+	u64 mask = regs->mask;
+
+	printf("... %s regs: mask 0x%" PRIx64 " ABI %s\n",
+	       type,
+	       mask,
+	       regs_dump_abi(regs));
+
+	regs_dump__printf(mask, regs->regs, arch);
+}
+
+static void regs_user__printf(struct perf_sample *sample, const char *arch)
+{
+	struct regs_dump *user_regs = &sample->user_regs;
+
+	if (user_regs->regs)
+		regs__printf("user", user_regs, arch);
+}
+
+static void regs_intr__printf(struct perf_sample *sample, const char *arch)
+{
+	struct regs_dump *intr_regs = &sample->intr_regs;
+
+	if (intr_regs->regs)
+		regs__printf("intr", intr_regs, arch);
+}
+
+static void stack_user__printf(struct stack_dump *dump)
+{
+	printf("... ustack: size %" PRIu64 ", offset 0x%x\n",
+	       dump->size, dump->offset);
+}
+
+static void evlist__print_tstamp(struct evlist *evlist, union perf_event *event, struct perf_sample *sample)
+{
+	u64 sample_type = __evlist__combined_sample_type(evlist);
+
+	if (event->header.type != PERF_RECORD_SAMPLE &&
+	    !evlist__sample_id_all(evlist)) {
+		fputs("-1 -1 ", stdout);
+		return;
+	}
+
+	if ((sample_type & PERF_SAMPLE_CPU))
+		printf("%u ", sample->cpu);
+
+	if (sample_type & PERF_SAMPLE_TIME)
+		printf("%" PRIu64 " ", sample->time);
+}
+
+static void sample_read__printf(struct perf_sample *sample, u64 read_format)
+{
+	printf("... sample_read:\n");
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		printf("...... time enabled %016" PRIx64 "\n",
+		       sample->read.time_enabled);
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		printf("...... time running %016" PRIx64 "\n",
+		       sample->read.time_running);
+
+	if (read_format & PERF_FORMAT_GROUP) {
+		struct sample_read_value *value = sample->read.group.values;
+
+		printf(".... group nr %" PRIu64 "\n", sample->read.group.nr);
+
+		sample_read_group__for_each(value, sample->read.group.nr, read_format) {
+			printf("..... id %016" PRIx64
+			       ", value %016" PRIx64,
+			       value->id, value->value);
+			if (read_format & PERF_FORMAT_LOST)
+				printf(", lost %" PRIu64, value->lost);
+			printf("\n");
+		}
+	} else {
+		printf("..... id %016" PRIx64 ", value %016" PRIx64,
+			sample->read.one.id, sample->read.one.value);
+		if (read_format & PERF_FORMAT_LOST)
+			printf(", lost %" PRIu64, sample->read.one.lost);
+		printf("\n");
+	}
+}
+
+static void dump_event(struct evlist *evlist, union perf_event *event,
+		       u64 file_offset, struct perf_sample *sample,
+		       const char *file_path)
+{
+	if (!dump_trace)
+		return;
+
+	printf("\n%#" PRIx64 "@%s [%#x]: event: %d\n",
+	       file_offset, file_path, event->header.size, event->header.type);
+
+	trace_event(event);
+	if (event->header.type == PERF_RECORD_SAMPLE && evlist->trace_event_sample_raw)
+		evlist->trace_event_sample_raw(evlist, event, sample);
+
+	if (sample)
+		evlist__print_tstamp(evlist, event, sample);
+
+	printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
+	       event->header.size, perf_event__name(event->header.type));
+}
+
+char *get_page_size_name(u64 size, char *str)
+{
+	if (!size || !unit_number__scnprintf(str, PAGE_SIZE_NAME_LEN, size))
+		snprintf(str, PAGE_SIZE_NAME_LEN, "%s", "N/A");
+
+	return str;
+}
+
+static void dump_sample(struct evsel *evsel, union perf_event *event,
+			struct perf_sample *sample, const char *arch)
+{
+	u64 sample_type;
+	char str[PAGE_SIZE_NAME_LEN];
+
+	if (!dump_trace)
+		return;
+
+	printf("(IP, 0x%x): %d/%d: %#" PRIx64 " period: %" PRIu64 " addr: %#" PRIx64 "\n",
+	       event->header.misc, sample->pid, sample->tid, sample->ip,
+	       sample->period, sample->addr);
+
+	sample_type = evsel->core.attr.sample_type;
+
+	if (evsel__has_callchain(evsel))
+		callchain__printf(evsel, sample);
+
+	if (evsel__has_br_stack(evsel))
+		branch_stack__printf(sample, evsel__has_branch_callstack(evsel));
+
+	if (sample_type & PERF_SAMPLE_REGS_USER)
+		regs_user__printf(sample, arch);
+
+	if (sample_type & PERF_SAMPLE_REGS_INTR)
+		regs_intr__printf(sample, arch);
+
+	if (sample_type & PERF_SAMPLE_STACK_USER)
+		stack_user__printf(&sample->user_stack);
+
+	if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
+		printf("... weight: %" PRIu64 "", sample->weight);
+			if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
+				printf(",0x%"PRIx16"", sample->ins_lat);
+				printf(",0x%"PRIx16"", sample->p_stage_cyc);
+			}
+		printf("\n");
+	}
+
+	if (sample_type & PERF_SAMPLE_DATA_SRC)
+		printf(" . data_src: 0x%"PRIx64"\n", sample->data_src);
+
+	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
+		printf(" .. phys_addr: 0x%"PRIx64"\n", sample->phys_addr);
+
+	if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+		printf(" .. data page size: %s\n", get_page_size_name(sample->data_page_size, str));
+
+	if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
+		printf(" .. code page size: %s\n", get_page_size_name(sample->code_page_size, str));
+
+	if (sample_type & PERF_SAMPLE_TRANSACTION)
+		printf("... transaction: %" PRIx64 "\n", sample->transaction);
+
+	if (sample_type & PERF_SAMPLE_READ)
+		sample_read__printf(sample, evsel->core.attr.read_format);
+}
+
+static void dump_read(struct evsel *evsel, union perf_event *event)
+{
+	struct perf_record_read *read_event = &event->read;
+	u64 read_format;
+
+	if (!dump_trace)
+		return;
+
+	printf(": %d %d %s %" PRI_lu64 "\n", event->read.pid, event->read.tid,
+	       evsel__name(evsel), event->read.value);
+
+	if (!evsel)
+		return;
+
+	read_format = evsel->core.attr.read_format;
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		printf("... time enabled : %" PRI_lu64 "\n", read_event->time_enabled);
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		printf("... time running : %" PRI_lu64 "\n", read_event->time_running);
+
+	if (read_format & PERF_FORMAT_ID)
+		printf("... id           : %" PRI_lu64 "\n", read_event->id);
+
+	if (read_format & PERF_FORMAT_LOST)
+		printf("... lost         : %" PRI_lu64 "\n", read_event->lost);
+}
+
+static struct machine *machines__find_for_cpumode(struct machines *machines,
+					       union perf_event *event,
+					       struct perf_sample *sample)
+{
+	if (perf_guest &&
+	    ((sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ||
+	     (sample->cpumode == PERF_RECORD_MISC_GUEST_USER))) {
+		u32 pid;
+
+		if (sample->machine_pid)
+			pid = sample->machine_pid;
+		else if (event->header.type == PERF_RECORD_MMAP
+		    || event->header.type == PERF_RECORD_MMAP2)
+			pid = event->mmap.pid;
+		else
+			pid = sample->pid;
+
+		/*
+		 * Guest code machine is created as needed and does not use
+		 * DEFAULT_GUEST_KERNEL_ID.
+		 */
+		if (symbol_conf.guest_code)
+			return machines__findnew(machines, pid);
+
+		return machines__find_guest(machines, pid);
+	}
+
+	return &machines->host;
+}
+
+static int deliver_sample_value(struct evlist *evlist,
+				struct perf_tool *tool,
+				union perf_event *event,
+				struct perf_sample *sample,
+				struct sample_read_value *v,
+				struct machine *machine)
+{
+	struct perf_sample_id *sid = evlist__id2sid(evlist, v->id);
+	struct evsel *evsel;
+
+	if (sid) {
+		sample->id     = v->id;
+		sample->period = v->value - sid->period;
+		sid->period    = v->value;
+	}
+
+	if (!sid || sid->evsel == NULL) {
+		++evlist->stats.nr_unknown_id;
+		return 0;
+	}
+
+	/*
+	 * There's no reason to deliver sample
+	 * for zero period, bail out.
+	 */
+	if (!sample->period)
+		return 0;
+
+	evsel = container_of(sid->evsel, struct evsel, core);
+	return tool->sample(tool, event, sample, evsel, machine);
+}
+
+static int deliver_sample_group(struct evlist *evlist,
+				struct perf_tool *tool,
+				union  perf_event *event,
+				struct perf_sample *sample,
+				struct machine *machine,
+				u64 read_format)
+{
+	int ret = -EINVAL;
+	struct sample_read_value *v = sample->read.group.values;
+
+	sample_read_group__for_each(v, sample->read.group.nr, read_format) {
+		ret = deliver_sample_value(evlist, tool, event, sample, v,
+					   machine);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+static int evlist__deliver_sample(struct evlist *evlist, struct perf_tool *tool,
+				  union  perf_event *event, struct perf_sample *sample,
+				  struct evsel *evsel, struct machine *machine)
+{
+	/* We know evsel != NULL. */
+	u64 sample_type = evsel->core.attr.sample_type;
+	u64 read_format = evsel->core.attr.read_format;
+
+	/* Standard sample delivery. */
+	if (!(sample_type & PERF_SAMPLE_READ))
+		return tool->sample(tool, event, sample, evsel, machine);
+
+	/* For PERF_SAMPLE_READ we have either single or group mode. */
+	if (read_format & PERF_FORMAT_GROUP)
+		return deliver_sample_group(evlist, tool, event, sample,
+					    machine, read_format);
+	else
+		return deliver_sample_value(evlist, tool, event, sample,
+					    &sample->read.one, machine);
+}
+
+static int machines__deliver_event(struct machines *machines,
+				   struct evlist *evlist,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct perf_tool *tool, u64 file_offset,
+				   const char *file_path)
+{
+	struct evsel *evsel;
+	struct machine *machine;
+
+	dump_event(evlist, event, file_offset, sample, file_path);
+
+	evsel = evlist__id2evsel(evlist, sample->id);
+
+	machine = machines__find_for_cpumode(machines, event, sample);
+
+	switch (event->header.type) {
+	case PERF_RECORD_SAMPLE:
+		if (evsel == NULL) {
+			++evlist->stats.nr_unknown_id;
+			return 0;
+		}
+		if (machine == NULL) {
+			++evlist->stats.nr_unprocessable_samples;
+			dump_sample(evsel, event, sample, perf_env__arch(NULL));
+			return 0;
+		}
+		dump_sample(evsel, event, sample, perf_env__arch(machine->env));
+		return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine);
+	case PERF_RECORD_MMAP:
+		return tool->mmap(tool, event, sample, machine);
+	case PERF_RECORD_MMAP2:
+		if (event->header.misc & PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT)
+			++evlist->stats.nr_proc_map_timeout;
+		return tool->mmap2(tool, event, sample, machine);
+	case PERF_RECORD_COMM:
+		return tool->comm(tool, event, sample, machine);
+	case PERF_RECORD_NAMESPACES:
+		return tool->namespaces(tool, event, sample, machine);
+	case PERF_RECORD_CGROUP:
+		return tool->cgroup(tool, event, sample, machine);
+	case PERF_RECORD_FORK:
+		return tool->fork(tool, event, sample, machine);
+	case PERF_RECORD_EXIT:
+		return tool->exit(tool, event, sample, machine);
+	case PERF_RECORD_LOST:
+		if (tool->lost == perf_event__process_lost)
+			evlist->stats.total_lost += event->lost.lost;
+		return tool->lost(tool, event, sample, machine);
+	case PERF_RECORD_LOST_SAMPLES:
+		if (tool->lost_samples == perf_event__process_lost_samples)
+			evlist->stats.total_lost_samples += event->lost_samples.lost;
+		return tool->lost_samples(tool, event, sample, machine);
+	case PERF_RECORD_READ:
+		dump_read(evsel, event);
+		return tool->read(tool, event, sample, evsel, machine);
+	case PERF_RECORD_THROTTLE:
+		return tool->throttle(tool, event, sample, machine);
+	case PERF_RECORD_UNTHROTTLE:
+		return tool->unthrottle(tool, event, sample, machine);
+	case PERF_RECORD_AUX:
+		if (tool->aux == perf_event__process_aux) {
+			if (event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
+				evlist->stats.total_aux_lost += 1;
+			if (event->aux.flags & PERF_AUX_FLAG_PARTIAL)
+				evlist->stats.total_aux_partial += 1;
+			if (event->aux.flags & PERF_AUX_FLAG_COLLISION)
+				evlist->stats.total_aux_collision += 1;
+		}
+		return tool->aux(tool, event, sample, machine);
+	case PERF_RECORD_ITRACE_START:
+		return tool->itrace_start(tool, event, sample, machine);
+	case PERF_RECORD_SWITCH:
+	case PERF_RECORD_SWITCH_CPU_WIDE:
+		return tool->context_switch(tool, event, sample, machine);
+	case PERF_RECORD_KSYMBOL:
+		return tool->ksymbol(tool, event, sample, machine);
+	case PERF_RECORD_BPF_EVENT:
+		return tool->bpf(tool, event, sample, machine);
+	case PERF_RECORD_TEXT_POKE:
+		return tool->text_poke(tool, event, sample, machine);
+	case PERF_RECORD_AUX_OUTPUT_HW_ID:
+		return tool->aux_output_hw_id(tool, event, sample, machine);
+	default:
+		++evlist->stats.nr_unknown_events;
+		return -1;
+	}
+}
+
+static int perf_session__deliver_event(struct perf_session *session,
+				       union perf_event *event,
+				       struct perf_tool *tool,
+				       u64 file_offset,
+				       const char *file_path)
+{
+	struct perf_sample sample;
+	int ret = evlist__parse_sample(session->evlist, event, &sample);
+
+	if (ret) {
+		pr_err("Can't parse sample, err = %d\n", ret);
+		return ret;
+	}
+
+	ret = auxtrace__process_event(session, event, &sample, tool);
+	if (ret < 0)
+		return ret;
+	if (ret > 0)
+		return 0;
+
+	ret = machines__deliver_event(&session->machines, session->evlist,
+				      event, &sample, tool, file_offset, file_path);
+
+	if (dump_trace && sample.aux_sample.size)
+		auxtrace__dump_auxtrace_sample(session, &sample);
+
+	return ret;
+}
+
+static s64 perf_session__process_user_event(struct perf_session *session,
+					    union perf_event *event,
+					    u64 file_offset,
+					    const char *file_path)
+{
+	struct ordered_events *oe = &session->ordered_events;
+	struct perf_tool *tool = session->tool;
+	struct perf_sample sample = { .time = 0, };
+	int fd = perf_data__fd(session->data);
+	int err;
+
+	if (event->header.type != PERF_RECORD_COMPRESSED ||
+	    tool->compressed == perf_session__process_compressed_event_stub)
+		dump_event(session->evlist, event, file_offset, &sample, file_path);
+
+	/* These events are processed right away */
+	switch (event->header.type) {
+	case PERF_RECORD_HEADER_ATTR:
+		err = tool->attr(tool, event, &session->evlist);
+		if (err == 0) {
+			perf_session__set_id_hdr_size(session);
+			perf_session__set_comm_exec(session);
+		}
+		return err;
+	case PERF_RECORD_EVENT_UPDATE:
+		return tool->event_update(tool, event, &session->evlist);
+	case PERF_RECORD_HEADER_EVENT_TYPE:
+		/*
+		 * Deprecated, but we need to handle it for sake
+		 * of old data files create in pipe mode.
+		 */
+		return 0;
+	case PERF_RECORD_HEADER_TRACING_DATA:
+		/*
+		 * Setup for reading amidst mmap, but only when we
+		 * are in 'file' mode. The 'pipe' fd is in proper
+		 * place already.
+		 */
+		if (!perf_data__is_pipe(session->data))
+			lseek(fd, file_offset, SEEK_SET);
+		return tool->tracing_data(session, event);
+	case PERF_RECORD_HEADER_BUILD_ID:
+		return tool->build_id(session, event);
+	case PERF_RECORD_FINISHED_ROUND:
+		return tool->finished_round(tool, event, oe);
+	case PERF_RECORD_ID_INDEX:
+		return tool->id_index(session, event);
+	case PERF_RECORD_AUXTRACE_INFO:
+		return tool->auxtrace_info(session, event);
+	case PERF_RECORD_AUXTRACE:
+		/* setup for reading amidst mmap */
+		lseek(fd, file_offset + event->header.size, SEEK_SET);
+		return tool->auxtrace(session, event);
+	case PERF_RECORD_AUXTRACE_ERROR:
+		perf_session__auxtrace_error_inc(session, event);
+		return tool->auxtrace_error(session, event);
+	case PERF_RECORD_THREAD_MAP:
+		return tool->thread_map(session, event);
+	case PERF_RECORD_CPU_MAP:
+		return tool->cpu_map(session, event);
+	case PERF_RECORD_STAT_CONFIG:
+		return tool->stat_config(session, event);
+	case PERF_RECORD_STAT:
+		return tool->stat(session, event);
+	case PERF_RECORD_STAT_ROUND:
+		return tool->stat_round(session, event);
+	case PERF_RECORD_TIME_CONV:
+		session->time_conv = event->time_conv;
+		return tool->time_conv(session, event);
+	case PERF_RECORD_HEADER_FEATURE:
+		return tool->feature(session, event);
+	case PERF_RECORD_COMPRESSED:
+		err = tool->compressed(session, event, file_offset, file_path);
+		if (err)
+			dump_event(session->evlist, event, file_offset, &sample, file_path);
+		return err;
+	case PERF_RECORD_FINISHED_INIT:
+		return tool->finished_init(session, event);
+	default:
+		return -EINVAL;
+	}
+}
+
+int perf_session__deliver_synth_event(struct perf_session *session,
+				      union perf_event *event,
+				      struct perf_sample *sample)
+{
+	struct evlist *evlist = session->evlist;
+	struct perf_tool *tool = session->tool;
+
+	events_stats__inc(&evlist->stats, event->header.type);
+
+	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
+		return perf_session__process_user_event(session, event, 0, NULL);
+
+	return machines__deliver_event(&session->machines, evlist, event, sample, tool, 0, NULL);
+}
+
+static void event_swap(union perf_event *event, bool sample_id_all)
+{
+	perf_event__swap_op swap;
+
+	swap = perf_event__swap_ops[event->header.type];
+	if (swap)
+		swap(event, sample_id_all);
+}
+
+int perf_session__peek_event(struct perf_session *session, off_t file_offset,
+			     void *buf, size_t buf_sz,
+			     union perf_event **event_ptr,
+			     struct perf_sample *sample)
+{
+	union perf_event *event;
+	size_t hdr_sz, rest;
+	int fd;
+
+	if (session->one_mmap && !session->header.needs_swap) {
+		event = file_offset - session->one_mmap_offset +
+			session->one_mmap_addr;
+		goto out_parse_sample;
+	}
+
+	if (perf_data__is_pipe(session->data))
+		return -1;
+
+	fd = perf_data__fd(session->data);
+	hdr_sz = sizeof(struct perf_event_header);
+
+	if (buf_sz < hdr_sz)
+		return -1;
+
+	if (lseek(fd, file_offset, SEEK_SET) == (off_t)-1 ||
+	    readn(fd, buf, hdr_sz) != (ssize_t)hdr_sz)
+		return -1;
+
+	event = (union perf_event *)buf;
+
+	if (session->header.needs_swap)
+		perf_event_header__bswap(&event->header);
+
+	if (event->header.size < hdr_sz || event->header.size > buf_sz)
+		return -1;
+
+	buf += hdr_sz;
+	rest = event->header.size - hdr_sz;
+
+	if (readn(fd, buf, rest) != (ssize_t)rest)
+		return -1;
+
+	if (session->header.needs_swap)
+		event_swap(event, evlist__sample_id_all(session->evlist));
+
+out_parse_sample:
+
+	if (sample && event->header.type < PERF_RECORD_USER_TYPE_START &&
+	    evlist__parse_sample(session->evlist, event, sample))
+		return -1;
+
+	*event_ptr = event;
+
+	return 0;
+}
+
+int perf_session__peek_events(struct perf_session *session, u64 offset,
+			      u64 size, peek_events_cb_t cb, void *data)
+{
+	u64 max_offset = offset + size;
+	char buf[PERF_SAMPLE_MAX_SIZE];
+	union perf_event *event;
+	int err;
+
+	do {
+		err = perf_session__peek_event(session, offset, buf,
+					       PERF_SAMPLE_MAX_SIZE, &event,
+					       NULL);
+		if (err)
+			return err;
+
+		err = cb(session, event, offset, data);
+		if (err)
+			return err;
+
+		offset += event->header.size;
+		if (event->header.type == PERF_RECORD_AUXTRACE)
+			offset += event->auxtrace.size;
+
+	} while (offset < max_offset);
+
+	return err;
+}
+
+static s64 perf_session__process_event(struct perf_session *session,
+				       union perf_event *event, u64 file_offset,
+				       const char *file_path)
+{
+	struct evlist *evlist = session->evlist;
+	struct perf_tool *tool = session->tool;
+	int ret;
+
+	if (session->header.needs_swap)
+		event_swap(event, evlist__sample_id_all(evlist));
+
+	if (event->header.type >= PERF_RECORD_HEADER_MAX)
+		return -EINVAL;
+
+	events_stats__inc(&evlist->stats, event->header.type);
+
+	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
+		return perf_session__process_user_event(session, event, file_offset, file_path);
+
+	if (tool->ordered_events) {
+		u64 timestamp = -1ULL;
+
+		ret = evlist__parse_sample_timestamp(evlist, event, &timestamp);
+		if (ret && ret != -1)
+			return ret;
+
+		ret = perf_session__queue_event(session, event, timestamp, file_offset, file_path);
+		if (ret != -ETIME)
+			return ret;
+	}
+
+	return perf_session__deliver_event(session, event, tool, file_offset, file_path);
+}
+
+void perf_event_header__bswap(struct perf_event_header *hdr)
+{
+	hdr->type = bswap_32(hdr->type);
+	hdr->misc = bswap_16(hdr->misc);
+	hdr->size = bswap_16(hdr->size);
+}
+
+struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
+{
+	return machine__findnew_thread(&session->machines.host, -1, pid);
+}
+
+int perf_session__register_idle_thread(struct perf_session *session)
+{
+	struct thread *thread = machine__idle_thread(&session->machines.host);
+
+	/* machine__idle_thread() got the thread, so put it */
+	thread__put(thread);
+	return thread ? 0 : -1;
+}
+
+static void
+perf_session__warn_order(const struct perf_session *session)
+{
+	const struct ordered_events *oe = &session->ordered_events;
+	struct evsel *evsel;
+	bool should_warn = true;
+
+	evlist__for_each_entry(session->evlist, evsel) {
+		if (evsel->core.attr.write_backward)
+			should_warn = false;
+	}
+
+	if (!should_warn)
+		return;
+	if (oe->nr_unordered_events != 0)
+		ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events);
+}
+
+static void perf_session__warn_about_errors(const struct perf_session *session)
+{
+	const struct events_stats *stats = &session->evlist->stats;
+
+	if (session->tool->lost == perf_event__process_lost &&
+	    stats->nr_events[PERF_RECORD_LOST] != 0) {
+		ui__warning("Processed %d events and lost %d chunks!\n\n"
+			    "Check IO/CPU overload!\n\n",
+			    stats->nr_events[0],
+			    stats->nr_events[PERF_RECORD_LOST]);
+	}
+
+	if (session->tool->lost_samples == perf_event__process_lost_samples) {
+		double drop_rate;
+
+		drop_rate = (double)stats->total_lost_samples /
+			    (double) (stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples);
+		if (drop_rate > 0.05) {
+			ui__warning("Processed %" PRIu64 " samples and lost %3.2f%%!\n\n",
+				    stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples,
+				    drop_rate * 100.0);
+		}
+	}
+
+	if (session->tool->aux == perf_event__process_aux &&
+	    stats->total_aux_lost != 0) {
+		ui__warning("AUX data lost %" PRIu64 " times out of %u!\n\n",
+			    stats->total_aux_lost,
+			    stats->nr_events[PERF_RECORD_AUX]);
+	}
+
+	if (session->tool->aux == perf_event__process_aux &&
+	    stats->total_aux_partial != 0) {
+		bool vmm_exclusive = false;
+
+		(void)sysfs__read_bool("module/kvm_intel/parameters/vmm_exclusive",
+		                       &vmm_exclusive);
+
+		ui__warning("AUX data had gaps in it %" PRIu64 " times out of %u!\n\n"
+		            "Are you running a KVM guest in the background?%s\n\n",
+			    stats->total_aux_partial,
+			    stats->nr_events[PERF_RECORD_AUX],
+			    vmm_exclusive ?
+			    "\nReloading kvm_intel module with vmm_exclusive=0\n"
+			    "will reduce the gaps to only guest's timeslices." :
+			    "");
+	}
+
+	if (session->tool->aux == perf_event__process_aux &&
+	    stats->total_aux_collision != 0) {
+		ui__warning("AUX data detected collision  %" PRIu64 " times out of %u!\n\n",
+			    stats->total_aux_collision,
+			    stats->nr_events[PERF_RECORD_AUX]);
+	}
+
+	if (stats->nr_unknown_events != 0) {
+		ui__warning("Found %u unknown events!\n\n"
+			    "Is this an older tool processing a perf.data "
+			    "file generated by a more recent tool?\n\n"
+			    "If that is not the case, consider "
+			    "reporting to linux-kernel@vger.kernel.org.\n\n",
+			    stats->nr_unknown_events);
+	}
+
+	if (stats->nr_unknown_id != 0) {
+		ui__warning("%u samples with id not present in the header\n",
+			    stats->nr_unknown_id);
+	}
+
+	if (stats->nr_invalid_chains != 0) {
+		ui__warning("Found invalid callchains!\n\n"
+			    "%u out of %u events were discarded for this reason.\n\n"
+			    "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
+			    stats->nr_invalid_chains,
+			    stats->nr_events[PERF_RECORD_SAMPLE]);
+	}
+
+	if (stats->nr_unprocessable_samples != 0) {
+		ui__warning("%u unprocessable samples recorded.\n"
+			    "Do you have a KVM guest running and not using 'perf kvm'?\n",
+			    stats->nr_unprocessable_samples);
+	}
+
+	perf_session__warn_order(session);
+
+	events_stats__auxtrace_error_warn(stats);
+
+	if (stats->nr_proc_map_timeout != 0) {
+		ui__warning("%d map information files for pre-existing threads were\n"
+			    "not processed, if there are samples for addresses they\n"
+			    "will not be resolved, you may find out which are these\n"
+			    "threads by running with -v and redirecting the output\n"
+			    "to a file.\n"
+			    "The time limit to process proc map is too short?\n"
+			    "Increase it by --proc-map-timeout\n",
+			    stats->nr_proc_map_timeout);
+	}
+}
+
+static int perf_session__flush_thread_stack(struct thread *thread,
+					    void *p __maybe_unused)
+{
+	return thread_stack__flush(thread);
+}
+
+static int perf_session__flush_thread_stacks(struct perf_session *session)
+{
+	return machines__for_each_thread(&session->machines,
+					 perf_session__flush_thread_stack,
+					 NULL);
+}
+
+volatile int session_done;
+
+static int __perf_session__process_decomp_events(struct perf_session *session);
+
+static int __perf_session__process_pipe_events(struct perf_session *session)
+{
+	struct ordered_events *oe = &session->ordered_events;
+	struct perf_tool *tool = session->tool;
+	union perf_event *event;
+	uint32_t size, cur_size = 0;
+	void *buf = NULL;
+	s64 skip = 0;
+	u64 head;
+	ssize_t err;
+	void *p;
+
+	perf_tool__fill_defaults(tool);
+
+	head = 0;
+	cur_size = sizeof(union perf_event);
+
+	buf = malloc(cur_size);
+	if (!buf)
+		return -errno;
+	ordered_events__set_copy_on_queue(oe, true);
+more:
+	event = buf;
+	err = perf_data__read(session->data, event,
+			      sizeof(struct perf_event_header));
+	if (err <= 0) {
+		if (err == 0)
+			goto done;
+
+		pr_err("failed to read event header\n");
+		goto out_err;
+	}
+
+	if (session->header.needs_swap)
+		perf_event_header__bswap(&event->header);
+
+	size = event->header.size;
+	if (size < sizeof(struct perf_event_header)) {
+		pr_err("bad event header size\n");
+		goto out_err;
+	}
+
+	if (size > cur_size) {
+		void *new = realloc(buf, size);
+		if (!new) {
+			pr_err("failed to allocate memory to read event\n");
+			goto out_err;
+		}
+		buf = new;
+		cur_size = size;
+		event = buf;
+	}
+	p = event;
+	p += sizeof(struct perf_event_header);
+
+	if (size - sizeof(struct perf_event_header)) {
+		err = perf_data__read(session->data, p,
+				      size - sizeof(struct perf_event_header));
+		if (err <= 0) {
+			if (err == 0) {
+				pr_err("unexpected end of event stream\n");
+				goto done;
+			}
+
+			pr_err("failed to read event data\n");
+			goto out_err;
+		}
+	}
+
+	if ((skip = perf_session__process_event(session, event, head, "pipe")) < 0) {
+		pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
+		       head, event->header.size, event->header.type);
+		err = -EINVAL;
+		goto out_err;
+	}
+
+	head += size;
+
+	if (skip > 0)
+		head += skip;
+
+	err = __perf_session__process_decomp_events(session);
+	if (err)
+		goto out_err;
+
+	if (!session_done())
+		goto more;
+done:
+	/* do the final flush for ordered samples */
+	err = ordered_events__flush(oe, OE_FLUSH__FINAL);
+	if (err)
+		goto out_err;
+	err = auxtrace__flush_events(session, tool);
+	if (err)
+		goto out_err;
+	err = perf_session__flush_thread_stacks(session);
+out_err:
+	free(buf);
+	if (!tool->no_warn)
+		perf_session__warn_about_errors(session);
+	ordered_events__free(&session->ordered_events);
+	auxtrace__free_events(session);
+	return err;
+}
+
+static union perf_event *
+prefetch_event(char *buf, u64 head, size_t mmap_size,
+	       bool needs_swap, union perf_event *error)
+{
+	union perf_event *event;
+	u16 event_size;
+
+	/*
+	 * Ensure we have enough space remaining to read
+	 * the size of the event in the headers.
+	 */
+	if (head + sizeof(event->header) > mmap_size)
+		return NULL;
+
+	event = (union perf_event *)(buf + head);
+	if (needs_swap)
+		perf_event_header__bswap(&event->header);
+
+	event_size = event->header.size;
+	if (head + event_size <= mmap_size)
+		return event;
+
+	/* We're not fetching the event so swap back again */
+	if (needs_swap)
+		perf_event_header__bswap(&event->header);
+
+	/* Check if the event fits into the next mmapped buf. */
+	if (event_size <= mmap_size - head % page_size) {
+		/* Remap buf and fetch again. */
+		return NULL;
+	}
+
+	/* Invalid input. Event size should never exceed mmap_size. */
+	pr_debug("%s: head=%#" PRIx64 " event->header.size=%#x, mmap_size=%#zx:"
+		 " fuzzed or compressed perf.data?\n", __func__, head, event_size, mmap_size);
+
+	return error;
+}
+
+static union perf_event *
+fetch_mmaped_event(u64 head, size_t mmap_size, char *buf, bool needs_swap)
+{
+	return prefetch_event(buf, head, mmap_size, needs_swap, ERR_PTR(-EINVAL));
+}
+
+static union perf_event *
+fetch_decomp_event(u64 head, size_t mmap_size, char *buf, bool needs_swap)
+{
+	return prefetch_event(buf, head, mmap_size, needs_swap, NULL);
+}
+
+static int __perf_session__process_decomp_events(struct perf_session *session)
+{
+	s64 skip;
+	u64 size;
+	struct decomp *decomp = session->active_decomp->decomp_last;
+
+	if (!decomp)
+		return 0;
+
+	while (decomp->head < decomp->size && !session_done()) {
+		union perf_event *event = fetch_decomp_event(decomp->head, decomp->size, decomp->data,
+							     session->header.needs_swap);
+
+		if (!event)
+			break;
+
+		size = event->header.size;
+
+		if (size < sizeof(struct perf_event_header) ||
+		    (skip = perf_session__process_event(session, event, decomp->file_pos,
+							decomp->file_path)) < 0) {
+			pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
+				decomp->file_pos + decomp->head, event->header.size, event->header.type);
+			return -EINVAL;
+		}
+
+		if (skip)
+			size += skip;
+
+		decomp->head += size;
+	}
+
+	return 0;
+}
+
+/*
+ * On 64bit we can mmap the data file in one go. No need for tiny mmap
+ * slices. On 32bit we use 32MB.
+ */
+#if BITS_PER_LONG == 64
+#define MMAP_SIZE ULLONG_MAX
+#define NUM_MMAPS 1
+#else
+#define MMAP_SIZE (32 * 1024 * 1024ULL)
+#define NUM_MMAPS 128
+#endif
+
+struct reader;
+
+typedef s64 (*reader_cb_t)(struct perf_session *session,
+			   union perf_event *event,
+			   u64 file_offset,
+			   const char *file_path);
+
+struct reader {
+	int		 fd;
+	const char	 *path;
+	u64		 data_size;
+	u64		 data_offset;
+	reader_cb_t	 process;
+	bool		 in_place_update;
+	char		 *mmaps[NUM_MMAPS];
+	size_t		 mmap_size;
+	int		 mmap_idx;
+	char		 *mmap_cur;
+	u64		 file_pos;
+	u64		 file_offset;
+	u64		 head;
+	u64		 size;
+	bool		 done;
+	struct zstd_data   zstd_data;
+	struct decomp_data decomp_data;
+};
+
+static int
+reader__init(struct reader *rd, bool *one_mmap)
+{
+	u64 data_size = rd->data_size;
+	char **mmaps = rd->mmaps;
+
+	rd->head = rd->data_offset;
+	data_size += rd->data_offset;
+
+	rd->mmap_size = MMAP_SIZE;
+	if (rd->mmap_size > data_size) {
+		rd->mmap_size = data_size;
+		if (one_mmap)
+			*one_mmap = true;
+	}
+
+	memset(mmaps, 0, sizeof(rd->mmaps));
+
+	if (zstd_init(&rd->zstd_data, 0))
+		return -1;
+	rd->decomp_data.zstd_decomp = &rd->zstd_data;
+
+	return 0;
+}
+
+static void
+reader__release_decomp(struct reader *rd)
+{
+	perf_decomp__release_events(rd->decomp_data.decomp);
+	zstd_fini(&rd->zstd_data);
+}
+
+static int
+reader__mmap(struct reader *rd, struct perf_session *session)
+{
+	int mmap_prot, mmap_flags;
+	char *buf, **mmaps = rd->mmaps;
+	u64 page_offset;
+
+	mmap_prot  = PROT_READ;
+	mmap_flags = MAP_SHARED;
+
+	if (rd->in_place_update) {
+		mmap_prot  |= PROT_WRITE;
+	} else if (session->header.needs_swap) {
+		mmap_prot  |= PROT_WRITE;
+		mmap_flags = MAP_PRIVATE;
+	}
+
+	if (mmaps[rd->mmap_idx]) {
+		munmap(mmaps[rd->mmap_idx], rd->mmap_size);
+		mmaps[rd->mmap_idx] = NULL;
+	}
+
+	page_offset = page_size * (rd->head / page_size);
+	rd->file_offset += page_offset;
+	rd->head -= page_offset;
+
+	buf = mmap(NULL, rd->mmap_size, mmap_prot, mmap_flags, rd->fd,
+		   rd->file_offset);
+	if (buf == MAP_FAILED) {
+		pr_err("failed to mmap file\n");
+		return -errno;
+	}
+	mmaps[rd->mmap_idx] = rd->mmap_cur = buf;
+	rd->mmap_idx = (rd->mmap_idx + 1) & (ARRAY_SIZE(rd->mmaps) - 1);
+	rd->file_pos = rd->file_offset + rd->head;
+	if (session->one_mmap) {
+		session->one_mmap_addr = buf;
+		session->one_mmap_offset = rd->file_offset;
+	}
+
+	return 0;
+}
+
+enum {
+	READER_OK,
+	READER_NODATA,
+};
+
+static int
+reader__read_event(struct reader *rd, struct perf_session *session,
+		   struct ui_progress *prog)
+{
+	u64 size;
+	int err = READER_OK;
+	union perf_event *event;
+	s64 skip;
+
+	event = fetch_mmaped_event(rd->head, rd->mmap_size, rd->mmap_cur,
+				   session->header.needs_swap);
+	if (IS_ERR(event))
+		return PTR_ERR(event);
+
+	if (!event)
+		return READER_NODATA;
+
+	size = event->header.size;
+
+	skip = -EINVAL;
+
+	if (size < sizeof(struct perf_event_header) ||
+	    (skip = rd->process(session, event, rd->file_pos, rd->path)) < 0) {
+		pr_err("%#" PRIx64 " [%#x]: failed to process type: %d [%s]\n",
+		       rd->file_offset + rd->head, event->header.size,
+		       event->header.type, strerror(-skip));
+		err = skip;
+		goto out;
+	}
+
+	if (skip)
+		size += skip;
+
+	rd->size += size;
+	rd->head += size;
+	rd->file_pos += size;
+
+	err = __perf_session__process_decomp_events(session);
+	if (err)
+		goto out;
+
+	ui_progress__update(prog, size);
+
+out:
+	return err;
+}
+
+static inline bool
+reader__eof(struct reader *rd)
+{
+	return (rd->file_pos >= rd->data_size + rd->data_offset);
+}
+
+static int
+reader__process_events(struct reader *rd, struct perf_session *session,
+		       struct ui_progress *prog)
+{
+	int err;
+
+	err = reader__init(rd, &session->one_mmap);
+	if (err)
+		goto out;
+
+	session->active_decomp = &rd->decomp_data;
+
+remap:
+	err = reader__mmap(rd, session);
+	if (err)
+		goto out;
+
+more:
+	err = reader__read_event(rd, session, prog);
+	if (err < 0)
+		goto out;
+	else if (err == READER_NODATA)
+		goto remap;
+
+	if (session_done())
+		goto out;
+
+	if (!reader__eof(rd))
+		goto more;
+
+out:
+	session->active_decomp = &session->decomp_data;
+	return err;
+}
+
+static s64 process_simple(struct perf_session *session,
+			  union perf_event *event,
+			  u64 file_offset,
+			  const char *file_path)
+{
+	return perf_session__process_event(session, event, file_offset, file_path);
+}
+
+static int __perf_session__process_events(struct perf_session *session)
+{
+	struct reader rd = {
+		.fd		= perf_data__fd(session->data),
+		.path		= session->data->file.path,
+		.data_size	= session->header.data_size,
+		.data_offset	= session->header.data_offset,
+		.process	= process_simple,
+		.in_place_update = session->data->in_place_update,
+	};
+	struct ordered_events *oe = &session->ordered_events;
+	struct perf_tool *tool = session->tool;
+	struct ui_progress prog;
+	int err;
+
+	perf_tool__fill_defaults(tool);
+
+	if (rd.data_size == 0)
+		return -1;
+
+	ui_progress__init_size(&prog, rd.data_size, "Processing events...");
+
+	err = reader__process_events(&rd, session, &prog);
+	if (err)
+		goto out_err;
+	/* do the final flush for ordered samples */
+	err = ordered_events__flush(oe, OE_FLUSH__FINAL);
+	if (err)
+		goto out_err;
+	err = auxtrace__flush_events(session, tool);
+	if (err)
+		goto out_err;
+	err = perf_session__flush_thread_stacks(session);
+out_err:
+	ui_progress__finish();
+	if (!tool->no_warn)
+		perf_session__warn_about_errors(session);
+	/*
+	 * We may switching perf.data output, make ordered_events
+	 * reusable.
+	 */
+	ordered_events__reinit(&session->ordered_events);
+	auxtrace__free_events(session);
+	reader__release_decomp(&rd);
+	session->one_mmap = false;
+	return err;
+}
+
+/*
+ * Processing 2 MB of data from each reader in sequence,
+ * because that's the way the ordered events sorting works
+ * most efficiently.
+ */
+#define READER_MAX_SIZE (2 * 1024 * 1024)
+
+/*
+ * This function reads, merge and process directory data.
+ * It assumens the version 1 of directory data, where each
+ * data file holds per-cpu data, already sorted by kernel.
+ */
+static int __perf_session__process_dir_events(struct perf_session *session)
+{
+	struct perf_data *data = session->data;
+	struct perf_tool *tool = session->tool;
+	int i, ret, readers, nr_readers;
+	struct ui_progress prog;
+	u64 total_size = perf_data__size(session->data);
+	struct reader *rd;
+
+	perf_tool__fill_defaults(tool);
+
+	ui_progress__init_size(&prog, total_size, "Sorting events...");
+
+	nr_readers = 1;
+	for (i = 0; i < data->dir.nr; i++) {
+		if (data->dir.files[i].size)
+			nr_readers++;
+	}
+
+	rd = zalloc(nr_readers * sizeof(struct reader));
+	if (!rd)
+		return -ENOMEM;
+
+	rd[0] = (struct reader) {
+		.fd		 = perf_data__fd(session->data),
+		.path		 = session->data->file.path,
+		.data_size	 = session->header.data_size,
+		.data_offset	 = session->header.data_offset,
+		.process	 = process_simple,
+		.in_place_update = session->data->in_place_update,
+	};
+	ret = reader__init(&rd[0], NULL);
+	if (ret)
+		goto out_err;
+	ret = reader__mmap(&rd[0], session);
+	if (ret)
+		goto out_err;
+	readers = 1;
+
+	for (i = 0; i < data->dir.nr; i++) {
+		if (!data->dir.files[i].size)
+			continue;
+		rd[readers] = (struct reader) {
+			.fd		 = data->dir.files[i].fd,
+			.path		 = data->dir.files[i].path,
+			.data_size	 = data->dir.files[i].size,
+			.data_offset	 = 0,
+			.process	 = process_simple,
+			.in_place_update = session->data->in_place_update,
+		};
+		ret = reader__init(&rd[readers], NULL);
+		if (ret)
+			goto out_err;
+		ret = reader__mmap(&rd[readers], session);
+		if (ret)
+			goto out_err;
+		readers++;
+	}
+
+	i = 0;
+	while (readers) {
+		if (session_done())
+			break;
+
+		if (rd[i].done) {
+			i = (i + 1) % nr_readers;
+			continue;
+		}
+		if (reader__eof(&rd[i])) {
+			rd[i].done = true;
+			readers--;
+			continue;
+		}
+
+		session->active_decomp = &rd[i].decomp_data;
+		ret = reader__read_event(&rd[i], session, &prog);
+		if (ret < 0) {
+			goto out_err;
+		} else if (ret == READER_NODATA) {
+			ret = reader__mmap(&rd[i], session);
+			if (ret)
+				goto out_err;
+		}
+
+		if (rd[i].size >= READER_MAX_SIZE) {
+			rd[i].size = 0;
+			i = (i + 1) % nr_readers;
+		}
+	}
+
+	ret = ordered_events__flush(&session->ordered_events, OE_FLUSH__FINAL);
+	if (ret)
+		goto out_err;
+
+	ret = perf_session__flush_thread_stacks(session);
+out_err:
+	ui_progress__finish();
+
+	if (!tool->no_warn)
+		perf_session__warn_about_errors(session);
+
+	/*
+	 * We may switching perf.data output, make ordered_events
+	 * reusable.
+	 */
+	ordered_events__reinit(&session->ordered_events);
+
+	session->one_mmap = false;
+
+	session->active_decomp = &session->decomp_data;
+	for (i = 0; i < nr_readers; i++)
+		reader__release_decomp(&rd[i]);
+	zfree(&rd);
+
+	return ret;
+}
+
+int perf_session__process_events(struct perf_session *session)
+{
+	if (perf_session__register_idle_thread(session) < 0)
+		return -ENOMEM;
+
+	if (perf_data__is_pipe(session->data))
+		return __perf_session__process_pipe_events(session);
+
+	if (perf_data__is_dir(session->data) && session->data->dir.nr)
+		return __perf_session__process_dir_events(session);
+
+	return __perf_session__process_events(session);
+}
+
+bool perf_session__has_traces(struct perf_session *session, const char *msg)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(session->evlist, evsel) {
+		if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT)
+			return true;
+	}
+
+	pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
+	return false;
+}
+
+int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr)
+{
+	char *bracket;
+	struct ref_reloc_sym *ref;
+	struct kmap *kmap;
+
+	ref = zalloc(sizeof(struct ref_reloc_sym));
+	if (ref == NULL)
+		return -ENOMEM;
+
+	ref->name = strdup(symbol_name);
+	if (ref->name == NULL) {
+		free(ref);
+		return -ENOMEM;
+	}
+
+	bracket = strchr(ref->name, ']');
+	if (bracket)
+		*bracket = '\0';
+
+	ref->addr = addr;
+
+	kmap = map__kmap(map);
+	if (kmap)
+		kmap->ref_reloc_sym = ref;
+
+	return 0;
+}
+
+size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp)
+{
+	return machines__fprintf_dsos(&session->machines, fp);
+}
+
+size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp,
+					  bool (skip)(struct dso *dso, int parm), int parm)
+{
+	return machines__fprintf_dsos_buildid(&session->machines, fp, skip, parm);
+}
+
+size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp,
+				       bool skip_empty)
+{
+	size_t ret;
+	const char *msg = "";
+
+	if (perf_header__has_feat(&session->header, HEADER_AUXTRACE))
+		msg = " (excludes AUX area (e.g. instruction trace) decoded / synthesized events)";
+
+	ret = fprintf(fp, "\nAggregated stats:%s\n", msg);
+
+	ret += events_stats__fprintf(&session->evlist->stats, fp, skip_empty);
+	return ret;
+}
+
+size_t perf_session__fprintf(struct perf_session *session, FILE *fp)
+{
+	/*
+	 * FIXME: Here we have to actually print all the machines in this
+	 * session, not just the host...
+	 */
+	return machine__fprintf(&session->machines.host, fp);
+}
+
+struct evsel *perf_session__find_first_evtype(struct perf_session *session,
+					      unsigned int type)
+{
+	struct evsel *pos;
+
+	evlist__for_each_entry(session->evlist, pos) {
+		if (pos->core.attr.type == type)
+			return pos;
+	}
+	return NULL;
+}
+
+int perf_session__cpu_bitmap(struct perf_session *session,
+			     const char *cpu_list, unsigned long *cpu_bitmap)
+{
+	int i, err = -1;
+	struct perf_cpu_map *map;
+	int nr_cpus = min(session->header.env.nr_cpus_avail, MAX_NR_CPUS);
+
+	for (i = 0; i < PERF_TYPE_MAX; ++i) {
+		struct evsel *evsel;
+
+		evsel = perf_session__find_first_evtype(session, i);
+		if (!evsel)
+			continue;
+
+		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CPU)) {
+			pr_err("File does not contain CPU events. "
+			       "Remove -C option to proceed.\n");
+			return -1;
+		}
+	}
+
+	map = perf_cpu_map__new(cpu_list);
+	if (map == NULL) {
+		pr_err("Invalid cpu_list\n");
+		return -1;
+	}
+
+	for (i = 0; i < perf_cpu_map__nr(map); i++) {
+		struct perf_cpu cpu = perf_cpu_map__cpu(map, i);
+
+		if (cpu.cpu >= nr_cpus) {
+			pr_err("Requested CPU %d too large. "
+			       "Consider raising MAX_NR_CPUS\n", cpu.cpu);
+			goto out_delete_map;
+		}
+
+		set_bit(cpu.cpu, cpu_bitmap);
+	}
+
+	err = 0;
+
+out_delete_map:
+	perf_cpu_map__put(map);
+	return err;
+}
+
+void perf_session__fprintf_info(struct perf_session *session, FILE *fp,
+				bool full)
+{
+	if (session == NULL || fp == NULL)
+		return;
+
+	fprintf(fp, "# ========\n");
+	perf_header__fprintf_info(session, fp, full);
+	fprintf(fp, "# ========\n#\n");
+}
+
+static int perf_session__register_guest(struct perf_session *session, pid_t machine_pid)
+{
+	struct machine *machine = machines__findnew(&session->machines, machine_pid);
+	struct thread *thread;
+
+	if (!machine)
+		return -ENOMEM;
+
+	machine->single_address_space = session->machines.host.single_address_space;
+
+	thread = machine__idle_thread(machine);
+	if (!thread)
+		return -ENOMEM;
+	thread__put(thread);
+
+	machine->kallsyms_filename = perf_data__guest_kallsyms_name(session->data, machine_pid);
+
+	return 0;
+}
+
+static int perf_session__set_guest_cpu(struct perf_session *session, pid_t pid,
+				       pid_t tid, int guest_cpu)
+{
+	struct machine *machine = &session->machines.host;
+	struct thread *thread = machine__findnew_thread(machine, pid, tid);
+
+	if (!thread)
+		return -ENOMEM;
+	thread->guest_cpu = guest_cpu;
+	thread__put(thread);
+
+	return 0;
+}
+
+int perf_event__process_id_index(struct perf_session *session,
+				 union perf_event *event)
+{
+	struct evlist *evlist = session->evlist;
+	struct perf_record_id_index *ie = &event->id_index;
+	size_t sz = ie->header.size - sizeof(*ie);
+	size_t i, nr, max_nr;
+	size_t e1_sz = sizeof(struct id_index_entry);
+	size_t e2_sz = sizeof(struct id_index_entry_2);
+	size_t etot_sz = e1_sz + e2_sz;
+	struct id_index_entry_2 *e2;
+	pid_t last_pid = 0;
+
+	max_nr = sz / e1_sz;
+	nr = ie->nr;
+	if (nr > max_nr) {
+		printf("Too big: nr %zu max_nr %zu\n", nr, max_nr);
+		return -EINVAL;
+	}
+
+	if (sz >= nr * etot_sz) {
+		max_nr = sz / etot_sz;
+		if (nr > max_nr) {
+			printf("Too big2: nr %zu max_nr %zu\n", nr, max_nr);
+			return -EINVAL;
+		}
+		e2 = (void *)ie + sizeof(*ie) + nr * e1_sz;
+	} else {
+		e2 = NULL;
+	}
+
+	if (dump_trace)
+		fprintf(stdout, " nr: %zu\n", nr);
+
+	for (i = 0; i < nr; i++, (e2 ? e2++ : 0)) {
+		struct id_index_entry *e = &ie->entries[i];
+		struct perf_sample_id *sid;
+		int ret;
+
+		if (dump_trace) {
+			fprintf(stdout,	" ... id: %"PRI_lu64, e->id);
+			fprintf(stdout,	"  idx: %"PRI_lu64, e->idx);
+			fprintf(stdout,	"  cpu: %"PRI_ld64, e->cpu);
+			fprintf(stdout, "  tid: %"PRI_ld64, e->tid);
+			if (e2) {
+				fprintf(stdout, "  machine_pid: %"PRI_ld64, e2->machine_pid);
+				fprintf(stdout, "  vcpu: %"PRI_lu64"\n", e2->vcpu);
+			} else {
+				fprintf(stdout, "\n");
+			}
+		}
+
+		sid = evlist__id2sid(evlist, e->id);
+		if (!sid)
+			return -ENOENT;
+
+		sid->idx = e->idx;
+		sid->cpu.cpu = e->cpu;
+		sid->tid = e->tid;
+
+		if (!e2)
+			continue;
+
+		sid->machine_pid = e2->machine_pid;
+		sid->vcpu.cpu = e2->vcpu;
+
+		if (!sid->machine_pid)
+			continue;
+
+		if (sid->machine_pid != last_pid) {
+			ret = perf_session__register_guest(session, sid->machine_pid);
+			if (ret)
+				return ret;
+			last_pid = sid->machine_pid;
+			perf_guest = true;
+		}
+
+		ret = perf_session__set_guest_cpu(session, sid->machine_pid, e->tid, e2->vcpu);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
new file mode 100644
index 000000000..be5871ea5
--- /dev/null
+++ b/tools/perf/util/session.h
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SESSION_H
+#define __PERF_SESSION_H
+
+#include "trace-event.h"
+#include "event.h"
+#include "header.h"
+#include "machine.h"
+#include "data.h"
+#include "ordered-events.h"
+#include "util/compress.h"
+#include <linux/kernel.h>
+#include <linux/rbtree.h>
+#include <linux/perf_event.h>
+
+struct ip_callchain;
+struct symbol;
+struct thread;
+
+struct auxtrace;
+struct itrace_synth_opts;
+
+struct decomp_data {
+	struct decomp	 *decomp;
+	struct decomp	 *decomp_last;
+	struct zstd_data *zstd_decomp;
+};
+
+struct perf_session {
+	struct perf_header	header;
+	struct machines		machines;
+	struct evlist	*evlist;
+	struct auxtrace		*auxtrace;
+	struct itrace_synth_opts *itrace_synth_opts;
+	struct list_head	auxtrace_index;
+	struct trace_event	tevent;
+	struct perf_record_time_conv	time_conv;
+	bool			repipe;
+	bool			one_mmap;
+	void			*one_mmap_addr;
+	u64			one_mmap_offset;
+	struct ordered_events	ordered_events;
+	struct perf_data	*data;
+	struct perf_tool	*tool;
+	u64			bytes_transferred;
+	u64			bytes_compressed;
+	struct zstd_data	zstd_data;
+	struct decomp_data	decomp_data;
+	struct decomp_data	*active_decomp;
+};
+
+struct decomp {
+	struct decomp *next;
+	u64 file_pos;
+	const char *file_path;
+	size_t mmap_len;
+	u64 head;
+	size_t size;
+	char data[];
+};
+
+struct perf_tool;
+
+struct perf_session *__perf_session__new(struct perf_data *data,
+					 bool repipe, int repipe_fd,
+					 struct perf_tool *tool);
+
+static inline struct perf_session *perf_session__new(struct perf_data *data,
+						     struct perf_tool *tool)
+{
+	return __perf_session__new(data, false, -1, tool);
+}
+
+void perf_session__delete(struct perf_session *session);
+
+void perf_event_header__bswap(struct perf_event_header *hdr);
+
+int perf_session__peek_event(struct perf_session *session, off_t file_offset,
+			     void *buf, size_t buf_sz,
+			     union perf_event **event_ptr,
+			     struct perf_sample *sample);
+typedef int (*peek_events_cb_t)(struct perf_session *session,
+				union perf_event *event, u64 offset,
+				void *data);
+int perf_session__peek_events(struct perf_session *session, u64 offset,
+			      u64 size, peek_events_cb_t cb, void *data);
+
+int perf_session__process_events(struct perf_session *session);
+
+int perf_session__queue_event(struct perf_session *s, union perf_event *event,
+			      u64 timestamp, u64 file_offset, const char *file_path);
+
+void perf_tool__fill_defaults(struct perf_tool *tool);
+
+int perf_session__resolve_callchain(struct perf_session *session,
+				    struct evsel *evsel,
+				    struct thread *thread,
+				    struct ip_callchain *chain,
+				    struct symbol **parent);
+
+bool perf_session__has_traces(struct perf_session *session, const char *msg);
+
+void perf_event__attr_swap(struct perf_event_attr *attr);
+
+int perf_session__create_kernel_maps(struct perf_session *session);
+
+void perf_session__set_id_hdr_size(struct perf_session *session);
+
+static inline
+struct machine *perf_session__find_machine(struct perf_session *session, pid_t pid)
+{
+	return machines__find(&session->machines, pid);
+}
+
+static inline
+struct machine *perf_session__findnew_machine(struct perf_session *session, pid_t pid)
+{
+	return machines__findnew(&session->machines, pid);
+}
+
+struct thread *perf_session__findnew(struct perf_session *session, pid_t pid);
+int perf_session__register_idle_thread(struct perf_session *session);
+
+size_t perf_session__fprintf(struct perf_session *session, FILE *fp);
+
+size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp);
+
+size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp,
+					  bool (fn)(struct dso *dso, int parm), int parm);
+
+size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp,
+				       bool skip_empty);
+
+struct evsel *perf_session__find_first_evtype(struct perf_session *session,
+					    unsigned int type);
+
+int perf_session__cpu_bitmap(struct perf_session *session,
+			     const char *cpu_list, unsigned long *cpu_bitmap);
+
+void perf_session__fprintf_info(struct perf_session *s, FILE *fp, bool full);
+
+struct evsel_str_handler;
+
+#define perf_session__set_tracepoints_handlers(session, array) \
+	__evlist__set_tracepoints_handlers(session->evlist, array, ARRAY_SIZE(array))
+
+extern volatile int session_done;
+
+#define session_done()	READ_ONCE(session_done)
+
+int perf_session__deliver_synth_event(struct perf_session *session,
+				      union perf_event *event,
+				      struct perf_sample *sample);
+
+int perf_event__process_id_index(struct perf_session *session,
+				 union perf_event *event);
+
+int perf_event__process_finished_round(struct perf_tool *tool,
+				       union perf_event *event,
+				       struct ordered_events *oe);
+
+#endif /* __PERF_SESSION_H */
diff --git a/tools/perf/util/setns.c b/tools/perf/util/setns.c
new file mode 100644
index 000000000..48f9c0af6
--- /dev/null
+++ b/tools/perf/util/setns.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: LGPL-2.1
+
+#include "namespaces.h"
+#include <unistd.h>
+#include <sys/syscall.h>
+
+int setns(int fd, int nstype)
+{
+	return syscall(__NR_setns, fd, nstype);
+}
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
new file mode 100644
index 000000000..5b1e6468d
--- /dev/null
+++ b/tools/perf/util/setup.py
@@ -0,0 +1,91 @@
+from os import getenv, path
+from subprocess import Popen, PIPE
+from re import sub
+
+cc = getenv("CC")
+cc_is_clang = b"clang version" in Popen([cc.split()[0], "-v"], stderr=PIPE).stderr.readline()
+src_feature_tests  = getenv('srctree') + '/tools/build/feature'
+
+def clang_has_option(option):
+    cc_output = Popen([cc, option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines()
+    return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o))] == [ ]
+
+if cc_is_clang:
+    from sysconfig import get_config_vars
+    vars = get_config_vars()
+    for var in ('CFLAGS', 'OPT'):
+        vars[var] = sub("-specs=[^ ]+", "", vars[var])
+        if not clang_has_option("-mcet"):
+            vars[var] = sub("-mcet", "", vars[var])
+        if not clang_has_option("-fcf-protection"):
+            vars[var] = sub("-fcf-protection", "", vars[var])
+        if not clang_has_option("-fstack-clash-protection"):
+            vars[var] = sub("-fstack-clash-protection", "", vars[var])
+        if not clang_has_option("-fstack-protector-strong"):
+            vars[var] = sub("-fstack-protector-strong", "", vars[var])
+        if not clang_has_option("-fno-semantic-interposition"):
+            vars[var] = sub("-fno-semantic-interposition", "", vars[var])
+        if not clang_has_option("-ffat-lto-objects"):
+            vars[var] = sub("-ffat-lto-objects", "", vars[var])
+
+from setuptools import setup, Extension
+
+from setuptools.command.build_ext   import build_ext   as _build_ext
+from setuptools.command.install_lib import install_lib as _install_lib
+
+class build_ext(_build_ext):
+    def finalize_options(self):
+        _build_ext.finalize_options(self)
+        self.build_lib  = build_lib
+        self.build_temp = build_tmp
+
+class install_lib(_install_lib):
+    def finalize_options(self):
+        _install_lib.finalize_options(self)
+        self.build_dir = build_lib
+
+
+cflags = getenv('CFLAGS', '').split()
+# switch off several checks (need to be at the end of cflags list)
+cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls', '-DPYTHON_PERF' ]
+if cc_is_clang:
+    cflags += ["-Wno-unused-command-line-argument" ]
+else:
+    cflags += ['-Wno-cast-function-type' ]
+
+src_perf  = getenv('srctree') + '/tools/perf'
+build_lib = getenv('PYTHON_EXTBUILD_LIB')
+build_tmp = getenv('PYTHON_EXTBUILD_TMP')
+libtraceevent = getenv('LIBTRACEEVENT')
+libapikfs = getenv('LIBAPI')
+libperf = getenv('LIBPERF')
+
+ext_sources = [f.strip() for f in open('util/python-ext-sources')
+				if len(f.strip()) > 0 and f[0] != '#']
+
+# use full paths with source files
+ext_sources = list(map(lambda x: '%s/%s' % (src_perf, x) , ext_sources))
+
+extra_libraries = []
+if '-DHAVE_LIBNUMA_SUPPORT' in cflags:
+    extra_libraries = [ 'numa' ]
+if '-DHAVE_LIBCAP_SUPPORT' in cflags:
+    extra_libraries += [ 'cap' ]
+
+perf = Extension('perf',
+		  sources = ext_sources,
+		  include_dirs = ['util/include'],
+		  libraries = extra_libraries,
+		  extra_compile_args = cflags,
+		  extra_objects = [libtraceevent, libapikfs, libperf],
+                 )
+
+setup(name='perf',
+      version='0.1',
+      description='Interface with the Linux profiling infrastructure',
+      author='Arnaldo Carvalho de Melo',
+      author_email='acme@redhat.com',
+      license='GPLv2',
+      url='http://perf.wiki.kernel.org',
+      ext_modules=[perf],
+      cmdclass={'build_ext': build_ext, 'install_lib': install_lib})
diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c
new file mode 100644
index 000000000..388846f17
--- /dev/null
+++ b/tools/perf/util/sideband_evlist.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "util/debug.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/mmap.h"
+#include "util/perf_api_probe.h"
+#include <perf/mmap.h>
+#include <linux/perf_event.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdbool.h>
+
+int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr,
+			 evsel__sb_cb_t cb, void *data)
+{
+	struct evsel *evsel;
+
+	if (!attr->sample_id_all) {
+		pr_warning("enabling sample_id_all for all side band events\n");
+		attr->sample_id_all = 1;
+	}
+
+	evsel = evsel__new_idx(attr, evlist->core.nr_entries);
+	if (!evsel)
+		return -1;
+
+	evsel->side_band.cb = cb;
+	evsel->side_band.data = data;
+	evlist__add(evlist, evsel);
+	return 0;
+}
+
+static void *perf_evlist__poll_thread(void *arg)
+{
+	struct evlist *evlist = arg;
+	bool draining = false;
+	int i, done = 0;
+	/*
+	 * In order to read symbols from other namespaces perf to needs to call
+	 * setns(2).  This isn't permitted if the struct_fs has multiple users.
+	 * unshare(2) the fs so that we may continue to setns into namespaces
+	 * that we're observing when, for instance, reading the build-ids at
+	 * the end of a 'perf record' session.
+	 */
+	unshare(CLONE_FS);
+
+	while (!done) {
+		bool got_data = false;
+
+		if (evlist->thread.done)
+			draining = true;
+
+		if (!draining)
+			evlist__poll(evlist, 1000);
+
+		for (i = 0; i < evlist->core.nr_mmaps; i++) {
+			struct mmap *map = &evlist->mmap[i];
+			union perf_event *event;
+
+			if (perf_mmap__read_init(&map->core))
+				continue;
+			while ((event = perf_mmap__read_event(&map->core)) != NULL) {
+				struct evsel *evsel = evlist__event2evsel(evlist, event);
+
+				if (evsel && evsel->side_band.cb)
+					evsel->side_band.cb(event, evsel->side_band.data);
+				else
+					pr_warning("cannot locate proper evsel for the side band event\n");
+
+				perf_mmap__consume(&map->core);
+				got_data = true;
+			}
+			perf_mmap__read_done(&map->core);
+		}
+
+		if (draining && !got_data)
+			break;
+	}
+	return NULL;
+}
+
+void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		evsel->core.attr.sample_id_all    = 1;
+		evsel->core.attr.watermark        = 1;
+		evsel->core.attr.wakeup_watermark = 1;
+		evsel->side_band.cb   = cb;
+		evsel->side_band.data = data;
+      }
+}
+
+int evlist__start_sb_thread(struct evlist *evlist, struct target *target)
+{
+	struct evsel *counter;
+
+	if (!evlist)
+		return 0;
+
+	if (evlist__create_maps(evlist, target))
+		goto out_delete_evlist;
+
+	if (evlist->core.nr_entries > 1) {
+		bool can_sample_identifier = perf_can_sample_identifier();
+
+		evlist__for_each_entry(evlist, counter)
+			evsel__set_sample_id(counter, can_sample_identifier);
+
+		evlist__set_id_pos(evlist);
+	}
+
+	evlist__for_each_entry(evlist, counter) {
+		if (evsel__open(counter, evlist->core.user_requested_cpus,
+				evlist->core.threads) < 0)
+			goto out_delete_evlist;
+	}
+
+	if (evlist__mmap(evlist, UINT_MAX))
+		goto out_delete_evlist;
+
+	evlist__for_each_entry(evlist, counter) {
+		if (evsel__enable(counter))
+			goto out_delete_evlist;
+	}
+
+	evlist->thread.done = 0;
+	if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
+		goto out_delete_evlist;
+
+	return 0;
+
+out_delete_evlist:
+	evlist__delete(evlist);
+	evlist = NULL;
+	return -1;
+}
+
+void evlist__stop_sb_thread(struct evlist *evlist)
+{
+	if (!evlist)
+		return;
+	evlist->thread.done = 1;
+	pthread_join(evlist->thread.th, NULL);
+	evlist__delete(evlist);
+}
diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c
new file mode 100644
index 000000000..994e9e418
--- /dev/null
+++ b/tools/perf/util/smt.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <string.h>
+#include "api/fs/fs.h"
+#include "cputopo.h"
+#include "smt.h"
+
+bool smt_on(const struct cpu_topology *topology)
+{
+	static bool cached;
+	static bool cached_result;
+	int fs_value;
+
+	if (cached)
+		return cached_result;
+
+	if (sysfs__read_int("devices/system/cpu/smt/active", &fs_value) >= 0)
+		cached_result = (fs_value == 1);
+	else
+		cached_result = cpu_topology__smt_on(topology);
+
+	cached = true;
+	return cached_result;
+}
+
+bool core_wide(bool system_wide, const char *user_requested_cpu_list,
+	       const struct cpu_topology *topology)
+{
+	/* If not everything running on a core is being recorded then we can't use core_wide. */
+	if (!system_wide)
+		return false;
+
+	/* Cheap case that SMT is disabled and therefore we're inherently core_wide. */
+	if (!smt_on(topology))
+		return true;
+
+	return cpu_topology__core_wide(topology, user_requested_cpu_list);
+}
diff --git a/tools/perf/util/smt.h b/tools/perf/util/smt.h
new file mode 100644
index 000000000..ae9095f2c
--- /dev/null
+++ b/tools/perf/util/smt.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SMT_H
+#define __SMT_H 1
+
+struct cpu_topology;
+
+/* Returns true if SMT (aka hyperthreading) is enabled. */
+bool smt_on(const struct cpu_topology *topology);
+
+/*
+ * Returns true when system wide and all SMT threads for a core are in the
+ * user_requested_cpus map.
+ */
+bool core_wide(bool system_wide, const char *user_requested_cpu_list,
+	       const struct cpu_topology *topology);
+
+#endif /* __SMT_H */
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
new file mode 100644
index 000000000..6882b1714
--- /dev/null
+++ b/tools/perf/util/sort.c
@@ -0,0 +1,3555 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <regex.h>
+#include <stdlib.h>
+#include <linux/mman.h>
+#include <linux/time64.h>
+#include "debug.h"
+#include "dso.h"
+#include "sort.h"
+#include "hist.h"
+#include "cacheline.h"
+#include "comm.h"
+#include "map.h"
+#include "maps.h"
+#include "symbol.h"
+#include "map_symbol.h"
+#include "branch.h"
+#include "thread.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "srcline.h"
+#include "strlist.h"
+#include "strbuf.h"
+#include <traceevent/event-parse.h>
+#include "mem-events.h"
+#include "annotate.h"
+#include "event.h"
+#include "time-utils.h"
+#include "cgroup.h"
+#include "machine.h"
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+regex_t		parent_regex;
+const char	default_parent_pattern[] = "^sys_|^do_page_fault";
+const char	*parent_pattern = default_parent_pattern;
+const char	*default_sort_order = "comm,dso,symbol";
+const char	default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
+const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc";
+const char	default_top_sort_order[] = "dso,symbol";
+const char	default_diff_sort_order[] = "dso,symbol";
+const char	default_tracepoint_sort_order[] = "trace";
+const char	*sort_order;
+const char	*field_order;
+regex_t		ignore_callees_regex;
+int		have_ignore_callees = 0;
+enum sort_mode	sort__mode = SORT_MODE__NORMAL;
+static const char *const dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"};
+static const char *const arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"};
+
+/*
+ * Replaces all occurrences of a char used with the:
+ *
+ * -t, --field-separator
+ *
+ * option, that uses a special separator character and don't pad with spaces,
+ * replacing all occurrences of this separator in symbol names (and other
+ * output) with a '.' character, that thus it's the only non valid separator.
+*/
+static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...)
+{
+	int n;
+	va_list ap;
+
+	va_start(ap, fmt);
+	n = vsnprintf(bf, size, fmt, ap);
+	if (symbol_conf.field_sep && n > 0) {
+		char *sep = bf;
+
+		while (1) {
+			sep = strchr(sep, *symbol_conf.field_sep);
+			if (sep == NULL)
+				break;
+			*sep = '.';
+		}
+	}
+	va_end(ap);
+
+	if (n >= (int)size)
+		return size - 1;
+	return n;
+}
+
+static int64_t cmp_null(const void *l, const void *r)
+{
+	if (!l && !r)
+		return 0;
+	else if (!l)
+		return -1;
+	else
+		return 1;
+}
+
+/* --sort pid */
+
+static int64_t
+sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return right->thread->tid - left->thread->tid;
+}
+
+static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf,
+				       size_t size, unsigned int width)
+{
+	const char *comm = thread__comm_str(he->thread);
+
+	width = max(7U, width) - 8;
+	return repsep_snprintf(bf, size, "%7d:%-*.*s", he->thread->tid,
+			       width, width, comm ?: "");
+}
+
+static int hist_entry__thread_filter(struct hist_entry *he, int type, const void *arg)
+{
+	const struct thread *th = arg;
+
+	if (type != HIST_FILTER__THREAD)
+		return -1;
+
+	return th && he->thread != th;
+}
+
+struct sort_entry sort_thread = {
+	.se_header	= "    Pid:Command",
+	.se_cmp		= sort__thread_cmp,
+	.se_snprintf	= hist_entry__thread_snprintf,
+	.se_filter	= hist_entry__thread_filter,
+	.se_width_idx	= HISTC_THREAD,
+};
+
+/* --sort comm */
+
+/*
+ * We can't use pointer comparison in functions below,
+ * because it gives different results based on pointer
+ * values, which could break some sorting assumptions.
+ */
+static int64_t
+sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return strcmp(comm__str(right->comm), comm__str(left->comm));
+}
+
+static int64_t
+sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
+{
+	return strcmp(comm__str(right->comm), comm__str(left->comm));
+}
+
+static int64_t
+sort__comm_sort(struct hist_entry *left, struct hist_entry *right)
+{
+	return strcmp(comm__str(right->comm), comm__str(left->comm));
+}
+
+static int hist_entry__comm_snprintf(struct hist_entry *he, char *bf,
+				     size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%-*.*s", width, width, comm__str(he->comm));
+}
+
+struct sort_entry sort_comm = {
+	.se_header	= "Command",
+	.se_cmp		= sort__comm_cmp,
+	.se_collapse	= sort__comm_collapse,
+	.se_sort	= sort__comm_sort,
+	.se_snprintf	= hist_entry__comm_snprintf,
+	.se_filter	= hist_entry__thread_filter,
+	.se_width_idx	= HISTC_COMM,
+};
+
+/* --sort dso */
+
+static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r)
+{
+	struct dso *dso_l = map_l ? map_l->dso : NULL;
+	struct dso *dso_r = map_r ? map_r->dso : NULL;
+	const char *dso_name_l, *dso_name_r;
+
+	if (!dso_l || !dso_r)
+		return cmp_null(dso_r, dso_l);
+
+	if (verbose > 0) {
+		dso_name_l = dso_l->long_name;
+		dso_name_r = dso_r->long_name;
+	} else {
+		dso_name_l = dso_l->short_name;
+		dso_name_r = dso_r->short_name;
+	}
+
+	return strcmp(dso_name_l, dso_name_r);
+}
+
+static int64_t
+sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return _sort__dso_cmp(right->ms.map, left->ms.map);
+}
+
+static int _hist_entry__dso_snprintf(struct map *map, char *bf,
+				     size_t size, unsigned int width)
+{
+	if (map && map->dso) {
+		const char *dso_name = verbose > 0 ? map->dso->long_name :
+			map->dso->short_name;
+		return repsep_snprintf(bf, size, "%-*.*s", width, width, dso_name);
+	}
+
+	return repsep_snprintf(bf, size, "%-*.*s", width, width, "[unknown]");
+}
+
+static int hist_entry__dso_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	return _hist_entry__dso_snprintf(he->ms.map, bf, size, width);
+}
+
+static int hist_entry__dso_filter(struct hist_entry *he, int type, const void *arg)
+{
+	const struct dso *dso = arg;
+
+	if (type != HIST_FILTER__DSO)
+		return -1;
+
+	return dso && (!he->ms.map || he->ms.map->dso != dso);
+}
+
+struct sort_entry sort_dso = {
+	.se_header	= "Shared Object",
+	.se_cmp		= sort__dso_cmp,
+	.se_snprintf	= hist_entry__dso_snprintf,
+	.se_filter	= hist_entry__dso_filter,
+	.se_width_idx	= HISTC_DSO,
+};
+
+/* --sort symbol */
+
+static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip)
+{
+	return (int64_t)(right_ip - left_ip);
+}
+
+int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
+{
+	if (!sym_l || !sym_r)
+		return cmp_null(sym_l, sym_r);
+
+	if (sym_l == sym_r)
+		return 0;
+
+	if (sym_l->inlined || sym_r->inlined) {
+		int ret = strcmp(sym_l->name, sym_r->name);
+
+		if (ret)
+			return ret;
+		if ((sym_l->start <= sym_r->end) && (sym_l->end >= sym_r->start))
+			return 0;
+	}
+
+	if (sym_l->start != sym_r->start)
+		return (int64_t)(sym_r->start - sym_l->start);
+
+	return (int64_t)(sym_r->end - sym_l->end);
+}
+
+static int64_t
+sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	int64_t ret;
+
+	if (!left->ms.sym && !right->ms.sym)
+		return _sort__addr_cmp(left->ip, right->ip);
+
+	/*
+	 * comparing symbol address alone is not enough since it's a
+	 * relative address within a dso.
+	 */
+	if (!hists__has(left->hists, dso) || hists__has(right->hists, dso)) {
+		ret = sort__dso_cmp(left, right);
+		if (ret != 0)
+			return ret;
+	}
+
+	return _sort__sym_cmp(left->ms.sym, right->ms.sym);
+}
+
+static int64_t
+sort__sym_sort(struct hist_entry *left, struct hist_entry *right)
+{
+	if (!left->ms.sym || !right->ms.sym)
+		return cmp_null(left->ms.sym, right->ms.sym);
+
+	return strcmp(right->ms.sym->name, left->ms.sym->name);
+}
+
+static int _hist_entry__sym_snprintf(struct map_symbol *ms,
+				     u64 ip, char level, char *bf, size_t size,
+				     unsigned int width)
+{
+	struct symbol *sym = ms->sym;
+	struct map *map = ms->map;
+	size_t ret = 0;
+
+	if (verbose > 0) {
+		char o = map ? dso__symtab_origin(map->dso) : '!';
+		u64 rip = ip;
+
+		if (map && map->dso && map->dso->kernel
+		    && map->dso->adjust_symbols)
+			rip = map->unmap_ip(map, ip);
+
+		ret += repsep_snprintf(bf, size, "%-#*llx %c ",
+				       BITS_PER_LONG / 4 + 2, rip, o);
+	}
+
+	ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);
+	if (sym && map) {
+		if (sym->type == STT_OBJECT) {
+			ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name);
+			ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx",
+					ip - map->unmap_ip(map, sym->start));
+		} else {
+			ret += repsep_snprintf(bf + ret, size - ret, "%.*s",
+					       width - ret,
+					       sym->name);
+			if (sym->inlined)
+				ret += repsep_snprintf(bf + ret, size - ret,
+						       " (inlined)");
+		}
+	} else {
+		size_t len = BITS_PER_LONG / 4;
+		ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx",
+				       len, ip);
+	}
+
+	return ret;
+}
+
+int hist_entry__sym_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width)
+{
+	return _hist_entry__sym_snprintf(&he->ms, he->ip,
+					 he->level, bf, size, width);
+}
+
+static int hist_entry__sym_filter(struct hist_entry *he, int type, const void *arg)
+{
+	const char *sym = arg;
+
+	if (type != HIST_FILTER__SYMBOL)
+		return -1;
+
+	return sym && (!he->ms.sym || !strstr(he->ms.sym->name, sym));
+}
+
+struct sort_entry sort_sym = {
+	.se_header	= "Symbol",
+	.se_cmp		= sort__sym_cmp,
+	.se_sort	= sort__sym_sort,
+	.se_snprintf	= hist_entry__sym_snprintf,
+	.se_filter	= hist_entry__sym_filter,
+	.se_width_idx	= HISTC_SYMBOL,
+};
+
+/* --sort srcline */
+
+char *hist_entry__srcline(struct hist_entry *he)
+{
+	return map__srcline(he->ms.map, he->ip, he->ms.sym);
+}
+
+static int64_t
+sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	if (!left->srcline)
+		left->srcline = hist_entry__srcline(left);
+	if (!right->srcline)
+		right->srcline = hist_entry__srcline(right);
+
+	return strcmp(right->srcline, left->srcline);
+}
+
+static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf,
+					size_t size, unsigned int width)
+{
+	if (!he->srcline)
+		he->srcline = hist_entry__srcline(he);
+
+	return repsep_snprintf(bf, size, "%-.*s", width, he->srcline);
+}
+
+struct sort_entry sort_srcline = {
+	.se_header	= "Source:Line",
+	.se_cmp		= sort__srcline_cmp,
+	.se_snprintf	= hist_entry__srcline_snprintf,
+	.se_width_idx	= HISTC_SRCLINE,
+};
+
+/* --sort srcline_from */
+
+static char *addr_map_symbol__srcline(struct addr_map_symbol *ams)
+{
+	return map__srcline(ams->ms.map, ams->al_addr, ams->ms.sym);
+}
+
+static int64_t
+sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	if (!left->branch_info->srcline_from)
+		left->branch_info->srcline_from = addr_map_symbol__srcline(&left->branch_info->from);
+
+	if (!right->branch_info->srcline_from)
+		right->branch_info->srcline_from = addr_map_symbol__srcline(&right->branch_info->from);
+
+	return strcmp(right->branch_info->srcline_from, left->branch_info->srcline_from);
+}
+
+static int hist_entry__srcline_from_snprintf(struct hist_entry *he, char *bf,
+					size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%-*.*s", width, width, he->branch_info->srcline_from);
+}
+
+struct sort_entry sort_srcline_from = {
+	.se_header	= "From Source:Line",
+	.se_cmp		= sort__srcline_from_cmp,
+	.se_snprintf	= hist_entry__srcline_from_snprintf,
+	.se_width_idx	= HISTC_SRCLINE_FROM,
+};
+
+/* --sort srcline_to */
+
+static int64_t
+sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	if (!left->branch_info->srcline_to)
+		left->branch_info->srcline_to = addr_map_symbol__srcline(&left->branch_info->to);
+
+	if (!right->branch_info->srcline_to)
+		right->branch_info->srcline_to = addr_map_symbol__srcline(&right->branch_info->to);
+
+	return strcmp(right->branch_info->srcline_to, left->branch_info->srcline_to);
+}
+
+static int hist_entry__srcline_to_snprintf(struct hist_entry *he, char *bf,
+					size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%-*.*s", width, width, he->branch_info->srcline_to);
+}
+
+struct sort_entry sort_srcline_to = {
+	.se_header	= "To Source:Line",
+	.se_cmp		= sort__srcline_to_cmp,
+	.se_snprintf	= hist_entry__srcline_to_snprintf,
+	.se_width_idx	= HISTC_SRCLINE_TO,
+};
+
+static int hist_entry__sym_ipc_snprintf(struct hist_entry *he, char *bf,
+					size_t size, unsigned int width)
+{
+
+	struct symbol *sym = he->ms.sym;
+	struct annotation *notes;
+	double ipc = 0.0, coverage = 0.0;
+	char tmp[64];
+
+	if (!sym)
+		return repsep_snprintf(bf, size, "%-*s", width, "-");
+
+	notes = symbol__annotation(sym);
+
+	if (notes->hit_cycles)
+		ipc = notes->hit_insn / ((double)notes->hit_cycles);
+
+	if (notes->total_insn) {
+		coverage = notes->cover_insn * 100.0 /
+			((double)notes->total_insn);
+	}
+
+	snprintf(tmp, sizeof(tmp), "%-5.2f [%5.1f%%]", ipc, coverage);
+	return repsep_snprintf(bf, size, "%-*s", width, tmp);
+}
+
+struct sort_entry sort_sym_ipc = {
+	.se_header	= "IPC   [IPC Coverage]",
+	.se_cmp		= sort__sym_cmp,
+	.se_snprintf	= hist_entry__sym_ipc_snprintf,
+	.se_width_idx	= HISTC_SYMBOL_IPC,
+};
+
+static int hist_entry__sym_ipc_null_snprintf(struct hist_entry *he
+					     __maybe_unused,
+					     char *bf, size_t size,
+					     unsigned int width)
+{
+	char tmp[64];
+
+	snprintf(tmp, sizeof(tmp), "%-5s %2s", "-", "-");
+	return repsep_snprintf(bf, size, "%-*s", width, tmp);
+}
+
+struct sort_entry sort_sym_ipc_null = {
+	.se_header	= "IPC   [IPC Coverage]",
+	.se_cmp		= sort__sym_cmp,
+	.se_snprintf	= hist_entry__sym_ipc_null_snprintf,
+	.se_width_idx	= HISTC_SYMBOL_IPC,
+};
+
+/* --sort srcfile */
+
+static char no_srcfile[1];
+
+static char *hist_entry__get_srcfile(struct hist_entry *e)
+{
+	char *sf, *p;
+	struct map *map = e->ms.map;
+
+	if (!map)
+		return no_srcfile;
+
+	sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip),
+			 e->ms.sym, false, true, true, e->ip);
+	if (!strcmp(sf, SRCLINE_UNKNOWN))
+		return no_srcfile;
+	p = strchr(sf, ':');
+	if (p && *sf) {
+		*p = 0;
+		return sf;
+	}
+	free(sf);
+	return no_srcfile;
+}
+
+static int64_t
+sort__srcfile_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	if (!left->srcfile)
+		left->srcfile = hist_entry__get_srcfile(left);
+	if (!right->srcfile)
+		right->srcfile = hist_entry__get_srcfile(right);
+
+	return strcmp(right->srcfile, left->srcfile);
+}
+
+static int hist_entry__srcfile_snprintf(struct hist_entry *he, char *bf,
+					size_t size, unsigned int width)
+{
+	if (!he->srcfile)
+		he->srcfile = hist_entry__get_srcfile(he);
+
+	return repsep_snprintf(bf, size, "%-.*s", width, he->srcfile);
+}
+
+struct sort_entry sort_srcfile = {
+	.se_header	= "Source File",
+	.se_cmp		= sort__srcfile_cmp,
+	.se_snprintf	= hist_entry__srcfile_snprintf,
+	.se_width_idx	= HISTC_SRCFILE,
+};
+
+/* --sort parent */
+
+static int64_t
+sort__parent_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct symbol *sym_l = left->parent;
+	struct symbol *sym_r = right->parent;
+
+	if (!sym_l || !sym_r)
+		return cmp_null(sym_l, sym_r);
+
+	return strcmp(sym_r->name, sym_l->name);
+}
+
+static int hist_entry__parent_snprintf(struct hist_entry *he, char *bf,
+				       size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%-*.*s", width, width,
+			      he->parent ? he->parent->name : "[other]");
+}
+
+struct sort_entry sort_parent = {
+	.se_header	= "Parent symbol",
+	.se_cmp		= sort__parent_cmp,
+	.se_snprintf	= hist_entry__parent_snprintf,
+	.se_width_idx	= HISTC_PARENT,
+};
+
+/* --sort cpu */
+
+static int64_t
+sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return right->cpu - left->cpu;
+}
+
+static int hist_entry__cpu_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%*.*d", width, width, he->cpu);
+}
+
+struct sort_entry sort_cpu = {
+	.se_header      = "CPU",
+	.se_cmp	        = sort__cpu_cmp,
+	.se_snprintf    = hist_entry__cpu_snprintf,
+	.se_width_idx	= HISTC_CPU,
+};
+
+/* --sort cgroup_id */
+
+static int64_t _sort__cgroup_dev_cmp(u64 left_dev, u64 right_dev)
+{
+	return (int64_t)(right_dev - left_dev);
+}
+
+static int64_t _sort__cgroup_inode_cmp(u64 left_ino, u64 right_ino)
+{
+	return (int64_t)(right_ino - left_ino);
+}
+
+static int64_t
+sort__cgroup_id_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	int64_t ret;
+
+	ret = _sort__cgroup_dev_cmp(right->cgroup_id.dev, left->cgroup_id.dev);
+	if (ret != 0)
+		return ret;
+
+	return _sort__cgroup_inode_cmp(right->cgroup_id.ino,
+				       left->cgroup_id.ino);
+}
+
+static int hist_entry__cgroup_id_snprintf(struct hist_entry *he,
+					  char *bf, size_t size,
+					  unsigned int width __maybe_unused)
+{
+	return repsep_snprintf(bf, size, "%lu/0x%lx", he->cgroup_id.dev,
+			       he->cgroup_id.ino);
+}
+
+struct sort_entry sort_cgroup_id = {
+	.se_header      = "cgroup id (dev/inode)",
+	.se_cmp	        = sort__cgroup_id_cmp,
+	.se_snprintf    = hist_entry__cgroup_id_snprintf,
+	.se_width_idx	= HISTC_CGROUP_ID,
+};
+
+/* --sort cgroup */
+
+static int64_t
+sort__cgroup_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return right->cgroup - left->cgroup;
+}
+
+static int hist_entry__cgroup_snprintf(struct hist_entry *he,
+				       char *bf, size_t size,
+				       unsigned int width __maybe_unused)
+{
+	const char *cgrp_name = "N/A";
+
+	if (he->cgroup) {
+		struct cgroup *cgrp = cgroup__find(he->ms.maps->machine->env,
+						   he->cgroup);
+		if (cgrp != NULL)
+			cgrp_name = cgrp->name;
+		else
+			cgrp_name = "unknown";
+	}
+
+	return repsep_snprintf(bf, size, "%s", cgrp_name);
+}
+
+struct sort_entry sort_cgroup = {
+	.se_header      = "Cgroup",
+	.se_cmp	        = sort__cgroup_cmp,
+	.se_snprintf    = hist_entry__cgroup_snprintf,
+	.se_width_idx	= HISTC_CGROUP,
+};
+
+/* --sort socket */
+
+static int64_t
+sort__socket_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return right->socket - left->socket;
+}
+
+static int hist_entry__socket_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%*.*d", width, width-3, he->socket);
+}
+
+static int hist_entry__socket_filter(struct hist_entry *he, int type, const void *arg)
+{
+	int sk = *(const int *)arg;
+
+	if (type != HIST_FILTER__SOCKET)
+		return -1;
+
+	return sk >= 0 && he->socket != sk;
+}
+
+struct sort_entry sort_socket = {
+	.se_header      = "Socket",
+	.se_cmp	        = sort__socket_cmp,
+	.se_snprintf    = hist_entry__socket_snprintf,
+	.se_filter      = hist_entry__socket_filter,
+	.se_width_idx	= HISTC_SOCKET,
+};
+
+/* --sort time */
+
+static int64_t
+sort__time_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return right->time - left->time;
+}
+
+static int hist_entry__time_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	char he_time[32];
+
+	if (symbol_conf.nanosecs)
+		timestamp__scnprintf_nsec(he->time, he_time,
+					  sizeof(he_time));
+	else
+		timestamp__scnprintf_usec(he->time, he_time,
+					  sizeof(he_time));
+
+	return repsep_snprintf(bf, size, "%-.*s", width, he_time);
+}
+
+struct sort_entry sort_time = {
+	.se_header      = "Time",
+	.se_cmp	        = sort__time_cmp,
+	.se_snprintf    = hist_entry__time_snprintf,
+	.se_width_idx	= HISTC_TIME,
+};
+
+/* --sort trace */
+
+static char *get_trace_output(struct hist_entry *he)
+{
+	struct trace_seq seq;
+	struct evsel *evsel;
+	struct tep_record rec = {
+		.data = he->raw_data,
+		.size = he->raw_size,
+	};
+
+	evsel = hists_to_evsel(he->hists);
+
+	trace_seq_init(&seq);
+	if (symbol_conf.raw_trace) {
+		tep_print_fields(&seq, he->raw_data, he->raw_size,
+				 evsel->tp_format);
+	} else {
+		tep_print_event(evsel->tp_format->tep,
+				&seq, &rec, "%s", TEP_PRINT_INFO);
+	}
+	/*
+	 * Trim the buffer, it starts at 4KB and we're not going to
+	 * add anything more to this buffer.
+	 */
+	return realloc(seq.buffer, seq.len + 1);
+}
+
+static int64_t
+sort__trace_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct evsel *evsel;
+
+	evsel = hists_to_evsel(left->hists);
+	if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
+		return 0;
+
+	if (left->trace_output == NULL)
+		left->trace_output = get_trace_output(left);
+	if (right->trace_output == NULL)
+		right->trace_output = get_trace_output(right);
+
+	return strcmp(right->trace_output, left->trace_output);
+}
+
+static int hist_entry__trace_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	struct evsel *evsel;
+
+	evsel = hists_to_evsel(he->hists);
+	if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
+		return scnprintf(bf, size, "%-.*s", width, "N/A");
+
+	if (he->trace_output == NULL)
+		he->trace_output = get_trace_output(he);
+	return repsep_snprintf(bf, size, "%-.*s", width, he->trace_output);
+}
+
+struct sort_entry sort_trace = {
+	.se_header      = "Trace output",
+	.se_cmp	        = sort__trace_cmp,
+	.se_snprintf    = hist_entry__trace_snprintf,
+	.se_width_idx	= HISTC_TRACE,
+};
+
+/* sort keys for branch stacks */
+
+static int64_t
+sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	if (!left->branch_info || !right->branch_info)
+		return cmp_null(left->branch_info, right->branch_info);
+
+	return _sort__dso_cmp(left->branch_info->from.ms.map,
+			      right->branch_info->from.ms.map);
+}
+
+static int hist_entry__dso_from_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	if (he->branch_info)
+		return _hist_entry__dso_snprintf(he->branch_info->from.ms.map,
+						 bf, size, width);
+	else
+		return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+}
+
+static int hist_entry__dso_from_filter(struct hist_entry *he, int type,
+				       const void *arg)
+{
+	const struct dso *dso = arg;
+
+	if (type != HIST_FILTER__DSO)
+		return -1;
+
+	return dso && (!he->branch_info || !he->branch_info->from.ms.map ||
+		       he->branch_info->from.ms.map->dso != dso);
+}
+
+static int64_t
+sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	if (!left->branch_info || !right->branch_info)
+		return cmp_null(left->branch_info, right->branch_info);
+
+	return _sort__dso_cmp(left->branch_info->to.ms.map,
+			      right->branch_info->to.ms.map);
+}
+
+static int hist_entry__dso_to_snprintf(struct hist_entry *he, char *bf,
+				       size_t size, unsigned int width)
+{
+	if (he->branch_info)
+		return _hist_entry__dso_snprintf(he->branch_info->to.ms.map,
+						 bf, size, width);
+	else
+		return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+}
+
+static int hist_entry__dso_to_filter(struct hist_entry *he, int type,
+				     const void *arg)
+{
+	const struct dso *dso = arg;
+
+	if (type != HIST_FILTER__DSO)
+		return -1;
+
+	return dso && (!he->branch_info || !he->branch_info->to.ms.map ||
+		       he->branch_info->to.ms.map->dso != dso);
+}
+
+static int64_t
+sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct addr_map_symbol *from_l, *from_r;
+
+	if (!left->branch_info || !right->branch_info)
+		return cmp_null(left->branch_info, right->branch_info);
+
+	from_l = &left->branch_info->from;
+	from_r = &right->branch_info->from;
+
+	if (!from_l->ms.sym && !from_r->ms.sym)
+		return _sort__addr_cmp(from_l->addr, from_r->addr);
+
+	return _sort__sym_cmp(from_l->ms.sym, from_r->ms.sym);
+}
+
+static int64_t
+sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct addr_map_symbol *to_l, *to_r;
+
+	if (!left->branch_info || !right->branch_info)
+		return cmp_null(left->branch_info, right->branch_info);
+
+	to_l = &left->branch_info->to;
+	to_r = &right->branch_info->to;
+
+	if (!to_l->ms.sym && !to_r->ms.sym)
+		return _sort__addr_cmp(to_l->addr, to_r->addr);
+
+	return _sort__sym_cmp(to_l->ms.sym, to_r->ms.sym);
+}
+
+static int hist_entry__sym_from_snprintf(struct hist_entry *he, char *bf,
+					 size_t size, unsigned int width)
+{
+	if (he->branch_info) {
+		struct addr_map_symbol *from = &he->branch_info->from;
+
+		return _hist_entry__sym_snprintf(&from->ms, from->al_addr,
+						 from->al_level, bf, size, width);
+	}
+
+	return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+}
+
+static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf,
+				       size_t size, unsigned int width)
+{
+	if (he->branch_info) {
+		struct addr_map_symbol *to = &he->branch_info->to;
+
+		return _hist_entry__sym_snprintf(&to->ms, to->al_addr,
+						 to->al_level, bf, size, width);
+	}
+
+	return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+}
+
+static int hist_entry__sym_from_filter(struct hist_entry *he, int type,
+				       const void *arg)
+{
+	const char *sym = arg;
+
+	if (type != HIST_FILTER__SYMBOL)
+		return -1;
+
+	return sym && !(he->branch_info && he->branch_info->from.ms.sym &&
+			strstr(he->branch_info->from.ms.sym->name, sym));
+}
+
+static int hist_entry__sym_to_filter(struct hist_entry *he, int type,
+				       const void *arg)
+{
+	const char *sym = arg;
+
+	if (type != HIST_FILTER__SYMBOL)
+		return -1;
+
+	return sym && !(he->branch_info && he->branch_info->to.ms.sym &&
+		        strstr(he->branch_info->to.ms.sym->name, sym));
+}
+
+struct sort_entry sort_dso_from = {
+	.se_header	= "Source Shared Object",
+	.se_cmp		= sort__dso_from_cmp,
+	.se_snprintf	= hist_entry__dso_from_snprintf,
+	.se_filter	= hist_entry__dso_from_filter,
+	.se_width_idx	= HISTC_DSO_FROM,
+};
+
+struct sort_entry sort_dso_to = {
+	.se_header	= "Target Shared Object",
+	.se_cmp		= sort__dso_to_cmp,
+	.se_snprintf	= hist_entry__dso_to_snprintf,
+	.se_filter	= hist_entry__dso_to_filter,
+	.se_width_idx	= HISTC_DSO_TO,
+};
+
+struct sort_entry sort_sym_from = {
+	.se_header	= "Source Symbol",
+	.se_cmp		= sort__sym_from_cmp,
+	.se_snprintf	= hist_entry__sym_from_snprintf,
+	.se_filter	= hist_entry__sym_from_filter,
+	.se_width_idx	= HISTC_SYMBOL_FROM,
+};
+
+struct sort_entry sort_sym_to = {
+	.se_header	= "Target Symbol",
+	.se_cmp		= sort__sym_to_cmp,
+	.se_snprintf	= hist_entry__sym_to_snprintf,
+	.se_filter	= hist_entry__sym_to_filter,
+	.se_width_idx	= HISTC_SYMBOL_TO,
+};
+
+static int _hist_entry__addr_snprintf(struct map_symbol *ms,
+				     u64 ip, char level, char *bf, size_t size,
+				     unsigned int width)
+{
+	struct symbol *sym = ms->sym;
+	struct map *map = ms->map;
+	size_t ret = 0, offs;
+
+	ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);
+	if (sym && map) {
+		if (sym->type == STT_OBJECT) {
+			ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name);
+			ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx",
+					ip - map->unmap_ip(map, sym->start));
+		} else {
+			ret += repsep_snprintf(bf + ret, size - ret, "%.*s",
+					       width - ret,
+					       sym->name);
+			offs = ip - sym->start;
+			if (offs)
+				ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx", offs);
+		}
+	} else {
+		size_t len = BITS_PER_LONG / 4;
+		ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx",
+				       len, ip);
+	}
+
+	return ret;
+}
+
+static int hist_entry__addr_from_snprintf(struct hist_entry *he, char *bf,
+					 size_t size, unsigned int width)
+{
+	if (he->branch_info) {
+		struct addr_map_symbol *from = &he->branch_info->from;
+
+		return _hist_entry__addr_snprintf(&from->ms, from->al_addr,
+						 he->level, bf, size, width);
+	}
+
+	return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+}
+
+static int hist_entry__addr_to_snprintf(struct hist_entry *he, char *bf,
+				       size_t size, unsigned int width)
+{
+	if (he->branch_info) {
+		struct addr_map_symbol *to = &he->branch_info->to;
+
+		return _hist_entry__addr_snprintf(&to->ms, to->al_addr,
+						 he->level, bf, size, width);
+	}
+
+	return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+}
+
+static int64_t
+sort__addr_from_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct addr_map_symbol *from_l;
+	struct addr_map_symbol *from_r;
+	int64_t ret;
+
+	if (!left->branch_info || !right->branch_info)
+		return cmp_null(left->branch_info, right->branch_info);
+
+	from_l = &left->branch_info->from;
+	from_r = &right->branch_info->from;
+
+	/*
+	 * comparing symbol address alone is not enough since it's a
+	 * relative address within a dso.
+	 */
+	ret = _sort__dso_cmp(from_l->ms.map, from_r->ms.map);
+	if (ret != 0)
+		return ret;
+
+	return _sort__addr_cmp(from_l->addr, from_r->addr);
+}
+
+static int64_t
+sort__addr_to_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct addr_map_symbol *to_l;
+	struct addr_map_symbol *to_r;
+	int64_t ret;
+
+	if (!left->branch_info || !right->branch_info)
+		return cmp_null(left->branch_info, right->branch_info);
+
+	to_l = &left->branch_info->to;
+	to_r = &right->branch_info->to;
+
+	/*
+	 * comparing symbol address alone is not enough since it's a
+	 * relative address within a dso.
+	 */
+	ret = _sort__dso_cmp(to_l->ms.map, to_r->ms.map);
+	if (ret != 0)
+		return ret;
+
+	return _sort__addr_cmp(to_l->addr, to_r->addr);
+}
+
+struct sort_entry sort_addr_from = {
+	.se_header	= "Source Address",
+	.se_cmp		= sort__addr_from_cmp,
+	.se_snprintf	= hist_entry__addr_from_snprintf,
+	.se_filter	= hist_entry__sym_from_filter, /* shared with sym_from */
+	.se_width_idx	= HISTC_ADDR_FROM,
+};
+
+struct sort_entry sort_addr_to = {
+	.se_header	= "Target Address",
+	.se_cmp		= sort__addr_to_cmp,
+	.se_snprintf	= hist_entry__addr_to_snprintf,
+	.se_filter	= hist_entry__sym_to_filter, /* shared with sym_to */
+	.se_width_idx	= HISTC_ADDR_TO,
+};
+
+
+static int64_t
+sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	unsigned char mp, p;
+
+	if (!left->branch_info || !right->branch_info)
+		return cmp_null(left->branch_info, right->branch_info);
+
+	mp = left->branch_info->flags.mispred != right->branch_info->flags.mispred;
+	p  = left->branch_info->flags.predicted != right->branch_info->flags.predicted;
+	return mp || p;
+}
+
+static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width){
+	static const char *out = "N/A";
+
+	if (he->branch_info) {
+		if (he->branch_info->flags.predicted)
+			out = "N";
+		else if (he->branch_info->flags.mispred)
+			out = "Y";
+	}
+
+	return repsep_snprintf(bf, size, "%-*.*s", width, width, out);
+}
+
+static int64_t
+sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	if (!left->branch_info || !right->branch_info)
+		return cmp_null(left->branch_info, right->branch_info);
+
+	return left->branch_info->flags.cycles -
+		right->branch_info->flags.cycles;
+}
+
+static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	if (!he->branch_info)
+		return scnprintf(bf, size, "%-.*s", width, "N/A");
+	if (he->branch_info->flags.cycles == 0)
+		return repsep_snprintf(bf, size, "%-*s", width, "-");
+	return repsep_snprintf(bf, size, "%-*hd", width,
+			       he->branch_info->flags.cycles);
+}
+
+struct sort_entry sort_cycles = {
+	.se_header	= "Basic Block Cycles",
+	.se_cmp		= sort__cycles_cmp,
+	.se_snprintf	= hist_entry__cycles_snprintf,
+	.se_width_idx	= HISTC_CYCLES,
+};
+
+/* --sort daddr_sym */
+int64_t
+sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	uint64_t l = 0, r = 0;
+
+	if (left->mem_info)
+		l = left->mem_info->daddr.addr;
+	if (right->mem_info)
+		r = right->mem_info->daddr.addr;
+
+	return (int64_t)(r - l);
+}
+
+static int hist_entry__daddr_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	uint64_t addr = 0;
+	struct map_symbol *ms = NULL;
+
+	if (he->mem_info) {
+		addr = he->mem_info->daddr.addr;
+		ms = &he->mem_info->daddr.ms;
+	}
+	return _hist_entry__sym_snprintf(ms, addr, he->level, bf, size, width);
+}
+
+int64_t
+sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	uint64_t l = 0, r = 0;
+
+	if (left->mem_info)
+		l = left->mem_info->iaddr.addr;
+	if (right->mem_info)
+		r = right->mem_info->iaddr.addr;
+
+	return (int64_t)(r - l);
+}
+
+static int hist_entry__iaddr_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	uint64_t addr = 0;
+	struct map_symbol *ms = NULL;
+
+	if (he->mem_info) {
+		addr = he->mem_info->iaddr.addr;
+		ms   = &he->mem_info->iaddr.ms;
+	}
+	return _hist_entry__sym_snprintf(ms, addr, he->level, bf, size, width);
+}
+
+static int64_t
+sort__dso_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct map *map_l = NULL;
+	struct map *map_r = NULL;
+
+	if (left->mem_info)
+		map_l = left->mem_info->daddr.ms.map;
+	if (right->mem_info)
+		map_r = right->mem_info->daddr.ms.map;
+
+	return _sort__dso_cmp(map_l, map_r);
+}
+
+static int hist_entry__dso_daddr_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	struct map *map = NULL;
+
+	if (he->mem_info)
+		map = he->mem_info->daddr.ms.map;
+
+	return _hist_entry__dso_snprintf(map, bf, size, width);
+}
+
+static int64_t
+sort__locked_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	union perf_mem_data_src data_src_l;
+	union perf_mem_data_src data_src_r;
+
+	if (left->mem_info)
+		data_src_l = left->mem_info->data_src;
+	else
+		data_src_l.mem_lock = PERF_MEM_LOCK_NA;
+
+	if (right->mem_info)
+		data_src_r = right->mem_info->data_src;
+	else
+		data_src_r.mem_lock = PERF_MEM_LOCK_NA;
+
+	return (int64_t)(data_src_r.mem_lock - data_src_l.mem_lock);
+}
+
+static int hist_entry__locked_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	char out[10];
+
+	perf_mem__lck_scnprintf(out, sizeof(out), he->mem_info);
+	return repsep_snprintf(bf, size, "%.*s", width, out);
+}
+
+static int64_t
+sort__tlb_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	union perf_mem_data_src data_src_l;
+	union perf_mem_data_src data_src_r;
+
+	if (left->mem_info)
+		data_src_l = left->mem_info->data_src;
+	else
+		data_src_l.mem_dtlb = PERF_MEM_TLB_NA;
+
+	if (right->mem_info)
+		data_src_r = right->mem_info->data_src;
+	else
+		data_src_r.mem_dtlb = PERF_MEM_TLB_NA;
+
+	return (int64_t)(data_src_r.mem_dtlb - data_src_l.mem_dtlb);
+}
+
+static int hist_entry__tlb_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	char out[64];
+
+	perf_mem__tlb_scnprintf(out, sizeof(out), he->mem_info);
+	return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+static int64_t
+sort__lvl_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	union perf_mem_data_src data_src_l;
+	union perf_mem_data_src data_src_r;
+
+	if (left->mem_info)
+		data_src_l = left->mem_info->data_src;
+	else
+		data_src_l.mem_lvl = PERF_MEM_LVL_NA;
+
+	if (right->mem_info)
+		data_src_r = right->mem_info->data_src;
+	else
+		data_src_r.mem_lvl = PERF_MEM_LVL_NA;
+
+	return (int64_t)(data_src_r.mem_lvl - data_src_l.mem_lvl);
+}
+
+static int hist_entry__lvl_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	char out[64];
+
+	perf_mem__lvl_scnprintf(out, sizeof(out), he->mem_info);
+	return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+static int64_t
+sort__snoop_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	union perf_mem_data_src data_src_l;
+	union perf_mem_data_src data_src_r;
+
+	if (left->mem_info)
+		data_src_l = left->mem_info->data_src;
+	else
+		data_src_l.mem_snoop = PERF_MEM_SNOOP_NA;
+
+	if (right->mem_info)
+		data_src_r = right->mem_info->data_src;
+	else
+		data_src_r.mem_snoop = PERF_MEM_SNOOP_NA;
+
+	return (int64_t)(data_src_r.mem_snoop - data_src_l.mem_snoop);
+}
+
+static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	char out[64];
+
+	perf_mem__snp_scnprintf(out, sizeof(out), he->mem_info);
+	return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+int64_t
+sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	u64 l, r;
+	struct map *l_map, *r_map;
+	int rc;
+
+	if (!left->mem_info)  return -1;
+	if (!right->mem_info) return 1;
+
+	/* group event types together */
+	if (left->cpumode > right->cpumode) return -1;
+	if (left->cpumode < right->cpumode) return 1;
+
+	l_map = left->mem_info->daddr.ms.map;
+	r_map = right->mem_info->daddr.ms.map;
+
+	/* if both are NULL, jump to sort on al_addr instead */
+	if (!l_map && !r_map)
+		goto addr;
+
+	if (!l_map) return -1;
+	if (!r_map) return 1;
+
+	rc = dso__cmp_id(l_map->dso, r_map->dso);
+	if (rc)
+		return rc;
+	/*
+	 * Addresses with no major/minor numbers are assumed to be
+	 * anonymous in userspace.  Sort those on pid then address.
+	 *
+	 * The kernel and non-zero major/minor mapped areas are
+	 * assumed to be unity mapped.  Sort those on address.
+	 */
+
+	if ((left->cpumode != PERF_RECORD_MISC_KERNEL) &&
+	    (!(l_map->flags & MAP_SHARED)) &&
+	    !l_map->dso->id.maj && !l_map->dso->id.min &&
+	    !l_map->dso->id.ino && !l_map->dso->id.ino_generation) {
+		/* userspace anonymous */
+
+		if (left->thread->pid_ > right->thread->pid_) return -1;
+		if (left->thread->pid_ < right->thread->pid_) return 1;
+	}
+
+addr:
+	/* al_addr does all the right addr - start + offset calculations */
+	l = cl_address(left->mem_info->daddr.al_addr);
+	r = cl_address(right->mem_info->daddr.al_addr);
+
+	if (l > r) return -1;
+	if (l < r) return 1;
+
+	return 0;
+}
+
+static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf,
+					  size_t size, unsigned int width)
+{
+
+	uint64_t addr = 0;
+	struct map_symbol *ms = NULL;
+	char level = he->level;
+
+	if (he->mem_info) {
+		struct map *map = he->mem_info->daddr.ms.map;
+
+		addr = cl_address(he->mem_info->daddr.al_addr);
+		ms = &he->mem_info->daddr.ms;
+
+		/* print [s] for shared data mmaps */
+		if ((he->cpumode != PERF_RECORD_MISC_KERNEL) &&
+		     map && !(map->prot & PROT_EXEC) &&
+		    (map->flags & MAP_SHARED) &&
+		    (map->dso->id.maj || map->dso->id.min ||
+		     map->dso->id.ino || map->dso->id.ino_generation))
+			level = 's';
+		else if (!map)
+			level = 'X';
+	}
+	return _hist_entry__sym_snprintf(ms, addr, level, bf, size, width);
+}
+
+struct sort_entry sort_mispredict = {
+	.se_header	= "Branch Mispredicted",
+	.se_cmp		= sort__mispredict_cmp,
+	.se_snprintf	= hist_entry__mispredict_snprintf,
+	.se_width_idx	= HISTC_MISPREDICT,
+};
+
+static int64_t
+sort__weight_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return left->weight - right->weight;
+}
+
+static int hist_entry__local_weight_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%-*llu", width, he->weight);
+}
+
+struct sort_entry sort_local_weight = {
+	.se_header	= "Local Weight",
+	.se_cmp		= sort__weight_cmp,
+	.se_snprintf	= hist_entry__local_weight_snprintf,
+	.se_width_idx	= HISTC_LOCAL_WEIGHT,
+};
+
+static int hist_entry__global_weight_snprintf(struct hist_entry *he, char *bf,
+					      size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%-*llu", width,
+			       he->weight * he->stat.nr_events);
+}
+
+struct sort_entry sort_global_weight = {
+	.se_header	= "Weight",
+	.se_cmp		= sort__weight_cmp,
+	.se_snprintf	= hist_entry__global_weight_snprintf,
+	.se_width_idx	= HISTC_GLOBAL_WEIGHT,
+};
+
+static int64_t
+sort__ins_lat_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return left->ins_lat - right->ins_lat;
+}
+
+static int hist_entry__local_ins_lat_snprintf(struct hist_entry *he, char *bf,
+					      size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%-*u", width, he->ins_lat);
+}
+
+struct sort_entry sort_local_ins_lat = {
+	.se_header	= "Local INSTR Latency",
+	.se_cmp		= sort__ins_lat_cmp,
+	.se_snprintf	= hist_entry__local_ins_lat_snprintf,
+	.se_width_idx	= HISTC_LOCAL_INS_LAT,
+};
+
+static int hist_entry__global_ins_lat_snprintf(struct hist_entry *he, char *bf,
+					       size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%-*u", width,
+			       he->ins_lat * he->stat.nr_events);
+}
+
+struct sort_entry sort_global_ins_lat = {
+	.se_header	= "INSTR Latency",
+	.se_cmp		= sort__ins_lat_cmp,
+	.se_snprintf	= hist_entry__global_ins_lat_snprintf,
+	.se_width_idx	= HISTC_GLOBAL_INS_LAT,
+};
+
+static int64_t
+sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return left->p_stage_cyc - right->p_stage_cyc;
+}
+
+static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
+					size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%-*u", width,
+			he->p_stage_cyc * he->stat.nr_events);
+}
+
+
+static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
+					size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
+}
+
+struct sort_entry sort_local_p_stage_cyc = {
+	.se_header      = "Local Pipeline Stage Cycle",
+	.se_cmp         = sort__p_stage_cyc_cmp,
+	.se_snprintf	= hist_entry__p_stage_cyc_snprintf,
+	.se_width_idx	= HISTC_LOCAL_P_STAGE_CYC,
+};
+
+struct sort_entry sort_global_p_stage_cyc = {
+	.se_header      = "Pipeline Stage Cycle",
+	.se_cmp         = sort__p_stage_cyc_cmp,
+	.se_snprintf    = hist_entry__global_p_stage_cyc_snprintf,
+	.se_width_idx   = HISTC_GLOBAL_P_STAGE_CYC,
+};
+
+struct sort_entry sort_mem_daddr_sym = {
+	.se_header	= "Data Symbol",
+	.se_cmp		= sort__daddr_cmp,
+	.se_snprintf	= hist_entry__daddr_snprintf,
+	.se_width_idx	= HISTC_MEM_DADDR_SYMBOL,
+};
+
+struct sort_entry sort_mem_iaddr_sym = {
+	.se_header	= "Code Symbol",
+	.se_cmp		= sort__iaddr_cmp,
+	.se_snprintf	= hist_entry__iaddr_snprintf,
+	.se_width_idx	= HISTC_MEM_IADDR_SYMBOL,
+};
+
+struct sort_entry sort_mem_daddr_dso = {
+	.se_header	= "Data Object",
+	.se_cmp		= sort__dso_daddr_cmp,
+	.se_snprintf	= hist_entry__dso_daddr_snprintf,
+	.se_width_idx	= HISTC_MEM_DADDR_DSO,
+};
+
+struct sort_entry sort_mem_locked = {
+	.se_header	= "Locked",
+	.se_cmp		= sort__locked_cmp,
+	.se_snprintf	= hist_entry__locked_snprintf,
+	.se_width_idx	= HISTC_MEM_LOCKED,
+};
+
+struct sort_entry sort_mem_tlb = {
+	.se_header	= "TLB access",
+	.se_cmp		= sort__tlb_cmp,
+	.se_snprintf	= hist_entry__tlb_snprintf,
+	.se_width_idx	= HISTC_MEM_TLB,
+};
+
+struct sort_entry sort_mem_lvl = {
+	.se_header	= "Memory access",
+	.se_cmp		= sort__lvl_cmp,
+	.se_snprintf	= hist_entry__lvl_snprintf,
+	.se_width_idx	= HISTC_MEM_LVL,
+};
+
+struct sort_entry sort_mem_snoop = {
+	.se_header	= "Snoop",
+	.se_cmp		= sort__snoop_cmp,
+	.se_snprintf	= hist_entry__snoop_snprintf,
+	.se_width_idx	= HISTC_MEM_SNOOP,
+};
+
+struct sort_entry sort_mem_dcacheline = {
+	.se_header	= "Data Cacheline",
+	.se_cmp		= sort__dcacheline_cmp,
+	.se_snprintf	= hist_entry__dcacheline_snprintf,
+	.se_width_idx	= HISTC_MEM_DCACHELINE,
+};
+
+static int64_t
+sort__blocked_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	union perf_mem_data_src data_src_l;
+	union perf_mem_data_src data_src_r;
+
+	if (left->mem_info)
+		data_src_l = left->mem_info->data_src;
+	else
+		data_src_l.mem_blk = PERF_MEM_BLK_NA;
+
+	if (right->mem_info)
+		data_src_r = right->mem_info->data_src;
+	else
+		data_src_r.mem_blk = PERF_MEM_BLK_NA;
+
+	return (int64_t)(data_src_r.mem_blk - data_src_l.mem_blk);
+}
+
+static int hist_entry__blocked_snprintf(struct hist_entry *he, char *bf,
+					size_t size, unsigned int width)
+{
+	char out[16];
+
+	perf_mem__blk_scnprintf(out, sizeof(out), he->mem_info);
+	return repsep_snprintf(bf, size, "%.*s", width, out);
+}
+
+struct sort_entry sort_mem_blocked = {
+	.se_header	= "Blocked",
+	.se_cmp		= sort__blocked_cmp,
+	.se_snprintf	= hist_entry__blocked_snprintf,
+	.se_width_idx	= HISTC_MEM_BLOCKED,
+};
+
+static int64_t
+sort__phys_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	uint64_t l = 0, r = 0;
+
+	if (left->mem_info)
+		l = left->mem_info->daddr.phys_addr;
+	if (right->mem_info)
+		r = right->mem_info->daddr.phys_addr;
+
+	return (int64_t)(r - l);
+}
+
+static int hist_entry__phys_daddr_snprintf(struct hist_entry *he, char *bf,
+					   size_t size, unsigned int width)
+{
+	uint64_t addr = 0;
+	size_t ret = 0;
+	size_t len = BITS_PER_LONG / 4;
+
+	addr = he->mem_info->daddr.phys_addr;
+
+	ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", he->level);
+
+	ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", len, addr);
+
+	ret += repsep_snprintf(bf + ret, size - ret, "%-*s", width - ret, "");
+
+	if (ret > width)
+		bf[width] = '\0';
+
+	return width;
+}
+
+struct sort_entry sort_mem_phys_daddr = {
+	.se_header	= "Data Physical Address",
+	.se_cmp		= sort__phys_daddr_cmp,
+	.se_snprintf	= hist_entry__phys_daddr_snprintf,
+	.se_width_idx	= HISTC_MEM_PHYS_DADDR,
+};
+
+static int64_t
+sort__data_page_size_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	uint64_t l = 0, r = 0;
+
+	if (left->mem_info)
+		l = left->mem_info->daddr.data_page_size;
+	if (right->mem_info)
+		r = right->mem_info->daddr.data_page_size;
+
+	return (int64_t)(r - l);
+}
+
+static int hist_entry__data_page_size_snprintf(struct hist_entry *he, char *bf,
+					  size_t size, unsigned int width)
+{
+	char str[PAGE_SIZE_NAME_LEN];
+
+	return repsep_snprintf(bf, size, "%-*s", width,
+			       get_page_size_name(he->mem_info->daddr.data_page_size, str));
+}
+
+struct sort_entry sort_mem_data_page_size = {
+	.se_header	= "Data Page Size",
+	.se_cmp		= sort__data_page_size_cmp,
+	.se_snprintf	= hist_entry__data_page_size_snprintf,
+	.se_width_idx	= HISTC_MEM_DATA_PAGE_SIZE,
+};
+
+static int64_t
+sort__code_page_size_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	uint64_t l = left->code_page_size;
+	uint64_t r = right->code_page_size;
+
+	return (int64_t)(r - l);
+}
+
+static int hist_entry__code_page_size_snprintf(struct hist_entry *he, char *bf,
+					  size_t size, unsigned int width)
+{
+	char str[PAGE_SIZE_NAME_LEN];
+
+	return repsep_snprintf(bf, size, "%-*s", width,
+			       get_page_size_name(he->code_page_size, str));
+}
+
+struct sort_entry sort_code_page_size = {
+	.se_header	= "Code Page Size",
+	.se_cmp		= sort__code_page_size_cmp,
+	.se_snprintf	= hist_entry__code_page_size_snprintf,
+	.se_width_idx	= HISTC_CODE_PAGE_SIZE,
+};
+
+static int64_t
+sort__abort_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	if (!left->branch_info || !right->branch_info)
+		return cmp_null(left->branch_info, right->branch_info);
+
+	return left->branch_info->flags.abort !=
+		right->branch_info->flags.abort;
+}
+
+static int hist_entry__abort_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	static const char *out = "N/A";
+
+	if (he->branch_info) {
+		if (he->branch_info->flags.abort)
+			out = "A";
+		else
+			out = ".";
+	}
+
+	return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+struct sort_entry sort_abort = {
+	.se_header	= "Transaction abort",
+	.se_cmp		= sort__abort_cmp,
+	.se_snprintf	= hist_entry__abort_snprintf,
+	.se_width_idx	= HISTC_ABORT,
+};
+
+static int64_t
+sort__in_tx_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	if (!left->branch_info || !right->branch_info)
+		return cmp_null(left->branch_info, right->branch_info);
+
+	return left->branch_info->flags.in_tx !=
+		right->branch_info->flags.in_tx;
+}
+
+static int hist_entry__in_tx_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	static const char *out = "N/A";
+
+	if (he->branch_info) {
+		if (he->branch_info->flags.in_tx)
+			out = "T";
+		else
+			out = ".";
+	}
+
+	return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+struct sort_entry sort_in_tx = {
+	.se_header	= "Branch in transaction",
+	.se_cmp		= sort__in_tx_cmp,
+	.se_snprintf	= hist_entry__in_tx_snprintf,
+	.se_width_idx	= HISTC_IN_TX,
+};
+
+static int64_t
+sort__transaction_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return left->transaction - right->transaction;
+}
+
+static inline char *add_str(char *p, const char *str)
+{
+	strcpy(p, str);
+	return p + strlen(str);
+}
+
+static struct txbit {
+	unsigned flag;
+	const char *name;
+	int skip_for_len;
+} txbits[] = {
+	{ PERF_TXN_ELISION,        "EL ",        0 },
+	{ PERF_TXN_TRANSACTION,    "TX ",        1 },
+	{ PERF_TXN_SYNC,           "SYNC ",      1 },
+	{ PERF_TXN_ASYNC,          "ASYNC ",     0 },
+	{ PERF_TXN_RETRY,          "RETRY ",     0 },
+	{ PERF_TXN_CONFLICT,       "CON ",       0 },
+	{ PERF_TXN_CAPACITY_WRITE, "CAP-WRITE ", 1 },
+	{ PERF_TXN_CAPACITY_READ,  "CAP-READ ",  0 },
+	{ 0, NULL, 0 }
+};
+
+int hist_entry__transaction_len(void)
+{
+	int i;
+	int len = 0;
+
+	for (i = 0; txbits[i].name; i++) {
+		if (!txbits[i].skip_for_len)
+			len += strlen(txbits[i].name);
+	}
+	len += 4; /* :XX<space> */
+	return len;
+}
+
+static int hist_entry__transaction_snprintf(struct hist_entry *he, char *bf,
+					    size_t size, unsigned int width)
+{
+	u64 t = he->transaction;
+	char buf[128];
+	char *p = buf;
+	int i;
+
+	buf[0] = 0;
+	for (i = 0; txbits[i].name; i++)
+		if (txbits[i].flag & t)
+			p = add_str(p, txbits[i].name);
+	if (t && !(t & (PERF_TXN_SYNC|PERF_TXN_ASYNC)))
+		p = add_str(p, "NEITHER ");
+	if (t & PERF_TXN_ABORT_MASK) {
+		sprintf(p, ":%" PRIx64,
+			(t & PERF_TXN_ABORT_MASK) >>
+			PERF_TXN_ABORT_SHIFT);
+		p += strlen(p);
+	}
+
+	return repsep_snprintf(bf, size, "%-*s", width, buf);
+}
+
+struct sort_entry sort_transaction = {
+	.se_header	= "Transaction                ",
+	.se_cmp		= sort__transaction_cmp,
+	.se_snprintf	= hist_entry__transaction_snprintf,
+	.se_width_idx	= HISTC_TRANSACTION,
+};
+
+/* --sort symbol_size */
+
+static int64_t _sort__sym_size_cmp(struct symbol *sym_l, struct symbol *sym_r)
+{
+	int64_t size_l = sym_l != NULL ? symbol__size(sym_l) : 0;
+	int64_t size_r = sym_r != NULL ? symbol__size(sym_r) : 0;
+
+	return size_l < size_r ? -1 :
+		size_l == size_r ? 0 : 1;
+}
+
+static int64_t
+sort__sym_size_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return _sort__sym_size_cmp(right->ms.sym, left->ms.sym);
+}
+
+static int _hist_entry__sym_size_snprintf(struct symbol *sym, char *bf,
+					  size_t bf_size, unsigned int width)
+{
+	if (sym)
+		return repsep_snprintf(bf, bf_size, "%*d", width, symbol__size(sym));
+
+	return repsep_snprintf(bf, bf_size, "%*s", width, "unknown");
+}
+
+static int hist_entry__sym_size_snprintf(struct hist_entry *he, char *bf,
+					 size_t size, unsigned int width)
+{
+	return _hist_entry__sym_size_snprintf(he->ms.sym, bf, size, width);
+}
+
+struct sort_entry sort_sym_size = {
+	.se_header	= "Symbol size",
+	.se_cmp		= sort__sym_size_cmp,
+	.se_snprintf	= hist_entry__sym_size_snprintf,
+	.se_width_idx	= HISTC_SYM_SIZE,
+};
+
+/* --sort dso_size */
+
+static int64_t _sort__dso_size_cmp(struct map *map_l, struct map *map_r)
+{
+	int64_t size_l = map_l != NULL ? map__size(map_l) : 0;
+	int64_t size_r = map_r != NULL ? map__size(map_r) : 0;
+
+	return size_l < size_r ? -1 :
+		size_l == size_r ? 0 : 1;
+}
+
+static int64_t
+sort__dso_size_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return _sort__dso_size_cmp(right->ms.map, left->ms.map);
+}
+
+static int _hist_entry__dso_size_snprintf(struct map *map, char *bf,
+					  size_t bf_size, unsigned int width)
+{
+	if (map && map->dso)
+		return repsep_snprintf(bf, bf_size, "%*d", width,
+				       map__size(map));
+
+	return repsep_snprintf(bf, bf_size, "%*s", width, "unknown");
+}
+
+static int hist_entry__dso_size_snprintf(struct hist_entry *he, char *bf,
+					 size_t size, unsigned int width)
+{
+	return _hist_entry__dso_size_snprintf(he->ms.map, bf, size, width);
+}
+
+struct sort_entry sort_dso_size = {
+	.se_header	= "DSO size",
+	.se_cmp		= sort__dso_size_cmp,
+	.se_snprintf	= hist_entry__dso_size_snprintf,
+	.se_width_idx	= HISTC_DSO_SIZE,
+};
+
+/* --sort dso_size */
+
+static int64_t
+sort__addr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	u64 left_ip = left->ip;
+	u64 right_ip = right->ip;
+	struct map *left_map = left->ms.map;
+	struct map *right_map = right->ms.map;
+
+	if (left_map)
+		left_ip = left_map->unmap_ip(left_map, left_ip);
+	if (right_map)
+		right_ip = right_map->unmap_ip(right_map, right_ip);
+
+	return _sort__addr_cmp(left_ip, right_ip);
+}
+
+static int hist_entry__addr_snprintf(struct hist_entry *he, char *bf,
+				     size_t size, unsigned int width)
+{
+	u64 ip = he->ip;
+	struct map *map = he->ms.map;
+
+	if (map)
+		ip = map->unmap_ip(map, ip);
+
+	return repsep_snprintf(bf, size, "%-#*llx", width, ip);
+}
+
+struct sort_entry sort_addr = {
+	.se_header	= "Address",
+	.se_cmp		= sort__addr_cmp,
+	.se_snprintf	= hist_entry__addr_snprintf,
+	.se_width_idx	= HISTC_ADDR,
+};
+
+
+struct sort_dimension {
+	const char		*name;
+	struct sort_entry	*entry;
+	int			taken;
+};
+
+int __weak arch_support_sort_key(const char *sort_key __maybe_unused)
+{
+	return 0;
+}
+
+const char * __weak arch_perf_header_entry(const char *se_header)
+{
+	return se_header;
+}
+
+static void sort_dimension_add_dynamic_header(struct sort_dimension *sd)
+{
+	sd->entry->se_header = arch_perf_header_entry(sd->entry->se_header);
+}
+
+#define DIM(d, n, func) [d] = { .name = n, .entry = &(func) }
+
+static struct sort_dimension common_sort_dimensions[] = {
+	DIM(SORT_PID, "pid", sort_thread),
+	DIM(SORT_COMM, "comm", sort_comm),
+	DIM(SORT_DSO, "dso", sort_dso),
+	DIM(SORT_SYM, "symbol", sort_sym),
+	DIM(SORT_PARENT, "parent", sort_parent),
+	DIM(SORT_CPU, "cpu", sort_cpu),
+	DIM(SORT_SOCKET, "socket", sort_socket),
+	DIM(SORT_SRCLINE, "srcline", sort_srcline),
+	DIM(SORT_SRCFILE, "srcfile", sort_srcfile),
+	DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight),
+	DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight),
+	DIM(SORT_TRANSACTION, "transaction", sort_transaction),
+	DIM(SORT_TRACE, "trace", sort_trace),
+	DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size),
+	DIM(SORT_DSO_SIZE, "dso_size", sort_dso_size),
+	DIM(SORT_CGROUP, "cgroup", sort_cgroup),
+	DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id),
+	DIM(SORT_SYM_IPC_NULL, "ipc_null", sort_sym_ipc_null),
+	DIM(SORT_TIME, "time", sort_time),
+	DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
+	DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
+	DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
+	DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
+	DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
+	DIM(SORT_ADDR, "addr", sort_addr),
+};
+
+#undef DIM
+
+#define DIM(d, n, func) [d - __SORT_BRANCH_STACK] = { .name = n, .entry = &(func) }
+
+static struct sort_dimension bstack_sort_dimensions[] = {
+	DIM(SORT_DSO_FROM, "dso_from", sort_dso_from),
+	DIM(SORT_DSO_TO, "dso_to", sort_dso_to),
+	DIM(SORT_SYM_FROM, "symbol_from", sort_sym_from),
+	DIM(SORT_SYM_TO, "symbol_to", sort_sym_to),
+	DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
+	DIM(SORT_IN_TX, "in_tx", sort_in_tx),
+	DIM(SORT_ABORT, "abort", sort_abort),
+	DIM(SORT_CYCLES, "cycles", sort_cycles),
+	DIM(SORT_SRCLINE_FROM, "srcline_from", sort_srcline_from),
+	DIM(SORT_SRCLINE_TO, "srcline_to", sort_srcline_to),
+	DIM(SORT_SYM_IPC, "ipc_lbr", sort_sym_ipc),
+	DIM(SORT_ADDR_FROM, "addr_from", sort_addr_from),
+	DIM(SORT_ADDR_TO, "addr_to", sort_addr_to),
+};
+
+#undef DIM
+
+#define DIM(d, n, func) [d - __SORT_MEMORY_MODE] = { .name = n, .entry = &(func) }
+
+static struct sort_dimension memory_sort_dimensions[] = {
+	DIM(SORT_MEM_DADDR_SYMBOL, "symbol_daddr", sort_mem_daddr_sym),
+	DIM(SORT_MEM_IADDR_SYMBOL, "symbol_iaddr", sort_mem_iaddr_sym),
+	DIM(SORT_MEM_DADDR_DSO, "dso_daddr", sort_mem_daddr_dso),
+	DIM(SORT_MEM_LOCKED, "locked", sort_mem_locked),
+	DIM(SORT_MEM_TLB, "tlb", sort_mem_tlb),
+	DIM(SORT_MEM_LVL, "mem", sort_mem_lvl),
+	DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop),
+	DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline),
+	DIM(SORT_MEM_PHYS_DADDR, "phys_daddr", sort_mem_phys_daddr),
+	DIM(SORT_MEM_DATA_PAGE_SIZE, "data_page_size", sort_mem_data_page_size),
+	DIM(SORT_MEM_BLOCKED, "blocked", sort_mem_blocked),
+};
+
+#undef DIM
+
+struct hpp_dimension {
+	const char		*name;
+	struct perf_hpp_fmt	*fmt;
+	int			taken;
+};
+
+#define DIM(d, n) { .name = n, .fmt = &perf_hpp__format[d], }
+
+static struct hpp_dimension hpp_sort_dimensions[] = {
+	DIM(PERF_HPP__OVERHEAD, "overhead"),
+	DIM(PERF_HPP__OVERHEAD_SYS, "overhead_sys"),
+	DIM(PERF_HPP__OVERHEAD_US, "overhead_us"),
+	DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"),
+	DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"),
+	DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"),
+	DIM(PERF_HPP__SAMPLES, "sample"),
+	DIM(PERF_HPP__PERIOD, "period"),
+};
+
+#undef DIM
+
+struct hpp_sort_entry {
+	struct perf_hpp_fmt hpp;
+	struct sort_entry *se;
+};
+
+void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists)
+{
+	struct hpp_sort_entry *hse;
+
+	if (!perf_hpp__is_sort_entry(fmt))
+		return;
+
+	hse = container_of(fmt, struct hpp_sort_entry, hpp);
+	hists__new_col_len(hists, hse->se->se_width_idx, strlen(fmt->name));
+}
+
+static int __sort__hpp_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+			      struct hists *hists, int line __maybe_unused,
+			      int *span __maybe_unused)
+{
+	struct hpp_sort_entry *hse;
+	size_t len = fmt->user_len;
+
+	hse = container_of(fmt, struct hpp_sort_entry, hpp);
+
+	if (!len)
+		len = hists__col_len(hists, hse->se->se_width_idx);
+
+	return scnprintf(hpp->buf, hpp->size, "%-*.*s", len, len, fmt->name);
+}
+
+static int __sort__hpp_width(struct perf_hpp_fmt *fmt,
+			     struct perf_hpp *hpp __maybe_unused,
+			     struct hists *hists)
+{
+	struct hpp_sort_entry *hse;
+	size_t len = fmt->user_len;
+
+	hse = container_of(fmt, struct hpp_sort_entry, hpp);
+
+	if (!len)
+		len = hists__col_len(hists, hse->se->se_width_idx);
+
+	return len;
+}
+
+static int __sort__hpp_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+			     struct hist_entry *he)
+{
+	struct hpp_sort_entry *hse;
+	size_t len = fmt->user_len;
+
+	hse = container_of(fmt, struct hpp_sort_entry, hpp);
+
+	if (!len)
+		len = hists__col_len(he->hists, hse->se->se_width_idx);
+
+	return hse->se->se_snprintf(he, hpp->buf, hpp->size, len);
+}
+
+static int64_t __sort__hpp_cmp(struct perf_hpp_fmt *fmt,
+			       struct hist_entry *a, struct hist_entry *b)
+{
+	struct hpp_sort_entry *hse;
+
+	hse = container_of(fmt, struct hpp_sort_entry, hpp);
+	return hse->se->se_cmp(a, b);
+}
+
+static int64_t __sort__hpp_collapse(struct perf_hpp_fmt *fmt,
+				    struct hist_entry *a, struct hist_entry *b)
+{
+	struct hpp_sort_entry *hse;
+	int64_t (*collapse_fn)(struct hist_entry *, struct hist_entry *);
+
+	hse = container_of(fmt, struct hpp_sort_entry, hpp);
+	collapse_fn = hse->se->se_collapse ?: hse->se->se_cmp;
+	return collapse_fn(a, b);
+}
+
+static int64_t __sort__hpp_sort(struct perf_hpp_fmt *fmt,
+				struct hist_entry *a, struct hist_entry *b)
+{
+	struct hpp_sort_entry *hse;
+	int64_t (*sort_fn)(struct hist_entry *, struct hist_entry *);
+
+	hse = container_of(fmt, struct hpp_sort_entry, hpp);
+	sort_fn = hse->se->se_sort ?: hse->se->se_cmp;
+	return sort_fn(a, b);
+}
+
+bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format)
+{
+	return format->header == __sort__hpp_header;
+}
+
+#define MK_SORT_ENTRY_CHK(key)					\
+bool perf_hpp__is_ ## key ## _entry(struct perf_hpp_fmt *fmt)	\
+{								\
+	struct hpp_sort_entry *hse;				\
+								\
+	if (!perf_hpp__is_sort_entry(fmt))			\
+		return false;					\
+								\
+	hse = container_of(fmt, struct hpp_sort_entry, hpp);	\
+	return hse->se == &sort_ ## key ;			\
+}
+
+MK_SORT_ENTRY_CHK(trace)
+MK_SORT_ENTRY_CHK(srcline)
+MK_SORT_ENTRY_CHK(srcfile)
+MK_SORT_ENTRY_CHK(thread)
+MK_SORT_ENTRY_CHK(comm)
+MK_SORT_ENTRY_CHK(dso)
+MK_SORT_ENTRY_CHK(sym)
+
+
+static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+	struct hpp_sort_entry *hse_a;
+	struct hpp_sort_entry *hse_b;
+
+	if (!perf_hpp__is_sort_entry(a) || !perf_hpp__is_sort_entry(b))
+		return false;
+
+	hse_a = container_of(a, struct hpp_sort_entry, hpp);
+	hse_b = container_of(b, struct hpp_sort_entry, hpp);
+
+	return hse_a->se == hse_b->se;
+}
+
+static void hse_free(struct perf_hpp_fmt *fmt)
+{
+	struct hpp_sort_entry *hse;
+
+	hse = container_of(fmt, struct hpp_sort_entry, hpp);
+	free(hse);
+}
+
+static struct hpp_sort_entry *
+__sort_dimension__alloc_hpp(struct sort_dimension *sd, int level)
+{
+	struct hpp_sort_entry *hse;
+
+	hse = malloc(sizeof(*hse));
+	if (hse == NULL) {
+		pr_err("Memory allocation failed\n");
+		return NULL;
+	}
+
+	hse->se = sd->entry;
+	hse->hpp.name = sd->entry->se_header;
+	hse->hpp.header = __sort__hpp_header;
+	hse->hpp.width = __sort__hpp_width;
+	hse->hpp.entry = __sort__hpp_entry;
+	hse->hpp.color = NULL;
+
+	hse->hpp.cmp = __sort__hpp_cmp;
+	hse->hpp.collapse = __sort__hpp_collapse;
+	hse->hpp.sort = __sort__hpp_sort;
+	hse->hpp.equal = __sort__hpp_equal;
+	hse->hpp.free = hse_free;
+
+	INIT_LIST_HEAD(&hse->hpp.list);
+	INIT_LIST_HEAD(&hse->hpp.sort_list);
+	hse->hpp.elide = false;
+	hse->hpp.len = 0;
+	hse->hpp.user_len = 0;
+	hse->hpp.level = level;
+
+	return hse;
+}
+
+static void hpp_free(struct perf_hpp_fmt *fmt)
+{
+	free(fmt);
+}
+
+static struct perf_hpp_fmt *__hpp_dimension__alloc_hpp(struct hpp_dimension *hd,
+						       int level)
+{
+	struct perf_hpp_fmt *fmt;
+
+	fmt = memdup(hd->fmt, sizeof(*fmt));
+	if (fmt) {
+		INIT_LIST_HEAD(&fmt->list);
+		INIT_LIST_HEAD(&fmt->sort_list);
+		fmt->free = hpp_free;
+		fmt->level = level;
+	}
+
+	return fmt;
+}
+
+int hist_entry__filter(struct hist_entry *he, int type, const void *arg)
+{
+	struct perf_hpp_fmt *fmt;
+	struct hpp_sort_entry *hse;
+	int ret = -1;
+	int r;
+
+	perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+		if (!perf_hpp__is_sort_entry(fmt))
+			continue;
+
+		hse = container_of(fmt, struct hpp_sort_entry, hpp);
+		if (hse->se->se_filter == NULL)
+			continue;
+
+		/*
+		 * hist entry is filtered if any of sort key in the hpp list
+		 * is applied.  But it should skip non-matched filter types.
+		 */
+		r = hse->se->se_filter(he, type, arg);
+		if (r >= 0) {
+			if (ret < 0)
+				ret = 0;
+			ret |= r;
+		}
+	}
+
+	return ret;
+}
+
+static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd,
+					  struct perf_hpp_list *list,
+					  int level)
+{
+	struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, level);
+
+	if (hse == NULL)
+		return -1;
+
+	perf_hpp_list__register_sort_field(list, &hse->hpp);
+	return 0;
+}
+
+static int __sort_dimension__add_hpp_output(struct sort_dimension *sd,
+					    struct perf_hpp_list *list)
+{
+	struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, 0);
+
+	if (hse == NULL)
+		return -1;
+
+	perf_hpp_list__column_register(list, &hse->hpp);
+	return 0;
+}
+
+struct hpp_dynamic_entry {
+	struct perf_hpp_fmt hpp;
+	struct evsel *evsel;
+	struct tep_format_field *field;
+	unsigned dynamic_len;
+	bool raw_trace;
+};
+
+static int hde_width(struct hpp_dynamic_entry *hde)
+{
+	if (!hde->hpp.len) {
+		int len = hde->dynamic_len;
+		int namelen = strlen(hde->field->name);
+		int fieldlen = hde->field->size;
+
+		if (namelen > len)
+			len = namelen;
+
+		if (!(hde->field->flags & TEP_FIELD_IS_STRING)) {
+			/* length for print hex numbers */
+			fieldlen = hde->field->size * 2 + 2;
+		}
+		if (fieldlen > len)
+			len = fieldlen;
+
+		hde->hpp.len = len;
+	}
+	return hde->hpp.len;
+}
+
+static void update_dynamic_len(struct hpp_dynamic_entry *hde,
+			       struct hist_entry *he)
+{
+	char *str, *pos;
+	struct tep_format_field *field = hde->field;
+	size_t namelen;
+	bool last = false;
+
+	if (hde->raw_trace)
+		return;
+
+	/* parse pretty print result and update max length */
+	if (!he->trace_output)
+		he->trace_output = get_trace_output(he);
+
+	namelen = strlen(field->name);
+	str = he->trace_output;
+
+	while (str) {
+		pos = strchr(str, ' ');
+		if (pos == NULL) {
+			last = true;
+			pos = str + strlen(str);
+		}
+
+		if (!strncmp(str, field->name, namelen)) {
+			size_t len;
+
+			str += namelen + 1;
+			len = pos - str;
+
+			if (len > hde->dynamic_len)
+				hde->dynamic_len = len;
+			break;
+		}
+
+		if (last)
+			str = NULL;
+		else
+			str = pos + 1;
+	}
+}
+
+static int __sort__hde_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+			      struct hists *hists __maybe_unused,
+			      int line __maybe_unused,
+			      int *span __maybe_unused)
+{
+	struct hpp_dynamic_entry *hde;
+	size_t len = fmt->user_len;
+
+	hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+	if (!len)
+		len = hde_width(hde);
+
+	return scnprintf(hpp->buf, hpp->size, "%*.*s", len, len, hde->field->name);
+}
+
+static int __sort__hde_width(struct perf_hpp_fmt *fmt,
+			     struct perf_hpp *hpp __maybe_unused,
+			     struct hists *hists __maybe_unused)
+{
+	struct hpp_dynamic_entry *hde;
+	size_t len = fmt->user_len;
+
+	hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+	if (!len)
+		len = hde_width(hde);
+
+	return len;
+}
+
+bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists)
+{
+	struct hpp_dynamic_entry *hde;
+
+	hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+	return hists_to_evsel(hists) == hde->evsel;
+}
+
+static int __sort__hde_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+			     struct hist_entry *he)
+{
+	struct hpp_dynamic_entry *hde;
+	size_t len = fmt->user_len;
+	char *str, *pos;
+	struct tep_format_field *field;
+	size_t namelen;
+	bool last = false;
+	int ret;
+
+	hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+	if (!len)
+		len = hde_width(hde);
+
+	if (hde->raw_trace)
+		goto raw_field;
+
+	if (!he->trace_output)
+		he->trace_output = get_trace_output(he);
+
+	field = hde->field;
+	namelen = strlen(field->name);
+	str = he->trace_output;
+
+	while (str) {
+		pos = strchr(str, ' ');
+		if (pos == NULL) {
+			last = true;
+			pos = str + strlen(str);
+		}
+
+		if (!strncmp(str, field->name, namelen)) {
+			str += namelen + 1;
+			str = strndup(str, pos - str);
+
+			if (str == NULL)
+				return scnprintf(hpp->buf, hpp->size,
+						 "%*.*s", len, len, "ERROR");
+			break;
+		}
+
+		if (last)
+			str = NULL;
+		else
+			str = pos + 1;
+	}
+
+	if (str == NULL) {
+		struct trace_seq seq;
+raw_field:
+		trace_seq_init(&seq);
+		tep_print_field(&seq, he->raw_data, hde->field);
+		str = seq.buffer;
+	}
+
+	ret = scnprintf(hpp->buf, hpp->size, "%*.*s", len, len, str);
+	free(str);
+	return ret;
+}
+
+static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
+			       struct hist_entry *a, struct hist_entry *b)
+{
+	struct hpp_dynamic_entry *hde;
+	struct tep_format_field *field;
+	unsigned offset, size;
+
+	hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+	if (b == NULL) {
+		update_dynamic_len(hde, a);
+		return 0;
+	}
+
+	field = hde->field;
+	if (field->flags & TEP_FIELD_IS_DYNAMIC) {
+		unsigned long long dyn;
+
+		tep_read_number_field(field, a->raw_data, &dyn);
+		offset = dyn & 0xffff;
+		size = (dyn >> 16) & 0xffff;
+		if (field->flags & TEP_FIELD_IS_RELATIVE)
+			offset += field->offset + field->size;
+
+		/* record max width for output */
+		if (size > hde->dynamic_len)
+			hde->dynamic_len = size;
+	} else {
+		offset = field->offset;
+		size = field->size;
+	}
+
+	return memcmp(a->raw_data + offset, b->raw_data + offset, size);
+}
+
+bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *fmt)
+{
+	return fmt->cmp == __sort__hde_cmp;
+}
+
+static bool __sort__hde_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+	struct hpp_dynamic_entry *hde_a;
+	struct hpp_dynamic_entry *hde_b;
+
+	if (!perf_hpp__is_dynamic_entry(a) || !perf_hpp__is_dynamic_entry(b))
+		return false;
+
+	hde_a = container_of(a, struct hpp_dynamic_entry, hpp);
+	hde_b = container_of(b, struct hpp_dynamic_entry, hpp);
+
+	return hde_a->field == hde_b->field;
+}
+
+static void hde_free(struct perf_hpp_fmt *fmt)
+{
+	struct hpp_dynamic_entry *hde;
+
+	hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+	free(hde);
+}
+
+static struct hpp_dynamic_entry *
+__alloc_dynamic_entry(struct evsel *evsel, struct tep_format_field *field,
+		      int level)
+{
+	struct hpp_dynamic_entry *hde;
+
+	hde = malloc(sizeof(*hde));
+	if (hde == NULL) {
+		pr_debug("Memory allocation failed\n");
+		return NULL;
+	}
+
+	hde->evsel = evsel;
+	hde->field = field;
+	hde->dynamic_len = 0;
+
+	hde->hpp.name = field->name;
+	hde->hpp.header = __sort__hde_header;
+	hde->hpp.width  = __sort__hde_width;
+	hde->hpp.entry  = __sort__hde_entry;
+	hde->hpp.color  = NULL;
+
+	hde->hpp.cmp = __sort__hde_cmp;
+	hde->hpp.collapse = __sort__hde_cmp;
+	hde->hpp.sort = __sort__hde_cmp;
+	hde->hpp.equal = __sort__hde_equal;
+	hde->hpp.free = hde_free;
+
+	INIT_LIST_HEAD(&hde->hpp.list);
+	INIT_LIST_HEAD(&hde->hpp.sort_list);
+	hde->hpp.elide = false;
+	hde->hpp.len = 0;
+	hde->hpp.user_len = 0;
+	hde->hpp.level = level;
+
+	return hde;
+}
+
+struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt)
+{
+	struct perf_hpp_fmt *new_fmt = NULL;
+
+	if (perf_hpp__is_sort_entry(fmt)) {
+		struct hpp_sort_entry *hse, *new_hse;
+
+		hse = container_of(fmt, struct hpp_sort_entry, hpp);
+		new_hse = memdup(hse, sizeof(*hse));
+		if (new_hse)
+			new_fmt = &new_hse->hpp;
+	} else if (perf_hpp__is_dynamic_entry(fmt)) {
+		struct hpp_dynamic_entry *hde, *new_hde;
+
+		hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+		new_hde = memdup(hde, sizeof(*hde));
+		if (new_hde)
+			new_fmt = &new_hde->hpp;
+	} else {
+		new_fmt = memdup(fmt, sizeof(*fmt));
+	}
+
+	INIT_LIST_HEAD(&new_fmt->list);
+	INIT_LIST_HEAD(&new_fmt->sort_list);
+
+	return new_fmt;
+}
+
+static int parse_field_name(char *str, char **event, char **field, char **opt)
+{
+	char *event_name, *field_name, *opt_name;
+
+	event_name = str;
+	field_name = strchr(str, '.');
+
+	if (field_name) {
+		*field_name++ = '\0';
+	} else {
+		event_name = NULL;
+		field_name = str;
+	}
+
+	opt_name = strchr(field_name, '/');
+	if (opt_name)
+		*opt_name++ = '\0';
+
+	*event = event_name;
+	*field = field_name;
+	*opt   = opt_name;
+
+	return 0;
+}
+
+/* find match evsel using a given event name.  The event name can be:
+ *   1. '%' + event index (e.g. '%1' for first event)
+ *   2. full event name (e.g. sched:sched_switch)
+ *   3. partial event name (should not contain ':')
+ */
+static struct evsel *find_evsel(struct evlist *evlist, char *event_name)
+{
+	struct evsel *evsel = NULL;
+	struct evsel *pos;
+	bool full_name;
+
+	/* case 1 */
+	if (event_name[0] == '%') {
+		int nr = strtol(event_name+1, NULL, 0);
+
+		if (nr > evlist->core.nr_entries)
+			return NULL;
+
+		evsel = evlist__first(evlist);
+		while (--nr > 0)
+			evsel = evsel__next(evsel);
+
+		return evsel;
+	}
+
+	full_name = !!strchr(event_name, ':');
+	evlist__for_each_entry(evlist, pos) {
+		/* case 2 */
+		if (full_name && !strcmp(pos->name, event_name))
+			return pos;
+		/* case 3 */
+		if (!full_name && strstr(pos->name, event_name)) {
+			if (evsel) {
+				pr_debug("'%s' event is ambiguous: it can be %s or %s\n",
+					 event_name, evsel->name, pos->name);
+				return NULL;
+			}
+			evsel = pos;
+		}
+	}
+
+	return evsel;
+}
+
+static int __dynamic_dimension__add(struct evsel *evsel,
+				    struct tep_format_field *field,
+				    bool raw_trace, int level)
+{
+	struct hpp_dynamic_entry *hde;
+
+	hde = __alloc_dynamic_entry(evsel, field, level);
+	if (hde == NULL)
+		return -ENOMEM;
+
+	hde->raw_trace = raw_trace;
+
+	perf_hpp__register_sort_field(&hde->hpp);
+	return 0;
+}
+
+static int add_evsel_fields(struct evsel *evsel, bool raw_trace, int level)
+{
+	int ret;
+	struct tep_format_field *field;
+
+	field = evsel->tp_format->format.fields;
+	while (field) {
+		ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
+		if (ret < 0)
+			return ret;
+
+		field = field->next;
+	}
+	return 0;
+}
+
+static int add_all_dynamic_fields(struct evlist *evlist, bool raw_trace,
+				  int level)
+{
+	int ret;
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
+			continue;
+
+		ret = add_evsel_fields(evsel, raw_trace, level);
+		if (ret < 0)
+			return ret;
+	}
+	return 0;
+}
+
+static int add_all_matching_fields(struct evlist *evlist,
+				   char *field_name, bool raw_trace, int level)
+{
+	int ret = -ESRCH;
+	struct evsel *evsel;
+	struct tep_format_field *field;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
+			continue;
+
+		field = tep_find_any_field(evsel->tp_format, field_name);
+		if (field == NULL)
+			continue;
+
+		ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
+		if (ret < 0)
+			break;
+	}
+	return ret;
+}
+
+static int add_dynamic_entry(struct evlist *evlist, const char *tok,
+			     int level)
+{
+	char *str, *event_name, *field_name, *opt_name;
+	struct evsel *evsel;
+	struct tep_format_field *field;
+	bool raw_trace = symbol_conf.raw_trace;
+	int ret = 0;
+
+	if (evlist == NULL)
+		return -ENOENT;
+
+	str = strdup(tok);
+	if (str == NULL)
+		return -ENOMEM;
+
+	if (parse_field_name(str, &event_name, &field_name, &opt_name) < 0) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (opt_name) {
+		if (strcmp(opt_name, "raw")) {
+			pr_debug("unsupported field option %s\n", opt_name);
+			ret = -EINVAL;
+			goto out;
+		}
+		raw_trace = true;
+	}
+
+	if (!strcmp(field_name, "trace_fields")) {
+		ret = add_all_dynamic_fields(evlist, raw_trace, level);
+		goto out;
+	}
+
+	if (event_name == NULL) {
+		ret = add_all_matching_fields(evlist, field_name, raw_trace, level);
+		goto out;
+	}
+
+	evsel = find_evsel(evlist, event_name);
+	if (evsel == NULL) {
+		pr_debug("Cannot find event: %s\n", event_name);
+		ret = -ENOENT;
+		goto out;
+	}
+
+	if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) {
+		pr_debug("%s is not a tracepoint event\n", event_name);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (!strcmp(field_name, "*")) {
+		ret = add_evsel_fields(evsel, raw_trace, level);
+	} else {
+		field = tep_find_any_field(evsel->tp_format, field_name);
+		if (field == NULL) {
+			pr_debug("Cannot find event field for %s.%s\n",
+				 event_name, field_name);
+			return -ENOENT;
+		}
+
+		ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
+	}
+
+out:
+	free(str);
+	return ret;
+}
+
+static int __sort_dimension__add(struct sort_dimension *sd,
+				 struct perf_hpp_list *list,
+				 int level)
+{
+	if (sd->taken)
+		return 0;
+
+	if (__sort_dimension__add_hpp_sort(sd, list, level) < 0)
+		return -1;
+
+	if (sd->entry->se_collapse)
+		list->need_collapse = 1;
+
+	sd->taken = 1;
+
+	return 0;
+}
+
+static int __hpp_dimension__add(struct hpp_dimension *hd,
+				struct perf_hpp_list *list,
+				int level)
+{
+	struct perf_hpp_fmt *fmt;
+
+	if (hd->taken)
+		return 0;
+
+	fmt = __hpp_dimension__alloc_hpp(hd, level);
+	if (!fmt)
+		return -1;
+
+	hd->taken = 1;
+	perf_hpp_list__register_sort_field(list, fmt);
+	return 0;
+}
+
+static int __sort_dimension__add_output(struct perf_hpp_list *list,
+					struct sort_dimension *sd)
+{
+	if (sd->taken)
+		return 0;
+
+	if (__sort_dimension__add_hpp_output(sd, list) < 0)
+		return -1;
+
+	sd->taken = 1;
+	return 0;
+}
+
+static int __hpp_dimension__add_output(struct perf_hpp_list *list,
+				       struct hpp_dimension *hd)
+{
+	struct perf_hpp_fmt *fmt;
+
+	if (hd->taken)
+		return 0;
+
+	fmt = __hpp_dimension__alloc_hpp(hd, 0);
+	if (!fmt)
+		return -1;
+
+	hd->taken = 1;
+	perf_hpp_list__column_register(list, fmt);
+	return 0;
+}
+
+int hpp_dimension__add_output(unsigned col)
+{
+	BUG_ON(col >= PERF_HPP__MAX_INDEX);
+	return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]);
+}
+
+int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
+			struct evlist *evlist,
+			int level)
+{
+	unsigned int i, j;
+
+	/*
+	 * Check to see if there are any arch specific
+	 * sort dimensions not applicable for the current
+	 * architecture. If so, Skip that sort key since
+	 * we don't want to display it in the output fields.
+	 */
+	for (j = 0; j < ARRAY_SIZE(arch_specific_sort_keys); j++) {
+		if (!strcmp(arch_specific_sort_keys[j], tok) &&
+				!arch_support_sort_key(tok)) {
+			return 0;
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
+		struct sort_dimension *sd = &common_sort_dimensions[i];
+
+		if (strncasecmp(tok, sd->name, strlen(tok)))
+			continue;
+
+		for (j = 0; j < ARRAY_SIZE(dynamic_headers); j++) {
+			if (!strcmp(dynamic_headers[j], sd->name))
+				sort_dimension_add_dynamic_header(sd);
+		}
+
+		if (sd->entry == &sort_parent) {
+			int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
+			if (ret) {
+				char err[BUFSIZ];
+
+				regerror(ret, &parent_regex, err, sizeof(err));
+				pr_err("Invalid regex: %s\n%s", parent_pattern, err);
+				return -EINVAL;
+			}
+			list->parent = 1;
+		} else if (sd->entry == &sort_sym) {
+			list->sym = 1;
+			/*
+			 * perf diff displays the performance difference amongst
+			 * two or more perf.data files. Those files could come
+			 * from different binaries. So we should not compare
+			 * their ips, but the name of symbol.
+			 */
+			if (sort__mode == SORT_MODE__DIFF)
+				sd->entry->se_collapse = sort__sym_sort;
+
+		} else if (sd->entry == &sort_dso) {
+			list->dso = 1;
+		} else if (sd->entry == &sort_socket) {
+			list->socket = 1;
+		} else if (sd->entry == &sort_thread) {
+			list->thread = 1;
+		} else if (sd->entry == &sort_comm) {
+			list->comm = 1;
+		}
+
+		return __sort_dimension__add(sd, list, level);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
+		struct hpp_dimension *hd = &hpp_sort_dimensions[i];
+
+		if (strncasecmp(tok, hd->name, strlen(tok)))
+			continue;
+
+		return __hpp_dimension__add(hd, list, level);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
+		struct sort_dimension *sd = &bstack_sort_dimensions[i];
+
+		if (strncasecmp(tok, sd->name, strlen(tok)))
+			continue;
+
+		if (sort__mode != SORT_MODE__BRANCH)
+			return -EINVAL;
+
+		if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to)
+			list->sym = 1;
+
+		__sort_dimension__add(sd, list, level);
+		return 0;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) {
+		struct sort_dimension *sd = &memory_sort_dimensions[i];
+
+		if (strncasecmp(tok, sd->name, strlen(tok)))
+			continue;
+
+		if (sort__mode != SORT_MODE__MEMORY)
+			return -EINVAL;
+
+		if (sd->entry == &sort_mem_dcacheline && cacheline_size() == 0)
+			return -EINVAL;
+
+		if (sd->entry == &sort_mem_daddr_sym)
+			list->sym = 1;
+
+		__sort_dimension__add(sd, list, level);
+		return 0;
+	}
+
+	if (!add_dynamic_entry(evlist, tok, level))
+		return 0;
+
+	return -ESRCH;
+}
+
+static int setup_sort_list(struct perf_hpp_list *list, char *str,
+			   struct evlist *evlist)
+{
+	char *tmp, *tok;
+	int ret = 0;
+	int level = 0;
+	int next_level = 1;
+	bool in_group = false;
+
+	do {
+		tok = str;
+		tmp = strpbrk(str, "{}, ");
+		if (tmp) {
+			if (in_group)
+				next_level = level;
+			else
+				next_level = level + 1;
+
+			if (*tmp == '{')
+				in_group = true;
+			else if (*tmp == '}')
+				in_group = false;
+
+			*tmp = '\0';
+			str = tmp + 1;
+		}
+
+		if (*tok) {
+			ret = sort_dimension__add(list, tok, evlist, level);
+			if (ret == -EINVAL) {
+				if (!cacheline_size() && !strncasecmp(tok, "dcacheline", strlen(tok)))
+					ui__error("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system");
+				else
+					ui__error("Invalid --sort key: `%s'", tok);
+				break;
+			} else if (ret == -ESRCH) {
+				ui__error("Unknown --sort key: `%s'", tok);
+				break;
+			}
+		}
+
+		level = next_level;
+	} while (tmp);
+
+	return ret;
+}
+
+static const char *get_default_sort_order(struct evlist *evlist)
+{
+	const char *default_sort_orders[] = {
+		default_sort_order,
+		default_branch_sort_order,
+		default_mem_sort_order,
+		default_top_sort_order,
+		default_diff_sort_order,
+		default_tracepoint_sort_order,
+	};
+	bool use_trace = true;
+	struct evsel *evsel;
+
+	BUG_ON(sort__mode >= ARRAY_SIZE(default_sort_orders));
+
+	if (evlist == NULL || evlist__empty(evlist))
+		goto out_no_evlist;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) {
+			use_trace = false;
+			break;
+		}
+	}
+
+	if (use_trace) {
+		sort__mode = SORT_MODE__TRACEPOINT;
+		if (symbol_conf.raw_trace)
+			return "trace_fields";
+	}
+out_no_evlist:
+	return default_sort_orders[sort__mode];
+}
+
+static int setup_sort_order(struct evlist *evlist)
+{
+	char *new_sort_order;
+
+	/*
+	 * Append '+'-prefixed sort order to the default sort
+	 * order string.
+	 */
+	if (!sort_order || is_strict_order(sort_order))
+		return 0;
+
+	if (sort_order[1] == '\0') {
+		ui__error("Invalid --sort key: `+'");
+		return -EINVAL;
+	}
+
+	/*
+	 * We allocate new sort_order string, but we never free it,
+	 * because it's checked over the rest of the code.
+	 */
+	if (asprintf(&new_sort_order, "%s,%s",
+		     get_default_sort_order(evlist), sort_order + 1) < 0) {
+		pr_err("Not enough memory to set up --sort");
+		return -ENOMEM;
+	}
+
+	sort_order = new_sort_order;
+	return 0;
+}
+
+/*
+ * Adds 'pre,' prefix into 'str' is 'pre' is
+ * not already part of 'str'.
+ */
+static char *prefix_if_not_in(const char *pre, char *str)
+{
+	char *n;
+
+	if (!str || strstr(str, pre))
+		return str;
+
+	if (asprintf(&n, "%s,%s", pre, str) < 0)
+		n = NULL;
+
+	free(str);
+	return n;
+}
+
+static char *setup_overhead(char *keys)
+{
+	if (sort__mode == SORT_MODE__DIFF)
+		return keys;
+
+	keys = prefix_if_not_in("overhead", keys);
+
+	if (symbol_conf.cumulate_callchain)
+		keys = prefix_if_not_in("overhead_children", keys);
+
+	return keys;
+}
+
+static int __setup_sorting(struct evlist *evlist)
+{
+	char *str;
+	const char *sort_keys;
+	int ret = 0;
+
+	ret = setup_sort_order(evlist);
+	if (ret)
+		return ret;
+
+	sort_keys = sort_order;
+	if (sort_keys == NULL) {
+		if (is_strict_order(field_order)) {
+			/*
+			 * If user specified field order but no sort order,
+			 * we'll honor it and not add default sort orders.
+			 */
+			return 0;
+		}
+
+		sort_keys = get_default_sort_order(evlist);
+	}
+
+	str = strdup(sort_keys);
+	if (str == NULL) {
+		pr_err("Not enough memory to setup sort keys");
+		return -ENOMEM;
+	}
+
+	/*
+	 * Prepend overhead fields for backward compatibility.
+	 */
+	if (!is_strict_order(field_order)) {
+		str = setup_overhead(str);
+		if (str == NULL) {
+			pr_err("Not enough memory to setup overhead keys");
+			return -ENOMEM;
+		}
+	}
+
+	ret = setup_sort_list(&perf_hpp_list, str, evlist);
+
+	free(str);
+	return ret;
+}
+
+void perf_hpp__set_elide(int idx, bool elide)
+{
+	struct perf_hpp_fmt *fmt;
+	struct hpp_sort_entry *hse;
+
+	perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
+		if (!perf_hpp__is_sort_entry(fmt))
+			continue;
+
+		hse = container_of(fmt, struct hpp_sort_entry, hpp);
+		if (hse->se->se_width_idx == idx) {
+			fmt->elide = elide;
+			break;
+		}
+	}
+}
+
+static bool __get_elide(struct strlist *list, const char *list_name, FILE *fp)
+{
+	if (list && strlist__nr_entries(list) == 1) {
+		if (fp != NULL)
+			fprintf(fp, "# %s: %s\n", list_name,
+				strlist__entry(list, 0)->s);
+		return true;
+	}
+	return false;
+}
+
+static bool get_elide(int idx, FILE *output)
+{
+	switch (idx) {
+	case HISTC_SYMBOL:
+		return __get_elide(symbol_conf.sym_list, "symbol", output);
+	case HISTC_DSO:
+		return __get_elide(symbol_conf.dso_list, "dso", output);
+	case HISTC_COMM:
+		return __get_elide(symbol_conf.comm_list, "comm", output);
+	default:
+		break;
+	}
+
+	if (sort__mode != SORT_MODE__BRANCH)
+		return false;
+
+	switch (idx) {
+	case HISTC_SYMBOL_FROM:
+		return __get_elide(symbol_conf.sym_from_list, "sym_from", output);
+	case HISTC_SYMBOL_TO:
+		return __get_elide(symbol_conf.sym_to_list, "sym_to", output);
+	case HISTC_DSO_FROM:
+		return __get_elide(symbol_conf.dso_from_list, "dso_from", output);
+	case HISTC_DSO_TO:
+		return __get_elide(symbol_conf.dso_to_list, "dso_to", output);
+	case HISTC_ADDR_FROM:
+		return __get_elide(symbol_conf.sym_from_list, "addr_from", output);
+	case HISTC_ADDR_TO:
+		return __get_elide(symbol_conf.sym_to_list, "addr_to", output);
+	default:
+		break;
+	}
+
+	return false;
+}
+
+void sort__setup_elide(FILE *output)
+{
+	struct perf_hpp_fmt *fmt;
+	struct hpp_sort_entry *hse;
+
+	perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
+		if (!perf_hpp__is_sort_entry(fmt))
+			continue;
+
+		hse = container_of(fmt, struct hpp_sort_entry, hpp);
+		fmt->elide = get_elide(hse->se->se_width_idx, output);
+	}
+
+	/*
+	 * It makes no sense to elide all of sort entries.
+	 * Just revert them to show up again.
+	 */
+	perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
+		if (!perf_hpp__is_sort_entry(fmt))
+			continue;
+
+		if (!fmt->elide)
+			return;
+	}
+
+	perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
+		if (!perf_hpp__is_sort_entry(fmt))
+			continue;
+
+		fmt->elide = false;
+	}
+}
+
+int output_field_add(struct perf_hpp_list *list, char *tok)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
+		struct sort_dimension *sd = &common_sort_dimensions[i];
+
+		if (strncasecmp(tok, sd->name, strlen(tok)))
+			continue;
+
+		return __sort_dimension__add_output(list, sd);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
+		struct hpp_dimension *hd = &hpp_sort_dimensions[i];
+
+		if (strncasecmp(tok, hd->name, strlen(tok)))
+			continue;
+
+		return __hpp_dimension__add_output(list, hd);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
+		struct sort_dimension *sd = &bstack_sort_dimensions[i];
+
+		if (strncasecmp(tok, sd->name, strlen(tok)))
+			continue;
+
+		if (sort__mode != SORT_MODE__BRANCH)
+			return -EINVAL;
+
+		return __sort_dimension__add_output(list, sd);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) {
+		struct sort_dimension *sd = &memory_sort_dimensions[i];
+
+		if (strncasecmp(tok, sd->name, strlen(tok)))
+			continue;
+
+		if (sort__mode != SORT_MODE__MEMORY)
+			return -EINVAL;
+
+		return __sort_dimension__add_output(list, sd);
+	}
+
+	return -ESRCH;
+}
+
+static int setup_output_list(struct perf_hpp_list *list, char *str)
+{
+	char *tmp, *tok;
+	int ret = 0;
+
+	for (tok = strtok_r(str, ", ", &tmp);
+			tok; tok = strtok_r(NULL, ", ", &tmp)) {
+		ret = output_field_add(list, tok);
+		if (ret == -EINVAL) {
+			ui__error("Invalid --fields key: `%s'", tok);
+			break;
+		} else if (ret == -ESRCH) {
+			ui__error("Unknown --fields key: `%s'", tok);
+			break;
+		}
+	}
+
+	return ret;
+}
+
+void reset_dimensions(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++)
+		common_sort_dimensions[i].taken = 0;
+
+	for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++)
+		hpp_sort_dimensions[i].taken = 0;
+
+	for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++)
+		bstack_sort_dimensions[i].taken = 0;
+
+	for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++)
+		memory_sort_dimensions[i].taken = 0;
+}
+
+bool is_strict_order(const char *order)
+{
+	return order && (*order != '+');
+}
+
+static int __setup_output_field(void)
+{
+	char *str, *strp;
+	int ret = -EINVAL;
+
+	if (field_order == NULL)
+		return 0;
+
+	strp = str = strdup(field_order);
+	if (str == NULL) {
+		pr_err("Not enough memory to setup output fields");
+		return -ENOMEM;
+	}
+
+	if (!is_strict_order(field_order))
+		strp++;
+
+	if (!strlen(strp)) {
+		ui__error("Invalid --fields key: `+'");
+		goto out;
+	}
+
+	ret = setup_output_list(&perf_hpp_list, strp);
+
+out:
+	free(str);
+	return ret;
+}
+
+int setup_sorting(struct evlist *evlist)
+{
+	int err;
+
+	err = __setup_sorting(evlist);
+	if (err < 0)
+		return err;
+
+	if (parent_pattern != default_parent_pattern) {
+		err = sort_dimension__add(&perf_hpp_list, "parent", evlist, -1);
+		if (err < 0)
+			return err;
+	}
+
+	reset_dimensions();
+
+	/*
+	 * perf diff doesn't use default hpp output fields.
+	 */
+	if (sort__mode != SORT_MODE__DIFF)
+		perf_hpp__init();
+
+	err = __setup_output_field();
+	if (err < 0)
+		return err;
+
+	/* copy sort keys to output fields */
+	perf_hpp__setup_output_field(&perf_hpp_list);
+	/* and then copy output fields to sort keys */
+	perf_hpp__append_sort_keys(&perf_hpp_list);
+
+	/* setup hists-specific output fields */
+	if (perf_hpp__setup_hists_formats(&perf_hpp_list, evlist) < 0)
+		return -1;
+
+	return 0;
+}
+
+void reset_output_field(void)
+{
+	perf_hpp_list.need_collapse = 0;
+	perf_hpp_list.parent = 0;
+	perf_hpp_list.sym = 0;
+	perf_hpp_list.dso = 0;
+
+	field_order = NULL;
+	sort_order = NULL;
+
+	reset_dimensions();
+	perf_hpp__reset_output_field(&perf_hpp_list);
+}
+
+#define INDENT (3*8 + 1)
+
+static void add_key(struct strbuf *sb, const char *str, int *llen)
+{
+	if (*llen >= 75) {
+		strbuf_addstr(sb, "\n\t\t\t ");
+		*llen = INDENT;
+	}
+	strbuf_addf(sb, " %s", str);
+	*llen += strlen(str) + 1;
+}
+
+static void add_sort_string(struct strbuf *sb, struct sort_dimension *s, int n,
+			    int *llen)
+{
+	int i;
+
+	for (i = 0; i < n; i++)
+		add_key(sb, s[i].name, llen);
+}
+
+static void add_hpp_sort_string(struct strbuf *sb, struct hpp_dimension *s, int n,
+				int *llen)
+{
+	int i;
+
+	for (i = 0; i < n; i++)
+		add_key(sb, s[i].name, llen);
+}
+
+char *sort_help(const char *prefix)
+{
+	struct strbuf sb;
+	char *s;
+	int len = strlen(prefix) + INDENT;
+
+	strbuf_init(&sb, 300);
+	strbuf_addstr(&sb, prefix);
+	add_hpp_sort_string(&sb, hpp_sort_dimensions,
+			    ARRAY_SIZE(hpp_sort_dimensions), &len);
+	add_sort_string(&sb, common_sort_dimensions,
+			    ARRAY_SIZE(common_sort_dimensions), &len);
+	add_sort_string(&sb, bstack_sort_dimensions,
+			    ARRAY_SIZE(bstack_sort_dimensions), &len);
+	add_sort_string(&sb, memory_sort_dimensions,
+			    ARRAY_SIZE(memory_sort_dimensions), &len);
+	s = strbuf_detach(&sb, NULL);
+	strbuf_release(&sb);
+	return s;
+}
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
new file mode 100644
index 000000000..04ff8b61a
--- /dev/null
+++ b/tools/perf/util/sort.h
@@ -0,0 +1,328 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SORT_H
+#define __PERF_SORT_H
+#include <regex.h>
+#include <stdbool.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include "map_symbol.h"
+#include "symbol_conf.h"
+#include "callchain.h"
+#include "values.h"
+#include "hist.h"
+#include "stat.h"
+#include "spark.h"
+
+struct option;
+struct thread;
+
+extern regex_t parent_regex;
+extern const char *sort_order;
+extern const char *field_order;
+extern const char default_parent_pattern[];
+extern const char *parent_pattern;
+extern const char *default_sort_order;
+extern regex_t ignore_callees_regex;
+extern int have_ignore_callees;
+extern enum sort_mode sort__mode;
+extern struct sort_entry sort_comm;
+extern struct sort_entry sort_dso;
+extern struct sort_entry sort_sym;
+extern struct sort_entry sort_parent;
+extern struct sort_entry sort_dso_from;
+extern struct sort_entry sort_dso_to;
+extern struct sort_entry sort_sym_from;
+extern struct sort_entry sort_sym_to;
+extern struct sort_entry sort_srcline;
+extern const char default_mem_sort_order[];
+
+struct res_sample {
+	u64 time;
+	int cpu;
+	int tid;
+};
+
+struct he_stat {
+	u64			period;
+	u64			period_sys;
+	u64			period_us;
+	u64			period_guest_sys;
+	u64			period_guest_us;
+	u32			nr_events;
+};
+
+struct namespace_id {
+	u64			dev;
+	u64			ino;
+};
+
+struct hist_entry_diff {
+	bool	computed;
+	union {
+		/* PERF_HPP__DELTA */
+		double	period_ratio_delta;
+
+		/* PERF_HPP__RATIO */
+		double	period_ratio;
+
+		/* HISTC_WEIGHTED_DIFF */
+		s64	wdiff;
+
+		/* PERF_HPP_DIFF__CYCLES */
+		s64	cycles;
+	};
+	struct stats	stats;
+	unsigned long	svals[NUM_SPARKS];
+};
+
+struct hist_entry_ops {
+	void	*(*new)(size_t size);
+	void	(*free)(void *ptr);
+};
+
+/**
+ * struct hist_entry - histogram entry
+ *
+ * @row_offset - offset from the first callchain expanded to appear on screen
+ * @nr_rows - rows expanded in callchain, recalculated on folding/unfolding
+ */
+struct hist_entry {
+	struct rb_node		rb_node_in;
+	struct rb_node		rb_node;
+	union {
+		struct list_head node;
+		struct list_head head;
+	} pairs;
+	struct he_stat		stat;
+	struct he_stat		*stat_acc;
+	struct map_symbol	ms;
+	struct thread		*thread;
+	struct comm		*comm;
+	struct namespace_id	cgroup_id;
+	u64			cgroup;
+	u64			ip;
+	u64			transaction;
+	s32			socket;
+	s32			cpu;
+	u64			code_page_size;
+	u64			weight;
+	u64			ins_lat;
+	u64			p_stage_cyc;
+	u8			cpumode;
+	u8			depth;
+
+	/* We are added by hists__add_dummy_entry. */
+	bool			dummy;
+	bool			leaf;
+
+	char			level;
+	u8			filtered;
+
+	u16			callchain_size;
+	union {
+		/*
+		 * Since perf diff only supports the stdio output, TUI
+		 * fields are only accessed from perf report (or perf
+		 * top).  So make it a union to reduce memory usage.
+		 */
+		struct hist_entry_diff	diff;
+		struct /* for TUI */ {
+			u16	row_offset;
+			u16	nr_rows;
+			bool	init_have_children;
+			bool	unfolded;
+			bool	has_children;
+			bool	has_no_entry;
+		};
+	};
+	char			*srcline;
+	char			*srcfile;
+	struct symbol		*parent;
+	struct branch_info	*branch_info;
+	long			time;
+	struct hists		*hists;
+	struct mem_info		*mem_info;
+	struct block_info	*block_info;
+	void			*raw_data;
+	u32			raw_size;
+	int			num_res;
+	struct res_sample	*res_samples;
+	void			*trace_output;
+	struct perf_hpp_list	*hpp_list;
+	struct hist_entry	*parent_he;
+	struct hist_entry_ops	*ops;
+	union {
+		/* this is for hierarchical entry structure */
+		struct {
+			struct rb_root_cached	hroot_in;
+			struct rb_root_cached   hroot_out;
+		};				/* non-leaf entries */
+		struct rb_root	sorted_chain;	/* leaf entry has callchains */
+	};
+	struct callchain_root	callchain[0]; /* must be last member */
+};
+
+static __pure inline bool hist_entry__has_callchains(struct hist_entry *he)
+{
+	return he->callchain_size != 0;
+}
+
+int hist_entry__sym_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width);
+
+static inline bool hist_entry__has_pairs(struct hist_entry *he)
+{
+	return !list_empty(&he->pairs.node);
+}
+
+static inline struct hist_entry *hist_entry__next_pair(struct hist_entry *he)
+{
+	if (hist_entry__has_pairs(he))
+		return list_entry(he->pairs.node.next, struct hist_entry, pairs.node);
+	return NULL;
+}
+
+static inline void hist_entry__add_pair(struct hist_entry *pair,
+					struct hist_entry *he)
+{
+	list_add_tail(&pair->pairs.node, &he->pairs.head);
+}
+
+static inline float hist_entry__get_percent_limit(struct hist_entry *he)
+{
+	u64 period = he->stat.period;
+	u64 total_period = hists__total_period(he->hists);
+
+	if (unlikely(total_period == 0))
+		return 0;
+
+	if (symbol_conf.cumulate_callchain)
+		period = he->stat_acc->period;
+
+	return period * 100.0 / total_period;
+}
+
+enum sort_mode {
+	SORT_MODE__NORMAL,
+	SORT_MODE__BRANCH,
+	SORT_MODE__MEMORY,
+	SORT_MODE__TOP,
+	SORT_MODE__DIFF,
+	SORT_MODE__TRACEPOINT,
+};
+
+enum sort_type {
+	/* common sort keys */
+	SORT_PID,
+	SORT_COMM,
+	SORT_DSO,
+	SORT_SYM,
+	SORT_PARENT,
+	SORT_CPU,
+	SORT_SOCKET,
+	SORT_SRCLINE,
+	SORT_SRCFILE,
+	SORT_LOCAL_WEIGHT,
+	SORT_GLOBAL_WEIGHT,
+	SORT_TRANSACTION,
+	SORT_TRACE,
+	SORT_SYM_SIZE,
+	SORT_DSO_SIZE,
+	SORT_CGROUP,
+	SORT_CGROUP_ID,
+	SORT_SYM_IPC_NULL,
+	SORT_TIME,
+	SORT_CODE_PAGE_SIZE,
+	SORT_LOCAL_INS_LAT,
+	SORT_GLOBAL_INS_LAT,
+	SORT_LOCAL_PIPELINE_STAGE_CYC,
+	SORT_GLOBAL_PIPELINE_STAGE_CYC,
+	SORT_ADDR,
+
+	/* branch stack specific sort keys */
+	__SORT_BRANCH_STACK,
+	SORT_DSO_FROM = __SORT_BRANCH_STACK,
+	SORT_DSO_TO,
+	SORT_SYM_FROM,
+	SORT_SYM_TO,
+	SORT_MISPREDICT,
+	SORT_ABORT,
+	SORT_IN_TX,
+	SORT_CYCLES,
+	SORT_SRCLINE_FROM,
+	SORT_SRCLINE_TO,
+	SORT_SYM_IPC,
+	SORT_ADDR_FROM,
+	SORT_ADDR_TO,
+
+	/* memory mode specific sort keys */
+	__SORT_MEMORY_MODE,
+	SORT_MEM_DADDR_SYMBOL = __SORT_MEMORY_MODE,
+	SORT_MEM_DADDR_DSO,
+	SORT_MEM_LOCKED,
+	SORT_MEM_TLB,
+	SORT_MEM_LVL,
+	SORT_MEM_SNOOP,
+	SORT_MEM_DCACHELINE,
+	SORT_MEM_IADDR_SYMBOL,
+	SORT_MEM_PHYS_DADDR,
+	SORT_MEM_DATA_PAGE_SIZE,
+	SORT_MEM_BLOCKED,
+};
+
+/*
+ * configurable sorting bits
+ */
+
+struct sort_entry {
+	const char *se_header;
+
+	int64_t (*se_cmp)(struct hist_entry *, struct hist_entry *);
+	int64_t (*se_collapse)(struct hist_entry *, struct hist_entry *);
+	int64_t	(*se_sort)(struct hist_entry *, struct hist_entry *);
+	int	(*se_snprintf)(struct hist_entry *he, char *bf, size_t size,
+			       unsigned int width);
+	int	(*se_filter)(struct hist_entry *he, int type, const void *arg);
+	u8	se_width_idx;
+};
+
+struct block_hist {
+	struct hists		block_hists;
+	struct perf_hpp_list	block_list;
+	struct perf_hpp_fmt	block_fmt;
+	int			block_idx;
+	bool			valid;
+	struct hist_entry	he;
+};
+
+extern struct sort_entry sort_thread;
+
+struct evlist;
+struct tep_handle;
+int setup_sorting(struct evlist *evlist);
+int setup_output_field(void);
+void reset_output_field(void);
+void sort__setup_elide(FILE *fp);
+void perf_hpp__set_elide(int idx, bool elide);
+
+char *sort_help(const char *prefix);
+
+int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset);
+
+bool is_strict_order(const char *order);
+
+int hpp_dimension__add_output(unsigned col);
+void reset_dimensions(void);
+int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
+			struct evlist *evlist,
+			int level);
+int output_field_add(struct perf_hpp_list *list, char *tok);
+int64_t
+sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t
+sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t
+sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t
+_sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r);
+char *hist_entry__srcline(struct hist_entry *he);
+#endif	/* __PERF_SORT_H */
diff --git a/tools/perf/util/spark.c b/tools/perf/util/spark.c
new file mode 100644
index 000000000..70272a8b8
--- /dev/null
+++ b/tools/perf/util/spark.c
@@ -0,0 +1,34 @@
+#include <stdio.h>
+#include <limits.h>
+#include <string.h>
+#include <stdlib.h>
+#include "spark.h"
+#include "stat.h"
+
+#define SPARK_SHIFT 8
+
+/* Print spark lines on outf for numval values in val. */
+int print_spark(char *bf, int size, unsigned long *val, int numval)
+{
+	static const char *ticks[NUM_SPARKS] = {
+		"▁",  "▂", "▃", "▄", "▅", "▆", "▇", "█"
+	};
+	int i, printed = 0;
+	unsigned long min = ULONG_MAX, max = 0, f;
+
+	for (i = 0; i < numval; i++) {
+		if (val[i] < min)
+			min = val[i];
+		if (val[i] > max)
+			max = val[i];
+	}
+	f = ((max - min) << SPARK_SHIFT) / (NUM_SPARKS - 1);
+	if (f < 1)
+		f = 1;
+	for (i = 0; i < numval; i++) {
+		printed += scnprintf(bf + printed, size - printed, "%s",
+				     ticks[((val[i] - min) << SPARK_SHIFT) / f]);
+	}
+
+	return printed;
+}
diff --git a/tools/perf/util/spark.h b/tools/perf/util/spark.h
new file mode 100644
index 000000000..25402d7d7
--- /dev/null
+++ b/tools/perf/util/spark.h
@@ -0,0 +1,8 @@
+#ifndef SPARK_H
+#define SPARK_H 1
+
+#define NUM_SPARKS 8
+
+int print_spark(char *bf, int size, unsigned long *val, int numval);
+
+#endif
diff --git a/tools/perf/util/srccode.c b/tools/perf/util/srccode.c
new file mode 100644
index 000000000..476e99896
--- /dev/null
+++ b/tools/perf/util/srccode.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Manage printing of source lines
+ * Copyright (c) 2017, Intel Corporation.
+ * Author: Andi Kleen
+ */
+#include <linux/list.h>
+#include <linux/zalloc.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <assert.h>
+#include <string.h>
+#include "srccode.h"
+#include "debug.h"
+#include <internal/lib.h> // page_size
+#include "fncache.h"
+
+#define MAXSRCCACHE (32*1024*1024)
+#define MAXSRCFILES     64
+#define SRC_HTAB_SZ	64
+
+struct srcfile {
+	struct hlist_node hash_nd;
+	struct list_head nd;
+	char *fn;
+	char **lines;
+	char *map;
+	unsigned numlines;
+	size_t maplen;
+};
+
+static struct hlist_head srcfile_htab[SRC_HTAB_SZ];
+static LIST_HEAD(srcfile_list);
+static long map_total_sz;
+static int num_srcfiles;
+
+static int countlines(char *map, int maplen)
+{
+	int numl;
+	char *end = map + maplen;
+	char *p = map;
+
+	if (maplen == 0)
+		return 0;
+	numl = 0;
+	while (p < end && (p = memchr(p, '\n', end - p)) != NULL) {
+		numl++;
+		p++;
+	}
+	if (p < end)
+		numl++;
+	return numl;
+}
+
+static void fill_lines(char **lines, int maxline, char *map, int maplen)
+{
+	int l;
+	char *end = map + maplen;
+	char *p = map;
+
+	if (maplen == 0 || maxline == 0)
+		return;
+	l = 0;
+	lines[l++] = map;
+	while (p < end && (p = memchr(p, '\n', end - p)) != NULL) {
+		if (l >= maxline)
+			return;
+		lines[l++] = ++p;
+	}
+	if (p < end)
+		lines[l] = p;
+}
+
+static void free_srcfile(struct srcfile *sf)
+{
+	list_del_init(&sf->nd);
+	hlist_del(&sf->hash_nd);
+	map_total_sz -= sf->maplen;
+	munmap(sf->map, sf->maplen);
+	zfree(&sf->lines);
+	zfree(&sf->fn);
+	free(sf);
+	num_srcfiles--;
+}
+
+static struct srcfile *find_srcfile(char *fn)
+{
+	struct stat st;
+	struct srcfile *h;
+	int fd;
+	unsigned long sz;
+	unsigned hval = shash((unsigned char *)fn) % SRC_HTAB_SZ;
+
+	hlist_for_each_entry (h, &srcfile_htab[hval], hash_nd) {
+		if (!strcmp(fn, h->fn)) {
+			/* Move to front */
+			list_move(&h->nd, &srcfile_list);
+			return h;
+		}
+	}
+
+	/* Only prune if there is more than one entry */
+	while ((num_srcfiles > MAXSRCFILES || map_total_sz > MAXSRCCACHE) &&
+	       srcfile_list.next != &srcfile_list) {
+		assert(!list_empty(&srcfile_list));
+		h = list_entry(srcfile_list.prev, struct srcfile, nd);
+		free_srcfile(h);
+	}
+
+	fd = open(fn, O_RDONLY);
+	if (fd < 0 || fstat(fd, &st) < 0) {
+		pr_debug("cannot open source file %s\n", fn);
+		return NULL;
+	}
+
+	h = malloc(sizeof(struct srcfile));
+	if (!h)
+		return NULL;
+
+	h->fn = strdup(fn);
+	if (!h->fn)
+		goto out_h;
+
+	h->maplen = st.st_size;
+	sz = (h->maplen + page_size - 1) & ~(page_size - 1);
+	h->map = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0);
+	close(fd);
+	if (h->map == (char *)-1) {
+		pr_debug("cannot mmap source file %s\n", fn);
+		goto out_fn;
+	}
+	h->numlines = countlines(h->map, h->maplen);
+	h->lines = calloc(h->numlines, sizeof(char *));
+	if (!h->lines)
+		goto out_map;
+	fill_lines(h->lines, h->numlines, h->map, h->maplen);
+	list_add(&h->nd, &srcfile_list);
+	hlist_add_head(&h->hash_nd, &srcfile_htab[hval]);
+	map_total_sz += h->maplen;
+	num_srcfiles++;
+	return h;
+
+out_map:
+	munmap(h->map, sz);
+out_fn:
+	zfree(&h->fn);
+out_h:
+	free(h);
+	return NULL;
+}
+
+/* Result is not 0 terminated */
+char *find_sourceline(char *fn, unsigned line, int *lenp)
+{
+	char *l, *p;
+	struct srcfile *sf = find_srcfile(fn);
+	if (!sf)
+		return NULL;
+	line--;
+	if (line >= sf->numlines)
+		return NULL;
+	l = sf->lines[line];
+	if (!l)
+		return NULL;
+	p = memchr(l, '\n', sf->map + sf->maplen - l);
+	*lenp = p - l;
+	return l;
+}
diff --git a/tools/perf/util/srccode.h b/tools/perf/util/srccode.h
new file mode 100644
index 000000000..1b5ed7697
--- /dev/null
+++ b/tools/perf/util/srccode.h
@@ -0,0 +1,20 @@
+#ifndef SRCCODE_H
+#define SRCCODE_H 1
+
+struct srccode_state {
+	char	 *srcfile;
+	unsigned line;
+};
+
+static inline void srccode_state_init(struct srccode_state *state)
+{
+	state->srcfile = NULL;
+	state->line    = 0;
+}
+
+void srccode_state_free(struct srccode_state *state);
+
+/* Result is not 0 terminated */
+char *find_sourceline(char *fn, unsigned line, int *lenp);
+
+#endif
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
new file mode 100644
index 000000000..af468e3bb
--- /dev/null
+++ b/tools/perf/util/srcline.c
@@ -0,0 +1,920 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+
+#include "util/dso.h"
+#include "util/debug.h"
+#include "util/callchain.h"
+#include "util/symbol_conf.h"
+#include "srcline.h"
+#include "string2.h"
+#include "symbol.h"
+#include "subcmd/run-command.h"
+
+bool srcline_full_filename;
+
+static const char *dso__name(struct dso *dso)
+{
+	const char *dso_name;
+
+	if (dso->symsrc_filename)
+		dso_name = dso->symsrc_filename;
+	else
+		dso_name = dso->long_name;
+
+	if (dso_name[0] == '[')
+		return NULL;
+
+	if (!strncmp(dso_name, "/tmp/perf-", 10))
+		return NULL;
+
+	return dso_name;
+}
+
+static int inline_list__append(struct symbol *symbol, char *srcline,
+			       struct inline_node *node)
+{
+	struct inline_list *ilist;
+
+	ilist = zalloc(sizeof(*ilist));
+	if (ilist == NULL)
+		return -1;
+
+	ilist->symbol = symbol;
+	ilist->srcline = srcline;
+
+	if (callchain_param.order == ORDER_CALLEE)
+		list_add_tail(&ilist->list, &node->val);
+	else
+		list_add(&ilist->list, &node->val);
+
+	return 0;
+}
+
+/* basename version that takes a const input string */
+static const char *gnu_basename(const char *path)
+{
+	const char *base = strrchr(path, '/');
+
+	return base ? base + 1 : path;
+}
+
+static char *srcline_from_fileline(const char *file, unsigned int line)
+{
+	char *srcline;
+
+	if (!file)
+		return NULL;
+
+	if (!srcline_full_filename)
+		file = gnu_basename(file);
+
+	if (asprintf(&srcline, "%s:%u", file, line) < 0)
+		return NULL;
+
+	return srcline;
+}
+
+static struct symbol *new_inline_sym(struct dso *dso,
+				     struct symbol *base_sym,
+				     const char *funcname)
+{
+	struct symbol *inline_sym;
+	char *demangled = NULL;
+
+	if (!funcname)
+		funcname = "??";
+
+	if (dso) {
+		demangled = dso__demangle_sym(dso, 0, funcname);
+		if (demangled)
+			funcname = demangled;
+	}
+
+	if (base_sym && strcmp(funcname, base_sym->name) == 0) {
+		/* reuse the real, existing symbol */
+		inline_sym = base_sym;
+		/* ensure that we don't alias an inlined symbol, which could
+		 * lead to double frees in inline_node__delete
+		 */
+		assert(!base_sym->inlined);
+	} else {
+		/* create a fake symbol for the inline frame */
+		inline_sym = symbol__new(base_sym ? base_sym->start : 0,
+					 base_sym ? (base_sym->end - base_sym->start) : 0,
+					 base_sym ? base_sym->binding : 0,
+					 base_sym ? base_sym->type : 0,
+					 funcname);
+		if (inline_sym)
+			inline_sym->inlined = 1;
+	}
+
+	free(demangled);
+
+	return inline_sym;
+}
+
+#define MAX_INLINE_NEST 1024
+
+#ifdef HAVE_LIBBFD_SUPPORT
+
+/*
+ * Implement addr2line using libbfd.
+ */
+#define PACKAGE "perf"
+#include <bfd.h>
+
+struct a2l_data {
+	const char 	*input;
+	u64	 	addr;
+
+	bool 		found;
+	const char 	*filename;
+	const char 	*funcname;
+	unsigned 	line;
+
+	bfd 		*abfd;
+	asymbol 	**syms;
+};
+
+static int bfd_error(const char *string)
+{
+	const char *errmsg;
+
+	errmsg = bfd_errmsg(bfd_get_error());
+	fflush(stdout);
+
+	if (string)
+		pr_debug("%s: %s\n", string, errmsg);
+	else
+		pr_debug("%s\n", errmsg);
+
+	return -1;
+}
+
+static int slurp_symtab(bfd *abfd, struct a2l_data *a2l)
+{
+	long storage;
+	long symcount;
+	asymbol **syms;
+	bfd_boolean dynamic = FALSE;
+
+	if ((bfd_get_file_flags(abfd) & HAS_SYMS) == 0)
+		return bfd_error(bfd_get_filename(abfd));
+
+	storage = bfd_get_symtab_upper_bound(abfd);
+	if (storage == 0L) {
+		storage = bfd_get_dynamic_symtab_upper_bound(abfd);
+		dynamic = TRUE;
+	}
+	if (storage < 0L)
+		return bfd_error(bfd_get_filename(abfd));
+
+	syms = malloc(storage);
+	if (dynamic)
+		symcount = bfd_canonicalize_dynamic_symtab(abfd, syms);
+	else
+		symcount = bfd_canonicalize_symtab(abfd, syms);
+
+	if (symcount < 0) {
+		free(syms);
+		return bfd_error(bfd_get_filename(abfd));
+	}
+
+	a2l->syms = syms;
+	return 0;
+}
+
+static void find_address_in_section(bfd *abfd, asection *section, void *data)
+{
+	bfd_vma pc, vma;
+	bfd_size_type size;
+	struct a2l_data *a2l = data;
+	flagword flags;
+
+	if (a2l->found)
+		return;
+
+#ifdef bfd_get_section_flags
+	flags = bfd_get_section_flags(abfd, section);
+#else
+	flags = bfd_section_flags(section);
+#endif
+	if ((flags & SEC_ALLOC) == 0)
+		return;
+
+	pc = a2l->addr;
+#ifdef bfd_get_section_vma
+	vma = bfd_get_section_vma(abfd, section);
+#else
+	vma = bfd_section_vma(section);
+#endif
+#ifdef bfd_get_section_size
+	size = bfd_get_section_size(section);
+#else
+	size = bfd_section_size(section);
+#endif
+
+	if (pc < vma || pc >= vma + size)
+		return;
+
+	a2l->found = bfd_find_nearest_line(abfd, section, a2l->syms, pc - vma,
+					   &a2l->filename, &a2l->funcname,
+					   &a2l->line);
+
+	if (a2l->filename && !strlen(a2l->filename))
+		a2l->filename = NULL;
+}
+
+static struct a2l_data *addr2line_init(const char *path)
+{
+	bfd *abfd;
+	struct a2l_data *a2l = NULL;
+
+	abfd = bfd_openr(path, NULL);
+	if (abfd == NULL)
+		return NULL;
+
+	if (!bfd_check_format(abfd, bfd_object))
+		goto out;
+
+	a2l = zalloc(sizeof(*a2l));
+	if (a2l == NULL)
+		goto out;
+
+	a2l->abfd = abfd;
+	a2l->input = strdup(path);
+	if (a2l->input == NULL)
+		goto out;
+
+	if (slurp_symtab(abfd, a2l))
+		goto out;
+
+	return a2l;
+
+out:
+	if (a2l) {
+		zfree((char **)&a2l->input);
+		free(a2l);
+	}
+	bfd_close(abfd);
+	return NULL;
+}
+
+static void addr2line_cleanup(struct a2l_data *a2l)
+{
+	if (a2l->abfd)
+		bfd_close(a2l->abfd);
+	zfree((char **)&a2l->input);
+	zfree(&a2l->syms);
+	free(a2l);
+}
+
+static int inline_list__append_dso_a2l(struct dso *dso,
+				       struct inline_node *node,
+				       struct symbol *sym)
+{
+	struct a2l_data *a2l = dso->a2l;
+	struct symbol *inline_sym = new_inline_sym(dso, sym, a2l->funcname);
+	char *srcline = NULL;
+
+	if (a2l->filename)
+		srcline = srcline_from_fileline(a2l->filename, a2l->line);
+
+	return inline_list__append(inline_sym, srcline, node);
+}
+
+static int addr2line(const char *dso_name, u64 addr,
+		     char **file, unsigned int *line, struct dso *dso,
+		     bool unwind_inlines, struct inline_node *node,
+		     struct symbol *sym)
+{
+	int ret = 0;
+	struct a2l_data *a2l = dso->a2l;
+
+	if (!a2l) {
+		dso->a2l = addr2line_init(dso_name);
+		a2l = dso->a2l;
+	}
+
+	if (a2l == NULL) {
+		if (!symbol_conf.disable_add2line_warn)
+			pr_warning("addr2line_init failed for %s\n", dso_name);
+		return 0;
+	}
+
+	a2l->addr = addr;
+	a2l->found = false;
+
+	bfd_map_over_sections(a2l->abfd, find_address_in_section, a2l);
+
+	if (!a2l->found)
+		return 0;
+
+	if (unwind_inlines) {
+		int cnt = 0;
+
+		if (node && inline_list__append_dso_a2l(dso, node, sym))
+			return 0;
+
+		while (bfd_find_inliner_info(a2l->abfd, &a2l->filename,
+					     &a2l->funcname, &a2l->line) &&
+		       cnt++ < MAX_INLINE_NEST) {
+
+			if (a2l->filename && !strlen(a2l->filename))
+				a2l->filename = NULL;
+
+			if (node != NULL) {
+				if (inline_list__append_dso_a2l(dso, node, sym))
+					return 0;
+				// found at least one inline frame
+				ret = 1;
+			}
+		}
+	}
+
+	if (file) {
+		*file = a2l->filename ? strdup(a2l->filename) : NULL;
+		ret = *file ? 1 : 0;
+	}
+
+	if (line)
+		*line = a2l->line;
+
+	return ret;
+}
+
+void dso__free_a2l(struct dso *dso)
+{
+	struct a2l_data *a2l = dso->a2l;
+
+	if (!a2l)
+		return;
+
+	addr2line_cleanup(a2l);
+
+	dso->a2l = NULL;
+}
+
+#else /* HAVE_LIBBFD_SUPPORT */
+
+struct a2l_subprocess {
+	struct child_process addr2line;
+	FILE *to_child;
+	FILE *from_child;
+};
+
+static int filename_split(char *filename, unsigned int *line_nr)
+{
+	char *sep;
+
+	sep = strchr(filename, '\n');
+	if (sep)
+		*sep = '\0';
+
+	if (!strcmp(filename, "??:0"))
+		return 0;
+
+	sep = strchr(filename, ':');
+	if (sep) {
+		*sep++ = '\0';
+		*line_nr = strtoul(sep, NULL, 0);
+		return 1;
+	}
+
+	return 0;
+}
+
+static void addr2line_subprocess_cleanup(struct a2l_subprocess *a2l)
+{
+	if (a2l->addr2line.pid != -1) {
+		kill(a2l->addr2line.pid, SIGKILL);
+		finish_command(&a2l->addr2line); /* ignore result, we don't care */
+		a2l->addr2line.pid = -1;
+	}
+
+	if (a2l->to_child != NULL) {
+		fclose(a2l->to_child);
+		a2l->to_child = NULL;
+	}
+
+	if (a2l->from_child != NULL) {
+		fclose(a2l->from_child);
+		a2l->from_child = NULL;
+	}
+
+	free(a2l);
+}
+
+static struct a2l_subprocess *addr2line_subprocess_init(const char *path)
+{
+	const char *argv[] = { "addr2line", "-e", path, "-i", "-f", NULL };
+	struct a2l_subprocess *a2l = zalloc(sizeof(*a2l));
+	int start_command_status = 0;
+
+	if (a2l == NULL)
+		goto out;
+
+	a2l->to_child = NULL;
+	a2l->from_child = NULL;
+
+	a2l->addr2line.pid = -1;
+	a2l->addr2line.in = -1;
+	a2l->addr2line.out = -1;
+	a2l->addr2line.no_stderr = 1;
+
+	a2l->addr2line.argv = argv;
+	start_command_status = start_command(&a2l->addr2line);
+	a2l->addr2line.argv = NULL; /* it's not used after start_command; avoid dangling pointers */
+
+	if (start_command_status != 0) {
+		pr_warning("could not start addr2line for %s: start_command return code %d\n",
+			   path,
+			   start_command_status);
+		goto out;
+	}
+
+	a2l->to_child = fdopen(a2l->addr2line.in, "w");
+	if (a2l->to_child == NULL) {
+		pr_warning("could not open write-stream to addr2line of %s\n", path);
+		goto out;
+	}
+
+	a2l->from_child = fdopen(a2l->addr2line.out, "r");
+	if (a2l->from_child == NULL) {
+		pr_warning("could not open read-stream from addr2line of %s\n", path);
+		goto out;
+	}
+
+	return a2l;
+
+out:
+	if (a2l)
+		addr2line_subprocess_cleanup(a2l);
+
+	return NULL;
+}
+
+static int read_addr2line_record(struct a2l_subprocess *a2l,
+				 char **function,
+				 char **filename,
+				 unsigned int *line_nr)
+{
+	/*
+	 * Returns:
+	 * -1 ==> error
+	 * 0 ==> sentinel (or other ill-formed) record read
+	 * 1 ==> a genuine record read
+	 */
+	char *line = NULL;
+	size_t line_len = 0;
+	unsigned int dummy_line_nr = 0;
+	int ret = -1;
+
+	if (function != NULL)
+		zfree(function);
+
+	if (filename != NULL)
+		zfree(filename);
+
+	if (line_nr != NULL)
+		*line_nr = 0;
+
+	if (getline(&line, &line_len, a2l->from_child) < 0 || !line_len)
+		goto error;
+
+	if (function != NULL)
+		*function = strdup(strim(line));
+
+	zfree(&line);
+	line_len = 0;
+
+	if (getline(&line, &line_len, a2l->from_child) < 0 || !line_len)
+		goto error;
+
+	if (filename_split(line, line_nr == NULL ? &dummy_line_nr : line_nr) == 0) {
+		ret = 0;
+		goto error;
+	}
+
+	if (filename != NULL)
+		*filename = strdup(line);
+
+	zfree(&line);
+	line_len = 0;
+
+	return 1;
+
+error:
+	free(line);
+	if (function != NULL)
+		zfree(function);
+	if (filename != NULL)
+		zfree(filename);
+	return ret;
+}
+
+static int inline_list__append_record(struct dso *dso,
+				      struct inline_node *node,
+				      struct symbol *sym,
+				      const char *function,
+				      const char *filename,
+				      unsigned int line_nr)
+{
+	struct symbol *inline_sym = new_inline_sym(dso, sym, function);
+
+	return inline_list__append(inline_sym, srcline_from_fileline(filename, line_nr), node);
+}
+
+static int addr2line(const char *dso_name, u64 addr,
+		     char **file, unsigned int *line_nr,
+		     struct dso *dso,
+		     bool unwind_inlines,
+		     struct inline_node *node,
+		     struct symbol *sym __maybe_unused)
+{
+	struct a2l_subprocess *a2l = dso->a2l;
+	char *record_function = NULL;
+	char *record_filename = NULL;
+	unsigned int record_line_nr = 0;
+	int record_status = -1;
+	int ret = 0;
+	size_t inline_count = 0;
+
+	if (!a2l) {
+		dso->a2l = addr2line_subprocess_init(dso_name);
+		a2l = dso->a2l;
+	}
+
+	if (a2l == NULL) {
+		if (!symbol_conf.disable_add2line_warn)
+			pr_warning("%s %s: addr2line_subprocess_init failed\n", __func__, dso_name);
+		goto out;
+	}
+
+	/*
+	 * Send our request and then *deliberately* send something that can't be interpreted as
+	 * a valid address to ask addr2line about (namely, ","). This causes addr2line to first
+	 * write out the answer to our request, in an unbounded/unknown number of records, and
+	 * then to write out the lines "??" and "??:0", so that we can detect when it has
+	 * finished giving us anything useful. We have to be careful about the first record,
+	 * though, because it may be genuinely unknown, in which case we'll get two sets of
+	 * "??"/"??:0" lines.
+	 */
+	if (fprintf(a2l->to_child, "%016"PRIx64"\n,\n", addr) < 0 || fflush(a2l->to_child) != 0) {
+		pr_warning("%s %s: could not send request\n", __func__, dso_name);
+		goto out;
+	}
+
+	switch (read_addr2line_record(a2l, &record_function, &record_filename, &record_line_nr)) {
+	case -1:
+		pr_warning("%s %s: could not read first record\n", __func__, dso_name);
+		goto out;
+	case 0:
+		/*
+		 * The first record was invalid, so return failure, but first read another
+		 * record, since we asked a junk question and have to clear the answer out.
+		 */
+		switch (read_addr2line_record(a2l, NULL, NULL, NULL)) {
+		case -1:
+			pr_warning("%s %s: could not read delimiter record\n", __func__, dso_name);
+			break;
+		case 0:
+			/* As expected. */
+			break;
+		default:
+			pr_warning("%s %s: unexpected record instead of sentinel",
+				   __func__, dso_name);
+			break;
+		}
+		goto out;
+	default:
+		break;
+	}
+
+	if (file) {
+		*file = strdup(record_filename);
+		ret = 1;
+	}
+	if (line_nr)
+		*line_nr = record_line_nr;
+
+	if (unwind_inlines) {
+		if (node && inline_list__append_record(dso, node, sym,
+						       record_function,
+						       record_filename,
+						       record_line_nr)) {
+			ret = 0;
+			goto out;
+		}
+	}
+
+	/* We have to read the records even if we don't care about the inline info. */
+	while ((record_status = read_addr2line_record(a2l,
+						      &record_function,
+						      &record_filename,
+						      &record_line_nr)) == 1) {
+		if (unwind_inlines && node && inline_count++ < MAX_INLINE_NEST) {
+			if (inline_list__append_record(dso, node, sym,
+						       record_function,
+						       record_filename,
+						       record_line_nr)) {
+				ret = 0;
+				goto out;
+			}
+			ret = 1; /* found at least one inline frame */
+		}
+	}
+
+out:
+	free(record_function);
+	free(record_filename);
+	return ret;
+}
+
+void dso__free_a2l(struct dso *dso)
+{
+	struct a2l_subprocess *a2l = dso->a2l;
+
+	if (!a2l)
+		return;
+
+	addr2line_subprocess_cleanup(a2l);
+
+	dso->a2l = NULL;
+}
+
+#endif /* HAVE_LIBBFD_SUPPORT */
+
+static struct inline_node *addr2inlines(const char *dso_name, u64 addr,
+					struct dso *dso, struct symbol *sym)
+{
+	struct inline_node *node;
+
+	node = zalloc(sizeof(*node));
+	if (node == NULL) {
+		perror("not enough memory for the inline node");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&node->val);
+	node->addr = addr;
+
+	addr2line(dso_name, addr, NULL, NULL, dso, true, node, sym);
+	return node;
+}
+
+/*
+ * Number of addr2line failures (without success) before disabling it for that
+ * dso.
+ */
+#define A2L_FAIL_LIMIT 123
+
+char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
+		  bool show_sym, bool show_addr, bool unwind_inlines,
+		  u64 ip)
+{
+	char *file = NULL;
+	unsigned line = 0;
+	char *srcline;
+	const char *dso_name;
+
+	if (!dso->has_srcline)
+		goto out;
+
+	dso_name = dso__name(dso);
+	if (dso_name == NULL)
+		goto out;
+
+	if (!addr2line(dso_name, addr, &file, &line, dso,
+		       unwind_inlines, NULL, sym))
+		goto out;
+
+	srcline = srcline_from_fileline(file, line);
+	free(file);
+
+	if (!srcline)
+		goto out;
+
+	dso->a2l_fails = 0;
+
+	return srcline;
+
+out:
+	if (dso->a2l_fails && ++dso->a2l_fails > A2L_FAIL_LIMIT) {
+		dso->has_srcline = 0;
+		dso__free_a2l(dso);
+	}
+
+	if (!show_addr)
+		return (show_sym && sym) ?
+			    strndup(sym->name, sym->namelen) : NULL;
+
+	if (sym) {
+		if (asprintf(&srcline, "%s+%" PRIu64, show_sym ? sym->name : "",
+					ip - sym->start) < 0)
+			return SRCLINE_UNKNOWN;
+	} else if (asprintf(&srcline, "%s[%" PRIx64 "]", dso->short_name, addr) < 0)
+		return SRCLINE_UNKNOWN;
+	return srcline;
+}
+
+/* Returns filename and fills in line number in line */
+char *get_srcline_split(struct dso *dso, u64 addr, unsigned *line)
+{
+	char *file = NULL;
+	const char *dso_name;
+
+	if (!dso->has_srcline)
+		goto out;
+
+	dso_name = dso__name(dso);
+	if (dso_name == NULL)
+		goto out;
+
+	if (!addr2line(dso_name, addr, &file, line, dso, true, NULL, NULL))
+		goto out;
+
+	dso->a2l_fails = 0;
+	return file;
+
+out:
+	if (dso->a2l_fails && ++dso->a2l_fails > A2L_FAIL_LIMIT) {
+		dso->has_srcline = 0;
+		dso__free_a2l(dso);
+	}
+
+	return NULL;
+}
+
+void free_srcline(char *srcline)
+{
+	if (srcline && strcmp(srcline, SRCLINE_UNKNOWN) != 0)
+		free(srcline);
+}
+
+char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
+		  bool show_sym, bool show_addr, u64 ip)
+{
+	return __get_srcline(dso, addr, sym, show_sym, show_addr, false, ip);
+}
+
+struct srcline_node {
+	u64			addr;
+	char			*srcline;
+	struct rb_node		rb_node;
+};
+
+void srcline__tree_insert(struct rb_root_cached *tree, u64 addr, char *srcline)
+{
+	struct rb_node **p = &tree->rb_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct srcline_node *i, *node;
+	bool leftmost = true;
+
+	node = zalloc(sizeof(struct srcline_node));
+	if (!node) {
+		perror("not enough memory for the srcline node");
+		return;
+	}
+
+	node->addr = addr;
+	node->srcline = srcline;
+
+	while (*p != NULL) {
+		parent = *p;
+		i = rb_entry(parent, struct srcline_node, rb_node);
+		if (addr < i->addr)
+			p = &(*p)->rb_left;
+		else {
+			p = &(*p)->rb_right;
+			leftmost = false;
+		}
+	}
+	rb_link_node(&node->rb_node, parent, p);
+	rb_insert_color_cached(&node->rb_node, tree, leftmost);
+}
+
+char *srcline__tree_find(struct rb_root_cached *tree, u64 addr)
+{
+	struct rb_node *n = tree->rb_root.rb_node;
+
+	while (n) {
+		struct srcline_node *i = rb_entry(n, struct srcline_node,
+						  rb_node);
+
+		if (addr < i->addr)
+			n = n->rb_left;
+		else if (addr > i->addr)
+			n = n->rb_right;
+		else
+			return i->srcline;
+	}
+
+	return NULL;
+}
+
+void srcline__tree_delete(struct rb_root_cached *tree)
+{
+	struct srcline_node *pos;
+	struct rb_node *next = rb_first_cached(tree);
+
+	while (next) {
+		pos = rb_entry(next, struct srcline_node, rb_node);
+		next = rb_next(&pos->rb_node);
+		rb_erase_cached(&pos->rb_node, tree);
+		free_srcline(pos->srcline);
+		zfree(&pos);
+	}
+}
+
+struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr,
+					    struct symbol *sym)
+{
+	const char *dso_name;
+
+	dso_name = dso__name(dso);
+	if (dso_name == NULL)
+		return NULL;
+
+	return addr2inlines(dso_name, addr, dso, sym);
+}
+
+void inline_node__delete(struct inline_node *node)
+{
+	struct inline_list *ilist, *tmp;
+
+	list_for_each_entry_safe(ilist, tmp, &node->val, list) {
+		list_del_init(&ilist->list);
+		free_srcline(ilist->srcline);
+		/* only the inlined symbols are owned by the list */
+		if (ilist->symbol && ilist->symbol->inlined)
+			symbol__delete(ilist->symbol);
+		free(ilist);
+	}
+
+	free(node);
+}
+
+void inlines__tree_insert(struct rb_root_cached *tree,
+			  struct inline_node *inlines)
+{
+	struct rb_node **p = &tree->rb_root.rb_node;
+	struct rb_node *parent = NULL;
+	const u64 addr = inlines->addr;
+	struct inline_node *i;
+	bool leftmost = true;
+
+	while (*p != NULL) {
+		parent = *p;
+		i = rb_entry(parent, struct inline_node, rb_node);
+		if (addr < i->addr)
+			p = &(*p)->rb_left;
+		else {
+			p = &(*p)->rb_right;
+			leftmost = false;
+		}
+	}
+	rb_link_node(&inlines->rb_node, parent, p);
+	rb_insert_color_cached(&inlines->rb_node, tree, leftmost);
+}
+
+struct inline_node *inlines__tree_find(struct rb_root_cached *tree, u64 addr)
+{
+	struct rb_node *n = tree->rb_root.rb_node;
+
+	while (n) {
+		struct inline_node *i = rb_entry(n, struct inline_node,
+						 rb_node);
+
+		if (addr < i->addr)
+			n = n->rb_left;
+		else if (addr > i->addr)
+			n = n->rb_right;
+		else
+			return i;
+	}
+
+	return NULL;
+}
+
+void inlines__tree_delete(struct rb_root_cached *tree)
+{
+	struct inline_node *pos;
+	struct rb_node *next = rb_first_cached(tree);
+
+	while (next) {
+		pos = rb_entry(next, struct inline_node, rb_node);
+		next = rb_next(&pos->rb_node);
+		rb_erase_cached(&pos->rb_node, tree);
+		inline_node__delete(pos);
+	}
+}
diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h
new file mode 100644
index 000000000..b11a0aaaa
--- /dev/null
+++ b/tools/perf/util/srcline.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_SRCLINE_H
+#define PERF_SRCLINE_H
+
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+struct dso;
+struct symbol;
+
+extern bool srcline_full_filename;
+char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
+		  bool show_sym, bool show_addr, u64 ip);
+char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
+		  bool show_sym, bool show_addr, bool unwind_inlines,
+		  u64 ip);
+void free_srcline(char *srcline);
+char *get_srcline_split(struct dso *dso, u64 addr, unsigned *line);
+
+/* insert the srcline into the DSO, which will take ownership */
+void srcline__tree_insert(struct rb_root_cached *tree, u64 addr, char *srcline);
+/* find previously inserted srcline */
+char *srcline__tree_find(struct rb_root_cached *tree, u64 addr);
+/* delete all srclines within the tree */
+void srcline__tree_delete(struct rb_root_cached *tree);
+
+#define SRCLINE_UNKNOWN  ((char *) "??:0")
+
+struct inline_list {
+	struct symbol		*symbol;
+	char			*srcline;
+	struct list_head	list;
+};
+
+struct inline_node {
+	u64			addr;
+	struct list_head	val;
+	struct rb_node		rb_node;
+};
+
+/* parse inlined frames for the given address */
+struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr,
+					    struct symbol *sym);
+/* free resources associated to the inline node list */
+void inline_node__delete(struct inline_node *node);
+
+/* insert the inline node list into the DSO, which will take ownership */
+void inlines__tree_insert(struct rb_root_cached *tree,
+			  struct inline_node *inlines);
+/* find previously inserted inline node list */
+struct inline_node *inlines__tree_find(struct rb_root_cached *tree, u64 addr);
+/* delete all nodes within the tree of inline_node s */
+void inlines__tree_delete(struct rb_root_cached *tree);
+
+#endif /* PERF_SRCLINE_H */
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
new file mode 100644
index 000000000..bc866d189
--- /dev/null
+++ b/tools/perf/util/stat-display.c
@@ -0,0 +1,1515 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <linux/string.h>
+#include <linux/time64.h>
+#include <math.h>
+#include <perf/cpumap.h>
+#include "color.h"
+#include "counts.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "stat.h"
+#include "top.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "string2.h"
+#include <linux/ctype.h>
+#include "cgroup.h"
+#include <api/fs/fs.h>
+#include "util.h"
+#include "iostat.h"
+#include "pmu-hybrid.h"
+#include "evlist-hybrid.h"
+
+#define CNTR_NOT_SUPPORTED	"<not supported>"
+#define CNTR_NOT_COUNTED	"<not counted>"
+
+static void print_running(struct perf_stat_config *config,
+			  u64 run, u64 ena)
+{
+
+	double enabled_percent = 100;
+
+	if (run != ena)
+		enabled_percent = 100 * run / ena;
+	if (config->json_output)
+		fprintf(config->output,
+			"\"event-runtime\" : %" PRIu64 ", \"pcnt-running\" : %.2f, ",
+			run, enabled_percent);
+	else if (config->csv_output)
+		fprintf(config->output,
+			"%s%" PRIu64 "%s%.2f", config->csv_sep,
+			run, config->csv_sep, enabled_percent);
+	else if (run != ena)
+		fprintf(config->output, "  (%.2f%%)", 100.0 * run / ena);
+}
+
+static void print_noise_pct(struct perf_stat_config *config,
+			    double total, double avg)
+{
+	double pct = rel_stddev_stats(total, avg);
+
+	if (config->json_output)
+		fprintf(config->output, "\"variance\" : %.2f, ", pct);
+	else if (config->csv_output)
+		fprintf(config->output, "%s%.2f%%", config->csv_sep, pct);
+	else if (pct)
+		fprintf(config->output, "  ( +-%6.2f%% )", pct);
+}
+
+static void print_noise(struct perf_stat_config *config,
+			struct evsel *evsel, double avg)
+{
+	struct perf_stat_evsel *ps;
+
+	if (config->run_count == 1)
+		return;
+
+	ps = evsel->stats;
+	print_noise_pct(config, stddev_stats(&ps->res_stats), avg);
+}
+
+static void print_cgroup(struct perf_stat_config *config, struct evsel *evsel)
+{
+	if (nr_cgroups) {
+		const char *cgrp_name = evsel->cgrp ? evsel->cgrp->name  : "";
+
+		if (config->json_output)
+			fprintf(config->output, "\"cgroup\" : \"%s\", ", cgrp_name);
+		else
+			fprintf(config->output, "%s%s", config->csv_sep, cgrp_name);
+	}
+}
+
+
+static void aggr_printout(struct perf_stat_config *config,
+			  struct evsel *evsel, struct aggr_cpu_id id, int nr)
+{
+
+
+	if (config->json_output && !config->interval)
+		fprintf(config->output, "{");
+
+	switch (config->aggr_mode) {
+	case AGGR_CORE:
+		if (config->json_output) {
+			fprintf(config->output,
+				"\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ",
+				id.socket,
+				id.die,
+				id.core,
+				nr);
+		} else {
+			fprintf(config->output, "S%d-D%d-C%*d%s%*d%s",
+				id.socket,
+				id.die,
+				config->csv_output ? 0 : -8,
+				id.core,
+				config->csv_sep,
+				config->csv_output ? 0 : 4,
+				nr,
+				config->csv_sep);
+		}
+		break;
+	case AGGR_DIE:
+		if (config->json_output) {
+			fprintf(config->output,
+				"\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ",
+				id.socket,
+				id.die,
+				nr);
+		} else {
+			fprintf(config->output, "S%d-D%*d%s%*d%s",
+				id.socket,
+				config->csv_output ? 0 : -8,
+				id.die,
+				config->csv_sep,
+				config->csv_output ? 0 : 4,
+				nr,
+				config->csv_sep);
+		}
+		break;
+	case AGGR_SOCKET:
+		if (config->json_output) {
+			fprintf(config->output,
+				"\"socket\" : \"S%d\", \"aggregate-number\" : %d, ",
+				id.socket,
+				nr);
+		} else {
+			fprintf(config->output, "S%*d%s%*d%s",
+				config->csv_output ? 0 : -5,
+				id.socket,
+				config->csv_sep,
+				config->csv_output ? 0 : 4,
+				nr,
+				config->csv_sep);
+		}
+		break;
+	case AGGR_NODE:
+		if (config->json_output) {
+			fprintf(config->output, "\"node\" : \"N%d\", \"aggregate-number\" : %d, ",
+				id.node,
+				nr);
+		} else {
+			fprintf(config->output, "N%*d%s%*d%s",
+				config->csv_output ? 0 : -5,
+				id.node,
+				config->csv_sep,
+				config->csv_output ? 0 : 4,
+				nr,
+				config->csv_sep);
+		}
+		break;
+	case AGGR_NONE:
+		if (config->json_output) {
+			if (evsel->percore && !config->percore_show_thread) {
+				fprintf(config->output, "\"core\" : \"S%d-D%d-C%d\"",
+					id.socket,
+					id.die,
+					id.core);
+			} else if (id.cpu.cpu > -1) {
+				fprintf(config->output, "\"cpu\" : \"%d\", ",
+					id.cpu.cpu);
+			}
+		} else {
+			if (evsel->percore && !config->percore_show_thread) {
+				fprintf(config->output, "S%d-D%d-C%*d%s",
+					id.socket,
+					id.die,
+					config->csv_output ? 0 : -3,
+					id.core, config->csv_sep);
+			} else if (id.cpu.cpu > -1) {
+				fprintf(config->output, "CPU%*d%s",
+					config->csv_output ? 0 : -7,
+					id.cpu.cpu, config->csv_sep);
+			}
+		}
+		break;
+	case AGGR_THREAD:
+		if (config->json_output) {
+			fprintf(config->output, "\"thread\" : \"%s-%d\", ",
+				perf_thread_map__comm(evsel->core.threads, id.thread_idx),
+				perf_thread_map__pid(evsel->core.threads, id.thread_idx));
+		} else {
+			fprintf(config->output, "%*s-%*d%s",
+				config->csv_output ? 0 : 16,
+				perf_thread_map__comm(evsel->core.threads, id.thread_idx),
+				config->csv_output ? 0 : -8,
+				perf_thread_map__pid(evsel->core.threads, id.thread_idx),
+				config->csv_sep);
+		}
+		break;
+	case AGGR_GLOBAL:
+	case AGGR_UNSET:
+	case AGGR_MAX:
+	default:
+		break;
+	}
+}
+
+struct outstate {
+	FILE *fh;
+	bool newline;
+	const char *prefix;
+	int  nfields;
+	int  nr;
+	struct aggr_cpu_id id;
+	struct evsel *evsel;
+};
+
+#define METRIC_LEN  35
+
+static void new_line_std(struct perf_stat_config *config __maybe_unused,
+			 void *ctx)
+{
+	struct outstate *os = ctx;
+
+	os->newline = true;
+}
+
+static void do_new_line_std(struct perf_stat_config *config,
+			    struct outstate *os)
+{
+	fputc('\n', os->fh);
+	fputs(os->prefix, os->fh);
+	aggr_printout(config, os->evsel, os->id, os->nr);
+	if (config->aggr_mode == AGGR_NONE)
+		fprintf(os->fh, "        ");
+	fprintf(os->fh, "                                                 ");
+}
+
+static void print_metric_std(struct perf_stat_config *config,
+			     void *ctx, const char *color, const char *fmt,
+			     const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	int n;
+	bool newline = os->newline;
+
+	os->newline = false;
+
+	if (unit == NULL || fmt == NULL) {
+		fprintf(out, "%-*s", METRIC_LEN, "");
+		return;
+	}
+
+	if (newline)
+		do_new_line_std(config, os);
+
+	n = fprintf(out, " # ");
+	if (color)
+		n += color_fprintf(out, color, fmt, val);
+	else
+		n += fprintf(out, fmt, val);
+	fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
+}
+
+static void new_line_csv(struct perf_stat_config *config, void *ctx)
+{
+	struct outstate *os = ctx;
+	int i;
+
+	fputc('\n', os->fh);
+	if (os->prefix)
+		fprintf(os->fh, "%s", os->prefix);
+	aggr_printout(config, os->evsel, os->id, os->nr);
+	for (i = 0; i < os->nfields; i++)
+		fputs(config->csv_sep, os->fh);
+}
+
+static void print_metric_csv(struct perf_stat_config *config __maybe_unused,
+			     void *ctx,
+			     const char *color __maybe_unused,
+			     const char *fmt, const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	char buf[64], *vals, *ends;
+
+	if (unit == NULL || fmt == NULL) {
+		fprintf(out, "%s%s", config->csv_sep, config->csv_sep);
+		return;
+	}
+	snprintf(buf, sizeof(buf), fmt, val);
+	ends = vals = skip_spaces(buf);
+	while (isdigit(*ends) || *ends == '.')
+		ends++;
+	*ends = 0;
+	fprintf(out, "%s%s%s%s", config->csv_sep, vals, config->csv_sep, skip_spaces(unit));
+}
+
+static void print_metric_json(struct perf_stat_config *config __maybe_unused,
+			     void *ctx,
+			     const char *color __maybe_unused,
+			     const char *fmt __maybe_unused,
+			     const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+
+	fprintf(out, "\"metric-value\" : %f, ", val);
+	fprintf(out, "\"metric-unit\" : \"%s\"", unit);
+	if (!config->metric_only)
+		fprintf(out, "}");
+}
+
+static void new_line_json(struct perf_stat_config *config, void *ctx)
+{
+	struct outstate *os = ctx;
+
+	fputc('\n', os->fh);
+	if (os->prefix)
+		fprintf(os->fh, "%s", os->prefix);
+	aggr_printout(config, os->evsel, os->id, os->nr);
+}
+
+/* Filter out some columns that don't work well in metrics only mode */
+
+static bool valid_only_metric(const char *unit)
+{
+	if (!unit)
+		return false;
+	if (strstr(unit, "/sec") ||
+	    strstr(unit, "CPUs utilized"))
+		return false;
+	return true;
+}
+
+static const char *fixunit(char *buf, struct evsel *evsel,
+			   const char *unit)
+{
+	if (!strncmp(unit, "of all", 6)) {
+		snprintf(buf, 1024, "%s %s", evsel__name(evsel),
+			 unit);
+		return buf;
+	}
+	return unit;
+}
+
+static void print_metric_only(struct perf_stat_config *config,
+			      void *ctx, const char *color, const char *fmt,
+			      const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	char buf[1024], str[1024];
+	unsigned mlen = config->metric_only_len;
+
+	if (!valid_only_metric(unit))
+		return;
+	unit = fixunit(buf, os->evsel, unit);
+	if (mlen < strlen(unit))
+		mlen = strlen(unit) + 1;
+
+	if (color)
+		mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1;
+
+	color_snprintf(str, sizeof(str), color ?: "", fmt, val);
+	fprintf(out, "%*s ", mlen, str);
+}
+
+static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused,
+				  void *ctx, const char *color __maybe_unused,
+				  const char *fmt,
+				  const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	char buf[64], *vals, *ends;
+	char tbuf[1024];
+
+	if (!valid_only_metric(unit))
+		return;
+	unit = fixunit(tbuf, os->evsel, unit);
+	snprintf(buf, sizeof buf, fmt, val);
+	ends = vals = skip_spaces(buf);
+	while (isdigit(*ends) || *ends == '.')
+		ends++;
+	*ends = 0;
+	fprintf(out, "%s%s", vals, config->csv_sep);
+}
+
+static void print_metric_only_json(struct perf_stat_config *config __maybe_unused,
+				  void *ctx, const char *color __maybe_unused,
+				  const char *fmt,
+				  const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	char buf[64], *vals, *ends;
+	char tbuf[1024];
+
+	if (!valid_only_metric(unit))
+		return;
+	unit = fixunit(tbuf, os->evsel, unit);
+	snprintf(buf, sizeof(buf), fmt, val);
+	ends = vals = skip_spaces(buf);
+	while (isdigit(*ends) || *ends == '.')
+		ends++;
+	*ends = 0;
+	fprintf(out, "{\"metric-value\" : \"%s\"}", vals);
+}
+
+static void new_line_metric(struct perf_stat_config *config __maybe_unused,
+			    void *ctx __maybe_unused)
+{
+}
+
+static void print_metric_header(struct perf_stat_config *config,
+				void *ctx, const char *color __maybe_unused,
+				const char *fmt __maybe_unused,
+				const char *unit, double val __maybe_unused)
+{
+	struct outstate *os = ctx;
+	char tbuf[1024];
+
+	/* In case of iostat, print metric header for first root port only */
+	if (config->iostat_run &&
+	    os->evsel->priv != os->evsel->evlist->selected->priv)
+		return;
+
+	if (!valid_only_metric(unit) && !config->json_output)
+		return;
+	unit = fixunit(tbuf, os->evsel, unit);
+
+	if (config->json_output)
+		fprintf(os->fh, "\"unit\" : \"%s\"", unit);
+	else if (config->csv_output)
+		fprintf(os->fh, "%s%s", unit, config->csv_sep);
+	else
+		fprintf(os->fh, "%*s ", config->metric_only_len, unit);
+}
+
+static int first_shadow_map_idx(struct perf_stat_config *config,
+				struct evsel *evsel, const struct aggr_cpu_id *id)
+{
+	struct perf_cpu_map *cpus = evsel__cpus(evsel);
+	struct perf_cpu cpu;
+	int idx;
+
+	if (config->aggr_mode == AGGR_NONE)
+		return perf_cpu_map__idx(cpus, id->cpu);
+
+	if (config->aggr_mode == AGGR_THREAD)
+		return id->thread_idx;
+
+	if (!config->aggr_get_id)
+		return 0;
+
+	perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+		struct aggr_cpu_id cpu_id = config->aggr_get_id(config, cpu);
+
+		if (aggr_cpu_id__equal(&cpu_id, id))
+			return idx;
+	}
+	return 0;
+}
+
+static void abs_printout(struct perf_stat_config *config,
+			 struct aggr_cpu_id id, int nr, struct evsel *evsel, double avg)
+{
+	FILE *output = config->output;
+	double sc =  evsel->scale;
+	const char *fmt;
+
+	if (config->csv_output) {
+		fmt = floor(sc) != sc ?  "%.2f%s" : "%.0f%s";
+	} else {
+		if (config->big_num)
+			fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s";
+		else
+			fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s";
+	}
+
+	aggr_printout(config, evsel, id, nr);
+
+	if (config->json_output)
+		fprintf(output, "\"counter-value\" : \"%f\", ", avg);
+	else
+		fprintf(output, fmt, avg, config->csv_sep);
+
+	if (config->json_output) {
+		if (evsel->unit) {
+			fprintf(output, "\"unit\" : \"%s\", ",
+				evsel->unit);
+		}
+	} else {
+		if (evsel->unit)
+			fprintf(output, "%-*s%s",
+				config->csv_output ? 0 : config->unit_width,
+				evsel->unit, config->csv_sep);
+	}
+
+	if (config->json_output)
+		fprintf(output, "\"event\" : \"%s\", ", evsel__name(evsel));
+	else
+		fprintf(output, "%-*s", config->csv_output ? 0 : 32, evsel__name(evsel));
+
+	print_cgroup(config, evsel);
+}
+
+static bool is_mixed_hw_group(struct evsel *counter)
+{
+	struct evlist *evlist = counter->evlist;
+	u32 pmu_type = counter->core.attr.type;
+	struct evsel *pos;
+
+	if (counter->core.nr_members < 2)
+		return false;
+
+	evlist__for_each_entry(evlist, pos) {
+		/* software events can be part of any hardware group */
+		if (pos->core.attr.type == PERF_TYPE_SOFTWARE)
+			continue;
+		if (pmu_type == PERF_TYPE_SOFTWARE) {
+			pmu_type = pos->core.attr.type;
+			continue;
+		}
+		if (pmu_type != pos->core.attr.type)
+			return true;
+	}
+
+	return false;
+}
+
+static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int nr,
+		     struct evsel *counter, double uval,
+		     char *prefix, u64 run, u64 ena, double noise,
+		     struct runtime_stat *st)
+{
+	struct perf_stat_output_ctx out;
+	struct outstate os = {
+		.fh = config->output,
+		.prefix = prefix ? prefix : "",
+		.id = id,
+		.nr = nr,
+		.evsel = counter,
+	};
+	print_metric_t pm;
+	new_line_t nl;
+
+	if (config->csv_output) {
+		static const int aggr_fields[AGGR_MAX] = {
+			[AGGR_NONE] = 1,
+			[AGGR_GLOBAL] = 0,
+			[AGGR_SOCKET] = 2,
+			[AGGR_DIE] = 2,
+			[AGGR_CORE] = 2,
+			[AGGR_THREAD] = 1,
+			[AGGR_UNSET] = 0,
+			[AGGR_NODE] = 1,
+		};
+
+		pm = config->metric_only ? print_metric_only_csv : print_metric_csv;
+		nl = config->metric_only ? new_line_metric : new_line_csv;
+		os.nfields = 3 + aggr_fields[config->aggr_mode] + (counter->cgrp ? 1 : 0);
+	} else if (config->json_output) {
+		pm = config->metric_only ? print_metric_only_json : print_metric_json;
+		nl = config->metric_only ? new_line_metric : new_line_json;
+	} else {
+		pm = config->metric_only ? print_metric_only : print_metric_std;
+		nl = config->metric_only ? new_line_metric : new_line_std;
+	}
+
+	if (!config->no_csv_summary && config->csv_output &&
+	    config->summary && !config->interval) {
+		fprintf(config->output, "%16s%s", "summary", config->csv_sep);
+	}
+
+	if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
+		if (config->metric_only) {
+			pm(config, &os, NULL, "", "", 0);
+			return;
+		}
+		aggr_printout(config, counter, id, nr);
+
+		if (config->json_output) {
+			fprintf(config->output, "\"counter-value\" : \"%s\", ",
+					counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED);
+		} else {
+			fprintf(config->output, "%*s%s",
+				config->csv_output ? 0 : 18,
+				counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
+				config->csv_sep);
+		}
+
+		if (counter->supported) {
+			if (!evlist__has_hybrid(counter->evlist)) {
+				config->print_free_counters_hint = 1;
+				if (is_mixed_hw_group(counter))
+					config->print_mixed_hw_group_error = 1;
+			}
+		}
+
+		if (config->json_output) {
+			fprintf(config->output, "\"unit\" : \"%s\", ", counter->unit);
+		} else {
+			fprintf(config->output, "%-*s%s",
+				config->csv_output ? 0 : config->unit_width,
+				counter->unit, config->csv_sep);
+		}
+
+		if (config->json_output) {
+			fprintf(config->output, "\"event\" : \"%s\", ",
+				evsel__name(counter));
+		} else {
+			fprintf(config->output, "%*s",
+				 config->csv_output ? 0 : -25, evsel__name(counter));
+		}
+
+		print_cgroup(config, counter);
+
+		if (!config->csv_output && !config->json_output)
+			pm(config, &os, NULL, NULL, "", 0);
+		print_noise(config, counter, noise);
+		print_running(config, run, ena);
+		if (config->csv_output)
+			pm(config, &os, NULL, NULL, "", 0);
+		else if (config->json_output)
+			pm(config, &os, NULL, NULL, "", 0);
+		return;
+	}
+
+	if (!config->metric_only)
+		abs_printout(config, id, nr, counter, uval);
+
+	out.print_metric = pm;
+	out.new_line = nl;
+	out.ctx = &os;
+	out.force_header = false;
+
+	if (config->csv_output && !config->metric_only) {
+		print_noise(config, counter, noise);
+		print_running(config, run, ena);
+	} else if (config->json_output && !config->metric_only) {
+		print_noise(config, counter, noise);
+		print_running(config, run, ena);
+	}
+
+	perf_stat__print_shadow_stats(config, counter, uval,
+				first_shadow_map_idx(config, counter, &id),
+				&out, &config->metric_events, st);
+	if (!config->csv_output && !config->metric_only && !config->json_output) {
+		print_noise(config, counter, noise);
+		print_running(config, run, ena);
+	}
+}
+
+static void aggr_update_shadow(struct perf_stat_config *config,
+			       struct evlist *evlist)
+{
+	int idx, s;
+	struct perf_cpu cpu;
+	struct aggr_cpu_id s2, id;
+	u64 val;
+	struct evsel *counter;
+	struct perf_cpu_map *cpus;
+
+	for (s = 0; s < config->aggr_map->nr; s++) {
+		id = config->aggr_map->map[s];
+		evlist__for_each_entry(evlist, counter) {
+			cpus = evsel__cpus(counter);
+			val = 0;
+			perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+				s2 = config->aggr_get_id(config, cpu);
+				if (!aggr_cpu_id__equal(&s2, &id))
+					continue;
+				val += perf_counts(counter->counts, idx, 0)->val;
+			}
+			perf_stat__update_shadow_stats(counter, val,
+					first_shadow_map_idx(config, counter, &id),
+					&rt_stat);
+		}
+	}
+}
+
+static void uniquify_event_name(struct evsel *counter)
+{
+	char *new_name;
+	char *config;
+	int ret = 0;
+
+	if (counter->uniquified_name || counter->use_config_name ||
+	    !counter->pmu_name || !strncmp(counter->name, counter->pmu_name,
+					   strlen(counter->pmu_name)))
+		return;
+
+	config = strchr(counter->name, '/');
+	if (config) {
+		if (asprintf(&new_name,
+			     "%s%s", counter->pmu_name, config) > 0) {
+			free(counter->name);
+			counter->name = new_name;
+		}
+	} else {
+		if (evsel__is_hybrid(counter)) {
+			ret = asprintf(&new_name, "%s/%s/",
+				       counter->pmu_name, counter->name);
+		} else {
+			ret = asprintf(&new_name, "%s [%s]",
+				       counter->name, counter->pmu_name);
+		}
+
+		if (ret) {
+			free(counter->name);
+			counter->name = new_name;
+		}
+	}
+
+	counter->uniquified_name = true;
+}
+
+static void collect_all_aliases(struct perf_stat_config *config, struct evsel *counter,
+			    void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
+				       bool first),
+			    void *data)
+{
+	struct evlist *evlist = counter->evlist;
+	struct evsel *alias;
+
+	alias = list_prepare_entry(counter, &(evlist->core.entries), core.node);
+	list_for_each_entry_continue (alias, &evlist->core.entries, core.node) {
+		/* Merge events with the same name, etc. but on different PMUs. */
+		if (!strcmp(evsel__name(alias), evsel__name(counter)) &&
+			alias->scale == counter->scale &&
+			alias->cgrp == counter->cgrp &&
+			!strcmp(alias->unit, counter->unit) &&
+			evsel__is_clock(alias) == evsel__is_clock(counter) &&
+			strcmp(alias->pmu_name, counter->pmu_name)) {
+			alias->merged_stat = true;
+			cb(config, alias, data, false);
+		}
+	}
+}
+
+static bool hybrid_merge(struct evsel *counter, struct perf_stat_config *config,
+			 bool check)
+{
+	if (evsel__is_hybrid(counter)) {
+		if (check)
+			return config->hybrid_merge;
+		else
+			return !config->hybrid_merge;
+	}
+
+	return false;
+}
+
+static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
+			    void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
+				       bool first),
+			    void *data)
+{
+	if (counter->merged_stat)
+		return false;
+	cb(config, counter, data, true);
+	if (config->no_merge || hybrid_merge(counter, config, false))
+		uniquify_event_name(counter);
+	else if (counter->auto_merge_stats || hybrid_merge(counter, config, true))
+		collect_all_aliases(config, counter, cb, data);
+	return true;
+}
+
+struct aggr_data {
+	u64 ena, run, val;
+	struct aggr_cpu_id id;
+	int nr;
+	int cpu_map_idx;
+};
+
+static void aggr_cb(struct perf_stat_config *config,
+		    struct evsel *counter, void *data, bool first)
+{
+	struct aggr_data *ad = data;
+	int idx;
+	struct perf_cpu cpu;
+	struct perf_cpu_map *cpus;
+	struct aggr_cpu_id s2;
+
+	cpus = evsel__cpus(counter);
+	perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+		struct perf_counts_values *counts;
+
+		s2 = config->aggr_get_id(config, cpu);
+		if (!aggr_cpu_id__equal(&s2, &ad->id))
+			continue;
+		if (first)
+			ad->nr++;
+		counts = perf_counts(counter->counts, idx, 0);
+		/*
+		 * When any result is bad, make them all to give
+		 * consistent output in interval mode.
+		 */
+		if (counts->ena == 0 || counts->run == 0 ||
+		    counter->counts->scaled == -1) {
+			ad->ena = 0;
+			ad->run = 0;
+			break;
+		}
+		ad->val += counts->val;
+		ad->ena += counts->ena;
+		ad->run += counts->run;
+	}
+}
+
+static void print_counter_aggrdata(struct perf_stat_config *config,
+				   struct evsel *counter, int s,
+				   char *prefix, bool metric_only,
+				   bool *first, struct perf_cpu cpu)
+{
+	struct aggr_data ad;
+	FILE *output = config->output;
+	u64 ena, run, val;
+	int nr;
+	struct aggr_cpu_id id;
+	double uval;
+
+	ad.id = id = config->aggr_map->map[s];
+	ad.val = ad.ena = ad.run = 0;
+	ad.nr = 0;
+	if (!collect_data(config, counter, aggr_cb, &ad))
+		return;
+
+	if (perf_pmu__has_hybrid() && ad.ena == 0)
+		return;
+
+	nr = ad.nr;
+	ena = ad.ena;
+	run = ad.run;
+	val = ad.val;
+	if (*first && metric_only) {
+		*first = false;
+		aggr_printout(config, counter, id, nr);
+	}
+	if (prefix && !metric_only)
+		fprintf(output, "%s", prefix);
+
+	uval = val * counter->scale;
+	if (cpu.cpu != -1)
+		id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
+
+	printout(config, id, nr, counter, uval,
+		 prefix, run, ena, 1.0, &rt_stat);
+	if (!metric_only)
+		fputc('\n', output);
+}
+
+static void print_aggr(struct perf_stat_config *config,
+		       struct evlist *evlist,
+		       char *prefix)
+{
+	bool metric_only = config->metric_only;
+	FILE *output = config->output;
+	struct evsel *counter;
+	int s;
+	bool first;
+
+	if (!config->aggr_map || !config->aggr_get_id)
+		return;
+
+	aggr_update_shadow(config, evlist);
+
+	/*
+	 * With metric_only everything is on a single line.
+	 * Without each counter has its own line.
+	 */
+	for (s = 0; s < config->aggr_map->nr; s++) {
+		if (prefix && metric_only)
+			fprintf(output, "%s", prefix);
+
+		first = true;
+		evlist__for_each_entry(evlist, counter) {
+			print_counter_aggrdata(config, counter, s,
+					prefix, metric_only,
+					&first, (struct perf_cpu){ .cpu = -1 });
+		}
+		if (metric_only)
+			fputc('\n', output);
+	}
+}
+
+static int cmp_val(const void *a, const void *b)
+{
+	return ((struct perf_aggr_thread_value *)b)->val -
+		((struct perf_aggr_thread_value *)a)->val;
+}
+
+static struct perf_aggr_thread_value *sort_aggr_thread(
+					struct evsel *counter,
+					int *ret,
+					struct target *_target)
+{
+	int nthreads = perf_thread_map__nr(counter->core.threads);
+	int i = 0;
+	double uval;
+	struct perf_aggr_thread_value *buf;
+
+	buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value));
+	if (!buf)
+		return NULL;
+
+	for (int thread = 0; thread < nthreads; thread++) {
+		int idx;
+		u64 ena = 0, run = 0, val = 0;
+
+		perf_cpu_map__for_each_idx(idx, evsel__cpus(counter)) {
+			struct perf_counts_values *counts =
+				perf_counts(counter->counts, idx, thread);
+
+			val += counts->val;
+			ena += counts->ena;
+			run += counts->run;
+		}
+
+		uval = val * counter->scale;
+
+		/*
+		 * Skip value 0 when enabling --per-thread globally,
+		 * otherwise too many 0 output.
+		 */
+		if (uval == 0.0 && target__has_per_thread(_target))
+			continue;
+
+		buf[i].counter = counter;
+		buf[i].id = aggr_cpu_id__empty();
+		buf[i].id.thread_idx = thread;
+		buf[i].uval = uval;
+		buf[i].val = val;
+		buf[i].run = run;
+		buf[i].ena = ena;
+		i++;
+	}
+
+	qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val);
+
+	if (ret)
+		*ret = i;
+
+	return buf;
+}
+
+static void print_aggr_thread(struct perf_stat_config *config,
+			      struct target *_target,
+			      struct evsel *counter, char *prefix)
+{
+	FILE *output = config->output;
+	int thread, sorted_threads;
+	struct aggr_cpu_id id;
+	struct perf_aggr_thread_value *buf;
+
+	buf = sort_aggr_thread(counter, &sorted_threads, _target);
+	if (!buf) {
+		perror("cannot sort aggr thread");
+		return;
+	}
+
+	for (thread = 0; thread < sorted_threads; thread++) {
+		if (prefix)
+			fprintf(output, "%s", prefix);
+
+		id = buf[thread].id;
+		printout(config, id, 0, buf[thread].counter, buf[thread].uval,
+			 prefix, buf[thread].run, buf[thread].ena, 1.0,
+			 &rt_stat);
+		fputc('\n', output);
+	}
+
+	free(buf);
+}
+
+struct caggr_data {
+	double avg, avg_enabled, avg_running;
+};
+
+static void counter_aggr_cb(struct perf_stat_config *config __maybe_unused,
+			    struct evsel *counter, void *data,
+			    bool first __maybe_unused)
+{
+	struct caggr_data *cd = data;
+	struct perf_counts_values *aggr = &counter->counts->aggr;
+
+	cd->avg += aggr->val;
+	cd->avg_enabled += aggr->ena;
+	cd->avg_running += aggr->run;
+}
+
+/*
+ * Print out the results of a single counter:
+ * aggregated counts in system-wide mode
+ */
+static void print_counter_aggr(struct perf_stat_config *config,
+			       struct evsel *counter, char *prefix)
+{
+	bool metric_only = config->metric_only;
+	FILE *output = config->output;
+	double uval;
+	struct caggr_data cd = { .avg = 0.0 };
+
+	if (!collect_data(config, counter, counter_aggr_cb, &cd))
+		return;
+
+	if (prefix && !metric_only)
+		fprintf(output, "%s", prefix);
+
+	uval = cd.avg * counter->scale;
+	printout(config, aggr_cpu_id__empty(), 0, counter, uval, prefix, cd.avg_running,
+		 cd.avg_enabled, cd.avg, &rt_stat);
+	if (!metric_only)
+		fprintf(output, "\n");
+}
+
+static void counter_cb(struct perf_stat_config *config __maybe_unused,
+		       struct evsel *counter, void *data,
+		       bool first __maybe_unused)
+{
+	struct aggr_data *ad = data;
+
+	ad->val += perf_counts(counter->counts, ad->cpu_map_idx, 0)->val;
+	ad->ena += perf_counts(counter->counts, ad->cpu_map_idx, 0)->ena;
+	ad->run += perf_counts(counter->counts, ad->cpu_map_idx, 0)->run;
+}
+
+/*
+ * Print out the results of a single counter:
+ * does not use aggregated count in system-wide
+ */
+static void print_counter(struct perf_stat_config *config,
+			  struct evsel *counter, char *prefix)
+{
+	FILE *output = config->output;
+	u64 ena, run, val;
+	double uval;
+	int idx;
+	struct perf_cpu cpu;
+	struct aggr_cpu_id id;
+
+	perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) {
+		struct aggr_data ad = { .cpu_map_idx = idx };
+
+		if (!collect_data(config, counter, counter_cb, &ad))
+			return;
+		val = ad.val;
+		ena = ad.ena;
+		run = ad.run;
+
+		if (prefix)
+			fprintf(output, "%s", prefix);
+
+		uval = val * counter->scale;
+		id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
+		printout(config, id, 0, counter, uval, prefix,
+			 run, ena, 1.0, &rt_stat);
+
+		fputc('\n', output);
+	}
+}
+
+static void print_no_aggr_metric(struct perf_stat_config *config,
+				 struct evlist *evlist,
+				 char *prefix)
+{
+	int all_idx;
+	struct perf_cpu cpu;
+
+	perf_cpu_map__for_each_cpu(cpu, all_idx, evlist->core.user_requested_cpus) {
+		struct evsel *counter;
+		bool first = true;
+
+		evlist__for_each_entry(evlist, counter) {
+			u64 ena, run, val;
+			double uval;
+			struct aggr_cpu_id id;
+			int counter_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu);
+
+			if (counter_idx < 0)
+				continue;
+
+			id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
+			if (first) {
+				if (prefix)
+					fputs(prefix, config->output);
+				aggr_printout(config, counter, id, 0);
+				first = false;
+			}
+			val = perf_counts(counter->counts, counter_idx, 0)->val;
+			ena = perf_counts(counter->counts, counter_idx, 0)->ena;
+			run = perf_counts(counter->counts, counter_idx, 0)->run;
+
+			uval = val * counter->scale;
+			printout(config, id, 0, counter, uval, prefix,
+				 run, ena, 1.0, &rt_stat);
+		}
+		if (!first)
+			fputc('\n', config->output);
+	}
+}
+
+static int aggr_header_lens[] = {
+	[AGGR_CORE] = 24,
+	[AGGR_DIE] = 18,
+	[AGGR_SOCKET] = 12,
+	[AGGR_NONE] = 6,
+	[AGGR_THREAD] = 24,
+	[AGGR_NODE] = 6,
+	[AGGR_GLOBAL] = 0,
+};
+
+static const char *aggr_header_csv[] = {
+	[AGGR_CORE] 	= 	"core,cpus,",
+	[AGGR_DIE] 	= 	"die,cpus",
+	[AGGR_SOCKET] 	= 	"socket,cpus",
+	[AGGR_NONE] 	= 	"cpu,",
+	[AGGR_THREAD] 	= 	"comm-pid,",
+	[AGGR_NODE] 	= 	"node,",
+	[AGGR_GLOBAL] 	=	""
+};
+
+static void print_metric_headers(struct perf_stat_config *config,
+				 struct evlist *evlist,
+				 const char *prefix, bool no_indent)
+{
+	struct evsel *counter;
+	struct outstate os = {
+		.fh = config->output
+	};
+	struct perf_stat_output_ctx out = {
+		.ctx = &os,
+		.print_metric = print_metric_header,
+		.new_line = new_line_metric,
+		.force_header = true,
+	};
+	bool first = true;
+
+		if (config->json_output && !config->interval)
+			fprintf(config->output, "{");
+
+	if (prefix && !config->json_output)
+		fprintf(config->output, "%s", prefix);
+
+	if (!config->csv_output && !no_indent)
+		fprintf(config->output, "%*s",
+			aggr_header_lens[config->aggr_mode], "");
+	if (config->csv_output) {
+		if (config->interval)
+			fputs("time,", config->output);
+		if (!config->iostat_run)
+			fputs(aggr_header_csv[config->aggr_mode], config->output);
+	}
+	if (config->iostat_run)
+		iostat_print_header_prefix(config);
+
+	/* Print metrics headers only */
+	evlist__for_each_entry(evlist, counter) {
+		os.evsel = counter;
+
+		if (!first && config->json_output)
+			fprintf(config->output, ", ");
+		first = false;
+
+		perf_stat__print_shadow_stats(config, counter, 0,
+					      0,
+					      &out,
+					      &config->metric_events,
+					      &rt_stat);
+	}
+	if (config->json_output)
+		fprintf(config->output, "}");
+	fputc('\n', config->output);
+}
+
+static void print_interval(struct perf_stat_config *config,
+			   struct evlist *evlist,
+			   char *prefix, struct timespec *ts)
+{
+	bool metric_only = config->metric_only;
+	unsigned int unit_width = config->unit_width;
+	FILE *output = config->output;
+	static int num_print_interval;
+
+	if (config->interval_clear)
+		puts(CONSOLE_CLEAR);
+
+	if (!config->iostat_run && !config->json_output)
+		sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec,
+				 ts->tv_nsec, config->csv_sep);
+	if (!config->iostat_run && config->json_output && !config->metric_only)
+		sprintf(prefix, "{\"interval\" : %lu.%09lu, ", (unsigned long)
+				 ts->tv_sec, ts->tv_nsec);
+	if (!config->iostat_run && config->json_output && config->metric_only)
+		sprintf(prefix, "{\"interval\" : %lu.%09lu}", (unsigned long)
+				 ts->tv_sec, ts->tv_nsec);
+
+	if ((num_print_interval == 0 && !config->csv_output && !config->json_output)
+			 || config->interval_clear) {
+		switch (config->aggr_mode) {
+		case AGGR_NODE:
+			fprintf(output, "#           time node   cpus");
+			if (!metric_only)
+				fprintf(output, "             counts %*s events\n", unit_width, "unit");
+			break;
+		case AGGR_SOCKET:
+			fprintf(output, "#           time socket cpus");
+			if (!metric_only)
+				fprintf(output, "             counts %*s events\n", unit_width, "unit");
+			break;
+		case AGGR_DIE:
+			fprintf(output, "#           time die          cpus");
+			if (!metric_only)
+				fprintf(output, "             counts %*s events\n", unit_width, "unit");
+			break;
+		case AGGR_CORE:
+			fprintf(output, "#           time core            cpus");
+			if (!metric_only)
+				fprintf(output, "             counts %*s events\n", unit_width, "unit");
+			break;
+		case AGGR_NONE:
+			fprintf(output, "#           time CPU    ");
+			if (!metric_only)
+				fprintf(output, "                counts %*s events\n", unit_width, "unit");
+			break;
+		case AGGR_THREAD:
+			fprintf(output, "#           time             comm-pid");
+			if (!metric_only)
+				fprintf(output, "                  counts %*s events\n", unit_width, "unit");
+			break;
+		case AGGR_GLOBAL:
+		default:
+			if (!config->iostat_run) {
+				fprintf(output, "#           time");
+				if (!metric_only)
+					fprintf(output, "             counts %*s events\n", unit_width, "unit");
+			}
+		case AGGR_UNSET:
+		case AGGR_MAX:
+			break;
+		}
+	}
+
+	if ((num_print_interval == 0 || config->interval_clear)
+			 && metric_only && !config->json_output)
+		print_metric_headers(config, evlist, " ", true);
+	if ((num_print_interval == 0 || config->interval_clear)
+			 && metric_only && config->json_output) {
+		fprintf(output, "{");
+		print_metric_headers(config, evlist, " ", true);
+	}
+	if (++num_print_interval == 25)
+		num_print_interval = 0;
+}
+
+static void print_header(struct perf_stat_config *config,
+			 struct target *_target,
+			 int argc, const char **argv)
+{
+	FILE *output = config->output;
+	int i;
+
+	fflush(stdout);
+
+	if (!config->csv_output && !config->json_output) {
+		fprintf(output, "\n");
+		fprintf(output, " Performance counter stats for ");
+		if (_target->bpf_str)
+			fprintf(output, "\'BPF program(s) %s", _target->bpf_str);
+		else if (_target->system_wide)
+			fprintf(output, "\'system wide");
+		else if (_target->cpu_list)
+			fprintf(output, "\'CPU(s) %s", _target->cpu_list);
+		else if (!target__has_task(_target)) {
+			fprintf(output, "\'%s", argv ? argv[0] : "pipe");
+			for (i = 1; argv && (i < argc); i++)
+				fprintf(output, " %s", argv[i]);
+		} else if (_target->pid)
+			fprintf(output, "process id \'%s", _target->pid);
+		else
+			fprintf(output, "thread id \'%s", _target->tid);
+
+		fprintf(output, "\'");
+		if (config->run_count > 1)
+			fprintf(output, " (%d runs)", config->run_count);
+		fprintf(output, ":\n\n");
+	}
+}
+
+static int get_precision(double num)
+{
+	if (num > 1)
+		return 0;
+
+	return lround(ceil(-log10(num)));
+}
+
+static void print_table(struct perf_stat_config *config,
+			FILE *output, int precision, double avg)
+{
+	char tmp[64];
+	int idx, indent = 0;
+
+	scnprintf(tmp, 64, " %17.*f", precision, avg);
+	while (tmp[indent] == ' ')
+		indent++;
+
+	fprintf(output, "%*s# Table of individual measurements:\n", indent, "");
+
+	for (idx = 0; idx < config->run_count; idx++) {
+		double run = (double) config->walltime_run[idx] / NSEC_PER_SEC;
+		int h, n = 1 + abs((int) (100.0 * (run - avg)/run) / 5);
+
+		fprintf(output, " %17.*f (%+.*f) ",
+			precision, run, precision, run - avg);
+
+		for (h = 0; h < n; h++)
+			fprintf(output, "#");
+
+		fprintf(output, "\n");
+	}
+
+	fprintf(output, "\n%*s# Final result:\n", indent, "");
+}
+
+static double timeval2double(struct timeval *t)
+{
+	return t->tv_sec + (double) t->tv_usec/USEC_PER_SEC;
+}
+
+static void print_footer(struct perf_stat_config *config)
+{
+	double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC;
+	FILE *output = config->output;
+
+	if (!config->null_run)
+		fprintf(output, "\n");
+
+	if (config->run_count == 1) {
+		fprintf(output, " %17.9f seconds time elapsed", avg);
+
+		if (config->ru_display) {
+			double ru_utime = timeval2double(&config->ru_data.ru_utime);
+			double ru_stime = timeval2double(&config->ru_data.ru_stime);
+
+			fprintf(output, "\n\n");
+			fprintf(output, " %17.9f seconds user\n", ru_utime);
+			fprintf(output, " %17.9f seconds sys\n", ru_stime);
+		}
+	} else {
+		double sd = stddev_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC;
+		/*
+		 * Display at most 2 more significant
+		 * digits than the stddev inaccuracy.
+		 */
+		int precision = get_precision(sd) + 2;
+
+		if (config->walltime_run_table)
+			print_table(config, output, precision, avg);
+
+		fprintf(output, " %17.*f +- %.*f seconds time elapsed",
+			precision, avg, precision, sd);
+
+		print_noise_pct(config, sd, avg);
+	}
+	fprintf(output, "\n\n");
+
+	if (config->print_free_counters_hint && sysctl__nmi_watchdog_enabled())
+		fprintf(output,
+"Some events weren't counted. Try disabling the NMI watchdog:\n"
+"	echo 0 > /proc/sys/kernel/nmi_watchdog\n"
+"	perf stat ...\n"
+"	echo 1 > /proc/sys/kernel/nmi_watchdog\n");
+
+	if (config->print_mixed_hw_group_error)
+		fprintf(output,
+			"The events in group usually have to be from "
+			"the same PMU. Try reorganizing the group.\n");
+}
+
+static void print_percore_thread(struct perf_stat_config *config,
+				 struct evsel *counter, char *prefix)
+{
+	int s;
+	struct aggr_cpu_id s2, id;
+	struct perf_cpu_map *cpus;
+	bool first = true;
+	int idx;
+	struct perf_cpu cpu;
+
+	cpus = evsel__cpus(counter);
+	perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+		s2 = config->aggr_get_id(config, cpu);
+		for (s = 0; s < config->aggr_map->nr; s++) {
+			id = config->aggr_map->map[s];
+			if (aggr_cpu_id__equal(&s2, &id))
+				break;
+		}
+
+		print_counter_aggrdata(config, counter, s,
+				       prefix, false,
+				       &first, cpu);
+	}
+}
+
+static void print_percore(struct perf_stat_config *config,
+			  struct evsel *counter, char *prefix)
+{
+	bool metric_only = config->metric_only;
+	FILE *output = config->output;
+	int s;
+	bool first = true;
+
+	if (!config->aggr_map || !config->aggr_get_id)
+		return;
+
+	if (config->percore_show_thread)
+		return print_percore_thread(config, counter, prefix);
+
+	for (s = 0; s < config->aggr_map->nr; s++) {
+		if (prefix && metric_only)
+			fprintf(output, "%s", prefix);
+
+		print_counter_aggrdata(config, counter, s,
+				prefix, metric_only,
+				&first, (struct perf_cpu){ .cpu = -1 });
+	}
+
+	if (metric_only)
+		fputc('\n', output);
+}
+
+void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
+			    struct target *_target, struct timespec *ts, int argc, const char **argv)
+{
+	bool metric_only = config->metric_only;
+	int interval = config->interval;
+	struct evsel *counter;
+	char buf[64], *prefix = NULL;
+
+	if (config->iostat_run)
+		evlist->selected = evlist__first(evlist);
+
+	if (interval)
+		print_interval(config, evlist, prefix = buf, ts);
+	else
+		print_header(config, _target, argc, argv);
+
+	if (metric_only) {
+		static int num_print_iv;
+
+		if (num_print_iv == 0 && !interval)
+			print_metric_headers(config, evlist, prefix, false);
+		if (num_print_iv++ == 25)
+			num_print_iv = 0;
+		if (config->aggr_mode == AGGR_GLOBAL && prefix && !config->iostat_run)
+			fprintf(config->output, "%s", prefix);
+
+		if (config->json_output && !config->metric_only)
+			fprintf(config->output, "}");
+	}
+
+	switch (config->aggr_mode) {
+	case AGGR_CORE:
+	case AGGR_DIE:
+	case AGGR_SOCKET:
+	case AGGR_NODE:
+		print_aggr(config, evlist, prefix);
+		break;
+	case AGGR_THREAD:
+		evlist__for_each_entry(evlist, counter) {
+			print_aggr_thread(config, _target, counter, prefix);
+		}
+		break;
+	case AGGR_GLOBAL:
+		if (config->iostat_run)
+			iostat_print_counters(evlist, config, ts, prefix = buf,
+					      print_counter_aggr);
+		else {
+			evlist__for_each_entry(evlist, counter) {
+				print_counter_aggr(config, counter, prefix);
+			}
+			if (metric_only)
+				fputc('\n', config->output);
+		}
+		break;
+	case AGGR_NONE:
+		if (metric_only)
+			print_no_aggr_metric(config, evlist, prefix);
+		else {
+			evlist__for_each_entry(evlist, counter) {
+				if (counter->percore)
+					print_percore(config, counter, prefix);
+				else
+					print_counter(config, counter, prefix);
+			}
+		}
+		break;
+	case AGGR_MAX:
+	case AGGR_UNSET:
+	default:
+		break;
+	}
+
+	if (!interval && !config->csv_output && !config->json_output)
+		print_footer(config);
+
+	fflush(config->output);
+}
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
new file mode 100644
index 000000000..07b29fe27
--- /dev/null
+++ b/tools/perf/util/stat-shadow.c
@@ -0,0 +1,1336 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <math.h>
+#include <stdio.h>
+#include "evsel.h"
+#include "stat.h"
+#include "color.h"
+#include "debug.h"
+#include "pmu.h"
+#include "rblist.h"
+#include "evlist.h"
+#include "expr.h"
+#include "metricgroup.h"
+#include "cgroup.h"
+#include "units.h"
+#include <linux/zalloc.h>
+#include "iostat.h"
+
+/*
+ * AGGR_GLOBAL: Use CPU 0
+ * AGGR_SOCKET: Use first CPU of socket
+ * AGGR_DIE: Use first CPU of die
+ * AGGR_CORE: Use first CPU of core
+ * AGGR_NONE: Use matching CPU
+ * AGGR_THREAD: Not supported?
+ */
+
+struct runtime_stat rt_stat;
+struct stats walltime_nsecs_stats;
+struct rusage_stats ru_stats;
+
+struct saved_value {
+	struct rb_node rb_node;
+	struct evsel *evsel;
+	enum stat_type type;
+	int ctx;
+	int map_idx;  /* cpu or thread map index */
+	struct cgroup *cgrp;
+	struct stats stats;
+	u64 metric_total;
+	int metric_other;
+};
+
+static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
+{
+	struct saved_value *a = container_of(rb_node,
+					     struct saved_value,
+					     rb_node);
+	const struct saved_value *b = entry;
+
+	if (a->map_idx != b->map_idx)
+		return a->map_idx - b->map_idx;
+
+	/*
+	 * Previously the rbtree was used to link generic metrics.
+	 * The keys were evsel/cpu. Now the rbtree is extended to support
+	 * per-thread shadow stats. For shadow stats case, the keys
+	 * are cpu/type/ctx/stat (evsel is NULL). For generic metrics
+	 * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
+	 */
+	if (a->type != b->type)
+		return a->type - b->type;
+
+	if (a->ctx != b->ctx)
+		return a->ctx - b->ctx;
+
+	if (a->cgrp != b->cgrp)
+		return (char *)a->cgrp < (char *)b->cgrp ? -1 : +1;
+
+	if (a->evsel == b->evsel)
+		return 0;
+	if ((char *)a->evsel < (char *)b->evsel)
+		return -1;
+	return +1;
+}
+
+static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
+				     const void *entry)
+{
+	struct saved_value *nd = malloc(sizeof(struct saved_value));
+
+	if (!nd)
+		return NULL;
+	memcpy(nd, entry, sizeof(struct saved_value));
+	return &nd->rb_node;
+}
+
+static void saved_value_delete(struct rblist *rblist __maybe_unused,
+			       struct rb_node *rb_node)
+{
+	struct saved_value *v;
+
+	BUG_ON(!rb_node);
+	v = container_of(rb_node, struct saved_value, rb_node);
+	free(v);
+}
+
+static struct saved_value *saved_value_lookup(struct evsel *evsel,
+					      int map_idx,
+					      bool create,
+					      enum stat_type type,
+					      int ctx,
+					      struct runtime_stat *st,
+					      struct cgroup *cgrp)
+{
+	struct rblist *rblist;
+	struct rb_node *nd;
+	struct saved_value dm = {
+		.map_idx = map_idx,
+		.evsel = evsel,
+		.type = type,
+		.ctx = ctx,
+		.cgrp = cgrp,
+	};
+
+	rblist = &st->value_list;
+
+	/* don't use context info for clock events */
+	if (type == STAT_NSECS)
+		dm.ctx = 0;
+
+	nd = rblist__find(rblist, &dm);
+	if (nd)
+		return container_of(nd, struct saved_value, rb_node);
+	if (create) {
+		rblist__add_node(rblist, &dm);
+		nd = rblist__find(rblist, &dm);
+		if (nd)
+			return container_of(nd, struct saved_value, rb_node);
+	}
+	return NULL;
+}
+
+void runtime_stat__init(struct runtime_stat *st)
+{
+	struct rblist *rblist = &st->value_list;
+
+	rblist__init(rblist);
+	rblist->node_cmp = saved_value_cmp;
+	rblist->node_new = saved_value_new;
+	rblist->node_delete = saved_value_delete;
+}
+
+void runtime_stat__exit(struct runtime_stat *st)
+{
+	rblist__exit(&st->value_list);
+}
+
+void perf_stat__init_shadow_stats(void)
+{
+	runtime_stat__init(&rt_stat);
+}
+
+static int evsel_context(struct evsel *evsel)
+{
+	int ctx = 0;
+
+	if (evsel->core.attr.exclude_kernel)
+		ctx |= CTX_BIT_KERNEL;
+	if (evsel->core.attr.exclude_user)
+		ctx |= CTX_BIT_USER;
+	if (evsel->core.attr.exclude_hv)
+		ctx |= CTX_BIT_HV;
+	if (evsel->core.attr.exclude_host)
+		ctx |= CTX_BIT_HOST;
+	if (evsel->core.attr.exclude_idle)
+		ctx |= CTX_BIT_IDLE;
+
+	return ctx;
+}
+
+static void reset_stat(struct runtime_stat *st)
+{
+	struct rblist *rblist;
+	struct rb_node *pos, *next;
+
+	rblist = &st->value_list;
+	next = rb_first_cached(&rblist->entries);
+	while (next) {
+		pos = next;
+		next = rb_next(pos);
+		memset(&container_of(pos, struct saved_value, rb_node)->stats,
+		       0,
+		       sizeof(struct stats));
+	}
+}
+
+void perf_stat__reset_shadow_stats(void)
+{
+	reset_stat(&rt_stat);
+	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
+	memset(&ru_stats, 0, sizeof(ru_stats));
+}
+
+void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
+{
+	reset_stat(st);
+}
+
+struct runtime_stat_data {
+	int ctx;
+	struct cgroup *cgrp;
+};
+
+static void update_runtime_stat(struct runtime_stat *st,
+				enum stat_type type,
+				int map_idx, u64 count,
+				struct runtime_stat_data *rsd)
+{
+	struct saved_value *v = saved_value_lookup(NULL, map_idx, true, type,
+						   rsd->ctx, st, rsd->cgrp);
+
+	if (v)
+		update_stats(&v->stats, count);
+}
+
+/*
+ * Update various tracking values we maintain to print
+ * more semantic information such as miss/hit ratios,
+ * instruction rates, etc:
+ */
+void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
+				    int map_idx, struct runtime_stat *st)
+{
+	u64 count_ns = count;
+	struct saved_value *v;
+	struct runtime_stat_data rsd = {
+		.ctx = evsel_context(counter),
+		.cgrp = counter->cgrp,
+	};
+
+	count *= counter->scale;
+
+	if (evsel__is_clock(counter))
+		update_runtime_stat(st, STAT_NSECS, map_idx, count_ns, &rsd);
+	else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
+		update_runtime_stat(st, STAT_CYCLES, map_idx, count, &rsd);
+	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
+		update_runtime_stat(st, STAT_CYCLES_IN_TX, map_idx, count, &rsd);
+	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
+		update_runtime_stat(st, STAT_TRANSACTION, map_idx, count, &rsd);
+	else if (perf_stat_evsel__is(counter, ELISION_START))
+		update_runtime_stat(st, STAT_ELISION, map_idx, count, &rsd);
+	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
+		update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
+				    map_idx, count, &rsd);
+	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
+		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
+				    map_idx, count, &rsd);
+	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
+		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
+				    map_idx, count, &rsd);
+	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
+		update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
+				    map_idx, count, &rsd);
+	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
+		update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
+				    map_idx, count, &rsd);
+	else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING))
+		update_runtime_stat(st, STAT_TOPDOWN_RETIRING,
+				    map_idx, count, &rsd);
+	else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC))
+		update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC,
+				    map_idx, count, &rsd);
+	else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND))
+		update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND,
+				    map_idx, count, &rsd);
+	else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND))
+		update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND,
+				    map_idx, count, &rsd);
+	else if (perf_stat_evsel__is(counter, TOPDOWN_HEAVY_OPS))
+		update_runtime_stat(st, STAT_TOPDOWN_HEAVY_OPS,
+				    map_idx, count, &rsd);
+	else if (perf_stat_evsel__is(counter, TOPDOWN_BR_MISPREDICT))
+		update_runtime_stat(st, STAT_TOPDOWN_BR_MISPREDICT,
+				    map_idx, count, &rsd);
+	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_LAT))
+		update_runtime_stat(st, STAT_TOPDOWN_FETCH_LAT,
+				    map_idx, count, &rsd);
+	else if (perf_stat_evsel__is(counter, TOPDOWN_MEM_BOUND))
+		update_runtime_stat(st, STAT_TOPDOWN_MEM_BOUND,
+				    map_idx, count, &rsd);
+	else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
+		update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
+				    map_idx, count, &rsd);
+	else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
+		update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
+				    map_idx, count, &rsd);
+	else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
+		update_runtime_stat(st, STAT_BRANCHES, map_idx, count, &rsd);
+	else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
+		update_runtime_stat(st, STAT_CACHEREFS, map_idx, count, &rsd);
+	else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
+		update_runtime_stat(st, STAT_L1_DCACHE, map_idx, count, &rsd);
+	else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
+		update_runtime_stat(st, STAT_L1_ICACHE, map_idx, count, &rsd);
+	else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL))
+		update_runtime_stat(st, STAT_LL_CACHE, map_idx, count, &rsd);
+	else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
+		update_runtime_stat(st, STAT_DTLB_CACHE, map_idx, count, &rsd);
+	else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
+		update_runtime_stat(st, STAT_ITLB_CACHE, map_idx, count, &rsd);
+	else if (perf_stat_evsel__is(counter, SMI_NUM))
+		update_runtime_stat(st, STAT_SMI_NUM, map_idx, count, &rsd);
+	else if (perf_stat_evsel__is(counter, APERF))
+		update_runtime_stat(st, STAT_APERF, map_idx, count, &rsd);
+
+	if (counter->collect_stat) {
+		v = saved_value_lookup(counter, map_idx, true, STAT_NONE, 0, st,
+				       rsd.cgrp);
+		update_stats(&v->stats, count);
+		if (counter->metric_leader)
+			v->metric_total += count;
+	} else if (counter->metric_leader) {
+		v = saved_value_lookup(counter->metric_leader,
+				       map_idx, true, STAT_NONE, 0, st, rsd.cgrp);
+		v->metric_total += count;
+		v->metric_other++;
+	}
+}
+
+/* used for get_ratio_color() */
+enum grc_type {
+	GRC_STALLED_CYCLES_FE,
+	GRC_STALLED_CYCLES_BE,
+	GRC_CACHE_MISSES,
+	GRC_MAX_NR
+};
+
+static const char *get_ratio_color(enum grc_type type, double ratio)
+{
+	static const double grc_table[GRC_MAX_NR][3] = {
+		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
+		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
+		[GRC_CACHE_MISSES] 	= { 20.0, 10.0, 5.0 },
+	};
+	const char *color = PERF_COLOR_NORMAL;
+
+	if (ratio > grc_table[type][0])
+		color = PERF_COLOR_RED;
+	else if (ratio > grc_table[type][1])
+		color = PERF_COLOR_MAGENTA;
+	else if (ratio > grc_table[type][2])
+		color = PERF_COLOR_YELLOW;
+
+	return color;
+}
+
+static struct evsel *perf_stat__find_event(struct evlist *evsel_list,
+						const char *name)
+{
+	struct evsel *c2;
+
+	evlist__for_each_entry (evsel_list, c2) {
+		if (!strcasecmp(c2->name, name) && !c2->collect_stat)
+			return c2;
+	}
+	return NULL;
+}
+
+/* Mark MetricExpr target events and link events using them to them. */
+void perf_stat__collect_metric_expr(struct evlist *evsel_list)
+{
+	struct evsel *counter, *leader, **metric_events, *oc;
+	bool found;
+	struct expr_parse_ctx *ctx;
+	struct hashmap_entry *cur;
+	size_t bkt;
+	int i;
+
+	ctx = expr__ctx_new();
+	if (!ctx) {
+		pr_debug("expr__ctx_new failed");
+		return;
+	}
+	evlist__for_each_entry(evsel_list, counter) {
+		bool invalid = false;
+
+		leader = evsel__leader(counter);
+		if (!counter->metric_expr)
+			continue;
+
+		expr__ctx_clear(ctx);
+		metric_events = counter->metric_events;
+		if (!metric_events) {
+			if (expr__find_ids(counter->metric_expr,
+					   counter->name,
+					   ctx) < 0)
+				continue;
+
+			metric_events = calloc(sizeof(struct evsel *),
+					       hashmap__size(ctx->ids) + 1);
+			if (!metric_events) {
+				expr__ctx_free(ctx);
+				return;
+			}
+			counter->metric_events = metric_events;
+		}
+
+		i = 0;
+		hashmap__for_each_entry(ctx->ids, cur, bkt) {
+			const char *metric_name = (const char *)cur->key;
+
+			found = false;
+			if (leader) {
+				/* Search in group */
+				for_each_group_member (oc, leader) {
+					if (!strcasecmp(oc->name,
+							metric_name) &&
+						!oc->collect_stat) {
+						found = true;
+						break;
+					}
+				}
+			}
+			if (!found) {
+				/* Search ignoring groups */
+				oc = perf_stat__find_event(evsel_list,
+							   metric_name);
+			}
+			if (!oc) {
+				/* Deduping one is good enough to handle duplicated PMUs. */
+				static char *printed;
+
+				/*
+				 * Adding events automatically would be difficult, because
+				 * it would risk creating groups that are not schedulable.
+				 * perf stat doesn't understand all the scheduling constraints
+				 * of events. So we ask the user instead to add the missing
+				 * events.
+				 */
+				if (!printed ||
+				    strcasecmp(printed, metric_name)) {
+					fprintf(stderr,
+						"Add %s event to groups to get metric expression for %s\n",
+						metric_name,
+						counter->name);
+					free(printed);
+					printed = strdup(metric_name);
+				}
+				invalid = true;
+				continue;
+			}
+			metric_events[i++] = oc;
+			oc->collect_stat = true;
+		}
+		metric_events[i] = NULL;
+		if (invalid) {
+			free(metric_events);
+			counter->metric_events = NULL;
+			counter->metric_expr = NULL;
+		}
+	}
+	expr__ctx_free(ctx);
+}
+
+static double runtime_stat_avg(struct runtime_stat *st,
+			       enum stat_type type, int map_idx,
+			       struct runtime_stat_data *rsd)
+{
+	struct saved_value *v;
+
+	v = saved_value_lookup(NULL, map_idx, false, type, rsd->ctx, st, rsd->cgrp);
+	if (!v)
+		return 0.0;
+
+	return avg_stats(&v->stats);
+}
+
+static double runtime_stat_n(struct runtime_stat *st,
+			     enum stat_type type, int map_idx,
+			     struct runtime_stat_data *rsd)
+{
+	struct saved_value *v;
+
+	v = saved_value_lookup(NULL, map_idx, false, type, rsd->ctx, st, rsd->cgrp);
+	if (!v)
+		return 0.0;
+
+	return v->stats.n;
+}
+
+static void print_stalled_cycles_frontend(struct perf_stat_config *config,
+					  int map_idx, double avg,
+					  struct perf_stat_output_ctx *out,
+					  struct runtime_stat *st,
+					  struct runtime_stat_data *rsd)
+{
+	double total, ratio = 0.0;
+	const char *color;
+
+	total = runtime_stat_avg(st, STAT_CYCLES, map_idx, rsd);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
+
+	if (ratio)
+		out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle",
+				  ratio);
+	else
+		out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0);
+}
+
+static void print_stalled_cycles_backend(struct perf_stat_config *config,
+					 int map_idx, double avg,
+					 struct perf_stat_output_ctx *out,
+					 struct runtime_stat *st,
+					 struct runtime_stat_data *rsd)
+{
+	double total, ratio = 0.0;
+	const char *color;
+
+	total = runtime_stat_avg(st, STAT_CYCLES, map_idx, rsd);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
+
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
+}
+
+static void print_branch_misses(struct perf_stat_config *config,
+				int map_idx, double avg,
+				struct perf_stat_output_ctx *out,
+				struct runtime_stat *st,
+				struct runtime_stat_data *rsd)
+{
+	double total, ratio = 0.0;
+	const char *color;
+
+	total = runtime_stat_avg(st, STAT_BRANCHES, map_idx, rsd);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio);
+}
+
+static void print_l1_dcache_misses(struct perf_stat_config *config,
+				   int map_idx, double avg,
+				   struct perf_stat_output_ctx *out,
+				   struct runtime_stat *st,
+				   struct runtime_stat_data *rsd)
+{
+	double total, ratio = 0.0;
+	const char *color;
+
+	total = runtime_stat_avg(st, STAT_L1_DCACHE, map_idx, rsd);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache accesses", ratio);
+}
+
+static void print_l1_icache_misses(struct perf_stat_config *config,
+				   int map_idx, double avg,
+				   struct perf_stat_output_ctx *out,
+				   struct runtime_stat *st,
+				   struct runtime_stat_data *rsd)
+{
+	double total, ratio = 0.0;
+	const char *color;
+
+	total = runtime_stat_avg(st, STAT_L1_ICACHE, map_idx, rsd);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache accesses", ratio);
+}
+
+static void print_dtlb_cache_misses(struct perf_stat_config *config,
+				    int map_idx, double avg,
+				    struct perf_stat_output_ctx *out,
+				    struct runtime_stat *st,
+				    struct runtime_stat_data *rsd)
+{
+	double total, ratio = 0.0;
+	const char *color;
+
+	total = runtime_stat_avg(st, STAT_DTLB_CACHE, map_idx, rsd);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache accesses", ratio);
+}
+
+static void print_itlb_cache_misses(struct perf_stat_config *config,
+				    int map_idx, double avg,
+				    struct perf_stat_output_ctx *out,
+				    struct runtime_stat *st,
+				    struct runtime_stat_data *rsd)
+{
+	double total, ratio = 0.0;
+	const char *color;
+
+	total = runtime_stat_avg(st, STAT_ITLB_CACHE, map_idx, rsd);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache accesses", ratio);
+}
+
+static void print_ll_cache_misses(struct perf_stat_config *config,
+				  int map_idx, double avg,
+				  struct perf_stat_output_ctx *out,
+				  struct runtime_stat *st,
+				  struct runtime_stat_data *rsd)
+{
+	double total, ratio = 0.0;
+	const char *color;
+
+	total = runtime_stat_avg(st, STAT_LL_CACHE, map_idx, rsd);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache accesses", ratio);
+}
+
+/*
+ * High level "TopDown" CPU core pipe line bottleneck break down.
+ *
+ * Basic concept following
+ * Yasin, A Top Down Method for Performance analysis and Counter architecture
+ * ISPASS14
+ *
+ * The CPU pipeline is divided into 4 areas that can be bottlenecks:
+ *
+ * Frontend -> Backend -> Retiring
+ * BadSpeculation in addition means out of order execution that is thrown away
+ * (for example branch mispredictions)
+ * Frontend is instruction decoding.
+ * Backend is execution, like computation and accessing data in memory
+ * Retiring is good execution that is not directly bottlenecked
+ *
+ * The formulas are computed in slots.
+ * A slot is an entry in the pipeline each for the pipeline width
+ * (for example a 4-wide pipeline has 4 slots for each cycle)
+ *
+ * Formulas:
+ * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
+ *			TotalSlots
+ * Retiring = SlotsRetired / TotalSlots
+ * FrontendBound = FetchBubbles / TotalSlots
+ * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
+ *
+ * The kernel provides the mapping to the low level CPU events and any scaling
+ * needed for the CPU pipeline width, for example:
+ *
+ * TotalSlots = Cycles * 4
+ *
+ * The scaling factor is communicated in the sysfs unit.
+ *
+ * In some cases the CPU may not be able to measure all the formulas due to
+ * missing events. In this case multiple formulas are combined, as possible.
+ *
+ * Full TopDown supports more levels to sub-divide each area: for example
+ * BackendBound into computing bound and memory bound. For now we only
+ * support Level 1 TopDown.
+ */
+
+static double sanitize_val(double x)
+{
+	if (x < 0 && x >= -0.02)
+		return 0.0;
+	return x;
+}
+
+static double td_total_slots(int map_idx, struct runtime_stat *st,
+			     struct runtime_stat_data *rsd)
+{
+	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, map_idx, rsd);
+}
+
+static double td_bad_spec(int map_idx, struct runtime_stat *st,
+			  struct runtime_stat_data *rsd)
+{
+	double bad_spec = 0;
+	double total_slots;
+	double total;
+
+	total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, map_idx, rsd) -
+		runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, map_idx, rsd) +
+		runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, map_idx, rsd);
+
+	total_slots = td_total_slots(map_idx, st, rsd);
+	if (total_slots)
+		bad_spec = total / total_slots;
+	return sanitize_val(bad_spec);
+}
+
+static double td_retiring(int map_idx, struct runtime_stat *st,
+			  struct runtime_stat_data *rsd)
+{
+	double retiring = 0;
+	double total_slots = td_total_slots(map_idx, st, rsd);
+	double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
+					    map_idx, rsd);
+
+	if (total_slots)
+		retiring = ret_slots / total_slots;
+	return retiring;
+}
+
+static double td_fe_bound(int map_idx, struct runtime_stat *st,
+			  struct runtime_stat_data *rsd)
+{
+	double fe_bound = 0;
+	double total_slots = td_total_slots(map_idx, st, rsd);
+	double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
+					    map_idx, rsd);
+
+	if (total_slots)
+		fe_bound = fetch_bub / total_slots;
+	return fe_bound;
+}
+
+static double td_be_bound(int map_idx, struct runtime_stat *st,
+			  struct runtime_stat_data *rsd)
+{
+	double sum = (td_fe_bound(map_idx, st, rsd) +
+		      td_bad_spec(map_idx, st, rsd) +
+		      td_retiring(map_idx, st, rsd));
+	if (sum == 0)
+		return 0;
+	return sanitize_val(1.0 - sum);
+}
+
+/*
+ * Kernel reports metrics multiplied with slots. To get back
+ * the ratios we need to recreate the sum.
+ */
+
+static double td_metric_ratio(int map_idx, enum stat_type type,
+			      struct runtime_stat *stat,
+			      struct runtime_stat_data *rsd)
+{
+	double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, map_idx, rsd) +
+		runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, map_idx, rsd) +
+		runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, map_idx, rsd) +
+		runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, map_idx, rsd);
+	double d = runtime_stat_avg(stat, type, map_idx, rsd);
+
+	if (sum)
+		return d / sum;
+	return 0;
+}
+
+/*
+ * ... but only if most of the values are actually available.
+ * We allow two missing.
+ */
+
+static bool full_td(int map_idx, struct runtime_stat *stat,
+		    struct runtime_stat_data *rsd)
+{
+	int c = 0;
+
+	if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, map_idx, rsd) > 0)
+		c++;
+	if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, map_idx, rsd) > 0)
+		c++;
+	if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, map_idx, rsd) > 0)
+		c++;
+	if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, map_idx, rsd) > 0)
+		c++;
+	return c >= 2;
+}
+
+static void print_smi_cost(struct perf_stat_config *config, int map_idx,
+			   struct perf_stat_output_ctx *out,
+			   struct runtime_stat *st,
+			   struct runtime_stat_data *rsd)
+{
+	double smi_num, aperf, cycles, cost = 0.0;
+	const char *color = NULL;
+
+	smi_num = runtime_stat_avg(st, STAT_SMI_NUM, map_idx, rsd);
+	aperf = runtime_stat_avg(st, STAT_APERF, map_idx, rsd);
+	cycles = runtime_stat_avg(st, STAT_CYCLES, map_idx, rsd);
+
+	if ((cycles == 0) || (aperf == 0))
+		return;
+
+	if (smi_num)
+		cost = (aperf - cycles) / aperf * 100.00;
+
+	if (cost > 10)
+		color = PERF_COLOR_RED;
+	out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
+	out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
+}
+
+static int prepare_metric(struct evsel **metric_events,
+			  struct metric_ref *metric_refs,
+			  struct expr_parse_ctx *pctx,
+			  int map_idx,
+			  struct runtime_stat *st)
+{
+	double scale;
+	char *n;
+	int i, j, ret;
+
+	for (i = 0; metric_events[i]; i++) {
+		struct saved_value *v;
+		struct stats *stats;
+		u64 metric_total = 0;
+		int source_count;
+
+		if (evsel__is_tool(metric_events[i])) {
+			source_count = 1;
+			switch (metric_events[i]->tool_event) {
+			case PERF_TOOL_DURATION_TIME:
+				stats = &walltime_nsecs_stats;
+				scale = 1e-9;
+				break;
+			case PERF_TOOL_USER_TIME:
+				stats = &ru_stats.ru_utime_usec_stat;
+				scale = 1e-6;
+				break;
+			case PERF_TOOL_SYSTEM_TIME:
+				stats = &ru_stats.ru_stime_usec_stat;
+				scale = 1e-6;
+				break;
+			case PERF_TOOL_NONE:
+				pr_err("Invalid tool event 'none'");
+				abort();
+			case PERF_TOOL_MAX:
+				pr_err("Invalid tool event 'max'");
+				abort();
+			default:
+				pr_err("Unknown tool event '%s'", evsel__name(metric_events[i]));
+				abort();
+			}
+		} else {
+			v = saved_value_lookup(metric_events[i], map_idx, false,
+					       STAT_NONE, 0, st,
+					       metric_events[i]->cgrp);
+			if (!v)
+				break;
+			stats = &v->stats;
+			/*
+			 * If an event was scaled during stat gathering, reverse
+			 * the scale before computing the metric.
+			 */
+			scale = 1.0 / metric_events[i]->scale;
+
+			source_count = evsel__source_count(metric_events[i]);
+
+			if (v->metric_other)
+				metric_total = v->metric_total * scale;
+		}
+		n = strdup(evsel__metric_id(metric_events[i]));
+		if (!n)
+			return -ENOMEM;
+
+		expr__add_id_val_source_count(pctx, n,
+					metric_total ? : avg_stats(stats) * scale,
+					source_count);
+	}
+
+	for (j = 0; metric_refs && metric_refs[j].metric_name; j++) {
+		ret = expr__add_ref(pctx, &metric_refs[j]);
+		if (ret)
+			return ret;
+	}
+
+	return i;
+}
+
+static void generic_metric(struct perf_stat_config *config,
+			   const char *metric_expr,
+			   struct evsel **metric_events,
+			   struct metric_ref *metric_refs,
+			   char *name,
+			   const char *metric_name,
+			   const char *metric_unit,
+			   int runtime,
+			   int map_idx,
+			   struct perf_stat_output_ctx *out,
+			   struct runtime_stat *st)
+{
+	print_metric_t print_metric = out->print_metric;
+	struct expr_parse_ctx *pctx;
+	double ratio, scale;
+	int i;
+	void *ctxp = out->ctx;
+
+	pctx = expr__ctx_new();
+	if (!pctx)
+		return;
+
+	if (config->user_requested_cpu_list)
+		pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
+	pctx->sctx.runtime = runtime;
+	pctx->sctx.system_wide = config->system_wide;
+	i = prepare_metric(metric_events, metric_refs, pctx, map_idx, st);
+	if (i < 0) {
+		expr__ctx_free(pctx);
+		return;
+	}
+	if (!metric_events[i]) {
+		if (expr__parse(&ratio, pctx, metric_expr) == 0) {
+			char *unit;
+			char metric_bf[64];
+
+			if (metric_unit && metric_name) {
+				if (perf_pmu__convert_scale(metric_unit,
+					&unit, &scale) >= 0) {
+					ratio *= scale;
+				}
+				if (strstr(metric_expr, "?"))
+					scnprintf(metric_bf, sizeof(metric_bf),
+					  "%s  %s_%d", unit, metric_name, runtime);
+				else
+					scnprintf(metric_bf, sizeof(metric_bf),
+					  "%s  %s", unit, metric_name);
+
+				print_metric(config, ctxp, NULL, "%8.1f",
+					     metric_bf, ratio);
+			} else {
+				print_metric(config, ctxp, NULL, "%8.2f",
+					metric_name ?
+					metric_name :
+					out->force_header ?  name : "",
+					ratio);
+			}
+		} else {
+			print_metric(config, ctxp, NULL, NULL,
+				     out->force_header ?
+				     (metric_name ? metric_name : name) : "", 0);
+		}
+	} else {
+		print_metric(config, ctxp, NULL, NULL,
+			     out->force_header ?
+			     (metric_name ? metric_name : name) : "", 0);
+	}
+
+	expr__ctx_free(pctx);
+}
+
+double test_generic_metric(struct metric_expr *mexp, int map_idx, struct runtime_stat *st)
+{
+	struct expr_parse_ctx *pctx;
+	double ratio = 0.0;
+
+	pctx = expr__ctx_new();
+	if (!pctx)
+		return NAN;
+
+	if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, map_idx, st) < 0)
+		goto out;
+
+	if (expr__parse(&ratio, pctx, mexp->metric_expr))
+		ratio = 0.0;
+
+out:
+	expr__ctx_free(pctx);
+	return ratio;
+}
+
+void perf_stat__print_shadow_stats(struct perf_stat_config *config,
+				   struct evsel *evsel,
+				   double avg, int map_idx,
+				   struct perf_stat_output_ctx *out,
+				   struct rblist *metric_events,
+				   struct runtime_stat *st)
+{
+	void *ctxp = out->ctx;
+	print_metric_t print_metric = out->print_metric;
+	double total, ratio = 0.0, total2;
+	const char *color = NULL;
+	struct runtime_stat_data rsd = {
+		.ctx = evsel_context(evsel),
+		.cgrp = evsel->cgrp,
+	};
+	struct metric_event *me;
+	int num = 1;
+
+	if (config->iostat_run) {
+		iostat_print_metric(config, evsel, out);
+	} else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
+		total = runtime_stat_avg(st, STAT_CYCLES, map_idx, &rsd);
+
+		if (total) {
+			ratio = avg / total;
+			print_metric(config, ctxp, NULL, "%7.2f ",
+					"insn per cycle", ratio);
+		} else {
+			print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
+		}
+
+		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, map_idx, &rsd);
+
+		total = max(total, runtime_stat_avg(st,
+						    STAT_STALLED_CYCLES_BACK,
+						    map_idx, &rsd));
+
+		if (total && avg) {
+			out->new_line(config, ctxp);
+			ratio = total / avg;
+			print_metric(config, ctxp, NULL, "%7.2f ",
+					"stalled cycles per insn",
+					ratio);
+		}
+	} else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
+		if (runtime_stat_n(st, STAT_BRANCHES, map_idx, &rsd) != 0)
+			print_branch_misses(config, map_idx, avg, out, st, &rsd);
+		else
+			print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
+	} else if (
+		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+
+		if (runtime_stat_n(st, STAT_L1_DCACHE, map_idx, &rsd) != 0)
+			print_l1_dcache_misses(config, map_idx, avg, out, st, &rsd);
+		else
+			print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0);
+	} else if (
+		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+
+		if (runtime_stat_n(st, STAT_L1_ICACHE, map_idx, &rsd) != 0)
+			print_l1_icache_misses(config, map_idx, avg, out, st, &rsd);
+		else
+			print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0);
+	} else if (
+		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+
+		if (runtime_stat_n(st, STAT_DTLB_CACHE, map_idx, &rsd) != 0)
+			print_dtlb_cache_misses(config, map_idx, avg, out, st, &rsd);
+		else
+			print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0);
+	} else if (
+		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+
+		if (runtime_stat_n(st, STAT_ITLB_CACHE, map_idx, &rsd) != 0)
+			print_itlb_cache_misses(config, map_idx, avg, out, st, &rsd);
+		else
+			print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0);
+	} else if (
+		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+
+		if (runtime_stat_n(st, STAT_LL_CACHE, map_idx, &rsd) != 0)
+			print_ll_cache_misses(config, map_idx, avg, out, st, &rsd);
+		else
+			print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0);
+	} else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
+		total = runtime_stat_avg(st, STAT_CACHEREFS, map_idx, &rsd);
+
+		if (total)
+			ratio = avg * 100 / total;
+
+		if (runtime_stat_n(st, STAT_CACHEREFS, map_idx, &rsd) != 0)
+			print_metric(config, ctxp, NULL, "%8.3f %%",
+				     "of all cache refs", ratio);
+		else
+			print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
+	} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
+		print_stalled_cycles_frontend(config, map_idx, avg, out, st, &rsd);
+	} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
+		print_stalled_cycles_backend(config, map_idx, avg, out, st, &rsd);
+	} else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
+		total = runtime_stat_avg(st, STAT_NSECS, map_idx, &rsd);
+
+		if (total) {
+			ratio = avg / total;
+			print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio);
+		} else {
+			print_metric(config, ctxp, NULL, NULL, "Ghz", 0);
+		}
+	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
+		total = runtime_stat_avg(st, STAT_CYCLES, map_idx, &rsd);
+
+		if (total)
+			print_metric(config, ctxp, NULL,
+					"%7.2f%%", "transactional cycles",
+					100.0 * (avg / total));
+		else
+			print_metric(config, ctxp, NULL, NULL, "transactional cycles",
+				     0);
+	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
+		total = runtime_stat_avg(st, STAT_CYCLES, map_idx, &rsd);
+		total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, map_idx, &rsd);
+
+		if (total2 < avg)
+			total2 = avg;
+		if (total)
+			print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles",
+				100.0 * ((total2-avg) / total));
+		else
+			print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
+	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
+		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, map_idx, &rsd);
+
+		if (avg)
+			ratio = total / avg;
+
+		if (runtime_stat_n(st, STAT_CYCLES_IN_TX, map_idx, &rsd) != 0)
+			print_metric(config, ctxp, NULL, "%8.0f",
+				     "cycles / transaction", ratio);
+		else
+			print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
+				      0);
+	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
+		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, map_idx, &rsd);
+
+		if (avg)
+			ratio = total / avg;
+
+		print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio);
+	} else if (evsel__is_clock(evsel)) {
+		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
+			print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
+				     avg / (ratio * evsel->scale));
+		else
+			print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
+		double fe_bound = td_fe_bound(map_idx, st, &rsd);
+
+		if (fe_bound > 0.2)
+			color = PERF_COLOR_RED;
+		print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
+				fe_bound * 100.);
+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
+		double retiring = td_retiring(map_idx, st, &rsd);
+
+		if (retiring > 0.7)
+			color = PERF_COLOR_GREEN;
+		print_metric(config, ctxp, color, "%8.1f%%", "retiring",
+				retiring * 100.);
+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
+		double bad_spec = td_bad_spec(map_idx, st, &rsd);
+
+		if (bad_spec > 0.1)
+			color = PERF_COLOR_RED;
+		print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
+				bad_spec * 100.);
+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
+		double be_bound = td_be_bound(map_idx, st, &rsd);
+		const char *name = "backend bound";
+		static int have_recovery_bubbles = -1;
+
+		/* In case the CPU does not support topdown-recovery-bubbles */
+		if (have_recovery_bubbles < 0)
+			have_recovery_bubbles = pmu_have_event("cpu",
+					"topdown-recovery-bubbles");
+		if (!have_recovery_bubbles)
+			name = "backend bound/bad spec";
+
+		if (be_bound > 0.2)
+			color = PERF_COLOR_RED;
+		if (td_total_slots(map_idx, st, &rsd) > 0)
+			print_metric(config, ctxp, color, "%8.1f%%", name,
+					be_bound * 100.);
+		else
+			print_metric(config, ctxp, NULL, NULL, name, 0);
+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) &&
+		   full_td(map_idx, st, &rsd)) {
+		double retiring = td_metric_ratio(map_idx,
+						  STAT_TOPDOWN_RETIRING, st,
+						  &rsd);
+		if (retiring > 0.7)
+			color = PERF_COLOR_GREEN;
+		print_metric(config, ctxp, color, "%8.1f%%", "Retiring",
+				retiring * 100.);
+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
+		   full_td(map_idx, st, &rsd)) {
+		double fe_bound = td_metric_ratio(map_idx,
+						  STAT_TOPDOWN_FE_BOUND, st,
+						  &rsd);
+		if (fe_bound > 0.2)
+			color = PERF_COLOR_RED;
+		print_metric(config, ctxp, color, "%8.1f%%", "Frontend Bound",
+				fe_bound * 100.);
+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
+		   full_td(map_idx, st, &rsd)) {
+		double be_bound = td_metric_ratio(map_idx,
+						  STAT_TOPDOWN_BE_BOUND, st,
+						  &rsd);
+		if (be_bound > 0.2)
+			color = PERF_COLOR_RED;
+		print_metric(config, ctxp, color, "%8.1f%%", "Backend Bound",
+				be_bound * 100.);
+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
+		   full_td(map_idx, st, &rsd)) {
+		double bad_spec = td_metric_ratio(map_idx,
+						  STAT_TOPDOWN_BAD_SPEC, st,
+						  &rsd);
+		if (bad_spec > 0.1)
+			color = PERF_COLOR_RED;
+		print_metric(config, ctxp, color, "%8.1f%%", "Bad Speculation",
+				bad_spec * 100.);
+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_HEAVY_OPS) &&
+			full_td(map_idx, st, &rsd) && (config->topdown_level > 1)) {
+		double retiring = td_metric_ratio(map_idx,
+						  STAT_TOPDOWN_RETIRING, st,
+						  &rsd);
+		double heavy_ops = td_metric_ratio(map_idx,
+						   STAT_TOPDOWN_HEAVY_OPS, st,
+						   &rsd);
+		double light_ops = retiring - heavy_ops;
+
+		if (retiring > 0.7 && heavy_ops > 0.1)
+			color = PERF_COLOR_GREEN;
+		print_metric(config, ctxp, color, "%8.1f%%", "Heavy Operations",
+				heavy_ops * 100.);
+		if (retiring > 0.7 && light_ops > 0.6)
+			color = PERF_COLOR_GREEN;
+		else
+			color = NULL;
+		print_metric(config, ctxp, color, "%8.1f%%", "Light Operations",
+				light_ops * 100.);
+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_BR_MISPREDICT) &&
+			full_td(map_idx, st, &rsd) && (config->topdown_level > 1)) {
+		double bad_spec = td_metric_ratio(map_idx,
+						  STAT_TOPDOWN_BAD_SPEC, st,
+						  &rsd);
+		double br_mis = td_metric_ratio(map_idx,
+						STAT_TOPDOWN_BR_MISPREDICT, st,
+						&rsd);
+		double m_clears = bad_spec - br_mis;
+
+		if (bad_spec > 0.1 && br_mis > 0.05)
+			color = PERF_COLOR_RED;
+		print_metric(config, ctxp, color, "%8.1f%%", "Branch Mispredict",
+				br_mis * 100.);
+		if (bad_spec > 0.1 && m_clears > 0.05)
+			color = PERF_COLOR_RED;
+		else
+			color = NULL;
+		print_metric(config, ctxp, color, "%8.1f%%", "Machine Clears",
+				m_clears * 100.);
+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_LAT) &&
+			full_td(map_idx, st, &rsd) && (config->topdown_level > 1)) {
+		double fe_bound = td_metric_ratio(map_idx,
+						  STAT_TOPDOWN_FE_BOUND, st,
+						  &rsd);
+		double fetch_lat = td_metric_ratio(map_idx,
+						   STAT_TOPDOWN_FETCH_LAT, st,
+						   &rsd);
+		double fetch_bw = fe_bound - fetch_lat;
+
+		if (fe_bound > 0.2 && fetch_lat > 0.15)
+			color = PERF_COLOR_RED;
+		print_metric(config, ctxp, color, "%8.1f%%", "Fetch Latency",
+				fetch_lat * 100.);
+		if (fe_bound > 0.2 && fetch_bw > 0.1)
+			color = PERF_COLOR_RED;
+		else
+			color = NULL;
+		print_metric(config, ctxp, color, "%8.1f%%", "Fetch Bandwidth",
+				fetch_bw * 100.);
+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_MEM_BOUND) &&
+			full_td(map_idx, st, &rsd) && (config->topdown_level > 1)) {
+		double be_bound = td_metric_ratio(map_idx,
+						  STAT_TOPDOWN_BE_BOUND, st,
+						  &rsd);
+		double mem_bound = td_metric_ratio(map_idx,
+						   STAT_TOPDOWN_MEM_BOUND, st,
+						   &rsd);
+		double core_bound = be_bound - mem_bound;
+
+		if (be_bound > 0.2 && mem_bound > 0.2)
+			color = PERF_COLOR_RED;
+		print_metric(config, ctxp, color, "%8.1f%%", "Memory Bound",
+				mem_bound * 100.);
+		if (be_bound > 0.2 && core_bound > 0.1)
+			color = PERF_COLOR_RED;
+		else
+			color = NULL;
+		print_metric(config, ctxp, color, "%8.1f%%", "Core Bound",
+				core_bound * 100.);
+	} else if (evsel->metric_expr) {
+		generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
+			       evsel->name, evsel->metric_name, NULL, 1,
+			       map_idx, out, st);
+	} else if (runtime_stat_n(st, STAT_NSECS, map_idx, &rsd) != 0) {
+		char unit = ' ';
+		char unit_buf[10] = "/sec";
+
+		total = runtime_stat_avg(st, STAT_NSECS, map_idx, &rsd);
+		if (total)
+			ratio = convert_unit_double(1000000000.0 * avg / total, &unit);
+
+		if (unit != ' ')
+			snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
+		print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
+	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
+		print_smi_cost(config, map_idx, out, st, &rsd);
+	} else {
+		num = 0;
+	}
+
+	if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
+		struct metric_expr *mexp;
+
+		list_for_each_entry (mexp, &me->head, nd) {
+			if (num++ > 0)
+				out->new_line(config, ctxp);
+			generic_metric(config, mexp->metric_expr, mexp->metric_events,
+				       mexp->metric_refs, evsel->name, mexp->metric_name,
+				       mexp->metric_unit, mexp->runtime,
+				       map_idx, out, st);
+		}
+	}
+	if (num == 0)
+		print_metric(config, ctxp, NULL, NULL, NULL, 0);
+}
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
new file mode 100644
index 000000000..b63b3a312
--- /dev/null
+++ b/tools/perf/util/stat.c
@@ -0,0 +1,594 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <linux/err.h>
+#include <inttypes.h>
+#include <math.h>
+#include <string.h>
+#include "counts.h"
+#include "cpumap.h"
+#include "debug.h"
+#include "header.h"
+#include "stat.h"
+#include "session.h"
+#include "target.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+#ifdef HAVE_LIBBPF_SUPPORT
+#include <bpf/hashmap.h>
+#else
+#include "util/hashmap.h"
+#endif
+#include <linux/zalloc.h>
+
+void update_stats(struct stats *stats, u64 val)
+{
+	double delta;
+
+	stats->n++;
+	delta = val - stats->mean;
+	stats->mean += delta / stats->n;
+	stats->M2 += delta*(val - stats->mean);
+
+	if (val > stats->max)
+		stats->max = val;
+
+	if (val < stats->min)
+		stats->min = val;
+}
+
+double avg_stats(struct stats *stats)
+{
+	return stats->mean;
+}
+
+/*
+ * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
+ *
+ *       (\Sum n_i^2) - ((\Sum n_i)^2)/n
+ * s^2 = -------------------------------
+ *                  n - 1
+ *
+ * http://en.wikipedia.org/wiki/Stddev
+ *
+ * The std dev of the mean is related to the std dev by:
+ *
+ *             s
+ * s_mean = -------
+ *          sqrt(n)
+ *
+ */
+double stddev_stats(struct stats *stats)
+{
+	double variance, variance_mean;
+
+	if (stats->n < 2)
+		return 0.0;
+
+	variance = stats->M2 / (stats->n - 1);
+	variance_mean = variance / stats->n;
+
+	return sqrt(variance_mean);
+}
+
+double rel_stddev_stats(double stddev, double avg)
+{
+	double pct = 0.0;
+
+	if (avg)
+		pct = 100.0 * stddev/avg;
+
+	return pct;
+}
+
+bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id)
+{
+	struct perf_stat_evsel *ps = evsel->stats;
+
+	return ps->id == id;
+}
+
+#define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name
+static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
+	ID(NONE,		x),
+	ID(CYCLES_IN_TX,	cpu/cycles-t/),
+	ID(TRANSACTION_START,	cpu/tx-start/),
+	ID(ELISION_START,	cpu/el-start/),
+	ID(CYCLES_IN_TX_CP,	cpu/cycles-ct/),
+	ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots),
+	ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued),
+	ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
+	ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
+	ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
+	ID(TOPDOWN_RETIRING, topdown-retiring),
+	ID(TOPDOWN_BAD_SPEC, topdown-bad-spec),
+	ID(TOPDOWN_FE_BOUND, topdown-fe-bound),
+	ID(TOPDOWN_BE_BOUND, topdown-be-bound),
+	ID(TOPDOWN_HEAVY_OPS, topdown-heavy-ops),
+	ID(TOPDOWN_BR_MISPREDICT, topdown-br-mispredict),
+	ID(TOPDOWN_FETCH_LAT, topdown-fetch-lat),
+	ID(TOPDOWN_MEM_BOUND, topdown-mem-bound),
+	ID(SMI_NUM, msr/smi/),
+	ID(APERF, msr/aperf/),
+};
+#undef ID
+
+static void perf_stat_evsel_id_init(struct evsel *evsel)
+{
+	struct perf_stat_evsel *ps = evsel->stats;
+	int i;
+
+	/* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */
+
+	for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
+		if (!strcmp(evsel__name(evsel), id_str[i]) ||
+		    (strstr(evsel__name(evsel), id_str[i]) && evsel->pmu_name
+		     && strstr(evsel__name(evsel), evsel->pmu_name))) {
+			ps->id = i;
+			break;
+		}
+	}
+}
+
+static void evsel__reset_stat_priv(struct evsel *evsel)
+{
+	struct perf_stat_evsel *ps = evsel->stats;
+
+	init_stats(&ps->res_stats);
+}
+
+static int evsel__alloc_stat_priv(struct evsel *evsel)
+{
+	evsel->stats = zalloc(sizeof(struct perf_stat_evsel));
+	if (evsel->stats == NULL)
+		return -ENOMEM;
+	perf_stat_evsel_id_init(evsel);
+	evsel__reset_stat_priv(evsel);
+	return 0;
+}
+
+static void evsel__free_stat_priv(struct evsel *evsel)
+{
+	struct perf_stat_evsel *ps = evsel->stats;
+
+	if (ps)
+		zfree(&ps->group_data);
+	zfree(&evsel->stats);
+}
+
+static int evsel__alloc_prev_raw_counts(struct evsel *evsel)
+{
+	int cpu_map_nr = evsel__nr_cpus(evsel);
+	int nthreads = perf_thread_map__nr(evsel->core.threads);
+	struct perf_counts *counts;
+
+	counts = perf_counts__new(cpu_map_nr, nthreads);
+	if (counts)
+		evsel->prev_raw_counts = counts;
+
+	return counts ? 0 : -ENOMEM;
+}
+
+static void evsel__free_prev_raw_counts(struct evsel *evsel)
+{
+	perf_counts__delete(evsel->prev_raw_counts);
+	evsel->prev_raw_counts = NULL;
+}
+
+static void evsel__reset_prev_raw_counts(struct evsel *evsel)
+{
+	if (evsel->prev_raw_counts)
+		perf_counts__reset(evsel->prev_raw_counts);
+}
+
+static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
+{
+	if (evsel__alloc_stat_priv(evsel) < 0 ||
+	    evsel__alloc_counts(evsel) < 0 ||
+	    (alloc_raw && evsel__alloc_prev_raw_counts(evsel) < 0))
+		return -ENOMEM;
+
+	return 0;
+}
+
+int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel__alloc_stats(evsel, alloc_raw))
+			goto out_free;
+	}
+
+	return 0;
+
+out_free:
+	evlist__free_stats(evlist);
+	return -1;
+}
+
+void evlist__free_stats(struct evlist *evlist)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		evsel__free_stat_priv(evsel);
+		evsel__free_counts(evsel);
+		evsel__free_prev_raw_counts(evsel);
+	}
+}
+
+void evlist__reset_stats(struct evlist *evlist)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		evsel__reset_stat_priv(evsel);
+		evsel__reset_counts(evsel);
+	}
+}
+
+void evlist__reset_prev_raw_counts(struct evlist *evlist)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel)
+		evsel__reset_prev_raw_counts(evsel);
+}
+
+static void evsel__copy_prev_raw_counts(struct evsel *evsel)
+{
+	int idx, nthreads = perf_thread_map__nr(evsel->core.threads);
+
+	for (int thread = 0; thread < nthreads; thread++) {
+		perf_cpu_map__for_each_idx(idx, evsel__cpus(evsel)) {
+			*perf_counts(evsel->counts, idx, thread) =
+				*perf_counts(evsel->prev_raw_counts, idx, thread);
+		}
+	}
+
+	evsel->counts->aggr = evsel->prev_raw_counts->aggr;
+}
+
+void evlist__copy_prev_raw_counts(struct evlist *evlist)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel)
+		evsel__copy_prev_raw_counts(evsel);
+}
+
+void evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
+{
+	struct evsel *evsel;
+
+	/*
+	 * To collect the overall statistics for interval mode,
+	 * we copy the counts from evsel->prev_raw_counts to
+	 * evsel->counts. The perf_stat_process_counter creates
+	 * aggr values from per cpu values, but the per cpu values
+	 * are 0 for AGGR_GLOBAL. So we use a trick that saves the
+	 * previous aggr value to the first member of perf_counts,
+	 * then aggr calculation in process_counter_values can work
+	 * correctly.
+	 */
+	evlist__for_each_entry(evlist, evsel) {
+		*perf_counts(evsel->prev_raw_counts, 0, 0) =
+			evsel->prev_raw_counts->aggr;
+	}
+}
+
+static size_t pkg_id_hash(const void *__key, void *ctx __maybe_unused)
+{
+	uint64_t *key = (uint64_t *) __key;
+
+	return *key & 0xffffffff;
+}
+
+static bool pkg_id_equal(const void *__key1, const void *__key2,
+			 void *ctx __maybe_unused)
+{
+	uint64_t *key1 = (uint64_t *) __key1;
+	uint64_t *key2 = (uint64_t *) __key2;
+
+	return *key1 == *key2;
+}
+
+static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals,
+			 int cpu_map_idx, bool *skip)
+{
+	struct hashmap *mask = counter->per_pkg_mask;
+	struct perf_cpu_map *cpus = evsel__cpus(counter);
+	struct perf_cpu cpu = perf_cpu_map__cpu(cpus, cpu_map_idx);
+	int s, d, ret = 0;
+	uint64_t *key;
+
+	*skip = false;
+
+	if (!counter->per_pkg)
+		return 0;
+
+	if (perf_cpu_map__empty(cpus))
+		return 0;
+
+	if (!mask) {
+		mask = hashmap__new(pkg_id_hash, pkg_id_equal, NULL);
+		if (IS_ERR(mask))
+			return -ENOMEM;
+
+		counter->per_pkg_mask = mask;
+	}
+
+	/*
+	 * we do not consider an event that has not run as a good
+	 * instance to mark a package as used (skip=1). Otherwise
+	 * we may run into a situation where the first CPU in a package
+	 * is not running anything, yet the second is, and this function
+	 * would mark the package as used after the first CPU and would
+	 * not read the values from the second CPU.
+	 */
+	if (!(vals->run && vals->ena))
+		return 0;
+
+	s = cpu__get_socket_id(cpu);
+	if (s < 0)
+		return -1;
+
+	/*
+	 * On multi-die system, die_id > 0. On no-die system, die_id = 0.
+	 * We use hashmap(socket, die) to check the used socket+die pair.
+	 */
+	d = cpu__get_die_id(cpu);
+	if (d < 0)
+		return -1;
+
+	key = malloc(sizeof(*key));
+	if (!key)
+		return -ENOMEM;
+
+	*key = (uint64_t)d << 32 | s;
+	if (hashmap__find(mask, (void *)key, NULL)) {
+		*skip = true;
+		free(key);
+	} else
+		ret = hashmap__add(mask, (void *)key, (void *)1);
+
+	return ret;
+}
+
+static int
+process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
+		       int cpu_map_idx, int thread,
+		       struct perf_counts_values *count)
+{
+	struct perf_counts_values *aggr = &evsel->counts->aggr;
+	static struct perf_counts_values zero;
+	bool skip = false;
+
+	if (check_per_pkg(evsel, count, cpu_map_idx, &skip)) {
+		pr_err("failed to read per-pkg counter\n");
+		return -1;
+	}
+
+	if (skip)
+		count = &zero;
+
+	switch (config->aggr_mode) {
+	case AGGR_THREAD:
+	case AGGR_CORE:
+	case AGGR_DIE:
+	case AGGR_SOCKET:
+	case AGGR_NODE:
+	case AGGR_NONE:
+		if (!evsel->snapshot)
+			evsel__compute_deltas(evsel, cpu_map_idx, thread, count);
+		perf_counts_values__scale(count, config->scale, NULL);
+		if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
+			perf_stat__update_shadow_stats(evsel, count->val,
+						       cpu_map_idx, &rt_stat);
+		}
+
+		if (config->aggr_mode == AGGR_THREAD) {
+			perf_stat__update_shadow_stats(evsel, count->val,
+						       thread, &rt_stat);
+		}
+		break;
+	case AGGR_GLOBAL:
+		aggr->val += count->val;
+		aggr->ena += count->ena;
+		aggr->run += count->run;
+	case AGGR_UNSET:
+	case AGGR_MAX:
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int process_counter_maps(struct perf_stat_config *config,
+				struct evsel *counter)
+{
+	int nthreads = perf_thread_map__nr(counter->core.threads);
+	int ncpus = evsel__nr_cpus(counter);
+	int idx, thread;
+
+	for (thread = 0; thread < nthreads; thread++) {
+		for (idx = 0; idx < ncpus; idx++) {
+			if (process_counter_values(config, counter, idx, thread,
+						   perf_counts(counter->counts, idx, thread)))
+				return -1;
+		}
+	}
+
+	return 0;
+}
+
+int perf_stat_process_counter(struct perf_stat_config *config,
+			      struct evsel *counter)
+{
+	struct perf_counts_values *aggr = &counter->counts->aggr;
+	struct perf_stat_evsel *ps = counter->stats;
+	u64 *count = counter->counts->aggr.values;
+	int ret;
+
+	aggr->val = aggr->ena = aggr->run = 0;
+
+	if (counter->per_pkg)
+		evsel__zero_per_pkg(counter);
+
+	ret = process_counter_maps(config, counter);
+	if (ret)
+		return ret;
+
+	if (config->aggr_mode != AGGR_GLOBAL)
+		return 0;
+
+	if (!counter->snapshot)
+		evsel__compute_deltas(counter, -1, -1, aggr);
+	perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);
+
+	update_stats(&ps->res_stats, *count);
+
+	if (verbose > 0) {
+		fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+			evsel__name(counter), count[0], count[1], count[2]);
+	}
+
+	/*
+	 * Save the full runtime - to allow normalization during printout:
+	 */
+	perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat);
+
+	return 0;
+}
+
+int perf_event__process_stat_event(struct perf_session *session,
+				   union perf_event *event)
+{
+	struct perf_counts_values count, *ptr;
+	struct perf_record_stat *st = &event->stat;
+	struct evsel *counter;
+	int cpu_map_idx;
+
+	count.val = st->val;
+	count.ena = st->ena;
+	count.run = st->run;
+
+	counter = evlist__id2evsel(session->evlist, st->id);
+	if (!counter) {
+		pr_err("Failed to resolve counter for stat event.\n");
+		return -EINVAL;
+	}
+	cpu_map_idx = perf_cpu_map__idx(evsel__cpus(counter), (struct perf_cpu){.cpu = st->cpu});
+	if (cpu_map_idx == -1) {
+		pr_err("Invalid CPU %d for event %s.\n", st->cpu, evsel__name(counter));
+		return -EINVAL;
+	}
+	ptr = perf_counts(counter->counts, cpu_map_idx, st->thread);
+	if (ptr == NULL) {
+		pr_err("Failed to find perf count for CPU %d thread %d on event %s.\n",
+			st->cpu, st->thread, evsel__name(counter));
+		return -EINVAL;
+	}
+	*ptr = count;
+	counter->supported = true;
+	return 0;
+}
+
+size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp)
+{
+	struct perf_record_stat *st = (struct perf_record_stat *)event;
+	size_t ret;
+
+	ret  = fprintf(fp, "\n... id %" PRI_lu64 ", cpu %d, thread %d\n",
+		       st->id, st->cpu, st->thread);
+	ret += fprintf(fp, "... value %" PRI_lu64 ", enabled %" PRI_lu64 ", running %" PRI_lu64 "\n",
+		       st->val, st->ena, st->run);
+
+	return ret;
+}
+
+size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp)
+{
+	struct perf_record_stat_round *rd = (struct perf_record_stat_round *)event;
+	size_t ret;
+
+	ret = fprintf(fp, "\n... time %" PRI_lu64 ", type %s\n", rd->time,
+		      rd->type == PERF_STAT_ROUND_TYPE__FINAL ? "FINAL" : "INTERVAL");
+
+	return ret;
+}
+
+size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
+{
+	struct perf_stat_config sc;
+	size_t ret;
+
+	perf_event__read_stat_config(&sc, &event->stat_config);
+
+	ret  = fprintf(fp, "\n");
+	ret += fprintf(fp, "... aggr_mode %d\n", sc.aggr_mode);
+	ret += fprintf(fp, "... scale     %d\n", sc.scale);
+	ret += fprintf(fp, "... interval  %u\n", sc.interval);
+
+	return ret;
+}
+
+int create_perf_stat_counter(struct evsel *evsel,
+			     struct perf_stat_config *config,
+			     struct target *target,
+			     int cpu_map_idx)
+{
+	struct perf_event_attr *attr = &evsel->core.attr;
+	struct evsel *leader = evsel__leader(evsel);
+
+	attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+			    PERF_FORMAT_TOTAL_TIME_RUNNING;
+
+	/*
+	 * The event is part of non trivial group, let's enable
+	 * the group read (for leader) and ID retrieval for all
+	 * members.
+	 */
+	if (leader->core.nr_members > 1)
+		attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;
+
+	attr->inherit = !config->no_inherit && list_empty(&evsel->bpf_counter_list);
+
+	/*
+	 * Some events get initialized with sample_(period/type) set,
+	 * like tracepoints. Clear it up for counting.
+	 */
+	attr->sample_period = 0;
+
+	if (config->identifier)
+		attr->sample_type = PERF_SAMPLE_IDENTIFIER;
+
+	if (config->all_user) {
+		attr->exclude_kernel = 1;
+		attr->exclude_user   = 0;
+	}
+
+	if (config->all_kernel) {
+		attr->exclude_kernel = 0;
+		attr->exclude_user   = 1;
+	}
+
+	/*
+	 * Disabling all counters initially, they will be enabled
+	 * either manually by us or by kernel via enable_on_exec
+	 * set later.
+	 */
+	if (evsel__is_group_leader(evsel)) {
+		attr->disabled = 1;
+
+		if (target__enable_on_exec(target))
+			attr->enable_on_exec = 1;
+	}
+
+	if (target__has_cpu(target) && !target__has_per_thread(target))
+		return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu_map_idx);
+
+	return evsel__open_per_thread(evsel, evsel->core.threads);
+}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
new file mode 100644
index 000000000..05c5125d7
--- /dev/null
+++ b/tools/perf/util/stat.h
@@ -0,0 +1,279 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_STATS_H
+#define __PERF_STATS_H
+
+#include <linux/types.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+#include "cpumap.h"
+#include "rblist.h"
+
+struct perf_cpu_map;
+struct perf_stat_config;
+struct timespec;
+
+struct stats {
+	double n, mean, M2;
+	u64 max, min;
+};
+
+enum perf_stat_evsel_id {
+	PERF_STAT_EVSEL_ID__NONE = 0,
+	PERF_STAT_EVSEL_ID__CYCLES_IN_TX,
+	PERF_STAT_EVSEL_ID__TRANSACTION_START,
+	PERF_STAT_EVSEL_ID__ELISION_START,
+	PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP,
+	PERF_STAT_EVSEL_ID__TOPDOWN_TOTAL_SLOTS,
+	PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_ISSUED,
+	PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
+	PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
+	PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
+	PERF_STAT_EVSEL_ID__TOPDOWN_RETIRING,
+	PERF_STAT_EVSEL_ID__TOPDOWN_BAD_SPEC,
+	PERF_STAT_EVSEL_ID__TOPDOWN_FE_BOUND,
+	PERF_STAT_EVSEL_ID__TOPDOWN_BE_BOUND,
+	PERF_STAT_EVSEL_ID__TOPDOWN_HEAVY_OPS,
+	PERF_STAT_EVSEL_ID__TOPDOWN_BR_MISPREDICT,
+	PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_LAT,
+	PERF_STAT_EVSEL_ID__TOPDOWN_MEM_BOUND,
+	PERF_STAT_EVSEL_ID__SMI_NUM,
+	PERF_STAT_EVSEL_ID__APERF,
+	PERF_STAT_EVSEL_ID__MAX,
+};
+
+struct perf_stat_evsel {
+	struct stats		 res_stats;
+	enum perf_stat_evsel_id	 id;
+	u64			*group_data;
+};
+
+enum aggr_mode {
+	AGGR_NONE,
+	AGGR_GLOBAL,
+	AGGR_SOCKET,
+	AGGR_DIE,
+	AGGR_CORE,
+	AGGR_THREAD,
+	AGGR_UNSET,
+	AGGR_NODE,
+	AGGR_MAX
+};
+
+enum {
+	CTX_BIT_USER	= 1 << 0,
+	CTX_BIT_KERNEL	= 1 << 1,
+	CTX_BIT_HV	= 1 << 2,
+	CTX_BIT_HOST	= 1 << 3,
+	CTX_BIT_IDLE	= 1 << 4,
+	CTX_BIT_MAX	= 1 << 5,
+};
+
+#define NUM_CTX CTX_BIT_MAX
+
+enum stat_type {
+	STAT_NONE = 0,
+	STAT_NSECS,
+	STAT_CYCLES,
+	STAT_STALLED_CYCLES_FRONT,
+	STAT_STALLED_CYCLES_BACK,
+	STAT_BRANCHES,
+	STAT_CACHEREFS,
+	STAT_L1_DCACHE,
+	STAT_L1_ICACHE,
+	STAT_LL_CACHE,
+	STAT_ITLB_CACHE,
+	STAT_DTLB_CACHE,
+	STAT_CYCLES_IN_TX,
+	STAT_TRANSACTION,
+	STAT_ELISION,
+	STAT_TOPDOWN_TOTAL_SLOTS,
+	STAT_TOPDOWN_SLOTS_ISSUED,
+	STAT_TOPDOWN_SLOTS_RETIRED,
+	STAT_TOPDOWN_FETCH_BUBBLES,
+	STAT_TOPDOWN_RECOVERY_BUBBLES,
+	STAT_TOPDOWN_RETIRING,
+	STAT_TOPDOWN_BAD_SPEC,
+	STAT_TOPDOWN_FE_BOUND,
+	STAT_TOPDOWN_BE_BOUND,
+	STAT_TOPDOWN_HEAVY_OPS,
+	STAT_TOPDOWN_BR_MISPREDICT,
+	STAT_TOPDOWN_FETCH_LAT,
+	STAT_TOPDOWN_MEM_BOUND,
+	STAT_SMI_NUM,
+	STAT_APERF,
+	STAT_MAX
+};
+
+struct runtime_stat {
+	struct rblist value_list;
+};
+
+struct rusage_stats {
+	struct stats ru_utime_usec_stat;
+	struct stats ru_stime_usec_stat;
+};
+
+typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, struct perf_cpu cpu);
+
+struct perf_stat_config {
+	enum aggr_mode		 aggr_mode;
+	bool			 scale;
+	bool			 no_inherit;
+	bool			 identifier;
+	bool			 csv_output;
+	bool			 json_output;
+	bool			 interval_clear;
+	bool			 metric_only;
+	bool			 null_run;
+	bool			 ru_display;
+	bool			 big_num;
+	bool			 no_merge;
+	bool			 hybrid_merge;
+	bool			 walltime_run_table;
+	bool			 all_kernel;
+	bool			 all_user;
+	bool			 percore_show_thread;
+	bool			 summary;
+	bool			 no_csv_summary;
+	bool			 metric_no_group;
+	bool			 metric_no_merge;
+	bool			 stop_read_counter;
+	bool			 iostat_run;
+	char			 *user_requested_cpu_list;
+	bool			 system_wide;
+	FILE			*output;
+	unsigned int		 interval;
+	unsigned int		 timeout;
+	unsigned int		 unit_width;
+	unsigned int		 metric_only_len;
+	int			 times;
+	int			 run_count;
+	int			 print_free_counters_hint;
+	int			 print_mixed_hw_group_error;
+	const char		*csv_sep;
+	struct stats		*walltime_nsecs_stats;
+	struct rusage		 ru_data;
+	struct rusage_stats		 *ru_stats;
+	struct cpu_aggr_map	*aggr_map;
+	aggr_get_id_t		 aggr_get_id;
+	struct cpu_aggr_map	*cpus_aggr_map;
+	u64			*walltime_run;
+	struct rblist		 metric_events;
+	int			 ctl_fd;
+	int			 ctl_fd_ack;
+	bool			 ctl_fd_close;
+	const char		*cgroup_list;
+	unsigned int		topdown_level;
+};
+
+void perf_stat__set_big_num(int set);
+void perf_stat__set_no_csv_summary(int set);
+
+void update_stats(struct stats *stats, u64 val);
+double avg_stats(struct stats *stats);
+double stddev_stats(struct stats *stats);
+double rel_stddev_stats(double stddev, double avg);
+
+static inline void init_stats(struct stats *stats)
+{
+	stats->n    = 0.0;
+	stats->mean = 0.0;
+	stats->M2   = 0.0;
+	stats->min  = (u64) -1;
+	stats->max  = 0;
+}
+
+static inline void init_rusage_stats(struct rusage_stats *ru_stats) {
+	init_stats(&ru_stats->ru_utime_usec_stat);
+	init_stats(&ru_stats->ru_stime_usec_stat);
+}
+
+static inline void update_rusage_stats(struct rusage_stats *ru_stats, struct rusage* rusage) {
+	const u64 us_to_ns = 1000;
+	const u64 s_to_ns = 1000000000;
+	update_stats(&ru_stats->ru_utime_usec_stat,
+	             (rusage->ru_utime.tv_usec * us_to_ns + rusage->ru_utime.tv_sec * s_to_ns));
+	update_stats(&ru_stats->ru_stime_usec_stat,
+	             (rusage->ru_stime.tv_usec * us_to_ns + rusage->ru_stime.tv_sec * s_to_ns));
+}
+
+struct evsel;
+struct evlist;
+
+struct perf_aggr_thread_value {
+	struct evsel *counter;
+	struct aggr_cpu_id id;
+	double uval;
+	u64 val;
+	u64 run;
+	u64 ena;
+};
+
+bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id);
+
+#define perf_stat_evsel__is(evsel, id) \
+	__perf_stat_evsel__is(evsel, PERF_STAT_EVSEL_ID__ ## id)
+
+extern struct runtime_stat rt_stat;
+extern struct stats walltime_nsecs_stats;
+extern struct rusage_stats ru_stats;
+
+typedef void (*print_metric_t)(struct perf_stat_config *config,
+			       void *ctx, const char *color, const char *unit,
+			       const char *fmt, double val);
+typedef void (*new_line_t)(struct perf_stat_config *config, void *ctx);
+
+void runtime_stat__init(struct runtime_stat *st);
+void runtime_stat__exit(struct runtime_stat *st);
+void perf_stat__init_shadow_stats(void);
+void perf_stat__reset_shadow_stats(void);
+void perf_stat__reset_shadow_per_stat(struct runtime_stat *st);
+void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
+				    int map_idx, struct runtime_stat *st);
+struct perf_stat_output_ctx {
+	void *ctx;
+	print_metric_t print_metric;
+	new_line_t new_line;
+	bool force_header;
+};
+
+void perf_stat__print_shadow_stats(struct perf_stat_config *config,
+				   struct evsel *evsel,
+				   double avg, int map_idx,
+				   struct perf_stat_output_ctx *out,
+				   struct rblist *metric_events,
+				   struct runtime_stat *st);
+void perf_stat__collect_metric_expr(struct evlist *);
+
+int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw);
+void evlist__free_stats(struct evlist *evlist);
+void evlist__reset_stats(struct evlist *evlist);
+void evlist__reset_prev_raw_counts(struct evlist *evlist);
+void evlist__copy_prev_raw_counts(struct evlist *evlist);
+void evlist__save_aggr_prev_raw_counts(struct evlist *evlist);
+
+int perf_stat_process_counter(struct perf_stat_config *config,
+			      struct evsel *counter);
+struct perf_tool;
+union perf_event;
+struct perf_session;
+struct target;
+
+int perf_event__process_stat_event(struct perf_session *session,
+				   union perf_event *event);
+
+size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp);
+
+int create_perf_stat_counter(struct evsel *evsel,
+			     struct perf_stat_config *config,
+			     struct target *target,
+			     int cpu_map_idx);
+void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
+			    struct target *_target, struct timespec *ts, int argc, const char **argv);
+
+struct metric_expr;
+double test_generic_metric(struct metric_expr *mexp, int map_idx, struct runtime_stat *st);
+#endif
diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c
new file mode 100644
index 000000000..a64a37628
--- /dev/null
+++ b/tools/perf/util/strbuf.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "cache.h"
+#include "debug.h"
+#include "strbuf.h"
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+/*
+ * Used as the default ->buf value, so that people can always assume
+ * buf is non NULL and ->buf is NUL terminated even for a freshly
+ * initialized strbuf.
+ */
+char strbuf_slopbuf[1];
+
+int strbuf_init(struct strbuf *sb, ssize_t hint)
+{
+	sb->alloc = sb->len = 0;
+	sb->buf = strbuf_slopbuf;
+	if (hint)
+		return strbuf_grow(sb, hint);
+	return 0;
+}
+
+void strbuf_release(struct strbuf *sb)
+{
+	if (sb->alloc) {
+		zfree(&sb->buf);
+		strbuf_init(sb, 0);
+	}
+}
+
+char *strbuf_detach(struct strbuf *sb, size_t *sz)
+{
+	char *res = sb->alloc ? sb->buf : NULL;
+	if (sz)
+		*sz = sb->len;
+	strbuf_init(sb, 0);
+	return res;
+}
+
+int strbuf_grow(struct strbuf *sb, size_t extra)
+{
+	char *buf;
+	size_t nr = sb->len + extra + 1;
+
+	if (nr < sb->alloc)
+		return 0;
+
+	if (nr <= sb->len)
+		return -E2BIG;
+
+	if (alloc_nr(sb->alloc) > nr)
+		nr = alloc_nr(sb->alloc);
+
+	/*
+	 * Note that sb->buf == strbuf_slopbuf if sb->alloc == 0, and it is
+	 * a static variable. Thus we have to avoid passing it to realloc.
+	 */
+	buf = realloc(sb->alloc ? sb->buf : NULL, nr * sizeof(*buf));
+	if (!buf)
+		return -ENOMEM;
+
+	sb->buf = buf;
+	sb->alloc = nr;
+	return 0;
+}
+
+int strbuf_addch(struct strbuf *sb, int c)
+{
+	int ret = strbuf_grow(sb, 1);
+	if (ret)
+		return ret;
+
+	sb->buf[sb->len++] = c;
+	sb->buf[sb->len] = '\0';
+	return 0;
+}
+
+int strbuf_add(struct strbuf *sb, const void *data, size_t len)
+{
+	int ret = strbuf_grow(sb, len);
+	if (ret)
+		return ret;
+
+	memcpy(sb->buf + sb->len, data, len);
+	return strbuf_setlen(sb, sb->len + len);
+}
+
+static int strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap)
+{
+	int len, ret;
+	va_list ap_saved;
+
+	if (!strbuf_avail(sb)) {
+		ret = strbuf_grow(sb, 64);
+		if (ret)
+			return ret;
+	}
+
+	va_copy(ap_saved, ap);
+	len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
+	if (len < 0) {
+		va_end(ap_saved);
+		return len;
+	}
+	if (len > strbuf_avail(sb)) {
+		ret = strbuf_grow(sb, len);
+		if (ret) {
+			va_end(ap_saved);
+			return ret;
+		}
+		len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap_saved);
+		if (len > strbuf_avail(sb)) {
+			pr_debug("this should not happen, your vsnprintf is broken");
+			va_end(ap_saved);
+			return -EINVAL;
+		}
+	}
+	va_end(ap_saved);
+	return strbuf_setlen(sb, sb->len + len);
+}
+
+int strbuf_addf(struct strbuf *sb, const char *fmt, ...)
+{
+	va_list ap;
+	int ret;
+
+	va_start(ap, fmt);
+	ret = strbuf_addv(sb, fmt, ap);
+	va_end(ap);
+	return ret;
+}
+
+ssize_t strbuf_read(struct strbuf *sb, int fd, ssize_t hint)
+{
+	size_t oldlen = sb->len;
+	size_t oldalloc = sb->alloc;
+	int ret;
+
+	ret = strbuf_grow(sb, hint ? hint : 8192);
+	if (ret)
+		return ret;
+
+	for (;;) {
+		ssize_t cnt;
+
+		cnt = read(fd, sb->buf + sb->len, sb->alloc - sb->len - 1);
+		if (cnt < 0) {
+			if (oldalloc == 0)
+				strbuf_release(sb);
+			else
+				strbuf_setlen(sb, oldlen);
+			return cnt;
+		}
+		if (!cnt)
+			break;
+		sb->len += cnt;
+		ret = strbuf_grow(sb, 8192);
+		if (ret)
+			return ret;
+	}
+
+	sb->buf[sb->len] = '\0';
+	return sb->len - oldlen;
+}
diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h
new file mode 100644
index 000000000..be94d7046
--- /dev/null
+++ b/tools/perf/util/strbuf.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_STRBUF_H
+#define __PERF_STRBUF_H
+
+/*
+ * Strbuf's can be use in many ways: as a byte array, or to store arbitrary
+ * long, overflow safe strings.
+ *
+ * Strbufs has some invariants that are very important to keep in mind:
+ *
+ * 1. the ->buf member is always malloc-ed, hence strbuf's can be used to
+ *    build complex strings/buffers whose final size isn't easily known.
+ *
+ *    It is NOT legal to copy the ->buf pointer away.
+ *    `strbuf_detach' is the operation that detaches a buffer from its shell
+ *    while keeping the shell valid wrt its invariants.
+ *
+ * 2. the ->buf member is a byte array that has at least ->len + 1 bytes
+ *    allocated. The extra byte is used to store a '\0', allowing the ->buf
+ *    member to be a valid C-string. Every strbuf function ensure this
+ *    invariant is preserved.
+ *
+ *    Note that it is OK to "play" with the buffer directly if you work it
+ *    that way:
+ *
+ *    strbuf_grow(sb, SOME_SIZE);
+ *       ... Here, the memory array starting at sb->buf, and of length
+ *       ... strbuf_avail(sb) is all yours, and you are sure that
+ *       ... strbuf_avail(sb) is at least SOME_SIZE.
+ *    strbuf_setlen(sb, sb->len + SOME_OTHER_SIZE);
+ *
+ *    Of course, SOME_OTHER_SIZE must be smaller or equal to strbuf_avail(sb).
+ *
+ *    Doing so is safe, though if it has to be done in many places, adding the
+ *    missing API to the strbuf module is the way to go.
+ *
+ *    XXX: do _not_ assume that the area that is yours is of size ->alloc - 1
+ *         even if it's true in the current implementation. Alloc is somehow a
+ *         "private" member that should not be messed with.
+ */
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <string.h>
+#include <linux/compiler.h>
+#include <sys/types.h>
+
+extern char strbuf_slopbuf[];
+struct strbuf {
+	size_t alloc;
+	size_t len;
+	char *buf;
+};
+
+#define STRBUF_INIT  { 0, 0, strbuf_slopbuf }
+
+/*----- strbuf life cycle -----*/
+int strbuf_init(struct strbuf *buf, ssize_t hint);
+void strbuf_release(struct strbuf *buf);
+char *strbuf_detach(struct strbuf *buf, size_t *);
+
+/*----- strbuf size related -----*/
+static inline ssize_t strbuf_avail(const struct strbuf *sb) {
+	return sb->alloc ? sb->alloc - sb->len - 1 : 0;
+}
+
+int strbuf_grow(struct strbuf *buf, size_t);
+
+static inline int strbuf_setlen(struct strbuf *sb, size_t len) {
+	if (!sb->alloc) {
+		int ret = strbuf_grow(sb, 0);
+		if (ret)
+			return ret;
+	}
+	assert(len < sb->alloc);
+	sb->len = len;
+	sb->buf[len] = '\0';
+	return 0;
+}
+
+/*----- add data in your buffer -----*/
+int strbuf_addch(struct strbuf *sb, int c);
+
+int strbuf_add(struct strbuf *buf, const void *, size_t);
+static inline int strbuf_addstr(struct strbuf *sb, const char *s) {
+	return strbuf_add(sb, s, strlen(s));
+}
+
+int strbuf_addf(struct strbuf *sb, const char *fmt, ...) __printf(2, 3);
+
+/* XXX: if read fails, any partial read is undone */
+ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint);
+
+#endif /* __PERF_STRBUF_H */
diff --git a/tools/perf/util/stream.c b/tools/perf/util/stream.c
new file mode 100644
index 000000000..545e44981
--- /dev/null
+++ b/tools/perf/util/stream.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Compare and figure out the top N hottest streams
+ * Copyright (c) 2020, Intel Corporation.
+ * Author: Jin Yao
+ */
+
+#include <inttypes.h>
+#include <stdlib.h>
+#include <linux/zalloc.h>
+#include "debug.h"
+#include "hist.h"
+#include "sort.h"
+#include "stream.h"
+#include "evlist.h"
+
+static void evsel_streams__delete(struct evsel_streams *es, int nr_evsel)
+{
+	for (int i = 0; i < nr_evsel; i++)
+		zfree(&es[i].streams);
+
+	free(es);
+}
+
+void evlist_streams__delete(struct evlist_streams *els)
+{
+	evsel_streams__delete(els->ev_streams, els->nr_evsel);
+	free(els);
+}
+
+static struct evlist_streams *evlist_streams__new(int nr_evsel,
+						  int nr_streams_max)
+{
+	struct evlist_streams *els;
+	struct evsel_streams *es;
+
+	els = zalloc(sizeof(*els));
+	if (!els)
+		return NULL;
+
+	es = calloc(nr_evsel, sizeof(struct evsel_streams));
+	if (!es) {
+		free(els);
+		return NULL;
+	}
+
+	for (int i = 0; i < nr_evsel; i++) {
+		struct evsel_streams *s = &es[i];
+
+		s->streams = calloc(nr_streams_max, sizeof(struct stream));
+		if (!s->streams)
+			goto err;
+
+		s->nr_streams_max = nr_streams_max;
+		s->evsel_idx = -1;
+	}
+
+	els->ev_streams = es;
+	els->nr_evsel = nr_evsel;
+	return els;
+
+err:
+	evsel_streams__delete(es, nr_evsel);
+	return NULL;
+}
+
+/*
+ * The cnodes with high hit number are hot callchains.
+ */
+static void evsel_streams__set_hot_cnode(struct evsel_streams *es,
+					 struct callchain_node *cnode)
+{
+	int i, idx = 0;
+	u64 hit;
+
+	if (es->nr_streams < es->nr_streams_max) {
+		i = es->nr_streams;
+		es->streams[i].cnode = cnode;
+		es->nr_streams++;
+		return;
+	}
+
+	/*
+	 * Considering a few number of hot streams, only use simple
+	 * way to find the cnode with smallest hit number and replace.
+	 */
+	hit = (es->streams[0].cnode)->hit;
+	for (i = 1; i < es->nr_streams; i++) {
+		if ((es->streams[i].cnode)->hit < hit) {
+			hit = (es->streams[i].cnode)->hit;
+			idx = i;
+		}
+	}
+
+	if (cnode->hit > hit)
+		es->streams[idx].cnode = cnode;
+}
+
+static void update_hot_callchain(struct hist_entry *he,
+				 struct evsel_streams *es)
+{
+	struct rb_root *root = &he->sorted_chain;
+	struct rb_node *rb_node = rb_first(root);
+	struct callchain_node *cnode;
+
+	while (rb_node) {
+		cnode = rb_entry(rb_node, struct callchain_node, rb_node);
+		evsel_streams__set_hot_cnode(es, cnode);
+		rb_node = rb_next(rb_node);
+	}
+}
+
+static void init_hot_callchain(struct hists *hists, struct evsel_streams *es)
+{
+	struct rb_node *next = rb_first_cached(&hists->entries);
+
+	while (next) {
+		struct hist_entry *he;
+
+		he = rb_entry(next, struct hist_entry, rb_node);
+		update_hot_callchain(he, es);
+		next = rb_next(&he->rb_node);
+	}
+
+	es->streams_hits = callchain_total_hits(hists);
+}
+
+static int evlist__init_callchain_streams(struct evlist *evlist,
+					  struct evlist_streams *els)
+{
+	struct evsel_streams *es = els->ev_streams;
+	struct evsel *pos;
+	int i = 0;
+
+	BUG_ON(els->nr_evsel < evlist->core.nr_entries);
+
+	evlist__for_each_entry(evlist, pos) {
+		struct hists *hists = evsel__hists(pos);
+
+		hists__output_resort(hists, NULL);
+		init_hot_callchain(hists, &es[i]);
+		es[i].evsel_idx = pos->core.idx;
+		i++;
+	}
+
+	return 0;
+}
+
+struct evlist_streams *evlist__create_streams(struct evlist *evlist,
+					      int nr_streams_max)
+{
+	int nr_evsel = evlist->core.nr_entries, ret = -1;
+	struct evlist_streams *els = evlist_streams__new(nr_evsel,
+							 nr_streams_max);
+
+	if (!els)
+		return NULL;
+
+	ret = evlist__init_callchain_streams(evlist, els);
+	if (ret) {
+		evlist_streams__delete(els);
+		return NULL;
+	}
+
+	return els;
+}
+
+struct evsel_streams *evsel_streams__entry(struct evlist_streams *els,
+					   int evsel_idx)
+{
+	struct evsel_streams *es = els->ev_streams;
+
+	for (int i = 0; i < els->nr_evsel; i++) {
+		if (es[i].evsel_idx == evsel_idx)
+			return &es[i];
+	}
+
+	return NULL;
+}
+
+static struct stream *stream__callchain_match(struct stream *base_stream,
+					      struct evsel_streams *es_pair)
+{
+	for (int i = 0; i < es_pair->nr_streams; i++) {
+		struct stream *pair_stream = &es_pair->streams[i];
+
+		if (callchain_cnode_matched(base_stream->cnode,
+					    pair_stream->cnode)) {
+			return pair_stream;
+		}
+	}
+
+	return NULL;
+}
+
+static struct stream *stream__match(struct stream *base_stream,
+				    struct evsel_streams *es_pair)
+{
+	return stream__callchain_match(base_stream, es_pair);
+}
+
+static void stream__link(struct stream *base_stream, struct stream *pair_stream)
+{
+	base_stream->pair_cnode = pair_stream->cnode;
+	pair_stream->pair_cnode = base_stream->cnode;
+}
+
+void evsel_streams__match(struct evsel_streams *es_base,
+			  struct evsel_streams *es_pair)
+{
+	for (int i = 0; i < es_base->nr_streams; i++) {
+		struct stream *base_stream = &es_base->streams[i];
+		struct stream *pair_stream;
+
+		pair_stream = stream__match(base_stream, es_pair);
+		if (pair_stream)
+			stream__link(base_stream, pair_stream);
+	}
+}
+
+static void print_callchain_pair(struct stream *base_stream, int idx,
+				 struct evsel_streams *es_base,
+				 struct evsel_streams *es_pair)
+{
+	struct callchain_node *base_cnode = base_stream->cnode;
+	struct callchain_node *pair_cnode = base_stream->pair_cnode;
+	struct callchain_list *base_chain, *pair_chain;
+	char buf1[512], buf2[512], cbuf1[256], cbuf2[256];
+	char *s1, *s2;
+	double pct;
+
+	printf("\nhot chain pair %d:\n", idx);
+
+	pct = (double)base_cnode->hit / (double)es_base->streams_hits;
+	scnprintf(buf1, sizeof(buf1), "cycles: %ld, hits: %.2f%%",
+		  callchain_avg_cycles(base_cnode), pct * 100.0);
+
+	pct = (double)pair_cnode->hit / (double)es_pair->streams_hits;
+	scnprintf(buf2, sizeof(buf2), "cycles: %ld, hits: %.2f%%",
+		  callchain_avg_cycles(pair_cnode), pct * 100.0);
+
+	printf("%35s\t%35s\n", buf1, buf2);
+
+	printf("%35s\t%35s\n",
+	       "---------------------------",
+	       "--------------------------");
+
+	pair_chain = list_first_entry(&pair_cnode->val,
+				      struct callchain_list,
+				      list);
+
+	list_for_each_entry(base_chain, &base_cnode->val, list) {
+		if (&pair_chain->list == &pair_cnode->val)
+			return;
+
+		s1 = callchain_list__sym_name(base_chain, cbuf1, sizeof(cbuf1),
+					      false);
+		s2 = callchain_list__sym_name(pair_chain, cbuf2, sizeof(cbuf2),
+					      false);
+
+		scnprintf(buf1, sizeof(buf1), "%35s\t%35s", s1, s2);
+		printf("%s\n", buf1);
+		pair_chain = list_next_entry(pair_chain, list);
+	}
+}
+
+static void print_stream_callchain(struct stream *stream, int idx,
+				   struct evsel_streams *es, bool pair)
+{
+	struct callchain_node *cnode = stream->cnode;
+	struct callchain_list *chain;
+	char buf[512], cbuf[256], *s;
+	double pct;
+
+	printf("\nhot chain %d:\n", idx);
+
+	pct = (double)cnode->hit / (double)es->streams_hits;
+	scnprintf(buf, sizeof(buf), "cycles: %ld, hits: %.2f%%",
+		  callchain_avg_cycles(cnode), pct * 100.0);
+
+	if (pair) {
+		printf("%35s\t%35s\n", "", buf);
+		printf("%35s\t%35s\n",
+		       "", "--------------------------");
+	} else {
+		printf("%35s\n", buf);
+		printf("%35s\n", "--------------------------");
+	}
+
+	list_for_each_entry(chain, &cnode->val, list) {
+		s = callchain_list__sym_name(chain, cbuf, sizeof(cbuf), false);
+
+		if (pair)
+			scnprintf(buf, sizeof(buf), "%35s\t%35s", "", s);
+		else
+			scnprintf(buf, sizeof(buf), "%35s", s);
+
+		printf("%s\n", buf);
+	}
+}
+
+static void callchain_streams_report(struct evsel_streams *es_base,
+				     struct evsel_streams *es_pair)
+{
+	struct stream *base_stream;
+	int i, idx = 0;
+
+	printf("[ Matched hot streams ]\n");
+	for (i = 0; i < es_base->nr_streams; i++) {
+		base_stream = &es_base->streams[i];
+		if (base_stream->pair_cnode) {
+			print_callchain_pair(base_stream, ++idx,
+					     es_base, es_pair);
+		}
+	}
+
+	idx = 0;
+	printf("\n[ Hot streams in old perf data only ]\n");
+	for (i = 0; i < es_base->nr_streams; i++) {
+		base_stream = &es_base->streams[i];
+		if (!base_stream->pair_cnode) {
+			print_stream_callchain(base_stream, ++idx,
+					       es_base, false);
+		}
+	}
+
+	idx = 0;
+	printf("\n[ Hot streams in new perf data only ]\n");
+	for (i = 0; i < es_pair->nr_streams; i++) {
+		base_stream = &es_pair->streams[i];
+		if (!base_stream->pair_cnode) {
+			print_stream_callchain(base_stream, ++idx,
+					       es_pair, true);
+		}
+	}
+}
+
+void evsel_streams__report(struct evsel_streams *es_base,
+			   struct evsel_streams *es_pair)
+{
+	return callchain_streams_report(es_base, es_pair);
+}
diff --git a/tools/perf/util/stream.h b/tools/perf/util/stream.h
new file mode 100644
index 000000000..bee768874
--- /dev/null
+++ b/tools/perf/util/stream.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_STREAM_H
+#define __PERF_STREAM_H
+
+#include "callchain.h"
+
+struct stream {
+	struct callchain_node	*cnode;
+	struct callchain_node	*pair_cnode;
+};
+
+struct evsel_streams {
+	struct stream		*streams;
+	int			nr_streams_max;
+	int			nr_streams;
+	int			evsel_idx;
+	u64			streams_hits;
+};
+
+struct evlist_streams {
+	struct evsel_streams	*ev_streams;
+	int			nr_evsel;
+};
+
+struct evlist;
+
+void evlist_streams__delete(struct evlist_streams *els);
+
+struct evlist_streams *evlist__create_streams(struct evlist *evlist,
+					      int nr_streams_max);
+
+struct evsel_streams *evsel_streams__entry(struct evlist_streams *els,
+					   int evsel_idx);
+
+void evsel_streams__match(struct evsel_streams *es_base,
+			  struct evsel_streams *es_pair);
+
+void evsel_streams__report(struct evsel_streams *es_base,
+			   struct evsel_streams *es_pair);
+
+#endif /* __PERF_STREAM_H */
diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c
new file mode 100644
index 000000000..78aa4c3b9
--- /dev/null
+++ b/tools/perf/util/strfilter.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "string2.h"
+#include "strfilter.h"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+
+/* Operators */
+static const char *OP_and	= "&";	/* Logical AND */
+static const char *OP_or	= "|";	/* Logical OR */
+static const char *OP_not	= "!";	/* Logical NOT */
+
+#define is_operator(c)	((c) == '|' || (c) == '&' || (c) == '!')
+#define is_separator(c)	(is_operator(c) || (c) == '(' || (c) == ')')
+
+static void strfilter_node__delete(struct strfilter_node *node)
+{
+	if (node) {
+		if (node->p && !is_operator(*node->p))
+			zfree((char **)&node->p);
+		strfilter_node__delete(node->l);
+		strfilter_node__delete(node->r);
+		free(node);
+	}
+}
+
+void strfilter__delete(struct strfilter *filter)
+{
+	if (filter) {
+		strfilter_node__delete(filter->root);
+		free(filter);
+	}
+}
+
+static const char *get_token(const char *s, const char **e)
+{
+	const char *p;
+
+	s = skip_spaces(s);
+
+	if (*s == '\0') {
+		p = s;
+		goto end;
+	}
+
+	p = s + 1;
+	if (!is_separator(*s)) {
+		/* End search */
+retry:
+		while (*p && !is_separator(*p) && !isspace(*p))
+			p++;
+		/* Escape and special case: '!' is also used in glob pattern */
+		if (*(p - 1) == '\\' || (*p == '!' && *(p - 1) == '[')) {
+			p++;
+			goto retry;
+		}
+	}
+end:
+	*e = p;
+	return s;
+}
+
+static struct strfilter_node *strfilter_node__alloc(const char *op,
+						    struct strfilter_node *l,
+						    struct strfilter_node *r)
+{
+	struct strfilter_node *node = zalloc(sizeof(*node));
+
+	if (node) {
+		node->p = op;
+		node->l = l;
+		node->r = r;
+	}
+
+	return node;
+}
+
+static struct strfilter_node *strfilter_node__new(const char *s,
+						  const char **ep)
+{
+	struct strfilter_node root, *cur, *last_op;
+	const char *e;
+
+	if (!s)
+		return NULL;
+
+	memset(&root, 0, sizeof(root));
+	last_op = cur = &root;
+
+	s = get_token(s, &e);
+	while (*s != '\0' && *s != ')') {
+		switch (*s) {
+		case '&':	/* Exchg last OP->r with AND */
+			if (!cur->r || !last_op->r)
+				goto error;
+			cur = strfilter_node__alloc(OP_and, last_op->r, NULL);
+			if (!cur)
+				goto nomem;
+			last_op->r = cur;
+			last_op = cur;
+			break;
+		case '|':	/* Exchg the root with OR */
+			if (!cur->r || !root.r)
+				goto error;
+			cur = strfilter_node__alloc(OP_or, root.r, NULL);
+			if (!cur)
+				goto nomem;
+			root.r = cur;
+			last_op = cur;
+			break;
+		case '!':	/* Add NOT as a leaf node */
+			if (cur->r)
+				goto error;
+			cur->r = strfilter_node__alloc(OP_not, NULL, NULL);
+			if (!cur->r)
+				goto nomem;
+			cur = cur->r;
+			break;
+		case '(':	/* Recursively parses inside the parenthesis */
+			if (cur->r)
+				goto error;
+			cur->r = strfilter_node__new(s + 1, &s);
+			if (!s)
+				goto nomem;
+			if (!cur->r || *s != ')')
+				goto error;
+			e = s + 1;
+			break;
+		default:
+			if (cur->r)
+				goto error;
+			cur->r = strfilter_node__alloc(NULL, NULL, NULL);
+			if (!cur->r)
+				goto nomem;
+			cur->r->p = strndup(s, e - s);
+			if (!cur->r->p)
+				goto nomem;
+		}
+		s = get_token(e, &e);
+	}
+	if (!cur->r)
+		goto error;
+	*ep = s;
+	return root.r;
+nomem:
+	s = NULL;
+error:
+	*ep = s;
+	strfilter_node__delete(root.r);
+	return NULL;
+}
+
+/*
+ * Parse filter rule and return new strfilter.
+ * Return NULL if fail, and *ep == NULL if memory allocation failed.
+ */
+struct strfilter *strfilter__new(const char *rules, const char **err)
+{
+	struct strfilter *filter = zalloc(sizeof(*filter));
+	const char *ep = NULL;
+
+	if (filter)
+		filter->root = strfilter_node__new(rules, &ep);
+
+	if (!filter || !filter->root || *ep != '\0') {
+		if (err)
+			*err = ep;
+		strfilter__delete(filter);
+		filter = NULL;
+	}
+
+	return filter;
+}
+
+static int strfilter__append(struct strfilter *filter, bool _or,
+			     const char *rules, const char **err)
+{
+	struct strfilter_node *right, *root;
+	const char *ep = NULL;
+
+	if (!filter || !rules)
+		return -EINVAL;
+
+	right = strfilter_node__new(rules, &ep);
+	if (!right || *ep != '\0') {
+		if (err)
+			*err = ep;
+		goto error;
+	}
+	root = strfilter_node__alloc(_or ? OP_or : OP_and, filter->root, right);
+	if (!root) {
+		ep = NULL;
+		goto error;
+	}
+
+	filter->root = root;
+	return 0;
+
+error:
+	strfilter_node__delete(right);
+	return ep ? -EINVAL : -ENOMEM;
+}
+
+int strfilter__or(struct strfilter *filter, const char *rules, const char **err)
+{
+	return strfilter__append(filter, true, rules, err);
+}
+
+int strfilter__and(struct strfilter *filter, const char *rules,
+		   const char **err)
+{
+	return strfilter__append(filter, false, rules, err);
+}
+
+static bool strfilter_node__compare(struct strfilter_node *node,
+				    const char *str)
+{
+	if (!node || !node->p)
+		return false;
+
+	switch (*node->p) {
+	case '|':	/* OR */
+		return strfilter_node__compare(node->l, str) ||
+			strfilter_node__compare(node->r, str);
+	case '&':	/* AND */
+		return strfilter_node__compare(node->l, str) &&
+			strfilter_node__compare(node->r, str);
+	case '!':	/* NOT */
+		return !strfilter_node__compare(node->r, str);
+	default:
+		return strglobmatch(str, node->p);
+	}
+}
+
+/* Return true if STR matches the filter rules */
+bool strfilter__compare(struct strfilter *filter, const char *str)
+{
+	if (!filter)
+		return false;
+	return strfilter_node__compare(filter->root, str);
+}
+
+static int strfilter_node__sprint(struct strfilter_node *node, char *buf);
+
+/* sprint node in parenthesis if needed */
+static int strfilter_node__sprint_pt(struct strfilter_node *node, char *buf)
+{
+	int len;
+	int pt = node->r ? 2 : 0;	/* don't need to check node->l */
+
+	if (buf && pt)
+		*buf++ = '(';
+	len = strfilter_node__sprint(node, buf);
+	if (len < 0)
+		return len;
+	if (buf && pt)
+		*(buf + len) = ')';
+	return len + pt;
+}
+
+static int strfilter_node__sprint(struct strfilter_node *node, char *buf)
+{
+	int len = 0, rlen;
+
+	if (!node || !node->p)
+		return -EINVAL;
+
+	switch (*node->p) {
+	case '|':
+	case '&':
+		len = strfilter_node__sprint_pt(node->l, buf);
+		if (len < 0)
+			return len;
+		__fallthrough;
+	case '!':
+		if (buf) {
+			*(buf + len++) = *node->p;
+			buf += len;
+		} else
+			len++;
+		rlen = strfilter_node__sprint_pt(node->r, buf);
+		if (rlen < 0)
+			return rlen;
+		len += rlen;
+		break;
+	default:
+		len = strlen(node->p);
+		if (buf)
+			strcpy(buf, node->p);
+	}
+
+	return len;
+}
+
+char *strfilter__string(struct strfilter *filter)
+{
+	int len;
+	char *ret = NULL;
+
+	len = strfilter_node__sprint(filter->root, NULL);
+	if (len < 0)
+		return NULL;
+
+	ret = malloc(len + 1);
+	if (ret)
+		strfilter_node__sprint(filter->root, ret);
+
+	return ret;
+}
diff --git a/tools/perf/util/strfilter.h b/tools/perf/util/strfilter.h
new file mode 100644
index 000000000..c05aca9ca
--- /dev/null
+++ b/tools/perf/util/strfilter.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_STRFILTER_H
+#define __PERF_STRFILTER_H
+/* General purpose glob matching filter */
+
+#include <linux/list.h>
+#include <stdbool.h>
+
+/* A node of string filter */
+struct strfilter_node {
+	struct strfilter_node *l;	/* Tree left branch (for &,|) */
+	struct strfilter_node *r;	/* Tree right branch (for !,&,|) */
+	const char *p;		/* Operator or rule */
+};
+
+/* String filter */
+struct strfilter {
+	struct strfilter_node *root;
+};
+
+/**
+ * strfilter__new - Create a new string filter
+ * @rules: Filter rule, which is a combination of glob expressions.
+ * @err: Pointer which points an error detected on @rules
+ *
+ * Parse @rules and return new strfilter. Return NULL if an error detected.
+ * In that case, *@err will indicate where it is detected, and *@err is NULL
+ * if a memory allocation is failed.
+ */
+struct strfilter *strfilter__new(const char *rules, const char **err);
+
+/**
+ * strfilter__or - Append an additional rule by logical-or
+ * @filter: Original string filter
+ * @rules: Filter rule to be appended at left of the root of
+ *         @filter by using logical-or.
+ * @err: Pointer which points an error detected on @rules
+ *
+ * Parse @rules and join it to the @filter by using logical-or.
+ * Return 0 if success, or return the error code.
+ */
+int strfilter__or(struct strfilter *filter,
+		  const char *rules, const char **err);
+
+/**
+ * strfilter__add - Append an additional rule by logical-and
+ * @filter: Original string filter
+ * @rules: Filter rule to be appended at left of the root of
+ *         @filter by using logical-and.
+ * @err: Pointer which points an error detected on @rules
+ *
+ * Parse @rules and join it to the @filter by using logical-and.
+ * Return 0 if success, or return the error code.
+ */
+int strfilter__and(struct strfilter *filter,
+		   const char *rules, const char **err);
+
+/**
+ * strfilter__compare - compare given string and a string filter
+ * @filter: String filter
+ * @str: target string
+ *
+ * Compare @str and @filter. Return true if the str match the rule
+ */
+bool strfilter__compare(struct strfilter *filter, const char *str);
+
+/**
+ * strfilter__delete - delete a string filter
+ * @filter: String filter to delete
+ *
+ * Delete @filter.
+ */
+void strfilter__delete(struct strfilter *filter);
+
+/**
+ * strfilter__string - Reconstruct a rule string from filter
+ * @filter: String filter to reconstruct
+ *
+ * Reconstruct a rule string from @filter. This will be good for
+ * debug messages. Note that returning string must be freed afterward.
+ */
+char *strfilter__string(struct strfilter *filter);
+
+#endif
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
new file mode 100644
index 000000000..4f12a96f3
--- /dev/null
+++ b/tools/perf/util/string.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "string2.h"
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <stdlib.h>
+
+#include <linux/ctype.h>
+
+const char *graph_dotted_line =
+	"---------------------------------------------------------------------"
+	"---------------------------------------------------------------------"
+	"---------------------------------------------------------------------";
+const char *dots =
+	"....................................................................."
+	"....................................................................."
+	".....................................................................";
+
+/*
+ * perf_atoll()
+ * Parse (\d+)(b|B|kb|KB|mb|MB|gb|GB|tb|TB) (e.g. "256MB")
+ * and return its numeric value
+ */
+s64 perf_atoll(const char *str)
+{
+	s64 length;
+	char *p;
+	char c;
+
+	if (!isdigit(str[0]))
+		goto out_err;
+
+	length = strtoll(str, &p, 10);
+	switch (c = *p++) {
+		case 'b': case 'B':
+			if (*p)
+				goto out_err;
+
+			__fallthrough;
+		case '\0':
+			return length;
+		default:
+			goto out_err;
+		/* two-letter suffices */
+		case 'k': case 'K':
+			length <<= 10;
+			break;
+		case 'm': case 'M':
+			length <<= 20;
+			break;
+		case 'g': case 'G':
+			length <<= 30;
+			break;
+		case 't': case 'T':
+			length <<= 40;
+			break;
+	}
+	/* we want the cases to match */
+	if (islower(c)) {
+		if (strcmp(p, "b") != 0)
+			goto out_err;
+	} else {
+		if (strcmp(p, "B") != 0)
+			goto out_err;
+	}
+	return length;
+
+out_err:
+	return -1;
+}
+
+/* Character class matching */
+static bool __match_charclass(const char *pat, char c, const char **npat)
+{
+	bool complement = false, ret = true;
+
+	if (*pat == '!') {
+		complement = true;
+		pat++;
+	}
+	if (*pat++ == c)	/* First character is special */
+		goto end;
+
+	while (*pat && *pat != ']') {	/* Matching */
+		if (*pat == '-' && *(pat + 1) != ']') {	/* Range */
+			if (*(pat - 1) <= c && c <= *(pat + 1))
+				goto end;
+			if (*(pat - 1) > *(pat + 1))
+				goto error;
+			pat += 2;
+		} else if (*pat++ == c)
+			goto end;
+	}
+	if (!*pat)
+		goto error;
+	ret = false;
+
+end:
+	while (*pat && *pat != ']')	/* Searching closing */
+		pat++;
+	if (!*pat)
+		goto error;
+	*npat = pat + 1;
+	return complement ? !ret : ret;
+
+error:
+	return false;
+}
+
+/* Glob/lazy pattern matching */
+static bool __match_glob(const char *str, const char *pat, bool ignore_space,
+			bool case_ins)
+{
+	while (*str && *pat && *pat != '*') {
+		if (ignore_space) {
+			/* Ignore spaces for lazy matching */
+			if (isspace(*str)) {
+				str++;
+				continue;
+			}
+			if (isspace(*pat)) {
+				pat++;
+				continue;
+			}
+		}
+		if (*pat == '?') {	/* Matches any single character */
+			str++;
+			pat++;
+			continue;
+		} else if (*pat == '[')	/* Character classes/Ranges */
+			if (__match_charclass(pat + 1, *str, &pat)) {
+				str++;
+				continue;
+			} else
+				return false;
+		else if (*pat == '\\') /* Escaped char match as normal char */
+			pat++;
+		if (case_ins) {
+			if (tolower(*str) != tolower(*pat))
+				return false;
+		} else if (*str != *pat)
+			return false;
+		str++;
+		pat++;
+	}
+	/* Check wild card */
+	if (*pat == '*') {
+		while (*pat == '*')
+			pat++;
+		if (!*pat)	/* Tail wild card matches all */
+			return true;
+		while (*str)
+			if (__match_glob(str++, pat, ignore_space, case_ins))
+				return true;
+	}
+	return !*str && !*pat;
+}
+
+/**
+ * strglobmatch - glob expression pattern matching
+ * @str: the target string to match
+ * @pat: the pattern string to match
+ *
+ * This returns true if the @str matches @pat. @pat can includes wildcards
+ * ('*','?') and character classes ([CHARS], complementation and ranges are
+ * also supported). Also, this supports escape character ('\') to use special
+ * characters as normal character.
+ *
+ * Note: if @pat syntax is broken, this always returns false.
+ */
+bool strglobmatch(const char *str, const char *pat)
+{
+	return __match_glob(str, pat, false, false);
+}
+
+bool strglobmatch_nocase(const char *str, const char *pat)
+{
+	return __match_glob(str, pat, false, true);
+}
+
+/**
+ * strlazymatch - matching pattern strings lazily with glob pattern
+ * @str: the target string to match
+ * @pat: the pattern string to match
+ *
+ * This is similar to strglobmatch, except this ignores spaces in
+ * the target string.
+ */
+bool strlazymatch(const char *str, const char *pat)
+{
+	return __match_glob(str, pat, true, false);
+}
+
+/**
+ * strtailcmp - Compare the tail of two strings
+ * @s1: 1st string to be compared
+ * @s2: 2nd string to be compared
+ *
+ * Return 0 if whole of either string is same as another's tail part.
+ */
+int strtailcmp(const char *s1, const char *s2)
+{
+	int i1 = strlen(s1);
+	int i2 = strlen(s2);
+	while (--i1 >= 0 && --i2 >= 0) {
+		if (s1[i1] != s2[i2])
+			return s1[i1] - s2[i2];
+	}
+	return 0;
+}
+
+char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints)
+{
+	/*
+	 * FIXME: replace this with an expression using log10() when we
+	 * find a suitable implementation, maybe the one in the dvb drivers...
+	 *
+	 * "%s == %d || " = log10(MAXINT) * 2 + 8 chars for the operators
+	 */
+	size_t size = nints * 28 + 1; /* \0 */
+	size_t i, printed = 0;
+	char *expr = malloc(size);
+
+	if (expr) {
+		const char *or_and = "||", *eq_neq = "==";
+		char *e = expr;
+
+		if (!in) {
+			or_and = "&&";
+			eq_neq = "!=";
+		}
+
+		for (i = 0; i < nints; ++i) {
+			if (printed == size)
+				goto out_err_overflow;
+
+			if (i > 0)
+				printed += scnprintf(e + printed, size - printed, " %s ", or_and);
+			printed += scnprintf(e + printed, size - printed,
+					     "%s %s %d", var, eq_neq, ints[i]);
+		}
+	}
+
+	return expr;
+
+out_err_overflow:
+	free(expr);
+	return NULL;
+}
+
+/* Like strpbrk(), but not break if it is right after a backslash (escaped) */
+char *strpbrk_esc(char *str, const char *stopset)
+{
+	char *ptr;
+
+	do {
+		ptr = strpbrk(str, stopset);
+		if (ptr == str ||
+		    (ptr == str + 1 && *(ptr - 1) != '\\'))
+			break;
+		str = ptr + 1;
+	} while (ptr && *(ptr - 1) == '\\' && *(ptr - 2) != '\\');
+
+	return ptr;
+}
+
+/* Like strdup, but do not copy a single backslash */
+char *strdup_esc(const char *str)
+{
+	char *s, *d, *p, *ret = strdup(str);
+
+	if (!ret)
+		return NULL;
+
+	d = strchr(ret, '\\');
+	if (!d)
+		return ret;
+
+	s = d + 1;
+	do {
+		if (*s == '\0') {
+			*d = '\0';
+			break;
+		}
+		p = strchr(s + 1, '\\');
+		if (p) {
+			memmove(d, s, p - s);
+			d += p - s;
+			s = p + 1;
+		} else
+			memmove(d, s, strlen(s) + 1);
+	} while (p);
+
+	return ret;
+}
+
+unsigned int hex(char c)
+{
+	if (c >= '0' && c <= '9')
+		return c - '0';
+	if (c >= 'a' && c <= 'f')
+		return c - 'a' + 10;
+	return c - 'A' + 10;
+}
diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h
new file mode 100644
index 000000000..56c30fef9
--- /dev/null
+++ b/tools/perf/util/string2.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_STRING_H
+#define PERF_STRING_H
+
+#include <linux/string.h>
+#include <linux/types.h>
+#include <sys/types.h> // pid_t
+#include <stddef.h>
+#include <string.h>
+
+extern const char *graph_dotted_line;
+extern const char *dots;
+
+s64 perf_atoll(const char *str);
+bool strglobmatch(const char *str, const char *pat);
+bool strglobmatch_nocase(const char *str, const char *pat);
+bool strlazymatch(const char *str, const char *pat);
+static inline bool strisglob(const char *str)
+{
+	return strpbrk(str, "*?[") != NULL;
+}
+int strtailcmp(const char *s1, const char *s2);
+
+char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints);
+
+static inline char *asprintf_expr_in_ints(const char *var, size_t nints, int *ints)
+{
+	return asprintf_expr_inout_ints(var, true, nints, ints);
+}
+
+static inline char *asprintf_expr_not_in_ints(const char *var, size_t nints, int *ints)
+{
+	return asprintf_expr_inout_ints(var, false, nints, ints);
+}
+
+char *asprintf__tp_filter_pids(size_t npids, pid_t *pids);
+
+char *strpbrk_esc(char *str, const char *stopset);
+char *strdup_esc(const char *str);
+
+unsigned int hex(char c);
+
+#endif /* PERF_STRING_H */
diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c
new file mode 100644
index 000000000..8a868cbef
--- /dev/null
+++ b/tools/perf/util/strlist.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * (c) 2009 Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include "strlist.h"
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/zalloc.h>
+
+static
+struct rb_node *strlist__node_new(struct rblist *rblist, const void *entry)
+{
+	const char *s = entry;
+	struct rb_node *rc = NULL;
+	struct strlist *strlist = container_of(rblist, struct strlist, rblist);
+	struct str_node *snode = malloc(sizeof(*snode));
+
+	if (snode != NULL) {
+		if (strlist->dupstr) {
+			s = strdup(s);
+			if (s == NULL)
+				goto out_delete;
+		}
+		snode->s = s;
+		rc = &snode->rb_node;
+	}
+
+	return rc;
+
+out_delete:
+	free(snode);
+	return NULL;
+}
+
+static void str_node__delete(struct str_node *snode, bool dupstr)
+{
+	if (dupstr)
+		zfree((char **)&snode->s);
+	free(snode);
+}
+
+static
+void strlist__node_delete(struct rblist *rblist, struct rb_node *rb_node)
+{
+	struct strlist *slist = container_of(rblist, struct strlist, rblist);
+	struct str_node *snode = container_of(rb_node, struct str_node, rb_node);
+
+	str_node__delete(snode, slist->dupstr);
+}
+
+static int strlist__node_cmp(struct rb_node *rb_node, const void *entry)
+{
+	const char *str = entry;
+	struct str_node *snode = container_of(rb_node, struct str_node, rb_node);
+
+	return strcmp(snode->s, str);
+}
+
+int strlist__add(struct strlist *slist, const char *new_entry)
+{
+	return rblist__add_node(&slist->rblist, new_entry);
+}
+
+int strlist__load(struct strlist *slist, const char *filename)
+{
+	char entry[1024];
+	int err;
+	FILE *fp = fopen(filename, "r");
+
+	if (fp == NULL)
+		return -errno;
+
+	while (fgets(entry, sizeof(entry), fp) != NULL) {
+		const size_t len = strlen(entry);
+
+		if (len == 0)
+			continue;
+		entry[len - 1] = '\0';
+
+		err = strlist__add(slist, entry);
+		if (err != 0)
+			goto out;
+	}
+
+	err = 0;
+out:
+	fclose(fp);
+	return err;
+}
+
+void strlist__remove(struct strlist *slist, struct str_node *snode)
+{
+	rblist__remove_node(&slist->rblist, &snode->rb_node);
+}
+
+struct str_node *strlist__find(struct strlist *slist, const char *entry)
+{
+	struct str_node *snode = NULL;
+	struct rb_node *rb_node = rblist__find(&slist->rblist, entry);
+
+	if (rb_node)
+		snode = container_of(rb_node, struct str_node, rb_node);
+
+	return snode;
+}
+
+static int strlist__parse_list_entry(struct strlist *slist, const char *s,
+				     const char *subst_dir)
+{
+	int err;
+	char *subst = NULL;
+
+	if (strncmp(s, "file://", 7) == 0)
+		return strlist__load(slist, s + 7);
+
+	if (subst_dir) {
+		err = -ENOMEM;
+		if (asprintf(&subst, "%s/%s", subst_dir, s) < 0)
+			goto out;
+
+		if (access(subst, F_OK) == 0) {
+			err = strlist__load(slist, subst);
+			goto out;
+		}
+
+		if (slist->file_only) {
+			err = -ENOENT;
+			goto out;
+		}
+	}
+
+	err = strlist__add(slist, s);
+out:
+	free(subst);
+	return err;
+}
+
+static int strlist__parse_list(struct strlist *slist, const char *s, const char *subst_dir)
+{
+	char *sep;
+	int err;
+
+	while ((sep = strchr(s, ',')) != NULL) {
+		*sep = '\0';
+		err = strlist__parse_list_entry(slist, s, subst_dir);
+		*sep = ',';
+		if (err != 0)
+			return err;
+		s = sep + 1;
+	}
+
+	return *s ? strlist__parse_list_entry(slist, s, subst_dir) : 0;
+}
+
+struct strlist *strlist__new(const char *list, const struct strlist_config *config)
+{
+	struct strlist *slist = malloc(sizeof(*slist));
+
+	if (slist != NULL) {
+		bool dupstr = true;
+		bool file_only = false;
+		const char *dirname = NULL;
+
+		if (config) {
+			dupstr = !config->dont_dupstr;
+			dirname = config->dirname;
+			file_only = config->file_only;
+		}
+
+		rblist__init(&slist->rblist);
+		slist->rblist.node_cmp    = strlist__node_cmp;
+		slist->rblist.node_new    = strlist__node_new;
+		slist->rblist.node_delete = strlist__node_delete;
+
+		slist->dupstr	 = dupstr;
+		slist->file_only = file_only;
+
+		if (list && strlist__parse_list(slist, list, dirname) != 0)
+			goto out_error;
+	}
+
+	return slist;
+out_error:
+	free(slist);
+	return NULL;
+}
+
+void strlist__delete(struct strlist *slist)
+{
+	if (slist != NULL)
+		rblist__delete(&slist->rblist);
+}
+
+struct str_node *strlist__entry(const struct strlist *slist, unsigned int idx)
+{
+	struct str_node *snode = NULL;
+	struct rb_node *rb_node;
+
+	rb_node = rblist__entry(&slist->rblist, idx);
+	if (rb_node)
+		snode = container_of(rb_node, struct str_node, rb_node);
+
+	return snode;
+}
diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h
new file mode 100644
index 000000000..7e82c71dc
--- /dev/null
+++ b/tools/perf/util/strlist.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_STRLIST_H
+#define __PERF_STRLIST_H
+
+#include <linux/rbtree.h>
+#include <stdbool.h>
+
+#include "rblist.h"
+
+struct str_node {
+	struct rb_node rb_node;
+	const char     *s;
+};
+
+struct strlist {
+	struct rblist rblist;
+	bool	      dupstr;
+	bool	      file_only;
+};
+
+/*
+ * @file_only: When dirname is present, only consider entries as filenames,
+ *             that should not be added to the list if dirname/entry is not
+ *             found
+ */
+struct strlist_config {
+	bool dont_dupstr;
+	bool file_only;
+	const char *dirname;
+};
+
+struct strlist *strlist__new(const char *slist, const struct strlist_config *config);
+void strlist__delete(struct strlist *slist);
+
+void strlist__remove(struct strlist *slist, struct str_node *sn);
+int strlist__load(struct strlist *slist, const char *filename);
+int strlist__add(struct strlist *slist, const char *str);
+
+struct str_node *strlist__entry(const struct strlist *slist, unsigned int idx);
+struct str_node *strlist__find(struct strlist *slist, const char *entry);
+
+static inline bool strlist__has_entry(struct strlist *slist, const char *entry)
+{
+	return strlist__find(slist, entry) != NULL;
+}
+
+static inline bool strlist__empty(const struct strlist *slist)
+{
+	return rblist__empty(&slist->rblist);
+}
+
+static inline unsigned int strlist__nr_entries(const struct strlist *slist)
+{
+	return rblist__nr_entries(&slist->rblist);
+}
+
+/* For strlist iteration */
+static inline struct str_node *strlist__first(struct strlist *slist)
+{
+	struct rb_node *rn = rb_first_cached(&slist->rblist.entries);
+	return rn ? rb_entry(rn, struct str_node, rb_node) : NULL;
+}
+static inline struct str_node *strlist__next(struct str_node *sn)
+{
+	struct rb_node *rn;
+	if (!sn)
+		return NULL;
+	rn = rb_next(&sn->rb_node);
+	return rn ? rb_entry(rn, struct str_node, rb_node) : NULL;
+}
+
+/**
+ * strlist_for_each      - iterate over a strlist
+ * @pos:	the &struct str_node to use as a loop cursor.
+ * @slist:	the &struct strlist for loop.
+ */
+#define strlist__for_each_entry(pos, slist)	\
+	for (pos = strlist__first(slist); pos; pos = strlist__next(pos))
+
+/**
+ * strlist_for_each_safe - iterate over a strlist safe against removal of
+ *                         str_node
+ * @pos:	the &struct str_node to use as a loop cursor.
+ * @n:		another &struct str_node to use as temporary storage.
+ * @slist:	the &struct strlist for loop.
+ */
+#define strlist__for_each_entry_safe(pos, n, slist)	\
+	for (pos = strlist__first(slist), n = strlist__next(pos); pos;\
+	     pos = n, n = strlist__next(n))
+#endif /* __PERF_STRLIST_H */
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
new file mode 100644
index 000000000..1e0c731fc
--- /dev/null
+++ b/tools/perf/util/svghelper.c
@@ -0,0 +1,809 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svghelper.c - helper functions for outputting svg
+ *
+ * (C) Copyright 2009 Intel Corporation
+ *
+ * Authors:
+ *     Arjan van de Ven <arjan@linux.intel.com>
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <linux/bitmap.h>
+#include <linux/string.h>
+#include <linux/time64.h>
+#include <linux/zalloc.h>
+#include <internal/cpumap.h>
+#include <perf/cpumap.h>
+
+#include "env.h"
+#include "svghelper.h"
+
+static u64 first_time, last_time;
+static u64 turbo_frequency, max_freq;
+
+
+#define SLOT_MULT 30.0
+#define SLOT_HEIGHT 25.0
+#define SLOT_HALF (SLOT_HEIGHT / 2)
+
+int svg_page_width = 1000;
+u64 svg_highlight;
+const char *svg_highlight_name;
+
+#define MIN_TEXT_SIZE 0.01
+
+static u64 total_height;
+static FILE *svgfile;
+
+static double cpu2slot(int cpu)
+{
+	return 2 * cpu + 1;
+}
+
+static int *topology_map;
+
+static double cpu2y(int cpu)
+{
+	if (topology_map)
+		return cpu2slot(topology_map[cpu]) * SLOT_MULT;
+	else
+		return cpu2slot(cpu) * SLOT_MULT;
+}
+
+static double time2pixels(u64 __time)
+{
+	double X;
+
+	X = 1.0 * svg_page_width * (__time - first_time) / (last_time - first_time);
+	return X;
+}
+
+/*
+ * Round text sizes so that the svg viewer only needs a discrete
+ * number of renderings of the font
+ */
+static double round_text_size(double size)
+{
+	int loop = 100;
+	double target = 10.0;
+
+	if (size >= 10.0)
+		return size;
+	while (loop--) {
+		if (size >= target)
+			return target;
+		target = target / 2.0;
+	}
+	return size;
+}
+
+void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end)
+{
+	int new_width;
+
+	svgfile = fopen(filename, "w");
+	if (!svgfile) {
+		fprintf(stderr, "Cannot open %s for output\n", filename);
+		return;
+	}
+	first_time = start;
+	first_time = first_time / 100000000 * 100000000;
+	last_time = end;
+
+	/*
+	 * if the recording is short, we default to a width of 1000, but
+	 * for longer recordings we want at least 200 units of width per second
+	 */
+	new_width = (last_time - first_time) / 5000000;
+
+	if (new_width > svg_page_width)
+		svg_page_width = new_width;
+
+	total_height = (1 + rows + cpu2slot(cpus)) * SLOT_MULT;
+	fprintf(svgfile, "<?xml version=\"1.0\" standalone=\"no\"?> \n");
+	fprintf(svgfile, "<!DOCTYPE svg SYSTEM \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n");
+	fprintf(svgfile, "<svg width=\"%i\" height=\"%" PRIu64 "\" version=\"1.1\" xmlns=\"http://www.w3.org/2000/svg\">\n", svg_page_width, total_height);
+
+	fprintf(svgfile, "<defs>\n  <style type=\"text/css\">\n    <![CDATA[\n");
+
+	fprintf(svgfile, "      rect          { stroke-width: 1; }\n");
+	fprintf(svgfile, "      rect.process  { fill:rgb(180,180,180); fill-opacity:0.9; stroke-width:1;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.process2 { fill:rgb(180,180,180); fill-opacity:0.9; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.process3 { fill:rgb(180,180,180); fill-opacity:0.5; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.sample   { fill:rgb(  0,  0,255); fill-opacity:0.8; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.sample_hi{ fill:rgb(255,128,  0); fill-opacity:0.8; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.error    { fill:rgb(255,  0,  0); fill-opacity:0.5; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.net      { fill:rgb(  0,128,  0); fill-opacity:0.5; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.disk     { fill:rgb(  0,  0,255); fill-opacity:0.5; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.sync     { fill:rgb(128,128,  0); fill-opacity:0.5; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.poll     { fill:rgb(  0,128,128); fill-opacity:0.2; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.blocked  { fill:rgb(255,  0,  0); fill-opacity:0.5; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.waiting  { fill:rgb(224,214,  0); fill-opacity:0.8; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.WAITING  { fill:rgb(255,214, 48); fill-opacity:0.6; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.cpu      { fill:rgb(192,192,192); fill-opacity:0.2; stroke-width:0.5; stroke:rgb(128,128,128); } \n");
+	fprintf(svgfile, "      rect.pstate   { fill:rgb(128,128,128); fill-opacity:0.8; stroke-width:0; } \n");
+	fprintf(svgfile, "      rect.c1       { fill:rgb(255,214,214); fill-opacity:0.5; stroke-width:0; } \n");
+	fprintf(svgfile, "      rect.c2       { fill:rgb(255,172,172); fill-opacity:0.5; stroke-width:0; } \n");
+	fprintf(svgfile, "      rect.c3       { fill:rgb(255,130,130); fill-opacity:0.5; stroke-width:0; } \n");
+	fprintf(svgfile, "      rect.c4       { fill:rgb(255, 88, 88); fill-opacity:0.5; stroke-width:0; } \n");
+	fprintf(svgfile, "      rect.c5       { fill:rgb(255, 44, 44); fill-opacity:0.5; stroke-width:0; } \n");
+	fprintf(svgfile, "      rect.c6       { fill:rgb(255,  0,  0); fill-opacity:0.5; stroke-width:0; } \n");
+	fprintf(svgfile, "      line.pstate   { stroke:rgb(255,255,  0); stroke-opacity:0.8; stroke-width:2; } \n");
+
+	fprintf(svgfile, "    ]]>\n   </style>\n</defs>\n");
+}
+
+static double normalize_height(double height)
+{
+	if (height < 0.25)
+		return 0.25;
+	else if (height < 0.50)
+		return 0.50;
+	else if (height < 0.75)
+		return 0.75;
+	else
+		return 0.100;
+}
+
+void svg_ubox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges)
+{
+	double w = time2pixels(end) - time2pixels(start);
+	height = normalize_height(height);
+
+	if (!svgfile)
+		return;
+
+	fprintf(svgfile, "<g>\n");
+	fprintf(svgfile, "<title>fd=%d error=%d merges=%d</title>\n", fd, err, merges);
+	fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"%s\"/>\n",
+		time2pixels(start),
+		w,
+		Yslot * SLOT_MULT,
+		SLOT_HALF * height,
+		type);
+	fprintf(svgfile, "</g>\n");
+}
+
+void svg_lbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges)
+{
+	double w = time2pixels(end) - time2pixels(start);
+	height = normalize_height(height);
+
+	if (!svgfile)
+		return;
+
+	fprintf(svgfile, "<g>\n");
+	fprintf(svgfile, "<title>fd=%d error=%d merges=%d</title>\n", fd, err, merges);
+	fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"%s\"/>\n",
+		time2pixels(start),
+		w,
+		Yslot * SLOT_MULT + SLOT_HEIGHT - SLOT_HALF * height,
+		SLOT_HALF * height,
+		type);
+	fprintf(svgfile, "</g>\n");
+}
+
+void svg_fbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges)
+{
+	double w = time2pixels(end) - time2pixels(start);
+	height = normalize_height(height);
+
+	if (!svgfile)
+		return;
+
+	fprintf(svgfile, "<g>\n");
+	fprintf(svgfile, "<title>fd=%d error=%d merges=%d</title>\n", fd, err, merges);
+	fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"%s\"/>\n",
+		time2pixels(start),
+		w,
+		Yslot * SLOT_MULT + SLOT_HEIGHT - SLOT_HEIGHT * height,
+		SLOT_HEIGHT * height,
+		type);
+	fprintf(svgfile, "</g>\n");
+}
+
+void svg_box(int Yslot, u64 start, u64 end, const char *type)
+{
+	if (!svgfile)
+		return;
+
+	fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"%s\"/>\n",
+		time2pixels(start), time2pixels(end)-time2pixels(start), Yslot * SLOT_MULT, SLOT_HEIGHT, type);
+}
+
+static char *time_to_string(u64 duration);
+void svg_blocked(int Yslot, int cpu, u64 start, u64 end, const char *backtrace)
+{
+	if (!svgfile)
+		return;
+
+	fprintf(svgfile, "<g>\n");
+	fprintf(svgfile, "<title>#%d blocked %s</title>\n", cpu,
+		time_to_string(end - start));
+	if (backtrace)
+		fprintf(svgfile, "<desc>Blocked on:\n%s</desc>\n", backtrace);
+	svg_box(Yslot, start, end, "blocked");
+	fprintf(svgfile, "</g>\n");
+}
+
+void svg_running(int Yslot, int cpu, u64 start, u64 end, const char *backtrace)
+{
+	double text_size;
+	const char *type;
+
+	if (!svgfile)
+		return;
+
+	if (svg_highlight && end - start > svg_highlight)
+		type = "sample_hi";
+	else
+		type = "sample";
+	fprintf(svgfile, "<g>\n");
+
+	fprintf(svgfile, "<title>#%d running %s</title>\n",
+		cpu, time_to_string(end - start));
+	if (backtrace)
+		fprintf(svgfile, "<desc>Switched because:\n%s</desc>\n", backtrace);
+	fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"%s\"/>\n",
+		time2pixels(start), time2pixels(end)-time2pixels(start), Yslot * SLOT_MULT, SLOT_HEIGHT,
+		type);
+
+	text_size = (time2pixels(end)-time2pixels(start));
+	if (cpu > 9)
+		text_size = text_size/2;
+	if (text_size > 1.25)
+		text_size = 1.25;
+	text_size = round_text_size(text_size);
+
+	if (text_size > MIN_TEXT_SIZE)
+		fprintf(svgfile, "<text x=\"%.8f\" y=\"%.8f\" font-size=\"%.8fpt\">%i</text>\n",
+			time2pixels(start), Yslot *  SLOT_MULT + SLOT_HEIGHT - 1, text_size,  cpu + 1);
+
+	fprintf(svgfile, "</g>\n");
+}
+
+static char *time_to_string(u64 duration)
+{
+	static char text[80];
+
+	text[0] = 0;
+
+	if (duration < NSEC_PER_USEC) /* less than 1 usec */
+		return text;
+
+	if (duration < NSEC_PER_MSEC) { /* less than 1 msec */
+		sprintf(text, "%.1f us", duration / (double)NSEC_PER_USEC);
+		return text;
+	}
+	sprintf(text, "%.1f ms", duration / (double)NSEC_PER_MSEC);
+
+	return text;
+}
+
+void svg_waiting(int Yslot, int cpu, u64 start, u64 end, const char *backtrace)
+{
+	char *text;
+	const char *style;
+	double font_size;
+
+	if (!svgfile)
+		return;
+
+	style = "waiting";
+
+	if (end-start > 10 * NSEC_PER_MSEC) /* 10 msec */
+		style = "WAITING";
+
+	text = time_to_string(end-start);
+
+	font_size = 1.0 * (time2pixels(end)-time2pixels(start));
+
+	if (font_size > 3)
+		font_size = 3;
+
+	font_size = round_text_size(font_size);
+
+	fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\">\n", time2pixels(start), Yslot * SLOT_MULT);
+	fprintf(svgfile, "<title>#%d waiting %s</title>\n", cpu, time_to_string(end - start));
+	if (backtrace)
+		fprintf(svgfile, "<desc>Waiting on:\n%s</desc>\n", backtrace);
+	fprintf(svgfile, "<rect x=\"0\" width=\"%.8f\" y=\"0\" height=\"%.1f\" class=\"%s\"/>\n",
+		time2pixels(end)-time2pixels(start), SLOT_HEIGHT, style);
+	if (font_size > MIN_TEXT_SIZE)
+		fprintf(svgfile, "<text transform=\"rotate(90)\" font-size=\"%.8fpt\"> %s</text>\n",
+			font_size, text);
+	fprintf(svgfile, "</g>\n");
+}
+
+static char *cpu_model(void)
+{
+	static char cpu_m[255];
+	char buf[256];
+	FILE *file;
+
+	cpu_m[0] = 0;
+	/* CPU type */
+	file = fopen("/proc/cpuinfo", "r");
+	if (file) {
+		while (fgets(buf, 255, file)) {
+			if (strstr(buf, "model name")) {
+				strlcpy(cpu_m, &buf[13], 255);
+				break;
+			}
+		}
+		fclose(file);
+	}
+
+	/* CPU type */
+	file = fopen("/sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies", "r");
+	if (file) {
+		while (fgets(buf, 255, file)) {
+			unsigned int freq;
+			freq = strtoull(buf, NULL, 10);
+			if (freq > max_freq)
+				max_freq = freq;
+		}
+		fclose(file);
+	}
+	return cpu_m;
+}
+
+void svg_cpu_box(int cpu, u64 __max_freq, u64 __turbo_freq)
+{
+	char cpu_string[80];
+	if (!svgfile)
+		return;
+
+	max_freq = __max_freq;
+	turbo_frequency = __turbo_freq;
+
+	fprintf(svgfile, "<g>\n");
+
+	fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"cpu\"/>\n",
+		time2pixels(first_time),
+		time2pixels(last_time)-time2pixels(first_time),
+		cpu2y(cpu), SLOT_MULT+SLOT_HEIGHT);
+
+	sprintf(cpu_string, "CPU %i", (int)cpu);
+	fprintf(svgfile, "<text x=\"%.8f\" y=\"%.8f\">%s</text>\n",
+		10+time2pixels(first_time), cpu2y(cpu) + SLOT_HEIGHT/2, cpu_string);
+
+	fprintf(svgfile, "<text transform=\"translate(%.8f,%.8f)\" font-size=\"1.25pt\">%s</text>\n",
+		10+time2pixels(first_time), cpu2y(cpu) + SLOT_MULT + SLOT_HEIGHT - 4, cpu_model());
+
+	fprintf(svgfile, "</g>\n");
+}
+
+void svg_process(int cpu, u64 start, u64 end, int pid, const char *name, const char *backtrace)
+{
+	double width;
+	const char *type;
+
+	if (!svgfile)
+		return;
+
+	if (svg_highlight && end - start >= svg_highlight)
+		type = "sample_hi";
+	else if (svg_highlight_name && strstr(name, svg_highlight_name))
+		type = "sample_hi";
+	else
+		type = "sample";
+
+	fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\">\n", time2pixels(start), cpu2y(cpu));
+	fprintf(svgfile, "<title>%d %s running %s</title>\n", pid, name, time_to_string(end - start));
+	if (backtrace)
+		fprintf(svgfile, "<desc>Switched because:\n%s</desc>\n", backtrace);
+	fprintf(svgfile, "<rect x=\"0\" width=\"%.8f\" y=\"0\" height=\"%.1f\" class=\"%s\"/>\n",
+		time2pixels(end)-time2pixels(start), SLOT_MULT+SLOT_HEIGHT, type);
+	width = time2pixels(end)-time2pixels(start);
+	if (width > 6)
+		width = 6;
+
+	width = round_text_size(width);
+
+	if (width > MIN_TEXT_SIZE)
+		fprintf(svgfile, "<text transform=\"rotate(90)\" font-size=\"%.8fpt\">%s</text>\n",
+			width, name);
+
+	fprintf(svgfile, "</g>\n");
+}
+
+void svg_cstate(int cpu, u64 start, u64 end, int type)
+{
+	double width;
+	char style[128];
+
+	if (!svgfile)
+		return;
+
+
+	fprintf(svgfile, "<g>\n");
+
+	if (type > 6)
+		type = 6;
+	sprintf(style, "c%i", type);
+
+	fprintf(svgfile, "<rect class=\"%s\" x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\"/>\n",
+		style,
+		time2pixels(start), time2pixels(end)-time2pixels(start),
+		cpu2y(cpu), SLOT_MULT+SLOT_HEIGHT);
+
+	width = (time2pixels(end)-time2pixels(start))/2.0;
+	if (width > 6)
+		width = 6;
+
+	width = round_text_size(width);
+
+	if (width > MIN_TEXT_SIZE)
+		fprintf(svgfile, "<text x=\"%.8f\" y=\"%.8f\" font-size=\"%.8fpt\">C%i</text>\n",
+			time2pixels(start), cpu2y(cpu)+width, width, type);
+
+	fprintf(svgfile, "</g>\n");
+}
+
+static char *HzToHuman(unsigned long hz)
+{
+	static char buffer[1024];
+	unsigned long long Hz;
+
+	memset(buffer, 0, 1024);
+
+	Hz = hz;
+
+	/* default: just put the Number in */
+	sprintf(buffer, "%9lli", Hz);
+
+	if (Hz > 1000)
+		sprintf(buffer, " %6lli Mhz", (Hz+500)/1000);
+
+	if (Hz > 1500000)
+		sprintf(buffer, " %6.2f Ghz", (Hz+5000.0)/1000000);
+
+	if (Hz == turbo_frequency)
+		sprintf(buffer, "Turbo");
+
+	return buffer;
+}
+
+void svg_pstate(int cpu, u64 start, u64 end, u64 freq)
+{
+	double height = 0;
+
+	if (!svgfile)
+		return;
+
+	fprintf(svgfile, "<g>\n");
+
+	if (max_freq)
+		height = freq * 1.0 / max_freq * (SLOT_HEIGHT + SLOT_MULT);
+	height = 1 + cpu2y(cpu) + SLOT_MULT + SLOT_HEIGHT - height;
+	fprintf(svgfile, "<line x1=\"%.8f\" x2=\"%.8f\" y1=\"%.1f\" y2=\"%.1f\" class=\"pstate\"/>\n",
+		time2pixels(start), time2pixels(end), height, height);
+	fprintf(svgfile, "<text x=\"%.8f\" y=\"%.8f\" font-size=\"0.25pt\">%s</text>\n",
+		time2pixels(start), height+0.9, HzToHuman(freq));
+
+	fprintf(svgfile, "</g>\n");
+}
+
+
+void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2, const char *backtrace)
+{
+	double height;
+
+	if (!svgfile)
+		return;
+
+
+	fprintf(svgfile, "<g>\n");
+
+	fprintf(svgfile, "<title>%s wakes up %s</title>\n",
+		desc1 ? desc1 : "?",
+		desc2 ? desc2 : "?");
+
+	if (backtrace)
+		fprintf(svgfile, "<desc>%s</desc>\n", backtrace);
+
+	if (row1 < row2) {
+		if (row1) {
+			fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+				time2pixels(start), row1 * SLOT_MULT + SLOT_HEIGHT,  time2pixels(start), row1 * SLOT_MULT + SLOT_HEIGHT + SLOT_MULT/32);
+			if (desc2)
+				fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\"><text transform=\"rotate(90)\" font-size=\"0.02pt\">%s &gt;</text></g>\n",
+					time2pixels(start), row1 * SLOT_MULT + SLOT_HEIGHT + SLOT_HEIGHT/48, desc2);
+		}
+		if (row2) {
+			fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+				time2pixels(start), row2 * SLOT_MULT - SLOT_MULT/32,  time2pixels(start), row2 * SLOT_MULT);
+			if (desc1)
+				fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\"><text transform=\"rotate(90)\" font-size=\"0.02pt\">%s &gt;</text></g>\n",
+					time2pixels(start), row2 * SLOT_MULT - SLOT_MULT/32, desc1);
+		}
+	} else {
+		if (row2) {
+			fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+				time2pixels(start), row2 * SLOT_MULT + SLOT_HEIGHT,  time2pixels(start), row2 * SLOT_MULT + SLOT_HEIGHT + SLOT_MULT/32);
+			if (desc1)
+				fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\"><text transform=\"rotate(90)\" font-size=\"0.02pt\">%s &lt;</text></g>\n",
+					time2pixels(start), row2 * SLOT_MULT + SLOT_HEIGHT + SLOT_MULT/48, desc1);
+		}
+		if (row1) {
+			fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+				time2pixels(start), row1 * SLOT_MULT - SLOT_MULT/32,  time2pixels(start), row1 * SLOT_MULT);
+			if (desc2)
+				fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\"><text transform=\"rotate(90)\" font-size=\"0.02pt\">%s &lt;</text></g>\n",
+					time2pixels(start), row1 * SLOT_MULT - SLOT_HEIGHT/32, desc2);
+		}
+	}
+	height = row1 * SLOT_MULT;
+	if (row2 > row1)
+		height += SLOT_HEIGHT;
+	if (row1)
+		fprintf(svgfile, "<circle  cx=\"%.8f\" cy=\"%.2f\" r = \"0.01\"  style=\"fill:rgb(32,255,32)\"/>\n",
+			time2pixels(start), height);
+
+	fprintf(svgfile, "</g>\n");
+}
+
+void svg_wakeline(u64 start, int row1, int row2, const char *backtrace)
+{
+	double height;
+
+	if (!svgfile)
+		return;
+
+
+	fprintf(svgfile, "<g>\n");
+
+	if (backtrace)
+		fprintf(svgfile, "<desc>%s</desc>\n", backtrace);
+
+	if (row1 < row2)
+		fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+			time2pixels(start), row1 * SLOT_MULT + SLOT_HEIGHT,  time2pixels(start), row2 * SLOT_MULT);
+	else
+		fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+			time2pixels(start), row2 * SLOT_MULT + SLOT_HEIGHT,  time2pixels(start), row1 * SLOT_MULT);
+
+	height = row1 * SLOT_MULT;
+	if (row2 > row1)
+		height += SLOT_HEIGHT;
+	fprintf(svgfile, "<circle  cx=\"%.8f\" cy=\"%.2f\" r = \"0.01\"  style=\"fill:rgb(32,255,32)\"/>\n",
+			time2pixels(start), height);
+
+	fprintf(svgfile, "</g>\n");
+}
+
+void svg_interrupt(u64 start, int row, const char *backtrace)
+{
+	if (!svgfile)
+		return;
+
+	fprintf(svgfile, "<g>\n");
+
+	fprintf(svgfile, "<title>Wakeup from interrupt</title>\n");
+
+	if (backtrace)
+		fprintf(svgfile, "<desc>%s</desc>\n", backtrace);
+
+	fprintf(svgfile, "<circle  cx=\"%.8f\" cy=\"%.2f\" r = \"0.01\"  style=\"fill:rgb(255,128,128)\"/>\n",
+			time2pixels(start), row * SLOT_MULT);
+	fprintf(svgfile, "<circle  cx=\"%.8f\" cy=\"%.2f\" r = \"0.01\"  style=\"fill:rgb(255,128,128)\"/>\n",
+			time2pixels(start), row * SLOT_MULT + SLOT_HEIGHT);
+
+	fprintf(svgfile, "</g>\n");
+}
+
+void svg_text(int Yslot, u64 start, const char *text)
+{
+	if (!svgfile)
+		return;
+
+	fprintf(svgfile, "<text x=\"%.8f\" y=\"%.8f\">%s</text>\n",
+		time2pixels(start), Yslot * SLOT_MULT+SLOT_HEIGHT/2, text);
+}
+
+static void svg_legenda_box(int X, const char *text, const char *style)
+{
+	double boxsize;
+	boxsize = SLOT_HEIGHT / 2;
+
+	fprintf(svgfile, "<rect x=\"%i\" width=\"%.8f\" y=\"0\" height=\"%.1f\" class=\"%s\"/>\n",
+		X, boxsize, boxsize, style);
+	fprintf(svgfile, "<text transform=\"translate(%.8f, %.8f)\" font-size=\"%.8fpt\">%s</text>\n",
+		X + boxsize + 5, boxsize, 0.8 * boxsize, text);
+}
+
+void svg_io_legenda(void)
+{
+	if (!svgfile)
+		return;
+
+	fprintf(svgfile, "<g>\n");
+	svg_legenda_box(0,	"Disk", "disk");
+	svg_legenda_box(100,	"Network", "net");
+	svg_legenda_box(200,	"Sync", "sync");
+	svg_legenda_box(300,	"Poll", "poll");
+	svg_legenda_box(400,	"Error", "error");
+	fprintf(svgfile, "</g>\n");
+}
+
+void svg_legenda(void)
+{
+	if (!svgfile)
+		return;
+
+	fprintf(svgfile, "<g>\n");
+	svg_legenda_box(0,	"Running", "sample");
+	svg_legenda_box(100,	"Idle","c1");
+	svg_legenda_box(200,	"Deeper Idle", "c3");
+	svg_legenda_box(350,	"Deepest Idle", "c6");
+	svg_legenda_box(550,	"Sleeping", "process2");
+	svg_legenda_box(650,	"Waiting for cpu", "waiting");
+	svg_legenda_box(800,	"Blocked on IO", "blocked");
+	fprintf(svgfile, "</g>\n");
+}
+
+void svg_time_grid(double min_thickness)
+{
+	u64 i;
+
+	if (!svgfile)
+		return;
+
+	i = first_time;
+	while (i < last_time) {
+		int color = 220;
+		double thickness = 0.075;
+		if ((i % 100000000) == 0) {
+			thickness = 0.5;
+			color = 192;
+		}
+		if ((i % 1000000000) == 0) {
+			thickness = 2.0;
+			color = 128;
+		}
+
+		if (thickness >= min_thickness)
+			fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%" PRIu64 "\" style=\"stroke:rgb(%i,%i,%i);stroke-width:%.3f\"/>\n",
+				time2pixels(i), SLOT_MULT/2, time2pixels(i),
+				total_height, color, color, color, thickness);
+
+		i += 10000000;
+	}
+}
+
+void svg_close(void)
+{
+	if (svgfile) {
+		fprintf(svgfile, "</svg>\n");
+		fclose(svgfile);
+		svgfile = NULL;
+	}
+}
+
+#define cpumask_bits(maskp) ((maskp)->bits)
+typedef struct { DECLARE_BITMAP(bits, MAX_NR_CPUS); } cpumask_t;
+
+struct topology {
+	cpumask_t *sib_core;
+	int sib_core_nr;
+	cpumask_t *sib_thr;
+	int sib_thr_nr;
+};
+
+static void scan_thread_topology(int *map, struct topology *t, int cpu,
+				 int *pos, int nr_cpus)
+{
+	int i;
+	int thr;
+
+	for (i = 0; i < t->sib_thr_nr; i++) {
+		if (!test_bit(cpu, cpumask_bits(&t->sib_thr[i])))
+			continue;
+
+		for_each_set_bit(thr, cpumask_bits(&t->sib_thr[i]), nr_cpus)
+			if (map[thr] == -1)
+				map[thr] = (*pos)++;
+	}
+}
+
+static void scan_core_topology(int *map, struct topology *t, int nr_cpus)
+{
+	int pos = 0;
+	int i;
+	int cpu;
+
+	for (i = 0; i < t->sib_core_nr; i++)
+		for_each_set_bit(cpu, cpumask_bits(&t->sib_core[i]), nr_cpus)
+			scan_thread_topology(map, t, cpu, &pos, nr_cpus);
+}
+
+static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus)
+{
+	int i;
+	int ret = 0;
+	struct perf_cpu_map *m;
+	struct perf_cpu c;
+
+	m = perf_cpu_map__new(s);
+	if (!m)
+		return -1;
+
+	for (i = 0; i < perf_cpu_map__nr(m); i++) {
+		c = perf_cpu_map__cpu(m, i);
+		if (c.cpu >= nr_cpus) {
+			ret = -1;
+			break;
+		}
+
+		set_bit(c.cpu, cpumask_bits(b));
+	}
+
+	perf_cpu_map__put(m);
+
+	return ret;
+}
+
+int svg_build_topology_map(struct perf_env *env)
+{
+	int i, nr_cpus;
+	struct topology t;
+	char *sib_core, *sib_thr;
+
+	nr_cpus = min(env->nr_cpus_online, MAX_NR_CPUS);
+
+	t.sib_core_nr = env->nr_sibling_cores;
+	t.sib_thr_nr = env->nr_sibling_threads;
+	t.sib_core = calloc(env->nr_sibling_cores, sizeof(cpumask_t));
+	t.sib_thr = calloc(env->nr_sibling_threads, sizeof(cpumask_t));
+
+	sib_core = env->sibling_cores;
+	sib_thr = env->sibling_threads;
+
+	if (!t.sib_core || !t.sib_thr) {
+		fprintf(stderr, "topology: no memory\n");
+		goto exit;
+	}
+
+	for (i = 0; i < env->nr_sibling_cores; i++) {
+		if (str_to_bitmap(sib_core, &t.sib_core[i], nr_cpus)) {
+			fprintf(stderr, "topology: can't parse siblings map\n");
+			goto exit;
+		}
+
+		sib_core += strlen(sib_core) + 1;
+	}
+
+	for (i = 0; i < env->nr_sibling_threads; i++) {
+		if (str_to_bitmap(sib_thr, &t.sib_thr[i], nr_cpus)) {
+			fprintf(stderr, "topology: can't parse siblings map\n");
+			goto exit;
+		}
+
+		sib_thr += strlen(sib_thr) + 1;
+	}
+
+	topology_map = malloc(sizeof(int) * nr_cpus);
+	if (!topology_map) {
+		fprintf(stderr, "topology: no memory\n");
+		goto exit;
+	}
+
+	for (i = 0; i < nr_cpus; i++)
+		topology_map[i] = -1;
+
+	scan_core_topology(topology_map, &t, nr_cpus);
+
+	return 0;
+
+exit:
+	zfree(&t.sib_core);
+	zfree(&t.sib_thr);
+
+	return -1;
+}
diff --git a/tools/perf/util/svghelper.h b/tools/perf/util/svghelper.h
new file mode 100644
index 000000000..81823e8ba
--- /dev/null
+++ b/tools/perf/util/svghelper.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SVGHELPER_H
+#define __PERF_SVGHELPER_H
+
+#include <linux/types.h>
+
+struct perf_env;
+
+void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end);
+void svg_ubox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
+void svg_lbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
+void svg_fbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
+void svg_box(int Yslot, u64 start, u64 end, const char *type);
+void svg_blocked(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
+void svg_running(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
+void svg_waiting(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
+void svg_cpu_box(int cpu, u64 max_frequency, u64 turbo_frequency);
+
+
+void svg_process(int cpu, u64 start, u64 end, int pid, const char *name, const char *backtrace);
+void svg_cstate(int cpu, u64 start, u64 end, int type);
+void svg_pstate(int cpu, u64 start, u64 end, u64 freq);
+
+
+void svg_time_grid(double min_thickness);
+void svg_io_legenda(void);
+void svg_legenda(void);
+void svg_wakeline(u64 start, int row1, int row2, const char *backtrace);
+void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2, const char *backtrace);
+void svg_interrupt(u64 start, int row, const char *backtrace);
+void svg_text(int Yslot, u64 start, const char *text);
+void svg_close(void);
+int svg_build_topology_map(struct perf_env *env);
+
+extern int svg_page_width;
+extern u64 svg_highlight;
+extern const char *svg_highlight_name;
+
+#endif /* __PERF_SVGHELPER_H */
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
new file mode 100644
index 000000000..29c9348c3
--- /dev/null
+++ b/tools/perf/util/symbol-elf.c
@@ -0,0 +1,2539 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+#include "dso.h"
+#include "map.h"
+#include "maps.h"
+#include "symbol.h"
+#include "symsrc.h"
+#include "demangle-ocaml.h"
+#include "demangle-java.h"
+#include "demangle-rust.h"
+#include "machine.h"
+#include "vdso.h"
+#include "debug.h"
+#include "util/copyfile.h"
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+#include <symbol/kallsyms.h>
+#include <internal/lib.h>
+
+#ifndef EM_AARCH64
+#define EM_AARCH64	183  /* ARM 64 bit */
+#endif
+
+#ifndef ELF32_ST_VISIBILITY
+#define ELF32_ST_VISIBILITY(o)	((o) & 0x03)
+#endif
+
+/* For ELF64 the definitions are the same.  */
+#ifndef ELF64_ST_VISIBILITY
+#define ELF64_ST_VISIBILITY(o)	ELF32_ST_VISIBILITY (o)
+#endif
+
+/* How to extract information held in the st_other field.  */
+#ifndef GELF_ST_VISIBILITY
+#define GELF_ST_VISIBILITY(val)	ELF64_ST_VISIBILITY (val)
+#endif
+
+typedef Elf64_Nhdr GElf_Nhdr;
+
+#ifndef DMGL_PARAMS
+#define DMGL_NO_OPTS     0              /* For readability... */
+#define DMGL_PARAMS      (1 << 0)       /* Include function args */
+#define DMGL_ANSI        (1 << 1)       /* Include const, volatile, etc */
+#endif
+
+#ifdef HAVE_LIBBFD_SUPPORT
+#define PACKAGE 'perf'
+#include <bfd.h>
+#else
+#ifdef HAVE_CPLUS_DEMANGLE_SUPPORT
+extern char *cplus_demangle(const char *, int);
+
+static inline char *bfd_demangle(void __maybe_unused *v, const char *c, int i)
+{
+	return cplus_demangle(c, i);
+}
+#else
+#ifdef NO_DEMANGLE
+static inline char *bfd_demangle(void __maybe_unused *v,
+				 const char __maybe_unused *c,
+				 int __maybe_unused i)
+{
+	return NULL;
+}
+#endif
+#endif
+#endif
+
+#ifndef HAVE_ELF_GETPHDRNUM_SUPPORT
+static int elf_getphdrnum(Elf *elf, size_t *dst)
+{
+	GElf_Ehdr gehdr;
+	GElf_Ehdr *ehdr;
+
+	ehdr = gelf_getehdr(elf, &gehdr);
+	if (!ehdr)
+		return -1;
+
+	*dst = ehdr->e_phnum;
+
+	return 0;
+}
+#endif
+
+#ifndef HAVE_ELF_GETSHDRSTRNDX_SUPPORT
+static int elf_getshdrstrndx(Elf *elf __maybe_unused, size_t *dst __maybe_unused)
+{
+	pr_err("%s: update your libelf to > 0.140, this one lacks elf_getshdrstrndx().\n", __func__);
+	return -1;
+}
+#endif
+
+#ifndef NT_GNU_BUILD_ID
+#define NT_GNU_BUILD_ID 3
+#endif
+
+/**
+ * elf_symtab__for_each_symbol - iterate thru all the symbols
+ *
+ * @syms: struct elf_symtab instance to iterate
+ * @idx: uint32_t idx
+ * @sym: GElf_Sym iterator
+ */
+#define elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) \
+	for (idx = 0, gelf_getsym(syms, idx, &sym);\
+	     idx < nr_syms; \
+	     idx++, gelf_getsym(syms, idx, &sym))
+
+static inline uint8_t elf_sym__type(const GElf_Sym *sym)
+{
+	return GELF_ST_TYPE(sym->st_info);
+}
+
+static inline uint8_t elf_sym__visibility(const GElf_Sym *sym)
+{
+	return GELF_ST_VISIBILITY(sym->st_other);
+}
+
+#ifndef STT_GNU_IFUNC
+#define STT_GNU_IFUNC 10
+#endif
+
+static inline int elf_sym__is_function(const GElf_Sym *sym)
+{
+	return (elf_sym__type(sym) == STT_FUNC ||
+		elf_sym__type(sym) == STT_GNU_IFUNC) &&
+	       sym->st_name != 0 &&
+	       sym->st_shndx != SHN_UNDEF;
+}
+
+static inline bool elf_sym__is_object(const GElf_Sym *sym)
+{
+	return elf_sym__type(sym) == STT_OBJECT &&
+		sym->st_name != 0 &&
+		sym->st_shndx != SHN_UNDEF;
+}
+
+static inline int elf_sym__is_label(const GElf_Sym *sym)
+{
+	return elf_sym__type(sym) == STT_NOTYPE &&
+		sym->st_name != 0 &&
+		sym->st_shndx != SHN_UNDEF &&
+		sym->st_shndx != SHN_ABS &&
+		elf_sym__visibility(sym) != STV_HIDDEN &&
+		elf_sym__visibility(sym) != STV_INTERNAL;
+}
+
+static bool elf_sym__filter(GElf_Sym *sym)
+{
+	return elf_sym__is_function(sym) || elf_sym__is_object(sym);
+}
+
+static inline const char *elf_sym__name(const GElf_Sym *sym,
+					const Elf_Data *symstrs)
+{
+	return symstrs->d_buf + sym->st_name;
+}
+
+static inline const char *elf_sec__name(const GElf_Shdr *shdr,
+					const Elf_Data *secstrs)
+{
+	return secstrs->d_buf + shdr->sh_name;
+}
+
+static inline int elf_sec__is_text(const GElf_Shdr *shdr,
+					const Elf_Data *secstrs)
+{
+	return strstr(elf_sec__name(shdr, secstrs), "text") != NULL;
+}
+
+static inline bool elf_sec__is_data(const GElf_Shdr *shdr,
+				    const Elf_Data *secstrs)
+{
+	return strstr(elf_sec__name(shdr, secstrs), "data") != NULL;
+}
+
+static bool elf_sec__filter(GElf_Shdr *shdr, Elf_Data *secstrs)
+{
+	return elf_sec__is_text(shdr, secstrs) || 
+	       elf_sec__is_data(shdr, secstrs);
+}
+
+static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr)
+{
+	Elf_Scn *sec = NULL;
+	GElf_Shdr shdr;
+	size_t cnt = 1;
+
+	while ((sec = elf_nextscn(elf, sec)) != NULL) {
+		gelf_getshdr(sec, &shdr);
+
+		if ((addr >= shdr.sh_addr) &&
+		    (addr < (shdr.sh_addr + shdr.sh_size)))
+			return cnt;
+
+		++cnt;
+	}
+
+	return -1;
+}
+
+Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
+			     GElf_Shdr *shp, const char *name, size_t *idx)
+{
+	Elf_Scn *sec = NULL;
+	size_t cnt = 1;
+
+	/* Elf is corrupted/truncated, avoid calling elf_strptr. */
+	if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL))
+		return NULL;
+
+	while ((sec = elf_nextscn(elf, sec)) != NULL) {
+		char *str;
+
+		gelf_getshdr(sec, shp);
+		str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name);
+		if (str && !strcmp(name, str)) {
+			if (idx)
+				*idx = cnt;
+			return sec;
+		}
+		++cnt;
+	}
+
+	return NULL;
+}
+
+static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr)
+{
+	size_t i, phdrnum;
+	u64 sz;
+
+	if (elf_getphdrnum(elf, &phdrnum))
+		return -1;
+
+	for (i = 0; i < phdrnum; i++) {
+		if (gelf_getphdr(elf, i, phdr) == NULL)
+			return -1;
+
+		if (phdr->p_type != PT_LOAD)
+			continue;
+
+		sz = max(phdr->p_memsz, phdr->p_filesz);
+		if (!sz)
+			continue;
+
+		if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz))
+			return 0;
+	}
+
+	/* Not found any valid program header */
+	return -1;
+}
+
+static bool want_demangle(bool is_kernel_sym)
+{
+	return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
+}
+
+static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
+{
+	int demangle_flags = verbose > 0 ? (DMGL_PARAMS | DMGL_ANSI) : DMGL_NO_OPTS;
+	char *demangled = NULL;
+
+	/*
+	 * We need to figure out if the object was created from C++ sources
+	 * DWARF DW_compile_unit has this, but we don't always have access
+	 * to it...
+	 */
+	if (!want_demangle(dso->kernel || kmodule))
+	    return demangled;
+
+	demangled = bfd_demangle(NULL, elf_name, demangle_flags);
+	if (demangled == NULL) {
+		demangled = ocaml_demangle_sym(elf_name);
+		if (demangled == NULL) {
+			demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET);
+		}
+	}
+	else if (rust_is_mangled(demangled))
+		/*
+		    * Input to Rust demangling is the BFD-demangled
+		    * name which it Rust-demangles in place.
+		    */
+		rust_demangle_sym(demangled);
+
+	return demangled;
+}
+
+#define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \
+	for (idx = 0, pos = gelf_getrel(reldata, 0, &pos_mem); \
+	     idx < nr_entries; \
+	     ++idx, pos = gelf_getrel(reldata, idx, &pos_mem))
+
+#define elf_section__for_each_rela(reldata, pos, pos_mem, idx, nr_entries) \
+	for (idx = 0, pos = gelf_getrela(reldata, 0, &pos_mem); \
+	     idx < nr_entries; \
+	     ++idx, pos = gelf_getrela(reldata, idx, &pos_mem))
+
+/*
+ * We need to check if we have a .dynsym, so that we can handle the
+ * .plt, synthesizing its symbols, that aren't on the symtabs (be it
+ * .dynsym or .symtab).
+ * And always look at the original dso, not at debuginfo packages, that
+ * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS).
+ */
+int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss)
+{
+	uint32_t nr_rel_entries, idx;
+	GElf_Sym sym;
+	u64 plt_offset, plt_header_size, plt_entry_size;
+	GElf_Shdr shdr_plt;
+	struct symbol *f;
+	GElf_Shdr shdr_rel_plt, shdr_dynsym;
+	Elf_Data *reldata, *syms, *symstrs;
+	Elf_Scn *scn_plt_rel, *scn_symstrs, *scn_dynsym;
+	size_t dynsym_idx;
+	GElf_Ehdr ehdr;
+	char sympltname[1024];
+	Elf *elf;
+	int nr = 0, symidx, err = 0;
+
+	if (!ss->dynsym)
+		return 0;
+
+	elf = ss->elf;
+	ehdr = ss->ehdr;
+
+	scn_dynsym = ss->dynsym;
+	shdr_dynsym = ss->dynshdr;
+	dynsym_idx = ss->dynsym_idx;
+
+	if (scn_dynsym == NULL)
+		goto out_elf_end;
+
+	scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt,
+					  ".rela.plt", NULL);
+	if (scn_plt_rel == NULL) {
+		scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt,
+						  ".rel.plt", NULL);
+		if (scn_plt_rel == NULL)
+			goto out_elf_end;
+	}
+
+	err = -1;
+
+	if (shdr_rel_plt.sh_link != dynsym_idx)
+		goto out_elf_end;
+
+	if (elf_section_by_name(elf, &ehdr, &shdr_plt, ".plt", NULL) == NULL)
+		goto out_elf_end;
+
+	/*
+	 * Fetch the relocation section to find the idxes to the GOT
+	 * and the symbols in the .dynsym they refer to.
+	 */
+	reldata = elf_getdata(scn_plt_rel, NULL);
+	if (reldata == NULL)
+		goto out_elf_end;
+
+	syms = elf_getdata(scn_dynsym, NULL);
+	if (syms == NULL)
+		goto out_elf_end;
+
+	scn_symstrs = elf_getscn(elf, shdr_dynsym.sh_link);
+	if (scn_symstrs == NULL)
+		goto out_elf_end;
+
+	symstrs = elf_getdata(scn_symstrs, NULL);
+	if (symstrs == NULL)
+		goto out_elf_end;
+
+	if (symstrs->d_size == 0)
+		goto out_elf_end;
+
+	nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize;
+	plt_offset = shdr_plt.sh_offset;
+	switch (ehdr.e_machine) {
+		case EM_ARM:
+			plt_header_size = 20;
+			plt_entry_size = 12;
+			break;
+
+		case EM_AARCH64:
+			plt_header_size = 32;
+			plt_entry_size = 16;
+			break;
+
+		case EM_SPARC:
+			plt_header_size = 48;
+			plt_entry_size = 12;
+			break;
+
+		case EM_SPARCV9:
+			plt_header_size = 128;
+			plt_entry_size = 32;
+			break;
+
+		default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/xtensa need to be checked */
+			plt_header_size = shdr_plt.sh_entsize;
+			plt_entry_size = shdr_plt.sh_entsize;
+			break;
+	}
+	plt_offset += plt_header_size;
+
+	if (shdr_rel_plt.sh_type == SHT_RELA) {
+		GElf_Rela pos_mem, *pos;
+
+		elf_section__for_each_rela(reldata, pos, pos_mem, idx,
+					   nr_rel_entries) {
+			const char *elf_name = NULL;
+			char *demangled = NULL;
+			symidx = GELF_R_SYM(pos->r_info);
+			gelf_getsym(syms, symidx, &sym);
+
+			elf_name = elf_sym__name(&sym, symstrs);
+			demangled = demangle_sym(dso, 0, elf_name);
+			if (demangled != NULL)
+				elf_name = demangled;
+			snprintf(sympltname, sizeof(sympltname),
+				 "%s@plt", elf_name);
+			free(demangled);
+
+			f = symbol__new(plt_offset, plt_entry_size,
+					STB_GLOBAL, STT_FUNC, sympltname);
+			if (!f)
+				goto out_elf_end;
+
+			plt_offset += plt_entry_size;
+			symbols__insert(&dso->symbols, f);
+			++nr;
+		}
+	} else if (shdr_rel_plt.sh_type == SHT_REL) {
+		GElf_Rel pos_mem, *pos;
+		elf_section__for_each_rel(reldata, pos, pos_mem, idx,
+					  nr_rel_entries) {
+			const char *elf_name = NULL;
+			char *demangled = NULL;
+			symidx = GELF_R_SYM(pos->r_info);
+			gelf_getsym(syms, symidx, &sym);
+
+			elf_name = elf_sym__name(&sym, symstrs);
+			demangled = demangle_sym(dso, 0, elf_name);
+			if (demangled != NULL)
+				elf_name = demangled;
+			snprintf(sympltname, sizeof(sympltname),
+				 "%s@plt", elf_name);
+			free(demangled);
+
+			f = symbol__new(plt_offset, plt_entry_size,
+					STB_GLOBAL, STT_FUNC, sympltname);
+			if (!f)
+				goto out_elf_end;
+
+			plt_offset += plt_entry_size;
+			symbols__insert(&dso->symbols, f);
+			++nr;
+		}
+	}
+
+	err = 0;
+out_elf_end:
+	if (err == 0)
+		return nr;
+	pr_debug("%s: problems reading %s PLT info.\n",
+		 __func__, dso->long_name);
+	return 0;
+}
+
+char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
+{
+	return demangle_sym(dso, kmodule, elf_name);
+}
+
+/*
+ * Align offset to 4 bytes as needed for note name and descriptor data.
+ */
+#define NOTE_ALIGN(n) (((n) + 3) & -4U)
+
+static int elf_read_build_id(Elf *elf, void *bf, size_t size)
+{
+	int err = -1;
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr;
+	Elf_Data *data;
+	Elf_Scn *sec;
+	Elf_Kind ek;
+	void *ptr;
+
+	if (size < BUILD_ID_SIZE)
+		goto out;
+
+	ek = elf_kind(elf);
+	if (ek != ELF_K_ELF)
+		goto out;
+
+	if (gelf_getehdr(elf, &ehdr) == NULL) {
+		pr_err("%s: cannot get elf header.\n", __func__);
+		goto out;
+	}
+
+	/*
+	 * Check following sections for notes:
+	 *   '.note.gnu.build-id'
+	 *   '.notes'
+	 *   '.note' (VDSO specific)
+	 */
+	do {
+		sec = elf_section_by_name(elf, &ehdr, &shdr,
+					  ".note.gnu.build-id", NULL);
+		if (sec)
+			break;
+
+		sec = elf_section_by_name(elf, &ehdr, &shdr,
+					  ".notes", NULL);
+		if (sec)
+			break;
+
+		sec = elf_section_by_name(elf, &ehdr, &shdr,
+					  ".note", NULL);
+		if (sec)
+			break;
+
+		return err;
+
+	} while (0);
+
+	data = elf_getdata(sec, NULL);
+	if (data == NULL)
+		goto out;
+
+	ptr = data->d_buf;
+	while (ptr < (data->d_buf + data->d_size)) {
+		GElf_Nhdr *nhdr = ptr;
+		size_t namesz = NOTE_ALIGN(nhdr->n_namesz),
+		       descsz = NOTE_ALIGN(nhdr->n_descsz);
+		const char *name;
+
+		ptr += sizeof(*nhdr);
+		name = ptr;
+		ptr += namesz;
+		if (nhdr->n_type == NT_GNU_BUILD_ID &&
+		    nhdr->n_namesz == sizeof("GNU")) {
+			if (memcmp(name, "GNU", sizeof("GNU")) == 0) {
+				size_t sz = min(size, descsz);
+				memcpy(bf, ptr, sz);
+				memset(bf + sz, 0, size - sz);
+				err = sz;
+				break;
+			}
+		}
+		ptr += descsz;
+	}
+
+out:
+	return err;
+}
+
+#ifdef HAVE_LIBBFD_BUILDID_SUPPORT
+
+static int read_build_id(const char *filename, struct build_id *bid)
+{
+	size_t size = sizeof(bid->data);
+	int err = -1;
+	bfd *abfd;
+
+	abfd = bfd_openr(filename, NULL);
+	if (!abfd)
+		return -1;
+
+	if (!bfd_check_format(abfd, bfd_object)) {
+		pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename);
+		goto out_close;
+	}
+
+	if (!abfd->build_id || abfd->build_id->size > size)
+		goto out_close;
+
+	memcpy(bid->data, abfd->build_id->data, abfd->build_id->size);
+	memset(bid->data + abfd->build_id->size, 0, size - abfd->build_id->size);
+	err = bid->size = abfd->build_id->size;
+
+out_close:
+	bfd_close(abfd);
+	return err;
+}
+
+#else // HAVE_LIBBFD_BUILDID_SUPPORT
+
+static int read_build_id(const char *filename, struct build_id *bid)
+{
+	size_t size = sizeof(bid->data);
+	int fd, err = -1;
+	Elf *elf;
+
+	if (size < BUILD_ID_SIZE)
+		goto out;
+
+	fd = open(filename, O_RDONLY);
+	if (fd < 0)
+		goto out;
+
+	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	if (elf == NULL) {
+		pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename);
+		goto out_close;
+	}
+
+	err = elf_read_build_id(elf, bid->data, size);
+	if (err > 0)
+		bid->size = err;
+
+	elf_end(elf);
+out_close:
+	close(fd);
+out:
+	return err;
+}
+
+#endif // HAVE_LIBBFD_BUILDID_SUPPORT
+
+int filename__read_build_id(const char *filename, struct build_id *bid)
+{
+	struct kmod_path m = { .name = NULL, };
+	char path[PATH_MAX];
+	int err;
+
+	if (!filename)
+		return -EFAULT;
+
+	err = kmod_path__parse(&m, filename);
+	if (err)
+		return -1;
+
+	if (m.comp) {
+		int error = 0, fd;
+
+		fd = filename__decompress(filename, path, sizeof(path), m.comp, &error);
+		if (fd < 0) {
+			pr_debug("Failed to decompress (error %d) %s\n",
+				 error, filename);
+			return -1;
+		}
+		close(fd);
+		filename = path;
+	}
+
+	err = read_build_id(filename, bid);
+
+	if (m.comp)
+		unlink(filename);
+	return err;
+}
+
+int sysfs__read_build_id(const char *filename, struct build_id *bid)
+{
+	size_t size = sizeof(bid->data);
+	int fd, err = -1;
+
+	fd = open(filename, O_RDONLY);
+	if (fd < 0)
+		goto out;
+
+	while (1) {
+		char bf[BUFSIZ];
+		GElf_Nhdr nhdr;
+		size_t namesz, descsz;
+
+		if (read(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr))
+			break;
+
+		namesz = NOTE_ALIGN(nhdr.n_namesz);
+		descsz = NOTE_ALIGN(nhdr.n_descsz);
+		if (nhdr.n_type == NT_GNU_BUILD_ID &&
+		    nhdr.n_namesz == sizeof("GNU")) {
+			if (read(fd, bf, namesz) != (ssize_t)namesz)
+				break;
+			if (memcmp(bf, "GNU", sizeof("GNU")) == 0) {
+				size_t sz = min(descsz, size);
+				if (read(fd, bid->data, sz) == (ssize_t)sz) {
+					memset(bid->data + sz, 0, size - sz);
+					bid->size = sz;
+					err = 0;
+					break;
+				}
+			} else if (read(fd, bf, descsz) != (ssize_t)descsz)
+				break;
+		} else {
+			int n = namesz + descsz;
+
+			if (n > (int)sizeof(bf)) {
+				n = sizeof(bf);
+				pr_debug("%s: truncating reading of build id in sysfs file %s: n_namesz=%u, n_descsz=%u.\n",
+					 __func__, filename, nhdr.n_namesz, nhdr.n_descsz);
+			}
+			if (read(fd, bf, n) != n)
+				break;
+		}
+	}
+	close(fd);
+out:
+	return err;
+}
+
+#ifdef HAVE_LIBBFD_SUPPORT
+
+int filename__read_debuglink(const char *filename, char *debuglink,
+			     size_t size)
+{
+	int err = -1;
+	asection *section;
+	bfd *abfd;
+
+	abfd = bfd_openr(filename, NULL);
+	if (!abfd)
+		return -1;
+
+	if (!bfd_check_format(abfd, bfd_object)) {
+		pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename);
+		goto out_close;
+	}
+
+	section = bfd_get_section_by_name(abfd, ".gnu_debuglink");
+	if (!section)
+		goto out_close;
+
+	if (section->size > size)
+		goto out_close;
+
+	if (!bfd_get_section_contents(abfd, section, debuglink, 0,
+				      section->size))
+		goto out_close;
+
+	err = 0;
+
+out_close:
+	bfd_close(abfd);
+	return err;
+}
+
+#else
+
+int filename__read_debuglink(const char *filename, char *debuglink,
+			     size_t size)
+{
+	int fd, err = -1;
+	Elf *elf;
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr;
+	Elf_Data *data;
+	Elf_Scn *sec;
+	Elf_Kind ek;
+
+	fd = open(filename, O_RDONLY);
+	if (fd < 0)
+		goto out;
+
+	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	if (elf == NULL) {
+		pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename);
+		goto out_close;
+	}
+
+	ek = elf_kind(elf);
+	if (ek != ELF_K_ELF)
+		goto out_elf_end;
+
+	if (gelf_getehdr(elf, &ehdr) == NULL) {
+		pr_err("%s: cannot get elf header.\n", __func__);
+		goto out_elf_end;
+	}
+
+	sec = elf_section_by_name(elf, &ehdr, &shdr,
+				  ".gnu_debuglink", NULL);
+	if (sec == NULL)
+		goto out_elf_end;
+
+	data = elf_getdata(sec, NULL);
+	if (data == NULL)
+		goto out_elf_end;
+
+	/* the start of this section is a zero-terminated string */
+	strncpy(debuglink, data->d_buf, size);
+
+	err = 0;
+
+out_elf_end:
+	elf_end(elf);
+out_close:
+	close(fd);
+out:
+	return err;
+}
+
+#endif
+
+static int dso__swap_init(struct dso *dso, unsigned char eidata)
+{
+	static unsigned int const endian = 1;
+
+	dso->needs_swap = DSO_SWAP__NO;
+
+	switch (eidata) {
+	case ELFDATA2LSB:
+		/* We are big endian, DSO is little endian. */
+		if (*(unsigned char const *)&endian != 1)
+			dso->needs_swap = DSO_SWAP__YES;
+		break;
+
+	case ELFDATA2MSB:
+		/* We are little endian, DSO is big endian. */
+		if (*(unsigned char const *)&endian != 0)
+			dso->needs_swap = DSO_SWAP__YES;
+		break;
+
+	default:
+		pr_err("unrecognized DSO data encoding %d\n", eidata);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+bool symsrc__possibly_runtime(struct symsrc *ss)
+{
+	return ss->dynsym || ss->opdsec;
+}
+
+bool symsrc__has_symtab(struct symsrc *ss)
+{
+	return ss->symtab != NULL;
+}
+
+void symsrc__destroy(struct symsrc *ss)
+{
+	zfree(&ss->name);
+	elf_end(ss->elf);
+	close(ss->fd);
+}
+
+bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
+{
+	/*
+	 * Usually vmlinux is an ELF file with type ET_EXEC for most
+	 * architectures; except Arm64 kernel is linked with option
+	 * '-share', so need to check type ET_DYN.
+	 */
+	return ehdr.e_type == ET_EXEC || ehdr.e_type == ET_REL ||
+	       ehdr.e_type == ET_DYN;
+}
+
+int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
+		 enum dso_binary_type type)
+{
+	GElf_Ehdr ehdr;
+	Elf *elf;
+	int fd;
+
+	if (dso__needs_decompress(dso)) {
+		fd = dso__decompress_kmodule_fd(dso, name);
+		if (fd < 0)
+			return -1;
+
+		type = dso->symtab_type;
+	} else {
+		fd = open(name, O_RDONLY);
+		if (fd < 0) {
+			dso->load_errno = errno;
+			return -1;
+		}
+	}
+
+	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	if (elf == NULL) {
+		pr_debug("%s: cannot read %s ELF file.\n", __func__, name);
+		dso->load_errno = DSO_LOAD_ERRNO__INVALID_ELF;
+		goto out_close;
+	}
+
+	if (gelf_getehdr(elf, &ehdr) == NULL) {
+		dso->load_errno = DSO_LOAD_ERRNO__INVALID_ELF;
+		pr_debug("%s: cannot get elf header.\n", __func__);
+		goto out_elf_end;
+	}
+
+	if (dso__swap_init(dso, ehdr.e_ident[EI_DATA])) {
+		dso->load_errno = DSO_LOAD_ERRNO__INTERNAL_ERROR;
+		goto out_elf_end;
+	}
+
+	/* Always reject images with a mismatched build-id: */
+	if (dso->has_build_id && !symbol_conf.ignore_vmlinux_buildid) {
+		u8 build_id[BUILD_ID_SIZE];
+		struct build_id bid;
+		int size;
+
+		size = elf_read_build_id(elf, build_id, BUILD_ID_SIZE);
+		if (size <= 0) {
+			dso->load_errno = DSO_LOAD_ERRNO__CANNOT_READ_BUILDID;
+			goto out_elf_end;
+		}
+
+		build_id__init(&bid, build_id, size);
+		if (!dso__build_id_equal(dso, &bid)) {
+			pr_debug("%s: build id mismatch for %s.\n", __func__, name);
+			dso->load_errno = DSO_LOAD_ERRNO__MISMATCHING_BUILDID;
+			goto out_elf_end;
+		}
+	}
+
+	ss->is_64_bit = (gelf_getclass(elf) == ELFCLASS64);
+
+	ss->symtab = elf_section_by_name(elf, &ehdr, &ss->symshdr, ".symtab",
+			NULL);
+	if (ss->symshdr.sh_type != SHT_SYMTAB)
+		ss->symtab = NULL;
+
+	ss->dynsym_idx = 0;
+	ss->dynsym = elf_section_by_name(elf, &ehdr, &ss->dynshdr, ".dynsym",
+			&ss->dynsym_idx);
+	if (ss->dynshdr.sh_type != SHT_DYNSYM)
+		ss->dynsym = NULL;
+
+	ss->opdidx = 0;
+	ss->opdsec = elf_section_by_name(elf, &ehdr, &ss->opdshdr, ".opd",
+			&ss->opdidx);
+	if (ss->opdshdr.sh_type != SHT_PROGBITS)
+		ss->opdsec = NULL;
+
+	if (dso->kernel == DSO_SPACE__USER)
+		ss->adjust_symbols = true;
+	else
+		ss->adjust_symbols = elf__needs_adjust_symbols(ehdr);
+
+	ss->name   = strdup(name);
+	if (!ss->name) {
+		dso->load_errno = errno;
+		goto out_elf_end;
+	}
+
+	ss->elf    = elf;
+	ss->fd     = fd;
+	ss->ehdr   = ehdr;
+	ss->type   = type;
+
+	return 0;
+
+out_elf_end:
+	elf_end(elf);
+out_close:
+	close(fd);
+	return -1;
+}
+
+/**
+ * ref_reloc_sym_not_found - has kernel relocation symbol been found.
+ * @kmap: kernel maps and relocation reference symbol
+ *
+ * This function returns %true if we are dealing with the kernel maps and the
+ * relocation reference symbol has not yet been found.  Otherwise %false is
+ * returned.
+ */
+static bool ref_reloc_sym_not_found(struct kmap *kmap)
+{
+	return kmap && kmap->ref_reloc_sym && kmap->ref_reloc_sym->name &&
+	       !kmap->ref_reloc_sym->unrelocated_addr;
+}
+
+/**
+ * ref_reloc - kernel relocation offset.
+ * @kmap: kernel maps and relocation reference symbol
+ *
+ * This function returns the offset of kernel addresses as determined by using
+ * the relocation reference symbol i.e. if the kernel has not been relocated
+ * then the return value is zero.
+ */
+static u64 ref_reloc(struct kmap *kmap)
+{
+	if (kmap && kmap->ref_reloc_sym &&
+	    kmap->ref_reloc_sym->unrelocated_addr)
+		return kmap->ref_reloc_sym->addr -
+		       kmap->ref_reloc_sym->unrelocated_addr;
+	return 0;
+}
+
+void __weak arch__sym_update(struct symbol *s __maybe_unused,
+		GElf_Sym *sym __maybe_unused) { }
+
+static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
+				      GElf_Sym *sym, GElf_Shdr *shdr,
+				      struct maps *kmaps, struct kmap *kmap,
+				      struct dso **curr_dsop, struct map **curr_mapp,
+				      const char *section_name,
+				      bool adjust_kernel_syms, bool kmodule, bool *remap_kernel)
+{
+	struct dso *curr_dso = *curr_dsop;
+	struct map *curr_map;
+	char dso_name[PATH_MAX];
+
+	/* Adjust symbol to map to file offset */
+	if (adjust_kernel_syms)
+		sym->st_value -= shdr->sh_addr - shdr->sh_offset;
+
+	if (strcmp(section_name, (curr_dso->short_name + dso->short_name_len)) == 0)
+		return 0;
+
+	if (strcmp(section_name, ".text") == 0) {
+		/*
+		 * The initial kernel mapping is based on
+		 * kallsyms and identity maps.  Overwrite it to
+		 * map to the kernel dso.
+		 */
+		if (*remap_kernel && dso->kernel && !kmodule) {
+			*remap_kernel = false;
+			map->start = shdr->sh_addr + ref_reloc(kmap);
+			map->end = map->start + shdr->sh_size;
+			map->pgoff = shdr->sh_offset;
+			map->map_ip = map__map_ip;
+			map->unmap_ip = map__unmap_ip;
+			/* Ensure maps are correctly ordered */
+			if (kmaps) {
+				map__get(map);
+				maps__remove(kmaps, map);
+				maps__insert(kmaps, map);
+				map__put(map);
+			}
+		}
+
+		/*
+		 * The initial module mapping is based on
+		 * /proc/modules mapped to offset zero.
+		 * Overwrite it to map to the module dso.
+		 */
+		if (*remap_kernel && kmodule) {
+			*remap_kernel = false;
+			map->pgoff = shdr->sh_offset;
+		}
+
+		*curr_mapp = map;
+		*curr_dsop = dso;
+		return 0;
+	}
+
+	if (!kmap)
+		return 0;
+
+	snprintf(dso_name, sizeof(dso_name), "%s%s", dso->short_name, section_name);
+
+	curr_map = maps__find_by_name(kmaps, dso_name);
+	if (curr_map == NULL) {
+		u64 start = sym->st_value;
+
+		if (kmodule)
+			start += map->start + shdr->sh_offset;
+
+		curr_dso = dso__new(dso_name);
+		if (curr_dso == NULL)
+			return -1;
+		curr_dso->kernel = dso->kernel;
+		curr_dso->long_name = dso->long_name;
+		curr_dso->long_name_len = dso->long_name_len;
+		curr_map = map__new2(start, curr_dso);
+		dso__put(curr_dso);
+		if (curr_map == NULL)
+			return -1;
+
+		if (curr_dso->kernel)
+			map__kmap(curr_map)->kmaps = kmaps;
+
+		if (adjust_kernel_syms) {
+			curr_map->start  = shdr->sh_addr + ref_reloc(kmap);
+			curr_map->end	 = curr_map->start + shdr->sh_size;
+			curr_map->pgoff	 = shdr->sh_offset;
+		} else {
+			curr_map->map_ip = curr_map->unmap_ip = identity__map_ip;
+		}
+		curr_dso->symtab_type = dso->symtab_type;
+		maps__insert(kmaps, curr_map);
+		/*
+		 * Add it before we drop the reference to curr_map, i.e. while
+		 * we still are sure to have a reference to this DSO via
+		 * *curr_map->dso.
+		 */
+		dsos__add(&kmaps->machine->dsos, curr_dso);
+		/* kmaps already got it */
+		map__put(curr_map);
+		dso__set_loaded(curr_dso);
+		*curr_mapp = curr_map;
+		*curr_dsop = curr_dso;
+	} else
+		*curr_dsop = curr_map->dso;
+
+	return 0;
+}
+
+static int
+dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+		       struct symsrc *runtime_ss, int kmodule, int dynsym)
+{
+	struct kmap *kmap = dso->kernel ? map__kmap(map) : NULL;
+	struct maps *kmaps = kmap ? map__kmaps(map) : NULL;
+	struct map *curr_map = map;
+	struct dso *curr_dso = dso;
+	Elf_Data *symstrs, *secstrs, *secstrs_run, *secstrs_sym;
+	uint32_t nr_syms;
+	int err = -1;
+	uint32_t idx;
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr;
+	GElf_Shdr tshdr;
+	Elf_Data *syms, *opddata = NULL;
+	GElf_Sym sym;
+	Elf_Scn *sec, *sec_strndx;
+	Elf *elf;
+	int nr = 0;
+	bool remap_kernel = false, adjust_kernel_syms = false;
+
+	if (kmap && !kmaps)
+		return -1;
+
+	elf = syms_ss->elf;
+	ehdr = syms_ss->ehdr;
+	if (dynsym) {
+		sec  = syms_ss->dynsym;
+		shdr = syms_ss->dynshdr;
+	} else {
+		sec =  syms_ss->symtab;
+		shdr = syms_ss->symshdr;
+	}
+
+	if (elf_section_by_name(runtime_ss->elf, &runtime_ss->ehdr, &tshdr,
+				".text", NULL))
+		dso->text_offset = tshdr.sh_addr - tshdr.sh_offset;
+
+	if (runtime_ss->opdsec)
+		opddata = elf_rawdata(runtime_ss->opdsec, NULL);
+
+	syms = elf_getdata(sec, NULL);
+	if (syms == NULL)
+		goto out_elf_end;
+
+	sec = elf_getscn(elf, shdr.sh_link);
+	if (sec == NULL)
+		goto out_elf_end;
+
+	symstrs = elf_getdata(sec, NULL);
+	if (symstrs == NULL)
+		goto out_elf_end;
+
+	sec_strndx = elf_getscn(runtime_ss->elf, runtime_ss->ehdr.e_shstrndx);
+	if (sec_strndx == NULL)
+		goto out_elf_end;
+
+	secstrs_run = elf_getdata(sec_strndx, NULL);
+	if (secstrs_run == NULL)
+		goto out_elf_end;
+
+	sec_strndx = elf_getscn(elf, ehdr.e_shstrndx);
+	if (sec_strndx == NULL)
+		goto out_elf_end;
+
+	secstrs_sym = elf_getdata(sec_strndx, NULL);
+	if (secstrs_sym == NULL)
+		goto out_elf_end;
+
+	nr_syms = shdr.sh_size / shdr.sh_entsize;
+
+	memset(&sym, 0, sizeof(sym));
+
+	/*
+	 * The kernel relocation symbol is needed in advance in order to adjust
+	 * kernel maps correctly.
+	 */
+	if (ref_reloc_sym_not_found(kmap)) {
+		elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) {
+			const char *elf_name = elf_sym__name(&sym, symstrs);
+
+			if (strcmp(elf_name, kmap->ref_reloc_sym->name))
+				continue;
+			kmap->ref_reloc_sym->unrelocated_addr = sym.st_value;
+			map->reloc = kmap->ref_reloc_sym->addr -
+				     kmap->ref_reloc_sym->unrelocated_addr;
+			break;
+		}
+	}
+
+	/*
+	 * Handle any relocation of vdso necessary because older kernels
+	 * attempted to prelink vdso to its virtual address.
+	 */
+	if (dso__is_vdso(dso))
+		map->reloc = map->start - dso->text_offset;
+
+	dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap);
+	/*
+	 * Initial kernel and module mappings do not map to the dso.
+	 * Flag the fixups.
+	 */
+	if (dso->kernel) {
+		remap_kernel = true;
+		adjust_kernel_syms = dso->adjust_symbols;
+	}
+	elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) {
+		struct symbol *f;
+		const char *elf_name = elf_sym__name(&sym, symstrs);
+		char *demangled = NULL;
+		int is_label = elf_sym__is_label(&sym);
+		const char *section_name;
+		bool used_opd = false;
+
+		if (!is_label && !elf_sym__filter(&sym))
+			continue;
+
+		/* Reject ARM ELF "mapping symbols": these aren't unique and
+		 * don't identify functions, so will confuse the profile
+		 * output: */
+		if (ehdr.e_machine == EM_ARM || ehdr.e_machine == EM_AARCH64) {
+			if (elf_name[0] == '$' && strchr("adtx", elf_name[1])
+			    && (elf_name[2] == '\0' || elf_name[2] == '.'))
+				continue;
+		}
+
+		if (runtime_ss->opdsec && sym.st_shndx == runtime_ss->opdidx) {
+			u32 offset = sym.st_value - syms_ss->opdshdr.sh_addr;
+			u64 *opd = opddata->d_buf + offset;
+			sym.st_value = DSO__SWAP(dso, u64, *opd);
+			sym.st_shndx = elf_addr_to_index(runtime_ss->elf,
+					sym.st_value);
+			used_opd = true;
+		}
+
+		/*
+		 * When loading symbols in a data mapping, ABS symbols (which
+		 * has a value of SHN_ABS in its st_shndx) failed at
+		 * elf_getscn().  And it marks the loading as a failure so
+		 * already loaded symbols cannot be fixed up.
+		 *
+		 * I'm not sure what should be done. Just ignore them for now.
+		 * - Namhyung Kim
+		 */
+		if (sym.st_shndx == SHN_ABS)
+			continue;
+
+		sec = elf_getscn(syms_ss->elf, sym.st_shndx);
+		if (!sec)
+			goto out_elf_end;
+
+		gelf_getshdr(sec, &shdr);
+
+		/*
+		 * If the attribute bit SHF_ALLOC is not set, the section
+		 * doesn't occupy memory during process execution.
+		 * E.g. ".gnu.warning.*" section is used by linker to generate
+		 * warnings when calling deprecated functions, the symbols in
+		 * the section aren't loaded to memory during process execution,
+		 * so skip them.
+		 */
+		if (!(shdr.sh_flags & SHF_ALLOC))
+			continue;
+
+		secstrs = secstrs_sym;
+
+		/*
+		 * We have to fallback to runtime when syms' section header has
+		 * NOBITS set. NOBITS results in file offset (sh_offset) not
+		 * being incremented. So sh_offset used below has different
+		 * values for syms (invalid) and runtime (valid).
+		 */
+		if (shdr.sh_type == SHT_NOBITS) {
+			sec = elf_getscn(runtime_ss->elf, sym.st_shndx);
+			if (!sec)
+				goto out_elf_end;
+
+			gelf_getshdr(sec, &shdr);
+			secstrs = secstrs_run;
+		}
+
+		if (is_label && !elf_sec__filter(&shdr, secstrs))
+			continue;
+
+		section_name = elf_sec__name(&shdr, secstrs);
+
+		/* On ARM, symbols for thumb functions have 1 added to
+		 * the symbol address as a flag - remove it */
+		if ((ehdr.e_machine == EM_ARM) &&
+		    (GELF_ST_TYPE(sym.st_info) == STT_FUNC) &&
+		    (sym.st_value & 1))
+			--sym.st_value;
+
+		if (dso->kernel) {
+			if (dso__process_kernel_symbol(dso, map, &sym, &shdr, kmaps, kmap, &curr_dso, &curr_map,
+						       section_name, adjust_kernel_syms, kmodule, &remap_kernel))
+				goto out_elf_end;
+		} else if ((used_opd && runtime_ss->adjust_symbols) ||
+			   (!used_opd && syms_ss->adjust_symbols)) {
+			GElf_Phdr phdr;
+
+			if (elf_read_program_header(runtime_ss->elf,
+						    (u64)sym.st_value, &phdr)) {
+				pr_debug4("%s: failed to find program header for "
+					   "symbol: %s st_value: %#" PRIx64 "\n",
+					   __func__, elf_name, (u64)sym.st_value);
+				pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
+					"sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n",
+					__func__, (u64)sym.st_value, (u64)shdr.sh_addr,
+					(u64)shdr.sh_offset);
+				/*
+				 * Fail to find program header, let's rollback
+				 * to use shdr.sh_addr and shdr.sh_offset to
+				 * calibrate symbol's file address, though this
+				 * is not necessary for normal C ELF file, we
+				 * still need to handle java JIT symbols in this
+				 * case.
+				 */
+				sym.st_value -= shdr.sh_addr - shdr.sh_offset;
+			} else {
+				pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
+					"p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n",
+					__func__, (u64)sym.st_value, (u64)phdr.p_vaddr,
+					(u64)phdr.p_offset);
+				sym.st_value -= phdr.p_vaddr - phdr.p_offset;
+			}
+		}
+
+		demangled = demangle_sym(dso, kmodule, elf_name);
+		if (demangled != NULL)
+			elf_name = demangled;
+
+		f = symbol__new(sym.st_value, sym.st_size,
+				GELF_ST_BIND(sym.st_info),
+				GELF_ST_TYPE(sym.st_info), elf_name);
+		free(demangled);
+		if (!f)
+			goto out_elf_end;
+
+		arch__sym_update(f, &sym);
+
+		__symbols__insert(&curr_dso->symbols, f, dso->kernel);
+		nr++;
+	}
+
+	/*
+	 * For misannotated, zeroed, ASM function sizes.
+	 */
+	if (nr > 0) {
+		symbols__fixup_end(&dso->symbols, false);
+		symbols__fixup_duplicate(&dso->symbols);
+		if (kmap) {
+			/*
+			 * We need to fixup this here too because we create new
+			 * maps here, for things like vsyscall sections.
+			 */
+			maps__fixup_end(kmaps);
+		}
+	}
+	err = nr;
+out_elf_end:
+	return err;
+}
+
+int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+		  struct symsrc *runtime_ss, int kmodule)
+{
+	int nr = 0;
+	int err = -1;
+
+	dso->symtab_type = syms_ss->type;
+	dso->is_64_bit = syms_ss->is_64_bit;
+	dso->rel = syms_ss->ehdr.e_type == ET_REL;
+
+	/*
+	 * Modules may already have symbols from kallsyms, but those symbols
+	 * have the wrong values for the dso maps, so remove them.
+	 */
+	if (kmodule && syms_ss->symtab)
+		symbols__delete(&dso->symbols);
+
+	if (!syms_ss->symtab) {
+		/*
+		 * If the vmlinux is stripped, fail so we will fall back
+		 * to using kallsyms. The vmlinux runtime symbols aren't
+		 * of much use.
+		 */
+		if (dso->kernel)
+			return err;
+	} else  {
+		err = dso__load_sym_internal(dso, map, syms_ss, runtime_ss,
+					     kmodule, 0);
+		if (err < 0)
+			return err;
+		nr = err;
+	}
+
+	if (syms_ss->dynsym) {
+		err = dso__load_sym_internal(dso, map, syms_ss, runtime_ss,
+					     kmodule, 1);
+		if (err < 0)
+			return err;
+		err += nr;
+	}
+
+	return err;
+}
+
+static int elf_read_maps(Elf *elf, bool exe, mapfn_t mapfn, void *data)
+{
+	GElf_Phdr phdr;
+	size_t i, phdrnum;
+	int err;
+	u64 sz;
+
+	if (elf_getphdrnum(elf, &phdrnum))
+		return -1;
+
+	for (i = 0; i < phdrnum; i++) {
+		if (gelf_getphdr(elf, i, &phdr) == NULL)
+			return -1;
+		if (phdr.p_type != PT_LOAD)
+			continue;
+		if (exe) {
+			if (!(phdr.p_flags & PF_X))
+				continue;
+		} else {
+			if (!(phdr.p_flags & PF_R))
+				continue;
+		}
+		sz = min(phdr.p_memsz, phdr.p_filesz);
+		if (!sz)
+			continue;
+		err = mapfn(phdr.p_vaddr, sz, phdr.p_offset, data);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data,
+		    bool *is_64_bit)
+{
+	int err;
+	Elf *elf;
+
+	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	if (elf == NULL)
+		return -1;
+
+	if (is_64_bit)
+		*is_64_bit = (gelf_getclass(elf) == ELFCLASS64);
+
+	err = elf_read_maps(elf, exe, mapfn, data);
+
+	elf_end(elf);
+	return err;
+}
+
+enum dso_type dso__type_fd(int fd)
+{
+	enum dso_type dso_type = DSO__TYPE_UNKNOWN;
+	GElf_Ehdr ehdr;
+	Elf_Kind ek;
+	Elf *elf;
+
+	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	if (elf == NULL)
+		goto out;
+
+	ek = elf_kind(elf);
+	if (ek != ELF_K_ELF)
+		goto out_end;
+
+	if (gelf_getclass(elf) == ELFCLASS64) {
+		dso_type = DSO__TYPE_64BIT;
+		goto out_end;
+	}
+
+	if (gelf_getehdr(elf, &ehdr) == NULL)
+		goto out_end;
+
+	if (ehdr.e_machine == EM_X86_64)
+		dso_type = DSO__TYPE_X32BIT;
+	else
+		dso_type = DSO__TYPE_32BIT;
+out_end:
+	elf_end(elf);
+out:
+	return dso_type;
+}
+
+static int copy_bytes(int from, off_t from_offs, int to, off_t to_offs, u64 len)
+{
+	ssize_t r;
+	size_t n;
+	int err = -1;
+	char *buf = malloc(page_size);
+
+	if (buf == NULL)
+		return -1;
+
+	if (lseek(to, to_offs, SEEK_SET) != to_offs)
+		goto out;
+
+	if (lseek(from, from_offs, SEEK_SET) != from_offs)
+		goto out;
+
+	while (len) {
+		n = page_size;
+		if (len < n)
+			n = len;
+		/* Use read because mmap won't work on proc files */
+		r = read(from, buf, n);
+		if (r < 0)
+			goto out;
+		if (!r)
+			break;
+		n = r;
+		r = write(to, buf, n);
+		if (r < 0)
+			goto out;
+		if ((size_t)r != n)
+			goto out;
+		len -= n;
+	}
+
+	err = 0;
+out:
+	free(buf);
+	return err;
+}
+
+struct kcore {
+	int fd;
+	int elfclass;
+	Elf *elf;
+	GElf_Ehdr ehdr;
+};
+
+static int kcore__open(struct kcore *kcore, const char *filename)
+{
+	GElf_Ehdr *ehdr;
+
+	kcore->fd = open(filename, O_RDONLY);
+	if (kcore->fd == -1)
+		return -1;
+
+	kcore->elf = elf_begin(kcore->fd, ELF_C_READ, NULL);
+	if (!kcore->elf)
+		goto out_close;
+
+	kcore->elfclass = gelf_getclass(kcore->elf);
+	if (kcore->elfclass == ELFCLASSNONE)
+		goto out_end;
+
+	ehdr = gelf_getehdr(kcore->elf, &kcore->ehdr);
+	if (!ehdr)
+		goto out_end;
+
+	return 0;
+
+out_end:
+	elf_end(kcore->elf);
+out_close:
+	close(kcore->fd);
+	return -1;
+}
+
+static int kcore__init(struct kcore *kcore, char *filename, int elfclass,
+		       bool temp)
+{
+	kcore->elfclass = elfclass;
+
+	if (temp)
+		kcore->fd = mkstemp(filename);
+	else
+		kcore->fd = open(filename, O_WRONLY | O_CREAT | O_EXCL, 0400);
+	if (kcore->fd == -1)
+		return -1;
+
+	kcore->elf = elf_begin(kcore->fd, ELF_C_WRITE, NULL);
+	if (!kcore->elf)
+		goto out_close;
+
+	if (!gelf_newehdr(kcore->elf, elfclass))
+		goto out_end;
+
+	memset(&kcore->ehdr, 0, sizeof(GElf_Ehdr));
+
+	return 0;
+
+out_end:
+	elf_end(kcore->elf);
+out_close:
+	close(kcore->fd);
+	unlink(filename);
+	return -1;
+}
+
+static void kcore__close(struct kcore *kcore)
+{
+	elf_end(kcore->elf);
+	close(kcore->fd);
+}
+
+static int kcore__copy_hdr(struct kcore *from, struct kcore *to, size_t count)
+{
+	GElf_Ehdr *ehdr = &to->ehdr;
+	GElf_Ehdr *kehdr = &from->ehdr;
+
+	memcpy(ehdr->e_ident, kehdr->e_ident, EI_NIDENT);
+	ehdr->e_type      = kehdr->e_type;
+	ehdr->e_machine   = kehdr->e_machine;
+	ehdr->e_version   = kehdr->e_version;
+	ehdr->e_entry     = 0;
+	ehdr->e_shoff     = 0;
+	ehdr->e_flags     = kehdr->e_flags;
+	ehdr->e_phnum     = count;
+	ehdr->e_shentsize = 0;
+	ehdr->e_shnum     = 0;
+	ehdr->e_shstrndx  = 0;
+
+	if (from->elfclass == ELFCLASS32) {
+		ehdr->e_phoff     = sizeof(Elf32_Ehdr);
+		ehdr->e_ehsize    = sizeof(Elf32_Ehdr);
+		ehdr->e_phentsize = sizeof(Elf32_Phdr);
+	} else {
+		ehdr->e_phoff     = sizeof(Elf64_Ehdr);
+		ehdr->e_ehsize    = sizeof(Elf64_Ehdr);
+		ehdr->e_phentsize = sizeof(Elf64_Phdr);
+	}
+
+	if (!gelf_update_ehdr(to->elf, ehdr))
+		return -1;
+
+	if (!gelf_newphdr(to->elf, count))
+		return -1;
+
+	return 0;
+}
+
+static int kcore__add_phdr(struct kcore *kcore, int idx, off_t offset,
+			   u64 addr, u64 len)
+{
+	GElf_Phdr phdr = {
+		.p_type		= PT_LOAD,
+		.p_flags	= PF_R | PF_W | PF_X,
+		.p_offset	= offset,
+		.p_vaddr	= addr,
+		.p_paddr	= 0,
+		.p_filesz	= len,
+		.p_memsz	= len,
+		.p_align	= page_size,
+	};
+
+	if (!gelf_update_phdr(kcore->elf, idx, &phdr))
+		return -1;
+
+	return 0;
+}
+
+static off_t kcore__write(struct kcore *kcore)
+{
+	return elf_update(kcore->elf, ELF_C_WRITE);
+}
+
+struct phdr_data {
+	off_t offset;
+	off_t rel;
+	u64 addr;
+	u64 len;
+	struct list_head node;
+	struct phdr_data *remaps;
+};
+
+struct sym_data {
+	u64 addr;
+	struct list_head node;
+};
+
+struct kcore_copy_info {
+	u64 stext;
+	u64 etext;
+	u64 first_symbol;
+	u64 last_symbol;
+	u64 first_module;
+	u64 first_module_symbol;
+	u64 last_module_symbol;
+	size_t phnum;
+	struct list_head phdrs;
+	struct list_head syms;
+};
+
+#define kcore_copy__for_each_phdr(k, p) \
+	list_for_each_entry((p), &(k)->phdrs, node)
+
+static struct phdr_data *phdr_data__new(u64 addr, u64 len, off_t offset)
+{
+	struct phdr_data *p = zalloc(sizeof(*p));
+
+	if (p) {
+		p->addr   = addr;
+		p->len    = len;
+		p->offset = offset;
+	}
+
+	return p;
+}
+
+static struct phdr_data *kcore_copy_info__addnew(struct kcore_copy_info *kci,
+						 u64 addr, u64 len,
+						 off_t offset)
+{
+	struct phdr_data *p = phdr_data__new(addr, len, offset);
+
+	if (p)
+		list_add_tail(&p->node, &kci->phdrs);
+
+	return p;
+}
+
+static void kcore_copy__free_phdrs(struct kcore_copy_info *kci)
+{
+	struct phdr_data *p, *tmp;
+
+	list_for_each_entry_safe(p, tmp, &kci->phdrs, node) {
+		list_del_init(&p->node);
+		free(p);
+	}
+}
+
+static struct sym_data *kcore_copy__new_sym(struct kcore_copy_info *kci,
+					    u64 addr)
+{
+	struct sym_data *s = zalloc(sizeof(*s));
+
+	if (s) {
+		s->addr = addr;
+		list_add_tail(&s->node, &kci->syms);
+	}
+
+	return s;
+}
+
+static void kcore_copy__free_syms(struct kcore_copy_info *kci)
+{
+	struct sym_data *s, *tmp;
+
+	list_for_each_entry_safe(s, tmp, &kci->syms, node) {
+		list_del_init(&s->node);
+		free(s);
+	}
+}
+
+static int kcore_copy__process_kallsyms(void *arg, const char *name, char type,
+					u64 start)
+{
+	struct kcore_copy_info *kci = arg;
+
+	if (!kallsyms__is_function(type))
+		return 0;
+
+	if (strchr(name, '[')) {
+		if (!kci->first_module_symbol || start < kci->first_module_symbol)
+			kci->first_module_symbol = start;
+		if (start > kci->last_module_symbol)
+			kci->last_module_symbol = start;
+		return 0;
+	}
+
+	if (!kci->first_symbol || start < kci->first_symbol)
+		kci->first_symbol = start;
+
+	if (!kci->last_symbol || start > kci->last_symbol)
+		kci->last_symbol = start;
+
+	if (!strcmp(name, "_stext")) {
+		kci->stext = start;
+		return 0;
+	}
+
+	if (!strcmp(name, "_etext")) {
+		kci->etext = start;
+		return 0;
+	}
+
+	if (is_entry_trampoline(name) && !kcore_copy__new_sym(kci, start))
+		return -1;
+
+	return 0;
+}
+
+static int kcore_copy__parse_kallsyms(struct kcore_copy_info *kci,
+				      const char *dir)
+{
+	char kallsyms_filename[PATH_MAX];
+
+	scnprintf(kallsyms_filename, PATH_MAX, "%s/kallsyms", dir);
+
+	if (symbol__restricted_filename(kallsyms_filename, "/proc/kallsyms"))
+		return -1;
+
+	if (kallsyms__parse(kallsyms_filename, kci,
+			    kcore_copy__process_kallsyms) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int kcore_copy__process_modules(void *arg,
+				       const char *name __maybe_unused,
+				       u64 start, u64 size __maybe_unused)
+{
+	struct kcore_copy_info *kci = arg;
+
+	if (!kci->first_module || start < kci->first_module)
+		kci->first_module = start;
+
+	return 0;
+}
+
+static int kcore_copy__parse_modules(struct kcore_copy_info *kci,
+				     const char *dir)
+{
+	char modules_filename[PATH_MAX];
+
+	scnprintf(modules_filename, PATH_MAX, "%s/modules", dir);
+
+	if (symbol__restricted_filename(modules_filename, "/proc/modules"))
+		return -1;
+
+	if (modules__parse(modules_filename, kci,
+			   kcore_copy__process_modules) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int kcore_copy__map(struct kcore_copy_info *kci, u64 start, u64 end,
+			   u64 pgoff, u64 s, u64 e)
+{
+	u64 len, offset;
+
+	if (s < start || s >= end)
+		return 0;
+
+	offset = (s - start) + pgoff;
+	len = e < end ? e - s : end - s;
+
+	return kcore_copy_info__addnew(kci, s, len, offset) ? 0 : -1;
+}
+
+static int kcore_copy__read_map(u64 start, u64 len, u64 pgoff, void *data)
+{
+	struct kcore_copy_info *kci = data;
+	u64 end = start + len;
+	struct sym_data *sdat;
+
+	if (kcore_copy__map(kci, start, end, pgoff, kci->stext, kci->etext))
+		return -1;
+
+	if (kcore_copy__map(kci, start, end, pgoff, kci->first_module,
+			    kci->last_module_symbol))
+		return -1;
+
+	list_for_each_entry(sdat, &kci->syms, node) {
+		u64 s = round_down(sdat->addr, page_size);
+
+		if (kcore_copy__map(kci, start, end, pgoff, s, s + len))
+			return -1;
+	}
+
+	return 0;
+}
+
+static int kcore_copy__read_maps(struct kcore_copy_info *kci, Elf *elf)
+{
+	if (elf_read_maps(elf, true, kcore_copy__read_map, kci) < 0)
+		return -1;
+
+	return 0;
+}
+
+static void kcore_copy__find_remaps(struct kcore_copy_info *kci)
+{
+	struct phdr_data *p, *k = NULL;
+	u64 kend;
+
+	if (!kci->stext)
+		return;
+
+	/* Find phdr that corresponds to the kernel map (contains stext) */
+	kcore_copy__for_each_phdr(kci, p) {
+		u64 pend = p->addr + p->len - 1;
+
+		if (p->addr <= kci->stext && pend >= kci->stext) {
+			k = p;
+			break;
+		}
+	}
+
+	if (!k)
+		return;
+
+	kend = k->offset + k->len;
+
+	/* Find phdrs that remap the kernel */
+	kcore_copy__for_each_phdr(kci, p) {
+		u64 pend = p->offset + p->len;
+
+		if (p == k)
+			continue;
+
+		if (p->offset >= k->offset && pend <= kend)
+			p->remaps = k;
+	}
+}
+
+static void kcore_copy__layout(struct kcore_copy_info *kci)
+{
+	struct phdr_data *p;
+	off_t rel = 0;
+
+	kcore_copy__find_remaps(kci);
+
+	kcore_copy__for_each_phdr(kci, p) {
+		if (!p->remaps) {
+			p->rel = rel;
+			rel += p->len;
+		}
+		kci->phnum += 1;
+	}
+
+	kcore_copy__for_each_phdr(kci, p) {
+		struct phdr_data *k = p->remaps;
+
+		if (k)
+			p->rel = p->offset - k->offset + k->rel;
+	}
+}
+
+static int kcore_copy__calc_maps(struct kcore_copy_info *kci, const char *dir,
+				 Elf *elf)
+{
+	if (kcore_copy__parse_kallsyms(kci, dir))
+		return -1;
+
+	if (kcore_copy__parse_modules(kci, dir))
+		return -1;
+
+	if (kci->stext)
+		kci->stext = round_down(kci->stext, page_size);
+	else
+		kci->stext = round_down(kci->first_symbol, page_size);
+
+	if (kci->etext) {
+		kci->etext = round_up(kci->etext, page_size);
+	} else if (kci->last_symbol) {
+		kci->etext = round_up(kci->last_symbol, page_size);
+		kci->etext += page_size;
+	}
+
+	if (kci->first_module_symbol &&
+	    (!kci->first_module || kci->first_module_symbol < kci->first_module))
+		kci->first_module = kci->first_module_symbol;
+
+	kci->first_module = round_down(kci->first_module, page_size);
+
+	if (kci->last_module_symbol) {
+		kci->last_module_symbol = round_up(kci->last_module_symbol,
+						   page_size);
+		kci->last_module_symbol += page_size;
+	}
+
+	if (!kci->stext || !kci->etext)
+		return -1;
+
+	if (kci->first_module && !kci->last_module_symbol)
+		return -1;
+
+	if (kcore_copy__read_maps(kci, elf))
+		return -1;
+
+	kcore_copy__layout(kci);
+
+	return 0;
+}
+
+static int kcore_copy__copy_file(const char *from_dir, const char *to_dir,
+				 const char *name)
+{
+	char from_filename[PATH_MAX];
+	char to_filename[PATH_MAX];
+
+	scnprintf(from_filename, PATH_MAX, "%s/%s", from_dir, name);
+	scnprintf(to_filename, PATH_MAX, "%s/%s", to_dir, name);
+
+	return copyfile_mode(from_filename, to_filename, 0400);
+}
+
+static int kcore_copy__unlink(const char *dir, const char *name)
+{
+	char filename[PATH_MAX];
+
+	scnprintf(filename, PATH_MAX, "%s/%s", dir, name);
+
+	return unlink(filename);
+}
+
+static int kcore_copy__compare_fds(int from, int to)
+{
+	char *buf_from;
+	char *buf_to;
+	ssize_t ret;
+	size_t len;
+	int err = -1;
+
+	buf_from = malloc(page_size);
+	buf_to = malloc(page_size);
+	if (!buf_from || !buf_to)
+		goto out;
+
+	while (1) {
+		/* Use read because mmap won't work on proc files */
+		ret = read(from, buf_from, page_size);
+		if (ret < 0)
+			goto out;
+
+		if (!ret)
+			break;
+
+		len = ret;
+
+		if (readn(to, buf_to, len) != (int)len)
+			goto out;
+
+		if (memcmp(buf_from, buf_to, len))
+			goto out;
+	}
+
+	err = 0;
+out:
+	free(buf_to);
+	free(buf_from);
+	return err;
+}
+
+static int kcore_copy__compare_files(const char *from_filename,
+				     const char *to_filename)
+{
+	int from, to, err = -1;
+
+	from = open(from_filename, O_RDONLY);
+	if (from < 0)
+		return -1;
+
+	to = open(to_filename, O_RDONLY);
+	if (to < 0)
+		goto out_close_from;
+
+	err = kcore_copy__compare_fds(from, to);
+
+	close(to);
+out_close_from:
+	close(from);
+	return err;
+}
+
+static int kcore_copy__compare_file(const char *from_dir, const char *to_dir,
+				    const char *name)
+{
+	char from_filename[PATH_MAX];
+	char to_filename[PATH_MAX];
+
+	scnprintf(from_filename, PATH_MAX, "%s/%s", from_dir, name);
+	scnprintf(to_filename, PATH_MAX, "%s/%s", to_dir, name);
+
+	return kcore_copy__compare_files(from_filename, to_filename);
+}
+
+/**
+ * kcore_copy - copy kallsyms, modules and kcore from one directory to another.
+ * @from_dir: from directory
+ * @to_dir: to directory
+ *
+ * This function copies kallsyms, modules and kcore files from one directory to
+ * another.  kallsyms and modules are copied entirely.  Only code segments are
+ * copied from kcore.  It is assumed that two segments suffice: one for the
+ * kernel proper and one for all the modules.  The code segments are determined
+ * from kallsyms and modules files.  The kernel map starts at _stext or the
+ * lowest function symbol, and ends at _etext or the highest function symbol.
+ * The module map starts at the lowest module address and ends at the highest
+ * module symbol.  Start addresses are rounded down to the nearest page.  End
+ * addresses are rounded up to the nearest page.  An extra page is added to the
+ * highest kernel symbol and highest module symbol to, hopefully, encompass that
+ * symbol too.  Because it contains only code sections, the resulting kcore is
+ * unusual.  One significant peculiarity is that the mapping (start -> pgoff)
+ * is not the same for the kernel map and the modules map.  That happens because
+ * the data is copied adjacently whereas the original kcore has gaps.  Finally,
+ * kallsyms file is compared with its copy to check that modules have not been
+ * loaded or unloaded while the copies were taking place.
+ *
+ * Return: %0 on success, %-1 on failure.
+ */
+int kcore_copy(const char *from_dir, const char *to_dir)
+{
+	struct kcore kcore;
+	struct kcore extract;
+	int idx = 0, err = -1;
+	off_t offset, sz;
+	struct kcore_copy_info kci = { .stext = 0, };
+	char kcore_filename[PATH_MAX];
+	char extract_filename[PATH_MAX];
+	struct phdr_data *p;
+
+	INIT_LIST_HEAD(&kci.phdrs);
+	INIT_LIST_HEAD(&kci.syms);
+
+	if (kcore_copy__copy_file(from_dir, to_dir, "kallsyms"))
+		return -1;
+
+	if (kcore_copy__copy_file(from_dir, to_dir, "modules"))
+		goto out_unlink_kallsyms;
+
+	scnprintf(kcore_filename, PATH_MAX, "%s/kcore", from_dir);
+	scnprintf(extract_filename, PATH_MAX, "%s/kcore", to_dir);
+
+	if (kcore__open(&kcore, kcore_filename))
+		goto out_unlink_modules;
+
+	if (kcore_copy__calc_maps(&kci, from_dir, kcore.elf))
+		goto out_kcore_close;
+
+	if (kcore__init(&extract, extract_filename, kcore.elfclass, false))
+		goto out_kcore_close;
+
+	if (kcore__copy_hdr(&kcore, &extract, kci.phnum))
+		goto out_extract_close;
+
+	offset = gelf_fsize(extract.elf, ELF_T_EHDR, 1, EV_CURRENT) +
+		 gelf_fsize(extract.elf, ELF_T_PHDR, kci.phnum, EV_CURRENT);
+	offset = round_up(offset, page_size);
+
+	kcore_copy__for_each_phdr(&kci, p) {
+		off_t offs = p->rel + offset;
+
+		if (kcore__add_phdr(&extract, idx++, offs, p->addr, p->len))
+			goto out_extract_close;
+	}
+
+	sz = kcore__write(&extract);
+	if (sz < 0 || sz > offset)
+		goto out_extract_close;
+
+	kcore_copy__for_each_phdr(&kci, p) {
+		off_t offs = p->rel + offset;
+
+		if (p->remaps)
+			continue;
+		if (copy_bytes(kcore.fd, p->offset, extract.fd, offs, p->len))
+			goto out_extract_close;
+	}
+
+	if (kcore_copy__compare_file(from_dir, to_dir, "kallsyms"))
+		goto out_extract_close;
+
+	err = 0;
+
+out_extract_close:
+	kcore__close(&extract);
+	if (err)
+		unlink(extract_filename);
+out_kcore_close:
+	kcore__close(&kcore);
+out_unlink_modules:
+	if (err)
+		kcore_copy__unlink(to_dir, "modules");
+out_unlink_kallsyms:
+	if (err)
+		kcore_copy__unlink(to_dir, "kallsyms");
+
+	kcore_copy__free_phdrs(&kci);
+	kcore_copy__free_syms(&kci);
+
+	return err;
+}
+
+int kcore_extract__create(struct kcore_extract *kce)
+{
+	struct kcore kcore;
+	struct kcore extract;
+	size_t count = 1;
+	int idx = 0, err = -1;
+	off_t offset = page_size, sz;
+
+	if (kcore__open(&kcore, kce->kcore_filename))
+		return -1;
+
+	strcpy(kce->extract_filename, PERF_KCORE_EXTRACT);
+	if (kcore__init(&extract, kce->extract_filename, kcore.elfclass, true))
+		goto out_kcore_close;
+
+	if (kcore__copy_hdr(&kcore, &extract, count))
+		goto out_extract_close;
+
+	if (kcore__add_phdr(&extract, idx, offset, kce->addr, kce->len))
+		goto out_extract_close;
+
+	sz = kcore__write(&extract);
+	if (sz < 0 || sz > offset)
+		goto out_extract_close;
+
+	if (copy_bytes(kcore.fd, kce->offs, extract.fd, offset, kce->len))
+		goto out_extract_close;
+
+	err = 0;
+
+out_extract_close:
+	kcore__close(&extract);
+	if (err)
+		unlink(kce->extract_filename);
+out_kcore_close:
+	kcore__close(&kcore);
+
+	return err;
+}
+
+void kcore_extract__delete(struct kcore_extract *kce)
+{
+	unlink(kce->extract_filename);
+}
+
+#ifdef HAVE_GELF_GETNOTE_SUPPORT
+
+static void sdt_adjust_loc(struct sdt_note *tmp, GElf_Addr base_off)
+{
+	if (!base_off)
+		return;
+
+	if (tmp->bit32)
+		tmp->addr.a32[SDT_NOTE_IDX_LOC] =
+			tmp->addr.a32[SDT_NOTE_IDX_LOC] + base_off -
+			tmp->addr.a32[SDT_NOTE_IDX_BASE];
+	else
+		tmp->addr.a64[SDT_NOTE_IDX_LOC] =
+			tmp->addr.a64[SDT_NOTE_IDX_LOC] + base_off -
+			tmp->addr.a64[SDT_NOTE_IDX_BASE];
+}
+
+static void sdt_adjust_refctr(struct sdt_note *tmp, GElf_Addr base_addr,
+			      GElf_Addr base_off)
+{
+	if (!base_off)
+		return;
+
+	if (tmp->bit32 && tmp->addr.a32[SDT_NOTE_IDX_REFCTR])
+		tmp->addr.a32[SDT_NOTE_IDX_REFCTR] -= (base_addr - base_off);
+	else if (tmp->addr.a64[SDT_NOTE_IDX_REFCTR])
+		tmp->addr.a64[SDT_NOTE_IDX_REFCTR] -= (base_addr - base_off);
+}
+
+/**
+ * populate_sdt_note : Parse raw data and identify SDT note
+ * @elf: elf of the opened file
+ * @data: raw data of a section with description offset applied
+ * @len: note description size
+ * @type: type of the note
+ * @sdt_notes: List to add the SDT note
+ *
+ * Responsible for parsing the @data in section .note.stapsdt in @elf and
+ * if its an SDT note, it appends to @sdt_notes list.
+ */
+static int populate_sdt_note(Elf **elf, const char *data, size_t len,
+			     struct list_head *sdt_notes)
+{
+	const char *provider, *name, *args;
+	struct sdt_note *tmp = NULL;
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr;
+	int ret = -EINVAL;
+
+	union {
+		Elf64_Addr a64[NR_ADDR];
+		Elf32_Addr a32[NR_ADDR];
+	} buf;
+
+	Elf_Data dst = {
+		.d_buf = &buf, .d_type = ELF_T_ADDR, .d_version = EV_CURRENT,
+		.d_size = gelf_fsize((*elf), ELF_T_ADDR, NR_ADDR, EV_CURRENT),
+		.d_off = 0, .d_align = 0
+	};
+	Elf_Data src = {
+		.d_buf = (void *) data, .d_type = ELF_T_ADDR,
+		.d_version = EV_CURRENT, .d_size = dst.d_size, .d_off = 0,
+		.d_align = 0
+	};
+
+	tmp = (struct sdt_note *)calloc(1, sizeof(struct sdt_note));
+	if (!tmp) {
+		ret = -ENOMEM;
+		goto out_err;
+	}
+
+	INIT_LIST_HEAD(&tmp->note_list);
+
+	if (len < dst.d_size + 3)
+		goto out_free_note;
+
+	/* Translation from file representation to memory representation */
+	if (gelf_xlatetom(*elf, &dst, &src,
+			  elf_getident(*elf, NULL)[EI_DATA]) == NULL) {
+		pr_err("gelf_xlatetom : %s\n", elf_errmsg(-1));
+		goto out_free_note;
+	}
+
+	/* Populate the fields of sdt_note */
+	provider = data + dst.d_size;
+
+	name = (const char *)memchr(provider, '\0', data + len - provider);
+	if (name++ == NULL)
+		goto out_free_note;
+
+	tmp->provider = strdup(provider);
+	if (!tmp->provider) {
+		ret = -ENOMEM;
+		goto out_free_note;
+	}
+	tmp->name = strdup(name);
+	if (!tmp->name) {
+		ret = -ENOMEM;
+		goto out_free_prov;
+	}
+
+	args = memchr(name, '\0', data + len - name);
+
+	/*
+	 * There is no argument if:
+	 * - We reached the end of the note;
+	 * - There is not enough room to hold a potential string;
+	 * - The argument string is empty or just contains ':'.
+	 */
+	if (args == NULL || data + len - args < 2 ||
+		args[1] == ':' || args[1] == '\0')
+		tmp->args = NULL;
+	else {
+		tmp->args = strdup(++args);
+		if (!tmp->args) {
+			ret = -ENOMEM;
+			goto out_free_name;
+		}
+	}
+
+	if (gelf_getclass(*elf) == ELFCLASS32) {
+		memcpy(&tmp->addr, &buf, 3 * sizeof(Elf32_Addr));
+		tmp->bit32 = true;
+	} else {
+		memcpy(&tmp->addr, &buf, 3 * sizeof(Elf64_Addr));
+		tmp->bit32 = false;
+	}
+
+	if (!gelf_getehdr(*elf, &ehdr)) {
+		pr_debug("%s : cannot get elf header.\n", __func__);
+		ret = -EBADF;
+		goto out_free_args;
+	}
+
+	/* Adjust the prelink effect :
+	 * Find out the .stapsdt.base section.
+	 * This scn will help us to handle prelinking (if present).
+	 * Compare the retrieved file offset of the base section with the
+	 * base address in the description of the SDT note. If its different,
+	 * then accordingly, adjust the note location.
+	 */
+	if (elf_section_by_name(*elf, &ehdr, &shdr, SDT_BASE_SCN, NULL))
+		sdt_adjust_loc(tmp, shdr.sh_offset);
+
+	/* Adjust reference counter offset */
+	if (elf_section_by_name(*elf, &ehdr, &shdr, SDT_PROBES_SCN, NULL))
+		sdt_adjust_refctr(tmp, shdr.sh_addr, shdr.sh_offset);
+
+	list_add_tail(&tmp->note_list, sdt_notes);
+	return 0;
+
+out_free_args:
+	zfree(&tmp->args);
+out_free_name:
+	zfree(&tmp->name);
+out_free_prov:
+	zfree(&tmp->provider);
+out_free_note:
+	free(tmp);
+out_err:
+	return ret;
+}
+
+/**
+ * construct_sdt_notes_list : constructs a list of SDT notes
+ * @elf : elf to look into
+ * @sdt_notes : empty list_head
+ *
+ * Scans the sections in 'elf' for the section
+ * .note.stapsdt. It, then calls populate_sdt_note to find
+ * out the SDT events and populates the 'sdt_notes'.
+ */
+static int construct_sdt_notes_list(Elf *elf, struct list_head *sdt_notes)
+{
+	GElf_Ehdr ehdr;
+	Elf_Scn *scn = NULL;
+	Elf_Data *data;
+	GElf_Shdr shdr;
+	size_t shstrndx, next;
+	GElf_Nhdr nhdr;
+	size_t name_off, desc_off, offset;
+	int ret = 0;
+
+	if (gelf_getehdr(elf, &ehdr) == NULL) {
+		ret = -EBADF;
+		goto out_ret;
+	}
+	if (elf_getshdrstrndx(elf, &shstrndx) != 0) {
+		ret = -EBADF;
+		goto out_ret;
+	}
+
+	/* Look for the required section */
+	scn = elf_section_by_name(elf, &ehdr, &shdr, SDT_NOTE_SCN, NULL);
+	if (!scn) {
+		ret = -ENOENT;
+		goto out_ret;
+	}
+
+	if ((shdr.sh_type != SHT_NOTE) || (shdr.sh_flags & SHF_ALLOC)) {
+		ret = -ENOENT;
+		goto out_ret;
+	}
+
+	data = elf_getdata(scn, NULL);
+
+	/* Get the SDT notes */
+	for (offset = 0; (next = gelf_getnote(data, offset, &nhdr, &name_off,
+					      &desc_off)) > 0; offset = next) {
+		if (nhdr.n_namesz == sizeof(SDT_NOTE_NAME) &&
+		    !memcmp(data->d_buf + name_off, SDT_NOTE_NAME,
+			    sizeof(SDT_NOTE_NAME))) {
+			/* Check the type of the note */
+			if (nhdr.n_type != SDT_NOTE_TYPE)
+				goto out_ret;
+
+			ret = populate_sdt_note(&elf, ((data->d_buf) + desc_off),
+						nhdr.n_descsz, sdt_notes);
+			if (ret < 0)
+				goto out_ret;
+		}
+	}
+	if (list_empty(sdt_notes))
+		ret = -ENOENT;
+
+out_ret:
+	return ret;
+}
+
+/**
+ * get_sdt_note_list : Wrapper to construct a list of sdt notes
+ * @head : empty list_head
+ * @target : file to find SDT notes from
+ *
+ * This opens the file, initializes
+ * the ELF and then calls construct_sdt_notes_list.
+ */
+int get_sdt_note_list(struct list_head *head, const char *target)
+{
+	Elf *elf;
+	int fd, ret;
+
+	fd = open(target, O_RDONLY);
+	if (fd < 0)
+		return -EBADF;
+
+	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	if (!elf) {
+		ret = -EBADF;
+		goto out_close;
+	}
+	ret = construct_sdt_notes_list(elf, head);
+	elf_end(elf);
+out_close:
+	close(fd);
+	return ret;
+}
+
+/**
+ * cleanup_sdt_note_list : free the sdt notes' list
+ * @sdt_notes: sdt notes' list
+ *
+ * Free up the SDT notes in @sdt_notes.
+ * Returns the number of SDT notes free'd.
+ */
+int cleanup_sdt_note_list(struct list_head *sdt_notes)
+{
+	struct sdt_note *tmp, *pos;
+	int nr_free = 0;
+
+	list_for_each_entry_safe(pos, tmp, sdt_notes, note_list) {
+		list_del_init(&pos->note_list);
+		zfree(&pos->args);
+		zfree(&pos->name);
+		zfree(&pos->provider);
+		free(pos);
+		nr_free++;
+	}
+	return nr_free;
+}
+
+/**
+ * sdt_notes__get_count: Counts the number of sdt events
+ * @start: list_head to sdt_notes list
+ *
+ * Returns the number of SDT notes in a list
+ */
+int sdt_notes__get_count(struct list_head *start)
+{
+	struct sdt_note *sdt_ptr;
+	int count = 0;
+
+	list_for_each_entry(sdt_ptr, start, note_list)
+		count++;
+	return count;
+}
+#endif
+
+void symbol__elf_init(void)
+{
+	elf_version(EV_CURRENT);
+}
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
new file mode 100644
index 000000000..f9eb0bee7
--- /dev/null
+++ b/tools/perf/util/symbol-minimal.c
@@ -0,0 +1,387 @@
+#include "dso.h"
+#include "symbol.h"
+#include "symsrc.h"
+
+#include <errno.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdlib.h>
+#include <byteswap.h>
+#include <sys/stat.h>
+#include <linux/zalloc.h>
+#include <internal/lib.h>
+
+static bool check_need_swap(int file_endian)
+{
+	const int data = 1;
+	u8 *check = (u8 *)&data;
+	int host_endian;
+
+	if (check[0] == 1)
+		host_endian = ELFDATA2LSB;
+	else
+		host_endian = ELFDATA2MSB;
+
+	return host_endian != file_endian;
+}
+
+#define NOTE_ALIGN(sz) (((sz) + 3) & ~3)
+
+#define NT_GNU_BUILD_ID	3
+
+static int read_build_id(void *note_data, size_t note_len, struct build_id *bid,
+			 bool need_swap)
+{
+	size_t size = sizeof(bid->data);
+	struct {
+		u32 n_namesz;
+		u32 n_descsz;
+		u32 n_type;
+	} *nhdr;
+	void *ptr;
+
+	ptr = note_data;
+	while (ptr < (note_data + note_len)) {
+		const char *name;
+		size_t namesz, descsz;
+
+		nhdr = ptr;
+		if (need_swap) {
+			nhdr->n_namesz = bswap_32(nhdr->n_namesz);
+			nhdr->n_descsz = bswap_32(nhdr->n_descsz);
+			nhdr->n_type = bswap_32(nhdr->n_type);
+		}
+
+		namesz = NOTE_ALIGN(nhdr->n_namesz);
+		descsz = NOTE_ALIGN(nhdr->n_descsz);
+
+		ptr += sizeof(*nhdr);
+		name = ptr;
+		ptr += namesz;
+		if (nhdr->n_type == NT_GNU_BUILD_ID &&
+		    nhdr->n_namesz == sizeof("GNU")) {
+			if (memcmp(name, "GNU", sizeof("GNU")) == 0) {
+				size_t sz = min(size, descsz);
+				memcpy(bid->data, ptr, sz);
+				memset(bid->data + sz, 0, size - sz);
+				bid->size = sz;
+				return 0;
+			}
+		}
+		ptr += descsz;
+	}
+
+	return -1;
+}
+
+int filename__read_debuglink(const char *filename __maybe_unused,
+			     char *debuglink __maybe_unused,
+			     size_t size __maybe_unused)
+{
+	return -1;
+}
+
+/*
+ * Just try PT_NOTE header otherwise fails
+ */
+int filename__read_build_id(const char *filename, struct build_id *bid)
+{
+	FILE *fp;
+	int ret = -1;
+	bool need_swap = false;
+	u8 e_ident[EI_NIDENT];
+	size_t buf_size;
+	void *buf;
+	int i;
+
+	fp = fopen(filename, "r");
+	if (fp == NULL)
+		return -1;
+
+	if (fread(e_ident, sizeof(e_ident), 1, fp) != 1)
+		goto out;
+
+	if (memcmp(e_ident, ELFMAG, SELFMAG) ||
+	    e_ident[EI_VERSION] != EV_CURRENT)
+		goto out;
+
+	need_swap = check_need_swap(e_ident[EI_DATA]);
+
+	/* for simplicity */
+	fseek(fp, 0, SEEK_SET);
+
+	if (e_ident[EI_CLASS] == ELFCLASS32) {
+		Elf32_Ehdr ehdr;
+		Elf32_Phdr *phdr;
+
+		if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1)
+			goto out;
+
+		if (need_swap) {
+			ehdr.e_phoff = bswap_32(ehdr.e_phoff);
+			ehdr.e_phentsize = bswap_16(ehdr.e_phentsize);
+			ehdr.e_phnum = bswap_16(ehdr.e_phnum);
+		}
+
+		buf_size = ehdr.e_phentsize * ehdr.e_phnum;
+		buf = malloc(buf_size);
+		if (buf == NULL)
+			goto out;
+
+		fseek(fp, ehdr.e_phoff, SEEK_SET);
+		if (fread(buf, buf_size, 1, fp) != 1)
+			goto out_free;
+
+		for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) {
+			void *tmp;
+			long offset;
+
+			if (need_swap) {
+				phdr->p_type = bswap_32(phdr->p_type);
+				phdr->p_offset = bswap_32(phdr->p_offset);
+				phdr->p_filesz = bswap_32(phdr->p_filesz);
+			}
+
+			if (phdr->p_type != PT_NOTE)
+				continue;
+
+			buf_size = phdr->p_filesz;
+			offset = phdr->p_offset;
+			tmp = realloc(buf, buf_size);
+			if (tmp == NULL)
+				goto out_free;
+
+			buf = tmp;
+			fseek(fp, offset, SEEK_SET);
+			if (fread(buf, buf_size, 1, fp) != 1)
+				goto out_free;
+
+			ret = read_build_id(buf, buf_size, bid, need_swap);
+			if (ret == 0)
+				ret = bid->size;
+			break;
+		}
+	} else {
+		Elf64_Ehdr ehdr;
+		Elf64_Phdr *phdr;
+
+		if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1)
+			goto out;
+
+		if (need_swap) {
+			ehdr.e_phoff = bswap_64(ehdr.e_phoff);
+			ehdr.e_phentsize = bswap_16(ehdr.e_phentsize);
+			ehdr.e_phnum = bswap_16(ehdr.e_phnum);
+		}
+
+		buf_size = ehdr.e_phentsize * ehdr.e_phnum;
+		buf = malloc(buf_size);
+		if (buf == NULL)
+			goto out;
+
+		fseek(fp, ehdr.e_phoff, SEEK_SET);
+		if (fread(buf, buf_size, 1, fp) != 1)
+			goto out_free;
+
+		for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) {
+			void *tmp;
+			long offset;
+
+			if (need_swap) {
+				phdr->p_type = bswap_32(phdr->p_type);
+				phdr->p_offset = bswap_64(phdr->p_offset);
+				phdr->p_filesz = bswap_64(phdr->p_filesz);
+			}
+
+			if (phdr->p_type != PT_NOTE)
+				continue;
+
+			buf_size = phdr->p_filesz;
+			offset = phdr->p_offset;
+			tmp = realloc(buf, buf_size);
+			if (tmp == NULL)
+				goto out_free;
+
+			buf = tmp;
+			fseek(fp, offset, SEEK_SET);
+			if (fread(buf, buf_size, 1, fp) != 1)
+				goto out_free;
+
+			ret = read_build_id(buf, buf_size, bid, need_swap);
+			if (ret == 0)
+				ret = bid->size;
+			break;
+		}
+	}
+out_free:
+	free(buf);
+out:
+	fclose(fp);
+	return ret;
+}
+
+int sysfs__read_build_id(const char *filename, struct build_id *bid)
+{
+	int fd;
+	int ret = -1;
+	struct stat stbuf;
+	size_t buf_size;
+	void *buf;
+
+	fd = open(filename, O_RDONLY);
+	if (fd < 0)
+		return -1;
+
+	if (fstat(fd, &stbuf) < 0)
+		goto out;
+
+	buf_size = stbuf.st_size;
+	buf = malloc(buf_size);
+	if (buf == NULL)
+		goto out;
+
+	if (read(fd, buf, buf_size) != (ssize_t) buf_size)
+		goto out_free;
+
+	ret = read_build_id(buf, buf_size, bid, false);
+out_free:
+	free(buf);
+out:
+	close(fd);
+	return ret;
+}
+
+int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
+	         enum dso_binary_type type)
+{
+	int fd = open(name, O_RDONLY);
+	if (fd < 0)
+		goto out_errno;
+
+	ss->name = strdup(name);
+	if (!ss->name)
+		goto out_close;
+
+	ss->fd = fd;
+	ss->type = type;
+
+	return 0;
+out_close:
+	close(fd);
+out_errno:
+	dso->load_errno = errno;
+	return -1;
+}
+
+bool symsrc__possibly_runtime(struct symsrc *ss __maybe_unused)
+{
+	/* Assume all sym sources could be a runtime image. */
+	return true;
+}
+
+bool symsrc__has_symtab(struct symsrc *ss __maybe_unused)
+{
+	return false;
+}
+
+void symsrc__destroy(struct symsrc *ss)
+{
+	zfree(&ss->name);
+	close(ss->fd);
+}
+
+int dso__synthesize_plt_symbols(struct dso *dso __maybe_unused,
+				struct symsrc *ss __maybe_unused)
+{
+	return 0;
+}
+
+static int fd__is_64_bit(int fd)
+{
+	u8 e_ident[EI_NIDENT];
+
+	if (lseek(fd, 0, SEEK_SET))
+		return -1;
+
+	if (readn(fd, e_ident, sizeof(e_ident)) != sizeof(e_ident))
+		return -1;
+
+	if (memcmp(e_ident, ELFMAG, SELFMAG) ||
+	    e_ident[EI_VERSION] != EV_CURRENT)
+		return -1;
+
+	return e_ident[EI_CLASS] == ELFCLASS64;
+}
+
+enum dso_type dso__type_fd(int fd)
+{
+	Elf64_Ehdr ehdr;
+	int ret;
+
+	ret = fd__is_64_bit(fd);
+	if (ret < 0)
+		return DSO__TYPE_UNKNOWN;
+
+	if (ret)
+		return DSO__TYPE_64BIT;
+
+	if (readn(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr))
+		return DSO__TYPE_UNKNOWN;
+
+	if (ehdr.e_machine == EM_X86_64)
+		return DSO__TYPE_X32BIT;
+
+	return DSO__TYPE_32BIT;
+}
+
+int dso__load_sym(struct dso *dso, struct map *map __maybe_unused,
+		  struct symsrc *ss,
+		  struct symsrc *runtime_ss __maybe_unused,
+		  int kmodule __maybe_unused)
+{
+	struct build_id bid;
+	int ret;
+
+	ret = fd__is_64_bit(ss->fd);
+	if (ret >= 0)
+		dso->is_64_bit = ret;
+
+	if (filename__read_build_id(ss->name, &bid) > 0)
+		dso__set_build_id(dso, &bid);
+	return 0;
+}
+
+int file__read_maps(int fd __maybe_unused, bool exe __maybe_unused,
+		    mapfn_t mapfn __maybe_unused, void *data __maybe_unused,
+		    bool *is_64_bit __maybe_unused)
+{
+	return -1;
+}
+
+int kcore_extract__create(struct kcore_extract *kce __maybe_unused)
+{
+	return -1;
+}
+
+void kcore_extract__delete(struct kcore_extract *kce __maybe_unused)
+{
+}
+
+int kcore_copy(const char *from_dir __maybe_unused,
+	       const char *to_dir __maybe_unused)
+{
+	return -1;
+}
+
+void symbol__elf_init(void)
+{
+}
+
+char *dso__demangle_sym(struct dso *dso __maybe_unused,
+			int kmodule __maybe_unused,
+			const char *elf_name __maybe_unused)
+{
+	return NULL;
+}
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
new file mode 100644
index 000000000..98014f937
--- /dev/null
+++ b/tools/perf/util/symbol.c
@@ -0,0 +1,2707 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <dirent.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <linux/capability.h>
+#include <linux/kernel.h>
+#include <linux/mman.h>
+#include <linux/string.h>
+#include <linux/time64.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/param.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include "annotate.h"
+#include "build-id.h"
+#include "cap.h"
+#include "dso.h"
+#include "util.h" // lsdir()
+#include "debug.h"
+#include "event.h"
+#include "machine.h"
+#include "map.h"
+#include "symbol.h"
+#include "map_symbol.h"
+#include "mem-events.h"
+#include "symsrc.h"
+#include "strlist.h"
+#include "intlist.h"
+#include "namespaces.h"
+#include "header.h"
+#include "path.h"
+#include <linux/ctype.h>
+#include <linux/zalloc.h>
+
+#include <elf.h>
+#include <limits.h>
+#include <symbol/kallsyms.h>
+#include <sys/utsname.h>
+
+static int dso__load_kernel_sym(struct dso *dso, struct map *map);
+static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map);
+static bool symbol__is_idle(const char *name);
+
+int vmlinux_path__nr_entries;
+char **vmlinux_path;
+
+struct symbol_conf symbol_conf = {
+	.nanosecs		= false,
+	.use_modules		= true,
+	.try_vmlinux_path	= true,
+	.demangle		= true,
+	.demangle_kernel	= false,
+	.cumulate_callchain	= true,
+	.time_quantum		= 100 * NSEC_PER_MSEC, /* 100ms */
+	.show_hist_headers	= true,
+	.symfs			= "",
+	.event_group		= true,
+	.inline_name		= true,
+	.res_sample		= 0,
+};
+
+static enum dso_binary_type binary_type_symtab[] = {
+	DSO_BINARY_TYPE__KALLSYMS,
+	DSO_BINARY_TYPE__GUEST_KALLSYMS,
+	DSO_BINARY_TYPE__JAVA_JIT,
+	DSO_BINARY_TYPE__DEBUGLINK,
+	DSO_BINARY_TYPE__BUILD_ID_CACHE,
+	DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO,
+	DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
+	DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
+	DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
+	DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
+	DSO_BINARY_TYPE__GUEST_KMODULE,
+	DSO_BINARY_TYPE__GUEST_KMODULE_COMP,
+	DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE,
+	DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP,
+	DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
+	DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO,
+	DSO_BINARY_TYPE__NOT_FOUND,
+};
+
+#define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab)
+
+static bool symbol_type__filter(char symbol_type)
+{
+	symbol_type = toupper(symbol_type);
+	return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D' || symbol_type == 'B';
+}
+
+static int prefix_underscores_count(const char *str)
+{
+	const char *tail = str;
+
+	while (*tail == '_')
+		tail++;
+
+	return tail - str;
+}
+
+const char * __weak arch__normalize_symbol_name(const char *name)
+{
+	return name;
+}
+
+int __weak arch__compare_symbol_names(const char *namea, const char *nameb)
+{
+	return strcmp(namea, nameb);
+}
+
+int __weak arch__compare_symbol_names_n(const char *namea, const char *nameb,
+					unsigned int n)
+{
+	return strncmp(namea, nameb, n);
+}
+
+int __weak arch__choose_best_symbol(struct symbol *syma,
+				    struct symbol *symb __maybe_unused)
+{
+	/* Avoid "SyS" kernel syscall aliases */
+	if (strlen(syma->name) >= 3 && !strncmp(syma->name, "SyS", 3))
+		return SYMBOL_B;
+	if (strlen(syma->name) >= 10 && !strncmp(syma->name, "compat_SyS", 10))
+		return SYMBOL_B;
+
+	return SYMBOL_A;
+}
+
+static int choose_best_symbol(struct symbol *syma, struct symbol *symb)
+{
+	s64 a;
+	s64 b;
+	size_t na, nb;
+
+	/* Prefer a symbol with non zero length */
+	a = syma->end - syma->start;
+	b = symb->end - symb->start;
+	if ((b == 0) && (a > 0))
+		return SYMBOL_A;
+	else if ((a == 0) && (b > 0))
+		return SYMBOL_B;
+
+	/* Prefer a non weak symbol over a weak one */
+	a = syma->binding == STB_WEAK;
+	b = symb->binding == STB_WEAK;
+	if (b && !a)
+		return SYMBOL_A;
+	if (a && !b)
+		return SYMBOL_B;
+
+	/* Prefer a global symbol over a non global one */
+	a = syma->binding == STB_GLOBAL;
+	b = symb->binding == STB_GLOBAL;
+	if (a && !b)
+		return SYMBOL_A;
+	if (b && !a)
+		return SYMBOL_B;
+
+	/* Prefer a symbol with less underscores */
+	a = prefix_underscores_count(syma->name);
+	b = prefix_underscores_count(symb->name);
+	if (b > a)
+		return SYMBOL_A;
+	else if (a > b)
+		return SYMBOL_B;
+
+	/* Choose the symbol with the longest name */
+	na = strlen(syma->name);
+	nb = strlen(symb->name);
+	if (na > nb)
+		return SYMBOL_A;
+	else if (na < nb)
+		return SYMBOL_B;
+
+	return arch__choose_best_symbol(syma, symb);
+}
+
+void symbols__fixup_duplicate(struct rb_root_cached *symbols)
+{
+	struct rb_node *nd;
+	struct symbol *curr, *next;
+
+	if (symbol_conf.allow_aliases)
+		return;
+
+	nd = rb_first_cached(symbols);
+
+	while (nd) {
+		curr = rb_entry(nd, struct symbol, rb_node);
+again:
+		nd = rb_next(&curr->rb_node);
+		next = rb_entry(nd, struct symbol, rb_node);
+
+		if (!nd)
+			break;
+
+		if (curr->start != next->start)
+			continue;
+
+		if (choose_best_symbol(curr, next) == SYMBOL_A) {
+			rb_erase_cached(&next->rb_node, symbols);
+			symbol__delete(next);
+			goto again;
+		} else {
+			nd = rb_next(&curr->rb_node);
+			rb_erase_cached(&curr->rb_node, symbols);
+			symbol__delete(curr);
+		}
+	}
+}
+
+/* Update zero-sized symbols using the address of the next symbol */
+void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms)
+{
+	struct rb_node *nd, *prevnd = rb_first_cached(symbols);
+	struct symbol *curr, *prev;
+
+	if (prevnd == NULL)
+		return;
+
+	curr = rb_entry(prevnd, struct symbol, rb_node);
+
+	for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) {
+		prev = curr;
+		curr = rb_entry(nd, struct symbol, rb_node);
+
+		/*
+		 * On some architecture kernel text segment start is located at
+		 * some low memory address, while modules are located at high
+		 * memory addresses (or vice versa).  The gap between end of
+		 * kernel text segment and beginning of first module's text
+		 * segment is very big.  Therefore do not fill this gap and do
+		 * not assign it to the kernel dso map (kallsyms).
+		 *
+		 * In kallsyms, it determines module symbols using '[' character
+		 * like in:
+		 *   ffffffffc1937000 T hdmi_driver_init  [snd_hda_codec_hdmi]
+		 */
+		if (prev->end == prev->start) {
+			/* Last kernel/module symbol mapped to end of page */
+			if (is_kallsyms && (!strchr(prev->name, '[') !=
+					    !strchr(curr->name, '[')))
+				prev->end = roundup(prev->end + 4096, 4096);
+			else
+				prev->end = curr->start;
+
+			pr_debug4("%s sym:%s end:%#" PRIx64 "\n",
+				  __func__, prev->name, prev->end);
+		}
+	}
+
+	/* Last entry */
+	if (curr->end == curr->start)
+		curr->end = roundup(curr->start, 4096) + 4096;
+}
+
+void maps__fixup_end(struct maps *maps)
+{
+	struct map *prev = NULL, *curr;
+
+	down_write(&maps->lock);
+
+	maps__for_each_entry(maps, curr) {
+		if (prev != NULL && !prev->end)
+			prev->end = curr->start;
+
+		prev = curr;
+	}
+
+	/*
+	 * We still haven't the actual symbols, so guess the
+	 * last map final address.
+	 */
+	if (curr && !curr->end)
+		curr->end = ~0ULL;
+
+	up_write(&maps->lock);
+}
+
+struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name)
+{
+	size_t namelen = strlen(name) + 1;
+	struct symbol *sym = calloc(1, (symbol_conf.priv_size +
+					sizeof(*sym) + namelen));
+	if (sym == NULL)
+		return NULL;
+
+	if (symbol_conf.priv_size) {
+		if (symbol_conf.init_annotation) {
+			struct annotation *notes = (void *)sym;
+			annotation__init(notes);
+		}
+		sym = ((void *)sym) + symbol_conf.priv_size;
+	}
+
+	sym->start   = start;
+	sym->end     = len ? start + len : start;
+	sym->type    = type;
+	sym->binding = binding;
+	sym->namelen = namelen - 1;
+
+	pr_debug4("%s: %s %#" PRIx64 "-%#" PRIx64 "\n",
+		  __func__, name, start, sym->end);
+	memcpy(sym->name, name, namelen);
+
+	return sym;
+}
+
+void symbol__delete(struct symbol *sym)
+{
+	if (symbol_conf.priv_size) {
+		if (symbol_conf.init_annotation) {
+			struct annotation *notes = symbol__annotation(sym);
+
+			annotation__exit(notes);
+		}
+	}
+	free(((void *)sym) - symbol_conf.priv_size);
+}
+
+void symbols__delete(struct rb_root_cached *symbols)
+{
+	struct symbol *pos;
+	struct rb_node *next = rb_first_cached(symbols);
+
+	while (next) {
+		pos = rb_entry(next, struct symbol, rb_node);
+		next = rb_next(&pos->rb_node);
+		rb_erase_cached(&pos->rb_node, symbols);
+		symbol__delete(pos);
+	}
+}
+
+void __symbols__insert(struct rb_root_cached *symbols,
+		       struct symbol *sym, bool kernel)
+{
+	struct rb_node **p = &symbols->rb_root.rb_node;
+	struct rb_node *parent = NULL;
+	const u64 ip = sym->start;
+	struct symbol *s;
+	bool leftmost = true;
+
+	if (kernel) {
+		const char *name = sym->name;
+		/*
+		 * ppc64 uses function descriptors and appends a '.' to the
+		 * start of every instruction address. Remove it.
+		 */
+		if (name[0] == '.')
+			name++;
+		sym->idle = symbol__is_idle(name);
+	}
+
+	while (*p != NULL) {
+		parent = *p;
+		s = rb_entry(parent, struct symbol, rb_node);
+		if (ip < s->start)
+			p = &(*p)->rb_left;
+		else {
+			p = &(*p)->rb_right;
+			leftmost = false;
+		}
+	}
+	rb_link_node(&sym->rb_node, parent, p);
+	rb_insert_color_cached(&sym->rb_node, symbols, leftmost);
+}
+
+void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym)
+{
+	__symbols__insert(symbols, sym, false);
+}
+
+static struct symbol *symbols__find(struct rb_root_cached *symbols, u64 ip)
+{
+	struct rb_node *n;
+
+	if (symbols == NULL)
+		return NULL;
+
+	n = symbols->rb_root.rb_node;
+
+	while (n) {
+		struct symbol *s = rb_entry(n, struct symbol, rb_node);
+
+		if (ip < s->start)
+			n = n->rb_left;
+		else if (ip > s->end || (ip == s->end && ip != s->start))
+			n = n->rb_right;
+		else
+			return s;
+	}
+
+	return NULL;
+}
+
+static struct symbol *symbols__first(struct rb_root_cached *symbols)
+{
+	struct rb_node *n = rb_first_cached(symbols);
+
+	if (n)
+		return rb_entry(n, struct symbol, rb_node);
+
+	return NULL;
+}
+
+static struct symbol *symbols__last(struct rb_root_cached *symbols)
+{
+	struct rb_node *n = rb_last(&symbols->rb_root);
+
+	if (n)
+		return rb_entry(n, struct symbol, rb_node);
+
+	return NULL;
+}
+
+static struct symbol *symbols__next(struct symbol *sym)
+{
+	struct rb_node *n = rb_next(&sym->rb_node);
+
+	if (n)
+		return rb_entry(n, struct symbol, rb_node);
+
+	return NULL;
+}
+
+static void symbols__insert_by_name(struct rb_root_cached *symbols, struct symbol *sym)
+{
+	struct rb_node **p = &symbols->rb_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct symbol_name_rb_node *symn, *s;
+	bool leftmost = true;
+
+	symn = container_of(sym, struct symbol_name_rb_node, sym);
+
+	while (*p != NULL) {
+		parent = *p;
+		s = rb_entry(parent, struct symbol_name_rb_node, rb_node);
+		if (strcmp(sym->name, s->sym.name) < 0)
+			p = &(*p)->rb_left;
+		else {
+			p = &(*p)->rb_right;
+			leftmost = false;
+		}
+	}
+	rb_link_node(&symn->rb_node, parent, p);
+	rb_insert_color_cached(&symn->rb_node, symbols, leftmost);
+}
+
+static void symbols__sort_by_name(struct rb_root_cached *symbols,
+				  struct rb_root_cached *source)
+{
+	struct rb_node *nd;
+
+	for (nd = rb_first_cached(source); nd; nd = rb_next(nd)) {
+		struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
+		symbols__insert_by_name(symbols, pos);
+	}
+}
+
+int symbol__match_symbol_name(const char *name, const char *str,
+			      enum symbol_tag_include includes)
+{
+	const char *versioning;
+
+	if (includes == SYMBOL_TAG_INCLUDE__DEFAULT_ONLY &&
+	    (versioning = strstr(name, "@@"))) {
+		int len = strlen(str);
+
+		if (len < versioning - name)
+			len = versioning - name;
+
+		return arch__compare_symbol_names_n(name, str, len);
+	} else
+		return arch__compare_symbol_names(name, str);
+}
+
+static struct symbol *symbols__find_by_name(struct rb_root_cached *symbols,
+					    const char *name,
+					    enum symbol_tag_include includes)
+{
+	struct rb_node *n;
+	struct symbol_name_rb_node *s = NULL;
+
+	if (symbols == NULL)
+		return NULL;
+
+	n = symbols->rb_root.rb_node;
+
+	while (n) {
+		int cmp;
+
+		s = rb_entry(n, struct symbol_name_rb_node, rb_node);
+		cmp = symbol__match_symbol_name(s->sym.name, name, includes);
+
+		if (cmp > 0)
+			n = n->rb_left;
+		else if (cmp < 0)
+			n = n->rb_right;
+		else
+			break;
+	}
+
+	if (n == NULL)
+		return NULL;
+
+	if (includes != SYMBOL_TAG_INCLUDE__DEFAULT_ONLY)
+		/* return first symbol that has same name (if any) */
+		for (n = rb_prev(n); n; n = rb_prev(n)) {
+			struct symbol_name_rb_node *tmp;
+
+			tmp = rb_entry(n, struct symbol_name_rb_node, rb_node);
+			if (arch__compare_symbol_names(tmp->sym.name, s->sym.name))
+				break;
+
+			s = tmp;
+		}
+
+	return &s->sym;
+}
+
+void dso__reset_find_symbol_cache(struct dso *dso)
+{
+	dso->last_find_result.addr   = 0;
+	dso->last_find_result.symbol = NULL;
+}
+
+void dso__insert_symbol(struct dso *dso, struct symbol *sym)
+{
+	__symbols__insert(&dso->symbols, sym, dso->kernel);
+
+	/* update the symbol cache if necessary */
+	if (dso->last_find_result.addr >= sym->start &&
+	    (dso->last_find_result.addr < sym->end ||
+	    sym->start == sym->end)) {
+		dso->last_find_result.symbol = sym;
+	}
+}
+
+void dso__delete_symbol(struct dso *dso, struct symbol *sym)
+{
+	rb_erase_cached(&sym->rb_node, &dso->symbols);
+	symbol__delete(sym);
+	dso__reset_find_symbol_cache(dso);
+}
+
+struct symbol *dso__find_symbol(struct dso *dso, u64 addr)
+{
+	if (dso->last_find_result.addr != addr || dso->last_find_result.symbol == NULL) {
+		dso->last_find_result.addr   = addr;
+		dso->last_find_result.symbol = symbols__find(&dso->symbols, addr);
+	}
+
+	return dso->last_find_result.symbol;
+}
+
+struct symbol *dso__first_symbol(struct dso *dso)
+{
+	return symbols__first(&dso->symbols);
+}
+
+struct symbol *dso__last_symbol(struct dso *dso)
+{
+	return symbols__last(&dso->symbols);
+}
+
+struct symbol *dso__next_symbol(struct symbol *sym)
+{
+	return symbols__next(sym);
+}
+
+struct symbol *symbol__next_by_name(struct symbol *sym)
+{
+	struct symbol_name_rb_node *s = container_of(sym, struct symbol_name_rb_node, sym);
+	struct rb_node *n = rb_next(&s->rb_node);
+
+	return n ? &rb_entry(n, struct symbol_name_rb_node, rb_node)->sym : NULL;
+}
+
+ /*
+  * Returns first symbol that matched with @name.
+  */
+struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name)
+{
+	struct symbol *s = symbols__find_by_name(&dso->symbol_names, name,
+						 SYMBOL_TAG_INCLUDE__NONE);
+	if (!s)
+		s = symbols__find_by_name(&dso->symbol_names, name,
+					  SYMBOL_TAG_INCLUDE__DEFAULT_ONLY);
+	return s;
+}
+
+void dso__sort_by_name(struct dso *dso)
+{
+	dso__set_sorted_by_name(dso);
+	return symbols__sort_by_name(&dso->symbol_names, &dso->symbols);
+}
+
+/*
+ * While we find nice hex chars, build a long_val.
+ * Return number of chars processed.
+ */
+static int hex2u64(const char *ptr, u64 *long_val)
+{
+	char *p;
+
+	*long_val = strtoull(ptr, &p, 16);
+
+	return p - ptr;
+}
+
+
+int modules__parse(const char *filename, void *arg,
+		   int (*process_module)(void *arg, const char *name,
+					 u64 start, u64 size))
+{
+	char *line = NULL;
+	size_t n;
+	FILE *file;
+	int err = 0;
+
+	file = fopen(filename, "r");
+	if (file == NULL)
+		return -1;
+
+	while (1) {
+		char name[PATH_MAX];
+		u64 start, size;
+		char *sep, *endptr;
+		ssize_t line_len;
+
+		line_len = getline(&line, &n, file);
+		if (line_len < 0) {
+			if (feof(file))
+				break;
+			err = -1;
+			goto out;
+		}
+
+		if (!line) {
+			err = -1;
+			goto out;
+		}
+
+		line[--line_len] = '\0'; /* \n */
+
+		sep = strrchr(line, 'x');
+		if (sep == NULL)
+			continue;
+
+		hex2u64(sep + 1, &start);
+
+		sep = strchr(line, ' ');
+		if (sep == NULL)
+			continue;
+
+		*sep = '\0';
+
+		scnprintf(name, sizeof(name), "[%s]", line);
+
+		size = strtoul(sep + 1, &endptr, 0);
+		if (*endptr != ' ' && *endptr != '\t')
+			continue;
+
+		err = process_module(arg, name, start, size);
+		if (err)
+			break;
+	}
+out:
+	free(line);
+	fclose(file);
+	return err;
+}
+
+/*
+ * These are symbols in the kernel image, so make sure that
+ * sym is from a kernel DSO.
+ */
+static bool symbol__is_idle(const char *name)
+{
+	const char * const idle_symbols[] = {
+		"acpi_idle_do_entry",
+		"acpi_processor_ffh_cstate_enter",
+		"arch_cpu_idle",
+		"cpu_idle",
+		"cpu_startup_entry",
+		"idle_cpu",
+		"intel_idle",
+		"default_idle",
+		"native_safe_halt",
+		"enter_idle",
+		"exit_idle",
+		"mwait_idle",
+		"mwait_idle_with_hints",
+		"mwait_idle_with_hints.constprop.0",
+		"poll_idle",
+		"ppc64_runlatch_off",
+		"pseries_dedicated_idle_sleep",
+		"psw_idle",
+		"psw_idle_exit",
+		NULL
+	};
+	int i;
+	static struct strlist *idle_symbols_list;
+
+	if (idle_symbols_list)
+		return strlist__has_entry(idle_symbols_list, name);
+
+	idle_symbols_list = strlist__new(NULL, NULL);
+
+	for (i = 0; idle_symbols[i]; i++)
+		strlist__add(idle_symbols_list, idle_symbols[i]);
+
+	return strlist__has_entry(idle_symbols_list, name);
+}
+
+static int map__process_kallsym_symbol(void *arg, const char *name,
+				       char type, u64 start)
+{
+	struct symbol *sym;
+	struct dso *dso = arg;
+	struct rb_root_cached *root = &dso->symbols;
+
+	if (!symbol_type__filter(type))
+		return 0;
+
+	/* Ignore local symbols for ARM modules */
+	if (name[0] == '$')
+		return 0;
+
+	/*
+	 * module symbols are not sorted so we add all
+	 * symbols, setting length to 0, and rely on
+	 * symbols__fixup_end() to fix it up.
+	 */
+	sym = symbol__new(start, 0, kallsyms2elf_binding(type), kallsyms2elf_type(type), name);
+	if (sym == NULL)
+		return -ENOMEM;
+	/*
+	 * We will pass the symbols to the filter later, in
+	 * map__split_kallsyms, when we have split the maps per module
+	 */
+	__symbols__insert(root, sym, !strchr(name, '['));
+
+	return 0;
+}
+
+/*
+ * Loads the function entries in /proc/kallsyms into kernel_map->dso,
+ * so that we can in the next step set the symbol ->end address and then
+ * call kernel_maps__split_kallsyms.
+ */
+static int dso__load_all_kallsyms(struct dso *dso, const char *filename)
+{
+	return kallsyms__parse(filename, dso, map__process_kallsym_symbol);
+}
+
+static int maps__split_kallsyms_for_kcore(struct maps *kmaps, struct dso *dso)
+{
+	struct map *curr_map;
+	struct symbol *pos;
+	int count = 0;
+	struct rb_root_cached old_root = dso->symbols;
+	struct rb_root_cached *root = &dso->symbols;
+	struct rb_node *next = rb_first_cached(root);
+
+	if (!kmaps)
+		return -1;
+
+	*root = RB_ROOT_CACHED;
+
+	while (next) {
+		char *module;
+
+		pos = rb_entry(next, struct symbol, rb_node);
+		next = rb_next(&pos->rb_node);
+
+		rb_erase_cached(&pos->rb_node, &old_root);
+		RB_CLEAR_NODE(&pos->rb_node);
+		module = strchr(pos->name, '\t');
+		if (module)
+			*module = '\0';
+
+		curr_map = maps__find(kmaps, pos->start);
+
+		if (!curr_map) {
+			symbol__delete(pos);
+			continue;
+		}
+
+		pos->start -= curr_map->start - curr_map->pgoff;
+		if (pos->end > curr_map->end)
+			pos->end = curr_map->end;
+		if (pos->end)
+			pos->end -= curr_map->start - curr_map->pgoff;
+		symbols__insert(&curr_map->dso->symbols, pos);
+		++count;
+	}
+
+	/* Symbols have been adjusted */
+	dso->adjust_symbols = 1;
+
+	return count;
+}
+
+/*
+ * Split the symbols into maps, making sure there are no overlaps, i.e. the
+ * kernel range is broken in several maps, named [kernel].N, as we don't have
+ * the original ELF section names vmlinux have.
+ */
+static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta,
+				struct map *initial_map)
+{
+	struct machine *machine;
+	struct map *curr_map = initial_map;
+	struct symbol *pos;
+	int count = 0, moved = 0;
+	struct rb_root_cached *root = &dso->symbols;
+	struct rb_node *next = rb_first_cached(root);
+	int kernel_range = 0;
+	bool x86_64;
+
+	if (!kmaps)
+		return -1;
+
+	machine = kmaps->machine;
+
+	x86_64 = machine__is(machine, "x86_64");
+
+	while (next) {
+		char *module;
+
+		pos = rb_entry(next, struct symbol, rb_node);
+		next = rb_next(&pos->rb_node);
+
+		module = strchr(pos->name, '\t');
+		if (module) {
+			if (!symbol_conf.use_modules)
+				goto discard_symbol;
+
+			*module++ = '\0';
+
+			if (strcmp(curr_map->dso->short_name, module)) {
+				if (curr_map != initial_map &&
+				    dso->kernel == DSO_SPACE__KERNEL_GUEST &&
+				    machine__is_default_guest(machine)) {
+					/*
+					 * We assume all symbols of a module are
+					 * continuous in * kallsyms, so curr_map
+					 * points to a module and all its
+					 * symbols are in its kmap. Mark it as
+					 * loaded.
+					 */
+					dso__set_loaded(curr_map->dso);
+				}
+
+				curr_map = maps__find_by_name(kmaps, module);
+				if (curr_map == NULL) {
+					pr_debug("%s/proc/{kallsyms,modules} "
+					         "inconsistency while looking "
+						 "for \"%s\" module!\n",
+						 machine->root_dir, module);
+					curr_map = initial_map;
+					goto discard_symbol;
+				}
+
+				if (curr_map->dso->loaded &&
+				    !machine__is_default_guest(machine))
+					goto discard_symbol;
+			}
+			/*
+			 * So that we look just like we get from .ko files,
+			 * i.e. not prelinked, relative to initial_map->start.
+			 */
+			pos->start = curr_map->map_ip(curr_map, pos->start);
+			pos->end   = curr_map->map_ip(curr_map, pos->end);
+		} else if (x86_64 && is_entry_trampoline(pos->name)) {
+			/*
+			 * These symbols are not needed anymore since the
+			 * trampoline maps refer to the text section and it's
+			 * symbols instead. Avoid having to deal with
+			 * relocations, and the assumption that the first symbol
+			 * is the start of kernel text, by simply removing the
+			 * symbols at this point.
+			 */
+			goto discard_symbol;
+		} else if (curr_map != initial_map) {
+			char dso_name[PATH_MAX];
+			struct dso *ndso;
+
+			if (delta) {
+				/* Kernel was relocated at boot time */
+				pos->start -= delta;
+				pos->end -= delta;
+			}
+
+			if (count == 0) {
+				curr_map = initial_map;
+				goto add_symbol;
+			}
+
+			if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
+				snprintf(dso_name, sizeof(dso_name),
+					"[guest.kernel].%d",
+					kernel_range++);
+			else
+				snprintf(dso_name, sizeof(dso_name),
+					"[kernel].%d",
+					kernel_range++);
+
+			ndso = dso__new(dso_name);
+			if (ndso == NULL)
+				return -1;
+
+			ndso->kernel = dso->kernel;
+
+			curr_map = map__new2(pos->start, ndso);
+			if (curr_map == NULL) {
+				dso__put(ndso);
+				return -1;
+			}
+
+			curr_map->map_ip = curr_map->unmap_ip = identity__map_ip;
+			maps__insert(kmaps, curr_map);
+			++kernel_range;
+		} else if (delta) {
+			/* Kernel was relocated at boot time */
+			pos->start -= delta;
+			pos->end -= delta;
+		}
+add_symbol:
+		if (curr_map != initial_map) {
+			rb_erase_cached(&pos->rb_node, root);
+			symbols__insert(&curr_map->dso->symbols, pos);
+			++moved;
+		} else
+			++count;
+
+		continue;
+discard_symbol:
+		rb_erase_cached(&pos->rb_node, root);
+		symbol__delete(pos);
+	}
+
+	if (curr_map != initial_map &&
+	    dso->kernel == DSO_SPACE__KERNEL_GUEST &&
+	    machine__is_default_guest(kmaps->machine)) {
+		dso__set_loaded(curr_map->dso);
+	}
+
+	return count + moved;
+}
+
+bool symbol__restricted_filename(const char *filename,
+				 const char *restricted_filename)
+{
+	bool restricted = false;
+
+	if (symbol_conf.kptr_restrict) {
+		char *r = realpath(filename, NULL);
+
+		if (r != NULL) {
+			restricted = strcmp(r, restricted_filename) == 0;
+			free(r);
+			return restricted;
+		}
+	}
+
+	return restricted;
+}
+
+struct module_info {
+	struct rb_node rb_node;
+	char *name;
+	u64 start;
+};
+
+static void add_module(struct module_info *mi, struct rb_root *modules)
+{
+	struct rb_node **p = &modules->rb_node;
+	struct rb_node *parent = NULL;
+	struct module_info *m;
+
+	while (*p != NULL) {
+		parent = *p;
+		m = rb_entry(parent, struct module_info, rb_node);
+		if (strcmp(mi->name, m->name) < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	rb_link_node(&mi->rb_node, parent, p);
+	rb_insert_color(&mi->rb_node, modules);
+}
+
+static void delete_modules(struct rb_root *modules)
+{
+	struct module_info *mi;
+	struct rb_node *next = rb_first(modules);
+
+	while (next) {
+		mi = rb_entry(next, struct module_info, rb_node);
+		next = rb_next(&mi->rb_node);
+		rb_erase(&mi->rb_node, modules);
+		zfree(&mi->name);
+		free(mi);
+	}
+}
+
+static struct module_info *find_module(const char *name,
+				       struct rb_root *modules)
+{
+	struct rb_node *n = modules->rb_node;
+
+	while (n) {
+		struct module_info *m;
+		int cmp;
+
+		m = rb_entry(n, struct module_info, rb_node);
+		cmp = strcmp(name, m->name);
+		if (cmp < 0)
+			n = n->rb_left;
+		else if (cmp > 0)
+			n = n->rb_right;
+		else
+			return m;
+	}
+
+	return NULL;
+}
+
+static int __read_proc_modules(void *arg, const char *name, u64 start,
+			       u64 size __maybe_unused)
+{
+	struct rb_root *modules = arg;
+	struct module_info *mi;
+
+	mi = zalloc(sizeof(struct module_info));
+	if (!mi)
+		return -ENOMEM;
+
+	mi->name = strdup(name);
+	mi->start = start;
+
+	if (!mi->name) {
+		free(mi);
+		return -ENOMEM;
+	}
+
+	add_module(mi, modules);
+
+	return 0;
+}
+
+static int read_proc_modules(const char *filename, struct rb_root *modules)
+{
+	if (symbol__restricted_filename(filename, "/proc/modules"))
+		return -1;
+
+	if (modules__parse(filename, modules, __read_proc_modules)) {
+		delete_modules(modules);
+		return -1;
+	}
+
+	return 0;
+}
+
+int compare_proc_modules(const char *from, const char *to)
+{
+	struct rb_root from_modules = RB_ROOT;
+	struct rb_root to_modules = RB_ROOT;
+	struct rb_node *from_node, *to_node;
+	struct module_info *from_m, *to_m;
+	int ret = -1;
+
+	if (read_proc_modules(from, &from_modules))
+		return -1;
+
+	if (read_proc_modules(to, &to_modules))
+		goto out_delete_from;
+
+	from_node = rb_first(&from_modules);
+	to_node = rb_first(&to_modules);
+	while (from_node) {
+		if (!to_node)
+			break;
+
+		from_m = rb_entry(from_node, struct module_info, rb_node);
+		to_m = rb_entry(to_node, struct module_info, rb_node);
+
+		if (from_m->start != to_m->start ||
+		    strcmp(from_m->name, to_m->name))
+			break;
+
+		from_node = rb_next(from_node);
+		to_node = rb_next(to_node);
+	}
+
+	if (!from_node && !to_node)
+		ret = 0;
+
+	delete_modules(&to_modules);
+out_delete_from:
+	delete_modules(&from_modules);
+
+	return ret;
+}
+
+static int do_validate_kcore_modules(const char *filename, struct maps *kmaps)
+{
+	struct rb_root modules = RB_ROOT;
+	struct map *old_map;
+	int err;
+
+	err = read_proc_modules(filename, &modules);
+	if (err)
+		return err;
+
+	maps__for_each_entry(kmaps, old_map) {
+		struct module_info *mi;
+
+		if (!__map__is_kmodule(old_map)) {
+			continue;
+		}
+
+		/* Module must be in memory at the same address */
+		mi = find_module(old_map->dso->short_name, &modules);
+		if (!mi || mi->start != old_map->start) {
+			err = -EINVAL;
+			goto out;
+		}
+	}
+out:
+	delete_modules(&modules);
+	return err;
+}
+
+/*
+ * If kallsyms is referenced by name then we look for filename in the same
+ * directory.
+ */
+static bool filename_from_kallsyms_filename(char *filename,
+					    const char *base_name,
+					    const char *kallsyms_filename)
+{
+	char *name;
+
+	strcpy(filename, kallsyms_filename);
+	name = strrchr(filename, '/');
+	if (!name)
+		return false;
+
+	name += 1;
+
+	if (!strcmp(name, "kallsyms")) {
+		strcpy(name, base_name);
+		return true;
+	}
+
+	return false;
+}
+
+static int validate_kcore_modules(const char *kallsyms_filename,
+				  struct map *map)
+{
+	struct maps *kmaps = map__kmaps(map);
+	char modules_filename[PATH_MAX];
+
+	if (!kmaps)
+		return -EINVAL;
+
+	if (!filename_from_kallsyms_filename(modules_filename, "modules",
+					     kallsyms_filename))
+		return -EINVAL;
+
+	if (do_validate_kcore_modules(modules_filename, kmaps))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int validate_kcore_addresses(const char *kallsyms_filename,
+				    struct map *map)
+{
+	struct kmap *kmap = map__kmap(map);
+
+	if (!kmap)
+		return -EINVAL;
+
+	if (kmap->ref_reloc_sym && kmap->ref_reloc_sym->name) {
+		u64 start;
+
+		if (kallsyms__get_function_start(kallsyms_filename,
+						 kmap->ref_reloc_sym->name, &start))
+			return -ENOENT;
+		if (start != kmap->ref_reloc_sym->addr)
+			return -EINVAL;
+	}
+
+	return validate_kcore_modules(kallsyms_filename, map);
+}
+
+struct kcore_mapfn_data {
+	struct dso *dso;
+	struct list_head maps;
+};
+
+static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data)
+{
+	struct kcore_mapfn_data *md = data;
+	struct map *map;
+
+	map = map__new2(start, md->dso);
+	if (map == NULL)
+		return -ENOMEM;
+
+	map->end = map->start + len;
+	map->pgoff = pgoff;
+
+	list_add(&map->node, &md->maps);
+
+	return 0;
+}
+
+/*
+ * Merges map into maps by splitting the new map within the existing map
+ * regions.
+ */
+int maps__merge_in(struct maps *kmaps, struct map *new_map)
+{
+	struct map *old_map;
+	LIST_HEAD(merged);
+
+	maps__for_each_entry(kmaps, old_map) {
+		/* no overload with this one */
+		if (new_map->end < old_map->start ||
+		    new_map->start >= old_map->end)
+			continue;
+
+		if (new_map->start < old_map->start) {
+			/*
+			 * |new......
+			 *       |old....
+			 */
+			if (new_map->end < old_map->end) {
+				/*
+				 * |new......|     -> |new..|
+				 *       |old....| ->       |old....|
+				 */
+				new_map->end = old_map->start;
+			} else {
+				/*
+				 * |new.............| -> |new..|       |new..|
+				 *       |old....|    ->       |old....|
+				 */
+				struct map *m = map__clone(new_map);
+
+				if (!m)
+					return -ENOMEM;
+
+				m->end = old_map->start;
+				list_add_tail(&m->node, &merged);
+				new_map->pgoff += old_map->end - new_map->start;
+				new_map->start = old_map->end;
+			}
+		} else {
+			/*
+			 *      |new......
+			 * |old....
+			 */
+			if (new_map->end < old_map->end) {
+				/*
+				 *      |new..|   -> x
+				 * |old.........| -> |old.........|
+				 */
+				map__put(new_map);
+				new_map = NULL;
+				break;
+			} else {
+				/*
+				 *      |new......| ->         |new...|
+				 * |old....|        -> |old....|
+				 */
+				new_map->pgoff += old_map->end - new_map->start;
+				new_map->start = old_map->end;
+			}
+		}
+	}
+
+	while (!list_empty(&merged)) {
+		old_map = list_entry(merged.next, struct map, node);
+		list_del_init(&old_map->node);
+		maps__insert(kmaps, old_map);
+		map__put(old_map);
+	}
+
+	if (new_map) {
+		maps__insert(kmaps, new_map);
+		map__put(new_map);
+	}
+	return 0;
+}
+
+static int dso__load_kcore(struct dso *dso, struct map *map,
+			   const char *kallsyms_filename)
+{
+	struct maps *kmaps = map__kmaps(map);
+	struct kcore_mapfn_data md;
+	struct map *old_map, *new_map, *replacement_map = NULL, *next;
+	struct machine *machine;
+	bool is_64_bit;
+	int err, fd;
+	char kcore_filename[PATH_MAX];
+	u64 stext;
+
+	if (!kmaps)
+		return -EINVAL;
+
+	machine = kmaps->machine;
+
+	/* This function requires that the map is the kernel map */
+	if (!__map__is_kernel(map))
+		return -EINVAL;
+
+	if (!filename_from_kallsyms_filename(kcore_filename, "kcore",
+					     kallsyms_filename))
+		return -EINVAL;
+
+	/* Modules and kernel must be present at their original addresses */
+	if (validate_kcore_addresses(kallsyms_filename, map))
+		return -EINVAL;
+
+	md.dso = dso;
+	INIT_LIST_HEAD(&md.maps);
+
+	fd = open(kcore_filename, O_RDONLY);
+	if (fd < 0) {
+		pr_debug("Failed to open %s. Note /proc/kcore requires CAP_SYS_RAWIO capability to access.\n",
+			 kcore_filename);
+		return -EINVAL;
+	}
+
+	/* Read new maps into temporary lists */
+	err = file__read_maps(fd, map->prot & PROT_EXEC, kcore_mapfn, &md,
+			      &is_64_bit);
+	if (err)
+		goto out_err;
+	dso->is_64_bit = is_64_bit;
+
+	if (list_empty(&md.maps)) {
+		err = -EINVAL;
+		goto out_err;
+	}
+
+	/* Remove old maps */
+	maps__for_each_entry_safe(kmaps, old_map, next) {
+		/*
+		 * We need to preserve eBPF maps even if they are
+		 * covered by kcore, because we need to access
+		 * eBPF dso for source data.
+		 */
+		if (old_map != map && !__map__is_bpf_prog(old_map))
+			maps__remove(kmaps, old_map);
+	}
+	machine->trampolines_mapped = false;
+
+	/* Find the kernel map using the '_stext' symbol */
+	if (!kallsyms__get_function_start(kallsyms_filename, "_stext", &stext)) {
+		u64 replacement_size = 0;
+
+		list_for_each_entry(new_map, &md.maps, node) {
+			u64 new_size = new_map->end - new_map->start;
+
+			if (!(stext >= new_map->start && stext < new_map->end))
+				continue;
+
+			/*
+			 * On some architectures, ARM64 for example, the kernel
+			 * text can get allocated inside of the vmalloc segment.
+			 * Select the smallest matching segment, in case stext
+			 * falls within more than one in the list.
+			 */
+			if (!replacement_map || new_size < replacement_size) {
+				replacement_map = new_map;
+				replacement_size = new_size;
+			}
+		}
+	}
+
+	if (!replacement_map)
+		replacement_map = list_entry(md.maps.next, struct map, node);
+
+	/* Add new maps */
+	while (!list_empty(&md.maps)) {
+		new_map = list_entry(md.maps.next, struct map, node);
+		list_del_init(&new_map->node);
+		if (new_map == replacement_map) {
+			map->start	= new_map->start;
+			map->end	= new_map->end;
+			map->pgoff	= new_map->pgoff;
+			map->map_ip	= new_map->map_ip;
+			map->unmap_ip	= new_map->unmap_ip;
+			/* Ensure maps are correctly ordered */
+			map__get(map);
+			maps__remove(kmaps, map);
+			maps__insert(kmaps, map);
+			map__put(map);
+			map__put(new_map);
+		} else {
+			/*
+			 * Merge kcore map into existing maps,
+			 * and ensure that current maps (eBPF)
+			 * stay intact.
+			 */
+			if (maps__merge_in(kmaps, new_map))
+				goto out_err;
+		}
+	}
+
+	if (machine__is(machine, "x86_64")) {
+		u64 addr;
+
+		/*
+		 * If one of the corresponding symbols is there, assume the
+		 * entry trampoline maps are too.
+		 */
+		if (!kallsyms__get_function_start(kallsyms_filename,
+						  ENTRY_TRAMPOLINE_NAME,
+						  &addr))
+			machine->trampolines_mapped = true;
+	}
+
+	/*
+	 * Set the data type and long name so that kcore can be read via
+	 * dso__data_read_addr().
+	 */
+	if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
+		dso->binary_type = DSO_BINARY_TYPE__GUEST_KCORE;
+	else
+		dso->binary_type = DSO_BINARY_TYPE__KCORE;
+	dso__set_long_name(dso, strdup(kcore_filename), true);
+
+	close(fd);
+
+	if (map->prot & PROT_EXEC)
+		pr_debug("Using %s for kernel object code\n", kcore_filename);
+	else
+		pr_debug("Using %s for kernel data\n", kcore_filename);
+
+	return 0;
+
+out_err:
+	while (!list_empty(&md.maps)) {
+		map = list_entry(md.maps.next, struct map, node);
+		list_del_init(&map->node);
+		map__put(map);
+	}
+	close(fd);
+	return -EINVAL;
+}
+
+/*
+ * If the kernel is relocated at boot time, kallsyms won't match.  Compute the
+ * delta based on the relocation reference symbol.
+ */
+static int kallsyms__delta(struct kmap *kmap, const char *filename, u64 *delta)
+{
+	u64 addr;
+
+	if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->name)
+		return 0;
+
+	if (kallsyms__get_function_start(filename, kmap->ref_reloc_sym->name, &addr))
+		return -1;
+
+	*delta = addr - kmap->ref_reloc_sym->addr;
+	return 0;
+}
+
+int __dso__load_kallsyms(struct dso *dso, const char *filename,
+			 struct map *map, bool no_kcore)
+{
+	struct kmap *kmap = map__kmap(map);
+	u64 delta = 0;
+
+	if (symbol__restricted_filename(filename, "/proc/kallsyms"))
+		return -1;
+
+	if (!kmap || !kmap->kmaps)
+		return -1;
+
+	if (dso__load_all_kallsyms(dso, filename) < 0)
+		return -1;
+
+	if (kallsyms__delta(kmap, filename, &delta))
+		return -1;
+
+	symbols__fixup_end(&dso->symbols, true);
+	symbols__fixup_duplicate(&dso->symbols);
+
+	if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
+		dso->symtab_type = DSO_BINARY_TYPE__GUEST_KALLSYMS;
+	else
+		dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS;
+
+	if (!no_kcore && !dso__load_kcore(dso, map, filename))
+		return maps__split_kallsyms_for_kcore(kmap->kmaps, dso);
+	else
+		return maps__split_kallsyms(kmap->kmaps, dso, delta, map);
+}
+
+int dso__load_kallsyms(struct dso *dso, const char *filename,
+		       struct map *map)
+{
+	return __dso__load_kallsyms(dso, filename, map, false);
+}
+
+static int dso__load_perf_map(const char *map_path, struct dso *dso)
+{
+	char *line = NULL;
+	size_t n;
+	FILE *file;
+	int nr_syms = 0;
+
+	file = fopen(map_path, "r");
+	if (file == NULL)
+		goto out_failure;
+
+	while (!feof(file)) {
+		u64 start, size;
+		struct symbol *sym;
+		int line_len, len;
+
+		line_len = getline(&line, &n, file);
+		if (line_len < 0)
+			break;
+
+		if (!line)
+			goto out_failure;
+
+		line[--line_len] = '\0'; /* \n */
+
+		len = hex2u64(line, &start);
+
+		len++;
+		if (len + 2 >= line_len)
+			continue;
+
+		len += hex2u64(line + len, &size);
+
+		len++;
+		if (len + 2 >= line_len)
+			continue;
+
+		sym = symbol__new(start, size, STB_GLOBAL, STT_FUNC, line + len);
+
+		if (sym == NULL)
+			goto out_delete_line;
+
+		symbols__insert(&dso->symbols, sym);
+		nr_syms++;
+	}
+
+	free(line);
+	fclose(file);
+
+	return nr_syms;
+
+out_delete_line:
+	free(line);
+out_failure:
+	return -1;
+}
+
+#ifdef HAVE_LIBBFD_SUPPORT
+#define PACKAGE 'perf'
+#include <bfd.h>
+
+static int bfd_symbols__cmpvalue(const void *a, const void *b)
+{
+	const asymbol *as = *(const asymbol **)a, *bs = *(const asymbol **)b;
+
+	if (bfd_asymbol_value(as) != bfd_asymbol_value(bs))
+		return bfd_asymbol_value(as) - bfd_asymbol_value(bs);
+
+	return bfd_asymbol_name(as)[0] - bfd_asymbol_name(bs)[0];
+}
+
+static int bfd2elf_binding(asymbol *symbol)
+{
+	if (symbol->flags & BSF_WEAK)
+		return STB_WEAK;
+	if (symbol->flags & BSF_GLOBAL)
+		return STB_GLOBAL;
+	if (symbol->flags & BSF_LOCAL)
+		return STB_LOCAL;
+	return -1;
+}
+
+int dso__load_bfd_symbols(struct dso *dso, const char *debugfile)
+{
+	int err = -1;
+	long symbols_size, symbols_count, i;
+	asection *section;
+	asymbol **symbols, *sym;
+	struct symbol *symbol;
+	bfd *abfd;
+	u64 start, len;
+
+	abfd = bfd_openr(debugfile, NULL);
+	if (!abfd)
+		return -1;
+
+	if (!bfd_check_format(abfd, bfd_object)) {
+		pr_debug2("%s: cannot read %s bfd file.\n", __func__,
+			  dso->long_name);
+		goto out_close;
+	}
+
+	if (bfd_get_flavour(abfd) == bfd_target_elf_flavour)
+		goto out_close;
+
+	symbols_size = bfd_get_symtab_upper_bound(abfd);
+	if (symbols_size == 0) {
+		bfd_close(abfd);
+		return 0;
+	}
+
+	if (symbols_size < 0)
+		goto out_close;
+
+	symbols = malloc(symbols_size);
+	if (!symbols)
+		goto out_close;
+
+	symbols_count = bfd_canonicalize_symtab(abfd, symbols);
+	if (symbols_count < 0)
+		goto out_free;
+
+	section = bfd_get_section_by_name(abfd, ".text");
+	if (section) {
+		for (i = 0; i < symbols_count; ++i) {
+			if (!strcmp(bfd_asymbol_name(symbols[i]), "__ImageBase") ||
+			    !strcmp(bfd_asymbol_name(symbols[i]), "__image_base__"))
+				break;
+		}
+		if (i < symbols_count) {
+			/* PE symbols can only have 4 bytes, so use .text high bits */
+			dso->text_offset = section->vma - (u32)section->vma;
+			dso->text_offset += (u32)bfd_asymbol_value(symbols[i]);
+		} else {
+			dso->text_offset = section->vma - section->filepos;
+		}
+	}
+
+	qsort(symbols, symbols_count, sizeof(asymbol *), bfd_symbols__cmpvalue);
+
+#ifdef bfd_get_section
+#define bfd_asymbol_section bfd_get_section
+#endif
+	for (i = 0; i < symbols_count; ++i) {
+		sym = symbols[i];
+		section = bfd_asymbol_section(sym);
+		if (bfd2elf_binding(sym) < 0)
+			continue;
+
+		while (i + 1 < symbols_count &&
+		       bfd_asymbol_section(symbols[i + 1]) == section &&
+		       bfd2elf_binding(symbols[i + 1]) < 0)
+			i++;
+
+		if (i + 1 < symbols_count &&
+		    bfd_asymbol_section(symbols[i + 1]) == section)
+			len = symbols[i + 1]->value - sym->value;
+		else
+			len = section->size - sym->value;
+
+		start = bfd_asymbol_value(sym) - dso->text_offset;
+		symbol = symbol__new(start, len, bfd2elf_binding(sym), STT_FUNC,
+				     bfd_asymbol_name(sym));
+		if (!symbol)
+			goto out_free;
+
+		symbols__insert(&dso->symbols, symbol);
+	}
+#ifdef bfd_get_section
+#undef bfd_asymbol_section
+#endif
+
+	symbols__fixup_end(&dso->symbols, false);
+	symbols__fixup_duplicate(&dso->symbols);
+	dso->adjust_symbols = 1;
+
+	err = 0;
+out_free:
+	free(symbols);
+out_close:
+	bfd_close(abfd);
+	return err;
+}
+#endif
+
+static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
+					   enum dso_binary_type type)
+{
+	switch (type) {
+	case DSO_BINARY_TYPE__JAVA_JIT:
+	case DSO_BINARY_TYPE__DEBUGLINK:
+	case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
+	case DSO_BINARY_TYPE__FEDORA_DEBUGINFO:
+	case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO:
+	case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
+	case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
+	case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
+		return !kmod && dso->kernel == DSO_SPACE__USER;
+
+	case DSO_BINARY_TYPE__KALLSYMS:
+	case DSO_BINARY_TYPE__VMLINUX:
+	case DSO_BINARY_TYPE__KCORE:
+		return dso->kernel == DSO_SPACE__KERNEL;
+
+	case DSO_BINARY_TYPE__GUEST_KALLSYMS:
+	case DSO_BINARY_TYPE__GUEST_VMLINUX:
+	case DSO_BINARY_TYPE__GUEST_KCORE:
+		return dso->kernel == DSO_SPACE__KERNEL_GUEST;
+
+	case DSO_BINARY_TYPE__GUEST_KMODULE:
+	case DSO_BINARY_TYPE__GUEST_KMODULE_COMP:
+	case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE:
+	case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP:
+		/*
+		 * kernel modules know their symtab type - it's set when
+		 * creating a module dso in machine__addnew_module_map().
+		 */
+		return kmod && dso->symtab_type == type;
+
+	case DSO_BINARY_TYPE__BUILD_ID_CACHE:
+	case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO:
+		return true;
+
+	case DSO_BINARY_TYPE__BPF_PROG_INFO:
+	case DSO_BINARY_TYPE__BPF_IMAGE:
+	case DSO_BINARY_TYPE__OOL:
+	case DSO_BINARY_TYPE__NOT_FOUND:
+	default:
+		return false;
+	}
+}
+
+/* Checks for the existence of the perf-<pid>.map file in two different
+ * locations.  First, if the process is a separate mount namespace, check in
+ * that namespace using the pid of the innermost pid namespace.  If's not in a
+ * namespace, or the file can't be found there, try in the mount namespace of
+ * the tracing process using our view of its pid.
+ */
+static int dso__find_perf_map(char *filebuf, size_t bufsz,
+			      struct nsinfo **nsip)
+{
+	struct nscookie nsc;
+	struct nsinfo *nsi;
+	struct nsinfo *nnsi;
+	int rc = -1;
+
+	nsi = *nsip;
+
+	if (nsinfo__need_setns(nsi)) {
+		snprintf(filebuf, bufsz, "/tmp/perf-%d.map", nsinfo__nstgid(nsi));
+		nsinfo__mountns_enter(nsi, &nsc);
+		rc = access(filebuf, R_OK);
+		nsinfo__mountns_exit(&nsc);
+		if (rc == 0)
+			return rc;
+	}
+
+	nnsi = nsinfo__copy(nsi);
+	if (nnsi) {
+		nsinfo__put(nsi);
+
+		nsinfo__clear_need_setns(nnsi);
+		snprintf(filebuf, bufsz, "/tmp/perf-%d.map", nsinfo__tgid(nnsi));
+		*nsip = nnsi;
+		rc = 0;
+	}
+
+	return rc;
+}
+
+int dso__load(struct dso *dso, struct map *map)
+{
+	char *name;
+	int ret = -1;
+	u_int i;
+	struct machine *machine = NULL;
+	char *root_dir = (char *) "";
+	int ss_pos = 0;
+	struct symsrc ss_[2];
+	struct symsrc *syms_ss = NULL, *runtime_ss = NULL;
+	bool kmod;
+	bool perfmap;
+	struct build_id bid;
+	struct nscookie nsc;
+	char newmapname[PATH_MAX];
+	const char *map_path = dso->long_name;
+
+	mutex_lock(&dso->lock);
+	perfmap = strncmp(dso->name, "/tmp/perf-", 10) == 0;
+	if (perfmap) {
+		if (dso->nsinfo && (dso__find_perf_map(newmapname,
+		    sizeof(newmapname), &dso->nsinfo) == 0)) {
+			map_path = newmapname;
+		}
+	}
+
+	nsinfo__mountns_enter(dso->nsinfo, &nsc);
+
+	/* check again under the dso->lock */
+	if (dso__loaded(dso)) {
+		ret = 1;
+		goto out;
+	}
+
+	kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE ||
+		dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP ||
+		dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE ||
+		dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
+
+	if (dso->kernel && !kmod) {
+		if (dso->kernel == DSO_SPACE__KERNEL)
+			ret = dso__load_kernel_sym(dso, map);
+		else if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
+			ret = dso__load_guest_kernel_sym(dso, map);
+
+		machine = map__kmaps(map)->machine;
+		if (machine__is(machine, "x86_64"))
+			machine__map_x86_64_entry_trampolines(machine, dso);
+		goto out;
+	}
+
+	dso->adjust_symbols = 0;
+
+	if (perfmap) {
+		ret = dso__load_perf_map(map_path, dso);
+		dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT :
+					     DSO_BINARY_TYPE__NOT_FOUND;
+		goto out;
+	}
+
+	if (machine)
+		root_dir = machine->root_dir;
+
+	name = malloc(PATH_MAX);
+	if (!name)
+		goto out;
+
+	/*
+	 * Read the build id if possible. This is required for
+	 * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work
+	 */
+	if (!dso->has_build_id &&
+	    is_regular_file(dso->long_name)) {
+	    __symbol__join_symfs(name, PATH_MAX, dso->long_name);
+		if (filename__read_build_id(name, &bid) > 0)
+			dso__set_build_id(dso, &bid);
+	}
+
+	/*
+	 * Iterate over candidate debug images.
+	 * Keep track of "interesting" ones (those which have a symtab, dynsym,
+	 * and/or opd section) for processing.
+	 */
+	for (i = 0; i < DSO_BINARY_TYPE__SYMTAB_CNT; i++) {
+		struct symsrc *ss = &ss_[ss_pos];
+		bool next_slot = false;
+		bool is_reg;
+		bool nsexit;
+		int bfdrc = -1;
+		int sirc = -1;
+
+		enum dso_binary_type symtab_type = binary_type_symtab[i];
+
+		nsexit = (symtab_type == DSO_BINARY_TYPE__BUILD_ID_CACHE ||
+		    symtab_type == DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO);
+
+		if (!dso__is_compatible_symtab_type(dso, kmod, symtab_type))
+			continue;
+
+		if (dso__read_binary_type_filename(dso, symtab_type,
+						   root_dir, name, PATH_MAX))
+			continue;
+
+		if (nsexit)
+			nsinfo__mountns_exit(&nsc);
+
+		is_reg = is_regular_file(name);
+		if (!is_reg && errno == ENOENT && dso->nsinfo) {
+			char *new_name = filename_with_chroot(dso->nsinfo->pid,
+							      name);
+			if (new_name) {
+				is_reg = is_regular_file(new_name);
+				strlcpy(name, new_name, PATH_MAX);
+				free(new_name);
+			}
+		}
+
+#ifdef HAVE_LIBBFD_SUPPORT
+		if (is_reg)
+			bfdrc = dso__load_bfd_symbols(dso, name);
+#endif
+		if (is_reg && bfdrc < 0)
+			sirc = symsrc__init(ss, dso, name, symtab_type);
+
+		if (nsexit)
+			nsinfo__mountns_enter(dso->nsinfo, &nsc);
+
+		if (bfdrc == 0) {
+			ret = 0;
+			break;
+		}
+
+		if (!is_reg || sirc < 0)
+			continue;
+
+		if (!syms_ss && symsrc__has_symtab(ss)) {
+			syms_ss = ss;
+			next_slot = true;
+			if (!dso->symsrc_filename)
+				dso->symsrc_filename = strdup(name);
+		}
+
+		if (!runtime_ss && symsrc__possibly_runtime(ss)) {
+			runtime_ss = ss;
+			next_slot = true;
+		}
+
+		if (next_slot) {
+			ss_pos++;
+
+			if (syms_ss && runtime_ss)
+				break;
+		} else {
+			symsrc__destroy(ss);
+		}
+
+	}
+
+	if (!runtime_ss && !syms_ss)
+		goto out_free;
+
+	if (runtime_ss && !syms_ss) {
+		syms_ss = runtime_ss;
+	}
+
+	/* We'll have to hope for the best */
+	if (!runtime_ss && syms_ss)
+		runtime_ss = syms_ss;
+
+	if (syms_ss)
+		ret = dso__load_sym(dso, map, syms_ss, runtime_ss, kmod);
+	else
+		ret = -1;
+
+	if (ret > 0) {
+		int nr_plt;
+
+		nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss);
+		if (nr_plt > 0)
+			ret += nr_plt;
+	}
+
+	for (; ss_pos > 0; ss_pos--)
+		symsrc__destroy(&ss_[ss_pos - 1]);
+out_free:
+	free(name);
+	if (ret < 0 && strstr(dso->name, " (deleted)") != NULL)
+		ret = 0;
+out:
+	dso__set_loaded(dso);
+	mutex_unlock(&dso->lock);
+	nsinfo__mountns_exit(&nsc);
+
+	return ret;
+}
+
+static int map__strcmp(const void *a, const void *b)
+{
+	const struct map *ma = *(const struct map **)a, *mb = *(const struct map **)b;
+	return strcmp(ma->dso->short_name, mb->dso->short_name);
+}
+
+static int map__strcmp_name(const void *name, const void *b)
+{
+	const struct map *map = *(const struct map **)b;
+	return strcmp(name, map->dso->short_name);
+}
+
+void __maps__sort_by_name(struct maps *maps)
+{
+	qsort(maps->maps_by_name, maps->nr_maps, sizeof(struct map *), map__strcmp);
+}
+
+static int map__groups__sort_by_name_from_rbtree(struct maps *maps)
+{
+	struct map *map;
+	struct map **maps_by_name = realloc(maps->maps_by_name, maps->nr_maps * sizeof(map));
+	int i = 0;
+
+	if (maps_by_name == NULL)
+		return -1;
+
+	maps->maps_by_name = maps_by_name;
+	maps->nr_maps_allocated = maps->nr_maps;
+
+	maps__for_each_entry(maps, map)
+		maps_by_name[i++] = map;
+
+	__maps__sort_by_name(maps);
+	return 0;
+}
+
+static struct map *__maps__find_by_name(struct maps *maps, const char *name)
+{
+	struct map **mapp;
+
+	if (maps->maps_by_name == NULL &&
+	    map__groups__sort_by_name_from_rbtree(maps))
+		return NULL;
+
+	mapp = bsearch(name, maps->maps_by_name, maps->nr_maps, sizeof(*mapp), map__strcmp_name);
+	if (mapp)
+		return *mapp;
+	return NULL;
+}
+
+struct map *maps__find_by_name(struct maps *maps, const char *name)
+{
+	struct map *map;
+
+	down_read(&maps->lock);
+
+	if (maps->last_search_by_name && strcmp(maps->last_search_by_name->dso->short_name, name) == 0) {
+		map = maps->last_search_by_name;
+		goto out_unlock;
+	}
+	/*
+	 * If we have maps->maps_by_name, then the name isn't in the rbtree,
+	 * as maps->maps_by_name mirrors the rbtree when lookups by name are
+	 * made.
+	 */
+	map = __maps__find_by_name(maps, name);
+	if (map || maps->maps_by_name != NULL)
+		goto out_unlock;
+
+	/* Fallback to traversing the rbtree... */
+	maps__for_each_entry(maps, map)
+		if (strcmp(map->dso->short_name, name) == 0) {
+			maps->last_search_by_name = map;
+			goto out_unlock;
+		}
+
+	map = NULL;
+
+out_unlock:
+	up_read(&maps->lock);
+	return map;
+}
+
+int dso__load_vmlinux(struct dso *dso, struct map *map,
+		      const char *vmlinux, bool vmlinux_allocated)
+{
+	int err = -1;
+	struct symsrc ss;
+	char symfs_vmlinux[PATH_MAX];
+	enum dso_binary_type symtab_type;
+
+	if (vmlinux[0] == '/')
+		snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s", vmlinux);
+	else
+		symbol__join_symfs(symfs_vmlinux, vmlinux);
+
+	if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
+		symtab_type = DSO_BINARY_TYPE__GUEST_VMLINUX;
+	else
+		symtab_type = DSO_BINARY_TYPE__VMLINUX;
+
+	if (symsrc__init(&ss, dso, symfs_vmlinux, symtab_type))
+		return -1;
+
+	err = dso__load_sym(dso, map, &ss, &ss, 0);
+	symsrc__destroy(&ss);
+
+	if (err > 0) {
+		if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
+			dso->binary_type = DSO_BINARY_TYPE__GUEST_VMLINUX;
+		else
+			dso->binary_type = DSO_BINARY_TYPE__VMLINUX;
+		dso__set_long_name(dso, vmlinux, vmlinux_allocated);
+		dso__set_loaded(dso);
+		pr_debug("Using %s for symbols\n", symfs_vmlinux);
+	}
+
+	return err;
+}
+
+int dso__load_vmlinux_path(struct dso *dso, struct map *map)
+{
+	int i, err = 0;
+	char *filename = NULL;
+
+	pr_debug("Looking at the vmlinux_path (%d entries long)\n",
+		 vmlinux_path__nr_entries + 1);
+
+	for (i = 0; i < vmlinux_path__nr_entries; ++i) {
+		err = dso__load_vmlinux(dso, map, vmlinux_path[i], false);
+		if (err > 0)
+			goto out;
+	}
+
+	if (!symbol_conf.ignore_vmlinux_buildid)
+		filename = dso__build_id_filename(dso, NULL, 0, false);
+	if (filename != NULL) {
+		err = dso__load_vmlinux(dso, map, filename, true);
+		if (err > 0)
+			goto out;
+		free(filename);
+	}
+out:
+	return err;
+}
+
+static bool visible_dir_filter(const char *name, struct dirent *d)
+{
+	if (d->d_type != DT_DIR)
+		return false;
+	return lsdir_no_dot_filter(name, d);
+}
+
+static int find_matching_kcore(struct map *map, char *dir, size_t dir_sz)
+{
+	char kallsyms_filename[PATH_MAX];
+	int ret = -1;
+	struct strlist *dirs;
+	struct str_node *nd;
+
+	dirs = lsdir(dir, visible_dir_filter);
+	if (!dirs)
+		return -1;
+
+	strlist__for_each_entry(nd, dirs) {
+		scnprintf(kallsyms_filename, sizeof(kallsyms_filename),
+			  "%s/%s/kallsyms", dir, nd->s);
+		if (!validate_kcore_addresses(kallsyms_filename, map)) {
+			strlcpy(dir, kallsyms_filename, dir_sz);
+			ret = 0;
+			break;
+		}
+	}
+
+	strlist__delete(dirs);
+
+	return ret;
+}
+
+/*
+ * Use open(O_RDONLY) to check readability directly instead of access(R_OK)
+ * since access(R_OK) only checks with real UID/GID but open() use effective
+ * UID/GID and actual capabilities (e.g. /proc/kcore requires CAP_SYS_RAWIO).
+ */
+static bool filename__readable(const char *file)
+{
+	int fd = open(file, O_RDONLY);
+	if (fd < 0)
+		return false;
+	close(fd);
+	return true;
+}
+
+static char *dso__find_kallsyms(struct dso *dso, struct map *map)
+{
+	struct build_id bid;
+	char sbuild_id[SBUILD_ID_SIZE];
+	bool is_host = false;
+	char path[PATH_MAX];
+
+	if (!dso->has_build_id) {
+		/*
+		 * Last resort, if we don't have a build-id and couldn't find
+		 * any vmlinux file, try the running kernel kallsyms table.
+		 */
+		goto proc_kallsyms;
+	}
+
+	if (sysfs__read_build_id("/sys/kernel/notes", &bid) == 0)
+		is_host = dso__build_id_equal(dso, &bid);
+
+	/* Try a fast path for /proc/kallsyms if possible */
+	if (is_host) {
+		/*
+		 * Do not check the build-id cache, unless we know we cannot use
+		 * /proc/kcore or module maps don't match to /proc/kallsyms.
+		 * To check readability of /proc/kcore, do not use access(R_OK)
+		 * since /proc/kcore requires CAP_SYS_RAWIO to read and access
+		 * can't check it.
+		 */
+		if (filename__readable("/proc/kcore") &&
+		    !validate_kcore_addresses("/proc/kallsyms", map))
+			goto proc_kallsyms;
+	}
+
+	build_id__sprintf(&dso->bid, sbuild_id);
+
+	/* Find kallsyms in build-id cache with kcore */
+	scnprintf(path, sizeof(path), "%s/%s/%s",
+		  buildid_dir, DSO__NAME_KCORE, sbuild_id);
+
+	if (!find_matching_kcore(map, path, sizeof(path)))
+		return strdup(path);
+
+	/* Use current /proc/kallsyms if possible */
+	if (is_host) {
+proc_kallsyms:
+		return strdup("/proc/kallsyms");
+	}
+
+	/* Finally, find a cache of kallsyms */
+	if (!build_id_cache__kallsyms_path(sbuild_id, path, sizeof(path))) {
+		pr_err("No kallsyms or vmlinux with build-id %s was found\n",
+		       sbuild_id);
+		return NULL;
+	}
+
+	return strdup(path);
+}
+
+static int dso__load_kernel_sym(struct dso *dso, struct map *map)
+{
+	int err;
+	const char *kallsyms_filename = NULL;
+	char *kallsyms_allocated_filename = NULL;
+	char *filename = NULL;
+
+	/*
+	 * Step 1: if the user specified a kallsyms or vmlinux filename, use
+	 * it and only it, reporting errors to the user if it cannot be used.
+	 *
+	 * For instance, try to analyse an ARM perf.data file _without_ a
+	 * build-id, or if the user specifies the wrong path to the right
+	 * vmlinux file, obviously we can't fallback to another vmlinux (a
+	 * x86_86 one, on the machine where analysis is being performed, say),
+	 * or worse, /proc/kallsyms.
+	 *
+	 * If the specified file _has_ a build-id and there is a build-id
+	 * section in the perf.data file, we will still do the expected
+	 * validation in dso__load_vmlinux and will bail out if they don't
+	 * match.
+	 */
+	if (symbol_conf.kallsyms_name != NULL) {
+		kallsyms_filename = symbol_conf.kallsyms_name;
+		goto do_kallsyms;
+	}
+
+	if (!symbol_conf.ignore_vmlinux && symbol_conf.vmlinux_name != NULL) {
+		return dso__load_vmlinux(dso, map, symbol_conf.vmlinux_name, false);
+	}
+
+	/*
+	 * Before checking on common vmlinux locations, check if it's
+	 * stored as standard build id binary (not kallsyms) under
+	 * .debug cache.
+	 */
+	if (!symbol_conf.ignore_vmlinux_buildid)
+		filename = __dso__build_id_filename(dso, NULL, 0, false, false);
+	if (filename != NULL) {
+		err = dso__load_vmlinux(dso, map, filename, true);
+		if (err > 0)
+			return err;
+		free(filename);
+	}
+
+	if (!symbol_conf.ignore_vmlinux && vmlinux_path != NULL) {
+		err = dso__load_vmlinux_path(dso, map);
+		if (err > 0)
+			return err;
+	}
+
+	/* do not try local files if a symfs was given */
+	if (symbol_conf.symfs[0] != 0)
+		return -1;
+
+	kallsyms_allocated_filename = dso__find_kallsyms(dso, map);
+	if (!kallsyms_allocated_filename)
+		return -1;
+
+	kallsyms_filename = kallsyms_allocated_filename;
+
+do_kallsyms:
+	err = dso__load_kallsyms(dso, kallsyms_filename, map);
+	if (err > 0)
+		pr_debug("Using %s for symbols\n", kallsyms_filename);
+	free(kallsyms_allocated_filename);
+
+	if (err > 0 && !dso__is_kcore(dso)) {
+		dso->binary_type = DSO_BINARY_TYPE__KALLSYMS;
+		dso__set_long_name(dso, DSO__NAME_KALLSYMS, false);
+		map__fixup_start(map);
+		map__fixup_end(map);
+	}
+
+	return err;
+}
+
+static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map)
+{
+	int err;
+	const char *kallsyms_filename;
+	struct machine *machine = map__kmaps(map)->machine;
+	char path[PATH_MAX];
+
+	if (machine->kallsyms_filename) {
+		kallsyms_filename = machine->kallsyms_filename;
+	} else if (machine__is_default_guest(machine)) {
+		/*
+		 * if the user specified a vmlinux filename, use it and only
+		 * it, reporting errors to the user if it cannot be used.
+		 * Or use file guest_kallsyms inputted by user on commandline
+		 */
+		if (symbol_conf.default_guest_vmlinux_name != NULL) {
+			err = dso__load_vmlinux(dso, map,
+						symbol_conf.default_guest_vmlinux_name,
+						false);
+			return err;
+		}
+
+		kallsyms_filename = symbol_conf.default_guest_kallsyms;
+		if (!kallsyms_filename)
+			return -1;
+	} else {
+		sprintf(path, "%s/proc/kallsyms", machine->root_dir);
+		kallsyms_filename = path;
+	}
+
+	err = dso__load_kallsyms(dso, kallsyms_filename, map);
+	if (err > 0)
+		pr_debug("Using %s for symbols\n", kallsyms_filename);
+	if (err > 0 && !dso__is_kcore(dso)) {
+		dso->binary_type = DSO_BINARY_TYPE__GUEST_KALLSYMS;
+		dso__set_long_name(dso, machine->mmap_name, false);
+		map__fixup_start(map);
+		map__fixup_end(map);
+	}
+
+	return err;
+}
+
+static void vmlinux_path__exit(void)
+{
+	while (--vmlinux_path__nr_entries >= 0)
+		zfree(&vmlinux_path[vmlinux_path__nr_entries]);
+	vmlinux_path__nr_entries = 0;
+
+	zfree(&vmlinux_path);
+}
+
+static const char * const vmlinux_paths[] = {
+	"vmlinux",
+	"/boot/vmlinux"
+};
+
+static const char * const vmlinux_paths_upd[] = {
+	"/boot/vmlinux-%s",
+	"/usr/lib/debug/boot/vmlinux-%s",
+	"/lib/modules/%s/build/vmlinux",
+	"/usr/lib/debug/lib/modules/%s/vmlinux",
+	"/usr/lib/debug/boot/vmlinux-%s.debug"
+};
+
+static int vmlinux_path__add(const char *new_entry)
+{
+	vmlinux_path[vmlinux_path__nr_entries] = strdup(new_entry);
+	if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
+		return -1;
+	++vmlinux_path__nr_entries;
+
+	return 0;
+}
+
+static int vmlinux_path__init(struct perf_env *env)
+{
+	struct utsname uts;
+	char bf[PATH_MAX];
+	char *kernel_version;
+	unsigned int i;
+
+	vmlinux_path = malloc(sizeof(char *) * (ARRAY_SIZE(vmlinux_paths) +
+			      ARRAY_SIZE(vmlinux_paths_upd)));
+	if (vmlinux_path == NULL)
+		return -1;
+
+	for (i = 0; i < ARRAY_SIZE(vmlinux_paths); i++)
+		if (vmlinux_path__add(vmlinux_paths[i]) < 0)
+			goto out_fail;
+
+	/* only try kernel version if no symfs was given */
+	if (symbol_conf.symfs[0] != 0)
+		return 0;
+
+	if (env) {
+		kernel_version = env->os_release;
+	} else {
+		if (uname(&uts) < 0)
+			goto out_fail;
+
+		kernel_version = uts.release;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(vmlinux_paths_upd); i++) {
+		snprintf(bf, sizeof(bf), vmlinux_paths_upd[i], kernel_version);
+		if (vmlinux_path__add(bf) < 0)
+			goto out_fail;
+	}
+
+	return 0;
+
+out_fail:
+	vmlinux_path__exit();
+	return -1;
+}
+
+int setup_list(struct strlist **list, const char *list_str,
+		      const char *list_name)
+{
+	if (list_str == NULL)
+		return 0;
+
+	*list = strlist__new(list_str, NULL);
+	if (!*list) {
+		pr_err("problems parsing %s list\n", list_name);
+		return -1;
+	}
+
+	symbol_conf.has_filter = true;
+	return 0;
+}
+
+int setup_intlist(struct intlist **list, const char *list_str,
+		  const char *list_name)
+{
+	if (list_str == NULL)
+		return 0;
+
+	*list = intlist__new(list_str);
+	if (!*list) {
+		pr_err("problems parsing %s list\n", list_name);
+		return -1;
+	}
+	return 0;
+}
+
+static int setup_addrlist(struct intlist **addr_list, struct strlist *sym_list)
+{
+	struct str_node *pos, *tmp;
+	unsigned long val;
+	char *sep;
+	const char *end;
+	int i = 0, err;
+
+	*addr_list = intlist__new(NULL);
+	if (!*addr_list)
+		return -1;
+
+	strlist__for_each_entry_safe(pos, tmp, sym_list) {
+		errno = 0;
+		val = strtoul(pos->s, &sep, 16);
+		if (errno || (sep == pos->s))
+			continue;
+
+		if (*sep != '\0') {
+			end = pos->s + strlen(pos->s) - 1;
+			while (end >= sep && isspace(*end))
+				end--;
+
+			if (end >= sep)
+				continue;
+		}
+
+		err = intlist__add(*addr_list, val);
+		if (err)
+			break;
+
+		strlist__remove(sym_list, pos);
+		i++;
+	}
+
+	if (i == 0) {
+		intlist__delete(*addr_list);
+		*addr_list = NULL;
+	}
+
+	return 0;
+}
+
+static bool symbol__read_kptr_restrict(void)
+{
+	bool value = false;
+	FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r");
+
+	if (fp != NULL) {
+		char line[8];
+
+		if (fgets(line, sizeof(line), fp) != NULL)
+			value = perf_cap__capable(CAP_SYSLOG) ?
+					(atoi(line) >= 2) :
+					(atoi(line) != 0);
+
+		fclose(fp);
+	}
+
+	/* Per kernel/kallsyms.c:
+	 * we also restrict when perf_event_paranoid > 1 w/o CAP_SYSLOG
+	 */
+	if (perf_event_paranoid() > 1 && !perf_cap__capable(CAP_SYSLOG))
+		value = true;
+
+	return value;
+}
+
+int symbol__annotation_init(void)
+{
+	if (symbol_conf.init_annotation)
+		return 0;
+
+	if (symbol_conf.initialized) {
+		pr_err("Annotation needs to be init before symbol__init()\n");
+		return -1;
+	}
+
+	symbol_conf.priv_size += sizeof(struct annotation);
+	symbol_conf.init_annotation = true;
+	return 0;
+}
+
+int symbol__init(struct perf_env *env)
+{
+	const char *symfs;
+
+	if (symbol_conf.initialized)
+		return 0;
+
+	symbol_conf.priv_size = PERF_ALIGN(symbol_conf.priv_size, sizeof(u64));
+
+	symbol__elf_init();
+
+	if (symbol_conf.sort_by_name)
+		symbol_conf.priv_size += (sizeof(struct symbol_name_rb_node) -
+					  sizeof(struct symbol));
+
+	if (symbol_conf.try_vmlinux_path && vmlinux_path__init(env) < 0)
+		return -1;
+
+	if (symbol_conf.field_sep && *symbol_conf.field_sep == '.') {
+		pr_err("'.' is the only non valid --field-separator argument\n");
+		return -1;
+	}
+
+	if (setup_list(&symbol_conf.dso_list,
+		       symbol_conf.dso_list_str, "dso") < 0)
+		return -1;
+
+	if (setup_list(&symbol_conf.comm_list,
+		       symbol_conf.comm_list_str, "comm") < 0)
+		goto out_free_dso_list;
+
+	if (setup_intlist(&symbol_conf.pid_list,
+		       symbol_conf.pid_list_str, "pid") < 0)
+		goto out_free_comm_list;
+
+	if (setup_intlist(&symbol_conf.tid_list,
+		       symbol_conf.tid_list_str, "tid") < 0)
+		goto out_free_pid_list;
+
+	if (setup_list(&symbol_conf.sym_list,
+		       symbol_conf.sym_list_str, "symbol") < 0)
+		goto out_free_tid_list;
+
+	if (symbol_conf.sym_list &&
+	    setup_addrlist(&symbol_conf.addr_list, symbol_conf.sym_list) < 0)
+		goto out_free_sym_list;
+
+	if (setup_list(&symbol_conf.bt_stop_list,
+		       symbol_conf.bt_stop_list_str, "symbol") < 0)
+		goto out_free_sym_list;
+
+	/*
+	 * A path to symbols of "/" is identical to ""
+	 * reset here for simplicity.
+	 */
+	symfs = realpath(symbol_conf.symfs, NULL);
+	if (symfs == NULL)
+		symfs = symbol_conf.symfs;
+	if (strcmp(symfs, "/") == 0)
+		symbol_conf.symfs = "";
+	if (symfs != symbol_conf.symfs)
+		free((void *)symfs);
+
+	symbol_conf.kptr_restrict = symbol__read_kptr_restrict();
+
+	symbol_conf.initialized = true;
+	return 0;
+
+out_free_sym_list:
+	strlist__delete(symbol_conf.sym_list);
+	intlist__delete(symbol_conf.addr_list);
+out_free_tid_list:
+	intlist__delete(symbol_conf.tid_list);
+out_free_pid_list:
+	intlist__delete(symbol_conf.pid_list);
+out_free_comm_list:
+	strlist__delete(symbol_conf.comm_list);
+out_free_dso_list:
+	strlist__delete(symbol_conf.dso_list);
+	return -1;
+}
+
+void symbol__exit(void)
+{
+	if (!symbol_conf.initialized)
+		return;
+	strlist__delete(symbol_conf.bt_stop_list);
+	strlist__delete(symbol_conf.sym_list);
+	strlist__delete(symbol_conf.dso_list);
+	strlist__delete(symbol_conf.comm_list);
+	intlist__delete(symbol_conf.tid_list);
+	intlist__delete(symbol_conf.pid_list);
+	intlist__delete(symbol_conf.addr_list);
+	vmlinux_path__exit();
+	symbol_conf.sym_list = symbol_conf.dso_list = symbol_conf.comm_list = NULL;
+	symbol_conf.bt_stop_list = NULL;
+	symbol_conf.initialized = false;
+}
+
+int symbol__config_symfs(const struct option *opt __maybe_unused,
+			 const char *dir, int unset __maybe_unused)
+{
+	char *bf = NULL;
+	int ret;
+
+	symbol_conf.symfs = strdup(dir);
+	if (symbol_conf.symfs == NULL)
+		return -ENOMEM;
+
+	/* skip the locally configured cache if a symfs is given, and
+	 * config buildid dir to symfs/.debug
+	 */
+	ret = asprintf(&bf, "%s/%s", dir, ".debug");
+	if (ret < 0)
+		return -ENOMEM;
+
+	set_buildid_dir(bf);
+
+	free(bf);
+	return 0;
+}
+
+struct mem_info *mem_info__get(struct mem_info *mi)
+{
+	if (mi)
+		refcount_inc(&mi->refcnt);
+	return mi;
+}
+
+void mem_info__put(struct mem_info *mi)
+{
+	if (mi && refcount_dec_and_test(&mi->refcnt))
+		free(mi);
+}
+
+struct mem_info *mem_info__new(void)
+{
+	struct mem_info *mi = zalloc(sizeof(*mi));
+
+	if (mi)
+		refcount_set(&mi->refcnt, 1);
+	return mi;
+}
+
+/*
+ * Checks that user supplied symbol kernel files are accessible because
+ * the default mechanism for accessing elf files fails silently. i.e. if
+ * debug syms for a build ID aren't found perf carries on normally. When
+ * they are user supplied we should assume that the user doesn't want to
+ * silently fail.
+ */
+int symbol__validate_sym_arguments(void)
+{
+	if (symbol_conf.vmlinux_name &&
+	    access(symbol_conf.vmlinux_name, R_OK)) {
+		pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name);
+		return -EINVAL;
+	}
+	if (symbol_conf.kallsyms_name &&
+	    access(symbol_conf.kallsyms_name, R_OK)) {
+		pr_err("Invalid file: %s\n", symbol_conf.kallsyms_name);
+		return -EINVAL;
+	}
+	return 0;
+}
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
new file mode 100644
index 000000000..0b893dcc8
--- /dev/null
+++ b/tools/perf/util/symbol.h
@@ -0,0 +1,301 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SYMBOL
+#define __PERF_SYMBOL 1
+
+#include <linux/types.h>
+#include <linux/refcount.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <stdio.h>
+#include "path.h"
+#include "symbol_conf.h"
+#include "spark.h"
+
+#ifdef HAVE_LIBELF_SUPPORT
+#include <libelf.h>
+#include <gelf.h>
+#endif
+#include <elf.h>
+
+struct dso;
+struct map;
+struct maps;
+struct option;
+struct build_id;
+
+/*
+ * libelf 0.8.x and earlier do not support ELF_C_READ_MMAP;
+ * for newer versions we can use mmap to reduce memory usage:
+ */
+#ifdef ELF_C_READ_MMAP
+# define PERF_ELF_C_READ_MMAP ELF_C_READ_MMAP
+#else
+# define PERF_ELF_C_READ_MMAP ELF_C_READ
+#endif
+
+#ifdef HAVE_LIBELF_SUPPORT
+Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
+			     GElf_Shdr *shp, const char *name, size_t *idx);
+#endif
+
+/**
+ * A symtab entry. When allocated this may be preceded by an annotation (see
+ * symbol__annotation), a browser_index (see symbol__browser_index) and rb_node
+ * to sort by name (see struct symbol_name_rb_node).
+ */
+struct symbol {
+	struct rb_node	rb_node;
+	/** Range of symbol [start, end). */
+	u64		start;
+	u64		end;
+	/** Length of the string name. */
+	u16		namelen;
+	/** ELF symbol type as defined for st_info. E.g STT_OBJECT or STT_FUNC. */
+	u8		type:4;
+	/** ELF binding type as defined for st_info. E.g. STB_WEAK or STB_GLOBAL. */
+	u8		binding:4;
+	/** Set true for kernel symbols of idle routines. */
+	u8		idle:1;
+	/** Resolvable but tools ignore it (e.g. idle routines). */
+	u8		ignore:1;
+	/** Symbol for an inlined function. */
+	u8		inlined:1;
+	/** Has symbol__annotate2 been performed. */
+	u8		annotate2:1;
+	/** Architecture specific. Unused except on PPC where it holds st_other. */
+	u8		arch_sym;
+	/** The name of length namelen associated with the symbol. */
+	char		name[];
+};
+
+void symbol__delete(struct symbol *sym);
+void symbols__delete(struct rb_root_cached *symbols);
+
+/* symbols__for_each_entry - iterate over symbols (rb_root)
+ *
+ * @symbols: the rb_root of symbols
+ * @pos: the 'struct symbol *' to use as a loop cursor
+ * @nd: the 'struct rb_node *' to use as a temporary storage
+ */
+#define symbols__for_each_entry(symbols, pos, nd)			\
+	for (nd = rb_first_cached(symbols);					\
+	     nd && (pos = rb_entry(nd, struct symbol, rb_node));	\
+	     nd = rb_next(nd))
+
+static inline size_t symbol__size(const struct symbol *sym)
+{
+	return sym->end - sym->start;
+}
+
+struct strlist;
+struct intlist;
+
+struct symbol_name_rb_node {
+	struct rb_node	rb_node;
+	struct symbol	sym;
+};
+
+static inline int __symbol__join_symfs(char *bf, size_t size, const char *path)
+{
+	return path__join(bf, size, symbol_conf.symfs, path);
+}
+
+#define symbol__join_symfs(bf, path) __symbol__join_symfs(bf, sizeof(bf), path)
+
+extern int vmlinux_path__nr_entries;
+extern char **vmlinux_path;
+
+static inline void *symbol__priv(struct symbol *sym)
+{
+	return ((void *)sym) - symbol_conf.priv_size;
+}
+
+struct ref_reloc_sym {
+	const char	*name;
+	u64		addr;
+	u64		unrelocated_addr;
+};
+
+struct addr_location {
+	struct thread *thread;
+	struct maps   *maps;
+	struct map    *map;
+	struct symbol *sym;
+	const char    *srcline;
+	u64	      addr;
+	char	      level;
+	u8	      filtered;
+	u8	      cpumode;
+	s32	      cpu;
+	s32	      socket;
+};
+
+int dso__load(struct dso *dso, struct map *map);
+int dso__load_vmlinux(struct dso *dso, struct map *map,
+		      const char *vmlinux, bool vmlinux_allocated);
+int dso__load_vmlinux_path(struct dso *dso, struct map *map);
+int __dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map,
+			 bool no_kcore);
+int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map);
+
+void dso__insert_symbol(struct dso *dso,
+			struct symbol *sym);
+void dso__delete_symbol(struct dso *dso,
+			struct symbol *sym);
+
+struct symbol *dso__find_symbol(struct dso *dso, u64 addr);
+struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name);
+
+struct symbol *symbol__next_by_name(struct symbol *sym);
+
+struct symbol *dso__first_symbol(struct dso *dso);
+struct symbol *dso__last_symbol(struct dso *dso);
+struct symbol *dso__next_symbol(struct symbol *sym);
+
+enum dso_type dso__type_fd(int fd);
+
+int filename__read_build_id(const char *filename, struct build_id *id);
+int sysfs__read_build_id(const char *filename, struct build_id *bid);
+int modules__parse(const char *filename, void *arg,
+		   int (*process_module)(void *arg, const char *name,
+					 u64 start, u64 size));
+int filename__read_debuglink(const char *filename, char *debuglink,
+			     size_t size);
+
+struct perf_env;
+int symbol__init(struct perf_env *env);
+void symbol__exit(void);
+void symbol__elf_init(void);
+int symbol__annotation_init(void);
+
+struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name);
+size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
+				      const struct addr_location *al,
+				      bool unknown_as_addr,
+				      bool print_offsets, FILE *fp);
+size_t symbol__fprintf_symname_offs(const struct symbol *sym,
+				    const struct addr_location *al, FILE *fp);
+size_t __symbol__fprintf_symname(const struct symbol *sym,
+				 const struct addr_location *al,
+				 bool unknown_as_addr, FILE *fp);
+size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp);
+size_t symbol__fprintf(struct symbol *sym, FILE *fp);
+bool symbol__restricted_filename(const char *filename,
+				 const char *restricted_filename);
+int symbol__config_symfs(const struct option *opt __maybe_unused,
+			 const char *dir, int unset __maybe_unused);
+
+struct symsrc;
+
+#ifdef HAVE_LIBBFD_SUPPORT
+int dso__load_bfd_symbols(struct dso *dso, const char *debugfile);
+#endif
+
+int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+		  struct symsrc *runtime_ss, int kmodule);
+int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss);
+
+char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name);
+
+void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym,
+		       bool kernel);
+void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
+void symbols__fixup_duplicate(struct rb_root_cached *symbols);
+void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms);
+void maps__fixup_end(struct maps *maps);
+
+typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data);
+int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data,
+		    bool *is_64_bit);
+
+#define PERF_KCORE_EXTRACT "/tmp/perf-kcore-XXXXXX"
+
+struct kcore_extract {
+	char *kcore_filename;
+	u64 addr;
+	u64 offs;
+	u64 len;
+	char extract_filename[sizeof(PERF_KCORE_EXTRACT)];
+	int fd;
+};
+
+int kcore_extract__create(struct kcore_extract *kce);
+void kcore_extract__delete(struct kcore_extract *kce);
+
+int kcore_copy(const char *from_dir, const char *to_dir);
+int compare_proc_modules(const char *from, const char *to);
+
+int setup_list(struct strlist **list, const char *list_str,
+	       const char *list_name);
+int setup_intlist(struct intlist **list, const char *list_str,
+		  const char *list_name);
+
+#ifdef HAVE_LIBELF_SUPPORT
+bool elf__needs_adjust_symbols(GElf_Ehdr ehdr);
+void arch__sym_update(struct symbol *s, GElf_Sym *sym);
+#endif
+
+const char *arch__normalize_symbol_name(const char *name);
+#define SYMBOL_A 0
+#define SYMBOL_B 1
+
+int arch__compare_symbol_names(const char *namea, const char *nameb);
+int arch__compare_symbol_names_n(const char *namea, const char *nameb,
+				 unsigned int n);
+int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb);
+
+enum symbol_tag_include {
+	SYMBOL_TAG_INCLUDE__NONE = 0,
+	SYMBOL_TAG_INCLUDE__DEFAULT_ONLY
+};
+
+int symbol__match_symbol_name(const char *namea, const char *nameb,
+			      enum symbol_tag_include includes);
+
+/* structure containing an SDT note's info */
+struct sdt_note {
+	char *name;			/* name of the note*/
+	char *provider;			/* provider name */
+	char *args;
+	bool bit32;			/* whether the location is 32 bits? */
+	union {				/* location, base and semaphore addrs */
+		Elf64_Addr a64[3];
+		Elf32_Addr a32[3];
+	} addr;
+	struct list_head note_list;	/* SDT notes' list */
+};
+
+int get_sdt_note_list(struct list_head *head, const char *target);
+int cleanup_sdt_note_list(struct list_head *sdt_notes);
+int sdt_notes__get_count(struct list_head *start);
+
+#define SDT_PROBES_SCN ".probes"
+#define SDT_BASE_SCN ".stapsdt.base"
+#define SDT_NOTE_SCN  ".note.stapsdt"
+#define SDT_NOTE_TYPE 3
+#define SDT_NOTE_NAME "stapsdt"
+#define NR_ADDR 3
+
+enum {
+	SDT_NOTE_IDX_LOC = 0,
+	SDT_NOTE_IDX_BASE,
+	SDT_NOTE_IDX_REFCTR,
+};
+
+struct mem_info *mem_info__new(void);
+struct mem_info *mem_info__get(struct mem_info *mi);
+void   mem_info__put(struct mem_info *mi);
+
+static inline void __mem_info__zput(struct mem_info **mi)
+{
+	mem_info__put(*mi);
+	*mi = NULL;
+}
+
+#define mem_info__zput(mi) __mem_info__zput(&mi)
+
+int symbol__validate_sym_arguments(void);
+
+#endif /* __PERF_SYMBOL */
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
new file mode 100644
index 000000000..bc3d046fb
--- /dev/null
+++ b/tools/perf/util/symbol_conf.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SYMBOL_CONF
+#define __PERF_SYMBOL_CONF 1
+
+#include <stdbool.h>
+
+struct strlist;
+struct intlist;
+
+struct symbol_conf {
+	bool		nanosecs;
+	unsigned short	priv_size;
+	bool		try_vmlinux_path,
+			init_annotation,
+			force,
+			ignore_vmlinux,
+			ignore_vmlinux_buildid,
+			show_kernel_path,
+			use_modules,
+			allow_aliases,
+			sort_by_name,
+			show_nr_samples,
+			show_total_period,
+			use_callchain,
+			cumulate_callchain,
+			show_branchflag_count,
+			exclude_other,
+			show_cpu_utilization,
+			initialized,
+			kptr_restrict,
+			event_group,
+			demangle,
+			demangle_kernel,
+			filter_relative,
+			show_hist_headers,
+			branch_callstack,
+			has_filter,
+			show_ref_callgraph,
+			hide_unresolved,
+			raw_trace,
+			report_hierarchy,
+			report_block,
+			report_individual_block,
+			inline_name,
+			disable_add2line_warn,
+			buildid_mmap2,
+			guest_code;
+	const char	*vmlinux_name,
+			*kallsyms_name,
+			*source_prefix,
+			*field_sep,
+			*graph_function;
+	const char	*default_guest_vmlinux_name,
+			*default_guest_kallsyms,
+			*default_guest_modules;
+	const char	*guestmount;
+	const char	*dso_list_str,
+			*comm_list_str,
+			*pid_list_str,
+			*tid_list_str,
+			*sym_list_str,
+			*col_width_list_str,
+			*bt_stop_list_str;
+	unsigned long	time_quantum;
+       struct strlist	*dso_list,
+			*comm_list,
+			*sym_list,
+			*dso_from_list,
+			*dso_to_list,
+			*sym_from_list,
+			*sym_to_list,
+			*bt_stop_list;
+	struct intlist	*pid_list,
+			*tid_list,
+			*addr_list;
+	const char	*symfs;
+	int		res_sample;
+	int		pad_output_len_dso;
+	int		group_sort_idx;
+	int		addr_range;
+};
+
+extern struct symbol_conf symbol_conf;
+
+#endif // __PERF_SYMBOL_CONF
diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c
new file mode 100644
index 000000000..2664fb65e
--- /dev/null
+++ b/tools/perf/util/symbol_fprintf.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <elf.h>
+#include <inttypes.h>
+#include <stdio.h>
+
+#include "dso.h"
+#include "map.h"
+#include "symbol.h"
+
+size_t symbol__fprintf(struct symbol *sym, FILE *fp)
+{
+	return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %c %s\n",
+		       sym->start, sym->end,
+		       sym->binding == STB_GLOBAL ? 'g' :
+		       sym->binding == STB_LOCAL  ? 'l' : 'w',
+		       sym->name);
+}
+
+size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
+				      const struct addr_location *al,
+				      bool unknown_as_addr,
+				      bool print_offsets, FILE *fp)
+{
+	unsigned long offset;
+	size_t length;
+
+	if (sym) {
+		length = fprintf(fp, "%s", sym->name);
+		if (al && print_offsets) {
+			if (al->addr < sym->end)
+				offset = al->addr - sym->start;
+			else
+				offset = al->addr - al->map->start - sym->start;
+			length += fprintf(fp, "+0x%lx", offset);
+		}
+		return length;
+	} else if (al && unknown_as_addr)
+		return fprintf(fp, "[%#" PRIx64 "]", al->addr);
+	else
+		return fprintf(fp, "[unknown]");
+}
+
+size_t symbol__fprintf_symname_offs(const struct symbol *sym,
+				    const struct addr_location *al,
+				    FILE *fp)
+{
+	return __symbol__fprintf_symname_offs(sym, al, false, true, fp);
+}
+
+size_t __symbol__fprintf_symname(const struct symbol *sym,
+				 const struct addr_location *al,
+				 bool unknown_as_addr, FILE *fp)
+{
+	return __symbol__fprintf_symname_offs(sym, al, unknown_as_addr, false, fp);
+}
+
+size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp)
+{
+	return __symbol__fprintf_symname_offs(sym, NULL, false, false, fp);
+}
+
+size_t dso__fprintf_symbols_by_name(struct dso *dso,
+				    FILE *fp)
+{
+	size_t ret = 0;
+	struct rb_node *nd;
+	struct symbol_name_rb_node *pos;
+
+	for (nd = rb_first_cached(&dso->symbol_names); nd; nd = rb_next(nd)) {
+		pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
+		ret += fprintf(fp, "%s\n", pos->sym.name);
+	}
+
+	return ret;
+}
diff --git a/tools/perf/util/symsrc.h b/tools/perf/util/symsrc.h
new file mode 100644
index 000000000..2665b4bde
--- /dev/null
+++ b/tools/perf/util/symsrc.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SYMSRC_
+#define __PERF_SYMSRC_ 1
+
+#include <stdbool.h>
+#include <stddef.h>
+#include "dso.h"
+
+#ifdef HAVE_LIBELF_SUPPORT
+#include <libelf.h>
+#include <gelf.h>
+#endif
+#include <elf.h>
+
+struct symsrc {
+	char		     *name;
+	int		     fd;
+	enum dso_binary_type type;
+
+#ifdef HAVE_LIBELF_SUPPORT
+	Elf		     *elf;
+	GElf_Ehdr	     ehdr;
+
+	Elf_Scn		     *opdsec;
+	size_t		     opdidx;
+	GElf_Shdr	     opdshdr;
+
+	Elf_Scn		     *symtab;
+	GElf_Shdr	     symshdr;
+
+	Elf_Scn		     *dynsym;
+	size_t		     dynsym_idx;
+	GElf_Shdr	     dynshdr;
+
+	bool		     adjust_symbols;
+	bool		     is_64_bit;
+#endif
+};
+
+int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, enum dso_binary_type type);
+void symsrc__destroy(struct symsrc *ss);
+
+bool symsrc__has_symtab(struct symsrc *ss);
+bool symsrc__possibly_runtime(struct symsrc *ss);
+
+#endif /* __PERF_SYMSRC_ */
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
new file mode 100644
index 000000000..cccd293b5
--- /dev/null
+++ b/tools/perf/util/synthetic-events.c
@@ -0,0 +1,2406 @@
+// SPDX-License-Identifier: GPL-2.0-only 
+
+#include "util/cgroup.h"
+#include "util/data.h"
+#include "util/debug.h"
+#include "util/dso.h"
+#include "util/event.h"
+#include "util/evlist.h"
+#include "util/machine.h"
+#include "util/map.h"
+#include "util/map_symbol.h"
+#include "util/branch.h"
+#include "util/memswap.h"
+#include "util/namespaces.h"
+#include "util/session.h"
+#include "util/stat.h"
+#include "util/symbol.h"
+#include "util/synthetic-events.h"
+#include "util/target.h"
+#include "util/time-utils.h"
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <linux/perf_event.h>
+#include <asm/bug.h>
+#include <perf/evsel.h>
+#include <perf/cpumap.h>
+#include <internal/lib.h> // page_size
+#include <internal/threadmap.h>
+#include <perf/threadmap.h>
+#include <symbol/kallsyms.h>
+#include <dirent.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
+#include <api/fs/fs.h>
+#include <api/io.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#define DEFAULT_PROC_MAP_PARSE_TIMEOUT 500
+
+unsigned int proc_map_timeout = DEFAULT_PROC_MAP_PARSE_TIMEOUT;
+
+int perf_tool__process_synth_event(struct perf_tool *tool,
+				   union perf_event *event,
+				   struct machine *machine,
+				   perf_event__handler_t process)
+{
+	struct perf_sample synth_sample = {
+		.pid	   = -1,
+		.tid	   = -1,
+		.time	   = -1,
+		.stream_id = -1,
+		.cpu	   = -1,
+		.period	   = 1,
+		.cpumode   = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK,
+	};
+
+	return process(tool, event, &synth_sample, machine);
+};
+
+/*
+ * Assumes that the first 4095 bytes of /proc/pid/stat contains
+ * the comm, tgid and ppid.
+ */
+static int perf_event__get_comm_ids(pid_t pid, pid_t tid, char *comm, size_t len,
+				    pid_t *tgid, pid_t *ppid, bool *kernel)
+{
+	char bf[4096];
+	int fd;
+	size_t size = 0;
+	ssize_t n;
+	char *name, *tgids, *ppids, *vmpeak, *threads;
+
+	*tgid = -1;
+	*ppid = -1;
+
+	if (pid)
+		snprintf(bf, sizeof(bf), "/proc/%d/task/%d/status", pid, tid);
+	else
+		snprintf(bf, sizeof(bf), "/proc/%d/status", tid);
+
+	fd = open(bf, O_RDONLY);
+	if (fd < 0) {
+		pr_debug("couldn't open %s\n", bf);
+		return -1;
+	}
+
+	n = read(fd, bf, sizeof(bf) - 1);
+	close(fd);
+	if (n <= 0) {
+		pr_warning("Couldn't get COMM, tigd and ppid for pid %d\n",
+			   tid);
+		return -1;
+	}
+	bf[n] = '\0';
+
+	name = strstr(bf, "Name:");
+	tgids = strstr(name ?: bf, "Tgid:");
+	ppids = strstr(tgids ?: bf, "PPid:");
+	vmpeak = strstr(ppids ?: bf, "VmPeak:");
+
+	if (vmpeak)
+		threads = NULL;
+	else
+		threads = strstr(ppids ?: bf, "Threads:");
+
+	if (name) {
+		char *nl;
+
+		name = skip_spaces(name + 5);  /* strlen("Name:") */
+		nl = strchr(name, '\n');
+		if (nl)
+			*nl = '\0';
+
+		size = strlen(name);
+		if (size >= len)
+			size = len - 1;
+		memcpy(comm, name, size);
+		comm[size] = '\0';
+	} else {
+		pr_debug("Name: string not found for pid %d\n", tid);
+	}
+
+	if (tgids) {
+		tgids += 5;  /* strlen("Tgid:") */
+		*tgid = atoi(tgids);
+	} else {
+		pr_debug("Tgid: string not found for pid %d\n", tid);
+	}
+
+	if (ppids) {
+		ppids += 5;  /* strlen("PPid:") */
+		*ppid = atoi(ppids);
+	} else {
+		pr_debug("PPid: string not found for pid %d\n", tid);
+	}
+
+	if (!vmpeak && threads)
+		*kernel = true;
+	else
+		*kernel = false;
+
+	return 0;
+}
+
+static int perf_event__prepare_comm(union perf_event *event, pid_t pid, pid_t tid,
+				    struct machine *machine,
+				    pid_t *tgid, pid_t *ppid, bool *kernel)
+{
+	size_t size;
+
+	*ppid = -1;
+
+	memset(&event->comm, 0, sizeof(event->comm));
+
+	if (machine__is_host(machine)) {
+		if (perf_event__get_comm_ids(pid, tid, event->comm.comm,
+					     sizeof(event->comm.comm),
+					     tgid, ppid, kernel) != 0) {
+			return -1;
+		}
+	} else {
+		*tgid = machine->pid;
+	}
+
+	if (*tgid < 0)
+		return -1;
+
+	event->comm.pid = *tgid;
+	event->comm.header.type = PERF_RECORD_COMM;
+
+	size = strlen(event->comm.comm) + 1;
+	size = PERF_ALIGN(size, sizeof(u64));
+	memset(event->comm.comm + size, 0, machine->id_hdr_size);
+	event->comm.header.size = (sizeof(event->comm) -
+				(sizeof(event->comm.comm) - size) +
+				machine->id_hdr_size);
+	event->comm.tid = tid;
+
+	return 0;
+}
+
+pid_t perf_event__synthesize_comm(struct perf_tool *tool,
+					 union perf_event *event, pid_t pid,
+					 perf_event__handler_t process,
+					 struct machine *machine)
+{
+	pid_t tgid, ppid;
+	bool kernel_thread;
+
+	if (perf_event__prepare_comm(event, 0, pid, machine, &tgid, &ppid,
+				     &kernel_thread) != 0)
+		return -1;
+
+	if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
+		return -1;
+
+	return tgid;
+}
+
+static void perf_event__get_ns_link_info(pid_t pid, const char *ns,
+					 struct perf_ns_link_info *ns_link_info)
+{
+	struct stat64 st;
+	char proc_ns[128];
+
+	sprintf(proc_ns, "/proc/%u/ns/%s", pid, ns);
+	if (stat64(proc_ns, &st) == 0) {
+		ns_link_info->dev = st.st_dev;
+		ns_link_info->ino = st.st_ino;
+	}
+}
+
+int perf_event__synthesize_namespaces(struct perf_tool *tool,
+				      union perf_event *event,
+				      pid_t pid, pid_t tgid,
+				      perf_event__handler_t process,
+				      struct machine *machine)
+{
+	u32 idx;
+	struct perf_ns_link_info *ns_link_info;
+
+	if (!tool || !tool->namespace_events)
+		return 0;
+
+	memset(&event->namespaces, 0, (sizeof(event->namespaces) +
+	       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+	       machine->id_hdr_size));
+
+	event->namespaces.pid = tgid;
+	event->namespaces.tid = pid;
+
+	event->namespaces.nr_namespaces = NR_NAMESPACES;
+
+	ns_link_info = event->namespaces.link_info;
+
+	for (idx = 0; idx < event->namespaces.nr_namespaces; idx++)
+		perf_event__get_ns_link_info(pid, perf_ns__name(idx),
+					     &ns_link_info[idx]);
+
+	event->namespaces.header.type = PERF_RECORD_NAMESPACES;
+
+	event->namespaces.header.size = (sizeof(event->namespaces) +
+			(NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+			machine->id_hdr_size);
+
+	if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
+		return -1;
+
+	return 0;
+}
+
+static int perf_event__synthesize_fork(struct perf_tool *tool,
+				       union perf_event *event,
+				       pid_t pid, pid_t tgid, pid_t ppid,
+				       perf_event__handler_t process,
+				       struct machine *machine)
+{
+	memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size);
+
+	/*
+	 * for main thread set parent to ppid from status file. For other
+	 * threads set parent pid to main thread. ie., assume main thread
+	 * spawns all threads in a process
+	*/
+	if (tgid == pid) {
+		event->fork.ppid = ppid;
+		event->fork.ptid = ppid;
+	} else {
+		event->fork.ppid = tgid;
+		event->fork.ptid = tgid;
+	}
+	event->fork.pid  = tgid;
+	event->fork.tid  = pid;
+	event->fork.header.type = PERF_RECORD_FORK;
+	event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC;
+
+	event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size);
+
+	if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
+		return -1;
+
+	return 0;
+}
+
+static bool read_proc_maps_line(struct io *io, __u64 *start, __u64 *end,
+				u32 *prot, u32 *flags, __u64 *offset,
+				u32 *maj, u32 *min,
+				__u64 *inode,
+				ssize_t pathname_size, char *pathname)
+{
+	__u64 temp;
+	int ch;
+	char *start_pathname = pathname;
+
+	if (io__get_hex(io, start) != '-')
+		return false;
+	if (io__get_hex(io, end) != ' ')
+		return false;
+
+	/* map protection and flags bits */
+	*prot = 0;
+	ch = io__get_char(io);
+	if (ch == 'r')
+		*prot |= PROT_READ;
+	else if (ch != '-')
+		return false;
+	ch = io__get_char(io);
+	if (ch == 'w')
+		*prot |= PROT_WRITE;
+	else if (ch != '-')
+		return false;
+	ch = io__get_char(io);
+	if (ch == 'x')
+		*prot |= PROT_EXEC;
+	else if (ch != '-')
+		return false;
+	ch = io__get_char(io);
+	if (ch == 's')
+		*flags = MAP_SHARED;
+	else if (ch == 'p')
+		*flags = MAP_PRIVATE;
+	else
+		return false;
+	if (io__get_char(io) != ' ')
+		return false;
+
+	if (io__get_hex(io, offset) != ' ')
+		return false;
+
+	if (io__get_hex(io, &temp) != ':')
+		return false;
+	*maj = temp;
+	if (io__get_hex(io, &temp) != ' ')
+		return false;
+	*min = temp;
+
+	ch = io__get_dec(io, inode);
+	if (ch != ' ') {
+		*pathname = '\0';
+		return ch == '\n';
+	}
+	do {
+		ch = io__get_char(io);
+	} while (ch == ' ');
+	while (true) {
+		if (ch < 0)
+			return false;
+		if (ch == '\0' || ch == '\n' ||
+		    (pathname + 1 - start_pathname) >= pathname_size) {
+			*pathname = '\0';
+			return true;
+		}
+		*pathname++ = ch;
+		ch = io__get_char(io);
+	}
+}
+
+static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event,
+					     struct machine *machine,
+					     bool is_kernel)
+{
+	struct build_id bid;
+	struct nsinfo *nsi;
+	struct nscookie nc;
+	struct dso *dso = NULL;
+	struct dso_id id;
+	int rc;
+
+	if (is_kernel) {
+		rc = sysfs__read_build_id("/sys/kernel/notes", &bid);
+		goto out;
+	}
+
+	id.maj = event->maj;
+	id.min = event->min;
+	id.ino = event->ino;
+	id.ino_generation = event->ino_generation;
+
+	dso = dsos__findnew_id(&machine->dsos, event->filename, &id);
+	if (dso && dso->has_build_id) {
+		bid = dso->bid;
+		rc = 0;
+		goto out;
+	}
+
+	nsi = nsinfo__new(event->pid);
+	nsinfo__mountns_enter(nsi, &nc);
+
+	rc = filename__read_build_id(event->filename, &bid) > 0 ? 0 : -1;
+
+	nsinfo__mountns_exit(&nc);
+	nsinfo__put(nsi);
+
+out:
+	if (rc == 0) {
+		memcpy(event->build_id, bid.data, sizeof(bid.data));
+		event->build_id_size = (u8) bid.size;
+		event->header.misc |= PERF_RECORD_MISC_MMAP_BUILD_ID;
+		event->__reserved_1 = 0;
+		event->__reserved_2 = 0;
+
+		if (dso && !dso->has_build_id)
+			dso__set_build_id(dso, &bid);
+	} else {
+		if (event->filename[0] == '/') {
+			pr_debug2("Failed to read build ID for %s\n",
+				  event->filename);
+		}
+	}
+	dso__put(dso);
+}
+
+int perf_event__synthesize_mmap_events(struct perf_tool *tool,
+				       union perf_event *event,
+				       pid_t pid, pid_t tgid,
+				       perf_event__handler_t process,
+				       struct machine *machine,
+				       bool mmap_data)
+{
+	unsigned long long t;
+	char bf[BUFSIZ];
+	struct io io;
+	bool truncation = false;
+	unsigned long long timeout = proc_map_timeout * 1000000ULL;
+	int rc = 0;
+	const char *hugetlbfs_mnt = hugetlbfs__mountpoint();
+	int hugetlbfs_mnt_len = hugetlbfs_mnt ? strlen(hugetlbfs_mnt) : 0;
+
+	if (machine__is_default_guest(machine))
+		return 0;
+
+	snprintf(bf, sizeof(bf), "%s/proc/%d/task/%d/maps",
+		machine->root_dir, pid, pid);
+
+	io.fd = open(bf, O_RDONLY, 0);
+	if (io.fd < 0) {
+		/*
+		 * We raced with a task exiting - just return:
+		 */
+		pr_debug("couldn't open %s\n", bf);
+		return -1;
+	}
+	io__init(&io, io.fd, bf, sizeof(bf));
+
+	event->header.type = PERF_RECORD_MMAP2;
+	t = rdclock();
+
+	while (!io.eof) {
+		static const char anonstr[] = "//anon";
+		size_t size, aligned_size;
+
+		/* ensure null termination since stack will be reused. */
+		event->mmap2.filename[0] = '\0';
+
+		/* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
+		if (!read_proc_maps_line(&io,
+					&event->mmap2.start,
+					&event->mmap2.len,
+					&event->mmap2.prot,
+					&event->mmap2.flags,
+					&event->mmap2.pgoff,
+					&event->mmap2.maj,
+					&event->mmap2.min,
+					&event->mmap2.ino,
+					sizeof(event->mmap2.filename),
+					event->mmap2.filename))
+			continue;
+
+		if ((rdclock() - t) > timeout) {
+			pr_warning("Reading %s/proc/%d/task/%d/maps time out. "
+				   "You may want to increase "
+				   "the time limit by --proc-map-timeout\n",
+				   machine->root_dir, pid, pid);
+			truncation = true;
+			goto out;
+		}
+
+		event->mmap2.ino_generation = 0;
+
+		/*
+		 * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
+		 */
+		if (machine__is_host(machine))
+			event->header.misc = PERF_RECORD_MISC_USER;
+		else
+			event->header.misc = PERF_RECORD_MISC_GUEST_USER;
+
+		if ((event->mmap2.prot & PROT_EXEC) == 0) {
+			if (!mmap_data || (event->mmap2.prot & PROT_READ) == 0)
+				continue;
+
+			event->header.misc |= PERF_RECORD_MISC_MMAP_DATA;
+		}
+
+out:
+		if (truncation)
+			event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT;
+
+		if (!strcmp(event->mmap2.filename, ""))
+			strcpy(event->mmap2.filename, anonstr);
+
+		if (hugetlbfs_mnt_len &&
+		    !strncmp(event->mmap2.filename, hugetlbfs_mnt,
+			     hugetlbfs_mnt_len)) {
+			strcpy(event->mmap2.filename, anonstr);
+			event->mmap2.flags |= MAP_HUGETLB;
+		}
+
+		size = strlen(event->mmap2.filename) + 1;
+		aligned_size = PERF_ALIGN(size, sizeof(u64));
+		event->mmap2.len -= event->mmap.start;
+		event->mmap2.header.size = (sizeof(event->mmap2) -
+					(sizeof(event->mmap2.filename) - aligned_size));
+		memset(event->mmap2.filename + size, 0, machine->id_hdr_size +
+			(aligned_size - size));
+		event->mmap2.header.size += machine->id_hdr_size;
+		event->mmap2.pid = tgid;
+		event->mmap2.tid = pid;
+
+		if (symbol_conf.buildid_mmap2)
+			perf_record_mmap2__read_build_id(&event->mmap2, machine, false);
+
+		if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
+			rc = -1;
+			break;
+		}
+
+		if (truncation)
+			break;
+	}
+
+	close(io.fd);
+	return rc;
+}
+
+#ifdef HAVE_FILE_HANDLE
+static int perf_event__synthesize_cgroup(struct perf_tool *tool,
+					 union perf_event *event,
+					 char *path, size_t mount_len,
+					 perf_event__handler_t process,
+					 struct machine *machine)
+{
+	size_t event_size = sizeof(event->cgroup) - sizeof(event->cgroup.path);
+	size_t path_len = strlen(path) - mount_len + 1;
+	struct {
+		struct file_handle fh;
+		uint64_t cgroup_id;
+	} handle;
+	int mount_id;
+
+	while (path_len % sizeof(u64))
+		path[mount_len + path_len++] = '\0';
+
+	memset(&event->cgroup, 0, event_size);
+
+	event->cgroup.header.type = PERF_RECORD_CGROUP;
+	event->cgroup.header.size = event_size + path_len + machine->id_hdr_size;
+
+	handle.fh.handle_bytes = sizeof(handle.cgroup_id);
+	if (name_to_handle_at(AT_FDCWD, path, &handle.fh, &mount_id, 0) < 0) {
+		pr_debug("stat failed: %s\n", path);
+		return -1;
+	}
+
+	event->cgroup.id = handle.cgroup_id;
+	strncpy(event->cgroup.path, path + mount_len, path_len);
+	memset(event->cgroup.path + path_len, 0, machine->id_hdr_size);
+
+	if (perf_tool__process_synth_event(tool, event, machine, process) < 0) {
+		pr_debug("process synth event failed\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int perf_event__walk_cgroup_tree(struct perf_tool *tool,
+					union perf_event *event,
+					char *path, size_t mount_len,
+					perf_event__handler_t process,
+					struct machine *machine)
+{
+	size_t pos = strlen(path);
+	DIR *d;
+	struct dirent *dent;
+	int ret = 0;
+
+	if (perf_event__synthesize_cgroup(tool, event, path, mount_len,
+					  process, machine) < 0)
+		return -1;
+
+	d = opendir(path);
+	if (d == NULL) {
+		pr_debug("failed to open directory: %s\n", path);
+		return -1;
+	}
+
+	while ((dent = readdir(d)) != NULL) {
+		if (dent->d_type != DT_DIR)
+			continue;
+		if (!strcmp(dent->d_name, ".") ||
+		    !strcmp(dent->d_name, ".."))
+			continue;
+
+		/* any sane path should be less than PATH_MAX */
+		if (strlen(path) + strlen(dent->d_name) + 1 >= PATH_MAX)
+			continue;
+
+		if (path[pos - 1] != '/')
+			strcat(path, "/");
+		strcat(path, dent->d_name);
+
+		ret = perf_event__walk_cgroup_tree(tool, event, path,
+						   mount_len, process, machine);
+		if (ret < 0)
+			break;
+
+		path[pos] = '\0';
+	}
+
+	closedir(d);
+	return ret;
+}
+
+int perf_event__synthesize_cgroups(struct perf_tool *tool,
+				   perf_event__handler_t process,
+				   struct machine *machine)
+{
+	union perf_event event;
+	char cgrp_root[PATH_MAX];
+	size_t mount_len;  /* length of mount point in the path */
+
+	if (!tool || !tool->cgroup_events)
+		return 0;
+
+	if (cgroupfs_find_mountpoint(cgrp_root, PATH_MAX, "perf_event") < 0) {
+		pr_debug("cannot find cgroup mount point\n");
+		return -1;
+	}
+
+	mount_len = strlen(cgrp_root);
+	/* make sure the path starts with a slash (after mount point) */
+	strcat(cgrp_root, "/");
+
+	if (perf_event__walk_cgroup_tree(tool, &event, cgrp_root, mount_len,
+					 process, machine) < 0)
+		return -1;
+
+	return 0;
+}
+#else
+int perf_event__synthesize_cgroups(struct perf_tool *tool __maybe_unused,
+				   perf_event__handler_t process __maybe_unused,
+				   struct machine *machine __maybe_unused)
+{
+	return -1;
+}
+#endif
+
+int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process,
+				   struct machine *machine)
+{
+	int rc = 0;
+	struct map *pos;
+	struct maps *maps = machine__kernel_maps(machine);
+	union perf_event *event;
+	size_t size = symbol_conf.buildid_mmap2 ?
+			sizeof(event->mmap2) : sizeof(event->mmap);
+
+	event = zalloc(size + machine->id_hdr_size);
+	if (event == NULL) {
+		pr_debug("Not enough memory synthesizing mmap event "
+			 "for kernel modules\n");
+		return -1;
+	}
+
+	/*
+	 * kernel uses 0 for user space maps, see kernel/perf_event.c
+	 * __perf_event_mmap
+	 */
+	if (machine__is_host(machine))
+		event->header.misc = PERF_RECORD_MISC_KERNEL;
+	else
+		event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
+
+	maps__for_each_entry(maps, pos) {
+		if (!__map__is_kmodule(pos))
+			continue;
+
+		if (symbol_conf.buildid_mmap2) {
+			size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
+			event->mmap2.header.type = PERF_RECORD_MMAP2;
+			event->mmap2.header.size = (sizeof(event->mmap2) -
+						(sizeof(event->mmap2.filename) - size));
+			memset(event->mmap2.filename + size, 0, machine->id_hdr_size);
+			event->mmap2.header.size += machine->id_hdr_size;
+			event->mmap2.start = pos->start;
+			event->mmap2.len   = pos->end - pos->start;
+			event->mmap2.pid   = machine->pid;
+
+			memcpy(event->mmap2.filename, pos->dso->long_name,
+			       pos->dso->long_name_len + 1);
+
+			perf_record_mmap2__read_build_id(&event->mmap2, machine, false);
+		} else {
+			size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
+			event->mmap.header.type = PERF_RECORD_MMAP;
+			event->mmap.header.size = (sizeof(event->mmap) -
+						(sizeof(event->mmap.filename) - size));
+			memset(event->mmap.filename + size, 0, machine->id_hdr_size);
+			event->mmap.header.size += machine->id_hdr_size;
+			event->mmap.start = pos->start;
+			event->mmap.len   = pos->end - pos->start;
+			event->mmap.pid   = machine->pid;
+
+			memcpy(event->mmap.filename, pos->dso->long_name,
+			       pos->dso->long_name_len + 1);
+		}
+
+		if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
+			rc = -1;
+			break;
+		}
+	}
+
+	free(event);
+	return rc;
+}
+
+static int filter_task(const struct dirent *dirent)
+{
+	return isdigit(dirent->d_name[0]);
+}
+
+static int __event__synthesize_thread(union perf_event *comm_event,
+				      union perf_event *mmap_event,
+				      union perf_event *fork_event,
+				      union perf_event *namespaces_event,
+				      pid_t pid, int full, perf_event__handler_t process,
+				      struct perf_tool *tool, struct machine *machine,
+				      bool needs_mmap, bool mmap_data)
+{
+	char filename[PATH_MAX];
+	struct dirent **dirent;
+	pid_t tgid, ppid;
+	int rc = 0;
+	int i, n;
+
+	/* special case: only send one comm event using passed in pid */
+	if (!full) {
+		tgid = perf_event__synthesize_comm(tool, comm_event, pid,
+						   process, machine);
+
+		if (tgid == -1)
+			return -1;
+
+		if (perf_event__synthesize_namespaces(tool, namespaces_event, pid,
+						      tgid, process, machine) < 0)
+			return -1;
+
+		/*
+		 * send mmap only for thread group leader
+		 * see thread__init_maps()
+		 */
+		if (pid == tgid && needs_mmap &&
+		    perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
+						       process, machine, mmap_data))
+			return -1;
+
+		return 0;
+	}
+
+	if (machine__is_default_guest(machine))
+		return 0;
+
+	snprintf(filename, sizeof(filename), "%s/proc/%d/task",
+		 machine->root_dir, pid);
+
+	n = scandir(filename, &dirent, filter_task, NULL);
+	if (n < 0)
+		return n;
+
+	for (i = 0; i < n; i++) {
+		char *end;
+		pid_t _pid;
+		bool kernel_thread = false;
+
+		_pid = strtol(dirent[i]->d_name, &end, 10);
+		if (*end)
+			continue;
+
+		/* some threads may exit just after scan, ignore it */
+		if (perf_event__prepare_comm(comm_event, pid, _pid, machine,
+					     &tgid, &ppid, &kernel_thread) != 0)
+			continue;
+
+		rc = -1;
+		if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
+						ppid, process, machine) < 0)
+			break;
+
+		if (perf_event__synthesize_namespaces(tool, namespaces_event, _pid,
+						      tgid, process, machine) < 0)
+			break;
+
+		/*
+		 * Send the prepared comm event
+		 */
+		if (perf_tool__process_synth_event(tool, comm_event, machine, process) != 0)
+			break;
+
+		rc = 0;
+		if (_pid == pid && !kernel_thread && needs_mmap) {
+			/* process the parent's maps too */
+			rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
+						process, machine, mmap_data);
+			if (rc)
+				break;
+		}
+	}
+
+	for (i = 0; i < n; i++)
+		zfree(&dirent[i]);
+	free(dirent);
+
+	return rc;
+}
+
+int perf_event__synthesize_thread_map(struct perf_tool *tool,
+				      struct perf_thread_map *threads,
+				      perf_event__handler_t process,
+				      struct machine *machine,
+				      bool needs_mmap, bool mmap_data)
+{
+	union perf_event *comm_event, *mmap_event, *fork_event;
+	union perf_event *namespaces_event;
+	int err = -1, thread, j;
+
+	comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
+	if (comm_event == NULL)
+		goto out;
+
+	mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size);
+	if (mmap_event == NULL)
+		goto out_free_comm;
+
+	fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size);
+	if (fork_event == NULL)
+		goto out_free_mmap;
+
+	namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
+				  (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+				  machine->id_hdr_size);
+	if (namespaces_event == NULL)
+		goto out_free_fork;
+
+	err = 0;
+	for (thread = 0; thread < threads->nr; ++thread) {
+		if (__event__synthesize_thread(comm_event, mmap_event,
+					       fork_event, namespaces_event,
+					       perf_thread_map__pid(threads, thread), 0,
+					       process, tool, machine,
+					       needs_mmap, mmap_data)) {
+			err = -1;
+			break;
+		}
+
+		/*
+		 * comm.pid is set to thread group id by
+		 * perf_event__synthesize_comm
+		 */
+		if ((int) comm_event->comm.pid != perf_thread_map__pid(threads, thread)) {
+			bool need_leader = true;
+
+			/* is thread group leader in thread_map? */
+			for (j = 0; j < threads->nr; ++j) {
+				if ((int) comm_event->comm.pid == perf_thread_map__pid(threads, j)) {
+					need_leader = false;
+					break;
+				}
+			}
+
+			/* if not, generate events for it */
+			if (need_leader &&
+			    __event__synthesize_thread(comm_event, mmap_event,
+						       fork_event, namespaces_event,
+						       comm_event->comm.pid, 0,
+						       process, tool, machine,
+						       needs_mmap, mmap_data)) {
+				err = -1;
+				break;
+			}
+		}
+	}
+	free(namespaces_event);
+out_free_fork:
+	free(fork_event);
+out_free_mmap:
+	free(mmap_event);
+out_free_comm:
+	free(comm_event);
+out:
+	return err;
+}
+
+static int __perf_event__synthesize_threads(struct perf_tool *tool,
+					    perf_event__handler_t process,
+					    struct machine *machine,
+					    bool needs_mmap,
+					    bool mmap_data,
+					    struct dirent **dirent,
+					    int start,
+					    int num)
+{
+	union perf_event *comm_event, *mmap_event, *fork_event;
+	union perf_event *namespaces_event;
+	int err = -1;
+	char *end;
+	pid_t pid;
+	int i;
+
+	comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
+	if (comm_event == NULL)
+		goto out;
+
+	mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size);
+	if (mmap_event == NULL)
+		goto out_free_comm;
+
+	fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size);
+	if (fork_event == NULL)
+		goto out_free_mmap;
+
+	namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
+				  (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+				  machine->id_hdr_size);
+	if (namespaces_event == NULL)
+		goto out_free_fork;
+
+	for (i = start; i < start + num; i++) {
+		if (!isdigit(dirent[i]->d_name[0]))
+			continue;
+
+		pid = (pid_t)strtol(dirent[i]->d_name, &end, 10);
+		/* only interested in proper numerical dirents */
+		if (*end)
+			continue;
+		/*
+		 * We may race with exiting thread, so don't stop just because
+		 * one thread couldn't be synthesized.
+		 */
+		__event__synthesize_thread(comm_event, mmap_event, fork_event,
+					   namespaces_event, pid, 1, process,
+					   tool, machine, needs_mmap, mmap_data);
+	}
+	err = 0;
+
+	free(namespaces_event);
+out_free_fork:
+	free(fork_event);
+out_free_mmap:
+	free(mmap_event);
+out_free_comm:
+	free(comm_event);
+out:
+	return err;
+}
+
+struct synthesize_threads_arg {
+	struct perf_tool *tool;
+	perf_event__handler_t process;
+	struct machine *machine;
+	bool needs_mmap;
+	bool mmap_data;
+	struct dirent **dirent;
+	int num;
+	int start;
+};
+
+static void *synthesize_threads_worker(void *arg)
+{
+	struct synthesize_threads_arg *args = arg;
+
+	__perf_event__synthesize_threads(args->tool, args->process,
+					 args->machine,
+					 args->needs_mmap, args->mmap_data,
+					 args->dirent,
+					 args->start, args->num);
+	return NULL;
+}
+
+int perf_event__synthesize_threads(struct perf_tool *tool,
+				   perf_event__handler_t process,
+				   struct machine *machine,
+				   bool needs_mmap, bool mmap_data,
+				   unsigned int nr_threads_synthesize)
+{
+	struct synthesize_threads_arg *args = NULL;
+	pthread_t *synthesize_threads = NULL;
+	char proc_path[PATH_MAX];
+	struct dirent **dirent;
+	int num_per_thread;
+	int m, n, i, j;
+	int thread_nr;
+	int base = 0;
+	int err = -1;
+
+
+	if (machine__is_default_guest(machine))
+		return 0;
+
+	snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
+	n = scandir(proc_path, &dirent, filter_task, NULL);
+	if (n < 0)
+		return err;
+
+	if (nr_threads_synthesize == UINT_MAX)
+		thread_nr = sysconf(_SC_NPROCESSORS_ONLN);
+	else
+		thread_nr = nr_threads_synthesize;
+
+	if (thread_nr <= 1) {
+		err = __perf_event__synthesize_threads(tool, process,
+						       machine,
+						       needs_mmap, mmap_data,
+						       dirent, base, n);
+		goto free_dirent;
+	}
+	if (thread_nr > n)
+		thread_nr = n;
+
+	synthesize_threads = calloc(sizeof(pthread_t), thread_nr);
+	if (synthesize_threads == NULL)
+		goto free_dirent;
+
+	args = calloc(sizeof(*args), thread_nr);
+	if (args == NULL)
+		goto free_threads;
+
+	num_per_thread = n / thread_nr;
+	m = n % thread_nr;
+	for (i = 0; i < thread_nr; i++) {
+		args[i].tool = tool;
+		args[i].process = process;
+		args[i].machine = machine;
+		args[i].needs_mmap = needs_mmap;
+		args[i].mmap_data = mmap_data;
+		args[i].dirent = dirent;
+	}
+	for (i = 0; i < m; i++) {
+		args[i].num = num_per_thread + 1;
+		args[i].start = i * args[i].num;
+	}
+	if (i != 0)
+		base = args[i-1].start + args[i-1].num;
+	for (j = i; j < thread_nr; j++) {
+		args[j].num = num_per_thread;
+		args[j].start = base + (j - i) * args[i].num;
+	}
+
+	for (i = 0; i < thread_nr; i++) {
+		if (pthread_create(&synthesize_threads[i], NULL,
+				   synthesize_threads_worker, &args[i]))
+			goto out_join;
+	}
+	err = 0;
+out_join:
+	for (i = 0; i < thread_nr; i++)
+		pthread_join(synthesize_threads[i], NULL);
+	free(args);
+free_threads:
+	free(synthesize_threads);
+free_dirent:
+	for (i = 0; i < n; i++)
+		zfree(&dirent[i]);
+	free(dirent);
+
+	return err;
+}
+
+int __weak perf_event__synthesize_extra_kmaps(struct perf_tool *tool __maybe_unused,
+					      perf_event__handler_t process __maybe_unused,
+					      struct machine *machine __maybe_unused)
+{
+	return 0;
+}
+
+static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+						perf_event__handler_t process,
+						struct machine *machine)
+{
+	union perf_event *event;
+	size_t size = symbol_conf.buildid_mmap2 ?
+			sizeof(event->mmap2) : sizeof(event->mmap);
+	struct map *map = machine__kernel_map(machine);
+	struct kmap *kmap;
+	int err;
+
+	if (map == NULL)
+		return -1;
+
+	kmap = map__kmap(map);
+	if (!kmap->ref_reloc_sym)
+		return -1;
+
+	/*
+	 * We should get this from /sys/kernel/sections/.text, but till that is
+	 * available use this, and after it is use this as a fallback for older
+	 * kernels.
+	 */
+	event = zalloc(size + machine->id_hdr_size);
+	if (event == NULL) {
+		pr_debug("Not enough memory synthesizing mmap event "
+			 "for kernel modules\n");
+		return -1;
+	}
+
+	if (machine__is_host(machine)) {
+		/*
+		 * kernel uses PERF_RECORD_MISC_USER for user space maps,
+		 * see kernel/perf_event.c __perf_event_mmap
+		 */
+		event->header.misc = PERF_RECORD_MISC_KERNEL;
+	} else {
+		event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
+	}
+
+	if (symbol_conf.buildid_mmap2) {
+		size = snprintf(event->mmap2.filename, sizeof(event->mmap2.filename),
+				"%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1;
+		size = PERF_ALIGN(size, sizeof(u64));
+		event->mmap2.header.type = PERF_RECORD_MMAP2;
+		event->mmap2.header.size = (sizeof(event->mmap2) -
+				(sizeof(event->mmap2.filename) - size) + machine->id_hdr_size);
+		event->mmap2.pgoff = kmap->ref_reloc_sym->addr;
+		event->mmap2.start = map->start;
+		event->mmap2.len   = map->end - event->mmap.start;
+		event->mmap2.pid   = machine->pid;
+
+		perf_record_mmap2__read_build_id(&event->mmap2, machine, true);
+	} else {
+		size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
+				"%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1;
+		size = PERF_ALIGN(size, sizeof(u64));
+		event->mmap.header.type = PERF_RECORD_MMAP;
+		event->mmap.header.size = (sizeof(event->mmap) -
+				(sizeof(event->mmap.filename) - size) + machine->id_hdr_size);
+		event->mmap.pgoff = kmap->ref_reloc_sym->addr;
+		event->mmap.start = map->start;
+		event->mmap.len   = map->end - event->mmap.start;
+		event->mmap.pid   = machine->pid;
+	}
+
+	err = perf_tool__process_synth_event(tool, event, machine, process);
+	free(event);
+
+	return err;
+}
+
+int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+				       perf_event__handler_t process,
+				       struct machine *machine)
+{
+	int err;
+
+	err = __perf_event__synthesize_kernel_mmap(tool, process, machine);
+	if (err < 0)
+		return err;
+
+	return perf_event__synthesize_extra_kmaps(tool, process, machine);
+}
+
+int perf_event__synthesize_thread_map2(struct perf_tool *tool,
+				      struct perf_thread_map *threads,
+				      perf_event__handler_t process,
+				      struct machine *machine)
+{
+	union perf_event *event;
+	int i, err, size;
+
+	size  = sizeof(event->thread_map);
+	size +=	threads->nr * sizeof(event->thread_map.entries[0]);
+
+	event = zalloc(size);
+	if (!event)
+		return -ENOMEM;
+
+	event->header.type = PERF_RECORD_THREAD_MAP;
+	event->header.size = size;
+	event->thread_map.nr = threads->nr;
+
+	for (i = 0; i < threads->nr; i++) {
+		struct perf_record_thread_map_entry *entry = &event->thread_map.entries[i];
+		char *comm = perf_thread_map__comm(threads, i);
+
+		if (!comm)
+			comm = (char *) "";
+
+		entry->pid = perf_thread_map__pid(threads, i);
+		strncpy((char *) &entry->comm, comm, sizeof(entry->comm));
+	}
+
+	err = process(tool, event, NULL, machine);
+
+	free(event);
+	return err;
+}
+
+struct synthesize_cpu_map_data {
+	const struct perf_cpu_map *map;
+	int nr;
+	int min_cpu;
+	int max_cpu;
+	int has_any_cpu;
+	int type;
+	size_t size;
+	struct perf_record_cpu_map_data *data;
+};
+
+static void synthesize_cpus(struct synthesize_cpu_map_data *data)
+{
+	data->data->type = PERF_CPU_MAP__CPUS;
+	data->data->cpus_data.nr = data->nr;
+	for (int i = 0; i < data->nr; i++)
+		data->data->cpus_data.cpu[i] = perf_cpu_map__cpu(data->map, i).cpu;
+}
+
+static void synthesize_mask(struct synthesize_cpu_map_data *data)
+{
+	int idx;
+	struct perf_cpu cpu;
+
+	/* Due to padding, the 4bytes per entry mask variant is always smaller. */
+	data->data->type = PERF_CPU_MAP__MASK;
+	data->data->mask32_data.nr = BITS_TO_U32(data->max_cpu);
+	data->data->mask32_data.long_size = 4;
+
+	perf_cpu_map__for_each_cpu(cpu, idx, data->map) {
+		int bit_word = cpu.cpu / 32;
+		u32 bit_mask = 1U << (cpu.cpu & 31);
+
+		data->data->mask32_data.mask[bit_word] |= bit_mask;
+	}
+}
+
+static void synthesize_range_cpus(struct synthesize_cpu_map_data *data)
+{
+	data->data->type = PERF_CPU_MAP__RANGE_CPUS;
+	data->data->range_cpu_data.any_cpu = data->has_any_cpu;
+	data->data->range_cpu_data.start_cpu = data->min_cpu;
+	data->data->range_cpu_data.end_cpu = data->max_cpu;
+}
+
+static void *cpu_map_data__alloc(struct synthesize_cpu_map_data *syn_data,
+				 size_t header_size)
+{
+	size_t size_cpus, size_mask;
+
+	syn_data->nr = perf_cpu_map__nr(syn_data->map);
+	syn_data->has_any_cpu = (perf_cpu_map__cpu(syn_data->map, 0).cpu == -1) ? 1 : 0;
+
+	syn_data->min_cpu = perf_cpu_map__cpu(syn_data->map, syn_data->has_any_cpu).cpu;
+	syn_data->max_cpu = perf_cpu_map__max(syn_data->map).cpu;
+	if (syn_data->max_cpu - syn_data->min_cpu + 1 == syn_data->nr - syn_data->has_any_cpu) {
+		/* A consecutive range of CPUs can be encoded using a range. */
+		assert(sizeof(u16) + sizeof(struct perf_record_range_cpu_map) == sizeof(u64));
+		syn_data->type = PERF_CPU_MAP__RANGE_CPUS;
+		syn_data->size = header_size + sizeof(u64);
+		return zalloc(syn_data->size);
+	}
+
+	size_cpus = sizeof(u16) + sizeof(struct cpu_map_entries) + syn_data->nr * sizeof(u16);
+	/* Due to padding, the 4bytes per entry mask variant is always smaller. */
+	size_mask = sizeof(u16) + sizeof(struct perf_record_mask_cpu_map32) +
+		BITS_TO_U32(syn_data->max_cpu) * sizeof(__u32);
+	if (syn_data->has_any_cpu || size_cpus < size_mask) {
+		/* Follow the CPU map encoding. */
+		syn_data->type = PERF_CPU_MAP__CPUS;
+		syn_data->size = header_size + PERF_ALIGN(size_cpus, sizeof(u64));
+		return zalloc(syn_data->size);
+	}
+	/* Encode using a bitmask. */
+	syn_data->type = PERF_CPU_MAP__MASK;
+	syn_data->size = header_size + PERF_ALIGN(size_mask, sizeof(u64));
+	return zalloc(syn_data->size);
+}
+
+static void cpu_map_data__synthesize(struct synthesize_cpu_map_data *data)
+{
+	switch (data->type) {
+	case PERF_CPU_MAP__CPUS:
+		synthesize_cpus(data);
+		break;
+	case PERF_CPU_MAP__MASK:
+		synthesize_mask(data);
+		break;
+	case PERF_CPU_MAP__RANGE_CPUS:
+		synthesize_range_cpus(data);
+		break;
+	default:
+		break;
+	}
+}
+
+static struct perf_record_cpu_map *cpu_map_event__new(const struct perf_cpu_map *map)
+{
+	struct synthesize_cpu_map_data syn_data = { .map = map };
+	struct perf_record_cpu_map *event;
+
+
+	event = cpu_map_data__alloc(&syn_data, sizeof(struct perf_event_header));
+	if (!event)
+		return NULL;
+
+	syn_data.data = &event->data;
+	event->header.type = PERF_RECORD_CPU_MAP;
+	event->header.size = syn_data.size;
+	cpu_map_data__synthesize(&syn_data);
+	return event;
+}
+
+
+int perf_event__synthesize_cpu_map(struct perf_tool *tool,
+				   const struct perf_cpu_map *map,
+				   perf_event__handler_t process,
+				   struct machine *machine)
+{
+	struct perf_record_cpu_map *event;
+	int err;
+
+	event = cpu_map_event__new(map);
+	if (!event)
+		return -ENOMEM;
+
+	err = process(tool, (union perf_event *) event, NULL, machine);
+
+	free(event);
+	return err;
+}
+
+int perf_event__synthesize_stat_config(struct perf_tool *tool,
+				       struct perf_stat_config *config,
+				       perf_event__handler_t process,
+				       struct machine *machine)
+{
+	struct perf_record_stat_config *event;
+	int size, i = 0, err;
+
+	size  = sizeof(*event);
+	size += (PERF_STAT_CONFIG_TERM__MAX * sizeof(event->data[0]));
+
+	event = zalloc(size);
+	if (!event)
+		return -ENOMEM;
+
+	event->header.type = PERF_RECORD_STAT_CONFIG;
+	event->header.size = size;
+	event->nr          = PERF_STAT_CONFIG_TERM__MAX;
+
+#define ADD(__term, __val)					\
+	event->data[i].tag = PERF_STAT_CONFIG_TERM__##__term;	\
+	event->data[i].val = __val;				\
+	i++;
+
+	ADD(AGGR_MODE,	config->aggr_mode)
+	ADD(INTERVAL,	config->interval)
+	ADD(SCALE,	config->scale)
+
+	WARN_ONCE(i != PERF_STAT_CONFIG_TERM__MAX,
+		  "stat config terms unbalanced\n");
+#undef ADD
+
+	err = process(tool, (union perf_event *) event, NULL, machine);
+
+	free(event);
+	return err;
+}
+
+int perf_event__synthesize_stat(struct perf_tool *tool,
+				struct perf_cpu cpu, u32 thread, u64 id,
+				struct perf_counts_values *count,
+				perf_event__handler_t process,
+				struct machine *machine)
+{
+	struct perf_record_stat event;
+
+	event.header.type = PERF_RECORD_STAT;
+	event.header.size = sizeof(event);
+	event.header.misc = 0;
+
+	event.id        = id;
+	event.cpu       = cpu.cpu;
+	event.thread    = thread;
+	event.val       = count->val;
+	event.ena       = count->ena;
+	event.run       = count->run;
+
+	return process(tool, (union perf_event *) &event, NULL, machine);
+}
+
+int perf_event__synthesize_stat_round(struct perf_tool *tool,
+				      u64 evtime, u64 type,
+				      perf_event__handler_t process,
+				      struct machine *machine)
+{
+	struct perf_record_stat_round event;
+
+	event.header.type = PERF_RECORD_STAT_ROUND;
+	event.header.size = sizeof(event);
+	event.header.misc = 0;
+
+	event.time = evtime;
+	event.type = type;
+
+	return process(tool, (union perf_event *) &event, NULL, machine);
+}
+
+size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format)
+{
+	size_t sz, result = sizeof(struct perf_record_sample);
+
+	if (type & PERF_SAMPLE_IDENTIFIER)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_IP)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_TID)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_TIME)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_ADDR)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_ID)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_STREAM_ID)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_CPU)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_PERIOD)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_READ) {
+		result += sizeof(u64);
+		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+			result += sizeof(u64);
+		if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+			result += sizeof(u64);
+		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+		if (read_format & PERF_FORMAT_GROUP) {
+			sz = sample_read_value_size(read_format);
+			result += sz * sample->read.group.nr;
+		} else {
+			result += sizeof(u64);
+			if (read_format & PERF_FORMAT_LOST)
+				result += sizeof(u64);
+		}
+	}
+
+	if (type & PERF_SAMPLE_CALLCHAIN) {
+		sz = (sample->callchain->nr + 1) * sizeof(u64);
+		result += sz;
+	}
+
+	if (type & PERF_SAMPLE_RAW) {
+		result += sizeof(u32);
+		result += sample->raw_size;
+	}
+
+	if (type & PERF_SAMPLE_BRANCH_STACK) {
+		sz = sample->branch_stack->nr * sizeof(struct branch_entry);
+		/* nr, hw_idx */
+		sz += 2 * sizeof(u64);
+		result += sz;
+	}
+
+	if (type & PERF_SAMPLE_REGS_USER) {
+		if (sample->user_regs.abi) {
+			result += sizeof(u64);
+			sz = hweight64(sample->user_regs.mask) * sizeof(u64);
+			result += sz;
+		} else {
+			result += sizeof(u64);
+		}
+	}
+
+	if (type & PERF_SAMPLE_STACK_USER) {
+		sz = sample->user_stack.size;
+		result += sizeof(u64);
+		if (sz) {
+			result += sz;
+			result += sizeof(u64);
+		}
+	}
+
+	if (type & PERF_SAMPLE_WEIGHT_TYPE)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_DATA_SRC)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_TRANSACTION)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_REGS_INTR) {
+		if (sample->intr_regs.abi) {
+			result += sizeof(u64);
+			sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
+			result += sz;
+		} else {
+			result += sizeof(u64);
+		}
+	}
+
+	if (type & PERF_SAMPLE_PHYS_ADDR)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_CGROUP)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_DATA_PAGE_SIZE)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_CODE_PAGE_SIZE)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_AUX) {
+		result += sizeof(u64);
+		result += sample->aux_sample.size;
+	}
+
+	return result;
+}
+
+void __weak arch_perf_synthesize_sample_weight(const struct perf_sample *data,
+					       __u64 *array, u64 type __maybe_unused)
+{
+	*array = data->weight;
+}
+
+static __u64 *copy_read_group_values(__u64 *array, __u64 read_format,
+				     const struct perf_sample *sample)
+{
+	size_t sz = sample_read_value_size(read_format);
+	struct sample_read_value *v = sample->read.group.values;
+
+	sample_read_group__for_each(v, sample->read.group.nr, read_format) {
+		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+		memcpy(array, v, sz);
+		array = (void *)array + sz;
+	}
+	return array;
+}
+
+int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format,
+				  const struct perf_sample *sample)
+{
+	__u64 *array;
+	size_t sz;
+	/*
+	 * used for cross-endian analysis. See git commit 65014ab3
+	 * for why this goofiness is needed.
+	 */
+	union u64_swap u;
+
+	array = event->sample.array;
+
+	if (type & PERF_SAMPLE_IDENTIFIER) {
+		*array = sample->id;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_IP) {
+		*array = sample->ip;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_TID) {
+		u.val32[0] = sample->pid;
+		u.val32[1] = sample->tid;
+		*array = u.val64;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_TIME) {
+		*array = sample->time;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_ADDR) {
+		*array = sample->addr;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_ID) {
+		*array = sample->id;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_STREAM_ID) {
+		*array = sample->stream_id;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_CPU) {
+		u.val32[0] = sample->cpu;
+		u.val32[1] = 0;
+		*array = u.val64;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_PERIOD) {
+		*array = sample->period;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_READ) {
+		if (read_format & PERF_FORMAT_GROUP)
+			*array = sample->read.group.nr;
+		else
+			*array = sample->read.one.value;
+		array++;
+
+		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+			*array = sample->read.time_enabled;
+			array++;
+		}
+
+		if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+			*array = sample->read.time_running;
+			array++;
+		}
+
+		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+		if (read_format & PERF_FORMAT_GROUP) {
+			array = copy_read_group_values(array, read_format,
+						       sample);
+		} else {
+			*array = sample->read.one.id;
+			array++;
+
+			if (read_format & PERF_FORMAT_LOST) {
+				*array = sample->read.one.lost;
+				array++;
+			}
+		}
+	}
+
+	if (type & PERF_SAMPLE_CALLCHAIN) {
+		sz = (sample->callchain->nr + 1) * sizeof(u64);
+		memcpy(array, sample->callchain, sz);
+		array = (void *)array + sz;
+	}
+
+	if (type & PERF_SAMPLE_RAW) {
+		u.val32[0] = sample->raw_size;
+		*array = u.val64;
+		array = (void *)array + sizeof(u32);
+
+		memcpy(array, sample->raw_data, sample->raw_size);
+		array = (void *)array + sample->raw_size;
+	}
+
+	if (type & PERF_SAMPLE_BRANCH_STACK) {
+		sz = sample->branch_stack->nr * sizeof(struct branch_entry);
+		/* nr, hw_idx */
+		sz += 2 * sizeof(u64);
+		memcpy(array, sample->branch_stack, sz);
+		array = (void *)array + sz;
+	}
+
+	if (type & PERF_SAMPLE_REGS_USER) {
+		if (sample->user_regs.abi) {
+			*array++ = sample->user_regs.abi;
+			sz = hweight64(sample->user_regs.mask) * sizeof(u64);
+			memcpy(array, sample->user_regs.regs, sz);
+			array = (void *)array + sz;
+		} else {
+			*array++ = 0;
+		}
+	}
+
+	if (type & PERF_SAMPLE_STACK_USER) {
+		sz = sample->user_stack.size;
+		*array++ = sz;
+		if (sz) {
+			memcpy(array, sample->user_stack.data, sz);
+			array = (void *)array + sz;
+			*array++ = sz;
+		}
+	}
+
+	if (type & PERF_SAMPLE_WEIGHT_TYPE) {
+		arch_perf_synthesize_sample_weight(sample, array, type);
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_DATA_SRC) {
+		*array = sample->data_src;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_TRANSACTION) {
+		*array = sample->transaction;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_REGS_INTR) {
+		if (sample->intr_regs.abi) {
+			*array++ = sample->intr_regs.abi;
+			sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
+			memcpy(array, sample->intr_regs.regs, sz);
+			array = (void *)array + sz;
+		} else {
+			*array++ = 0;
+		}
+	}
+
+	if (type & PERF_SAMPLE_PHYS_ADDR) {
+		*array = sample->phys_addr;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_CGROUP) {
+		*array = sample->cgroup;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_DATA_PAGE_SIZE) {
+		*array = sample->data_page_size;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_CODE_PAGE_SIZE) {
+		*array = sample->code_page_size;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_AUX) {
+		sz = sample->aux_sample.size;
+		*array++ = sz;
+		memcpy(array, sample->aux_sample.data, sz);
+		array = (void *)array + sz;
+	}
+
+	return 0;
+}
+
+int perf_event__synthesize_id_sample(__u64 *array, u64 type, const struct perf_sample *sample)
+{
+	__u64 *start = array;
+
+	/*
+	 * used for cross-endian analysis. See git commit 65014ab3
+	 * for why this goofiness is needed.
+	 */
+	union u64_swap u;
+
+	if (type & PERF_SAMPLE_TID) {
+		u.val32[0] = sample->pid;
+		u.val32[1] = sample->tid;
+		*array = u.val64;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_TIME) {
+		*array = sample->time;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_ID) {
+		*array = sample->id;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_STREAM_ID) {
+		*array = sample->stream_id;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_CPU) {
+		u.val32[0] = sample->cpu;
+		u.val32[1] = 0;
+		*array = u.val64;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_IDENTIFIER) {
+		*array = sample->id;
+		array++;
+	}
+
+	return (void *)array - (void *)start;
+}
+
+int __perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process,
+				      struct evlist *evlist, struct machine *machine, size_t from)
+{
+	union perf_event *ev;
+	struct evsel *evsel;
+	size_t nr = 0, i = 0, sz, max_nr, n, pos;
+	size_t e1_sz = sizeof(struct id_index_entry);
+	size_t e2_sz = sizeof(struct id_index_entry_2);
+	size_t etot_sz = e1_sz + e2_sz;
+	bool e2_needed = false;
+	int err;
+
+	max_nr = (UINT16_MAX - sizeof(struct perf_record_id_index)) / etot_sz;
+
+	pos = 0;
+	evlist__for_each_entry(evlist, evsel) {
+		if (pos++ < from)
+			continue;
+		nr += evsel->core.ids;
+	}
+
+	if (!nr)
+		return 0;
+
+	pr_debug2("Synthesizing id index\n");
+
+	n = nr > max_nr ? max_nr : nr;
+	sz = sizeof(struct perf_record_id_index) + n * etot_sz;
+	ev = zalloc(sz);
+	if (!ev)
+		return -ENOMEM;
+
+	sz = sizeof(struct perf_record_id_index) + n * e1_sz;
+
+	ev->id_index.header.type = PERF_RECORD_ID_INDEX;
+	ev->id_index.nr = n;
+
+	pos = 0;
+	evlist__for_each_entry(evlist, evsel) {
+		u32 j;
+
+		if (pos++ < from)
+			continue;
+		for (j = 0; j < evsel->core.ids; j++, i++) {
+			struct id_index_entry *e;
+			struct id_index_entry_2 *e2;
+			struct perf_sample_id *sid;
+
+			if (i >= n) {
+				ev->id_index.header.size = sz + (e2_needed ? n * e2_sz : 0);
+				err = process(tool, ev, NULL, machine);
+				if (err)
+					goto out_err;
+				nr -= n;
+				i = 0;
+				e2_needed = false;
+			}
+
+			e = &ev->id_index.entries[i];
+
+			e->id = evsel->core.id[j];
+
+			sid = evlist__id2sid(evlist, e->id);
+			if (!sid) {
+				free(ev);
+				return -ENOENT;
+			}
+
+			e->idx = sid->idx;
+			e->cpu = sid->cpu.cpu;
+			e->tid = sid->tid;
+
+			if (sid->machine_pid)
+				e2_needed = true;
+
+			e2 = (void *)ev + sz;
+			e2[i].machine_pid = sid->machine_pid;
+			e2[i].vcpu        = sid->vcpu.cpu;
+		}
+	}
+
+	sz = sizeof(struct perf_record_id_index) + nr * e1_sz;
+	ev->id_index.header.size = sz + (e2_needed ? nr * e2_sz : 0);
+	ev->id_index.nr = nr;
+
+	err = process(tool, ev, NULL, machine);
+out_err:
+	free(ev);
+
+	return err;
+}
+
+int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process,
+				    struct evlist *evlist, struct machine *machine)
+{
+	return __perf_event__synthesize_id_index(tool, process, evlist, machine, 0);
+}
+
+int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
+				  struct target *target, struct perf_thread_map *threads,
+				  perf_event__handler_t process, bool needs_mmap,
+				  bool data_mmap, unsigned int nr_threads_synthesize)
+{
+	/*
+	 * When perf runs in non-root PID namespace, and the namespace's proc FS
+	 * is not mounted, nsinfo__is_in_root_namespace() returns false.
+	 * In this case, the proc FS is coming for the parent namespace, thus
+	 * perf tool will wrongly gather process info from its parent PID
+	 * namespace.
+	 *
+	 * To avoid the confusion that the perf tool runs in a child PID
+	 * namespace but it synthesizes thread info from its parent PID
+	 * namespace, returns failure with warning.
+	 */
+	if (!nsinfo__is_in_root_namespace()) {
+		pr_err("Perf runs in non-root PID namespace but it tries to ");
+		pr_err("gather process info from its parent PID namespace.\n");
+		pr_err("Please mount the proc file system properly, e.g. ");
+		pr_err("add the option '--mount-proc' for unshare command.\n");
+		return -EPERM;
+	}
+
+	if (target__has_task(target))
+		return perf_event__synthesize_thread_map(tool, threads, process, machine,
+							 needs_mmap, data_mmap);
+	else if (target__has_cpu(target))
+		return perf_event__synthesize_threads(tool, process, machine,
+						      needs_mmap, data_mmap,
+						      nr_threads_synthesize);
+	/* command specified */
+	return 0;
+}
+
+int machine__synthesize_threads(struct machine *machine, struct target *target,
+				struct perf_thread_map *threads, bool needs_mmap,
+				bool data_mmap, unsigned int nr_threads_synthesize)
+{
+	return __machine__synthesize_threads(machine, NULL, target, threads,
+					     perf_event__process, needs_mmap,
+					     data_mmap, nr_threads_synthesize);
+}
+
+static struct perf_record_event_update *event_update_event__new(size_t size, u64 type, u64 id)
+{
+	struct perf_record_event_update *ev;
+
+	size += sizeof(*ev);
+	size  = PERF_ALIGN(size, sizeof(u64));
+
+	ev = zalloc(size);
+	if (ev) {
+		ev->header.type = PERF_RECORD_EVENT_UPDATE;
+		ev->header.size = (u16)size;
+		ev->type	= type;
+		ev->id		= id;
+	}
+	return ev;
+}
+
+int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evsel *evsel,
+					     perf_event__handler_t process)
+{
+	size_t size = strlen(evsel->unit);
+	struct perf_record_event_update *ev;
+	int err;
+
+	ev = event_update_event__new(size + 1, PERF_EVENT_UPDATE__UNIT, evsel->core.id[0]);
+	if (ev == NULL)
+		return -ENOMEM;
+
+	strlcpy(ev->unit, evsel->unit, size + 1);
+	err = process(tool, (union perf_event *)ev, NULL, NULL);
+	free(ev);
+	return err;
+}
+
+int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evsel *evsel,
+					      perf_event__handler_t process)
+{
+	struct perf_record_event_update *ev;
+	struct perf_record_event_update_scale *ev_data;
+	int err;
+
+	ev = event_update_event__new(sizeof(*ev_data), PERF_EVENT_UPDATE__SCALE, evsel->core.id[0]);
+	if (ev == NULL)
+		return -ENOMEM;
+
+	ev->scale.scale = evsel->scale;
+	err = process(tool, (union perf_event *)ev, NULL, NULL);
+	free(ev);
+	return err;
+}
+
+int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evsel *evsel,
+					     perf_event__handler_t process)
+{
+	struct perf_record_event_update *ev;
+	size_t len = strlen(evsel->name);
+	int err;
+
+	ev = event_update_event__new(len + 1, PERF_EVENT_UPDATE__NAME, evsel->core.id[0]);
+	if (ev == NULL)
+		return -ENOMEM;
+
+	strlcpy(ev->name, evsel->name, len + 1);
+	err = process(tool, (union perf_event *)ev, NULL, NULL);
+	free(ev);
+	return err;
+}
+
+int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel,
+					     perf_event__handler_t process)
+{
+	struct synthesize_cpu_map_data syn_data = { .map = evsel->core.own_cpus };
+	struct perf_record_event_update *ev;
+	int err;
+
+	ev = cpu_map_data__alloc(&syn_data, sizeof(struct perf_event_header) + 2 * sizeof(u64));
+	if (!ev)
+		return -ENOMEM;
+
+	syn_data.data = &ev->cpus.cpus;
+	ev->header.type = PERF_RECORD_EVENT_UPDATE;
+	ev->header.size = (u16)syn_data.size;
+	ev->type	= PERF_EVENT_UPDATE__CPUS;
+	ev->id		= evsel->core.id[0];
+	cpu_map_data__synthesize(&syn_data);
+
+	err = process(tool, (union perf_event *)ev, NULL, NULL);
+	free(ev);
+	return err;
+}
+
+int perf_event__synthesize_attrs(struct perf_tool *tool, struct evlist *evlist,
+				 perf_event__handler_t process)
+{
+	struct evsel *evsel;
+	int err = 0;
+
+	evlist__for_each_entry(evlist, evsel) {
+		err = perf_event__synthesize_attr(tool, &evsel->core.attr, evsel->core.ids,
+						  evsel->core.id, process);
+		if (err) {
+			pr_debug("failed to create perf header attribute\n");
+			return err;
+		}
+	}
+
+	return err;
+}
+
+static bool has_unit(struct evsel *evsel)
+{
+	return evsel->unit && *evsel->unit;
+}
+
+static bool has_scale(struct evsel *evsel)
+{
+	return evsel->scale != 1;
+}
+
+int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evsel_list,
+				      perf_event__handler_t process, bool is_pipe)
+{
+	struct evsel *evsel;
+	int err;
+
+	/*
+	 * Synthesize other events stuff not carried within
+	 * attr event - unit, scale, name
+	 */
+	evlist__for_each_entry(evsel_list, evsel) {
+		if (!evsel->supported)
+			continue;
+
+		/*
+		 * Synthesize unit and scale only if it's defined.
+		 */
+		if (has_unit(evsel)) {
+			err = perf_event__synthesize_event_update_unit(tool, evsel, process);
+			if (err < 0) {
+				pr_err("Couldn't synthesize evsel unit.\n");
+				return err;
+			}
+		}
+
+		if (has_scale(evsel)) {
+			err = perf_event__synthesize_event_update_scale(tool, evsel, process);
+			if (err < 0) {
+				pr_err("Couldn't synthesize evsel evsel.\n");
+				return err;
+			}
+		}
+
+		if (evsel->core.own_cpus) {
+			err = perf_event__synthesize_event_update_cpus(tool, evsel, process);
+			if (err < 0) {
+				pr_err("Couldn't synthesize evsel cpus.\n");
+				return err;
+			}
+		}
+
+		/*
+		 * Name is needed only for pipe output,
+		 * perf.data carries event names.
+		 */
+		if (is_pipe) {
+			err = perf_event__synthesize_event_update_name(tool, evsel, process);
+			if (err < 0) {
+				pr_err("Couldn't synthesize evsel name.\n");
+				return err;
+			}
+		}
+	}
+	return 0;
+}
+
+int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr,
+				u32 ids, u64 *id, perf_event__handler_t process)
+{
+	union perf_event *ev;
+	size_t size;
+	int err;
+
+	size = sizeof(struct perf_event_attr);
+	size = PERF_ALIGN(size, sizeof(u64));
+	size += sizeof(struct perf_event_header);
+	size += ids * sizeof(u64);
+
+	ev = zalloc(size);
+
+	if (ev == NULL)
+		return -ENOMEM;
+
+	ev->attr.attr = *attr;
+	memcpy(ev->attr.id, id, ids * sizeof(u64));
+
+	ev->attr.header.type = PERF_RECORD_HEADER_ATTR;
+	ev->attr.header.size = (u16)size;
+
+	if (ev->attr.header.size == size)
+		err = process(tool, ev, NULL, NULL);
+	else
+		err = -E2BIG;
+
+	free(ev);
+
+	return err;
+}
+
+int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist,
+					perf_event__handler_t process)
+{
+	union perf_event ev;
+	struct tracing_data *tdata;
+	ssize_t size = 0, aligned_size = 0, padding;
+	struct feat_fd ff;
+
+	/*
+	 * We are going to store the size of the data followed
+	 * by the data contents. Since the fd descriptor is a pipe,
+	 * we cannot seek back to store the size of the data once
+	 * we know it. Instead we:
+	 *
+	 * - write the tracing data to the temp file
+	 * - get/write the data size to pipe
+	 * - write the tracing data from the temp file
+	 *   to the pipe
+	 */
+	tdata = tracing_data_get(&evlist->core.entries, fd, true);
+	if (!tdata)
+		return -1;
+
+	memset(&ev, 0, sizeof(ev));
+
+	ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA;
+	size = tdata->size;
+	aligned_size = PERF_ALIGN(size, sizeof(u64));
+	padding = aligned_size - size;
+	ev.tracing_data.header.size = sizeof(ev.tracing_data);
+	ev.tracing_data.size = aligned_size;
+
+	process(tool, &ev, NULL, NULL);
+
+	/*
+	 * The put function will copy all the tracing data
+	 * stored in temp file to the pipe.
+	 */
+	tracing_data_put(tdata);
+
+	ff = (struct feat_fd){ .fd = fd };
+	if (write_padded(&ff, NULL, 0, padding))
+		return -1;
+
+	return aligned_size;
+}
+
+int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc,
+				    perf_event__handler_t process, struct machine *machine)
+{
+	union perf_event ev;
+	size_t len;
+
+	if (!pos->hit)
+		return 0;
+
+	memset(&ev, 0, sizeof(ev));
+
+	len = pos->long_name_len + 1;
+	len = PERF_ALIGN(len, NAME_ALIGN);
+	memcpy(&ev.build_id.build_id, pos->bid.data, sizeof(pos->bid.data));
+	ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID;
+	ev.build_id.header.misc = misc;
+	ev.build_id.pid = machine->pid;
+	ev.build_id.header.size = sizeof(ev.build_id) + len;
+	memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len);
+
+	return process(tool, &ev, NULL, machine);
+}
+
+int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool,
+				       struct evlist *evlist, perf_event__handler_t process, bool attrs)
+{
+	int err;
+
+	if (attrs) {
+		err = perf_event__synthesize_attrs(tool, evlist, process);
+		if (err < 0) {
+			pr_err("Couldn't synthesize attrs.\n");
+			return err;
+		}
+	}
+
+	err = perf_event__synthesize_extra_attr(tool, evlist, process, attrs);
+	err = perf_event__synthesize_thread_map2(tool, evlist->core.threads, process, NULL);
+	if (err < 0) {
+		pr_err("Couldn't synthesize thread map.\n");
+		return err;
+	}
+
+	err = perf_event__synthesize_cpu_map(tool, evlist->core.user_requested_cpus, process, NULL);
+	if (err < 0) {
+		pr_err("Couldn't synthesize thread map.\n");
+		return err;
+	}
+
+	err = perf_event__synthesize_stat_config(tool, config, process, NULL);
+	if (err < 0) {
+		pr_err("Couldn't synthesize config.\n");
+		return err;
+	}
+
+	return 0;
+}
+
+extern const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE];
+
+int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session,
+				    struct evlist *evlist, perf_event__handler_t process)
+{
+	struct perf_header *header = &session->header;
+	struct perf_record_header_feature *fe;
+	struct feat_fd ff;
+	size_t sz, sz_hdr;
+	int feat, ret;
+
+	sz_hdr = sizeof(fe->header);
+	sz = sizeof(union perf_event);
+	/* get a nice alignment */
+	sz = PERF_ALIGN(sz, page_size);
+
+	memset(&ff, 0, sizeof(ff));
+
+	ff.buf = malloc(sz);
+	if (!ff.buf)
+		return -ENOMEM;
+
+	ff.size = sz - sz_hdr;
+	ff.ph = &session->header;
+
+	for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) {
+		if (!feat_ops[feat].synthesize) {
+			pr_debug("No record header feature for header :%d\n", feat);
+			continue;
+		}
+
+		ff.offset = sizeof(*fe);
+
+		ret = feat_ops[feat].write(&ff, evlist);
+		if (ret || ff.offset <= (ssize_t)sizeof(*fe)) {
+			pr_debug("Error writing feature\n");
+			continue;
+		}
+		/* ff.buf may have changed due to realloc in do_write() */
+		fe = ff.buf;
+		memset(fe, 0, sizeof(*fe));
+
+		fe->feat_id = feat;
+		fe->header.type = PERF_RECORD_HEADER_FEATURE;
+		fe->header.size = ff.offset;
+
+		ret = process(tool, ff.buf, NULL, NULL);
+		if (ret) {
+			free(ff.buf);
+			return ret;
+		}
+	}
+
+	/* Send HEADER_LAST_FEATURE mark. */
+	fe = ff.buf;
+	fe->feat_id     = HEADER_LAST_FEATURE;
+	fe->header.type = PERF_RECORD_HEADER_FEATURE;
+	fe->header.size = sizeof(*fe);
+
+	ret = process(tool, ff.buf, NULL, NULL);
+
+	free(ff.buf);
+	return ret;
+}
+
+int perf_event__synthesize_for_pipe(struct perf_tool *tool,
+				    struct perf_session *session,
+				    struct perf_data *data,
+				    perf_event__handler_t process)
+{
+	int err;
+	int ret = 0;
+	struct evlist *evlist = session->evlist;
+
+	/*
+	 * We need to synthesize events first, because some
+	 * features works on top of them (on report side).
+	 */
+	err = perf_event__synthesize_attrs(tool, evlist, process);
+	if (err < 0) {
+		pr_err("Couldn't synthesize attrs.\n");
+		return err;
+	}
+	ret += err;
+
+	err = perf_event__synthesize_features(tool, session, evlist, process);
+	if (err < 0) {
+		pr_err("Couldn't synthesize features.\n");
+		return err;
+	}
+	ret += err;
+
+	if (have_tracepoints(&evlist->core.entries)) {
+		int fd = perf_data__fd(data);
+
+		/*
+		 * FIXME err <= 0 here actually means that
+		 * there were no tracepoints so its not really
+		 * an error, just that we don't need to
+		 * synthesize anything.  We really have to
+		 * return this more properly and also
+		 * propagate errors that now are calling die()
+		 */
+		err = perf_event__synthesize_tracing_data(tool,	fd, evlist,
+							  process);
+		if (err <= 0) {
+			pr_err("Couldn't record tracing data.\n");
+			return err;
+		}
+		ret += err;
+	}
+
+	return ret;
+}
+
+int parse_synth_opt(char *synth)
+{
+	char *p, *q;
+	int ret = 0;
+
+	if (synth == NULL)
+		return -1;
+
+	for (q = synth; (p = strsep(&q, ",")); p = q) {
+		if (!strcasecmp(p, "no") || !strcasecmp(p, "none"))
+			return 0;
+
+		if (!strcasecmp(p, "all"))
+			return PERF_SYNTH_ALL;
+
+		if (!strcasecmp(p, "task"))
+			ret |= PERF_SYNTH_TASK;
+		else if (!strcasecmp(p, "mmap"))
+			ret |= PERF_SYNTH_TASK | PERF_SYNTH_MMAP;
+		else if (!strcasecmp(p, "cgroup"))
+			ret |= PERF_SYNTH_CGROUP;
+		else
+			return -1;
+	}
+
+	return ret;
+}
diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h
new file mode 100644
index 000000000..53737d161
--- /dev/null
+++ b/tools/perf/util/synthetic-events.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SYNTHETIC_EVENTS_H
+#define __PERF_SYNTHETIC_EVENTS_H
+
+#include <stdbool.h>
+#include <sys/types.h> // pid_t
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <perf/cpumap.h>
+
+struct auxtrace_record;
+struct dso;
+struct evlist;
+struct evsel;
+struct machine;
+struct perf_counts_values;
+struct perf_cpu_map;
+struct perf_data;
+struct perf_event_attr;
+struct perf_event_mmap_page;
+struct perf_sample;
+struct perf_session;
+struct perf_stat_config;
+struct perf_thread_map;
+struct perf_tool;
+struct record_opts;
+struct target;
+
+union perf_event;
+
+enum perf_record_synth {
+	PERF_SYNTH_TASK		= 1 << 0,
+	PERF_SYNTH_MMAP		= 1 << 1,
+	PERF_SYNTH_CGROUP	= 1 << 2,
+
+	/* last element */
+	PERF_SYNTH_MAX		= 1 << 3,
+};
+#define PERF_SYNTH_ALL  (PERF_SYNTH_MAX - 1)
+
+int parse_synth_opt(char *str);
+
+typedef int (*perf_event__handler_t)(struct perf_tool *tool, union perf_event *event,
+				     struct perf_sample *sample, struct machine *machine);
+
+int perf_event__synthesize_attrs(struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process);
+int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process);
+int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_cpu_map(struct perf_tool *tool, const struct perf_cpu_map *cpus, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process);
+int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evsel_list, perf_event__handler_t process, bool is_pipe);
+int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session, struct evlist *evlist, perf_event__handler_t process);
+int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine);
+int __perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine, size_t from);
+int perf_event__synthesize_id_sample(__u64 *array, u64 type, const struct perf_sample *sample);
+int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, bool mmap_data);
+int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_namespaces(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_cgroups(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, const struct perf_sample *sample);
+int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs);
+int perf_event__synthesize_stat_round(struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_stat(struct perf_tool *tool, struct perf_cpu cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data);
+int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize);
+int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process);
+int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
+pid_t perf_event__synthesize_comm(struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine);
+
+int perf_tool__process_synth_event(struct perf_tool *tool, union perf_event *event, struct machine *machine, perf_event__handler_t process);
+
+size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format);
+
+int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
+				  struct target *target, struct perf_thread_map *threads,
+				  perf_event__handler_t process, bool needs_mmap, bool data_mmap,
+				  unsigned int nr_threads_synthesize);
+int machine__synthesize_threads(struct machine *machine, struct target *target,
+				struct perf_thread_map *threads, bool needs_mmap, bool data_mmap,
+				unsigned int nr_threads_synthesize);
+
+#ifdef HAVE_AUXTRACE_SUPPORT
+int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, struct perf_tool *tool,
+					 struct perf_session *session, perf_event__handler_t process);
+
+#else // HAVE_AUXTRACE_SUPPORT
+
+#include <errno.h>
+
+static inline int
+perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr __maybe_unused,
+				     struct perf_tool *tool __maybe_unused,
+				     struct perf_session *session __maybe_unused,
+				     perf_event__handler_t process __maybe_unused)
+{
+	return -EINVAL;
+}
+#endif // HAVE_AUXTRACE_SUPPORT
+
+#ifdef HAVE_LIBBPF_SUPPORT
+int perf_event__synthesize_bpf_events(struct perf_session *session, perf_event__handler_t process,
+				      struct machine *machine, struct record_opts *opts);
+#else // HAVE_LIBBPF_SUPPORT
+static inline int perf_event__synthesize_bpf_events(struct perf_session *session __maybe_unused,
+						    perf_event__handler_t process __maybe_unused,
+						    struct machine *machine __maybe_unused,
+						    struct record_opts *opts __maybe_unused)
+{
+	return 0;
+}
+#endif // HAVE_LIBBPF_SUPPORT
+
+int perf_event__synthesize_for_pipe(struct perf_tool *tool,
+				    struct perf_session *session,
+				    struct perf_data *data,
+				    perf_event__handler_t process);
+
+#endif // __PERF_SYNTHETIC_EVENTS_H
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
new file mode 100644
index 000000000..a2e906858
--- /dev/null
+++ b/tools/perf/util/syscalltbl.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * System call table mapper
+ *
+ * (C) 2016 Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include "syscalltbl.h"
+#include <stdlib.h>
+#include <linux/compiler.h>
+#include <linux/zalloc.h>
+
+#ifdef HAVE_SYSCALL_TABLE_SUPPORT
+#include <string.h>
+#include "string2.h"
+
+#if defined(__x86_64__)
+#include <asm/syscalls_64.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID;
+static const char **syscalltbl_native = syscalltbl_x86_64;
+#elif defined(__s390x__)
+#include <asm/syscalls_64.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID;
+static const char **syscalltbl_native = syscalltbl_s390_64;
+#elif defined(__powerpc64__)
+#include <asm/syscalls_64.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_64_MAX_ID;
+static const char **syscalltbl_native = syscalltbl_powerpc_64;
+#elif defined(__powerpc__)
+#include <asm/syscalls_32.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_32_MAX_ID;
+static const char **syscalltbl_native = syscalltbl_powerpc_32;
+#elif defined(__aarch64__)
+#include <asm/syscalls.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_ARM64_MAX_ID;
+static const char **syscalltbl_native = syscalltbl_arm64;
+#elif defined(__mips__)
+#include <asm/syscalls_n64.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_MIPS_N64_MAX_ID;
+static const char **syscalltbl_native = syscalltbl_mips_n64;
+#endif
+
+struct syscall {
+	int id;
+	const char *name;
+};
+
+static int syscallcmpname(const void *vkey, const void *ventry)
+{
+	const char *key = vkey;
+	const struct syscall *entry = ventry;
+
+	return strcmp(key, entry->name);
+}
+
+static int syscallcmp(const void *va, const void *vb)
+{
+	const struct syscall *a = va, *b = vb;
+
+	return strcmp(a->name, b->name);
+}
+
+static int syscalltbl__init_native(struct syscalltbl *tbl)
+{
+	int nr_entries = 0, i, j;
+	struct syscall *entries;
+
+	for (i = 0; i <= syscalltbl_native_max_id; ++i)
+		if (syscalltbl_native[i])
+			++nr_entries;
+
+	entries = tbl->syscalls.entries = malloc(sizeof(struct syscall) * nr_entries);
+	if (tbl->syscalls.entries == NULL)
+		return -1;
+
+	for (i = 0, j = 0; i <= syscalltbl_native_max_id; ++i) {
+		if (syscalltbl_native[i]) {
+			entries[j].name = syscalltbl_native[i];
+			entries[j].id = i;
+			++j;
+		}
+	}
+
+	qsort(tbl->syscalls.entries, nr_entries, sizeof(struct syscall), syscallcmp);
+	tbl->syscalls.nr_entries = nr_entries;
+	tbl->syscalls.max_id	 = syscalltbl_native_max_id;
+	return 0;
+}
+
+struct syscalltbl *syscalltbl__new(void)
+{
+	struct syscalltbl *tbl = malloc(sizeof(*tbl));
+	if (tbl) {
+		if (syscalltbl__init_native(tbl)) {
+			free(tbl);
+			return NULL;
+		}
+	}
+	return tbl;
+}
+
+void syscalltbl__delete(struct syscalltbl *tbl)
+{
+	zfree(&tbl->syscalls.entries);
+	free(tbl);
+}
+
+const char *syscalltbl__name(const struct syscalltbl *tbl __maybe_unused, int id)
+{
+	return id <= syscalltbl_native_max_id ? syscalltbl_native[id]: NULL;
+}
+
+int syscalltbl__id(struct syscalltbl *tbl, const char *name)
+{
+	struct syscall *sc = bsearch(name, tbl->syscalls.entries,
+				     tbl->syscalls.nr_entries, sizeof(*sc),
+				     syscallcmpname);
+
+	return sc ? sc->id : -1;
+}
+
+int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+{
+	int i;
+	struct syscall *syscalls = tbl->syscalls.entries;
+
+	for (i = *idx + 1; i < tbl->syscalls.nr_entries; ++i) {
+		if (strglobmatch(syscalls[i].name, syscall_glob)) {
+			*idx = i;
+			return syscalls[i].id;
+		}
+	}
+
+	return -1;
+}
+
+int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+{
+	*idx = -1;
+	return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
+}
+
+#else /* HAVE_SYSCALL_TABLE_SUPPORT */
+
+#include <libaudit.h>
+
+struct syscalltbl *syscalltbl__new(void)
+{
+	struct syscalltbl *tbl = zalloc(sizeof(*tbl));
+	if (tbl)
+		tbl->audit_machine = audit_detect_machine();
+	return tbl;
+}
+
+void syscalltbl__delete(struct syscalltbl *tbl)
+{
+	free(tbl);
+}
+
+const char *syscalltbl__name(const struct syscalltbl *tbl, int id)
+{
+	return audit_syscall_to_name(id, tbl->audit_machine);
+}
+
+int syscalltbl__id(struct syscalltbl *tbl, const char *name)
+{
+	return audit_name_to_syscall(name, tbl->audit_machine);
+}
+
+int syscalltbl__strglobmatch_next(struct syscalltbl *tbl __maybe_unused,
+				  const char *syscall_glob __maybe_unused, int *idx __maybe_unused)
+{
+	return -1;
+}
+
+int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+{
+	return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
+}
+#endif /* HAVE_SYSCALL_TABLE_SUPPORT */
diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h
new file mode 100644
index 000000000..a41d2ca9e
--- /dev/null
+++ b/tools/perf/util/syscalltbl.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SYSCALLTBL_H
+#define __PERF_SYSCALLTBL_H
+
+struct syscalltbl {
+	int audit_machine;
+	struct {
+		int max_id;
+		int nr_entries;
+		void *entries;
+	} syscalls;
+};
+
+struct syscalltbl *syscalltbl__new(void);
+void syscalltbl__delete(struct syscalltbl *tbl);
+
+const char *syscalltbl__name(const struct syscalltbl *tbl, int id);
+int syscalltbl__id(struct syscalltbl *tbl, const char *name);
+
+int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
+int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
+
+#endif /* __PERF_SYSCALLTBL_H */
diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c
new file mode 100644
index 000000000..0f383418e
--- /dev/null
+++ b/tools/perf/util/target.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Helper functions for handling target threads/cpus
+ *
+ * Copyright (C) 2012, LG Electronics, Namhyung Kim <namhyung.kim@lge.com>
+ */
+
+#include "target.h"
+
+#include <pwd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+enum target_errno target__validate(struct target *target)
+{
+	enum target_errno ret = TARGET_ERRNO__SUCCESS;
+
+	if (target->pid)
+		target->tid = target->pid;
+
+	/* CPU and PID are mutually exclusive */
+	if (target->tid && target->cpu_list) {
+		target->cpu_list = NULL;
+		if (ret == TARGET_ERRNO__SUCCESS)
+			ret = TARGET_ERRNO__PID_OVERRIDE_CPU;
+	}
+
+	/* UID and PID are mutually exclusive */
+	if (target->tid && target->uid_str) {
+		target->uid_str = NULL;
+		if (ret == TARGET_ERRNO__SUCCESS)
+			ret = TARGET_ERRNO__PID_OVERRIDE_UID;
+	}
+
+	/* UID and CPU are mutually exclusive */
+	if (target->uid_str && target->cpu_list) {
+		target->cpu_list = NULL;
+		if (ret == TARGET_ERRNO__SUCCESS)
+			ret = TARGET_ERRNO__UID_OVERRIDE_CPU;
+	}
+
+	/* PID and SYSTEM are mutually exclusive */
+	if (target->tid && target->system_wide) {
+		target->system_wide = false;
+		if (ret == TARGET_ERRNO__SUCCESS)
+			ret = TARGET_ERRNO__PID_OVERRIDE_SYSTEM;
+	}
+
+	/* UID and SYSTEM are mutually exclusive */
+	if (target->uid_str && target->system_wide) {
+		target->system_wide = false;
+		if (ret == TARGET_ERRNO__SUCCESS)
+			ret = TARGET_ERRNO__UID_OVERRIDE_SYSTEM;
+	}
+
+	/* BPF and CPU are mutually exclusive */
+	if (target->bpf_str && target->cpu_list) {
+		target->cpu_list = NULL;
+		if (ret == TARGET_ERRNO__SUCCESS)
+			ret = TARGET_ERRNO__BPF_OVERRIDE_CPU;
+	}
+
+	/* BPF and PID/TID are mutually exclusive */
+	if (target->bpf_str && target->tid) {
+		target->tid = NULL;
+		if (ret == TARGET_ERRNO__SUCCESS)
+			ret = TARGET_ERRNO__BPF_OVERRIDE_PID;
+	}
+
+	/* BPF and UID are mutually exclusive */
+	if (target->bpf_str && target->uid_str) {
+		target->uid_str = NULL;
+		if (ret == TARGET_ERRNO__SUCCESS)
+			ret = TARGET_ERRNO__BPF_OVERRIDE_UID;
+	}
+
+	/* BPF and THREADS are mutually exclusive */
+	if (target->bpf_str && target->per_thread) {
+		target->per_thread = false;
+		if (ret == TARGET_ERRNO__SUCCESS)
+			ret = TARGET_ERRNO__BPF_OVERRIDE_THREAD;
+	}
+
+	/* THREAD and SYSTEM/CPU are mutually exclusive */
+	if (target->per_thread && (target->system_wide || target->cpu_list)) {
+		target->per_thread = false;
+		if (ret == TARGET_ERRNO__SUCCESS)
+			ret = TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD;
+	}
+
+	return ret;
+}
+
+enum target_errno target__parse_uid(struct target *target)
+{
+	struct passwd pwd, *result;
+	char buf[1024];
+	const char *str = target->uid_str;
+
+	target->uid = UINT_MAX;
+	if (str == NULL)
+		return TARGET_ERRNO__SUCCESS;
+
+	/* Try user name first */
+	getpwnam_r(str, &pwd, buf, sizeof(buf), &result);
+
+	if (result == NULL) {
+		/*
+		 * The user name not found. Maybe it's a UID number.
+		 */
+		char *endptr;
+		int uid = strtol(str, &endptr, 10);
+
+		if (*endptr != '\0')
+			return TARGET_ERRNO__INVALID_UID;
+
+		getpwuid_r(uid, &pwd, buf, sizeof(buf), &result);
+
+		if (result == NULL)
+			return TARGET_ERRNO__USER_NOT_FOUND;
+	}
+
+	target->uid = result->pw_uid;
+	return TARGET_ERRNO__SUCCESS;
+}
+
+/*
+ * This must have a same ordering as the enum target_errno.
+ */
+static const char *target__error_str[] = {
+	"PID/TID switch overriding CPU",
+	"PID/TID switch overriding UID",
+	"UID switch overriding CPU",
+	"PID/TID switch overriding SYSTEM",
+	"UID switch overriding SYSTEM",
+	"SYSTEM/CPU switch overriding PER-THREAD",
+	"BPF switch overriding CPU",
+	"BPF switch overriding PID/TID",
+	"BPF switch overriding UID",
+	"BPF switch overriding THREAD",
+	"Invalid User: %s",
+	"Problems obtaining information for user %s",
+};
+
+int target__strerror(struct target *target, int errnum,
+			  char *buf, size_t buflen)
+{
+	int idx;
+	const char *msg;
+
+	BUG_ON(buflen == 0);
+
+	if (errnum >= 0) {
+		str_error_r(errnum, buf, buflen);
+		return 0;
+	}
+
+	if (errnum <  __TARGET_ERRNO__START || errnum >= __TARGET_ERRNO__END)
+		return -1;
+
+	idx = errnum - __TARGET_ERRNO__START;
+	msg = target__error_str[idx];
+
+	switch (errnum) {
+	case TARGET_ERRNO__PID_OVERRIDE_CPU ...
+	     TARGET_ERRNO__BPF_OVERRIDE_THREAD:
+		snprintf(buf, buflen, "%s", msg);
+		break;
+
+	case TARGET_ERRNO__INVALID_UID:
+	case TARGET_ERRNO__USER_NOT_FOUND:
+		snprintf(buf, buflen, msg, target->uid_str);
+		break;
+
+	default:
+		/* cannot reach here */
+		break;
+	}
+
+	return 0;
+}
diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
new file mode 100644
index 000000000..880f1af7f
--- /dev/null
+++ b/tools/perf/util/target.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_TARGET_H
+#define _PERF_TARGET_H
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+struct target {
+	const char   *pid;
+	const char   *tid;
+	const char   *cpu_list;
+	const char   *uid_str;
+	const char   *bpf_str;
+	uid_t	     uid;
+	bool	     system_wide;
+	bool	     uses_mmap;
+	bool	     default_per_cpu;
+	bool	     per_thread;
+	bool	     use_bpf;
+	bool	     hybrid;
+	int	     initial_delay;
+	const char   *attr_map;
+};
+
+enum target_errno {
+	TARGET_ERRNO__SUCCESS		= 0,
+
+	/*
+	 * Choose an arbitrary negative big number not to clash with standard
+	 * errno since SUS requires the errno has distinct positive values.
+	 * See 'Issue 6' in the link below.
+	 *
+	 * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
+	 */
+	__TARGET_ERRNO__START		= -10000,
+
+	/* for target__validate() */
+	TARGET_ERRNO__PID_OVERRIDE_CPU	= __TARGET_ERRNO__START,
+	TARGET_ERRNO__PID_OVERRIDE_UID,
+	TARGET_ERRNO__UID_OVERRIDE_CPU,
+	TARGET_ERRNO__PID_OVERRIDE_SYSTEM,
+	TARGET_ERRNO__UID_OVERRIDE_SYSTEM,
+	TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD,
+	TARGET_ERRNO__BPF_OVERRIDE_CPU,
+	TARGET_ERRNO__BPF_OVERRIDE_PID,
+	TARGET_ERRNO__BPF_OVERRIDE_UID,
+	TARGET_ERRNO__BPF_OVERRIDE_THREAD,
+
+	/* for target__parse_uid() */
+	TARGET_ERRNO__INVALID_UID,
+	TARGET_ERRNO__USER_NOT_FOUND,
+
+	__TARGET_ERRNO__END,
+};
+
+enum target_errno target__validate(struct target *target);
+enum target_errno target__parse_uid(struct target *target);
+
+int target__strerror(struct target *target, int errnum, char *buf, size_t buflen);
+
+static inline bool target__has_task(struct target *target)
+{
+	return target->tid || target->pid || target->uid_str;
+}
+
+static inline bool target__has_cpu(struct target *target)
+{
+	return target->system_wide || target->cpu_list;
+}
+
+static inline bool target__none(struct target *target)
+{
+	return !target__has_task(target) && !target__has_cpu(target);
+}
+
+static inline bool target__enable_on_exec(struct target *target)
+{
+	/*
+	 * Normally enable_on_exec should be set if:
+	 *  1) The tracee process is forked (not attaching to existed task or cpu).
+	 *  2) And initial_delay is not configured.
+	 * Otherwise, we enable tracee events manually.
+	 */
+	return target__none(target) && !target->initial_delay;
+}
+
+static inline bool target__has_per_thread(struct target *target)
+{
+	return target->system_wide && target->per_thread;
+}
+
+static inline bool target__uses_dummy_map(struct target *target)
+{
+	bool use_dummy = false;
+
+	if (target->default_per_cpu)
+		use_dummy = target->per_thread ? true : false;
+	else if (target__has_task(target) ||
+	         (!target__has_cpu(target) && !target->uses_mmap))
+		use_dummy = true;
+	else if (target__has_per_thread(target))
+		use_dummy = true;
+
+	return use_dummy;
+}
+
+#endif /* _PERF_TARGET_H */
diff --git a/tools/perf/util/term.c b/tools/perf/util/term.c
new file mode 100644
index 000000000..e7aa82c06
--- /dev/null
+++ b/tools/perf/util/term.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "term.h"
+#include <stdlib.h>
+#include <termios.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+
+void get_term_dimensions(struct winsize *ws)
+{
+	char *s = getenv("LINES");
+
+	if (s != NULL) {
+		ws->ws_row = atoi(s);
+		s = getenv("COLUMNS");
+		if (s != NULL) {
+			ws->ws_col = atoi(s);
+			if (ws->ws_row && ws->ws_col)
+				return;
+		}
+	}
+#ifdef TIOCGWINSZ
+	if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
+	    ws->ws_row && ws->ws_col)
+		return;
+#endif
+	ws->ws_row = 25;
+	ws->ws_col = 80;
+}
+
+void set_term_quiet_input(struct termios *old)
+{
+	struct termios tc;
+
+	tcgetattr(0, old);
+	tc = *old;
+	tc.c_lflag &= ~(ICANON | ECHO);
+	tc.c_cc[VMIN] = 0;
+	tc.c_cc[VTIME] = 0;
+	tcsetattr(0, TCSANOW, &tc);
+}
diff --git a/tools/perf/util/term.h b/tools/perf/util/term.h
new file mode 100644
index 000000000..607b170a9
--- /dev/null
+++ b/tools/perf/util/term.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_TERM_H
+#define __PERF_TERM_H
+
+struct termios;
+struct winsize;
+
+void get_term_dimensions(struct winsize *ws);
+void set_term_quiet_input(struct termios *old);
+
+#endif /* __PERF_TERM_H */
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
new file mode 100644
index 000000000..1b992bbba
--- /dev/null
+++ b/tools/perf/util/thread-stack.c
@@ -0,0 +1,1240 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * thread-stack.c: Synthesize a thread's stack using call / return events
+ * Copyright (c) 2014, Intel Corporation.
+ */
+
+#include <linux/rbtree.h>
+#include <linux/list.h>
+#include <linux/log2.h>
+#include <linux/zalloc.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include "thread.h"
+#include "event.h"
+#include "machine.h"
+#include "env.h"
+#include "debug.h"
+#include "symbol.h"
+#include "comm.h"
+#include "call-path.h"
+#include "thread-stack.h"
+
+#define STACK_GROWTH 2048
+
+/*
+ * State of retpoline detection.
+ *
+ * RETPOLINE_NONE: no retpoline detection
+ * X86_RETPOLINE_POSSIBLE: x86 retpoline possible
+ * X86_RETPOLINE_DETECTED: x86 retpoline detected
+ */
+enum retpoline_state_t {
+	RETPOLINE_NONE,
+	X86_RETPOLINE_POSSIBLE,
+	X86_RETPOLINE_DETECTED,
+};
+
+/**
+ * struct thread_stack_entry - thread stack entry.
+ * @ret_addr: return address
+ * @timestamp: timestamp (if known)
+ * @ref: external reference (e.g. db_id of sample)
+ * @branch_count: the branch count when the entry was created
+ * @insn_count: the instruction count when the entry was created
+ * @cyc_count the cycle count when the entry was created
+ * @db_id: id used for db-export
+ * @cp: call path
+ * @no_call: a 'call' was not seen
+ * @trace_end: a 'call' but trace ended
+ * @non_call: a branch but not a 'call' to the start of a different symbol
+ */
+struct thread_stack_entry {
+	u64 ret_addr;
+	u64 timestamp;
+	u64 ref;
+	u64 branch_count;
+	u64 insn_count;
+	u64 cyc_count;
+	u64 db_id;
+	struct call_path *cp;
+	bool no_call;
+	bool trace_end;
+	bool non_call;
+};
+
+/**
+ * struct thread_stack - thread stack constructed from 'call' and 'return'
+ *                       branch samples.
+ * @stack: array that holds the stack
+ * @cnt: number of entries in the stack
+ * @sz: current maximum stack size
+ * @trace_nr: current trace number
+ * @branch_count: running branch count
+ * @insn_count: running  instruction count
+ * @cyc_count running  cycle count
+ * @kernel_start: kernel start address
+ * @last_time: last timestamp
+ * @crp: call/return processor
+ * @comm: current comm
+ * @arr_sz: size of array if this is the first element of an array
+ * @rstate: used to detect retpolines
+ * @br_stack_rb: branch stack (ring buffer)
+ * @br_stack_sz: maximum branch stack size
+ * @br_stack_pos: current position in @br_stack_rb
+ * @mispred_all: mark all branches as mispredicted
+ */
+struct thread_stack {
+	struct thread_stack_entry *stack;
+	size_t cnt;
+	size_t sz;
+	u64 trace_nr;
+	u64 branch_count;
+	u64 insn_count;
+	u64 cyc_count;
+	u64 kernel_start;
+	u64 last_time;
+	struct call_return_processor *crp;
+	struct comm *comm;
+	unsigned int arr_sz;
+	enum retpoline_state_t rstate;
+	struct branch_stack *br_stack_rb;
+	unsigned int br_stack_sz;
+	unsigned int br_stack_pos;
+	bool mispred_all;
+};
+
+/*
+ * Assume pid == tid == 0 identifies the idle task as defined by
+ * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
+ * and therefore requires a stack for each cpu.
+ */
+static inline bool thread_stack__per_cpu(struct thread *thread)
+{
+	return !(thread->tid || thread->pid_);
+}
+
+static int thread_stack__grow(struct thread_stack *ts)
+{
+	struct thread_stack_entry *new_stack;
+	size_t sz, new_sz;
+
+	new_sz = ts->sz + STACK_GROWTH;
+	sz = new_sz * sizeof(struct thread_stack_entry);
+
+	new_stack = realloc(ts->stack, sz);
+	if (!new_stack)
+		return -ENOMEM;
+
+	ts->stack = new_stack;
+	ts->sz = new_sz;
+
+	return 0;
+}
+
+static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
+			      struct call_return_processor *crp,
+			      bool callstack, unsigned int br_stack_sz)
+{
+	int err;
+
+	if (callstack) {
+		err = thread_stack__grow(ts);
+		if (err)
+			return err;
+	}
+
+	if (br_stack_sz) {
+		size_t sz = sizeof(struct branch_stack);
+
+		sz += br_stack_sz * sizeof(struct branch_entry);
+		ts->br_stack_rb = zalloc(sz);
+		if (!ts->br_stack_rb)
+			return -ENOMEM;
+		ts->br_stack_sz = br_stack_sz;
+	}
+
+	if (thread->maps && thread->maps->machine) {
+		struct machine *machine = thread->maps->machine;
+		const char *arch = perf_env__arch(machine->env);
+
+		ts->kernel_start = machine__kernel_start(machine);
+		if (!strcmp(arch, "x86"))
+			ts->rstate = X86_RETPOLINE_POSSIBLE;
+	} else {
+		ts->kernel_start = 1ULL << 63;
+	}
+	ts->crp = crp;
+
+	return 0;
+}
+
+static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
+					      struct call_return_processor *crp,
+					      bool callstack,
+					      unsigned int br_stack_sz)
+{
+	struct thread_stack *ts = thread->ts, *new_ts;
+	unsigned int old_sz = ts ? ts->arr_sz : 0;
+	unsigned int new_sz = 1;
+
+	if (thread_stack__per_cpu(thread) && cpu > 0)
+		new_sz = roundup_pow_of_two(cpu + 1);
+
+	if (!ts || new_sz > old_sz) {
+		new_ts = calloc(new_sz, sizeof(*ts));
+		if (!new_ts)
+			return NULL;
+		if (ts)
+			memcpy(new_ts, ts, old_sz * sizeof(*ts));
+		new_ts->arr_sz = new_sz;
+		zfree(&thread->ts);
+		thread->ts = new_ts;
+		ts = new_ts;
+	}
+
+	if (thread_stack__per_cpu(thread) && cpu > 0 &&
+	    (unsigned int)cpu < ts->arr_sz)
+		ts += cpu;
+
+	if (!ts->stack &&
+	    thread_stack__init(ts, thread, crp, callstack, br_stack_sz))
+		return NULL;
+
+	return ts;
+}
+
+static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
+{
+	struct thread_stack *ts = thread->ts;
+
+	if (cpu < 0)
+		cpu = 0;
+
+	if (!ts || (unsigned int)cpu >= ts->arr_sz)
+		return NULL;
+
+	ts += cpu;
+
+	if (!ts->stack)
+		return NULL;
+
+	return ts;
+}
+
+static inline struct thread_stack *thread__stack(struct thread *thread,
+						    int cpu)
+{
+	if (!thread)
+		return NULL;
+
+	if (thread_stack__per_cpu(thread))
+		return thread__cpu_stack(thread, cpu);
+
+	return thread->ts;
+}
+
+static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
+			      bool trace_end)
+{
+	int err = 0;
+
+	if (ts->cnt == ts->sz) {
+		err = thread_stack__grow(ts);
+		if (err) {
+			pr_warning("Out of memory: discarding thread stack\n");
+			ts->cnt = 0;
+		}
+	}
+
+	ts->stack[ts->cnt].trace_end = trace_end;
+	ts->stack[ts->cnt++].ret_addr = ret_addr;
+
+	return err;
+}
+
+static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
+{
+	size_t i;
+
+	/*
+	 * In some cases there may be functions which are not seen to return.
+	 * For example when setjmp / longjmp has been used.  Or the perf context
+	 * switch in the kernel which doesn't stop and start tracing in exactly
+	 * the same code path.  When that happens the return address will be
+	 * further down the stack.  If the return address is not found at all,
+	 * we assume the opposite (i.e. this is a return for a call that wasn't
+	 * seen for some reason) and leave the stack alone.
+	 */
+	for (i = ts->cnt; i; ) {
+		if (ts->stack[--i].ret_addr == ret_addr) {
+			ts->cnt = i;
+			return;
+		}
+	}
+}
+
+static void thread_stack__pop_trace_end(struct thread_stack *ts)
+{
+	size_t i;
+
+	for (i = ts->cnt; i; ) {
+		if (ts->stack[--i].trace_end)
+			ts->cnt = i;
+		else
+			return;
+	}
+}
+
+static bool thread_stack__in_kernel(struct thread_stack *ts)
+{
+	if (!ts->cnt)
+		return false;
+
+	return ts->stack[ts->cnt - 1].cp->in_kernel;
+}
+
+static int thread_stack__call_return(struct thread *thread,
+				     struct thread_stack *ts, size_t idx,
+				     u64 timestamp, u64 ref, bool no_return)
+{
+	struct call_return_processor *crp = ts->crp;
+	struct thread_stack_entry *tse;
+	struct call_return cr = {
+		.thread = thread,
+		.comm = ts->comm,
+		.db_id = 0,
+	};
+	u64 *parent_db_id;
+
+	tse = &ts->stack[idx];
+	cr.cp = tse->cp;
+	cr.call_time = tse->timestamp;
+	cr.return_time = timestamp;
+	cr.branch_count = ts->branch_count - tse->branch_count;
+	cr.insn_count = ts->insn_count - tse->insn_count;
+	cr.cyc_count = ts->cyc_count - tse->cyc_count;
+	cr.db_id = tse->db_id;
+	cr.call_ref = tse->ref;
+	cr.return_ref = ref;
+	if (tse->no_call)
+		cr.flags |= CALL_RETURN_NO_CALL;
+	if (no_return)
+		cr.flags |= CALL_RETURN_NO_RETURN;
+	if (tse->non_call)
+		cr.flags |= CALL_RETURN_NON_CALL;
+
+	/*
+	 * The parent db_id must be assigned before exporting the child. Note
+	 * it is not possible to export the parent first because its information
+	 * is not yet complete because its 'return' has not yet been processed.
+	 */
+	parent_db_id = idx ? &(tse - 1)->db_id : NULL;
+
+	return crp->process(&cr, parent_db_id, crp->data);
+}
+
+static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
+{
+	struct call_return_processor *crp = ts->crp;
+	int err;
+
+	if (!crp) {
+		ts->cnt = 0;
+		ts->br_stack_pos = 0;
+		if (ts->br_stack_rb)
+			ts->br_stack_rb->nr = 0;
+		return 0;
+	}
+
+	while (ts->cnt) {
+		err = thread_stack__call_return(thread, ts, --ts->cnt,
+						ts->last_time, 0, true);
+		if (err) {
+			pr_err("Error flushing thread stack!\n");
+			ts->cnt = 0;
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+int thread_stack__flush(struct thread *thread)
+{
+	struct thread_stack *ts = thread->ts;
+	unsigned int pos;
+	int err = 0;
+
+	if (ts) {
+		for (pos = 0; pos < ts->arr_sz; pos++) {
+			int ret = __thread_stack__flush(thread, ts + pos);
+
+			if (ret)
+				err = ret;
+		}
+	}
+
+	return err;
+}
+
+static void thread_stack__update_br_stack(struct thread_stack *ts, u32 flags,
+					  u64 from_ip, u64 to_ip)
+{
+	struct branch_stack *bs = ts->br_stack_rb;
+	struct branch_entry *be;
+
+	if (!ts->br_stack_pos)
+		ts->br_stack_pos = ts->br_stack_sz;
+
+	ts->br_stack_pos -= 1;
+
+	be              = &bs->entries[ts->br_stack_pos];
+	be->from        = from_ip;
+	be->to          = to_ip;
+	be->flags.value = 0;
+	be->flags.abort = !!(flags & PERF_IP_FLAG_TX_ABORT);
+	be->flags.in_tx = !!(flags & PERF_IP_FLAG_IN_TX);
+	/* No support for mispredict */
+	be->flags.mispred = ts->mispred_all;
+
+	if (bs->nr < ts->br_stack_sz)
+		bs->nr += 1;
+}
+
+int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
+			u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack,
+			unsigned int br_stack_sz, bool mispred_all)
+{
+	struct thread_stack *ts = thread__stack(thread, cpu);
+
+	if (!thread)
+		return -EINVAL;
+
+	if (!ts) {
+		ts = thread_stack__new(thread, cpu, NULL, callstack, br_stack_sz);
+		if (!ts) {
+			pr_warning("Out of memory: no thread stack\n");
+			return -ENOMEM;
+		}
+		ts->trace_nr = trace_nr;
+		ts->mispred_all = mispred_all;
+	}
+
+	/*
+	 * When the trace is discontinuous, the trace_nr changes.  In that case
+	 * the stack might be completely invalid.  Better to report nothing than
+	 * to report something misleading, so flush the stack.
+	 */
+	if (trace_nr != ts->trace_nr) {
+		if (ts->trace_nr)
+			__thread_stack__flush(thread, ts);
+		ts->trace_nr = trace_nr;
+	}
+
+	if (br_stack_sz)
+		thread_stack__update_br_stack(ts, flags, from_ip, to_ip);
+
+	/*
+	 * Stop here if thread_stack__process() is in use, or not recording call
+	 * stack.
+	 */
+	if (ts->crp || !callstack)
+		return 0;
+
+	if (flags & PERF_IP_FLAG_CALL) {
+		u64 ret_addr;
+
+		if (!to_ip)
+			return 0;
+		ret_addr = from_ip + insn_len;
+		if (ret_addr == to_ip)
+			return 0; /* Zero-length calls are excluded */
+		return thread_stack__push(ts, ret_addr,
+					  flags & PERF_IP_FLAG_TRACE_END);
+	} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
+		/*
+		 * If the caller did not change the trace number (which would
+		 * have flushed the stack) then try to make sense of the stack.
+		 * Possibly, tracing began after returning to the current
+		 * address, so try to pop that. Also, do not expect a call made
+		 * when the trace ended, to return, so pop that.
+		 */
+		thread_stack__pop(ts, to_ip);
+		thread_stack__pop_trace_end(ts);
+	} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
+		thread_stack__pop(ts, to_ip);
+	}
+
+	return 0;
+}
+
+void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
+{
+	struct thread_stack *ts = thread__stack(thread, cpu);
+
+	if (!ts)
+		return;
+
+	if (trace_nr != ts->trace_nr) {
+		if (ts->trace_nr)
+			__thread_stack__flush(thread, ts);
+		ts->trace_nr = trace_nr;
+	}
+}
+
+static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
+{
+	__thread_stack__flush(thread, ts);
+	zfree(&ts->stack);
+	zfree(&ts->br_stack_rb);
+}
+
+static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
+{
+	unsigned int arr_sz = ts->arr_sz;
+
+	__thread_stack__free(thread, ts);
+	memset(ts, 0, sizeof(*ts));
+	ts->arr_sz = arr_sz;
+}
+
+void thread_stack__free(struct thread *thread)
+{
+	struct thread_stack *ts = thread->ts;
+	unsigned int pos;
+
+	if (ts) {
+		for (pos = 0; pos < ts->arr_sz; pos++)
+			__thread_stack__free(thread, ts + pos);
+		zfree(&thread->ts);
+	}
+}
+
+static inline u64 callchain_context(u64 ip, u64 kernel_start)
+{
+	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
+}
+
+void thread_stack__sample(struct thread *thread, int cpu,
+			  struct ip_callchain *chain,
+			  size_t sz, u64 ip, u64 kernel_start)
+{
+	struct thread_stack *ts = thread__stack(thread, cpu);
+	u64 context = callchain_context(ip, kernel_start);
+	u64 last_context;
+	size_t i, j;
+
+	if (sz < 2) {
+		chain->nr = 0;
+		return;
+	}
+
+	chain->ips[0] = context;
+	chain->ips[1] = ip;
+
+	if (!ts) {
+		chain->nr = 2;
+		return;
+	}
+
+	last_context = context;
+
+	for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
+		ip = ts->stack[ts->cnt - j].ret_addr;
+		context = callchain_context(ip, kernel_start);
+		if (context != last_context) {
+			if (i >= sz - 1)
+				break;
+			chain->ips[i++] = context;
+			last_context = context;
+		}
+		chain->ips[i] = ip;
+	}
+
+	chain->nr = i;
+}
+
+/*
+ * Hardware sample records, created some time after the event occurred, need to
+ * have subsequent addresses removed from the call chain.
+ */
+void thread_stack__sample_late(struct thread *thread, int cpu,
+			       struct ip_callchain *chain, size_t sz,
+			       u64 sample_ip, u64 kernel_start)
+{
+	struct thread_stack *ts = thread__stack(thread, cpu);
+	u64 sample_context = callchain_context(sample_ip, kernel_start);
+	u64 last_context, context, ip;
+	size_t nr = 0, j;
+
+	if (sz < 2) {
+		chain->nr = 0;
+		return;
+	}
+
+	if (!ts)
+		goto out;
+
+	/*
+	 * When tracing kernel space, kernel addresses occur at the top of the
+	 * call chain after the event occurred but before tracing stopped.
+	 * Skip them.
+	 */
+	for (j = 1; j <= ts->cnt; j++) {
+		ip = ts->stack[ts->cnt - j].ret_addr;
+		context = callchain_context(ip, kernel_start);
+		if (context == PERF_CONTEXT_USER ||
+		    (context == sample_context && ip == sample_ip))
+			break;
+	}
+
+	last_context = sample_ip; /* Use sample_ip as an invalid context */
+
+	for (; nr < sz && j <= ts->cnt; nr++, j++) {
+		ip = ts->stack[ts->cnt - j].ret_addr;
+		context = callchain_context(ip, kernel_start);
+		if (context != last_context) {
+			if (nr >= sz - 1)
+				break;
+			chain->ips[nr++] = context;
+			last_context = context;
+		}
+		chain->ips[nr] = ip;
+	}
+out:
+	if (nr) {
+		chain->nr = nr;
+	} else {
+		chain->ips[0] = sample_context;
+		chain->ips[1] = sample_ip;
+		chain->nr = 2;
+	}
+}
+
+void thread_stack__br_sample(struct thread *thread, int cpu,
+			     struct branch_stack *dst, unsigned int sz)
+{
+	struct thread_stack *ts = thread__stack(thread, cpu);
+	const size_t bsz = sizeof(struct branch_entry);
+	struct branch_stack *src;
+	struct branch_entry *be;
+	unsigned int nr;
+
+	dst->nr = 0;
+
+	if (!ts)
+		return;
+
+	src = ts->br_stack_rb;
+	if (!src->nr)
+		return;
+
+	dst->nr = min((unsigned int)src->nr, sz);
+
+	be = &dst->entries[0];
+	nr = min(ts->br_stack_sz - ts->br_stack_pos, (unsigned int)dst->nr);
+	memcpy(be, &src->entries[ts->br_stack_pos], bsz * nr);
+
+	if (src->nr >= ts->br_stack_sz) {
+		sz -= nr;
+		be = &dst->entries[nr];
+		nr = min(ts->br_stack_pos, sz);
+		memcpy(be, &src->entries[0], bsz * ts->br_stack_pos);
+	}
+}
+
+/* Start of user space branch entries */
+static bool us_start(struct branch_entry *be, u64 kernel_start, bool *start)
+{
+	if (!*start)
+		*start = be->to && be->to < kernel_start;
+
+	return *start;
+}
+
+/*
+ * Start of branch entries after the ip fell in between 2 branches, or user
+ * space branch entries.
+ */
+static bool ks_start(struct branch_entry *be, u64 sample_ip, u64 kernel_start,
+		     bool *start, struct branch_entry *nb)
+{
+	if (!*start) {
+		*start = (nb && sample_ip >= be->to && sample_ip <= nb->from) ||
+			 be->from < kernel_start ||
+			 (be->to && be->to < kernel_start);
+	}
+
+	return *start;
+}
+
+/*
+ * Hardware sample records, created some time after the event occurred, need to
+ * have subsequent addresses removed from the branch stack.
+ */
+void thread_stack__br_sample_late(struct thread *thread, int cpu,
+				  struct branch_stack *dst, unsigned int sz,
+				  u64 ip, u64 kernel_start)
+{
+	struct thread_stack *ts = thread__stack(thread, cpu);
+	struct branch_entry *d, *s, *spos, *ssz;
+	struct branch_stack *src;
+	unsigned int nr = 0;
+	bool start = false;
+
+	dst->nr = 0;
+
+	if (!ts)
+		return;
+
+	src = ts->br_stack_rb;
+	if (!src->nr)
+		return;
+
+	spos = &src->entries[ts->br_stack_pos];
+	ssz  = &src->entries[ts->br_stack_sz];
+
+	d = &dst->entries[0];
+	s = spos;
+
+	if (ip < kernel_start) {
+		/*
+		 * User space sample: start copying branch entries when the
+		 * branch is in user space.
+		 */
+		for (s = spos; s < ssz && nr < sz; s++) {
+			if (us_start(s, kernel_start, &start)) {
+				*d++ = *s;
+				nr += 1;
+			}
+		}
+
+		if (src->nr >= ts->br_stack_sz) {
+			for (s = &src->entries[0]; s < spos && nr < sz; s++) {
+				if (us_start(s, kernel_start, &start)) {
+					*d++ = *s;
+					nr += 1;
+				}
+			}
+		}
+	} else {
+		struct branch_entry *nb = NULL;
+
+		/*
+		 * Kernel space sample: start copying branch entries when the ip
+		 * falls in between 2 branches (or the branch is in user space
+		 * because then the start must have been missed).
+		 */
+		for (s = spos; s < ssz && nr < sz; s++) {
+			if (ks_start(s, ip, kernel_start, &start, nb)) {
+				*d++ = *s;
+				nr += 1;
+			}
+			nb = s;
+		}
+
+		if (src->nr >= ts->br_stack_sz) {
+			for (s = &src->entries[0]; s < spos && nr < sz; s++) {
+				if (ks_start(s, ip, kernel_start, &start, nb)) {
+					*d++ = *s;
+					nr += 1;
+				}
+				nb = s;
+			}
+		}
+	}
+
+	dst->nr = nr;
+}
+
+struct call_return_processor *
+call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data),
+			   void *data)
+{
+	struct call_return_processor *crp;
+
+	crp = zalloc(sizeof(struct call_return_processor));
+	if (!crp)
+		return NULL;
+	crp->cpr = call_path_root__new();
+	if (!crp->cpr)
+		goto out_free;
+	crp->process = process;
+	crp->data = data;
+	return crp;
+
+out_free:
+	free(crp);
+	return NULL;
+}
+
+void call_return_processor__free(struct call_return_processor *crp)
+{
+	if (crp) {
+		call_path_root__free(crp->cpr);
+		free(crp);
+	}
+}
+
+static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
+				 u64 timestamp, u64 ref, struct call_path *cp,
+				 bool no_call, bool trace_end)
+{
+	struct thread_stack_entry *tse;
+	int err;
+
+	if (!cp)
+		return -ENOMEM;
+
+	if (ts->cnt == ts->sz) {
+		err = thread_stack__grow(ts);
+		if (err)
+			return err;
+	}
+
+	tse = &ts->stack[ts->cnt++];
+	tse->ret_addr = ret_addr;
+	tse->timestamp = timestamp;
+	tse->ref = ref;
+	tse->branch_count = ts->branch_count;
+	tse->insn_count = ts->insn_count;
+	tse->cyc_count = ts->cyc_count;
+	tse->cp = cp;
+	tse->no_call = no_call;
+	tse->trace_end = trace_end;
+	tse->non_call = false;
+	tse->db_id = 0;
+
+	return 0;
+}
+
+static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
+				u64 ret_addr, u64 timestamp, u64 ref,
+				struct symbol *sym)
+{
+	int err;
+
+	if (!ts->cnt)
+		return 1;
+
+	if (ts->cnt == 1) {
+		struct thread_stack_entry *tse = &ts->stack[0];
+
+		if (tse->cp->sym == sym)
+			return thread_stack__call_return(thread, ts, --ts->cnt,
+							 timestamp, ref, false);
+	}
+
+	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr &&
+	    !ts->stack[ts->cnt - 1].non_call) {
+		return thread_stack__call_return(thread, ts, --ts->cnt,
+						 timestamp, ref, false);
+	} else {
+		size_t i = ts->cnt - 1;
+
+		while (i--) {
+			if (ts->stack[i].ret_addr != ret_addr ||
+			    ts->stack[i].non_call)
+				continue;
+			i += 1;
+			while (ts->cnt > i) {
+				err = thread_stack__call_return(thread, ts,
+								--ts->cnt,
+								timestamp, ref,
+								true);
+				if (err)
+					return err;
+			}
+			return thread_stack__call_return(thread, ts, --ts->cnt,
+							 timestamp, ref, false);
+		}
+	}
+
+	return 1;
+}
+
+static int thread_stack__bottom(struct thread_stack *ts,
+				struct perf_sample *sample,
+				struct addr_location *from_al,
+				struct addr_location *to_al, u64 ref)
+{
+	struct call_path_root *cpr = ts->crp->cpr;
+	struct call_path *cp;
+	struct symbol *sym;
+	u64 ip;
+
+	if (sample->ip) {
+		ip = sample->ip;
+		sym = from_al->sym;
+	} else if (sample->addr) {
+		ip = sample->addr;
+		sym = to_al->sym;
+	} else {
+		return 0;
+	}
+
+	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
+				ts->kernel_start);
+
+	return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
+				     true, false);
+}
+
+static int thread_stack__pop_ks(struct thread *thread, struct thread_stack *ts,
+				struct perf_sample *sample, u64 ref)
+{
+	u64 tm = sample->time;
+	int err;
+
+	/* Return to userspace, so pop all kernel addresses */
+	while (thread_stack__in_kernel(ts)) {
+		err = thread_stack__call_return(thread, ts, --ts->cnt,
+						tm, ref, true);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int thread_stack__no_call_return(struct thread *thread,
+					struct thread_stack *ts,
+					struct perf_sample *sample,
+					struct addr_location *from_al,
+					struct addr_location *to_al, u64 ref)
+{
+	struct call_path_root *cpr = ts->crp->cpr;
+	struct call_path *root = &cpr->call_path;
+	struct symbol *fsym = from_al->sym;
+	struct symbol *tsym = to_al->sym;
+	struct call_path *cp, *parent;
+	u64 ks = ts->kernel_start;
+	u64 addr = sample->addr;
+	u64 tm = sample->time;
+	u64 ip = sample->ip;
+	int err;
+
+	if (ip >= ks && addr < ks) {
+		/* Return to userspace, so pop all kernel addresses */
+		err = thread_stack__pop_ks(thread, ts, sample, ref);
+		if (err)
+			return err;
+
+		/* If the stack is empty, push the userspace address */
+		if (!ts->cnt) {
+			cp = call_path__findnew(cpr, root, tsym, addr, ks);
+			return thread_stack__push_cp(ts, 0, tm, ref, cp, true,
+						     false);
+		}
+	} else if (thread_stack__in_kernel(ts) && ip < ks) {
+		/* Return to userspace, so pop all kernel addresses */
+		err = thread_stack__pop_ks(thread, ts, sample, ref);
+		if (err)
+			return err;
+	}
+
+	if (ts->cnt)
+		parent = ts->stack[ts->cnt - 1].cp;
+	else
+		parent = root;
+
+	if (parent->sym == from_al->sym) {
+		/*
+		 * At the bottom of the stack, assume the missing 'call' was
+		 * before the trace started. So, pop the current symbol and push
+		 * the 'to' symbol.
+		 */
+		if (ts->cnt == 1) {
+			err = thread_stack__call_return(thread, ts, --ts->cnt,
+							tm, ref, false);
+			if (err)
+				return err;
+		}
+
+		if (!ts->cnt) {
+			cp = call_path__findnew(cpr, root, tsym, addr, ks);
+
+			return thread_stack__push_cp(ts, addr, tm, ref, cp,
+						     true, false);
+		}
+
+		/*
+		 * Otherwise assume the 'return' is being used as a jump (e.g.
+		 * retpoline) and just push the 'to' symbol.
+		 */
+		cp = call_path__findnew(cpr, parent, tsym, addr, ks);
+
+		err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false);
+		if (!err)
+			ts->stack[ts->cnt - 1].non_call = true;
+
+		return err;
+	}
+
+	/*
+	 * Assume 'parent' has not yet returned, so push 'to', and then push and
+	 * pop 'from'.
+	 */
+
+	cp = call_path__findnew(cpr, parent, tsym, addr, ks);
+
+	err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
+	if (err)
+		return err;
+
+	cp = call_path__findnew(cpr, cp, fsym, ip, ks);
+
+	err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false);
+	if (err)
+		return err;
+
+	return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false);
+}
+
+static int thread_stack__trace_begin(struct thread *thread,
+				     struct thread_stack *ts, u64 timestamp,
+				     u64 ref)
+{
+	struct thread_stack_entry *tse;
+	int err;
+
+	if (!ts->cnt)
+		return 0;
+
+	/* Pop trace end */
+	tse = &ts->stack[ts->cnt - 1];
+	if (tse->trace_end) {
+		err = thread_stack__call_return(thread, ts, --ts->cnt,
+						timestamp, ref, false);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int thread_stack__trace_end(struct thread_stack *ts,
+				   struct perf_sample *sample, u64 ref)
+{
+	struct call_path_root *cpr = ts->crp->cpr;
+	struct call_path *cp;
+	u64 ret_addr;
+
+	/* No point having 'trace end' on the bottom of the stack */
+	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
+		return 0;
+
+	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
+				ts->kernel_start);
+
+	ret_addr = sample->ip + sample->insn_len;
+
+	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
+				     false, true);
+}
+
+static bool is_x86_retpoline(const char *name)
+{
+	const char *p = strstr(name, "__x86_indirect_thunk_");
+
+	return p == name || !strcmp(name, "__indirect_thunk_start");
+}
+
+/*
+ * x86 retpoline functions pollute the call graph. This function removes them.
+ * This does not handle function return thunks, nor is there any improvement
+ * for the handling of inline thunks or extern thunks.
+ */
+static int thread_stack__x86_retpoline(struct thread_stack *ts,
+				       struct perf_sample *sample,
+				       struct addr_location *to_al)
+{
+	struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1];
+	struct call_path_root *cpr = ts->crp->cpr;
+	struct symbol *sym = tse->cp->sym;
+	struct symbol *tsym = to_al->sym;
+	struct call_path *cp;
+
+	if (sym && is_x86_retpoline(sym->name)) {
+		/*
+		 * This is a x86 retpoline fn. It pollutes the call graph by
+		 * showing up everywhere there is an indirect branch, but does
+		 * not itself mean anything. Here the top-of-stack is removed,
+		 * by decrementing the stack count, and then further down, the
+		 * resulting top-of-stack is replaced with the actual target.
+		 * The result is that the retpoline functions will no longer
+		 * appear in the call graph. Note this only affects the call
+		 * graph, since all the original branches are left unchanged.
+		 */
+		ts->cnt -= 1;
+		sym = ts->stack[ts->cnt - 2].cp->sym;
+		if (sym && sym == tsym && to_al->addr != tsym->start) {
+			/*
+			 * Target is back to the middle of the symbol we came
+			 * from so assume it is an indirect jmp and forget it
+			 * altogether.
+			 */
+			ts->cnt -= 1;
+			return 0;
+		}
+	} else if (sym && sym == tsym) {
+		/*
+		 * Target is back to the symbol we came from so assume it is an
+		 * indirect jmp and forget it altogether.
+		 */
+		ts->cnt -= 1;
+		return 0;
+	}
+
+	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym,
+				sample->addr, ts->kernel_start);
+	if (!cp)
+		return -ENOMEM;
+
+	/* Replace the top-of-stack with the actual target */
+	ts->stack[ts->cnt - 1].cp = cp;
+
+	return 0;
+}
+
+int thread_stack__process(struct thread *thread, struct comm *comm,
+			  struct perf_sample *sample,
+			  struct addr_location *from_al,
+			  struct addr_location *to_al, u64 ref,
+			  struct call_return_processor *crp)
+{
+	struct thread_stack *ts = thread__stack(thread, sample->cpu);
+	enum retpoline_state_t rstate;
+	int err = 0;
+
+	if (ts && !ts->crp) {
+		/* Supersede thread_stack__event() */
+		thread_stack__reset(thread, ts);
+		ts = NULL;
+	}
+
+	if (!ts) {
+		ts = thread_stack__new(thread, sample->cpu, crp, true, 0);
+		if (!ts)
+			return -ENOMEM;
+		ts->comm = comm;
+	}
+
+	rstate = ts->rstate;
+	if (rstate == X86_RETPOLINE_DETECTED)
+		ts->rstate = X86_RETPOLINE_POSSIBLE;
+
+	/* Flush stack on exec */
+	if (ts->comm != comm && thread->pid_ == thread->tid) {
+		err = __thread_stack__flush(thread, ts);
+		if (err)
+			return err;
+		ts->comm = comm;
+	}
+
+	/* If the stack is empty, put the current symbol on the stack */
+	if (!ts->cnt) {
+		err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
+		if (err)
+			return err;
+	}
+
+	ts->branch_count += 1;
+	ts->insn_count += sample->insn_cnt;
+	ts->cyc_count += sample->cyc_cnt;
+	ts->last_time = sample->time;
+
+	if (sample->flags & PERF_IP_FLAG_CALL) {
+		bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
+		struct call_path_root *cpr = ts->crp->cpr;
+		struct call_path *cp;
+		u64 ret_addr;
+
+		if (!sample->ip || !sample->addr)
+			return 0;
+
+		ret_addr = sample->ip + sample->insn_len;
+		if (ret_addr == sample->addr)
+			return 0; /* Zero-length calls are excluded */
+
+		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
+					to_al->sym, sample->addr,
+					ts->kernel_start);
+		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
+					    cp, false, trace_end);
+
+		/*
+		 * A call to the same symbol but not the start of the symbol,
+		 * may be the start of a x86 retpoline.
+		 */
+		if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym &&
+		    from_al->sym == to_al->sym &&
+		    to_al->addr != to_al->sym->start)
+			ts->rstate = X86_RETPOLINE_DETECTED;
+
+	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
+		if (!sample->addr) {
+			u32 return_from_kernel = PERF_IP_FLAG_SYSCALLRET |
+						 PERF_IP_FLAG_INTERRUPT;
+
+			if (!(sample->flags & return_from_kernel))
+				return 0;
+
+			/* Pop kernel stack */
+			return thread_stack__pop_ks(thread, ts, sample, ref);
+		}
+
+		if (!sample->ip)
+			return 0;
+
+		/* x86 retpoline 'return' doesn't match the stack */
+		if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 &&
+		    ts->stack[ts->cnt - 1].ret_addr != sample->addr)
+			return thread_stack__x86_retpoline(ts, sample, to_al);
+
+		err = thread_stack__pop_cp(thread, ts, sample->addr,
+					   sample->time, ref, from_al->sym);
+		if (err) {
+			if (err < 0)
+				return err;
+			err = thread_stack__no_call_return(thread, ts, sample,
+							   from_al, to_al, ref);
+		}
+	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
+		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
+	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
+		err = thread_stack__trace_end(ts, sample, ref);
+	} else if (sample->flags & PERF_IP_FLAG_BRANCH &&
+		   from_al->sym != to_al->sym && to_al->sym &&
+		   to_al->addr == to_al->sym->start) {
+		struct call_path_root *cpr = ts->crp->cpr;
+		struct call_path *cp;
+
+		/*
+		 * The compiler might optimize a call/ret combination by making
+		 * it a jmp. Make that visible by recording on the stack a
+		 * branch to the start of a different symbol. Note, that means
+		 * when a ret pops the stack, all jmps must be popped off first.
+		 */
+		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
+					to_al->sym, sample->addr,
+					ts->kernel_start);
+		err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false,
+					    false);
+		if (!err)
+			ts->stack[ts->cnt - 1].non_call = true;
+	}
+
+	return err;
+}
+
+size_t thread_stack__depth(struct thread *thread, int cpu)
+{
+	struct thread_stack *ts = thread__stack(thread, cpu);
+
+	if (!ts)
+		return 0;
+	return ts->cnt;
+}
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
new file mode 100644
index 000000000..b3cd09beb
--- /dev/null
+++ b/tools/perf/util/thread-stack.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * thread-stack.h: Synthesize a thread's stack using call / return events
+ * Copyright (c) 2014, Intel Corporation.
+ */
+
+#ifndef __PERF_THREAD_STACK_H
+#define __PERF_THREAD_STACK_H
+
+#include <sys/types.h>
+
+#include <linux/types.h>
+
+struct thread;
+struct comm;
+struct ip_callchain;
+struct symbol;
+struct dso;
+struct perf_sample;
+struct addr_location;
+struct call_path;
+
+/*
+ * Call/Return flags.
+ *
+ * CALL_RETURN_NO_CALL: 'return' but no matching 'call'
+ * CALL_RETURN_NO_RETURN: 'call' but no matching 'return'
+ * CALL_RETURN_NON_CALL: a branch but not a 'call' to the start of a different
+ *                       symbol
+ */
+enum {
+	CALL_RETURN_NO_CALL	= 1 << 0,
+	CALL_RETURN_NO_RETURN	= 1 << 1,
+	CALL_RETURN_NON_CALL	= 1 << 2,
+};
+
+/**
+ * struct call_return - paired call/return information.
+ * @thread: thread in which call/return occurred
+ * @comm: comm in which call/return occurred
+ * @cp: call path
+ * @call_time: timestamp of call (if known)
+ * @return_time: timestamp of return (if known)
+ * @branch_count: number of branches seen between call and return
+ * @insn_count: approx. number of instructions between call and return
+ * @cyc_count: approx. number of cycles between call and return
+ * @call_ref: external reference to 'call' sample (e.g. db_id)
+ * @return_ref:  external reference to 'return' sample (e.g. db_id)
+ * @db_id: id used for db-export
+ * @parent_db_id: id of parent call used for db-export
+ * @flags: Call/Return flags
+ */
+struct call_return {
+	struct thread *thread;
+	struct comm *comm;
+	struct call_path *cp;
+	u64 call_time;
+	u64 return_time;
+	u64 branch_count;
+	u64 insn_count;
+	u64 cyc_count;
+	u64 call_ref;
+	u64 return_ref;
+	u64 db_id;
+	u64 parent_db_id;
+	u32 flags;
+};
+
+/**
+ * struct call_return_processor - provides a call-back to consume call-return
+ *                                information.
+ * @cpr: call path root
+ * @process: call-back that accepts call/return information
+ * @data: anonymous data for call-back
+ */
+struct call_return_processor {
+	struct call_path_root *cpr;
+	int (*process)(struct call_return *cr, u64 *parent_db_id, void *data);
+	void *data;
+};
+
+int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
+			u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack,
+			unsigned int br_stack_sz, bool mispred_all);
+void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr);
+void thread_stack__sample(struct thread *thread, int cpu, struct ip_callchain *chain,
+			  size_t sz, u64 ip, u64 kernel_start);
+void thread_stack__sample_late(struct thread *thread, int cpu,
+			       struct ip_callchain *chain, size_t sz, u64 ip,
+			       u64 kernel_start);
+void thread_stack__br_sample(struct thread *thread, int cpu,
+			     struct branch_stack *dst, unsigned int sz);
+void thread_stack__br_sample_late(struct thread *thread, int cpu,
+				  struct branch_stack *dst, unsigned int sz,
+				  u64 sample_ip, u64 kernel_start);
+int thread_stack__flush(struct thread *thread);
+void thread_stack__free(struct thread *thread);
+size_t thread_stack__depth(struct thread *thread, int cpu);
+
+struct call_return_processor *
+call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data),
+			   void *data);
+void call_return_processor__free(struct call_return_processor *crp);
+int thread_stack__process(struct thread *thread, struct comm *comm,
+			  struct perf_sample *sample,
+			  struct addr_location *from_al,
+			  struct addr_location *to_al, u64 ref,
+			  struct call_return_processor *crp);
+
+#endif
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
new file mode 100644
index 000000000..e3e5427e1
--- /dev/null
+++ b/tools/perf/util/thread.c
@@ -0,0 +1,479 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+#include "dso.h"
+#include "session.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "debug.h"
+#include "namespaces.h"
+#include "comm.h"
+#include "map.h"
+#include "symbol.h"
+#include "unwind.h"
+#include "callchain.h"
+
+#include <api/fs/fs.h>
+
+int thread__init_maps(struct thread *thread, struct machine *machine)
+{
+	pid_t pid = thread->pid_;
+
+	if (pid == thread->tid || pid == -1) {
+		thread->maps = maps__new(machine);
+	} else {
+		struct thread *leader = __machine__findnew_thread(machine, pid, pid);
+		if (leader) {
+			thread->maps = maps__get(leader->maps);
+			thread__put(leader);
+		}
+	}
+
+	return thread->maps ? 0 : -1;
+}
+
+struct thread *thread__new(pid_t pid, pid_t tid)
+{
+	char *comm_str;
+	struct comm *comm;
+	struct thread *thread = zalloc(sizeof(*thread));
+
+	if (thread != NULL) {
+		thread->pid_ = pid;
+		thread->tid = tid;
+		thread->ppid = -1;
+		thread->cpu = -1;
+		thread->guest_cpu = -1;
+		thread->lbr_stitch_enable = false;
+		INIT_LIST_HEAD(&thread->namespaces_list);
+		INIT_LIST_HEAD(&thread->comm_list);
+		init_rwsem(&thread->namespaces_lock);
+		init_rwsem(&thread->comm_lock);
+
+		comm_str = malloc(32);
+		if (!comm_str)
+			goto err_thread;
+
+		snprintf(comm_str, 32, ":%d", tid);
+		comm = comm__new(comm_str, 0, false);
+		free(comm_str);
+		if (!comm)
+			goto err_thread;
+
+		list_add(&comm->list, &thread->comm_list);
+		refcount_set(&thread->refcnt, 1);
+		RB_CLEAR_NODE(&thread->rb_node);
+		/* Thread holds first ref to nsdata. */
+		thread->nsinfo = nsinfo__new(pid);
+		srccode_state_init(&thread->srccode_state);
+	}
+
+	return thread;
+
+err_thread:
+	free(thread);
+	return NULL;
+}
+
+void thread__delete(struct thread *thread)
+{
+	struct namespaces *namespaces, *tmp_namespaces;
+	struct comm *comm, *tmp_comm;
+
+	BUG_ON(!RB_EMPTY_NODE(&thread->rb_node));
+
+	thread_stack__free(thread);
+
+	if (thread->maps) {
+		maps__put(thread->maps);
+		thread->maps = NULL;
+	}
+	down_write(&thread->namespaces_lock);
+	list_for_each_entry_safe(namespaces, tmp_namespaces,
+				 &thread->namespaces_list, list) {
+		list_del_init(&namespaces->list);
+		namespaces__free(namespaces);
+	}
+	up_write(&thread->namespaces_lock);
+
+	down_write(&thread->comm_lock);
+	list_for_each_entry_safe(comm, tmp_comm, &thread->comm_list, list) {
+		list_del_init(&comm->list);
+		comm__free(comm);
+	}
+	up_write(&thread->comm_lock);
+
+	nsinfo__zput(thread->nsinfo);
+	srccode_state_free(&thread->srccode_state);
+
+	exit_rwsem(&thread->namespaces_lock);
+	exit_rwsem(&thread->comm_lock);
+	thread__free_stitch_list(thread);
+	free(thread);
+}
+
+struct thread *thread__get(struct thread *thread)
+{
+	if (thread)
+		refcount_inc(&thread->refcnt);
+	return thread;
+}
+
+void thread__put(struct thread *thread)
+{
+	if (thread && refcount_dec_and_test(&thread->refcnt)) {
+		/*
+		 * Remove it from the dead threads list, as last reference is
+		 * gone, if it is in a dead threads list.
+		 *
+		 * We may not be there anymore if say, the machine where it was
+		 * stored was already deleted, so we already removed it from
+		 * the dead threads and some other piece of code still keeps a
+		 * reference.
+		 *
+		 * This is what 'perf sched' does and finally drops it in
+		 * perf_sched__lat(), where it calls perf_sched__read_events(),
+		 * that processes the events by creating a session and deleting
+		 * it, which ends up destroying the list heads for the dead
+		 * threads, but before it does that it removes all threads from
+		 * it using list_del_init().
+		 *
+		 * So we need to check here if it is in a dead threads list and
+		 * if so, remove it before finally deleting the thread, to avoid
+		 * an use after free situation.
+		 */
+		if (!list_empty(&thread->node))
+			list_del_init(&thread->node);
+		thread__delete(thread);
+	}
+}
+
+static struct namespaces *__thread__namespaces(const struct thread *thread)
+{
+	if (list_empty(&thread->namespaces_list))
+		return NULL;
+
+	return list_first_entry(&thread->namespaces_list, struct namespaces, list);
+}
+
+struct namespaces *thread__namespaces(struct thread *thread)
+{
+	struct namespaces *ns;
+
+	down_read(&thread->namespaces_lock);
+	ns = __thread__namespaces(thread);
+	up_read(&thread->namespaces_lock);
+
+	return ns;
+}
+
+static int __thread__set_namespaces(struct thread *thread, u64 timestamp,
+				    struct perf_record_namespaces *event)
+{
+	struct namespaces *new, *curr = __thread__namespaces(thread);
+
+	new = namespaces__new(event);
+	if (!new)
+		return -ENOMEM;
+
+	list_add(&new->list, &thread->namespaces_list);
+
+	if (timestamp && curr) {
+		/*
+		 * setns syscall must have changed few or all the namespaces
+		 * of this thread. Update end time for the namespaces
+		 * previously used.
+		 */
+		curr = list_next_entry(new, list);
+		curr->end_time = timestamp;
+	}
+
+	return 0;
+}
+
+int thread__set_namespaces(struct thread *thread, u64 timestamp,
+			   struct perf_record_namespaces *event)
+{
+	int ret;
+
+	down_write(&thread->namespaces_lock);
+	ret = __thread__set_namespaces(thread, timestamp, event);
+	up_write(&thread->namespaces_lock);
+	return ret;
+}
+
+struct comm *thread__comm(const struct thread *thread)
+{
+	if (list_empty(&thread->comm_list))
+		return NULL;
+
+	return list_first_entry(&thread->comm_list, struct comm, list);
+}
+
+struct comm *thread__exec_comm(const struct thread *thread)
+{
+	struct comm *comm, *last = NULL, *second_last = NULL;
+
+	list_for_each_entry(comm, &thread->comm_list, list) {
+		if (comm->exec)
+			return comm;
+		second_last = last;
+		last = comm;
+	}
+
+	/*
+	 * 'last' with no start time might be the parent's comm of a synthesized
+	 * thread (created by processing a synthesized fork event). For a main
+	 * thread, that is very probably wrong. Prefer a later comm to avoid
+	 * that case.
+	 */
+	if (second_last && !last->start && thread->pid_ == thread->tid)
+		return second_last;
+
+	return last;
+}
+
+static int ____thread__set_comm(struct thread *thread, const char *str,
+				u64 timestamp, bool exec)
+{
+	struct comm *new, *curr = thread__comm(thread);
+
+	/* Override the default :tid entry */
+	if (!thread->comm_set) {
+		int err = comm__override(curr, str, timestamp, exec);
+		if (err)
+			return err;
+	} else {
+		new = comm__new(str, timestamp, exec);
+		if (!new)
+			return -ENOMEM;
+		list_add(&new->list, &thread->comm_list);
+
+		if (exec)
+			unwind__flush_access(thread->maps);
+	}
+
+	thread->comm_set = true;
+
+	return 0;
+}
+
+int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp,
+		       bool exec)
+{
+	int ret;
+
+	down_write(&thread->comm_lock);
+	ret = ____thread__set_comm(thread, str, timestamp, exec);
+	up_write(&thread->comm_lock);
+	return ret;
+}
+
+int thread__set_comm_from_proc(struct thread *thread)
+{
+	char path[64];
+	char *comm = NULL;
+	size_t sz;
+	int err = -1;
+
+	if (!(snprintf(path, sizeof(path), "%d/task/%d/comm",
+		       thread->pid_, thread->tid) >= (int)sizeof(path)) &&
+	    procfs__read_str(path, &comm, &sz) == 0) {
+		comm[sz - 1] = '\0';
+		err = thread__set_comm(thread, comm, 0);
+	}
+
+	return err;
+}
+
+static const char *__thread__comm_str(const struct thread *thread)
+{
+	const struct comm *comm = thread__comm(thread);
+
+	if (!comm)
+		return NULL;
+
+	return comm__str(comm);
+}
+
+const char *thread__comm_str(struct thread *thread)
+{
+	const char *str;
+
+	down_read(&thread->comm_lock);
+	str = __thread__comm_str(thread);
+	up_read(&thread->comm_lock);
+
+	return str;
+}
+
+/* CHECKME: it should probably better return the max comm len from its comm list */
+int thread__comm_len(struct thread *thread)
+{
+	if (!thread->comm_len) {
+		const char *comm = thread__comm_str(thread);
+		if (!comm)
+			return 0;
+		thread->comm_len = strlen(comm);
+	}
+
+	return thread->comm_len;
+}
+
+size_t thread__fprintf(struct thread *thread, FILE *fp)
+{
+	return fprintf(fp, "Thread %d %s\n", thread->tid, thread__comm_str(thread)) +
+	       maps__fprintf(thread->maps, fp);
+}
+
+int thread__insert_map(struct thread *thread, struct map *map)
+{
+	int ret;
+
+	ret = unwind__prepare_access(thread->maps, map, NULL);
+	if (ret)
+		return ret;
+
+	maps__fixup_overlappings(thread->maps, map, stderr);
+	maps__insert(thread->maps, map);
+
+	return 0;
+}
+
+static int __thread__prepare_access(struct thread *thread)
+{
+	bool initialized = false;
+	int err = 0;
+	struct maps *maps = thread->maps;
+	struct map *map;
+
+	down_read(&maps->lock);
+
+	maps__for_each_entry(maps, map) {
+		err = unwind__prepare_access(thread->maps, map, &initialized);
+		if (err || initialized)
+			break;
+	}
+
+	up_read(&maps->lock);
+
+	return err;
+}
+
+static int thread__prepare_access(struct thread *thread)
+{
+	int err = 0;
+
+	if (dwarf_callchain_users)
+		err = __thread__prepare_access(thread);
+
+	return err;
+}
+
+static int thread__clone_maps(struct thread *thread, struct thread *parent, bool do_maps_clone)
+{
+	/* This is new thread, we share map groups for process. */
+	if (thread->pid_ == parent->pid_)
+		return thread__prepare_access(thread);
+
+	if (thread->maps == parent->maps) {
+		pr_debug("broken map groups on thread %d/%d parent %d/%d\n",
+			 thread->pid_, thread->tid, parent->pid_, parent->tid);
+		return 0;
+	}
+	/* But this one is new process, copy maps. */
+	return do_maps_clone ? maps__clone(thread, parent->maps) : 0;
+}
+
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone)
+{
+	if (parent->comm_set) {
+		const char *comm = thread__comm_str(parent);
+		int err;
+		if (!comm)
+			return -ENOMEM;
+		err = thread__set_comm(thread, comm, timestamp);
+		if (err)
+			return err;
+	}
+
+	thread->ppid = parent->tid;
+	return thread__clone_maps(thread, parent, do_maps_clone);
+}
+
+void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
+					struct addr_location *al)
+{
+	size_t i;
+	const u8 cpumodes[] = {
+		PERF_RECORD_MISC_USER,
+		PERF_RECORD_MISC_KERNEL,
+		PERF_RECORD_MISC_GUEST_USER,
+		PERF_RECORD_MISC_GUEST_KERNEL
+	};
+
+	for (i = 0; i < ARRAY_SIZE(cpumodes); i++) {
+		thread__find_symbol(thread, cpumodes[i], addr, al);
+		if (al->map)
+			break;
+	}
+}
+
+struct thread *thread__main_thread(struct machine *machine, struct thread *thread)
+{
+	if (thread->pid_ == thread->tid)
+		return thread__get(thread);
+
+	if (thread->pid_ == -1)
+		return NULL;
+
+	return machine__find_thread(machine, thread->pid_, thread->pid_);
+}
+
+int thread__memcpy(struct thread *thread, struct machine *machine,
+		   void *buf, u64 ip, int len, bool *is64bit)
+{
+       u8 cpumode = PERF_RECORD_MISC_USER;
+       struct addr_location al;
+       long offset;
+
+       if (machine__kernel_ip(machine, ip))
+               cpumode = PERF_RECORD_MISC_KERNEL;
+
+       if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso ||
+	   al.map->dso->data.status == DSO_DATA_STATUS_ERROR ||
+	   map__load(al.map) < 0)
+               return -1;
+
+       offset = al.map->map_ip(al.map, ip);
+       if (is64bit)
+               *is64bit = al.map->dso->is_64_bit;
+
+       return dso__data_read_offset(al.map->dso, machine, offset, buf, len);
+}
+
+void thread__free_stitch_list(struct thread *thread)
+{
+	struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+	struct stitch_list *pos, *tmp;
+
+	if (!lbr_stitch)
+		return;
+
+	list_for_each_entry_safe(pos, tmp, &lbr_stitch->lists, node) {
+		list_del_init(&pos->node);
+		free(pos);
+	}
+
+	list_for_each_entry_safe(pos, tmp, &lbr_stitch->free_lists, node) {
+		list_del_init(&pos->node);
+		free(pos);
+	}
+
+	zfree(&lbr_stitch->prev_lbr_cursor);
+	zfree(&thread->lbr_stitch);
+}
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
new file mode 100644
index 000000000..241f300d7
--- /dev/null
+++ b/tools/perf/util/thread.h
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_THREAD_H
+#define __PERF_THREAD_H
+
+#include <linux/refcount.h>
+#include <linux/rbtree.h>
+#include <linux/list.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include "srccode.h"
+#include "symbol_conf.h"
+#include <strlist.h>
+#include <intlist.h>
+#include "rwsem.h"
+#include "event.h"
+#include "callchain.h"
+
+struct addr_location;
+struct map;
+struct perf_record_namespaces;
+struct thread_stack;
+struct unwind_libunwind_ops;
+
+struct lbr_stitch {
+	struct list_head		lists;
+	struct list_head		free_lists;
+	struct perf_sample		prev_sample;
+	struct callchain_cursor_node	*prev_lbr_cursor;
+};
+
+struct thread {
+	union {
+		struct rb_node	 rb_node;
+		struct list_head node;
+	};
+	struct maps		*maps;
+	pid_t			pid_; /* Not all tools update this */
+	pid_t			tid;
+	pid_t			ppid;
+	int			cpu;
+	int			guest_cpu; /* For QEMU thread */
+	refcount_t		refcnt;
+	bool			comm_set;
+	int			comm_len;
+	bool			dead; /* if set thread has exited */
+	struct list_head	namespaces_list;
+	struct rw_semaphore	namespaces_lock;
+	struct list_head	comm_list;
+	struct rw_semaphore	comm_lock;
+	u64			db_id;
+
+	void			*priv;
+	struct thread_stack	*ts;
+	struct nsinfo		*nsinfo;
+	struct srccode_state	srccode_state;
+	bool			filter;
+	int			filter_entry_depth;
+
+	/* LBR call stack stitch */
+	bool			lbr_stitch_enable;
+	struct lbr_stitch	*lbr_stitch;
+};
+
+struct machine;
+struct namespaces;
+struct comm;
+
+struct thread *thread__new(pid_t pid, pid_t tid);
+int thread__init_maps(struct thread *thread, struct machine *machine);
+void thread__delete(struct thread *thread);
+
+struct thread *thread__get(struct thread *thread);
+void thread__put(struct thread *thread);
+
+static inline void __thread__zput(struct thread **thread)
+{
+	thread__put(*thread);
+	*thread = NULL;
+}
+
+#define thread__zput(thread) __thread__zput(&thread)
+
+static inline void thread__exited(struct thread *thread)
+{
+	thread->dead = true;
+}
+
+struct namespaces *thread__namespaces(struct thread *thread);
+int thread__set_namespaces(struct thread *thread, u64 timestamp,
+			   struct perf_record_namespaces *event);
+
+int __thread__set_comm(struct thread *thread, const char *comm, u64 timestamp,
+		       bool exec);
+static inline int thread__set_comm(struct thread *thread, const char *comm,
+				   u64 timestamp)
+{
+	return __thread__set_comm(thread, comm, timestamp, false);
+}
+
+int thread__set_comm_from_proc(struct thread *thread);
+
+int thread__comm_len(struct thread *thread);
+struct comm *thread__comm(const struct thread *thread);
+struct comm *thread__exec_comm(const struct thread *thread);
+const char *thread__comm_str(struct thread *thread);
+int thread__insert_map(struct thread *thread, struct map *map);
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone);
+size_t thread__fprintf(struct thread *thread, FILE *fp);
+
+struct thread *thread__main_thread(struct machine *machine, struct thread *thread);
+
+struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
+			     struct addr_location *al);
+struct map *thread__find_map_fb(struct thread *thread, u8 cpumode, u64 addr,
+				struct addr_location *al);
+
+struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode,
+				   u64 addr, struct addr_location *al);
+struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode,
+				      u64 addr, struct addr_location *al);
+
+void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
+					struct addr_location *al);
+
+int thread__memcpy(struct thread *thread, struct machine *machine,
+		   void *buf, u64 ip, int len, bool *is64bit);
+
+static inline void *thread__priv(struct thread *thread)
+{
+	return thread->priv;
+}
+
+static inline void thread__set_priv(struct thread *thread, void *p)
+{
+	thread->priv = p;
+}
+
+static inline bool thread__is_filtered(struct thread *thread)
+{
+	if (symbol_conf.comm_list &&
+	    !strlist__has_entry(symbol_conf.comm_list, thread__comm_str(thread))) {
+		return true;
+	}
+
+	if (symbol_conf.pid_list &&
+	    !intlist__has_entry(symbol_conf.pid_list, thread->pid_)) {
+		return true;
+	}
+
+	if (symbol_conf.tid_list &&
+	    !intlist__has_entry(symbol_conf.tid_list, thread->tid)) {
+		return true;
+	}
+
+	return false;
+}
+
+void thread__free_stitch_list(struct thread *thread);
+
+#endif	/* __PERF_THREAD_H */
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
new file mode 100644
index 000000000..c9bfe4696
--- /dev/null
+++ b/tools/perf/util/thread_map.c
@@ -0,0 +1,429 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <dirent.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "string2.h"
+#include "strlist.h"
+#include <string.h>
+#include <api/fs/fs.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include "asm/bug.h"
+#include "thread_map.h"
+#include "debug.h"
+#include "event.h"
+
+/* Skip "." and ".." directories */
+static int filter(const struct dirent *dir)
+{
+	if (dir->d_name[0] == '.')
+		return 0;
+	else
+		return 1;
+}
+
+#define thread_map__alloc(__nr) perf_thread_map__realloc(NULL, __nr)
+
+struct perf_thread_map *thread_map__new_by_pid(pid_t pid)
+{
+	struct perf_thread_map *threads;
+	char name[256];
+	int items;
+	struct dirent **namelist = NULL;
+	int i;
+
+	sprintf(name, "/proc/%d/task", pid);
+	items = scandir(name, &namelist, filter, NULL);
+	if (items <= 0)
+		return NULL;
+
+	threads = thread_map__alloc(items);
+	if (threads != NULL) {
+		for (i = 0; i < items; i++)
+			perf_thread_map__set_pid(threads, i, atoi(namelist[i]->d_name));
+		threads->nr = items;
+		refcount_set(&threads->refcnt, 1);
+	}
+
+	for (i=0; i<items; i++)
+		zfree(&namelist[i]);
+	free(namelist);
+
+	return threads;
+}
+
+struct perf_thread_map *thread_map__new_by_tid(pid_t tid)
+{
+	struct perf_thread_map *threads = thread_map__alloc(1);
+
+	if (threads != NULL) {
+		perf_thread_map__set_pid(threads, 0, tid);
+		threads->nr = 1;
+		refcount_set(&threads->refcnt, 1);
+	}
+
+	return threads;
+}
+
+static struct perf_thread_map *__thread_map__new_all_cpus(uid_t uid)
+{
+	DIR *proc;
+	int max_threads = 32, items, i;
+	char path[NAME_MAX + 1 + 6];
+	struct dirent *dirent, **namelist = NULL;
+	struct perf_thread_map *threads = thread_map__alloc(max_threads);
+
+	if (threads == NULL)
+		goto out;
+
+	proc = opendir("/proc");
+	if (proc == NULL)
+		goto out_free_threads;
+
+	threads->nr = 0;
+	refcount_set(&threads->refcnt, 1);
+
+	while ((dirent = readdir(proc)) != NULL) {
+		char *end;
+		bool grow = false;
+		pid_t pid = strtol(dirent->d_name, &end, 10);
+
+		if (*end) /* only interested in proper numerical dirents */
+			continue;
+
+		snprintf(path, sizeof(path), "/proc/%s", dirent->d_name);
+
+		if (uid != UINT_MAX) {
+			struct stat st;
+
+			if (stat(path, &st) != 0 || st.st_uid != uid)
+				continue;
+		}
+
+		snprintf(path, sizeof(path), "/proc/%d/task", pid);
+		items = scandir(path, &namelist, filter, NULL);
+		if (items <= 0)
+			goto out_free_closedir;
+
+		while (threads->nr + items >= max_threads) {
+			max_threads *= 2;
+			grow = true;
+		}
+
+		if (grow) {
+			struct perf_thread_map *tmp;
+
+			tmp = perf_thread_map__realloc(threads, max_threads);
+			if (tmp == NULL)
+				goto out_free_namelist;
+
+			threads = tmp;
+		}
+
+		for (i = 0; i < items; i++) {
+			perf_thread_map__set_pid(threads, threads->nr + i,
+						    atoi(namelist[i]->d_name));
+		}
+
+		for (i = 0; i < items; i++)
+			zfree(&namelist[i]);
+		free(namelist);
+
+		threads->nr += items;
+	}
+
+out_closedir:
+	closedir(proc);
+out:
+	return threads;
+
+out_free_threads:
+	free(threads);
+	return NULL;
+
+out_free_namelist:
+	for (i = 0; i < items; i++)
+		zfree(&namelist[i]);
+	free(namelist);
+
+out_free_closedir:
+	zfree(&threads);
+	goto out_closedir;
+}
+
+struct perf_thread_map *thread_map__new_all_cpus(void)
+{
+	return __thread_map__new_all_cpus(UINT_MAX);
+}
+
+struct perf_thread_map *thread_map__new_by_uid(uid_t uid)
+{
+	return __thread_map__new_all_cpus(uid);
+}
+
+struct perf_thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid)
+{
+	if (pid != -1)
+		return thread_map__new_by_pid(pid);
+
+	if (tid == -1 && uid != UINT_MAX)
+		return thread_map__new_by_uid(uid);
+
+	return thread_map__new_by_tid(tid);
+}
+
+static struct perf_thread_map *thread_map__new_by_pid_str(const char *pid_str)
+{
+	struct perf_thread_map *threads = NULL, *nt;
+	char name[256];
+	int items, total_tasks = 0;
+	struct dirent **namelist = NULL;
+	int i, j = 0;
+	pid_t pid, prev_pid = INT_MAX;
+	char *end_ptr;
+	struct str_node *pos;
+	struct strlist_config slist_config = { .dont_dupstr = true, };
+	struct strlist *slist = strlist__new(pid_str, &slist_config);
+
+	if (!slist)
+		return NULL;
+
+	strlist__for_each_entry(pos, slist) {
+		pid = strtol(pos->s, &end_ptr, 10);
+
+		if (pid == INT_MIN || pid == INT_MAX ||
+		    (*end_ptr != '\0' && *end_ptr != ','))
+			goto out_free_threads;
+
+		if (pid == prev_pid)
+			continue;
+
+		sprintf(name, "/proc/%d/task", pid);
+		items = scandir(name, &namelist, filter, NULL);
+		if (items <= 0)
+			goto out_free_threads;
+
+		total_tasks += items;
+		nt = perf_thread_map__realloc(threads, total_tasks);
+		if (nt == NULL)
+			goto out_free_namelist;
+
+		threads = nt;
+
+		for (i = 0; i < items; i++) {
+			perf_thread_map__set_pid(threads, j++, atoi(namelist[i]->d_name));
+			zfree(&namelist[i]);
+		}
+		threads->nr = total_tasks;
+		free(namelist);
+	}
+
+out:
+	strlist__delete(slist);
+	if (threads)
+		refcount_set(&threads->refcnt, 1);
+	return threads;
+
+out_free_namelist:
+	for (i = 0; i < items; i++)
+		zfree(&namelist[i]);
+	free(namelist);
+
+out_free_threads:
+	zfree(&threads);
+	goto out;
+}
+
+struct perf_thread_map *thread_map__new_by_tid_str(const char *tid_str)
+{
+	struct perf_thread_map *threads = NULL, *nt;
+	int ntasks = 0;
+	pid_t tid, prev_tid = INT_MAX;
+	char *end_ptr;
+	struct str_node *pos;
+	struct strlist_config slist_config = { .dont_dupstr = true, };
+	struct strlist *slist;
+
+	/* perf-stat expects threads to be generated even if tid not given */
+	if (!tid_str)
+		return perf_thread_map__new_dummy();
+
+	slist = strlist__new(tid_str, &slist_config);
+	if (!slist)
+		return NULL;
+
+	strlist__for_each_entry(pos, slist) {
+		tid = strtol(pos->s, &end_ptr, 10);
+
+		if (tid == INT_MIN || tid == INT_MAX ||
+		    (*end_ptr != '\0' && *end_ptr != ','))
+			goto out_free_threads;
+
+		if (tid == prev_tid)
+			continue;
+
+		ntasks++;
+		nt = perf_thread_map__realloc(threads, ntasks);
+
+		if (nt == NULL)
+			goto out_free_threads;
+
+		threads = nt;
+		perf_thread_map__set_pid(threads, ntasks - 1, tid);
+		threads->nr = ntasks;
+	}
+out:
+	if (threads)
+		refcount_set(&threads->refcnt, 1);
+	return threads;
+
+out_free_threads:
+	zfree(&threads);
+	strlist__delete(slist);
+	goto out;
+}
+
+struct perf_thread_map *thread_map__new_str(const char *pid, const char *tid,
+				       uid_t uid, bool all_threads)
+{
+	if (pid)
+		return thread_map__new_by_pid_str(pid);
+
+	if (!tid && uid != UINT_MAX)
+		return thread_map__new_by_uid(uid);
+
+	if (all_threads)
+		return thread_map__new_all_cpus();
+
+	return thread_map__new_by_tid_str(tid);
+}
+
+size_t thread_map__fprintf(struct perf_thread_map *threads, FILE *fp)
+{
+	int i;
+	size_t printed = fprintf(fp, "%d thread%s: ",
+				 threads->nr, threads->nr > 1 ? "s" : "");
+	for (i = 0; i < threads->nr; ++i)
+		printed += fprintf(fp, "%s%d", i ? ", " : "", perf_thread_map__pid(threads, i));
+
+	return printed + fprintf(fp, "\n");
+}
+
+static int get_comm(char **comm, pid_t pid)
+{
+	char *path;
+	size_t size;
+	int err;
+
+	if (asprintf(&path, "%s/%d/comm", procfs__mountpoint(), pid) == -1)
+		return -ENOMEM;
+
+	err = filename__read_str(path, comm, &size);
+	if (!err) {
+		/*
+		 * We're reading 16 bytes, while filename__read_str
+		 * allocates data per BUFSIZ bytes, so we can safely
+		 * mark the end of the string.
+		 */
+		(*comm)[size] = 0;
+		strim(*comm);
+	}
+
+	free(path);
+	return err;
+}
+
+static void comm_init(struct perf_thread_map *map, int i)
+{
+	pid_t pid = perf_thread_map__pid(map, i);
+	char *comm = NULL;
+
+	/* dummy pid comm initialization */
+	if (pid == -1) {
+		map->map[i].comm = strdup("dummy");
+		return;
+	}
+
+	/*
+	 * The comm name is like extra bonus ;-),
+	 * so just warn if we fail for any reason.
+	 */
+	if (get_comm(&comm, pid))
+		pr_warning("Couldn't resolve comm name for pid %d\n", pid);
+
+	map->map[i].comm = comm;
+}
+
+void thread_map__read_comms(struct perf_thread_map *threads)
+{
+	int i;
+
+	for (i = 0; i < threads->nr; ++i)
+		comm_init(threads, i);
+}
+
+static void thread_map__copy_event(struct perf_thread_map *threads,
+				   struct perf_record_thread_map *event)
+{
+	unsigned i;
+
+	threads->nr = (int) event->nr;
+
+	for (i = 0; i < event->nr; i++) {
+		perf_thread_map__set_pid(threads, i, (pid_t) event->entries[i].pid);
+		threads->map[i].comm = strndup(event->entries[i].comm, 16);
+	}
+
+	refcount_set(&threads->refcnt, 1);
+}
+
+struct perf_thread_map *thread_map__new_event(struct perf_record_thread_map *event)
+{
+	struct perf_thread_map *threads;
+
+	threads = thread_map__alloc(event->nr);
+	if (threads)
+		thread_map__copy_event(threads, event);
+
+	return threads;
+}
+
+bool thread_map__has(struct perf_thread_map *threads, pid_t pid)
+{
+	int i;
+
+	for (i = 0; i < threads->nr; ++i) {
+		if (threads->map[i].pid == pid)
+			return true;
+	}
+
+	return false;
+}
+
+int thread_map__remove(struct perf_thread_map *threads, int idx)
+{
+	int i;
+
+	if (threads->nr < 1)
+		return -EINVAL;
+
+	if (idx >= threads->nr)
+		return -EINVAL;
+
+	/*
+	 * Free the 'idx' item and shift the rest up.
+	 */
+	zfree(&threads->map[idx].comm);
+
+	for (i = idx; i < threads->nr - 1; i++)
+		threads->map[i] = threads->map[i + 1];
+
+	threads->nr--;
+	return 0;
+}
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
new file mode 100644
index 000000000..3bb860a32
--- /dev/null
+++ b/tools/perf/util/thread_map.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_THREAD_MAP_H
+#define __PERF_THREAD_MAP_H
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <linux/refcount.h>
+#include <internal/threadmap.h>
+#include <perf/threadmap.h>
+
+struct perf_record_thread_map;
+
+struct perf_thread_map *thread_map__new_dummy(void);
+struct perf_thread_map *thread_map__new_by_pid(pid_t pid);
+struct perf_thread_map *thread_map__new_by_tid(pid_t tid);
+struct perf_thread_map *thread_map__new_by_uid(uid_t uid);
+struct perf_thread_map *thread_map__new_all_cpus(void);
+struct perf_thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid);
+struct perf_thread_map *thread_map__new_event(struct perf_record_thread_map *event);
+
+struct perf_thread_map *thread_map__new_str(const char *pid,
+		const char *tid, uid_t uid, bool all_threads);
+
+struct perf_thread_map *thread_map__new_by_tid_str(const char *tid_str);
+
+size_t thread_map__fprintf(struct perf_thread_map *threads, FILE *fp);
+
+void thread_map__read_comms(struct perf_thread_map *threads);
+bool thread_map__has(struct perf_thread_map *threads, pid_t pid);
+int thread_map__remove(struct perf_thread_map *threads, int idx);
+#endif	/* __PERF_THREAD_MAP_H */
diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c
new file mode 100644
index 000000000..302443921
--- /dev/null
+++ b/tools/perf/util/time-utils.c
@@ -0,0 +1,559 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <string.h>
+#include <linux/string.h>
+#include <sys/time.h>
+#include <linux/time64.h>
+#include <time.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <math.h>
+#include <linux/ctype.h>
+
+#include "debug.h"
+#include "time-utils.h"
+#include "session.h"
+#include "evlist.h"
+
+int parse_nsec_time(const char *str, u64 *ptime)
+{
+	u64 time_sec, time_nsec;
+	char *end;
+
+	time_sec = strtoul(str, &end, 10);
+	if (*end != '.' && *end != '\0')
+		return -1;
+
+	if (*end == '.') {
+		int i;
+		char nsec_buf[10];
+
+		if (strlen(++end) > 9)
+			return -1;
+
+		strncpy(nsec_buf, end, 9);
+		nsec_buf[9] = '\0';
+
+		/* make it nsec precision */
+		for (i = strlen(nsec_buf); i < 9; i++)
+			nsec_buf[i] = '0';
+
+		time_nsec = strtoul(nsec_buf, &end, 10);
+		if (*end != '\0')
+			return -1;
+	} else
+		time_nsec = 0;
+
+	*ptime = time_sec * NSEC_PER_SEC + time_nsec;
+	return 0;
+}
+
+static int parse_timestr_sec_nsec(struct perf_time_interval *ptime,
+				  char *start_str, char *end_str)
+{
+	if (start_str && (*start_str != '\0') &&
+	    (parse_nsec_time(start_str, &ptime->start) != 0)) {
+		return -1;
+	}
+
+	if (end_str && (*end_str != '\0') &&
+	    (parse_nsec_time(end_str, &ptime->end) != 0)) {
+		return -1;
+	}
+
+	return 0;
+}
+
+static int split_start_end(char **start, char **end, const char *ostr, char ch)
+{
+	char *start_str, *end_str;
+	char *d, *str;
+
+	if (ostr == NULL || *ostr == '\0')
+		return 0;
+
+	/* copy original string because we need to modify it */
+	str = strdup(ostr);
+	if (str == NULL)
+		return -ENOMEM;
+
+	start_str = str;
+	d = strchr(start_str, ch);
+	if (d) {
+		*d = '\0';
+		++d;
+	}
+	end_str = d;
+
+	*start = start_str;
+	*end = end_str;
+
+	return 0;
+}
+
+int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr)
+{
+	char *start_str = NULL, *end_str;
+	int rc;
+
+	rc = split_start_end(&start_str, &end_str, ostr, ',');
+	if (rc || !start_str)
+		return rc;
+
+	ptime->start = 0;
+	ptime->end = 0;
+
+	rc = parse_timestr_sec_nsec(ptime, start_str, end_str);
+
+	free(start_str);
+
+	/* make sure end time is after start time if it was given */
+	if (rc == 0 && ptime->end && ptime->end < ptime->start)
+		return -EINVAL;
+
+	pr_debug("start time %" PRIu64 ", ", ptime->start);
+	pr_debug("end time %" PRIu64 "\n", ptime->end);
+
+	return rc;
+}
+
+static int perf_time__parse_strs(struct perf_time_interval *ptime,
+				 const char *ostr, int size)
+{
+	const char *cp;
+	char *str, *arg, *p;
+	int i, num = 0, rc = 0;
+
+	/* Count the commas */
+	for (cp = ostr; *cp; cp++)
+		num += !!(*cp == ',');
+
+	if (!num)
+		return -EINVAL;
+
+	BUG_ON(num > size);
+
+	str = strdup(ostr);
+	if (!str)
+		return -ENOMEM;
+
+	/* Split the string and parse each piece, except the last */
+	for (i = 0, p = str; i < num - 1; i++) {
+		arg = p;
+		/* Find next comma, there must be one */
+		p = skip_spaces(strchr(p, ',') + 1);
+		/* Skip the value, must not contain space or comma */
+		while (*p && !isspace(*p)) {
+			if (*p++ == ',') {
+				rc = -EINVAL;
+				goto out;
+			}
+		}
+		/* Split and parse */
+		if (*p)
+			*p++ = 0;
+		rc = perf_time__parse_str(ptime + i, arg);
+		if (rc < 0)
+			goto out;
+	}
+
+	/* Parse the last piece */
+	rc = perf_time__parse_str(ptime + i, p);
+	if (rc < 0)
+		goto out;
+
+	/* Check there is no overlap */
+	for (i = 0; i < num - 1; i++) {
+		if (ptime[i].end >= ptime[i + 1].start) {
+			rc = -EINVAL;
+			goto out;
+		}
+	}
+
+	rc = num;
+out:
+	free(str);
+
+	return rc;
+}
+
+static int parse_percent(double *pcnt, char *str)
+{
+	char *c, *endptr;
+	double d;
+
+	c = strchr(str, '%');
+	if (c)
+		*c = '\0';
+	else
+		return -1;
+
+	d = strtod(str, &endptr);
+	if (endptr != str + strlen(str))
+		return -1;
+
+	*pcnt = d / 100.0;
+	return 0;
+}
+
+static int set_percent_time(struct perf_time_interval *ptime, double start_pcnt,
+			    double end_pcnt, u64 start, u64 end)
+{
+	u64 total = end - start;
+
+	if (start_pcnt < 0.0 || start_pcnt > 1.0 ||
+	    end_pcnt < 0.0 || end_pcnt > 1.0) {
+		return -1;
+	}
+
+	ptime->start = start + round(start_pcnt * total);
+	ptime->end = start + round(end_pcnt * total);
+
+	if (ptime->end > ptime->start && ptime->end != end)
+		ptime->end -= 1;
+
+	return 0;
+}
+
+static int percent_slash_split(char *str, struct perf_time_interval *ptime,
+			       u64 start, u64 end)
+{
+	char *p, *end_str;
+	double pcnt, start_pcnt, end_pcnt;
+	int i;
+
+	/*
+	 * Example:
+	 * 10%/2: select the second 10% slice and the third 10% slice
+	 */
+
+	/* We can modify this string since the original one is copied */
+	p = strchr(str, '/');
+	if (!p)
+		return -1;
+
+	*p = '\0';
+	if (parse_percent(&pcnt, str) < 0)
+		return -1;
+
+	p++;
+	i = (int)strtol(p, &end_str, 10);
+	if (*end_str)
+		return -1;
+
+	if (pcnt <= 0.0)
+		return -1;
+
+	start_pcnt = pcnt * (i - 1);
+	end_pcnt = pcnt * i;
+
+	return set_percent_time(ptime, start_pcnt, end_pcnt, start, end);
+}
+
+static int percent_dash_split(char *str, struct perf_time_interval *ptime,
+			      u64 start, u64 end)
+{
+	char *start_str = NULL, *end_str;
+	double start_pcnt, end_pcnt;
+	int ret;
+
+	/*
+	 * Example: 0%-10%
+	 */
+
+	ret = split_start_end(&start_str, &end_str, str, '-');
+	if (ret || !start_str)
+		return ret;
+
+	if ((parse_percent(&start_pcnt, start_str) != 0) ||
+	    (parse_percent(&end_pcnt, end_str) != 0)) {
+		free(start_str);
+		return -1;
+	}
+
+	free(start_str);
+
+	return set_percent_time(ptime, start_pcnt, end_pcnt, start, end);
+}
+
+typedef int (*time_pecent_split)(char *, struct perf_time_interval *,
+				 u64 start, u64 end);
+
+static int percent_comma_split(struct perf_time_interval *ptime_buf, int num,
+			       const char *ostr, u64 start, u64 end,
+			       time_pecent_split func)
+{
+	char *str, *p1, *p2;
+	int len, ret, i = 0;
+
+	str = strdup(ostr);
+	if (str == NULL)
+		return -ENOMEM;
+
+	len = strlen(str);
+	p1 = str;
+
+	while (p1 < str + len) {
+		if (i >= num) {
+			free(str);
+			return -1;
+		}
+
+		p2 = strchr(p1, ',');
+		if (p2)
+			*p2 = '\0';
+
+		ret = (func)(p1, &ptime_buf[i], start, end);
+		if (ret < 0) {
+			free(str);
+			return -1;
+		}
+
+		pr_debug("start time %d: %" PRIu64 ", ", i, ptime_buf[i].start);
+		pr_debug("end time %d: %" PRIu64 "\n", i, ptime_buf[i].end);
+
+		i++;
+
+		if (p2)
+			p1 = p2 + 1;
+		else
+			break;
+	}
+
+	free(str);
+	return i;
+}
+
+static int one_percent_convert(struct perf_time_interval *ptime_buf,
+			       const char *ostr, u64 start, u64 end, char *c)
+{
+	char *str;
+	int len = strlen(ostr), ret;
+
+	/*
+	 * c points to '%'.
+	 * '%' should be the last character
+	 */
+	if (ostr + len - 1 != c)
+		return -1;
+
+	/*
+	 * Construct a string like "xx%/1"
+	 */
+	str = malloc(len + 3);
+	if (str == NULL)
+		return -ENOMEM;
+
+	memcpy(str, ostr, len);
+	strcpy(str + len, "/1");
+
+	ret = percent_slash_split(str, ptime_buf, start, end);
+	if (ret == 0)
+		ret = 1;
+
+	free(str);
+	return ret;
+}
+
+int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num,
+				 const char *ostr, u64 start, u64 end)
+{
+	char *c;
+
+	/*
+	 * ostr example:
+	 * 10%/2,10%/3: select the second 10% slice and the third 10% slice
+	 * 0%-10%,30%-40%: multiple time range
+	 * 50%: just one percent
+	 */
+
+	memset(ptime_buf, 0, sizeof(*ptime_buf) * num);
+
+	c = strchr(ostr, '/');
+	if (c) {
+		return percent_comma_split(ptime_buf, num, ostr, start,
+					   end, percent_slash_split);
+	}
+
+	c = strchr(ostr, '-');
+	if (c) {
+		return percent_comma_split(ptime_buf, num, ostr, start,
+					   end, percent_dash_split);
+	}
+
+	c = strchr(ostr, '%');
+	if (c)
+		return one_percent_convert(ptime_buf, ostr, start, end, c);
+
+	return -1;
+}
+
+struct perf_time_interval *perf_time__range_alloc(const char *ostr, int *size)
+{
+	const char *p1, *p2;
+	int i = 1;
+	struct perf_time_interval *ptime;
+
+	/*
+	 * At least allocate one time range.
+	 */
+	if (!ostr)
+		goto alloc;
+
+	p1 = ostr;
+	while (p1 < ostr + strlen(ostr)) {
+		p2 = strchr(p1, ',');
+		if (!p2)
+			break;
+
+		p1 = p2 + 1;
+		i++;
+	}
+
+alloc:
+	*size = i;
+	ptime = calloc(i, sizeof(*ptime));
+	return ptime;
+}
+
+bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp)
+{
+	/* if time is not set don't drop sample */
+	if (timestamp == 0)
+		return false;
+
+	/* otherwise compare sample time to time window */
+	if ((ptime->start && timestamp < ptime->start) ||
+	    (ptime->end && timestamp > ptime->end)) {
+		return true;
+	}
+
+	return false;
+}
+
+bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf,
+				   int num, u64 timestamp)
+{
+	struct perf_time_interval *ptime;
+	int i;
+
+	if ((!ptime_buf) || (timestamp == 0) || (num == 0))
+		return false;
+
+	if (num == 1)
+		return perf_time__skip_sample(&ptime_buf[0], timestamp);
+
+	/*
+	 * start/end of multiple time ranges must be valid.
+	 */
+	for (i = 0; i < num; i++) {
+		ptime = &ptime_buf[i];
+
+		if (timestamp >= ptime->start &&
+		    (timestamp <= ptime->end || !ptime->end)) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+int perf_time__parse_for_ranges_reltime(const char *time_str,
+				struct perf_session *session,
+				struct perf_time_interval **ranges,
+				int *range_size, int *range_num,
+				bool reltime)
+{
+	bool has_percent = strchr(time_str, '%');
+	struct perf_time_interval *ptime_range;
+	int size, num, ret = -EINVAL;
+
+	ptime_range = perf_time__range_alloc(time_str, &size);
+	if (!ptime_range)
+		return -ENOMEM;
+
+	if (has_percent || reltime) {
+		if (session->evlist->first_sample_time == 0 &&
+		    session->evlist->last_sample_time == 0) {
+			pr_err("HINT: no first/last sample time found in perf data.\n"
+			       "Please use latest perf binary to execute 'perf record'\n"
+			       "(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n");
+			goto error;
+		}
+	}
+
+	if (has_percent) {
+		num = perf_time__percent_parse_str(
+				ptime_range, size,
+				time_str,
+				session->evlist->first_sample_time,
+				session->evlist->last_sample_time);
+	} else {
+		num = perf_time__parse_strs(ptime_range, time_str, size);
+	}
+
+	if (num < 0)
+		goto error_invalid;
+
+	if (reltime) {
+		int i;
+
+		for (i = 0; i < num; i++) {
+			ptime_range[i].start += session->evlist->first_sample_time;
+			ptime_range[i].end += session->evlist->first_sample_time;
+		}
+	}
+
+	*range_size = size;
+	*range_num = num;
+	*ranges = ptime_range;
+	return 0;
+
+error_invalid:
+	pr_err("Invalid time string\n");
+error:
+	free(ptime_range);
+	return ret;
+}
+
+int perf_time__parse_for_ranges(const char *time_str,
+				struct perf_session *session,
+				struct perf_time_interval **ranges,
+				int *range_size, int *range_num)
+{
+	return perf_time__parse_for_ranges_reltime(time_str, session, ranges,
+					range_size, range_num, false);
+}
+
+int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz)
+{
+	u64  sec = timestamp / NSEC_PER_SEC;
+	u64 usec = (timestamp % NSEC_PER_SEC) / NSEC_PER_USEC;
+
+	return scnprintf(buf, sz, "%"PRIu64".%06"PRIu64, sec, usec);
+}
+
+int timestamp__scnprintf_nsec(u64 timestamp, char *buf, size_t sz)
+{
+	u64 sec  = timestamp / NSEC_PER_SEC,
+	    nsec = timestamp % NSEC_PER_SEC;
+
+	return scnprintf(buf, sz, "%" PRIu64 ".%09" PRIu64, sec, nsec);
+}
+
+int fetch_current_timestamp(char *buf, size_t sz)
+{
+	struct timeval tv;
+	struct tm tm;
+	char dt[32];
+
+	if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm))
+		return -1;
+
+	if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm))
+		return -1;
+
+	scnprintf(buf, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000);
+
+	return 0;
+}
diff --git a/tools/perf/util/time-utils.h b/tools/perf/util/time-utils.h
new file mode 100644
index 000000000..1142b0bdd
--- /dev/null
+++ b/tools/perf/util/time-utils.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TIME_UTILS_H_
+#define _TIME_UTILS_H_
+
+#include <stddef.h>
+#include <time.h>
+#include <linux/types.h>
+
+struct perf_time_interval {
+	u64 start, end;
+};
+
+int parse_nsec_time(const char *str, u64 *ptime);
+
+int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr);
+
+int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num,
+				 const char *ostr, u64 start, u64 end);
+
+struct perf_time_interval *perf_time__range_alloc(const char *ostr, int *size);
+
+bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp);
+
+bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf,
+				   int num, u64 timestamp);
+
+struct perf_session;
+
+int perf_time__parse_for_ranges_reltime(const char *str, struct perf_session *session,
+				struct perf_time_interval **ranges,
+				int *range_size, int *range_num,
+				bool reltime);
+
+int perf_time__parse_for_ranges(const char *str, struct perf_session *session,
+				struct perf_time_interval **ranges,
+				int *range_size, int *range_num);
+
+int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz);
+int timestamp__scnprintf_nsec(u64 timestamp, char *buf, size_t sz);
+
+int fetch_current_timestamp(char *buf, size_t sz);
+
+static inline unsigned long long rdclock(void)
+{
+	struct timespec ts;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+	return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
+}
+
+#endif
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
new file mode 100644
index 000000000..c957fb849
--- /dev/null
+++ b/tools/perf/util/tool.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_TOOL_H
+#define __PERF_TOOL_H
+
+#include <stdbool.h>
+
+#include <linux/types.h>
+
+struct perf_session;
+union perf_event;
+struct evlist;
+struct evsel;
+struct perf_sample;
+struct perf_tool;
+struct machine;
+struct ordered_events;
+
+typedef int (*event_sample)(struct perf_tool *tool, union perf_event *event,
+			    struct perf_sample *sample,
+			    struct evsel *evsel, struct machine *machine);
+
+typedef int (*event_op)(struct perf_tool *tool, union perf_event *event,
+			struct perf_sample *sample, struct machine *machine);
+
+typedef int (*event_attr_op)(struct perf_tool *tool,
+			     union perf_event *event,
+			     struct evlist **pevlist);
+
+typedef int (*event_op2)(struct perf_session *session, union perf_event *event);
+typedef s64 (*event_op3)(struct perf_session *session, union perf_event *event);
+typedef int (*event_op4)(struct perf_session *session, union perf_event *event, u64 data,
+			 const char *str);
+
+typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event,
+			struct ordered_events *oe);
+
+enum show_feature_header {
+	SHOW_FEAT_NO_HEADER = 0,
+	SHOW_FEAT_HEADER,
+	SHOW_FEAT_HEADER_FULL_INFO,
+};
+
+struct perf_tool {
+	event_sample	sample,
+			read;
+	event_op	mmap,
+			mmap2,
+			comm,
+			namespaces,
+			cgroup,
+			fork,
+			exit,
+			lost,
+			lost_samples,
+			aux,
+			itrace_start,
+			aux_output_hw_id,
+			context_switch,
+			throttle,
+			unthrottle,
+			ksymbol,
+			bpf,
+			text_poke;
+
+	event_attr_op	attr;
+	event_attr_op	event_update;
+	event_op2	tracing_data;
+	event_oe	finished_round;
+	event_op2	build_id,
+			id_index,
+			auxtrace_info,
+			auxtrace_error,
+			time_conv,
+			thread_map,
+			cpu_map,
+			stat_config,
+			stat,
+			stat_round,
+			feature,
+			finished_init;
+	event_op4	compressed;
+	event_op3	auxtrace;
+	bool		ordered_events;
+	bool		ordering_requires_timestamps;
+	bool		namespace_events;
+	bool		cgroup_events;
+	bool		no_warn;
+	enum show_feature_header show_feat_hdr;
+};
+
+#endif /* __PERF_TOOL_H */
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
new file mode 100644
index 000000000..b8b32431d
--- /dev/null
+++ b/tools/perf/util/top.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Refactored from builtin-top.c, see that files for further copyright notes.
+ */
+
+#include "event.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "parse-events.h"
+#include "symbol.h"
+#include "top.h"
+#include "../perf.h"
+#include <inttypes.h>
+
+#define SNPRINTF(buf, size, fmt, args...) \
+({ \
+	size_t r = snprintf(buf, size, fmt, ## args); \
+	r > size ?  size : r; \
+})
+
+size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
+{
+	float samples_per_sec;
+	float ksamples_per_sec;
+	float esamples_percent;
+	struct record_opts *opts = &top->record_opts;
+	struct target *target = &opts->target;
+	size_t ret = 0;
+
+	if (top->samples) {
+		samples_per_sec = top->samples / top->delay_secs;
+		ksamples_per_sec = top->kernel_samples / top->delay_secs;
+		esamples_percent = (100.0 * top->exact_samples) / top->samples;
+	} else {
+		samples_per_sec = ksamples_per_sec = esamples_percent = 0.0;
+	}
+
+	if (!perf_guest) {
+		float ksamples_percent = 0.0;
+
+		if (samples_per_sec)
+			ksamples_percent = (100.0 * ksamples_per_sec) /
+							samples_per_sec;
+		ret = SNPRINTF(bf, size,
+			       "   PerfTop:%8.0f irqs/sec  kernel:%4.1f%%"
+			       "  exact: %4.1f%% lost: %" PRIu64 "/%" PRIu64 " drop: %" PRIu64 "/%" PRIu64 " [",
+			       samples_per_sec, ksamples_percent, esamples_percent,
+			       top->lost, top->lost_total, top->drop, top->drop_total);
+	} else {
+		float us_samples_per_sec = top->us_samples / top->delay_secs;
+		float guest_kernel_samples_per_sec = top->guest_kernel_samples / top->delay_secs;
+		float guest_us_samples_per_sec = top->guest_us_samples / top->delay_secs;
+
+		ret = SNPRINTF(bf, size,
+			       "   PerfTop:%8.0f irqs/sec  kernel:%4.1f%% us:%4.1f%%"
+			       " guest kernel:%4.1f%% guest us:%4.1f%%"
+			       " exact: %4.1f%% [", samples_per_sec,
+			       100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) /
+						 samples_per_sec)),
+			       100.0 - (100.0 * ((samples_per_sec - us_samples_per_sec) /
+						 samples_per_sec)),
+			       100.0 - (100.0 * ((samples_per_sec -
+						  guest_kernel_samples_per_sec) /
+						 samples_per_sec)),
+			       100.0 - (100.0 * ((samples_per_sec -
+						  guest_us_samples_per_sec) /
+						 samples_per_sec)),
+			       esamples_percent);
+	}
+
+	if (top->evlist->core.nr_entries == 1) {
+		struct evsel *first = evlist__first(top->evlist);
+		ret += SNPRINTF(bf + ret, size - ret, "%" PRIu64 "%s ",
+				(uint64_t)first->core.attr.sample_period,
+				opts->freq ? "Hz" : "");
+	}
+
+	ret += SNPRINTF(bf + ret, size - ret, "%s", evsel__name(top->sym_evsel));
+
+	ret += SNPRINTF(bf + ret, size - ret, "], ");
+
+	if (target->pid)
+		ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %s",
+				target->pid);
+	else if (target->tid)
+		ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %s",
+				target->tid);
+	else if (target->uid_str != NULL)
+		ret += SNPRINTF(bf + ret, size - ret, " (uid: %s",
+				target->uid_str);
+	else
+		ret += SNPRINTF(bf + ret, size - ret, " (all");
+
+	if (target->cpu_list)
+		ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)",
+				perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1
+				? "s" : "",
+				target->cpu_list);
+	else {
+		if (target->tid)
+			ret += SNPRINTF(bf + ret, size - ret, ")");
+		else
+			ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)",
+					perf_cpu_map__nr(top->evlist->core.user_requested_cpus),
+					perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1
+					? "s" : "");
+	}
+
+	perf_top__reset_sample_counters(top);
+	return ret;
+}
+
+void perf_top__reset_sample_counters(struct perf_top *top)
+{
+	top->samples = top->us_samples = top->kernel_samples =
+	top->exact_samples = top->guest_kernel_samples =
+	top->guest_us_samples = top->lost = top->drop = 0;
+}
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
new file mode 100644
index 000000000..a8b0d79bd
--- /dev/null
+++ b/tools/perf/util/top.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_TOP_H
+#define __PERF_TOP_H 1
+
+#include "tool.h"
+#include "evswitch.h"
+#include "annotate.h"
+#include "mutex.h"
+#include "ordered-events.h"
+#include "record.h"
+#include <linux/types.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/ioctl.h>
+
+struct evlist;
+struct evsel;
+struct perf_session;
+
+struct perf_top {
+	struct perf_tool   tool;
+	struct evlist *evlist, *sb_evlist;
+	struct record_opts record_opts;
+	struct annotation_options annotation_opts;
+	struct evswitch	   evswitch;
+	/*
+	 * Symbols will be added here in perf_event__process_sample and will
+	 * get out after decayed.
+	 */
+	u64		   samples, lost, lost_total, drop, drop_total;
+	u64		   kernel_samples, us_samples;
+	u64		   exact_samples;
+	u64		   guest_us_samples, guest_kernel_samples;
+	int		   print_entries, count_filter, delay_secs;
+	int		   max_stack;
+	bool		   hide_kernel_symbols, hide_user_symbols, zero;
+#ifdef HAVE_SLANG_SUPPORT
+	bool		   use_tui;
+#endif
+	bool		   use_stdio;
+	bool		   vmlinux_warned;
+	bool		   dump_symtab;
+	bool		   stitch_lbr;
+	struct hist_entry  *sym_filter_entry;
+	struct evsel 	   *sym_evsel;
+	struct perf_session *session;
+	struct winsize	   winsize;
+	int		   realtime_prio;
+	const char	   *sym_filter;
+	float		   min_percent;
+	unsigned int	   nr_threads_synthesize;
+
+	struct {
+		struct ordered_events	*in;
+		struct ordered_events	 data[2];
+		bool			 rotate;
+		struct mutex mutex;
+		struct cond cond;
+	} qe;
+};
+
+#define CONSOLE_CLEAR "[H[2J"
+
+size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size);
+void perf_top__reset_sample_counters(struct perf_top *top);
+#endif /* __PERF_TOP_H */
diff --git a/tools/perf/util/topdown.c b/tools/perf/util/topdown.c
new file mode 100644
index 000000000..109084155
--- /dev/null
+++ b/tools/perf/util/topdown.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include "pmu.h"
+#include "pmu-hybrid.h"
+#include "topdown.h"
+
+int topdown_filter_events(const char **attr, char **str, bool use_group,
+			  const char *pmu_name)
+{
+	int off = 0;
+	int i;
+	int len = 0;
+	char *s;
+	bool is_hybrid = perf_pmu__is_hybrid(pmu_name);
+
+	for (i = 0; attr[i]; i++) {
+		if (pmu_have_event(pmu_name, attr[i])) {
+			if (is_hybrid)
+				len += strlen(attr[i]) + strlen(pmu_name) + 3;
+			else
+				len += strlen(attr[i]) + 1;
+			attr[i - off] = attr[i];
+		} else
+			off++;
+	}
+	attr[i - off] = NULL;
+
+	*str = malloc(len + 1 + 2);
+	if (!*str)
+		return -1;
+	s = *str;
+	if (i - off == 0) {
+		*s = 0;
+		return 0;
+	}
+	if (use_group)
+		*s++ = '{';
+	for (i = 0; attr[i]; i++) {
+		if (!is_hybrid)
+			strcpy(s, attr[i]);
+		else
+			sprintf(s, "%s/%s/", pmu_name, attr[i]);
+		s += strlen(s);
+		*s++ = ',';
+	}
+	if (use_group) {
+		s[-1] = '}';
+		*s = 0;
+	} else
+		s[-1] = 0;
+	return 0;
+}
+
+__weak bool arch_topdown_check_group(bool *warn)
+{
+	*warn = false;
+	return false;
+}
+
+__weak void arch_topdown_group_warn(void)
+{
+}
+
+__weak bool arch_topdown_sample_read(struct evsel *leader __maybe_unused)
+{
+	return false;
+}
+
+__weak const char *arch_get_topdown_pmu_name(struct evlist *evlist
+					     __maybe_unused,
+					     bool warn __maybe_unused)
+{
+	return "cpu";
+}
diff --git a/tools/perf/util/topdown.h b/tools/perf/util/topdown.h
new file mode 100644
index 000000000..f9531528c
--- /dev/null
+++ b/tools/perf/util/topdown.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef TOPDOWN_H
+#define TOPDOWN_H 1
+#include "evsel.h"
+#include "evlist.h"
+
+bool arch_topdown_check_group(bool *warn);
+void arch_topdown_group_warn(void);
+bool arch_topdown_sample_read(struct evsel *leader);
+const char *arch_get_topdown_pmu_name(struct evlist *evlist, bool warn);
+int topdown_filter_events(const char **attr, char **str, bool use_group,
+			  const char *pmu_name);
+
+#endif
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
new file mode 100644
index 000000000..892c323b4
--- /dev/null
+++ b/tools/perf/util/trace-event-info.c
@@ -0,0 +1,698 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2008,2009, Steven Rostedt <srostedt@redhat.com>
+ */
+#include <dirent.h>
+#include <mntent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+#include <internal/lib.h> // page_size
+#include <sys/param.h>
+
+#include "trace-event.h"
+#include "tracepoint.h"
+#include <api/fs/tracing_path.h>
+#include "evsel.h"
+#include "debug.h"
+
+#define VERSION "0.6"
+#define MAX_EVENT_LENGTH 512
+
+static int output_fd;
+
+struct tracepoint_path {
+	char *system;
+	char *name;
+	struct tracepoint_path *next;
+};
+
+int bigendian(void)
+{
+	unsigned char str[] = { 0x1, 0x2, 0x3, 0x4, 0x0, 0x0, 0x0, 0x0};
+	unsigned int *ptr;
+
+	ptr = (unsigned int *)(void *)str;
+	return *ptr == 0x01020304;
+}
+
+/* unfortunately, you can not stat debugfs or proc files for size */
+static int record_file(const char *file, ssize_t hdr_sz)
+{
+	unsigned long long size = 0;
+	char buf[BUFSIZ], *sizep;
+	off_t hdr_pos = lseek(output_fd, 0, SEEK_CUR);
+	int r, fd;
+	int err = -EIO;
+
+	fd = open(file, O_RDONLY);
+	if (fd < 0) {
+		pr_debug("Can't read '%s'", file);
+		return -errno;
+	}
+
+	/* put in zeros for file size, then fill true size later */
+	if (hdr_sz) {
+		if (write(output_fd, &size, hdr_sz) != hdr_sz)
+			goto out;
+	}
+
+	do {
+		r = read(fd, buf, BUFSIZ);
+		if (r > 0) {
+			size += r;
+			if (write(output_fd, buf, r) != r)
+				goto out;
+		}
+	} while (r > 0);
+
+	/* ugh, handle big-endian hdr_size == 4 */
+	sizep = (char*)&size;
+	if (bigendian())
+		sizep += sizeof(u64) - hdr_sz;
+
+	if (hdr_sz && pwrite(output_fd, sizep, hdr_sz, hdr_pos) < 0) {
+		pr_debug("writing file size failed\n");
+		goto out;
+	}
+
+	err = 0;
+out:
+	close(fd);
+	return err;
+}
+
+static int record_header_files(void)
+{
+	char *path = get_events_file("header_page");
+	struct stat st;
+	int err = -EIO;
+
+	if (!path) {
+		pr_debug("can't get tracing/events/header_page");
+		return -ENOMEM;
+	}
+
+	if (stat(path, &st) < 0) {
+		pr_debug("can't read '%s'", path);
+		goto out;
+	}
+
+	if (write(output_fd, "header_page", 12) != 12) {
+		pr_debug("can't write header_page\n");
+		goto out;
+	}
+
+	if (record_file(path, 8) < 0) {
+		pr_debug("can't record header_page file\n");
+		goto out;
+	}
+
+	put_events_file(path);
+
+	path = get_events_file("header_event");
+	if (!path) {
+		pr_debug("can't get tracing/events/header_event");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if (stat(path, &st) < 0) {
+		pr_debug("can't read '%s'", path);
+		goto out;
+	}
+
+	if (write(output_fd, "header_event", 13) != 13) {
+		pr_debug("can't write header_event\n");
+		goto out;
+	}
+
+	if (record_file(path, 8) < 0) {
+		pr_debug("can't record header_event file\n");
+		goto out;
+	}
+
+	err = 0;
+out:
+	put_events_file(path);
+	return err;
+}
+
+static bool name_in_tp_list(char *sys, struct tracepoint_path *tps)
+{
+	while (tps) {
+		if (!strcmp(sys, tps->name))
+			return true;
+		tps = tps->next;
+	}
+
+	return false;
+}
+
+#define for_each_event_tps(dir, dent, tps)			\
+	while ((dent = readdir(dir)))				\
+		if (dent->d_type == DT_DIR &&			\
+		    (strcmp(dent->d_name, ".")) &&		\
+		    (strcmp(dent->d_name, "..")))		\
+
+static int copy_event_system(const char *sys, struct tracepoint_path *tps)
+{
+	struct dirent *dent;
+	struct stat st;
+	char *format;
+	DIR *dir;
+	int count = 0;
+	int ret;
+	int err;
+
+	dir = opendir(sys);
+	if (!dir) {
+		pr_debug("can't read directory '%s'", sys);
+		return -errno;
+	}
+
+	for_each_event_tps(dir, dent, tps) {
+		if (!name_in_tp_list(dent->d_name, tps))
+			continue;
+
+		if (asprintf(&format, "%s/%s/format", sys, dent->d_name) < 0) {
+			err = -ENOMEM;
+			goto out;
+		}
+		ret = stat(format, &st);
+		free(format);
+		if (ret < 0)
+			continue;
+		count++;
+	}
+
+	if (write(output_fd, &count, 4) != 4) {
+		err = -EIO;
+		pr_debug("can't write count\n");
+		goto out;
+	}
+
+	rewinddir(dir);
+	for_each_event_tps(dir, dent, tps) {
+		if (!name_in_tp_list(dent->d_name, tps))
+			continue;
+
+		if (asprintf(&format, "%s/%s/format", sys, dent->d_name) < 0) {
+			err = -ENOMEM;
+			goto out;
+		}
+		ret = stat(format, &st);
+
+		if (ret >= 0) {
+			err = record_file(format, 8);
+			if (err) {
+				free(format);
+				goto out;
+			}
+		}
+		free(format);
+	}
+	err = 0;
+out:
+	closedir(dir);
+	return err;
+}
+
+static int record_ftrace_files(struct tracepoint_path *tps)
+{
+	char *path;
+	int ret;
+
+	path = get_events_file("ftrace");
+	if (!path) {
+		pr_debug("can't get tracing/events/ftrace");
+		return -ENOMEM;
+	}
+
+	ret = copy_event_system(path, tps);
+
+	put_tracing_file(path);
+
+	return ret;
+}
+
+static bool system_in_tp_list(char *sys, struct tracepoint_path *tps)
+{
+	while (tps) {
+		if (!strcmp(sys, tps->system))
+			return true;
+		tps = tps->next;
+	}
+
+	return false;
+}
+
+static int record_event_files(struct tracepoint_path *tps)
+{
+	struct dirent *dent;
+	struct stat st;
+	char *path;
+	char *sys;
+	DIR *dir;
+	int count = 0;
+	int ret;
+	int err;
+
+	path = get_tracing_file("events");
+	if (!path) {
+		pr_debug("can't get tracing/events");
+		return -ENOMEM;
+	}
+
+	dir = opendir(path);
+	if (!dir) {
+		err = -errno;
+		pr_debug("can't read directory '%s'", path);
+		goto out;
+	}
+
+	for_each_event_tps(dir, dent, tps) {
+		if (strcmp(dent->d_name, "ftrace") == 0 ||
+		    !system_in_tp_list(dent->d_name, tps))
+			continue;
+
+		count++;
+	}
+
+	if (write(output_fd, &count, 4) != 4) {
+		err = -EIO;
+		pr_debug("can't write count\n");
+		goto out;
+	}
+
+	rewinddir(dir);
+	for_each_event_tps(dir, dent, tps) {
+		if (strcmp(dent->d_name, "ftrace") == 0 ||
+		    !system_in_tp_list(dent->d_name, tps))
+			continue;
+
+		if (asprintf(&sys, "%s/%s", path, dent->d_name) < 0) {
+			err = -ENOMEM;
+			goto out;
+		}
+		ret = stat(sys, &st);
+		if (ret >= 0) {
+			ssize_t size = strlen(dent->d_name) + 1;
+
+			if (write(output_fd, dent->d_name, size) != size ||
+			    copy_event_system(sys, tps) < 0) {
+				err = -EIO;
+				free(sys);
+				goto out;
+			}
+		}
+		free(sys);
+	}
+	err = 0;
+out:
+	closedir(dir);
+	put_tracing_file(path);
+
+	return err;
+}
+
+static int record_proc_kallsyms(void)
+{
+	unsigned long long size = 0;
+	/*
+	 * Just to keep older perf.data file parsers happy, record a zero
+	 * sized kallsyms file, i.e. do the same thing that was done when
+	 * /proc/kallsyms (or something specified via --kallsyms, in a
+	 * different path) couldn't be read.
+	 */
+	return write(output_fd, &size, 4) != 4 ? -EIO : 0;
+}
+
+static int record_ftrace_printk(void)
+{
+	unsigned int size;
+	char *path;
+	struct stat st;
+	int ret, err = 0;
+
+	path = get_tracing_file("printk_formats");
+	if (!path) {
+		pr_debug("can't get tracing/printk_formats");
+		return -ENOMEM;
+	}
+
+	ret = stat(path, &st);
+	if (ret < 0) {
+		/* not found */
+		size = 0;
+		if (write(output_fd, &size, 4) != 4)
+			err = -EIO;
+		goto out;
+	}
+	err = record_file(path, 4);
+
+out:
+	put_tracing_file(path);
+	return err;
+}
+
+static int record_saved_cmdline(void)
+{
+	unsigned long long size;
+	char *path;
+	struct stat st;
+	int ret, err = 0;
+
+	path = get_tracing_file("saved_cmdlines");
+	if (!path) {
+		pr_debug("can't get tracing/saved_cmdline");
+		return -ENOMEM;
+	}
+
+	ret = stat(path, &st);
+	if (ret < 0) {
+		/* not found */
+		size = 0;
+		if (write(output_fd, &size, 8) != 8)
+			err = -EIO;
+		goto out;
+	}
+	err = record_file(path, 8);
+
+out:
+	put_tracing_file(path);
+	return err;
+}
+
+static void
+put_tracepoints_path(struct tracepoint_path *tps)
+{
+	while (tps) {
+		struct tracepoint_path *t = tps;
+
+		tps = tps->next;
+		zfree(&t->name);
+		zfree(&t->system);
+		free(t);
+	}
+}
+
+static struct tracepoint_path *tracepoint_id_to_path(u64 config)
+{
+	struct tracepoint_path *path = NULL;
+	DIR *sys_dir, *evt_dir;
+	struct dirent *sys_dirent, *evt_dirent;
+	char id_buf[24];
+	int fd;
+	u64 id;
+	char evt_path[MAXPATHLEN];
+	char *dir_path;
+
+	sys_dir = tracing_events__opendir();
+	if (!sys_dir)
+		return NULL;
+
+	for_each_subsystem(sys_dir, sys_dirent) {
+		dir_path = get_events_file(sys_dirent->d_name);
+		if (!dir_path)
+			continue;
+		evt_dir = opendir(dir_path);
+		if (!evt_dir)
+			goto next;
+
+		for_each_event(dir_path, evt_dir, evt_dirent) {
+
+			scnprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path,
+				  evt_dirent->d_name);
+			fd = open(evt_path, O_RDONLY);
+			if (fd < 0)
+				continue;
+			if (read(fd, id_buf, sizeof(id_buf)) < 0) {
+				close(fd);
+				continue;
+			}
+			close(fd);
+			id = atoll(id_buf);
+			if (id == config) {
+				put_events_file(dir_path);
+				closedir(evt_dir);
+				closedir(sys_dir);
+				path = zalloc(sizeof(*path));
+				if (!path)
+					return NULL;
+				if (asprintf(&path->system, "%.*s",
+					     MAX_EVENT_LENGTH, sys_dirent->d_name) < 0) {
+					free(path);
+					return NULL;
+				}
+				if (asprintf(&path->name, "%.*s",
+					     MAX_EVENT_LENGTH, evt_dirent->d_name) < 0) {
+					zfree(&path->system);
+					free(path);
+					return NULL;
+				}
+				return path;
+			}
+		}
+		closedir(evt_dir);
+next:
+		put_events_file(dir_path);
+	}
+
+	closedir(sys_dir);
+	return NULL;
+}
+
+static struct tracepoint_path *tracepoint_name_to_path(const char *name)
+{
+	struct tracepoint_path *path = zalloc(sizeof(*path));
+	char *str = strchr(name, ':');
+
+	if (path == NULL || str == NULL) {
+		free(path);
+		return NULL;
+	}
+
+	path->system = strndup(name, str - name);
+	path->name = strdup(str+1);
+
+	if (path->system == NULL || path->name == NULL) {
+		zfree(&path->system);
+		zfree(&path->name);
+		zfree(&path);
+	}
+
+	return path;
+}
+
+static struct tracepoint_path *
+get_tracepoints_path(struct list_head *pattrs)
+{
+	struct tracepoint_path path, *ppath = &path;
+	struct evsel *pos;
+	int nr_tracepoints = 0;
+
+	list_for_each_entry(pos, pattrs, core.node) {
+		if (pos->core.attr.type != PERF_TYPE_TRACEPOINT)
+			continue;
+		++nr_tracepoints;
+
+		if (pos->name) {
+			ppath->next = tracepoint_name_to_path(pos->name);
+			if (ppath->next)
+				goto next;
+
+			if (strchr(pos->name, ':') == NULL)
+				goto try_id;
+
+			goto error;
+		}
+
+try_id:
+		ppath->next = tracepoint_id_to_path(pos->core.attr.config);
+		if (!ppath->next) {
+error:
+			pr_debug("No memory to alloc tracepoints list\n");
+			put_tracepoints_path(path.next);
+			return NULL;
+		}
+next:
+		ppath = ppath->next;
+	}
+
+	return nr_tracepoints > 0 ? path.next : NULL;
+}
+
+bool have_tracepoints(struct list_head *pattrs)
+{
+	struct evsel *pos;
+
+	list_for_each_entry(pos, pattrs, core.node)
+		if (pos->core.attr.type == PERF_TYPE_TRACEPOINT)
+			return true;
+
+	return false;
+}
+
+static int tracing_data_header(void)
+{
+	char buf[20];
+	ssize_t size;
+
+	/* just guessing this is someone's birthday.. ;) */
+	buf[0] = 23;
+	buf[1] = 8;
+	buf[2] = 68;
+	memcpy(buf + 3, "tracing", 7);
+
+	if (write(output_fd, buf, 10) != 10)
+		return -1;
+
+	size = strlen(VERSION) + 1;
+	if (write(output_fd, VERSION, size) != size)
+		return -1;
+
+	/* save endian */
+	if (bigendian())
+		buf[0] = 1;
+	else
+		buf[0] = 0;
+
+	if (write(output_fd, buf, 1) != 1)
+		return -1;
+
+	/* save size of long */
+	buf[0] = sizeof(long);
+	if (write(output_fd, buf, 1) != 1)
+		return -1;
+
+	/* save page_size */
+	if (write(output_fd, &page_size, 4) != 4)
+		return -1;
+
+	return 0;
+}
+
+struct tracing_data *tracing_data_get(struct list_head *pattrs,
+				      int fd, bool temp)
+{
+	struct tracepoint_path *tps;
+	struct tracing_data *tdata;
+	int err;
+
+	output_fd = fd;
+
+	tps = get_tracepoints_path(pattrs);
+	if (!tps)
+		return NULL;
+
+	tdata = malloc(sizeof(*tdata));
+	if (!tdata)
+		return NULL;
+
+	tdata->temp = temp;
+	tdata->size = 0;
+
+	if (temp) {
+		int temp_fd;
+
+		snprintf(tdata->temp_file, sizeof(tdata->temp_file),
+			 "/tmp/perf-XXXXXX");
+		if (!mkstemp(tdata->temp_file)) {
+			pr_debug("Can't make temp file");
+			free(tdata);
+			return NULL;
+		}
+
+		temp_fd = open(tdata->temp_file, O_RDWR);
+		if (temp_fd < 0) {
+			pr_debug("Can't read '%s'", tdata->temp_file);
+			free(tdata);
+			return NULL;
+		}
+
+		/*
+		 * Set the temp file the default output, so all the
+		 * tracing data are stored into it.
+		 */
+		output_fd = temp_fd;
+	}
+
+	err = tracing_data_header();
+	if (err)
+		goto out;
+	err = record_header_files();
+	if (err)
+		goto out;
+	err = record_ftrace_files(tps);
+	if (err)
+		goto out;
+	err = record_event_files(tps);
+	if (err)
+		goto out;
+	err = record_proc_kallsyms();
+	if (err)
+		goto out;
+	err = record_ftrace_printk();
+	if (err)
+		goto out;
+	err = record_saved_cmdline();
+
+out:
+	/*
+	 * All tracing data are stored by now, we can restore
+	 * the default output file in case we used temp file.
+	 */
+	if (temp) {
+		tdata->size = lseek(output_fd, 0, SEEK_CUR);
+		close(output_fd);
+		output_fd = fd;
+	}
+
+	if (err)
+		zfree(&tdata);
+
+	put_tracepoints_path(tps);
+	return tdata;
+}
+
+int tracing_data_put(struct tracing_data *tdata)
+{
+	int err = 0;
+
+	if (tdata->temp) {
+		err = record_file(tdata->temp_file, 0);
+		unlink(tdata->temp_file);
+	}
+
+	free(tdata);
+	return err;
+}
+
+int read_tracing_data(int fd, struct list_head *pattrs)
+{
+	int err;
+	struct tracing_data *tdata;
+
+	/*
+	 * We work over the real file, so we can write data
+	 * directly, no temp file is needed.
+	 */
+	tdata = tracing_data_get(pattrs, fd, false);
+	if (!tdata)
+		return -ENOMEM;
+
+	err = tracing_data_put(tdata);
+	return err;
+}
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
new file mode 100644
index 000000000..c9c83a406
--- /dev/null
+++ b/tools/perf/util/trace-event-parse.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2009, Steven Rostedt <srostedt@redhat.com>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "trace-event.h"
+
+#include <linux/ctype.h>
+
+static int get_common_field(struct scripting_context *context,
+			    int *offset, int *size, const char *type)
+{
+	struct tep_handle *pevent = context->pevent;
+	struct tep_event *event;
+	struct tep_format_field *field;
+
+	if (!*size) {
+
+		event = tep_get_first_event(pevent);
+		if (!event)
+			return 0;
+
+		field = tep_find_common_field(event, type);
+		if (!field)
+			return 0;
+		*offset = field->offset;
+		*size = field->size;
+	}
+
+	return tep_read_number(pevent, context->event_data + *offset, *size);
+}
+
+int common_lock_depth(struct scripting_context *context)
+{
+	static int offset;
+	static int size;
+	int ret;
+
+	ret = get_common_field(context, &size, &offset,
+			       "common_lock_depth");
+	if (ret < 0)
+		return -1;
+
+	return ret;
+}
+
+int common_flags(struct scripting_context *context)
+{
+	static int offset;
+	static int size;
+	int ret;
+
+	ret = get_common_field(context, &size, &offset,
+			       "common_flags");
+	if (ret < 0)
+		return -1;
+
+	return ret;
+}
+
+int common_pc(struct scripting_context *context)
+{
+	static int offset;
+	static int size;
+	int ret;
+
+	ret = get_common_field(context, &size, &offset,
+			       "common_preempt_count");
+	if (ret < 0)
+		return -1;
+
+	return ret;
+}
+
+unsigned long long
+raw_field_value(struct tep_event *event, const char *name, void *data)
+{
+	struct tep_format_field *field;
+	unsigned long long val;
+
+	field = tep_find_any_field(event, name);
+	if (!field)
+		return 0ULL;
+
+	tep_read_number_field(field, data, &val);
+
+	return val;
+}
+
+unsigned long long read_size(struct tep_event *event, void *ptr, int size)
+{
+	return tep_read_number(event->tep, ptr, size);
+}
+
+void event_format__fprintf(struct tep_event *event,
+			   int cpu, void *data, int size, FILE *fp)
+{
+	struct tep_record record;
+	struct trace_seq s;
+
+	memset(&record, 0, sizeof(record));
+	record.cpu = cpu;
+	record.size = size;
+	record.data = data;
+
+	trace_seq_init(&s);
+	tep_print_event(event->tep, &s, &record, "%s", TEP_PRINT_INFO);
+	trace_seq_do_fprintf(&s, fp);
+	trace_seq_destroy(&s);
+}
+
+void event_format__print(struct tep_event *event,
+			 int cpu, void *data, int size)
+{
+	return event_format__fprintf(event, cpu, data, size, stdout);
+}
+
+void parse_ftrace_printk(struct tep_handle *pevent,
+			 char *file, unsigned int size __maybe_unused)
+{
+	unsigned long long addr;
+	char *printk;
+	char *line;
+	char *next = NULL;
+	char *addr_str;
+	char *fmt = NULL;
+
+	line = strtok_r(file, "\n", &next);
+	while (line) {
+		addr_str = strtok_r(line, ":", &fmt);
+		if (!addr_str) {
+			pr_warning("printk format with empty entry");
+			break;
+		}
+		addr = strtoull(addr_str, NULL, 16);
+		/* fmt still has a space, skip it */
+		printk = strdup(fmt+1);
+		line = strtok_r(NULL, "\n", &next);
+		tep_register_print_string(pevent, printk, addr);
+		free(printk);
+	}
+}
+
+void parse_saved_cmdline(struct tep_handle *pevent,
+			 char *file, unsigned int size __maybe_unused)
+{
+	char comm[17]; /* Max comm length in the kernel is 16. */
+	char *line;
+	char *next = NULL;
+	int pid;
+
+	line = strtok_r(file, "\n", &next);
+	while (line) {
+		if (sscanf(line, "%d %16s", &pid, comm) == 2)
+			tep_register_comm(pevent, comm, pid);
+		line = strtok_r(NULL, "\n", &next);
+	}
+}
+
+int parse_ftrace_file(struct tep_handle *pevent, char *buf, unsigned long size)
+{
+	return tep_parse_event(pevent, buf, size, "ftrace");
+}
+
+int parse_event_file(struct tep_handle *pevent,
+		     char *buf, unsigned long size, char *sys)
+{
+	return tep_parse_event(pevent, buf, size, sys);
+}
+
+struct flag {
+	const char *name;
+	unsigned long long value;
+};
+
+static const struct flag flags[] = {
+	{ "HI_SOFTIRQ", 0 },
+	{ "TIMER_SOFTIRQ", 1 },
+	{ "NET_TX_SOFTIRQ", 2 },
+	{ "NET_RX_SOFTIRQ", 3 },
+	{ "BLOCK_SOFTIRQ", 4 },
+	{ "IRQ_POLL_SOFTIRQ", 5 },
+	{ "TASKLET_SOFTIRQ", 6 },
+	{ "SCHED_SOFTIRQ", 7 },
+	{ "HRTIMER_SOFTIRQ", 8 },
+	{ "RCU_SOFTIRQ", 9 },
+
+	{ "HRTIMER_NORESTART", 0 },
+	{ "HRTIMER_RESTART", 1 },
+};
+
+unsigned long long eval_flag(const char *flag)
+{
+	int i;
+
+	/*
+	 * Some flags in the format files do not get converted.
+	 * If the flag is not numeric, see if it is something that
+	 * we already know about.
+	 */
+	if (isdigit(flag[0]))
+		return strtoull(flag, NULL, 0);
+
+	for (i = 0; i < (int)(ARRAY_SIZE(flags)); i++)
+		if (strcmp(flags[i].name, flag) == 0)
+			return flags[i].value;
+
+	return 0;
+}
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
new file mode 100644
index 000000000..8a01af783
--- /dev/null
+++ b/tools/perf/util/trace-event-read.c
@@ -0,0 +1,478 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2009, Steven Rostedt <srostedt@redhat.com>
+ */
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "trace-event.h"
+#include "debug.h"
+
+static int input_fd;
+
+static ssize_t trace_data_size;
+static bool repipe;
+
+static int __do_read(int fd, void *buf, int size)
+{
+	int rsize = size;
+
+	while (size) {
+		int ret = read(fd, buf, size);
+
+		if (ret <= 0)
+			return -1;
+
+		if (repipe) {
+			int retw = write(STDOUT_FILENO, buf, ret);
+
+			if (retw <= 0 || retw != ret) {
+				pr_debug("repiping input file");
+				return -1;
+			}
+		}
+
+		size -= ret;
+		buf += ret;
+	}
+
+	return rsize;
+}
+
+static int do_read(void *data, int size)
+{
+	int r;
+
+	r = __do_read(input_fd, data, size);
+	if (r <= 0) {
+		pr_debug("reading input file (size expected=%d received=%d)",
+			 size, r);
+		return -1;
+	}
+
+	trace_data_size += r;
+
+	return r;
+}
+
+/* If it fails, the next read will report it */
+static void skip(int size)
+{
+	char buf[BUFSIZ];
+	int r;
+
+	while (size) {
+		r = size > BUFSIZ ? BUFSIZ : size;
+		do_read(buf, r);
+		size -= r;
+	}
+}
+
+static unsigned int read4(struct tep_handle *pevent)
+{
+	unsigned int data;
+
+	if (do_read(&data, 4) < 0)
+		return 0;
+	return tep_read_number(pevent, &data, 4);
+}
+
+static unsigned long long read8(struct tep_handle *pevent)
+{
+	unsigned long long data;
+
+	if (do_read(&data, 8) < 0)
+		return 0;
+	return tep_read_number(pevent, &data, 8);
+}
+
+static char *read_string(void)
+{
+	char buf[BUFSIZ];
+	char *str = NULL;
+	int size = 0;
+	off_t r;
+	char c;
+
+	for (;;) {
+		r = read(input_fd, &c, 1);
+		if (r < 0) {
+			pr_debug("reading input file");
+			goto out;
+		}
+
+		if (!r) {
+			pr_debug("no data");
+			goto out;
+		}
+
+		if (repipe) {
+			int retw = write(STDOUT_FILENO, &c, 1);
+
+			if (retw <= 0 || retw != r) {
+				pr_debug("repiping input file string");
+				goto out;
+			}
+		}
+
+		buf[size++] = c;
+
+		if (!c)
+			break;
+	}
+
+	trace_data_size += size;
+
+	str = malloc(size);
+	if (str)
+		memcpy(str, buf, size);
+out:
+	return str;
+}
+
+static int read_proc_kallsyms(struct tep_handle *pevent)
+{
+	unsigned int size;
+
+	size = read4(pevent);
+	if (!size)
+		return 0;
+	/*
+	 * Just skip it, now that we configure libtraceevent to use the
+	 * tools/perf/ symbol resolver.
+	 *
+	 * We need to skip it so that we can continue parsing old perf.data
+	 * files, that contains this /proc/kallsyms payload.
+	 *
+	 * Newer perf.data files will have just the 4-bytes zeros "kallsyms
+	 * payload", so that older tools can continue reading it and interpret
+	 * it as "no kallsyms payload is present".
+	 */
+	lseek(input_fd, size, SEEK_CUR);
+	trace_data_size += size;
+	return 0;
+}
+
+static int read_ftrace_printk(struct tep_handle *pevent)
+{
+	unsigned int size;
+	char *buf;
+
+	/* it can have 0 size */
+	size = read4(pevent);
+	if (!size)
+		return 0;
+
+	buf = malloc(size + 1);
+	if (buf == NULL)
+		return -1;
+
+	if (do_read(buf, size) < 0) {
+		free(buf);
+		return -1;
+	}
+
+	buf[size] = '\0';
+
+	parse_ftrace_printk(pevent, buf, size);
+
+	free(buf);
+	return 0;
+}
+
+static int read_header_files(struct tep_handle *pevent)
+{
+	unsigned long long size;
+	char *header_page;
+	char buf[BUFSIZ];
+	int ret = 0;
+
+	if (do_read(buf, 12) < 0)
+		return -1;
+
+	if (memcmp(buf, "header_page", 12) != 0) {
+		pr_debug("did not read header page");
+		return -1;
+	}
+
+	size = read8(pevent);
+
+	header_page = malloc(size);
+	if (header_page == NULL)
+		return -1;
+
+	if (do_read(header_page, size) < 0) {
+		pr_debug("did not read header page");
+		free(header_page);
+		return -1;
+	}
+
+	if (!tep_parse_header_page(pevent, header_page, size,
+				   tep_get_long_size(pevent))) {
+		/*
+		 * The commit field in the page is of type long,
+		 * use that instead, since it represents the kernel.
+		 */
+		tep_set_long_size(pevent, tep_get_header_page_size(pevent));
+	}
+	free(header_page);
+
+	if (do_read(buf, 13) < 0)
+		return -1;
+
+	if (memcmp(buf, "header_event", 13) != 0) {
+		pr_debug("did not read header event");
+		return -1;
+	}
+
+	size = read8(pevent);
+	skip(size);
+
+	return ret;
+}
+
+static int read_ftrace_file(struct tep_handle *pevent, unsigned long long size)
+{
+	int ret;
+	char *buf;
+
+	buf = malloc(size);
+	if (buf == NULL) {
+		pr_debug("memory allocation failure\n");
+		return -1;
+	}
+
+	ret = do_read(buf, size);
+	if (ret < 0) {
+		pr_debug("error reading ftrace file.\n");
+		goto out;
+	}
+
+	ret = parse_ftrace_file(pevent, buf, size);
+	if (ret < 0)
+		pr_debug("error parsing ftrace file.\n");
+out:
+	free(buf);
+	return ret;
+}
+
+static int read_event_file(struct tep_handle *pevent, char *sys,
+			   unsigned long long size)
+{
+	int ret;
+	char *buf;
+
+	buf = malloc(size);
+	if (buf == NULL) {
+		pr_debug("memory allocation failure\n");
+		return -1;
+	}
+
+	ret = do_read(buf, size);
+	if (ret < 0)
+		goto out;
+
+	ret = parse_event_file(pevent, buf, size, sys);
+	if (ret < 0)
+		pr_debug("error parsing event file.\n");
+out:
+	free(buf);
+	return ret;
+}
+
+static int read_ftrace_files(struct tep_handle *pevent)
+{
+	unsigned long long size;
+	int count;
+	int i;
+	int ret;
+
+	count = read4(pevent);
+
+	for (i = 0; i < count; i++) {
+		size = read8(pevent);
+		ret = read_ftrace_file(pevent, size);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+static int read_event_files(struct tep_handle *pevent)
+{
+	unsigned long long size;
+	char *sys;
+	int systems;
+	int count;
+	int i,x;
+	int ret;
+
+	systems = read4(pevent);
+
+	for (i = 0; i < systems; i++) {
+		sys = read_string();
+		if (sys == NULL)
+			return -1;
+
+		count = read4(pevent);
+
+		for (x=0; x < count; x++) {
+			size = read8(pevent);
+			ret = read_event_file(pevent, sys, size);
+			if (ret) {
+				free(sys);
+				return ret;
+			}
+		}
+		free(sys);
+	}
+	return 0;
+}
+
+static int read_saved_cmdline(struct tep_handle *pevent)
+{
+	unsigned long long size;
+	char *buf;
+	int ret;
+
+	/* it can have 0 size */
+	size = read8(pevent);
+	if (!size)
+		return 0;
+
+	buf = malloc(size + 1);
+	if (buf == NULL) {
+		pr_debug("memory allocation failure\n");
+		return -1;
+	}
+
+	ret = do_read(buf, size);
+	if (ret < 0) {
+		pr_debug("error reading saved cmdlines\n");
+		goto out;
+	}
+	buf[ret] = '\0';
+
+	parse_saved_cmdline(pevent, buf, size);
+	ret = 0;
+out:
+	free(buf);
+	return ret;
+}
+
+ssize_t trace_report(int fd, struct trace_event *tevent, bool __repipe)
+{
+	char buf[BUFSIZ];
+	char test[] = { 23, 8, 68 };
+	char *version;
+	int show_version = 0;
+	int show_funcs = 0;
+	int show_printk = 0;
+	ssize_t size = -1;
+	int file_bigendian;
+	int host_bigendian;
+	int file_long_size;
+	int file_page_size;
+	struct tep_handle *pevent = NULL;
+	int err;
+
+	repipe = __repipe;
+	input_fd = fd;
+
+	if (do_read(buf, 3) < 0)
+		return -1;
+	if (memcmp(buf, test, 3) != 0) {
+		pr_debug("no trace data in the file");
+		return -1;
+	}
+
+	if (do_read(buf, 7) < 0)
+		return -1;
+	if (memcmp(buf, "tracing", 7) != 0) {
+		pr_debug("not a trace file (missing 'tracing' tag)");
+		return -1;
+	}
+
+	version = read_string();
+	if (version == NULL)
+		return -1;
+	if (show_version)
+		printf("version = %s\n", version);
+
+	if (do_read(buf, 1) < 0) {
+		free(version);
+		return -1;
+	}
+	file_bigendian = buf[0];
+	host_bigendian = bigendian();
+
+	if (trace_event__init(tevent)) {
+		pr_debug("trace_event__init failed");
+		goto out;
+	}
+
+	pevent = tevent->pevent;
+
+	tep_set_flag(pevent, TEP_NSEC_OUTPUT);
+	tep_set_file_bigendian(pevent, file_bigendian);
+	tep_set_local_bigendian(pevent, host_bigendian);
+
+	if (do_read(buf, 1) < 0)
+		goto out;
+	file_long_size = buf[0];
+
+	file_page_size = read4(pevent);
+	if (!file_page_size)
+		goto out;
+
+	tep_set_long_size(pevent, file_long_size);
+	tep_set_page_size(pevent, file_page_size);
+
+	err = read_header_files(pevent);
+	if (err)
+		goto out;
+	err = read_ftrace_files(pevent);
+	if (err)
+		goto out;
+	err = read_event_files(pevent);
+	if (err)
+		goto out;
+	err = read_proc_kallsyms(pevent);
+	if (err)
+		goto out;
+	err = read_ftrace_printk(pevent);
+	if (err)
+		goto out;
+	if (atof(version) >= 0.6) {
+		err = read_saved_cmdline(pevent);
+		if (err)
+			goto out;
+	}
+
+	size = trace_data_size;
+	repipe = false;
+
+	if (show_funcs) {
+		tep_print_funcs(pevent);
+	} else if (show_printk) {
+		tep_print_printk(pevent);
+	}
+
+	pevent = NULL;
+
+out:
+	if (pevent)
+		trace_event__cleanup(tevent);
+	free(version);
+	return size;
+}
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
new file mode 100644
index 000000000..7172ca052
--- /dev/null
+++ b/tools/perf/util/trace-event-scripting.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * trace-event-scripting.  Scripting engine common and initialization code.
+ *
+ * Copyright (C) 2009-2010 Tom Zanussi <tzanussi@gmail.com>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "trace-event.h"
+#include "event.h"
+#include "evsel.h"
+#include <linux/zalloc.h>
+
+struct scripting_context *scripting_context;
+
+void scripting_context__update(struct scripting_context *c,
+			       union perf_event *event,
+			       struct perf_sample *sample,
+			       struct evsel *evsel,
+			       struct addr_location *al,
+			       struct addr_location *addr_al)
+{
+	c->event_data = sample->raw_data;
+	if (evsel->tp_format)
+		c->pevent = evsel->tp_format->tep;
+	else
+		c->pevent = NULL;
+	c->event = event;
+	c->sample = sample;
+	c->evsel = evsel;
+	c->al = al;
+	c->addr_al = addr_al;
+}
+
+static int flush_script_unsupported(void)
+{
+	return 0;
+}
+
+static int stop_script_unsupported(void)
+{
+	return 0;
+}
+
+static void process_event_unsupported(union perf_event *event __maybe_unused,
+				      struct perf_sample *sample __maybe_unused,
+				      struct evsel *evsel __maybe_unused,
+				      struct addr_location *al __maybe_unused,
+				      struct addr_location *addr_al __maybe_unused)
+{
+}
+
+static void print_python_unsupported_msg(void)
+{
+	fprintf(stderr, "Python scripting not supported."
+		"  Install libpython and rebuild perf to enable it.\n"
+		"For example:\n  # apt-get install python-dev (ubuntu)"
+		"\n  # yum install python-devel (Fedora)"
+		"\n  etc.\n");
+}
+
+static int python_start_script_unsupported(const char *script __maybe_unused,
+					   int argc __maybe_unused,
+					   const char **argv __maybe_unused,
+					   struct perf_session *session __maybe_unused)
+{
+	print_python_unsupported_msg();
+
+	return -1;
+}
+
+static int python_generate_script_unsupported(struct tep_handle *pevent
+					      __maybe_unused,
+					      const char *outfile
+					      __maybe_unused)
+{
+	print_python_unsupported_msg();
+
+	return -1;
+}
+
+struct scripting_ops python_scripting_unsupported_ops = {
+	.name = "Python",
+	.dirname = "python",
+	.start_script = python_start_script_unsupported,
+	.flush_script = flush_script_unsupported,
+	.stop_script = stop_script_unsupported,
+	.process_event = process_event_unsupported,
+	.generate_script = python_generate_script_unsupported,
+};
+
+static void register_python_scripting(struct scripting_ops *scripting_ops)
+{
+	if (scripting_context == NULL)
+		scripting_context = malloc(sizeof(*scripting_context));
+
+       if (scripting_context == NULL ||
+	   script_spec_register("Python", scripting_ops) ||
+	   script_spec_register("py", scripting_ops)) {
+		pr_err("Error registering Python script extension: disabling it\n");
+		zfree(&scripting_context);
+	}
+}
+
+#ifndef HAVE_LIBPYTHON_SUPPORT
+void setup_python_scripting(void)
+{
+	register_python_scripting(&python_scripting_unsupported_ops);
+}
+#else
+extern struct scripting_ops python_scripting_ops;
+
+void setup_python_scripting(void)
+{
+	register_python_scripting(&python_scripting_ops);
+}
+#endif
+
+static void print_perl_unsupported_msg(void)
+{
+	fprintf(stderr, "Perl scripting not supported."
+		"  Install libperl and rebuild perf to enable it.\n"
+		"For example:\n  # apt-get install libperl-dev (ubuntu)"
+		"\n  # yum install 'perl(ExtUtils::Embed)' (Fedora)"
+		"\n  etc.\n");
+}
+
+static int perl_start_script_unsupported(const char *script __maybe_unused,
+					 int argc __maybe_unused,
+					 const char **argv __maybe_unused,
+					 struct perf_session *session __maybe_unused)
+{
+	print_perl_unsupported_msg();
+
+	return -1;
+}
+
+static int perl_generate_script_unsupported(struct tep_handle *pevent
+					    __maybe_unused,
+					    const char *outfile __maybe_unused)
+{
+	print_perl_unsupported_msg();
+
+	return -1;
+}
+
+struct scripting_ops perl_scripting_unsupported_ops = {
+	.name = "Perl",
+	.dirname = "perl",
+	.start_script = perl_start_script_unsupported,
+	.flush_script = flush_script_unsupported,
+	.stop_script = stop_script_unsupported,
+	.process_event = process_event_unsupported,
+	.generate_script = perl_generate_script_unsupported,
+};
+
+static void register_perl_scripting(struct scripting_ops *scripting_ops)
+{
+	if (scripting_context == NULL)
+		scripting_context = malloc(sizeof(*scripting_context));
+
+       if (scripting_context == NULL ||
+	   script_spec_register("Perl", scripting_ops) ||
+	   script_spec_register("pl", scripting_ops)) {
+		pr_err("Error registering Perl script extension: disabling it\n");
+		zfree(&scripting_context);
+	}
+}
+
+#ifndef HAVE_LIBPERL_SUPPORT
+void setup_perl_scripting(void)
+{
+	register_perl_scripting(&perl_scripting_unsupported_ops);
+}
+#else
+extern struct scripting_ops perl_scripting_ops;
+
+void setup_perl_scripting(void)
+{
+	register_perl_scripting(&perl_scripting_ops);
+}
+#endif
diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c
new file mode 100644
index 000000000..b3ee651e3
--- /dev/null
+++ b/tools/perf/util/trace-event.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <traceevent/event-parse.h>
+#include <api/fs/tracing_path.h>
+#include <api/fs/fs.h>
+#include "trace-event.h"
+#include "machine.h"
+
+/*
+ * global trace_event object used by trace_event__tp_format
+ *
+ * TODO There's no cleanup call for this. Add some sort of
+ * __exit function support and call trace_event__cleanup
+ * there.
+ */
+static struct trace_event tevent;
+static bool tevent_initialized;
+
+int trace_event__init(struct trace_event *t)
+{
+	struct tep_handle *pevent = tep_alloc();
+
+	if (pevent) {
+		t->plugin_list = tep_load_plugins(pevent);
+		t->pevent  = pevent;
+	}
+
+	return pevent ? 0 : -1;
+}
+
+static int trace_event__init2(void)
+{
+	int be = tep_is_bigendian();
+	struct tep_handle *pevent;
+
+	if (trace_event__init(&tevent))
+		return -1;
+
+	pevent = tevent.pevent;
+	tep_set_flag(pevent, TEP_NSEC_OUTPUT);
+	tep_set_file_bigendian(pevent, be);
+	tep_set_local_bigendian(pevent, be);
+	tevent_initialized = true;
+	return 0;
+}
+
+int trace_event__register_resolver(struct machine *machine,
+				   tep_func_resolver_t *func)
+{
+	if (!tevent_initialized && trace_event__init2())
+		return -1;
+
+	return tep_set_function_resolver(tevent.pevent, func, machine);
+}
+
+void trace_event__cleanup(struct trace_event *t)
+{
+	tep_unload_plugins(t->plugin_list, t->pevent);
+	tep_free(t->pevent);
+}
+
+/*
+ * Returns pointer with encoded error via <linux/err.h> interface.
+ */
+static struct tep_event*
+tp_format(const char *sys, const char *name)
+{
+	char *tp_dir = get_events_file(sys);
+	struct tep_handle *pevent = tevent.pevent;
+	struct tep_event *event = NULL;
+	char path[PATH_MAX];
+	size_t size;
+	char *data;
+	int err;
+
+	if (!tp_dir)
+		return ERR_PTR(-errno);
+
+	scnprintf(path, PATH_MAX, "%s/%s/format", tp_dir, name);
+	put_events_file(tp_dir);
+
+	err = filename__read_str(path, &data, &size);
+	if (err)
+		return ERR_PTR(err);
+
+	tep_parse_format(pevent, &event, data, size, sys);
+
+	free(data);
+	return event;
+}
+
+/*
+ * Returns pointer with encoded error via <linux/err.h> interface.
+ */
+struct tep_event*
+trace_event__tp_format(const char *sys, const char *name)
+{
+	if (!tevent_initialized && trace_event__init2())
+		return ERR_PTR(-ENOMEM);
+
+	return tp_format(sys, name);
+}
+
+struct tep_event *trace_event__tp_format_id(int id)
+{
+	if (!tevent_initialized && trace_event__init2())
+		return ERR_PTR(-ENOMEM);
+
+	return tep_find_event(tevent.pevent, id);
+}
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
new file mode 100644
index 000000000..640981105
--- /dev/null
+++ b/tools/perf/util/trace-event.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_UTIL_TRACE_EVENT_H
+#define _PERF_UTIL_TRACE_EVENT_H
+
+#include <traceevent/event-parse.h>
+#include "parse-events.h"
+
+struct machine;
+struct perf_sample;
+union perf_event;
+struct perf_tool;
+struct thread;
+struct tep_plugin_list;
+struct evsel;
+
+struct trace_event {
+	struct tep_handle	*pevent;
+	struct tep_plugin_list	*plugin_list;
+};
+
+int trace_event__init(struct trace_event *t);
+void trace_event__cleanup(struct trace_event *t);
+int trace_event__register_resolver(struct machine *machine,
+				   tep_func_resolver_t *func);
+struct tep_event*
+trace_event__tp_format(const char *sys, const char *name);
+
+struct tep_event *trace_event__tp_format_id(int id);
+
+int bigendian(void);
+
+void event_format__fprintf(struct tep_event *event,
+			   int cpu, void *data, int size, FILE *fp);
+
+void event_format__print(struct tep_event *event,
+			 int cpu, void *data, int size);
+
+int parse_ftrace_file(struct tep_handle *pevent, char *buf, unsigned long size);
+int parse_event_file(struct tep_handle *pevent,
+		     char *buf, unsigned long size, char *sys);
+
+unsigned long long
+raw_field_value(struct tep_event *event, const char *name, void *data);
+
+void parse_proc_kallsyms(struct tep_handle *pevent, char *file, unsigned int size);
+void parse_ftrace_printk(struct tep_handle *pevent, char *file, unsigned int size);
+void parse_saved_cmdline(struct tep_handle *pevent, char *file, unsigned int size);
+
+ssize_t trace_report(int fd, struct trace_event *tevent, bool repipe);
+
+unsigned long long read_size(struct tep_event *event, void *ptr, int size);
+unsigned long long eval_flag(const char *flag);
+
+int read_tracing_data(int fd, struct list_head *pattrs);
+
+struct tracing_data {
+	/* size is only valid if temp is 'true' */
+	ssize_t size;
+	bool temp;
+	char temp_file[50];
+};
+
+struct tracing_data *tracing_data_get(struct list_head *pattrs,
+				      int fd, bool temp);
+int tracing_data_put(struct tracing_data *tdata);
+
+
+struct addr_location;
+
+struct perf_session;
+struct perf_stat_config;
+
+struct scripting_ops {
+	const char *name;
+	const char *dirname; /* For script path .../scripts/<dirname>/... */
+	int (*start_script)(const char *script, int argc, const char **argv,
+			    struct perf_session *session);
+	int (*flush_script) (void);
+	int (*stop_script) (void);
+	void (*process_event) (union perf_event *event,
+			       struct perf_sample *sample,
+			       struct evsel *evsel,
+			       struct addr_location *al,
+			       struct addr_location *addr_al);
+	void (*process_switch)(union perf_event *event,
+			       struct perf_sample *sample,
+			       struct machine *machine);
+	void (*process_auxtrace_error)(struct perf_session *session,
+				       union perf_event *event);
+	void (*process_stat)(struct perf_stat_config *config,
+			     struct evsel *evsel, u64 tstamp);
+	void (*process_stat_interval)(u64 tstamp);
+	void (*process_throttle)(union perf_event *event,
+				 struct perf_sample *sample,
+				 struct machine *machine);
+	int (*generate_script) (struct tep_handle *pevent, const char *outfile);
+};
+
+extern unsigned int scripting_max_stack;
+
+int script_spec_register(const char *spec, struct scripting_ops *ops);
+
+void script_fetch_insn(struct perf_sample *sample, struct thread *thread,
+		       struct machine *machine);
+
+void setup_perl_scripting(void);
+void setup_python_scripting(void);
+
+struct scripting_context {
+	struct tep_handle *pevent;
+	void *event_data;
+	union perf_event *event;
+	struct perf_sample *sample;
+	struct evsel *evsel;
+	struct addr_location *al;
+	struct addr_location *addr_al;
+	struct perf_session *session;
+};
+
+void scripting_context__update(struct scripting_context *scripting_context,
+			       union perf_event *event,
+			       struct perf_sample *sample,
+			       struct evsel *evsel,
+			       struct addr_location *al,
+			       struct addr_location *addr_al);
+
+int common_pc(struct scripting_context *context);
+int common_flags(struct scripting_context *context);
+int common_lock_depth(struct scripting_context *context);
+
+#define SAMPLE_FLAGS_BUF_SIZE 64
+int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz);
+
+#endif /* _PERF_UTIL_TRACE_EVENT_H */
diff --git a/tools/perf/util/tracepoint.c b/tools/perf/util/tracepoint.c
new file mode 100644
index 000000000..92dd8b455
--- /dev/null
+++ b/tools/perf/util/tracepoint.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "tracepoint.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/param.h>
+#include <unistd.h>
+
+#include <api/fs/tracing_path.h>
+
+int tp_event_has_id(const char *dir_path, struct dirent *evt_dir)
+{
+	char evt_path[MAXPATHLEN];
+	int fd;
+
+	snprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path, evt_dir->d_name);
+	fd = open(evt_path, O_RDONLY);
+	if (fd < 0)
+		return -EINVAL;
+	close(fd);
+
+	return 0;
+}
+
+/*
+ * Check whether event is in <debugfs_mount_point>/tracing/events
+ */
+int is_valid_tracepoint(const char *event_string)
+{
+	DIR *sys_dir, *evt_dir;
+	struct dirent *sys_dirent, *evt_dirent;
+	char evt_path[MAXPATHLEN];
+	char *dir_path;
+
+	sys_dir = tracing_events__opendir();
+	if (!sys_dir)
+		return 0;
+
+	for_each_subsystem(sys_dir, sys_dirent) {
+		dir_path = get_events_file(sys_dirent->d_name);
+		if (!dir_path)
+			continue;
+		evt_dir = opendir(dir_path);
+		if (!evt_dir)
+			goto next;
+
+		for_each_event(dir_path, evt_dir, evt_dirent) {
+			snprintf(evt_path, MAXPATHLEN, "%s:%s",
+				 sys_dirent->d_name, evt_dirent->d_name);
+			if (!strcmp(evt_path, event_string)) {
+				closedir(evt_dir);
+				put_events_file(dir_path);
+				closedir(sys_dir);
+				return 1;
+			}
+		}
+		closedir(evt_dir);
+next:
+		put_events_file(dir_path);
+	}
+	closedir(sys_dir);
+	return 0;
+}
diff --git a/tools/perf/util/tracepoint.h b/tools/perf/util/tracepoint.h
new file mode 100644
index 000000000..c4a110fe8
--- /dev/null
+++ b/tools/perf/util/tracepoint.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_TRACEPOINT_H
+#define __PERF_TRACEPOINT_H
+
+#include <dirent.h>
+#include <string.h>
+
+int tp_event_has_id(const char *dir_path, struct dirent *evt_dir);
+
+#define for_each_event(dir_path, evt_dir, evt_dirent)		\
+	while ((evt_dirent = readdir(evt_dir)) != NULL)		\
+		if (evt_dirent->d_type == DT_DIR &&		\
+		    (strcmp(evt_dirent->d_name, ".")) &&	\
+		    (strcmp(evt_dirent->d_name, "..")) &&	\
+		    (!tp_event_has_id(dir_path, evt_dirent)))
+
+#define for_each_subsystem(sys_dir, sys_dirent)			\
+	while ((sys_dirent = readdir(sys_dir)) != NULL)		\
+		if (sys_dirent->d_type == DT_DIR &&		\
+		    (strcmp(sys_dirent->d_name, ".")) &&	\
+		    (strcmp(sys_dirent->d_name, "..")))
+
+int is_valid_tracepoint(const char *event_string);
+
+#endif /* __PERF_TRACEPOINT_H */
diff --git a/tools/perf/util/trigger.h b/tools/perf/util/trigger.h
new file mode 100644
index 000000000..33e997f9c
--- /dev/null
+++ b/tools/perf/util/trigger.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TRIGGER_H_
+#define __TRIGGER_H_ 1
+
+#include "asm/bug.h"
+
+/*
+ * Use trigger to model operations which need to be executed when
+ * an event (a signal, for example) is observed.
+ *
+ * States and transits:
+ *
+ *
+ *  OFF--> ON --> READY --(hit)--> HIT
+ *                 ^               |
+ *                 |            (ready)
+ *                 |               |
+ *                  \_____________/
+ *
+ * is_hit and is_ready are two key functions to query the state of
+ * a trigger. is_hit means the event already happen; is_ready means the
+ * trigger is waiting for the event.
+ */
+
+struct trigger {
+	volatile enum {
+		TRIGGER_ERROR		= -2,
+		TRIGGER_OFF		= -1,
+		TRIGGER_ON		= 0,
+		TRIGGER_READY		= 1,
+		TRIGGER_HIT		= 2,
+	} state;
+	const char *name;
+};
+
+#define TRIGGER_WARN_ONCE(t, exp) \
+	WARN_ONCE(t->state != exp, "trigger '%s' state transist error: %d in %s()\n", \
+		  t->name, t->state, __func__)
+
+static inline bool trigger_is_available(struct trigger *t)
+{
+	return t->state >= 0;
+}
+
+static inline bool trigger_is_error(struct trigger *t)
+{
+	return t->state <= TRIGGER_ERROR;
+}
+
+static inline void trigger_on(struct trigger *t)
+{
+	TRIGGER_WARN_ONCE(t, TRIGGER_OFF);
+	t->state = TRIGGER_ON;
+}
+
+static inline void trigger_ready(struct trigger *t)
+{
+	if (!trigger_is_available(t))
+		return;
+	t->state = TRIGGER_READY;
+}
+
+static inline void trigger_hit(struct trigger *t)
+{
+	if (!trigger_is_available(t))
+		return;
+	TRIGGER_WARN_ONCE(t, TRIGGER_READY);
+	t->state = TRIGGER_HIT;
+}
+
+static inline void trigger_off(struct trigger *t)
+{
+	if (!trigger_is_available(t))
+		return;
+	t->state = TRIGGER_OFF;
+}
+
+static inline void trigger_error(struct trigger *t)
+{
+	t->state = TRIGGER_ERROR;
+}
+
+static inline bool trigger_is_ready(struct trigger *t)
+{
+	return t->state == TRIGGER_READY;
+}
+
+static inline bool trigger_is_hit(struct trigger *t)
+{
+	return t->state == TRIGGER_HIT;
+}
+
+#define DEFINE_TRIGGER(n) \
+struct trigger n = {.state = TRIGGER_OFF, .name = #n}
+#endif
diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c
new file mode 100644
index 000000000..f19791d46
--- /dev/null
+++ b/tools/perf/util/tsc.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <string.h>
+
+#include <linux/compiler.h>
+#include <linux/perf_event.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+
+#include <asm/barrier.h>
+
+#include "event.h"
+#include "synthetic-events.h"
+#include "debug.h"
+#include "tsc.h"
+
+u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc)
+{
+	u64 t, quot, rem;
+
+	t = ns - tc->time_zero;
+	quot = t / tc->time_mult;
+	rem  = t % tc->time_mult;
+	return (quot << tc->time_shift) +
+	       (rem << tc->time_shift) / tc->time_mult;
+}
+
+u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc)
+{
+	u64 quot, rem;
+
+	if (tc->cap_user_time_short)
+		cyc = tc->time_cycles +
+			((cyc - tc->time_cycles) & tc->time_mask);
+
+	quot = cyc >> tc->time_shift;
+	rem  = cyc & (((u64)1 << tc->time_shift) - 1);
+	return tc->time_zero + quot * tc->time_mult +
+	       ((rem * tc->time_mult) >> tc->time_shift);
+}
+
+int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
+			     struct perf_tsc_conversion *tc)
+{
+	u32 seq;
+	int i = 0;
+
+	while (1) {
+		seq = pc->lock;
+		rmb();
+		tc->time_mult = pc->time_mult;
+		tc->time_shift = pc->time_shift;
+		tc->time_zero = pc->time_zero;
+		tc->time_cycles = pc->time_cycles;
+		tc->time_mask = pc->time_mask;
+		tc->cap_user_time_zero = pc->cap_user_time_zero;
+		tc->cap_user_time_short	= pc->cap_user_time_short;
+		rmb();
+		if (pc->lock == seq && !(seq & 1))
+			break;
+		if (++i > 10000) {
+			pr_debug("failed to get perf_event_mmap_page lock\n");
+			return -EINVAL;
+		}
+	}
+
+	if (!tc->cap_user_time_zero)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
+				struct perf_tool *tool,
+				perf_event__handler_t process,
+				struct machine *machine)
+{
+	union perf_event event = {
+		.time_conv = {
+			.header = {
+				.type = PERF_RECORD_TIME_CONV,
+				.size = sizeof(struct perf_record_time_conv),
+			},
+		},
+	};
+	struct perf_tsc_conversion tc;
+	int err;
+
+	if (!pc)
+		return 0;
+	err = perf_read_tsc_conversion(pc, &tc);
+	if (err == -EOPNOTSUPP)
+		return 0;
+	if (err)
+		return err;
+
+	pr_debug2("Synthesizing TSC conversion information\n");
+
+	event.time_conv.time_mult  = tc.time_mult;
+	event.time_conv.time_shift = tc.time_shift;
+	event.time_conv.time_zero  = tc.time_zero;
+	event.time_conv.time_cycles = tc.time_cycles;
+	event.time_conv.time_mask = tc.time_mask;
+	event.time_conv.cap_user_time_zero = tc.cap_user_time_zero;
+	event.time_conv.cap_user_time_short = tc.cap_user_time_short;
+
+	return process(tool, &event, NULL, machine);
+}
+
+u64 __weak rdtsc(void)
+{
+	return 0;
+}
+
+size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp)
+{
+	struct perf_record_time_conv *tc = (struct perf_record_time_conv *)event;
+	size_t ret;
+
+	ret  = fprintf(fp, "\n... Time Shift      %" PRI_lu64 "\n", tc->time_shift);
+	ret += fprintf(fp, "... Time Muliplier  %" PRI_lu64 "\n", tc->time_mult);
+	ret += fprintf(fp, "... Time Zero       %" PRI_lu64 "\n", tc->time_zero);
+
+	/*
+	 * The event TIME_CONV was extended for the fields from "time_cycles"
+	 * when supported cap_user_time_short, for backward compatibility,
+	 * prints the extended fields only if they are contained in the event.
+	 */
+	if (event_contains(*tc, time_cycles)) {
+		ret += fprintf(fp, "... Time Cycles     %" PRI_lu64 "\n",
+			       tc->time_cycles);
+		ret += fprintf(fp, "... Time Mask       %#" PRI_lx64 "\n",
+			       tc->time_mask);
+		ret += fprintf(fp, "... Cap Time Zero   %" PRId32 "\n",
+			       tc->cap_user_time_zero);
+		ret += fprintf(fp, "... Cap Time Short  %" PRId32 "\n",
+			       tc->cap_user_time_short);
+	}
+
+	return ret;
+}
diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h
new file mode 100644
index 000000000..88fd1c4c1
--- /dev/null
+++ b/tools/perf/util/tsc.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_TSC_H
+#define __PERF_TSC_H
+
+#include <linux/types.h>
+
+#include "event.h"
+
+struct perf_tsc_conversion {
+	u16 time_shift;
+	u32 time_mult;
+	u64 time_zero;
+	u64 time_cycles;
+	u64 time_mask;
+
+	bool cap_user_time_zero;
+	bool cap_user_time_short;
+};
+
+struct perf_event_mmap_page;
+
+int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
+			     struct perf_tsc_conversion *tc);
+
+u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc);
+u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc);
+u64 rdtsc(void);
+double arch_get_tsc_freq(void);
+
+size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp);
+
+#endif // __PERF_TSC_H
diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c
new file mode 100644
index 000000000..32c39cfe2
--- /dev/null
+++ b/tools/perf/util/units.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "units.h"
+#include <inttypes.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+
+unsigned long parse_tag_value(const char *str, struct parse_tag *tags)
+{
+	struct parse_tag *i = tags;
+
+	while (i->tag) {
+		char *s = strchr(str, i->tag);
+
+		if (s) {
+			unsigned long int value;
+			char *endptr;
+
+			value = strtoul(str, &endptr, 10);
+			if (s != endptr)
+				break;
+
+			if (value > ULONG_MAX / i->mult)
+				break;
+			value *= i->mult;
+			return value;
+		}
+		i++;
+	}
+
+	return (unsigned long) -1;
+}
+
+double convert_unit_double(double value, char *unit)
+{
+	*unit = ' ';
+
+	if (value > 1000.0) {
+		value /= 1000.0;
+		*unit = 'K';
+	}
+
+	if (value > 1000.0) {
+		value /= 1000.0;
+		*unit = 'M';
+	}
+
+	if (value > 1000.0) {
+		value /= 1000.0;
+		*unit = 'G';
+	}
+
+	return value;
+}
+
+unsigned long convert_unit(unsigned long value, char *unit)
+{
+	double v = convert_unit_double((double)value, unit);
+
+	return (unsigned long)v;
+}
+
+int unit_number__scnprintf(char *buf, size_t size, u64 n)
+{
+	char unit[4] = "BKMG";
+	int i = 0;
+
+	while (((n / 1024) > 1) && (i < 3)) {
+		n /= 1024;
+		i++;
+	}
+
+	return scnprintf(buf, size, "%" PRIu64 "%c", n, unit[i]);
+}
diff --git a/tools/perf/util/units.h b/tools/perf/util/units.h
new file mode 100644
index 000000000..ea43e74e3
--- /dev/null
+++ b/tools/perf/util/units.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_UNIT_H
+#define PERF_UNIT_H
+
+#include <stddef.h>
+#include <linux/types.h>
+
+struct parse_tag {
+	char tag;
+	int  mult;
+};
+
+unsigned long parse_tag_value(const char *str, struct parse_tag *tags);
+
+double convert_unit_double(double value, char *unit);
+unsigned long convert_unit(unsigned long value, char *unit);
+int unit_number__scnprintf(char *buf, size_t size, u64 n);
+
+#endif /* PERF_UNIT_H */
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
new file mode 100644
index 000000000..94aa40f6e
--- /dev/null
+++ b/tools/perf/util/unwind-libdw.c
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <elfutils/libdw.h>
+#include <elfutils/libdwfl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include "debug.h"
+#include "dso.h"
+#include "unwind.h"
+#include "unwind-libdw.h"
+#include "machine.h"
+#include "map.h"
+#include "symbol.h"
+#include "thread.h"
+#include <linux/types.h>
+#include <linux/zalloc.h>
+#include "event.h"
+#include "perf_regs.h"
+#include "callchain.h"
+
+static char *debuginfo_path;
+
+static int __find_debuginfo(Dwfl_Module *mod __maybe_unused, void **userdata,
+			    const char *modname __maybe_unused, Dwarf_Addr base __maybe_unused,
+			    const char *file_name, const char *debuglink_file __maybe_unused,
+			    GElf_Word debuglink_crc __maybe_unused, char **debuginfo_file_name)
+{
+	const struct dso *dso = *userdata;
+
+	assert(dso);
+	if (dso->symsrc_filename && strcmp (file_name, dso->symsrc_filename))
+		*debuginfo_file_name = strdup(dso->symsrc_filename);
+	return -1;
+}
+
+static const Dwfl_Callbacks offline_callbacks = {
+	.find_debuginfo		= __find_debuginfo,
+	.debuginfo_path		= &debuginfo_path,
+	.section_address	= dwfl_offline_section_address,
+	// .find_elf is not set as we use dwfl_report_elf() instead.
+};
+
+static int __report_module(struct addr_location *al, u64 ip,
+			    struct unwind_info *ui)
+{
+	Dwfl_Module *mod;
+	struct dso *dso = NULL;
+	/*
+	 * Some callers will use al->sym, so we can't just use the
+	 * cheaper thread__find_map() here.
+	 */
+	thread__find_symbol(ui->thread, PERF_RECORD_MISC_USER, ip, al);
+
+	if (al->map)
+		dso = al->map->dso;
+
+	if (!dso)
+		return 0;
+
+	mod = dwfl_addrmodule(ui->dwfl, ip);
+	if (mod) {
+		Dwarf_Addr s;
+
+		dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL);
+		if (s != al->map->start - al->map->pgoff)
+			mod = 0;
+	}
+
+	if (!mod)
+		mod = dwfl_report_elf(ui->dwfl, dso->short_name, dso->long_name, -1,
+				      al->map->start - al->map->pgoff, false);
+	if (!mod) {
+		char filename[PATH_MAX];
+
+		if (dso__build_id_filename(dso, filename, sizeof(filename), false))
+			mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1,
+					      al->map->start - al->map->pgoff, false);
+	}
+
+	if (mod) {
+		void **userdatap;
+
+		dwfl_module_info(mod, &userdatap, NULL, NULL, NULL, NULL, NULL, NULL);
+		*userdatap = dso;
+	}
+
+	return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1;
+}
+
+static int report_module(u64 ip, struct unwind_info *ui)
+{
+	struct addr_location al;
+
+	return __report_module(&al, ip, ui);
+}
+
+/*
+ * Store all entries within entries array,
+ * we will process it after we finish unwind.
+ */
+static int entry(u64 ip, struct unwind_info *ui)
+
+{
+	struct unwind_entry *e = &ui->entries[ui->idx++];
+	struct addr_location al;
+
+	if (__report_module(&al, ip, ui))
+		return -1;
+
+	e->ip	  = ip;
+	e->ms.maps = al.maps;
+	e->ms.map = al.map;
+	e->ms.sym = al.sym;
+
+	pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n",
+		 al.sym ? al.sym->name : "''",
+		 ip,
+		 al.map ? al.map->map_ip(al.map, ip) : (u64) 0);
+	return 0;
+}
+
+static pid_t next_thread(Dwfl *dwfl, void *arg, void **thread_argp)
+{
+	/* We want only single thread to be processed. */
+	if (*thread_argp != NULL)
+		return 0;
+
+	*thread_argp = arg;
+	return dwfl_pid(dwfl);
+}
+
+static int access_dso_mem(struct unwind_info *ui, Dwarf_Addr addr,
+			  Dwarf_Word *data)
+{
+	struct addr_location al;
+	ssize_t size;
+
+	if (!thread__find_map(ui->thread, PERF_RECORD_MISC_USER, addr, &al)) {
+		pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
+		return -1;
+	}
+
+	if (!al.map->dso)
+		return -1;
+
+	size = dso__data_read_addr(al.map->dso, al.map, ui->machine,
+				   addr, (u8 *) data, sizeof(*data));
+
+	return !(size == sizeof(*data));
+}
+
+static bool memory_read(Dwfl *dwfl __maybe_unused, Dwarf_Addr addr, Dwarf_Word *result,
+			void *arg)
+{
+	struct unwind_info *ui = arg;
+	struct stack_dump *stack = &ui->sample->user_stack;
+	u64 start, end;
+	int offset;
+	int ret;
+
+	ret = perf_reg_value(&start, &ui->sample->user_regs, PERF_REG_SP);
+	if (ret)
+		return false;
+
+	end = start + stack->size;
+
+	/* Check overflow. */
+	if (addr + sizeof(Dwarf_Word) < addr)
+		return false;
+
+	if (addr < start || addr + sizeof(Dwarf_Word) > end) {
+		ret = access_dso_mem(ui, addr, result);
+		if (ret) {
+			pr_debug("unwind: access_mem 0x%" PRIx64 " not inside range"
+				 " 0x%" PRIx64 "-0x%" PRIx64 "\n",
+				addr, start, end);
+			return false;
+		}
+		return true;
+	}
+
+	offset  = addr - start;
+	*result = *(Dwarf_Word *)&stack->data[offset];
+	pr_debug("unwind: access_mem addr 0x%" PRIx64 ", val %lx, offset %d\n",
+		 addr, (unsigned long)*result, offset);
+	return true;
+}
+
+static const Dwfl_Thread_Callbacks callbacks = {
+	.next_thread		= next_thread,
+	.memory_read		= memory_read,
+	.set_initial_registers	= libdw__arch_set_initial_registers,
+};
+
+static int
+frame_callback(Dwfl_Frame *state, void *arg)
+{
+	struct unwind_info *ui = arg;
+	Dwarf_Addr pc;
+	bool isactivation;
+
+	if (!dwfl_frame_pc(state, &pc, NULL)) {
+		if (!ui->best_effort)
+			pr_err("%s", dwfl_errmsg(-1));
+		return DWARF_CB_ABORT;
+	}
+
+	// report the module before we query for isactivation
+	report_module(pc, ui);
+
+	if (!dwfl_frame_pc(state, &pc, &isactivation)) {
+		if (!ui->best_effort)
+			pr_err("%s", dwfl_errmsg(-1));
+		return DWARF_CB_ABORT;
+	}
+
+	if (!isactivation)
+		--pc;
+
+	return entry(pc, ui) || !(--ui->max_stack) ?
+	       DWARF_CB_ABORT : DWARF_CB_OK;
+}
+
+int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
+			struct thread *thread,
+			struct perf_sample *data,
+			int max_stack,
+			bool best_effort)
+{
+	struct unwind_info *ui, ui_buf = {
+		.sample		= data,
+		.thread		= thread,
+		.machine	= thread->maps->machine,
+		.cb		= cb,
+		.arg		= arg,
+		.max_stack	= max_stack,
+		.best_effort    = best_effort
+	};
+	Dwarf_Word ip;
+	int err = -EINVAL, i;
+
+	if (!data->user_regs.regs)
+		return -EINVAL;
+
+	ui = zalloc(sizeof(ui_buf) + sizeof(ui_buf.entries[0]) * max_stack);
+	if (!ui)
+		return -ENOMEM;
+
+	*ui = ui_buf;
+
+	ui->dwfl = dwfl_begin(&offline_callbacks);
+	if (!ui->dwfl)
+		goto out;
+
+	err = perf_reg_value(&ip, &data->user_regs, PERF_REG_IP);
+	if (err)
+		goto out;
+
+	err = report_module(ip, ui);
+	if (err)
+		goto out;
+
+	err = !dwfl_attach_state(ui->dwfl, EM_NONE, thread->tid, &callbacks, ui);
+	if (err)
+		goto out;
+
+	err = dwfl_getthread_frames(ui->dwfl, thread->tid, frame_callback, ui);
+
+	if (err && ui->max_stack != max_stack)
+		err = 0;
+
+	/*
+	 * Display what we got based on the order setup.
+	 */
+	for (i = 0; i < ui->idx && !err; i++) {
+		int j = i;
+
+		if (callchain_param.order == ORDER_CALLER)
+			j = ui->idx - i - 1;
+
+		err = ui->entries[j].ip ? ui->cb(&ui->entries[j], ui->arg) : 0;
+	}
+
+ out:
+	if (err)
+		pr_debug("unwind: failed with '%s'\n", dwfl_errmsg(-1));
+
+	dwfl_end(ui->dwfl);
+	free(ui);
+	return 0;
+}
diff --git a/tools/perf/util/unwind-libdw.h b/tools/perf/util/unwind-libdw.h
new file mode 100644
index 000000000..8c88bc4f2
--- /dev/null
+++ b/tools/perf/util/unwind-libdw.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_UNWIND_LIBDW_H
+#define __PERF_UNWIND_LIBDW_H
+
+#include <elfutils/libdwfl.h>
+#include "unwind.h"
+
+struct machine;
+struct perf_sample;
+struct thread;
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg);
+
+struct unwind_info {
+	Dwfl			*dwfl;
+	struct perf_sample      *sample;
+	struct machine          *machine;
+	struct thread           *thread;
+	unwind_entry_cb_t	cb;
+	void			*arg;
+	int			max_stack;
+	int			idx;
+	bool			best_effort;
+	struct unwind_entry	entries[];
+};
+
+#endif /* __PERF_UNWIND_LIBDW_H */
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
new file mode 100644
index 000000000..81b6bd6e1
--- /dev/null
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -0,0 +1,784 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Post mortem Dwarf CFI based unwinding on top of regs and stack dumps.
+ *
+ * Lots of this code have been borrowed or heavily inspired from parts of
+ * the libunwind 0.99 code which are (amongst other contributors I may have
+ * forgotten):
+ *
+ * Copyright (C) 2002-2007 Hewlett-Packard Co
+ *	Contributed by David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * And the bugs have been added by:
+ *
+ * Copyright (C) 2010, Frederic Weisbecker <fweisbec@gmail.com>
+ * Copyright (C) 2012, Jiri Olsa <jolsa@redhat.com>
+ *
+ */
+
+#include <elf.h>
+#include <errno.h>
+#include <gelf.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <linux/list.h>
+#include <linux/zalloc.h>
+#ifndef REMOTE_UNWIND_LIBUNWIND
+#include <libunwind.h>
+#include <libunwind-ptrace.h>
+#endif
+#include "callchain.h"
+#include "thread.h"
+#include "session.h"
+#include "perf_regs.h"
+#include "unwind.h"
+#include "map.h"
+#include "symbol.h"
+#include "debug.h"
+#include "asm/bug.h"
+#include "dso.h"
+
+extern int
+UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+				    unw_word_t ip,
+				    unw_dyn_info_t *di,
+				    unw_proc_info_t *pi,
+				    int need_unwind_info, void *arg);
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+extern int
+UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
+				 unw_word_t ip,
+				 unw_word_t segbase,
+				 const char *obj_name, unw_word_t start,
+				 unw_word_t end);
+
+#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame)
+
+#define DW_EH_PE_FORMAT_MASK	0x0f	/* format of the encoded value */
+#define DW_EH_PE_APPL_MASK	0x70	/* how the value is to be applied */
+
+/* Pointer-encoding formats: */
+#define DW_EH_PE_omit		0xff
+#define DW_EH_PE_ptr		0x00	/* pointer-sized unsigned value */
+#define DW_EH_PE_udata4		0x03	/* unsigned 32-bit value */
+#define DW_EH_PE_udata8		0x04	/* unsigned 64-bit value */
+#define DW_EH_PE_sdata4		0x0b	/* signed 32-bit value */
+#define DW_EH_PE_sdata8		0x0c	/* signed 64-bit value */
+
+/* Pointer-encoding application: */
+#define DW_EH_PE_absptr		0x00	/* absolute value */
+#define DW_EH_PE_pcrel		0x10	/* rel. to addr. of encoded value */
+
+/*
+ * The following are not documented by LSB v1.3, yet they are used by
+ * GCC, presumably they aren't documented by LSB since they aren't
+ * used on Linux:
+ */
+#define DW_EH_PE_funcrel	0x40	/* start-of-procedure-relative */
+#define DW_EH_PE_aligned	0x50	/* aligned pointer */
+
+/* Flags intentionally not handled, since they're not needed:
+ * #define DW_EH_PE_indirect      0x80
+ * #define DW_EH_PE_uleb128       0x01
+ * #define DW_EH_PE_udata2        0x02
+ * #define DW_EH_PE_sleb128       0x09
+ * #define DW_EH_PE_sdata2        0x0a
+ * #define DW_EH_PE_textrel       0x20
+ * #define DW_EH_PE_datarel       0x30
+ */
+
+struct unwind_info {
+	struct perf_sample	*sample;
+	struct machine		*machine;
+	struct thread		*thread;
+	bool			 best_effort;
+};
+
+#define dw_read(ptr, type, end) ({	\
+	type *__p = (type *) ptr;	\
+	type  __v;			\
+	if ((__p + 1) > (type *) end)	\
+		return -EINVAL;		\
+	__v = *__p++;			\
+	ptr = (typeof(ptr)) __p;	\
+	__v;				\
+	})
+
+static int __dw_read_encoded_value(u8 **p, u8 *end, u64 *val,
+				   u8 encoding)
+{
+	u8 *cur = *p;
+	*val = 0;
+
+	switch (encoding) {
+	case DW_EH_PE_omit:
+		*val = 0;
+		goto out;
+	case DW_EH_PE_ptr:
+		*val = dw_read(cur, unsigned long, end);
+		goto out;
+	default:
+		break;
+	}
+
+	switch (encoding & DW_EH_PE_APPL_MASK) {
+	case DW_EH_PE_absptr:
+		break;
+	case DW_EH_PE_pcrel:
+		*val = (unsigned long) cur;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if ((encoding & 0x07) == 0x00)
+		encoding |= DW_EH_PE_udata4;
+
+	switch (encoding & DW_EH_PE_FORMAT_MASK) {
+	case DW_EH_PE_sdata4:
+		*val += dw_read(cur, s32, end);
+		break;
+	case DW_EH_PE_udata4:
+		*val += dw_read(cur, u32, end);
+		break;
+	case DW_EH_PE_sdata8:
+		*val += dw_read(cur, s64, end);
+		break;
+	case DW_EH_PE_udata8:
+		*val += dw_read(cur, u64, end);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+ out:
+	*p = cur;
+	return 0;
+}
+
+#define dw_read_encoded_value(ptr, end, enc) ({			\
+	u64 __v;						\
+	if (__dw_read_encoded_value(&ptr, end, &__v, enc)) {	\
+		return -EINVAL;                                 \
+	}                                                       \
+	__v;                                                    \
+	})
+
+static int elf_section_address_and_offset(int fd, const char *name, u64 *address, u64 *offset)
+{
+	Elf *elf;
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr;
+	int ret = -1;
+
+	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	if (elf == NULL)
+		return -1;
+
+	if (gelf_getehdr(elf, &ehdr) == NULL)
+		goto out_err;
+
+	if (!elf_section_by_name(elf, &ehdr, &shdr, name, NULL))
+		goto out_err;
+
+	*address = shdr.sh_addr;
+	*offset = shdr.sh_offset;
+	ret = 0;
+out_err:
+	elf_end(elf);
+	return ret;
+}
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+static u64 elf_section_offset(int fd, const char *name)
+{
+	u64 address, offset = 0;
+
+	if (elf_section_address_and_offset(fd, name, &address, &offset))
+		return 0;
+
+	return offset;
+}
+#endif
+
+static u64 elf_base_address(int fd)
+{
+	Elf *elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	GElf_Phdr phdr;
+	u64 retval = 0;
+	size_t i, phdrnum = 0;
+
+	if (elf == NULL)
+		return 0;
+	(void)elf_getphdrnum(elf, &phdrnum);
+	/* PT_LOAD segments are sorted by p_vaddr, so the first has the minimum p_vaddr. */
+	for (i = 0; i < phdrnum; i++) {
+		if (gelf_getphdr(elf, i, &phdr) && phdr.p_type == PT_LOAD) {
+			retval = phdr.p_vaddr & -getpagesize();
+			break;
+		}
+	}
+
+	elf_end(elf);
+	return retval;
+}
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+static int elf_is_exec(int fd, const char *name)
+{
+	Elf *elf;
+	GElf_Ehdr ehdr;
+	int retval = 0;
+
+	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	if (elf == NULL)
+		return 0;
+	if (gelf_getehdr(elf, &ehdr) == NULL)
+		goto out;
+
+	retval = (ehdr.e_type == ET_EXEC);
+
+out:
+	elf_end(elf);
+	pr_debug("unwind: elf_is_exec(%s): %d\n", name, retval);
+	return retval;
+}
+#endif
+
+struct table_entry {
+	u32 start_ip_offset;
+	u32 fde_offset;
+};
+
+struct eh_frame_hdr {
+	unsigned char version;
+	unsigned char eh_frame_ptr_enc;
+	unsigned char fde_count_enc;
+	unsigned char table_enc;
+
+	/*
+	 * The rest of the header is variable-length and consists of the
+	 * following members:
+	 *
+	 *	encoded_t eh_frame_ptr;
+	 *	encoded_t fde_count;
+	 */
+
+	/* A single encoded pointer should not be more than 8 bytes. */
+	u64 enc[2];
+
+	/*
+	 * struct {
+	 *    encoded_t start_ip;
+	 *    encoded_t fde_addr;
+	 * } binary_search_table[fde_count];
+	 */
+	char data[];
+} __packed;
+
+static int unwind_spec_ehframe(struct dso *dso, struct machine *machine,
+			       u64 offset, u64 *table_data_offset, u64 *fde_count)
+{
+	struct eh_frame_hdr hdr;
+	u8 *enc = (u8 *) &hdr.enc;
+	u8 *end = (u8 *) &hdr.data;
+	ssize_t r;
+
+	r = dso__data_read_offset(dso, machine, offset,
+				  (u8 *) &hdr, sizeof(hdr));
+	if (r != sizeof(hdr))
+		return -EINVAL;
+
+	/* We dont need eh_frame_ptr, just skip it. */
+	dw_read_encoded_value(enc, end, hdr.eh_frame_ptr_enc);
+
+	*fde_count  = dw_read_encoded_value(enc, end, hdr.fde_count_enc);
+	*table_data_offset = enc - (u8 *) &hdr;
+	return 0;
+}
+
+static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui,
+				     u64 *table_data, u64 *segbase,
+				     u64 *fde_count)
+{
+	struct map *map;
+	u64 base_addr = UINT64_MAX;
+	int ret, fd;
+
+	if (dso->data.eh_frame_hdr_offset == 0) {
+		fd = dso__data_get_fd(dso, ui->machine);
+		if (fd < 0)
+			return -EINVAL;
+
+		/* Check the .eh_frame section for unwinding info */
+		ret = elf_section_address_and_offset(fd, ".eh_frame_hdr",
+						     &dso->data.eh_frame_hdr_addr,
+						     &dso->data.eh_frame_hdr_offset);
+		dso->data.elf_base_addr = elf_base_address(fd);
+		dso__data_put_fd(dso);
+		if (ret || dso->data.eh_frame_hdr_offset == 0)
+			return -EINVAL;
+	}
+
+	maps__for_each_entry(ui->thread->maps, map) {
+		if (map->dso == dso && map->start < base_addr)
+			base_addr = map->start;
+	}
+	base_addr -= dso->data.elf_base_addr;
+	/* Address of .eh_frame_hdr */
+	*segbase = base_addr + dso->data.eh_frame_hdr_addr;
+	ret = unwind_spec_ehframe(dso, ui->machine, dso->data.eh_frame_hdr_offset,
+				   table_data, fde_count);
+	if (ret)
+		return ret;
+	/* binary_search_table offset plus .eh_frame_hdr address */
+	*table_data += *segbase;
+	return 0;
+}
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+static int read_unwind_spec_debug_frame(struct dso *dso,
+					struct machine *machine, u64 *offset)
+{
+	int fd;
+	u64 ofs = dso->data.debug_frame_offset;
+
+	/* debug_frame can reside in:
+	 *  - dso
+	 *  - debug pointed by symsrc_filename
+	 *  - gnu_debuglink, which doesn't necessary
+	 *    has to be pointed by symsrc_filename
+	 */
+	if (ofs == 0) {
+		fd = dso__data_get_fd(dso, machine);
+		if (fd >= 0) {
+			ofs = elf_section_offset(fd, ".debug_frame");
+			dso__data_put_fd(dso);
+		}
+
+		if (ofs <= 0) {
+			fd = open(dso->symsrc_filename, O_RDONLY);
+			if (fd >= 0) {
+				ofs = elf_section_offset(fd, ".debug_frame");
+				close(fd);
+			}
+		}
+
+		if (ofs <= 0) {
+			char *debuglink = malloc(PATH_MAX);
+			int ret = 0;
+
+			ret = dso__read_binary_type_filename(
+				dso, DSO_BINARY_TYPE__DEBUGLINK,
+				machine->root_dir, debuglink, PATH_MAX);
+			if (!ret) {
+				fd = open(debuglink, O_RDONLY);
+				if (fd >= 0) {
+					ofs = elf_section_offset(fd,
+							".debug_frame");
+					close(fd);
+				}
+			}
+			if (ofs > 0) {
+				if (dso->symsrc_filename != NULL) {
+					pr_warning(
+						"%s: overwrite symsrc(%s,%s)\n",
+							__func__,
+							dso->symsrc_filename,
+							debuglink);
+					zfree(&dso->symsrc_filename);
+				}
+				dso->symsrc_filename = debuglink;
+			} else {
+				free(debuglink);
+			}
+		}
+
+		dso->data.debug_frame_offset = ofs;
+	}
+
+	*offset = ofs;
+	if (*offset)
+		return 0;
+
+	return -EINVAL;
+}
+#endif
+
+static struct map *find_map(unw_word_t ip, struct unwind_info *ui)
+{
+	struct addr_location al;
+	return thread__find_map(ui->thread, PERF_RECORD_MISC_USER, ip, &al);
+}
+
+static int
+find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
+	       int need_unwind_info, void *arg)
+{
+	struct unwind_info *ui = arg;
+	struct map *map;
+	unw_dyn_info_t di;
+	u64 table_data, segbase, fde_count;
+	int ret = -EINVAL;
+
+	map = find_map(ip, ui);
+	if (!map || !map->dso)
+		return -EINVAL;
+
+	pr_debug("unwind: find_proc_info dso %s\n", map->dso->name);
+
+	/* Check the .eh_frame section for unwinding info */
+	if (!read_unwind_spec_eh_frame(map->dso, ui,
+				       &table_data, &segbase, &fde_count)) {
+		memset(&di, 0, sizeof(di));
+		di.format   = UNW_INFO_FORMAT_REMOTE_TABLE;
+		di.start_ip = map->start;
+		di.end_ip   = map->end;
+		di.u.rti.segbase    = segbase;
+		di.u.rti.table_data = table_data;
+		di.u.rti.table_len  = fde_count * sizeof(struct table_entry)
+				      / sizeof(unw_word_t);
+		ret = dwarf_search_unwind_table(as, ip, &di, pi,
+						need_unwind_info, arg);
+	}
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+	/* Check the .debug_frame section for unwinding info */
+	if (ret < 0 &&
+	    !read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) {
+		int fd = dso__data_get_fd(map->dso, ui->machine);
+		int is_exec = elf_is_exec(fd, map->dso->name);
+		unw_word_t base = is_exec ? 0 : map->start;
+		const char *symfile;
+
+		if (fd >= 0)
+			dso__data_put_fd(map->dso);
+
+		symfile = map->dso->symsrc_filename ?: map->dso->name;
+
+		memset(&di, 0, sizeof(di));
+		if (dwarf_find_debug_frame(0, &di, ip, base, symfile,
+					   map->start, map->end))
+			return dwarf_search_unwind_table(as, ip, &di, pi,
+							 need_unwind_info, arg);
+	}
+#endif
+
+	return ret;
+}
+
+static int access_fpreg(unw_addr_space_t __maybe_unused as,
+			unw_regnum_t __maybe_unused num,
+			unw_fpreg_t __maybe_unused *val,
+			int __maybe_unused __write,
+			void __maybe_unused *arg)
+{
+	pr_err("unwind: access_fpreg unsupported\n");
+	return -UNW_EINVAL;
+}
+
+static int get_dyn_info_list_addr(unw_addr_space_t __maybe_unused as,
+				  unw_word_t __maybe_unused *dil_addr,
+				  void __maybe_unused *arg)
+{
+	return -UNW_ENOINFO;
+}
+
+static int resume(unw_addr_space_t __maybe_unused as,
+		  unw_cursor_t __maybe_unused *cu,
+		  void __maybe_unused *arg)
+{
+	pr_err("unwind: resume unsupported\n");
+	return -UNW_EINVAL;
+}
+
+static int
+get_proc_name(unw_addr_space_t __maybe_unused as,
+	      unw_word_t __maybe_unused addr,
+		char __maybe_unused *bufp, size_t __maybe_unused buf_len,
+		unw_word_t __maybe_unused *offp, void __maybe_unused *arg)
+{
+	pr_err("unwind: get_proc_name unsupported\n");
+	return -UNW_EINVAL;
+}
+
+static int access_dso_mem(struct unwind_info *ui, unw_word_t addr,
+			  unw_word_t *data)
+{
+	struct map *map;
+	ssize_t size;
+
+	map = find_map(addr, ui);
+	if (!map) {
+		pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
+		return -1;
+	}
+
+	if (!map->dso)
+		return -1;
+
+	size = dso__data_read_addr(map->dso, map, ui->machine,
+				   addr, (u8 *) data, sizeof(*data));
+
+	return !(size == sizeof(*data));
+}
+
+static int access_mem(unw_addr_space_t __maybe_unused as,
+		      unw_word_t addr, unw_word_t *valp,
+		      int __write, void *arg)
+{
+	struct unwind_info *ui = arg;
+	struct stack_dump *stack = &ui->sample->user_stack;
+	u64 start, end;
+	int offset;
+	int ret;
+
+	/* Don't support write, probably not needed. */
+	if (__write || !stack || !ui->sample->user_regs.regs) {
+		*valp = 0;
+		return 0;
+	}
+
+	ret = perf_reg_value(&start, &ui->sample->user_regs,
+			     LIBUNWIND__ARCH_REG_SP);
+	if (ret)
+		return ret;
+
+	end = start + stack->size;
+
+	/* Check overflow. */
+	if (addr + sizeof(unw_word_t) < addr)
+		return -EINVAL;
+
+	if (addr < start || addr + sizeof(unw_word_t) >= end) {
+		ret = access_dso_mem(ui, addr, valp);
+		if (ret) {
+			pr_debug("unwind: access_mem %p not inside range"
+				 " 0x%" PRIx64 "-0x%" PRIx64 "\n",
+				 (void *) (uintptr_t) addr, start, end);
+			*valp = 0;
+			return ret;
+		}
+		return 0;
+	}
+
+	offset = addr - start;
+	*valp  = *(unw_word_t *)&stack->data[offset];
+	pr_debug("unwind: access_mem addr %p val %lx, offset %d\n",
+		 (void *) (uintptr_t) addr, (unsigned long)*valp, offset);
+	return 0;
+}
+
+static int access_reg(unw_addr_space_t __maybe_unused as,
+		      unw_regnum_t regnum, unw_word_t *valp,
+		      int __write, void *arg)
+{
+	struct unwind_info *ui = arg;
+	int id, ret;
+	u64 val;
+
+	/* Don't support write, I suspect we don't need it. */
+	if (__write) {
+		pr_err("unwind: access_reg w %d\n", regnum);
+		return 0;
+	}
+
+	if (!ui->sample->user_regs.regs) {
+		*valp = 0;
+		return 0;
+	}
+
+	id = LIBUNWIND__ARCH_REG_ID(regnum);
+	if (id < 0)
+		return -EINVAL;
+
+	ret = perf_reg_value(&val, &ui->sample->user_regs, id);
+	if (ret) {
+		if (!ui->best_effort)
+			pr_err("unwind: can't read reg %d\n", regnum);
+		return ret;
+	}
+
+	*valp = (unw_word_t) val;
+	pr_debug("unwind: reg %d, val %lx\n", regnum, (unsigned long)*valp);
+	return 0;
+}
+
+static void put_unwind_info(unw_addr_space_t __maybe_unused as,
+			    unw_proc_info_t *pi __maybe_unused,
+			    void *arg __maybe_unused)
+{
+	pr_debug("unwind: put_unwind_info called\n");
+}
+
+static int entry(u64 ip, struct thread *thread,
+		 unwind_entry_cb_t cb, void *arg)
+{
+	struct unwind_entry e;
+	struct addr_location al;
+
+	e.ms.sym = thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al);
+	e.ip     = ip;
+	e.ms.map = al.map;
+	e.ms.maps = al.maps;
+
+	pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n",
+		 al.sym ? al.sym->name : "''",
+		 ip,
+		 al.map ? al.map->map_ip(al.map, ip) : (u64) 0);
+
+	return cb(&e, arg);
+}
+
+static void display_error(int err)
+{
+	switch (err) {
+	case UNW_EINVAL:
+		pr_err("unwind: Only supports local.\n");
+		break;
+	case UNW_EUNSPEC:
+		pr_err("unwind: Unspecified error.\n");
+		break;
+	case UNW_EBADREG:
+		pr_err("unwind: Register unavailable.\n");
+		break;
+	default:
+		break;
+	}
+}
+
+static unw_accessors_t accessors = {
+	.find_proc_info		= find_proc_info,
+	.put_unwind_info	= put_unwind_info,
+	.get_dyn_info_list_addr	= get_dyn_info_list_addr,
+	.access_mem		= access_mem,
+	.access_reg		= access_reg,
+	.access_fpreg		= access_fpreg,
+	.resume			= resume,
+	.get_proc_name		= get_proc_name,
+};
+
+static int _unwind__prepare_access(struct maps *maps)
+{
+	maps->addr_space = unw_create_addr_space(&accessors, 0);
+	if (!maps->addr_space) {
+		pr_err("unwind: Can't create unwind address space.\n");
+		return -ENOMEM;
+	}
+
+	unw_set_caching_policy(maps->addr_space, UNW_CACHE_GLOBAL);
+	return 0;
+}
+
+static void _unwind__flush_access(struct maps *maps)
+{
+	unw_flush_cache(maps->addr_space, 0, 0);
+}
+
+static void _unwind__finish_access(struct maps *maps)
+{
+	unw_destroy_addr_space(maps->addr_space);
+}
+
+static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
+		       void *arg, int max_stack)
+{
+	u64 val;
+	unw_word_t ips[max_stack];
+	unw_addr_space_t addr_space;
+	unw_cursor_t c;
+	int ret, i = 0;
+
+	ret = perf_reg_value(&val, &ui->sample->user_regs,
+			     LIBUNWIND__ARCH_REG_IP);
+	if (ret)
+		return ret;
+
+	ips[i++] = (unw_word_t) val;
+
+	/*
+	 * If we need more than one entry, do the DWARF
+	 * unwind itself.
+	 */
+	if (max_stack - 1 > 0) {
+		WARN_ONCE(!ui->thread, "WARNING: ui->thread is NULL");
+		addr_space = ui->thread->maps->addr_space;
+
+		if (addr_space == NULL)
+			return -1;
+
+		ret = unw_init_remote(&c, addr_space, ui);
+		if (ret && !ui->best_effort)
+			display_error(ret);
+
+		while (!ret && (unw_step(&c) > 0) && i < max_stack) {
+			unw_get_reg(&c, UNW_REG_IP, &ips[i]);
+
+			/*
+			 * Decrement the IP for any non-activation frames.
+			 * this is required to properly find the srcline
+			 * for caller frames.
+			 * See also the documentation for dwfl_frame_pc(),
+			 * which this code tries to replicate.
+			 */
+			if (unw_is_signal_frame(&c) <= 0)
+				--ips[i];
+
+			++i;
+		}
+
+		max_stack = i;
+	}
+
+	/*
+	 * Display what we got based on the order setup.
+	 */
+	for (i = 0; i < max_stack && !ret; i++) {
+		int j = i;
+
+		if (callchain_param.order == ORDER_CALLER)
+			j = max_stack - i - 1;
+		ret = ips[j] ? entry(ips[j], ui->thread, cb, arg) : 0;
+	}
+
+	return ret;
+}
+
+static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg,
+			struct thread *thread,
+			struct perf_sample *data, int max_stack,
+			bool best_effort)
+{
+	struct unwind_info ui = {
+		.sample       = data,
+		.thread       = thread,
+		.machine      = thread->maps->machine,
+		.best_effort  = best_effort
+	};
+
+	if (!data->user_regs.regs)
+		return -EINVAL;
+
+	if (max_stack <= 0)
+		return -EINVAL;
+
+	return get_entries(&ui, cb, arg, max_stack);
+}
+
+static struct unwind_libunwind_ops
+_unwind_libunwind_ops = {
+	.prepare_access = _unwind__prepare_access,
+	.flush_access   = _unwind__flush_access,
+	.finish_access  = _unwind__finish_access,
+	.get_entries    = _unwind__get_entries,
+};
+
+#ifndef REMOTE_UNWIND_LIBUNWIND
+struct unwind_libunwind_ops *
+local_unwind_libunwind_ops = &_unwind_libunwind_ops;
+#endif
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
new file mode 100644
index 000000000..509c287ee
--- /dev/null
+++ b/tools/perf/util/unwind-libunwind.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "unwind.h"
+#include "dso.h"
+#include "map.h"
+#include "thread.h"
+#include "session.h"
+#include "debug.h"
+#include "env.h"
+#include "callchain.h"
+
+struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops;
+struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops;
+struct unwind_libunwind_ops __weak *arm64_unwind_libunwind_ops;
+
+static void unwind__register_ops(struct maps *maps, struct unwind_libunwind_ops *ops)
+{
+	maps->unwind_libunwind_ops = ops;
+}
+
+int unwind__prepare_access(struct maps *maps, struct map *map, bool *initialized)
+{
+	const char *arch;
+	enum dso_type dso_type;
+	struct unwind_libunwind_ops *ops = local_unwind_libunwind_ops;
+	int err;
+
+	if (!dwarf_callchain_users)
+		return 0;
+
+	if (maps->addr_space) {
+		pr_debug("unwind: thread map already set, dso=%s\n",
+			 map->dso->name);
+		if (initialized)
+			*initialized = true;
+		return 0;
+	}
+
+	/* env->arch is NULL for live-mode (i.e. perf top) */
+	if (!maps->machine->env || !maps->machine->env->arch)
+		goto out_register;
+
+	dso_type = dso__type(map->dso, maps->machine);
+	if (dso_type == DSO__TYPE_UNKNOWN)
+		return 0;
+
+	arch = perf_env__arch(maps->machine->env);
+
+	if (!strcmp(arch, "x86")) {
+		if (dso_type != DSO__TYPE_64BIT)
+			ops = x86_32_unwind_libunwind_ops;
+	} else if (!strcmp(arch, "arm64") || !strcmp(arch, "arm")) {
+		if (dso_type == DSO__TYPE_64BIT)
+			ops = arm64_unwind_libunwind_ops;
+	}
+
+	if (!ops) {
+		pr_err("unwind: target platform=%s is not supported\n", arch);
+		return 0;
+	}
+out_register:
+	unwind__register_ops(maps, ops);
+
+	err = maps->unwind_libunwind_ops->prepare_access(maps);
+	if (initialized)
+		*initialized = err ? false : true;
+	return err;
+}
+
+void unwind__flush_access(struct maps *maps)
+{
+	if (maps->unwind_libunwind_ops)
+		maps->unwind_libunwind_ops->flush_access(maps);
+}
+
+void unwind__finish_access(struct maps *maps)
+{
+	if (maps->unwind_libunwind_ops)
+		maps->unwind_libunwind_ops->finish_access(maps);
+}
+
+int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
+			 struct thread *thread,
+			 struct perf_sample *data, int max_stack,
+			 bool best_effort)
+{
+	if (thread->maps->unwind_libunwind_ops)
+		return thread->maps->unwind_libunwind_ops->get_entries(cb, arg, thread, data,
+								       max_stack, best_effort);
+	return 0;
+}
diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h
new file mode 100644
index 000000000..b2a03fa52
--- /dev/null
+++ b/tools/perf/util/unwind.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __UNWIND_H
+#define __UNWIND_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include "util/map_symbol.h"
+
+struct maps;
+struct perf_sample;
+struct thread;
+
+struct unwind_entry {
+	struct map_symbol ms;
+	u64		  ip;
+};
+
+typedef int (*unwind_entry_cb_t)(struct unwind_entry *entry, void *arg);
+
+struct unwind_libunwind_ops {
+	int (*prepare_access)(struct maps *maps);
+	void (*flush_access)(struct maps *maps);
+	void (*finish_access)(struct maps *maps);
+	int (*get_entries)(unwind_entry_cb_t cb, void *arg,
+			   struct thread *thread,
+			   struct perf_sample *data, int max_stack, bool best_effort);
+};
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+/*
+ * When best_effort is set, don't report errors and fail silently. This could
+ * be expanded in the future to be more permissive about things other than
+ * error messages.
+ */
+int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
+			struct thread *thread,
+			struct perf_sample *data, int max_stack,
+			bool best_effort);
+/* libunwind specific */
+#ifdef HAVE_LIBUNWIND_SUPPORT
+#ifndef LIBUNWIND__ARCH_REG_ID
+#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__arch_reg_id(regnum)
+#endif
+
+#ifndef LIBUNWIND__ARCH_REG_SP
+#define LIBUNWIND__ARCH_REG_SP PERF_REG_SP
+#endif
+
+#ifndef LIBUNWIND__ARCH_REG_IP
+#define LIBUNWIND__ARCH_REG_IP PERF_REG_IP
+#endif
+
+int LIBUNWIND__ARCH_REG_ID(int regnum);
+int unwind__prepare_access(struct maps *maps, struct map *map, bool *initialized);
+void unwind__flush_access(struct maps *maps);
+void unwind__finish_access(struct maps *maps);
+#else
+static inline int unwind__prepare_access(struct maps *maps __maybe_unused,
+					 struct map *map __maybe_unused,
+					 bool *initialized __maybe_unused)
+{
+	return 0;
+}
+
+static inline void unwind__flush_access(struct maps *maps __maybe_unused) {}
+static inline void unwind__finish_access(struct maps *maps __maybe_unused) {}
+#endif
+#else
+static inline int
+unwind__get_entries(unwind_entry_cb_t cb __maybe_unused,
+		    void *arg __maybe_unused,
+		    struct thread *thread __maybe_unused,
+		    struct perf_sample *data __maybe_unused,
+		    int max_stack __maybe_unused,
+		    bool best_effort __maybe_unused)
+{
+	return 0;
+}
+
+static inline int unwind__prepare_access(struct maps *maps __maybe_unused,
+					 struct map *map __maybe_unused,
+					 bool *initialized __maybe_unused)
+{
+	return 0;
+}
+
+static inline void unwind__flush_access(struct maps *maps __maybe_unused) {}
+static inline void unwind__finish_access(struct maps *maps __maybe_unused) {}
+#endif /* HAVE_DWARF_UNWIND_SUPPORT */
+#endif /* __UNWIND_H */
diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c
new file mode 100644
index 000000000..196438ee4
--- /dev/null
+++ b/tools/perf/util/usage.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * usage.c
+ *
+ * Various reporting routines.
+ * Originally copied from GIT source.
+ *
+ * Copyright (C) Linus Torvalds, 2005
+ */
+#include "util.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/compiler.h>
+
+static __noreturn void usage_builtin(const char *err)
+{
+	fprintf(stderr, "\n Usage: %s\n", err);
+	exit(129);
+}
+
+/* If we are in a dlopen()ed .so write to a global variable would segfault
+ * (ugh), so keep things static. */
+static void (*usage_routine)(const char *err) __noreturn = usage_builtin;
+
+void usage(const char *err)
+{
+	usage_routine(err);
+}
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
new file mode 100644
index 000000000..391c1e928
--- /dev/null
+++ b/tools/perf/util/util.c
@@ -0,0 +1,535 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util.h"
+#include "debug.h"
+#include "event.h"
+#include <api/fs/fs.h>
+#include <sys/stat.h>
+#include <sys/utsname.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/capability.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/time64.h>
+#include <linux/overflow.h>
+#include <unistd.h>
+#include "cap.h"
+#include "strlist.h"
+#include "string2.h"
+
+/*
+ * XXX We need to find a better place for these things...
+ */
+
+bool perf_singlethreaded = true;
+
+void perf_set_singlethreaded(void)
+{
+	perf_singlethreaded = true;
+}
+
+void perf_set_multithreaded(void)
+{
+	perf_singlethreaded = false;
+}
+
+int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH;
+int sysctl_perf_event_max_contexts_per_stack = PERF_MAX_CONTEXTS_PER_STACK;
+
+int sysctl__max_stack(void)
+{
+	int value;
+
+	if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0)
+		sysctl_perf_event_max_stack = value;
+
+	if (sysctl__read_int("kernel/perf_event_max_contexts_per_stack", &value) == 0)
+		sysctl_perf_event_max_contexts_per_stack = value;
+
+	return sysctl_perf_event_max_stack;
+}
+
+bool sysctl__nmi_watchdog_enabled(void)
+{
+	static bool cached;
+	static bool nmi_watchdog;
+	int value;
+
+	if (cached)
+		return nmi_watchdog;
+
+	if (sysctl__read_int("kernel/nmi_watchdog", &value) < 0)
+		return false;
+
+	nmi_watchdog = (value > 0) ? true : false;
+	cached = true;
+
+	return nmi_watchdog;
+}
+
+bool test_attr__enabled;
+
+bool perf_host  = true;
+bool perf_guest = false;
+
+void event_attr_init(struct perf_event_attr *attr)
+{
+	if (!perf_host)
+		attr->exclude_host  = 1;
+	if (!perf_guest)
+		attr->exclude_guest = 1;
+	/* to capture ABI version */
+	attr->size = sizeof(*attr);
+}
+
+int mkdir_p(char *path, mode_t mode)
+{
+	struct stat st;
+	int err;
+	char *d = path;
+
+	if (*d != '/')
+		return -1;
+
+	if (stat(path, &st) == 0)
+		return 0;
+
+	while (*++d == '/');
+
+	while ((d = strchr(d, '/'))) {
+		*d = '\0';
+		err = stat(path, &st) && mkdir(path, mode);
+		*d++ = '/';
+		if (err)
+			return -1;
+		while (*d == '/')
+			++d;
+	}
+	return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0;
+}
+
+static bool match_pat(char *file, const char **pat)
+{
+	int i = 0;
+
+	if (!pat)
+		return true;
+
+	while (pat[i]) {
+		if (strglobmatch(file, pat[i]))
+			return true;
+
+		i++;
+	}
+
+	return false;
+}
+
+/*
+ * The depth specify how deep the removal will go.
+ * 0       - will remove only files under the 'path' directory
+ * 1 .. x  - will dive in x-level deep under the 'path' directory
+ *
+ * If specified the pat is array of string patterns ended with NULL,
+ * which are checked upon every file/directory found. Only matching
+ * ones are removed.
+ *
+ * The function returns:
+ *    0 on success
+ *   -1 on removal failure with errno set
+ *   -2 on pattern failure
+ */
+static int rm_rf_depth_pat(const char *path, int depth, const char **pat)
+{
+	DIR *dir;
+	int ret;
+	struct dirent *d;
+	char namebuf[PATH_MAX];
+	struct stat statbuf;
+
+	/* Do not fail if there's no file. */
+	ret = lstat(path, &statbuf);
+	if (ret)
+		return 0;
+
+	/* Try to remove any file we get. */
+	if (!(statbuf.st_mode & S_IFDIR))
+		return unlink(path);
+
+	/* We have directory in path. */
+	dir = opendir(path);
+	if (dir == NULL)
+		return -1;
+
+	while ((d = readdir(dir)) != NULL && !ret) {
+
+		if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
+			continue;
+
+		if (!match_pat(d->d_name, pat)) {
+			ret =  -2;
+			break;
+		}
+
+		scnprintf(namebuf, sizeof(namebuf), "%s/%s",
+			  path, d->d_name);
+
+		/* We have to check symbolic link itself */
+		ret = lstat(namebuf, &statbuf);
+		if (ret < 0) {
+			pr_debug("stat failed: %s\n", namebuf);
+			break;
+		}
+
+		if (S_ISDIR(statbuf.st_mode))
+			ret = depth ? rm_rf_depth_pat(namebuf, depth - 1, pat) : 0;
+		else
+			ret = unlink(namebuf);
+	}
+	closedir(dir);
+
+	if (ret < 0)
+		return ret;
+
+	return rmdir(path);
+}
+
+static int rm_rf_a_kcore_dir(const char *path, const char *name)
+{
+	char kcore_dir_path[PATH_MAX];
+	const char *pat[] = {
+		"kcore",
+		"kallsyms",
+		"modules",
+		NULL,
+	};
+
+	snprintf(kcore_dir_path, sizeof(kcore_dir_path), "%s/%s", path, name);
+
+	return rm_rf_depth_pat(kcore_dir_path, 0, pat);
+}
+
+static bool kcore_dir_filter(const char *name __maybe_unused, struct dirent *d)
+{
+	const char *pat[] = {
+		"kcore_dir",
+		"kcore_dir__[1-9]*",
+		NULL,
+	};
+
+	return match_pat(d->d_name, pat);
+}
+
+static int rm_rf_kcore_dir(const char *path)
+{
+	struct strlist *kcore_dirs;
+	struct str_node *nd;
+	int ret;
+
+	kcore_dirs = lsdir(path, kcore_dir_filter);
+
+	if (!kcore_dirs)
+		return 0;
+
+	strlist__for_each_entry(nd, kcore_dirs) {
+		ret = rm_rf_a_kcore_dir(path, nd->s);
+		if (ret)
+			return ret;
+	}
+
+	strlist__delete(kcore_dirs);
+
+	return 0;
+}
+
+int rm_rf_perf_data(const char *path)
+{
+	const char *pat[] = {
+		"data",
+		"data.*",
+		NULL,
+	};
+
+	rm_rf_kcore_dir(path);
+
+	return rm_rf_depth_pat(path, 0, pat);
+}
+
+int rm_rf(const char *path)
+{
+	return rm_rf_depth_pat(path, INT_MAX, NULL);
+}
+
+/* A filter which removes dot files */
+bool lsdir_no_dot_filter(const char *name __maybe_unused, struct dirent *d)
+{
+	return d->d_name[0] != '.';
+}
+
+/* lsdir reads a directory and store it in strlist */
+struct strlist *lsdir(const char *name,
+		      bool (*filter)(const char *, struct dirent *))
+{
+	struct strlist *list = NULL;
+	DIR *dir;
+	struct dirent *d;
+
+	dir = opendir(name);
+	if (!dir)
+		return NULL;
+
+	list = strlist__new(NULL, NULL);
+	if (!list) {
+		errno = ENOMEM;
+		goto out;
+	}
+
+	while ((d = readdir(dir)) != NULL) {
+		if (!filter || filter(name, d))
+			strlist__add(list, d->d_name);
+	}
+
+out:
+	closedir(dir);
+	return list;
+}
+
+size_t hex_width(u64 v)
+{
+	size_t n = 1;
+
+	while ((v >>= 4))
+		++n;
+
+	return n;
+}
+
+int perf_event_paranoid(void)
+{
+	int value;
+
+	if (sysctl__read_int("kernel/perf_event_paranoid", &value))
+		return INT_MAX;
+
+	return value;
+}
+
+bool perf_event_paranoid_check(int max_level)
+{
+	return perf_cap__capable(CAP_SYS_ADMIN) ||
+			perf_cap__capable(CAP_PERFMON) ||
+			perf_event_paranoid() <= max_level;
+}
+
+static int
+fetch_ubuntu_kernel_version(unsigned int *puint)
+{
+	ssize_t len;
+	size_t line_len = 0;
+	char *ptr, *line = NULL;
+	int version, patchlevel, sublevel, err;
+	FILE *vsig;
+
+	if (!puint)
+		return 0;
+
+	vsig = fopen("/proc/version_signature", "r");
+	if (!vsig) {
+		pr_debug("Open /proc/version_signature failed: %s\n",
+			 strerror(errno));
+		return -1;
+	}
+
+	len = getline(&line, &line_len, vsig);
+	fclose(vsig);
+	err = -1;
+	if (len <= 0) {
+		pr_debug("Reading from /proc/version_signature failed: %s\n",
+			 strerror(errno));
+		goto errout;
+	}
+
+	ptr = strrchr(line, ' ');
+	if (!ptr) {
+		pr_debug("Parsing /proc/version_signature failed: %s\n", line);
+		goto errout;
+	}
+
+	err = sscanf(ptr + 1, "%d.%d.%d",
+		     &version, &patchlevel, &sublevel);
+	if (err != 3) {
+		pr_debug("Unable to get kernel version from /proc/version_signature '%s'\n",
+			 line);
+		goto errout;
+	}
+
+	*puint = (version << 16) + (patchlevel << 8) + sublevel;
+	err = 0;
+errout:
+	free(line);
+	return err;
+}
+
+int
+fetch_kernel_version(unsigned int *puint, char *str,
+		     size_t str_size)
+{
+	struct utsname utsname;
+	int version, patchlevel, sublevel, err;
+	bool int_ver_ready = false;
+
+	if (access("/proc/version_signature", R_OK) == 0)
+		if (!fetch_ubuntu_kernel_version(puint))
+			int_ver_ready = true;
+
+	if (uname(&utsname))
+		return -1;
+
+	if (str && str_size) {
+		strncpy(str, utsname.release, str_size);
+		str[str_size - 1] = '\0';
+	}
+
+	if (!puint || int_ver_ready)
+		return 0;
+
+	err = sscanf(utsname.release, "%d.%d.%d",
+		     &version, &patchlevel, &sublevel);
+
+	if (err != 3) {
+		pr_debug("Unable to get kernel version from uname '%s'\n",
+			 utsname.release);
+		return -1;
+	}
+
+	*puint = (version << 16) + (patchlevel << 8) + sublevel;
+	return 0;
+}
+
+int perf_tip(char **strp, const char *dirpath)
+{
+	struct strlist *tips;
+	struct str_node *node;
+	struct strlist_config conf = {
+		.dirname = dirpath,
+		.file_only = true,
+	};
+	int ret = 0;
+
+	*strp = NULL;
+	tips = strlist__new("tips.txt", &conf);
+	if (tips == NULL)
+		return -errno;
+
+	if (strlist__nr_entries(tips) == 0)
+		goto out;
+
+	node = strlist__entry(tips, random() % strlist__nr_entries(tips));
+	if (asprintf(strp, "Tip: %s", node->s) < 0)
+		ret = -ENOMEM;
+
+out:
+	strlist__delete(tips);
+
+	return ret;
+}
+
+char *perf_exe(char *buf, int len)
+{
+	int n = readlink("/proc/self/exe", buf, len);
+	if (n > 0) {
+		buf[n] = 0;
+		return buf;
+	}
+	return strcpy(buf, "perf");
+}
+
+void perf_debuginfod_setup(struct perf_debuginfod *di)
+{
+	/*
+	 * By default '!di->set' we clear DEBUGINFOD_URLS, so debuginfod
+	 * processing is not triggered, otherwise we set it to 'di->urls'
+	 * value. If 'di->urls' is "system" we keep DEBUGINFOD_URLS value.
+	 */
+	if (!di->set)
+		setenv("DEBUGINFOD_URLS", "", 1);
+	else if (di->urls && strcmp(di->urls, "system"))
+		setenv("DEBUGINFOD_URLS", di->urls, 1);
+
+	pr_debug("DEBUGINFOD_URLS=%s\n", getenv("DEBUGINFOD_URLS"));
+
+#ifndef HAVE_DEBUGINFOD_SUPPORT
+	if (di->set)
+		pr_warning("WARNING: debuginfod support requested, but perf is not built with it\n");
+#endif
+}
+
+/*
+ * Return a new filename prepended with task's root directory if it's in
+ * a chroot.  Callers should free the returned string.
+ */
+char *filename_with_chroot(int pid, const char *filename)
+{
+	char buf[PATH_MAX];
+	char proc_root[32];
+	char *new_name = NULL;
+	int ret;
+
+	scnprintf(proc_root, sizeof(proc_root), "/proc/%d/root", pid);
+	ret = readlink(proc_root, buf, sizeof(buf) - 1);
+	if (ret <= 0)
+		return NULL;
+
+	/* readlink(2) does not append a null byte to buf */
+	buf[ret] = '\0';
+
+	if (!strcmp(buf, "/"))
+		return NULL;
+
+	if (strstr(buf, "(deleted)"))
+		return NULL;
+
+	if (asprintf(&new_name, "%s/%s", buf, filename) < 0)
+		return NULL;
+
+	return new_name;
+}
+
+/*
+ * Reallocate an array *arr of size *arr_sz so that it is big enough to contain
+ * x elements of size msz, initializing new entries to *init_val or zero if
+ * init_val is NULL
+ */
+int do_realloc_array_as_needed(void **arr, size_t *arr_sz, size_t x, size_t msz, const void *init_val)
+{
+	size_t new_sz = *arr_sz;
+	void *new_arr;
+	size_t i;
+
+	if (!new_sz)
+		new_sz = msz >= 64 ? 1 : roundup(64, msz); /* Start with at least 64 bytes */
+	while (x >= new_sz) {
+		if (check_mul_overflow(new_sz, (size_t)2, &new_sz))
+			return -ENOMEM;
+	}
+	if (new_sz == *arr_sz)
+		return 0;
+	new_arr = calloc(new_sz, msz);
+	if (!new_arr)
+		return -ENOMEM;
+	memcpy(new_arr, *arr, *arr_sz * msz);
+	if (init_val) {
+		for (i = *arr_sz; i < new_sz; i++)
+			memcpy(new_arr + (i * msz), init_val, msz);
+	}
+	*arr = new_arr;
+	*arr_sz = new_sz;
+	return 0;
+}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
new file mode 100644
index 000000000..c1f2d423a
--- /dev/null
+++ b/tools/perf/util/util.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef GIT_COMPAT_UTIL_H
+#define GIT_COMPAT_UTIL_H
+
+#define _BSD_SOURCE 1
+/* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */
+#define _DEFAULT_SOURCE 1
+
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/compiler.h>
+#include <sys/types.h>
+#ifndef __cplusplus
+#include <internal/cpumap.h>
+#endif
+
+/* General helper functions */
+void usage(const char *err) __noreturn;
+void die(const char *err, ...) __noreturn __printf(1, 2);
+
+struct dirent;
+struct strlist;
+
+int mkdir_p(char *path, mode_t mode);
+int rm_rf(const char *path);
+int rm_rf_perf_data(const char *path);
+struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dirent *));
+bool lsdir_no_dot_filter(const char *name, struct dirent *d);
+
+size_t hex_width(u64 v);
+
+int sysctl__max_stack(void);
+
+bool sysctl__nmi_watchdog_enabled(void);
+
+int fetch_kernel_version(unsigned int *puint,
+			 char *str, size_t str_sz);
+#define KVER_VERSION(x)		(((x) >> 16) & 0xff)
+#define KVER_PATCHLEVEL(x)	(((x) >> 8) & 0xff)
+#define KVER_SUBLEVEL(x)	((x) & 0xff)
+#define KVER_FMT	"%d.%d.%d"
+#define KVER_PARAM(x)	KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x)
+
+int perf_tip(char **strp, const char *dirpath);
+
+#ifndef HAVE_SCHED_GETCPU_SUPPORT
+int sched_getcpu(void);
+#endif
+
+extern bool perf_singlethreaded;
+
+void perf_set_singlethreaded(void);
+void perf_set_multithreaded(void);
+
+char *perf_exe(char *buf, int len);
+
+#ifndef O_CLOEXEC
+#ifdef __sparc__
+#define O_CLOEXEC      0x400000
+#elif defined(__alpha__) || defined(__hppa__)
+#define O_CLOEXEC      010000000
+#else
+#define O_CLOEXEC      02000000
+#endif
+#endif
+
+extern bool test_attr__enabled;
+void test_attr__ready(void);
+void test_attr__init(void);
+struct perf_event_attr;
+void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
+		     int fd, int group_fd, unsigned long flags);
+
+struct perf_debuginfod {
+	const char	*urls;
+	bool		 set;
+};
+void perf_debuginfod_setup(struct perf_debuginfod *di);
+
+char *filename_with_chroot(int pid, const char *filename);
+
+int do_realloc_array_as_needed(void **arr, size_t *arr_sz, size_t x,
+			       size_t msz, const void *init_val);
+
+#define realloc_array_as_needed(a, n, x, v) ({			\
+	typeof(x) __x = (x);					\
+	__x >= (n) ?						\
+		do_realloc_array_as_needed((void **)&(a),	\
+					   &(n),		\
+					   __x,			\
+					   sizeof(*(a)),	\
+					   (const void *)(v)) :	\
+		0;						\
+	})
+
+#endif /* GIT_COMPAT_UTIL_H */
diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c
new file mode 100644
index 000000000..b9823f414
--- /dev/null
+++ b/tools/perf/util/values.c
@@ -0,0 +1,305 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <linux/zalloc.h>
+
+#include "values.h"
+#include "debug.h"
+
+int perf_read_values_init(struct perf_read_values *values)
+{
+	values->threads_max = 16;
+	values->pid = malloc(values->threads_max * sizeof(*values->pid));
+	values->tid = malloc(values->threads_max * sizeof(*values->tid));
+	values->value = zalloc(values->threads_max * sizeof(*values->value));
+	if (!values->pid || !values->tid || !values->value) {
+		pr_debug("failed to allocate read_values threads arrays");
+		goto out_free_pid;
+	}
+	values->threads = 0;
+
+	values->counters_max = 16;
+	values->counterrawid = malloc(values->counters_max
+				      * sizeof(*values->counterrawid));
+	values->countername = malloc(values->counters_max
+				     * sizeof(*values->countername));
+	if (!values->counterrawid || !values->countername) {
+		pr_debug("failed to allocate read_values counters arrays");
+		goto out_free_counter;
+	}
+	values->counters = 0;
+
+	return 0;
+
+out_free_counter:
+	zfree(&values->counterrawid);
+	zfree(&values->countername);
+out_free_pid:
+	zfree(&values->pid);
+	zfree(&values->tid);
+	zfree(&values->value);
+	return -ENOMEM;
+}
+
+void perf_read_values_destroy(struct perf_read_values *values)
+{
+	int i;
+
+	if (!values->threads_max || !values->counters_max)
+		return;
+
+	for (i = 0; i < values->threads; i++)
+		zfree(&values->value[i]);
+	zfree(&values->value);
+	zfree(&values->pid);
+	zfree(&values->tid);
+	zfree(&values->counterrawid);
+	for (i = 0; i < values->counters; i++)
+		zfree(&values->countername[i]);
+	zfree(&values->countername);
+}
+
+static int perf_read_values__enlarge_threads(struct perf_read_values *values)
+{
+	int nthreads_max = values->threads_max * 2;
+	void *npid = realloc(values->pid, nthreads_max * sizeof(*values->pid)),
+	     *ntid = realloc(values->tid, nthreads_max * sizeof(*values->tid)),
+	     *nvalue = realloc(values->value, nthreads_max * sizeof(*values->value));
+
+	if (!npid || !ntid || !nvalue)
+		goto out_err;
+
+	values->threads_max = nthreads_max;
+	values->pid = npid;
+	values->tid = ntid;
+	values->value = nvalue;
+	return 0;
+out_err:
+	free(npid);
+	free(ntid);
+	free(nvalue);
+	pr_debug("failed to enlarge read_values threads arrays");
+	return -ENOMEM;
+}
+
+static int perf_read_values__findnew_thread(struct perf_read_values *values,
+					    u32 pid, u32 tid)
+{
+	int i;
+
+	for (i = 0; i < values->threads; i++)
+		if (values->pid[i] == pid && values->tid[i] == tid)
+			return i;
+
+	if (values->threads == values->threads_max) {
+		i = perf_read_values__enlarge_threads(values);
+		if (i < 0)
+			return i;
+	}
+
+	i = values->threads;
+
+	values->value[i] = zalloc(values->counters_max * sizeof(**values->value));
+	if (!values->value[i]) {
+		pr_debug("failed to allocate read_values counters array");
+		return -ENOMEM;
+	}
+	values->pid[i] = pid;
+	values->tid[i] = tid;
+	values->threads = i + 1;
+
+	return i;
+}
+
+static int perf_read_values__enlarge_counters(struct perf_read_values *values)
+{
+	char **countername;
+	int i, counters_max = values->counters_max * 2;
+	u64 *counterrawid = realloc(values->counterrawid, counters_max * sizeof(*values->counterrawid));
+
+	if (!counterrawid) {
+		pr_debug("failed to enlarge read_values rawid array");
+		goto out_enomem;
+	}
+
+	countername = realloc(values->countername, counters_max * sizeof(*values->countername));
+	if (!countername) {
+		pr_debug("failed to enlarge read_values rawid array");
+		goto out_free_rawid;
+	}
+
+	for (i = 0; i < values->threads; i++) {
+		u64 *value = realloc(values->value[i], counters_max * sizeof(**values->value));
+		int j;
+
+		if (!value) {
+			pr_debug("failed to enlarge read_values ->values array");
+			goto out_free_name;
+		}
+
+		for (j = values->counters_max; j < counters_max; j++)
+			value[j] = 0;
+
+		values->value[i] = value;
+	}
+
+	values->counters_max = counters_max;
+	values->counterrawid = counterrawid;
+	values->countername  = countername;
+
+	return 0;
+out_free_name:
+	free(countername);
+out_free_rawid:
+	free(counterrawid);
+out_enomem:
+	return -ENOMEM;
+}
+
+static int perf_read_values__findnew_counter(struct perf_read_values *values,
+					     u64 rawid, const char *name)
+{
+	int i;
+
+	for (i = 0; i < values->counters; i++)
+		if (values->counterrawid[i] == rawid)
+			return i;
+
+	if (values->counters == values->counters_max) {
+		i = perf_read_values__enlarge_counters(values);
+		if (i)
+			return i;
+	}
+
+	i = values->counters++;
+	values->counterrawid[i] = rawid;
+	values->countername[i] = strdup(name);
+
+	return i;
+}
+
+int perf_read_values_add_value(struct perf_read_values *values,
+				u32 pid, u32 tid,
+				u64 rawid, const char *name, u64 value)
+{
+	int tindex, cindex;
+
+	tindex = perf_read_values__findnew_thread(values, pid, tid);
+	if (tindex < 0)
+		return tindex;
+	cindex = perf_read_values__findnew_counter(values, rawid, name);
+	if (cindex < 0)
+		return cindex;
+
+	values->value[tindex][cindex] += value;
+	return 0;
+}
+
+static void perf_read_values__display_pretty(FILE *fp,
+					     struct perf_read_values *values)
+{
+	int i, j;
+	int pidwidth, tidwidth;
+	int *counterwidth;
+
+	counterwidth = malloc(values->counters * sizeof(*counterwidth));
+	if (!counterwidth) {
+		fprintf(fp, "INTERNAL ERROR: Failed to allocate counterwidth array\n");
+		return;
+	}
+	tidwidth = 3;
+	pidwidth = 3;
+	for (j = 0; j < values->counters; j++)
+		counterwidth[j] = strlen(values->countername[j]);
+	for (i = 0; i < values->threads; i++) {
+		int width;
+
+		width = snprintf(NULL, 0, "%d", values->pid[i]);
+		if (width > pidwidth)
+			pidwidth = width;
+		width = snprintf(NULL, 0, "%d", values->tid[i]);
+		if (width > tidwidth)
+			tidwidth = width;
+		for (j = 0; j < values->counters; j++) {
+			width = snprintf(NULL, 0, "%" PRIu64, values->value[i][j]);
+			if (width > counterwidth[j])
+				counterwidth[j] = width;
+		}
+	}
+
+	fprintf(fp, "# %*s  %*s", pidwidth, "PID", tidwidth, "TID");
+	for (j = 0; j < values->counters; j++)
+		fprintf(fp, "  %*s", counterwidth[j], values->countername[j]);
+	fprintf(fp, "\n");
+
+	for (i = 0; i < values->threads; i++) {
+		fprintf(fp, "  %*d  %*d", pidwidth, values->pid[i],
+			tidwidth, values->tid[i]);
+		for (j = 0; j < values->counters; j++)
+			fprintf(fp, "  %*" PRIu64,
+				counterwidth[j], values->value[i][j]);
+		fprintf(fp, "\n");
+	}
+	free(counterwidth);
+}
+
+static void perf_read_values__display_raw(FILE *fp,
+					  struct perf_read_values *values)
+{
+	int width, pidwidth, tidwidth, namewidth, rawwidth, countwidth;
+	int i, j;
+
+	tidwidth = 3; /* TID */
+	pidwidth = 3; /* PID */
+	namewidth = 4; /* "Name" */
+	rawwidth = 3; /* "Raw" */
+	countwidth = 5; /* "Count" */
+
+	for (i = 0; i < values->threads; i++) {
+		width = snprintf(NULL, 0, "%d", values->pid[i]);
+		if (width > pidwidth)
+			pidwidth = width;
+		width = snprintf(NULL, 0, "%d", values->tid[i]);
+		if (width > tidwidth)
+			tidwidth = width;
+	}
+	for (j = 0; j < values->counters; j++) {
+		width = strlen(values->countername[j]);
+		if (width > namewidth)
+			namewidth = width;
+		width = snprintf(NULL, 0, "%" PRIx64, values->counterrawid[j]);
+		if (width > rawwidth)
+			rawwidth = width;
+	}
+	for (i = 0; i < values->threads; i++) {
+		for (j = 0; j < values->counters; j++) {
+			width = snprintf(NULL, 0, "%" PRIu64, values->value[i][j]);
+			if (width > countwidth)
+				countwidth = width;
+		}
+	}
+
+	fprintf(fp, "# %*s  %*s  %*s  %*s  %*s\n",
+		pidwidth, "PID", tidwidth, "TID",
+		namewidth, "Name", rawwidth, "Raw",
+		countwidth, "Count");
+	for (i = 0; i < values->threads; i++)
+		for (j = 0; j < values->counters; j++)
+			fprintf(fp, "  %*d  %*d  %*s  %*" PRIx64 "  %*" PRIu64,
+				pidwidth, values->pid[i],
+				tidwidth, values->tid[i],
+				namewidth, values->countername[j],
+				rawwidth, values->counterrawid[j],
+				countwidth, values->value[i][j]);
+}
+
+void perf_read_values_display(FILE *fp, struct perf_read_values *values, int raw)
+{
+	if (raw)
+		perf_read_values__display_raw(fp, values);
+	else
+		perf_read_values__display_pretty(fp, values);
+}
diff --git a/tools/perf/util/values.h b/tools/perf/util/values.h
new file mode 100644
index 000000000..8c41f22f4
--- /dev/null
+++ b/tools/perf/util/values.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_VALUES_H
+#define __PERF_VALUES_H
+
+#include <linux/types.h>
+
+struct perf_read_values {
+	int threads;
+	int threads_max;
+	u32 *pid, *tid;
+	int counters;
+	int counters_max;
+	u64 *counterrawid;
+	char **countername;
+	u64 **value;
+};
+
+int perf_read_values_init(struct perf_read_values *values);
+void perf_read_values_destroy(struct perf_read_values *values);
+
+int perf_read_values_add_value(struct perf_read_values *values,
+				u32 pid, u32 tid,
+				u64 rawid, const char *name, u64 value);
+
+void perf_read_values_display(FILE *fp, struct perf_read_values *values,
+			      int raw);
+
+#endif /* __PERF_VALUES_H */
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
new file mode 100644
index 000000000..43beb1696
--- /dev/null
+++ b/tools/perf/util/vdso.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <linux/kernel.h>
+
+#include "vdso.h"
+#include "dso.h"
+#include <internal/lib.h>
+#include "map.h"
+#include "symbol.h"
+#include "machine.h"
+#include "thread.h"
+#include "linux/string.h"
+#include <linux/zalloc.h>
+#include "debug.h"
+
+/*
+ * Include definition of find_map() also used in perf-read-vdso.c for
+ * building perf-read-vdso32 and perf-read-vdsox32.
+ */
+#include "find-map.c"
+
+#define VDSO__TEMP_FILE_NAME "/tmp/perf-vdso.so-XXXXXX"
+
+struct vdso_file {
+	bool found;
+	bool error;
+	char temp_file_name[sizeof(VDSO__TEMP_FILE_NAME)];
+	const char *dso_name;
+	const char *read_prog;
+};
+
+struct vdso_info {
+	struct vdso_file vdso;
+#if BITS_PER_LONG == 64
+	struct vdso_file vdso32;
+	struct vdso_file vdsox32;
+#endif
+};
+
+static struct vdso_info *vdso_info__new(void)
+{
+	static const struct vdso_info vdso_info_init = {
+		.vdso    = {
+			.temp_file_name = VDSO__TEMP_FILE_NAME,
+			.dso_name = DSO__NAME_VDSO,
+		},
+#if BITS_PER_LONG == 64
+		.vdso32  = {
+			.temp_file_name = VDSO__TEMP_FILE_NAME,
+			.dso_name = DSO__NAME_VDSO32,
+			.read_prog = "perf-read-vdso32",
+		},
+		.vdsox32  = {
+			.temp_file_name = VDSO__TEMP_FILE_NAME,
+			.dso_name = DSO__NAME_VDSOX32,
+			.read_prog = "perf-read-vdsox32",
+		},
+#endif
+	};
+
+	return memdup(&vdso_info_init, sizeof(vdso_info_init));
+}
+
+static char *get_file(struct vdso_file *vdso_file)
+{
+	char *vdso = NULL;
+	char *buf = NULL;
+	void *start, *end;
+	size_t size;
+	int fd;
+
+	if (vdso_file->found)
+		return vdso_file->temp_file_name;
+
+	if (vdso_file->error || find_map(&start, &end, VDSO__MAP_NAME))
+		return NULL;
+
+	size = end - start;
+
+	buf = memdup(start, size);
+	if (!buf)
+		return NULL;
+
+	fd = mkstemp(vdso_file->temp_file_name);
+	if (fd < 0)
+		goto out;
+
+	if (size == (size_t) write(fd, buf, size))
+		vdso = vdso_file->temp_file_name;
+
+	close(fd);
+
+ out:
+	free(buf);
+
+	vdso_file->found = (vdso != NULL);
+	vdso_file->error = !vdso_file->found;
+	return vdso;
+}
+
+void machine__exit_vdso(struct machine *machine)
+{
+	struct vdso_info *vdso_info = machine->vdso_info;
+
+	if (!vdso_info)
+		return;
+
+	if (vdso_info->vdso.found)
+		unlink(vdso_info->vdso.temp_file_name);
+#if BITS_PER_LONG == 64
+	if (vdso_info->vdso32.found)
+		unlink(vdso_info->vdso32.temp_file_name);
+	if (vdso_info->vdsox32.found)
+		unlink(vdso_info->vdsox32.temp_file_name);
+#endif
+
+	zfree(&machine->vdso_info);
+}
+
+static struct dso *__machine__addnew_vdso(struct machine *machine, const char *short_name,
+					  const char *long_name)
+{
+	struct dso *dso;
+
+	dso = dso__new(short_name);
+	if (dso != NULL) {
+		__dsos__add(&machine->dsos, dso);
+		dso__set_long_name(dso, long_name, false);
+		/* Put dso here because __dsos_add already got it */
+		dso__put(dso);
+	}
+
+	return dso;
+}
+
+static enum dso_type machine__thread_dso_type(struct machine *machine,
+					      struct thread *thread)
+{
+	enum dso_type dso_type = DSO__TYPE_UNKNOWN;
+	struct map *map;
+
+	maps__for_each_entry(thread->maps, map) {
+		struct dso *dso = map->dso;
+		if (!dso || dso->long_name[0] != '/')
+			continue;
+		dso_type = dso__type(dso, machine);
+		if (dso_type != DSO__TYPE_UNKNOWN)
+			break;
+	}
+
+	return dso_type;
+}
+
+#if BITS_PER_LONG == 64
+
+static int vdso__do_copy_compat(FILE *f, int fd)
+{
+	char buf[4096];
+	size_t count;
+
+	while (1) {
+		count = fread(buf, 1, sizeof(buf), f);
+		if (ferror(f))
+			return -errno;
+		if (feof(f))
+			break;
+		if (count && writen(fd, buf, count) != (ssize_t)count)
+			return -errno;
+	}
+
+	return 0;
+}
+
+static int vdso__copy_compat(const char *prog, int fd)
+{
+	FILE *f;
+	int err;
+
+	f = popen(prog, "r");
+	if (!f)
+		return -errno;
+
+	err = vdso__do_copy_compat(f, fd);
+
+	if (pclose(f) == -1)
+		return -errno;
+
+	return err;
+}
+
+static int vdso__create_compat_file(const char *prog, char *temp_name)
+{
+	int fd, err;
+
+	fd = mkstemp(temp_name);
+	if (fd < 0)
+		return -errno;
+
+	err = vdso__copy_compat(prog, fd);
+
+	if (close(fd) == -1)
+		return -errno;
+
+	return err;
+}
+
+static const char *vdso__get_compat_file(struct vdso_file *vdso_file)
+{
+	int err;
+
+	if (vdso_file->found)
+		return vdso_file->temp_file_name;
+
+	if (vdso_file->error)
+		return NULL;
+
+	err = vdso__create_compat_file(vdso_file->read_prog,
+				       vdso_file->temp_file_name);
+	if (err) {
+		pr_err("%s failed, error %d\n", vdso_file->read_prog, err);
+		vdso_file->error = true;
+		return NULL;
+	}
+
+	vdso_file->found = true;
+
+	return vdso_file->temp_file_name;
+}
+
+static struct dso *__machine__findnew_compat(struct machine *machine,
+					     struct vdso_file *vdso_file)
+{
+	const char *file_name;
+	struct dso *dso;
+
+	dso = __dsos__find(&machine->dsos, vdso_file->dso_name, true);
+	if (dso)
+		goto out;
+
+	file_name = vdso__get_compat_file(vdso_file);
+	if (!file_name)
+		goto out;
+
+	dso = __machine__addnew_vdso(machine, vdso_file->dso_name, file_name);
+out:
+	return dso;
+}
+
+static int __machine__findnew_vdso_compat(struct machine *machine,
+					  struct thread *thread,
+					  struct vdso_info *vdso_info,
+					  struct dso **dso)
+{
+	enum dso_type dso_type;
+
+	dso_type = machine__thread_dso_type(machine, thread);
+
+#ifndef HAVE_PERF_READ_VDSO32
+	if (dso_type == DSO__TYPE_32BIT)
+		return 0;
+#endif
+#ifndef HAVE_PERF_READ_VDSOX32
+	if (dso_type == DSO__TYPE_X32BIT)
+		return 0;
+#endif
+
+	switch (dso_type) {
+	case DSO__TYPE_32BIT:
+		*dso = __machine__findnew_compat(machine, &vdso_info->vdso32);
+		return 1;
+	case DSO__TYPE_X32BIT:
+		*dso = __machine__findnew_compat(machine, &vdso_info->vdsox32);
+		return 1;
+	case DSO__TYPE_UNKNOWN:
+	case DSO__TYPE_64BIT:
+	default:
+		return 0;
+	}
+}
+
+#endif
+
+static struct dso *machine__find_vdso(struct machine *machine,
+				      struct thread *thread)
+{
+	struct dso *dso = NULL;
+	enum dso_type dso_type;
+
+	dso_type = machine__thread_dso_type(machine, thread);
+	switch (dso_type) {
+	case DSO__TYPE_32BIT:
+		dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO32, true);
+		if (!dso) {
+			dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO,
+					   true);
+			if (dso && dso_type != dso__type(dso, machine))
+				dso = NULL;
+		}
+		break;
+	case DSO__TYPE_X32BIT:
+		dso = __dsos__find(&machine->dsos, DSO__NAME_VDSOX32, true);
+		break;
+	case DSO__TYPE_64BIT:
+	case DSO__TYPE_UNKNOWN:
+	default:
+		dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
+		break;
+	}
+
+	return dso;
+}
+
+struct dso *machine__findnew_vdso(struct machine *machine,
+				  struct thread *thread)
+{
+	struct vdso_info *vdso_info;
+	struct dso *dso = NULL;
+
+	down_write(&machine->dsos.lock);
+	if (!machine->vdso_info)
+		machine->vdso_info = vdso_info__new();
+
+	vdso_info = machine->vdso_info;
+	if (!vdso_info)
+		goto out_unlock;
+
+	dso = machine__find_vdso(machine, thread);
+	if (dso)
+		goto out_unlock;
+
+#if BITS_PER_LONG == 64
+	if (__machine__findnew_vdso_compat(machine, thread, vdso_info, &dso))
+		goto out_unlock;
+#endif
+
+	dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
+	if (!dso) {
+		char *file;
+
+		file = get_file(&vdso_info->vdso);
+		if (file)
+			dso = __machine__addnew_vdso(machine, DSO__NAME_VDSO, file);
+	}
+
+out_unlock:
+	dso__get(dso);
+	up_write(&machine->dsos.lock);
+	return dso;
+}
+
+bool dso__is_vdso(struct dso *dso)
+{
+	return !strcmp(dso->short_name, DSO__NAME_VDSO) ||
+	       !strcmp(dso->short_name, DSO__NAME_VDSO32) ||
+	       !strcmp(dso->short_name, DSO__NAME_VDSOX32);
+}
diff --git a/tools/perf/util/vdso.h b/tools/perf/util/vdso.h
new file mode 100644
index 000000000..bc74ace60
--- /dev/null
+++ b/tools/perf/util/vdso.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_VDSO__
+#define __PERF_VDSO__
+
+#include <linux/types.h>
+#include <string.h>
+#include <stdbool.h>
+
+#define VDSO__MAP_NAME "[vdso]"
+
+#define DSO__NAME_VDSO    "[vdso]"
+#define DSO__NAME_VDSO32  "[vdso32]"
+#define DSO__NAME_VDSOX32 "[vdsox32]"
+
+static inline bool is_vdso_map(const char *filename)
+{
+	return !strcmp(filename, VDSO__MAP_NAME);
+}
+
+struct dso;
+
+bool dso__is_vdso(struct dso *dso);
+
+struct machine;
+struct thread;
+
+struct dso *machine__findnew_vdso(struct machine *machine, struct thread *thread);
+void machine__exit_vdso(struct machine *machine);
+
+#endif /* __PERF_VDSO__ */
diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c
new file mode 100644
index 000000000..78d2297c1
--- /dev/null
+++ b/tools/perf/util/zlib.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <zlib.h>
+#include <linux/compiler.h>
+#include <internal/lib.h>
+
+#include "util/compress.h"
+
+#define CHUNK_SIZE  16384
+
+int gzip_decompress_to_file(const char *input, int output_fd)
+{
+	int ret = Z_STREAM_ERROR;
+	int input_fd;
+	void *ptr;
+	int len;
+	struct stat stbuf;
+	unsigned char buf[CHUNK_SIZE];
+	z_stream zs = {
+		.zalloc		= Z_NULL,
+		.zfree		= Z_NULL,
+		.opaque		= Z_NULL,
+		.avail_in	= 0,
+		.next_in	= Z_NULL,
+	};
+
+	input_fd = open(input, O_RDONLY);
+	if (input_fd < 0)
+		return -1;
+
+	if (fstat(input_fd, &stbuf) < 0)
+		goto out_close;
+
+	ptr = mmap(NULL, stbuf.st_size, PROT_READ, MAP_PRIVATE, input_fd, 0);
+	if (ptr == MAP_FAILED)
+		goto out_close;
+
+	if (inflateInit2(&zs, 16 + MAX_WBITS) != Z_OK)
+		goto out_unmap;
+
+	zs.next_in = ptr;
+	zs.avail_in = stbuf.st_size;
+
+	do {
+		zs.next_out = buf;
+		zs.avail_out = CHUNK_SIZE;
+
+		ret = inflate(&zs, Z_NO_FLUSH);
+		switch (ret) {
+		case Z_NEED_DICT:
+			ret = Z_DATA_ERROR;
+			/* fall through */
+		case Z_DATA_ERROR:
+		case Z_MEM_ERROR:
+			goto out;
+		default:
+			break;
+		}
+
+		len = CHUNK_SIZE - zs.avail_out;
+		if (writen(output_fd, buf, len) != len) {
+			ret = Z_DATA_ERROR;
+			goto out;
+		}
+
+	} while (ret != Z_STREAM_END);
+
+out:
+	inflateEnd(&zs);
+out_unmap:
+	munmap(ptr, stbuf.st_size);
+out_close:
+	close(input_fd);
+
+	return ret == Z_STREAM_END ? 0 : -1;
+}
+
+bool gzip_is_compressed(const char *input)
+{
+	int fd = open(input, O_RDONLY);
+	const uint8_t magic[2] = { 0x1f, 0x8b };
+	char buf[2] = { 0 };
+	ssize_t rc;
+
+	if (fd < 0)
+		return -1;
+
+	rc = read(fd, buf, sizeof(buf));
+	close(fd);
+	return rc == sizeof(buf) ?
+	       memcmp(buf, magic, sizeof(buf)) == 0 : false;
+}
diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c
new file mode 100644
index 000000000..48dd2b018
--- /dev/null
+++ b/tools/perf/util/zstd.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <string.h>
+
+#include "util/compress.h"
+#include "util/debug.h"
+
+int zstd_init(struct zstd_data *data, int level)
+{
+	size_t ret;
+
+	data->dstream = ZSTD_createDStream();
+	if (data->dstream == NULL) {
+		pr_err("Couldn't create decompression stream.\n");
+		return -1;
+	}
+
+	ret = ZSTD_initDStream(data->dstream);
+	if (ZSTD_isError(ret)) {
+		pr_err("Failed to initialize decompression stream: %s\n", ZSTD_getErrorName(ret));
+		return -1;
+	}
+
+	if (!level)
+		return 0;
+
+	data->cstream = ZSTD_createCStream();
+	if (data->cstream == NULL) {
+		pr_err("Couldn't create compression stream.\n");
+		return -1;
+	}
+
+	ret = ZSTD_initCStream(data->cstream, level);
+	if (ZSTD_isError(ret)) {
+		pr_err("Failed to initialize compression stream: %s\n", ZSTD_getErrorName(ret));
+		return -1;
+	}
+
+	return 0;
+}
+
+int zstd_fini(struct zstd_data *data)
+{
+	if (data->dstream) {
+		ZSTD_freeDStream(data->dstream);
+		data->dstream = NULL;
+	}
+
+	if (data->cstream) {
+		ZSTD_freeCStream(data->cstream);
+		data->cstream = NULL;
+	}
+
+	return 0;
+}
+
+size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size,
+				       void *src, size_t src_size, size_t max_record_size,
+				       size_t process_header(void *record, size_t increment))
+{
+	size_t ret, size, compressed = 0;
+	ZSTD_inBuffer input = { src, src_size, 0 };
+	ZSTD_outBuffer output;
+	void *record;
+
+	while (input.pos < input.size) {
+		record = dst;
+		size = process_header(record, 0);
+		compressed += size;
+		dst += size;
+		dst_size -= size;
+		output = (ZSTD_outBuffer){ dst, (dst_size > max_record_size) ?
+						max_record_size : dst_size, 0 };
+		ret = ZSTD_compressStream(data->cstream, &output, &input);
+		ZSTD_flushStream(data->cstream, &output);
+		if (ZSTD_isError(ret)) {
+			pr_err("failed to compress %ld bytes: %s\n",
+				(long)src_size, ZSTD_getErrorName(ret));
+			memcpy(dst, src, src_size);
+			return src_size;
+		}
+		size = output.pos;
+		size = process_header(record, size);
+		compressed += size;
+		dst += size;
+		dst_size -= size;
+	}
+
+	return compressed;
+}
+
+size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size,
+			      void *dst, size_t dst_size)
+{
+	size_t ret;
+	ZSTD_inBuffer input = { src, src_size, 0 };
+	ZSTD_outBuffer output = { dst, dst_size, 0 };
+
+	while (input.pos < input.size) {
+		ret = ZSTD_decompressStream(data->dstream, &output, &input);
+		if (ZSTD_isError(ret)) {
+			pr_err("failed to decompress (B): %zd -> %zd, dst_size %zd : %s\n",
+			       src_size, output.size, dst_size, ZSTD_getErrorName(ret));
+			break;
+		}
+		output.dst  = dst + output.pos;
+		output.size = dst_size - output.pos;
+	}
+
+	return output.pos;
+}