Adding upstream version 5.10.209.upstream/5.10.209 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-27 10:05:51 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-27 10:05:51 +0000
commit: 5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 (patch)
tree: a94efe259b9009378be6d90eb30d2b019d95c194 /tools/lib
parent: Initial commit. (diff)
download: linux-upstream/5.10.209.tar.xz
linux-upstream/5.10.209.zip
230 files changed, 56199 insertions, 0 deletions
diff --git a/tools/lib/api/Build b/tools/lib/api/Build
new file mode 100644
index 000000000..6e2373db5
--- /dev/null
+++ b/tools/lib/api/Build
@@ -0,0 +1,9 @@
+libapi-y += fd/
+libapi-y += fs/
+libapi-y += cpu.o
+libapi-y += debug.o
+libapi-y += str_error_r.o
+
+$(OUTPUT)str_error_r.o: ../str_error_r.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
new file mode 100644
index 000000000..a13e9c7f1
--- /dev/null
+++ b/tools/lib/api/Makefile
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: GPL-2.0
+include ../../scripts/Makefile.include
+include ../../scripts/utilities.mak		# QUIET_CLEAN
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+CC ?= $(CROSS_COMPILE)gcc
+AR ?= $(CROSS_COMPILE)ar
+LD ?= $(CROSS_COMPILE)ld
+
+MAKEFLAGS += --no-print-directory
+
+LIBFILE = $(OUTPUT)libapi.a
+
+CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
+CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -U_FORTIFY_SOURCE -fPIC
+
+ifeq ($(DEBUG),0)
+ifeq ($(CC_NO_CLANG), 0)
+  CFLAGS += -O3
+else
+  CFLAGS += -O6
+endif
+endif
+
+ifeq ($(DEBUG),0)
+  CFLAGS += -D_FORTIFY_SOURCE
+endif
+
+# Treat warnings as errors unless directed not to
+ifneq ($(WERROR),0)
+  CFLAGS += -Werror
+endif
+
+CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
+CFLAGS += -I$(srctree)/tools/lib/api
+CFLAGS += -I$(srctree)/tools/include
+
+RM = rm -f
+
+API_IN := $(OUTPUT)libapi-in.o
+
+all:
+
+export srctree OUTPUT CC LD CFLAGS V
+include $(srctree)/tools/build/Makefile.include
+
+all: fixdep $(LIBFILE)
+
+$(API_IN): FORCE
+	@$(MAKE) $(build)=libapi
+
+$(LIBFILE): $(API_IN)
+	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(API_IN)
+
+clean:
+	$(call QUIET_CLEAN, libapi) $(RM) $(LIBFILE); \
+	find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
+
+FORCE:
+
+.PHONY: clean FORCE
diff --git a/tools/lib/api/cpu.c b/tools/lib/api/cpu.c
new file mode 100644
index 000000000..4af6d4b7a
--- /dev/null
+++ b/tools/lib/api/cpu.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+
+#include "cpu.h"
+#include "fs/fs.h"
+
+int cpu__get_max_freq(unsigned long long *freq)
+{
+	char entry[PATH_MAX];
+	int cpu;
+
+	if (sysfs__read_int("devices/system/cpu/online", &cpu) < 0)
+		return -1;
+
+	snprintf(entry, sizeof(entry),
+		 "devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpu);
+
+	return sysfs__read_ull(entry, freq);
+}
diff --git a/tools/lib/api/cpu.h b/tools/lib/api/cpu.h
new file mode 100644
index 000000000..90a102fb2
--- /dev/null
+++ b/tools/lib/api/cpu.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __API_CPU__
+#define __API_CPU__
+
+int cpu__get_max_freq(unsigned long long *freq);
+
+#endif /* __API_CPU__ */
diff --git a/tools/lib/api/debug-internal.h b/tools/lib/api/debug-internal.h
new file mode 100644
index 000000000..5a5820c11
--- /dev/null
+++ b/tools/lib/api/debug-internal.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __API_DEBUG_INTERNAL_H__
+#define __API_DEBUG_INTERNAL_H__
+
+#include "debug.h"
+
+#define __pr(func, fmt, ...)	\
+do {				\
+	if ((func))		\
+		(func)("libapi: " fmt, ##__VA_ARGS__); \
+} while (0)
+
+extern libapi_print_fn_t __pr_warn;
+extern libapi_print_fn_t __pr_info;
+extern libapi_print_fn_t __pr_debug;
+
+#define pr_warn(fmt, ...)	__pr(__pr_warn, fmt, ##__VA_ARGS__)
+#define pr_info(fmt, ...)	__pr(__pr_info, fmt, ##__VA_ARGS__)
+#define pr_debug(fmt, ...)	__pr(__pr_debug, fmt, ##__VA_ARGS__)
+
+#endif /* __API_DEBUG_INTERNAL_H__ */
diff --git a/tools/lib/api/debug.c b/tools/lib/api/debug.c
new file mode 100644
index 000000000..7708f0558
--- /dev/null
+++ b/tools/lib/api/debug.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdarg.h>
+#include "debug.h"
+#include "debug-internal.h"
+
+static int __base_pr(const char *format, ...)
+{
+	va_list args;
+	int err;
+
+	va_start(args, format);
+	err = vfprintf(stderr, format, args);
+	va_end(args);
+	return err;
+}
+
+libapi_print_fn_t __pr_warn    = __base_pr;
+libapi_print_fn_t __pr_info    = __base_pr;
+libapi_print_fn_t __pr_debug;
+
+void libapi_set_print(libapi_print_fn_t warn,
+		      libapi_print_fn_t info,
+		      libapi_print_fn_t debug)
+{
+	__pr_warn    = warn;
+	__pr_info    = info;
+	__pr_debug   = debug;
+}
diff --git a/tools/lib/api/debug.h b/tools/lib/api/debug.h
new file mode 100644
index 000000000..3684dd6e0
--- /dev/null
+++ b/tools/lib/api/debug.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __API_DEBUG_H__
+#define __API_DEBUG_H__
+
+typedef int (*libapi_print_fn_t)(const char *, ...);
+
+void libapi_set_print(libapi_print_fn_t warn,
+		      libapi_print_fn_t info,
+		      libapi_print_fn_t debug);
+
+#endif /* __API_DEBUG_H__ */
diff --git a/tools/lib/api/fd/Build b/tools/lib/api/fd/Build
new file mode 100644
index 000000000..605d99f6d
--- /dev/null
+++ b/tools/lib/api/fd/Build
@@ -0,0 +1 @@
+libapi-y += array.o
diff --git a/tools/lib/api/fd/array.c b/tools/lib/api/fd/array.c
new file mode 100644
index 000000000..5e6cb9deb
--- /dev/null
+++ b/tools/lib/api/fd/array.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2014, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+#include "array.h"
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+
+void fdarray__init(struct fdarray *fda, int nr_autogrow)
+{
+	fda->entries	 = NULL;
+	fda->priv	 = NULL;
+	fda->nr		 = fda->nr_alloc = 0;
+	fda->nr_autogrow = nr_autogrow;
+}
+
+int fdarray__grow(struct fdarray *fda, int nr)
+{
+	struct priv *priv;
+	int nr_alloc = fda->nr_alloc + nr;
+	size_t psize = sizeof(fda->priv[0]) * nr_alloc;
+	size_t size  = sizeof(struct pollfd) * nr_alloc;
+	struct pollfd *entries = realloc(fda->entries, size);
+
+	if (entries == NULL)
+		return -ENOMEM;
+
+	priv = realloc(fda->priv, psize);
+	if (priv == NULL) {
+		free(entries);
+		return -ENOMEM;
+	}
+
+	memset(&entries[fda->nr_alloc], 0, sizeof(struct pollfd) * nr);
+	memset(&priv[fda->nr_alloc], 0, sizeof(fda->priv[0]) * nr);
+
+	fda->nr_alloc = nr_alloc;
+	fda->entries  = entries;
+	fda->priv     = priv;
+	return 0;
+}
+
+struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow)
+{
+	struct fdarray *fda = calloc(1, sizeof(*fda));
+
+	if (fda != NULL) {
+		if (fdarray__grow(fda, nr_alloc)) {
+			free(fda);
+			fda = NULL;
+		} else {
+			fda->nr_autogrow = nr_autogrow;
+		}
+	}
+
+	return fda;
+}
+
+void fdarray__exit(struct fdarray *fda)
+{
+	free(fda->entries);
+	free(fda->priv);
+	fdarray__init(fda, 0);
+}
+
+void fdarray__delete(struct fdarray *fda)
+{
+	fdarray__exit(fda);
+	free(fda);
+}
+
+int fdarray__add(struct fdarray *fda, int fd, short revents, enum fdarray_flags flags)
+{
+	int pos = fda->nr;
+
+	if (fda->nr == fda->nr_alloc &&
+	    fdarray__grow(fda, fda->nr_autogrow) < 0)
+		return -ENOMEM;
+
+	fda->entries[fda->nr].fd     = fd;
+	fda->entries[fda->nr].events = revents;
+	fda->priv[fda->nr].flags = flags;
+	fda->nr++;
+	return pos;
+}
+
+int fdarray__filter(struct fdarray *fda, short revents,
+		    void (*entry_destructor)(struct fdarray *fda, int fd, void *arg),
+		    void *arg)
+{
+	int fd, nr = 0;
+
+	if (fda->nr == 0)
+		return 0;
+
+	for (fd = 0; fd < fda->nr; ++fd) {
+		if (!fda->entries[fd].events)
+			continue;
+
+		if (fda->entries[fd].revents & revents) {
+			if (entry_destructor)
+				entry_destructor(fda, fd, arg);
+
+			fda->entries[fd].revents = fda->entries[fd].events = 0;
+			continue;
+		}
+
+		if (!(fda->priv[fd].flags & fdarray_flag__nonfilterable))
+			++nr;
+	}
+
+	return nr;
+}
+
+int fdarray__poll(struct fdarray *fda, int timeout)
+{
+	return poll(fda->entries, fda->nr, timeout);
+}
+
+int fdarray__fprintf(struct fdarray *fda, FILE *fp)
+{
+	int fd, printed = fprintf(fp, "%d [ ", fda->nr);
+
+	for (fd = 0; fd < fda->nr; ++fd)
+		printed += fprintf(fp, "%s%d", fd ? ", " : "", fda->entries[fd].fd);
+
+	return printed + fprintf(fp, " ]");
+}
diff --git a/tools/lib/api/fd/array.h b/tools/lib/api/fd/array.h
new file mode 100644
index 000000000..7fcf21a33
--- /dev/null
+++ b/tools/lib/api/fd/array.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __API_FD_ARRAY__
+#define __API_FD_ARRAY__
+
+#include <stdio.h>
+
+struct pollfd;
+
+/**
+ * struct fdarray: Array of file descriptors
+ *
+ * @priv: Per array entry priv area, users should access just its contents,
+ *	  not set it to anything, as it is kept in synch with @entries, being
+ *	  realloc'ed, * for instance, in fdarray__{grow,filter}.
+ *
+ *	  I.e. using 'fda->priv[N].idx = * value' where N < fda->nr is ok,
+ *	  but doing 'fda->priv = malloc(M)' is not allowed.
+ */
+struct fdarray {
+	int	       nr;
+	int	       nr_alloc;
+	int	       nr_autogrow;
+	struct pollfd *entries;
+	struct priv {
+		union {
+			int    idx;
+			void   *ptr;
+		};
+		unsigned int flags;
+	} *priv;
+};
+
+enum fdarray_flags {
+	fdarray_flag__default	    = 0x00000000,
+	fdarray_flag__nonfilterable = 0x00000001
+};
+
+void fdarray__init(struct fdarray *fda, int nr_autogrow);
+void fdarray__exit(struct fdarray *fda);
+
+struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow);
+void fdarray__delete(struct fdarray *fda);
+
+int fdarray__add(struct fdarray *fda, int fd, short revents, enum fdarray_flags flags);
+int fdarray__poll(struct fdarray *fda, int timeout);
+int fdarray__filter(struct fdarray *fda, short revents,
+		    void (*entry_destructor)(struct fdarray *fda, int fd, void *arg),
+		    void *arg);
+int fdarray__grow(struct fdarray *fda, int extra);
+int fdarray__fprintf(struct fdarray *fda, FILE *fp);
+
+static inline int fdarray__available_entries(struct fdarray *fda)
+{
+	return fda->nr_alloc - fda->nr;
+}
+
+#endif /* __API_FD_ARRAY__ */
diff --git a/tools/lib/api/fs/Build b/tools/lib/api/fs/Build
new file mode 100644
index 000000000..0f75b2865
--- /dev/null
+++ b/tools/lib/api/fs/Build
@@ -0,0 +1,3 @@
+libapi-y += fs.o
+libapi-y += tracing_path.o
+libapi-y += cgroup.o
diff --git a/tools/lib/api/fs/cgroup.c b/tools/lib/api/fs/cgroup.c
new file mode 100644
index 000000000..889a6eb4a
--- /dev/null
+++ b/tools/lib/api/fs/cgroup.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/stringify.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "fs.h"
+
+int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys)
+{
+	FILE *fp;
+	char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1];
+	char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path;
+	char *token, *saved_ptr = NULL;
+
+	fp = fopen("/proc/mounts", "r");
+	if (!fp)
+		return -1;
+
+	/*
+	 * in order to handle split hierarchy, we need to scan /proc/mounts
+	 * and inspect every cgroupfs mount point to find one that has
+	 * perf_event subsystem
+	 */
+	path_v1[0] = '\0';
+	path_v2[0] = '\0';
+
+	while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %"
+				__stringify(PATH_MAX)"s %*d %*d\n",
+				mountpoint, type, tokens) == 3) {
+
+		if (!path_v1[0] && !strcmp(type, "cgroup")) {
+
+			token = strtok_r(tokens, ",", &saved_ptr);
+
+			while (token != NULL) {
+				if (subsys && !strcmp(token, subsys)) {
+					strcpy(path_v1, mountpoint);
+					break;
+				}
+				token = strtok_r(NULL, ",", &saved_ptr);
+			}
+		}
+
+		if (!path_v2[0] && !strcmp(type, "cgroup2"))
+			strcpy(path_v2, mountpoint);
+
+		if (path_v1[0] && path_v2[0])
+			break;
+	}
+	fclose(fp);
+
+	if (path_v1[0])
+		path = path_v1;
+	else if (path_v2[0])
+		path = path_v2;
+	else
+		return -1;
+
+	if (strlen(path) < maxlen) {
+		strcpy(buf, path);
+		return 0;
+	}
+	return -1;
+}
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
new file mode 100644
index 000000000..82f53d81a
--- /dev/null
+++ b/tools/lib/api/fs/fs.c
@@ -0,0 +1,558 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/vfs.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mount.h>
+
+#include "fs.h"
+#include "debug-internal.h"
+
+#define _STR(x) #x
+#define STR(x) _STR(x)
+
+#ifndef SYSFS_MAGIC
+#define SYSFS_MAGIC            0x62656572
+#endif
+
+#ifndef PROC_SUPER_MAGIC
+#define PROC_SUPER_MAGIC       0x9fa0
+#endif
+
+#ifndef DEBUGFS_MAGIC
+#define DEBUGFS_MAGIC          0x64626720
+#endif
+
+#ifndef TRACEFS_MAGIC
+#define TRACEFS_MAGIC          0x74726163
+#endif
+
+#ifndef HUGETLBFS_MAGIC
+#define HUGETLBFS_MAGIC        0x958458f6
+#endif
+
+#ifndef BPF_FS_MAGIC
+#define BPF_FS_MAGIC           0xcafe4a11
+#endif
+
+static const char * const sysfs__fs_known_mountpoints[] = {
+	"/sys",
+	0,
+};
+
+static const char * const procfs__known_mountpoints[] = {
+	"/proc",
+	0,
+};
+
+#ifndef DEBUGFS_DEFAULT_PATH
+#define DEBUGFS_DEFAULT_PATH "/sys/kernel/debug"
+#endif
+
+static const char * const debugfs__known_mountpoints[] = {
+	DEBUGFS_DEFAULT_PATH,
+	"/debug",
+	0,
+};
+
+
+#ifndef TRACEFS_DEFAULT_PATH
+#define TRACEFS_DEFAULT_PATH "/sys/kernel/tracing"
+#endif
+
+static const char * const tracefs__known_mountpoints[] = {
+	TRACEFS_DEFAULT_PATH,
+	"/sys/kernel/debug/tracing",
+	"/tracing",
+	"/trace",
+	0,
+};
+
+static const char * const hugetlbfs__known_mountpoints[] = {
+	0,
+};
+
+static const char * const bpf_fs__known_mountpoints[] = {
+	"/sys/fs/bpf",
+	0,
+};
+
+struct fs {
+	const char		*name;
+	const char * const	*mounts;
+	char			 path[PATH_MAX];
+	bool			 found;
+	bool			 checked;
+	long			 magic;
+};
+
+enum {
+	FS__SYSFS   = 0,
+	FS__PROCFS  = 1,
+	FS__DEBUGFS = 2,
+	FS__TRACEFS = 3,
+	FS__HUGETLBFS = 4,
+	FS__BPF_FS = 5,
+};
+
+#ifndef TRACEFS_MAGIC
+#define TRACEFS_MAGIC 0x74726163
+#endif
+
+static struct fs fs__entries[] = {
+	[FS__SYSFS] = {
+		.name	= "sysfs",
+		.mounts	= sysfs__fs_known_mountpoints,
+		.magic	= SYSFS_MAGIC,
+		.checked = false,
+	},
+	[FS__PROCFS] = {
+		.name	= "proc",
+		.mounts	= procfs__known_mountpoints,
+		.magic	= PROC_SUPER_MAGIC,
+		.checked = false,
+	},
+	[FS__DEBUGFS] = {
+		.name	= "debugfs",
+		.mounts	= debugfs__known_mountpoints,
+		.magic	= DEBUGFS_MAGIC,
+		.checked = false,
+	},
+	[FS__TRACEFS] = {
+		.name	= "tracefs",
+		.mounts	= tracefs__known_mountpoints,
+		.magic	= TRACEFS_MAGIC,
+		.checked = false,
+	},
+	[FS__HUGETLBFS] = {
+		.name	= "hugetlbfs",
+		.mounts = hugetlbfs__known_mountpoints,
+		.magic	= HUGETLBFS_MAGIC,
+		.checked = false,
+	},
+	[FS__BPF_FS] = {
+		.name	= "bpf",
+		.mounts = bpf_fs__known_mountpoints,
+		.magic	= BPF_FS_MAGIC,
+		.checked = false,
+	},
+};
+
+static bool fs__read_mounts(struct fs *fs)
+{
+	bool found = false;
+	char type[100];
+	FILE *fp;
+
+	fp = fopen("/proc/mounts", "r");
+	if (fp == NULL)
+		return NULL;
+
+	while (!found &&
+	       fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
+		      fs->path, type) == 2) {
+
+		if (strcmp(type, fs->name) == 0)
+			found = true;
+	}
+
+	fclose(fp);
+	fs->checked = true;
+	return fs->found = found;
+}
+
+static int fs__valid_mount(const char *fs, long magic)
+{
+	struct statfs st_fs;
+
+	if (statfs(fs, &st_fs) < 0)
+		return -ENOENT;
+	else if ((long)st_fs.f_type != magic)
+		return -ENOENT;
+
+	return 0;
+}
+
+static bool fs__check_mounts(struct fs *fs)
+{
+	const char * const *ptr;
+
+	ptr = fs->mounts;
+	while (*ptr) {
+		if (fs__valid_mount(*ptr, fs->magic) == 0) {
+			fs->found = true;
+			strcpy(fs->path, *ptr);
+			return true;
+		}
+		ptr++;
+	}
+
+	return false;
+}
+
+static void mem_toupper(char *f, size_t len)
+{
+	while (len) {
+		*f = toupper(*f);
+		f++;
+		len--;
+	}
+}
+
+/*
+ * Check for "NAME_PATH" environment variable to override fs location (for
+ * testing). This matches the recommendation in Documentation/admin-guide/sysfs-rules.rst
+ * for SYSFS_PATH.
+ */
+static bool fs__env_override(struct fs *fs)
+{
+	char *override_path;
+	size_t name_len = strlen(fs->name);
+	/* name + "_PATH" + '\0' */
+	char upper_name[name_len + 5 + 1];
+
+	memcpy(upper_name, fs->name, name_len);
+	mem_toupper(upper_name, name_len);
+	strcpy(&upper_name[name_len], "_PATH");
+
+	override_path = getenv(upper_name);
+	if (!override_path)
+		return false;
+
+	fs->found = true;
+	fs->checked = true;
+	strncpy(fs->path, override_path, sizeof(fs->path) - 1);
+	fs->path[sizeof(fs->path) - 1] = '\0';
+	return true;
+}
+
+static const char *fs__get_mountpoint(struct fs *fs)
+{
+	if (fs__env_override(fs))
+		return fs->path;
+
+	if (fs__check_mounts(fs))
+		return fs->path;
+
+	if (fs__read_mounts(fs))
+		return fs->path;
+
+	return NULL;
+}
+
+static const char *fs__mountpoint(int idx)
+{
+	struct fs *fs = &fs__entries[idx];
+
+	if (fs->found)
+		return (const char *)fs->path;
+
+	/* the mount point was already checked for the mount point
+	 * but and did not exist, so return NULL to avoid scanning again.
+	 * This makes the found and not found paths cost equivalent
+	 * in case of multiple calls.
+	 */
+	if (fs->checked)
+		return NULL;
+
+	return fs__get_mountpoint(fs);
+}
+
+static const char *mount_overload(struct fs *fs)
+{
+	size_t name_len = strlen(fs->name);
+	/* "PERF_" + name + "_ENVIRONMENT" + '\0' */
+	char upper_name[5 + name_len + 12 + 1];
+
+	snprintf(upper_name, name_len, "PERF_%s_ENVIRONMENT", fs->name);
+	mem_toupper(upper_name, name_len);
+
+	return getenv(upper_name) ?: *fs->mounts;
+}
+
+static const char *fs__mount(int idx)
+{
+	struct fs *fs = &fs__entries[idx];
+	const char *mountpoint;
+
+	if (fs__mountpoint(idx))
+		return (const char *)fs->path;
+
+	mountpoint = mount_overload(fs);
+
+	if (mount(NULL, mountpoint, fs->name, 0, NULL) < 0)
+		return NULL;
+
+	return fs__check_mounts(fs) ? fs->path : NULL;
+}
+
+#define FS(name, idx)				\
+const char *name##__mountpoint(void)		\
+{						\
+	return fs__mountpoint(idx);		\
+}						\
+						\
+const char *name##__mount(void)			\
+{						\
+	return fs__mount(idx);			\
+}						\
+						\
+bool name##__configured(void)			\
+{						\
+	return name##__mountpoint() != NULL;	\
+}
+
+FS(sysfs,   FS__SYSFS);
+FS(procfs,  FS__PROCFS);
+FS(debugfs, FS__DEBUGFS);
+FS(tracefs, FS__TRACEFS);
+FS(hugetlbfs, FS__HUGETLBFS);
+FS(bpf_fs, FS__BPF_FS);
+
+int filename__read_int(const char *filename, int *value)
+{
+	char line[64];
+	int fd = open(filename, O_RDONLY), err = -1;
+
+	if (fd < 0)
+		return -1;
+
+	if (read(fd, line, sizeof(line)) > 0) {
+		*value = atoi(line);
+		err = 0;
+	}
+
+	close(fd);
+	return err;
+}
+
+static int filename__read_ull_base(const char *filename,
+				   unsigned long long *value, int base)
+{
+	char line[64];
+	int fd = open(filename, O_RDONLY), err = -1;
+
+	if (fd < 0)
+		return -1;
+
+	if (read(fd, line, sizeof(line)) > 0) {
+		*value = strtoull(line, NULL, base);
+		if (*value != ULLONG_MAX)
+			err = 0;
+	}
+
+	close(fd);
+	return err;
+}
+
+/*
+ * Parses @value out of @filename with strtoull.
+ * By using 16 for base to treat the number as hex.
+ */
+int filename__read_xll(const char *filename, unsigned long long *value)
+{
+	return filename__read_ull_base(filename, value, 16);
+}
+
+/*
+ * Parses @value out of @filename with strtoull.
+ * By using 0 for base, the strtoull detects the
+ * base automatically (see man strtoull).
+ */
+int filename__read_ull(const char *filename, unsigned long long *value)
+{
+	return filename__read_ull_base(filename, value, 0);
+}
+
+#define STRERR_BUFSIZE  128     /* For the buffer size of strerror_r */
+
+int filename__read_str(const char *filename, char **buf, size_t *sizep)
+{
+	size_t size = 0, alloc_size = 0;
+	void *bf = NULL, *nbf;
+	int fd, n, err = 0;
+	char sbuf[STRERR_BUFSIZE];
+
+	fd = open(filename, O_RDONLY);
+	if (fd < 0)
+		return -errno;
+
+	do {
+		if (size == alloc_size) {
+			alloc_size += BUFSIZ;
+			nbf = realloc(bf, alloc_size);
+			if (!nbf) {
+				err = -ENOMEM;
+				break;
+			}
+
+			bf = nbf;
+		}
+
+		n = read(fd, bf + size, alloc_size - size);
+		if (n < 0) {
+			if (size) {
+				pr_warn("read failed %d: %s\n", errno,
+					strerror_r(errno, sbuf, sizeof(sbuf)));
+				err = 0;
+			} else
+				err = -errno;
+
+			break;
+		}
+
+		size += n;
+	} while (n > 0);
+
+	if (!err) {
+		*sizep = size;
+		*buf   = bf;
+	} else
+		free(bf);
+
+	close(fd);
+	return err;
+}
+
+int filename__write_int(const char *filename, int value)
+{
+	int fd = open(filename, O_WRONLY), err = -1;
+	char buf[64];
+
+	if (fd < 0)
+		return err;
+
+	sprintf(buf, "%d", value);
+	if (write(fd, buf, sizeof(buf)) == sizeof(buf))
+		err = 0;
+
+	close(fd);
+	return err;
+}
+
+int procfs__read_str(const char *entry, char **buf, size_t *sizep)
+{
+	char path[PATH_MAX];
+	const char *procfs = procfs__mountpoint();
+
+	if (!procfs)
+		return -1;
+
+	snprintf(path, sizeof(path), "%s/%s", procfs, entry);
+
+	return filename__read_str(path, buf, sizep);
+}
+
+static int sysfs__read_ull_base(const char *entry,
+				unsigned long long *value, int base)
+{
+	char path[PATH_MAX];
+	const char *sysfs = sysfs__mountpoint();
+
+	if (!sysfs)
+		return -1;
+
+	snprintf(path, sizeof(path), "%s/%s", sysfs, entry);
+
+	return filename__read_ull_base(path, value, base);
+}
+
+int sysfs__read_xll(const char *entry, unsigned long long *value)
+{
+	return sysfs__read_ull_base(entry, value, 16);
+}
+
+int sysfs__read_ull(const char *entry, unsigned long long *value)
+{
+	return sysfs__read_ull_base(entry, value, 0);
+}
+
+int sysfs__read_int(const char *entry, int *value)
+{
+	char path[PATH_MAX];
+	const char *sysfs = sysfs__mountpoint();
+
+	if (!sysfs)
+		return -1;
+
+	snprintf(path, sizeof(path), "%s/%s", sysfs, entry);
+
+	return filename__read_int(path, value);
+}
+
+int sysfs__read_str(const char *entry, char **buf, size_t *sizep)
+{
+	char path[PATH_MAX];
+	const char *sysfs = sysfs__mountpoint();
+
+	if (!sysfs)
+		return -1;
+
+	snprintf(path, sizeof(path), "%s/%s", sysfs, entry);
+
+	return filename__read_str(path, buf, sizep);
+}
+
+int sysfs__read_bool(const char *entry, bool *value)
+{
+	char *buf;
+	size_t size;
+	int ret;
+
+	ret = sysfs__read_str(entry, &buf, &size);
+	if (ret < 0)
+		return ret;
+
+	switch (buf[0]) {
+	case '1':
+	case 'y':
+	case 'Y':
+		*value = true;
+		break;
+	case '0':
+	case 'n':
+	case 'N':
+		*value = false;
+		break;
+	default:
+		ret = -1;
+	}
+
+	free(buf);
+
+	return ret;
+}
+int sysctl__read_int(const char *sysctl, int *value)
+{
+	char path[PATH_MAX];
+	const char *procfs = procfs__mountpoint();
+
+	if (!procfs)
+		return -1;
+
+	snprintf(path, sizeof(path), "%s/sys/%s", procfs, sysctl);
+
+	return filename__read_int(path, value);
+}
+
+int sysfs__write_int(const char *entry, int value)
+{
+	char path[PATH_MAX];
+	const char *sysfs = sysfs__mountpoint();
+
+	if (!sysfs)
+		return -1;
+
+	if (snprintf(path, sizeof(path), "%s/%s", sysfs, entry) >= PATH_MAX)
+		return -1;
+
+	return filename__write_int(path, value);
+}
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
new file mode 100644
index 000000000..aa222ca30
--- /dev/null
+++ b/tools/lib/api/fs/fs.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __API_FS__
+#define __API_FS__
+
+#include <stdbool.h>
+#include <unistd.h>
+
+/*
+ * On most systems <limits.h> would have given us this, but  not on some systems
+ * (e.g. GNU/Hurd).
+ */
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
+#define FS(name)				\
+	const char *name##__mountpoint(void);	\
+	const char *name##__mount(void);	\
+	bool name##__configured(void);		\
+
+/*
+ * The xxxx__mountpoint() entry points find the first match mount point for each
+ * filesystems listed below, where xxxx is the filesystem type.
+ *
+ * The interface is as follows:
+ *
+ * - If a mount point is found on first call, it is cached and used for all
+ *   subsequent calls.
+ *
+ * - If a mount point is not found, NULL is returned on first call and all
+ *   subsequent calls.
+ */
+FS(sysfs)
+FS(procfs)
+FS(debugfs)
+FS(tracefs)
+FS(hugetlbfs)
+FS(bpf_fs)
+
+#undef FS
+
+
+int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys);
+
+int filename__read_int(const char *filename, int *value);
+int filename__read_ull(const char *filename, unsigned long long *value);
+int filename__read_xll(const char *filename, unsigned long long *value);
+int filename__read_str(const char *filename, char **buf, size_t *sizep);
+
+int filename__write_int(const char *filename, int value);
+
+int procfs__read_str(const char *entry, char **buf, size_t *sizep);
+
+int sysctl__read_int(const char *sysctl, int *value);
+int sysfs__read_int(const char *entry, int *value);
+int sysfs__read_ull(const char *entry, unsigned long long *value);
+int sysfs__read_xll(const char *entry, unsigned long long *value);
+int sysfs__read_str(const char *entry, char **buf, size_t *sizep);
+int sysfs__read_bool(const char *entry, bool *value);
+
+int sysfs__write_int(const char *entry, int value);
+#endif /* __API_FS__ */
diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c
new file mode 100644
index 000000000..5afb11b30
--- /dev/null
+++ b/tools/lib/api/fs/tracing_path.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/string.h>
+#include <errno.h>
+#include <unistd.h>
+#include "fs.h"
+
+#include "tracing_path.h"
+
+static char tracing_mnt[PATH_MAX]  = "/sys/kernel/debug";
+static char tracing_path[PATH_MAX]        = "/sys/kernel/debug/tracing";
+static char tracing_events_path[PATH_MAX] = "/sys/kernel/debug/tracing/events";
+
+static void __tracing_path_set(const char *tracing, const char *mountpoint)
+{
+	snprintf(tracing_mnt, sizeof(tracing_mnt), "%s", mountpoint);
+	snprintf(tracing_path, sizeof(tracing_path), "%s/%s",
+		 mountpoint, tracing);
+	snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s",
+		 mountpoint, tracing, "events");
+}
+
+static const char *tracing_path_tracefs_mount(void)
+{
+	const char *mnt;
+
+	mnt = tracefs__mount();
+	if (!mnt)
+		return NULL;
+
+	__tracing_path_set("", mnt);
+
+	return tracing_path;
+}
+
+static const char *tracing_path_debugfs_mount(void)
+{
+	const char *mnt;
+
+	mnt = debugfs__mount();
+	if (!mnt)
+		return NULL;
+
+	__tracing_path_set("tracing/", mnt);
+
+	return tracing_path;
+}
+
+const char *tracing_path_mount(void)
+{
+	const char *mnt;
+
+	mnt = tracing_path_tracefs_mount();
+	if (mnt)
+		return mnt;
+
+	mnt = tracing_path_debugfs_mount();
+
+	return mnt;
+}
+
+void tracing_path_set(const char *mntpt)
+{
+	__tracing_path_set("tracing/", mntpt);
+}
+
+char *get_tracing_file(const char *name)
+{
+	char *file;
+
+	if (asprintf(&file, "%s/%s", tracing_path_mount(), name) < 0)
+		return NULL;
+
+	return file;
+}
+
+void put_tracing_file(char *file)
+{
+	free(file);
+}
+
+char *get_events_file(const char *name)
+{
+	char *file;
+
+	if (asprintf(&file, "%s/events/%s", tracing_path_mount(), name) < 0)
+		return NULL;
+
+	return file;
+}
+
+void put_events_file(char *file)
+{
+	free(file);
+}
+
+DIR *tracing_events__opendir(void)
+{
+	DIR *dir = NULL;
+	char *path = get_tracing_file("events");
+
+	if (path) {
+		dir = opendir(path);
+		put_events_file(path);
+	}
+
+	return dir;
+}
+
+int tracing_path__strerror_open_tp(int err, char *buf, size_t size,
+				   const char *sys, const char *name)
+{
+	char sbuf[128];
+	char filename[PATH_MAX];
+
+	snprintf(filename, PATH_MAX, "%s/%s", sys, name ?: "*");
+
+	switch (err) {
+	case ENOENT:
+		/*
+		 * We will get here if we can't find the tracepoint, but one of
+		 * debugfs or tracefs is configured, which means you probably
+		 * want some tracepoint which wasn't compiled in your kernel.
+		 * - jirka
+		 */
+		if (debugfs__configured() || tracefs__configured()) {
+			/* sdt markers */
+			if (!strncmp(filename, "sdt_", 4)) {
+				snprintf(buf, size,
+					"Error:\tFile %s/%s not found.\n"
+					"Hint:\tSDT event cannot be directly recorded on.\n"
+					"\tPlease first use 'perf probe %s:%s' before recording it.\n",
+					tracing_events_path, filename, sys, name);
+			} else {
+				snprintf(buf, size,
+					 "Error:\tFile %s/%s not found.\n"
+					 "Hint:\tPerhaps this kernel misses some CONFIG_ setting to enable this feature?.\n",
+					 tracing_events_path, filename);
+			}
+			break;
+		}
+		snprintf(buf, size, "%s",
+			 "Error:\tUnable to find debugfs/tracefs\n"
+			 "Hint:\tWas your kernel compiled with debugfs/tracefs support?\n"
+			 "Hint:\tIs the debugfs/tracefs filesystem mounted?\n"
+			 "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'");
+		break;
+	case EACCES: {
+		snprintf(buf, size,
+			 "Error:\tNo permissions to read %s/%s\n"
+			 "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n",
+			 tracing_events_path, filename, tracing_path_mount());
+	}
+		break;
+	default:
+		snprintf(buf, size, "%s", str_error_r(err, sbuf, sizeof(sbuf)));
+		break;
+	}
+
+	return 0;
+}
diff --git a/tools/lib/api/fs/tracing_path.h b/tools/lib/api/fs/tracing_path.h
new file mode 100644
index 000000000..a19136b08
--- /dev/null
+++ b/tools/lib/api/fs/tracing_path.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __API_FS_TRACING_PATH_H
+#define __API_FS_TRACING_PATH_H
+
+#include <linux/types.h>
+#include <dirent.h>
+
+DIR *tracing_events__opendir(void);
+
+void tracing_path_set(const char *mountpoint);
+const char *tracing_path_mount(void);
+
+char *get_tracing_file(const char *name);
+void put_tracing_file(char *file);
+
+char *get_events_file(const char *name);
+void put_events_file(char *file);
+
+#define zput_events_file(ptr) ({ free(*ptr); *ptr = NULL; })
+
+int tracing_path__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name);
+#endif /* __API_FS_TRACING_PATH_H */
diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h
new file mode 100644
index 000000000..458acd294
--- /dev/null
+++ b/tools/lib/api/io.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Lightweight buffered reading library.
+ *
+ * Copyright 2019 Google LLC.
+ */
+#ifndef __API_IO__
+#define __API_IO__
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <linux/types.h>
+
+struct io {
+	/* File descriptor being read/ */
+	int fd;
+	/* Size of the read buffer. */
+	unsigned int buf_len;
+	/* Pointer to storage for buffering read. */
+	char *buf;
+	/* End of the storage. */
+	char *end;
+	/* Currently accessed data pointer. */
+	char *data;
+	/* Set true on when the end of file on read error. */
+	bool eof;
+};
+
+static inline void io__init(struct io *io, int fd,
+			    char *buf, unsigned int buf_len)
+{
+	io->fd = fd;
+	io->buf_len = buf_len;
+	io->buf = buf;
+	io->end = buf;
+	io->data = buf;
+	io->eof = false;
+}
+
+/* Reads one character from the "io" file with similar semantics to fgetc. */
+static inline int io__get_char(struct io *io)
+{
+	char *ptr = io->data;
+
+	if (io->eof)
+		return -1;
+
+	if (ptr == io->end) {
+		ssize_t n = read(io->fd, io->buf, io->buf_len);
+
+		if (n <= 0) {
+			io->eof = true;
+			return -1;
+		}
+		ptr = &io->buf[0];
+		io->end = &io->buf[n];
+	}
+	io->data = ptr + 1;
+	return *ptr;
+}
+
+/* Read a hexadecimal value with no 0x prefix into the out argument hex. If the
+ * first character isn't hexadecimal returns -2, io->eof returns -1, otherwise
+ * returns the character after the hexadecimal value which may be -1 for eof.
+ * If the read value is larger than a u64 the high-order bits will be dropped.
+ */
+static inline int io__get_hex(struct io *io, __u64 *hex)
+{
+	bool first_read = true;
+
+	*hex = 0;
+	while (true) {
+		int ch = io__get_char(io);
+
+		if (ch < 0)
+			return ch;
+		if (ch >= '0' && ch <= '9')
+			*hex = (*hex << 4) | (ch - '0');
+		else if (ch >= 'a' && ch <= 'f')
+			*hex = (*hex << 4) | (ch - 'a' + 10);
+		else if (ch >= 'A' && ch <= 'F')
+			*hex = (*hex << 4) | (ch - 'A' + 10);
+		else if (first_read)
+			return -2;
+		else
+			return ch;
+		first_read = false;
+	}
+}
+
+/* Read a positive decimal value with out argument dec. If the first character
+ * isn't a decimal returns -2, io->eof returns -1, otherwise returns the
+ * character after the decimal value which may be -1 for eof. If the read value
+ * is larger than a u64 the high-order bits will be dropped.
+ */
+static inline int io__get_dec(struct io *io, __u64 *dec)
+{
+	bool first_read = true;
+
+	*dec = 0;
+	while (true) {
+		int ch = io__get_char(io);
+
+		if (ch < 0)
+			return ch;
+		if (ch >= '0' && ch <= '9')
+			*dec = (*dec * 10) + ch - '0';
+		else if (first_read)
+			return -2;
+		else
+			return ch;
+		first_read = false;
+	}
+}
+
+#endif /* __API_IO__ */
diff --git a/tools/lib/argv_split.c b/tools/lib/argv_split.c
new file mode 100644
index 000000000..0a58ccf3f
--- /dev/null
+++ b/tools/lib/argv_split.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Helper function for splitting a string into an argv-like array.
+ */
+
+#include <stdlib.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+
+static const char *skip_arg(const char *cp)
+{
+	while (*cp && !isspace(*cp))
+		cp++;
+
+	return cp;
+}
+
+static int count_argc(const char *str)
+{
+	int count = 0;
+
+	while (*str) {
+		str = skip_spaces(str);
+		if (*str) {
+			count++;
+			str = skip_arg(str);
+		}
+	}
+
+	return count;
+}
+
+/**
+ * argv_free - free an argv
+ * @argv - the argument vector to be freed
+ *
+ * Frees an argv and the strings it points to.
+ */
+void argv_free(char **argv)
+{
+	char **p;
+	for (p = argv; *p; p++) {
+		free(*p);
+		*p = NULL;
+	}
+
+	free(argv);
+}
+
+/**
+ * argv_split - split a string at whitespace, returning an argv
+ * @str: the string to be split
+ * @argcp: returned argument count
+ *
+ * Returns an array of pointers to strings which are split out from
+ * @str.  This is performed by strictly splitting on white-space; no
+ * quote processing is performed.  Multiple whitespace characters are
+ * considered to be a single argument separator.  The returned array
+ * is always NULL-terminated.  Returns NULL on memory allocation
+ * failure.
+ */
+char **argv_split(const char *str, int *argcp)
+{
+	int argc = count_argc(str);
+	char **argv = calloc(argc + 1, sizeof(*argv));
+	char **argvp;
+
+	if (argv == NULL)
+		goto out;
+
+	if (argcp)
+		*argcp = argc;
+
+	argvp = argv;
+
+	while (*str) {
+		str = skip_spaces(str);
+
+		if (*str) {
+			const char *p = str;
+			char *t;
+
+			str = skip_arg(str);
+
+			t = strndup(p, str-p);
+			if (t == NULL)
+				goto fail;
+			*argvp++ = t;
+		}
+	}
+	*argvp = NULL;
+
+out:
+	return argv;
+
+fail:
+	argv_free(argv);
+	return NULL;
+}
diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c
new file mode 100644
index 000000000..5043747ef
--- /dev/null
+++ b/tools/lib/bitmap.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * From lib/bitmap.c
+ * Helper functions for bitmap.h.
+ */
+#include <linux/bitmap.h>
+
+int __bitmap_weight(const unsigned long *bitmap, int bits)
+{
+	int k, w = 0, lim = bits/BITS_PER_LONG;
+
+	for (k = 0; k < lim; k++)
+		w += hweight_long(bitmap[k]);
+
+	if (bits % BITS_PER_LONG)
+		w += hweight_long(bitmap[k] & BITMAP_LAST_WORD_MASK(bits));
+
+	return w;
+}
+
+void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+		 const unsigned long *bitmap2, int bits)
+{
+	int k;
+	int nr = BITS_TO_LONGS(bits);
+
+	for (k = 0; k < nr; k++)
+		dst[k] = bitmap1[k] | bitmap2[k];
+}
+
+size_t bitmap_scnprintf(unsigned long *bitmap, int nbits,
+			char *buf, size_t size)
+{
+	/* current bit is 'cur', most recently seen range is [rbot, rtop] */
+	int cur, rbot, rtop;
+	bool first = true;
+	size_t ret = 0;
+
+	rbot = cur = find_first_bit(bitmap, nbits);
+	while (cur < nbits) {
+		rtop = cur;
+		cur = find_next_bit(bitmap, nbits, cur + 1);
+		if (cur < nbits && cur <= rtop + 1)
+			continue;
+
+		if (!first)
+			ret += scnprintf(buf + ret, size - ret, ",");
+
+		first = false;
+
+		ret += scnprintf(buf + ret, size - ret, "%d", rbot);
+		if (rbot < rtop)
+			ret += scnprintf(buf + ret, size - ret, "-%d", rtop);
+
+		rbot = cur;
+	}
+	return ret;
+}
+
+int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+		 const unsigned long *bitmap2, unsigned int bits)
+{
+	unsigned int k;
+	unsigned int lim = bits/BITS_PER_LONG;
+	unsigned long result = 0;
+
+	for (k = 0; k < lim; k++)
+		result |= (dst[k] = bitmap1[k] & bitmap2[k]);
+	if (bits % BITS_PER_LONG)
+		result |= (dst[k] = bitmap1[k] & bitmap2[k] &
+			   BITMAP_LAST_WORD_MASK(bits));
+	return result != 0;
+}
+
+int __bitmap_equal(const unsigned long *bitmap1,
+		const unsigned long *bitmap2, unsigned int bits)
+{
+	unsigned int k, lim = bits/BITS_PER_LONG;
+	for (k = 0; k < lim; ++k)
+		if (bitmap1[k] != bitmap2[k])
+			return 0;
+
+	if (bits % BITS_PER_LONG)
+		if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits))
+			return 0;
+
+	return 1;
+}
diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore
new file mode 100644
index 000000000..8a81b3679
--- /dev/null
+++ b/tools/lib/bpf/.gitignore
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
+libbpf_version.h
+libbpf.pc
+FEATURE-DUMP.libbpf
+libbpf.so.*
+TAGS
+tags
+cscope.*
+/bpf_helper_defs.h
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
new file mode 100644
index 000000000..190366d05
--- /dev/null
+++ b/tools/lib/bpf/Build
@@ -0,0 +1,3 @@
+libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
+	    netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
+	    btf_dump.o ringbuf.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
new file mode 100644
index 000000000..f2a353bba
--- /dev/null
+++ b/tools/lib/bpf/Makefile
@@ -0,0 +1,308 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+# Most of this file is copied from tools/lib/traceevent/Makefile
+
+RM ?= rm
+srctree = $(abs_srctree)
+
+VERSION_SCRIPT := libbpf.map
+LIBBPF_VERSION := $(shell \
+	grep -oE '^LIBBPF_([0-9.]+)' $(VERSION_SCRIPT) | \
+	sort -rV | head -n1 | cut -d'_' -f2)
+LIBBPF_MAJOR_VERSION := $(firstword $(subst ., ,$(LIBBPF_VERSION)))
+
+MAKEFLAGS += --no-print-directory
+
+# This will work when bpf is built in tools env. where srctree
+# isn't set and when invoked from selftests build, where srctree
+# is a ".". building_out_of_srctree is undefined for in srctree
+# builds
+ifndef building_out_of_srctree
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+INSTALL = install
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+include $(srctree)/tools/scripts/Makefile.arch
+
+ifeq ($(LP64), 1)
+  libdir_relative = lib64
+else
+  libdir_relative = lib
+endif
+
+prefix ?= /usr/local
+libdir = $(prefix)/$(libdir_relative)
+man_dir = $(prefix)/share/man
+man_dir_SQ = '$(subst ','\'',$(man_dir))'
+
+export man_dir man_dir_SQ INSTALL
+export DESTDIR DESTDIR_SQ
+
+include $(srctree)/tools/scripts/Makefile.include
+
+# copy a bit from Linux kbuild
+
+ifeq ("$(origin V)", "command line")
+  VERBOSE = $(V)
+endif
+ifndef VERBOSE
+  VERBOSE = 0
+endif
+
+FEATURE_USER = .libbpf
+FEATURE_TESTS = libelf zlib bpf
+FEATURE_DISPLAY = libelf zlib bpf
+
+INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi
+FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES)
+
+check_feat := 1
+NON_CHECK_FEAT_TARGETS := clean TAGS tags cscope help
+ifdef MAKECMDGOALS
+ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),)
+  check_feat := 0
+endif
+endif
+
+ifeq ($(check_feat),1)
+ifeq ($(FEATURES_DUMP),)
+include $(srctree)/tools/build/Makefile.feature
+else
+include $(FEATURES_DUMP)
+endif
+endif
+
+export prefix libdir src obj
+
+# Shell quotes
+libdir_SQ = $(subst ','\'',$(libdir))
+libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
+
+OBJ		= $@
+N		=
+
+LIB_TARGET	= libbpf.a libbpf.so.$(LIBBPF_VERSION)
+LIB_FILE	= libbpf.a libbpf.so*
+PC_FILE		= libbpf.pc
+
+# Set compile option CFLAGS
+ifdef EXTRA_CFLAGS
+  CFLAGS := $(EXTRA_CFLAGS)
+else
+  CFLAGS := -g -O2
+endif
+
+# Append required CFLAGS
+override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum
+override CFLAGS += -Werror -Wall
+override CFLAGS += $(INCLUDES)
+override CFLAGS += -fvisibility=hidden
+override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
+
+# flags specific for shared library
+SHLIB_FLAGS := -DSHARED -fPIC
+
+ifeq ($(VERBOSE),1)
+  Q =
+else
+  Q = @
+endif
+
+# Disable command line variables (CFLAGS) override from top
+# level Makefile (perf), otherwise build Makefile will get
+# the same command line setup.
+MAKEOVERRIDES=
+
+all:
+
+export srctree OUTPUT CC LD CFLAGS V
+include $(srctree)/tools/build/Makefile.include
+
+SHARED_OBJDIR	:= $(OUTPUT)sharedobjs/
+STATIC_OBJDIR	:= $(OUTPUT)staticobjs/
+BPF_IN_SHARED	:= $(SHARED_OBJDIR)libbpf-in.o
+BPF_IN_STATIC	:= $(STATIC_OBJDIR)libbpf-in.o
+BPF_HELPER_DEFS	:= $(OUTPUT)bpf_helper_defs.h
+
+LIB_TARGET	:= $(addprefix $(OUTPUT),$(LIB_TARGET))
+LIB_FILE	:= $(addprefix $(OUTPUT),$(LIB_FILE))
+PC_FILE		:= $(addprefix $(OUTPUT),$(PC_FILE))
+
+TAGS_PROG := $(if $(shell which etags 2>/dev/null),etags,ctags)
+
+GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
+			   cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \
+			   sed 's/\[.*\]//' | \
+			   awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
+			   sort -u | wc -l)
+VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
+			      sed 's/\[.*\]//' | \
+			      awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}' | \
+			      grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
+
+CMD_TARGETS = $(LIB_TARGET) $(PC_FILE)
+
+all: fixdep
+	$(Q)$(MAKE) all_cmd
+
+all_cmd: $(CMD_TARGETS) check
+
+$(BPF_IN_SHARED): force elfdep zdep bpfdep $(BPF_HELPER_DEFS)
+	@(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \
+	(diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \
+	echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true
+	@(test -f ../../include/uapi/linux/bpf_common.h -a -f ../../../include/uapi/linux/bpf_common.h && ( \
+	(diff -B ../../include/uapi/linux/bpf_common.h ../../../include/uapi/linux/bpf_common.h >/dev/null) || \
+	echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf_common.h' differs from latest version at 'include/uapi/linux/bpf_common.h'" >&2 )) || true
+	@(test -f ../../include/uapi/linux/netlink.h -a -f ../../../include/uapi/linux/netlink.h && ( \
+	(diff -B ../../include/uapi/linux/netlink.h ../../../include/uapi/linux/netlink.h >/dev/null) || \
+	echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/netlink.h' differs from latest version at 'include/uapi/linux/netlink.h'" >&2 )) || true
+	@(test -f ../../include/uapi/linux/if_link.h -a -f ../../../include/uapi/linux/if_link.h && ( \
+	(diff -B ../../include/uapi/linux/if_link.h ../../../include/uapi/linux/if_link.h >/dev/null) || \
+	echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_link.h' differs from latest version at 'include/uapi/linux/if_link.h'" >&2 )) || true
+	@(test -f ../../include/uapi/linux/if_xdp.h -a -f ../../../include/uapi/linux/if_xdp.h && ( \
+	(diff -B ../../include/uapi/linux/if_xdp.h ../../../include/uapi/linux/if_xdp.h >/dev/null) || \
+	echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true
+	$(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)"
+
+$(BPF_IN_STATIC): force elfdep zdep bpfdep $(BPF_HELPER_DEFS)
+	$(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR)
+
+$(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h
+	$(QUIET_GEN)$(srctree)/scripts/bpf_helpers_doc.py --header \
+		--file $(srctree)/tools/include/uapi/linux/bpf.h > $(BPF_HELPER_DEFS)
+
+$(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
+
+$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) $(VERSION_SCRIPT)
+	$(QUIET_LINK)$(CC) $(LDFLAGS) \
+		--shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \
+		-Wl,--version-script=$(VERSION_SCRIPT) $< -lelf -lz -o $@
+	@ln -sf $(@F) $(OUTPUT)libbpf.so
+	@ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION)
+
+$(OUTPUT)libbpf.a: $(BPF_IN_STATIC)
+	$(QUIET_LINK)$(RM) -f $@; $(AR) rcs $@ $^
+
+$(OUTPUT)libbpf.pc:
+	$(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \
+		-e "s|@LIBDIR@|$(libdir_SQ)|" \
+		-e "s|@VERSION@|$(LIBBPF_VERSION)|" \
+		< libbpf.pc.template > $@
+
+check: check_abi
+
+check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT)
+	@if [ "$(GLOBAL_SYM_COUNT)" != "$(VERSIONED_SYM_COUNT)" ]; then	 \
+		echo "Warning: Num of global symbols in $(BPF_IN_SHARED)"	 \
+		     "($(GLOBAL_SYM_COUNT)) does NOT match with num of"	 \
+		     "versioned symbols in $^ ($(VERSIONED_SYM_COUNT))." \
+		     "Please make sure all LIBBPF_API symbols are"	 \
+		     "versioned in $(VERSION_SCRIPT)." >&2;		 \
+		readelf -s --wide $(BPF_IN_SHARED) |			 \
+		    cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' |	 \
+		    sed 's/\[.*\]//' |					 \
+		    awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'|  \
+		    sort -u > $(OUTPUT)libbpf_global_syms.tmp;		 \
+		readelf --dyn-syms --wide $(OUTPUT)libbpf.so |		 \
+		    sed 's/\[.*\]//' |					 \
+		    awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}'|  \
+		    grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 |		 \
+		    sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; 	 \
+		diff -u $(OUTPUT)libbpf_global_syms.tmp			 \
+		     $(OUTPUT)libbpf_versioned_syms.tmp;		 \
+		rm $(OUTPUT)libbpf_global_syms.tmp			 \
+		   $(OUTPUT)libbpf_versioned_syms.tmp;			 \
+		exit 1;							 \
+	fi
+
+define do_install_mkdir
+	if [ ! -d '$(DESTDIR_SQ)$1' ]; then		\
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1';	\
+	fi
+endef
+
+define do_install
+	if [ ! -d '$(DESTDIR_SQ)$2' ]; then		\
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2';	\
+	fi;						\
+	$(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2'
+endef
+
+install_lib: all_cmd
+	$(call QUIET_INSTALL, $(LIB_TARGET)) \
+		$(call do_install_mkdir,$(libdir_SQ)); \
+		cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ)
+
+install_headers: $(BPF_HELPER_DEFS)
+	$(call QUIET_INSTALL, headers) \
+		$(call do_install,bpf.h,$(prefix)/include/bpf,644); \
+		$(call do_install,libbpf.h,$(prefix)/include/bpf,644); \
+		$(call do_install,btf.h,$(prefix)/include/bpf,644); \
+		$(call do_install,libbpf_util.h,$(prefix)/include/bpf,644); \
+		$(call do_install,libbpf_common.h,$(prefix)/include/bpf,644); \
+		$(call do_install,xsk.h,$(prefix)/include/bpf,644); \
+		$(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \
+		$(call do_install,$(BPF_HELPER_DEFS),$(prefix)/include/bpf,644); \
+		$(call do_install,bpf_tracing.h,$(prefix)/include/bpf,644); \
+		$(call do_install,bpf_endian.h,$(prefix)/include/bpf,644); \
+		$(call do_install,bpf_core_read.h,$(prefix)/include/bpf,644);
+
+install_pkgconfig: $(PC_FILE)
+	$(call QUIET_INSTALL, $(PC_FILE)) \
+		$(call do_install,$(PC_FILE),$(libdir_SQ)/pkgconfig,644)
+
+install: install_lib install_pkgconfig install_headers
+
+### Cleaning rules
+
+config-clean:
+	$(call QUIET_CLEAN, feature-detect)
+	$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
+
+clean: config-clean
+	$(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS)		     \
+		*~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_HELPER_DEFS)		     \
+		$(SHARED_OBJDIR) $(STATIC_OBJDIR)			     \
+		$(addprefix $(OUTPUT),					     \
+			    *.o *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) *.pc)
+	$(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf
+
+
+
+PHONY += force elfdep zdep bpfdep cscope tags
+force:
+
+elfdep:
+	@if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit 1 ; fi
+
+zdep:
+	@if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi
+
+bpfdep:
+	@if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi
+
+cscope:
+	ls *.c *.h > cscope.files
+	cscope -b -q -I $(srctree)/include -f cscope.out
+
+tags:
+	$(RM) -f TAGS tags
+	ls *.c *.h | xargs $(TAGS_PROG) -a
+
+# Declare the contents of the .PHONY variable as phony.  We keep that
+# information in a variable so we can use it in if_changed and friends.
+.PHONY: $(PHONY)
+
+# Delete partially updated (corrupted) files on error
+.DELETE_ON_ERROR:
diff --git a/tools/lib/bpf/README.rst b/tools/lib/bpf/README.rst
new file mode 100644
index 000000000..8928f7787
--- /dev/null
+++ b/tools/lib/bpf/README.rst
@@ -0,0 +1,168 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+libbpf API naming convention
+============================
+
+libbpf API provides access to a few logically separated groups of
+functions and types. Every group has its own naming convention
+described here. It's recommended to follow these conventions whenever a
+new function or type is added to keep libbpf API clean and consistent.
+
+All types and functions provided by libbpf API should have one of the
+following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``,
+``perf_buffer_``.
+
+System call wrappers
+--------------------
+
+System call wrappers are simple wrappers for commands supported by
+sys_bpf system call. These wrappers should go to ``bpf.h`` header file
+and map one-on-one to corresponding commands.
+
+For example ``bpf_map_lookup_elem`` wraps ``BPF_MAP_LOOKUP_ELEM``
+command of sys_bpf, ``bpf_prog_attach`` wraps ``BPF_PROG_ATTACH``, etc.
+
+Objects
+-------
+
+Another class of types and functions provided by libbpf API is "objects"
+and functions to work with them. Objects are high-level abstractions
+such as BPF program or BPF map. They're represented by corresponding
+structures such as ``struct bpf_object``, ``struct bpf_program``,
+``struct bpf_map``, etc.
+
+Structures are forward declared and access to their fields should be
+provided via corresponding getters and setters rather than directly.
+
+These objects are associated with corresponding parts of ELF object that
+contains compiled BPF programs.
+
+For example ``struct bpf_object`` represents ELF object itself created
+from an ELF file or from a buffer, ``struct bpf_program`` represents a
+program in ELF object and ``struct bpf_map`` is a map.
+
+Functions that work with an object have names built from object name,
+double underscore and part that describes function purpose.
+
+For example ``bpf_object__open`` consists of the name of corresponding
+object, ``bpf_object``, double underscore and ``open`` that defines the
+purpose of the function to open ELF file and create ``bpf_object`` from
+it.
+
+Another example: ``bpf_program__load`` is named for corresponding
+object, ``bpf_program``, that is separated from other part of the name
+by double underscore.
+
+All objects and corresponding functions other than BTF related should go
+to ``libbpf.h``. BTF types and functions should go to ``btf.h``.
+
+Auxiliary functions
+-------------------
+
+Auxiliary functions and types that don't fit well in any of categories
+described above should have ``libbpf_`` prefix, e.g.
+``libbpf_get_error`` or ``libbpf_prog_type_by_name``.
+
+AF_XDP functions
+-------------------
+
+AF_XDP functions should have an ``xsk_`` prefix, e.g.
+``xsk_umem__get_data`` or ``xsk_umem__create``. The interface consists
+of both low-level ring access functions and high-level configuration
+functions. These can be mixed and matched. Note that these functions
+are not reentrant for performance reasons.
+
+Please take a look at Documentation/networking/af_xdp.rst in the Linux
+kernel source tree on how to use XDP sockets and for some common
+mistakes in case you do not get any traffic up to user space.
+
+libbpf ABI
+==========
+
+libbpf can be both linked statically or used as DSO. To avoid possible
+conflicts with other libraries an application is linked with, all
+non-static libbpf symbols should have one of the prefixes mentioned in
+API documentation above. See API naming convention to choose the right
+name for a new symbol.
+
+Symbol visibility
+-----------------
+
+libbpf follow the model when all global symbols have visibility "hidden"
+by default and to make a symbol visible it has to be explicitly
+attributed with ``LIBBPF_API`` macro. For example:
+
+.. code-block:: c
+
+        LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
+
+This prevents from accidentally exporting a symbol, that is not supposed
+to be a part of ABI what, in turn, improves both libbpf developer- and
+user-experiences.
+
+ABI versionning
+---------------
+
+To make future ABI extensions possible libbpf ABI is versioned.
+Versioning is implemented by ``libbpf.map`` version script that is
+passed to linker.
+
+Version name is ``LIBBPF_`` prefix + three-component numeric version,
+starting from ``0.0.1``.
+
+Every time ABI is being changed, e.g. because a new symbol is added or
+semantic of existing symbol is changed, ABI version should be bumped.
+This bump in ABI version is at most once per kernel development cycle.
+
+For example, if current state of ``libbpf.map`` is:
+
+.. code-block::
+        LIBBPF_0.0.1 {
+        	global:
+                        bpf_func_a;
+                        bpf_func_b;
+        	local:
+        		\*;
+        };
+
+, and a new symbol ``bpf_func_c`` is being introduced, then
+``libbpf.map`` should be changed like this:
+
+.. code-block::
+        LIBBPF_0.0.1 {
+        	global:
+                        bpf_func_a;
+                        bpf_func_b;
+        	local:
+        		\*;
+        };
+        LIBBPF_0.0.2 {
+                global:
+                        bpf_func_c;
+        } LIBBPF_0.0.1;
+
+, where new version ``LIBBPF_0.0.2`` depends on the previous
+``LIBBPF_0.0.1``.
+
+Format of version script and ways to handle ABI changes, including
+incompatible ones, described in details in [1].
+
+Stand-alone build
+=================
+
+Under https://github.com/libbpf/libbpf there is a (semi-)automated
+mirror of the mainline's version of libbpf for a stand-alone build.
+
+However, all changes to libbpf's code base must be upstreamed through
+the mainline kernel tree.
+
+License
+=======
+
+libbpf is dual-licensed under LGPL 2.1 and BSD 2-Clause.
+
+Links
+=====
+
+[1] https://www.akkadia.org/drepper/dsohowto.pdf
+    (Chapter 3. Maintaining APIs and ABIs).
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
new file mode 100644
index 000000000..d27e34133
--- /dev/null
+++ b/tools/lib/bpf/bpf.c
@@ -0,0 +1,933 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * common eBPF ELF operations.
+ *
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not,  see <http://www.gnu.org/licenses>
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <memory.h>
+#include <unistd.h>
+#include <asm/unistd.h>
+#include <errno.h>
+#include <linux/bpf.h>
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+
+/*
+ * When building perf, unistd.h is overridden. __NR_bpf is
+ * required to be defined explicitly.
+ */
+#ifndef __NR_bpf
+# if defined(__i386__)
+#  define __NR_bpf 357
+# elif defined(__x86_64__)
+#  define __NR_bpf 321
+# elif defined(__aarch64__)
+#  define __NR_bpf 280
+# elif defined(__sparc__)
+#  define __NR_bpf 349
+# elif defined(__s390__)
+#  define __NR_bpf 351
+# elif defined(__arc__)
+#  define __NR_bpf 280
+# else
+#  error __NR_bpf not defined. libbpf does not support your arch.
+# endif
+#endif
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+	return (__u64) (unsigned long) ptr;
+}
+
+static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
+			  unsigned int size)
+{
+	return syscall(__NR_bpf, cmd, attr, size);
+}
+
+static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size)
+{
+	int fd;
+
+	do {
+		fd = sys_bpf(BPF_PROG_LOAD, attr, size);
+	} while (fd < 0 && errno == EAGAIN);
+
+	return fd;
+}
+
+int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
+{
+	union bpf_attr attr;
+
+	memset(&attr, '\0', sizeof(attr));
+
+	attr.map_type = create_attr->map_type;
+	attr.key_size = create_attr->key_size;
+	attr.value_size = create_attr->value_size;
+	attr.max_entries = create_attr->max_entries;
+	attr.map_flags = create_attr->map_flags;
+	if (create_attr->name)
+		memcpy(attr.map_name, create_attr->name,
+		       min(strlen(create_attr->name), BPF_OBJ_NAME_LEN - 1));
+	attr.numa_node = create_attr->numa_node;
+	attr.btf_fd = create_attr->btf_fd;
+	attr.btf_key_type_id = create_attr->btf_key_type_id;
+	attr.btf_value_type_id = create_attr->btf_value_type_id;
+	attr.map_ifindex = create_attr->map_ifindex;
+	if (attr.map_type == BPF_MAP_TYPE_STRUCT_OPS)
+		attr.btf_vmlinux_value_type_id =
+			create_attr->btf_vmlinux_value_type_id;
+	else
+		attr.inner_map_fd = create_attr->inner_map_fd;
+
+	return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+}
+
+int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
+			int key_size, int value_size, int max_entries,
+			__u32 map_flags, int node)
+{
+	struct bpf_create_map_attr map_attr = {};
+
+	map_attr.name = name;
+	map_attr.map_type = map_type;
+	map_attr.map_flags = map_flags;
+	map_attr.key_size = key_size;
+	map_attr.value_size = value_size;
+	map_attr.max_entries = max_entries;
+	if (node >= 0) {
+		map_attr.numa_node = node;
+		map_attr.map_flags |= BPF_F_NUMA_NODE;
+	}
+
+	return bpf_create_map_xattr(&map_attr);
+}
+
+int bpf_create_map(enum bpf_map_type map_type, int key_size,
+		   int value_size, int max_entries, __u32 map_flags)
+{
+	struct bpf_create_map_attr map_attr = {};
+
+	map_attr.map_type = map_type;
+	map_attr.map_flags = map_flags;
+	map_attr.key_size = key_size;
+	map_attr.value_size = value_size;
+	map_attr.max_entries = max_entries;
+
+	return bpf_create_map_xattr(&map_attr);
+}
+
+int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
+			int key_size, int value_size, int max_entries,
+			__u32 map_flags)
+{
+	struct bpf_create_map_attr map_attr = {};
+
+	map_attr.name = name;
+	map_attr.map_type = map_type;
+	map_attr.map_flags = map_flags;
+	map_attr.key_size = key_size;
+	map_attr.value_size = value_size;
+	map_attr.max_entries = max_entries;
+
+	return bpf_create_map_xattr(&map_attr);
+}
+
+int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
+			       int key_size, int inner_map_fd, int max_entries,
+			       __u32 map_flags, int node)
+{
+	union bpf_attr attr;
+
+	memset(&attr, '\0', sizeof(attr));
+
+	attr.map_type = map_type;
+	attr.key_size = key_size;
+	attr.value_size = 4;
+	attr.inner_map_fd = inner_map_fd;
+	attr.max_entries = max_entries;
+	attr.map_flags = map_flags;
+	if (name)
+		memcpy(attr.map_name, name,
+		       min(strlen(name), BPF_OBJ_NAME_LEN - 1));
+
+	if (node >= 0) {
+		attr.map_flags |= BPF_F_NUMA_NODE;
+		attr.numa_node = node;
+	}
+
+	return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+}
+
+int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
+			  int key_size, int inner_map_fd, int max_entries,
+			  __u32 map_flags)
+{
+	return bpf_create_map_in_map_node(map_type, name, key_size,
+					  inner_map_fd, max_entries, map_flags,
+					  -1);
+}
+
+static void *
+alloc_zero_tailing_info(const void *orecord, __u32 cnt,
+			__u32 actual_rec_size, __u32 expected_rec_size)
+{
+	__u64 info_len = (__u64)actual_rec_size * cnt;
+	void *info, *nrecord;
+	int i;
+
+	info = malloc(info_len);
+	if (!info)
+		return NULL;
+
+	/* zero out bytes kernel does not understand */
+	nrecord = info;
+	for (i = 0; i < cnt; i++) {
+		memcpy(nrecord, orecord, expected_rec_size);
+		memset(nrecord + expected_rec_size, 0,
+		       actual_rec_size - expected_rec_size);
+		orecord += actual_rec_size;
+		nrecord += actual_rec_size;
+	}
+
+	return info;
+}
+
+int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
+			   char *log_buf, size_t log_buf_sz)
+{
+	void *finfo = NULL, *linfo = NULL;
+	union bpf_attr attr;
+	__u32 log_level;
+	int fd;
+
+	if (!load_attr || !log_buf != !log_buf_sz)
+		return -EINVAL;
+
+	log_level = load_attr->log_level;
+	if (log_level > (4 | 2 | 1) || (log_level && !log_buf))
+		return -EINVAL;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.prog_type = load_attr->prog_type;
+	attr.expected_attach_type = load_attr->expected_attach_type;
+	if (attr.prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
+	    attr.prog_type == BPF_PROG_TYPE_LSM) {
+		attr.attach_btf_id = load_attr->attach_btf_id;
+	} else if (attr.prog_type == BPF_PROG_TYPE_TRACING ||
+		   attr.prog_type == BPF_PROG_TYPE_EXT) {
+		attr.attach_btf_id = load_attr->attach_btf_id;
+		attr.attach_prog_fd = load_attr->attach_prog_fd;
+	} else {
+		attr.prog_ifindex = load_attr->prog_ifindex;
+		attr.kern_version = load_attr->kern_version;
+	}
+	attr.insn_cnt = (__u32)load_attr->insns_cnt;
+	attr.insns = ptr_to_u64(load_attr->insns);
+	attr.license = ptr_to_u64(load_attr->license);
+
+	attr.log_level = log_level;
+	if (log_level) {
+		attr.log_buf = ptr_to_u64(log_buf);
+		attr.log_size = log_buf_sz;
+	} else {
+		attr.log_buf = ptr_to_u64(NULL);
+		attr.log_size = 0;
+	}
+
+	attr.prog_btf_fd = load_attr->prog_btf_fd;
+	attr.func_info_rec_size = load_attr->func_info_rec_size;
+	attr.func_info_cnt = load_attr->func_info_cnt;
+	attr.func_info = ptr_to_u64(load_attr->func_info);
+	attr.line_info_rec_size = load_attr->line_info_rec_size;
+	attr.line_info_cnt = load_attr->line_info_cnt;
+	attr.line_info = ptr_to_u64(load_attr->line_info);
+	if (load_attr->name)
+		memcpy(attr.prog_name, load_attr->name,
+		       min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1));
+	attr.prog_flags = load_attr->prog_flags;
+
+	fd = sys_bpf_prog_load(&attr, sizeof(attr));
+	if (fd >= 0)
+		return fd;
+
+	/* After bpf_prog_load, the kernel may modify certain attributes
+	 * to give user space a hint how to deal with loading failure.
+	 * Check to see whether we can make some changes and load again.
+	 */
+	while (errno == E2BIG && (!finfo || !linfo)) {
+		if (!finfo && attr.func_info_cnt &&
+		    attr.func_info_rec_size < load_attr->func_info_rec_size) {
+			/* try with corrected func info records */
+			finfo = alloc_zero_tailing_info(load_attr->func_info,
+							load_attr->func_info_cnt,
+							load_attr->func_info_rec_size,
+							attr.func_info_rec_size);
+			if (!finfo)
+				goto done;
+
+			attr.func_info = ptr_to_u64(finfo);
+			attr.func_info_rec_size = load_attr->func_info_rec_size;
+		} else if (!linfo && attr.line_info_cnt &&
+			   attr.line_info_rec_size <
+			   load_attr->line_info_rec_size) {
+			linfo = alloc_zero_tailing_info(load_attr->line_info,
+							load_attr->line_info_cnt,
+							load_attr->line_info_rec_size,
+							attr.line_info_rec_size);
+			if (!linfo)
+				goto done;
+
+			attr.line_info = ptr_to_u64(linfo);
+			attr.line_info_rec_size = load_attr->line_info_rec_size;
+		} else {
+			break;
+		}
+
+		fd = sys_bpf_prog_load(&attr, sizeof(attr));
+
+		if (fd >= 0)
+			goto done;
+	}
+
+	if (log_level || !log_buf)
+		goto done;
+
+	/* Try again with log */
+	attr.log_buf = ptr_to_u64(log_buf);
+	attr.log_size = log_buf_sz;
+	attr.log_level = 1;
+	log_buf[0] = 0;
+	fd = sys_bpf_prog_load(&attr, sizeof(attr));
+done:
+	free(finfo);
+	free(linfo);
+	return fd;
+}
+
+int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+		     size_t insns_cnt, const char *license,
+		     __u32 kern_version, char *log_buf,
+		     size_t log_buf_sz)
+{
+	struct bpf_load_program_attr load_attr;
+
+	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
+	load_attr.prog_type = type;
+	load_attr.expected_attach_type = 0;
+	load_attr.name = NULL;
+	load_attr.insns = insns;
+	load_attr.insns_cnt = insns_cnt;
+	load_attr.license = license;
+	load_attr.kern_version = kern_version;
+
+	return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
+}
+
+int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+		       size_t insns_cnt, __u32 prog_flags, const char *license,
+		       __u32 kern_version, char *log_buf, size_t log_buf_sz,
+		       int log_level)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.prog_type = type;
+	attr.insn_cnt = (__u32)insns_cnt;
+	attr.insns = ptr_to_u64(insns);
+	attr.license = ptr_to_u64(license);
+	attr.log_buf = ptr_to_u64(log_buf);
+	attr.log_size = log_buf_sz;
+	attr.log_level = log_level;
+	log_buf[0] = 0;
+	attr.kern_version = kern_version;
+	attr.prog_flags = prog_flags;
+
+	return sys_bpf_prog_load(&attr, sizeof(attr));
+}
+
+int bpf_map_update_elem(int fd, const void *key, const void *value,
+			__u64 flags)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.map_fd = fd;
+	attr.key = ptr_to_u64(key);
+	attr.value = ptr_to_u64(value);
+	attr.flags = flags;
+
+	return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+}
+
+int bpf_map_lookup_elem(int fd, const void *key, void *value)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.map_fd = fd;
+	attr.key = ptr_to_u64(key);
+	attr.value = ptr_to_u64(value);
+
+	return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+}
+
+int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.map_fd = fd;
+	attr.key = ptr_to_u64(key);
+	attr.value = ptr_to_u64(value);
+	attr.flags = flags;
+
+	return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+}
+
+int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.map_fd = fd;
+	attr.key = ptr_to_u64(key);
+	attr.value = ptr_to_u64(value);
+
+	return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
+}
+
+int bpf_map_delete_elem(int fd, const void *key)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.map_fd = fd;
+	attr.key = ptr_to_u64(key);
+
+	return sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+}
+
+int bpf_map_get_next_key(int fd, const void *key, void *next_key)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.map_fd = fd;
+	attr.key = ptr_to_u64(key);
+	attr.next_key = ptr_to_u64(next_key);
+
+	return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
+}
+
+int bpf_map_freeze(int fd)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.map_fd = fd;
+
+	return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
+}
+
+static int bpf_map_batch_common(int cmd, int fd, void  *in_batch,
+				void *out_batch, void *keys, void *values,
+				__u32 *count,
+				const struct bpf_map_batch_opts *opts)
+{
+	union bpf_attr attr;
+	int ret;
+
+	if (!OPTS_VALID(opts, bpf_map_batch_opts))
+		return -EINVAL;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.batch.map_fd = fd;
+	attr.batch.in_batch = ptr_to_u64(in_batch);
+	attr.batch.out_batch = ptr_to_u64(out_batch);
+	attr.batch.keys = ptr_to_u64(keys);
+	attr.batch.values = ptr_to_u64(values);
+	attr.batch.count = *count;
+	attr.batch.elem_flags  = OPTS_GET(opts, elem_flags, 0);
+	attr.batch.flags = OPTS_GET(opts, flags, 0);
+
+	ret = sys_bpf(cmd, &attr, sizeof(attr));
+	*count = attr.batch.count;
+
+	return ret;
+}
+
+int bpf_map_delete_batch(int fd, void *keys, __u32 *count,
+			 const struct bpf_map_batch_opts *opts)
+{
+	return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL,
+				    NULL, keys, NULL, count, opts);
+}
+
+int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys,
+			 void *values, __u32 *count,
+			 const struct bpf_map_batch_opts *opts)
+{
+	return bpf_map_batch_common(BPF_MAP_LOOKUP_BATCH, fd, in_batch,
+				    out_batch, keys, values, count, opts);
+}
+
+int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch,
+				    void *keys, void *values, __u32 *count,
+				    const struct bpf_map_batch_opts *opts)
+{
+	return bpf_map_batch_common(BPF_MAP_LOOKUP_AND_DELETE_BATCH,
+				    fd, in_batch, out_batch, keys, values,
+				    count, opts);
+}
+
+int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count,
+			 const struct bpf_map_batch_opts *opts)
+{
+	return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL,
+				    keys, values, count, opts);
+}
+
+int bpf_obj_pin(int fd, const char *pathname)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.pathname = ptr_to_u64((void *)pathname);
+	attr.bpf_fd = fd;
+
+	return sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
+}
+
+int bpf_obj_get(const char *pathname)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.pathname = ptr_to_u64((void *)pathname);
+
+	return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
+}
+
+int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
+		    unsigned int flags)
+{
+	DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, opts,
+		.flags = flags,
+	);
+
+	return bpf_prog_attach_xattr(prog_fd, target_fd, type, &opts);
+}
+
+int bpf_prog_attach_xattr(int prog_fd, int target_fd,
+			  enum bpf_attach_type type,
+			  const struct bpf_prog_attach_opts *opts)
+{
+	union bpf_attr attr;
+
+	if (!OPTS_VALID(opts, bpf_prog_attach_opts))
+		return -EINVAL;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.target_fd	   = target_fd;
+	attr.attach_bpf_fd = prog_fd;
+	attr.attach_type   = type;
+	attr.attach_flags  = OPTS_GET(opts, flags, 0);
+	attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0);
+
+	return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
+}
+
+int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.target_fd	 = target_fd;
+	attr.attach_type = type;
+
+	return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+}
+
+int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.target_fd	 = target_fd;
+	attr.attach_bpf_fd = prog_fd;
+	attr.attach_type = type;
+
+	return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+}
+
+int bpf_link_create(int prog_fd, int target_fd,
+		    enum bpf_attach_type attach_type,
+		    const struct bpf_link_create_opts *opts)
+{
+	__u32 target_btf_id, iter_info_len;
+	union bpf_attr attr;
+
+	if (!OPTS_VALID(opts, bpf_link_create_opts))
+		return -EINVAL;
+
+	iter_info_len = OPTS_GET(opts, iter_info_len, 0);
+	target_btf_id = OPTS_GET(opts, target_btf_id, 0);
+
+	if (iter_info_len && target_btf_id)
+		return -EINVAL;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.link_create.prog_fd = prog_fd;
+	attr.link_create.target_fd = target_fd;
+	attr.link_create.attach_type = attach_type;
+	attr.link_create.flags = OPTS_GET(opts, flags, 0);
+
+	if (iter_info_len) {
+		attr.link_create.iter_info =
+			ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
+		attr.link_create.iter_info_len = iter_info_len;
+	} else if (target_btf_id) {
+		attr.link_create.target_btf_id = target_btf_id;
+	}
+
+	return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
+}
+
+int bpf_link_detach(int link_fd)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.link_detach.link_fd = link_fd;
+
+	return sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr));
+}
+
+int bpf_link_update(int link_fd, int new_prog_fd,
+		    const struct bpf_link_update_opts *opts)
+{
+	union bpf_attr attr;
+
+	if (!OPTS_VALID(opts, bpf_link_update_opts))
+		return -EINVAL;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.link_update.link_fd = link_fd;
+	attr.link_update.new_prog_fd = new_prog_fd;
+	attr.link_update.flags = OPTS_GET(opts, flags, 0);
+	attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
+
+	return sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
+}
+
+int bpf_iter_create(int link_fd)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.iter_create.link_fd = link_fd;
+
+	return sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr));
+}
+
+int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
+		   __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
+{
+	union bpf_attr attr;
+	int ret;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.query.target_fd	= target_fd;
+	attr.query.attach_type	= type;
+	attr.query.query_flags	= query_flags;
+	attr.query.prog_cnt	= *prog_cnt;
+	attr.query.prog_ids	= ptr_to_u64(prog_ids);
+
+	ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr));
+	if (attach_flags)
+		*attach_flags = attr.query.attach_flags;
+	*prog_cnt = attr.query.prog_cnt;
+	return ret;
+}
+
+int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
+		      void *data_out, __u32 *size_out, __u32 *retval,
+		      __u32 *duration)
+{
+	union bpf_attr attr;
+	int ret;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.test.prog_fd = prog_fd;
+	attr.test.data_in = ptr_to_u64(data);
+	attr.test.data_out = ptr_to_u64(data_out);
+	attr.test.data_size_in = size;
+	attr.test.repeat = repeat;
+
+	ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+	if (size_out)
+		*size_out = attr.test.data_size_out;
+	if (retval)
+		*retval = attr.test.retval;
+	if (duration)
+		*duration = attr.test.duration;
+	return ret;
+}
+
+int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
+{
+	union bpf_attr attr;
+	int ret;
+
+	if (!test_attr->data_out && test_attr->data_size_out > 0)
+		return -EINVAL;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.test.prog_fd = test_attr->prog_fd;
+	attr.test.data_in = ptr_to_u64(test_attr->data_in);
+	attr.test.data_out = ptr_to_u64(test_attr->data_out);
+	attr.test.data_size_in = test_attr->data_size_in;
+	attr.test.data_size_out = test_attr->data_size_out;
+	attr.test.ctx_in = ptr_to_u64(test_attr->ctx_in);
+	attr.test.ctx_out = ptr_to_u64(test_attr->ctx_out);
+	attr.test.ctx_size_in = test_attr->ctx_size_in;
+	attr.test.ctx_size_out = test_attr->ctx_size_out;
+	attr.test.repeat = test_attr->repeat;
+
+	ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+	test_attr->data_size_out = attr.test.data_size_out;
+	test_attr->ctx_size_out = attr.test.ctx_size_out;
+	test_attr->retval = attr.test.retval;
+	test_attr->duration = attr.test.duration;
+	return ret;
+}
+
+int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
+{
+	union bpf_attr attr;
+	int ret;
+
+	if (!OPTS_VALID(opts, bpf_test_run_opts))
+		return -EINVAL;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.test.prog_fd = prog_fd;
+	attr.test.cpu = OPTS_GET(opts, cpu, 0);
+	attr.test.flags = OPTS_GET(opts, flags, 0);
+	attr.test.repeat = OPTS_GET(opts, repeat, 0);
+	attr.test.duration = OPTS_GET(opts, duration, 0);
+	attr.test.ctx_size_in = OPTS_GET(opts, ctx_size_in, 0);
+	attr.test.ctx_size_out = OPTS_GET(opts, ctx_size_out, 0);
+	attr.test.data_size_in = OPTS_GET(opts, data_size_in, 0);
+	attr.test.data_size_out = OPTS_GET(opts, data_size_out, 0);
+	attr.test.ctx_in = ptr_to_u64(OPTS_GET(opts, ctx_in, NULL));
+	attr.test.ctx_out = ptr_to_u64(OPTS_GET(opts, ctx_out, NULL));
+	attr.test.data_in = ptr_to_u64(OPTS_GET(opts, data_in, NULL));
+	attr.test.data_out = ptr_to_u64(OPTS_GET(opts, data_out, NULL));
+
+	ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+	OPTS_SET(opts, data_size_out, attr.test.data_size_out);
+	OPTS_SET(opts, ctx_size_out, attr.test.ctx_size_out);
+	OPTS_SET(opts, duration, attr.test.duration);
+	OPTS_SET(opts, retval, attr.test.retval);
+	return ret;
+}
+
+static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
+{
+	union bpf_attr attr;
+	int err;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.start_id = start_id;
+
+	err = sys_bpf(cmd, &attr, sizeof(attr));
+	if (!err)
+		*next_id = attr.next_id;
+
+	return err;
+}
+
+int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
+{
+	return bpf_obj_get_next_id(start_id, next_id, BPF_PROG_GET_NEXT_ID);
+}
+
+int bpf_map_get_next_id(__u32 start_id, __u32 *next_id)
+{
+	return bpf_obj_get_next_id(start_id, next_id, BPF_MAP_GET_NEXT_ID);
+}
+
+int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id)
+{
+	return bpf_obj_get_next_id(start_id, next_id, BPF_BTF_GET_NEXT_ID);
+}
+
+int bpf_link_get_next_id(__u32 start_id, __u32 *next_id)
+{
+	return bpf_obj_get_next_id(start_id, next_id, BPF_LINK_GET_NEXT_ID);
+}
+
+int bpf_prog_get_fd_by_id(__u32 id)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.prog_id = id;
+
+	return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
+}
+
+int bpf_map_get_fd_by_id(__u32 id)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.map_id = id;
+
+	return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
+}
+
+int bpf_btf_get_fd_by_id(__u32 id)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.btf_id = id;
+
+	return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
+}
+
+int bpf_link_get_fd_by_id(__u32 id)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.link_id = id;
+
+	return sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
+}
+
+int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
+{
+	union bpf_attr attr;
+	int err;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.info.bpf_fd = bpf_fd;
+	attr.info.info_len = *info_len;
+	attr.info.info = ptr_to_u64(info);
+
+	err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
+	if (!err)
+		*info_len = attr.info.info_len;
+
+	return err;
+}
+
+int bpf_raw_tracepoint_open(const char *name, int prog_fd)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.raw_tracepoint.name = ptr_to_u64(name);
+	attr.raw_tracepoint.prog_fd = prog_fd;
+
+	return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
+}
+
+int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
+		 bool do_log)
+{
+	union bpf_attr attr = {};
+	int fd;
+
+	attr.btf = ptr_to_u64(btf);
+	attr.btf_size = btf_size;
+
+retry:
+	if (do_log && log_buf && log_buf_size) {
+		attr.btf_log_level = 1;
+		attr.btf_log_size = log_buf_size;
+		attr.btf_log_buf = ptr_to_u64(log_buf);
+	}
+
+	fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
+	if (fd == -1 && !do_log && log_buf && log_buf_size) {
+		do_log = true;
+		goto retry;
+	}
+
+	return fd;
+}
+
+int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
+		      __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
+		      __u64 *probe_addr)
+{
+	union bpf_attr attr = {};
+	int err;
+
+	attr.task_fd_query.pid = pid;
+	attr.task_fd_query.fd = fd;
+	attr.task_fd_query.flags = flags;
+	attr.task_fd_query.buf = ptr_to_u64(buf);
+	attr.task_fd_query.buf_len = *buf_len;
+
+	err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));
+	*buf_len = attr.task_fd_query.buf_len;
+	*prog_id = attr.task_fd_query.prog_id;
+	*fd_type = attr.task_fd_query.fd_type;
+	*probe_offset = attr.task_fd_query.probe_offset;
+	*probe_addr = attr.task_fd_query.probe_addr;
+
+	return err;
+}
+
+int bpf_enable_stats(enum bpf_stats_type type)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.enable_stats.type = type;
+
+	return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
+}
+
+int bpf_prog_bind_map(int prog_fd, int map_fd,
+		      const struct bpf_prog_bind_opts *opts)
+{
+	union bpf_attr attr;
+
+	if (!OPTS_VALID(opts, bpf_prog_bind_opts))
+		return -EINVAL;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.prog_bind_map.prog_fd = prog_fd;
+	attr.prog_bind_map.map_fd = map_fd;
+	attr.prog_bind_map.flags = OPTS_GET(opts, flags, 0);
+
+	return sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr));
+}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
new file mode 100644
index 000000000..92a3eaa15
--- /dev/null
+++ b/tools/lib/bpf/bpf.h
@@ -0,0 +1,292 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * common eBPF ELF operations.
+ *
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not,  see <http://www.gnu.org/licenses>
+ */
+#ifndef __LIBBPF_BPF_H
+#define __LIBBPF_BPF_H
+
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libbpf_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct bpf_create_map_attr {
+	const char *name;
+	enum bpf_map_type map_type;
+	__u32 map_flags;
+	__u32 key_size;
+	__u32 value_size;
+	__u32 max_entries;
+	__u32 numa_node;
+	__u32 btf_fd;
+	__u32 btf_key_type_id;
+	__u32 btf_value_type_id;
+	__u32 map_ifindex;
+	union {
+		__u32 inner_map_fd;
+		__u32 btf_vmlinux_value_type_id;
+	};
+};
+
+LIBBPF_API int
+bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr);
+LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
+				   int key_size, int value_size,
+				   int max_entries, __u32 map_flags, int node);
+LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
+				   int key_size, int value_size,
+				   int max_entries, __u32 map_flags);
+LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size,
+			      int value_size, int max_entries, __u32 map_flags);
+LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type,
+					  const char *name, int key_size,
+					  int inner_map_fd, int max_entries,
+					  __u32 map_flags, int node);
+LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type,
+				     const char *name, int key_size,
+				     int inner_map_fd, int max_entries,
+				     __u32 map_flags);
+
+struct bpf_load_program_attr {
+	enum bpf_prog_type prog_type;
+	enum bpf_attach_type expected_attach_type;
+	const char *name;
+	const struct bpf_insn *insns;
+	size_t insns_cnt;
+	const char *license;
+	union {
+		__u32 kern_version;
+		__u32 attach_prog_fd;
+	};
+	union {
+		__u32 prog_ifindex;
+		__u32 attach_btf_id;
+	};
+	__u32 prog_btf_fd;
+	__u32 func_info_rec_size;
+	const void *func_info;
+	__u32 func_info_cnt;
+	__u32 line_info_rec_size;
+	const void *line_info;
+	__u32 line_info_cnt;
+	__u32 log_level;
+	__u32 prog_flags;
+};
+
+/* Flags to direct loading requirements */
+#define MAPS_RELAX_COMPAT	0x01
+
+/* Recommend log buffer size */
+#define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */
+LIBBPF_API int
+bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
+		       char *log_buf, size_t log_buf_sz);
+LIBBPF_API int bpf_load_program(enum bpf_prog_type type,
+				const struct bpf_insn *insns, size_t insns_cnt,
+				const char *license, __u32 kern_version,
+				char *log_buf, size_t log_buf_sz);
+LIBBPF_API int bpf_verify_program(enum bpf_prog_type type,
+				  const struct bpf_insn *insns,
+				  size_t insns_cnt, __u32 prog_flags,
+				  const char *license, __u32 kern_version,
+				  char *log_buf, size_t log_buf_sz,
+				  int log_level);
+
+LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value,
+				   __u64 flags);
+
+LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value);
+LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void *value,
+					 __u64 flags);
+LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
+					      void *value);
+LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
+LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
+LIBBPF_API int bpf_map_freeze(int fd);
+
+struct bpf_map_batch_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+	__u64 elem_flags;
+	__u64 flags;
+};
+#define bpf_map_batch_opts__last_field flags
+
+LIBBPF_API int bpf_map_delete_batch(int fd, void *keys,
+				    __u32 *count,
+				    const struct bpf_map_batch_opts *opts);
+LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch,
+				    void *keys, void *values, __u32 *count,
+				    const struct bpf_map_batch_opts *opts);
+LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch,
+					void *out_batch, void *keys,
+					void *values, __u32 *count,
+					const struct bpf_map_batch_opts *opts);
+LIBBPF_API int bpf_map_update_batch(int fd, void *keys, void *values,
+				    __u32 *count,
+				    const struct bpf_map_batch_opts *opts);
+
+LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
+LIBBPF_API int bpf_obj_get(const char *pathname);
+
+struct bpf_prog_attach_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+	unsigned int flags;
+	int replace_prog_fd;
+};
+#define bpf_prog_attach_opts__last_field replace_prog_fd
+
+LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
+			       enum bpf_attach_type type, unsigned int flags);
+LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd,
+				     enum bpf_attach_type type,
+				     const struct bpf_prog_attach_opts *opts);
+LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
+LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
+				enum bpf_attach_type type);
+
+union bpf_iter_link_info; /* defined in up-to-date linux/bpf.h */
+struct bpf_link_create_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+	__u32 flags;
+	union bpf_iter_link_info *iter_info;
+	__u32 iter_info_len;
+	__u32 target_btf_id;
+};
+#define bpf_link_create_opts__last_field target_btf_id
+
+LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
+			       enum bpf_attach_type attach_type,
+			       const struct bpf_link_create_opts *opts);
+
+LIBBPF_API int bpf_link_detach(int link_fd);
+
+struct bpf_link_update_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+	__u32 flags;	   /* extra flags */
+	__u32 old_prog_fd; /* expected old program FD */
+};
+#define bpf_link_update_opts__last_field old_prog_fd
+
+LIBBPF_API int bpf_link_update(int link_fd, int new_prog_fd,
+			       const struct bpf_link_update_opts *opts);
+
+LIBBPF_API int bpf_iter_create(int link_fd);
+
+struct bpf_prog_test_run_attr {
+	int prog_fd;
+	int repeat;
+	const void *data_in;
+	__u32 data_size_in;
+	void *data_out;      /* optional */
+	__u32 data_size_out; /* in: max length of data_out
+			      * out: length of data_out */
+	__u32 retval;        /* out: return code of the BPF program */
+	__u32 duration;      /* out: average per repetition in ns */
+	const void *ctx_in; /* optional */
+	__u32 ctx_size_in;
+	void *ctx_out;      /* optional */
+	__u32 ctx_size_out; /* in: max length of ctx_out
+			     * out: length of cxt_out */
+};
+
+LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr);
+
+/*
+ * bpf_prog_test_run does not check that data_out is large enough. Consider
+ * using bpf_prog_test_run_xattr instead.
+ */
+LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data,
+				 __u32 size, void *data_out, __u32 *size_out,
+				 __u32 *retval, __u32 *duration);
+LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
+LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
+LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id);
+LIBBPF_API int bpf_link_get_next_id(__u32 start_id, __u32 *next_id);
+LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_map_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_link_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len);
+LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,
+			      __u32 query_flags, __u32 *attach_flags,
+			      __u32 *prog_ids, __u32 *prog_cnt);
+LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd);
+LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf,
+			    __u32 log_buf_size, bool do_log);
+LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
+				 __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
+				 __u64 *probe_offset, __u64 *probe_addr);
+
+#ifdef __cplusplus
+/* forward-declaring enums in C++ isn't compatible with pure C enums, so
+ * instead define bpf_enable_stats() as accepting int as an input
+ */
+LIBBPF_API int bpf_enable_stats(int type);
+#else
+enum bpf_stats_type; /* defined in up-to-date linux/bpf.h */
+LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type);
+#endif
+
+struct bpf_prog_bind_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+	__u32 flags;
+};
+#define bpf_prog_bind_opts__last_field flags
+
+LIBBPF_API int bpf_prog_bind_map(int prog_fd, int map_fd,
+				 const struct bpf_prog_bind_opts *opts);
+
+struct bpf_test_run_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+	const void *data_in; /* optional */
+	void *data_out;      /* optional */
+	__u32 data_size_in;
+	__u32 data_size_out; /* in: max length of data_out
+			      * out: length of data_out
+			      */
+	const void *ctx_in; /* optional */
+	void *ctx_out;      /* optional */
+	__u32 ctx_size_in;
+	__u32 ctx_size_out; /* in: max length of ctx_out
+			     * out: length of cxt_out
+			     */
+	__u32 retval;        /* out: return code of the BPF program */
+	int repeat;
+	__u32 duration;      /* out: average per repetition in ns */
+	__u32 flags;
+	__u32 cpu;
+};
+#define bpf_test_run_opts__last_field cpu
+
+LIBBPF_API int bpf_prog_test_run_opts(int prog_fd,
+				      struct bpf_test_run_opts *opts);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __LIBBPF_BPF_H */
diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h
new file mode 100644
index 000000000..f05cfc082
--- /dev/null
+++ b/tools/lib/bpf/bpf_core_read.h
@@ -0,0 +1,353 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_CORE_READ_H__
+#define __BPF_CORE_READ_H__
+
+/*
+ * enum bpf_field_info_kind is passed as a second argument into
+ * __builtin_preserve_field_info() built-in to get a specific aspect of
+ * a field, captured as a first argument. __builtin_preserve_field_info(field,
+ * info_kind) returns __u32 integer and produces BTF field relocation, which
+ * is understood and processed by libbpf during BPF object loading. See
+ * selftests/bpf for examples.
+ */
+enum bpf_field_info_kind {
+	BPF_FIELD_BYTE_OFFSET = 0,	/* field byte offset */
+	BPF_FIELD_BYTE_SIZE = 1,
+	BPF_FIELD_EXISTS = 2,		/* field existence in target kernel */
+	BPF_FIELD_SIGNED = 3,
+	BPF_FIELD_LSHIFT_U64 = 4,
+	BPF_FIELD_RSHIFT_U64 = 5,
+};
+
+/* second argument to __builtin_btf_type_id() built-in */
+enum bpf_type_id_kind {
+	BPF_TYPE_ID_LOCAL = 0,		/* BTF type ID in local program */
+	BPF_TYPE_ID_TARGET = 1,		/* BTF type ID in target kernel */
+};
+
+/* second argument to __builtin_preserve_type_info() built-in */
+enum bpf_type_info_kind {
+	BPF_TYPE_EXISTS = 0,		/* type existence in target kernel */
+	BPF_TYPE_SIZE = 1,		/* type size in target kernel */
+};
+
+/* second argument to __builtin_preserve_enum_value() built-in */
+enum bpf_enum_value_kind {
+	BPF_ENUMVAL_EXISTS = 0,		/* enum value existence in kernel */
+	BPF_ENUMVAL_VALUE = 1,		/* enum value value relocation */
+};
+
+#define __CORE_RELO(src, field, info)					      \
+	__builtin_preserve_field_info((src)->field, BPF_FIELD_##info)
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define __CORE_BITFIELD_PROBE_READ(dst, src, fld)			      \
+	bpf_probe_read_kernel(						      \
+			(void *)dst,				      \
+			__CORE_RELO(src, fld, BYTE_SIZE),		      \
+			(const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
+#else
+/* semantics of LSHIFT_64 assumes loading values into low-ordered bytes, so
+ * for big-endian we need to adjust destination pointer accordingly, based on
+ * field byte size
+ */
+#define __CORE_BITFIELD_PROBE_READ(dst, src, fld)			      \
+	bpf_probe_read_kernel(						      \
+			(void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \
+			__CORE_RELO(src, fld, BYTE_SIZE),		      \
+			(const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
+#endif
+
+/*
+ * Extract bitfield, identified by s->field, and return its value as u64.
+ * All this is done in relocatable manner, so bitfield changes such as
+ * signedness, bit size, offset changes, this will be handled automatically.
+ * This version of macro is using bpf_probe_read_kernel() to read underlying
+ * integer storage. Macro functions as an expression and its return type is
+ * bpf_probe_read_kernel()'s return value: 0, on success, <0 on error.
+ */
+#define BPF_CORE_READ_BITFIELD_PROBED(s, field) ({			      \
+	unsigned long long val = 0;					      \
+									      \
+	__CORE_BITFIELD_PROBE_READ(&val, s, field);			      \
+	val <<= __CORE_RELO(s, field, LSHIFT_U64);			      \
+	if (__CORE_RELO(s, field, SIGNED))				      \
+		val = ((long long)val) >> __CORE_RELO(s, field, RSHIFT_U64);  \
+	else								      \
+		val = val >> __CORE_RELO(s, field, RSHIFT_U64);		      \
+	val;								      \
+})
+
+/*
+ * Extract bitfield, identified by s->field, and return its value as u64.
+ * This version of macro is using direct memory reads and should be used from
+ * BPF program types that support such functionality (e.g., typed raw
+ * tracepoints).
+ */
+#define BPF_CORE_READ_BITFIELD(s, field) ({				      \
+	const void *p = (const void *)s + __CORE_RELO(s, field, BYTE_OFFSET); \
+	unsigned long long val;						      \
+									      \
+	/* This is a so-called barrier_var() operation that makes specified   \
+	 * variable "a black box" for optimizing compiler.		      \
+	 * It forces compiler to perform BYTE_OFFSET relocation on p and use  \
+	 * its calculated value in the switch below, instead of applying      \
+	 * the same relocation 4 times for each individual memory load.       \
+	 */								      \
+	asm volatile("" : "=r"(p) : "0"(p));				      \
+									      \
+	switch (__CORE_RELO(s, field, BYTE_SIZE)) {			      \
+	case 1: val = *(const unsigned char *)p; break;			      \
+	case 2: val = *(const unsigned short *)p; break;		      \
+	case 4: val = *(const unsigned int *)p; break;			      \
+	case 8: val = *(const unsigned long long *)p; break;		      \
+	}								      \
+	val <<= __CORE_RELO(s, field, LSHIFT_U64);			      \
+	if (__CORE_RELO(s, field, SIGNED))				      \
+		val = ((long long)val) >> __CORE_RELO(s, field, RSHIFT_U64);  \
+	else								      \
+		val = val >> __CORE_RELO(s, field, RSHIFT_U64);		      \
+	val;								      \
+})
+
+/*
+ * Convenience macro to check that field actually exists in target kernel's.
+ * Returns:
+ *    1, if matching field is present in target kernel;
+ *    0, if no matching field found.
+ */
+#define bpf_core_field_exists(field)					    \
+	__builtin_preserve_field_info(field, BPF_FIELD_EXISTS)
+
+/*
+ * Convenience macro to get the byte size of a field. Works for integers,
+ * struct/unions, pointers, arrays, and enums.
+ */
+#define bpf_core_field_size(field)					    \
+	__builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE)
+
+/*
+ * Convenience macro to get BTF type ID of a specified type, using a local BTF
+ * information. Return 32-bit unsigned integer with type ID from program's own
+ * BTF. Always succeeds.
+ */
+#define bpf_core_type_id_local(type)					    \
+	__builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_LOCAL)
+
+/*
+ * Convenience macro to get BTF type ID of a target kernel's type that matches
+ * specified local type.
+ * Returns:
+ *    - valid 32-bit unsigned type ID in kernel BTF;
+ *    - 0, if no matching type was found in a target kernel BTF.
+ */
+#define bpf_core_type_id_kernel(type)					    \
+	__builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_TARGET)
+
+/*
+ * Convenience macro to check that provided named type
+ * (struct/union/enum/typedef) exists in a target kernel.
+ * Returns:
+ *    1, if such type is present in target kernel's BTF;
+ *    0, if no matching type is found.
+ */
+#define bpf_core_type_exists(type)					    \
+	__builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_EXISTS)
+
+/*
+ * Convenience macro to get the byte size of a provided named type
+ * (struct/union/enum/typedef) in a target kernel.
+ * Returns:
+ *    >= 0 size (in bytes), if type is present in target kernel's BTF;
+ *    0, if no matching type is found.
+ */
+#define bpf_core_type_size(type)					    \
+	__builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_SIZE)
+
+/*
+ * Convenience macro to check that provided enumerator value is defined in
+ * a target kernel.
+ * Returns:
+ *    1, if specified enum type and its enumerator value are present in target
+ *    kernel's BTF;
+ *    0, if no matching enum and/or enum value within that enum is found.
+ */
+#define bpf_core_enum_value_exists(enum_type, enum_value)		    \
+	__builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_EXISTS)
+
+/*
+ * Convenience macro to get the integer value of an enumerator value in
+ * a target kernel.
+ * Returns:
+ *    64-bit value, if specified enum type and its enumerator value are
+ *    present in target kernel's BTF;
+ *    0, if no matching enum and/or enum value within that enum is found.
+ */
+#define bpf_core_enum_value(enum_type, enum_value)			    \
+	__builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_VALUE)
+
+/*
+ * bpf_core_read() abstracts away bpf_probe_read_kernel() call and captures
+ * offset relocation for source address using __builtin_preserve_access_index()
+ * built-in, provided by Clang.
+ *
+ * __builtin_preserve_access_index() takes as an argument an expression of
+ * taking an address of a field within struct/union. It makes compiler emit
+ * a relocation, which records BTF type ID describing root struct/union and an
+ * accessor string which describes exact embedded field that was used to take
+ * an address. See detailed description of this relocation format and
+ * semantics in comments to struct bpf_field_reloc in libbpf_internal.h.
+ *
+ * This relocation allows libbpf to adjust BPF instruction to use correct
+ * actual field offset, based on target kernel BTF type that matches original
+ * (local) BTF, used to record relocation.
+ */
+#define bpf_core_read(dst, sz, src)					    \
+	bpf_probe_read_kernel(dst, sz,					    \
+			      (const void *)__builtin_preserve_access_index(src))
+
+/*
+ * bpf_core_read_str() is a thin wrapper around bpf_probe_read_str()
+ * additionally emitting BPF CO-RE field relocation for specified source
+ * argument.
+ */
+#define bpf_core_read_str(dst, sz, src)					    \
+	bpf_probe_read_kernel_str(dst, sz,				    \
+				  (const void *)__builtin_preserve_access_index(src))
+
+#define ___concat(a, b) a ## b
+#define ___apply(fn, n) ___concat(fn, n)
+#define ___nth(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, __11, N, ...) N
+
+/*
+ * return number of provided arguments; used for switch-based variadic macro
+ * definitions (see ___last, ___arrow, etc below)
+ */
+#define ___narg(...) ___nth(_, ##__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+/*
+ * return 0 if no arguments are passed, N - otherwise; used for
+ * recursively-defined macros to specify termination (0) case, and generic
+ * (N) case (e.g., ___read_ptrs, ___core_read)
+ */
+#define ___empty(...) ___nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0)
+
+#define ___last1(x) x
+#define ___last2(a, x) x
+#define ___last3(a, b, x) x
+#define ___last4(a, b, c, x) x
+#define ___last5(a, b, c, d, x) x
+#define ___last6(a, b, c, d, e, x) x
+#define ___last7(a, b, c, d, e, f, x) x
+#define ___last8(a, b, c, d, e, f, g, x) x
+#define ___last9(a, b, c, d, e, f, g, h, x) x
+#define ___last10(a, b, c, d, e, f, g, h, i, x) x
+#define ___last(...) ___apply(___last, ___narg(__VA_ARGS__))(__VA_ARGS__)
+
+#define ___nolast2(a, _) a
+#define ___nolast3(a, b, _) a, b
+#define ___nolast4(a, b, c, _) a, b, c
+#define ___nolast5(a, b, c, d, _) a, b, c, d
+#define ___nolast6(a, b, c, d, e, _) a, b, c, d, e
+#define ___nolast7(a, b, c, d, e, f, _) a, b, c, d, e, f
+#define ___nolast8(a, b, c, d, e, f, g, _) a, b, c, d, e, f, g
+#define ___nolast9(a, b, c, d, e, f, g, h, _) a, b, c, d, e, f, g, h
+#define ___nolast10(a, b, c, d, e, f, g, h, i, _) a, b, c, d, e, f, g, h, i
+#define ___nolast(...) ___apply(___nolast, ___narg(__VA_ARGS__))(__VA_ARGS__)
+
+#define ___arrow1(a) a
+#define ___arrow2(a, b) a->b
+#define ___arrow3(a, b, c) a->b->c
+#define ___arrow4(a, b, c, d) a->b->c->d
+#define ___arrow5(a, b, c, d, e) a->b->c->d->e
+#define ___arrow6(a, b, c, d, e, f) a->b->c->d->e->f
+#define ___arrow7(a, b, c, d, e, f, g) a->b->c->d->e->f->g
+#define ___arrow8(a, b, c, d, e, f, g, h) a->b->c->d->e->f->g->h
+#define ___arrow9(a, b, c, d, e, f, g, h, i) a->b->c->d->e->f->g->h->i
+#define ___arrow10(a, b, c, d, e, f, g, h, i, j) a->b->c->d->e->f->g->h->i->j
+#define ___arrow(...) ___apply(___arrow, ___narg(__VA_ARGS__))(__VA_ARGS__)
+
+#define ___type(...) typeof(___arrow(__VA_ARGS__))
+
+#define ___read(read_fn, dst, src_type, src, accessor)			    \
+	read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor)
+
+/* "recursively" read a sequence of inner pointers using local __t var */
+#define ___rd_first(src, a) ___read(bpf_core_read, &__t, ___type(src), src, a);
+#define ___rd_last(...)							    \
+	___read(bpf_core_read, &__t,					    \
+		___type(___nolast(__VA_ARGS__)), __t, ___last(__VA_ARGS__));
+#define ___rd_p1(...) const void *__t; ___rd_first(__VA_ARGS__)
+#define ___rd_p2(...) ___rd_p1(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p3(...) ___rd_p2(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p4(...) ___rd_p3(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p5(...) ___rd_p4(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p6(...) ___rd_p5(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p7(...) ___rd_p6(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p8(...) ___rd_p7(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p9(...) ___rd_p8(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___read_ptrs(src, ...)						    \
+	___apply(___rd_p, ___narg(__VA_ARGS__))(src, __VA_ARGS__)
+
+#define ___core_read0(fn, dst, src, a)					    \
+	___read(fn, dst, ___type(src), src, a);
+#define ___core_readN(fn, dst, src, ...)				    \
+	___read_ptrs(src, ___nolast(__VA_ARGS__))			    \
+	___read(fn, dst, ___type(src, ___nolast(__VA_ARGS__)), __t,	    \
+		___last(__VA_ARGS__));
+#define ___core_read(fn, dst, src, a, ...)				    \
+	___apply(___core_read, ___empty(__VA_ARGS__))(fn, dst,		    \
+						      src, a, ##__VA_ARGS__)
+
+/*
+ * BPF_CORE_READ_INTO() is a more performance-conscious variant of
+ * BPF_CORE_READ(), in which final field is read into user-provided storage.
+ * See BPF_CORE_READ() below for more details on general usage.
+ */
+#define BPF_CORE_READ_INTO(dst, src, a, ...)				    \
+	({								    \
+		___core_read(bpf_core_read, dst, (src), a, ##__VA_ARGS__)   \
+	})
+
+/*
+ * BPF_CORE_READ_STR_INTO() does same "pointer chasing" as
+ * BPF_CORE_READ() for intermediate pointers, but then executes (and returns
+ * corresponding error code) bpf_core_read_str() for final string read.
+ */
+#define BPF_CORE_READ_STR_INTO(dst, src, a, ...)			    \
+	({								    \
+		___core_read(bpf_core_read_str, dst, (src), a, ##__VA_ARGS__)\
+	})
+
+/*
+ * BPF_CORE_READ() is used to simplify BPF CO-RE relocatable read, especially
+ * when there are few pointer chasing steps.
+ * E.g., what in non-BPF world (or in BPF w/ BCC) would be something like:
+ *	int x = s->a.b.c->d.e->f->g;
+ * can be succinctly achieved using BPF_CORE_READ as:
+ *	int x = BPF_CORE_READ(s, a.b.c, d.e, f, g);
+ *
+ * BPF_CORE_READ will decompose above statement into 4 bpf_core_read (BPF
+ * CO-RE relocatable bpf_probe_read_kernel() wrapper) calls, logically
+ * equivalent to:
+ * 1. const void *__t = s->a.b.c;
+ * 2. __t = __t->d.e;
+ * 3. __t = __t->f;
+ * 4. return __t->g;
+ *
+ * Equivalence is logical, because there is a heavy type casting/preservation
+ * involved, as well as all the reads are happening through
+ * bpf_probe_read_kernel() calls using __builtin_preserve_access_index() to
+ * emit CO-RE relocations.
+ *
+ * N.B. Only up to 9 "field accessors" are supported, which should be more
+ * than enough for any practical purpose.
+ */
+#define BPF_CORE_READ(src, a, ...)					    \
+	({								    \
+		___type((src), a, ##__VA_ARGS__) __r;			    \
+		BPF_CORE_READ_INTO(&__r, (src), a, ##__VA_ARGS__);	    \
+		__r;							    \
+	})
+
+#endif
+
diff --git a/tools/lib/bpf/bpf_endian.h b/tools/lib/bpf/bpf_endian.h
new file mode 100644
index 000000000..ec9db4fec
--- /dev/null
+++ b/tools/lib/bpf/bpf_endian.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_ENDIAN__
+#define __BPF_ENDIAN__
+
+/*
+ * Isolate byte #n and put it into byte #m, for __u##b type.
+ * E.g., moving byte #6 (nnnnnnnn) into byte #1 (mmmmmmmm) for __u64:
+ * 1) xxxxxxxx nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx
+ * 2) nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx 00000000
+ * 3) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn
+ * 4) 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn 00000000
+ */
+#define ___bpf_mvb(x, b, n, m) ((__u##b)(x) << (b-(n+1)*8) >> (b-8) << (m*8))
+
+#define ___bpf_swab16(x) ((__u16)(			\
+			  ___bpf_mvb(x, 16, 0, 1) |	\
+			  ___bpf_mvb(x, 16, 1, 0)))
+
+#define ___bpf_swab32(x) ((__u32)(			\
+			  ___bpf_mvb(x, 32, 0, 3) |	\
+			  ___bpf_mvb(x, 32, 1, 2) |	\
+			  ___bpf_mvb(x, 32, 2, 1) |	\
+			  ___bpf_mvb(x, 32, 3, 0)))
+
+#define ___bpf_swab64(x) ((__u64)(			\
+			  ___bpf_mvb(x, 64, 0, 7) |	\
+			  ___bpf_mvb(x, 64, 1, 6) |	\
+			  ___bpf_mvb(x, 64, 2, 5) |	\
+			  ___bpf_mvb(x, 64, 3, 4) |	\
+			  ___bpf_mvb(x, 64, 4, 3) |	\
+			  ___bpf_mvb(x, 64, 5, 2) |	\
+			  ___bpf_mvb(x, 64, 6, 1) |	\
+			  ___bpf_mvb(x, 64, 7, 0)))
+
+/* LLVM's BPF target selects the endianness of the CPU
+ * it compiles on, or the user specifies (bpfel/bpfeb),
+ * respectively. The used __BYTE_ORDER__ is defined by
+ * the compiler, we cannot rely on __BYTE_ORDER from
+ * libc headers, since it doesn't reflect the actual
+ * requested byte order.
+ *
+ * Note, LLVM's BPF target has different __builtin_bswapX()
+ * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE
+ * in bpfel and bpfeb case, which means below, that we map
+ * to cpu_to_be16(). We could use it unconditionally in BPF
+ * case, but better not rely on it, so that this header here
+ * can be used from application and BPF program side, which
+ * use different targets.
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# define __bpf_ntohs(x)			__builtin_bswap16(x)
+# define __bpf_htons(x)			__builtin_bswap16(x)
+# define __bpf_constant_ntohs(x)	___bpf_swab16(x)
+# define __bpf_constant_htons(x)	___bpf_swab16(x)
+# define __bpf_ntohl(x)			__builtin_bswap32(x)
+# define __bpf_htonl(x)			__builtin_bswap32(x)
+# define __bpf_constant_ntohl(x)	___bpf_swab32(x)
+# define __bpf_constant_htonl(x)	___bpf_swab32(x)
+# define __bpf_be64_to_cpu(x)		__builtin_bswap64(x)
+# define __bpf_cpu_to_be64(x)		__builtin_bswap64(x)
+# define __bpf_constant_be64_to_cpu(x)	___bpf_swab64(x)
+# define __bpf_constant_cpu_to_be64(x)	___bpf_swab64(x)
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+# define __bpf_ntohs(x)			(x)
+# define __bpf_htons(x)			(x)
+# define __bpf_constant_ntohs(x)	(x)
+# define __bpf_constant_htons(x)	(x)
+# define __bpf_ntohl(x)			(x)
+# define __bpf_htonl(x)			(x)
+# define __bpf_constant_ntohl(x)	(x)
+# define __bpf_constant_htonl(x)	(x)
+# define __bpf_be64_to_cpu(x)		(x)
+# define __bpf_cpu_to_be64(x)		(x)
+# define __bpf_constant_be64_to_cpu(x)  (x)
+# define __bpf_constant_cpu_to_be64(x)  (x)
+#else
+# error "Fix your compiler's __BYTE_ORDER__?!"
+#endif
+
+#define bpf_htons(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_htons(x) : __bpf_htons(x))
+#define bpf_ntohs(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_ntohs(x) : __bpf_ntohs(x))
+#define bpf_htonl(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_htonl(x) : __bpf_htonl(x))
+#define bpf_ntohl(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_ntohl(x) : __bpf_ntohl(x))
+#define bpf_cpu_to_be64(x)			\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_cpu_to_be64(x) : __bpf_cpu_to_be64(x))
+#define bpf_be64_to_cpu(x)			\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_be64_to_cpu(x) : __bpf_be64_to_cpu(x))
+
+#endif /* __BPF_ENDIAN__ */
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
new file mode 100644
index 000000000..1c389b0f5
--- /dev/null
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_HELPERS__
+#define __BPF_HELPERS__
+
+/*
+ * Note that bpf programs need to include either
+ * vmlinux.h (auto-generated from BTF) or linux/types.h
+ * in advance since bpf_helper_defs.h uses such types
+ * as __u64.
+ */
+#include "bpf_helper_defs.h"
+
+#define __uint(name, val) int (*name)[val]
+#define __type(name, val) typeof(val) *name
+#define __array(name, val) typeof(val) *name[]
+
+/* Helper macro to print out debug messages */
+#define bpf_printk(fmt, ...)				\
+({							\
+	char ____fmt[] = fmt;				\
+	bpf_trace_printk(____fmt, sizeof(____fmt),	\
+			 ##__VA_ARGS__);		\
+})
+
+/*
+ * Helper macro to place programs, maps, license in
+ * different sections in elf_bpf file. Section names
+ * are interpreted by elf_bpf loader
+ */
+#define SEC(NAME) __attribute__((section(NAME), used))
+
+#ifndef __always_inline
+#define __always_inline __attribute__((always_inline))
+#endif
+#ifndef __noinline
+#define __noinline __attribute__((noinline))
+#endif
+#ifndef __weak
+#define __weak __attribute__((weak))
+#endif
+
+/*
+ * Helper macro to manipulate data structures
+ */
+
+/* offsetof() definition that uses __builtin_offset() might not preserve field
+ * offset CO-RE relocation properly, so force-redefine offsetof() using
+ * old-school approach which works with CO-RE correctly
+ */
+#undef offsetof
+#define offsetof(type, member)	((unsigned long)&((type *)0)->member)
+
+/* redefined container_of() to ensure we use the above offsetof() macro */
+#undef container_of
+#define container_of(ptr, type, member)				\
+	({							\
+		void *__mptr = (void *)(ptr);			\
+		((type *)(__mptr - offsetof(type, member)));	\
+	})
+
+/*
+ * Helper macro to throw a compilation error if __bpf_unreachable() gets
+ * built into the resulting code. This works given BPF back end does not
+ * implement __builtin_trap(). This is useful to assert that certain paths
+ * of the program code are never used and hence eliminated by the compiler.
+ *
+ * For example, consider a switch statement that covers known cases used by
+ * the program. __bpf_unreachable() can then reside in the default case. If
+ * the program gets extended such that a case is not covered in the switch
+ * statement, then it will throw a build error due to the default case not
+ * being compiled out.
+ */
+#ifndef __bpf_unreachable
+# define __bpf_unreachable()	__builtin_trap()
+#endif
+
+/*
+ * Helper function to perform a tail call with a constant/immediate map slot.
+ */
+#if __clang_major__ >= 8 && defined(__bpf__)
+static __always_inline void
+bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
+{
+	if (!__builtin_constant_p(slot))
+		__bpf_unreachable();
+
+	/*
+	 * Provide a hard guarantee that LLVM won't optimize setting r2 (map
+	 * pointer) and r3 (constant map index) from _different paths_ ending
+	 * up at the _same_ call insn as otherwise we won't be able to use the
+	 * jmpq/nopl retpoline-free patching by the x86-64 JIT in the kernel
+	 * given they mismatch. See also d2e4c1e6c294 ("bpf: Constant map key
+	 * tracking for prog array pokes") for details on verifier tracking.
+	 *
+	 * Note on clobber list: we need to stay in-line with BPF calling
+	 * convention, so even if we don't end up using r0, r4, r5, we need
+	 * to mark them as clobber so that LLVM doesn't end up using them
+	 * before / after the call.
+	 */
+	asm volatile("r1 = %[ctx]\n\t"
+		     "r2 = %[map]\n\t"
+		     "r3 = %[slot]\n\t"
+		     "call 12"
+		     :: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot)
+		     : "r0", "r1", "r2", "r3", "r4", "r5");
+}
+#endif
+
+/*
+ * Helper structure used by eBPF C program
+ * to describe BPF map attributes to libbpf loader
+ */
+struct bpf_map_def {
+	unsigned int type;
+	unsigned int key_size;
+	unsigned int value_size;
+	unsigned int max_entries;
+	unsigned int map_flags;
+};
+
+enum libbpf_pin_type {
+	LIBBPF_PIN_NONE,
+	/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
+	LIBBPF_PIN_BY_NAME,
+};
+
+enum libbpf_tristate {
+	TRI_NO = 0,
+	TRI_YES = 1,
+	TRI_MODULE = 2,
+};
+
+#define __kconfig __attribute__((section(".kconfig")))
+#define __ksym __attribute__((section(".ksyms")))
+
+#endif
diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c
new file mode 100644
index 000000000..3ed1a27b5
--- /dev/null
+++ b/tools/lib/bpf/bpf_prog_linfo.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2018 Facebook */
+
+#include <string.h>
+#include <stdlib.h>
+#include <linux/err.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "libbpf_internal.h"
+
+struct bpf_prog_linfo {
+	void *raw_linfo;
+	void *raw_jited_linfo;
+	__u32 *nr_jited_linfo_per_func;
+	__u32 *jited_linfo_func_idx;
+	__u32 nr_linfo;
+	__u32 nr_jited_func;
+	__u32 rec_size;
+	__u32 jited_rec_size;
+};
+
+static int dissect_jited_func(struct bpf_prog_linfo *prog_linfo,
+			      const __u64 *ksym_func, const __u32 *ksym_len)
+{
+	__u32 nr_jited_func, nr_linfo;
+	const void *raw_jited_linfo;
+	const __u64 *jited_linfo;
+	__u64 last_jited_linfo;
+	/*
+	 * Index to raw_jited_linfo:
+	 *      i: Index for searching the next ksym_func
+	 * prev_i: Index to the last found ksym_func
+	 */
+	__u32 i, prev_i;
+	__u32 f; /* Index to ksym_func */
+
+	raw_jited_linfo = prog_linfo->raw_jited_linfo;
+	jited_linfo = raw_jited_linfo;
+	if (ksym_func[0] != *jited_linfo)
+		goto errout;
+
+	prog_linfo->jited_linfo_func_idx[0] = 0;
+	nr_jited_func = prog_linfo->nr_jited_func;
+	nr_linfo = prog_linfo->nr_linfo;
+
+	for (prev_i = 0, i = 1, f = 1;
+	     i < nr_linfo && f < nr_jited_func;
+	     i++) {
+		raw_jited_linfo += prog_linfo->jited_rec_size;
+		last_jited_linfo = *jited_linfo;
+		jited_linfo = raw_jited_linfo;
+
+		if (ksym_func[f] == *jited_linfo) {
+			prog_linfo->jited_linfo_func_idx[f] = i;
+
+			/* Sanity check */
+			if (last_jited_linfo - ksym_func[f - 1] + 1 >
+			    ksym_len[f - 1])
+				goto errout;
+
+			prog_linfo->nr_jited_linfo_per_func[f - 1] =
+				i - prev_i;
+			prev_i = i;
+
+			/*
+			 * The ksym_func[f] is found in jited_linfo.
+			 * Look for the next one.
+			 */
+			f++;
+		} else if (*jited_linfo <= last_jited_linfo) {
+			/* Ensure the addr is increasing _within_ a func */
+			goto errout;
+		}
+	}
+
+	if (f != nr_jited_func)
+		goto errout;
+
+	prog_linfo->nr_jited_linfo_per_func[nr_jited_func - 1] =
+		nr_linfo - prev_i;
+
+	return 0;
+
+errout:
+	return -EINVAL;
+}
+
+void bpf_prog_linfo__free(struct bpf_prog_linfo *prog_linfo)
+{
+	if (!prog_linfo)
+		return;
+
+	free(prog_linfo->raw_linfo);
+	free(prog_linfo->raw_jited_linfo);
+	free(prog_linfo->nr_jited_linfo_per_func);
+	free(prog_linfo->jited_linfo_func_idx);
+	free(prog_linfo);
+}
+
+struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)
+{
+	struct bpf_prog_linfo *prog_linfo;
+	__u32 nr_linfo, nr_jited_func;
+	__u64 data_sz;
+
+	nr_linfo = info->nr_line_info;
+
+	if (!nr_linfo)
+		return NULL;
+
+	/*
+	 * The min size that bpf_prog_linfo has to access for
+	 * searching purpose.
+	 */
+	if (info->line_info_rec_size <
+	    offsetof(struct bpf_line_info, file_name_off))
+		return NULL;
+
+	prog_linfo = calloc(1, sizeof(*prog_linfo));
+	if (!prog_linfo)
+		return NULL;
+
+	/* Copy xlated line_info */
+	prog_linfo->nr_linfo = nr_linfo;
+	prog_linfo->rec_size = info->line_info_rec_size;
+	data_sz = (__u64)nr_linfo * prog_linfo->rec_size;
+	prog_linfo->raw_linfo = malloc(data_sz);
+	if (!prog_linfo->raw_linfo)
+		goto err_free;
+	memcpy(prog_linfo->raw_linfo, (void *)(long)info->line_info, data_sz);
+
+	nr_jited_func = info->nr_jited_ksyms;
+	if (!nr_jited_func ||
+	    !info->jited_line_info ||
+	    info->nr_jited_line_info != nr_linfo ||
+	    info->jited_line_info_rec_size < sizeof(__u64) ||
+	    info->nr_jited_func_lens != nr_jited_func ||
+	    !info->jited_ksyms ||
+	    !info->jited_func_lens)
+		/* Not enough info to provide jited_line_info */
+		return prog_linfo;
+
+	/* Copy jited_line_info */
+	prog_linfo->nr_jited_func = nr_jited_func;
+	prog_linfo->jited_rec_size = info->jited_line_info_rec_size;
+	data_sz = (__u64)nr_linfo * prog_linfo->jited_rec_size;
+	prog_linfo->raw_jited_linfo = malloc(data_sz);
+	if (!prog_linfo->raw_jited_linfo)
+		goto err_free;
+	memcpy(prog_linfo->raw_jited_linfo,
+	       (void *)(long)info->jited_line_info, data_sz);
+
+	/* Number of jited_line_info per jited func */
+	prog_linfo->nr_jited_linfo_per_func = malloc(nr_jited_func *
+						     sizeof(__u32));
+	if (!prog_linfo->nr_jited_linfo_per_func)
+		goto err_free;
+
+	/*
+	 * For each jited func,
+	 * the start idx to the "linfo" and "jited_linfo" array,
+	 */
+	prog_linfo->jited_linfo_func_idx = malloc(nr_jited_func *
+						  sizeof(__u32));
+	if (!prog_linfo->jited_linfo_func_idx)
+		goto err_free;
+
+	if (dissect_jited_func(prog_linfo,
+			       (__u64 *)(long)info->jited_ksyms,
+			       (__u32 *)(long)info->jited_func_lens))
+		goto err_free;
+
+	return prog_linfo;
+
+err_free:
+	bpf_prog_linfo__free(prog_linfo);
+	return NULL;
+}
+
+const struct bpf_line_info *
+bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo,
+				__u64 addr, __u32 func_idx, __u32 nr_skip)
+{
+	__u32 jited_rec_size, rec_size, nr_linfo, start, i;
+	const void *raw_jited_linfo, *raw_linfo;
+	const __u64 *jited_linfo;
+
+	if (func_idx >= prog_linfo->nr_jited_func)
+		return NULL;
+
+	nr_linfo = prog_linfo->nr_jited_linfo_per_func[func_idx];
+	if (nr_skip >= nr_linfo)
+		return NULL;
+
+	start = prog_linfo->jited_linfo_func_idx[func_idx] + nr_skip;
+	jited_rec_size = prog_linfo->jited_rec_size;
+	raw_jited_linfo = prog_linfo->raw_jited_linfo +
+		(start * jited_rec_size);
+	jited_linfo = raw_jited_linfo;
+	if (addr < *jited_linfo)
+		return NULL;
+
+	nr_linfo -= nr_skip;
+	rec_size = prog_linfo->rec_size;
+	raw_linfo = prog_linfo->raw_linfo + (start * rec_size);
+	for (i = 0; i < nr_linfo; i++) {
+		if (addr < *jited_linfo)
+			break;
+
+		raw_linfo += rec_size;
+		raw_jited_linfo += jited_rec_size;
+		jited_linfo = raw_jited_linfo;
+	}
+
+	return raw_linfo - rec_size;
+}
+
+const struct bpf_line_info *
+bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo,
+		      __u32 insn_off, __u32 nr_skip)
+{
+	const struct bpf_line_info *linfo;
+	__u32 rec_size, nr_linfo, i;
+	const void *raw_linfo;
+
+	nr_linfo = prog_linfo->nr_linfo;
+	if (nr_skip >= nr_linfo)
+		return NULL;
+
+	rec_size = prog_linfo->rec_size;
+	raw_linfo = prog_linfo->raw_linfo + (nr_skip * rec_size);
+	linfo = raw_linfo;
+	if (insn_off < linfo->insn_off)
+		return NULL;
+
+	nr_linfo -= nr_skip;
+	for (i = 0; i < nr_linfo; i++) {
+		if (insn_off < linfo->insn_off)
+			break;
+
+		raw_linfo += rec_size;
+		linfo = raw_linfo;
+	}
+
+	return raw_linfo - rec_size;
+}
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
new file mode 100644
index 000000000..1c2e91ee0
--- /dev/null
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -0,0 +1,450 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_TRACING_H__
+#define __BPF_TRACING_H__
+
+/* Scan the ARCH passed in from ARCH env variable (see Makefile) */
+#if defined(__TARGET_ARCH_x86)
+	#define bpf_target_x86
+	#define bpf_target_defined
+#elif defined(__TARGET_ARCH_s390)
+	#define bpf_target_s390
+	#define bpf_target_defined
+#elif defined(__TARGET_ARCH_arm)
+	#define bpf_target_arm
+	#define bpf_target_defined
+#elif defined(__TARGET_ARCH_arm64)
+	#define bpf_target_arm64
+	#define bpf_target_defined
+#elif defined(__TARGET_ARCH_mips)
+	#define bpf_target_mips
+	#define bpf_target_defined
+#elif defined(__TARGET_ARCH_powerpc)
+	#define bpf_target_powerpc
+	#define bpf_target_defined
+#elif defined(__TARGET_ARCH_sparc)
+	#define bpf_target_sparc
+	#define bpf_target_defined
+#else
+	#undef bpf_target_defined
+#endif
+
+/* Fall back to what the compiler says */
+#ifndef bpf_target_defined
+#if defined(__x86_64__)
+	#define bpf_target_x86
+#elif defined(__s390__)
+	#define bpf_target_s390
+#elif defined(__arm__)
+	#define bpf_target_arm
+#elif defined(__aarch64__)
+	#define bpf_target_arm64
+#elif defined(__mips__)
+	#define bpf_target_mips
+#elif defined(__powerpc__)
+	#define bpf_target_powerpc
+#elif defined(__sparc__)
+	#define bpf_target_sparc
+#endif
+#endif
+
+#if defined(bpf_target_x86)
+
+#if defined(__KERNEL__) || defined(__VMLINUX_H__)
+
+#define PT_REGS_PARM1(x) ((x)->di)
+#define PT_REGS_PARM2(x) ((x)->si)
+#define PT_REGS_PARM3(x) ((x)->dx)
+#define PT_REGS_PARM4(x) ((x)->cx)
+#define PT_REGS_PARM5(x) ((x)->r8)
+#define PT_REGS_RET(x) ((x)->sp)
+#define PT_REGS_FP(x) ((x)->bp)
+#define PT_REGS_RC(x) ((x)->ax)
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->ip)
+
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), di)
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), si)
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), dx)
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), cx)
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8)
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), sp)
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), bp)
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), ax)
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), ip)
+
+#else
+
+#ifdef __i386__
+/* i386 kernel is built with -mregparm=3 */
+#define PT_REGS_PARM1(x) ((x)->eax)
+#define PT_REGS_PARM2(x) ((x)->edx)
+#define PT_REGS_PARM3(x) ((x)->ecx)
+#define PT_REGS_PARM4(x) 0
+#define PT_REGS_PARM5(x) 0
+#define PT_REGS_RET(x) ((x)->esp)
+#define PT_REGS_FP(x) ((x)->ebp)
+#define PT_REGS_RC(x) ((x)->eax)
+#define PT_REGS_SP(x) ((x)->esp)
+#define PT_REGS_IP(x) ((x)->eip)
+
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), eax)
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), edx)
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), ecx)
+#define PT_REGS_PARM4_CORE(x) 0
+#define PT_REGS_PARM5_CORE(x) 0
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), esp)
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), ebp)
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), eax)
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), esp)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), eip)
+
+#else
+
+#define PT_REGS_PARM1(x) ((x)->rdi)
+#define PT_REGS_PARM2(x) ((x)->rsi)
+#define PT_REGS_PARM3(x) ((x)->rdx)
+#define PT_REGS_PARM4(x) ((x)->rcx)
+#define PT_REGS_PARM5(x) ((x)->r8)
+#define PT_REGS_RET(x) ((x)->rsp)
+#define PT_REGS_FP(x) ((x)->rbp)
+#define PT_REGS_RC(x) ((x)->rax)
+#define PT_REGS_SP(x) ((x)->rsp)
+#define PT_REGS_IP(x) ((x)->rip)
+
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), rdi)
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), rsi)
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), rdx)
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), rcx)
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8)
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), rsp)
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), rbp)
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), rax)
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), rsp)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), rip)
+
+#endif
+#endif
+
+#elif defined(bpf_target_s390)
+
+/* s390 provides user_pt_regs instead of struct pt_regs to userspace */
+struct pt_regs;
+#define PT_REGS_S390 const volatile user_pt_regs
+#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2])
+#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3])
+#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4])
+#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5])
+#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6])
+#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14])
+/* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11])
+#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2])
+#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15])
+#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr)
+
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[3])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[4])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[5])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[6])
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[14])
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[11])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[15])
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), psw.addr)
+
+#elif defined(bpf_target_arm)
+
+#define PT_REGS_PARM1(x) ((x)->uregs[0])
+#define PT_REGS_PARM2(x) ((x)->uregs[1])
+#define PT_REGS_PARM3(x) ((x)->uregs[2])
+#define PT_REGS_PARM4(x) ((x)->uregs[3])
+#define PT_REGS_PARM5(x) ((x)->uregs[4])
+#define PT_REGS_RET(x) ((x)->uregs[14])
+#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->uregs[0])
+#define PT_REGS_SP(x) ((x)->uregs[13])
+#define PT_REGS_IP(x) ((x)->uregs[12])
+
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), uregs[0])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), uregs[1])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), uregs[2])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), uregs[3])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), uregs[4])
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), uregs[14])
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), uregs[11])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), uregs[0])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), uregs[13])
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), uregs[12])
+
+#elif defined(bpf_target_arm64)
+
+/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */
+struct pt_regs;
+#define PT_REGS_ARM64 const volatile struct user_pt_regs
+#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0])
+#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1])
+#define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2])
+#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3])
+#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4])
+#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30])
+/* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29])
+#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0])
+#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp)
+#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc)
+
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[1])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[2])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[3])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[4])
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[30])
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[29])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), sp)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), pc)
+
+#elif defined(bpf_target_mips)
+
+#define PT_REGS_PARM1(x) ((x)->regs[4])
+#define PT_REGS_PARM2(x) ((x)->regs[5])
+#define PT_REGS_PARM3(x) ((x)->regs[6])
+#define PT_REGS_PARM4(x) ((x)->regs[7])
+#define PT_REGS_PARM5(x) ((x)->regs[8])
+#define PT_REGS_RET(x) ((x)->regs[31])
+#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->regs[2])
+#define PT_REGS_SP(x) ((x)->regs[29])
+#define PT_REGS_IP(x) ((x)->cp0_epc)
+
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), regs[4])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), regs[5])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), regs[6])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), regs[7])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), regs[8])
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), regs[31])
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), regs[30])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[2])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), regs[29])
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), cp0_epc)
+
+#elif defined(bpf_target_powerpc)
+
+#define PT_REGS_PARM1(x) ((x)->gpr[3])
+#define PT_REGS_PARM2(x) ((x)->gpr[4])
+#define PT_REGS_PARM3(x) ((x)->gpr[5])
+#define PT_REGS_PARM4(x) ((x)->gpr[6])
+#define PT_REGS_PARM5(x) ((x)->gpr[7])
+#define PT_REGS_RC(x) ((x)->gpr[3])
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->nip)
+
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), gpr[3])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), gpr[4])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), gpr[5])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), gpr[6])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), gpr[7])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), gpr[3])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), nip)
+
+#elif defined(bpf_target_sparc)
+
+#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0])
+#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1])
+#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2])
+#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3])
+#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4])
+#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7])
+#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0])
+#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP])
+
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I1])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I2])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I3])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I4])
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I7])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), u_regs[UREG_FP])
+
+/* Should this also be a bpf_target check for the sparc case? */
+#if defined(__arch64__)
+#define PT_REGS_IP(x) ((x)->tpc)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), tpc)
+#else
+#define PT_REGS_IP(x) ((x)->pc)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), pc)
+#endif
+
+#endif
+
+#if defined(bpf_target_powerpc)
+#define BPF_KPROBE_READ_RET_IP(ip, ctx)		({ (ip) = (ctx)->link; })
+#define BPF_KRETPROBE_READ_RET_IP		BPF_KPROBE_READ_RET_IP
+#elif defined(bpf_target_sparc)
+#define BPF_KPROBE_READ_RET_IP(ip, ctx)		({ (ip) = PT_REGS_RET(ctx); })
+#define BPF_KRETPROBE_READ_RET_IP		BPF_KPROBE_READ_RET_IP
+#else
+#define BPF_KPROBE_READ_RET_IP(ip, ctx)					    \
+	({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
+#define BPF_KRETPROBE_READ_RET_IP(ip, ctx)				    \
+	({ bpf_probe_read_kernel(&(ip), sizeof(ip),			    \
+			  (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
+#endif
+
+#define ___bpf_concat(a, b) a ## b
+#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
+#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
+#define ___bpf_narg(...) \
+	___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#define ___bpf_empty(...) \
+	___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0)
+
+#define ___bpf_ctx_cast0() ctx
+#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0]
+#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1]
+#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2]
+#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3]
+#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4]
+#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5]
+#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6]
+#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7]
+#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8]
+#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9]
+#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10]
+#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11]
+#define ___bpf_ctx_cast(args...) \
+	___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args)
+
+/*
+ * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and
+ * similar kinds of BPF programs, that accept input arguments as a single
+ * pointer to untyped u64 array, where each u64 can actually be a typed
+ * pointer or integer of different size. Instead of requring user to write
+ * manual casts and work with array elements by index, BPF_PROG macro
+ * allows user to declare a list of named and typed input arguments in the
+ * same syntax as for normal C function. All the casting is hidden and
+ * performed transparently, while user code can just assume working with
+ * function arguments of specified type and name.
+ *
+ * Original raw context argument is preserved as well as 'ctx' argument.
+ * This is useful when using BPF helpers that expect original context
+ * as one of the parameters (e.g., for bpf_perf_event_output()).
+ */
+#define BPF_PROG(name, args...)						    \
+name(unsigned long long *ctx);						    \
+static __attribute__((always_inline)) typeof(name(0))			    \
+____##name(unsigned long long *ctx, ##args);				    \
+typeof(name(0)) name(unsigned long long *ctx)				    \
+{									    \
+	_Pragma("GCC diagnostic push")					    \
+	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")		    \
+	return ____##name(___bpf_ctx_cast(args));			    \
+	_Pragma("GCC diagnostic pop")					    \
+}									    \
+static __attribute__((always_inline)) typeof(name(0))			    \
+____##name(unsigned long long *ctx, ##args)
+
+struct pt_regs;
+
+#define ___bpf_kprobe_args0() ctx
+#define ___bpf_kprobe_args1(x) \
+	___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx)
+#define ___bpf_kprobe_args2(x, args...) \
+	___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx)
+#define ___bpf_kprobe_args3(x, args...) \
+	___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx)
+#define ___bpf_kprobe_args4(x, args...) \
+	___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx)
+#define ___bpf_kprobe_args5(x, args...) \
+	___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx)
+#define ___bpf_kprobe_args(args...) \
+	___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args)
+
+/*
+ * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for
+ * tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific
+ * low-level way of getting kprobe input arguments from struct pt_regs, and
+ * provides a familiar typed and named function arguments syntax and
+ * semantics of accessing kprobe input paremeters.
+ *
+ * Original struct pt_regs* context is preserved as 'ctx' argument. This might
+ * be necessary when using BPF helpers like bpf_perf_event_output().
+ */
+#define BPF_KPROBE(name, args...)					    \
+name(struct pt_regs *ctx);						    \
+static __attribute__((always_inline)) typeof(name(0))			    \
+____##name(struct pt_regs *ctx, ##args);				    \
+typeof(name(0)) name(struct pt_regs *ctx)				    \
+{									    \
+	_Pragma("GCC diagnostic push")					    \
+	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")		    \
+	return ____##name(___bpf_kprobe_args(args));			    \
+	_Pragma("GCC diagnostic pop")					    \
+}									    \
+static __attribute__((always_inline)) typeof(name(0))			    \
+____##name(struct pt_regs *ctx, ##args)
+
+#define ___bpf_kretprobe_args0() ctx
+#define ___bpf_kretprobe_args1(x) \
+	___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx)
+#define ___bpf_kretprobe_args(args...) \
+	___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args)
+
+/*
+ * BPF_KRETPROBE is similar to BPF_KPROBE, except, it only provides optional
+ * return value (in addition to `struct pt_regs *ctx`), but no input
+ * arguments, because they will be clobbered by the time probed function
+ * returns.
+ */
+#define BPF_KRETPROBE(name, args...)					    \
+name(struct pt_regs *ctx);						    \
+static __attribute__((always_inline)) typeof(name(0))			    \
+____##name(struct pt_regs *ctx, ##args);				    \
+typeof(name(0)) name(struct pt_regs *ctx)				    \
+{									    \
+	_Pragma("GCC diagnostic push")					    \
+	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")		    \
+	return ____##name(___bpf_kretprobe_args(args));			    \
+	_Pragma("GCC diagnostic pop")					    \
+}									    \
+static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
+
+#define ___bpf_fill0(arr, p, x) do {} while (0)
+#define ___bpf_fill1(arr, p, x) arr[p] = x
+#define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args)
+#define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args)
+#define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args)
+#define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args)
+#define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args)
+#define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args)
+#define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args)
+#define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args)
+#define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args)
+#define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args)
+#define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args)
+#define ___bpf_fill(arr, args...) \
+	___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args)
+
+/*
+ * BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values
+ * in a structure.
+ */
+#define BPF_SEQ_PRINTF(seq, fmt, args...)			\
+({								\
+	static const char ___fmt[] = fmt;			\
+	unsigned long long ___param[___bpf_narg(args)];		\
+								\
+	_Pragma("GCC diagnostic push")				\
+	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")	\
+	___bpf_fill(___param, args);				\
+	_Pragma("GCC diagnostic pop")				\
+								\
+	bpf_seq_printf(seq, ___fmt, sizeof(___fmt),		\
+		       ___param, sizeof(___param));		\
+})
+
+#endif
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
new file mode 100644
index 000000000..f7c48b1fb
--- /dev/null
+++ b/tools/lib/bpf/btf.c
@@ -0,0 +1,4484 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2018 Facebook */
+
+#include <byteswap.h>
+#include <endian.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/utsname.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/btf.h>
+#include <gelf.h>
+#include "btf.h"
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+#include "hashmap.h"
+
+#define BTF_MAX_NR_TYPES 0x7fffffffU
+#define BTF_MAX_STR_OFFSET 0x7fffffffU
+
+static struct btf_type btf_void;
+
+struct btf {
+	/* raw BTF data in native endianness */
+	void *raw_data;
+	/* raw BTF data in non-native endianness */
+	void *raw_data_swapped;
+	__u32 raw_size;
+	/* whether target endianness differs from the native one */
+	bool swapped_endian;
+
+	/*
+	 * When BTF is loaded from an ELF or raw memory it is stored
+	 * in a contiguous memory block. The hdr, type_data, and, strs_data
+	 * point inside that memory region to their respective parts of BTF
+	 * representation:
+	 *
+	 * +--------------------------------+
+	 * |  Header  |  Types  |  Strings  |
+	 * +--------------------------------+
+	 * ^          ^         ^
+	 * |          |         |
+	 * hdr        |         |
+	 * types_data-+         |
+	 * strs_data------------+
+	 *
+	 * If BTF data is later modified, e.g., due to types added or
+	 * removed, BTF deduplication performed, etc, this contiguous
+	 * representation is broken up into three independently allocated
+	 * memory regions to be able to modify them independently.
+	 * raw_data is nulled out at that point, but can be later allocated
+	 * and cached again if user calls btf__get_raw_data(), at which point
+	 * raw_data will contain a contiguous copy of header, types, and
+	 * strings:
+	 *
+	 * +----------+  +---------+  +-----------+
+	 * |  Header  |  |  Types  |  |  Strings  |
+	 * +----------+  +---------+  +-----------+
+	 * ^             ^            ^
+	 * |             |            |
+	 * hdr           |            |
+	 * types_data----+            |
+	 * strs_data------------------+
+	 *
+	 *               +----------+---------+-----------+
+	 *               |  Header  |  Types  |  Strings  |
+	 * raw_data----->+----------+---------+-----------+
+	 */
+	struct btf_header *hdr;
+
+	void *types_data;
+	size_t types_data_cap; /* used size stored in hdr->type_len */
+
+	/* type ID to `struct btf_type *` lookup index */
+	__u32 *type_offs;
+	size_t type_offs_cap;
+	__u32 nr_types;
+
+	void *strs_data;
+	size_t strs_data_cap; /* used size stored in hdr->str_len */
+
+	/* lookup index for each unique string in strings section */
+	struct hashmap *strs_hash;
+	/* whether strings are already deduplicated */
+	bool strs_deduped;
+	/* BTF object FD, if loaded into kernel */
+	int fd;
+
+	/* Pointer size (in bytes) for a target architecture of this BTF */
+	int ptr_sz;
+};
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+	return (__u64) (unsigned long) ptr;
+}
+
+/* Ensure given dynamically allocated memory region pointed to by *data* with
+ * capacity of *cap_cnt* elements each taking *elem_sz* bytes has enough
+ * memory to accomodate *add_cnt* new elements, assuming *cur_cnt* elements
+ * are already used. At most *max_cnt* elements can be ever allocated.
+ * If necessary, memory is reallocated and all existing data is copied over,
+ * new pointer to the memory region is stored at *data, new memory region
+ * capacity (in number of elements) is stored in *cap.
+ * On success, memory pointer to the beginning of unused memory is returned.
+ * On error, NULL is returned.
+ */
+void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
+		  size_t cur_cnt, size_t max_cnt, size_t add_cnt)
+{
+	size_t new_cnt;
+	void *new_data;
+
+	if (cur_cnt + add_cnt <= *cap_cnt)
+		return *data + cur_cnt * elem_sz;
+
+	/* requested more than the set limit */
+	if (cur_cnt + add_cnt > max_cnt)
+		return NULL;
+
+	new_cnt = *cap_cnt;
+	new_cnt += new_cnt / 4;		  /* expand by 25% */
+	if (new_cnt < 16)		  /* but at least 16 elements */
+		new_cnt = 16;
+	if (new_cnt > max_cnt)		  /* but not exceeding a set limit */
+		new_cnt = max_cnt;
+	if (new_cnt < cur_cnt + add_cnt)  /* also ensure we have enough memory */
+		new_cnt = cur_cnt + add_cnt;
+
+	new_data = libbpf_reallocarray(*data, new_cnt, elem_sz);
+	if (!new_data)
+		return NULL;
+
+	/* zero out newly allocated portion of memory */
+	memset(new_data + (*cap_cnt) * elem_sz, 0, (new_cnt - *cap_cnt) * elem_sz);
+
+	*data = new_data;
+	*cap_cnt = new_cnt;
+	return new_data + cur_cnt * elem_sz;
+}
+
+/* Ensure given dynamically allocated memory region has enough allocated space
+ * to accommodate *need_cnt* elements of size *elem_sz* bytes each
+ */
+int btf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt)
+{
+	void *p;
+
+	if (need_cnt <= *cap_cnt)
+		return 0;
+
+	p = btf_add_mem(data, cap_cnt, elem_sz, *cap_cnt, SIZE_MAX, need_cnt - *cap_cnt);
+	if (!p)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off)
+{
+	__u32 *p;
+
+	p = btf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32),
+			btf->nr_types + 1, BTF_MAX_NR_TYPES, 1);
+	if (!p)
+		return -ENOMEM;
+
+	*p = type_off;
+	return 0;
+}
+
+static void btf_bswap_hdr(struct btf_header *h)
+{
+	h->magic = bswap_16(h->magic);
+	h->hdr_len = bswap_32(h->hdr_len);
+	h->type_off = bswap_32(h->type_off);
+	h->type_len = bswap_32(h->type_len);
+	h->str_off = bswap_32(h->str_off);
+	h->str_len = bswap_32(h->str_len);
+}
+
+static int btf_parse_hdr(struct btf *btf)
+{
+	struct btf_header *hdr = btf->hdr;
+	__u32 meta_left;
+
+	if (btf->raw_size < sizeof(struct btf_header)) {
+		pr_debug("BTF header not found\n");
+		return -EINVAL;
+	}
+
+	if (hdr->magic == bswap_16(BTF_MAGIC)) {
+		btf->swapped_endian = true;
+		if (bswap_32(hdr->hdr_len) != sizeof(struct btf_header)) {
+			pr_warn("Can't load BTF with non-native endianness due to unsupported header length %u\n",
+				bswap_32(hdr->hdr_len));
+			return -ENOTSUP;
+		}
+		btf_bswap_hdr(hdr);
+	} else if (hdr->magic != BTF_MAGIC) {
+		pr_debug("Invalid BTF magic: %x\n", hdr->magic);
+		return -EINVAL;
+	}
+
+	if (btf->raw_size < hdr->hdr_len) {
+		pr_debug("BTF header len %u larger than data size %u\n",
+			 hdr->hdr_len, btf->raw_size);
+		return -EINVAL;
+	}
+
+	meta_left = btf->raw_size - hdr->hdr_len;
+	if (meta_left < (long long)hdr->str_off + hdr->str_len) {
+		pr_debug("Invalid BTF total size: %u\n", btf->raw_size);
+		return -EINVAL;
+	}
+
+	if ((long long)hdr->type_off + hdr->type_len > hdr->str_off) {
+		pr_debug("Invalid BTF data sections layout: type data at %u + %u, strings data at %u + %u\n",
+			 hdr->type_off, hdr->type_len, hdr->str_off, hdr->str_len);
+		return -EINVAL;
+	}
+
+	if (hdr->type_off % 4) {
+		pr_debug("BTF type section is not aligned to 4 bytes\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int btf_parse_str_sec(struct btf *btf)
+{
+	const struct btf_header *hdr = btf->hdr;
+	const char *start = btf->strs_data;
+	const char *end = start + btf->hdr->str_len;
+
+	if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET ||
+	    start[0] || end[-1]) {
+		pr_debug("Invalid BTF string section\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int btf_type_size(const struct btf_type *t)
+{
+	const int base_size = sizeof(struct btf_type);
+	__u16 vlen = btf_vlen(t);
+
+	switch (btf_kind(t)) {
+	case BTF_KIND_FWD:
+	case BTF_KIND_CONST:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_PTR:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_FUNC:
+		return base_size;
+	case BTF_KIND_INT:
+		return base_size + sizeof(__u32);
+	case BTF_KIND_ENUM:
+		return base_size + vlen * sizeof(struct btf_enum);
+	case BTF_KIND_ARRAY:
+		return base_size + sizeof(struct btf_array);
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+		return base_size + vlen * sizeof(struct btf_member);
+	case BTF_KIND_FUNC_PROTO:
+		return base_size + vlen * sizeof(struct btf_param);
+	case BTF_KIND_VAR:
+		return base_size + sizeof(struct btf_var);
+	case BTF_KIND_DATASEC:
+		return base_size + vlen * sizeof(struct btf_var_secinfo);
+	default:
+		pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t));
+		return -EINVAL;
+	}
+}
+
+static void btf_bswap_type_base(struct btf_type *t)
+{
+	t->name_off = bswap_32(t->name_off);
+	t->info = bswap_32(t->info);
+	t->type = bswap_32(t->type);
+}
+
+static int btf_bswap_type_rest(struct btf_type *t)
+{
+	struct btf_var_secinfo *v;
+	struct btf_member *m;
+	struct btf_array *a;
+	struct btf_param *p;
+	struct btf_enum *e;
+	__u16 vlen = btf_vlen(t);
+	int i;
+
+	switch (btf_kind(t)) {
+	case BTF_KIND_FWD:
+	case BTF_KIND_CONST:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_PTR:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_FUNC:
+		return 0;
+	case BTF_KIND_INT:
+		*(__u32 *)(t + 1) = bswap_32(*(__u32 *)(t + 1));
+		return 0;
+	case BTF_KIND_ENUM:
+		for (i = 0, e = btf_enum(t); i < vlen; i++, e++) {
+			e->name_off = bswap_32(e->name_off);
+			e->val = bswap_32(e->val);
+		}
+		return 0;
+	case BTF_KIND_ARRAY:
+		a = btf_array(t);
+		a->type = bswap_32(a->type);
+		a->index_type = bswap_32(a->index_type);
+		a->nelems = bswap_32(a->nelems);
+		return 0;
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+		for (i = 0, m = btf_members(t); i < vlen; i++, m++) {
+			m->name_off = bswap_32(m->name_off);
+			m->type = bswap_32(m->type);
+			m->offset = bswap_32(m->offset);
+		}
+		return 0;
+	case BTF_KIND_FUNC_PROTO:
+		for (i = 0, p = btf_params(t); i < vlen; i++, p++) {
+			p->name_off = bswap_32(p->name_off);
+			p->type = bswap_32(p->type);
+		}
+		return 0;
+	case BTF_KIND_VAR:
+		btf_var(t)->linkage = bswap_32(btf_var(t)->linkage);
+		return 0;
+	case BTF_KIND_DATASEC:
+		for (i = 0, v = btf_var_secinfos(t); i < vlen; i++, v++) {
+			v->type = bswap_32(v->type);
+			v->offset = bswap_32(v->offset);
+			v->size = bswap_32(v->size);
+		}
+		return 0;
+	default:
+		pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t));
+		return -EINVAL;
+	}
+}
+
+static int btf_parse_type_sec(struct btf *btf)
+{
+	struct btf_header *hdr = btf->hdr;
+	void *next_type = btf->types_data;
+	void *end_type = next_type + hdr->type_len;
+	int err, i = 0, type_size;
+
+	/* VOID (type_id == 0) is specially handled by btf__get_type_by_id(),
+	 * so ensure we can never properly use its offset from index by
+	 * setting it to a large value
+	 */
+	err = btf_add_type_idx_entry(btf, UINT_MAX);
+	if (err)
+		return err;
+
+	while (next_type + sizeof(struct btf_type) <= end_type) {
+		i++;
+
+		if (btf->swapped_endian)
+			btf_bswap_type_base(next_type);
+
+		type_size = btf_type_size(next_type);
+		if (type_size < 0)
+			return type_size;
+		if (next_type + type_size > end_type) {
+			pr_warn("BTF type [%d] is malformed\n", i);
+			return -EINVAL;
+		}
+
+		if (btf->swapped_endian && btf_bswap_type_rest(next_type))
+			return -EINVAL;
+
+		err = btf_add_type_idx_entry(btf, next_type - btf->types_data);
+		if (err)
+			return err;
+
+		next_type += type_size;
+		btf->nr_types++;
+	}
+
+	if (next_type != end_type) {
+		pr_warn("BTF types data is malformed\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+__u32 btf__get_nr_types(const struct btf *btf)
+{
+	return btf->nr_types;
+}
+
+/* internal helper returning non-const pointer to a type */
+static struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id)
+{
+	if (type_id == 0)
+		return &btf_void;
+
+	return btf->types_data + btf->type_offs[type_id];
+}
+
+const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
+{
+	if (type_id > btf->nr_types)
+		return NULL;
+	return btf_type_by_id((struct btf *)btf, type_id);
+}
+
+static int determine_ptr_size(const struct btf *btf)
+{
+	const struct btf_type *t;
+	const char *name;
+	int i;
+
+	for (i = 1; i <= btf->nr_types; i++) {
+		t = btf__type_by_id(btf, i);
+		if (!btf_is_int(t))
+			continue;
+
+		name = btf__name_by_offset(btf, t->name_off);
+		if (!name)
+			continue;
+
+		if (strcmp(name, "long int") == 0 ||
+		    strcmp(name, "long unsigned int") == 0) {
+			if (t->size != 4 && t->size != 8)
+				continue;
+			return t->size;
+		}
+	}
+
+	return -1;
+}
+
+static size_t btf_ptr_sz(const struct btf *btf)
+{
+	if (!btf->ptr_sz)
+		((struct btf *)btf)->ptr_sz = determine_ptr_size(btf);
+	return btf->ptr_sz < 0 ? sizeof(void *) : btf->ptr_sz;
+}
+
+/* Return pointer size this BTF instance assumes. The size is heuristically
+ * determined by looking for 'long' or 'unsigned long' integer type and
+ * recording its size in bytes. If BTF type information doesn't have any such
+ * type, this function returns 0. In the latter case, native architecture's
+ * pointer size is assumed, so will be either 4 or 8, depending on
+ * architecture that libbpf was compiled for. It's possible to override
+ * guessed value by using btf__set_pointer_size() API.
+ */
+size_t btf__pointer_size(const struct btf *btf)
+{
+	if (!btf->ptr_sz)
+		((struct btf *)btf)->ptr_sz = determine_ptr_size(btf);
+
+	if (btf->ptr_sz < 0)
+		/* not enough BTF type info to guess */
+		return 0;
+
+	return btf->ptr_sz;
+}
+
+/* Override or set pointer size in bytes. Only values of 4 and 8 are
+ * supported.
+ */
+int btf__set_pointer_size(struct btf *btf, size_t ptr_sz)
+{
+	if (ptr_sz != 4 && ptr_sz != 8)
+		return -EINVAL;
+	btf->ptr_sz = ptr_sz;
+	return 0;
+}
+
+static bool is_host_big_endian(void)
+{
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+	return false;
+#elif __BYTE_ORDER == __BIG_ENDIAN
+	return true;
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+}
+
+enum btf_endianness btf__endianness(const struct btf *btf)
+{
+	if (is_host_big_endian())
+		return btf->swapped_endian ? BTF_LITTLE_ENDIAN : BTF_BIG_ENDIAN;
+	else
+		return btf->swapped_endian ? BTF_BIG_ENDIAN : BTF_LITTLE_ENDIAN;
+}
+
+int btf__set_endianness(struct btf *btf, enum btf_endianness endian)
+{
+	if (endian != BTF_LITTLE_ENDIAN && endian != BTF_BIG_ENDIAN)
+		return -EINVAL;
+
+	btf->swapped_endian = is_host_big_endian() != (endian == BTF_BIG_ENDIAN);
+	if (!btf->swapped_endian) {
+		free(btf->raw_data_swapped);
+		btf->raw_data_swapped = NULL;
+	}
+	return 0;
+}
+
+static bool btf_type_is_void(const struct btf_type *t)
+{
+	return t == &btf_void || btf_is_fwd(t);
+}
+
+static bool btf_type_is_void_or_null(const struct btf_type *t)
+{
+	return !t || btf_type_is_void(t);
+}
+
+#define MAX_RESOLVE_DEPTH 32
+
+__s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
+{
+	const struct btf_array *array;
+	const struct btf_type *t;
+	__u32 nelems = 1;
+	__s64 size = -1;
+	int i;
+
+	t = btf__type_by_id(btf, type_id);
+	for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t);
+	     i++) {
+		switch (btf_kind(t)) {
+		case BTF_KIND_INT:
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+		case BTF_KIND_ENUM:
+		case BTF_KIND_DATASEC:
+			size = t->size;
+			goto done;
+		case BTF_KIND_PTR:
+			size = btf_ptr_sz(btf);
+			goto done;
+		case BTF_KIND_TYPEDEF:
+		case BTF_KIND_VOLATILE:
+		case BTF_KIND_CONST:
+		case BTF_KIND_RESTRICT:
+		case BTF_KIND_VAR:
+			type_id = t->type;
+			break;
+		case BTF_KIND_ARRAY:
+			array = btf_array(t);
+			if (nelems && array->nelems > UINT32_MAX / nelems)
+				return -E2BIG;
+			nelems *= array->nelems;
+			type_id = array->type;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		t = btf__type_by_id(btf, type_id);
+	}
+
+done:
+	if (size < 0)
+		return -EINVAL;
+	if (nelems && size > UINT32_MAX / nelems)
+		return -E2BIG;
+
+	return nelems * size;
+}
+
+int btf__align_of(const struct btf *btf, __u32 id)
+{
+	const struct btf_type *t = btf__type_by_id(btf, id);
+	__u16 kind = btf_kind(t);
+
+	switch (kind) {
+	case BTF_KIND_INT:
+	case BTF_KIND_ENUM:
+		return min(btf_ptr_sz(btf), (size_t)t->size);
+	case BTF_KIND_PTR:
+		return btf_ptr_sz(btf);
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_CONST:
+	case BTF_KIND_RESTRICT:
+		return btf__align_of(btf, t->type);
+	case BTF_KIND_ARRAY:
+		return btf__align_of(btf, btf_array(t)->type);
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		const struct btf_member *m = btf_members(t);
+		__u16 vlen = btf_vlen(t);
+		int i, max_align = 1, align;
+
+		for (i = 0; i < vlen; i++, m++) {
+			align = btf__align_of(btf, m->type);
+			if (align <= 0)
+				return align;
+			max_align = max(max_align, align);
+
+			/* if field offset isn't aligned according to field
+			 * type's alignment, then struct must be packed
+			 */
+			if (btf_member_bitfield_size(t, i) == 0 &&
+			    (m->offset % (8 * align)) != 0)
+				return 1;
+		}
+
+		/* if struct/union size isn't a multiple of its alignment,
+		 * then struct must be packed
+		 */
+		if ((t->size % max_align) != 0)
+			return 1;
+
+		return max_align;
+	}
+	default:
+		pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t));
+		return 0;
+	}
+}
+
+int btf__resolve_type(const struct btf *btf, __u32 type_id)
+{
+	const struct btf_type *t;
+	int depth = 0;
+
+	t = btf__type_by_id(btf, type_id);
+	while (depth < MAX_RESOLVE_DEPTH &&
+	       !btf_type_is_void_or_null(t) &&
+	       (btf_is_mod(t) || btf_is_typedef(t) || btf_is_var(t))) {
+		type_id = t->type;
+		t = btf__type_by_id(btf, type_id);
+		depth++;
+	}
+
+	if (depth == MAX_RESOLVE_DEPTH || btf_type_is_void_or_null(t))
+		return -EINVAL;
+
+	return type_id;
+}
+
+__s32 btf__find_by_name(const struct btf *btf, const char *type_name)
+{
+	__u32 i;
+
+	if (!strcmp(type_name, "void"))
+		return 0;
+
+	for (i = 1; i <= btf->nr_types; i++) {
+		const struct btf_type *t = btf__type_by_id(btf, i);
+		const char *name = btf__name_by_offset(btf, t->name_off);
+
+		if (name && !strcmp(type_name, name))
+			return i;
+	}
+
+	return -ENOENT;
+}
+
+__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
+			     __u32 kind)
+{
+	__u32 i;
+
+	if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void"))
+		return 0;
+
+	for (i = 1; i <= btf->nr_types; i++) {
+		const struct btf_type *t = btf__type_by_id(btf, i);
+		const char *name;
+
+		if (btf_kind(t) != kind)
+			continue;
+		name = btf__name_by_offset(btf, t->name_off);
+		if (name && !strcmp(type_name, name))
+			return i;
+	}
+
+	return -ENOENT;
+}
+
+static bool btf_is_modifiable(const struct btf *btf)
+{
+	return (void *)btf->hdr != btf->raw_data;
+}
+
+void btf__free(struct btf *btf)
+{
+	if (IS_ERR_OR_NULL(btf))
+		return;
+
+	if (btf->fd >= 0)
+		close(btf->fd);
+
+	if (btf_is_modifiable(btf)) {
+		/* if BTF was modified after loading, it will have a split
+		 * in-memory representation for header, types, and strings
+		 * sections, so we need to free all of them individually. It
+		 * might still have a cached contiguous raw data present,
+		 * which will be unconditionally freed below.
+		 */
+		free(btf->hdr);
+		free(btf->types_data);
+		free(btf->strs_data);
+	}
+	free(btf->raw_data);
+	free(btf->raw_data_swapped);
+	free(btf->type_offs);
+	free(btf);
+}
+
+struct btf *btf__new_empty(void)
+{
+	struct btf *btf;
+
+	btf = calloc(1, sizeof(*btf));
+	if (!btf)
+		return ERR_PTR(-ENOMEM);
+
+	btf->fd = -1;
+	btf->ptr_sz = sizeof(void *);
+	btf->swapped_endian = false;
+
+	/* +1 for empty string at offset 0 */
+	btf->raw_size = sizeof(struct btf_header) + 1;
+	btf->raw_data = calloc(1, btf->raw_size);
+	if (!btf->raw_data) {
+		free(btf);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	btf->hdr = btf->raw_data;
+	btf->hdr->hdr_len = sizeof(struct btf_header);
+	btf->hdr->magic = BTF_MAGIC;
+	btf->hdr->version = BTF_VERSION;
+
+	btf->types_data = btf->raw_data + btf->hdr->hdr_len;
+	btf->strs_data = btf->raw_data + btf->hdr->hdr_len;
+	btf->hdr->str_len = 1; /* empty string at offset 0 */
+
+	return btf;
+}
+
+struct btf *btf__new(const void *data, __u32 size)
+{
+	struct btf *btf;
+	int err;
+
+	btf = calloc(1, sizeof(struct btf));
+	if (!btf)
+		return ERR_PTR(-ENOMEM);
+
+	btf->raw_data = malloc(size);
+	if (!btf->raw_data) {
+		err = -ENOMEM;
+		goto done;
+	}
+	memcpy(btf->raw_data, data, size);
+	btf->raw_size = size;
+
+	btf->hdr = btf->raw_data;
+	err = btf_parse_hdr(btf);
+	if (err)
+		goto done;
+
+	btf->strs_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->str_off;
+	btf->types_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->type_off;
+
+	err = btf_parse_str_sec(btf);
+	err = err ?: btf_parse_type_sec(btf);
+	if (err)
+		goto done;
+
+	btf->fd = -1;
+
+done:
+	if (err) {
+		btf__free(btf);
+		return ERR_PTR(err);
+	}
+
+	return btf;
+}
+
+struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
+{
+	Elf_Data *btf_data = NULL, *btf_ext_data = NULL;
+	int err = 0, fd = -1, idx = 0;
+	struct btf *btf = NULL;
+	Elf_Scn *scn = NULL;
+	Elf *elf = NULL;
+	GElf_Ehdr ehdr;
+
+	if (elf_version(EV_CURRENT) == EV_NONE) {
+		pr_warn("failed to init libelf for %s\n", path);
+		return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
+	}
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		err = -errno;
+		pr_warn("failed to open %s: %s\n", path, strerror(errno));
+		return ERR_PTR(err);
+	}
+
+	err = -LIBBPF_ERRNO__FORMAT;
+
+	elf = elf_begin(fd, ELF_C_READ, NULL);
+	if (!elf) {
+		pr_warn("failed to open %s as ELF file\n", path);
+		goto done;
+	}
+	if (!gelf_getehdr(elf, &ehdr)) {
+		pr_warn("failed to get EHDR from %s\n", path);
+		goto done;
+	}
+	if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) {
+		pr_warn("failed to get e_shstrndx from %s\n", path);
+		goto done;
+	}
+
+	while ((scn = elf_nextscn(elf, scn)) != NULL) {
+		GElf_Shdr sh;
+		char *name;
+
+		idx++;
+		if (gelf_getshdr(scn, &sh) != &sh) {
+			pr_warn("failed to get section(%d) header from %s\n",
+				idx, path);
+			goto done;
+		}
+		name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
+		if (!name) {
+			pr_warn("failed to get section(%d) name from %s\n",
+				idx, path);
+			goto done;
+		}
+		if (strcmp(name, BTF_ELF_SEC) == 0) {
+			btf_data = elf_getdata(scn, 0);
+			if (!btf_data) {
+				pr_warn("failed to get section(%d, %s) data from %s\n",
+					idx, name, path);
+				goto done;
+			}
+			continue;
+		} else if (btf_ext && strcmp(name, BTF_EXT_ELF_SEC) == 0) {
+			btf_ext_data = elf_getdata(scn, 0);
+			if (!btf_ext_data) {
+				pr_warn("failed to get section(%d, %s) data from %s\n",
+					idx, name, path);
+				goto done;
+			}
+			continue;
+		}
+	}
+
+	err = 0;
+
+	if (!btf_data) {
+		err = -ENOENT;
+		goto done;
+	}
+	btf = btf__new(btf_data->d_buf, btf_data->d_size);
+	if (IS_ERR(btf))
+		goto done;
+
+	switch (gelf_getclass(elf)) {
+	case ELFCLASS32:
+		btf__set_pointer_size(btf, 4);
+		break;
+	case ELFCLASS64:
+		btf__set_pointer_size(btf, 8);
+		break;
+	default:
+		pr_warn("failed to get ELF class (bitness) for %s\n", path);
+		break;
+	}
+
+	if (btf_ext && btf_ext_data) {
+		*btf_ext = btf_ext__new(btf_ext_data->d_buf,
+					btf_ext_data->d_size);
+		if (IS_ERR(*btf_ext))
+			goto done;
+	} else if (btf_ext) {
+		*btf_ext = NULL;
+	}
+done:
+	if (elf)
+		elf_end(elf);
+	close(fd);
+
+	if (err)
+		return ERR_PTR(err);
+	/*
+	 * btf is always parsed before btf_ext, so no need to clean up
+	 * btf_ext, if btf loading failed
+	 */
+	if (IS_ERR(btf))
+		return btf;
+	if (btf_ext && IS_ERR(*btf_ext)) {
+		btf__free(btf);
+		err = PTR_ERR(*btf_ext);
+		return ERR_PTR(err);
+	}
+	return btf;
+}
+
+struct btf *btf__parse_raw(const char *path)
+{
+	struct btf *btf = NULL;
+	void *data = NULL;
+	FILE *f = NULL;
+	__u16 magic;
+	int err = 0;
+	long sz;
+
+	f = fopen(path, "rb");
+	if (!f) {
+		err = -errno;
+		goto err_out;
+	}
+
+	/* check BTF magic */
+	if (fread(&magic, 1, sizeof(magic), f) < sizeof(magic)) {
+		err = -EIO;
+		goto err_out;
+	}
+	if (magic != BTF_MAGIC && magic != bswap_16(BTF_MAGIC)) {
+		/* definitely not a raw BTF */
+		err = -EPROTO;
+		goto err_out;
+	}
+
+	/* get file size */
+	if (fseek(f, 0, SEEK_END)) {
+		err = -errno;
+		goto err_out;
+	}
+	sz = ftell(f);
+	if (sz < 0) {
+		err = -errno;
+		goto err_out;
+	}
+	/* rewind to the start */
+	if (fseek(f, 0, SEEK_SET)) {
+		err = -errno;
+		goto err_out;
+	}
+
+	/* pre-alloc memory and read all of BTF data */
+	data = malloc(sz);
+	if (!data) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	if (fread(data, 1, sz, f) < sz) {
+		err = -EIO;
+		goto err_out;
+	}
+
+	/* finally parse BTF data */
+	btf = btf__new(data, sz);
+
+err_out:
+	free(data);
+	if (f)
+		fclose(f);
+	return err ? ERR_PTR(err) : btf;
+}
+
+struct btf *btf__parse(const char *path, struct btf_ext **btf_ext)
+{
+	struct btf *btf;
+
+	if (btf_ext)
+		*btf_ext = NULL;
+
+	btf = btf__parse_raw(path);
+	if (!IS_ERR(btf) || PTR_ERR(btf) != -EPROTO)
+		return btf;
+
+	return btf__parse_elf(path, btf_ext);
+}
+
+static int compare_vsi_off(const void *_a, const void *_b)
+{
+	const struct btf_var_secinfo *a = _a;
+	const struct btf_var_secinfo *b = _b;
+
+	return a->offset - b->offset;
+}
+
+static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
+			     struct btf_type *t)
+{
+	__u32 size = 0, off = 0, i, vars = btf_vlen(t);
+	const char *name = btf__name_by_offset(btf, t->name_off);
+	const struct btf_type *t_var;
+	struct btf_var_secinfo *vsi;
+	const struct btf_var *var;
+	int ret;
+
+	if (!name) {
+		pr_debug("No name found in string section for DATASEC kind.\n");
+		return -ENOENT;
+	}
+
+	/* .extern datasec size and var offsets were set correctly during
+	 * extern collection step, so just skip straight to sorting variables
+	 */
+	if (t->size)
+		goto sort_vars;
+
+	ret = bpf_object__section_size(obj, name, &size);
+	if (ret || !size || (t->size && t->size != size)) {
+		pr_debug("Invalid size for section %s: %u bytes\n", name, size);
+		return -ENOENT;
+	}
+
+	t->size = size;
+
+	for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
+		t_var = btf__type_by_id(btf, vsi->type);
+		var = btf_var(t_var);
+
+		if (!btf_is_var(t_var)) {
+			pr_debug("Non-VAR type seen in section %s\n", name);
+			return -EINVAL;
+		}
+
+		if (var->linkage == BTF_VAR_STATIC)
+			continue;
+
+		name = btf__name_by_offset(btf, t_var->name_off);
+		if (!name) {
+			pr_debug("No name found in string section for VAR kind\n");
+			return -ENOENT;
+		}
+
+		ret = bpf_object__variable_offset(obj, name, &off);
+		if (ret) {
+			pr_debug("No offset found in symbol table for VAR %s\n",
+				 name);
+			return -ENOENT;
+		}
+
+		vsi->offset = off;
+	}
+
+sort_vars:
+	qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
+	return 0;
+}
+
+int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
+{
+	int err = 0;
+	__u32 i;
+
+	for (i = 1; i <= btf->nr_types; i++) {
+		struct btf_type *t = btf_type_by_id(btf, i);
+
+		/* Loader needs to fix up some of the things compiler
+		 * couldn't get its hands on while emitting BTF. This
+		 * is section size and global variable offset. We use
+		 * the info from the ELF itself for this purpose.
+		 */
+		if (btf_is_datasec(t)) {
+			err = btf_fixup_datasec(obj, btf, t);
+			if (err)
+				break;
+		}
+	}
+
+	return err;
+}
+
+static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);
+
+int btf__load(struct btf *btf)
+{
+	__u32 log_buf_size = 0, raw_size;
+	char *log_buf = NULL;
+	void *raw_data;
+	int err = 0;
+
+	if (btf->fd >= 0)
+		return -EEXIST;
+
+retry_load:
+	if (log_buf_size) {
+		log_buf = malloc(log_buf_size);
+		if (!log_buf)
+			return -ENOMEM;
+
+		*log_buf = 0;
+	}
+
+	raw_data = btf_get_raw_data(btf, &raw_size, false);
+	if (!raw_data) {
+		err = -ENOMEM;
+		goto done;
+	}
+	/* cache native raw data representation */
+	btf->raw_size = raw_size;
+	btf->raw_data = raw_data;
+
+	btf->fd = bpf_load_btf(raw_data, raw_size, log_buf, log_buf_size, false);
+	if (btf->fd < 0) {
+		if (!log_buf || errno == ENOSPC) {
+			log_buf_size = max((__u32)BPF_LOG_BUF_SIZE,
+					   log_buf_size << 1);
+			free(log_buf);
+			goto retry_load;
+		}
+
+		err = -errno;
+		pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno);
+		if (*log_buf)
+			pr_warn("%s\n", log_buf);
+		goto done;
+	}
+
+done:
+	free(log_buf);
+	return err;
+}
+
+int btf__fd(const struct btf *btf)
+{
+	return btf->fd;
+}
+
+void btf__set_fd(struct btf *btf, int fd)
+{
+	btf->fd = fd;
+}
+
+static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian)
+{
+	struct btf_header *hdr = btf->hdr;
+	struct btf_type *t;
+	void *data, *p;
+	__u32 data_sz;
+	int i;
+
+	data = swap_endian ? btf->raw_data_swapped : btf->raw_data;
+	if (data) {
+		*size = btf->raw_size;
+		return data;
+	}
+
+	data_sz = hdr->hdr_len + hdr->type_len + hdr->str_len;
+	data = calloc(1, data_sz);
+	if (!data)
+		return NULL;
+	p = data;
+
+	memcpy(p, hdr, hdr->hdr_len);
+	if (swap_endian)
+		btf_bswap_hdr(p);
+	p += hdr->hdr_len;
+
+	memcpy(p, btf->types_data, hdr->type_len);
+	if (swap_endian) {
+		for (i = 1; i <= btf->nr_types; i++) {
+			t = p  + btf->type_offs[i];
+			/* btf_bswap_type_rest() relies on native t->info, so
+			 * we swap base type info after we swapped all the
+			 * additional information
+			 */
+			if (btf_bswap_type_rest(t))
+				goto err_out;
+			btf_bswap_type_base(t);
+		}
+	}
+	p += hdr->type_len;
+
+	memcpy(p, btf->strs_data, hdr->str_len);
+	p += hdr->str_len;
+
+	*size = data_sz;
+	return data;
+err_out:
+	free(data);
+	return NULL;
+}
+
+const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size)
+{
+	struct btf *btf = (struct btf *)btf_ro;
+	__u32 data_sz;
+	void *data;
+
+	data = btf_get_raw_data(btf, &data_sz, btf->swapped_endian);
+	if (!data)
+		return NULL;
+
+	btf->raw_size = data_sz;
+	if (btf->swapped_endian)
+		btf->raw_data_swapped = data;
+	else
+		btf->raw_data = data;
+	*size = data_sz;
+	return data;
+}
+
+const char *btf__str_by_offset(const struct btf *btf, __u32 offset)
+{
+	if (offset < btf->hdr->str_len)
+		return btf->strs_data + offset;
+	else
+		return NULL;
+}
+
+const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
+{
+	return btf__str_by_offset(btf, offset);
+}
+
+int btf__get_from_id(__u32 id, struct btf **btf)
+{
+	struct bpf_btf_info btf_info = { 0 };
+	__u32 len = sizeof(btf_info);
+	__u32 last_size;
+	int btf_fd;
+	void *ptr;
+	int err;
+
+	err = 0;
+	*btf = NULL;
+	btf_fd = bpf_btf_get_fd_by_id(id);
+	if (btf_fd < 0)
+		return 0;
+
+	/* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so
+	 * let's start with a sane default - 4KiB here - and resize it only if
+	 * bpf_obj_get_info_by_fd() needs a bigger buffer.
+	 */
+	btf_info.btf_size = 4096;
+	last_size = btf_info.btf_size;
+	ptr = malloc(last_size);
+	if (!ptr) {
+		err = -ENOMEM;
+		goto exit_free;
+	}
+
+	memset(ptr, 0, last_size);
+	btf_info.btf = ptr_to_u64(ptr);
+	err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
+
+	if (!err && btf_info.btf_size > last_size) {
+		void *temp_ptr;
+
+		last_size = btf_info.btf_size;
+		temp_ptr = realloc(ptr, last_size);
+		if (!temp_ptr) {
+			err = -ENOMEM;
+			goto exit_free;
+		}
+		ptr = temp_ptr;
+		memset(ptr, 0, last_size);
+		btf_info.btf = ptr_to_u64(ptr);
+		err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
+	}
+
+	if (err || btf_info.btf_size > last_size) {
+		err = errno;
+		goto exit_free;
+	}
+
+	*btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size);
+	if (IS_ERR(*btf)) {
+		err = PTR_ERR(*btf);
+		*btf = NULL;
+	}
+
+exit_free:
+	close(btf_fd);
+	free(ptr);
+
+	return err;
+}
+
+int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
+			 __u32 expected_key_size, __u32 expected_value_size,
+			 __u32 *key_type_id, __u32 *value_type_id)
+{
+	const struct btf_type *container_type;
+	const struct btf_member *key, *value;
+	const size_t max_name = 256;
+	char container_name[max_name];
+	__s64 key_size, value_size;
+	__s32 container_id;
+
+	if (snprintf(container_name, max_name, "____btf_map_%s", map_name) ==
+	    max_name) {
+		pr_warn("map:%s length of '____btf_map_%s' is too long\n",
+			map_name, map_name);
+		return -EINVAL;
+	}
+
+	container_id = btf__find_by_name(btf, container_name);
+	if (container_id < 0) {
+		pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
+			 map_name, container_name);
+		return container_id;
+	}
+
+	container_type = btf__type_by_id(btf, container_id);
+	if (!container_type) {
+		pr_warn("map:%s cannot find BTF type for container_id:%u\n",
+			map_name, container_id);
+		return -EINVAL;
+	}
+
+	if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) {
+		pr_warn("map:%s container_name:%s is an invalid container struct\n",
+			map_name, container_name);
+		return -EINVAL;
+	}
+
+	key = btf_members(container_type);
+	value = key + 1;
+
+	key_size = btf__resolve_size(btf, key->type);
+	if (key_size < 0) {
+		pr_warn("map:%s invalid BTF key_type_size\n", map_name);
+		return key_size;
+	}
+
+	if (expected_key_size != key_size) {
+		pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
+			map_name, (__u32)key_size, expected_key_size);
+		return -EINVAL;
+	}
+
+	value_size = btf__resolve_size(btf, value->type);
+	if (value_size < 0) {
+		pr_warn("map:%s invalid BTF value_type_size\n", map_name);
+		return value_size;
+	}
+
+	if (expected_value_size != value_size) {
+		pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
+			map_name, (__u32)value_size, expected_value_size);
+		return -EINVAL;
+	}
+
+	*key_type_id = key->type;
+	*value_type_id = value->type;
+
+	return 0;
+}
+
+static size_t strs_hash_fn(const void *key, void *ctx)
+{
+	struct btf *btf = ctx;
+	const char *str = btf->strs_data + (long)key;
+
+	return str_hash(str);
+}
+
+static bool strs_hash_equal_fn(const void *key1, const void *key2, void *ctx)
+{
+	struct btf *btf = ctx;
+	const char *str1 = btf->strs_data + (long)key1;
+	const char *str2 = btf->strs_data + (long)key2;
+
+	return strcmp(str1, str2) == 0;
+}
+
+static void btf_invalidate_raw_data(struct btf *btf)
+{
+	if (btf->raw_data) {
+		free(btf->raw_data);
+		btf->raw_data = NULL;
+	}
+	if (btf->raw_data_swapped) {
+		free(btf->raw_data_swapped);
+		btf->raw_data_swapped = NULL;
+	}
+}
+
+/* Ensure BTF is ready to be modified (by splitting into a three memory
+ * regions for header, types, and strings). Also invalidate cached
+ * raw_data, if any.
+ */
+static int btf_ensure_modifiable(struct btf *btf)
+{
+	void *hdr, *types, *strs, *strs_end, *s;
+	struct hashmap *hash = NULL;
+	long off;
+	int err;
+
+	if (btf_is_modifiable(btf)) {
+		/* any BTF modification invalidates raw_data */
+		btf_invalidate_raw_data(btf);
+		return 0;
+	}
+
+	/* split raw data into three memory regions */
+	hdr = malloc(btf->hdr->hdr_len);
+	types = malloc(btf->hdr->type_len);
+	strs = malloc(btf->hdr->str_len);
+	if (!hdr || !types || !strs)
+		goto err_out;
+
+	memcpy(hdr, btf->hdr, btf->hdr->hdr_len);
+	memcpy(types, btf->types_data, btf->hdr->type_len);
+	memcpy(strs, btf->strs_data, btf->hdr->str_len);
+
+	/* build lookup index for all strings */
+	hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, btf);
+	if (IS_ERR(hash)) {
+		err = PTR_ERR(hash);
+		hash = NULL;
+		goto err_out;
+	}
+
+	strs_end = strs + btf->hdr->str_len;
+	for (off = 0, s = strs; s < strs_end; off += strlen(s) + 1, s = strs + off) {
+		/* hashmap__add() returns EEXIST if string with the same
+		 * content already is in the hash map
+		 */
+		err = hashmap__add(hash, (void *)off, (void *)off);
+		if (err == -EEXIST)
+			continue; /* duplicate */
+		if (err)
+			goto err_out;
+	}
+
+	/* only when everything was successful, update internal state */
+	btf->hdr = hdr;
+	btf->types_data = types;
+	btf->types_data_cap = btf->hdr->type_len;
+	btf->strs_data = strs;
+	btf->strs_data_cap = btf->hdr->str_len;
+	btf->strs_hash = hash;
+	/* if BTF was created from scratch, all strings are guaranteed to be
+	 * unique and deduplicated
+	 */
+	btf->strs_deduped = btf->hdr->str_len <= 1;
+
+	/* invalidate raw_data representation */
+	btf_invalidate_raw_data(btf);
+
+	return 0;
+
+err_out:
+	hashmap__free(hash);
+	free(hdr);
+	free(types);
+	free(strs);
+	return -ENOMEM;
+}
+
+static void *btf_add_str_mem(struct btf *btf, size_t add_sz)
+{
+	return btf_add_mem(&btf->strs_data, &btf->strs_data_cap, 1,
+			   btf->hdr->str_len, BTF_MAX_STR_OFFSET, add_sz);
+}
+
+/* Find an offset in BTF string section that corresponds to a given string *s*.
+ * Returns:
+ *   - >0 offset into string section, if string is found;
+ *   - -ENOENT, if string is not in the string section;
+ *   - <0, on any other error.
+ */
+int btf__find_str(struct btf *btf, const char *s)
+{
+	long old_off, new_off, len;
+	void *p;
+
+	/* BTF needs to be in a modifiable state to build string lookup index */
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	/* see btf__add_str() for why we do this */
+	len = strlen(s) + 1;
+	p = btf_add_str_mem(btf, len);
+	if (!p)
+		return -ENOMEM;
+
+	new_off = btf->hdr->str_len;
+	memcpy(p, s, len);
+
+	if (hashmap__find(btf->strs_hash, (void *)new_off, (void **)&old_off))
+		return old_off;
+
+	return -ENOENT;
+}
+
+/* Add a string s to the BTF string section.
+ * Returns:
+ *   - > 0 offset into string section, on success;
+ *   - < 0, on error.
+ */
+int btf__add_str(struct btf *btf, const char *s)
+{
+	long old_off, new_off, len;
+	void *p;
+	int err;
+
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	/* Hashmap keys are always offsets within btf->strs_data, so to even
+	 * look up some string from the "outside", we need to first append it
+	 * at the end, so that it can be addressed with an offset. Luckily,
+	 * until btf->hdr->str_len is incremented, that string is just a piece
+	 * of garbage for the rest of BTF code, so no harm, no foul. On the
+	 * other hand, if the string is unique, it's already appended and
+	 * ready to be used, only a simple btf->hdr->str_len increment away.
+	 */
+	len = strlen(s) + 1;
+	p = btf_add_str_mem(btf, len);
+	if (!p)
+		return -ENOMEM;
+
+	new_off = btf->hdr->str_len;
+	memcpy(p, s, len);
+
+	/* Now attempt to add the string, but only if the string with the same
+	 * contents doesn't exist already (HASHMAP_ADD strategy). If such
+	 * string exists, we'll get its offset in old_off (that's old_key).
+	 */
+	err = hashmap__insert(btf->strs_hash, (void *)new_off, (void *)new_off,
+			      HASHMAP_ADD, (const void **)&old_off, NULL);
+	if (err == -EEXIST)
+		return old_off; /* duplicated string, return existing offset */
+	if (err)
+		return err;
+
+	btf->hdr->str_len += len; /* new unique string, adjust data length */
+	return new_off;
+}
+
+static void *btf_add_type_mem(struct btf *btf, size_t add_sz)
+{
+	return btf_add_mem(&btf->types_data, &btf->types_data_cap, 1,
+			   btf->hdr->type_len, UINT_MAX, add_sz);
+}
+
+static __u32 btf_type_info(int kind, int vlen, int kflag)
+{
+	return (kflag << 31) | (kind << 24) | vlen;
+}
+
+static void btf_type_inc_vlen(struct btf_type *t)
+{
+	t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t));
+}
+
+/*
+ * Append new BTF_KIND_INT type with:
+ *   - *name* - non-empty, non-NULL type name;
+ *   - *sz* - power-of-2 (1, 2, 4, ..) size of the type, in bytes;
+ *   - encoding is a combination of BTF_INT_SIGNED, BTF_INT_CHAR, BTF_INT_BOOL.
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding)
+{
+	struct btf_type *t;
+	int sz, err, name_off;
+
+	/* non-empty name */
+	if (!name || !name[0])
+		return -EINVAL;
+	/* byte_sz must be power of 2 */
+	if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 16)
+		return -EINVAL;
+	if (encoding & ~(BTF_INT_SIGNED | BTF_INT_CHAR | BTF_INT_BOOL))
+		return -EINVAL;
+
+	/* deconstruct BTF, if necessary, and invalidate raw_data */
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	sz = sizeof(struct btf_type) + sizeof(int);
+	t = btf_add_type_mem(btf, sz);
+	if (!t)
+		return -ENOMEM;
+
+	/* if something goes wrong later, we might end up with an extra string,
+	 * but that shouldn't be a problem, because BTF can't be constructed
+	 * completely anyway and will most probably be just discarded
+	 */
+	name_off = btf__add_str(btf, name);
+	if (name_off < 0)
+		return name_off;
+
+	t->name_off = name_off;
+	t->info = btf_type_info(BTF_KIND_INT, 0, 0);
+	t->size = byte_sz;
+	/* set INT info, we don't allow setting legacy bit offset/size */
+	*(__u32 *)(t + 1) = (encoding << 24) | (byte_sz * 8);
+
+	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+	if (err)
+		return err;
+
+	btf->hdr->type_len += sz;
+	btf->hdr->str_off += sz;
+	btf->nr_types++;
+	return btf->nr_types;
+}
+
+/* it's completely legal to append BTF types with type IDs pointing forward to
+ * types that haven't been appended yet, so we only make sure that id looks
+ * sane, we can't guarantee that ID will always be valid
+ */
+static int validate_type_id(int id)
+{
+	if (id < 0 || id > BTF_MAX_NR_TYPES)
+		return -EINVAL;
+	return 0;
+}
+
+/* generic append function for PTR, TYPEDEF, CONST/VOLATILE/RESTRICT */
+static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id)
+{
+	struct btf_type *t;
+	int sz, name_off = 0, err;
+
+	if (validate_type_id(ref_type_id))
+		return -EINVAL;
+
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	sz = sizeof(struct btf_type);
+	t = btf_add_type_mem(btf, sz);
+	if (!t)
+		return -ENOMEM;
+
+	if (name && name[0]) {
+		name_off = btf__add_str(btf, name);
+		if (name_off < 0)
+			return name_off;
+	}
+
+	t->name_off = name_off;
+	t->info = btf_type_info(kind, 0, 0);
+	t->type = ref_type_id;
+
+	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+	if (err)
+		return err;
+
+	btf->hdr->type_len += sz;
+	btf->hdr->str_off += sz;
+	btf->nr_types++;
+	return btf->nr_types;
+}
+
+/*
+ * Append new BTF_KIND_PTR type with:
+ *   - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_ptr(struct btf *btf, int ref_type_id)
+{
+	return btf_add_ref_kind(btf, BTF_KIND_PTR, NULL, ref_type_id);
+}
+
+/*
+ * Append new BTF_KIND_ARRAY type with:
+ *   - *index_type_id* - type ID of the type describing array index;
+ *   - *elem_type_id* - type ID of the type describing array element;
+ *   - *nr_elems* - the size of the array;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 nr_elems)
+{
+	struct btf_type *t;
+	struct btf_array *a;
+	int sz, err;
+
+	if (validate_type_id(index_type_id) || validate_type_id(elem_type_id))
+		return -EINVAL;
+
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	sz = sizeof(struct btf_type) + sizeof(struct btf_array);
+	t = btf_add_type_mem(btf, sz);
+	if (!t)
+		return -ENOMEM;
+
+	t->name_off = 0;
+	t->info = btf_type_info(BTF_KIND_ARRAY, 0, 0);
+	t->size = 0;
+
+	a = btf_array(t);
+	a->type = elem_type_id;
+	a->index_type = index_type_id;
+	a->nelems = nr_elems;
+
+	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+	if (err)
+		return err;
+
+	btf->hdr->type_len += sz;
+	btf->hdr->str_off += sz;
+	btf->nr_types++;
+	return btf->nr_types;
+}
+
+/* generic STRUCT/UNION append function */
+static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32 bytes_sz)
+{
+	struct btf_type *t;
+	int sz, err, name_off = 0;
+
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	sz = sizeof(struct btf_type);
+	t = btf_add_type_mem(btf, sz);
+	if (!t)
+		return -ENOMEM;
+
+	if (name && name[0]) {
+		name_off = btf__add_str(btf, name);
+		if (name_off < 0)
+			return name_off;
+	}
+
+	/* start out with vlen=0 and no kflag; this will be adjusted when
+	 * adding each member
+	 */
+	t->name_off = name_off;
+	t->info = btf_type_info(kind, 0, 0);
+	t->size = bytes_sz;
+
+	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+	if (err)
+		return err;
+
+	btf->hdr->type_len += sz;
+	btf->hdr->str_off += sz;
+	btf->nr_types++;
+	return btf->nr_types;
+}
+
+/*
+ * Append new BTF_KIND_STRUCT type with:
+ *   - *name* - name of the struct, can be NULL or empty for anonymous structs;
+ *   - *byte_sz* - size of the struct, in bytes;
+ *
+ * Struct initially has no fields in it. Fields can be added by
+ * btf__add_field() right after btf__add_struct() succeeds. 
+ *
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_struct(struct btf *btf, const char *name, __u32 byte_sz)
+{
+	return btf_add_composite(btf, BTF_KIND_STRUCT, name, byte_sz);
+}
+
+/*
+ * Append new BTF_KIND_UNION type with:
+ *   - *name* - name of the union, can be NULL or empty for anonymous union;
+ *   - *byte_sz* - size of the union, in bytes;
+ *
+ * Union initially has no fields in it. Fields can be added by
+ * btf__add_field() right after btf__add_union() succeeds. All fields
+ * should have *bit_offset* of 0.
+ *
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_union(struct btf *btf, const char *name, __u32 byte_sz)
+{
+	return btf_add_composite(btf, BTF_KIND_UNION, name, byte_sz);
+}
+
+/*
+ * Append new field for the current STRUCT/UNION type with:
+ *   - *name* - name of the field, can be NULL or empty for anonymous field;
+ *   - *type_id* - type ID for the type describing field type;
+ *   - *bit_offset* - bit offset of the start of the field within struct/union;
+ *   - *bit_size* - bit size of a bitfield, 0 for non-bitfield fields;
+ * Returns:
+ *   -  0, on success;
+ *   - <0, on error.
+ */
+int btf__add_field(struct btf *btf, const char *name, int type_id,
+		   __u32 bit_offset, __u32 bit_size)
+{
+	struct btf_type *t;
+	struct btf_member *m;
+	bool is_bitfield;
+	int sz, name_off = 0;
+
+	/* last type should be union/struct */
+	if (btf->nr_types == 0)
+		return -EINVAL;
+	t = btf_type_by_id(btf, btf->nr_types);
+	if (!btf_is_composite(t))
+		return -EINVAL;
+
+	if (validate_type_id(type_id))
+		return -EINVAL;
+	/* best-effort bit field offset/size enforcement */
+	is_bitfield = bit_size || (bit_offset % 8 != 0);
+	if (is_bitfield && (bit_size == 0 || bit_size > 255 || bit_offset > 0xffffff))
+		return -EINVAL;
+
+	/* only offset 0 is allowed for unions */
+	if (btf_is_union(t) && bit_offset)
+		return -EINVAL;
+
+	/* decompose and invalidate raw data */
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	sz = sizeof(struct btf_member);
+	m = btf_add_type_mem(btf, sz);
+	if (!m)
+		return -ENOMEM;
+
+	if (name && name[0]) {
+		name_off = btf__add_str(btf, name);
+		if (name_off < 0)
+			return name_off;
+	}
+
+	m->name_off = name_off;
+	m->type = type_id;
+	m->offset = bit_offset | (bit_size << 24);
+
+	/* btf_add_type_mem can invalidate t pointer */
+	t = btf_type_by_id(btf, btf->nr_types);
+	/* update parent type's vlen and kflag */
+	t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, is_bitfield || btf_kflag(t));
+
+	btf->hdr->type_len += sz;
+	btf->hdr->str_off += sz;
+	return 0;
+}
+
+/*
+ * Append new BTF_KIND_ENUM type with:
+ *   - *name* - name of the enum, can be NULL or empty for anonymous enums;
+ *   - *byte_sz* - size of the enum, in bytes.
+ *
+ * Enum initially has no enum values in it (and corresponds to enum forward
+ * declaration). Enumerator values can be added by btf__add_enum_value()
+ * immediately after btf__add_enum() succeeds.
+ *
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
+{
+	struct btf_type *t;
+	int sz, err, name_off = 0;
+
+	/* byte_sz must be power of 2 */
+	if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8)
+		return -EINVAL;
+
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	sz = sizeof(struct btf_type);
+	t = btf_add_type_mem(btf, sz);
+	if (!t)
+		return -ENOMEM;
+
+	if (name && name[0]) {
+		name_off = btf__add_str(btf, name);
+		if (name_off < 0)
+			return name_off;
+	}
+
+	/* start out with vlen=0; it will be adjusted when adding enum values */
+	t->name_off = name_off;
+	t->info = btf_type_info(BTF_KIND_ENUM, 0, 0);
+	t->size = byte_sz;
+
+	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+	if (err)
+		return err;
+
+	btf->hdr->type_len += sz;
+	btf->hdr->str_off += sz;
+	btf->nr_types++;
+	return btf->nr_types;
+}
+
+/*
+ * Append new enum value for the current ENUM type with:
+ *   - *name* - name of the enumerator value, can't be NULL or empty;
+ *   - *value* - integer value corresponding to enum value *name*;
+ * Returns:
+ *   -  0, on success;
+ *   - <0, on error.
+ */
+int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
+{
+	struct btf_type *t;
+	struct btf_enum *v;
+	int sz, name_off;
+
+	/* last type should be BTF_KIND_ENUM */
+	if (btf->nr_types == 0)
+		return -EINVAL;
+	t = btf_type_by_id(btf, btf->nr_types);
+	if (!btf_is_enum(t))
+		return -EINVAL;
+
+	/* non-empty name */
+	if (!name || !name[0])
+		return -EINVAL;
+	if (value < INT_MIN || value > UINT_MAX)
+		return -E2BIG;
+
+	/* decompose and invalidate raw data */
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	sz = sizeof(struct btf_enum);
+	v = btf_add_type_mem(btf, sz);
+	if (!v)
+		return -ENOMEM;
+
+	name_off = btf__add_str(btf, name);
+	if (name_off < 0)
+		return name_off;
+
+	v->name_off = name_off;
+	v->val = value;
+
+	/* update parent type's vlen */
+	t = btf_type_by_id(btf, btf->nr_types);
+	btf_type_inc_vlen(t);
+
+	btf->hdr->type_len += sz;
+	btf->hdr->str_off += sz;
+	return 0;
+}
+
+/*
+ * Append new BTF_KIND_FWD type with:
+ *   - *name*, non-empty/non-NULL name;
+ *   - *fwd_kind*, kind of forward declaration, one of BTF_FWD_STRUCT,
+ *     BTF_FWD_UNION, or BTF_FWD_ENUM;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
+{
+	if (!name || !name[0])
+		return -EINVAL;
+
+	switch (fwd_kind) {
+	case BTF_FWD_STRUCT:
+	case BTF_FWD_UNION: {
+		struct btf_type *t;
+		int id;
+
+		id = btf_add_ref_kind(btf, BTF_KIND_FWD, name, 0);
+		if (id <= 0)
+			return id;
+		t = btf_type_by_id(btf, id);
+		t->info = btf_type_info(BTF_KIND_FWD, 0, fwd_kind == BTF_FWD_UNION);
+		return id;
+	}
+	case BTF_FWD_ENUM:
+		/* enum forward in BTF currently is just an enum with no enum
+		 * values; we also assume a standard 4-byte size for it
+		 */
+		return btf__add_enum(btf, name, sizeof(int));
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Append new BTF_KING_TYPEDEF type with:
+ *   - *name*, non-empty/non-NULL name;
+ *   - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id)
+{
+	if (!name || !name[0])
+		return -EINVAL;
+
+	return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id);
+}
+
+/*
+ * Append new BTF_KIND_VOLATILE type with:
+ *   - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_volatile(struct btf *btf, int ref_type_id)
+{
+	return btf_add_ref_kind(btf, BTF_KIND_VOLATILE, NULL, ref_type_id);
+}
+
+/*
+ * Append new BTF_KIND_CONST type with:
+ *   - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_const(struct btf *btf, int ref_type_id)
+{
+	return btf_add_ref_kind(btf, BTF_KIND_CONST, NULL, ref_type_id);
+}
+
+/*
+ * Append new BTF_KIND_RESTRICT type with:
+ *   - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_restrict(struct btf *btf, int ref_type_id)
+{
+	return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id);
+}
+
+/*
+ * Append new BTF_KIND_FUNC type with:
+ *   - *name*, non-empty/non-NULL name;
+ *   - *proto_type_id* - FUNC_PROTO's type ID, it might not exist yet;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_func(struct btf *btf, const char *name,
+		  enum btf_func_linkage linkage, int proto_type_id)
+{
+	int id;
+
+	if (!name || !name[0])
+		return -EINVAL;
+	if (linkage != BTF_FUNC_STATIC && linkage != BTF_FUNC_GLOBAL &&
+	    linkage != BTF_FUNC_EXTERN)
+		return -EINVAL;
+
+	id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id);
+	if (id > 0) {
+		struct btf_type *t = btf_type_by_id(btf, id);
+
+		t->info = btf_type_info(BTF_KIND_FUNC, linkage, 0);
+	}
+	return id;
+}
+
+/*
+ * Append new BTF_KIND_FUNC_PROTO with:
+ *   - *ret_type_id* - type ID for return result of a function.
+ *
+ * Function prototype initially has no arguments, but they can be added by
+ * btf__add_func_param() one by one, immediately after
+ * btf__add_func_proto() succeeded.
+ *
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_func_proto(struct btf *btf, int ret_type_id)
+{
+	struct btf_type *t;
+	int sz, err;
+
+	if (validate_type_id(ret_type_id))
+		return -EINVAL;
+
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	sz = sizeof(struct btf_type);
+	t = btf_add_type_mem(btf, sz);
+	if (!t)
+		return -ENOMEM;
+
+	/* start out with vlen=0; this will be adjusted when adding enum
+	 * values, if necessary
+	 */
+	t->name_off = 0;
+	t->info = btf_type_info(BTF_KIND_FUNC_PROTO, 0, 0);
+	t->type = ret_type_id;
+
+	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+	if (err)
+		return err;
+
+	btf->hdr->type_len += sz;
+	btf->hdr->str_off += sz;
+	btf->nr_types++;
+	return btf->nr_types;
+}
+
+/*
+ * Append new function parameter for current FUNC_PROTO type with:
+ *   - *name* - parameter name, can be NULL or empty;
+ *   - *type_id* - type ID describing the type of the parameter.
+ * Returns:
+ *   -  0, on success;
+ *   - <0, on error.
+ */
+int btf__add_func_param(struct btf *btf, const char *name, int type_id)
+{
+	struct btf_type *t;
+	struct btf_param *p;
+	int sz, name_off = 0;
+
+	if (validate_type_id(type_id))
+		return -EINVAL;
+
+	/* last type should be BTF_KIND_FUNC_PROTO */
+	if (btf->nr_types == 0)
+		return -EINVAL;
+	t = btf_type_by_id(btf, btf->nr_types);
+	if (!btf_is_func_proto(t))
+		return -EINVAL;
+
+	/* decompose and invalidate raw data */
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	sz = sizeof(struct btf_param);
+	p = btf_add_type_mem(btf, sz);
+	if (!p)
+		return -ENOMEM;
+
+	if (name && name[0]) {
+		name_off = btf__add_str(btf, name);
+		if (name_off < 0)
+			return name_off;
+	}
+
+	p->name_off = name_off;
+	p->type = type_id;
+
+	/* update parent type's vlen */
+	t = btf_type_by_id(btf, btf->nr_types);
+	btf_type_inc_vlen(t);
+
+	btf->hdr->type_len += sz;
+	btf->hdr->str_off += sz;
+	return 0;
+}
+
+/*
+ * Append new BTF_KIND_VAR type with:
+ *   - *name* - non-empty/non-NULL name;
+ *   - *linkage* - variable linkage, one of BTF_VAR_STATIC,
+ *     BTF_VAR_GLOBAL_ALLOCATED, or BTF_VAR_GLOBAL_EXTERN;
+ *   - *type_id* - type ID of the type describing the type of the variable.
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
+{
+	struct btf_type *t;
+	struct btf_var *v;
+	int sz, err, name_off;
+
+	/* non-empty name */
+	if (!name || !name[0])
+		return -EINVAL;
+	if (linkage != BTF_VAR_STATIC && linkage != BTF_VAR_GLOBAL_ALLOCATED &&
+	    linkage != BTF_VAR_GLOBAL_EXTERN)
+		return -EINVAL;
+	if (validate_type_id(type_id))
+		return -EINVAL;
+
+	/* deconstruct BTF, if necessary, and invalidate raw_data */
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	sz = sizeof(struct btf_type) + sizeof(struct btf_var);
+	t = btf_add_type_mem(btf, sz);
+	if (!t)
+		return -ENOMEM;
+
+	name_off = btf__add_str(btf, name);
+	if (name_off < 0)
+		return name_off;
+
+	t->name_off = name_off;
+	t->info = btf_type_info(BTF_KIND_VAR, 0, 0);
+	t->type = type_id;
+
+	v = btf_var(t);
+	v->linkage = linkage;
+
+	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+	if (err)
+		return err;
+
+	btf->hdr->type_len += sz;
+	btf->hdr->str_off += sz;
+	btf->nr_types++;
+	return btf->nr_types;
+}
+
+/*
+ * Append new BTF_KIND_DATASEC type with:
+ *   - *name* - non-empty/non-NULL name;
+ *   - *byte_sz* - data section size, in bytes.
+ *
+ * Data section is initially empty. Variables info can be added with
+ * btf__add_datasec_var_info() calls, after btf__add_datasec() succeeds.
+ *
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz)
+{
+	struct btf_type *t;
+	int sz, err, name_off;
+
+	/* non-empty name */
+	if (!name || !name[0])
+		return -EINVAL;
+
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	sz = sizeof(struct btf_type);
+	t = btf_add_type_mem(btf, sz);
+	if (!t)
+		return -ENOMEM;
+
+	name_off = btf__add_str(btf, name);
+	if (name_off < 0)
+		return name_off;
+
+	/* start with vlen=0, which will be update as var_secinfos are added */
+	t->name_off = name_off;
+	t->info = btf_type_info(BTF_KIND_DATASEC, 0, 0);
+	t->size = byte_sz;
+
+	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+	if (err)
+		return err;
+
+	btf->hdr->type_len += sz;
+	btf->hdr->str_off += sz;
+	btf->nr_types++;
+	return btf->nr_types;
+}
+
+/*
+ * Append new data section variable information entry for current DATASEC type:
+ *   - *var_type_id* - type ID, describing type of the variable;
+ *   - *offset* - variable offset within data section, in bytes;
+ *   - *byte_sz* - variable size, in bytes.
+ *
+ * Returns:
+ *   -  0, on success;
+ *   - <0, on error.
+ */
+int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __u32 byte_sz)
+{
+	struct btf_type *t;
+	struct btf_var_secinfo *v;
+	int sz;
+
+	/* last type should be BTF_KIND_DATASEC */
+	if (btf->nr_types == 0)
+		return -EINVAL;
+	t = btf_type_by_id(btf, btf->nr_types);
+	if (!btf_is_datasec(t))
+		return -EINVAL;
+
+	if (validate_type_id(var_type_id))
+		return -EINVAL;
+
+	/* decompose and invalidate raw data */
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	sz = sizeof(struct btf_var_secinfo);
+	v = btf_add_type_mem(btf, sz);
+	if (!v)
+		return -ENOMEM;
+
+	v->type = var_type_id;
+	v->offset = offset;
+	v->size = byte_sz;
+
+	/* update parent type's vlen */
+	t = btf_type_by_id(btf, btf->nr_types);
+	btf_type_inc_vlen(t);
+
+	btf->hdr->type_len += sz;
+	btf->hdr->str_off += sz;
+	return 0;
+}
+
+struct btf_ext_sec_setup_param {
+	__u32 off;
+	__u32 len;
+	__u32 min_rec_size;
+	struct btf_ext_info *ext_info;
+	const char *desc;
+};
+
+static int btf_ext_setup_info(struct btf_ext *btf_ext,
+			      struct btf_ext_sec_setup_param *ext_sec)
+{
+	const struct btf_ext_info_sec *sinfo;
+	struct btf_ext_info *ext_info;
+	__u32 info_left, record_size;
+	/* The start of the info sec (including the __u32 record_size). */
+	void *info;
+
+	if (ext_sec->len == 0)
+		return 0;
+
+	if (ext_sec->off & 0x03) {
+		pr_debug(".BTF.ext %s section is not aligned to 4 bytes\n",
+		     ext_sec->desc);
+		return -EINVAL;
+	}
+
+	info = btf_ext->data + btf_ext->hdr->hdr_len + ext_sec->off;
+	info_left = ext_sec->len;
+
+	if (btf_ext->data + btf_ext->data_size < info + ext_sec->len) {
+		pr_debug("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n",
+			 ext_sec->desc, ext_sec->off, ext_sec->len);
+		return -EINVAL;
+	}
+
+	/* At least a record size */
+	if (info_left < sizeof(__u32)) {
+		pr_debug(".BTF.ext %s record size not found\n", ext_sec->desc);
+		return -EINVAL;
+	}
+
+	/* The record size needs to meet the minimum standard */
+	record_size = *(__u32 *)info;
+	if (record_size < ext_sec->min_rec_size ||
+	    record_size & 0x03) {
+		pr_debug("%s section in .BTF.ext has invalid record size %u\n",
+			 ext_sec->desc, record_size);
+		return -EINVAL;
+	}
+
+	sinfo = info + sizeof(__u32);
+	info_left -= sizeof(__u32);
+
+	/* If no records, return failure now so .BTF.ext won't be used. */
+	if (!info_left) {
+		pr_debug("%s section in .BTF.ext has no records", ext_sec->desc);
+		return -EINVAL;
+	}
+
+	while (info_left) {
+		unsigned int sec_hdrlen = sizeof(struct btf_ext_info_sec);
+		__u64 total_record_size;
+		__u32 num_records;
+
+		if (info_left < sec_hdrlen) {
+			pr_debug("%s section header is not found in .BTF.ext\n",
+			     ext_sec->desc);
+			return -EINVAL;
+		}
+
+		num_records = sinfo->num_info;
+		if (num_records == 0) {
+			pr_debug("%s section has incorrect num_records in .BTF.ext\n",
+			     ext_sec->desc);
+			return -EINVAL;
+		}
+
+		total_record_size = sec_hdrlen +
+				    (__u64)num_records * record_size;
+		if (info_left < total_record_size) {
+			pr_debug("%s section has incorrect num_records in .BTF.ext\n",
+			     ext_sec->desc);
+			return -EINVAL;
+		}
+
+		info_left -= total_record_size;
+		sinfo = (void *)sinfo + total_record_size;
+	}
+
+	ext_info = ext_sec->ext_info;
+	ext_info->len = ext_sec->len - sizeof(__u32);
+	ext_info->rec_size = record_size;
+	ext_info->info = info + sizeof(__u32);
+
+	return 0;
+}
+
+static int btf_ext_setup_func_info(struct btf_ext *btf_ext)
+{
+	struct btf_ext_sec_setup_param param = {
+		.off = btf_ext->hdr->func_info_off,
+		.len = btf_ext->hdr->func_info_len,
+		.min_rec_size = sizeof(struct bpf_func_info_min),
+		.ext_info = &btf_ext->func_info,
+		.desc = "func_info"
+	};
+
+	return btf_ext_setup_info(btf_ext, &param);
+}
+
+static int btf_ext_setup_line_info(struct btf_ext *btf_ext)
+{
+	struct btf_ext_sec_setup_param param = {
+		.off = btf_ext->hdr->line_info_off,
+		.len = btf_ext->hdr->line_info_len,
+		.min_rec_size = sizeof(struct bpf_line_info_min),
+		.ext_info = &btf_ext->line_info,
+		.desc = "line_info",
+	};
+
+	return btf_ext_setup_info(btf_ext, &param);
+}
+
+static int btf_ext_setup_core_relos(struct btf_ext *btf_ext)
+{
+	struct btf_ext_sec_setup_param param = {
+		.off = btf_ext->hdr->core_relo_off,
+		.len = btf_ext->hdr->core_relo_len,
+		.min_rec_size = sizeof(struct bpf_core_relo),
+		.ext_info = &btf_ext->core_relo_info,
+		.desc = "core_relo",
+	};
+
+	return btf_ext_setup_info(btf_ext, &param);
+}
+
+static int btf_ext_parse_hdr(__u8 *data, __u32 data_size)
+{
+	const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
+
+	if (data_size < offsetofend(struct btf_ext_header, hdr_len) ||
+	    data_size < hdr->hdr_len) {
+		pr_debug("BTF.ext header not found");
+		return -EINVAL;
+	}
+
+	if (hdr->magic == bswap_16(BTF_MAGIC)) {
+		pr_warn("BTF.ext in non-native endianness is not supported\n");
+		return -ENOTSUP;
+	} else if (hdr->magic != BTF_MAGIC) {
+		pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic);
+		return -EINVAL;
+	}
+
+	if (hdr->version != BTF_VERSION) {
+		pr_debug("Unsupported BTF.ext version:%u\n", hdr->version);
+		return -ENOTSUP;
+	}
+
+	if (hdr->flags) {
+		pr_debug("Unsupported BTF.ext flags:%x\n", hdr->flags);
+		return -ENOTSUP;
+	}
+
+	if (data_size == hdr->hdr_len) {
+		pr_debug("BTF.ext has no data\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+void btf_ext__free(struct btf_ext *btf_ext)
+{
+	if (IS_ERR_OR_NULL(btf_ext))
+		return;
+	free(btf_ext->data);
+	free(btf_ext);
+}
+
+struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
+{
+	struct btf_ext *btf_ext;
+	int err;
+
+	err = btf_ext_parse_hdr(data, size);
+	if (err)
+		return ERR_PTR(err);
+
+	btf_ext = calloc(1, sizeof(struct btf_ext));
+	if (!btf_ext)
+		return ERR_PTR(-ENOMEM);
+
+	btf_ext->data_size = size;
+	btf_ext->data = malloc(size);
+	if (!btf_ext->data) {
+		err = -ENOMEM;
+		goto done;
+	}
+	memcpy(btf_ext->data, data, size);
+
+	if (btf_ext->hdr->hdr_len <
+	    offsetofend(struct btf_ext_header, line_info_len))
+		goto done;
+	err = btf_ext_setup_func_info(btf_ext);
+	if (err)
+		goto done;
+
+	err = btf_ext_setup_line_info(btf_ext);
+	if (err)
+		goto done;
+
+	if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
+		goto done;
+	err = btf_ext_setup_core_relos(btf_ext);
+	if (err)
+		goto done;
+
+done:
+	if (err) {
+		btf_ext__free(btf_ext);
+		return ERR_PTR(err);
+	}
+
+	return btf_ext;
+}
+
+const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size)
+{
+	*size = btf_ext->data_size;
+	return btf_ext->data;
+}
+
+static int btf_ext_reloc_info(const struct btf *btf,
+			      const struct btf_ext_info *ext_info,
+			      const char *sec_name, __u32 insns_cnt,
+			      void **info, __u32 *cnt)
+{
+	__u32 sec_hdrlen = sizeof(struct btf_ext_info_sec);
+	__u32 i, record_size, existing_len, records_len;
+	struct btf_ext_info_sec *sinfo;
+	const char *info_sec_name;
+	__u64 remain_len;
+	void *data;
+
+	record_size = ext_info->rec_size;
+	sinfo = ext_info->info;
+	remain_len = ext_info->len;
+	while (remain_len > 0) {
+		records_len = sinfo->num_info * record_size;
+		info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off);
+		if (strcmp(info_sec_name, sec_name)) {
+			remain_len -= sec_hdrlen + records_len;
+			sinfo = (void *)sinfo + sec_hdrlen + records_len;
+			continue;
+		}
+
+		existing_len = (*cnt) * record_size;
+		data = realloc(*info, existing_len + records_len);
+		if (!data)
+			return -ENOMEM;
+
+		memcpy(data + existing_len, sinfo->data, records_len);
+		/* adjust insn_off only, the rest data will be passed
+		 * to the kernel.
+		 */
+		for (i = 0; i < sinfo->num_info; i++) {
+			__u32 *insn_off;
+
+			insn_off = data + existing_len + (i * record_size);
+			*insn_off = *insn_off / sizeof(struct bpf_insn) +
+				insns_cnt;
+		}
+		*info = data;
+		*cnt += sinfo->num_info;
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+int btf_ext__reloc_func_info(const struct btf *btf,
+			     const struct btf_ext *btf_ext,
+			     const char *sec_name, __u32 insns_cnt,
+			     void **func_info, __u32 *cnt)
+{
+	return btf_ext_reloc_info(btf, &btf_ext->func_info, sec_name,
+				  insns_cnt, func_info, cnt);
+}
+
+int btf_ext__reloc_line_info(const struct btf *btf,
+			     const struct btf_ext *btf_ext,
+			     const char *sec_name, __u32 insns_cnt,
+			     void **line_info, __u32 *cnt)
+{
+	return btf_ext_reloc_info(btf, &btf_ext->line_info, sec_name,
+				  insns_cnt, line_info, cnt);
+}
+
+__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext)
+{
+	return btf_ext->func_info.rec_size;
+}
+
+__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext)
+{
+	return btf_ext->line_info.rec_size;
+}
+
+struct btf_dedup;
+
+static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
+				       const struct btf_dedup_opts *opts);
+static void btf_dedup_free(struct btf_dedup *d);
+static int btf_dedup_strings(struct btf_dedup *d);
+static int btf_dedup_prim_types(struct btf_dedup *d);
+static int btf_dedup_struct_types(struct btf_dedup *d);
+static int btf_dedup_ref_types(struct btf_dedup *d);
+static int btf_dedup_compact_types(struct btf_dedup *d);
+static int btf_dedup_remap_types(struct btf_dedup *d);
+
+/*
+ * Deduplicate BTF types and strings.
+ *
+ * BTF dedup algorithm takes as an input `struct btf` representing `.BTF` ELF
+ * section with all BTF type descriptors and string data. It overwrites that
+ * memory in-place with deduplicated types and strings without any loss of
+ * information. If optional `struct btf_ext` representing '.BTF.ext' ELF section
+ * is provided, all the strings referenced from .BTF.ext section are honored
+ * and updated to point to the right offsets after deduplication.
+ *
+ * If function returns with error, type/string data might be garbled and should
+ * be discarded.
+ *
+ * More verbose and detailed description of both problem btf_dedup is solving,
+ * as well as solution could be found at:
+ * https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html
+ *
+ * Problem description and justification
+ * =====================================
+ *
+ * BTF type information is typically emitted either as a result of conversion
+ * from DWARF to BTF or directly by compiler. In both cases, each compilation
+ * unit contains information about a subset of all the types that are used
+ * in an application. These subsets are frequently overlapping and contain a lot
+ * of duplicated information when later concatenated together into a single
+ * binary. This algorithm ensures that each unique type is represented by single
+ * BTF type descriptor, greatly reducing resulting size of BTF data.
+ *
+ * Compilation unit isolation and subsequent duplication of data is not the only
+ * problem. The same type hierarchy (e.g., struct and all the type that struct
+ * references) in different compilation units can be represented in BTF to
+ * various degrees of completeness (or, rather, incompleteness) due to
+ * struct/union forward declarations.
+ *
+ * Let's take a look at an example, that we'll use to better understand the
+ * problem (and solution). Suppose we have two compilation units, each using
+ * same `struct S`, but each of them having incomplete type information about
+ * struct's fields:
+ *
+ * // CU #1:
+ * struct S;
+ * struct A {
+ *	int a;
+ *	struct A* self;
+ *	struct S* parent;
+ * };
+ * struct B;
+ * struct S {
+ *	struct A* a_ptr;
+ *	struct B* b_ptr;
+ * };
+ *
+ * // CU #2:
+ * struct S;
+ * struct A;
+ * struct B {
+ *	int b;
+ *	struct B* self;
+ *	struct S* parent;
+ * };
+ * struct S {
+ *	struct A* a_ptr;
+ *	struct B* b_ptr;
+ * };
+ *
+ * In case of CU #1, BTF data will know only that `struct B` exist (but no
+ * more), but will know the complete type information about `struct A`. While
+ * for CU #2, it will know full type information about `struct B`, but will
+ * only know about forward declaration of `struct A` (in BTF terms, it will
+ * have `BTF_KIND_FWD` type descriptor with name `B`).
+ *
+ * This compilation unit isolation means that it's possible that there is no
+ * single CU with complete type information describing structs `S`, `A`, and
+ * `B`. Also, we might get tons of duplicated and redundant type information.
+ *
+ * Additional complication we need to keep in mind comes from the fact that
+ * types, in general, can form graphs containing cycles, not just DAGs.
+ *
+ * While algorithm does deduplication, it also merges and resolves type
+ * information (unless disabled throught `struct btf_opts`), whenever possible.
+ * E.g., in the example above with two compilation units having partial type
+ * information for structs `A` and `B`, the output of algorithm will emit
+ * a single copy of each BTF type that describes structs `A`, `B`, and `S`
+ * (as well as type information for `int` and pointers), as if they were defined
+ * in a single compilation unit as:
+ *
+ * struct A {
+ *	int a;
+ *	struct A* self;
+ *	struct S* parent;
+ * };
+ * struct B {
+ *	int b;
+ *	struct B* self;
+ *	struct S* parent;
+ * };
+ * struct S {
+ *	struct A* a_ptr;
+ *	struct B* b_ptr;
+ * };
+ *
+ * Algorithm summary
+ * =================
+ *
+ * Algorithm completes its work in 6 separate passes:
+ *
+ * 1. Strings deduplication.
+ * 2. Primitive types deduplication (int, enum, fwd).
+ * 3. Struct/union types deduplication.
+ * 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func
+ *    protos, and const/volatile/restrict modifiers).
+ * 5. Types compaction.
+ * 6. Types remapping.
+ *
+ * Algorithm determines canonical type descriptor, which is a single
+ * representative type for each truly unique type. This canonical type is the
+ * one that will go into final deduplicated BTF type information. For
+ * struct/unions, it is also the type that algorithm will merge additional type
+ * information into (while resolving FWDs), as it discovers it from data in
+ * other CUs. Each input BTF type eventually gets either mapped to itself, if
+ * that type is canonical, or to some other type, if that type is equivalent
+ * and was chosen as canonical representative. This mapping is stored in
+ * `btf_dedup->map` array. This map is also used to record STRUCT/UNION that
+ * FWD type got resolved to.
+ *
+ * To facilitate fast discovery of canonical types, we also maintain canonical
+ * index (`btf_dedup->dedup_table`), which maps type descriptor's signature hash
+ * (i.e., hashed kind, name, size, fields, etc) into a list of canonical types
+ * that match that signature. With sufficiently good choice of type signature
+ * hashing function, we can limit number of canonical types for each unique type
+ * signature to a very small number, allowing to find canonical type for any
+ * duplicated type very quickly.
+ *
+ * Struct/union deduplication is the most critical part and algorithm for
+ * deduplicating structs/unions is described in greater details in comments for
+ * `btf_dedup_is_equiv` function.
+ */
+int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
+	       const struct btf_dedup_opts *opts)
+{
+	struct btf_dedup *d = btf_dedup_new(btf, btf_ext, opts);
+	int err;
+
+	if (IS_ERR(d)) {
+		pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d));
+		return -EINVAL;
+	}
+
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	err = btf_dedup_strings(d);
+	if (err < 0) {
+		pr_debug("btf_dedup_strings failed:%d\n", err);
+		goto done;
+	}
+	err = btf_dedup_prim_types(d);
+	if (err < 0) {
+		pr_debug("btf_dedup_prim_types failed:%d\n", err);
+		goto done;
+	}
+	err = btf_dedup_struct_types(d);
+	if (err < 0) {
+		pr_debug("btf_dedup_struct_types failed:%d\n", err);
+		goto done;
+	}
+	err = btf_dedup_ref_types(d);
+	if (err < 0) {
+		pr_debug("btf_dedup_ref_types failed:%d\n", err);
+		goto done;
+	}
+	err = btf_dedup_compact_types(d);
+	if (err < 0) {
+		pr_debug("btf_dedup_compact_types failed:%d\n", err);
+		goto done;
+	}
+	err = btf_dedup_remap_types(d);
+	if (err < 0) {
+		pr_debug("btf_dedup_remap_types failed:%d\n", err);
+		goto done;
+	}
+
+done:
+	btf_dedup_free(d);
+	return err;
+}
+
+#define BTF_UNPROCESSED_ID ((__u32)-1)
+#define BTF_IN_PROGRESS_ID ((__u32)-2)
+
+struct btf_dedup {
+	/* .BTF section to be deduped in-place */
+	struct btf *btf;
+	/*
+	 * Optional .BTF.ext section. When provided, any strings referenced
+	 * from it will be taken into account when deduping strings
+	 */
+	struct btf_ext *btf_ext;
+	/*
+	 * This is a map from any type's signature hash to a list of possible
+	 * canonical representative type candidates. Hash collisions are
+	 * ignored, so even types of various kinds can share same list of
+	 * candidates, which is fine because we rely on subsequent
+	 * btf_xxx_equal() checks to authoritatively verify type equality.
+	 */
+	struct hashmap *dedup_table;
+	/* Canonical types map */
+	__u32 *map;
+	/* Hypothetical mapping, used during type graph equivalence checks */
+	__u32 *hypot_map;
+	__u32 *hypot_list;
+	size_t hypot_cnt;
+	size_t hypot_cap;
+	/* Various option modifying behavior of algorithm */
+	struct btf_dedup_opts opts;
+};
+
+struct btf_str_ptr {
+	const char *str;
+	__u32 new_off;
+	bool used;
+};
+
+struct btf_str_ptrs {
+	struct btf_str_ptr *ptrs;
+	const char *data;
+	__u32 cnt;
+	__u32 cap;
+};
+
+static long hash_combine(long h, long value)
+{
+	return h * 31 + value;
+}
+
+#define for_each_dedup_cand(d, node, hash) \
+	hashmap__for_each_key_entry(d->dedup_table, node, (void *)hash)
+
+static int btf_dedup_table_add(struct btf_dedup *d, long hash, __u32 type_id)
+{
+	return hashmap__append(d->dedup_table,
+			       (void *)hash, (void *)(long)type_id);
+}
+
+static int btf_dedup_hypot_map_add(struct btf_dedup *d,
+				   __u32 from_id, __u32 to_id)
+{
+	if (d->hypot_cnt == d->hypot_cap) {
+		__u32 *new_list;
+
+		d->hypot_cap += max((size_t)16, d->hypot_cap / 2);
+		new_list = libbpf_reallocarray(d->hypot_list, d->hypot_cap, sizeof(__u32));
+		if (!new_list)
+			return -ENOMEM;
+		d->hypot_list = new_list;
+	}
+	d->hypot_list[d->hypot_cnt++] = from_id;
+	d->hypot_map[from_id] = to_id;
+	return 0;
+}
+
+static void btf_dedup_clear_hypot_map(struct btf_dedup *d)
+{
+	int i;
+
+	for (i = 0; i < d->hypot_cnt; i++)
+		d->hypot_map[d->hypot_list[i]] = BTF_UNPROCESSED_ID;
+	d->hypot_cnt = 0;
+}
+
+static void btf_dedup_free(struct btf_dedup *d)
+{
+	hashmap__free(d->dedup_table);
+	d->dedup_table = NULL;
+
+	free(d->map);
+	d->map = NULL;
+
+	free(d->hypot_map);
+	d->hypot_map = NULL;
+
+	free(d->hypot_list);
+	d->hypot_list = NULL;
+
+	free(d);
+}
+
+static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx)
+{
+	return (size_t)key;
+}
+
+static size_t btf_dedup_collision_hash_fn(const void *key, void *ctx)
+{
+	return 0;
+}
+
+static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx)
+{
+	return k1 == k2;
+}
+
+static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
+				       const struct btf_dedup_opts *opts)
+{
+	struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup));
+	hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn;
+	int i, err = 0;
+
+	if (!d)
+		return ERR_PTR(-ENOMEM);
+
+	d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
+	/* dedup_table_size is now used only to force collisions in tests */
+	if (opts && opts->dedup_table_size == 1)
+		hash_fn = btf_dedup_collision_hash_fn;
+
+	d->btf = btf;
+	d->btf_ext = btf_ext;
+
+	d->dedup_table = hashmap__new(hash_fn, btf_dedup_equal_fn, NULL);
+	if (IS_ERR(d->dedup_table)) {
+		err = PTR_ERR(d->dedup_table);
+		d->dedup_table = NULL;
+		goto done;
+	}
+
+	d->map = malloc(sizeof(__u32) * (1 + btf->nr_types));
+	if (!d->map) {
+		err = -ENOMEM;
+		goto done;
+	}
+	/* special BTF "void" type is made canonical immediately */
+	d->map[0] = 0;
+	for (i = 1; i <= btf->nr_types; i++) {
+		struct btf_type *t = btf_type_by_id(d->btf, i);
+
+		/* VAR and DATASEC are never deduped and are self-canonical */
+		if (btf_is_var(t) || btf_is_datasec(t))
+			d->map[i] = i;
+		else
+			d->map[i] = BTF_UNPROCESSED_ID;
+	}
+
+	d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types));
+	if (!d->hypot_map) {
+		err = -ENOMEM;
+		goto done;
+	}
+	for (i = 0; i <= btf->nr_types; i++)
+		d->hypot_map[i] = BTF_UNPROCESSED_ID;
+
+done:
+	if (err) {
+		btf_dedup_free(d);
+		return ERR_PTR(err);
+	}
+
+	return d;
+}
+
+typedef int (*str_off_fn_t)(__u32 *str_off_ptr, void *ctx);
+
+/*
+ * Iterate over all possible places in .BTF and .BTF.ext that can reference
+ * string and pass pointer to it to a provided callback `fn`.
+ */
+static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx)
+{
+	void *line_data_cur, *line_data_end;
+	int i, j, r, rec_size;
+	struct btf_type *t;
+
+	for (i = 1; i <= d->btf->nr_types; i++) {
+		t = btf_type_by_id(d->btf, i);
+		r = fn(&t->name_off, ctx);
+		if (r)
+			return r;
+
+		switch (btf_kind(t)) {
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION: {
+			struct btf_member *m = btf_members(t);
+			__u16 vlen = btf_vlen(t);
+
+			for (j = 0; j < vlen; j++) {
+				r = fn(&m->name_off, ctx);
+				if (r)
+					return r;
+				m++;
+			}
+			break;
+		}
+		case BTF_KIND_ENUM: {
+			struct btf_enum *m = btf_enum(t);
+			__u16 vlen = btf_vlen(t);
+
+			for (j = 0; j < vlen; j++) {
+				r = fn(&m->name_off, ctx);
+				if (r)
+					return r;
+				m++;
+			}
+			break;
+		}
+		case BTF_KIND_FUNC_PROTO: {
+			struct btf_param *m = btf_params(t);
+			__u16 vlen = btf_vlen(t);
+
+			for (j = 0; j < vlen; j++) {
+				r = fn(&m->name_off, ctx);
+				if (r)
+					return r;
+				m++;
+			}
+			break;
+		}
+		default:
+			break;
+		}
+	}
+
+	if (!d->btf_ext)
+		return 0;
+
+	line_data_cur = d->btf_ext->line_info.info;
+	line_data_end = d->btf_ext->line_info.info + d->btf_ext->line_info.len;
+	rec_size = d->btf_ext->line_info.rec_size;
+
+	while (line_data_cur < line_data_end) {
+		struct btf_ext_info_sec *sec = line_data_cur;
+		struct bpf_line_info_min *line_info;
+		__u32 num_info = sec->num_info;
+
+		r = fn(&sec->sec_name_off, ctx);
+		if (r)
+			return r;
+
+		line_data_cur += sizeof(struct btf_ext_info_sec);
+		for (i = 0; i < num_info; i++) {
+			line_info = line_data_cur;
+			r = fn(&line_info->file_name_off, ctx);
+			if (r)
+				return r;
+			r = fn(&line_info->line_off, ctx);
+			if (r)
+				return r;
+			line_data_cur += rec_size;
+		}
+	}
+
+	return 0;
+}
+
+static int str_sort_by_content(const void *a1, const void *a2)
+{
+	const struct btf_str_ptr *p1 = a1;
+	const struct btf_str_ptr *p2 = a2;
+
+	return strcmp(p1->str, p2->str);
+}
+
+static int str_sort_by_offset(const void *a1, const void *a2)
+{
+	const struct btf_str_ptr *p1 = a1;
+	const struct btf_str_ptr *p2 = a2;
+
+	if (p1->str != p2->str)
+		return p1->str < p2->str ? -1 : 1;
+	return 0;
+}
+
+static int btf_dedup_str_ptr_cmp(const void *str_ptr, const void *pelem)
+{
+	const struct btf_str_ptr *p = pelem;
+
+	if (str_ptr != p->str)
+		return (const char *)str_ptr < p->str ? -1 : 1;
+	return 0;
+}
+
+static int btf_str_mark_as_used(__u32 *str_off_ptr, void *ctx)
+{
+	struct btf_str_ptrs *strs;
+	struct btf_str_ptr *s;
+
+	if (*str_off_ptr == 0)
+		return 0;
+
+	strs = ctx;
+	s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
+		    sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
+	if (!s)
+		return -EINVAL;
+	s->used = true;
+	return 0;
+}
+
+static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx)
+{
+	struct btf_str_ptrs *strs;
+	struct btf_str_ptr *s;
+
+	if (*str_off_ptr == 0)
+		return 0;
+
+	strs = ctx;
+	s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
+		    sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
+	if (!s)
+		return -EINVAL;
+	*str_off_ptr = s->new_off;
+	return 0;
+}
+
+/*
+ * Dedup string and filter out those that are not referenced from either .BTF
+ * or .BTF.ext (if provided) sections.
+ *
+ * This is done by building index of all strings in BTF's string section,
+ * then iterating over all entities that can reference strings (e.g., type
+ * names, struct field names, .BTF.ext line info, etc) and marking corresponding
+ * strings as used. After that all used strings are deduped and compacted into
+ * sequential blob of memory and new offsets are calculated. Then all the string
+ * references are iterated again and rewritten using new offsets.
+ */
+static int btf_dedup_strings(struct btf_dedup *d)
+{
+	char *start = d->btf->strs_data;
+	char *end = start + d->btf->hdr->str_len;
+	char *p = start, *tmp_strs = NULL;
+	struct btf_str_ptrs strs = {
+		.cnt = 0,
+		.cap = 0,
+		.ptrs = NULL,
+		.data = start,
+	};
+	int i, j, err = 0, grp_idx;
+	bool grp_used;
+
+	if (d->btf->strs_deduped)
+		return 0;
+
+	/* build index of all strings */
+	while (p < end) {
+		if (strs.cnt + 1 > strs.cap) {
+			struct btf_str_ptr *new_ptrs;
+
+			strs.cap += max(strs.cnt / 2, 16U);
+			new_ptrs = libbpf_reallocarray(strs.ptrs, strs.cap, sizeof(strs.ptrs[0]));
+			if (!new_ptrs) {
+				err = -ENOMEM;
+				goto done;
+			}
+			strs.ptrs = new_ptrs;
+		}
+
+		strs.ptrs[strs.cnt].str = p;
+		strs.ptrs[strs.cnt].used = false;
+
+		p += strlen(p) + 1;
+		strs.cnt++;
+	}
+
+	/* temporary storage for deduplicated strings */
+	tmp_strs = malloc(d->btf->hdr->str_len);
+	if (!tmp_strs) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	/* mark all used strings */
+	strs.ptrs[0].used = true;
+	err = btf_for_each_str_off(d, btf_str_mark_as_used, &strs);
+	if (err)
+		goto done;
+
+	/* sort strings by context, so that we can identify duplicates */
+	qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_content);
+
+	/*
+	 * iterate groups of equal strings and if any instance in a group was
+	 * referenced, emit single instance and remember new offset
+	 */
+	p = tmp_strs;
+	grp_idx = 0;
+	grp_used = strs.ptrs[0].used;
+	/* iterate past end to avoid code duplication after loop */
+	for (i = 1; i <= strs.cnt; i++) {
+		/*
+		 * when i == strs.cnt, we want to skip string comparison and go
+		 * straight to handling last group of strings (otherwise we'd
+		 * need to handle last group after the loop w/ duplicated code)
+		 */
+		if (i < strs.cnt &&
+		    !strcmp(strs.ptrs[i].str, strs.ptrs[grp_idx].str)) {
+			grp_used = grp_used || strs.ptrs[i].used;
+			continue;
+		}
+
+		/*
+		 * this check would have been required after the loop to handle
+		 * last group of strings, but due to <= condition in a loop
+		 * we avoid that duplication
+		 */
+		if (grp_used) {
+			int new_off = p - tmp_strs;
+			__u32 len = strlen(strs.ptrs[grp_idx].str);
+
+			memmove(p, strs.ptrs[grp_idx].str, len + 1);
+			for (j = grp_idx; j < i; j++)
+				strs.ptrs[j].new_off = new_off;
+			p += len + 1;
+		}
+
+		if (i < strs.cnt) {
+			grp_idx = i;
+			grp_used = strs.ptrs[i].used;
+		}
+	}
+
+	/* replace original strings with deduped ones */
+	d->btf->hdr->str_len = p - tmp_strs;
+	memmove(start, tmp_strs, d->btf->hdr->str_len);
+	end = start + d->btf->hdr->str_len;
+
+	/* restore original order for further binary search lookups */
+	qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_offset);
+
+	/* remap string offsets */
+	err = btf_for_each_str_off(d, btf_str_remap_offset, &strs);
+	if (err)
+		goto done;
+
+	d->btf->hdr->str_len = end - start;
+	d->btf->strs_deduped = true;
+
+done:
+	free(tmp_strs);
+	free(strs.ptrs);
+	return err;
+}
+
+static long btf_hash_common(struct btf_type *t)
+{
+	long h;
+
+	h = hash_combine(0, t->name_off);
+	h = hash_combine(h, t->info);
+	h = hash_combine(h, t->size);
+	return h;
+}
+
+static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2)
+{
+	return t1->name_off == t2->name_off &&
+	       t1->info == t2->info &&
+	       t1->size == t2->size;
+}
+
+/* Calculate type signature hash of INT. */
+static long btf_hash_int(struct btf_type *t)
+{
+	__u32 info = *(__u32 *)(t + 1);
+	long h;
+
+	h = btf_hash_common(t);
+	h = hash_combine(h, info);
+	return h;
+}
+
+/* Check structural equality of two INTs. */
+static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2)
+{
+	__u32 info1, info2;
+
+	if (!btf_equal_common(t1, t2))
+		return false;
+	info1 = *(__u32 *)(t1 + 1);
+	info2 = *(__u32 *)(t2 + 1);
+	return info1 == info2;
+}
+
+/* Calculate type signature hash of ENUM. */
+static long btf_hash_enum(struct btf_type *t)
+{
+	long h;
+
+	/* don't hash vlen and enum members to support enum fwd resolving */
+	h = hash_combine(0, t->name_off);
+	h = hash_combine(h, t->info & ~0xffff);
+	h = hash_combine(h, t->size);
+	return h;
+}
+
+/* Check structural equality of two ENUMs. */
+static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
+{
+	const struct btf_enum *m1, *m2;
+	__u16 vlen;
+	int i;
+
+	if (!btf_equal_common(t1, t2))
+		return false;
+
+	vlen = btf_vlen(t1);
+	m1 = btf_enum(t1);
+	m2 = btf_enum(t2);
+	for (i = 0; i < vlen; i++) {
+		if (m1->name_off != m2->name_off || m1->val != m2->val)
+			return false;
+		m1++;
+		m2++;
+	}
+	return true;
+}
+
+static inline bool btf_is_enum_fwd(struct btf_type *t)
+{
+	return btf_is_enum(t) && btf_vlen(t) == 0;
+}
+
+static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2)
+{
+	if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2))
+		return btf_equal_enum(t1, t2);
+	/* ignore vlen when comparing */
+	return t1->name_off == t2->name_off &&
+	       (t1->info & ~0xffff) == (t2->info & ~0xffff) &&
+	       t1->size == t2->size;
+}
+
+/*
+ * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs,
+ * as referenced type IDs equivalence is established separately during type
+ * graph equivalence check algorithm.
+ */
+static long btf_hash_struct(struct btf_type *t)
+{
+	const struct btf_member *member = btf_members(t);
+	__u32 vlen = btf_vlen(t);
+	long h = btf_hash_common(t);
+	int i;
+
+	for (i = 0; i < vlen; i++) {
+		h = hash_combine(h, member->name_off);
+		h = hash_combine(h, member->offset);
+		/* no hashing of referenced type ID, it can be unresolved yet */
+		member++;
+	}
+	return h;
+}
+
+/*
+ * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type
+ * IDs. This check is performed during type graph equivalence check and
+ * referenced types equivalence is checked separately.
+ */
+static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2)
+{
+	const struct btf_member *m1, *m2;
+	__u16 vlen;
+	int i;
+
+	if (!btf_equal_common(t1, t2))
+		return false;
+
+	vlen = btf_vlen(t1);
+	m1 = btf_members(t1);
+	m2 = btf_members(t2);
+	for (i = 0; i < vlen; i++) {
+		if (m1->name_off != m2->name_off || m1->offset != m2->offset)
+			return false;
+		m1++;
+		m2++;
+	}
+	return true;
+}
+
+/*
+ * Calculate type signature hash of ARRAY, including referenced type IDs,
+ * under assumption that they were already resolved to canonical type IDs and
+ * are not going to change.
+ */
+static long btf_hash_array(struct btf_type *t)
+{
+	const struct btf_array *info = btf_array(t);
+	long h = btf_hash_common(t);
+
+	h = hash_combine(h, info->type);
+	h = hash_combine(h, info->index_type);
+	h = hash_combine(h, info->nelems);
+	return h;
+}
+
+/*
+ * Check exact equality of two ARRAYs, taking into account referenced
+ * type IDs, under assumption that they were already resolved to canonical
+ * type IDs and are not going to change.
+ * This function is called during reference types deduplication to compare
+ * ARRAY to potential canonical representative.
+ */
+static bool btf_equal_array(struct btf_type *t1, struct btf_type *t2)
+{
+	const struct btf_array *info1, *info2;
+
+	if (!btf_equal_common(t1, t2))
+		return false;
+
+	info1 = btf_array(t1);
+	info2 = btf_array(t2);
+	return info1->type == info2->type &&
+	       info1->index_type == info2->index_type &&
+	       info1->nelems == info2->nelems;
+}
+
+/*
+ * Check structural compatibility of two ARRAYs, ignoring referenced type
+ * IDs. This check is performed during type graph equivalence check and
+ * referenced types equivalence is checked separately.
+ */
+static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2)
+{
+	if (!btf_equal_common(t1, t2))
+		return false;
+
+	return btf_array(t1)->nelems == btf_array(t2)->nelems;
+}
+
+/*
+ * Calculate type signature hash of FUNC_PROTO, including referenced type IDs,
+ * under assumption that they were already resolved to canonical type IDs and
+ * are not going to change.
+ */
+static long btf_hash_fnproto(struct btf_type *t)
+{
+	const struct btf_param *member = btf_params(t);
+	__u16 vlen = btf_vlen(t);
+	long h = btf_hash_common(t);
+	int i;
+
+	for (i = 0; i < vlen; i++) {
+		h = hash_combine(h, member->name_off);
+		h = hash_combine(h, member->type);
+		member++;
+	}
+	return h;
+}
+
+/*
+ * Check exact equality of two FUNC_PROTOs, taking into account referenced
+ * type IDs, under assumption that they were already resolved to canonical
+ * type IDs and are not going to change.
+ * This function is called during reference types deduplication to compare
+ * FUNC_PROTO to potential canonical representative.
+ */
+static bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2)
+{
+	const struct btf_param *m1, *m2;
+	__u16 vlen;
+	int i;
+
+	if (!btf_equal_common(t1, t2))
+		return false;
+
+	vlen = btf_vlen(t1);
+	m1 = btf_params(t1);
+	m2 = btf_params(t2);
+	for (i = 0; i < vlen; i++) {
+		if (m1->name_off != m2->name_off || m1->type != m2->type)
+			return false;
+		m1++;
+		m2++;
+	}
+	return true;
+}
+
+/*
+ * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type
+ * IDs. This check is performed during type graph equivalence check and
+ * referenced types equivalence is checked separately.
+ */
+static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
+{
+	const struct btf_param *m1, *m2;
+	__u16 vlen;
+	int i;
+
+	/* skip return type ID */
+	if (t1->name_off != t2->name_off || t1->info != t2->info)
+		return false;
+
+	vlen = btf_vlen(t1);
+	m1 = btf_params(t1);
+	m2 = btf_params(t2);
+	for (i = 0; i < vlen; i++) {
+		if (m1->name_off != m2->name_off)
+			return false;
+		m1++;
+		m2++;
+	}
+	return true;
+}
+
+/*
+ * Deduplicate primitive types, that can't reference other types, by calculating
+ * their type signature hash and comparing them with any possible canonical
+ * candidate. If no canonical candidate matches, type itself is marked as
+ * canonical and is added into `btf_dedup->dedup_table` as another candidate.
+ */
+static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
+{
+	struct btf_type *t = btf_type_by_id(d->btf, type_id);
+	struct hashmap_entry *hash_entry;
+	struct btf_type *cand;
+	/* if we don't find equivalent type, then we are canonical */
+	__u32 new_id = type_id;
+	__u32 cand_id;
+	long h;
+
+	switch (btf_kind(t)) {
+	case BTF_KIND_CONST:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_PTR:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_ARRAY:
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+	case BTF_KIND_FUNC:
+	case BTF_KIND_FUNC_PROTO:
+	case BTF_KIND_VAR:
+	case BTF_KIND_DATASEC:
+		return 0;
+
+	case BTF_KIND_INT:
+		h = btf_hash_int(t);
+		for_each_dedup_cand(d, hash_entry, h) {
+			cand_id = (__u32)(long)hash_entry->value;
+			cand = btf_type_by_id(d->btf, cand_id);
+			if (btf_equal_int(t, cand)) {
+				new_id = cand_id;
+				break;
+			}
+		}
+		break;
+
+	case BTF_KIND_ENUM:
+		h = btf_hash_enum(t);
+		for_each_dedup_cand(d, hash_entry, h) {
+			cand_id = (__u32)(long)hash_entry->value;
+			cand = btf_type_by_id(d->btf, cand_id);
+			if (btf_equal_enum(t, cand)) {
+				new_id = cand_id;
+				break;
+			}
+			if (d->opts.dont_resolve_fwds)
+				continue;
+			if (btf_compat_enum(t, cand)) {
+				if (btf_is_enum_fwd(t)) {
+					/* resolve fwd to full enum */
+					new_id = cand_id;
+					break;
+				}
+				/* resolve canonical enum fwd to full enum */
+				d->map[cand_id] = type_id;
+			}
+		}
+		break;
+
+	case BTF_KIND_FWD:
+		h = btf_hash_common(t);
+		for_each_dedup_cand(d, hash_entry, h) {
+			cand_id = (__u32)(long)hash_entry->value;
+			cand = btf_type_by_id(d->btf, cand_id);
+			if (btf_equal_common(t, cand)) {
+				new_id = cand_id;
+				break;
+			}
+		}
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	d->map[type_id] = new_id;
+	if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int btf_dedup_prim_types(struct btf_dedup *d)
+{
+	int i, err;
+
+	for (i = 1; i <= d->btf->nr_types; i++) {
+		err = btf_dedup_prim_type(d, i);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+/*
+ * Check whether type is already mapped into canonical one (could be to itself).
+ */
+static inline bool is_type_mapped(struct btf_dedup *d, uint32_t type_id)
+{
+	return d->map[type_id] <= BTF_MAX_NR_TYPES;
+}
+
+/*
+ * Resolve type ID into its canonical type ID, if any; otherwise return original
+ * type ID. If type is FWD and is resolved into STRUCT/UNION already, follow
+ * STRUCT/UNION link and resolve it into canonical type ID as well.
+ */
+static inline __u32 resolve_type_id(struct btf_dedup *d, __u32 type_id)
+{
+	while (is_type_mapped(d, type_id) && d->map[type_id] != type_id)
+		type_id = d->map[type_id];
+	return type_id;
+}
+
+/*
+ * Resolve FWD to underlying STRUCT/UNION, if any; otherwise return original
+ * type ID.
+ */
+static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id)
+{
+	__u32 orig_type_id = type_id;
+
+	if (!btf_is_fwd(btf__type_by_id(d->btf, type_id)))
+		return type_id;
+
+	while (is_type_mapped(d, type_id) && d->map[type_id] != type_id)
+		type_id = d->map[type_id];
+
+	if (!btf_is_fwd(btf__type_by_id(d->btf, type_id)))
+		return type_id;
+
+	return orig_type_id;
+}
+
+
+static inline __u16 btf_fwd_kind(struct btf_type *t)
+{
+	return btf_kflag(t) ? BTF_KIND_UNION : BTF_KIND_STRUCT;
+}
+
+/*
+ * Check equivalence of BTF type graph formed by candidate struct/union (we'll
+ * call it "candidate graph" in this description for brevity) to a type graph
+ * formed by (potential) canonical struct/union ("canonical graph" for brevity
+ * here, though keep in mind that not all types in canonical graph are
+ * necessarily canonical representatives themselves, some of them might be
+ * duplicates or its uniqueness might not have been established yet).
+ * Returns:
+ *  - >0, if type graphs are equivalent;
+ *  -  0, if not equivalent;
+ *  - <0, on error.
+ *
+ * Algorithm performs side-by-side DFS traversal of both type graphs and checks
+ * equivalence of BTF types at each step. If at any point BTF types in candidate
+ * and canonical graphs are not compatible structurally, whole graphs are
+ * incompatible. If types are structurally equivalent (i.e., all information
+ * except referenced type IDs is exactly the same), a mapping from `canon_id` to
+ * a `cand_id` is recored in hypothetical mapping (`btf_dedup->hypot_map`).
+ * If a type references other types, then those referenced types are checked
+ * for equivalence recursively.
+ *
+ * During DFS traversal, if we find that for current `canon_id` type we
+ * already have some mapping in hypothetical map, we check for two possible
+ * situations:
+ *   - `canon_id` is mapped to exactly the same type as `cand_id`. This will
+ *     happen when type graphs have cycles. In this case we assume those two
+ *     types are equivalent.
+ *   - `canon_id` is mapped to different type. This is contradiction in our
+ *     hypothetical mapping, because same graph in canonical graph corresponds
+ *     to two different types in candidate graph, which for equivalent type
+ *     graphs shouldn't happen. This condition terminates equivalence check
+ *     with negative result.
+ *
+ * If type graphs traversal exhausts types to check and find no contradiction,
+ * then type graphs are equivalent.
+ *
+ * When checking types for equivalence, there is one special case: FWD types.
+ * If FWD type resolution is allowed and one of the types (either from canonical
+ * or candidate graph) is FWD and other is STRUCT/UNION (depending on FWD's kind
+ * flag) and their names match, hypothetical mapping is updated to point from
+ * FWD to STRUCT/UNION. If graphs will be determined as equivalent successfully,
+ * this mapping will be used to record FWD -> STRUCT/UNION mapping permanently.
+ *
+ * Technically, this could lead to incorrect FWD to STRUCT/UNION resolution,
+ * if there are two exactly named (or anonymous) structs/unions that are
+ * compatible structurally, one of which has FWD field, while other is concrete
+ * STRUCT/UNION, but according to C sources they are different structs/unions
+ * that are referencing different types with the same name. This is extremely
+ * unlikely to happen, but btf_dedup API allows to disable FWD resolution if
+ * this logic is causing problems.
+ *
+ * Doing FWD resolution means that both candidate and/or canonical graphs can
+ * consists of portions of the graph that come from multiple compilation units.
+ * This is due to the fact that types within single compilation unit are always
+ * deduplicated and FWDs are already resolved, if referenced struct/union
+ * definiton is available. So, if we had unresolved FWD and found corresponding
+ * STRUCT/UNION, they will be from different compilation units. This
+ * consequently means that when we "link" FWD to corresponding STRUCT/UNION,
+ * type graph will likely have at least two different BTF types that describe
+ * same type (e.g., most probably there will be two different BTF types for the
+ * same 'int' primitive type) and could even have "overlapping" parts of type
+ * graph that describe same subset of types.
+ *
+ * This in turn means that our assumption that each type in canonical graph
+ * must correspond to exactly one type in candidate graph might not hold
+ * anymore and will make it harder to detect contradictions using hypothetical
+ * map. To handle this problem, we allow to follow FWD -> STRUCT/UNION
+ * resolution only in canonical graph. FWDs in candidate graphs are never
+ * resolved. To see why it's OK, let's check all possible situations w.r.t. FWDs
+ * that can occur:
+ *   - Both types in canonical and candidate graphs are FWDs. If they are
+ *     structurally equivalent, then they can either be both resolved to the
+ *     same STRUCT/UNION or not resolved at all. In both cases they are
+ *     equivalent and there is no need to resolve FWD on candidate side.
+ *   - Both types in canonical and candidate graphs are concrete STRUCT/UNION,
+ *     so nothing to resolve as well, algorithm will check equivalence anyway.
+ *   - Type in canonical graph is FWD, while type in candidate is concrete
+ *     STRUCT/UNION. In this case candidate graph comes from single compilation
+ *     unit, so there is exactly one BTF type for each unique C type. After
+ *     resolving FWD into STRUCT/UNION, there might be more than one BTF type
+ *     in canonical graph mapping to single BTF type in candidate graph, but
+ *     because hypothetical mapping maps from canonical to candidate types, it's
+ *     alright, and we still maintain the property of having single `canon_id`
+ *     mapping to single `cand_id` (there could be two different `canon_id`
+ *     mapped to the same `cand_id`, but it's not contradictory).
+ *   - Type in canonical graph is concrete STRUCT/UNION, while type in candidate
+ *     graph is FWD. In this case we are just going to check compatibility of
+ *     STRUCT/UNION and corresponding FWD, and if they are compatible, we'll
+ *     assume that whatever STRUCT/UNION FWD resolves to must be equivalent to
+ *     a concrete STRUCT/UNION from canonical graph. If the rest of type graphs
+ *     turn out equivalent, we'll re-resolve FWD to concrete STRUCT/UNION from
+ *     canonical graph.
+ */
+static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
+			      __u32 canon_id)
+{
+	struct btf_type *cand_type;
+	struct btf_type *canon_type;
+	__u32 hypot_type_id;
+	__u16 cand_kind;
+	__u16 canon_kind;
+	int i, eq;
+
+	/* if both resolve to the same canonical, they must be equivalent */
+	if (resolve_type_id(d, cand_id) == resolve_type_id(d, canon_id))
+		return 1;
+
+	canon_id = resolve_fwd_id(d, canon_id);
+
+	hypot_type_id = d->hypot_map[canon_id];
+	if (hypot_type_id <= BTF_MAX_NR_TYPES)
+		return hypot_type_id == cand_id;
+
+	if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
+		return -ENOMEM;
+
+	cand_type = btf_type_by_id(d->btf, cand_id);
+	canon_type = btf_type_by_id(d->btf, canon_id);
+	cand_kind = btf_kind(cand_type);
+	canon_kind = btf_kind(canon_type);
+
+	if (cand_type->name_off != canon_type->name_off)
+		return 0;
+
+	/* FWD <--> STRUCT/UNION equivalence check, if enabled */
+	if (!d->opts.dont_resolve_fwds
+	    && (cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD)
+	    && cand_kind != canon_kind) {
+		__u16 real_kind;
+		__u16 fwd_kind;
+
+		if (cand_kind == BTF_KIND_FWD) {
+			real_kind = canon_kind;
+			fwd_kind = btf_fwd_kind(cand_type);
+		} else {
+			real_kind = cand_kind;
+			fwd_kind = btf_fwd_kind(canon_type);
+		}
+		return fwd_kind == real_kind;
+	}
+
+	if (cand_kind != canon_kind)
+		return 0;
+
+	switch (cand_kind) {
+	case BTF_KIND_INT:
+		return btf_equal_int(cand_type, canon_type);
+
+	case BTF_KIND_ENUM:
+		if (d->opts.dont_resolve_fwds)
+			return btf_equal_enum(cand_type, canon_type);
+		else
+			return btf_compat_enum(cand_type, canon_type);
+
+	case BTF_KIND_FWD:
+		return btf_equal_common(cand_type, canon_type);
+
+	case BTF_KIND_CONST:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_PTR:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_FUNC:
+		if (cand_type->info != canon_type->info)
+			return 0;
+		return btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
+
+	case BTF_KIND_ARRAY: {
+		const struct btf_array *cand_arr, *canon_arr;
+
+		if (!btf_compat_array(cand_type, canon_type))
+			return 0;
+		cand_arr = btf_array(cand_type);
+		canon_arr = btf_array(canon_type);
+		eq = btf_dedup_is_equiv(d,
+			cand_arr->index_type, canon_arr->index_type);
+		if (eq <= 0)
+			return eq;
+		return btf_dedup_is_equiv(d, cand_arr->type, canon_arr->type);
+	}
+
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		const struct btf_member *cand_m, *canon_m;
+		__u16 vlen;
+
+		if (!btf_shallow_equal_struct(cand_type, canon_type))
+			return 0;
+		vlen = btf_vlen(cand_type);
+		cand_m = btf_members(cand_type);
+		canon_m = btf_members(canon_type);
+		for (i = 0; i < vlen; i++) {
+			eq = btf_dedup_is_equiv(d, cand_m->type, canon_m->type);
+			if (eq <= 0)
+				return eq;
+			cand_m++;
+			canon_m++;
+		}
+
+		return 1;
+	}
+
+	case BTF_KIND_FUNC_PROTO: {
+		const struct btf_param *cand_p, *canon_p;
+		__u16 vlen;
+
+		if (!btf_compat_fnproto(cand_type, canon_type))
+			return 0;
+		eq = btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
+		if (eq <= 0)
+			return eq;
+		vlen = btf_vlen(cand_type);
+		cand_p = btf_params(cand_type);
+		canon_p = btf_params(canon_type);
+		for (i = 0; i < vlen; i++) {
+			eq = btf_dedup_is_equiv(d, cand_p->type, canon_p->type);
+			if (eq <= 0)
+				return eq;
+			cand_p++;
+			canon_p++;
+		}
+		return 1;
+	}
+
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * Use hypothetical mapping, produced by successful type graph equivalence
+ * check, to augment existing struct/union canonical mapping, where possible.
+ *
+ * If BTF_KIND_FWD resolution is allowed, this mapping is also used to record
+ * FWD -> STRUCT/UNION correspondence as well. FWD resolution is bidirectional:
+ * it doesn't matter if FWD type was part of canonical graph or candidate one,
+ * we are recording the mapping anyway. As opposed to carefulness required
+ * for struct/union correspondence mapping (described below), for FWD resolution
+ * it's not important, as by the time that FWD type (reference type) will be
+ * deduplicated all structs/unions will be deduped already anyway.
+ *
+ * Recording STRUCT/UNION mapping is purely a performance optimization and is
+ * not required for correctness. It needs to be done carefully to ensure that
+ * struct/union from candidate's type graph is not mapped into corresponding
+ * struct/union from canonical type graph that itself hasn't been resolved into
+ * canonical representative. The only guarantee we have is that canonical
+ * struct/union was determined as canonical and that won't change. But any
+ * types referenced through that struct/union fields could have been not yet
+ * resolved, so in case like that it's too early to establish any kind of
+ * correspondence between structs/unions.
+ *
+ * No canonical correspondence is derived for primitive types (they are already
+ * deduplicated completely already anyway) or reference types (they rely on
+ * stability of struct/union canonical relationship for equivalence checks).
+ */
+static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
+{
+	__u32 cand_type_id, targ_type_id;
+	__u16 t_kind, c_kind;
+	__u32 t_id, c_id;
+	int i;
+
+	for (i = 0; i < d->hypot_cnt; i++) {
+		cand_type_id = d->hypot_list[i];
+		targ_type_id = d->hypot_map[cand_type_id];
+		t_id = resolve_type_id(d, targ_type_id);
+		c_id = resolve_type_id(d, cand_type_id);
+		t_kind = btf_kind(btf__type_by_id(d->btf, t_id));
+		c_kind = btf_kind(btf__type_by_id(d->btf, c_id));
+		/*
+		 * Resolve FWD into STRUCT/UNION.
+		 * It's ok to resolve FWD into STRUCT/UNION that's not yet
+		 * mapped to canonical representative (as opposed to
+		 * STRUCT/UNION <--> STRUCT/UNION mapping logic below), because
+		 * eventually that struct is going to be mapped and all resolved
+		 * FWDs will automatically resolve to correct canonical
+		 * representative. This will happen before ref type deduping,
+		 * which critically depends on stability of these mapping. This
+		 * stability is not a requirement for STRUCT/UNION equivalence
+		 * checks, though.
+		 */
+		if (t_kind != BTF_KIND_FWD && c_kind == BTF_KIND_FWD)
+			d->map[c_id] = t_id;
+		else if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)
+			d->map[t_id] = c_id;
+
+		if ((t_kind == BTF_KIND_STRUCT || t_kind == BTF_KIND_UNION) &&
+		    c_kind != BTF_KIND_FWD &&
+		    is_type_mapped(d, c_id) &&
+		    !is_type_mapped(d, t_id)) {
+			/*
+			 * as a perf optimization, we can map struct/union
+			 * that's part of type graph we just verified for
+			 * equivalence. We can do that for struct/union that has
+			 * canonical representative only, though.
+			 */
+			d->map[t_id] = c_id;
+		}
+	}
+}
+
+/*
+ * Deduplicate struct/union types.
+ *
+ * For each struct/union type its type signature hash is calculated, taking
+ * into account type's name, size, number, order and names of fields, but
+ * ignoring type ID's referenced from fields, because they might not be deduped
+ * completely until after reference types deduplication phase. This type hash
+ * is used to iterate over all potential canonical types, sharing same hash.
+ * For each canonical candidate we check whether type graphs that they form
+ * (through referenced types in fields and so on) are equivalent using algorithm
+ * implemented in `btf_dedup_is_equiv`. If such equivalence is found and
+ * BTF_KIND_FWD resolution is allowed, then hypothetical mapping
+ * (btf_dedup->hypot_map) produced by aforementioned type graph equivalence
+ * algorithm is used to record FWD -> STRUCT/UNION mapping. It's also used to
+ * potentially map other structs/unions to their canonical representatives,
+ * if such relationship hasn't yet been established. This speeds up algorithm
+ * by eliminating some of the duplicate work.
+ *
+ * If no matching canonical representative was found, struct/union is marked
+ * as canonical for itself and is added into btf_dedup->dedup_table hash map
+ * for further look ups.
+ */
+static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
+{
+	struct btf_type *cand_type, *t;
+	struct hashmap_entry *hash_entry;
+	/* if we don't find equivalent type, then we are canonical */
+	__u32 new_id = type_id;
+	__u16 kind;
+	long h;
+
+	/* already deduped or is in process of deduping (loop detected) */
+	if (d->map[type_id] <= BTF_MAX_NR_TYPES)
+		return 0;
+
+	t = btf_type_by_id(d->btf, type_id);
+	kind = btf_kind(t);
+
+	if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION)
+		return 0;
+
+	h = btf_hash_struct(t);
+	for_each_dedup_cand(d, hash_entry, h) {
+		__u32 cand_id = (__u32)(long)hash_entry->value;
+		int eq;
+
+		/*
+		 * Even though btf_dedup_is_equiv() checks for
+		 * btf_shallow_equal_struct() internally when checking two
+		 * structs (unions) for equivalence, we need to guard here
+		 * from picking matching FWD type as a dedup candidate.
+		 * This can happen due to hash collision. In such case just
+		 * relying on btf_dedup_is_equiv() would lead to potentially
+		 * creating a loop (FWD -> STRUCT and STRUCT -> FWD), because
+		 * FWD and compatible STRUCT/UNION are considered equivalent.
+		 */
+		cand_type = btf_type_by_id(d->btf, cand_id);
+		if (!btf_shallow_equal_struct(t, cand_type))
+			continue;
+
+		btf_dedup_clear_hypot_map(d);
+		eq = btf_dedup_is_equiv(d, type_id, cand_id);
+		if (eq < 0)
+			return eq;
+		if (!eq)
+			continue;
+		new_id = cand_id;
+		btf_dedup_merge_hypot_map(d);
+		break;
+	}
+
+	d->map[type_id] = new_id;
+	if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int btf_dedup_struct_types(struct btf_dedup *d)
+{
+	int i, err;
+
+	for (i = 1; i <= d->btf->nr_types; i++) {
+		err = btf_dedup_struct_type(d, i);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+/*
+ * Deduplicate reference type.
+ *
+ * Once all primitive and struct/union types got deduplicated, we can easily
+ * deduplicate all other (reference) BTF types. This is done in two steps:
+ *
+ * 1. Resolve all referenced type IDs into their canonical type IDs. This
+ * resolution can be done either immediately for primitive or struct/union types
+ * (because they were deduped in previous two phases) or recursively for
+ * reference types. Recursion will always terminate at either primitive or
+ * struct/union type, at which point we can "unwind" chain of reference types
+ * one by one. There is no danger of encountering cycles because in C type
+ * system the only way to form type cycle is through struct/union, so any chain
+ * of reference types, even those taking part in a type cycle, will inevitably
+ * reach struct/union at some point.
+ *
+ * 2. Once all referenced type IDs are resolved into canonical ones, BTF type
+ * becomes "stable", in the sense that no further deduplication will cause
+ * any changes to it. With that, it's now possible to calculate type's signature
+ * hash (this time taking into account referenced type IDs) and loop over all
+ * potential canonical representatives. If no match was found, current type
+ * will become canonical representative of itself and will be added into
+ * btf_dedup->dedup_table as another possible canonical representative.
+ */
+static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
+{
+	struct hashmap_entry *hash_entry;
+	__u32 new_id = type_id, cand_id;
+	struct btf_type *t, *cand;
+	/* if we don't find equivalent type, then we are representative type */
+	int ref_type_id;
+	long h;
+
+	if (d->map[type_id] == BTF_IN_PROGRESS_ID)
+		return -ELOOP;
+	if (d->map[type_id] <= BTF_MAX_NR_TYPES)
+		return resolve_type_id(d, type_id);
+
+	t = btf_type_by_id(d->btf, type_id);
+	d->map[type_id] = BTF_IN_PROGRESS_ID;
+
+	switch (btf_kind(t)) {
+	case BTF_KIND_CONST:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_PTR:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_FUNC:
+		ref_type_id = btf_dedup_ref_type(d, t->type);
+		if (ref_type_id < 0)
+			return ref_type_id;
+		t->type = ref_type_id;
+
+		h = btf_hash_common(t);
+		for_each_dedup_cand(d, hash_entry, h) {
+			cand_id = (__u32)(long)hash_entry->value;
+			cand = btf_type_by_id(d->btf, cand_id);
+			if (btf_equal_common(t, cand)) {
+				new_id = cand_id;
+				break;
+			}
+		}
+		break;
+
+	case BTF_KIND_ARRAY: {
+		struct btf_array *info = btf_array(t);
+
+		ref_type_id = btf_dedup_ref_type(d, info->type);
+		if (ref_type_id < 0)
+			return ref_type_id;
+		info->type = ref_type_id;
+
+		ref_type_id = btf_dedup_ref_type(d, info->index_type);
+		if (ref_type_id < 0)
+			return ref_type_id;
+		info->index_type = ref_type_id;
+
+		h = btf_hash_array(t);
+		for_each_dedup_cand(d, hash_entry, h) {
+			cand_id = (__u32)(long)hash_entry->value;
+			cand = btf_type_by_id(d->btf, cand_id);
+			if (btf_equal_array(t, cand)) {
+				new_id = cand_id;
+				break;
+			}
+		}
+		break;
+	}
+
+	case BTF_KIND_FUNC_PROTO: {
+		struct btf_param *param;
+		__u16 vlen;
+		int i;
+
+		ref_type_id = btf_dedup_ref_type(d, t->type);
+		if (ref_type_id < 0)
+			return ref_type_id;
+		t->type = ref_type_id;
+
+		vlen = btf_vlen(t);
+		param = btf_params(t);
+		for (i = 0; i < vlen; i++) {
+			ref_type_id = btf_dedup_ref_type(d, param->type);
+			if (ref_type_id < 0)
+				return ref_type_id;
+			param->type = ref_type_id;
+			param++;
+		}
+
+		h = btf_hash_fnproto(t);
+		for_each_dedup_cand(d, hash_entry, h) {
+			cand_id = (__u32)(long)hash_entry->value;
+			cand = btf_type_by_id(d->btf, cand_id);
+			if (btf_equal_fnproto(t, cand)) {
+				new_id = cand_id;
+				break;
+			}
+		}
+		break;
+	}
+
+	default:
+		return -EINVAL;
+	}
+
+	d->map[type_id] = new_id;
+	if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
+		return -ENOMEM;
+
+	return new_id;
+}
+
+static int btf_dedup_ref_types(struct btf_dedup *d)
+{
+	int i, err;
+
+	for (i = 1; i <= d->btf->nr_types; i++) {
+		err = btf_dedup_ref_type(d, i);
+		if (err < 0)
+			return err;
+	}
+	/* we won't need d->dedup_table anymore */
+	hashmap__free(d->dedup_table);
+	d->dedup_table = NULL;
+	return 0;
+}
+
+/*
+ * Compact types.
+ *
+ * After we established for each type its corresponding canonical representative
+ * type, we now can eliminate types that are not canonical and leave only
+ * canonical ones layed out sequentially in memory by copying them over
+ * duplicates. During compaction btf_dedup->hypot_map array is reused to store
+ * a map from original type ID to a new compacted type ID, which will be used
+ * during next phase to "fix up" type IDs, referenced from struct/union and
+ * reference types.
+ */
+static int btf_dedup_compact_types(struct btf_dedup *d)
+{
+	__u32 *new_offs;
+	__u32 next_type_id = 1;
+	void *p;
+	int i, len;
+
+	/* we are going to reuse hypot_map to store compaction remapping */
+	d->hypot_map[0] = 0;
+	for (i = 1; i <= d->btf->nr_types; i++)
+		d->hypot_map[i] = BTF_UNPROCESSED_ID;
+
+	p = d->btf->types_data;
+
+	for (i = 1; i <= d->btf->nr_types; i++) {
+		if (d->map[i] != i)
+			continue;
+
+		len = btf_type_size(btf__type_by_id(d->btf, i));
+		if (len < 0)
+			return len;
+
+		memmove(p, btf__type_by_id(d->btf, i), len);
+		d->hypot_map[i] = next_type_id;
+		d->btf->type_offs[next_type_id] = p - d->btf->types_data;
+		p += len;
+		next_type_id++;
+	}
+
+	/* shrink struct btf's internal types index and update btf_header */
+	d->btf->nr_types = next_type_id - 1;
+	d->btf->type_offs_cap = d->btf->nr_types + 1;
+	d->btf->hdr->type_len = p - d->btf->types_data;
+	new_offs = libbpf_reallocarray(d->btf->type_offs, d->btf->type_offs_cap,
+				       sizeof(*new_offs));
+	if (!new_offs)
+		return -ENOMEM;
+	d->btf->type_offs = new_offs;
+	d->btf->hdr->str_off = d->btf->hdr->type_len;
+	d->btf->raw_size = d->btf->hdr->hdr_len + d->btf->hdr->type_len + d->btf->hdr->str_len;
+	return 0;
+}
+
+/*
+ * Figure out final (deduplicated and compacted) type ID for provided original
+ * `type_id` by first resolving it into corresponding canonical type ID and
+ * then mapping it to a deduplicated type ID, stored in btf_dedup->hypot_map,
+ * which is populated during compaction phase.
+ */
+static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id)
+{
+	__u32 resolved_type_id, new_type_id;
+
+	resolved_type_id = resolve_type_id(d, type_id);
+	new_type_id = d->hypot_map[resolved_type_id];
+	if (new_type_id > BTF_MAX_NR_TYPES)
+		return -EINVAL;
+	return new_type_id;
+}
+
+/*
+ * Remap referenced type IDs into deduped type IDs.
+ *
+ * After BTF types are deduplicated and compacted, their final type IDs may
+ * differ from original ones. The map from original to a corresponding
+ * deduped type ID is stored in btf_dedup->hypot_map and is populated during
+ * compaction phase. During remapping phase we are rewriting all type IDs
+ * referenced from any BTF type (e.g., struct fields, func proto args, etc) to
+ * their final deduped type IDs.
+ */
+static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id)
+{
+	struct btf_type *t = btf_type_by_id(d->btf, type_id);
+	int i, r;
+
+	switch (btf_kind(t)) {
+	case BTF_KIND_INT:
+	case BTF_KIND_ENUM:
+		break;
+
+	case BTF_KIND_FWD:
+	case BTF_KIND_CONST:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_PTR:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_FUNC:
+	case BTF_KIND_VAR:
+		r = btf_dedup_remap_type_id(d, t->type);
+		if (r < 0)
+			return r;
+		t->type = r;
+		break;
+
+	case BTF_KIND_ARRAY: {
+		struct btf_array *arr_info = btf_array(t);
+
+		r = btf_dedup_remap_type_id(d, arr_info->type);
+		if (r < 0)
+			return r;
+		arr_info->type = r;
+		r = btf_dedup_remap_type_id(d, arr_info->index_type);
+		if (r < 0)
+			return r;
+		arr_info->index_type = r;
+		break;
+	}
+
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		struct btf_member *member = btf_members(t);
+		__u16 vlen = btf_vlen(t);
+
+		for (i = 0; i < vlen; i++) {
+			r = btf_dedup_remap_type_id(d, member->type);
+			if (r < 0)
+				return r;
+			member->type = r;
+			member++;
+		}
+		break;
+	}
+
+	case BTF_KIND_FUNC_PROTO: {
+		struct btf_param *param = btf_params(t);
+		__u16 vlen = btf_vlen(t);
+
+		r = btf_dedup_remap_type_id(d, t->type);
+		if (r < 0)
+			return r;
+		t->type = r;
+
+		for (i = 0; i < vlen; i++) {
+			r = btf_dedup_remap_type_id(d, param->type);
+			if (r < 0)
+				return r;
+			param->type = r;
+			param++;
+		}
+		break;
+	}
+
+	case BTF_KIND_DATASEC: {
+		struct btf_var_secinfo *var = btf_var_secinfos(t);
+		__u16 vlen = btf_vlen(t);
+
+		for (i = 0; i < vlen; i++) {
+			r = btf_dedup_remap_type_id(d, var->type);
+			if (r < 0)
+				return r;
+			var->type = r;
+			var++;
+		}
+		break;
+	}
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int btf_dedup_remap_types(struct btf_dedup *d)
+{
+	int i, r;
+
+	for (i = 1; i <= d->btf->nr_types; i++) {
+		r = btf_dedup_remap_type(d, i);
+		if (r < 0)
+			return r;
+	}
+	return 0;
+}
+
+/*
+ * Probe few well-known locations for vmlinux kernel image and try to load BTF
+ * data out of it to use for target BTF.
+ */
+struct btf *libbpf_find_kernel_btf(void)
+{
+	struct {
+		const char *path_fmt;
+		bool raw_btf;
+	} locations[] = {
+		/* try canonical vmlinux BTF through sysfs first */
+		{ "/sys/kernel/btf/vmlinux", true /* raw BTF */ },
+		/* fall back to trying to find vmlinux ELF on disk otherwise */
+		{ "/boot/vmlinux-%1$s" },
+		{ "/lib/modules/%1$s/vmlinux-%1$s" },
+		{ "/lib/modules/%1$s/build/vmlinux" },
+		{ "/usr/lib/modules/%1$s/kernel/vmlinux" },
+		{ "/usr/lib/debug/boot/vmlinux-%1$s" },
+		{ "/usr/lib/debug/boot/vmlinux-%1$s.debug" },
+		{ "/usr/lib/debug/lib/modules/%1$s/vmlinux" },
+	};
+	char path[PATH_MAX + 1];
+	struct utsname buf;
+	struct btf *btf;
+	int i;
+
+	uname(&buf);
+
+	for (i = 0; i < ARRAY_SIZE(locations); i++) {
+		snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release);
+
+		if (access(path, R_OK))
+			continue;
+
+		if (locations[i].raw_btf)
+			btf = btf__parse_raw(path);
+		else
+			btf = btf__parse_elf(path, NULL);
+
+		pr_debug("loading kernel BTF '%s': %ld\n",
+			 path, IS_ERR(btf) ? PTR_ERR(btf) : 0);
+		if (IS_ERR(btf))
+			continue;
+
+		return btf;
+	}
+
+	pr_warn("failed to find valid kernel BTF\n");
+	return ERR_PTR(-ESRCH);
+}
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
new file mode 100644
index 000000000..9cabc8b62
--- /dev/null
+++ b/tools/lib/bpf/btf.h
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2018 Facebook */
+
+#ifndef __LIBBPF_BTF_H
+#define __LIBBPF_BTF_H
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <linux/btf.h>
+#include <linux/types.h>
+
+#include "libbpf_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define BTF_ELF_SEC ".BTF"
+#define BTF_EXT_ELF_SEC ".BTF.ext"
+#define MAPS_ELF_SEC ".maps"
+
+struct btf;
+struct btf_ext;
+struct btf_type;
+
+struct bpf_object;
+
+enum btf_endianness {
+	BTF_LITTLE_ENDIAN = 0,
+	BTF_BIG_ENDIAN = 1,
+};
+
+LIBBPF_API void btf__free(struct btf *btf);
+LIBBPF_API struct btf *btf__new(const void *data, __u32 size);
+LIBBPF_API struct btf *btf__new_empty(void);
+LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext);
+LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext);
+LIBBPF_API struct btf *btf__parse_raw(const char *path);
+LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
+LIBBPF_API int btf__load(struct btf *btf);
+LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
+				   const char *type_name);
+LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf,
+					const char *type_name, __u32 kind);
+LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf);
+LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
+						  __u32 id);
+LIBBPF_API size_t btf__pointer_size(const struct btf *btf);
+LIBBPF_API int btf__set_pointer_size(struct btf *btf, size_t ptr_sz);
+LIBBPF_API enum btf_endianness btf__endianness(const struct btf *btf);
+LIBBPF_API int btf__set_endianness(struct btf *btf, enum btf_endianness endian);
+LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
+LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
+LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id);
+LIBBPF_API int btf__fd(const struct btf *btf);
+LIBBPF_API void btf__set_fd(struct btf *btf, int fd);
+LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
+LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
+LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset);
+LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
+LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
+				    __u32 expected_key_size,
+				    __u32 expected_value_size,
+				    __u32 *key_type_id, __u32 *value_type_id);
+
+LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size);
+LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);
+LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext,
+					     __u32 *size);
+LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions")
+int btf_ext__reloc_func_info(const struct btf *btf,
+			     const struct btf_ext *btf_ext,
+			     const char *sec_name, __u32 insns_cnt,
+			     void **func_info, __u32 *cnt);
+LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions")
+int btf_ext__reloc_line_info(const struct btf *btf,
+			     const struct btf_ext *btf_ext,
+			     const char *sec_name, __u32 insns_cnt,
+			     void **line_info, __u32 *cnt);
+LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
+LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
+
+LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
+
+LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
+LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
+
+LIBBPF_API int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding);
+LIBBPF_API int btf__add_ptr(struct btf *btf, int ref_type_id);
+LIBBPF_API int btf__add_array(struct btf *btf,
+			      int index_type_id, int elem_type_id, __u32 nr_elems);
+/* struct/union construction APIs */
+LIBBPF_API int btf__add_struct(struct btf *btf, const char *name, __u32 sz);
+LIBBPF_API int btf__add_union(struct btf *btf, const char *name, __u32 sz);
+LIBBPF_API int btf__add_field(struct btf *btf, const char *name, int field_type_id,
+			      __u32 bit_offset, __u32 bit_size);
+
+/* enum construction APIs */
+LIBBPF_API int btf__add_enum(struct btf *btf, const char *name, __u32 bytes_sz);
+LIBBPF_API int btf__add_enum_value(struct btf *btf, const char *name, __s64 value);
+
+enum btf_fwd_kind {
+	BTF_FWD_STRUCT = 0,
+	BTF_FWD_UNION = 1,
+	BTF_FWD_ENUM = 2,
+};
+
+LIBBPF_API int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind);
+LIBBPF_API int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id);
+LIBBPF_API int btf__add_volatile(struct btf *btf, int ref_type_id);
+LIBBPF_API int btf__add_const(struct btf *btf, int ref_type_id);
+LIBBPF_API int btf__add_restrict(struct btf *btf, int ref_type_id);
+
+/* func and func_proto construction APIs */
+LIBBPF_API int btf__add_func(struct btf *btf, const char *name,
+			     enum btf_func_linkage linkage, int proto_type_id);
+LIBBPF_API int btf__add_func_proto(struct btf *btf, int ret_type_id);
+LIBBPF_API int btf__add_func_param(struct btf *btf, const char *name, int type_id);
+
+/* var & datasec construction APIs */
+LIBBPF_API int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id);
+LIBBPF_API int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz);
+LIBBPF_API int btf__add_datasec_var_info(struct btf *btf, int var_type_id,
+					 __u32 offset, __u32 byte_sz);
+
+struct btf_dedup_opts {
+	unsigned int dedup_table_size;
+	bool dont_resolve_fwds;
+};
+
+LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
+			  const struct btf_dedup_opts *opts);
+
+struct btf_dump;
+
+struct btf_dump_opts {
+	void *ctx;
+};
+
+typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args);
+
+LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf,
+					  const struct btf_ext *btf_ext,
+					  const struct btf_dump_opts *opts,
+					  btf_dump_printf_fn_t printf_fn);
+LIBBPF_API void btf_dump__free(struct btf_dump *d);
+
+LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id);
+
+struct btf_dump_emit_type_decl_opts {
+	/* size of this struct, for forward/backward compatiblity */
+	size_t sz;
+	/* optional field name for type declaration, e.g.:
+	 * - struct my_struct <FNAME>
+	 * - void (*<FNAME>)(int)
+	 * - char (*<FNAME>)[123]
+	 */
+	const char *field_name;
+	/* extra indentation level (in number of tabs) to emit for multi-line
+	 * type declarations (e.g., anonymous struct); applies for lines
+	 * starting from the second one (first line is assumed to have
+	 * necessary indentation already
+	 */
+	int indent_level;
+	/* strip all the const/volatile/restrict mods */
+	bool strip_mods;
+	size_t :0;
+};
+#define btf_dump_emit_type_decl_opts__last_field strip_mods
+
+LIBBPF_API int
+btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
+			 const struct btf_dump_emit_type_decl_opts *opts);
+
+/*
+ * A set of helpers for easier BTF types handling
+ */
+static inline __u16 btf_kind(const struct btf_type *t)
+{
+	return BTF_INFO_KIND(t->info);
+}
+
+static inline __u16 btf_vlen(const struct btf_type *t)
+{
+	return BTF_INFO_VLEN(t->info);
+}
+
+static inline bool btf_kflag(const struct btf_type *t)
+{
+	return BTF_INFO_KFLAG(t->info);
+}
+
+static inline bool btf_is_void(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_UNKN;
+}
+
+static inline bool btf_is_int(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_INT;
+}
+
+static inline bool btf_is_ptr(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_PTR;
+}
+
+static inline bool btf_is_array(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_ARRAY;
+}
+
+static inline bool btf_is_struct(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_STRUCT;
+}
+
+static inline bool btf_is_union(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_UNION;
+}
+
+static inline bool btf_is_composite(const struct btf_type *t)
+{
+	__u16 kind = btf_kind(t);
+
+	return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
+}
+
+static inline bool btf_is_enum(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_ENUM;
+}
+
+static inline bool btf_is_fwd(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_FWD;
+}
+
+static inline bool btf_is_typedef(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_TYPEDEF;
+}
+
+static inline bool btf_is_volatile(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_VOLATILE;
+}
+
+static inline bool btf_is_const(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_CONST;
+}
+
+static inline bool btf_is_restrict(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_RESTRICT;
+}
+
+static inline bool btf_is_mod(const struct btf_type *t)
+{
+	__u16 kind = btf_kind(t);
+
+	return kind == BTF_KIND_VOLATILE ||
+	       kind == BTF_KIND_CONST ||
+	       kind == BTF_KIND_RESTRICT;
+}
+
+static inline bool btf_is_func(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_FUNC;
+}
+
+static inline bool btf_is_func_proto(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_FUNC_PROTO;
+}
+
+static inline bool btf_is_var(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_VAR;
+}
+
+static inline bool btf_is_datasec(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_DATASEC;
+}
+
+static inline __u8 btf_int_encoding(const struct btf_type *t)
+{
+	return BTF_INT_ENCODING(*(__u32 *)(t + 1));
+}
+
+static inline __u8 btf_int_offset(const struct btf_type *t)
+{
+	return BTF_INT_OFFSET(*(__u32 *)(t + 1));
+}
+
+static inline __u8 btf_int_bits(const struct btf_type *t)
+{
+	return BTF_INT_BITS(*(__u32 *)(t + 1));
+}
+
+static inline struct btf_array *btf_array(const struct btf_type *t)
+{
+	return (struct btf_array *)(t + 1);
+}
+
+static inline struct btf_enum *btf_enum(const struct btf_type *t)
+{
+	return (struct btf_enum *)(t + 1);
+}
+
+static inline struct btf_member *btf_members(const struct btf_type *t)
+{
+	return (struct btf_member *)(t + 1);
+}
+
+/* Get bit offset of a member with specified index. */
+static inline __u32 btf_member_bit_offset(const struct btf_type *t,
+					  __u32 member_idx)
+{
+	const struct btf_member *m = btf_members(t) + member_idx;
+	bool kflag = btf_kflag(t);
+
+	return kflag ? BTF_MEMBER_BIT_OFFSET(m->offset) : m->offset;
+}
+/*
+ * Get bitfield size of a member, assuming t is BTF_KIND_STRUCT or
+ * BTF_KIND_UNION. If member is not a bitfield, zero is returned.
+ */
+static inline __u32 btf_member_bitfield_size(const struct btf_type *t,
+					     __u32 member_idx)
+{
+	const struct btf_member *m = btf_members(t) + member_idx;
+	bool kflag = btf_kflag(t);
+
+	return kflag ? BTF_MEMBER_BITFIELD_SIZE(m->offset) : 0;
+}
+
+static inline struct btf_param *btf_params(const struct btf_type *t)
+{
+	return (struct btf_param *)(t + 1);
+}
+
+static inline struct btf_var *btf_var(const struct btf_type *t)
+{
+	return (struct btf_var *)(t + 1);
+}
+
+static inline struct btf_var_secinfo *
+btf_var_secinfos(const struct btf_type *t)
+{
+	return (struct btf_var_secinfo *)(t + 1);
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __LIBBPF_BTF_H */
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
new file mode 100644
index 000000000..61aa2c47f
--- /dev/null
+++ b/tools/lib/bpf/btf_dump.c
@@ -0,0 +1,1531 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * BTF-to-C type converter.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <linux/err.h>
+#include <linux/btf.h>
+#include <linux/kernel.h>
+#include "btf.h"
+#include "hashmap.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+
+static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t";
+static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1;
+
+static const char *pfx(int lvl)
+{
+	return lvl >= PREFIX_CNT ? PREFIXES : &PREFIXES[PREFIX_CNT - lvl];
+}
+
+enum btf_dump_type_order_state {
+	NOT_ORDERED,
+	ORDERING,
+	ORDERED,
+};
+
+enum btf_dump_type_emit_state {
+	NOT_EMITTED,
+	EMITTING,
+	EMITTED,
+};
+
+/* per-type auxiliary state */
+struct btf_dump_type_aux_state {
+	/* topological sorting state */
+	enum btf_dump_type_order_state order_state: 2;
+	/* emitting state used to determine the need for forward declaration */
+	enum btf_dump_type_emit_state emit_state: 2;
+	/* whether forward declaration was already emitted */
+	__u8 fwd_emitted: 1;
+	/* whether unique non-duplicate name was already assigned */
+	__u8 name_resolved: 1;
+	/* whether type is referenced from any other type */
+	__u8 referenced: 1;
+};
+
+struct btf_dump {
+	const struct btf *btf;
+	const struct btf_ext *btf_ext;
+	btf_dump_printf_fn_t printf_fn;
+	struct btf_dump_opts opts;
+	int ptr_sz;
+	bool strip_mods;
+	int last_id;
+
+	/* per-type auxiliary state */
+	struct btf_dump_type_aux_state *type_states;
+	size_t type_states_cap;
+	/* per-type optional cached unique name, must be freed, if present */
+	const char **cached_names;
+	size_t cached_names_cap;
+
+	/* topo-sorted list of dependent type definitions */
+	__u32 *emit_queue;
+	int emit_queue_cap;
+	int emit_queue_cnt;
+
+	/*
+	 * stack of type declarations (e.g., chain of modifiers, arrays,
+	 * funcs, etc)
+	 */
+	__u32 *decl_stack;
+	int decl_stack_cap;
+	int decl_stack_cnt;
+
+	/* maps struct/union/enum name to a number of name occurrences */
+	struct hashmap *type_names;
+	/*
+	 * maps typedef identifiers and enum value names to a number of such
+	 * name occurrences
+	 */
+	struct hashmap *ident_names;
+};
+
+static size_t str_hash_fn(const void *key, void *ctx)
+{
+	return str_hash(key);
+}
+
+static bool str_equal_fn(const void *a, const void *b, void *ctx)
+{
+	return strcmp(a, b) == 0;
+}
+
+static const char *btf_name_of(const struct btf_dump *d, __u32 name_off)
+{
+	return btf__name_by_offset(d->btf, name_off);
+}
+
+static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	d->printf_fn(d->opts.ctx, fmt, args);
+	va_end(args);
+}
+
+static int btf_dump_mark_referenced(struct btf_dump *d);
+static int btf_dump_resize(struct btf_dump *d);
+
+struct btf_dump *btf_dump__new(const struct btf *btf,
+			       const struct btf_ext *btf_ext,
+			       const struct btf_dump_opts *opts,
+			       btf_dump_printf_fn_t printf_fn)
+{
+	struct btf_dump *d;
+	int err;
+
+	d = calloc(1, sizeof(struct btf_dump));
+	if (!d)
+		return ERR_PTR(-ENOMEM);
+
+	d->btf = btf;
+	d->btf_ext = btf_ext;
+	d->printf_fn = printf_fn;
+	d->opts.ctx = opts ? opts->ctx : NULL;
+	d->ptr_sz = btf__pointer_size(btf) ? : sizeof(void *);
+
+	d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL);
+	if (IS_ERR(d->type_names)) {
+		err = PTR_ERR(d->type_names);
+		d->type_names = NULL;
+		goto err;
+	}
+	d->ident_names = hashmap__new(str_hash_fn, str_equal_fn, NULL);
+	if (IS_ERR(d->ident_names)) {
+		err = PTR_ERR(d->ident_names);
+		d->ident_names = NULL;
+		goto err;
+	}
+
+	err = btf_dump_resize(d);
+	if (err)
+		goto err;
+
+	return d;
+err:
+	btf_dump__free(d);
+	return ERR_PTR(err);
+}
+
+static int btf_dump_resize(struct btf_dump *d)
+{
+	int err, last_id = btf__get_nr_types(d->btf);
+
+	if (last_id <= d->last_id)
+		return 0;
+
+	if (btf_ensure_mem((void **)&d->type_states, &d->type_states_cap,
+			   sizeof(*d->type_states), last_id + 1))
+		return -ENOMEM;
+	if (btf_ensure_mem((void **)&d->cached_names, &d->cached_names_cap,
+			   sizeof(*d->cached_names), last_id + 1))
+		return -ENOMEM;
+
+	if (d->last_id == 0) {
+		/* VOID is special */
+		d->type_states[0].order_state = ORDERED;
+		d->type_states[0].emit_state = EMITTED;
+	}
+
+	/* eagerly determine referenced types for anon enums */
+	err = btf_dump_mark_referenced(d);
+	if (err)
+		return err;
+
+	d->last_id = last_id;
+	return 0;
+}
+
+static void btf_dump_free_names(struct hashmap *map)
+{
+	size_t bkt;
+	struct hashmap_entry *cur;
+
+	hashmap__for_each_entry(map, cur, bkt)
+		free((void *)cur->key);
+
+	hashmap__free(map);
+}
+
+void btf_dump__free(struct btf_dump *d)
+{
+	int i;
+
+	if (IS_ERR_OR_NULL(d))
+		return;
+
+	free(d->type_states);
+	if (d->cached_names) {
+		/* any set cached name is owned by us and should be freed */
+		for (i = 0; i <= d->last_id; i++) {
+			if (d->cached_names[i])
+				free((void *)d->cached_names[i]);
+		}
+	}
+	free(d->cached_names);
+	free(d->emit_queue);
+	free(d->decl_stack);
+	btf_dump_free_names(d->type_names);
+	btf_dump_free_names(d->ident_names);
+
+	free(d);
+}
+
+static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr);
+static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id);
+
+/*
+ * Dump BTF type in a compilable C syntax, including all the necessary
+ * dependent types, necessary for compilation. If some of the dependent types
+ * were already emitted as part of previous btf_dump__dump_type() invocation
+ * for another type, they won't be emitted again. This API allows callers to
+ * filter out BTF types according to user-defined criterias and emitted only
+ * minimal subset of types, necessary to compile everything. Full struct/union
+ * definitions will still be emitted, even if the only usage is through
+ * pointer and could be satisfied with just a forward declaration.
+ *
+ * Dumping is done in two high-level passes:
+ *   1. Topologically sort type definitions to satisfy C rules of compilation.
+ *   2. Emit type definitions in C syntax.
+ *
+ * Returns 0 on success; <0, otherwise.
+ */
+int btf_dump__dump_type(struct btf_dump *d, __u32 id)
+{
+	int err, i;
+
+	if (id > btf__get_nr_types(d->btf))
+		return -EINVAL;
+
+	err = btf_dump_resize(d);
+	if (err)
+		return err;
+
+	d->emit_queue_cnt = 0;
+	err = btf_dump_order_type(d, id, false);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i < d->emit_queue_cnt; i++)
+		btf_dump_emit_type(d, d->emit_queue[i], 0 /*top-level*/);
+
+	return 0;
+}
+
+/*
+ * Mark all types that are referenced from any other type. This is used to
+ * determine top-level anonymous enums that need to be emitted as an
+ * independent type declarations.
+ * Anonymous enums come in two flavors: either embedded in a struct's field
+ * definition, in which case they have to be declared inline as part of field
+ * type declaration; or as a top-level anonymous enum, typically used for
+ * declaring global constants. It's impossible to distinguish between two
+ * without knowning whether given enum type was referenced from other type:
+ * top-level anonymous enum won't be referenced by anything, while embedded
+ * one will.
+ */
+static int btf_dump_mark_referenced(struct btf_dump *d)
+{
+	int i, j, n = btf__get_nr_types(d->btf);
+	const struct btf_type *t;
+	__u16 vlen;
+
+	for (i = d->last_id + 1; i <= n; i++) {
+		t = btf__type_by_id(d->btf, i);
+		vlen = btf_vlen(t);
+
+		switch (btf_kind(t)) {
+		case BTF_KIND_INT:
+		case BTF_KIND_ENUM:
+		case BTF_KIND_FWD:
+			break;
+
+		case BTF_KIND_VOLATILE:
+		case BTF_KIND_CONST:
+		case BTF_KIND_RESTRICT:
+		case BTF_KIND_PTR:
+		case BTF_KIND_TYPEDEF:
+		case BTF_KIND_FUNC:
+		case BTF_KIND_VAR:
+			d->type_states[t->type].referenced = 1;
+			break;
+
+		case BTF_KIND_ARRAY: {
+			const struct btf_array *a = btf_array(t);
+
+			d->type_states[a->index_type].referenced = 1;
+			d->type_states[a->type].referenced = 1;
+			break;
+		}
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION: {
+			const struct btf_member *m = btf_members(t);
+
+			for (j = 0; j < vlen; j++, m++)
+				d->type_states[m->type].referenced = 1;
+			break;
+		}
+		case BTF_KIND_FUNC_PROTO: {
+			const struct btf_param *p = btf_params(t);
+
+			for (j = 0; j < vlen; j++, p++)
+				d->type_states[p->type].referenced = 1;
+			break;
+		}
+		case BTF_KIND_DATASEC: {
+			const struct btf_var_secinfo *v = btf_var_secinfos(t);
+
+			for (j = 0; j < vlen; j++, v++)
+				d->type_states[v->type].referenced = 1;
+			break;
+		}
+		default:
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
+{
+	__u32 *new_queue;
+	size_t new_cap;
+
+	if (d->emit_queue_cnt >= d->emit_queue_cap) {
+		new_cap = max(16, d->emit_queue_cap * 3 / 2);
+		new_queue = libbpf_reallocarray(d->emit_queue, new_cap, sizeof(new_queue[0]));
+		if (!new_queue)
+			return -ENOMEM;
+		d->emit_queue = new_queue;
+		d->emit_queue_cap = new_cap;
+	}
+
+	d->emit_queue[d->emit_queue_cnt++] = id;
+	return 0;
+}
+
+/*
+ * Determine order of emitting dependent types and specified type to satisfy
+ * C compilation rules.  This is done through topological sorting with an
+ * additional complication which comes from C rules. The main idea for C is
+ * that if some type is "embedded" into a struct/union, it's size needs to be
+ * known at the time of definition of containing type. E.g., for:
+ *
+ *	struct A {};
+ *	struct B { struct A x; }
+ *
+ * struct A *HAS* to be defined before struct B, because it's "embedded",
+ * i.e., it is part of struct B layout. But in the following case:
+ *
+ *	struct A;
+ *	struct B { struct A *x; }
+ *	struct A {};
+ *
+ * it's enough to just have a forward declaration of struct A at the time of
+ * struct B definition, as struct B has a pointer to struct A, so the size of
+ * field x is known without knowing struct A size: it's sizeof(void *).
+ *
+ * Unfortunately, there are some trickier cases we need to handle, e.g.:
+ *
+ *	struct A {}; // if this was forward-declaration: compilation error
+ *	struct B {
+ *		struct { // anonymous struct
+ *			struct A y;
+ *		} *x;
+ *	};
+ *
+ * In this case, struct B's field x is a pointer, so it's size is known
+ * regardless of the size of (anonymous) struct it points to. But because this
+ * struct is anonymous and thus defined inline inside struct B, *and* it
+ * embeds struct A, compiler requires full definition of struct A to be known
+ * before struct B can be defined. This creates a transitive dependency
+ * between struct A and struct B. If struct A was forward-declared before
+ * struct B definition and fully defined after struct B definition, that would
+ * trigger compilation error.
+ *
+ * All this means that while we are doing topological sorting on BTF type
+ * graph, we need to determine relationships between different types (graph
+ * nodes):
+ *   - weak link (relationship) between X and Y, if Y *CAN* be
+ *   forward-declared at the point of X definition;
+ *   - strong link, if Y *HAS* to be fully-defined before X can be defined.
+ *
+ * The rule is as follows. Given a chain of BTF types from X to Y, if there is
+ * BTF_KIND_PTR type in the chain and at least one non-anonymous type
+ * Z (excluding X, including Y), then link is weak. Otherwise, it's strong.
+ * Weak/strong relationship is determined recursively during DFS traversal and
+ * is returned as a result from btf_dump_order_type().
+ *
+ * btf_dump_order_type() is trying to avoid unnecessary forward declarations,
+ * but it is not guaranteeing that no extraneous forward declarations will be
+ * emitted.
+ *
+ * To avoid extra work, algorithm marks some of BTF types as ORDERED, when
+ * it's done with them, but not for all (e.g., VOLATILE, CONST, RESTRICT,
+ * ARRAY, FUNC_PROTO), as weak/strong semantics for those depends on the
+ * entire graph path, so depending where from one came to that BTF type, it
+ * might cause weak or strong ordering. For types like STRUCT/UNION/INT/ENUM,
+ * once they are processed, there is no need to do it again, so they are
+ * marked as ORDERED. We can mark PTR as ORDERED as well, as it semi-forces
+ * weak link, unless subsequent referenced STRUCT/UNION/ENUM is anonymous. But
+ * in any case, once those are processed, no need to do it again, as the
+ * result won't change.
+ *
+ * Returns:
+ *   - 1, if type is part of strong link (so there is strong topological
+ *   ordering requirements);
+ *   - 0, if type is part of weak link (so can be satisfied through forward
+ *   declaration);
+ *   - <0, on error (e.g., unsatisfiable type loop detected).
+ */
+static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
+{
+	/*
+	 * Order state is used to detect strong link cycles, but only for BTF
+	 * kinds that are or could be an independent definition (i.e.,
+	 * stand-alone fwd decl, enum, typedef, struct, union). Ptrs, arrays,
+	 * func_protos, modifiers are just means to get to these definitions.
+	 * Int/void don't need definitions, they are assumed to be always
+	 * properly defined.  We also ignore datasec, var, and funcs for now.
+	 * So for all non-defining kinds, we never even set ordering state,
+	 * for defining kinds we set ORDERING and subsequently ORDERED if it
+	 * forms a strong link.
+	 */
+	struct btf_dump_type_aux_state *tstate = &d->type_states[id];
+	const struct btf_type *t;
+	__u16 vlen;
+	int err, i;
+
+	/* return true, letting typedefs know that it's ok to be emitted */
+	if (tstate->order_state == ORDERED)
+		return 1;
+
+	t = btf__type_by_id(d->btf, id);
+
+	if (tstate->order_state == ORDERING) {
+		/* type loop, but resolvable through fwd declaration */
+		if (btf_is_composite(t) && through_ptr && t->name_off != 0)
+			return 0;
+		pr_warn("unsatisfiable type cycle, id:[%u]\n", id);
+		return -ELOOP;
+	}
+
+	switch (btf_kind(t)) {
+	case BTF_KIND_INT:
+		tstate->order_state = ORDERED;
+		return 0;
+
+	case BTF_KIND_PTR:
+		err = btf_dump_order_type(d, t->type, true);
+		tstate->order_state = ORDERED;
+		return err;
+
+	case BTF_KIND_ARRAY:
+		return btf_dump_order_type(d, btf_array(t)->type, false);
+
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		const struct btf_member *m = btf_members(t);
+		/*
+		 * struct/union is part of strong link, only if it's embedded
+		 * (so no ptr in a path) or it's anonymous (so has to be
+		 * defined inline, even if declared through ptr)
+		 */
+		if (through_ptr && t->name_off != 0)
+			return 0;
+
+		tstate->order_state = ORDERING;
+
+		vlen = btf_vlen(t);
+		for (i = 0; i < vlen; i++, m++) {
+			err = btf_dump_order_type(d, m->type, false);
+			if (err < 0)
+				return err;
+		}
+
+		if (t->name_off != 0) {
+			err = btf_dump_add_emit_queue_id(d, id);
+			if (err < 0)
+				return err;
+		}
+
+		tstate->order_state = ORDERED;
+		return 1;
+	}
+	case BTF_KIND_ENUM:
+	case BTF_KIND_FWD:
+		/*
+		 * non-anonymous or non-referenced enums are top-level
+		 * declarations and should be emitted. Same logic can be
+		 * applied to FWDs, it won't hurt anyways.
+		 */
+		if (t->name_off != 0 || !tstate->referenced) {
+			err = btf_dump_add_emit_queue_id(d, id);
+			if (err)
+				return err;
+		}
+		tstate->order_state = ORDERED;
+		return 1;
+
+	case BTF_KIND_TYPEDEF: {
+		int is_strong;
+
+		is_strong = btf_dump_order_type(d, t->type, through_ptr);
+		if (is_strong < 0)
+			return is_strong;
+
+		/* typedef is similar to struct/union w.r.t. fwd-decls */
+		if (through_ptr && !is_strong)
+			return 0;
+
+		/* typedef is always a named definition */
+		err = btf_dump_add_emit_queue_id(d, id);
+		if (err)
+			return err;
+
+		d->type_states[id].order_state = ORDERED;
+		return 1;
+	}
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_CONST:
+	case BTF_KIND_RESTRICT:
+		return btf_dump_order_type(d, t->type, through_ptr);
+
+	case BTF_KIND_FUNC_PROTO: {
+		const struct btf_param *p = btf_params(t);
+		bool is_strong;
+
+		err = btf_dump_order_type(d, t->type, through_ptr);
+		if (err < 0)
+			return err;
+		is_strong = err > 0;
+
+		vlen = btf_vlen(t);
+		for (i = 0; i < vlen; i++, p++) {
+			err = btf_dump_order_type(d, p->type, through_ptr);
+			if (err < 0)
+				return err;
+			if (err > 0)
+				is_strong = true;
+		}
+		return is_strong;
+	}
+	case BTF_KIND_FUNC:
+	case BTF_KIND_VAR:
+	case BTF_KIND_DATASEC:
+		d->type_states[id].order_state = ORDERED;
+		return 0;
+
+	default:
+		return -EINVAL;
+	}
+}
+
+static void btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id,
+					  const struct btf_type *t);
+
+static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
+				     const struct btf_type *t);
+static void btf_dump_emit_struct_def(struct btf_dump *d, __u32 id,
+				     const struct btf_type *t, int lvl);
+
+static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id,
+				   const struct btf_type *t);
+static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
+				   const struct btf_type *t, int lvl);
+
+static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id,
+				  const struct btf_type *t);
+
+static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id,
+				      const struct btf_type *t, int lvl);
+
+/* a local view into a shared stack */
+struct id_stack {
+	const __u32 *ids;
+	int cnt;
+};
+
+static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,
+				    const char *fname, int lvl);
+static void btf_dump_emit_type_chain(struct btf_dump *d,
+				     struct id_stack *decl_stack,
+				     const char *fname, int lvl);
+
+static const char *btf_dump_type_name(struct btf_dump *d, __u32 id);
+static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id);
+static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
+				 const char *orig_name);
+
+static bool btf_dump_is_blacklisted(struct btf_dump *d, __u32 id)
+{
+	const struct btf_type *t = btf__type_by_id(d->btf, id);
+
+	/* __builtin_va_list is a compiler built-in, which causes compilation
+	 * errors, when compiling w/ different compiler, then used to compile
+	 * original code (e.g., GCC to compile kernel, Clang to use generated
+	 * C header from BTF). As it is built-in, it should be already defined
+	 * properly internally in compiler.
+	 */
+	if (t->name_off == 0)
+		return false;
+	return strcmp(btf_name_of(d, t->name_off), "__builtin_va_list") == 0;
+}
+
+/*
+ * Emit C-syntax definitions of types from chains of BTF types.
+ *
+ * High-level handling of determining necessary forward declarations are handled
+ * by btf_dump_emit_type() itself, but all nitty-gritty details of emitting type
+ * declarations/definitions in C syntax  are handled by a combo of
+ * btf_dump_emit_type_decl()/btf_dump_emit_type_chain() w/ delegation to
+ * corresponding btf_dump_emit_*_{def,fwd}() functions.
+ *
+ * We also keep track of "containing struct/union type ID" to determine when
+ * we reference it from inside and thus can avoid emitting unnecessary forward
+ * declaration.
+ *
+ * This algorithm is designed in such a way, that even if some error occurs
+ * (either technical, e.g., out of memory, or logical, i.e., malformed BTF
+ * that doesn't comply to C rules completely), algorithm will try to proceed
+ * and produce as much meaningful output as possible.
+ */
+static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
+{
+	struct btf_dump_type_aux_state *tstate = &d->type_states[id];
+	bool top_level_def = cont_id == 0;
+	const struct btf_type *t;
+	__u16 kind;
+
+	if (tstate->emit_state == EMITTED)
+		return;
+
+	t = btf__type_by_id(d->btf, id);
+	kind = btf_kind(t);
+
+	if (tstate->emit_state == EMITTING) {
+		if (tstate->fwd_emitted)
+			return;
+
+		switch (kind) {
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+			/*
+			 * if we are referencing a struct/union that we are
+			 * part of - then no need for fwd declaration
+			 */
+			if (id == cont_id)
+				return;
+			if (t->name_off == 0) {
+				pr_warn("anonymous struct/union loop, id:[%u]\n",
+					id);
+				return;
+			}
+			btf_dump_emit_struct_fwd(d, id, t);
+			btf_dump_printf(d, ";\n\n");
+			tstate->fwd_emitted = 1;
+			break;
+		case BTF_KIND_TYPEDEF:
+			/*
+			 * for typedef fwd_emitted means typedef definition
+			 * was emitted, but it can be used only for "weak"
+			 * references through pointer only, not for embedding
+			 */
+			if (!btf_dump_is_blacklisted(d, id)) {
+				btf_dump_emit_typedef_def(d, id, t, 0);
+				btf_dump_printf(d, ";\n\n");
+			}
+			tstate->fwd_emitted = 1;
+			break;
+		default:
+			break;
+		}
+
+		return;
+	}
+
+	switch (kind) {
+	case BTF_KIND_INT:
+		/* Emit type alias definitions if necessary */
+		btf_dump_emit_missing_aliases(d, id, t);
+
+		tstate->emit_state = EMITTED;
+		break;
+	case BTF_KIND_ENUM:
+		if (top_level_def) {
+			btf_dump_emit_enum_def(d, id, t, 0);
+			btf_dump_printf(d, ";\n\n");
+		}
+		tstate->emit_state = EMITTED;
+		break;
+	case BTF_KIND_PTR:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_CONST:
+	case BTF_KIND_RESTRICT:
+		btf_dump_emit_type(d, t->type, cont_id);
+		break;
+	case BTF_KIND_ARRAY:
+		btf_dump_emit_type(d, btf_array(t)->type, cont_id);
+		break;
+	case BTF_KIND_FWD:
+		btf_dump_emit_fwd_def(d, id, t);
+		btf_dump_printf(d, ";\n\n");
+		tstate->emit_state = EMITTED;
+		break;
+	case BTF_KIND_TYPEDEF:
+		tstate->emit_state = EMITTING;
+		btf_dump_emit_type(d, t->type, id);
+		/*
+		 * typedef can server as both definition and forward
+		 * declaration; at this stage someone depends on
+		 * typedef as a forward declaration (refers to it
+		 * through pointer), so unless we already did it,
+		 * emit typedef as a forward declaration
+		 */
+		if (!tstate->fwd_emitted && !btf_dump_is_blacklisted(d, id)) {
+			btf_dump_emit_typedef_def(d, id, t, 0);
+			btf_dump_printf(d, ";\n\n");
+		}
+		tstate->emit_state = EMITTED;
+		break;
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+		tstate->emit_state = EMITTING;
+		/* if it's a top-level struct/union definition or struct/union
+		 * is anonymous, then in C we'll be emitting all fields and
+		 * their types (as opposed to just `struct X`), so we need to
+		 * make sure that all types, referenced from struct/union
+		 * members have necessary forward-declarations, where
+		 * applicable
+		 */
+		if (top_level_def || t->name_off == 0) {
+			const struct btf_member *m = btf_members(t);
+			__u16 vlen = btf_vlen(t);
+			int i, new_cont_id;
+
+			new_cont_id = t->name_off == 0 ? cont_id : id;
+			for (i = 0; i < vlen; i++, m++)
+				btf_dump_emit_type(d, m->type, new_cont_id);
+		} else if (!tstate->fwd_emitted && id != cont_id) {
+			btf_dump_emit_struct_fwd(d, id, t);
+			btf_dump_printf(d, ";\n\n");
+			tstate->fwd_emitted = 1;
+		}
+
+		if (top_level_def) {
+			btf_dump_emit_struct_def(d, id, t, 0);
+			btf_dump_printf(d, ";\n\n");
+			tstate->emit_state = EMITTED;
+		} else {
+			tstate->emit_state = NOT_EMITTED;
+		}
+		break;
+	case BTF_KIND_FUNC_PROTO: {
+		const struct btf_param *p = btf_params(t);
+		__u16 vlen = btf_vlen(t);
+		int i;
+
+		btf_dump_emit_type(d, t->type, cont_id);
+		for (i = 0; i < vlen; i++, p++)
+			btf_dump_emit_type(d, p->type, cont_id);
+
+		break;
+	}
+	default:
+		break;
+	}
+}
+
+static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
+				 const struct btf_type *t)
+{
+	const struct btf_member *m;
+	int max_align = 1, align, i, bit_sz;
+	__u16 vlen;
+
+	m = btf_members(t);
+	vlen = btf_vlen(t);
+	/* all non-bitfield fields have to be naturally aligned */
+	for (i = 0; i < vlen; i++, m++) {
+		align = btf__align_of(btf, m->type);
+		bit_sz = btf_member_bitfield_size(t, i);
+		if (align && bit_sz == 0 && m->offset % (8 * align) != 0)
+			return true;
+		max_align = max(align, max_align);
+	}
+	/* size of a non-packed struct has to be a multiple of its alignment */
+	if (t->size % max_align != 0)
+		return true;
+	/*
+	 * if original struct was marked as packed, but its layout is
+	 * naturally aligned, we'll detect that it's not packed
+	 */
+	return false;
+}
+
+static void btf_dump_emit_bit_padding(const struct btf_dump *d,
+				      int cur_off, int next_off, int next_align,
+				      bool in_bitfield, int lvl)
+{
+	const struct {
+		const char *name;
+		int bits;
+	} pads[] = {
+		{"long", d->ptr_sz * 8}, {"int", 32}, {"short", 16}, {"char", 8}
+	};
+	int new_off, pad_bits, bits, i;
+	const char *pad_type;
+
+	if (cur_off >= next_off)
+		return; /* no gap */
+
+	/* For filling out padding we want to take advantage of
+	 * natural alignment rules to minimize unnecessary explicit
+	 * padding. First, we find the largest type (among long, int,
+	 * short, or char) that can be used to force naturally aligned
+	 * boundary. Once determined, we'll use such type to fill in
+	 * the remaining padding gap. In some cases we can rely on
+	 * compiler filling some gaps, but sometimes we need to force
+	 * alignment to close natural alignment with markers like
+	 * `long: 0` (this is always the case for bitfields).  Note
+	 * that even if struct itself has, let's say 4-byte alignment
+	 * (i.e., it only uses up to int-aligned types), using `long:
+	 * X;` explicit padding doesn't actually change struct's
+	 * overall alignment requirements, but compiler does take into
+	 * account that type's (long, in this example) natural
+	 * alignment requirements when adding implicit padding. We use
+	 * this fact heavily and don't worry about ruining correct
+	 * struct alignment requirement.
+	 */
+	for (i = 0; i < ARRAY_SIZE(pads); i++) {
+		pad_bits = pads[i].bits;
+		pad_type = pads[i].name;
+
+		new_off = roundup(cur_off, pad_bits);
+		if (new_off <= next_off)
+			break;
+	}
+
+	if (new_off > cur_off && new_off <= next_off) {
+		/* We need explicit `<type>: 0` aligning mark if next
+		 * field is right on alignment offset and its
+		 * alignment requirement is less strict than <type>'s
+		 * alignment (so compiler won't naturally align to the
+		 * offset we expect), or if subsequent `<type>: X`,
+		 * will actually completely fit in the remaining hole,
+		 * making compiler basically ignore `<type>: X`
+		 * completely.
+		 */
+		if (in_bitfield ||
+		    (new_off == next_off && roundup(cur_off, next_align * 8) != new_off) ||
+		    (new_off != next_off && next_off - new_off <= new_off - cur_off))
+			/* but for bitfields we'll emit explicit bit count */
+			btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type,
+					in_bitfield ? new_off - cur_off : 0);
+		cur_off = new_off;
+	}
+
+	/* Now we know we start at naturally aligned offset for a chosen
+	 * padding type (long, int, short, or char), and so the rest is just
+	 * a straightforward filling of remaining padding gap with full
+	 * `<type>: sizeof(<type>);` markers, except for the last one, which
+	 * might need smaller than sizeof(<type>) padding.
+	 */
+	while (cur_off != next_off) {
+		bits = min(next_off - cur_off, pad_bits);
+		if (bits == pad_bits) {
+			btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits);
+			cur_off += bits;
+			continue;
+		}
+		/* For the remainder padding that doesn't cover entire
+		 * pad_type bit length, we pick the smallest necessary type.
+		 * This is pure aesthetics, we could have just used `long`,
+		 * but having smallest necessary one communicates better the
+		 * scale of the padding gap.
+		 */
+		for (i = ARRAY_SIZE(pads) - 1; i >= 0; i--) {
+			pad_type = pads[i].name;
+			pad_bits = pads[i].bits;
+			if (pad_bits < bits)
+				continue;
+
+			btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, bits);
+			cur_off += bits;
+			break;
+		}
+	}
+}
+
+static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
+				     const struct btf_type *t)
+{
+	btf_dump_printf(d, "%s %s",
+			btf_is_struct(t) ? "struct" : "union",
+			btf_dump_type_name(d, id));
+}
+
+static void btf_dump_emit_struct_def(struct btf_dump *d,
+				     __u32 id,
+				     const struct btf_type *t,
+				     int lvl)
+{
+	const struct btf_member *m = btf_members(t);
+	bool is_struct = btf_is_struct(t);
+	bool packed, prev_bitfield = false;
+	int align, i, off = 0;
+	__u16 vlen = btf_vlen(t);
+
+	align = btf__align_of(d->btf, id);
+	packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0;
+
+	btf_dump_printf(d, "%s%s%s {",
+			is_struct ? "struct" : "union",
+			t->name_off ? " " : "",
+			btf_dump_type_name(d, id));
+
+	for (i = 0; i < vlen; i++, m++) {
+		const char *fname;
+		int m_off, m_sz, m_align;
+		bool in_bitfield;
+
+		fname = btf_name_of(d, m->name_off);
+		m_sz = btf_member_bitfield_size(t, i);
+		m_off = btf_member_bit_offset(t, i);
+		m_align = packed ? 1 : btf__align_of(d->btf, m->type);
+
+		in_bitfield = prev_bitfield && m_sz != 0;
+
+		btf_dump_emit_bit_padding(d, off, m_off, m_align, in_bitfield, lvl + 1);
+		btf_dump_printf(d, "\n%s", pfx(lvl + 1));
+		btf_dump_emit_type_decl(d, m->type, fname, lvl + 1);
+
+		if (m_sz) {
+			btf_dump_printf(d, ": %d", m_sz);
+			off = m_off + m_sz;
+			prev_bitfield = true;
+		} else {
+			m_sz = max((__s64)0, btf__resolve_size(d->btf, m->type));
+			off = m_off + m_sz * 8;
+			prev_bitfield = false;
+		}
+
+		btf_dump_printf(d, ";");
+	}
+
+	/* pad at the end, if necessary */
+	if (is_struct)
+		btf_dump_emit_bit_padding(d, off, t->size * 8, align, false, lvl + 1);
+
+	/*
+	 * Keep `struct empty {}` on a single line,
+	 * only print newline when there are regular or padding fields.
+	 */
+	if (vlen || t->size) {
+		btf_dump_printf(d, "\n");
+		btf_dump_printf(d, "%s}", pfx(lvl));
+	} else {
+		btf_dump_printf(d, "}");
+	}
+	if (packed)
+		btf_dump_printf(d, " __attribute__((packed))");
+}
+
+static const char *missing_base_types[][2] = {
+	/*
+	 * GCC emits typedefs to its internal __PolyX_t types when compiling Arm
+	 * SIMD intrinsics. Alias them to standard base types.
+	 */
+	{ "__Poly8_t",		"unsigned char" },
+	{ "__Poly16_t",		"unsigned short" },
+	{ "__Poly64_t",		"unsigned long long" },
+	{ "__Poly128_t",	"unsigned __int128" },
+};
+
+static void btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id,
+					  const struct btf_type *t)
+{
+	const char *name = btf_dump_type_name(d, id);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(missing_base_types); i++) {
+		if (strcmp(name, missing_base_types[i][0]) == 0) {
+			btf_dump_printf(d, "typedef %s %s;\n\n",
+					missing_base_types[i][1], name);
+			break;
+		}
+	}
+}
+
+static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id,
+				   const struct btf_type *t)
+{
+	btf_dump_printf(d, "enum %s", btf_dump_type_name(d, id));
+}
+
+static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
+				   const struct btf_type *t,
+				   int lvl)
+{
+	const struct btf_enum *v = btf_enum(t);
+	__u16 vlen = btf_vlen(t);
+	const char *name;
+	size_t dup_cnt;
+	int i;
+
+	btf_dump_printf(d, "enum%s%s",
+			t->name_off ? " " : "",
+			btf_dump_type_name(d, id));
+
+	if (vlen) {
+		btf_dump_printf(d, " {");
+		for (i = 0; i < vlen; i++, v++) {
+			name = btf_name_of(d, v->name_off);
+			/* enumerators share namespace with typedef idents */
+			dup_cnt = btf_dump_name_dups(d, d->ident_names, name);
+			if (dup_cnt > 1) {
+				btf_dump_printf(d, "\n%s%s___%zu = %u,",
+						pfx(lvl + 1), name, dup_cnt,
+						(__u32)v->val);
+			} else {
+				btf_dump_printf(d, "\n%s%s = %u,",
+						pfx(lvl + 1), name,
+						(__u32)v->val);
+			}
+		}
+		btf_dump_printf(d, "\n%s}", pfx(lvl));
+	}
+}
+
+static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id,
+				  const struct btf_type *t)
+{
+	const char *name = btf_dump_type_name(d, id);
+
+	if (btf_kflag(t))
+		btf_dump_printf(d, "union %s", name);
+	else
+		btf_dump_printf(d, "struct %s", name);
+}
+
+static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id,
+				     const struct btf_type *t, int lvl)
+{
+	const char *name = btf_dump_ident_name(d, id);
+
+	/*
+	 * Old GCC versions are emitting invalid typedef for __gnuc_va_list
+	 * pointing to VOID. This generates warnings from btf_dump() and
+	 * results in uncompilable header file, so we are fixing it up here
+	 * with valid typedef into __builtin_va_list.
+	 */
+	if (t->type == 0 && strcmp(name, "__gnuc_va_list") == 0) {
+		btf_dump_printf(d, "typedef __builtin_va_list __gnuc_va_list");
+		return;
+	}
+
+	btf_dump_printf(d, "typedef ");
+	btf_dump_emit_type_decl(d, t->type, name, lvl);
+}
+
+static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id)
+{
+	__u32 *new_stack;
+	size_t new_cap;
+
+	if (d->decl_stack_cnt >= d->decl_stack_cap) {
+		new_cap = max(16, d->decl_stack_cap * 3 / 2);
+		new_stack = libbpf_reallocarray(d->decl_stack, new_cap, sizeof(new_stack[0]));
+		if (!new_stack)
+			return -ENOMEM;
+		d->decl_stack = new_stack;
+		d->decl_stack_cap = new_cap;
+	}
+
+	d->decl_stack[d->decl_stack_cnt++] = id;
+
+	return 0;
+}
+
+/*
+ * Emit type declaration (e.g., field type declaration in a struct or argument
+ * declaration in function prototype) in correct C syntax.
+ *
+ * For most types it's trivial, but there are few quirky type declaration
+ * cases worth mentioning:
+ *   - function prototypes (especially nesting of function prototypes);
+ *   - arrays;
+ *   - const/volatile/restrict for pointers vs other types.
+ *
+ * For a good discussion of *PARSING* C syntax (as a human), see
+ * Peter van der Linden's "Expert C Programming: Deep C Secrets",
+ * Ch.3 "Unscrambling Declarations in C".
+ *
+ * It won't help with BTF to C conversion much, though, as it's an opposite
+ * problem. So we came up with this algorithm in reverse to van der Linden's
+ * parsing algorithm. It goes from structured BTF representation of type
+ * declaration to a valid compilable C syntax.
+ *
+ * For instance, consider this C typedef:
+ *	typedef const int * const * arr[10] arr_t;
+ * It will be represented in BTF with this chain of BTF types:
+ *	[typedef] -> [array] -> [ptr] -> [const] -> [ptr] -> [const] -> [int]
+ *
+ * Notice how [const] modifier always goes before type it modifies in BTF type
+ * graph, but in C syntax, const/volatile/restrict modifiers are written to
+ * the right of pointers, but to the left of other types. There are also other
+ * quirks, like function pointers, arrays of them, functions returning other
+ * functions, etc.
+ *
+ * We handle that by pushing all the types to a stack, until we hit "terminal"
+ * type (int/enum/struct/union/fwd). Then depending on the kind of a type on
+ * top of a stack, modifiers are handled differently. Array/function pointers
+ * have also wildly different syntax and how nesting of them are done. See
+ * code for authoritative definition.
+ *
+ * To avoid allocating new stack for each independent chain of BTF types, we
+ * share one bigger stack, with each chain working only on its own local view
+ * of a stack frame. Some care is required to "pop" stack frames after
+ * processing type declaration chain.
+ */
+int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
+			     const struct btf_dump_emit_type_decl_opts *opts)
+{
+	const char *fname;
+	int lvl, err;
+
+	if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts))
+		return -EINVAL;
+
+	err = btf_dump_resize(d);
+	if (err)
+		return -EINVAL;
+
+	fname = OPTS_GET(opts, field_name, "");
+	lvl = OPTS_GET(opts, indent_level, 0);
+	d->strip_mods = OPTS_GET(opts, strip_mods, false);
+	btf_dump_emit_type_decl(d, id, fname, lvl);
+	d->strip_mods = false;
+	return 0;
+}
+
+static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,
+				    const char *fname, int lvl)
+{
+	struct id_stack decl_stack;
+	const struct btf_type *t;
+	int err, stack_start;
+
+	stack_start = d->decl_stack_cnt;
+	for (;;) {
+		t = btf__type_by_id(d->btf, id);
+		if (d->strip_mods && btf_is_mod(t))
+			goto skip_mod;
+
+		err = btf_dump_push_decl_stack_id(d, id);
+		if (err < 0) {
+			/*
+			 * if we don't have enough memory for entire type decl
+			 * chain, restore stack, emit warning, and try to
+			 * proceed nevertheless
+			 */
+			pr_warn("not enough memory for decl stack:%d", err);
+			d->decl_stack_cnt = stack_start;
+			return;
+		}
+skip_mod:
+		/* VOID */
+		if (id == 0)
+			break;
+
+		switch (btf_kind(t)) {
+		case BTF_KIND_PTR:
+		case BTF_KIND_VOLATILE:
+		case BTF_KIND_CONST:
+		case BTF_KIND_RESTRICT:
+		case BTF_KIND_FUNC_PROTO:
+			id = t->type;
+			break;
+		case BTF_KIND_ARRAY:
+			id = btf_array(t)->type;
+			break;
+		case BTF_KIND_INT:
+		case BTF_KIND_ENUM:
+		case BTF_KIND_FWD:
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+		case BTF_KIND_TYPEDEF:
+			goto done;
+		default:
+			pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n",
+				btf_kind(t), id);
+			goto done;
+		}
+	}
+done:
+	/*
+	 * We might be inside a chain of declarations (e.g., array of function
+	 * pointers returning anonymous (so inlined) structs, having another
+	 * array field). Each of those needs its own "stack frame" to handle
+	 * emitting of declarations. Those stack frames are non-overlapping
+	 * portions of shared btf_dump->decl_stack. To make it a bit nicer to
+	 * handle this set of nested stacks, we create a view corresponding to
+	 * our own "stack frame" and work with it as an independent stack.
+	 * We'll need to clean up after emit_type_chain() returns, though.
+	 */
+	decl_stack.ids = d->decl_stack + stack_start;
+	decl_stack.cnt = d->decl_stack_cnt - stack_start;
+	btf_dump_emit_type_chain(d, &decl_stack, fname, lvl);
+	/*
+	 * emit_type_chain() guarantees that it will pop its entire decl_stack
+	 * frame before returning. But it works with a read-only view into
+	 * decl_stack, so it doesn't actually pop anything from the
+	 * perspective of shared btf_dump->decl_stack, per se. We need to
+	 * reset decl_stack state to how it was before us to avoid it growing
+	 * all the time.
+	 */
+	d->decl_stack_cnt = stack_start;
+}
+
+static void btf_dump_emit_mods(struct btf_dump *d, struct id_stack *decl_stack)
+{
+	const struct btf_type *t;
+	__u32 id;
+
+	while (decl_stack->cnt) {
+		id = decl_stack->ids[decl_stack->cnt - 1];
+		t = btf__type_by_id(d->btf, id);
+
+		switch (btf_kind(t)) {
+		case BTF_KIND_VOLATILE:
+			btf_dump_printf(d, "volatile ");
+			break;
+		case BTF_KIND_CONST:
+			btf_dump_printf(d, "const ");
+			break;
+		case BTF_KIND_RESTRICT:
+			btf_dump_printf(d, "restrict ");
+			break;
+		default:
+			return;
+		}
+		decl_stack->cnt--;
+	}
+}
+
+static void btf_dump_drop_mods(struct btf_dump *d, struct id_stack *decl_stack)
+{
+	const struct btf_type *t;
+	__u32 id;
+
+	while (decl_stack->cnt) {
+		id = decl_stack->ids[decl_stack->cnt - 1];
+		t = btf__type_by_id(d->btf, id);
+		if (!btf_is_mod(t))
+			return;
+		decl_stack->cnt--;
+	}
+}
+
+static void btf_dump_emit_name(const struct btf_dump *d,
+			       const char *name, bool last_was_ptr)
+{
+	bool separate = name[0] && !last_was_ptr;
+
+	btf_dump_printf(d, "%s%s", separate ? " " : "", name);
+}
+
+static void btf_dump_emit_type_chain(struct btf_dump *d,
+				     struct id_stack *decls,
+				     const char *fname, int lvl)
+{
+	/*
+	 * last_was_ptr is used to determine if we need to separate pointer
+	 * asterisk (*) from previous part of type signature with space, so
+	 * that we get `int ***`, instead of `int * * *`. We default to true
+	 * for cases where we have single pointer in a chain. E.g., in ptr ->
+	 * func_proto case. func_proto will start a new emit_type_chain call
+	 * with just ptr, which should be emitted as (*) or (*<fname>), so we
+	 * don't want to prepend space for that last pointer.
+	 */
+	bool last_was_ptr = true;
+	const struct btf_type *t;
+	const char *name;
+	__u16 kind;
+	__u32 id;
+
+	while (decls->cnt) {
+		id = decls->ids[--decls->cnt];
+		if (id == 0) {
+			/* VOID is a special snowflake */
+			btf_dump_emit_mods(d, decls);
+			btf_dump_printf(d, "void");
+			last_was_ptr = false;
+			continue;
+		}
+
+		t = btf__type_by_id(d->btf, id);
+		kind = btf_kind(t);
+
+		switch (kind) {
+		case BTF_KIND_INT:
+			btf_dump_emit_mods(d, decls);
+			name = btf_name_of(d, t->name_off);
+			btf_dump_printf(d, "%s", name);
+			break;
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+			btf_dump_emit_mods(d, decls);
+			/* inline anonymous struct/union */
+			if (t->name_off == 0)
+				btf_dump_emit_struct_def(d, id, t, lvl);
+			else
+				btf_dump_emit_struct_fwd(d, id, t);
+			break;
+		case BTF_KIND_ENUM:
+			btf_dump_emit_mods(d, decls);
+			/* inline anonymous enum */
+			if (t->name_off == 0)
+				btf_dump_emit_enum_def(d, id, t, lvl);
+			else
+				btf_dump_emit_enum_fwd(d, id, t);
+			break;
+		case BTF_KIND_FWD:
+			btf_dump_emit_mods(d, decls);
+			btf_dump_emit_fwd_def(d, id, t);
+			break;
+		case BTF_KIND_TYPEDEF:
+			btf_dump_emit_mods(d, decls);
+			btf_dump_printf(d, "%s", btf_dump_ident_name(d, id));
+			break;
+		case BTF_KIND_PTR:
+			btf_dump_printf(d, "%s", last_was_ptr ? "*" : " *");
+			break;
+		case BTF_KIND_VOLATILE:
+			btf_dump_printf(d, " volatile");
+			break;
+		case BTF_KIND_CONST:
+			btf_dump_printf(d, " const");
+			break;
+		case BTF_KIND_RESTRICT:
+			btf_dump_printf(d, " restrict");
+			break;
+		case BTF_KIND_ARRAY: {
+			const struct btf_array *a = btf_array(t);
+			const struct btf_type *next_t;
+			__u32 next_id;
+			bool multidim;
+			/*
+			 * GCC has a bug
+			 * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=8354)
+			 * which causes it to emit extra const/volatile
+			 * modifiers for an array, if array's element type has
+			 * const/volatile modifiers. Clang doesn't do that.
+			 * In general, it doesn't seem very meaningful to have
+			 * a const/volatile modifier for array, so we are
+			 * going to silently skip them here.
+			 */
+			btf_dump_drop_mods(d, decls);
+
+			if (decls->cnt == 0) {
+				btf_dump_emit_name(d, fname, last_was_ptr);
+				btf_dump_printf(d, "[%u]", a->nelems);
+				return;
+			}
+
+			next_id = decls->ids[decls->cnt - 1];
+			next_t = btf__type_by_id(d->btf, next_id);
+			multidim = btf_is_array(next_t);
+			/* we need space if we have named non-pointer */
+			if (fname[0] && !last_was_ptr)
+				btf_dump_printf(d, " ");
+			/* no parentheses for multi-dimensional array */
+			if (!multidim)
+				btf_dump_printf(d, "(");
+			btf_dump_emit_type_chain(d, decls, fname, lvl);
+			if (!multidim)
+				btf_dump_printf(d, ")");
+			btf_dump_printf(d, "[%u]", a->nelems);
+			return;
+		}
+		case BTF_KIND_FUNC_PROTO: {
+			const struct btf_param *p = btf_params(t);
+			__u16 vlen = btf_vlen(t);
+			int i;
+
+			/*
+			 * GCC emits extra volatile qualifier for
+			 * __attribute__((noreturn)) function pointers. Clang
+			 * doesn't do it. It's a GCC quirk for backwards
+			 * compatibility with code written for GCC <2.5. So,
+			 * similarly to extra qualifiers for array, just drop
+			 * them, instead of handling them.
+			 */
+			btf_dump_drop_mods(d, decls);
+			if (decls->cnt) {
+				btf_dump_printf(d, " (");
+				btf_dump_emit_type_chain(d, decls, fname, lvl);
+				btf_dump_printf(d, ")");
+			} else {
+				btf_dump_emit_name(d, fname, last_was_ptr);
+			}
+			btf_dump_printf(d, "(");
+			/*
+			 * Clang for BPF target generates func_proto with no
+			 * args as a func_proto with a single void arg (e.g.,
+			 * `int (*f)(void)` vs just `int (*f)()`). We are
+			 * going to pretend there are no args for such case.
+			 */
+			if (vlen == 1 && p->type == 0) {
+				btf_dump_printf(d, ")");
+				return;
+			}
+
+			for (i = 0; i < vlen; i++, p++) {
+				if (i > 0)
+					btf_dump_printf(d, ", ");
+
+				/* last arg of type void is vararg */
+				if (i == vlen - 1 && p->type == 0) {
+					btf_dump_printf(d, "...");
+					break;
+				}
+
+				name = btf_name_of(d, p->name_off);
+				btf_dump_emit_type_decl(d, p->type, name, lvl);
+			}
+
+			btf_dump_printf(d, ")");
+			return;
+		}
+		default:
+			pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n",
+				kind, id);
+			return;
+		}
+
+		last_was_ptr = kind == BTF_KIND_PTR;
+	}
+
+	btf_dump_emit_name(d, fname, last_was_ptr);
+}
+
+/* return number of duplicates (occurrences) of a given name */
+static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
+				 const char *orig_name)
+{
+	char *old_name, *new_name;
+	size_t dup_cnt = 0;
+	int err;
+
+	new_name = strdup(orig_name);
+	if (!new_name)
+		return 1;
+
+	hashmap__find(name_map, orig_name, (void **)&dup_cnt);
+	dup_cnt++;
+
+	err = hashmap__set(name_map, new_name, (void *)dup_cnt,
+			   (const void **)&old_name, NULL);
+	if (err)
+		free(new_name);
+
+	free(old_name);
+
+	return dup_cnt;
+}
+
+static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id,
+					 struct hashmap *name_map)
+{
+	struct btf_dump_type_aux_state *s = &d->type_states[id];
+	const struct btf_type *t = btf__type_by_id(d->btf, id);
+	const char *orig_name = btf_name_of(d, t->name_off);
+	const char **cached_name = &d->cached_names[id];
+	size_t dup_cnt;
+
+	if (t->name_off == 0)
+		return "";
+
+	if (s->name_resolved)
+		return *cached_name ? *cached_name : orig_name;
+
+	if (btf_is_fwd(t) || (btf_is_enum(t) && btf_vlen(t) == 0)) {
+		s->name_resolved = 1;
+		return orig_name;
+	}
+
+	dup_cnt = btf_dump_name_dups(d, name_map, orig_name);
+	if (dup_cnt > 1) {
+		const size_t max_len = 256;
+		char new_name[max_len];
+
+		snprintf(new_name, max_len, "%s___%zu", orig_name, dup_cnt);
+		*cached_name = strdup(new_name);
+	}
+
+	s->name_resolved = 1;
+	return *cached_name ? *cached_name : orig_name;
+}
+
+static const char *btf_dump_type_name(struct btf_dump *d, __u32 id)
+{
+	return btf_dump_resolve_name(d, id, d->type_names);
+}
+
+static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id)
+{
+	return btf_dump_resolve_name(d, id, d->ident_names);
+}
diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c
new file mode 100644
index 000000000..3c20b126d
--- /dev/null
+++ b/tools/lib/bpf/hashmap.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * Generic non-thread safe hash map implementation.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <linux/err.h>
+#include "hashmap.h"
+
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
+/* prevent accidental re-addition of reallocarray() */
+#pragma GCC poison reallocarray
+
+/* start with 4 buckets */
+#define HASHMAP_MIN_CAP_BITS 2
+
+static void hashmap_add_entry(struct hashmap_entry **pprev,
+			      struct hashmap_entry *entry)
+{
+	entry->next = *pprev;
+	*pprev = entry;
+}
+
+static void hashmap_del_entry(struct hashmap_entry **pprev,
+			      struct hashmap_entry *entry)
+{
+	*pprev = entry->next;
+	entry->next = NULL;
+}
+
+void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
+		   hashmap_equal_fn equal_fn, void *ctx)
+{
+	map->hash_fn = hash_fn;
+	map->equal_fn = equal_fn;
+	map->ctx = ctx;
+
+	map->buckets = NULL;
+	map->cap = 0;
+	map->cap_bits = 0;
+	map->sz = 0;
+}
+
+struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
+			     hashmap_equal_fn equal_fn,
+			     void *ctx)
+{
+	struct hashmap *map = malloc(sizeof(struct hashmap));
+
+	if (!map)
+		return ERR_PTR(-ENOMEM);
+	hashmap__init(map, hash_fn, equal_fn, ctx);
+	return map;
+}
+
+void hashmap__clear(struct hashmap *map)
+{
+	struct hashmap_entry *cur, *tmp;
+	size_t bkt;
+
+	hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
+		free(cur);
+	}
+	free(map->buckets);
+	map->buckets = NULL;
+	map->cap = map->cap_bits = map->sz = 0;
+}
+
+void hashmap__free(struct hashmap *map)
+{
+	if (!map)
+		return;
+
+	hashmap__clear(map);
+	free(map);
+}
+
+size_t hashmap__size(const struct hashmap *map)
+{
+	return map->sz;
+}
+
+size_t hashmap__capacity(const struct hashmap *map)
+{
+	return map->cap;
+}
+
+static bool hashmap_needs_to_grow(struct hashmap *map)
+{
+	/* grow if empty or more than 75% filled */
+	return (map->cap == 0) || ((map->sz + 1) * 4 / 3 > map->cap);
+}
+
+static int hashmap_grow(struct hashmap *map)
+{
+	struct hashmap_entry **new_buckets;
+	struct hashmap_entry *cur, *tmp;
+	size_t new_cap_bits, new_cap;
+	size_t h, bkt;
+
+	new_cap_bits = map->cap_bits + 1;
+	if (new_cap_bits < HASHMAP_MIN_CAP_BITS)
+		new_cap_bits = HASHMAP_MIN_CAP_BITS;
+
+	new_cap = 1UL << new_cap_bits;
+	new_buckets = calloc(new_cap, sizeof(new_buckets[0]));
+	if (!new_buckets)
+		return -ENOMEM;
+
+	hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
+		h = hash_bits(map->hash_fn(cur->key, map->ctx), new_cap_bits);
+		hashmap_add_entry(&new_buckets[h], cur);
+	}
+
+	map->cap = new_cap;
+	map->cap_bits = new_cap_bits;
+	free(map->buckets);
+	map->buckets = new_buckets;
+
+	return 0;
+}
+
+static bool hashmap_find_entry(const struct hashmap *map,
+			       const void *key, size_t hash,
+			       struct hashmap_entry ***pprev,
+			       struct hashmap_entry **entry)
+{
+	struct hashmap_entry *cur, **prev_ptr;
+
+	if (!map->buckets)
+		return false;
+
+	for (prev_ptr = &map->buckets[hash], cur = *prev_ptr;
+	     cur;
+	     prev_ptr = &cur->next, cur = cur->next) {
+		if (map->equal_fn(cur->key, key, map->ctx)) {
+			if (pprev)
+				*pprev = prev_ptr;
+			*entry = cur;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+int hashmap__insert(struct hashmap *map, const void *key, void *value,
+		    enum hashmap_insert_strategy strategy,
+		    const void **old_key, void **old_value)
+{
+	struct hashmap_entry *entry;
+	size_t h;
+	int err;
+
+	if (old_key)
+		*old_key = NULL;
+	if (old_value)
+		*old_value = NULL;
+
+	h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+	if (strategy != HASHMAP_APPEND &&
+	    hashmap_find_entry(map, key, h, NULL, &entry)) {
+		if (old_key)
+			*old_key = entry->key;
+		if (old_value)
+			*old_value = entry->value;
+
+		if (strategy == HASHMAP_SET || strategy == HASHMAP_UPDATE) {
+			entry->key = key;
+			entry->value = value;
+			return 0;
+		} else if (strategy == HASHMAP_ADD) {
+			return -EEXIST;
+		}
+	}
+
+	if (strategy == HASHMAP_UPDATE)
+		return -ENOENT;
+
+	if (hashmap_needs_to_grow(map)) {
+		err = hashmap_grow(map);
+		if (err)
+			return err;
+		h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+	}
+
+	entry = malloc(sizeof(struct hashmap_entry));
+	if (!entry)
+		return -ENOMEM;
+
+	entry->key = key;
+	entry->value = value;
+	hashmap_add_entry(&map->buckets[h], entry);
+	map->sz++;
+
+	return 0;
+}
+
+bool hashmap__find(const struct hashmap *map, const void *key, void **value)
+{
+	struct hashmap_entry *entry;
+	size_t h;
+
+	h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+	if (!hashmap_find_entry(map, key, h, NULL, &entry))
+		return false;
+
+	if (value)
+		*value = entry->value;
+	return true;
+}
+
+bool hashmap__delete(struct hashmap *map, const void *key,
+		     const void **old_key, void **old_value)
+{
+	struct hashmap_entry **pprev, *entry;
+	size_t h;
+
+	h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+	if (!hashmap_find_entry(map, key, h, &pprev, &entry))
+		return false;
+
+	if (old_key)
+		*old_key = entry->key;
+	if (old_value)
+		*old_value = entry->value;
+
+	hashmap_del_entry(pprev, entry);
+	free(entry);
+	map->sz--;
+
+	return true;
+}
+
diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h
new file mode 100644
index 000000000..10a4c4cd1
--- /dev/null
+++ b/tools/lib/bpf/hashmap.h
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Generic non-thread safe hash map implementation.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#ifndef __LIBBPF_HASHMAP_H
+#define __LIBBPF_HASHMAP_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <limits.h>
+
+static inline size_t hash_bits(size_t h, int bits)
+{
+	/* shuffle bits and return requested number of upper bits */
+	if (bits == 0)
+		return 0;
+
+#if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__)
+	/* LP64 case */
+	return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits);
+#elif (__SIZEOF_SIZE_T__ <= __SIZEOF_LONG__)
+	return (h * 2654435769lu) >> (__SIZEOF_LONG__ * 8 - bits);
+#else
+#	error "Unsupported size_t size"
+#endif
+}
+
+/* generic C-string hashing function */
+static inline size_t str_hash(const char *s)
+{
+	size_t h = 0;
+
+	while (*s) {
+		h = h * 31 + *s;
+		s++;
+	}
+	return h;
+}
+
+typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx);
+typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx);
+
+struct hashmap_entry {
+	const void *key;
+	void *value;
+	struct hashmap_entry *next;
+};
+
+struct hashmap {
+	hashmap_hash_fn hash_fn;
+	hashmap_equal_fn equal_fn;
+	void *ctx;
+
+	struct hashmap_entry **buckets;
+	size_t cap;
+	size_t cap_bits;
+	size_t sz;
+};
+
+#define HASHMAP_INIT(hash_fn, equal_fn, ctx) {	\
+	.hash_fn = (hash_fn),			\
+	.equal_fn = (equal_fn),			\
+	.ctx = (ctx),				\
+	.buckets = NULL,			\
+	.cap = 0,				\
+	.cap_bits = 0,				\
+	.sz = 0,				\
+}
+
+void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
+		   hashmap_equal_fn equal_fn, void *ctx);
+struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
+			     hashmap_equal_fn equal_fn,
+			     void *ctx);
+void hashmap__clear(struct hashmap *map);
+void hashmap__free(struct hashmap *map);
+
+size_t hashmap__size(const struct hashmap *map);
+size_t hashmap__capacity(const struct hashmap *map);
+
+/*
+ * Hashmap insertion strategy:
+ * - HASHMAP_ADD - only add key/value if key doesn't exist yet;
+ * - HASHMAP_SET - add key/value pair if key doesn't exist yet; otherwise,
+ *   update value;
+ * - HASHMAP_UPDATE - update value, if key already exists; otherwise, do
+ *   nothing and return -ENOENT;
+ * - HASHMAP_APPEND - always add key/value pair, even if key already exists.
+ *   This turns hashmap into a multimap by allowing multiple values to be
+ *   associated with the same key. Most useful read API for such hashmap is
+ *   hashmap__for_each_key_entry() iteration. If hashmap__find() is still
+ *   used, it will return last inserted key/value entry (first in a bucket
+ *   chain).
+ */
+enum hashmap_insert_strategy {
+	HASHMAP_ADD,
+	HASHMAP_SET,
+	HASHMAP_UPDATE,
+	HASHMAP_APPEND,
+};
+
+/*
+ * hashmap__insert() adds key/value entry w/ various semantics, depending on
+ * provided strategy value. If a given key/value pair replaced already
+ * existing key/value pair, both old key and old value will be returned
+ * through old_key and old_value to allow calling code do proper memory
+ * management.
+ */
+int hashmap__insert(struct hashmap *map, const void *key, void *value,
+		    enum hashmap_insert_strategy strategy,
+		    const void **old_key, void **old_value);
+
+static inline int hashmap__add(struct hashmap *map,
+			       const void *key, void *value)
+{
+	return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL);
+}
+
+static inline int hashmap__set(struct hashmap *map,
+			       const void *key, void *value,
+			       const void **old_key, void **old_value)
+{
+	return hashmap__insert(map, key, value, HASHMAP_SET,
+			       old_key, old_value);
+}
+
+static inline int hashmap__update(struct hashmap *map,
+				  const void *key, void *value,
+				  const void **old_key, void **old_value)
+{
+	return hashmap__insert(map, key, value, HASHMAP_UPDATE,
+			       old_key, old_value);
+}
+
+static inline int hashmap__append(struct hashmap *map,
+				  const void *key, void *value)
+{
+	return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL);
+}
+
+bool hashmap__delete(struct hashmap *map, const void *key,
+		     const void **old_key, void **old_value);
+
+bool hashmap__find(const struct hashmap *map, const void *key, void **value);
+
+/*
+ * hashmap__for_each_entry - iterate over all entries in hashmap
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @bkt: integer used as a bucket loop cursor
+ */
+#define hashmap__for_each_entry(map, cur, bkt)				    \
+	for (bkt = 0; bkt < map->cap; bkt++)				    \
+		for (cur = map->buckets[bkt]; cur; cur = cur->next)
+
+/*
+ * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe
+ * against removals
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @tmp: struct hashmap_entry * used as a temporary next cursor storage
+ * @bkt: integer used as a bucket loop cursor
+ */
+#define hashmap__for_each_entry_safe(map, cur, tmp, bkt)		    \
+	for (bkt = 0; bkt < map->cap; bkt++)				    \
+		for (cur = map->buckets[bkt];				    \
+		     cur && ({tmp = cur->next; true; });		    \
+		     cur = tmp)
+
+/*
+ * hashmap__for_each_key_entry - iterate over entries associated with given key
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @key: key to iterate entries for
+ */
+#define hashmap__for_each_key_entry(map, cur, _key)			    \
+	for (cur = map->buckets						    \
+		     ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \
+		     : NULL;						    \
+	     cur;							    \
+	     cur = cur->next)						    \
+		if (map->equal_fn(cur->key, (_key), map->ctx))
+
+#define hashmap__for_each_key_entry_safe(map, cur, tmp, _key)		    \
+	for (cur = map->buckets						    \
+		     ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \
+		     : NULL;						    \
+	     cur && ({ tmp = cur->next; true; });			    \
+	     cur = tmp)							    \
+		if (map->equal_fn(cur->key, (_key), map->ctx))
+
+#endif /* __LIBBPF_HASHMAP_H */
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
new file mode 100644
index 000000000..015ed8253
--- /dev/null
+++ b/tools/lib/bpf/libbpf.c
@@ -0,0 +1,10952 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * Common eBPF ELF object loading operations.
+ *
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ * Copyright (C) 2017 Nicira, Inc.
+ * Copyright (C) 2019 Isovalent, Inc.
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <libgen.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <string.h>
+#include <unistd.h>
+#include <endian.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <ctype.h>
+#include <asm/unistd.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/filter.h>
+#include <linux/list.h>
+#include <linux/limits.h>
+#include <linux/perf_event.h>
+#include <linux/ring_buffer.h>
+#include <linux/version.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+#include <sys/utsname.h>
+#include <sys/resource.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <zlib.h>
+
+#include "libbpf.h"
+#include "bpf.h"
+#include "btf.h"
+#include "str_error.h"
+#include "libbpf_internal.h"
+#include "hashmap.h"
+
+#ifndef EM_BPF
+#define EM_BPF 247
+#endif
+
+#ifndef BPF_FS_MAGIC
+#define BPF_FS_MAGIC		0xcafe4a11
+#endif
+
+#define BPF_INSN_SZ (sizeof(struct bpf_insn))
+
+/* vsprintf() in __base_pr() uses nonliteral format string. It may break
+ * compilation if user enables corresponding warning. Disable it explicitly.
+ */
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+
+#define __printf(a, b)	__attribute__((format(printf, a, b)))
+
+static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
+static const struct btf_type *
+skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);
+
+static int __base_pr(enum libbpf_print_level level, const char *format,
+		     va_list args)
+{
+	if (level == LIBBPF_DEBUG)
+		return 0;
+
+	return vfprintf(stderr, format, args);
+}
+
+static libbpf_print_fn_t __libbpf_pr = __base_pr;
+
+libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
+{
+	libbpf_print_fn_t old_print_fn = __libbpf_pr;
+
+	__libbpf_pr = fn;
+	return old_print_fn;
+}
+
+__printf(2, 3)
+void libbpf_print(enum libbpf_print_level level, const char *format, ...)
+{
+	va_list args;
+
+	if (!__libbpf_pr)
+		return;
+
+	va_start(args, format);
+	__libbpf_pr(level, format, args);
+	va_end(args);
+}
+
+static void pr_perm_msg(int err)
+{
+	struct rlimit limit;
+	char buf[100];
+
+	if (err != -EPERM || geteuid() != 0)
+		return;
+
+	err = getrlimit(RLIMIT_MEMLOCK, &limit);
+	if (err)
+		return;
+
+	if (limit.rlim_cur == RLIM_INFINITY)
+		return;
+
+	if (limit.rlim_cur < 1024)
+		snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
+	else if (limit.rlim_cur < 1024*1024)
+		snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
+	else
+		snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));
+
+	pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
+		buf);
+}
+
+#define STRERR_BUFSIZE  128
+
+/* Copied from tools/perf/util/util.h */
+#ifndef zfree
+# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
+#endif
+
+#ifndef zclose
+# define zclose(fd) ({			\
+	int ___err = 0;			\
+	if ((fd) >= 0)			\
+		___err = close((fd));	\
+	fd = -1;			\
+	___err; })
+#endif
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+	return (__u64) (unsigned long) ptr;
+}
+
+enum kern_feature_id {
+	/* v4.14: kernel support for program & map names. */
+	FEAT_PROG_NAME,
+	/* v5.2: kernel support for global data sections. */
+	FEAT_GLOBAL_DATA,
+	/* BTF support */
+	FEAT_BTF,
+	/* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */
+	FEAT_BTF_FUNC,
+	/* BTF_KIND_VAR and BTF_KIND_DATASEC support */
+	FEAT_BTF_DATASEC,
+	/* BTF_FUNC_GLOBAL is supported */
+	FEAT_BTF_GLOBAL_FUNC,
+	/* BPF_F_MMAPABLE is supported for arrays */
+	FEAT_ARRAY_MMAP,
+	/* kernel support for expected_attach_type in BPF_PROG_LOAD */
+	FEAT_EXP_ATTACH_TYPE,
+	/* bpf_probe_read_{kernel,user}[_str] helpers */
+	FEAT_PROBE_READ_KERN,
+	/* BPF_PROG_BIND_MAP is supported */
+	FEAT_PROG_BIND_MAP,
+	__FEAT_CNT,
+};
+
+static bool kernel_supports(enum kern_feature_id feat_id);
+
+enum reloc_type {
+	RELO_LD64,
+	RELO_CALL,
+	RELO_DATA,
+	RELO_EXTERN,
+};
+
+struct reloc_desc {
+	enum reloc_type type;
+	int insn_idx;
+	int map_idx;
+	int sym_off;
+	bool processed;
+};
+
+struct bpf_sec_def;
+
+typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec,
+					struct bpf_program *prog);
+
+struct bpf_sec_def {
+	const char *sec;
+	size_t len;
+	enum bpf_prog_type prog_type;
+	enum bpf_attach_type expected_attach_type;
+	bool is_exp_attach_type_optional;
+	bool is_attachable;
+	bool is_attach_btf;
+	bool is_sleepable;
+	attach_fn_t attach_fn;
+};
+
+/*
+ * bpf_prog should be a better name but it has been used in
+ * linux/filter.h.
+ */
+struct bpf_program {
+	const struct bpf_sec_def *sec_def;
+	char *sec_name;
+	size_t sec_idx;
+	/* this program's instruction offset (in number of instructions)
+	 * within its containing ELF section
+	 */
+	size_t sec_insn_off;
+	/* number of original instructions in ELF section belonging to this
+	 * program, not taking into account subprogram instructions possible
+	 * appended later during relocation
+	 */
+	size_t sec_insn_cnt;
+	/* Offset (in number of instructions) of the start of instruction
+	 * belonging to this BPF program  within its containing main BPF
+	 * program. For the entry-point (main) BPF program, this is always
+	 * zero. For a sub-program, this gets reset before each of main BPF
+	 * programs are processed and relocated and is used to determined
+	 * whether sub-program was already appended to the main program, and
+	 * if yes, at which instruction offset.
+	 */
+	size_t sub_insn_off;
+
+	char *name;
+	/* sec_name with / replaced by _; makes recursive pinning
+	 * in bpf_object__pin_programs easier
+	 */
+	char *pin_name;
+
+	/* instructions that belong to BPF program; insns[0] is located at
+	 * sec_insn_off instruction within its ELF section in ELF file, so
+	 * when mapping ELF file instruction index to the local instruction,
+	 * one needs to subtract sec_insn_off; and vice versa.
+	 */
+	struct bpf_insn *insns;
+	/* actual number of instruction in this BPF program's image; for
+	 * entry-point BPF programs this includes the size of main program
+	 * itself plus all the used sub-programs, appended at the end
+	 */
+	size_t insns_cnt;
+
+	struct reloc_desc *reloc_desc;
+	int nr_reloc;
+	int log_level;
+
+	struct {
+		int nr;
+		int *fds;
+	} instances;
+	bpf_program_prep_t preprocessor;
+
+	struct bpf_object *obj;
+	void *priv;
+	bpf_program_clear_priv_t clear_priv;
+
+	bool load;
+	enum bpf_prog_type type;
+	enum bpf_attach_type expected_attach_type;
+	int prog_ifindex;
+	__u32 attach_btf_id;
+	__u32 attach_prog_fd;
+	void *func_info;
+	__u32 func_info_rec_size;
+	__u32 func_info_cnt;
+
+	void *line_info;
+	__u32 line_info_rec_size;
+	__u32 line_info_cnt;
+	__u32 prog_flags;
+};
+
+struct bpf_struct_ops {
+	const char *tname;
+	const struct btf_type *type;
+	struct bpf_program **progs;
+	__u32 *kern_func_off;
+	/* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
+	void *data;
+	/* e.g. struct bpf_struct_ops_tcp_congestion_ops in
+	 *      btf_vmlinux's format.
+	 * struct bpf_struct_ops_tcp_congestion_ops {
+	 *	[... some other kernel fields ...]
+	 *	struct tcp_congestion_ops data;
+	 * }
+	 * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
+	 * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
+	 * from "data".
+	 */
+	void *kern_vdata;
+	__u32 type_id;
+};
+
+#define DATA_SEC ".data"
+#define BSS_SEC ".bss"
+#define RODATA_SEC ".rodata"
+#define KCONFIG_SEC ".kconfig"
+#define KSYMS_SEC ".ksyms"
+#define STRUCT_OPS_SEC ".struct_ops"
+
+enum libbpf_map_type {
+	LIBBPF_MAP_UNSPEC,
+	LIBBPF_MAP_DATA,
+	LIBBPF_MAP_BSS,
+	LIBBPF_MAP_RODATA,
+	LIBBPF_MAP_KCONFIG,
+};
+
+static const char * const libbpf_type_to_btf_name[] = {
+	[LIBBPF_MAP_DATA]	= DATA_SEC,
+	[LIBBPF_MAP_BSS]	= BSS_SEC,
+	[LIBBPF_MAP_RODATA]	= RODATA_SEC,
+	[LIBBPF_MAP_KCONFIG]	= KCONFIG_SEC,
+};
+
+struct bpf_map {
+	char *name;
+	int fd;
+	int sec_idx;
+	size_t sec_offset;
+	int map_ifindex;
+	int inner_map_fd;
+	struct bpf_map_def def;
+	__u32 numa_node;
+	__u32 btf_var_idx;
+	__u32 btf_key_type_id;
+	__u32 btf_value_type_id;
+	__u32 btf_vmlinux_value_type_id;
+	void *priv;
+	bpf_map_clear_priv_t clear_priv;
+	enum libbpf_map_type libbpf_type;
+	void *mmaped;
+	struct bpf_struct_ops *st_ops;
+	struct bpf_map *inner_map;
+	void **init_slots;
+	int init_slots_sz;
+	char *pin_path;
+	bool pinned;
+	bool reused;
+};
+
+enum extern_type {
+	EXT_UNKNOWN,
+	EXT_KCFG,
+	EXT_KSYM,
+};
+
+enum kcfg_type {
+	KCFG_UNKNOWN,
+	KCFG_CHAR,
+	KCFG_BOOL,
+	KCFG_INT,
+	KCFG_TRISTATE,
+	KCFG_CHAR_ARR,
+};
+
+struct extern_desc {
+	enum extern_type type;
+	int sym_idx;
+	int btf_id;
+	int sec_btf_id;
+	const char *name;
+	bool is_set;
+	bool is_weak;
+	union {
+		struct {
+			enum kcfg_type type;
+			int sz;
+			int align;
+			int data_off;
+			bool is_signed;
+		} kcfg;
+		struct {
+			unsigned long long addr;
+
+			/* target btf_id of the corresponding kernel var. */
+			int vmlinux_btf_id;
+
+			/* local btf_id of the ksym extern's type. */
+			__u32 type_id;
+		} ksym;
+	};
+};
+
+static LIST_HEAD(bpf_objects_list);
+
+struct bpf_object {
+	char name[BPF_OBJ_NAME_LEN];
+	char license[64];
+	__u32 kern_version;
+
+	struct bpf_program *programs;
+	size_t nr_programs;
+	struct bpf_map *maps;
+	size_t nr_maps;
+	size_t maps_cap;
+
+	char *kconfig;
+	struct extern_desc *externs;
+	int nr_extern;
+	int kconfig_map_idx;
+	int rodata_map_idx;
+
+	bool loaded;
+	bool has_subcalls;
+
+	/*
+	 * Information when doing elf related work. Only valid if fd
+	 * is valid.
+	 */
+	struct {
+		int fd;
+		const void *obj_buf;
+		size_t obj_buf_sz;
+		Elf *elf;
+		GElf_Ehdr ehdr;
+		Elf_Data *symbols;
+		Elf_Data *data;
+		Elf_Data *rodata;
+		Elf_Data *bss;
+		Elf_Data *st_ops_data;
+		size_t shstrndx; /* section index for section name strings */
+		size_t strtabidx;
+		struct {
+			GElf_Shdr shdr;
+			Elf_Data *data;
+		} *reloc_sects;
+		int nr_reloc_sects;
+		int maps_shndx;
+		int btf_maps_shndx;
+		__u32 btf_maps_sec_btf_id;
+		int text_shndx;
+		int symbols_shndx;
+		int data_shndx;
+		int rodata_shndx;
+		int bss_shndx;
+		int st_ops_shndx;
+	} efile;
+	/*
+	 * All loaded bpf_object is linked in a list, which is
+	 * hidden to caller. bpf_objects__<func> handlers deal with
+	 * all objects.
+	 */
+	struct list_head list;
+
+	struct btf *btf;
+	/* Parse and load BTF vmlinux if any of the programs in the object need
+	 * it at load time.
+	 */
+	struct btf *btf_vmlinux;
+	struct btf_ext *btf_ext;
+
+	void *priv;
+	bpf_object_clear_priv_t clear_priv;
+
+	char path[];
+};
+#define obj_elf_valid(o)	((o)->efile.elf)
+
+static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
+static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
+static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
+static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
+static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr);
+static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
+static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
+static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
+			      size_t off, __u32 sym_type, GElf_Sym *sym);
+
+void bpf_program__unload(struct bpf_program *prog)
+{
+	int i;
+
+	if (!prog)
+		return;
+
+	/*
+	 * If the object is opened but the program was never loaded,
+	 * it is possible that prog->instances.nr == -1.
+	 */
+	if (prog->instances.nr > 0) {
+		for (i = 0; i < prog->instances.nr; i++)
+			zclose(prog->instances.fds[i]);
+	} else if (prog->instances.nr != -1) {
+		pr_warn("Internal error: instances.nr is %d\n",
+			prog->instances.nr);
+	}
+
+	prog->instances.nr = -1;
+	zfree(&prog->instances.fds);
+
+	zfree(&prog->func_info);
+	zfree(&prog->line_info);
+}
+
+static void bpf_program__exit(struct bpf_program *prog)
+{
+	if (!prog)
+		return;
+
+	if (prog->clear_priv)
+		prog->clear_priv(prog, prog->priv);
+
+	prog->priv = NULL;
+	prog->clear_priv = NULL;
+
+	bpf_program__unload(prog);
+	zfree(&prog->name);
+	zfree(&prog->sec_name);
+	zfree(&prog->pin_name);
+	zfree(&prog->insns);
+	zfree(&prog->reloc_desc);
+
+	prog->nr_reloc = 0;
+	prog->insns_cnt = 0;
+	prog->sec_idx = -1;
+}
+
+static char *__bpf_program__pin_name(struct bpf_program *prog)
+{
+	char *name, *p;
+
+	name = p = strdup(prog->sec_name);
+	while ((p = strchr(p, '/')))
+		*p = '_';
+
+	return name;
+}
+
+static bool insn_is_subprog_call(const struct bpf_insn *insn)
+{
+	return BPF_CLASS(insn->code) == BPF_JMP &&
+	       BPF_OP(insn->code) == BPF_CALL &&
+	       BPF_SRC(insn->code) == BPF_K &&
+	       insn->src_reg == BPF_PSEUDO_CALL &&
+	       insn->dst_reg == 0 &&
+	       insn->off == 0;
+}
+
+static int
+bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
+		      const char *name, size_t sec_idx, const char *sec_name,
+		      size_t sec_off, void *insn_data, size_t insn_data_sz)
+{
+	if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
+		pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
+			sec_name, name, sec_off, insn_data_sz);
+		return -EINVAL;
+	}
+
+	memset(prog, 0, sizeof(*prog));
+	prog->obj = obj;
+
+	prog->sec_idx = sec_idx;
+	prog->sec_insn_off = sec_off / BPF_INSN_SZ;
+	prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
+	/* insns_cnt can later be increased by appending used subprograms */
+	prog->insns_cnt = prog->sec_insn_cnt;
+
+	prog->type = BPF_PROG_TYPE_UNSPEC;
+	prog->load = true;
+
+	prog->instances.fds = NULL;
+	prog->instances.nr = -1;
+
+	prog->sec_name = strdup(sec_name);
+	if (!prog->sec_name)
+		goto errout;
+
+	prog->name = strdup(name);
+	if (!prog->name)
+		goto errout;
+
+	prog->pin_name = __bpf_program__pin_name(prog);
+	if (!prog->pin_name)
+		goto errout;
+
+	prog->insns = malloc(insn_data_sz);
+	if (!prog->insns)
+		goto errout;
+	memcpy(prog->insns, insn_data, insn_data_sz);
+
+	return 0;
+errout:
+	pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
+	bpf_program__exit(prog);
+	return -ENOMEM;
+}
+
+static int
+bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
+			 const char *sec_name, int sec_idx)
+{
+	struct bpf_program *prog, *progs;
+	void *data = sec_data->d_buf;
+	size_t sec_sz = sec_data->d_size, sec_off, prog_sz;
+	int nr_progs, err;
+	const char *name;
+	GElf_Sym sym;
+
+	progs = obj->programs;
+	nr_progs = obj->nr_programs;
+	sec_off = 0;
+
+	while (sec_off < sec_sz) {
+		if (elf_sym_by_sec_off(obj, sec_idx, sec_off, STT_FUNC, &sym)) {
+			pr_warn("sec '%s': failed to find program symbol at offset %zu\n",
+				sec_name, sec_off);
+			return -LIBBPF_ERRNO__FORMAT;
+		}
+
+		prog_sz = sym.st_size;
+
+		name = elf_sym_str(obj, sym.st_name);
+		if (!name) {
+			pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
+				sec_name, sec_off);
+			return -LIBBPF_ERRNO__FORMAT;
+		}
+
+		if (sec_off + prog_sz > sec_sz) {
+			pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
+				sec_name, sec_off);
+			return -LIBBPF_ERRNO__FORMAT;
+		}
+
+		pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
+			 sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);
+
+		progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
+		if (!progs) {
+			/*
+			 * In this case the original obj->programs
+			 * is still valid, so don't need special treat for
+			 * bpf_close_object().
+			 */
+			pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
+				sec_name, name);
+			return -ENOMEM;
+		}
+		obj->programs = progs;
+
+		prog = &progs[nr_progs];
+
+		err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
+					    sec_off, data + sec_off, prog_sz);
+		if (err)
+			return err;
+
+		nr_progs++;
+		obj->nr_programs = nr_progs;
+
+		sec_off += prog_sz;
+	}
+
+	return 0;
+}
+
+static __u32 get_kernel_version(void)
+{
+	__u32 major, minor, patch;
+	struct utsname info;
+
+	uname(&info);
+	if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
+		return 0;
+	return KERNEL_VERSION(major, minor, patch);
+}
+
+static const struct btf_member *
+find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
+{
+	struct btf_member *m;
+	int i;
+
+	for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
+		if (btf_member_bit_offset(t, i) == bit_offset)
+			return m;
+	}
+
+	return NULL;
+}
+
+static const struct btf_member *
+find_member_by_name(const struct btf *btf, const struct btf_type *t,
+		    const char *name)
+{
+	struct btf_member *m;
+	int i;
+
+	for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
+		if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
+			return m;
+	}
+
+	return NULL;
+}
+
+#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
+static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
+				   const char *name, __u32 kind);
+
+static int
+find_struct_ops_kern_types(const struct btf *btf, const char *tname,
+			   const struct btf_type **type, __u32 *type_id,
+			   const struct btf_type **vtype, __u32 *vtype_id,
+			   const struct btf_member **data_member)
+{
+	const struct btf_type *kern_type, *kern_vtype;
+	const struct btf_member *kern_data_member;
+	__s32 kern_vtype_id, kern_type_id;
+	__u32 i;
+
+	kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
+	if (kern_type_id < 0) {
+		pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
+			tname);
+		return kern_type_id;
+	}
+	kern_type = btf__type_by_id(btf, kern_type_id);
+
+	/* Find the corresponding "map_value" type that will be used
+	 * in map_update(BPF_MAP_TYPE_STRUCT_OPS).  For example,
+	 * find "struct bpf_struct_ops_tcp_congestion_ops" from the
+	 * btf_vmlinux.
+	 */
+	kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
+						tname, BTF_KIND_STRUCT);
+	if (kern_vtype_id < 0) {
+		pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
+			STRUCT_OPS_VALUE_PREFIX, tname);
+		return kern_vtype_id;
+	}
+	kern_vtype = btf__type_by_id(btf, kern_vtype_id);
+
+	/* Find "struct tcp_congestion_ops" from
+	 * struct bpf_struct_ops_tcp_congestion_ops {
+	 *	[ ... ]
+	 *	struct tcp_congestion_ops data;
+	 * }
+	 */
+	kern_data_member = btf_members(kern_vtype);
+	for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
+		if (kern_data_member->type == kern_type_id)
+			break;
+	}
+	if (i == btf_vlen(kern_vtype)) {
+		pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
+			tname, STRUCT_OPS_VALUE_PREFIX, tname);
+		return -EINVAL;
+	}
+
+	*type = kern_type;
+	*type_id = kern_type_id;
+	*vtype = kern_vtype;
+	*vtype_id = kern_vtype_id;
+	*data_member = kern_data_member;
+
+	return 0;
+}
+
+static bool bpf_map__is_struct_ops(const struct bpf_map *map)
+{
+	return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
+}
+
+/* Init the map's fields that depend on kern_btf */
+static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
+					 const struct btf *btf,
+					 const struct btf *kern_btf)
+{
+	const struct btf_member *member, *kern_member, *kern_data_member;
+	const struct btf_type *type, *kern_type, *kern_vtype;
+	__u32 i, kern_type_id, kern_vtype_id, kern_data_off;
+	struct bpf_struct_ops *st_ops;
+	void *data, *kern_data;
+	const char *tname;
+	int err;
+
+	st_ops = map->st_ops;
+	type = st_ops->type;
+	tname = st_ops->tname;
+	err = find_struct_ops_kern_types(kern_btf, tname,
+					 &kern_type, &kern_type_id,
+					 &kern_vtype, &kern_vtype_id,
+					 &kern_data_member);
+	if (err)
+		return err;
+
+	pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
+		 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);
+
+	map->def.value_size = kern_vtype->size;
+	map->btf_vmlinux_value_type_id = kern_vtype_id;
+
+	st_ops->kern_vdata = calloc(1, kern_vtype->size);
+	if (!st_ops->kern_vdata)
+		return -ENOMEM;
+
+	data = st_ops->data;
+	kern_data_off = kern_data_member->offset / 8;
+	kern_data = st_ops->kern_vdata + kern_data_off;
+
+	member = btf_members(type);
+	for (i = 0; i < btf_vlen(type); i++, member++) {
+		const struct btf_type *mtype, *kern_mtype;
+		__u32 mtype_id, kern_mtype_id;
+		void *mdata, *kern_mdata;
+		__s64 msize, kern_msize;
+		__u32 moff, kern_moff;
+		__u32 kern_member_idx;
+		const char *mname;
+
+		mname = btf__name_by_offset(btf, member->name_off);
+		kern_member = find_member_by_name(kern_btf, kern_type, mname);
+		if (!kern_member) {
+			pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
+				map->name, mname);
+			return -ENOTSUP;
+		}
+
+		kern_member_idx = kern_member - btf_members(kern_type);
+		if (btf_member_bitfield_size(type, i) ||
+		    btf_member_bitfield_size(kern_type, kern_member_idx)) {
+			pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
+				map->name, mname);
+			return -ENOTSUP;
+		}
+
+		moff = member->offset / 8;
+		kern_moff = kern_member->offset / 8;
+
+		mdata = data + moff;
+		kern_mdata = kern_data + kern_moff;
+
+		mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
+		kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
+						    &kern_mtype_id);
+		if (BTF_INFO_KIND(mtype->info) !=
+		    BTF_INFO_KIND(kern_mtype->info)) {
+			pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
+				map->name, mname, BTF_INFO_KIND(mtype->info),
+				BTF_INFO_KIND(kern_mtype->info));
+			return -ENOTSUP;
+		}
+
+		if (btf_is_ptr(mtype)) {
+			struct bpf_program *prog;
+
+			prog = st_ops->progs[i];
+			if (!prog)
+				continue;
+
+			kern_mtype = skip_mods_and_typedefs(kern_btf,
+							    kern_mtype->type,
+							    &kern_mtype_id);
+
+			/* mtype->type must be a func_proto which was
+			 * guaranteed in bpf_object__collect_st_ops_relos(),
+			 * so only check kern_mtype for func_proto here.
+			 */
+			if (!btf_is_func_proto(kern_mtype)) {
+				pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
+					map->name, mname);
+				return -ENOTSUP;
+			}
+
+			prog->attach_btf_id = kern_type_id;
+			prog->expected_attach_type = kern_member_idx;
+
+			st_ops->kern_func_off[i] = kern_data_off + kern_moff;
+
+			pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
+				 map->name, mname, prog->name, moff,
+				 kern_moff);
+
+			continue;
+		}
+
+		msize = btf__resolve_size(btf, mtype_id);
+		kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
+		if (msize < 0 || kern_msize < 0 || msize != kern_msize) {
+			pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
+				map->name, mname, (ssize_t)msize,
+				(ssize_t)kern_msize);
+			return -ENOTSUP;
+		}
+
+		pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
+			 map->name, mname, (unsigned int)msize,
+			 moff, kern_moff);
+		memcpy(kern_mdata, mdata, msize);
+	}
+
+	return 0;
+}
+
+static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
+{
+	struct bpf_map *map;
+	size_t i;
+	int err;
+
+	for (i = 0; i < obj->nr_maps; i++) {
+		map = &obj->maps[i];
+
+		if (!bpf_map__is_struct_ops(map))
+			continue;
+
+		err = bpf_map__init_kern_struct_ops(map, obj->btf,
+						    obj->btf_vmlinux);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
+{
+	const struct btf_type *type, *datasec;
+	const struct btf_var_secinfo *vsi;
+	struct bpf_struct_ops *st_ops;
+	const char *tname, *var_name;
+	__s32 type_id, datasec_id;
+	const struct btf *btf;
+	struct bpf_map *map;
+	__u32 i;
+
+	if (obj->efile.st_ops_shndx == -1)
+		return 0;
+
+	btf = obj->btf;
+	datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC,
+					    BTF_KIND_DATASEC);
+	if (datasec_id < 0) {
+		pr_warn("struct_ops init: DATASEC %s not found\n",
+			STRUCT_OPS_SEC);
+		return -EINVAL;
+	}
+
+	datasec = btf__type_by_id(btf, datasec_id);
+	vsi = btf_var_secinfos(datasec);
+	for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
+		type = btf__type_by_id(obj->btf, vsi->type);
+		var_name = btf__name_by_offset(obj->btf, type->name_off);
+
+		type_id = btf__resolve_type(obj->btf, vsi->type);
+		if (type_id < 0) {
+			pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
+				vsi->type, STRUCT_OPS_SEC);
+			return -EINVAL;
+		}
+
+		type = btf__type_by_id(obj->btf, type_id);
+		tname = btf__name_by_offset(obj->btf, type->name_off);
+		if (!tname[0]) {
+			pr_warn("struct_ops init: anonymous type is not supported\n");
+			return -ENOTSUP;
+		}
+		if (!btf_is_struct(type)) {
+			pr_warn("struct_ops init: %s is not a struct\n", tname);
+			return -EINVAL;
+		}
+
+		map = bpf_object__add_map(obj);
+		if (IS_ERR(map))
+			return PTR_ERR(map);
+
+		map->sec_idx = obj->efile.st_ops_shndx;
+		map->sec_offset = vsi->offset;
+		map->name = strdup(var_name);
+		if (!map->name)
+			return -ENOMEM;
+
+		map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
+		map->def.key_size = sizeof(int);
+		map->def.value_size = type->size;
+		map->def.max_entries = 1;
+
+		map->st_ops = calloc(1, sizeof(*map->st_ops));
+		if (!map->st_ops)
+			return -ENOMEM;
+		st_ops = map->st_ops;
+		st_ops->data = malloc(type->size);
+		st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
+		st_ops->kern_func_off = malloc(btf_vlen(type) *
+					       sizeof(*st_ops->kern_func_off));
+		if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
+			return -ENOMEM;
+
+		if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) {
+			pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
+				var_name, STRUCT_OPS_SEC);
+			return -EINVAL;
+		}
+
+		memcpy(st_ops->data,
+		       obj->efile.st_ops_data->d_buf + vsi->offset,
+		       type->size);
+		st_ops->tname = tname;
+		st_ops->type = type;
+		st_ops->type_id = type_id;
+
+		pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
+			 tname, type_id, var_name, vsi->offset);
+	}
+
+	return 0;
+}
+
+static struct bpf_object *bpf_object__new(const char *path,
+					  const void *obj_buf,
+					  size_t obj_buf_sz,
+					  const char *obj_name)
+{
+	struct bpf_object *obj;
+	char *end;
+
+	obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
+	if (!obj) {
+		pr_warn("alloc memory failed for %s\n", path);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	strcpy(obj->path, path);
+	if (obj_name) {
+		strncpy(obj->name, obj_name, sizeof(obj->name) - 1);
+		obj->name[sizeof(obj->name) - 1] = 0;
+	} else {
+		/* Using basename() GNU version which doesn't modify arg. */
+		strncpy(obj->name, basename((void *)path),
+			sizeof(obj->name) - 1);
+		end = strchr(obj->name, '.');
+		if (end)
+			*end = 0;
+	}
+
+	obj->efile.fd = -1;
+	/*
+	 * Caller of this function should also call
+	 * bpf_object__elf_finish() after data collection to return
+	 * obj_buf to user. If not, we should duplicate the buffer to
+	 * avoid user freeing them before elf finish.
+	 */
+	obj->efile.obj_buf = obj_buf;
+	obj->efile.obj_buf_sz = obj_buf_sz;
+	obj->efile.maps_shndx = -1;
+	obj->efile.btf_maps_shndx = -1;
+	obj->efile.data_shndx = -1;
+	obj->efile.rodata_shndx = -1;
+	obj->efile.bss_shndx = -1;
+	obj->efile.st_ops_shndx = -1;
+	obj->kconfig_map_idx = -1;
+	obj->rodata_map_idx = -1;
+
+	obj->kern_version = get_kernel_version();
+	obj->loaded = false;
+
+	INIT_LIST_HEAD(&obj->list);
+	list_add(&obj->list, &bpf_objects_list);
+	return obj;
+}
+
+static void bpf_object__elf_finish(struct bpf_object *obj)
+{
+	if (!obj_elf_valid(obj))
+		return;
+
+	if (obj->efile.elf) {
+		elf_end(obj->efile.elf);
+		obj->efile.elf = NULL;
+	}
+	obj->efile.symbols = NULL;
+	obj->efile.data = NULL;
+	obj->efile.rodata = NULL;
+	obj->efile.bss = NULL;
+	obj->efile.st_ops_data = NULL;
+
+	zfree(&obj->efile.reloc_sects);
+	obj->efile.nr_reloc_sects = 0;
+	zclose(obj->efile.fd);
+	obj->efile.obj_buf = NULL;
+	obj->efile.obj_buf_sz = 0;
+}
+
+/* if libelf is old and doesn't support mmap(), fall back to read() */
+#ifndef ELF_C_READ_MMAP
+#define ELF_C_READ_MMAP ELF_C_READ
+#endif
+
+static int bpf_object__elf_init(struct bpf_object *obj)
+{
+	int err = 0;
+	GElf_Ehdr *ep;
+
+	if (obj_elf_valid(obj)) {
+		pr_warn("elf: init internal error\n");
+		return -LIBBPF_ERRNO__LIBELF;
+	}
+
+	if (obj->efile.obj_buf_sz > 0) {
+		/*
+		 * obj_buf should have been validated by
+		 * bpf_object__open_buffer().
+		 */
+		obj->efile.elf = elf_memory((char *)obj->efile.obj_buf,
+					    obj->efile.obj_buf_sz);
+	} else {
+		obj->efile.fd = open(obj->path, O_RDONLY);
+		if (obj->efile.fd < 0) {
+			char errmsg[STRERR_BUFSIZE], *cp;
+
+			err = -errno;
+			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+			pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
+			return err;
+		}
+
+		obj->efile.elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
+	}
+
+	if (!obj->efile.elf) {
+		pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
+		err = -LIBBPF_ERRNO__LIBELF;
+		goto errout;
+	}
+
+	if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
+		pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
+		err = -LIBBPF_ERRNO__FORMAT;
+		goto errout;
+	}
+	ep = &obj->efile.ehdr;
+
+	if (elf_getshdrstrndx(obj->efile.elf, &obj->efile.shstrndx)) {
+		pr_warn("elf: failed to get section names section index for %s: %s\n",
+			obj->path, elf_errmsg(-1));
+		err = -LIBBPF_ERRNO__FORMAT;
+		goto errout;
+	}
+
+	/* Elf is corrupted/truncated, avoid calling elf_strptr. */
+	if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) {
+		pr_warn("elf: failed to get section names strings from %s: %s\n",
+			obj->path, elf_errmsg(-1));
+		err = -LIBBPF_ERRNO__FORMAT;
+		goto errout;
+	}
+
+	/* Old LLVM set e_machine to EM_NONE */
+	if (ep->e_type != ET_REL ||
+	    (ep->e_machine && ep->e_machine != EM_BPF)) {
+		pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
+		err = -LIBBPF_ERRNO__FORMAT;
+		goto errout;
+	}
+
+	return 0;
+errout:
+	bpf_object__elf_finish(obj);
+	return err;
+}
+
+static int bpf_object__check_endianness(struct bpf_object *obj)
+{
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+	if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB)
+		return 0;
+#elif __BYTE_ORDER == __BIG_ENDIAN
+	if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB)
+		return 0;
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+	pr_warn("elf: endianness mismatch in %s.\n", obj->path);
+	return -LIBBPF_ERRNO__ENDIAN;
+}
+
+static int
+bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
+{
+	memcpy(obj->license, data, min(size, sizeof(obj->license) - 1));
+	pr_debug("license of %s is %s\n", obj->path, obj->license);
+	return 0;
+}
+
+static int
+bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
+{
+	__u32 kver;
+
+	if (size != sizeof(kver)) {
+		pr_warn("invalid kver section in %s\n", obj->path);
+		return -LIBBPF_ERRNO__FORMAT;
+	}
+	memcpy(&kver, data, sizeof(kver));
+	obj->kern_version = kver;
+	pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
+	return 0;
+}
+
+static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
+{
+	if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
+	    type == BPF_MAP_TYPE_HASH_OF_MAPS)
+		return true;
+	return false;
+}
+
+int bpf_object__section_size(const struct bpf_object *obj, const char *name,
+			     __u32 *size)
+{
+	int ret = -ENOENT;
+
+	*size = 0;
+	if (!name) {
+		return -EINVAL;
+	} else if (!strcmp(name, DATA_SEC)) {
+		if (obj->efile.data)
+			*size = obj->efile.data->d_size;
+	} else if (!strcmp(name, BSS_SEC)) {
+		if (obj->efile.bss)
+			*size = obj->efile.bss->d_size;
+	} else if (!strcmp(name, RODATA_SEC)) {
+		if (obj->efile.rodata)
+			*size = obj->efile.rodata->d_size;
+	} else if (!strcmp(name, STRUCT_OPS_SEC)) {
+		if (obj->efile.st_ops_data)
+			*size = obj->efile.st_ops_data->d_size;
+	} else {
+		Elf_Scn *scn = elf_sec_by_name(obj, name);
+		Elf_Data *data = elf_sec_data(obj, scn);
+
+		if (data) {
+			ret = 0; /* found it */
+			*size = data->d_size;
+		}
+	}
+
+	return *size ? 0 : ret;
+}
+
+int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
+				__u32 *off)
+{
+	Elf_Data *symbols = obj->efile.symbols;
+	const char *sname;
+	size_t si;
+
+	if (!name || !off)
+		return -EINVAL;
+
+	for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) {
+		GElf_Sym sym;
+
+		if (!gelf_getsym(symbols, si, &sym))
+			continue;
+		if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
+		    GELF_ST_TYPE(sym.st_info) != STT_OBJECT)
+			continue;
+
+		sname = elf_sym_str(obj, sym.st_name);
+		if (!sname) {
+			pr_warn("failed to get sym name string for var %s\n",
+				name);
+			return -EIO;
+		}
+		if (strcmp(name, sname) == 0) {
+			*off = sym.st_value;
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
+{
+	struct bpf_map *new_maps;
+	size_t new_cap;
+	int i;
+
+	if (obj->nr_maps < obj->maps_cap)
+		return &obj->maps[obj->nr_maps++];
+
+	new_cap = max((size_t)4, obj->maps_cap * 3 / 2);
+	new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps));
+	if (!new_maps) {
+		pr_warn("alloc maps for object failed\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	obj->maps_cap = new_cap;
+	obj->maps = new_maps;
+
+	/* zero out new maps */
+	memset(obj->maps + obj->nr_maps, 0,
+	       (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps));
+	/*
+	 * fill all fd with -1 so won't close incorrect fd (fd=0 is stdin)
+	 * when failure (zclose won't close negative fd)).
+	 */
+	for (i = obj->nr_maps; i < obj->maps_cap; i++) {
+		obj->maps[i].fd = -1;
+		obj->maps[i].inner_map_fd = -1;
+	}
+
+	return &obj->maps[obj->nr_maps++];
+}
+
+static size_t bpf_map_mmap_sz(const struct bpf_map *map)
+{
+	long page_sz = sysconf(_SC_PAGE_SIZE);
+	size_t map_sz;
+
+	map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries;
+	map_sz = roundup(map_sz, page_sz);
+	return map_sz;
+}
+
+static char *internal_map_name(struct bpf_object *obj,
+			       enum libbpf_map_type type)
+{
+	char map_name[BPF_OBJ_NAME_LEN], *p;
+	const char *sfx = libbpf_type_to_btf_name[type];
+	int sfx_len = max((size_t)7, strlen(sfx));
+	int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1,
+			  strlen(obj->name));
+
+	snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
+		 sfx_len, libbpf_type_to_btf_name[type]);
+
+	/* sanitise map name to characters allowed by kernel */
+	for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
+		if (!isalnum(*p) && *p != '_' && *p != '.')
+			*p = '_';
+
+	return strdup(map_name);
+}
+
+static int
+bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
+			      int sec_idx, void *data, size_t data_sz)
+{
+	struct bpf_map_def *def;
+	struct bpf_map *map;
+	int err;
+
+	map = bpf_object__add_map(obj);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	map->libbpf_type = type;
+	map->sec_idx = sec_idx;
+	map->sec_offset = 0;
+	map->name = internal_map_name(obj, type);
+	if (!map->name) {
+		pr_warn("failed to alloc map name\n");
+		return -ENOMEM;
+	}
+
+	def = &map->def;
+	def->type = BPF_MAP_TYPE_ARRAY;
+	def->key_size = sizeof(int);
+	def->value_size = data_sz;
+	def->max_entries = 1;
+	def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
+			 ? BPF_F_RDONLY_PROG : 0;
+	def->map_flags |= BPF_F_MMAPABLE;
+
+	pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
+		 map->name, map->sec_idx, map->sec_offset, def->map_flags);
+
+	map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
+			   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	if (map->mmaped == MAP_FAILED) {
+		err = -errno;
+		map->mmaped = NULL;
+		pr_warn("failed to alloc map '%s' content buffer: %d\n",
+			map->name, err);
+		zfree(&map->name);
+		return err;
+	}
+
+	if (data)
+		memcpy(map->mmaped, data, data_sz);
+
+	pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
+	return 0;
+}
+
+static int bpf_object__init_global_data_maps(struct bpf_object *obj)
+{
+	int err;
+
+	/*
+	 * Populate obj->maps with libbpf internal maps.
+	 */
+	if (obj->efile.data_shndx >= 0) {
+		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
+						    obj->efile.data_shndx,
+						    obj->efile.data->d_buf,
+						    obj->efile.data->d_size);
+		if (err)
+			return err;
+	}
+	if (obj->efile.rodata_shndx >= 0) {
+		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
+						    obj->efile.rodata_shndx,
+						    obj->efile.rodata->d_buf,
+						    obj->efile.rodata->d_size);
+		if (err)
+			return err;
+
+		obj->rodata_map_idx = obj->nr_maps - 1;
+	}
+	if (obj->efile.bss_shndx >= 0) {
+		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
+						    obj->efile.bss_shndx,
+						    NULL,
+						    obj->efile.bss->d_size);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+
+static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
+					       const void *name)
+{
+	int i;
+
+	for (i = 0; i < obj->nr_extern; i++) {
+		if (strcmp(obj->externs[i].name, name) == 0)
+			return &obj->externs[i];
+	}
+	return NULL;
+}
+
+static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
+			      char value)
+{
+	switch (ext->kcfg.type) {
+	case KCFG_BOOL:
+		if (value == 'm') {
+			pr_warn("extern (kcfg) %s=%c should be tristate or char\n",
+				ext->name, value);
+			return -EINVAL;
+		}
+		*(bool *)ext_val = value == 'y' ? true : false;
+		break;
+	case KCFG_TRISTATE:
+		if (value == 'y')
+			*(enum libbpf_tristate *)ext_val = TRI_YES;
+		else if (value == 'm')
+			*(enum libbpf_tristate *)ext_val = TRI_MODULE;
+		else /* value == 'n' */
+			*(enum libbpf_tristate *)ext_val = TRI_NO;
+		break;
+	case KCFG_CHAR:
+		*(char *)ext_val = value;
+		break;
+	case KCFG_UNKNOWN:
+	case KCFG_INT:
+	case KCFG_CHAR_ARR:
+	default:
+		pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n",
+			ext->name, value);
+		return -EINVAL;
+	}
+	ext->is_set = true;
+	return 0;
+}
+
+static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
+			      const char *value)
+{
+	size_t len;
+
+	if (ext->kcfg.type != KCFG_CHAR_ARR) {
+		pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value);
+		return -EINVAL;
+	}
+
+	len = strlen(value);
+	if (value[len - 1] != '"') {
+		pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
+			ext->name, value);
+		return -EINVAL;
+	}
+
+	/* strip quotes */
+	len -= 2;
+	if (len >= ext->kcfg.sz) {
+		pr_warn("extern (kcfg) '%s': long string config %s of (%zu bytes) truncated to %d bytes\n",
+			ext->name, value, len, ext->kcfg.sz - 1);
+		len = ext->kcfg.sz - 1;
+	}
+	memcpy(ext_val, value + 1, len);
+	ext_val[len] = '\0';
+	ext->is_set = true;
+	return 0;
+}
+
+static int parse_u64(const char *value, __u64 *res)
+{
+	char *value_end;
+	int err;
+
+	errno = 0;
+	*res = strtoull(value, &value_end, 0);
+	if (errno) {
+		err = -errno;
+		pr_warn("failed to parse '%s' as integer: %d\n", value, err);
+		return err;
+	}
+	if (*value_end) {
+		pr_warn("failed to parse '%s' as integer completely\n", value);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
+{
+	int bit_sz = ext->kcfg.sz * 8;
+
+	if (ext->kcfg.sz == 8)
+		return true;
+
+	/* Validate that value stored in u64 fits in integer of `ext->sz`
+	 * bytes size without any loss of information. If the target integer
+	 * is signed, we rely on the following limits of integer type of
+	 * Y bits and subsequent transformation:
+	 *
+	 *     -2^(Y-1) <= X           <= 2^(Y-1) - 1
+	 *            0 <= X + 2^(Y-1) <= 2^Y - 1
+	 *            0 <= X + 2^(Y-1) <  2^Y
+	 *
+	 *  For unsigned target integer, check that all the (64 - Y) bits are
+	 *  zero.
+	 */
+	if (ext->kcfg.is_signed)
+		return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
+	else
+		return (v >> bit_sz) == 0;
+}
+
+static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
+			      __u64 value)
+{
+	if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
+		pr_warn("extern (kcfg) %s=%llu should be integer\n",
+			ext->name, (unsigned long long)value);
+		return -EINVAL;
+	}
+	if (!is_kcfg_value_in_range(ext, value)) {
+		pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n",
+			ext->name, (unsigned long long)value, ext->kcfg.sz);
+		return -ERANGE;
+	}
+	switch (ext->kcfg.sz) {
+		case 1: *(__u8 *)ext_val = value; break;
+		case 2: *(__u16 *)ext_val = value; break;
+		case 4: *(__u32 *)ext_val = value; break;
+		case 8: *(__u64 *)ext_val = value; break;
+		default:
+			return -EINVAL;
+	}
+	ext->is_set = true;
+	return 0;
+}
+
+static int bpf_object__process_kconfig_line(struct bpf_object *obj,
+					    char *buf, void *data)
+{
+	struct extern_desc *ext;
+	char *sep, *value;
+	int len, err = 0;
+	void *ext_val;
+	__u64 num;
+
+	if (strncmp(buf, "CONFIG_", 7))
+		return 0;
+
+	sep = strchr(buf, '=');
+	if (!sep) {
+		pr_warn("failed to parse '%s': no separator\n", buf);
+		return -EINVAL;
+	}
+
+	/* Trim ending '\n' */
+	len = strlen(buf);
+	if (buf[len - 1] == '\n')
+		buf[len - 1] = '\0';
+	/* Split on '=' and ensure that a value is present. */
+	*sep = '\0';
+	if (!sep[1]) {
+		*sep = '=';
+		pr_warn("failed to parse '%s': no value\n", buf);
+		return -EINVAL;
+	}
+
+	ext = find_extern_by_name(obj, buf);
+	if (!ext || ext->is_set)
+		return 0;
+
+	ext_val = data + ext->kcfg.data_off;
+	value = sep + 1;
+
+	switch (*value) {
+	case 'y': case 'n': case 'm':
+		err = set_kcfg_value_tri(ext, ext_val, *value);
+		break;
+	case '"':
+		err = set_kcfg_value_str(ext, ext_val, value);
+		break;
+	default:
+		/* assume integer */
+		err = parse_u64(value, &num);
+		if (err) {
+			pr_warn("extern (kcfg) %s=%s should be integer\n",
+				ext->name, value);
+			return err;
+		}
+		err = set_kcfg_value_num(ext, ext_val, num);
+		break;
+	}
+	if (err)
+		return err;
+	pr_debug("extern (kcfg) %s=%s\n", ext->name, value);
+	return 0;
+}
+
+static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
+{
+	char buf[PATH_MAX];
+	struct utsname uts;
+	int len, err = 0;
+	gzFile file;
+
+	uname(&uts);
+	len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
+	if (len < 0)
+		return -EINVAL;
+	else if (len >= PATH_MAX)
+		return -ENAMETOOLONG;
+
+	/* gzopen also accepts uncompressed files. */
+	file = gzopen(buf, "r");
+	if (!file)
+		file = gzopen("/proc/config.gz", "r");
+
+	if (!file) {
+		pr_warn("failed to open system Kconfig\n");
+		return -ENOENT;
+	}
+
+	while (gzgets(file, buf, sizeof(buf))) {
+		err = bpf_object__process_kconfig_line(obj, buf, data);
+		if (err) {
+			pr_warn("error parsing system Kconfig line '%s': %d\n",
+				buf, err);
+			goto out;
+		}
+	}
+
+out:
+	gzclose(file);
+	return err;
+}
+
+static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
+					const char *config, void *data)
+{
+	char buf[PATH_MAX];
+	int err = 0;
+	FILE *file;
+
+	file = fmemopen((void *)config, strlen(config), "r");
+	if (!file) {
+		err = -errno;
+		pr_warn("failed to open in-memory Kconfig: %d\n", err);
+		return err;
+	}
+
+	while (fgets(buf, sizeof(buf), file)) {
+		err = bpf_object__process_kconfig_line(obj, buf, data);
+		if (err) {
+			pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
+				buf, err);
+			break;
+		}
+	}
+
+	fclose(file);
+	return err;
+}
+
+static int bpf_object__init_kconfig_map(struct bpf_object *obj)
+{
+	struct extern_desc *last_ext = NULL, *ext;
+	size_t map_sz;
+	int i, err;
+
+	for (i = 0; i < obj->nr_extern; i++) {
+		ext = &obj->externs[i];
+		if (ext->type == EXT_KCFG)
+			last_ext = ext;
+	}
+
+	if (!last_ext)
+		return 0;
+
+	map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
+	err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
+					    obj->efile.symbols_shndx,
+					    NULL, map_sz);
+	if (err)
+		return err;
+
+	obj->kconfig_map_idx = obj->nr_maps - 1;
+
+	return 0;
+}
+
+static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
+{
+	Elf_Data *symbols = obj->efile.symbols;
+	int i, map_def_sz = 0, nr_maps = 0, nr_syms;
+	Elf_Data *data = NULL;
+	Elf_Scn *scn;
+
+	if (obj->efile.maps_shndx < 0)
+		return 0;
+
+	if (!symbols)
+		return -EINVAL;
+
+
+	scn = elf_sec_by_idx(obj, obj->efile.maps_shndx);
+	data = elf_sec_data(obj, scn);
+	if (!scn || !data) {
+		pr_warn("elf: failed to get legacy map definitions for %s\n",
+			obj->path);
+		return -EINVAL;
+	}
+
+	/*
+	 * Count number of maps. Each map has a name.
+	 * Array of maps is not supported: only the first element is
+	 * considered.
+	 *
+	 * TODO: Detect array of map and report error.
+	 */
+	nr_syms = symbols->d_size / sizeof(GElf_Sym);
+	for (i = 0; i < nr_syms; i++) {
+		GElf_Sym sym;
+
+		if (!gelf_getsym(symbols, i, &sym))
+			continue;
+		if (sym.st_shndx != obj->efile.maps_shndx)
+			continue;
+		nr_maps++;
+	}
+	/* Assume equally sized map definitions */
+	pr_debug("elf: found %d legacy map definitions (%zd bytes) in %s\n",
+		 nr_maps, data->d_size, obj->path);
+
+	if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) {
+		pr_warn("elf: unable to determine legacy map definition size in %s\n",
+			obj->path);
+		return -EINVAL;
+	}
+	map_def_sz = data->d_size / nr_maps;
+
+	/* Fill obj->maps using data in "maps" section.  */
+	for (i = 0; i < nr_syms; i++) {
+		GElf_Sym sym;
+		const char *map_name;
+		struct bpf_map_def *def;
+		struct bpf_map *map;
+
+		if (!gelf_getsym(symbols, i, &sym))
+			continue;
+		if (sym.st_shndx != obj->efile.maps_shndx)
+			continue;
+
+		map = bpf_object__add_map(obj);
+		if (IS_ERR(map))
+			return PTR_ERR(map);
+
+		map_name = elf_sym_str(obj, sym.st_name);
+		if (!map_name) {
+			pr_warn("failed to get map #%d name sym string for obj %s\n",
+				i, obj->path);
+			return -LIBBPF_ERRNO__FORMAT;
+		}
+
+		map->libbpf_type = LIBBPF_MAP_UNSPEC;
+		map->sec_idx = sym.st_shndx;
+		map->sec_offset = sym.st_value;
+		pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",
+			 map_name, map->sec_idx, map->sec_offset);
+		if (sym.st_value + map_def_sz > data->d_size) {
+			pr_warn("corrupted maps section in %s: last map \"%s\" too small\n",
+				obj->path, map_name);
+			return -EINVAL;
+		}
+
+		map->name = strdup(map_name);
+		if (!map->name) {
+			pr_warn("failed to alloc map name\n");
+			return -ENOMEM;
+		}
+		pr_debug("map %d is \"%s\"\n", i, map->name);
+		def = (struct bpf_map_def *)(data->d_buf + sym.st_value);
+		/*
+		 * If the definition of the map in the object file fits in
+		 * bpf_map_def, copy it.  Any extra fields in our version
+		 * of bpf_map_def will default to zero as a result of the
+		 * calloc above.
+		 */
+		if (map_def_sz <= sizeof(struct bpf_map_def)) {
+			memcpy(&map->def, def, map_def_sz);
+		} else {
+			/*
+			 * Here the map structure being read is bigger than what
+			 * we expect, truncate if the excess bits are all zero.
+			 * If they are not zero, reject this map as
+			 * incompatible.
+			 */
+			char *b;
+
+			for (b = ((char *)def) + sizeof(struct bpf_map_def);
+			     b < ((char *)def) + map_def_sz; b++) {
+				if (*b != 0) {
+					pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n",
+						obj->path, map_name);
+					if (strict)
+						return -EINVAL;
+				}
+			}
+			memcpy(&map->def, def, sizeof(struct bpf_map_def));
+		}
+	}
+	return 0;
+}
+
+static const struct btf_type *
+skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
+{
+	const struct btf_type *t = btf__type_by_id(btf, id);
+
+	if (res_id)
+		*res_id = id;
+
+	while (btf_is_mod(t) || btf_is_typedef(t)) {
+		if (res_id)
+			*res_id = t->type;
+		t = btf__type_by_id(btf, t->type);
+	}
+
+	return t;
+}
+
+static const struct btf_type *
+resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
+{
+	const struct btf_type *t;
+
+	t = skip_mods_and_typedefs(btf, id, NULL);
+	if (!btf_is_ptr(t))
+		return NULL;
+
+	t = skip_mods_and_typedefs(btf, t->type, res_id);
+
+	return btf_is_func_proto(t) ? t : NULL;
+}
+
+static const char *btf_kind_str(const struct btf_type *t)
+{
+	switch (btf_kind(t)) {
+	case BTF_KIND_UNKN: return "void";
+	case BTF_KIND_INT: return "int";
+	case BTF_KIND_PTR: return "ptr";
+	case BTF_KIND_ARRAY: return "array";
+	case BTF_KIND_STRUCT: return "struct";
+	case BTF_KIND_UNION: return "union";
+	case BTF_KIND_ENUM: return "enum";
+	case BTF_KIND_FWD: return "fwd";
+	case BTF_KIND_TYPEDEF: return "typedef";
+	case BTF_KIND_VOLATILE: return "volatile";
+	case BTF_KIND_CONST: return "const";
+	case BTF_KIND_RESTRICT: return "restrict";
+	case BTF_KIND_FUNC: return "func";
+	case BTF_KIND_FUNC_PROTO: return "func_proto";
+	case BTF_KIND_VAR: return "var";
+	case BTF_KIND_DATASEC: return "datasec";
+	default: return "unknown";
+	}
+}
+
+/*
+ * Fetch integer attribute of BTF map definition. Such attributes are
+ * represented using a pointer to an array, in which dimensionality of array
+ * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
+ * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
+ * type definition, while using only sizeof(void *) space in ELF data section.
+ */
+static bool get_map_field_int(const char *map_name, const struct btf *btf,
+			      const struct btf_member *m, __u32 *res)
+{
+	const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
+	const char *name = btf__name_by_offset(btf, m->name_off);
+	const struct btf_array *arr_info;
+	const struct btf_type *arr_t;
+
+	if (!btf_is_ptr(t)) {
+		pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
+			map_name, name, btf_kind_str(t));
+		return false;
+	}
+
+	arr_t = btf__type_by_id(btf, t->type);
+	if (!arr_t) {
+		pr_warn("map '%s': attr '%s': type [%u] not found.\n",
+			map_name, name, t->type);
+		return false;
+	}
+	if (!btf_is_array(arr_t)) {
+		pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
+			map_name, name, btf_kind_str(arr_t));
+		return false;
+	}
+	arr_info = btf_array(arr_t);
+	*res = arr_info->nelems;
+	return true;
+}
+
+static int build_map_pin_path(struct bpf_map *map, const char *path)
+{
+	char buf[PATH_MAX];
+	int len;
+
+	if (!path)
+		path = "/sys/fs/bpf";
+
+	len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map));
+	if (len < 0)
+		return -EINVAL;
+	else if (len >= PATH_MAX)
+		return -ENAMETOOLONG;
+
+	return bpf_map__set_pin_path(map, buf);
+}
+
+
+static int parse_btf_map_def(struct bpf_object *obj,
+			     struct bpf_map *map,
+			     const struct btf_type *def,
+			     bool strict, bool is_inner,
+			     const char *pin_root_path)
+{
+	const struct btf_type *t;
+	const struct btf_member *m;
+	int vlen, i;
+
+	vlen = btf_vlen(def);
+	m = btf_members(def);
+	for (i = 0; i < vlen; i++, m++) {
+		const char *name = btf__name_by_offset(obj->btf, m->name_off);
+
+		if (!name) {
+			pr_warn("map '%s': invalid field #%d.\n", map->name, i);
+			return -EINVAL;
+		}
+		if (strcmp(name, "type") == 0) {
+			if (!get_map_field_int(map->name, obj->btf, m,
+					       &map->def.type))
+				return -EINVAL;
+			pr_debug("map '%s': found type = %u.\n",
+				 map->name, map->def.type);
+		} else if (strcmp(name, "max_entries") == 0) {
+			if (!get_map_field_int(map->name, obj->btf, m,
+					       &map->def.max_entries))
+				return -EINVAL;
+			pr_debug("map '%s': found max_entries = %u.\n",
+				 map->name, map->def.max_entries);
+		} else if (strcmp(name, "map_flags") == 0) {
+			if (!get_map_field_int(map->name, obj->btf, m,
+					       &map->def.map_flags))
+				return -EINVAL;
+			pr_debug("map '%s': found map_flags = %u.\n",
+				 map->name, map->def.map_flags);
+		} else if (strcmp(name, "numa_node") == 0) {
+			if (!get_map_field_int(map->name, obj->btf, m, &map->numa_node))
+				return -EINVAL;
+			pr_debug("map '%s': found numa_node = %u.\n", map->name, map->numa_node);
+		} else if (strcmp(name, "key_size") == 0) {
+			__u32 sz;
+
+			if (!get_map_field_int(map->name, obj->btf, m, &sz))
+				return -EINVAL;
+			pr_debug("map '%s': found key_size = %u.\n",
+				 map->name, sz);
+			if (map->def.key_size && map->def.key_size != sz) {
+				pr_warn("map '%s': conflicting key size %u != %u.\n",
+					map->name, map->def.key_size, sz);
+				return -EINVAL;
+			}
+			map->def.key_size = sz;
+		} else if (strcmp(name, "key") == 0) {
+			__s64 sz;
+
+			t = btf__type_by_id(obj->btf, m->type);
+			if (!t) {
+				pr_warn("map '%s': key type [%d] not found.\n",
+					map->name, m->type);
+				return -EINVAL;
+			}
+			if (!btf_is_ptr(t)) {
+				pr_warn("map '%s': key spec is not PTR: %s.\n",
+					map->name, btf_kind_str(t));
+				return -EINVAL;
+			}
+			sz = btf__resolve_size(obj->btf, t->type);
+			if (sz < 0) {
+				pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
+					map->name, t->type, (ssize_t)sz);
+				return sz;
+			}
+			pr_debug("map '%s': found key [%u], sz = %zd.\n",
+				 map->name, t->type, (ssize_t)sz);
+			if (map->def.key_size && map->def.key_size != sz) {
+				pr_warn("map '%s': conflicting key size %u != %zd.\n",
+					map->name, map->def.key_size, (ssize_t)sz);
+				return -EINVAL;
+			}
+			map->def.key_size = sz;
+			map->btf_key_type_id = t->type;
+		} else if (strcmp(name, "value_size") == 0) {
+			__u32 sz;
+
+			if (!get_map_field_int(map->name, obj->btf, m, &sz))
+				return -EINVAL;
+			pr_debug("map '%s': found value_size = %u.\n",
+				 map->name, sz);
+			if (map->def.value_size && map->def.value_size != sz) {
+				pr_warn("map '%s': conflicting value size %u != %u.\n",
+					map->name, map->def.value_size, sz);
+				return -EINVAL;
+			}
+			map->def.value_size = sz;
+		} else if (strcmp(name, "value") == 0) {
+			__s64 sz;
+
+			t = btf__type_by_id(obj->btf, m->type);
+			if (!t) {
+				pr_warn("map '%s': value type [%d] not found.\n",
+					map->name, m->type);
+				return -EINVAL;
+			}
+			if (!btf_is_ptr(t)) {
+				pr_warn("map '%s': value spec is not PTR: %s.\n",
+					map->name, btf_kind_str(t));
+				return -EINVAL;
+			}
+			sz = btf__resolve_size(obj->btf, t->type);
+			if (sz < 0) {
+				pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
+					map->name, t->type, (ssize_t)sz);
+				return sz;
+			}
+			pr_debug("map '%s': found value [%u], sz = %zd.\n",
+				 map->name, t->type, (ssize_t)sz);
+			if (map->def.value_size && map->def.value_size != sz) {
+				pr_warn("map '%s': conflicting value size %u != %zd.\n",
+					map->name, map->def.value_size, (ssize_t)sz);
+				return -EINVAL;
+			}
+			map->def.value_size = sz;
+			map->btf_value_type_id = t->type;
+		}
+		else if (strcmp(name, "values") == 0) {
+			int err;
+
+			if (is_inner) {
+				pr_warn("map '%s': multi-level inner maps not supported.\n",
+					map->name);
+				return -ENOTSUP;
+			}
+			if (i != vlen - 1) {
+				pr_warn("map '%s': '%s' member should be last.\n",
+					map->name, name);
+				return -EINVAL;
+			}
+			if (!bpf_map_type__is_map_in_map(map->def.type)) {
+				pr_warn("map '%s': should be map-in-map.\n",
+					map->name);
+				return -ENOTSUP;
+			}
+			if (map->def.value_size && map->def.value_size != 4) {
+				pr_warn("map '%s': conflicting value size %u != 4.\n",
+					map->name, map->def.value_size);
+				return -EINVAL;
+			}
+			map->def.value_size = 4;
+			t = btf__type_by_id(obj->btf, m->type);
+			if (!t) {
+				pr_warn("map '%s': map-in-map inner type [%d] not found.\n",
+					map->name, m->type);
+				return -EINVAL;
+			}
+			if (!btf_is_array(t) || btf_array(t)->nelems) {
+				pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n",
+					map->name);
+				return -EINVAL;
+			}
+			t = skip_mods_and_typedefs(obj->btf, btf_array(t)->type,
+						   NULL);
+			if (!btf_is_ptr(t)) {
+				pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
+					map->name, btf_kind_str(t));
+				return -EINVAL;
+			}
+			t = skip_mods_and_typedefs(obj->btf, t->type, NULL);
+			if (!btf_is_struct(t)) {
+				pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
+					map->name, btf_kind_str(t));
+				return -EINVAL;
+			}
+
+			map->inner_map = calloc(1, sizeof(*map->inner_map));
+			if (!map->inner_map)
+				return -ENOMEM;
+			map->inner_map->sec_idx = obj->efile.btf_maps_shndx;
+			map->inner_map->name = malloc(strlen(map->name) +
+						      sizeof(".inner") + 1);
+			if (!map->inner_map->name)
+				return -ENOMEM;
+			sprintf(map->inner_map->name, "%s.inner", map->name);
+
+			err = parse_btf_map_def(obj, map->inner_map, t, strict,
+						true /* is_inner */, NULL);
+			if (err)
+				return err;
+		} else if (strcmp(name, "pinning") == 0) {
+			__u32 val;
+			int err;
+
+			if (is_inner) {
+				pr_debug("map '%s': inner def can't be pinned.\n",
+					 map->name);
+				return -EINVAL;
+			}
+			if (!get_map_field_int(map->name, obj->btf, m, &val))
+				return -EINVAL;
+			pr_debug("map '%s': found pinning = %u.\n",
+				 map->name, val);
+
+			if (val != LIBBPF_PIN_NONE &&
+			    val != LIBBPF_PIN_BY_NAME) {
+				pr_warn("map '%s': invalid pinning value %u.\n",
+					map->name, val);
+				return -EINVAL;
+			}
+			if (val == LIBBPF_PIN_BY_NAME) {
+				err = build_map_pin_path(map, pin_root_path);
+				if (err) {
+					pr_warn("map '%s': couldn't build pin path.\n",
+						map->name);
+					return err;
+				}
+			}
+		} else {
+			if (strict) {
+				pr_warn("map '%s': unknown field '%s'.\n",
+					map->name, name);
+				return -ENOTSUP;
+			}
+			pr_debug("map '%s': ignoring unknown field '%s'.\n",
+				 map->name, name);
+		}
+	}
+
+	if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
+		pr_warn("map '%s': map type isn't specified.\n", map->name);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int bpf_object__init_user_btf_map(struct bpf_object *obj,
+					 const struct btf_type *sec,
+					 int var_idx, int sec_idx,
+					 const Elf_Data *data, bool strict,
+					 const char *pin_root_path)
+{
+	const struct btf_type *var, *def;
+	const struct btf_var_secinfo *vi;
+	const struct btf_var *var_extra;
+	const char *map_name;
+	struct bpf_map *map;
+
+	vi = btf_var_secinfos(sec) + var_idx;
+	var = btf__type_by_id(obj->btf, vi->type);
+	var_extra = btf_var(var);
+	map_name = btf__name_by_offset(obj->btf, var->name_off);
+
+	if (map_name == NULL || map_name[0] == '\0') {
+		pr_warn("map #%d: empty name.\n", var_idx);
+		return -EINVAL;
+	}
+	if ((__u64)vi->offset + vi->size > data->d_size) {
+		pr_warn("map '%s' BTF data is corrupted.\n", map_name);
+		return -EINVAL;
+	}
+	if (!btf_is_var(var)) {
+		pr_warn("map '%s': unexpected var kind %s.\n",
+			map_name, btf_kind_str(var));
+		return -EINVAL;
+	}
+	if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
+	    var_extra->linkage != BTF_VAR_STATIC) {
+		pr_warn("map '%s': unsupported var linkage %u.\n",
+			map_name, var_extra->linkage);
+		return -EOPNOTSUPP;
+	}
+
+	def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
+	if (!btf_is_struct(def)) {
+		pr_warn("map '%s': unexpected def kind %s.\n",
+			map_name, btf_kind_str(var));
+		return -EINVAL;
+	}
+	if (def->size > vi->size) {
+		pr_warn("map '%s': invalid def size.\n", map_name);
+		return -EINVAL;
+	}
+
+	map = bpf_object__add_map(obj);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+	map->name = strdup(map_name);
+	if (!map->name) {
+		pr_warn("map '%s': failed to alloc map name.\n", map_name);
+		return -ENOMEM;
+	}
+	map->libbpf_type = LIBBPF_MAP_UNSPEC;
+	map->def.type = BPF_MAP_TYPE_UNSPEC;
+	map->sec_idx = sec_idx;
+	map->sec_offset = vi->offset;
+	map->btf_var_idx = var_idx;
+	pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
+		 map_name, map->sec_idx, map->sec_offset);
+
+	return parse_btf_map_def(obj, map, def, strict, false, pin_root_path);
+}
+
+static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
+					  const char *pin_root_path)
+{
+	const struct btf_type *sec = NULL;
+	int nr_types, i, vlen, err;
+	const struct btf_type *t;
+	const char *name;
+	Elf_Data *data;
+	Elf_Scn *scn;
+
+	if (obj->efile.btf_maps_shndx < 0)
+		return 0;
+
+	scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
+	data = elf_sec_data(obj, scn);
+	if (!scn || !data) {
+		pr_warn("elf: failed to get %s map definitions for %s\n",
+			MAPS_ELF_SEC, obj->path);
+		return -EINVAL;
+	}
+
+	nr_types = btf__get_nr_types(obj->btf);
+	for (i = 1; i <= nr_types; i++) {
+		t = btf__type_by_id(obj->btf, i);
+		if (!btf_is_datasec(t))
+			continue;
+		name = btf__name_by_offset(obj->btf, t->name_off);
+		if (strcmp(name, MAPS_ELF_SEC) == 0) {
+			sec = t;
+			obj->efile.btf_maps_sec_btf_id = i;
+			break;
+		}
+	}
+
+	if (!sec) {
+		pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
+		return -ENOENT;
+	}
+
+	vlen = btf_vlen(sec);
+	for (i = 0; i < vlen; i++) {
+		err = bpf_object__init_user_btf_map(obj, sec, i,
+						    obj->efile.btf_maps_shndx,
+						    data, strict,
+						    pin_root_path);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int bpf_object__init_maps(struct bpf_object *obj,
+				 const struct bpf_object_open_opts *opts)
+{
+	const char *pin_root_path;
+	bool strict;
+	int err;
+
+	strict = !OPTS_GET(opts, relaxed_maps, false);
+	pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
+
+	err = bpf_object__init_user_maps(obj, strict);
+	err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
+	err = err ?: bpf_object__init_global_data_maps(obj);
+	err = err ?: bpf_object__init_kconfig_map(obj);
+	err = err ?: bpf_object__init_struct_ops_maps(obj);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static bool section_have_execinstr(struct bpf_object *obj, int idx)
+{
+	GElf_Shdr sh;
+
+	if (elf_sec_hdr(obj, elf_sec_by_idx(obj, idx), &sh))
+		return false;
+
+	return sh.sh_flags & SHF_EXECINSTR;
+}
+
+static bool btf_needs_sanitization(struct bpf_object *obj)
+{
+	bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
+	bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
+	bool has_func = kernel_supports(FEAT_BTF_FUNC);
+
+	return !has_func || !has_datasec || !has_func_global;
+}
+
+static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
+{
+	bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
+	bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
+	bool has_func = kernel_supports(FEAT_BTF_FUNC);
+	struct btf_type *t;
+	int i, j, vlen;
+
+	for (i = 1; i <= btf__get_nr_types(btf); i++) {
+		t = (struct btf_type *)btf__type_by_id(btf, i);
+
+		if (!has_datasec && btf_is_var(t)) {
+			/* replace VAR with INT */
+			t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
+			/*
+			 * using size = 1 is the safest choice, 4 will be too
+			 * big and cause kernel BTF validation failure if
+			 * original variable took less than 4 bytes
+			 */
+			t->size = 1;
+			*(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
+		} else if (!has_datasec && btf_is_datasec(t)) {
+			/* replace DATASEC with STRUCT */
+			const struct btf_var_secinfo *v = btf_var_secinfos(t);
+			struct btf_member *m = btf_members(t);
+			struct btf_type *vt;
+			char *name;
+
+			name = (char *)btf__name_by_offset(btf, t->name_off);
+			while (*name) {
+				if (*name == '.')
+					*name = '_';
+				name++;
+			}
+
+			vlen = btf_vlen(t);
+			t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
+			for (j = 0; j < vlen; j++, v++, m++) {
+				/* order of field assignments is important */
+				m->offset = v->offset * 8;
+				m->type = v->type;
+				/* preserve variable name as member name */
+				vt = (void *)btf__type_by_id(btf, v->type);
+				m->name_off = vt->name_off;
+			}
+		} else if (!has_func && btf_is_func_proto(t)) {
+			/* replace FUNC_PROTO with ENUM */
+			vlen = btf_vlen(t);
+			t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
+			t->size = sizeof(__u32); /* kernel enforced */
+		} else if (!has_func && btf_is_func(t)) {
+			/* replace FUNC with TYPEDEF */
+			t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
+		} else if (!has_func_global && btf_is_func(t)) {
+			/* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
+			t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
+		}
+	}
+}
+
+static bool libbpf_needs_btf(const struct bpf_object *obj)
+{
+	return obj->efile.btf_maps_shndx >= 0 ||
+	       obj->efile.st_ops_shndx >= 0 ||
+	       obj->nr_extern > 0;
+}
+
+static bool kernel_needs_btf(const struct bpf_object *obj)
+{
+	return obj->efile.st_ops_shndx >= 0;
+}
+
+static int bpf_object__init_btf(struct bpf_object *obj,
+				Elf_Data *btf_data,
+				Elf_Data *btf_ext_data)
+{
+	int err = -ENOENT;
+
+	if (btf_data) {
+		obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
+		if (IS_ERR(obj->btf)) {
+			err = PTR_ERR(obj->btf);
+			obj->btf = NULL;
+			pr_warn("Error loading ELF section %s: %d.\n",
+				BTF_ELF_SEC, err);
+			goto out;
+		}
+		/* enforce 8-byte pointers for BPF-targeted BTFs */
+		btf__set_pointer_size(obj->btf, 8);
+		err = 0;
+	}
+	if (btf_ext_data) {
+		if (!obj->btf) {
+			pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
+				 BTF_EXT_ELF_SEC, BTF_ELF_SEC);
+			goto out;
+		}
+		obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
+					    btf_ext_data->d_size);
+		if (IS_ERR(obj->btf_ext)) {
+			pr_warn("Error loading ELF section %s: %ld. Ignored and continue.\n",
+				BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext));
+			obj->btf_ext = NULL;
+			goto out;
+		}
+	}
+out:
+	if (err && libbpf_needs_btf(obj)) {
+		pr_warn("BTF is required, but is missing or corrupted.\n");
+		return err;
+	}
+	return 0;
+}
+
+static int bpf_object__finalize_btf(struct bpf_object *obj)
+{
+	int err;
+
+	if (!obj->btf)
+		return 0;
+
+	err = btf__finalize_data(obj, obj->btf);
+	if (err) {
+		pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
+		return err;
+	}
+
+	return 0;
+}
+
+static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
+{
+	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
+	    prog->type == BPF_PROG_TYPE_LSM)
+		return true;
+
+	/* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
+	 * also need vmlinux BTF
+	 */
+	if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
+		return true;
+
+	return false;
+}
+
+static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
+{
+	bool need_vmlinux_btf = false;
+	struct bpf_program *prog;
+	int i, err;
+
+	/* CO-RE relocations need kernel BTF */
+	if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
+		need_vmlinux_btf = true;
+
+	/* Support for typed ksyms needs kernel BTF */
+	for (i = 0; i < obj->nr_extern; i++) {
+		const struct extern_desc *ext;
+
+		ext = &obj->externs[i];
+		if (ext->type == EXT_KSYM && ext->ksym.type_id) {
+			need_vmlinux_btf = true;
+			break;
+		}
+	}
+
+	bpf_object__for_each_program(prog, obj) {
+		if (!prog->load)
+			continue;
+		if (libbpf_prog_needs_vmlinux_btf(prog)) {
+			need_vmlinux_btf = true;
+			break;
+		}
+	}
+
+	if (!need_vmlinux_btf)
+		return 0;
+
+	obj->btf_vmlinux = libbpf_find_kernel_btf();
+	if (IS_ERR(obj->btf_vmlinux)) {
+		err = PTR_ERR(obj->btf_vmlinux);
+		pr_warn("Error loading vmlinux BTF: %d\n", err);
+		obj->btf_vmlinux = NULL;
+		return err;
+	}
+	return 0;
+}
+
+static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
+{
+	struct btf *kern_btf = obj->btf;
+	bool btf_mandatory, sanitize;
+	int err = 0;
+
+	if (!obj->btf)
+		return 0;
+
+	if (!kernel_supports(FEAT_BTF)) {
+		if (kernel_needs_btf(obj)) {
+			err = -EOPNOTSUPP;
+			goto report;
+		}
+		pr_debug("Kernel doesn't support BTF, skipping uploading it.\n");
+		return 0;
+	}
+
+	sanitize = btf_needs_sanitization(obj);
+	if (sanitize) {
+		const void *raw_data;
+		__u32 sz;
+
+		/* clone BTF to sanitize a copy and leave the original intact */
+		raw_data = btf__get_raw_data(obj->btf, &sz);
+		kern_btf = btf__new(raw_data, sz);
+		if (IS_ERR(kern_btf))
+			return PTR_ERR(kern_btf);
+
+		/* enforce 8-byte pointers for BPF-targeted BTFs */
+		btf__set_pointer_size(obj->btf, 8);
+		bpf_object__sanitize_btf(obj, kern_btf);
+	}
+
+	err = btf__load(kern_btf);
+	if (sanitize) {
+		if (!err) {
+			/* move fd to libbpf's BTF */
+			btf__set_fd(obj->btf, btf__fd(kern_btf));
+			btf__set_fd(kern_btf, -1);
+		}
+		btf__free(kern_btf);
+	}
+report:
+	if (err) {
+		btf_mandatory = kernel_needs_btf(obj);
+		pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
+			btf_mandatory ? "BTF is mandatory, can't proceed."
+				      : "BTF is optional, ignoring.");
+		if (!btf_mandatory)
+			err = 0;
+	}
+	return err;
+}
+
+static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
+{
+	const char *name;
+
+	name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
+	if (!name) {
+		pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
+			off, obj->path, elf_errmsg(-1));
+		return NULL;
+	}
+
+	return name;
+}
+
+static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
+{
+	const char *name;
+
+	name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
+	if (!name) {
+		pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
+			off, obj->path, elf_errmsg(-1));
+		return NULL;
+	}
+
+	return name;
+}
+
+static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
+{
+	Elf_Scn *scn;
+
+	scn = elf_getscn(obj->efile.elf, idx);
+	if (!scn) {
+		pr_warn("elf: failed to get section(%zu) from %s: %s\n",
+			idx, obj->path, elf_errmsg(-1));
+		return NULL;
+	}
+	return scn;
+}
+
+static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
+{
+	Elf_Scn *scn = NULL;
+	Elf *elf = obj->efile.elf;
+	const char *sec_name;
+
+	while ((scn = elf_nextscn(elf, scn)) != NULL) {
+		sec_name = elf_sec_name(obj, scn);
+		if (!sec_name)
+			return NULL;
+
+		if (strcmp(sec_name, name) != 0)
+			continue;
+
+		return scn;
+	}
+	return NULL;
+}
+
+static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr)
+{
+	if (!scn)
+		return -EINVAL;
+
+	if (gelf_getshdr(scn, hdr) != hdr) {
+		pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
+			elf_ndxscn(scn), obj->path, elf_errmsg(-1));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
+{
+	const char *name;
+	GElf_Shdr sh;
+
+	if (!scn)
+		return NULL;
+
+	if (elf_sec_hdr(obj, scn, &sh))
+		return NULL;
+
+	name = elf_sec_str(obj, sh.sh_name);
+	if (!name) {
+		pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
+			elf_ndxscn(scn), obj->path, elf_errmsg(-1));
+		return NULL;
+	}
+
+	return name;
+}
+
+static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
+{
+	Elf_Data *data;
+
+	if (!scn)
+		return NULL;
+
+	data = elf_getdata(scn, 0);
+	if (!data) {
+		pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
+			elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
+			obj->path, elf_errmsg(-1));
+		return NULL;
+	}
+
+	return data;
+}
+
+static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
+			      size_t off, __u32 sym_type, GElf_Sym *sym)
+{
+	Elf_Data *symbols = obj->efile.symbols;
+	size_t n = symbols->d_size / sizeof(GElf_Sym);
+	int i;
+
+	for (i = 0; i < n; i++) {
+		if (!gelf_getsym(symbols, i, sym))
+			continue;
+		if (sym->st_shndx != sec_idx || sym->st_value != off)
+			continue;
+		if (GELF_ST_TYPE(sym->st_info) != sym_type)
+			continue;
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+static bool is_sec_name_dwarf(const char *name)
+{
+	/* approximation, but the actual list is too long */
+	return strncmp(name, ".debug_", sizeof(".debug_") - 1) == 0;
+}
+
+static bool ignore_elf_section(GElf_Shdr *hdr, const char *name)
+{
+	/* no special handling of .strtab */
+	if (hdr->sh_type == SHT_STRTAB)
+		return true;
+
+	/* ignore .llvm_addrsig section as well */
+	if (hdr->sh_type == 0x6FFF4C03 /* SHT_LLVM_ADDRSIG */)
+		return true;
+
+	/* no subprograms will lead to an empty .text section, ignore it */
+	if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
+	    strcmp(name, ".text") == 0)
+		return true;
+
+	/* DWARF sections */
+	if (is_sec_name_dwarf(name))
+		return true;
+
+	if (strncmp(name, ".rel", sizeof(".rel") - 1) == 0) {
+		name += sizeof(".rel") - 1;
+		/* DWARF section relocations */
+		if (is_sec_name_dwarf(name))
+			return true;
+
+		/* .BTF and .BTF.ext don't need relocations */
+		if (strcmp(name, BTF_ELF_SEC) == 0 ||
+		    strcmp(name, BTF_EXT_ELF_SEC) == 0)
+			return true;
+	}
+
+	return false;
+}
+
+static int cmp_progs(const void *_a, const void *_b)
+{
+	const struct bpf_program *a = _a;
+	const struct bpf_program *b = _b;
+
+	if (a->sec_idx != b->sec_idx)
+		return a->sec_idx < b->sec_idx ? -1 : 1;
+
+	/* sec_insn_off can't be the same within the section */
+	return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
+}
+
+static int bpf_object__elf_collect(struct bpf_object *obj)
+{
+	Elf *elf = obj->efile.elf;
+	Elf_Data *btf_ext_data = NULL;
+	Elf_Data *btf_data = NULL;
+	int idx = 0, err = 0;
+	const char *name;
+	Elf_Data *data;
+	Elf_Scn *scn;
+	GElf_Shdr sh;
+
+	/* a bunch of ELF parsing functionality depends on processing symbols,
+	 * so do the first pass and find the symbol table
+	 */
+	scn = NULL;
+	while ((scn = elf_nextscn(elf, scn)) != NULL) {
+		if (elf_sec_hdr(obj, scn, &sh))
+			return -LIBBPF_ERRNO__FORMAT;
+
+		if (sh.sh_type == SHT_SYMTAB) {
+			if (obj->efile.symbols) {
+				pr_warn("elf: multiple symbol tables in %s\n", obj->path);
+				return -LIBBPF_ERRNO__FORMAT;
+			}
+
+			data = elf_sec_data(obj, scn);
+			if (!data)
+				return -LIBBPF_ERRNO__FORMAT;
+
+			obj->efile.symbols = data;
+			obj->efile.symbols_shndx = elf_ndxscn(scn);
+			obj->efile.strtabidx = sh.sh_link;
+		}
+	}
+
+	scn = NULL;
+	while ((scn = elf_nextscn(elf, scn)) != NULL) {
+		idx++;
+
+		if (elf_sec_hdr(obj, scn, &sh))
+			return -LIBBPF_ERRNO__FORMAT;
+
+		name = elf_sec_str(obj, sh.sh_name);
+		if (!name)
+			return -LIBBPF_ERRNO__FORMAT;
+
+		if (ignore_elf_section(&sh, name))
+			continue;
+
+		data = elf_sec_data(obj, scn);
+		if (!data)
+			return -LIBBPF_ERRNO__FORMAT;
+
+		pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
+			 idx, name, (unsigned long)data->d_size,
+			 (int)sh.sh_link, (unsigned long)sh.sh_flags,
+			 (int)sh.sh_type);
+
+		if (strcmp(name, "license") == 0) {
+			err = bpf_object__init_license(obj, data->d_buf, data->d_size);
+			if (err)
+				return err;
+		} else if (strcmp(name, "version") == 0) {
+			err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
+			if (err)
+				return err;
+		} else if (strcmp(name, "maps") == 0) {
+			obj->efile.maps_shndx = idx;
+		} else if (strcmp(name, MAPS_ELF_SEC) == 0) {
+			obj->efile.btf_maps_shndx = idx;
+		} else if (strcmp(name, BTF_ELF_SEC) == 0) {
+			btf_data = data;
+		} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
+			btf_ext_data = data;
+		} else if (sh.sh_type == SHT_SYMTAB) {
+			/* already processed during the first pass above */
+		} else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
+			if (sh.sh_flags & SHF_EXECINSTR) {
+				if (strcmp(name, ".text") == 0)
+					obj->efile.text_shndx = idx;
+				err = bpf_object__add_programs(obj, data, name, idx);
+				if (err)
+					return err;
+			} else if (strcmp(name, DATA_SEC) == 0) {
+				obj->efile.data = data;
+				obj->efile.data_shndx = idx;
+			} else if (strcmp(name, RODATA_SEC) == 0) {
+				obj->efile.rodata = data;
+				obj->efile.rodata_shndx = idx;
+			} else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
+				obj->efile.st_ops_data = data;
+				obj->efile.st_ops_shndx = idx;
+			} else {
+				pr_info("elf: skipping unrecognized data section(%d) %s\n",
+					idx, name);
+			}
+		} else if (sh.sh_type == SHT_REL) {
+			int nr_sects = obj->efile.nr_reloc_sects;
+			void *sects = obj->efile.reloc_sects;
+			int sec = sh.sh_info; /* points to other section */
+
+			/* Only do relo for section with exec instructions */
+			if (!section_have_execinstr(obj, sec) &&
+			    strcmp(name, ".rel" STRUCT_OPS_SEC) &&
+			    strcmp(name, ".rel" MAPS_ELF_SEC)) {
+				pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
+					idx, name, sec,
+					elf_sec_name(obj, elf_sec_by_idx(obj, sec)) ?: "<?>");
+				continue;
+			}
+
+			sects = libbpf_reallocarray(sects, nr_sects + 1,
+						    sizeof(*obj->efile.reloc_sects));
+			if (!sects)
+				return -ENOMEM;
+
+			obj->efile.reloc_sects = sects;
+			obj->efile.nr_reloc_sects++;
+
+			obj->efile.reloc_sects[nr_sects].shdr = sh;
+			obj->efile.reloc_sects[nr_sects].data = data;
+		} else if (sh.sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) {
+			obj->efile.bss = data;
+			obj->efile.bss_shndx = idx;
+		} else {
+			pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
+				(size_t)sh.sh_size);
+		}
+	}
+
+	if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
+		pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
+		return -LIBBPF_ERRNO__FORMAT;
+	}
+
+	/* sort BPF programs by section name and in-section instruction offset
+	 * for faster search */
+	qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
+
+	return bpf_object__init_btf(obj, btf_data, btf_ext_data);
+}
+
+static bool sym_is_extern(const GElf_Sym *sym)
+{
+	int bind = GELF_ST_BIND(sym->st_info);
+	/* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
+	return sym->st_shndx == SHN_UNDEF &&
+	       (bind == STB_GLOBAL || bind == STB_WEAK) &&
+	       GELF_ST_TYPE(sym->st_info) == STT_NOTYPE;
+}
+
+static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
+{
+	const struct btf_type *t;
+	const char *var_name;
+	int i, n;
+
+	if (!btf)
+		return -ESRCH;
+
+	n = btf__get_nr_types(btf);
+	for (i = 1; i <= n; i++) {
+		t = btf__type_by_id(btf, i);
+
+		if (!btf_is_var(t))
+			continue;
+
+		var_name = btf__name_by_offset(btf, t->name_off);
+		if (strcmp(var_name, ext_name))
+			continue;
+
+		if (btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
+			return -EINVAL;
+
+		return i;
+	}
+
+	return -ENOENT;
+}
+
+static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) {
+	const struct btf_var_secinfo *vs;
+	const struct btf_type *t;
+	int i, j, n;
+
+	if (!btf)
+		return -ESRCH;
+
+	n = btf__get_nr_types(btf);
+	for (i = 1; i <= n; i++) {
+		t = btf__type_by_id(btf, i);
+
+		if (!btf_is_datasec(t))
+			continue;
+
+		vs = btf_var_secinfos(t);
+		for (j = 0; j < btf_vlen(t); j++, vs++) {
+			if (vs->type == ext_btf_id)
+				return i;
+		}
+	}
+
+	return -ENOENT;
+}
+
+static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
+				     bool *is_signed)
+{
+	const struct btf_type *t;
+	const char *name;
+
+	t = skip_mods_and_typedefs(btf, id, NULL);
+	name = btf__name_by_offset(btf, t->name_off);
+
+	if (is_signed)
+		*is_signed = false;
+	switch (btf_kind(t)) {
+	case BTF_KIND_INT: {
+		int enc = btf_int_encoding(t);
+
+		if (enc & BTF_INT_BOOL)
+			return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
+		if (is_signed)
+			*is_signed = enc & BTF_INT_SIGNED;
+		if (t->size == 1)
+			return KCFG_CHAR;
+		if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
+			return KCFG_UNKNOWN;
+		return KCFG_INT;
+	}
+	case BTF_KIND_ENUM:
+		if (t->size != 4)
+			return KCFG_UNKNOWN;
+		if (strcmp(name, "libbpf_tristate"))
+			return KCFG_UNKNOWN;
+		return KCFG_TRISTATE;
+	case BTF_KIND_ARRAY:
+		if (btf_array(t)->nelems == 0)
+			return KCFG_UNKNOWN;
+		if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
+			return KCFG_UNKNOWN;
+		return KCFG_CHAR_ARR;
+	default:
+		return KCFG_UNKNOWN;
+	}
+}
+
+static int cmp_externs(const void *_a, const void *_b)
+{
+	const struct extern_desc *a = _a;
+	const struct extern_desc *b = _b;
+
+	if (a->type != b->type)
+		return a->type < b->type ? -1 : 1;
+
+	if (a->type == EXT_KCFG) {
+		/* descending order by alignment requirements */
+		if (a->kcfg.align != b->kcfg.align)
+			return a->kcfg.align > b->kcfg.align ? -1 : 1;
+		/* ascending order by size, within same alignment class */
+		if (a->kcfg.sz != b->kcfg.sz)
+			return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
+	}
+
+	/* resolve ties by name */
+	return strcmp(a->name, b->name);
+}
+
+static int find_int_btf_id(const struct btf *btf)
+{
+	const struct btf_type *t;
+	int i, n;
+
+	n = btf__get_nr_types(btf);
+	for (i = 1; i <= n; i++) {
+		t = btf__type_by_id(btf, i);
+
+		if (btf_is_int(t) && btf_int_bits(t) == 32)
+			return i;
+	}
+
+	return 0;
+}
+
+static int bpf_object__collect_externs(struct bpf_object *obj)
+{
+	struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
+	const struct btf_type *t;
+	struct extern_desc *ext;
+	int i, n, off;
+	const char *ext_name, *sec_name;
+	Elf_Scn *scn;
+	GElf_Shdr sh;
+
+	if (!obj->efile.symbols)
+		return 0;
+
+	scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
+	if (elf_sec_hdr(obj, scn, &sh))
+		return -LIBBPF_ERRNO__FORMAT;
+
+	n = sh.sh_size / sh.sh_entsize;
+	pr_debug("looking for externs among %d symbols...\n", n);
+
+	for (i = 0; i < n; i++) {
+		GElf_Sym sym;
+
+		if (!gelf_getsym(obj->efile.symbols, i, &sym))
+			return -LIBBPF_ERRNO__FORMAT;
+		if (!sym_is_extern(&sym))
+			continue;
+		ext_name = elf_sym_str(obj, sym.st_name);
+		if (!ext_name || !ext_name[0])
+			continue;
+
+		ext = obj->externs;
+		ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
+		if (!ext)
+			return -ENOMEM;
+		obj->externs = ext;
+		ext = &ext[obj->nr_extern];
+		memset(ext, 0, sizeof(*ext));
+		obj->nr_extern++;
+
+		ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
+		if (ext->btf_id <= 0) {
+			pr_warn("failed to find BTF for extern '%s': %d\n",
+				ext_name, ext->btf_id);
+			return ext->btf_id;
+		}
+		t = btf__type_by_id(obj->btf, ext->btf_id);
+		ext->name = btf__name_by_offset(obj->btf, t->name_off);
+		ext->sym_idx = i;
+		ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK;
+
+		ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
+		if (ext->sec_btf_id <= 0) {
+			pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
+				ext_name, ext->btf_id, ext->sec_btf_id);
+			return ext->sec_btf_id;
+		}
+		sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
+		sec_name = btf__name_by_offset(obj->btf, sec->name_off);
+
+		if (strcmp(sec_name, KCONFIG_SEC) == 0) {
+			kcfg_sec = sec;
+			ext->type = EXT_KCFG;
+			ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
+			if (ext->kcfg.sz <= 0) {
+				pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
+					ext_name, ext->kcfg.sz);
+				return ext->kcfg.sz;
+			}
+			ext->kcfg.align = btf__align_of(obj->btf, t->type);
+			if (ext->kcfg.align <= 0) {
+				pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
+					ext_name, ext->kcfg.align);
+				return -EINVAL;
+			}
+			ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
+						        &ext->kcfg.is_signed);
+			if (ext->kcfg.type == KCFG_UNKNOWN) {
+				pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name);
+				return -ENOTSUP;
+			}
+		} else if (strcmp(sec_name, KSYMS_SEC) == 0) {
+			ksym_sec = sec;
+			ext->type = EXT_KSYM;
+			skip_mods_and_typedefs(obj->btf, t->type,
+					       &ext->ksym.type_id);
+		} else {
+			pr_warn("unrecognized extern section '%s'\n", sec_name);
+			return -ENOTSUP;
+		}
+	}
+	pr_debug("collected %d externs total\n", obj->nr_extern);
+
+	if (!obj->nr_extern)
+		return 0;
+
+	/* sort externs by type, for kcfg ones also by (align, size, name) */
+	qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
+
+	/* for .ksyms section, we need to turn all externs into allocated
+	 * variables in BTF to pass kernel verification; we do this by
+	 * pretending that each extern is a 8-byte variable
+	 */
+	if (ksym_sec) {
+		/* find existing 4-byte integer type in BTF to use for fake
+		 * extern variables in DATASEC
+		 */
+		int int_btf_id = find_int_btf_id(obj->btf);
+
+		for (i = 0; i < obj->nr_extern; i++) {
+			ext = &obj->externs[i];
+			if (ext->type != EXT_KSYM)
+				continue;
+			pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
+				 i, ext->sym_idx, ext->name);
+		}
+
+		sec = ksym_sec;
+		n = btf_vlen(sec);
+		for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
+			struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
+			struct btf_type *vt;
+
+			vt = (void *)btf__type_by_id(obj->btf, vs->type);
+			ext_name = btf__name_by_offset(obj->btf, vt->name_off);
+			ext = find_extern_by_name(obj, ext_name);
+			if (!ext) {
+				pr_warn("failed to find extern definition for BTF var '%s'\n",
+					ext_name);
+				return -ESRCH;
+			}
+			btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+			vt->type = int_btf_id;
+			vs->offset = off;
+			vs->size = sizeof(int);
+		}
+		sec->size = off;
+	}
+
+	if (kcfg_sec) {
+		sec = kcfg_sec;
+		/* for kcfg externs calculate their offsets within a .kconfig map */
+		off = 0;
+		for (i = 0; i < obj->nr_extern; i++) {
+			ext = &obj->externs[i];
+			if (ext->type != EXT_KCFG)
+				continue;
+
+			ext->kcfg.data_off = roundup(off, ext->kcfg.align);
+			off = ext->kcfg.data_off + ext->kcfg.sz;
+			pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
+				 i, ext->sym_idx, ext->kcfg.data_off, ext->name);
+		}
+		sec->size = off;
+		n = btf_vlen(sec);
+		for (i = 0; i < n; i++) {
+			struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
+
+			t = btf__type_by_id(obj->btf, vs->type);
+			ext_name = btf__name_by_offset(obj->btf, t->name_off);
+			ext = find_extern_by_name(obj, ext_name);
+			if (!ext) {
+				pr_warn("failed to find extern definition for BTF var '%s'\n",
+					ext_name);
+				return -ESRCH;
+			}
+			btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+			vs->offset = ext->kcfg.data_off;
+		}
+	}
+	return 0;
+}
+
+struct bpf_program *
+bpf_object__find_program_by_title(const struct bpf_object *obj,
+				  const char *title)
+{
+	struct bpf_program *pos;
+
+	bpf_object__for_each_program(pos, obj) {
+		if (pos->sec_name && !strcmp(pos->sec_name, title))
+			return pos;
+	}
+	return NULL;
+}
+
+static bool prog_is_subprog(const struct bpf_object *obj,
+			    const struct bpf_program *prog)
+{
+	/* For legacy reasons, libbpf supports an entry-point BPF programs
+	 * without SEC() attribute, i.e., those in the .text section. But if
+	 * there are 2 or more such programs in the .text section, they all
+	 * must be subprograms called from entry-point BPF programs in
+	 * designated SEC()'tions, otherwise there is no way to distinguish
+	 * which of those programs should be loaded vs which are a subprogram.
+	 * Similarly, if there is a function/program in .text and at least one
+	 * other BPF program with custom SEC() attribute, then we just assume
+	 * .text programs are subprograms (even if they are not called from
+	 * other programs), because libbpf never explicitly supported mixing
+	 * SEC()-designated BPF programs and .text entry-point BPF programs.
+	 */
+	return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
+}
+
+struct bpf_program *
+bpf_object__find_program_by_name(const struct bpf_object *obj,
+				 const char *name)
+{
+	struct bpf_program *prog;
+
+	bpf_object__for_each_program(prog, obj) {
+		if (prog_is_subprog(obj, prog))
+			continue;
+		if (!strcmp(prog->name, name))
+			return prog;
+	}
+	return NULL;
+}
+
+static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
+				      int shndx)
+{
+	return shndx == obj->efile.data_shndx ||
+	       shndx == obj->efile.bss_shndx ||
+	       shndx == obj->efile.rodata_shndx;
+}
+
+static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
+				      int shndx)
+{
+	return shndx == obj->efile.maps_shndx ||
+	       shndx == obj->efile.btf_maps_shndx;
+}
+
+static enum libbpf_map_type
+bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
+{
+	if (shndx == obj->efile.data_shndx)
+		return LIBBPF_MAP_DATA;
+	else if (shndx == obj->efile.bss_shndx)
+		return LIBBPF_MAP_BSS;
+	else if (shndx == obj->efile.rodata_shndx)
+		return LIBBPF_MAP_RODATA;
+	else if (shndx == obj->efile.symbols_shndx)
+		return LIBBPF_MAP_KCONFIG;
+	else
+		return LIBBPF_MAP_UNSPEC;
+}
+
+static int bpf_program__record_reloc(struct bpf_program *prog,
+				     struct reloc_desc *reloc_desc,
+				     __u32 insn_idx, const char *sym_name,
+				     const GElf_Sym *sym, const GElf_Rel *rel)
+{
+	struct bpf_insn *insn = &prog->insns[insn_idx];
+	size_t map_idx, nr_maps = prog->obj->nr_maps;
+	struct bpf_object *obj = prog->obj;
+	__u32 shdr_idx = sym->st_shndx;
+	enum libbpf_map_type type;
+	const char *sym_sec_name;
+	struct bpf_map *map;
+
+	reloc_desc->processed = false;
+
+	/* sub-program call relocation */
+	if (insn->code == (BPF_JMP | BPF_CALL)) {
+		if (insn->src_reg != BPF_PSEUDO_CALL) {
+			pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
+			return -LIBBPF_ERRNO__RELOC;
+		}
+		/* text_shndx can be 0, if no default "main" program exists */
+		if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
+			sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
+			pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
+				prog->name, sym_name, sym_sec_name);
+			return -LIBBPF_ERRNO__RELOC;
+		}
+		if (sym->st_value % BPF_INSN_SZ) {
+			pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
+				prog->name, sym_name, (size_t)sym->st_value);
+			return -LIBBPF_ERRNO__RELOC;
+		}
+		reloc_desc->type = RELO_CALL;
+		reloc_desc->insn_idx = insn_idx;
+		reloc_desc->sym_off = sym->st_value;
+		return 0;
+	}
+
+	if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) {
+		pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
+			prog->name, sym_name, insn_idx, insn->code);
+		return -LIBBPF_ERRNO__RELOC;
+	}
+
+	if (sym_is_extern(sym)) {
+		int sym_idx = GELF_R_SYM(rel->r_info);
+		int i, n = obj->nr_extern;
+		struct extern_desc *ext;
+
+		for (i = 0; i < n; i++) {
+			ext = &obj->externs[i];
+			if (ext->sym_idx == sym_idx)
+				break;
+		}
+		if (i >= n) {
+			pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
+				prog->name, sym_name, sym_idx);
+			return -LIBBPF_ERRNO__RELOC;
+		}
+		pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
+			 prog->name, i, ext->name, ext->sym_idx, insn_idx);
+		reloc_desc->type = RELO_EXTERN;
+		reloc_desc->insn_idx = insn_idx;
+		reloc_desc->sym_off = i; /* sym_off stores extern index */
+		return 0;
+	}
+
+	if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
+		pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
+			prog->name, sym_name, shdr_idx);
+		return -LIBBPF_ERRNO__RELOC;
+	}
+
+	type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
+	sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
+
+	/* generic map reference relocation */
+	if (type == LIBBPF_MAP_UNSPEC) {
+		if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
+			pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
+				prog->name, sym_name, sym_sec_name);
+			return -LIBBPF_ERRNO__RELOC;
+		}
+		for (map_idx = 0; map_idx < nr_maps; map_idx++) {
+			map = &obj->maps[map_idx];
+			if (map->libbpf_type != type ||
+			    map->sec_idx != sym->st_shndx ||
+			    map->sec_offset != sym->st_value)
+				continue;
+			pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
+				 prog->name, map_idx, map->name, map->sec_idx,
+				 map->sec_offset, insn_idx);
+			break;
+		}
+		if (map_idx >= nr_maps) {
+			pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
+				prog->name, sym_sec_name, (size_t)sym->st_value);
+			return -LIBBPF_ERRNO__RELOC;
+		}
+		reloc_desc->type = RELO_LD64;
+		reloc_desc->insn_idx = insn_idx;
+		reloc_desc->map_idx = map_idx;
+		reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
+		return 0;
+	}
+
+	/* global data map relocation */
+	if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
+		pr_warn("prog '%s': bad data relo against section '%s'\n",
+			prog->name, sym_sec_name);
+		return -LIBBPF_ERRNO__RELOC;
+	}
+	for (map_idx = 0; map_idx < nr_maps; map_idx++) {
+		map = &obj->maps[map_idx];
+		if (map->libbpf_type != type)
+			continue;
+		pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
+			 prog->name, map_idx, map->name, map->sec_idx,
+			 map->sec_offset, insn_idx);
+		break;
+	}
+	if (map_idx >= nr_maps) {
+		pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
+			prog->name, sym_sec_name);
+		return -LIBBPF_ERRNO__RELOC;
+	}
+
+	reloc_desc->type = RELO_DATA;
+	reloc_desc->insn_idx = insn_idx;
+	reloc_desc->map_idx = map_idx;
+	reloc_desc->sym_off = sym->st_value;
+	return 0;
+}
+
+static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
+{
+	return insn_idx >= prog->sec_insn_off &&
+	       insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
+}
+
+static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
+						 size_t sec_idx, size_t insn_idx)
+{
+	int l = 0, r = obj->nr_programs - 1, m;
+	struct bpf_program *prog;
+
+	if (!obj->nr_programs)
+		return NULL;
+
+	while (l < r) {
+		m = l + (r - l + 1) / 2;
+		prog = &obj->programs[m];
+
+		if (prog->sec_idx < sec_idx ||
+		    (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
+			l = m;
+		else
+			r = m - 1;
+	}
+	/* matching program could be at index l, but it still might be the
+	 * wrong one, so we need to double check conditions for the last time
+	 */
+	prog = &obj->programs[l];
+	if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
+		return prog;
+	return NULL;
+}
+
+static int
+bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data *data)
+{
+	Elf_Data *symbols = obj->efile.symbols;
+	const char *relo_sec_name, *sec_name;
+	size_t sec_idx = shdr->sh_info;
+	struct bpf_program *prog;
+	struct reloc_desc *relos;
+	int err, i, nrels;
+	const char *sym_name;
+	__u32 insn_idx;
+	GElf_Sym sym;
+	GElf_Rel rel;
+
+	relo_sec_name = elf_sec_str(obj, shdr->sh_name);
+	sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
+	if (!relo_sec_name || !sec_name)
+		return -EINVAL;
+
+	pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
+		 relo_sec_name, sec_idx, sec_name);
+	nrels = shdr->sh_size / shdr->sh_entsize;
+
+	for (i = 0; i < nrels; i++) {
+		if (!gelf_getrel(data, i, &rel)) {
+			pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
+			return -LIBBPF_ERRNO__FORMAT;
+		}
+		if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
+			pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n",
+				relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i);
+			return -LIBBPF_ERRNO__FORMAT;
+		}
+		if (rel.r_offset % BPF_INSN_SZ) {
+			pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
+				relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i);
+			return -LIBBPF_ERRNO__FORMAT;
+		}
+
+		insn_idx = rel.r_offset / BPF_INSN_SZ;
+		/* relocations against static functions are recorded as
+		 * relocations against the section that contains a function;
+		 * in such case, symbol will be STT_SECTION and sym.st_name
+		 * will point to empty string (0), so fetch section name
+		 * instead
+		 */
+		if (GELF_ST_TYPE(sym.st_info) == STT_SECTION && sym.st_name == 0)
+			sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym.st_shndx));
+		else
+			sym_name = elf_sym_str(obj, sym.st_name);
+		sym_name = sym_name ?: "<?";
+
+		pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
+			 relo_sec_name, i, insn_idx, sym_name);
+
+		prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
+		if (!prog) {
+			pr_warn("sec '%s': relo #%d: program not found in section '%s' for insn #%u\n",
+				relo_sec_name, i, sec_name, insn_idx);
+			return -LIBBPF_ERRNO__RELOC;
+		}
+
+		relos = libbpf_reallocarray(prog->reloc_desc,
+					    prog->nr_reloc + 1, sizeof(*relos));
+		if (!relos)
+			return -ENOMEM;
+		prog->reloc_desc = relos;
+
+		/* adjust insn_idx to local BPF program frame of reference */
+		insn_idx -= prog->sec_insn_off;
+		err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
+						insn_idx, sym_name, &sym, &rel);
+		if (err)
+			return err;
+
+		prog->nr_reloc++;
+	}
+	return 0;
+}
+
+static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
+{
+	struct bpf_map_def *def = &map->def;
+	__u32 key_type_id = 0, value_type_id = 0;
+	int ret;
+
+	/* if it's BTF-defined map, we don't need to search for type IDs.
+	 * For struct_ops map, it does not need btf_key_type_id and
+	 * btf_value_type_id.
+	 */
+	if (map->sec_idx == obj->efile.btf_maps_shndx ||
+	    bpf_map__is_struct_ops(map))
+		return 0;
+
+	if (!bpf_map__is_internal(map)) {
+		ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
+					   def->value_size, &key_type_id,
+					   &value_type_id);
+	} else {
+		/*
+		 * LLVM annotates global data differently in BTF, that is,
+		 * only as '.data', '.bss' or '.rodata'.
+		 */
+		ret = btf__find_by_name(obj->btf,
+				libbpf_type_to_btf_name[map->libbpf_type]);
+	}
+	if (ret < 0)
+		return ret;
+
+	map->btf_key_type_id = key_type_id;
+	map->btf_value_type_id = bpf_map__is_internal(map) ?
+				 ret : value_type_id;
+	return 0;
+}
+
+static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
+{
+	char file[PATH_MAX], buff[4096];
+	FILE *fp;
+	__u32 val;
+	int err;
+
+	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
+	memset(info, 0, sizeof(*info));
+
+	fp = fopen(file, "r");
+	if (!fp) {
+		err = -errno;
+		pr_warn("failed to open %s: %d. No procfs support?\n", file,
+			err);
+		return err;
+	}
+
+	while (fgets(buff, sizeof(buff), fp)) {
+		if (sscanf(buff, "map_type:\t%u", &val) == 1)
+			info->type = val;
+		else if (sscanf(buff, "key_size:\t%u", &val) == 1)
+			info->key_size = val;
+		else if (sscanf(buff, "value_size:\t%u", &val) == 1)
+			info->value_size = val;
+		else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
+			info->max_entries = val;
+		else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
+			info->map_flags = val;
+	}
+
+	fclose(fp);
+
+	return 0;
+}
+
+int bpf_map__reuse_fd(struct bpf_map *map, int fd)
+{
+	struct bpf_map_info info = {};
+	__u32 len = sizeof(info), name_len;
+	int new_fd, err;
+	char *new_name;
+
+	err = bpf_obj_get_info_by_fd(fd, &info, &len);
+	if (err && errno == EINVAL)
+		err = bpf_get_map_info_from_fdinfo(fd, &info);
+	if (err)
+		return err;
+
+	name_len = strlen(info.name);
+	if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0)
+		new_name = strdup(map->name);
+	else
+		new_name = strdup(info.name);
+
+	if (!new_name)
+		return -errno;
+
+	new_fd = open("/", O_RDONLY | O_CLOEXEC);
+	if (new_fd < 0) {
+		err = -errno;
+		goto err_free_new_name;
+	}
+
+	new_fd = dup3(fd, new_fd, O_CLOEXEC);
+	if (new_fd < 0) {
+		err = -errno;
+		goto err_close_new_fd;
+	}
+
+	err = zclose(map->fd);
+	if (err) {
+		err = -errno;
+		goto err_close_new_fd;
+	}
+	free(map->name);
+
+	map->fd = new_fd;
+	map->name = new_name;
+	map->def.type = info.type;
+	map->def.key_size = info.key_size;
+	map->def.value_size = info.value_size;
+	map->def.max_entries = info.max_entries;
+	map->def.map_flags = info.map_flags;
+	map->btf_key_type_id = info.btf_key_type_id;
+	map->btf_value_type_id = info.btf_value_type_id;
+	map->reused = true;
+
+	return 0;
+
+err_close_new_fd:
+	close(new_fd);
+err_free_new_name:
+	free(new_name);
+	return err;
+}
+
+__u32 bpf_map__max_entries(const struct bpf_map *map)
+{
+	return map->def.max_entries;
+}
+
+int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
+{
+	if (map->fd >= 0)
+		return -EBUSY;
+	map->def.max_entries = max_entries;
+	return 0;
+}
+
+int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
+{
+	if (!map || !max_entries)
+		return -EINVAL;
+
+	return bpf_map__set_max_entries(map, max_entries);
+}
+
+static int
+bpf_object__probe_loading(struct bpf_object *obj)
+{
+	struct bpf_load_program_attr attr;
+	char *cp, errmsg[STRERR_BUFSIZE];
+	struct bpf_insn insns[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	int ret;
+
+	/* make sure basic loading works */
+
+	memset(&attr, 0, sizeof(attr));
+	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+	attr.insns = insns;
+	attr.insns_cnt = ARRAY_SIZE(insns);
+	attr.license = "GPL";
+
+	ret = bpf_load_program_xattr(&attr, NULL, 0);
+	if (ret < 0) {
+		ret = errno;
+		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
+		pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
+			"program. Make sure your kernel supports BPF "
+			"(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
+			"set to big enough value.\n", __func__, cp, ret);
+		return -ret;
+	}
+	close(ret);
+
+	return 0;
+}
+
+static int probe_fd(int fd)
+{
+	if (fd >= 0)
+		close(fd);
+	return fd >= 0;
+}
+
+static int probe_kern_prog_name(void)
+{
+	struct bpf_load_program_attr attr;
+	struct bpf_insn insns[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	int ret;
+
+	/* make sure loading with name works */
+
+	memset(&attr, 0, sizeof(attr));
+	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+	attr.insns = insns;
+	attr.insns_cnt = ARRAY_SIZE(insns);
+	attr.license = "GPL";
+	attr.name = "test";
+	ret = bpf_load_program_xattr(&attr, NULL, 0);
+	return probe_fd(ret);
+}
+
+static int probe_kern_global_data(void)
+{
+	struct bpf_load_program_attr prg_attr;
+	struct bpf_create_map_attr map_attr;
+	char *cp, errmsg[STRERR_BUFSIZE];
+	struct bpf_insn insns[] = {
+		BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
+		BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	int ret, map;
+
+	memset(&map_attr, 0, sizeof(map_attr));
+	map_attr.map_type = BPF_MAP_TYPE_ARRAY;
+	map_attr.key_size = sizeof(int);
+	map_attr.value_size = 32;
+	map_attr.max_entries = 1;
+
+	map = bpf_create_map_xattr(&map_attr);
+	if (map < 0) {
+		ret = -errno;
+		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
+		pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
+			__func__, cp, -ret);
+		return ret;
+	}
+
+	insns[0].imm = map;
+
+	memset(&prg_attr, 0, sizeof(prg_attr));
+	prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+	prg_attr.insns = insns;
+	prg_attr.insns_cnt = ARRAY_SIZE(insns);
+	prg_attr.license = "GPL";
+
+	ret = bpf_load_program_xattr(&prg_attr, NULL, 0);
+	close(map);
+	return probe_fd(ret);
+}
+
+static int probe_kern_btf(void)
+{
+	static const char strs[] = "\0int";
+	__u32 types[] = {
+		/* int */
+		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
+	};
+
+	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+					     strs, sizeof(strs)));
+}
+
+static int probe_kern_btf_func(void)
+{
+	static const char strs[] = "\0int\0x\0a";
+	/* void x(int a) {} */
+	__u32 types[] = {
+		/* int */
+		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+		/* FUNC_PROTO */                                /* [2] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
+		BTF_PARAM_ENC(7, 1),
+		/* FUNC x */                                    /* [3] */
+		BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
+	};
+
+	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+					     strs, sizeof(strs)));
+}
+
+static int probe_kern_btf_func_global(void)
+{
+	static const char strs[] = "\0int\0x\0a";
+	/* static void x(int a) {} */
+	__u32 types[] = {
+		/* int */
+		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+		/* FUNC_PROTO */                                /* [2] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
+		BTF_PARAM_ENC(7, 1),
+		/* FUNC x BTF_FUNC_GLOBAL */                    /* [3] */
+		BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
+	};
+
+	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+					     strs, sizeof(strs)));
+}
+
+static int probe_kern_btf_datasec(void)
+{
+	static const char strs[] = "\0x\0.data";
+	/* static int a; */
+	__u32 types[] = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+		/* VAR x */                                     /* [2] */
+		BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+		BTF_VAR_STATIC,
+		/* DATASEC val */                               /* [3] */
+		BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+		BTF_VAR_SECINFO_ENC(2, 0, 4),
+	};
+
+	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+					     strs, sizeof(strs)));
+}
+
+static int probe_kern_array_mmap(void)
+{
+	struct bpf_create_map_attr attr = {
+		.map_type = BPF_MAP_TYPE_ARRAY,
+		.map_flags = BPF_F_MMAPABLE,
+		.key_size = sizeof(int),
+		.value_size = sizeof(int),
+		.max_entries = 1,
+	};
+
+	return probe_fd(bpf_create_map_xattr(&attr));
+}
+
+static int probe_kern_exp_attach_type(void)
+{
+	struct bpf_load_program_attr attr;
+	struct bpf_insn insns[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+
+	memset(&attr, 0, sizeof(attr));
+	/* use any valid combination of program type and (optional)
+	 * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS)
+	 * to see if kernel supports expected_attach_type field for
+	 * BPF_PROG_LOAD command
+	 */
+	attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
+	attr.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE;
+	attr.insns = insns;
+	attr.insns_cnt = ARRAY_SIZE(insns);
+	attr.license = "GPL";
+
+	return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
+}
+
+static int probe_kern_probe_read_kernel(void)
+{
+	struct bpf_load_program_attr attr;
+	struct bpf_insn insns[] = {
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),	/* r1 = r10 (fp) */
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),	/* r1 += -8 */
+		BPF_MOV64_IMM(BPF_REG_2, 8),		/* r2 = 8 */
+		BPF_MOV64_IMM(BPF_REG_3, 0),		/* r3 = 0 */
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
+		BPF_EXIT_INSN(),
+	};
+
+	memset(&attr, 0, sizeof(attr));
+	attr.prog_type = BPF_PROG_TYPE_KPROBE;
+	attr.insns = insns;
+	attr.insns_cnt = ARRAY_SIZE(insns);
+	attr.license = "GPL";
+
+	return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
+}
+
+static int probe_prog_bind_map(void)
+{
+	struct bpf_load_program_attr prg_attr;
+	struct bpf_create_map_attr map_attr;
+	char *cp, errmsg[STRERR_BUFSIZE];
+	struct bpf_insn insns[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	int ret, map, prog;
+
+	memset(&map_attr, 0, sizeof(map_attr));
+	map_attr.map_type = BPF_MAP_TYPE_ARRAY;
+	map_attr.key_size = sizeof(int);
+	map_attr.value_size = 32;
+	map_attr.max_entries = 1;
+
+	map = bpf_create_map_xattr(&map_attr);
+	if (map < 0) {
+		ret = -errno;
+		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
+		pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
+			__func__, cp, -ret);
+		return ret;
+	}
+
+	memset(&prg_attr, 0, sizeof(prg_attr));
+	prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+	prg_attr.insns = insns;
+	prg_attr.insns_cnt = ARRAY_SIZE(insns);
+	prg_attr.license = "GPL";
+
+	prog = bpf_load_program_xattr(&prg_attr, NULL, 0);
+	if (prog < 0) {
+		close(map);
+		return 0;
+	}
+
+	ret = bpf_prog_bind_map(prog, map, NULL);
+
+	close(map);
+	close(prog);
+
+	return ret >= 0;
+}
+
+enum kern_feature_result {
+	FEAT_UNKNOWN = 0,
+	FEAT_SUPPORTED = 1,
+	FEAT_MISSING = 2,
+};
+
+typedef int (*feature_probe_fn)(void);
+
+static struct kern_feature_desc {
+	const char *desc;
+	feature_probe_fn probe;
+	enum kern_feature_result res;
+} feature_probes[__FEAT_CNT] = {
+	[FEAT_PROG_NAME] = {
+		"BPF program name", probe_kern_prog_name,
+	},
+	[FEAT_GLOBAL_DATA] = {
+		"global variables", probe_kern_global_data,
+	},
+	[FEAT_BTF] = {
+		"minimal BTF", probe_kern_btf,
+	},
+	[FEAT_BTF_FUNC] = {
+		"BTF functions", probe_kern_btf_func,
+	},
+	[FEAT_BTF_GLOBAL_FUNC] = {
+		"BTF global function", probe_kern_btf_func_global,
+	},
+	[FEAT_BTF_DATASEC] = {
+		"BTF data section and variable", probe_kern_btf_datasec,
+	},
+	[FEAT_ARRAY_MMAP] = {
+		"ARRAY map mmap()", probe_kern_array_mmap,
+	},
+	[FEAT_EXP_ATTACH_TYPE] = {
+		"BPF_PROG_LOAD expected_attach_type attribute",
+		probe_kern_exp_attach_type,
+	},
+	[FEAT_PROBE_READ_KERN] = {
+		"bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
+	},
+	[FEAT_PROG_BIND_MAP] = {
+		"BPF_PROG_BIND_MAP support", probe_prog_bind_map,
+	}
+};
+
+static bool kernel_supports(enum kern_feature_id feat_id)
+{
+	struct kern_feature_desc *feat = &feature_probes[feat_id];
+	int ret;
+
+	if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
+		ret = feat->probe();
+		if (ret > 0) {
+			WRITE_ONCE(feat->res, FEAT_SUPPORTED);
+		} else if (ret == 0) {
+			WRITE_ONCE(feat->res, FEAT_MISSING);
+		} else {
+			pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
+			WRITE_ONCE(feat->res, FEAT_MISSING);
+		}
+	}
+
+	return READ_ONCE(feat->res) == FEAT_SUPPORTED;
+}
+
+static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
+{
+	struct bpf_map_info map_info = {};
+	char msg[STRERR_BUFSIZE];
+	__u32 map_info_len;
+	int err;
+
+	map_info_len = sizeof(map_info);
+
+	err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len);
+	if (err && errno == EINVAL)
+		err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
+	if (err) {
+		pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
+			libbpf_strerror_r(errno, msg, sizeof(msg)));
+		return false;
+	}
+
+	return (map_info.type == map->def.type &&
+		map_info.key_size == map->def.key_size &&
+		map_info.value_size == map->def.value_size &&
+		map_info.max_entries == map->def.max_entries &&
+		map_info.map_flags == map->def.map_flags);
+}
+
+static int
+bpf_object__reuse_map(struct bpf_map *map)
+{
+	char *cp, errmsg[STRERR_BUFSIZE];
+	int err, pin_fd;
+
+	pin_fd = bpf_obj_get(map->pin_path);
+	if (pin_fd < 0) {
+		err = -errno;
+		if (err == -ENOENT) {
+			pr_debug("found no pinned map to reuse at '%s'\n",
+				 map->pin_path);
+			return 0;
+		}
+
+		cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
+		pr_warn("couldn't retrieve pinned map '%s': %s\n",
+			map->pin_path, cp);
+		return err;
+	}
+
+	if (!map_is_reuse_compat(map, pin_fd)) {
+		pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
+			map->pin_path);
+		close(pin_fd);
+		return -EINVAL;
+	}
+
+	err = bpf_map__reuse_fd(map, pin_fd);
+	if (err) {
+		close(pin_fd);
+		return err;
+	}
+	map->pinned = true;
+	pr_debug("reused pinned map at '%s'\n", map->pin_path);
+
+	return 0;
+}
+
+static int
+bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
+{
+	enum libbpf_map_type map_type = map->libbpf_type;
+	char *cp, errmsg[STRERR_BUFSIZE];
+	int err, zero = 0;
+
+	err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
+	if (err) {
+		err = -errno;
+		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+		pr_warn("Error setting initial map(%s) contents: %s\n",
+			map->name, cp);
+		return err;
+	}
+
+	/* Freeze .rodata and .kconfig map as read-only from syscall side. */
+	if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
+		err = bpf_map_freeze(map->fd);
+		if (err) {
+			err = -errno;
+			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+			pr_warn("Error freezing map(%s) as read-only: %s\n",
+				map->name, cp);
+			return err;
+		}
+	}
+	return 0;
+}
+
+static void bpf_map__destroy(struct bpf_map *map);
+
+static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
+{
+	struct bpf_create_map_attr create_attr;
+	struct bpf_map_def *def = &map->def;
+	int err = 0;
+
+	memset(&create_attr, 0, sizeof(create_attr));
+
+	if (kernel_supports(FEAT_PROG_NAME))
+		create_attr.name = map->name;
+	create_attr.map_ifindex = map->map_ifindex;
+	create_attr.map_type = def->type;
+	create_attr.map_flags = def->map_flags;
+	create_attr.key_size = def->key_size;
+	create_attr.value_size = def->value_size;
+	create_attr.numa_node = map->numa_node;
+
+	if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) {
+		int nr_cpus;
+
+		nr_cpus = libbpf_num_possible_cpus();
+		if (nr_cpus < 0) {
+			pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
+				map->name, nr_cpus);
+			return nr_cpus;
+		}
+		pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
+		create_attr.max_entries = nr_cpus;
+	} else {
+		create_attr.max_entries = def->max_entries;
+	}
+
+	if (bpf_map__is_struct_ops(map))
+		create_attr.btf_vmlinux_value_type_id =
+			map->btf_vmlinux_value_type_id;
+
+	create_attr.btf_fd = 0;
+	create_attr.btf_key_type_id = 0;
+	create_attr.btf_value_type_id = 0;
+	if (obj->btf && btf__fd(obj->btf) >= 0 && !bpf_map_find_btf_info(obj, map)) {
+		create_attr.btf_fd = btf__fd(obj->btf);
+		create_attr.btf_key_type_id = map->btf_key_type_id;
+		create_attr.btf_value_type_id = map->btf_value_type_id;
+	}
+
+	if (bpf_map_type__is_map_in_map(def->type)) {
+		if (map->inner_map) {
+			err = bpf_object__create_map(obj, map->inner_map);
+			if (err) {
+				pr_warn("map '%s': failed to create inner map: %d\n",
+					map->name, err);
+				return err;
+			}
+			map->inner_map_fd = bpf_map__fd(map->inner_map);
+		}
+		if (map->inner_map_fd >= 0)
+			create_attr.inner_map_fd = map->inner_map_fd;
+	}
+
+	map->fd = bpf_create_map_xattr(&create_attr);
+	if (map->fd < 0 && (create_attr.btf_key_type_id ||
+			    create_attr.btf_value_type_id)) {
+		char *cp, errmsg[STRERR_BUFSIZE];
+
+		err = -errno;
+		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+		pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
+			map->name, cp, err);
+		create_attr.btf_fd = 0;
+		create_attr.btf_key_type_id = 0;
+		create_attr.btf_value_type_id = 0;
+		map->btf_key_type_id = 0;
+		map->btf_value_type_id = 0;
+		map->fd = bpf_create_map_xattr(&create_attr);
+	}
+
+	err = map->fd < 0 ? -errno : 0;
+
+	if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
+		bpf_map__destroy(map->inner_map);
+		zfree(&map->inner_map);
+	}
+
+	return err;
+}
+
+static int init_map_slots(struct bpf_map *map)
+{
+	const struct bpf_map *targ_map;
+	unsigned int i;
+	int fd, err;
+
+	for (i = 0; i < map->init_slots_sz; i++) {
+		if (!map->init_slots[i])
+			continue;
+
+		targ_map = map->init_slots[i];
+		fd = bpf_map__fd(targ_map);
+		err = bpf_map_update_elem(map->fd, &i, &fd, 0);
+		if (err) {
+			err = -errno;
+			pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
+				map->name, i, targ_map->name,
+				fd, err);
+			return err;
+		}
+		pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
+			 map->name, i, targ_map->name, fd);
+	}
+
+	zfree(&map->init_slots);
+	map->init_slots_sz = 0;
+
+	return 0;
+}
+
+static int
+bpf_object__create_maps(struct bpf_object *obj)
+{
+	struct bpf_map *map;
+	char *cp, errmsg[STRERR_BUFSIZE];
+	unsigned int i, j;
+	int err;
+	bool retried;
+
+	for (i = 0; i < obj->nr_maps; i++) {
+		map = &obj->maps[i];
+
+		retried = false;
+retry:
+		if (map->pin_path) {
+			err = bpf_object__reuse_map(map);
+			if (err) {
+				pr_warn("map '%s': error reusing pinned map\n",
+					map->name);
+				goto err_out;
+			}
+			if (retried && map->fd < 0) {
+				pr_warn("map '%s': cannot find pinned map\n",
+					map->name);
+				err = -ENOENT;
+				goto err_out;
+			}
+		}
+
+		if (map->fd >= 0) {
+			pr_debug("map '%s': skipping creation (preset fd=%d)\n",
+				 map->name, map->fd);
+		} else {
+			err = bpf_object__create_map(obj, map);
+			if (err)
+				goto err_out;
+
+			pr_debug("map '%s': created successfully, fd=%d\n",
+				 map->name, map->fd);
+
+			if (bpf_map__is_internal(map)) {
+				err = bpf_object__populate_internal_map(obj, map);
+				if (err < 0) {
+					zclose(map->fd);
+					goto err_out;
+				}
+			}
+
+			if (map->init_slots_sz) {
+				err = init_map_slots(map);
+				if (err < 0) {
+					zclose(map->fd);
+					goto err_out;
+				}
+			}
+		}
+
+		if (map->pin_path && !map->pinned) {
+			err = bpf_map__pin(map, NULL);
+			if (err) {
+				zclose(map->fd);
+				if (!retried && err == -EEXIST) {
+					retried = true;
+					goto retry;
+				}
+				pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
+					map->name, map->pin_path, err);
+				goto err_out;
+			}
+		}
+	}
+
+	return 0;
+
+err_out:
+	cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+	pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
+	pr_perm_msg(err);
+	for (j = 0; j < i; j++)
+		zclose(obj->maps[j].fd);
+	return err;
+}
+
+#define BPF_CORE_SPEC_MAX_LEN 64
+
+/* represents BPF CO-RE field or array element accessor */
+struct bpf_core_accessor {
+	__u32 type_id;		/* struct/union type or array element type */
+	__u32 idx;		/* field index or array index */
+	const char *name;	/* field name or NULL for array accessor */
+};
+
+struct bpf_core_spec {
+	const struct btf *btf;
+	/* high-level spec: named fields and array indices only */
+	struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
+	/* original unresolved (no skip_mods_or_typedefs) root type ID */
+	__u32 root_type_id;
+	/* CO-RE relocation kind */
+	enum bpf_core_relo_kind relo_kind;
+	/* high-level spec length */
+	int len;
+	/* raw, low-level spec: 1-to-1 with accessor spec string */
+	int raw_spec[BPF_CORE_SPEC_MAX_LEN];
+	/* raw spec length */
+	int raw_len;
+	/* field bit offset represented by spec */
+	__u32 bit_offset;
+};
+
+static bool str_is_empty(const char *s)
+{
+	return !s || !s[0];
+}
+
+static bool is_flex_arr(const struct btf *btf,
+			const struct bpf_core_accessor *acc,
+			const struct btf_array *arr)
+{
+	const struct btf_type *t;
+
+	/* not a flexible array, if not inside a struct or has non-zero size */
+	if (!acc->name || arr->nelems > 0)
+		return false;
+
+	/* has to be the last member of enclosing struct */
+	t = btf__type_by_id(btf, acc->type_id);
+	return acc->idx == btf_vlen(t) - 1;
+}
+
+static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
+{
+	switch (kind) {
+	case BPF_FIELD_BYTE_OFFSET: return "byte_off";
+	case BPF_FIELD_BYTE_SIZE: return "byte_sz";
+	case BPF_FIELD_EXISTS: return "field_exists";
+	case BPF_FIELD_SIGNED: return "signed";
+	case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
+	case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
+	case BPF_TYPE_ID_LOCAL: return "local_type_id";
+	case BPF_TYPE_ID_TARGET: return "target_type_id";
+	case BPF_TYPE_EXISTS: return "type_exists";
+	case BPF_TYPE_SIZE: return "type_size";
+	case BPF_ENUMVAL_EXISTS: return "enumval_exists";
+	case BPF_ENUMVAL_VALUE: return "enumval_value";
+	default: return "unknown";
+	}
+}
+
+static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
+{
+	switch (kind) {
+	case BPF_FIELD_BYTE_OFFSET:
+	case BPF_FIELD_BYTE_SIZE:
+	case BPF_FIELD_EXISTS:
+	case BPF_FIELD_SIGNED:
+	case BPF_FIELD_LSHIFT_U64:
+	case BPF_FIELD_RSHIFT_U64:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
+{
+	switch (kind) {
+	case BPF_TYPE_ID_LOCAL:
+	case BPF_TYPE_ID_TARGET:
+	case BPF_TYPE_EXISTS:
+	case BPF_TYPE_SIZE:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
+{
+	switch (kind) {
+	case BPF_ENUMVAL_EXISTS:
+	case BPF_ENUMVAL_VALUE:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Turn bpf_core_relo into a low- and high-level spec representation,
+ * validating correctness along the way, as well as calculating resulting
+ * field bit offset, specified by accessor string. Low-level spec captures
+ * every single level of nestedness, including traversing anonymous
+ * struct/union members. High-level one only captures semantically meaningful
+ * "turning points": named fields and array indicies.
+ * E.g., for this case:
+ *
+ *   struct sample {
+ *       int __unimportant;
+ *       struct {
+ *           int __1;
+ *           int __2;
+ *           int a[7];
+ *       };
+ *   };
+ *
+ *   struct sample *s = ...;
+ *
+ *   int x = &s->a[3]; // access string = '0:1:2:3'
+ *
+ * Low-level spec has 1:1 mapping with each element of access string (it's
+ * just a parsed access string representation): [0, 1, 2, 3].
+ *
+ * High-level spec will capture only 3 points:
+ *   - intial zero-index access by pointer (&s->... is the same as &s[0]...);
+ *   - field 'a' access (corresponds to '2' in low-level spec);
+ *   - array element #3 access (corresponds to '3' in low-level spec).
+ *
+ * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
+ * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
+ * spec and raw_spec are kept empty.
+ *
+ * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access
+ * string to specify enumerator's value index that need to be relocated.
+ */
+static int bpf_core_parse_spec(const struct btf *btf,
+			       __u32 type_id,
+			       const char *spec_str,
+			       enum bpf_core_relo_kind relo_kind,
+			       struct bpf_core_spec *spec)
+{
+	int access_idx, parsed_len, i;
+	struct bpf_core_accessor *acc;
+	const struct btf_type *t;
+	const char *name;
+	__u32 id;
+	__s64 sz;
+
+	if (str_is_empty(spec_str) || *spec_str == ':')
+		return -EINVAL;
+
+	memset(spec, 0, sizeof(*spec));
+	spec->btf = btf;
+	spec->root_type_id = type_id;
+	spec->relo_kind = relo_kind;
+
+	/* type-based relocations don't have a field access string */
+	if (core_relo_is_type_based(relo_kind)) {
+		if (strcmp(spec_str, "0"))
+			return -EINVAL;
+		return 0;
+	}
+
+	/* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
+	while (*spec_str) {
+		if (*spec_str == ':')
+			++spec_str;
+		if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
+			return -EINVAL;
+		if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+			return -E2BIG;
+		spec_str += parsed_len;
+		spec->raw_spec[spec->raw_len++] = access_idx;
+	}
+
+	if (spec->raw_len == 0)
+		return -EINVAL;
+
+	t = skip_mods_and_typedefs(btf, type_id, &id);
+	if (!t)
+		return -EINVAL;
+
+	access_idx = spec->raw_spec[0];
+	acc = &spec->spec[0];
+	acc->type_id = id;
+	acc->idx = access_idx;
+	spec->len++;
+
+	if (core_relo_is_enumval_based(relo_kind)) {
+		if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
+			return -EINVAL;
+
+		/* record enumerator name in a first accessor */
+		acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
+		return 0;
+	}
+
+	if (!core_relo_is_field_based(relo_kind))
+		return -EINVAL;
+
+	sz = btf__resolve_size(btf, id);
+	if (sz < 0)
+		return sz;
+	spec->bit_offset = access_idx * sz * 8;
+
+	for (i = 1; i < spec->raw_len; i++) {
+		t = skip_mods_and_typedefs(btf, id, &id);
+		if (!t)
+			return -EINVAL;
+
+		access_idx = spec->raw_spec[i];
+		acc = &spec->spec[spec->len];
+
+		if (btf_is_composite(t)) {
+			const struct btf_member *m;
+			__u32 bit_offset;
+
+			if (access_idx >= btf_vlen(t))
+				return -EINVAL;
+
+			bit_offset = btf_member_bit_offset(t, access_idx);
+			spec->bit_offset += bit_offset;
+
+			m = btf_members(t) + access_idx;
+			if (m->name_off) {
+				name = btf__name_by_offset(btf, m->name_off);
+				if (str_is_empty(name))
+					return -EINVAL;
+
+				acc->type_id = id;
+				acc->idx = access_idx;
+				acc->name = name;
+				spec->len++;
+			}
+
+			id = m->type;
+		} else if (btf_is_array(t)) {
+			const struct btf_array *a = btf_array(t);
+			bool flex;
+
+			t = skip_mods_and_typedefs(btf, a->type, &id);
+			if (!t)
+				return -EINVAL;
+
+			flex = is_flex_arr(btf, acc - 1, a);
+			if (!flex && access_idx >= a->nelems)
+				return -EINVAL;
+
+			spec->spec[spec->len].type_id = id;
+			spec->spec[spec->len].idx = access_idx;
+			spec->len++;
+
+			sz = btf__resolve_size(btf, id);
+			if (sz < 0)
+				return sz;
+			spec->bit_offset += access_idx * sz * 8;
+		} else {
+			pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
+				type_id, spec_str, i, id, btf_kind_str(t));
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static bool bpf_core_is_flavor_sep(const char *s)
+{
+	/* check X___Y name pattern, where X and Y are not underscores */
+	return s[0] != '_' &&				      /* X */
+	       s[1] == '_' && s[2] == '_' && s[3] == '_' &&   /* ___ */
+	       s[4] != '_';				      /* Y */
+}
+
+/* Given 'some_struct_name___with_flavor' return the length of a name prefix
+ * before last triple underscore. Struct name part after last triple
+ * underscore is ignored by BPF CO-RE relocation during relocation matching.
+ */
+static size_t bpf_core_essential_name_len(const char *name)
+{
+	size_t n = strlen(name);
+	int i;
+
+	for (i = n - 5; i >= 0; i--) {
+		if (bpf_core_is_flavor_sep(name + i))
+			return i + 1;
+	}
+	return n;
+}
+
+/* dynamically sized list of type IDs */
+struct ids_vec {
+	__u32 *data;
+	int len;
+};
+
+static void bpf_core_free_cands(struct ids_vec *cand_ids)
+{
+	free(cand_ids->data);
+	free(cand_ids);
+}
+
+static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
+					   __u32 local_type_id,
+					   const struct btf *targ_btf)
+{
+	size_t local_essent_len, targ_essent_len;
+	const char *local_name, *targ_name;
+	const struct btf_type *t, *local_t;
+	struct ids_vec *cand_ids;
+	__u32 *new_ids;
+	int i, err, n;
+
+	local_t = btf__type_by_id(local_btf, local_type_id);
+	if (!local_t)
+		return ERR_PTR(-EINVAL);
+
+	local_name = btf__name_by_offset(local_btf, local_t->name_off);
+	if (str_is_empty(local_name))
+		return ERR_PTR(-EINVAL);
+	local_essent_len = bpf_core_essential_name_len(local_name);
+
+	cand_ids = calloc(1, sizeof(*cand_ids));
+	if (!cand_ids)
+		return ERR_PTR(-ENOMEM);
+
+	n = btf__get_nr_types(targ_btf);
+	for (i = 1; i <= n; i++) {
+		t = btf__type_by_id(targ_btf, i);
+		if (btf_kind(t) != btf_kind(local_t))
+			continue;
+
+		targ_name = btf__name_by_offset(targ_btf, t->name_off);
+		if (str_is_empty(targ_name))
+			continue;
+
+		targ_essent_len = bpf_core_essential_name_len(targ_name);
+		if (targ_essent_len != local_essent_len)
+			continue;
+
+		if (strncmp(local_name, targ_name, local_essent_len) == 0) {
+			pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s\n",
+				 local_type_id, btf_kind_str(local_t),
+				 local_name, i, btf_kind_str(t), targ_name);
+			new_ids = libbpf_reallocarray(cand_ids->data,
+						      cand_ids->len + 1,
+						      sizeof(*cand_ids->data));
+			if (!new_ids) {
+				err = -ENOMEM;
+				goto err_out;
+			}
+			cand_ids->data = new_ids;
+			cand_ids->data[cand_ids->len++] = i;
+		}
+	}
+	return cand_ids;
+err_out:
+	bpf_core_free_cands(cand_ids);
+	return ERR_PTR(err);
+}
+
+/* Check two types for compatibility for the purpose of field access
+ * relocation. const/volatile/restrict and typedefs are skipped to ensure we
+ * are relocating semantically compatible entities:
+ *   - any two STRUCTs/UNIONs are compatible and can be mixed;
+ *   - any two FWDs are compatible, if their names match (modulo flavor suffix);
+ *   - any two PTRs are always compatible;
+ *   - for ENUMs, names should be the same (ignoring flavor suffix) or at
+ *     least one of enums should be anonymous;
+ *   - for ENUMs, check sizes, names are ignored;
+ *   - for INT, size and signedness are ignored;
+ *   - for ARRAY, dimensionality is ignored, element types are checked for
+ *     compatibility recursively;
+ *   - everything else shouldn't be ever a target of relocation.
+ * These rules are not set in stone and probably will be adjusted as we get
+ * more experience with using BPF CO-RE relocations.
+ */
+static int bpf_core_fields_are_compat(const struct btf *local_btf,
+				      __u32 local_id,
+				      const struct btf *targ_btf,
+				      __u32 targ_id)
+{
+	const struct btf_type *local_type, *targ_type;
+
+recur:
+	local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+	targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+	if (!local_type || !targ_type)
+		return -EINVAL;
+
+	if (btf_is_composite(local_type) && btf_is_composite(targ_type))
+		return 1;
+	if (btf_kind(local_type) != btf_kind(targ_type))
+		return 0;
+
+	switch (btf_kind(local_type)) {
+	case BTF_KIND_PTR:
+		return 1;
+	case BTF_KIND_FWD:
+	case BTF_KIND_ENUM: {
+		const char *local_name, *targ_name;
+		size_t local_len, targ_len;
+
+		local_name = btf__name_by_offset(local_btf,
+						 local_type->name_off);
+		targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
+		local_len = bpf_core_essential_name_len(local_name);
+		targ_len = bpf_core_essential_name_len(targ_name);
+		/* one of them is anonymous or both w/ same flavor-less names */
+		return local_len == 0 || targ_len == 0 ||
+		       (local_len == targ_len &&
+			strncmp(local_name, targ_name, local_len) == 0);
+	}
+	case BTF_KIND_INT:
+		/* just reject deprecated bitfield-like integers; all other
+		 * integers are by default compatible between each other
+		 */
+		return btf_int_offset(local_type) == 0 &&
+		       btf_int_offset(targ_type) == 0;
+	case BTF_KIND_ARRAY:
+		local_id = btf_array(local_type)->type;
+		targ_id = btf_array(targ_type)->type;
+		goto recur;
+	default:
+		pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
+			btf_kind(local_type), local_id, targ_id);
+		return 0;
+	}
+}
+
+/*
+ * Given single high-level named field accessor in local type, find
+ * corresponding high-level accessor for a target type. Along the way,
+ * maintain low-level spec for target as well. Also keep updating target
+ * bit offset.
+ *
+ * Searching is performed through recursive exhaustive enumeration of all
+ * fields of a struct/union. If there are any anonymous (embedded)
+ * structs/unions, they are recursively searched as well. If field with
+ * desired name is found, check compatibility between local and target types,
+ * before returning result.
+ *
+ * 1 is returned, if field is found.
+ * 0 is returned if no compatible field is found.
+ * <0 is returned on error.
+ */
+static int bpf_core_match_member(const struct btf *local_btf,
+				 const struct bpf_core_accessor *local_acc,
+				 const struct btf *targ_btf,
+				 __u32 targ_id,
+				 struct bpf_core_spec *spec,
+				 __u32 *next_targ_id)
+{
+	const struct btf_type *local_type, *targ_type;
+	const struct btf_member *local_member, *m;
+	const char *local_name, *targ_name;
+	__u32 local_id;
+	int i, n, found;
+
+	targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+	if (!targ_type)
+		return -EINVAL;
+	if (!btf_is_composite(targ_type))
+		return 0;
+
+	local_id = local_acc->type_id;
+	local_type = btf__type_by_id(local_btf, local_id);
+	local_member = btf_members(local_type) + local_acc->idx;
+	local_name = btf__name_by_offset(local_btf, local_member->name_off);
+
+	n = btf_vlen(targ_type);
+	m = btf_members(targ_type);
+	for (i = 0; i < n; i++, m++) {
+		__u32 bit_offset;
+
+		bit_offset = btf_member_bit_offset(targ_type, i);
+
+		/* too deep struct/union/array nesting */
+		if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+			return -E2BIG;
+
+		/* speculate this member will be the good one */
+		spec->bit_offset += bit_offset;
+		spec->raw_spec[spec->raw_len++] = i;
+
+		targ_name = btf__name_by_offset(targ_btf, m->name_off);
+		if (str_is_empty(targ_name)) {
+			/* embedded struct/union, we need to go deeper */
+			found = bpf_core_match_member(local_btf, local_acc,
+						      targ_btf, m->type,
+						      spec, next_targ_id);
+			if (found) /* either found or error */
+				return found;
+		} else if (strcmp(local_name, targ_name) == 0) {
+			/* matching named field */
+			struct bpf_core_accessor *targ_acc;
+
+			targ_acc = &spec->spec[spec->len++];
+			targ_acc->type_id = targ_id;
+			targ_acc->idx = i;
+			targ_acc->name = targ_name;
+
+			*next_targ_id = m->type;
+			found = bpf_core_fields_are_compat(local_btf,
+							   local_member->type,
+							   targ_btf, m->type);
+			if (!found)
+				spec->len--; /* pop accessor */
+			return found;
+		}
+		/* member turned out not to be what we looked for */
+		spec->bit_offset -= bit_offset;
+		spec->raw_len--;
+	}
+
+	return 0;
+}
+
+/* Check local and target types for compatibility. This check is used for
+ * type-based CO-RE relocations and follow slightly different rules than
+ * field-based relocations. This function assumes that root types were already
+ * checked for name match. Beyond that initial root-level name check, names
+ * are completely ignored. Compatibility rules are as follows:
+ *   - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
+ *     kind should match for local and target types (i.e., STRUCT is not
+ *     compatible with UNION);
+ *   - for ENUMs, the size is ignored;
+ *   - for INT, size and signedness are ignored;
+ *   - for ARRAY, dimensionality is ignored, element types are checked for
+ *     compatibility recursively;
+ *   - CONST/VOLATILE/RESTRICT modifiers are ignored;
+ *   - TYPEDEFs/PTRs are compatible if types they pointing to are compatible;
+ *   - FUNC_PROTOs are compatible if they have compatible signature: same
+ *     number of input args and compatible return and argument types.
+ * These rules are not set in stone and probably will be adjusted as we get
+ * more experience with using BPF CO-RE relocations.
+ */
+static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+				     const struct btf *targ_btf, __u32 targ_id)
+{
+	const struct btf_type *local_type, *targ_type;
+	int depth = 32; /* max recursion depth */
+
+	/* caller made sure that names match (ignoring flavor suffix) */
+	local_type = btf__type_by_id(local_btf, local_id);
+	targ_type = btf__type_by_id(targ_btf, targ_id);
+	if (btf_kind(local_type) != btf_kind(targ_type))
+		return 0;
+
+recur:
+	depth--;
+	if (depth < 0)
+		return -EINVAL;
+
+	local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+	targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+	if (!local_type || !targ_type)
+		return -EINVAL;
+
+	if (btf_kind(local_type) != btf_kind(targ_type))
+		return 0;
+
+	switch (btf_kind(local_type)) {
+	case BTF_KIND_UNKN:
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+	case BTF_KIND_ENUM:
+	case BTF_KIND_FWD:
+		return 1;
+	case BTF_KIND_INT:
+		/* just reject deprecated bitfield-like integers; all other
+		 * integers are by default compatible between each other
+		 */
+		return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
+	case BTF_KIND_PTR:
+		local_id = local_type->type;
+		targ_id = targ_type->type;
+		goto recur;
+	case BTF_KIND_ARRAY:
+		local_id = btf_array(local_type)->type;
+		targ_id = btf_array(targ_type)->type;
+		goto recur;
+	case BTF_KIND_FUNC_PROTO: {
+		struct btf_param *local_p = btf_params(local_type);
+		struct btf_param *targ_p = btf_params(targ_type);
+		__u16 local_vlen = btf_vlen(local_type);
+		__u16 targ_vlen = btf_vlen(targ_type);
+		int i, err;
+
+		if (local_vlen != targ_vlen)
+			return 0;
+
+		for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
+			skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
+			skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
+			err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id);
+			if (err <= 0)
+				return err;
+		}
+
+		/* tail recurse for return type check */
+		skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
+		skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
+		goto recur;
+	}
+	default:
+		pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
+			btf_kind_str(local_type), local_id, targ_id);
+		return 0;
+	}
+}
+
+/*
+ * Try to match local spec to a target type and, if successful, produce full
+ * target spec (high-level, low-level + bit offset).
+ */
+static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
+			       const struct btf *targ_btf, __u32 targ_id,
+			       struct bpf_core_spec *targ_spec)
+{
+	const struct btf_type *targ_type;
+	const struct bpf_core_accessor *local_acc;
+	struct bpf_core_accessor *targ_acc;
+	int i, sz, matched;
+
+	memset(targ_spec, 0, sizeof(*targ_spec));
+	targ_spec->btf = targ_btf;
+	targ_spec->root_type_id = targ_id;
+	targ_spec->relo_kind = local_spec->relo_kind;
+
+	if (core_relo_is_type_based(local_spec->relo_kind)) {
+		return bpf_core_types_are_compat(local_spec->btf,
+						 local_spec->root_type_id,
+						 targ_btf, targ_id);
+	}
+
+	local_acc = &local_spec->spec[0];
+	targ_acc = &targ_spec->spec[0];
+
+	if (core_relo_is_enumval_based(local_spec->relo_kind)) {
+		size_t local_essent_len, targ_essent_len;
+		const struct btf_enum *e;
+		const char *targ_name;
+
+		/* has to resolve to an enum */
+		targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
+		if (!btf_is_enum(targ_type))
+			return 0;
+
+		local_essent_len = bpf_core_essential_name_len(local_acc->name);
+
+		for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
+			targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
+			targ_essent_len = bpf_core_essential_name_len(targ_name);
+			if (targ_essent_len != local_essent_len)
+				continue;
+			if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
+				targ_acc->type_id = targ_id;
+				targ_acc->idx = i;
+				targ_acc->name = targ_name;
+				targ_spec->len++;
+				targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
+				targ_spec->raw_len++;
+				return 1;
+			}
+		}
+		return 0;
+	}
+
+	if (!core_relo_is_field_based(local_spec->relo_kind))
+		return -EINVAL;
+
+	for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
+		targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
+						   &targ_id);
+		if (!targ_type)
+			return -EINVAL;
+
+		if (local_acc->name) {
+			matched = bpf_core_match_member(local_spec->btf,
+							local_acc,
+							targ_btf, targ_id,
+							targ_spec, &targ_id);
+			if (matched <= 0)
+				return matched;
+		} else {
+			/* for i=0, targ_id is already treated as array element
+			 * type (because it's the original struct), for others
+			 * we should find array element type first
+			 */
+			if (i > 0) {
+				const struct btf_array *a;
+				bool flex;
+
+				if (!btf_is_array(targ_type))
+					return 0;
+
+				a = btf_array(targ_type);
+				flex = is_flex_arr(targ_btf, targ_acc - 1, a);
+				if (!flex && local_acc->idx >= a->nelems)
+					return 0;
+				if (!skip_mods_and_typedefs(targ_btf, a->type,
+							    &targ_id))
+					return -EINVAL;
+			}
+
+			/* too deep struct/union/array nesting */
+			if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+				return -E2BIG;
+
+			targ_acc->type_id = targ_id;
+			targ_acc->idx = local_acc->idx;
+			targ_acc->name = NULL;
+			targ_spec->len++;
+			targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
+			targ_spec->raw_len++;
+
+			sz = btf__resolve_size(targ_btf, targ_id);
+			if (sz < 0)
+				return sz;
+			targ_spec->bit_offset += local_acc->idx * sz * 8;
+		}
+	}
+
+	return 1;
+}
+
+static int bpf_core_calc_field_relo(const struct bpf_program *prog,
+				    const struct bpf_core_relo *relo,
+				    const struct bpf_core_spec *spec,
+				    __u32 *val, __u32 *field_sz, __u32 *type_id,
+				    bool *validate)
+{
+	const struct bpf_core_accessor *acc;
+	const struct btf_type *t;
+	__u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
+	const struct btf_member *m;
+	const struct btf_type *mt;
+	bool bitfield;
+	__s64 sz;
+
+	*field_sz = 0;
+
+	if (relo->kind == BPF_FIELD_EXISTS) {
+		*val = spec ? 1 : 0;
+		return 0;
+	}
+
+	if (!spec)
+		return -EUCLEAN; /* request instruction poisoning */
+
+	acc = &spec->spec[spec->len - 1];
+	t = btf__type_by_id(spec->btf, acc->type_id);
+
+	/* a[n] accessor needs special handling */
+	if (!acc->name) {
+		if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
+			*val = spec->bit_offset / 8;
+			/* remember field size for load/store mem size */
+			sz = btf__resolve_size(spec->btf, acc->type_id);
+			if (sz < 0)
+				return -EINVAL;
+			*field_sz = sz;
+			*type_id = acc->type_id;
+		} else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
+			sz = btf__resolve_size(spec->btf, acc->type_id);
+			if (sz < 0)
+				return -EINVAL;
+			*val = sz;
+		} else {
+			pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
+				prog->name, relo->kind, relo->insn_off / 8);
+			return -EINVAL;
+		}
+		if (validate)
+			*validate = true;
+		return 0;
+	}
+
+	m = btf_members(t) + acc->idx;
+	mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
+	bit_off = spec->bit_offset;
+	bit_sz = btf_member_bitfield_size(t, acc->idx);
+
+	bitfield = bit_sz > 0;
+	if (bitfield) {
+		byte_sz = mt->size;
+		byte_off = bit_off / 8 / byte_sz * byte_sz;
+		/* figure out smallest int size necessary for bitfield load */
+		while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
+			if (byte_sz >= 8) {
+				/* bitfield can't be read with 64-bit read */
+				pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
+					prog->name, relo->kind, relo->insn_off / 8);
+				return -E2BIG;
+			}
+			byte_sz *= 2;
+			byte_off = bit_off / 8 / byte_sz * byte_sz;
+		}
+	} else {
+		sz = btf__resolve_size(spec->btf, field_type_id);
+		if (sz < 0)
+			return -EINVAL;
+		byte_sz = sz;
+		byte_off = spec->bit_offset / 8;
+		bit_sz = byte_sz * 8;
+	}
+
+	/* for bitfields, all the relocatable aspects are ambiguous and we
+	 * might disagree with compiler, so turn off validation of expected
+	 * value, except for signedness
+	 */
+	if (validate)
+		*validate = !bitfield;
+
+	switch (relo->kind) {
+	case BPF_FIELD_BYTE_OFFSET:
+		*val = byte_off;
+		if (!bitfield) {
+			*field_sz = byte_sz;
+			*type_id = field_type_id;
+		}
+		break;
+	case BPF_FIELD_BYTE_SIZE:
+		*val = byte_sz;
+		break;
+	case BPF_FIELD_SIGNED:
+		/* enums will be assumed unsigned */
+		*val = btf_is_enum(mt) ||
+		       (btf_int_encoding(mt) & BTF_INT_SIGNED);
+		if (validate)
+			*validate = true; /* signedness is never ambiguous */
+		break;
+	case BPF_FIELD_LSHIFT_U64:
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+		*val = 64 - (bit_off + bit_sz - byte_off  * 8);
+#else
+		*val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
+#endif
+		break;
+	case BPF_FIELD_RSHIFT_U64:
+		*val = 64 - bit_sz;
+		if (validate)
+			*validate = true; /* right shift is never ambiguous */
+		break;
+	case BPF_FIELD_EXISTS:
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
+				   const struct bpf_core_spec *spec,
+				   __u32 *val)
+{
+	__s64 sz;
+
+	/* type-based relos return zero when target type is not found */
+	if (!spec) {
+		*val = 0;
+		return 0;
+	}
+
+	switch (relo->kind) {
+	case BPF_TYPE_ID_TARGET:
+		*val = spec->root_type_id;
+		break;
+	case BPF_TYPE_EXISTS:
+		*val = 1;
+		break;
+	case BPF_TYPE_SIZE:
+		sz = btf__resolve_size(spec->btf, spec->root_type_id);
+		if (sz < 0)
+			return -EINVAL;
+		*val = sz;
+		break;
+	case BPF_TYPE_ID_LOCAL:
+	/* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
+				      const struct bpf_core_spec *spec,
+				      __u32 *val)
+{
+	const struct btf_type *t;
+	const struct btf_enum *e;
+
+	switch (relo->kind) {
+	case BPF_ENUMVAL_EXISTS:
+		*val = spec ? 1 : 0;
+		break;
+	case BPF_ENUMVAL_VALUE:
+		if (!spec)
+			return -EUCLEAN; /* request instruction poisoning */
+		t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
+		e = btf_enum(t) + spec->spec[0].idx;
+		*val = e->val;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+struct bpf_core_relo_res
+{
+	/* expected value in the instruction, unless validate == false */
+	__u32 orig_val;
+	/* new value that needs to be patched up to */
+	__u32 new_val;
+	/* relocation unsuccessful, poison instruction, but don't fail load */
+	bool poison;
+	/* some relocations can't be validated against orig_val */
+	bool validate;
+	/* for field byte offset relocations or the forms:
+	 *     *(T *)(rX + <off>) = rY
+	 *     rX = *(T *)(rY + <off>),
+	 * we remember original and resolved field size to adjust direct
+	 * memory loads of pointers and integers; this is necessary for 32-bit
+	 * host kernel architectures, but also allows to automatically
+	 * relocate fields that were resized from, e.g., u32 to u64, etc.
+	 */
+	bool fail_memsz_adjust;
+	__u32 orig_sz;
+	__u32 orig_type_id;
+	__u32 new_sz;
+	__u32 new_type_id;
+};
+
+/* Calculate original and target relocation values, given local and target
+ * specs and relocation kind. These values are calculated for each candidate.
+ * If there are multiple candidates, resulting values should all be consistent
+ * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
+ * If instruction has to be poisoned, *poison will be set to true.
+ */
+static int bpf_core_calc_relo(const struct bpf_program *prog,
+			      const struct bpf_core_relo *relo,
+			      int relo_idx,
+			      const struct bpf_core_spec *local_spec,
+			      const struct bpf_core_spec *targ_spec,
+			      struct bpf_core_relo_res *res)
+{
+	int err = -EOPNOTSUPP;
+
+	res->orig_val = 0;
+	res->new_val = 0;
+	res->poison = false;
+	res->validate = true;
+	res->fail_memsz_adjust = false;
+	res->orig_sz = res->new_sz = 0;
+	res->orig_type_id = res->new_type_id = 0;
+
+	if (core_relo_is_field_based(relo->kind)) {
+		err = bpf_core_calc_field_relo(prog, relo, local_spec,
+					       &res->orig_val, &res->orig_sz,
+					       &res->orig_type_id, &res->validate);
+		err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec,
+						      &res->new_val, &res->new_sz,
+						      &res->new_type_id, NULL);
+		if (err)
+			goto done;
+		/* Validate if it's safe to adjust load/store memory size.
+		 * Adjustments are performed only if original and new memory
+		 * sizes differ.
+		 */
+		res->fail_memsz_adjust = false;
+		if (res->orig_sz != res->new_sz) {
+			const struct btf_type *orig_t, *new_t;
+
+			orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
+			new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
+
+			/* There are two use cases in which it's safe to
+			 * adjust load/store's mem size:
+			 *   - reading a 32-bit kernel pointer, while on BPF
+			 *   size pointers are always 64-bit; in this case
+			 *   it's safe to "downsize" instruction size due to
+			 *   pointer being treated as unsigned integer with
+			 *   zero-extended upper 32-bits;
+			 *   - reading unsigned integers, again due to
+			 *   zero-extension is preserving the value correctly.
+			 *
+			 * In all other cases it's incorrect to attempt to
+			 * load/store field because read value will be
+			 * incorrect, so we poison relocated instruction.
+			 */
+			if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
+				goto done;
+			if (btf_is_int(orig_t) && btf_is_int(new_t) &&
+			    btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
+			    btf_int_encoding(new_t) != BTF_INT_SIGNED)
+				goto done;
+
+			/* mark as invalid mem size adjustment, but this will
+			 * only be checked for LDX/STX/ST insns
+			 */
+			res->fail_memsz_adjust = true;
+		}
+	} else if (core_relo_is_type_based(relo->kind)) {
+		err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
+		err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
+	} else if (core_relo_is_enumval_based(relo->kind)) {
+		err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
+		err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
+	}
+
+done:
+	if (err == -EUCLEAN) {
+		/* EUCLEAN is used to signal instruction poisoning request */
+		res->poison = true;
+		err = 0;
+	} else if (err == -EOPNOTSUPP) {
+		/* EOPNOTSUPP means unknown/unsupported relocation */
+		pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
+			prog->name, relo_idx, core_relo_kind_str(relo->kind),
+			relo->kind, relo->insn_off / 8);
+	}
+
+	return err;
+}
+
+/*
+ * Turn instruction for which CO_RE relocation failed into invalid one with
+ * distinct signature.
+ */
+static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
+				 int insn_idx, struct bpf_insn *insn)
+{
+	pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
+		 prog->name, relo_idx, insn_idx);
+	insn->code = BPF_JMP | BPF_CALL;
+	insn->dst_reg = 0;
+	insn->src_reg = 0;
+	insn->off = 0;
+	/* if this instruction is reachable (not a dead code),
+	 * verifier will complain with the following message:
+	 * invalid func unknown#195896080
+	 */
+	insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
+}
+
+static bool is_ldimm64(struct bpf_insn *insn)
+{
+	return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
+static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
+{
+	switch (BPF_SIZE(insn->code)) {
+	case BPF_DW: return 8;
+	case BPF_W: return 4;
+	case BPF_H: return 2;
+	case BPF_B: return 1;
+	default: return -1;
+	}
+}
+
+static int insn_bytes_to_bpf_size(__u32 sz)
+{
+	switch (sz) {
+	case 8: return BPF_DW;
+	case 4: return BPF_W;
+	case 2: return BPF_H;
+	case 1: return BPF_B;
+	default: return -1;
+	}
+}
+
+/*
+ * Patch relocatable BPF instruction.
+ *
+ * Patched value is determined by relocation kind and target specification.
+ * For existence relocations target spec will be NULL if field/type is not found.
+ * Expected insn->imm value is determined using relocation kind and local
+ * spec, and is checked before patching instruction. If actual insn->imm value
+ * is wrong, bail out with error.
+ *
+ * Currently supported classes of BPF instruction are:
+ * 1. rX = <imm> (assignment with immediate operand);
+ * 2. rX += <imm> (arithmetic operations with immediate operand);
+ * 3. rX = <imm64> (load with 64-bit immediate value);
+ * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
+ * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
+ * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
+ */
+static int bpf_core_patch_insn(struct bpf_program *prog,
+			       const struct bpf_core_relo *relo,
+			       int relo_idx,
+			       const struct bpf_core_relo_res *res)
+{
+	__u32 orig_val, new_val;
+	struct bpf_insn *insn;
+	int insn_idx;
+	__u8 class;
+
+	if (relo->insn_off % BPF_INSN_SZ)
+		return -EINVAL;
+	insn_idx = relo->insn_off / BPF_INSN_SZ;
+	/* adjust insn_idx from section frame of reference to the local
+	 * program's frame of reference; (sub-)program code is not yet
+	 * relocated, so it's enough to just subtract in-section offset
+	 */
+	insn_idx = insn_idx - prog->sec_insn_off;
+	insn = &prog->insns[insn_idx];
+	class = BPF_CLASS(insn->code);
+
+	if (res->poison) {
+poison:
+		/* poison second part of ldimm64 to avoid confusing error from
+		 * verifier about "unknown opcode 00"
+		 */
+		if (is_ldimm64(insn))
+			bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1);
+		bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);
+		return 0;
+	}
+
+	orig_val = res->orig_val;
+	new_val = res->new_val;
+
+	switch (class) {
+	case BPF_ALU:
+	case BPF_ALU64:
+		if (BPF_SRC(insn->code) != BPF_K)
+			return -EINVAL;
+		if (res->validate && insn->imm != orig_val) {
+			pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
+				prog->name, relo_idx,
+				insn_idx, insn->imm, orig_val, new_val);
+			return -EINVAL;
+		}
+		orig_val = insn->imm;
+		insn->imm = new_val;
+		pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
+			 prog->name, relo_idx, insn_idx,
+			 orig_val, new_val);
+		break;
+	case BPF_LDX:
+	case BPF_ST:
+	case BPF_STX:
+		if (res->validate && insn->off != orig_val) {
+			pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
+				prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val);
+			return -EINVAL;
+		}
+		if (new_val > SHRT_MAX) {
+			pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
+				prog->name, relo_idx, insn_idx, new_val);
+			return -ERANGE;
+		}
+		if (res->fail_memsz_adjust) {
+			pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
+				"Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
+				prog->name, relo_idx, insn_idx);
+			goto poison;
+		}
+
+		orig_val = insn->off;
+		insn->off = new_val;
+		pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
+			 prog->name, relo_idx, insn_idx, orig_val, new_val);
+
+		if (res->new_sz != res->orig_sz) {
+			int insn_bytes_sz, insn_bpf_sz;
+
+			insn_bytes_sz = insn_bpf_size_to_bytes(insn);
+			if (insn_bytes_sz != res->orig_sz) {
+				pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
+					prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
+				return -EINVAL;
+			}
+
+			insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
+			if (insn_bpf_sz < 0) {
+				pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
+					prog->name, relo_idx, insn_idx, res->new_sz);
+				return -EINVAL;
+			}
+
+			insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
+			pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
+				 prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
+		}
+		break;
+	case BPF_LD: {
+		__u64 imm;
+
+		if (!is_ldimm64(insn) ||
+		    insn[0].src_reg != 0 || insn[0].off != 0 ||
+		    insn_idx + 1 >= prog->insns_cnt ||
+		    insn[1].code != 0 || insn[1].dst_reg != 0 ||
+		    insn[1].src_reg != 0 || insn[1].off != 0) {
+			pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
+				prog->name, relo_idx, insn_idx);
+			return -EINVAL;
+		}
+
+		imm = insn[0].imm + ((__u64)insn[1].imm << 32);
+		if (res->validate && imm != orig_val) {
+			pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
+				prog->name, relo_idx,
+				insn_idx, (unsigned long long)imm,
+				orig_val, new_val);
+			return -EINVAL;
+		}
+
+		insn[0].imm = new_val;
+		insn[1].imm = 0; /* currently only 32-bit values are supported */
+		pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
+			 prog->name, relo_idx, insn_idx,
+			 (unsigned long long)imm, new_val);
+		break;
+	}
+	default:
+		pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
+			prog->name, relo_idx, insn_idx, insn->code,
+			insn->src_reg, insn->dst_reg, insn->off, insn->imm);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Output spec definition in the format:
+ * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
+ * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
+ */
+static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
+{
+	const struct btf_type *t;
+	const struct btf_enum *e;
+	const char *s;
+	__u32 type_id;
+	int i;
+
+	type_id = spec->root_type_id;
+	t = btf__type_by_id(spec->btf, type_id);
+	s = btf__name_by_offset(spec->btf, t->name_off);
+
+	libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
+
+	if (core_relo_is_type_based(spec->relo_kind))
+		return;
+
+	if (core_relo_is_enumval_based(spec->relo_kind)) {
+		t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
+		e = btf_enum(t) + spec->raw_spec[0];
+		s = btf__name_by_offset(spec->btf, e->name_off);
+
+		libbpf_print(level, "::%s = %u", s, e->val);
+		return;
+	}
+
+	if (core_relo_is_field_based(spec->relo_kind)) {
+		for (i = 0; i < spec->len; i++) {
+			if (spec->spec[i].name)
+				libbpf_print(level, ".%s", spec->spec[i].name);
+			else if (i > 0 || spec->spec[i].idx > 0)
+				libbpf_print(level, "[%u]", spec->spec[i].idx);
+		}
+
+		libbpf_print(level, " (");
+		for (i = 0; i < spec->raw_len; i++)
+			libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
+
+		if (spec->bit_offset % 8)
+			libbpf_print(level, " @ offset %u.%u)",
+				     spec->bit_offset / 8, spec->bit_offset % 8);
+		else
+			libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
+		return;
+	}
+}
+
+static size_t bpf_core_hash_fn(const void *key, void *ctx)
+{
+	return (size_t)key;
+}
+
+static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
+{
+	return k1 == k2;
+}
+
+static void *u32_as_hash_key(__u32 x)
+{
+	return (void *)(uintptr_t)x;
+}
+
+/*
+ * CO-RE relocate single instruction.
+ *
+ * The outline and important points of the algorithm:
+ * 1. For given local type, find corresponding candidate target types.
+ *    Candidate type is a type with the same "essential" name, ignoring
+ *    everything after last triple underscore (___). E.g., `sample`,
+ *    `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
+ *    for each other. Names with triple underscore are referred to as
+ *    "flavors" and are useful, among other things, to allow to
+ *    specify/support incompatible variations of the same kernel struct, which
+ *    might differ between different kernel versions and/or build
+ *    configurations.
+ *
+ *    N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
+ *    converter, when deduplicated BTF of a kernel still contains more than
+ *    one different types with the same name. In that case, ___2, ___3, etc
+ *    are appended starting from second name conflict. But start flavors are
+ *    also useful to be defined "locally", in BPF program, to extract same
+ *    data from incompatible changes between different kernel
+ *    versions/configurations. For instance, to handle field renames between
+ *    kernel versions, one can use two flavors of the struct name with the
+ *    same common name and use conditional relocations to extract that field,
+ *    depending on target kernel version.
+ * 2. For each candidate type, try to match local specification to this
+ *    candidate target type. Matching involves finding corresponding
+ *    high-level spec accessors, meaning that all named fields should match,
+ *    as well as all array accesses should be within the actual bounds. Also,
+ *    types should be compatible (see bpf_core_fields_are_compat for details).
+ * 3. It is supported and expected that there might be multiple flavors
+ *    matching the spec. As long as all the specs resolve to the same set of
+ *    offsets across all candidates, there is no error. If there is any
+ *    ambiguity, CO-RE relocation will fail. This is necessary to accomodate
+ *    imprefection of BTF deduplication, which can cause slight duplication of
+ *    the same BTF type, if some directly or indirectly referenced (by
+ *    pointer) type gets resolved to different actual types in different
+ *    object files. If such situation occurs, deduplicated BTF will end up
+ *    with two (or more) structurally identical types, which differ only in
+ *    types they refer to through pointer. This should be OK in most cases and
+ *    is not an error.
+ * 4. Candidate types search is performed by linearly scanning through all
+ *    types in target BTF. It is anticipated that this is overall more
+ *    efficient memory-wise and not significantly worse (if not better)
+ *    CPU-wise compared to prebuilding a map from all local type names to
+ *    a list of candidate type names. It's also sped up by caching resolved
+ *    list of matching candidates per each local "root" type ID, that has at
+ *    least one bpf_core_relo associated with it. This list is shared
+ *    between multiple relocations for the same type ID and is updated as some
+ *    of the candidates are pruned due to structural incompatibility.
+ */
+static int bpf_core_apply_relo(struct bpf_program *prog,
+			       const struct bpf_core_relo *relo,
+			       int relo_idx,
+			       const struct btf *local_btf,
+			       const struct btf *targ_btf,
+			       struct hashmap *cand_cache)
+{
+	struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
+	const void *type_key = u32_as_hash_key(relo->type_id);
+	struct bpf_core_relo_res cand_res, targ_res;
+	const struct btf_type *local_type;
+	const char *local_name;
+	struct ids_vec *cand_ids;
+	__u32 local_id, cand_id;
+	const char *spec_str;
+	int i, j, err;
+
+	local_id = relo->type_id;
+	local_type = btf__type_by_id(local_btf, local_id);
+	if (!local_type)
+		return -EINVAL;
+
+	local_name = btf__name_by_offset(local_btf, local_type->name_off);
+	if (!local_name)
+		return -EINVAL;
+
+	spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
+	if (str_is_empty(spec_str))
+		return -EINVAL;
+
+	err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
+	if (err) {
+		pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
+			prog->name, relo_idx, local_id, btf_kind_str(local_type),
+			str_is_empty(local_name) ? "<anon>" : local_name,
+			spec_str, err);
+		return -EINVAL;
+	}
+
+	pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name,
+		 relo_idx, core_relo_kind_str(relo->kind), relo->kind);
+	bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
+	libbpf_print(LIBBPF_DEBUG, "\n");
+
+	/* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
+	if (relo->kind == BPF_TYPE_ID_LOCAL) {
+		targ_res.validate = true;
+		targ_res.poison = false;
+		targ_res.orig_val = local_spec.root_type_id;
+		targ_res.new_val = local_spec.root_type_id;
+		goto patch_insn;
+	}
+
+	/* libbpf doesn't support candidate search for anonymous types */
+	if (str_is_empty(spec_str)) {
+		pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
+			prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
+		return -EOPNOTSUPP;
+	}
+
+	if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) {
+		cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf);
+		if (IS_ERR(cand_ids)) {
+			pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld",
+				prog->name, relo_idx, local_id, btf_kind_str(local_type),
+				local_name, PTR_ERR(cand_ids));
+			return PTR_ERR(cand_ids);
+		}
+		err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL);
+		if (err) {
+			bpf_core_free_cands(cand_ids);
+			return err;
+		}
+	}
+
+	for (i = 0, j = 0; i < cand_ids->len; i++) {
+		cand_id = cand_ids->data[i];
+		err = bpf_core_spec_match(&local_spec, targ_btf, cand_id, &cand_spec);
+		if (err < 0) {
+			pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
+				prog->name, relo_idx, i);
+			bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
+			libbpf_print(LIBBPF_WARN, ": %d\n", err);
+			return err;
+		}
+
+		pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name,
+			 relo_idx, err == 0 ? "non-matching" : "matching", i);
+		bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
+		libbpf_print(LIBBPF_DEBUG, "\n");
+
+		if (err == 0)
+			continue;
+
+		err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
+		if (err)
+			return err;
+
+		if (j == 0) {
+			targ_res = cand_res;
+			targ_spec = cand_spec;
+		} else if (cand_spec.bit_offset != targ_spec.bit_offset) {
+			/* if there are many field relo candidates, they
+			 * should all resolve to the same bit offset
+			 */
+			pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
+				prog->name, relo_idx, cand_spec.bit_offset,
+				targ_spec.bit_offset);
+			return -EINVAL;
+		} else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
+			/* all candidates should result in the same relocation
+			 * decision and value, otherwise it's dangerous to
+			 * proceed due to ambiguity
+			 */
+			pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
+				prog->name, relo_idx,
+				cand_res.poison ? "failure" : "success", cand_res.new_val,
+				targ_res.poison ? "failure" : "success", targ_res.new_val);
+			return -EINVAL;
+		}
+
+		cand_ids->data[j++] = cand_spec.root_type_id;
+	}
+
+	/*
+	 * For BPF_FIELD_EXISTS relo or when used BPF program has field
+	 * existence checks or kernel version/config checks, it's expected
+	 * that we might not find any candidates. In this case, if field
+	 * wasn't found in any candidate, the list of candidates shouldn't
+	 * change at all, we'll just handle relocating appropriately,
+	 * depending on relo's kind.
+	 */
+	if (j > 0)
+		cand_ids->len = j;
+
+	/*
+	 * If no candidates were found, it might be both a programmer error,
+	 * as well as expected case, depending whether instruction w/
+	 * relocation is guarded in some way that makes it unreachable (dead
+	 * code) if relocation can't be resolved. This is handled in
+	 * bpf_core_patch_insn() uniformly by replacing that instruction with
+	 * BPF helper call insn (using invalid helper ID). If that instruction
+	 * is indeed unreachable, then it will be ignored and eliminated by
+	 * verifier. If it was an error, then verifier will complain and point
+	 * to a specific instruction number in its log.
+	 */
+	if (j == 0) {
+		pr_debug("prog '%s': relo #%d: no matching targets found\n",
+			 prog->name, relo_idx);
+
+		/* calculate single target relo result explicitly */
+		err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res);
+		if (err)
+			return err;
+	}
+
+patch_insn:
+	/* bpf_core_patch_insn() should know how to handle missing targ_spec */
+	err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res);
+	if (err) {
+		pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
+			prog->name, relo_idx, relo->insn_off, err);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
+{
+	const struct btf_ext_info_sec *sec;
+	const struct bpf_core_relo *rec;
+	const struct btf_ext_info *seg;
+	struct hashmap_entry *entry;
+	struct hashmap *cand_cache = NULL;
+	struct bpf_program *prog;
+	struct btf *targ_btf;
+	const char *sec_name;
+	int i, err = 0, insn_idx, sec_idx;
+
+	if (obj->btf_ext->core_relo_info.len == 0)
+		return 0;
+
+	if (targ_btf_path)
+		targ_btf = btf__parse(targ_btf_path, NULL);
+	else
+		targ_btf = obj->btf_vmlinux;
+	if (IS_ERR_OR_NULL(targ_btf)) {
+		pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf));
+		return PTR_ERR(targ_btf);
+	}
+
+	cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
+	if (IS_ERR(cand_cache)) {
+		err = PTR_ERR(cand_cache);
+		goto out;
+	}
+
+	seg = &obj->btf_ext->core_relo_info;
+	for_each_btf_ext_sec(seg, sec) {
+		sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
+		if (str_is_empty(sec_name)) {
+			err = -EINVAL;
+			goto out;
+		}
+		/* bpf_object's ELF is gone by now so it's not easy to find
+		 * section index by section name, but we can find *any*
+		 * bpf_program within desired section name and use it's
+		 * prog->sec_idx to do a proper search by section index and
+		 * instruction offset
+		 */
+		prog = NULL;
+		for (i = 0; i < obj->nr_programs; i++) {
+			if (strcmp(obj->programs[i].sec_name, sec_name) == 0) {
+				prog = &obj->programs[i];
+				break;
+			}
+		}
+		if (!prog) {
+			pr_warn("sec '%s': failed to find a BPF program\n", sec_name);
+			return -ENOENT;
+		}
+		sec_idx = prog->sec_idx;
+
+		pr_debug("sec '%s': found %d CO-RE relocations\n",
+			 sec_name, sec->num_info);
+
+		for_each_btf_ext_rec(seg, sec, i, rec) {
+			insn_idx = rec->insn_off / BPF_INSN_SZ;
+			prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
+			if (!prog) {
+				/* When __weak subprog is "overridden" by another instance
+				 * of the subprog from a different object file, linker still
+				 * appends all the .BTF.ext info that used to belong to that
+				 * eliminated subprogram.
+				 * This is similar to what x86-64 linker does for relocations.
+				 * So just ignore such relocations just like we ignore
+				 * subprog instructions when discovering subprograms.
+				 */
+				pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
+					 sec_name, i, insn_idx);
+				continue;
+			}
+			/* no need to apply CO-RE relocation if the program is
+			 * not going to be loaded
+			 */
+			if (!prog->load)
+				continue;
+
+			err = bpf_core_apply_relo(prog, rec, i, obj->btf,
+						  targ_btf, cand_cache);
+			if (err) {
+				pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
+					prog->name, i, err);
+				goto out;
+			}
+		}
+	}
+
+out:
+	/* obj->btf_vmlinux is freed at the end of object load phase */
+	if (targ_btf != obj->btf_vmlinux)
+		btf__free(targ_btf);
+	if (!IS_ERR_OR_NULL(cand_cache)) {
+		hashmap__for_each_entry(cand_cache, entry, i) {
+			bpf_core_free_cands(entry->value);
+		}
+		hashmap__free(cand_cache);
+	}
+	return err;
+}
+
+/* Relocate data references within program code:
+ *  - map references;
+ *  - global variable references;
+ *  - extern references.
+ */
+static int
+bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
+{
+	int i;
+
+	for (i = 0; i < prog->nr_reloc; i++) {
+		struct reloc_desc *relo = &prog->reloc_desc[i];
+		struct bpf_insn *insn = &prog->insns[relo->insn_idx];
+		struct extern_desc *ext;
+
+		switch (relo->type) {
+		case RELO_LD64:
+			insn[0].src_reg = BPF_PSEUDO_MAP_FD;
+			insn[0].imm = obj->maps[relo->map_idx].fd;
+			relo->processed = true;
+			break;
+		case RELO_DATA:
+			insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+			insn[1].imm = insn[0].imm + relo->sym_off;
+			insn[0].imm = obj->maps[relo->map_idx].fd;
+			relo->processed = true;
+			break;
+		case RELO_EXTERN:
+			ext = &obj->externs[relo->sym_off];
+			if (ext->type == EXT_KCFG) {
+				insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+				insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
+				insn[1].imm = ext->kcfg.data_off;
+			} else /* EXT_KSYM */ {
+				if (ext->ksym.type_id) { /* typed ksyms */
+					insn[0].src_reg = BPF_PSEUDO_BTF_ID;
+					insn[0].imm = ext->ksym.vmlinux_btf_id;
+				} else { /* typeless ksyms */
+					insn[0].imm = (__u32)ext->ksym.addr;
+					insn[1].imm = ext->ksym.addr >> 32;
+				}
+			}
+			relo->processed = true;
+			break;
+		case RELO_CALL:
+			/* will be handled as a follow up pass */
+			break;
+		default:
+			pr_warn("prog '%s': relo #%d: bad relo type %d\n",
+				prog->name, i, relo->type);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
+				    const struct bpf_program *prog,
+				    const struct btf_ext_info *ext_info,
+				    void **prog_info, __u32 *prog_rec_cnt,
+				    __u32 *prog_rec_sz)
+{
+	void *copy_start = NULL, *copy_end = NULL;
+	void *rec, *rec_end, *new_prog_info;
+	const struct btf_ext_info_sec *sec;
+	size_t old_sz, new_sz;
+	const char *sec_name;
+	int i, off_adj;
+
+	for_each_btf_ext_sec(ext_info, sec) {
+		sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
+		if (!sec_name)
+			return -EINVAL;
+		if (strcmp(sec_name, prog->sec_name) != 0)
+			continue;
+
+		for_each_btf_ext_rec(ext_info, sec, i, rec) {
+			__u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
+
+			if (insn_off < prog->sec_insn_off)
+				continue;
+			if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
+				break;
+
+			if (!copy_start)
+				copy_start = rec;
+			copy_end = rec + ext_info->rec_size;
+		}
+
+		if (!copy_start)
+			return -ENOENT;
+
+		/* append func/line info of a given (sub-)program to the main
+		 * program func/line info
+		 */
+		old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
+		new_sz = old_sz + (copy_end - copy_start);
+		new_prog_info = realloc(*prog_info, new_sz);
+		if (!new_prog_info)
+			return -ENOMEM;
+		*prog_info = new_prog_info;
+		*prog_rec_cnt = new_sz / ext_info->rec_size;
+		memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
+
+		/* Kernel instruction offsets are in units of 8-byte
+		 * instructions, while .BTF.ext instruction offsets generated
+		 * by Clang are in units of bytes. So convert Clang offsets
+		 * into kernel offsets and adjust offset according to program
+		 * relocated position.
+		 */
+		off_adj = prog->sub_insn_off - prog->sec_insn_off;
+		rec = new_prog_info + old_sz;
+		rec_end = new_prog_info + new_sz;
+		for (; rec < rec_end; rec += ext_info->rec_size) {
+			__u32 *insn_off = rec;
+
+			*insn_off = *insn_off / BPF_INSN_SZ + off_adj;
+		}
+		*prog_rec_sz = ext_info->rec_size;
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+static int
+reloc_prog_func_and_line_info(const struct bpf_object *obj,
+			      struct bpf_program *main_prog,
+			      const struct bpf_program *prog)
+{
+	int err;
+
+	/* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
+	 * supprot func/line info
+	 */
+	if (!obj->btf_ext || !kernel_supports(FEAT_BTF_FUNC))
+		return 0;
+
+	/* only attempt func info relocation if main program's func_info
+	 * relocation was successful
+	 */
+	if (main_prog != prog && !main_prog->func_info)
+		goto line_info;
+
+	err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
+				       &main_prog->func_info,
+				       &main_prog->func_info_cnt,
+				       &main_prog->func_info_rec_size);
+	if (err) {
+		if (err != -ENOENT) {
+			pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
+				prog->name, err);
+			return err;
+		}
+		if (main_prog->func_info) {
+			/*
+			 * Some info has already been found but has problem
+			 * in the last btf_ext reloc. Must have to error out.
+			 */
+			pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
+			return err;
+		}
+		/* Have problem loading the very first info. Ignore the rest. */
+		pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
+			prog->name);
+	}
+
+line_info:
+	/* don't relocate line info if main program's relocation failed */
+	if (main_prog != prog && !main_prog->line_info)
+		return 0;
+
+	err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
+				       &main_prog->line_info,
+				       &main_prog->line_info_cnt,
+				       &main_prog->line_info_rec_size);
+	if (err) {
+		if (err != -ENOENT) {
+			pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
+				prog->name, err);
+			return err;
+		}
+		if (main_prog->line_info) {
+			/*
+			 * Some info has already been found but has problem
+			 * in the last btf_ext reloc. Must have to error out.
+			 */
+			pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
+			return err;
+		}
+		/* Have problem loading the very first info. Ignore the rest. */
+		pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
+			prog->name);
+	}
+	return 0;
+}
+
+static int cmp_relo_by_insn_idx(const void *key, const void *elem)
+{
+	size_t insn_idx = *(const size_t *)key;
+	const struct reloc_desc *relo = elem;
+
+	if (insn_idx == relo->insn_idx)
+		return 0;
+	return insn_idx < relo->insn_idx ? -1 : 1;
+}
+
+static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
+{
+	return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
+		       sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
+}
+
+static int
+bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
+		       struct bpf_program *prog)
+{
+	size_t sub_insn_idx, insn_idx, new_cnt;
+	struct bpf_program *subprog;
+	struct bpf_insn *insns, *insn;
+	struct reloc_desc *relo;
+	int err;
+
+	err = reloc_prog_func_and_line_info(obj, main_prog, prog);
+	if (err)
+		return err;
+
+	for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
+		insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
+		if (!insn_is_subprog_call(insn))
+			continue;
+
+		relo = find_prog_insn_relo(prog, insn_idx);
+		if (relo && relo->type != RELO_CALL) {
+			pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
+				prog->name, insn_idx, relo->type);
+			return -LIBBPF_ERRNO__RELOC;
+		}
+		if (relo) {
+			/* sub-program instruction index is a combination of
+			 * an offset of a symbol pointed to by relocation and
+			 * call instruction's imm field; for global functions,
+			 * call always has imm = -1, but for static functions
+			 * relocation is against STT_SECTION and insn->imm
+			 * points to a start of a static function
+			 */
+			sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
+		} else {
+			/* if subprogram call is to a static function within
+			 * the same ELF section, there won't be any relocation
+			 * emitted, but it also means there is no additional
+			 * offset necessary, insns->imm is relative to
+			 * instruction's original position within the section
+			 */
+			sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
+		}
+
+		/* we enforce that sub-programs should be in .text section */
+		subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
+		if (!subprog) {
+			pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
+				prog->name);
+			return -LIBBPF_ERRNO__RELOC;
+		}
+
+		/* if it's the first call instruction calling into this
+		 * subprogram (meaning this subprog hasn't been processed
+		 * yet) within the context of current main program:
+		 *   - append it at the end of main program's instructions blog;
+		 *   - process is recursively, while current program is put on hold;
+		 *   - if that subprogram calls some other not yet processes
+		 *   subprogram, same thing will happen recursively until
+		 *   there are no more unprocesses subprograms left to append
+		 *   and relocate.
+		 */
+		if (subprog->sub_insn_off == 0) {
+			subprog->sub_insn_off = main_prog->insns_cnt;
+
+			new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
+			insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
+			if (!insns) {
+				pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
+				return -ENOMEM;
+			}
+			main_prog->insns = insns;
+			main_prog->insns_cnt = new_cnt;
+
+			memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
+			       subprog->insns_cnt * sizeof(*insns));
+
+			pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
+				 main_prog->name, subprog->insns_cnt, subprog->name);
+
+			err = bpf_object__reloc_code(obj, main_prog, subprog);
+			if (err)
+				return err;
+		}
+
+		/* main_prog->insns memory could have been re-allocated, so
+		 * calculate pointer again
+		 */
+		insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
+		/* calculate correct instruction position within current main
+		 * prog; each main prog can have a different set of
+		 * subprograms appended (potentially in different order as
+		 * well), so position of any subprog can be different for
+		 * different main programs */
+		insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
+
+		if (relo)
+			relo->processed = true;
+
+		pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
+			 prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
+	}
+
+	return 0;
+}
+
+/*
+ * Relocate sub-program calls.
+ *
+ * Algorithm operates as follows. Each entry-point BPF program (referred to as
+ * main prog) is processed separately. For each subprog (non-entry functions,
+ * that can be called from either entry progs or other subprogs) gets their
+ * sub_insn_off reset to zero. This serves as indicator that this subprogram
+ * hasn't been yet appended and relocated within current main prog. Once its
+ * relocated, sub_insn_off will point at the position within current main prog
+ * where given subprog was appended. This will further be used to relocate all
+ * the call instructions jumping into this subprog.
+ *
+ * We start with main program and process all call instructions. If the call
+ * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
+ * is zero), subprog instructions are appended at the end of main program's
+ * instruction array. Then main program is "put on hold" while we recursively
+ * process newly appended subprogram. If that subprogram calls into another
+ * subprogram that hasn't been appended, new subprogram is appended again to
+ * the *main* prog's instructions (subprog's instructions are always left
+ * untouched, as they need to be in unmodified state for subsequent main progs
+ * and subprog instructions are always sent only as part of a main prog) and
+ * the process continues recursively. Once all the subprogs called from a main
+ * prog or any of its subprogs are appended (and relocated), all their
+ * positions within finalized instructions array are known, so it's easy to
+ * rewrite call instructions with correct relative offsets, corresponding to
+ * desired target subprog.
+ *
+ * Its important to realize that some subprogs might not be called from some
+ * main prog and any of its called/used subprogs. Those will keep their
+ * subprog->sub_insn_off as zero at all times and won't be appended to current
+ * main prog and won't be relocated within the context of current main prog.
+ * They might still be used from other main progs later.
+ *
+ * Visually this process can be shown as below. Suppose we have two main
+ * programs mainA and mainB and BPF object contains three subprogs: subA,
+ * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
+ * subC both call subB:
+ *
+ *        +--------+ +-------+
+ *        |        v v       |
+ *     +--+---+ +--+-+-+ +---+--+
+ *     | subA | | subB | | subC |
+ *     +--+---+ +------+ +---+--+
+ *        ^                  ^
+ *        |                  |
+ *    +---+-------+   +------+----+
+ *    |   mainA   |   |   mainB   |
+ *    +-----------+   +-----------+
+ *
+ * We'll start relocating mainA, will find subA, append it and start
+ * processing sub A recursively:
+ *
+ *    +-----------+------+
+ *    |   mainA   | subA |
+ *    +-----------+------+
+ *
+ * At this point we notice that subB is used from subA, so we append it and
+ * relocate (there are no further subcalls from subB):
+ *
+ *    +-----------+------+------+
+ *    |   mainA   | subA | subB |
+ *    +-----------+------+------+
+ *
+ * At this point, we relocate subA calls, then go one level up and finish with
+ * relocatin mainA calls. mainA is done.
+ *
+ * For mainB process is similar but results in different order. We start with
+ * mainB and skip subA and subB, as mainB never calls them (at least
+ * directly), but we see subC is needed, so we append and start processing it:
+ *
+ *    +-----------+------+
+ *    |   mainB   | subC |
+ *    +-----------+------+
+ * Now we see subC needs subB, so we go back to it, append and relocate it:
+ *
+ *    +-----------+------+------+
+ *    |   mainB   | subC | subB |
+ *    +-----------+------+------+
+ *
+ * At this point we unwind recursion, relocate calls in subC, then in mainB.
+ */
+static int
+bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
+{
+	struct bpf_program *subprog;
+	int i, j, err;
+
+	/* mark all subprogs as not relocated (yet) within the context of
+	 * current main program
+	 */
+	for (i = 0; i < obj->nr_programs; i++) {
+		subprog = &obj->programs[i];
+		if (!prog_is_subprog(obj, subprog))
+			continue;
+
+		subprog->sub_insn_off = 0;
+		for (j = 0; j < subprog->nr_reloc; j++)
+			if (subprog->reloc_desc[j].type == RELO_CALL)
+				subprog->reloc_desc[j].processed = false;
+	}
+
+	err = bpf_object__reloc_code(obj, prog, prog);
+	if (err)
+		return err;
+
+
+	return 0;
+}
+
+static int
+bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
+{
+	struct bpf_program *prog;
+	size_t i;
+	int err;
+
+	if (obj->btf_ext) {
+		err = bpf_object__relocate_core(obj, targ_btf_path);
+		if (err) {
+			pr_warn("failed to perform CO-RE relocations: %d\n",
+				err);
+			return err;
+		}
+	}
+	/* relocate data references first for all programs and sub-programs,
+	 * as they don't change relative to code locations, so subsequent
+	 * subprogram processing won't need to re-calculate any of them
+	 */
+	for (i = 0; i < obj->nr_programs; i++) {
+		prog = &obj->programs[i];
+		err = bpf_object__relocate_data(obj, prog);
+		if (err) {
+			pr_warn("prog '%s': failed to relocate data references: %d\n",
+				prog->name, err);
+			return err;
+		}
+	}
+	/* now relocate subprogram calls and append used subprograms to main
+	 * programs; each copy of subprogram code needs to be relocated
+	 * differently for each main program, because its code location might
+	 * have changed
+	 */
+	for (i = 0; i < obj->nr_programs; i++) {
+		prog = &obj->programs[i];
+		/* sub-program's sub-calls are relocated within the context of
+		 * its main program only
+		 */
+		if (prog_is_subprog(obj, prog))
+			continue;
+
+		err = bpf_object__relocate_calls(obj, prog);
+		if (err) {
+			pr_warn("prog '%s': failed to relocate calls: %d\n",
+				prog->name, err);
+			return err;
+		}
+	}
+	/* free up relocation descriptors */
+	for (i = 0; i < obj->nr_programs; i++) {
+		prog = &obj->programs[i];
+		zfree(&prog->reloc_desc);
+		prog->nr_reloc = 0;
+	}
+	return 0;
+}
+
+static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
+					    GElf_Shdr *shdr, Elf_Data *data);
+
+static int bpf_object__collect_map_relos(struct bpf_object *obj,
+					 GElf_Shdr *shdr, Elf_Data *data)
+{
+	const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
+	int i, j, nrels, new_sz;
+	const struct btf_var_secinfo *vi = NULL;
+	const struct btf_type *sec, *var, *def;
+	struct bpf_map *map = NULL, *targ_map;
+	const struct btf_member *member;
+	const char *name, *mname;
+	Elf_Data *symbols;
+	unsigned int moff;
+	GElf_Sym sym;
+	GElf_Rel rel;
+	void *tmp;
+
+	if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
+		return -EINVAL;
+	sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
+	if (!sec)
+		return -EINVAL;
+
+	symbols = obj->efile.symbols;
+	nrels = shdr->sh_size / shdr->sh_entsize;
+	for (i = 0; i < nrels; i++) {
+		if (!gelf_getrel(data, i, &rel)) {
+			pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
+			return -LIBBPF_ERRNO__FORMAT;
+		}
+		if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
+			pr_warn(".maps relo #%d: symbol %zx not found\n",
+				i, (size_t)GELF_R_SYM(rel.r_info));
+			return -LIBBPF_ERRNO__FORMAT;
+		}
+		name = elf_sym_str(obj, sym.st_name) ?: "<?>";
+		if (sym.st_shndx != obj->efile.btf_maps_shndx) {
+			pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
+				i, name);
+			return -LIBBPF_ERRNO__RELOC;
+		}
+
+		pr_debug(".maps relo #%d: for %zd value %zd rel.r_offset %zu name %d ('%s')\n",
+			 i, (ssize_t)(rel.r_info >> 32), (size_t)sym.st_value,
+			 (size_t)rel.r_offset, sym.st_name, name);
+
+		for (j = 0; j < obj->nr_maps; j++) {
+			map = &obj->maps[j];
+			if (map->sec_idx != obj->efile.btf_maps_shndx)
+				continue;
+
+			vi = btf_var_secinfos(sec) + map->btf_var_idx;
+			if (vi->offset <= rel.r_offset &&
+			    rel.r_offset + bpf_ptr_sz <= vi->offset + vi->size)
+				break;
+		}
+		if (j == obj->nr_maps) {
+			pr_warn(".maps relo #%d: cannot find map '%s' at rel.r_offset %zu\n",
+				i, name, (size_t)rel.r_offset);
+			return -EINVAL;
+		}
+
+		if (!bpf_map_type__is_map_in_map(map->def.type))
+			return -EINVAL;
+		if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
+		    map->def.key_size != sizeof(int)) {
+			pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
+				i, map->name, sizeof(int));
+			return -EINVAL;
+		}
+
+		targ_map = bpf_object__find_map_by_name(obj, name);
+		if (!targ_map)
+			return -ESRCH;
+
+		var = btf__type_by_id(obj->btf, vi->type);
+		def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
+		if (btf_vlen(def) == 0)
+			return -EINVAL;
+		member = btf_members(def) + btf_vlen(def) - 1;
+		mname = btf__name_by_offset(obj->btf, member->name_off);
+		if (strcmp(mname, "values"))
+			return -EINVAL;
+
+		moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
+		if (rel.r_offset - vi->offset < moff)
+			return -EINVAL;
+
+		moff = rel.r_offset - vi->offset - moff;
+		/* here we use BPF pointer size, which is always 64 bit, as we
+		 * are parsing ELF that was built for BPF target
+		 */
+		if (moff % bpf_ptr_sz)
+			return -EINVAL;
+		moff /= bpf_ptr_sz;
+		if (moff >= map->init_slots_sz) {
+			new_sz = moff + 1;
+			tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
+			if (!tmp)
+				return -ENOMEM;
+			map->init_slots = tmp;
+			memset(map->init_slots + map->init_slots_sz, 0,
+			       (new_sz - map->init_slots_sz) * host_ptr_sz);
+			map->init_slots_sz = new_sz;
+		}
+		map->init_slots[moff] = targ_map;
+
+		pr_debug(".maps relo #%d: map '%s' slot [%d] points to map '%s'\n",
+			 i, map->name, moff, name);
+	}
+
+	return 0;
+}
+
+static int cmp_relocs(const void *_a, const void *_b)
+{
+	const struct reloc_desc *a = _a;
+	const struct reloc_desc *b = _b;
+
+	if (a->insn_idx != b->insn_idx)
+		return a->insn_idx < b->insn_idx ? -1 : 1;
+
+	/* no two relocations should have the same insn_idx, but ... */
+	if (a->type != b->type)
+		return a->type < b->type ? -1 : 1;
+
+	return 0;
+}
+
+static int bpf_object__collect_relos(struct bpf_object *obj)
+{
+	int i, err;
+
+	for (i = 0; i < obj->efile.nr_reloc_sects; i++) {
+		GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr;
+		Elf_Data *data = obj->efile.reloc_sects[i].data;
+		int idx = shdr->sh_info;
+
+		if (shdr->sh_type != SHT_REL) {
+			pr_warn("internal error at %d\n", __LINE__);
+			return -LIBBPF_ERRNO__INTERNAL;
+		}
+
+		if (idx == obj->efile.st_ops_shndx)
+			err = bpf_object__collect_st_ops_relos(obj, shdr, data);
+		else if (idx == obj->efile.btf_maps_shndx)
+			err = bpf_object__collect_map_relos(obj, shdr, data);
+		else
+			err = bpf_object__collect_prog_relos(obj, shdr, data);
+		if (err)
+			return err;
+	}
+
+	for (i = 0; i < obj->nr_programs; i++) {
+		struct bpf_program *p = &obj->programs[i];
+		
+		if (!p->nr_reloc)
+			continue;
+
+		qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
+	}
+	return 0;
+}
+
+static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
+{
+	if (BPF_CLASS(insn->code) == BPF_JMP &&
+	    BPF_OP(insn->code) == BPF_CALL &&
+	    BPF_SRC(insn->code) == BPF_K &&
+	    insn->src_reg == 0 &&
+	    insn->dst_reg == 0) {
+		    *func_id = insn->imm;
+		    return true;
+	}
+	return false;
+}
+
+static int bpf_object__sanitize_prog(struct bpf_object* obj, struct bpf_program *prog)
+{
+	struct bpf_insn *insn = prog->insns;
+	enum bpf_func_id func_id;
+	int i;
+
+	for (i = 0; i < prog->insns_cnt; i++, insn++) {
+		if (!insn_is_helper_call(insn, &func_id))
+			continue;
+
+		/* on kernels that don't yet support
+		 * bpf_probe_read_{kernel,user}[_str] helpers, fall back
+		 * to bpf_probe_read() which works well for old kernels
+		 */
+		switch (func_id) {
+		case BPF_FUNC_probe_read_kernel:
+		case BPF_FUNC_probe_read_user:
+			if (!kernel_supports(FEAT_PROBE_READ_KERN))
+				insn->imm = BPF_FUNC_probe_read;
+			break;
+		case BPF_FUNC_probe_read_kernel_str:
+		case BPF_FUNC_probe_read_user_str:
+			if (!kernel_supports(FEAT_PROBE_READ_KERN))
+				insn->imm = BPF_FUNC_probe_read_str;
+			break;
+		default:
+			break;
+		}
+	}
+	return 0;
+}
+
+static int
+load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
+	     char *license, __u32 kern_version, int *pfd)
+{
+	struct bpf_load_program_attr load_attr;
+	char *cp, errmsg[STRERR_BUFSIZE];
+	size_t log_buf_size = 0;
+	char *log_buf = NULL;
+	int btf_fd, ret;
+
+	if (!insns || !insns_cnt)
+		return -EINVAL;
+
+	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
+	load_attr.prog_type = prog->type;
+	/* old kernels might not support specifying expected_attach_type */
+	if (!kernel_supports(FEAT_EXP_ATTACH_TYPE) && prog->sec_def &&
+	    prog->sec_def->is_exp_attach_type_optional)
+		load_attr.expected_attach_type = 0;
+	else
+		load_attr.expected_attach_type = prog->expected_attach_type;
+	if (kernel_supports(FEAT_PROG_NAME))
+		load_attr.name = prog->name;
+	load_attr.insns = insns;
+	load_attr.insns_cnt = insns_cnt;
+	load_attr.license = license;
+	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
+	    prog->type == BPF_PROG_TYPE_LSM) {
+		load_attr.attach_btf_id = prog->attach_btf_id;
+	} else if (prog->type == BPF_PROG_TYPE_TRACING ||
+		   prog->type == BPF_PROG_TYPE_EXT) {
+		load_attr.attach_prog_fd = prog->attach_prog_fd;
+		load_attr.attach_btf_id = prog->attach_btf_id;
+	} else {
+		load_attr.kern_version = kern_version;
+		load_attr.prog_ifindex = prog->prog_ifindex;
+	}
+	/* specify func_info/line_info only if kernel supports them */
+	btf_fd = bpf_object__btf_fd(prog->obj);
+	if (btf_fd >= 0 && kernel_supports(FEAT_BTF_FUNC)) {
+		load_attr.prog_btf_fd = btf_fd;
+		load_attr.func_info = prog->func_info;
+		load_attr.func_info_rec_size = prog->func_info_rec_size;
+		load_attr.func_info_cnt = prog->func_info_cnt;
+		load_attr.line_info = prog->line_info;
+		load_attr.line_info_rec_size = prog->line_info_rec_size;
+		load_attr.line_info_cnt = prog->line_info_cnt;
+	}
+	load_attr.log_level = prog->log_level;
+	load_attr.prog_flags = prog->prog_flags;
+
+retry_load:
+	if (log_buf_size) {
+		log_buf = malloc(log_buf_size);
+		if (!log_buf)
+			return -ENOMEM;
+
+		*log_buf = 0;
+	}
+
+	ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size);
+
+	if (ret >= 0) {
+		if (log_buf && load_attr.log_level)
+			pr_debug("verifier log:\n%s", log_buf);
+
+		if (prog->obj->rodata_map_idx >= 0 &&
+		    kernel_supports(FEAT_PROG_BIND_MAP)) {
+			struct bpf_map *rodata_map =
+				&prog->obj->maps[prog->obj->rodata_map_idx];
+
+			if (bpf_prog_bind_map(ret, bpf_map__fd(rodata_map), NULL)) {
+				cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+				pr_warn("prog '%s': failed to bind .rodata map: %s\n",
+					prog->name, cp);
+				/* Don't fail hard if can't bind rodata. */
+			}
+		}
+
+		*pfd = ret;
+		ret = 0;
+		goto out;
+	}
+
+	if (!log_buf || errno == ENOSPC) {
+		log_buf_size = max((size_t)BPF_LOG_BUF_SIZE,
+				   log_buf_size << 1);
+
+		free(log_buf);
+		goto retry_load;
+	}
+	ret = errno ? -errno : -LIBBPF_ERRNO__LOAD;
+	cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+	pr_warn("load bpf program failed: %s\n", cp);
+	pr_perm_msg(ret);
+
+	if (log_buf && log_buf[0] != '\0') {
+		ret = -LIBBPF_ERRNO__VERIFY;
+		pr_warn("-- BEGIN DUMP LOG ---\n");
+		pr_warn("\n%s\n", log_buf);
+		pr_warn("-- END LOG --\n");
+	} else if (load_attr.insns_cnt >= BPF_MAXINSNS) {
+		pr_warn("Program too large (%zu insns), at most %d insns\n",
+			load_attr.insns_cnt, BPF_MAXINSNS);
+		ret = -LIBBPF_ERRNO__PROG2BIG;
+	} else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
+		/* Wrong program type? */
+		int fd;
+
+		load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
+		load_attr.expected_attach_type = 0;
+		fd = bpf_load_program_xattr(&load_attr, NULL, 0);
+		if (fd >= 0) {
+			close(fd);
+			ret = -LIBBPF_ERRNO__PROGTYPE;
+			goto out;
+		}
+	}
+
+out:
+	free(log_buf);
+	return ret;
+}
+
+static int libbpf_find_attach_btf_id(struct bpf_program *prog);
+
+int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
+{
+	int err = 0, fd, i, btf_id;
+
+	if (prog->obj->loaded) {
+		pr_warn("prog '%s': can't load after object was loaded\n", prog->name);
+		return -EINVAL;
+	}
+
+	if ((prog->type == BPF_PROG_TYPE_TRACING ||
+	     prog->type == BPF_PROG_TYPE_LSM ||
+	     prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
+		btf_id = libbpf_find_attach_btf_id(prog);
+		if (btf_id <= 0)
+			return btf_id;
+		prog->attach_btf_id = btf_id;
+	}
+
+	if (prog->instances.nr < 0 || !prog->instances.fds) {
+		if (prog->preprocessor) {
+			pr_warn("Internal error: can't load program '%s'\n",
+				prog->name);
+			return -LIBBPF_ERRNO__INTERNAL;
+		}
+
+		prog->instances.fds = malloc(sizeof(int));
+		if (!prog->instances.fds) {
+			pr_warn("Not enough memory for BPF fds\n");
+			return -ENOMEM;
+		}
+		prog->instances.nr = 1;
+		prog->instances.fds[0] = -1;
+	}
+
+	if (!prog->preprocessor) {
+		if (prog->instances.nr != 1) {
+			pr_warn("prog '%s': inconsistent nr(%d) != 1\n",
+				prog->name, prog->instances.nr);
+		}
+		err = load_program(prog, prog->insns, prog->insns_cnt,
+				   license, kern_ver, &fd);
+		if (!err)
+			prog->instances.fds[0] = fd;
+		goto out;
+	}
+
+	for (i = 0; i < prog->instances.nr; i++) {
+		struct bpf_prog_prep_result result;
+		bpf_program_prep_t preprocessor = prog->preprocessor;
+
+		memset(&result, 0, sizeof(result));
+		err = preprocessor(prog, i, prog->insns,
+				   prog->insns_cnt, &result);
+		if (err) {
+			pr_warn("Preprocessing the %dth instance of program '%s' failed\n",
+				i, prog->name);
+			goto out;
+		}
+
+		if (!result.new_insn_ptr || !result.new_insn_cnt) {
+			pr_debug("Skip loading the %dth instance of program '%s'\n",
+				 i, prog->name);
+			prog->instances.fds[i] = -1;
+			if (result.pfd)
+				*result.pfd = -1;
+			continue;
+		}
+
+		err = load_program(prog, result.new_insn_ptr,
+				   result.new_insn_cnt, license, kern_ver, &fd);
+		if (err) {
+			pr_warn("Loading the %dth instance of program '%s' failed\n",
+				i, prog->name);
+			goto out;
+		}
+
+		if (result.pfd)
+			*result.pfd = fd;
+		prog->instances.fds[i] = fd;
+	}
+out:
+	if (err)
+		pr_warn("failed to load program '%s'\n", prog->name);
+	zfree(&prog->insns);
+	prog->insns_cnt = 0;
+	return err;
+}
+
+static int
+bpf_object__load_progs(struct bpf_object *obj, int log_level)
+{
+	struct bpf_program *prog;
+	size_t i;
+	int err;
+
+	for (i = 0; i < obj->nr_programs; i++) {
+		prog = &obj->programs[i];
+		err = bpf_object__sanitize_prog(obj, prog);
+		if (err)
+			return err;
+	}
+
+	for (i = 0; i < obj->nr_programs; i++) {
+		prog = &obj->programs[i];
+		if (prog_is_subprog(obj, prog))
+			continue;
+		if (!prog->load) {
+			pr_debug("prog '%s': skipped loading\n", prog->name);
+			continue;
+		}
+		prog->log_level |= log_level;
+		err = bpf_program__load(prog, obj->license, obj->kern_version);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static const struct bpf_sec_def *find_sec_def(const char *sec_name);
+
+static struct bpf_object *
+__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
+		   const struct bpf_object_open_opts *opts)
+{
+	const char *obj_name, *kconfig;
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	char tmp_name[64];
+	int err;
+
+	if (elf_version(EV_CURRENT) == EV_NONE) {
+		pr_warn("failed to init libelf for %s\n",
+			path ? : "(mem buf)");
+		return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
+	}
+
+	if (!OPTS_VALID(opts, bpf_object_open_opts))
+		return ERR_PTR(-EINVAL);
+
+	obj_name = OPTS_GET(opts, object_name, NULL);
+	if (obj_buf) {
+		if (!obj_name) {
+			snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
+				 (unsigned long)obj_buf,
+				 (unsigned long)obj_buf_sz);
+			obj_name = tmp_name;
+		}
+		path = obj_name;
+		pr_debug("loading object '%s' from buffer\n", obj_name);
+	}
+
+	obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
+	if (IS_ERR(obj))
+		return obj;
+
+	kconfig = OPTS_GET(opts, kconfig, NULL);
+	if (kconfig) {
+		obj->kconfig = strdup(kconfig);
+		if (!obj->kconfig) {
+			err = -ENOMEM;
+			goto out;
+		}
+	}
+
+	err = bpf_object__elf_init(obj);
+	err = err ? : bpf_object__check_endianness(obj);
+	err = err ? : bpf_object__elf_collect(obj);
+	err = err ? : bpf_object__collect_externs(obj);
+	err = err ? : bpf_object__finalize_btf(obj);
+	err = err ? : bpf_object__init_maps(obj, opts);
+	err = err ? : bpf_object__collect_relos(obj);
+	if (err)
+		goto out;
+	bpf_object__elf_finish(obj);
+
+	bpf_object__for_each_program(prog, obj) {
+		prog->sec_def = find_sec_def(prog->sec_name);
+		if (!prog->sec_def)
+			/* couldn't guess, but user might manually specify */
+			continue;
+
+		if (prog->sec_def->is_sleepable)
+			prog->prog_flags |= BPF_F_SLEEPABLE;
+		bpf_program__set_type(prog, prog->sec_def->prog_type);
+		bpf_program__set_expected_attach_type(prog,
+				prog->sec_def->expected_attach_type);
+
+		if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING ||
+		    prog->sec_def->prog_type == BPF_PROG_TYPE_EXT)
+			prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
+	}
+
+	return obj;
+out:
+	bpf_object__close(obj);
+	return ERR_PTR(err);
+}
+
+static struct bpf_object *
+__bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
+{
+	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+		.relaxed_maps = flags & MAPS_RELAX_COMPAT,
+	);
+
+	/* param validation */
+	if (!attr->file)
+		return NULL;
+
+	pr_debug("loading %s\n", attr->file);
+	return __bpf_object__open(attr->file, NULL, 0, &opts);
+}
+
+struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
+{
+	return __bpf_object__open_xattr(attr, 0);
+}
+
+struct bpf_object *bpf_object__open(const char *path)
+{
+	struct bpf_object_open_attr attr = {
+		.file		= path,
+		.prog_type	= BPF_PROG_TYPE_UNSPEC,
+	};
+
+	return bpf_object__open_xattr(&attr);
+}
+
+struct bpf_object *
+bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
+{
+	if (!path)
+		return ERR_PTR(-EINVAL);
+
+	pr_debug("loading %s\n", path);
+
+	return __bpf_object__open(path, NULL, 0, opts);
+}
+
+struct bpf_object *
+bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
+		     const struct bpf_object_open_opts *opts)
+{
+	if (!obj_buf || obj_buf_sz == 0)
+		return ERR_PTR(-EINVAL);
+
+	return __bpf_object__open(NULL, obj_buf, obj_buf_sz, opts);
+}
+
+struct bpf_object *
+bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
+			const char *name)
+{
+	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+		.object_name = name,
+		/* wrong default, but backwards-compatible */
+		.relaxed_maps = true,
+	);
+
+	/* returning NULL is wrong, but backwards-compatible */
+	if (!obj_buf || obj_buf_sz == 0)
+		return NULL;
+
+	return bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
+}
+
+int bpf_object__unload(struct bpf_object *obj)
+{
+	size_t i;
+
+	if (!obj)
+		return -EINVAL;
+
+	for (i = 0; i < obj->nr_maps; i++) {
+		zclose(obj->maps[i].fd);
+		if (obj->maps[i].st_ops)
+			zfree(&obj->maps[i].st_ops->kern_vdata);
+	}
+
+	for (i = 0; i < obj->nr_programs; i++)
+		bpf_program__unload(&obj->programs[i]);
+
+	return 0;
+}
+
+static int bpf_object__sanitize_maps(struct bpf_object *obj)
+{
+	struct bpf_map *m;
+
+	bpf_object__for_each_map(m, obj) {
+		if (!bpf_map__is_internal(m))
+			continue;
+		if (!kernel_supports(FEAT_GLOBAL_DATA)) {
+			pr_warn("kernel doesn't support global data\n");
+			return -ENOTSUP;
+		}
+		if (!kernel_supports(FEAT_ARRAY_MMAP))
+			m->def.map_flags ^= BPF_F_MMAPABLE;
+	}
+
+	return 0;
+}
+
+static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
+{
+	char sym_type, sym_name[500];
+	unsigned long long sym_addr;
+	struct extern_desc *ext;
+	int ret, err = 0;
+	FILE *f;
+
+	f = fopen("/proc/kallsyms", "r");
+	if (!f) {
+		err = -errno;
+		pr_warn("failed to open /proc/kallsyms: %d\n", err);
+		return err;
+	}
+
+	while (true) {
+		ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
+			     &sym_addr, &sym_type, sym_name);
+		if (ret == EOF && feof(f))
+			break;
+		if (ret != 3) {
+			pr_warn("failed to read kallsyms entry: %d\n", ret);
+			err = -EINVAL;
+			goto out;
+		}
+
+		ext = find_extern_by_name(obj, sym_name);
+		if (!ext || ext->type != EXT_KSYM)
+			continue;
+
+		if (ext->is_set && ext->ksym.addr != sym_addr) {
+			pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
+				sym_name, ext->ksym.addr, sym_addr);
+			err = -EINVAL;
+			goto out;
+		}
+		if (!ext->is_set) {
+			ext->is_set = true;
+			ext->ksym.addr = sym_addr;
+			pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr);
+		}
+	}
+
+out:
+	fclose(f);
+	return err;
+}
+
+static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
+{
+	struct extern_desc *ext;
+	int i, id;
+
+	for (i = 0; i < obj->nr_extern; i++) {
+		const struct btf_type *targ_var, *targ_type;
+		__u32 targ_type_id, local_type_id;
+		const char *targ_var_name;
+		int ret;
+
+		ext = &obj->externs[i];
+		if (ext->type != EXT_KSYM || !ext->ksym.type_id)
+			continue;
+
+		id = btf__find_by_name_kind(obj->btf_vmlinux, ext->name,
+					    BTF_KIND_VAR);
+		if (id <= 0) {
+			pr_warn("extern (ksym) '%s': failed to find BTF ID in vmlinux BTF.\n",
+				ext->name);
+			return -ESRCH;
+		}
+
+		/* find local type_id */
+		local_type_id = ext->ksym.type_id;
+
+		/* find target type_id */
+		targ_var = btf__type_by_id(obj->btf_vmlinux, id);
+		targ_var_name = btf__name_by_offset(obj->btf_vmlinux,
+						    targ_var->name_off);
+		targ_type = skip_mods_and_typedefs(obj->btf_vmlinux,
+						   targ_var->type,
+						   &targ_type_id);
+
+		ret = bpf_core_types_are_compat(obj->btf, local_type_id,
+						obj->btf_vmlinux, targ_type_id);
+		if (ret <= 0) {
+			const struct btf_type *local_type;
+			const char *targ_name, *local_name;
+
+			local_type = btf__type_by_id(obj->btf, local_type_id);
+			local_name = btf__name_by_offset(obj->btf,
+							 local_type->name_off);
+			targ_name = btf__name_by_offset(obj->btf_vmlinux,
+							targ_type->name_off);
+
+			pr_warn("extern (ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
+				ext->name, local_type_id,
+				btf_kind_str(local_type), local_name, targ_type_id,
+				btf_kind_str(targ_type), targ_name);
+			return -EINVAL;
+		}
+
+		ext->is_set = true;
+		ext->ksym.vmlinux_btf_id = id;
+		pr_debug("extern (ksym) '%s': resolved to [%d] %s %s\n",
+			 ext->name, id, btf_kind_str(targ_var), targ_var_name);
+	}
+	return 0;
+}
+
+static int bpf_object__resolve_externs(struct bpf_object *obj,
+				       const char *extra_kconfig)
+{
+	bool need_config = false, need_kallsyms = false;
+	bool need_vmlinux_btf = false;
+	struct extern_desc *ext;
+	void *kcfg_data = NULL;
+	int err, i;
+
+	if (obj->nr_extern == 0)
+		return 0;
+
+	if (obj->kconfig_map_idx >= 0)
+		kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
+
+	for (i = 0; i < obj->nr_extern; i++) {
+		ext = &obj->externs[i];
+
+		if (ext->type == EXT_KCFG &&
+		    strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
+			void *ext_val = kcfg_data + ext->kcfg.data_off;
+			__u32 kver = get_kernel_version();
+
+			if (!kver) {
+				pr_warn("failed to get kernel version\n");
+				return -EINVAL;
+			}
+			err = set_kcfg_value_num(ext, ext_val, kver);
+			if (err)
+				return err;
+			pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver);
+		} else if (ext->type == EXT_KCFG &&
+			   strncmp(ext->name, "CONFIG_", 7) == 0) {
+			need_config = true;
+		} else if (ext->type == EXT_KSYM) {
+			if (ext->ksym.type_id)
+				need_vmlinux_btf = true;
+			else
+				need_kallsyms = true;
+		} else {
+			pr_warn("unrecognized extern '%s'\n", ext->name);
+			return -EINVAL;
+		}
+	}
+	if (need_config && extra_kconfig) {
+		err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
+		if (err)
+			return -EINVAL;
+		need_config = false;
+		for (i = 0; i < obj->nr_extern; i++) {
+			ext = &obj->externs[i];
+			if (ext->type == EXT_KCFG && !ext->is_set) {
+				need_config = true;
+				break;
+			}
+		}
+	}
+	if (need_config) {
+		err = bpf_object__read_kconfig_file(obj, kcfg_data);
+		if (err)
+			return -EINVAL;
+	}
+	if (need_kallsyms) {
+		err = bpf_object__read_kallsyms_file(obj);
+		if (err)
+			return -EINVAL;
+	}
+	if (need_vmlinux_btf) {
+		err = bpf_object__resolve_ksyms_btf_id(obj);
+		if (err)
+			return -EINVAL;
+	}
+	for (i = 0; i < obj->nr_extern; i++) {
+		ext = &obj->externs[i];
+
+		if (!ext->is_set && !ext->is_weak) {
+			pr_warn("extern %s (strong) not resolved\n", ext->name);
+			return -ESRCH;
+		} else if (!ext->is_set) {
+			pr_debug("extern %s (weak) not resolved, defaulting to zero\n",
+				 ext->name);
+		}
+	}
+
+	return 0;
+}
+
+int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
+{
+	struct bpf_object *obj;
+	int err, i;
+
+	if (!attr)
+		return -EINVAL;
+	obj = attr->obj;
+	if (!obj)
+		return -EINVAL;
+
+	if (obj->loaded) {
+		pr_warn("object '%s': load can't be attempted twice\n", obj->name);
+		return -EINVAL;
+	}
+
+	err = bpf_object__probe_loading(obj);
+	err = err ? : bpf_object__load_vmlinux_btf(obj);
+	err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
+	err = err ? : bpf_object__sanitize_and_load_btf(obj);
+	err = err ? : bpf_object__sanitize_maps(obj);
+	err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
+	err = err ? : bpf_object__create_maps(obj);
+	err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
+	err = err ? : bpf_object__load_progs(obj, attr->log_level);
+
+	btf__free(obj->btf_vmlinux);
+	obj->btf_vmlinux = NULL;
+
+	obj->loaded = true; /* doesn't matter if successfully or not */
+
+	if (err)
+		goto out;
+
+	return 0;
+out:
+	/* unpin any maps that were auto-pinned during load */
+	for (i = 0; i < obj->nr_maps; i++)
+		if (obj->maps[i].pinned && !obj->maps[i].reused)
+			bpf_map__unpin(&obj->maps[i], NULL);
+
+	bpf_object__unload(obj);
+	pr_warn("failed to load object '%s'\n", obj->path);
+	return err;
+}
+
+int bpf_object__load(struct bpf_object *obj)
+{
+	struct bpf_object_load_attr attr = {
+		.obj = obj,
+	};
+
+	return bpf_object__load_xattr(&attr);
+}
+
+static int make_parent_dir(const char *path)
+{
+	char *cp, errmsg[STRERR_BUFSIZE];
+	char *dname, *dir;
+	int err = 0;
+
+	dname = strdup(path);
+	if (dname == NULL)
+		return -ENOMEM;
+
+	dir = dirname(dname);
+	if (mkdir(dir, 0700) && errno != EEXIST)
+		err = -errno;
+
+	free(dname);
+	if (err) {
+		cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
+		pr_warn("failed to mkdir %s: %s\n", path, cp);
+	}
+	return err;
+}
+
+static int check_path(const char *path)
+{
+	char *cp, errmsg[STRERR_BUFSIZE];
+	struct statfs st_fs;
+	char *dname, *dir;
+	int err = 0;
+
+	if (path == NULL)
+		return -EINVAL;
+
+	dname = strdup(path);
+	if (dname == NULL)
+		return -ENOMEM;
+
+	dir = dirname(dname);
+	if (statfs(dir, &st_fs)) {
+		cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+		pr_warn("failed to statfs %s: %s\n", dir, cp);
+		err = -errno;
+	}
+	free(dname);
+
+	if (!err && st_fs.f_type != BPF_FS_MAGIC) {
+		pr_warn("specified path %s is not on BPF FS\n", path);
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
+int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
+			      int instance)
+{
+	char *cp, errmsg[STRERR_BUFSIZE];
+	int err;
+
+	err = make_parent_dir(path);
+	if (err)
+		return err;
+
+	err = check_path(path);
+	if (err)
+		return err;
+
+	if (prog == NULL) {
+		pr_warn("invalid program pointer\n");
+		return -EINVAL;
+	}
+
+	if (instance < 0 || instance >= prog->instances.nr) {
+		pr_warn("invalid prog instance %d of prog %s (max %d)\n",
+			instance, prog->name, prog->instances.nr);
+		return -EINVAL;
+	}
+
+	if (bpf_obj_pin(prog->instances.fds[instance], path)) {
+		err = -errno;
+		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+		pr_warn("failed to pin program: %s\n", cp);
+		return err;
+	}
+	pr_debug("pinned program '%s'\n", path);
+
+	return 0;
+}
+
+int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
+				int instance)
+{
+	int err;
+
+	err = check_path(path);
+	if (err)
+		return err;
+
+	if (prog == NULL) {
+		pr_warn("invalid program pointer\n");
+		return -EINVAL;
+	}
+
+	if (instance < 0 || instance >= prog->instances.nr) {
+		pr_warn("invalid prog instance %d of prog %s (max %d)\n",
+			instance, prog->name, prog->instances.nr);
+		return -EINVAL;
+	}
+
+	err = unlink(path);
+	if (err != 0)
+		return -errno;
+	pr_debug("unpinned program '%s'\n", path);
+
+	return 0;
+}
+
+int bpf_program__pin(struct bpf_program *prog, const char *path)
+{
+	int i, err;
+
+	err = make_parent_dir(path);
+	if (err)
+		return err;
+
+	err = check_path(path);
+	if (err)
+		return err;
+
+	if (prog == NULL) {
+		pr_warn("invalid program pointer\n");
+		return -EINVAL;
+	}
+
+	if (prog->instances.nr <= 0) {
+		pr_warn("no instances of prog %s to pin\n", prog->name);
+		return -EINVAL;
+	}
+
+	if (prog->instances.nr == 1) {
+		/* don't create subdirs when pinning single instance */
+		return bpf_program__pin_instance(prog, path, 0);
+	}
+
+	for (i = 0; i < prog->instances.nr; i++) {
+		char buf[PATH_MAX];
+		int len;
+
+		len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
+		if (len < 0) {
+			err = -EINVAL;
+			goto err_unpin;
+		} else if (len >= PATH_MAX) {
+			err = -ENAMETOOLONG;
+			goto err_unpin;
+		}
+
+		err = bpf_program__pin_instance(prog, buf, i);
+		if (err)
+			goto err_unpin;
+	}
+
+	return 0;
+
+err_unpin:
+	for (i = i - 1; i >= 0; i--) {
+		char buf[PATH_MAX];
+		int len;
+
+		len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
+		if (len < 0)
+			continue;
+		else if (len >= PATH_MAX)
+			continue;
+
+		bpf_program__unpin_instance(prog, buf, i);
+	}
+
+	rmdir(path);
+
+	return err;
+}
+
+int bpf_program__unpin(struct bpf_program *prog, const char *path)
+{
+	int i, err;
+
+	err = check_path(path);
+	if (err)
+		return err;
+
+	if (prog == NULL) {
+		pr_warn("invalid program pointer\n");
+		return -EINVAL;
+	}
+
+	if (prog->instances.nr <= 0) {
+		pr_warn("no instances of prog %s to pin\n", prog->name);
+		return -EINVAL;
+	}
+
+	if (prog->instances.nr == 1) {
+		/* don't create subdirs when pinning single instance */
+		return bpf_program__unpin_instance(prog, path, 0);
+	}
+
+	for (i = 0; i < prog->instances.nr; i++) {
+		char buf[PATH_MAX];
+		int len;
+
+		len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
+		if (len < 0)
+			return -EINVAL;
+		else if (len >= PATH_MAX)
+			return -ENAMETOOLONG;
+
+		err = bpf_program__unpin_instance(prog, buf, i);
+		if (err)
+			return err;
+	}
+
+	err = rmdir(path);
+	if (err)
+		return -errno;
+
+	return 0;
+}
+
+int bpf_map__pin(struct bpf_map *map, const char *path)
+{
+	char *cp, errmsg[STRERR_BUFSIZE];
+	int err;
+
+	if (map == NULL) {
+		pr_warn("invalid map pointer\n");
+		return -EINVAL;
+	}
+
+	if (map->pin_path) {
+		if (path && strcmp(path, map->pin_path)) {
+			pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
+				bpf_map__name(map), map->pin_path, path);
+			return -EINVAL;
+		} else if (map->pinned) {
+			pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
+				 bpf_map__name(map), map->pin_path);
+			return 0;
+		}
+	} else {
+		if (!path) {
+			pr_warn("missing a path to pin map '%s' at\n",
+				bpf_map__name(map));
+			return -EINVAL;
+		} else if (map->pinned) {
+			pr_warn("map '%s' already pinned\n", bpf_map__name(map));
+			return -EEXIST;
+		}
+
+		map->pin_path = strdup(path);
+		if (!map->pin_path) {
+			err = -errno;
+			goto out_err;
+		}
+	}
+
+	err = make_parent_dir(map->pin_path);
+	if (err)
+		return err;
+
+	err = check_path(map->pin_path);
+	if (err)
+		return err;
+
+	if (bpf_obj_pin(map->fd, map->pin_path)) {
+		err = -errno;
+		goto out_err;
+	}
+
+	map->pinned = true;
+	pr_debug("pinned map '%s'\n", map->pin_path);
+
+	return 0;
+
+out_err:
+	cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
+	pr_warn("failed to pin map: %s\n", cp);
+	return err;
+}
+
+int bpf_map__unpin(struct bpf_map *map, const char *path)
+{
+	int err;
+
+	if (map == NULL) {
+		pr_warn("invalid map pointer\n");
+		return -EINVAL;
+	}
+
+	if (map->pin_path) {
+		if (path && strcmp(path, map->pin_path)) {
+			pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
+				bpf_map__name(map), map->pin_path, path);
+			return -EINVAL;
+		}
+		path = map->pin_path;
+	} else if (!path) {
+		pr_warn("no path to unpin map '%s' from\n",
+			bpf_map__name(map));
+		return -EINVAL;
+	}
+
+	err = check_path(path);
+	if (err)
+		return err;
+
+	err = unlink(path);
+	if (err != 0)
+		return -errno;
+
+	map->pinned = false;
+	pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
+
+	return 0;
+}
+
+int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
+{
+	char *new = NULL;
+
+	if (path) {
+		new = strdup(path);
+		if (!new)
+			return -errno;
+	}
+
+	free(map->pin_path);
+	map->pin_path = new;
+	return 0;
+}
+
+const char *bpf_map__get_pin_path(const struct bpf_map *map)
+{
+	return map->pin_path;
+}
+
+bool bpf_map__is_pinned(const struct bpf_map *map)
+{
+	return map->pinned;
+}
+
+static void sanitize_pin_path(char *s)
+{
+	/* bpffs disallows periods in path names */
+	while (*s) {
+		if (*s == '.')
+			*s = '_';
+		s++;
+	}
+}
+
+int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
+{
+	struct bpf_map *map;
+	int err;
+
+	if (!obj)
+		return -ENOENT;
+
+	if (!obj->loaded) {
+		pr_warn("object not yet loaded; load it first\n");
+		return -ENOENT;
+	}
+
+	bpf_object__for_each_map(map, obj) {
+		char *pin_path = NULL;
+		char buf[PATH_MAX];
+
+		if (path) {
+			int len;
+
+			len = snprintf(buf, PATH_MAX, "%s/%s", path,
+				       bpf_map__name(map));
+			if (len < 0) {
+				err = -EINVAL;
+				goto err_unpin_maps;
+			} else if (len >= PATH_MAX) {
+				err = -ENAMETOOLONG;
+				goto err_unpin_maps;
+			}
+			sanitize_pin_path(buf);
+			pin_path = buf;
+		} else if (!map->pin_path) {
+			continue;
+		}
+
+		err = bpf_map__pin(map, pin_path);
+		if (err)
+			goto err_unpin_maps;
+	}
+
+	return 0;
+
+err_unpin_maps:
+	while ((map = bpf_map__prev(map, obj))) {
+		if (!map->pin_path)
+			continue;
+
+		bpf_map__unpin(map, NULL);
+	}
+
+	return err;
+}
+
+int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
+{
+	struct bpf_map *map;
+	int err;
+
+	if (!obj)
+		return -ENOENT;
+
+	bpf_object__for_each_map(map, obj) {
+		char *pin_path = NULL;
+		char buf[PATH_MAX];
+
+		if (path) {
+			int len;
+
+			len = snprintf(buf, PATH_MAX, "%s/%s", path,
+				       bpf_map__name(map));
+			if (len < 0)
+				return -EINVAL;
+			else if (len >= PATH_MAX)
+				return -ENAMETOOLONG;
+			sanitize_pin_path(buf);
+			pin_path = buf;
+		} else if (!map->pin_path) {
+			continue;
+		}
+
+		err = bpf_map__unpin(map, pin_path);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
+{
+	struct bpf_program *prog;
+	int err;
+
+	if (!obj)
+		return -ENOENT;
+
+	if (!obj->loaded) {
+		pr_warn("object not yet loaded; load it first\n");
+		return -ENOENT;
+	}
+
+	bpf_object__for_each_program(prog, obj) {
+		char buf[PATH_MAX];
+		int len;
+
+		len = snprintf(buf, PATH_MAX, "%s/%s", path,
+			       prog->pin_name);
+		if (len < 0) {
+			err = -EINVAL;
+			goto err_unpin_programs;
+		} else if (len >= PATH_MAX) {
+			err = -ENAMETOOLONG;
+			goto err_unpin_programs;
+		}
+
+		err = bpf_program__pin(prog, buf);
+		if (err)
+			goto err_unpin_programs;
+	}
+
+	return 0;
+
+err_unpin_programs:
+	while ((prog = bpf_program__prev(prog, obj))) {
+		char buf[PATH_MAX];
+		int len;
+
+		len = snprintf(buf, PATH_MAX, "%s/%s", path,
+			       prog->pin_name);
+		if (len < 0)
+			continue;
+		else if (len >= PATH_MAX)
+			continue;
+
+		bpf_program__unpin(prog, buf);
+	}
+
+	return err;
+}
+
+int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
+{
+	struct bpf_program *prog;
+	int err;
+
+	if (!obj)
+		return -ENOENT;
+
+	bpf_object__for_each_program(prog, obj) {
+		char buf[PATH_MAX];
+		int len;
+
+		len = snprintf(buf, PATH_MAX, "%s/%s", path,
+			       prog->pin_name);
+		if (len < 0)
+			return -EINVAL;
+		else if (len >= PATH_MAX)
+			return -ENAMETOOLONG;
+
+		err = bpf_program__unpin(prog, buf);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+int bpf_object__pin(struct bpf_object *obj, const char *path)
+{
+	int err;
+
+	err = bpf_object__pin_maps(obj, path);
+	if (err)
+		return err;
+
+	err = bpf_object__pin_programs(obj, path);
+	if (err) {
+		bpf_object__unpin_maps(obj, path);
+		return err;
+	}
+
+	return 0;
+}
+
+static void bpf_map__destroy(struct bpf_map *map)
+{
+	if (map->clear_priv)
+		map->clear_priv(map, map->priv);
+	map->priv = NULL;
+	map->clear_priv = NULL;
+
+	if (map->inner_map) {
+		bpf_map__destroy(map->inner_map);
+		zfree(&map->inner_map);
+	}
+
+	zfree(&map->init_slots);
+	map->init_slots_sz = 0;
+
+	if (map->mmaped) {
+		munmap(map->mmaped, bpf_map_mmap_sz(map));
+		map->mmaped = NULL;
+	}
+
+	if (map->st_ops) {
+		zfree(&map->st_ops->data);
+		zfree(&map->st_ops->progs);
+		zfree(&map->st_ops->kern_func_off);
+		zfree(&map->st_ops);
+	}
+
+	zfree(&map->name);
+	zfree(&map->pin_path);
+
+	if (map->fd >= 0)
+		zclose(map->fd);
+}
+
+void bpf_object__close(struct bpf_object *obj)
+{
+	size_t i;
+
+	if (IS_ERR_OR_NULL(obj))
+		return;
+
+	if (obj->clear_priv)
+		obj->clear_priv(obj, obj->priv);
+
+	bpf_object__elf_finish(obj);
+	bpf_object__unload(obj);
+	btf__free(obj->btf);
+	btf_ext__free(obj->btf_ext);
+
+	for (i = 0; i < obj->nr_maps; i++)
+		bpf_map__destroy(&obj->maps[i]);
+
+	zfree(&obj->kconfig);
+	zfree(&obj->externs);
+	obj->nr_extern = 0;
+
+	zfree(&obj->maps);
+	obj->nr_maps = 0;
+
+	if (obj->programs && obj->nr_programs) {
+		for (i = 0; i < obj->nr_programs; i++)
+			bpf_program__exit(&obj->programs[i]);
+	}
+	zfree(&obj->programs);
+
+	list_del(&obj->list);
+	free(obj);
+}
+
+struct bpf_object *
+bpf_object__next(struct bpf_object *prev)
+{
+	struct bpf_object *next;
+
+	if (!prev)
+		next = list_first_entry(&bpf_objects_list,
+					struct bpf_object,
+					list);
+	else
+		next = list_next_entry(prev, list);
+
+	/* Empty list is noticed here so don't need checking on entry. */
+	if (&next->list == &bpf_objects_list)
+		return NULL;
+
+	return next;
+}
+
+const char *bpf_object__name(const struct bpf_object *obj)
+{
+	return obj ? obj->name : ERR_PTR(-EINVAL);
+}
+
+unsigned int bpf_object__kversion(const struct bpf_object *obj)
+{
+	return obj ? obj->kern_version : 0;
+}
+
+struct btf *bpf_object__btf(const struct bpf_object *obj)
+{
+	return obj ? obj->btf : NULL;
+}
+
+int bpf_object__btf_fd(const struct bpf_object *obj)
+{
+	return obj->btf ? btf__fd(obj->btf) : -1;
+}
+
+int bpf_object__set_priv(struct bpf_object *obj, void *priv,
+			 bpf_object_clear_priv_t clear_priv)
+{
+	if (obj->priv && obj->clear_priv)
+		obj->clear_priv(obj, obj->priv);
+
+	obj->priv = priv;
+	obj->clear_priv = clear_priv;
+	return 0;
+}
+
+void *bpf_object__priv(const struct bpf_object *obj)
+{
+	return obj ? obj->priv : ERR_PTR(-EINVAL);
+}
+
+static struct bpf_program *
+__bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
+		    bool forward)
+{
+	size_t nr_programs = obj->nr_programs;
+	ssize_t idx;
+
+	if (!nr_programs)
+		return NULL;
+
+	if (!p)
+		/* Iter from the beginning */
+		return forward ? &obj->programs[0] :
+			&obj->programs[nr_programs - 1];
+
+	if (p->obj != obj) {
+		pr_warn("error: program handler doesn't match object\n");
+		return NULL;
+	}
+
+	idx = (p - obj->programs) + (forward ? 1 : -1);
+	if (idx >= obj->nr_programs || idx < 0)
+		return NULL;
+	return &obj->programs[idx];
+}
+
+struct bpf_program *
+bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
+{
+	struct bpf_program *prog = prev;
+
+	do {
+		prog = __bpf_program__iter(prog, obj, true);
+	} while (prog && prog_is_subprog(obj, prog));
+
+	return prog;
+}
+
+struct bpf_program *
+bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
+{
+	struct bpf_program *prog = next;
+
+	do {
+		prog = __bpf_program__iter(prog, obj, false);
+	} while (prog && prog_is_subprog(obj, prog));
+
+	return prog;
+}
+
+int bpf_program__set_priv(struct bpf_program *prog, void *priv,
+			  bpf_program_clear_priv_t clear_priv)
+{
+	if (prog->priv && prog->clear_priv)
+		prog->clear_priv(prog, prog->priv);
+
+	prog->priv = priv;
+	prog->clear_priv = clear_priv;
+	return 0;
+}
+
+void *bpf_program__priv(const struct bpf_program *prog)
+{
+	return prog ? prog->priv : ERR_PTR(-EINVAL);
+}
+
+void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
+{
+	prog->prog_ifindex = ifindex;
+}
+
+const char *bpf_program__name(const struct bpf_program *prog)
+{
+	return prog->name;
+}
+
+const char *bpf_program__section_name(const struct bpf_program *prog)
+{
+	return prog->sec_name;
+}
+
+const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
+{
+	const char *title;
+
+	title = prog->sec_name;
+	if (needs_copy) {
+		title = strdup(title);
+		if (!title) {
+			pr_warn("failed to strdup program title\n");
+			return ERR_PTR(-ENOMEM);
+		}
+	}
+
+	return title;
+}
+
+bool bpf_program__autoload(const struct bpf_program *prog)
+{
+	return prog->load;
+}
+
+int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
+{
+	if (prog->obj->loaded)
+		return -EINVAL;
+
+	prog->load = autoload;
+	return 0;
+}
+
+int bpf_program__fd(const struct bpf_program *prog)
+{
+	return bpf_program__nth_fd(prog, 0);
+}
+
+size_t bpf_program__size(const struct bpf_program *prog)
+{
+	return prog->insns_cnt * BPF_INSN_SZ;
+}
+
+int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
+			  bpf_program_prep_t prep)
+{
+	int *instances_fds;
+
+	if (nr_instances <= 0 || !prep)
+		return -EINVAL;
+
+	if (prog->instances.nr > 0 || prog->instances.fds) {
+		pr_warn("Can't set pre-processor after loading\n");
+		return -EINVAL;
+	}
+
+	instances_fds = malloc(sizeof(int) * nr_instances);
+	if (!instances_fds) {
+		pr_warn("alloc memory failed for fds\n");
+		return -ENOMEM;
+	}
+
+	/* fill all fd with -1 */
+	memset(instances_fds, -1, sizeof(int) * nr_instances);
+
+	prog->instances.nr = nr_instances;
+	prog->instances.fds = instances_fds;
+	prog->preprocessor = prep;
+	return 0;
+}
+
+int bpf_program__nth_fd(const struct bpf_program *prog, int n)
+{
+	int fd;
+
+	if (!prog)
+		return -EINVAL;
+
+	if (n >= prog->instances.nr || n < 0) {
+		pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
+			n, prog->name, prog->instances.nr);
+		return -EINVAL;
+	}
+
+	fd = prog->instances.fds[n];
+	if (fd < 0) {
+		pr_warn("%dth instance of program '%s' is invalid\n",
+			n, prog->name);
+		return -ENOENT;
+	}
+
+	return fd;
+}
+
+enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog)
+{
+	return prog->type;
+}
+
+void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
+{
+	prog->type = type;
+}
+
+static bool bpf_program__is_type(const struct bpf_program *prog,
+				 enum bpf_prog_type type)
+{
+	return prog ? (prog->type == type) : false;
+}
+
+#define BPF_PROG_TYPE_FNS(NAME, TYPE)				\
+int bpf_program__set_##NAME(struct bpf_program *prog)		\
+{								\
+	if (!prog)						\
+		return -EINVAL;					\
+	bpf_program__set_type(prog, TYPE);			\
+	return 0;						\
+}								\
+								\
+bool bpf_program__is_##NAME(const struct bpf_program *prog)	\
+{								\
+	return bpf_program__is_type(prog, TYPE);		\
+}								\
+
+BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
+BPF_PROG_TYPE_FNS(lsm, BPF_PROG_TYPE_LSM);
+BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
+BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
+BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
+BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
+BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
+BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
+BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
+BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
+BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS);
+BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
+BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
+
+enum bpf_attach_type
+bpf_program__get_expected_attach_type(struct bpf_program *prog)
+{
+	return prog->expected_attach_type;
+}
+
+void bpf_program__set_expected_attach_type(struct bpf_program *prog,
+					   enum bpf_attach_type type)
+{
+	prog->expected_attach_type = type;
+}
+
+#define BPF_PROG_SEC_IMPL(string, ptype, eatype, eatype_optional,	    \
+			  attachable, attach_btf)			    \
+	{								    \
+		.sec = string,						    \
+		.len = sizeof(string) - 1,				    \
+		.prog_type = ptype,					    \
+		.expected_attach_type = eatype,				    \
+		.is_exp_attach_type_optional = eatype_optional,		    \
+		.is_attachable = attachable,				    \
+		.is_attach_btf = attach_btf,				    \
+	}
+
+/* Programs that can NOT be attached. */
+#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0)
+
+/* Programs that can be attached. */
+#define BPF_APROG_SEC(string, ptype, atype) \
+	BPF_PROG_SEC_IMPL(string, ptype, atype, true, 1, 0)
+
+/* Programs that must specify expected attach type at load time. */
+#define BPF_EAPROG_SEC(string, ptype, eatype) \
+	BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 1, 0)
+
+/* Programs that use BTF to identify attach point */
+#define BPF_PROG_BTF(string, ptype, eatype) \
+	BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 0, 1)
+
+/* Programs that can be attached but attach type can't be identified by section
+ * name. Kept for backward compatibility.
+ */
+#define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype)
+
+#define SEC_DEF(sec_pfx, ptype, ...) {					    \
+	.sec = sec_pfx,							    \
+	.len = sizeof(sec_pfx) - 1,					    \
+	.prog_type = BPF_PROG_TYPE_##ptype,				    \
+	__VA_ARGS__							    \
+}
+
+static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
+				      struct bpf_program *prog);
+static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
+				  struct bpf_program *prog);
+static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
+				      struct bpf_program *prog);
+static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
+				     struct bpf_program *prog);
+static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
+				   struct bpf_program *prog);
+static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
+				    struct bpf_program *prog);
+
+static const struct bpf_sec_def section_defs[] = {
+	BPF_PROG_SEC("socket",			BPF_PROG_TYPE_SOCKET_FILTER),
+	BPF_PROG_SEC("sk_reuseport",		BPF_PROG_TYPE_SK_REUSEPORT),
+	SEC_DEF("kprobe/", KPROBE,
+		.attach_fn = attach_kprobe),
+	BPF_PROG_SEC("uprobe/",			BPF_PROG_TYPE_KPROBE),
+	SEC_DEF("kretprobe/", KPROBE,
+		.attach_fn = attach_kprobe),
+	BPF_PROG_SEC("uretprobe/",		BPF_PROG_TYPE_KPROBE),
+	BPF_PROG_SEC("classifier",		BPF_PROG_TYPE_SCHED_CLS),
+	BPF_PROG_SEC("action",			BPF_PROG_TYPE_SCHED_ACT),
+	SEC_DEF("tracepoint/", TRACEPOINT,
+		.attach_fn = attach_tp),
+	SEC_DEF("tp/", TRACEPOINT,
+		.attach_fn = attach_tp),
+	SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT,
+		.attach_fn = attach_raw_tp),
+	SEC_DEF("raw_tp/", RAW_TRACEPOINT,
+		.attach_fn = attach_raw_tp),
+	SEC_DEF("tp_btf/", TRACING,
+		.expected_attach_type = BPF_TRACE_RAW_TP,
+		.is_attach_btf = true,
+		.attach_fn = attach_trace),
+	SEC_DEF("fentry/", TRACING,
+		.expected_attach_type = BPF_TRACE_FENTRY,
+		.is_attach_btf = true,
+		.attach_fn = attach_trace),
+	SEC_DEF("fmod_ret/", TRACING,
+		.expected_attach_type = BPF_MODIFY_RETURN,
+		.is_attach_btf = true,
+		.attach_fn = attach_trace),
+	SEC_DEF("fexit/", TRACING,
+		.expected_attach_type = BPF_TRACE_FEXIT,
+		.is_attach_btf = true,
+		.attach_fn = attach_trace),
+	SEC_DEF("fentry.s/", TRACING,
+		.expected_attach_type = BPF_TRACE_FENTRY,
+		.is_attach_btf = true,
+		.is_sleepable = true,
+		.attach_fn = attach_trace),
+	SEC_DEF("fmod_ret.s/", TRACING,
+		.expected_attach_type = BPF_MODIFY_RETURN,
+		.is_attach_btf = true,
+		.is_sleepable = true,
+		.attach_fn = attach_trace),
+	SEC_DEF("fexit.s/", TRACING,
+		.expected_attach_type = BPF_TRACE_FEXIT,
+		.is_attach_btf = true,
+		.is_sleepable = true,
+		.attach_fn = attach_trace),
+	SEC_DEF("freplace/", EXT,
+		.is_attach_btf = true,
+		.attach_fn = attach_trace),
+	SEC_DEF("lsm/", LSM,
+		.is_attach_btf = true,
+		.expected_attach_type = BPF_LSM_MAC,
+		.attach_fn = attach_lsm),
+	SEC_DEF("lsm.s/", LSM,
+		.is_attach_btf = true,
+		.is_sleepable = true,
+		.expected_attach_type = BPF_LSM_MAC,
+		.attach_fn = attach_lsm),
+	SEC_DEF("iter/", TRACING,
+		.expected_attach_type = BPF_TRACE_ITER,
+		.is_attach_btf = true,
+		.attach_fn = attach_iter),
+	BPF_EAPROG_SEC("xdp_devmap/",		BPF_PROG_TYPE_XDP,
+						BPF_XDP_DEVMAP),
+	BPF_EAPROG_SEC("xdp_cpumap/",		BPF_PROG_TYPE_XDP,
+						BPF_XDP_CPUMAP),
+	BPF_APROG_SEC("xdp",			BPF_PROG_TYPE_XDP,
+						BPF_XDP),
+	BPF_PROG_SEC("perf_event",		BPF_PROG_TYPE_PERF_EVENT),
+	BPF_PROG_SEC("lwt_in",			BPF_PROG_TYPE_LWT_IN),
+	BPF_PROG_SEC("lwt_out",			BPF_PROG_TYPE_LWT_OUT),
+	BPF_PROG_SEC("lwt_xmit",		BPF_PROG_TYPE_LWT_XMIT),
+	BPF_PROG_SEC("lwt_seg6local",		BPF_PROG_TYPE_LWT_SEG6LOCAL),
+	BPF_APROG_SEC("cgroup_skb/ingress",	BPF_PROG_TYPE_CGROUP_SKB,
+						BPF_CGROUP_INET_INGRESS),
+	BPF_APROG_SEC("cgroup_skb/egress",	BPF_PROG_TYPE_CGROUP_SKB,
+						BPF_CGROUP_INET_EGRESS),
+	BPF_APROG_COMPAT("cgroup/skb",		BPF_PROG_TYPE_CGROUP_SKB),
+	BPF_EAPROG_SEC("cgroup/sock_create",	BPF_PROG_TYPE_CGROUP_SOCK,
+						BPF_CGROUP_INET_SOCK_CREATE),
+	BPF_EAPROG_SEC("cgroup/sock_release",	BPF_PROG_TYPE_CGROUP_SOCK,
+						BPF_CGROUP_INET_SOCK_RELEASE),
+	BPF_APROG_SEC("cgroup/sock",		BPF_PROG_TYPE_CGROUP_SOCK,
+						BPF_CGROUP_INET_SOCK_CREATE),
+	BPF_EAPROG_SEC("cgroup/post_bind4",	BPF_PROG_TYPE_CGROUP_SOCK,
+						BPF_CGROUP_INET4_POST_BIND),
+	BPF_EAPROG_SEC("cgroup/post_bind6",	BPF_PROG_TYPE_CGROUP_SOCK,
+						BPF_CGROUP_INET6_POST_BIND),
+	BPF_APROG_SEC("cgroup/dev",		BPF_PROG_TYPE_CGROUP_DEVICE,
+						BPF_CGROUP_DEVICE),
+	BPF_APROG_SEC("sockops",		BPF_PROG_TYPE_SOCK_OPS,
+						BPF_CGROUP_SOCK_OPS),
+	BPF_APROG_SEC("sk_skb/stream_parser",	BPF_PROG_TYPE_SK_SKB,
+						BPF_SK_SKB_STREAM_PARSER),
+	BPF_APROG_SEC("sk_skb/stream_verdict",	BPF_PROG_TYPE_SK_SKB,
+						BPF_SK_SKB_STREAM_VERDICT),
+	BPF_APROG_COMPAT("sk_skb",		BPF_PROG_TYPE_SK_SKB),
+	BPF_APROG_SEC("sk_msg",			BPF_PROG_TYPE_SK_MSG,
+						BPF_SK_MSG_VERDICT),
+	BPF_APROG_SEC("lirc_mode2",		BPF_PROG_TYPE_LIRC_MODE2,
+						BPF_LIRC_MODE2),
+	BPF_APROG_SEC("flow_dissector",		BPF_PROG_TYPE_FLOW_DISSECTOR,
+						BPF_FLOW_DISSECTOR),
+	BPF_EAPROG_SEC("cgroup/bind4",		BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_INET4_BIND),
+	BPF_EAPROG_SEC("cgroup/bind6",		BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_INET6_BIND),
+	BPF_EAPROG_SEC("cgroup/connect4",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_INET4_CONNECT),
+	BPF_EAPROG_SEC("cgroup/connect6",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_INET6_CONNECT),
+	BPF_EAPROG_SEC("cgroup/sendmsg4",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_UDP4_SENDMSG),
+	BPF_EAPROG_SEC("cgroup/sendmsg6",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_UDP6_SENDMSG),
+	BPF_EAPROG_SEC("cgroup/recvmsg4",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_UDP4_RECVMSG),
+	BPF_EAPROG_SEC("cgroup/recvmsg6",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_UDP6_RECVMSG),
+	BPF_EAPROG_SEC("cgroup/getpeername4",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_INET4_GETPEERNAME),
+	BPF_EAPROG_SEC("cgroup/getpeername6",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_INET6_GETPEERNAME),
+	BPF_EAPROG_SEC("cgroup/getsockname4",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_INET4_GETSOCKNAME),
+	BPF_EAPROG_SEC("cgroup/getsockname6",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_INET6_GETSOCKNAME),
+	BPF_EAPROG_SEC("cgroup/sysctl",		BPF_PROG_TYPE_CGROUP_SYSCTL,
+						BPF_CGROUP_SYSCTL),
+	BPF_EAPROG_SEC("cgroup/getsockopt",	BPF_PROG_TYPE_CGROUP_SOCKOPT,
+						BPF_CGROUP_GETSOCKOPT),
+	BPF_EAPROG_SEC("cgroup/setsockopt",	BPF_PROG_TYPE_CGROUP_SOCKOPT,
+						BPF_CGROUP_SETSOCKOPT),
+	BPF_PROG_SEC("struct_ops",		BPF_PROG_TYPE_STRUCT_OPS),
+	BPF_EAPROG_SEC("sk_lookup/",		BPF_PROG_TYPE_SK_LOOKUP,
+						BPF_SK_LOOKUP),
+};
+
+#undef BPF_PROG_SEC_IMPL
+#undef BPF_PROG_SEC
+#undef BPF_APROG_SEC
+#undef BPF_EAPROG_SEC
+#undef BPF_APROG_COMPAT
+#undef SEC_DEF
+
+#define MAX_TYPE_NAME_SIZE 32
+
+static const struct bpf_sec_def *find_sec_def(const char *sec_name)
+{
+	int i, n = ARRAY_SIZE(section_defs);
+
+	for (i = 0; i < n; i++) {
+		if (strncmp(sec_name,
+			    section_defs[i].sec, section_defs[i].len))
+			continue;
+		return &section_defs[i];
+	}
+	return NULL;
+}
+
+static char *libbpf_get_type_names(bool attach_type)
+{
+	int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
+	char *buf;
+
+	buf = malloc(len);
+	if (!buf)
+		return NULL;
+
+	buf[0] = '\0';
+	/* Forge string buf with all available names */
+	for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
+		if (attach_type && !section_defs[i].is_attachable)
+			continue;
+
+		if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
+			free(buf);
+			return NULL;
+		}
+		strcat(buf, " ");
+		strcat(buf, section_defs[i].sec);
+	}
+
+	return buf;
+}
+
+int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
+			     enum bpf_attach_type *expected_attach_type)
+{
+	const struct bpf_sec_def *sec_def;
+	char *type_names;
+
+	if (!name)
+		return -EINVAL;
+
+	sec_def = find_sec_def(name);
+	if (sec_def) {
+		*prog_type = sec_def->prog_type;
+		*expected_attach_type = sec_def->expected_attach_type;
+		return 0;
+	}
+
+	pr_debug("failed to guess program type from ELF section '%s'\n", name);
+	type_names = libbpf_get_type_names(false);
+	if (type_names != NULL) {
+		pr_debug("supported section(type) names are:%s\n", type_names);
+		free(type_names);
+	}
+
+	return -ESRCH;
+}
+
+static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
+						     size_t offset)
+{
+	struct bpf_map *map;
+	size_t i;
+
+	for (i = 0; i < obj->nr_maps; i++) {
+		map = &obj->maps[i];
+		if (!bpf_map__is_struct_ops(map))
+			continue;
+		if (map->sec_offset <= offset &&
+		    offset - map->sec_offset < map->def.value_size)
+			return map;
+	}
+
+	return NULL;
+}
+
+/* Collect the reloc from ELF and populate the st_ops->progs[] */
+static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
+					    GElf_Shdr *shdr, Elf_Data *data)
+{
+	const struct btf_member *member;
+	struct bpf_struct_ops *st_ops;
+	struct bpf_program *prog;
+	unsigned int shdr_idx;
+	const struct btf *btf;
+	struct bpf_map *map;
+	Elf_Data *symbols;
+	unsigned int moff, insn_idx;
+	const char *name;
+	__u32 member_idx;
+	GElf_Sym sym;
+	GElf_Rel rel;
+	int i, nrels;
+
+	symbols = obj->efile.symbols;
+	btf = obj->btf;
+	nrels = shdr->sh_size / shdr->sh_entsize;
+	for (i = 0; i < nrels; i++) {
+		if (!gelf_getrel(data, i, &rel)) {
+			pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
+			return -LIBBPF_ERRNO__FORMAT;
+		}
+
+		if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
+			pr_warn("struct_ops reloc: symbol %zx not found\n",
+				(size_t)GELF_R_SYM(rel.r_info));
+			return -LIBBPF_ERRNO__FORMAT;
+		}
+
+		name = elf_sym_str(obj, sym.st_name) ?: "<?>";
+		map = find_struct_ops_map_by_offset(obj, rel.r_offset);
+		if (!map) {
+			pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n",
+				(size_t)rel.r_offset);
+			return -EINVAL;
+		}
+
+		moff = rel.r_offset - map->sec_offset;
+		shdr_idx = sym.st_shndx;
+		st_ops = map->st_ops;
+		pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel.r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
+			 map->name,
+			 (long long)(rel.r_info >> 32),
+			 (long long)sym.st_value,
+			 shdr_idx, (size_t)rel.r_offset,
+			 map->sec_offset, sym.st_name, name);
+
+		if (shdr_idx >= SHN_LORESERVE) {
+			pr_warn("struct_ops reloc %s: rel.r_offset %zu shdr_idx %u unsupported non-static function\n",
+				map->name, (size_t)rel.r_offset, shdr_idx);
+			return -LIBBPF_ERRNO__RELOC;
+		}
+		if (sym.st_value % BPF_INSN_SZ) {
+			pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
+				map->name, (unsigned long long)sym.st_value);
+			return -LIBBPF_ERRNO__FORMAT;
+		}
+		insn_idx = sym.st_value / BPF_INSN_SZ;
+
+		member = find_member_by_offset(st_ops->type, moff * 8);
+		if (!member) {
+			pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
+				map->name, moff);
+			return -EINVAL;
+		}
+		member_idx = member - btf_members(st_ops->type);
+		name = btf__name_by_offset(btf, member->name_off);
+
+		if (!resolve_func_ptr(btf, member->type, NULL)) {
+			pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
+				map->name, name);
+			return -EINVAL;
+		}
+
+		prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
+		if (!prog) {
+			pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
+				map->name, shdr_idx, name);
+			return -EINVAL;
+		}
+
+		if (prog->type == BPF_PROG_TYPE_UNSPEC) {
+			const struct bpf_sec_def *sec_def;
+
+			sec_def = find_sec_def(prog->sec_name);
+			if (sec_def &&
+			    sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) {
+				/* for pr_warn */
+				prog->type = sec_def->prog_type;
+				goto invalid_prog;
+			}
+
+			prog->type = BPF_PROG_TYPE_STRUCT_OPS;
+			prog->attach_btf_id = st_ops->type_id;
+			prog->expected_attach_type = member_idx;
+		} else if (prog->type != BPF_PROG_TYPE_STRUCT_OPS ||
+			   prog->attach_btf_id != st_ops->type_id ||
+			   prog->expected_attach_type != member_idx) {
+			goto invalid_prog;
+		}
+		st_ops->progs[member_idx] = prog;
+	}
+
+	return 0;
+
+invalid_prog:
+	pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
+		map->name, prog->name, prog->sec_name, prog->type,
+		prog->attach_btf_id, prog->expected_attach_type, name);
+	return -EINVAL;
+}
+
+#define BTF_TRACE_PREFIX "btf_trace_"
+#define BTF_LSM_PREFIX "bpf_lsm_"
+#define BTF_ITER_PREFIX "bpf_iter_"
+#define BTF_MAX_NAME_SIZE 128
+
+static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
+				   const char *name, __u32 kind)
+{
+	char btf_type_name[BTF_MAX_NAME_SIZE];
+	int ret;
+
+	ret = snprintf(btf_type_name, sizeof(btf_type_name),
+		       "%s%s", prefix, name);
+	/* snprintf returns the number of characters written excluding the
+	 * the terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
+	 * indicates truncation.
+	 */
+	if (ret < 0 || ret >= sizeof(btf_type_name))
+		return -ENAMETOOLONG;
+	return btf__find_by_name_kind(btf, btf_type_name, kind);
+}
+
+static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name,
+					enum bpf_attach_type attach_type)
+{
+	int err;
+
+	if (attach_type == BPF_TRACE_RAW_TP)
+		err = find_btf_by_prefix_kind(btf, BTF_TRACE_PREFIX, name,
+					      BTF_KIND_TYPEDEF);
+	else if (attach_type == BPF_LSM_MAC)
+		err = find_btf_by_prefix_kind(btf, BTF_LSM_PREFIX, name,
+					      BTF_KIND_FUNC);
+	else if (attach_type == BPF_TRACE_ITER)
+		err = find_btf_by_prefix_kind(btf, BTF_ITER_PREFIX, name,
+					      BTF_KIND_FUNC);
+	else
+		err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
+
+	if (err <= 0)
+		pr_warn("%s is not found in vmlinux BTF\n", name);
+
+	return err;
+}
+
+int libbpf_find_vmlinux_btf_id(const char *name,
+			       enum bpf_attach_type attach_type)
+{
+	struct btf *btf;
+	int err;
+
+	btf = libbpf_find_kernel_btf();
+	if (IS_ERR(btf)) {
+		pr_warn("vmlinux BTF is not found\n");
+		return -EINVAL;
+	}
+
+	err = __find_vmlinux_btf_id(btf, name, attach_type);
+	btf__free(btf);
+	return err;
+}
+
+static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
+{
+	struct bpf_prog_info_linear *info_linear;
+	struct bpf_prog_info *info;
+	struct btf *btf = NULL;
+	int err = -EINVAL;
+
+	info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
+	if (IS_ERR_OR_NULL(info_linear)) {
+		pr_warn("failed get_prog_info_linear for FD %d\n",
+			attach_prog_fd);
+		return -EINVAL;
+	}
+	info = &info_linear->info;
+	if (!info->btf_id) {
+		pr_warn("The target program doesn't have BTF\n");
+		goto out;
+	}
+	if (btf__get_from_id(info->btf_id, &btf)) {
+		pr_warn("Failed to get BTF of the program\n");
+		goto out;
+	}
+	err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
+	btf__free(btf);
+	if (err <= 0) {
+		pr_warn("%s is not found in prog's BTF\n", name);
+		goto out;
+	}
+out:
+	free(info_linear);
+	return err;
+}
+
+static int libbpf_find_attach_btf_id(struct bpf_program *prog)
+{
+	enum bpf_attach_type attach_type = prog->expected_attach_type;
+	__u32 attach_prog_fd = prog->attach_prog_fd;
+	const char *name = prog->sec_name;
+	int i, err;
+
+	if (!name)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
+		if (!section_defs[i].is_attach_btf)
+			continue;
+		if (strncmp(name, section_defs[i].sec, section_defs[i].len))
+			continue;
+		if (attach_prog_fd)
+			err = libbpf_find_prog_btf_id(name + section_defs[i].len,
+						      attach_prog_fd);
+		else
+			err = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
+						    name + section_defs[i].len,
+						    attach_type);
+		return err;
+	}
+	pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name);
+	return -ESRCH;
+}
+
+int libbpf_attach_type_by_name(const char *name,
+			       enum bpf_attach_type *attach_type)
+{
+	char *type_names;
+	int i;
+
+	if (!name)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
+		if (strncmp(name, section_defs[i].sec, section_defs[i].len))
+			continue;
+		if (!section_defs[i].is_attachable)
+			return -EINVAL;
+		*attach_type = section_defs[i].expected_attach_type;
+		return 0;
+	}
+	pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
+	type_names = libbpf_get_type_names(true);
+	if (type_names != NULL) {
+		pr_debug("attachable section(type) names are:%s\n", type_names);
+		free(type_names);
+	}
+
+	return -EINVAL;
+}
+
+int bpf_map__fd(const struct bpf_map *map)
+{
+	return map ? map->fd : -EINVAL;
+}
+
+const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
+{
+	return map ? &map->def : ERR_PTR(-EINVAL);
+}
+
+const char *bpf_map__name(const struct bpf_map *map)
+{
+	return map ? map->name : NULL;
+}
+
+enum bpf_map_type bpf_map__type(const struct bpf_map *map)
+{
+	return map->def.type;
+}
+
+int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
+{
+	if (map->fd >= 0)
+		return -EBUSY;
+	map->def.type = type;
+	return 0;
+}
+
+__u32 bpf_map__map_flags(const struct bpf_map *map)
+{
+	return map->def.map_flags;
+}
+
+int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
+{
+	if (map->fd >= 0)
+		return -EBUSY;
+	map->def.map_flags = flags;
+	return 0;
+}
+
+__u32 bpf_map__numa_node(const struct bpf_map *map)
+{
+	return map->numa_node;
+}
+
+int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
+{
+	if (map->fd >= 0)
+		return -EBUSY;
+	map->numa_node = numa_node;
+	return 0;
+}
+
+__u32 bpf_map__key_size(const struct bpf_map *map)
+{
+	return map->def.key_size;
+}
+
+int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
+{
+	if (map->fd >= 0)
+		return -EBUSY;
+	map->def.key_size = size;
+	return 0;
+}
+
+__u32 bpf_map__value_size(const struct bpf_map *map)
+{
+	return map->def.value_size;
+}
+
+int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
+{
+	if (map->fd >= 0)
+		return -EBUSY;
+	map->def.value_size = size;
+	return 0;
+}
+
+__u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
+{
+	return map ? map->btf_key_type_id : 0;
+}
+
+__u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
+{
+	return map ? map->btf_value_type_id : 0;
+}
+
+int bpf_map__set_priv(struct bpf_map *map, void *priv,
+		     bpf_map_clear_priv_t clear_priv)
+{
+	if (!map)
+		return -EINVAL;
+
+	if (map->priv) {
+		if (map->clear_priv)
+			map->clear_priv(map, map->priv);
+	}
+
+	map->priv = priv;
+	map->clear_priv = clear_priv;
+	return 0;
+}
+
+void *bpf_map__priv(const struct bpf_map *map)
+{
+	return map ? map->priv : ERR_PTR(-EINVAL);
+}
+
+int bpf_map__set_initial_value(struct bpf_map *map,
+			       const void *data, size_t size)
+{
+	if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
+	    size != map->def.value_size || map->fd >= 0)
+		return -EINVAL;
+
+	memcpy(map->mmaped, data, size);
+	return 0;
+}
+
+bool bpf_map__is_offload_neutral(const struct bpf_map *map)
+{
+	return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
+}
+
+bool bpf_map__is_internal(const struct bpf_map *map)
+{
+	return map->libbpf_type != LIBBPF_MAP_UNSPEC;
+}
+
+__u32 bpf_map__ifindex(const struct bpf_map *map)
+{
+	return map->map_ifindex;
+}
+
+int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
+{
+	if (map->fd >= 0)
+		return -EBUSY;
+	map->map_ifindex = ifindex;
+	return 0;
+}
+
+int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
+{
+	if (!bpf_map_type__is_map_in_map(map->def.type)) {
+		pr_warn("error: unsupported map type\n");
+		return -EINVAL;
+	}
+	if (map->inner_map_fd != -1) {
+		pr_warn("error: inner_map_fd already specified\n");
+		return -EINVAL;
+	}
+	map->inner_map_fd = fd;
+	return 0;
+}
+
+static struct bpf_map *
+__bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
+{
+	ssize_t idx;
+	struct bpf_map *s, *e;
+
+	if (!obj || !obj->maps)
+		return NULL;
+
+	s = obj->maps;
+	e = obj->maps + obj->nr_maps;
+
+	if ((m < s) || (m >= e)) {
+		pr_warn("error in %s: map handler doesn't belong to object\n",
+			 __func__);
+		return NULL;
+	}
+
+	idx = (m - obj->maps) + i;
+	if (idx >= obj->nr_maps || idx < 0)
+		return NULL;
+	return &obj->maps[idx];
+}
+
+struct bpf_map *
+bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj)
+{
+	if (prev == NULL)
+		return obj->maps;
+
+	return __bpf_map__iter(prev, obj, 1);
+}
+
+struct bpf_map *
+bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj)
+{
+	if (next == NULL) {
+		if (!obj->nr_maps)
+			return NULL;
+		return obj->maps + obj->nr_maps - 1;
+	}
+
+	return __bpf_map__iter(next, obj, -1);
+}
+
+struct bpf_map *
+bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
+{
+	struct bpf_map *pos;
+
+	bpf_object__for_each_map(pos, obj) {
+		if (pos->name && !strcmp(pos->name, name))
+			return pos;
+	}
+	return NULL;
+}
+
+int
+bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
+{
+	return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
+}
+
+struct bpf_map *
+bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
+{
+	return ERR_PTR(-ENOTSUP);
+}
+
+long libbpf_get_error(const void *ptr)
+{
+	return PTR_ERR_OR_ZERO(ptr);
+}
+
+int bpf_prog_load(const char *file, enum bpf_prog_type type,
+		  struct bpf_object **pobj, int *prog_fd)
+{
+	struct bpf_prog_load_attr attr;
+
+	memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
+	attr.file = file;
+	attr.prog_type = type;
+	attr.expected_attach_type = 0;
+
+	return bpf_prog_load_xattr(&attr, pobj, prog_fd);
+}
+
+int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
+			struct bpf_object **pobj, int *prog_fd)
+{
+	struct bpf_object_open_attr open_attr = {};
+	struct bpf_program *prog, *first_prog = NULL;
+	struct bpf_object *obj;
+	struct bpf_map *map;
+	int err;
+
+	if (!attr)
+		return -EINVAL;
+	if (!attr->file)
+		return -EINVAL;
+
+	open_attr.file = attr->file;
+	open_attr.prog_type = attr->prog_type;
+
+	obj = bpf_object__open_xattr(&open_attr);
+	if (IS_ERR_OR_NULL(obj))
+		return -ENOENT;
+
+	bpf_object__for_each_program(prog, obj) {
+		enum bpf_attach_type attach_type = attr->expected_attach_type;
+		/*
+		 * to preserve backwards compatibility, bpf_prog_load treats
+		 * attr->prog_type, if specified, as an override to whatever
+		 * bpf_object__open guessed
+		 */
+		if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) {
+			bpf_program__set_type(prog, attr->prog_type);
+			bpf_program__set_expected_attach_type(prog,
+							      attach_type);
+		}
+		if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) {
+			/*
+			 * we haven't guessed from section name and user
+			 * didn't provide a fallback type, too bad...
+			 */
+			bpf_object__close(obj);
+			return -EINVAL;
+		}
+
+		prog->prog_ifindex = attr->ifindex;
+		prog->log_level = attr->log_level;
+		prog->prog_flags |= attr->prog_flags;
+		if (!first_prog)
+			first_prog = prog;
+	}
+
+	bpf_object__for_each_map(map, obj) {
+		if (!bpf_map__is_offload_neutral(map))
+			map->map_ifindex = attr->ifindex;
+	}
+
+	if (!first_prog) {
+		pr_warn("object file doesn't contain bpf program\n");
+		bpf_object__close(obj);
+		return -ENOENT;
+	}
+
+	err = bpf_object__load(obj);
+	if (err) {
+		bpf_object__close(obj);
+		return err;
+	}
+
+	*pobj = obj;
+	*prog_fd = bpf_program__fd(first_prog);
+	return 0;
+}
+
+struct bpf_link {
+	int (*detach)(struct bpf_link *link);
+	int (*destroy)(struct bpf_link *link);
+	char *pin_path;		/* NULL, if not pinned */
+	int fd;			/* hook FD, -1 if not applicable */
+	bool disconnected;
+};
+
+/* Replace link's underlying BPF program with the new one */
+int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
+{
+	return bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
+}
+
+/* Release "ownership" of underlying BPF resource (typically, BPF program
+ * attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected
+ * link, when destructed through bpf_link__destroy() call won't attempt to
+ * detach/unregisted that BPF resource. This is useful in situations where,
+ * say, attached BPF program has to outlive userspace program that attached it
+ * in the system. Depending on type of BPF program, though, there might be
+ * additional steps (like pinning BPF program in BPF FS) necessary to ensure
+ * exit of userspace program doesn't trigger automatic detachment and clean up
+ * inside the kernel.
+ */
+void bpf_link__disconnect(struct bpf_link *link)
+{
+	link->disconnected = true;
+}
+
+int bpf_link__destroy(struct bpf_link *link)
+{
+	int err = 0;
+
+	if (IS_ERR_OR_NULL(link))
+		return 0;
+
+	if (!link->disconnected && link->detach)
+		err = link->detach(link);
+	if (link->destroy)
+		link->destroy(link);
+	if (link->pin_path)
+		free(link->pin_path);
+	free(link);
+
+	return err;
+}
+
+int bpf_link__fd(const struct bpf_link *link)
+{
+	return link->fd;
+}
+
+const char *bpf_link__pin_path(const struct bpf_link *link)
+{
+	return link->pin_path;
+}
+
+static int bpf_link__detach_fd(struct bpf_link *link)
+{
+	return close(link->fd);
+}
+
+struct bpf_link *bpf_link__open(const char *path)
+{
+	struct bpf_link *link;
+	int fd;
+
+	fd = bpf_obj_get(path);
+	if (fd < 0) {
+		fd = -errno;
+		pr_warn("failed to open link at %s: %d\n", path, fd);
+		return ERR_PTR(fd);
+	}
+
+	link = calloc(1, sizeof(*link));
+	if (!link) {
+		close(fd);
+		return ERR_PTR(-ENOMEM);
+	}
+	link->detach = &bpf_link__detach_fd;
+	link->fd = fd;
+
+	link->pin_path = strdup(path);
+	if (!link->pin_path) {
+		bpf_link__destroy(link);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return link;
+}
+
+int bpf_link__detach(struct bpf_link *link)
+{
+	return bpf_link_detach(link->fd) ? -errno : 0;
+}
+
+int bpf_link__pin(struct bpf_link *link, const char *path)
+{
+	int err;
+
+	if (link->pin_path)
+		return -EBUSY;
+	err = make_parent_dir(path);
+	if (err)
+		return err;
+	err = check_path(path);
+	if (err)
+		return err;
+
+	link->pin_path = strdup(path);
+	if (!link->pin_path)
+		return -ENOMEM;
+
+	if (bpf_obj_pin(link->fd, link->pin_path)) {
+		err = -errno;
+		zfree(&link->pin_path);
+		return err;
+	}
+
+	pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
+	return 0;
+}
+
+int bpf_link__unpin(struct bpf_link *link)
+{
+	int err;
+
+	if (!link->pin_path)
+		return -EINVAL;
+
+	err = unlink(link->pin_path);
+	if (err != 0)
+		return -errno;
+
+	pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
+	zfree(&link->pin_path);
+	return 0;
+}
+
+static int bpf_link__detach_perf_event(struct bpf_link *link)
+{
+	int err;
+
+	err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
+	if (err)
+		err = -errno;
+
+	close(link->fd);
+	return err;
+}
+
+struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
+						int pfd)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link *link;
+	int prog_fd, err;
+
+	if (pfd < 0) {
+		pr_warn("prog '%s': invalid perf event FD %d\n",
+			prog->name, pfd);
+		return ERR_PTR(-EINVAL);
+	}
+	prog_fd = bpf_program__fd(prog);
+	if (prog_fd < 0) {
+		pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
+			prog->name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	link = calloc(1, sizeof(*link));
+	if (!link)
+		return ERR_PTR(-ENOMEM);
+	link->detach = &bpf_link__detach_perf_event;
+	link->fd = pfd;
+
+	if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
+		err = -errno;
+		free(link);
+		pr_warn("prog '%s': failed to attach to pfd %d: %s\n",
+			prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		if (err == -EPROTO)
+			pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
+				prog->name, pfd);
+		return ERR_PTR(err);
+	}
+	if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
+		err = -errno;
+		free(link);
+		pr_warn("prog '%s': failed to enable pfd %d: %s\n",
+			prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return ERR_PTR(err);
+	}
+	return link;
+}
+
+/*
+ * this function is expected to parse integer in the range of [0, 2^31-1] from
+ * given file using scanf format string fmt. If actual parsed value is
+ * negative, the result might be indistinguishable from error
+ */
+static int parse_uint_from_file(const char *file, const char *fmt)
+{
+	char buf[STRERR_BUFSIZE];
+	int err, ret;
+	FILE *f;
+
+	f = fopen(file, "r");
+	if (!f) {
+		err = -errno;
+		pr_debug("failed to open '%s': %s\n", file,
+			 libbpf_strerror_r(err, buf, sizeof(buf)));
+		return err;
+	}
+	err = fscanf(f, fmt, &ret);
+	if (err != 1) {
+		err = err == EOF ? -EIO : -errno;
+		pr_debug("failed to parse '%s': %s\n", file,
+			libbpf_strerror_r(err, buf, sizeof(buf)));
+		fclose(f);
+		return err;
+	}
+	fclose(f);
+	return ret;
+}
+
+static int determine_kprobe_perf_type(void)
+{
+	const char *file = "/sys/bus/event_source/devices/kprobe/type";
+
+	return parse_uint_from_file(file, "%d\n");
+}
+
+static int determine_uprobe_perf_type(void)
+{
+	const char *file = "/sys/bus/event_source/devices/uprobe/type";
+
+	return parse_uint_from_file(file, "%d\n");
+}
+
+static int determine_kprobe_retprobe_bit(void)
+{
+	const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
+
+	return parse_uint_from_file(file, "config:%d\n");
+}
+
+static int determine_uprobe_retprobe_bit(void)
+{
+	const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
+
+	return parse_uint_from_file(file, "config:%d\n");
+}
+
+static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
+				 uint64_t offset, int pid)
+{
+	struct perf_event_attr attr = {};
+	char errmsg[STRERR_BUFSIZE];
+	int type, pfd, err;
+
+	type = uprobe ? determine_uprobe_perf_type()
+		      : determine_kprobe_perf_type();
+	if (type < 0) {
+		pr_warn("failed to determine %s perf type: %s\n",
+			uprobe ? "uprobe" : "kprobe",
+			libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
+		return type;
+	}
+	if (retprobe) {
+		int bit = uprobe ? determine_uprobe_retprobe_bit()
+				 : determine_kprobe_retprobe_bit();
+
+		if (bit < 0) {
+			pr_warn("failed to determine %s retprobe bit: %s\n",
+				uprobe ? "uprobe" : "kprobe",
+				libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
+			return bit;
+		}
+		attr.config |= 1 << bit;
+	}
+	attr.size = sizeof(attr);
+	attr.type = type;
+	attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
+	attr.config2 = offset;		 /* kprobe_addr or probe_offset */
+
+	/* pid filter is meaningful only for uprobes */
+	pfd = syscall(__NR_perf_event_open, &attr,
+		      pid < 0 ? -1 : pid /* pid */,
+		      pid == -1 ? 0 : -1 /* cpu */,
+		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
+	if (pfd < 0) {
+		err = -errno;
+		pr_warn("%s perf_event_open() failed: %s\n",
+			uprobe ? "uprobe" : "kprobe",
+			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return err;
+	}
+	return pfd;
+}
+
+struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
+					    bool retprobe,
+					    const char *func_name)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link *link;
+	int pfd, err;
+
+	pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
+				    0 /* offset */, -1 /* pid */);
+	if (pfd < 0) {
+		pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
+			prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
+			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(pfd);
+	}
+	link = bpf_program__attach_perf_event(prog, pfd);
+	if (IS_ERR(link)) {
+		close(pfd);
+		err = PTR_ERR(link);
+		pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
+			prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
+			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return link;
+	}
+	return link;
+}
+
+static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
+				      struct bpf_program *prog)
+{
+	const char *func_name;
+	bool retprobe;
+
+	func_name = prog->sec_name + sec->len;
+	retprobe = strcmp(sec->sec, "kretprobe/") == 0;
+
+	return bpf_program__attach_kprobe(prog, retprobe, func_name);
+}
+
+struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
+					    bool retprobe, pid_t pid,
+					    const char *binary_path,
+					    size_t func_offset)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link *link;
+	int pfd, err;
+
+	pfd = perf_event_open_probe(true /* uprobe */, retprobe,
+				    binary_path, func_offset, pid);
+	if (pfd < 0) {
+		pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
+			prog->name, retprobe ? "uretprobe" : "uprobe",
+			binary_path, func_offset,
+			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(pfd);
+	}
+	link = bpf_program__attach_perf_event(prog, pfd);
+	if (IS_ERR(link)) {
+		close(pfd);
+		err = PTR_ERR(link);
+		pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
+			prog->name, retprobe ? "uretprobe" : "uprobe",
+			binary_path, func_offset,
+			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return link;
+	}
+	return link;
+}
+
+static int determine_tracepoint_id(const char *tp_category,
+				   const char *tp_name)
+{
+	char file[PATH_MAX];
+	int ret;
+
+	ret = snprintf(file, sizeof(file),
+		       "/sys/kernel/debug/tracing/events/%s/%s/id",
+		       tp_category, tp_name);
+	if (ret < 0)
+		return -errno;
+	if (ret >= sizeof(file)) {
+		pr_debug("tracepoint %s/%s path is too long\n",
+			 tp_category, tp_name);
+		return -E2BIG;
+	}
+	return parse_uint_from_file(file, "%d\n");
+}
+
+static int perf_event_open_tracepoint(const char *tp_category,
+				      const char *tp_name)
+{
+	struct perf_event_attr attr = {};
+	char errmsg[STRERR_BUFSIZE];
+	int tp_id, pfd, err;
+
+	tp_id = determine_tracepoint_id(tp_category, tp_name);
+	if (tp_id < 0) {
+		pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
+			tp_category, tp_name,
+			libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
+		return tp_id;
+	}
+
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.size = sizeof(attr);
+	attr.config = tp_id;
+
+	pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
+		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
+	if (pfd < 0) {
+		err = -errno;
+		pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
+			tp_category, tp_name,
+			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return err;
+	}
+	return pfd;
+}
+
+struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
+						const char *tp_category,
+						const char *tp_name)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link *link;
+	int pfd, err;
+
+	pfd = perf_event_open_tracepoint(tp_category, tp_name);
+	if (pfd < 0) {
+		pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
+			prog->name, tp_category, tp_name,
+			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(pfd);
+	}
+	link = bpf_program__attach_perf_event(prog, pfd);
+	if (IS_ERR(link)) {
+		close(pfd);
+		err = PTR_ERR(link);
+		pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
+			prog->name, tp_category, tp_name,
+			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return link;
+	}
+	return link;
+}
+
+static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
+				  struct bpf_program *prog)
+{
+	char *sec_name, *tp_cat, *tp_name;
+	struct bpf_link *link;
+
+	sec_name = strdup(prog->sec_name);
+	if (!sec_name)
+		return ERR_PTR(-ENOMEM);
+
+	/* extract "tp/<category>/<name>" */
+	tp_cat = sec_name + sec->len;
+	tp_name = strchr(tp_cat, '/');
+	if (!tp_name) {
+		link = ERR_PTR(-EINVAL);
+		goto out;
+	}
+	*tp_name = '\0';
+	tp_name++;
+
+	link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
+out:
+	free(sec_name);
+	return link;
+}
+
+struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
+						    const char *tp_name)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link *link;
+	int prog_fd, pfd;
+
+	prog_fd = bpf_program__fd(prog);
+	if (prog_fd < 0) {
+		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	link = calloc(1, sizeof(*link));
+	if (!link)
+		return ERR_PTR(-ENOMEM);
+	link->detach = &bpf_link__detach_fd;
+
+	pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
+	if (pfd < 0) {
+		pfd = -errno;
+		free(link);
+		pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
+			prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(pfd);
+	}
+	link->fd = pfd;
+	return link;
+}
+
+static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
+				      struct bpf_program *prog)
+{
+	const char *tp_name = prog->sec_name + sec->len;
+
+	return bpf_program__attach_raw_tracepoint(prog, tp_name);
+}
+
+/* Common logic for all BPF program types that attach to a btf_id */
+static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link *link;
+	int prog_fd, pfd;
+
+	prog_fd = bpf_program__fd(prog);
+	if (prog_fd < 0) {
+		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	link = calloc(1, sizeof(*link));
+	if (!link)
+		return ERR_PTR(-ENOMEM);
+	link->detach = &bpf_link__detach_fd;
+
+	pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
+	if (pfd < 0) {
+		pfd = -errno;
+		free(link);
+		pr_warn("prog '%s': failed to attach: %s\n",
+			prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(pfd);
+	}
+	link->fd = pfd;
+	return (struct bpf_link *)link;
+}
+
+struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
+{
+	return bpf_program__attach_btf_id(prog);
+}
+
+struct bpf_link *bpf_program__attach_lsm(struct bpf_program *prog)
+{
+	return bpf_program__attach_btf_id(prog);
+}
+
+static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
+				     struct bpf_program *prog)
+{
+	return bpf_program__attach_trace(prog);
+}
+
+static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
+				   struct bpf_program *prog)
+{
+	return bpf_program__attach_lsm(prog);
+}
+
+static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
+				    struct bpf_program *prog)
+{
+	return bpf_program__attach_iter(prog, NULL);
+}
+
+static struct bpf_link *
+bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
+		       const char *target_name)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
+			    .target_btf_id = btf_id);
+	enum bpf_attach_type attach_type;
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link *link;
+	int prog_fd, link_fd;
+
+	prog_fd = bpf_program__fd(prog);
+	if (prog_fd < 0) {
+		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	link = calloc(1, sizeof(*link));
+	if (!link)
+		return ERR_PTR(-ENOMEM);
+	link->detach = &bpf_link__detach_fd;
+
+	attach_type = bpf_program__get_expected_attach_type(prog);
+	link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts);
+	if (link_fd < 0) {
+		link_fd = -errno;
+		free(link);
+		pr_warn("prog '%s': failed to attach to %s: %s\n",
+			prog->name, target_name,
+			libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(link_fd);
+	}
+	link->fd = link_fd;
+	return link;
+}
+
+struct bpf_link *
+bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
+{
+	return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup");
+}
+
+struct bpf_link *
+bpf_program__attach_netns(struct bpf_program *prog, int netns_fd)
+{
+	return bpf_program__attach_fd(prog, netns_fd, 0, "netns");
+}
+
+struct bpf_link *bpf_program__attach_xdp(struct bpf_program *prog, int ifindex)
+{
+	/* target_fd/target_ifindex use the same field in LINK_CREATE */
+	return bpf_program__attach_fd(prog, ifindex, 0, "xdp");
+}
+
+struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog,
+					      int target_fd,
+					      const char *attach_func_name)
+{
+	int btf_id;
+
+	if (!!target_fd != !!attach_func_name) {
+		pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
+			prog->name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (prog->type != BPF_PROG_TYPE_EXT) {
+		pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace",
+			prog->name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (target_fd) {
+		btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
+		if (btf_id < 0)
+			return ERR_PTR(btf_id);
+
+		return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
+	} else {
+		/* no target, so use raw_tracepoint_open for compatibility
+		 * with old kernels
+		 */
+		return bpf_program__attach_trace(prog);
+	}
+}
+
+struct bpf_link *
+bpf_program__attach_iter(struct bpf_program *prog,
+			 const struct bpf_iter_attach_opts *opts)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link *link;
+	int prog_fd, link_fd;
+	__u32 target_fd = 0;
+
+	if (!OPTS_VALID(opts, bpf_iter_attach_opts))
+		return ERR_PTR(-EINVAL);
+
+	link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
+	link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
+
+	prog_fd = bpf_program__fd(prog);
+	if (prog_fd < 0) {
+		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	link = calloc(1, sizeof(*link));
+	if (!link)
+		return ERR_PTR(-ENOMEM);
+	link->detach = &bpf_link__detach_fd;
+
+	link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
+				  &link_create_opts);
+	if (link_fd < 0) {
+		link_fd = -errno;
+		free(link);
+		pr_warn("prog '%s': failed to attach to iterator: %s\n",
+			prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(link_fd);
+	}
+	link->fd = link_fd;
+	return link;
+}
+
+struct bpf_link *bpf_program__attach(struct bpf_program *prog)
+{
+	const struct bpf_sec_def *sec_def;
+
+	sec_def = find_sec_def(prog->sec_name);
+	if (!sec_def || !sec_def->attach_fn)
+		return ERR_PTR(-ESRCH);
+
+	return sec_def->attach_fn(sec_def, prog);
+}
+
+static int bpf_link__detach_struct_ops(struct bpf_link *link)
+{
+	__u32 zero = 0;
+
+	if (bpf_map_delete_elem(link->fd, &zero))
+		return -errno;
+
+	return 0;
+}
+
+struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
+{
+	struct bpf_struct_ops *st_ops;
+	struct bpf_link *link;
+	__u32 i, zero = 0;
+	int err;
+
+	if (!bpf_map__is_struct_ops(map) || map->fd == -1)
+		return ERR_PTR(-EINVAL);
+
+	link = calloc(1, sizeof(*link));
+	if (!link)
+		return ERR_PTR(-EINVAL);
+
+	st_ops = map->st_ops;
+	for (i = 0; i < btf_vlen(st_ops->type); i++) {
+		struct bpf_program *prog = st_ops->progs[i];
+		void *kern_data;
+		int prog_fd;
+
+		if (!prog)
+			continue;
+
+		prog_fd = bpf_program__fd(prog);
+		kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
+		*(unsigned long *)kern_data = prog_fd;
+	}
+
+	err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0);
+	if (err) {
+		err = -errno;
+		free(link);
+		return ERR_PTR(err);
+	}
+
+	link->detach = bpf_link__detach_struct_ops;
+	link->fd = map->fd;
+
+	return link;
+}
+
+enum bpf_perf_event_ret
+bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
+			   void **copy_mem, size_t *copy_size,
+			   bpf_perf_event_print_t fn, void *private_data)
+{
+	struct perf_event_mmap_page *header = mmap_mem;
+	__u64 data_head = ring_buffer_read_head(header);
+	__u64 data_tail = header->data_tail;
+	void *base = ((__u8 *)header) + page_size;
+	int ret = LIBBPF_PERF_EVENT_CONT;
+	struct perf_event_header *ehdr;
+	size_t ehdr_size;
+
+	while (data_head != data_tail) {
+		ehdr = base + (data_tail & (mmap_size - 1));
+		ehdr_size = ehdr->size;
+
+		if (((void *)ehdr) + ehdr_size > base + mmap_size) {
+			void *copy_start = ehdr;
+			size_t len_first = base + mmap_size - copy_start;
+			size_t len_secnd = ehdr_size - len_first;
+
+			if (*copy_size < ehdr_size) {
+				free(*copy_mem);
+				*copy_mem = malloc(ehdr_size);
+				if (!*copy_mem) {
+					*copy_size = 0;
+					ret = LIBBPF_PERF_EVENT_ERROR;
+					break;
+				}
+				*copy_size = ehdr_size;
+			}
+
+			memcpy(*copy_mem, copy_start, len_first);
+			memcpy(*copy_mem + len_first, base, len_secnd);
+			ehdr = *copy_mem;
+		}
+
+		ret = fn(ehdr, private_data);
+		data_tail += ehdr_size;
+		if (ret != LIBBPF_PERF_EVENT_CONT)
+			break;
+	}
+
+	ring_buffer_write_tail(header, data_tail);
+	return ret;
+}
+
+struct perf_buffer;
+
+struct perf_buffer_params {
+	struct perf_event_attr *attr;
+	/* if event_cb is specified, it takes precendence */
+	perf_buffer_event_fn event_cb;
+	/* sample_cb and lost_cb are higher-level common-case callbacks */
+	perf_buffer_sample_fn sample_cb;
+	perf_buffer_lost_fn lost_cb;
+	void *ctx;
+	int cpu_cnt;
+	int *cpus;
+	int *map_keys;
+};
+
+struct perf_cpu_buf {
+	struct perf_buffer *pb;
+	void *base; /* mmap()'ed memory */
+	void *buf; /* for reconstructing segmented data */
+	size_t buf_size;
+	int fd;
+	int cpu;
+	int map_key;
+};
+
+struct perf_buffer {
+	perf_buffer_event_fn event_cb;
+	perf_buffer_sample_fn sample_cb;
+	perf_buffer_lost_fn lost_cb;
+	void *ctx; /* passed into callbacks */
+
+	size_t page_size;
+	size_t mmap_size;
+	struct perf_cpu_buf **cpu_bufs;
+	struct epoll_event *events;
+	int cpu_cnt; /* number of allocated CPU buffers */
+	int epoll_fd; /* perf event FD */
+	int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
+};
+
+static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
+				      struct perf_cpu_buf *cpu_buf)
+{
+	if (!cpu_buf)
+		return;
+	if (cpu_buf->base &&
+	    munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
+		pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
+	if (cpu_buf->fd >= 0) {
+		ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
+		close(cpu_buf->fd);
+	}
+	free(cpu_buf->buf);
+	free(cpu_buf);
+}
+
+void perf_buffer__free(struct perf_buffer *pb)
+{
+	int i;
+
+	if (IS_ERR_OR_NULL(pb))
+		return;
+	if (pb->cpu_bufs) {
+		for (i = 0; i < pb->cpu_cnt; i++) {
+			struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
+
+			if (!cpu_buf)
+				continue;
+
+			bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
+			perf_buffer__free_cpu_buf(pb, cpu_buf);
+		}
+		free(pb->cpu_bufs);
+	}
+	if (pb->epoll_fd >= 0)
+		close(pb->epoll_fd);
+	free(pb->events);
+	free(pb);
+}
+
+static struct perf_cpu_buf *
+perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
+			  int cpu, int map_key)
+{
+	struct perf_cpu_buf *cpu_buf;
+	char msg[STRERR_BUFSIZE];
+	int err;
+
+	cpu_buf = calloc(1, sizeof(*cpu_buf));
+	if (!cpu_buf)
+		return ERR_PTR(-ENOMEM);
+
+	cpu_buf->pb = pb;
+	cpu_buf->cpu = cpu;
+	cpu_buf->map_key = map_key;
+
+	cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
+			      -1, PERF_FLAG_FD_CLOEXEC);
+	if (cpu_buf->fd < 0) {
+		err = -errno;
+		pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
+			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
+		goto error;
+	}
+
+	cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
+			     PROT_READ | PROT_WRITE, MAP_SHARED,
+			     cpu_buf->fd, 0);
+	if (cpu_buf->base == MAP_FAILED) {
+		cpu_buf->base = NULL;
+		err = -errno;
+		pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
+			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
+		goto error;
+	}
+
+	if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
+		err = -errno;
+		pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
+			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
+		goto error;
+	}
+
+	return cpu_buf;
+
+error:
+	perf_buffer__free_cpu_buf(pb, cpu_buf);
+	return (struct perf_cpu_buf *)ERR_PTR(err);
+}
+
+static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
+					      struct perf_buffer_params *p);
+
+struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
+				     const struct perf_buffer_opts *opts)
+{
+	struct perf_buffer_params p = {};
+	struct perf_event_attr attr = { 0, };
+
+	attr.config = PERF_COUNT_SW_BPF_OUTPUT;
+	attr.type = PERF_TYPE_SOFTWARE;
+	attr.sample_type = PERF_SAMPLE_RAW;
+	attr.sample_period = 1;
+	attr.wakeup_events = 1;
+
+	p.attr = &attr;
+	p.sample_cb = opts ? opts->sample_cb : NULL;
+	p.lost_cb = opts ? opts->lost_cb : NULL;
+	p.ctx = opts ? opts->ctx : NULL;
+
+	return __perf_buffer__new(map_fd, page_cnt, &p);
+}
+
+struct perf_buffer *
+perf_buffer__new_raw(int map_fd, size_t page_cnt,
+		     const struct perf_buffer_raw_opts *opts)
+{
+	struct perf_buffer_params p = {};
+
+	p.attr = opts->attr;
+	p.event_cb = opts->event_cb;
+	p.ctx = opts->ctx;
+	p.cpu_cnt = opts->cpu_cnt;
+	p.cpus = opts->cpus;
+	p.map_keys = opts->map_keys;
+
+	return __perf_buffer__new(map_fd, page_cnt, &p);
+}
+
+static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
+					      struct perf_buffer_params *p)
+{
+	const char *online_cpus_file = "/sys/devices/system/cpu/online";
+	struct bpf_map_info map;
+	char msg[STRERR_BUFSIZE];
+	struct perf_buffer *pb;
+	bool *online = NULL;
+	__u32 map_info_len;
+	int err, i, j, n;
+
+	if (page_cnt & (page_cnt - 1)) {
+		pr_warn("page count should be power of two, but is %zu\n",
+			page_cnt);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* best-effort sanity checks */
+	memset(&map, 0, sizeof(map));
+	map_info_len = sizeof(map);
+	err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
+	if (err) {
+		err = -errno;
+		/* if BPF_OBJ_GET_INFO_BY_FD is supported, will return
+		 * -EBADFD, -EFAULT, or -E2BIG on real error
+		 */
+		if (err != -EINVAL) {
+			pr_warn("failed to get map info for map FD %d: %s\n",
+				map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
+			return ERR_PTR(err);
+		}
+		pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
+			 map_fd);
+	} else {
+		if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+			pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
+				map.name);
+			return ERR_PTR(-EINVAL);
+		}
+	}
+
+	pb = calloc(1, sizeof(*pb));
+	if (!pb)
+		return ERR_PTR(-ENOMEM);
+
+	pb->event_cb = p->event_cb;
+	pb->sample_cb = p->sample_cb;
+	pb->lost_cb = p->lost_cb;
+	pb->ctx = p->ctx;
+
+	pb->page_size = getpagesize();
+	pb->mmap_size = pb->page_size * page_cnt;
+	pb->map_fd = map_fd;
+
+	pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+	if (pb->epoll_fd < 0) {
+		err = -errno;
+		pr_warn("failed to create epoll instance: %s\n",
+			libbpf_strerror_r(err, msg, sizeof(msg)));
+		goto error;
+	}
+
+	if (p->cpu_cnt > 0) {
+		pb->cpu_cnt = p->cpu_cnt;
+	} else {
+		pb->cpu_cnt = libbpf_num_possible_cpus();
+		if (pb->cpu_cnt < 0) {
+			err = pb->cpu_cnt;
+			goto error;
+		}
+		if (map.max_entries && map.max_entries < pb->cpu_cnt)
+			pb->cpu_cnt = map.max_entries;
+	}
+
+	pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
+	if (!pb->events) {
+		err = -ENOMEM;
+		pr_warn("failed to allocate events: out of memory\n");
+		goto error;
+	}
+	pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
+	if (!pb->cpu_bufs) {
+		err = -ENOMEM;
+		pr_warn("failed to allocate buffers: out of memory\n");
+		goto error;
+	}
+
+	err = parse_cpu_mask_file(online_cpus_file, &online, &n);
+	if (err) {
+		pr_warn("failed to get online CPU mask: %d\n", err);
+		goto error;
+	}
+
+	for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
+		struct perf_cpu_buf *cpu_buf;
+		int cpu, map_key;
+
+		cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
+		map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
+
+		/* in case user didn't explicitly requested particular CPUs to
+		 * be attached to, skip offline/not present CPUs
+		 */
+		if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
+			continue;
+
+		cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
+		if (IS_ERR(cpu_buf)) {
+			err = PTR_ERR(cpu_buf);
+			goto error;
+		}
+
+		pb->cpu_bufs[j] = cpu_buf;
+
+		err = bpf_map_update_elem(pb->map_fd, &map_key,
+					  &cpu_buf->fd, 0);
+		if (err) {
+			err = -errno;
+			pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
+				cpu, map_key, cpu_buf->fd,
+				libbpf_strerror_r(err, msg, sizeof(msg)));
+			goto error;
+		}
+
+		pb->events[j].events = EPOLLIN;
+		pb->events[j].data.ptr = cpu_buf;
+		if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
+			      &pb->events[j]) < 0) {
+			err = -errno;
+			pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
+				cpu, cpu_buf->fd,
+				libbpf_strerror_r(err, msg, sizeof(msg)));
+			goto error;
+		}
+		j++;
+	}
+	pb->cpu_cnt = j;
+	free(online);
+
+	return pb;
+
+error:
+	free(online);
+	if (pb)
+		perf_buffer__free(pb);
+	return ERR_PTR(err);
+}
+
+struct perf_sample_raw {
+	struct perf_event_header header;
+	uint32_t size;
+	char data[];
+};
+
+struct perf_sample_lost {
+	struct perf_event_header header;
+	uint64_t id;
+	uint64_t lost;
+	uint64_t sample_id;
+};
+
+static enum bpf_perf_event_ret
+perf_buffer__process_record(struct perf_event_header *e, void *ctx)
+{
+	struct perf_cpu_buf *cpu_buf = ctx;
+	struct perf_buffer *pb = cpu_buf->pb;
+	void *data = e;
+
+	/* user wants full control over parsing perf event */
+	if (pb->event_cb)
+		return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
+
+	switch (e->type) {
+	case PERF_RECORD_SAMPLE: {
+		struct perf_sample_raw *s = data;
+
+		if (pb->sample_cb)
+			pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
+		break;
+	}
+	case PERF_RECORD_LOST: {
+		struct perf_sample_lost *s = data;
+
+		if (pb->lost_cb)
+			pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
+		break;
+	}
+	default:
+		pr_warn("unknown perf sample type %d\n", e->type);
+		return LIBBPF_PERF_EVENT_ERROR;
+	}
+	return LIBBPF_PERF_EVENT_CONT;
+}
+
+static int perf_buffer__process_records(struct perf_buffer *pb,
+					struct perf_cpu_buf *cpu_buf)
+{
+	enum bpf_perf_event_ret ret;
+
+	ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size,
+					 pb->page_size, &cpu_buf->buf,
+					 &cpu_buf->buf_size,
+					 perf_buffer__process_record, cpu_buf);
+	if (ret != LIBBPF_PERF_EVENT_CONT)
+		return ret;
+	return 0;
+}
+
+int perf_buffer__epoll_fd(const struct perf_buffer *pb)
+{
+	return pb->epoll_fd;
+}
+
+int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
+{
+	int i, cnt, err;
+
+	cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
+	for (i = 0; i < cnt; i++) {
+		struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
+
+		err = perf_buffer__process_records(pb, cpu_buf);
+		if (err) {
+			pr_warn("error while processing records: %d\n", err);
+			return err;
+		}
+	}
+	return cnt < 0 ? -errno : cnt;
+}
+
+/* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
+ * manager.
+ */
+size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
+{
+	return pb->cpu_cnt;
+}
+
+/*
+ * Return perf_event FD of a ring buffer in *buf_idx* slot of
+ * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
+ * select()/poll()/epoll() Linux syscalls.
+ */
+int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
+{
+	struct perf_cpu_buf *cpu_buf;
+
+	if (buf_idx >= pb->cpu_cnt)
+		return -EINVAL;
+
+	cpu_buf = pb->cpu_bufs[buf_idx];
+	if (!cpu_buf)
+		return -ENOENT;
+
+	return cpu_buf->fd;
+}
+
+/*
+ * Consume data from perf ring buffer corresponding to slot *buf_idx* in
+ * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
+ * consume, do nothing and return success.
+ * Returns:
+ *   - 0 on success;
+ *   - <0 on failure.
+ */
+int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
+{
+	struct perf_cpu_buf *cpu_buf;
+
+	if (buf_idx >= pb->cpu_cnt)
+		return -EINVAL;
+
+	cpu_buf = pb->cpu_bufs[buf_idx];
+	if (!cpu_buf)
+		return -ENOENT;
+
+	return perf_buffer__process_records(pb, cpu_buf);
+}
+
+int perf_buffer__consume(struct perf_buffer *pb)
+{
+	int i, err;
+
+	for (i = 0; i < pb->cpu_cnt; i++) {
+		struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
+
+		if (!cpu_buf)
+			continue;
+
+		err = perf_buffer__process_records(pb, cpu_buf);
+		if (err) {
+			pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
+			return err;
+		}
+	}
+	return 0;
+}
+
+struct bpf_prog_info_array_desc {
+	int	array_offset;	/* e.g. offset of jited_prog_insns */
+	int	count_offset;	/* e.g. offset of jited_prog_len */
+	int	size_offset;	/* > 0: offset of rec size,
+				 * < 0: fix size of -size_offset
+				 */
+};
+
+static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = {
+	[BPF_PROG_INFO_JITED_INSNS] = {
+		offsetof(struct bpf_prog_info, jited_prog_insns),
+		offsetof(struct bpf_prog_info, jited_prog_len),
+		-1,
+	},
+	[BPF_PROG_INFO_XLATED_INSNS] = {
+		offsetof(struct bpf_prog_info, xlated_prog_insns),
+		offsetof(struct bpf_prog_info, xlated_prog_len),
+		-1,
+	},
+	[BPF_PROG_INFO_MAP_IDS] = {
+		offsetof(struct bpf_prog_info, map_ids),
+		offsetof(struct bpf_prog_info, nr_map_ids),
+		-(int)sizeof(__u32),
+	},
+	[BPF_PROG_INFO_JITED_KSYMS] = {
+		offsetof(struct bpf_prog_info, jited_ksyms),
+		offsetof(struct bpf_prog_info, nr_jited_ksyms),
+		-(int)sizeof(__u64),
+	},
+	[BPF_PROG_INFO_JITED_FUNC_LENS] = {
+		offsetof(struct bpf_prog_info, jited_func_lens),
+		offsetof(struct bpf_prog_info, nr_jited_func_lens),
+		-(int)sizeof(__u32),
+	},
+	[BPF_PROG_INFO_FUNC_INFO] = {
+		offsetof(struct bpf_prog_info, func_info),
+		offsetof(struct bpf_prog_info, nr_func_info),
+		offsetof(struct bpf_prog_info, func_info_rec_size),
+	},
+	[BPF_PROG_INFO_LINE_INFO] = {
+		offsetof(struct bpf_prog_info, line_info),
+		offsetof(struct bpf_prog_info, nr_line_info),
+		offsetof(struct bpf_prog_info, line_info_rec_size),
+	},
+	[BPF_PROG_INFO_JITED_LINE_INFO] = {
+		offsetof(struct bpf_prog_info, jited_line_info),
+		offsetof(struct bpf_prog_info, nr_jited_line_info),
+		offsetof(struct bpf_prog_info, jited_line_info_rec_size),
+	},
+	[BPF_PROG_INFO_PROG_TAGS] = {
+		offsetof(struct bpf_prog_info, prog_tags),
+		offsetof(struct bpf_prog_info, nr_prog_tags),
+		-(int)sizeof(__u8) * BPF_TAG_SIZE,
+	},
+
+};
+
+static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info,
+					   int offset)
+{
+	__u32 *array = (__u32 *)info;
+
+	if (offset >= 0)
+		return array[offset / sizeof(__u32)];
+	return -(int)offset;
+}
+
+static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info,
+					   int offset)
+{
+	__u64 *array = (__u64 *)info;
+
+	if (offset >= 0)
+		return array[offset / sizeof(__u64)];
+	return -(int)offset;
+}
+
+static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset,
+					 __u32 val)
+{
+	__u32 *array = (__u32 *)info;
+
+	if (offset >= 0)
+		array[offset / sizeof(__u32)] = val;
+}
+
+static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset,
+					 __u64 val)
+{
+	__u64 *array = (__u64 *)info;
+
+	if (offset >= 0)
+		array[offset / sizeof(__u64)] = val;
+}
+
+struct bpf_prog_info_linear *
+bpf_program__get_prog_info_linear(int fd, __u64 arrays)
+{
+	struct bpf_prog_info_linear *info_linear;
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
+	__u32 data_len = 0;
+	int i, err;
+	void *ptr;
+
+	if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
+		return ERR_PTR(-EINVAL);
+
+	/* step 1: get array dimensions */
+	err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
+	if (err) {
+		pr_debug("can't get prog info: %s", strerror(errno));
+		return ERR_PTR(-EFAULT);
+	}
+
+	/* step 2: calculate total size of all arrays */
+	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
+		bool include_array = (arrays & (1UL << i)) > 0;
+		struct bpf_prog_info_array_desc *desc;
+		__u32 count, size;
+
+		desc = bpf_prog_info_array_desc + i;
+
+		/* kernel is too old to support this field */
+		if (info_len < desc->array_offset + sizeof(__u32) ||
+		    info_len < desc->count_offset + sizeof(__u32) ||
+		    (desc->size_offset > 0 && info_len < desc->size_offset))
+			include_array = false;
+
+		if (!include_array) {
+			arrays &= ~(1UL << i);	/* clear the bit */
+			continue;
+		}
+
+		count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
+		size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
+
+		data_len += count * size;
+	}
+
+	/* step 3: allocate continuous memory */
+	data_len = roundup(data_len, sizeof(__u64));
+	info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
+	if (!info_linear)
+		return ERR_PTR(-ENOMEM);
+
+	/* step 4: fill data to info_linear->info */
+	info_linear->arrays = arrays;
+	memset(&info_linear->info, 0, sizeof(info));
+	ptr = info_linear->data;
+
+	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
+		struct bpf_prog_info_array_desc *desc;
+		__u32 count, size;
+
+		if ((arrays & (1UL << i)) == 0)
+			continue;
+
+		desc  = bpf_prog_info_array_desc + i;
+		count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
+		size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
+		bpf_prog_info_set_offset_u32(&info_linear->info,
+					     desc->count_offset, count);
+		bpf_prog_info_set_offset_u32(&info_linear->info,
+					     desc->size_offset, size);
+		bpf_prog_info_set_offset_u64(&info_linear->info,
+					     desc->array_offset,
+					     ptr_to_u64(ptr));
+		ptr += count * size;
+	}
+
+	/* step 5: call syscall again to get required arrays */
+	err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len);
+	if (err) {
+		pr_debug("can't get prog info: %s", strerror(errno));
+		free(info_linear);
+		return ERR_PTR(-EFAULT);
+	}
+
+	/* step 6: verify the data */
+	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
+		struct bpf_prog_info_array_desc *desc;
+		__u32 v1, v2;
+
+		if ((arrays & (1UL << i)) == 0)
+			continue;
+
+		desc = bpf_prog_info_array_desc + i;
+		v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
+		v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
+						   desc->count_offset);
+		if (v1 != v2)
+			pr_warn("%s: mismatch in element count\n", __func__);
+
+		v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
+		v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
+						   desc->size_offset);
+		if (v1 != v2)
+			pr_warn("%s: mismatch in rec size\n", __func__);
+	}
+
+	/* step 7: update info_len and data_len */
+	info_linear->info_len = sizeof(struct bpf_prog_info);
+	info_linear->data_len = data_len;
+
+	return info_linear;
+}
+
+void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear)
+{
+	int i;
+
+	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
+		struct bpf_prog_info_array_desc *desc;
+		__u64 addr, offs;
+
+		if ((info_linear->arrays & (1UL << i)) == 0)
+			continue;
+
+		desc = bpf_prog_info_array_desc + i;
+		addr = bpf_prog_info_read_offset_u64(&info_linear->info,
+						     desc->array_offset);
+		offs = addr - ptr_to_u64(info_linear->data);
+		bpf_prog_info_set_offset_u64(&info_linear->info,
+					     desc->array_offset, offs);
+	}
+}
+
+void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
+{
+	int i;
+
+	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
+		struct bpf_prog_info_array_desc *desc;
+		__u64 addr, offs;
+
+		if ((info_linear->arrays & (1UL << i)) == 0)
+			continue;
+
+		desc = bpf_prog_info_array_desc + i;
+		offs = bpf_prog_info_read_offset_u64(&info_linear->info,
+						     desc->array_offset);
+		addr = offs + ptr_to_u64(info_linear->data);
+		bpf_prog_info_set_offset_u64(&info_linear->info,
+					     desc->array_offset, addr);
+	}
+}
+
+int bpf_program__set_attach_target(struct bpf_program *prog,
+				   int attach_prog_fd,
+				   const char *attach_func_name)
+{
+	int btf_id;
+
+	if (!prog || attach_prog_fd < 0 || !attach_func_name)
+		return -EINVAL;
+
+	if (attach_prog_fd)
+		btf_id = libbpf_find_prog_btf_id(attach_func_name,
+						 attach_prog_fd);
+	else
+		btf_id = libbpf_find_vmlinux_btf_id(attach_func_name,
+						    prog->expected_attach_type);
+
+	if (btf_id < 0)
+		return btf_id;
+
+	prog->attach_btf_id = btf_id;
+	prog->attach_prog_fd = attach_prog_fd;
+	return 0;
+}
+
+int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
+{
+	int err = 0, n, len, start, end = -1;
+	bool *tmp;
+
+	*mask = NULL;
+	*mask_sz = 0;
+
+	/* Each sub string separated by ',' has format \d+-\d+ or \d+ */
+	while (*s) {
+		if (*s == ',' || *s == '\n') {
+			s++;
+			continue;
+		}
+		n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
+		if (n <= 0 || n > 2) {
+			pr_warn("Failed to get CPU range %s: %d\n", s, n);
+			err = -EINVAL;
+			goto cleanup;
+		} else if (n == 1) {
+			end = start;
+		}
+		if (start < 0 || start > end) {
+			pr_warn("Invalid CPU range [%d,%d] in %s\n",
+				start, end, s);
+			err = -EINVAL;
+			goto cleanup;
+		}
+		tmp = realloc(*mask, end + 1);
+		if (!tmp) {
+			err = -ENOMEM;
+			goto cleanup;
+		}
+		*mask = tmp;
+		memset(tmp + *mask_sz, 0, start - *mask_sz);
+		memset(tmp + start, 1, end - start + 1);
+		*mask_sz = end + 1;
+		s += len;
+	}
+	if (!*mask_sz) {
+		pr_warn("Empty CPU range\n");
+		return -EINVAL;
+	}
+	return 0;
+cleanup:
+	free(*mask);
+	*mask = NULL;
+	return err;
+}
+
+int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
+{
+	int fd, err = 0, len;
+	char buf[128];
+
+	fd = open(fcpu, O_RDONLY);
+	if (fd < 0) {
+		err = -errno;
+		pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
+		return err;
+	}
+	len = read(fd, buf, sizeof(buf));
+	close(fd);
+	if (len <= 0) {
+		err = len ? -errno : -EINVAL;
+		pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
+		return err;
+	}
+	if (len >= sizeof(buf)) {
+		pr_warn("CPU mask is too big in file %s\n", fcpu);
+		return -E2BIG;
+	}
+	buf[len] = '\0';
+
+	return parse_cpu_mask_str(buf, mask, mask_sz);
+}
+
+int libbpf_num_possible_cpus(void)
+{
+	static const char *fcpu = "/sys/devices/system/cpu/possible";
+	static int cpus;
+	int err, n, i, tmp_cpus;
+	bool *mask;
+
+	tmp_cpus = READ_ONCE(cpus);
+	if (tmp_cpus > 0)
+		return tmp_cpus;
+
+	err = parse_cpu_mask_file(fcpu, &mask, &n);
+	if (err)
+		return err;
+
+	tmp_cpus = 0;
+	for (i = 0; i < n; i++) {
+		if (mask[i])
+			tmp_cpus++;
+	}
+	free(mask);
+
+	WRITE_ONCE(cpus, tmp_cpus);
+	return tmp_cpus;
+}
+
+int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
+			      const struct bpf_object_open_opts *opts)
+{
+	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
+		.object_name = s->name,
+	);
+	struct bpf_object *obj;
+	int i;
+
+	/* Attempt to preserve opts->object_name, unless overriden by user
+	 * explicitly. Overwriting object name for skeletons is discouraged,
+	 * as it breaks global data maps, because they contain object name
+	 * prefix as their own map name prefix. When skeleton is generated,
+	 * bpftool is making an assumption that this name will stay the same.
+	 */
+	if (opts) {
+		memcpy(&skel_opts, opts, sizeof(*opts));
+		if (!opts->object_name)
+			skel_opts.object_name = s->name;
+	}
+
+	obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
+	if (IS_ERR(obj)) {
+		pr_warn("failed to initialize skeleton BPF object '%s': %ld\n",
+			s->name, PTR_ERR(obj));
+		return PTR_ERR(obj);
+	}
+
+	*s->obj = obj;
+
+	for (i = 0; i < s->map_cnt; i++) {
+		struct bpf_map **map = s->maps[i].map;
+		const char *name = s->maps[i].name;
+		void **mmaped = s->maps[i].mmaped;
+
+		*map = bpf_object__find_map_by_name(obj, name);
+		if (!*map) {
+			pr_warn("failed to find skeleton map '%s'\n", name);
+			return -ESRCH;
+		}
+
+		/* externs shouldn't be pre-setup from user code */
+		if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
+			*mmaped = (*map)->mmaped;
+	}
+
+	for (i = 0; i < s->prog_cnt; i++) {
+		struct bpf_program **prog = s->progs[i].prog;
+		const char *name = s->progs[i].name;
+
+		*prog = bpf_object__find_program_by_name(obj, name);
+		if (!*prog) {
+			pr_warn("failed to find skeleton program '%s'\n", name);
+			return -ESRCH;
+		}
+	}
+
+	return 0;
+}
+
+int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
+{
+	int i, err;
+
+	err = bpf_object__load(*s->obj);
+	if (err) {
+		pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
+		return err;
+	}
+
+	for (i = 0; i < s->map_cnt; i++) {
+		struct bpf_map *map = *s->maps[i].map;
+		size_t mmap_sz = bpf_map_mmap_sz(map);
+		int prot, map_fd = bpf_map__fd(map);
+		void **mmaped = s->maps[i].mmaped;
+
+		if (!mmaped)
+			continue;
+
+		if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
+			*mmaped = NULL;
+			continue;
+		}
+
+		if (map->def.map_flags & BPF_F_RDONLY_PROG)
+			prot = PROT_READ;
+		else
+			prot = PROT_READ | PROT_WRITE;
+
+		/* Remap anonymous mmap()-ed "map initialization image" as
+		 * a BPF map-backed mmap()-ed memory, but preserving the same
+		 * memory address. This will cause kernel to change process'
+		 * page table to point to a different piece of kernel memory,
+		 * but from userspace point of view memory address (and its
+		 * contents, being identical at this point) will stay the
+		 * same. This mapping will be released by bpf_object__close()
+		 * as per normal clean up procedure, so we don't need to worry
+		 * about it from skeleton's clean up perspective.
+		 */
+		*mmaped = mmap(map->mmaped, mmap_sz, prot,
+				MAP_SHARED | MAP_FIXED, map_fd, 0);
+		if (*mmaped == MAP_FAILED) {
+			err = -errno;
+			*mmaped = NULL;
+			pr_warn("failed to re-mmap() map '%s': %d\n",
+				 bpf_map__name(map), err);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
+{
+	int i;
+
+	for (i = 0; i < s->prog_cnt; i++) {
+		struct bpf_program *prog = *s->progs[i].prog;
+		struct bpf_link **link = s->progs[i].link;
+		const struct bpf_sec_def *sec_def;
+
+		if (!prog->load)
+			continue;
+
+		sec_def = find_sec_def(prog->sec_name);
+		if (!sec_def || !sec_def->attach_fn)
+			continue;
+
+		*link = sec_def->attach_fn(sec_def, prog);
+		if (IS_ERR(*link)) {
+			pr_warn("failed to auto-attach program '%s': %ld\n",
+				bpf_program__name(prog), PTR_ERR(*link));
+			return PTR_ERR(*link);
+		}
+	}
+
+	return 0;
+}
+
+void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
+{
+	int i;
+
+	for (i = 0; i < s->prog_cnt; i++) {
+		struct bpf_link **link = s->progs[i].link;
+
+		bpf_link__destroy(*link);
+		*link = NULL;
+	}
+}
+
+void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
+{
+	if (!s)
+		return;
+
+	if (s->progs)
+		bpf_object__detach_skeleton(s);
+	if (s->obj)
+		bpf_object__close(*s->obj);
+	free(s->maps);
+	free(s->progs);
+	free(s);
+}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
new file mode 100644
index 000000000..57d10b779
--- /dev/null
+++ b/tools/lib/bpf/libbpf.h
@@ -0,0 +1,766 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Common eBPF ELF object loading operations.
+ *
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ */
+#ifndef __LIBBPF_LIBBPF_H
+#define __LIBBPF_LIBBPF_H
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <sys/types.h>  // for size_t
+#include <linux/bpf.h>
+
+#include "libbpf_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum libbpf_errno {
+	__LIBBPF_ERRNO__START = 4000,
+
+	/* Something wrong in libelf */
+	LIBBPF_ERRNO__LIBELF = __LIBBPF_ERRNO__START,
+	LIBBPF_ERRNO__FORMAT,	/* BPF object format invalid */
+	LIBBPF_ERRNO__KVERSION,	/* Incorrect or no 'version' section */
+	LIBBPF_ERRNO__ENDIAN,	/* Endian mismatch */
+	LIBBPF_ERRNO__INTERNAL,	/* Internal error in libbpf */
+	LIBBPF_ERRNO__RELOC,	/* Relocation failed */
+	LIBBPF_ERRNO__LOAD,	/* Load program failure for unknown reason */
+	LIBBPF_ERRNO__VERIFY,	/* Kernel verifier blocks program loading */
+	LIBBPF_ERRNO__PROG2BIG,	/* Program too big */
+	LIBBPF_ERRNO__KVER,	/* Incorrect kernel version */
+	LIBBPF_ERRNO__PROGTYPE,	/* Kernel doesn't support this program type */
+	LIBBPF_ERRNO__WRNGPID,	/* Wrong pid in netlink message */
+	LIBBPF_ERRNO__INVSEQ,	/* Invalid netlink sequence */
+	LIBBPF_ERRNO__NLPARSE,	/* netlink parsing error */
+	__LIBBPF_ERRNO__END,
+};
+
+LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size);
+
+enum libbpf_print_level {
+        LIBBPF_WARN,
+        LIBBPF_INFO,
+        LIBBPF_DEBUG,
+};
+
+typedef int (*libbpf_print_fn_t)(enum libbpf_print_level level,
+				 const char *, va_list ap);
+
+LIBBPF_API libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn);
+
+/* Hide internal to user */
+struct bpf_object;
+
+struct bpf_object_open_attr {
+	const char *file;
+	enum bpf_prog_type prog_type;
+};
+
+struct bpf_object_open_opts {
+	/* size of this struct, for forward/backward compatiblity */
+	size_t sz;
+	/* object name override, if provided:
+	 * - for object open from file, this will override setting object
+	 *   name from file path's base name;
+	 * - for object open from memory buffer, this will specify an object
+	 *   name and will override default "<addr>-<buf-size>" name;
+	 */
+	const char *object_name;
+	/* parse map definitions non-strictly, allowing extra attributes/data */
+	bool relaxed_maps;
+	/* DEPRECATED: handle CO-RE relocations non-strictly, allowing failures.
+	 * Value is ignored. Relocations always are processed non-strictly.
+	 * Non-relocatable instructions are replaced with invalid ones to
+	 * prevent accidental errors.
+	 * */
+	bool relaxed_core_relocs;
+	/* maps that set the 'pinning' attribute in their definition will have
+	 * their pin_path attribute set to a file in this directory, and be
+	 * auto-pinned to that path on load; defaults to "/sys/fs/bpf".
+	 */
+	const char *pin_root_path;
+	__u32 attach_prog_fd;
+	/* Additional kernel config content that augments and overrides
+	 * system Kconfig for CONFIG_xxx externs.
+	 */
+	const char *kconfig;
+};
+#define bpf_object_open_opts__last_field kconfig
+
+LIBBPF_API struct bpf_object *bpf_object__open(const char *path);
+LIBBPF_API struct bpf_object *
+bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts);
+LIBBPF_API struct bpf_object *
+bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
+		     const struct bpf_object_open_opts *opts);
+
+/* deprecated bpf_object__open variants */
+LIBBPF_API struct bpf_object *
+bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
+			const char *name);
+LIBBPF_API struct bpf_object *
+bpf_object__open_xattr(struct bpf_object_open_attr *attr);
+
+enum libbpf_pin_type {
+	LIBBPF_PIN_NONE,
+	/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
+	LIBBPF_PIN_BY_NAME,
+};
+
+/* pin_maps and unpin_maps can both be called with a NULL path, in which case
+ * they will use the pin_path attribute of each map (and ignore all maps that
+ * don't have a pin_path set).
+ */
+LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path);
+LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj,
+				      const char *path);
+LIBBPF_API int bpf_object__pin_programs(struct bpf_object *obj,
+					const char *path);
+LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj,
+					  const char *path);
+LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path);
+LIBBPF_API void bpf_object__close(struct bpf_object *object);
+
+struct bpf_object_load_attr {
+	struct bpf_object *obj;
+	int log_level;
+	const char *target_btf_path;
+};
+
+/* Load/unload object into/from kernel */
+LIBBPF_API int bpf_object__load(struct bpf_object *obj);
+LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr);
+LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
+
+LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj);
+LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj);
+
+struct btf;
+LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj);
+LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj);
+
+LIBBPF_API struct bpf_program *
+bpf_object__find_program_by_title(const struct bpf_object *obj,
+				  const char *title);
+LIBBPF_API struct bpf_program *
+bpf_object__find_program_by_name(const struct bpf_object *obj,
+				 const char *name);
+
+LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev);
+#define bpf_object__for_each_safe(pos, tmp)			\
+	for ((pos) = bpf_object__next(NULL),		\
+		(tmp) = bpf_object__next(pos);		\
+	     (pos) != NULL;				\
+	     (pos) = (tmp), (tmp) = bpf_object__next(tmp))
+
+typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *);
+LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv,
+				    bpf_object_clear_priv_t clear_priv);
+LIBBPF_API void *bpf_object__priv(const struct bpf_object *prog);
+
+LIBBPF_API int
+libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
+			 enum bpf_attach_type *expected_attach_type);
+LIBBPF_API int libbpf_attach_type_by_name(const char *name,
+					  enum bpf_attach_type *attach_type);
+LIBBPF_API int libbpf_find_vmlinux_btf_id(const char *name,
+					  enum bpf_attach_type attach_type);
+
+/* Accessors of bpf_program */
+struct bpf_program;
+LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog,
+						 const struct bpf_object *obj);
+
+#define bpf_object__for_each_program(pos, obj)		\
+	for ((pos) = bpf_program__next(NULL, (obj));	\
+	     (pos) != NULL;				\
+	     (pos) = bpf_program__next((pos), (obj)))
+
+LIBBPF_API struct bpf_program *bpf_program__prev(struct bpf_program *prog,
+						 const struct bpf_object *obj);
+
+typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *);
+
+LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv,
+				     bpf_program_clear_priv_t clear_priv);
+
+LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog);
+LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,
+					 __u32 ifindex);
+
+LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog);
+LIBBPF_API const char *bpf_program__section_name(const struct bpf_program *prog);
+LIBBPF_API LIBBPF_DEPRECATED("BPF program title is confusing term; please use bpf_program__section_name() instead")
+const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy);
+LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload);
+
+/* returns program size in bytes */
+LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog);
+
+LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license,
+				 __u32 kern_version);
+LIBBPF_API int bpf_program__fd(const struct bpf_program *prog);
+LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog,
+					 const char *path,
+					 int instance);
+LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog,
+					   const char *path,
+					   int instance);
+LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path);
+LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path);
+LIBBPF_API void bpf_program__unload(struct bpf_program *prog);
+
+struct bpf_link;
+
+LIBBPF_API struct bpf_link *bpf_link__open(const char *path);
+LIBBPF_API int bpf_link__fd(const struct bpf_link *link);
+LIBBPF_API const char *bpf_link__pin_path(const struct bpf_link *link);
+LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path);
+LIBBPF_API int bpf_link__unpin(struct bpf_link *link);
+LIBBPF_API int bpf_link__update_program(struct bpf_link *link,
+					struct bpf_program *prog);
+LIBBPF_API void bpf_link__disconnect(struct bpf_link *link);
+LIBBPF_API int bpf_link__detach(struct bpf_link *link);
+LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach(struct bpf_program *prog);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_perf_event(struct bpf_program *prog, int pfd);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe,
+			   const char *func_name);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe,
+			   pid_t pid, const char *binary_path,
+			   size_t func_offset);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_tracepoint(struct bpf_program *prog,
+			       const char *tp_category,
+			       const char *tp_name);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
+				   const char *tp_name);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_trace(struct bpf_program *prog);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_lsm(struct bpf_program *prog);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_netns(struct bpf_program *prog, int netns_fd);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_xdp(struct bpf_program *prog, int ifindex);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_freplace(struct bpf_program *prog,
+			     int target_fd, const char *attach_func_name);
+
+struct bpf_map;
+
+LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map);
+
+struct bpf_iter_attach_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+	union bpf_iter_link_info *link_info;
+	__u32 link_info_len;
+};
+#define bpf_iter_attach_opts__last_field link_info_len
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_iter(struct bpf_program *prog,
+			 const struct bpf_iter_attach_opts *opts);
+
+struct bpf_insn;
+
+/*
+ * Libbpf allows callers to adjust BPF programs before being loaded
+ * into kernel. One program in an object file can be transformed into
+ * multiple variants to be attached to different hooks.
+ *
+ * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd
+ * form an API for this purpose.
+ *
+ * - bpf_program_prep_t:
+ *   Defines a 'preprocessor', which is a caller defined function
+ *   passed to libbpf through bpf_program__set_prep(), and will be
+ *   called before program is loaded. The processor should adjust
+ *   the program one time for each instance according to the instance id
+ *   passed to it.
+ *
+ * - bpf_program__set_prep:
+ *   Attaches a preprocessor to a BPF program. The number of instances
+ *   that should be created is also passed through this function.
+ *
+ * - bpf_program__nth_fd:
+ *   After the program is loaded, get resulting FD of a given instance
+ *   of the BPF program.
+ *
+ * If bpf_program__set_prep() is not used, the program would be loaded
+ * without adjustment during bpf_object__load(). The program has only
+ * one instance. In this case bpf_program__fd(prog) is equal to
+ * bpf_program__nth_fd(prog, 0).
+ */
+
+struct bpf_prog_prep_result {
+	/*
+	 * If not NULL, load new instruction array.
+	 * If set to NULL, don't load this instance.
+	 */
+	struct bpf_insn *new_insn_ptr;
+	int new_insn_cnt;
+
+	/* If not NULL, result FD is written to it. */
+	int *pfd;
+};
+
+/*
+ * Parameters of bpf_program_prep_t:
+ *  - prog:	The bpf_program being loaded.
+ *  - n:	Index of instance being generated.
+ *  - insns:	BPF instructions array.
+ *  - insns_cnt:Number of instructions in insns.
+ *  - res:	Output parameter, result of transformation.
+ *
+ * Return value:
+ *  - Zero:	pre-processing success.
+ *  - Non-zero:	pre-processing error, stop loading.
+ */
+typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n,
+				  struct bpf_insn *insns, int insns_cnt,
+				  struct bpf_prog_prep_result *res);
+
+LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
+				     bpf_program_prep_t prep);
+
+LIBBPF_API int bpf_program__nth_fd(const struct bpf_program *prog, int n);
+
+/*
+ * Adjust type of BPF program. Default is kprobe.
+ */
+LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_lsm(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog);
+
+LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog);
+LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
+				      enum bpf_prog_type type);
+
+LIBBPF_API enum bpf_attach_type
+bpf_program__get_expected_attach_type(struct bpf_program *prog);
+LIBBPF_API void
+bpf_program__set_expected_attach_type(struct bpf_program *prog,
+				      enum bpf_attach_type type);
+
+LIBBPF_API int
+bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd,
+			       const char *attach_func_name);
+
+LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_kprobe(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_lsm(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_sk_lookup(const struct bpf_program *prog);
+
+/*
+ * No need for __attribute__((packed)), all members of 'bpf_map_def'
+ * are all aligned.  In addition, using __attribute__((packed))
+ * would trigger a -Wpacked warning message, and lead to an error
+ * if -Werror is set.
+ */
+struct bpf_map_def {
+	unsigned int type;
+	unsigned int key_size;
+	unsigned int value_size;
+	unsigned int max_entries;
+	unsigned int map_flags;
+};
+
+/*
+ * The 'struct bpf_map' in include/linux/bpf.h is internal to the kernel,
+ * so no need to worry about a name clash.
+ */
+LIBBPF_API struct bpf_map *
+bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name);
+
+LIBBPF_API int
+bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name);
+
+/*
+ * Get bpf_map through the offset of corresponding struct bpf_map_def
+ * in the BPF object file.
+ */
+LIBBPF_API struct bpf_map *
+bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);
+
+LIBBPF_API struct bpf_map *
+bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj);
+#define bpf_object__for_each_map(pos, obj)		\
+	for ((pos) = bpf_map__next(NULL, (obj));	\
+	     (pos) != NULL;				\
+	     (pos) = bpf_map__next((pos), (obj)))
+#define bpf_map__for_each bpf_object__for_each_map
+
+LIBBPF_API struct bpf_map *
+bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj);
+
+/* get/set map FD */
+LIBBPF_API int bpf_map__fd(const struct bpf_map *map);
+LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
+/* get map definition */
+LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
+/* get map name */
+LIBBPF_API const char *bpf_map__name(const struct bpf_map *map);
+/* get/set map type */
+LIBBPF_API enum bpf_map_type bpf_map__type(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type);
+/* get/set map size (max_entries) */
+LIBBPF_API __u32 bpf_map__max_entries(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries);
+LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
+/* get/set map flags */
+LIBBPF_API __u32 bpf_map__map_flags(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags);
+/* get/set map NUMA node */
+LIBBPF_API __u32 bpf_map__numa_node(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node);
+/* get/set map key size */
+LIBBPF_API __u32 bpf_map__key_size(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_key_size(struct bpf_map *map, __u32 size);
+/* get/set map value size */
+LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_value_size(struct bpf_map *map, __u32 size);
+/* get map key/value BTF type IDs */
+LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map);
+LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map);
+/* get/set map if_index */
+LIBBPF_API __u32 bpf_map__ifindex(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
+
+typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
+LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
+				 bpf_map_clear_priv_t clear_priv);
+LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
+					  const void *data, size_t size);
+LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
+LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
+LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
+LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map);
+LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
+LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
+
+LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd);
+
+LIBBPF_API long libbpf_get_error(const void *ptr);
+
+struct bpf_prog_load_attr {
+	const char *file;
+	enum bpf_prog_type prog_type;
+	enum bpf_attach_type expected_attach_type;
+	int ifindex;
+	int log_level;
+	int prog_flags;
+};
+
+LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
+				   struct bpf_object **pobj, int *prog_fd);
+LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type,
+			     struct bpf_object **pobj, int *prog_fd);
+
+struct xdp_link_info {
+	__u32 prog_id;
+	__u32 drv_prog_id;
+	__u32 hw_prog_id;
+	__u32 skb_prog_id;
+	__u8 attach_mode;
+};
+
+struct bpf_xdp_set_link_opts {
+	size_t sz;
+	int old_fd;
+	size_t :0;
+};
+#define bpf_xdp_set_link_opts__last_field old_fd
+
+LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
+LIBBPF_API int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
+					const struct bpf_xdp_set_link_opts *opts);
+LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
+LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
+				     size_t info_size, __u32 flags);
+
+/* Ring buffer APIs */
+struct ring_buffer;
+
+typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size);
+
+struct ring_buffer_opts {
+	size_t sz; /* size of this struct, for forward/backward compatiblity */
+};
+
+#define ring_buffer_opts__last_field sz
+
+LIBBPF_API struct ring_buffer *
+ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
+		 const struct ring_buffer_opts *opts);
+LIBBPF_API void ring_buffer__free(struct ring_buffer *rb);
+LIBBPF_API int ring_buffer__add(struct ring_buffer *rb, int map_fd,
+				ring_buffer_sample_fn sample_cb, void *ctx);
+LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
+LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb);
+
+/* Perf buffer APIs */
+struct perf_buffer;
+
+typedef void (*perf_buffer_sample_fn)(void *ctx, int cpu,
+				      void *data, __u32 size);
+typedef void (*perf_buffer_lost_fn)(void *ctx, int cpu, __u64 cnt);
+
+/* common use perf buffer options */
+struct perf_buffer_opts {
+	/* if specified, sample_cb is called for each sample */
+	perf_buffer_sample_fn sample_cb;
+	/* if specified, lost_cb is called for each batch of lost samples */
+	perf_buffer_lost_fn lost_cb;
+	/* ctx is provided to sample_cb and lost_cb */
+	void *ctx;
+};
+
+LIBBPF_API struct perf_buffer *
+perf_buffer__new(int map_fd, size_t page_cnt,
+		 const struct perf_buffer_opts *opts);
+
+enum bpf_perf_event_ret {
+	LIBBPF_PERF_EVENT_DONE	= 0,
+	LIBBPF_PERF_EVENT_ERROR	= -1,
+	LIBBPF_PERF_EVENT_CONT	= -2,
+};
+
+struct perf_event_header;
+
+typedef enum bpf_perf_event_ret
+(*perf_buffer_event_fn)(void *ctx, int cpu, struct perf_event_header *event);
+
+/* raw perf buffer options, giving most power and control */
+struct perf_buffer_raw_opts {
+	/* perf event attrs passed directly into perf_event_open() */
+	struct perf_event_attr *attr;
+	/* raw event callback */
+	perf_buffer_event_fn event_cb;
+	/* ctx is provided to event_cb */
+	void *ctx;
+	/* if cpu_cnt == 0, open all on all possible CPUs (up to the number of
+	 * max_entries of given PERF_EVENT_ARRAY map)
+	 */
+	int cpu_cnt;
+	/* if cpu_cnt > 0, cpus is an array of CPUs to open ring buffers on */
+	int *cpus;
+	/* if cpu_cnt > 0, map_keys specify map keys to set per-CPU FDs for */
+	int *map_keys;
+};
+
+LIBBPF_API struct perf_buffer *
+perf_buffer__new_raw(int map_fd, size_t page_cnt,
+		     const struct perf_buffer_raw_opts *opts);
+
+LIBBPF_API void perf_buffer__free(struct perf_buffer *pb);
+LIBBPF_API int perf_buffer__epoll_fd(const struct perf_buffer *pb);
+LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms);
+LIBBPF_API int perf_buffer__consume(struct perf_buffer *pb);
+LIBBPF_API int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx);
+LIBBPF_API size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb);
+LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx);
+
+typedef enum bpf_perf_event_ret
+	(*bpf_perf_event_print_t)(struct perf_event_header *hdr,
+				  void *private_data);
+LIBBPF_API enum bpf_perf_event_ret
+bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
+			   void **copy_mem, size_t *copy_size,
+			   bpf_perf_event_print_t fn, void *private_data);
+
+struct bpf_prog_linfo;
+struct bpf_prog_info;
+
+LIBBPF_API void bpf_prog_linfo__free(struct bpf_prog_linfo *prog_linfo);
+LIBBPF_API struct bpf_prog_linfo *
+bpf_prog_linfo__new(const struct bpf_prog_info *info);
+LIBBPF_API const struct bpf_line_info *
+bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo,
+				__u64 addr, __u32 func_idx, __u32 nr_skip);
+LIBBPF_API const struct bpf_line_info *
+bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo,
+		      __u32 insn_off, __u32 nr_skip);
+
+/*
+ * Probe for supported system features
+ *
+ * Note that running many of these probes in a short amount of time can cause
+ * the kernel to reach the maximal size of lockable memory allowed for the
+ * user, causing subsequent probes to fail. In this case, the caller may want
+ * to adjust that limit with setrlimit().
+ */
+LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type,
+				    __u32 ifindex);
+LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex);
+LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id,
+				 enum bpf_prog_type prog_type, __u32 ifindex);
+LIBBPF_API bool bpf_probe_large_insn_limit(__u32 ifindex);
+
+/*
+ * Get bpf_prog_info in continuous memory
+ *
+ * struct bpf_prog_info has multiple arrays. The user has option to choose
+ * arrays to fetch from kernel. The following APIs provide an uniform way to
+ * fetch these data. All arrays in bpf_prog_info are stored in a single
+ * continuous memory region. This makes it easy to store the info in a
+ * file.
+ *
+ * Before writing bpf_prog_info_linear to files, it is necessary to
+ * translate pointers in bpf_prog_info to offsets. Helper functions
+ * bpf_program__bpil_addr_to_offs() and bpf_program__bpil_offs_to_addr()
+ * are introduced to switch between pointers and offsets.
+ *
+ * Examples:
+ *   # To fetch map_ids and prog_tags:
+ *   __u64 arrays = (1UL << BPF_PROG_INFO_MAP_IDS) |
+ *           (1UL << BPF_PROG_INFO_PROG_TAGS);
+ *   struct bpf_prog_info_linear *info_linear =
+ *           bpf_program__get_prog_info_linear(fd, arrays);
+ *
+ *   # To save data in file
+ *   bpf_program__bpil_addr_to_offs(info_linear);
+ *   write(f, info_linear, sizeof(*info_linear) + info_linear->data_len);
+ *
+ *   # To read data from file
+ *   read(f, info_linear, <proper_size>);
+ *   bpf_program__bpil_offs_to_addr(info_linear);
+ */
+enum bpf_prog_info_array {
+	BPF_PROG_INFO_FIRST_ARRAY = 0,
+	BPF_PROG_INFO_JITED_INSNS = 0,
+	BPF_PROG_INFO_XLATED_INSNS,
+	BPF_PROG_INFO_MAP_IDS,
+	BPF_PROG_INFO_JITED_KSYMS,
+	BPF_PROG_INFO_JITED_FUNC_LENS,
+	BPF_PROG_INFO_FUNC_INFO,
+	BPF_PROG_INFO_LINE_INFO,
+	BPF_PROG_INFO_JITED_LINE_INFO,
+	BPF_PROG_INFO_PROG_TAGS,
+	BPF_PROG_INFO_LAST_ARRAY,
+};
+
+struct bpf_prog_info_linear {
+	/* size of struct bpf_prog_info, when the tool is compiled */
+	__u32			info_len;
+	/* total bytes allocated for data, round up to 8 bytes */
+	__u32			data_len;
+	/* which arrays are included in data */
+	__u64			arrays;
+	struct bpf_prog_info	info;
+	__u8			data[];
+};
+
+LIBBPF_API struct bpf_prog_info_linear *
+bpf_program__get_prog_info_linear(int fd, __u64 arrays);
+
+LIBBPF_API void
+bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear);
+
+LIBBPF_API void
+bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear);
+
+/*
+ * A helper function to get the number of possible CPUs before looking up
+ * per-CPU maps. Negative errno is returned on failure.
+ *
+ * Example usage:
+ *
+ *     int ncpus = libbpf_num_possible_cpus();
+ *     if (ncpus < 0) {
+ *          // error handling
+ *     }
+ *     long values[ncpus];
+ *     bpf_map_lookup_elem(per_cpu_map_fd, key, values);
+ *
+ */
+LIBBPF_API int libbpf_num_possible_cpus(void);
+
+struct bpf_map_skeleton {
+	const char *name;
+	struct bpf_map **map;
+	void **mmaped;
+};
+
+struct bpf_prog_skeleton {
+	const char *name;
+	struct bpf_program **prog;
+	struct bpf_link **link;
+};
+
+struct bpf_object_skeleton {
+	size_t sz; /* size of this struct, for forward/backward compatibility */
+
+	const char *name;
+	void *data;
+	size_t data_sz;
+
+	struct bpf_object **obj;
+
+	int map_cnt;
+	int map_skel_sz; /* sizeof(struct bpf_skeleton_map) */
+	struct bpf_map_skeleton *maps;
+
+	int prog_cnt;
+	int prog_skel_sz; /* sizeof(struct bpf_skeleton_prog) */
+	struct bpf_prog_skeleton *progs;
+};
+
+LIBBPF_API int
+bpf_object__open_skeleton(struct bpf_object_skeleton *s,
+			  const struct bpf_object_open_opts *opts);
+LIBBPF_API int bpf_object__load_skeleton(struct bpf_object_skeleton *s);
+LIBBPF_API int bpf_object__attach_skeleton(struct bpf_object_skeleton *s);
+LIBBPF_API void bpf_object__detach_skeleton(struct bpf_object_skeleton *s);
+LIBBPF_API void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s);
+
+enum libbpf_tristate {
+	TRI_NO = 0,
+	TRI_YES = 1,
+	TRI_MODULE = 2,
+};
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __LIBBPF_LIBBPF_H */
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
new file mode 100644
index 000000000..4ebfadf45
--- /dev/null
+++ b/tools/lib/bpf/libbpf.map
@@ -0,0 +1,339 @@
+LIBBPF_0.0.1 {
+	global:
+		bpf_btf_get_fd_by_id;
+		bpf_create_map;
+		bpf_create_map_in_map;
+		bpf_create_map_in_map_node;
+		bpf_create_map_name;
+		bpf_create_map_node;
+		bpf_create_map_xattr;
+		bpf_load_btf;
+		bpf_load_program;
+		bpf_load_program_xattr;
+		bpf_map__btf_key_type_id;
+		bpf_map__btf_value_type_id;
+		bpf_map__def;
+		bpf_map__fd;
+		bpf_map__is_offload_neutral;
+		bpf_map__name;
+		bpf_map__next;
+		bpf_map__pin;
+		bpf_map__prev;
+		bpf_map__priv;
+		bpf_map__reuse_fd;
+		bpf_map__set_ifindex;
+		bpf_map__set_inner_map_fd;
+		bpf_map__set_priv;
+		bpf_map__unpin;
+		bpf_map_delete_elem;
+		bpf_map_get_fd_by_id;
+		bpf_map_get_next_id;
+		bpf_map_get_next_key;
+		bpf_map_lookup_and_delete_elem;
+		bpf_map_lookup_elem;
+		bpf_map_update_elem;
+		bpf_obj_get;
+		bpf_obj_get_info_by_fd;
+		bpf_obj_pin;
+		bpf_object__btf_fd;
+		bpf_object__close;
+		bpf_object__find_map_by_name;
+		bpf_object__find_map_by_offset;
+		bpf_object__find_program_by_title;
+		bpf_object__kversion;
+		bpf_object__load;
+		bpf_object__name;
+		bpf_object__next;
+		bpf_object__open;
+		bpf_object__open_buffer;
+		bpf_object__open_xattr;
+		bpf_object__pin;
+		bpf_object__pin_maps;
+		bpf_object__pin_programs;
+		bpf_object__priv;
+		bpf_object__set_priv;
+		bpf_object__unload;
+		bpf_object__unpin_maps;
+		bpf_object__unpin_programs;
+		bpf_perf_event_read_simple;
+		bpf_prog_attach;
+		bpf_prog_detach;
+		bpf_prog_detach2;
+		bpf_prog_get_fd_by_id;
+		bpf_prog_get_next_id;
+		bpf_prog_load;
+		bpf_prog_load_xattr;
+		bpf_prog_query;
+		bpf_prog_test_run;
+		bpf_prog_test_run_xattr;
+		bpf_program__fd;
+		bpf_program__is_kprobe;
+		bpf_program__is_perf_event;
+		bpf_program__is_raw_tracepoint;
+		bpf_program__is_sched_act;
+		bpf_program__is_sched_cls;
+		bpf_program__is_socket_filter;
+		bpf_program__is_tracepoint;
+		bpf_program__is_xdp;
+		bpf_program__load;
+		bpf_program__next;
+		bpf_program__nth_fd;
+		bpf_program__pin;
+		bpf_program__pin_instance;
+		bpf_program__prev;
+		bpf_program__priv;
+		bpf_program__set_expected_attach_type;
+		bpf_program__set_ifindex;
+		bpf_program__set_kprobe;
+		bpf_program__set_perf_event;
+		bpf_program__set_prep;
+		bpf_program__set_priv;
+		bpf_program__set_raw_tracepoint;
+		bpf_program__set_sched_act;
+		bpf_program__set_sched_cls;
+		bpf_program__set_socket_filter;
+		bpf_program__set_tracepoint;
+		bpf_program__set_type;
+		bpf_program__set_xdp;
+		bpf_program__title;
+		bpf_program__unload;
+		bpf_program__unpin;
+		bpf_program__unpin_instance;
+		bpf_prog_linfo__free;
+		bpf_prog_linfo__new;
+		bpf_prog_linfo__lfind_addr_func;
+		bpf_prog_linfo__lfind;
+		bpf_raw_tracepoint_open;
+		bpf_set_link_xdp_fd;
+		bpf_task_fd_query;
+		bpf_verify_program;
+		btf__fd;
+		btf__find_by_name;
+		btf__free;
+		btf__get_from_id;
+		btf__name_by_offset;
+		btf__new;
+		btf__resolve_size;
+		btf__resolve_type;
+		btf__type_by_id;
+		libbpf_attach_type_by_name;
+		libbpf_get_error;
+		libbpf_prog_type_by_name;
+		libbpf_set_print;
+		libbpf_strerror;
+	local:
+		*;
+};
+
+LIBBPF_0.0.2 {
+	global:
+		bpf_probe_helper;
+		bpf_probe_map_type;
+		bpf_probe_prog_type;
+		bpf_map__resize;
+		bpf_map_lookup_elem_flags;
+		bpf_object__btf;
+		bpf_object__find_map_fd_by_name;
+		bpf_get_link_xdp_id;
+		btf__dedup;
+		btf__get_map_kv_tids;
+		btf__get_nr_types;
+		btf__get_raw_data;
+		btf__load;
+		btf_ext__free;
+		btf_ext__func_info_rec_size;
+		btf_ext__get_raw_data;
+		btf_ext__line_info_rec_size;
+		btf_ext__new;
+		btf_ext__reloc_func_info;
+		btf_ext__reloc_line_info;
+		xsk_umem__create;
+		xsk_socket__create;
+		xsk_umem__delete;
+		xsk_socket__delete;
+		xsk_umem__fd;
+		xsk_socket__fd;
+		bpf_program__get_prog_info_linear;
+		bpf_program__bpil_addr_to_offs;
+		bpf_program__bpil_offs_to_addr;
+} LIBBPF_0.0.1;
+
+LIBBPF_0.0.3 {
+	global:
+		bpf_map__is_internal;
+		bpf_map_freeze;
+		btf__finalize_data;
+} LIBBPF_0.0.2;
+
+LIBBPF_0.0.4 {
+	global:
+		bpf_link__destroy;
+		bpf_object__load_xattr;
+		bpf_program__attach_kprobe;
+		bpf_program__attach_perf_event;
+		bpf_program__attach_raw_tracepoint;
+		bpf_program__attach_tracepoint;
+		bpf_program__attach_uprobe;
+		btf_dump__dump_type;
+		btf_dump__free;
+		btf_dump__new;
+		btf__parse_elf;
+		libbpf_num_possible_cpus;
+		perf_buffer__free;
+		perf_buffer__new;
+		perf_buffer__new_raw;
+		perf_buffer__poll;
+		xsk_umem__create;
+} LIBBPF_0.0.3;
+
+LIBBPF_0.0.5 {
+	global:
+		bpf_btf_get_next_id;
+} LIBBPF_0.0.4;
+
+LIBBPF_0.0.6 {
+	global:
+		bpf_get_link_xdp_info;
+		bpf_map__get_pin_path;
+		bpf_map__is_pinned;
+		bpf_map__set_pin_path;
+		bpf_object__open_file;
+		bpf_object__open_mem;
+		bpf_program__attach_trace;
+		bpf_program__get_expected_attach_type;
+		bpf_program__get_type;
+		bpf_program__is_tracing;
+		bpf_program__set_tracing;
+		bpf_program__size;
+		btf__find_by_name_kind;
+		libbpf_find_vmlinux_btf_id;
+} LIBBPF_0.0.5;
+
+LIBBPF_0.0.7 {
+	global:
+		btf_dump__emit_type_decl;
+		bpf_link__disconnect;
+		bpf_map__attach_struct_ops;
+		bpf_map_delete_batch;
+		bpf_map_lookup_and_delete_batch;
+		bpf_map_lookup_batch;
+		bpf_map_update_batch;
+		bpf_object__find_program_by_name;
+		bpf_object__attach_skeleton;
+		bpf_object__destroy_skeleton;
+		bpf_object__detach_skeleton;
+		bpf_object__load_skeleton;
+		bpf_object__open_skeleton;
+		bpf_probe_large_insn_limit;
+		bpf_prog_attach_xattr;
+		bpf_program__attach;
+		bpf_program__name;
+		bpf_program__is_extension;
+		bpf_program__is_struct_ops;
+		bpf_program__set_extension;
+		bpf_program__set_struct_ops;
+		btf__align_of;
+		libbpf_find_kernel_btf;
+} LIBBPF_0.0.6;
+
+LIBBPF_0.0.8 {
+	global:
+		bpf_link__fd;
+		bpf_link__open;
+		bpf_link__pin;
+		bpf_link__pin_path;
+		bpf_link__unpin;
+		bpf_link__update_program;
+		bpf_link_create;
+		bpf_link_update;
+		bpf_map__set_initial_value;
+		bpf_program__attach_cgroup;
+		bpf_program__attach_lsm;
+		bpf_program__is_lsm;
+		bpf_program__set_attach_target;
+		bpf_program__set_lsm;
+		bpf_set_link_xdp_fd_opts;
+} LIBBPF_0.0.7;
+
+LIBBPF_0.0.9 {
+	global:
+		bpf_enable_stats;
+		bpf_iter_create;
+		bpf_link_get_fd_by_id;
+		bpf_link_get_next_id;
+		bpf_program__attach_iter;
+		bpf_program__attach_netns;
+		perf_buffer__consume;
+		ring_buffer__add;
+		ring_buffer__consume;
+		ring_buffer__free;
+		ring_buffer__new;
+		ring_buffer__poll;
+} LIBBPF_0.0.8;
+
+LIBBPF_0.1.0 {
+	global:
+		bpf_link__detach;
+		bpf_link_detach;
+		bpf_map__ifindex;
+		bpf_map__key_size;
+		bpf_map__map_flags;
+		bpf_map__max_entries;
+		bpf_map__numa_node;
+		bpf_map__set_key_size;
+		bpf_map__set_map_flags;
+		bpf_map__set_max_entries;
+		bpf_map__set_numa_node;
+		bpf_map__set_type;
+		bpf_map__set_value_size;
+		bpf_map__type;
+		bpf_map__value_size;
+		bpf_program__attach_xdp;
+		bpf_program__autoload;
+		bpf_program__is_sk_lookup;
+		bpf_program__set_autoload;
+		bpf_program__set_sk_lookup;
+		btf__parse;
+		btf__parse_raw;
+		btf__pointer_size;
+		btf__set_fd;
+		btf__set_pointer_size;
+} LIBBPF_0.0.9;
+
+LIBBPF_0.2.0 {
+	global:
+		bpf_prog_bind_map;
+		bpf_prog_test_run_opts;
+		bpf_program__attach_freplace;
+		bpf_program__section_name;
+		btf__add_array;
+		btf__add_const;
+		btf__add_enum;
+		btf__add_enum_value;
+		btf__add_datasec;
+		btf__add_datasec_var_info;
+		btf__add_field;
+		btf__add_func;
+		btf__add_func_param;
+		btf__add_func_proto;
+		btf__add_fwd;
+		btf__add_int;
+		btf__add_ptr;
+		btf__add_restrict;
+		btf__add_str;
+		btf__add_struct;
+		btf__add_typedef;
+		btf__add_union;
+		btf__add_var;
+		btf__add_volatile;
+		btf__endianness;
+		btf__find_str;
+		btf__new_empty;
+		btf__set_endianness;
+		btf__str_by_offset;
+		perf_buffer__buffer_cnt;
+		perf_buffer__buffer_fd;
+		perf_buffer__epoll_fd;
+		perf_buffer__consume_buffer;
+		xsk_socket__create_shared;
+} LIBBPF_0.1.0;
diff --git a/tools/lib/bpf/libbpf.pc.template b/tools/lib/bpf/libbpf.pc.template
new file mode 100644
index 000000000..b45ed534b
--- /dev/null
+++ b/tools/lib/bpf/libbpf.pc.template
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+prefix=@PREFIX@
+libdir=@LIBDIR@
+includedir=${prefix}/include
+
+Name: libbpf
+Description: BPF library
+Version: @VERSION@
+Libs: -L${libdir} -lbpf
+Requires.private: libelf zlib
+Cflags: -I${includedir}
diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h
new file mode 100644
index 000000000..947d8bd8a
--- /dev/null
+++ b/tools/lib/bpf/libbpf_common.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Common user-facing libbpf helpers.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+
+#ifndef __LIBBPF_LIBBPF_COMMON_H
+#define __LIBBPF_LIBBPF_COMMON_H
+
+#include <string.h>
+
+#ifndef LIBBPF_API
+#define LIBBPF_API __attribute__((visibility("default")))
+#endif
+
+#define LIBBPF_DEPRECATED(msg) __attribute__((deprecated(msg)))
+
+/* Helper macro to declare and initialize libbpf options struct
+ *
+ * This dance with uninitialized declaration, followed by memset to zero,
+ * followed by assignment using compound literal syntax is done to preserve
+ * ability to use a nice struct field initialization syntax and **hopefully**
+ * have all the padding bytes initialized to zero. It's not guaranteed though,
+ * when copying literal, that compiler won't copy garbage in literal's padding
+ * bytes, but that's the best way I've found and it seems to work in practice.
+ *
+ * Macro declares opts struct of given type and name, zero-initializes,
+ * including any extra padding, it with memset() and then assigns initial
+ * values provided by users in struct initializer-syntax as varargs.
+ */
+#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...)				    \
+	struct TYPE NAME = ({ 						    \
+		memset(&NAME, 0, sizeof(struct TYPE));			    \
+		(struct TYPE) {						    \
+			.sz = sizeof(struct TYPE),			    \
+			__VA_ARGS__					    \
+		};							    \
+	})
+
+#endif /* __LIBBPF_LIBBPF_COMMON_H */
diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c
new file mode 100644
index 000000000..0afb51f7a
--- /dev/null
+++ b/tools/lib/bpf/libbpf_errno.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ * Copyright (C) 2017 Nicira, Inc.
+ */
+
+#undef _GNU_SOURCE
+#include <stdio.h>
+#include <string.h>
+
+#include "libbpf.h"
+
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
+#define ERRNO_OFFSET(e)		((e) - __LIBBPF_ERRNO__START)
+#define ERRCODE_OFFSET(c)	ERRNO_OFFSET(LIBBPF_ERRNO__##c)
+#define NR_ERRNO	(__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START)
+
+static const char *libbpf_strerror_table[NR_ERRNO] = {
+	[ERRCODE_OFFSET(LIBELF)]	= "Something wrong in libelf",
+	[ERRCODE_OFFSET(FORMAT)]	= "BPF object format invalid",
+	[ERRCODE_OFFSET(KVERSION)]	= "'version' section incorrect or lost",
+	[ERRCODE_OFFSET(ENDIAN)]	= "Endian mismatch",
+	[ERRCODE_OFFSET(INTERNAL)]	= "Internal error in libbpf",
+	[ERRCODE_OFFSET(RELOC)]		= "Relocation failed",
+	[ERRCODE_OFFSET(VERIFY)]	= "Kernel verifier blocks program loading",
+	[ERRCODE_OFFSET(PROG2BIG)]	= "Program too big",
+	[ERRCODE_OFFSET(KVER)]		= "Incorrect kernel version",
+	[ERRCODE_OFFSET(PROGTYPE)]	= "Kernel doesn't support this program type",
+	[ERRCODE_OFFSET(WRNGPID)]	= "Wrong pid in netlink message",
+	[ERRCODE_OFFSET(INVSEQ)]	= "Invalid netlink sequence",
+	[ERRCODE_OFFSET(NLPARSE)]	= "Incorrect netlink message parsing",
+};
+
+int libbpf_strerror(int err, char *buf, size_t size)
+{
+	if (!buf || !size)
+		return -1;
+
+	err = err > 0 ? err : -err;
+
+	if (err < __LIBBPF_ERRNO__START) {
+		int ret;
+
+		ret = strerror_r(err, buf, size);
+		buf[size - 1] = '\0';
+		return ret;
+	}
+
+	if (err < __LIBBPF_ERRNO__END) {
+		const char *msg;
+
+		msg = libbpf_strerror_table[ERRNO_OFFSET(err)];
+		snprintf(buf, size, "%s", msg);
+		buf[size - 1] = '\0';
+		return 0;
+	}
+
+	snprintf(buf, size, "Unknown libbpf error %d", err);
+	buf[size - 1] = '\0';
+	return -1;
+}
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
new file mode 100644
index 000000000..d99bc847b
--- /dev/null
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -0,0 +1,321 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Internal libbpf helpers.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+
+#ifndef __LIBBPF_LIBBPF_INTERNAL_H
+#define __LIBBPF_LIBBPF_INTERNAL_H
+
+#include <stdlib.h>
+#include <limits.h>
+
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
+/* prevent accidental re-addition of reallocarray() */
+#pragma GCC poison reallocarray
+
+#include "libbpf.h"
+
+#define BTF_INFO_ENC(kind, kind_flag, vlen) \
+	((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
+#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
+#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \
+	((encoding) << 24 | (bits_offset) << 16 | (nr_bits))
+#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \
+	BTF_INT_ENC(encoding, bits_offset, bits)
+#define BTF_MEMBER_ENC(name, type, bits_offset) (name), (type), (bits_offset)
+#define BTF_PARAM_ENC(name, type) (name), (type)
+#define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size)
+
+#ifndef likely
+#define likely(x) __builtin_expect(!!(x), 1)
+#endif
+#ifndef unlikely
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#endif
+#ifndef min
+# define min(x, y) ((x) < (y) ? (x) : (y))
+#endif
+#ifndef max
+# define max(x, y) ((x) < (y) ? (y) : (x))
+#endif
+#ifndef offsetofend
+# define offsetofend(TYPE, FIELD) \
+	(offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD))
+#endif
+
+/* Symbol versioning is different between static and shared library.
+ * Properly versioned symbols are needed for shared library, but
+ * only the symbol of the new version is needed for static library.
+ */
+#ifdef SHARED
+# define COMPAT_VERSION(internal_name, api_name, version) \
+	asm(".symver " #internal_name "," #api_name "@" #version);
+# define DEFAULT_VERSION(internal_name, api_name, version) \
+	asm(".symver " #internal_name "," #api_name "@@" #version);
+#else
+# define COMPAT_VERSION(internal_name, api_name, version)
+# define DEFAULT_VERSION(internal_name, api_name, version) \
+	extern typeof(internal_name) api_name \
+	__attribute__((alias(#internal_name)));
+#endif
+
+extern void libbpf_print(enum libbpf_print_level level,
+			 const char *format, ...)
+	__attribute__((format(printf, 2, 3)));
+
+#define __pr(level, fmt, ...)	\
+do {				\
+	libbpf_print(level, "libbpf: " fmt, ##__VA_ARGS__);	\
+} while (0)
+
+#define pr_warn(fmt, ...)	__pr(LIBBPF_WARN, fmt, ##__VA_ARGS__)
+#define pr_info(fmt, ...)	__pr(LIBBPF_INFO, fmt, ##__VA_ARGS__)
+#define pr_debug(fmt, ...)	__pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__)
+
+#ifndef __has_builtin
+#define __has_builtin(x) 0
+#endif
+/*
+ * Re-implement glibc's reallocarray() for libbpf internal-only use.
+ * reallocarray(), unfortunately, is not available in all versions of glibc,
+ * so requires extra feature detection and using reallocarray() stub from
+ * <tools/libc_compat.h> and COMPAT_NEED_REALLOCARRAY. All this complicates
+ * build of libbpf unnecessarily and is just a maintenance burden. Instead,
+ * it's trivial to implement libbpf-specific internal version and use it
+ * throughout libbpf.
+ */
+static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size)
+{
+	size_t total;
+
+#if __has_builtin(__builtin_mul_overflow)
+	if (unlikely(__builtin_mul_overflow(nmemb, size, &total)))
+		return NULL;
+#else
+	if (size == 0 || nmemb > ULONG_MAX / size)
+		return NULL;
+	total = nmemb * size;
+#endif
+	return realloc(ptr, total);
+}
+
+void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
+		  size_t cur_cnt, size_t max_cnt, size_t add_cnt);
+int btf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt);
+
+static inline bool libbpf_validate_opts(const char *opts,
+					size_t opts_sz, size_t user_sz,
+					const char *type_name)
+{
+	if (user_sz < sizeof(size_t)) {
+		pr_warn("%s size (%zu) is too small\n", type_name, user_sz);
+		return false;
+	}
+	if (user_sz > opts_sz) {
+		size_t i;
+
+		for (i = opts_sz; i < user_sz; i++) {
+			if (opts[i]) {
+				pr_warn("%s has non-zero extra bytes\n",
+					type_name);
+				return false;
+			}
+		}
+	}
+	return true;
+}
+
+#define OPTS_VALID(opts, type)						      \
+	(!(opts) || libbpf_validate_opts((const char *)opts,		      \
+					 offsetofend(struct type,	      \
+						     type##__last_field),     \
+					 (opts)->sz, #type))
+#define OPTS_HAS(opts, field) \
+	((opts) && opts->sz >= offsetofend(typeof(*(opts)), field))
+#define OPTS_GET(opts, field, fallback_value) \
+	(OPTS_HAS(opts, field) ? (opts)->field : fallback_value)
+#define OPTS_SET(opts, field, value)		\
+	do {					\
+		if (OPTS_HAS(opts, field))	\
+			(opts)->field = value;	\
+	} while (0)
+
+int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz);
+int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
+int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
+			 const char *str_sec, size_t str_len);
+
+int bpf_object__section_size(const struct bpf_object *obj, const char *name,
+			     __u32 *size);
+int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
+				__u32 *off);
+
+struct btf_ext_info {
+	/*
+	 * info points to the individual info section (e.g. func_info and
+	 * line_info) from the .BTF.ext. It does not include the __u32 rec_size.
+	 */
+	void *info;
+	__u32 rec_size;
+	__u32 len;
+};
+
+#define for_each_btf_ext_sec(seg, sec)					\
+	for (sec = (seg)->info;						\
+	     (void *)sec < (seg)->info + (seg)->len;			\
+	     sec = (void *)sec + sizeof(struct btf_ext_info_sec) +	\
+		   (seg)->rec_size * sec->num_info)
+
+#define for_each_btf_ext_rec(seg, sec, i, rec)				\
+	for (i = 0, rec = (void *)&(sec)->data;				\
+	     i < (sec)->num_info;					\
+	     i++, rec = (void *)rec + (seg)->rec_size)
+
+/*
+ * The .BTF.ext ELF section layout defined as
+ *   struct btf_ext_header
+ *   func_info subsection
+ *
+ * The func_info subsection layout:
+ *   record size for struct bpf_func_info in the func_info subsection
+ *   struct btf_sec_func_info for section #1
+ *   a list of bpf_func_info records for section #1
+ *     where struct bpf_func_info mimics one in include/uapi/linux/bpf.h
+ *     but may not be identical
+ *   struct btf_sec_func_info for section #2
+ *   a list of bpf_func_info records for section #2
+ *   ......
+ *
+ * Note that the bpf_func_info record size in .BTF.ext may not
+ * be the same as the one defined in include/uapi/linux/bpf.h.
+ * The loader should ensure that record_size meets minimum
+ * requirement and pass the record as is to the kernel. The
+ * kernel will handle the func_info properly based on its contents.
+ */
+struct btf_ext_header {
+	__u16	magic;
+	__u8	version;
+	__u8	flags;
+	__u32	hdr_len;
+
+	/* All offsets are in bytes relative to the end of this header */
+	__u32	func_info_off;
+	__u32	func_info_len;
+	__u32	line_info_off;
+	__u32	line_info_len;
+
+	/* optional part of .BTF.ext header */
+	__u32	core_relo_off;
+	__u32	core_relo_len;
+};
+
+struct btf_ext {
+	union {
+		struct btf_ext_header *hdr;
+		void *data;
+	};
+	struct btf_ext_info func_info;
+	struct btf_ext_info line_info;
+	struct btf_ext_info core_relo_info;
+	__u32 data_size;
+};
+
+struct btf_ext_info_sec {
+	__u32	sec_name_off;
+	__u32	num_info;
+	/* Followed by num_info * record_size number of bytes */
+	__u8	data[];
+};
+
+/* The minimum bpf_func_info checked by the loader */
+struct bpf_func_info_min {
+	__u32   insn_off;
+	__u32   type_id;
+};
+
+/* The minimum bpf_line_info checked by the loader */
+struct bpf_line_info_min {
+	__u32	insn_off;
+	__u32	file_name_off;
+	__u32	line_off;
+	__u32	line_col;
+};
+
+/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
+ * has to be adjusted by relocations.
+ */
+enum bpf_core_relo_kind {
+	BPF_FIELD_BYTE_OFFSET = 0,	/* field byte offset */
+	BPF_FIELD_BYTE_SIZE = 1,	/* field size in bytes */
+	BPF_FIELD_EXISTS = 2,		/* field existence in target kernel */
+	BPF_FIELD_SIGNED = 3,		/* field signedness (0 - unsigned, 1 - signed) */
+	BPF_FIELD_LSHIFT_U64 = 4,	/* bitfield-specific left bitshift */
+	BPF_FIELD_RSHIFT_U64 = 5,	/* bitfield-specific right bitshift */
+	BPF_TYPE_ID_LOCAL = 6,		/* type ID in local BPF object */
+	BPF_TYPE_ID_TARGET = 7,		/* type ID in target kernel */
+	BPF_TYPE_EXISTS = 8,		/* type existence in target kernel */
+	BPF_TYPE_SIZE = 9,		/* type size in bytes */
+	BPF_ENUMVAL_EXISTS = 10,	/* enum value existence in target kernel */
+	BPF_ENUMVAL_VALUE = 11,		/* enum value integer value */
+};
+
+/* The minimum bpf_core_relo checked by the loader
+ *
+ * CO-RE relocation captures the following data:
+ * - insn_off - instruction offset (in bytes) within a BPF program that needs
+ *   its insn->imm field to be relocated with actual field info;
+ * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
+ *   type or field;
+ * - access_str_off - offset into corresponding .BTF string section. String
+ *   interpretation depends on specific relocation kind:
+ *     - for field-based relocations, string encodes an accessed field using
+ *     a sequence of field and array indices, separated by colon (:). It's
+ *     conceptually very close to LLVM's getelementptr ([0]) instruction's
+ *     arguments for identifying offset to a field.
+ *     - for type-based relocations, strings is expected to be just "0";
+ *     - for enum value-based relocations, string contains an index of enum
+ *     value within its enum type;
+ *
+ * Example to provide a better feel.
+ *
+ *   struct sample {
+ *       int a;
+ *       struct {
+ *           int b[10];
+ *       };
+ *   };
+ *
+ *   struct sample *s = ...;
+ *   int x = &s->a;     // encoded as "0:0" (a is field #0)
+ *   int y = &s->b[5];  // encoded as "0:1:0:5" (anon struct is field #1, 
+ *                      // b is field #0 inside anon struct, accessing elem #5)
+ *   int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
+ *
+ * type_id for all relocs in this example  will capture BTF type id of
+ * `struct sample`.
+ *
+ * Such relocation is emitted when using __builtin_preserve_access_index()
+ * Clang built-in, passing expression that captures field address, e.g.:
+ *
+ * bpf_probe_read(&dst, sizeof(dst),
+ *		  __builtin_preserve_access_index(&src->a.b.c));
+ *
+ * In this case Clang will emit field relocation recording necessary data to
+ * be able to find offset of embedded `a.b.c` field within `src` struct.
+ *
+ *   [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
+ */
+struct bpf_core_relo {
+	__u32   insn_off;
+	__u32   type_id;
+	__u32   access_str_off;
+	enum bpf_core_relo_kind kind;
+};
+
+#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
new file mode 100644
index 000000000..13393f0ea
--- /dev/null
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -0,0 +1,359 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2019 Netronome Systems, Inc. */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <net/if.h>
+#include <sys/utsname.h>
+
+#include <linux/btf.h>
+#include <linux/filter.h>
+#include <linux/kernel.h>
+
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+
+static bool grep(const char *buffer, const char *pattern)
+{
+	return !!strstr(buffer, pattern);
+}
+
+static int get_vendor_id(int ifindex)
+{
+	char ifname[IF_NAMESIZE], path[64], buf[8];
+	ssize_t len;
+	int fd;
+
+	if (!if_indextoname(ifindex, ifname))
+		return -1;
+
+	snprintf(path, sizeof(path), "/sys/class/net/%s/device/vendor", ifname);
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return -1;
+
+	len = read(fd, buf, sizeof(buf));
+	close(fd);
+	if (len < 0)
+		return -1;
+	if (len >= (ssize_t)sizeof(buf))
+		return -1;
+	buf[len] = '\0';
+
+	return strtol(buf, NULL, 0);
+}
+
+static int get_kernel_version(void)
+{
+	int version, subversion, patchlevel;
+	struct utsname utsn;
+
+	/* Return 0 on failure, and attempt to probe with empty kversion */
+	if (uname(&utsn))
+		return 0;
+
+	if (sscanf(utsn.release, "%d.%d.%d",
+		   &version, &subversion, &patchlevel) != 3)
+		return 0;
+
+	return (version << 16) + (subversion << 8) + patchlevel;
+}
+
+static void
+probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
+	   size_t insns_cnt, char *buf, size_t buf_len, __u32 ifindex)
+{
+	struct bpf_load_program_attr xattr = {};
+	int fd;
+
+	switch (prog_type) {
+	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+		xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT;
+		break;
+	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+		xattr.expected_attach_type = BPF_CGROUP_GETSOCKOPT;
+		break;
+	case BPF_PROG_TYPE_SK_LOOKUP:
+		xattr.expected_attach_type = BPF_SK_LOOKUP;
+		break;
+	case BPF_PROG_TYPE_KPROBE:
+		xattr.kern_version = get_kernel_version();
+		break;
+	case BPF_PROG_TYPE_UNSPEC:
+	case BPF_PROG_TYPE_SOCKET_FILTER:
+	case BPF_PROG_TYPE_SCHED_CLS:
+	case BPF_PROG_TYPE_SCHED_ACT:
+	case BPF_PROG_TYPE_TRACEPOINT:
+	case BPF_PROG_TYPE_XDP:
+	case BPF_PROG_TYPE_PERF_EVENT:
+	case BPF_PROG_TYPE_CGROUP_SKB:
+	case BPF_PROG_TYPE_CGROUP_SOCK:
+	case BPF_PROG_TYPE_LWT_IN:
+	case BPF_PROG_TYPE_LWT_OUT:
+	case BPF_PROG_TYPE_LWT_XMIT:
+	case BPF_PROG_TYPE_SOCK_OPS:
+	case BPF_PROG_TYPE_SK_SKB:
+	case BPF_PROG_TYPE_CGROUP_DEVICE:
+	case BPF_PROG_TYPE_SK_MSG:
+	case BPF_PROG_TYPE_RAW_TRACEPOINT:
+	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
+	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
+	case BPF_PROG_TYPE_LIRC_MODE2:
+	case BPF_PROG_TYPE_SK_REUSEPORT:
+	case BPF_PROG_TYPE_FLOW_DISSECTOR:
+	case BPF_PROG_TYPE_CGROUP_SYSCTL:
+	case BPF_PROG_TYPE_TRACING:
+	case BPF_PROG_TYPE_STRUCT_OPS:
+	case BPF_PROG_TYPE_EXT:
+	case BPF_PROG_TYPE_LSM:
+	default:
+		break;
+	}
+
+	xattr.prog_type = prog_type;
+	xattr.insns = insns;
+	xattr.insns_cnt = insns_cnt;
+	xattr.license = "GPL";
+	xattr.prog_ifindex = ifindex;
+
+	fd = bpf_load_program_xattr(&xattr, buf, buf_len);
+	if (fd >= 0)
+		close(fd);
+}
+
+bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex)
+{
+	struct bpf_insn insns[2] = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN()
+	};
+
+	if (ifindex && prog_type == BPF_PROG_TYPE_SCHED_CLS)
+		/* nfp returns -EINVAL on exit(0) with TC offload */
+		insns[0].imm = 2;
+
+	errno = 0;
+	probe_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex);
+
+	return errno != EINVAL && errno != EOPNOTSUPP;
+}
+
+int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
+			 const char *str_sec, size_t str_len)
+{
+	struct btf_header hdr = {
+		.magic = BTF_MAGIC,
+		.version = BTF_VERSION,
+		.hdr_len = sizeof(struct btf_header),
+		.type_len = types_len,
+		.str_off = types_len,
+		.str_len = str_len,
+	};
+	int btf_fd, btf_len;
+	__u8 *raw_btf;
+
+	btf_len = hdr.hdr_len + hdr.type_len + hdr.str_len;
+	raw_btf = malloc(btf_len);
+	if (!raw_btf)
+		return -ENOMEM;
+
+	memcpy(raw_btf, &hdr, sizeof(hdr));
+	memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len);
+	memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len);
+
+	btf_fd = bpf_load_btf(raw_btf, btf_len, NULL, 0, false);
+
+	free(raw_btf);
+	return btf_fd;
+}
+
+static int load_local_storage_btf(void)
+{
+	const char strs[] = "\0bpf_spin_lock\0val\0cnt\0l";
+	/* struct bpf_spin_lock {
+	 *   int val;
+	 * };
+	 * struct val {
+	 *   int cnt;
+	 *   struct bpf_spin_lock l;
+	 * };
+	 */
+	__u32 types[] = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+		/* struct bpf_spin_lock */                      /* [2] */
+		BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4),
+		BTF_MEMBER_ENC(15, 1, 0), /* int val; */
+		/* struct val */                                /* [3] */
+		BTF_TYPE_ENC(15, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+		BTF_MEMBER_ENC(19, 1, 0), /* int cnt; */
+		BTF_MEMBER_ENC(23, 2, 32),/* struct bpf_spin_lock l; */
+	};
+
+	return libbpf__load_raw_btf((char *)types, sizeof(types),
+				     strs, sizeof(strs));
+}
+
+bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
+{
+	int key_size, value_size, max_entries, map_flags;
+	__u32 btf_key_type_id = 0, btf_value_type_id = 0;
+	struct bpf_create_map_attr attr = {};
+	int fd = -1, btf_fd = -1, fd_inner;
+
+	key_size	= sizeof(__u32);
+	value_size	= sizeof(__u32);
+	max_entries	= 1;
+	map_flags	= 0;
+
+	switch (map_type) {
+	case BPF_MAP_TYPE_STACK_TRACE:
+		value_size	= sizeof(__u64);
+		break;
+	case BPF_MAP_TYPE_LPM_TRIE:
+		key_size	= sizeof(__u64);
+		value_size	= sizeof(__u64);
+		map_flags	= BPF_F_NO_PREALLOC;
+		break;
+	case BPF_MAP_TYPE_CGROUP_STORAGE:
+	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
+		key_size	= sizeof(struct bpf_cgroup_storage_key);
+		value_size	= sizeof(__u64);
+		max_entries	= 0;
+		break;
+	case BPF_MAP_TYPE_QUEUE:
+	case BPF_MAP_TYPE_STACK:
+		key_size	= 0;
+		break;
+	case BPF_MAP_TYPE_SK_STORAGE:
+	case BPF_MAP_TYPE_INODE_STORAGE:
+		btf_key_type_id = 1;
+		btf_value_type_id = 3;
+		value_size = 8;
+		max_entries = 0;
+		map_flags = BPF_F_NO_PREALLOC;
+		btf_fd = load_local_storage_btf();
+		if (btf_fd < 0)
+			return false;
+		break;
+	case BPF_MAP_TYPE_RINGBUF:
+		key_size = 0;
+		value_size = 0;
+		max_entries = sysconf(_SC_PAGE_SIZE);
+		break;
+	case BPF_MAP_TYPE_UNSPEC:
+	case BPF_MAP_TYPE_HASH:
+	case BPF_MAP_TYPE_ARRAY:
+	case BPF_MAP_TYPE_PROG_ARRAY:
+	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+	case BPF_MAP_TYPE_PERCPU_HASH:
+	case BPF_MAP_TYPE_PERCPU_ARRAY:
+	case BPF_MAP_TYPE_CGROUP_ARRAY:
+	case BPF_MAP_TYPE_LRU_HASH:
+	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
+	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+	case BPF_MAP_TYPE_HASH_OF_MAPS:
+	case BPF_MAP_TYPE_DEVMAP:
+	case BPF_MAP_TYPE_DEVMAP_HASH:
+	case BPF_MAP_TYPE_SOCKMAP:
+	case BPF_MAP_TYPE_CPUMAP:
+	case BPF_MAP_TYPE_XSKMAP:
+	case BPF_MAP_TYPE_SOCKHASH:
+	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
+	case BPF_MAP_TYPE_STRUCT_OPS:
+	default:
+		break;
+	}
+
+	if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
+	    map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+		/* TODO: probe for device, once libbpf has a function to create
+		 * map-in-map for offload
+		 */
+		if (ifindex)
+			return false;
+
+		fd_inner = bpf_create_map(BPF_MAP_TYPE_HASH,
+					  sizeof(__u32), sizeof(__u32), 1, 0);
+		if (fd_inner < 0)
+			return false;
+		fd = bpf_create_map_in_map(map_type, NULL, sizeof(__u32),
+					   fd_inner, 1, 0);
+		close(fd_inner);
+	} else {
+		/* Note: No other restriction on map type probes for offload */
+		attr.map_type = map_type;
+		attr.key_size = key_size;
+		attr.value_size = value_size;
+		attr.max_entries = max_entries;
+		attr.map_flags = map_flags;
+		attr.map_ifindex = ifindex;
+		if (btf_fd >= 0) {
+			attr.btf_fd = btf_fd;
+			attr.btf_key_type_id = btf_key_type_id;
+			attr.btf_value_type_id = btf_value_type_id;
+		}
+
+		fd = bpf_create_map_xattr(&attr);
+	}
+	if (fd >= 0)
+		close(fd);
+	if (btf_fd >= 0)
+		close(btf_fd);
+
+	return fd >= 0;
+}
+
+bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type,
+		      __u32 ifindex)
+{
+	struct bpf_insn insns[2] = {
+		BPF_EMIT_CALL(id),
+		BPF_EXIT_INSN()
+	};
+	char buf[4096] = {};
+	bool res;
+
+	probe_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf),
+		   ifindex);
+	res = !grep(buf, "invalid func ") && !grep(buf, "unknown func ");
+
+	if (ifindex) {
+		switch (get_vendor_id(ifindex)) {
+		case 0x19ee: /* Netronome specific */
+			res = res && !grep(buf, "not supported by FW") &&
+				!grep(buf, "unsupported function id");
+			break;
+		default:
+			break;
+		}
+	}
+
+	return res;
+}
+
+/*
+ * Probe for availability of kernel commit (5.3):
+ *
+ * c04c0d2b968a ("bpf: increase complexity limit and maximum program size")
+ */
+bool bpf_probe_large_insn_limit(__u32 ifindex)
+{
+	struct bpf_insn insns[BPF_MAXINSNS + 1];
+	int i;
+
+	for (i = 0; i < BPF_MAXINSNS; i++)
+		insns[i] = BPF_MOV64_IMM(BPF_REG_0, 1);
+	insns[BPF_MAXINSNS] = BPF_EXIT_INSN();
+
+	errno = 0;
+	probe_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0,
+		   ifindex);
+
+	return errno != E2BIG && errno != EINVAL;
+}
diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h
new file mode 100644
index 000000000..59c779c57
--- /dev/null
+++ b/tools/lib/bpf/libbpf_util.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2019 Facebook */
+
+#ifndef __LIBBPF_LIBBPF_UTIL_H
+#define __LIBBPF_LIBBPF_UTIL_H
+
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Use these barrier functions instead of smp_[rw]mb() when they are
+ * used in a libbpf header file. That way they can be built into the
+ * application that uses libbpf.
+ */
+#if defined(__i386__) || defined(__x86_64__)
+# define libbpf_smp_rmb() asm volatile("" : : : "memory")
+# define libbpf_smp_wmb() asm volatile("" : : : "memory")
+# define libbpf_smp_mb() \
+	asm volatile("lock; addl $0,-4(%%rsp)" : : : "memory", "cc")
+/* Hinders stores to be observed before older loads. */
+# define libbpf_smp_rwmb() asm volatile("" : : : "memory")
+#elif defined(__aarch64__)
+# define libbpf_smp_rmb() asm volatile("dmb ishld" : : : "memory")
+# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
+# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
+# define libbpf_smp_rwmb() libbpf_smp_mb()
+#elif defined(__arm__)
+/* These are only valid for armv7 and above */
+# define libbpf_smp_rmb() asm volatile("dmb ish" : : : "memory")
+# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
+# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
+# define libbpf_smp_rwmb() libbpf_smp_mb()
+#else
+/* Architecture missing native barrier functions. */
+# define libbpf_smp_rmb() __sync_synchronize()
+# define libbpf_smp_wmb() __sync_synchronize()
+# define libbpf_smp_mb() __sync_synchronize()
+# define libbpf_smp_rwmb() __sync_synchronize()
+#endif
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
new file mode 100644
index 000000000..d2cb28e9e
--- /dev/null
+++ b/tools/lib/bpf/netlink.c
@@ -0,0 +1,372 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2018 Facebook */
+
+#include <stdlib.h>
+#include <memory.h>
+#include <unistd.h>
+#include <linux/bpf.h>
+#include <linux/rtnetlink.h>
+#include <sys/socket.h>
+#include <errno.h>
+#include <time.h>
+
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+#include "nlattr.h"
+
+#ifndef SOL_NETLINK
+#define SOL_NETLINK 270
+#endif
+
+typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
+
+typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t,
+			      void *cookie);
+
+struct xdp_id_md {
+	int ifindex;
+	__u32 flags;
+	struct xdp_link_info info;
+};
+
+static int libbpf_netlink_open(__u32 *nl_pid)
+{
+	struct sockaddr_nl sa;
+	socklen_t addrlen;
+	int one = 1, ret;
+	int sock;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.nl_family = AF_NETLINK;
+
+	sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
+	if (sock < 0)
+		return -errno;
+
+	if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK,
+		       &one, sizeof(one)) < 0) {
+		pr_warn("Netlink error reporting not supported\n");
+	}
+
+	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+		ret = -errno;
+		goto cleanup;
+	}
+
+	addrlen = sizeof(sa);
+	if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
+		ret = -errno;
+		goto cleanup;
+	}
+
+	if (addrlen != sizeof(sa)) {
+		ret = -LIBBPF_ERRNO__INTERNAL;
+		goto cleanup;
+	}
+
+	*nl_pid = sa.nl_pid;
+	return sock;
+
+cleanup:
+	close(sock);
+	return ret;
+}
+
+static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq,
+			    __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
+			    void *cookie)
+{
+	bool multipart = true;
+	struct nlmsgerr *err;
+	struct nlmsghdr *nh;
+	char buf[4096];
+	int len, ret;
+
+	while (multipart) {
+		multipart = false;
+		len = recv(sock, buf, sizeof(buf), 0);
+		if (len < 0) {
+			ret = -errno;
+			goto done;
+		}
+
+		if (len == 0)
+			break;
+
+		for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+		     nh = NLMSG_NEXT(nh, len)) {
+			if (nh->nlmsg_pid != nl_pid) {
+				ret = -LIBBPF_ERRNO__WRNGPID;
+				goto done;
+			}
+			if (nh->nlmsg_seq != seq) {
+				ret = -LIBBPF_ERRNO__INVSEQ;
+				goto done;
+			}
+			if (nh->nlmsg_flags & NLM_F_MULTI)
+				multipart = true;
+			switch (nh->nlmsg_type) {
+			case NLMSG_ERROR:
+				err = (struct nlmsgerr *)NLMSG_DATA(nh);
+				if (!err->error)
+					continue;
+				ret = err->error;
+				libbpf_nla_dump_errormsg(nh);
+				goto done;
+			case NLMSG_DONE:
+				return 0;
+			default:
+				break;
+			}
+			if (_fn) {
+				ret = _fn(nh, fn, cookie);
+				if (ret)
+					return ret;
+			}
+		}
+	}
+	ret = 0;
+done:
+	return ret;
+}
+
+static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
+					 __u32 flags)
+{
+	int sock, seq = 0, ret;
+	struct nlattr *nla, *nla_xdp;
+	struct {
+		struct nlmsghdr  nh;
+		struct ifinfomsg ifinfo;
+		char             attrbuf[64];
+	} req;
+	__u32 nl_pid = 0;
+
+	sock = libbpf_netlink_open(&nl_pid);
+	if (sock < 0)
+		return sock;
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+	req.nh.nlmsg_type = RTM_SETLINK;
+	req.nh.nlmsg_pid = 0;
+	req.nh.nlmsg_seq = ++seq;
+	req.ifinfo.ifi_family = AF_UNSPEC;
+	req.ifinfo.ifi_index = ifindex;
+
+	/* started nested attribute for XDP */
+	nla = (struct nlattr *)(((char *)&req)
+				+ NLMSG_ALIGN(req.nh.nlmsg_len));
+	nla->nla_type = NLA_F_NESTED | IFLA_XDP;
+	nla->nla_len = NLA_HDRLEN;
+
+	/* add XDP fd */
+	nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+	nla_xdp->nla_type = IFLA_XDP_FD;
+	nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
+	memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
+	nla->nla_len += nla_xdp->nla_len;
+
+	/* if user passed in any flags, add those too */
+	if (flags) {
+		nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+		nla_xdp->nla_type = IFLA_XDP_FLAGS;
+		nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
+		memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
+		nla->nla_len += nla_xdp->nla_len;
+	}
+
+	if (flags & XDP_FLAGS_REPLACE) {
+		nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+		nla_xdp->nla_type = IFLA_XDP_EXPECTED_FD;
+		nla_xdp->nla_len = NLA_HDRLEN + sizeof(old_fd);
+		memcpy((char *)nla_xdp + NLA_HDRLEN, &old_fd, sizeof(old_fd));
+		nla->nla_len += nla_xdp->nla_len;
+	}
+
+	req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
+
+	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		ret = -errno;
+		goto cleanup;
+	}
+	ret = bpf_netlink_recv(sock, nl_pid, seq, NULL, NULL, NULL);
+
+cleanup:
+	close(sock);
+	return ret;
+}
+
+int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
+			     const struct bpf_xdp_set_link_opts *opts)
+{
+	int old_fd = -1;
+
+	if (!OPTS_VALID(opts, bpf_xdp_set_link_opts))
+		return -EINVAL;
+
+	if (OPTS_HAS(opts, old_fd)) {
+		old_fd = OPTS_GET(opts, old_fd, -1);
+		flags |= XDP_FLAGS_REPLACE;
+	}
+
+	return __bpf_set_link_xdp_fd_replace(ifindex, fd,
+					     old_fd,
+					     flags);
+}
+
+int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
+{
+	return __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags);
+}
+
+static int __dump_link_nlmsg(struct nlmsghdr *nlh,
+			     libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
+{
+	struct nlattr *tb[IFLA_MAX + 1], *attr;
+	struct ifinfomsg *ifi = NLMSG_DATA(nlh);
+	int len;
+
+	len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
+	attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi)));
+	if (libbpf_nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0)
+		return -LIBBPF_ERRNO__NLPARSE;
+
+	return dump_link_nlmsg(cookie, ifi, tb);
+}
+
+static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)
+{
+	struct nlattr *xdp_tb[IFLA_XDP_MAX + 1];
+	struct xdp_id_md *xdp_id = cookie;
+	struct ifinfomsg *ifinfo = msg;
+	int ret;
+
+	if (xdp_id->ifindex && xdp_id->ifindex != ifinfo->ifi_index)
+		return 0;
+
+	if (!tb[IFLA_XDP])
+		return 0;
+
+	ret = libbpf_nla_parse_nested(xdp_tb, IFLA_XDP_MAX, tb[IFLA_XDP], NULL);
+	if (ret)
+		return ret;
+
+	if (!xdp_tb[IFLA_XDP_ATTACHED])
+		return 0;
+
+	xdp_id->info.attach_mode = libbpf_nla_getattr_u8(
+		xdp_tb[IFLA_XDP_ATTACHED]);
+
+	if (xdp_id->info.attach_mode == XDP_ATTACHED_NONE)
+		return 0;
+
+	if (xdp_tb[IFLA_XDP_PROG_ID])
+		xdp_id->info.prog_id = libbpf_nla_getattr_u32(
+			xdp_tb[IFLA_XDP_PROG_ID]);
+
+	if (xdp_tb[IFLA_XDP_SKB_PROG_ID])
+		xdp_id->info.skb_prog_id = libbpf_nla_getattr_u32(
+			xdp_tb[IFLA_XDP_SKB_PROG_ID]);
+
+	if (xdp_tb[IFLA_XDP_DRV_PROG_ID])
+		xdp_id->info.drv_prog_id = libbpf_nla_getattr_u32(
+			xdp_tb[IFLA_XDP_DRV_PROG_ID]);
+
+	if (xdp_tb[IFLA_XDP_HW_PROG_ID])
+		xdp_id->info.hw_prog_id = libbpf_nla_getattr_u32(
+			xdp_tb[IFLA_XDP_HW_PROG_ID]);
+
+	return 0;
+}
+
+static int libbpf_nl_get_link(int sock, unsigned int nl_pid,
+			      libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie);
+
+int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
+			  size_t info_size, __u32 flags)
+{
+	struct xdp_id_md xdp_id = {};
+	int sock, ret;
+	__u32 nl_pid = 0;
+	__u32 mask;
+
+	if (flags & ~XDP_FLAGS_MASK || !info_size)
+		return -EINVAL;
+
+	/* Check whether the single {HW,DRV,SKB} mode is set */
+	flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE);
+	mask = flags - 1;
+	if (flags && flags & mask)
+		return -EINVAL;
+
+	sock = libbpf_netlink_open(&nl_pid);
+	if (sock < 0)
+		return sock;
+
+	xdp_id.ifindex = ifindex;
+	xdp_id.flags = flags;
+
+	ret = libbpf_nl_get_link(sock, nl_pid, get_xdp_info, &xdp_id);
+	if (!ret) {
+		size_t sz = min(info_size, sizeof(xdp_id.info));
+
+		memcpy(info, &xdp_id.info, sz);
+		memset((void *) info + sz, 0, info_size - sz);
+	}
+
+	close(sock);
+	return ret;
+}
+
+static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags)
+{
+	flags &= XDP_FLAGS_MODES;
+
+	if (info->attach_mode != XDP_ATTACHED_MULTI && !flags)
+		return info->prog_id;
+	if (flags & XDP_FLAGS_DRV_MODE)
+		return info->drv_prog_id;
+	if (flags & XDP_FLAGS_HW_MODE)
+		return info->hw_prog_id;
+	if (flags & XDP_FLAGS_SKB_MODE)
+		return info->skb_prog_id;
+
+	return 0;
+}
+
+int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
+{
+	struct xdp_link_info info;
+	int ret;
+
+	ret = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags);
+	if (!ret)
+		*prog_id = get_xdp_id(&info, flags);
+
+	return ret;
+}
+
+int libbpf_nl_get_link(int sock, unsigned int nl_pid,
+		       libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
+{
+	struct {
+		struct nlmsghdr nlh;
+		struct ifinfomsg ifm;
+	} req = {
+		.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+		.nlh.nlmsg_type = RTM_GETLINK,
+		.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+		.ifm.ifi_family = AF_PACKET,
+	};
+	int seq = time(NULL);
+
+	req.nlh.nlmsg_seq = seq;
+	if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+		return -errno;
+
+	return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg,
+				dump_link_nlmsg, cookie);
+}
diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c
new file mode 100644
index 000000000..1a04299a2
--- /dev/null
+++ b/tools/lib/bpf/nlattr.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * NETLINK      Netlink attributes
+ *
+ * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <linux/rtnetlink.h>
+#include "nlattr.h"
+#include "libbpf_internal.h"
+
+static uint16_t nla_attr_minlen[LIBBPF_NLA_TYPE_MAX+1] = {
+	[LIBBPF_NLA_U8]		= sizeof(uint8_t),
+	[LIBBPF_NLA_U16]	= sizeof(uint16_t),
+	[LIBBPF_NLA_U32]	= sizeof(uint32_t),
+	[LIBBPF_NLA_U64]	= sizeof(uint64_t),
+	[LIBBPF_NLA_STRING]	= 1,
+	[LIBBPF_NLA_FLAG]	= 0,
+};
+
+static struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
+{
+	int totlen = NLA_ALIGN(nla->nla_len);
+
+	*remaining -= totlen;
+	return (struct nlattr *) ((char *) nla + totlen);
+}
+
+static int nla_ok(const struct nlattr *nla, int remaining)
+{
+	return remaining >= sizeof(*nla) &&
+	       nla->nla_len >= sizeof(*nla) &&
+	       nla->nla_len <= remaining;
+}
+
+static int nla_type(const struct nlattr *nla)
+{
+	return nla->nla_type & NLA_TYPE_MASK;
+}
+
+static int validate_nla(struct nlattr *nla, int maxtype,
+			struct libbpf_nla_policy *policy)
+{
+	struct libbpf_nla_policy *pt;
+	unsigned int minlen = 0;
+	int type = nla_type(nla);
+
+	if (type < 0 || type > maxtype)
+		return 0;
+
+	pt = &policy[type];
+
+	if (pt->type > LIBBPF_NLA_TYPE_MAX)
+		return 0;
+
+	if (pt->minlen)
+		minlen = pt->minlen;
+	else if (pt->type != LIBBPF_NLA_UNSPEC)
+		minlen = nla_attr_minlen[pt->type];
+
+	if (libbpf_nla_len(nla) < minlen)
+		return -1;
+
+	if (pt->maxlen && libbpf_nla_len(nla) > pt->maxlen)
+		return -1;
+
+	if (pt->type == LIBBPF_NLA_STRING) {
+		char *data = libbpf_nla_data(nla);
+
+		if (data[libbpf_nla_len(nla) - 1] != '\0')
+			return -1;
+	}
+
+	return 0;
+}
+
+static inline int nlmsg_len(const struct nlmsghdr *nlh)
+{
+	return nlh->nlmsg_len - NLMSG_HDRLEN;
+}
+
+/**
+ * Create attribute index based on a stream of attributes.
+ * @arg tb		Index array to be filled (maxtype+1 elements).
+ * @arg maxtype		Maximum attribute type expected and accepted.
+ * @arg head		Head of attribute stream.
+ * @arg len		Length of attribute stream.
+ * @arg policy		Attribute validation policy.
+ *
+ * Iterates over the stream of attributes and stores a pointer to each
+ * attribute in the index array using the attribute type as index to
+ * the array. Attribute with a type greater than the maximum type
+ * specified will be silently ignored in order to maintain backwards
+ * compatibility. If \a policy is not NULL, the attribute will be
+ * validated using the specified policy.
+ *
+ * @see nla_validate
+ * @return 0 on success or a negative error code.
+ */
+int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head,
+		     int len, struct libbpf_nla_policy *policy)
+{
+	struct nlattr *nla;
+	int rem, err;
+
+	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
+
+	libbpf_nla_for_each_attr(nla, head, len, rem) {
+		int type = nla_type(nla);
+
+		if (type > maxtype)
+			continue;
+
+		if (policy) {
+			err = validate_nla(nla, maxtype, policy);
+			if (err < 0)
+				goto errout;
+		}
+
+		if (tb[type])
+			pr_warn("Attribute of type %#x found multiple times in message, "
+				"previous attribute is being ignored.\n", type);
+
+		tb[type] = nla;
+	}
+
+	err = 0;
+errout:
+	return err;
+}
+
+/**
+ * Create attribute index based on nested attribute
+ * @arg tb              Index array to be filled (maxtype+1 elements).
+ * @arg maxtype         Maximum attribute type expected and accepted.
+ * @arg nla             Nested Attribute.
+ * @arg policy          Attribute validation policy.
+ *
+ * Feeds the stream of attributes nested into the specified attribute
+ * to libbpf_nla_parse().
+ *
+ * @see libbpf_nla_parse
+ * @return 0 on success or a negative error code.
+ */
+int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype,
+			    struct nlattr *nla,
+			    struct libbpf_nla_policy *policy)
+{
+	return libbpf_nla_parse(tb, maxtype, libbpf_nla_data(nla),
+				libbpf_nla_len(nla), policy);
+}
+
+/* dump netlink extended ack error message */
+int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh)
+{
+	struct libbpf_nla_policy extack_policy[NLMSGERR_ATTR_MAX + 1] = {
+		[NLMSGERR_ATTR_MSG]	= { .type = LIBBPF_NLA_STRING },
+		[NLMSGERR_ATTR_OFFS]	= { .type = LIBBPF_NLA_U32 },
+	};
+	struct nlattr *tb[NLMSGERR_ATTR_MAX + 1], *attr;
+	struct nlmsgerr *err;
+	char *errmsg = NULL;
+	int hlen, alen;
+
+	/* no TLVs, nothing to do here */
+	if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS))
+		return 0;
+
+	err = (struct nlmsgerr *)NLMSG_DATA(nlh);
+	hlen = sizeof(*err);
+
+	/* if NLM_F_CAPPED is set then the inner err msg was capped */
+	if (!(nlh->nlmsg_flags & NLM_F_CAPPED))
+		hlen += nlmsg_len(&err->msg);
+
+	attr = (struct nlattr *) ((void *) err + hlen);
+	alen = (void *)nlh + nlh->nlmsg_len - (void *)attr;
+
+	if (libbpf_nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen,
+			     extack_policy) != 0) {
+		pr_warn("Failed to parse extended error attributes\n");
+		return 0;
+	}
+
+	if (tb[NLMSGERR_ATTR_MSG])
+		errmsg = (char *) libbpf_nla_data(tb[NLMSGERR_ATTR_MSG]);
+
+	pr_warn("Kernel error message: %s\n", errmsg);
+
+	return 0;
+}
diff --git a/tools/lib/bpf/nlattr.h b/tools/lib/bpf/nlattr.h
new file mode 100644
index 000000000..6cc3ac916
--- /dev/null
+++ b/tools/lib/bpf/nlattr.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * NETLINK      Netlink attributes
+ *
+ * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch>
+ */
+
+#ifndef __LIBBPF_NLATTR_H
+#define __LIBBPF_NLATTR_H
+
+#include <stdint.h>
+#include <linux/netlink.h>
+/* avoid multiple definition of netlink features */
+#define __LINUX_NETLINK_H
+
+/**
+ * Standard attribute types to specify validation policy
+ */
+enum {
+	LIBBPF_NLA_UNSPEC,	/**< Unspecified type, binary data chunk */
+	LIBBPF_NLA_U8,		/**< 8 bit integer */
+	LIBBPF_NLA_U16,		/**< 16 bit integer */
+	LIBBPF_NLA_U32,		/**< 32 bit integer */
+	LIBBPF_NLA_U64,		/**< 64 bit integer */
+	LIBBPF_NLA_STRING,	/**< NUL terminated character string */
+	LIBBPF_NLA_FLAG,	/**< Flag */
+	LIBBPF_NLA_MSECS,	/**< Micro seconds (64bit) */
+	LIBBPF_NLA_NESTED,	/**< Nested attributes */
+	__LIBBPF_NLA_TYPE_MAX,
+};
+
+#define LIBBPF_NLA_TYPE_MAX (__LIBBPF_NLA_TYPE_MAX - 1)
+
+/**
+ * @ingroup attr
+ * Attribute validation policy.
+ *
+ * See section @core_doc{core_attr_parse,Attribute Parsing} for more details.
+ */
+struct libbpf_nla_policy {
+	/** Type of attribute or LIBBPF_NLA_UNSPEC */
+	uint16_t	type;
+
+	/** Minimal length of payload required */
+	uint16_t	minlen;
+
+	/** Maximal length of payload allowed */
+	uint16_t	maxlen;
+};
+
+/**
+ * @ingroup attr
+ * Iterate over a stream of attributes
+ * @arg pos	loop counter, set to current attribute
+ * @arg head	head of attribute stream
+ * @arg len	length of attribute stream
+ * @arg rem	initialized to len, holds bytes currently remaining in stream
+ */
+#define libbpf_nla_for_each_attr(pos, head, len, rem) \
+	for (pos = head, rem = len; \
+	     nla_ok(pos, rem); \
+	     pos = nla_next(pos, &(rem)))
+
+/**
+ * libbpf_nla_data - head of payload
+ * @nla: netlink attribute
+ */
+static inline void *libbpf_nla_data(const struct nlattr *nla)
+{
+	return (char *) nla + NLA_HDRLEN;
+}
+
+static inline uint8_t libbpf_nla_getattr_u8(const struct nlattr *nla)
+{
+	return *(uint8_t *)libbpf_nla_data(nla);
+}
+
+static inline uint32_t libbpf_nla_getattr_u32(const struct nlattr *nla)
+{
+	return *(uint32_t *)libbpf_nla_data(nla);
+}
+
+static inline const char *libbpf_nla_getattr_str(const struct nlattr *nla)
+{
+	return (const char *)libbpf_nla_data(nla);
+}
+
+/**
+ * libbpf_nla_len - length of payload
+ * @nla: netlink attribute
+ */
+static inline int libbpf_nla_len(const struct nlattr *nla)
+{
+	return nla->nla_len - NLA_HDRLEN;
+}
+
+int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head,
+		     int len, struct libbpf_nla_policy *policy);
+int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype,
+			    struct nlattr *nla,
+			    struct libbpf_nla_policy *policy);
+
+int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh);
+
+#endif /* __LIBBPF_NLATTR_H */
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
new file mode 100644
index 000000000..5e242be45
--- /dev/null
+++ b/tools/lib/bpf/ringbuf.c
@@ -0,0 +1,300 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/*
+ * Ring buffer operations.
+ *
+ * Copyright (C) 2020 Facebook, Inc.
+ */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <linux/err.h>
+#include <linux/bpf.h>
+#include <asm/barrier.h>
+#include <sys/mman.h>
+#include <sys/epoll.h>
+
+#include "libbpf.h"
+#include "libbpf_internal.h"
+#include "bpf.h"
+
+struct ring {
+	ring_buffer_sample_fn sample_cb;
+	void *ctx;
+	void *data;
+	unsigned long *consumer_pos;
+	unsigned long *producer_pos;
+	unsigned long mask;
+	int map_fd;
+};
+
+struct ring_buffer {
+	struct epoll_event *events;
+	struct ring *rings;
+	size_t page_size;
+	int epoll_fd;
+	int ring_cnt;
+};
+
+static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r)
+{
+	if (r->consumer_pos) {
+		munmap(r->consumer_pos, rb->page_size);
+		r->consumer_pos = NULL;
+	}
+	if (r->producer_pos) {
+		munmap(r->producer_pos, rb->page_size + 2 * (r->mask + 1));
+		r->producer_pos = NULL;
+	}
+}
+
+/* Add extra RINGBUF maps to this ring buffer manager */
+int ring_buffer__add(struct ring_buffer *rb, int map_fd,
+		     ring_buffer_sample_fn sample_cb, void *ctx)
+{
+	struct bpf_map_info info;
+	__u32 len = sizeof(info);
+	struct epoll_event *e;
+	struct ring *r;
+	__u64 mmap_sz;
+	void *tmp;
+	int err;
+
+	memset(&info, 0, sizeof(info));
+
+	err = bpf_obj_get_info_by_fd(map_fd, &info, &len);
+	if (err) {
+		err = -errno;
+		pr_warn("ringbuf: failed to get map info for fd=%d: %d\n",
+			map_fd, err);
+		return err;
+	}
+
+	if (info.type != BPF_MAP_TYPE_RINGBUF) {
+		pr_warn("ringbuf: map fd=%d is not BPF_MAP_TYPE_RINGBUF\n",
+			map_fd);
+		return -EINVAL;
+	}
+
+	tmp = libbpf_reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings));
+	if (!tmp)
+		return -ENOMEM;
+	rb->rings = tmp;
+
+	tmp = libbpf_reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events));
+	if (!tmp)
+		return -ENOMEM;
+	rb->events = tmp;
+
+	r = &rb->rings[rb->ring_cnt];
+	memset(r, 0, sizeof(*r));
+
+	r->map_fd = map_fd;
+	r->sample_cb = sample_cb;
+	r->ctx = ctx;
+	r->mask = info.max_entries - 1;
+
+	/* Map writable consumer page */
+	tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0);
+	if (tmp == MAP_FAILED) {
+		err = -errno;
+		pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
+			map_fd, err);
+		return err;
+	}
+	r->consumer_pos = tmp;
+
+	/* Map read-only producer page and data pages. We map twice as big
+	 * data size to allow simple reading of samples that wrap around the
+	 * end of a ring buffer. See kernel implementation for details.
+	 * */
+	mmap_sz = rb->page_size + 2 * (__u64)info.max_entries;
+	if (mmap_sz != (__u64)(size_t)mmap_sz) {
+		pr_warn("ringbuf: ring buffer size (%u) is too big\n", info.max_entries);
+		return -E2BIG;
+	}
+	tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size);
+	if (tmp == MAP_FAILED) {
+		err = -errno;
+		ringbuf_unmap_ring(rb, r);
+		pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n",
+			map_fd, err);
+		return err;
+	}
+	r->producer_pos = tmp;
+	r->data = tmp + rb->page_size;
+
+	e = &rb->events[rb->ring_cnt];
+	memset(e, 0, sizeof(*e));
+
+	e->events = EPOLLIN;
+	e->data.fd = rb->ring_cnt;
+	if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, e) < 0) {
+		err = -errno;
+		ringbuf_unmap_ring(rb, r);
+		pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n",
+			map_fd, err);
+		return err;
+	}
+
+	rb->ring_cnt++;
+	return 0;
+}
+
+void ring_buffer__free(struct ring_buffer *rb)
+{
+	int i;
+
+	if (!rb)
+		return;
+
+	for (i = 0; i < rb->ring_cnt; ++i)
+		ringbuf_unmap_ring(rb, &rb->rings[i]);
+	if (rb->epoll_fd >= 0)
+		close(rb->epoll_fd);
+
+	free(rb->events);
+	free(rb->rings);
+	free(rb);
+}
+
+struct ring_buffer *
+ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
+		 const struct ring_buffer_opts *opts)
+{
+	struct ring_buffer *rb;
+	int err;
+
+	if (!OPTS_VALID(opts, ring_buffer_opts))
+		return NULL;
+
+	rb = calloc(1, sizeof(*rb));
+	if (!rb)
+		return NULL;
+
+	rb->page_size = getpagesize();
+
+	rb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+	if (rb->epoll_fd < 0) {
+		err = -errno;
+		pr_warn("ringbuf: failed to create epoll instance: %d\n", err);
+		goto err_out;
+	}
+
+	err = ring_buffer__add(rb, map_fd, sample_cb, ctx);
+	if (err)
+		goto err_out;
+
+	return rb;
+
+err_out:
+	ring_buffer__free(rb);
+	return NULL;
+}
+
+static inline int roundup_len(__u32 len)
+{
+	/* clear out top 2 bits (discard and busy, if set) */
+	len <<= 2;
+	len >>= 2;
+	/* add length prefix */
+	len += BPF_RINGBUF_HDR_SZ;
+	/* round up to 8 byte alignment */
+	return (len + 7) / 8 * 8;
+}
+
+static int64_t ringbuf_process_ring(struct ring* r)
+{
+	int *len_ptr, len, err;
+	/* 64-bit to avoid overflow in case of extreme application behavior */
+	int64_t cnt = 0;
+	unsigned long cons_pos, prod_pos;
+	bool got_new_data;
+	void *sample;
+
+	cons_pos = smp_load_acquire(r->consumer_pos);
+	do {
+		got_new_data = false;
+		prod_pos = smp_load_acquire(r->producer_pos);
+		while (cons_pos < prod_pos) {
+			len_ptr = r->data + (cons_pos & r->mask);
+			len = smp_load_acquire(len_ptr);
+
+			/* sample not committed yet, bail out for now */
+			if (len & BPF_RINGBUF_BUSY_BIT)
+				goto done;
+
+			got_new_data = true;
+			cons_pos += roundup_len(len);
+
+			if ((len & BPF_RINGBUF_DISCARD_BIT) == 0) {
+				sample = (void *)len_ptr + BPF_RINGBUF_HDR_SZ;
+				err = r->sample_cb(r->ctx, sample, len);
+				if (err < 0) {
+					/* update consumer pos and bail out */
+					smp_store_release(r->consumer_pos,
+							  cons_pos);
+					return err;
+				}
+				cnt++;
+			}
+
+			smp_store_release(r->consumer_pos, cons_pos);
+		}
+	} while (got_new_data);
+done:
+	return cnt;
+}
+
+/* Consume available ring buffer(s) data without event polling.
+ * Returns number of records consumed across all registered ring buffers (or
+ * INT_MAX, whichever is less), or negative number if any of the callbacks
+ * return error.
+ */
+int ring_buffer__consume(struct ring_buffer *rb)
+{
+	int64_t err, res = 0;
+	int i;
+
+	for (i = 0; i < rb->ring_cnt; i++) {
+		struct ring *ring = &rb->rings[i];
+
+		err = ringbuf_process_ring(ring);
+		if (err < 0)
+			return err;
+		res += err;
+	}
+	if (res > INT_MAX)
+		return INT_MAX;
+	return res;
+}
+
+/* Poll for available data and consume records, if any are available.
+ * Returns number of records consumed (or INT_MAX, whichever is less), or
+ * negative number, if any of the registered callbacks returned error.
+ */
+int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
+{
+	int i, cnt;
+	int64_t err, res = 0;
+
+	cnt = epoll_wait(rb->epoll_fd, rb->events, rb->ring_cnt, timeout_ms);
+	if (cnt < 0)
+		return -errno;
+
+	for (i = 0; i < cnt; i++) {
+		__u32 ring_id = rb->events[i].data.fd;
+		struct ring *ring = &rb->rings[ring_id];
+
+		err = ringbuf_process_ring(ring);
+		if (err < 0)
+			return err;
+		res += err;
+	}
+	if (res > INT_MAX)
+		return INT_MAX;
+	return res;
+}
diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c
new file mode 100644
index 000000000..146da0197
--- /dev/null
+++ b/tools/lib/bpf/str_error.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+#undef _GNU_SOURCE
+#include <string.h>
+#include <stdio.h>
+#include "str_error.h"
+
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
+/*
+ * Wrapper to allow for building in non-GNU systems such as Alpine Linux's musl
+ * libc, while checking strerror_r() return to avoid having to check this in
+ * all places calling it.
+ */
+char *libbpf_strerror_r(int err, char *dst, int len)
+{
+	int ret = strerror_r(err < 0 ? -err : err, dst, len);
+	if (ret)
+		snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret);
+	return dst;
+}
diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h
new file mode 100644
index 000000000..a139334d5
--- /dev/null
+++ b/tools/lib/bpf/str_error.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __LIBBPF_STR_ERROR_H
+#define __LIBBPF_STR_ERROR_H
+
+char *libbpf_strerror_r(int err, char *dst, int len);
+#endif /* __LIBBPF_STR_ERROR_H */
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
new file mode 100644
index 000000000..fa1f8faf7
--- /dev/null
+++ b/tools/lib/bpf/xsk.c
@@ -0,0 +1,959 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * AF_XDP user-space access library.
+ *
+ * Copyright(c) 2018 - 2019 Intel Corporation.
+ *
+ * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <asm/barrier.h>
+#include <linux/compiler.h>
+#include <linux/ethtool.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_xdp.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/sockios.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+#include "xsk.h"
+
+#ifndef SOL_XDP
+ #define SOL_XDP 283
+#endif
+
+#ifndef AF_XDP
+ #define AF_XDP 44
+#endif
+
+#ifndef PF_XDP
+ #define PF_XDP AF_XDP
+#endif
+
+struct xsk_umem {
+	struct xsk_ring_prod *fill_save;
+	struct xsk_ring_cons *comp_save;
+	char *umem_area;
+	struct xsk_umem_config config;
+	int fd;
+	int refcount;
+	struct list_head ctx_list;
+	bool rx_ring_setup_done;
+	bool tx_ring_setup_done;
+};
+
+struct xsk_ctx {
+	struct xsk_ring_prod *fill;
+	struct xsk_ring_cons *comp;
+	__u32 queue_id;
+	struct xsk_umem *umem;
+	int refcount;
+	int ifindex;
+	struct list_head list;
+	int prog_fd;
+	int xsks_map_fd;
+	char ifname[IFNAMSIZ];
+};
+
+struct xsk_socket {
+	struct xsk_ring_cons *rx;
+	struct xsk_ring_prod *tx;
+	__u64 outstanding_tx;
+	struct xsk_ctx *ctx;
+	struct xsk_socket_config config;
+	int fd;
+};
+
+struct xsk_nl_info {
+	bool xdp_prog_attached;
+	int ifindex;
+	int fd;
+};
+
+/* Up until and including Linux 5.3 */
+struct xdp_ring_offset_v1 {
+	__u64 producer;
+	__u64 consumer;
+	__u64 desc;
+};
+
+/* Up until and including Linux 5.3 */
+struct xdp_mmap_offsets_v1 {
+	struct xdp_ring_offset_v1 rx;
+	struct xdp_ring_offset_v1 tx;
+	struct xdp_ring_offset_v1 fr;
+	struct xdp_ring_offset_v1 cr;
+};
+
+int xsk_umem__fd(const struct xsk_umem *umem)
+{
+	return umem ? umem->fd : -EINVAL;
+}
+
+int xsk_socket__fd(const struct xsk_socket *xsk)
+{
+	return xsk ? xsk->fd : -EINVAL;
+}
+
+static bool xsk_page_aligned(void *buffer)
+{
+	unsigned long addr = (unsigned long)buffer;
+
+	return !(addr & (getpagesize() - 1));
+}
+
+static void xsk_set_umem_config(struct xsk_umem_config *cfg,
+				const struct xsk_umem_config *usr_cfg)
+{
+	if (!usr_cfg) {
+		cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+		cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+		cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+		cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
+		cfg->flags = XSK_UMEM__DEFAULT_FLAGS;
+		return;
+	}
+
+	cfg->fill_size = usr_cfg->fill_size;
+	cfg->comp_size = usr_cfg->comp_size;
+	cfg->frame_size = usr_cfg->frame_size;
+	cfg->frame_headroom = usr_cfg->frame_headroom;
+	cfg->flags = usr_cfg->flags;
+}
+
+static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
+				     const struct xsk_socket_config *usr_cfg)
+{
+	if (!usr_cfg) {
+		cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+		cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+		cfg->libbpf_flags = 0;
+		cfg->xdp_flags = 0;
+		cfg->bind_flags = 0;
+		return 0;
+	}
+
+	if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)
+		return -EINVAL;
+
+	cfg->rx_size = usr_cfg->rx_size;
+	cfg->tx_size = usr_cfg->tx_size;
+	cfg->libbpf_flags = usr_cfg->libbpf_flags;
+	cfg->xdp_flags = usr_cfg->xdp_flags;
+	cfg->bind_flags = usr_cfg->bind_flags;
+
+	return 0;
+}
+
+static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off)
+{
+	struct xdp_mmap_offsets_v1 off_v1;
+
+	/* getsockopt on a kernel <= 5.3 has no flags fields.
+	 * Copy over the offsets to the correct places in the >=5.4 format
+	 * and put the flags where they would have been on that kernel.
+	 */
+	memcpy(&off_v1, off, sizeof(off_v1));
+
+	off->rx.producer = off_v1.rx.producer;
+	off->rx.consumer = off_v1.rx.consumer;
+	off->rx.desc = off_v1.rx.desc;
+	off->rx.flags = off_v1.rx.consumer + sizeof(__u32);
+
+	off->tx.producer = off_v1.tx.producer;
+	off->tx.consumer = off_v1.tx.consumer;
+	off->tx.desc = off_v1.tx.desc;
+	off->tx.flags = off_v1.tx.consumer + sizeof(__u32);
+
+	off->fr.producer = off_v1.fr.producer;
+	off->fr.consumer = off_v1.fr.consumer;
+	off->fr.desc = off_v1.fr.desc;
+	off->fr.flags = off_v1.fr.consumer + sizeof(__u32);
+
+	off->cr.producer = off_v1.cr.producer;
+	off->cr.consumer = off_v1.cr.consumer;
+	off->cr.desc = off_v1.cr.desc;
+	off->cr.flags = off_v1.cr.consumer + sizeof(__u32);
+}
+
+static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
+{
+	socklen_t optlen;
+	int err;
+
+	optlen = sizeof(*off);
+	err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen);
+	if (err)
+		return err;
+
+	if (optlen == sizeof(*off))
+		return 0;
+
+	if (optlen == sizeof(struct xdp_mmap_offsets_v1)) {
+		xsk_mmap_offsets_v1(off);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int xsk_create_umem_rings(struct xsk_umem *umem, int fd,
+				 struct xsk_ring_prod *fill,
+				 struct xsk_ring_cons *comp)
+{
+	struct xdp_mmap_offsets off;
+	void *map;
+	int err;
+
+	err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING,
+			 &umem->config.fill_size,
+			 sizeof(umem->config.fill_size));
+	if (err)
+		return -errno;
+
+	err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
+			 &umem->config.comp_size,
+			 sizeof(umem->config.comp_size));
+	if (err)
+		return -errno;
+
+	err = xsk_get_mmap_offsets(fd, &off);
+	if (err)
+		return -errno;
+
+	map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
+		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
+		   XDP_UMEM_PGOFF_FILL_RING);
+	if (map == MAP_FAILED)
+		return -errno;
+
+	fill->mask = umem->config.fill_size - 1;
+	fill->size = umem->config.fill_size;
+	fill->producer = map + off.fr.producer;
+	fill->consumer = map + off.fr.consumer;
+	fill->flags = map + off.fr.flags;
+	fill->ring = map + off.fr.desc;
+	fill->cached_cons = umem->config.fill_size;
+
+	map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
+		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
+		   XDP_UMEM_PGOFF_COMPLETION_RING);
+	if (map == MAP_FAILED) {
+		err = -errno;
+		goto out_mmap;
+	}
+
+	comp->mask = umem->config.comp_size - 1;
+	comp->size = umem->config.comp_size;
+	comp->producer = map + off.cr.producer;
+	comp->consumer = map + off.cr.consumer;
+	comp->flags = map + off.cr.flags;
+	comp->ring = map + off.cr.desc;
+
+	return 0;
+
+out_mmap:
+	munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
+	return err;
+}
+
+int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
+			    __u64 size, struct xsk_ring_prod *fill,
+			    struct xsk_ring_cons *comp,
+			    const struct xsk_umem_config *usr_config)
+{
+	struct xdp_umem_reg mr;
+	struct xsk_umem *umem;
+	int err;
+
+	if (!umem_area || !umem_ptr || !fill || !comp)
+		return -EFAULT;
+	if (!size && !xsk_page_aligned(umem_area))
+		return -EINVAL;
+
+	umem = calloc(1, sizeof(*umem));
+	if (!umem)
+		return -ENOMEM;
+
+	umem->fd = socket(AF_XDP, SOCK_RAW, 0);
+	if (umem->fd < 0) {
+		err = -errno;
+		goto out_umem_alloc;
+	}
+
+	umem->umem_area = umem_area;
+	INIT_LIST_HEAD(&umem->ctx_list);
+	xsk_set_umem_config(&umem->config, usr_config);
+
+	memset(&mr, 0, sizeof(mr));
+	mr.addr = (uintptr_t)umem_area;
+	mr.len = size;
+	mr.chunk_size = umem->config.frame_size;
+	mr.headroom = umem->config.frame_headroom;
+	mr.flags = umem->config.flags;
+
+	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
+	if (err) {
+		err = -errno;
+		goto out_socket;
+	}
+
+	err = xsk_create_umem_rings(umem, umem->fd, fill, comp);
+	if (err)
+		goto out_socket;
+
+	umem->fill_save = fill;
+	umem->comp_save = comp;
+	*umem_ptr = umem;
+	return 0;
+
+out_socket:
+	close(umem->fd);
+out_umem_alloc:
+	free(umem);
+	return err;
+}
+
+struct xsk_umem_config_v1 {
+	__u32 fill_size;
+	__u32 comp_size;
+	__u32 frame_size;
+	__u32 frame_headroom;
+};
+
+int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area,
+			    __u64 size, struct xsk_ring_prod *fill,
+			    struct xsk_ring_cons *comp,
+			    const struct xsk_umem_config *usr_config)
+{
+	struct xsk_umem_config config;
+
+	memcpy(&config, usr_config, sizeof(struct xsk_umem_config_v1));
+	config.flags = 0;
+
+	return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp,
+					&config);
+}
+COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2)
+DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4)
+
+static int xsk_load_xdp_prog(struct xsk_socket *xsk)
+{
+	static const int log_buf_size = 16 * 1024;
+	struct xsk_ctx *ctx = xsk->ctx;
+	char log_buf[log_buf_size];
+	int err, prog_fd;
+
+	/* This is the C-program:
+	 * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
+	 * {
+	 *     int ret, index = ctx->rx_queue_index;
+	 *
+	 *     // A set entry here means that the correspnding queue_id
+	 *     // has an active AF_XDP socket bound to it.
+	 *     ret = bpf_redirect_map(&xsks_map, index, XDP_PASS);
+	 *     if (ret > 0)
+	 *         return ret;
+	 *
+	 *     // Fallback for pre-5.3 kernels, not supporting default
+	 *     // action in the flags parameter.
+	 *     if (bpf_map_lookup_elem(&xsks_map, &index))
+	 *         return bpf_redirect_map(&xsks_map, index, 0);
+	 *     return XDP_PASS;
+	 * }
+	 */
+	struct bpf_insn prog[] = {
+		/* r2 = *(u32 *)(r1 + 16) */
+		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16),
+		/* *(u32 *)(r10 - 4) = r2 */
+		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4),
+		/* r1 = xskmap[] */
+		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
+		/* r3 = XDP_PASS */
+		BPF_MOV64_IMM(BPF_REG_3, 2),
+		/* call bpf_redirect_map */
+		BPF_EMIT_CALL(BPF_FUNC_redirect_map),
+		/* if w0 != 0 goto pc+13 */
+		BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13),
+		/* r2 = r10 */
+		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+		/* r2 += -4 */
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+		/* r1 = xskmap[] */
+		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
+		/* call bpf_map_lookup_elem */
+		BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+		/* r1 = r0 */
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+		/* r0 = XDP_PASS */
+		BPF_MOV64_IMM(BPF_REG_0, 2),
+		/* if r1 == 0 goto pc+5 */
+		BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
+		/* r2 = *(u32 *)(r10 - 4) */
+		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4),
+		/* r1 = xskmap[] */
+		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
+		/* r3 = 0 */
+		BPF_MOV64_IMM(BPF_REG_3, 0),
+		/* call bpf_redirect_map */
+		BPF_EMIT_CALL(BPF_FUNC_redirect_map),
+		/* The jumps are to this instruction */
+		BPF_EXIT_INSN(),
+	};
+	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+
+	prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt,
+				   "LGPL-2.1 or BSD-2-Clause", 0, log_buf,
+				   log_buf_size);
+	if (prog_fd < 0) {
+		pr_warn("BPF log buffer:\n%s", log_buf);
+		return prog_fd;
+	}
+
+	err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, prog_fd,
+				  xsk->config.xdp_flags);
+	if (err) {
+		close(prog_fd);
+		return err;
+	}
+
+	ctx->prog_fd = prog_fd;
+	return 0;
+}
+
+static int xsk_get_max_queues(struct xsk_socket *xsk)
+{
+	struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
+	struct xsk_ctx *ctx = xsk->ctx;
+	struct ifreq ifr = {};
+	int fd, err, ret;
+
+	fd = socket(AF_INET, SOCK_DGRAM, 0);
+	if (fd < 0)
+		return -errno;
+
+	ifr.ifr_data = (void *)&channels;
+	memcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ - 1);
+	ifr.ifr_name[IFNAMSIZ - 1] = '\0';
+	err = ioctl(fd, SIOCETHTOOL, &ifr);
+	if (err && errno != EOPNOTSUPP) {
+		ret = -errno;
+		goto out;
+	}
+
+	if (err) {
+		/* If the device says it has no channels, then all traffic
+		 * is sent to a single stream, so max queues = 1.
+		 */
+		ret = 1;
+	} else {
+		/* Take the max of rx, tx, combined. Drivers return
+		 * the number of channels in different ways.
+		 */
+		ret = max(channels.max_rx, channels.max_tx);
+		ret = max(ret, (int)channels.max_combined);
+	}
+
+out:
+	close(fd);
+	return ret;
+}
+
+static int xsk_create_bpf_maps(struct xsk_socket *xsk)
+{
+	struct xsk_ctx *ctx = xsk->ctx;
+	int max_queues;
+	int fd;
+
+	max_queues = xsk_get_max_queues(xsk);
+	if (max_queues < 0)
+		return max_queues;
+
+	fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map",
+				 sizeof(int), sizeof(int), max_queues, 0);
+	if (fd < 0)
+		return fd;
+
+	ctx->xsks_map_fd = fd;
+
+	return 0;
+}
+
+static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
+{
+	struct xsk_ctx *ctx = xsk->ctx;
+
+	bpf_map_delete_elem(ctx->xsks_map_fd, &ctx->queue_id);
+	close(ctx->xsks_map_fd);
+}
+
+static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
+{
+	__u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
+	__u32 map_len = sizeof(struct bpf_map_info);
+	struct bpf_prog_info prog_info = {};
+	struct xsk_ctx *ctx = xsk->ctx;
+	struct bpf_map_info map_info;
+	int fd, err;
+
+	err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
+	if (err)
+		return err;
+
+	num_maps = prog_info.nr_map_ids;
+
+	map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids));
+	if (!map_ids)
+		return -ENOMEM;
+
+	memset(&prog_info, 0, prog_len);
+	prog_info.nr_map_ids = num_maps;
+	prog_info.map_ids = (__u64)(unsigned long)map_ids;
+
+	err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
+	if (err)
+		goto out_map_ids;
+
+	ctx->xsks_map_fd = -1;
+
+	for (i = 0; i < prog_info.nr_map_ids; i++) {
+		fd = bpf_map_get_fd_by_id(map_ids[i]);
+		if (fd < 0)
+			continue;
+
+		memset(&map_info, 0, map_len);
+		err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
+		if (err) {
+			close(fd);
+			continue;
+		}
+
+		if (!strncmp(map_info.name, "xsks_map", sizeof(map_info.name))) {
+			ctx->xsks_map_fd = fd;
+			break;
+		}
+
+		close(fd);
+	}
+
+	err = 0;
+	if (ctx->xsks_map_fd == -1)
+		err = -ENOENT;
+
+out_map_ids:
+	free(map_ids);
+	return err;
+}
+
+static int xsk_set_bpf_maps(struct xsk_socket *xsk)
+{
+	struct xsk_ctx *ctx = xsk->ctx;
+
+	return bpf_map_update_elem(ctx->xsks_map_fd, &ctx->queue_id,
+				   &xsk->fd, 0);
+}
+
+static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
+{
+	struct xsk_ctx *ctx = xsk->ctx;
+	__u32 prog_id = 0;
+	int err;
+
+	err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id,
+				  xsk->config.xdp_flags);
+	if (err)
+		return err;
+
+	if (!prog_id) {
+		err = xsk_create_bpf_maps(xsk);
+		if (err)
+			return err;
+
+		err = xsk_load_xdp_prog(xsk);
+		if (err) {
+			xsk_delete_bpf_maps(xsk);
+			return err;
+		}
+	} else {
+		ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
+		if (ctx->prog_fd < 0)
+			return -errno;
+		err = xsk_lookup_bpf_maps(xsk);
+		if (err) {
+			close(ctx->prog_fd);
+			return err;
+		}
+	}
+
+	if (xsk->rx)
+		err = xsk_set_bpf_maps(xsk);
+	if (err) {
+		xsk_delete_bpf_maps(xsk);
+		close(ctx->prog_fd);
+		return err;
+	}
+
+	return 0;
+}
+
+static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
+				   __u32 queue_id)
+{
+	struct xsk_ctx *ctx;
+
+	if (list_empty(&umem->ctx_list))
+		return NULL;
+
+	list_for_each_entry(ctx, &umem->ctx_list, list) {
+		if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) {
+			ctx->refcount++;
+			return ctx;
+		}
+	}
+
+	return NULL;
+}
+
+static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap)
+{
+	struct xsk_umem *umem = ctx->umem;
+	struct xdp_mmap_offsets off;
+	int err;
+
+	if (--ctx->refcount)
+		return;
+
+	if (!unmap)
+		goto out_free;
+
+	err = xsk_get_mmap_offsets(umem->fd, &off);
+	if (err)
+		goto out_free;
+
+	munmap(ctx->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size *
+	       sizeof(__u64));
+	munmap(ctx->comp->ring - off.cr.desc, off.cr.desc + umem->config.comp_size *
+	       sizeof(__u64));
+
+out_free:
+	list_del(&ctx->list);
+	free(ctx);
+}
+
+static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
+				      struct xsk_umem *umem, int ifindex,
+				      const char *ifname, __u32 queue_id,
+				      struct xsk_ring_prod *fill,
+				      struct xsk_ring_cons *comp)
+{
+	struct xsk_ctx *ctx;
+	int err;
+
+	ctx = calloc(1, sizeof(*ctx));
+	if (!ctx)
+		return NULL;
+
+	if (!umem->fill_save) {
+		err = xsk_create_umem_rings(umem, xsk->fd, fill, comp);
+		if (err) {
+			free(ctx);
+			return NULL;
+		}
+	} else if (umem->fill_save != fill || umem->comp_save != comp) {
+		/* Copy over rings to new structs. */
+		memcpy(fill, umem->fill_save, sizeof(*fill));
+		memcpy(comp, umem->comp_save, sizeof(*comp));
+	}
+
+	ctx->ifindex = ifindex;
+	ctx->refcount = 1;
+	ctx->umem = umem;
+	ctx->queue_id = queue_id;
+	memcpy(ctx->ifname, ifname, IFNAMSIZ - 1);
+	ctx->ifname[IFNAMSIZ - 1] = '\0';
+
+	ctx->fill = fill;
+	ctx->comp = comp;
+	list_add(&ctx->list, &umem->ctx_list);
+	return ctx;
+}
+
+int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
+			      const char *ifname,
+			      __u32 queue_id, struct xsk_umem *umem,
+			      struct xsk_ring_cons *rx,
+			      struct xsk_ring_prod *tx,
+			      struct xsk_ring_prod *fill,
+			      struct xsk_ring_cons *comp,
+			      const struct xsk_socket_config *usr_config)
+{
+	bool unmap, rx_setup_done = false, tx_setup_done = false;
+	void *rx_map = NULL, *tx_map = NULL;
+	struct sockaddr_xdp sxdp = {};
+	struct xdp_mmap_offsets off;
+	struct xsk_socket *xsk;
+	struct xsk_ctx *ctx;
+	int err, ifindex;
+
+	if (!umem || !xsk_ptr || !(rx || tx))
+		return -EFAULT;
+
+	unmap = umem->fill_save != fill;
+
+	xsk = calloc(1, sizeof(*xsk));
+	if (!xsk)
+		return -ENOMEM;
+
+	err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
+	if (err)
+		goto out_xsk_alloc;
+
+	xsk->outstanding_tx = 0;
+	ifindex = if_nametoindex(ifname);
+	if (!ifindex) {
+		err = -errno;
+		goto out_xsk_alloc;
+	}
+
+	if (umem->refcount++ > 0) {
+		xsk->fd = socket(AF_XDP, SOCK_RAW, 0);
+		if (xsk->fd < 0) {
+			err = -errno;
+			goto out_xsk_alloc;
+		}
+	} else {
+		xsk->fd = umem->fd;
+		rx_setup_done = umem->rx_ring_setup_done;
+		tx_setup_done = umem->tx_ring_setup_done;
+	}
+
+	ctx = xsk_get_ctx(umem, ifindex, queue_id);
+	if (!ctx) {
+		if (!fill || !comp) {
+			err = -EFAULT;
+			goto out_socket;
+		}
+
+		ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id,
+				     fill, comp);
+		if (!ctx) {
+			err = -ENOMEM;
+			goto out_socket;
+		}
+	}
+	xsk->ctx = ctx;
+
+	if (rx && !rx_setup_done) {
+		err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
+				 &xsk->config.rx_size,
+				 sizeof(xsk->config.rx_size));
+		if (err) {
+			err = -errno;
+			goto out_put_ctx;
+		}
+		if (xsk->fd == umem->fd)
+			umem->rx_ring_setup_done = true;
+	}
+	if (tx && !tx_setup_done) {
+		err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
+				 &xsk->config.tx_size,
+				 sizeof(xsk->config.tx_size));
+		if (err) {
+			err = -errno;
+			goto out_put_ctx;
+		}
+		if (xsk->fd == umem->fd)
+			umem->tx_ring_setup_done = true;
+	}
+
+	err = xsk_get_mmap_offsets(xsk->fd, &off);
+	if (err) {
+		err = -errno;
+		goto out_put_ctx;
+	}
+
+	if (rx) {
+		rx_map = mmap(NULL, off.rx.desc +
+			      xsk->config.rx_size * sizeof(struct xdp_desc),
+			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+			      xsk->fd, XDP_PGOFF_RX_RING);
+		if (rx_map == MAP_FAILED) {
+			err = -errno;
+			goto out_put_ctx;
+		}
+
+		rx->mask = xsk->config.rx_size - 1;
+		rx->size = xsk->config.rx_size;
+		rx->producer = rx_map + off.rx.producer;
+		rx->consumer = rx_map + off.rx.consumer;
+		rx->flags = rx_map + off.rx.flags;
+		rx->ring = rx_map + off.rx.desc;
+		rx->cached_prod = *rx->producer;
+		rx->cached_cons = *rx->consumer;
+	}
+	xsk->rx = rx;
+
+	if (tx) {
+		tx_map = mmap(NULL, off.tx.desc +
+			      xsk->config.tx_size * sizeof(struct xdp_desc),
+			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+			      xsk->fd, XDP_PGOFF_TX_RING);
+		if (tx_map == MAP_FAILED) {
+			err = -errno;
+			goto out_mmap_rx;
+		}
+
+		tx->mask = xsk->config.tx_size - 1;
+		tx->size = xsk->config.tx_size;
+		tx->producer = tx_map + off.tx.producer;
+		tx->consumer = tx_map + off.tx.consumer;
+		tx->flags = tx_map + off.tx.flags;
+		tx->ring = tx_map + off.tx.desc;
+		tx->cached_prod = *tx->producer;
+		/* cached_cons is r->size bigger than the real consumer pointer
+		 * See xsk_prod_nb_free
+		 */
+		tx->cached_cons = *tx->consumer + xsk->config.tx_size;
+	}
+	xsk->tx = tx;
+
+	sxdp.sxdp_family = PF_XDP;
+	sxdp.sxdp_ifindex = ctx->ifindex;
+	sxdp.sxdp_queue_id = ctx->queue_id;
+	if (umem->refcount > 1) {
+		sxdp.sxdp_flags |= XDP_SHARED_UMEM;
+		sxdp.sxdp_shared_umem_fd = umem->fd;
+	} else {
+		sxdp.sxdp_flags = xsk->config.bind_flags;
+	}
+
+	err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
+	if (err) {
+		err = -errno;
+		goto out_mmap_tx;
+	}
+
+	if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
+		err = xsk_setup_xdp_prog(xsk);
+		if (err)
+			goto out_mmap_tx;
+	}
+
+	*xsk_ptr = xsk;
+	umem->fill_save = NULL;
+	umem->comp_save = NULL;
+	return 0;
+
+out_mmap_tx:
+	if (tx)
+		munmap(tx_map, off.tx.desc +
+		       xsk->config.tx_size * sizeof(struct xdp_desc));
+out_mmap_rx:
+	if (rx)
+		munmap(rx_map, off.rx.desc +
+		       xsk->config.rx_size * sizeof(struct xdp_desc));
+out_put_ctx:
+	xsk_put_ctx(ctx, unmap);
+out_socket:
+	if (--umem->refcount)
+		close(xsk->fd);
+out_xsk_alloc:
+	free(xsk);
+	return err;
+}
+
+int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
+		       __u32 queue_id, struct xsk_umem *umem,
+		       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
+		       const struct xsk_socket_config *usr_config)
+{
+	if (!umem)
+		return -EFAULT;
+
+	return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem,
+					 rx, tx, umem->fill_save,
+					 umem->comp_save, usr_config);
+}
+
+int xsk_umem__delete(struct xsk_umem *umem)
+{
+	struct xdp_mmap_offsets off;
+	int err;
+
+	if (!umem)
+		return 0;
+
+	if (umem->refcount)
+		return -EBUSY;
+
+	err = xsk_get_mmap_offsets(umem->fd, &off);
+	if (!err && umem->fill_save && umem->comp_save) {
+		munmap(umem->fill_save->ring - off.fr.desc,
+		       off.fr.desc + umem->config.fill_size * sizeof(__u64));
+		munmap(umem->comp_save->ring - off.cr.desc,
+		       off.cr.desc + umem->config.comp_size * sizeof(__u64));
+	}
+
+	close(umem->fd);
+	free(umem);
+
+	return 0;
+}
+
+void xsk_socket__delete(struct xsk_socket *xsk)
+{
+	size_t desc_sz = sizeof(struct xdp_desc);
+	struct xdp_mmap_offsets off;
+	struct xsk_umem *umem;
+	struct xsk_ctx *ctx;
+	int err;
+
+	if (!xsk)
+		return;
+
+	ctx = xsk->ctx;
+	umem = ctx->umem;
+
+	if (ctx->refcount == 1) {
+		xsk_delete_bpf_maps(xsk);
+		close(ctx->prog_fd);
+	}
+
+	xsk_put_ctx(ctx, true);
+
+	err = xsk_get_mmap_offsets(xsk->fd, &off);
+	if (!err) {
+		if (xsk->rx) {
+			munmap(xsk->rx->ring - off.rx.desc,
+			       off.rx.desc + xsk->config.rx_size * desc_sz);
+		}
+		if (xsk->tx) {
+			munmap(xsk->tx->ring - off.tx.desc,
+			       off.tx.desc + xsk->config.tx_size * desc_sz);
+		}
+	}
+
+	umem->refcount--;
+	/* Do not close an fd that also has an associated umem connected
+	 * to it.
+	 */
+	if (xsk->fd != umem->fd)
+		close(xsk->fd);
+	free(xsk);
+}
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
new file mode 100644
index 000000000..1069c4636
--- /dev/null
+++ b/tools/lib/bpf/xsk.h
@@ -0,0 +1,255 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * AF_XDP user-space access library.
+ *
+ * Copyright(c) 2018 - 2019 Intel Corporation.
+ *
+ * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#ifndef __LIBBPF_XSK_H
+#define __LIBBPF_XSK_H
+
+#include <stdio.h>
+#include <stdint.h>
+#include <linux/if_xdp.h>
+
+#include "libbpf.h"
+#include "libbpf_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Do not access these members directly. Use the functions below. */
+#define DEFINE_XSK_RING(name) \
+struct name { \
+	__u32 cached_prod; \
+	__u32 cached_cons; \
+	__u32 mask; \
+	__u32 size; \
+	__u32 *producer; \
+	__u32 *consumer; \
+	void *ring; \
+	__u32 *flags; \
+}
+
+DEFINE_XSK_RING(xsk_ring_prod);
+DEFINE_XSK_RING(xsk_ring_cons);
+
+/* For a detailed explanation on the memory barriers associated with the
+ * ring, please take a look at net/xdp/xsk_queue.h.
+ */
+
+struct xsk_umem;
+struct xsk_socket;
+
+static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill,
+					      __u32 idx)
+{
+	__u64 *addrs = (__u64 *)fill->ring;
+
+	return &addrs[idx & fill->mask];
+}
+
+static inline const __u64 *
+xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx)
+{
+	const __u64 *addrs = (const __u64 *)comp->ring;
+
+	return &addrs[idx & comp->mask];
+}
+
+static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx,
+						      __u32 idx)
+{
+	struct xdp_desc *descs = (struct xdp_desc *)tx->ring;
+
+	return &descs[idx & tx->mask];
+}
+
+static inline const struct xdp_desc *
+xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx)
+{
+	const struct xdp_desc *descs = (const struct xdp_desc *)rx->ring;
+
+	return &descs[idx & rx->mask];
+}
+
+static inline int xsk_ring_prod__needs_wakeup(const struct xsk_ring_prod *r)
+{
+	return *r->flags & XDP_RING_NEED_WAKEUP;
+}
+
+static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
+{
+	__u32 free_entries = r->cached_cons - r->cached_prod;
+
+	if (free_entries >= nb)
+		return free_entries;
+
+	/* Refresh the local tail pointer.
+	 * cached_cons is r->size bigger than the real consumer pointer so
+	 * that this addition can be avoided in the more frequently
+	 * executed code that computs free_entries in the beginning of
+	 * this function. Without this optimization it whould have been
+	 * free_entries = r->cached_prod - r->cached_cons + r->size.
+	 */
+	r->cached_cons = *r->consumer + r->size;
+
+	return r->cached_cons - r->cached_prod;
+}
+
+static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
+{
+	__u32 entries = r->cached_prod - r->cached_cons;
+
+	if (entries == 0) {
+		r->cached_prod = *r->producer;
+		entries = r->cached_prod - r->cached_cons;
+	}
+
+	return (entries > nb) ? nb : entries;
+}
+
+static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod,
+					    size_t nb, __u32 *idx)
+{
+	if (xsk_prod_nb_free(prod, nb) < nb)
+		return 0;
+
+	*idx = prod->cached_prod;
+	prod->cached_prod += nb;
+
+	return nb;
+}
+
+static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb)
+{
+	/* Make sure everything has been written to the ring before indicating
+	 * this to the kernel by writing the producer pointer.
+	 */
+	libbpf_smp_wmb();
+
+	*prod->producer += nb;
+}
+
+static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons,
+					 size_t nb, __u32 *idx)
+{
+	size_t entries = xsk_cons_nb_avail(cons, nb);
+
+	if (entries > 0) {
+		/* Make sure we do not speculatively read the data before
+		 * we have received the packet buffers from the ring.
+		 */
+		libbpf_smp_rmb();
+
+		*idx = cons->cached_cons;
+		cons->cached_cons += entries;
+	}
+
+	return entries;
+}
+
+static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb)
+{
+	/* Make sure data has been read before indicating we are done
+	 * with the entries by updating the consumer pointer.
+	 */
+	libbpf_smp_rwmb();
+
+	*cons->consumer += nb;
+}
+
+static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
+{
+	return &((char *)umem_area)[addr];
+}
+
+static inline __u64 xsk_umem__extract_addr(__u64 addr)
+{
+	return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
+}
+
+static inline __u64 xsk_umem__extract_offset(__u64 addr)
+{
+	return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+}
+
+static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr)
+{
+	return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr);
+}
+
+LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem);
+LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk);
+
+#define XSK_RING_CONS__DEFAULT_NUM_DESCS      2048
+#define XSK_RING_PROD__DEFAULT_NUM_DESCS      2048
+#define XSK_UMEM__DEFAULT_FRAME_SHIFT    12 /* 4096 bytes */
+#define XSK_UMEM__DEFAULT_FRAME_SIZE     (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT)
+#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0
+#define XSK_UMEM__DEFAULT_FLAGS 0
+
+struct xsk_umem_config {
+	__u32 fill_size;
+	__u32 comp_size;
+	__u32 frame_size;
+	__u32 frame_headroom;
+	__u32 flags;
+};
+
+/* Flags for the libbpf_flags field. */
+#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0)
+
+struct xsk_socket_config {
+	__u32 rx_size;
+	__u32 tx_size;
+	__u32 libbpf_flags;
+	__u32 xdp_flags;
+	__u16 bind_flags;
+};
+
+/* Set config to NULL to get the default configuration. */
+LIBBPF_API int xsk_umem__create(struct xsk_umem **umem,
+				void *umem_area, __u64 size,
+				struct xsk_ring_prod *fill,
+				struct xsk_ring_cons *comp,
+				const struct xsk_umem_config *config);
+LIBBPF_API int xsk_umem__create_v0_0_2(struct xsk_umem **umem,
+				       void *umem_area, __u64 size,
+				       struct xsk_ring_prod *fill,
+				       struct xsk_ring_cons *comp,
+				       const struct xsk_umem_config *config);
+LIBBPF_API int xsk_umem__create_v0_0_4(struct xsk_umem **umem,
+				       void *umem_area, __u64 size,
+				       struct xsk_ring_prod *fill,
+				       struct xsk_ring_cons *comp,
+				       const struct xsk_umem_config *config);
+LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk,
+				  const char *ifname, __u32 queue_id,
+				  struct xsk_umem *umem,
+				  struct xsk_ring_cons *rx,
+				  struct xsk_ring_prod *tx,
+				  const struct xsk_socket_config *config);
+LIBBPF_API int
+xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
+			  const char *ifname,
+			  __u32 queue_id, struct xsk_umem *umem,
+			  struct xsk_ring_cons *rx,
+			  struct xsk_ring_prod *tx,
+			  struct xsk_ring_prod *fill,
+			  struct xsk_ring_cons *comp,
+			  const struct xsk_socket_config *config);
+
+/* Returns 0 for success and -EBUSY if the umem is still in use. */
+LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem);
+LIBBPF_API void xsk_socket__delete(struct xsk_socket *xsk);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __LIBBPF_XSK_H */
diff --git a/tools/lib/ctype.c b/tools/lib/ctype.c
new file mode 100644
index 000000000..4d2e05fd3
--- /dev/null
+++ b/tools/lib/ctype.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  linux/lib/ctype.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include <linux/ctype.h>
+#include <linux/compiler.h>
+
+const unsigned char _ctype[] = {
+_C,_C,_C,_C,_C,_C,_C,_C,				/* 0-7 */
+_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C,			/* 8-15 */
+_C,_C,_C,_C,_C,_C,_C,_C,				/* 16-23 */
+_C,_C,_C,_C,_C,_C,_C,_C,				/* 24-31 */
+_S|_SP,_P,_P,_P,_P,_P,_P,_P,				/* 32-39 */
+_P,_P,_P,_P,_P,_P,_P,_P,				/* 40-47 */
+_D,_D,_D,_D,_D,_D,_D,_D,				/* 48-55 */
+_D,_D,_P,_P,_P,_P,_P,_P,				/* 56-63 */
+_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U,		/* 64-71 */
+_U,_U,_U,_U,_U,_U,_U,_U,				/* 72-79 */
+_U,_U,_U,_U,_U,_U,_U,_U,				/* 80-87 */
+_U,_U,_U,_P,_P,_P,_P,_P,				/* 88-95 */
+_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L,		/* 96-103 */
+_L,_L,_L,_L,_L,_L,_L,_L,				/* 104-111 */
+_L,_L,_L,_L,_L,_L,_L,_L,				/* 112-119 */
+_L,_L,_L,_P,_P,_P,_P,_C,				/* 120-127 */
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,			/* 128-143 */
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,			/* 144-159 */
+_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,	/* 160-175 */
+_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,	/* 176-191 */
+_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,	/* 192-207 */
+_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L,	/* 208-223 */
+_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,	/* 224-239 */
+_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L};	/* 240-255 */
diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c
new file mode 100644
index 000000000..ac37022e9
--- /dev/null
+++ b/tools/lib/find_bit.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* bit search implementation
+ *
+ * Copied from lib/find_bit.c to tools/lib/find_bit.c
+ *
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * Copyright (C) 2008 IBM Corporation
+ * 'find_last_bit' is written by Rusty Russell <rusty@rustcorp.com.au>
+ * (Inspired by David Howell's find_next_bit implementation)
+ *
+ * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
+ * size and improve performance, 2015.
+ */
+
+#include <linux/bitops.h>
+#include <linux/bitmap.h>
+#include <linux/kernel.h>
+
+#if !defined(find_next_bit) || !defined(find_next_zero_bit) || \
+		!defined(find_next_and_bit)
+
+/*
+ * This is a common helper function for find_next_bit, find_next_zero_bit, and
+ * find_next_and_bit. The differences are:
+ *  - The "invert" argument, which is XORed with each fetched word before
+ *    searching it for one bits.
+ *  - The optional "addr2", which is anded with "addr1" if present.
+ */
+static inline unsigned long _find_next_bit(const unsigned long *addr1,
+		const unsigned long *addr2, unsigned long nbits,
+		unsigned long start, unsigned long invert)
+{
+	unsigned long tmp;
+
+	if (unlikely(start >= nbits))
+		return nbits;
+
+	tmp = addr1[start / BITS_PER_LONG];
+	if (addr2)
+		tmp &= addr2[start / BITS_PER_LONG];
+	tmp ^= invert;
+
+	/* Handle 1st word. */
+	tmp &= BITMAP_FIRST_WORD_MASK(start);
+	start = round_down(start, BITS_PER_LONG);
+
+	while (!tmp) {
+		start += BITS_PER_LONG;
+		if (start >= nbits)
+			return nbits;
+
+		tmp = addr1[start / BITS_PER_LONG];
+		if (addr2)
+			tmp &= addr2[start / BITS_PER_LONG];
+		tmp ^= invert;
+	}
+
+	return min(start + __ffs(tmp), nbits);
+}
+#endif
+
+#ifndef find_next_bit
+/*
+ * Find the next set bit in a memory region.
+ */
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+			    unsigned long offset)
+{
+	return _find_next_bit(addr, NULL, size, offset, 0UL);
+}
+#endif
+
+#ifndef find_first_bit
+/*
+ * Find the first set bit in a memory region.
+ */
+unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
+{
+	unsigned long idx;
+
+	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+		if (addr[idx])
+			return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
+	}
+
+	return size;
+}
+#endif
+
+#ifndef find_first_zero_bit
+/*
+ * Find the first cleared bit in a memory region.
+ */
+unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
+{
+	unsigned long idx;
+
+	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+		if (addr[idx] != ~0UL)
+			return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
+	}
+
+	return size;
+}
+#endif
+
+#ifndef find_next_zero_bit
+unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+				 unsigned long offset)
+{
+	return _find_next_bit(addr, NULL, size, offset, ~0UL);
+}
+#endif
+
+#ifndef find_next_and_bit
+unsigned long find_next_and_bit(const unsigned long *addr1,
+		const unsigned long *addr2, unsigned long size,
+		unsigned long offset)
+{
+	return _find_next_bit(addr1, addr2, size, offset, 0UL);
+}
+#endif
diff --git a/tools/lib/hweight.c b/tools/lib/hweight.c
new file mode 100644
index 000000000..a16ebf515
--- /dev/null
+++ b/tools/lib/hweight.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bitops.h>
+#include <asm/types.h>
+
+/**
+ * hweightN - returns the hamming weight of a N-bit word
+ * @x: the word to weigh
+ *
+ * The Hamming Weight of a number is the total number of bits set in it.
+ */
+
+unsigned int __sw_hweight32(unsigned int w)
+{
+#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
+	w -= (w >> 1) & 0x55555555;
+	w =  (w & 0x33333333) + ((w >> 2) & 0x33333333);
+	w =  (w + (w >> 4)) & 0x0f0f0f0f;
+	return (w * 0x01010101) >> 24;
+#else
+	unsigned int res = w - ((w >> 1) & 0x55555555);
+	res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
+	res = (res + (res >> 4)) & 0x0F0F0F0F;
+	res = res + (res >> 8);
+	return (res + (res >> 16)) & 0x000000FF;
+#endif
+}
+
+unsigned int __sw_hweight16(unsigned int w)
+{
+	unsigned int res = w - ((w >> 1) & 0x5555);
+	res = (res & 0x3333) + ((res >> 2) & 0x3333);
+	res = (res + (res >> 4)) & 0x0F0F;
+	return (res + (res >> 8)) & 0x00FF;
+}
+
+unsigned int __sw_hweight8(unsigned int w)
+{
+	unsigned int res = w - ((w >> 1) & 0x55);
+	res = (res & 0x33) + ((res >> 2) & 0x33);
+	return (res + (res >> 4)) & 0x0F;
+}
+
+unsigned long __sw_hweight64(__u64 w)
+{
+#if BITS_PER_LONG == 32
+	return __sw_hweight32((unsigned int)(w >> 32)) +
+	       __sw_hweight32((unsigned int)w);
+#elif BITS_PER_LONG == 64
+#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
+	w -= (w >> 1) & 0x5555555555555555ul;
+	w =  (w & 0x3333333333333333ul) + ((w >> 2) & 0x3333333333333333ul);
+	w =  (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0ful;
+	return (w * 0x0101010101010101ul) >> 56;
+#else
+	__u64 res = w - ((w >> 1) & 0x5555555555555555ul);
+	res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul);
+	res = (res + (res >> 4)) & 0x0F0F0F0F0F0F0F0Ful;
+	res = res + (res >> 8);
+	res = res + (res >> 16);
+	return (res + (res >> 32)) & 0x00000000000000FFul;
+#endif
+#endif
+}
diff --git a/tools/lib/lockdep/.gitignore b/tools/lib/lockdep/.gitignore
new file mode 100644
index 000000000..6c308ac43
--- /dev/null
+++ b/tools/lib/lockdep/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+liblockdep.so.*
diff --git a/tools/lib/lockdep/Build b/tools/lib/lockdep/Build
new file mode 100644
index 000000000..6f667355b
--- /dev/null
+++ b/tools/lib/lockdep/Build
@@ -0,0 +1 @@
+liblockdep-y += common.o lockdep.o preload.o rbtree.o
diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile
new file mode 100644
index 000000000..9dafb8cb7
--- /dev/null
+++ b/tools/lib/lockdep/Makefile
@@ -0,0 +1,162 @@
+# SPDX-License-Identifier: GPL-2.0
+# file format version
+FILE_VERSION = 1
+
+LIBLOCKDEP_VERSION=$(shell make --no-print-directory -sC ../../.. kernelversion)
+
+# Makefiles suck: This macro sets a default value of $(2) for the
+# variable named by $(1), unless the variable has been set by
+# environment or command line. This is necessary for CC and AR
+# because make sets default values, so the simpler ?= approach
+# won't work as expected.
+define allow-override
+  $(if $(or $(findstring environment,$(origin $(1))),\
+            $(findstring command line,$(origin $(1)))),,\
+    $(eval $(1) = $(2)))
+endef
+
+# Allow setting CC and AR and LD, or setting CROSS_COMPILE as a prefix.
+$(call allow-override,CC,$(CROSS_COMPILE)gcc)
+$(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,LD,$(CROSS_COMPILE)ld)
+
+INSTALL = install
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+prefix ?= /usr/local
+libdir_relative = lib
+libdir = $(prefix)/$(libdir_relative)
+bindir_relative = bin
+bindir = $(prefix)/$(bindir_relative)
+
+export DESTDIR DESTDIR_SQ INSTALL
+
+MAKEFLAGS += --no-print-directory
+
+include ../../scripts/Makefile.include
+
+# copy a bit from Linux kbuild
+
+ifeq ("$(origin V)", "command line")
+  VERBOSE = $(V)
+endif
+ifndef VERBOSE
+  VERBOSE = 0
+endif
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+# Shell quotes
+libdir_SQ = $(subst ','\'',$(libdir))
+bindir_SQ = $(subst ','\'',$(bindir))
+
+LIB_IN := $(OUTPUT)liblockdep-in.o
+
+BIN_FILE = lockdep
+LIB_FILE = $(OUTPUT)liblockdep.a $(OUTPUT)liblockdep.so.$(LIBLOCKDEP_VERSION)
+
+CONFIG_INCLUDES =
+CONFIG_LIBS	=
+CONFIG_FLAGS	=
+
+OBJ		= $@
+N		=
+
+export Q VERBOSE
+
+INCLUDES = -I. -I./uinclude -I./include -I../../include $(CONFIG_INCLUDES)
+
+# Set compile option CFLAGS if not set elsewhere
+CFLAGS ?= -g -DCONFIG_LOCKDEP -DCONFIG_STACKTRACE -DCONFIG_PROVE_LOCKING -DBITS_PER_LONG=__WORDSIZE -DLIBLOCKDEP_VERSION='"$(LIBLOCKDEP_VERSION)"' -rdynamic -O0 -g
+CFLAGS += -fPIC
+CFLAGS += -Wall
+
+override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ)
+
+ifeq ($(VERBOSE),1)
+  Q =
+  print_shared_lib_compile =
+  print_install =
+else
+  Q = @
+  print_shared_lib_compile =	echo '  LD       '$(OBJ);
+  print_static_lib_build =	echo '  LD       '$(OBJ);
+  print_install =		echo '  INSTALL  '$1'	to	$(DESTDIR_SQ)$2';
+endif
+
+all:
+
+export srctree OUTPUT CC LD CFLAGS V
+include $(srctree)/tools/build/Makefile.include
+
+do_compile_shared_library =			\
+	($(print_shared_lib_compile)		\
+	$(CC) $(LDFLAGS) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='$(@F)';$(shell ln -sf $(@F) $(@D)/liblockdep.so))
+
+do_build_static_lib =				\
+	($(print_static_lib_build)		\
+	$(RM) $@;  $(AR) rcs $@ $^)
+
+CMD_TARGETS = $(LIB_FILE)
+
+TARGETS = $(CMD_TARGETS)
+
+
+all: fixdep all_cmd
+
+all_cmd: $(CMD_TARGETS)
+
+$(LIB_IN): force
+	$(Q)$(MAKE) $(build)=liblockdep
+
+$(OUTPUT)liblockdep.so.$(LIBLOCKDEP_VERSION): $(LIB_IN)
+	$(Q)$(do_compile_shared_library)
+
+$(OUTPUT)liblockdep.a: $(LIB_IN)
+	$(Q)$(do_build_static_lib)
+
+tags:	force
+	$(RM) tags
+	find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \
+	--regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/'
+
+TAGS:	force
+	$(RM) TAGS
+	find . -name '*.[ch]' | xargs etags \
+	--regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/'
+
+define do_install
+	$(print_install)				\
+	if [ ! -d '$(DESTDIR_SQ)$2' ]; then		\
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2';	\
+	fi;						\
+	$(INSTALL) $1 '$(DESTDIR_SQ)$2'
+endef
+
+install_lib: all_cmd
+	$(Q)$(call do_install,$(LIB_FILE),$(libdir_SQ))
+	$(Q)$(call do_install,$(BIN_FILE),$(bindir_SQ))
+
+install: install_lib
+
+clean:
+	$(RM) $(OUTPUT)*.o *~ $(TARGETS) $(OUTPUT)*.a $(OUTPUT)*liblockdep*.so* $(VERSION_FILES) $(OUTPUT).*.d $(OUTPUT).*.cmd
+	$(RM) tags TAGS
+
+PHONY += force
+force:
+
+# Declare the contents of the .PHONY variable as phony.  We keep that
+# information in a variable so we can use it in if_changed and friends.
+.PHONY: $(PHONY)
diff --git a/tools/lib/lockdep/common.c b/tools/lib/lockdep/common.c
new file mode 100644
index 000000000..5c3b58cce
--- /dev/null
+++ b/tools/lib/lockdep/common.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
+#include <stdbool.h>
+#include <linux/compiler.h>
+#include <linux/lockdep.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+static __thread struct task_struct current_obj;
+
+/* lockdep wants these */
+bool debug_locks = true;
+bool debug_locks_silent;
+
+__attribute__((destructor)) static void liblockdep_exit(void)
+{
+	debug_check_no_locks_held();
+}
+
+struct task_struct *__curr(void)
+{
+	if (current_obj.pid == 0) {
+		/* Makes lockdep output pretty */
+		prctl(PR_GET_NAME, current_obj.comm);
+		current_obj.pid = syscall(__NR_gettid);
+	}
+
+	return &current_obj;
+}
diff --git a/tools/lib/lockdep/include/liblockdep/common.h b/tools/lib/lockdep/include/liblockdep/common.h
new file mode 100644
index 000000000..a6d7ee5f1
--- /dev/null
+++ b/tools/lib/lockdep/include/liblockdep/common.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LIBLOCKDEP_COMMON_H
+#define _LIBLOCKDEP_COMMON_H
+
+#include <pthread.h>
+
+#define NR_LOCKDEP_CACHING_CLASSES 2
+#define MAX_LOCKDEP_SUBCLASSES 8UL
+
+#ifndef CALLER_ADDR0
+#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
+#endif
+
+#ifndef _RET_IP_
+#define _RET_IP_ CALLER_ADDR0
+#endif
+
+#ifndef _THIS_IP_
+#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
+#endif
+
+struct lockdep_subclass_key {
+	char __one_byte;
+};
+
+struct lock_class_key {
+	struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
+};
+
+struct lockdep_map {
+	struct lock_class_key	*key;
+	struct lock_class	*class_cache[NR_LOCKDEP_CACHING_CLASSES];
+	const char		*name;
+#ifdef CONFIG_LOCK_STAT
+	int			cpu;
+	unsigned long		ip;
+#endif
+};
+
+void lockdep_init_map(struct lockdep_map *lock, const char *name,
+			struct lock_class_key *key, int subclass);
+void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
+			int trylock, int read, int check,
+			struct lockdep_map *nest_lock, unsigned long ip);
+void lock_release(struct lockdep_map *lock, unsigned long ip);
+void lockdep_reset_lock(struct lockdep_map *lock);
+void lockdep_register_key(struct lock_class_key *key);
+void lockdep_unregister_key(struct lock_class_key *key);
+extern void debug_check_no_locks_freed(const void *from, unsigned long len);
+
+#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
+	{ .name = (_name), .key = (void *)(_key), }
+
+#endif
diff --git a/tools/lib/lockdep/include/liblockdep/mutex.h b/tools/lib/lockdep/include/liblockdep/mutex.h
new file mode 100644
index 000000000..bd106b827
--- /dev/null
+++ b/tools/lib/lockdep/include/liblockdep/mutex.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LIBLOCKDEP_MUTEX_H
+#define _LIBLOCKDEP_MUTEX_H
+
+#include <pthread.h>
+#include "common.h"
+
+struct liblockdep_pthread_mutex {
+	pthread_mutex_t mutex;
+	struct lock_class_key key;
+	struct lockdep_map dep_map;
+};
+
+typedef struct liblockdep_pthread_mutex liblockdep_pthread_mutex_t;
+
+#define LIBLOCKDEP_PTHREAD_MUTEX_INITIALIZER(mtx)			\
+		(const struct liblockdep_pthread_mutex) {		\
+	.mutex = PTHREAD_MUTEX_INITIALIZER,				\
+	.dep_map = STATIC_LOCKDEP_MAP_INIT(#mtx, &((&(mtx))->dep_map)),	\
+}
+
+static inline int __mutex_init(liblockdep_pthread_mutex_t *lock,
+				const char *name,
+				struct lock_class_key *key,
+				const pthread_mutexattr_t *__mutexattr)
+{
+	lockdep_init_map(&lock->dep_map, name, key, 0);
+	return pthread_mutex_init(&lock->mutex, __mutexattr);
+}
+
+#define liblockdep_pthread_mutex_init(mutex, mutexattr)			\
+({									\
+	lockdep_register_key(&(mutex)->key);				\
+	__mutex_init((mutex), #mutex, &(mutex)->key, (mutexattr));	\
+})
+
+static inline int liblockdep_pthread_mutex_lock(liblockdep_pthread_mutex_t *lock)
+{
+	lock_acquire(&lock->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
+	return pthread_mutex_lock(&lock->mutex);
+}
+
+static inline int liblockdep_pthread_mutex_unlock(liblockdep_pthread_mutex_t *lock)
+{
+	lock_release(&lock->dep_map, (unsigned long)_RET_IP_);
+	return pthread_mutex_unlock(&lock->mutex);
+}
+
+static inline int liblockdep_pthread_mutex_trylock(liblockdep_pthread_mutex_t *lock)
+{
+	lock_acquire(&lock->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
+	return pthread_mutex_trylock(&lock->mutex) == 0 ? 1 : 0;
+}
+
+static inline int liblockdep_pthread_mutex_destroy(liblockdep_pthread_mutex_t *lock)
+{
+	lockdep_reset_lock(&lock->dep_map);
+	lockdep_unregister_key(&lock->key);
+	return pthread_mutex_destroy(&lock->mutex);
+}
+
+#ifdef __USE_LIBLOCKDEP
+
+#define pthread_mutex_t         liblockdep_pthread_mutex_t
+#define pthread_mutex_init      liblockdep_pthread_mutex_init
+#define pthread_mutex_lock      liblockdep_pthread_mutex_lock
+#define pthread_mutex_unlock    liblockdep_pthread_mutex_unlock
+#define pthread_mutex_trylock   liblockdep_pthread_mutex_trylock
+#define pthread_mutex_destroy   liblockdep_pthread_mutex_destroy
+
+#endif
+
+#endif
diff --git a/tools/lib/lockdep/include/liblockdep/rwlock.h b/tools/lib/lockdep/include/liblockdep/rwlock.h
new file mode 100644
index 000000000..6d5d2932b
--- /dev/null
+++ b/tools/lib/lockdep/include/liblockdep/rwlock.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LIBLOCKDEP_RWLOCK_H
+#define _LIBLOCKDEP_RWLOCK_H
+
+#include <pthread.h>
+#include "common.h"
+
+struct liblockdep_pthread_rwlock {
+	pthread_rwlock_t rwlock;
+	struct lockdep_map dep_map;
+};
+
+typedef struct liblockdep_pthread_rwlock liblockdep_pthread_rwlock_t;
+
+#define LIBLOCKDEP_PTHREAD_RWLOCK_INITIALIZER(rwl)			\
+		(struct liblockdep_pthread_rwlock) {			\
+	.rwlock = PTHREAD_RWLOCK_INITIALIZER,				\
+	.dep_map = STATIC_LOCKDEP_MAP_INIT(#rwl, &((&(rwl))->dep_map)),	\
+}
+
+static inline int __rwlock_init(liblockdep_pthread_rwlock_t *lock,
+				const char *name,
+				struct lock_class_key *key,
+				const pthread_rwlockattr_t *attr)
+{
+	lockdep_init_map(&lock->dep_map, name, key, 0);
+
+	return pthread_rwlock_init(&lock->rwlock, attr);
+}
+
+#define liblockdep_pthread_rwlock_init(lock, attr)		\
+({							\
+	static struct lock_class_key __key;		\
+							\
+	__rwlock_init((lock), #lock, &__key, (attr));	\
+})
+
+static inline int liblockdep_pthread_rwlock_rdlock(liblockdep_pthread_rwlock_t *lock)
+{
+	lock_acquire(&lock->dep_map, 0, 0, 2, 1, NULL, (unsigned long)_RET_IP_);
+	return pthread_rwlock_rdlock(&lock->rwlock);
+
+}
+
+static inline int liblockdep_pthread_rwlock_unlock(liblockdep_pthread_rwlock_t *lock)
+{
+	lock_release(&lock->dep_map, (unsigned long)_RET_IP_);
+	return pthread_rwlock_unlock(&lock->rwlock);
+}
+
+static inline int liblockdep_pthread_rwlock_wrlock(liblockdep_pthread_rwlock_t *lock)
+{
+	lock_acquire(&lock->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
+	return pthread_rwlock_wrlock(&lock->rwlock);
+}
+
+static inline int liblockdep_pthread_rwlock_tryrdlock(liblockdep_pthread_rwlock_t *lock)
+{
+	lock_acquire(&lock->dep_map, 0, 1, 2, 1, NULL, (unsigned long)_RET_IP_);
+	return pthread_rwlock_tryrdlock(&lock->rwlock) == 0 ? 1 : 0;
+}
+
+static inline int liblockdep_pthread_rwlock_trywrlock(liblockdep_pthread_rwlock_t *lock)
+{
+	lock_acquire(&lock->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
+	return pthread_rwlock_trywrlock(&lock->rwlock) == 0 ? 1 : 0;
+}
+
+static inline int liblockdep_rwlock_destroy(liblockdep_pthread_rwlock_t *lock)
+{
+	return pthread_rwlock_destroy(&lock->rwlock);
+}
+
+#ifdef __USE_LIBLOCKDEP
+
+#define pthread_rwlock_t		liblockdep_pthread_rwlock_t
+#define pthread_rwlock_init		liblockdep_pthread_rwlock_init
+#define pthread_rwlock_rdlock		liblockdep_pthread_rwlock_rdlock
+#define pthread_rwlock_unlock		liblockdep_pthread_rwlock_unlock
+#define pthread_rwlock_wrlock		liblockdep_pthread_rwlock_wrlock
+#define pthread_rwlock_tryrdlock	liblockdep_pthread_rwlock_tryrdlock
+#define pthread_rwlock_trywrlock	liblockdep_pthread_rwlock_trywrlock
+#define pthread_rwlock_destroy		liblockdep_rwlock_destroy
+
+#endif
+
+#endif
diff --git a/tools/lib/lockdep/lockdep b/tools/lib/lockdep/lockdep
new file mode 100755
index 000000000..49af9fe19
--- /dev/null
+++ b/tools/lib/lockdep/lockdep
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+LD_PRELOAD="./liblockdep.so $LD_PRELOAD" "$@"
diff --git a/tools/lib/lockdep/lockdep.c b/tools/lib/lockdep/lockdep.c
new file mode 100644
index 000000000..348a9d0fb
--- /dev/null
+++ b/tools/lib/lockdep/lockdep.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/lockdep.h>
+#include <stdlib.h>
+
+/* Trivial API wrappers, we don't (yet) have RCU in user-space: */
+#define hlist_for_each_entry_rcu	hlist_for_each_entry
+#define hlist_add_head_rcu		hlist_add_head
+#define hlist_del_rcu			hlist_del
+#define list_for_each_entry_rcu		list_for_each_entry
+#define list_add_tail_rcu		list_add_tail
+
+u32 prandom_u32(void)
+{
+	/* Used only by lock_pin_lock() which is dead code */
+	abort();
+}
+
+void print_irqtrace_events(struct task_struct *curr)
+{
+	abort();
+}
+
+static struct new_utsname *init_utsname(void)
+{
+	static struct new_utsname n = (struct new_utsname) {
+		.release = "liblockdep",
+		.version = LIBLOCKDEP_VERSION,
+	};
+
+	return &n;
+}
+
+#include "../../../kernel/locking/lockdep.c"
diff --git a/tools/lib/lockdep/lockdep_internals.h b/tools/lib/lockdep/lockdep_internals.h
new file mode 100644
index 000000000..29d0c954c
--- /dev/null
+++ b/tools/lib/lockdep/lockdep_internals.h
@@ -0,0 +1 @@
+#include "../../../kernel/locking/lockdep_internals.h"
diff --git a/tools/lib/lockdep/lockdep_states.h b/tools/lib/lockdep/lockdep_states.h
new file mode 100644
index 000000000..248d235ef
--- /dev/null
+++ b/tools/lib/lockdep/lockdep_states.h
@@ -0,0 +1 @@
+#include "../../../kernel/locking/lockdep_states.h"
diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c
new file mode 100644
index 000000000..8f1adbe88
--- /dev/null
+++ b/tools/lib/lockdep/preload.c
@@ -0,0 +1,443 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <stdio.h>
+#include <dlfcn.h>
+#include <stdlib.h>
+#include <sysexits.h>
+#include <unistd.h>
+#include "include/liblockdep/mutex.h"
+#include "../../include/linux/rbtree.h"
+
+/**
+ * struct lock_lookup - liblockdep's view of a single unique lock
+ * @orig: pointer to the original pthread lock, used for lookups
+ * @dep_map: lockdep's dep_map structure
+ * @key: lockdep's key structure
+ * @node: rb-tree node used to store the lock in a global tree
+ * @name: a unique name for the lock
+ */
+struct lock_lookup {
+	void *orig; /* Original pthread lock, used for lookups */
+	struct lockdep_map dep_map; /* Since all locks are dynamic, we need
+				     * a dep_map and a key for each lock */
+	/*
+	 * Wait, there's no support for key classes? Yup :(
+	 * Most big projects wrap the pthread api with their own calls to
+	 * be compatible with different locking methods. This means that
+	 * "classes" will be brokes since the function that creates all
+	 * locks will point to a generic locking function instead of the
+	 * actual code that wants to do the locking.
+	 */
+	struct lock_class_key key;
+	struct rb_node node;
+#define LIBLOCKDEP_MAX_LOCK_NAME 22
+	char name[LIBLOCKDEP_MAX_LOCK_NAME];
+};
+
+/* This is where we store our locks */
+static struct rb_root locks = RB_ROOT;
+static pthread_rwlock_t locks_rwlock = PTHREAD_RWLOCK_INITIALIZER;
+
+/* pthread mutex API */
+
+#ifdef __GLIBC__
+extern int __pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr);
+extern int __pthread_mutex_lock(pthread_mutex_t *mutex);
+extern int __pthread_mutex_trylock(pthread_mutex_t *mutex);
+extern int __pthread_mutex_unlock(pthread_mutex_t *mutex);
+extern int __pthread_mutex_destroy(pthread_mutex_t *mutex);
+#else
+#define __pthread_mutex_init	NULL
+#define __pthread_mutex_lock	NULL
+#define __pthread_mutex_trylock	NULL
+#define __pthread_mutex_unlock	NULL
+#define __pthread_mutex_destroy	NULL
+#endif
+static int (*ll_pthread_mutex_init)(pthread_mutex_t *mutex,
+			const pthread_mutexattr_t *attr)	= __pthread_mutex_init;
+static int (*ll_pthread_mutex_lock)(pthread_mutex_t *mutex)	= __pthread_mutex_lock;
+static int (*ll_pthread_mutex_trylock)(pthread_mutex_t *mutex)	= __pthread_mutex_trylock;
+static int (*ll_pthread_mutex_unlock)(pthread_mutex_t *mutex)	= __pthread_mutex_unlock;
+static int (*ll_pthread_mutex_destroy)(pthread_mutex_t *mutex)	= __pthread_mutex_destroy;
+
+/* pthread rwlock API */
+
+#ifdef __GLIBC__
+extern int __pthread_rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr);
+extern int __pthread_rwlock_destroy(pthread_rwlock_t *rwlock);
+extern int __pthread_rwlock_wrlock(pthread_rwlock_t *rwlock);
+extern int __pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock);
+extern int __pthread_rwlock_rdlock(pthread_rwlock_t *rwlock);
+extern int __pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock);
+extern int __pthread_rwlock_unlock(pthread_rwlock_t *rwlock);
+#else
+#define __pthread_rwlock_init		NULL
+#define __pthread_rwlock_destroy	NULL
+#define __pthread_rwlock_wrlock		NULL
+#define __pthread_rwlock_trywrlock	NULL
+#define __pthread_rwlock_rdlock		NULL
+#define __pthread_rwlock_tryrdlock	NULL
+#define __pthread_rwlock_unlock		NULL
+#endif
+
+static int (*ll_pthread_rwlock_init)(pthread_rwlock_t *rwlock,
+			const pthread_rwlockattr_t *attr)		= __pthread_rwlock_init;
+static int (*ll_pthread_rwlock_destroy)(pthread_rwlock_t *rwlock)	= __pthread_rwlock_destroy;
+static int (*ll_pthread_rwlock_rdlock)(pthread_rwlock_t *rwlock)	= __pthread_rwlock_rdlock;
+static int (*ll_pthread_rwlock_tryrdlock)(pthread_rwlock_t *rwlock)	= __pthread_rwlock_tryrdlock;
+static int (*ll_pthread_rwlock_trywrlock)(pthread_rwlock_t *rwlock)	= __pthread_rwlock_trywrlock;
+static int (*ll_pthread_rwlock_wrlock)(pthread_rwlock_t *rwlock)	= __pthread_rwlock_wrlock;
+static int (*ll_pthread_rwlock_unlock)(pthread_rwlock_t *rwlock)	= __pthread_rwlock_unlock;
+
+enum { none, prepare, done, } __init_state;
+static void init_preload(void);
+static void try_init_preload(void)
+{
+	if (__init_state != done)
+		init_preload();
+}
+
+static struct rb_node **__get_lock_node(void *lock, struct rb_node **parent)
+{
+	struct rb_node **node = &locks.rb_node;
+	struct lock_lookup *l;
+
+	*parent = NULL;
+
+	while (*node) {
+		l = rb_entry(*node, struct lock_lookup, node);
+
+		*parent = *node;
+		if (lock < l->orig)
+			node = &l->node.rb_left;
+		else if (lock > l->orig)
+			node = &l->node.rb_right;
+		else
+			return node;
+	}
+
+	return node;
+}
+
+#ifndef LIBLOCKDEP_STATIC_ENTRIES
+#define LIBLOCKDEP_STATIC_ENTRIES	1024
+#endif
+
+static struct lock_lookup __locks[LIBLOCKDEP_STATIC_ENTRIES];
+static int __locks_nr;
+
+static inline bool is_static_lock(struct lock_lookup *lock)
+{
+	return lock >= __locks && lock < __locks + ARRAY_SIZE(__locks);
+}
+
+static struct lock_lookup *alloc_lock(void)
+{
+	if (__init_state != done) {
+		/*
+		 * Some programs attempt to initialize and use locks in their
+		 * allocation path. This means that a call to malloc() would
+		 * result in locks being initialized and locked.
+		 *
+		 * Why is it an issue for us? dlsym() below will try allocating
+		 * to give us the original function. Since this allocation will
+		 * result in a locking operations, we have to let pthread deal
+		 * with it, but we can't! we don't have the pointer to the
+		 * original API since we're inside dlsym() trying to get it
+		 */
+
+		int idx = __locks_nr++;
+		if (idx >= ARRAY_SIZE(__locks)) {
+			dprintf(STDERR_FILENO,
+		"LOCKDEP error: insufficient LIBLOCKDEP_STATIC_ENTRIES\n");
+			exit(EX_UNAVAILABLE);
+		}
+		return __locks + idx;
+	}
+
+	return malloc(sizeof(struct lock_lookup));
+}
+
+static inline void free_lock(struct lock_lookup *lock)
+{
+	if (likely(!is_static_lock(lock)))
+		free(lock);
+}
+
+/**
+ * __get_lock - find or create a lock instance
+ * @lock: pointer to a pthread lock function
+ *
+ * Try to find an existing lock in the rbtree using the provided pointer. If
+ * one wasn't found - create it.
+ */
+static struct lock_lookup *__get_lock(void *lock)
+{
+	struct rb_node **node, *parent;
+	struct lock_lookup *l;
+
+	ll_pthread_rwlock_rdlock(&locks_rwlock);
+	node = __get_lock_node(lock, &parent);
+	ll_pthread_rwlock_unlock(&locks_rwlock);
+	if (*node) {
+		return rb_entry(*node, struct lock_lookup, node);
+	}
+
+	/* We didn't find the lock, let's create it */
+	l = alloc_lock();
+	if (l == NULL)
+		return NULL;
+
+	l->orig = lock;
+	/*
+	 * Currently the name of the lock is the ptr value of the pthread lock,
+	 * while not optimal, it makes debugging a bit easier.
+	 *
+	 * TODO: Get the real name of the lock using libdwarf
+	 */
+	sprintf(l->name, "%p", lock);
+	lockdep_init_map(&l->dep_map, l->name, &l->key, 0);
+
+	ll_pthread_rwlock_wrlock(&locks_rwlock);
+	/* This might have changed since the last time we fetched it */
+	node = __get_lock_node(lock, &parent);
+	rb_link_node(&l->node, parent, node);
+	rb_insert_color(&l->node, &locks);
+	ll_pthread_rwlock_unlock(&locks_rwlock);
+
+	return l;
+}
+
+static void __del_lock(struct lock_lookup *lock)
+{
+	ll_pthread_rwlock_wrlock(&locks_rwlock);
+	rb_erase(&lock->node, &locks);
+	ll_pthread_rwlock_unlock(&locks_rwlock);
+	free_lock(lock);
+}
+
+int pthread_mutex_init(pthread_mutex_t *mutex,
+			const pthread_mutexattr_t *attr)
+{
+	int r;
+
+	/*
+	 * We keep trying to init our preload module because there might be
+	 * code in init sections that tries to touch locks before we are
+	 * initialized, in that case we'll need to manually call preload
+	 * to get us going.
+	 *
+	 * Funny enough, kernel's lockdep had the same issue, and used
+	 * (almost) the same solution. See look_up_lock_class() in
+	 * kernel/locking/lockdep.c for details.
+	 */
+	try_init_preload();
+
+	r = ll_pthread_mutex_init(mutex, attr);
+	if (r == 0)
+		/*
+		 * We do a dummy initialization here so that lockdep could
+		 * warn us if something fishy is going on - such as
+		 * initializing a held lock.
+		 */
+		__get_lock(mutex);
+
+	return r;
+}
+
+int pthread_mutex_lock(pthread_mutex_t *mutex)
+{
+	int r;
+
+	try_init_preload();
+
+	lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 1, NULL,
+			(unsigned long)_RET_IP_);
+	/*
+	 * Here's the thing with pthread mutexes: unlike the kernel variant,
+	 * they can fail.
+	 *
+	 * This means that the behaviour here is a bit different from what's
+	 * going on in the kernel: there we just tell lockdep that we took the
+	 * lock before actually taking it, but here we must deal with the case
+	 * that locking failed.
+	 *
+	 * To do that we'll "release" the lock if locking failed - this way
+	 * we'll get lockdep doing the correct checks when we try to take
+	 * the lock, and if that fails - we'll be back to the correct
+	 * state by releasing it.
+	 */
+	r = ll_pthread_mutex_lock(mutex);
+	if (r)
+		lock_release(&__get_lock(mutex)->dep_map, (unsigned long)_RET_IP_);
+
+	return r;
+}
+
+int pthread_mutex_trylock(pthread_mutex_t *mutex)
+{
+	int r;
+
+	try_init_preload();
+
+	lock_acquire(&__get_lock(mutex)->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
+	r = ll_pthread_mutex_trylock(mutex);
+	if (r)
+		lock_release(&__get_lock(mutex)->dep_map, (unsigned long)_RET_IP_);
+
+	return r;
+}
+
+int pthread_mutex_unlock(pthread_mutex_t *mutex)
+{
+	int r;
+
+	try_init_preload();
+
+	lock_release(&__get_lock(mutex)->dep_map, (unsigned long)_RET_IP_);
+	/*
+	 * Just like taking a lock, only in reverse!
+	 *
+	 * If we fail releasing the lock, tell lockdep we're holding it again.
+	 */
+	r = ll_pthread_mutex_unlock(mutex);
+	if (r)
+		lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
+
+	return r;
+}
+
+int pthread_mutex_destroy(pthread_mutex_t *mutex)
+{
+	try_init_preload();
+
+	/*
+	 * Let's see if we're releasing a lock that's held.
+	 *
+	 * TODO: Hook into free() and add that check there as well.
+	 */
+	debug_check_no_locks_freed(mutex, sizeof(*mutex));
+	__del_lock(__get_lock(mutex));
+	return ll_pthread_mutex_destroy(mutex);
+}
+
+/* This is the rwlock part, very similar to what happened with mutex above */
+int pthread_rwlock_init(pthread_rwlock_t *rwlock,
+			const pthread_rwlockattr_t *attr)
+{
+	int r;
+
+	try_init_preload();
+
+	r = ll_pthread_rwlock_init(rwlock, attr);
+	if (r == 0)
+		__get_lock(rwlock);
+
+	return r;
+}
+
+int pthread_rwlock_destroy(pthread_rwlock_t *rwlock)
+{
+	try_init_preload();
+
+	debug_check_no_locks_freed(rwlock, sizeof(*rwlock));
+	__del_lock(__get_lock(rwlock));
+	return ll_pthread_rwlock_destroy(rwlock);
+}
+
+int pthread_rwlock_rdlock(pthread_rwlock_t *rwlock)
+{
+	int r;
+
+        init_preload();
+
+	lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 2, 1, NULL, (unsigned long)_RET_IP_);
+	r = ll_pthread_rwlock_rdlock(rwlock);
+	if (r)
+		lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_);
+
+	return r;
+}
+
+int pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock)
+{
+	int r;
+
+        init_preload();
+
+	lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 2, 1, NULL, (unsigned long)_RET_IP_);
+	r = ll_pthread_rwlock_tryrdlock(rwlock);
+	if (r)
+		lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_);
+
+	return r;
+}
+
+int pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock)
+{
+	int r;
+
+        init_preload();
+
+	lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
+	r = ll_pthread_rwlock_trywrlock(rwlock);
+	if (r)
+		lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_);
+
+	return r;
+}
+
+int pthread_rwlock_wrlock(pthread_rwlock_t *rwlock)
+{
+	int r;
+
+        init_preload();
+
+	lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
+	r = ll_pthread_rwlock_wrlock(rwlock);
+	if (r)
+		lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_);
+
+	return r;
+}
+
+int pthread_rwlock_unlock(pthread_rwlock_t *rwlock)
+{
+	int r;
+
+        init_preload();
+
+	lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_);
+	r = ll_pthread_rwlock_unlock(rwlock);
+	if (r)
+		lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
+
+	return r;
+}
+
+__attribute__((constructor)) static void init_preload(void)
+{
+	if (__init_state == done)
+		return;
+
+#ifndef __GLIBC__
+	__init_state = prepare;
+
+	ll_pthread_mutex_init = dlsym(RTLD_NEXT, "pthread_mutex_init");
+	ll_pthread_mutex_lock = dlsym(RTLD_NEXT, "pthread_mutex_lock");
+	ll_pthread_mutex_trylock = dlsym(RTLD_NEXT, "pthread_mutex_trylock");
+	ll_pthread_mutex_unlock = dlsym(RTLD_NEXT, "pthread_mutex_unlock");
+	ll_pthread_mutex_destroy = dlsym(RTLD_NEXT, "pthread_mutex_destroy");
+
+	ll_pthread_rwlock_init = dlsym(RTLD_NEXT, "pthread_rwlock_init");
+	ll_pthread_rwlock_destroy = dlsym(RTLD_NEXT, "pthread_rwlock_destroy");
+	ll_pthread_rwlock_rdlock = dlsym(RTLD_NEXT, "pthread_rwlock_rdlock");
+	ll_pthread_rwlock_tryrdlock = dlsym(RTLD_NEXT, "pthread_rwlock_tryrdlock");
+	ll_pthread_rwlock_wrlock = dlsym(RTLD_NEXT, "pthread_rwlock_wrlock");
+	ll_pthread_rwlock_trywrlock = dlsym(RTLD_NEXT, "pthread_rwlock_trywrlock");
+	ll_pthread_rwlock_unlock = dlsym(RTLD_NEXT, "pthread_rwlock_unlock");
+#endif
+
+	__init_state = done;
+}
diff --git a/tools/lib/lockdep/rbtree.c b/tools/lib/lockdep/rbtree.c
new file mode 100644
index 000000000..297c30457
--- /dev/null
+++ b/tools/lib/lockdep/rbtree.c
@@ -0,0 +1 @@
+#include "../../lib/rbtree.c"
diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh
new file mode 100755
index 000000000..11f425662
--- /dev/null
+++ b/tools/lib/lockdep/run_tests.sh
@@ -0,0 +1,47 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+if ! make >/dev/null; then
+    echo "Building liblockdep failed."
+    echo "FAILED!"
+    exit 1
+fi
+
+find tests -name '*.c' | sort | while read -r i; do
+	testname=$(basename "$i" .c)
+	echo -ne "$testname... "
+	if gcc -o "tests/$testname" -pthread "$i" liblockdep.a -Iinclude -D__USE_LIBLOCKDEP &&
+		timeout 1 "tests/$testname" 2>&1 | /bin/bash "tests/${testname}.sh"; then
+		echo "PASSED!"
+	else
+		echo "FAILED!"
+	fi
+	rm -f "tests/$testname"
+done
+
+find tests -name '*.c' | sort | while read -r i; do
+	testname=$(basename "$i" .c)
+	echo -ne "(PRELOAD) $testname... "
+	if gcc -o "tests/$testname" -pthread -Iinclude "$i" &&
+		timeout 1 ./lockdep "tests/$testname" 2>&1 |
+		/bin/bash "tests/${testname}.sh"; then
+		echo "PASSED!"
+	else
+		echo "FAILED!"
+	fi
+	rm -f "tests/$testname"
+done
+
+find tests -name '*.c' | sort | while read -r i; do
+	testname=$(basename "$i" .c)
+	echo -ne "(PRELOAD + Valgrind) $testname... "
+	if gcc -o "tests/$testname" -pthread -Iinclude "$i" &&
+		{ timeout 10 valgrind --read-var-info=yes ./lockdep "./tests/$testname" >& "tests/${testname}.vg.out"; true; } &&
+		/bin/bash "tests/${testname}.sh" < "tests/${testname}.vg.out" &&
+		! grep -Eq '(^==[0-9]*== (Invalid |Uninitialised ))|Mismatched free|Source and destination overlap| UME ' "tests/${testname}.vg.out"; then
+		echo "PASSED!"
+	else
+		echo "FAILED!"
+	fi
+	rm -f "tests/$testname"
+done
diff --git a/tools/lib/lockdep/tests/AA.c b/tools/lib/lockdep/tests/AA.c
new file mode 100644
index 000000000..63c7ce97b
--- /dev/null
+++ b/tools/lib/lockdep/tests/AA.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <liblockdep/mutex.h>
+
+int main(void)
+{
+	pthread_mutex_t a;
+
+	pthread_mutex_init(&a, NULL);
+
+	pthread_mutex_lock(&a);
+	pthread_mutex_lock(&a);
+
+	return 0;
+}
diff --git a/tools/lib/lockdep/tests/AA.sh b/tools/lib/lockdep/tests/AA.sh
new file mode 100644
index 000000000..f39b32865
--- /dev/null
+++ b/tools/lib/lockdep/tests/AA.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible recursive locking detected'
diff --git a/tools/lib/lockdep/tests/ABA.c b/tools/lib/lockdep/tests/ABA.c
new file mode 100644
index 000000000..efa39b23f
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABA.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <liblockdep/mutex.h>
+
+void main(void)
+{
+	pthread_mutex_t a, b;
+
+	pthread_mutex_init(&a, NULL);
+	pthread_mutex_init(&b, NULL);
+
+	pthread_mutex_lock(&a);
+	pthread_mutex_lock(&b);
+	pthread_mutex_lock(&a);
+}
diff --git a/tools/lib/lockdep/tests/ABA.sh b/tools/lib/lockdep/tests/ABA.sh
new file mode 100644
index 000000000..f39b32865
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABA.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible recursive locking detected'
diff --git a/tools/lib/lockdep/tests/ABBA.c b/tools/lib/lockdep/tests/ABBA.c
new file mode 100644
index 000000000..543789bc3
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBA.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <liblockdep/mutex.h>
+#include "common.h"
+
+void main(void)
+{
+	pthread_mutex_t a, b;
+
+	pthread_mutex_init(&a, NULL);
+	pthread_mutex_init(&b, NULL);
+
+	LOCK_UNLOCK_2(a, b);
+	LOCK_UNLOCK_2(b, a);
+
+	pthread_mutex_destroy(&b);
+	pthread_mutex_destroy(&a);
+
+	pthread_mutex_init(&a, NULL);
+	pthread_mutex_init(&b, NULL);
+
+	LOCK_UNLOCK_2(a, b);
+	LOCK_UNLOCK_2(b, a);
+
+	pthread_mutex_destroy(&b);
+	pthread_mutex_destroy(&a);
+}
diff --git a/tools/lib/lockdep/tests/ABBA.sh b/tools/lib/lockdep/tests/ABBA.sh
new file mode 100644
index 000000000..fc31c607a
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBA.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABBA_2threads.c b/tools/lib/lockdep/tests/ABBA_2threads.c
new file mode 100644
index 000000000..39325ef8a
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBA_2threads.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <pthread.h>
+
+pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
+pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;
+pthread_barrier_t bar;
+
+void *ba_lock(void *arg)
+{
+	int ret, i;
+
+	pthread_mutex_lock(&b);
+
+	if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD)
+		pthread_barrier_destroy(&bar);
+
+	pthread_mutex_lock(&a);
+
+	pthread_mutex_unlock(&a);
+	pthread_mutex_unlock(&b);
+}
+
+int main(void)
+{
+	pthread_t t;
+
+	pthread_barrier_init(&bar, NULL, 2);
+
+	if (pthread_create(&t, NULL, ba_lock, NULL)) {
+		fprintf(stderr, "pthread_create() failed\n");
+		return 1;
+	}
+	pthread_mutex_lock(&a);
+
+	if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD)
+		pthread_barrier_destroy(&bar);
+
+	pthread_mutex_lock(&b);
+
+	pthread_mutex_unlock(&b);
+	pthread_mutex_unlock(&a);
+
+	pthread_join(t, NULL);
+
+	return 0;
+}
diff --git a/tools/lib/lockdep/tests/ABBA_2threads.sh b/tools/lib/lockdep/tests/ABBA_2threads.sh
new file mode 100644
index 000000000..fc31c607a
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBA_2threads.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABBCCA.c b/tools/lib/lockdep/tests/ABBCCA.c
new file mode 100644
index 000000000..48446129d
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBCCA.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <liblockdep/mutex.h>
+#include "common.h"
+
+void main(void)
+{
+	pthread_mutex_t a, b, c;
+
+	pthread_mutex_init(&a, NULL);
+	pthread_mutex_init(&b, NULL);
+	pthread_mutex_init(&c, NULL);
+
+	LOCK_UNLOCK_2(a, b);
+	LOCK_UNLOCK_2(b, c);
+	LOCK_UNLOCK_2(c, a);
+
+	pthread_mutex_destroy(&c);
+	pthread_mutex_destroy(&b);
+	pthread_mutex_destroy(&a);
+}
diff --git a/tools/lib/lockdep/tests/ABBCCA.sh b/tools/lib/lockdep/tests/ABBCCA.sh
new file mode 100644
index 000000000..fc31c607a
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBCCA.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABBCCDDA.c b/tools/lib/lockdep/tests/ABBCCDDA.c
new file mode 100644
index 000000000..3570bf7b3
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBCCDDA.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <liblockdep/mutex.h>
+#include "common.h"
+
+void main(void)
+{
+	pthread_mutex_t a, b, c, d;
+
+	pthread_mutex_init(&a, NULL);
+	pthread_mutex_init(&b, NULL);
+	pthread_mutex_init(&c, NULL);
+	pthread_mutex_init(&d, NULL);
+
+	LOCK_UNLOCK_2(a, b);
+	LOCK_UNLOCK_2(b, c);
+	LOCK_UNLOCK_2(c, d);
+	LOCK_UNLOCK_2(d, a);
+
+	pthread_mutex_destroy(&d);
+	pthread_mutex_destroy(&c);
+	pthread_mutex_destroy(&b);
+	pthread_mutex_destroy(&a);
+}
diff --git a/tools/lib/lockdep/tests/ABBCCDDA.sh b/tools/lib/lockdep/tests/ABBCCDDA.sh
new file mode 100644
index 000000000..fc31c607a
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBCCDDA.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABCABC.c b/tools/lib/lockdep/tests/ABCABC.c
new file mode 100644
index 000000000..a1c465989
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABCABC.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <liblockdep/mutex.h>
+#include "common.h"
+
+void main(void)
+{
+	pthread_mutex_t a, b, c;
+
+	pthread_mutex_init(&a, NULL);
+	pthread_mutex_init(&b, NULL);
+	pthread_mutex_init(&c, NULL);
+
+	LOCK_UNLOCK_2(a, b);
+	LOCK_UNLOCK_2(c, a);
+	LOCK_UNLOCK_2(b, c);
+
+	pthread_mutex_destroy(&c);
+	pthread_mutex_destroy(&b);
+	pthread_mutex_destroy(&a);
+}
diff --git a/tools/lib/lockdep/tests/ABCABC.sh b/tools/lib/lockdep/tests/ABCABC.sh
new file mode 100644
index 000000000..fc31c607a
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABCABC.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABCDBCDA.c b/tools/lib/lockdep/tests/ABCDBCDA.c
new file mode 100644
index 000000000..335af1c90
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABCDBCDA.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <liblockdep/mutex.h>
+#include "common.h"
+
+void main(void)
+{
+	pthread_mutex_t a, b, c, d;
+
+	pthread_mutex_init(&a, NULL);
+	pthread_mutex_init(&b, NULL);
+	pthread_mutex_init(&c, NULL);
+	pthread_mutex_init(&d, NULL);
+
+	LOCK_UNLOCK_2(a, b);
+	LOCK_UNLOCK_2(c, d);
+	LOCK_UNLOCK_2(b, c);
+	LOCK_UNLOCK_2(d, a);
+
+	pthread_mutex_destroy(&d);
+	pthread_mutex_destroy(&c);
+	pthread_mutex_destroy(&b);
+	pthread_mutex_destroy(&a);
+}
diff --git a/tools/lib/lockdep/tests/ABCDBCDA.sh b/tools/lib/lockdep/tests/ABCDBCDA.sh
new file mode 100644
index 000000000..fc31c607a
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABCDBCDA.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABCDBDDA.c b/tools/lib/lockdep/tests/ABCDBDDA.c
new file mode 100644
index 000000000..3c5972863
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABCDBDDA.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <liblockdep/mutex.h>
+#include "common.h"
+
+void main(void)
+{
+	pthread_mutex_t a, b, c, d;
+
+	pthread_mutex_init(&a, NULL);
+	pthread_mutex_init(&b, NULL);
+	pthread_mutex_init(&c, NULL);
+	pthread_mutex_init(&d, NULL);
+
+	LOCK_UNLOCK_2(a, b);
+	LOCK_UNLOCK_2(c, d);
+	LOCK_UNLOCK_2(b, d);
+	LOCK_UNLOCK_2(d, a);
+
+	pthread_mutex_destroy(&d);
+	pthread_mutex_destroy(&c);
+	pthread_mutex_destroy(&b);
+	pthread_mutex_destroy(&a);
+}
diff --git a/tools/lib/lockdep/tests/ABCDBDDA.sh b/tools/lib/lockdep/tests/ABCDBDDA.sh
new file mode 100644
index 000000000..fc31c607a
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABCDBDDA.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/WW.c b/tools/lib/lockdep/tests/WW.c
new file mode 100644
index 000000000..eee88df7f
--- /dev/null
+++ b/tools/lib/lockdep/tests/WW.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <liblockdep/rwlock.h>
+
+void main(void)
+{
+	pthread_rwlock_t a, b;
+
+	pthread_rwlock_init(&a, NULL);
+	pthread_rwlock_init(&b, NULL);
+
+	pthread_rwlock_wrlock(&a);
+	pthread_rwlock_rdlock(&b);
+	pthread_rwlock_wrlock(&a);
+}
diff --git a/tools/lib/lockdep/tests/WW.sh b/tools/lib/lockdep/tests/WW.sh
new file mode 100644
index 000000000..f39b32865
--- /dev/null
+++ b/tools/lib/lockdep/tests/WW.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: possible recursive locking detected'
diff --git a/tools/lib/lockdep/tests/common.h b/tools/lib/lockdep/tests/common.h
new file mode 100644
index 000000000..3026c29cc
--- /dev/null
+++ b/tools/lib/lockdep/tests/common.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LIBLOCKDEP_TEST_COMMON_H
+#define _LIBLOCKDEP_TEST_COMMON_H
+
+#define LOCK_UNLOCK_2(a, b)			\
+	do {					\
+		pthread_mutex_lock(&(a));	\
+		pthread_mutex_lock(&(b));	\
+		pthread_mutex_unlock(&(b));	\
+		pthread_mutex_unlock(&(a));	\
+	} while(0)
+
+#endif
diff --git a/tools/lib/lockdep/tests/unlock_balance.c b/tools/lib/lockdep/tests/unlock_balance.c
new file mode 100644
index 000000000..dba25064b
--- /dev/null
+++ b/tools/lib/lockdep/tests/unlock_balance.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <liblockdep/mutex.h>
+
+void main(void)
+{
+	pthread_mutex_t a;
+
+	pthread_mutex_init(&a, NULL);
+
+	pthread_mutex_lock(&a);
+	pthread_mutex_unlock(&a);
+	pthread_mutex_unlock(&a);
+
+	pthread_mutex_destroy(&a);
+}
diff --git a/tools/lib/lockdep/tests/unlock_balance.sh b/tools/lib/lockdep/tests/unlock_balance.sh
new file mode 100644
index 000000000..c6e395230
--- /dev/null
+++ b/tools/lib/lockdep/tests/unlock_balance.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+grep -q 'WARNING: bad unlock balance detected'
diff --git a/tools/lib/perf/Build b/tools/lib/perf/Build
new file mode 100644
index 000000000..2ef9a4ec6
--- /dev/null
+++ b/tools/lib/perf/Build
@@ -0,0 +1,13 @@
+libperf-y += core.o
+libperf-y += cpumap.o
+libperf-y += threadmap.o
+libperf-y += evsel.o
+libperf-y += evlist.o
+libperf-y += mmap.o
+libperf-y += zalloc.o
+libperf-y += xyarray.o
+libperf-y += lib.o
+
+$(OUTPUT)zalloc.o: ../../lib/zalloc.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
diff --git a/tools/lib/perf/Documentation/Makefile b/tools/lib/perf/Documentation/Makefile
new file mode 100644
index 000000000..972754082
--- /dev/null
+++ b/tools/lib/perf/Documentation/Makefile
@@ -0,0 +1,156 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+# Most of this file is copied from tools/perf/Documentation/Makefile
+
+include ../../../scripts/Makefile.include
+include ../../../scripts/utilities.mak
+
+MAN3_TXT  = libperf.txt
+MAN7_TXT  = libperf-counting.txt libperf-sampling.txt
+MAN_EX    = examples/*.c
+
+MAN_TXT   = $(MAN3_TXT) $(MAN7_TXT)
+
+_MAN_XML  = $(patsubst %.txt,%.xml,$(MAN_TXT))
+_MAN_HTML = $(patsubst %.txt,%.html,$(MAN_TXT))
+_MAN_3    = $(patsubst %.txt,%.3,$(MAN3_TXT))
+_MAN_7    = $(patsubst %.txt,%.7,$(MAN7_TXT))
+
+MAN_XML   = $(addprefix $(OUTPUT),$(_MAN_XML))
+MAN_HTML  = $(addprefix $(OUTPUT),$(_MAN_HTML))
+MAN_3     = $(addprefix $(OUTPUT),$(_MAN_3))
+MAN_7     = $(addprefix $(OUTPUT),$(_MAN_7))
+MAN_X     = $(MAN_3) $(MAN_7)
+
+# Make the path relative to DESTDIR, not prefix
+ifndef DESTDIR
+  prefix ?=$(HOME)
+endif
+
+mandir  ?= $(prefix)/share/man
+man3dir  = $(mandir)/man3
+man7dir  = $(mandir)/man7
+
+docdir  ?= $(prefix)/share/doc/libperf
+htmldir  = $(docdir)/html
+exdir    = $(docdir)/examples
+
+ASCIIDOC        = asciidoc
+ASCIIDOC_EXTRA  = --unsafe -f asciidoc.conf
+ASCIIDOC_HTML   = xhtml11
+MANPAGE_XSL     = manpage-normal.xsl
+XMLTO_EXTRA     =
+XMLTO           =xmlto
+
+INSTALL ?= install
+RM      ?= rm -f
+
+# For asciidoc ...
+#	-7.1.2,	no extra settings are needed.
+#	8.0-,	set ASCIIDOC8.
+#
+
+# For docbook-xsl ...
+#	-1.68.1,	set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0)
+#	1.69.0,		no extra settings are needed?
+#	1.69.1-1.71.0,	set DOCBOOK_SUPPRESS_SP?
+#	1.71.1,		no extra settings are needed?
+#	1.72.0,		set DOCBOOK_XSL_172.
+#	1.73.0-,	set ASCIIDOC_NO_ROFF
+
+# If you had been using DOCBOOK_XSL_172 in an attempt to get rid
+# of 'the ".ft C" problem' in your generated manpages, and you
+# instead ended up with weird characters around callouts, try
+# using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8).
+
+ifdef ASCIIDOC8
+  ASCIIDOC_EXTRA += -a asciidoc7compatible
+endif
+ifdef DOCBOOK_XSL_172
+  ASCIIDOC_EXTRA += -a libperf-asciidoc-no-roff
+  MANPAGE_XSL = manpage-1.72.xsl
+else
+  ifdef ASCIIDOC_NO_ROFF
+    # docbook-xsl after 1.72 needs the regular XSL, but will not
+    # pass-thru raw roff codes from asciidoc.conf, so turn them off.
+    ASCIIDOC_EXTRA += -a libperf-asciidoc-no-roff
+  endif
+endif
+ifdef MAN_BOLD_LITERAL
+  XMLTO_EXTRA += -m manpage-bold-literal.xsl
+endif
+ifdef DOCBOOK_SUPPRESS_SP
+  XMLTO_EXTRA += -m manpage-suppress-sp.xsl
+endif
+
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+export DESTDIR DESTDIR_SQ
+
+# Please note that there is a minor bug in asciidoc.
+# The version after 6.0.3 _will_ include the patch found here:
+#   http://marc.theaimsgroup.com/?l=libtraceevent&m=111558757202243&w=2
+#
+# Until that version is released you may have to apply the patch
+# yourself - yes, all 6 characters of it!
+
+QUIET_SUBDIR0  = +$(MAKE) -C # space to separate -C and subdir
+QUIET_SUBDIR1  =
+
+ifneq ($(findstring $(MAKEFLAGS),w),w)
+  PRINT_DIR = --no-print-directory
+else # "make -w"
+  NO_SUBDIR = :
+endif
+
+ifneq ($(findstring $(MAKEFLAGS),s),s)
+  ifneq ($(V),1)
+    QUIET_ASCIIDOC = @echo '  ASCIIDOC '$@;
+    QUIET_XMLTO    = @echo '  XMLTO    '$@;
+  endif
+endif
+
+all: $(MAN_X) $(MAN_HTML)
+
+$(MAN_HTML) $(MAN_X): asciidoc.conf
+
+install-man: all
+	$(call QUIET_INSTALL, man) \
+		$(INSTALL) -d -m 755 $(DESTDIR)$(man3dir); \
+		$(INSTALL) -m 644 $(MAN_3) $(DESTDIR)$(man3dir); \
+		$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir); \
+		$(INSTALL) -m 644 $(MAN_7) $(DESTDIR)$(man7dir);
+
+install-html:
+	$(call QUIET_INSTALL, html) \
+		$(INSTALL) -d -m 755 $(DESTDIR)$(htmldir); \
+		$(INSTALL) -m 644 $(MAN_HTML) $(DESTDIR)$(htmldir); \
+
+install-examples:
+	$(call QUIET_INSTALL, examples) \
+		$(INSTALL) -d -m 755 $(DESTDIR)$(exdir); \
+		$(INSTALL) -m 644 $(MAN_EX) $(DESTDIR)$(exdir); \
+
+CLEAN_FILES =					\
+	$(MAN_XML) $(addsuffix +,$(MAN_XML))	\
+	$(MAN_HTML) $(addsuffix +,$(MAN_HTML))	\
+	$(MAN_X)
+
+clean:
+	$(call QUIET_CLEAN, Documentation) $(RM) $(CLEAN_FILES)
+
+$(MAN_3): $(OUTPUT)%.3: %.xml
+	$(QUIET_XMLTO)$(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
+
+$(MAN_7): $(OUTPUT)%.7: %.xml
+	$(QUIET_XMLTO)$(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
+
+$(MAN_XML): $(OUTPUT)%.xml: %.txt
+	$(QUIET_ASCIIDOC)$(ASCIIDOC) -b docbook -d manpage \
+		$(ASCIIDOC_EXTRA) -alibperf_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \
+	mv $@+ $@
+
+$(MAN_HTML): $(OUTPUT)%.html: %.txt
+	$(QUIET_ASCIIDOC)$(ASCIIDOC) -b $(ASCIIDOC_HTML) -d manpage \
+	$(ASCIIDOC_EXTRA) -aperf_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \
+	mv $@+ $@
diff --git a/tools/lib/perf/Documentation/asciidoc.conf b/tools/lib/perf/Documentation/asciidoc.conf
new file mode 100644
index 000000000..9d5a5a5ee
--- /dev/null
+++ b/tools/lib/perf/Documentation/asciidoc.conf
@@ -0,0 +1,120 @@
+## linktep: macro
+#
+# Usage: linktep:command[manpage-section]
+#
+# Note, {0} is the manpage section, while {target} is the command.
+#
+# Show TEP link as: <command>(<section>); if section is defined, else just show
+# the command.
+
+[macros]
+(?su)[\\]?(?P<name>linktep):(?P<target>\S*?)\[(?P<attrlist>.*?)\]=
+
+[attributes]
+asterisk=&#42;
+plus=&#43;
+caret=&#94;
+startsb=&#91;
+endsb=&#93;
+tilde=&#126;
+
+ifdef::backend-docbook[]
+[linktep-inlinemacro]
+{0%{target}}
+{0#<citerefentry>}
+{0#<refentrytitle>{target}</refentrytitle><manvolnum>{0}</manvolnum>}
+{0#</citerefentry>}
+endif::backend-docbook[]
+
+ifdef::backend-docbook[]
+ifndef::tep-asciidoc-no-roff[]
+# "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this.
+# v1.72 breaks with this because it replaces dots not in roff requests.
+[listingblock]
+<example><title>{title}</title>
+<literallayout>
+ifdef::doctype-manpage[]
+&#10;.ft C&#10;
+endif::doctype-manpage[]
+|
+ifdef::doctype-manpage[]
+&#10;.ft&#10;
+endif::doctype-manpage[]
+</literallayout>
+{title#}</example>
+endif::tep-asciidoc-no-roff[]
+
+ifdef::tep-asciidoc-no-roff[]
+ifdef::doctype-manpage[]
+# The following two small workarounds insert a simple paragraph after screen
+[listingblock]
+<example><title>{title}</title>
+<literallayout>
+|
+</literallayout><simpara></simpara>
+{title#}</example>
+
+[verseblock]
+<formalpara{id? id="{id}"}><title>{title}</title><para>
+{title%}<literallayout{id? id="{id}"}>
+{title#}<literallayout>
+|
+</literallayout>
+{title#}</para></formalpara>
+{title%}<simpara></simpara>
+endif::doctype-manpage[]
+endif::tep-asciidoc-no-roff[]
+endif::backend-docbook[]
+
+ifdef::doctype-manpage[]
+ifdef::backend-docbook[]
+[header]
+template::[header-declarations]
+<refentry>
+<refmeta>
+<refentrytitle>{mantitle}</refentrytitle>
+<manvolnum>{manvolnum}</manvolnum>
+<refmiscinfo class="source">libperf</refmiscinfo>
+<refmiscinfo class="version">{libperf_version}</refmiscinfo>
+<refmiscinfo class="manual">libperf Manual</refmiscinfo>
+</refmeta>
+<refnamediv>
+  <refname>{manname1}</refname>
+  <refname>{manname2}</refname>
+  <refname>{manname3}</refname>
+  <refname>{manname4}</refname>
+  <refname>{manname5}</refname>
+  <refname>{manname6}</refname>
+  <refname>{manname7}</refname>
+  <refname>{manname8}</refname>
+  <refname>{manname9}</refname>
+  <refname>{manname10}</refname>
+  <refname>{manname11}</refname>
+  <refname>{manname12}</refname>
+  <refname>{manname13}</refname>
+  <refname>{manname14}</refname>
+  <refname>{manname15}</refname>
+  <refname>{manname16}</refname>
+  <refname>{manname17}</refname>
+  <refname>{manname18}</refname>
+  <refname>{manname19}</refname>
+  <refname>{manname20}</refname>
+  <refname>{manname21}</refname>
+  <refname>{manname22}</refname>
+  <refname>{manname23}</refname>
+  <refname>{manname24}</refname>
+  <refname>{manname25}</refname>
+  <refname>{manname26}</refname>
+  <refname>{manname27}</refname>
+  <refname>{manname28}</refname>
+  <refname>{manname29}</refname>
+  <refname>{manname30}</refname>
+  <refpurpose>{manpurpose}</refpurpose>
+</refnamediv>
+endif::backend-docbook[]
+endif::doctype-manpage[]
+
+ifdef::backend-xhtml11[]
+[linktep-inlinemacro]
+<a href="{target}.html">{target}{0?({0})}</a>
+endif::backend-xhtml11[]
diff --git a/tools/lib/perf/Documentation/examples/counting.c b/tools/lib/perf/Documentation/examples/counting.c
new file mode 100644
index 000000000..608569357
--- /dev/null
+++ b/tools/lib/perf/Documentation/examples/counting.c
@@ -0,0 +1,83 @@
+#include <linux/perf_event.h>
+#include <perf/evlist.h>
+#include <perf/evsel.h>
+#include <perf/cpumap.h>
+#include <perf/threadmap.h>
+#include <perf/mmap.h>
+#include <perf/core.h>
+#include <perf/event.h>
+#include <stdio.h>
+#include <unistd.h>
+
+static int libperf_print(enum libperf_print_level level,
+                         const char *fmt, va_list ap)
+{
+	return vfprintf(stderr, fmt, ap);
+}
+
+int main(int argc, char **argv)
+{
+	int count = 100000, err = 0;
+	struct perf_evlist *evlist;
+	struct perf_evsel *evsel;
+	struct perf_thread_map *threads;
+	struct perf_counts_values counts;
+
+	struct perf_event_attr attr1 = {
+		.type        = PERF_TYPE_SOFTWARE,
+		.config      = PERF_COUNT_SW_CPU_CLOCK,
+		.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING,
+		.disabled    = 1,
+	};
+	struct perf_event_attr attr2 = {
+		.type        = PERF_TYPE_SOFTWARE,
+		.config      = PERF_COUNT_SW_TASK_CLOCK,
+		.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING,
+		.disabled    = 1,
+	};
+
+	libperf_init(libperf_print);
+	threads = perf_thread_map__new_dummy();
+	if (!threads) {
+		fprintf(stderr, "failed to create threads\n");
+		return -1;
+	}
+	perf_thread_map__set_pid(threads, 0, 0);
+	evlist = perf_evlist__new();
+	if (!evlist) {
+		fprintf(stderr, "failed to create evlist\n");
+		goto out_threads;
+	}
+	evsel = perf_evsel__new(&attr1);
+	if (!evsel) {
+		fprintf(stderr, "failed to create evsel1\n");
+		goto out_evlist;
+	}
+	perf_evlist__add(evlist, evsel);
+	evsel = perf_evsel__new(&attr2);
+	if (!evsel) {
+		fprintf(stderr, "failed to create evsel2\n");
+		goto out_evlist;
+	}
+	perf_evlist__add(evlist, evsel);
+	perf_evlist__set_maps(evlist, NULL, threads);
+	err = perf_evlist__open(evlist);
+	if (err) {
+		fprintf(stderr, "failed to open evsel\n");
+		goto out_evlist;
+	}
+	perf_evlist__enable(evlist);
+	while (count--);
+	perf_evlist__disable(evlist);
+	perf_evlist__for_each_evsel(evlist, evsel) {
+		perf_evsel__read(evsel, 0, 0, &counts);
+		fprintf(stdout, "count %llu, enabled %llu, run %llu\n",
+				counts.val, counts.ena, counts.run);
+	}
+	perf_evlist__close(evlist);
+out_evlist:
+	perf_evlist__delete(evlist);
+out_threads:
+	perf_thread_map__put(threads);
+	return err;
+}
diff --git a/tools/lib/perf/Documentation/examples/sampling.c b/tools/lib/perf/Documentation/examples/sampling.c
new file mode 100644
index 000000000..8e1a926a9
--- /dev/null
+++ b/tools/lib/perf/Documentation/examples/sampling.c
@@ -0,0 +1,119 @@
+#include <linux/perf_event.h>
+#include <perf/evlist.h>
+#include <perf/evsel.h>
+#include <perf/cpumap.h>
+#include <perf/threadmap.h>
+#include <perf/mmap.h>
+#include <perf/core.h>
+#include <perf/event.h>
+#include <stdio.h>
+#include <unistd.h>
+
+static int libperf_print(enum libperf_print_level level,
+                         const char *fmt, va_list ap)
+{
+	return vfprintf(stderr, fmt, ap);
+}
+
+union u64_swap {
+	__u64 val64;
+	__u32 val32[2];
+};
+
+int main(int argc, char **argv)
+{
+	struct perf_evlist *evlist;
+	struct perf_evsel *evsel;
+	struct perf_mmap *map;
+	struct perf_cpu_map *cpus;
+	struct perf_event_attr attr = {
+		.type        = PERF_TYPE_HARDWARE,
+		.config      = PERF_COUNT_HW_CPU_CYCLES,
+		.disabled    = 1,
+		.freq        = 1,
+		.sample_freq = 10,
+		.sample_type = PERF_SAMPLE_IP|PERF_SAMPLE_TID|PERF_SAMPLE_CPU|PERF_SAMPLE_PERIOD,
+	};
+	int err = -1;
+	union perf_event *event;
+
+	libperf_init(libperf_print);
+
+	cpus = perf_cpu_map__new(NULL);
+	if (!cpus) {
+		fprintf(stderr, "failed to create cpus\n");
+		return -1;
+	}
+
+	evlist = perf_evlist__new();
+	if (!evlist) {
+		fprintf(stderr, "failed to create evlist\n");
+		goto out_cpus;
+	}
+
+	evsel = perf_evsel__new(&attr);
+	if (!evsel) {
+		fprintf(stderr, "failed to create cycles\n");
+		goto out_cpus;
+	}
+
+	perf_evlist__add(evlist, evsel);
+
+	perf_evlist__set_maps(evlist, cpus, NULL);
+
+	err = perf_evlist__open(evlist);
+	if (err) {
+		fprintf(stderr, "failed to open evlist\n");
+		goto out_evlist;
+	}
+
+	err = perf_evlist__mmap(evlist, 4);
+	if (err) {
+		fprintf(stderr, "failed to mmap evlist\n");
+		goto out_evlist;
+	}
+
+	perf_evlist__enable(evlist);
+	sleep(3);
+	perf_evlist__disable(evlist);
+
+	perf_evlist__for_each_mmap(evlist, map, false) {
+		if (perf_mmap__read_init(map) < 0)
+			continue;
+
+		while ((event = perf_mmap__read_event(map)) != NULL) {
+			int cpu, pid, tid;
+			__u64 ip, period, *array;
+			union u64_swap u;
+
+			array = event->sample.array;
+
+			ip = *array;
+			array++;
+
+			u.val64 = *array;
+			pid = u.val32[0];
+			tid = u.val32[1];
+			array++;
+
+			u.val64 = *array;
+			cpu = u.val32[0];
+			array++;
+
+			period = *array;
+
+			fprintf(stdout, "cpu %3d, pid %6d, tid %6d, ip %20llx, period %20llu\n",
+				cpu, pid, tid, ip, period);
+
+			perf_mmap__consume(map);
+		}
+
+		perf_mmap__read_done(map);
+	}
+
+out_evlist:
+	perf_evlist__delete(evlist);
+out_cpus:
+	perf_cpu_map__put(cpus);
+	return err;
+}
diff --git a/tools/lib/perf/Documentation/libperf-counting.txt b/tools/lib/perf/Documentation/libperf-counting.txt
new file mode 100644
index 000000000..8b75efcd6
--- /dev/null
+++ b/tools/lib/perf/Documentation/libperf-counting.txt
@@ -0,0 +1,213 @@
+libperf-counting(7)
+===================
+
+NAME
+----
+libperf-counting - counting interface
+
+DESCRIPTION
+-----------
+The counting interface provides API to measure and get count for specific perf events.
+
+The following test tries to explain count on `counting.c` example.
+
+It is by no means complete guide to counting, but shows libperf basic API for counting.
+
+The `counting.c` comes with libperf package and can be compiled and run like:
+
+[source,bash]
+--
+$ gcc -o counting counting.c -lperf
+$ sudo ./counting
+count 176792, enabled 176944, run 176944
+count 176242, enabled 176242, run 176242
+--
+
+It requires root access, because of the `PERF_COUNT_SW_CPU_CLOCK` event,
+which is available only for root.
+
+The `counting.c` example monitors two events on the current process and displays
+their count, in a nutshell it:
+
+* creates events
+* adds them to the event list
+* opens and enables events through the event list
+* does some workload
+* disables events
+* reads and displays event counts
+* destroys the event list
+
+The first thing you need to do before using libperf is to call init function:
+
+[source,c]
+--
+  8 static int libperf_print(enum libperf_print_level level,
+  9                          const char *fmt, va_list ap)
+ 10 {
+ 11         return vfprintf(stderr, fmt, ap);
+ 12 }
+
+ 14 int main(int argc, char **argv)
+ 15 {
+ ...
+ 35         libperf_init(libperf_print);
+--
+
+It will setup the library and sets function for debug output from library.
+
+The `libperf_print` callback will receive any message with its debug level,
+defined as:
+
+[source,c]
+--
+enum libperf_print_level {
+        LIBPERF_ERR,
+        LIBPERF_WARN,
+        LIBPERF_INFO,
+        LIBPERF_DEBUG,
+        LIBPERF_DEBUG2,
+        LIBPERF_DEBUG3,
+};
+--
+
+Once the setup is complete we start by defining specific events using the `struct perf_event_attr`.
+
+We create software events for cpu and task:
+
+[source,c]
+--
+ 20         struct perf_event_attr attr1 = {
+ 21                 .type        = PERF_TYPE_SOFTWARE,
+ 22                 .config      = PERF_COUNT_SW_CPU_CLOCK,
+ 23                 .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING,
+ 24                 .disabled    = 1,
+ 25         };
+ 26         struct perf_event_attr attr2 = {
+ 27                 .type        = PERF_TYPE_SOFTWARE,
+ 28                 .config      = PERF_COUNT_SW_TASK_CLOCK,
+ 29                 .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING,
+ 30                 .disabled    = 1,
+ 31         };
+--
+
+The `read_format` setup tells perf to include timing details together with each count.
+
+Next step is to prepare threads map.
+
+In this case we will monitor current process, so we create threads map with single pid (0):
+
+[source,c]
+--
+ 37         threads = perf_thread_map__new_dummy();
+ 38         if (!threads) {
+ 39                 fprintf(stderr, "failed to create threads\n");
+ 40                 return -1;
+ 41         }
+ 42
+ 43         perf_thread_map__set_pid(threads, 0, 0);
+--
+
+Now we create libperf's event list, which will serve as holder for the events we want:
+
+[source,c]
+--
+ 45         evlist = perf_evlist__new();
+ 46         if (!evlist) {
+ 47                 fprintf(stderr, "failed to create evlist\n");
+ 48                 goto out_threads;
+ 49         }
+--
+
+We create libperf's events for the attributes we defined earlier and add them to the list:
+
+[source,c]
+--
+ 51         evsel = perf_evsel__new(&attr1);
+ 52         if (!evsel) {
+ 53                 fprintf(stderr, "failed to create evsel1\n");
+ 54                 goto out_evlist;
+ 55         }
+ 56
+ 57         perf_evlist__add(evlist, evsel);
+ 58
+ 59         evsel = perf_evsel__new(&attr2);
+ 60         if (!evsel) {
+ 61                 fprintf(stderr, "failed to create evsel2\n");
+ 62                 goto out_evlist;
+ 63         }
+ 64
+ 65         perf_evlist__add(evlist, evsel);
+--
+
+Configure event list with the thread map and open events:
+
+[source,c]
+--
+ 67         perf_evlist__set_maps(evlist, NULL, threads);
+ 68
+ 69         err = perf_evlist__open(evlist);
+ 70         if (err) {
+ 71                 fprintf(stderr, "failed to open evsel\n");
+ 72                 goto out_evlist;
+ 73         }
+--
+
+Both events are created as disabled (note the `disabled = 1` assignment above),
+so we need to enable the whole list explicitly (both events).
+
+From this moment events are counting and we can do our workload.
+
+When we are done we disable the events list.
+
+[source,c]
+--
+ 75         perf_evlist__enable(evlist);
+ 76
+ 77         while (count--);
+ 78
+ 79         perf_evlist__disable(evlist);
+--
+
+Now we need to get the counts from events, following code iterates through the
+events list and read counts:
+
+[source,c]
+--
+ 81         perf_evlist__for_each_evsel(evlist, evsel) {
+ 82                 perf_evsel__read(evsel, 0, 0, &counts);
+ 83                 fprintf(stdout, "count %llu, enabled %llu, run %llu\n",
+ 84                         counts.val, counts.ena, counts.run);
+ 85         }
+--
+
+And finally cleanup.
+
+We close the whole events list (both events) and remove it together with the threads map:
+
+[source,c]
+--
+ 87         perf_evlist__close(evlist);
+ 88
+ 89 out_evlist:
+ 90         perf_evlist__delete(evlist);
+ 91 out_threads:
+ 92         perf_thread_map__put(threads);
+ 93         return err;
+ 94 }
+--
+
+REPORTING BUGS
+--------------
+Report bugs to <linux-perf-users@vger.kernel.org>.
+
+LICENSE
+-------
+libperf is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
+
+SEE ALSO
+--------
+libperf(3), libperf-sampling(7)
diff --git a/tools/lib/perf/Documentation/libperf-sampling.txt b/tools/lib/perf/Documentation/libperf-sampling.txt
new file mode 100644
index 000000000..d6ca24f6e
--- /dev/null
+++ b/tools/lib/perf/Documentation/libperf-sampling.txt
@@ -0,0 +1,244 @@
+libperf-sampling(7)
+===================
+
+NAME
+----
+libperf-sampling - sampling interface
+
+
+DESCRIPTION
+-----------
+The sampling interface provides API to measure and get count for specific perf events.
+
+The following test tries to explain count on `sampling.c` example.
+
+It is by no means complete guide to sampling, but shows libperf basic API for sampling.
+
+The `sampling.c` comes with libperf package and can be compiled and run like:
+
+[source,bash]
+--
+$ gcc -o sampling sampling.c -lperf
+$ sudo ./sampling
+cpu   0, pid      0, tid      0, ip     ffffffffad06c4e6, period                    1
+cpu   0, pid   4465, tid   4469, ip     ffffffffad118748, period             18322959
+cpu   0, pid      0, tid      0, ip     ffffffffad115722, period             33544846
+cpu   0, pid   4465, tid   4470, ip         7f84fe0cdad6, period             23687474
+cpu   0, pid      0, tid      0, ip     ffffffffad9e0349, period             34255790
+cpu   0, pid   4465, tid   4469, ip     ffffffffad136581, period             38664069
+cpu   0, pid      0, tid      0, ip     ffffffffad9e55e2, period             21922384
+cpu   0, pid   4465, tid   4470, ip         7f84fe0ebebf, period             17655175
+...
+--
+
+It requires root access, because it uses hardware cycles event.
+
+The `sampling.c` example profiles/samples all CPUs with hardware cycles, in a
+nutshell it:
+
+- creates events
+- adds them to the event list
+- opens and enables events through the event list
+- sleeps for 3 seconds
+- disables events
+- reads and displays recorded samples
+- destroys the event list
+
+The first thing you need to do before using libperf is to call init function:
+
+[source,c]
+--
+ 12 static int libperf_print(enum libperf_print_level level,
+ 13                          const char *fmt, va_list ap)
+ 14 {
+ 15         return vfprintf(stderr, fmt, ap);
+ 16 }
+
+ 23 int main(int argc, char **argv)
+ 24 {
+ ...
+ 40         libperf_init(libperf_print);
+--
+
+It will setup the library and sets function for debug output from library.
+
+The `libperf_print` callback will receive any message with its debug level,
+defined as:
+
+[source,c]
+--
+enum libperf_print_level {
+        LIBPERF_ERR,
+        LIBPERF_WARN,
+        LIBPERF_INFO,
+        LIBPERF_DEBUG,
+        LIBPERF_DEBUG2,
+        LIBPERF_DEBUG3,
+};
+--
+
+Once the setup is complete we start by defining cycles event using the `struct perf_event_attr`:
+
+[source,c]
+--
+ 29         struct perf_event_attr attr = {
+ 30                 .type        = PERF_TYPE_HARDWARE,
+ 31                 .config      = PERF_COUNT_HW_CPU_CYCLES,
+ 32                 .disabled    = 1,
+ 33                 .freq        = 1,
+ 34                 .sample_freq = 10,
+ 35                 .sample_type = PERF_SAMPLE_IP|PERF_SAMPLE_TID|PERF_SAMPLE_CPU|PERF_SAMPLE_PERIOD,
+ 36         };
+--
+
+Next step is to prepare CPUs map.
+
+In this case we will monitor all the available CPUs:
+
+[source,c]
+--
+ 42         cpus = perf_cpu_map__new(NULL);
+ 43         if (!cpus) {
+ 44                 fprintf(stderr, "failed to create cpus\n");
+ 45                 return -1;
+ 46         }
+--
+
+Now we create libperf's event list, which will serve as holder for the cycles event:
+
+[source,c]
+--
+ 48         evlist = perf_evlist__new();
+ 49         if (!evlist) {
+ 50                 fprintf(stderr, "failed to create evlist\n");
+ 51                 goto out_cpus;
+ 52         }
+--
+
+We create libperf's event for the cycles attribute we defined earlier and add it to the list:
+
+[source,c]
+--
+ 54         evsel = perf_evsel__new(&attr);
+ 55         if (!evsel) {
+ 56                 fprintf(stderr, "failed to create cycles\n");
+ 57                 goto out_cpus;
+ 58         }
+ 59
+ 60         perf_evlist__add(evlist, evsel);
+--
+
+Configure event list with the cpus map and open event:
+
+[source,c]
+--
+ 62         perf_evlist__set_maps(evlist, cpus, NULL);
+ 63
+ 64         err = perf_evlist__open(evlist);
+ 65         if (err) {
+ 66                 fprintf(stderr, "failed to open evlist\n");
+ 67                 goto out_evlist;
+ 68         }
+--
+
+Once the events list is open, we can create memory maps AKA perf ring buffers:
+
+[source,c]
+--
+ 70         err = perf_evlist__mmap(evlist, 4);
+ 71         if (err) {
+ 72                 fprintf(stderr, "failed to mmap evlist\n");
+ 73                 goto out_evlist;
+ 74         }
+--
+
+The event is created as disabled (note the `disabled = 1` assignment above),
+so we need to enable the events list explicitly.
+
+From this moment the cycles event is sampling.
+
+We will sleep for 3 seconds while the ring buffers get data from all CPUs, then we disable the events list.
+
+[source,c]
+--
+ 76         perf_evlist__enable(evlist);
+ 77         sleep(3);
+ 78         perf_evlist__disable(evlist);
+--
+
+Following code walks through the ring buffers and reads stored events/samples:
+
+[source,c]
+--
+ 80         perf_evlist__for_each_mmap(evlist, map, false) {
+ 81                 if (perf_mmap__read_init(map) < 0)
+ 82                         continue;
+ 83
+ 84                 while ((event = perf_mmap__read_event(map)) != NULL) {
+
+                            /* process event */
+
+108                         perf_mmap__consume(map);
+109                 }
+110                 perf_mmap__read_done(map);
+111         }
+
+--
+
+Each sample needs to get parsed:
+
+[source,c]
+--
+ 85                         int cpu, pid, tid;
+ 86                         __u64 ip, period, *array;
+ 87                         union u64_swap u;
+ 88
+ 89                         array = event->sample.array;
+ 90
+ 91                         ip = *array;
+ 92                         array++;
+ 93
+ 94                         u.val64 = *array;
+ 95                         pid = u.val32[0];
+ 96                         tid = u.val32[1];
+ 97                         array++;
+ 98
+ 99                         u.val64 = *array;
+100                         cpu = u.val32[0];
+101                         array++;
+102
+103                         period = *array;
+104
+105                         fprintf(stdout, "cpu %3d, pid %6d, tid %6d, ip %20llx, period %20llu\n",
+106                                 cpu, pid, tid, ip, period);
+--
+
+And finally cleanup.
+
+We close the whole events list (both events) and remove it together with the threads map:
+
+[source,c]
+--
+113 out_evlist:
+114         perf_evlist__delete(evlist);
+115 out_cpus:
+116         perf_cpu_map__put(cpus);
+117         return err;
+118 }
+--
+
+REPORTING BUGS
+--------------
+Report bugs to <linux-perf-users@vger.kernel.org>.
+
+LICENSE
+-------
+libperf is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
+
+SEE ALSO
+--------
+libperf(3), libperf-counting(7)
diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
new file mode 100644
index 000000000..0c74c30ed
--- /dev/null
+++ b/tools/lib/perf/Documentation/libperf.txt
@@ -0,0 +1,246 @@
+libperf(3)
+==========
+
+NAME
+----
+libperf - Linux kernel perf event library
+
+
+SYNOPSIS
+--------
+*Generic API:*
+
+[source,c]
+--
+  #include <perf/core.h>
+
+  enum libperf_print_level {
+          LIBPERF_ERR,
+          LIBPERF_WARN,
+          LIBPERF_INFO,
+          LIBPERF_DEBUG,
+          LIBPERF_DEBUG2,
+          LIBPERF_DEBUG3,
+  };
+
+  typedef int (*libperf_print_fn_t)(enum libperf_print_level level,
+                                    const char *, va_list ap);
+
+  void libperf_init(libperf_print_fn_t fn);
+--
+
+*API to handle CPU maps:*
+
+[source,c]
+--
+  #include <perf/cpumap.h>
+
+  struct perf_cpu_map;
+
+  struct perf_cpu_map *perf_cpu_map__dummy_new(void);
+  struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
+  struct perf_cpu_map *perf_cpu_map__read(FILE *file);
+  struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
+  struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
+                                           struct perf_cpu_map *other);
+  void perf_cpu_map__put(struct perf_cpu_map *map);
+  int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
+  int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
+  bool perf_cpu_map__empty(const struct perf_cpu_map *map);
+  int perf_cpu_map__max(struct perf_cpu_map *map);
+
+  #define perf_cpu_map__for_each_cpu(cpu, idx, cpus)
+--
+
+*API to handle thread maps:*
+
+[source,c]
+--
+  #include <perf/threadmap.h>
+
+  struct perf_thread_map;
+
+  struct perf_thread_map *perf_thread_map__new_dummy(void);
+
+  void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid);
+  char *perf_thread_map__comm(struct perf_thread_map *map, int thread);
+  int perf_thread_map__nr(struct perf_thread_map *threads);
+  pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread);
+
+  struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map);
+  void perf_thread_map__put(struct perf_thread_map *map);
+--
+
+*API to handle event lists:*
+
+[source,c]
+--
+  #include <perf/evlist.h>
+
+  struct perf_evlist;
+
+  void perf_evlist__add(struct perf_evlist *evlist,
+                        struct perf_evsel *evsel);
+  void perf_evlist__remove(struct perf_evlist *evlist,
+                           struct perf_evsel *evsel);
+  struct perf_evlist *perf_evlist__new(void);
+  void perf_evlist__delete(struct perf_evlist *evlist);
+  struct perf_evsel* perf_evlist__next(struct perf_evlist *evlist,
+                                       struct perf_evsel *evsel);
+  int perf_evlist__open(struct perf_evlist *evlist);
+  void perf_evlist__close(struct perf_evlist *evlist);
+  void perf_evlist__enable(struct perf_evlist *evlist);
+  void perf_evlist__disable(struct perf_evlist *evlist);
+
+  #define perf_evlist__for_each_evsel(evlist, pos)
+
+  void perf_evlist__set_maps(struct perf_evlist *evlist,
+                             struct perf_cpu_map *cpus,
+                             struct perf_thread_map *threads);
+  int perf_evlist__poll(struct perf_evlist *evlist, int timeout);
+  int perf_evlist__filter_pollfd(struct perf_evlist *evlist,
+                                 short revents_and_mask);
+
+  int perf_evlist__mmap(struct perf_evlist *evlist, int pages);
+  void perf_evlist__munmap(struct perf_evlist *evlist);
+
+  struct perf_mmap *perf_evlist__next_mmap(struct perf_evlist *evlist,
+                                           struct perf_mmap *map,
+                                           bool overwrite);
+
+  #define perf_evlist__for_each_mmap(evlist, pos, overwrite)
+--
+
+*API to handle events:*
+
+[source,c]
+--
+  #include <perf/evsel.h>*
+
+  struct perf_evsel;
+
+  struct perf_counts_values {
+          union {
+                  struct {
+                          uint64_t val;
+                          uint64_t ena;
+                          uint64_t run;
+                  };
+                  uint64_t values[3];
+          };
+  };
+
+  struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr);
+  void perf_evsel__delete(struct perf_evsel *evsel);
+  int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
+                       struct perf_thread_map *threads);
+  void perf_evsel__close(struct perf_evsel *evsel);
+  void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
+  int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
+                       struct perf_counts_values *count);
+  int perf_evsel__enable(struct perf_evsel *evsel);
+  int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu);
+  int perf_evsel__disable(struct perf_evsel *evsel);
+  int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu);
+  struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);
+  struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);
+  struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel);
+--
+
+*API to handle maps (perf ring buffers):*
+
+[source,c]
+--
+  #include <perf/mmap.h>
+
+  struct perf_mmap;
+
+  void perf_mmap__consume(struct perf_mmap *map);
+  int perf_mmap__read_init(struct perf_mmap *map);
+  void perf_mmap__read_done(struct perf_mmap *map);
+  union perf_event *perf_mmap__read_event(struct perf_mmap *map);
+--
+
+*Structures to access perf API events:*
+
+[source,c]
+--
+  #include <perf/event.h>
+
+  struct perf_record_mmap;
+  struct perf_record_mmap2;
+  struct perf_record_comm;
+  struct perf_record_namespaces;
+  struct perf_record_fork;
+  struct perf_record_lost;
+  struct perf_record_lost_samples;
+  struct perf_record_read;
+  struct perf_record_throttle;
+  struct perf_record_ksymbol;
+  struct perf_record_bpf_event;
+  struct perf_record_sample;
+  struct perf_record_switch;
+  struct perf_record_header_attr;
+  struct perf_record_record_cpu_map;
+  struct perf_record_cpu_map_data;
+  struct perf_record_cpu_map;
+  struct perf_record_event_update_cpus;
+  struct perf_record_event_update_scale;
+  struct perf_record_event_update;
+  struct perf_trace_event_type;
+  struct perf_record_header_event_type;
+  struct perf_record_header_tracing_data;
+  struct perf_record_header_build_id;
+  struct perf_record_id_index;
+  struct perf_record_auxtrace_info;
+  struct perf_record_auxtrace;
+  struct perf_record_auxtrace_error;
+  struct perf_record_aux;
+  struct perf_record_itrace_start;
+  struct perf_record_thread_map_entry;
+  struct perf_record_thread_map;
+  struct perf_record_stat_config_entry;
+  struct perf_record_stat_config;
+  struct perf_record_stat;
+  struct perf_record_stat_round;
+  struct perf_record_time_conv;
+  struct perf_record_header_feature;
+  struct perf_record_compressed;
+--
+
+DESCRIPTION
+-----------
+The libperf library provides an API to access the linux kernel perf
+events subsystem.
+
+Following objects are key to the libperf interface:
+
+[horizontal]
+
+struct perf_cpu_map:: Provides a CPU list abstraction.
+
+struct perf_thread_map:: Provides a thread list abstraction.
+
+struct perf_evsel:: Provides an abstraction for single a perf event.
+
+struct perf_evlist:: Gathers several struct perf_evsel object and performs functions on all of them.
+
+struct perf_mmap:: Provides an abstraction for accessing perf ring buffer.
+
+The exported API functions bind these objects together.
+
+REPORTING BUGS
+--------------
+Report bugs to <linux-perf-users@vger.kernel.org>.
+
+LICENSE
+-------
+libperf is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
+
+SEE ALSO
+--------
+libperf-sampling(7), libperf-counting(7)
diff --git a/tools/lib/perf/Documentation/manpage-1.72.xsl b/tools/lib/perf/Documentation/manpage-1.72.xsl
new file mode 100644
index 000000000..b4d315cb8
--- /dev/null
+++ b/tools/lib/perf/Documentation/manpage-1.72.xsl
@@ -0,0 +1,14 @@
+<!-- manpage-1.72.xsl:
+     special settings for manpages rendered from asciidoc+docbook
+     handles peculiarities in docbook-xsl 1.72.0 -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<xsl:import href="manpage-base.xsl"/>
+
+<!-- these are the special values for the roff control characters
+     needed for docbook-xsl 1.72.0 -->
+<xsl:param name="git.docbook.backslash">&#x2593;</xsl:param>
+<xsl:param name="git.docbook.dot"      >&#x2302;</xsl:param>
+
+</xsl:stylesheet>
diff --git a/tools/lib/perf/Documentation/manpage-base.xsl b/tools/lib/perf/Documentation/manpage-base.xsl
new file mode 100644
index 000000000..a264fa616
--- /dev/null
+++ b/tools/lib/perf/Documentation/manpage-base.xsl
@@ -0,0 +1,35 @@
+<!-- manpage-base.xsl:
+     special formatting for manpages rendered from asciidoc+docbook -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<!-- these params silence some output from xmlto -->
+<xsl:param name="man.output.quietly" select="1"/>
+<xsl:param name="refentry.meta.get.quietly" select="1"/>
+
+<!-- convert asciidoc callouts to man page format;
+     git.docbook.backslash and git.docbook.dot params
+     must be supplied by another XSL file or other means -->
+<xsl:template match="co">
+	<xsl:value-of select="concat(
+			      $git.docbook.backslash,'fB(',
+			      substring-after(@id,'-'),')',
+			      $git.docbook.backslash,'fR')"/>
+</xsl:template>
+<xsl:template match="calloutlist">
+	<xsl:value-of select="$git.docbook.dot"/>
+	<xsl:text>sp&#10;</xsl:text>
+	<xsl:apply-templates/>
+	<xsl:text>&#10;</xsl:text>
+</xsl:template>
+<xsl:template match="callout">
+	<xsl:value-of select="concat(
+			      $git.docbook.backslash,'fB',
+			      substring-after(@arearefs,'-'),
+			      '. ',$git.docbook.backslash,'fR')"/>
+	<xsl:apply-templates/>
+	<xsl:value-of select="$git.docbook.dot"/>
+	<xsl:text>br&#10;</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/tools/lib/perf/Documentation/manpage-bold-literal.xsl b/tools/lib/perf/Documentation/manpage-bold-literal.xsl
new file mode 100644
index 000000000..608eb5df6
--- /dev/null
+++ b/tools/lib/perf/Documentation/manpage-bold-literal.xsl
@@ -0,0 +1,17 @@
+<!-- manpage-bold-literal.xsl:
+     special formatting for manpages rendered from asciidoc+docbook -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<!-- render literal text as bold (instead of plain or monospace);
+     this makes literal text easier to distinguish in manpages
+     viewed on a tty -->
+<xsl:template match="literal">
+	<xsl:value-of select="$git.docbook.backslash"/>
+	<xsl:text>fB</xsl:text>
+	<xsl:apply-templates/>
+	<xsl:value-of select="$git.docbook.backslash"/>
+	<xsl:text>fR</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/tools/lib/perf/Documentation/manpage-normal.xsl b/tools/lib/perf/Documentation/manpage-normal.xsl
new file mode 100644
index 000000000..a48f5b11f
--- /dev/null
+++ b/tools/lib/perf/Documentation/manpage-normal.xsl
@@ -0,0 +1,13 @@
+<!-- manpage-normal.xsl:
+     special settings for manpages rendered from asciidoc+docbook
+     handles anything we want to keep away from docbook-xsl 1.72.0 -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<xsl:import href="manpage-base.xsl"/>
+
+<!-- these are the normal values for the roff control characters -->
+<xsl:param name="git.docbook.backslash">\</xsl:param>
+<xsl:param name="git.docbook.dot"	>.</xsl:param>
+
+</xsl:stylesheet>
diff --git a/tools/lib/perf/Documentation/manpage-suppress-sp.xsl b/tools/lib/perf/Documentation/manpage-suppress-sp.xsl
new file mode 100644
index 000000000..a63c7632a
--- /dev/null
+++ b/tools/lib/perf/Documentation/manpage-suppress-sp.xsl
@@ -0,0 +1,21 @@
+<!-- manpage-suppress-sp.xsl:
+     special settings for manpages rendered from asciidoc+docbook
+     handles erroneous, inline .sp in manpage output of some
+     versions of docbook-xsl -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<!-- attempt to work around spurious .sp at the tail of the line
+     that some versions of docbook stylesheets seem to add -->
+<xsl:template match="simpara">
+  <xsl:variable name="content">
+    <xsl:apply-templates/>
+  </xsl:variable>
+  <xsl:value-of select="normalize-space($content)"/>
+  <xsl:if test="not(ancestor::authorblurb) and
+                not(ancestor::personblurb)">
+    <xsl:text>&#10;&#10;</xsl:text>
+  </xsl:if>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile
new file mode 100644
index 000000000..3718d65cf
--- /dev/null
+++ b/tools/lib/perf/Makefile
@@ -0,0 +1,191 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+# Most of this file is copied from tools/lib/bpf/Makefile
+
+LIBPERF_VERSION = 0
+LIBPERF_PATCHLEVEL = 0
+LIBPERF_EXTRAVERSION = 1
+
+MAKEFLAGS += --no-print-directory
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+INSTALL = install
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+include $(srctree)/tools/scripts/Makefile.include
+include $(srctree)/tools/scripts/Makefile.arch
+
+ifeq ($(LP64), 1)
+  libdir_relative = lib64
+else
+  libdir_relative = lib
+endif
+
+prefix ?=
+libdir = $(prefix)/$(libdir_relative)
+
+# Shell quotes
+libdir_SQ = $(subst ','\'',$(libdir))
+libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
+
+ifeq ("$(origin V)", "command line")
+  VERBOSE = $(V)
+endif
+ifndef VERBOSE
+  VERBOSE = 0
+endif
+
+ifeq ($(VERBOSE),1)
+  Q =
+else
+  Q = @
+endif
+
+# Set compile option CFLAGS
+ifdef EXTRA_CFLAGS
+  CFLAGS := $(EXTRA_CFLAGS)
+else
+  CFLAGS := -g -Wall
+endif
+
+INCLUDES = \
+-I$(srctree)/tools/lib/perf/include \
+-I$(srctree)/tools/lib/ \
+-I$(srctree)/tools/include \
+-I$(srctree)/tools/arch/$(SRCARCH)/include/ \
+-I$(srctree)/tools/arch/$(SRCARCH)/include/uapi \
+-I$(srctree)/tools/include/uapi
+
+# Append required CFLAGS
+override CFLAGS += $(EXTRA_WARNINGS)
+override CFLAGS += -Werror -Wall
+override CFLAGS += -fPIC
+override CFLAGS += $(INCLUDES)
+override CFLAGS += -fvisibility=hidden
+
+all:
+
+export srctree OUTPUT CC LD CFLAGS V
+export DESTDIR DESTDIR_SQ
+
+include $(srctree)/tools/build/Makefile.include
+
+VERSION_SCRIPT := libperf.map
+
+PATCHLEVEL    = $(LIBPERF_PATCHLEVEL)
+EXTRAVERSION  = $(LIBPERF_EXTRAVERSION)
+VERSION       = $(LIBPERF_VERSION).$(LIBPERF_PATCHLEVEL).$(LIBPERF_EXTRAVERSION)
+
+LIBPERF_SO := $(OUTPUT)libperf.so.$(VERSION)
+LIBPERF_A  := $(OUTPUT)libperf.a
+LIBPERF_IN := $(OUTPUT)libperf-in.o
+LIBPERF_PC := $(OUTPUT)libperf.pc
+
+LIBPERF_ALL := $(LIBPERF_A) $(OUTPUT)libperf.so*
+
+LIB_DIR := $(srctree)/tools/lib/api/
+
+ifneq ($(OUTPUT),)
+ifneq ($(subdir),)
+  API_PATH=$(OUTPUT)/../lib/api/
+else
+  API_PATH=$(OUTPUT)
+endif
+else
+  API_PATH=$(LIB_DIR)
+endif
+
+LIBAPI = $(API_PATH)libapi.a
+export LIBAPI
+
+$(LIBAPI): FORCE
+	$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a
+
+$(LIBAPI)-clean:
+	$(call QUIET_CLEAN, libapi)
+	$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
+
+$(LIBPERF_IN): FORCE
+	$(Q)$(MAKE) $(build)=libperf
+
+$(LIBPERF_A): $(LIBPERF_IN)
+	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN)
+
+$(LIBPERF_SO): $(LIBPERF_IN) $(LIBAPI)
+	$(QUIET_LINK)$(CC) --shared -Wl,-soname,libperf.so \
+                                    -Wl,--version-script=$(VERSION_SCRIPT) $^ -o $@
+	@ln -sf $(@F) $(OUTPUT)libperf.so
+	@ln -sf $(@F) $(OUTPUT)libperf.so.$(LIBPERF_VERSION)
+
+
+libs: $(LIBPERF_A) $(LIBPERF_SO) $(LIBPERF_PC)
+
+all: fixdep
+	$(Q)$(MAKE) libs
+
+clean: $(LIBAPI)-clean
+	$(call QUIET_CLEAN, libperf) $(RM) $(LIBPERF_A) \
+                *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd LIBPERF-CFLAGS $(LIBPERF_PC)
+	$(Q)$(MAKE) -C tests clean
+
+tests: libs
+	$(Q)$(MAKE) -C tests
+	$(Q)$(MAKE) -C tests run
+
+$(LIBPERF_PC):
+	$(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \
+		-e "s|@LIBDIR@|$(libdir_SQ)|" \
+		-e "s|@VERSION@|$(VERSION)|" \
+		< libperf.pc.template > $@
+
+define do_install_mkdir
+	if [ ! -d '$(DESTDIR_SQ)$1' ]; then             \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
+	fi
+endef
+
+define do_install
+	if [ ! -d '$(DESTDIR_SQ)$2' ]; then             \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
+	fi;                                             \
+	$(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
+endef
+
+install_lib: libs
+	$(call QUIET_INSTALL, $(LIBPERF_ALL)) \
+		$(call do_install_mkdir,$(libdir_SQ)); \
+		cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ)
+
+install_headers:
+	$(call QUIET_INSTALL, headers) \
+		$(call do_install,include/perf/core.h,$(prefix)/include/perf,644); \
+		$(call do_install,include/perf/cpumap.h,$(prefix)/include/perf,644); \
+		$(call do_install,include/perf/threadmap.h,$(prefix)/include/perf,644); \
+		$(call do_install,include/perf/evlist.h,$(prefix)/include/perf,644); \
+		$(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); \
+		$(call do_install,include/perf/event.h,$(prefix)/include/perf,644); \
+		$(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644);
+
+install_pkgconfig: $(LIBPERF_PC)
+	$(call QUIET_INSTALL, $(LIBPERF_PC)) \
+		$(call do_install,$(LIBPERF_PC),$(libdir_SQ)/pkgconfig,644)
+
+install_doc:
+	$(Q)$(MAKE) -C Documentation install-man install-html install-examples
+
+install: install_lib install_headers install_pkgconfig install_doc
+
+FORCE:
+
+.PHONY: all install clean tests FORCE
diff --git a/tools/lib/perf/core.c b/tools/lib/perf/core.c
new file mode 100644
index 000000000..58fc894b7
--- /dev/null
+++ b/tools/lib/perf/core.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define __printf(a, b)  __attribute__((format(printf, a, b)))
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <linux/compiler.h>
+#include <perf/core.h>
+#include <internal/lib.h>
+#include "internal.h"
+
+static int __base_pr(enum libperf_print_level level __maybe_unused, const char *format,
+		     va_list args)
+{
+	return vfprintf(stderr, format, args);
+}
+
+static libperf_print_fn_t __libperf_pr = __base_pr;
+
+__printf(2, 3)
+void libperf_print(enum libperf_print_level level, const char *format, ...)
+{
+	va_list args;
+
+	if (!__libperf_pr)
+		return;
+
+	va_start(args, format);
+	__libperf_pr(level, format, args);
+	va_end(args);
+}
+
+void libperf_init(libperf_print_fn_t fn)
+{
+	page_size = sysconf(_SC_PAGE_SIZE);
+	__libperf_pr = fn;
+}
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
new file mode 100644
index 000000000..ca0215047
--- /dev/null
+++ b/tools/lib/perf/cpumap.c
@@ -0,0 +1,345 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <perf/cpumap.h>
+#include <stdlib.h>
+#include <linux/refcount.h>
+#include <internal/cpumap.h>
+#include <asm/bug.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <limits.h>
+
+struct perf_cpu_map *perf_cpu_map__dummy_new(void)
+{
+	struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int));
+
+	if (cpus != NULL) {
+		cpus->nr = 1;
+		cpus->map[0] = -1;
+		refcount_set(&cpus->refcnt, 1);
+	}
+
+	return cpus;
+}
+
+static void cpu_map__delete(struct perf_cpu_map *map)
+{
+	if (map) {
+		WARN_ONCE(refcount_read(&map->refcnt) != 0,
+			  "cpu_map refcnt unbalanced\n");
+		free(map);
+	}
+}
+
+struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map)
+{
+	if (map)
+		refcount_inc(&map->refcnt);
+	return map;
+}
+
+void perf_cpu_map__put(struct perf_cpu_map *map)
+{
+	if (map && refcount_dec_and_test(&map->refcnt))
+		cpu_map__delete(map);
+}
+
+static struct perf_cpu_map *cpu_map__default_new(void)
+{
+	struct perf_cpu_map *cpus;
+	int nr_cpus;
+
+	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	if (nr_cpus < 0)
+		return NULL;
+
+	cpus = malloc(sizeof(*cpus) + nr_cpus * sizeof(int));
+	if (cpus != NULL) {
+		int i;
+
+		for (i = 0; i < nr_cpus; ++i)
+			cpus->map[i] = i;
+
+		cpus->nr = nr_cpus;
+		refcount_set(&cpus->refcnt, 1);
+	}
+
+	return cpus;
+}
+
+static int cmp_int(const void *a, const void *b)
+{
+	return *(const int *)a - *(const int*)b;
+}
+
+static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus)
+{
+	size_t payload_size = nr_cpus * sizeof(int);
+	struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + payload_size);
+	int i, j;
+
+	if (cpus != NULL) {
+		memcpy(cpus->map, tmp_cpus, payload_size);
+		qsort(cpus->map, nr_cpus, sizeof(int), cmp_int);
+		/* Remove dups */
+		j = 0;
+		for (i = 0; i < nr_cpus; i++) {
+			if (i == 0 || cpus->map[i] != cpus->map[i - 1])
+				cpus->map[j++] = cpus->map[i];
+		}
+		cpus->nr = j;
+		assert(j <= nr_cpus);
+		refcount_set(&cpus->refcnt, 1);
+	}
+
+	return cpus;
+}
+
+struct perf_cpu_map *perf_cpu_map__read(FILE *file)
+{
+	struct perf_cpu_map *cpus = NULL;
+	int nr_cpus = 0;
+	int *tmp_cpus = NULL, *tmp;
+	int max_entries = 0;
+	int n, cpu, prev;
+	char sep;
+
+	sep = 0;
+	prev = -1;
+	for (;;) {
+		n = fscanf(file, "%u%c", &cpu, &sep);
+		if (n <= 0)
+			break;
+		if (prev >= 0) {
+			int new_max = nr_cpus + cpu - prev - 1;
+
+			WARN_ONCE(new_max >= MAX_NR_CPUS, "Perf can support %d CPUs. "
+							  "Consider raising MAX_NR_CPUS\n", MAX_NR_CPUS);
+
+			if (new_max >= max_entries) {
+				max_entries = new_max + MAX_NR_CPUS / 2;
+				tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+				if (tmp == NULL)
+					goto out_free_tmp;
+				tmp_cpus = tmp;
+			}
+
+			while (++prev < cpu)
+				tmp_cpus[nr_cpus++] = prev;
+		}
+		if (nr_cpus == max_entries) {
+			max_entries += MAX_NR_CPUS;
+			tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+			if (tmp == NULL)
+				goto out_free_tmp;
+			tmp_cpus = tmp;
+		}
+
+		tmp_cpus[nr_cpus++] = cpu;
+		if (n == 2 && sep == '-')
+			prev = cpu;
+		else
+			prev = -1;
+		if (n == 1 || sep == '\n')
+			break;
+	}
+
+	if (nr_cpus > 0)
+		cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
+	else
+		cpus = cpu_map__default_new();
+out_free_tmp:
+	free(tmp_cpus);
+	return cpus;
+}
+
+static struct perf_cpu_map *cpu_map__read_all_cpu_map(void)
+{
+	struct perf_cpu_map *cpus = NULL;
+	FILE *onlnf;
+
+	onlnf = fopen("/sys/devices/system/cpu/online", "r");
+	if (!onlnf)
+		return cpu_map__default_new();
+
+	cpus = perf_cpu_map__read(onlnf);
+	fclose(onlnf);
+	return cpus;
+}
+
+struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
+{
+	struct perf_cpu_map *cpus = NULL;
+	unsigned long start_cpu, end_cpu = 0;
+	char *p = NULL;
+	int i, nr_cpus = 0;
+	int *tmp_cpus = NULL, *tmp;
+	int max_entries = 0;
+
+	if (!cpu_list)
+		return cpu_map__read_all_cpu_map();
+
+	/*
+	 * must handle the case of empty cpumap to cover
+	 * TOPOLOGY header for NUMA nodes with no CPU
+	 * ( e.g., because of CPU hotplug)
+	 */
+	if (!isdigit(*cpu_list) && *cpu_list != '\0')
+		goto out;
+
+	while (isdigit(*cpu_list)) {
+		p = NULL;
+		start_cpu = strtoul(cpu_list, &p, 0);
+		if (start_cpu >= INT_MAX
+		    || (*p != '\0' && *p != ',' && *p != '-'))
+			goto invalid;
+
+		if (*p == '-') {
+			cpu_list = ++p;
+			p = NULL;
+			end_cpu = strtoul(cpu_list, &p, 0);
+
+			if (end_cpu >= INT_MAX || (*p != '\0' && *p != ','))
+				goto invalid;
+
+			if (end_cpu < start_cpu)
+				goto invalid;
+		} else {
+			end_cpu = start_cpu;
+		}
+
+		WARN_ONCE(end_cpu >= MAX_NR_CPUS, "Perf can support %d CPUs. "
+						  "Consider raising MAX_NR_CPUS\n", MAX_NR_CPUS);
+
+		for (; start_cpu <= end_cpu; start_cpu++) {
+			/* check for duplicates */
+			for (i = 0; i < nr_cpus; i++)
+				if (tmp_cpus[i] == (int)start_cpu)
+					goto invalid;
+
+			if (nr_cpus == max_entries) {
+				max_entries += MAX_NR_CPUS;
+				tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+				if (tmp == NULL)
+					goto invalid;
+				tmp_cpus = tmp;
+			}
+			tmp_cpus[nr_cpus++] = (int)start_cpu;
+		}
+		if (*p)
+			++p;
+
+		cpu_list = p;
+	}
+
+	if (nr_cpus > 0)
+		cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
+	else if (*cpu_list != '\0')
+		cpus = cpu_map__default_new();
+	else
+		cpus = perf_cpu_map__dummy_new();
+invalid:
+	free(tmp_cpus);
+out:
+	return cpus;
+}
+
+int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
+{
+	if (cpus && idx < cpus->nr)
+		return cpus->map[idx];
+
+	return -1;
+}
+
+int perf_cpu_map__nr(const struct perf_cpu_map *cpus)
+{
+	return cpus ? cpus->nr : 1;
+}
+
+bool perf_cpu_map__empty(const struct perf_cpu_map *map)
+{
+	return map ? map->map[0] == -1 : true;
+}
+
+int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu)
+{
+	int i;
+
+	for (i = 0; i < cpus->nr; ++i) {
+		if (cpus->map[i] == cpu)
+			return i;
+	}
+
+	return -1;
+}
+
+int perf_cpu_map__max(struct perf_cpu_map *map)
+{
+	int i, max = -1;
+
+	for (i = 0; i < map->nr; i++) {
+		if (map->map[i] > max)
+			max = map->map[i];
+	}
+
+	return max;
+}
+
+/*
+ * Merge two cpumaps
+ *
+ * orig either gets freed and replaced with a new map, or reused
+ * with no reference count change (similar to "realloc")
+ * other has its reference count increased.
+ */
+
+struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
+					 struct perf_cpu_map *other)
+{
+	int *tmp_cpus;
+	int tmp_len;
+	int i, j, k;
+	struct perf_cpu_map *merged;
+
+	if (!orig && !other)
+		return NULL;
+	if (!orig) {
+		perf_cpu_map__get(other);
+		return other;
+	}
+	if (!other)
+		return orig;
+	if (orig->nr == other->nr &&
+	    !memcmp(orig->map, other->map, orig->nr * sizeof(int)))
+		return orig;
+
+	tmp_len = orig->nr + other->nr;
+	tmp_cpus = malloc(tmp_len * sizeof(int));
+	if (!tmp_cpus)
+		return NULL;
+
+	/* Standard merge algorithm from wikipedia */
+	i = j = k = 0;
+	while (i < orig->nr && j < other->nr) {
+		if (orig->map[i] <= other->map[j]) {
+			if (orig->map[i] == other->map[j])
+				j++;
+			tmp_cpus[k++] = orig->map[i++];
+		} else
+			tmp_cpus[k++] = other->map[j++];
+	}
+
+	while (i < orig->nr)
+		tmp_cpus[k++] = orig->map[i++];
+
+	while (j < other->nr)
+		tmp_cpus[k++] = other->map[j++];
+	assert(k <= tmp_len);
+
+	merged = cpu_map__trim_new(k, tmp_cpus);
+	free(tmp_cpus);
+	perf_cpu_map__put(orig);
+	return merged;
+}
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
new file mode 100644
index 000000000..f76b1a9d5
--- /dev/null
+++ b/tools/lib/perf/evlist.c
@@ -0,0 +1,637 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <perf/evlist.h>
+#include <perf/evsel.h>
+#include <linux/bitops.h>
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <sys/ioctl.h>
+#include <internal/evlist.h>
+#include <internal/evsel.h>
+#include <internal/xyarray.h>
+#include <internal/mmap.h>
+#include <internal/cpumap.h>
+#include <internal/threadmap.h>
+#include <internal/lib.h>
+#include <linux/zalloc.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <poll.h>
+#include <sys/mman.h>
+#include <perf/cpumap.h>
+#include <perf/threadmap.h>
+#include <api/fd/array.h>
+
+void perf_evlist__init(struct perf_evlist *evlist)
+{
+	int i;
+
+	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
+		INIT_HLIST_HEAD(&evlist->heads[i]);
+	INIT_LIST_HEAD(&evlist->entries);
+	evlist->nr_entries = 0;
+	fdarray__init(&evlist->pollfd, 64);
+}
+
+static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
+					  struct perf_evsel *evsel)
+{
+	/*
+	 * We already have cpus for evsel (via PMU sysfs) so
+	 * keep it, if there's no target cpu list defined.
+	 */
+	if (!evsel->own_cpus || evlist->has_user_cpus) {
+		perf_cpu_map__put(evsel->cpus);
+		evsel->cpus = perf_cpu_map__get(evlist->cpus);
+	} else if (!evsel->system_wide && perf_cpu_map__empty(evlist->cpus)) {
+		perf_cpu_map__put(evsel->cpus);
+		evsel->cpus = perf_cpu_map__get(evlist->cpus);
+	} else if (evsel->cpus != evsel->own_cpus) {
+		perf_cpu_map__put(evsel->cpus);
+		evsel->cpus = perf_cpu_map__get(evsel->own_cpus);
+	}
+
+	perf_thread_map__put(evsel->threads);
+	evsel->threads = perf_thread_map__get(evlist->threads);
+	evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus);
+}
+
+static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+
+	perf_evlist__for_each_evsel(evlist, evsel)
+		__perf_evlist__propagate_maps(evlist, evsel);
+}
+
+void perf_evlist__add(struct perf_evlist *evlist,
+		      struct perf_evsel *evsel)
+{
+	list_add_tail(&evsel->node, &evlist->entries);
+	evlist->nr_entries += 1;
+	__perf_evlist__propagate_maps(evlist, evsel);
+}
+
+void perf_evlist__remove(struct perf_evlist *evlist,
+			 struct perf_evsel *evsel)
+{
+	list_del_init(&evsel->node);
+	evlist->nr_entries -= 1;
+}
+
+struct perf_evlist *perf_evlist__new(void)
+{
+	struct perf_evlist *evlist = zalloc(sizeof(*evlist));
+
+	if (evlist != NULL)
+		perf_evlist__init(evlist);
+
+	return evlist;
+}
+
+struct perf_evsel *
+perf_evlist__next(struct perf_evlist *evlist, struct perf_evsel *prev)
+{
+	struct perf_evsel *next;
+
+	if (!prev) {
+		next = list_first_entry(&evlist->entries,
+					struct perf_evsel,
+					node);
+	} else {
+		next = list_next_entry(prev, node);
+	}
+
+	/* Empty list is noticed here so don't need checking on entry. */
+	if (&next->node == &evlist->entries)
+		return NULL;
+
+	return next;
+}
+
+static void perf_evlist__purge(struct perf_evlist *evlist)
+{
+	struct perf_evsel *pos, *n;
+
+	perf_evlist__for_each_entry_safe(evlist, n, pos) {
+		list_del_init(&pos->node);
+		perf_evsel__delete(pos);
+	}
+
+	evlist->nr_entries = 0;
+}
+
+void perf_evlist__exit(struct perf_evlist *evlist)
+{
+	perf_cpu_map__put(evlist->cpus);
+	perf_cpu_map__put(evlist->all_cpus);
+	perf_thread_map__put(evlist->threads);
+	evlist->cpus = NULL;
+	evlist->all_cpus = NULL;
+	evlist->threads = NULL;
+	fdarray__exit(&evlist->pollfd);
+}
+
+void perf_evlist__delete(struct perf_evlist *evlist)
+{
+	if (evlist == NULL)
+		return;
+
+	perf_evlist__munmap(evlist);
+	perf_evlist__close(evlist);
+	perf_evlist__purge(evlist);
+	perf_evlist__exit(evlist);
+	free(evlist);
+}
+
+void perf_evlist__set_maps(struct perf_evlist *evlist,
+			   struct perf_cpu_map *cpus,
+			   struct perf_thread_map *threads)
+{
+	/*
+	 * Allow for the possibility that one or another of the maps isn't being
+	 * changed i.e. don't put it.  Note we are assuming the maps that are
+	 * being applied are brand new and evlist is taking ownership of the
+	 * original reference count of 1.  If that is not the case it is up to
+	 * the caller to increase the reference count.
+	 */
+	if (cpus != evlist->cpus) {
+		perf_cpu_map__put(evlist->cpus);
+		evlist->cpus = perf_cpu_map__get(cpus);
+	}
+
+	if (threads != evlist->threads) {
+		perf_thread_map__put(evlist->threads);
+		evlist->threads = perf_thread_map__get(threads);
+	}
+
+	if (!evlist->all_cpus && cpus)
+		evlist->all_cpus = perf_cpu_map__get(cpus);
+
+	perf_evlist__propagate_maps(evlist);
+}
+
+int perf_evlist__open(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+	int err;
+
+	perf_evlist__for_each_entry(evlist, evsel) {
+		err = perf_evsel__open(evsel, evsel->cpus, evsel->threads);
+		if (err < 0)
+			goto out_err;
+	}
+
+	return 0;
+
+out_err:
+	perf_evlist__close(evlist);
+	return err;
+}
+
+void perf_evlist__close(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+
+	perf_evlist__for_each_entry_reverse(evlist, evsel)
+		perf_evsel__close(evsel);
+}
+
+void perf_evlist__enable(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+
+	perf_evlist__for_each_entry(evlist, evsel)
+		perf_evsel__enable(evsel);
+}
+
+void perf_evlist__disable(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+
+	perf_evlist__for_each_entry(evlist, evsel)
+		perf_evsel__disable(evsel);
+}
+
+u64 perf_evlist__read_format(struct perf_evlist *evlist)
+{
+	struct perf_evsel *first = perf_evlist__first(evlist);
+
+	return first->attr.read_format;
+}
+
+#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
+
+static void perf_evlist__id_hash(struct perf_evlist *evlist,
+				 struct perf_evsel *evsel,
+				 int cpu, int thread, u64 id)
+{
+	int hash;
+	struct perf_sample_id *sid = SID(evsel, cpu, thread);
+
+	sid->id = id;
+	sid->evsel = evsel;
+	hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
+	hlist_add_head(&sid->node, &evlist->heads[hash]);
+}
+
+void perf_evlist__id_add(struct perf_evlist *evlist,
+			 struct perf_evsel *evsel,
+			 int cpu, int thread, u64 id)
+{
+	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
+	evsel->id[evsel->ids++] = id;
+}
+
+int perf_evlist__id_add_fd(struct perf_evlist *evlist,
+			   struct perf_evsel *evsel,
+			   int cpu, int thread, int fd)
+{
+	u64 read_data[4] = { 0, };
+	int id_idx = 1; /* The first entry is the counter value */
+	u64 id;
+	int ret;
+
+	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
+	if (!ret)
+		goto add;
+
+	if (errno != ENOTTY)
+		return -1;
+
+	/* Legacy way to get event id.. All hail to old kernels! */
+
+	/*
+	 * This way does not work with group format read, so bail
+	 * out in that case.
+	 */
+	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
+		return -1;
+
+	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
+	    read(fd, &read_data, sizeof(read_data)) == -1)
+		return -1;
+
+	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		++id_idx;
+	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		++id_idx;
+
+	id = read_data[id_idx];
+
+add:
+	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
+	return 0;
+}
+
+int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
+{
+	int nr_cpus = perf_cpu_map__nr(evlist->cpus);
+	int nr_threads = perf_thread_map__nr(evlist->threads);
+	int nfds = 0;
+	struct perf_evsel *evsel;
+
+	perf_evlist__for_each_entry(evlist, evsel) {
+		if (evsel->system_wide)
+			nfds += nr_cpus;
+		else
+			nfds += nr_cpus * nr_threads;
+	}
+
+	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
+	    fdarray__grow(&evlist->pollfd, nfds) < 0)
+		return -ENOMEM;
+
+	return 0;
+}
+
+int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
+			    void *ptr, short revent, enum fdarray_flags flags)
+{
+	int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP, flags);
+
+	if (pos >= 0) {
+		evlist->pollfd.priv[pos].ptr = ptr;
+		fcntl(fd, F_SETFL, O_NONBLOCK);
+	}
+
+	return pos;
+}
+
+static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
+					 void *arg __maybe_unused)
+{
+	struct perf_mmap *map = fda->priv[fd].ptr;
+
+	if (map)
+		perf_mmap__put(map);
+}
+
+int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
+{
+	return fdarray__filter(&evlist->pollfd, revents_and_mask,
+			       perf_evlist__munmap_filtered, NULL);
+}
+
+int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
+{
+	return fdarray__poll(&evlist->pollfd, timeout);
+}
+
+static struct perf_mmap* perf_evlist__alloc_mmap(struct perf_evlist *evlist, bool overwrite)
+{
+	int i;
+	struct perf_mmap *map;
+
+	map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
+	if (!map)
+		return NULL;
+
+	for (i = 0; i < evlist->nr_mmaps; i++) {
+		struct perf_mmap *prev = i ? &map[i - 1] : NULL;
+
+		/*
+		 * When the perf_mmap() call is made we grab one refcount, plus
+		 * one extra to let perf_mmap__consume() get the last
+		 * events after all real references (perf_mmap__get()) are
+		 * dropped.
+		 *
+		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
+		 * thus does perf_mmap__get() on it.
+		 */
+		perf_mmap__init(&map[i], prev, overwrite, NULL);
+	}
+
+	return map;
+}
+
+static void perf_evsel__set_sid_idx(struct perf_evsel *evsel, int idx, int cpu, int thread)
+{
+	struct perf_sample_id *sid = SID(evsel, cpu, thread);
+
+	sid->idx = idx;
+	sid->cpu = perf_cpu_map__cpu(evsel->cpus, cpu);
+	sid->tid = perf_thread_map__pid(evsel->threads, thread);
+}
+
+static struct perf_mmap*
+perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx)
+{
+	struct perf_mmap *maps;
+
+	maps = overwrite ? evlist->mmap_ovw : evlist->mmap;
+
+	if (!maps) {
+		maps = perf_evlist__alloc_mmap(evlist, overwrite);
+		if (!maps)
+			return NULL;
+
+		if (overwrite)
+			evlist->mmap_ovw = maps;
+		else
+			evlist->mmap = maps;
+	}
+
+	return &maps[idx];
+}
+
+#define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y))
+
+static int
+perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
+			  int output, int cpu)
+{
+	return perf_mmap__mmap(map, mp, output, cpu);
+}
+
+static void perf_evlist__set_mmap_first(struct perf_evlist *evlist, struct perf_mmap *map,
+					bool overwrite)
+{
+	if (overwrite)
+		evlist->mmap_ovw_first = map;
+	else
+		evlist->mmap_first = map;
+}
+
+static int
+mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
+	       int idx, struct perf_mmap_param *mp, int cpu_idx,
+	       int thread, int *_output, int *_output_overwrite)
+{
+	int evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx);
+	struct perf_evsel *evsel;
+	int revent;
+
+	perf_evlist__for_each_entry(evlist, evsel) {
+		bool overwrite = evsel->attr.write_backward;
+		struct perf_mmap *map;
+		int *output, fd, cpu;
+
+		if (evsel->system_wide && thread)
+			continue;
+
+		cpu = perf_cpu_map__idx(evsel->cpus, evlist_cpu);
+		if (cpu == -1)
+			continue;
+
+		map = ops->get(evlist, overwrite, idx);
+		if (map == NULL)
+			return -ENOMEM;
+
+		if (overwrite) {
+			mp->prot = PROT_READ;
+			output   = _output_overwrite;
+		} else {
+			mp->prot = PROT_READ | PROT_WRITE;
+			output   = _output;
+		}
+
+		fd = FD(evsel, cpu, thread);
+
+		if (*output == -1) {
+			*output = fd;
+
+			/*
+			 * The last one will be done at perf_mmap__consume(), so that we
+			 * make sure we don't prevent tools from consuming every last event in
+			 * the ring buffer.
+			 *
+			 * I.e. we can get the POLLHUP meaning that the fd doesn't exist
+			 * anymore, but the last events for it are still in the ring buffer,
+			 * waiting to be consumed.
+			 *
+			 * Tools can chose to ignore this at their own discretion, but the
+			 * evlist layer can't just drop it when filtering events in
+			 * perf_evlist__filter_pollfd().
+			 */
+			refcount_set(&map->refcnt, 2);
+
+			if (ops->mmap(map, mp, *output, evlist_cpu) < 0)
+				return -1;
+
+			if (!idx)
+				perf_evlist__set_mmap_first(evlist, map, overwrite);
+		} else {
+			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
+				return -1;
+
+			perf_mmap__get(map);
+		}
+
+		revent = !overwrite ? POLLIN : 0;
+
+		if (!evsel->system_wide &&
+		    perf_evlist__add_pollfd(evlist, fd, map, revent, fdarray_flag__default) < 0) {
+			perf_mmap__put(map);
+			return -1;
+		}
+
+		if (evsel->attr.read_format & PERF_FORMAT_ID) {
+			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
+						   fd) < 0)
+				return -1;
+			perf_evsel__set_sid_idx(evsel, idx, cpu, thread);
+		}
+	}
+
+	return 0;
+}
+
+static int
+mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
+		struct perf_mmap_param *mp)
+{
+	int thread;
+	int nr_threads = perf_thread_map__nr(evlist->threads);
+
+	for (thread = 0; thread < nr_threads; thread++) {
+		int output = -1;
+		int output_overwrite = -1;
+
+		if (ops->idx)
+			ops->idx(evlist, mp, thread, false);
+
+		if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread,
+				   &output, &output_overwrite))
+			goto out_unmap;
+	}
+
+	return 0;
+
+out_unmap:
+	perf_evlist__munmap(evlist);
+	return -1;
+}
+
+static int
+mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
+	     struct perf_mmap_param *mp)
+{
+	int nr_threads = perf_thread_map__nr(evlist->threads);
+	int nr_cpus    = perf_cpu_map__nr(evlist->cpus);
+	int cpu, thread;
+
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
+		int output = -1;
+		int output_overwrite = -1;
+
+		if (ops->idx)
+			ops->idx(evlist, mp, cpu, true);
+
+		for (thread = 0; thread < nr_threads; thread++) {
+			if (mmap_per_evsel(evlist, ops, cpu, mp, cpu,
+					   thread, &output, &output_overwrite))
+				goto out_unmap;
+		}
+	}
+
+	return 0;
+
+out_unmap:
+	perf_evlist__munmap(evlist);
+	return -1;
+}
+
+static int perf_evlist__nr_mmaps(struct perf_evlist *evlist)
+{
+	int nr_mmaps;
+
+	nr_mmaps = perf_cpu_map__nr(evlist->cpus);
+	if (perf_cpu_map__empty(evlist->cpus))
+		nr_mmaps = perf_thread_map__nr(evlist->threads);
+
+	return nr_mmaps;
+}
+
+int perf_evlist__mmap_ops(struct perf_evlist *evlist,
+			  struct perf_evlist_mmap_ops *ops,
+			  struct perf_mmap_param *mp)
+{
+	struct perf_evsel *evsel;
+	const struct perf_cpu_map *cpus = evlist->cpus;
+
+	if (!ops || !ops->get || !ops->mmap)
+		return -EINVAL;
+
+	mp->mask = evlist->mmap_len - page_size - 1;
+
+	evlist->nr_mmaps = perf_evlist__nr_mmaps(evlist);
+
+	perf_evlist__for_each_entry(evlist, evsel) {
+		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
+		    evsel->sample_id == NULL &&
+		    perf_evsel__alloc_id(evsel, evsel->fd->max_x, evsel->fd->max_y) < 0)
+			return -ENOMEM;
+	}
+
+	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
+		return -ENOMEM;
+
+	if (perf_cpu_map__empty(cpus))
+		return mmap_per_thread(evlist, ops, mp);
+
+	return mmap_per_cpu(evlist, ops, mp);
+}
+
+int perf_evlist__mmap(struct perf_evlist *evlist, int pages)
+{
+	struct perf_mmap_param mp;
+	struct perf_evlist_mmap_ops ops = {
+		.get  = perf_evlist__mmap_cb_get,
+		.mmap = perf_evlist__mmap_cb_mmap,
+	};
+
+	evlist->mmap_len = (pages + 1) * page_size;
+
+	return perf_evlist__mmap_ops(evlist, &ops, &mp);
+}
+
+void perf_evlist__munmap(struct perf_evlist *evlist)
+{
+	int i;
+
+	if (evlist->mmap) {
+		for (i = 0; i < evlist->nr_mmaps; i++)
+			perf_mmap__munmap(&evlist->mmap[i]);
+	}
+
+	if (evlist->mmap_ovw) {
+		for (i = 0; i < evlist->nr_mmaps; i++)
+			perf_mmap__munmap(&evlist->mmap_ovw[i]);
+	}
+
+	zfree(&evlist->mmap);
+	zfree(&evlist->mmap_ovw);
+}
+
+struct perf_mmap*
+perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map,
+		       bool overwrite)
+{
+	if (map)
+		return map->next;
+
+	return overwrite ? evlist->mmap_ovw_first : evlist->mmap_first;
+}
diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c
new file mode 100644
index 000000000..4dc06289f
--- /dev/null
+++ b/tools/lib/perf/evsel.c
@@ -0,0 +1,301 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <perf/evsel.h>
+#include <perf/cpumap.h>
+#include <perf/threadmap.h>
+#include <linux/list.h>
+#include <internal/evsel.h>
+#include <linux/zalloc.h>
+#include <stdlib.h>
+#include <internal/xyarray.h>
+#include <internal/cpumap.h>
+#include <internal/threadmap.h>
+#include <internal/lib.h>
+#include <linux/string.h>
+#include <sys/ioctl.h>
+
+void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr)
+{
+	INIT_LIST_HEAD(&evsel->node);
+	evsel->attr = *attr;
+}
+
+struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr)
+{
+	struct perf_evsel *evsel = zalloc(sizeof(*evsel));
+
+	if (evsel != NULL)
+		perf_evsel__init(evsel, attr);
+
+	return evsel;
+}
+
+void perf_evsel__delete(struct perf_evsel *evsel)
+{
+	free(evsel);
+}
+
+#define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y))
+
+int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+	evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
+
+	if (evsel->fd) {
+		int cpu, thread;
+		for (cpu = 0; cpu < ncpus; cpu++) {
+			for (thread = 0; thread < nthreads; thread++) {
+				FD(evsel, cpu, thread) = -1;
+			}
+		}
+	}
+
+	return evsel->fd != NULL ? 0 : -ENOMEM;
+}
+
+static int
+sys_perf_event_open(struct perf_event_attr *attr,
+		    pid_t pid, int cpu, int group_fd,
+		    unsigned long flags)
+{
+	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
+		     struct perf_thread_map *threads)
+{
+	int cpu, thread, err = 0;
+
+	if (cpus == NULL) {
+		static struct perf_cpu_map *empty_cpu_map;
+
+		if (empty_cpu_map == NULL) {
+			empty_cpu_map = perf_cpu_map__dummy_new();
+			if (empty_cpu_map == NULL)
+				return -ENOMEM;
+		}
+
+		cpus = empty_cpu_map;
+	}
+
+	if (threads == NULL) {
+		static struct perf_thread_map *empty_thread_map;
+
+		if (empty_thread_map == NULL) {
+			empty_thread_map = perf_thread_map__new_dummy();
+			if (empty_thread_map == NULL)
+				return -ENOMEM;
+		}
+
+		threads = empty_thread_map;
+	}
+
+	if (evsel->fd == NULL &&
+	    perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
+		return -ENOMEM;
+
+	for (cpu = 0; cpu < cpus->nr; cpu++) {
+		for (thread = 0; thread < threads->nr; thread++) {
+			int fd;
+
+			fd = sys_perf_event_open(&evsel->attr,
+						 threads->map[thread].pid,
+						 cpus->map[cpu], -1, 0);
+
+			if (fd < 0)
+				return -errno;
+
+			FD(evsel, cpu, thread) = fd;
+		}
+	}
+
+	return err;
+}
+
+static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu)
+{
+	int thread;
+
+	for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
+		if (FD(evsel, cpu, thread) >= 0)
+			close(FD(evsel, cpu, thread));
+		FD(evsel, cpu, thread) = -1;
+	}
+}
+
+void perf_evsel__close_fd(struct perf_evsel *evsel)
+{
+	int cpu;
+
+	for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++)
+		perf_evsel__close_fd_cpu(evsel, cpu);
+}
+
+void perf_evsel__free_fd(struct perf_evsel *evsel)
+{
+	xyarray__delete(evsel->fd);
+	evsel->fd = NULL;
+}
+
+void perf_evsel__close(struct perf_evsel *evsel)
+{
+	if (evsel->fd == NULL)
+		return;
+
+	perf_evsel__close_fd(evsel);
+	perf_evsel__free_fd(evsel);
+}
+
+void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu)
+{
+	if (evsel->fd == NULL)
+		return;
+
+	perf_evsel__close_fd_cpu(evsel, cpu);
+}
+
+int perf_evsel__read_size(struct perf_evsel *evsel)
+{
+	u64 read_format = evsel->attr.read_format;
+	int entry = sizeof(u64); /* value */
+	int size = 0;
+	int nr = 1;
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		size += sizeof(u64);
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		size += sizeof(u64);
+
+	if (read_format & PERF_FORMAT_ID)
+		entry += sizeof(u64);
+
+	if (read_format & PERF_FORMAT_GROUP) {
+		nr = evsel->nr_members;
+		size += sizeof(u64);
+	}
+
+	size += entry * nr;
+	return size;
+}
+
+int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
+		     struct perf_counts_values *count)
+{
+	size_t size = perf_evsel__read_size(evsel);
+
+	memset(count, 0, sizeof(*count));
+
+	if (FD(evsel, cpu, thread) < 0)
+		return -EINVAL;
+
+	if (readn(FD(evsel, cpu, thread), count->values, size) <= 0)
+		return -errno;
+
+	return 0;
+}
+
+static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
+				 int ioc,  void *arg,
+				 int cpu)
+{
+	int thread;
+
+	for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
+		int fd = FD(evsel, cpu, thread),
+		    err = ioctl(fd, ioc, arg);
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu)
+{
+	return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu);
+}
+
+int perf_evsel__enable(struct perf_evsel *evsel)
+{
+	int i;
+	int err = 0;
+
+	for (i = 0; i < xyarray__max_x(evsel->fd) && !err; i++)
+		err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, i);
+	return err;
+}
+
+int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu)
+{
+	return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu);
+}
+
+int perf_evsel__disable(struct perf_evsel *evsel)
+{
+	int i;
+	int err = 0;
+
+	for (i = 0; i < xyarray__max_x(evsel->fd) && !err; i++)
+		err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, i);
+	return err;
+}
+
+int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter)
+{
+	int err = 0, i;
+
+	for (i = 0; i < evsel->cpus->nr && !err; i++)
+		err = perf_evsel__run_ioctl(evsel,
+				     PERF_EVENT_IOC_SET_FILTER,
+				     (void *)filter, i);
+	return err;
+}
+
+struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
+{
+	return evsel->cpus;
+}
+
+struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel)
+{
+	return evsel->threads;
+}
+
+struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel)
+{
+	return &evsel->attr;
+}
+
+int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+	if (ncpus == 0 || nthreads == 0)
+		return 0;
+
+	if (evsel->system_wide)
+		nthreads = 1;
+
+	evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
+	if (evsel->sample_id == NULL)
+		return -ENOMEM;
+
+	evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
+	if (evsel->id == NULL) {
+		xyarray__delete(evsel->sample_id);
+		evsel->sample_id = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+void perf_evsel__free_id(struct perf_evsel *evsel)
+{
+	xyarray__delete(evsel->sample_id);
+	evsel->sample_id = NULL;
+	zfree(&evsel->id);
+	evsel->ids = 0;
+}
diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h
new file mode 100644
index 000000000..840d40325
--- /dev/null
+++ b/tools/lib/perf/include/internal/cpumap.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LIBPERF_INTERNAL_CPUMAP_H
+#define __LIBPERF_INTERNAL_CPUMAP_H
+
+#include <linux/refcount.h>
+
+struct perf_cpu_map {
+	refcount_t	refcnt;
+	int		nr;
+	int		map[];
+};
+
+#ifndef MAX_NR_CPUS
+#define MAX_NR_CPUS	2048
+#endif
+
+int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu);
+
+#endif /* __LIBPERF_INTERNAL_CPUMAP_H */
diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h
new file mode 100644
index 000000000..2d0fa02b0
--- /dev/null
+++ b/tools/lib/perf/include/internal/evlist.h
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LIBPERF_INTERNAL_EVLIST_H
+#define __LIBPERF_INTERNAL_EVLIST_H
+
+#include <linux/list.h>
+#include <api/fd/array.h>
+#include <internal/evsel.h>
+
+#define PERF_EVLIST__HLIST_BITS 8
+#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
+
+struct perf_cpu_map;
+struct perf_thread_map;
+struct perf_mmap_param;
+
+struct perf_evlist {
+	struct list_head	 entries;
+	int			 nr_entries;
+	bool			 has_user_cpus;
+	struct perf_cpu_map	*cpus;
+	struct perf_cpu_map	*all_cpus;
+	struct perf_thread_map	*threads;
+	int			 nr_mmaps;
+	size_t			 mmap_len;
+	struct fdarray		 pollfd;
+	struct hlist_head	 heads[PERF_EVLIST__HLIST_SIZE];
+	struct perf_mmap	*mmap;
+	struct perf_mmap	*mmap_ovw;
+	struct perf_mmap	*mmap_first;
+	struct perf_mmap	*mmap_ovw_first;
+};
+
+typedef void
+(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_mmap_param*, int, bool);
+typedef struct perf_mmap*
+(*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int);
+typedef int
+(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, int);
+
+struct perf_evlist_mmap_ops {
+	perf_evlist_mmap__cb_idx_t	idx;
+	perf_evlist_mmap__cb_get_t	get;
+	perf_evlist_mmap__cb_mmap_t	mmap;
+};
+
+int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
+int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
+			    void *ptr, short revent, enum fdarray_flags flags);
+
+int perf_evlist__mmap_ops(struct perf_evlist *evlist,
+			  struct perf_evlist_mmap_ops *ops,
+			  struct perf_mmap_param *mp);
+
+void perf_evlist__init(struct perf_evlist *evlist);
+void perf_evlist__exit(struct perf_evlist *evlist);
+
+/**
+ * __perf_evlist__for_each_entry - iterate thru all the evsels
+ * @list: list_head instance to iterate
+ * @evsel: struct perf_evsel iterator
+ */
+#define __perf_evlist__for_each_entry(list, evsel) \
+	list_for_each_entry(evsel, list, node)
+
+/**
+ * evlist__for_each_entry - iterate thru all the evsels
+ * @evlist: perf_evlist instance to iterate
+ * @evsel: struct perf_evsel iterator
+ */
+#define perf_evlist__for_each_entry(evlist, evsel) \
+	__perf_evlist__for_each_entry(&(evlist)->entries, evsel)
+
+/**
+ * __perf_evlist__for_each_entry_reverse - iterate thru all the evsels in reverse order
+ * @list: list_head instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define __perf_evlist__for_each_entry_reverse(list, evsel) \
+	list_for_each_entry_reverse(evsel, list, node)
+
+/**
+ * perf_evlist__for_each_entry_reverse - iterate thru all the evsels in reverse order
+ * @evlist: evlist instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define perf_evlist__for_each_entry_reverse(evlist, evsel) \
+	__perf_evlist__for_each_entry_reverse(&(evlist)->entries, evsel)
+
+/**
+ * __perf_evlist__for_each_entry_safe - safely iterate thru all the evsels
+ * @list: list_head instance to iterate
+ * @tmp: struct evsel temp iterator
+ * @evsel: struct evsel iterator
+ */
+#define __perf_evlist__for_each_entry_safe(list, tmp, evsel) \
+	list_for_each_entry_safe(evsel, tmp, list, node)
+
+/**
+ * perf_evlist__for_each_entry_safe - safely iterate thru all the evsels
+ * @evlist: evlist instance to iterate
+ * @evsel: struct evsel iterator
+ * @tmp: struct evsel temp iterator
+ */
+#define perf_evlist__for_each_entry_safe(evlist, tmp, evsel) \
+	__perf_evlist__for_each_entry_safe(&(evlist)->entries, tmp, evsel)
+
+static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist)
+{
+	return list_entry(evlist->entries.next, struct perf_evsel, node);
+}
+
+static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist)
+{
+	return list_entry(evlist->entries.prev, struct perf_evsel, node);
+}
+
+u64 perf_evlist__read_format(struct perf_evlist *evlist);
+
+void perf_evlist__id_add(struct perf_evlist *evlist,
+			 struct perf_evsel *evsel,
+			 int cpu, int thread, u64 id);
+
+int perf_evlist__id_add_fd(struct perf_evlist *evlist,
+			   struct perf_evsel *evsel,
+			   int cpu, int thread, int fd);
+
+#endif /* __LIBPERF_INTERNAL_EVLIST_H */
diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h
new file mode 100644
index 000000000..1ffd083b2
--- /dev/null
+++ b/tools/lib/perf/include/internal/evsel.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LIBPERF_INTERNAL_EVSEL_H
+#define __LIBPERF_INTERNAL_EVSEL_H
+
+#include <linux/types.h>
+#include <linux/perf_event.h>
+#include <stdbool.h>
+#include <sys/types.h>
+
+struct perf_cpu_map;
+struct perf_thread_map;
+struct xyarray;
+
+/*
+ * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are
+ * more than one entry in the evlist.
+ */
+struct perf_sample_id {
+	struct hlist_node	 node;
+	u64			 id;
+	struct perf_evsel	*evsel;
+       /*
+	* 'idx' will be used for AUX area sampling. A sample will have AUX area
+	* data that will be queued for decoding, where there are separate
+	* queues for each CPU (per-cpu tracing) or task (per-thread tracing).
+	* The sample ID can be used to lookup 'idx' which is effectively the
+	* queue number.
+	*/
+	int			 idx;
+	int			 cpu;
+	pid_t			 tid;
+
+	/* Holds total ID period value for PERF_SAMPLE_READ processing. */
+	u64			 period;
+};
+
+struct perf_evsel {
+	struct list_head	 node;
+	struct perf_event_attr	 attr;
+	struct perf_cpu_map	*cpus;
+	struct perf_cpu_map	*own_cpus;
+	struct perf_thread_map	*threads;
+	struct xyarray		*fd;
+	struct xyarray		*sample_id;
+	u64			*id;
+	u32			 ids;
+
+	/* parse modifier helper */
+	int			 nr_members;
+	bool			 system_wide;
+};
+
+void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr);
+int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
+void perf_evsel__close_fd(struct perf_evsel *evsel);
+void perf_evsel__free_fd(struct perf_evsel *evsel);
+int perf_evsel__read_size(struct perf_evsel *evsel);
+int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter);
+
+int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
+void perf_evsel__free_id(struct perf_evsel *evsel);
+
+#endif /* __LIBPERF_INTERNAL_EVSEL_H */
diff --git a/tools/lib/perf/include/internal/lib.h b/tools/lib/perf/include/internal/lib.h
new file mode 100644
index 000000000..5175d491b
--- /dev/null
+++ b/tools/lib/perf/include/internal/lib.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LIBPERF_INTERNAL_LIB_H
+#define __LIBPERF_INTERNAL_LIB_H
+
+#include <sys/types.h>
+
+extern unsigned int page_size;
+
+ssize_t readn(int fd, void *buf, size_t n);
+ssize_t writen(int fd, const void *buf, size_t n);
+
+#endif /* __LIBPERF_INTERNAL_CPUMAP_H */
diff --git a/tools/lib/perf/include/internal/mmap.h b/tools/lib/perf/include/internal/mmap.h
new file mode 100644
index 000000000..be7556e0a
--- /dev/null
+++ b/tools/lib/perf/include/internal/mmap.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LIBPERF_INTERNAL_MMAP_H
+#define __LIBPERF_INTERNAL_MMAP_H
+
+#include <linux/compiler.h>
+#include <linux/refcount.h>
+#include <linux/types.h>
+#include <stdbool.h>
+
+/* perf sample has 16 bits size limit */
+#define PERF_SAMPLE_MAX_SIZE (1 << 16)
+
+struct perf_mmap;
+
+typedef void (*libperf_unmap_cb_t)(struct perf_mmap *map);
+
+/**
+ * struct perf_mmap - perf's ring buffer mmap details
+ *
+ * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this
+ */
+struct perf_mmap {
+	void			*base;
+	int			 mask;
+	int			 fd;
+	int			 cpu;
+	refcount_t		 refcnt;
+	u64			 prev;
+	u64			 start;
+	u64			 end;
+	bool			 overwrite;
+	u64			 flush;
+	libperf_unmap_cb_t	 unmap_cb;
+	char			 event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
+	struct perf_mmap	*next;
+};
+
+struct perf_mmap_param {
+	int	prot;
+	int	mask;
+};
+
+size_t perf_mmap__mmap_len(struct perf_mmap *map);
+
+void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
+		     bool overwrite, libperf_unmap_cb_t unmap_cb);
+int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
+		    int fd, int cpu);
+void perf_mmap__munmap(struct perf_mmap *map);
+void perf_mmap__get(struct perf_mmap *map);
+void perf_mmap__put(struct perf_mmap *map);
+
+u64 perf_mmap__read_head(struct perf_mmap *map);
+
+#endif /* __LIBPERF_INTERNAL_MMAP_H */
diff --git a/tools/lib/perf/include/internal/tests.h b/tools/lib/perf/include/internal/tests.h
new file mode 100644
index 000000000..2093e8868
--- /dev/null
+++ b/tools/lib/perf/include/internal/tests.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LIBPERF_INTERNAL_TESTS_H
+#define __LIBPERF_INTERNAL_TESTS_H
+
+#include <stdio.h>
+
+int tests_failed;
+
+#define __T_START					\
+do {							\
+	fprintf(stdout, "- running %s...", __FILE__);	\
+	fflush(NULL);					\
+	tests_failed = 0;				\
+} while (0)
+
+#define __T_END								\
+do {									\
+	if (tests_failed)						\
+		fprintf(stdout, "  FAILED (%d)\n", tests_failed);	\
+	else								\
+		fprintf(stdout, "OK\n");				\
+} while (0)
+
+#define __T(text, cond)                                                          \
+do {                                                                             \
+	if (!(cond)) {                                                           \
+		fprintf(stderr, "FAILED %s:%d %s\n", __FILE__, __LINE__, text);  \
+		tests_failed++;                                                  \
+		return -1;                                                       \
+	}                                                                        \
+} while (0)
+
+#endif /* __LIBPERF_INTERNAL_TESTS_H */
diff --git a/tools/lib/perf/include/internal/threadmap.h b/tools/lib/perf/include/internal/threadmap.h
new file mode 100644
index 000000000..df748baf9
--- /dev/null
+++ b/tools/lib/perf/include/internal/threadmap.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LIBPERF_INTERNAL_THREADMAP_H
+#define __LIBPERF_INTERNAL_THREADMAP_H
+
+#include <linux/refcount.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+struct thread_map_data {
+	pid_t	 pid;
+	char	*comm;
+};
+
+struct perf_thread_map {
+	refcount_t	refcnt;
+	int		nr;
+	int		err_thread;
+	struct thread_map_data map[];
+};
+
+struct perf_thread_map *perf_thread_map__realloc(struct perf_thread_map *map, int nr);
+
+#endif /* __LIBPERF_INTERNAL_THREADMAP_H */
diff --git a/tools/lib/perf/include/internal/xyarray.h b/tools/lib/perf/include/internal/xyarray.h
new file mode 100644
index 000000000..51e35d6c8
--- /dev/null
+++ b/tools/lib/perf/include/internal/xyarray.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LIBPERF_INTERNAL_XYARRAY_H
+#define __LIBPERF_INTERNAL_XYARRAY_H
+
+#include <linux/compiler.h>
+#include <sys/types.h>
+
+struct xyarray {
+	size_t row_size;
+	size_t entry_size;
+	size_t entries;
+	size_t max_x;
+	size_t max_y;
+	char contents[] __aligned(8);
+};
+
+struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size);
+void xyarray__delete(struct xyarray *xy);
+void xyarray__reset(struct xyarray *xy);
+
+static inline void *xyarray__entry(struct xyarray *xy, int x, int y)
+{
+	return &xy->contents[x * xy->row_size + y * xy->entry_size];
+}
+
+static inline int xyarray__max_y(struct xyarray *xy)
+{
+	return xy->max_y;
+}
+
+static inline int xyarray__max_x(struct xyarray *xy)
+{
+	return xy->max_x;
+}
+
+#endif /* __LIBPERF_INTERNAL_XYARRAY_H */
diff --git a/tools/lib/perf/include/perf/core.h b/tools/lib/perf/include/perf/core.h
new file mode 100644
index 000000000..a3f6d68ed
--- /dev/null
+++ b/tools/lib/perf/include/perf/core.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LIBPERF_CORE_H
+#define __LIBPERF_CORE_H
+
+#include <stdarg.h>
+
+#ifndef LIBPERF_API
+#define LIBPERF_API __attribute__((visibility("default")))
+#endif
+
+enum libperf_print_level {
+	LIBPERF_ERR,
+	LIBPERF_WARN,
+	LIBPERF_INFO,
+	LIBPERF_DEBUG,
+	LIBPERF_DEBUG2,
+	LIBPERF_DEBUG3,
+};
+
+typedef int (*libperf_print_fn_t)(enum libperf_print_level level,
+				  const char *, va_list ap);
+
+LIBPERF_API void libperf_init(libperf_print_fn_t fn);
+
+#endif /* __LIBPERF_CORE_H */
diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
new file mode 100644
index 000000000..6a17ad730
--- /dev/null
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LIBPERF_CPUMAP_H
+#define __LIBPERF_CPUMAP_H
+
+#include <perf/core.h>
+#include <stdio.h>
+#include <stdbool.h>
+
+struct perf_cpu_map;
+
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file);
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
+						     struct perf_cpu_map *other);
+LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
+LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
+LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
+LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map);
+LIBPERF_API int perf_cpu_map__max(struct perf_cpu_map *map);
+
+#define perf_cpu_map__for_each_cpu(cpu, idx, cpus)		\
+	for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx);	\
+	     (idx) < perf_cpu_map__nr(cpus);			\
+	     (idx)++, (cpu) = perf_cpu_map__cpu(cpus, idx))
+
+#endif /* __LIBPERF_CPUMAP_H */
diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
new file mode 100644
index 000000000..4a24b855d
--- /dev/null
+++ b/tools/lib/perf/include/perf/event.h
@@ -0,0 +1,418 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LIBPERF_EVENT_H
+#define __LIBPERF_EVENT_H
+
+#include <linux/perf_event.h>
+#include <linux/types.h>
+#include <linux/limits.h>
+#include <linux/bpf.h>
+#include <sys/types.h> /* pid_t */
+
+#define event_contains(obj, mem) ((obj).header.size > offsetof(typeof(obj), mem))
+
+struct perf_record_mmap {
+	struct perf_event_header header;
+	__u32			 pid, tid;
+	__u64			 start;
+	__u64			 len;
+	__u64			 pgoff;
+	char			 filename[PATH_MAX];
+};
+
+struct perf_record_mmap2 {
+	struct perf_event_header header;
+	__u32			 pid, tid;
+	__u64			 start;
+	__u64			 len;
+	__u64			 pgoff;
+	__u32			 maj;
+	__u32			 min;
+	__u64			 ino;
+	__u64			 ino_generation;
+	__u32			 prot;
+	__u32			 flags;
+	char			 filename[PATH_MAX];
+};
+
+struct perf_record_comm {
+	struct perf_event_header header;
+	__u32			 pid, tid;
+	char			 comm[16];
+};
+
+struct perf_record_namespaces {
+	struct perf_event_header header;
+	__u32			 pid, tid;
+	__u64			 nr_namespaces;
+	struct perf_ns_link_info link_info[];
+};
+
+struct perf_record_fork {
+	struct perf_event_header header;
+	__u32			 pid, ppid;
+	__u32			 tid, ptid;
+	__u64			 time;
+};
+
+struct perf_record_lost {
+	struct perf_event_header header;
+	__u64			 id;
+	__u64			 lost;
+};
+
+struct perf_record_lost_samples {
+	struct perf_event_header header;
+	__u64			 lost;
+};
+
+/*
+ * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID
+ */
+struct perf_record_read {
+	struct perf_event_header header;
+	__u32			 pid, tid;
+	__u64			 value;
+	__u64			 time_enabled;
+	__u64			 time_running;
+	__u64			 id;
+};
+
+struct perf_record_throttle {
+	struct perf_event_header header;
+	__u64			 time;
+	__u64			 id;
+	__u64			 stream_id;
+};
+
+#ifndef KSYM_NAME_LEN
+#define KSYM_NAME_LEN 256
+#endif
+
+struct perf_record_ksymbol {
+	struct perf_event_header header;
+	__u64			 addr;
+	__u32			 len;
+	__u16			 ksym_type;
+	__u16			 flags;
+	char			 name[KSYM_NAME_LEN];
+};
+
+struct perf_record_bpf_event {
+	struct perf_event_header header;
+	__u16			 type;
+	__u16			 flags;
+	__u32			 id;
+
+	/* for bpf_prog types */
+	__u8			 tag[BPF_TAG_SIZE];  // prog tag
+};
+
+struct perf_record_cgroup {
+	struct perf_event_header header;
+	__u64			 id;
+	char			 path[PATH_MAX];
+};
+
+struct perf_record_text_poke_event {
+	struct perf_event_header header;
+	__u64			addr;
+	__u16			old_len;
+	__u16			new_len;
+	__u8			bytes[];
+};
+
+struct perf_record_sample {
+	struct perf_event_header header;
+	__u64			 array[];
+};
+
+struct perf_record_switch {
+	struct perf_event_header header;
+	__u32			 next_prev_pid;
+	__u32			 next_prev_tid;
+};
+
+struct perf_record_header_attr {
+	struct perf_event_header header;
+	struct perf_event_attr	 attr;
+	__u64			 id[];
+};
+
+enum {
+	PERF_CPU_MAP__CPUS = 0,
+	PERF_CPU_MAP__MASK = 1,
+};
+
+struct cpu_map_entries {
+	__u16			 nr;
+	__u16			 cpu[];
+};
+
+struct perf_record_record_cpu_map {
+	__u16			 nr;
+	__u16			 long_size;
+	unsigned long		 mask[];
+};
+
+struct perf_record_cpu_map_data {
+	__u16			 type;
+	char			 data[];
+};
+
+struct perf_record_cpu_map {
+	struct perf_event_header	 header;
+	struct perf_record_cpu_map_data	 data;
+};
+
+enum {
+	PERF_EVENT_UPDATE__UNIT  = 0,
+	PERF_EVENT_UPDATE__SCALE = 1,
+	PERF_EVENT_UPDATE__NAME  = 2,
+	PERF_EVENT_UPDATE__CPUS  = 3,
+};
+
+struct perf_record_event_update_cpus {
+	struct perf_record_cpu_map_data	 cpus;
+};
+
+struct perf_record_event_update_scale {
+	double			 scale;
+};
+
+struct perf_record_event_update {
+	struct perf_event_header header;
+	__u64			 type;
+	__u64			 id;
+	char			 data[];
+};
+
+#define MAX_EVENT_NAME 64
+
+struct perf_trace_event_type {
+	__u64			 event_id;
+	char			 name[MAX_EVENT_NAME];
+};
+
+struct perf_record_header_event_type {
+	struct perf_event_header	 header;
+	struct perf_trace_event_type	 event_type;
+};
+
+struct perf_record_header_tracing_data {
+	struct perf_event_header header;
+	__u32			 size;
+};
+
+#define PERF_RECORD_MISC_BUILD_ID_SIZE (1 << 15)
+
+struct perf_record_header_build_id {
+	struct perf_event_header header;
+	pid_t			 pid;
+	union {
+		__u8		 build_id[24];
+		struct {
+			__u8	 data[20];
+			__u8	 size;
+			__u8	 reserved1__;
+			__u16	 reserved2__;
+		};
+	};
+	char			 filename[];
+};
+
+struct id_index_entry {
+	__u64			 id;
+	__u64			 idx;
+	__u64			 cpu;
+	__u64			 tid;
+};
+
+struct perf_record_id_index {
+	struct perf_event_header header;
+	__u64			 nr;
+	struct id_index_entry	 entries[0];
+};
+
+struct perf_record_auxtrace_info {
+	struct perf_event_header header;
+	__u32			 type;
+	__u32			 reserved__; /* For alignment */
+	__u64			 priv[];
+};
+
+struct perf_record_auxtrace {
+	struct perf_event_header header;
+	__u64			 size;
+	__u64			 offset;
+	__u64			 reference;
+	__u32			 idx;
+	__u32			 tid;
+	__u32			 cpu;
+	__u32			 reserved__; /* For alignment */
+};
+
+#define MAX_AUXTRACE_ERROR_MSG 64
+
+struct perf_record_auxtrace_error {
+	struct perf_event_header header;
+	__u32			 type;
+	__u32			 code;
+	__u32			 cpu;
+	__u32			 pid;
+	__u32			 tid;
+	__u32			 fmt;
+	__u64			 ip;
+	__u64			 time;
+	char			 msg[MAX_AUXTRACE_ERROR_MSG];
+};
+
+struct perf_record_aux {
+	struct perf_event_header header;
+	__u64			 aux_offset;
+	__u64			 aux_size;
+	__u64			 flags;
+};
+
+struct perf_record_itrace_start {
+	struct perf_event_header header;
+	__u32			 pid;
+	__u32			 tid;
+};
+
+struct perf_record_thread_map_entry {
+	__u64			 pid;
+	char			 comm[16];
+};
+
+struct perf_record_thread_map {
+	struct perf_event_header		 header;
+	__u64					 nr;
+	struct perf_record_thread_map_entry	 entries[];
+};
+
+enum {
+	PERF_STAT_CONFIG_TERM__AGGR_MODE	= 0,
+	PERF_STAT_CONFIG_TERM__INTERVAL		= 1,
+	PERF_STAT_CONFIG_TERM__SCALE		= 2,
+	PERF_STAT_CONFIG_TERM__MAX		= 3,
+};
+
+struct perf_record_stat_config_entry {
+	__u64			 tag;
+	__u64			 val;
+};
+
+struct perf_record_stat_config {
+	struct perf_event_header		 header;
+	__u64					 nr;
+	struct perf_record_stat_config_entry	 data[];
+};
+
+struct perf_record_stat {
+	struct perf_event_header header;
+
+	__u64			 id;
+	__u32			 cpu;
+	__u32			 thread;
+
+	union {
+		struct {
+			__u64	 val;
+			__u64	 ena;
+			__u64	 run;
+		};
+		__u64		 values[3];
+	};
+};
+
+struct perf_record_stat_round {
+	struct perf_event_header header;
+	__u64			 type;
+	__u64			 time;
+};
+
+struct perf_record_time_conv {
+	struct perf_event_header header;
+	__u64			 time_shift;
+	__u64			 time_mult;
+	__u64			 time_zero;
+	__u64			 time_cycles;
+	__u64			 time_mask;
+	__u8			 cap_user_time_zero;
+	__u8			 cap_user_time_short;
+	__u8			 reserved[6];	/* For alignment */
+};
+
+struct perf_record_header_feature {
+	struct perf_event_header header;
+	__u64			 feat_id;
+	char			 data[];
+};
+
+struct perf_record_compressed {
+	struct perf_event_header header;
+	char			 data[];
+};
+
+enum perf_user_event_type { /* above any possible kernel type */
+	PERF_RECORD_USER_TYPE_START		= 64,
+	PERF_RECORD_HEADER_ATTR			= 64,
+	PERF_RECORD_HEADER_EVENT_TYPE		= 65, /* deprecated */
+	PERF_RECORD_HEADER_TRACING_DATA		= 66,
+	PERF_RECORD_HEADER_BUILD_ID		= 67,
+	PERF_RECORD_FINISHED_ROUND		= 68,
+	PERF_RECORD_ID_INDEX			= 69,
+	PERF_RECORD_AUXTRACE_INFO		= 70,
+	PERF_RECORD_AUXTRACE			= 71,
+	PERF_RECORD_AUXTRACE_ERROR		= 72,
+	PERF_RECORD_THREAD_MAP			= 73,
+	PERF_RECORD_CPU_MAP			= 74,
+	PERF_RECORD_STAT_CONFIG			= 75,
+	PERF_RECORD_STAT			= 76,
+	PERF_RECORD_STAT_ROUND			= 77,
+	PERF_RECORD_EVENT_UPDATE		= 78,
+	PERF_RECORD_TIME_CONV			= 79,
+	PERF_RECORD_HEADER_FEATURE		= 80,
+	PERF_RECORD_COMPRESSED			= 81,
+	PERF_RECORD_HEADER_MAX
+};
+
+union perf_event {
+	struct perf_event_header		header;
+	struct perf_record_mmap			mmap;
+	struct perf_record_mmap2		mmap2;
+	struct perf_record_comm			comm;
+	struct perf_record_namespaces		namespaces;
+	struct perf_record_cgroup		cgroup;
+	struct perf_record_fork			fork;
+	struct perf_record_lost			lost;
+	struct perf_record_lost_samples		lost_samples;
+	struct perf_record_read			read;
+	struct perf_record_throttle		throttle;
+	struct perf_record_sample		sample;
+	struct perf_record_bpf_event		bpf;
+	struct perf_record_ksymbol		ksymbol;
+	struct perf_record_text_poke_event	text_poke;
+	struct perf_record_header_attr		attr;
+	struct perf_record_event_update		event_update;
+	struct perf_record_header_event_type	event_type;
+	struct perf_record_header_tracing_data	tracing_data;
+	struct perf_record_header_build_id	build_id;
+	struct perf_record_id_index		id_index;
+	struct perf_record_auxtrace_info	auxtrace_info;
+	struct perf_record_auxtrace		auxtrace;
+	struct perf_record_auxtrace_error	auxtrace_error;
+	struct perf_record_aux			aux;
+	struct perf_record_itrace_start		itrace_start;
+	struct perf_record_switch		context_switch;
+	struct perf_record_thread_map		thread_map;
+	struct perf_record_cpu_map		cpu_map;
+	struct perf_record_stat_config		stat_config;
+	struct perf_record_stat			stat;
+	struct perf_record_stat_round		stat_round;
+	struct perf_record_time_conv		time_conv;
+	struct perf_record_header_feature	feat;
+	struct perf_record_compressed		pack;
+};
+
+#endif /* __LIBPERF_EVENT_H */
diff --git a/tools/lib/perf/include/perf/evlist.h b/tools/lib/perf/include/perf/evlist.h
new file mode 100644
index 000000000..0a7479dc1
--- /dev/null
+++ b/tools/lib/perf/include/perf/evlist.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LIBPERF_EVLIST_H
+#define __LIBPERF_EVLIST_H
+
+#include <perf/core.h>
+#include <stdbool.h>
+
+struct perf_evlist;
+struct perf_evsel;
+struct perf_cpu_map;
+struct perf_thread_map;
+
+LIBPERF_API void perf_evlist__add(struct perf_evlist *evlist,
+				  struct perf_evsel *evsel);
+LIBPERF_API void perf_evlist__remove(struct perf_evlist *evlist,
+				     struct perf_evsel *evsel);
+LIBPERF_API struct perf_evlist *perf_evlist__new(void);
+LIBPERF_API void perf_evlist__delete(struct perf_evlist *evlist);
+LIBPERF_API struct perf_evsel* perf_evlist__next(struct perf_evlist *evlist,
+						 struct perf_evsel *evsel);
+LIBPERF_API int perf_evlist__open(struct perf_evlist *evlist);
+LIBPERF_API void perf_evlist__close(struct perf_evlist *evlist);
+LIBPERF_API void perf_evlist__enable(struct perf_evlist *evlist);
+LIBPERF_API void perf_evlist__disable(struct perf_evlist *evlist);
+
+#define perf_evlist__for_each_evsel(evlist, pos)	\
+	for ((pos) = perf_evlist__next((evlist), NULL);	\
+	     (pos) != NULL;				\
+	     (pos) = perf_evlist__next((evlist), (pos)))
+
+LIBPERF_API void perf_evlist__set_maps(struct perf_evlist *evlist,
+				       struct perf_cpu_map *cpus,
+				       struct perf_thread_map *threads);
+LIBPERF_API int perf_evlist__poll(struct perf_evlist *evlist, int timeout);
+LIBPERF_API int perf_evlist__filter_pollfd(struct perf_evlist *evlist,
+					   short revents_and_mask);
+
+LIBPERF_API int perf_evlist__mmap(struct perf_evlist *evlist, int pages);
+LIBPERF_API void perf_evlist__munmap(struct perf_evlist *evlist);
+
+LIBPERF_API struct perf_mmap *perf_evlist__next_mmap(struct perf_evlist *evlist,
+						     struct perf_mmap *map,
+						     bool overwrite);
+#define perf_evlist__for_each_mmap(evlist, pos, overwrite)		\
+	for ((pos) = perf_evlist__next_mmap((evlist), NULL, overwrite);	\
+	     (pos) != NULL;						\
+	     (pos) = perf_evlist__next_mmap((evlist), (pos), overwrite))
+
+#endif /* __LIBPERF_EVLIST_H */
diff --git a/tools/lib/perf/include/perf/evsel.h b/tools/lib/perf/include/perf/evsel.h
new file mode 100644
index 000000000..c82ec39a4
--- /dev/null
+++ b/tools/lib/perf/include/perf/evsel.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LIBPERF_EVSEL_H
+#define __LIBPERF_EVSEL_H
+
+#include <stdint.h>
+#include <perf/core.h>
+
+struct perf_evsel;
+struct perf_event_attr;
+struct perf_cpu_map;
+struct perf_thread_map;
+
+struct perf_counts_values {
+	union {
+		struct {
+			uint64_t val;
+			uint64_t ena;
+			uint64_t run;
+		};
+		uint64_t values[3];
+	};
+};
+
+LIBPERF_API struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr);
+LIBPERF_API void perf_evsel__delete(struct perf_evsel *evsel);
+LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
+				 struct perf_thread_map *threads);
+LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel);
+LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
+LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
+				 struct perf_counts_values *count);
+LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel);
+LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu);
+LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel);
+LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu);
+LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);
+LIBPERF_API struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);
+LIBPERF_API struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel);
+
+#endif /* __LIBPERF_EVSEL_H */
diff --git a/tools/lib/perf/include/perf/mmap.h b/tools/lib/perf/include/perf/mmap.h
new file mode 100644
index 000000000..9508ad90d
--- /dev/null
+++ b/tools/lib/perf/include/perf/mmap.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LIBPERF_MMAP_H
+#define __LIBPERF_MMAP_H
+
+#include <perf/core.h>
+
+struct perf_mmap;
+union perf_event;
+
+LIBPERF_API void perf_mmap__consume(struct perf_mmap *map);
+LIBPERF_API int perf_mmap__read_init(struct perf_mmap *map);
+LIBPERF_API void perf_mmap__read_done(struct perf_mmap *map);
+LIBPERF_API union perf_event *perf_mmap__read_event(struct perf_mmap *map);
+
+#endif /* __LIBPERF_MMAP_H */
diff --git a/tools/lib/perf/include/perf/threadmap.h b/tools/lib/perf/include/perf/threadmap.h
new file mode 100644
index 000000000..a7c50de8d
--- /dev/null
+++ b/tools/lib/perf/include/perf/threadmap.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LIBPERF_THREADMAP_H
+#define __LIBPERF_THREADMAP_H
+
+#include <perf/core.h>
+#include <sys/types.h>
+
+struct perf_thread_map;
+
+LIBPERF_API struct perf_thread_map *perf_thread_map__new_dummy(void);
+
+LIBPERF_API void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid);
+LIBPERF_API char *perf_thread_map__comm(struct perf_thread_map *map, int thread);
+LIBPERF_API int perf_thread_map__nr(struct perf_thread_map *threads);
+LIBPERF_API pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread);
+
+LIBPERF_API struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map);
+LIBPERF_API void perf_thread_map__put(struct perf_thread_map *map);
+
+#endif /* __LIBPERF_THREADMAP_H */
diff --git a/tools/lib/perf/internal.h b/tools/lib/perf/internal.h
new file mode 100644
index 000000000..2c27e158d
--- /dev/null
+++ b/tools/lib/perf/internal.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LIBPERF_INTERNAL_H
+#define __LIBPERF_INTERNAL_H
+
+#include <perf/core.h>
+
+void libperf_print(enum libperf_print_level level,
+		   const char *format, ...)
+	__attribute__((format(printf, 2, 3)));
+
+#define __pr(level, fmt, ...)   \
+do {                            \
+	libperf_print(level, "libperf: " fmt, ##__VA_ARGS__);     \
+} while (0)
+
+#define pr_err(fmt, ...)        __pr(LIBPERF_ERR, fmt, ##__VA_ARGS__)
+#define pr_warning(fmt, ...)    __pr(LIBPERF_WARN, fmt, ##__VA_ARGS__)
+#define pr_info(fmt, ...)       __pr(LIBPERF_INFO, fmt, ##__VA_ARGS__)
+#define pr_debug(fmt, ...)      __pr(LIBPERF_DEBUG, fmt, ##__VA_ARGS__)
+#define pr_debug2(fmt, ...)     __pr(LIBPERF_DEBUG2, fmt, ##__VA_ARGS__)
+#define pr_debug3(fmt, ...)     __pr(LIBPERF_DEBUG3, fmt, ##__VA_ARGS__)
+
+#endif /* __LIBPERF_INTERNAL_H */
diff --git a/tools/lib/perf/lib.c b/tools/lib/perf/lib.c
new file mode 100644
index 000000000..18658931f
--- /dev/null
+++ b/tools/lib/perf/lib.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <unistd.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <linux/kernel.h>
+#include <internal/lib.h>
+
+unsigned int page_size;
+
+static ssize_t ion(bool is_read, int fd, void *buf, size_t n)
+{
+	void *buf_start = buf;
+	size_t left = n;
+
+	while (left) {
+		/* buf must be treated as const if !is_read. */
+		ssize_t ret = is_read ? read(fd, buf, left) :
+					write(fd, buf, left);
+
+		if (ret < 0 && errno == EINTR)
+			continue;
+		if (ret <= 0)
+			return ret;
+
+		left -= ret;
+		buf  += ret;
+	}
+
+	BUG_ON((size_t)(buf - buf_start) != n);
+	return n;
+}
+
+/*
+ * Read exactly 'n' bytes or return an error.
+ */
+ssize_t readn(int fd, void *buf, size_t n)
+{
+	return ion(true, fd, buf, n);
+}
+
+/*
+ * Write exactly 'n' bytes or return an error.
+ */
+ssize_t writen(int fd, const void *buf, size_t n)
+{
+	/* ion does not modify buf. */
+	return ion(false, fd, (void *)buf, n);
+}
diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
new file mode 100644
index 000000000..7be1af8a5
--- /dev/null
+++ b/tools/lib/perf/libperf.map
@@ -0,0 +1,51 @@
+LIBPERF_0.0.1 {
+	global:
+		libperf_init;
+		perf_cpu_map__dummy_new;
+		perf_cpu_map__get;
+		perf_cpu_map__put;
+		perf_cpu_map__new;
+		perf_cpu_map__read;
+		perf_cpu_map__nr;
+		perf_cpu_map__cpu;
+		perf_cpu_map__empty;
+		perf_cpu_map__max;
+		perf_thread_map__new_dummy;
+		perf_thread_map__set_pid;
+		perf_thread_map__comm;
+		perf_thread_map__nr;
+		perf_thread_map__pid;
+		perf_thread_map__get;
+		perf_thread_map__put;
+		perf_evsel__new;
+		perf_evsel__delete;
+		perf_evsel__enable;
+		perf_evsel__disable;
+		perf_evsel__open;
+		perf_evsel__close;
+		perf_evsel__read;
+		perf_evsel__cpus;
+		perf_evsel__threads;
+		perf_evsel__attr;
+		perf_evlist__new;
+		perf_evlist__delete;
+		perf_evlist__open;
+		perf_evlist__close;
+		perf_evlist__enable;
+		perf_evlist__disable;
+		perf_evlist__add;
+		perf_evlist__remove;
+		perf_evlist__next;
+		perf_evlist__set_maps;
+		perf_evlist__poll;
+		perf_evlist__mmap;
+		perf_evlist__munmap;
+		perf_evlist__filter_pollfd;
+		perf_evlist__next_mmap;
+		perf_mmap__consume;
+		perf_mmap__read_init;
+		perf_mmap__read_done;
+		perf_mmap__read_event;
+	local:
+		*;
+};
diff --git a/tools/lib/perf/libperf.pc.template b/tools/lib/perf/libperf.pc.template
new file mode 100644
index 000000000..117e4a237
--- /dev/null
+++ b/tools/lib/perf/libperf.pc.template
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+prefix=@PREFIX@
+libdir=@LIBDIR@
+includedir=${prefix}/include
+
+Name: libperf
+Description: perf library
+Version: @VERSION@
+Libs: -L${libdir} -lperf
+Cflags: -I${includedir}
diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c
new file mode 100644
index 000000000..79d5ed6c3
--- /dev/null
+++ b/tools/lib/perf/mmap.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/mman.h>
+#include <inttypes.h>
+#include <asm/bug.h>
+#include <errno.h>
+#include <string.h>
+#include <linux/ring_buffer.h>
+#include <linux/perf_event.h>
+#include <perf/mmap.h>
+#include <perf/event.h>
+#include <internal/mmap.h>
+#include <internal/lib.h>
+#include <linux/kernel.h>
+#include "internal.h"
+
+void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
+		     bool overwrite, libperf_unmap_cb_t unmap_cb)
+{
+	map->fd = -1;
+	map->overwrite = overwrite;
+	map->unmap_cb  = unmap_cb;
+	refcount_set(&map->refcnt, 0);
+	if (prev)
+		prev->next = map;
+}
+
+size_t perf_mmap__mmap_len(struct perf_mmap *map)
+{
+	return map->mask + 1 + page_size;
+}
+
+int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
+		    int fd, int cpu)
+{
+	map->prev = 0;
+	map->mask = mp->mask;
+	map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
+			 MAP_SHARED, fd, 0);
+	if (map->base == MAP_FAILED) {
+		map->base = NULL;
+		return -1;
+	}
+
+	map->fd  = fd;
+	map->cpu = cpu;
+	return 0;
+}
+
+void perf_mmap__munmap(struct perf_mmap *map)
+{
+	if (map && map->base != NULL) {
+		munmap(map->base, perf_mmap__mmap_len(map));
+		map->base = NULL;
+		map->fd = -1;
+		refcount_set(&map->refcnt, 0);
+	}
+	if (map && map->unmap_cb)
+		map->unmap_cb(map);
+}
+
+void perf_mmap__get(struct perf_mmap *map)
+{
+	refcount_inc(&map->refcnt);
+}
+
+void perf_mmap__put(struct perf_mmap *map)
+{
+	BUG_ON(map->base && refcount_read(&map->refcnt) == 0);
+
+	if (refcount_dec_and_test(&map->refcnt))
+		perf_mmap__munmap(map);
+}
+
+static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
+{
+	ring_buffer_write_tail(md->base, tail);
+}
+
+u64 perf_mmap__read_head(struct perf_mmap *map)
+{
+	return ring_buffer_read_head(map->base);
+}
+
+static bool perf_mmap__empty(struct perf_mmap *map)
+{
+	struct perf_event_mmap_page *pc = map->base;
+
+	return perf_mmap__read_head(map) == map->prev && !pc->aux_size;
+}
+
+void perf_mmap__consume(struct perf_mmap *map)
+{
+	if (!map->overwrite) {
+		u64 old = map->prev;
+
+		perf_mmap__write_tail(map, old);
+	}
+
+	if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map))
+		perf_mmap__put(map);
+}
+
+static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end)
+{
+	struct perf_event_header *pheader;
+	u64 evt_head = *start;
+	int size = mask + 1;
+
+	pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start);
+	pheader = (struct perf_event_header *)(buf + (*start & mask));
+	while (true) {
+		if (evt_head - *start >= (unsigned int)size) {
+			pr_debug("Finished reading overwrite ring buffer: rewind\n");
+			if (evt_head - *start > (unsigned int)size)
+				evt_head -= pheader->size;
+			*end = evt_head;
+			return 0;
+		}
+
+		pheader = (struct perf_event_header *)(buf + (evt_head & mask));
+
+		if (pheader->size == 0) {
+			pr_debug("Finished reading overwrite ring buffer: get start\n");
+			*end = evt_head;
+			return 0;
+		}
+
+		evt_head += pheader->size;
+		pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
+	}
+	WARN_ONCE(1, "Shouldn't get here\n");
+	return -1;
+}
+
+/*
+ * Report the start and end of the available data in ringbuffer
+ */
+static int __perf_mmap__read_init(struct perf_mmap *md)
+{
+	u64 head = perf_mmap__read_head(md);
+	u64 old = md->prev;
+	unsigned char *data = md->base + page_size;
+	unsigned long size;
+
+	md->start = md->overwrite ? head : old;
+	md->end = md->overwrite ? old : head;
+
+	if ((md->end - md->start) < md->flush)
+		return -EAGAIN;
+
+	size = md->end - md->start;
+	if (size > (unsigned long)(md->mask) + 1) {
+		if (!md->overwrite) {
+			WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
+
+			md->prev = head;
+			perf_mmap__consume(md);
+			return -EAGAIN;
+		}
+
+		/*
+		 * Backward ring buffer is full. We still have a chance to read
+		 * most of data from it.
+		 */
+		if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+int perf_mmap__read_init(struct perf_mmap *map)
+{
+	/*
+	 * Check if event was unmapped due to a POLLHUP/POLLERR.
+	 */
+	if (!refcount_read(&map->refcnt))
+		return -ENOENT;
+
+	return __perf_mmap__read_init(map);
+}
+
+/*
+ * Mandatory for overwrite mode
+ * The direction of overwrite mode is backward.
+ * The last perf_mmap__read() will set tail to map->core.prev.
+ * Need to correct the map->core.prev to head which is the end of next read.
+ */
+void perf_mmap__read_done(struct perf_mmap *map)
+{
+	/*
+	 * Check if event was unmapped due to a POLLHUP/POLLERR.
+	 */
+	if (!refcount_read(&map->refcnt))
+		return;
+
+	map->prev = perf_mmap__read_head(map);
+}
+
+/* When check_messup is true, 'end' must points to a good entry */
+static union perf_event *perf_mmap__read(struct perf_mmap *map,
+					 u64 *startp, u64 end)
+{
+	unsigned char *data = map->base + page_size;
+	union perf_event *event = NULL;
+	int diff = end - *startp;
+
+	if (diff >= (int)sizeof(event->header)) {
+		size_t size;
+
+		event = (union perf_event *)&data[*startp & map->mask];
+		size = event->header.size;
+
+		if (size < sizeof(event->header) || diff < (int)size)
+			return NULL;
+
+		/*
+		 * Event straddles the mmap boundary -- header should always
+		 * be inside due to u64 alignment of output.
+		 */
+		if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
+			unsigned int offset = *startp;
+			unsigned int len = min(sizeof(*event), size), cpy;
+			void *dst = map->event_copy;
+
+			do {
+				cpy = min(map->mask + 1 - (offset & map->mask), len);
+				memcpy(dst, &data[offset & map->mask], cpy);
+				offset += cpy;
+				dst += cpy;
+				len -= cpy;
+			} while (len);
+
+			event = (union perf_event *)map->event_copy;
+		}
+
+		*startp += size;
+	}
+
+	return event;
+}
+
+/*
+ * Read event from ring buffer one by one.
+ * Return one event for each call.
+ *
+ * Usage:
+ * perf_mmap__read_init()
+ * while(event = perf_mmap__read_event()) {
+ *	//process the event
+ *	perf_mmap__consume()
+ * }
+ * perf_mmap__read_done()
+ */
+union perf_event *perf_mmap__read_event(struct perf_mmap *map)
+{
+	union perf_event *event;
+
+	/*
+	 * Check if event was unmapped due to a POLLHUP/POLLERR.
+	 */
+	if (!refcount_read(&map->refcnt))
+		return NULL;
+
+	/* non-overwirte doesn't pause the ringbuffer */
+	if (!map->overwrite)
+		map->end = perf_mmap__read_head(map);
+
+	event = perf_mmap__read(map, &map->start, map->end);
+
+	if (!map->overwrite)
+		map->prev = map->start;
+
+	return event;
+}
diff --git a/tools/lib/perf/tests/Makefile b/tools/lib/perf/tests/Makefile
new file mode 100644
index 000000000..96841775f
--- /dev/null
+++ b/tools/lib/perf/tests/Makefile
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+TESTS = test-cpumap test-threadmap test-evlist test-evsel
+
+TESTS_SO := $(addsuffix -so,$(TESTS))
+TESTS_A  := $(addsuffix -a,$(TESTS))
+
+# Set compile option CFLAGS
+ifdef EXTRA_CFLAGS
+  CFLAGS := $(EXTRA_CFLAGS)
+else
+  CFLAGS := -g -Wall
+endif
+
+all:
+
+include $(srctree)/tools/scripts/Makefile.include
+
+INCLUDE = -I$(srctree)/tools/lib/perf/include -I$(srctree)/tools/include -I$(srctree)/tools/lib
+
+$(TESTS_A): FORCE
+	$(QUIET_LINK)$(CC) $(INCLUDE) $(CFLAGS) -o $@ $(subst -a,.c,$@) ../libperf.a $(LIBAPI)
+
+$(TESTS_SO): FORCE
+	$(QUIET_LINK)$(CC) $(INCLUDE) $(CFLAGS) -L.. -o $@ $(subst -so,.c,$@) $(LIBAPI) -lperf
+
+all: $(TESTS_A) $(TESTS_SO)
+
+run:
+	@echo "running static:"
+	@for i in $(TESTS_A); do ./$$i; done
+	@echo "running dynamic:"
+	@for i in $(TESTS_SO); do LD_LIBRARY_PATH=../ ./$$i; done
+
+clean:
+	$(call QUIET_CLEAN, tests)$(RM) $(TESTS_A) $(TESTS_SO)
+
+.PHONY: all clean FORCE
diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c
new file mode 100644
index 000000000..c70e9e03a
--- /dev/null
+++ b/tools/lib/perf/tests/test-cpumap.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdarg.h>
+#include <stdio.h>
+#include <perf/cpumap.h>
+#include <internal/tests.h>
+
+static int libperf_print(enum libperf_print_level level,
+			 const char *fmt, va_list ap)
+{
+	return vfprintf(stderr, fmt, ap);
+}
+
+int main(int argc, char **argv)
+{
+	struct perf_cpu_map *cpus;
+
+	__T_START;
+
+	libperf_init(libperf_print);
+
+	cpus = perf_cpu_map__dummy_new();
+	if (!cpus)
+		return -1;
+
+	perf_cpu_map__get(cpus);
+	perf_cpu_map__put(cpus);
+	perf_cpu_map__put(cpus);
+
+	__T_END;
+	return tests_failed == 0 ? 0 : -1;
+}
diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c
new file mode 100644
index 000000000..60b5d1801
--- /dev/null
+++ b/tools/lib/perf/tests/test-evlist.c
@@ -0,0 +1,414 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE // needed for sched.h to get sched_[gs]etaffinity and CPU_(ZERO,SET)
+#include <sched.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <linux/perf_event.h>
+#include <linux/limits.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <perf/cpumap.h>
+#include <perf/threadmap.h>
+#include <perf/evlist.h>
+#include <perf/evsel.h>
+#include <perf/mmap.h>
+#include <perf/event.h>
+#include <internal/tests.h>
+#include <api/fs/fs.h>
+
+static int libperf_print(enum libperf_print_level level,
+			 const char *fmt, va_list ap)
+{
+	return vfprintf(stderr, fmt, ap);
+}
+
+static int test_stat_cpu(void)
+{
+	struct perf_cpu_map *cpus;
+	struct perf_evlist *evlist;
+	struct perf_evsel *evsel;
+	struct perf_event_attr attr1 = {
+		.type	= PERF_TYPE_SOFTWARE,
+		.config	= PERF_COUNT_SW_CPU_CLOCK,
+	};
+	struct perf_event_attr attr2 = {
+		.type	= PERF_TYPE_SOFTWARE,
+		.config	= PERF_COUNT_SW_TASK_CLOCK,
+	};
+	int err, idx;
+
+	cpus = perf_cpu_map__new(NULL);
+	__T("failed to create cpus", cpus);
+
+	evlist = perf_evlist__new();
+	__T("failed to create evlist", evlist);
+
+	evsel = perf_evsel__new(&attr1);
+	__T("failed to create evsel1", evsel);
+
+	perf_evlist__add(evlist, evsel);
+
+	evsel = perf_evsel__new(&attr2);
+	__T("failed to create evsel2", evsel);
+
+	perf_evlist__add(evlist, evsel);
+
+	perf_evlist__set_maps(evlist, cpus, NULL);
+
+	err = perf_evlist__open(evlist);
+	__T("failed to open evsel", err == 0);
+
+	perf_evlist__for_each_evsel(evlist, evsel) {
+		cpus = perf_evsel__cpus(evsel);
+
+		for (idx = 0; idx < perf_cpu_map__nr(cpus); idx++) {
+			struct perf_counts_values counts = { .val = 0 };
+
+			perf_evsel__read(evsel, idx, 0, &counts);
+			__T("failed to read value for evsel", counts.val != 0);
+		}
+	}
+
+	perf_evlist__close(evlist);
+	perf_evlist__delete(evlist);
+
+	perf_cpu_map__put(cpus);
+	return 0;
+}
+
+static int test_stat_thread(void)
+{
+	struct perf_counts_values counts = { .val = 0 };
+	struct perf_thread_map *threads;
+	struct perf_evlist *evlist;
+	struct perf_evsel *evsel;
+	struct perf_event_attr attr1 = {
+		.type	= PERF_TYPE_SOFTWARE,
+		.config	= PERF_COUNT_SW_CPU_CLOCK,
+	};
+	struct perf_event_attr attr2 = {
+		.type	= PERF_TYPE_SOFTWARE,
+		.config	= PERF_COUNT_SW_TASK_CLOCK,
+	};
+	int err;
+
+	threads = perf_thread_map__new_dummy();
+	__T("failed to create threads", threads);
+
+	perf_thread_map__set_pid(threads, 0, 0);
+
+	evlist = perf_evlist__new();
+	__T("failed to create evlist", evlist);
+
+	evsel = perf_evsel__new(&attr1);
+	__T("failed to create evsel1", evsel);
+
+	perf_evlist__add(evlist, evsel);
+
+	evsel = perf_evsel__new(&attr2);
+	__T("failed to create evsel2", evsel);
+
+	perf_evlist__add(evlist, evsel);
+
+	perf_evlist__set_maps(evlist, NULL, threads);
+
+	err = perf_evlist__open(evlist);
+	__T("failed to open evsel", err == 0);
+
+	perf_evlist__for_each_evsel(evlist, evsel) {
+		perf_evsel__read(evsel, 0, 0, &counts);
+		__T("failed to read value for evsel", counts.val != 0);
+	}
+
+	perf_evlist__close(evlist);
+	perf_evlist__delete(evlist);
+
+	perf_thread_map__put(threads);
+	return 0;
+}
+
+static int test_stat_thread_enable(void)
+{
+	struct perf_counts_values counts = { .val = 0 };
+	struct perf_thread_map *threads;
+	struct perf_evlist *evlist;
+	struct perf_evsel *evsel;
+	struct perf_event_attr attr1 = {
+		.type	  = PERF_TYPE_SOFTWARE,
+		.config	  = PERF_COUNT_SW_CPU_CLOCK,
+		.disabled = 1,
+	};
+	struct perf_event_attr attr2 = {
+		.type	  = PERF_TYPE_SOFTWARE,
+		.config	  = PERF_COUNT_SW_TASK_CLOCK,
+		.disabled = 1,
+	};
+	int err;
+
+	threads = perf_thread_map__new_dummy();
+	__T("failed to create threads", threads);
+
+	perf_thread_map__set_pid(threads, 0, 0);
+
+	evlist = perf_evlist__new();
+	__T("failed to create evlist", evlist);
+
+	evsel = perf_evsel__new(&attr1);
+	__T("failed to create evsel1", evsel);
+
+	perf_evlist__add(evlist, evsel);
+
+	evsel = perf_evsel__new(&attr2);
+	__T("failed to create evsel2", evsel);
+
+	perf_evlist__add(evlist, evsel);
+
+	perf_evlist__set_maps(evlist, NULL, threads);
+
+	err = perf_evlist__open(evlist);
+	__T("failed to open evsel", err == 0);
+
+	perf_evlist__for_each_evsel(evlist, evsel) {
+		perf_evsel__read(evsel, 0, 0, &counts);
+		__T("failed to read value for evsel", counts.val == 0);
+	}
+
+	perf_evlist__enable(evlist);
+
+	perf_evlist__for_each_evsel(evlist, evsel) {
+		perf_evsel__read(evsel, 0, 0, &counts);
+		__T("failed to read value for evsel", counts.val != 0);
+	}
+
+	perf_evlist__disable(evlist);
+
+	perf_evlist__close(evlist);
+	perf_evlist__delete(evlist);
+
+	perf_thread_map__put(threads);
+	return 0;
+}
+
+static int test_mmap_thread(void)
+{
+	struct perf_evlist *evlist;
+	struct perf_evsel *evsel;
+	struct perf_mmap *map;
+	struct perf_cpu_map *cpus;
+	struct perf_thread_map *threads;
+	struct perf_event_attr attr = {
+		.type             = PERF_TYPE_TRACEPOINT,
+		.sample_period    = 1,
+		.wakeup_watermark = 1,
+		.disabled         = 1,
+	};
+	char path[PATH_MAX];
+	int id, err, pid, go_pipe[2];
+	union perf_event *event;
+	char bf;
+	int count = 0;
+
+	snprintf(path, PATH_MAX, "%s/kernel/debug/tracing/events/syscalls/sys_enter_prctl/id",
+		 sysfs__mountpoint());
+
+	if (filename__read_int(path, &id)) {
+		tests_failed++;
+		fprintf(stderr, "error: failed to get tracepoint id: %s\n", path);
+		return -1;
+	}
+
+	attr.config = id;
+
+	err = pipe(go_pipe);
+	__T("failed to create pipe", err == 0);
+
+	fflush(NULL);
+
+	pid = fork();
+	if (!pid) {
+		int i;
+
+		read(go_pipe[0], &bf, 1);
+
+		/* Generate 100 prctl calls. */
+		for (i = 0; i < 100; i++)
+			prctl(0, 0, 0, 0, 0);
+
+		exit(0);
+	}
+
+	threads = perf_thread_map__new_dummy();
+	__T("failed to create threads", threads);
+
+	cpus = perf_cpu_map__dummy_new();
+	__T("failed to create cpus", cpus);
+
+	perf_thread_map__set_pid(threads, 0, pid);
+
+	evlist = perf_evlist__new();
+	__T("failed to create evlist", evlist);
+
+	evsel = perf_evsel__new(&attr);
+	__T("failed to create evsel1", evsel);
+
+	perf_evlist__add(evlist, evsel);
+
+	perf_evlist__set_maps(evlist, cpus, threads);
+
+	err = perf_evlist__open(evlist);
+	__T("failed to open evlist", err == 0);
+
+	err = perf_evlist__mmap(evlist, 4);
+	__T("failed to mmap evlist", err == 0);
+
+	perf_evlist__enable(evlist);
+
+	/* kick the child and wait for it to finish */
+	write(go_pipe[1], &bf, 1);
+	waitpid(pid, NULL, 0);
+
+	/*
+	 * There's no need to call perf_evlist__disable,
+	 * monitored process is dead now.
+	 */
+
+	perf_evlist__for_each_mmap(evlist, map, false) {
+		if (perf_mmap__read_init(map) < 0)
+			continue;
+
+		while ((event = perf_mmap__read_event(map)) != NULL) {
+			count++;
+			perf_mmap__consume(map);
+		}
+
+		perf_mmap__read_done(map);
+	}
+
+	/* calls perf_evlist__munmap/perf_evlist__close */
+	perf_evlist__delete(evlist);
+
+	perf_thread_map__put(threads);
+	perf_cpu_map__put(cpus);
+
+	/*
+	 * The generated prctl calls should match the
+	 * number of events in the buffer.
+	 */
+	__T("failed count", count == 100);
+
+	return 0;
+}
+
+static int test_mmap_cpus(void)
+{
+	struct perf_evlist *evlist;
+	struct perf_evsel *evsel;
+	struct perf_mmap *map;
+	struct perf_cpu_map *cpus;
+	struct perf_event_attr attr = {
+		.type             = PERF_TYPE_TRACEPOINT,
+		.sample_period    = 1,
+		.wakeup_watermark = 1,
+		.disabled         = 1,
+	};
+	cpu_set_t saved_mask;
+	char path[PATH_MAX];
+	int id, err, cpu, tmp;
+	union perf_event *event;
+	int count = 0;
+
+	snprintf(path, PATH_MAX, "%s/kernel/debug/tracing/events/syscalls/sys_enter_prctl/id",
+		 sysfs__mountpoint());
+
+	if (filename__read_int(path, &id)) {
+		fprintf(stderr, "error: failed to get tracepoint id: %s\n", path);
+		return -1;
+	}
+
+	attr.config = id;
+
+	cpus = perf_cpu_map__new(NULL);
+	__T("failed to create cpus", cpus);
+
+	evlist = perf_evlist__new();
+	__T("failed to create evlist", evlist);
+
+	evsel = perf_evsel__new(&attr);
+	__T("failed to create evsel1", evsel);
+
+	perf_evlist__add(evlist, evsel);
+
+	perf_evlist__set_maps(evlist, cpus, NULL);
+
+	err = perf_evlist__open(evlist);
+	__T("failed to open evlist", err == 0);
+
+	err = perf_evlist__mmap(evlist, 4);
+	__T("failed to mmap evlist", err == 0);
+
+	perf_evlist__enable(evlist);
+
+	err = sched_getaffinity(0, sizeof(saved_mask), &saved_mask);
+	__T("sched_getaffinity failed", err == 0);
+
+	perf_cpu_map__for_each_cpu(cpu, tmp, cpus) {
+		cpu_set_t mask;
+
+		CPU_ZERO(&mask);
+		CPU_SET(cpu, &mask);
+
+		err = sched_setaffinity(0, sizeof(mask), &mask);
+		__T("sched_setaffinity failed", err == 0);
+
+		prctl(0, 0, 0, 0, 0);
+	}
+
+	err = sched_setaffinity(0, sizeof(saved_mask), &saved_mask);
+	__T("sched_setaffinity failed", err == 0);
+
+	perf_evlist__disable(evlist);
+
+	perf_evlist__for_each_mmap(evlist, map, false) {
+		if (perf_mmap__read_init(map) < 0)
+			continue;
+
+		while ((event = perf_mmap__read_event(map)) != NULL) {
+			count++;
+			perf_mmap__consume(map);
+		}
+
+		perf_mmap__read_done(map);
+	}
+
+	/* calls perf_evlist__munmap/perf_evlist__close */
+	perf_evlist__delete(evlist);
+
+	/*
+	 * The generated prctl events should match the
+	 * number of cpus or be bigger (we are system-wide).
+	 */
+	__T("failed count", count >= perf_cpu_map__nr(cpus));
+
+	perf_cpu_map__put(cpus);
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	__T_START;
+
+	libperf_init(libperf_print);
+
+	test_stat_cpu();
+	test_stat_thread();
+	test_stat_thread_enable();
+	test_mmap_thread();
+	test_mmap_cpus();
+
+	__T_END;
+	return tests_failed == 0 ? 0 : -1;
+}
diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c
new file mode 100644
index 000000000..2de98768d
--- /dev/null
+++ b/tools/lib/perf/tests/test-evsel.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdarg.h>
+#include <stdio.h>
+#include <linux/perf_event.h>
+#include <perf/cpumap.h>
+#include <perf/threadmap.h>
+#include <perf/evsel.h>
+#include <internal/tests.h>
+
+static int libperf_print(enum libperf_print_level level,
+			 const char *fmt, va_list ap)
+{
+	return vfprintf(stderr, fmt, ap);
+}
+
+static int test_stat_cpu(void)
+{
+	struct perf_cpu_map *cpus;
+	struct perf_evsel *evsel;
+	struct perf_event_attr attr = {
+		.type	= PERF_TYPE_SOFTWARE,
+		.config	= PERF_COUNT_SW_CPU_CLOCK,
+	};
+	int err, idx;
+
+	cpus = perf_cpu_map__new(NULL);
+	__T("failed to create cpus", cpus);
+
+	evsel = perf_evsel__new(&attr);
+	__T("failed to create evsel", evsel);
+
+	err = perf_evsel__open(evsel, cpus, NULL);
+	__T("failed to open evsel", err == 0);
+
+	for (idx = 0; idx < perf_cpu_map__nr(cpus); idx++) {
+		struct perf_counts_values counts = { .val = 0 };
+
+		perf_evsel__read(evsel, idx, 0, &counts);
+		__T("failed to read value for evsel", counts.val != 0);
+	}
+
+	perf_evsel__close(evsel);
+	perf_evsel__delete(evsel);
+
+	perf_cpu_map__put(cpus);
+	return 0;
+}
+
+static int test_stat_thread(void)
+{
+	struct perf_counts_values counts = { .val = 0 };
+	struct perf_thread_map *threads;
+	struct perf_evsel *evsel;
+	struct perf_event_attr attr = {
+		.type	= PERF_TYPE_SOFTWARE,
+		.config	= PERF_COUNT_SW_TASK_CLOCK,
+	};
+	int err;
+
+	threads = perf_thread_map__new_dummy();
+	__T("failed to create threads", threads);
+
+	perf_thread_map__set_pid(threads, 0, 0);
+
+	evsel = perf_evsel__new(&attr);
+	__T("failed to create evsel", evsel);
+
+	err = perf_evsel__open(evsel, NULL, threads);
+	__T("failed to open evsel", err == 0);
+
+	perf_evsel__read(evsel, 0, 0, &counts);
+	__T("failed to read value for evsel", counts.val != 0);
+
+	perf_evsel__close(evsel);
+	perf_evsel__delete(evsel);
+
+	perf_thread_map__put(threads);
+	return 0;
+}
+
+static int test_stat_thread_enable(void)
+{
+	struct perf_counts_values counts = { .val = 0 };
+	struct perf_thread_map *threads;
+	struct perf_evsel *evsel;
+	struct perf_event_attr attr = {
+		.type	  = PERF_TYPE_SOFTWARE,
+		.config	  = PERF_COUNT_SW_TASK_CLOCK,
+		.disabled = 1,
+	};
+	int err;
+
+	threads = perf_thread_map__new_dummy();
+	__T("failed to create threads", threads);
+
+	perf_thread_map__set_pid(threads, 0, 0);
+
+	evsel = perf_evsel__new(&attr);
+	__T("failed to create evsel", evsel);
+
+	err = perf_evsel__open(evsel, NULL, threads);
+	__T("failed to open evsel", err == 0);
+
+	perf_evsel__read(evsel, 0, 0, &counts);
+	__T("failed to read value for evsel", counts.val == 0);
+
+	err = perf_evsel__enable(evsel);
+	__T("failed to enable evsel", err == 0);
+
+	perf_evsel__read(evsel, 0, 0, &counts);
+	__T("failed to read value for evsel", counts.val != 0);
+
+	err = perf_evsel__disable(evsel);
+	__T("failed to enable evsel", err == 0);
+
+	perf_evsel__close(evsel);
+	perf_evsel__delete(evsel);
+
+	perf_thread_map__put(threads);
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	__T_START;
+
+	libperf_init(libperf_print);
+
+	test_stat_cpu();
+	test_stat_thread();
+	test_stat_thread_enable();
+
+	__T_END;
+	return tests_failed == 0 ? 0 : -1;
+}
diff --git a/tools/lib/perf/tests/test-threadmap.c b/tools/lib/perf/tests/test-threadmap.c
new file mode 100644
index 000000000..384471441
--- /dev/null
+++ b/tools/lib/perf/tests/test-threadmap.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdarg.h>
+#include <stdio.h>
+#include <perf/threadmap.h>
+#include <internal/tests.h>
+
+static int libperf_print(enum libperf_print_level level,
+			 const char *fmt, va_list ap)
+{
+	return vfprintf(stderr, fmt, ap);
+}
+
+int main(int argc, char **argv)
+{
+	struct perf_thread_map *threads;
+
+	__T_START;
+
+	libperf_init(libperf_print);
+
+	threads = perf_thread_map__new_dummy();
+	if (!threads)
+		return -1;
+
+	perf_thread_map__get(threads);
+	perf_thread_map__put(threads);
+	perf_thread_map__put(threads);
+
+	__T_END;
+	return tests_failed == 0 ? 0 : -1;
+}
diff --git a/tools/lib/perf/threadmap.c b/tools/lib/perf/threadmap.c
new file mode 100644
index 000000000..e92c368b0
--- /dev/null
+++ b/tools/lib/perf/threadmap.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <perf/threadmap.h>
+#include <stdlib.h>
+#include <linux/refcount.h>
+#include <internal/threadmap.h>
+#include <string.h>
+#include <asm/bug.h>
+#include <stdio.h>
+
+static void perf_thread_map__reset(struct perf_thread_map *map, int start, int nr)
+{
+	size_t size = (nr - start) * sizeof(map->map[0]);
+
+	memset(&map->map[start], 0, size);
+	map->err_thread = -1;
+}
+
+struct perf_thread_map *perf_thread_map__realloc(struct perf_thread_map *map, int nr)
+{
+	size_t size = sizeof(*map) + sizeof(map->map[0]) * nr;
+	int start = map ? map->nr : 0;
+
+	map = realloc(map, size);
+	/*
+	 * We only realloc to add more items, let's reset new items.
+	 */
+	if (map)
+		perf_thread_map__reset(map, start, nr);
+
+	return map;
+}
+
+#define thread_map__alloc(__nr) perf_thread_map__realloc(NULL, __nr)
+
+void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid)
+{
+	map->map[thread].pid = pid;
+}
+
+char *perf_thread_map__comm(struct perf_thread_map *map, int thread)
+{
+	return map->map[thread].comm;
+}
+
+struct perf_thread_map *perf_thread_map__new_dummy(void)
+{
+	struct perf_thread_map *threads = thread_map__alloc(1);
+
+	if (threads != NULL) {
+		perf_thread_map__set_pid(threads, 0, -1);
+		threads->nr = 1;
+		refcount_set(&threads->refcnt, 1);
+	}
+	return threads;
+}
+
+static void perf_thread_map__delete(struct perf_thread_map *threads)
+{
+	if (threads) {
+		int i;
+
+		WARN_ONCE(refcount_read(&threads->refcnt) != 0,
+			  "thread map refcnt unbalanced\n");
+		for (i = 0; i < threads->nr; i++)
+			free(perf_thread_map__comm(threads, i));
+		free(threads);
+	}
+}
+
+struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map)
+{
+	if (map)
+		refcount_inc(&map->refcnt);
+	return map;
+}
+
+void perf_thread_map__put(struct perf_thread_map *map)
+{
+	if (map && refcount_dec_and_test(&map->refcnt))
+		perf_thread_map__delete(map);
+}
+
+int perf_thread_map__nr(struct perf_thread_map *threads)
+{
+	return threads ? threads->nr : 1;
+}
+
+pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread)
+{
+	return map->map[thread].pid;
+}
diff --git a/tools/lib/perf/xyarray.c b/tools/lib/perf/xyarray.c
new file mode 100644
index 000000000..dcd901d15
--- /dev/null
+++ b/tools/lib/perf/xyarray.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <internal/xyarray.h>
+#include <linux/zalloc.h>
+#include <stdlib.h>
+#include <string.h>
+
+struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size)
+{
+	size_t row_size = ylen * entry_size;
+	struct xyarray *xy = zalloc(sizeof(*xy) + xlen * row_size);
+
+	if (xy != NULL) {
+		xy->entry_size = entry_size;
+		xy->row_size   = row_size;
+		xy->entries    = xlen * ylen;
+		xy->max_x      = xlen;
+		xy->max_y      = ylen;
+	}
+
+	return xy;
+}
+
+void xyarray__reset(struct xyarray *xy)
+{
+	size_t n = xy->entries * xy->entry_size;
+
+	memset(xy->contents, 0, n);
+}
+
+void xyarray__delete(struct xyarray *xy)
+{
+	free(xy);
+}
diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c
new file mode 100644
index 000000000..727396de6
--- /dev/null
+++ b/tools/lib/rbtree.c
@@ -0,0 +1,597 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+  (C) 2002  David Woodhouse <dwmw2@infradead.org>
+  (C) 2012  Michel Lespinasse <walken@google.com>
+
+
+  linux/lib/rbtree.c
+*/
+
+#include <linux/rbtree_augmented.h>
+#include <linux/export.h>
+
+/*
+ * red-black trees properties:  https://en.wikipedia.org/wiki/Rbtree
+ *
+ *  1) A node is either red or black
+ *  2) The root is black
+ *  3) All leaves (NULL) are black
+ *  4) Both children of every red node are black
+ *  5) Every simple path from root to leaves contains the same number
+ *     of black nodes.
+ *
+ *  4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two
+ *  consecutive red nodes in a path and every red node is therefore followed by
+ *  a black. So if B is the number of black nodes on every simple path (as per
+ *  5), then the longest possible path due to 4 is 2B.
+ *
+ *  We shall indicate color with case, where black nodes are uppercase and red
+ *  nodes will be lowercase. Unknown color nodes shall be drawn as red within
+ *  parentheses and have some accompanying text comment.
+ */
+
+/*
+ * Notes on lockless lookups:
+ *
+ * All stores to the tree structure (rb_left and rb_right) must be done using
+ * WRITE_ONCE(). And we must not inadvertently cause (temporary) loops in the
+ * tree structure as seen in program order.
+ *
+ * These two requirements will allow lockless iteration of the tree -- not
+ * correct iteration mind you, tree rotations are not atomic so a lookup might
+ * miss entire subtrees.
+ *
+ * But they do guarantee that any such traversal will only see valid elements
+ * and that it will indeed complete -- does not get stuck in a loop.
+ *
+ * It also guarantees that if the lookup returns an element it is the 'correct'
+ * one. But not returning an element does _NOT_ mean it's not present.
+ *
+ * NOTE:
+ *
+ * Stores to __rb_parent_color are not important for simple lookups so those
+ * are left undone as of now. Nor did I check for loops involving parent
+ * pointers.
+ */
+
+static inline void rb_set_black(struct rb_node *rb)
+{
+	rb->__rb_parent_color |= RB_BLACK;
+}
+
+static inline struct rb_node *rb_red_parent(struct rb_node *red)
+{
+	return (struct rb_node *)red->__rb_parent_color;
+}
+
+/*
+ * Helper function for rotations:
+ * - old's parent and color get assigned to new
+ * - old gets assigned new as a parent and 'color' as a color.
+ */
+static inline void
+__rb_rotate_set_parents(struct rb_node *old, struct rb_node *new,
+			struct rb_root *root, int color)
+{
+	struct rb_node *parent = rb_parent(old);
+	new->__rb_parent_color = old->__rb_parent_color;
+	rb_set_parent_color(old, new, color);
+	__rb_change_child(old, new, parent, root);
+}
+
+static __always_inline void
+__rb_insert(struct rb_node *node, struct rb_root *root,
+	    void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+	struct rb_node *parent = rb_red_parent(node), *gparent, *tmp;
+
+	while (true) {
+		/*
+		 * Loop invariant: node is red.
+		 */
+		if (unlikely(!parent)) {
+			/*
+			 * The inserted node is root. Either this is the
+			 * first node, or we recursed at Case 1 below and
+			 * are no longer violating 4).
+			 */
+			rb_set_parent_color(node, NULL, RB_BLACK);
+			break;
+		}
+
+		/*
+		 * If there is a black parent, we are done.
+		 * Otherwise, take some corrective action as,
+		 * per 4), we don't want a red root or two
+		 * consecutive red nodes.
+		 */
+		if(rb_is_black(parent))
+			break;
+
+		gparent = rb_red_parent(parent);
+
+		tmp = gparent->rb_right;
+		if (parent != tmp) {	/* parent == gparent->rb_left */
+			if (tmp && rb_is_red(tmp)) {
+				/*
+				 * Case 1 - node's uncle is red (color flips).
+				 *
+				 *       G            g
+				 *      / \          / \
+				 *     p   u  -->   P   U
+				 *    /            /
+				 *   n            n
+				 *
+				 * However, since g's parent might be red, and
+				 * 4) does not allow this, we need to recurse
+				 * at g.
+				 */
+				rb_set_parent_color(tmp, gparent, RB_BLACK);
+				rb_set_parent_color(parent, gparent, RB_BLACK);
+				node = gparent;
+				parent = rb_parent(node);
+				rb_set_parent_color(node, parent, RB_RED);
+				continue;
+			}
+
+			tmp = parent->rb_right;
+			if (node == tmp) {
+				/*
+				 * Case 2 - node's uncle is black and node is
+				 * the parent's right child (left rotate at parent).
+				 *
+				 *      G             G
+				 *     / \           / \
+				 *    p   U  -->    n   U
+				 *     \           /
+				 *      n         p
+				 *
+				 * This still leaves us in violation of 4), the
+				 * continuation into Case 3 will fix that.
+				 */
+				tmp = node->rb_left;
+				WRITE_ONCE(parent->rb_right, tmp);
+				WRITE_ONCE(node->rb_left, parent);
+				if (tmp)
+					rb_set_parent_color(tmp, parent,
+							    RB_BLACK);
+				rb_set_parent_color(parent, node, RB_RED);
+				augment_rotate(parent, node);
+				parent = node;
+				tmp = node->rb_right;
+			}
+
+			/*
+			 * Case 3 - node's uncle is black and node is
+			 * the parent's left child (right rotate at gparent).
+			 *
+			 *        G           P
+			 *       / \         / \
+			 *      p   U  -->  n   g
+			 *     /                 \
+			 *    n                   U
+			 */
+			WRITE_ONCE(gparent->rb_left, tmp); /* == parent->rb_right */
+			WRITE_ONCE(parent->rb_right, gparent);
+			if (tmp)
+				rb_set_parent_color(tmp, gparent, RB_BLACK);
+			__rb_rotate_set_parents(gparent, parent, root, RB_RED);
+			augment_rotate(gparent, parent);
+			break;
+		} else {
+			tmp = gparent->rb_left;
+			if (tmp && rb_is_red(tmp)) {
+				/* Case 1 - color flips */
+				rb_set_parent_color(tmp, gparent, RB_BLACK);
+				rb_set_parent_color(parent, gparent, RB_BLACK);
+				node = gparent;
+				parent = rb_parent(node);
+				rb_set_parent_color(node, parent, RB_RED);
+				continue;
+			}
+
+			tmp = parent->rb_left;
+			if (node == tmp) {
+				/* Case 2 - right rotate at parent */
+				tmp = node->rb_right;
+				WRITE_ONCE(parent->rb_left, tmp);
+				WRITE_ONCE(node->rb_right, parent);
+				if (tmp)
+					rb_set_parent_color(tmp, parent,
+							    RB_BLACK);
+				rb_set_parent_color(parent, node, RB_RED);
+				augment_rotate(parent, node);
+				parent = node;
+				tmp = node->rb_left;
+			}
+
+			/* Case 3 - left rotate at gparent */
+			WRITE_ONCE(gparent->rb_right, tmp); /* == parent->rb_left */
+			WRITE_ONCE(parent->rb_left, gparent);
+			if (tmp)
+				rb_set_parent_color(tmp, gparent, RB_BLACK);
+			__rb_rotate_set_parents(gparent, parent, root, RB_RED);
+			augment_rotate(gparent, parent);
+			break;
+		}
+	}
+}
+
+/*
+ * Inline version for rb_erase() use - we want to be able to inline
+ * and eliminate the dummy_rotate callback there
+ */
+static __always_inline void
+____rb_erase_color(struct rb_node *parent, struct rb_root *root,
+	void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+	struct rb_node *node = NULL, *sibling, *tmp1, *tmp2;
+
+	while (true) {
+		/*
+		 * Loop invariants:
+		 * - node is black (or NULL on first iteration)
+		 * - node is not the root (parent is not NULL)
+		 * - All leaf paths going through parent and node have a
+		 *   black node count that is 1 lower than other leaf paths.
+		 */
+		sibling = parent->rb_right;
+		if (node != sibling) {	/* node == parent->rb_left */
+			if (rb_is_red(sibling)) {
+				/*
+				 * Case 1 - left rotate at parent
+				 *
+				 *     P               S
+				 *    / \             / \
+				 *   N   s    -->    p   Sr
+				 *      / \         / \
+				 *     Sl  Sr      N   Sl
+				 */
+				tmp1 = sibling->rb_left;
+				WRITE_ONCE(parent->rb_right, tmp1);
+				WRITE_ONCE(sibling->rb_left, parent);
+				rb_set_parent_color(tmp1, parent, RB_BLACK);
+				__rb_rotate_set_parents(parent, sibling, root,
+							RB_RED);
+				augment_rotate(parent, sibling);
+				sibling = tmp1;
+			}
+			tmp1 = sibling->rb_right;
+			if (!tmp1 || rb_is_black(tmp1)) {
+				tmp2 = sibling->rb_left;
+				if (!tmp2 || rb_is_black(tmp2)) {
+					/*
+					 * Case 2 - sibling color flip
+					 * (p could be either color here)
+					 *
+					 *    (p)           (p)
+					 *    / \           / \
+					 *   N   S    -->  N   s
+					 *      / \           / \
+					 *     Sl  Sr        Sl  Sr
+					 *
+					 * This leaves us violating 5) which
+					 * can be fixed by flipping p to black
+					 * if it was red, or by recursing at p.
+					 * p is red when coming from Case 1.
+					 */
+					rb_set_parent_color(sibling, parent,
+							    RB_RED);
+					if (rb_is_red(parent))
+						rb_set_black(parent);
+					else {
+						node = parent;
+						parent = rb_parent(node);
+						if (parent)
+							continue;
+					}
+					break;
+				}
+				/*
+				 * Case 3 - right rotate at sibling
+				 * (p could be either color here)
+				 *
+				 *   (p)           (p)
+				 *   / \           / \
+				 *  N   S    -->  N   sl
+				 *     / \             \
+				 *    sl  Sr            S
+				 *                       \
+				 *                        Sr
+				 *
+				 * Note: p might be red, and then both
+				 * p and sl are red after rotation(which
+				 * breaks property 4). This is fixed in
+				 * Case 4 (in __rb_rotate_set_parents()
+				 *         which set sl the color of p
+				 *         and set p RB_BLACK)
+				 *
+				 *   (p)            (sl)
+				 *   / \            /  \
+				 *  N   sl   -->   P    S
+				 *       \        /      \
+				 *        S      N        Sr
+				 *         \
+				 *          Sr
+				 */
+				tmp1 = tmp2->rb_right;
+				WRITE_ONCE(sibling->rb_left, tmp1);
+				WRITE_ONCE(tmp2->rb_right, sibling);
+				WRITE_ONCE(parent->rb_right, tmp2);
+				if (tmp1)
+					rb_set_parent_color(tmp1, sibling,
+							    RB_BLACK);
+				augment_rotate(sibling, tmp2);
+				tmp1 = sibling;
+				sibling = tmp2;
+			}
+			/*
+			 * Case 4 - left rotate at parent + color flips
+			 * (p and sl could be either color here.
+			 *  After rotation, p becomes black, s acquires
+			 *  p's color, and sl keeps its color)
+			 *
+			 *      (p)             (s)
+			 *      / \             / \
+			 *     N   S     -->   P   Sr
+			 *        / \         / \
+			 *      (sl) sr      N  (sl)
+			 */
+			tmp2 = sibling->rb_left;
+			WRITE_ONCE(parent->rb_right, tmp2);
+			WRITE_ONCE(sibling->rb_left, parent);
+			rb_set_parent_color(tmp1, sibling, RB_BLACK);
+			if (tmp2)
+				rb_set_parent(tmp2, parent);
+			__rb_rotate_set_parents(parent, sibling, root,
+						RB_BLACK);
+			augment_rotate(parent, sibling);
+			break;
+		} else {
+			sibling = parent->rb_left;
+			if (rb_is_red(sibling)) {
+				/* Case 1 - right rotate at parent */
+				tmp1 = sibling->rb_right;
+				WRITE_ONCE(parent->rb_left, tmp1);
+				WRITE_ONCE(sibling->rb_right, parent);
+				rb_set_parent_color(tmp1, parent, RB_BLACK);
+				__rb_rotate_set_parents(parent, sibling, root,
+							RB_RED);
+				augment_rotate(parent, sibling);
+				sibling = tmp1;
+			}
+			tmp1 = sibling->rb_left;
+			if (!tmp1 || rb_is_black(tmp1)) {
+				tmp2 = sibling->rb_right;
+				if (!tmp2 || rb_is_black(tmp2)) {
+					/* Case 2 - sibling color flip */
+					rb_set_parent_color(sibling, parent,
+							    RB_RED);
+					if (rb_is_red(parent))
+						rb_set_black(parent);
+					else {
+						node = parent;
+						parent = rb_parent(node);
+						if (parent)
+							continue;
+					}
+					break;
+				}
+				/* Case 3 - left rotate at sibling */
+				tmp1 = tmp2->rb_left;
+				WRITE_ONCE(sibling->rb_right, tmp1);
+				WRITE_ONCE(tmp2->rb_left, sibling);
+				WRITE_ONCE(parent->rb_left, tmp2);
+				if (tmp1)
+					rb_set_parent_color(tmp1, sibling,
+							    RB_BLACK);
+				augment_rotate(sibling, tmp2);
+				tmp1 = sibling;
+				sibling = tmp2;
+			}
+			/* Case 4 - right rotate at parent + color flips */
+			tmp2 = sibling->rb_right;
+			WRITE_ONCE(parent->rb_left, tmp2);
+			WRITE_ONCE(sibling->rb_right, parent);
+			rb_set_parent_color(tmp1, sibling, RB_BLACK);
+			if (tmp2)
+				rb_set_parent(tmp2, parent);
+			__rb_rotate_set_parents(parent, sibling, root,
+						RB_BLACK);
+			augment_rotate(parent, sibling);
+			break;
+		}
+	}
+}
+
+/* Non-inline version for rb_erase_augmented() use */
+void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
+	void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+	____rb_erase_color(parent, root, augment_rotate);
+}
+
+/*
+ * Non-augmented rbtree manipulation functions.
+ *
+ * We use dummy augmented callbacks here, and have the compiler optimize them
+ * out of the rb_insert_color() and rb_erase() function definitions.
+ */
+
+static inline void dummy_propagate(struct rb_node *node, struct rb_node *stop) {}
+static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {}
+static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {}
+
+static const struct rb_augment_callbacks dummy_callbacks = {
+	.propagate = dummy_propagate,
+	.copy = dummy_copy,
+	.rotate = dummy_rotate
+};
+
+void rb_insert_color(struct rb_node *node, struct rb_root *root)
+{
+	__rb_insert(node, root, dummy_rotate);
+}
+
+void rb_erase(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *rebalance;
+	rebalance = __rb_erase_augmented(node, root, &dummy_callbacks);
+	if (rebalance)
+		____rb_erase_color(rebalance, root, dummy_rotate);
+}
+
+/*
+ * Augmented rbtree manipulation functions.
+ *
+ * This instantiates the same __always_inline functions as in the non-augmented
+ * case, but this time with user-defined callbacks.
+ */
+
+void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+	void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+	__rb_insert(node, root, augment_rotate);
+}
+
+/*
+ * This function returns the first node (in sort order) of the tree.
+ */
+struct rb_node *rb_first(const struct rb_root *root)
+{
+	struct rb_node	*n;
+
+	n = root->rb_node;
+	if (!n)
+		return NULL;
+	while (n->rb_left)
+		n = n->rb_left;
+	return n;
+}
+
+struct rb_node *rb_last(const struct rb_root *root)
+{
+	struct rb_node	*n;
+
+	n = root->rb_node;
+	if (!n)
+		return NULL;
+	while (n->rb_right)
+		n = n->rb_right;
+	return n;
+}
+
+struct rb_node *rb_next(const struct rb_node *node)
+{
+	struct rb_node *parent;
+
+	if (RB_EMPTY_NODE(node))
+		return NULL;
+
+	/*
+	 * If we have a right-hand child, go down and then left as far
+	 * as we can.
+	 */
+	if (node->rb_right) {
+		node = node->rb_right;
+		while (node->rb_left)
+			node = node->rb_left;
+		return (struct rb_node *)node;
+	}
+
+	/*
+	 * No right-hand children. Everything down and left is smaller than us,
+	 * so any 'next' node must be in the general direction of our parent.
+	 * Go up the tree; any time the ancestor is a right-hand child of its
+	 * parent, keep going up. First time it's a left-hand child of its
+	 * parent, said parent is our 'next' node.
+	 */
+	while ((parent = rb_parent(node)) && node == parent->rb_right)
+		node = parent;
+
+	return parent;
+}
+
+struct rb_node *rb_prev(const struct rb_node *node)
+{
+	struct rb_node *parent;
+
+	if (RB_EMPTY_NODE(node))
+		return NULL;
+
+	/*
+	 * If we have a left-hand child, go down and then right as far
+	 * as we can.
+	 */
+	if (node->rb_left) {
+		node = node->rb_left;
+		while (node->rb_right)
+			node = node->rb_right;
+		return (struct rb_node *)node;
+	}
+
+	/*
+	 * No left-hand children. Go up till we find an ancestor which
+	 * is a right-hand child of its parent.
+	 */
+	while ((parent = rb_parent(node)) && node == parent->rb_left)
+		node = parent;
+
+	return parent;
+}
+
+void rb_replace_node(struct rb_node *victim, struct rb_node *new,
+		     struct rb_root *root)
+{
+	struct rb_node *parent = rb_parent(victim);
+
+	/* Copy the pointers/colour from the victim to the replacement */
+	*new = *victim;
+
+	/* Set the surrounding nodes to point to the replacement */
+	if (victim->rb_left)
+		rb_set_parent(victim->rb_left, new);
+	if (victim->rb_right)
+		rb_set_parent(victim->rb_right, new);
+	__rb_change_child(victim, new, parent, root);
+}
+
+static struct rb_node *rb_left_deepest_node(const struct rb_node *node)
+{
+	for (;;) {
+		if (node->rb_left)
+			node = node->rb_left;
+		else if (node->rb_right)
+			node = node->rb_right;
+		else
+			return (struct rb_node *)node;
+	}
+}
+
+struct rb_node *rb_next_postorder(const struct rb_node *node)
+{
+	const struct rb_node *parent;
+	if (!node)
+		return NULL;
+	parent = rb_parent(node);
+
+	/* If we're sitting on node, we've already seen our children */
+	if (parent && node == parent->rb_left && parent->rb_right) {
+		/* If we are the parent's left node, go to the parent's right
+		 * node then all the way down to the left */
+		return rb_left_deepest_node(parent->rb_right);
+	} else
+		/* Otherwise we are the parent's right node, and the parent
+		 * should be next */
+		return (struct rb_node *)parent;
+}
+
+struct rb_node *rb_first_postorder(const struct rb_root *root)
+{
+	if (!root->rb_node)
+		return NULL;
+
+	return rb_left_deepest_node(root->rb_node);
+}
diff --git a/tools/lib/str_error_r.c b/tools/lib/str_error_r.c
new file mode 100644
index 000000000..6aad8308a
--- /dev/null
+++ b/tools/lib/str_error_r.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#undef _GNU_SOURCE
+#include <string.h>
+#include <stdio.h>
+#include <linux/string.h>
+
+/*
+ * The tools so far have been using the strerror_r() GNU variant, that returns
+ * a string, be it the buffer passed or something else.
+ *
+ * But that, besides being tricky in cases where we expect that the function
+ * using strerror_r() returns the error formatted in a provided buffer (we have
+ * to check if it returned something else and copy that instead), breaks the
+ * build on systems not using glibc, like Alpine Linux, where musl libc is
+ * used.
+ *
+ * So, introduce yet another wrapper, str_error_r(), that has the GNU
+ * interface, but uses the portable XSI variant of strerror_r(), so that users
+ * rest asured that the provided buffer is used and it is what is returned.
+ */
+char *str_error_r(int errnum, char *buf, size_t buflen)
+{
+	int err = strerror_r(errnum, buf, buflen);
+	if (err)
+		snprintf(buf, buflen, "INTERNAL ERROR: strerror_r(%d, [buf], %zd)=%d", errnum, buflen, err);
+	return buf;
+}
diff --git a/tools/lib/string.c b/tools/lib/string.c
new file mode 100644
index 000000000..8b6892f95
--- /dev/null
+++ b/tools/lib/string.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  linux/tools/lib/string.c
+ *
+ *  Copied from linux/lib/string.c, where it is:
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  More specifically, the first copied function was strtobool, which
+ *  was introduced by:
+ *
+ *  d0f1fed29e6e ("Add a strtobool function matching semantics of existing in kernel equivalents")
+ *  Author: Jonathan Cameron <jic23@cam.ac.uk>
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/compiler.h>
+
+/**
+ * memdup - duplicate region of memory
+ *
+ * @src: memory region to duplicate
+ * @len: memory region length
+ */
+void *memdup(const void *src, size_t len)
+{
+	void *p = malloc(len);
+
+	if (p)
+		memcpy(p, src, len);
+
+	return p;
+}
+
+/**
+ * strtobool - convert common user inputs into boolean values
+ * @s: input string
+ * @res: result
+ *
+ * This routine returns 0 iff the first character is one of 'Yy1Nn0', or
+ * [oO][NnFf] for "on" and "off". Otherwise it will return -EINVAL.  Value
+ * pointed to by res is updated upon finding a match.
+ */
+int strtobool(const char *s, bool *res)
+{
+	if (!s)
+		return -EINVAL;
+
+	switch (s[0]) {
+	case 'y':
+	case 'Y':
+	case '1':
+		*res = true;
+		return 0;
+	case 'n':
+	case 'N':
+	case '0':
+		*res = false;
+		return 0;
+	case 'o':
+	case 'O':
+		switch (s[1]) {
+		case 'n':
+		case 'N':
+			*res = true;
+			return 0;
+		case 'f':
+		case 'F':
+			*res = false;
+			return 0;
+		default:
+			break;
+		}
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * strlcpy - Copy a C-string into a sized buffer
+ * @dest: Where to copy the string to
+ * @src: Where to copy the string from
+ * @size: size of destination buffer
+ *
+ * Compatible with *BSD: the result is always a valid
+ * NUL-terminated string that fits in the buffer (unless,
+ * of course, the buffer size is zero). It does not pad
+ * out the result like strncpy() does.
+ *
+ * If libc has strlcpy() then that version will override this
+ * implementation:
+ */
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wignored-attributes"
+#endif
+size_t __weak strlcpy(char *dest, const char *src, size_t size)
+{
+	size_t ret = strlen(src);
+
+	if (size) {
+		size_t len = (ret >= size) ? size - 1 : ret;
+		memcpy(dest, src, len);
+		dest[len] = '\0';
+	}
+	return ret;
+}
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+/**
+ * skip_spaces - Removes leading whitespace from @str.
+ * @str: The string to be stripped.
+ *
+ * Returns a pointer to the first non-whitespace character in @str.
+ */
+char *skip_spaces(const char *str)
+{
+	while (isspace(*str))
+		++str;
+	return (char *)str;
+}
+
+/**
+ * strim - Removes leading and trailing whitespace from @s.
+ * @s: The string to be stripped.
+ *
+ * Note that the first trailing whitespace is replaced with a %NUL-terminator
+ * in the given string @s. Returns a pointer to the first non-whitespace
+ * character in @s.
+ */
+char *strim(char *s)
+{
+	size_t size;
+	char *end;
+
+	size = strlen(s);
+	if (!size)
+		return s;
+
+	end = s + size - 1;
+	while (end >= s && isspace(*end))
+		end--;
+	*(end + 1) = '\0';
+
+	return skip_spaces(s);
+}
+
+/**
+ * strreplace - Replace all occurrences of character in string.
+ * @s: The string to operate on.
+ * @old: The character being replaced.
+ * @new: The character @old is replaced with.
+ *
+ * Returns pointer to the nul byte at the end of @s.
+ */
+char *strreplace(char *s, char old, char new)
+{
+	for (; *s; ++s)
+		if (*s == old)
+			*s = new;
+	return s;
+}
+
+static void *check_bytes8(const u8 *start, u8 value, unsigned int bytes)
+{
+	while (bytes) {
+		if (*start != value)
+			return (void *)start;
+		start++;
+		bytes--;
+	}
+	return NULL;
+}
+
+/**
+ * memchr_inv - Find an unmatching character in an area of memory.
+ * @start: The memory area
+ * @c: Find a character other than c
+ * @bytes: The size of the area.
+ *
+ * returns the address of the first character other than @c, or %NULL
+ * if the whole buffer contains just @c.
+ */
+void *memchr_inv(const void *start, int c, size_t bytes)
+{
+	u8 value = c;
+	u64 value64;
+	unsigned int words, prefix;
+
+	if (bytes <= 16)
+		return check_bytes8(start, value, bytes);
+
+	value64 = value;
+	value64 |= value64 << 8;
+	value64 |= value64 << 16;
+	value64 |= value64 << 32;
+
+	prefix = (unsigned long)start % 8;
+	if (prefix) {
+		u8 *r;
+
+		prefix = 8 - prefix;
+		r = check_bytes8(start, value, prefix);
+		if (r)
+			return r;
+		start += prefix;
+		bytes -= prefix;
+	}
+
+	words = bytes / 8;
+
+	while (words) {
+		if (*(u64 *)start != value64)
+			return check_bytes8(start, value, 8);
+		start += 8;
+		words--;
+	}
+
+	return check_bytes8(start, value, bytes % 8);
+}
diff --git a/tools/lib/subcmd/Build b/tools/lib/subcmd/Build
new file mode 100644
index 000000000..ee3128878
--- /dev/null
+++ b/tools/lib/subcmd/Build
@@ -0,0 +1,7 @@
+libsubcmd-y += exec-cmd.o
+libsubcmd-y += help.o
+libsubcmd-y += pager.o
+libsubcmd-y += parse-options.o
+libsubcmd-y += run-command.o
+libsubcmd-y += sigchain.o
+libsubcmd-y += subcmd-config.o
diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile
new file mode 100644
index 000000000..1c777a72b
--- /dev/null
+++ b/tools/lib/subcmd/Makefile
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: GPL-2.0
+include ../../scripts/Makefile.include
+include ../../scripts/utilities.mak		# QUIET_CLEAN
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+CC ?= $(CROSS_COMPILE)gcc
+LD ?= $(CROSS_COMPILE)ld
+AR ?= $(CROSS_COMPILE)ar
+
+RM = rm -f
+
+MAKEFLAGS += --no-print-directory
+
+LIBFILE = $(OUTPUT)libsubcmd.a
+
+CFLAGS := -ggdb3 -Wall -Wextra -std=gnu99 -fPIC
+
+ifeq ($(DEBUG),0)
+  ifeq ($(feature-fortify-source), 1)
+    CFLAGS += -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2
+  endif
+endif
+
+ifeq ($(DEBUG),1)
+  CFLAGS += -O0
+else ifeq ($(CC_NO_CLANG), 0)
+  CFLAGS += -O3
+else
+  CFLAGS += -O6
+endif
+
+# Treat warnings as errors unless directed not to
+ifneq ($(WERROR),0)
+  CFLAGS += -Werror
+endif
+
+CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
+
+CFLAGS += -I$(srctree)/tools/include/
+
+CFLAGS += $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
+
+SUBCMD_IN := $(OUTPUT)libsubcmd-in.o
+
+all:
+
+export srctree OUTPUT CC LD CFLAGS V
+include $(srctree)/tools/build/Makefile.include
+
+all: fixdep $(LIBFILE)
+
+$(SUBCMD_IN): FORCE
+	@$(MAKE) $(build)=libsubcmd
+
+$(LIBFILE): $(SUBCMD_IN)
+	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(SUBCMD_IN)
+
+clean:
+	$(call QUIET_CLEAN, libsubcmd) $(RM) $(LIBFILE); \
+	find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
+
+FORCE:
+
+.PHONY: clean FORCE
diff --git a/tools/lib/subcmd/exec-cmd.c b/tools/lib/subcmd/exec-cmd.c
new file mode 100644
index 000000000..33e94fb83
--- /dev/null
+++ b/tools/lib/subcmd/exec-cmd.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <linux/string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "subcmd-util.h"
+#include "exec-cmd.h"
+#include "subcmd-config.h"
+
+#define MAX_ARGS	32
+#define PATH_MAX	4096
+
+static const char *argv_exec_path;
+static const char *argv0_path;
+
+void exec_cmd_init(const char *exec_name, const char *prefix,
+		   const char *exec_path, const char *exec_path_env)
+{
+	subcmd_config.exec_name		= exec_name;
+	subcmd_config.prefix		= prefix;
+	subcmd_config.exec_path		= exec_path;
+	subcmd_config.exec_path_env	= exec_path_env;
+}
+
+#define is_dir_sep(c) ((c) == '/')
+
+static int is_absolute_path(const char *path)
+{
+	return path[0] == '/';
+}
+
+static const char *get_pwd_cwd(void)
+{
+	static char cwd[PATH_MAX + 1];
+	char *pwd;
+	struct stat cwd_stat, pwd_stat;
+	if (getcwd(cwd, PATH_MAX) == NULL)
+		return NULL;
+	pwd = getenv("PWD");
+	if (pwd && strcmp(pwd, cwd)) {
+		stat(cwd, &cwd_stat);
+		if (!stat(pwd, &pwd_stat) &&
+		    pwd_stat.st_dev == cwd_stat.st_dev &&
+		    pwd_stat.st_ino == cwd_stat.st_ino) {
+			strlcpy(cwd, pwd, PATH_MAX);
+		}
+	}
+	return cwd;
+}
+
+static const char *make_nonrelative_path(const char *path)
+{
+	static char buf[PATH_MAX + 1];
+
+	if (is_absolute_path(path)) {
+		if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
+			die("Too long path: %.*s", 60, path);
+	} else {
+		const char *cwd = get_pwd_cwd();
+		if (!cwd)
+			die("Cannot determine the current working directory");
+		if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX)
+			die("Too long path: %.*s", 60, path);
+	}
+	return buf;
+}
+
+char *system_path(const char *path)
+{
+	char *buf = NULL;
+
+	if (is_absolute_path(path))
+		return strdup(path);
+
+	astrcatf(&buf, "%s/%s", subcmd_config.prefix, path);
+
+	return buf;
+}
+
+const char *extract_argv0_path(const char *argv0)
+{
+	const char *slash;
+
+	if (!argv0 || !*argv0)
+		return NULL;
+	slash = argv0 + strlen(argv0);
+
+	while (argv0 <= slash && !is_dir_sep(*slash))
+		slash--;
+
+	if (slash >= argv0) {
+		argv0_path = strndup(argv0, slash - argv0);
+		return argv0_path ? slash + 1 : NULL;
+	}
+
+	return argv0;
+}
+
+void set_argv_exec_path(const char *exec_path)
+{
+	argv_exec_path = exec_path;
+	/*
+	 * Propagate this setting to external programs.
+	 */
+	setenv(subcmd_config.exec_path_env, exec_path, 1);
+}
+
+
+/* Returns the highest-priority location to look for subprograms. */
+char *get_argv_exec_path(void)
+{
+	char *env;
+
+	if (argv_exec_path)
+		return strdup(argv_exec_path);
+
+	env = getenv(subcmd_config.exec_path_env);
+	if (env && *env)
+		return strdup(env);
+
+	return system_path(subcmd_config.exec_path);
+}
+
+static void add_path(char **out, const char *path)
+{
+	if (path && *path) {
+		if (is_absolute_path(path))
+			astrcat(out, path);
+		else
+			astrcat(out, make_nonrelative_path(path));
+
+		astrcat(out, ":");
+	}
+}
+
+void setup_path(void)
+{
+	const char *old_path = getenv("PATH");
+	char *new_path = NULL;
+	char *tmp = get_argv_exec_path();
+
+	add_path(&new_path, tmp);
+	add_path(&new_path, argv0_path);
+	free(tmp);
+
+	if (old_path)
+		astrcat(&new_path, old_path);
+	else
+		astrcat(&new_path, "/usr/local/bin:/usr/bin:/bin");
+
+	setenv("PATH", new_path, 1);
+
+	free(new_path);
+}
+
+static const char **prepare_exec_cmd(const char **argv)
+{
+	int argc;
+	const char **nargv;
+
+	for (argc = 0; argv[argc]; argc++)
+		; /* just counting */
+	nargv = malloc(sizeof(*nargv) * (argc + 2));
+
+	nargv[0] = subcmd_config.exec_name;
+	for (argc = 0; argv[argc]; argc++)
+		nargv[argc + 1] = argv[argc];
+	nargv[argc + 1] = NULL;
+	return nargv;
+}
+
+int execv_cmd(const char **argv) {
+	const char **nargv = prepare_exec_cmd(argv);
+
+	/* execvp() can only ever return if it fails */
+	execvp(subcmd_config.exec_name, (char **)nargv);
+
+	free(nargv);
+	return -1;
+}
+
+
+int execl_cmd(const char *cmd,...)
+{
+	int argc;
+	const char *argv[MAX_ARGS + 1];
+	const char *arg;
+	va_list param;
+
+	va_start(param, cmd);
+	argv[0] = cmd;
+	argc = 1;
+	while (argc < MAX_ARGS) {
+		arg = argv[argc++] = va_arg(param, char *);
+		if (!arg)
+			break;
+	}
+	va_end(param);
+	if (MAX_ARGS <= argc) {
+		fprintf(stderr, " Error: too many args to run %s\n", cmd);
+		return -1;
+	}
+
+	argv[argc] = NULL;
+	return execv_cmd(argv);
+}
diff --git a/tools/lib/subcmd/exec-cmd.h b/tools/lib/subcmd/exec-cmd.h
new file mode 100644
index 000000000..aba591b8d
--- /dev/null
+++ b/tools/lib/subcmd/exec-cmd.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SUBCMD_EXEC_CMD_H
+#define __SUBCMD_EXEC_CMD_H
+
+extern void exec_cmd_init(const char *exec_name, const char *prefix,
+			  const char *exec_path, const char *exec_path_env);
+
+extern void set_argv_exec_path(const char *exec_path);
+extern const char *extract_argv0_path(const char *path);
+extern void setup_path(void);
+extern int execv_cmd(const char **argv); /* NULL terminated */
+extern int execl_cmd(const char *cmd, ...);
+/* get_argv_exec_path and system_path return malloc'd string, caller must free it */
+extern char *get_argv_exec_path(void);
+extern char *system_path(const char *path);
+
+#endif /* __SUBCMD_EXEC_CMD_H */
diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c
new file mode 100644
index 000000000..bf02d62a3
--- /dev/null
+++ b/tools/lib/subcmd/help.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/string.h>
+#include <termios.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <dirent.h>
+#include "subcmd-util.h"
+#include "help.h"
+#include "exec-cmd.h"
+
+void add_cmdname(struct cmdnames *cmds, const char *name, size_t len)
+{
+	struct cmdname *ent = malloc(sizeof(*ent) + len + 1);
+
+	ent->len = len;
+	memcpy(ent->name, name, len);
+	ent->name[len] = 0;
+
+	ALLOC_GROW(cmds->names, cmds->cnt + 1, cmds->alloc);
+	cmds->names[cmds->cnt++] = ent;
+}
+
+void clean_cmdnames(struct cmdnames *cmds)
+{
+	unsigned int i;
+
+	for (i = 0; i < cmds->cnt; ++i)
+		zfree(&cmds->names[i]);
+	zfree(&cmds->names);
+	cmds->cnt = 0;
+	cmds->alloc = 0;
+}
+
+int cmdname_compare(const void *a_, const void *b_)
+{
+	struct cmdname *a = *(struct cmdname **)a_;
+	struct cmdname *b = *(struct cmdname **)b_;
+	return strcmp(a->name, b->name);
+}
+
+void uniq(struct cmdnames *cmds)
+{
+	unsigned int i, j;
+
+	if (!cmds->cnt)
+		return;
+
+	for (i = j = 1; i < cmds->cnt; i++)
+		if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name))
+			cmds->names[j++] = cmds->names[i];
+
+	cmds->cnt = j;
+}
+
+void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
+{
+	size_t ci, cj, ei;
+	int cmp;
+
+	ci = cj = ei = 0;
+	while (ci < cmds->cnt && ei < excludes->cnt) {
+		cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
+		if (cmp < 0) {
+			cmds->names[cj++] = cmds->names[ci++];
+		} else if (cmp == 0) {
+			ci++;
+			ei++;
+		} else if (cmp > 0) {
+			ei++;
+		}
+	}
+
+	while (ci < cmds->cnt)
+		cmds->names[cj++] = cmds->names[ci++];
+
+	cmds->cnt = cj;
+}
+
+static void get_term_dimensions(struct winsize *ws)
+{
+	char *s = getenv("LINES");
+
+	if (s != NULL) {
+		ws->ws_row = atoi(s);
+		s = getenv("COLUMNS");
+		if (s != NULL) {
+			ws->ws_col = atoi(s);
+			if (ws->ws_row && ws->ws_col)
+				return;
+		}
+	}
+#ifdef TIOCGWINSZ
+	if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
+	    ws->ws_row && ws->ws_col)
+		return;
+#endif
+	ws->ws_row = 25;
+	ws->ws_col = 80;
+}
+
+static void pretty_print_string_list(struct cmdnames *cmds, int longest)
+{
+	int cols = 1, rows;
+	int space = longest + 1; /* min 1 SP between words */
+	struct winsize win;
+	int max_cols;
+	int i, j;
+
+	get_term_dimensions(&win);
+	max_cols = win.ws_col - 1; /* don't print *on* the edge */
+
+	if (space < max_cols)
+		cols = max_cols / space;
+	rows = (cmds->cnt + cols - 1) / cols;
+
+	for (i = 0; i < rows; i++) {
+		printf("  ");
+
+		for (j = 0; j < cols; j++) {
+			unsigned int n = j * rows + i;
+			unsigned int size = space;
+
+			if (n >= cmds->cnt)
+				break;
+			if (j == cols-1 || n + rows >= cmds->cnt)
+				size = 1;
+			printf("%-*s", size, cmds->names[n]->name);
+		}
+		putchar('\n');
+	}
+}
+
+static int is_executable(const char *name)
+{
+	struct stat st;
+
+	if (stat(name, &st) || /* stat, not lstat */
+	    !S_ISREG(st.st_mode))
+		return 0;
+
+	return st.st_mode & S_IXUSR;
+}
+
+static int has_extension(const char *filename, const char *ext)
+{
+	size_t len = strlen(filename);
+	size_t extlen = strlen(ext);
+
+	return len > extlen && !memcmp(filename + len - extlen, ext, extlen);
+}
+
+static void list_commands_in_dir(struct cmdnames *cmds,
+					 const char *path,
+					 const char *prefix)
+{
+	int prefix_len;
+	DIR *dir = opendir(path);
+	struct dirent *de;
+	char *buf = NULL;
+
+	if (!dir)
+		return;
+	if (!prefix)
+		prefix = "perf-";
+	prefix_len = strlen(prefix);
+
+	astrcatf(&buf, "%s/", path);
+
+	while ((de = readdir(dir)) != NULL) {
+		int entlen;
+
+		if (!strstarts(de->d_name, prefix))
+			continue;
+
+		astrcat(&buf, de->d_name);
+		if (!is_executable(buf))
+			continue;
+
+		entlen = strlen(de->d_name) - prefix_len;
+		if (has_extension(de->d_name, ".exe"))
+			entlen -= 4;
+
+		add_cmdname(cmds, de->d_name + prefix_len, entlen);
+	}
+	closedir(dir);
+	free(buf);
+}
+
+void load_command_list(const char *prefix,
+		struct cmdnames *main_cmds,
+		struct cmdnames *other_cmds)
+{
+	const char *env_path = getenv("PATH");
+	char *exec_path = get_argv_exec_path();
+
+	if (exec_path) {
+		list_commands_in_dir(main_cmds, exec_path, prefix);
+		qsort(main_cmds->names, main_cmds->cnt,
+		      sizeof(*main_cmds->names), cmdname_compare);
+		uniq(main_cmds);
+	}
+
+	if (env_path) {
+		char *paths, *path, *colon;
+		path = paths = strdup(env_path);
+		while (1) {
+			if ((colon = strchr(path, ':')))
+				*colon = 0;
+			if (!exec_path || strcmp(path, exec_path))
+				list_commands_in_dir(other_cmds, path, prefix);
+
+			if (!colon)
+				break;
+			path = colon + 1;
+		}
+		free(paths);
+
+		qsort(other_cmds->names, other_cmds->cnt,
+		      sizeof(*other_cmds->names), cmdname_compare);
+		uniq(other_cmds);
+	}
+	free(exec_path);
+	exclude_cmds(other_cmds, main_cmds);
+}
+
+void list_commands(const char *title, struct cmdnames *main_cmds,
+		   struct cmdnames *other_cmds)
+{
+	unsigned int i, longest = 0;
+
+	for (i = 0; i < main_cmds->cnt; i++)
+		if (longest < main_cmds->names[i]->len)
+			longest = main_cmds->names[i]->len;
+	for (i = 0; i < other_cmds->cnt; i++)
+		if (longest < other_cmds->names[i]->len)
+			longest = other_cmds->names[i]->len;
+
+	if (main_cmds->cnt) {
+		char *exec_path = get_argv_exec_path();
+		printf("available %s in '%s'\n", title, exec_path);
+		printf("----------------");
+		mput_char('-', strlen(title) + strlen(exec_path));
+		putchar('\n');
+		pretty_print_string_list(main_cmds, longest);
+		putchar('\n');
+		free(exec_path);
+	}
+
+	if (other_cmds->cnt) {
+		printf("%s available from elsewhere on your $PATH\n", title);
+		printf("---------------------------------------");
+		mput_char('-', strlen(title));
+		putchar('\n');
+		pretty_print_string_list(other_cmds, longest);
+		putchar('\n');
+	}
+}
+
+int is_in_cmdlist(struct cmdnames *c, const char *s)
+{
+	unsigned int i;
+
+	for (i = 0; i < c->cnt; i++)
+		if (!strcmp(s, c->names[i]->name))
+			return 1;
+	return 0;
+}
diff --git a/tools/lib/subcmd/help.h b/tools/lib/subcmd/help.h
new file mode 100644
index 000000000..355c066c8
--- /dev/null
+++ b/tools/lib/subcmd/help.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SUBCMD_HELP_H
+#define __SUBCMD_HELP_H
+
+#include <sys/types.h>
+#include <stdio.h>
+
+struct cmdnames {
+	size_t alloc;
+	size_t cnt;
+	struct cmdname {
+		size_t len; /* also used for similarity index in help.c */
+		char name[];
+	} **names;
+};
+
+static inline void mput_char(char c, unsigned int num)
+{
+	while(num--)
+		putchar(c);
+}
+
+void load_command_list(const char *prefix,
+		struct cmdnames *main_cmds,
+		struct cmdnames *other_cmds);
+void add_cmdname(struct cmdnames *cmds, const char *name, size_t len);
+void clean_cmdnames(struct cmdnames *cmds);
+int cmdname_compare(const void *a, const void *b);
+void uniq(struct cmdnames *cmds);
+/* Here we require that excludes is a sorted list. */
+void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes);
+int is_in_cmdlist(struct cmdnames *c, const char *s);
+void list_commands(const char *title, struct cmdnames *main_cmds,
+		   struct cmdnames *other_cmds);
+
+#endif /* __SUBCMD_HELP_H */
diff --git a/tools/lib/subcmd/pager.c b/tools/lib/subcmd/pager.c
new file mode 100644
index 000000000..e3d47b59b
--- /dev/null
+++ b/tools/lib/subcmd/pager.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/select.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include "pager.h"
+#include "run-command.h"
+#include "sigchain.h"
+#include "subcmd-config.h"
+
+/*
+ * This is split up from the rest of git so that we can do
+ * something different on Windows.
+ */
+
+static int spawned_pager;
+static int pager_columns;
+
+void pager_init(const char *pager_env)
+{
+	subcmd_config.pager_env = pager_env;
+}
+
+static const char *forced_pager;
+
+void force_pager(const char *pager)
+{
+	forced_pager = pager;
+}
+
+static void pager_preexec(void)
+{
+	/*
+	 * Work around bug in "less" by not starting it until we
+	 * have real input
+	 */
+	fd_set in;
+	fd_set exception;
+
+	FD_ZERO(&in);
+	FD_ZERO(&exception);
+	FD_SET(0, &in);
+	FD_SET(0, &exception);
+	select(1, &in, NULL, &exception, NULL);
+
+	setenv("LESS", "FRSX", 0);
+}
+
+static const char *pager_argv[] = { "sh", "-c", NULL, NULL };
+static struct child_process pager_process;
+
+static void wait_for_pager(void)
+{
+	fflush(stdout);
+	fflush(stderr);
+	/* signal EOF to pager */
+	close(1);
+	close(2);
+	finish_command(&pager_process);
+}
+
+static void wait_for_pager_signal(int signo)
+{
+	wait_for_pager();
+	sigchain_pop(signo);
+	raise(signo);
+}
+
+void setup_pager(void)
+{
+	const char *pager = getenv(subcmd_config.pager_env);
+	struct winsize sz;
+
+	if (forced_pager)
+		pager = forced_pager;
+	if (!isatty(1) && !forced_pager)
+		return;
+	if (ioctl(1, TIOCGWINSZ, &sz) == 0)
+		pager_columns = sz.ws_col;
+	if (!pager)
+		pager = getenv("PAGER");
+	if (!(pager || access("/usr/bin/pager", X_OK)))
+		pager = "/usr/bin/pager";
+	if (!(pager || access("/usr/bin/less", X_OK)))
+		pager = "/usr/bin/less";
+	if (!pager)
+		pager = "cat";
+	if (!*pager || !strcmp(pager, "cat"))
+		return;
+
+	spawned_pager = 1; /* means we are emitting to terminal */
+
+	/* spawn the pager */
+	pager_argv[2] = pager;
+	pager_process.argv = pager_argv;
+	pager_process.in = -1;
+	pager_process.preexec_cb = pager_preexec;
+
+	if (start_command(&pager_process))
+		return;
+
+	/* original process continues, but writes to the pipe */
+	dup2(pager_process.in, 1);
+	if (isatty(2))
+		dup2(pager_process.in, 2);
+	close(pager_process.in);
+
+	/* this makes sure that the parent terminates after the pager */
+	sigchain_push_common(wait_for_pager_signal);
+	atexit(wait_for_pager);
+}
+
+int pager_in_use(void)
+{
+	return spawned_pager;
+}
+
+int pager_get_columns(void)
+{
+	char *s;
+
+	s = getenv("COLUMNS");
+	if (s)
+		return atoi(s);
+
+	return (pager_columns ? pager_columns : 80) - 2;
+}
diff --git a/tools/lib/subcmd/pager.h b/tools/lib/subcmd/pager.h
new file mode 100644
index 000000000..a81896469
--- /dev/null
+++ b/tools/lib/subcmd/pager.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SUBCMD_PAGER_H
+#define __SUBCMD_PAGER_H
+
+extern void pager_init(const char *pager_env);
+
+extern void setup_pager(void);
+extern int pager_in_use(void);
+extern int pager_get_columns(void);
+extern void force_pager(const char *);
+
+#endif /* __SUBCMD_PAGER_H */
diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c
new file mode 100644
index 000000000..39ebf6192
--- /dev/null
+++ b/tools/lib/subcmd/parse-options.c
@@ -0,0 +1,1028 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <ctype.h>
+#include "subcmd-util.h"
+#include "parse-options.h"
+#include "subcmd-config.h"
+#include "pager.h"
+
+#define OPT_SHORT 1
+#define OPT_UNSET 2
+
+char *error_buf;
+
+static int opterror(const struct option *opt, const char *reason, int flags)
+{
+	if (flags & OPT_SHORT)
+		fprintf(stderr, " Error: switch `%c' %s", opt->short_name, reason);
+	else if (flags & OPT_UNSET)
+		fprintf(stderr, " Error: option `no-%s' %s", opt->long_name, reason);
+	else
+		fprintf(stderr, " Error: option `%s' %s", opt->long_name, reason);
+
+	return -1;
+}
+
+static const char *skip_prefix(const char *str, const char *prefix)
+{
+	size_t len = strlen(prefix);
+	return strncmp(str, prefix, len) ? NULL : str + len;
+}
+
+static void optwarning(const struct option *opt, const char *reason, int flags)
+{
+	if (flags & OPT_SHORT)
+		fprintf(stderr, " Warning: switch `%c' %s", opt->short_name, reason);
+	else if (flags & OPT_UNSET)
+		fprintf(stderr, " Warning: option `no-%s' %s", opt->long_name, reason);
+	else
+		fprintf(stderr, " Warning: option `%s' %s", opt->long_name, reason);
+}
+
+static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt,
+		   int flags, const char **arg)
+{
+	const char *res;
+
+	if (p->opt) {
+		res = p->opt;
+		p->opt = NULL;
+	} else if ((opt->flags & PARSE_OPT_LASTARG_DEFAULT) && (p->argc == 1 ||
+		    **(p->argv + 1) == '-')) {
+		res = (const char *)opt->defval;
+	} else if (p->argc > 1) {
+		p->argc--;
+		res = *++p->argv;
+	} else
+		return opterror(opt, "requires a value", flags);
+	if (arg)
+		*arg = res;
+	return 0;
+}
+
+static int get_value(struct parse_opt_ctx_t *p,
+		     const struct option *opt, int flags)
+{
+	const char *s, *arg = NULL;
+	const int unset = flags & OPT_UNSET;
+	int err;
+
+	if (unset && p->opt)
+		return opterror(opt, "takes no value", flags);
+	if (unset && (opt->flags & PARSE_OPT_NONEG))
+		return opterror(opt, "isn't available", flags);
+	if (opt->flags & PARSE_OPT_DISABLED)
+		return opterror(opt, "is not usable", flags);
+
+	if (opt->flags & PARSE_OPT_EXCLUSIVE) {
+		if (p->excl_opt && p->excl_opt != opt) {
+			char msg[128];
+
+			if (((flags & OPT_SHORT) && p->excl_opt->short_name) ||
+			    p->excl_opt->long_name == NULL) {
+				snprintf(msg, sizeof(msg), "cannot be used with switch `%c'",
+					 p->excl_opt->short_name);
+			} else {
+				snprintf(msg, sizeof(msg), "cannot be used with %s",
+					 p->excl_opt->long_name);
+			}
+			opterror(opt, msg, flags);
+			return -3;
+		}
+		p->excl_opt = opt;
+	}
+	if (!(flags & OPT_SHORT) && p->opt) {
+		switch (opt->type) {
+		case OPTION_CALLBACK:
+			if (!(opt->flags & PARSE_OPT_NOARG))
+				break;
+			/* FALLTHROUGH */
+		case OPTION_BOOLEAN:
+		case OPTION_INCR:
+		case OPTION_BIT:
+		case OPTION_SET_UINT:
+		case OPTION_SET_PTR:
+			return opterror(opt, "takes no value", flags);
+		case OPTION_END:
+		case OPTION_ARGUMENT:
+		case OPTION_GROUP:
+		case OPTION_STRING:
+		case OPTION_INTEGER:
+		case OPTION_UINTEGER:
+		case OPTION_LONG:
+		case OPTION_ULONG:
+		case OPTION_U64:
+		default:
+			break;
+		}
+	}
+
+	if (opt->flags & PARSE_OPT_NOBUILD) {
+		char reason[128];
+		bool noarg = false;
+
+		err = snprintf(reason, sizeof(reason),
+				opt->flags & PARSE_OPT_CANSKIP ?
+					"is being ignored because %s " :
+					"is not available because %s",
+				opt->build_opt);
+		reason[sizeof(reason) - 1] = '\0';
+
+		if (err < 0)
+			strncpy(reason, opt->flags & PARSE_OPT_CANSKIP ?
+					"is being ignored" :
+					"is not available",
+					sizeof(reason));
+
+		if (!(opt->flags & PARSE_OPT_CANSKIP))
+			return opterror(opt, reason, flags);
+
+		err = 0;
+		if (unset)
+			noarg = true;
+		if (opt->flags & PARSE_OPT_NOARG)
+			noarg = true;
+		if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
+			noarg = true;
+
+		switch (opt->type) {
+		case OPTION_BOOLEAN:
+		case OPTION_INCR:
+		case OPTION_BIT:
+		case OPTION_SET_UINT:
+		case OPTION_SET_PTR:
+		case OPTION_END:
+		case OPTION_ARGUMENT:
+		case OPTION_GROUP:
+			noarg = true;
+			break;
+		case OPTION_CALLBACK:
+		case OPTION_STRING:
+		case OPTION_INTEGER:
+		case OPTION_UINTEGER:
+		case OPTION_LONG:
+		case OPTION_ULONG:
+		case OPTION_U64:
+		default:
+			break;
+		}
+
+		if (!noarg)
+			err = get_arg(p, opt, flags, NULL);
+		if (err)
+			return err;
+
+		optwarning(opt, reason, flags);
+		return 0;
+	}
+
+	switch (opt->type) {
+	case OPTION_BIT:
+		if (unset)
+			*(int *)opt->value &= ~opt->defval;
+		else
+			*(int *)opt->value |= opt->defval;
+		return 0;
+
+	case OPTION_BOOLEAN:
+		*(bool *)opt->value = unset ? false : true;
+		if (opt->set)
+			*(bool *)opt->set = true;
+		return 0;
+
+	case OPTION_INCR:
+		*(int *)opt->value = unset ? 0 : *(int *)opt->value + 1;
+		return 0;
+
+	case OPTION_SET_UINT:
+		*(unsigned int *)opt->value = unset ? 0 : opt->defval;
+		return 0;
+
+	case OPTION_SET_PTR:
+		*(void **)opt->value = unset ? NULL : (void *)opt->defval;
+		return 0;
+
+	case OPTION_STRING:
+		err = 0;
+		if (unset)
+			*(const char **)opt->value = NULL;
+		else if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
+			*(const char **)opt->value = (const char *)opt->defval;
+		else
+			err = get_arg(p, opt, flags, (const char **)opt->value);
+
+		if (opt->set)
+			*(bool *)opt->set = true;
+
+		/* PARSE_OPT_NOEMPTY: Allow NULL but disallow empty string. */
+		if (opt->flags & PARSE_OPT_NOEMPTY) {
+			const char *val = *(const char **)opt->value;
+
+			if (!val)
+				return err;
+
+			/* Similar to unset if we are given an empty string. */
+			if (val[0] == '\0') {
+				*(const char **)opt->value = NULL;
+				return 0;
+			}
+		}
+
+		return err;
+
+	case OPTION_CALLBACK:
+		if (opt->set)
+			*(bool *)opt->set = true;
+
+		if (unset)
+			return (*opt->callback)(opt, NULL, 1) ? (-1) : 0;
+		if (opt->flags & PARSE_OPT_NOARG)
+			return (*opt->callback)(opt, NULL, 0) ? (-1) : 0;
+		if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
+			return (*opt->callback)(opt, NULL, 0) ? (-1) : 0;
+		if (get_arg(p, opt, flags, &arg))
+			return -1;
+		return (*opt->callback)(opt, arg, 0) ? (-1) : 0;
+
+	case OPTION_INTEGER:
+		if (unset) {
+			*(int *)opt->value = 0;
+			return 0;
+		}
+		if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
+			*(int *)opt->value = opt->defval;
+			return 0;
+		}
+		if (get_arg(p, opt, flags, &arg))
+			return -1;
+		*(int *)opt->value = strtol(arg, (char **)&s, 10);
+		if (*s)
+			return opterror(opt, "expects a numerical value", flags);
+		return 0;
+
+	case OPTION_UINTEGER:
+		if (unset) {
+			*(unsigned int *)opt->value = 0;
+			return 0;
+		}
+		if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
+			*(unsigned int *)opt->value = opt->defval;
+			return 0;
+		}
+		if (get_arg(p, opt, flags, &arg))
+			return -1;
+		if (arg[0] == '-')
+			return opterror(opt, "expects an unsigned numerical value", flags);
+		*(unsigned int *)opt->value = strtol(arg, (char **)&s, 10);
+		if (*s)
+			return opterror(opt, "expects a numerical value", flags);
+		return 0;
+
+	case OPTION_LONG:
+		if (unset) {
+			*(long *)opt->value = 0;
+			return 0;
+		}
+		if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
+			*(long *)opt->value = opt->defval;
+			return 0;
+		}
+		if (get_arg(p, opt, flags, &arg))
+			return -1;
+		*(long *)opt->value = strtol(arg, (char **)&s, 10);
+		if (*s)
+			return opterror(opt, "expects a numerical value", flags);
+		return 0;
+
+	case OPTION_ULONG:
+		if (unset) {
+			*(unsigned long *)opt->value = 0;
+			return 0;
+		}
+		if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
+			*(unsigned long *)opt->value = opt->defval;
+			return 0;
+		}
+		if (get_arg(p, opt, flags, &arg))
+			return -1;
+		*(unsigned long *)opt->value = strtoul(arg, (char **)&s, 10);
+		if (*s)
+			return opterror(opt, "expects a numerical value", flags);
+		return 0;
+
+	case OPTION_U64:
+		if (unset) {
+			*(u64 *)opt->value = 0;
+			return 0;
+		}
+		if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
+			*(u64 *)opt->value = opt->defval;
+			return 0;
+		}
+		if (get_arg(p, opt, flags, &arg))
+			return -1;
+		if (arg[0] == '-')
+			return opterror(opt, "expects an unsigned numerical value", flags);
+		*(u64 *)opt->value = strtoull(arg, (char **)&s, 10);
+		if (*s)
+			return opterror(opt, "expects a numerical value", flags);
+		return 0;
+
+	case OPTION_END:
+	case OPTION_ARGUMENT:
+	case OPTION_GROUP:
+	default:
+		die("should not happen, someone must be hit on the forehead");
+	}
+}
+
+static int parse_short_opt(struct parse_opt_ctx_t *p, const struct option *options)
+{
+retry:
+	for (; options->type != OPTION_END; options++) {
+		if (options->short_name == *p->opt) {
+			p->opt = p->opt[1] ? p->opt + 1 : NULL;
+			return get_value(p, options, OPT_SHORT);
+		}
+	}
+
+	if (options->parent) {
+		options = options->parent;
+		goto retry;
+	}
+
+	return -2;
+}
+
+static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg,
+                          const struct option *options)
+{
+	const char *arg_end = strchr(arg, '=');
+	const struct option *abbrev_option = NULL, *ambiguous_option = NULL;
+	int abbrev_flags = 0, ambiguous_flags = 0;
+
+	if (!arg_end)
+		arg_end = arg + strlen(arg);
+
+retry:
+	for (; options->type != OPTION_END; options++) {
+		const char *rest;
+		int flags = 0;
+
+		if (!options->long_name)
+			continue;
+
+		rest = skip_prefix(arg, options->long_name);
+		if (options->type == OPTION_ARGUMENT) {
+			if (!rest)
+				continue;
+			if (*rest == '=')
+				return opterror(options, "takes no value", flags);
+			if (*rest)
+				continue;
+			p->out[p->cpidx++] = arg - 2;
+			return 0;
+		}
+		if (!rest) {
+			if (strstarts(options->long_name, "no-")) {
+				/*
+				 * The long name itself starts with "no-", so
+				 * accept the option without "no-" so that users
+				 * do not have to enter "no-no-" to get the
+				 * negation.
+				 */
+				rest = skip_prefix(arg, options->long_name + 3);
+				if (rest) {
+					flags |= OPT_UNSET;
+					goto match;
+				}
+				/* Abbreviated case */
+				if (strstarts(options->long_name + 3, arg)) {
+					flags |= OPT_UNSET;
+					goto is_abbreviated;
+				}
+			}
+			/* abbreviated? */
+			if (!strncmp(options->long_name, arg, arg_end - arg)) {
+is_abbreviated:
+				if (abbrev_option) {
+					/*
+					 * If this is abbreviated, it is
+					 * ambiguous. So when there is no
+					 * exact match later, we need to
+					 * error out.
+					 */
+					ambiguous_option = abbrev_option;
+					ambiguous_flags = abbrev_flags;
+				}
+				if (!(flags & OPT_UNSET) && *arg_end)
+					p->opt = arg_end + 1;
+				abbrev_option = options;
+				abbrev_flags = flags;
+				continue;
+			}
+			/* negated and abbreviated very much? */
+			if (strstarts("no-", arg)) {
+				flags |= OPT_UNSET;
+				goto is_abbreviated;
+			}
+			/* negated? */
+			if (strncmp(arg, "no-", 3))
+				continue;
+			flags |= OPT_UNSET;
+			rest = skip_prefix(arg + 3, options->long_name);
+			/* abbreviated and negated? */
+			if (!rest && strstarts(options->long_name, arg + 3))
+				goto is_abbreviated;
+			if (!rest)
+				continue;
+		}
+match:
+		if (*rest) {
+			if (*rest != '=')
+				continue;
+			p->opt = rest + 1;
+		}
+		return get_value(p, options, flags);
+	}
+
+	if (ambiguous_option) {
+		 fprintf(stderr,
+			 " Error: Ambiguous option: %s (could be --%s%s or --%s%s)\n",
+			 arg,
+			 (ambiguous_flags & OPT_UNSET) ?  "no-" : "",
+			 ambiguous_option->long_name,
+			 (abbrev_flags & OPT_UNSET) ?  "no-" : "",
+			 abbrev_option->long_name);
+		 return -1;
+	}
+	if (abbrev_option)
+		return get_value(p, abbrev_option, abbrev_flags);
+
+	if (options->parent) {
+		options = options->parent;
+		goto retry;
+	}
+
+	return -2;
+}
+
+static void check_typos(const char *arg, const struct option *options)
+{
+	if (strlen(arg) < 3)
+		return;
+
+	if (strstarts(arg, "no-")) {
+		fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)\n", arg);
+		exit(129);
+	}
+
+	for (; options->type != OPTION_END; options++) {
+		if (!options->long_name)
+			continue;
+		if (strstarts(options->long_name, arg)) {
+			fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)\n", arg);
+			exit(129);
+		}
+	}
+}
+
+static void parse_options_start(struct parse_opt_ctx_t *ctx,
+				int argc, const char **argv, int flags)
+{
+	memset(ctx, 0, sizeof(*ctx));
+	ctx->argc = argc - 1;
+	ctx->argv = argv + 1;
+	ctx->out  = argv;
+	ctx->cpidx = ((flags & PARSE_OPT_KEEP_ARGV0) != 0);
+	ctx->flags = flags;
+	if ((flags & PARSE_OPT_KEEP_UNKNOWN) &&
+	    (flags & PARSE_OPT_STOP_AT_NON_OPTION))
+		die("STOP_AT_NON_OPTION and KEEP_UNKNOWN don't go together");
+}
+
+static int usage_with_options_internal(const char * const *,
+				       const struct option *, int,
+				       struct parse_opt_ctx_t *);
+
+static int parse_options_step(struct parse_opt_ctx_t *ctx,
+			      const struct option *options,
+			      const char * const usagestr[])
+{
+	int internal_help = !(ctx->flags & PARSE_OPT_NO_INTERNAL_HELP);
+	int excl_short_opt = 1;
+	const char *arg;
+
+	/* we must reset ->opt, unknown short option leave it dangling */
+	ctx->opt = NULL;
+
+	for (; ctx->argc; ctx->argc--, ctx->argv++) {
+		arg = ctx->argv[0];
+		if (*arg != '-' || !arg[1]) {
+			if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION)
+				break;
+			ctx->out[ctx->cpidx++] = ctx->argv[0];
+			continue;
+		}
+
+		if (arg[1] != '-') {
+			ctx->opt = ++arg;
+			if (internal_help && *ctx->opt == 'h') {
+				return usage_with_options_internal(usagestr, options, 0, ctx);
+			}
+			switch (parse_short_opt(ctx, options)) {
+			case -1:
+				return parse_options_usage(usagestr, options, arg, 1);
+			case -2:
+				goto unknown;
+			case -3:
+				goto exclusive;
+			default:
+				break;
+			}
+			if (ctx->opt)
+				check_typos(arg, options);
+			while (ctx->opt) {
+				if (internal_help && *ctx->opt == 'h')
+					return usage_with_options_internal(usagestr, options, 0, ctx);
+				arg = ctx->opt;
+				switch (parse_short_opt(ctx, options)) {
+				case -1:
+					return parse_options_usage(usagestr, options, arg, 1);
+				case -2:
+					/* fake a short option thing to hide the fact that we may have
+					 * started to parse aggregated stuff
+					 *
+					 * This is leaky, too bad.
+					 */
+					ctx->argv[0] = strdup(ctx->opt - 1);
+					*(char *)ctx->argv[0] = '-';
+					goto unknown;
+				case -3:
+					goto exclusive;
+				default:
+					break;
+				}
+			}
+			continue;
+		}
+
+		if (!arg[2]) { /* "--" */
+			if (!(ctx->flags & PARSE_OPT_KEEP_DASHDASH)) {
+				ctx->argc--;
+				ctx->argv++;
+			}
+			break;
+		}
+
+		arg += 2;
+		if (internal_help && !strcmp(arg, "help-all"))
+			return usage_with_options_internal(usagestr, options, 1, ctx);
+		if (internal_help && !strcmp(arg, "help"))
+			return usage_with_options_internal(usagestr, options, 0, ctx);
+		if (!strcmp(arg, "list-opts"))
+			return PARSE_OPT_LIST_OPTS;
+		if (!strcmp(arg, "list-cmds"))
+			return PARSE_OPT_LIST_SUBCMDS;
+		switch (parse_long_opt(ctx, arg, options)) {
+		case -1:
+			return parse_options_usage(usagestr, options, arg, 0);
+		case -2:
+			goto unknown;
+		case -3:
+			excl_short_opt = 0;
+			goto exclusive;
+		default:
+			break;
+		}
+		continue;
+unknown:
+		if (!(ctx->flags & PARSE_OPT_KEEP_UNKNOWN))
+			return PARSE_OPT_UNKNOWN;
+		ctx->out[ctx->cpidx++] = ctx->argv[0];
+		ctx->opt = NULL;
+	}
+	return PARSE_OPT_DONE;
+
+exclusive:
+	parse_options_usage(usagestr, options, arg, excl_short_opt);
+	if ((excl_short_opt && ctx->excl_opt->short_name) ||
+	    ctx->excl_opt->long_name == NULL) {
+		char opt = ctx->excl_opt->short_name;
+		parse_options_usage(NULL, options, &opt, 1);
+	} else {
+		parse_options_usage(NULL, options, ctx->excl_opt->long_name, 0);
+	}
+	return PARSE_OPT_HELP;
+}
+
+static int parse_options_end(struct parse_opt_ctx_t *ctx)
+{
+	memmove(ctx->out + ctx->cpidx, ctx->argv, ctx->argc * sizeof(*ctx->out));
+	ctx->out[ctx->cpidx + ctx->argc] = NULL;
+	return ctx->cpidx + ctx->argc;
+}
+
+int parse_options_subcommand(int argc, const char **argv, const struct option *options,
+			const char *const subcommands[], const char *usagestr[], int flags)
+{
+	struct parse_opt_ctx_t ctx;
+
+	/* build usage string if it's not provided */
+	if (subcommands && !usagestr[0]) {
+		char *buf = NULL;
+
+		astrcatf(&buf, "%s %s [<options>] {", subcmd_config.exec_name, argv[0]);
+
+		for (int i = 0; subcommands[i]; i++) {
+			if (i)
+				astrcat(&buf, "|");
+			astrcat(&buf, subcommands[i]);
+		}
+		astrcat(&buf, "}");
+
+		usagestr[0] = buf;
+	}
+
+	parse_options_start(&ctx, argc, argv, flags);
+	switch (parse_options_step(&ctx, options, usagestr)) {
+	case PARSE_OPT_HELP:
+		exit(129);
+	case PARSE_OPT_DONE:
+		break;
+	case PARSE_OPT_LIST_OPTS:
+		while (options->type != OPTION_END) {
+			if (options->long_name)
+				printf("--%s ", options->long_name);
+			options++;
+		}
+		putchar('\n');
+		exit(130);
+	case PARSE_OPT_LIST_SUBCMDS:
+		if (subcommands) {
+			for (int i = 0; subcommands[i]; i++)
+				printf("%s ", subcommands[i]);
+		}
+		putchar('\n');
+		exit(130);
+	default: /* PARSE_OPT_UNKNOWN */
+		if (ctx.argv[0][1] == '-')
+			astrcatf(&error_buf, "unknown option `%s'",
+				 ctx.argv[0] + 2);
+		else
+			astrcatf(&error_buf, "unknown switch `%c'", *ctx.opt);
+		usage_with_options(usagestr, options);
+	}
+
+	return parse_options_end(&ctx);
+}
+
+int parse_options(int argc, const char **argv, const struct option *options,
+		  const char * const usagestr[], int flags)
+{
+	return parse_options_subcommand(argc, argv, options, NULL,
+					(const char **) usagestr, flags);
+}
+
+#define USAGE_OPTS_WIDTH 24
+#define USAGE_GAP         2
+
+static void print_option_help(const struct option *opts, int full)
+{
+	size_t pos;
+	int pad;
+
+	if (opts->type == OPTION_GROUP) {
+		fputc('\n', stderr);
+		if (*opts->help)
+			fprintf(stderr, "%s\n", opts->help);
+		return;
+	}
+	if (!full && (opts->flags & PARSE_OPT_HIDDEN))
+		return;
+	if (opts->flags & PARSE_OPT_DISABLED)
+		return;
+
+	pos = fprintf(stderr, "    ");
+	if (opts->short_name)
+		pos += fprintf(stderr, "-%c", opts->short_name);
+	else
+		pos += fprintf(stderr, "    ");
+
+	if (opts->long_name && opts->short_name)
+		pos += fprintf(stderr, ", ");
+	if (opts->long_name)
+		pos += fprintf(stderr, "--%s", opts->long_name);
+
+	switch (opts->type) {
+	case OPTION_ARGUMENT:
+		break;
+	case OPTION_LONG:
+	case OPTION_ULONG:
+	case OPTION_U64:
+	case OPTION_INTEGER:
+	case OPTION_UINTEGER:
+		if (opts->flags & PARSE_OPT_OPTARG)
+			if (opts->long_name)
+				pos += fprintf(stderr, "[=<n>]");
+			else
+				pos += fprintf(stderr, "[<n>]");
+		else
+			pos += fprintf(stderr, " <n>");
+		break;
+	case OPTION_CALLBACK:
+		if (opts->flags & PARSE_OPT_NOARG)
+			break;
+		/* FALLTHROUGH */
+	case OPTION_STRING:
+		if (opts->argh) {
+			if (opts->flags & PARSE_OPT_OPTARG)
+				if (opts->long_name)
+					pos += fprintf(stderr, "[=<%s>]", opts->argh);
+				else
+					pos += fprintf(stderr, "[<%s>]", opts->argh);
+			else
+				pos += fprintf(stderr, " <%s>", opts->argh);
+		} else {
+			if (opts->flags & PARSE_OPT_OPTARG)
+				if (opts->long_name)
+					pos += fprintf(stderr, "[=...]");
+				else
+					pos += fprintf(stderr, "[...]");
+			else
+				pos += fprintf(stderr, " ...");
+		}
+		break;
+	default: /* OPTION_{BIT,BOOLEAN,SET_UINT,SET_PTR} */
+	case OPTION_END:
+	case OPTION_GROUP:
+	case OPTION_BIT:
+	case OPTION_BOOLEAN:
+	case OPTION_INCR:
+	case OPTION_SET_UINT:
+	case OPTION_SET_PTR:
+		break;
+	}
+
+	if (pos <= USAGE_OPTS_WIDTH)
+		pad = USAGE_OPTS_WIDTH - pos;
+	else {
+		fputc('\n', stderr);
+		pad = USAGE_OPTS_WIDTH;
+	}
+	fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help);
+	if (opts->flags & PARSE_OPT_NOBUILD)
+		fprintf(stderr, "%*s(not built-in because %s)\n",
+			USAGE_OPTS_WIDTH + USAGE_GAP, "",
+			opts->build_opt);
+}
+
+static int option__cmp(const void *va, const void *vb)
+{
+	const struct option *a = va, *b = vb;
+	int sa = tolower(a->short_name), sb = tolower(b->short_name), ret;
+
+	if (sa == 0)
+		sa = 'z' + 1;
+	if (sb == 0)
+		sb = 'z' + 1;
+
+	ret = sa - sb;
+
+	if (ret == 0) {
+		const char *la = a->long_name ?: "",
+			   *lb = b->long_name ?: "";
+		ret = strcmp(la, lb);
+	}
+
+	return ret;
+}
+
+static struct option *options__order(const struct option *opts)
+{
+	int nr_opts = 0, len;
+	const struct option *o = opts;
+	struct option *ordered;
+
+	for (o = opts; o->type != OPTION_END; o++)
+		++nr_opts;
+
+	len = sizeof(*o) * (nr_opts + 1);
+	ordered = malloc(len);
+	if (!ordered)
+		goto out;
+	memcpy(ordered, opts, len);
+
+	qsort(ordered, nr_opts, sizeof(*o), option__cmp);
+out:
+	return ordered;
+}
+
+static bool option__in_argv(const struct option *opt, const struct parse_opt_ctx_t *ctx)
+{
+	int i;
+
+	for (i = 1; i < ctx->argc; ++i) {
+		const char *arg = ctx->argv[i];
+
+		if (arg[0] != '-') {
+			if (arg[1] == '\0') {
+				if (arg[0] == opt->short_name)
+					return true;
+				continue;
+			}
+
+			if (opt->long_name && strcmp(opt->long_name, arg) == 0)
+				return true;
+
+			if (opt->help && strcasestr(opt->help, arg) != NULL)
+				return true;
+
+			continue;
+		}
+
+		if (arg[1] == opt->short_name ||
+		    (arg[1] == '-' && opt->long_name && strcmp(opt->long_name, arg + 2) == 0))
+			return true;
+	}
+
+	return false;
+}
+
+static int usage_with_options_internal(const char * const *usagestr,
+				       const struct option *opts, int full,
+				       struct parse_opt_ctx_t *ctx)
+{
+	struct option *ordered;
+
+	if (!usagestr)
+		return PARSE_OPT_HELP;
+
+	setup_pager();
+
+	if (error_buf) {
+		fprintf(stderr, "  Error: %s\n", error_buf);
+		zfree(&error_buf);
+	}
+
+	fprintf(stderr, "\n Usage: %s\n", *usagestr++);
+	while (*usagestr && **usagestr)
+		fprintf(stderr, "    or: %s\n", *usagestr++);
+	while (*usagestr) {
+		fprintf(stderr, "%s%s\n",
+				**usagestr ? "    " : "",
+				*usagestr);
+		usagestr++;
+	}
+
+	if (opts->type != OPTION_GROUP)
+		fputc('\n', stderr);
+
+	ordered = options__order(opts);
+	if (ordered)
+		opts = ordered;
+
+	for (  ; opts->type != OPTION_END; opts++) {
+		if (ctx && ctx->argc > 1 && !option__in_argv(opts, ctx))
+			continue;
+		print_option_help(opts, full);
+	}
+
+	fputc('\n', stderr);
+
+	free(ordered);
+
+	return PARSE_OPT_HELP;
+}
+
+void usage_with_options(const char * const *usagestr,
+			const struct option *opts)
+{
+	usage_with_options_internal(usagestr, opts, 0, NULL);
+	exit(129);
+}
+
+void usage_with_options_msg(const char * const *usagestr,
+			    const struct option *opts, const char *fmt, ...)
+{
+	va_list ap;
+	char *tmp = error_buf;
+
+	va_start(ap, fmt);
+	if (vasprintf(&error_buf, fmt, ap) == -1)
+		die("vasprintf failed");
+	va_end(ap);
+
+	free(tmp);
+
+	usage_with_options_internal(usagestr, opts, 0, NULL);
+	exit(129);
+}
+
+int parse_options_usage(const char * const *usagestr,
+			const struct option *opts,
+			const char *optstr, bool short_opt)
+{
+	if (!usagestr)
+		goto opt;
+
+	fprintf(stderr, "\n Usage: %s\n", *usagestr++);
+	while (*usagestr && **usagestr)
+		fprintf(stderr, "    or: %s\n", *usagestr++);
+	while (*usagestr) {
+		fprintf(stderr, "%s%s\n",
+				**usagestr ? "    " : "",
+				*usagestr);
+		usagestr++;
+	}
+	fputc('\n', stderr);
+
+opt:
+	for (  ; opts->type != OPTION_END; opts++) {
+		if (short_opt) {
+			if (opts->short_name == *optstr) {
+				print_option_help(opts, 0);
+				break;
+			}
+			continue;
+		}
+
+		if (opts->long_name == NULL)
+			continue;
+
+		if (strstarts(opts->long_name, optstr))
+			print_option_help(opts, 0);
+		if (strstarts("no-", optstr) &&
+		    strstarts(opts->long_name, optstr + 3))
+			print_option_help(opts, 0);
+	}
+
+	return PARSE_OPT_HELP;
+}
+
+
+int parse_opt_verbosity_cb(const struct option *opt,
+			   const char *arg __maybe_unused,
+			   int unset)
+{
+	int *target = opt->value;
+
+	if (unset)
+		/* --no-quiet, --no-verbose */
+		*target = 0;
+	else if (opt->short_name == 'v') {
+		if (*target >= 0)
+			(*target)++;
+		else
+			*target = 1;
+	} else {
+		if (*target <= 0)
+			(*target)--;
+		else
+			*target = -1;
+	}
+	return 0;
+}
+
+static struct option *
+find_option(struct option *opts, int shortopt, const char *longopt)
+{
+	for (; opts->type != OPTION_END; opts++) {
+		if ((shortopt && opts->short_name == shortopt) ||
+		    (opts->long_name && longopt &&
+		     !strcmp(opts->long_name, longopt)))
+			return opts;
+	}
+	return NULL;
+}
+
+void set_option_flag(struct option *opts, int shortopt, const char *longopt,
+		     int flag)
+{
+	struct option *opt = find_option(opts, shortopt, longopt);
+
+	if (opt)
+		opt->flags |= flag;
+	return;
+}
+
+void set_option_nobuild(struct option *opts, int shortopt,
+			const char *longopt,
+			const char *build_opt,
+			bool can_skip)
+{
+	struct option *opt = find_option(opts, shortopt, longopt);
+
+	if (!opt)
+		return;
+
+	opt->flags |= PARSE_OPT_NOBUILD;
+	opt->flags |= can_skip ? PARSE_OPT_CANSKIP : 0;
+	opt->build_opt = build_opt;
+}
diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h
new file mode 100644
index 000000000..d2414144e
--- /dev/null
+++ b/tools/lib/subcmd/parse-options.h
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SUBCMD_PARSE_OPTIONS_H
+#define __SUBCMD_PARSE_OPTIONS_H
+
+#include <linux/kernel.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#ifndef NORETURN
+#define NORETURN __attribute__((__noreturn__))
+#endif
+
+enum parse_opt_type {
+	/* special types */
+	OPTION_END,
+	OPTION_ARGUMENT,
+	OPTION_GROUP,
+	/* options with no arguments */
+	OPTION_BIT,
+	OPTION_BOOLEAN,
+	OPTION_INCR,
+	OPTION_SET_UINT,
+	OPTION_SET_PTR,
+	/* options with arguments (usually) */
+	OPTION_STRING,
+	OPTION_INTEGER,
+	OPTION_LONG,
+	OPTION_ULONG,
+	OPTION_CALLBACK,
+	OPTION_U64,
+	OPTION_UINTEGER,
+};
+
+enum parse_opt_flags {
+	PARSE_OPT_KEEP_DASHDASH = 1,
+	PARSE_OPT_STOP_AT_NON_OPTION = 2,
+	PARSE_OPT_KEEP_ARGV0 = 4,
+	PARSE_OPT_KEEP_UNKNOWN = 8,
+	PARSE_OPT_NO_INTERNAL_HELP = 16,
+};
+
+enum parse_opt_option_flags {
+	PARSE_OPT_OPTARG  = 1,
+	PARSE_OPT_NOARG   = 2,
+	PARSE_OPT_NONEG   = 4,
+	PARSE_OPT_HIDDEN  = 8,
+	PARSE_OPT_LASTARG_DEFAULT = 16,
+	PARSE_OPT_DISABLED = 32,
+	PARSE_OPT_EXCLUSIVE = 64,
+	PARSE_OPT_NOEMPTY  = 128,
+	PARSE_OPT_NOBUILD  = 256,
+	PARSE_OPT_CANSKIP  = 512,
+};
+
+struct option;
+typedef int parse_opt_cb(const struct option *, const char *arg, int unset);
+
+/*
+ * `type`::
+ *   holds the type of the option, you must have an OPTION_END last in your
+ *   array.
+ *
+ * `short_name`::
+ *   the character to use as a short option name, '\0' if none.
+ *
+ * `long_name`::
+ *   the long option name, without the leading dashes, NULL if none.
+ *
+ * `value`::
+ *   stores pointers to the values to be filled.
+ *
+ * `argh`::
+ *   token to explain the kind of argument this option wants. Keep it
+ *   homogeneous across the repository.
+ *
+ * `help`::
+ *   the short help associated to what the option does.
+ *   Must never be NULL (except for OPTION_END).
+ *   OPTION_GROUP uses this pointer to store the group header.
+ *
+ * `flags`::
+ *   mask of parse_opt_option_flags.
+ *   PARSE_OPT_OPTARG: says that the argument is optional (not for BOOLEANs)
+ *   PARSE_OPT_NOARG: says that this option takes no argument, for CALLBACKs
+ *   PARSE_OPT_NONEG: says that this option cannot be negated
+ *   PARSE_OPT_HIDDEN this option is skipped in the default usage, showed in
+ *                    the long one.
+ *
+ * `callback`::
+ *   pointer to the callback to use for OPTION_CALLBACK.
+ *
+ * `defval`::
+ *   default value to fill (*->value) with for PARSE_OPT_OPTARG.
+ *   OPTION_{BIT,SET_UINT,SET_PTR} store the {mask,integer,pointer} to put in
+ *   the value when met.
+ *   CALLBACKS can use it like they want.
+ *
+ * `set`::
+ *   whether an option was set by the user
+ */
+struct option {
+	enum parse_opt_type type;
+	int short_name;
+	const char *long_name;
+	void *value;
+	const char *argh;
+	const char *help;
+	const char *build_opt;
+
+	int flags;
+	parse_opt_cb *callback;
+	intptr_t defval;
+	bool *set;
+	void *data;
+	const struct option *parent;
+};
+
+#define check_vtype(v, type) ( BUILD_BUG_ON_ZERO(!__builtin_types_compatible_p(typeof(v), type)) + v )
+
+#define OPT_END()                   { .type = OPTION_END }
+#define OPT_PARENT(p)               { .type = OPTION_END, .parent = (p) }
+#define OPT_ARGUMENT(l, h)          { .type = OPTION_ARGUMENT, .long_name = (l), .help = (h) }
+#define OPT_GROUP(h)                { .type = OPTION_GROUP, .help = (h) }
+#define OPT_BIT(s, l, v, h, b)      { .type = OPTION_BIT, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h), .defval = (b) }
+#define OPT_BOOLEAN(s, l, v, h)     { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), .value = check_vtype(v, bool *), .help = (h) }
+#define OPT_BOOLEAN_FLAG(s, l, v, h, f)     { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), .value = check_vtype(v, bool *), .help = (h), .flags = (f) }
+#define OPT_BOOLEAN_SET(s, l, v, os, h) \
+	{ .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), \
+	.value = check_vtype(v, bool *), .help = (h), \
+	.set = check_vtype(os, bool *)}
+#define OPT_INCR(s, l, v, h)        { .type = OPTION_INCR, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) }
+#define OPT_SET_UINT(s, l, v, h, i)  { .type = OPTION_SET_UINT, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h), .defval = (i) }
+#define OPT_SET_PTR(s, l, v, h, p)  { .type = OPTION_SET_PTR, .short_name = (s), .long_name = (l), .value = (v), .help = (h), .defval = (p) }
+#define OPT_INTEGER(s, l, v, h)     { .type = OPTION_INTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) }
+#define OPT_UINTEGER(s, l, v, h)    { .type = OPTION_UINTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h) }
+#define OPT_LONG(s, l, v, h)        { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) }
+#define OPT_ULONG(s, l, v, h)        { .type = OPTION_ULONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned long *), .help = (h) }
+#define OPT_U64(s, l, v, h)         { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) }
+#define OPT_STRING(s, l, v, a, h)   { .type = OPTION_STRING,  .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), .argh = (a), .help = (h) }
+#define OPT_STRING_OPTARG(s, l, v, a, h, d) \
+	{ .type = OPTION_STRING,  .short_name = (s), .long_name = (l), \
+	  .value = check_vtype(v, const char **), .argh =(a), .help = (h), \
+	  .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d) }
+#define OPT_STRING_OPTARG_SET(s, l, v, os, a, h, d) \
+	{ .type = OPTION_STRING, .short_name = (s), .long_name = (l), \
+	  .value = check_vtype(v, const char **), .argh = (a), .help = (h), \
+	  .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d), \
+	  .set = check_vtype(os, bool *)}
+#define OPT_STRING_NOEMPTY(s, l, v, a, h)   { .type = OPTION_STRING,  .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), .argh = (a), .help = (h), .flags = PARSE_OPT_NOEMPTY}
+#define OPT_DATE(s, l, v, h) \
+	{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb }
+#define OPT_CALLBACK(s, l, v, a, h, f) \
+	{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f) }
+#define OPT_CALLBACK_SET(s, l, v, os, a, h, f) \
+	{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f), .set = check_vtype(os, bool *)}
+#define OPT_CALLBACK_NOOPT(s, l, v, a, h, f) \
+	{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG }
+#define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \
+	{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT }
+#define OPT_CALLBACK_DEFAULT_NOOPT(s, l, v, a, h, f, d) \
+	{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l),\
+	.value = (v), .arg = (a), .help = (h), .callback = (f), .defval = (intptr_t)d,\
+	.flags = PARSE_OPT_LASTARG_DEFAULT | PARSE_OPT_NOARG}
+#define OPT_CALLBACK_OPTARG(s, l, v, d, a, h, f) \
+	{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), \
+	  .value = (v), .argh = (a), .help = (h), .callback = (f), \
+	  .flags = PARSE_OPT_OPTARG, .data = (d) }
+
+/* parse_options() will filter out the processed options and leave the
+ * non-option argments in argv[].
+ * Returns the number of arguments left in argv[].
+ *
+ * NOTE: parse_options() and parse_options_subcommand() may call exit() in the
+ * case of an error (or for 'special' options like --list-cmds or --list-opts).
+ */
+extern int parse_options(int argc, const char **argv,
+                         const struct option *options,
+                         const char * const usagestr[], int flags);
+
+extern int parse_options_subcommand(int argc, const char **argv,
+				const struct option *options,
+				const char *const subcommands[],
+				const char *usagestr[], int flags);
+
+extern NORETURN void usage_with_options(const char * const *usagestr,
+                                        const struct option *options);
+extern NORETURN __attribute__((format(printf,3,4)))
+void usage_with_options_msg(const char * const *usagestr,
+			    const struct option *options,
+			    const char *fmt, ...);
+
+/*----- incremantal advanced APIs -----*/
+
+enum {
+	PARSE_OPT_HELP = -1,
+	PARSE_OPT_DONE,
+	PARSE_OPT_LIST_OPTS,
+	PARSE_OPT_LIST_SUBCMDS,
+	PARSE_OPT_UNKNOWN,
+};
+
+/*
+ * It's okay for the caller to consume argv/argc in the usual way.
+ * Other fields of that structure are private to parse-options and should not
+ * be modified in any way.
+ */
+struct parse_opt_ctx_t {
+	const char **argv;
+	const char **out;
+	int argc, cpidx;
+	const char *opt;
+	const struct option *excl_opt;
+	int flags;
+};
+
+extern int parse_options_usage(const char * const *usagestr,
+			       const struct option *opts,
+			       const char *optstr,
+			       bool short_opt);
+
+
+/*----- some often used options -----*/
+extern int parse_opt_abbrev_cb(const struct option *, const char *, int);
+extern int parse_opt_approxidate_cb(const struct option *, const char *, int);
+extern int parse_opt_verbosity_cb(const struct option *, const char *, int);
+
+#define OPT__VERBOSE(var)  OPT_BOOLEAN('v', "verbose", (var), "be verbose")
+#define OPT__QUIET(var)    OPT_BOOLEAN('q', "quiet",   (var), "be quiet")
+#define OPT__VERBOSITY(var) \
+	{ OPTION_CALLBACK, 'v', "verbose", (var), NULL, "be more verbose", \
+	  PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }, \
+	{ OPTION_CALLBACK, 'q', "quiet", (var), NULL, "be more quiet", \
+	  PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }
+#define OPT__DRY_RUN(var)  OPT_BOOLEAN('n', "dry-run", (var), "dry run")
+#define OPT__ABBREV(var)  \
+	{ OPTION_CALLBACK, 0, "abbrev", (var), "n", \
+	  "use <n> digits to display SHA-1s", \
+	  PARSE_OPT_OPTARG, &parse_opt_abbrev_cb, 0 }
+
+extern const char *parse_options_fix_filename(const char *prefix, const char *file);
+
+void set_option_flag(struct option *opts, int sopt, const char *lopt, int flag);
+void set_option_nobuild(struct option *opts, int shortopt, const char *longopt,
+			const char *build_opt, bool can_skip);
+
+#endif /* __SUBCMD_PARSE_OPTIONS_H */
diff --git a/tools/lib/subcmd/run-command.c b/tools/lib/subcmd/run-command.c
new file mode 100644
index 000000000..5cdac2162
--- /dev/null
+++ b/tools/lib/subcmd/run-command.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <linux/string.h>
+#include <errno.h>
+#include <sys/wait.h>
+#include "subcmd-util.h"
+#include "run-command.h"
+#include "exec-cmd.h"
+
+#define STRERR_BUFSIZE 128
+
+static inline void close_pair(int fd[2])
+{
+	close(fd[0]);
+	close(fd[1]);
+}
+
+static inline void dup_devnull(int to)
+{
+	int fd = open("/dev/null", O_RDWR);
+	dup2(fd, to);
+	close(fd);
+}
+
+int start_command(struct child_process *cmd)
+{
+	int need_in, need_out, need_err;
+	int fdin[2], fdout[2], fderr[2];
+	char sbuf[STRERR_BUFSIZE];
+
+	/*
+	 * In case of errors we must keep the promise to close FDs
+	 * that have been passed in via ->in and ->out.
+	 */
+
+	need_in = !cmd->no_stdin && cmd->in < 0;
+	if (need_in) {
+		if (pipe(fdin) < 0) {
+			if (cmd->out > 0)
+				close(cmd->out);
+			return -ERR_RUN_COMMAND_PIPE;
+		}
+		cmd->in = fdin[1];
+	}
+
+	need_out = !cmd->no_stdout
+		&& !cmd->stdout_to_stderr
+		&& cmd->out < 0;
+	if (need_out) {
+		if (pipe(fdout) < 0) {
+			if (need_in)
+				close_pair(fdin);
+			else if (cmd->in)
+				close(cmd->in);
+			return -ERR_RUN_COMMAND_PIPE;
+		}
+		cmd->out = fdout[0];
+	}
+
+	need_err = !cmd->no_stderr && cmd->err < 0;
+	if (need_err) {
+		if (pipe(fderr) < 0) {
+			if (need_in)
+				close_pair(fdin);
+			else if (cmd->in)
+				close(cmd->in);
+			if (need_out)
+				close_pair(fdout);
+			else if (cmd->out)
+				close(cmd->out);
+			return -ERR_RUN_COMMAND_PIPE;
+		}
+		cmd->err = fderr[0];
+	}
+
+	fflush(NULL);
+	cmd->pid = fork();
+	if (!cmd->pid) {
+		if (cmd->no_stdin)
+			dup_devnull(0);
+		else if (need_in) {
+			dup2(fdin[0], 0);
+			close_pair(fdin);
+		} else if (cmd->in) {
+			dup2(cmd->in, 0);
+			close(cmd->in);
+		}
+
+		if (cmd->no_stderr)
+			dup_devnull(2);
+		else if (need_err) {
+			dup2(fderr[1], 2);
+			close_pair(fderr);
+		}
+
+		if (cmd->no_stdout)
+			dup_devnull(1);
+		else if (cmd->stdout_to_stderr)
+			dup2(2, 1);
+		else if (need_out) {
+			dup2(fdout[1], 1);
+			close_pair(fdout);
+		} else if (cmd->out > 1) {
+			dup2(cmd->out, 1);
+			close(cmd->out);
+		}
+
+		if (cmd->dir && chdir(cmd->dir))
+			die("exec %s: cd to %s failed (%s)", cmd->argv[0],
+			    cmd->dir, str_error_r(errno, sbuf, sizeof(sbuf)));
+		if (cmd->env) {
+			for (; *cmd->env; cmd->env++) {
+				if (strchr(*cmd->env, '='))
+					putenv((char*)*cmd->env);
+				else
+					unsetenv(*cmd->env);
+			}
+		}
+		if (cmd->preexec_cb)
+			cmd->preexec_cb();
+		if (cmd->exec_cmd) {
+			execv_cmd(cmd->argv);
+		} else {
+			execvp(cmd->argv[0], (char *const*) cmd->argv);
+		}
+		exit(127);
+	}
+
+	if (cmd->pid < 0) {
+		int err = errno;
+		if (need_in)
+			close_pair(fdin);
+		else if (cmd->in)
+			close(cmd->in);
+		if (need_out)
+			close_pair(fdout);
+		else if (cmd->out)
+			close(cmd->out);
+		if (need_err)
+			close_pair(fderr);
+		return err == ENOENT ?
+			-ERR_RUN_COMMAND_EXEC :
+			-ERR_RUN_COMMAND_FORK;
+	}
+
+	if (need_in)
+		close(fdin[0]);
+	else if (cmd->in)
+		close(cmd->in);
+
+	if (need_out)
+		close(fdout[1]);
+	else if (cmd->out)
+		close(cmd->out);
+
+	if (need_err)
+		close(fderr[1]);
+
+	return 0;
+}
+
+static int wait_or_whine(pid_t pid)
+{
+	char sbuf[STRERR_BUFSIZE];
+
+	for (;;) {
+		int status, code;
+		pid_t waiting = waitpid(pid, &status, 0);
+
+		if (waiting < 0) {
+			if (errno == EINTR)
+				continue;
+			fprintf(stderr, " Error: waitpid failed (%s)",
+				str_error_r(errno, sbuf, sizeof(sbuf)));
+			return -ERR_RUN_COMMAND_WAITPID;
+		}
+		if (waiting != pid)
+			return -ERR_RUN_COMMAND_WAITPID_WRONG_PID;
+		if (WIFSIGNALED(status))
+			return -ERR_RUN_COMMAND_WAITPID_SIGNAL;
+
+		if (!WIFEXITED(status))
+			return -ERR_RUN_COMMAND_WAITPID_NOEXIT;
+		code = WEXITSTATUS(status);
+		switch (code) {
+		case 127:
+			return -ERR_RUN_COMMAND_EXEC;
+		case 0:
+			return 0;
+		default:
+			return -code;
+		}
+	}
+}
+
+int finish_command(struct child_process *cmd)
+{
+	return wait_or_whine(cmd->pid);
+}
+
+int run_command(struct child_process *cmd)
+{
+	int code = start_command(cmd);
+	if (code)
+		return code;
+	return finish_command(cmd);
+}
+
+static void prepare_run_command_v_opt(struct child_process *cmd,
+				      const char **argv,
+				      int opt)
+{
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->argv = argv;
+	cmd->no_stdin = opt & RUN_COMMAND_NO_STDIN ? 1 : 0;
+	cmd->exec_cmd = opt & RUN_EXEC_CMD ? 1 : 0;
+	cmd->stdout_to_stderr = opt & RUN_COMMAND_STDOUT_TO_STDERR ? 1 : 0;
+}
+
+int run_command_v_opt(const char **argv, int opt)
+{
+	struct child_process cmd;
+	prepare_run_command_v_opt(&cmd, argv, opt);
+	return run_command(&cmd);
+}
diff --git a/tools/lib/subcmd/run-command.h b/tools/lib/subcmd/run-command.h
new file mode 100644
index 000000000..17d969c6a
--- /dev/null
+++ b/tools/lib/subcmd/run-command.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SUBCMD_RUN_COMMAND_H
+#define __SUBCMD_RUN_COMMAND_H
+
+#include <unistd.h>
+
+enum {
+	ERR_RUN_COMMAND_FORK = 10000,
+	ERR_RUN_COMMAND_EXEC,
+	ERR_RUN_COMMAND_PIPE,
+	ERR_RUN_COMMAND_WAITPID,
+	ERR_RUN_COMMAND_WAITPID_WRONG_PID,
+	ERR_RUN_COMMAND_WAITPID_SIGNAL,
+	ERR_RUN_COMMAND_WAITPID_NOEXIT,
+};
+#define IS_RUN_COMMAND_ERR(x) (-(x) >= ERR_RUN_COMMAND_FORK)
+
+struct child_process {
+	const char **argv;
+	pid_t pid;
+	/*
+	 * Using .in, .out, .err:
+	 * - Specify 0 for no redirections (child inherits stdin, stdout,
+	 *   stderr from parent).
+	 * - Specify -1 to have a pipe allocated as follows:
+	 *     .in: returns the writable pipe end; parent writes to it,
+	 *          the readable pipe end becomes child's stdin
+	 *     .out, .err: returns the readable pipe end; parent reads from
+	 *          it, the writable pipe end becomes child's stdout/stderr
+	 *   The caller of start_command() must close the returned FDs
+	 *   after it has completed reading from/writing to it!
+	 * - Specify > 0 to set a channel to a particular FD as follows:
+	 *     .in: a readable FD, becomes child's stdin
+	 *     .out: a writable FD, becomes child's stdout/stderr
+	 *     .err > 0 not supported
+	 *   The specified FD is closed by start_command(), even in case
+	 *   of errors!
+	 */
+	int in;
+	int out;
+	int err;
+	const char *dir;
+	const char *const *env;
+	unsigned no_stdin:1;
+	unsigned no_stdout:1;
+	unsigned no_stderr:1;
+	unsigned exec_cmd:1; /* if this is to be external sub-command */
+	unsigned stdout_to_stderr:1;
+	void (*preexec_cb)(void);
+};
+
+int start_command(struct child_process *);
+int finish_command(struct child_process *);
+int run_command(struct child_process *);
+
+#define RUN_COMMAND_NO_STDIN 1
+#define RUN_EXEC_CMD	     2	/*If this is to be external sub-command */
+#define RUN_COMMAND_STDOUT_TO_STDERR 4
+int run_command_v_opt(const char **argv, int opt);
+
+#endif /* __SUBCMD_RUN_COMMAND_H */
diff --git a/tools/lib/subcmd/sigchain.c b/tools/lib/subcmd/sigchain.c
new file mode 100644
index 000000000..f0fe3dbef
--- /dev/null
+++ b/tools/lib/subcmd/sigchain.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <signal.h>
+#include "subcmd-util.h"
+#include "sigchain.h"
+
+#define SIGCHAIN_MAX_SIGNALS 32
+
+struct sigchain_signal {
+	sigchain_fun *old;
+	int n;
+	int alloc;
+};
+static struct sigchain_signal signals[SIGCHAIN_MAX_SIGNALS];
+
+static void check_signum(int sig)
+{
+	if (sig < 1 || sig >= SIGCHAIN_MAX_SIGNALS)
+		die("BUG: signal out of range: %d", sig);
+}
+
+static int sigchain_push(int sig, sigchain_fun f)
+{
+	struct sigchain_signal *s = signals + sig;
+	check_signum(sig);
+
+	ALLOC_GROW(s->old, s->n + 1, s->alloc);
+	s->old[s->n] = signal(sig, f);
+	if (s->old[s->n] == SIG_ERR)
+		return -1;
+	s->n++;
+	return 0;
+}
+
+int sigchain_pop(int sig)
+{
+	struct sigchain_signal *s = signals + sig;
+	check_signum(sig);
+	if (s->n < 1)
+		return 0;
+
+	if (signal(sig, s->old[s->n - 1]) == SIG_ERR)
+		return -1;
+	s->n--;
+	return 0;
+}
+
+void sigchain_push_common(sigchain_fun f)
+{
+	sigchain_push(SIGINT, f);
+	sigchain_push(SIGHUP, f);
+	sigchain_push(SIGTERM, f);
+	sigchain_push(SIGQUIT, f);
+	sigchain_push(SIGPIPE, f);
+}
diff --git a/tools/lib/subcmd/sigchain.h b/tools/lib/subcmd/sigchain.h
new file mode 100644
index 000000000..1ec663af4
--- /dev/null
+++ b/tools/lib/subcmd/sigchain.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SUBCMD_SIGCHAIN_H
+#define __SUBCMD_SIGCHAIN_H
+
+typedef void (*sigchain_fun)(int);
+
+int sigchain_pop(int sig);
+
+void sigchain_push_common(sigchain_fun f);
+
+#endif /* __SUBCMD_SIGCHAIN_H */
diff --git a/tools/lib/subcmd/subcmd-config.c b/tools/lib/subcmd/subcmd-config.c
new file mode 100644
index 000000000..84a7cf6c7
--- /dev/null
+++ b/tools/lib/subcmd/subcmd-config.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "subcmd-config.h"
+
+#define UNDEFINED "SUBCMD_HAS_NOT_BEEN_INITIALIZED"
+
+struct subcmd_config subcmd_config = {
+	.exec_name	= UNDEFINED,
+	.prefix		= UNDEFINED,
+	.exec_path	= UNDEFINED,
+	.exec_path_env	= UNDEFINED,
+	.pager_env	= UNDEFINED,
+};
diff --git a/tools/lib/subcmd/subcmd-config.h b/tools/lib/subcmd/subcmd-config.h
new file mode 100644
index 000000000..9024dc17d
--- /dev/null
+++ b/tools/lib/subcmd/subcmd-config.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SUBCMD_CONFIG_H
+#define __PERF_SUBCMD_CONFIG_H
+
+struct subcmd_config {
+	const char *exec_name;
+	const char *prefix;
+	const char *exec_path;
+	const char *exec_path_env;
+	const char *pager_env;
+};
+
+extern struct subcmd_config subcmd_config;
+
+#endif /* __PERF_SUBCMD_CONFIG_H */
diff --git a/tools/lib/subcmd/subcmd-util.h b/tools/lib/subcmd/subcmd-util.h
new file mode 100644
index 000000000..b2aec04fc
--- /dev/null
+++ b/tools/lib/subcmd/subcmd-util.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SUBCMD_UTIL_H
+#define __SUBCMD_UTIL_H
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#define NORETURN __attribute__((__noreturn__))
+
+static inline void report(const char *prefix, const char *err, va_list params)
+{
+	char msg[1024];
+	vsnprintf(msg, sizeof(msg), err, params);
+	fprintf(stderr, " %s%s\n", prefix, msg);
+}
+
+static NORETURN inline void die(const char *err, ...)
+{
+	va_list params;
+
+	va_start(params, err);
+	report(" Fatal: ", err, params);
+	exit(128);
+	va_end(params);
+}
+
+#define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
+
+#define alloc_nr(x) (((x)+16)*3/2)
+
+/*
+ * Realloc the buffer pointed at by variable 'x' so that it can hold
+ * at least 'nr' entries; the number of entries currently allocated
+ * is 'alloc', using the standard growing factor alloc_nr() macro.
+ *
+ * DO NOT USE any expression with side-effect for 'x' or 'alloc'.
+ */
+#define ALLOC_GROW(x, nr, alloc) \
+	do { \
+		if ((nr) > alloc) { \
+			if (alloc_nr(alloc) < (nr)) \
+				alloc = (nr); \
+			else \
+				alloc = alloc_nr(alloc); \
+			x = xrealloc((x), alloc * sizeof(*(x))); \
+		} \
+	} while(0)
+
+static inline void *xrealloc(void *ptr, size_t size)
+{
+	void *ret = realloc(ptr, size);
+	if (!ret)
+		die("Out of memory, realloc failed");
+	return ret;
+}
+
+#define astrcatf(out, fmt, ...)						\
+({									\
+	char *tmp = *(out);						\
+	if (asprintf((out), "%s" fmt, tmp ?: "", ## __VA_ARGS__) == -1)	\
+		die("asprintf failed");					\
+	free(tmp);							\
+})
+
+static inline void astrcat(char **out, const char *add)
+{
+	char *tmp = *out;
+
+	if (asprintf(out, "%s%s", tmp ?: "", add) == -1)
+		die("asprintf failed");
+
+	free(tmp);
+}
+
+#endif /* __SUBCMD_UTIL_H */
diff --git a/tools/lib/symbol/kallsyms.c b/tools/lib/symbol/kallsyms.c
new file mode 100644
index 000000000..e335ac2b9
--- /dev/null
+++ b/tools/lib/symbol/kallsyms.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "symbol/kallsyms.h"
+#include "api/io.h"
+#include <stdio.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+u8 kallsyms2elf_type(char type)
+{
+	type = tolower(type);
+	return (type == 't' || type == 'w') ? STT_FUNC : STT_OBJECT;
+}
+
+bool kallsyms__is_function(char symbol_type)
+{
+	symbol_type = toupper(symbol_type);
+	return symbol_type == 'T' || symbol_type == 'W';
+}
+
+static void read_to_eol(struct io *io)
+{
+	int ch;
+
+	for (;;) {
+		ch = io__get_char(io);
+		if (ch < 0 || ch == '\n')
+			return;
+	}
+}
+
+int kallsyms__parse(const char *filename, void *arg,
+		    int (*process_symbol)(void *arg, const char *name,
+					  char type, u64 start))
+{
+	struct io io;
+	char bf[BUFSIZ];
+	int err;
+
+	io.fd = open(filename, O_RDONLY, 0);
+
+	if (io.fd < 0)
+		return -1;
+
+	io__init(&io, io.fd, bf, sizeof(bf));
+
+	err = 0;
+	while (!io.eof) {
+		__u64 start;
+		int ch;
+		size_t i;
+		char symbol_type;
+		char symbol_name[KSYM_NAME_LEN + 1];
+
+		if (io__get_hex(&io, &start) != ' ') {
+			read_to_eol(&io);
+			continue;
+		}
+		symbol_type = io__get_char(&io);
+		if (io__get_char(&io) != ' ') {
+			read_to_eol(&io);
+			continue;
+		}
+		for (i = 0; i < sizeof(symbol_name); i++) {
+			ch = io__get_char(&io);
+			if (ch < 0 || ch == '\n')
+				break;
+			symbol_name[i]  = ch;
+		}
+		symbol_name[i]  = '\0';
+
+		err = process_symbol(arg, symbol_name, symbol_type, start);
+		if (err)
+			break;
+	}
+
+	close(io.fd);
+	return err;
+}
diff --git a/tools/lib/symbol/kallsyms.h b/tools/lib/symbol/kallsyms.h
new file mode 100644
index 000000000..72ab98704
--- /dev/null
+++ b/tools/lib/symbol/kallsyms.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TOOLS_KALLSYMS_H_
+#define __TOOLS_KALLSYMS_H_ 1
+
+#include <elf.h>
+#include <linux/ctype.h>
+#include <linux/types.h>
+
+#ifndef KSYM_NAME_LEN
+#define KSYM_NAME_LEN 256
+#endif
+
+static inline u8 kallsyms2elf_binding(char type)
+{
+	if (type == 'W')
+		return STB_WEAK;
+
+	return isupper(type) ? STB_GLOBAL : STB_LOCAL;
+}
+
+u8 kallsyms2elf_type(char type);
+
+bool kallsyms__is_function(char symbol_type);
+
+int kallsyms__parse(const char *filename, void *arg,
+		    int (*process_symbol)(void *arg, const char *name,
+					  char type, u64 start));
+
+#endif /* __TOOLS_KALLSYMS_H_ */
diff --git a/tools/lib/traceevent/.gitignore b/tools/lib/traceevent/.gitignore
new file mode 100644
index 000000000..7123c70b9
--- /dev/null
+++ b/tools/lib/traceevent/.gitignore
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+TRACEEVENT-CFLAGS
+libtraceevent-dynamic-list
+libtraceevent.so.*
diff --git a/tools/lib/traceevent/Build b/tools/lib/traceevent/Build
new file mode 100644
index 000000000..f9a5d7957
--- /dev/null
+++ b/tools/lib/traceevent/Build
@@ -0,0 +1,8 @@
+libtraceevent-y += event-parse.o
+libtraceevent-y += event-plugin.o
+libtraceevent-y += trace-seq.o
+libtraceevent-y += parse-filter.o
+libtraceevent-y += parse-utils.o
+libtraceevent-y += kbuffer-parse.o
+libtraceevent-y += tep_strerror.o
+libtraceevent-y += event-parse-api.o
diff --git a/tools/lib/traceevent/Documentation/Makefile b/tools/lib/traceevent/Documentation/Makefile
new file mode 100644
index 000000000..aa72ab96c
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/Makefile
@@ -0,0 +1,207 @@
+include ../../../scripts/Makefile.include
+include ../../../scripts/utilities.mak
+
+# This Makefile and manpage XSL files were taken from tools/perf/Documentation
+# and modified for libtraceevent.
+
+MAN3_TXT= \
+	$(wildcard libtraceevent-*.txt) \
+	libtraceevent.txt
+
+MAN_TXT = $(MAN3_TXT)
+_MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT))
+_MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT))
+_DOC_MAN3=$(patsubst %.txt,%.3,$(MAN3_TXT))
+
+MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML))
+MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML))
+DOC_MAN3=$(addprefix $(OUTPUT),$(_DOC_MAN3))
+
+# Make the path relative to DESTDIR, not prefix
+ifndef DESTDIR
+prefix?=$(HOME)
+endif
+bindir?=$(prefix)/bin
+htmldir?=$(prefix)/share/doc/libtraceevent-doc
+pdfdir?=$(prefix)/share/doc/libtraceevent-doc
+mandir?=$(prefix)/share/man
+man3dir=$(mandir)/man3
+
+ASCIIDOC=asciidoc
+ASCIIDOC_EXTRA = --unsafe -f asciidoc.conf
+ASCIIDOC_HTML = xhtml11
+MANPAGE_XSL = manpage-normal.xsl
+XMLTO_EXTRA =
+INSTALL?=install
+RM ?= rm -f
+
+ifdef USE_ASCIIDOCTOR
+ASCIIDOC = asciidoctor
+ASCIIDOC_EXTRA = -a compat-mode
+ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions
+ASCIIDOC_EXTRA += -a mansource="libtraceevent" -a manmanual="libtraceevent Manual"
+ASCIIDOC_HTML = xhtml5
+endif
+
+XMLTO=xmlto
+
+_tmp_tool_path := $(call get-executable,$(ASCIIDOC))
+ifeq ($(_tmp_tool_path),)
+	missing_tools = $(ASCIIDOC)
+endif
+
+ifndef USE_ASCIIDOCTOR
+_tmp_tool_path := $(call get-executable,$(XMLTO))
+ifeq ($(_tmp_tool_path),)
+	missing_tools += $(XMLTO)
+endif
+endif
+
+#
+# For asciidoc ...
+#	-7.1.2,	no extra settings are needed.
+#	8.0-,	set ASCIIDOC8.
+#
+
+#
+# For docbook-xsl ...
+#	-1.68.1,	set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0)
+#	1.69.0,		no extra settings are needed?
+#	1.69.1-1.71.0,	set DOCBOOK_SUPPRESS_SP?
+#	1.71.1,		no extra settings are needed?
+#	1.72.0,		set DOCBOOK_XSL_172.
+#	1.73.0-,	set ASCIIDOC_NO_ROFF
+#
+
+#
+# If you had been using DOCBOOK_XSL_172 in an attempt to get rid
+# of 'the ".ft C" problem' in your generated manpages, and you
+# instead ended up with weird characters around callouts, try
+# using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8).
+#
+
+ifdef ASCIIDOC8
+ASCIIDOC_EXTRA += -a asciidoc7compatible
+endif
+ifdef DOCBOOK_XSL_172
+ASCIIDOC_EXTRA += -a libtraceevent-asciidoc-no-roff
+MANPAGE_XSL = manpage-1.72.xsl
+else
+	ifdef ASCIIDOC_NO_ROFF
+	# docbook-xsl after 1.72 needs the regular XSL, but will not
+	# pass-thru raw roff codes from asciidoc.conf, so turn them off.
+	ASCIIDOC_EXTRA += -a libtraceevent-asciidoc-no-roff
+	endif
+endif
+ifdef MAN_BOLD_LITERAL
+XMLTO_EXTRA += -m manpage-bold-literal.xsl
+endif
+ifdef DOCBOOK_SUPPRESS_SP
+XMLTO_EXTRA += -m manpage-suppress-sp.xsl
+endif
+
+SHELL_PATH ?= $(SHELL)
+# Shell quote;
+SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
+
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+export DESTDIR DESTDIR_SQ
+
+#
+# Please note that there is a minor bug in asciidoc.
+# The version after 6.0.3 _will_ include the patch found here:
+#   http://marc.theaimsgroup.com/?l=libtraceevent&m=111558757202243&w=2
+#
+# Until that version is released you may have to apply the patch
+# yourself - yes, all 6 characters of it!
+#
+QUIET_SUBDIR0  = +$(MAKE) -C # space to separate -C and subdir
+QUIET_SUBDIR1  =
+
+ifneq ($(findstring $(MAKEFLAGS),w),w)
+PRINT_DIR = --no-print-directory
+else # "make -w"
+NO_SUBDIR = :
+endif
+
+ifneq ($(findstring $(MAKEFLAGS),s),s)
+ifneq ($(V),1)
+	QUIET_ASCIIDOC	= @echo '  ASCIIDOC '$@;
+	QUIET_XMLTO	= @echo '  XMLTO    '$@;
+	QUIET_SUBDIR0	= +@subdir=
+	QUIET_SUBDIR1	= ;$(NO_SUBDIR) \
+			   echo '  SUBDIR   ' $$subdir; \
+			  $(MAKE) $(PRINT_DIR) -C $$subdir
+	export V
+endif
+endif
+
+all: html man
+
+man: man3
+man3: $(DOC_MAN3)
+
+html: $(MAN_HTML)
+
+$(MAN_HTML) $(DOC_MAN3): asciidoc.conf
+
+install: install-man
+
+check-man-tools:
+ifdef missing_tools
+	$(error "You need to install $(missing_tools) for man pages")
+endif
+
+do-install-man: man
+	$(call QUIET_INSTALL, Documentation-man) \
+		$(INSTALL) -d -m 755 $(DESTDIR)$(man3dir); \
+		$(INSTALL) -m 644 $(DOC_MAN3) $(DESTDIR)$(man3dir);
+
+install-man: check-man-tools man do-install-man
+
+uninstall: uninstall-man
+
+uninstall-man:
+	$(call QUIET_UNINST, Documentation-man) \
+		$(Q)$(RM) $(addprefix $(DESTDIR)$(man3dir)/,$(DOC_MAN3))
+
+
+ifdef missing_tools
+  DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed)
+else
+  DO_INSTALL_MAN = do-install-man
+endif
+
+CLEAN_FILES =					\
+	$(MAN_XML) $(addsuffix +,$(MAN_XML))	\
+	$(MAN_HTML) $(addsuffix +,$(MAN_HTML))	\
+	$(DOC_MAN3) *.3
+
+clean:
+	$(call QUIET_CLEAN, Documentation) $(RM) $(CLEAN_FILES)
+
+ifdef USE_ASCIIDOCTOR
+$(OUTPUT)%.3 : $(OUTPUT)%.txt
+	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+	$(ASCIIDOC) -b manpage -d manpage \
+		$(ASCIIDOC_EXTRA) -alibtraceevent_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \
+	mv $@+ $@
+endif
+
+$(OUTPUT)%.3 : $(OUTPUT)%.xml
+	$(QUIET_XMLTO)$(RM) $@ && \
+	$(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
+
+$(OUTPUT)%.xml : %.txt
+	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+	$(ASCIIDOC) -b docbook -d manpage \
+		$(ASCIIDOC_EXTRA) -alibtraceevent_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \
+	mv $@+ $@
+
+$(MAN_HTML): $(OUTPUT)%.html : %.txt
+	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+	$(ASCIIDOC) -b $(ASCIIDOC_HTML) -d manpage \
+		$(ASCIIDOC_EXTRA) -aperf_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \
+	mv $@+ $@
diff --git a/tools/lib/traceevent/Documentation/asciidoc.conf b/tools/lib/traceevent/Documentation/asciidoc.conf
new file mode 100644
index 000000000..07595717f
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/asciidoc.conf
@@ -0,0 +1,120 @@
+## linktep: macro
+#
+# Usage: linktep:command[manpage-section]
+#
+# Note, {0} is the manpage section, while {target} is the command.
+#
+# Show TEP link as: <command>(<section>); if section is defined, else just show
+# the command.
+
+[macros]
+(?su)[\\]?(?P<name>linktep):(?P<target>\S*?)\[(?P<attrlist>.*?)\]=
+
+[attributes]
+asterisk=&#42;
+plus=&#43;
+caret=&#94;
+startsb=&#91;
+endsb=&#93;
+tilde=&#126;
+
+ifdef::backend-docbook[]
+[linktep-inlinemacro]
+{0%{target}}
+{0#<citerefentry>}
+{0#<refentrytitle>{target}</refentrytitle><manvolnum>{0}</manvolnum>}
+{0#</citerefentry>}
+endif::backend-docbook[]
+
+ifdef::backend-docbook[]
+ifndef::tep-asciidoc-no-roff[]
+# "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this.
+# v1.72 breaks with this because it replaces dots not in roff requests.
+[listingblock]
+<example><title>{title}</title>
+<literallayout>
+ifdef::doctype-manpage[]
+&#10;.ft C&#10;
+endif::doctype-manpage[]
+|
+ifdef::doctype-manpage[]
+&#10;.ft&#10;
+endif::doctype-manpage[]
+</literallayout>
+{title#}</example>
+endif::tep-asciidoc-no-roff[]
+
+ifdef::tep-asciidoc-no-roff[]
+ifdef::doctype-manpage[]
+# The following two small workarounds insert a simple paragraph after screen
+[listingblock]
+<example><title>{title}</title>
+<literallayout>
+|
+</literallayout><simpara></simpara>
+{title#}</example>
+
+[verseblock]
+<formalpara{id? id="{id}"}><title>{title}</title><para>
+{title%}<literallayout{id? id="{id}"}>
+{title#}<literallayout>
+|
+</literallayout>
+{title#}</para></formalpara>
+{title%}<simpara></simpara>
+endif::doctype-manpage[]
+endif::tep-asciidoc-no-roff[]
+endif::backend-docbook[]
+
+ifdef::doctype-manpage[]
+ifdef::backend-docbook[]
+[header]
+template::[header-declarations]
+<refentry>
+<refmeta>
+<refentrytitle>{mantitle}</refentrytitle>
+<manvolnum>{manvolnum}</manvolnum>
+<refmiscinfo class="source">libtraceevent</refmiscinfo>
+<refmiscinfo class="version">{libtraceevent_version}</refmiscinfo>
+<refmiscinfo class="manual">libtraceevent Manual</refmiscinfo>
+</refmeta>
+<refnamediv>
+  <refname>{manname1}</refname>
+  <refname>{manname2}</refname>
+  <refname>{manname3}</refname>
+  <refname>{manname4}</refname>
+  <refname>{manname5}</refname>
+  <refname>{manname6}</refname>
+  <refname>{manname7}</refname>
+  <refname>{manname8}</refname>
+  <refname>{manname9}</refname>
+  <refname>{manname10}</refname>
+  <refname>{manname11}</refname>
+  <refname>{manname12}</refname>
+  <refname>{manname13}</refname>
+  <refname>{manname14}</refname>
+  <refname>{manname15}</refname>
+  <refname>{manname16}</refname>
+  <refname>{manname17}</refname>
+  <refname>{manname18}</refname>
+  <refname>{manname19}</refname>
+  <refname>{manname20}</refname>
+  <refname>{manname21}</refname>
+  <refname>{manname22}</refname>
+  <refname>{manname23}</refname>
+  <refname>{manname24}</refname>
+  <refname>{manname25}</refname>
+  <refname>{manname26}</refname>
+  <refname>{manname27}</refname>
+  <refname>{manname28}</refname>
+  <refname>{manname29}</refname>
+  <refname>{manname30}</refname>
+  <refpurpose>{manpurpose}</refpurpose>
+</refnamediv>
+endif::backend-docbook[]
+endif::doctype-manpage[]
+
+ifdef::backend-xhtml11[]
+[linktep-inlinemacro]
+<a href="{target}.html">{target}{0?({0})}</a>
+endif::backend-xhtml11[]
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-commands.txt b/tools/lib/traceevent/Documentation/libtraceevent-commands.txt
new file mode 100644
index 000000000..bec552001
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-commands.txt
@@ -0,0 +1,153 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_register_comm, tep_override_comm, tep_pid_is_registered,
+tep_data_comm_from_pid, tep_data_pid_from_comm, tep_cmdline_pid -
+Manage pid to process name mappings.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+int *tep_register_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_);
+int *tep_override_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_);
+bool *tep_is_pid_registered*(struct tep_handle pass:[*]_tep_, int _pid_);
+const char pass:[*]*tep_data_comm_from_pid*(struct tep_handle pass:[*]_pevent_, int _pid_);
+struct cmdline pass:[*]*tep_data_pid_from_comm*(struct tep_handle pass:[*]_pevent_, const char pass:[*]_comm_, struct cmdline pass:[*]_next_);
+int *tep_cmdline_pid*(struct tep_handle pass:[*]_pevent_, struct cmdline pass:[*]_cmdline_);
+--
+
+DESCRIPTION
+-----------
+These functions can be used to handle the mapping between pid and process name.
+The library builds a cache of these mappings, which is used to display the name
+of the process, instead of its pid. This information can be retrieved from
+tracefs/saved_cmdlines file.
+
+The _tep_register_comm()_ function registers a _pid_ / process name mapping.
+If a command with the same _pid_ is already registered, an error is returned.
+The _pid_ argument is the process ID, the _comm_ argument is the process name,
+_tep_ is the event context. The _comm_ is duplicated internally.
+
+The _tep_override_comm()_ function registers a _pid_ / process name mapping.
+If a process with the same pid is already registered, the process name string is
+udapted with the new one. The _pid_ argument is the process ID, the _comm_
+argument is the process name, _tep_ is the event context. The _comm_ is
+duplicated internally.
+
+The _tep_is_pid_registered()_ function checks if a pid has a process name
+mapping registered. The _pid_ argument is the process ID, _tep_ is the event
+context.
+
+The _tep_data_comm_from_pid()_ function returns the process name for a given
+pid. The _pid_ argument is the process ID, _tep_ is the event context.
+The returned string should not be freed, but will be freed when the _tep_
+handler is closed.
+
+The _tep_data_pid_from_comm()_ function returns a pid for a given process name.
+The _comm_ argument is the process name, _tep_ is the event context.
+The argument _next_ is the cmdline structure to search for the next pid.
+As there may be more than one pid for a given process, the result of this call
+can be passed back into a recurring call in the _next_ parameter, to search for
+the next pid. If _next_ is NULL, it will return the first pid associated with
+the _comm_. The function performs a linear search, so it may be slow.
+
+The _tep_cmdline_pid()_ function returns the pid associated with a given
+_cmdline_. The _tep_ argument is the event context.
+
+RETURN VALUE
+------------
+_tep_register_comm()_ function returns 0 on success. In case of an error -1 is
+returned and errno is set to indicate the cause of the problem: ENOMEM, if there
+is not enough memory to duplicate the _comm_ or EEXIST if a mapping for this
+_pid_ is already registered.
+
+_tep_override_comm()_ function returns 0 on success. In case of an error -1 is
+returned and errno is set to indicate the cause of the problem: ENOMEM, if there
+is not enough memory to duplicate the _comm_.
+
+_tep_is_pid_registered()_ function returns true if the _pid_ has a process name
+mapped to it, false otherwise.
+
+_tep_data_comm_from_pid()_ function returns the process name as string, or the
+string "<...>" if there is no mapping for the given pid.
+
+_tep_data_pid_from_comm()_ function returns a pointer to a struct cmdline, that
+holds a pid for a given process, or NULL if none is found. This result can be
+passed back into a recurring call as the _next_ parameter of the function.
+
+_tep_cmdline_pid()_ functions returns the pid for the give cmdline. If _cmdline_
+ is NULL, then -1 is returned.
+
+EXAMPLE
+-------
+The following example registers pid for command "ls", in context of event _tep_
+and performs various searches for pid / process name mappings:
+[source,c]
+--
+#include <event-parse.h>
+...
+int ret;
+int ls_pid = 1021;
+struct tep_handle *tep = tep_alloc();
+...
+	ret = tep_register_comm(tep, "ls", ls_pid);
+	if (ret != 0 && errno == EEXIST)
+		ret = tep_override_comm(tep, "ls", ls_pid);
+	if (ret != 0) {
+		/* Failed to register pid / command mapping */
+	}
+...
+	if (tep_is_pid_registered(tep, ls_pid) == 0) {
+		/* Command mapping for ls_pid is not registered */
+	}
+...
+	const char *comm = tep_data_comm_from_pid(tep, ls_pid);
+	if (comm) {
+		/* Found process name for ls_pid */
+	}
+...
+	int pid;
+	struct cmdline *cmd = tep_data_pid_from_comm(tep, "ls", NULL);
+	while (cmd) {
+		pid = tep_cmdline_pid(tep, cmd);
+		/* Found pid for process "ls" */
+		cmd = tep_data_pid_from_comm(tep, "ls", cmd);
+	}
+--
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-cpus.txt b/tools/lib/traceevent/Documentation/libtraceevent-cpus.txt
new file mode 100644
index 000000000..5ad70e43b
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-cpus.txt
@@ -0,0 +1,77 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_get_cpus, tep_set_cpus - Get / set the number of CPUs, which have a tracing
+buffer representing it. Note, the buffer may be empty.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+int *tep_get_cpus*(struct tep_handle pass:[*]_tep_);
+void *tep_set_cpus*(struct tep_handle pass:[*]_tep_, int _cpus_);
+--
+
+DESCRIPTION
+-----------
+The _tep_get_cpus()_ function gets the number of CPUs, which have a tracing
+buffer representing it. The _tep_ argument is trace event parser context.
+
+The _tep_set_cpus()_ function sets the number of CPUs, which have a tracing
+buffer representing it. The _tep_ argument is trace event parser context.
+The _cpu_ argument is the number of CPUs with tracing data.
+
+RETURN VALUE
+------------
+The _tep_get_cpus()_ functions returns the number of CPUs, which have tracing
+data recorded.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+	tep_set_cpus(tep, 5);
+...
+	printf("We have tracing data for %d CPUs", tep_get_cpus(tep));
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-endian_read.txt b/tools/lib/traceevent/Documentation/libtraceevent-endian_read.txt
new file mode 100644
index 000000000..e64851b6e
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-endian_read.txt
@@ -0,0 +1,78 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_read_number - Reads a number from raw data.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+unsigned long long *tep_read_number*(struct tep_handle pass:[*]_tep_, const void pass:[*]_ptr_, int _size_);
+--
+
+DESCRIPTION
+-----------
+The _tep_read_number()_ function reads an integer from raw data, taking into
+account the endianness of the raw data and the current host. The _tep_ argument
+is the trace event parser context. The _ptr_ is a pointer to the raw data, where
+the integer is, and the _size_ is the size of the integer.
+
+RETURN VALUE
+------------
+The _tep_read_number()_ function returns the integer in the byte order of
+the current host. In case of an error, 0 is returned.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+void process_record(struct tep_record *record)
+{
+	int offset = 24;
+	int data = tep_read_number(tep, record->data + offset, 4);
+
+	/* Read the 4 bytes at the offset 24 of data as an integer */
+}
+...
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_find.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_find.txt
new file mode 100644
index 000000000..7bc062c9f
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-event_find.txt
@@ -0,0 +1,103 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_find_event,tep_find_event_by_name,tep_find_event_by_record -
+Find events by given key.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+struct tep_event pass:[*]*tep_find_event*(struct tep_handle pass:[*]_tep_, int _id_);
+struct tep_event pass:[*]*tep_find_event_by_name*(struct tep_handle pass:[*]_tep_, const char pass:[*]_sys_, const char pass:[*]_name_);
+struct tep_event pass:[*]*tep_find_event_by_record*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_record_);
+--
+
+DESCRIPTION
+-----------
+This set of functions can be used to search for an event, based on a given
+criteria. All functions require a pointer to a _tep_, trace event parser
+context.
+
+The _tep_find_event()_ function searches for an event by given event _id_. The
+event ID is assigned dynamically and can be viewed in event's format file,
+"ID" field.
+
+The tep_find_event_by_name()_ function searches for an event by given
+event _name_, under the system _sys_. If the _sys_ is NULL (not specified),
+the first event with _name_ is returned.
+
+The tep_find_event_by_record()_ function searches for an event from a given
+_record_.
+
+RETURN VALUE
+------------
+All these functions return a pointer to the found event, or NULL if there is no
+such event.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+struct tep_event *event;
+
+event = tep_find_event(tep, 1857);
+if (event == NULL) {
+	/* There is no event with ID 1857 */
+}
+
+event = tep_find_event_by_name(tep, "kvm", "kvm_exit");
+if (event == NULL) {
+	/* There is no kvm_exit event, from kvm system */
+}
+
+void event_from_record(struct tep_record *record)
+{
+ struct tep_event *event = tep_find_event_by_record(tep, record);
+	if (event == NULL) {
+		/* There is no event from given record */
+	}
+}
+...
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_get.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_get.txt
new file mode 100644
index 000000000..6525092fc
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-event_get.txt
@@ -0,0 +1,99 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_get_event, tep_get_first_event, tep_get_events_count - Access events.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+struct tep_event pass:[*]*tep_get_event*(struct tep_handle pass:[*]_tep_, int _index_);
+struct tep_event pass:[*]*tep_get_first_event*(struct tep_handle pass:[*]_tep_);
+int *tep_get_events_count*(struct tep_handle pass:[*]_tep_);
+--
+
+DESCRIPTION
+-----------
+The _tep_get_event()_ function returns a pointer to event at the given _index_.
+The _tep_ argument is trace event parser context, the _index_ is the index of
+the requested event.
+
+The _tep_get_first_event()_ function returns a pointer to the first event.
+As events are stored in an array, this function returns the pointer to the
+beginning of the array. The _tep_ argument is trace event parser context.
+
+The _tep_get_events_count()_ function returns the number of the events
+in the array. The _tep_ argument is trace event parser context.
+
+RETURN VALUE
+------------
+The _tep_get_event()_ returns a pointer to the event located at _index_.
+NULL is returned in case of error, in case there are no events or _index_ is
+out of range.
+
+The _tep_get_first_event()_ returns a pointer to the first event. NULL is
+returned in case of error, or in case there are no events.
+
+The _tep_get_events_count()_ returns the number of the events. 0 is
+returned in case of error, or in case there are no events.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+int i,count = tep_get_events_count(tep);
+struct tep_event *event, *events = tep_get_first_event(tep);
+
+if (events == NULL) {
+	/* There are no events */
+} else {
+	for (i = 0; i < count; i++) {
+		event = (events+i);
+		/* process events[i] */
+	}
+
+	/* Get the last event */
+	event = tep_get_event(tep, count-1);
+}
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_list.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_list.txt
new file mode 100644
index 000000000..fba350e5a
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-event_list.txt
@@ -0,0 +1,122 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_list_events, tep_list_events_copy -
+Get list of events, sorted by given criteria.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+enum *tep_event_sort_type* {
+	_TEP_EVENT_SORT_ID_,
+	_TEP_EVENT_SORT_NAME_,
+	_TEP_EVENT_SORT_SYSTEM_,
+};
+
+struct tep_event pass:[*]pass:[*]*tep_list_events*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_);
+struct tep_event pass:[*]pass:[*]*tep_list_events_copy*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_);
+--
+
+DESCRIPTION
+-----------
+The _tep_list_events()_ function returns an array of pointers to the events,
+sorted by the _sort_type_ criteria. The last element of the array is NULL.
+The returned memory must not be freed, it is managed by the library.
+The function is not thread safe. The _tep_ argument is trace event parser
+context. The _sort_type_ argument is the required sort criteria:
+[verse]
+--
+	_TEP_EVENT_SORT_ID_	- sort by the event ID.
+	_TEP_EVENT_SORT_NAME_	- sort by the event (name, system, id) triplet.
+	_TEP_EVENT_SORT_SYSTEM_	- sort by the event (system, name, id) triplet.
+--
+
+The _tep_list_events_copy()_ is a thread safe version of _tep_list_events()_.
+It has the same behavior, but the returned array is allocated internally and
+must be freed by the caller. Note that the content of the array must not be
+freed (see the EXAMPLE below).
+
+RETURN VALUE
+------------
+The _tep_list_events()_ function returns an array of pointers to events.
+In case of an error, NULL is returned. The returned array must not be freed,
+it is managed by the library.
+
+The _tep_list_events_copy()_ function returns an array of pointers to events.
+In case of an error, NULL is returned. The returned array must be freed by
+the caller.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+int i;
+struct tep_event_format **events;
+
+i=0;
+events = tep_list_events(tep, TEP_EVENT_SORT_ID);
+if (events == NULL) {
+	/* Failed to get the events, sorted by ID */
+} else {
+	while(events[i]) {
+		/* walk through the list of the events, sorted by ID */
+		i++;
+	}
+}
+
+i=0;
+events = tep_list_events_copy(tep, TEP_EVENT_SORT_NAME);
+if (events == NULL) {
+	/* Failed to get the events, sorted by name */
+} else {
+	while(events[i]) {
+		/* walk through the list of the events, sorted by name */
+		i++;
+	}
+	free(events);
+}
+
+...
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt
new file mode 100644
index 000000000..2c6a61811
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt
@@ -0,0 +1,130 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_print_event - Writes event information into a trace sequence.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+*#include <trace-seq.h>*
+
+void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seqpass:[*]_s_, struct tep_record pass:[*]_record_, const char pass:[*]_fmt_, _..._)
+--
+
+DESCRIPTION
+-----------
+
+The _tep_print_event()_ function parses the event information of the given
+_record_ and writes it into the trace sequence _s_, according to the format
+string _fmt_. The desired information is specified after the format string.
+The _fmt_ is printf-like format string, following arguments are supported:
+[verse]
+--
+	TEP_PRINT_PID, "%d"  - PID of the event.
+	TEP_PRINT_CPU, "%d"  - Event CPU.
+	TEP_PRINT_COMM, "%s" - Event command string.
+	TEP_PRINT_NAME, "%s" - Event name.
+	TEP_PRINT_LATENCY, "%s" - Latency of the event. It prints 4 or more
+			fields - interrupt state, scheduling state,
+			current context, and preemption count.
+			Field 1 is the interrupt enabled state:
+				d : Interrupts are disabled
+				. : Interrupts are enabled
+				X : The architecture does not support this
+				    information
+			Field 2 is the "need resched" state.
+				N : The task is set to call the scheduler when
+				    possible, as another higher priority task
+				    may need to be scheduled in.
+				. : The task is not set to call the scheduler.
+			Field 3 is the context state.
+				. : Normal context
+				s : Soft interrupt context
+				h : Hard interrupt context
+				H : Hard interrupt context which triggered
+				    during soft interrupt context.
+				z : NMI context
+				Z : NMI context which triggered during hard
+				    interrupt context
+			Field 4 is the preemption count.
+				. : The preempt count is zero.
+			On preemptible kernels (where the task can be scheduled
+			out in arbitrary locations while in kernel context), the
+			preempt count, when non zero, will prevent the kernel
+			from scheduling out the current task. The preempt count
+			number is displayed when it is not zero.
+			Depending on the kernel, it may show other fields
+			(lock depth, or migration disabled, which are unique to
+			specialized kernels).
+	TEP_PRINT_TIME, %d - event time stamp. A divisor and precision can be
+			specified as part of this format string:
+			"%precision.divisord". Example:
+			"%3.1000d" - divide the time by 1000 and print the first
+			3 digits before the dot. Thus, the time stamp
+			"123456000" will be printed as "123.456"
+	TEP_PRINT_INFO, "%s" - event information.
+	TEP_PRINT_INFO_RAW, "%s" - event information, in raw format.
+
+--
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+#include <trace-seq.h>
+...
+struct trace_seq seq;
+trace_seq_init(&seq);
+struct tep_handle *tep = tep_alloc();
+...
+void print_my_event(struct tep_record *record)
+{
+	trace_seq_reset(&seq);
+	tep_print_event(tep, s, record, "%16s-%-5d [%03d] %s %6.1000d %s %s",
+			TEP_PRINT_COMM, TEP_PRINT_PID, TEP_PRINT_CPU,
+			TEP_PRINT_LATENCY, TEP_PRINT_TIME, TEP_PRINT_NAME,
+			TEP_PRINT_INFO);
+}
+...
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*trace-seq.h*
+	Header file to include in order to have access to trace sequences related APIs.
+	Trace sequences are used to allow a function to call several other functions
+	to create a string of data to use.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_find.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_find.txt
new file mode 100644
index 000000000..0896af5b9
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-field_find.txt
@@ -0,0 +1,118 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_find_common_field, tep_find_field, tep_find_any_field -
+Search for a field in an event.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+struct tep_format_field pass:[*]*tep_find_common_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_);
+struct tep_format_field pass:[*]*tep_find_field*(struct tep_event_ormat pass:[*]_event_, const char pass:[*]_name_);
+struct tep_format_field pass:[*]*tep_find_any_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_);
+--
+
+DESCRIPTION
+-----------
+These functions search for a field with given name in an event. The field
+returned can be used to find the field content from within a data record.
+
+The _tep_find_common_field()_ function searches for a common field with _name_
+in the _event_.
+
+The _tep_find_field()_ function searches for an event specific field with
+_name_ in the _event_.
+
+The _tep_find_any_field()_ function searches for any field with _name_ in the
+_event_.
+
+RETURN VALUE
+------------
+The _tep_find_common_field(), _tep_find_field()_ and _tep_find_any_field()_
+functions return a pointer to the found field, or NULL in case there is no field
+with the requested name.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+...
+void get_htimer_info(struct tep_handle *tep, struct tep_record *record)
+{
+	struct tep_format_field *field;
+	struct tep_event *event;
+	long long softexpires;
+	int mode;
+	int pid;
+
+	event = tep_find_event_by_name(tep, "timer", "hrtimer_start");
+
+	field = tep_find_common_field(event, "common_pid");
+	if (field == NULL) {
+		/* Cannot find "common_pid" field in the event */
+	} else {
+		/* Get pid from the data record */
+		pid = tep_read_number(tep, record->data + field->offset,
+				      field->size);
+	}
+
+	field = tep_find_field(event, "softexpires");
+	if (field == NULL) {
+		/* Cannot find "softexpires" event specific field in the event */
+	} else {
+		/* Get softexpires parameter from the data record */
+		softexpires = tep_read_number(tep, record->data + field->offset,
+					      field->size);
+	}
+
+	field = tep_find_any_field(event, "mode");
+	if (field == NULL) {
+		/* Cannot find "mode" field in the event */
+	} else
+	{
+		/* Get mode parameter from the data record */
+		mode = tep_read_number(tep, record->data + field->offset,
+				       field->size);
+	}
+}
+...
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_get_val.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_get_val.txt
new file mode 100644
index 000000000..6324f0d48
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-field_get_val.txt
@@ -0,0 +1,122 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_get_any_field_val, tep_get_common_field_val, tep_get_field_val,
+tep_get_field_raw - Get value of a field.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+*#include <trace-seq.h>*
+
+int *tep_get_any_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_);
+int *tep_get_common_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_);
+int *tep_get_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_);
+void pass:[*]*tep_get_field_raw*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int pass:[*]_len_, int _err_);
+--
+
+DESCRIPTION
+-----------
+These functions can be used to find a field and retrieve its value.
+
+The _tep_get_any_field_val()_ function searches in the _record_ for a field
+with _name_, part of the _event_. If the field is found, its value is stored in
+_val_. If there is an error and _err_ is not zero, then an error string is
+written into _s_.
+
+The _tep_get_common_field_val()_ function does the same as
+_tep_get_any_field_val()_, but searches only in the common fields. This works
+for any event as all events include the common fields.
+
+The _tep_get_field_val()_ function does the same as _tep_get_any_field_val()_,
+but searches only in the event specific fields.
+
+The _tep_get_field_raw()_ function searches in the _record_ for a field with
+_name_, part of the _event_. If the field is found, a pointer to where the field
+exists in the record's raw data is returned. The size of the data is stored in
+_len_. If there is an error and _err_ is not zero, then an error string is
+written into _s_.
+
+RETURN VALUE
+------------
+The _tep_get_any_field_val()_, _tep_get_common_field_val()_ and
+_tep_get_field_val()_ functions return 0 on success, or -1 in case of an error.
+
+The _tep_get_field_raw()_ function returns a pointer to field's raw data, and
+places the length of this data in _len_. In case of an error NULL is returned.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+#include <trace-seq.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+struct tep_event *event = tep_find_event_by_name(tep, "kvm", "kvm_exit");
+...
+void process_record(struct tep_record *record)
+{
+	int len;
+	char *comm;
+	struct tep_event_format *event;
+	unsigned long long val;
+
+	event = tep_find_event_by_record(pevent, record);
+	if (event != NULL) {
+		if (tep_get_common_field_val(NULL, event, "common_type",
+					     record, &val, 0) == 0) {
+			/* Got the value of common type field */
+		}
+		if (tep_get_field_val(NULL, event, "pid", record, &val, 0) == 0) {
+			/* Got the value of pid specific field */
+		}
+		comm = tep_get_field_raw(NULL, event, "comm", record, &len, 0);
+		if (comm != NULL) {
+			/* Got a pointer to the comm event specific field */
+		}
+	}
+}
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*trace-seq.h*
+	Header file to include in order to have access to trace sequences
+	related APIs. Trace sequences are used to allow a function to call
+	several other functions to create a string of data to use.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_print.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_print.txt
new file mode 100644
index 000000000..9a9df98ac
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-field_print.txt
@@ -0,0 +1,126 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_print_field, tep_print_fields, tep_print_num_field, tep_print_func_field -
+Print the field content.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+*#include <trace-seq.h>*
+
+void *tep_print_field*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, struct tep_format_field pass:[*]_field_);
+void *tep_print_fields*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, int _size_, struct tep_event pass:[*]_event_);
+int *tep_print_num_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_);
+int *tep_print_func_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_);
+--
+
+DESCRIPTION
+-----------
+These functions print recorded field's data, according to the field's type.
+
+The _tep_print_field()_ function extracts from the recorded raw _data_ value of
+the _field_ and prints it into _s_, according to the field type.
+
+The _tep_print_fields()_ prints each field name followed by the record's field
+value according to the field's type:
+[verse]
+--
+"field1_name=field1_value field2_name=field2_value ..."
+--
+It iterates all fields of the _event_, and calls _tep_print_field()_ for each of
+them.
+
+The _tep_print_num_field()_ function prints a numeric field with given format
+string. A search is performed in the _event_ for a field with _name_. If such
+field is found, its value is extracted from the _record_ and is printed in the
+_s_, according to the given format string _fmt_. If the argument _err_ is
+non-zero, and an error occures - it is printed in the _s_.
+
+The _tep_print_func_field()_ function prints a function field with given format
+string.  A search is performed in the _event_ for a field with _name_. If such
+field is found, its value is extracted from the _record_. The value is assumed
+to be a function address, and a search is perform to find the name of this
+function. The function name (if found) and its address are printed in the _s_,
+according to the given format string _fmt_. If the argument _err_ is non-zero,
+and an error occures - it is printed in _s_.
+
+RETURN VALUE
+------------
+The _tep_print_num_field()_ and _tep_print_func_field()_ functions return 1
+on success, -1 in case of an error or 0 if the print buffer _s_ is full.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+#include <trace-seq.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+struct trace_seq seq;
+trace_seq_init(&seq);
+struct tep_event *event = tep_find_event_by_name(tep, "timer", "hrtimer_start");
+...
+void process_record(struct tep_record *record)
+{
+	struct tep_format_field *field_pid = tep_find_common_field(event, "common_pid");
+
+	trace_seq_reset(&seq);
+
+	/* Print the value of "common_pid" */
+	tep_print_field(&seq, record->data, field_pid);
+
+	/* Print all fields of the "hrtimer_start" event */
+	tep_print_fields(&seq, record->data, record->size, event);
+
+	/* Print the value of "expires" field with custom format string */
+	tep_print_num_field(&seq, " timer expires in %llu ", event, "expires", record, 0);
+
+	/* Print the address and the name of "function" field with custom format string */
+	tep_print_func_field(&seq, " timer function is %s ", event, "function", record, 0);
+ }
+ ...
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*trace-seq.h*
+	Header file to include in order to have access to trace sequences related APIs.
+	Trace sequences are used to allow a function to call several other functions
+	to create a string of data to use.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_read.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_read.txt
new file mode 100644
index 000000000..64e9e25d3
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-field_read.txt
@@ -0,0 +1,81 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_read_number_field - Reads a number from raw data.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+int *tep_read_number_field*(struct tep_format_field pass:[*]_field_, const void pass:[*]_data_, unsigned long long pass:[*]_value_);
+--
+
+DESCRIPTION
+-----------
+The _tep_read_number_field()_ function reads the value of the _field_ from the
+raw _data_ and stores it in the _value_. The function sets the _value_ according
+to the endianness of the raw data and the current machine and stores it in
+_value_.
+
+RETURN VALUE
+------------
+The _tep_read_number_field()_ function retunrs 0 in case of success, or -1 in
+case of an error.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+struct tep_event *event = tep_find_event_by_name(tep, "timer", "hrtimer_start");
+...
+void process_record(struct tep_record *record)
+{
+	unsigned long long pid;
+	struct tep_format_field *field_pid = tep_find_common_field(event, "common_pid");
+
+	if (tep_read_number_field(field_pid, record->data, &pid) != 0) {
+		/* Failed to get "common_pid" value */
+	}
+}
+...
+--
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-fields.txt b/tools/lib/traceevent/Documentation/libtraceevent-fields.txt
new file mode 100644
index 000000000..1ccb531d5
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-fields.txt
@@ -0,0 +1,105 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_event_common_fields, tep_event_fields - Get a list of fields for an event.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+struct tep_format_field pass:[*]pass:[*]*tep_event_common_fields*(struct tep_event pass:[*]_event_);
+struct tep_format_field pass:[*]pass:[*]*tep_event_fields*(struct tep_event pass:[*]_event_);
+--
+
+DESCRIPTION
+-----------
+The _tep_event_common_fields()_ function returns an array of pointers to common
+fields for the _event_. The array is allocated in the function and must be freed
+by free(). The last element of the array is NULL.
+
+The _tep_event_fields()_ function returns an array of pointers to event specific
+fields for the _event_. The array is allocated in the function and must be freed
+by free(). The last element of the array is NULL.
+
+RETURN VALUE
+------------
+Both _tep_event_common_fields()_ and _tep_event_fields()_ functions return
+an array of pointers to tep_format_field structures in case of success, or
+NULL in case of an error.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+int i;
+struct tep_format_field **fields;
+struct tep_event *event = tep_find_event_by_name(tep, "kvm", "kvm_exit");
+if (event != NULL) {
+	fields = tep_event_common_fields(event);
+	if (fields != NULL) {
+		i = 0;
+		while (fields[i]) {
+			/*
+			  walk through the list of the common fields
+			  of the kvm_exit event
+			*/
+			i++;
+		}
+		free(fields);
+	}
+	fields = tep_event_fields(event);
+	if (fields != NULL) {
+		i = 0;
+		while (fields[i]) {
+			/*
+			  walk through the list of the event specific
+			  fields of the kvm_exit event
+			*/
+			i++;
+		}
+		free(fields);
+	}
+}
+...
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-file_endian.txt b/tools/lib/traceevent/Documentation/libtraceevent-file_endian.txt
new file mode 100644
index 000000000..f401ad311
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-file_endian.txt
@@ -0,0 +1,91 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_is_file_bigendian, tep_set_file_bigendian - Get / set the endianness of the
+raw data being accessed by the tep handler.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+enum *tep_endian* {
+	TEP_LITTLE_ENDIAN = 0,
+	TEP_BIG_ENDIAN
+};
+
+bool *tep_is_file_bigendian*(struct tep_handle pass:[*]_tep_);
+void *tep_set_file_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_);
+
+--
+DESCRIPTION
+-----------
+The _tep_is_file_bigendian()_ function gets the endianness of the raw data,
+being accessed by the tep handler. The _tep_ argument is trace event parser
+context.
+
+The _tep_set_file_bigendian()_ function sets the endianness of raw data being
+accessed by the tep handler. The _tep_ argument is trace event parser context.
+[verse]
+--
+The _endian_ argument is the endianness:
+	_TEP_LITTLE_ENDIAN_ - the raw data is in little endian format,
+	_TEP_BIG_ENDIAN_ - the raw data is in big endian format.
+--
+RETURN VALUE
+------------
+The _tep_is_file_bigendian()_ function returns true if the data is in bigendian
+format, false otherwise.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+	tep_set_file_bigendian(tep, TEP_LITTLE_ENDIAN);
+...
+	if (tep_is_file_bigendian(tep)) {
+		/* The raw data is in big endian */
+	} else {
+		/* The raw data is in little endian */
+	}
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-filter.txt b/tools/lib/traceevent/Documentation/libtraceevent-filter.txt
new file mode 100644
index 000000000..4a9962d8c
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-filter.txt
@@ -0,0 +1,209 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_filter_alloc, tep_filter_free, tep_filter_reset, tep_filter_make_string,
+tep_filter_copy, tep_filter_compare, tep_filter_match, tep_event_filtered,
+tep_filter_remove_event, tep_filter_strerror, tep_filter_add_filter_str -
+Event filter related APIs.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+struct tep_event_filter pass:[*]*tep_filter_alloc*(struct tep_handle pass:[*]_tep_);
+void *tep_filter_free*(struct tep_event_filter pass:[*]_filter_);
+void *tep_filter_reset*(struct tep_event_filter pass:[*]_filter_);
+enum tep_errno *tep_filter_add_filter_str*(struct tep_event_filter pass:[*]_filter_, const char pass:[*]_filter_str_);
+int *tep_event_filtered*(struct tep_event_filter pass:[*]_filter_, int _event_id_);
+int *tep_filter_remove_event*(struct tep_event_filter pass:[*]_filter_, int _event_id_);
+enum tep_errno *tep_filter_match*(struct tep_event_filter pass:[*]_filter_, struct tep_record pass:[*]_record_);
+int *tep_filter_copy*(struct tep_event_filter pass:[*]_dest_, struct tep_event_filter pass:[*]_source_);
+int *tep_filter_compare*(struct tep_event_filter pass:[*]_filter1_, struct tep_event_filter pass:[*]_filter2_);
+char pass:[*]*tep_filter_make_string*(struct tep_event_filter pass:[*]_filter_, int _event_id_);
+int *tep_filter_strerror*(struct tep_event_filter pass:[*]_filter_, enum tep_errno _err_, char pass:[*]buf, size_t _buflen_);
+--
+
+DESCRIPTION
+-----------
+Filters can be attached to traced events. They can be used to filter out various
+events when outputting them. Each event can be filtered based on its parameters,
+described in the event's format file. This set of functions can be used to
+create, delete, modify and attach event filters.
+
+The _tep_filter_alloc()_ function creates a new event filter. The _tep_ argument
+is the trace event parser context.
+
+The _tep_filter_free()_ function frees an event filter and all resources that it
+had used.
+
+The _tep_filter_reset()_ function removes all rules from an event filter and
+resets it.
+
+The _tep_filter_add_filter_str()_ function adds a new rule to the _filter_. The
+_filter_str_ argument is the filter string, that contains the rule.
+
+The _tep_event_filtered()_ function checks if the event with _event_id_ has
+_filter_.
+
+The _tep_filter_remove_event()_ function removes a _filter_ for an event with
+_event_id_.
+
+The _tep_filter_match()_ function tests if a _record_ matches given _filter_.
+
+The _tep_filter_copy()_ function copies a _source_ filter into a _dest_ filter.
+
+The _tep_filter_compare()_ function compares two filers - _filter1_ and _filter2_.
+
+The _tep_filter_make_string()_ function constructs a string, displaying
+the _filter_ contents for given _event_id_.
+
+The _tep_filter_strerror()_ function copies the _filter_ error buffer into the
+given _buf_ with the size _buflen_. If the error buffer is empty, in the _buf_
+is copied a string, describing the error _err_.
+
+RETURN VALUE
+------------
+The _tep_filter_alloc()_ function returns a pointer to the newly created event
+filter, or NULL in case of an error.
+
+The _tep_filter_add_filter_str()_ function returns 0 if the rule was
+successfully added or a negative error code.  Use _tep_filter_strerror()_ to see
+actual error message in case of an error.
+
+The _tep_event_filtered()_ function returns 1 if the filter is found for given
+event, or 0 otherwise.
+
+The _tep_filter_remove_event()_ function returns 1 if the vent was removed, or
+0 if the event was not found.
+
+The _tep_filter_match()_ function returns _tep_errno_, according to the result:
+[verse]
+--
+_pass:[TEP_ERRNO__FILTER_MATCH]_	- filter found for event, the record matches.
+_pass:[TEP_ERRNO__FILTER_MISS]_		- filter found for event, the record does not match.
+_pass:[TEP_ERRNO__FILTER_NOT_FOUND]_	- no filter found for record's event.
+_pass:[TEP_ERRNO__NO_FILTER]_		- no rules in the filter.
+--
+or any other _tep_errno_, if an error occurred during the test.
+
+The _tep_filter_copy()_ function returns 0 on success or -1 if not all rules
+ were copied.
+
+The _tep_filter_compare()_ function returns 1 if the two filters hold the same
+content, or 0 if they do not.
+
+The _tep_filter_make_string()_ function returns a string, which must be freed
+with free(), or NULL in case of an error.
+
+The _tep_filter_strerror()_ function returns 0 if message was filled
+successfully, or -1 in case of an error.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+char errstr[200];
+int ret;
+
+struct tep_event_filter *filter = tep_filter_alloc(tep);
+struct tep_event_filter *filter1 = tep_filter_alloc(tep);
+ret = tep_filter_add_filter_str(filter, "sched/sched_wakeup:target_cpu==1");
+if(ret < 0) {
+	tep_filter_strerror(filter, ret, errstr, sizeof(errstr));
+	/* Failed to add a new rule to the filter, the error string is in errstr */
+}
+if (tep_filter_copy(filter1, filter) != 0) {
+	/* Failed to copy filter in filter1 */
+}
+...
+if (tep_filter_compare(filter, filter1) != 1) {
+	/* Both filters are different */
+}
+...
+void process_record(struct tep_handle *tep, struct tep_record *record)
+{
+	struct tep_event *event;
+	char *fstring;
+
+	event = tep_find_event_by_record(tep, record);
+
+	if (tep_event_filtered(filter, event->id) == 1) {
+		/* The event has filter */
+		fstring = tep_filter_make_string(filter, event->id);
+		if (fstring != NULL) {
+			/* The filter for the event is in fstring */
+			free(fstring);
+		}
+	}
+
+	switch (tep_filter_match(filter, record)) {
+	case TEP_ERRNO__FILTER_MATCH:
+		/* The filter matches the record */
+		break;
+	case TEP_ERRNO__FILTER_MISS:
+		/* The filter does not match the record */
+		break;
+	case TEP_ERRNO__FILTER_NOT_FOUND:
+		/* No filter found for record's event */
+		break;
+	case TEP_ERRNO__NO_FILTER:
+		/* There are no rules in the filter */
+		break
+	default:
+		/* An error occurred during the test */
+		break;
+	}
+
+	if (tep_filter_remove_event(filter, event->id) == 1) {
+		/* The event was removed from the filter */
+	}
+}
+
+...
+tep_filter_reset(filter);
+...
+tep_filter_free(filter);
+tep_filter_free(filter1);
+...
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt b/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt
new file mode 100644
index 000000000..f6aca0df2
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt
@@ -0,0 +1,183 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_find_function, tep_find_function_address, tep_set_function_resolver,
+tep_reset_function_resolver, tep_register_function, tep_register_print_string -
+function related tep APIs
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+typedef char pass:[*](*tep_func_resolver_t*)(void pass:[*]_priv_, unsigned long long pass:[*]_addrp_, char pass:[**]_modp_);
+int *tep_set_function_resolver*(struct tep_handle pass:[*]_tep_, tep_func_resolver_t pass:[*]_func_, void pass:[*]_priv_);
+void *tep_reset_function_resolver*(struct tep_handle pass:[*]_tep_);
+const char pass:[*]*tep_find_function*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_);
+unsigned long long *tep_find_function_address*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_);
+int *tep_register_function*(struct tep_handle pass:[*]_tep_, char pass:[*]_name_, unsigned long long _addr_, char pass:[*]_mod_);
+int *tep_register_print_string*(struct tep_handle pass:[*]_tep_, const char pass:[*]_fmt_, unsigned long long _addr_);
+--
+
+DESCRIPTION
+-----------
+Some tools may have already a way to resolve the kernel functions. These APIs
+allow them to keep using it instead of duplicating all the entries inside.
+
+The _tep_func_resolver_t_ type is the prototype of the alternative kernel
+functions resolver. This function receives a pointer to its custom context
+(set with the _tep_set_function_resolver()_ call ) and the address of a kernel
+function, which has to be resolved. In case of success, it should return
+the name of the function and its module (if any) in _modp_.
+
+The _tep_set_function_resolver()_ function registers _func_ as an alternative
+kernel functions resolver. The _tep_ argument is trace event parser context.
+The _priv_ argument is a custom context of the _func_ function. The function
+resolver is used by the APIs _tep_find_function()_,
+_tep_find_function_address()_, and _tep_print_func_field()_ to resolve
+a function address to a function name.
+
+The _tep_reset_function_resolver()_ function resets the kernel functions
+resolver to the default function.  The _tep_ argument is trace event parser
+context.
+
+
+These APIs can be used to find function name and start address, by given
+address. The given address does not have to be exact, it will select
+the function that would contain it.
+
+The _tep_find_function()_ function returns the function name, which contains the
+given address _addr_. The _tep_ argument is the trace event parser context.
+
+The _tep_find_function_address()_ function returns the function start address,
+by given address _addr_. The _addr_ does not have to be exact, it will select
+the function that would contain it. The _tep_ argument is the trace event
+parser context.
+
+The _tep_register_function()_ function registers a function name mapped to an
+address and (optional) module. This mapping is used in case the function tracer
+or events have "%pS" parameter in its format string. It is common to pass in
+the kallsyms function names with their corresponding addresses with this
+function. The _tep_ argument is the trace event parser context. The _name_ is
+the name of the function, the string is copied internally. The _addr_ is the
+start address of the function. The _mod_ is the kernel module the function may
+be in (NULL for none).
+
+The _tep_register_print_string()_ function  registers a string by the address
+it was stored in the kernel. Some strings internal to the kernel with static
+address are passed to certain events. The "%s" in the event's format field
+which has an address needs to know what string would be at that address. The
+tep_register_print_string() supplies the parsing with the mapping between kernel
+addresses and those strings. The _tep_ argument is the trace event parser
+context. The _fmt_ is the string to register, it is copied internally.
+The _addr_ is the address the string was located at.
+
+
+RETURN VALUE
+------------
+The _tep_set_function_resolver()_ function returns 0 in case of success, or -1
+in case of an error.
+
+The _tep_find_function()_ function returns the function name, or NULL in case
+it cannot be found.
+
+The _tep_find_function_address()_ function returns the function start address,
+or 0 in case it cannot be found.
+
+The _tep_register_function()_ function returns 0 in case of success. In case of
+an error -1 is returned, and errno is set to the appropriate error number.
+
+The _tep_register_print_string()_ function returns 0 in case of success. In case
+of an error -1 is returned, and errno is set to the appropriate error number.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+char *my_resolve_kernel_addr(void *context,
+			     unsigned long long *addrp, char **modp)
+{
+	struct db *function_database = context;
+	struct symbol *sym = sql_lookup(function_database, *addrp);
+
+	if (!sym)
+		return NULL;
+
+	*modp = sym->module_name;
+	return sym->name;
+}
+
+void show_function( unsigned long long addr)
+{
+	unsigned long long fstart;
+	const char *fname;
+
+	if (tep_set_function_resolver(tep, my_resolve_kernel_addr,
+				      function_database) != 0) {
+		/* failed to register my_resolve_kernel_addr */
+	}
+
+	/* These APIs use my_resolve_kernel_addr() to resolve the addr */
+	fname = tep_find_function(tep, addr);
+	fstart = tep_find_function_address(tep, addr);
+
+	/*
+	   addr is in function named fname, starting at fstart address,
+	   at offset (addr - fstart)
+	*/
+
+	tep_reset_function_resolver(tep);
+
+}
+...
+	if (tep_register_function(tep, "kvm_exit",
+				(unsigned long long) 0x12345678, "kvm") != 0) {
+		/* Failed to register kvm_exit address mapping */
+	}
+...
+	if (tep_register_print_string(tep, "print string",
+				(unsigned long long) 0x87654321, NULL) != 0) {
+		/* Failed to register "print string" address mapping */
+	}
+...
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-func_find.txt b/tools/lib/traceevent/Documentation/libtraceevent-func_find.txt
new file mode 100644
index 000000000..04840e244
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-func_find.txt
@@ -0,0 +1,88 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_find_function,tep_find_function_address - Find function name / start address.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+const char pass:[*]*tep_find_function*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_);
+unsigned long long *tep_find_function_address*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_);
+--
+
+DESCRIPTION
+-----------
+These functions can be used to find function name and start address, by given
+address. The given address does not have to be exact, it will select the function
+that would contain it.
+
+The _tep_find_function()_ function returns the function name, which contains the
+given address _addr_. The _tep_ argument is the trace event parser context.
+
+The _tep_find_function_address()_ function returns the function start address,
+by given address _addr_. The _addr_ does not have to be exact, it will select the
+function that would contain it. The _tep_ argument is the trace event parser context.
+
+RETURN VALUE
+------------
+The _tep_find_function()_ function returns the function name, or NULL in case
+it cannot be found.
+
+The _tep_find_function_address()_ function returns the function start address,
+or 0 in case it cannot be found.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+void show_function( unsigned long long addr)
+{
+	const char *fname = tep_find_function(tep, addr);
+	unsigned long long fstart = tep_find_function_address(tep, addr);
+
+	/* addr is in function named fname, starting at fstart address, at offset (addr - fstart) */
+}
+...
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-handle.txt b/tools/lib/traceevent/Documentation/libtraceevent-handle.txt
new file mode 100644
index 000000000..45b20172e
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-handle.txt
@@ -0,0 +1,101 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_alloc, tep_free,tep_ref, tep_unref,tep_get_ref - Create, destroy, manage
+references of trace event parser context.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+struct tep_handle pass:[*]*tep_alloc*(void);
+void *tep_free*(struct tep_handle pass:[*]_tep_);
+void *tep_ref*(struct tep_handle pass:[*]_tep_);
+void *tep_unref*(struct tep_handle pass:[*]_tep_);
+int *tep_get_ref*(struct tep_handle pass:[*]_tep_);
+--
+
+DESCRIPTION
+-----------
+These are the main functions to create and destroy tep_handle - the main
+structure, representing the trace event parser context. This context is used as
+the input parameter of most library APIs.
+
+The _tep_alloc()_ function allocates and initializes the tep context.
+
+The _tep_free()_ function will decrement the reference of the _tep_ handler.
+When there is no more references, then it will free the handler, as well
+as clean up all its resources that it had used. The argument _tep_ is
+the pointer to the trace event parser context.
+
+The _tep_ref()_ function adds a reference to the _tep_ handler.
+
+The _tep_unref()_ function removes a reference from the _tep_ handler. When
+the last reference is removed, the _tep_ is destroyed, and all resources that
+it had used are cleaned up.
+
+The _tep_ref_get()_ functions gets the current references of the _tep_ handler.
+
+RETURN VALUE
+------------
+_tep_alloc()_ returns a pointer to a newly created tep_handle structure.
+NULL is returned in case there is not enough free memory to allocate it.
+
+_tep_ref_get()_ returns the current references of _tep_.
+If _tep_ is NULL, 0 is returned.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+
+...
+struct tep_handle *tep = tep_alloc();
+...
+int ref = tep_get_ref(tep);
+tep_ref(tep);
+if ( (ref+1) != tep_get_ref(tep)) {
+	/* Something wrong happened, the counter is not incremented by 1 */
+}
+tep_unref(tep);
+...
+tep_free(tep);
+...
+--
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-header_page.txt b/tools/lib/traceevent/Documentation/libtraceevent-header_page.txt
new file mode 100644
index 000000000..615d117dc
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-header_page.txt
@@ -0,0 +1,102 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_get_header_page_size, tep_get_header_timestamp_size, tep_is_old_format -
+Get the data stored in the header page, in kernel context.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+int *tep_get_header_page_size*(struct tep_handle pass:[*]_tep_);
+int *tep_get_header_timestamp_size*(struct tep_handle pass:[*]_tep_);
+bool *tep_is_old_format*(struct tep_handle pass:[*]_tep_);
+--
+DESCRIPTION
+-----------
+These functions retrieve information from kernel context, stored in tracefs
+events/header_page. Old kernels do not have header page info, so default values
+from user space context are used.
+
+The _tep_get_header_page_size()_ function returns the size of a long integer,
+in kernel context. The _tep_ argument is trace event parser context.
+This information is retrieved from tracefs events/header_page, "commit" field.
+
+The _tep_get_header_timestamp_size()_ function returns the size of timestamps,
+in kernel context. The _tep_ argument is trace event parser context. This
+information is retrieved from tracefs events/header_page, "timestamp" field.
+
+The _tep_is_old_format()_ function returns true if the kernel predates
+the addition of events/header_page, otherwise it returns false.
+
+RETURN VALUE
+------------
+The _tep_get_header_page_size()_ function returns the size of a long integer,
+in bytes.
+
+The _tep_get_header_timestamp_size()_ function returns the size of timestamps,
+in bytes.
+
+The _tep_is_old_format()_ function returns true, if an old kernel is used to
+generate the tracing data, which has no event/header_page. If the kernel is new,
+or _tep_ is NULL, false is returned.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+	int longsize;
+	int timesize;
+	bool old;
+
+	longsize = tep_get_header_page_size(tep);
+	timesize = tep_get_header_timestamp_size(tep);
+	old = tep_is_old_format(tep);
+
+	printf ("%s kernel is used to generate the tracing data.\n",
+		old?"Old":"New");
+	printf("The size of a long integer is %d bytes.\n", longsize);
+	printf("The timestamps size is %d bytes.\n", timesize);
+...
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-host_endian.txt b/tools/lib/traceevent/Documentation/libtraceevent-host_endian.txt
new file mode 100644
index 000000000..d5d375eb8
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-host_endian.txt
@@ -0,0 +1,104 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_is_bigendian, tep_is_local_bigendian, tep_set_local_bigendian - Get / set
+the endianness of the local machine.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+enum *tep_endian* {
+	TEP_LITTLE_ENDIAN = 0,
+	TEP_BIG_ENDIAN
+};
+
+int *tep_is_bigendian*(void);
+bool *tep_is_local_bigendian*(struct tep_handle pass:[*]_tep_);
+void *tep_set_local_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_);
+--
+
+DESCRIPTION
+-----------
+
+The _tep_is_bigendian()_ gets the endianness of the machine, executing
+the function.
+
+The _tep_is_local_bigendian()_ function gets the endianness of the local
+machine, saved in the _tep_ handler. The _tep_ argument is the trace event
+parser context. This API is a bit faster than _tep_is_bigendian()_, as it
+returns cached endianness of the local machine instead of checking it each time.
+
+The _tep_set_local_bigendian()_ function sets the endianness of the local
+machine in the _tep_ handler. The _tep_ argument is trace event parser context.
+The _endian_ argument is the endianness:
+[verse]
+--
+	_TEP_LITTLE_ENDIAN_ - the machine is little endian,
+	_TEP_BIG_ENDIAN_ - the machine is big endian.
+--
+
+RETURN VALUE
+------------
+The _tep_is_bigendian()_ function returns non zero if the endianness of the
+machine, executing the code, is big endian and zero otherwise.
+
+The _tep_is_local_bigendian()_ function returns true, if the endianness of the
+local machine, saved in the _tep_ handler, is big endian, or false otherwise.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+	if (tep_is_bigendian())
+		tep_set_local_bigendian(tep, TEP_BIG_ENDIAN);
+	else
+		tep_set_local_bigendian(tep, TEP_LITTLE_ENDIAN);
+...
+	if (tep_is_local_bigendian(tep))
+		printf("This machine you are running on is bigendian\n");
+	else
+		printf("This machine you are running on is little endian\n");
+
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-long_size.txt b/tools/lib/traceevent/Documentation/libtraceevent-long_size.txt
new file mode 100644
index 000000000..01d78ea25
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-long_size.txt
@@ -0,0 +1,78 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_get_long_size, tep_set_long_size - Get / set the size of a long integer on
+the machine, where the trace is generated, in bytes
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+int *tep_get_long_size*(strucqt tep_handle pass:[*]_tep_);
+void *tep_set_long_size*(struct tep_handle pass:[*]_tep_, int _long_size_);
+--
+
+DESCRIPTION
+-----------
+The _tep_get_long_size()_ function returns the size of a long integer on the machine,
+where the trace is generated. The _tep_ argument is trace event parser context.
+
+The _tep_set_long_size()_ function sets the size of a long integer on the machine,
+where the trace is generated. The _tep_ argument is trace event parser context.
+The _long_size_ is the size of a long integer, in bytes.
+
+RETURN VALUE
+------------
+The _tep_get_long_size()_ function returns the size of a long integer on the machine,
+where the trace is generated, in bytes.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+tep_set_long_size(tep, 4);
+...
+int long_size = tep_get_long_size(tep);
+...
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-page_size.txt b/tools/lib/traceevent/Documentation/libtraceevent-page_size.txt
new file mode 100644
index 000000000..452c0cfa1
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-page_size.txt
@@ -0,0 +1,82 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_get_page_size, tep_set_page_size - Get / set the size of a memory page on
+the machine, where the trace is generated
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+int *tep_get_page_size*(struct tep_handle pass:[*]_tep_);
+void *tep_set_page_size*(struct tep_handle pass:[*]_tep_, int _page_size_);
+--
+
+DESCRIPTION
+-----------
+The _tep_get_page_size()_ function returns the size of a memory page on
+the machine, where the trace is generated. The _tep_ argument is trace
+event parser context.
+
+The _tep_set_page_size()_ function stores in the _tep_ context the size of a
+memory page on the machine, where the trace is generated.
+The _tep_ argument is trace event parser context.
+The _page_size_ argument is the size of a memory page, in bytes.
+
+RETURN VALUE
+------------
+The _tep_get_page_size()_ function returns size of the memory page, in bytes.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <unistd.h>
+#include <event-parse.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+	int page_size = getpagesize();
+
+	tep_set_page_size(tep, page_size);
+
+	printf("The page size for this machine is %d\n", tep_get_page_size(tep));
+
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-parse_event.txt b/tools/lib/traceevent/Documentation/libtraceevent-parse_event.txt
new file mode 100644
index 000000000..f248114ca
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-parse_event.txt
@@ -0,0 +1,90 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_parse_event, tep_parse_format - Parse the event format information
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+enum tep_errno *tep_parse_event*(struct tep_handle pass:[*]_tep_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_);
+enum tep_errno *tep_parse_format*(struct tep_handle pass:[*]_tep_, struct tep_event pass:[*]pass:[*]_eventp_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_);
+--
+
+DESCRIPTION
+-----------
+The _tep_parse_event()_ function parses the event format and creates an event
+structure to quickly parse raw data for a given event. The _tep_ argument is
+the trace event parser context. The created event structure is stored in the
+_tep_ context. The _buf_ argument is a buffer with _size_, where the event
+format data is. The event format data can be taken from
+tracefs/events/.../.../format files. The _sys_ argument is the system of
+the event.
+
+The _tep_parse_format()_ function does the same as _tep_parse_event()_. The only
+difference is in the extra _eventp_ argument, where the newly created event
+structure is returned.
+
+RETURN VALUE
+------------
+Both _tep_parse_event()_ and _tep_parse_format()_ functions return 0 on success,
+or TEP_ERRNO__... in case of an error.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+char *buf;
+int size;
+struct tep_event *event = NULL;
+buf = read_file("/sys/kernel/tracing/events/ftrace/print/format", &size);
+if (tep_parse_event(tep, buf, size, "ftrace") != 0) {
+	/* Failed to parse the ftrace print format */
+}
+
+if (tep_parse_format(tep, &event, buf, size, "ftrace") != 0) {
+	/* Failed to parse the ftrace print format */
+}
+...
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-parse_head.txt b/tools/lib/traceevent/Documentation/libtraceevent-parse_head.txt
new file mode 100644
index 000000000..c90f16c7d
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-parse_head.txt
@@ -0,0 +1,82 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_parse_header_page - Parses the data stored in the header page.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+int *tep_parse_header_page*(struct tep_handle pass:[*]_tep_, char pass:[*]_buf_, unsigned long _size_, int _long_size_);
+--
+
+DESCRIPTION
+-----------
+The _tep_parse_header_page()_ function parses the header page data from _buf_,
+and initializes the _tep_, trace event parser context, with it. The buffer
+_buf_ is with _size_, and is supposed to be copied from
+tracefs/events/header_page.
+
+Some old kernels do not have header page info, in this case the
+_tep_parse_header_page()_ function  can be called with _size_ equal to 0. The
+_tep_ context is initialized with default values. The _long_size_ can be used in
+this use case, to set the size of a long integer to be used.
+
+RETURN VALUE
+------------
+The _tep_parse_header_page()_ function returns 0 in case of success, or -1
+in case of an error.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+char *buf;
+int size;
+buf = read_file("/sys/kernel/tracing/events/header_page", &size);
+if (tep_parse_header_page(tep, buf, size, sizeof(unsigned long)) != 0) {
+	/* Failed to parse the header page */
+}
+...
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt
new file mode 100644
index 000000000..4d6394397
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt
@@ -0,0 +1,122 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_load_plugins, tep_unload_plugins, tep_load_plugins_hook - Load / unload traceevent plugins.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+struct tep_plugin_list pass:[*]*tep_load_plugins*(struct tep_handle pass:[*]_tep_);
+void *tep_unload_plugins*(struct tep_plugin_list pass:[*]_plugin_list_, struct tep_handle pass:[*]_tep_);
+void *tep_load_plugins_hook*(struct tep_handle pass:[*]_tep_, const char pass:[*]_suffix_,
+			   void (pass:[*]_load_plugin_)(struct tep_handle pass:[*]tep,
+					       const char pass:[*]path,
+					       const char pass:[*]name,
+					       void pass:[*]data),
+			   void pass:[*]_data_);
+--
+
+DESCRIPTION
+-----------
+The _tep_load_plugins()_ function loads all plugins, located in the plugin
+directories. The _tep_ argument is trace event parser context.
+The plugin directories are :
+[verse]
+--
+	- Directories, specified in _tep_->plugins_dir with priority TEP_PLUGIN_FIRST
+	- System's plugin directory, defined at the library compile time. It
+	  depends on the library installation prefix and usually is
+	  _(install_preffix)/lib/traceevent/plugins_
+	- Directory, defined by the environment variable _TRACEEVENT_PLUGIN_DIR_
+	- User's plugin directory, located at _~/.local/lib/traceevent/plugins_
+	- Directories, specified in _tep_->plugins_dir with priority TEP_PLUGIN_LAST
+--
+Loading of plugins can be controlled by the _tep_flags_, using the
+_tep_set_flag()_ API:
+[verse]
+--
+	_TEP_DISABLE_SYS_PLUGINS_	- do not load plugins, located in
+					the system's plugin directory.
+	_TEP_DISABLE_PLUGINS_		- do not load any plugins.
+--
+The _tep_set_flag()_ API needs to be called before _tep_load_plugins()_, if
+loading of all plugins is not the desired case.
+
+The _tep_unload_plugins()_ function unloads the plugins, previously loaded by
+_tep_load_plugins()_. The _tep_ argument is trace event parser context. The
+_plugin_list_ is the list of loaded plugins, returned by
+the _tep_load_plugins()_ function.
+
+The _tep_load_plugins_hook_ function walks through all directories with plugins
+and calls user specified _load_plugin()_ hook for each plugin file. Only files
+with given _suffix_ are considered to be plugins. The _data_ is a user specified
+context, passed to _load_plugin()_. Directories and the walk order are the same
+as in _tep_load_plugins()_ API.
+
+RETURN VALUE
+------------
+The _tep_load_plugins()_ function returns a list of successfully loaded plugins,
+or NULL in case no plugins are loaded.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+struct tep_plugin_list *plugins = tep_load_plugins(tep);
+if (plugins == NULL) {
+	/* no plugins are loaded */
+}
+...
+tep_unload_plugins(plugins, tep);
+...
+void print_plugin(struct tep_handle *tep, const char *path,
+		  const char *name, void *data)
+{
+	pritnf("Found libtraceevent plugin %s/%s\n", path, name);
+}
+...
+tep_load_plugins_hook(tep, ".so", print_plugin, NULL);
+...
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_, _tep_set_flag(3)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-record_parse.txt b/tools/lib/traceevent/Documentation/libtraceevent-record_parse.txt
new file mode 100644
index 000000000..e9a69116c
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-record_parse.txt
@@ -0,0 +1,137 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_data_type, tep_data_pid,tep_data_preempt_count, tep_data_flags -
+Extract common fields from a record.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+enum *trace_flag_type* {
+	_TRACE_FLAG_IRQS_OFF_,
+	_TRACE_FLAG_IRQS_NOSUPPORT_,
+	_TRACE_FLAG_NEED_RESCHED_,
+	_TRACE_FLAG_HARDIRQ_,
+	_TRACE_FLAG_SOFTIRQ_,
+};
+
+int *tep_data_type*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
+int *tep_data_pid*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
+int *tep_data_preempt_count*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
+int *tep_data_flags*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
+--
+
+DESCRIPTION
+-----------
+This set of functions can be used to extract common fields from a record.
+
+The _tep_data_type()_ function gets the event id from the record _rec_.
+It reads the "common_type" field. The _tep_ argument is the trace event parser
+context.
+
+The _tep_data_pid()_ function gets the process id from the record _rec_.
+It reads the "common_pid" field. The _tep_ argument is the trace event parser
+context.
+
+The _tep_data_preempt_count()_ function gets the preemption count from the
+record _rec_. It reads the "common_preempt_count" field. The _tep_ argument is
+the trace event parser context.
+
+The _tep_data_flags()_ function gets the latency flags from the record _rec_.
+It reads the "common_flags" field. The _tep_ argument is the trace event parser
+context. Supported latency flags are:
+[verse]
+--
+	_TRACE_FLAG_IRQS_OFF_,		Interrupts are disabled.
+	_TRACE_FLAG_IRQS_NOSUPPORT_,	Reading IRQ flag is not supported by the architecture.
+	_TRACE_FLAG_NEED_RESCHED_,	Task needs rescheduling.
+	_TRACE_FLAG_HARDIRQ_,		Hard IRQ is running.
+	_TRACE_FLAG_SOFTIRQ_,		Soft IRQ is running.
+--
+
+RETURN VALUE
+------------
+The _tep_data_type()_ function returns an integer, representing the event id.
+
+The _tep_data_pid()_ function returns an integer, representing the process id
+
+The _tep_data_preempt_count()_ function returns an integer, representing the
+preemption count.
+
+The _tep_data_flags()_ function returns an integer, representing the latency
+flags. Look at the _trace_flag_type_ enum for supported flags.
+
+All these functions in case of an error return a negative integer.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+void process_record(struct tep_record *record)
+{
+	int data;
+
+	data = tep_data_type(tep, record);
+	if (data >= 0) {
+		/* Got the ID of the event */
+	}
+
+	data = tep_data_pid(tep, record);
+	if (data >= 0) {
+		/* Got the process ID */
+	}
+
+	data = tep_data_preempt_count(tep, record);
+	if (data >= 0) {
+		/* Got the preemption count */
+	}
+
+	data = tep_data_flags(tep, record);
+	if (data >= 0) {
+		/* Got the latency flags */
+	}
+}
+...
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-reg_event_handler.txt b/tools/lib/traceevent/Documentation/libtraceevent-reg_event_handler.txt
new file mode 100644
index 000000000..53d37d72a
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-reg_event_handler.txt
@@ -0,0 +1,156 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_register_event_handler, tep_unregister_event_handler -  Register /
+unregisters a callback function to parse an event information.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+enum *tep_reg_handler* {
+	_TEP_REGISTER_SUCCESS_,
+	_TEP_REGISTER_SUCCESS_OVERWRITE_,
+};
+
+int *tep_register_event_handler*(struct tep_handle pass:[*]_tep_, int _id_, const char pass:[*]_sys_name_, const char pass:[*]_event_name_, tep_event_handler_func _func_, void pass:[*]_context_);
+int *tep_unregister_event_handler*(struct tep_handle pass:[*]tep, int id, const char pass:[*]sys_name, const char pass:[*]event_name, tep_event_handler_func func, void pass:[*]_context_);
+
+typedef int (*pass:[*]tep_event_handler_func*)(struct trace_seq pass:[*]s, struct tep_record pass:[*]record, struct tep_event pass:[*]event, void pass:[*]context);
+--
+
+DESCRIPTION
+-----------
+The _tep_register_event_handler()_ function registers a handler function,
+which is going to be called to parse the information for a given event.
+The _tep_ argument is the trace event parser context. The _id_ argument is
+the id of the event. The _sys_name_ argument is the name of the system,
+the event belongs to. The _event_name_ argument is the name of the event.
+If _id_ is >= 0, it is used to find the event, otherwise _sys_name_ and
+_event_name_ are used. The _func_ is a pointer to the function, which is going
+to be called to parse the event information. The _context_ argument is a pointer
+to the context data, which will be passed to the _func_. If a handler function
+for the same event is already registered, it will be overridden with the new
+one. This mechanism allows a developer to override the parsing of a given event.
+If for some reason the default print format is not sufficient, the developer
+can register a function for an event to be used to parse the data instead.
+
+The _tep_unregister_event_handler()_ function unregisters the handler function,
+previously registered with _tep_register_event_handler()_. The _tep_ argument
+is the trace event parser context. The _id_, _sys_name_, _event_name_, _func_,
+and _context_ are the same arguments, as when the callback function _func_ was
+registered.
+
+The _tep_event_handler_func_ is the type of the custom event handler
+function. The _s_ argument is the trace sequence, it can be used to create a
+custom string, describing the event. A _record_  to get the event from is passed
+as input parameter and also the _event_ - the handle to the record's event. The
+_context_ is custom context, set when the custom event handler is registered.
+
+RETURN VALUE
+------------
+The _tep_register_event_handler()_ function returns _TEP_REGISTER_SUCCESS_
+if the new handler is registered successfully or
+_TEP_REGISTER_SUCCESS_OVERWRITE_ if an existing handler is overwritten.
+If there is not  enough memory to complete the registration,
+TEP_ERRNO__MEM_ALLOC_FAILED is returned.
+
+The _tep_unregister_event_handler()_ function returns 0 if _func_ was removed
+successful or, -1 if the event was not found.
+
+The _tep_event_handler_func_ should return -1 in case of an error,
+or 0 otherwise.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+#include <trace-seq.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+int timer_expire_handler(struct trace_seq *s, struct tep_record *record,
+			 struct tep_event *event, void *context)
+{
+	trace_seq_printf(s, "hrtimer=");
+
+	if (tep_print_num_field(s, "0x%llx", event, "timer", record, 0) == -1)
+		tep_print_num_field(s, "0x%llx", event, "hrtimer", record, 1);
+
+	trace_seq_printf(s, " now=");
+
+	tep_print_num_field(s, "%llu", event, "now", record, 1);
+
+	tep_print_func_field(s, " function=%s", event, "function", record, 0);
+
+	return 0;
+}
+...
+	int ret;
+
+	ret = tep_register_event_handler(tep, -1, "timer", "hrtimer_expire_entry",
+					 timer_expire_handler, NULL);
+	if (ret < 0) {
+		char buf[32];
+
+		tep_strerror(tep, ret, buf, 32)
+		printf("Failed to register handler for hrtimer_expire_entry: %s\n", buf);
+	} else {
+		switch (ret) {
+		case TEP_REGISTER_SUCCESS:
+			printf ("Registered handler for hrtimer_expire_entry\n");
+			break;
+		case TEP_REGISTER_SUCCESS_OVERWRITE:
+			printf ("Overwrote handler for hrtimer_expire_entry\n");
+			break;
+		}
+	}
+...
+	ret = tep_unregister_event_handler(tep, -1, "timer", "hrtimer_expire_entry",
+					   timer_expire_handler, NULL);
+	if ( ret )
+		printf ("Failed to unregister handler for hrtimer_expire_entry\n");
+
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*trace-seq.h*
+	Header file to include in order to have access to trace sequences
+	related APIs. Trace sequences are used to allow a function to call
+	several other functions to create a string of data to use.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-reg_print_func.txt b/tools/lib/traceevent/Documentation/libtraceevent-reg_print_func.txt
new file mode 100644
index 000000000..708dce91e
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-reg_print_func.txt
@@ -0,0 +1,155 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_register_print_function,tep_unregister_print_function -
+Registers / Unregisters a helper function.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+enum *tep_func_arg_type* {
+	TEP_FUNC_ARG_VOID,
+	TEP_FUNC_ARG_INT,
+	TEP_FUNC_ARG_LONG,
+	TEP_FUNC_ARG_STRING,
+	TEP_FUNC_ARG_PTR,
+	TEP_FUNC_ARG_MAX_TYPES
+};
+
+typedef unsigned long long (*pass:[*]tep_func_handler*)(struct trace_seq pass:[*]s, unsigned long long pass:[*]args);
+
+int *tep_register_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, enum tep_func_arg_type _ret_type_, char pass:[*]_name_, _..._);
+int *tep_unregister_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, char pass:[*]_name_);
+--
+
+DESCRIPTION
+-----------
+Some events may have helper functions in the print format arguments.
+This allows a plugin to dynamically create a way to process one of
+these functions.
+
+The _tep_register_print_function()_ registers such helper function. The _tep_
+argument is the trace event parser context. The _func_ argument  is a pointer
+to the helper function. The _ret_type_ argument is  the return type of the
+helper function, value from the _tep_func_arg_type_ enum. The _name_ is the name
+of the helper function, as seen in the print format arguments. The _..._ is a
+variable list of _tep_func_arg_type_ enums, the _func_ function arguments.
+This list must end with _TEP_FUNC_ARG_VOID_. See 'EXAMPLE' section.
+
+The _tep_unregister_print_function()_ unregisters a helper function, previously
+registered with _tep_register_print_function()_. The _tep_ argument is the
+trace event parser context. The _func_ and _name_ arguments are the same, used
+when the helper function was registered.
+
+The _tep_func_handler_ is the type of the helper function. The _s_ argument is
+the trace sequence, it can be used to create a custom string.
+The _args_  is a list of arguments, defined when the helper function was
+registered.
+
+RETURN VALUE
+------------
+The _tep_register_print_function()_ function returns 0 in case of success.
+In case of an error, TEP_ERRNO_... code is returned.
+
+The _tep_unregister_print_function()_ returns 0 in case of success, or -1 in
+case of an error.
+
+EXAMPLE
+-------
+Some events have internal functions calls, that appear in the print format
+output. For example "tracefs/events/i915/g4x_wm/format" has:
+[source,c]
+--
+print fmt: "pipe %c, frame=%u, scanline=%u, wm %d/%d/%d, sr %s/%d/%d/%d, hpll %s/%d/%d/%d, fbc %s",
+	    ((REC->pipe) + 'A'), REC->frame, REC->scanline, REC->primary,
+	    REC->sprite, REC->cursor, yesno(REC->cxsr), REC->sr_plane,
+	    REC->sr_cursor, REC->sr_fbc, yesno(REC->hpll), REC->hpll_plane,
+	    REC->hpll_cursor, REC->hpll_fbc, yesno(REC->fbc)
+--
+Notice the call to function _yesno()_ in the print arguments. In the kernel
+context, this function has the following implementation:
+[source,c]
+--
+static const char *yesno(int x)
+{
+	static const char *yes = "yes";
+	static const char *no = "no";
+
+	return x ? yes : no;
+}
+--
+The user space event parser has no idea how to handle this _yesno()_ function.
+The _tep_register_print_function()_ API can be used to register a user space
+helper function, mapped to the kernel's _yesno()_:
+[source,c]
+--
+#include <event-parse.h>
+#include <trace-seq.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+static const char *yes_no_helper(int x)
+{
+	return x ? "yes" : "no";
+}
+...
+	if ( tep_register_print_function(tep,
+				    yes_no_helper,
+				    TEP_FUNC_ARG_STRING,
+				    "yesno",
+				    TEP_FUNC_ARG_INT,
+				    TEP_FUNC_ARG_VOID) != 0) {
+		/* Failed to register yes_no_helper function */
+	}
+
+/*
+   Now, when the event parser encounters this yesno() function, it will know
+   how to handle it.
+*/
+...
+	if (tep_unregister_print_function(tep, yes_no_helper, "yesno") != 0) {
+		/* Failed to unregister yes_no_helper function */
+	}
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*trace-seq.h*
+	Header file to include in order to have access to trace sequences
+	related APIs. Trace sequences are used to allow a function to call
+	several other functions to create a string of data to use.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-set_flag.txt b/tools/lib/traceevent/Documentation/libtraceevent-set_flag.txt
new file mode 100644
index 000000000..b0599780b
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-set_flag.txt
@@ -0,0 +1,104 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_set_flag, tep_clear_flag, tep_test_flag -
+Manage flags of trace event parser context.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+enum *tep_flag* {
+	_TEP_NSEC_OUTPUT_,
+	_TEP_DISABLE_SYS_PLUGINS_,
+	_TEP_DISABLE_PLUGINS_
+};
+void *tep_set_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_);
+void *tep_clear_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_);
+bool *tep_test_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_);
+--
+
+DESCRIPTION
+-----------
+Trace event parser context flags are defined in *enum tep_flag*:
+[verse]
+--
+_TEP_NSEC_OUTPUT_ - print event's timestamp in nano seconds, instead of micro seconds.
+_TEP_DISABLE_SYS_PLUGINS_ - disable plugins, located in system's plugin
+			directory. This directory is defined at library compile
+			time, and usually depends on library installation
+			prefix: (install_preffix)/lib/traceevent/plugins
+_TEP_DISABLE_PLUGINS_ - disable all library plugins:
+			- in system's plugin directory
+			- in directory, defined by the environment variable _TRACEEVENT_PLUGIN_DIR_
+			- in user's home directory, _~/.traceevent/plugins_
+--
+Note: plugin related flags must me set before calling _tep_load_plugins()_ API.
+
+The _tep_set_flag()_ function sets _flag_ to _tep_ context.
+
+The _tep_clear_flag()_ function clears _flag_ from _tep_ context.
+
+The _tep_test_flag()_ function tests if _flag_ is set to _tep_ context.
+
+RETURN VALUE
+------------
+_tep_test_flag()_ function returns true if _flag_ is set, false otherwise.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+/* Print timestamps in nanoseconds */
+tep_set_flag(tep,  TEP_NSEC_OUTPUT);
+...
+if (tep_test_flag(tep, TEP_NSEC_OUTPUT)) {
+	/* print timestamps in nanoseconds */
+} else {
+	/* print timestamps in microseconds */
+}
+...
+/* Print timestamps in microseconds */
+tep_clear_flag(tep, TEP_NSEC_OUTPUT);
+...
+--
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-strerror.txt b/tools/lib/traceevent/Documentation/libtraceevent-strerror.txt
new file mode 100644
index 000000000..ee4062a00
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-strerror.txt
@@ -0,0 +1,85 @@
+libtraceevent(3)
+================
+
+NAME
+----
+tep_strerror - Returns a string describing regular errno and tep error number.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+int *tep_strerror*(struct tep_handle pass:[*]_tep_, enum tep_errno _errnum_, char pass:[*]_buf_, size_t _buflen_);
+
+--
+DESCRIPTION
+-----------
+The _tep_strerror()_ function converts tep error number into a human
+readable string.
+The _tep_ argument is trace event parser context. The _errnum_ is a regular
+errno, defined in errno.h, or a tep error number. The string, describing this
+error number is copied in the _buf_ argument. The _buflen_ argument is
+the size of the _buf_.
+
+It as a thread safe wrapper around strerror_r(). The library function has two
+different behaviors - POSIX and GNU specific. The _tep_strerror()_ API always
+behaves as the POSIX version - the error string is copied in the user supplied
+buffer.
+
+RETURN VALUE
+------------
+The _tep_strerror()_ function returns 0, if a valid _errnum_ is passed and the
+string is copied into _buf_. If _errnum_ is not a valid error number,
+-1 is returned and _buf_ is not modified.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+...
+struct tep_handle *tep = tep_alloc();
+...
+char buf[32];
+char *pool = calloc(1, 128);
+if (tep == NULL) {
+	tep_strerror(tep, TEP_ERRNO__MEM_ALLOC_FAILED, buf, 32);
+	printf ("The pool is not initialized, %s", buf);
+}
+...
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-tseq.txt b/tools/lib/traceevent/Documentation/libtraceevent-tseq.txt
new file mode 100644
index 000000000..8ac6aa174
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent-tseq.txt
@@ -0,0 +1,158 @@
+libtraceevent(3)
+================
+
+NAME
+----
+trace_seq_init, trace_seq_destroy, trace_seq_reset, trace_seq_terminate,
+trace_seq_putc, trace_seq_puts, trace_seq_printf, trace_seq_vprintf,
+trace_seq_do_fprintf, trace_seq_do_printf -
+Initialize / destroy a trace sequence.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+*#include <trace-seq.h>*
+
+void *trace_seq_init*(struct trace_seq pass:[*]_s_);
+void *trace_seq_destroy*(struct trace_seq pass:[*]_s_);
+void *trace_seq_reset*(struct trace_seq pass:[*]_s_);
+void *trace_seq_terminate*(struct trace_seq pass:[*]_s_);
+int *trace_seq_putc*(struct trace_seq pass:[*]_s_, unsigned char _c_);
+int *trace_seq_puts*(struct trace_seq pass:[*]_s_, const char pass:[*]_str_);
+int *trace_seq_printf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, _..._);
+int *trace_seq_vprintf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, va_list _args_);
+int *trace_seq_do_printf*(struct trace_seq pass:[*]_s_);
+int *trace_seq_do_fprintf*(struct trace_seq pass:[*]_s_, FILE pass:[*]_fp_);
+--
+
+DESCRIPTION
+-----------
+Trace sequences are used to allow a function to call several other functions
+to create a string of data to use.
+
+The _trace_seq_init()_ function initializes the trace sequence _s_.
+
+The _trace_seq_destroy()_ function destroys the trace sequence _s_ and frees
+all its resources that it had used.
+
+The _trace_seq_reset()_ function re-initializes the trace sequence _s_. All
+characters already written in _s_ will be deleted.
+
+The _trace_seq_terminate()_ function terminates the trace sequence _s_. It puts
+the null character pass:['\0'] at the end of the buffer.
+
+The _trace_seq_putc()_ function puts a single character _c_ in the trace
+sequence _s_.
+
+The _trace_seq_puts()_ function puts a NULL terminated string _str_ in the
+trace sequence _s_.
+
+The _trace_seq_printf()_ function puts a formated string _fmt _with
+variable arguments _..._ in the trace sequence _s_.
+
+The _trace_seq_vprintf()_ function puts a formated string _fmt _with
+list of arguments _args_ in the trace sequence _s_.
+
+The _trace_seq_do_printf()_ function prints the buffer of trace sequence _s_ to
+the standard output stdout.
+
+The _trace_seq_do_fprintf()_ function prints the buffer of trace sequence _s_
+to the given file _fp_.
+
+RETURN VALUE
+------------
+Both _trace_seq_putc()_ and _trace_seq_puts()_ functions return the number of
+characters put in the trace sequence, or 0 in case of an error
+
+Both _trace_seq_printf()_ and _trace_seq_vprintf()_ functions return 0 if the
+trace oversizes the buffer's free space, the number of characters printed, or
+a negative value in case of an error.
+
+Both _trace_seq_do_printf()_ and _trace_seq_do_fprintf()_ functions return the
+number of printed characters, or -1 in case of an error.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <event-parse.h>
+#include <trace-seq.h>
+...
+struct trace_seq seq;
+trace_seq_init(&seq);
+...
+void foo_seq_print(struct trace_seq *tseq, char *format, ...)
+{
+	va_list ap;
+	va_start(ap, format);
+	if (trace_seq_vprintf(tseq, format, ap) <= 0) {
+		/* Failed to print in the trace sequence */
+	}
+	va_end(ap);
+}
+
+trace_seq_reset(&seq);
+
+char *str = " MAN page example";
+if (trace_seq_puts(&seq, str) != strlen(str)) {
+	/* Failed to put str in the trace sequence */
+}
+if (trace_seq_putc(&seq, ':') != 1) {
+	/* Failed to put ':' in the trace sequence */
+}
+if (trace_seq_printf(&seq, " trace sequence: %d", 1) <= 0) {
+	/* Failed to print in the trace sequence */
+}
+foo_seq_print( &seq, "  %d\n", 2);
+
+trace_seq_terminate(&seq);
+...
+
+if (trace_seq_do_printf(&seq) < 0 ) {
+	/* Failed to print the sequence buffer to the standard output */
+}
+FILE *fp = fopen("trace.txt", "w");
+if (trace_seq_do_fprintf(&seq, fp) < 0 ) [
+	/* Failed to print the sequence buffer to the trace.txt file */
+}
+
+trace_seq_destroy(&seq);
+...
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*trace-seq.h*
+	Header file to include in order to have access to trace sequences related APIs.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_, _trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent.txt b/tools/lib/traceevent/Documentation/libtraceevent.txt
new file mode 100644
index 000000000..d530a7ce8
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/libtraceevent.txt
@@ -0,0 +1,192 @@
+libtraceevent(3)
+================
+
+NAME
+----
+libtraceevent - Linux kernel trace event library
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <event-parse.h>*
+
+Management of tep handler data structure and access of its members:
+	struct tep_handle pass:[*]*tep_alloc*(void);
+	void *tep_free*(struct tep_handle pass:[*]_tep_);
+	void *tep_ref*(struct tep_handle pass:[*]_tep_);
+	void *tep_unref*(struct tep_handle pass:[*]_tep_);
+	int *tep_get_ref*(struct tep_handle pass:[*]_tep_);
+	void *tep_set_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_);
+	void *tep_clear_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_);
+	bool *tep_test_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flags_);
+	int *tep_get_cpus*(struct tep_handle pass:[*]_tep_);
+	void *tep_set_cpus*(struct tep_handle pass:[*]_tep_, int _cpus_);
+	int *tep_get_long_size*(strucqt tep_handle pass:[*]_tep_);
+	void *tep_set_long_size*(struct tep_handle pass:[*]_tep_, int _long_size_);
+	int *tep_get_page_size*(struct tep_handle pass:[*]_tep_);
+	void *tep_set_page_size*(struct tep_handle pass:[*]_tep_, int _page_size_);
+	int *tep_get_header_page_size*(struct tep_handle pass:[*]_tep_);
+	int *tep_get_header_timestamp_size*(struct tep_handle pass:[*]_tep_);
+	bool *tep_is_old_format*(struct tep_handle pass:[*]_tep_);
+	int *tep_strerror*(struct tep_handle pass:[*]_tep_, enum tep_errno _errnum_, char pass:[*]_buf_, size_t _buflen_);
+
+Register / unregister APIs:
+	int *tep_register_function*(struct tep_handle pass:[*]_tep_, char pass:[*]_name_, unsigned long long _addr_, char pass:[*]_mod_);
+	int *tep_register_event_handler*(struct tep_handle pass:[*]_tep_, int _id_, const char pass:[*]_sys_name_, const char pass:[*]_event_name_, tep_event_handler_func _func_, void pass:[*]_context_);
+	int *tep_unregister_event_handler*(struct tep_handle pass:[*]tep, int id, const char pass:[*]sys_name, const char pass:[*]event_name, tep_event_handler_func func, void pass:[*]_context_);
+	int *tep_register_print_string*(struct tep_handle pass:[*]_tep_, const char pass:[*]_fmt_, unsigned long long _addr_);
+	int *tep_register_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, enum tep_func_arg_type _ret_type_, char pass:[*]_name_, _..._);
+	int *tep_unregister_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, char pass:[*]_name_);
+
+Plugins management:
+	struct tep_plugin_list pass:[*]*tep_load_plugins*(struct tep_handle pass:[*]_tep_);
+	void *tep_unload_plugins*(struct tep_plugin_list pass:[*]_plugin_list_, struct tep_handle pass:[*]_tep_);
+	char pass:[*]pass:[*]*tep_plugin_list_options*(void);
+	void *tep_plugin_free_options_list*(char pass:[*]pass:[*]_list_);
+	int *tep_plugin_add_options*(const char pass:[*]_name_, struct tep_plugin_option pass:[*]_options_);
+	void *tep_plugin_remove_options*(struct tep_plugin_option pass:[*]_options_);
+	void *tep_print_plugins*(struct trace_seq pass:[*]_s_, const char pass:[*]_prefix_, const char pass:[*]_suffix_, const struct tep_plugin_list pass:[*]_list_);
+
+Event related APIs:
+	struct tep_event pass:[*]*tep_get_event*(struct tep_handle pass:[*]_tep_, int _index_);
+	struct tep_event pass:[*]*tep_get_first_event*(struct tep_handle pass:[*]_tep_);
+	int *tep_get_events_count*(struct tep_handle pass:[*]_tep_);
+	struct tep_event pass:[*]pass:[*]*tep_list_events*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_);
+	struct tep_event pass:[*]pass:[*]*tep_list_events_copy*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_);
+	void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_record pass:[*]_record_, const char pass:[*]_fmt_, _..._);
+
+Event finding:
+	struct tep_event pass:[*]*tep_find_event*(struct tep_handle pass:[*]_tep_, int _id_);
+	struct tep_event pass:[*]*tep_find_event_by_name*(struct tep_handle pass:[*]_tep_, const char pass:[*]_sys_, const char pass:[*]_name_);
+	struct tep_event pass:[*]*tep_find_event_by_record*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_record_);
+
+Parsing of event files:
+	int *tep_parse_header_page*(struct tep_handle pass:[*]_tep_, char pass:[*]_buf_, unsigned long _size_, int _long_size_);
+	enum tep_errno *tep_parse_event*(struct tep_handle pass:[*]_tep_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_);
+	enum tep_errno *tep_parse_format*(struct tep_handle pass:[*]_tep_, struct tep_event pass:[*]pass:[*]_eventp_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_);
+
+APIs related to fields from event's format files:
+	struct tep_format_field pass:[*]pass:[*]*tep_event_common_fields*(struct tep_event pass:[*]_event_);
+	struct tep_format_field pass:[*]pass:[*]*tep_event_fields*(struct tep_event pass:[*]_event_);
+	void pass:[*]*tep_get_field_raw*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int pass:[*]_len_, int _err_);
+	int *tep_get_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_);
+	int *tep_get_common_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_);
+	int *tep_get_any_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_);
+	int *tep_read_number_field*(struct tep_format_field pass:[*]_field_, const void pass:[*]_data_, unsigned long long pass:[*]_value_);
+
+Event fields printing:
+	void *tep_print_field*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, struct tep_format_field pass:[*]_field_);
+	void *tep_print_fields*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, int _size_, struct tep_event pass:[*]_event_);
+	int *tep_print_num_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_);
+	int *tep_print_func_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_);
+
+Event fields finding:
+	struct tep_format_field pass:[*]*tep_find_common_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_);
+	struct tep_format_field pass:[*]*tep_find_field*(struct tep_event_ormat pass:[*]_event_, const char pass:[*]_name_);
+	struct tep_format_field pass:[*]*tep_find_any_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_);
+
+Functions resolver:
+	int *tep_set_function_resolver*(struct tep_handle pass:[*]_tep_, tep_func_resolver_t pass:[*]_func_, void pass:[*]_priv_);
+	void *tep_reset_function_resolver*(struct tep_handle pass:[*]_tep_);
+	const char pass:[*]*tep_find_function*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_);
+	unsigned long long *tep_find_function_address*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_);
+
+Filter management:
+	struct tep_event_filter pass:[*]*tep_filter_alloc*(struct tep_handle pass:[*]_tep_);
+	enum tep_errno *tep_filter_add_filter_str*(struct tep_event_filter pass:[*]_filter_, const char pass:[*]_filter_str_);
+	enum tep_errno *tep_filter_match*(struct tep_event_filter pass:[*]_filter_, struct tep_record pass:[*]_record_);
+	int *tep_filter_strerror*(struct tep_event_filter pass:[*]_filter_, enum tep_errno _err_, char pass:[*]buf, size_t _buflen_);
+	int *tep_event_filtered*(struct tep_event_filter pass:[*]_filter_, int _event_id_);
+	void *tep_filter_reset*(struct tep_event_filter pass:[*]_filter_);
+	void *tep_filter_free*(struct tep_event_filter pass:[*]_filter_);
+	char pass:[*]*tep_filter_make_string*(struct tep_event_filter pass:[*]_filter_, int _event_id_);
+	int *tep_filter_remove_event*(struct tep_event_filter pass:[*]_filter_, int _event_id_);
+	int *tep_filter_copy*(struct tep_event_filter pass:[*]_dest_, struct tep_event_filter pass:[*]_source_);
+	int *tep_filter_compare*(struct tep_event_filter pass:[*]_filter1_, struct tep_event_filter pass:[*]_filter2_);
+
+Parsing various data from the records:
+	int *tep_data_type*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
+	int *tep_data_pid*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
+	int *tep_data_preempt_count*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
+	int *tep_data_flags*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
+
+Command and task related APIs:
+	const char pass:[*]*tep_data_comm_from_pid*(struct tep_handle pass:[*]_tep_, int _pid_);
+	struct cmdline pass:[*]*tep_data_pid_from_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, struct cmdline pass:[*]_next_);
+	int *tep_register_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_);
+	int *tep_override_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_);
+	bool *tep_is_pid_registered*(struct tep_handle pass:[*]_tep_, int _pid_);
+	int *tep_cmdline_pid*(struct tep_handle pass:[*]_tep_, struct cmdline pass:[*]_cmdline_);
+
+Endian related APIs:
+	int *tep_is_bigendian*(void);
+	unsigned long long *tep_read_number*(struct tep_handle pass:[*]_tep_, const void pass:[*]_ptr_, int _size_);
+	bool *tep_is_file_bigendian*(struct tep_handle pass:[*]_tep_);
+	void *tep_set_file_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_);
+	bool *tep_is_local_bigendian*(struct tep_handle pass:[*]_tep_);
+	void *tep_set_local_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_);
+
+Trace sequences:
+*#include <trace-seq.h>*
+	void *trace_seq_init*(struct trace_seq pass:[*]_s_);
+	void *trace_seq_reset*(struct trace_seq pass:[*]_s_);
+	void *trace_seq_destroy*(struct trace_seq pass:[*]_s_);
+	int *trace_seq_printf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, ...);
+	int *trace_seq_vprintf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, va_list _args_);
+	int *trace_seq_puts*(struct trace_seq pass:[*]_s_, const char pass:[*]_str_);
+	int *trace_seq_putc*(struct trace_seq pass:[*]_s_, unsigned char _c_);
+	void *trace_seq_terminate*(struct trace_seq pass:[*]_s_);
+	int *trace_seq_do_fprintf*(struct trace_seq pass:[*]_s_, FILE pass:[*]_fp_);
+	int *trace_seq_do_printf*(struct trace_seq pass:[*]_s_);
+--
+
+DESCRIPTION
+-----------
+The libtraceevent(3) library provides APIs to access kernel tracepoint events,
+located in the tracefs file system under the events directory.
+
+ENVIRONMENT
+-----------
+[verse]
+--
+TRACEEVENT_PLUGIN_DIR
+	Additional plugin directory. All shared object files, located in this directory will be loaded as traceevent plugins.
+--
+
+FILES
+-----
+[verse]
+--
+*event-parse.h*
+	Header file to include in order to have access to the library APIs.
+*trace-seq.h*
+	Header file to include in order to have access to trace sequences related APIs.
+	Trace sequences are used to allow a function to call several other functions
+	to create a string of data to use.
+*-ltraceevent*
+	Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_trace-cmd(1)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to  <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtraceevent is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/manpage-1.72.xsl b/tools/lib/traceevent/Documentation/manpage-1.72.xsl
new file mode 100644
index 000000000..b4d315cb8
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/manpage-1.72.xsl
@@ -0,0 +1,14 @@
+<!-- manpage-1.72.xsl:
+     special settings for manpages rendered from asciidoc+docbook
+     handles peculiarities in docbook-xsl 1.72.0 -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<xsl:import href="manpage-base.xsl"/>
+
+<!-- these are the special values for the roff control characters
+     needed for docbook-xsl 1.72.0 -->
+<xsl:param name="git.docbook.backslash">&#x2593;</xsl:param>
+<xsl:param name="git.docbook.dot"      >&#x2302;</xsl:param>
+
+</xsl:stylesheet>
diff --git a/tools/lib/traceevent/Documentation/manpage-base.xsl b/tools/lib/traceevent/Documentation/manpage-base.xsl
new file mode 100644
index 000000000..a264fa616
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/manpage-base.xsl
@@ -0,0 +1,35 @@
+<!-- manpage-base.xsl:
+     special formatting for manpages rendered from asciidoc+docbook -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<!-- these params silence some output from xmlto -->
+<xsl:param name="man.output.quietly" select="1"/>
+<xsl:param name="refentry.meta.get.quietly" select="1"/>
+
+<!-- convert asciidoc callouts to man page format;
+     git.docbook.backslash and git.docbook.dot params
+     must be supplied by another XSL file or other means -->
+<xsl:template match="co">
+	<xsl:value-of select="concat(
+			      $git.docbook.backslash,'fB(',
+			      substring-after(@id,'-'),')',
+			      $git.docbook.backslash,'fR')"/>
+</xsl:template>
+<xsl:template match="calloutlist">
+	<xsl:value-of select="$git.docbook.dot"/>
+	<xsl:text>sp&#10;</xsl:text>
+	<xsl:apply-templates/>
+	<xsl:text>&#10;</xsl:text>
+</xsl:template>
+<xsl:template match="callout">
+	<xsl:value-of select="concat(
+			      $git.docbook.backslash,'fB',
+			      substring-after(@arearefs,'-'),
+			      '. ',$git.docbook.backslash,'fR')"/>
+	<xsl:apply-templates/>
+	<xsl:value-of select="$git.docbook.dot"/>
+	<xsl:text>br&#10;</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/tools/lib/traceevent/Documentation/manpage-bold-literal.xsl b/tools/lib/traceevent/Documentation/manpage-bold-literal.xsl
new file mode 100644
index 000000000..608eb5df6
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/manpage-bold-literal.xsl
@@ -0,0 +1,17 @@
+<!-- manpage-bold-literal.xsl:
+     special formatting for manpages rendered from asciidoc+docbook -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<!-- render literal text as bold (instead of plain or monospace);
+     this makes literal text easier to distinguish in manpages
+     viewed on a tty -->
+<xsl:template match="literal">
+	<xsl:value-of select="$git.docbook.backslash"/>
+	<xsl:text>fB</xsl:text>
+	<xsl:apply-templates/>
+	<xsl:value-of select="$git.docbook.backslash"/>
+	<xsl:text>fR</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/tools/lib/traceevent/Documentation/manpage-normal.xsl b/tools/lib/traceevent/Documentation/manpage-normal.xsl
new file mode 100644
index 000000000..a48f5b11f
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/manpage-normal.xsl
@@ -0,0 +1,13 @@
+<!-- manpage-normal.xsl:
+     special settings for manpages rendered from asciidoc+docbook
+     handles anything we want to keep away from docbook-xsl 1.72.0 -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<xsl:import href="manpage-base.xsl"/>
+
+<!-- these are the normal values for the roff control characters -->
+<xsl:param name="git.docbook.backslash">\</xsl:param>
+<xsl:param name="git.docbook.dot"	>.</xsl:param>
+
+</xsl:stylesheet>
diff --git a/tools/lib/traceevent/Documentation/manpage-suppress-sp.xsl b/tools/lib/traceevent/Documentation/manpage-suppress-sp.xsl
new file mode 100644
index 000000000..a63c7632a
--- /dev/null
+++ b/tools/lib/traceevent/Documentation/manpage-suppress-sp.xsl
@@ -0,0 +1,21 @@
+<!-- manpage-suppress-sp.xsl:
+     special settings for manpages rendered from asciidoc+docbook
+     handles erroneous, inline .sp in manpage output of some
+     versions of docbook-xsl -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<!-- attempt to work around spurious .sp at the tail of the line
+     that some versions of docbook stylesheets seem to add -->
+<xsl:template match="simpara">
+  <xsl:variable name="content">
+    <xsl:apply-templates/>
+  </xsl:variable>
+  <xsl:value-of select="normalize-space($content)"/>
+  <xsl:if test="not(ancestor::authorblurb) and
+                not(ancestor::personblurb)">
+    <xsl:text>&#10;&#10;</xsl:text>
+  </xsl:if>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
new file mode 100644
index 000000000..c874c017c
--- /dev/null
+++ b/tools/lib/traceevent/Makefile
@@ -0,0 +1,300 @@
+# SPDX-License-Identifier: GPL-2.0
+# trace-cmd version
+EP_VERSION = 1
+EP_PATCHLEVEL = 1
+EP_EXTRAVERSION = 0
+
+# file format version
+FILE_VERSION = 6
+
+MAKEFLAGS += --no-print-directory
+
+
+# Makefiles suck: This macro sets a default value of $(2) for the
+# variable named by $(1), unless the variable has been set by
+# environment or command line. This is necessary for CC and AR
+# because make sets default values, so the simpler ?= approach
+# won't work as expected.
+define allow-override
+  $(if $(or $(findstring environment,$(origin $(1))),\
+            $(findstring command line,$(origin $(1)))),,\
+    $(eval $(1) = $(2)))
+endef
+
+# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
+$(call allow-override,CC,$(CROSS_COMPILE)gcc)
+$(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,NM,$(CROSS_COMPILE)nm)
+$(call allow-override,PKG_CONFIG,pkg-config)
+
+EXT = -std=gnu99
+INSTALL = install
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
+ifeq ($(LP64), 1)
+  libdir_relative_temp = lib64
+else
+  libdir_relative_temp = lib
+endif
+
+libdir_relative ?= $(libdir_relative_temp)
+prefix ?= /usr/local
+libdir = $(prefix)/$(libdir_relative)
+man_dir = $(prefix)/share/man
+man_dir_SQ = '$(subst ','\'',$(man_dir))'
+pkgconfig_dir ?= $(word 1,$(shell $(PKG_CONFIG) 		\
+			--variable pc_path pkg-config | tr ":" " "))
+includedir_relative = traceevent
+includedir = $(prefix)/include/$(includedir_relative)
+includedir_SQ = '$(subst ','\'',$(includedir))'
+
+export man_dir man_dir_SQ INSTALL
+export DESTDIR DESTDIR_SQ
+export EVENT_PARSE_VERSION
+
+include ../../scripts/Makefile.include
+
+# copy a bit from Linux kbuild
+
+ifeq ("$(origin V)", "command line")
+  VERBOSE = $(V)
+endif
+ifndef VERBOSE
+  VERBOSE = 0
+endif
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+export prefix libdir src obj
+
+# Shell quotes
+libdir_SQ = $(subst ','\'',$(libdir))
+libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
+
+CONFIG_INCLUDES = 
+CONFIG_LIBS	=
+CONFIG_FLAGS	=
+
+VERSION		= $(EP_VERSION)
+PATCHLEVEL	= $(EP_PATCHLEVEL)
+EXTRAVERSION	= $(EP_EXTRAVERSION)
+
+OBJ		= $@
+N		=
+
+EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION)
+
+LIB_TARGET  = libtraceevent.a libtraceevent.so.$(EVENT_PARSE_VERSION)
+LIB_INSTALL = libtraceevent.a libtraceevent.so*
+LIB_INSTALL := $(addprefix $(OUTPUT),$(LIB_INSTALL))
+
+INCLUDES = -I. -I $(srctree)/tools/include $(CONFIG_INCLUDES)
+
+# Set compile option CFLAGS
+ifdef EXTRA_CFLAGS
+  CFLAGS := $(EXTRA_CFLAGS)
+else
+  CFLAGS := -g -Wall
+endif
+
+# Append required CFLAGS
+override CFLAGS += -fPIC
+override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ)
+override CFLAGS += $(udis86-flags) -D_GNU_SOURCE
+
+ifeq ($(VERBOSE),1)
+  Q =
+else
+  Q = @
+endif
+
+# Disable command line variables (CFLAGS) override from top
+# level Makefile (perf), otherwise build Makefile will get
+# the same command line setup.
+MAKEOVERRIDES=
+
+export srctree OUTPUT CC LD CFLAGS V
+build := -f $(srctree)/tools/build/Makefile.build dir=. obj
+
+TE_IN      := $(OUTPUT)libtraceevent-in.o
+LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET))
+
+CMD_TARGETS = $(LIB_TARGET)
+
+TARGETS = $(CMD_TARGETS)
+
+all: all_cmd plugins
+
+all_cmd: $(CMD_TARGETS)
+
+$(TE_IN): force
+	$(Q)$(MAKE) $(build)=libtraceevent
+
+$(OUTPUT)libtraceevent.so.$(EVENT_PARSE_VERSION): $(TE_IN)
+	$(QUIET_LINK)$(CC) --shared $(LDFLAGS) $^ -Wl,-soname,libtraceevent.so.$(EP_VERSION) -o $@
+	@ln -sf $(@F) $(OUTPUT)libtraceevent.so
+	@ln -sf $(@F) $(OUTPUT)libtraceevent.so.$(EP_VERSION)
+
+$(OUTPUT)libtraceevent.a: $(TE_IN)
+	$(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
+
+$(OUTPUT)%.so: $(OUTPUT)%-in.o
+	$(QUIET_LINK)$(CC) $(CFLAGS) -shared $(LDFLAGS) -nostartfiles -o $@ $^
+
+define make_version.h
+  (echo '/* This file is automatically generated. Do not modify. */';		\
+   echo \#define VERSION_CODE $(shell						\
+   expr $(VERSION) \* 256 + $(PATCHLEVEL));					\
+   echo '#define EXTRAVERSION ' $(EXTRAVERSION);				\
+   echo '#define VERSION_STRING "'$(VERSION).$(PATCHLEVEL).$(EXTRAVERSION)'"';	\
+   echo '#define FILE_VERSION '$(FILE_VERSION);					\
+  ) > $1
+endef
+
+define update_version.h
+  ($(call make_version.h, $@.tmp);		\
+    if [ -r $@ ] && cmp -s $@ $@.tmp; then	\
+      rm -f $@.tmp;				\
+    else					\
+      echo '  UPDATE                 $@';	\
+      mv -f $@.tmp $@;				\
+    fi);
+endef
+
+ep_version.h: force
+	$(Q)$(N)$(call update_version.h)
+
+VERSION_FILES = ep_version.h
+
+define update_dir
+  (echo $1 > $@.tmp;				\
+   if [ -r $@ ] && cmp -s $@ $@.tmp; then	\
+     rm -f $@.tmp;				\
+   else						\
+     echo '  UPDATE                 $@';	\
+     mv -f $@.tmp $@;				\
+   fi);
+endef
+
+tags:	force
+	$(RM) tags
+	find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \
+	--regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/'
+
+TAGS:	force
+	$(RM) TAGS
+	find . -name '*.[ch]' | xargs etags \
+	--regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/'
+
+define do_install_mkdir
+	if [ ! -d '$(DESTDIR_SQ)$1' ]; then		\
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1';	\
+	fi
+endef
+
+define do_install
+	$(call do_install_mkdir,$2);			\
+	$(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2'
+endef
+
+PKG_CONFIG_SOURCE_FILE = libtraceevent.pc
+PKG_CONFIG_FILE := $(addprefix $(OUTPUT),$(PKG_CONFIG_SOURCE_FILE))
+define do_install_pkgconfig_file
+	if [ -n "${pkgconfig_dir}" ]; then 					\
+		cp -f ${PKG_CONFIG_SOURCE_FILE}.template ${PKG_CONFIG_FILE};	\
+		sed -i "s|INSTALL_PREFIX|${1}|g" ${PKG_CONFIG_FILE}; 		\
+		sed -i "s|LIB_VERSION|${EVENT_PARSE_VERSION}|g" ${PKG_CONFIG_FILE}; \
+		sed -i "s|LIB_DIR|${libdir}|g" ${PKG_CONFIG_FILE}; \
+		sed -i "s|HEADER_DIR|$(includedir)|g" ${PKG_CONFIG_FILE}; \
+		$(call do_install,$(PKG_CONFIG_FILE),$(pkgconfig_dir),644); 	\
+	else 									\
+		(echo Failed to locate pkg-config directory) 1>&2;		\
+	fi
+endef
+
+install_lib: all_cmd install_plugins install_headers install_pkgconfig
+	$(call QUIET_INSTALL, $(LIB_TARGET)) \
+		$(call do_install_mkdir,$(libdir_SQ)); \
+		cp -fpR $(LIB_INSTALL) $(DESTDIR)$(libdir_SQ)
+
+install_pkgconfig:
+	$(call QUIET_INSTALL, $(PKG_CONFIG_FILE)) \
+		$(call do_install_pkgconfig_file,$(prefix))
+
+install_headers:
+	$(call QUIET_INSTALL, headers) \
+		$(call do_install,event-parse.h,$(includedir_SQ),644); \
+		$(call do_install,event-utils.h,$(includedir_SQ),644); \
+		$(call do_install,trace-seq.h,$(includedir_SQ),644); \
+		$(call do_install,kbuffer.h,$(includedir_SQ),644)
+
+install: install_lib
+
+clean: clean_plugins
+	$(call QUIET_CLEAN, libtraceevent) \
+		$(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd; \
+		$(RM) TRACEEVENT-CFLAGS tags TAGS; \
+		$(RM) $(PKG_CONFIG_FILE)
+
+PHONY += doc
+doc:
+	$(call descend,Documentation)
+
+PHONY += doc-clean
+doc-clean:
+	$(call descend,Documentation,clean)
+
+PHONY += doc-install
+doc-install:
+	$(call descend,Documentation,install)
+
+PHONY += doc-uninstall
+doc-uninstall:
+	$(call descend,Documentation,uninstall)
+
+PHONY += help
+help:
+	@echo 'Possible targets:'
+	@echo''
+	@echo '  all                 - default, compile the library and the'\
+				      'plugins'
+	@echo '  plugins             - compile the plugins'
+	@echo '  install             - install the library, the plugins,'\
+					'the header and pkgconfig files'
+	@echo '  clean               - clean the library and the plugins object files'
+	@echo '  doc                 - compile the documentation files - man'\
+					'and html pages, in the Documentation directory'
+	@echo '  doc-clean           - clean the documentation files'
+	@echo '  doc-install         - install the man pages'
+	@echo '  doc-uninstall       - uninstall the man pages'
+	@echo''
+
+PHONY += plugins
+plugins:
+	$(call descend,plugins)
+
+PHONY += install_plugins
+install_plugins:
+	$(call descend,plugins,install)
+
+PHONY += clean_plugins
+clean_plugins:
+	$(call descend,plugins,clean)
+
+force:
+
+# Declare the contents of the .PHONY variable as phony.  We keep that
+# information in a variable so we can use it in if_changed and friends.
+.PHONY: $(PHONY)
diff --git a/tools/lib/traceevent/event-parse-api.c b/tools/lib/traceevent/event-parse-api.c
new file mode 100644
index 000000000..f8361e45d
--- /dev/null
+++ b/tools/lib/traceevent/event-parse-api.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+
+#include "event-parse.h"
+#include "event-parse-local.h"
+#include "event-utils.h"
+
+/**
+ * tep_get_event - returns the event with the given index
+ * @tep: a handle to the tep_handle
+ * @index: index of the requested event, in the range 0 .. nr_events
+ *
+ * This returns pointer to the element of the events array with the given index
+ * If @tep is NULL, or @index is not in the range 0 .. nr_events, NULL is returned.
+ */
+struct tep_event *tep_get_event(struct tep_handle *tep, int index)
+{
+	if (tep && tep->events && index < tep->nr_events)
+		return tep->events[index];
+
+	return NULL;
+}
+
+/**
+ * tep_get_first_event - returns the first event in the events array
+ * @tep: a handle to the tep_handle
+ *
+ * This returns pointer to the first element of the events array
+ * If @tep is NULL, NULL is returned.
+ */
+struct tep_event *tep_get_first_event(struct tep_handle *tep)
+{
+	return tep_get_event(tep, 0);
+}
+
+/**
+ * tep_get_events_count - get the number of defined events
+ * @tep: a handle to the tep_handle
+ *
+ * This returns number of elements in event array
+ * If @tep is NULL, 0 is returned.
+ */
+int tep_get_events_count(struct tep_handle *tep)
+{
+	if (tep)
+		return tep->nr_events;
+	return 0;
+}
+
+/**
+ * tep_set_flag - set event parser flag
+ * @tep: a handle to the tep_handle
+ * @flag: flag, or combination of flags to be set
+ * can be any combination from enum tep_flag
+ *
+ * This sets a flag or combination of flags from enum tep_flag
+ */
+void tep_set_flag(struct tep_handle *tep, int flag)
+{
+	if (tep)
+		tep->flags |= flag;
+}
+
+/**
+ * tep_clear_flag - clear event parser flag
+ * @tep: a handle to the tep_handle
+ * @flag: flag to be cleared
+ *
+ * This clears a tep flag
+ */
+void tep_clear_flag(struct tep_handle *tep, enum tep_flag flag)
+{
+	if (tep)
+		tep->flags &= ~flag;
+}
+
+/**
+ * tep_test_flag - check the state of event parser flag
+ * @tep: a handle to the tep_handle
+ * @flag: flag to be checked
+ *
+ * This returns the state of the requested tep flag.
+ * Returns: true if the flag is set, false otherwise.
+ */
+bool tep_test_flag(struct tep_handle *tep, enum tep_flag flag)
+{
+	if (tep)
+		return tep->flags & flag;
+	return false;
+}
+
+__hidden unsigned short data2host2(struct tep_handle *tep, unsigned short data)
+{
+	unsigned short swap;
+
+	if (!tep || tep->host_bigendian == tep->file_bigendian)
+		return data;
+
+	swap = ((data & 0xffULL) << 8) |
+		((data & (0xffULL << 8)) >> 8);
+
+	return swap;
+}
+
+__hidden unsigned int data2host4(struct tep_handle *tep, unsigned int data)
+{
+	unsigned int swap;
+
+	if (!tep || tep->host_bigendian == tep->file_bigendian)
+		return data;
+
+	swap = ((data & 0xffULL) << 24) |
+		((data & (0xffULL << 8)) << 8) |
+		((data & (0xffULL << 16)) >> 8) |
+		((data & (0xffULL << 24)) >> 24);
+
+	return swap;
+}
+
+__hidden  unsigned long long
+data2host8(struct tep_handle *tep, unsigned long long data)
+{
+	unsigned long long swap;
+
+	if (!tep || tep->host_bigendian == tep->file_bigendian)
+		return data;
+
+	swap = ((data & 0xffULL) << 56) |
+		((data & (0xffULL << 8)) << 40) |
+		((data & (0xffULL << 16)) << 24) |
+		((data & (0xffULL << 24)) << 8) |
+		((data & (0xffULL << 32)) >> 8) |
+		((data & (0xffULL << 40)) >> 24) |
+		((data & (0xffULL << 48)) >> 40) |
+		((data & (0xffULL << 56)) >> 56);
+
+	return swap;
+}
+
+/**
+ * tep_get_header_page_size - get size of the header page
+ * @tep: a handle to the tep_handle
+ *
+ * This returns size of the header page
+ * If @tep is NULL, 0 is returned.
+ */
+int tep_get_header_page_size(struct tep_handle *tep)
+{
+	if (tep)
+		return tep->header_page_size_size;
+	return 0;
+}
+
+/**
+ * tep_get_header_timestamp_size - get size of the timestamp in the header page
+ * @tep: a handle to the tep_handle
+ *
+ * This returns size of the timestamp in the header page
+ * If @tep is NULL, 0 is returned.
+ */
+int tep_get_header_timestamp_size(struct tep_handle *tep)
+{
+	if (tep)
+		return tep->header_page_ts_size;
+	return 0;
+}
+
+/**
+ * tep_get_cpus - get the number of CPUs
+ * @tep: a handle to the tep_handle
+ *
+ * This returns the number of CPUs
+ * If @tep is NULL, 0 is returned.
+ */
+int tep_get_cpus(struct tep_handle *tep)
+{
+	if (tep)
+		return tep->cpus;
+	return 0;
+}
+
+/**
+ * tep_set_cpus - set the number of CPUs
+ * @tep: a handle to the tep_handle
+ *
+ * This sets the number of CPUs
+ */
+void tep_set_cpus(struct tep_handle *tep, int cpus)
+{
+	if (tep)
+		tep->cpus = cpus;
+}
+
+/**
+ * tep_get_long_size - get the size of a long integer on the traced machine
+ * @tep: a handle to the tep_handle
+ *
+ * This returns the size of a long integer on the traced machine
+ * If @tep is NULL, 0 is returned.
+ */
+int tep_get_long_size(struct tep_handle *tep)
+{
+	if (tep)
+		return tep->long_size;
+	return 0;
+}
+
+/**
+ * tep_set_long_size - set the size of a long integer on the traced machine
+ * @tep: a handle to the tep_handle
+ * @size: size, in bytes, of a long integer
+ *
+ * This sets the size of a long integer on the traced machine
+ */
+void tep_set_long_size(struct tep_handle *tep, int long_size)
+{
+	if (tep)
+		tep->long_size = long_size;
+}
+
+/**
+ * tep_get_page_size - get the size of a memory page on the traced machine
+ * @tep: a handle to the tep_handle
+ *
+ * This returns the size of a memory page on the traced machine
+ * If @tep is NULL, 0 is returned.
+ */
+int tep_get_page_size(struct tep_handle *tep)
+{
+	if (tep)
+		return tep->page_size;
+	return 0;
+}
+
+/**
+ * tep_set_page_size - set the size of a memory page on the traced machine
+ * @tep: a handle to the tep_handle
+ * @_page_size: size of a memory page, in bytes
+ *
+ * This sets the size of a memory page on the traced machine
+ */
+void tep_set_page_size(struct tep_handle *tep, int _page_size)
+{
+	if (tep)
+		tep->page_size = _page_size;
+}
+
+/**
+ * tep_is_file_bigendian - return the endian of the file
+ * @tep: a handle to the tep_handle
+ *
+ * This returns true if the file is in big endian order
+ * If @tep is NULL, false is returned.
+ */
+bool tep_is_file_bigendian(struct tep_handle *tep)
+{
+	if (tep)
+		return (tep->file_bigendian == TEP_BIG_ENDIAN);
+	return false;
+}
+
+/**
+ * tep_set_file_bigendian - set if the file is in big endian order
+ * @tep: a handle to the tep_handle
+ * @endian: non zero, if the file is in big endian order
+ *
+ * This sets if the file is in big endian order
+ */
+void tep_set_file_bigendian(struct tep_handle *tep, enum tep_endian endian)
+{
+	if (tep)
+		tep->file_bigendian = endian;
+}
+
+/**
+ * tep_is_local_bigendian - return the endian of the saved local machine
+ * @tep: a handle to the tep_handle
+ *
+ * This returns true if the saved local machine in @tep is big endian.
+ * If @tep is NULL, false is returned.
+ */
+bool tep_is_local_bigendian(struct tep_handle *tep)
+{
+	if (tep)
+		return (tep->host_bigendian == TEP_BIG_ENDIAN);
+	return 0;
+}
+
+/**
+ * tep_set_local_bigendian - set the stored local machine endian order
+ * @tep: a handle to the tep_handle
+ * @endian: non zero, if the local host has big endian order
+ *
+ * This sets the endian order for the local machine.
+ */
+void tep_set_local_bigendian(struct tep_handle *tep, enum tep_endian endian)
+{
+	if (tep)
+		tep->host_bigendian = endian;
+}
+
+/**
+ * tep_is_old_format - get if an old kernel is used
+ * @tep: a handle to the tep_handle
+ *
+ * This returns true, if an old kernel is used to generate the tracing events or
+ * false if a new kernel is used. Old kernels did not have header page info.
+ * If @tep is NULL, false is returned.
+ */
+bool tep_is_old_format(struct tep_handle *tep)
+{
+	if (tep)
+		return tep->old_format;
+	return false;
+}
+
+/**
+ * tep_set_test_filters - set a flag to test a filter string
+ * @tep: a handle to the tep_handle
+ * @test_filters: the new value of the test_filters flag
+ *
+ * This sets a flag to test a filter string. If this flag is set, when
+ * tep_filter_add_filter_str() API as called,it will print the filter string
+ * instead of adding it.
+ */
+void tep_set_test_filters(struct tep_handle *tep, int test_filters)
+{
+	if (tep)
+		tep->test_filters = test_filters;
+}
diff --git a/tools/lib/traceevent/event-parse-local.h b/tools/lib/traceevent/event-parse-local.h
new file mode 100644
index 000000000..fd4bbcfbb
--- /dev/null
+++ b/tools/lib/traceevent/event-parse-local.h
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+
+#ifndef _PARSE_EVENTS_INT_H
+#define _PARSE_EVENTS_INT_H
+
+struct tep_cmdline;
+struct cmdline_list;
+struct func_map;
+struct func_list;
+struct event_handler;
+struct func_resolver;
+struct tep_plugins_dir;
+
+#define __hidden __attribute__((visibility ("hidden")))
+
+struct tep_handle {
+	int ref_count;
+
+	int header_page_ts_offset;
+	int header_page_ts_size;
+	int header_page_size_offset;
+	int header_page_size_size;
+	int header_page_data_offset;
+	int header_page_data_size;
+	int header_page_overwrite;
+
+	enum tep_endian file_bigendian;
+	enum tep_endian host_bigendian;
+
+	int old_format;
+
+	int cpus;
+	int long_size;
+	int page_size;
+
+	struct tep_cmdline *cmdlines;
+	struct cmdline_list *cmdlist;
+	int cmdline_count;
+
+	struct func_map *func_map;
+	struct func_resolver *func_resolver;
+	struct func_list *funclist;
+	unsigned int func_count;
+
+	struct printk_map *printk_map;
+	struct printk_list *printklist;
+	unsigned int printk_count;
+
+	struct tep_event **events;
+	int nr_events;
+	struct tep_event **sort_events;
+	enum tep_event_sort_type last_type;
+
+	int type_offset;
+	int type_size;
+
+	int pid_offset;
+	int pid_size;
+
+	int pc_offset;
+	int pc_size;
+
+	int flags_offset;
+	int flags_size;
+
+	int ld_offset;
+	int ld_size;
+
+	int test_filters;
+
+	int flags;
+
+	struct tep_format_field *bprint_ip_field;
+	struct tep_format_field *bprint_fmt_field;
+	struct tep_format_field *bprint_buf_field;
+
+	struct event_handler *handlers;
+	struct tep_function_handler *func_handlers;
+
+	/* cache */
+	struct tep_event *last_event;
+
+	struct tep_plugins_dir *plugins_dir;
+};
+
+enum tep_print_parse_type {
+	PRINT_FMT_STRING,
+	PRINT_FMT_ARG_DIGIT,
+	PRINT_FMT_ARG_POINTER,
+	PRINT_FMT_ARG_STRING,
+};
+
+struct tep_print_parse {
+	struct tep_print_parse	*next;
+
+	char				*format;
+	int				ls;
+	enum tep_print_parse_type	type;
+	struct tep_print_arg		*arg;
+	struct tep_print_arg		*len_as_arg;
+};
+
+void free_tep_event(struct tep_event *event);
+void free_tep_format_field(struct tep_format_field *field);
+void free_tep_plugin_paths(struct tep_handle *tep);
+
+unsigned short data2host2(struct tep_handle *tep, unsigned short data);
+unsigned int data2host4(struct tep_handle *tep, unsigned int data);
+unsigned long long data2host8(struct tep_handle *tep, unsigned long long data);
+
+/* access to the internal parser */
+int peek_char(void);
+void init_input_buf(const char *buf, unsigned long long size);
+unsigned long long get_input_buf_ptr(void);
+const char *get_input_buf(void);
+enum tep_event_type read_token(char **tok);
+void free_token(char *tok);
+
+#endif /* _PARSE_EVENTS_INT_H */
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
new file mode 100644
index 000000000..fe58843d0
--- /dev/null
+++ b/tools/lib/traceevent/event-parse.c
@@ -0,0 +1,7603 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ *
+ *  The parts for function graph printing was taken and modified from the
+ *  Linux Kernel that were written by
+ *    - Copyright (C) 2009  Frederic Weisbecker,
+ *  Frederic Weisbecker gave his permission to relicense the code to
+ *  the Lesser General Public License.
+ */
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdint.h>
+#include <limits.h>
+#include <linux/time64.h>
+
+#include <netinet/in.h>
+#include "event-parse.h"
+
+#include "event-parse-local.h"
+#include "event-utils.h"
+#include "trace-seq.h"
+
+static const char *input_buf;
+static unsigned long long input_buf_ptr;
+static unsigned long long input_buf_siz;
+
+static int is_flag_field;
+static int is_symbolic_field;
+
+static int show_warning = 1;
+
+#define do_warning(fmt, ...)				\
+	do {						\
+		if (show_warning)			\
+			warning(fmt, ##__VA_ARGS__);	\
+	} while (0)
+
+#define do_warning_event(event, fmt, ...)			\
+	do {							\
+		if (!show_warning)				\
+			continue;				\
+								\
+		if (event)					\
+			warning("[%s:%s] " fmt, event->system,	\
+				event->name, ##__VA_ARGS__);	\
+		else						\
+			warning(fmt, ##__VA_ARGS__);		\
+	} while (0)
+
+/**
+ * init_input_buf - init buffer for parsing
+ * @buf: buffer to parse
+ * @size: the size of the buffer
+ *
+ * Initializes the internal buffer that tep_read_token() will parse.
+ */
+__hidden void init_input_buf(const char *buf, unsigned long long size)
+{
+	input_buf = buf;
+	input_buf_siz = size;
+	input_buf_ptr = 0;
+}
+
+__hidden const char *get_input_buf(void)
+{
+	return input_buf;
+}
+
+__hidden unsigned long long get_input_buf_ptr(void)
+{
+	return input_buf_ptr;
+}
+
+struct event_handler {
+	struct event_handler		*next;
+	int				id;
+	const char			*sys_name;
+	const char			*event_name;
+	tep_event_handler_func		func;
+	void				*context;
+};
+
+struct func_params {
+	struct func_params	*next;
+	enum tep_func_arg_type	type;
+};
+
+struct tep_function_handler {
+	struct tep_function_handler	*next;
+	enum tep_func_arg_type		ret_type;
+	char				*name;
+	tep_func_handler		func;
+	struct func_params		*params;
+	int				nr_args;
+};
+
+static unsigned long long
+process_defined_func(struct trace_seq *s, void *data, int size,
+		     struct tep_event *event, struct tep_print_arg *arg);
+
+static void free_func_handle(struct tep_function_handler *func);
+
+void breakpoint(void)
+{
+	static int x;
+	x++;
+}
+
+static struct tep_print_arg *alloc_arg(void)
+{
+	return calloc(1, sizeof(struct tep_print_arg));
+}
+
+struct tep_cmdline {
+	char *comm;
+	int pid;
+};
+
+static int cmdline_cmp(const void *a, const void *b)
+{
+	const struct tep_cmdline *ca = a;
+	const struct tep_cmdline *cb = b;
+
+	if (ca->pid < cb->pid)
+		return -1;
+	if (ca->pid > cb->pid)
+		return 1;
+
+	return 0;
+}
+
+/* Looking for where to place the key */
+static int cmdline_slot_cmp(const void *a, const void *b)
+{
+	const struct tep_cmdline *ca = a;
+	const struct tep_cmdline *cb = b;
+	const struct tep_cmdline *cb1 = cb + 1;
+
+	if (ca->pid < cb->pid)
+		return -1;
+
+	if (ca->pid > cb->pid) {
+		if (ca->pid <= cb1->pid)
+			return 0;
+		return 1;
+	}
+
+	return 0;
+}
+
+struct cmdline_list {
+	struct cmdline_list	*next;
+	char			*comm;
+	int			pid;
+};
+
+static int cmdline_init(struct tep_handle *tep)
+{
+	struct cmdline_list *cmdlist = tep->cmdlist;
+	struct cmdline_list *item;
+	struct tep_cmdline *cmdlines;
+	int i;
+
+	cmdlines = malloc(sizeof(*cmdlines) * tep->cmdline_count);
+	if (!cmdlines)
+		return -1;
+
+	i = 0;
+	while (cmdlist) {
+		cmdlines[i].pid = cmdlist->pid;
+		cmdlines[i].comm = cmdlist->comm;
+		i++;
+		item = cmdlist;
+		cmdlist = cmdlist->next;
+		free(item);
+	}
+
+	qsort(cmdlines, tep->cmdline_count, sizeof(*cmdlines), cmdline_cmp);
+
+	tep->cmdlines = cmdlines;
+	tep->cmdlist = NULL;
+
+	return 0;
+}
+
+static const char *find_cmdline(struct tep_handle *tep, int pid)
+{
+	const struct tep_cmdline *comm;
+	struct tep_cmdline key;
+
+	if (!pid)
+		return "<idle>";
+
+	if (!tep->cmdlines && cmdline_init(tep))
+		return "<not enough memory for cmdlines!>";
+
+	key.pid = pid;
+
+	comm = bsearch(&key, tep->cmdlines, tep->cmdline_count,
+		       sizeof(*tep->cmdlines), cmdline_cmp);
+
+	if (comm)
+		return comm->comm;
+	return "<...>";
+}
+
+/**
+ * tep_is_pid_registered - return if a pid has a cmdline registered
+ * @tep: a handle to the trace event parser context
+ * @pid: The pid to check if it has a cmdline registered with.
+ *
+ * Returns true if the pid has a cmdline mapped to it
+ * false otherwise.
+ */
+bool tep_is_pid_registered(struct tep_handle *tep, int pid)
+{
+	const struct tep_cmdline *comm;
+	struct tep_cmdline key;
+
+	if (!pid)
+		return true;
+
+	if (!tep->cmdlines && cmdline_init(tep))
+		return false;
+
+	key.pid = pid;
+
+	comm = bsearch(&key, tep->cmdlines, tep->cmdline_count,
+		       sizeof(*tep->cmdlines), cmdline_cmp);
+
+	if (comm)
+		return true;
+	return false;
+}
+
+/*
+ * If the command lines have been converted to an array, then
+ * we must add this pid. This is much slower than when cmdlines
+ * are added before the array is initialized.
+ */
+static int add_new_comm(struct tep_handle *tep,
+			const char *comm, int pid, bool override)
+{
+	struct tep_cmdline *cmdlines = tep->cmdlines;
+	struct tep_cmdline *cmdline;
+	struct tep_cmdline key;
+	char *new_comm;
+	int cnt;
+
+	if (!pid)
+		return 0;
+
+	/* avoid duplicates */
+	key.pid = pid;
+
+	cmdline = bsearch(&key, tep->cmdlines, tep->cmdline_count,
+			  sizeof(*tep->cmdlines), cmdline_cmp);
+	if (cmdline) {
+		if (!override) {
+			errno = EEXIST;
+			return -1;
+		}
+		new_comm = strdup(comm);
+		if (!new_comm) {
+			errno = ENOMEM;
+			return -1;
+		}
+		free(cmdline->comm);
+		cmdline->comm = new_comm;
+
+		return 0;
+	}
+
+	cmdlines = realloc(cmdlines, sizeof(*cmdlines) * (tep->cmdline_count + 1));
+	if (!cmdlines) {
+		errno = ENOMEM;
+		return -1;
+	}
+	tep->cmdlines = cmdlines;
+
+	key.comm = strdup(comm);
+	if (!key.comm) {
+		errno = ENOMEM;
+		return -1;
+	}
+
+	if (!tep->cmdline_count) {
+		/* no entries yet */
+		tep->cmdlines[0] = key;
+		tep->cmdline_count++;
+		return 0;
+	}
+
+	/* Now find where we want to store the new cmdline */
+	cmdline = bsearch(&key, tep->cmdlines, tep->cmdline_count - 1,
+			  sizeof(*tep->cmdlines), cmdline_slot_cmp);
+
+	cnt = tep->cmdline_count;
+	if (cmdline) {
+		/* cmdline points to the one before the spot we want */
+		cmdline++;
+		cnt -= cmdline - tep->cmdlines;
+
+	} else {
+		/* The new entry is either before or after the list */
+		if (key.pid > tep->cmdlines[tep->cmdline_count - 1].pid) {
+			tep->cmdlines[tep->cmdline_count++] = key;
+			return 0;
+		}
+		cmdline = &tep->cmdlines[0];
+	}
+	memmove(cmdline + 1, cmdline, (cnt * sizeof(*cmdline)));
+	*cmdline = key;
+
+	tep->cmdline_count++;
+
+	return 0;
+}
+
+static int _tep_register_comm(struct tep_handle *tep,
+			      const char *comm, int pid, bool override)
+{
+	struct cmdline_list *item;
+
+	if (tep->cmdlines)
+		return add_new_comm(tep, comm, pid, override);
+
+	item = malloc(sizeof(*item));
+	if (!item)
+		return -1;
+
+	if (comm)
+		item->comm = strdup(comm);
+	else
+		item->comm = strdup("<...>");
+	if (!item->comm) {
+		free(item);
+		return -1;
+	}
+	item->pid = pid;
+	item->next = tep->cmdlist;
+
+	tep->cmdlist = item;
+	tep->cmdline_count++;
+
+	return 0;
+}
+
+/**
+ * tep_register_comm - register a pid / comm mapping
+ * @tep: a handle to the trace event parser context
+ * @comm: the command line to register
+ * @pid: the pid to map the command line to
+ *
+ * This adds a mapping to search for command line names with
+ * a given pid. The comm is duplicated. If a command with the same pid
+ * already exist, -1 is returned and errno is set to EEXIST
+ */
+int tep_register_comm(struct tep_handle *tep, const char *comm, int pid)
+{
+	return _tep_register_comm(tep, comm, pid, false);
+}
+
+/**
+ * tep_override_comm - register a pid / comm mapping
+ * @tep: a handle to the trace event parser context
+ * @comm: the command line to register
+ * @pid: the pid to map the command line to
+ *
+ * This adds a mapping to search for command line names with
+ * a given pid. The comm is duplicated. If a command with the same pid
+ * already exist, the command string is udapted with the new one
+ */
+int tep_override_comm(struct tep_handle *tep, const char *comm, int pid)
+{
+	if (!tep->cmdlines && cmdline_init(tep)) {
+		errno = ENOMEM;
+		return -1;
+	}
+	return _tep_register_comm(tep, comm, pid, true);
+}
+
+struct func_map {
+	unsigned long long		addr;
+	char				*func;
+	char				*mod;
+};
+
+struct func_list {
+	struct func_list	*next;
+	unsigned long long	addr;
+	char			*func;
+	char			*mod;
+};
+
+static int func_cmp(const void *a, const void *b)
+{
+	const struct func_map *fa = a;
+	const struct func_map *fb = b;
+
+	if (fa->addr < fb->addr)
+		return -1;
+	if (fa->addr > fb->addr)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * We are searching for a record in between, not an exact
+ * match.
+ */
+static int func_bcmp(const void *a, const void *b)
+{
+	const struct func_map *fa = a;
+	const struct func_map *fb = b;
+
+	if ((fa->addr == fb->addr) ||
+
+	    (fa->addr > fb->addr &&
+	     fa->addr < (fb+1)->addr))
+		return 0;
+
+	if (fa->addr < fb->addr)
+		return -1;
+
+	return 1;
+}
+
+static int func_map_init(struct tep_handle *tep)
+{
+	struct func_list *funclist;
+	struct func_list *item;
+	struct func_map *func_map;
+	int i;
+
+	func_map = malloc(sizeof(*func_map) * (tep->func_count + 1));
+	if (!func_map)
+		return -1;
+
+	funclist = tep->funclist;
+
+	i = 0;
+	while (funclist) {
+		func_map[i].func = funclist->func;
+		func_map[i].addr = funclist->addr;
+		func_map[i].mod = funclist->mod;
+		i++;
+		item = funclist;
+		funclist = funclist->next;
+		free(item);
+	}
+
+	qsort(func_map, tep->func_count, sizeof(*func_map), func_cmp);
+
+	/*
+	 * Add a special record at the end.
+	 */
+	func_map[tep->func_count].func = NULL;
+	func_map[tep->func_count].addr = 0;
+	func_map[tep->func_count].mod = NULL;
+
+	tep->func_map = func_map;
+	tep->funclist = NULL;
+
+	return 0;
+}
+
+static struct func_map *
+__find_func(struct tep_handle *tep, unsigned long long addr)
+{
+	struct func_map *func;
+	struct func_map key;
+
+	if (!tep->func_map)
+		func_map_init(tep);
+
+	key.addr = addr;
+
+	func = bsearch(&key, tep->func_map, tep->func_count,
+		       sizeof(*tep->func_map), func_bcmp);
+
+	return func;
+}
+
+struct func_resolver {
+	tep_func_resolver_t	*func;
+	void			*priv;
+	struct func_map		map;
+};
+
+/**
+ * tep_set_function_resolver - set an alternative function resolver
+ * @tep: a handle to the trace event parser context
+ * @resolver: function to be used
+ * @priv: resolver function private state.
+ *
+ * Some tools may have already a way to resolve kernel functions, allow them to
+ * keep using it instead of duplicating all the entries inside tep->funclist.
+ */
+int tep_set_function_resolver(struct tep_handle *tep,
+			      tep_func_resolver_t *func, void *priv)
+{
+	struct func_resolver *resolver = malloc(sizeof(*resolver));
+
+	if (resolver == NULL)
+		return -1;
+
+	resolver->func = func;
+	resolver->priv = priv;
+
+	free(tep->func_resolver);
+	tep->func_resolver = resolver;
+
+	return 0;
+}
+
+/**
+ * tep_reset_function_resolver - reset alternative function resolver
+ * @tep: a handle to the trace event parser context
+ *
+ * Stop using whatever alternative resolver was set, use the default
+ * one instead.
+ */
+void tep_reset_function_resolver(struct tep_handle *tep)
+{
+	free(tep->func_resolver);
+	tep->func_resolver = NULL;
+}
+
+static struct func_map *
+find_func(struct tep_handle *tep, unsigned long long addr)
+{
+	struct func_map *map;
+
+	if (!tep->func_resolver)
+		return __find_func(tep, addr);
+
+	map = &tep->func_resolver->map;
+	map->mod  = NULL;
+	map->addr = addr;
+	map->func = tep->func_resolver->func(tep->func_resolver->priv,
+					     &map->addr, &map->mod);
+	if (map->func == NULL)
+		return NULL;
+
+	return map;
+}
+
+/**
+ * tep_find_function - find a function by a given address
+ * @tep: a handle to the trace event parser context
+ * @addr: the address to find the function with
+ *
+ * Returns a pointer to the function stored that has the given
+ * address. Note, the address does not have to be exact, it
+ * will select the function that would contain the address.
+ */
+const char *tep_find_function(struct tep_handle *tep, unsigned long long addr)
+{
+	struct func_map *map;
+
+	map = find_func(tep, addr);
+	if (!map)
+		return NULL;
+
+	return map->func;
+}
+
+/**
+ * tep_find_function_address - find a function address by a given address
+ * @tep: a handle to the trace event parser context
+ * @addr: the address to find the function with
+ *
+ * Returns the address the function starts at. This can be used in
+ * conjunction with tep_find_function to print both the function
+ * name and the function offset.
+ */
+unsigned long long
+tep_find_function_address(struct tep_handle *tep, unsigned long long addr)
+{
+	struct func_map *map;
+
+	map = find_func(tep, addr);
+	if (!map)
+		return 0;
+
+	return map->addr;
+}
+
+/**
+ * tep_register_function - register a function with a given address
+ * @tep: a handle to the trace event parser context
+ * @function: the function name to register
+ * @addr: the address the function starts at
+ * @mod: the kernel module the function may be in (NULL for none)
+ *
+ * This registers a function name with an address and module.
+ * The @func passed in is duplicated.
+ */
+int tep_register_function(struct tep_handle *tep, char *func,
+			  unsigned long long addr, char *mod)
+{
+	struct func_list *item = malloc(sizeof(*item));
+
+	if (!item)
+		return -1;
+
+	item->next = tep->funclist;
+	item->func = strdup(func);
+	if (!item->func)
+		goto out_free;
+
+	if (mod) {
+		item->mod = strdup(mod);
+		if (!item->mod)
+			goto out_free_func;
+	} else
+		item->mod = NULL;
+	item->addr = addr;
+
+	tep->funclist = item;
+	tep->func_count++;
+
+	return 0;
+
+out_free_func:
+	free(item->func);
+	item->func = NULL;
+out_free:
+	free(item);
+	errno = ENOMEM;
+	return -1;
+}
+
+/**
+ * tep_print_funcs - print out the stored functions
+ * @tep: a handle to the trace event parser context
+ *
+ * This prints out the stored functions.
+ */
+void tep_print_funcs(struct tep_handle *tep)
+{
+	int i;
+
+	if (!tep->func_map)
+		func_map_init(tep);
+
+	for (i = 0; i < (int)tep->func_count; i++) {
+		printf("%016llx %s",
+		       tep->func_map[i].addr,
+		       tep->func_map[i].func);
+		if (tep->func_map[i].mod)
+			printf(" [%s]\n", tep->func_map[i].mod);
+		else
+			printf("\n");
+	}
+}
+
+struct printk_map {
+	unsigned long long		addr;
+	char				*printk;
+};
+
+struct printk_list {
+	struct printk_list	*next;
+	unsigned long long	addr;
+	char			*printk;
+};
+
+static int printk_cmp(const void *a, const void *b)
+{
+	const struct printk_map *pa = a;
+	const struct printk_map *pb = b;
+
+	if (pa->addr < pb->addr)
+		return -1;
+	if (pa->addr > pb->addr)
+		return 1;
+
+	return 0;
+}
+
+static int printk_map_init(struct tep_handle *tep)
+{
+	struct printk_list *printklist;
+	struct printk_list *item;
+	struct printk_map *printk_map;
+	int i;
+
+	printk_map = malloc(sizeof(*printk_map) * (tep->printk_count + 1));
+	if (!printk_map)
+		return -1;
+
+	printklist = tep->printklist;
+
+	i = 0;
+	while (printklist) {
+		printk_map[i].printk = printklist->printk;
+		printk_map[i].addr = printklist->addr;
+		i++;
+		item = printklist;
+		printklist = printklist->next;
+		free(item);
+	}
+
+	qsort(printk_map, tep->printk_count, sizeof(*printk_map), printk_cmp);
+
+	tep->printk_map = printk_map;
+	tep->printklist = NULL;
+
+	return 0;
+}
+
+static struct printk_map *
+find_printk(struct tep_handle *tep, unsigned long long addr)
+{
+	struct printk_map *printk;
+	struct printk_map key;
+
+	if (!tep->printk_map && printk_map_init(tep))
+		return NULL;
+
+	key.addr = addr;
+
+	printk = bsearch(&key, tep->printk_map, tep->printk_count,
+			 sizeof(*tep->printk_map), printk_cmp);
+
+	return printk;
+}
+
+/**
+ * tep_register_print_string - register a string by its address
+ * @tep: a handle to the trace event parser context
+ * @fmt: the string format to register
+ * @addr: the address the string was located at
+ *
+ * This registers a string by the address it was stored in the kernel.
+ * The @fmt passed in is duplicated.
+ */
+int tep_register_print_string(struct tep_handle *tep, const char *fmt,
+			      unsigned long long addr)
+{
+	struct printk_list *item = malloc(sizeof(*item));
+	char *p;
+
+	if (!item)
+		return -1;
+
+	item->next = tep->printklist;
+	item->addr = addr;
+
+	/* Strip off quotes and '\n' from the end */
+	if (fmt[0] == '"')
+		fmt++;
+	item->printk = strdup(fmt);
+	if (!item->printk)
+		goto out_free;
+
+	p = item->printk + strlen(item->printk) - 1;
+	if (*p == '"')
+		*p = 0;
+
+	p -= 2;
+	if (strcmp(p, "\\n") == 0)
+		*p = 0;
+
+	tep->printklist = item;
+	tep->printk_count++;
+
+	return 0;
+
+out_free:
+	free(item);
+	errno = ENOMEM;
+	return -1;
+}
+
+/**
+ * tep_print_printk - print out the stored strings
+ * @tep: a handle to the trace event parser context
+ *
+ * This prints the string formats that were stored.
+ */
+void tep_print_printk(struct tep_handle *tep)
+{
+	int i;
+
+	if (!tep->printk_map)
+		printk_map_init(tep);
+
+	for (i = 0; i < (int)tep->printk_count; i++) {
+		printf("%016llx %s\n",
+		       tep->printk_map[i].addr,
+		       tep->printk_map[i].printk);
+	}
+}
+
+static struct tep_event *alloc_event(void)
+{
+	return calloc(1, sizeof(struct tep_event));
+}
+
+static int add_event(struct tep_handle *tep, struct tep_event *event)
+{
+	int i;
+	struct tep_event **events = realloc(tep->events, sizeof(event) *
+					    (tep->nr_events + 1));
+	if (!events)
+		return -1;
+
+	tep->events = events;
+
+	for (i = 0; i < tep->nr_events; i++) {
+		if (tep->events[i]->id > event->id)
+			break;
+	}
+	if (i < tep->nr_events)
+		memmove(&tep->events[i + 1],
+			&tep->events[i],
+			sizeof(event) * (tep->nr_events - i));
+
+	tep->events[i] = event;
+	tep->nr_events++;
+
+	event->tep = tep;
+
+	return 0;
+}
+
+static int event_item_type(enum tep_event_type type)
+{
+	switch (type) {
+	case TEP_EVENT_ITEM ... TEP_EVENT_SQUOTE:
+		return 1;
+	case TEP_EVENT_ERROR ... TEP_EVENT_DELIM:
+	default:
+		return 0;
+	}
+}
+
+static void free_flag_sym(struct tep_print_flag_sym *fsym)
+{
+	struct tep_print_flag_sym *next;
+
+	while (fsym) {
+		next = fsym->next;
+		free(fsym->value);
+		free(fsym->str);
+		free(fsym);
+		fsym = next;
+	}
+}
+
+static void free_arg(struct tep_print_arg *arg)
+{
+	struct tep_print_arg *farg;
+
+	if (!arg)
+		return;
+
+	switch (arg->type) {
+	case TEP_PRINT_ATOM:
+		free(arg->atom.atom);
+		break;
+	case TEP_PRINT_FIELD:
+		free(arg->field.name);
+		break;
+	case TEP_PRINT_FLAGS:
+		free_arg(arg->flags.field);
+		free(arg->flags.delim);
+		free_flag_sym(arg->flags.flags);
+		break;
+	case TEP_PRINT_SYMBOL:
+		free_arg(arg->symbol.field);
+		free_flag_sym(arg->symbol.symbols);
+		break;
+	case TEP_PRINT_HEX:
+	case TEP_PRINT_HEX_STR:
+		free_arg(arg->hex.field);
+		free_arg(arg->hex.size);
+		break;
+	case TEP_PRINT_INT_ARRAY:
+		free_arg(arg->int_array.field);
+		free_arg(arg->int_array.count);
+		free_arg(arg->int_array.el_size);
+		break;
+	case TEP_PRINT_TYPE:
+		free(arg->typecast.type);
+		free_arg(arg->typecast.item);
+		break;
+	case TEP_PRINT_STRING:
+	case TEP_PRINT_BSTRING:
+		free(arg->string.string);
+		break;
+	case TEP_PRINT_BITMASK:
+		free(arg->bitmask.bitmask);
+		break;
+	case TEP_PRINT_DYNAMIC_ARRAY:
+	case TEP_PRINT_DYNAMIC_ARRAY_LEN:
+		free(arg->dynarray.index);
+		break;
+	case TEP_PRINT_OP:
+		free(arg->op.op);
+		free_arg(arg->op.left);
+		free_arg(arg->op.right);
+		break;
+	case TEP_PRINT_FUNC:
+		while (arg->func.args) {
+			farg = arg->func.args;
+			arg->func.args = farg->next;
+			free_arg(farg);
+		}
+		break;
+
+	case TEP_PRINT_NULL:
+	default:
+		break;
+	}
+
+	free(arg);
+}
+
+static enum tep_event_type get_type(int ch)
+{
+	if (ch == '\n')
+		return TEP_EVENT_NEWLINE;
+	if (isspace(ch))
+		return TEP_EVENT_SPACE;
+	if (isalnum(ch) || ch == '_')
+		return TEP_EVENT_ITEM;
+	if (ch == '\'')
+		return TEP_EVENT_SQUOTE;
+	if (ch == '"')
+		return TEP_EVENT_DQUOTE;
+	if (!isprint(ch))
+		return TEP_EVENT_NONE;
+	if (ch == '(' || ch == ')' || ch == ',')
+		return TEP_EVENT_DELIM;
+
+	return TEP_EVENT_OP;
+}
+
+static int __read_char(void)
+{
+	if (input_buf_ptr >= input_buf_siz)
+		return -1;
+
+	return input_buf[input_buf_ptr++];
+}
+
+/**
+ * peek_char - peek at the next character that will be read
+ *
+ * Returns the next character read, or -1 if end of buffer.
+ */
+__hidden int peek_char(void)
+{
+	if (input_buf_ptr >= input_buf_siz)
+		return -1;
+
+	return input_buf[input_buf_ptr];
+}
+
+static int extend_token(char **tok, char *buf, int size)
+{
+	char *newtok = realloc(*tok, size);
+
+	if (!newtok) {
+		free(*tok);
+		*tok = NULL;
+		return -1;
+	}
+
+	if (!*tok)
+		strcpy(newtok, buf);
+	else
+		strcat(newtok, buf);
+	*tok = newtok;
+
+	return 0;
+}
+
+static enum tep_event_type force_token(const char *str, char **tok);
+
+static enum tep_event_type __read_token(char **tok)
+{
+	char buf[BUFSIZ];
+	int ch, last_ch, quote_ch, next_ch;
+	int i = 0;
+	int tok_size = 0;
+	enum tep_event_type type;
+
+	*tok = NULL;
+
+
+	ch = __read_char();
+	if (ch < 0)
+		return TEP_EVENT_NONE;
+
+	type = get_type(ch);
+	if (type == TEP_EVENT_NONE)
+		return type;
+
+	buf[i++] = ch;
+
+	switch (type) {
+	case TEP_EVENT_NEWLINE:
+	case TEP_EVENT_DELIM:
+		if (asprintf(tok, "%c", ch) < 0)
+			return TEP_EVENT_ERROR;
+
+		return type;
+
+	case TEP_EVENT_OP:
+		switch (ch) {
+		case '-':
+			next_ch = peek_char();
+			if (next_ch == '>') {
+				buf[i++] = __read_char();
+				break;
+			}
+			/* fall through */
+		case '+':
+		case '|':
+		case '&':
+		case '>':
+		case '<':
+			last_ch = ch;
+			ch = peek_char();
+			if (ch != last_ch)
+				goto test_equal;
+			buf[i++] = __read_char();
+			switch (last_ch) {
+			case '>':
+			case '<':
+				goto test_equal;
+			default:
+				break;
+			}
+			break;
+		case '!':
+		case '=':
+			goto test_equal;
+		default: /* what should we do instead? */
+			break;
+		}
+		buf[i] = 0;
+		*tok = strdup(buf);
+		return type;
+
+ test_equal:
+		ch = peek_char();
+		if (ch == '=')
+			buf[i++] = __read_char();
+		goto out;
+
+	case TEP_EVENT_DQUOTE:
+	case TEP_EVENT_SQUOTE:
+		/* don't keep quotes */
+		i--;
+		quote_ch = ch;
+		last_ch = 0;
+ concat:
+		do {
+			if (i == (BUFSIZ - 1)) {
+				buf[i] = 0;
+				tok_size += BUFSIZ;
+
+				if (extend_token(tok, buf, tok_size) < 0)
+					return TEP_EVENT_NONE;
+				i = 0;
+			}
+			last_ch = ch;
+			ch = __read_char();
+			buf[i++] = ch;
+			/* the '\' '\' will cancel itself */
+			if (ch == '\\' && last_ch == '\\')
+				last_ch = 0;
+		} while (ch != quote_ch || last_ch == '\\');
+		/* remove the last quote */
+		i--;
+
+		/*
+		 * For strings (double quotes) check the next token.
+		 * If it is another string, concatinate the two.
+		 */
+		if (type == TEP_EVENT_DQUOTE) {
+			unsigned long long save_input_buf_ptr = input_buf_ptr;
+
+			do {
+				ch = __read_char();
+			} while (isspace(ch));
+			if (ch == '"')
+				goto concat;
+			input_buf_ptr = save_input_buf_ptr;
+		}
+
+		goto out;
+
+	case TEP_EVENT_ERROR ... TEP_EVENT_SPACE:
+	case TEP_EVENT_ITEM:
+	default:
+		break;
+	}
+
+	while (get_type(peek_char()) == type) {
+		if (i == (BUFSIZ - 1)) {
+			buf[i] = 0;
+			tok_size += BUFSIZ;
+
+			if (extend_token(tok, buf, tok_size) < 0)
+				return TEP_EVENT_NONE;
+			i = 0;
+		}
+		ch = __read_char();
+		buf[i++] = ch;
+	}
+
+ out:
+	buf[i] = 0;
+	if (extend_token(tok, buf, tok_size + i + 1) < 0)
+		return TEP_EVENT_NONE;
+
+	if (type == TEP_EVENT_ITEM) {
+		/*
+		 * Older versions of the kernel has a bug that
+		 * creates invalid symbols and will break the mac80211
+		 * parsing. This is a work around to that bug.
+		 *
+		 * See Linux kernel commit:
+		 *  811cb50baf63461ce0bdb234927046131fc7fa8b
+		 */
+		if (strcmp(*tok, "LOCAL_PR_FMT") == 0) {
+			free(*tok);
+			*tok = NULL;
+			return force_token("\"%s\" ", tok);
+		} else if (strcmp(*tok, "STA_PR_FMT") == 0) {
+			free(*tok);
+			*tok = NULL;
+			return force_token("\" sta:%pM\" ", tok);
+		} else if (strcmp(*tok, "VIF_PR_FMT") == 0) {
+			free(*tok);
+			*tok = NULL;
+			return force_token("\" vif:%p(%d)\" ", tok);
+		}
+	}
+
+	return type;
+}
+
+static enum tep_event_type force_token(const char *str, char **tok)
+{
+	const char *save_input_buf;
+	unsigned long long save_input_buf_ptr;
+	unsigned long long save_input_buf_siz;
+	enum tep_event_type type;
+	
+	/* save off the current input pointers */
+	save_input_buf = input_buf;
+	save_input_buf_ptr = input_buf_ptr;
+	save_input_buf_siz = input_buf_siz;
+
+	init_input_buf(str, strlen(str));
+
+	type = __read_token(tok);
+
+	/* reset back to original token */
+	input_buf = save_input_buf;
+	input_buf_ptr = save_input_buf_ptr;
+	input_buf_siz = save_input_buf_siz;
+
+	return type;
+}
+
+/**
+ * free_token - free a token returned by tep_read_token
+ * @token: the token to free
+ */
+__hidden void free_token(char *tok)
+{
+	if (tok)
+		free(tok);
+}
+
+/**
+ * read_token - access to utilities to use the tep parser
+ * @tok: The token to return
+ *
+ * This will parse tokens from the string given by
+ * tep_init_data().
+ *
+ * Returns the token type.
+ */
+__hidden enum tep_event_type read_token(char **tok)
+{
+	enum tep_event_type type;
+
+	for (;;) {
+		type = __read_token(tok);
+		if (type != TEP_EVENT_SPACE)
+			return type;
+
+		free_token(*tok);
+	}
+
+	/* not reached */
+	*tok = NULL;
+	return TEP_EVENT_NONE;
+}
+
+/* no newline */
+static enum tep_event_type read_token_item(char **tok)
+{
+	enum tep_event_type type;
+
+	for (;;) {
+		type = __read_token(tok);
+		if (type != TEP_EVENT_SPACE && type != TEP_EVENT_NEWLINE)
+			return type;
+		free_token(*tok);
+		*tok = NULL;
+	}
+
+	/* not reached */
+	*tok = NULL;
+	return TEP_EVENT_NONE;
+}
+
+static int test_type(enum tep_event_type type, enum tep_event_type expect)
+{
+	if (type != expect) {
+		do_warning("Error: expected type %d but read %d",
+		    expect, type);
+		return -1;
+	}
+	return 0;
+}
+
+static int test_type_token(enum tep_event_type type, const char *token,
+		    enum tep_event_type expect, const char *expect_tok)
+{
+	if (type != expect) {
+		do_warning("Error: expected type %d but read %d",
+		    expect, type);
+		return -1;
+	}
+
+	if (strcmp(token, expect_tok) != 0) {
+		do_warning("Error: expected '%s' but read '%s'",
+		    expect_tok, token);
+		return -1;
+	}
+	return 0;
+}
+
+static int __read_expect_type(enum tep_event_type expect, char **tok, int newline_ok)
+{
+	enum tep_event_type type;
+
+	if (newline_ok)
+		type = read_token(tok);
+	else
+		type = read_token_item(tok);
+	return test_type(type, expect);
+}
+
+static int read_expect_type(enum tep_event_type expect, char **tok)
+{
+	return __read_expect_type(expect, tok, 1);
+}
+
+static int __read_expected(enum tep_event_type expect, const char *str,
+			   int newline_ok)
+{
+	enum tep_event_type type;
+	char *token;
+	int ret;
+
+	if (newline_ok)
+		type = read_token(&token);
+	else
+		type = read_token_item(&token);
+
+	ret = test_type_token(type, token, expect, str);
+
+	free_token(token);
+
+	return ret;
+}
+
+static int read_expected(enum tep_event_type expect, const char *str)
+{
+	return __read_expected(expect, str, 1);
+}
+
+static int read_expected_item(enum tep_event_type expect, const char *str)
+{
+	return __read_expected(expect, str, 0);
+}
+
+static char *event_read_name(void)
+{
+	char *token;
+
+	if (read_expected(TEP_EVENT_ITEM, "name") < 0)
+		return NULL;
+
+	if (read_expected(TEP_EVENT_OP, ":") < 0)
+		return NULL;
+
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
+		goto fail;
+
+	return token;
+
+ fail:
+	free_token(token);
+	return NULL;
+}
+
+static int event_read_id(void)
+{
+	char *token;
+	int id;
+
+	if (read_expected_item(TEP_EVENT_ITEM, "ID") < 0)
+		return -1;
+
+	if (read_expected(TEP_EVENT_OP, ":") < 0)
+		return -1;
+
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
+		goto fail;
+
+	id = strtoul(token, NULL, 0);
+	free_token(token);
+	return id;
+
+ fail:
+	free_token(token);
+	return -1;
+}
+
+static int field_is_string(struct tep_format_field *field)
+{
+	if ((field->flags & TEP_FIELD_IS_ARRAY) &&
+	    (strstr(field->type, "char") || strstr(field->type, "u8") ||
+	     strstr(field->type, "s8")))
+		return 1;
+
+	return 0;
+}
+
+static int field_is_dynamic(struct tep_format_field *field)
+{
+	if (strncmp(field->type, "__data_loc", 10) == 0)
+		return 1;
+
+	return 0;
+}
+
+static int field_is_long(struct tep_format_field *field)
+{
+	/* includes long long */
+	if (strstr(field->type, "long"))
+		return 1;
+
+	return 0;
+}
+
+static unsigned int type_size(const char *name)
+{
+	/* This covers all TEP_FIELD_IS_STRING types. */
+	static struct {
+		const char *type;
+		unsigned int size;
+	} table[] = {
+		{ "u8",   1 },
+		{ "u16",  2 },
+		{ "u32",  4 },
+		{ "u64",  8 },
+		{ "s8",   1 },
+		{ "s16",  2 },
+		{ "s32",  4 },
+		{ "s64",  8 },
+		{ "char", 1 },
+		{ },
+	};
+	int i;
+
+	for (i = 0; table[i].type; i++) {
+		if (!strcmp(table[i].type, name))
+			return table[i].size;
+	}
+
+	return 0;
+}
+
+static int append(char **buf, const char *delim, const char *str)
+{
+	char *new_buf;
+
+	new_buf = realloc(*buf, strlen(*buf) + strlen(delim) + strlen(str) + 1);
+	if (!new_buf)
+		return -1;
+	strcat(new_buf, delim);
+	strcat(new_buf, str);
+	*buf = new_buf;
+	return 0;
+}
+
+static int event_read_fields(struct tep_event *event, struct tep_format_field **fields)
+{
+	struct tep_format_field *field = NULL;
+	enum tep_event_type type;
+	char *token;
+	char *last_token;
+	char *delim = " ";
+	int count = 0;
+	int ret;
+
+	do {
+		unsigned int size_dynamic = 0;
+
+		type = read_token(&token);
+		if (type == TEP_EVENT_NEWLINE) {
+			free_token(token);
+			return count;
+		}
+
+		count++;
+
+		if (test_type_token(type, token, TEP_EVENT_ITEM, "field"))
+			goto fail;
+		free_token(token);
+
+		type = read_token(&token);
+		/*
+		 * The ftrace fields may still use the "special" name.
+		 * Just ignore it.
+		 */
+		if (event->flags & TEP_EVENT_FL_ISFTRACE &&
+		    type == TEP_EVENT_ITEM && strcmp(token, "special") == 0) {
+			free_token(token);
+			type = read_token(&token);
+		}
+
+		if (test_type_token(type, token, TEP_EVENT_OP, ":") < 0)
+			goto fail;
+
+		free_token(token);
+		if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
+			goto fail;
+
+		last_token = token;
+
+		field = calloc(1, sizeof(*field));
+		if (!field)
+			goto fail;
+
+		field->event = event;
+
+		/* read the rest of the type */
+		for (;;) {
+			type = read_token(&token);
+			if (type == TEP_EVENT_ITEM ||
+			    (type == TEP_EVENT_OP && strcmp(token, "*") == 0) ||
+			    /*
+			     * Some of the ftrace fields are broken and have
+			     * an illegal "." in them.
+			     */
+			    (event->flags & TEP_EVENT_FL_ISFTRACE &&
+			     type == TEP_EVENT_OP && strcmp(token, ".") == 0)) {
+
+				if (strcmp(token, "*") == 0)
+					field->flags |= TEP_FIELD_IS_POINTER;
+
+				if (field->type) {
+					ret = append(&field->type, delim, last_token);
+					free(last_token);
+					if (ret < 0)
+						goto fail;
+				} else
+					field->type = last_token;
+				last_token = token;
+				delim = " ";
+				continue;
+			}
+
+			/* Handle __attribute__((user)) */
+			if ((type == TEP_EVENT_DELIM) &&
+			    strcmp("__attribute__", last_token) == 0 &&
+			    token[0] == '(') {
+				int depth = 1;
+				int ret;
+
+				ret = append(&field->type, " ", last_token);
+				ret |= append(&field->type, "", "(");
+				if (ret < 0)
+					goto fail;
+
+				delim = " ";
+				while ((type = read_token(&token)) != TEP_EVENT_NONE) {
+					if (type == TEP_EVENT_DELIM) {
+						if (token[0] == '(')
+							depth++;
+						else if (token[0] == ')')
+							depth--;
+						if (!depth)
+							break;
+						ret = append(&field->type, "", token);
+						delim = "";
+					} else {
+						ret = append(&field->type, delim, token);
+						delim = " ";
+					}
+					if (ret < 0)
+						goto fail;
+					free(last_token);
+					last_token = token;
+				}
+				continue;
+			}
+			break;
+		}
+
+		if (!field->type) {
+			do_warning_event(event, "%s: no type found", __func__);
+			goto fail;
+		}
+		field->name = field->alias = last_token;
+
+		if (test_type(type, TEP_EVENT_OP))
+			goto fail;
+
+		if (strcmp(token, "[") == 0) {
+			enum tep_event_type last_type = type;
+			char *brackets = token;
+
+			field->flags |= TEP_FIELD_IS_ARRAY;
+
+			type = read_token(&token);
+
+			if (type == TEP_EVENT_ITEM)
+				field->arraylen = strtoul(token, NULL, 0);
+			else
+				field->arraylen = 0;
+
+		        while (strcmp(token, "]") != 0) {
+				const char *delim;
+
+				if (last_type == TEP_EVENT_ITEM &&
+				    type == TEP_EVENT_ITEM)
+					delim = " ";
+				else
+					delim = "";
+
+				last_type = type;
+
+				ret = append(&brackets, delim, token);
+				if (ret < 0) {
+					free(brackets);
+					goto fail;
+				}
+				/* We only care about the last token */
+				field->arraylen = strtoul(token, NULL, 0);
+				free_token(token);
+				type = read_token(&token);
+				if (type == TEP_EVENT_NONE) {
+					free(brackets);
+					do_warning_event(event, "failed to find token");
+					goto fail;
+				}
+			}
+
+			free_token(token);
+
+			ret = append(&brackets, "", "]");
+			if (ret < 0) {
+				free(brackets);
+				goto fail;
+			}
+
+			/* add brackets to type */
+
+			type = read_token(&token);
+			/*
+			 * If the next token is not an OP, then it is of
+			 * the format: type [] item;
+			 */
+			if (type == TEP_EVENT_ITEM) {
+				ret = append(&field->type, " ", field->name);
+				if (ret < 0) {
+					free(brackets);
+					goto fail;
+				}
+				ret = append(&field->type, "", brackets);
+
+				size_dynamic = type_size(field->name);
+				free_token(field->name);
+				field->name = field->alias = token;
+				type = read_token(&token);
+			} else {
+				ret = append(&field->type, "", brackets);
+				if (ret < 0) {
+					free(brackets);
+					goto fail;
+				}
+			}
+			free(brackets);
+		}
+
+		if (field_is_string(field))
+			field->flags |= TEP_FIELD_IS_STRING;
+		if (field_is_dynamic(field))
+			field->flags |= TEP_FIELD_IS_DYNAMIC;
+		if (field_is_long(field))
+			field->flags |= TEP_FIELD_IS_LONG;
+
+		if (test_type_token(type, token,  TEP_EVENT_OP, ";"))
+			goto fail;
+		free_token(token);
+
+		if (read_expected(TEP_EVENT_ITEM, "offset") < 0)
+			goto fail_expect;
+
+		if (read_expected(TEP_EVENT_OP, ":") < 0)
+			goto fail_expect;
+
+		if (read_expect_type(TEP_EVENT_ITEM, &token))
+			goto fail;
+		field->offset = strtoul(token, NULL, 0);
+		free_token(token);
+
+		if (read_expected(TEP_EVENT_OP, ";") < 0)
+			goto fail_expect;
+
+		if (read_expected(TEP_EVENT_ITEM, "size") < 0)
+			goto fail_expect;
+
+		if (read_expected(TEP_EVENT_OP, ":") < 0)
+			goto fail_expect;
+
+		if (read_expect_type(TEP_EVENT_ITEM, &token))
+			goto fail;
+		field->size = strtoul(token, NULL, 0);
+		free_token(token);
+
+		if (read_expected(TEP_EVENT_OP, ";") < 0)
+			goto fail_expect;
+
+		type = read_token(&token);
+		if (type != TEP_EVENT_NEWLINE) {
+			/* newer versions of the kernel have a "signed" type */
+			if (test_type_token(type, token, TEP_EVENT_ITEM, "signed"))
+				goto fail;
+
+			free_token(token);
+
+			if (read_expected(TEP_EVENT_OP, ":") < 0)
+				goto fail_expect;
+
+			if (read_expect_type(TEP_EVENT_ITEM, &token))
+				goto fail;
+
+			if (strtoul(token, NULL, 0))
+				field->flags |= TEP_FIELD_IS_SIGNED;
+
+			free_token(token);
+			if (read_expected(TEP_EVENT_OP, ";") < 0)
+				goto fail_expect;
+
+			if (read_expect_type(TEP_EVENT_NEWLINE, &token))
+				goto fail;
+		}
+
+		free_token(token);
+
+		if (field->flags & TEP_FIELD_IS_ARRAY) {
+			if (field->arraylen)
+				field->elementsize = field->size / field->arraylen;
+			else if (field->flags & TEP_FIELD_IS_DYNAMIC)
+				field->elementsize = size_dynamic;
+			else if (field->flags & TEP_FIELD_IS_STRING)
+				field->elementsize = 1;
+			else if (field->flags & TEP_FIELD_IS_LONG)
+				field->elementsize = event->tep ?
+						     event->tep->long_size :
+						     sizeof(long);
+		} else
+			field->elementsize = field->size;
+
+		*fields = field;
+		fields = &field->next;
+
+	} while (1);
+
+	return 0;
+
+fail:
+	free_token(token);
+fail_expect:
+	if (field) {
+		free(field->type);
+		free(field->name);
+		free(field);
+	}
+	return -1;
+}
+
+static int event_read_format(struct tep_event *event)
+{
+	char *token;
+	int ret;
+
+	if (read_expected_item(TEP_EVENT_ITEM, "format") < 0)
+		return -1;
+
+	if (read_expected(TEP_EVENT_OP, ":") < 0)
+		return -1;
+
+	if (read_expect_type(TEP_EVENT_NEWLINE, &token))
+		goto fail;
+	free_token(token);
+
+	ret = event_read_fields(event, &event->format.common_fields);
+	if (ret < 0)
+		return ret;
+	event->format.nr_common = ret;
+
+	ret = event_read_fields(event, &event->format.fields);
+	if (ret < 0)
+		return ret;
+	event->format.nr_fields = ret;
+
+	return 0;
+
+ fail:
+	free_token(token);
+	return -1;
+}
+
+static enum tep_event_type
+process_arg_token(struct tep_event *event, struct tep_print_arg *arg,
+		  char **tok, enum tep_event_type type);
+
+static enum tep_event_type
+process_arg(struct tep_event *event, struct tep_print_arg *arg, char **tok)
+{
+	enum tep_event_type type;
+	char *token;
+
+	type = read_token(&token);
+	*tok = token;
+
+	return process_arg_token(event, arg, tok, type);
+}
+
+static enum tep_event_type
+process_op(struct tep_event *event, struct tep_print_arg *arg, char **tok);
+
+/*
+ * For __print_symbolic() and __print_flags, we need to completely
+ * evaluate the first argument, which defines what to print next.
+ */
+static enum tep_event_type
+process_field_arg(struct tep_event *event, struct tep_print_arg *arg, char **tok)
+{
+	enum tep_event_type type;
+
+	type = process_arg(event, arg, tok);
+
+	while (type == TEP_EVENT_OP) {
+		type = process_op(event, arg, tok);
+	}
+
+	return type;
+}
+
+static enum tep_event_type
+process_cond(struct tep_event *event, struct tep_print_arg *top, char **tok)
+{
+	struct tep_print_arg *arg, *left, *right;
+	enum tep_event_type type;
+	char *token = NULL;
+
+	arg = alloc_arg();
+	left = alloc_arg();
+	right = alloc_arg();
+
+	if (!arg || !left || !right) {
+		do_warning_event(event, "%s: not enough memory!", __func__);
+		/* arg will be freed at out_free */
+		free_arg(left);
+		free_arg(right);
+		goto out_free;
+	}
+
+	arg->type = TEP_PRINT_OP;
+	arg->op.left = left;
+	arg->op.right = right;
+
+	*tok = NULL;
+	type = process_arg(event, left, &token);
+
+ again:
+	if (type == TEP_EVENT_ERROR)
+		goto out_free;
+
+	/* Handle other operations in the arguments */
+	if (type == TEP_EVENT_OP && strcmp(token, ":") != 0) {
+		type = process_op(event, left, &token);
+		goto again;
+	}
+
+	if (test_type_token(type, token, TEP_EVENT_OP, ":"))
+		goto out_free;
+
+	arg->op.op = token;
+
+	type = process_arg(event, right, &token);
+
+	top->op.right = arg;
+
+	*tok = token;
+	return type;
+
+out_free:
+	/* Top may point to itself */
+	top->op.right = NULL;
+	free_token(token);
+	free_arg(arg);
+	return TEP_EVENT_ERROR;
+}
+
+static enum tep_event_type
+process_array(struct tep_event *event, struct tep_print_arg *top, char **tok)
+{
+	struct tep_print_arg *arg;
+	enum tep_event_type type;
+	char *token = NULL;
+
+	arg = alloc_arg();
+	if (!arg) {
+		do_warning_event(event, "%s: not enough memory!", __func__);
+		/* '*tok' is set to top->op.op.  No need to free. */
+		*tok = NULL;
+		return TEP_EVENT_ERROR;
+	}
+
+	*tok = NULL;
+	type = process_arg(event, arg, &token);
+	if (test_type_token(type, token, TEP_EVENT_OP, "]"))
+		goto out_free;
+
+	top->op.right = arg;
+
+	free_token(token);
+	type = read_token_item(&token);
+	*tok = token;
+
+	return type;
+
+out_free:
+	free_token(token);
+	free_arg(arg);
+	return TEP_EVENT_ERROR;
+}
+
+static int get_op_prio(char *op)
+{
+	if (!op[1]) {
+		switch (op[0]) {
+		case '~':
+		case '!':
+			return 4;
+		case '*':
+		case '/':
+		case '%':
+			return 6;
+		case '+':
+		case '-':
+			return 7;
+			/* '>>' and '<<' are 8 */
+		case '<':
+		case '>':
+			return 9;
+			/* '==' and '!=' are 10 */
+		case '&':
+			return 11;
+		case '^':
+			return 12;
+		case '|':
+			return 13;
+		case '?':
+			return 16;
+		default:
+			do_warning("unknown op '%c'", op[0]);
+			return -1;
+		}
+	} else {
+		if (strcmp(op, "++") == 0 ||
+		    strcmp(op, "--") == 0) {
+			return 3;
+		} else if (strcmp(op, ">>") == 0 ||
+			   strcmp(op, "<<") == 0) {
+			return 8;
+		} else if (strcmp(op, ">=") == 0 ||
+			   strcmp(op, "<=") == 0) {
+			return 9;
+		} else if (strcmp(op, "==") == 0 ||
+			   strcmp(op, "!=") == 0) {
+			return 10;
+		} else if (strcmp(op, "&&") == 0) {
+			return 14;
+		} else if (strcmp(op, "||") == 0) {
+			return 15;
+		} else {
+			do_warning("unknown op '%s'", op);
+			return -1;
+		}
+	}
+}
+
+static int set_op_prio(struct tep_print_arg *arg)
+{
+
+	/* single ops are the greatest */
+	if (!arg->op.left || arg->op.left->type == TEP_PRINT_NULL)
+		arg->op.prio = 0;
+	else
+		arg->op.prio = get_op_prio(arg->op.op);
+
+	return arg->op.prio;
+}
+
+/* Note, *tok does not get freed, but will most likely be saved */
+static enum tep_event_type
+process_op(struct tep_event *event, struct tep_print_arg *arg, char **tok)
+{
+	struct tep_print_arg *left, *right = NULL;
+	enum tep_event_type type;
+	char *token;
+
+	/* the op is passed in via tok */
+	token = *tok;
+
+	if (arg->type == TEP_PRINT_OP && !arg->op.left) {
+		/* handle single op */
+		if (token[1]) {
+			do_warning_event(event, "bad op token %s", token);
+			goto out_free;
+		}
+		switch (token[0]) {
+		case '~':
+		case '!':
+		case '+':
+		case '-':
+			break;
+		default:
+			do_warning_event(event, "bad op token %s", token);
+			goto out_free;
+
+		}
+
+		/* make an empty left */
+		left = alloc_arg();
+		if (!left)
+			goto out_warn_free;
+
+		left->type = TEP_PRINT_NULL;
+		arg->op.left = left;
+
+		right = alloc_arg();
+		if (!right)
+			goto out_warn_free;
+
+		arg->op.right = right;
+
+		/* do not free the token, it belongs to an op */
+		*tok = NULL;
+		type = process_arg(event, right, tok);
+
+	} else if (strcmp(token, "?") == 0) {
+
+		left = alloc_arg();
+		if (!left)
+			goto out_warn_free;
+
+		/* copy the top arg to the left */
+		*left = *arg;
+
+		arg->type = TEP_PRINT_OP;
+		arg->op.op = token;
+		arg->op.left = left;
+		arg->op.prio = 0;
+
+		/* it will set arg->op.right */
+		type = process_cond(event, arg, tok);
+
+	} else if (strcmp(token, ">>") == 0 ||
+		   strcmp(token, "<<") == 0 ||
+		   strcmp(token, "&") == 0 ||
+		   strcmp(token, "|") == 0 ||
+		   strcmp(token, "&&") == 0 ||
+		   strcmp(token, "||") == 0 ||
+		   strcmp(token, "-") == 0 ||
+		   strcmp(token, "+") == 0 ||
+		   strcmp(token, "*") == 0 ||
+		   strcmp(token, "^") == 0 ||
+		   strcmp(token, "/") == 0 ||
+		   strcmp(token, "%") == 0 ||
+		   strcmp(token, "<") == 0 ||
+		   strcmp(token, ">") == 0 ||
+		   strcmp(token, "<=") == 0 ||
+		   strcmp(token, ">=") == 0 ||
+		   strcmp(token, "==") == 0 ||
+		   strcmp(token, "!=") == 0) {
+
+		left = alloc_arg();
+		if (!left)
+			goto out_warn_free;
+
+		/* copy the top arg to the left */
+		*left = *arg;
+
+		arg->type = TEP_PRINT_OP;
+		arg->op.op = token;
+		arg->op.left = left;
+		arg->op.right = NULL;
+
+		if (set_op_prio(arg) == -1) {
+			event->flags |= TEP_EVENT_FL_FAILED;
+			/* arg->op.op (= token) will be freed at out_free */
+			arg->op.op = NULL;
+			goto out_free;
+		}
+
+		type = read_token_item(&token);
+		*tok = token;
+
+		/* could just be a type pointer */
+		if ((strcmp(arg->op.op, "*") == 0) &&
+		    type == TEP_EVENT_DELIM && (strcmp(token, ")") == 0)) {
+			int ret;
+
+			if (left->type != TEP_PRINT_ATOM) {
+				do_warning_event(event, "bad pointer type");
+				goto out_free;
+			}
+			ret = append(&left->atom.atom, " ", "*");
+			if (ret < 0)
+				goto out_warn_free;
+
+			free(arg->op.op);
+			*arg = *left;
+			free(left);
+
+			return type;
+		}
+
+		right = alloc_arg();
+		if (!right)
+			goto out_warn_free;
+
+		type = process_arg_token(event, right, tok, type);
+		if (type == TEP_EVENT_ERROR) {
+			free_arg(right);
+			/* token was freed in process_arg_token() via *tok */
+			token = NULL;
+			goto out_free;
+		}
+
+		if (right->type == TEP_PRINT_OP &&
+		    get_op_prio(arg->op.op) < get_op_prio(right->op.op)) {
+			struct tep_print_arg tmp;
+
+			/* rotate ops according to the priority */
+			arg->op.right = right->op.left;
+
+			tmp = *arg;
+			*arg = *right;
+			*right = tmp;
+
+			arg->op.left = right;
+		} else {
+			arg->op.right = right;
+		}
+
+	} else if (strcmp(token, "[") == 0) {
+
+		left = alloc_arg();
+		if (!left)
+			goto out_warn_free;
+
+		*left = *arg;
+
+		arg->type = TEP_PRINT_OP;
+		arg->op.op = token;
+		arg->op.left = left;
+
+		arg->op.prio = 0;
+
+		/* it will set arg->op.right */
+		type = process_array(event, arg, tok);
+
+	} else {
+		do_warning_event(event, "unknown op '%s'", token);
+		event->flags |= TEP_EVENT_FL_FAILED;
+		/* the arg is now the left side */
+		goto out_free;
+	}
+
+	if (type == TEP_EVENT_OP && strcmp(*tok, ":") != 0) {
+		int prio;
+
+		/* higher prios need to be closer to the root */
+		prio = get_op_prio(*tok);
+
+		if (prio > arg->op.prio)
+			return process_op(event, arg, tok);
+
+		return process_op(event, right, tok);
+	}
+
+	return type;
+
+out_warn_free:
+	do_warning_event(event, "%s: not enough memory!", __func__);
+out_free:
+	free_token(token);
+	*tok = NULL;
+	return TEP_EVENT_ERROR;
+}
+
+static enum tep_event_type
+process_entry(struct tep_event *event __maybe_unused, struct tep_print_arg *arg,
+	      char **tok)
+{
+	enum tep_event_type type;
+	char *field;
+	char *token;
+
+	if (read_expected(TEP_EVENT_OP, "->") < 0)
+		goto out_err;
+
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
+		goto out_free;
+	field = token;
+
+	arg->type = TEP_PRINT_FIELD;
+	arg->field.name = field;
+
+	if (is_flag_field) {
+		arg->field.field = tep_find_any_field(event, arg->field.name);
+		arg->field.field->flags |= TEP_FIELD_IS_FLAG;
+		is_flag_field = 0;
+	} else if (is_symbolic_field) {
+		arg->field.field = tep_find_any_field(event, arg->field.name);
+		arg->field.field->flags |= TEP_FIELD_IS_SYMBOLIC;
+		is_symbolic_field = 0;
+	}
+
+	type = read_token(&token);
+	*tok = token;
+
+	return type;
+
+ out_free:
+	free_token(token);
+ out_err:
+	*tok = NULL;
+	return TEP_EVENT_ERROR;
+}
+
+static int alloc_and_process_delim(struct tep_event *event, char *next_token,
+				   struct tep_print_arg **print_arg)
+{
+	struct tep_print_arg *field;
+	enum tep_event_type type;
+	char *token;
+	int ret = 0;
+
+	field = alloc_arg();
+	if (!field) {
+		do_warning_event(event, "%s: not enough memory!", __func__);
+		errno = ENOMEM;
+		return -1;
+	}
+
+	type = process_arg(event, field, &token);
+
+	if (test_type_token(type, token, TEP_EVENT_DELIM, next_token)) {
+		errno = EINVAL;
+		ret = -1;
+		free_arg(field);
+		goto out_free_token;
+	}
+
+	*print_arg = field;
+
+out_free_token:
+	free_token(token);
+
+	return ret;
+}
+
+static char *arg_eval (struct tep_print_arg *arg);
+
+static unsigned long long
+eval_type_str(unsigned long long val, const char *type, int pointer)
+{
+	int sign = 0;
+	char *ref;
+	int len;
+
+	len = strlen(type);
+
+	if (pointer) {
+
+		if (type[len-1] != '*') {
+			do_warning("pointer expected with non pointer type");
+			return val;
+		}
+
+		ref = malloc(len);
+		if (!ref) {
+			do_warning("%s: not enough memory!", __func__);
+			return val;
+		}
+		memcpy(ref, type, len);
+
+		/* chop off the " *" */
+		ref[len - 2] = 0;
+
+		val = eval_type_str(val, ref, 0);
+		free(ref);
+		return val;
+	}
+
+	/* check if this is a pointer */
+	if (type[len - 1] == '*')
+		return val;
+
+	/* Try to figure out the arg size*/
+	if (strncmp(type, "struct", 6) == 0)
+		/* all bets off */
+		return val;
+
+	if (strcmp(type, "u8") == 0)
+		return val & 0xff;
+
+	if (strcmp(type, "u16") == 0)
+		return val & 0xffff;
+
+	if (strcmp(type, "u32") == 0)
+		return val & 0xffffffff;
+
+	if (strcmp(type, "u64") == 0 ||
+	    strcmp(type, "s64") == 0)
+		return val;
+
+	if (strcmp(type, "s8") == 0)
+		return (unsigned long long)(char)val & 0xff;
+
+	if (strcmp(type, "s16") == 0)
+		return (unsigned long long)(short)val & 0xffff;
+
+	if (strcmp(type, "s32") == 0)
+		return (unsigned long long)(int)val & 0xffffffff;
+
+	if (strncmp(type, "unsigned ", 9) == 0) {
+		sign = 0;
+		type += 9;
+	}
+
+	if (strcmp(type, "char") == 0) {
+		if (sign)
+			return (unsigned long long)(char)val & 0xff;
+		else
+			return val & 0xff;
+	}
+
+	if (strcmp(type, "short") == 0) {
+		if (sign)
+			return (unsigned long long)(short)val & 0xffff;
+		else
+			return val & 0xffff;
+	}
+
+	if (strcmp(type, "int") == 0) {
+		if (sign)
+			return (unsigned long long)(int)val & 0xffffffff;
+		else
+			return val & 0xffffffff;
+	}
+
+	return val;
+}
+
+/*
+ * Try to figure out the type.
+ */
+static unsigned long long
+eval_type(unsigned long long val, struct tep_print_arg *arg, int pointer)
+{
+	if (arg->type != TEP_PRINT_TYPE) {
+		do_warning("expected type argument");
+		return 0;
+	}
+
+	return eval_type_str(val, arg->typecast.type, pointer);
+}
+
+static int arg_num_eval(struct tep_print_arg *arg, long long *val)
+{
+	long long left, right;
+	int ret = 1;
+
+	switch (arg->type) {
+	case TEP_PRINT_ATOM:
+		*val = strtoll(arg->atom.atom, NULL, 0);
+		break;
+	case TEP_PRINT_TYPE:
+		ret = arg_num_eval(arg->typecast.item, val);
+		if (!ret)
+			break;
+		*val = eval_type(*val, arg, 0);
+		break;
+	case TEP_PRINT_OP:
+		switch (arg->op.op[0]) {
+		case '|':
+			ret = arg_num_eval(arg->op.left, &left);
+			if (!ret)
+				break;
+			ret = arg_num_eval(arg->op.right, &right);
+			if (!ret)
+				break;
+			if (arg->op.op[1])
+				*val = left || right;
+			else
+				*val = left | right;
+			break;
+		case '&':
+			ret = arg_num_eval(arg->op.left, &left);
+			if (!ret)
+				break;
+			ret = arg_num_eval(arg->op.right, &right);
+			if (!ret)
+				break;
+			if (arg->op.op[1])
+				*val = left && right;
+			else
+				*val = left & right;
+			break;
+		case '<':
+			ret = arg_num_eval(arg->op.left, &left);
+			if (!ret)
+				break;
+			ret = arg_num_eval(arg->op.right, &right);
+			if (!ret)
+				break;
+			switch (arg->op.op[1]) {
+			case 0:
+				*val = left < right;
+				break;
+			case '<':
+				*val = left << right;
+				break;
+			case '=':
+				*val = left <= right;
+				break;
+			default:
+				do_warning("unknown op '%s'", arg->op.op);
+				ret = 0;
+			}
+			break;
+		case '>':
+			ret = arg_num_eval(arg->op.left, &left);
+			if (!ret)
+				break;
+			ret = arg_num_eval(arg->op.right, &right);
+			if (!ret)
+				break;
+			switch (arg->op.op[1]) {
+			case 0:
+				*val = left > right;
+				break;
+			case '>':
+				*val = left >> right;
+				break;
+			case '=':
+				*val = left >= right;
+				break;
+			default:
+				do_warning("unknown op '%s'", arg->op.op);
+				ret = 0;
+			}
+			break;
+		case '=':
+			ret = arg_num_eval(arg->op.left, &left);
+			if (!ret)
+				break;
+			ret = arg_num_eval(arg->op.right, &right);
+			if (!ret)
+				break;
+
+			if (arg->op.op[1] != '=') {
+				do_warning("unknown op '%s'", arg->op.op);
+				ret = 0;
+			} else
+				*val = left == right;
+			break;
+		case '!':
+			ret = arg_num_eval(arg->op.left, &left);
+			if (!ret)
+				break;
+			ret = arg_num_eval(arg->op.right, &right);
+			if (!ret)
+				break;
+
+			switch (arg->op.op[1]) {
+			case '=':
+				*val = left != right;
+				break;
+			default:
+				do_warning("unknown op '%s'", arg->op.op);
+				ret = 0;
+			}
+			break;
+		case '-':
+			/* check for negative */
+			if (arg->op.left->type == TEP_PRINT_NULL)
+				left = 0;
+			else
+				ret = arg_num_eval(arg->op.left, &left);
+			if (!ret)
+				break;
+			ret = arg_num_eval(arg->op.right, &right);
+			if (!ret)
+				break;
+			*val = left - right;
+			break;
+		case '+':
+			if (arg->op.left->type == TEP_PRINT_NULL)
+				left = 0;
+			else
+				ret = arg_num_eval(arg->op.left, &left);
+			if (!ret)
+				break;
+			ret = arg_num_eval(arg->op.right, &right);
+			if (!ret)
+				break;
+			*val = left + right;
+			break;
+		case '~':
+			ret = arg_num_eval(arg->op.right, &right);
+			if (!ret)
+				break;
+			*val = ~right;
+			break;
+		default:
+			do_warning("unknown op '%s'", arg->op.op);
+			ret = 0;
+		}
+		break;
+
+	case TEP_PRINT_NULL:
+	case TEP_PRINT_FIELD ... TEP_PRINT_SYMBOL:
+	case TEP_PRINT_STRING:
+	case TEP_PRINT_BSTRING:
+	case TEP_PRINT_BITMASK:
+	default:
+		do_warning("invalid eval type %d", arg->type);
+		ret = 0;
+
+	}
+	return ret;
+}
+
+static char *arg_eval (struct tep_print_arg *arg)
+{
+	long long val;
+	static char buf[24];
+
+	switch (arg->type) {
+	case TEP_PRINT_ATOM:
+		return arg->atom.atom;
+	case TEP_PRINT_TYPE:
+		return arg_eval(arg->typecast.item);
+	case TEP_PRINT_OP:
+		if (!arg_num_eval(arg, &val))
+			break;
+		sprintf(buf, "%lld", val);
+		return buf;
+
+	case TEP_PRINT_NULL:
+	case TEP_PRINT_FIELD ... TEP_PRINT_SYMBOL:
+	case TEP_PRINT_STRING:
+	case TEP_PRINT_BSTRING:
+	case TEP_PRINT_BITMASK:
+	default:
+		do_warning("invalid eval type %d", arg->type);
+		break;
+	}
+
+	return NULL;
+}
+
+static enum tep_event_type
+process_fields(struct tep_event *event, struct tep_print_flag_sym **list, char **tok)
+{
+	enum tep_event_type type;
+	struct tep_print_arg *arg = NULL;
+	struct tep_print_flag_sym *field;
+	char *token = *tok;
+	char *value;
+
+	do {
+		free_token(token);
+		type = read_token_item(&token);
+		if (test_type_token(type, token, TEP_EVENT_OP, "{"))
+			break;
+
+		arg = alloc_arg();
+		if (!arg)
+			goto out_free;
+
+		free_token(token);
+		type = process_arg(event, arg, &token);
+
+		if (type == TEP_EVENT_OP)
+			type = process_op(event, arg, &token);
+
+		if (type == TEP_EVENT_ERROR)
+			goto out_free;
+
+		if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
+			goto out_free;
+
+		field = calloc(1, sizeof(*field));
+		if (!field)
+			goto out_free;
+
+		value = arg_eval(arg);
+		if (value == NULL)
+			goto out_free_field;
+		field->value = strdup(value);
+		if (field->value == NULL)
+			goto out_free_field;
+
+		free_arg(arg);
+		arg = alloc_arg();
+		if (!arg)
+			goto out_free;
+
+		free_token(token);
+		type = process_arg(event, arg, &token);
+		if (test_type_token(type, token, TEP_EVENT_OP, "}"))
+			goto out_free_field;
+
+		value = arg_eval(arg);
+		if (value == NULL)
+			goto out_free_field;
+		field->str = strdup(value);
+		if (field->str == NULL)
+			goto out_free_field;
+		free_arg(arg);
+		arg = NULL;
+
+		*list = field;
+		list = &field->next;
+
+		free_token(token);
+		type = read_token_item(&token);
+	} while (type == TEP_EVENT_DELIM && strcmp(token, ",") == 0);
+
+	*tok = token;
+	return type;
+
+out_free_field:
+	free_flag_sym(field);
+out_free:
+	free_arg(arg);
+	free_token(token);
+	*tok = NULL;
+
+	return TEP_EVENT_ERROR;
+}
+
+static enum tep_event_type
+process_flags(struct tep_event *event, struct tep_print_arg *arg, char **tok)
+{
+	struct tep_print_arg *field;
+	enum tep_event_type type;
+	char *token = NULL;
+
+	memset(arg, 0, sizeof(*arg));
+	arg->type = TEP_PRINT_FLAGS;
+
+	field = alloc_arg();
+	if (!field) {
+		do_warning_event(event, "%s: not enough memory!", __func__);
+		goto out_free;
+	}
+
+	type = process_field_arg(event, field, &token);
+
+	/* Handle operations in the first argument */
+	while (type == TEP_EVENT_OP)
+		type = process_op(event, field, &token);
+
+	if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
+		goto out_free_field;
+	free_token(token);
+
+	arg->flags.field = field;
+
+	type = read_token_item(&token);
+	if (event_item_type(type)) {
+		arg->flags.delim = token;
+		type = read_token_item(&token);
+	}
+
+	if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
+		goto out_free;
+
+	type = process_fields(event, &arg->flags.flags, &token);
+	if (test_type_token(type, token, TEP_EVENT_DELIM, ")"))
+		goto out_free;
+
+	free_token(token);
+	type = read_token_item(tok);
+	return type;
+
+out_free_field:
+	free_arg(field);
+out_free:
+	free_token(token);
+	*tok = NULL;
+	return TEP_EVENT_ERROR;
+}
+
+static enum tep_event_type
+process_symbols(struct tep_event *event, struct tep_print_arg *arg, char **tok)
+{
+	struct tep_print_arg *field;
+	enum tep_event_type type;
+	char *token = NULL;
+
+	memset(arg, 0, sizeof(*arg));
+	arg->type = TEP_PRINT_SYMBOL;
+
+	field = alloc_arg();
+	if (!field) {
+		do_warning_event(event, "%s: not enough memory!", __func__);
+		goto out_free;
+	}
+
+	type = process_field_arg(event, field, &token);
+
+	if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
+		goto out_free_field;
+
+	arg->symbol.field = field;
+
+	type = process_fields(event, &arg->symbol.symbols, &token);
+	if (test_type_token(type, token, TEP_EVENT_DELIM, ")"))
+		goto out_free;
+
+	free_token(token);
+	type = read_token_item(tok);
+	return type;
+
+out_free_field:
+	free_arg(field);
+out_free:
+	free_token(token);
+	*tok = NULL;
+	return TEP_EVENT_ERROR;
+}
+
+static enum tep_event_type
+process_hex_common(struct tep_event *event, struct tep_print_arg *arg,
+		   char **tok, enum tep_print_arg_type type)
+{
+	memset(arg, 0, sizeof(*arg));
+	arg->type = type;
+
+	if (alloc_and_process_delim(event, ",", &arg->hex.field))
+		goto out;
+
+	if (alloc_and_process_delim(event, ")", &arg->hex.size))
+		goto free_field;
+
+	return read_token_item(tok);
+
+free_field:
+	free_arg(arg->hex.field);
+	arg->hex.field = NULL;
+out:
+	*tok = NULL;
+	return TEP_EVENT_ERROR;
+}
+
+static enum tep_event_type
+process_hex(struct tep_event *event, struct tep_print_arg *arg, char **tok)
+{
+	return process_hex_common(event, arg, tok, TEP_PRINT_HEX);
+}
+
+static enum tep_event_type
+process_hex_str(struct tep_event *event, struct tep_print_arg *arg,
+		char **tok)
+{
+	return process_hex_common(event, arg, tok, TEP_PRINT_HEX_STR);
+}
+
+static enum tep_event_type
+process_int_array(struct tep_event *event, struct tep_print_arg *arg, char **tok)
+{
+	memset(arg, 0, sizeof(*arg));
+	arg->type = TEP_PRINT_INT_ARRAY;
+
+	if (alloc_and_process_delim(event, ",", &arg->int_array.field))
+		goto out;
+
+	if (alloc_and_process_delim(event, ",", &arg->int_array.count))
+		goto free_field;
+
+	if (alloc_and_process_delim(event, ")", &arg->int_array.el_size))
+		goto free_size;
+
+	return read_token_item(tok);
+
+free_size:
+	free_arg(arg->int_array.count);
+	arg->int_array.count = NULL;
+free_field:
+	free_arg(arg->int_array.field);
+	arg->int_array.field = NULL;
+out:
+	*tok = NULL;
+	return TEP_EVENT_ERROR;
+}
+
+static enum tep_event_type
+process_dynamic_array(struct tep_event *event, struct tep_print_arg *arg, char **tok)
+{
+	struct tep_format_field *field;
+	enum tep_event_type type;
+	char *token;
+
+	memset(arg, 0, sizeof(*arg));
+	arg->type = TEP_PRINT_DYNAMIC_ARRAY;
+
+	/*
+	 * The item within the parenthesis is another field that holds
+	 * the index into where the array starts.
+	 */
+	type = read_token(&token);
+	*tok = token;
+	if (type != TEP_EVENT_ITEM)
+		goto out_free;
+
+	/* Find the field */
+
+	field = tep_find_field(event, token);
+	if (!field)
+		goto out_free;
+
+	arg->dynarray.field = field;
+	arg->dynarray.index = 0;
+
+	if (read_expected(TEP_EVENT_DELIM, ")") < 0)
+		goto out_free;
+
+	free_token(token);
+	type = read_token_item(&token);
+	*tok = token;
+	if (type != TEP_EVENT_OP || strcmp(token, "[") != 0)
+		return type;
+
+	free_token(token);
+	arg = alloc_arg();
+	if (!arg) {
+		do_warning_event(event, "%s: not enough memory!", __func__);
+		*tok = NULL;
+		return TEP_EVENT_ERROR;
+	}
+
+	type = process_arg(event, arg, &token);
+	if (type == TEP_EVENT_ERROR)
+		goto out_free_arg;
+
+	if (!test_type_token(type, token, TEP_EVENT_OP, "]"))
+		goto out_free_arg;
+
+	free_token(token);
+	type = read_token_item(tok);
+	return type;
+
+ out_free_arg:
+	free_arg(arg);
+ out_free:
+	free_token(token);
+	*tok = NULL;
+	return TEP_EVENT_ERROR;
+}
+
+static enum tep_event_type
+process_dynamic_array_len(struct tep_event *event, struct tep_print_arg *arg,
+			  char **tok)
+{
+	struct tep_format_field *field;
+	enum tep_event_type type;
+	char *token;
+
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
+		goto out_free;
+
+	arg->type = TEP_PRINT_DYNAMIC_ARRAY_LEN;
+
+	/* Find the field */
+	field = tep_find_field(event, token);
+	if (!field)
+		goto out_free;
+
+	arg->dynarray.field = field;
+	arg->dynarray.index = 0;
+
+	if (read_expected(TEP_EVENT_DELIM, ")") < 0)
+		goto out_err;
+
+	free_token(token);
+	type = read_token(&token);
+	*tok = token;
+
+	return type;
+
+ out_free:
+	free_token(token);
+ out_err:
+	*tok = NULL;
+	return TEP_EVENT_ERROR;
+}
+
+static enum tep_event_type
+process_paren(struct tep_event *event, struct tep_print_arg *arg, char **tok)
+{
+	struct tep_print_arg *item_arg;
+	enum tep_event_type type;
+	char *token;
+
+	type = process_arg(event, arg, &token);
+
+	if (type == TEP_EVENT_ERROR)
+		goto out_free;
+
+	if (type == TEP_EVENT_OP)
+		type = process_op(event, arg, &token);
+
+	if (type == TEP_EVENT_ERROR)
+		goto out_free;
+
+	if (test_type_token(type, token, TEP_EVENT_DELIM, ")"))
+		goto out_free;
+
+	free_token(token);
+	type = read_token_item(&token);
+
+	/*
+	 * If the next token is an item or another open paren, then
+	 * this was a typecast.
+	 */
+	if (event_item_type(type) ||
+	    (type == TEP_EVENT_DELIM && strcmp(token, "(") == 0)) {
+
+		/* make this a typecast and contine */
+
+		/* prevous must be an atom */
+		if (arg->type != TEP_PRINT_ATOM) {
+			do_warning_event(event, "previous needed to be TEP_PRINT_ATOM");
+			goto out_free;
+		}
+
+		item_arg = alloc_arg();
+		if (!item_arg) {
+			do_warning_event(event, "%s: not enough memory!",
+					 __func__);
+			goto out_free;
+		}
+
+		arg->type = TEP_PRINT_TYPE;
+		arg->typecast.type = arg->atom.atom;
+		arg->typecast.item = item_arg;
+		type = process_arg_token(event, item_arg, &token, type);
+
+	}
+
+	*tok = token;
+	return type;
+
+ out_free:
+	free_token(token);
+	*tok = NULL;
+	return TEP_EVENT_ERROR;
+}
+
+
+static enum tep_event_type
+process_str(struct tep_event *event __maybe_unused, struct tep_print_arg *arg,
+	    char **tok)
+{
+	enum tep_event_type type;
+	char *token;
+
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
+		goto out_free;
+
+	arg->type = TEP_PRINT_STRING;
+	arg->string.string = token;
+	arg->string.offset = -1;
+
+	if (read_expected(TEP_EVENT_DELIM, ")") < 0)
+		goto out_err;
+
+	type = read_token(&token);
+	*tok = token;
+
+	return type;
+
+ out_free:
+	free_token(token);
+ out_err:
+	*tok = NULL;
+	return TEP_EVENT_ERROR;
+}
+
+static enum tep_event_type
+process_bitmask(struct tep_event *event __maybe_unused, struct tep_print_arg *arg,
+		char **tok)
+{
+	enum tep_event_type type;
+	char *token;
+
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
+		goto out_free;
+
+	arg->type = TEP_PRINT_BITMASK;
+	arg->bitmask.bitmask = token;
+	arg->bitmask.offset = -1;
+
+	if (read_expected(TEP_EVENT_DELIM, ")") < 0)
+		goto out_err;
+
+	type = read_token(&token);
+	*tok = token;
+
+	return type;
+
+ out_free:
+	free_token(token);
+ out_err:
+	*tok = NULL;
+	return TEP_EVENT_ERROR;
+}
+
+static struct tep_function_handler *
+find_func_handler(struct tep_handle *tep, char *func_name)
+{
+	struct tep_function_handler *func;
+
+	if (!tep)
+		return NULL;
+
+	for (func = tep->func_handlers; func; func = func->next) {
+		if (strcmp(func->name, func_name) == 0)
+			break;
+	}
+
+	return func;
+}
+
+static void remove_func_handler(struct tep_handle *tep, char *func_name)
+{
+	struct tep_function_handler *func;
+	struct tep_function_handler **next;
+
+	next = &tep->func_handlers;
+	while ((func = *next)) {
+		if (strcmp(func->name, func_name) == 0) {
+			*next = func->next;
+			free_func_handle(func);
+			break;
+		}
+		next = &func->next;
+	}
+}
+
+static enum tep_event_type
+process_func_handler(struct tep_event *event, struct tep_function_handler *func,
+		     struct tep_print_arg *arg, char **tok)
+{
+	struct tep_print_arg **next_arg;
+	struct tep_print_arg *farg;
+	enum tep_event_type type;
+	char *token;
+	int i;
+
+	arg->type = TEP_PRINT_FUNC;
+	arg->func.func = func;
+
+	*tok = NULL;
+
+	next_arg = &(arg->func.args);
+	for (i = 0; i < func->nr_args; i++) {
+		farg = alloc_arg();
+		if (!farg) {
+			do_warning_event(event, "%s: not enough memory!",
+					 __func__);
+			return TEP_EVENT_ERROR;
+		}
+
+		type = process_arg(event, farg, &token);
+		if (i < (func->nr_args - 1)) {
+			if (type != TEP_EVENT_DELIM || strcmp(token, ",") != 0) {
+				do_warning_event(event,
+					"Error: function '%s()' expects %d arguments but event %s only uses %d",
+					func->name, func->nr_args,
+					event->name, i + 1);
+				goto err;
+			}
+		} else {
+			if (type != TEP_EVENT_DELIM || strcmp(token, ")") != 0) {
+				do_warning_event(event,
+					"Error: function '%s()' only expects %d arguments but event %s has more",
+					func->name, func->nr_args, event->name);
+				goto err;
+			}
+		}
+
+		*next_arg = farg;
+		next_arg = &(farg->next);
+		free_token(token);
+	}
+
+	type = read_token(&token);
+	*tok = token;
+
+	return type;
+
+err:
+	free_arg(farg);
+	free_token(token);
+	return TEP_EVENT_ERROR;
+}
+
+static enum tep_event_type
+process_builtin_expect(struct tep_event *event, struct tep_print_arg *arg, char **tok)
+{
+	enum tep_event_type type;
+	char *token = NULL;
+
+	/* Handle __builtin_expect( cond, #) */
+	type = process_arg(event, arg, &token);
+
+	if (type != TEP_EVENT_DELIM || token[0] != ',')
+		goto out_free;
+
+	free_token(token);
+
+	/* We don't care what the second parameter is of the __builtin_expect() */
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
+		goto out_free;
+
+	if (read_expected(TEP_EVENT_DELIM, ")") < 0)
+		goto out_free;
+
+	free_token(token);
+	type = read_token_item(tok);
+	return type;
+
+out_free:
+	free_token(token);
+	*tok = NULL;
+	return TEP_EVENT_ERROR;
+}
+
+static enum tep_event_type
+process_function(struct tep_event *event, struct tep_print_arg *arg,
+		 char *token, char **tok)
+{
+	struct tep_function_handler *func;
+
+	if (strcmp(token, "__print_flags") == 0) {
+		free_token(token);
+		is_flag_field = 1;
+		return process_flags(event, arg, tok);
+	}
+	if (strcmp(token, "__print_symbolic") == 0) {
+		free_token(token);
+		is_symbolic_field = 1;
+		return process_symbols(event, arg, tok);
+	}
+	if (strcmp(token, "__print_hex") == 0) {
+		free_token(token);
+		return process_hex(event, arg, tok);
+	}
+	if (strcmp(token, "__print_hex_str") == 0) {
+		free_token(token);
+		return process_hex_str(event, arg, tok);
+	}
+	if (strcmp(token, "__print_array") == 0) {
+		free_token(token);
+		return process_int_array(event, arg, tok);
+	}
+	if (strcmp(token, "__get_str") == 0) {
+		free_token(token);
+		return process_str(event, arg, tok);
+	}
+	if (strcmp(token, "__get_bitmask") == 0) {
+		free_token(token);
+		return process_bitmask(event, arg, tok);
+	}
+	if (strcmp(token, "__get_dynamic_array") == 0) {
+		free_token(token);
+		return process_dynamic_array(event, arg, tok);
+	}
+	if (strcmp(token, "__get_dynamic_array_len") == 0) {
+		free_token(token);
+		return process_dynamic_array_len(event, arg, tok);
+	}
+	if (strcmp(token, "__builtin_expect") == 0) {
+		free_token(token);
+		return process_builtin_expect(event, arg, tok);
+	}
+
+	func = find_func_handler(event->tep, token);
+	if (func) {
+		free_token(token);
+		return process_func_handler(event, func, arg, tok);
+	}
+
+	do_warning_event(event, "function %s not defined", token);
+	free_token(token);
+	return TEP_EVENT_ERROR;
+}
+
+static enum tep_event_type
+process_arg_token(struct tep_event *event, struct tep_print_arg *arg,
+		  char **tok, enum tep_event_type type)
+{
+	char *token;
+	char *atom;
+
+	token = *tok;
+
+	switch (type) {
+	case TEP_EVENT_ITEM:
+		if (strcmp(token, "REC") == 0) {
+			free_token(token);
+			type = process_entry(event, arg, &token);
+			break;
+		}
+		atom = token;
+		/* test the next token */
+		type = read_token_item(&token);
+
+		/*
+		 * If the next token is a parenthesis, then this
+		 * is a function.
+		 */
+		if (type == TEP_EVENT_DELIM && strcmp(token, "(") == 0) {
+			free_token(token);
+			token = NULL;
+			/* this will free atom. */
+			type = process_function(event, arg, atom, &token);
+			break;
+		}
+		/* atoms can be more than one token long */
+		while (type == TEP_EVENT_ITEM) {
+			int ret;
+
+			ret = append(&atom, " ", token);
+			if (ret < 0) {
+				free(atom);
+				*tok = NULL;
+				free_token(token);
+				return TEP_EVENT_ERROR;
+			}
+			free_token(token);
+			type = read_token_item(&token);
+		}
+
+		arg->type = TEP_PRINT_ATOM;
+		arg->atom.atom = atom;
+		break;
+
+	case TEP_EVENT_DQUOTE:
+	case TEP_EVENT_SQUOTE:
+		arg->type = TEP_PRINT_ATOM;
+		arg->atom.atom = token;
+		type = read_token_item(&token);
+		break;
+	case TEP_EVENT_DELIM:
+		if (strcmp(token, "(") == 0) {
+			free_token(token);
+			type = process_paren(event, arg, &token);
+			break;
+		}
+	case TEP_EVENT_OP:
+		/* handle single ops */
+		arg->type = TEP_PRINT_OP;
+		arg->op.op = token;
+		arg->op.left = NULL;
+		type = process_op(event, arg, &token);
+
+		/* On error, the op is freed */
+		if (type == TEP_EVENT_ERROR)
+			arg->op.op = NULL;
+
+		/* return error type if errored */
+		break;
+
+	case TEP_EVENT_ERROR ... TEP_EVENT_NEWLINE:
+	default:
+		do_warning_event(event, "unexpected type %d", type);
+		return TEP_EVENT_ERROR;
+	}
+	*tok = token;
+
+	return type;
+}
+
+static int event_read_print_args(struct tep_event *event, struct tep_print_arg **list)
+{
+	enum tep_event_type type = TEP_EVENT_ERROR;
+	struct tep_print_arg *arg;
+	char *token;
+	int args = 0;
+
+	do {
+		if (type == TEP_EVENT_NEWLINE) {
+			type = read_token_item(&token);
+			continue;
+		}
+
+		arg = alloc_arg();
+		if (!arg) {
+			do_warning_event(event, "%s: not enough memory!",
+					 __func__);
+			return -1;
+		}
+
+		type = process_arg(event, arg, &token);
+
+		if (type == TEP_EVENT_ERROR) {
+			free_token(token);
+			free_arg(arg);
+			return -1;
+		}
+
+		*list = arg;
+		args++;
+
+		if (type == TEP_EVENT_OP) {
+			type = process_op(event, arg, &token);
+			free_token(token);
+			if (type == TEP_EVENT_ERROR) {
+				*list = NULL;
+				free_arg(arg);
+				return -1;
+			}
+			list = &arg->next;
+			continue;
+		}
+
+		if (type == TEP_EVENT_DELIM && strcmp(token, ",") == 0) {
+			free_token(token);
+			*list = arg;
+			list = &arg->next;
+			continue;
+		}
+		break;
+	} while (type != TEP_EVENT_NONE);
+
+	if (type != TEP_EVENT_NONE && type != TEP_EVENT_ERROR)
+		free_token(token);
+
+	return args;
+}
+
+static int event_read_print(struct tep_event *event)
+{
+	enum tep_event_type type;
+	char *token;
+	int ret;
+
+	if (read_expected_item(TEP_EVENT_ITEM, "print") < 0)
+		return -1;
+
+	if (read_expected(TEP_EVENT_ITEM, "fmt") < 0)
+		return -1;
+
+	if (read_expected(TEP_EVENT_OP, ":") < 0)
+		return -1;
+
+	if (read_expect_type(TEP_EVENT_DQUOTE, &token) < 0)
+		goto fail;
+
+ concat:
+	event->print_fmt.format = token;
+	event->print_fmt.args = NULL;
+
+	/* ok to have no arg */
+	type = read_token_item(&token);
+
+	if (type == TEP_EVENT_NONE)
+		return 0;
+
+	/* Handle concatenation of print lines */
+	if (type == TEP_EVENT_DQUOTE) {
+		char *cat;
+
+		if (asprintf(&cat, "%s%s", event->print_fmt.format, token) < 0)
+			goto fail;
+		free_token(token);
+		free_token(event->print_fmt.format);
+		event->print_fmt.format = NULL;
+		token = cat;
+		goto concat;
+	}
+			     
+	if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
+		goto fail;
+
+	free_token(token);
+
+	ret = event_read_print_args(event, &event->print_fmt.args);
+	if (ret < 0)
+		return -1;
+
+	return ret;
+
+ fail:
+	free_token(token);
+	return -1;
+}
+
+/**
+ * tep_find_common_field - return a common field by event
+ * @event: handle for the event
+ * @name: the name of the common field to return
+ *
+ * Returns a common field from the event by the given @name.
+ * This only searches the common fields and not all field.
+ */
+struct tep_format_field *
+tep_find_common_field(struct tep_event *event, const char *name)
+{
+	struct tep_format_field *format;
+
+	for (format = event->format.common_fields;
+	     format; format = format->next) {
+		if (strcmp(format->name, name) == 0)
+			break;
+	}
+
+	return format;
+}
+
+/**
+ * tep_find_field - find a non-common field
+ * @event: handle for the event
+ * @name: the name of the non-common field
+ *
+ * Returns a non-common field by the given @name.
+ * This does not search common fields.
+ */
+struct tep_format_field *
+tep_find_field(struct tep_event *event, const char *name)
+{
+	struct tep_format_field *format;
+
+	for (format = event->format.fields;
+	     format; format = format->next) {
+		if (strcmp(format->name, name) == 0)
+			break;
+	}
+
+	return format;
+}
+
+/**
+ * tep_find_any_field - find any field by name
+ * @event: handle for the event
+ * @name: the name of the field
+ *
+ * Returns a field by the given @name.
+ * This searches the common field names first, then
+ * the non-common ones if a common one was not found.
+ */
+struct tep_format_field *
+tep_find_any_field(struct tep_event *event, const char *name)
+{
+	struct tep_format_field *format;
+
+	format = tep_find_common_field(event, name);
+	if (format)
+		return format;
+	return tep_find_field(event, name);
+}
+
+/**
+ * tep_read_number - read a number from data
+ * @tep: a handle to the trace event parser context
+ * @ptr: the raw data
+ * @size: the size of the data that holds the number
+ *
+ * Returns the number (converted to host) from the
+ * raw data.
+ */
+unsigned long long tep_read_number(struct tep_handle *tep,
+				   const void *ptr, int size)
+{
+	unsigned long long val;
+
+	switch (size) {
+	case 1:
+		return *(unsigned char *)ptr;
+	case 2:
+		return data2host2(tep, *(unsigned short *)ptr);
+	case 4:
+		return data2host4(tep, *(unsigned int *)ptr);
+	case 8:
+		memcpy(&val, (ptr), sizeof(unsigned long long));
+		return data2host8(tep, val);
+	default:
+		/* BUG! */
+		return 0;
+	}
+}
+
+/**
+ * tep_read_number_field - read a number from data
+ * @field: a handle to the field
+ * @data: the raw data to read
+ * @value: the value to place the number in
+ *
+ * Reads raw data according to a field offset and size,
+ * and translates it into @value.
+ *
+ * Returns 0 on success, -1 otherwise.
+ */
+int tep_read_number_field(struct tep_format_field *field, const void *data,
+			  unsigned long long *value)
+{
+	if (!field)
+		return -1;
+	switch (field->size) {
+	case 1:
+	case 2:
+	case 4:
+	case 8:
+		*value = tep_read_number(field->event->tep,
+					 data + field->offset, field->size);
+		return 0;
+	default:
+		return -1;
+	}
+}
+
+static int get_common_info(struct tep_handle *tep,
+			   const char *type, int *offset, int *size)
+{
+	struct tep_event *event;
+	struct tep_format_field *field;
+
+	/*
+	 * All events should have the same common elements.
+	 * Pick any event to find where the type is;
+	 */
+	if (!tep->events) {
+		do_warning("no event_list!");
+		return -1;
+	}
+
+	event = tep->events[0];
+	field = tep_find_common_field(event, type);
+	if (!field)
+		return -1;
+
+	*offset = field->offset;
+	*size = field->size;
+
+	return 0;
+}
+
+static int __parse_common(struct tep_handle *tep, void *data,
+			  int *size, int *offset, const char *name)
+{
+	int ret;
+
+	if (!*size) {
+		ret = get_common_info(tep, name, offset, size);
+		if (ret < 0)
+			return ret;
+	}
+	return tep_read_number(tep, data + *offset, *size);
+}
+
+static int trace_parse_common_type(struct tep_handle *tep, void *data)
+{
+	return __parse_common(tep, data,
+			      &tep->type_size, &tep->type_offset,
+			      "common_type");
+}
+
+static int parse_common_pid(struct tep_handle *tep, void *data)
+{
+	return __parse_common(tep, data,
+			      &tep->pid_size, &tep->pid_offset,
+			      "common_pid");
+}
+
+static int parse_common_pc(struct tep_handle *tep, void *data)
+{
+	return __parse_common(tep, data,
+			      &tep->pc_size, &tep->pc_offset,
+			      "common_preempt_count");
+}
+
+static int parse_common_flags(struct tep_handle *tep, void *data)
+{
+	return __parse_common(tep, data,
+			      &tep->flags_size, &tep->flags_offset,
+			      "common_flags");
+}
+
+static int parse_common_lock_depth(struct tep_handle *tep, void *data)
+{
+	return __parse_common(tep, data,
+			      &tep->ld_size, &tep->ld_offset,
+			      "common_lock_depth");
+}
+
+static int parse_common_migrate_disable(struct tep_handle *tep, void *data)
+{
+	return __parse_common(tep, data,
+			      &tep->ld_size, &tep->ld_offset,
+			      "common_migrate_disable");
+}
+
+static int events_id_cmp(const void *a, const void *b);
+
+/**
+ * tep_find_event - find an event by given id
+ * @tep: a handle to the trace event parser context
+ * @id: the id of the event
+ *
+ * Returns an event that has a given @id.
+ */
+struct tep_event *tep_find_event(struct tep_handle *tep, int id)
+{
+	struct tep_event **eventptr;
+	struct tep_event key;
+	struct tep_event *pkey = &key;
+
+	/* Check cache first */
+	if (tep->last_event && tep->last_event->id == id)
+		return tep->last_event;
+
+	key.id = id;
+
+	eventptr = bsearch(&pkey, tep->events, tep->nr_events,
+			   sizeof(*tep->events), events_id_cmp);
+
+	if (eventptr) {
+		tep->last_event = *eventptr;
+		return *eventptr;
+	}
+
+	return NULL;
+}
+
+/**
+ * tep_find_event_by_name - find an event by given name
+ * @tep: a handle to the trace event parser context
+ * @sys: the system name to search for
+ * @name: the name of the event to search for
+ *
+ * This returns an event with a given @name and under the system
+ * @sys. If @sys is NULL the first event with @name is returned.
+ */
+struct tep_event *
+tep_find_event_by_name(struct tep_handle *tep,
+		       const char *sys, const char *name)
+{
+	struct tep_event *event = NULL;
+	int i;
+
+	if (tep->last_event &&
+	    strcmp(tep->last_event->name, name) == 0 &&
+	    (!sys || strcmp(tep->last_event->system, sys) == 0))
+		return tep->last_event;
+
+	for (i = 0; i < tep->nr_events; i++) {
+		event = tep->events[i];
+		if (strcmp(event->name, name) == 0) {
+			if (!sys)
+				break;
+			if (strcmp(event->system, sys) == 0)
+				break;
+		}
+	}
+	if (i == tep->nr_events)
+		event = NULL;
+
+	tep->last_event = event;
+	return event;
+}
+
+static unsigned long long
+eval_num_arg(void *data, int size, struct tep_event *event, struct tep_print_arg *arg)
+{
+	struct tep_handle *tep = event->tep;
+	unsigned long long val = 0;
+	unsigned long long left, right;
+	struct tep_print_arg *typearg = NULL;
+	struct tep_print_arg *larg;
+	unsigned long offset;
+	unsigned int field_size;
+
+	switch (arg->type) {
+	case TEP_PRINT_NULL:
+		/* ?? */
+		return 0;
+	case TEP_PRINT_ATOM:
+		return strtoull(arg->atom.atom, NULL, 0);
+	case TEP_PRINT_FIELD:
+		if (!arg->field.field) {
+			arg->field.field = tep_find_any_field(event, arg->field.name);
+			if (!arg->field.field)
+				goto out_warning_field;
+			
+		}
+		/* must be a number */
+		val = tep_read_number(tep, data + arg->field.field->offset,
+				      arg->field.field->size);
+		break;
+	case TEP_PRINT_FLAGS:
+	case TEP_PRINT_SYMBOL:
+	case TEP_PRINT_INT_ARRAY:
+	case TEP_PRINT_HEX:
+	case TEP_PRINT_HEX_STR:
+		break;
+	case TEP_PRINT_TYPE:
+		val = eval_num_arg(data, size, event, arg->typecast.item);
+		return eval_type(val, arg, 0);
+	case TEP_PRINT_STRING:
+	case TEP_PRINT_BSTRING:
+	case TEP_PRINT_BITMASK:
+		return 0;
+	case TEP_PRINT_FUNC: {
+		struct trace_seq s;
+		trace_seq_init(&s);
+		val = process_defined_func(&s, data, size, event, arg);
+		trace_seq_destroy(&s);
+		return val;
+	}
+	case TEP_PRINT_OP:
+		if (strcmp(arg->op.op, "[") == 0) {
+			/*
+			 * Arrays are special, since we don't want
+			 * to read the arg as is.
+			 */
+			right = eval_num_arg(data, size, event, arg->op.right);
+
+			/* handle typecasts */
+			larg = arg->op.left;
+			while (larg->type == TEP_PRINT_TYPE) {
+				if (!typearg)
+					typearg = larg;
+				larg = larg->typecast.item;
+			}
+
+			/* Default to long size */
+			field_size = tep->long_size;
+
+			switch (larg->type) {
+			case TEP_PRINT_DYNAMIC_ARRAY:
+				offset = tep_read_number(tep,
+						   data + larg->dynarray.field->offset,
+						   larg->dynarray.field->size);
+				if (larg->dynarray.field->elementsize)
+					field_size = larg->dynarray.field->elementsize;
+				/*
+				 * The actual length of the dynamic array is stored
+				 * in the top half of the field, and the offset
+				 * is in the bottom half of the 32 bit field.
+				 */
+				offset &= 0xffff;
+				offset += right;
+				break;
+			case TEP_PRINT_FIELD:
+				if (!larg->field.field) {
+					larg->field.field =
+						tep_find_any_field(event, larg->field.name);
+					if (!larg->field.field) {
+						arg = larg;
+						goto out_warning_field;
+					}
+				}
+				field_size = larg->field.field->elementsize;
+				offset = larg->field.field->offset +
+					right * larg->field.field->elementsize;
+				break;
+			default:
+				goto default_op; /* oops, all bets off */
+			}
+			val = tep_read_number(tep,
+					      data + offset, field_size);
+			if (typearg)
+				val = eval_type(val, typearg, 1);
+			break;
+		} else if (strcmp(arg->op.op, "?") == 0) {
+			left = eval_num_arg(data, size, event, arg->op.left);
+			arg = arg->op.right;
+			if (left)
+				val = eval_num_arg(data, size, event, arg->op.left);
+			else
+				val = eval_num_arg(data, size, event, arg->op.right);
+			break;
+		}
+ default_op:
+		left = eval_num_arg(data, size, event, arg->op.left);
+		right = eval_num_arg(data, size, event, arg->op.right);
+		switch (arg->op.op[0]) {
+		case '!':
+			switch (arg->op.op[1]) {
+			case 0:
+				val = !right;
+				break;
+			case '=':
+				val = left != right;
+				break;
+			default:
+				goto out_warning_op;
+			}
+			break;
+		case '~':
+			val = ~right;
+			break;
+		case '|':
+			if (arg->op.op[1])
+				val = left || right;
+			else
+				val = left | right;
+			break;
+		case '&':
+			if (arg->op.op[1])
+				val = left && right;
+			else
+				val = left & right;
+			break;
+		case '<':
+			switch (arg->op.op[1]) {
+			case 0:
+				val = left < right;
+				break;
+			case '<':
+				val = left << right;
+				break;
+			case '=':
+				val = left <= right;
+				break;
+			default:
+				goto out_warning_op;
+			}
+			break;
+		case '>':
+			switch (arg->op.op[1]) {
+			case 0:
+				val = left > right;
+				break;
+			case '>':
+				val = left >> right;
+				break;
+			case '=':
+				val = left >= right;
+				break;
+			default:
+				goto out_warning_op;
+			}
+			break;
+		case '=':
+			if (arg->op.op[1] != '=')
+				goto out_warning_op;
+
+			val = left == right;
+			break;
+		case '-':
+			val = left - right;
+			break;
+		case '+':
+			val = left + right;
+			break;
+		case '/':
+			val = left / right;
+			break;
+		case '%':
+			val = left % right;
+			break;
+		case '*':
+			val = left * right;
+			break;
+		default:
+			goto out_warning_op;
+		}
+		break;
+	case TEP_PRINT_DYNAMIC_ARRAY_LEN:
+		offset = tep_read_number(tep,
+					 data + arg->dynarray.field->offset,
+					 arg->dynarray.field->size);
+		/*
+		 * The total allocated length of the dynamic array is
+		 * stored in the top half of the field, and the offset
+		 * is in the bottom half of the 32 bit field.
+		 */
+		val = (unsigned long long)(offset >> 16);
+		break;
+	case TEP_PRINT_DYNAMIC_ARRAY:
+		/* Without [], we pass the address to the dynamic data */
+		offset = tep_read_number(tep,
+					 data + arg->dynarray.field->offset,
+					 arg->dynarray.field->size);
+		/*
+		 * The total allocated length of the dynamic array is
+		 * stored in the top half of the field, and the offset
+		 * is in the bottom half of the 32 bit field.
+		 */
+		offset &= 0xffff;
+		val = (unsigned long long)((unsigned long)data + offset);
+		break;
+	default: /* not sure what to do there */
+		return 0;
+	}
+	return val;
+
+out_warning_op:
+	do_warning_event(event, "%s: unknown op '%s'", __func__, arg->op.op);
+	return 0;
+
+out_warning_field:
+	do_warning_event(event, "%s: field %s not found",
+			 __func__, arg->field.name);
+	return 0;
+}
+
+struct flag {
+	const char *name;
+	unsigned long long value;
+};
+
+static const struct flag flags[] = {
+	{ "HI_SOFTIRQ", 0 },
+	{ "TIMER_SOFTIRQ", 1 },
+	{ "NET_TX_SOFTIRQ", 2 },
+	{ "NET_RX_SOFTIRQ", 3 },
+	{ "BLOCK_SOFTIRQ", 4 },
+	{ "IRQ_POLL_SOFTIRQ", 5 },
+	{ "TASKLET_SOFTIRQ", 6 },
+	{ "SCHED_SOFTIRQ", 7 },
+	{ "HRTIMER_SOFTIRQ", 8 },
+	{ "RCU_SOFTIRQ", 9 },
+
+	{ "HRTIMER_NORESTART", 0 },
+	{ "HRTIMER_RESTART", 1 },
+};
+
+static long long eval_flag(const char *flag)
+{
+	int i;
+
+	/*
+	 * Some flags in the format files do not get converted.
+	 * If the flag is not numeric, see if it is something that
+	 * we already know about.
+	 */
+	if (isdigit(flag[0]))
+		return strtoull(flag, NULL, 0);
+
+	for (i = 0; i < (int)(sizeof(flags)/sizeof(flags[0])); i++)
+		if (strcmp(flags[i].name, flag) == 0)
+			return flags[i].value;
+
+	return -1LL;
+}
+
+static void print_str_to_seq(struct trace_seq *s, const char *format,
+			     int len_arg, const char *str)
+{
+	if (len_arg >= 0)
+		trace_seq_printf(s, format, len_arg, str);
+	else
+		trace_seq_printf(s, format, str);
+}
+
+static void print_bitmask_to_seq(struct tep_handle *tep,
+				 struct trace_seq *s, const char *format,
+				 int len_arg, const void *data, int size)
+{
+	int nr_bits = size * 8;
+	int str_size = (nr_bits + 3) / 4;
+	int len = 0;
+	char buf[3];
+	char *str;
+	int index;
+	int i;
+
+	/*
+	 * The kernel likes to put in commas every 32 bits, we
+	 * can do the same.
+	 */
+	str_size += (nr_bits - 1) / 32;
+
+	str = malloc(str_size + 1);
+	if (!str) {
+		do_warning("%s: not enough memory!", __func__);
+		return;
+	}
+	str[str_size] = 0;
+
+	/* Start out with -2 for the two chars per byte */
+	for (i = str_size - 2; i >= 0; i -= 2) {
+		/*
+		 * data points to a bit mask of size bytes.
+		 * In the kernel, this is an array of long words, thus
+		 * endianness is very important.
+		 */
+		if (tep->file_bigendian)
+			index = size - (len + 1);
+		else
+			index = len;
+
+		snprintf(buf, 3, "%02x", *((unsigned char *)data + index));
+		memcpy(str + i, buf, 2);
+		len++;
+		if (!(len & 3) && i > 0) {
+			i--;
+			str[i] = ',';
+		}
+	}
+
+	if (len_arg >= 0)
+		trace_seq_printf(s, format, len_arg, str);
+	else
+		trace_seq_printf(s, format, str);
+
+	free(str);
+}
+
+static void print_str_arg(struct trace_seq *s, void *data, int size,
+			  struct tep_event *event, const char *format,
+			  int len_arg, struct tep_print_arg *arg)
+{
+	struct tep_handle *tep = event->tep;
+	struct tep_print_flag_sym *flag;
+	struct tep_format_field *field;
+	struct printk_map *printk;
+	long long val, fval;
+	unsigned long long addr;
+	char *str;
+	unsigned char *hex;
+	int print;
+	int i, len;
+
+	switch (arg->type) {
+	case TEP_PRINT_NULL:
+		/* ?? */
+		return;
+	case TEP_PRINT_ATOM:
+		print_str_to_seq(s, format, len_arg, arg->atom.atom);
+		return;
+	case TEP_PRINT_FIELD:
+		field = arg->field.field;
+		if (!field) {
+			field = tep_find_any_field(event, arg->field.name);
+			if (!field) {
+				str = arg->field.name;
+				goto out_warning_field;
+			}
+			arg->field.field = field;
+		}
+		/* Zero sized fields, mean the rest of the data */
+		len = field->size ? : size - field->offset;
+
+		/*
+		 * Some events pass in pointers. If this is not an array
+		 * and the size is the same as long_size, assume that it
+		 * is a pointer.
+		 */
+		if (!(field->flags & TEP_FIELD_IS_ARRAY) &&
+		    field->size == tep->long_size) {
+
+			/* Handle heterogeneous recording and processing
+			 * architectures
+			 *
+			 * CASE I:
+			 * Traces recorded on 32-bit devices (32-bit
+			 * addressing) and processed on 64-bit devices:
+			 * In this case, only 32 bits should be read.
+			 *
+			 * CASE II:
+			 * Traces recorded on 64 bit devices and processed
+			 * on 32-bit devices:
+			 * In this case, 64 bits must be read.
+			 */
+			addr = (tep->long_size == 8) ?
+				*(unsigned long long *)(data + field->offset) :
+				(unsigned long long)*(unsigned int *)(data + field->offset);
+
+			/* Check if it matches a print format */
+			printk = find_printk(tep, addr);
+			if (printk)
+				trace_seq_puts(s, printk->printk);
+			else
+				trace_seq_printf(s, "%llx", addr);
+			break;
+		}
+		str = malloc(len + 1);
+		if (!str) {
+			do_warning_event(event, "%s: not enough memory!",
+					 __func__);
+			return;
+		}
+		memcpy(str, data + field->offset, len);
+		str[len] = 0;
+		print_str_to_seq(s, format, len_arg, str);
+		free(str);
+		break;
+	case TEP_PRINT_FLAGS:
+		val = eval_num_arg(data, size, event, arg->flags.field);
+		print = 0;
+		for (flag = arg->flags.flags; flag; flag = flag->next) {
+			fval = eval_flag(flag->value);
+			if (!val && fval < 0) {
+				print_str_to_seq(s, format, len_arg, flag->str);
+				break;
+			}
+			if (fval > 0 && (val & fval) == fval) {
+				if (print && arg->flags.delim)
+					trace_seq_puts(s, arg->flags.delim);
+				print_str_to_seq(s, format, len_arg, flag->str);
+				print = 1;
+				val &= ~fval;
+			}
+		}
+		if (val) {
+			if (print && arg->flags.delim)
+				trace_seq_puts(s, arg->flags.delim);
+			trace_seq_printf(s, "0x%llx", val);
+		}
+		break;
+	case TEP_PRINT_SYMBOL:
+		val = eval_num_arg(data, size, event, arg->symbol.field);
+		for (flag = arg->symbol.symbols; flag; flag = flag->next) {
+			fval = eval_flag(flag->value);
+			if (val == fval) {
+				print_str_to_seq(s, format, len_arg, flag->str);
+				break;
+			}
+		}
+		if (!flag)
+			trace_seq_printf(s, "0x%llx", val);
+		break;
+	case TEP_PRINT_HEX:
+	case TEP_PRINT_HEX_STR:
+		if (arg->hex.field->type == TEP_PRINT_DYNAMIC_ARRAY) {
+			unsigned long offset;
+			offset = tep_read_number(tep,
+				data + arg->hex.field->dynarray.field->offset,
+				arg->hex.field->dynarray.field->size);
+			hex = data + (offset & 0xffff);
+		} else {
+			field = arg->hex.field->field.field;
+			if (!field) {
+				str = arg->hex.field->field.name;
+				field = tep_find_any_field(event, str);
+				if (!field)
+					goto out_warning_field;
+				arg->hex.field->field.field = field;
+			}
+			hex = data + field->offset;
+		}
+		len = eval_num_arg(data, size, event, arg->hex.size);
+		for (i = 0; i < len; i++) {
+			if (i && arg->type == TEP_PRINT_HEX)
+				trace_seq_putc(s, ' ');
+			trace_seq_printf(s, "%02x", hex[i]);
+		}
+		break;
+
+	case TEP_PRINT_INT_ARRAY: {
+		void *num;
+		int el_size;
+
+		if (arg->int_array.field->type == TEP_PRINT_DYNAMIC_ARRAY) {
+			unsigned long offset;
+			struct tep_format_field *field =
+				arg->int_array.field->dynarray.field;
+			offset = tep_read_number(tep,
+						 data + field->offset,
+						 field->size);
+			num = data + (offset & 0xffff);
+		} else {
+			field = arg->int_array.field->field.field;
+			if (!field) {
+				str = arg->int_array.field->field.name;
+				field = tep_find_any_field(event, str);
+				if (!field)
+					goto out_warning_field;
+				arg->int_array.field->field.field = field;
+			}
+			num = data + field->offset;
+		}
+		len = eval_num_arg(data, size, event, arg->int_array.count);
+		el_size = eval_num_arg(data, size, event,
+				       arg->int_array.el_size);
+		for (i = 0; i < len; i++) {
+			if (i)
+				trace_seq_putc(s, ' ');
+
+			if (el_size == 1) {
+				trace_seq_printf(s, "%u", *(uint8_t *)num);
+			} else if (el_size == 2) {
+				trace_seq_printf(s, "%u", *(uint16_t *)num);
+			} else if (el_size == 4) {
+				trace_seq_printf(s, "%u", *(uint32_t *)num);
+			} else if (el_size == 8) {
+				trace_seq_printf(s, "%"PRIu64, *(uint64_t *)num);
+			} else {
+				trace_seq_printf(s, "BAD SIZE:%d 0x%x",
+						 el_size, *(uint8_t *)num);
+				el_size = 1;
+			}
+
+			num += el_size;
+		}
+		break;
+	}
+	case TEP_PRINT_TYPE:
+		break;
+	case TEP_PRINT_STRING: {
+		int str_offset;
+
+		if (arg->string.offset == -1) {
+			struct tep_format_field *f;
+
+			f = tep_find_any_field(event, arg->string.string);
+			arg->string.offset = f->offset;
+		}
+		str_offset = data2host4(tep, *(unsigned int *)(data + arg->string.offset));
+		str_offset &= 0xffff;
+		print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset);
+		break;
+	}
+	case TEP_PRINT_BSTRING:
+		print_str_to_seq(s, format, len_arg, arg->string.string);
+		break;
+	case TEP_PRINT_BITMASK: {
+		int bitmask_offset;
+		int bitmask_size;
+
+		if (arg->bitmask.offset == -1) {
+			struct tep_format_field *f;
+
+			f = tep_find_any_field(event, arg->bitmask.bitmask);
+			arg->bitmask.offset = f->offset;
+		}
+		bitmask_offset = data2host4(tep, *(unsigned int *)(data + arg->bitmask.offset));
+		bitmask_size = bitmask_offset >> 16;
+		bitmask_offset &= 0xffff;
+		print_bitmask_to_seq(tep, s, format, len_arg,
+				     data + bitmask_offset, bitmask_size);
+		break;
+	}
+	case TEP_PRINT_OP:
+		/*
+		 * The only op for string should be ? :
+		 */
+		if (arg->op.op[0] != '?')
+			return;
+		val = eval_num_arg(data, size, event, arg->op.left);
+		if (val)
+			print_str_arg(s, data, size, event,
+				      format, len_arg, arg->op.right->op.left);
+		else
+			print_str_arg(s, data, size, event,
+				      format, len_arg, arg->op.right->op.right);
+		break;
+	case TEP_PRINT_FUNC:
+		process_defined_func(s, data, size, event, arg);
+		break;
+	default:
+		/* well... */
+		break;
+	}
+
+	return;
+
+out_warning_field:
+	do_warning_event(event, "%s: field %s not found",
+			 __func__, arg->field.name);
+}
+
+static unsigned long long
+process_defined_func(struct trace_seq *s, void *data, int size,
+		     struct tep_event *event, struct tep_print_arg *arg)
+{
+	struct tep_function_handler *func_handle = arg->func.func;
+	struct func_params *param;
+	unsigned long long *args;
+	unsigned long long ret;
+	struct tep_print_arg *farg;
+	struct trace_seq str;
+	struct save_str {
+		struct save_str *next;
+		char *str;
+	} *strings = NULL, *string;
+	int i;
+
+	if (!func_handle->nr_args) {
+		ret = (*func_handle->func)(s, NULL);
+		goto out;
+	}
+
+	farg = arg->func.args;
+	param = func_handle->params;
+
+	ret = ULLONG_MAX;
+	args = malloc(sizeof(*args) * func_handle->nr_args);
+	if (!args)
+		goto out;
+
+	for (i = 0; i < func_handle->nr_args; i++) {
+		switch (param->type) {
+		case TEP_FUNC_ARG_INT:
+		case TEP_FUNC_ARG_LONG:
+		case TEP_FUNC_ARG_PTR:
+			args[i] = eval_num_arg(data, size, event, farg);
+			break;
+		case TEP_FUNC_ARG_STRING:
+			trace_seq_init(&str);
+			print_str_arg(&str, data, size, event, "%s", -1, farg);
+			trace_seq_terminate(&str);
+			string = malloc(sizeof(*string));
+			if (!string) {
+				do_warning_event(event, "%s(%d): malloc str",
+						 __func__, __LINE__);
+				goto out_free;
+			}
+			string->next = strings;
+			string->str = strdup(str.buffer);
+			if (!string->str) {
+				free(string);
+				do_warning_event(event, "%s(%d): malloc str",
+						 __func__, __LINE__);
+				goto out_free;
+			}
+			args[i] = (uintptr_t)string->str;
+			strings = string;
+			trace_seq_destroy(&str);
+			break;
+		default:
+			/*
+			 * Something went totally wrong, this is not
+			 * an input error, something in this code broke.
+			 */
+			do_warning_event(event, "Unexpected end of arguments\n");
+			goto out_free;
+		}
+		farg = farg->next;
+		param = param->next;
+	}
+
+	ret = (*func_handle->func)(s, args);
+out_free:
+	free(args);
+	while (strings) {
+		string = strings;
+		strings = string->next;
+		free(string->str);
+		free(string);
+	}
+
+ out:
+	/* TBD : handle return type here */
+	return ret;
+}
+
+static void free_args(struct tep_print_arg *args)
+{
+	struct tep_print_arg *next;
+
+	while (args) {
+		next = args->next;
+
+		free_arg(args);
+		args = next;
+	}
+}
+
+static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, struct tep_event *event)
+{
+	struct tep_handle *tep = event->tep;
+	struct tep_format_field *field, *ip_field;
+	struct tep_print_arg *args, *arg, **next;
+	unsigned long long ip, val;
+	char *ptr;
+	void *bptr;
+	int vsize = 0;
+
+	field = tep->bprint_buf_field;
+	ip_field = tep->bprint_ip_field;
+
+	if (!field) {
+		field = tep_find_field(event, "buf");
+		if (!field) {
+			do_warning_event(event, "can't find buffer field for binary printk");
+			return NULL;
+		}
+		ip_field = tep_find_field(event, "ip");
+		if (!ip_field) {
+			do_warning_event(event, "can't find ip field for binary printk");
+			return NULL;
+		}
+		tep->bprint_buf_field = field;
+		tep->bprint_ip_field = ip_field;
+	}
+
+	ip = tep_read_number(tep, data + ip_field->offset, ip_field->size);
+
+	/*
+	 * The first arg is the IP pointer.
+	 */
+	args = alloc_arg();
+	if (!args) {
+		do_warning_event(event, "%s(%d): not enough memory!",
+				 __func__, __LINE__);
+		return NULL;
+	}
+	arg = args;
+	arg->next = NULL;
+	next = &arg->next;
+
+	arg->type = TEP_PRINT_ATOM;
+		
+	if (asprintf(&arg->atom.atom, "%lld", ip) < 0)
+		goto out_free;
+
+	/* skip the first "%ps: " */
+	for (ptr = fmt + 5, bptr = data + field->offset;
+	     bptr < data + size && *ptr; ptr++) {
+		int ls = 0;
+
+		if (*ptr == '%') {
+ process_again:
+			ptr++;
+			switch (*ptr) {
+			case '%':
+				break;
+			case 'l':
+				ls++;
+				goto process_again;
+			case 'L':
+				ls = 2;
+				goto process_again;
+			case '0' ... '9':
+				goto process_again;
+			case '.':
+				goto process_again;
+			case 'z':
+			case 'Z':
+				ls = 1;
+				goto process_again;
+			case 'p':
+				ls = 1;
+				if (isalnum(ptr[1])) {
+					ptr++;
+					/* Check for special pointers */
+					switch (*ptr) {
+					case 's':
+					case 'S':
+					case 'x':
+						break;
+					case 'f':
+					case 'F':
+						/*
+						 * Pre-5.5 kernels use %pf and
+						 * %pF for printing symbols
+						 * while kernels since 5.5 use
+						 * %pfw for fwnodes. So check
+						 * %p[fF] isn't followed by 'w'.
+						 */
+						if (ptr[1] != 'w')
+							break;
+						/* fall through */
+					default:
+						/*
+						 * Older kernels do not process
+						 * dereferenced pointers.
+						 * Only process if the pointer
+						 * value is a printable.
+						 */
+						if (isprint(*(char *)bptr))
+							goto process_string;
+					}
+				}
+				/* fall through */
+			case 'd':
+			case 'u':
+			case 'i':
+			case 'x':
+			case 'X':
+			case 'o':
+				switch (ls) {
+				case 0:
+					vsize = 4;
+					break;
+				case 1:
+					vsize = tep->long_size;
+					break;
+				case 2:
+					vsize = 8;
+					break;
+				default:
+					vsize = ls; /* ? */
+					break;
+				}
+			/* fall through */
+			case '*':
+				if (*ptr == '*')
+					vsize = 4;
+
+				/* the pointers are always 4 bytes aligned */
+				bptr = (void *)(((unsigned long)bptr + 3) &
+						~3);
+				val = tep_read_number(tep, bptr, vsize);
+				bptr += vsize;
+				arg = alloc_arg();
+				if (!arg) {
+					do_warning_event(event, "%s(%d): not enough memory!",
+						   __func__, __LINE__);
+					goto out_free;
+				}
+				arg->next = NULL;
+				arg->type = TEP_PRINT_ATOM;
+				if (asprintf(&arg->atom.atom, "%lld", val) < 0) {
+					free(arg);
+					goto out_free;
+				}
+				*next = arg;
+				next = &arg->next;
+				/*
+				 * The '*' case means that an arg is used as the length.
+				 * We need to continue to figure out for what.
+				 */
+				if (*ptr == '*')
+					goto process_again;
+
+				break;
+			case 's':
+ process_string:
+				arg = alloc_arg();
+				if (!arg) {
+					do_warning_event(event, "%s(%d): not enough memory!",
+						   __func__, __LINE__);
+					goto out_free;
+				}
+				arg->next = NULL;
+				arg->type = TEP_PRINT_BSTRING;
+				arg->string.string = strdup(bptr);
+				if (!arg->string.string)
+					goto out_free;
+				bptr += strlen(bptr) + 1;
+				*next = arg;
+				next = &arg->next;
+			default:
+				break;
+			}
+		}
+	}
+
+	return args;
+
+out_free:
+	free_args(args);
+	return NULL;
+}
+
+static char *
+get_bprint_format(void *data, int size __maybe_unused,
+		  struct tep_event *event)
+{
+	struct tep_handle *tep = event->tep;
+	unsigned long long addr;
+	struct tep_format_field *field;
+	struct printk_map *printk;
+	char *format;
+
+	field = tep->bprint_fmt_field;
+
+	if (!field) {
+		field = tep_find_field(event, "fmt");
+		if (!field) {
+			do_warning_event(event, "can't find format field for binary printk");
+			return NULL;
+		}
+		tep->bprint_fmt_field = field;
+	}
+
+	addr = tep_read_number(tep, data + field->offset, field->size);
+
+	printk = find_printk(tep, addr);
+	if (!printk) {
+		if (asprintf(&format, "%%ps: (NO FORMAT FOUND at %llx)\n", addr) < 0)
+			return NULL;
+		return format;
+	}
+
+	if (asprintf(&format, "%s: %s", "%ps", printk->printk) < 0)
+		return NULL;
+
+	return format;
+}
+
+static int print_mac_arg(struct trace_seq *s, const char *format,
+			 void *data, int size, struct tep_event *event,
+			 struct tep_print_arg *arg)
+{
+	const char *fmt = "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x";
+	bool reverse = false;
+	unsigned char *buf;
+	int ret = 0;
+
+	if (arg->type == TEP_PRINT_FUNC) {
+		process_defined_func(s, data, size, event, arg);
+		return 0;
+	}
+
+	if (arg->type != TEP_PRINT_FIELD) {
+		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d",
+				 arg->type);
+		return 0;
+	}
+
+	if (format[0] == 'm') {
+		fmt = "%.2x%.2x%.2x%.2x%.2x%.2x";
+	} else if (format[0] == 'M' && format[1] == 'F') {
+		fmt = "%.2x-%.2x-%.2x-%.2x-%.2x-%.2x";
+		ret++;
+	}
+	if (format[1] == 'R') {
+		reverse = true;
+		ret++;
+	}
+
+	if (!arg->field.field) {
+		arg->field.field =
+			tep_find_any_field(event, arg->field.name);
+		if (!arg->field.field) {
+			do_warning_event(event, "%s: field %s not found",
+					 __func__, arg->field.name);
+			return ret;
+		}
+	}
+	if (arg->field.field->size != 6) {
+		trace_seq_printf(s, "INVALIDMAC");
+		return ret;
+	}
+
+	buf = data + arg->field.field->offset;
+	if (reverse)
+		trace_seq_printf(s, fmt, buf[5], buf[4], buf[3], buf[2], buf[1], buf[0]);
+	else
+		trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
+
+	return ret;
+}
+
+static int parse_ip4_print_args(struct tep_handle *tep,
+				const char *ptr, bool *reverse)
+{
+	int ret = 0;
+
+	*reverse = false;
+
+	/* hnbl */
+	switch (*ptr) {
+	case 'h':
+		if (tep->file_bigendian)
+			*reverse = false;
+		else
+			*reverse = true;
+		ret++;
+		break;
+	case 'l':
+		*reverse = true;
+		ret++;
+		break;
+	case 'n':
+	case 'b':
+		ret++;
+		/* fall through */
+	default:
+		*reverse = false;
+		break;
+	}
+
+	return ret;
+}
+
+static void print_ip4_addr(struct trace_seq *s, char i, bool reverse, unsigned char *buf)
+{
+	const char *fmt;
+
+	if (i == 'i')
+		fmt = "%03d.%03d.%03d.%03d";
+	else
+		fmt = "%d.%d.%d.%d";
+
+	if (reverse)
+		trace_seq_printf(s, fmt, buf[3], buf[2], buf[1], buf[0]);
+	else
+		trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3]);
+
+}
+
+static inline bool ipv6_addr_v4mapped(const struct in6_addr *a)
+{
+	return ((unsigned long)(a->s6_addr32[0] | a->s6_addr32[1]) |
+		(unsigned long)(a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0UL;
+}
+
+static inline bool ipv6_addr_is_isatap(const struct in6_addr *addr)
+{
+	return (addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE);
+}
+
+static void print_ip6c_addr(struct trace_seq *s, unsigned char *addr)
+{
+	int i, j, range;
+	unsigned char zerolength[8];
+	int longest = 1;
+	int colonpos = -1;
+	uint16_t word;
+	uint8_t hi, lo;
+	bool needcolon = false;
+	bool useIPv4;
+	struct in6_addr in6;
+
+	memcpy(&in6, addr, sizeof(struct in6_addr));
+
+	useIPv4 = ipv6_addr_v4mapped(&in6) || ipv6_addr_is_isatap(&in6);
+
+	memset(zerolength, 0, sizeof(zerolength));
+
+	if (useIPv4)
+		range = 6;
+	else
+		range = 8;
+
+	/* find position of longest 0 run */
+	for (i = 0; i < range; i++) {
+		for (j = i; j < range; j++) {
+			if (in6.s6_addr16[j] != 0)
+				break;
+			zerolength[i]++;
+		}
+	}
+	for (i = 0; i < range; i++) {
+		if (zerolength[i] > longest) {
+			longest = zerolength[i];
+			colonpos = i;
+		}
+	}
+	if (longest == 1)		/* don't compress a single 0 */
+		colonpos = -1;
+
+	/* emit address */
+	for (i = 0; i < range; i++) {
+		if (i == colonpos) {
+			if (needcolon || i == 0)
+				trace_seq_printf(s, ":");
+			trace_seq_printf(s, ":");
+			needcolon = false;
+			i += longest - 1;
+			continue;
+		}
+		if (needcolon) {
+			trace_seq_printf(s, ":");
+			needcolon = false;
+		}
+		/* hex u16 without leading 0s */
+		word = ntohs(in6.s6_addr16[i]);
+		hi = word >> 8;
+		lo = word & 0xff;
+		if (hi)
+			trace_seq_printf(s, "%x%02x", hi, lo);
+		else
+			trace_seq_printf(s, "%x", lo);
+
+		needcolon = true;
+	}
+
+	if (useIPv4) {
+		if (needcolon)
+			trace_seq_printf(s, ":");
+		print_ip4_addr(s, 'I', false, &in6.s6_addr[12]);
+	}
+
+	return;
+}
+
+static void print_ip6_addr(struct trace_seq *s, char i, unsigned char *buf)
+{
+	int j;
+
+	for (j = 0; j < 16; j += 2) {
+		trace_seq_printf(s, "%02x%02x", buf[j], buf[j+1]);
+		if (i == 'I' && j < 14)
+			trace_seq_printf(s, ":");
+	}
+}
+
+/*
+ * %pi4   print an IPv4 address with leading zeros
+ * %pI4   print an IPv4 address without leading zeros
+ * %pi6   print an IPv6 address without colons
+ * %pI6   print an IPv6 address with colons
+ * %pI6c  print an IPv6 address in compressed form with colons
+ * %pISpc print an IP address based on sockaddr; p adds port.
+ */
+static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i,
+			  void *data, int size, struct tep_event *event,
+			  struct tep_print_arg *arg)
+{
+	bool reverse = false;
+	unsigned char *buf;
+	int ret;
+
+	ret = parse_ip4_print_args(event->tep, ptr, &reverse);
+
+	if (arg->type == TEP_PRINT_FUNC) {
+		process_defined_func(s, data, size, event, arg);
+		return ret;
+	}
+
+	if (arg->type != TEP_PRINT_FIELD) {
+		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
+		return ret;
+	}
+
+	if (!arg->field.field) {
+		arg->field.field =
+			tep_find_any_field(event, arg->field.name);
+		if (!arg->field.field) {
+			do_warning("%s: field %s not found",
+				   __func__, arg->field.name);
+			return ret;
+		}
+	}
+
+	buf = data + arg->field.field->offset;
+
+	if (arg->field.field->size != 4) {
+		trace_seq_printf(s, "INVALIDIPv4");
+		return ret;
+	}
+
+	print_ip4_addr(s, i, reverse, buf);
+	return ret;
+
+}
+
+static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i,
+			  void *data, int size, struct tep_event *event,
+			  struct tep_print_arg *arg)
+{
+	char have_c = 0;
+	unsigned char *buf;
+	int rc = 0;
+
+	/* pI6c */
+	if (i == 'I' && *ptr == 'c') {
+		have_c = 1;
+		ptr++;
+		rc++;
+	}
+
+	if (arg->type == TEP_PRINT_FUNC) {
+		process_defined_func(s, data, size, event, arg);
+		return rc;
+	}
+
+	if (arg->type != TEP_PRINT_FIELD) {
+		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
+		return rc;
+	}
+
+	if (!arg->field.field) {
+		arg->field.field =
+			tep_find_any_field(event, arg->field.name);
+		if (!arg->field.field) {
+			do_warning("%s: field %s not found",
+				   __func__, arg->field.name);
+			return rc;
+		}
+	}
+
+	buf = data + arg->field.field->offset;
+
+	if (arg->field.field->size != 16) {
+		trace_seq_printf(s, "INVALIDIPv6");
+		return rc;
+	}
+
+	if (have_c)
+		print_ip6c_addr(s, buf);
+	else
+		print_ip6_addr(s, i, buf);
+
+	return rc;
+}
+
+static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
+			  void *data, int size, struct tep_event *event,
+			  struct tep_print_arg *arg)
+{
+	char have_c = 0, have_p = 0;
+	unsigned char *buf;
+	struct sockaddr_storage *sa;
+	bool reverse = false;
+	int rc = 0;
+	int ret;
+
+	/* pISpc */
+	if (i == 'I') {
+		if (*ptr == 'p') {
+			have_p = 1;
+			ptr++;
+			rc++;
+		}
+		if (*ptr == 'c') {
+			have_c = 1;
+			ptr++;
+			rc++;
+		}
+	}
+	ret = parse_ip4_print_args(event->tep, ptr, &reverse);
+	ptr += ret;
+	rc += ret;
+
+	if (arg->type == TEP_PRINT_FUNC) {
+		process_defined_func(s, data, size, event, arg);
+		return rc;
+	}
+
+	if (arg->type != TEP_PRINT_FIELD) {
+		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
+		return rc;
+	}
+
+	if (!arg->field.field) {
+		arg->field.field =
+			tep_find_any_field(event, arg->field.name);
+		if (!arg->field.field) {
+			do_warning("%s: field %s not found",
+				   __func__, arg->field.name);
+			return rc;
+		}
+	}
+
+	sa = (struct sockaddr_storage *) (data + arg->field.field->offset);
+
+	if (sa->ss_family == AF_INET) {
+		struct sockaddr_in *sa4 = (struct sockaddr_in *) sa;
+
+		if (arg->field.field->size < sizeof(struct sockaddr_in)) {
+			trace_seq_printf(s, "INVALIDIPv4");
+			return rc;
+		}
+
+		print_ip4_addr(s, i, reverse, (unsigned char *) &sa4->sin_addr);
+		if (have_p)
+			trace_seq_printf(s, ":%d", ntohs(sa4->sin_port));
+
+
+	} else if (sa->ss_family == AF_INET6) {
+		struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) sa;
+
+		if (arg->field.field->size < sizeof(struct sockaddr_in6)) {
+			trace_seq_printf(s, "INVALIDIPv6");
+			return rc;
+		}
+
+		if (have_p)
+			trace_seq_printf(s, "[");
+
+		buf = (unsigned char *) &sa6->sin6_addr;
+		if (have_c)
+			print_ip6c_addr(s, buf);
+		else
+			print_ip6_addr(s, i, buf);
+
+		if (have_p)
+			trace_seq_printf(s, "]:%d", ntohs(sa6->sin6_port));
+	}
+
+	return rc;
+}
+
+static int print_ip_arg(struct trace_seq *s, const char *ptr,
+			void *data, int size, struct tep_event *event,
+			struct tep_print_arg *arg)
+{
+	char i = *ptr;  /* 'i' or 'I' */
+	int rc = 1;
+
+	/* IP version */
+	ptr++;
+
+	switch (*ptr) {
+	case '4':
+		rc += print_ipv4_arg(s, ptr + 1, i, data, size, event, arg);
+		break;
+	case '6':
+		rc += print_ipv6_arg(s, ptr + 1, i, data, size, event, arg);
+		break;
+	case 'S':
+		rc += print_ipsa_arg(s, ptr + 1, i, data, size, event, arg);
+		break;
+	default:
+		return 0;
+	}
+
+	return rc;
+}
+
+static const int guid_index[16] = {3, 2, 1, 0, 5, 4, 7, 6, 8, 9, 10, 11, 12, 13, 14, 15};
+static const int uuid_index[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+
+static int print_uuid_arg(struct trace_seq *s, const char *ptr,
+			void *data, int size, struct tep_event *event,
+			struct tep_print_arg *arg)
+{
+	const int *index = uuid_index;
+	char *format = "%02x";
+	int ret = 0;
+	char *buf;
+	int i;
+
+	switch (*(ptr + 1)) {
+	case 'L':
+		format = "%02X";
+		/* fall through */
+	case 'l':
+		index = guid_index;
+		ret++;
+		break;
+	case 'B':
+		format = "%02X";
+		/* fall through */
+	case 'b':
+		ret++;
+		break;
+	}
+
+	if (arg->type == TEP_PRINT_FUNC) {
+		process_defined_func(s, data, size, event, arg);
+		return ret;
+	}
+
+	if (arg->type != TEP_PRINT_FIELD) {
+		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
+		return ret;
+	}
+
+	if (!arg->field.field) {
+		arg->field.field =
+			tep_find_any_field(event, arg->field.name);
+		if (!arg->field.field) {
+			do_warning("%s: field %s not found",
+				   __func__, arg->field.name);
+			return ret;
+		}
+	}
+
+	if (arg->field.field->size != 16) {
+		trace_seq_printf(s, "INVALIDUUID");
+		return ret;
+	}
+
+	buf = data + arg->field.field->offset;
+
+	for (i = 0; i < 16; i++) {
+		trace_seq_printf(s, format, buf[index[i]] & 0xff);
+		switch (i) {
+		case 3:
+		case 5:
+		case 7:
+		case 9:
+			trace_seq_printf(s, "-");
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static int print_raw_buff_arg(struct trace_seq *s, const char *ptr,
+			      void *data, int size, struct tep_event *event,
+			      struct tep_print_arg *arg, int print_len)
+{
+	int plen = print_len;
+	char *delim = " ";
+	int ret = 0;
+	char *buf;
+	int i;
+	unsigned long offset;
+	int arr_len;
+
+	switch (*(ptr + 1)) {
+	case 'C':
+		delim = ":";
+		ret++;
+		break;
+	case 'D':
+		delim = "-";
+		ret++;
+		break;
+	case 'N':
+		delim = "";
+		ret++;
+		break;
+	}
+
+	if (arg->type == TEP_PRINT_FUNC) {
+		process_defined_func(s, data, size, event, arg);
+		return ret;
+	}
+
+	if (arg->type != TEP_PRINT_DYNAMIC_ARRAY) {
+		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
+		return ret;
+	}
+
+	offset = tep_read_number(event->tep,
+				 data + arg->dynarray.field->offset,
+				 arg->dynarray.field->size);
+	arr_len = (unsigned long long)(offset >> 16);
+	buf = data + (offset & 0xffff);
+
+	if (arr_len < plen)
+		plen = arr_len;
+
+	if (plen < 1)
+		return ret;
+
+	trace_seq_printf(s, "%02x", buf[0] & 0xff);
+	for (i = 1; i < plen; i++)
+		trace_seq_printf(s, "%s%02x", delim, buf[i] & 0xff);
+
+	return ret;
+}
+
+static int is_printable_array(char *p, unsigned int len)
+{
+	unsigned int i;
+
+	for (i = 0; i < len && p[i]; i++)
+		if (!isprint(p[i]) && !isspace(p[i]))
+		    return 0;
+	return 1;
+}
+
+void tep_print_field(struct trace_seq *s, void *data,
+		     struct tep_format_field *field)
+{
+	unsigned long long val;
+	unsigned int offset, len, i;
+	struct tep_handle *tep = field->event->tep;
+
+	if (field->flags & TEP_FIELD_IS_ARRAY) {
+		offset = field->offset;
+		len = field->size;
+		if (field->flags & TEP_FIELD_IS_DYNAMIC) {
+			val = tep_read_number(tep, data + offset, len);
+			offset = val;
+			len = offset >> 16;
+			offset &= 0xffff;
+		}
+		if (field->flags & TEP_FIELD_IS_STRING &&
+		    is_printable_array(data + offset, len)) {
+			trace_seq_printf(s, "%s", (char *)data + offset);
+		} else {
+			trace_seq_puts(s, "ARRAY[");
+			for (i = 0; i < len; i++) {
+				if (i)
+					trace_seq_puts(s, ", ");
+				trace_seq_printf(s, "%02x",
+						 *((unsigned char *)data + offset + i));
+			}
+			trace_seq_putc(s, ']');
+			field->flags &= ~TEP_FIELD_IS_STRING;
+		}
+	} else {
+		val = tep_read_number(tep, data + field->offset,
+				      field->size);
+		if (field->flags & TEP_FIELD_IS_POINTER) {
+			trace_seq_printf(s, "0x%llx", val);
+		} else if (field->flags & TEP_FIELD_IS_SIGNED) {
+			switch (field->size) {
+			case 4:
+				/*
+				 * If field is long then print it in hex.
+				 * A long usually stores pointers.
+				 */
+				if (field->flags & TEP_FIELD_IS_LONG)
+					trace_seq_printf(s, "0x%x", (int)val);
+				else
+					trace_seq_printf(s, "%d", (int)val);
+				break;
+			case 2:
+				trace_seq_printf(s, "%2d", (short)val);
+				break;
+			case 1:
+				trace_seq_printf(s, "%1d", (char)val);
+				break;
+			default:
+				trace_seq_printf(s, "%lld", val);
+			}
+		} else {
+			if (field->flags & TEP_FIELD_IS_LONG)
+				trace_seq_printf(s, "0x%llx", val);
+			else
+				trace_seq_printf(s, "%llu", val);
+		}
+	}
+}
+
+void tep_print_fields(struct trace_seq *s, void *data,
+		      int size __maybe_unused, struct tep_event *event)
+{
+	struct tep_format_field *field;
+
+	field = event->format.fields;
+	while (field) {
+		trace_seq_printf(s, " %s=", field->name);
+		tep_print_field(s, data, field);
+		field = field->next;
+	}
+}
+
+static int print_function(struct trace_seq *s, const char *format,
+			  void *data, int size, struct tep_event *event,
+			  struct tep_print_arg *arg)
+{
+	struct func_map *func;
+	unsigned long long val;
+
+	val = eval_num_arg(data, size, event, arg);
+	func = find_func(event->tep, val);
+	if (func) {
+		trace_seq_puts(s, func->func);
+		if (*format == 'F' || *format == 'S')
+			trace_seq_printf(s, "+0x%llx", val - func->addr);
+	} else {
+		if (event->tep->long_size == 4)
+			trace_seq_printf(s, "0x%lx", (long)val);
+		else
+			trace_seq_printf(s, "0x%llx", (long long)val);
+	}
+
+	return 0;
+}
+
+static int print_arg_pointer(struct trace_seq *s, const char *format, int plen,
+			     void *data, int size,
+			     struct tep_event *event, struct tep_print_arg *arg)
+{
+	unsigned long long val;
+	int ret = 1;
+
+	if (arg->type == TEP_PRINT_BSTRING) {
+		trace_seq_puts(s, arg->string.string);
+		return 0;
+	}
+	while (*format) {
+		if (*format == 'p') {
+			format++;
+			break;
+		}
+		format++;
+	}
+
+	switch (*format) {
+	case 'F':
+	case 'f':
+	case 'S':
+	case 's':
+		ret += print_function(s, format, data, size, event, arg);
+		break;
+	case 'M':
+	case 'm':
+		ret += print_mac_arg(s, format, data, size, event, arg);
+		break;
+	case 'I':
+	case 'i':
+		ret += print_ip_arg(s, format, data, size, event, arg);
+		break;
+	case 'U':
+		ret += print_uuid_arg(s, format, data, size, event, arg);
+		break;
+	case 'h':
+		ret += print_raw_buff_arg(s, format, data, size, event, arg, plen);
+		break;
+	default:
+		ret = 0;
+		val = eval_num_arg(data, size, event, arg);
+		trace_seq_printf(s, "%p", (void *)(intptr_t)val);
+		break;
+	}
+
+	return ret;
+
+}
+
+static int print_arg_number(struct trace_seq *s, const char *format, int plen,
+			    void *data, int size, int ls,
+			    struct tep_event *event, struct tep_print_arg *arg)
+{
+	unsigned long long val;
+
+	val = eval_num_arg(data, size, event, arg);
+
+	switch (ls) {
+	case -2:
+		if (plen >= 0)
+			trace_seq_printf(s, format, plen, (char)val);
+		else
+			trace_seq_printf(s, format, (char)val);
+		break;
+	case -1:
+		if (plen >= 0)
+			trace_seq_printf(s, format, plen, (short)val);
+		else
+			trace_seq_printf(s, format, (short)val);
+		break;
+	case 0:
+		if (plen >= 0)
+			trace_seq_printf(s, format, plen, (int)val);
+		else
+			trace_seq_printf(s, format, (int)val);
+		break;
+	case 1:
+		if (plen >= 0)
+			trace_seq_printf(s, format, plen, (long)val);
+		else
+			trace_seq_printf(s, format, (long)val);
+		break;
+	case 2:
+		if (plen >= 0)
+			trace_seq_printf(s, format, plen, (long long)val);
+		else
+			trace_seq_printf(s, format, (long long)val);
+		break;
+	default:
+		do_warning_event(event, "bad count (%d)", ls);
+		event->flags |= TEP_EVENT_FL_FAILED;
+	}
+	return 0;
+}
+
+
+static void print_arg_string(struct trace_seq *s, const char *format, int plen,
+			     void *data, int size,
+			     struct tep_event *event, struct tep_print_arg *arg)
+{
+	struct trace_seq p;
+
+	/* Use helper trace_seq */
+	trace_seq_init(&p);
+	print_str_arg(&p, data, size, event,
+		      format, plen, arg);
+	trace_seq_terminate(&p);
+	trace_seq_puts(s, p.buffer);
+	trace_seq_destroy(&p);
+}
+
+static int parse_arg_format_pointer(const char *format)
+{
+	int ret = 0;
+	int index;
+	int loop;
+
+	switch (*format) {
+	case 'F':
+	case 'S':
+	case 'f':
+	case 's':
+		ret++;
+		break;
+	case 'M':
+	case 'm':
+		/* [mM]R , [mM]F */
+		switch (format[1]) {
+		case 'R':
+		case 'F':
+			ret++;
+			break;
+		}
+		ret++;
+		break;
+	case 'I':
+	case 'i':
+		index = 2;
+		loop = 1;
+		switch (format[1]) {
+		case 'S':
+			/*[S][pfs]*/
+			while (loop) {
+				switch (format[index]) {
+				case 'p':
+				case 'f':
+				case 's':
+					ret++;
+					index++;
+					break;
+				default:
+					loop = 0;
+					break;
+				}
+			}
+			/* fall through */
+		case '4':
+			/* [4S][hnbl] */
+			switch (format[index]) {
+			case 'h':
+			case 'n':
+			case 'l':
+			case 'b':
+				ret++;
+				index++;
+				break;
+			}
+			if (format[1] == '4') {
+				ret++;
+				break;
+			}
+			/* fall through */
+		case '6':
+			/* [6S]c */
+			if (format[index] == 'c')
+				ret++;
+			ret++;
+			break;
+		}
+		ret++;
+		break;
+	case 'U':
+		switch (format[1]) {
+		case 'L':
+		case 'l':
+		case 'B':
+		case 'b':
+			ret++;
+			break;
+		}
+		ret++;
+		break;
+	case 'h':
+		switch (format[1]) {
+		case 'C':
+		case 'D':
+		case 'N':
+			ret++;
+			break;
+		}
+		ret++;
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+static void free_parse_args(struct tep_print_parse *arg)
+{
+	struct tep_print_parse *del;
+
+	while (arg) {
+		del = arg;
+		arg = del->next;
+		free(del->format);
+		free(del);
+	}
+}
+
+static int parse_arg_add(struct tep_print_parse **parse, char *format,
+			 enum tep_print_parse_type type,
+			 struct tep_print_arg *arg,
+			 struct tep_print_arg *len_as_arg,
+			 int ls)
+{
+	struct tep_print_parse *parg = NULL;
+
+	parg = calloc(1, sizeof(*parg));
+	if (!parg)
+		goto error;
+	parg->format = strdup(format);
+	if (!parg->format)
+		goto error;
+	parg->type = type;
+	parg->arg = arg;
+	parg->len_as_arg = len_as_arg;
+	parg->ls = ls;
+	*parse = parg;
+	return 0;
+error:
+	if (parg) {
+		free(parg->format);
+		free(parg);
+	}
+	return -1;
+}
+
+static int parse_arg_format(struct tep_print_parse **parse,
+			    struct tep_event *event,
+			    const char *format, struct tep_print_arg **arg)
+{
+	struct tep_print_arg *len_arg = NULL;
+	char print_format[32];
+	const char *start = format;
+	int ret = 0;
+	int ls = 0;
+	int res;
+	int len;
+
+	format++;
+	ret++;
+	for (; *format; format++) {
+		switch (*format) {
+		case '#':
+			/* FIXME: need to handle properly */
+			break;
+		case 'h':
+			ls--;
+			break;
+		case 'l':
+			ls++;
+			break;
+		case 'L':
+			ls = 2;
+			break;
+		case '.':
+		case 'z':
+		case 'Z':
+		case '0' ... '9':
+		case '-':
+			break;
+		case '*':
+			/* The argument is the length. */
+			if (!*arg) {
+				do_warning_event(event, "no argument match");
+				event->flags |= TEP_EVENT_FL_FAILED;
+				goto out_failed;
+			}
+			if (len_arg) {
+				do_warning_event(event, "argument already matched");
+				event->flags |= TEP_EVENT_FL_FAILED;
+				goto out_failed;
+			}
+			len_arg = *arg;
+			*arg = (*arg)->next;
+			break;
+		case 'p':
+			if (!*arg) {
+				do_warning_event(event, "no argument match");
+				event->flags |= TEP_EVENT_FL_FAILED;
+				goto out_failed;
+			}
+			res = parse_arg_format_pointer(format + 1);
+			if (res > 0) {
+				format += res;
+				ret += res;
+			}
+			len = ((unsigned long)format + 1) -
+				(unsigned long)start;
+			/* should never happen */
+			if (len > 31) {
+				do_warning_event(event, "bad format!");
+				event->flags |= TEP_EVENT_FL_FAILED;
+				len = 31;
+			}
+			memcpy(print_format, start, len);
+			print_format[len] = 0;
+
+			parse_arg_add(parse, print_format,
+				      PRINT_FMT_ARG_POINTER, *arg, len_arg, ls);
+			*arg = (*arg)->next;
+			ret++;
+			return ret;
+		case 'd':
+		case 'u':
+		case 'i':
+		case 'x':
+		case 'X':
+		case 'o':
+			if (!*arg) {
+				do_warning_event(event, "no argument match");
+				event->flags |= TEP_EVENT_FL_FAILED;
+				goto out_failed;
+			}
+
+			len = ((unsigned long)format + 1) -
+				(unsigned long)start;
+
+			/* should never happen */
+			if (len > 30) {
+				do_warning_event(event, "bad format!");
+				event->flags |= TEP_EVENT_FL_FAILED;
+				len = 31;
+			}
+			memcpy(print_format, start, len);
+			print_format[len] = 0;
+
+			if (event->tep->long_size == 8 && ls == 1 &&
+			    sizeof(long) != 8) {
+				char *p;
+
+				/* make %l into %ll */
+				if (ls == 1 && (p = strchr(print_format, 'l')))
+					memmove(p+1, p, strlen(p)+1);
+				ls = 2;
+			}
+			if (ls < -2 || ls > 2) {
+				do_warning_event(event, "bad count (%d)", ls);
+				event->flags |= TEP_EVENT_FL_FAILED;
+			}
+			parse_arg_add(parse, print_format,
+				      PRINT_FMT_ARG_DIGIT, *arg, len_arg, ls);
+			*arg = (*arg)->next;
+			ret++;
+			return ret;
+		case 's':
+			if (!*arg) {
+				do_warning_event(event, "no matching argument");
+				event->flags |= TEP_EVENT_FL_FAILED;
+				goto out_failed;
+			}
+
+			len = ((unsigned long)format + 1) -
+				(unsigned long)start;
+
+			/* should never happen */
+			if (len > 31) {
+				do_warning_event(event, "bad format!");
+				event->flags |= TEP_EVENT_FL_FAILED;
+				len = 31;
+			}
+
+			memcpy(print_format, start, len);
+			print_format[len] = 0;
+
+			parse_arg_add(parse, print_format,
+					PRINT_FMT_ARG_STRING, *arg, len_arg, 0);
+			*arg = (*arg)->next;
+			ret++;
+			return ret;
+		default:
+			snprintf(print_format, 32, ">%c<", *format);
+			parse_arg_add(parse, print_format,
+					PRINT_FMT_STRING, NULL, NULL, 0);
+			ret++;
+			return ret;
+		}
+		ret++;
+	}
+
+out_failed:
+	return ret;
+
+}
+
+static int parse_arg_string(struct tep_print_parse **parse, const char *format)
+{
+	struct trace_seq s;
+	int ret = 0;
+
+	trace_seq_init(&s);
+	for (; *format; format++) {
+		if (*format == '\\') {
+			format++;
+			ret++;
+			switch (*format) {
+			case 'n':
+				trace_seq_putc(&s, '\n');
+				break;
+			case 't':
+				trace_seq_putc(&s, '\t');
+				break;
+			case 'r':
+				trace_seq_putc(&s, '\r');
+				break;
+			case '\\':
+				trace_seq_putc(&s, '\\');
+				break;
+			default:
+				trace_seq_putc(&s, *format);
+				break;
+			}
+		} else if (*format == '%') {
+			if (*(format + 1) == '%') {
+				trace_seq_putc(&s, '%');
+				format++;
+				ret++;
+			} else
+				break;
+		} else
+			trace_seq_putc(&s, *format);
+
+		ret++;
+	}
+	trace_seq_terminate(&s);
+	parse_arg_add(parse, s.buffer, PRINT_FMT_STRING, NULL, NULL, 0);
+	trace_seq_destroy(&s);
+
+	return ret;
+}
+
+static struct tep_print_parse *
+parse_args(struct tep_event *event, const char *format, struct tep_print_arg *arg)
+{
+	struct tep_print_parse *parse_ret = NULL;
+	struct tep_print_parse **parse = NULL;
+	int ret;
+	int len;
+
+	len = strlen(format);
+	while (*format) {
+		if (!parse_ret)
+			parse = &parse_ret;
+		if (*format == '%' && *(format + 1) != '%')
+			ret = parse_arg_format(parse, event, format, &arg);
+		else
+			ret = parse_arg_string(parse, format);
+		if (*parse)
+			parse = &((*parse)->next);
+
+		len -= ret;
+		if (len > 0)
+			format += ret;
+		else
+			break;
+	}
+	return parse_ret;
+}
+
+static void print_event_cache(struct tep_print_parse *parse, struct trace_seq *s,
+			      void *data, int size, struct tep_event *event)
+{
+	int len_arg;
+
+	while (parse) {
+		if (parse->len_as_arg)
+			len_arg = eval_num_arg(data, size, event, parse->len_as_arg);
+		switch (parse->type) {
+		case PRINT_FMT_ARG_DIGIT:
+			print_arg_number(s, parse->format,
+					parse->len_as_arg ? len_arg : -1, data,
+					 size, parse->ls, event, parse->arg);
+			break;
+		case PRINT_FMT_ARG_POINTER:
+			print_arg_pointer(s, parse->format,
+					  parse->len_as_arg ? len_arg : 1,
+					  data, size, event, parse->arg);
+			break;
+		case PRINT_FMT_ARG_STRING:
+			print_arg_string(s, parse->format,
+					 parse->len_as_arg ? len_arg : -1,
+					 data, size, event, parse->arg);
+			break;
+		case PRINT_FMT_STRING:
+		default:
+			trace_seq_printf(s, "%s", parse->format);
+			break;
+		}
+		parse = parse->next;
+	}
+}
+
+static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event *event)
+{
+	struct tep_print_parse *parse = event->print_fmt.print_cache;
+	struct tep_print_arg *args = NULL;
+	char *bprint_fmt = NULL;
+
+	if (event->flags & TEP_EVENT_FL_FAILED) {
+		trace_seq_printf(s, "[FAILED TO PARSE]");
+		tep_print_fields(s, data, size, event);
+		return;
+	}
+
+	if (event->flags & TEP_EVENT_FL_ISBPRINT) {
+		bprint_fmt = get_bprint_format(data, size, event);
+		args = make_bprint_args(bprint_fmt, data, size, event);
+		parse = parse_args(event, bprint_fmt, args);
+	}
+
+	print_event_cache(parse, s, data, size, event);
+
+	if (event->flags & TEP_EVENT_FL_ISBPRINT) {
+		free_parse_args(parse);
+		free_args(args);
+		free(bprint_fmt);
+	}
+}
+
+/*
+ * This parses out the Latency format (interrupts disabled,
+ * need rescheduling, in hard/soft interrupt, preempt count
+ * and lock depth) and places it into the trace_seq.
+ */
+static void data_latency_format(struct tep_handle *tep, struct trace_seq *s,
+				char *format, struct tep_record *record)
+{
+	static int check_lock_depth = 1;
+	static int check_migrate_disable = 1;
+	static int lock_depth_exists;
+	static int migrate_disable_exists;
+	unsigned int lat_flags;
+	struct trace_seq sq;
+	unsigned int pc;
+	int lock_depth = 0;
+	int migrate_disable = 0;
+	int hardirq;
+	int softirq;
+	void *data = record->data;
+
+	trace_seq_init(&sq);
+	lat_flags = parse_common_flags(tep, data);
+	pc = parse_common_pc(tep, data);
+	/* lock_depth may not always exist */
+	if (lock_depth_exists)
+		lock_depth = parse_common_lock_depth(tep, data);
+	else if (check_lock_depth) {
+		lock_depth = parse_common_lock_depth(tep, data);
+		if (lock_depth < 0)
+			check_lock_depth = 0;
+		else
+			lock_depth_exists = 1;
+	}
+
+	/* migrate_disable may not always exist */
+	if (migrate_disable_exists)
+		migrate_disable = parse_common_migrate_disable(tep, data);
+	else if (check_migrate_disable) {
+		migrate_disable = parse_common_migrate_disable(tep, data);
+		if (migrate_disable < 0)
+			check_migrate_disable = 0;
+		else
+			migrate_disable_exists = 1;
+	}
+
+	hardirq = lat_flags & TRACE_FLAG_HARDIRQ;
+	softirq = lat_flags & TRACE_FLAG_SOFTIRQ;
+
+	trace_seq_printf(&sq, "%c%c%c",
+	       (lat_flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
+	       (lat_flags & TRACE_FLAG_IRQS_NOSUPPORT) ?
+	       'X' : '.',
+	       (lat_flags & TRACE_FLAG_NEED_RESCHED) ?
+	       'N' : '.',
+	       (hardirq && softirq) ? 'H' :
+	       hardirq ? 'h' : softirq ? 's' : '.');
+
+	if (pc)
+		trace_seq_printf(&sq, "%x", pc);
+	else
+		trace_seq_printf(&sq, ".");
+
+	if (migrate_disable_exists) {
+		if (migrate_disable < 0)
+			trace_seq_printf(&sq, ".");
+		else
+			trace_seq_printf(&sq, "%d", migrate_disable);
+	}
+
+	if (lock_depth_exists) {
+		if (lock_depth < 0)
+			trace_seq_printf(&sq, ".");
+		else
+			trace_seq_printf(&sq, "%d", lock_depth);
+	}
+
+	if (sq.state == TRACE_SEQ__MEM_ALLOC_FAILED) {
+		s->state = TRACE_SEQ__MEM_ALLOC_FAILED;
+		return;
+	}
+
+	trace_seq_terminate(&sq);
+	trace_seq_puts(s, sq.buffer);
+	trace_seq_destroy(&sq);
+	trace_seq_terminate(s);
+}
+
+/**
+ * tep_data_type - parse out the given event type
+ * @tep: a handle to the trace event parser context
+ * @rec: the record to read from
+ *
+ * This returns the event id from the @rec.
+ */
+int tep_data_type(struct tep_handle *tep, struct tep_record *rec)
+{
+	return trace_parse_common_type(tep, rec->data);
+}
+
+/**
+ * tep_data_pid - parse the PID from record
+ * @tep: a handle to the trace event parser context
+ * @rec: the record to parse
+ *
+ * This returns the PID from a record.
+ */
+int tep_data_pid(struct tep_handle *tep, struct tep_record *rec)
+{
+	return parse_common_pid(tep, rec->data);
+}
+
+/**
+ * tep_data_preempt_count - parse the preempt count from the record
+ * @tep: a handle to the trace event parser context
+ * @rec: the record to parse
+ *
+ * This returns the preempt count from a record.
+ */
+int tep_data_preempt_count(struct tep_handle *tep, struct tep_record *rec)
+{
+	return parse_common_pc(tep, rec->data);
+}
+
+/**
+ * tep_data_flags - parse the latency flags from the record
+ * @tep: a handle to the trace event parser context
+ * @rec: the record to parse
+ *
+ * This returns the latency flags from a record.
+ *
+ *  Use trace_flag_type enum for the flags (see event-parse.h).
+ */
+int tep_data_flags(struct tep_handle *tep, struct tep_record *rec)
+{
+	return parse_common_flags(tep, rec->data);
+}
+
+/**
+ * tep_data_comm_from_pid - return the command line from PID
+ * @tep: a handle to the trace event parser context
+ * @pid: the PID of the task to search for
+ *
+ * This returns a pointer to the command line that has the given
+ * @pid.
+ */
+const char *tep_data_comm_from_pid(struct tep_handle *tep, int pid)
+{
+	const char *comm;
+
+	comm = find_cmdline(tep, pid);
+	return comm;
+}
+
+static struct tep_cmdline *
+pid_from_cmdlist(struct tep_handle *tep, const char *comm, struct tep_cmdline *next)
+{
+	struct cmdline_list *cmdlist = (struct cmdline_list *)next;
+
+	if (cmdlist)
+		cmdlist = cmdlist->next;
+	else
+		cmdlist = tep->cmdlist;
+
+	while (cmdlist && strcmp(cmdlist->comm, comm) != 0)
+		cmdlist = cmdlist->next;
+
+	return (struct tep_cmdline *)cmdlist;
+}
+
+/**
+ * tep_data_pid_from_comm - return the pid from a given comm
+ * @tep: a handle to the trace event parser context
+ * @comm: the cmdline to find the pid from
+ * @next: the cmdline structure to find the next comm
+ *
+ * This returns the cmdline structure that holds a pid for a given
+ * comm, or NULL if none found. As there may be more than one pid for
+ * a given comm, the result of this call can be passed back into
+ * a recurring call in the @next parameter, and then it will find the
+ * next pid.
+ * Also, it does a linear search, so it may be slow.
+ */
+struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *tep, const char *comm,
+					   struct tep_cmdline *next)
+{
+	struct tep_cmdline *cmdline;
+
+	/*
+	 * If the cmdlines have not been converted yet, then use
+	 * the list.
+	 */
+	if (!tep->cmdlines)
+		return pid_from_cmdlist(tep, comm, next);
+
+	if (next) {
+		/*
+		 * The next pointer could have been still from
+		 * a previous call before cmdlines were created
+		 */
+		if (next < tep->cmdlines ||
+		    next >= tep->cmdlines + tep->cmdline_count)
+			next = NULL;
+		else
+			cmdline  = next++;
+	}
+
+	if (!next)
+		cmdline = tep->cmdlines;
+
+	while (cmdline < tep->cmdlines + tep->cmdline_count) {
+		if (strcmp(cmdline->comm, comm) == 0)
+			return cmdline;
+		cmdline++;
+	}
+	return NULL;
+}
+
+/**
+ * tep_cmdline_pid - return the pid associated to a given cmdline
+ * @tep: a handle to the trace event parser context
+ * @cmdline: The cmdline structure to get the pid from
+ *
+ * Returns the pid for a give cmdline. If @cmdline is NULL, then
+ * -1 is returned.
+ */
+int tep_cmdline_pid(struct tep_handle *tep, struct tep_cmdline *cmdline)
+{
+	struct cmdline_list *cmdlist = (struct cmdline_list *)cmdline;
+
+	if (!cmdline)
+		return -1;
+
+	/*
+	 * If cmdlines have not been created yet, or cmdline is
+	 * not part of the array, then treat it as a cmdlist instead.
+	 */
+	if (!tep->cmdlines ||
+	    cmdline < tep->cmdlines ||
+	    cmdline >= tep->cmdlines + tep->cmdline_count)
+		return cmdlist->pid;
+
+	return cmdline->pid;
+}
+
+/*
+ * This parses the raw @data using the given @event information and
+ * writes the print format into the trace_seq.
+ */
+static void print_event_info(struct trace_seq *s, char *format, bool raw,
+			     struct tep_event *event, struct tep_record *record)
+{
+	int print_pretty = 1;
+
+	if (raw || (event->flags & TEP_EVENT_FL_PRINTRAW))
+		tep_print_fields(s, record->data, record->size, event);
+	else {
+
+		if (event->handler && !(event->flags & TEP_EVENT_FL_NOHANDLE))
+			print_pretty = event->handler(s, record, event,
+						      event->context);
+
+		if (print_pretty)
+			pretty_print(s, record->data, record->size, event);
+	}
+
+	trace_seq_terminate(s);
+}
+
+/**
+ * tep_find_event_by_record - return the event from a given record
+ * @tep: a handle to the trace event parser context
+ * @record: The record to get the event from
+ *
+ * Returns the associated event for a given record, or NULL if non is
+ * is found.
+ */
+struct tep_event *
+tep_find_event_by_record(struct tep_handle *tep, struct tep_record *record)
+{
+	int type;
+
+	if (record->size < 0) {
+		do_warning("ug! negative record size %d", record->size);
+		return NULL;
+	}
+
+	type = trace_parse_common_type(tep, record->data);
+
+	return tep_find_event(tep, type);
+}
+
+/*
+ * Writes the timestamp of the record into @s. Time divisor and precision can be
+ * specified as part of printf @format string. Example:
+ *	"%3.1000d" - divide the time by 1000 and print the first 3 digits
+ *	before the dot. Thus, the timestamp "123456000" will be printed as
+ *	"123.456"
+ */
+static void print_event_time(struct tep_handle *tep, struct trace_seq *s,
+				 char *format, struct tep_event *event,
+				 struct tep_record *record)
+{
+	unsigned long long time;
+	char *divstr;
+	int prec = 0, pr;
+	int div = 0;
+	int p10 = 1;
+
+	if (isdigit(*(format + 1)))
+		prec = atoi(format + 1);
+	divstr = strchr(format, '.');
+	if (divstr && isdigit(*(divstr + 1)))
+		div = atoi(divstr + 1);
+	time = record->ts;
+	if (div) {
+		time += div / 2;
+		time /= div;
+	}
+	pr = prec;
+	while (pr--)
+		p10 *= 10;
+
+	if (p10 > 1 && p10 < time)
+		trace_seq_printf(s, "%5llu.%0*llu", time / p10, prec, time % p10);
+	else
+		trace_seq_printf(s, "%12llu", time);
+}
+
+struct print_event_type {
+	enum {
+		EVENT_TYPE_INT = 1,
+		EVENT_TYPE_STRING,
+		EVENT_TYPE_UNKNOWN,
+	} type;
+	char format[32];
+};
+
+static void print_string(struct tep_handle *tep, struct trace_seq *s,
+			 struct tep_record *record, struct tep_event *event,
+			 const char *arg, struct print_event_type *type)
+{
+	const char *comm;
+	int pid;
+
+	if (strncmp(arg, TEP_PRINT_LATENCY, strlen(TEP_PRINT_LATENCY)) == 0) {
+		data_latency_format(tep, s, type->format, record);
+	} else if (strncmp(arg, TEP_PRINT_COMM, strlen(TEP_PRINT_COMM)) == 0) {
+		pid = parse_common_pid(tep, record->data);
+		comm = find_cmdline(tep, pid);
+		trace_seq_printf(s, type->format, comm);
+	} else if (strncmp(arg, TEP_PRINT_INFO_RAW, strlen(TEP_PRINT_INFO_RAW)) == 0) {
+		print_event_info(s, type->format, true, event, record);
+	} else if (strncmp(arg, TEP_PRINT_INFO, strlen(TEP_PRINT_INFO)) == 0) {
+		print_event_info(s, type->format, false, event, record);
+	} else if  (strncmp(arg, TEP_PRINT_NAME, strlen(TEP_PRINT_NAME)) == 0) {
+		trace_seq_printf(s, type->format, event->name);
+	} else {
+		trace_seq_printf(s, "[UNKNOWN TEP TYPE %s]", arg);
+	}
+
+}
+
+static void print_int(struct tep_handle *tep, struct trace_seq *s,
+		      struct tep_record *record, struct tep_event *event,
+		      int arg, struct print_event_type *type)
+{
+	int param;
+
+	switch (arg) {
+	case TEP_PRINT_CPU:
+		param = record->cpu;
+		break;
+	case TEP_PRINT_PID:
+		param = parse_common_pid(tep, record->data);
+		break;
+	case TEP_PRINT_TIME:
+		return print_event_time(tep, s, type->format, event, record);
+	default:
+		return;
+	}
+	trace_seq_printf(s, type->format, param);
+}
+
+static int tep_print_event_param_type(char *format,
+				      struct print_event_type *type)
+{
+	char *str = format + 1;
+	int i = 1;
+
+	type->type = EVENT_TYPE_UNKNOWN;
+	while (*str) {
+		switch (*str) {
+		case 'd':
+		case 'u':
+		case 'i':
+		case 'x':
+		case 'X':
+		case 'o':
+			type->type = EVENT_TYPE_INT;
+			break;
+		case 's':
+			type->type = EVENT_TYPE_STRING;
+			break;
+		}
+		str++;
+		i++;
+		if (type->type != EVENT_TYPE_UNKNOWN)
+			break;
+	}
+	memset(type->format, 0, 32);
+	memcpy(type->format, format, i < 32 ? i : 31);
+	return i;
+}
+
+/**
+ * tep_print_event - Write various event information
+ * @tep: a handle to the trace event parser context
+ * @s: the trace_seq to write to
+ * @record: The record to get the event from
+ * @format: a printf format string. Supported event fileds:
+ *	TEP_PRINT_PID, "%d" - event PID
+ *	TEP_PRINT_CPU, "%d" - event CPU
+ *	TEP_PRINT_COMM, "%s" - event command string
+ *	TEP_PRINT_NAME, "%s" - event name
+ *	TEP_PRINT_LATENCY, "%s" - event latency
+ *	TEP_PRINT_TIME, %d - event time stamp. A divisor and precision
+ *			can be specified as part of this format string:
+ *			"%precision.divisord". Example:
+ *			"%3.1000d" - divide the time by 1000 and print the first
+ *			3 digits before the dot. Thus, the time stamp
+ *			"123456000" will be printed as "123.456"
+ *	TEP_PRINT_INFO, "%s" - event information. If any width is specified in
+ *			the format string, the event information will be printed
+ *			in raw format.
+ * Writes the specified event information into @s.
+ */
+void tep_print_event(struct tep_handle *tep, struct trace_seq *s,
+		     struct tep_record *record, const char *fmt, ...)
+{
+	struct print_event_type type;
+	char *format = strdup(fmt);
+	char *current = format;
+	char *str = format;
+	int offset;
+	va_list args;
+	struct tep_event *event;
+
+	if (!format)
+		return;
+
+	event = tep_find_event_by_record(tep, record);
+	va_start(args, fmt);
+	while (*current) {
+		current = strchr(str, '%');
+		if (!current) {
+			trace_seq_puts(s, str);
+			break;
+		}
+		memset(&type, 0, sizeof(type));
+		offset = tep_print_event_param_type(current, &type);
+		*current = '\0';
+		trace_seq_puts(s, str);
+		current += offset;
+		switch (type.type) {
+		case EVENT_TYPE_STRING:
+			print_string(tep, s, record, event,
+				     va_arg(args, char*), &type);
+			break;
+		case EVENT_TYPE_INT:
+			print_int(tep, s, record, event,
+				  va_arg(args, int), &type);
+			break;
+		case EVENT_TYPE_UNKNOWN:
+		default:
+			trace_seq_printf(s, "[UNKNOWN TYPE]");
+			break;
+		}
+		str = current;
+
+	}
+	va_end(args);
+	free(format);
+}
+
+static int events_id_cmp(const void *a, const void *b)
+{
+	struct tep_event * const * ea = a;
+	struct tep_event * const * eb = b;
+
+	if ((*ea)->id < (*eb)->id)
+		return -1;
+
+	if ((*ea)->id > (*eb)->id)
+		return 1;
+
+	return 0;
+}
+
+static int events_name_cmp(const void *a, const void *b)
+{
+	struct tep_event * const * ea = a;
+	struct tep_event * const * eb = b;
+	int res;
+
+	res = strcmp((*ea)->name, (*eb)->name);
+	if (res)
+		return res;
+
+	res = strcmp((*ea)->system, (*eb)->system);
+	if (res)
+		return res;
+
+	return events_id_cmp(a, b);
+}
+
+static int events_system_cmp(const void *a, const void *b)
+{
+	struct tep_event * const * ea = a;
+	struct tep_event * const * eb = b;
+	int res;
+
+	res = strcmp((*ea)->system, (*eb)->system);
+	if (res)
+		return res;
+
+	res = strcmp((*ea)->name, (*eb)->name);
+	if (res)
+		return res;
+
+	return events_id_cmp(a, b);
+}
+
+static struct tep_event **list_events_copy(struct tep_handle *tep)
+{
+	struct tep_event **events;
+
+	if (!tep)
+		return NULL;
+
+	events = malloc(sizeof(*events) * (tep->nr_events + 1));
+	if (!events)
+		return NULL;
+
+	memcpy(events, tep->events, sizeof(*events) * tep->nr_events);
+	events[tep->nr_events] = NULL;
+	return events;
+}
+
+static void list_events_sort(struct tep_event **events, int nr_events,
+			     enum tep_event_sort_type sort_type)
+{
+	int (*sort)(const void *a, const void *b);
+
+	switch (sort_type) {
+	case TEP_EVENT_SORT_ID:
+		sort = events_id_cmp;
+		break;
+	case TEP_EVENT_SORT_NAME:
+		sort = events_name_cmp;
+		break;
+	case TEP_EVENT_SORT_SYSTEM:
+		sort = events_system_cmp;
+		break;
+	default:
+		sort = NULL;
+	}
+
+	if (sort)
+		qsort(events, nr_events, sizeof(*events), sort);
+}
+
+/**
+ * tep_list_events - Get events, sorted by given criteria.
+ * @tep: a handle to the tep context
+ * @sort_type: desired sort order of the events in the array
+ *
+ * Returns an array of pointers to all events, sorted by the given
+ * @sort_type criteria. The last element of the array is NULL. The returned
+ * memory must not be freed, it is managed by the library.
+ * The function is not thread safe.
+ */
+struct tep_event **tep_list_events(struct tep_handle *tep,
+				   enum tep_event_sort_type sort_type)
+{
+	struct tep_event **events;
+
+	if (!tep)
+		return NULL;
+
+	events = tep->sort_events;
+	if (events && tep->last_type == sort_type)
+		return events;
+
+	if (!events) {
+		events = list_events_copy(tep);
+		if (!events)
+			return NULL;
+
+		tep->sort_events = events;
+
+		/* the internal events are sorted by id */
+		if (sort_type == TEP_EVENT_SORT_ID) {
+			tep->last_type = sort_type;
+			return events;
+		}
+	}
+
+	list_events_sort(events, tep->nr_events, sort_type);
+	tep->last_type = sort_type;
+
+	return events;
+}
+
+
+/**
+ * tep_list_events_copy - Thread safe version of tep_list_events()
+ * @tep: a handle to the tep context
+ * @sort_type: desired sort order of the events in the array
+ *
+ * Returns an array of pointers to all events, sorted by the given
+ * @sort_type criteria. The last element of the array is NULL. The returned
+ * array is newly allocated inside the function and must be freed by the caller
+ */
+struct tep_event **tep_list_events_copy(struct tep_handle *tep,
+					enum tep_event_sort_type sort_type)
+{
+	struct tep_event **events;
+
+	if (!tep)
+		return NULL;
+
+	events = list_events_copy(tep);
+	if (!events)
+		return NULL;
+
+	/* the internal events are sorted by id */
+	if (sort_type == TEP_EVENT_SORT_ID)
+		return events;
+
+	list_events_sort(events, tep->nr_events, sort_type);
+
+	return events;
+}
+
+static struct tep_format_field **
+get_event_fields(const char *type, const char *name,
+		 int count, struct tep_format_field *list)
+{
+	struct tep_format_field **fields;
+	struct tep_format_field *field;
+	int i = 0;
+
+	fields = malloc(sizeof(*fields) * (count + 1));
+	if (!fields)
+		return NULL;
+
+	for (field = list; field; field = field->next) {
+		fields[i++] = field;
+		if (i == count + 1) {
+			do_warning("event %s has more %s fields than specified",
+				name, type);
+			i--;
+			break;
+		}
+	}
+
+	if (i != count)
+		do_warning("event %s has less %s fields than specified",
+			name, type);
+
+	fields[i] = NULL;
+
+	return fields;
+}
+
+/**
+ * tep_event_common_fields - return a list of common fields for an event
+ * @event: the event to return the common fields of.
+ *
+ * Returns an allocated array of fields. The last item in the array is NULL.
+ * The array must be freed with free().
+ */
+struct tep_format_field **tep_event_common_fields(struct tep_event *event)
+{
+	return get_event_fields("common", event->name,
+				event->format.nr_common,
+				event->format.common_fields);
+}
+
+/**
+ * tep_event_fields - return a list of event specific fields for an event
+ * @event: the event to return the fields of.
+ *
+ * Returns an allocated array of fields. The last item in the array is NULL.
+ * The array must be freed with free().
+ */
+struct tep_format_field **tep_event_fields(struct tep_event *event)
+{
+	return get_event_fields("event", event->name,
+				event->format.nr_fields,
+				event->format.fields);
+}
+
+static void print_fields(struct trace_seq *s, struct tep_print_flag_sym *field)
+{
+	trace_seq_printf(s, "{ %s, %s }", field->value, field->str);
+	if (field->next) {
+		trace_seq_puts(s, ", ");
+		print_fields(s, field->next);
+	}
+}
+
+/* for debugging */
+static void print_args(struct tep_print_arg *args)
+{
+	int print_paren = 1;
+	struct trace_seq s;
+
+	switch (args->type) {
+	case TEP_PRINT_NULL:
+		printf("null");
+		break;
+	case TEP_PRINT_ATOM:
+		printf("%s", args->atom.atom);
+		break;
+	case TEP_PRINT_FIELD:
+		printf("REC->%s", args->field.name);
+		break;
+	case TEP_PRINT_FLAGS:
+		printf("__print_flags(");
+		print_args(args->flags.field);
+		printf(", %s, ", args->flags.delim);
+		trace_seq_init(&s);
+		print_fields(&s, args->flags.flags);
+		trace_seq_do_printf(&s);
+		trace_seq_destroy(&s);
+		printf(")");
+		break;
+	case TEP_PRINT_SYMBOL:
+		printf("__print_symbolic(");
+		print_args(args->symbol.field);
+		printf(", ");
+		trace_seq_init(&s);
+		print_fields(&s, args->symbol.symbols);
+		trace_seq_do_printf(&s);
+		trace_seq_destroy(&s);
+		printf(")");
+		break;
+	case TEP_PRINT_HEX:
+		printf("__print_hex(");
+		print_args(args->hex.field);
+		printf(", ");
+		print_args(args->hex.size);
+		printf(")");
+		break;
+	case TEP_PRINT_HEX_STR:
+		printf("__print_hex_str(");
+		print_args(args->hex.field);
+		printf(", ");
+		print_args(args->hex.size);
+		printf(")");
+		break;
+	case TEP_PRINT_INT_ARRAY:
+		printf("__print_array(");
+		print_args(args->int_array.field);
+		printf(", ");
+		print_args(args->int_array.count);
+		printf(", ");
+		print_args(args->int_array.el_size);
+		printf(")");
+		break;
+	case TEP_PRINT_STRING:
+	case TEP_PRINT_BSTRING:
+		printf("__get_str(%s)", args->string.string);
+		break;
+	case TEP_PRINT_BITMASK:
+		printf("__get_bitmask(%s)", args->bitmask.bitmask);
+		break;
+	case TEP_PRINT_TYPE:
+		printf("(%s)", args->typecast.type);
+		print_args(args->typecast.item);
+		break;
+	case TEP_PRINT_OP:
+		if (strcmp(args->op.op, ":") == 0)
+			print_paren = 0;
+		if (print_paren)
+			printf("(");
+		print_args(args->op.left);
+		printf(" %s ", args->op.op);
+		print_args(args->op.right);
+		if (print_paren)
+			printf(")");
+		break;
+	default:
+		/* we should warn... */
+		return;
+	}
+	if (args->next) {
+		printf("\n");
+		print_args(args->next);
+	}
+}
+
+static void parse_header_field(const char *field,
+			       int *offset, int *size, int mandatory)
+{
+	unsigned long long save_input_buf_ptr;
+	unsigned long long save_input_buf_siz;
+	char *token;
+	int type;
+
+	save_input_buf_ptr = input_buf_ptr;
+	save_input_buf_siz = input_buf_siz;
+
+	if (read_expected(TEP_EVENT_ITEM, "field") < 0)
+		return;
+	if (read_expected(TEP_EVENT_OP, ":") < 0)
+		return;
+
+	/* type */
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
+		goto fail;
+	free_token(token);
+
+	/*
+	 * If this is not a mandatory field, then test it first.
+	 */
+	if (mandatory) {
+		if (read_expected(TEP_EVENT_ITEM, field) < 0)
+			return;
+	} else {
+		if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
+			goto fail;
+		if (strcmp(token, field) != 0)
+			goto discard;
+		free_token(token);
+	}
+
+	if (read_expected(TEP_EVENT_OP, ";") < 0)
+		return;
+	if (read_expected(TEP_EVENT_ITEM, "offset") < 0)
+		return;
+	if (read_expected(TEP_EVENT_OP, ":") < 0)
+		return;
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
+		goto fail;
+	*offset = atoi(token);
+	free_token(token);
+	if (read_expected(TEP_EVENT_OP, ";") < 0)
+		return;
+	if (read_expected(TEP_EVENT_ITEM, "size") < 0)
+		return;
+	if (read_expected(TEP_EVENT_OP, ":") < 0)
+		return;
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
+		goto fail;
+	*size = atoi(token);
+	free_token(token);
+	if (read_expected(TEP_EVENT_OP, ";") < 0)
+		return;
+	type = read_token(&token);
+	if (type != TEP_EVENT_NEWLINE) {
+		/* newer versions of the kernel have a "signed" type */
+		if (type != TEP_EVENT_ITEM)
+			goto fail;
+
+		if (strcmp(token, "signed") != 0)
+			goto fail;
+
+		free_token(token);
+
+		if (read_expected(TEP_EVENT_OP, ":") < 0)
+			return;
+
+		if (read_expect_type(TEP_EVENT_ITEM, &token))
+			goto fail;
+
+		free_token(token);
+		if (read_expected(TEP_EVENT_OP, ";") < 0)
+			return;
+
+		if (read_expect_type(TEP_EVENT_NEWLINE, &token))
+			goto fail;
+	}
+ fail:
+	free_token(token);
+	return;
+
+ discard:
+	input_buf_ptr = save_input_buf_ptr;
+	input_buf_siz = save_input_buf_siz;
+	*offset = 0;
+	*size = 0;
+	free_token(token);
+}
+
+/**
+ * tep_parse_header_page - parse the data stored in the header page
+ * @tep: a handle to the trace event parser context
+ * @buf: the buffer storing the header page format string
+ * @size: the size of @buf
+ * @long_size: the long size to use if there is no header
+ *
+ * This parses the header page format for information on the
+ * ring buffer used. The @buf should be copied from
+ *
+ * /sys/kernel/debug/tracing/events/header_page
+ */
+int tep_parse_header_page(struct tep_handle *tep, char *buf, unsigned long size,
+			  int long_size)
+{
+	int ignore;
+
+	if (!size) {
+		/*
+		 * Old kernels did not have header page info.
+		 * Sorry but we just use what we find here in user space.
+		 */
+		tep->header_page_ts_size = sizeof(long long);
+		tep->header_page_size_size = long_size;
+		tep->header_page_data_offset = sizeof(long long) + long_size;
+		tep->old_format = 1;
+		return -1;
+	}
+	init_input_buf(buf, size);
+
+	parse_header_field("timestamp", &tep->header_page_ts_offset,
+			   &tep->header_page_ts_size, 1);
+	parse_header_field("commit", &tep->header_page_size_offset,
+			   &tep->header_page_size_size, 1);
+	parse_header_field("overwrite", &tep->header_page_overwrite,
+			   &ignore, 0);
+	parse_header_field("data", &tep->header_page_data_offset,
+			   &tep->header_page_data_size, 1);
+
+	return 0;
+}
+
+static int event_matches(struct tep_event *event,
+			 int id, const char *sys_name,
+			 const char *event_name)
+{
+	if (id >= 0 && id != event->id)
+		return 0;
+
+	if (event_name && (strcmp(event_name, event->name) != 0))
+		return 0;
+
+	if (sys_name && (strcmp(sys_name, event->system) != 0))
+		return 0;
+
+	return 1;
+}
+
+static void free_handler(struct event_handler *handle)
+{
+	free((void *)handle->sys_name);
+	free((void *)handle->event_name);
+	free(handle);
+}
+
+static int find_event_handle(struct tep_handle *tep, struct tep_event *event)
+{
+	struct event_handler *handle, **next;
+
+	for (next = &tep->handlers; *next;
+	     next = &(*next)->next) {
+		handle = *next;
+		if (event_matches(event, handle->id,
+				  handle->sys_name,
+				  handle->event_name))
+			break;
+	}
+
+	if (!(*next))
+		return 0;
+
+	pr_stat("overriding event (%d) %s:%s with new print handler",
+		event->id, event->system, event->name);
+
+	event->handler = handle->func;
+	event->context = handle->context;
+
+	*next = handle->next;
+	free_handler(handle);
+
+	return 1;
+}
+
+/**
+ * parse_format - parse the event format
+ * @buf: the buffer storing the event format string
+ * @size: the size of @buf
+ * @sys: the system the event belongs to
+ *
+ * This parses the event format and creates an event structure
+ * to quickly parse raw data for a given event.
+ *
+ * These files currently come from:
+ *
+ * /sys/kernel/debug/tracing/events/.../.../format
+ */
+static enum tep_errno parse_format(struct tep_event **eventp,
+				   struct tep_handle *tep, const char *buf,
+				   unsigned long size, const char *sys)
+{
+	struct tep_event *event;
+	int ret;
+
+	init_input_buf(buf, size);
+
+	*eventp = event = alloc_event();
+	if (!event)
+		return TEP_ERRNO__MEM_ALLOC_FAILED;
+
+	event->name = event_read_name();
+	if (!event->name) {
+		/* Bad event? */
+		ret = TEP_ERRNO__MEM_ALLOC_FAILED;
+		goto event_alloc_failed;
+	}
+
+	if (strcmp(sys, "ftrace") == 0) {
+		event->flags |= TEP_EVENT_FL_ISFTRACE;
+
+		if (strcmp(event->name, "bprint") == 0)
+			event->flags |= TEP_EVENT_FL_ISBPRINT;
+	}
+		
+	event->id = event_read_id();
+	if (event->id < 0) {
+		ret = TEP_ERRNO__READ_ID_FAILED;
+		/*
+		 * This isn't an allocation error actually.
+		 * But as the ID is critical, just bail out.
+		 */
+		goto event_alloc_failed;
+	}
+
+	event->system = strdup(sys);
+	if (!event->system) {
+		ret = TEP_ERRNO__MEM_ALLOC_FAILED;
+		goto event_alloc_failed;
+	}
+
+	/* Add tep to event so that it can be referenced */
+	event->tep = tep;
+
+	ret = event_read_format(event);
+	if (ret < 0) {
+		ret = TEP_ERRNO__READ_FORMAT_FAILED;
+		goto event_parse_failed;
+	}
+
+	/*
+	 * If the event has an override, don't print warnings if the event
+	 * print format fails to parse.
+	 */
+	if (tep && find_event_handle(tep, event))
+		show_warning = 0;
+
+	ret = event_read_print(event);
+	show_warning = 1;
+
+	if (ret < 0) {
+		ret = TEP_ERRNO__READ_PRINT_FAILED;
+		goto event_parse_failed;
+	}
+
+	if (!ret && (event->flags & TEP_EVENT_FL_ISFTRACE)) {
+		struct tep_format_field *field;
+		struct tep_print_arg *arg, **list;
+
+		/* old ftrace had no args */
+		list = &event->print_fmt.args;
+		for (field = event->format.fields; field; field = field->next) {
+			arg = alloc_arg();
+			if (!arg) {
+				event->flags |= TEP_EVENT_FL_FAILED;
+				return TEP_ERRNO__OLD_FTRACE_ARG_FAILED;
+			}
+			arg->type = TEP_PRINT_FIELD;
+			arg->field.name = strdup(field->name);
+			if (!arg->field.name) {
+				event->flags |= TEP_EVENT_FL_FAILED;
+				free_arg(arg);
+				return TEP_ERRNO__OLD_FTRACE_ARG_FAILED;
+			}
+			arg->field.field = field;
+			*list = arg;
+			list = &arg->next;
+		}
+	}
+
+	if (!(event->flags & TEP_EVENT_FL_ISBPRINT))
+		event->print_fmt.print_cache = parse_args(event,
+							  event->print_fmt.format,
+							  event->print_fmt.args);
+
+	return 0;
+
+ event_parse_failed:
+	event->flags |= TEP_EVENT_FL_FAILED;
+	return ret;
+
+ event_alloc_failed:
+	free(event->system);
+	free(event->name);
+	free(event);
+	*eventp = NULL;
+	return ret;
+}
+
+static enum tep_errno
+__parse_event(struct tep_handle *tep,
+	      struct tep_event **eventp,
+	      const char *buf, unsigned long size,
+	      const char *sys)
+{
+	int ret = parse_format(eventp, tep, buf, size, sys);
+	struct tep_event *event = *eventp;
+
+	if (event == NULL)
+		return ret;
+
+	if (tep && add_event(tep, event)) {
+		ret = TEP_ERRNO__MEM_ALLOC_FAILED;
+		goto event_add_failed;
+	}
+
+#define PRINT_ARGS 0
+	if (PRINT_ARGS && event->print_fmt.args)
+		print_args(event->print_fmt.args);
+
+	return 0;
+
+event_add_failed:
+	free_tep_event(event);
+	return ret;
+}
+
+/**
+ * tep_parse_format - parse the event format
+ * @tep: a handle to the trace event parser context
+ * @eventp: returned format
+ * @buf: the buffer storing the event format string
+ * @size: the size of @buf
+ * @sys: the system the event belongs to
+ *
+ * This parses the event format and creates an event structure
+ * to quickly parse raw data for a given event.
+ *
+ * These files currently come from:
+ *
+ * /sys/kernel/debug/tracing/events/.../.../format
+ */
+enum tep_errno tep_parse_format(struct tep_handle *tep,
+				struct tep_event **eventp,
+				const char *buf,
+				unsigned long size, const char *sys)
+{
+	return __parse_event(tep, eventp, buf, size, sys);
+}
+
+/**
+ * tep_parse_event - parse the event format
+ * @tep: a handle to the trace event parser context
+ * @buf: the buffer storing the event format string
+ * @size: the size of @buf
+ * @sys: the system the event belongs to
+ *
+ * This parses the event format and creates an event structure
+ * to quickly parse raw data for a given event.
+ *
+ * These files currently come from:
+ *
+ * /sys/kernel/debug/tracing/events/.../.../format
+ */
+enum tep_errno tep_parse_event(struct tep_handle *tep, const char *buf,
+			       unsigned long size, const char *sys)
+{
+	struct tep_event *event = NULL;
+	return __parse_event(tep, &event, buf, size, sys);
+}
+
+int get_field_val(struct trace_seq *s, struct tep_format_field *field,
+		  const char *name, struct tep_record *record,
+		  unsigned long long *val, int err)
+{
+	if (!field) {
+		if (err)
+			trace_seq_printf(s, "<CANT FIND FIELD %s>", name);
+		return -1;
+	}
+
+	if (tep_read_number_field(field, record->data, val)) {
+		if (err)
+			trace_seq_printf(s, " %s=INVALID", name);
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * tep_get_field_raw - return the raw pointer into the data field
+ * @s: The seq to print to on error
+ * @event: the event that the field is for
+ * @name: The name of the field
+ * @record: The record with the field name.
+ * @len: place to store the field length.
+ * @err: print default error if failed.
+ *
+ * Returns a pointer into record->data of the field and places
+ * the length of the field in @len.
+ *
+ * On failure, it returns NULL.
+ */
+void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event,
+			const char *name, struct tep_record *record,
+			int *len, int err)
+{
+	struct tep_format_field *field;
+	void *data = record->data;
+	unsigned offset;
+	int dummy;
+
+	if (!event)
+		return NULL;
+
+	field = tep_find_field(event, name);
+
+	if (!field) {
+		if (err)
+			trace_seq_printf(s, "<CANT FIND FIELD %s>", name);
+		return NULL;
+	}
+
+	/* Allow @len to be NULL */
+	if (!len)
+		len = &dummy;
+
+	offset = field->offset;
+	if (field->flags & TEP_FIELD_IS_DYNAMIC) {
+		offset = tep_read_number(event->tep,
+					 data + offset, field->size);
+		*len = offset >> 16;
+		offset &= 0xffff;
+	} else
+		*len = field->size;
+
+	return data + offset;
+}
+
+/**
+ * tep_get_field_val - find a field and return its value
+ * @s: The seq to print to on error
+ * @event: the event that the field is for
+ * @name: The name of the field
+ * @record: The record with the field name.
+ * @val: place to store the value of the field.
+ * @err: print default error if failed.
+ *
+ * Returns 0 on success -1 on field not found.
+ */
+int tep_get_field_val(struct trace_seq *s, struct tep_event *event,
+		      const char *name, struct tep_record *record,
+		      unsigned long long *val, int err)
+{
+	struct tep_format_field *field;
+
+	if (!event)
+		return -1;
+
+	field = tep_find_field(event, name);
+
+	return get_field_val(s, field, name, record, val, err);
+}
+
+/**
+ * tep_get_common_field_val - find a common field and return its value
+ * @s: The seq to print to on error
+ * @event: the event that the field is for
+ * @name: The name of the field
+ * @record: The record with the field name.
+ * @val: place to store the value of the field.
+ * @err: print default error if failed.
+ *
+ * Returns 0 on success -1 on field not found.
+ */
+int tep_get_common_field_val(struct trace_seq *s, struct tep_event *event,
+			     const char *name, struct tep_record *record,
+			     unsigned long long *val, int err)
+{
+	struct tep_format_field *field;
+
+	if (!event)
+		return -1;
+
+	field = tep_find_common_field(event, name);
+
+	return get_field_val(s, field, name, record, val, err);
+}
+
+/**
+ * tep_get_any_field_val - find a any field and return its value
+ * @s: The seq to print to on error
+ * @event: the event that the field is for
+ * @name: The name of the field
+ * @record: The record with the field name.
+ * @val: place to store the value of the field.
+ * @err: print default error if failed.
+ *
+ * Returns 0 on success -1 on field not found.
+ */
+int tep_get_any_field_val(struct trace_seq *s, struct tep_event *event,
+			  const char *name, struct tep_record *record,
+			  unsigned long long *val, int err)
+{
+	struct tep_format_field *field;
+
+	if (!event)
+		return -1;
+
+	field = tep_find_any_field(event, name);
+
+	return get_field_val(s, field, name, record, val, err);
+}
+
+/**
+ * tep_print_num_field - print a field and a format
+ * @s: The seq to print to
+ * @fmt: The printf format to print the field with.
+ * @event: the event that the field is for
+ * @name: The name of the field
+ * @record: The record with the field name.
+ * @err: print default error if failed.
+ *
+ * Returns positive value on success, negative in case of an error,
+ * or 0 if buffer is full.
+ */
+int tep_print_num_field(struct trace_seq *s, const char *fmt,
+			struct tep_event *event, const char *name,
+			struct tep_record *record, int err)
+{
+	struct tep_format_field *field = tep_find_field(event, name);
+	unsigned long long val;
+
+	if (!field)
+		goto failed;
+
+	if (tep_read_number_field(field, record->data, &val))
+		goto failed;
+
+	return trace_seq_printf(s, fmt, val);
+
+ failed:
+	if (err)
+		trace_seq_printf(s, "CAN'T FIND FIELD \"%s\"", name);
+	return -1;
+}
+
+/**
+ * tep_print_func_field - print a field and a format for function pointers
+ * @s: The seq to print to
+ * @fmt: The printf format to print the field with.
+ * @event: the event that the field is for
+ * @name: The name of the field
+ * @record: The record with the field name.
+ * @err: print default error if failed.
+ *
+ * Returns positive value on success, negative in case of an error,
+ * or 0 if buffer is full.
+ */
+int tep_print_func_field(struct trace_seq *s, const char *fmt,
+			 struct tep_event *event, const char *name,
+			 struct tep_record *record, int err)
+{
+	struct tep_format_field *field = tep_find_field(event, name);
+	struct tep_handle *tep = event->tep;
+	unsigned long long val;
+	struct func_map *func;
+	char tmp[128];
+
+	if (!field)
+		goto failed;
+
+	if (tep_read_number_field(field, record->data, &val))
+		goto failed;
+
+	func = find_func(tep, val);
+
+	if (func)
+		snprintf(tmp, 128, "%s/0x%llx", func->func, func->addr - val);
+	else
+		sprintf(tmp, "0x%08llx", val);
+
+	return trace_seq_printf(s, fmt, tmp);
+
+ failed:
+	if (err)
+		trace_seq_printf(s, "CAN'T FIND FIELD \"%s\"", name);
+	return -1;
+}
+
+static void free_func_handle(struct tep_function_handler *func)
+{
+	struct func_params *params;
+
+	free(func->name);
+
+	while (func->params) {
+		params = func->params;
+		func->params = params->next;
+		free(params);
+	}
+
+	free(func);
+}
+
+/**
+ * tep_register_print_function - register a helper function
+ * @tep: a handle to the trace event parser context
+ * @func: the function to process the helper function
+ * @ret_type: the return type of the helper function
+ * @name: the name of the helper function
+ * @parameters: A list of enum tep_func_arg_type
+ *
+ * Some events may have helper functions in the print format arguments.
+ * This allows a plugin to dynamically create a way to process one
+ * of these functions.
+ *
+ * The @parameters is a variable list of tep_func_arg_type enums that
+ * must end with TEP_FUNC_ARG_VOID.
+ */
+int tep_register_print_function(struct tep_handle *tep,
+				tep_func_handler func,
+				enum tep_func_arg_type ret_type,
+				char *name, ...)
+{
+	struct tep_function_handler *func_handle;
+	struct func_params **next_param;
+	struct func_params *param;
+	enum tep_func_arg_type type;
+	va_list ap;
+	int ret;
+
+	func_handle = find_func_handler(tep, name);
+	if (func_handle) {
+		/*
+		 * This is most like caused by the users own
+		 * plugins updating the function. This overrides the
+		 * system defaults.
+		 */
+		pr_stat("override of function helper '%s'", name);
+		remove_func_handler(tep, name);
+	}
+
+	func_handle = calloc(1, sizeof(*func_handle));
+	if (!func_handle) {
+		do_warning("Failed to allocate function handler");
+		return TEP_ERRNO__MEM_ALLOC_FAILED;
+	}
+
+	func_handle->ret_type = ret_type;
+	func_handle->name = strdup(name);
+	func_handle->func = func;
+	if (!func_handle->name) {
+		do_warning("Failed to allocate function name");
+		free(func_handle);
+		return TEP_ERRNO__MEM_ALLOC_FAILED;
+	}
+
+	next_param = &(func_handle->params);
+	va_start(ap, name);
+	for (;;) {
+		type = va_arg(ap, enum tep_func_arg_type);
+		if (type == TEP_FUNC_ARG_VOID)
+			break;
+
+		if (type >= TEP_FUNC_ARG_MAX_TYPES) {
+			do_warning("Invalid argument type %d", type);
+			ret = TEP_ERRNO__INVALID_ARG_TYPE;
+			goto out_free;
+		}
+
+		param = malloc(sizeof(*param));
+		if (!param) {
+			do_warning("Failed to allocate function param");
+			ret = TEP_ERRNO__MEM_ALLOC_FAILED;
+			goto out_free;
+		}
+		param->type = type;
+		param->next = NULL;
+
+		*next_param = param;
+		next_param = &(param->next);
+
+		func_handle->nr_args++;
+	}
+	va_end(ap);
+
+	func_handle->next = tep->func_handlers;
+	tep->func_handlers = func_handle;
+
+	return 0;
+ out_free:
+	va_end(ap);
+	free_func_handle(func_handle);
+	return ret;
+}
+
+/**
+ * tep_unregister_print_function - unregister a helper function
+ * @tep: a handle to the trace event parser context
+ * @func: the function to process the helper function
+ * @name: the name of the helper function
+ *
+ * This function removes existing print handler for function @name.
+ *
+ * Returns 0 if the handler was removed successully, -1 otherwise.
+ */
+int tep_unregister_print_function(struct tep_handle *tep,
+				  tep_func_handler func, char *name)
+{
+	struct tep_function_handler *func_handle;
+
+	func_handle = find_func_handler(tep, name);
+	if (func_handle && func_handle->func == func) {
+		remove_func_handler(tep, name);
+		return 0;
+	}
+	return -1;
+}
+
+static struct tep_event *search_event(struct tep_handle *tep, int id,
+				      const char *sys_name,
+				      const char *event_name)
+{
+	struct tep_event *event;
+
+	if (id >= 0) {
+		/* search by id */
+		event = tep_find_event(tep, id);
+		if (!event)
+			return NULL;
+		if (event_name && (strcmp(event_name, event->name) != 0))
+			return NULL;
+		if (sys_name && (strcmp(sys_name, event->system) != 0))
+			return NULL;
+	} else {
+		event = tep_find_event_by_name(tep, sys_name, event_name);
+		if (!event)
+			return NULL;
+	}
+	return event;
+}
+
+/**
+ * tep_register_event_handler - register a way to parse an event
+ * @tep: a handle to the trace event parser context
+ * @id: the id of the event to register
+ * @sys_name: the system name the event belongs to
+ * @event_name: the name of the event
+ * @func: the function to call to parse the event information
+ * @context: the data to be passed to @func
+ *
+ * This function allows a developer to override the parsing of
+ * a given event. If for some reason the default print format
+ * is not sufficient, this function will register a function
+ * for an event to be used to parse the data instead.
+ *
+ * If @id is >= 0, then it is used to find the event.
+ * else @sys_name and @event_name are used.
+ *
+ * Returns:
+ *  TEP_REGISTER_SUCCESS_OVERWRITE if an existing handler is overwritten
+ *  TEP_REGISTER_SUCCESS if a new handler is registered successfully
+ *  negative TEP_ERRNO_... in case of an error
+ *
+ */
+int tep_register_event_handler(struct tep_handle *tep, int id,
+			       const char *sys_name, const char *event_name,
+			       tep_event_handler_func func, void *context)
+{
+	struct tep_event *event;
+	struct event_handler *handle;
+
+	event = search_event(tep, id, sys_name, event_name);
+	if (event == NULL)
+		goto not_found;
+
+	pr_stat("overriding event (%d) %s:%s with new print handler",
+		event->id, event->system, event->name);
+
+	event->handler = func;
+	event->context = context;
+	return TEP_REGISTER_SUCCESS_OVERWRITE;
+
+ not_found:
+	/* Save for later use. */
+	handle = calloc(1, sizeof(*handle));
+	if (!handle) {
+		do_warning("Failed to allocate event handler");
+		return TEP_ERRNO__MEM_ALLOC_FAILED;
+	}
+
+	handle->id = id;
+	if (event_name)
+		handle->event_name = strdup(event_name);
+	if (sys_name)
+		handle->sys_name = strdup(sys_name);
+
+	if ((event_name && !handle->event_name) ||
+	    (sys_name && !handle->sys_name)) {
+		do_warning("Failed to allocate event/sys name");
+		free((void *)handle->event_name);
+		free((void *)handle->sys_name);
+		free(handle);
+		return TEP_ERRNO__MEM_ALLOC_FAILED;
+	}
+
+	handle->func = func;
+	handle->next = tep->handlers;
+	tep->handlers = handle;
+	handle->context = context;
+
+	return TEP_REGISTER_SUCCESS;
+}
+
+static int handle_matches(struct event_handler *handler, int id,
+			  const char *sys_name, const char *event_name,
+			  tep_event_handler_func func, void *context)
+{
+	if (id >= 0 && id != handler->id)
+		return 0;
+
+	if (event_name && (strcmp(event_name, handler->event_name) != 0))
+		return 0;
+
+	if (sys_name && (strcmp(sys_name, handler->sys_name) != 0))
+		return 0;
+
+	if (func != handler->func || context != handler->context)
+		return 0;
+
+	return 1;
+}
+
+/**
+ * tep_unregister_event_handler - unregister an existing event handler
+ * @tep: a handle to the trace event parser context
+ * @id: the id of the event to unregister
+ * @sys_name: the system name the handler belongs to
+ * @event_name: the name of the event handler
+ * @func: the function to call to parse the event information
+ * @context: the data to be passed to @func
+ *
+ * This function removes existing event handler (parser).
+ *
+ * If @id is >= 0, then it is used to find the event.
+ * else @sys_name and @event_name are used.
+ *
+ * Returns 0 if handler was removed successfully, -1 if event was not found.
+ */
+int tep_unregister_event_handler(struct tep_handle *tep, int id,
+				 const char *sys_name, const char *event_name,
+				 tep_event_handler_func func, void *context)
+{
+	struct tep_event *event;
+	struct event_handler *handle;
+	struct event_handler **next;
+
+	event = search_event(tep, id, sys_name, event_name);
+	if (event == NULL)
+		goto not_found;
+
+	if (event->handler == func && event->context == context) {
+		pr_stat("removing override handler for event (%d) %s:%s. Going back to default handler.",
+			event->id, event->system, event->name);
+
+		event->handler = NULL;
+		event->context = NULL;
+		return 0;
+	}
+
+not_found:
+	for (next = &tep->handlers; *next; next = &(*next)->next) {
+		handle = *next;
+		if (handle_matches(handle, id, sys_name, event_name,
+				   func, context))
+			break;
+	}
+
+	if (!(*next))
+		return -1;
+
+	*next = handle->next;
+	free_handler(handle);
+
+	return 0;
+}
+
+/**
+ * tep_alloc - create a tep handle
+ */
+struct tep_handle *tep_alloc(void)
+{
+	struct tep_handle *tep = calloc(1, sizeof(*tep));
+
+	if (tep) {
+		tep->ref_count = 1;
+		tep->host_bigendian = tep_is_bigendian();
+	}
+
+	return tep;
+}
+
+void tep_ref(struct tep_handle *tep)
+{
+	tep->ref_count++;
+}
+
+int tep_get_ref(struct tep_handle *tep)
+{
+	if (tep)
+		return tep->ref_count;
+	return 0;
+}
+
+__hidden void free_tep_format_field(struct tep_format_field *field)
+{
+	free(field->type);
+	if (field->alias != field->name)
+		free(field->alias);
+	free(field->name);
+	free(field);
+}
+
+static void free_format_fields(struct tep_format_field *field)
+{
+	struct tep_format_field *next;
+
+	while (field) {
+		next = field->next;
+		free_tep_format_field(field);
+		field = next;
+	}
+}
+
+static void free_formats(struct tep_format *format)
+{
+	free_format_fields(format->common_fields);
+	free_format_fields(format->fields);
+}
+
+__hidden void free_tep_event(struct tep_event *event)
+{
+	free(event->name);
+	free(event->system);
+
+	free_formats(&event->format);
+
+	free(event->print_fmt.format);
+	free_args(event->print_fmt.args);
+	free_parse_args(event->print_fmt.print_cache);
+	free(event);
+}
+
+/**
+ * tep_free - free a tep handle
+ * @tep: the tep handle to free
+ */
+void tep_free(struct tep_handle *tep)
+{
+	struct cmdline_list *cmdlist, *cmdnext;
+	struct func_list *funclist, *funcnext;
+	struct printk_list *printklist, *printknext;
+	struct tep_function_handler *func_handler;
+	struct event_handler *handle;
+	int i;
+
+	if (!tep)
+		return;
+
+	cmdlist = tep->cmdlist;
+	funclist = tep->funclist;
+	printklist = tep->printklist;
+
+	tep->ref_count--;
+	if (tep->ref_count)
+		return;
+
+	if (tep->cmdlines) {
+		for (i = 0; i < tep->cmdline_count; i++)
+			free(tep->cmdlines[i].comm);
+		free(tep->cmdlines);
+	}
+
+	while (cmdlist) {
+		cmdnext = cmdlist->next;
+		free(cmdlist->comm);
+		free(cmdlist);
+		cmdlist = cmdnext;
+	}
+
+	if (tep->func_map) {
+		for (i = 0; i < (int)tep->func_count; i++) {
+			free(tep->func_map[i].func);
+			free(tep->func_map[i].mod);
+		}
+		free(tep->func_map);
+	}
+
+	while (funclist) {
+		funcnext = funclist->next;
+		free(funclist->func);
+		free(funclist->mod);
+		free(funclist);
+		funclist = funcnext;
+	}
+
+	while (tep->func_handlers) {
+		func_handler = tep->func_handlers;
+		tep->func_handlers = func_handler->next;
+		free_func_handle(func_handler);
+	}
+
+	if (tep->printk_map) {
+		for (i = 0; i < (int)tep->printk_count; i++)
+			free(tep->printk_map[i].printk);
+		free(tep->printk_map);
+	}
+
+	while (printklist) {
+		printknext = printklist->next;
+		free(printklist->printk);
+		free(printklist);
+		printklist = printknext;
+	}
+
+	for (i = 0; i < tep->nr_events; i++)
+		free_tep_event(tep->events[i]);
+
+	while (tep->handlers) {
+		handle = tep->handlers;
+		tep->handlers = handle->next;
+		free_handler(handle);
+	}
+
+	free(tep->events);
+	free(tep->sort_events);
+	free(tep->func_resolver);
+	free_tep_plugin_paths(tep);
+
+	free(tep);
+}
+
+void tep_unref(struct tep_handle *tep)
+{
+	tep_free(tep);
+}
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
new file mode 100644
index 000000000..a67ad9a5b
--- /dev/null
+++ b/tools/lib/traceevent/event-parse.h
@@ -0,0 +1,749 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#ifndef _PARSE_EVENTS_H
+#define _PARSE_EVENTS_H
+
+#include <stdbool.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <regex.h>
+#include <string.h>
+
+#include "trace-seq.h"
+
+#ifndef __maybe_unused
+#define __maybe_unused __attribute__((unused))
+#endif
+
+#ifndef DEBUG_RECORD
+#define DEBUG_RECORD 0
+#endif
+
+struct tep_record {
+	unsigned long long	ts;
+	unsigned long long	offset;
+	long long		missed_events;	/* buffer dropped events before */
+	int			record_size;	/* size of binary record */
+	int			size;		/* size of data */
+	void			*data;
+	int			cpu;
+	int			ref_count;
+	int			locked;		/* Do not free, even if ref_count is zero */
+	void			*priv;
+#if DEBUG_RECORD
+	struct tep_record	*prev;
+	struct tep_record	*next;
+	long			alloc_addr;
+#endif
+};
+
+/* ----------------------- tep ----------------------- */
+
+struct tep_handle;
+struct tep_event;
+
+typedef int (*tep_event_handler_func)(struct trace_seq *s,
+				      struct tep_record *record,
+				      struct tep_event *event,
+				      void *context);
+
+typedef int (*tep_plugin_load_func)(struct tep_handle *tep);
+typedef int (*tep_plugin_unload_func)(struct tep_handle *tep);
+
+struct tep_plugin_option {
+	struct tep_plugin_option	*next;
+	void				*handle;
+	char				*file;
+	char				*name;
+	char				*plugin_alias;
+	char				*description;
+	const char			*value;
+	void				*priv;
+	int				set;
+};
+
+/*
+ * Plugin hooks that can be called:
+ *
+ * TEP_PLUGIN_LOADER:  (required)
+ *   The function name to initialized the plugin.
+ *
+ *   int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+ *
+ * TEP_PLUGIN_UNLOADER:  (optional)
+ *   The function called just before unloading
+ *
+ *   int TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+ *
+ * TEP_PLUGIN_OPTIONS:  (optional)
+ *   Plugin options that can be set before loading
+ *
+ *   struct tep_plugin_option TEP_PLUGIN_OPTIONS[] = {
+ *	{
+ *		.name = "option-name",
+ *		.plugin_alias = "override-file-name", (optional)
+ *		.description = "description of option to show users",
+ *	},
+ *	{
+ *		.name = NULL,
+ *	},
+ *   };
+ *
+ *   Array must end with .name = NULL;
+ *
+ *
+ *   .plugin_alias is used to give a shorter name to access
+ *   the vairable. Useful if a plugin handles more than one event.
+ *
+ *   If .value is not set, then it is considered a boolean and only
+ *   .set will be processed. If .value is defined, then it is considered
+ *   a string option and .set will be ignored.
+ *
+ * TEP_PLUGIN_ALIAS: (optional)
+ *   The name to use for finding options (uses filename if not defined)
+ */
+#define TEP_PLUGIN_LOADER tep_plugin_loader
+#define TEP_PLUGIN_UNLOADER tep_plugin_unloader
+#define TEP_PLUGIN_OPTIONS tep_plugin_options
+#define TEP_PLUGIN_ALIAS tep_plugin_alias
+#define _MAKE_STR(x)	#x
+#define MAKE_STR(x)	_MAKE_STR(x)
+#define TEP_PLUGIN_LOADER_NAME MAKE_STR(TEP_PLUGIN_LOADER)
+#define TEP_PLUGIN_UNLOADER_NAME MAKE_STR(TEP_PLUGIN_UNLOADER)
+#define TEP_PLUGIN_OPTIONS_NAME MAKE_STR(TEP_PLUGIN_OPTIONS)
+#define TEP_PLUGIN_ALIAS_NAME MAKE_STR(TEP_PLUGIN_ALIAS)
+
+enum tep_format_flags {
+	TEP_FIELD_IS_ARRAY	= 1,
+	TEP_FIELD_IS_POINTER	= 2,
+	TEP_FIELD_IS_SIGNED	= 4,
+	TEP_FIELD_IS_STRING	= 8,
+	TEP_FIELD_IS_DYNAMIC	= 16,
+	TEP_FIELD_IS_LONG	= 32,
+	TEP_FIELD_IS_FLAG	= 64,
+	TEP_FIELD_IS_SYMBOLIC	= 128,
+};
+
+struct tep_format_field {
+	struct tep_format_field	*next;
+	struct tep_event	*event;
+	char			*type;
+	char			*name;
+	char			*alias;
+	int			offset;
+	int			size;
+	unsigned int		arraylen;
+	unsigned int		elementsize;
+	unsigned long		flags;
+};
+
+struct tep_format {
+	int			nr_common;
+	int			nr_fields;
+	struct tep_format_field	*common_fields;
+	struct tep_format_field	*fields;
+};
+
+struct tep_print_arg_atom {
+	char			*atom;
+};
+
+struct tep_print_arg_string {
+	char			*string;
+	int			offset;
+};
+
+struct tep_print_arg_bitmask {
+	char			*bitmask;
+	int			offset;
+};
+
+struct tep_print_arg_field {
+	char			*name;
+	struct tep_format_field	*field;
+};
+
+struct tep_print_flag_sym {
+	struct tep_print_flag_sym	*next;
+	char				*value;
+	char				*str;
+};
+
+struct tep_print_arg_typecast {
+	char 			*type;
+	struct tep_print_arg	*item;
+};
+
+struct tep_print_arg_flags {
+	struct tep_print_arg		*field;
+	char				*delim;
+	struct tep_print_flag_sym	*flags;
+};
+
+struct tep_print_arg_symbol {
+	struct tep_print_arg		*field;
+	struct tep_print_flag_sym	*symbols;
+};
+
+struct tep_print_arg_hex {
+	struct tep_print_arg	*field;
+	struct tep_print_arg	*size;
+};
+
+struct tep_print_arg_int_array {
+	struct tep_print_arg	*field;
+	struct tep_print_arg	*count;
+	struct tep_print_arg	*el_size;
+};
+
+struct tep_print_arg_dynarray {
+	struct tep_format_field	*field;
+	struct tep_print_arg	*index;
+};
+
+struct tep_print_arg;
+
+struct tep_print_arg_op {
+	char			*op;
+	int			prio;
+	struct tep_print_arg	*left;
+	struct tep_print_arg	*right;
+};
+
+struct tep_function_handler;
+
+struct tep_print_arg_func {
+	struct tep_function_handler	*func;
+	struct tep_print_arg		*args;
+};
+
+enum tep_print_arg_type {
+	TEP_PRINT_NULL,
+	TEP_PRINT_ATOM,
+	TEP_PRINT_FIELD,
+	TEP_PRINT_FLAGS,
+	TEP_PRINT_SYMBOL,
+	TEP_PRINT_HEX,
+	TEP_PRINT_INT_ARRAY,
+	TEP_PRINT_TYPE,
+	TEP_PRINT_STRING,
+	TEP_PRINT_BSTRING,
+	TEP_PRINT_DYNAMIC_ARRAY,
+	TEP_PRINT_OP,
+	TEP_PRINT_FUNC,
+	TEP_PRINT_BITMASK,
+	TEP_PRINT_DYNAMIC_ARRAY_LEN,
+	TEP_PRINT_HEX_STR,
+};
+
+struct tep_print_arg {
+	struct tep_print_arg		*next;
+	enum tep_print_arg_type		type;
+	union {
+		struct tep_print_arg_atom	atom;
+		struct tep_print_arg_field	field;
+		struct tep_print_arg_typecast	typecast;
+		struct tep_print_arg_flags	flags;
+		struct tep_print_arg_symbol	symbol;
+		struct tep_print_arg_hex	hex;
+		struct tep_print_arg_int_array	int_array;
+		struct tep_print_arg_func	func;
+		struct tep_print_arg_string	string;
+		struct tep_print_arg_bitmask	bitmask;
+		struct tep_print_arg_op		op;
+		struct tep_print_arg_dynarray	dynarray;
+	};
+};
+
+struct tep_print_parse;
+
+struct tep_print_fmt {
+	char			*format;
+	struct tep_print_arg	*args;
+	struct tep_print_parse	*print_cache;
+};
+
+struct tep_event {
+	struct tep_handle	*tep;
+	char			*name;
+	int			id;
+	int			flags;
+	struct tep_format	format;
+	struct tep_print_fmt	print_fmt;
+	char			*system;
+	tep_event_handler_func	handler;
+	void			*context;
+};
+
+enum {
+	TEP_EVENT_FL_ISFTRACE	= 0x01,
+	TEP_EVENT_FL_ISPRINT	= 0x02,
+	TEP_EVENT_FL_ISBPRINT	= 0x04,
+	TEP_EVENT_FL_ISFUNCENT	= 0x10,
+	TEP_EVENT_FL_ISFUNCRET	= 0x20,
+	TEP_EVENT_FL_NOHANDLE	= 0x40,
+	TEP_EVENT_FL_PRINTRAW	= 0x80,
+
+	TEP_EVENT_FL_FAILED	= 0x80000000
+};
+
+enum tep_event_sort_type {
+	TEP_EVENT_SORT_ID,
+	TEP_EVENT_SORT_NAME,
+	TEP_EVENT_SORT_SYSTEM,
+};
+
+enum tep_event_type {
+	TEP_EVENT_ERROR,
+	TEP_EVENT_NONE,
+	TEP_EVENT_SPACE,
+	TEP_EVENT_NEWLINE,
+	TEP_EVENT_OP,
+	TEP_EVENT_DELIM,
+	TEP_EVENT_ITEM,
+	TEP_EVENT_DQUOTE,
+	TEP_EVENT_SQUOTE,
+};
+
+typedef unsigned long long (*tep_func_handler)(struct trace_seq *s,
+					       unsigned long long *args);
+
+enum tep_func_arg_type {
+	TEP_FUNC_ARG_VOID,
+	TEP_FUNC_ARG_INT,
+	TEP_FUNC_ARG_LONG,
+	TEP_FUNC_ARG_STRING,
+	TEP_FUNC_ARG_PTR,
+	TEP_FUNC_ARG_MAX_TYPES
+};
+
+enum tep_flag {
+	TEP_NSEC_OUTPUT		= 1,	/* output in NSECS */
+	TEP_DISABLE_SYS_PLUGINS	= 1 << 1,
+	TEP_DISABLE_PLUGINS	= 1 << 2,
+};
+
+#define TEP_ERRORS 							      \
+	_PE(MEM_ALLOC_FAILED,	"failed to allocate memory"),		      \
+	_PE(PARSE_EVENT_FAILED,	"failed to parse event"),		      \
+	_PE(READ_ID_FAILED,	"failed to read event id"),		      \
+	_PE(READ_FORMAT_FAILED,	"failed to read event format"),		      \
+	_PE(READ_PRINT_FAILED,	"failed to read event print fmt"), 	      \
+	_PE(OLD_FTRACE_ARG_FAILED,"failed to allocate field name for ftrace"),\
+	_PE(INVALID_ARG_TYPE,	"invalid argument type"),		      \
+	_PE(INVALID_EXP_TYPE,	"invalid expression type"),		      \
+	_PE(INVALID_OP_TYPE,	"invalid operator type"),		      \
+	_PE(INVALID_EVENT_NAME,	"invalid event name"),			      \
+	_PE(EVENT_NOT_FOUND,	"no event found"),			      \
+	_PE(SYNTAX_ERROR,	"syntax error"),			      \
+	_PE(ILLEGAL_RVALUE,	"illegal rvalue"),			      \
+	_PE(ILLEGAL_LVALUE,	"illegal lvalue for string comparison"),      \
+	_PE(INVALID_REGEX,	"regex did not compute"),		      \
+	_PE(ILLEGAL_STRING_CMP,	"illegal comparison for string"), 	      \
+	_PE(ILLEGAL_INTEGER_CMP,"illegal comparison for integer"), 	      \
+	_PE(REPARENT_NOT_OP,	"cannot reparent other than OP"),	      \
+	_PE(REPARENT_FAILED,	"failed to reparent filter OP"),	      \
+	_PE(BAD_FILTER_ARG,	"bad arg in filter tree"),		      \
+	_PE(UNEXPECTED_TYPE,	"unexpected type (not a value)"),	      \
+	_PE(ILLEGAL_TOKEN,	"illegal token"),			      \
+	_PE(INVALID_PAREN,	"open parenthesis cannot come here"), 	      \
+	_PE(UNBALANCED_PAREN,	"unbalanced number of parenthesis"),	      \
+	_PE(UNKNOWN_TOKEN,	"unknown token"),			      \
+	_PE(FILTER_NOT_FOUND,	"no filter found"),			      \
+	_PE(NOT_A_NUMBER,	"must have number field"),		      \
+	_PE(NO_FILTER,		"no filters exists"),			      \
+	_PE(FILTER_MISS,	"record does not match to filter")
+
+#undef _PE
+#define _PE(__code, __str) TEP_ERRNO__ ## __code
+enum tep_errno {
+	TEP_ERRNO__SUCCESS			= 0,
+	TEP_ERRNO__FILTER_MATCH			= TEP_ERRNO__SUCCESS,
+
+	/*
+	 * Choose an arbitrary negative big number not to clash with standard
+	 * errno since SUS requires the errno has distinct positive values.
+	 * See 'Issue 6' in the link below.
+	 *
+	 * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
+	 */
+	__TEP_ERRNO__START			= -100000,
+
+	TEP_ERRORS,
+
+	__TEP_ERRNO__END,
+};
+#undef _PE
+
+struct tep_plugin_list;
+
+#define INVALID_PLUGIN_LIST_OPTION	((char **)((unsigned long)-1))
+
+enum tep_plugin_load_priority {
+	TEP_PLUGIN_FIRST,
+	TEP_PLUGIN_LAST,
+};
+
+int tep_add_plugin_path(struct tep_handle *tep, char *path,
+			enum tep_plugin_load_priority prio);
+struct tep_plugin_list *tep_load_plugins(struct tep_handle *tep);
+void tep_unload_plugins(struct tep_plugin_list *plugin_list,
+			struct tep_handle *tep);
+void tep_load_plugins_hook(struct tep_handle *tep, const char *suffix,
+			   void (*load_plugin)(struct tep_handle *tep,
+					       const char *path,
+					       const char *name,
+					       void *data),
+			   void *data);
+char **tep_plugin_list_options(void);
+void tep_plugin_free_options_list(char **list);
+int tep_plugin_add_options(const char *name,
+			   struct tep_plugin_option *options);
+int tep_plugin_add_option(const char *name, const char *val);
+void tep_plugin_remove_options(struct tep_plugin_option *options);
+void tep_plugin_print_options(struct trace_seq *s);
+void tep_print_plugins(struct trace_seq *s,
+			const char *prefix, const char *suffix,
+			const struct tep_plugin_list *list);
+
+/* tep_handle */
+typedef char *(tep_func_resolver_t)(void *priv,
+				    unsigned long long *addrp, char **modp);
+void tep_set_flag(struct tep_handle *tep, int flag);
+void tep_clear_flag(struct tep_handle *tep, enum tep_flag flag);
+bool tep_test_flag(struct tep_handle *tep, enum tep_flag flags);
+
+static inline int tep_is_bigendian(void)
+{
+	unsigned char str[] = { 0x1, 0x2, 0x3, 0x4 };
+	unsigned int val;
+
+	memcpy(&val, str, 4);
+	return val == 0x01020304;
+}
+
+/* taken from kernel/trace/trace.h */
+enum trace_flag_type {
+	TRACE_FLAG_IRQS_OFF		= 0x01,
+	TRACE_FLAG_IRQS_NOSUPPORT	= 0x02,
+	TRACE_FLAG_NEED_RESCHED		= 0x04,
+	TRACE_FLAG_HARDIRQ		= 0x08,
+	TRACE_FLAG_SOFTIRQ		= 0x10,
+};
+
+int tep_set_function_resolver(struct tep_handle *tep,
+			      tep_func_resolver_t *func, void *priv);
+void tep_reset_function_resolver(struct tep_handle *tep);
+int tep_register_comm(struct tep_handle *tep, const char *comm, int pid);
+int tep_override_comm(struct tep_handle *tep, const char *comm, int pid);
+int tep_register_function(struct tep_handle *tep, char *name,
+			  unsigned long long addr, char *mod);
+int tep_register_print_string(struct tep_handle *tep, const char *fmt,
+			      unsigned long long addr);
+bool tep_is_pid_registered(struct tep_handle *tep, int pid);
+
+struct tep_event *tep_get_event(struct tep_handle *tep, int index);
+
+#define TEP_PRINT_INFO		"INFO"
+#define TEP_PRINT_INFO_RAW	"INFO_RAW"
+#define TEP_PRINT_COMM		"COMM"
+#define TEP_PRINT_LATENCY	"LATENCY"
+#define TEP_PRINT_NAME		"NAME"
+#define TEP_PRINT_PID		1U
+#define TEP_PRINT_TIME		2U
+#define TEP_PRINT_CPU		3U
+
+void tep_print_event(struct tep_handle *tep, struct trace_seq *s,
+		     struct tep_record *record, const char *fmt, ...)
+	__attribute__ ((format (printf, 4, 5)));
+
+int tep_parse_header_page(struct tep_handle *tep, char *buf, unsigned long size,
+			  int long_size);
+
+enum tep_errno tep_parse_event(struct tep_handle *tep, const char *buf,
+			       unsigned long size, const char *sys);
+enum tep_errno tep_parse_format(struct tep_handle *tep,
+				struct tep_event **eventp,
+				const char *buf,
+				unsigned long size, const char *sys);
+
+void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event,
+			const char *name, struct tep_record *record,
+			int *len, int err);
+
+int tep_get_field_val(struct trace_seq *s, struct tep_event *event,
+		      const char *name, struct tep_record *record,
+		      unsigned long long *val, int err);
+int tep_get_common_field_val(struct trace_seq *s, struct tep_event *event,
+			     const char *name, struct tep_record *record,
+			     unsigned long long *val, int err);
+int tep_get_any_field_val(struct trace_seq *s, struct tep_event *event,
+			  const char *name, struct tep_record *record,
+			  unsigned long long *val, int err);
+
+int tep_print_num_field(struct trace_seq *s, const char *fmt,
+			struct tep_event *event, const char *name,
+			struct tep_record *record, int err);
+
+int tep_print_func_field(struct trace_seq *s, const char *fmt,
+			 struct tep_event *event, const char *name,
+			 struct tep_record *record, int err);
+
+enum tep_reg_handler {
+	TEP_REGISTER_SUCCESS = 0,
+	TEP_REGISTER_SUCCESS_OVERWRITE,
+};
+
+int tep_register_event_handler(struct tep_handle *tep, int id,
+			       const char *sys_name, const char *event_name,
+			       tep_event_handler_func func, void *context);
+int tep_unregister_event_handler(struct tep_handle *tep, int id,
+				 const char *sys_name, const char *event_name,
+				 tep_event_handler_func func, void *context);
+int tep_register_print_function(struct tep_handle *tep,
+				tep_func_handler func,
+				enum tep_func_arg_type ret_type,
+				char *name, ...);
+int tep_unregister_print_function(struct tep_handle *tep,
+				  tep_func_handler func, char *name);
+
+struct tep_format_field *tep_find_common_field(struct tep_event *event, const char *name);
+struct tep_format_field *tep_find_field(struct tep_event *event, const char *name);
+struct tep_format_field *tep_find_any_field(struct tep_event *event, const char *name);
+
+const char *tep_find_function(struct tep_handle *tep, unsigned long long addr);
+unsigned long long
+tep_find_function_address(struct tep_handle *tep, unsigned long long addr);
+unsigned long long tep_read_number(struct tep_handle *tep, const void *ptr, int size);
+int tep_read_number_field(struct tep_format_field *field, const void *data,
+			  unsigned long long *value);
+
+struct tep_event *tep_get_first_event(struct tep_handle *tep);
+int tep_get_events_count(struct tep_handle *tep);
+struct tep_event *tep_find_event(struct tep_handle *tep, int id);
+
+struct tep_event *
+tep_find_event_by_name(struct tep_handle *tep, const char *sys, const char *name);
+struct tep_event *
+tep_find_event_by_record(struct tep_handle *tep, struct tep_record *record);
+
+int tep_data_type(struct tep_handle *tep, struct tep_record *rec);
+int tep_data_pid(struct tep_handle *tep, struct tep_record *rec);
+int tep_data_preempt_count(struct tep_handle *tep, struct tep_record *rec);
+int tep_data_flags(struct tep_handle *tep, struct tep_record *rec);
+const char *tep_data_comm_from_pid(struct tep_handle *tep, int pid);
+struct tep_cmdline;
+struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *tep, const char *comm,
+					   struct tep_cmdline *next);
+int tep_cmdline_pid(struct tep_handle *tep, struct tep_cmdline *cmdline);
+
+void tep_print_field(struct trace_seq *s, void *data,
+		     struct tep_format_field *field);
+void tep_print_fields(struct trace_seq *s, void *data,
+		      int size __maybe_unused, struct tep_event *event);
+int tep_strerror(struct tep_handle *tep, enum tep_errno errnum,
+		 char *buf, size_t buflen);
+
+struct tep_event **tep_list_events(struct tep_handle *tep, enum tep_event_sort_type);
+struct tep_event **tep_list_events_copy(struct tep_handle *tep,
+					enum tep_event_sort_type);
+struct tep_format_field **tep_event_common_fields(struct tep_event *event);
+struct tep_format_field **tep_event_fields(struct tep_event *event);
+
+enum tep_endian {
+        TEP_LITTLE_ENDIAN = 0,
+        TEP_BIG_ENDIAN
+};
+int tep_get_cpus(struct tep_handle *tep);
+void tep_set_cpus(struct tep_handle *tep, int cpus);
+int tep_get_long_size(struct tep_handle *tep);
+void tep_set_long_size(struct tep_handle *tep, int long_size);
+int tep_get_page_size(struct tep_handle *tep);
+void tep_set_page_size(struct tep_handle *tep, int _page_size);
+bool tep_is_file_bigendian(struct tep_handle *tep);
+void tep_set_file_bigendian(struct tep_handle *tep, enum tep_endian endian);
+bool tep_is_local_bigendian(struct tep_handle *tep);
+void tep_set_local_bigendian(struct tep_handle *tep, enum tep_endian endian);
+int tep_get_header_page_size(struct tep_handle *tep);
+int tep_get_header_timestamp_size(struct tep_handle *tep);
+bool tep_is_old_format(struct tep_handle *tep);
+void tep_set_test_filters(struct tep_handle *tep, int test_filters);
+
+struct tep_handle *tep_alloc(void);
+void tep_free(struct tep_handle *tep);
+void tep_ref(struct tep_handle *tep);
+void tep_unref(struct tep_handle *tep);
+int tep_get_ref(struct tep_handle *tep);
+
+/* for debugging */
+void tep_print_funcs(struct tep_handle *tep);
+void tep_print_printk(struct tep_handle *tep);
+
+/* ----------------------- filtering ----------------------- */
+
+enum tep_filter_boolean_type {
+	TEP_FILTER_FALSE,
+	TEP_FILTER_TRUE,
+};
+
+enum tep_filter_op_type {
+	TEP_FILTER_OP_AND = 1,
+	TEP_FILTER_OP_OR,
+	TEP_FILTER_OP_NOT,
+};
+
+enum tep_filter_cmp_type {
+	TEP_FILTER_CMP_NONE,
+	TEP_FILTER_CMP_EQ,
+	TEP_FILTER_CMP_NE,
+	TEP_FILTER_CMP_GT,
+	TEP_FILTER_CMP_LT,
+	TEP_FILTER_CMP_GE,
+	TEP_FILTER_CMP_LE,
+	TEP_FILTER_CMP_MATCH,
+	TEP_FILTER_CMP_NOT_MATCH,
+	TEP_FILTER_CMP_REGEX,
+	TEP_FILTER_CMP_NOT_REGEX,
+};
+
+enum tep_filter_exp_type {
+	TEP_FILTER_EXP_NONE,
+	TEP_FILTER_EXP_ADD,
+	TEP_FILTER_EXP_SUB,
+	TEP_FILTER_EXP_MUL,
+	TEP_FILTER_EXP_DIV,
+	TEP_FILTER_EXP_MOD,
+	TEP_FILTER_EXP_RSHIFT,
+	TEP_FILTER_EXP_LSHIFT,
+	TEP_FILTER_EXP_AND,
+	TEP_FILTER_EXP_OR,
+	TEP_FILTER_EXP_XOR,
+	TEP_FILTER_EXP_NOT,
+};
+
+enum tep_filter_arg_type {
+	TEP_FILTER_ARG_NONE,
+	TEP_FILTER_ARG_BOOLEAN,
+	TEP_FILTER_ARG_VALUE,
+	TEP_FILTER_ARG_FIELD,
+	TEP_FILTER_ARG_EXP,
+	TEP_FILTER_ARG_OP,
+	TEP_FILTER_ARG_NUM,
+	TEP_FILTER_ARG_STR,
+};
+
+enum tep_filter_value_type {
+	TEP_FILTER_NUMBER,
+	TEP_FILTER_STRING,
+	TEP_FILTER_CHAR
+};
+
+struct tep_filter_arg;
+
+struct tep_filter_arg_boolean {
+	enum tep_filter_boolean_type	value;
+};
+
+struct tep_filter_arg_field {
+	struct tep_format_field		*field;
+};
+
+struct tep_filter_arg_value {
+	enum tep_filter_value_type	type;
+	union {
+		char			*str;
+		unsigned long long	val;
+	};
+};
+
+struct tep_filter_arg_op {
+	enum tep_filter_op_type		type;
+	struct tep_filter_arg		*left;
+	struct tep_filter_arg		*right;
+};
+
+struct tep_filter_arg_exp {
+	enum tep_filter_exp_type	type;
+	struct tep_filter_arg		*left;
+	struct tep_filter_arg		*right;
+};
+
+struct tep_filter_arg_num {
+	enum tep_filter_cmp_type	type;
+	struct tep_filter_arg		*left;
+	struct tep_filter_arg		*right;
+};
+
+struct tep_filter_arg_str {
+	enum tep_filter_cmp_type	type;
+	struct tep_format_field		*field;
+	char				*val;
+	char				*buffer;
+	regex_t				reg;
+};
+
+struct tep_filter_arg {
+	enum tep_filter_arg_type		type;
+	union {
+		struct tep_filter_arg_boolean	boolean;
+		struct tep_filter_arg_field	field;
+		struct tep_filter_arg_value	value;
+		struct tep_filter_arg_op	op;
+		struct tep_filter_arg_exp	exp;
+		struct tep_filter_arg_num	num;
+		struct tep_filter_arg_str	str;
+	};
+};
+
+struct tep_filter_type {
+	int			event_id;
+	struct tep_event	*event;
+	struct tep_filter_arg	*filter;
+};
+
+#define TEP_FILTER_ERROR_BUFSZ  1024
+
+struct tep_event_filter {
+	struct tep_handle	*tep;
+	int			filters;
+	struct tep_filter_type	*event_filters;
+	char			error_buffer[TEP_FILTER_ERROR_BUFSZ];
+};
+
+struct tep_event_filter *tep_filter_alloc(struct tep_handle *tep);
+
+/* for backward compatibility */
+#define FILTER_NONE		TEP_ERRNO__NO_FILTER
+#define FILTER_NOEXIST		TEP_ERRNO__FILTER_NOT_FOUND
+#define FILTER_MISS		TEP_ERRNO__FILTER_MISS
+#define FILTER_MATCH		TEP_ERRNO__FILTER_MATCH
+
+enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter,
+					 const char *filter_str);
+
+enum tep_errno tep_filter_match(struct tep_event_filter *filter,
+				struct tep_record *record);
+
+int tep_filter_strerror(struct tep_event_filter *filter, enum tep_errno err,
+			char *buf, size_t buflen);
+
+int tep_event_filtered(struct tep_event_filter *filter,
+		       int event_id);
+
+void tep_filter_reset(struct tep_event_filter *filter);
+
+void tep_filter_free(struct tep_event_filter *filter);
+
+char *tep_filter_make_string(struct tep_event_filter *filter, int event_id);
+
+int tep_filter_remove_event(struct tep_event_filter *filter,
+			    int event_id);
+
+int tep_filter_copy(struct tep_event_filter *dest, struct tep_event_filter *source);
+
+int tep_filter_compare(struct tep_event_filter *filter1, struct tep_event_filter *filter2);
+
+#endif /* _PARSE_EVENTS_H */
diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c
new file mode 100644
index 000000000..e7f93d5fe
--- /dev/null
+++ b/tools/lib/traceevent/event-plugin.c
@@ -0,0 +1,711 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+#include <dlfcn.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <errno.h>
+#include "event-parse.h"
+#include "event-parse-local.h"
+#include "event-utils.h"
+#include "trace-seq.h"
+
+#define LOCAL_PLUGIN_DIR ".local/lib/traceevent/plugins/"
+
+static struct registered_plugin_options {
+	struct registered_plugin_options	*next;
+	struct tep_plugin_option		*options;
+} *registered_options;
+
+static struct trace_plugin_options {
+	struct trace_plugin_options	*next;
+	char				*plugin;
+	char				*option;
+	char				*value;
+} *trace_plugin_options;
+
+struct tep_plugin_list {
+	struct tep_plugin_list	*next;
+	char			*name;
+	void			*handle;
+};
+
+struct tep_plugins_dir {
+	struct tep_plugins_dir		*next;
+	char				*path;
+	enum tep_plugin_load_priority	prio;
+};
+
+static void lower_case(char *str)
+{
+	if (!str)
+		return;
+	for (; *str; str++)
+		*str = tolower(*str);
+}
+
+static int update_option_value(struct tep_plugin_option *op, const char *val)
+{
+	char *op_val;
+
+	if (!val) {
+		/* toggle, only if option is boolean */
+		if (op->value)
+			/* Warn? */
+			return 0;
+		op->set ^= 1;
+		return 0;
+	}
+
+	/*
+	 * If the option has a value then it takes a string
+	 * otherwise the option is a boolean.
+	 */
+	if (op->value) {
+		op->value = val;
+		return 0;
+	}
+
+	/* Option is boolean, must be either "1", "0", "true" or "false" */
+
+	op_val = strdup(val);
+	if (!op_val)
+		return -1;
+	lower_case(op_val);
+
+	if (strcmp(val, "1") == 0 || strcmp(val, "true") == 0)
+		op->set = 1;
+	else if (strcmp(val, "0") == 0 || strcmp(val, "false") == 0)
+		op->set = 0;
+	free(op_val);
+
+	return 0;
+}
+
+/**
+ * tep_plugin_list_options - get list of plugin options
+ *
+ * Returns an array of char strings that list the currently registered
+ * plugin options in the format of <plugin>:<option>. This list can be
+ * used by toggling the option.
+ *
+ * Returns NULL if there's no options registered. On error it returns
+ * INVALID_PLUGIN_LIST_OPTION
+ *
+ * Must be freed with tep_plugin_free_options_list().
+ */
+char **tep_plugin_list_options(void)
+{
+	struct registered_plugin_options *reg;
+	struct tep_plugin_option *op;
+	char **list = NULL;
+	char *name;
+	int count = 0;
+
+	for (reg = registered_options; reg; reg = reg->next) {
+		for (op = reg->options; op->name; op++) {
+			char *alias = op->plugin_alias ? op->plugin_alias : op->file;
+			char **temp = list;
+			int ret;
+
+			ret = asprintf(&name, "%s:%s", alias, op->name);
+			if (ret < 0)
+				goto err;
+
+			list = realloc(list, count + 2);
+			if (!list) {
+				list = temp;
+				free(name);
+				goto err;
+			}
+			list[count++] = name;
+			list[count] = NULL;
+		}
+	}
+	return list;
+
+ err:
+	while (--count >= 0)
+		free(list[count]);
+	free(list);
+
+	return INVALID_PLUGIN_LIST_OPTION;
+}
+
+void tep_plugin_free_options_list(char **list)
+{
+	int i;
+
+	if (!list)
+		return;
+
+	if (list == INVALID_PLUGIN_LIST_OPTION)
+		return;
+
+	for (i = 0; list[i]; i++)
+		free(list[i]);
+
+	free(list);
+}
+
+static int
+update_option(const char *file, struct tep_plugin_option *option)
+{
+	struct trace_plugin_options *op;
+	char *plugin;
+	int ret = 0;
+
+	if (option->plugin_alias) {
+		plugin = strdup(option->plugin_alias);
+		if (!plugin)
+			return -1;
+	} else {
+		char *p;
+		plugin = strdup(file);
+		if (!plugin)
+			return -1;
+		p = strstr(plugin, ".");
+		if (p)
+			*p = '\0';
+	}
+
+	/* first look for named options */
+	for (op = trace_plugin_options; op; op = op->next) {
+		if (!op->plugin)
+			continue;
+		if (strcmp(op->plugin, plugin) != 0)
+			continue;
+		if (strcmp(op->option, option->name) != 0)
+			continue;
+
+		ret = update_option_value(option, op->value);
+		if (ret)
+			goto out;
+		break;
+	}
+
+	/* first look for unnamed options */
+	for (op = trace_plugin_options; op; op = op->next) {
+		if (op->plugin)
+			continue;
+		if (strcmp(op->option, option->name) != 0)
+			continue;
+
+		ret = update_option_value(option, op->value);
+		break;
+	}
+
+ out:
+	free(plugin);
+	return ret;
+}
+
+/**
+ * tep_plugin_add_options - Add a set of options by a plugin
+ * @name: The name of the plugin adding the options
+ * @options: The set of options being loaded
+ *
+ * Sets the options with the values that have been added by user.
+ */
+int tep_plugin_add_options(const char *name,
+			   struct tep_plugin_option *options)
+{
+	struct registered_plugin_options *reg;
+
+	reg = malloc(sizeof(*reg));
+	if (!reg)
+		return -1;
+	reg->next = registered_options;
+	reg->options = options;
+	registered_options = reg;
+
+	while (options->name) {
+		update_option(name, options);
+		options++;
+	}
+	return 0;
+}
+
+/**
+ * tep_plugin_remove_options - remove plugin options that were registered
+ * @options: Options to removed that were registered with tep_plugin_add_options
+ */
+void tep_plugin_remove_options(struct tep_plugin_option *options)
+{
+	struct registered_plugin_options **last;
+	struct registered_plugin_options *reg;
+
+	for (last = &registered_options; *last; last = &(*last)->next) {
+		if ((*last)->options == options) {
+			reg = *last;
+			*last = reg->next;
+			free(reg);
+			return;
+		}
+	}
+}
+
+static int parse_option_name(char **option, char **plugin)
+{
+	char *p;
+
+	*plugin = NULL;
+
+	if ((p = strstr(*option, ":"))) {
+		*plugin = *option;
+		*p = '\0';
+		*option = strdup(p + 1);
+		if (!*option)
+			return -1;
+	}
+	return 0;
+}
+
+static struct tep_plugin_option *
+find_registered_option(const char *plugin, const char *option)
+{
+	struct registered_plugin_options *reg;
+	struct tep_plugin_option *op;
+	const char *op_plugin;
+
+	for (reg = registered_options; reg; reg = reg->next) {
+		for (op = reg->options; op->name; op++) {
+			if (op->plugin_alias)
+				op_plugin = op->plugin_alias;
+			else
+				op_plugin = op->file;
+
+			if (plugin && strcmp(plugin, op_plugin) != 0)
+				continue;
+			if (strcmp(option, op->name) != 0)
+				continue;
+
+			return op;
+		}
+	}
+
+	return NULL;
+}
+
+static int process_option(const char *plugin, const char *option, const char *val)
+{
+	struct tep_plugin_option *op;
+
+	op = find_registered_option(plugin, option);
+	if (!op)
+		return 0;
+
+	return update_option_value(op, val);
+}
+
+/**
+ * tep_plugin_add_option - add an option/val pair to set plugin options
+ * @name: The name of the option (format: <plugin>:<option> or just <option>)
+ * @val: (optional) the value for the option
+ *
+ * Modify a plugin option. If @val is given than the value of the option
+ * is set (note, some options just take a boolean, so @val must be either
+ * "1" or "0" or "true" or "false").
+ */
+int tep_plugin_add_option(const char *name, const char *val)
+{
+	struct trace_plugin_options *op;
+	char *option_str;
+	char *plugin;
+
+	option_str = strdup(name);
+	if (!option_str)
+		return -ENOMEM;
+
+	if (parse_option_name(&option_str, &plugin) < 0)
+		return -ENOMEM;
+
+	/* If the option exists, update the val */
+	for (op = trace_plugin_options; op; op = op->next) {
+		/* Both must be NULL or not NULL */
+		if ((!plugin || !op->plugin) && plugin != op->plugin)
+			continue;
+		if (plugin && strcmp(plugin, op->plugin) != 0)
+			continue;
+		if (strcmp(op->option, option_str) != 0)
+			continue;
+
+		/* update option */
+		free(op->value);
+		if (val) {
+			op->value = strdup(val);
+			if (!op->value)
+				goto out_free;
+		} else
+			op->value = NULL;
+
+		/* plugin and option_str don't get freed at the end */
+		free(plugin);
+		free(option_str);
+
+		plugin = op->plugin;
+		option_str = op->option;
+		break;
+	}
+
+	/* If not found, create */
+	if (!op) {
+		op = malloc(sizeof(*op));
+		if (!op)
+			goto out_free;
+		memset(op, 0, sizeof(*op));
+		op->plugin = plugin;
+		op->option = option_str;
+		if (val) {
+			op->value = strdup(val);
+			if (!op->value) {
+				free(op);
+				goto out_free;
+			}
+		}
+		op->next = trace_plugin_options;
+		trace_plugin_options = op;
+	}
+
+	return process_option(plugin, option_str, val);
+
+out_free:
+	free(plugin);
+	free(option_str);
+	return -ENOMEM;
+}
+
+static void print_op_data(struct trace_seq *s, const char *name,
+			  const char *op)
+{
+	if (op)
+		trace_seq_printf(s, "%8s:\t%s\n", name, op);
+}
+
+/**
+ * tep_plugin_print_options - print out the registered plugin options
+ * @s: The trace_seq descriptor to write the plugin options into
+ *
+ * Writes a list of options into trace_seq @s.
+ */
+void tep_plugin_print_options(struct trace_seq *s)
+{
+	struct registered_plugin_options *reg;
+	struct tep_plugin_option *op;
+
+	for (reg = registered_options; reg; reg = reg->next) {
+		if (reg != registered_options)
+			trace_seq_printf(s, "============\n");
+		for (op = reg->options; op->name; op++) {
+			if (op != reg->options)
+				trace_seq_printf(s, "------------\n");
+			print_op_data(s, "file", op->file);
+			print_op_data(s, "plugin", op->plugin_alias);
+			print_op_data(s, "option", op->name);
+			print_op_data(s, "desc", op->description);
+			print_op_data(s, "value", op->value);
+			trace_seq_printf(s, "%8s:\t%d\n", "set", op->set);
+		}
+	}
+}
+
+/**
+ * tep_print_plugins - print out the list of plugins loaded
+ * @s: the trace_seq descripter to write to
+ * @prefix: The prefix string to add before listing the option name
+ * @suffix: The suffix string ot append after the option name
+ * @list: The list of plugins (usually returned by tep_load_plugins()
+ *
+ * Writes to the trace_seq @s the list of plugins (files) that is
+ * returned by tep_load_plugins(). Use @prefix and @suffix for formating:
+ * @prefix = "  ", @suffix = "\n".
+ */
+void tep_print_plugins(struct trace_seq *s,
+		       const char *prefix, const char *suffix,
+		       const struct tep_plugin_list *list)
+{
+	while (list) {
+		trace_seq_printf(s, "%s%s%s", prefix, list->name, suffix);
+		list = list->next;
+	}
+}
+
+static void
+load_plugin(struct tep_handle *tep, const char *path,
+	    const char *file, void *data)
+{
+	struct tep_plugin_list **plugin_list = data;
+	struct tep_plugin_option *options;
+	tep_plugin_load_func func;
+	struct tep_plugin_list *list;
+	const char *alias;
+	char *plugin;
+	void *handle;
+	int ret;
+
+	ret = asprintf(&plugin, "%s/%s", path, file);
+	if (ret < 0) {
+		warning("could not allocate plugin memory\n");
+		return;
+	}
+
+	handle = dlopen(plugin, RTLD_NOW | RTLD_GLOBAL);
+	if (!handle) {
+		warning("could not load plugin '%s'\n%s\n",
+			plugin, dlerror());
+		goto out_free;
+	}
+
+	alias = dlsym(handle, TEP_PLUGIN_ALIAS_NAME);
+	if (!alias)
+		alias = file;
+
+	options = dlsym(handle, TEP_PLUGIN_OPTIONS_NAME);
+	if (options) {
+		while (options->name) {
+			ret = update_option(alias, options);
+			if (ret < 0)
+				goto out_free;
+			options++;
+		}
+	}
+
+	func = dlsym(handle, TEP_PLUGIN_LOADER_NAME);
+	if (!func) {
+		warning("could not find func '%s' in plugin '%s'\n%s\n",
+			TEP_PLUGIN_LOADER_NAME, plugin, dlerror());
+		goto out_free;
+	}
+
+	list = malloc(sizeof(*list));
+	if (!list) {
+		warning("could not allocate plugin memory\n");
+		goto out_free;
+	}
+
+	list->next = *plugin_list;
+	list->handle = handle;
+	list->name = plugin;
+	*plugin_list = list;
+
+	pr_stat("registering plugin: %s", plugin);
+	func(tep);
+	return;
+
+ out_free:
+	free(plugin);
+}
+
+static void
+load_plugins_dir(struct tep_handle *tep, const char *suffix,
+		 const char *path,
+		 void (*load_plugin)(struct tep_handle *tep,
+				     const char *path,
+				     const char *name,
+				     void *data),
+		 void *data)
+{
+	struct dirent *dent;
+	struct stat st;
+	DIR *dir;
+	int ret;
+
+	ret = stat(path, &st);
+	if (ret < 0)
+		return;
+
+	if (!S_ISDIR(st.st_mode))
+		return;
+
+	dir = opendir(path);
+	if (!dir)
+		return;
+
+	while ((dent = readdir(dir))) {
+		const char *name = dent->d_name;
+
+		if (strcmp(name, ".") == 0 ||
+		    strcmp(name, "..") == 0)
+			continue;
+
+		/* Only load plugins that end in suffix */
+		if (strcmp(name + (strlen(name) - strlen(suffix)), suffix) != 0)
+			continue;
+
+		load_plugin(tep, path, name, data);
+	}
+
+	closedir(dir);
+}
+
+/**
+ * tep_load_plugins_hook - call a user specified callback to load a plugin
+ * @tep: handler to traceevent context
+ * @suffix: filter only plugin files with given suffix
+ * @load_plugin: user specified callback, called for each plugin file
+ * @data: custom context, passed to @load_plugin
+ *
+ * Searches for traceevent plugin files and calls @load_plugin for each
+ * The order of plugins search is:
+ *  - Directories, specified in @tep->plugins_dir and priority TEP_PLUGIN_FIRST
+ *  - Directory, specified at compile time with PLUGIN_TRACEEVENT_DIR
+ *  - Directory, specified by environment variable TRACEEVENT_PLUGIN_DIR
+ *  - In user's home: ~/.local/lib/traceevent/plugins/
+ *  - Directories, specified in @tep->plugins_dir and priority TEP_PLUGIN_LAST
+ *
+ */
+void tep_load_plugins_hook(struct tep_handle *tep, const char *suffix,
+			   void (*load_plugin)(struct tep_handle *tep,
+					       const char *path,
+					       const char *name,
+					       void *data),
+			   void *data)
+{
+	struct tep_plugins_dir *dir = NULL;
+	char *home;
+	char *path;
+	char *envdir;
+	int ret;
+
+	if (tep && tep->flags & TEP_DISABLE_PLUGINS)
+		return;
+
+	if (tep)
+		dir = tep->plugins_dir;
+	while (dir) {
+		if (dir->prio == TEP_PLUGIN_FIRST)
+			load_plugins_dir(tep, suffix, dir->path,
+					 load_plugin, data);
+		dir = dir->next;
+	}
+
+	/*
+	 * If a system plugin directory was defined,
+	 * check that first.
+	 */
+#ifdef PLUGIN_DIR
+	if (!tep || !(tep->flags & TEP_DISABLE_SYS_PLUGINS))
+		load_plugins_dir(tep, suffix, PLUGIN_DIR,
+				 load_plugin, data);
+#endif
+
+	/*
+	 * Next let the environment-set plugin directory
+	 * override the system defaults.
+	 */
+	envdir = getenv("TRACEEVENT_PLUGIN_DIR");
+	if (envdir)
+		load_plugins_dir(tep, suffix, envdir, load_plugin, data);
+
+	/*
+	 * Now let the home directory override the environment
+	 * or system defaults.
+	 */
+	home = getenv("HOME");
+	if (!home)
+		return;
+
+	ret = asprintf(&path, "%s/%s", home, LOCAL_PLUGIN_DIR);
+	if (ret < 0) {
+		warning("could not allocate plugin memory\n");
+		return;
+	}
+
+	load_plugins_dir(tep, suffix, path, load_plugin, data);
+
+	if (tep)
+		dir = tep->plugins_dir;
+	while (dir) {
+		if (dir->prio == TEP_PLUGIN_LAST)
+			load_plugins_dir(tep, suffix, dir->path,
+					 load_plugin, data);
+		dir = dir->next;
+	}
+
+	free(path);
+}
+
+struct tep_plugin_list*
+tep_load_plugins(struct tep_handle *tep)
+{
+	struct tep_plugin_list *list = NULL;
+
+	tep_load_plugins_hook(tep, ".so", load_plugin, &list);
+	return list;
+}
+
+/**
+ * tep_add_plugin_path - Add a new plugin directory.
+ * @tep: Trace event handler.
+ * @path: Path to a directory. All plugin files in that
+ *	  directory will be loaded.
+ *@prio: Load priority of the plugins in that directory.
+ *
+ * Returns -1 in case of an error, 0 otherwise.
+ */
+int tep_add_plugin_path(struct tep_handle *tep, char *path,
+			enum tep_plugin_load_priority prio)
+{
+	struct tep_plugins_dir *dir;
+
+	if (!tep || !path)
+		return -1;
+
+	dir = calloc(1, sizeof(*dir));
+	if (!dir)
+		return -1;
+
+	dir->path = strdup(path);
+	if (!dir->path) {
+		free(dir);
+		return -1;
+	}
+	dir->prio = prio;
+	dir->next = tep->plugins_dir;
+	tep->plugins_dir = dir;
+
+	return 0;
+}
+
+__hidden void free_tep_plugin_paths(struct tep_handle *tep)
+{
+	struct tep_plugins_dir *dir;
+
+	if (!tep)
+		return;
+
+	dir = tep->plugins_dir;
+	while (dir) {
+		tep->plugins_dir = tep->plugins_dir->next;
+		free(dir->path);
+		free(dir);
+		dir = tep->plugins_dir;
+	}
+}
+
+void
+tep_unload_plugins(struct tep_plugin_list *plugin_list, struct tep_handle *tep)
+{
+	tep_plugin_unload_func func;
+	struct tep_plugin_list *list;
+
+	while (plugin_list) {
+		list = plugin_list;
+		plugin_list = list->next;
+		func = dlsym(list->handle, TEP_PLUGIN_UNLOADER_NAME);
+		if (func)
+			func(tep);
+		dlclose(list->handle);
+		free(list->name);
+		free(list);
+	}
+}
diff --git a/tools/lib/traceevent/event-utils.h b/tools/lib/traceevent/event-utils.h
new file mode 100644
index 000000000..0560b96a3
--- /dev/null
+++ b/tools/lib/traceevent/event-utils.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+/*
+ * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#ifndef __UTIL_H
+#define __UTIL_H
+
+#include <ctype.h>
+
+/* Can be overridden */
+void warning(const char *fmt, ...);
+void pr_stat(const char *fmt, ...);
+void vpr_stat(const char *fmt, va_list ap);
+
+/* Always available */
+void __warning(const char *fmt, ...);
+void __pr_stat(const char *fmt, ...);
+
+void __vwarning(const char *fmt, ...);
+void __vpr_stat(const char *fmt, ...);
+
+#define min(x, y) ({				\
+	typeof(x) _min1 = (x);			\
+	typeof(y) _min2 = (y);			\
+	(void) (&_min1 == &_min2);		\
+	_min1 < _min2 ? _min1 : _min2; })
+
+static inline char *strim(char *string)
+{
+	char *ret;
+
+	if (!string)
+		return NULL;
+	while (*string) {
+		if (!isspace(*string))
+			break;
+		string++;
+	}
+	ret = string;
+
+	string = ret + strlen(ret) - 1;
+	while (string > ret) {
+		if (!isspace(*string))
+			break;
+		string--;
+	}
+	string[1] = 0;
+
+	return ret;
+}
+
+static inline int has_text(const char *text)
+{
+	if (!text)
+		return 0;
+
+	while (*text) {
+		if (!isspace(*text))
+			return 1;
+		text++;
+	}
+
+	return 0;
+}
+
+#endif
diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c
new file mode 100644
index 000000000..f1640d651
--- /dev/null
+++ b/tools/lib/traceevent/kbuffer-parse.c
@@ -0,0 +1,809 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "kbuffer.h"
+
+#define MISSING_EVENTS (1UL << 31)
+#define MISSING_STORED (1UL << 30)
+
+#define COMMIT_MASK ((1 << 27) - 1)
+
+enum {
+	KBUFFER_FL_HOST_BIG_ENDIAN	= (1<<0),
+	KBUFFER_FL_BIG_ENDIAN		= (1<<1),
+	KBUFFER_FL_LONG_8		= (1<<2),
+	KBUFFER_FL_OLD_FORMAT		= (1<<3),
+};
+
+#define ENDIAN_MASK (KBUFFER_FL_HOST_BIG_ENDIAN | KBUFFER_FL_BIG_ENDIAN)
+
+/** kbuffer
+ * @timestamp		- timestamp of current event
+ * @lost_events		- # of lost events between this subbuffer and previous
+ * @flags		- special flags of the kbuffer
+ * @subbuffer		- pointer to the sub-buffer page
+ * @data		- pointer to the start of data on the sub-buffer page
+ * @index		- index from @data to the @curr event data
+ * @curr		- offset from @data to the start of current event
+ *			   (includes metadata)
+ * @next		- offset from @data to the start of next event
+ * @size		- The size of data on @data
+ * @start		- The offset from @subbuffer where @data lives
+ *
+ * @read_4		- Function to read 4 raw bytes (may swap)
+ * @read_8		- Function to read 8 raw bytes (may swap)
+ * @read_long		- Function to read a long word (4 or 8 bytes with needed swap)
+ */
+struct kbuffer {
+	unsigned long long 	timestamp;
+	long long		lost_events;
+	unsigned long		flags;
+	void			*subbuffer;
+	void			*data;
+	unsigned int		index;
+	unsigned int		curr;
+	unsigned int		next;
+	unsigned int		size;
+	unsigned int		start;
+
+	unsigned int (*read_4)(void *ptr);
+	unsigned long long (*read_8)(void *ptr);
+	unsigned long long (*read_long)(struct kbuffer *kbuf, void *ptr);
+	int (*next_event)(struct kbuffer *kbuf);
+};
+
+static void *zmalloc(size_t size)
+{
+	return calloc(1, size);
+}
+
+static int host_is_bigendian(void)
+{
+	unsigned char str[] = { 0x1, 0x2, 0x3, 0x4 };
+	unsigned int *ptr;
+
+	ptr = (unsigned int *)str;
+	return *ptr == 0x01020304;
+}
+
+static int do_swap(struct kbuffer *kbuf)
+{
+	return ((kbuf->flags & KBUFFER_FL_HOST_BIG_ENDIAN) + kbuf->flags) &
+		ENDIAN_MASK;
+}
+
+static unsigned long long __read_8(void *ptr)
+{
+	unsigned long long data = *(unsigned long long *)ptr;
+
+	return data;
+}
+
+static unsigned long long __read_8_sw(void *ptr)
+{
+	unsigned long long data = *(unsigned long long *)ptr;
+	unsigned long long swap;
+
+	swap = ((data & 0xffULL) << 56) |
+		((data & (0xffULL << 8)) << 40) |
+		((data & (0xffULL << 16)) << 24) |
+		((data & (0xffULL << 24)) << 8) |
+		((data & (0xffULL << 32)) >> 8) |
+		((data & (0xffULL << 40)) >> 24) |
+		((data & (0xffULL << 48)) >> 40) |
+		((data & (0xffULL << 56)) >> 56);
+
+	return swap;
+}
+
+static unsigned int __read_4(void *ptr)
+{
+	unsigned int data = *(unsigned int *)ptr;
+
+	return data;
+}
+
+static unsigned int __read_4_sw(void *ptr)
+{
+	unsigned int data = *(unsigned int *)ptr;
+	unsigned int swap;
+
+	swap = ((data & 0xffULL) << 24) |
+		((data & (0xffULL << 8)) << 8) |
+		((data & (0xffULL << 16)) >> 8) |
+		((data & (0xffULL << 24)) >> 24);
+
+	return swap;
+}
+
+static unsigned long long read_8(struct kbuffer *kbuf, void *ptr)
+{
+	return kbuf->read_8(ptr);
+}
+
+static unsigned int read_4(struct kbuffer *kbuf, void *ptr)
+{
+	return kbuf->read_4(ptr);
+}
+
+static unsigned long long __read_long_8(struct kbuffer *kbuf, void *ptr)
+{
+	return kbuf->read_8(ptr);
+}
+
+static unsigned long long __read_long_4(struct kbuffer *kbuf, void *ptr)
+{
+	return kbuf->read_4(ptr);
+}
+
+static unsigned long long read_long(struct kbuffer *kbuf, void *ptr)
+{
+	return kbuf->read_long(kbuf, ptr);
+}
+
+static int calc_index(struct kbuffer *kbuf, void *ptr)
+{
+	return (unsigned long)ptr - (unsigned long)kbuf->data;
+}
+
+static int __next_event(struct kbuffer *kbuf);
+
+/**
+ * kbuffer_alloc - allocat a new kbuffer
+ * @size;	enum to denote size of word
+ * @endian:	enum to denote endianness
+ *
+ * Allocates and returns a new kbuffer.
+ */
+struct kbuffer *
+kbuffer_alloc(enum kbuffer_long_size size, enum kbuffer_endian endian)
+{
+	struct kbuffer *kbuf;
+	int flags = 0;
+
+	switch (size) {
+	case KBUFFER_LSIZE_4:
+		break;
+	case KBUFFER_LSIZE_8:
+		flags |= KBUFFER_FL_LONG_8;
+		break;
+	default:
+		return NULL;
+	}
+
+	switch (endian) {
+	case KBUFFER_ENDIAN_LITTLE:
+		break;
+	case KBUFFER_ENDIAN_BIG:
+		flags |= KBUFFER_FL_BIG_ENDIAN;
+		break;
+	default:
+		return NULL;
+	}
+
+	kbuf = zmalloc(sizeof(*kbuf));
+	if (!kbuf)
+		return NULL;
+
+	kbuf->flags = flags;
+
+	if (host_is_bigendian())
+		kbuf->flags |= KBUFFER_FL_HOST_BIG_ENDIAN;
+
+	if (do_swap(kbuf)) {
+		kbuf->read_8 = __read_8_sw;
+		kbuf->read_4 = __read_4_sw;
+	} else {
+		kbuf->read_8 = __read_8;
+		kbuf->read_4 = __read_4;
+	}
+
+	if (kbuf->flags & KBUFFER_FL_LONG_8)
+		kbuf->read_long = __read_long_8;
+	else
+		kbuf->read_long = __read_long_4;
+
+	/* May be changed by kbuffer_set_old_format() */
+	kbuf->next_event = __next_event;
+
+	return kbuf;
+}
+
+/** kbuffer_free - free an allocated kbuffer
+ * @kbuf:	The kbuffer to free
+ *
+ * Can take NULL as a parameter.
+ */
+void kbuffer_free(struct kbuffer *kbuf)
+{
+	free(kbuf);
+}
+
+static unsigned int type4host(struct kbuffer *kbuf,
+			      unsigned int type_len_ts)
+{
+	if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
+		return (type_len_ts >> 29) & 3;
+	else
+		return type_len_ts & 3;
+}
+
+static unsigned int len4host(struct kbuffer *kbuf,
+			     unsigned int type_len_ts)
+{
+	if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
+		return (type_len_ts >> 27) & 7;
+	else
+		return (type_len_ts >> 2) & 7;
+}
+
+static unsigned int type_len4host(struct kbuffer *kbuf,
+				  unsigned int type_len_ts)
+{
+	if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
+		return (type_len_ts >> 27) & ((1 << 5) - 1);
+	else
+		return type_len_ts & ((1 << 5) - 1);
+}
+
+static unsigned int ts4host(struct kbuffer *kbuf,
+			    unsigned int type_len_ts)
+{
+	if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
+		return type_len_ts & ((1 << 27) - 1);
+	else
+		return type_len_ts >> 5;
+}
+
+/*
+ * Linux 2.6.30 and earlier (not much ealier) had a different
+ * ring buffer format. It should be obsolete, but we handle it anyway.
+ */
+enum old_ring_buffer_type {
+	OLD_RINGBUF_TYPE_PADDING,
+	OLD_RINGBUF_TYPE_TIME_EXTEND,
+	OLD_RINGBUF_TYPE_TIME_STAMP,
+	OLD_RINGBUF_TYPE_DATA,
+};
+
+static unsigned int old_update_pointers(struct kbuffer *kbuf)
+{
+	unsigned long long extend;
+	unsigned int type_len_ts;
+	unsigned int type;
+	unsigned int len;
+	unsigned int delta;
+	unsigned int length;
+	void *ptr = kbuf->data + kbuf->curr;
+
+	type_len_ts = read_4(kbuf, ptr);
+	ptr += 4;
+
+	type = type4host(kbuf, type_len_ts);
+	len = len4host(kbuf, type_len_ts);
+	delta = ts4host(kbuf, type_len_ts);
+
+	switch (type) {
+	case OLD_RINGBUF_TYPE_PADDING:
+		kbuf->next = kbuf->size;
+		return 0;
+
+	case OLD_RINGBUF_TYPE_TIME_EXTEND:
+		extend = read_4(kbuf, ptr);
+		extend <<= TS_SHIFT;
+		extend += delta;
+		delta = extend;
+		ptr += 4;
+		length = 0;
+		break;
+
+	case OLD_RINGBUF_TYPE_TIME_STAMP:
+		/* should never happen! */
+		kbuf->curr = kbuf->size;
+		kbuf->next = kbuf->size;
+		kbuf->index = kbuf->size;
+		return -1;
+	default:
+		if (len)
+			length = len * 4;
+		else {
+			length = read_4(kbuf, ptr);
+			length -= 4;
+			ptr += 4;
+		}
+		break;
+	}
+
+	kbuf->timestamp += delta;
+	kbuf->index = calc_index(kbuf, ptr);
+	kbuf->next = kbuf->index + length;
+
+	return type;
+}
+
+static int __old_next_event(struct kbuffer *kbuf)
+{
+	int type;
+
+	do {
+		kbuf->curr = kbuf->next;
+		if (kbuf->next >= kbuf->size)
+			return -1;
+		type = old_update_pointers(kbuf);
+	} while (type == OLD_RINGBUF_TYPE_TIME_EXTEND || type == OLD_RINGBUF_TYPE_PADDING);
+
+	return 0;
+}
+
+static unsigned int
+translate_data(struct kbuffer *kbuf, void *data, void **rptr,
+	       unsigned long long *delta, int *length)
+{
+	unsigned long long extend;
+	unsigned int type_len_ts;
+	unsigned int type_len;
+
+	type_len_ts = read_4(kbuf, data);
+	data += 4;
+
+	type_len = type_len4host(kbuf, type_len_ts);
+	*delta = ts4host(kbuf, type_len_ts);
+
+	switch (type_len) {
+	case KBUFFER_TYPE_PADDING:
+		*length = read_4(kbuf, data);
+		break;
+
+	case KBUFFER_TYPE_TIME_EXTEND:
+	case KBUFFER_TYPE_TIME_STAMP:
+		extend = read_4(kbuf, data);
+		data += 4;
+		extend <<= TS_SHIFT;
+		extend += *delta;
+		*delta = extend;
+		*length = 0;
+		break;
+
+	case 0:
+		*length = read_4(kbuf, data) - 4;
+		*length = (*length + 3) & ~3;
+		data += 4;
+		break;
+	default:
+		*length = type_len * 4;
+		break;
+	}
+
+	*rptr = data;
+
+	return type_len;
+}
+
+static unsigned int update_pointers(struct kbuffer *kbuf)
+{
+	unsigned long long delta;
+	unsigned int type_len;
+	int length;
+	void *ptr = kbuf->data + kbuf->curr;
+
+	type_len = translate_data(kbuf, ptr, &ptr, &delta, &length);
+
+	if (type_len == KBUFFER_TYPE_TIME_STAMP)
+		kbuf->timestamp = delta;
+	else
+		kbuf->timestamp += delta;
+
+	kbuf->index = calc_index(kbuf, ptr);
+	kbuf->next = kbuf->index + length;
+
+	return type_len;
+}
+
+/**
+ * kbuffer_translate_data - read raw data to get a record
+ * @swap:	Set to 1 if bytes in words need to be swapped when read
+ * @data:	The raw data to read
+ * @size:	Address to store the size of the event data.
+ *
+ * Returns a pointer to the event data. To determine the entire
+ * record size (record metadata + data) just add the difference between
+ * @data and the returned value to @size.
+ */
+void *kbuffer_translate_data(int swap, void *data, unsigned int *size)
+{
+	unsigned long long delta;
+	struct kbuffer kbuf;
+	int type_len;
+	int length;
+	void *ptr;
+
+	if (swap) {
+		kbuf.read_8 = __read_8_sw;
+		kbuf.read_4 = __read_4_sw;
+		kbuf.flags = host_is_bigendian() ? 0 : KBUFFER_FL_BIG_ENDIAN;
+	} else {
+		kbuf.read_8 = __read_8;
+		kbuf.read_4 = __read_4;
+		kbuf.flags = host_is_bigendian() ? KBUFFER_FL_BIG_ENDIAN: 0;
+	}
+
+	type_len = translate_data(&kbuf, data, &ptr, &delta, &length);
+	switch (type_len) {
+	case KBUFFER_TYPE_PADDING:
+	case KBUFFER_TYPE_TIME_EXTEND:
+	case KBUFFER_TYPE_TIME_STAMP:
+		return NULL;
+	}
+
+	*size = length;
+
+	return ptr;
+}
+
+static int __next_event(struct kbuffer *kbuf)
+{
+	int type;
+
+	do {
+		kbuf->curr = kbuf->next;
+		if (kbuf->next >= kbuf->size)
+			return -1;
+		type = update_pointers(kbuf);
+	} while (type == KBUFFER_TYPE_TIME_EXTEND ||
+		 type == KBUFFER_TYPE_TIME_STAMP ||
+		 type == KBUFFER_TYPE_PADDING);
+
+	return 0;
+}
+
+static int next_event(struct kbuffer *kbuf)
+{
+	return kbuf->next_event(kbuf);
+}
+
+/**
+ * kbuffer_next_event - increment the current pointer
+ * @kbuf:	The kbuffer to read
+ * @ts:		Address to store the next record's timestamp (may be NULL to ignore)
+ *
+ * Increments the pointers into the subbuffer of the kbuffer to point to the
+ * next event so that the next kbuffer_read_event() will return a
+ * new event.
+ *
+ * Returns the data of the next event if a new event exists on the subbuffer,
+ * NULL otherwise.
+ */
+void *kbuffer_next_event(struct kbuffer *kbuf, unsigned long long *ts)
+{
+	int ret;
+
+	if (!kbuf || !kbuf->subbuffer)
+		return NULL;
+
+	ret = next_event(kbuf);
+	if (ret < 0)
+		return NULL;
+
+	if (ts)
+		*ts = kbuf->timestamp;
+
+	return kbuf->data + kbuf->index;
+}
+
+/**
+ * kbuffer_load_subbuffer - load a new subbuffer into the kbuffer
+ * @kbuf:	The kbuffer to load
+ * @subbuffer:	The subbuffer to load into @kbuf.
+ *
+ * Load a new subbuffer (page) into @kbuf. This will reset all
+ * the pointers and update the @kbuf timestamp. The next read will
+ * return the first event on @subbuffer.
+ *
+ * Returns 0 on succes, -1 otherwise.
+ */
+int kbuffer_load_subbuffer(struct kbuffer *kbuf, void *subbuffer)
+{
+	unsigned long long flags;
+	void *ptr = subbuffer;
+
+	if (!kbuf || !subbuffer)
+		return -1;
+
+	kbuf->subbuffer = subbuffer;
+
+	kbuf->timestamp = read_8(kbuf, ptr);
+	ptr += 8;
+
+	kbuf->curr = 0;
+
+	if (kbuf->flags & KBUFFER_FL_LONG_8)
+		kbuf->start = 16;
+	else
+		kbuf->start = 12;
+
+	kbuf->data = subbuffer + kbuf->start;
+
+	flags = read_long(kbuf, ptr);
+	kbuf->size = (unsigned int)flags & COMMIT_MASK;
+
+	if (flags & MISSING_EVENTS) {
+		if (flags & MISSING_STORED) {
+			ptr = kbuf->data + kbuf->size;
+			kbuf->lost_events = read_long(kbuf, ptr);
+		} else
+			kbuf->lost_events = -1;
+	} else
+		kbuf->lost_events = 0;
+
+	kbuf->index = 0;
+	kbuf->next = 0;
+
+	next_event(kbuf);
+
+	return 0;
+}
+
+/**
+ * kbuffer_subbuf_timestamp - read the timestamp from a sub buffer
+ * @kbuf:      The kbuffer to load
+ * @subbuf:    The subbuffer to read from.
+ *
+ * Return the timestamp from a subbuffer.
+ */
+unsigned long long kbuffer_subbuf_timestamp(struct kbuffer *kbuf, void *subbuf)
+{
+	return kbuf->read_8(subbuf);
+}
+
+/**
+ * kbuffer_ptr_delta - read the delta field from a record
+ * @kbuf:      The kbuffer to load
+ * @ptr:       The record in the buffe.
+ *
+ * Return the timestamp delta from a record
+ */
+unsigned int kbuffer_ptr_delta(struct kbuffer *kbuf, void *ptr)
+{
+	unsigned int type_len_ts;
+
+	type_len_ts = read_4(kbuf, ptr);
+	return ts4host(kbuf, type_len_ts);
+}
+
+
+/**
+ * kbuffer_read_event - read the next event in the kbuffer subbuffer
+ * @kbuf:	The kbuffer to read from
+ * @ts:		The address to store the timestamp of the event (may be NULL to ignore)
+ *
+ * Returns a pointer to the data part of the current event.
+ * NULL if no event is left on the subbuffer.
+ */
+void *kbuffer_read_event(struct kbuffer *kbuf, unsigned long long *ts)
+{
+	if (!kbuf || !kbuf->subbuffer)
+		return NULL;
+
+	if (kbuf->curr >= kbuf->size)
+		return NULL;
+
+	if (ts)
+		*ts = kbuf->timestamp;
+	return kbuf->data + kbuf->index;
+}
+
+/**
+ * kbuffer_timestamp - Return the timestamp of the current event
+ * @kbuf:	The kbuffer to read from
+ *
+ * Returns the timestamp of the current (next) event.
+ */
+unsigned long long kbuffer_timestamp(struct kbuffer *kbuf)
+{
+	return kbuf->timestamp;
+}
+
+/**
+ * kbuffer_read_at_offset - read the event that is at offset
+ * @kbuf:	The kbuffer to read from
+ * @offset:	The offset into the subbuffer
+ * @ts:		The address to store the timestamp of the event (may be NULL to ignore)
+ *
+ * The @offset must be an index from the @kbuf subbuffer beginning.
+ * If @offset is bigger than the stored subbuffer, NULL will be returned.
+ *
+ * Returns the data of the record that is at @offset. Note, @offset does
+ * not need to be the start of the record, the offset just needs to be
+ * in the record (or beginning of it).
+ *
+ * Note, the kbuf timestamp and pointers are updated to the
+ * returned record. That is, kbuffer_read_event() will return the same
+ * data and timestamp, and kbuffer_next_event() will increment from
+ * this record.
+ */
+void *kbuffer_read_at_offset(struct kbuffer *kbuf, int offset,
+			     unsigned long long *ts)
+{
+	void *data;
+
+	if (offset < kbuf->start)
+		offset = 0;
+	else
+		offset -= kbuf->start;
+
+	/* Reset the buffer */
+	kbuffer_load_subbuffer(kbuf, kbuf->subbuffer);
+	data = kbuffer_read_event(kbuf, ts);
+
+	while (kbuf->curr < offset) {
+		data = kbuffer_next_event(kbuf, ts);
+		if (!data)
+			break;
+	}
+
+	return data;
+}
+
+/**
+ * kbuffer_subbuffer_size - the size of the loaded subbuffer
+ * @kbuf:	The kbuffer to read from
+ *
+ * Returns the size of the subbuffer. Note, this size is
+ * where the last event resides. The stored subbuffer may actually be
+ * bigger due to padding and such.
+ */
+int kbuffer_subbuffer_size(struct kbuffer *kbuf)
+{
+	return kbuf->size;
+}
+
+/**
+ * kbuffer_curr_index - Return the index of the record
+ * @kbuf:	The kbuffer to read from
+ *
+ * Returns the index from the start of the data part of
+ * the subbuffer to the current location. Note this is not
+ * from the start of the subbuffer. An index of zero will
+ * point to the first record. Use kbuffer_curr_offset() for
+ * the actually offset (that can be used by kbuffer_read_at_offset())
+ */
+int kbuffer_curr_index(struct kbuffer *kbuf)
+{
+	return kbuf->curr;
+}
+
+/**
+ * kbuffer_curr_offset - Return the offset of the record
+ * @kbuf:	The kbuffer to read from
+ *
+ * Returns the offset from the start of the subbuffer to the
+ * current location.
+ */
+int kbuffer_curr_offset(struct kbuffer *kbuf)
+{
+	return kbuf->curr + kbuf->start;
+}
+
+/**
+ * kbuffer_event_size - return the size of the event data
+ * @kbuf:	The kbuffer to read
+ *
+ * Returns the size of the event data (the payload not counting
+ * the meta data of the record) of the current event.
+ */
+int kbuffer_event_size(struct kbuffer *kbuf)
+{
+	return kbuf->next - kbuf->index;
+}
+
+/**
+ * kbuffer_curr_size - return the size of the entire record
+ * @kbuf:	The kbuffer to read
+ *
+ * Returns the size of the entire record (meta data and payload)
+ * of the current event.
+ */
+int kbuffer_curr_size(struct kbuffer *kbuf)
+{
+	return kbuf->next - kbuf->curr;
+}
+
+/**
+ * kbuffer_missed_events - return the # of missed events from last event.
+ * @kbuf: 	The kbuffer to read from
+ *
+ * Returns the # of missed events (if recorded) before the current
+ * event. Note, only events on the beginning of a subbuffer can
+ * have missed events, all other events within the buffer will be
+ * zero.
+ */
+int kbuffer_missed_events(struct kbuffer *kbuf)
+{
+	/* Only the first event can have missed events */
+	if (kbuf->curr)
+		return 0;
+
+	return kbuf->lost_events;
+}
+
+/**
+ * kbuffer_set_old_forma - set the kbuffer to use the old format parsing
+ * @kbuf:	The kbuffer to set
+ *
+ * This is obsolete (or should be). The first kernels to use the
+ * new ring buffer had a slightly different ring buffer format
+ * (2.6.30 and earlier). It is still somewhat supported by kbuffer,
+ * but should not be counted on in the future.
+ */
+void kbuffer_set_old_format(struct kbuffer *kbuf)
+{
+	kbuf->flags |= KBUFFER_FL_OLD_FORMAT;
+
+	kbuf->next_event = __old_next_event;
+}
+
+/**
+ * kbuffer_start_of_data - return offset of where data starts on subbuffer
+ * @kbuf:	The kbuffer
+ *
+ * Returns the location on the subbuffer where the data starts.
+ */
+int kbuffer_start_of_data(struct kbuffer *kbuf)
+{
+	return kbuf->start;
+}
+
+/**
+ * kbuffer_raw_get - get raw buffer info
+ * @kbuf:	The kbuffer
+ * @subbuf:	Start of mapped subbuffer
+ * @info:	Info descriptor to fill in
+ *
+ * For debugging. This can return internals of the ring buffer.
+ * Expects to have info->next set to what it will read.
+ * The type, length and timestamp delta will be filled in, and
+ * @info->next will be updated to the next element.
+ * The @subbuf is used to know if the info is passed the end of
+ * data and NULL will be returned if it is.
+ */
+struct kbuffer_raw_info *
+kbuffer_raw_get(struct kbuffer *kbuf, void *subbuf, struct kbuffer_raw_info *info)
+{
+	unsigned long long flags;
+	unsigned long long delta;
+	unsigned int type_len;
+	unsigned int size;
+	int start;
+	int length;
+	void *ptr = info->next;
+
+	if (!kbuf || !subbuf)
+		return NULL;
+
+	if (kbuf->flags & KBUFFER_FL_LONG_8)
+		start = 16;
+	else
+		start = 12;
+
+	flags = read_long(kbuf, subbuf + 8);
+	size = (unsigned int)flags & COMMIT_MASK;
+
+	if (ptr < subbuf || ptr >= subbuf + start + size)
+		return NULL;
+
+	type_len = translate_data(kbuf, ptr, &ptr, &delta, &length);
+
+	info->next = ptr + length;
+
+	info->type = type_len;
+	info->delta = delta;
+	info->length = length;
+
+	return info;
+}
diff --git a/tools/lib/traceevent/kbuffer.h b/tools/lib/traceevent/kbuffer.h
new file mode 100644
index 000000000..a2b522093
--- /dev/null
+++ b/tools/lib/traceevent/kbuffer.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+/*
+ * Copyright (C) 2012 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#ifndef _KBUFFER_H
+#define _KBUFFER_H
+
+#ifndef TS_SHIFT
+#define TS_SHIFT		27
+#endif
+
+enum kbuffer_endian {
+	KBUFFER_ENDIAN_BIG,
+	KBUFFER_ENDIAN_LITTLE,
+};
+
+enum kbuffer_long_size {
+	KBUFFER_LSIZE_4,
+	KBUFFER_LSIZE_8,
+};
+
+enum {
+	KBUFFER_TYPE_PADDING		= 29,
+	KBUFFER_TYPE_TIME_EXTEND	= 30,
+	KBUFFER_TYPE_TIME_STAMP		= 31,
+};
+
+struct kbuffer;
+
+struct kbuffer *kbuffer_alloc(enum kbuffer_long_size size, enum kbuffer_endian endian);
+void kbuffer_free(struct kbuffer *kbuf);
+int kbuffer_load_subbuffer(struct kbuffer *kbuf, void *subbuffer);
+void *kbuffer_read_event(struct kbuffer *kbuf, unsigned long long *ts);
+void *kbuffer_next_event(struct kbuffer *kbuf, unsigned long long *ts);
+unsigned long long kbuffer_timestamp(struct kbuffer *kbuf);
+unsigned long long kbuffer_subbuf_timestamp(struct kbuffer *kbuf, void *subbuf);
+unsigned int kbuffer_ptr_delta(struct kbuffer *kbuf, void *ptr);
+
+void *kbuffer_translate_data(int swap, void *data, unsigned int *size);
+
+void *kbuffer_read_at_offset(struct kbuffer *kbuf, int offset, unsigned long long *ts);
+
+int kbuffer_curr_index(struct kbuffer *kbuf);
+
+int kbuffer_curr_offset(struct kbuffer *kbuf);
+int kbuffer_curr_size(struct kbuffer *kbuf);
+int kbuffer_event_size(struct kbuffer *kbuf);
+int kbuffer_missed_events(struct kbuffer *kbuf);
+int kbuffer_subbuffer_size(struct kbuffer *kbuf);
+
+void kbuffer_set_old_format(struct kbuffer *kbuf);
+int kbuffer_start_of_data(struct kbuffer *kbuf);
+
+/* Debugging */
+
+struct kbuffer_raw_info {
+	int			type;
+	int			length;
+	unsigned long long	delta;
+	void			*next;
+};
+
+/* Read raw data */
+struct kbuffer_raw_info *kbuffer_raw_get(struct kbuffer *kbuf, void *subbuf,
+					 struct kbuffer_raw_info *info);
+
+#endif /* _K_BUFFER_H */
diff --git a/tools/lib/traceevent/libtraceevent.pc.template b/tools/lib/traceevent/libtraceevent.pc.template
new file mode 100644
index 000000000..86384fcd5
--- /dev/null
+++ b/tools/lib/traceevent/libtraceevent.pc.template
@@ -0,0 +1,10 @@
+prefix=INSTALL_PREFIX
+libdir=LIB_DIR
+includedir=HEADER_DIR
+
+Name: libtraceevent
+URL: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
+Description: Linux kernel trace event library
+Version: LIB_VERSION
+Cflags: -I${includedir}
+Libs: -L${libdir} -ltraceevent
diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
new file mode 100644
index 000000000..368826bb5
--- /dev/null
+++ b/tools/lib/traceevent/parse-filter.c
@@ -0,0 +1,2278 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <sys/types.h>
+
+#include "event-parse.h"
+#include "event-parse-local.h"
+#include "event-utils.h"
+
+#define COMM "COMM"
+#define CPU "CPU"
+
+static struct tep_format_field comm = {
+	.name = "COMM",
+};
+
+static struct tep_format_field cpu = {
+	.name = "CPU",
+};
+
+struct event_list {
+	struct event_list	*next;
+	struct tep_event	*event;
+};
+
+static void show_error(char *error_buf, const char *fmt, ...)
+{
+	unsigned long long index;
+	const char *input;
+	va_list ap;
+	int len;
+	int i;
+
+	input = get_input_buf();
+	index = get_input_buf_ptr();
+	len = input ? strlen(input) : 0;
+
+	if (len) {
+		strcpy(error_buf, input);
+		error_buf[len] = '\n';
+		for (i = 1; i < len && i < index; i++)
+			error_buf[len+i] = ' ';
+		error_buf[len + i] = '^';
+		error_buf[len + i + 1] = '\n';
+		len += i+2;
+	}
+
+	va_start(ap, fmt);
+	vsnprintf(error_buf + len, TEP_FILTER_ERROR_BUFSZ - len, fmt, ap);
+	va_end(ap);
+}
+
+static enum tep_event_type filter_read_token(char **tok)
+{
+	enum tep_event_type type;
+	char *token = NULL;
+
+	do {
+		free_token(token);
+		type = read_token(&token);
+	} while (type == TEP_EVENT_NEWLINE || type == TEP_EVENT_SPACE);
+
+	/* If token is = or ! check to see if the next char is ~ */
+	if (token &&
+	    (strcmp(token, "=") == 0 || strcmp(token, "!") == 0) &&
+	    peek_char() == '~') {
+		/* append it */
+		*tok = malloc(3);
+		if (*tok == NULL) {
+			free_token(token);
+			return TEP_EVENT_ERROR;
+		}
+		sprintf(*tok, "%c%c", *token, '~');
+		free_token(token);
+		/* Now remove the '~' from the buffer */
+		read_token(&token);
+		free_token(token);
+	} else
+		*tok = token;
+
+	return type;
+}
+
+static int filter_cmp(const void *a, const void *b)
+{
+	const struct tep_filter_type *ea = a;
+	const struct tep_filter_type *eb = b;
+
+	if (ea->event_id < eb->event_id)
+		return -1;
+
+	if (ea->event_id > eb->event_id)
+		return 1;
+
+	return 0;
+}
+
+static struct tep_filter_type *
+find_filter_type(struct tep_event_filter *filter, int id)
+{
+	struct tep_filter_type *filter_type;
+	struct tep_filter_type key;
+
+	key.event_id = id;
+
+	filter_type = bsearch(&key, filter->event_filters,
+			      filter->filters,
+			      sizeof(*filter->event_filters),
+			      filter_cmp);
+
+	return filter_type;
+}
+
+static struct tep_filter_type *
+add_filter_type(struct tep_event_filter *filter, int id)
+{
+	struct tep_filter_type *filter_type;
+	int i;
+
+	filter_type = find_filter_type(filter, id);
+	if (filter_type)
+		return filter_type;
+
+	filter_type = realloc(filter->event_filters,
+			      sizeof(*filter->event_filters) *
+			      (filter->filters + 1));
+	if (!filter_type)
+		return NULL;
+
+	filter->event_filters = filter_type;
+
+	for (i = 0; i < filter->filters; i++) {
+		if (filter->event_filters[i].event_id > id)
+			break;
+	}
+
+	if (i < filter->filters)
+		memmove(&filter->event_filters[i+1],
+			&filter->event_filters[i],
+			sizeof(*filter->event_filters) *
+			(filter->filters - i));
+
+	filter_type = &filter->event_filters[i];
+	filter_type->event_id = id;
+	filter_type->event = tep_find_event(filter->tep, id);
+	filter_type->filter = NULL;
+
+	filter->filters++;
+
+	return filter_type;
+}
+
+/**
+ * tep_filter_alloc - create a new event filter
+ * @tep: The tep that this filter is associated with
+ */
+struct tep_event_filter *tep_filter_alloc(struct tep_handle *tep)
+{
+	struct tep_event_filter *filter;
+
+	filter = malloc(sizeof(*filter));
+	if (filter == NULL)
+		return NULL;
+
+	memset(filter, 0, sizeof(*filter));
+	filter->tep = tep;
+	tep_ref(tep);
+
+	return filter;
+}
+
+static struct tep_filter_arg *allocate_arg(void)
+{
+	return calloc(1, sizeof(struct tep_filter_arg));
+}
+
+static void free_arg(struct tep_filter_arg *arg)
+{
+	if (!arg)
+		return;
+
+	switch (arg->type) {
+	case TEP_FILTER_ARG_NONE:
+	case TEP_FILTER_ARG_BOOLEAN:
+		break;
+
+	case TEP_FILTER_ARG_NUM:
+		free_arg(arg->num.left);
+		free_arg(arg->num.right);
+		break;
+
+	case TEP_FILTER_ARG_EXP:
+		free_arg(arg->exp.left);
+		free_arg(arg->exp.right);
+		break;
+
+	case TEP_FILTER_ARG_STR:
+		free(arg->str.val);
+		regfree(&arg->str.reg);
+		free(arg->str.buffer);
+		break;
+
+	case TEP_FILTER_ARG_VALUE:
+		if (arg->value.type == TEP_FILTER_STRING ||
+		    arg->value.type == TEP_FILTER_CHAR)
+			free(arg->value.str);
+		break;
+
+	case TEP_FILTER_ARG_OP:
+		free_arg(arg->op.left);
+		free_arg(arg->op.right);
+	default:
+		break;
+	}
+
+	free(arg);
+}
+
+static int add_event(struct event_list **events,
+		     struct tep_event *event)
+{
+	struct event_list *list;
+
+	list = malloc(sizeof(*list));
+	if (list == NULL)
+		return -1;
+
+	list->next = *events;
+	*events = list;
+	list->event = event;
+	return 0;
+}
+
+static int event_match(struct tep_event *event,
+		       regex_t *sreg, regex_t *ereg)
+{
+	if (sreg) {
+		return !regexec(sreg, event->system, 0, NULL, 0) &&
+			!regexec(ereg, event->name, 0, NULL, 0);
+	}
+
+	return !regexec(ereg, event->system, 0, NULL, 0) ||
+		!regexec(ereg, event->name, 0, NULL, 0);
+}
+
+static enum tep_errno
+find_event(struct tep_handle *tep, struct event_list **events,
+	   char *sys_name, char *event_name)
+{
+	struct tep_event *event;
+	regex_t ereg;
+	regex_t sreg;
+	int match = 0;
+	int fail = 0;
+	char *reg;
+	int ret;
+	int i;
+
+	if (!event_name) {
+		/* if no name is given, then swap sys and name */
+		event_name = sys_name;
+		sys_name = NULL;
+	}
+
+	ret = asprintf(&reg, "^%s$", event_name);
+	if (ret < 0)
+		return TEP_ERRNO__MEM_ALLOC_FAILED;
+
+	ret = regcomp(&ereg, reg, REG_ICASE|REG_NOSUB);
+	free(reg);
+
+	if (ret)
+		return TEP_ERRNO__INVALID_EVENT_NAME;
+
+	if (sys_name) {
+		ret = asprintf(&reg, "^%s$", sys_name);
+		if (ret < 0) {
+			regfree(&ereg);
+			return TEP_ERRNO__MEM_ALLOC_FAILED;
+		}
+
+		ret = regcomp(&sreg, reg, REG_ICASE|REG_NOSUB);
+		free(reg);
+		if (ret) {
+			regfree(&ereg);
+			return TEP_ERRNO__INVALID_EVENT_NAME;
+		}
+	}
+
+	for (i = 0; i < tep->nr_events; i++) {
+		event = tep->events[i];
+		if (event_match(event, sys_name ? &sreg : NULL, &ereg)) {
+			match = 1;
+			if (add_event(events, event) < 0) {
+				fail = 1;
+				break;
+			}
+		}
+	}
+
+	regfree(&ereg);
+	if (sys_name)
+		regfree(&sreg);
+
+	if (!match)
+		return TEP_ERRNO__EVENT_NOT_FOUND;
+	if (fail)
+		return TEP_ERRNO__MEM_ALLOC_FAILED;
+
+	return 0;
+}
+
+static void free_events(struct event_list *events)
+{
+	struct event_list *event;
+
+	while (events) {
+		event = events;
+		events = events->next;
+		free(event);
+	}
+}
+
+static enum tep_errno
+create_arg_item(struct tep_event *event, const char *token,
+		enum tep_event_type type, struct tep_filter_arg **parg, char *error_str)
+{
+	struct tep_format_field *field;
+	struct tep_filter_arg *arg;
+
+	arg = allocate_arg();
+	if (arg == NULL) {
+		show_error(error_str, "failed to allocate filter arg");
+		return TEP_ERRNO__MEM_ALLOC_FAILED;
+	}
+
+	switch (type) {
+
+	case TEP_EVENT_SQUOTE:
+	case TEP_EVENT_DQUOTE:
+		arg->type = TEP_FILTER_ARG_VALUE;
+		arg->value.type =
+			type == TEP_EVENT_DQUOTE ? TEP_FILTER_STRING : TEP_FILTER_CHAR;
+		arg->value.str = strdup(token);
+		if (!arg->value.str) {
+			free_arg(arg);
+			show_error(error_str, "failed to allocate string filter arg");
+			return TEP_ERRNO__MEM_ALLOC_FAILED;
+		}
+		break;
+	case TEP_EVENT_ITEM:
+		/* if it is a number, then convert it */
+		if (isdigit(token[0])) {
+			arg->type = TEP_FILTER_ARG_VALUE;
+			arg->value.type = TEP_FILTER_NUMBER;
+			arg->value.val = strtoull(token, NULL, 0);
+			break;
+		}
+		/* Consider this a field */
+		field = tep_find_any_field(event, token);
+		if (!field) {
+			/* If token is 'COMM' or 'CPU' then it is special */
+			if (strcmp(token, COMM) == 0) {
+				field = &comm;
+			} else if (strcmp(token, CPU) == 0) {
+				field = &cpu;
+			} else {
+				/* not a field, Make it false */
+				arg->type = TEP_FILTER_ARG_BOOLEAN;
+				arg->boolean.value = TEP_FILTER_FALSE;
+				break;
+			}
+		}
+		arg->type = TEP_FILTER_ARG_FIELD;
+		arg->field.field = field;
+		break;
+	default:
+		free_arg(arg);
+		show_error(error_str, "expected a value but found %s", token);
+		return TEP_ERRNO__UNEXPECTED_TYPE;
+	}
+	*parg = arg;
+	return 0;
+}
+
+static struct tep_filter_arg *
+create_arg_op(enum tep_filter_op_type btype)
+{
+	struct tep_filter_arg *arg;
+
+	arg = allocate_arg();
+	if (!arg)
+		return NULL;
+
+	arg->type = TEP_FILTER_ARG_OP;
+	arg->op.type = btype;
+
+	return arg;
+}
+
+static struct tep_filter_arg *
+create_arg_exp(enum tep_filter_exp_type etype)
+{
+	struct tep_filter_arg *arg;
+
+	arg = allocate_arg();
+	if (!arg)
+		return NULL;
+
+	arg->type = TEP_FILTER_ARG_EXP;
+	arg->exp.type = etype;
+
+	return arg;
+}
+
+static struct tep_filter_arg *
+create_arg_cmp(enum tep_filter_cmp_type ctype)
+{
+	struct tep_filter_arg *arg;
+
+	arg = allocate_arg();
+	if (!arg)
+		return NULL;
+
+	/* Use NUM and change if necessary */
+	arg->type = TEP_FILTER_ARG_NUM;
+	arg->num.type = ctype;
+
+	return arg;
+}
+
+static enum tep_errno
+add_right(struct tep_filter_arg *op, struct tep_filter_arg *arg, char *error_str)
+{
+	struct tep_filter_arg *left;
+	char *str;
+	int op_type;
+	int ret;
+
+	switch (op->type) {
+	case TEP_FILTER_ARG_EXP:
+		if (op->exp.right)
+			goto out_fail;
+		op->exp.right = arg;
+		break;
+
+	case TEP_FILTER_ARG_OP:
+		if (op->op.right)
+			goto out_fail;
+		op->op.right = arg;
+		break;
+
+	case TEP_FILTER_ARG_NUM:
+		if (op->op.right)
+			goto out_fail;
+		/*
+		 * The arg must be num, str, or field
+		 */
+		switch (arg->type) {
+		case TEP_FILTER_ARG_VALUE:
+		case TEP_FILTER_ARG_FIELD:
+			break;
+		default:
+			show_error(error_str, "Illegal rvalue");
+			return TEP_ERRNO__ILLEGAL_RVALUE;
+		}
+
+		/*
+		 * Depending on the type, we may need to
+		 * convert this to a string or regex.
+		 */
+		switch (arg->value.type) {
+		case TEP_FILTER_CHAR:
+			/*
+			 * A char should be converted to number if
+			 * the string is 1 byte, and the compare
+			 * is not a REGEX.
+			 */
+			if (strlen(arg->value.str) == 1 &&
+			    op->num.type != TEP_FILTER_CMP_REGEX &&
+			    op->num.type != TEP_FILTER_CMP_NOT_REGEX) {
+				arg->value.type = TEP_FILTER_NUMBER;
+				goto do_int;
+			}
+			/* fall through */
+		case TEP_FILTER_STRING:
+
+			/* convert op to a string arg */
+			op_type = op->num.type;
+			left = op->num.left;
+			str = arg->value.str;
+
+			/* reset the op for the new field */
+			memset(op, 0, sizeof(*op));
+
+			/*
+			 * If left arg was a field not found then
+			 * NULL the entire op.
+			 */
+			if (left->type == TEP_FILTER_ARG_BOOLEAN) {
+				free_arg(left);
+				free_arg(arg);
+				op->type = TEP_FILTER_ARG_BOOLEAN;
+				op->boolean.value = TEP_FILTER_FALSE;
+				break;
+			}
+
+			/* Left arg must be a field */
+			if (left->type != TEP_FILTER_ARG_FIELD) {
+				show_error(error_str,
+					   "Illegal lvalue for string comparison");
+				return TEP_ERRNO__ILLEGAL_LVALUE;
+			}
+
+			/* Make sure this is a valid string compare */
+			switch (op_type) {
+			case TEP_FILTER_CMP_EQ:
+				op_type = TEP_FILTER_CMP_MATCH;
+				break;
+			case TEP_FILTER_CMP_NE:
+				op_type = TEP_FILTER_CMP_NOT_MATCH;
+				break;
+
+			case TEP_FILTER_CMP_REGEX:
+			case TEP_FILTER_CMP_NOT_REGEX:
+				ret = regcomp(&op->str.reg, str, REG_ICASE|REG_NOSUB);
+				if (ret) {
+					show_error(error_str,
+						   "RegEx '%s' did not compute",
+						   str);
+					return TEP_ERRNO__INVALID_REGEX;
+				}
+				break;
+			default:
+				show_error(error_str,
+					   "Illegal comparison for string");
+				return TEP_ERRNO__ILLEGAL_STRING_CMP;
+			}
+
+			op->type = TEP_FILTER_ARG_STR;
+			op->str.type = op_type;
+			op->str.field = left->field.field;
+			op->str.val = strdup(str);
+			if (!op->str.val) {
+				show_error(error_str, "Failed to allocate string filter");
+				return TEP_ERRNO__MEM_ALLOC_FAILED;
+			}
+			/*
+			 * Need a buffer to copy data for tests
+			 */
+			op->str.buffer = malloc(op->str.field->size + 1);
+			if (!op->str.buffer) {
+				show_error(error_str, "Failed to allocate string filter");
+				return TEP_ERRNO__MEM_ALLOC_FAILED;
+			}
+			/* Null terminate this buffer */
+			op->str.buffer[op->str.field->size] = 0;
+
+			/* We no longer have left or right args */
+			free_arg(arg);
+			free_arg(left);
+
+			break;
+
+		case TEP_FILTER_NUMBER:
+
+ do_int:
+			switch (op->num.type) {
+			case TEP_FILTER_CMP_REGEX:
+			case TEP_FILTER_CMP_NOT_REGEX:
+				show_error(error_str,
+					   "Op not allowed with integers");
+				return TEP_ERRNO__ILLEGAL_INTEGER_CMP;
+
+			default:
+				break;
+			}
+
+			/* numeric compare */
+			op->num.right = arg;
+			break;
+		default:
+			goto out_fail;
+		}
+		break;
+	default:
+		goto out_fail;
+	}
+
+	return 0;
+
+ out_fail:
+	show_error(error_str, "Syntax error");
+	return TEP_ERRNO__SYNTAX_ERROR;
+}
+
+static struct tep_filter_arg *
+rotate_op_right(struct tep_filter_arg *a, struct tep_filter_arg *b)
+{
+	struct tep_filter_arg *arg;
+
+	arg = a->op.right;
+	a->op.right = b;
+	return arg;
+}
+
+static enum tep_errno add_left(struct tep_filter_arg *op, struct tep_filter_arg *arg)
+{
+	switch (op->type) {
+	case TEP_FILTER_ARG_EXP:
+		if (arg->type == TEP_FILTER_ARG_OP)
+			arg = rotate_op_right(arg, op);
+		op->exp.left = arg;
+		break;
+
+	case TEP_FILTER_ARG_OP:
+		op->op.left = arg;
+		break;
+	case TEP_FILTER_ARG_NUM:
+		if (arg->type == TEP_FILTER_ARG_OP)
+			arg = rotate_op_right(arg, op);
+
+		/* left arg of compares must be a field */
+		if (arg->type != TEP_FILTER_ARG_FIELD &&
+		    arg->type != TEP_FILTER_ARG_BOOLEAN)
+			return TEP_ERRNO__INVALID_ARG_TYPE;
+		op->num.left = arg;
+		break;
+	default:
+		return TEP_ERRNO__INVALID_ARG_TYPE;
+	}
+	return 0;
+}
+
+enum op_type {
+	OP_NONE,
+	OP_BOOL,
+	OP_NOT,
+	OP_EXP,
+	OP_CMP,
+};
+
+static enum op_type process_op(const char *token,
+			       enum tep_filter_op_type *btype,
+			       enum tep_filter_cmp_type *ctype,
+			       enum tep_filter_exp_type *etype)
+{
+	*btype = TEP_FILTER_OP_NOT;
+	*etype = TEP_FILTER_EXP_NONE;
+	*ctype = TEP_FILTER_CMP_NONE;
+
+	if (strcmp(token, "&&") == 0)
+		*btype = TEP_FILTER_OP_AND;
+	else if (strcmp(token, "||") == 0)
+		*btype = TEP_FILTER_OP_OR;
+	else if (strcmp(token, "!") == 0)
+		return OP_NOT;
+
+	if (*btype != TEP_FILTER_OP_NOT)
+		return OP_BOOL;
+
+	/* Check for value expressions */
+	if (strcmp(token, "+") == 0) {
+		*etype = TEP_FILTER_EXP_ADD;
+	} else if (strcmp(token, "-") == 0) {
+		*etype = TEP_FILTER_EXP_SUB;
+	} else if (strcmp(token, "*") == 0) {
+		*etype = TEP_FILTER_EXP_MUL;
+	} else if (strcmp(token, "/") == 0) {
+		*etype = TEP_FILTER_EXP_DIV;
+	} else if (strcmp(token, "%") == 0) {
+		*etype = TEP_FILTER_EXP_MOD;
+	} else if (strcmp(token, ">>") == 0) {
+		*etype = TEP_FILTER_EXP_RSHIFT;
+	} else if (strcmp(token, "<<") == 0) {
+		*etype = TEP_FILTER_EXP_LSHIFT;
+	} else if (strcmp(token, "&") == 0) {
+		*etype = TEP_FILTER_EXP_AND;
+	} else if (strcmp(token, "|") == 0) {
+		*etype = TEP_FILTER_EXP_OR;
+	} else if (strcmp(token, "^") == 0) {
+		*etype = TEP_FILTER_EXP_XOR;
+	} else if (strcmp(token, "~") == 0)
+		*etype = TEP_FILTER_EXP_NOT;
+
+	if (*etype != TEP_FILTER_EXP_NONE)
+		return OP_EXP;
+
+	/* Check for compares */
+	if (strcmp(token, "==") == 0)
+		*ctype = TEP_FILTER_CMP_EQ;
+	else if (strcmp(token, "!=") == 0)
+		*ctype = TEP_FILTER_CMP_NE;
+	else if (strcmp(token, "<") == 0)
+		*ctype = TEP_FILTER_CMP_LT;
+	else if (strcmp(token, ">") == 0)
+		*ctype = TEP_FILTER_CMP_GT;
+	else if (strcmp(token, "<=") == 0)
+		*ctype = TEP_FILTER_CMP_LE;
+	else if (strcmp(token, ">=") == 0)
+		*ctype = TEP_FILTER_CMP_GE;
+	else if (strcmp(token, "=~") == 0)
+		*ctype = TEP_FILTER_CMP_REGEX;
+	else if (strcmp(token, "!~") == 0)
+		*ctype = TEP_FILTER_CMP_NOT_REGEX;
+	else
+		return OP_NONE;
+
+	return OP_CMP;
+}
+
+static int check_op_done(struct tep_filter_arg *arg)
+{
+	switch (arg->type) {
+	case TEP_FILTER_ARG_EXP:
+		return arg->exp.right != NULL;
+
+	case TEP_FILTER_ARG_OP:
+		return arg->op.right != NULL;
+
+	case TEP_FILTER_ARG_NUM:
+		return arg->num.right != NULL;
+
+	case TEP_FILTER_ARG_STR:
+		/* A string conversion is always done */
+		return 1;
+
+	case TEP_FILTER_ARG_BOOLEAN:
+		/* field not found, is ok */
+		return 1;
+
+	default:
+		return 0;
+	}
+}
+
+enum filter_vals {
+	FILTER_VAL_NORM,
+	FILTER_VAL_FALSE,
+	FILTER_VAL_TRUE,
+};
+
+static enum tep_errno
+reparent_op_arg(struct tep_filter_arg *parent, struct tep_filter_arg *old_child,
+		struct tep_filter_arg *arg, char *error_str)
+{
+	struct tep_filter_arg *other_child;
+	struct tep_filter_arg **ptr;
+
+	if (parent->type != TEP_FILTER_ARG_OP &&
+	    arg->type != TEP_FILTER_ARG_OP) {
+		show_error(error_str, "can not reparent other than OP");
+		return TEP_ERRNO__REPARENT_NOT_OP;
+	}
+
+	/* Get the sibling */
+	if (old_child->op.right == arg) {
+		ptr = &old_child->op.right;
+		other_child = old_child->op.left;
+	} else if (old_child->op.left == arg) {
+		ptr = &old_child->op.left;
+		other_child = old_child->op.right;
+	} else {
+		show_error(error_str, "Error in reparent op, find other child");
+		return TEP_ERRNO__REPARENT_FAILED;
+	}
+
+	/* Detach arg from old_child */
+	*ptr = NULL;
+
+	/* Check for root */
+	if (parent == old_child) {
+		free_arg(other_child);
+		*parent = *arg;
+		/* Free arg without recussion */
+		free(arg);
+		return 0;
+	}
+
+	if (parent->op.right == old_child)
+		ptr = &parent->op.right;
+	else if (parent->op.left == old_child)
+		ptr = &parent->op.left;
+	else {
+		show_error(error_str, "Error in reparent op");
+		return TEP_ERRNO__REPARENT_FAILED;
+	}
+
+	*ptr = arg;
+
+	free_arg(old_child);
+	return 0;
+}
+
+/* Returns either filter_vals (success) or tep_errno (failfure) */
+static int test_arg(struct tep_filter_arg *parent, struct tep_filter_arg *arg,
+		    char *error_str)
+{
+	int lval, rval;
+
+	switch (arg->type) {
+
+		/* bad case */
+	case TEP_FILTER_ARG_BOOLEAN:
+		return FILTER_VAL_FALSE + arg->boolean.value;
+
+		/* good cases: */
+	case TEP_FILTER_ARG_STR:
+	case TEP_FILTER_ARG_VALUE:
+	case TEP_FILTER_ARG_FIELD:
+		return FILTER_VAL_NORM;
+
+	case TEP_FILTER_ARG_EXP:
+		lval = test_arg(arg, arg->exp.left, error_str);
+		if (lval != FILTER_VAL_NORM)
+			return lval;
+		rval = test_arg(arg, arg->exp.right, error_str);
+		if (rval != FILTER_VAL_NORM)
+			return rval;
+		return FILTER_VAL_NORM;
+
+	case TEP_FILTER_ARG_NUM:
+		lval = test_arg(arg, arg->num.left, error_str);
+		if (lval != FILTER_VAL_NORM)
+			return lval;
+		rval = test_arg(arg, arg->num.right, error_str);
+		if (rval != FILTER_VAL_NORM)
+			return rval;
+		return FILTER_VAL_NORM;
+
+	case TEP_FILTER_ARG_OP:
+		if (arg->op.type != TEP_FILTER_OP_NOT) {
+			lval = test_arg(arg, arg->op.left, error_str);
+			switch (lval) {
+			case FILTER_VAL_NORM:
+				break;
+			case FILTER_VAL_TRUE:
+				if (arg->op.type == TEP_FILTER_OP_OR)
+					return FILTER_VAL_TRUE;
+				rval = test_arg(arg, arg->op.right, error_str);
+				if (rval != FILTER_VAL_NORM)
+					return rval;
+
+				return reparent_op_arg(parent, arg, arg->op.right,
+						       error_str);
+
+			case FILTER_VAL_FALSE:
+				if (arg->op.type == TEP_FILTER_OP_AND)
+					return FILTER_VAL_FALSE;
+				rval = test_arg(arg, arg->op.right, error_str);
+				if (rval != FILTER_VAL_NORM)
+					return rval;
+
+				return reparent_op_arg(parent, arg, arg->op.right,
+						       error_str);
+
+			default:
+				return lval;
+			}
+		}
+
+		rval = test_arg(arg, arg->op.right, error_str);
+		switch (rval) {
+		case FILTER_VAL_NORM:
+		default:
+			break;
+
+		case FILTER_VAL_TRUE:
+			if (arg->op.type == TEP_FILTER_OP_OR)
+				return FILTER_VAL_TRUE;
+			if (arg->op.type == TEP_FILTER_OP_NOT)
+				return FILTER_VAL_FALSE;
+
+			return reparent_op_arg(parent, arg, arg->op.left,
+					       error_str);
+
+		case FILTER_VAL_FALSE:
+			if (arg->op.type == TEP_FILTER_OP_AND)
+				return FILTER_VAL_FALSE;
+			if (arg->op.type == TEP_FILTER_OP_NOT)
+				return FILTER_VAL_TRUE;
+
+			return reparent_op_arg(parent, arg, arg->op.left,
+					       error_str);
+		}
+
+		return rval;
+	default:
+		show_error(error_str, "bad arg in filter tree");
+		return TEP_ERRNO__BAD_FILTER_ARG;
+	}
+	return FILTER_VAL_NORM;
+}
+
+/* Remove any unknown event fields */
+static int collapse_tree(struct tep_filter_arg *arg,
+			 struct tep_filter_arg **arg_collapsed, char *error_str)
+{
+	int ret;
+
+	ret = test_arg(arg, arg, error_str);
+	switch (ret) {
+	case FILTER_VAL_NORM:
+		break;
+
+	case FILTER_VAL_TRUE:
+	case FILTER_VAL_FALSE:
+		free_arg(arg);
+		arg = allocate_arg();
+		if (arg) {
+			arg->type = TEP_FILTER_ARG_BOOLEAN;
+			arg->boolean.value = ret == FILTER_VAL_TRUE;
+		} else {
+			show_error(error_str, "Failed to allocate filter arg");
+			ret = TEP_ERRNO__MEM_ALLOC_FAILED;
+		}
+		break;
+
+	default:
+		/* test_arg() already set the error_str */
+		free_arg(arg);
+		arg = NULL;
+		break;
+	}
+
+	*arg_collapsed = arg;
+	return ret;
+}
+
+static enum tep_errno
+process_filter(struct tep_event *event, struct tep_filter_arg **parg,
+	       char *error_str, int not)
+{
+	enum tep_event_type type;
+	char *token = NULL;
+	struct tep_filter_arg *current_op = NULL;
+	struct tep_filter_arg *current_exp = NULL;
+	struct tep_filter_arg *left_item = NULL;
+	struct tep_filter_arg *arg = NULL;
+	enum op_type op_type;
+	enum tep_filter_op_type btype;
+	enum tep_filter_exp_type etype;
+	enum tep_filter_cmp_type ctype;
+	enum tep_errno ret;
+
+	*parg = NULL;
+
+	do {
+		free(token);
+		type = filter_read_token(&token);
+		switch (type) {
+		case TEP_EVENT_SQUOTE:
+		case TEP_EVENT_DQUOTE:
+		case TEP_EVENT_ITEM:
+			ret = create_arg_item(event, token, type, &arg, error_str);
+			if (ret < 0)
+				goto fail;
+			if (!left_item)
+				left_item = arg;
+			else if (current_exp) {
+				ret = add_right(current_exp, arg, error_str);
+				if (ret < 0)
+					goto fail;
+				left_item = NULL;
+				/* Not's only one one expression */
+				if (not) {
+					arg = NULL;
+					if (current_op)
+						goto fail_syntax;
+					free(token);
+					*parg = current_exp;
+					return 0;
+				}
+			} else
+				goto fail_syntax;
+			arg = NULL;
+			break;
+
+		case TEP_EVENT_DELIM:
+			if (*token == ',') {
+				show_error(error_str, "Illegal token ','");
+				ret = TEP_ERRNO__ILLEGAL_TOKEN;
+				goto fail;
+			}
+
+			if (*token == '(') {
+				if (left_item) {
+					show_error(error_str,
+						   "Open paren can not come after item");
+					ret = TEP_ERRNO__INVALID_PAREN;
+					goto fail;
+				}
+				if (current_exp) {
+					show_error(error_str,
+						   "Open paren can not come after expression");
+					ret = TEP_ERRNO__INVALID_PAREN;
+					goto fail;
+				}
+
+				ret = process_filter(event, &arg, error_str, 0);
+				if (ret != TEP_ERRNO__UNBALANCED_PAREN) {
+					if (ret == 0) {
+						show_error(error_str,
+							   "Unbalanced number of '('");
+						ret = TEP_ERRNO__UNBALANCED_PAREN;
+					}
+					goto fail;
+				}
+				ret = 0;
+
+				/* A not wants just one expression */
+				if (not) {
+					if (current_op)
+						goto fail_syntax;
+					*parg = arg;
+					return 0;
+				}
+
+				if (current_op)
+					ret = add_right(current_op, arg, error_str);
+				else
+					current_exp = arg;
+
+				if (ret < 0)
+					goto fail;
+
+			} else { /* ')' */
+				if (!current_op && !current_exp)
+					goto fail_syntax;
+
+				/* Make sure everything is finished at this level */
+				if (current_exp && !check_op_done(current_exp))
+					goto fail_syntax;
+				if (current_op && !check_op_done(current_op))
+					goto fail_syntax;
+
+				if (current_op)
+					*parg = current_op;
+				else
+					*parg = current_exp;
+				free(token);
+				return TEP_ERRNO__UNBALANCED_PAREN;
+			}
+			break;
+
+		case TEP_EVENT_OP:
+			op_type = process_op(token, &btype, &ctype, &etype);
+
+			/* All expect a left arg except for NOT */
+			switch (op_type) {
+			case OP_BOOL:
+				/* Logic ops need a left expression */
+				if (!current_exp && !current_op)
+					goto fail_syntax;
+				/* fall through */
+			case OP_NOT:
+				/* logic only processes ops and exp */
+				if (left_item)
+					goto fail_syntax;
+				break;
+			case OP_EXP:
+			case OP_CMP:
+				if (!left_item)
+					goto fail_syntax;
+				break;
+			case OP_NONE:
+				show_error(error_str,
+					   "Unknown op token %s", token);
+				ret = TEP_ERRNO__UNKNOWN_TOKEN;
+				goto fail;
+			}
+
+			ret = 0;
+			switch (op_type) {
+			case OP_BOOL:
+				arg = create_arg_op(btype);
+				if (arg == NULL)
+					goto fail_alloc;
+				if (current_op)
+					ret = add_left(arg, current_op);
+				else
+					ret = add_left(arg, current_exp);
+				current_op = arg;
+				current_exp = NULL;
+				break;
+
+			case OP_NOT:
+				arg = create_arg_op(btype);
+				if (arg == NULL)
+					goto fail_alloc;
+				if (current_op)
+					ret = add_right(current_op, arg, error_str);
+				if (ret < 0)
+					goto fail;
+				current_exp = arg;
+				ret = process_filter(event, &arg, error_str, 1);
+				if (ret < 0)
+					goto fail;
+				ret = add_right(current_exp, arg, error_str);
+				if (ret < 0)
+					goto fail;
+				break;
+
+			case OP_EXP:
+			case OP_CMP:
+				if (op_type == OP_EXP)
+					arg = create_arg_exp(etype);
+				else
+					arg = create_arg_cmp(ctype);
+				if (arg == NULL)
+					goto fail_alloc;
+
+				if (current_op)
+					ret = add_right(current_op, arg, error_str);
+				if (ret < 0)
+					goto fail;
+				ret = add_left(arg, left_item);
+				if (ret < 0) {
+					arg = NULL;
+					goto fail_syntax;
+				}
+				current_exp = arg;
+				break;
+			default:
+				break;
+			}
+			arg = NULL;
+			if (ret < 0)
+				goto fail_syntax;
+			break;
+		case TEP_EVENT_NONE:
+			break;
+		case TEP_EVENT_ERROR:
+			goto fail_alloc;
+		default:
+			goto fail_syntax;
+		}
+	} while (type != TEP_EVENT_NONE);
+
+	if (!current_op && !current_exp)
+		goto fail_syntax;
+
+	if (!current_op)
+		current_op = current_exp;
+
+	ret = collapse_tree(current_op, parg, error_str);
+	/* collapse_tree() may free current_op, and updates parg accordingly */
+	current_op = NULL;
+	if (ret < 0)
+		goto fail;
+
+	free(token);
+	return 0;
+
+ fail_alloc:
+	show_error(error_str, "failed to allocate filter arg");
+	ret = TEP_ERRNO__MEM_ALLOC_FAILED;
+	goto fail;
+ fail_syntax:
+	show_error(error_str, "Syntax error");
+	ret = TEP_ERRNO__SYNTAX_ERROR;
+ fail:
+	free_arg(current_op);
+	free_arg(current_exp);
+	free_arg(arg);
+	free(token);
+	return ret;
+}
+
+static enum tep_errno
+process_event(struct tep_event *event, const char *filter_str,
+	      struct tep_filter_arg **parg, char *error_str)
+{
+	int ret;
+
+	init_input_buf(filter_str, strlen(filter_str));
+
+	ret = process_filter(event, parg, error_str, 0);
+	if (ret < 0)
+		return ret;
+
+	/* If parg is NULL, then make it into FALSE */
+	if (!*parg) {
+		*parg = allocate_arg();
+		if (*parg == NULL)
+			return TEP_ERRNO__MEM_ALLOC_FAILED;
+
+		(*parg)->type = TEP_FILTER_ARG_BOOLEAN;
+		(*parg)->boolean.value = TEP_FILTER_FALSE;
+	}
+
+	return 0;
+}
+
+static enum tep_errno
+filter_event(struct tep_event_filter *filter, struct tep_event *event,
+	     const char *filter_str, char *error_str)
+{
+	struct tep_filter_type *filter_type;
+	struct tep_filter_arg *arg;
+	enum tep_errno ret;
+
+	if (filter_str) {
+		ret = process_event(event, filter_str, &arg, error_str);
+		if (ret < 0)
+			return ret;
+
+	} else {
+		/* just add a TRUE arg */
+		arg = allocate_arg();
+		if (arg == NULL)
+			return TEP_ERRNO__MEM_ALLOC_FAILED;
+
+		arg->type = TEP_FILTER_ARG_BOOLEAN;
+		arg->boolean.value = TEP_FILTER_TRUE;
+	}
+
+	filter_type = add_filter_type(filter, event->id);
+	if (filter_type == NULL) {
+		free_arg(arg);
+		return TEP_ERRNO__MEM_ALLOC_FAILED;
+	}
+
+	if (filter_type->filter)
+		free_arg(filter_type->filter);
+	filter_type->filter = arg;
+
+	return 0;
+}
+
+static void filter_init_error_buf(struct tep_event_filter *filter)
+{
+	/* clear buffer to reset show error */
+	init_input_buf("", 0);
+	filter->error_buffer[0] = '\0';
+}
+
+/**
+ * tep_filter_add_filter_str - add a new filter
+ * @filter: the event filter to add to
+ * @filter_str: the filter string that contains the filter
+ *
+ * Returns 0 if the filter was successfully added or a
+ * negative error code.  Use tep_filter_strerror() to see
+ * actual error message in case of error.
+ */
+enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter,
+					 const char *filter_str)
+{
+	struct tep_handle *tep = filter->tep;
+	struct event_list *event;
+	struct event_list *events = NULL;
+	const char *filter_start;
+	const char *next_event;
+	char *this_event;
+	char *event_name = NULL;
+	char *sys_name = NULL;
+	char *sp;
+	enum tep_errno rtn = 0; /* TEP_ERRNO__SUCCESS */
+	int len;
+	int ret;
+
+	filter_init_error_buf(filter);
+
+	filter_start = strchr(filter_str, ':');
+	if (filter_start)
+		len = filter_start - filter_str;
+	else
+		len = strlen(filter_str);
+
+	do {
+		next_event = strchr(filter_str, ',');
+		if (next_event &&
+		    (!filter_start || next_event < filter_start))
+			len = next_event - filter_str;
+		else if (filter_start)
+			len = filter_start - filter_str;
+		else
+			len = strlen(filter_str);
+
+		this_event = malloc(len + 1);
+		if (this_event == NULL) {
+			/* This can only happen when events is NULL, but still */
+			free_events(events);
+			return TEP_ERRNO__MEM_ALLOC_FAILED;
+		}
+		memcpy(this_event, filter_str, len);
+		this_event[len] = 0;
+
+		if (next_event)
+			next_event++;
+
+		filter_str = next_event;
+
+		sys_name = strtok_r(this_event, "/", &sp);
+		event_name = strtok_r(NULL, "/", &sp);
+
+		if (!sys_name) {
+			/* This can only happen when events is NULL, but still */
+			free_events(events);
+			free(this_event);
+			return TEP_ERRNO__FILTER_NOT_FOUND;
+		}
+
+		/* Find this event */
+		ret = find_event(tep, &events, strim(sys_name), strim(event_name));
+		if (ret < 0) {
+			free_events(events);
+			free(this_event);
+			return ret;
+		}
+		free(this_event);
+	} while (filter_str);
+
+	/* Skip the ':' */
+	if (filter_start)
+		filter_start++;
+
+	/* filter starts here */
+	for (event = events; event; event = event->next) {
+		ret = filter_event(filter, event->event, filter_start,
+				   filter->error_buffer);
+		/* Failures are returned if a parse error happened */
+		if (ret < 0)
+			rtn = ret;
+
+		if (ret >= 0 && tep->test_filters) {
+			char *test;
+			test = tep_filter_make_string(filter, event->event->id);
+			if (test) {
+				printf(" '%s: %s'\n", event->event->name, test);
+				free(test);
+			}
+		}
+	}
+
+	free_events(events);
+
+	return rtn;
+}
+
+static void free_filter_type(struct tep_filter_type *filter_type)
+{
+	free_arg(filter_type->filter);
+}
+
+/**
+ * tep_filter_strerror - fill error message in a buffer
+ * @filter: the event filter contains error
+ * @err: the error code
+ * @buf: the buffer to be filled in
+ * @buflen: the size of the buffer
+ *
+ * Returns 0 if message was filled successfully, -1 if error
+ */
+int tep_filter_strerror(struct tep_event_filter *filter, enum tep_errno err,
+			char *buf, size_t buflen)
+{
+	if (err <= __TEP_ERRNO__START || err >= __TEP_ERRNO__END)
+		return -1;
+
+	if (strlen(filter->error_buffer) > 0) {
+		size_t len = snprintf(buf, buflen, "%s", filter->error_buffer);
+
+		if (len > buflen)
+			return -1;
+		return 0;
+	}
+
+	return tep_strerror(filter->tep, err, buf, buflen);
+}
+
+/**
+ * tep_filter_remove_event - remove a filter for an event
+ * @filter: the event filter to remove from
+ * @event_id: the event to remove a filter for
+ *
+ * Removes the filter saved for an event defined by @event_id
+ * from the @filter.
+ *
+ * Returns 1: if an event was removed
+ *   0: if the event was not found
+ */
+int tep_filter_remove_event(struct tep_event_filter *filter,
+			    int event_id)
+{
+	struct tep_filter_type *filter_type;
+	unsigned long len;
+
+	if (!filter->filters)
+		return 0;
+
+	filter_type = find_filter_type(filter, event_id);
+
+	if (!filter_type)
+		return 0;
+
+	free_filter_type(filter_type);
+
+	/* The filter_type points into the event_filters array */
+	len = (unsigned long)(filter->event_filters + filter->filters) -
+		(unsigned long)(filter_type + 1);
+
+	memmove(filter_type, filter_type + 1, len);
+	filter->filters--;
+
+	memset(&filter->event_filters[filter->filters], 0,
+	       sizeof(*filter_type));
+
+	return 1;
+}
+
+/**
+ * tep_filter_reset - clear all filters in a filter
+ * @filter: the event filter to reset
+ *
+ * Removes all filters from a filter and resets it.
+ */
+void tep_filter_reset(struct tep_event_filter *filter)
+{
+	int i;
+
+	for (i = 0; i < filter->filters; i++)
+		free_filter_type(&filter->event_filters[i]);
+
+	free(filter->event_filters);
+	filter->filters = 0;
+	filter->event_filters = NULL;
+}
+
+void tep_filter_free(struct tep_event_filter *filter)
+{
+	tep_unref(filter->tep);
+
+	tep_filter_reset(filter);
+
+	free(filter);
+}
+
+static char *arg_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg);
+
+static int copy_filter_type(struct tep_event_filter *filter,
+			    struct tep_event_filter *source,
+			    struct tep_filter_type *filter_type)
+{
+	struct tep_filter_arg *arg;
+	struct tep_event *event;
+	const char *sys;
+	const char *name;
+	char *str;
+
+	/* Can't assume that the tep's are the same */
+	sys = filter_type->event->system;
+	name = filter_type->event->name;
+	event = tep_find_event_by_name(filter->tep, sys, name);
+	if (!event)
+		return -1;
+
+	str = arg_to_str(source, filter_type->filter);
+	if (!str)
+		return -1;
+
+	if (strcmp(str, "TRUE") == 0 || strcmp(str, "FALSE") == 0) {
+		/* Add trivial event */
+		arg = allocate_arg();
+		if (arg == NULL) {
+			free(str);
+			return -1;
+		}
+
+		arg->type = TEP_FILTER_ARG_BOOLEAN;
+		if (strcmp(str, "TRUE") == 0)
+			arg->boolean.value = 1;
+		else
+			arg->boolean.value = 0;
+
+		filter_type = add_filter_type(filter, event->id);
+		if (filter_type == NULL) {
+			free(str);
+			free_arg(arg);
+			return -1;
+		}
+
+		filter_type->filter = arg;
+
+		free(str);
+		return 0;
+	}
+
+	filter_event(filter, event, str, NULL);
+	free(str);
+
+	return 0;
+}
+
+/**
+ * tep_filter_copy - copy a filter using another filter
+ * @dest - the filter to copy to
+ * @source - the filter to copy from
+ *
+ * Returns 0 on success and -1 if not all filters were copied
+ */
+int tep_filter_copy(struct tep_event_filter *dest, struct tep_event_filter *source)
+{
+	int ret = 0;
+	int i;
+
+	tep_filter_reset(dest);
+
+	for (i = 0; i < source->filters; i++) {
+		if (copy_filter_type(dest, source, &source->event_filters[i]))
+			ret = -1;
+	}
+	return ret;
+}
+
+static int test_filter(struct tep_event *event, struct tep_filter_arg *arg,
+		       struct tep_record *record, enum tep_errno *err);
+
+static const char *
+get_comm(struct tep_event *event, struct tep_record *record)
+{
+	const char *comm;
+	int pid;
+
+	pid = tep_data_pid(event->tep, record);
+	comm = tep_data_comm_from_pid(event->tep, pid);
+	return comm;
+}
+
+static unsigned long long
+get_value(struct tep_event *event,
+	  struct tep_format_field *field, struct tep_record *record)
+{
+	unsigned long long val;
+
+	/* Handle our dummy "comm" field */
+	if (field == &comm) {
+		const char *name;
+
+		name = get_comm(event, record);
+		return (unsigned long)name;
+	}
+
+	/* Handle our dummy "cpu" field */
+	if (field == &cpu)
+		return record->cpu;
+
+	tep_read_number_field(field, record->data, &val);
+
+	if (!(field->flags & TEP_FIELD_IS_SIGNED))
+		return val;
+
+	switch (field->size) {
+	case 1:
+		return (char)val;
+	case 2:
+		return (short)val;
+	case 4:
+		return (int)val;
+	case 8:
+		return (long long)val;
+	}
+	return val;
+}
+
+static unsigned long long
+get_arg_value(struct tep_event *event, struct tep_filter_arg *arg,
+	      struct tep_record *record, enum tep_errno *err);
+
+static unsigned long long
+get_exp_value(struct tep_event *event, struct tep_filter_arg *arg,
+	      struct tep_record *record, enum tep_errno *err)
+{
+	unsigned long long lval, rval;
+
+	lval = get_arg_value(event, arg->exp.left, record, err);
+	rval = get_arg_value(event, arg->exp.right, record, err);
+
+	if (*err) {
+		/*
+		 * There was an error, no need to process anymore.
+		 */
+		return 0;
+	}
+
+	switch (arg->exp.type) {
+	case TEP_FILTER_EXP_ADD:
+		return lval + rval;
+
+	case TEP_FILTER_EXP_SUB:
+		return lval - rval;
+
+	case TEP_FILTER_EXP_MUL:
+		return lval * rval;
+
+	case TEP_FILTER_EXP_DIV:
+		return lval / rval;
+
+	case TEP_FILTER_EXP_MOD:
+		return lval % rval;
+
+	case TEP_FILTER_EXP_RSHIFT:
+		return lval >> rval;
+
+	case TEP_FILTER_EXP_LSHIFT:
+		return lval << rval;
+
+	case TEP_FILTER_EXP_AND:
+		return lval & rval;
+
+	case TEP_FILTER_EXP_OR:
+		return lval | rval;
+
+	case TEP_FILTER_EXP_XOR:
+		return lval ^ rval;
+
+	case TEP_FILTER_EXP_NOT:
+	default:
+		if (!*err)
+			*err = TEP_ERRNO__INVALID_EXP_TYPE;
+	}
+	return 0;
+}
+
+static unsigned long long
+get_arg_value(struct tep_event *event, struct tep_filter_arg *arg,
+	      struct tep_record *record, enum tep_errno *err)
+{
+	switch (arg->type) {
+	case TEP_FILTER_ARG_FIELD:
+		return get_value(event, arg->field.field, record);
+
+	case TEP_FILTER_ARG_VALUE:
+		if (arg->value.type != TEP_FILTER_NUMBER) {
+			if (!*err)
+				*err = TEP_ERRNO__NOT_A_NUMBER;
+		}
+		return arg->value.val;
+
+	case TEP_FILTER_ARG_EXP:
+		return get_exp_value(event, arg, record, err);
+
+	default:
+		if (!*err)
+			*err = TEP_ERRNO__INVALID_ARG_TYPE;
+	}
+	return 0;
+}
+
+static int test_num(struct tep_event *event, struct tep_filter_arg *arg,
+		    struct tep_record *record, enum tep_errno *err)
+{
+	unsigned long long lval, rval;
+
+	lval = get_arg_value(event, arg->num.left, record, err);
+	rval = get_arg_value(event, arg->num.right, record, err);
+
+	if (*err) {
+		/*
+		 * There was an error, no need to process anymore.
+		 */
+		return 0;
+	}
+
+	switch (arg->num.type) {
+	case TEP_FILTER_CMP_EQ:
+		return lval == rval;
+
+	case TEP_FILTER_CMP_NE:
+		return lval != rval;
+
+	case TEP_FILTER_CMP_GT:
+		return lval > rval;
+
+	case TEP_FILTER_CMP_LT:
+		return lval < rval;
+
+	case TEP_FILTER_CMP_GE:
+		return lval >= rval;
+
+	case TEP_FILTER_CMP_LE:
+		return lval <= rval;
+
+	default:
+		if (!*err)
+			*err = TEP_ERRNO__ILLEGAL_INTEGER_CMP;
+		return 0;
+	}
+}
+
+static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record *record)
+{
+	struct tep_event *event;
+	struct tep_handle *tep;
+	unsigned long long addr;
+	const char *val = NULL;
+	unsigned int size;
+	char hex[64];
+
+	/* If the field is not a string convert it */
+	if (arg->str.field->flags & TEP_FIELD_IS_STRING) {
+		val = record->data + arg->str.field->offset;
+		size = arg->str.field->size;
+
+		if (arg->str.field->flags & TEP_FIELD_IS_DYNAMIC) {
+			addr = *(unsigned int *)val;
+			val = record->data + (addr & 0xffff);
+			size = addr >> 16;
+		}
+
+		/*
+		 * We need to copy the data since we can't be sure the field
+		 * is null terminated.
+		 */
+		if (*(val + size - 1)) {
+			/* copy it */
+			memcpy(arg->str.buffer, val, arg->str.field->size);
+			/* the buffer is already NULL terminated */
+			val = arg->str.buffer;
+		}
+
+	} else {
+		event = arg->str.field->event;
+		tep = event->tep;
+		addr = get_value(event, arg->str.field, record);
+
+		if (arg->str.field->flags & (TEP_FIELD_IS_POINTER | TEP_FIELD_IS_LONG))
+			/* convert to a kernel symbol */
+			val = tep_find_function(tep, addr);
+
+		if (val == NULL) {
+			/* just use the hex of the string name */
+			snprintf(hex, 64, "0x%llx", addr);
+			val = hex;
+		}
+	}
+
+	return val;
+}
+
+static int test_str(struct tep_event *event, struct tep_filter_arg *arg,
+		    struct tep_record *record, enum tep_errno *err)
+{
+	const char *val;
+
+	if (arg->str.field == &comm)
+		val = get_comm(event, record);
+	else
+		val = get_field_str(arg, record);
+
+	switch (arg->str.type) {
+	case TEP_FILTER_CMP_MATCH:
+		return strcmp(val, arg->str.val) == 0;
+
+	case TEP_FILTER_CMP_NOT_MATCH:
+		return strcmp(val, arg->str.val) != 0;
+
+	case TEP_FILTER_CMP_REGEX:
+		/* Returns zero on match */
+		return !regexec(&arg->str.reg, val, 0, NULL, 0);
+
+	case TEP_FILTER_CMP_NOT_REGEX:
+		return regexec(&arg->str.reg, val, 0, NULL, 0);
+
+	default:
+		if (!*err)
+			*err = TEP_ERRNO__ILLEGAL_STRING_CMP;
+		return 0;
+	}
+}
+
+static int test_op(struct tep_event *event, struct tep_filter_arg *arg,
+		   struct tep_record *record, enum tep_errno *err)
+{
+	switch (arg->op.type) {
+	case TEP_FILTER_OP_AND:
+		return test_filter(event, arg->op.left, record, err) &&
+			test_filter(event, arg->op.right, record, err);
+
+	case TEP_FILTER_OP_OR:
+		return test_filter(event, arg->op.left, record, err) ||
+			test_filter(event, arg->op.right, record, err);
+
+	case TEP_FILTER_OP_NOT:
+		return !test_filter(event, arg->op.right, record, err);
+
+	default:
+		if (!*err)
+			*err = TEP_ERRNO__INVALID_OP_TYPE;
+		return 0;
+	}
+}
+
+static int test_filter(struct tep_event *event, struct tep_filter_arg *arg,
+		       struct tep_record *record, enum tep_errno *err)
+{
+	if (*err) {
+		/*
+		 * There was an error, no need to process anymore.
+		 */
+		return 0;
+	}
+
+	switch (arg->type) {
+	case TEP_FILTER_ARG_BOOLEAN:
+		/* easy case */
+		return arg->boolean.value;
+
+	case TEP_FILTER_ARG_OP:
+		return test_op(event, arg, record, err);
+
+	case TEP_FILTER_ARG_NUM:
+		return test_num(event, arg, record, err);
+
+	case TEP_FILTER_ARG_STR:
+		return test_str(event, arg, record, err);
+
+	case TEP_FILTER_ARG_EXP:
+	case TEP_FILTER_ARG_VALUE:
+	case TEP_FILTER_ARG_FIELD:
+		/*
+		 * Expressions, fields and values evaluate
+		 * to true if they return non zero
+		 */
+		return !!get_arg_value(event, arg, record, err);
+
+	default:
+		if (!*err)
+			*err = TEP_ERRNO__INVALID_ARG_TYPE;
+		return 0;
+	}
+}
+
+/**
+ * tep_event_filtered - return true if event has filter
+ * @filter: filter struct with filter information
+ * @event_id: event id to test if filter exists
+ *
+ * Returns 1 if filter found for @event_id
+ *   otherwise 0;
+ */
+int tep_event_filtered(struct tep_event_filter *filter, int event_id)
+{
+	struct tep_filter_type *filter_type;
+
+	if (!filter->filters)
+		return 0;
+
+	filter_type = find_filter_type(filter, event_id);
+
+	return filter_type ? 1 : 0;
+}
+
+/**
+ * tep_filter_match - test if a record matches a filter
+ * @filter: filter struct with filter information
+ * @record: the record to test against the filter
+ *
+ * Returns: match result or error code (prefixed with TEP_ERRNO__)
+ * FILTER_MATCH - filter found for event and @record matches
+ * FILTER_MISS  - filter found for event and @record does not match
+ * FILTER_NOT_FOUND - no filter found for @record's event
+ * NO_FILTER - if no filters exist
+ * otherwise - error occurred during test
+ */
+enum tep_errno tep_filter_match(struct tep_event_filter *filter,
+				struct tep_record *record)
+{
+	struct tep_handle *tep = filter->tep;
+	struct tep_filter_type *filter_type;
+	int event_id;
+	int ret;
+	enum tep_errno err = 0;
+
+	filter_init_error_buf(filter);
+
+	if (!filter->filters)
+		return TEP_ERRNO__NO_FILTER;
+
+	event_id = tep_data_type(tep, record);
+
+	filter_type = find_filter_type(filter, event_id);
+	if (!filter_type)
+		return TEP_ERRNO__FILTER_NOT_FOUND;
+
+	ret = test_filter(filter_type->event, filter_type->filter, record, &err);
+	if (err)
+		return err;
+
+	return ret ? TEP_ERRNO__FILTER_MATCH : TEP_ERRNO__FILTER_MISS;
+}
+
+static char *op_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
+{
+	char *str = NULL;
+	char *left = NULL;
+	char *right = NULL;
+	char *op = NULL;
+	int left_val = -1;
+	int right_val = -1;
+	int val;
+
+	switch (arg->op.type) {
+	case TEP_FILTER_OP_AND:
+		op = "&&";
+		/* fall through */
+	case TEP_FILTER_OP_OR:
+		if (!op)
+			op = "||";
+
+		left = arg_to_str(filter, arg->op.left);
+		right = arg_to_str(filter, arg->op.right);
+		if (!left || !right)
+			break;
+
+		/* Try to consolidate boolean values */
+		if (strcmp(left, "TRUE") == 0)
+			left_val = 1;
+		else if (strcmp(left, "FALSE") == 0)
+			left_val = 0;
+
+		if (strcmp(right, "TRUE") == 0)
+			right_val = 1;
+		else if (strcmp(right, "FALSE") == 0)
+			right_val = 0;
+
+		if (left_val >= 0) {
+			if ((arg->op.type == TEP_FILTER_OP_AND && !left_val) ||
+			    (arg->op.type == TEP_FILTER_OP_OR && left_val)) {
+				/* Just return left value */
+				str = left;
+				left = NULL;
+				break;
+			}
+			if (right_val >= 0) {
+				/* just evaluate this. */
+				val = 0;
+				switch (arg->op.type) {
+				case TEP_FILTER_OP_AND:
+					val = left_val && right_val;
+					break;
+				case TEP_FILTER_OP_OR:
+					val = left_val || right_val;
+					break;
+				default:
+					break;
+				}
+				if (asprintf(&str, val ? "TRUE" : "FALSE") < 0)
+					str = NULL;
+				break;
+			}
+		}
+		if (right_val >= 0) {
+			if ((arg->op.type == TEP_FILTER_OP_AND && !right_val) ||
+			    (arg->op.type == TEP_FILTER_OP_OR && right_val)) {
+				/* Just return right value */
+				str = right;
+				right = NULL;
+				break;
+			}
+			/* The right value is meaningless */
+			str = left;
+			left = NULL;
+			break;
+		}
+
+		if (asprintf(&str, "(%s) %s (%s)", left, op, right) < 0)
+			str = NULL;
+		break;
+
+	case TEP_FILTER_OP_NOT:
+		op = "!";
+		right = arg_to_str(filter, arg->op.right);
+		if (!right)
+			break;
+
+		/* See if we can consolidate */
+		if (strcmp(right, "TRUE") == 0)
+			right_val = 1;
+		else if (strcmp(right, "FALSE") == 0)
+			right_val = 0;
+		if (right_val >= 0) {
+			/* just return the opposite */
+			if (asprintf(&str, right_val ? "FALSE" : "TRUE") < 0)
+				str = NULL;
+			break;
+		}
+		if (asprintf(&str, "%s(%s)", op, right) < 0)
+			str = NULL;
+		break;
+
+	default:
+		/* ?? */
+		break;
+	}
+	free(left);
+	free(right);
+	return str;
+}
+
+static char *val_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
+{
+	char *str = NULL;
+
+	if (asprintf(&str, "%lld", arg->value.val) < 0)
+		str = NULL;
+
+	return str;
+}
+
+static char *field_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
+{
+	return strdup(arg->field.field->name);
+}
+
+static char *exp_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
+{
+	char *lstr;
+	char *rstr;
+	char *op;
+	char *str = NULL;
+
+	lstr = arg_to_str(filter, arg->exp.left);
+	rstr = arg_to_str(filter, arg->exp.right);
+	if (!lstr || !rstr)
+		goto out;
+
+	switch (arg->exp.type) {
+	case TEP_FILTER_EXP_ADD:
+		op = "+";
+		break;
+	case TEP_FILTER_EXP_SUB:
+		op = "-";
+		break;
+	case TEP_FILTER_EXP_MUL:
+		op = "*";
+		break;
+	case TEP_FILTER_EXP_DIV:
+		op = "/";
+		break;
+	case TEP_FILTER_EXP_MOD:
+		op = "%";
+		break;
+	case TEP_FILTER_EXP_RSHIFT:
+		op = ">>";
+		break;
+	case TEP_FILTER_EXP_LSHIFT:
+		op = "<<";
+		break;
+	case TEP_FILTER_EXP_AND:
+		op = "&";
+		break;
+	case TEP_FILTER_EXP_OR:
+		op = "|";
+		break;
+	case TEP_FILTER_EXP_XOR:
+		op = "^";
+		break;
+	default:
+		op = "[ERROR IN EXPRESSION TYPE]";
+		break;
+	}
+
+	if (asprintf(&str, "%s %s %s", lstr, op, rstr) < 0)
+		str = NULL;
+out:
+	free(lstr);
+	free(rstr);
+
+	return str;
+}
+
+static char *num_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
+{
+	char *lstr;
+	char *rstr;
+	char *str = NULL;
+	char *op = NULL;
+
+	lstr = arg_to_str(filter, arg->num.left);
+	rstr = arg_to_str(filter, arg->num.right);
+	if (!lstr || !rstr)
+		goto out;
+
+	switch (arg->num.type) {
+	case TEP_FILTER_CMP_EQ:
+		op = "==";
+		/* fall through */
+	case TEP_FILTER_CMP_NE:
+		if (!op)
+			op = "!=";
+		/* fall through */
+	case TEP_FILTER_CMP_GT:
+		if (!op)
+			op = ">";
+		/* fall through */
+	case TEP_FILTER_CMP_LT:
+		if (!op)
+			op = "<";
+		/* fall through */
+	case TEP_FILTER_CMP_GE:
+		if (!op)
+			op = ">=";
+		/* fall through */
+	case TEP_FILTER_CMP_LE:
+		if (!op)
+			op = "<=";
+
+		if (asprintf(&str, "%s %s %s", lstr, op, rstr) < 0)
+			str = NULL;
+		break;
+
+	default:
+		/* ?? */
+		break;
+	}
+
+out:
+	free(lstr);
+	free(rstr);
+	return str;
+}
+
+static char *str_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
+{
+	char *str = NULL;
+	char *op = NULL;
+
+	switch (arg->str.type) {
+	case TEP_FILTER_CMP_MATCH:
+		op = "==";
+		/* fall through */
+	case TEP_FILTER_CMP_NOT_MATCH:
+		if (!op)
+			op = "!=";
+		/* fall through */
+	case TEP_FILTER_CMP_REGEX:
+		if (!op)
+			op = "=~";
+		/* fall through */
+	case TEP_FILTER_CMP_NOT_REGEX:
+		if (!op)
+			op = "!~";
+
+		if (asprintf(&str, "%s %s \"%s\"",
+			 arg->str.field->name, op, arg->str.val) < 0)
+			str = NULL;
+		break;
+
+	default:
+		/* ?? */
+		break;
+	}
+	return str;
+}
+
+static char *arg_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
+{
+	char *str = NULL;
+
+	switch (arg->type) {
+	case TEP_FILTER_ARG_BOOLEAN:
+		if (asprintf(&str, arg->boolean.value ? "TRUE" : "FALSE") < 0)
+			str = NULL;
+		return str;
+
+	case TEP_FILTER_ARG_OP:
+		return op_to_str(filter, arg);
+
+	case TEP_FILTER_ARG_NUM:
+		return num_to_str(filter, arg);
+
+	case TEP_FILTER_ARG_STR:
+		return str_to_str(filter, arg);
+
+	case TEP_FILTER_ARG_VALUE:
+		return val_to_str(filter, arg);
+
+	case TEP_FILTER_ARG_FIELD:
+		return field_to_str(filter, arg);
+
+	case TEP_FILTER_ARG_EXP:
+		return exp_to_str(filter, arg);
+
+	default:
+		/* ?? */
+		return NULL;
+	}
+
+}
+
+/**
+ * tep_filter_make_string - return a string showing the filter
+ * @filter: filter struct with filter information
+ * @event_id: the event id to return the filter string with
+ *
+ * Returns a string that displays the filter contents.
+ *  This string must be freed with free(str).
+ *  NULL is returned if no filter is found or allocation failed.
+ */
+char *
+tep_filter_make_string(struct tep_event_filter *filter, int event_id)
+{
+	struct tep_filter_type *filter_type;
+
+	if (!filter->filters)
+		return NULL;
+
+	filter_type = find_filter_type(filter, event_id);
+
+	if (!filter_type)
+		return NULL;
+
+	return arg_to_str(filter, filter_type->filter);
+}
+
+/**
+ * tep_filter_compare - compare two filters and return if they are the same
+ * @filter1: Filter to compare with @filter2
+ * @filter2: Filter to compare with @filter1
+ *
+ * Returns:
+ *  1 if the two filters hold the same content.
+ *  0 if they do not.
+ */
+int tep_filter_compare(struct tep_event_filter *filter1, struct tep_event_filter *filter2)
+{
+	struct tep_filter_type *filter_type1;
+	struct tep_filter_type *filter_type2;
+	char *str1, *str2;
+	int result;
+	int i;
+
+	/* Do the easy checks first */
+	if (filter1->filters != filter2->filters)
+		return 0;
+	if (!filter1->filters && !filter2->filters)
+		return 1;
+
+	/*
+	 * Now take a look at each of the events to see if they have the same
+	 * filters to them.
+	 */
+	for (i = 0; i < filter1->filters; i++) {
+		filter_type1 = &filter1->event_filters[i];
+		filter_type2 = find_filter_type(filter2, filter_type1->event_id);
+		if (!filter_type2)
+			break;
+		if (filter_type1->filter->type != filter_type2->filter->type)
+			break;
+		/* The best way to compare complex filters is with strings */
+		str1 = arg_to_str(filter1, filter_type1->filter);
+		str2 = arg_to_str(filter2, filter_type2->filter);
+		if (str1 && str2)
+			result = strcmp(str1, str2) != 0;
+		else
+			/* bail out if allocation fails */
+			result = 1;
+
+		free(str1);
+		free(str2);
+		if (result)
+			break;
+	}
+
+	if (i < filter1->filters)
+		return 0;
+	return 1;
+}
+
diff --git a/tools/lib/traceevent/parse-utils.c b/tools/lib/traceevent/parse-utils.c
new file mode 100644
index 000000000..e99867111
--- /dev/null
+++ b/tools/lib/traceevent/parse-utils.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <errno.h>
+
+#define __weak __attribute__((weak))
+
+void __vwarning(const char *fmt, va_list ap)
+{
+	if (errno)
+		perror("libtraceevent");
+	errno = 0;
+
+	fprintf(stderr, "  ");
+	vfprintf(stderr, fmt, ap);
+
+	fprintf(stderr, "\n");
+}
+
+void __warning(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	__vwarning(fmt, ap);
+	va_end(ap);
+}
+
+void __weak warning(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	__vwarning(fmt, ap);
+	va_end(ap);
+}
+
+void __vpr_stat(const char *fmt, va_list ap)
+{
+	vprintf(fmt, ap);
+	printf("\n");
+}
+
+void __pr_stat(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	__vpr_stat(fmt, ap);
+	va_end(ap);
+}
+
+void __weak vpr_stat(const char *fmt, va_list ap)
+{
+	__vpr_stat(fmt, ap);
+}
+
+void __weak pr_stat(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	__vpr_stat(fmt, ap);
+	va_end(ap);
+}
diff --git a/tools/lib/traceevent/plugins/Build b/tools/lib/traceevent/plugins/Build
new file mode 100644
index 000000000..dd4da823c
--- /dev/null
+++ b/tools/lib/traceevent/plugins/Build
@@ -0,0 +1,12 @@
+plugin_jbd2-y         += plugin_jbd2.o
+plugin_hrtimer-y      += plugin_hrtimer.o
+plugin_kmem-y         += plugin_kmem.o
+plugin_kvm-y          += plugin_kvm.o
+plugin_mac80211-y     += plugin_mac80211.o
+plugin_sched_switch-y += plugin_sched_switch.o
+plugin_function-y     += plugin_function.o
+plugin_futex-y        += plugin_futex.o
+plugin_xen-y          += plugin_xen.o
+plugin_scsi-y         += plugin_scsi.o
+plugin_cfg80211-y     += plugin_cfg80211.o
+plugin_tlb-y          += plugin_tlb.o
+\ No newline at end of file
diff --git a/tools/lib/traceevent/plugins/Makefile b/tools/lib/traceevent/plugins/Makefile
new file mode 100644
index 000000000..47e802553
--- /dev/null
+++ b/tools/lib/traceevent/plugins/Makefile
@@ -0,0 +1,225 @@
+# SPDX-License-Identifier: GPL-2.0
+
+#MAKEFLAGS += --no-print-directory
+
+
+# Makefiles suck: This macro sets a default value of $(2) for the
+# variable named by $(1), unless the variable has been set by
+# environment or command line. This is necessary for CC and AR
+# because make sets default values, so the simpler ?= approach
+# won't work as expected.
+define allow-override
+  $(if $(or $(findstring environment,$(origin $(1))),\
+            $(findstring command line,$(origin $(1)))),,\
+    $(eval $(1) = $(2)))
+endef
+
+# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
+$(call allow-override,CC,$(CROSS_COMPILE)gcc)
+$(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,NM,$(CROSS_COMPILE)nm)
+$(call allow-override,PKG_CONFIG,pkg-config)
+
+EXT = -std=gnu99
+INSTALL = install
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
+ifeq ($(LP64), 1)
+  libdir_relative_tmp = lib64
+else
+  libdir_relative_tmp = lib
+endif
+
+libdir_relative ?= $(libdir_relative_tmp)
+prefix ?= /usr/local
+libdir = $(prefix)/$(libdir_relative)
+
+set_plugin_dir := 1
+
+# Set plugin_dir to preffered global plugin location
+# If we install under $HOME directory we go under
+# $(HOME)/.local/lib/traceevent/plugins
+#
+# We dont set PLUGIN_DIR in case we install under $HOME
+# directory, because by default the code looks under:
+# $(HOME)/.local/lib/traceevent/plugins by default.
+#
+ifeq ($(plugin_dir),)
+ifeq ($(prefix),$(HOME))
+override plugin_dir = $(HOME)/.local/lib/traceevent/plugins
+set_plugin_dir := 0
+else
+override plugin_dir = $(libdir)/traceevent/plugins
+endif
+endif
+
+ifeq ($(set_plugin_dir),1)
+PLUGIN_DIR = -DPLUGIN_DIR="$(plugin_dir)"
+PLUGIN_DIR_SQ = '$(subst ','\'',$(PLUGIN_DIR))'
+endif
+
+include ../../../scripts/Makefile.include
+
+# copy a bit from Linux kbuild
+
+ifeq ("$(origin V)", "command line")
+  VERBOSE = $(V)
+endif
+ifndef VERBOSE
+  VERBOSE = 0
+endif
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+export prefix libdir src obj
+
+# Shell quotes
+plugin_dir_SQ = $(subst ','\'',$(plugin_dir))
+
+CONFIG_INCLUDES =
+CONFIG_LIBS    =
+CONFIG_FLAGS   =
+
+OBJ            = $@
+N              =
+
+INCLUDES = -I. -I.. -I $(srctree)/tools/include $(CONFIG_INCLUDES)
+
+# Set compile option CFLAGS
+ifdef EXTRA_CFLAGS
+  CFLAGS := $(EXTRA_CFLAGS)
+else
+  CFLAGS := -g -Wall
+endif
+
+# Append required CFLAGS
+override CFLAGS += -fPIC
+override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ)
+override CFLAGS += $(udis86-flags) -D_GNU_SOURCE
+
+ifeq ($(VERBOSE),1)
+  Q =
+else
+  Q = @
+endif
+
+# Disable command line variables (CFLAGS) override from top
+# level Makefile (perf), otherwise build Makefile will get
+# the same command line setup.
+MAKEOVERRIDES=
+
+export srctree OUTPUT CC LD CFLAGS V
+
+build := -f $(srctree)/tools/build/Makefile.build dir=. obj
+
+DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list
+
+PLUGINS  = plugin_jbd2.so
+PLUGINS += plugin_hrtimer.so
+PLUGINS += plugin_kmem.so
+PLUGINS += plugin_kvm.so
+PLUGINS += plugin_mac80211.so
+PLUGINS += plugin_sched_switch.so
+PLUGINS += plugin_function.so
+PLUGINS += plugin_futex.so
+PLUGINS += plugin_xen.so
+PLUGINS += plugin_scsi.so
+PLUGINS += plugin_cfg80211.so
+PLUGINS += plugin_tlb.so
+
+PLUGINS    := $(addprefix $(OUTPUT),$(PLUGINS))
+PLUGINS_IN := $(PLUGINS:.so=-in.o)
+
+plugins: $(PLUGINS) $(DYNAMIC_LIST_FILE)
+
+__plugin_obj = $(notdir $@)
+  plugin_obj = $(__plugin_obj:-in.o=)
+
+$(PLUGINS_IN): force
+	$(Q)$(MAKE) $(build)=$(plugin_obj)
+
+$(OUTPUT)libtraceevent-dynamic-list: $(PLUGINS)
+	$(QUIET_GEN)$(call do_generate_dynamic_list_file, $(PLUGINS), $@)
+
+$(OUTPUT)%.so: $(OUTPUT)%-in.o
+	$(QUIET_LINK)$(CC) $(CFLAGS) -shared $(LDFLAGS) -nostartfiles -o $@ $^
+
+define update_dir
+  (echo $1 > $@.tmp;                           \
+   if [ -r $@ ] && cmp -s $@ $@.tmp; then      \
+     rm -f $@.tmp;                             \
+   else                                                \
+     echo '  UPDATE                 $@';       \
+     mv -f $@.tmp $@;                          \
+   fi);
+endef
+
+tags:	force
+	$(RM) tags
+	find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \
+	--regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/'
+
+TAGS:	force
+	$(RM) TAGS
+	find . -name '*.[ch]' | xargs etags \
+	--regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/'
+
+define do_install_mkdir
+	if [ ! -d '$(DESTDIR_SQ)$1' ]; then             \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
+	fi
+endef
+
+define do_install
+	$(call do_install_mkdir,$2);                    \
+	$(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2'
+endef
+
+define do_install_plugins
+       for plugin in $1; do                            \
+         $(call do_install,$$plugin,$(plugin_dir_SQ)); \
+       done
+endef
+
+define do_generate_dynamic_list_file
+	symbol_type=`$(NM) -u -D $1 | awk 'NF>1 {print $$1}' | \
+	xargs echo "U w W" | tr 'w ' 'W\n' | sort -u | xargs echo`;\
+	if [ "$$symbol_type" = "U W" ];then				\
+		(echo '{';                                              \
+		$(NM) -u -D $1 | awk 'NF>1 {sub("@.*", "", $$2); print "\t"$$2";"}' | sort -u;\
+		echo '};';                                              \
+		) > $2;                                                 \
+	else                                                            \
+		(echo Either missing one of [$1] or bad version of $(NM)) 1>&2;\
+		fi
+endef
+
+install: $(PLUGINS)
+	$(call QUIET_INSTALL, trace_plugins) \
+	$(call do_install_plugins, $(PLUGINS))
+
+clean:
+	$(call QUIET_CLEAN, trace_plugins) \
+		$(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd; \
+		$(RM) $(OUTPUT)libtraceevent-dynamic-list \
+		$(RM) TRACEEVENT-CFLAGS tags TAGS;
+
+PHONY += force plugins
+force:
+
+# Declare the contents of the .PHONY variable as phony.  We keep that
+# information in a variable so we can use it in if_changed and friends.
+.PHONY: $(PHONY)
diff --git a/tools/lib/traceevent/plugins/plugin_cfg80211.c b/tools/lib/traceevent/plugins/plugin_cfg80211.c
new file mode 100644
index 000000000..3d43b56a6
--- /dev/null
+++ b/tools/lib/traceevent/plugins/plugin_cfg80211.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <endian.h>
+#include "event-parse.h"
+
+/*
+ * From glibc endian.h, for older systems where it is not present, e.g.: RHEL5,
+ * Fedora6.
+ */
+#ifndef le16toh
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+#  define le16toh(x) (x)
+# else
+#  define le16toh(x) __bswap_16 (x)
+# endif
+#endif
+
+
+static unsigned long long
+process___le16_to_cpup(struct trace_seq *s, unsigned long long *args)
+{
+	uint16_t *val = (uint16_t *) (unsigned long) args[0];
+	return val ? (long long) le16toh(*val) : 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+	tep_register_print_function(tep,
+				    process___le16_to_cpup,
+				    TEP_FUNC_ARG_INT,
+				    "__le16_to_cpup",
+				    TEP_FUNC_ARG_PTR,
+				    TEP_FUNC_ARG_VOID);
+	return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+	tep_unregister_print_function(tep, process___le16_to_cpup,
+				      "__le16_to_cpup");
+}
diff --git a/tools/lib/traceevent/plugins/plugin_function.c b/tools/lib/traceevent/plugins/plugin_function.c
new file mode 100644
index 000000000..807b16e1b
--- /dev/null
+++ b/tools/lib/traceevent/plugins/plugin_function.c
@@ -0,0 +1,282 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+#include "event-utils.h"
+#include "trace-seq.h"
+
+static struct func_stack {
+	int size;
+	char **stack;
+} *fstack;
+
+static int cpus = -1;
+
+#define STK_BLK 10
+
+struct tep_plugin_option plugin_options[] =
+{
+	{
+		.name = "parent",
+		.plugin_alias = "ftrace",
+		.description =
+		"Print parent of functions for function events",
+	},
+	{
+		.name = "indent",
+		.plugin_alias = "ftrace",
+		.description =
+		"Try to show function call indents, based on parents",
+		.set = 1,
+	},
+	{
+		.name = "offset",
+		.plugin_alias = "ftrace",
+		.description =
+		"Show function names as well as their offsets",
+		.set = 0,
+	},
+	{
+		.name = NULL,
+	}
+};
+
+static struct tep_plugin_option *ftrace_parent = &plugin_options[0];
+static struct tep_plugin_option *ftrace_indent = &plugin_options[1];
+static struct tep_plugin_option *ftrace_offset = &plugin_options[2];
+
+static void add_child(struct func_stack *stack, const char *child, int pos)
+{
+	int i;
+
+	if (!child)
+		return;
+
+	if (pos < stack->size)
+		free(stack->stack[pos]);
+	else {
+		char **ptr;
+
+		ptr = realloc(stack->stack, sizeof(char *) *
+			      (stack->size + STK_BLK));
+		if (!ptr) {
+			warning("could not allocate plugin memory\n");
+			return;
+		}
+
+		stack->stack = ptr;
+
+		for (i = stack->size; i < stack->size + STK_BLK; i++)
+			stack->stack[i] = NULL;
+		stack->size += STK_BLK;
+	}
+
+	stack->stack[pos] = strdup(child);
+}
+
+static int add_and_get_index(const char *parent, const char *child, int cpu)
+{
+	int i;
+
+	if (cpu < 0)
+		return 0;
+
+	if (cpu > cpus) {
+		struct func_stack *ptr;
+
+		ptr = realloc(fstack, sizeof(*fstack) * (cpu + 1));
+		if (!ptr) {
+			warning("could not allocate plugin memory\n");
+			return 0;
+		}
+
+		fstack = ptr;
+
+		/* Account for holes in the cpu count */
+		for (i = cpus + 1; i <= cpu; i++)
+			memset(&fstack[i], 0, sizeof(fstack[i]));
+		cpus = cpu;
+	}
+
+	for (i = 0; i < fstack[cpu].size && fstack[cpu].stack[i]; i++) {
+		if (strcmp(parent, fstack[cpu].stack[i]) == 0) {
+			add_child(&fstack[cpu], child, i+1);
+			return i;
+		}
+	}
+
+	/* Not found */
+	add_child(&fstack[cpu], parent, 0);
+	add_child(&fstack[cpu], child, 1);
+	return 0;
+}
+
+static void show_function(struct trace_seq *s, struct tep_handle *tep,
+			  const char *func, unsigned long long function)
+{
+	unsigned long long offset;
+
+	trace_seq_printf(s, "%s", func);
+	if (ftrace_offset->set) {
+		offset = tep_find_function_address(tep, function);
+		trace_seq_printf(s, "+0x%x ", (int)(function - offset));
+	}
+}
+
+static int function_handler(struct trace_seq *s, struct tep_record *record,
+			    struct tep_event *event, void *context)
+{
+	struct tep_handle *tep = event->tep;
+	unsigned long long function;
+	unsigned long long pfunction;
+	const char *func;
+	const char *parent;
+	int index = 0;
+
+	if (tep_get_field_val(s, event, "ip", record, &function, 1))
+		return trace_seq_putc(s, '!');
+
+	func = tep_find_function(tep, function);
+
+	if (tep_get_field_val(s, event, "parent_ip", record, &pfunction, 1))
+		return trace_seq_putc(s, '!');
+
+	parent = tep_find_function(tep, pfunction);
+
+	if (parent && ftrace_indent->set)
+		index = add_and_get_index(parent, func, record->cpu);
+
+	trace_seq_printf(s, "%*s", index*3, "");
+
+	if (func)
+		show_function(s, tep, func, function);
+	else
+		trace_seq_printf(s, "0x%llx", function);
+
+	if (ftrace_parent->set) {
+		trace_seq_printf(s, " <-- ");
+		if (parent)
+			show_function(s, tep, parent, pfunction);
+		else
+			trace_seq_printf(s, "0x%llx", pfunction);
+	}
+
+	return 0;
+}
+
+static int
+trace_stack_handler(struct trace_seq *s, struct tep_record *record,
+		    struct tep_event *event, void *context)
+{
+	struct tep_format_field *field;
+	unsigned long long addr;
+	const char *func;
+	int long_size;
+	void *data = record->data;
+
+	field = tep_find_any_field(event, "caller");
+	if (!field) {
+		trace_seq_printf(s, "<CANT FIND FIELD %s>", "caller");
+		return 0;
+	}
+
+	trace_seq_puts(s, "<stack trace >\n");
+
+	long_size = tep_get_long_size(event->tep);
+
+	for (data += field->offset; data < record->data + record->size;
+	     data += long_size) {
+		addr = tep_read_number(event->tep, data, long_size);
+
+		if ((long_size == 8 && addr == (unsigned long long)-1) ||
+		    ((int)addr == -1))
+			break;
+
+		func = tep_find_function(event->tep, addr);
+		if (func)
+			trace_seq_printf(s, "=> %s (%llx)\n", func, addr);
+		else
+			trace_seq_printf(s, "=> %llx\n", addr);
+	}
+
+	return 0;
+}
+
+static int
+trace_raw_data_handler(struct trace_seq *s, struct tep_record *record,
+		    struct tep_event *event, void *context)
+{
+	struct tep_format_field *field;
+	unsigned long long id;
+	int long_size;
+	void *data = record->data;
+
+	if (tep_get_field_val(s, event, "id", record, &id, 1))
+		return trace_seq_putc(s, '!');
+
+	trace_seq_printf(s, "# %llx", id);
+
+	field = tep_find_any_field(event, "buf");
+	if (!field) {
+		trace_seq_printf(s, "<CANT FIND FIELD %s>", "buf");
+		return 0;
+	}
+
+	long_size = tep_get_long_size(event->tep);
+
+	for (data += field->offset; data < record->data + record->size;
+	     data += long_size) {
+		int size = sizeof(long);
+		int left = (record->data + record->size) - data;
+		int i;
+
+		if (size > left)
+			size = left;
+
+		for (i = 0; i < size; i++)
+			trace_seq_printf(s, " %02x", *(unsigned char *)(data + i));
+	}
+
+	return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+	tep_register_event_handler(tep, -1, "ftrace", "function",
+				   function_handler, NULL);
+
+	tep_register_event_handler(tep, -1, "ftrace", "kernel_stack",
+				      trace_stack_handler, NULL);
+
+	tep_register_event_handler(tep, -1, "ftrace", "raw_data",
+				      trace_raw_data_handler, NULL);
+
+	tep_plugin_add_options("ftrace", plugin_options);
+
+	return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+	int i, x;
+
+	tep_unregister_event_handler(tep, -1, "ftrace", "function",
+				     function_handler, NULL);
+
+	for (i = 0; i <= cpus; i++) {
+		for (x = 0; x < fstack[i].size && fstack[i].stack[x]; x++)
+			free(fstack[i].stack[x]);
+		free(fstack[i].stack);
+	}
+
+	tep_plugin_remove_options(plugin_options);
+
+	free(fstack);
+	fstack = NULL;
+	cpus = -1;
+}
diff --git a/tools/lib/traceevent/plugins/plugin_futex.c b/tools/lib/traceevent/plugins/plugin_futex.c
new file mode 100644
index 000000000..eb7c9f8a8
--- /dev/null
+++ b/tools/lib/traceevent/plugins/plugin_futex.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2017 National Instruments Corp.
+ *
+ * Author: Julia Cartwright <julia@ni.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/futex.h>
+
+#include "event-parse.h"
+
+#define ARRAY_SIZE(_a) (sizeof(_a) / sizeof((_a)[0]))
+
+struct futex_args {
+	unsigned long long	uaddr;
+	unsigned long long	op;
+	unsigned long long	val;
+	unsigned long long	utime; /* or val2 */
+	unsigned long long	uaddr2;
+	unsigned long long	val3;
+};
+
+struct futex_op {
+	const char	*name;
+	const char	*fmt_val;
+	const char	*fmt_utime;
+	const char	*fmt_uaddr2;
+	const char	*fmt_val3;
+};
+
+static const struct futex_op futex_op_tbl[] = {
+	{            "FUTEX_WAIT", " val=0x%08llx", " utime=0x%08llx",               NULL,             NULL },
+	{            "FUTEX_WAKE",     " val=%llu",              NULL,               NULL,             NULL },
+	{              "FUTEX_FD",     " val=%llu",              NULL,               NULL,             NULL },
+	{         "FUTEX_REQUEUE",     " val=%llu",      " val2=%llu", " uaddr2=0x%08llx",             NULL },
+	{     "FUTEX_CMP_REQUEUE",     " val=%llu",      " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" },
+	{         "FUTEX_WAKE_OP",     " val=%llu",      " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" },
+	{         "FUTEX_LOCK_PI",            NULL, " utime=0x%08llx",               NULL,             NULL },
+	{       "FUTEX_UNLOCK_PI",            NULL,              NULL,               NULL,             NULL },
+	{      "FUTEX_TRYLOCK_PI",            NULL,              NULL,               NULL,             NULL },
+	{     "FUTEX_WAIT_BITSET", " val=0x%08llx", " utime=0x%08llx",               NULL, " val3=0x%08llx" },
+	{     "FUTEX_WAKE_BITSET",     " val=%llu",              NULL,               NULL, " val3=0x%08llx" },
+	{ "FUTEX_WAIT_REQUEUE_PI", " val=0x%08llx", " utime=0x%08llx", " uaddr2=0x%08llx", " val3=0x%08llx" },
+	{  "FUTEX_CMP_REQUEUE_PI",     " val=%llu",      " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" },
+};
+
+
+static void futex_print(struct trace_seq *s, const struct futex_args *args,
+			const struct futex_op *fop)
+{
+	trace_seq_printf(s, " uaddr=0x%08llx", args->uaddr);
+
+	if (fop->fmt_val)
+		trace_seq_printf(s, fop->fmt_val, args->val);
+
+	if (fop->fmt_utime)
+		trace_seq_printf(s,fop->fmt_utime, args->utime);
+
+	if (fop->fmt_uaddr2)
+		trace_seq_printf(s, fop->fmt_uaddr2, args->uaddr2);
+
+	if (fop->fmt_val3)
+		trace_seq_printf(s, fop->fmt_val3, args->val3);
+}
+
+static int futex_handler(struct trace_seq *s, struct tep_record *record,
+			 struct tep_event *event, void *context)
+{
+	const struct futex_op *fop;
+	struct futex_args args;
+	unsigned long long cmd;
+
+	if (tep_get_field_val(s, event, "uaddr", record, &args.uaddr, 1))
+		return 1;
+
+	if (tep_get_field_val(s, event, "op", record, &args.op, 1))
+		return 1;
+
+	if (tep_get_field_val(s, event, "val", record, &args.val, 1))
+		return 1;
+
+	if (tep_get_field_val(s, event, "utime", record, &args.utime, 1))
+		return 1;
+
+	if (tep_get_field_val(s, event, "uaddr2", record, &args.uaddr2, 1))
+		return 1;
+
+	if (tep_get_field_val(s, event, "val3", record, &args.val3, 1))
+		return 1;
+
+	cmd = args.op & FUTEX_CMD_MASK;
+	if (cmd >= ARRAY_SIZE(futex_op_tbl))
+		return 1;
+
+	fop = &futex_op_tbl[cmd];
+
+	trace_seq_printf(s, "op=%s", fop->name);
+
+	if (args.op & FUTEX_PRIVATE_FLAG)
+		trace_seq_puts(s, "|FUTEX_PRIVATE_FLAG");
+
+	if (args.op & FUTEX_CLOCK_REALTIME)
+		trace_seq_puts(s, "|FUTEX_CLOCK_REALTIME");
+
+	futex_print(s, &args, fop);
+	return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+	tep_register_event_handler(tep, -1, "syscalls", "sys_enter_futex",
+				   futex_handler, NULL);
+	return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+	tep_unregister_event_handler(tep, -1, "syscalls", "sys_enter_futex",
+				     futex_handler, NULL);
+}
diff --git a/tools/lib/traceevent/plugins/plugin_hrtimer.c b/tools/lib/traceevent/plugins/plugin_hrtimer.c
new file mode 100644
index 000000000..d98466788
--- /dev/null
+++ b/tools/lib/traceevent/plugins/plugin_hrtimer.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+#include "trace-seq.h"
+
+static int timer_expire_handler(struct trace_seq *s,
+				struct tep_record *record,
+				struct tep_event *event, void *context)
+{
+	trace_seq_printf(s, "hrtimer=");
+
+	if (tep_print_num_field(s, "0x%llx", event, "timer",
+				record, 0) == -1)
+		tep_print_num_field(s, "0x%llx", event, "hrtimer",
+				    record, 1);
+
+	trace_seq_printf(s, " now=");
+
+	tep_print_num_field(s, "%llu", event, "now", record, 1);
+
+	tep_print_func_field(s, " function=%s", event, "function",
+				record, 0);
+	return 0;
+}
+
+static int timer_start_handler(struct trace_seq *s,
+			       struct tep_record *record,
+			       struct tep_event *event, void *context)
+{
+	trace_seq_printf(s, "hrtimer=");
+
+	if (tep_print_num_field(s, "0x%llx", event, "timer",
+				record, 0) == -1)
+		tep_print_num_field(s, "0x%llx", event, "hrtimer",
+				    record, 1);
+
+	tep_print_func_field(s, " function=%s", event, "function",
+			     record, 0);
+
+	trace_seq_printf(s, " expires=");
+	tep_print_num_field(s, "%llu", event, "expires", record, 1);
+
+	trace_seq_printf(s, " softexpires=");
+	tep_print_num_field(s, "%llu", event, "softexpires", record, 1);
+	return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+	tep_register_event_handler(tep, -1,
+				   "timer", "hrtimer_expire_entry",
+				   timer_expire_handler, NULL);
+
+	tep_register_event_handler(tep, -1, "timer", "hrtimer_start",
+				   timer_start_handler, NULL);
+	return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+	tep_unregister_event_handler(tep, -1,
+				     "timer", "hrtimer_expire_entry",
+				     timer_expire_handler, NULL);
+
+	tep_unregister_event_handler(tep, -1, "timer", "hrtimer_start",
+				     timer_start_handler, NULL);
+}
diff --git a/tools/lib/traceevent/plugins/plugin_jbd2.c b/tools/lib/traceevent/plugins/plugin_jbd2.c
new file mode 100644
index 000000000..69111a68d
--- /dev/null
+++ b/tools/lib/traceevent/plugins/plugin_jbd2.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+#include "trace-seq.h"
+
+#define MINORBITS	20
+#define MINORMASK	((1U << MINORBITS) - 1)
+
+#define MAJOR(dev)	((unsigned int) ((dev) >> MINORBITS))
+#define MINOR(dev)	((unsigned int) ((dev) & MINORMASK))
+
+static unsigned long long
+process_jbd2_dev_to_name(struct trace_seq *s, unsigned long long *args)
+{
+	unsigned int dev = args[0];
+
+	trace_seq_printf(s, "%d:%d", MAJOR(dev), MINOR(dev));
+	return 0;
+}
+
+static unsigned long long
+process_jiffies_to_msecs(struct trace_seq *s, unsigned long long *args)
+{
+	unsigned long long jiffies = args[0];
+
+	trace_seq_printf(s, "%lld", jiffies);
+	return jiffies;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+	tep_register_print_function(tep,
+				    process_jbd2_dev_to_name,
+				    TEP_FUNC_ARG_STRING,
+				    "jbd2_dev_to_name",
+				    TEP_FUNC_ARG_INT,
+				    TEP_FUNC_ARG_VOID);
+
+	tep_register_print_function(tep,
+				    process_jiffies_to_msecs,
+				    TEP_FUNC_ARG_LONG,
+				    "jiffies_to_msecs",
+				    TEP_FUNC_ARG_LONG,
+				    TEP_FUNC_ARG_VOID);
+	return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+	tep_unregister_print_function(tep, process_jbd2_dev_to_name,
+				      "jbd2_dev_to_name");
+
+	tep_unregister_print_function(tep, process_jiffies_to_msecs,
+				      "jiffies_to_msecs");
+}
diff --git a/tools/lib/traceevent/plugins/plugin_kmem.c b/tools/lib/traceevent/plugins/plugin_kmem.c
new file mode 100644
index 000000000..4b4f7f961
--- /dev/null
+++ b/tools/lib/traceevent/plugins/plugin_kmem.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+#include "trace-seq.h"
+
+static int call_site_handler(struct trace_seq *s, struct tep_record *record,
+			     struct tep_event *event, void *context)
+{
+	struct tep_format_field *field;
+	unsigned long long val, addr;
+	void *data = record->data;
+	const char *func;
+
+	field = tep_find_field(event, "call_site");
+	if (!field)
+		return 1;
+
+	if (tep_read_number_field(field, data, &val))
+		return 1;
+
+	func = tep_find_function(event->tep, val);
+	if (!func)
+		return 1;
+
+	addr = tep_find_function_address(event->tep, val);
+
+	trace_seq_printf(s, "(%s+0x%x) ", func, (int)(val - addr));
+	return 1;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+	tep_register_event_handler(tep, -1, "kmem", "kfree",
+				   call_site_handler, NULL);
+
+	tep_register_event_handler(tep, -1, "kmem", "kmalloc",
+				   call_site_handler, NULL);
+
+	tep_register_event_handler(tep, -1, "kmem", "kmalloc_node",
+				   call_site_handler, NULL);
+
+	tep_register_event_handler(tep, -1, "kmem", "kmem_cache_alloc",
+				   call_site_handler, NULL);
+
+	tep_register_event_handler(tep, -1, "kmem",
+				   "kmem_cache_alloc_node",
+				   call_site_handler, NULL);
+
+	tep_register_event_handler(tep, -1, "kmem", "kmem_cache_free",
+				   call_site_handler, NULL);
+	return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+	tep_unregister_event_handler(tep, -1, "kmem", "kfree",
+				     call_site_handler, NULL);
+
+	tep_unregister_event_handler(tep, -1, "kmem", "kmalloc",
+				     call_site_handler, NULL);
+
+	tep_unregister_event_handler(tep, -1, "kmem", "kmalloc_node",
+				     call_site_handler, NULL);
+
+	tep_unregister_event_handler(tep, -1, "kmem", "kmem_cache_alloc",
+				     call_site_handler, NULL);
+
+	tep_unregister_event_handler(tep, -1, "kmem",
+				     "kmem_cache_alloc_node",
+				     call_site_handler, NULL);
+
+	tep_unregister_event_handler(tep, -1, "kmem", "kmem_cache_free",
+				     call_site_handler, NULL);
+}
diff --git a/tools/lib/traceevent/plugins/plugin_kvm.c b/tools/lib/traceevent/plugins/plugin_kvm.c
new file mode 100644
index 000000000..51ceeb914
--- /dev/null
+++ b/tools/lib/traceevent/plugins/plugin_kvm.c
@@ -0,0 +1,527 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+#include "event-parse.h"
+#include "trace-seq.h"
+
+#ifdef HAVE_UDIS86
+
+#include <udis86.h>
+
+static ud_t ud;
+
+static void init_disassembler(void)
+{
+	ud_init(&ud);
+	ud_set_syntax(&ud, UD_SYN_ATT);
+}
+
+static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
+			       int cr0_pe, int eflags_vm,
+			       int cs_d, int cs_l)
+{
+	int mode;
+
+	if (!cr0_pe)
+		mode = 16;
+	else if (eflags_vm)
+		mode = 16;
+	else if (cs_l)
+		mode = 64;
+	else if (cs_d)
+		mode = 32;
+	else
+		mode = 16;
+
+	ud_set_pc(&ud, rip);
+	ud_set_mode(&ud, mode);
+	ud_set_input_buffer(&ud, insn, len);
+	ud_disassemble(&ud);
+	return ud_insn_asm(&ud);
+}
+
+#else
+
+static void init_disassembler(void)
+{
+}
+
+static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
+			       int cr0_pe, int eflags_vm,
+			       int cs_d, int cs_l)
+{
+	static char out[15*3+1];
+	int i;
+
+	for (i = 0; i < len; ++i)
+		sprintf(out + i * 3, "%02x ", insn[i]);
+	out[len*3-1] = '\0';
+	return out;
+}
+
+#endif
+
+
+#define VMX_EXIT_REASONS			\
+	_ER(EXCEPTION_NMI,	 0)		\
+	_ER(EXTERNAL_INTERRUPT,	 1)		\
+	_ER(TRIPLE_FAULT,	 2)		\
+	_ER(PENDING_INTERRUPT,	 7)		\
+	_ER(NMI_WINDOW,		 8)		\
+	_ER(TASK_SWITCH,	 9)		\
+	_ER(CPUID,		 10)		\
+	_ER(HLT,		 12)		\
+	_ER(INVD,		 13)		\
+	_ER(INVLPG,		 14)		\
+	_ER(RDPMC,		 15)		\
+	_ER(RDTSC,		 16)		\
+	_ER(VMCALL,		 18)		\
+	_ER(VMCLEAR,		 19)		\
+	_ER(VMLAUNCH,		 20)		\
+	_ER(VMPTRLD,		 21)		\
+	_ER(VMPTRST,		 22)		\
+	_ER(VMREAD,		 23)		\
+	_ER(VMRESUME,		 24)		\
+	_ER(VMWRITE,		 25)		\
+	_ER(VMOFF,		 26)		\
+	_ER(VMON,		 27)		\
+	_ER(CR_ACCESS,		 28)		\
+	_ER(DR_ACCESS,		 29)		\
+	_ER(IO_INSTRUCTION,	 30)		\
+	_ER(MSR_READ,		 31)		\
+	_ER(MSR_WRITE,		 32)		\
+	_ER(MWAIT_INSTRUCTION,	 36)		\
+	_ER(MONITOR_INSTRUCTION, 39)		\
+	_ER(PAUSE_INSTRUCTION,	 40)		\
+	_ER(MCE_DURING_VMENTRY,	 41)		\
+	_ER(TPR_BELOW_THRESHOLD, 43)		\
+	_ER(APIC_ACCESS,	 44)		\
+	_ER(EOI_INDUCED,	 45)		\
+	_ER(EPT_VIOLATION,	 48)		\
+	_ER(EPT_MISCONFIG,	 49)		\
+	_ER(INVEPT,		 50)		\
+	_ER(PREEMPTION_TIMER,	 52)		\
+	_ER(WBINVD,		 54)		\
+	_ER(XSETBV,		 55)		\
+	_ER(APIC_WRITE,		 56)		\
+	_ER(INVPCID,		 58)		\
+	_ER(PML_FULL,		 62)		\
+	_ER(XSAVES,		 63)		\
+	_ER(XRSTORS,		 64)
+
+#define SVM_EXIT_REASONS \
+	_ER(EXIT_READ_CR0,	0x000)		\
+	_ER(EXIT_READ_CR3,	0x003)		\
+	_ER(EXIT_READ_CR4,	0x004)		\
+	_ER(EXIT_READ_CR8,	0x008)		\
+	_ER(EXIT_WRITE_CR0,	0x010)		\
+	_ER(EXIT_WRITE_CR3,	0x013)		\
+	_ER(EXIT_WRITE_CR4,	0x014)		\
+	_ER(EXIT_WRITE_CR8,	0x018)		\
+	_ER(EXIT_READ_DR0,	0x020)		\
+	_ER(EXIT_READ_DR1,	0x021)		\
+	_ER(EXIT_READ_DR2,	0x022)		\
+	_ER(EXIT_READ_DR3,	0x023)		\
+	_ER(EXIT_READ_DR4,	0x024)		\
+	_ER(EXIT_READ_DR5,	0x025)		\
+	_ER(EXIT_READ_DR6,	0x026)		\
+	_ER(EXIT_READ_DR7,	0x027)		\
+	_ER(EXIT_WRITE_DR0,	0x030)		\
+	_ER(EXIT_WRITE_DR1,	0x031)		\
+	_ER(EXIT_WRITE_DR2,	0x032)		\
+	_ER(EXIT_WRITE_DR3,	0x033)		\
+	_ER(EXIT_WRITE_DR4,	0x034)		\
+	_ER(EXIT_WRITE_DR5,	0x035)		\
+	_ER(EXIT_WRITE_DR6,	0x036)		\
+	_ER(EXIT_WRITE_DR7,	0x037)		\
+	_ER(EXIT_EXCP_DE,	0x040)		\
+	_ER(EXIT_EXCP_DB,	0x041)		\
+	_ER(EXIT_EXCP_BP,	0x043)		\
+	_ER(EXIT_EXCP_OF,	0x044)		\
+	_ER(EXIT_EXCP_BR,	0x045)		\
+	_ER(EXIT_EXCP_UD,	0x046)		\
+	_ER(EXIT_EXCP_NM,	0x047)		\
+	_ER(EXIT_EXCP_DF,	0x048)		\
+	_ER(EXIT_EXCP_TS,	0x04a)		\
+	_ER(EXIT_EXCP_NP,	0x04b)		\
+	_ER(EXIT_EXCP_SS,	0x04c)		\
+	_ER(EXIT_EXCP_GP,	0x04d)		\
+	_ER(EXIT_EXCP_PF,	0x04e)		\
+	_ER(EXIT_EXCP_MF,	0x050)		\
+	_ER(EXIT_EXCP_AC,	0x051)		\
+	_ER(EXIT_EXCP_MC,	0x052)		\
+	_ER(EXIT_EXCP_XF,	0x053)		\
+	_ER(EXIT_INTR,		0x060)		\
+	_ER(EXIT_NMI,		0x061)		\
+	_ER(EXIT_SMI,		0x062)		\
+	_ER(EXIT_INIT,		0x063)		\
+	_ER(EXIT_VINTR,		0x064)		\
+	_ER(EXIT_CR0_SEL_WRITE,	0x065)		\
+	_ER(EXIT_IDTR_READ,	0x066)		\
+	_ER(EXIT_GDTR_READ,	0x067)		\
+	_ER(EXIT_LDTR_READ,	0x068)		\
+	_ER(EXIT_TR_READ,	0x069)		\
+	_ER(EXIT_IDTR_WRITE,	0x06a)		\
+	_ER(EXIT_GDTR_WRITE,	0x06b)		\
+	_ER(EXIT_LDTR_WRITE,	0x06c)		\
+	_ER(EXIT_TR_WRITE,	0x06d)		\
+	_ER(EXIT_RDTSC,		0x06e)		\
+	_ER(EXIT_RDPMC,		0x06f)		\
+	_ER(EXIT_PUSHF,		0x070)		\
+	_ER(EXIT_POPF,		0x071)		\
+	_ER(EXIT_CPUID,		0x072)		\
+	_ER(EXIT_RSM,		0x073)		\
+	_ER(EXIT_IRET,		0x074)		\
+	_ER(EXIT_SWINT,		0x075)		\
+	_ER(EXIT_INVD,		0x076)		\
+	_ER(EXIT_PAUSE,		0x077)		\
+	_ER(EXIT_HLT,		0x078)		\
+	_ER(EXIT_INVLPG,	0x079)		\
+	_ER(EXIT_INVLPGA,	0x07a)		\
+	_ER(EXIT_IOIO,		0x07b)		\
+	_ER(EXIT_MSR,		0x07c)		\
+	_ER(EXIT_TASK_SWITCH,	0x07d)		\
+	_ER(EXIT_FERR_FREEZE,	0x07e)		\
+	_ER(EXIT_SHUTDOWN,	0x07f)		\
+	_ER(EXIT_VMRUN,		0x080)		\
+	_ER(EXIT_VMMCALL,	0x081)		\
+	_ER(EXIT_VMLOAD,	0x082)		\
+	_ER(EXIT_VMSAVE,	0x083)		\
+	_ER(EXIT_STGI,		0x084)		\
+	_ER(EXIT_CLGI,		0x085)		\
+	_ER(EXIT_SKINIT,	0x086)		\
+	_ER(EXIT_RDTSCP,	0x087)		\
+	_ER(EXIT_ICEBP,		0x088)		\
+	_ER(EXIT_WBINVD,	0x089)		\
+	_ER(EXIT_MONITOR,	0x08a)		\
+	_ER(EXIT_MWAIT,		0x08b)		\
+	_ER(EXIT_MWAIT_COND,	0x08c)		\
+	_ER(EXIT_XSETBV,	0x08d)		\
+	_ER(EXIT_NPF, 		0x400)		\
+	_ER(EXIT_AVIC_INCOMPLETE_IPI,		0x401)	\
+	_ER(EXIT_AVIC_UNACCELERATED_ACCESS,	0x402)	\
+	_ER(EXIT_ERR,		-1)
+
+#define _ER(reason, val)	{ #reason, val },
+struct str_values {
+	const char	*str;
+	int		val;
+};
+
+static struct str_values vmx_exit_reasons[] = {
+	VMX_EXIT_REASONS
+	{ NULL, -1}
+};
+
+static struct str_values svm_exit_reasons[] = {
+	SVM_EXIT_REASONS
+	{ NULL, -1}
+};
+
+static struct isa_exit_reasons {
+	unsigned isa;
+	struct str_values *strings;
+} isa_exit_reasons[] = {
+	{ .isa = 1, .strings = vmx_exit_reasons },
+	{ .isa = 2, .strings = svm_exit_reasons },
+	{ }
+};
+
+static const char *find_exit_reason(unsigned isa, int val)
+{
+	struct str_values *strings = NULL;
+	int i;
+
+	for (i = 0; isa_exit_reasons[i].strings; ++i)
+		if (isa_exit_reasons[i].isa == isa) {
+			strings = isa_exit_reasons[i].strings;
+			break;
+		}
+	if (!strings)
+		return "UNKNOWN-ISA";
+	for (i = 0; strings[i].str; i++)
+		if (strings[i].val == val)
+			break;
+
+	return strings[i].str;
+}
+
+static int print_exit_reason(struct trace_seq *s, struct tep_record *record,
+			     struct tep_event *event, const char *field)
+{
+	unsigned long long isa;
+	unsigned long long val;
+	const char *reason;
+
+	if (tep_get_field_val(s, event, field, record, &val, 1) < 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "isa", record, &isa, 0) < 0)
+		isa = 1;
+
+	reason = find_exit_reason(isa, val);
+	if (reason)
+		trace_seq_printf(s, "reason %s", reason);
+	else
+		trace_seq_printf(s, "reason UNKNOWN (%llu)", val);
+	return 0;
+}
+
+static int kvm_exit_handler(struct trace_seq *s, struct tep_record *record,
+			    struct tep_event *event, void *context)
+{
+	unsigned long long info1 = 0, info2 = 0;
+
+	if (print_exit_reason(s, record, event, "exit_reason") < 0)
+		return -1;
+
+	tep_print_num_field(s, " rip 0x%lx", event, "guest_rip", record, 1);
+
+	if (tep_get_field_val(s, event, "info1", record, &info1, 0) >= 0
+	    && tep_get_field_val(s, event, "info2", record, &info2, 0) >= 0)
+		trace_seq_printf(s, " info %llx %llx", info1, info2);
+
+	return 0;
+}
+
+#define KVM_EMUL_INSN_F_CR0_PE (1 << 0)
+#define KVM_EMUL_INSN_F_EFL_VM (1 << 1)
+#define KVM_EMUL_INSN_F_CS_D   (1 << 2)
+#define KVM_EMUL_INSN_F_CS_L   (1 << 3)
+
+static int kvm_emulate_insn_handler(struct trace_seq *s,
+				    struct tep_record *record,
+				    struct tep_event *event, void *context)
+{
+	unsigned long long rip, csbase, len, flags, failed;
+	int llen;
+	uint8_t *insn;
+	const char *disasm;
+
+	if (tep_get_field_val(s, event, "rip", record, &rip, 1) < 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "csbase", record, &csbase, 1) < 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "len", record, &len, 1) < 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "flags", record, &flags, 1) < 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "failed", record, &failed, 1) < 0)
+		return -1;
+
+	insn = tep_get_field_raw(s, event, "insn", record, &llen, 1);
+	if (!insn)
+		return -1;
+
+	disasm = disassemble(insn, len, rip,
+			     flags & KVM_EMUL_INSN_F_CR0_PE,
+			     flags & KVM_EMUL_INSN_F_EFL_VM,
+			     flags & KVM_EMUL_INSN_F_CS_D,
+			     flags & KVM_EMUL_INSN_F_CS_L);
+
+	trace_seq_printf(s, "%llx:%llx: %s%s", csbase, rip, disasm,
+			 failed ? " FAIL" : "");
+	return 0;
+}
+
+
+static int kvm_nested_vmexit_inject_handler(struct trace_seq *s, struct tep_record *record,
+					    struct tep_event *event, void *context)
+{
+	if (print_exit_reason(s, record, event, "exit_code") < 0)
+		return -1;
+
+	tep_print_num_field(s, " info1 %llx", event, "exit_info1", record, 1);
+	tep_print_num_field(s, " info2 %llx", event, "exit_info2", record, 1);
+	tep_print_num_field(s, " int_info %llx", event, "exit_int_info", record, 1);
+	tep_print_num_field(s, " int_info_err %llx", event, "exit_int_info_err", record, 1);
+
+	return 0;
+}
+
+static int kvm_nested_vmexit_handler(struct trace_seq *s, struct tep_record *record,
+				     struct tep_event *event, void *context)
+{
+	tep_print_num_field(s, "rip %llx ", event, "rip", record, 1);
+
+	return kvm_nested_vmexit_inject_handler(s, record, event, context);
+}
+
+union kvm_mmu_page_role {
+	unsigned word;
+	struct {
+		unsigned level:4;
+		unsigned cr4_pae:1;
+		unsigned quadrant:2;
+		unsigned direct:1;
+		unsigned access:3;
+		unsigned invalid:1;
+		unsigned nxe:1;
+		unsigned cr0_wp:1;
+		unsigned smep_and_not_wp:1;
+		unsigned smap_and_not_wp:1;
+		unsigned pad_for_nice_hex_output:8;
+		unsigned smm:8;
+	};
+};
+
+static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record,
+			      struct tep_event *event, void *context)
+{
+	unsigned long long val;
+	static const char *access_str[] = {
+		"---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux"
+	};
+	union kvm_mmu_page_role role;
+
+	if (tep_get_field_val(s, event, "role", record, &val, 1) < 0)
+		return -1;
+
+	role.word = (int)val;
+
+	/*
+	 * We can only use the structure if file is of the same
+	 * endianness.
+	 */
+	if (tep_is_file_bigendian(event->tep) ==
+	    tep_is_local_bigendian(event->tep)) {
+
+		trace_seq_printf(s, "%u q%u%s %s%s %spae %snxe %swp%s%s%s",
+				 role.level,
+				 role.quadrant,
+				 role.direct ? " direct" : "",
+				 access_str[role.access],
+				 role.invalid ? " invalid" : "",
+				 role.cr4_pae ? "" : "!",
+				 role.nxe ? "" : "!",
+				 role.cr0_wp ? "" : "!",
+				 role.smep_and_not_wp ? " smep" : "",
+				 role.smap_and_not_wp ? " smap" : "",
+				 role.smm ? " smm" : "");
+	} else
+		trace_seq_printf(s, "WORD: %08x", role.word);
+
+	tep_print_num_field(s, " root %u ",  event,
+			    "root_count", record, 1);
+
+	if (tep_get_field_val(s, event, "unsync", record, &val, 1) < 0)
+		return -1;
+
+	trace_seq_printf(s, "%s%c",  val ? "unsync" : "sync", 0);
+	return 0;
+}
+
+static int kvm_mmu_get_page_handler(struct trace_seq *s,
+				    struct tep_record *record,
+				    struct tep_event *event, void *context)
+{
+	unsigned long long val;
+
+	if (tep_get_field_val(s, event, "created", record, &val, 1) < 0)
+		return -1;
+
+	trace_seq_printf(s, "%s ", val ? "new" : "existing");
+
+	if (tep_get_field_val(s, event, "gfn", record, &val, 1) < 0)
+		return -1;
+
+	trace_seq_printf(s, "sp gfn %llx ", val);
+	return kvm_mmu_print_role(s, record, event, context);
+}
+
+#define PT_WRITABLE_SHIFT 1
+#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
+
+static unsigned long long
+process_is_writable_pte(struct trace_seq *s, unsigned long long *args)
+{
+	unsigned long pte = args[0];
+	return pte & PT_WRITABLE_MASK;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+	init_disassembler();
+
+	tep_register_event_handler(tep, -1, "kvm", "kvm_exit",
+				   kvm_exit_handler, NULL);
+
+	tep_register_event_handler(tep, -1, "kvm", "kvm_emulate_insn",
+				   kvm_emulate_insn_handler, NULL);
+
+	tep_register_event_handler(tep, -1, "kvm", "kvm_nested_vmexit",
+				   kvm_nested_vmexit_handler, NULL);
+
+	tep_register_event_handler(tep, -1, "kvm", "kvm_nested_vmexit_inject",
+				   kvm_nested_vmexit_inject_handler, NULL);
+
+	tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_get_page",
+				   kvm_mmu_get_page_handler, NULL);
+
+	tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_sync_page",
+				   kvm_mmu_print_role, NULL);
+
+	tep_register_event_handler(tep, -1,
+				   "kvmmmu", "kvm_mmu_unsync_page",
+				   kvm_mmu_print_role, NULL);
+
+	tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_zap_page",
+				   kvm_mmu_print_role, NULL);
+
+	tep_register_event_handler(tep, -1, "kvmmmu",
+			"kvm_mmu_prepare_zap_page", kvm_mmu_print_role,
+			NULL);
+
+	tep_register_print_function(tep,
+				    process_is_writable_pte,
+				    TEP_FUNC_ARG_INT,
+				    "is_writable_pte",
+				    TEP_FUNC_ARG_LONG,
+				    TEP_FUNC_ARG_VOID);
+	return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+	tep_unregister_event_handler(tep, -1, "kvm", "kvm_exit",
+				     kvm_exit_handler, NULL);
+
+	tep_unregister_event_handler(tep, -1, "kvm", "kvm_emulate_insn",
+				     kvm_emulate_insn_handler, NULL);
+
+	tep_unregister_event_handler(tep, -1, "kvm", "kvm_nested_vmexit",
+				     kvm_nested_vmexit_handler, NULL);
+
+	tep_unregister_event_handler(tep, -1, "kvm", "kvm_nested_vmexit_inject",
+				     kvm_nested_vmexit_inject_handler, NULL);
+
+	tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_get_page",
+				     kvm_mmu_get_page_handler, NULL);
+
+	tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_sync_page",
+				     kvm_mmu_print_role, NULL);
+
+	tep_unregister_event_handler(tep, -1,
+				     "kvmmmu", "kvm_mmu_unsync_page",
+				     kvm_mmu_print_role, NULL);
+
+	tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_zap_page",
+				     kvm_mmu_print_role, NULL);
+
+	tep_unregister_event_handler(tep, -1, "kvmmmu",
+			"kvm_mmu_prepare_zap_page", kvm_mmu_print_role,
+			NULL);
+
+	tep_unregister_print_function(tep, process_is_writable_pte,
+				      "is_writable_pte");
+}
diff --git a/tools/lib/traceevent/plugins/plugin_mac80211.c b/tools/lib/traceevent/plugins/plugin_mac80211.c
new file mode 100644
index 000000000..f48071e3c
--- /dev/null
+++ b/tools/lib/traceevent/plugins/plugin_mac80211.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+#include "trace-seq.h"
+
+#define INDENT 65
+
+static void print_string(struct trace_seq *s, struct tep_event *event,
+			 const char *name, const void *data)
+{
+	struct tep_format_field *f = tep_find_field(event, name);
+	int offset;
+	int length;
+
+	if (!f) {
+		trace_seq_printf(s, "NOTFOUND:%s", name);
+		return;
+	}
+
+	offset = f->offset;
+	length = f->size;
+
+	if (!strncmp(f->type, "__data_loc", 10)) {
+		unsigned long long v;
+		if (tep_read_number_field(f, data, &v)) {
+			trace_seq_printf(s, "invalid_data_loc");
+			return;
+		}
+		offset = v & 0xffff;
+		length = v >> 16;
+	}
+
+	trace_seq_printf(s, "%.*s", length, (char *)data + offset);
+}
+
+#define SF(fn)	tep_print_num_field(s, fn ":%d", event, fn, record, 0)
+#define SFX(fn)	tep_print_num_field(s, fn ":%#x", event, fn, record, 0)
+#define SP()	trace_seq_putc(s, ' ')
+
+static int drv_bss_info_changed(struct trace_seq *s,
+				struct tep_record *record,
+				struct tep_event *event, void *context)
+{
+	void *data = record->data;
+
+	print_string(s, event, "wiphy_name", data);
+	trace_seq_printf(s, " vif:");
+	print_string(s, event, "vif_name", data);
+	tep_print_num_field(s, "(%d)", event, "vif_type", record, 1);
+
+	trace_seq_printf(s, "\n%*s", INDENT, "");
+	SF("assoc"); SP();
+	SF("aid"); SP();
+	SF("cts"); SP();
+	SF("shortpre"); SP();
+	SF("shortslot"); SP();
+	SF("dtimper"); SP();
+	trace_seq_printf(s, "\n%*s", INDENT, "");
+	SF("bcnint"); SP();
+	SFX("assoc_cap"); SP();
+	SFX("basic_rates"); SP();
+	SF("enable_beacon");
+	trace_seq_printf(s, "\n%*s", INDENT, "");
+	SF("ht_operation_mode");
+
+	return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+	tep_register_event_handler(tep, -1, "mac80211",
+				   "drv_bss_info_changed",
+				   drv_bss_info_changed, NULL);
+	return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+	tep_unregister_event_handler(tep, -1, "mac80211",
+				     "drv_bss_info_changed",
+				     drv_bss_info_changed, NULL);
+}
diff --git a/tools/lib/traceevent/plugins/plugin_sched_switch.c b/tools/lib/traceevent/plugins/plugin_sched_switch.c
new file mode 100644
index 000000000..e12fa1038
--- /dev/null
+++ b/tools/lib/traceevent/plugins/plugin_sched_switch.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+#include "trace-seq.h"
+
+static void write_state(struct trace_seq *s, int val)
+{
+	const char states[] = "SDTtZXxW";
+	int found = 0;
+	int i;
+
+	for (i = 0; i < (sizeof(states) - 1); i++) {
+		if (!(val & (1 << i)))
+			continue;
+
+		if (found)
+			trace_seq_putc(s, '|');
+
+		found = 1;
+		trace_seq_putc(s, states[i]);
+	}
+
+	if (!found)
+		trace_seq_putc(s, 'R');
+}
+
+static void write_and_save_comm(struct tep_format_field *field,
+				struct tep_record *record,
+				struct trace_seq *s, int pid)
+{
+	const char *comm;
+	int len;
+
+	comm = (char *)(record->data + field->offset);
+	len = s->len;
+	trace_seq_printf(s, "%.*s",
+			 field->size, comm);
+
+	/* make sure the comm has a \0 at the end. */
+	trace_seq_terminate(s);
+	comm = &s->buffer[len];
+
+	/* Help out the comm to ids. This will handle dups */
+	tep_register_comm(field->event->tep, comm, pid);
+}
+
+static int sched_wakeup_handler(struct trace_seq *s,
+				struct tep_record *record,
+				struct tep_event *event, void *context)
+{
+	struct tep_format_field *field;
+	unsigned long long val;
+
+	if (tep_get_field_val(s, event, "pid", record, &val, 1))
+		return trace_seq_putc(s, '!');
+
+	field = tep_find_any_field(event, "comm");
+	if (field) {
+		write_and_save_comm(field, record, s, val);
+		trace_seq_putc(s, ':');
+	}
+	trace_seq_printf(s, "%lld", val);
+
+	if (tep_get_field_val(s, event, "prio", record, &val, 0) == 0)
+		trace_seq_printf(s, " [%lld]", val);
+
+	if (tep_get_field_val(s, event, "success", record, &val, 1) == 0)
+		trace_seq_printf(s, " success=%lld", val);
+
+	if (tep_get_field_val(s, event, "target_cpu", record, &val, 0) == 0)
+		trace_seq_printf(s, " CPU:%03llu", val);
+
+	return 0;
+}
+
+static int sched_switch_handler(struct trace_seq *s,
+				struct tep_record *record,
+				struct tep_event *event, void *context)
+{
+	struct tep_format_field *field;
+	unsigned long long val;
+
+	if (tep_get_field_val(s, event, "prev_pid", record, &val, 1))
+		return trace_seq_putc(s, '!');
+
+	field = tep_find_any_field(event, "prev_comm");
+	if (field) {
+		write_and_save_comm(field, record, s, val);
+		trace_seq_putc(s, ':');
+	}
+	trace_seq_printf(s, "%lld ", val);
+
+	if (tep_get_field_val(s, event, "prev_prio", record, &val, 0) == 0)
+		trace_seq_printf(s, "[%d] ", (int) val);
+
+	if (tep_get_field_val(s,  event, "prev_state", record, &val, 0) == 0)
+		write_state(s, val);
+
+	trace_seq_puts(s, " ==> ");
+
+	if (tep_get_field_val(s, event, "next_pid", record, &val, 1))
+		return trace_seq_putc(s, '!');
+
+	field = tep_find_any_field(event, "next_comm");
+	if (field) {
+		write_and_save_comm(field, record, s, val);
+		trace_seq_putc(s, ':');
+	}
+	trace_seq_printf(s, "%lld", val);
+
+	if (tep_get_field_val(s, event, "next_prio", record, &val, 0) == 0)
+		trace_seq_printf(s, " [%d]", (int) val);
+
+	return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+	tep_register_event_handler(tep, -1, "sched", "sched_switch",
+				   sched_switch_handler, NULL);
+
+	tep_register_event_handler(tep, -1, "sched", "sched_wakeup",
+				   sched_wakeup_handler, NULL);
+
+	tep_register_event_handler(tep, -1, "sched", "sched_wakeup_new",
+				   sched_wakeup_handler, NULL);
+	return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+	tep_unregister_event_handler(tep, -1, "sched", "sched_switch",
+				     sched_switch_handler, NULL);
+
+	tep_unregister_event_handler(tep, -1, "sched", "sched_wakeup",
+				     sched_wakeup_handler, NULL);
+
+	tep_unregister_event_handler(tep, -1, "sched", "sched_wakeup_new",
+				     sched_wakeup_handler, NULL);
+}
diff --git a/tools/lib/traceevent/plugins/plugin_scsi.c b/tools/lib/traceevent/plugins/plugin_scsi.c
new file mode 100644
index 000000000..5d0387a4b
--- /dev/null
+++ b/tools/lib/traceevent/plugins/plugin_scsi.c
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include "event-parse.h"
+#include "trace-seq.h"
+
+typedef unsigned long sector_t;
+typedef uint64_t u64;
+typedef unsigned int u32;
+
+/*
+ *      SCSI opcodes
+ */
+#define TEST_UNIT_READY			0x00
+#define REZERO_UNIT			0x01
+#define REQUEST_SENSE			0x03
+#define FORMAT_UNIT			0x04
+#define READ_BLOCK_LIMITS		0x05
+#define REASSIGN_BLOCKS			0x07
+#define INITIALIZE_ELEMENT_STATUS	0x07
+#define READ_6				0x08
+#define WRITE_6				0x0a
+#define SEEK_6				0x0b
+#define READ_REVERSE			0x0f
+#define WRITE_FILEMARKS			0x10
+#define SPACE				0x11
+#define INQUIRY				0x12
+#define RECOVER_BUFFERED_DATA		0x14
+#define MODE_SELECT			0x15
+#define RESERVE				0x16
+#define RELEASE				0x17
+#define COPY				0x18
+#define ERASE				0x19
+#define MODE_SENSE			0x1a
+#define START_STOP			0x1b
+#define RECEIVE_DIAGNOSTIC		0x1c
+#define SEND_DIAGNOSTIC			0x1d
+#define ALLOW_MEDIUM_REMOVAL		0x1e
+
+#define READ_FORMAT_CAPACITIES		0x23
+#define SET_WINDOW			0x24
+#define READ_CAPACITY			0x25
+#define READ_10				0x28
+#define WRITE_10			0x2a
+#define SEEK_10				0x2b
+#define POSITION_TO_ELEMENT		0x2b
+#define WRITE_VERIFY			0x2e
+#define VERIFY				0x2f
+#define SEARCH_HIGH			0x30
+#define SEARCH_EQUAL			0x31
+#define SEARCH_LOW			0x32
+#define SET_LIMITS			0x33
+#define PRE_FETCH			0x34
+#define READ_POSITION			0x34
+#define SYNCHRONIZE_CACHE		0x35
+#define LOCK_UNLOCK_CACHE		0x36
+#define READ_DEFECT_DATA		0x37
+#define MEDIUM_SCAN			0x38
+#define COMPARE				0x39
+#define COPY_VERIFY			0x3a
+#define WRITE_BUFFER			0x3b
+#define READ_BUFFER			0x3c
+#define UPDATE_BLOCK			0x3d
+#define READ_LONG			0x3e
+#define WRITE_LONG			0x3f
+#define CHANGE_DEFINITION		0x40
+#define WRITE_SAME			0x41
+#define UNMAP				0x42
+#define READ_TOC			0x43
+#define READ_HEADER			0x44
+#define GET_EVENT_STATUS_NOTIFICATION	0x4a
+#define LOG_SELECT			0x4c
+#define LOG_SENSE			0x4d
+#define XDWRITEREAD_10			0x53
+#define MODE_SELECT_10			0x55
+#define RESERVE_10			0x56
+#define RELEASE_10			0x57
+#define MODE_SENSE_10			0x5a
+#define PERSISTENT_RESERVE_IN		0x5e
+#define PERSISTENT_RESERVE_OUT		0x5f
+#define VARIABLE_LENGTH_CMD		0x7f
+#define REPORT_LUNS			0xa0
+#define SECURITY_PROTOCOL_IN		0xa2
+#define MAINTENANCE_IN			0xa3
+#define MAINTENANCE_OUT			0xa4
+#define MOVE_MEDIUM			0xa5
+#define EXCHANGE_MEDIUM			0xa6
+#define READ_12				0xa8
+#define SERVICE_ACTION_OUT_12		0xa9
+#define WRITE_12			0xaa
+#define SERVICE_ACTION_IN_12		0xab
+#define WRITE_VERIFY_12			0xae
+#define VERIFY_12			0xaf
+#define SEARCH_HIGH_12			0xb0
+#define SEARCH_EQUAL_12			0xb1
+#define SEARCH_LOW_12			0xb2
+#define SECURITY_PROTOCOL_OUT		0xb5
+#define READ_ELEMENT_STATUS		0xb8
+#define SEND_VOLUME_TAG			0xb6
+#define WRITE_LONG_2			0xea
+#define EXTENDED_COPY			0x83
+#define RECEIVE_COPY_RESULTS		0x84
+#define ACCESS_CONTROL_IN		0x86
+#define ACCESS_CONTROL_OUT		0x87
+#define READ_16				0x88
+#define WRITE_16			0x8a
+#define READ_ATTRIBUTE			0x8c
+#define WRITE_ATTRIBUTE			0x8d
+#define VERIFY_16			0x8f
+#define SYNCHRONIZE_CACHE_16		0x91
+#define WRITE_SAME_16			0x93
+#define SERVICE_ACTION_BIDIRECTIONAL	0x9d
+#define SERVICE_ACTION_IN_16		0x9e
+#define SERVICE_ACTION_OUT_16		0x9f
+/* values for service action in */
+#define	SAI_READ_CAPACITY_16		0x10
+#define SAI_GET_LBA_STATUS		0x12
+/* values for VARIABLE_LENGTH_CMD service action codes
+ * see spc4r17 Section D.3.5, table D.7 and D.8 */
+#define VLC_SA_RECEIVE_CREDENTIAL	0x1800
+/* values for maintenance in */
+#define MI_REPORT_IDENTIFYING_INFORMATION		0x05
+#define MI_REPORT_TARGET_PGS				0x0a
+#define MI_REPORT_ALIASES				0x0b
+#define MI_REPORT_SUPPORTED_OPERATION_CODES		0x0c
+#define MI_REPORT_SUPPORTED_TASK_MANAGEMENT_FUNCTIONS	0x0d
+#define MI_REPORT_PRIORITY				0x0e
+#define MI_REPORT_TIMESTAMP				0x0f
+#define MI_MANAGEMENT_PROTOCOL_IN			0x10
+/* value for MI_REPORT_TARGET_PGS ext header */
+#define MI_EXT_HDR_PARAM_FMT		0x20
+/* values for maintenance out */
+#define MO_SET_IDENTIFYING_INFORMATION	0x06
+#define MO_SET_TARGET_PGS		0x0a
+#define MO_CHANGE_ALIASES		0x0b
+#define MO_SET_PRIORITY			0x0e
+#define MO_SET_TIMESTAMP		0x0f
+#define MO_MANAGEMENT_PROTOCOL_OUT	0x10
+/* values for variable length command */
+#define XDREAD_32			0x03
+#define XDWRITE_32			0x04
+#define XPWRITE_32			0x06
+#define XDWRITEREAD_32			0x07
+#define READ_32				0x09
+#define VERIFY_32			0x0a
+#define WRITE_32			0x0b
+#define WRITE_SAME_32			0x0d
+
+#define SERVICE_ACTION16(cdb) (cdb[1] & 0x1f)
+#define SERVICE_ACTION32(cdb) ((cdb[8] << 8) | cdb[9])
+
+static const char *
+scsi_trace_misc(struct trace_seq *, unsigned char *, int);
+
+static const char *
+scsi_trace_rw6(struct trace_seq *p, unsigned char *cdb, int len)
+{
+	const char *ret = p->buffer + p->len;
+	sector_t lba = 0, txlen = 0;
+
+	lba |= ((cdb[1] & 0x1F) << 16);
+	lba |=  (cdb[2] << 8);
+	lba |=   cdb[3];
+	txlen = cdb[4];
+
+	trace_seq_printf(p, "lba=%llu txlen=%llu",
+			 (unsigned long long)lba, (unsigned long long)txlen);
+	trace_seq_putc(p, 0);
+	return ret;
+}
+
+static const char *
+scsi_trace_rw10(struct trace_seq *p, unsigned char *cdb, int len)
+{
+	const char *ret = p->buffer + p->len;
+	sector_t lba = 0, txlen = 0;
+
+	lba |= (cdb[2] << 24);
+	lba |= (cdb[3] << 16);
+	lba |= (cdb[4] << 8);
+	lba |=  cdb[5];
+	txlen |= (cdb[7] << 8);
+	txlen |=  cdb[8];
+
+	trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u",
+			 (unsigned long long)lba, (unsigned long long)txlen,
+			 cdb[1] >> 5);
+
+	if (cdb[0] == WRITE_SAME)
+		trace_seq_printf(p, " unmap=%u", cdb[1] >> 3 & 1);
+
+	trace_seq_putc(p, 0);
+	return ret;
+}
+
+static const char *
+scsi_trace_rw12(struct trace_seq *p, unsigned char *cdb, int len)
+{
+	const char *ret = p->buffer + p->len;
+	sector_t lba = 0, txlen = 0;
+
+	lba |= (cdb[2] << 24);
+	lba |= (cdb[3] << 16);
+	lba |= (cdb[4] << 8);
+	lba |=  cdb[5];
+	txlen |= (cdb[6] << 24);
+	txlen |= (cdb[7] << 16);
+	txlen |= (cdb[8] << 8);
+	txlen |=  cdb[9];
+
+	trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u",
+			 (unsigned long long)lba, (unsigned long long)txlen,
+			 cdb[1] >> 5);
+	trace_seq_putc(p, 0);
+	return ret;
+}
+
+static const char *
+scsi_trace_rw16(struct trace_seq *p, unsigned char *cdb, int len)
+{
+	const char *ret = p->buffer + p->len;
+	sector_t lba = 0, txlen = 0;
+
+	lba |= ((u64)cdb[2] << 56);
+	lba |= ((u64)cdb[3] << 48);
+	lba |= ((u64)cdb[4] << 40);
+	lba |= ((u64)cdb[5] << 32);
+	lba |= (cdb[6] << 24);
+	lba |= (cdb[7] << 16);
+	lba |= (cdb[8] << 8);
+	lba |=  cdb[9];
+	txlen |= (cdb[10] << 24);
+	txlen |= (cdb[11] << 16);
+	txlen |= (cdb[12] << 8);
+	txlen |=  cdb[13];
+
+	trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u",
+			 (unsigned long long)lba, (unsigned long long)txlen,
+			 cdb[1] >> 5);
+
+	if (cdb[0] == WRITE_SAME_16)
+		trace_seq_printf(p, " unmap=%u", cdb[1] >> 3 & 1);
+
+	trace_seq_putc(p, 0);
+	return ret;
+}
+
+static const char *
+scsi_trace_rw32(struct trace_seq *p, unsigned char *cdb, int len)
+{
+	const char *ret = p->buffer + p->len, *cmd;
+	sector_t lba = 0, txlen = 0;
+	u32 ei_lbrt = 0;
+
+	switch (SERVICE_ACTION32(cdb)) {
+	case READ_32:
+		cmd = "READ";
+		break;
+	case VERIFY_32:
+		cmd = "VERIFY";
+		break;
+	case WRITE_32:
+		cmd = "WRITE";
+		break;
+	case WRITE_SAME_32:
+		cmd = "WRITE_SAME";
+		break;
+	default:
+		trace_seq_printf(p, "UNKNOWN");
+		goto out;
+	}
+
+	lba |= ((u64)cdb[12] << 56);
+	lba |= ((u64)cdb[13] << 48);
+	lba |= ((u64)cdb[14] << 40);
+	lba |= ((u64)cdb[15] << 32);
+	lba |= (cdb[16] << 24);
+	lba |= (cdb[17] << 16);
+	lba |= (cdb[18] << 8);
+	lba |=  cdb[19];
+	ei_lbrt |= (cdb[20] << 24);
+	ei_lbrt |= (cdb[21] << 16);
+	ei_lbrt |= (cdb[22] << 8);
+	ei_lbrt |=  cdb[23];
+	txlen |= (cdb[28] << 24);
+	txlen |= (cdb[29] << 16);
+	txlen |= (cdb[30] << 8);
+	txlen |=  cdb[31];
+
+	trace_seq_printf(p, "%s_32 lba=%llu txlen=%llu protect=%u ei_lbrt=%u",
+			 cmd, (unsigned long long)lba,
+			 (unsigned long long)txlen, cdb[10] >> 5, ei_lbrt);
+
+	if (SERVICE_ACTION32(cdb) == WRITE_SAME_32)
+		trace_seq_printf(p, " unmap=%u", cdb[10] >> 3 & 1);
+
+out:
+	trace_seq_putc(p, 0);
+	return ret;
+}
+
+static const char *
+scsi_trace_unmap(struct trace_seq *p, unsigned char *cdb, int len)
+{
+	const char *ret = p->buffer + p->len;
+	unsigned int regions = cdb[7] << 8 | cdb[8];
+
+	trace_seq_printf(p, "regions=%u", (regions - 8) / 16);
+	trace_seq_putc(p, 0);
+	return ret;
+}
+
+static const char *
+scsi_trace_service_action_in(struct trace_seq *p, unsigned char *cdb, int len)
+{
+	const char *ret = p->buffer + p->len, *cmd;
+	sector_t lba = 0;
+	u32 alloc_len = 0;
+
+	switch (SERVICE_ACTION16(cdb)) {
+	case SAI_READ_CAPACITY_16:
+		cmd = "READ_CAPACITY_16";
+		break;
+	case SAI_GET_LBA_STATUS:
+		cmd = "GET_LBA_STATUS";
+		break;
+	default:
+		trace_seq_printf(p, "UNKNOWN");
+		goto out;
+	}
+
+	lba |= ((u64)cdb[2] << 56);
+	lba |= ((u64)cdb[3] << 48);
+	lba |= ((u64)cdb[4] << 40);
+	lba |= ((u64)cdb[5] << 32);
+	lba |= (cdb[6] << 24);
+	lba |= (cdb[7] << 16);
+	lba |= (cdb[8] << 8);
+	lba |=  cdb[9];
+	alloc_len |= (cdb[10] << 24);
+	alloc_len |= (cdb[11] << 16);
+	alloc_len |= (cdb[12] << 8);
+	alloc_len |=  cdb[13];
+
+	trace_seq_printf(p, "%s lba=%llu alloc_len=%u", cmd,
+			 (unsigned long long)lba, alloc_len);
+
+out:
+	trace_seq_putc(p, 0);
+	return ret;
+}
+
+static const char *
+scsi_trace_varlen(struct trace_seq *p, unsigned char *cdb, int len)
+{
+	switch (SERVICE_ACTION32(cdb)) {
+	case READ_32:
+	case VERIFY_32:
+	case WRITE_32:
+	case WRITE_SAME_32:
+		return scsi_trace_rw32(p, cdb, len);
+	default:
+		return scsi_trace_misc(p, cdb, len);
+	}
+}
+
+static const char *
+scsi_trace_misc(struct trace_seq *p, unsigned char *cdb, int len)
+{
+	const char *ret = p->buffer + p->len;
+
+	trace_seq_printf(p, "-");
+	trace_seq_putc(p, 0);
+	return ret;
+}
+
+const char *
+scsi_trace_parse_cdb(struct trace_seq *p, unsigned char *cdb, int len)
+{
+	switch (cdb[0]) {
+	case READ_6:
+	case WRITE_6:
+		return scsi_trace_rw6(p, cdb, len);
+	case READ_10:
+	case VERIFY:
+	case WRITE_10:
+	case WRITE_SAME:
+		return scsi_trace_rw10(p, cdb, len);
+	case READ_12:
+	case VERIFY_12:
+	case WRITE_12:
+		return scsi_trace_rw12(p, cdb, len);
+	case READ_16:
+	case VERIFY_16:
+	case WRITE_16:
+	case WRITE_SAME_16:
+		return scsi_trace_rw16(p, cdb, len);
+	case UNMAP:
+		return scsi_trace_unmap(p, cdb, len);
+	case SERVICE_ACTION_IN_16:
+		return scsi_trace_service_action_in(p, cdb, len);
+	case VARIABLE_LENGTH_CMD:
+		return scsi_trace_varlen(p, cdb, len);
+	default:
+		return scsi_trace_misc(p, cdb, len);
+	}
+}
+
+unsigned long long process_scsi_trace_parse_cdb(struct trace_seq *s,
+						unsigned long long *args)
+{
+	scsi_trace_parse_cdb(s, (unsigned char *) (unsigned long) args[1], args[2]);
+	return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+	tep_register_print_function(tep,
+				    process_scsi_trace_parse_cdb,
+				    TEP_FUNC_ARG_STRING,
+				    "scsi_trace_parse_cdb",
+				    TEP_FUNC_ARG_PTR,
+				    TEP_FUNC_ARG_PTR,
+				    TEP_FUNC_ARG_INT,
+				    TEP_FUNC_ARG_VOID);
+	return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+	tep_unregister_print_function(tep, process_scsi_trace_parse_cdb,
+				      "scsi_trace_parse_cdb");
+}
diff --git a/tools/lib/traceevent/plugins/plugin_tlb.c b/tools/lib/traceevent/plugins/plugin_tlb.c
new file mode 100644
index 000000000..43657fb60
--- /dev/null
+++ b/tools/lib/traceevent/plugins/plugin_tlb.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2015 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+
+enum tlb_flush_reason {
+	TLB_FLUSH_ON_TASK_SWITCH,
+	TLB_REMOTE_SHOOTDOWN,
+	TLB_LOCAL_SHOOTDOWN,
+	TLB_LOCAL_MM_SHOOTDOWN,
+	NR_TLB_FLUSH_REASONS,
+};
+
+static int tlb_flush_handler(struct trace_seq *s, struct tep_record *record,
+			     struct tep_event *event, void *context)
+{
+	unsigned long long val;
+
+	trace_seq_printf(s, "pages=");
+
+	tep_print_num_field(s, "%ld", event, "pages", record, 1);
+
+	if (tep_get_field_val(s, event, "reason", record, &val, 1) < 0)
+		return -1;
+
+	trace_seq_puts(s, " reason=");
+
+	switch (val) {
+	case TLB_FLUSH_ON_TASK_SWITCH:
+		trace_seq_puts(s, "flush on task switch");
+		break;
+	case TLB_REMOTE_SHOOTDOWN:
+		trace_seq_puts(s, "remote shootdown");
+		break;
+	case TLB_LOCAL_SHOOTDOWN:
+		trace_seq_puts(s, "local shootdown");
+		break;
+	case TLB_LOCAL_MM_SHOOTDOWN:
+		trace_seq_puts(s, "local mm shootdown");
+		break;
+	}
+
+	trace_seq_printf(s, " (%lld)", val);
+
+	return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+	tep_register_event_handler(tep, -1, "tlb", "tlb_flush",
+				   tlb_flush_handler, NULL);
+
+	return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+	tep_unregister_event_handler(tep, -1,
+				     "tlb", "tlb_flush",
+				     tlb_flush_handler, NULL);
+}
diff --git a/tools/lib/traceevent/plugins/plugin_xen.c b/tools/lib/traceevent/plugins/plugin_xen.c
new file mode 100644
index 000000000..993b208d0
--- /dev/null
+++ b/tools/lib/traceevent/plugins/plugin_xen.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "event-parse.h"
+#include "trace-seq.h"
+
+#define __HYPERVISOR_set_trap_table			0
+#define __HYPERVISOR_mmu_update				1
+#define __HYPERVISOR_set_gdt				2
+#define __HYPERVISOR_stack_switch			3
+#define __HYPERVISOR_set_callbacks			4
+#define __HYPERVISOR_fpu_taskswitch			5
+#define __HYPERVISOR_sched_op_compat			6
+#define __HYPERVISOR_dom0_op				7
+#define __HYPERVISOR_set_debugreg			8
+#define __HYPERVISOR_get_debugreg			9
+#define __HYPERVISOR_update_descriptor			10
+#define __HYPERVISOR_memory_op				12
+#define __HYPERVISOR_multicall				13
+#define __HYPERVISOR_update_va_mapping			14
+#define __HYPERVISOR_set_timer_op			15
+#define __HYPERVISOR_event_channel_op_compat		16
+#define __HYPERVISOR_xen_version			17
+#define __HYPERVISOR_console_io				18
+#define __HYPERVISOR_physdev_op_compat			19
+#define __HYPERVISOR_grant_table_op			20
+#define __HYPERVISOR_vm_assist				21
+#define __HYPERVISOR_update_va_mapping_otherdomain	22
+#define __HYPERVISOR_iret				23 /* x86 only */
+#define __HYPERVISOR_vcpu_op				24
+#define __HYPERVISOR_set_segment_base			25 /* x86/64 only */
+#define __HYPERVISOR_mmuext_op				26
+#define __HYPERVISOR_acm_op				27
+#define __HYPERVISOR_nmi_op				28
+#define __HYPERVISOR_sched_op				29
+#define __HYPERVISOR_callback_op			30
+#define __HYPERVISOR_xenoprof_op			31
+#define __HYPERVISOR_event_channel_op			32
+#define __HYPERVISOR_physdev_op				33
+#define __HYPERVISOR_hvm_op				34
+#define __HYPERVISOR_tmem_op				38
+
+/* Architecture-specific hypercall definitions. */
+#define __HYPERVISOR_arch_0				48
+#define __HYPERVISOR_arch_1				49
+#define __HYPERVISOR_arch_2				50
+#define __HYPERVISOR_arch_3				51
+#define __HYPERVISOR_arch_4				52
+#define __HYPERVISOR_arch_5				53
+#define __HYPERVISOR_arch_6				54
+#define __HYPERVISOR_arch_7				55
+
+#define N(x)	[__HYPERVISOR_##x] = "("#x")"
+static const char *xen_hypercall_names[] = {
+	N(set_trap_table),
+	N(mmu_update),
+	N(set_gdt),
+	N(stack_switch),
+	N(set_callbacks),
+	N(fpu_taskswitch),
+	N(sched_op_compat),
+	N(dom0_op),
+	N(set_debugreg),
+	N(get_debugreg),
+	N(update_descriptor),
+	N(memory_op),
+	N(multicall),
+	N(update_va_mapping),
+	N(set_timer_op),
+	N(event_channel_op_compat),
+	N(xen_version),
+	N(console_io),
+	N(physdev_op_compat),
+	N(grant_table_op),
+	N(vm_assist),
+	N(update_va_mapping_otherdomain),
+	N(iret),
+	N(vcpu_op),
+	N(set_segment_base),
+	N(mmuext_op),
+	N(acm_op),
+	N(nmi_op),
+	N(sched_op),
+	N(callback_op),
+	N(xenoprof_op),
+	N(event_channel_op),
+	N(physdev_op),
+	N(hvm_op),
+
+/* Architecture-specific hypercall definitions. */
+	N(arch_0),
+	N(arch_1),
+	N(arch_2),
+	N(arch_3),
+	N(arch_4),
+	N(arch_5),
+	N(arch_6),
+	N(arch_7),
+};
+#undef N
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+static const char *xen_hypercall_name(unsigned op)
+{
+	if (op < ARRAY_SIZE(xen_hypercall_names) &&
+	    xen_hypercall_names[op] != NULL)
+		return xen_hypercall_names[op];
+
+	return "";
+}
+
+unsigned long long process_xen_hypercall_name(struct trace_seq *s,
+					      unsigned long long *args)
+{
+	unsigned int op = args[0];
+
+	trace_seq_printf(s, "%s", xen_hypercall_name(op));
+	return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+	tep_register_print_function(tep,
+				    process_xen_hypercall_name,
+				    TEP_FUNC_ARG_STRING,
+				    "xen_hypercall_name",
+				    TEP_FUNC_ARG_INT,
+				    TEP_FUNC_ARG_VOID);
+	return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+	tep_unregister_print_function(tep, process_xen_hypercall_name,
+				      "xen_hypercall_name");
+}
diff --git a/tools/lib/traceevent/tep_strerror.c b/tools/lib/traceevent/tep_strerror.c
new file mode 100644
index 000000000..4ac26445b
--- /dev/null
+++ b/tools/lib/traceevent/tep_strerror.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: LGPL-2.1
+#undef _GNU_SOURCE
+#include <string.h>
+#include <stdio.h>
+
+#include "event-parse.h"
+
+#undef _PE
+#define _PE(code, str) str
+static const char * const tep_error_str[] = {
+	TEP_ERRORS
+};
+#undef _PE
+
+/*
+ * The tools so far have been using the strerror_r() GNU variant, that returns
+ * a string, be it the buffer passed or something else.
+ *
+ * But that, besides being tricky in cases where we expect that the function
+ * using strerror_r() returns the error formatted in a provided buffer (we have
+ * to check if it returned something else and copy that instead), breaks the
+ * build on systems not using glibc, like Alpine Linux, where musl libc is
+ * used.
+ *
+ * So, introduce yet another wrapper, str_error_r(), that has the GNU
+ * interface, but uses the portable XSI variant of strerror_r(), so that users
+ * rest asured that the provided buffer is used and it is what is returned.
+ */
+int tep_strerror(struct tep_handle *tep __maybe_unused,
+		 enum tep_errno errnum, char *buf, size_t buflen)
+{
+	const char *msg;
+	int idx;
+
+	if (!buflen)
+		return 0;
+
+	if (errnum >= 0) {
+		int err = strerror_r(errnum, buf, buflen);
+		buf[buflen - 1] = 0;
+		return err;
+	}
+
+	if (errnum <= __TEP_ERRNO__START ||
+	    errnum >= __TEP_ERRNO__END)
+		return -1;
+
+	idx = errnum - __TEP_ERRNO__START - 1;
+	msg = tep_error_str[idx];
+	snprintf(buf, buflen, "%s", msg);
+
+	return 0;
+}
diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c
new file mode 100644
index 000000000..8d5ecd2bf
--- /dev/null
+++ b/tools/lib/traceevent/trace-seq.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include "trace-seq.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+
+#include <asm/bug.h>
+#include "event-parse.h"
+#include "event-utils.h"
+
+/*
+ * The TRACE_SEQ_POISON is to catch the use of using
+ * a trace_seq structure after it was destroyed.
+ */
+#define TRACE_SEQ_POISON	((void *)0xdeadbeef)
+#define TRACE_SEQ_CHECK(s)						\
+do {									\
+	if (WARN_ONCE((s)->buffer == TRACE_SEQ_POISON,			\
+		      "Usage of trace_seq after it was destroyed"))	\
+		(s)->state = TRACE_SEQ__BUFFER_POISONED;		\
+} while (0)
+
+#define TRACE_SEQ_CHECK_RET_N(s, n)		\
+do {						\
+	TRACE_SEQ_CHECK(s);			\
+	if ((s)->state != TRACE_SEQ__GOOD)	\
+		return n; 			\
+} while (0)
+
+#define TRACE_SEQ_CHECK_RET(s)   TRACE_SEQ_CHECK_RET_N(s, )
+#define TRACE_SEQ_CHECK_RET0(s)  TRACE_SEQ_CHECK_RET_N(s, 0)
+
+/**
+ * trace_seq_init - initialize the trace_seq structure
+ * @s: a pointer to the trace_seq structure to initialize
+ */
+void trace_seq_init(struct trace_seq *s)
+{
+	s->len = 0;
+	s->readpos = 0;
+	s->buffer_size = TRACE_SEQ_BUF_SIZE;
+	s->buffer = malloc(s->buffer_size);
+	if (s->buffer != NULL)
+		s->state = TRACE_SEQ__GOOD;
+	else
+		s->state = TRACE_SEQ__MEM_ALLOC_FAILED;
+}
+
+/**
+ * trace_seq_reset - re-initialize the trace_seq structure
+ * @s: a pointer to the trace_seq structure to reset
+ */
+void trace_seq_reset(struct trace_seq *s)
+{
+	if (!s)
+		return;
+	TRACE_SEQ_CHECK(s);
+	s->len = 0;
+	s->readpos = 0;
+}
+
+/**
+ * trace_seq_destroy - free up memory of a trace_seq
+ * @s: a pointer to the trace_seq to free the buffer
+ *
+ * Only frees the buffer, not the trace_seq struct itself.
+ */
+void trace_seq_destroy(struct trace_seq *s)
+{
+	if (!s)
+		return;
+	TRACE_SEQ_CHECK_RET(s);
+	free(s->buffer);
+	s->buffer = TRACE_SEQ_POISON;
+}
+
+static void expand_buffer(struct trace_seq *s)
+{
+	char *buf;
+
+	buf = realloc(s->buffer, s->buffer_size + TRACE_SEQ_BUF_SIZE);
+	if (WARN_ONCE(!buf, "Can't allocate trace_seq buffer memory")) {
+		s->state = TRACE_SEQ__MEM_ALLOC_FAILED;
+		return;
+	}
+
+	s->buffer = buf;
+	s->buffer_size += TRACE_SEQ_BUF_SIZE;
+}
+
+/**
+ * trace_seq_printf - sequence printing of trace information
+ * @s: trace sequence descriptor
+ * @fmt: printf format string
+ *
+ * It returns 0 if the trace oversizes the buffer's free
+ * space, the number of characters printed, or a negative
+ * value in case of an error.
+ *
+ * The tracer may use either sequence operations or its own
+ * copy to user routines. To simplify formating of a trace
+ * trace_seq_printf is used to store strings into a special
+ * buffer (@s). Then the output may be either used by
+ * the sequencer or pulled into another buffer.
+ */
+int
+trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+{
+	va_list ap;
+	int len;
+	int ret;
+
+ try_again:
+	TRACE_SEQ_CHECK_RET0(s);
+
+	len = (s->buffer_size - 1) - s->len;
+
+	va_start(ap, fmt);
+	ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
+	va_end(ap);
+
+	if (ret >= len) {
+		expand_buffer(s);
+		goto try_again;
+	}
+
+	if (ret > 0)
+		s->len += ret;
+
+	return ret;
+}
+
+/**
+ * trace_seq_vprintf - sequence printing of trace information
+ * @s: trace sequence descriptor
+ * @fmt: printf format string
+ *
+ * It returns 0 if the trace oversizes the buffer's free
+ * space, the number of characters printed, or a negative
+ * value in case of an error.
+ * *
+ * The tracer may use either sequence operations or its own
+ * copy to user routines. To simplify formating of a trace
+ * trace_seq_printf is used to store strings into a special
+ * buffer (@s). Then the output may be either used by
+ * the sequencer or pulled into another buffer.
+ */
+int
+trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
+{
+	int len;
+	int ret;
+
+ try_again:
+	TRACE_SEQ_CHECK_RET0(s);
+
+	len = (s->buffer_size - 1) - s->len;
+
+	ret = vsnprintf(s->buffer + s->len, len, fmt, args);
+
+	if (ret >= len) {
+		expand_buffer(s);
+		goto try_again;
+	}
+
+	if (ret > 0)
+		s->len += ret;
+
+	return ret;
+}
+
+/**
+ * trace_seq_puts - trace sequence printing of simple string
+ * @s: trace sequence descriptor
+ * @str: simple string to record
+ *
+ * The tracer may use either the sequence operations or its own
+ * copy to user routines. This function records a simple string
+ * into a special buffer (@s) for later retrieval by a sequencer
+ * or other mechanism.
+ */
+int trace_seq_puts(struct trace_seq *s, const char *str)
+{
+	int len;
+
+	TRACE_SEQ_CHECK_RET0(s);
+
+	len = strlen(str);
+
+	while (len > ((s->buffer_size - 1) - s->len))
+		expand_buffer(s);
+
+	TRACE_SEQ_CHECK_RET0(s);
+
+	memcpy(s->buffer + s->len, str, len);
+	s->len += len;
+
+	return len;
+}
+
+int trace_seq_putc(struct trace_seq *s, unsigned char c)
+{
+	TRACE_SEQ_CHECK_RET0(s);
+
+	while (s->len >= (s->buffer_size - 1))
+		expand_buffer(s);
+
+	TRACE_SEQ_CHECK_RET0(s);
+
+	s->buffer[s->len++] = c;
+
+	return 1;
+}
+
+void trace_seq_terminate(struct trace_seq *s)
+{
+	TRACE_SEQ_CHECK_RET(s);
+
+	/* There's always one character left on the buffer */
+	s->buffer[s->len] = 0;
+}
+
+int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp)
+{
+	TRACE_SEQ_CHECK(s);
+
+	switch (s->state) {
+	case TRACE_SEQ__GOOD:
+		return fprintf(fp, "%.*s", s->len, s->buffer);
+	case TRACE_SEQ__BUFFER_POISONED:
+		fprintf(fp, "%s\n", "Usage of trace_seq after it was destroyed");
+		break;
+	case TRACE_SEQ__MEM_ALLOC_FAILED:
+		fprintf(fp, "%s\n", "Can't allocate trace_seq buffer memory");
+		break;
+	}
+	return -1;
+}
+
+int trace_seq_do_printf(struct trace_seq *s)
+{
+	return trace_seq_do_fprintf(s, stdout);
+}
diff --git a/tools/lib/traceevent/trace-seq.h b/tools/lib/traceevent/trace-seq.h
new file mode 100644
index 000000000..d68ec69f8
--- /dev/null
+++ b/tools/lib/traceevent/trace-seq.h
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+
+#ifndef _TRACE_SEQ_H
+#define _TRACE_SEQ_H
+
+#include <stdarg.h>
+#include <stdio.h>
+
+/* ----------------------- trace_seq ----------------------- */
+
+#ifndef TRACE_SEQ_BUF_SIZE
+#define TRACE_SEQ_BUF_SIZE 4096
+#endif
+
+enum trace_seq_fail {
+	TRACE_SEQ__GOOD,
+	TRACE_SEQ__BUFFER_POISONED,
+	TRACE_SEQ__MEM_ALLOC_FAILED,
+};
+
+/*
+ * Trace sequences are used to allow a function to call several other functions
+ * to create a string of data to use (up to a max of PAGE_SIZE).
+ */
+
+struct trace_seq {
+	char			*buffer;
+	unsigned int		buffer_size;
+	unsigned int		len;
+	unsigned int		readpos;
+	enum trace_seq_fail	state;
+};
+
+void trace_seq_init(struct trace_seq *s);
+void trace_seq_reset(struct trace_seq *s);
+void trace_seq_destroy(struct trace_seq *s);
+
+extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
+	__attribute__ ((format (printf, 2, 0)));
+
+extern int trace_seq_puts(struct trace_seq *s, const char *str);
+extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
+
+extern void trace_seq_terminate(struct trace_seq *s);
+
+extern int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp);
+extern int trace_seq_do_printf(struct trace_seq *s);
+
+#endif /* _TRACE_SEQ_H */
diff --git a/tools/lib/vsprintf.c b/tools/lib/vsprintf.c
new file mode 100644
index 000000000..8780b4cda
--- /dev/null
+++ b/tools/lib/vsprintf.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <linux/kernel.h>
+#include <stdio.h>
+
+int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+       int i = vsnprintf(buf, size, fmt, args);
+       ssize_t ssize = size;
+
+       return (i >= ssize) ? (ssize - 1) : i;
+}
+
+int scnprintf(char * buf, size_t size, const char * fmt, ...)
+{
+       ssize_t ssize = size;
+       va_list args;
+       int i;
+
+       va_start(args, fmt);
+       i = vsnprintf(buf, size, fmt, args);
+       va_end(args);
+
+       return (i >= ssize) ? (ssize - 1) : i;
+}
+
+int scnprintf_pad(char * buf, size_t size, const char * fmt, ...)
+{
+	ssize_t ssize = size;
+	va_list args;
+	int i;
+
+	va_start(args, fmt);
+	i = vscnprintf(buf, size, fmt, args);
+	va_end(args);
+
+	if (i < (int) size) {
+		for (; i < (int) size; i++)
+			buf[i] = ' ';
+		buf[i] = 0x0;
+	}
+
+	return (i >= ssize) ? (ssize - 1) : i;
+}
diff --git a/tools/lib/zalloc.c b/tools/lib/zalloc.c
new file mode 100644
index 000000000..9c856d59f
--- /dev/null
+++ b/tools/lib/zalloc.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: LGPL-2.1
+
+#include <stdlib.h>
+#include <linux/zalloc.h>
+
+void *zalloc(size_t size)
+{
+	return calloc(1, size);
+}
+
+void __zfree(void **ptr)
+{
+	free(*ptr);
+	*ptr = NULL;
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-27 10:05:51 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-27 10:05:51 +0000
commit	5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 (patch)
tree	a94efe259b9009378be6d90eb30d2b019d95c194 /tools/lib
parent	Initial commit. (diff)
download	linux-upstream/5.10.209.tar.xz linux-upstream/5.10.209.zip